PyPI - cognite-neat - Versions diffs - 0.90.2__py3-none-any.whl → 0.91.0__py3-none-any.whl - Mend

cognite-neat 0.90.2__py3-none-any.whl → 0.91.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cognite-neat might be problematic. Click here for more details.

Files changed (30) hide show

cognite/neat/_version.py +1 -1
cognite/neat/graph/extractors/__init__.py +3 -0
cognite/neat/graph/extractors/_dexpi.py +4 -4
cognite/neat/graph/extractors/_iodd.py +160 -0
cognite/neat/issues/_base.py +6 -2
cognite/neat/rules/exporters/_rules2excel.py +3 -3
cognite/neat/rules/exporters/_rules2yaml.py +5 -1
cognite/neat/rules/models/__init__.py +2 -2
cognite/neat/rules/models/_base_input.py +2 -2
cognite/neat/rules/models/_base_rules.py +142 -142
cognite/neat/rules/models/asset/_rules.py +1 -34
cognite/neat/rules/models/dms/_rules.py +127 -46
cognite/neat/rules/models/dms/_validation.py +2 -2
cognite/neat/rules/models/domain.py +16 -19
cognite/neat/rules/models/entities/_single_value.py +25 -11
cognite/neat/rules/models/entities/_types.py +0 -10
cognite/neat/rules/models/information/_rules.py +68 -43
cognite/neat/rules/models/information/_validation.py +5 -5
cognite/neat/rules/transformers/_converters.py +6 -8
cognite/neat/rules/transformers/_pipelines.py +8 -4
cognite/neat/store/_base.py +1 -1
cognite/neat/utils/xml_.py +27 -12
{cognite_neat-0.90.2.dist-info → cognite_neat-0.91.0.dist-info}/METADATA +1 -1
{cognite_neat-0.90.2.dist-info → cognite_neat-0.91.0.dist-info}/RECORD +27 -29
cognite/neat/rules/models/asset/_serializer.py +0 -73
cognite/neat/rules/models/dms/_serializer.py +0 -157
cognite/neat/rules/models/information/_serializer.py +0 -73
{cognite_neat-0.90.2.dist-info → cognite_neat-0.91.0.dist-info}/LICENSE +0 -0
{cognite_neat-0.90.2.dist-info → cognite_neat-0.91.0.dist-info}/WHEEL +0 -0
{cognite_neat-0.90.2.dist-info → cognite_neat-0.91.0.dist-info}/entry_points.txt +0 -0

cognite/neat/rules/models/_base_rules.py CHANGED Viewed

@@ -4,13 +4,11 @@ its sub-models and validators.
 from __future__ import annotations
-import math
 import sys
 import types
 from abc import ABC, abstractmethod
-from collections.abc import Callable, Iterator
-from functools import wraps
-from typing import Annotated, Any, ClassVar, Generic, Literal, TypeVar
+from collections.abc import Callable, Hashable, Iterator, MutableSequence, Sequence
+from typing import Annotated, Any, ClassVar, Literal, SupportsIndex, TypeVar, get_args, get_origin, overload
 import pandas as pd
 from pydantic import (
@@ -18,79 +16,25 @@ from pydantic import (
     BeforeValidator,
     ConfigDict,
     Field,
+    GetCoreSchemaHandler,
     PlainSerializer,
     field_validator,
     model_serializer,
-    model_validator,
 )
-from pydantic.fields import FieldInfo
 from pydantic.main import IncEx
+from pydantic_core import core_schema
 if sys.version_info >= (3, 11):
     from enum import StrEnum
+    from typing import Self
 else:
     from backports.strenum import StrEnum
+    from typing_extensions import Self
 METADATA_VALUE_MAX_LENGTH = 5120
-def replace_nan_floats_with_default(values: dict, model_fields: dict[str, FieldInfo]) -> dict:
-    output = {}
-    for field_name, value in values.items():
-        is_nan_float = isinstance(value, float) and math.isnan(value)
-        if not is_nan_float:
-            output[field_name] = value
-            continue
-        if field_name in model_fields:
-            output[field_name] = model_fields[field_name].default
-        else:
-            # field_name may be an alias
-            source_name = next((name for name, field in model_fields.items() if field.alias == field_name), None)
-            if source_name:
-                output[field_name] = model_fields[source_name].default
-            else:
-                # Just pass it through if it is not an alias.
-                output[field_name] = value
-    return output
-def skip_field_validator(validators_field):
-    def decorator(func):
-        @wraps(func)
-        def wrapper(cls, value, values):
-            if isinstance(values, dict):
-                to_skip = values.get(validators_field, set())
-            else:
-                try:
-                    to_skip = values.data.get(validators_field, set())
-                except Exception:
-                    to_skip = set()
-            if "all" in to_skip or func.__name__ in to_skip:
-                return value
-            return func(cls, value, values)
-        return wrapper
-    return decorator
-def skip_model_validator(validators_field):
-    def decorator(func):
-        @wraps(func)
-        def wrapper(self):
-            to_skip = getattr(self, validators_field, set())
-            if "all" in to_skip or func.__name__ in to_skip:
-                return self
-            return func(self)
-        return wrapper
-    return decorator
 def _get_required_fields(model: type[BaseModel], use_alias: bool = False) -> set[str]:
     """Get required fields from a pydantic model.
@@ -148,7 +92,7 @@ class MatchType(StrEnum):
     partial = "partial"
-class RuleModel(BaseModel):
+class SchemaModel(BaseModel):
     model_config: ClassVar[ConfigDict] = ConfigDict(
         populate_by_name=True,
         str_strip_whitespace=True,
@@ -164,50 +108,8 @@ class RuleModel(BaseModel):
         """Returns a set of mandatory fields for the model."""
         return _get_required_fields(cls, use_alias)
-    @classmethod
-    def sheets(cls, by_alias: bool = False) -> list[str]:
-        """Returns a list of sheet names for the model."""
-        return [
-            (field.alias or field_name) if by_alias else field_name
-            for field_name, field in cls.model_fields.items()
-            if field_name != "validators_to_skip"
-        ]
-    @classmethod
-    def headers_by_sheet(cls, by_alias: bool = False) -> dict[str, list[str]]:
-        """Returns a list of headers for the model."""
-        headers_by_sheet: dict[str, list[str]] = {}
-        for field_name, field in cls.model_fields.items():
-            if field_name == "validators_to_skip":
-                continue
-            sheet_name = (field.alias or field_name) if by_alias else field_name
-            annotation = field.annotation
-            if isinstance(annotation, types.UnionType):
-                annotation = annotation.__args__[0]
-            try:
-                if isinstance(annotation, type) and issubclass(annotation, SheetList):
-                    # We know that this is a SheetList, so we can safely access the annotation
-                    # which is the concrete type of the SheetEntity.
-                    model_fields = annotation.model_fields["data"].annotation.__args__[0].model_fields  # type: ignore[union-attr]
-                elif isinstance(annotation, type) and issubclass(annotation, BaseModel):
-                    model_fields = annotation.model_fields
-                else:
-                    model_fields = {}
-            except TypeError:
-                # Python 3.10 raises TypeError: issubclass() arg 1 must be a class
-                # when calling issubclass(annotation, SheetList) with the dict annotation
-                model_fields = {}
-            headers_by_sheet[sheet_name] = [
-                (field.alias or field_name) if by_alias else field_name
-                for field_name, field in model_fields.items()
-                if field_name != "validators_to_skip"
-            ]
-        return headers_by_sheet
-class BaseMetadata(RuleModel):
+class BaseMetadata(SchemaModel):
     """
     Metadata model for data model
     """
@@ -242,13 +144,13 @@ class BaseMetadata(RuleModel):
         raise NotImplementedError()
-class BaseRules(RuleModel, ABC):
+class BaseRules(SchemaModel, ABC):
     """
     Rules is a core concept in `neat`. This represents fusion of data model
     definitions and (optionally) the transformation rules used to transform the data/graph
     from the source representation to the target representation defined by the data model.
-    The rules are defined in a Excel sheet and then parsed into a `Rules` object. The
-    `Rules` object is then used to generate data model and the`RDF` graph made of data
+    The rules are defined in an Excel sheet and then parsed into a `Rules` object. The
+    `Rules` object is then used to generate data model and the `RDF` graph made of data
     model instances.
     Args:
@@ -257,70 +159,158 @@ class BaseRules(RuleModel, ABC):
     """
     metadata: BaseMetadata
+    reference: Self | None = Field(None, alias="Reference")
+    @classmethod
+    def headers_by_sheet(cls, by_alias: bool = False) -> dict[str, list[str]]:
+        """Returns a list of headers for the model, typically used by ExcelExporter"""
+        headers_by_sheet: dict[str, list[str]] = {}
+        for field_name, field in cls.model_fields.items():
+            if field_name == "validators_to_skip":
+                continue
+            sheet_name = (field.alias or field_name) if by_alias else field_name
+            annotation = field.annotation
+            if isinstance(annotation, types.UnionType):
+                annotation = annotation.__args__[0]
+            try:
+                if isinstance(annotation, types.GenericAlias) and get_origin(annotation) is SheetList:
+                    # We know that this is a SheetList, so we can safely access the annotation
+                    # which is the concrete type of the SheetEntity.
+                    model_fields = get_args(annotation)[0].model_fields  # type: ignore[union-attr]
+                elif isinstance(annotation, type) and issubclass(annotation, BaseModel):
+                    model_fields = annotation.model_fields
+                else:
+                    model_fields = {}
+            except TypeError:
+                # Python 3.10 raises TypeError: issubclass() arg 1 must be a class
+                # when calling issubclass(annotation, SheetList) with the dict annotation
+                model_fields = {}
+            headers_by_sheet[sheet_name] = [
+                (field.alias or field_name) if by_alias else field_name
+                for field_name, field in model_fields.items()
+                if field_name != "validators_to_skip" and not field.exclude
+            ]
+        return headers_by_sheet
     def dump(
         self,
+        entities_exclude_defaults: bool = True,
+        as_reference: bool = False,
         mode: Literal["python", "json"] = "python",
         by_alias: bool = False,
         exclude: IncEx = None,
         exclude_none: bool = False,
         exclude_unset: bool = False,
         exclude_defaults: bool = False,
-        as_reference: bool = False,
     ) -> dict[str, Any]:
         """Dump the model to a dictionary.
         This is used in the Exporters to dump rules in the required format.
+        Args:
+            entities_exclude_defaults: Whether to exclude default prefix (and version) for entities.
+                For example, given a class that is dumped as 'my_prefix:MyClass', if the prefix for the rules
+                set in metadata.prefix = 'my_prefix', then this class will be dumped as 'MyClass' when this flag is set.
+                Defaults to True.
+            as_reference (bool, optional): Whether to dump as reference. For Information and DMS rules, this will
+                set the reference column/field to the reference of that entity. This is used in the ExcelExporter
+                to dump a reference model.
+            mode: The mode in which `to_python` should run.
+                If mode is 'json', the output will only contain JSON serializable types.
+                If mode is 'python', the output may contain non-JSON-serializable Python objects.
+            by_alias: Whether to use the field's alias in the dictionary key if defined.
+            exclude: A set of fields to exclude from the output.
+            exclude_none: Whether to exclude fields that have a value of `None`.
+            exclude_unset: Whether to exclude fields that have not been explicitly set.
+            exclude_defaults: Whether to exclude fields that are set to their default value.
         """
-        return self.model_dump(
+        for field_name in self.model_fields.keys():
+            value = getattr(self, field_name)
+            # Ensure deterministic order of properties, classes, views, and so on
+            if isinstance(value, SheetList):
+                value.sort(key=lambda x: x._identifier())
+        context: dict[str, Any] = {"as_reference": as_reference}
+        if entities_exclude_defaults:
+            context["metadata"] = self.metadata
+        exclude_input: IncEx
+        if self.reference is None:
+            exclude_input = exclude
+        else:
+            # If the rules has a reference, we dump that separately with the as_reference flag set to True.
+            # We don't want to include the reference in the main dump, so we exclude it here.
+            # This is to include whatever is in the exclude set from the user.
+            if isinstance(exclude, dict):
+                exclude_input = exclude.copy()
+                exclude_input["reference"] = {"__all__"}  # type: ignore[index]
+            elif isinstance(exclude, set):
+                exclude_input = exclude.copy()
+                exclude_input.add("reference")  # type: ignore[arg-type]
+            else:
+                exclude_input = {"reference"}
+        output = self.model_dump(
             mode=mode,
             by_alias=by_alias,
-            exclude=exclude,
+            exclude=exclude_input,
             exclude_none=exclude_none,
             exclude_unset=exclude_unset,
             exclude_defaults=exclude_defaults,
+            context=context,
         )
-# An sheet entity is either a class or a property.
-class SheetEntity(RuleModel):
+        is_reference_user_excluded = isinstance(exclude, dict | set) and "reference" in exclude
+        if self.reference is not None and not is_reference_user_excluded:
+            # If the rules has a reference, we dump that separately with the as_reference flag set to True.
+            # Unless the user has explicitly excluded the reference.
+            output["Reference" if by_alias else "reference"] = self.reference.dump(
+                mode=mode,
+                by_alias=by_alias,
+                exclude=exclude,
+                exclude_none=exclude_none,
+                exclude_unset=exclude_unset,
+                exclude_defaults=exclude_defaults,
+                entities_exclude_defaults=entities_exclude_defaults,
+                as_reference=True,
+            )
+        return output
+class SheetRow(SchemaModel):
     @field_validator("*", mode="before")
     def strip_string(cls, value: Any) -> Any:
         if isinstance(value, str):
             return value.strip()
         return value
+    @abstractmethod
+    def _identifier(self) -> tuple[Hashable, ...]:
+        raise NotImplementedError()
-T_Entity = TypeVar("T_Entity", bound=SheetEntity)
-class SheetList(BaseModel, Generic[T_Entity]):
-    data: list[T_Entity] = Field(default_factory=list)
-    @model_validator(mode="before")
-    def from_list_format(cls, values: Any) -> Any:
-        if isinstance(values, list):
-            return {"data": values}
-        return values
-    def __contains__(self, item: str) -> bool:
-        return item in self.data
+T_SheetRow = TypeVar("T_SheetRow", bound=SheetRow)
-    def __len__(self) -> int:
-        return len(self.data)
-    def __iter__(self) -> Iterator[T_Entity]:  # type: ignore[override]
-        return iter(self.data)
+class SheetList(list, MutableSequence[T_SheetRow]):
+    @classmethod
+    def __get_pydantic_core_schema__(cls, source: Any, handler: GetCoreSchemaHandler) -> core_schema.CoreSchema:
+        if args := get_args(source):
+            item_type = args[0]
+        else:
+            # Someone use SheetList without specifying the type
+            raise TypeError("SheetList must be used with a type argument, e.g., SheetList[InformationProperty]")
-    def append(self, value: T_Entity) -> None:
-        self.data.append(value)
+        instance_schema = core_schema.is_instance_schema(cls)
+        sequence_row_schema = handler.generate_schema(Sequence[item_type])  # type: ignore[valid-type]
-    def extend(self, values: list[T_Entity]) -> None:
-        self.data.extend(values)
+        non_instance_schema = core_schema.no_info_after_validator_function(SheetList, sequence_row_schema)
+        return core_schema.union_schema([instance_schema, non_instance_schema])
     def to_pandas(self, drop_na_columns: bool = True, include: list[str] | None = None) -> pd.DataFrame:
         """Converts ResourceDict to pandas DataFrame."""
-        df = pd.DataFrame([entity.model_dump() for entity in self.data])
+        df = pd.DataFrame([entity.model_dump() for entity in self])
         if drop_na_columns:
             df = df.dropna(axis=1, how="all")
         if include is not None:
@@ -331,10 +321,20 @@ class SheetList(BaseModel, Generic[T_Entity]):
         """Returns HTML representation of ResourceDict."""
         return self.to_pandas(drop_na_columns=True)._repr_html_()  # type: ignore[operator]
-    @classmethod
-    def mandatory_fields(cls, use_alias=False) -> set[str]:
-        """Returns a set of mandatory fields for the model."""
-        return _get_required_fields(cls, use_alias)
+    # Implemented to get correct type hints
+    def __iter__(self) -> Iterator[T_SheetRow]:
+        return super().__iter__()
+    @overload
+    def __getitem__(self, index: SupportsIndex) -> T_SheetRow: ...
+    @overload
+    def __getitem__(self, index: slice) -> SheetList[T_SheetRow]: ...
+    def __getitem__(self, index: SupportsIndex | slice, /) -> T_SheetRow | SheetList[T_SheetRow]:
+        if isinstance(index, slice):
+            return SheetList[T_SheetRow](super().__getitem__(index))
+        return super().__getitem__(index)
 ExtensionCategoryType = Annotated[

cognite/neat/rules/models/asset/_rules.py CHANGED Viewed

@@ -1,8 +1,7 @@
 import sys
-from typing import Any, ClassVar, Literal, cast
+from typing import Any, ClassVar, cast
 from pydantic import Field, field_validator, model_validator
-from pydantic.main import IncEx
 from rdflib import Namespace
 from cognite.neat.constants import get_default_prefixes
@@ -104,35 +103,3 @@ class AssetRules(BaseRules):
         if issue_list.has_errors:
             raise issue_list.as_exception()
         return self
-    def dump(
-        self,
-        mode: Literal["python", "json"] = "python",
-        by_alias: bool = False,
-        exclude: IncEx = None,
-        exclude_none: bool = False,
-        exclude_unset: bool = False,
-        exclude_defaults: bool = False,
-        as_reference: bool = False,
-    ) -> dict[str, Any]:
-        from ._serializer import _AssetRulesSerializer
-        dumped = self.model_dump(
-            mode=mode,
-            by_alias=by_alias,
-            exclude=exclude,
-            exclude_none=exclude_none,
-            exclude_unset=exclude_unset,
-            exclude_defaults=exclude_defaults,
-        )
-        prefix = self.metadata.prefix
-        serializer = _AssetRulesSerializer(by_alias, prefix)
-        cleaned = serializer.clean(dumped, as_reference)
-        last = "Last" if by_alias else "last"
-        if last_dump := cleaned.get(last):
-            cleaned[last] = serializer.clean(last_dump, False)
-        reference = "Reference" if by_alias else "reference"
-        if self.reference and (ref_dump := cleaned.get(reference)):
-            prefix = self.reference.metadata.prefix
-            cleaned[reference] = _AssetRulesSerializer(by_alias, prefix).clean(ref_dump, True)
-        return cleaned

cognite-neat 0.90.2__py3-none-any.whl → 0.91.0__py3-none-any.whl

Potentially problematic release.

cognite-neat 0.90.2__py3-none-any.whl → 0.91.0__py3-none-any.whl