PyPI - cognite-neat - Versions diffs - 0.104.0__py3-none-any.whl → 0.105.1__py3-none-any.whl - Mend

cognite-neat 0.104.0__py3-none-any.whl → 0.105.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cognite-neat might be problematic. Click here for more details.

Files changed (143) hide show

cognite/neat/_client/_api/data_modeling_loaders.py +83 -23
cognite/neat/_client/_api/schema.py +2 -1
cognite/neat/_client/data_classes/neat_sequence.py +261 -0
cognite/neat/_client/data_classes/schema.py +5 -1
cognite/neat/_client/testing.py +33 -0
cognite/neat/_constants.py +57 -0
cognite/neat/_graph/extractors/_classic_cdf/_base.py +6 -5
cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +225 -11
cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
cognite/neat/_graph/loaders/_rdf2dms.py +31 -5
cognite/neat/_graph/transformers/__init__.py +3 -1
cognite/neat/_graph/transformers/_classic_cdf.py +39 -51
cognite/neat/_graph/transformers/_rdfpath.py +14 -15
cognite/neat/_graph/transformers/_value_type.py +72 -0
cognite/neat/_issues/__init__.py +0 -2
cognite/neat/_issues/_base.py +19 -35
cognite/neat/_issues/warnings/__init__.py +6 -1
cognite/neat/_issues/warnings/_general.py +7 -0
cognite/neat/_issues/warnings/_properties.py +11 -0
cognite/neat/_issues/warnings/_resources.py +11 -0
cognite/neat/_rules/exporters/_rules2dms.py +35 -1
cognite/neat/_rules/exporters/_rules2excel.py +2 -2
cognite/neat/_rules/importers/_dms2rules.py +66 -55
cognite/neat/_rules/models/_base_rules.py +4 -1
cognite/neat/_rules/models/entities/_wrapped.py +10 -5
cognite/neat/_rules/models/mapping/_classic2core.yaml +239 -38
cognite/neat/_rules/transformers/__init__.py +8 -2
cognite/neat/_rules/transformers/_converters.py +271 -188
cognite/neat/_rules/transformers/_mapping.py +75 -59
cognite/neat/_rules/transformers/_verification.py +2 -3
cognite/neat/_session/_inspect.py +3 -1
cognite/neat/_session/_prepare.py +112 -24
cognite/neat/_session/_read.py +33 -70
cognite/neat/_session/_state.py +2 -2
cognite/neat/_session/_to.py +2 -2
cognite/neat/_store/_rules_store.py +4 -8
cognite/neat/_utils/reader/_base.py +27 -0
cognite/neat/_version.py +1 -1
{cognite_neat-0.104.0.dist-info → cognite_neat-0.105.1.dist-info}/METADATA +4 -3
cognite_neat-0.105.1.dist-info/RECORD +179 -0
{cognite_neat-0.104.0.dist-info → cognite_neat-0.105.1.dist-info}/WHEEL +1 -1
cognite/neat/_app/api/__init__.py +0 -0
cognite/neat/_app/api/asgi/metrics.py +0 -4
cognite/neat/_app/api/configuration.py +0 -98
cognite/neat/_app/api/context_manager/__init__.py +0 -3
cognite/neat/_app/api/context_manager/manager.py +0 -16
cognite/neat/_app/api/data_classes/__init__.py +0 -0
cognite/neat/_app/api/data_classes/rest.py +0 -59
cognite/neat/_app/api/explorer.py +0 -66
cognite/neat/_app/api/routers/configuration.py +0 -25
cognite/neat/_app/api/routers/crud.py +0 -102
cognite/neat/_app/api/routers/metrics.py +0 -10
cognite/neat/_app/api/routers/workflows.py +0 -224
cognite/neat/_app/api/utils/__init__.py +0 -0
cognite/neat/_app/api/utils/data_mapping.py +0 -17
cognite/neat/_app/api/utils/logging.py +0 -26
cognite/neat/_app/api/utils/query_templates.py +0 -92
cognite/neat/_app/main.py +0 -17
cognite/neat/_app/monitoring/__init__.py +0 -0
cognite/neat/_app/monitoring/metrics.py +0 -69
cognite/neat/_app/ui/index.html +0 -1
cognite/neat/_app/ui/neat-app/.gitignore +0 -23
cognite/neat/_app/ui/neat-app/README.md +0 -70
cognite/neat/_app/ui/neat-app/build/asset-manifest.json +0 -14
cognite/neat/_app/ui/neat-app/build/favicon.ico +0 -0
cognite/neat/_app/ui/neat-app/build/img/architect-icon.svg +0 -116
cognite/neat/_app/ui/neat-app/build/img/developer-icon.svg +0 -112
cognite/neat/_app/ui/neat-app/build/img/sme-icon.svg +0 -34
cognite/neat/_app/ui/neat-app/build/index.html +0 -1
cognite/neat/_app/ui/neat-app/build/logo192.png +0 -0
cognite/neat/_app/ui/neat-app/build/manifest.json +0 -25
cognite/neat/_app/ui/neat-app/build/robots.txt +0 -3
cognite/neat/_app/ui/neat-app/build/static/css/main.72e3d92e.css +0 -2
cognite/neat/_app/ui/neat-app/build/static/css/main.72e3d92e.css.map +0 -1
cognite/neat/_app/ui/neat-app/build/static/js/main.5a52cf09.js +0 -3
cognite/neat/_app/ui/neat-app/build/static/js/main.5a52cf09.js.LICENSE.txt +0 -88
cognite/neat/_app/ui/neat-app/build/static/js/main.5a52cf09.js.map +0 -1
cognite/neat/_app/ui/neat-app/build/static/media/logo.8093b84df9ed36a174c629d6fe0b730d.svg +0 -1
cognite/neat/_app/ui/neat-app/package-lock.json +0 -18306
cognite/neat/_app/ui/neat-app/package.json +0 -62
cognite/neat/_app/ui/neat-app/public/favicon.ico +0 -0
cognite/neat/_app/ui/neat-app/public/img/architect-icon.svg +0 -116
cognite/neat/_app/ui/neat-app/public/img/developer-icon.svg +0 -112
cognite/neat/_app/ui/neat-app/public/img/sme-icon.svg +0 -34
cognite/neat/_app/ui/neat-app/public/index.html +0 -43
cognite/neat/_app/ui/neat-app/public/logo192.png +0 -0
cognite/neat/_app/ui/neat-app/public/manifest.json +0 -25
cognite/neat/_app/ui/neat-app/public/robots.txt +0 -3
cognite/neat/_app/ui/neat-app/src/App.css +0 -38
cognite/neat/_app/ui/neat-app/src/App.js +0 -17
cognite/neat/_app/ui/neat-app/src/App.test.js +0 -8
cognite/neat/_app/ui/neat-app/src/MainContainer.tsx +0 -70
cognite/neat/_app/ui/neat-app/src/components/JsonViewer.tsx +0 -43
cognite/neat/_app/ui/neat-app/src/components/LocalUploader.tsx +0 -124
cognite/neat/_app/ui/neat-app/src/components/OverviewComponentEditorDialog.tsx +0 -63
cognite/neat/_app/ui/neat-app/src/components/StepEditorDialog.tsx +0 -511
cognite/neat/_app/ui/neat-app/src/components/TabPanel.tsx +0 -36
cognite/neat/_app/ui/neat-app/src/components/Utils.tsx +0 -56
cognite/neat/_app/ui/neat-app/src/components/WorkflowDeleteDialog.tsx +0 -60
cognite/neat/_app/ui/neat-app/src/components/WorkflowExecutionReport.tsx +0 -112
cognite/neat/_app/ui/neat-app/src/components/WorkflowImportExportDialog.tsx +0 -67
cognite/neat/_app/ui/neat-app/src/components/WorkflowMetadataDialog.tsx +0 -79
cognite/neat/_app/ui/neat-app/src/index.css +0 -13
cognite/neat/_app/ui/neat-app/src/index.js +0 -13
cognite/neat/_app/ui/neat-app/src/logo.svg +0 -1
cognite/neat/_app/ui/neat-app/src/reportWebVitals.js +0 -13
cognite/neat/_app/ui/neat-app/src/setupTests.js +0 -5
cognite/neat/_app/ui/neat-app/src/types/WorkflowTypes.ts +0 -388
cognite/neat/_app/ui/neat-app/src/views/AboutView.tsx +0 -61
cognite/neat/_app/ui/neat-app/src/views/ConfigView.tsx +0 -184
cognite/neat/_app/ui/neat-app/src/views/GlobalConfigView.tsx +0 -180
cognite/neat/_app/ui/neat-app/src/views/WorkflowView.tsx +0 -570
cognite/neat/_app/ui/neat-app/tsconfig.json +0 -27
cognite/neat/_workflows/__init__.py +0 -17
cognite/neat/_workflows/base.py +0 -590
cognite/neat/_workflows/cdf_store.py +0 -393
cognite/neat/_workflows/examples/Export_DMS/workflow.yaml +0 -89
cognite/neat/_workflows/examples/Export_Semantic_Data_Model/workflow.yaml +0 -66
cognite/neat/_workflows/examples/Import_DMS/workflow.yaml +0 -65
cognite/neat/_workflows/examples/Validate_Rules/workflow.yaml +0 -67
cognite/neat/_workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
cognite/neat/_workflows/manager.py +0 -292
cognite/neat/_workflows/model.py +0 -203
cognite/neat/_workflows/steps/__init__.py +0 -0
cognite/neat/_workflows/steps/data_contracts.py +0 -109
cognite/neat/_workflows/steps/lib/__init__.py +0 -0
cognite/neat/_workflows/steps/lib/current/__init__.py +0 -6
cognite/neat/_workflows/steps/lib/current/graph_extractor.py +0 -100
cognite/neat/_workflows/steps/lib/current/graph_loader.py +0 -51
cognite/neat/_workflows/steps/lib/current/graph_store.py +0 -48
cognite/neat/_workflows/steps/lib/current/rules_exporter.py +0 -537
cognite/neat/_workflows/steps/lib/current/rules_importer.py +0 -323
cognite/neat/_workflows/steps/lib/current/rules_validator.py +0 -106
cognite/neat/_workflows/steps/lib/io/__init__.py +0 -1
cognite/neat/_workflows/steps/lib/io/io_steps.py +0 -393
cognite/neat/_workflows/steps/step_model.py +0 -79
cognite/neat/_workflows/steps_registry.py +0 -218
cognite/neat/_workflows/tasks.py +0 -18
cognite/neat/_workflows/triggers.py +0 -169
cognite/neat/_workflows/utils.py +0 -19
cognite_neat-0.104.0.dist-info/RECORD +0 -276
{cognite_neat-0.104.0.dist-info → cognite_neat-0.105.1.dist-info}/LICENSE +0 -0
{cognite_neat-0.104.0.dist-info → cognite_neat-0.105.1.dist-info}/entry_points.txt +0 -0

cognite/neat/_graph/extractors/_classic_cdf/_sequences.py CHANGED Viewed

@@ -1,37 +1,251 @@
-from collections.abc import Iterable
+import itertools
+import json
+from collections.abc import Callable, Iterable, Set
 from pathlib import Path
+from typing import Any
 from cognite.client import CogniteClient
-from cognite.client.data_classes import Sequence, SequenceFilter, SequenceList
+from cognite.client.data_classes import Sequence, SequenceFilter
+from rdflib import RDF, XSD, Literal, Namespace, URIRef
-from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix
+from cognite.neat._client.data_classes.neat_sequence import NeatSequence, NeatSequenceList
+from cognite.neat._shared import Triple
+from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
-class SequencesExtractor(ClassicCDFBaseExtractor[Sequence]):
-    """Extract data from Cognite Data Fusions Sequences into Neat."""
+class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
+    """Extract data from Cognite Data Fusions Sequences into Neat.
+    Args:
+        items (Iterable[T_CogniteResource]): An iterable of classic resource.
+        namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
+        to_type (Callable[[T_CogniteResource], str | None], optional): A function to convert an item to a type.
+            Defaults to None. If None or if the function returns None, the asset will be set to the default type.
+        total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
+            is installed. Defaults to None.
+        limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
+            testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
+            limit the extraction to 1000 assets to test the setup.
+        unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
+            a JSON string.
+        skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
+           values in this set will be skipped.
+        camel_case (bool, optional): Whether to use camelCase instead of snake_case for property names.
+            Defaults to True.
+        as_write (bool, optional): Whether to use the write/request format of the items. Defaults to False.
+        unpack_columns (bool, optional): Whether to unpack columns. Defaults to False.
+    """
     _default_rdf_type = "Sequence"
+    _column_rdf_type = "ColumnClass"
     _instance_id_prefix = InstanceIdPrefix.sequence
+    def __init__(
+        self,
+        items: Iterable[NeatSequence],
+        namespace: Namespace | None = None,
+        to_type: Callable[[NeatSequence], str | None] | None = None,
+        total: int | None = None,
+        limit: int | None = None,
+        unpack_metadata: bool = True,
+        skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
+        camel_case: bool = True,
+        as_write: bool = False,
+        unpack_columns: bool = False,
+    ):
+        super().__init__(
+            items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write
+        )
+        self.unpack_columns = unpack_columns
     @classmethod
-    def _from_dataset(cls, client: CogniteClient, data_set_external_id: str) -> tuple[int | None, Iterable[Sequence]]:
+    def from_dataset(
+        cls,
+        client: CogniteClient,
+        data_set_external_id: str,
+        namespace: Namespace | None = None,
+        to_type: Callable[[NeatSequence], str | None] | None = None,
+        limit: int | None = None,
+        unpack_metadata: bool = True,
+        skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
+        camel_case: bool = True,
+        as_write: bool = False,
+        unpack_columns: bool = False,
+    ):
+        total, items = cls._from_dataset(client, data_set_external_id)
+        return cls(
+            items,
+            namespace,
+            to_type,
+            total,
+            limit,
+            unpack_metadata,
+            skip_metadata_values,
+            camel_case,
+            as_write,
+            unpack_columns,
+        )
+    @classmethod
+    def from_hierarchy(
+        cls,
+        client: CogniteClient,
+        root_asset_external_id: str,
+        namespace: Namespace | None = None,
+        to_type: Callable[[NeatSequence], str | None] | None = None,
+        limit: int | None = None,
+        unpack_metadata: bool = True,
+        skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
+        camel_case: bool = True,
+        as_write: bool = False,
+        unpack_columns: bool = False,
+    ):
+        total, items = cls._from_hierarchy(client, root_asset_external_id)
+        return cls(
+            items,
+            namespace,
+            to_type,
+            total,
+            limit,
+            unpack_metadata,
+            skip_metadata_values,
+            camel_case,
+            as_write,
+            unpack_columns,
+        )
+    @classmethod
+    def from_file(
+        cls,
+        file_path: str | Path,
+        namespace: Namespace | None = None,
+        to_type: Callable[[NeatSequence], str | None] | None = None,
+        limit: int | None = None,
+        unpack_metadata: bool = True,
+        skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
+        camel_case: bool = True,
+        as_write: bool = False,
+        unpack_columns: bool = False,
+    ):
+        total, items = cls._from_file(file_path)
+        return cls(
+            items,
+            namespace,
+            to_type,
+            total,
+            limit,
+            unpack_metadata,
+            skip_metadata_values,
+            camel_case,
+            as_write,
+            unpack_columns,
+        )
+    @classmethod
+    def _from_dataset(
+        cls, client: CogniteClient, data_set_external_id: str
+    ) -> tuple[int | None, Iterable[NeatSequence]]:
         total = client.sequences.aggregate_count(
             filter=SequenceFilter(data_set_ids=[{"externalId": data_set_external_id}])
         )
         items = client.sequences(data_set_external_ids=data_set_external_id)
-        return total, items
+        return total, cls._lookup_rows(items, client)
     @classmethod
     def _from_hierarchy(
         cls, client: CogniteClient, root_asset_external_id: str
-    ) -> tuple[int | None, Iterable[Sequence]]:
+    ) -> tuple[int | None, Iterable[NeatSequence]]:
         total = client.sequences.aggregate_count(
             filter=SequenceFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
         )
         items = client.sequences(asset_subtree_external_ids=[root_asset_external_id])
-        return total, items
+        return total, cls._lookup_rows(items, client)
     @classmethod
-    def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[Sequence]]:
-        sequences = SequenceList.load(Path(file_path).read_text())
+    def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[NeatSequence]]:
+        sequences = NeatSequenceList.load(Path(file_path).read_text())
         return len(sequences), sequences
+    @classmethod
+    def _lookup_rows(cls, sequence_iterable: Iterable[Sequence], client: CogniteClient) -> Iterable[NeatSequence]:
+        iterator = iter(sequence_iterable)
+        for sequences in iter(lambda: list(itertools.islice(iterator, client.config.max_workers)), []):
+            # The PySDK uses max_workers to limit the number of requests made in parallel.
+            # We can only get one set of sequence rows per request, so we chunk the sequences up into groups of
+            # max_workers and then make a request to get all the rows for those sequences in one go.
+            sequence_list = list(sequences)
+            row_list = client.sequences.rows.retrieve(id=[seq.id for seq in sequence_list])
+            rows_by_sequence_id = {row.id: row.rows for row in row_list}
+            for seq in sequence_list:
+                yield NeatSequence.from_cognite_sequence(seq, rows_by_sequence_id.get(seq.id))
+    def _item2triples_special_cases(self, id_: URIRef, dumped: dict[str, Any]) -> list[Triple]:
+        """For sequences, columns and rows are special cases.'"""
+        if self.unpack_columns:
+            return self._unpack_columns(id_, dumped)
+        else:
+            return self._default_columns_and_rows(id_, dumped)
+    def _default_columns_and_rows(self, id_: URIRef, dumped: dict[str, Any]) -> list[Triple]:
+        triples: list[Triple] = []
+        if "columns" in dumped:
+            columns = dumped.pop("columns")
+            triples.extend(
+                [
+                    (
+                        id_,
+                        self.namespace.columns,
+                        # Rows have a rowNumber, so we introduce colNumber here to be consistent.
+                        Literal(json.dumps({"colNumber": no, **col}), datatype=XSD._NS["json"]),
+                    )
+                    for no, col in enumerate(columns, 1)
+                ]
+            )
+        if "rows" in dumped:
+            rows = dumped.pop("rows")
+            triples.extend(
+                [(id_, self.namespace.rows, Literal(json.dumps(row), datatype=XSD._NS["json"])) for row in rows]
+            )
+        return triples
+    def _unpack_columns(self, id_: URIRef, dumped: dict[str, Any]) -> list[Triple]:
+        triples: list[Triple] = []
+        columnValueTypes: list[str] = []
+        column_order: list[str] = []
+        if columns := dumped.pop("columns", None):
+            for col in columns:
+                external_id = col.pop("externalId")
+                column_order.append(external_id)
+                value_type = col.pop("valueType")
+                columnValueTypes.append(value_type)
+                col_id = self.namespace[f"Column_{external_id}"]
+                triples.append((id_, self.namespace[external_id], col_id))
+                type_ = self.namespace[self._column_rdf_type]
+                triples.append((col_id, RDF.type, type_))
+                if metadata := col.pop("metadata", None):
+                    triples.extend(self._metadata_to_triples(col_id, metadata))
+                # Should only be name and description left in col
+                for key, value in col.items():
+                    if value is None:
+                        continue
+                    triples.append((col_id, self.namespace[key], Literal(value, datatype=XSD.string)))
+            triples.append(
+                (id_, self.namespace.columnOrder, Literal(json.dumps(column_order), datatype=XSD._NS["json"]))
+            )
+            triples.append(
+                (id_, self.namespace.columnValueTypes, Literal(json.dumps(columnValueTypes), datatype=XSD._NS["json"]))
+            )
+        if rows := dumped.pop("rows", None):
+            values_by_column: list[list[Any]] = [[] for _ in column_order]
+            for row in rows:
+                for i, value in enumerate(row["values"]):
+                    values_by_column[i].append(value)
+            for col_name, values in zip(column_order, values_by_column, strict=False):
+                triples.append(
+                    (id_, self.namespace[f"{col_name}Values"], Literal(json.dumps(values), datatype=XSD._NS["json"]))
+                )
+        return triples

cognite/neat/_graph/extractors/_mock_graph_generator.py CHANGED Viewed

@@ -183,7 +183,7 @@ def _get_generation_order(
     parent_col: str = "source_class",
     child_col: str = "target_class",
 ) -> dict:
-    parent_child_list: list[list[str]] = class_linkage[[parent_col, child_col]].values.tolist()
+    parent_child_list: list[list[str]] = class_linkage[[parent_col, child_col]].values.tolist()  # type: ignore[assignment]
     # Build a directed graph and a list of all names that have no parent
     graph: dict[str, set] = {name: set() for tup in parent_child_list for name in tup}
     has_parent: dict[str, bool] = {name: False for tup in parent_child_list for name in tup}

cognite/neat/_graph/loaders/_rdf2dms.py CHANGED Viewed

@@ -1,10 +1,11 @@
 import itertools
 import json
+import warnings
 from collections import defaultdict
 from collections.abc import Iterable, Sequence
 from graphlib import TopologicalSorter
 from pathlib import Path
-from typing import Any, get_args
+from typing import Any, cast, get_args
 import yaml
 from cognite.client import CogniteClient
@@ -19,6 +20,7 @@ from pydantic import BaseModel, ValidationInfo, create_model, field_validator
 from rdflib import RDF, URIRef
 from cognite.neat._client import NeatClient
+from cognite.neat._constants import DMS_DIRECT_RELATION_LIST_LIMIT, is_readonly_property
 from cognite.neat._graph._tracking import LogTracker, Tracker
 from cognite.neat._issues import IssueList, NeatIssue, NeatIssueList
 from cognite.neat._issues.errors import (
@@ -27,7 +29,7 @@ from cognite.neat._issues.errors import (
     ResourceDuplicatedError,
     ResourceRetrievalError,
 )
-from cognite.neat._issues.warnings import PropertyTypeNotSupportedWarning
+from cognite.neat._issues.warnings import PropertyDirectRelationLimitWarning, PropertyTypeNotSupportedWarning
 from cognite.neat._rules.analysis._dms import DMSAnalysis
 from cognite.neat._rules.models import DMSRules
 from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE, Json
@@ -303,6 +305,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
             if isinstance(prop, dm.EdgeConnection):
                 edge_by_property[prop_id] = prop_id, prop
             if isinstance(prop, dm.MappedProperty):
+                if is_readonly_property(prop.container, prop.container_property_identifier):
+                    continue
                 if isinstance(prop.type, dm.DirectRelation):
                     if prop.container == dm.ContainerId("cdf_cdm", "CogniteTimeSeries") and prop_id == "unit":
                         unit_properties.append(prop_id)
@@ -343,9 +348,14 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
             return value
-        def parse_json_string(cls, value: Any, info: ValidationInfo) -> dict:
+        def parse_json_string(cls, value: Any, info: ValidationInfo) -> dict | list:
             if isinstance(value, dict):
                 return value
+            elif isinstance(value, list):
+                try:
+                    return [json.loads(v) if isinstance(v, str) else v for v in value]
+                except json.JSONDecodeError as error:
+                    raise ValueError(f"Not valid JSON string for {info.field_name}: {value}, error {error}") from error
             elif isinstance(value, str):
                 try:
                     return json.loads(value)
@@ -364,7 +374,21 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
             def parse_direct_relation(cls, value: list, info: ValidationInfo) -> dict | list[dict]:
                 # We validate above that we only get one value for single direct relations.
                 if list.__name__ in _get_field_value_types(cls, info):
-                    return [{"space": self.instance_space, "externalId": remove_namespace_from_uri(v)} for v in value]
+                    result = [{"space": self.instance_space, "externalId": remove_namespace_from_uri(v)} for v in value]
+                    if len(result) <= DMS_DIRECT_RELATION_LIST_LIMIT:
+                        return result
+                    warnings.warn(
+                        PropertyDirectRelationLimitWarning(
+                            identifier="unknown",
+                            resource_type="view property",
+                            property_name=cast(str, cls.model_fields[info.field_name].alias or info.field_name),
+                            limit=DMS_DIRECT_RELATION_LIST_LIMIT,
+                        ),
+                        stacklevel=2,
+                    )
+                    # To get deterministic results, we sort by space and externalId
+                    result.sort(key=lambda x: (x["space"], x["externalId"]))
+                    return result[:DMS_DIRECT_RELATION_LIST_LIMIT]
                 elif value:
                     return {"space": self.instance_space, "externalId": remove_namespace_from_uri(value[0])}
                 return {}
@@ -401,7 +425,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
             space=self.instance_space,
             external_id=identifier,
             type=(dm.DirectRelationReference(view_id.space, view_id.external_id) if type_ is not None else None),
-            sources=[dm.NodeOrEdgeData(source=view_id, properties=dict(created.model_dump().items()))],
+            sources=[
+                dm.NodeOrEdgeData(source=view_id, properties=dict(created.model_dump(exclude_unset=True).items()))
+            ],
         )
     def _create_edges(

cognite/neat/_graph/transformers/__init__.py CHANGED Viewed

@@ -15,7 +15,7 @@ from ._prune_graph import (
     PruneTypes,
 )
 from ._rdfpath import AddSelfReferenceProperty, MakeConnectionOnExactMatch
-from ._value_type import ConvertLiteral, LiteralToEntity, SplitMultiValueProperty
+from ._value_type import ConnectionToLiteral, ConvertLiteral, LiteralToEntity, SplitMultiValueProperty
 __all__ = [
     "AddAssetDepth",
@@ -26,6 +26,7 @@ __all__ = [
     "AssetSequenceConnector",
     "AssetTimeSeriesConnector",
     "AttachPropertyFromTargetToSource",
+    "ConnectionToLiteral",
     "ConvertLiteral",
     "LiteralToEntity",
     "MakeConnectionOnExactMatch",
@@ -55,4 +56,5 @@ Transformers = (
     | PruneInstancesOfUnknownType
     | ConvertLiteral
     | LiteralToEntity
+    | ConnectionToLiteral
 )

cognite/neat/_graph/transformers/_classic_cdf.py CHANGED Viewed

@@ -6,6 +6,7 @@ from functools import lru_cache
 from typing import cast
 from rdflib import RDF, Graph, Literal, Namespace, URIRef
+from rdflib.query import ResultRow
 from cognite.neat._constants import CLASSIC_CDF_NAMESPACE, DEFAULT_NAMESPACE
 from cognite.neat._graph import extractors
@@ -18,71 +19,57 @@ from cognite.neat._utils.rdf_ import (
     remove_namespace_from_uri,
 )
-from ._base import BaseTransformer
+from ._base import BaseTransformer, BaseTransformerStandardised, RowTransformationOutput
-# TODO: standardise
-class AddAssetDepth(BaseTransformer):
-    description: str = "Adds depth of asset in the asset hierarchy to the graph"
+class AddAssetDepth(BaseTransformerStandardised):
+    description: str = "Adds depth of asset in the asset hierarchy and optionally types asset based on depth"
     _use_only_once: bool = True
     _need_changes = frozenset({str(extractors.AssetsExtractor.__name__)})
-    _parent_template: str = """SELECT ?child ?parent WHERE {{
-                              <{asset_id}> <{parent_prop}> ?child .
-                              OPTIONAL{{?child <{parent_prop}>+ ?parent .}}}}"""
-    _root_template: str = """SELECT ?root WHERE {{
-                             <{asset_id}> <{root_prop}> ?root .}}"""
     def __init__(
         self,
         asset_type: URIRef | None = None,
-        root_prop: URIRef | None = None,
         parent_prop: URIRef | None = None,
         depth_typing: dict[int, str] | None = None,
     ):
         self.asset_type = asset_type or DEFAULT_NAMESPACE.Asset
-        self.root_prop = root_prop or DEFAULT_NAMESPACE.rootId
         self.parent_prop = parent_prop or DEFAULT_NAMESPACE.parentId
         self.depth_typing = depth_typing
-    def transform(self, graph: Graph) -> None:
-        """Adds depth of asset in the asset hierarchy to the graph."""
-        for result in graph.query(f"SELECT DISTINCT ?asset_id WHERE {{?asset_id a <{self.asset_type}>}}"):
-            asset_id = cast(tuple, result)[0]
-            if depth := self.get_depth(graph, asset_id, self.root_prop, self.parent_prop):
-                graph.add((asset_id, DEFAULT_NAMESPACE.depth, Literal(depth)))
-                if self.depth_typing and (type_ := self.depth_typing.get(depth, None)):
-                    # remove existing type
-                    graph.remove((asset_id, RDF.type, None))
-                    # add new type
-                    graph.add((asset_id, RDF.type, DEFAULT_NAMESPACE[type_]))
-    @classmethod
-    def get_depth(
-        cls,
-        graph: Graph,
-        asset_id: URIRef,
-        root_prop: URIRef,
-        parent_prop: URIRef,
-    ) -> int | None:
-        """Get asset depth in the asset hierarchy."""
-        # Handles non-root assets
-        if result := list(graph.query(cls._parent_template.format(asset_id=asset_id, parent_prop=parent_prop))):
-            return len(cast(list[tuple], result)) + 2 if cast(list[tuple], result)[0][1] else 2
-        # Handles root assets
-        elif (
-            (result := list(graph.query(cls._root_template.format(asset_id=asset_id, root_prop=root_prop))))
-            and len(cast(list[tuple], result)) == 1
-            and cast(list[tuple], result)[0][0] == asset_id
-        ):
-            return 1
-        else:
-            return None
+    def _iterate_query(self) -> str:
+        query = """SELECT ?asset (IF(?isRoot, 0, COUNT(?parent)) AS ?parentCount)
+                   WHERE {{
+                        ?asset a <{asset_type}> .
+                        OPTIONAL {{ ?asset <{parent_prop}>+ ?parent . }}
+                        BIND(IF(BOUND(?parent), false, true) AS ?isRoot)}}
+                   GROUP BY ?asset ?isRoot
+                   ORDER BY DESC(?parentCount)"""
+        return query.format(
+            asset_type=self.asset_type,
+            parent_prop=self.parent_prop,
+        )
+    def _count_query(self) -> str:
+        query = """SELECT (COUNT(?asset) as ?count)
+                   WHERE {{ ?asset a <{asset_type}> . }}"""
+        return query.format(asset_type=self.asset_type)
+    def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
+        row_output = RowTransformationOutput()
+        subject, object = query_result_row
+        row_output.add_triples.append(cast(Triple, (subject, DEFAULT_NAMESPACE.depth, object)))
+        if self.depth_typing and (type_ := self.depth_typing.get(int(object), None)):
+            row_output.remove_triples.append(cast(Triple, (subject, RDF.type, self.asset_type)))
+            row_output.add_triples.append(cast(Triple, (subject, RDF.type, DEFAULT_NAMESPACE[type_])))
+        row_output.instances_modified_count += 1
+        return row_output
 # TODO: standardise
@@ -375,7 +362,8 @@ WHERE {{
     ) -> list[Triple]:
         relationship_triples = cast(list[Triple], list(graph.query(f"DESCRIBE <{relationship_id}>")))
         object_by_predicates = cast(
-            dict[str, URIRef | Literal], {remove_namespace_from_uri(row[1]): row[2] for row in relationship_triples}
+            dict[str, URIRef | Literal],
+            {remove_namespace_from_uri(row[1]): row[2] for row in relationship_triples if row[1] != RDF.type},
         )
         source_external_id = cast(URIRef, object_by_predicates["sourceExternalId"])
         target_source_id = cast(URIRef, object_by_predicates["targetExternalId"])

cognite/neat/_graph/transformers/_rdfpath.py CHANGED Viewed

@@ -1,15 +1,14 @@
 from typing import cast
 from urllib.parse import quote
-from rdflib import Graph, URIRef
+from rdflib import Graph, Namespace, URIRef
 from rdflib.query import ResultRow
-from cognite.neat._constants import DEFAULT_NAMESPACE
 from cognite.neat._rules.analysis import InformationAnalysis
 from cognite.neat._rules.models._rdfpath import RDFPath, SingleProperty
 from cognite.neat._rules.models.information import InformationRules
 from cognite.neat._shared import Triple
-from cognite.neat._utils.rdf_ import remove_namespace_from_uri
+from cognite.neat._utils.rdf_ import get_namespace, remove_namespace_from_uri
 from ._base import BaseTransformer, BaseTransformerStandardised, RowTransformationOutput
@@ -76,11 +75,11 @@ class MakeConnectionOnExactMatch(BaseTransformerStandardised):
         self.subject_predicate = subject_predicate
         self.object_type = object_type
         self.object_predicate = object_predicate
+        subject_namespace = Namespace(get_namespace(subject_type))
         self.connection = (
-            DEFAULT_NAMESPACE[quote(connection.strip())]
+            subject_namespace[quote(connection.strip())]
             if isinstance(connection, str)
-            else connection or DEFAULT_NAMESPACE[remove_namespace_from_uri(self.object_type).lower()]
+            else connection or subject_namespace[remove_namespace_from_uri(self.object_type).lower()]
         )
         self.limit = limit
@@ -88,10 +87,10 @@ class MakeConnectionOnExactMatch(BaseTransformerStandardised):
     def _iterate_query(self) -> str:
         query = """SELECT DISTINCT ?subject ?object
                             WHERE {{
-                                ?subject a <{subject_type}> .
-                                ?subject <{subject_predicate}> ?value .
-                                ?object <{object_predicate}> ?value .
-                                ?object a <{object_type}> .
+                                ?subject a <{subject_type}> ;
+                                        <{subject_predicate}> ?value .
+                                ?object a <{object_type}> ;
+                                        <{object_predicate}> ?value .
                             }}"""
         if self.limit and isinstance(self.limit, int) and self.limit > 0:
@@ -105,12 +104,12 @@ class MakeConnectionOnExactMatch(BaseTransformerStandardised):
         )
     def _count_query(self) -> str:
-        query = """SELECT (COUNT(DISTINCT (?subject ?object)) as ?count)
+        query = """SELECT (COUNT(DISTINCT ?subject) as ?count)
                     WHERE {{
-                        ?subject a <{subject_type}> .
-                        ?subject <{subject_predicate}> ?value .
-                        ?object <{object_predicate}> ?value .
-                        ?object a <{object_type}> .
+                        ?subject a <{subject_type}> ;
+                                <{subject_predicate}> ?value .
+                        ?object a <{object_type}> ;
+                                <{object_predicate}> ?value .
                     }}"""
         if self.limit and isinstance(self.limit, int) and self.limit > 0:

cognite/neat/_graph/transformers/_value_type.py CHANGED Viewed

@@ -223,3 +223,75 @@ class LiteralToEntity(BaseTransformerStandardised):
         row_output.instances_modified_count += 1  # we modify the old entity
         return row_output
+class ConnectionToLiteral(BaseTransformerStandardised):
+    """Replace IRI-valued objects of one predicate with literal values.
+    For every ``(instance, subject_predicate, object)`` triple whose object is an
+    IRI, the triple is removed and a new one is added whose object is an
+    ``rdflib.Literal`` built from ``remove_namespace_from_uri(object)``.
+    """
+    description = "Converts an entity connection to a literal value"
+    def __init__(self, subject_type: URIRef | None, subject_predicate: URIRef) -> None:
+        """Store the selection criteria.
+        Args:
+            subject_type: RDF type the instances must have; ``None`` drops the type
+                constraint from every query this class builds.
+            subject_predicate: Predicate whose IRI objects are converted.
+        """
+        self.subject_type = subject_type
+        self.subject_predicate = subject_predicate
+    def _iterate_query(self) -> str:
+        """Build the query yielding ``?instance ?object`` pairs whose object is an IRI."""
+        # Doubled braces in the templates survive ``str.format`` as literal SPARQL braces.
+        if self.subject_type is None:
+            # No type constraint: match the predicate on any subject.
+            query = """SELECT ?instance ?object
+            WHERE {{
+              ?instance <{subject_predicate}> ?object
+              FILTER(isIRI(?object))
+            }}"""
+            return query.format(subject_predicate=self.subject_predicate)
+        else:
+            # Same pattern, restricted to instances of ``subject_type``.
+            query = """SELECT ?instance ?object
+                WHERE {{
+                  ?instance a <{subject_type}> .
+                  ?instance <{subject_predicate}> ?object
+                  FILTER(isIRI(?object))
+                }}"""
+            return query.format(subject_type=self.subject_type, subject_predicate=self.subject_predicate)
+    def _skip_count_query(self) -> str:
+        """Build the query counting objects of the predicate that are literals.
+        The result variable is ``?objectCount``.
+        """
+        # NOTE(review): presumably these are the values the transformer leaves untouched
+        # because they are already literals; confirm how the base class uses this count.
+        if self.subject_type is None:
+            query = """SELECT (COUNT(?object) AS ?objectCount)
+                        WHERE {{
+                          ?instance <{subject_predicate}> ?object
+                          FILTER(isLiteral(?object))
+                        }}"""
+            return query.format(subject_predicate=self.subject_predicate)
+        else:
+            query = """SELECT (COUNT(?object) AS ?objectCount)
+                        WHERE {{
+                          ?instance a <{subject_type}> .
+                          ?instance <{subject_predicate}> ?object
+                          FILTER(isLiteral(?object))
+                        }}"""
+            return query.format(subject_type=self.subject_type, subject_predicate=self.subject_predicate)
+    def _count_query(self) -> str:
+        """Build the query counting objects of the predicate that are IRIs.
+        Uses the same graph pattern and filter as ``_iterate_query``; the result
+        variable is ``?objectCount``.
+        """
+        if self.subject_type is None:
+            query = """SELECT (COUNT(?object) AS ?objectCount)
+                WHERE {{
+                  ?instance <{subject_predicate}> ?object
+                  FILTER(isIRI(?object))
+                }}"""
+            return query.format(subject_predicate=self.subject_predicate)
+        else:
+            query = """SELECT (COUNT(?object) AS ?objectCount)
+                        WHERE {{
+                          ?instance a <{subject_type}> .
+                          ?instance <{subject_predicate}> ?object
+                          FILTER(isIRI(?object))
+                        }}"""
+            return query.format(subject_type=self.subject_type, subject_predicate=self.subject_predicate)
+    def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
+        """Swap one IRI object for a literal.
+        Args:
+            query_result_row: Row unpacked as ``(instance, object_entity)``.
+        Returns:
+            Output holding one triple to add (the literal), one triple to remove
+            (the original IRI object) and ``instances_modified_count`` incremented by one.
+        """
+        row_output = RowTransformationOutput()
+        instance, object_entity = cast(tuple[URIRef, URIRef], query_result_row)
+        # The literal's value is whatever ``remove_namespace_from_uri`` returns for the IRI.
+        value = remove_namespace_from_uri(object_entity)
+        # NOTE(review): relies on ``rdflib`` being imported at module level; not visible in this hunk.
+        row_output.add_triples.append((instance, self.subject_predicate, rdflib.Literal(value)))
+        row_output.remove_triples.append((instance, self.subject_predicate, object_entity))
+        row_output.instances_modified_count += 1
+        return row_output

cognite/neat/_issues/__init__.py CHANGED Viewed

@@ -3,7 +3,6 @@ as some helper classes to handle them like NeatIssueList"""
 from ._base import (
     DefaultWarning,
-    FutureResult,
     IssueList,
     MultiValueError,
     NeatError,
@@ -16,7 +15,6 @@ from ._base import (
 __all__ = [
     "DefaultWarning",
-    "FutureResult",
     "IssueList",
     "MultiValueError",
     "NeatError",

cognite-neat 0.104.0__py3-none-any.whl → 0.105.1__py3-none-any.whl

Potentially problematic release.

cognite-neat 0.104.0py3-none-any.whl → 0.105.1py3-none-any.whl