PyPI - cognite-neat - Versions diffs - 0.107.0__py3-none-any.whl → 0.109.0__py3-none-any.whl - Mend

cognite-neat 0.107.0__py3-none-any.whl → 0.109.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cognite-neat might be problematic. Click here for more details.

Files changed (69) hide show

cognite/neat/_constants.py +35 -1
cognite/neat/_graph/_shared.py +4 -0
cognite/neat/_graph/extractors/_classic_cdf/_base.py +115 -14
cognite/neat/_graph/extractors/_classic_cdf/_classic.py +87 -6
cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +48 -12
cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +19 -1
cognite/neat/_graph/extractors/_dms.py +162 -47
cognite/neat/_graph/extractors/_dms_graph.py +54 -4
cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
cognite/neat/_graph/extractors/_rdf_file.py +3 -2
cognite/neat/_graph/loaders/__init__.py +1 -3
cognite/neat/_graph/loaders/_rdf2dms.py +20 -10
cognite/neat/_graph/queries/_base.py +144 -84
cognite/neat/_graph/queries/_construct.py +1 -1
cognite/neat/_graph/transformers/__init__.py +3 -1
cognite/neat/_graph/transformers/_base.py +4 -4
cognite/neat/_graph/transformers/_classic_cdf.py +13 -13
cognite/neat/_graph/transformers/_prune_graph.py +3 -3
cognite/neat/_graph/transformers/_rdfpath.py +3 -4
cognite/neat/_graph/transformers/_value_type.py +71 -13
cognite/neat/_issues/errors/__init__.py +2 -0
cognite/neat/_issues/errors/_external.py +8 -0
cognite/neat/_issues/errors/_resources.py +1 -1
cognite/neat/_issues/warnings/__init__.py +0 -2
cognite/neat/_issues/warnings/_models.py +1 -1
cognite/neat/_issues/warnings/_properties.py +0 -8
cognite/neat/_issues/warnings/_resources.py +1 -1
cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
cognite/neat/_rules/exporters/_rules2yaml.py +1 -1
cognite/neat/_rules/importers/__init__.py +3 -1
cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
cognite/neat/_rules/importers/_rdf/_base.py +2 -2
cognite/neat/_rules/importers/_rdf/_inference2rules.py +310 -26
cognite/neat/_rules/models/_base_rules.py +22 -11
cognite/neat/_rules/models/dms/_exporter.py +5 -4
cognite/neat/_rules/models/dms/_rules.py +1 -8
cognite/neat/_rules/models/dms/_rules_input.py +4 -0
cognite/neat/_rules/models/information/_rules_input.py +5 -0
cognite/neat/_rules/transformers/__init__.py +10 -3
cognite/neat/_rules/transformers/_base.py +6 -1
cognite/neat/_rules/transformers/_converters.py +530 -364
cognite/neat/_rules/transformers/_mapping.py +4 -4
cognite/neat/_session/_base.py +100 -47
cognite/neat/_session/_create.py +133 -0
cognite/neat/_session/_drop.py +60 -2
cognite/neat/_session/_fix.py +28 -0
cognite/neat/_session/_inspect.py +22 -7
cognite/neat/_session/_mapping.py +8 -8
cognite/neat/_session/_prepare.py +3 -247
cognite/neat/_session/_read.py +138 -17
cognite/neat/_session/_set.py +50 -1
cognite/neat/_session/_show.py +16 -43
cognite/neat/_session/_state.py +53 -52
cognite/neat/_session/_to.py +11 -4
cognite/neat/_session/_wizard.py +1 -1
cognite/neat/_session/exceptions.py +8 -1
cognite/neat/_store/_graph_store.py +301 -146
cognite/neat/_store/_provenance.py +36 -20
cognite/neat/_store/_rules_store.py +253 -267
cognite/neat/_store/exceptions.py +40 -4
cognite/neat/_utils/auth.py +5 -3
cognite/neat/_version.py +1 -1
{cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/METADATA +1 -1
{cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/RECORD +69 -67
{cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/LICENSE +0 -0
{cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/WHEEL +0 -0
{cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/entry_points.txt +0 -0

cognite/neat/_graph/extractors/_dms.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import urllib.parse
-from collections.abc import Iterable, Iterator
+from collections.abc import Iterable, Iterator, Set
+from functools import cached_property
 from typing import cast
 from cognite.client import CogniteClient
@@ -9,34 +10,48 @@ from cognite.client.data_classes.data_modeling.instances import Instance, Proper
 from cognite.client.utils.useful_types import SequenceNotStr
 from rdflib import RDF, XSD, Literal, Namespace, URIRef
-from cognite.neat._constants import DEFAULT_SPACE_URI
+from cognite.neat._config import GLOBAL_CONFIG
+from cognite.neat._constants import DEFAULT_SPACE_URI, is_readonly_property
 from cognite.neat._issues.errors import ResourceRetrievalError
 from cognite.neat._shared import Triple
+from cognite.neat._utils.auxiliary import string_to_ideal_type
+from cognite.neat._utils.collection_ import iterate_progress_bar
 from ._base import BaseExtractor
+DEFAULT_EMPTY_VALUES = frozenset({"nan", "null", "none", "", " ", "nil", "n/a", "na", "unknown", "undefined"})
 class DMSExtractor(BaseExtractor):
     """Extract data from Cognite Data Fusion DMS instances into Neat.
     Args:
-        items: The items to extract.
-        total: The total number of items to extract. If provided, this will be used to estimate the progress.
+        total_instances_pair_by_view: A dictionary where the key is the view id and the value is a tuple with the total
+            number of instances and an iterable of instances.
         limit: The maximum number of items to extract.
         overwrite_namespace: If provided, this will overwrite the space of the extracted items.
+        unpack_json: If True, JSON objects will be unpacked into RDF literals.
+        empty_values: If unpack_json is True, when unpacking JSON objects, if a key has a value in this set, it will be
+            considered as an empty value and skipped.
+        str_to_ideal_type: If unpack_json is True, when unpacking JSON objects, if the value is a string, the extractor
+            will try to convert it to the ideal type.
     """
     def __init__(
         self,
-        items: Iterable[Instance],
-        total: int | None = None,
+        total_instances_pair_by_view: dict[dm.ViewId, tuple[int | None, Iterable[Instance]]],
         limit: int | None = None,
         overwrite_namespace: Namespace | None = None,
+        unpack_json: bool = False,
+        empty_values: Set[str] = DEFAULT_EMPTY_VALUES,
+        str_to_ideal_type: bool = False,
     ) -> None:
-        self.items = items
-        self.total = total
+        self.total_instances_pair_by_view = total_instances_pair_by_view
         self.limit = limit
         self.overwrite_namespace = overwrite_namespace
+        self.unpack_json = unpack_json
+        self.empty_values = empty_values
+        self.str_to_ideal_type = str_to_ideal_type
     @classmethod
     def from_data_model(
@@ -46,6 +61,8 @@ class DMSExtractor(BaseExtractor):
         limit: int | None = None,
         overwrite_namespace: Namespace | None = None,
         instance_space: str | SequenceNotStr[str] | None = None,
+        unpack_json: bool = False,
+        str_to_ideal_type: bool = False,
     ) -> "DMSExtractor":
         """Create an extractor from a data model.
@@ -55,11 +72,20 @@ class DMSExtractor(BaseExtractor):
             limit: The maximum number of instances to extract.
             overwrite_namespace: If provided, this will overwrite the space of the extracted items.
             instance_space: The space to extract instances from.
+            unpack_json: If True, JSON objects will be unpacked into RDF literals.
         """
         retrieved = client.data_modeling.data_models.retrieve(data_model, inline_views=True)
         if not retrieved:
             raise ResourceRetrievalError(dm.DataModelId.load(data_model), "data model", "Data Model is missing in CDF")
-        return cls.from_views(client, retrieved.latest_version().views, limit, overwrite_namespace, instance_space)
+        return cls.from_views(
+            client,
+            retrieved.latest_version().views,
+            limit,
+            overwrite_namespace,
+            instance_space,
+            unpack_json,
+            str_to_ideal_type,
+        )
     @classmethod
     def from_views(
@@ -69,6 +95,8 @@ class DMSExtractor(BaseExtractor):
         limit: int | None = None,
         overwrite_namespace: Namespace | None = None,
         instance_space: str | SequenceNotStr[str] | None = None,
+        unpack_json: bool = False,
+        str_to_ideal_type: bool = False,
     ) -> "DMSExtractor":
         """Create an extractor from a set of views.
@@ -78,19 +106,43 @@ class DMSExtractor(BaseExtractor):
             limit: The maximum number of instances to extract.
             overwrite_namespace: If provided, this will overwrite the space of the extracted items.
             instance_space: The space to extract instances from.
+            unpack_json: If True, JSON objects will be unpacked into RDF literals.
+            str_to_ideal_type: If True, when unpacking JSON objects, if the value is a string, the extractor will try to
+                convert it to the ideal type.
         """
+        total_instances_pair_by_view: dict[dm.ViewId, tuple[int | None, Iterable[Instance]]] = {}
+        for view in views:
+            instance_iterator = _ViewInstanceIterator(client, view, instance_space)
+            total_instances_pair_by_view[view.as_id()] = (instance_iterator.count, instance_iterator)
         return cls(
-            _InstanceIterator(client, views, instance_space),
-            total=None,
+            total_instances_pair_by_view=total_instances_pair_by_view,
             limit=limit,
             overwrite_namespace=overwrite_namespace,
+            unpack_json=unpack_json,
+            str_to_ideal_type=str_to_ideal_type,
         )
     def extract(self) -> Iterable[Triple]:
-        for count, item in enumerate(self.items, 1):
-            if self.limit and count > self.limit:
-                break
-            yield from self._extract_instance(item)
+        total_instances = sum(total for total, _ in self.total_instances_pair_by_view.values() if total is not None)
+        use_progress_bar = (
+            GLOBAL_CONFIG.use_iterate_bar_threshold and total_instances > GLOBAL_CONFIG.use_iterate_bar_threshold
+        )
+        for view_id, (total, instances) in self.total_instances_pair_by_view.items():
+            if total == 0:
+                continue
+            if use_progress_bar and total is not None:
+                instances = iterate_progress_bar(
+                    instances,
+                    total,
+                    f"Extracting instances from {view_id.space}:{view_id.external_id}(version={view_id.version})",
+                )
+            for count, item in enumerate(instances, 1):
+                if self.limit and count > self.limit:
+                    break
+                yield from self._extract_instance(item)
     def _extract_instance(self, instance: Instance) -> Iterable[Triple]:
         if isinstance(instance, dm.Edge):
@@ -105,7 +157,6 @@ class DMSExtractor(BaseExtractor):
                 # If the edge has properties, we create a node for the edge and connect it to the start and end nodes.
                 id_ = self._as_uri_ref(instance)
                 yield id_, RDF.type, self._as_uri_ref(instance.type)
-                yield id_, RDF.type, self._get_namespace(instance.space).Edge
                 yield (
                     id_,
                     self._as_uri_ref(dm.DirectRelationReference(instance.space, "startNode")),
@@ -121,6 +172,9 @@ class DMSExtractor(BaseExtractor):
             id_ = self._as_uri_ref(instance)
             if instance.type:
                 type_ = self._as_uri_ref(cast(dm.DirectRelationReference, instance.type))
+            elif len(instance.properties) == 1:
+                view_id = next(iter(instance.properties.keys()))
+                type_ = self._get_namespace(view_id.space)[urllib.parse.quote(view_id.external_id)]
             else:
                 type_ = self._get_namespace(instance.space).Node
@@ -135,20 +189,38 @@ class DMSExtractor(BaseExtractor):
         for view_id, properties in instance.properties.items():
             namespace = self._get_namespace(view_id.space)
             for key, value in properties.items():
-                for object_ in self._get_objects(value):
-                    yield id_, namespace[key], object_
+                for predicate_str, object_ in self._get_predicate_objects_pair(key, value):
+                    yield id_, namespace[urllib.parse.quote(predicate_str)], object_
-    def _get_objects(self, value: PropertyValue) -> Iterable[Literal | URIRef]:
+    def _get_predicate_objects_pair(self, key: str, value: PropertyValue) -> Iterable[tuple[str, Literal | URIRef]]:
         if isinstance(value, str | float | bool | int):
-            yield Literal(value)
+            yield key, Literal(value)
         elif isinstance(value, dict) and "space" in value and "externalId" in value:
-            yield self._as_uri_ref(dm.DirectRelationReference.load(value))
+            yield key, self._as_uri_ref(dm.DirectRelationReference.load(value))
+        elif isinstance(value, dict) and self.unpack_json:
+            for sub_key, sub_value in value.items():
+                if isinstance(sub_value, str):
+                    if sub_value.casefold() in self.empty_values:
+                        continue
+                    if self.str_to_ideal_type:
+                        yield sub_key, Literal(string_to_ideal_type(sub_value))
+                    else:
+                        yield sub_key, Literal(sub_value)
+                elif isinstance(sub_value, int | float | bool):
+                    yield sub_key, Literal(sub_value)
+                elif isinstance(sub_value, dict):
+                    yield from self._get_predicate_objects_pair(f"{key}_{sub_key}", sub_value)
+                elif isinstance(sub_value, list):
+                    for item in sub_value:
+                        yield from self._get_predicate_objects_pair(f"{key}_{sub_key}", item)
+                else:
+                    yield sub_key, Literal(str(sub_value))
         elif isinstance(value, dict):
             # This object is a json object.
-            yield Literal(str(value), datatype=XSD._NS["json"])
+            yield key, Literal(str(value), datatype=XSD._NS["json"])
         elif isinstance(value, list):
             for item in value:
-                yield from self._get_objects(item)
+                yield from self._get_predicate_objects_pair(key, item)
     def _as_uri_ref(self, instance: Instance | dm.DirectRelationReference) -> URIRef:
         return self._get_namespace(instance.space)[urllib.parse.quote(instance.external_id)]
@@ -159,34 +231,77 @@ class DMSExtractor(BaseExtractor):
         return Namespace(DEFAULT_SPACE_URI.format(space=urllib.parse.quote(space)))
-class _InstanceIterator(Iterable[Instance]):
-    def __init__(
-        self, client: CogniteClient, views: Iterable[dm.View], instance_space: str | SequenceNotStr[str] | None = None
-    ):
+class _ViewInstanceIterator(Iterable[Instance]):
+    def __init__(self, client: CogniteClient, view: dm.View, instance_space: str | SequenceNotStr[str] | None = None):
         self.client = client
-        self.views = views
+        self.view = view
         self.instance_space = instance_space
+    @cached_property
+    def count(self) -> int:
+        node_count = edge_count = 0
+        if self.view.used_for in ("node", "all"):
+            node_count = int(
+                self.client.data_modeling.instances.aggregate(
+                    view=self.view.as_id(),
+                    aggregates=dm.aggregations.Count("externalId"),
+                    instance_type="node",
+                    space=self.instance_space,
+                ).value
+            )
+        if self.view.used_for in ("edge", "all"):
+            edge_count = int(
+                self.client.data_modeling.instances.aggregate(
+                    view=self.view.as_id(),
+                    aggregates=dm.aggregations.Count("externalId"),
+                    instance_type="edge",
+                    space=self.instance_space,
+                ).value
+            )
+        return node_count + edge_count
     def __iter__(self) -> Iterator[Instance]:
-        for view in self.views:
-            view_id = view.as_id()
-            # All nodes and edges with properties
-            if view.used_for in ("node", "all"):
-                yield from self.client.data_modeling.instances(
-                    chunk_size=None, instance_type="node", sources=[view_id], space=self.instance_space
-                )
-            if view.used_for in ("edge", "all"):
+        view_id = self.view.as_id()
+        read_only_properties = {
+            prop_id
+            for prop_id, prop in self.view.properties.items()
+            if isinstance(prop, dm.MappedProperty)
+            and is_readonly_property(prop.container, prop.container_property_identifier)
+        }
+        # All nodes and edges with properties
+        if self.view.used_for in ("node", "all"):
+            node_iterable: Iterable[Instance] = self.client.data_modeling.instances(
+                chunk_size=None, instance_type="node", sources=[view_id], space=self.instance_space
+            )
+            if read_only_properties:
+                node_iterable = self._remove_read_only_properties(node_iterable, read_only_properties, view_id)
+            yield from node_iterable
+        if self.view.used_for in ("edge", "all"):
+            yield from self.client.data_modeling.instances(
+                chunk_size=None, instance_type="edge", sources=[view_id], space=self.instance_space
+            )
+        for prop in self.view.properties.values():
+            if isinstance(prop, dm.EdgeConnection):
+                if prop.edge_source:
+                    # All edges with properties are extracted from the edge source
+                    continue
                 yield from self.client.data_modeling.instances(
-                    chunk_size=None, instance_type="edge", sources=[view_id], space=self.instance_space
+                    chunk_size=None,
+                    instance_type="edge",
+                    filter=dm.filters.Equals(
+                        ["edge", "type"], {"space": prop.type.space, "externalId": prop.type.external_id}
+                    ),
+                    space=self.instance_space,
                 )
-            for prop in view.properties.values():
-                if isinstance(prop, dm.EdgeConnection):
-                    yield from self.client.data_modeling.instances(
-                        chunk_size=None,
-                        instance_type="edge",
-                        filter=dm.filters.Equals(
-                            ["edge", "type"], {"space": prop.type.space, "externalId": prop.type.external_id}
-                        ),
-                        space=self.instance_space,
-                    )
+    @staticmethod
+    def _remove_read_only_properties(
+        nodes: Iterable[Instance], read_only_properties: Set[str], view_id: dm.ViewId
+    ) -> Iterable[Instance]:
+        for node in nodes:
+            if properties := node.properties.get(view_id):
+                for read_only in read_only_properties:
+                    properties.pop(read_only, None)
+            yield node

cognite/neat/_graph/extractors/_dms_graph.py CHANGED Viewed

@@ -6,11 +6,12 @@ from cognite.client.utils.useful_types import SequenceNotStr
 from rdflib import Namespace, URIRef
 from cognite.neat._client import NeatClient
-from cognite.neat._constants import DEFAULT_NAMESPACE
+from cognite.neat._constants import COGNITE_SPACES, DEFAULT_NAMESPACE
 from cognite.neat._issues import IssueList, NeatIssue, catch_warnings
 from cognite.neat._issues.warnings import CDFAuthWarning, ResourceNotFoundWarning, ResourceRetrievalWarning
 from cognite.neat._rules.importers import DMSImporter
 from cognite.neat._rules.models import DMSRules, InformationRules
+from cognite.neat._rules.models.data_types import Json
 from cognite.neat._rules.transformers import DMSToInformation, VerifyDMSRules
 from cognite.neat._shared import Triple
@@ -26,12 +27,18 @@ class DMSGraphExtractor(KnowledgeGraphExtractor):
         namespace: Namespace = DEFAULT_NAMESPACE,
         issues: Sequence[NeatIssue] | None = None,
         instance_space: str | SequenceNotStr[str] | None = None,
+        skip_cognite_views: bool = True,
+        unpack_json: bool = False,
+        str_to_ideal_type: bool = False,
     ) -> None:
         self._client = client
         self._data_model = data_model
         self._namespace = namespace or DEFAULT_NAMESPACE
         self._issues = IssueList(issues)
         self._instance_space = instance_space
+        self._skip_cognite_views = skip_cognite_views
+        self._unpack_json = unpack_json
+        self._str_to_ideal_type = str_to_ideal_type
         self._views: list[dm.View] | None = None
         self._information_rules: InformationRules | None = None
@@ -44,6 +51,9 @@ class DMSGraphExtractor(KnowledgeGraphExtractor):
         client: NeatClient,
         namespace: Namespace = DEFAULT_NAMESPACE,
         instance_space: str | SequenceNotStr[str] | None = None,
+        skip_cognite_views: bool = True,
+        unpack_json: bool = False,
+        str_to_ideal_type: bool = False,
     ) -> "DMSGraphExtractor":
         issues: list[NeatIssue] = []
         try:
@@ -51,14 +61,37 @@ class DMSGraphExtractor(KnowledgeGraphExtractor):
         except CogniteAPIError as e:
             issues.append(CDFAuthWarning("retrieving data model", str(e)))
             return cls(
-                cls._create_empty_model(dm.DataModelId.load(data_model_id)), client, namespace, issues, instance_space
+                cls._create_empty_model(dm.DataModelId.load(data_model_id)),
+                client,
+                namespace,
+                issues,
+                instance_space,
+                skip_cognite_views,
+                unpack_json,
+                str_to_ideal_type,
             )
         if not data_model:
             issues.append(ResourceRetrievalWarning(frozenset({data_model_id}), "data model"))
             return cls(
-                cls._create_empty_model(dm.DataModelId.load(data_model_id)), client, namespace, issues, instance_space
+                cls._create_empty_model(dm.DataModelId.load(data_model_id)),
+                client,
+                namespace,
+                issues,
+                instance_space,
+                skip_cognite_views,
+                unpack_json,
+                str_to_ideal_type,
             )
-        return cls(data_model.latest_version(), client, namespace, issues, instance_space)
+        return cls(
+            data_model.latest_version(),
+            client,
+            namespace,
+            issues,
+            instance_space,
+            skip_cognite_views,
+            unpack_json,
+            str_to_ideal_type,
+        )
     @classmethod
     def _create_empty_model(cls, data_model_id: dm.DataModelId) -> dm.DataModel:
@@ -92,11 +125,16 @@ class DMSGraphExtractor(KnowledgeGraphExtractor):
     def extract(self) -> Iterable[Triple]:
         """Extracts the knowledge graph from the data model."""
         views = self._model_views
+        if self._skip_cognite_views:
+            views = [view for view in views if view.space not in COGNITE_SPACES]
         yield from DMSExtractor.from_views(
             self._client,
             views,
             overwrite_namespace=self._namespace,
             instance_space=self._instance_space,
+            unpack_json=self._unpack_json,
+            str_to_ideal_type=self._str_to_ideal_type,
         ).extract()
     def _get_views(self) -> list[dm.View]:
@@ -141,6 +179,18 @@ class DMSGraphExtractor(KnowledgeGraphExtractor):
         # The DMS and Information rules must be created together to link them property.
         importer = DMSImporter.from_data_model(self._client, self._data_model)
         unverified_dms = importer.to_rules()
+        if self._unpack_json and (dms_rules := unverified_dms.rules):
+            # Drop the JSON properties from the DMS rules as these are no longer valid.
+            json_name = Json().name  # To avoid instantiating Json multiple times.
+            dms_rules.properties = [
+                prop
+                for prop in dms_rules.properties
+                if not (
+                    isinstance(prop.value_type, Json)
+                    or (isinstance(prop.value_type, str) and prop.value_type == json_name)
+                )
+            ]
         with catch_warnings() as issues:
             # Any errors occur will be raised and caught outside the extractor.
             verified_dms = VerifyDMSRules(client=self._client).transform(unverified_dms)

cognite/neat/_graph/extractors/_mock_graph_generator.py CHANGED Viewed

@@ -141,7 +141,7 @@ def generate_triples(
     # pregenerate instance ids for each remaining class
     instance_ids = {
-        key: [URIRef(namespace[f"{key.suffix}-{i+1}"]) for i in range(value)] for key, value in class_count.items()
+        key: [URIRef(namespace[f"{key.suffix}-{i + 1}"]) for i in range(value)] for key, value in class_count.items()
     }
     # create triple for each class instance defining its type

cognite/neat/_graph/extractors/_rdf_file.py CHANGED Viewed

@@ -36,7 +36,6 @@ class RdfFileExtractor(BaseExtractor):
         self.format = guess_format(str(self.filepath) if isinstance(self.filepath, Path) else self.filepath.name)
-        print(self.format)
         if isinstance(self.filepath, Path) and not self.filepath.exists():
             self.issue_list.append(FileNotFoundNeatError(self.filepath))
@@ -55,7 +54,7 @@ class RdfFileExtractor(BaseExtractor):
     def from_zip(
         cls,
         filepath: Path,
-        filename: str = "neat-session/instances/instances.ttl",
+        filename: str = "neat-session/instances/instances.trig",
         base_uri: URIRef = DEFAULT_BASE_URI,
         issue_list: IssueList | None = None,
     ):
@@ -69,6 +68,8 @@ class RdfFileExtractor(BaseExtractor):
                 if file_info.filename == filename:
                     # We need to open the file in the zip file, and close it upon
                     # triple extraction ...
+                    print(file_info)
                     file = zip_ref.open(file_info)
                     return cls(cast(zipfile.ZipExtFile, file), base_uri, issue_list)

cognite/neat/_graph/loaders/__init__.py CHANGED Viewed

@@ -20,6 +20,4 @@ def _repr_html_() -> str:
         ]
     )._repr_html_()
-    return (
-        "<strong>Loader</strong> A loader writes data from Neat's triple storage into a target system" f"<br />{table}"
-    )
+    return f"<strong>Loader</strong> A loader writes data from Neat's triple storage into a target system<br />{table}"

cognite/neat/_graph/loaders/_rdf2dms.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import itertools
 import json
+import urllib.parse
 import warnings
 from collections import defaultdict
 from collections.abc import Iterable, Sequence
@@ -70,6 +71,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
         tracker: type[Tracker] | None = None,
         rules: DMSRules | None = None,
         client: NeatClient | None = None,
+        unquote_external_ids: bool = False,
     ):
         super().__init__(graph_store)
         self.data_model = data_model
@@ -79,6 +81,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
         self._tracker: type[Tracker] = tracker or LogTracker
         self.rules = rules
         self._client = client
+        self._unquote_external_ids = unquote_external_ids
     @classmethod
     def from_data_model_id(
@@ -99,7 +102,12 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
     @classmethod
     def from_rules(
-        cls, rules: DMSRules, graph_store: NeatGraphStore, instance_space: str, client: NeatClient | None = None
+        cls,
+        rules: DMSRules,
+        graph_store: NeatGraphStore,
+        instance_space: str,
+        client: NeatClient | None = None,
+        unquote_external_ids: bool = False,
     ) -> "DMSLoader":
         issues: list[NeatIssue] = []
         data_model: dm.DataModel[dm.View] | None = None
@@ -125,6 +133,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
             issues,
             rules=rules,
             client=client,
+            unquote_external_ids=unquote_external_ids,
         )
     def _load(self, stop_on_exception: bool = False) -> Iterable[dm.InstanceApply | NeatIssue | type[_END_OF_CLASS]]:
@@ -142,7 +151,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
             if self.rules and self.rules.metadata.logical
             else None
         )
         view_and_count_by_id = self._select_views_with_instances(self.data_model.views)
         if self._client:
             view_and_count_by_id, properties_point_to_self = self._sort_by_direct_relation_dependencies(
                 view_and_count_by_id
@@ -425,20 +436,11 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
             else:
                 raise ValueError(f"Expect valid JSON string or dict for {info.field_name}: {value}")
-        def parse_text(cls, value: Any, info: ValidationInfo) -> Any:
-            if isinstance(value, list):
-                return [remove_namespace_from_uri(v) for v in value]
-            else:
-                return remove_namespace_from_uri(value)
         if json_fields:
             validators["parse_json_string"] = field_validator(*json_fields, mode="before")(parse_json_string)  # type: ignore[assignment, arg-type]
         validators["parse_list"] = field_validator("*", mode="before")(parse_list)  # type: ignore[assignment, arg-type]
-        if text_fields:
-            validators["parse_text"] = field_validator(*text_fields, mode="before")(parse_text)  # type: ignore[assignment, arg-type]
         if direct_relation_by_property:
             def parse_direct_relation(cls, value: list, info: ValidationInfo) -> dict | list[dict]:
@@ -490,6 +492,8 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
     ) -> dm.InstanceApply:
         type_ = properties.pop(RDF.type, [None])[0]
         created = pydantic_cls.model_validate(properties)
+        if self._unquote_external_ids:
+            identifier = urllib.parse.unquote(identifier)
         return dm.NodeApply(
             space=self.instance_space,
@@ -514,6 +518,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
         if type_ is None:
             raise ValueError(f"Missing type for edge {identifier}")
+        if self._unquote_external_ids:
+            identifier = urllib.parse.unquote(identifier)
         return dm.EdgeApply(
             space=self.instance_space,
             external_id=identifier,
@@ -550,6 +557,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
                 yield error
             for target in values:
                 external_id = f"{identifier}.{prop_id}.{target}"
+                if self._unquote_external_ids:
+                    external_id = urllib.parse.unquote(external_id)
                 yield dm.EdgeApply(
                     space=self.instance_space,
                     external_id=(external_id if len(external_id) < 256 else create_sha256_hash(external_id)),

cognite-neat 0.107.0__py3-none-any.whl → 0.109.0__py3-none-any.whl

Potentially problematic release.

cognite-neat 0.107.0__py3-none-any.whl → 0.109.0__py3-none-any.whl