PyPI - cognite-neat - Versions diffs - 0.87.6__py3-none-any.whl → 0.88.1__py3-none-any.whl - Mend

cognite-neat 0.87.6__py3-none-any.whl → 0.88.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cognite-neat might be problematic. Click here for more details.

Files changed (171) hide show

cognite/neat/_version.py +1 -1
cognite/neat/app/api/data_classes/rest.py +0 -19
cognite/neat/app/api/explorer.py +6 -4
cognite/neat/app/api/routers/configuration.py +1 -1
cognite/neat/app/api/routers/crud.py +11 -21
cognite/neat/app/api/routers/workflows.py +24 -94
cognite/neat/app/ui/neat-app/build/asset-manifest.json +7 -7
cognite/neat/app/ui/neat-app/build/index.html +1 -1
cognite/neat/app/ui/neat-app/build/static/css/{main.38a62222.css → main.72e3d92e.css} +2 -2
cognite/neat/app/ui/neat-app/build/static/css/main.72e3d92e.css.map +1 -0
cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js +3 -0
cognite/neat/app/ui/neat-app/build/static/js/{main.ec7f72e2.js.LICENSE.txt → main.5a52cf09.js.LICENSE.txt} +0 -9
cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js.map +1 -0
cognite/neat/config.py +44 -27
cognite/neat/exceptions.py +6 -0
cognite/neat/graph/extractors/_classic_cdf/_assets.py +21 -73
cognite/neat/graph/extractors/_classic_cdf/_base.py +102 -0
cognite/neat/graph/extractors/_classic_cdf/_events.py +46 -42
cognite/neat/graph/extractors/_classic_cdf/_files.py +41 -45
cognite/neat/graph/extractors/_classic_cdf/_labels.py +75 -52
cognite/neat/graph/extractors/_classic_cdf/_relationships.py +49 -27
cognite/neat/graph/extractors/_classic_cdf/_sequences.py +47 -50
cognite/neat/graph/extractors/_classic_cdf/_timeseries.py +47 -49
cognite/neat/graph/queries/_base.py +22 -29
cognite/neat/graph/queries/_shared.py +1 -1
cognite/neat/graph/stores/_base.py +24 -11
cognite/neat/graph/transformers/_rdfpath.py +3 -2
cognite/neat/issues.py +8 -0
cognite/neat/rules/exporters/_rules2ontology.py +28 -20
cognite/neat/rules/exporters/_validation.py +15 -21
cognite/neat/rules/importers/_inference2rules.py +31 -35
cognite/neat/rules/importers/_owl2rules/_owl2metadata.py +3 -7
cognite/neat/rules/importers/_spreadsheet2rules.py +30 -27
cognite/neat/rules/issues/dms.py +20 -0
cognite/neat/rules/issues/importing.py +15 -0
cognite/neat/rules/issues/ontology.py +298 -0
cognite/neat/rules/issues/spreadsheet.py +48 -0
cognite/neat/rules/issues/tables.py +72 -0
cognite/neat/rules/models/_rdfpath.py +4 -4
cognite/neat/rules/models/_types/_field.py +9 -19
cognite/neat/rules/models/information/_rules.py +5 -4
cognite/neat/utils/rdf_.py +17 -9
cognite/neat/utils/regex_patterns.py +52 -0
cognite/neat/workflows/steps/data_contracts.py +17 -43
cognite/neat/workflows/steps/lib/current/graph_extractor.py +28 -24
cognite/neat/workflows/steps/lib/current/graph_loader.py +4 -21
cognite/neat/workflows/steps/lib/current/graph_store.py +18 -134
cognite/neat/workflows/steps_registry.py +5 -7
{cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/METADATA +2 -6
cognite_neat-0.88.1.dist-info/RECORD +209 -0
cognite/neat/app/api/routers/core.py +0 -91
cognite/neat/app/api/routers/data_exploration.py +0 -336
cognite/neat/app/api/routers/rules.py +0 -203
cognite/neat/app/ui/neat-app/build/static/css/main.38a62222.css.map +0 -1
cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js +0 -3
cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js.map +0 -1
cognite/neat/graph/stores/_oxrdflib.py +0 -247
cognite/neat/legacy/__init__.py +0 -0
cognite/neat/legacy/graph/__init__.py +0 -3
cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44-dirty.xml +0 -20182
cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44.xml +0 -20163
cognite/neat/legacy/graph/examples/__init__.py +0 -10
cognite/neat/legacy/graph/examples/skos-capturing-sheet-wind-topics.xlsx +0 -0
cognite/neat/legacy/graph/exceptions.py +0 -90
cognite/neat/legacy/graph/extractors/__init__.py +0 -6
cognite/neat/legacy/graph/extractors/_base.py +0 -14
cognite/neat/legacy/graph/extractors/_dexpi.py +0 -44
cognite/neat/legacy/graph/extractors/_graph_capturing_sheet.py +0 -403
cognite/neat/legacy/graph/extractors/_mock_graph_generator.py +0 -361
cognite/neat/legacy/graph/loaders/__init__.py +0 -23
cognite/neat/legacy/graph/loaders/_asset_loader.py +0 -511
cognite/neat/legacy/graph/loaders/_base.py +0 -67
cognite/neat/legacy/graph/loaders/_exceptions.py +0 -85
cognite/neat/legacy/graph/loaders/core/__init__.py +0 -0
cognite/neat/legacy/graph/loaders/core/labels.py +0 -58
cognite/neat/legacy/graph/loaders/core/models.py +0 -136
cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py +0 -1046
cognite/neat/legacy/graph/loaders/core/rdf_to_relationships.py +0 -559
cognite/neat/legacy/graph/loaders/rdf_to_dms.py +0 -309
cognite/neat/legacy/graph/loaders/validator.py +0 -87
cognite/neat/legacy/graph/models.py +0 -6
cognite/neat/legacy/graph/stores/__init__.py +0 -13
cognite/neat/legacy/graph/stores/_base.py +0 -400
cognite/neat/legacy/graph/stores/_graphdb_store.py +0 -52
cognite/neat/legacy/graph/stores/_memory_store.py +0 -43
cognite/neat/legacy/graph/stores/_oxigraph_store.py +0 -151
cognite/neat/legacy/graph/stores/_oxrdflib.py +0 -247
cognite/neat/legacy/graph/stores/_rdf_to_graph.py +0 -42
cognite/neat/legacy/graph/transformations/__init__.py +0 -0
cognite/neat/legacy/graph/transformations/entity_matcher.py +0 -101
cognite/neat/legacy/graph/transformations/query_generator/__init__.py +0 -3
cognite/neat/legacy/graph/transformations/query_generator/sparql.py +0 -575
cognite/neat/legacy/graph/transformations/transformer.py +0 -322
cognite/neat/legacy/rules/__init__.py +0 -0
cognite/neat/legacy/rules/analysis.py +0 -231
cognite/neat/legacy/rules/examples/Rules-Nordic44-to-graphql.xlsx +0 -0
cognite/neat/legacy/rules/examples/Rules-Nordic44.xlsx +0 -0
cognite/neat/legacy/rules/examples/__init__.py +0 -18
cognite/neat/legacy/rules/examples/power-grid-containers.yaml +0 -124
cognite/neat/legacy/rules/examples/power-grid-example.xlsx +0 -0
cognite/neat/legacy/rules/examples/power-grid-model.yaml +0 -224
cognite/neat/legacy/rules/examples/rules-template.xlsx +0 -0
cognite/neat/legacy/rules/examples/sheet2cdf-transformation-rules.xlsx +0 -0
cognite/neat/legacy/rules/examples/skos-rules.xlsx +0 -0
cognite/neat/legacy/rules/examples/source-to-solution-mapping-rules.xlsx +0 -0
cognite/neat/legacy/rules/examples/wind-energy.owl +0 -1511
cognite/neat/legacy/rules/exceptions.py +0 -2972
cognite/neat/legacy/rules/exporters/__init__.py +0 -20
cognite/neat/legacy/rules/exporters/_base.py +0 -45
cognite/neat/legacy/rules/exporters/_core/__init__.py +0 -5
cognite/neat/legacy/rules/exporters/_core/rules2labels.py +0 -24
cognite/neat/legacy/rules/exporters/_rules2dms.py +0 -885
cognite/neat/legacy/rules/exporters/_rules2excel.py +0 -213
cognite/neat/legacy/rules/exporters/_rules2graphql.py +0 -183
cognite/neat/legacy/rules/exporters/_rules2ontology.py +0 -524
cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py +0 -748
cognite/neat/legacy/rules/exporters/_rules2rules.py +0 -105
cognite/neat/legacy/rules/exporters/_rules2triples.py +0 -38
cognite/neat/legacy/rules/exporters/_validation.py +0 -146
cognite/neat/legacy/rules/importers/__init__.py +0 -22
cognite/neat/legacy/rules/importers/_base.py +0 -66
cognite/neat/legacy/rules/importers/_dict2rules.py +0 -158
cognite/neat/legacy/rules/importers/_dms2rules.py +0 -194
cognite/neat/legacy/rules/importers/_graph2rules.py +0 -308
cognite/neat/legacy/rules/importers/_json2rules.py +0 -39
cognite/neat/legacy/rules/importers/_owl2rules/__init__.py +0 -3
cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py +0 -239
cognite/neat/legacy/rules/importers/_owl2rules/_owl2metadata.py +0 -260
cognite/neat/legacy/rules/importers/_owl2rules/_owl2properties.py +0 -217
cognite/neat/legacy/rules/importers/_owl2rules/_owl2rules.py +0 -290
cognite/neat/legacy/rules/importers/_spreadsheet2rules.py +0 -45
cognite/neat/legacy/rules/importers/_xsd2rules.py +0 -20
cognite/neat/legacy/rules/importers/_yaml2rules.py +0 -39
cognite/neat/legacy/rules/models/__init__.py +0 -5
cognite/neat/legacy/rules/models/_base.py +0 -151
cognite/neat/legacy/rules/models/raw_rules.py +0 -316
cognite/neat/legacy/rules/models/rdfpath.py +0 -237
cognite/neat/legacy/rules/models/rules.py +0 -1289
cognite/neat/legacy/rules/models/tables.py +0 -9
cognite/neat/legacy/rules/models/value_types.py +0 -118
cognite/neat/legacy/workflows/examples/Export_DMS/workflow.yaml +0 -89
cognite/neat/legacy/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
cognite/neat/legacy/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
cognite/neat/legacy/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
cognite/neat/legacy/workflows/examples/Import_DMS/workflow.yaml +0 -65
cognite/neat/legacy/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
cognite/neat/legacy/workflows/examples/Validate_Rules/workflow.yaml +0 -67
cognite/neat/legacy/workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
cognite/neat/legacy/workflows/examples/Visualize_Data_Model_Using_Mock_Graph/workflow.yaml +0 -95
cognite/neat/legacy/workflows/examples/Visualize_Semantic_Data_Model/workflow.yaml +0 -111
cognite/neat/rules/exceptions.py +0 -2972
cognite/neat/rules/models/_types/_base.py +0 -16
cognite/neat/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
cognite/neat/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
cognite/neat/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
cognite/neat/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
cognite/neat/workflows/migration/__init__.py +0 -0
cognite/neat/workflows/migration/steps.py +0 -91
cognite/neat/workflows/migration/wf_manifests.py +0 -33
cognite/neat/workflows/steps/lib/legacy/__init__.py +0 -7
cognite/neat/workflows/steps/lib/legacy/graph_contextualization.py +0 -82
cognite/neat/workflows/steps/lib/legacy/graph_extractor.py +0 -746
cognite/neat/workflows/steps/lib/legacy/graph_loader.py +0 -606
cognite/neat/workflows/steps/lib/legacy/graph_store.py +0 -307
cognite/neat/workflows/steps/lib/legacy/graph_transformer.py +0 -58
cognite/neat/workflows/steps/lib/legacy/rules_exporter.py +0 -511
cognite/neat/workflows/steps/lib/legacy/rules_importer.py +0 -612
cognite_neat-0.87.6.dist-info/RECORD +0 -319
{cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/LICENSE +0 -0
{cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/WHEEL +0 -0
{cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/entry_points.txt +0 -0

cognite/neat/graph/extractors/_classic_cdf/_labels.py CHANGED Viewed

@@ -1,33 +1,37 @@
-from collections.abc import Iterable
+from collections.abc import Callable, Set
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import cast
 from cognite.client import CogniteClient
 from cognite.client.data_classes import LabelDefinition, LabelDefinitionList
 from rdflib import RDF, Literal, Namespace
-from cognite.neat.constants import DEFAULT_NAMESPACE
-from cognite.neat.graph.extractors._base import BaseExtractor
 from cognite.neat.graph.models import Triple
 from cognite.neat.utils.auxiliary import create_sha256_hash
+from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFExtractor
-class LabelsExtractor(BaseExtractor):
+class LabelsExtractor(ClassicCDFExtractor[LabelDefinition]):
     """Extract data from Cognite Data Fusions Labels into Neat.
     Args:
-        labels (Iterable[LabelDefinition]): An iterable of labels.
+        items (Iterable[LabelDefinition]): An iterable of items.
         namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
+        to_type (Callable[[LabelDefinition], str | None], optional): A function to convert an item to a type.
+            Defaults to None. If None or if the function returns None, the asset will be set to the default type.
+        total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
+            is installed. Defaults to None.
+        limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
+            testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
+            limit the extraction to 1000 assets to test the setup.
+        unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
+            a JSON string.
+        skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
+           values in this set will be skipped.
     """
-    def __init__(
-        self,
-        labels: Iterable[LabelDefinition],
-        namespace: Namespace | None = None,
-    ):
-        self.namespace = namespace or DEFAULT_NAMESPACE
-        self.labels = labels
+    _default_rdf_type = "Label"
     @classmethod
     def from_dataset(
@@ -35,57 +39,76 @@ class LabelsExtractor(BaseExtractor):
         client: CogniteClient,
         data_set_external_id: str,
         namespace: Namespace | None = None,
+        to_type: Callable[[LabelDefinition], str | None] | None = None,
+        limit: int | None = None,
+        unpack_metadata: bool = True,
+        skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
     ):
         return cls(
-            cast(
-                Iterable[LabelDefinition],
-                client.labels(data_set_external_ids=data_set_external_id),
-            ),
-            namespace,
+            client.labels(data_set_external_ids=data_set_external_id),
+            namespace=namespace,
+            to_type=to_type,
+            limit=limit,
+            unpack_metadata=unpack_metadata,
+            skip_metadata_values=skip_metadata_values,
         )
     @classmethod
-    def from_file(cls, file_path: str, namespace: Namespace | None = None):
-        return cls(LabelDefinitionList.load(Path(file_path).read_text()), namespace)
+    def from_file(
+        cls,
+        file_path: str,
+        namespace: Namespace | None = None,
+        to_type: Callable[[LabelDefinition], str | None] | None = None,
+        limit: int | None = None,
+        unpack_metadata: bool = True,
+        skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
+    ):
+        labels = LabelDefinitionList.load(Path(file_path).read_text())
+        return cls(
+            labels,
+            total=len(labels),
+            namespace=namespace,
+            to_type=to_type,
+            limit=limit,
+            unpack_metadata=unpack_metadata,
+            skip_metadata_values=skip_metadata_values,
+        )
-    def extract(self) -> Iterable[Triple]:
-        """Extract labels as triples."""
-        for label in self.labels:
-            yield from self._labels2triples(label)
+    def _item2triples(self, label: LabelDefinition) -> list[Triple]:
+        if not label.external_id:
+            return []
-    def _labels2triples(self, label: LabelDefinition) -> list[Triple]:
-        if label.external_id:
-            id_ = self.namespace[f"Label_{create_sha256_hash(label.external_id)}"]
+        id_ = self.namespace[f"Label_{create_sha256_hash(label.external_id)}"]
-            # Set rdf type
-            triples: list[Triple] = [(id_, RDF.type, self.namespace.Label)]
+        type_ = self._get_rdf_type(label)
+        # Set rdf type
+        triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
-            # Create attributes
-            triples.append((id_, self.namespace.external_id, Literal(label.external_id)))
+        # Create attributes
+        triples.append((id_, self.namespace.external_id, Literal(label.external_id)))
-            if label.name:
-                triples.append((id_, self.namespace.name, Literal(label.name)))
+        if label.name:
+            triples.append((id_, self.namespace.name, Literal(label.name)))
-            if label.description:
-                triples.append((id_, self.namespace.description, Literal(label.description)))
+        if label.description:
+            triples.append((id_, self.namespace.description, Literal(label.description)))
-            if label.created_time:
-                triples.append(
-                    (
-                        id_,
-                        self.namespace.created_time,
-                        Literal(datetime.fromtimestamp(label.created_time / 1000, timezone.utc)),
-                    )
+        if label.created_time:
+            triples.append(
+                (
+                    id_,
+                    self.namespace.created_time,
+                    Literal(datetime.fromtimestamp(label.created_time / 1000, timezone.utc)),
                 )
-            if label.data_set_id:
-                triples.append(
-                    (
-                        id_,
-                        self.namespace.data_set_id,
-                        self.namespace[f"Dataset_{label.data_set_id}"],
-                    )
+            )
+        if label.data_set_id:
+            triples.append(
+                (
+                    id_,
+                    self.namespace.data_set_id,
+                    self.namespace[f"Dataset_{label.data_set_id}"],
                 )
+            )
-            return triples
-        return []
+        return triples

cognite/neat/graph/extractors/_classic_cdf/_relationships.py CHANGED Viewed

@@ -1,34 +1,38 @@
-from collections.abc import Iterable
+from collections.abc import Callable, Set
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import cast
 from urllib.parse import quote
 from cognite.client import CogniteClient
 from cognite.client.data_classes import Relationship, RelationshipList
 from rdflib import RDF, Literal, Namespace
-from cognite.neat.constants import DEFAULT_NAMESPACE
-from cognite.neat.graph.extractors._base import BaseExtractor
 from cognite.neat.graph.models import Triple
 from cognite.neat.utils.auxiliary import create_sha256_hash
+from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFExtractor
-class RelationshipsExtractor(BaseExtractor):
+class RelationshipsExtractor(ClassicCDFExtractor[Relationship]):
     """Extract data from Cognite Data Fusions Relationships into Neat.
     Args:
-        relationships (Iterable[Asset]): An iterable of relationships.
+        items (Iterable[Relationship]): An iterable of items.
         namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
+        to_type (Callable[[Relationship], str | None], optional): A function to convert an item to a type.
+            Defaults to None. If None or if the function returns None, the asset will be set to the default type.
+        total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
+            is installed. Defaults to None.
+        limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
+            testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
+            limit the extraction to 1000 assets to test the setup.
+        unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
+            a JSON string.
+        skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
+           values in this set will be skipped.
     """
-    def __init__(
-        self,
-        relationships: Iterable[Relationship],
-        namespace: Namespace | None = None,
-    ):
-        self.namespace = namespace or DEFAULT_NAMESPACE
-        self.relationships = relationships
+    _default_rdf_type = "Relationship"
     @classmethod
     def from_dataset(
@@ -36,33 +40,51 @@ class RelationshipsExtractor(BaseExtractor):
         client: CogniteClient,
         data_set_external_id: str,
         namespace: Namespace | None = None,
+        to_type: Callable[[Relationship], str | None] | None = None,
+        limit: int | None = None,
+        unpack_metadata: bool = True,
+        skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
     ):
         return cls(
-            cast(
-                Iterable[Relationship],
-                client.relationships(data_set_external_ids=data_set_external_id),
-            ),
-            namespace,
+            client.relationships(data_set_external_ids=data_set_external_id),
+            namespace=namespace,
+            to_type=to_type,
+            limit=limit,
+            unpack_metadata=unpack_metadata,
+            skip_metadata_values=skip_metadata_values,
         )
     @classmethod
-    def from_file(cls, file_path: str, namespace: Namespace | None = None):
-        return cls(RelationshipList.load(Path(file_path).read_text()), namespace)
-    def extract(self) -> Iterable[Triple]:
-        """Extracts an asset with the given asset_id."""
-        for relationship in self.relationships:
-            yield from self._relationship2triples(relationship)
+    def from_file(
+        cls,
+        file_path: str,
+        namespace: Namespace | None = None,
+        to_type: Callable[[Relationship], str | None] | None = None,
+        limit: int | None = None,
+        unpack_metadata: bool = True,
+        skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
+    ):
+        relationships = RelationshipList.load(Path(file_path).read_text())
+        return cls(
+            relationships,
+            namespace=namespace,
+            total=len(relationships),
+            to_type=to_type,
+            limit=limit,
+            unpack_metadata=unpack_metadata,
+            skip_metadata_values=skip_metadata_values,
+        )
-    def _relationship2triples(self, relationship: Relationship) -> list[Triple]:
+    def _item2triples(self, relationship: Relationship) -> list[Triple]:
         """Converts an asset to triples."""
         if relationship.external_id and relationship.source_external_id and relationship.target_external_id:
             # relationships do not have an internal id, so we generate one
             id_ = self.namespace[f"Relationship_{create_sha256_hash(relationship.external_id)}"]
+            type_ = self._get_rdf_type(relationship)
             # Set rdf type
-            triples: list[Triple] = [(id_, RDF.type, self.namespace["Relationship"])]
+            triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
             # Set source and target types
             if source_type := relationship.source_type:

cognite/neat/graph/extractors/_classic_cdf/_sequences.py CHANGED Viewed

@@ -1,39 +1,36 @@
-import json
-from collections.abc import Iterable
+from collections.abc import Callable, Set
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import cast
 from cognite.client import CogniteClient
-from cognite.client.data_classes import Sequence, SequenceList
-from pydantic import AnyHttpUrl, ValidationError
-from rdflib import RDF, Literal, Namespace, URIRef
+from cognite.client.data_classes import Sequence, SequenceFilter, SequenceList
+from rdflib import RDF, Literal, Namespace
-from cognite.neat.constants import DEFAULT_NAMESPACE
-from cognite.neat.graph.extractors._base import BaseExtractor
 from cognite.neat.graph.models import Triple
-from cognite.neat.utils.auxiliary import string_to_ideal_type
+from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFExtractor
-class SequencesExtractor(BaseExtractor):
+class SequencesExtractor(ClassicCDFExtractor[Sequence]):
     """Extract data from Cognite Data Fusions Sequences into Neat.
     Args:
-        sequence (Iterable[Sequence]): An iterable of sequences.
+        items (Iterable[Sequence]): An iterable of items.
         namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
+        to_type (Callable[[Sequence], str | None], optional): A function to convert an item to a type.
+            Defaults to None. If None or if the function returns None, the asset will be set to the default type.
+        total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
+            is installed. Defaults to None.
+        limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
+            testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
+            limit the extraction to 1000 assets to test the setup.
         unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
             a JSON string.
+        skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
+           values in this set will be skipped.
     """
-    def __init__(
-        self,
-        sequence: Iterable[Sequence],
-        namespace: Namespace | None = None,
-        unpack_metadata: bool = True,
-    ):
-        self.namespace = namespace or DEFAULT_NAMESPACE
-        self.sequence = sequence
-        self.unpack_metadata = unpack_metadata
+    _default_rdf_type = "Sequence"
     @classmethod
     def from_dataset(
@@ -41,15 +38,22 @@ class SequencesExtractor(BaseExtractor):
         client: CogniteClient,
         data_set_external_id: str,
         namespace: Namespace | None = None,
+        to_type: Callable[[Sequence], str | None] | None = None,
+        limit: int | None = None,
         unpack_metadata: bool = True,
+        skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
     ):
+        total = client.sequences.aggregate_count(
+            filter=SequenceFilter(data_set_ids=[{"externalId": data_set_external_id}])
+        )
         return cls(
-            cast(
-                Iterable[Sequence],
-                client.sequences(data_set_external_ids=data_set_external_id),
-            ),
-            namespace,
-            unpack_metadata,
+            client.sequences(data_set_external_ids=data_set_external_id),
+            total=total,
+            namespace=namespace,
+            to_type=to_type,
+            limit=limit,
+            unpack_metadata=unpack_metadata,
+            skip_metadata_values=skip_metadata_values,
         )
     @classmethod
@@ -57,20 +61,28 @@ class SequencesExtractor(BaseExtractor):
         cls,
         file_path: str,
         namespace: Namespace | None = None,
+        to_type: Callable[[Sequence], str | None] | None = None,
+        limit: int | None = None,
         unpack_metadata: bool = True,
+        skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
     ):
-        return cls(SequenceList.load(Path(file_path).read_text()), namespace, unpack_metadata)
-    def extract(self) -> Iterable[Triple]:
-        """Extract sequences as triples."""
-        for sequence in self.sequence:
-            yield from self._sequence2triples(sequence)
+        sequences = SequenceList.load(Path(file_path).read_text())
+        return cls(
+            sequences,
+            total=len(sequences),
+            namespace=namespace,
+            to_type=to_type,
+            limit=limit,
+            unpack_metadata=unpack_metadata,
+            skip_metadata_values=skip_metadata_values,
+        )
-    def _sequence2triples(self, sequence: Sequence) -> list[Triple]:
+    def _item2triples(self, sequence: Sequence) -> list[Triple]:
         id_ = self.namespace[f"Sequence_{sequence.id}"]
+        type_ = self._get_rdf_type(sequence)
         # Set rdf type
-        triples: list[Triple] = [(id_, RDF.type, self.namespace.Sequence)]
+        triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
         # Create attributes
@@ -81,22 +93,7 @@ class SequencesExtractor(BaseExtractor):
             triples.append((id_, self.namespace.name, Literal(sequence.name)))
         if sequence.metadata:
-            if self.unpack_metadata:
-                for key, value in sequence.metadata.items():
-                    if value:
-                        type_aware_value = string_to_ideal_type(value)
-                        try:
-                            triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
-                        except ValidationError:
-                            triples.append((id_, self.namespace[key], Literal(type_aware_value)))
-            else:
-                triples.append(
-                    (
-                        id_,
-                        self.namespace.metadata,
-                        Literal(json.dumps(sequence.metadata)),
-                    )
-                )
+            triples.extend(self._metadata_to_triples(id_, sequence.metadata))
         if sequence.description:
             triples.append((id_, self.namespace.description, Literal(sequence.description)))

cognite/neat/graph/extractors/_classic_cdf/_timeseries.py CHANGED Viewed

@@ -1,39 +1,37 @@
-import json
-from collections.abc import Iterable
+from collections.abc import Callable, Set
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import cast
 from cognite.client import CogniteClient
-from cognite.client.data_classes import TimeSeries, TimeSeriesList
+from cognite.client.data_classes import TimeSeries, TimeSeriesFilter, TimeSeriesList
 from pydantic import AnyHttpUrl, ValidationError
 from rdflib import RDF, Literal, Namespace, URIRef
-from cognite.neat.constants import DEFAULT_NAMESPACE
-from cognite.neat.graph.extractors._base import BaseExtractor
 from cognite.neat.graph.models import Triple
-from cognite.neat.utils.auxiliary import string_to_ideal_type
+from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFExtractor
-class TimeSeriesExtractor(BaseExtractor):
+class TimeSeriesExtractor(ClassicCDFExtractor[TimeSeries]):
     """Extract data from Cognite Data Fusions TimeSeries into Neat.
     Args:
-        timeseries (Iterable[TimeSeries]): An iterable of timeseries.
+        items (Iterable[TimeSeries]): An iterable of items.
         namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
+        to_type (Callable[[TimeSeries], str | None], optional): A function to convert an item to a type.
+            Defaults to None. If None or if the function returns None, the asset will be set to the default type.
+        total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
+            is installed. Defaults to None.
+        limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
+            testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
+            limit the extraction to 1000 assets to test the setup.
         unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
             a JSON string.
+        skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
+           values in this set will be skipped.
     """
-    def __init__(
-        self,
-        timeseries: Iterable[TimeSeries],
-        namespace: Namespace | None = None,
-        unpack_metadata: bool = True,
-    ):
-        self.namespace = namespace or DEFAULT_NAMESPACE
-        self.timeseries = timeseries
-        self.unpack_metadata = unpack_metadata
+    _default_rdf_type = "TimeSeries"
     @classmethod
     def from_dataset(
@@ -41,15 +39,23 @@ class TimeSeriesExtractor(BaseExtractor):
         client: CogniteClient,
         data_set_external_id: str,
         namespace: Namespace | None = None,
+        to_type: Callable[[TimeSeries], str | None] | None = None,
+        limit: int | None = None,
         unpack_metadata: bool = True,
+        skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
     ):
+        total = client.time_series.aggregate_count(
+            filter=TimeSeriesFilter(data_set_ids=[{"externalId": data_set_external_id}])
+        )
         return cls(
-            cast(
-                Iterable[TimeSeries],
-                client.time_series(data_set_external_ids=data_set_external_id),
-            ),
-            namespace,
-            unpack_metadata,
+            client.time_series(data_set_external_ids=data_set_external_id),
+            total=total,
+            namespace=namespace,
+            to_type=to_type,
+            limit=limit,
+            unpack_metadata=unpack_metadata,
+            skip_metadata_values=skip_metadata_values,
         )
     @classmethod
@@ -57,23 +63,30 @@ class TimeSeriesExtractor(BaseExtractor):
         cls,
         file_path: str,
         namespace: Namespace | None = None,
+        to_type: Callable[[TimeSeries], str | None] | None = None,
+        limit: int | None = None,
         unpack_metadata: bool = True,
+        skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
     ):
-        return cls(TimeSeriesList.load(Path(file_path).read_text()), namespace, unpack_metadata)
-    def extract(self) -> Iterable[Triple]:
-        """Extract timeseries as triples."""
-        for timeseries in self.timeseries:
-            yield from self._timeseries2triples(timeseries)
+        timeseries = TimeSeriesList.load(Path(file_path).read_text())
+        return cls(
+            timeseries,
+            total=len(timeseries),
+            namespace=namespace,
+            to_type=to_type,
+            limit=limit,
+            unpack_metadata=unpack_metadata,
+            skip_metadata_values=skip_metadata_values,
+        )
-    def _timeseries2triples(self, timeseries: TimeSeries) -> list[Triple]:
+    def _item2triples(self, timeseries: TimeSeries) -> list[Triple]:
         id_ = self.namespace[f"TimeSeries_{timeseries.id}"]
         # Set rdf type
-        triples: list[Triple] = [(id_, RDF.type, self.namespace.TimeSeries)]
+        type_ = self._get_rdf_type(timeseries)
+        triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
         # Create attributes
         if timeseries.external_id:
             triples.append((id_, self.namespace.external_id, Literal(timeseries.external_id)))
@@ -84,22 +97,7 @@ class TimeSeriesExtractor(BaseExtractor):
             triples.append((id_, self.namespace.is_string, Literal(timeseries.is_string)))
         if timeseries.metadata:
-            if self.unpack_metadata:
-                for key, value in timeseries.metadata.items():
-                    if value:
-                        type_aware_value = string_to_ideal_type(value)
-                        try:
-                            triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
-                        except ValidationError:
-                            triples.append((id_, self.namespace[key], Literal(type_aware_value)))
-            else:
-                triples.append(
-                    (
-                        id_,
-                        self.namespace.metadata,
-                        Literal(json.dumps(timeseries.metadata)),
-                    )
-                )
+            triples.extend(self._metadata_to_triples(id_, timeseries.metadata))
         if timeseries.unit:
             triples.append((id_, self.namespace.unit, Literal(timeseries.unit)))

cognite/neat/graph/queries/_base.py CHANGED Viewed

@@ -98,47 +98,40 @@ class Queries:
         self,
         instance_id: URIRef,
         property_renaming_config: dict | None = None,
-    ) -> tuple[str, dict[str, list[str]]]:
+    ) -> tuple[str, dict[str, list[str]]] | None:
         """DESCRIBE instance for a given class from the graph store
         Args:
             instance_id: Instance id for which we want to generate query
-            property_rename_config: Dictionary to rename properties, default None
+            property_renaming_config: Dictionary to rename properties, default None
         Returns:
             Tuple of the instance identifier and a dictionary of its properties, or None if no properties are found
         """
         property_values: dict[str, list[str]] = defaultdict(list)
-        for subject, predicate, object_ in cast(list[ResultRow], self.graph.query(f"DESCRIBE <{instance_id}>")):
-            if object_.lower() not in [
+        identifier = remove_namespace_from_uri(instance_id, validation="prefix")
+        for _, predicate, object_ in cast(list[ResultRow], self.graph.query(f"DESCRIBE <{instance_id}>")):
+            if object_.lower() in [
                 "",
                 "none",
                 "nan",
                 "null",
             ]:
-                # we are skipping deep validation with Pydantic to remove namespace here
-                # as it reduces time to process triples by 10-15x
-                identifier, value = cast(  # type: ignore[misc]
-                    (str, str),
-                    remove_namespace_from_uri(*(subject, object_), validation="prefix"),
-                )  # type: ignore[misc, index]
-                # use-case: calling describe without renaming properties
-                # losing the namespace from the predicate!
-                if not property_renaming_config and predicate != RDF.type:
-                    property_values[remove_namespace_from_uri(predicate, validation="prefix")].append(value)
-                # use-case: calling describe with renaming properties
-                # renaming the property to the new name, if the property is defined
-                # in the RULES sheet
-                elif property_renaming_config and (property_ := property_renaming_config.get(predicate, None)):
-                    property_values[property_].append(value)
-                # use-case: skip the property if it is not defined in property_renaming_config
-                else:
-                    continue
+                continue
+            # we are skipping deep validation with Pydantic to remove namespace here
+            # as it reduces time to process triples by 10-15x
+            value = remove_namespace_from_uri(object_, validation="prefix")
+            # use-case: calling describe without renaming properties
+            # losing the namespace from the predicate!
+            if not property_renaming_config and predicate != RDF.type:
+                property_values[remove_namespace_from_uri(predicate, validation="prefix")].append(value)
+            # use-case: calling describe with renaming properties
+            # renaming the property to the new name, if the property is defined
+            # in the RULES sheet
+            elif property_renaming_config and (property_ := property_renaming_config.get(predicate, None)):
+                property_values[property_].append(value)
         if property_values:
             return (
@@ -146,7 +139,7 @@ class Queries:
                 property_values,
             )
         else:
-            return ()  # type: ignore [return-value]
+            return None
     def construct_instances_of_class(
         self,
@@ -177,7 +170,7 @@ class Queries:
             result = self.graph.query(query)
             # We cannot include the RDF.type in case there is a neat:type property
-            return [remove_namespace_from_uri(*triple) for triple in result if triple[1] != RDF.type]  # type: ignore[misc, index]
+            return [remove_namespace_from_uri(cast(ResultRow, triple)) for triple in result if triple[1] != RDF.type]  # type: ignore[misc, index]
         else:
             warnings.warn(
                 "No rules found for the graph store, returning empty list.",

cognite-neat 0.87.6__py3-none-any.whl → 0.88.1__py3-none-any.whl

Potentially problematic release.

cognite-neat 0.87.6py3-none-any.whl → 0.88.1py3-none-any.whl