cognite-neat 0.80.3__py3-none-any.whl → 0.81.1__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

This release of cognite-neat has been flagged as potentially problematic; see the registry's advisory for details.

cognite/neat/_version.py CHANGED
@@ -1 +1 @@
- __version__ = "0.80.3"
+ __version__ = "0.81.1"
cognite/neat/graph/extractors/__init__.py CHANGED
@@ -5,6 +5,7 @@ from ._classic_cdf._labels import LabelsExtractor
  from ._classic_cdf._relationships import RelationshipsExtractor
  from ._classic_cdf._sequences import SequencesExtractor
  from ._classic_cdf._timeseries import TimeSeriesExtractor
+ from ._dexpi import DexpiExtractor
  from ._mock_graph_generator import MockGraphGenerator
  from ._rdf_file import RdfFileExtractor

@@ -18,6 +19,7 @@ __all__ = [
      "FilesExtractor",
      "LabelsExtractor",
      "RdfFileExtractor",
+     "DexpiExtractor",
  ]


@@ -31,4 +33,5 @@ TripleExtractors = (
      | FilesExtractor
      | LabelsExtractor
      | RdfFileExtractor
+     | DexpiExtractor
  )
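
The new DexpiExtractor is exported alongside the existing extractors and joins the TripleExtractors union, so any code typed against the union accepts it unchanged. A minimal sketch of that, with a hypothetical `load_into` helper (not part of the package):

```python
from rdflib import Graph

from cognite.neat.graph.extractors import DexpiExtractor, TripleExtractors


def load_into(graph: Graph, extractor: TripleExtractors) -> None:
    """Hypothetical helper typed against the union.

    Note: NeatGraphStore.write() special-cases RdfFileExtractor; this sketch
    only covers extractors that expose extract().
    """
    for triple in extractor.extract():  # each triple is a 3-tuple of rdflib terms
        graph.add(triple)


graph = Graph()
load_into(graph, DexpiExtractor.from_file("example_pid.xml"))  # placeholder path
```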
cognite/neat/graph/extractors/_classic_cdf/_assets.py CHANGED
@@ -1,10 +1,9 @@
  from collections.abc import Iterable
- from datetime import datetime
+ from datetime import datetime, timezone
  from pathlib import Path
  from typing import cast
  from urllib.parse import quote

- import pytz
  from cognite.client import CogniteClient
  from cognite.client.data_classes import Asset, AssetList
  from rdflib import RDF, Literal, Namespace
@@ -49,7 +48,7 @@ class AssetsExtractor(BaseExtractor):
      @classmethod
      def _asset2triples(cls, asset: Asset, namespace: Namespace) -> list[Triple]:
          """Converts an asset to triples."""
-         id_ = namespace[str(asset.id)]
+         id_ = namespace[f"Asset_{asset.id}"]

          # Set rdf type
          triples: list[Triple] = [(id_, RDF.type, namespace["Asset"])]
@@ -72,14 +71,14 @@ class AssetsExtractor(BaseExtractor):
              (
                  id_,
                  namespace.created_time,
-                 Literal(datetime.fromtimestamp(asset.created_time / 1000, pytz.utc)),
+                 Literal(datetime.fromtimestamp(asset.created_time / 1000, timezone.utc)),
              )
          )
          triples.append(
              (
                  id_,
                  namespace.last_updated_time,
-                 Literal(datetime.fromtimestamp(asset.last_updated_time / 1000, pytz.utc)),
+                 Literal(datetime.fromtimestamp(asset.last_updated_time / 1000, timezone.utc)),
              )
          )

@@ -87,7 +86,7 @@ class AssetsExtractor(BaseExtractor):
            for label in asset.labels:
                # external_id can create ill-formed URIs, so we create websafe URIs
                # since labels do not have internal ids, we use the external_id as the id
-               triples.append((id_, namespace.label, namespace[quote(label.dump()["externalId"])]))
+               triples.append((id_, namespace.label, namespace[f"Label_{quote(label.dump()['externalId'])}"]))

          if asset.metadata:
              for key, value in asset.metadata.items():
@@ -96,12 +95,12 @@ class AssetsExtractor(BaseExtractor):

          # Create connections:
          if asset.parent_id:
-             triples.append((id_, namespace.parent, namespace[str(asset.parent_id)]))
+             triples.append((id_, namespace.parent, namespace[f"Asset_{asset.parent_id}"]))

          if asset.root_id:
-             triples.append((id_, namespace.root, namespace[str(asset.root_id)]))
+             triples.append((id_, namespace.root, namespace[f"Asset_{asset.root_id}"]))

          if asset.data_set_id:
-             triples.append((id_, namespace.dataset, namespace[str(asset.data_set_id)]))
+             triples.append((id_, namespace.dataset, namespace[f"Dataset_{asset.data_set_id}"]))

          return triples
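
Two mechanical changes repeat across every classic-CDF extractor in this release: pytz.utc is replaced with the standard library's timezone.utc (dropping the pytz dependency), and node URIs gain a resource-type prefix instead of a bare numeric id, so different resource types with the same internal id no longer collide in the graph. A small sketch of both effects (the namespace URL is illustrative):

```python
from datetime import datetime, timezone

from rdflib import Namespace

# Timestamps: timezone.utc produces the same aware datetime pytz.utc did.
created_ms = 1_700_000_000_000  # CDF timestamps are in milliseconds
dt = datetime.fromtimestamp(created_ms / 1000, timezone.utc)
assert dt.tzinfo is timezone.utc

# URIs: ids are now prefixed with the resource type.
namespace = Namespace("http://example.org/")  # illustrative namespace
old_uri = namespace[str(123)]        # http://example.org/123        (0.80.x)
new_uri = namespace[f"Asset_{123}"]  # http://example.org/Asset_123  (0.81.x)
```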
cognite/neat/graph/extractors/_classic_cdf/_events.py CHANGED
@@ -1,9 +1,8 @@
  from collections.abc import Iterable
- from datetime import datetime
+ from datetime import datetime, timezone
  from pathlib import Path
  from typing import cast

- import pytz
  from cognite.client import CogniteClient
  from cognite.client.data_classes import Event, EventList
  from pydantic import AnyHttpUrl, ValidationError
@@ -44,7 +43,7 @@ class EventsExtractor(BaseExtractor):

      @classmethod
      def _event2triples(cls, event: Event, namespace: Namespace) -> list[Triple]:
-         id_ = namespace[str(event.id)]
+         id_ = namespace[f"Event_{event.id}"]

          # Set rdf type
          triples: list[Triple] = [(id_, RDF.type, namespace.Event)]
@@ -77,7 +76,7 @@ class EventsExtractor(BaseExtractor):

          if event.created_time:
              triples.append(
-                 (id_, namespace.created_time, Literal(datetime.fromtimestamp(event.created_time / 1000, pytz.utc)))
+                 (id_, namespace.created_time, Literal(datetime.fromtimestamp(event.created_time / 1000, timezone.utc)))
              )

          if event.last_updated_time:
@@ -85,7 +84,7 @@ class EventsExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.last_updated_time,
-                     Literal(datetime.fromtimestamp(event.last_updated_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(event.last_updated_time / 1000, timezone.utc)),
                  )
              )

@@ -94,7 +93,7 @@ class EventsExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.start_time,
-                     Literal(datetime.fromtimestamp(event.start_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(event.start_time / 1000, timezone.utc)),
                  )
              )

@@ -103,15 +102,15 @@ class EventsExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.end_time,
-                     Literal(datetime.fromtimestamp(event.end_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(event.end_time / 1000, timezone.utc)),
                  )
              )

          if event.data_set_id:
-             triples.append((id_, namespace.data_set_id, namespace[str(event.data_set_id)]))
+             triples.append((id_, namespace.data_set_id, namespace[f"Dataset_{event.data_set_id}"]))

          if event.asset_ids:
              for asset_id in event.asset_ids:
-                 triples.append((id_, namespace.asset, namespace[str(asset_id)]))
+                 triples.append((id_, namespace.asset, namespace[f"Asset_{asset_id}"]))

          return triples
cognite/neat/graph/extractors/_classic_cdf/_files.py CHANGED
@@ -1,10 +1,9 @@
  from collections.abc import Iterable
- from datetime import datetime
+ from datetime import datetime, timezone
  from pathlib import Path
  from typing import cast
  from urllib.parse import quote

- import pytz
  from cognite.client import CogniteClient
  from cognite.client.data_classes import FileMetadata, FileMetadataList
  from pydantic import AnyHttpUrl, ValidationError
@@ -45,7 +44,7 @@ class FilesExtractor(BaseExtractor):

      @classmethod
      def _file2triples(cls, file: FileMetadata, namespace: Namespace) -> list[Triple]:
-         id_ = namespace[str(file.id)]
+         id_ = namespace[f"File_{file.id}"]

          # Set rdf type
          triples: list[Triple] = [(id_, RDF.type, namespace.File)]
@@ -81,7 +80,7 @@ class FilesExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.source_created_time,
-                     Literal(datetime.fromtimestamp(file.source_created_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(file.source_created_time / 1000, timezone.utc)),
                  )
              )
          if file.source_modified_time:
@@ -89,17 +88,17 @@ class FilesExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.source_created_time,
-                     Literal(datetime.fromtimestamp(file.source_modified_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(file.source_modified_time / 1000, timezone.utc)),
                  )
              )
          if file.uploaded_time:
              triples.append(
-                 (id_, namespace.uploaded_time, Literal(datetime.fromtimestamp(file.uploaded_time / 1000, pytz.utc)))
+                 (id_, namespace.uploaded_time, Literal(datetime.fromtimestamp(file.uploaded_time / 1000, timezone.utc)))
              )

          if file.created_time:
              triples.append(
-                 (id_, namespace.created_time, Literal(datetime.fromtimestamp(file.created_time / 1000, pytz.utc)))
+                 (id_, namespace.created_time, Literal(datetime.fromtimestamp(file.created_time / 1000, timezone.utc)))
              )

          if file.last_updated_time:
@@ -107,7 +106,7 @@ class FilesExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.last_updated_time,
-                     Literal(datetime.fromtimestamp(file.last_updated_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(file.last_updated_time / 1000, timezone.utc)),
                  )
              )

@@ -115,17 +114,17 @@ class FilesExtractor(BaseExtractor):
            for label in file.labels:
                # external_id can create ill-formed URIs, so we create websafe URIs
                # since labels do not have internal ids, we use the external_id as the id
-               triples.append((id_, namespace.label, namespace[quote(label.dump()["externalId"])]))
+               triples.append((id_, namespace.label, namespace[f"Label_{quote(label.dump()['externalId'])}"]))

          if file.security_categories:
              for category in file.security_categories:
                  triples.append((id_, namespace.security_categories, Literal(category)))

          if file.data_set_id:
-             triples.append((id_, namespace.data_set_id, namespace[str(file.data_set_id)]))
+             triples.append((id_, namespace.data_set_id, namespace[f"Dataset_{file.data_set_id}"]))

          if file.asset_ids:
              for asset_id in file.asset_ids:
-                 triples.append((id_, namespace.asset, namespace[str(asset_id)]))
+                 triples.append((id_, namespace.asset, namespace[f"Asset_{asset_id}"]))

          return triples
cognite/neat/graph/extractors/_classic_cdf/_labels.py CHANGED
@@ -1,10 +1,9 @@
  from collections.abc import Iterable
- from datetime import datetime
+ from datetime import datetime, timezone
  from pathlib import Path
  from typing import cast
  from urllib.parse import quote

- import pytz
  from cognite.client import CogniteClient
  from cognite.client.data_classes import LabelDefinition, LabelDefinitionList
  from rdflib import RDF, Literal, Namespace
@@ -45,7 +44,7 @@ class LabelsExtractor(BaseExtractor):

      @classmethod
      def _labels2triples(cls, label: LabelDefinition, namespace: Namespace) -> list[Triple]:
-         id_ = namespace[quote(cast(str, label.external_id))]
+         id_ = namespace[f"Label_{quote(label.dump()['externalId'])}"]

          # Set rdf type
          triples: list[Triple] = [(id_, RDF.type, namespace.Label)]
@@ -63,10 +62,10 @@ class LabelsExtractor(BaseExtractor):

          if label.created_time:
              triples.append(
-                 (id_, namespace.created_time, Literal(datetime.fromtimestamp(label.created_time / 1000, pytz.utc)))
+                 (id_, namespace.created_time, Literal(datetime.fromtimestamp(label.created_time / 1000, timezone.utc)))
              )

          if label.data_set_id:
-             triples.append((id_, namespace.data_set_id, namespace[str(label.data_set_id)]))
+             triples.append((id_, namespace.data_set_id, namespace[f"Dataset_{label.data_set_id}"]))

          return triples
cognite/neat/graph/extractors/_classic_cdf/_relationships.py CHANGED
@@ -1,11 +1,10 @@
  import uuid
  from collections.abc import Iterable
- from datetime import datetime
+ from datetime import datetime, timezone
  from pathlib import Path
  from typing import cast
  from urllib.parse import quote

- import pytz
  from cognite.client import CogniteClient
  from cognite.client.data_classes import Relationship, RelationshipList
  from rdflib import RDF, Literal, Namespace
@@ -49,27 +48,27 @@ class RelationshipsExtractor(BaseExtractor):
          """Converts an asset to triples."""

          # relationships do not have an internal id, so we generate one
-         id_ = namespace[str(uuid.uuid4())]
+         id_ = namespace[f"Relationship_{uuid.uuid4()}"]

          # Set rdf type
          triples: list[Triple] = [(id_, RDF.type, namespace["Relationship"])]

          # Set source and target types
-         if relationship.source_type:
+         if source_type := relationship.source_type:
              triples.append(
                  (
                      id_,
                      namespace.source_type,
-                     namespace[relationship.source_type.title()],
+                     namespace[source_type.title()],
                  )
              )

-         if relationship.target_type:
+         if target_type := relationship.target_type:
              triples.append(
                  (
                      id_,
                      namespace.target_type,
-                     namespace[relationship.target_type.title()],
+                     namespace[target_type.title()],
                  )
              )

@@ -100,7 +99,7 @@ class RelationshipsExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.start_time,
-                     Literal(datetime.fromtimestamp(relationship.start_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(relationship.start_time / 1000, timezone.utc)),
                  )
              )

@@ -109,7 +108,7 @@ class RelationshipsExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.end_time,
-                     Literal(datetime.fromtimestamp(relationship.end_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(relationship.end_time / 1000, timezone.utc)),
                  )
              )

@@ -118,7 +117,7 @@ class RelationshipsExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.created_time,
-                     Literal(datetime.fromtimestamp(relationship.created_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(relationship.created_time / 1000, timezone.utc)),
                  )
              )

@@ -127,7 +126,7 @@ class RelationshipsExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.last_updated_time,
-                     Literal(datetime.fromtimestamp(relationship.last_updated_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(relationship.last_updated_time / 1000, timezone.utc)),
                  )
              )

@@ -144,10 +143,10 @@ class RelationshipsExtractor(BaseExtractor):
            for label in relationship.labels:
                # external_id can create ill-formed URIs, so we create websafe URIs
                # since labels do not have internal ids, we use the external_id as the id
-               triples.append((id_, namespace.label, namespace[quote(label.dump()["externalId"])]))
+               triples.append((id_, namespace.label, namespace[f"Label_{quote(label.dump()['externalId'])}"]))

          # Create connection
          if relationship.data_set_id:
-             triples.append((id_, namespace.dataset, namespace[str(relationship.data_set_id)]))
+             triples.append((id_, namespace.dataset, namespace[f"Dataset_{relationship.data_set_id}"]))

          return triples
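
Beyond the mechanical substitutions, the source_type/target_type checks above now bind the attribute with an assignment expression (the walrus operator), presumably so the attribute is read once and the branch body reuses the bound local. The pattern in isolation, with a hypothetical stand-in class:

```python
from dataclasses import dataclass


@dataclass
class Rel:  # stand-in for the cognite Relationship class, for illustration only
    source_type: str | None = None


rel = Rel(source_type="asset")

# Binding the attribute once means the truthiness check and the use site
# refer to the same local value, avoiding a second attribute lookup.
if source_type := rel.source_type:
    print(source_type.title())  # -> "Asset"
```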
cognite/neat/graph/extractors/_classic_cdf/_sequences.py CHANGED
@@ -1,9 +1,8 @@
  from collections.abc import Iterable
- from datetime import datetime
+ from datetime import datetime, timezone
  from pathlib import Path
  from typing import cast

- import pytz
  from cognite.client import CogniteClient
  from cognite.client.data_classes import Sequence, SequenceList
  from pydantic import AnyHttpUrl, ValidationError
@@ -44,7 +43,7 @@ class SequencesExtractor(BaseExtractor):

      @classmethod
      def _sequence2triples(cls, sequence: Sequence, namespace: Namespace) -> list[Triple]:
-         id_ = namespace[str(sequence.id)]
+         id_ = namespace[f"Sequence_{sequence.id}"]

          # Set rdf type
          triples: list[Triple] = [(id_, RDF.type, namespace.Sequence)]
@@ -71,7 +70,11 @@ class SequencesExtractor(BaseExtractor):

          if sequence.created_time:
              triples.append(
-                 (id_, namespace.created_time, Literal(datetime.fromtimestamp(sequence.created_time / 1000, pytz.utc)))
+                 (
+                     id_,
+                     namespace.created_time,
+                     Literal(datetime.fromtimestamp(sequence.created_time / 1000, timezone.utc)),
+                 )
              )

          if sequence.last_updated_time:
@@ -79,14 +82,14 @@ class SequencesExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.last_updated_time,
-                     Literal(datetime.fromtimestamp(sequence.last_updated_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(sequence.last_updated_time / 1000, timezone.utc)),
                  )
              )

          if sequence.data_set_id:
-             triples.append((id_, namespace.data_set_id, namespace[str(sequence.data_set_id)]))
+             triples.append((id_, namespace.data_set_id, namespace[f"Dataset_{sequence.data_set_id}"]))

          if sequence.asset_id:
-             triples.append((id_, namespace.asset, namespace[str(sequence.asset_id)]))
+             triples.append((id_, namespace.asset, namespace[f"Asset_{sequence.asset_id}"]))

          return triples
cognite/neat/graph/extractors/_classic_cdf/_timeseries.py CHANGED
@@ -1,9 +1,8 @@
  from collections.abc import Iterable
- from datetime import datetime
+ from datetime import datetime, timezone
  from pathlib import Path
  from typing import cast

- import pytz
  from cognite.client import CogniteClient
  from cognite.client.data_classes import TimeSeries, TimeSeriesList
  from pydantic import AnyHttpUrl, ValidationError
@@ -46,7 +45,7 @@ class TimeSeriesExtractor(BaseExtractor):

      @classmethod
      def _timeseries2triples(cls, timeseries: TimeSeries, namespace: Namespace) -> list[Triple]:
-         id_ = namespace[str(timeseries.id)]
+         id_ = namespace[f"TimeSeries_{timeseries.id}"]

          # Set rdf type
          triples: list[Triple] = [(id_, RDF.type, namespace.TimeSeries)]
@@ -86,7 +85,11 @@ class TimeSeriesExtractor(BaseExtractor):

          if timeseries.created_time:
              triples.append(
-                 (id_, namespace.created_time, Literal(datetime.fromtimestamp(timeseries.created_time / 1000, pytz.utc)))
+                 (
+                     id_,
+                     namespace.created_time,
+                     Literal(datetime.fromtimestamp(timeseries.created_time / 1000, timezone.utc)),
+                 )
              )

          if timeseries.last_updated_time:
@@ -94,7 +97,7 @@ class TimeSeriesExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.last_updated_time,
-                     Literal(datetime.fromtimestamp(timeseries.last_updated_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(timeseries.last_updated_time / 1000, timezone.utc)),
                  )
              )

@@ -110,9 +113,9 @@ class TimeSeriesExtractor(BaseExtractor):
              triples.append((id_, namespace.unit_external_id, Literal(timeseries.unit_external_id)))

          if timeseries.data_set_id:
-             triples.append((id_, namespace.dataset, namespace[str(timeseries.data_set_id)]))
+             triples.append((id_, namespace.dataset, namespace[f"Dataset_{timeseries.data_set_id}"]))

          if timeseries.asset_id:
-             triples.append((id_, namespace.asset, namespace[str(timeseries.asset_id)]))
+             triples.append((id_, namespace.asset, namespace[f"Asset_{timeseries.asset_id}"]))

          return triples
cognite/neat/graph/extractors/_dexpi.py ADDED
@@ -0,0 +1,212 @@
+ import xml.etree.ElementTree as ET
+ from collections import defaultdict
+ from collections.abc import Iterable
+ from pathlib import Path
+ from xml.etree.ElementTree import Element
+
+ from rdflib import RDF, RDFS, XSD, Literal, Namespace, URIRef
+
+ from cognite.neat.constants import DEFAULT_NAMESPACE
+ from cognite.neat.graph.extractors._base import BaseExtractor
+ from cognite.neat.graph.models import Triple
+ from cognite.neat.utils.utils import as_neat_compliant_uri
+ from cognite.neat.utils.xml import get_children, iterate_tree
+
+ DEXPI = Namespace("http://sandbox.dexpi.org/rdl/")
+
+
+ class DexpiExtractor(BaseExtractor):
+     """
+     DEXPI-XML extractor of RDF triples
+
+     Args:
+         root: XML root element of DEXPI file.
+         namespace: Optional custom namespace to use for extracted triples that define data
+             model instances. Defaults to DEFAULT_NAMESPACE.
+     """
+
+     def __init__(
+         self,
+         root: Element,
+         namespace: Namespace | None = None,
+     ):
+         self.root = root
+         self.namespace = namespace or DEFAULT_NAMESPACE
+
+     @classmethod
+     def from_file(cls, filepath: str | Path, namespace: Namespace | None = None):
+         return cls(ET.parse(filepath).getroot(), namespace)
+
+     @classmethod
+     def from_url(cls, url: str, namespace: Namespace | None = None):
+         from io import BytesIO
+
+         import requests
+
+         response = requests.get(url)
+         response.raise_for_status()
+         return cls(ET.parse(BytesIO(response.content)).getroot(), namespace)
+
+     def extract(self) -> Iterable[Triple]:
+         """Extracts RDF triples from DEXPI XML file."""
+
+         for element in iterate_tree(self.root):
+             yield from self._element2triples(element, self.namespace)
+
+     @classmethod
+     def _element2triples(cls, element: Element, namespace: Namespace) -> list[Triple]:
+         """Converts an element to triples."""
+         triples: list[Triple] = []
+
+         if (
+             "ComponentClass" in element.attrib
+             and element.attrib["ComponentClass"] != "Label"
+             and "ID" in element.attrib
+         ):
+             id_ = namespace[element.attrib["ID"]]
+
+             if node_triples := cls._element2node_triples(id_, element):
+                 triples.extend(node_triples)
+
+             if edge_triples := cls._element2edge_triples(id_, element, namespace):
+                 triples.extend(edge_triples)
+
+         return triples
+
+     @classmethod
+     def _element2edge_triples(cls, id_: URIRef, element: Element, namespace: Namespace) -> list[Triple]:
+         triples: list[Triple] = []
+
+         # connection triples
+         if connections := get_children(element, "Connection"):
+             for connection in connections:
+                 if "FromID" in connection.attrib and "ToID" in connection.attrib:
+                     triples.append(
+                         (
+                             namespace[connection.attrib["FromID"]],
+                             DEXPI.connection,
+                             namespace[connection.attrib["ToID"]],
+                         )
+                     )
+
+         # association triples
+         if associations := get_children(element, "Association"):
+             for association in associations:
+                 if "Type" in association.attrib and "ItemID" in association.attrib:
+                     association_type = cls._to_uri_friendly_association_type(association)
+
+                     triples.append(
+                         (
+                             id_,
+                             DEXPI[f"association/{association_type}"],
+                             namespace[association.attrib["ItemID"]],
+                         )
+                     )
+
+         # children-parent triples
+         for child in element:
+             if "ID" in child.attrib and child.tag != "Label":
+                 camel_case_property = child.tag[0].lower() + child.tag[1:]
+                 triples.append(
+                     (
+                         id_,
+                         DEXPI[f"children/{camel_case_property}"],
+                         namespace[child.attrib["ID"]],
+                     )
+                 )
+
+         return triples
+
+     @classmethod
+     def _to_uri_friendly_association_type(cls, association: Element):
+         association_type = "".join(
+             [word.capitalize() if i != 0 else word for i, word in enumerate(association.attrib["Type"].split(" "))]
+         )
+
+         return association_type
+
+     @classmethod
+     def _element2node_triples(cls, id_: URIRef, element: Element) -> list[Triple]:
+         """Converts an XML element to triples."""
+         triples: list[Triple] = []
+
+         # adding tag triple if exists
+         if tag := element.tag:
+             triples.append((id_, DEXPI.tag, Literal(str(tag))))
+
+         # adding attributes triples
+         if attributes := element.attrib:
+             if component_class := attributes.get("ComponentClass", None):
+                 triples.append((id_, DEXPI.ComponentClass, Literal(component_class)))
+             if component_name := attributes.get("ComponentName", None):
+                 triples.append((id_, DEXPI.ComponentName, Literal(component_name)))
+             if type_ := attributes.get("ComponentClassURI", None):
+                 triples.append((id_, RDF.type, URIRef(type_)))
+
+         # add label triple
+         if label := cls._get_element_label(element):
+             triples.append((id_, RDFS.label, Literal(label)))
+
+         # add generic attributes triples
+         if generic_attributes := cls._get_element_generic_attributes(element):
+             for attribute, value_definitions in generic_attributes.items():
+                 predicate = as_neat_compliant_uri(attribute)
+                 for value_definition in value_definitions:
+                     if literal := cls._value_definition2literal(value_definition):
+                         triples.append((id_, predicate, literal))
+
+         return triples
+
+     @classmethod
+     def _value_definition2literal(cls, definition: dict) -> Literal | None:
+         if "Value" not in definition or "Format" not in definition:
+             return None
+
+         # case: when language is present we create add language tag to the literal
+         elif "Language" in definition and "Value" in definition:
+             return Literal(definition["Value"], lang=definition["Language"])
+
+         # case: when ValueURI is present we use it instead of Value
+         # this would be candidate for ENUMs in CDF
+         elif "ValueURI" in definition:
+             return Literal(definition["ValueURI"], datatype=XSD[definition["Format"]])
+
+         # case: when Format is not string we make sure to add the datatype
+         elif definition["Format"].lower() != "string":
+             return Literal(definition["Value"], datatype=XSD[definition["Format"]])
+
+         # case: when Format is string we add the literal without datatype (easier to read triples, less noise)
+         else:
+             return Literal(definition["Value"])
+
+     @classmethod
+     def _get_element_label(cls, element: Element) -> str | None:
+         if children := get_children(element, "Label", 1):
+             if grandchildren := get_children(children[0], "Text", 1):
+                 if "String" in grandchildren[0].attrib:
+                     return grandchildren[0].attrib["String"]
+
+         # extension for schema version 3.3, where text is used to "label" without a <label> parent
+         elif children := get_children(element, "Text", 1):
+             if "String" in children[0].attrib:
+                 return children[0].attrib["String"]
+
+         return None
+
+     @classmethod
+     def _get_element_generic_attributes(cls, element: Element) -> dict:
+         # TODO: This requires more work as there are multiple groupings of GenericAttributes
+
+         attributes = defaultdict(list)
+         if children := get_children(element, "GenericAttributes", 1):
+             if grandchildren := get_children(children[0], "GenericAttribute"):
+                 for generic_attribute in grandchildren:
+                     # extension for schema version 3.3, where "AttributeURI" is not included
+                     if "AttributeURI" in generic_attribute.attrib:
+                         if generic_attribute.attrib["AttributeURI"] not in attributes:
+                             attributes[generic_attribute.attrib["AttributeURI"]] = [generic_attribute.attrib]
+
+                         else:
+                             attributes[generic_attribute.attrib["AttributeURI"]].append(generic_attribute.attrib)
+
+         return attributes
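
For orientation: the extractor only emits triples for elements that carry both a ComponentClass (other than "Label") and an ID; Connection, Association, and ID-bearing child elements become edges, while tags, attributes, labels, and generic attributes become node triples. A minimal end-to-end sketch with an illustrative XML fragment:

```python
import xml.etree.ElementTree as ET

from cognite.neat.graph.extractors import DexpiExtractor

# Illustrative fragment shaped like the elements the extractor looks for.
xml = """
<PlantModel>
  <Equipment ID="E1" ComponentClass="Pump" ComponentName="P-100">
    <Nozzle ID="N1" ComponentClass="Nozzle"/>
  </Equipment>
</PlantModel>
"""

extractor = DexpiExtractor(ET.fromstring(xml.strip()))
for triple in extractor.extract():
    print(triple)
# Expect e.g. (...E1, DEXPI.ComponentClass, "Pump") plus a children/nozzle
# edge from E1 to N1, and the corresponding node triples for N1.
```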
cognite/neat/graph/stores/_base.py CHANGED
@@ -1,11 +1,10 @@
  import sys
  import warnings
  from collections.abc import Iterable
- from datetime import datetime
+ from datetime import datetime, timezone
  from pathlib import Path
  from typing import cast

- import pytz
  from rdflib import RDF, Graph, Namespace, URIRef
  from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
  from rdflib.query import ResultRow
@@ -41,14 +40,14 @@ class NeatGraphStore:
          graph: Graph,
          rules: InformationRules | None = None,
      ):
-         _start = datetime.now(pytz.utc)
+         _start = datetime.now(timezone.utc)
          self.graph = graph
          self.provenance = Provenance(
              [
                  Change.record(
                      activity=f"{type(self).__name__}.__init__",
                      start=_start,
-                     end=datetime.now(pytz.utc),
+                     end=datetime.now(timezone.utc),
                      description=f"Initialize graph store as {type(self.graph.store).__name__}",
                  )
              ]
@@ -62,7 +61,7 @@ class NeatGraphStore:

      def _upsert_prefixes(self, prefixes: dict[str, Namespace]) -> None:
          """Adds prefixes to the graph store."""
-         _start = datetime.now(pytz.utc)
+         _start = datetime.now(timezone.utc)
          for prefix, namespace in prefixes.items():
              self.graph.bind(prefix, namespace)

@@ -70,7 +69,7 @@ class NeatGraphStore:
              Change.record(
                  activity=f"{type(self).__name__}._upsert_prefixes",
                  start=_start,
-                 end=datetime.now(pytz.utc),
+                 end=datetime.now(timezone.utc),
                  description="Upsert prefixes to graph store",
              )
          )
@@ -124,11 +123,22 @@ class NeatGraphStore:
          return cls(graph, rules)

      def write(self, extractor: TripleExtractors) -> None:
+         _start = datetime.now(timezone.utc)
+
          if isinstance(extractor, RdfFileExtractor):
              self._parse_file(extractor.filepath, extractor.mime_type, extractor.base_uri)
          else:
              self._add_triples(extractor.extract())

+         self.provenance.append(
+             Change.record(
+                 activity=f"{type(extractor).__name__}",
+                 start=_start,
+                 end=datetime.now(timezone.utc),
+                 description=f"Extracted triples to graph store using {type(extractor).__name__}",
+             )
+         )
+
      def _parse_file(
          self,
          filepath: Path,
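
write() now brackets every extraction with a provenance record, so the store's history shows which extractor ran, when it started, and when it finished, alongside the records already made in __init__ and _upsert_prefixes. A sketch of inspecting that trail (Provenance is assumed to be list-like, and the import path is taken from the RECORD listing below):

```python
from rdflib import Graph

from cognite.neat.graph.extractors import DexpiExtractor
from cognite.neat.graph.stores import NeatGraphStore  # import path assumed

store = NeatGraphStore(Graph())  # __init__(graph, rules=None) per this diff
store.write(DexpiExtractor.from_file("example_pid.xml"))  # placeholder path

# Assumed list-like iteration: expect a record for NeatGraphStore.__init__
# followed by one for the DexpiExtractor write.
for change in store.provenance:
    print(change)
```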
cognite/neat/utils/utils.py CHANGED
@@ -147,6 +147,13 @@ def get_namespace(URI: URIRef, special_separator: str = "#_") -> str:
      return "/".join(URI.split("/")[:-1]) + "/"


+ def as_neat_compliant_uri(uri: URIRef) -> URIRef:
+     namespace = get_namespace(uri)
+     id_ = remove_namespace(uri)
+     compliant_uri = re.sub(r"[^a-zA-Z0-9-_.]", "", id_)
+     return URIRef(f"{namespace}{compliant_uri}")
+
+
  def convert_rdflib_content(content: Literal | URIRef | dict | list) -> Any:
      if isinstance(content, Literal) or isinstance(content, URIRef):
          return content.toPython()
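
The new as_neat_compliant_uri helper strips any character outside [a-zA-Z0-9-_.] from the local id while leaving the namespace part untouched; DexpiExtractor uses it to turn arbitrary AttributeURI values into valid predicates. A quick illustration (the input URI is made up):

```python
from rdflib import URIRef

from cognite.neat.utils.utils import as_neat_compliant_uri

# Hypothetical attribute URI containing spaces and parentheses.
raw = URIRef("http://sandbox.dexpi.org/rdl/Nominal Diameter (DN)")

print(as_neat_compliant_uri(raw))
# -> http://sandbox.dexpi.org/rdl/NominalDiameterDN
# The namespace portion is preserved by get_namespace(); only the local id
# is sanitized by the re.sub() call shown above.
```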
cognite_neat-0.81.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: cognite-neat
- Version: 0.80.3
+ Version: 0.81.1
  Summary: Knowledge graph transformation
  Home-page: https://cognite-neat.readthedocs-hosted.com/
  License: Apache-2.0
cognite_neat-0.81.1.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
  cognite/neat/__init__.py,sha256=v-rRiDOgZ3sQSMQKq0vgUQZvpeOkoHFXissAx6Ktg84,61
- cognite/neat/_version.py,sha256=2sBSmuW0uNJNnwbbj_2wdeCNnLYr-TNazMPCCt9nA8w,23
+ cognite/neat/_version.py,sha256=zEgwrETiUxKyTUiAscaK4aVaaACC4OjGI69_cwGk124,23
  cognite/neat/app/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  cognite/neat/app/api/asgi/metrics.py,sha256=nxFy7L5cChTI0a-zkCiJ59Aq8yLuIJp5c9Dg0wRXtV0,152
  cognite/neat/app/api/configuration.py,sha256=2U5M6M252swvQPQyooA1EBzFUZNtcTmuSaywfJDgckM,4232
@@ -53,16 +53,17 @@ cognite/neat/graph/examples/Knowledge-Graph-Nordic44.xml,sha256=U2Ns-M4LRjT1fBkh
  cognite/neat/graph/examples/__init__.py,sha256=yAjHVY3b5jOjmbW-iLbhvu7BG014TpGi3K4igkDqW5I,368
  cognite/neat/graph/examples/skos-capturing-sheet-wind-topics.xlsx,sha256=CV_yK5ZSbYS_ktfIZUPD8Sevs47zpswLXQUDFkGE4Gw,45798
  cognite/neat/graph/exceptions.py,sha256=R6pyOH774n9w2x_X_nrUr8OMAdjJMf_XPIqAvxIQaWo,3401
- cognite/neat/graph/extractors/__init__.py,sha256=PRKYPCnxofQ3i_iiJ3xGjEligLgqAPDw2TSlcZt0MlU,947
+ cognite/neat/graph/extractors/__init__.py,sha256=ozXL6ZLK36wp3uX4UACRVs6rbvynQg2JQlDgL1UM1Wk,1025
  cognite/neat/graph/extractors/_base.py,sha256=TOXDnlqske8DgnJwA0THDVRgmR79Acjm56yF0E-2w7I,356
  cognite/neat/graph/extractors/_classic_cdf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- cognite/neat/graph/extractors/_classic_cdf/_assets.py,sha256=S5QB_38ysVodGRMqr_SWYYaUtkUCS6a6L2b5D1T-888,3812
- cognite/neat/graph/extractors/_classic_cdf/_events.py,sha256=cYd-A7bvRw2S-FDvvE58PPDNE7uhoq2Lhu9En2i6E58,3961
- cognite/neat/graph/extractors/_classic_cdf/_files.py,sha256=8CpqZl8pLBRNJ6oxxp2YLfCupxlXJQ6h0ymUlI1GzH8,4783
- cognite/neat/graph/extractors/_classic_cdf/_labels.py,sha256=GcMPoecniy3g59enKD71F3fghvnN4K3uj1Z9bo2ZKIE,2367
- cognite/neat/graph/extractors/_classic_cdf/_relationships.py,sha256=5kClA5zBlhyPT6hfanLP-upLvMcE6mLU4AhkRp49NYQ,4985
- cognite/neat/graph/extractors/_classic_cdf/_sequences.py,sha256=ov-n8cBEC73AMO1xam2GUDHv-7SyOEWXWRxLXh9flyY,3298
- cognite/neat/graph/extractors/_classic_cdf/_timeseries.py,sha256=xlnJ4fKvCJawZO6l6EHpx36RRAafd3BdYWS0ajNnGVM,4449
+ cognite/neat/graph/extractors/_classic_cdf/_assets.py,sha256=8FQvJqi8nclkQJ7YmSo4yNqs9uExaoCn_whMW8cIAx0,3855
+ cognite/neat/graph/extractors/_classic_cdf/_events.py,sha256=Z0vPcyOz4mCwY0Dqa5wAQZjczO1dbTUGM0X4Y10NLGQ,3995
+ cognite/neat/graph/extractors/_classic_cdf/_files.py,sha256=-6nCkXUCAnDsv4eDFDEiQ-U4SGhmW1VLxZJFUcszqjU,4831
+ cognite/neat/graph/extractors/_classic_cdf/_labels.py,sha256=wm7JFmsk7sHsOVpTsGBE0wargIuHD09Xu-OHK_Bm20g,2386
+ cognite/neat/graph/extractors/_classic_cdf/_relationships.py,sha256=n7gISeyhLjiaWYLWWRj20jmaYgdvJBdYSiZ0G8ZW6mk,5035
+ cognite/neat/graph/extractors/_classic_cdf/_sequences.py,sha256=o4yxkf81FGFrKkflvlyDYie05fTYsT_LcRFM63OTVCI,3406
+ cognite/neat/graph/extractors/_classic_cdf/_timeseries.py,sha256=KTYmL8vhXijlmkN1UFQrGpaCllpRekr1y55SoLhlLbg,4559
+ cognite/neat/graph/extractors/_dexpi.py,sha256=N_xaI3wxBdMBePikEEMW-HhMijSmnwQNIqQJA_WUcbY,8526
  cognite/neat/graph/extractors/_mock_graph_generator.py,sha256=gziG2FFsLk-HmA9uxAeT9RCjVpFxjkCTLiC4tq2zgvw,14961
  cognite/neat/graph/extractors/_rdf_file.py,sha256=w4-XgPgNsmZOkNxjO1ZQCcopTntmmtxfDBkQxn1se6E,463
  cognite/neat/graph/issues/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -72,7 +73,7 @@ cognite/neat/graph/loaders/_base.py,sha256=bdYC6CwsHVqnQa1QzOhL68qQhF1OtrsearqH6
  cognite/neat/graph/loaders/_rdf2dms.py,sha256=Tn7vy6XwXFXpVDn7uzfzgJMJapbPITerKaF5b5Y4ol4,12857
  cognite/neat/graph/models.py,sha256=AtLgZh2qyRP6NRetjQCy9qLMuTQB0CH52Zsev-qa2sk,149
  cognite/neat/graph/stores/__init__.py,sha256=G-VG_YwfRt1kuPao07PDJyZ3w_0-eguzLUM13n-Z_RA,64
- cognite/neat/graph/stores/_base.py,sha256=DGmguO0qE5sLHgHG757ymP-cFtEimKvD57Irr3FH9yY,9106
+ cognite/neat/graph/stores/_base.py,sha256=6MZAXygT6sHTQ1LWm_TDb2Ws6fgNJ-r4evwcLywpBVk,9481
  cognite/neat/graph/stores/_oxrdflib.py,sha256=A5zeRm5_e8ui_ihGpgstRDg_N7qcLZ3QZBRGrOXSGI0,9569
  cognite/neat/graph/stores/_provenance.py,sha256=Y20-I8dP3DwTQ1sdI_eC4va2Az2FpK0oZwdfJ5T-2wc,3279
  cognite/neat/issues.py,sha256=pxQfqfBseMDE8JM0iqZnkLXngeyeFfT0TFtu1UuAd4c,4629
@@ -246,7 +247,7 @@ cognite/neat/utils/exceptions.py,sha256=-w4cAcvcoWLf-_ZwAl7QV_NysfqtQzIOd1Ti-mpx
  cognite/neat/utils/spreadsheet.py,sha256=LI0c7dlW0zXHkHw0NvB-gg6Df6cDcE3FbiaHBYLXdzQ,2714
  cognite/neat/utils/text.py,sha256=4bg1_Q0lg7KsoxaDOvXrVyeY78BJN8i-27BlyDzUCls,3082
  cognite/neat/utils/upload.py,sha256=XaAKqyMhz6qXbUrttGNIXZxFRPJvrnbMpDRF8GEiK2g,2707
- cognite/neat/utils/utils.py,sha256=OOuL0l-pv_8gDJCpXGBx-U9CEYDKQffP9dt8Dbg5kdU,13807
+ cognite/neat/utils/utils.py,sha256=p5qiqL4p4yC2z1pJWRaw-GSITOvBMCvj8zSo58NNsCo,14031
  cognite/neat/utils/xml.py,sha256=ppLT3lQKVp8wOP-m8-tFY8uB2P4R76l7R_-kUtsABng,992
  cognite/neat/workflows/__init__.py,sha256=oiKub_U9f5cA0I1nKl5dFkR4BD8_6Be9eMzQ_50PwP0,396
  cognite/neat/workflows/_exceptions.py,sha256=ugI_X1XNpikAiL8zIggBjcx6q7WvOpRIgvxHrj2Rhr4,1348
@@ -292,8 +293,8 @@ cognite/neat/workflows/steps_registry.py,sha256=fkTX14ZA7_gkUYfWIlx7A1XbCidvqR23
  cognite/neat/workflows/tasks.py,sha256=dqlJwKAb0jlkl7abbY8RRz3m7MT4SK8-7cntMWkOYjw,788
  cognite/neat/workflows/triggers.py,sha256=_BLNplzoz0iic367u1mhHMHiUrCwP-SLK6_CZzfODX0,7071
  cognite/neat/workflows/utils.py,sha256=gKdy3RLG7ctRhbCRwaDIWpL9Mi98zm56-d4jfHDqP1E,453
- cognite_neat-0.80.3.dist-info/LICENSE,sha256=W8VmvFia4WHa3Gqxq1Ygrq85McUNqIGDVgtdvzT-XqA,11351
- cognite_neat-0.80.3.dist-info/METADATA,sha256=2a_btNnYxdfU9_bgJ7wvzywNoLEdUcTL5BkiZ9RhvTY,9290
- cognite_neat-0.80.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- cognite_neat-0.80.3.dist-info/entry_points.txt,sha256=61FPqiWb25vbqB0KI7znG8nsg_ibLHBvTjYnkPvNFso,50
- cognite_neat-0.80.3.dist-info/RECORD,,
+ cognite_neat-0.81.1.dist-info/LICENSE,sha256=W8VmvFia4WHa3Gqxq1Ygrq85McUNqIGDVgtdvzT-XqA,11351
+ cognite_neat-0.81.1.dist-info/METADATA,sha256=HvhapkfdDjeI4wNtoR6rsyloUItiSR1R9D-6VbnoIiQ,9290
+ cognite_neat-0.81.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ cognite_neat-0.81.1.dist-info/entry_points.txt,sha256=61FPqiWb25vbqB0KI7znG8nsg_ibLHBvTjYnkPvNFso,50
+ cognite_neat-0.81.1.dist-info/RECORD,,