PyPI - cognite-neat - Versions diffs - 0.106.0__py3-none-any.whl → 0.108.0__py3-none-any.whl - Mend

cognite-neat 0.106.0__py3-none-any.whl → 0.108.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cognite-neat might be problematic. Click here for more details.

Files changed (67) hide show

cognite/neat/_constants.py +35 -1
cognite/neat/_graph/_shared.py +4 -0
cognite/neat/_graph/extractors/__init__.py +5 -1
cognite/neat/_graph/extractors/_base.py +32 -0
cognite/neat/_graph/extractors/_classic_cdf/_base.py +128 -14
cognite/neat/_graph/extractors/_classic_cdf/_classic.py +156 -12
cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +50 -12
cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +26 -1
cognite/neat/_graph/extractors/_dms.py +196 -47
cognite/neat/_graph/extractors/_dms_graph.py +199 -0
cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
cognite/neat/_graph/extractors/_rdf_file.py +33 -5
cognite/neat/_graph/loaders/__init__.py +1 -3
cognite/neat/_graph/loaders/_rdf2dms.py +123 -19
cognite/neat/_graph/queries/_base.py +140 -84
cognite/neat/_graph/queries/_construct.py +2 -2
cognite/neat/_graph/transformers/__init__.py +8 -1
cognite/neat/_graph/transformers/_base.py +9 -1
cognite/neat/_graph/transformers/_classic_cdf.py +90 -3
cognite/neat/_graph/transformers/_rdfpath.py +3 -3
cognite/neat/_graph/transformers/_value_type.py +106 -45
cognite/neat/_issues/errors/_resources.py +1 -1
cognite/neat/_issues/warnings/__init__.py +0 -2
cognite/neat/_issues/warnings/_models.py +1 -1
cognite/neat/_issues/warnings/_properties.py +0 -8
cognite/neat/_rules/analysis/_base.py +1 -1
cognite/neat/_rules/analysis/_information.py +14 -13
cognite/neat/_rules/catalog/__init__.py +1 -0
cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
cognite/neat/_rules/importers/__init__.py +3 -1
cognite/neat/_rules/importers/_dms2rules.py +7 -5
cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
cognite/neat/_rules/importers/_rdf/_base.py +2 -2
cognite/neat/_rules/importers/_rdf/_inference2rules.py +242 -19
cognite/neat/_rules/models/_base_rules.py +13 -15
cognite/neat/_rules/models/_types.py +5 -0
cognite/neat/_rules/models/dms/_rules.py +51 -10
cognite/neat/_rules/models/dms/_rules_input.py +4 -0
cognite/neat/_rules/models/information/_rules.py +48 -5
cognite/neat/_rules/models/information/_rules_input.py +6 -1
cognite/neat/_rules/models/mapping/_classic2core.py +4 -5
cognite/neat/_rules/transformers/__init__.py +10 -0
cognite/neat/_rules/transformers/_converters.py +300 -62
cognite/neat/_session/_base.py +57 -10
cognite/neat/_session/_drop.py +5 -1
cognite/neat/_session/_inspect.py +3 -2
cognite/neat/_session/_mapping.py +17 -6
cognite/neat/_session/_prepare.py +0 -47
cognite/neat/_session/_read.py +115 -10
cognite/neat/_session/_set.py +27 -0
cognite/neat/_session/_show.py +4 -4
cognite/neat/_session/_state.py +12 -1
cognite/neat/_session/_to.py +43 -2
cognite/neat/_session/_wizard.py +1 -1
cognite/neat/_session/exceptions.py +8 -3
cognite/neat/_store/_graph_store.py +331 -136
cognite/neat/_store/_rules_store.py +130 -1
cognite/neat/_utils/auth.py +3 -1
cognite/neat/_version.py +1 -1
{cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/METADATA +2 -2
{cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/RECORD +67 -65
{cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/WHEEL +1 -1
{cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/LICENSE +0 -0
{cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/entry_points.txt +0 -0

cognite/neat/_store/_graph_store.py CHANGED Viewed

@@ -3,15 +3,16 @@ import warnings
 from collections.abc import Iterable
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import cast
+from typing import cast, overload
+from zipfile import ZipExtFile
 import pandas as pd
 from pandas import Index
-from rdflib import Dataset, Namespace, URIRef
+from rdflib import Dataset, Graph, Namespace, URIRef
+from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
 from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
-from cognite.neat._constants import DEFAULT_NAMESPACE
-from cognite.neat._graph._shared import rdflib_to_oxi_type
+from cognite.neat._graph._shared import quad_formats, rdflib_to_oxi_type
 from cognite.neat._graph.extractors import RdfFileExtractor, TripleExtractors
 from cognite.neat._graph.queries import Queries
 from cognite.neat._graph.transformers import Transformers
@@ -21,7 +22,7 @@ from cognite.neat._rules.models import InformationRules
 from cognite.neat._rules.models.entities import ClassEntity
 from cognite.neat._shared import InstanceType, Triple
 from cognite.neat._utils.auxiliary import local_import
-from cognite.neat._utils.rdf_ import add_triples_in_batch
+from cognite.neat._utils.rdf_ import add_triples_in_batch, remove_namespace_from_uri
 from ._provenance import Change, Provenance
@@ -38,80 +39,130 @@ class NeatGraphStore:
     Args:
         graph : Instance of rdflib.Graph class for graph storage
         rules:
+    !!! note "Dataset"
+        The store leverages a RDF dataset which is defined as a collection of RDF graphs
+        where all but one are named graphs associated with URIRef (the graph name),
+        and the unnamed default graph which is in context of rdflib library has an
+        identifier URIRef('urn:x-rdflib:default').
     """
     rdf_store_type: str
     def __init__(
         self,
-        graph: Dataset,
-        rules: InformationRules | None = None,
+        dataset: Dataset,
+        default_named_graph: URIRef | None = None,
     ):
-        self.rules: InformationRules | None = None
+        self.rules: dict[URIRef, InformationRules] = {}
+        self.base_namespace: dict[URIRef, Namespace] = {}
         _start = datetime.now(timezone.utc)
-        self.graph = graph
+        self.dataset = dataset
         self.provenance = Provenance(
             [
                 Change.record(
                     activity=f"{type(self).__name__}.__init__",
                     start=_start,
                     end=datetime.now(timezone.utc),
-                    description=f"Initialize graph store as {type(self.graph.store).__name__}",
+                    description=f"Initialize graph store as {type(self.dataset.store).__name__}",
                 )
             ]
         )
-        if rules:
-            self.add_rules(rules)
-        else:
-            self.base_namespace = DEFAULT_NAMESPACE
+        self.default_named_graph = default_named_graph or DATASET_DEFAULT_GRAPH_ID
-        self.queries = Queries(self.graph, self.rules)
+        self.queries = Queries(self.dataset, self.rules, self.default_named_graph)
+    def graph(self, named_graph: URIRef | None = None) -> Graph:
+        """Get named graph from the dataset to query over"""
+        return self.dataset.graph(named_graph or self.default_named_graph)
     @property
     def type_(self) -> str:
         "Return type of the graph store"
-        return type(self.graph.store).__name__
+        return type(self.dataset.store).__name__
+    # no destination
+    @overload
+    def serialize(self, filepath: None = None) -> str: ...
+    # with destination
+    @overload
+    def serialize(self, filepath: Path) -> None: ...
+    def serialize(self, filepath: Path | None = None) -> None | str:
+        """Serialize the graph store to a file.
+        Args:
+            filepath: File path to serialize the graph store to
-    def add_rules(self, rules: InformationRules) -> None:
-        """This method is used to add rules to the graph store and it is the only correct
-        way to add rules to the graph store, after the graph store has been initialized.
+        Returns:
+            Serialized graph store
+        !!! note "Trig Format"
+            Notice that instead of turtle format we are using trig format for serialization.
+            This is because trig format is a superset of turtle format and it allows us to
+            serialize named graphs as well. Allowing serialization of one or more named graphs
+            including the default graph.
         """
+        if filepath:
+            self.dataset.serialize(
+                filepath,
+                format="ox-trig" if self.type_ == "OxigraphStore" else "trig",
+            )
+            return None
+        else:
+            return self.dataset.serialize(format="ox-trig" if self.type_ == "OxigraphStore" else "trig")
-        self.rules = rules
-        self.base_namespace = self.rules.metadata.namespace
-        self.queries = Queries(self.graph, self.rules)
-        self.provenance.append(
-            Change.record(
-                activity=f"{type(self)}.rules",
-                start=datetime.now(timezone.utc),
-                end=datetime.now(timezone.utc),
-                description=f"Added rules to graph store as {type(self.rules).__name__}",
+    def add_rules(self, rules: InformationRules, named_graph: URIRef | None = None) -> None:
+        """This method is used to add rules to a named graph stored in the graph store.
+        Args:
+            rules: InformationRules object containing rules to be added to the named graph
+            named_graph: URIRef of the named graph to store the rules in, by default None
+                        rules will be added to the default graph
+        """
+        named_graph = named_graph or self.default_named_graph
+        if named_graph in self.named_graphs:
+            # attaching appropriate namespace to the rules
+            # as well base_namespace
+            self.rules[named_graph] = rules
+            self.base_namespace[named_graph] = rules.metadata.namespace
+            self.queries = Queries(self.dataset, self.rules)
+            self.provenance.append(
+                Change.record(
+                    activity=f"{type(self)}.rules",
+                    start=datetime.now(timezone.utc),
+                    end=datetime.now(timezone.utc),
+                    description=f"Added {type(self.rules).__name__} to {named_graph} named graph",
+                )
             )
-        )
-        if self.rules.prefixes:
-            self._upsert_prefixes(self.rules.prefixes)
+            if self.rules[named_graph].prefixes:
+                self._upsert_prefixes(self.rules[named_graph].prefixes, named_graph)
-    def _upsert_prefixes(self, prefixes: dict[str, Namespace]) -> None:
+    def _upsert_prefixes(self, prefixes: dict[str, Namespace], named_graph: URIRef) -> None:
         """Adds prefixes to the graph store."""
         _start = datetime.now(timezone.utc)
         for prefix, namespace in prefixes.items():
-            self.graph.bind(prefix, namespace)
+            self.graph(named_graph).bind(prefix, namespace)
         self.provenance.append(
             Change.record(
                 activity=f"{type(self).__name__}._upsert_prefixes",
                 start=_start,
                 end=datetime.now(timezone.utc),
-                description="Upsert prefixes to graph store",
+                description="Upsert prefixes to the name graph {named_graph}",
             )
         )
     @classmethod
-    def from_memory_store(cls, rules: InformationRules | None = None) -> "Self":
-        return cls(Dataset(), rules)
+    def from_memory_store(cls) -> "Self":
+        return cls(Dataset())
     @classmethod
     def from_sparql_store(
@@ -119,7 +170,6 @@ class NeatGraphStore:
         query_endpoint: str | None = None,
         update_endpoint: str | None = None,
         returnFormat: str = "csv",
-        rules: InformationRules | None = None,
     ) -> "Self":
         store = SPARQLUpdateStore(
             query_endpoint=query_endpoint,
@@ -130,10 +180,27 @@ class NeatGraphStore:
             autocommit=False,
         )
         graph = Dataset(store=store)
-        return cls(graph, rules)
+        return cls(graph)
     @classmethod
-    def from_oxi_store(cls, storage_dir: Path | None = None, rules: InformationRules | None = None) -> "Self":
+    def from_oxi_remote_store(
+        cls,
+        remote_url: str,
+        autocommit: bool = False,
+    ) -> "Self":
+        """Creates a NeatGraphStore from a remote Oxigraph store SPARQL endpoint."""
+        return cls(
+            dataset=Dataset(
+                store=SPARQLUpdateStore(
+                    query_endpoint=f"{remote_url}/query", update_endpoint=f"{remote_url}/query", autocommit=autocommit
+                ),
+                default_union=True,
+            )
+        )
+    @classmethod
+    def from_oxi_local_store(cls, storage_dir: Path | None = None) -> "Self":
         """Creates a NeatGraphStore from an Oxigraph store."""
         local_import("pyoxigraph", "oxi")
         local_import("oxrdflib", "oxi")
@@ -152,29 +219,41 @@ class NeatGraphStore:
         else:
             raise Exception("Error initializing Oxigraph store")
-        graph = Dataset(
-            store=oxrdflib.OxigraphStore(store=oxi_store),
+        return cls(
+            dataset=Dataset(
+                store=oxrdflib.OxigraphStore(store=oxi_store),
+            )
         )
-        return cls(graph, rules)
-    def write(self, extractor: TripleExtractors) -> IssueList:
+    def write(self, extractor: TripleExtractors, named_graph: URIRef | None = None) -> IssueList:
         last_change: Change | None = None
+        named_graph = named_graph or self.default_named_graph
         with catch_issues() as issue_list:
             _start = datetime.now(timezone.utc)
             success = True
             if isinstance(extractor, RdfFileExtractor) and not extractor.issue_list.has_errors:
-                self._parse_file(extractor.filepath, cast(str, extractor.format), extractor.base_uri)
+                self._parse_file(
+                    named_graph,
+                    extractor.filepath,
+                    cast(str, extractor.format),
+                    extractor.base_uri,
+                )
+                if isinstance(extractor.filepath, ZipExtFile):
+                    extractor.filepath.close()
             elif isinstance(extractor, RdfFileExtractor):
                 success = False
                 issue_text = "\n".join([issue.as_message() for issue in extractor.issue_list])
                 warnings.warn(
-                    f"Cannot write to graph store with {type(extractor).__name__}, errors found in file:\n{issue_text}",
+                    (
+                        f"Cannot write to named graph {named_graph} with "
+                        f"{type(extractor).__name__}, errors found in file:\n{issue_text}"
+                    ),
                     stacklevel=2,
                 )
             else:
-                self._add_triples(extractor.extract())
+                self._add_triples(extractor.extract(), named_graph=named_graph)
             if success:
                 _end = datetime.now(timezone.utc)
@@ -189,7 +268,7 @@ class NeatGraphStore:
                         activity=activity,
                         start=_start,
                         end=_end,
-                        description=f"Extracted triples to graph store using {type(extractor).__name__}",
+                        description=f"Extracted triples to named graph {named_graph} using {type(extractor).__name__}",
                     )
                     self.provenance.append(last_change)
         if last_change:
@@ -197,26 +276,56 @@ class NeatGraphStore:
         return issue_list
     def _read_via_rules_linkage(
-        self, class_neat_id: URIRef, property_link_pairs: dict[str, URIRef] | None
+        self,
+        class_neat_id: URIRef,
+        property_link_pairs: dict[str, URIRef] | None,
+        named_graph: URIRef | None = None,
     ) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
-        if self.rules is None:
-            warnings.warn("Rules not found in graph store! Aborting!", stacklevel=2)
+        named_graph = named_graph or self.default_named_graph
+        if named_graph not in self.named_graphs:
+            warnings.warn(
+                f"Named graph {named_graph} not found in graph store, cannot read",
+                stacklevel=2,
+            )
             return
+        if not self.rules or named_graph not in self.rules:
+            warnings.warn(
+                f"Rules for named graph {named_graph} not found in graph store!",
+                stacklevel=2,
+            )
+            return
         if self.multi_type_instances:
             warnings.warn(
                 "Multi typed instances detected, issues with loading can occur!",
                 stacklevel=2,
             )
-        if cls := InformationAnalysis(self.rules).classes_by_neat_id.get(class_neat_id):
+        analysis = InformationAnalysis(self.rules[named_graph])
+        if cls := analysis.classes_by_neat_id.get(class_neat_id):
             if property_link_pairs:
                 property_renaming_config = {
                     prop_uri: prop_name
                     for prop_name, prop_neat_id in property_link_pairs.items()
-                    if (
-                        prop_uri := InformationAnalysis(self.rules).neat_id_to_transformation_property_uri(prop_neat_id)
-                    )
+                    if (prop_uri := analysis.neat_id_to_instance_source_property_uri(prop_neat_id))
                 }
+                if information_properties := analysis.classes_with_properties(consider_inheritance=True).get(
+                    cls.class_
+                ):
+                    for prop in information_properties:
+                        if prop.neatId is None:
+                            continue
+                        # Include renaming done in the Information rules that are not present in the
+                        # property_link_pairs. The use case for this renaming to startNode and endNode
+                        # properties that are not part of DMSRules but will typically be present
+                        # in the Information rules.
+                        if (
+                            uri := analysis.neat_id_to_instance_source_property_uri(prop.neatId)
+                        ) and uri not in property_renaming_config:
+                            property_renaming_config[uri] = prop.property_
                 yield from self._read_via_class_entity(cls.class_, property_renaming_config)
                 return
@@ -231,9 +340,22 @@ class NeatGraphStore:
         self,
         class_entity: ClassEntity,
         property_renaming_config: dict[URIRef, str] | None = None,
+        named_graph: URIRef | None = None,
     ) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
-        if self.rules is None:
-            warnings.warn("Rules not found in graph store!", stacklevel=2)
+        named_graph = named_graph or self.default_named_graph
+        if named_graph not in self.named_graphs:
+            warnings.warn(
+                f"Named graph {named_graph} not found in graph store, cannot read",
+                stacklevel=2,
+            )
+            return
+        if not self.rules or named_graph not in self.rules:
+            warnings.warn(
+                f"Rules for named graph {named_graph} not found in graph store!",
+                stacklevel=2,
+            )
             return
         if self.multi_type_instances:
             warnings.warn(
@@ -241,28 +363,28 @@ class NeatGraphStore:
                 stacklevel=2,
             )
-        if class_entity not in [definition.class_ for definition in self.rules.classes]:
+        if class_entity not in [definition.class_ for definition in self.rules[named_graph].classes]:
             warnings.warn("Desired type not found in graph!", stacklevel=2)
             return
-        if not (class_uri := InformationAnalysis(self.rules).class_uri(class_entity)):
+        if not (class_uri := InformationAnalysis(self.rules[named_graph]).class_uri(class_entity)):
             warnings.warn(
                 f"Class {class_entity.suffix} does not have namespace defined for prefix {class_entity.prefix} Rules!",
                 stacklevel=2,
             )
             return
-        has_hop_transformations = InformationAnalysis(self.rules).has_hop_transformations()
+        has_hop_transformations = InformationAnalysis(self.rules[named_graph]).has_hop_transformations()
         has_self_reference_transformations = InformationAnalysis(
-            self.rules
+            self.rules[named_graph]
         ).has_self_reference_property_transformations()
         if has_hop_transformations or has_self_reference_transformations:
             msg = (
-                f"Rules contain [{'Hop' if has_hop_transformations else '' }"
-                f", {'SelfReferenceProperty' if has_self_reference_transformations else '' }]"
+                f"Rules contain [{'Hop' if has_hop_transformations else ''}"
+                f", {'SelfReferenceProperty' if has_self_reference_transformations else ''}]"
                 " rdfpath."
-                f" Run [{'ReduceHopTraversal' if has_hop_transformations else '' }"
-                f", {'AddSelfReferenceProperty' if has_self_reference_transformations else '' }]"
+                f" Run [{'ReduceHopTraversal' if has_hop_transformations else ''}"
+                f", {'AddSelfReferenceProperty' if has_self_reference_transformations else ''}]"
                 " transformer(s) first!"
             )
@@ -277,23 +399,19 @@ class NeatGraphStore:
         # get potential property renaming config
         property_renaming_config = property_renaming_config or InformationAnalysis(
-            self.rules
+            self.rules[named_graph]
         ).define_property_renaming_config(class_entity)
-        # get property types to guide process of removing or not namespaces from results
-        property_types = InformationAnalysis(self.rules).property_types(class_entity)
         for instance_id in instance_ids:
             if res := self.queries.describe(
                 instance_id=instance_id,
                 instance_type=class_entity.suffix,
                 property_renaming_config=property_renaming_config,
-                property_types=property_types,
             ):
                 yield res
     def read(
-        self,
-        class_: str,
+        self, class_: str, named_graph: URIRef | None = None
     ) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
         """Read instances for given class from the graph store.
@@ -302,9 +420,20 @@ class NeatGraphStore:
             the rules which are attached to the graph store.
         """
+        named_graph = named_graph or self.default_named_graph
+        if named_graph not in self.named_graphs:
+            warnings.warn(
+                f"Named graph {named_graph} not found in graph store, cannot read",
+                stacklevel=2,
+            )
+            return
-        if not self.rules:
-            warnings.warn("Rules not found in graph store!", stacklevel=2)
+        if not self.rules or named_graph not in self.rules:
+            warnings.warn(
+                f"Rules for named graph {named_graph} not found in graph store!",
+                stacklevel=2,
+            )
             return
         if self.multi_type_instances:
             warnings.warn(
@@ -312,15 +441,15 @@ class NeatGraphStore:
                 stacklevel=2,
             )
-        class_entity = ClassEntity(prefix=self.rules.metadata.prefix, suffix=class_)
+        class_entity = ClassEntity(prefix=self.rules[named_graph].metadata.prefix, suffix=class_)
-        if class_entity not in [definition.class_ for definition in self.rules.classes]:
+        if class_entity not in [definition.class_ for definition in self.rules[named_graph].classes]:
             warnings.warn("Desired type not found in graph!", stacklevel=2)
             return
         yield from self._read_via_class_entity(class_entity)
-    def count_of_id(self, neat_id: URIRef) -> int:
+    def count_of_id(self, neat_id: URIRef, named_graph: URIRef | None = None) -> int:
         """Count the number of instances of a given type
         Args:
@@ -329,18 +458,31 @@ class NeatGraphStore:
         Returns:
             Number of instances
         """
-        if not self.rules:
-            warnings.warn("Rules not found in graph store!", stacklevel=2)
+        named_graph = named_graph or self.default_named_graph
+        if named_graph not in self.named_graphs:
+            warnings.warn(
+                f"Named graph {named_graph} not found in graph store, cannot count",
+                stacklevel=2,
+            )
+            return 0
+        if not self.rules or named_graph not in self.rules:
+            warnings.warn(
+                f"Rules for named graph {named_graph} not found in graph store!",
+                stacklevel=2,
+            )
             return 0
         class_entity = next(
-            (definition.class_ for definition in self.rules.classes if definition.neatId == neat_id), None
+            (definition.class_ for definition in self.rules[named_graph].classes if definition.neatId == neat_id),
+            None,
         )
         if not class_entity:
             warnings.warn("Desired type not found in graph!", stacklevel=2)
             return 0
-        if not (class_uri := InformationAnalysis(self.rules).class_uri(class_entity)):
+        if not (class_uri := InformationAnalysis(self.rules[named_graph]).class_uri(class_entity)):
             warnings.warn(
                 f"Class {class_entity.suffix} does not have namespace defined for prefix {class_entity.prefix} Rules!",
                 stacklevel=2,
@@ -351,17 +493,19 @@ class NeatGraphStore:
     def count_of_type(self, class_uri: URIRef) -> int:
         query = f"SELECT (COUNT(?instance) AS ?instanceCount) WHERE {{ ?instance a <{class_uri}> }}"
-        return int(next(iter(self.graph.query(query)))[0])  # type: ignore[arg-type, index]
+        return int(next(iter(self.dataset.query(query)))[0])  # type: ignore[arg-type, index]
     def _parse_file(
         self,
-        filepath: Path,
+        named_graph: URIRef,
+        filepath: Path | ZipExtFile,
         format: str = "turtle",
         base_uri: URIRef | None = None,
     ) -> None:
         """Imports graph data from file.
         Args:
+            named_graph : URIRef of the named graph to store the data in
             filepath : File path to file containing graph data, by default None
             format : rdflib format file containing RDF graph, by default "turtle"
             base_uri : base URI to add to graph in case of relative URIs, by default None
@@ -375,28 +519,38 @@ class NeatGraphStore:
         """
         # Oxigraph store, do not want to type hint this as it is an optional dependency
-        if type(self.graph.store).__name__ == "OxigraphStore":
+        if self.type_ == "OxigraphStore":
             local_import("pyoxigraph", "oxi")
-            # this is necessary to trigger rdflib oxigraph plugin
-            self.graph.parse(
-                filepath,
-                format=rdflib_to_oxi_type(format),
-                transactional=False,
-                publicID=base_uri,
-            )
-            self.graph.store._store.optimize()  # type: ignore[attr-defined]
+            if format in quad_formats():
+                self.dataset.parse(
+                    filepath,  # type: ignore[arg-type]
+                    format=rdflib_to_oxi_type(format),
+                    transactional=False,
+                    publicID=base_uri,
+                )
+            else:
+                self.graph(named_graph).parse(
+                    filepath,  # type: ignore[arg-type]
+                    format=rdflib_to_oxi_type(format),
+                    transactional=False,
+                    publicID=base_uri,
+                )
+            self.dataset.store._store.optimize()  # type: ignore[attr-defined]
         # All other stores
         else:
-            if filepath.is_file():
-                self.graph.parse(filepath, publicID=base_uri)
+            if format in quad_formats():
+                self.dataset.parse(filepath, publicID=base_uri, format=format)  # type: ignore[arg-type]
             else:
-                for filename in filepath.iterdir():
-                    if filename.is_file():
-                        self.graph.parse(filename, publicID=base_uri)
+                self.graph(named_graph).parse(filepath, publicID=base_uri, format=format)  # type: ignore[arg-type]
-    def _add_triples(self, triples: Iterable[Triple], batch_size: int = 10_000):
+    def _add_triples(
+        self,
+        triples: Iterable[Triple],
+        named_graph: URIRef,
+        batch_size: int = 10_000,
+    ) -> None:
         """Adds triples to the graph store in batches.
         Args:
@@ -404,66 +558,103 @@ class NeatGraphStore:
             batch_size: Batch size of triples per commit, by default 10_000
             verbose: Verbose mode, by default False
         """
-        add_triples_in_batch(self.graph, triples, batch_size)
+        add_triples_in_batch(self.graph(named_graph), triples, batch_size)
-    def transform(self, transformer: Transformers) -> None:
+    def transform(self, transformer: Transformers, named_graph: URIRef | None = None) -> None:
         """Transforms the graph store using a transformer."""
-        missing_changes = [
-            change for change in transformer._need_changes if not self.provenance.activity_took_place(change)
-        ]
-        if self.provenance.activity_took_place(type(transformer).__name__) and transformer._use_only_once:
-            warnings.warn(
-                f"Cannot transform graph store with {type(transformer).__name__}, already applied",
-                stacklevel=2,
-            )
-        elif missing_changes:
-            warnings.warn(
-                (
-                    f"Cannot transform graph store with {type(transformer).__name__}, "
-                    f"missing one or more required changes [{', '.join(missing_changes)}]"
-                ),
-                stacklevel=2,
-            )
+        named_graph = named_graph or self.default_named_graph
+        if named_graph in self.named_graphs:
+            missing_changes = [
+                change for change in transformer._need_changes if not self.provenance.activity_took_place(change)
+            ]
+            if self.provenance.activity_took_place(type(transformer).__name__) and transformer._use_only_once:
+                warnings.warn(
+                    f"Cannot transform graph store with {type(transformer).__name__}, already applied",
+                    stacklevel=2,
+                )
+            elif missing_changes:
+                warnings.warn(
+                    (
+                        f"Cannot transform graph store with {type(transformer).__name__}, "
+                        f"missing one or more required changes [{', '.join(missing_changes)}]"
+                    ),
+                    stacklevel=2,
+                )
-        else:
-            _start = datetime.now(timezone.utc)
-            transformer.transform(self.graph)
-            self.provenance.append(
-                Change.record(
-                    activity=f"{type(transformer).__name__}",
-                    start=_start,
-                    end=datetime.now(timezone.utc),
-                    description=transformer.description,
+            else:
+                _start = datetime.now(timezone.utc)
+                transformer.transform(self.graph(named_graph))
+                self.provenance.append(
+                    Change.record(
+                        activity=f"{type(transformer).__name__}",
+                        start=_start,
+                        end=datetime.now(timezone.utc),
+                        description=transformer.description,
+                    )
                 )
+        else:
+            warnings.warn(
+                f"Named graph {named_graph} not found in graph store, cannot transform",
+                stacklevel=2,
             )
     @property
-    def summary(self) -> pd.DataFrame:
-        return pd.DataFrame(self.queries.summarize_instances(), columns=["Type", "Occurrence"])
+    def summary(self) -> dict[URIRef, pd.DataFrame]:
+        return {
+            named_graph: pd.DataFrame(
+                self.queries.summarize_instances(named_graph),
+                columns=["Type", "Occurrence"],
+            )
+            for named_graph in self.named_graphs
+        }
     @property
-    def multi_type_instances(self) -> dict[str, list[str]]:
-        return self.queries.multi_type_instances()
+    def multi_type_instances(self) -> dict[URIRef, dict[str, list[str]]]:
+        return {named_graph: self.queries.multi_type_instances(named_graph) for named_graph in self.named_graphs}
     def _repr_html_(self) -> str:
         provenance = self.provenance._repr_html_()
-        summary: pd.DataFrame = self.summary
+        summary: dict[URIRef, pd.DataFrame] = self.summary
-        if summary.empty:
+        def _short_name_of_graph(named_graph: URIRef) -> str:
+            return "default" if named_graph == self.default_named_graph else remove_namespace_from_uri(named_graph)
+        if not summary:
             summary_text = "<br /><strong>Graph is empty</strong><br />"
         else:
+            all_types = set().union(
+                *[set(sub_summary.Type) for sub_summary in summary.values() if not sub_summary.empty]
+            )
             summary_text = (
                 "<br /><strong>Overview</strong>:"  # type: ignore
-                f"<ul><li>{len(summary)} types</strong></li>"
-                f"<li>{sum(summary['Occurrence'])} instances</strong></li></ul>"
-                f"{cast(pd.DataFrame, self._shorten_summary(summary))._repr_html_()}"  # type: ignore[operator]
+                f"<ul><li>{len(summary)} named graphs</strong></li>"
+                f"<li>Total of {len(all_types)} unique types</strong></li>"
             )
-        if self.multi_type_instances:
-            summary_text += "<br><strong>Multi value instances detected! Loading could have issues!</strong></br>"  # type: ignore
+            for named_graph, table in summary.items():
+                summary_text += (
+                    f"<li>{sum(table['Occurrence'])} instances in {_short_name_of_graph(named_graph)}"
+                    " graph</strong></li>"
+                )
+            summary_text += "</ul>"
+            for named_graph, table in summary.items():
+                summary_text += (
+                    f"<br /><strong>{_short_name_of_graph(named_graph)} graph</strong>:"
+                    f"{cast(pd.DataFrame, self._shorten_summary(table))._repr_html_()}"  # type: ignore[operator]
+                )
-        return f"{summary_text}" f"{provenance}"
+        for named_graph, multi_value_instances in self.multi_type_instances.items():
+            if multi_value_instances:
+                summary_text += (
+                    f"<br><strong>Multi value instances detected in {_short_name_of_graph(named_graph)}"
+                    "graph! Loading could have issues!</strong></br>"
+                )
+        return f"{summary_text}{provenance}"
     def _shorten_summary(self, summary: pd.DataFrame) -> pd.DataFrame:
         """Shorten summary to top 5 types by occurrence."""
@@ -490,3 +681,7 @@ class NeatGraphStore:
         shorter_summary.index = cast(Index, indexes)
         return shorter_summary
+    @property
+    def named_graphs(self) -> list[URIRef]:
+        return [cast(URIRef, context.identifier) for context in self.dataset.contexts()]

cognite-neat 0.106.0__py3-none-any.whl → 0.108.0__py3-none-any.whl

Potentially problematic release.

cognite-neat 0.106.0py3-none-any.whl → 0.108.0py3-none-any.whl