PyPI - cognite-neat - Versions diffs - 0.110.0__py3-none-any.whl → 0.111.1__py3-none-any.whl - Mend

cognite-neat 0.110.0__py3-none-any.whl → 0.111.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cognite-neat might be problematic. Click here for more details.

Files changed (53)

cognite/neat/_alpha.py +6 -0
cognite/neat/_client/_api/schema.py +26 -0
cognite/neat/_client/data_classes/schema.py +1 -1
cognite/neat/_constants.py +4 -1
cognite/neat/_graph/extractors/__init__.py +4 -0
cognite/neat/_graph/extractors/_classic_cdf/_base.py +8 -16
cognite/neat/_graph/extractors/_classic_cdf/_classic.py +39 -9
cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +23 -17
cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +15 -17
cognite/neat/_graph/extractors/_dict.py +102 -0
cognite/neat/_graph/extractors/_dms.py +27 -40
cognite/neat/_graph/extractors/_dms_graph.py +30 -3
cognite/neat/_graph/extractors/_raw.py +67 -0
cognite/neat/_graph/loaders/_base.py +20 -4
cognite/neat/_graph/loaders/_rdf2dms.py +243 -89
cognite/neat/_graph/queries/_base.py +137 -43
cognite/neat/_graph/transformers/_classic_cdf.py +6 -22
cognite/neat/_issues/_factory.py +9 -1
cognite/neat/_issues/errors/__init__.py +2 -0
cognite/neat/_issues/errors/_external.py +7 -0
cognite/neat/_issues/warnings/user_modeling.py +12 -0
cognite/neat/_rules/_constants.py +3 -0
cognite/neat/_rules/analysis/_base.py +29 -50
cognite/neat/_rules/exporters/_rules2excel.py +1 -1
cognite/neat/_rules/importers/_rdf/_inference2rules.py +16 -10
cognite/neat/_rules/models/_base_rules.py +0 -2
cognite/neat/_rules/models/data_types.py +7 -0
cognite/neat/_rules/models/dms/_exporter.py +9 -8
cognite/neat/_rules/models/dms/_rules.py +26 -1
cognite/neat/_rules/models/dms/_rules_input.py +5 -1
cognite/neat/_rules/models/dms/_validation.py +101 -1
cognite/neat/_rules/models/entities/_single_value.py +8 -3
cognite/neat/_rules/models/entities/_wrapped.py +2 -2
cognite/neat/_rules/models/information/_rules_input.py +1 -0
cognite/neat/_rules/models/information/_validation.py +64 -17
cognite/neat/_rules/transformers/_converters.py +7 -2
cognite/neat/_session/_base.py +2 -0
cognite/neat/_session/_explore.py +39 -0
cognite/neat/_session/_inspect.py +25 -6
cognite/neat/_session/_read.py +67 -3
cognite/neat/_session/_set.py +7 -1
cognite/neat/_session/_state.py +6 -0
cognite/neat/_session/_to.py +115 -8
cognite/neat/_store/_graph_store.py +8 -4
cognite/neat/_utils/rdf_.py +34 -3
cognite/neat/_utils/text.py +72 -4
cognite/neat/_utils/upload.py +2 -0
cognite/neat/_version.py +2 -2
{cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/METADATA +1 -1
{cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/RECORD +53 -50
{cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/LICENSE +0 -0
{cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/WHEEL +0 -0
{cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/entry_points.txt +0 -0

cognite/neat/_session/_inspect.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import difflib
-from collections.abc import Callable
+from collections.abc import Callable, Set
 from typing import Literal, overload
 import pandas as pd
@@ -85,11 +85,13 @@ class InspectIssues:
     def __init__(self, state: SessionState) -> None:
         self._state = state
+        self._max_display = 50
     @overload
     def __call__(
         self,
         search: str | None = None,
+        include: Literal["all", "errors", "warning"] | Set[Literal["all", "errors", "warning"]] = "all",
         return_dataframe: Literal[True] = (False if IN_NOTEBOOK else True),  # type: ignore[assignment]
     ) -> pd.DataFrame: ...
@@ -97,12 +99,14 @@ class InspectIssues:
     def __call__(
         self,
         search: str | None = None,
+        include: Literal["all", "errors", "warning"] | Set[Literal["all", "errors", "warning"]] = "all",
         return_dataframe: Literal[False] = (False if IN_NOTEBOOK else True),  # type: ignore[assignment]
     ) -> None: ...
     def __call__(
         self,
         search: str | None = None,
+        include: Literal["all", "errors", "warning"] | Set[Literal["all", "errors", "warning"]] = "all",
         return_dataframe: bool = (False if IN_NOTEBOOK else True),  # type: ignore[assignment]
     ) -> pd.DataFrame | None:
         """Returns the issues of the current data model."""
@@ -113,6 +117,13 @@ class InspectIssues:
         elif issues is None:
             self._print("No issues found.")
             return pd.DataFrame() if return_dataframe else None
+        include_set = {include} if isinstance(include, str) else include
+        if "all" in include_set:
+            include_set = {"errors", "warning"}
+        if "warning" not in include_set:
+            issues = issues.errors
+        if "errors" not in include_set:
+            issues = issues.warnings
         if issues and search is not None:
             unique_types = {type(issue).__name__ for issue in issues}
@@ -120,18 +131,21 @@ class InspectIssues:
             issues = IssueList([issue for issue in issues if type(issue).__name__ in closest_match])
         issue_str = "\n".join(
-            [f"  * **{type(issue).__name__}**: {issue.as_message(include_type=False)}" for issue in issues]
+            [
+                f"  * **{type(issue).__name__}**: {issue.as_message(include_type=False)}"
+                for issue in issues[: self._max_display]
+            ]
+            + ([] if len(issues) <= 50 else [f"  * ... {len(issues) - self._max_display} more"])
         )
         markdown_str = f"### {len(issues)} issues found\n\n{issue_str}"
         if IN_NOTEBOOK:
             from IPython.display import Markdown, display
             display(Markdown(markdown_str))
         elif RICH_AVAILABLE:
-            from rich import print
+            from rich import print as rprint
-            print(RichMarkdown(markdown_str))
+            rprint(RichMarkdown(markdown_str))
         if return_dataframe:
             return issues.to_pandas()
@@ -170,6 +184,7 @@ class InspectOutcome:
 class InspectUploadOutcome:
     def __init__(self, get_last_outcome: Callable[[], UploadResultList]) -> None:
         self._get_last_outcome = get_last_outcome
+        self._max_display = 50
     @staticmethod
     def _as_set(value: str | list[str] | None) -> set[str] | None:
@@ -223,7 +238,7 @@ class InspectUploadOutcome:
             from IPython.display import Markdown, display
             lines: list[str] = []
-            for item in outcome:
+            for line_no, item in enumerate(outcome):
                 lines.append(f"### {item.name}")
                 if unique_errors := set(item.error_messages):
                     lines.append("#### Errors")
@@ -255,6 +270,10 @@ class InspectUploadOutcome:
                         else:
                             lines.append(f"  * {value}")
+                if line_no >= self._max_display:
+                    lines.append(f"### ... {len(outcome) - self._max_display} more")
+                    break
             display(Markdown("\n".join(lines)))
         if return_dataframe:

cognite/neat/_session/_read.py CHANGED Viewed

@@ -153,6 +153,45 @@ class CDFReadAPI(BaseReadAPI):
         )
         return self._state.write_graph(extractor)
+    def raw(
+        self,
+        db_name: str,
+        table_name: str,
+        type: str | None = None,
+        foreign_keys: str | SequenceNotStr[str] | None = None,
+        unpack_json: bool = False,
+        str_to_ideal_type: bool = False,
+    ) -> IssueList:
+        """Reads a raw table from CDF to the knowledge graph.
+        Args:
+            db_name: The name of the database
+            table_name: The name of the table, this will be assumed to be the type of the instances.
+            type: The type of instances in the table. If None, the table name will be used.
+            foreign_keys: The name of the columns that are foreign keys. If None, no foreign keys are used.
+            unpack_json: If True, the JSON objects will be unpacked into the graph.
+            str_to_ideal_type: If True, the string values will be converted to ideal types.
+        Returns:
+            IssueList: A list of issues that occurred during the extraction.
+        Example:
+            ```python
+            neat.read.cdf.raw("my_db", "my_table", "Asset")
+            ```
+        """
+        extractor = extractors.RAWExtractor(
+            self._get_client,
+            db_name=db_name,
+            table_name=table_name,
+            table_type=type,
+            foreign_keys=foreign_keys,
+            unpack_json=unpack_json,
+            str_to_ideal_type=str_to_ideal_type,
+        )
+        return self._state.instances.store.write(extractor)
 @session_class_wrapper
 class CDFClassicAPI(BaseReadAPI):
@@ -229,6 +268,8 @@ class CDFClassicAPI(BaseReadAPI):
         identifier: Literal["id", "externalId"] = "id",
         reference_timeseries: bool = False,
         reference_files: bool = False,
+        unpack_metadata: bool = False,
+        skip_sequence_rows: bool = False,
     ) -> IssueList:
         namespace = CLASSIC_CDF_NAMESPACE
         extractor = extractors.ClassicGraphExtractor(
@@ -238,7 +279,11 @@ class CDFClassicAPI(BaseReadAPI):
             namespace=namespace,
             prefix="Classic",
             identifier=identifier,
+            unpack_metadata=unpack_metadata,
+            skip_sequence_rows=skip_sequence_rows,
         )
+        self._state.instances.neat_prefix_by_predicate_uri.update(extractor.neat_prefix_by_predicate_uri)
+        self._state.instances.neat_prefix_by_type_uri.update(extractor.neat_prefix_by_type_uri)
         extract_issues = self._state.write_graph(extractor)
         if identifier == "externalId":
             self._state.quoted_source_identifiers = True
@@ -361,6 +406,9 @@ class CSVReadAPI(BaseReadAPI):
     """
     def __call__(self, io: Any, type: str, primary_key: str) -> None:
+        warnings.filterwarnings("default")
+        AlphaFlags.csv_read.warn()
         engine = import_engine()
         engine.set.format = "csv"
         engine.set.file = NeatReader.create(io).materialize_path()
@@ -416,6 +464,9 @@ class XMLReadAPI(BaseReadAPI):
             - remove associations between nodes that do not exist in the extracted graph
             - remove edges to nodes that do not exist in the extracted graph
         """
+        warnings.filterwarnings("default")
+        AlphaFlags.dexpi_read.warn()
         path = NeatReader.create(io).materialize_path()
         engine = import_engine()
         engine.set.format = "dexpi"
@@ -467,6 +518,9 @@ class XMLReadAPI(BaseReadAPI):
             - remove unused attributes
             - remove edges to nodes that do not exist in the extracted graph
         """
+        warnings.filterwarnings("default")
+        AlphaFlags.aml_read.warn()
         path = NeatReader.create(io).materialize_path()
         engine = import_engine()
         engine.set.format = "aml"
@@ -518,6 +572,9 @@ class RDFReadAPI(BaseReadAPI):
             neat.read.rdf.ontology("url_or_path_to_owl_source")
             ```
         """
+        warnings.filterwarnings("default")
+        AlphaFlags.ontology_read.warn()
         reader = NeatReader.create(io)
         importer = importers.OWLImporter.from_file(reader.materialize_path(), source_name=f"file {reader!s}")
         return self._state.rule_import(importer)
@@ -533,10 +590,18 @@ class RDFReadAPI(BaseReadAPI):
             neat.read.rdf.imf("url_or_path_to_imf_source")
             ```
         """
+        warnings.filterwarnings("default")
+        AlphaFlags.imf_read.warn()
         reader = NeatReader.create(io)
         importer = importers.IMFImporter.from_file(reader.materialize_path(), source_name=f"file {reader!s}")
         return self._state.rule_import(importer)
+    def instances(self, io: Any) -> IssueList:
+        reader = NeatReader.create(io)
+        self._state.instances.store.write(extractors.RdfFileExtractor(reader.materialize_path()))
+        return IssueList()
     def __call__(
         self,
         io: Any,
@@ -560,9 +625,8 @@ class RDFReadAPI(BaseReadAPI):
                 raise ValueError(f"Expected ontology, imf types or instances, got {source}")
         elif type == "instances":
-            reader = NeatReader.create(io)
-            self._state.instances.store.write(extractors.RdfFileExtractor(reader.materialize_path()))
-            return IssueList()
+            return self.instances(io)
         else:
             raise NeatSessionError(f"Expected data model or instances, got {type}")

cognite/neat/_session/_set.py CHANGED Viewed

@@ -23,14 +23,20 @@ class SetAPI:
         self._verbose = verbose
         self.instances = SetInstances(state, verbose)
-    def data_model_id(self, new_model_id: dm.DataModelId | tuple[str, str, str]) -> IssueList:
+    def data_model_id(self, new_model_id: dm.DataModelId | tuple[str, str, str], name: str | None = None) -> IssueList:
         """Sets the data model ID of the latest verified data model. Set the data model id as a tuple of strings
         following the template (<data_model_space>, <data_model_name>, <data_model_version>).
+        Args:
+            new_model_id (dm.DataModelId | tuple[str, str, str]): The new data model id.
+            name (str, optional): The display name of the data model. If not set, the external ID will be used
+                to generate the name.
         Example:
             Set a new data model id:
             ```python
             neat.set.data_model_id(("my_data_model_space", "My_Data_Model", "v1"))
+            neat.set.data_model_id(("my_data_model_space", "MyDataModel", "v1"), name="My Data Model")
             ```
         """
         if self._state.rule_store.empty:

cognite/neat/_session/_state.py CHANGED Viewed

@@ -1,6 +1,8 @@
 from pathlib import Path
 from typing import Literal, cast
+from rdflib import URIRef
 from cognite.neat._client import NeatClient
 from cognite.neat._graph.extractors import KnowledgeGraphExtractor
 from cognite.neat._issues import IssueList
@@ -74,6 +76,10 @@ class InstancesState:
         self.storage_path = storage_path
         self.issue_lists = IssueList()
         self.outcome = UploadResultList()
+        # These contain prefixes added by Neat at the extraction stage.
+        # We store them such that they can be removed in the load stage.
+        self.neat_prefix_by_predicate_uri: dict[URIRef, str] = {}
+        self.neat_prefix_by_type_uri: dict[URIRef, str] = {}
         # Ensure that error handling is done in the constructor
         self.store: NeatGraphStore = _session_method_wrapper(self._create_store, "NeatSession")()

cognite/neat/_session/_to.py CHANGED Viewed

@@ -10,7 +10,7 @@ from cognite.client.data_classes.data_modeling import DataModelIdentifier
 from cognite.neat._alpha import AlphaFlags
 from cognite.neat._constants import COGNITE_MODELS
 from cognite.neat._graph import loaders
-from cognite.neat._issues import IssueList, catch_issues
+from cognite.neat._issues import IssueList, NeatIssue, catch_issues
 from cognite.neat._rules import exporters
 from cognite.neat._rules._constants import PATTERNS
 from cognite.neat._rules._shared import VerifiedRules
@@ -35,6 +35,32 @@ class ToAPI:
         self._state = state
         self._verbose = verbose
         self.cdf = CDFToAPI(state, verbose)
+        self._python = ToPythonAPI(state, verbose)
+    def ontology(self, io: Any) -> None:
+        """Export the data model to ontology.
+        Args:
+            io: The file path to file-like object to write the session to.
+        Example:
+            Export the session to a file
+            ```python
+            ontology_file_name = "neat_session.ttl"
+            neat.to.ontology(ontology_file_name)
+            ```
+        """
+        warnings.filterwarnings("default")
+        AlphaFlags.to_ontology.warn()
+        filepath = Path(io)
+        if filepath.suffix != ".ttl":
+            warnings.warn("File extension is not .ttl, adding it to the file name", stacklevel=2)
+            filepath = filepath.with_suffix(".ttl")
+        exporter = exporters.OWLExporter()
+        self._state.rule_store.export_to_file(exporter, Path(io))
+        return None
     def excel(
         self,
@@ -209,6 +235,7 @@ class ToAPI:
             neat.to.yaml(your_folder_name, format="toolkit")
             ```
         """
         if format == "neat":
             exporter = exporters.YAMLExporter()
             if io is None:
@@ -270,28 +297,41 @@ class CDFToAPI:
             ```
         """
+        return self._instances(instance_space=space, space_from_property=space_property)
+    def _instances(
+        self,
+        instance_space: str | None = None,
+        space_from_property: str | None = None,
+        use_source_space: bool = False,
+    ) -> UploadResultList:
         if not self._state.client:
             raise NeatSessionError("No CDF client provided!")
         client = self._state.client
-        space = space or f"{self._state.rule_store.last_verified_dms_rules.metadata.space}_instances"
+        dms_rules = self._state.rule_store.last_verified_dms_rules
+        instance_space = instance_space or f"{dms_rules.metadata.space}_instances"
-        if space and space == self._state.rule_store.last_verified_dms_rules.metadata.space:
+        if instance_space and instance_space == dms_rules.metadata.space:
             raise NeatSessionError("Space for instances must be different from the data model space.")
-        elif not PATTERNS.space_compliance.match(str(space)):
+        elif not PATTERNS.space_compliance.match(str(instance_space)):
             raise NeatSessionError("Please provide a valid space name. {PATTERNS.space_compliance.pattern}")
-        if not client.data_modeling.spaces.retrieve(space):
-            client.data_modeling.spaces.apply(dm.SpaceApply(space=space))
+        if not client.data_modeling.spaces.retrieve(instance_space):
+            client.data_modeling.spaces.apply(dm.SpaceApply(space=instance_space))
         loader = loaders.DMSLoader(
             self._state.rule_store.last_verified_dms_rules,
             self._state.rule_store.last_verified_information_rules,
             self._state.instances.store,
-            instance_space=space,
+            instance_space=instance_space,
             client=client,
+            space_property=space_from_property,
+            use_source_space=use_source_space,
             # In case urllib.parse.quote() was run on the extraction, we need to run
             # urllib.parse.unquote() on the load.
-            unquote_external_ids=self._state.quoted_source_identifiers,
+            unquote_external_ids=True,
+            neat_prefix_by_predicate_uri=self._state.instances.neat_prefix_by_predicate_uri,
+            neat_prefix_by_type_uri=self._state.instances.neat_prefix_by_type_uri,
         )
         result = loader.load_into_cdf(client)
@@ -334,3 +374,70 @@ class CDFToAPI:
         result = self._state.rule_store.export_to_cdf(exporter, self._state.client, dry_run)
         print("You can inspect the details with the .inspect.outcome.data_model(...) method.")
         return result
+@session_class_wrapper
+class ToPythonAPI:
+    """API used to write the contents of a NeatSession to Python objects"""
+    def __init__(self, state: SessionState, verbose: bool) -> None:
+        self._state = state
+        self._verbose = verbose
+    def instances(
+        self,
+        instance_space: str | None = None,
+        space_from_property: str | None = None,
+        use_source_space: bool = False,
+    ) -> tuple[list[dm.InstanceApply], IssueList]:
+        """Export the verified DMS instances to Python objects.
+        Args:
+            instance_space: The name of the instance space to use. Defaults to None.
+            space_from_property: This is an alternative to the 'instance_space' argument. If provided,
+                the space will be set to the value of the property with the given name for each instance.
+                If the property is not found, the 'instance_space' argument will be used. Defaults to None.
+            use_source_space: If True, the instance space will be set to the source space of the instance.
+                This is only relevant if the instances were extracted from CDF data models. Defaults to False.
+        Returns:
+            list[dm.InstanceApply]: The instances as Python objects.
+        Example:
+            Export instances to Python objects
+            ```python
+            instances = neat.to._python.instances()
+            ```
+            Export instances to Python objects using the `dataSetId` property as the space
+            ```python
+            instances = neat.to._python.instances(space_from_property="dataSetId")
+            ```
+        """
+        dms_rules = self._state.rule_store.last_verified_dms_rules
+        instance_space = instance_space or f"{dms_rules.metadata.space}_instances"
+        if instance_space and instance_space == dms_rules.metadata.space:
+            raise NeatSessionError("Space for instances must be different from the data model space.")
+        elif not PATTERNS.space_compliance.match(str(instance_space)):
+            raise NeatSessionError(f"Please provide a valid space name. {PATTERNS.space_compliance.pattern}")
+        loader = loaders.DMSLoader(
+            self._state.rule_store.last_verified_dms_rules,
+            self._state.rule_store.last_verified_information_rules,
+            self._state.instances.store,
+            instance_space=instance_space,
+            space_property=space_from_property,
+            use_source_space=use_source_space,
+            unquote_external_ids=True,
+            neat_prefix_by_predicate_uri=self._state.instances.neat_prefix_by_predicate_uri,
+            neat_prefix_by_type_uri=self._state.instances.neat_prefix_by_type_uri,
+        )
+        issue_list = IssueList()
+        instances: list[dm.InstanceApply] = []
+        for item in loader.load(stop_on_exception=False):
+            if isinstance(item, dm.InstanceApply):
+                instances.append(item)
+            elif isinstance(item, NeatIssue):
+                issue_list.append(item)
+        return instances, issue_list

cognite/neat/_store/_graph_store.py CHANGED Viewed

@@ -3,7 +3,7 @@ import warnings
 from collections.abc import Iterable
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import cast, overload
+from typing import Any, cast, overload
 from zipfile import ZipExtFile
 import pandas as pd
@@ -239,14 +239,18 @@ class NeatGraphStore:
         class_uri: URIRef,
         named_graph: URIRef | None = None,
         property_renaming_config: dict[URIRef, str] | None = None,
-    ) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
+        remove_uri_namespace: bool = True,
+    ) -> Iterable[tuple[URIRef, dict[str | InstanceType, list[Any]]]]:
         named_graph = named_graph or self.default_named_graph
-        instance_ids = self.queries.list_instances_ids_of_class(class_uri, named_graph=named_graph)
+        instance_ids = self.queries.list_instances_ids(class_uri, named_graph=named_graph)
         for instance_id in instance_ids:
             if res := self.queries.describe(
-                instance_id=instance_id, instance_type=class_uri, property_renaming_config=property_renaming_config
+                instance_id=instance_id,
+                instance_type=class_uri,
+                property_renaming_config=property_renaming_config,
+                remove_uri_namespace=remove_uri_namespace,
             ):
                 yield res

cognite/neat/_utils/rdf_.py CHANGED Viewed

@@ -7,6 +7,8 @@ from pydantic import HttpUrl, TypeAdapter, ValidationError
 from rdflib import Graph, Namespace, URIRef
 from rdflib import Literal as RdfLiteral
+from cognite.neat._constants import SPACE_URI_PATTERN
 Triple: TypeAlias = tuple[URIRef, URIRef, RdfLiteral | URIRef]
@@ -100,12 +102,41 @@ def get_namespace(URI: URIRef, special_separator: str = "#_") -> str:
     str
         Entity id without namespace
     """
+    return split_uri(URI, special_separator)[0]
+def namespace_as_space(namespace: str) -> str | None:
+    if match := SPACE_URI_PATTERN.match(namespace):
+        return match.group("space")
+    return None
+def split_uri(URI: URIRef, special_separator: str = "#_") -> tuple[str, str]:
+    """Splits URI into namespace and entity name
+    Parameters
+    ----------
+    URI : URIRef
+        URI of an entity
+    special_separator : str
+        Special separator to use instead of # or / if present in URI
+        Set by default to "#_" which covers special client use case
+    Returns
+    -------
+    tuple[str, str]
+        Tuple of namespace and entity name
+    """
     if special_separator in URI:
-        return URI.split(special_separator)[0] + special_separator
+        namespace, rest = URI.split(special_separator, maxsplit=1)
+        namespace += special_separator
     elif "#" in URI:
-        return URI.split("#")[0] + "#"
+        namespace, rest = URI.split("#", maxsplit=1)
+        namespace += "#"
     else:
-        return "/".join(URI.split("/")[:-1]) + "/"
+        namespace, rest = URI.rsplit("/", maxsplit=1)
+        namespace += "/"
+    return namespace, rest
 def as_neat_compliant_uri(uri: URIRef) -> URIRef:

cognite/neat/_utils/text.py CHANGED Viewed

@@ -1,7 +1,44 @@
 import re
-from collections.abc import Collection
+import urllib.parse
+from collections.abc import Collection, Set
+from re import Pattern
 from typing import Any
+from cognite.neat._rules._constants import get_reserved_words
+PREPOSITIONS = frozenset(
+    {
+        "in",
+        "on",
+        "at",
+        "by",
+        "for",
+        "with",
+        "about",
+        "against",
+        "between",
+        "into",
+        "through",
+        "during",
+        "before",
+        "after",
+        "above",
+        "below",
+        "to",
+        "from",
+        "up",
+        "down",
+        "out",
+        "off",
+        "over",
+        "under",
+        "again",
+        "further",
+        "then",
+        "once",
+    }
+)
 def to_camel_case(string: str) -> str:
     """Convert snake_case_name to camelCaseName.
@@ -127,6 +164,18 @@ def to_snake_case(string: str) -> str:
     return "_".join(map(str.lower, words))
+def to_words(string: str) -> str:
+    """Converts snake_case camelCase or PascalCase to words."""
+    return to_snake_case(string).replace("_", " ")
+def title(text: str, skip_words: Set[str] = PREPOSITIONS) -> str:
+    """Converts text to title case, skipping prepositions."""
+    words = (word.lower() for word in text.split())
+    titled_words = (word.capitalize() if word not in skip_words else word for word in words)
+    return " ".join(titled_words)
 def sentence_or_string_to_camel(string: str) -> str:
     # Could be a combination of kebab and pascal/camel case
     if " " in string:
@@ -159,7 +208,8 @@ def humanize_collection(collection: Collection[Any], /, *, sort: bool = True) ->
 class NamingStandardization:
-    _clean_pattern = re.compile(r"[^a-zA-Z0-9_]+")
+    _letter_number_underscore = re.compile(r"[^a-zA-Z0-9_]+")
+    _letter_number_underscore_hyphen = re.compile(r"[^a-zA-Z0-9_-]+")
     _multi_underscore_pattern = re.compile(r"_+")
     _start_letter_pattern = re.compile(r"^[a-zA-Z]")
@@ -182,6 +232,24 @@ class NamingStandardization:
         return to_camel_case(clean)
     @classmethod
-    def _clean_string(cls, raw: str) -> str:
-        raw = cls._clean_pattern.sub("_", raw)
+    def standardize_space_str(cls, raw: str) -> str:
+        clean = cls._clean_string(raw, cls._letter_number_underscore_hyphen)
+        if not cls._start_letter_pattern.match(clean):
+            clean = f"sp_{clean}"
+        if clean in set(get_reserved_words("space")):
+            clean = f"my_{clean}"
+        if len(clean) > 43:
+            clean = clean[:43]
+        if not (clean[-1].isalnum()) and len(clean) == 43:
+            clean = f"{clean[:-1]}x"
+        elif not clean[-1].isalnum():
+            clean = f"{clean}x"
+        if not clean:
+            raise ValueError("Space name must contain at least one letter.")
+        return to_snake_case(clean)
+    @classmethod
+    def _clean_string(cls, raw: str, clean_pattern: Pattern[str] = _letter_number_underscore) -> str:
+        raw = urllib.parse.unquote(raw)
+        raw = clean_pattern.sub("_", raw)
         return cls._multi_underscore_pattern.sub("_", raw)

cognite/neat/_utils/upload.py CHANGED Viewed

@@ -55,6 +55,7 @@ class UploadResult(UploadResultCore, Generic[T_ID]):
     failed_upserted: set[T_ID] = field(default_factory=set)
     failed_changed: set[T_ID] = field(default_factory=set)
     failed_deleted: set[T_ID] = field(default_factory=set)
+    failed_items: list = field(default_factory=list)
     @property
     def failed(self) -> int:
@@ -129,4 +130,5 @@ class UploadResult(UploadResultCore, Generic[T_ID]):
             failed_upserted=self.failed_upserted.union(other.failed_upserted),
             failed_changed=self.failed_changed.union(other.failed_changed),
             failed_deleted=self.failed_deleted.union(other.failed_deleted),
+            failed_items=self.failed_items + other.failed_items,
         )

cognite/neat/_version.py CHANGED Viewed

@@ -1,2 +1,2 @@
-__version__ = "0.110.0"
-__engine__ = "^2.0.3"
+__version__ = "0.111.1"
+__engine__ = "^2.0.4"

{cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: cognite-neat
-Version: 0.110.0
+Version: 0.111.1
 Summary: Knowledge graph transformation
 License: Apache-2.0
 Author: Nikola Vasiljevic

cognite-neat 0.110.0__py3-none-any.whl → 0.111.1__py3-none-any.whl

Potentially problematic release.

cognite-neat 0.110.0__py3-none-any.whl → 0.111.1__py3-none-any.whl