PyPI - cognite-neat - Versions diffs - 0.105.2__py3-none-any.whl → 0.107.0__py3-none-any.whl - Mend

cognite-neat 0.105.2__py3-none-any.whl → 0.107.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cognite-neat might be problematic. Click here for more details.

Files changed (54) hide show

cognite/neat/_config.py +6 -260
cognite/neat/_graph/extractors/__init__.py +5 -1
cognite/neat/_graph/extractors/_base.py +32 -0
cognite/neat/_graph/extractors/_classic_cdf/_base.py +42 -16
cognite/neat/_graph/extractors/_classic_cdf/_classic.py +78 -8
cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +2 -0
cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +10 -3
cognite/neat/_graph/extractors/_dms.py +48 -14
cognite/neat/_graph/extractors/_dms_graph.py +149 -0
cognite/neat/_graph/extractors/_rdf_file.py +32 -5
cognite/neat/_graph/loaders/_rdf2dms.py +119 -20
cognite/neat/_graph/queries/_construct.py +1 -1
cognite/neat/_graph/transformers/__init__.py +5 -0
cognite/neat/_graph/transformers/_base.py +13 -9
cognite/neat/_graph/transformers/_classic_cdf.py +141 -44
cognite/neat/_graph/transformers/_rdfpath.py +4 -4
cognite/neat/_graph/transformers/_value_type.py +54 -44
cognite/neat/_issues/warnings/_external.py +1 -1
cognite/neat/_rules/analysis/_base.py +1 -1
cognite/neat/_rules/analysis/_information.py +14 -13
cognite/neat/_rules/catalog/__init__.py +1 -0
cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
cognite/neat/_rules/importers/_dms2rules.py +7 -5
cognite/neat/_rules/importers/_rdf/_inference2rules.py +5 -3
cognite/neat/_rules/models/_base_rules.py +0 -12
cognite/neat/_rules/models/_types.py +5 -0
cognite/neat/_rules/models/dms/_rules.py +50 -2
cognite/neat/_rules/models/information/_rules.py +48 -5
cognite/neat/_rules/models/information/_rules_input.py +1 -1
cognite/neat/_rules/models/mapping/_classic2core.py +4 -5
cognite/neat/_rules/models/mapping/_classic2core.yaml +70 -58
cognite/neat/_rules/transformers/__init__.py +4 -0
cognite/neat/_rules/transformers/_converters.py +209 -62
cognite/neat/_rules/transformers/_mapping.py +3 -2
cognite/neat/_session/_base.py +8 -13
cognite/neat/_session/_inspect.py +6 -2
cognite/neat/_session/_mapping.py +22 -13
cognite/neat/_session/_prepare.py +9 -57
cognite/neat/_session/_read.py +96 -29
cognite/neat/_session/_set.py +9 -0
cognite/neat/_session/_state.py +10 -1
cognite/neat/_session/_to.py +51 -15
cognite/neat/_session/exceptions.py +7 -3
cognite/neat/_store/_graph_store.py +85 -39
cognite/neat/_store/_rules_store.py +22 -0
cognite/neat/_utils/auth.py +2 -0
cognite/neat/_utils/collection_.py +32 -11
cognite/neat/_version.py +1 -1
{cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/METADATA +2 -8
{cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/RECORD +54 -52
{cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/WHEEL +1 -1
{cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/LICENSE +0 -0
{cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/entry_points.txt +0 -0

cognite/neat/_config.py CHANGED Viewed

@@ -1,265 +1,11 @@
-import json
-import logging
-import os
-import shutil
-import sys
-from pathlib import Path
-from typing import Any, Literal, cast
+from typing import Literal
-import yaml
-from pydantic import BaseModel, Field, model_validator
-from yaml import safe_load
+from pydantic import BaseModel
-from cognite.neat._constants import EXAMPLE_GRAPHS, EXAMPLE_RULES, EXAMPLE_WORKFLOWS
-from cognite.neat._utils.auth import EnvironmentVariables
-if sys.version_info >= (3, 11):
-    from enum import StrEnum
-    from typing import Self
-else:
-    from backports.strenum import StrEnum
-    from typing_extensions import Self
+class NeatConfig(BaseModel, validate_assignment=True):
+    progress_bar: Literal["tqdm", "rich", "tqdm-notebook", "infer"] | None = "infer"
+    use_iterate_bar_threshold: int | None = 500
-LOG_FORMAT = "%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s"
-LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
-class RulesStoreType(StrEnum):
-    """Rules Store type"""
-    CDF = "cdf"
-    FILE = "file"
-    URL = "url"
-    GOOGLE_SHEET = "google_sheet"
-class WorkflowsStoreType(StrEnum):
-    """Workflows Store type"""
-    CDF = "cdf"
-    FILE = "file"
-    URL = "url"
-class Config(BaseModel, arbitrary_types_allowed=True):
-    workflows_store_type: WorkflowsStoreType = WorkflowsStoreType.FILE
-    data_store_path: Path = Field(default_factory=lambda: Path.cwd() / "data")
-    workflow_downloader_filter: list[str] | None = Field(
-        description="List of workflow names+tags to filter on when downloading workflows from CDF. "
-        "Example name:workflow_name=version,tag:tag_name",
-        default=None,
-    )
-    cdf_auth_config: EnvironmentVariables = Field(default_factory=EnvironmentVariables.default)
-    cdf_default_dataset_id: int = 0
-    load_examples: bool = True
-    log_level: Literal["ERROR", "WARNING", "INFO", "DEBUG"] = "INFO"
-    log_format: str = LOG_FORMAT
-    download_workflows_from_cdf: bool = Field(
-        default=False,
-        description="Downloads all workflows from CDF automatically and stores them locally",
-    )
-    stop_on_error: bool = False
-    @model_validator(mode="before")
-    def backwards_compatible(cls, data: Any):
-        if not isinstance(data, dict):
-            return data
-        if "cdf_client" in data:
-            cdf_client = data["cdf_client"]
-            if isinstance(cdf_client, dict):
-                if "base_url" in cdf_client:
-                    base_url = cdf_client["base_url"]
-                    cluster = base_url.removeprefix("https://").removesuffix(".cognitedata.com")
-                else:
-                    base_url, cluster = "Missing", "Missing"
-                if "scopes" in cdf_client:
-                    scopes = cdf_client["scopes"]
-                    if isinstance(scopes, list):
-                        scopes = ",".join(scopes)
-                else:
-                    scopes = "Missing"
-                data["cdf_auth_config"] = EnvironmentVariables(
-                    CDF_PROJECT=cdf_client.get("project", "Missing"),
-                    CDF_CLUSTER=cluster,
-                    CDF_URL=base_url,
-                    IDP_CLIENT_ID=cdf_client.get("client_id", "Missing"),
-                    IDP_CLIENT_SECRET=cdf_client.get("client_secret", "Missing"),
-                    IDP_TOKEN_URL=cdf_client.get("token_url", "Missing"),
-                    IDP_SCOPES=scopes,
-                    CDF_TIMEOUT=int(cdf_client.get("timeout", 60)),
-                    CDF_MAX_WORKERS=int(cdf_client.get("max_workers", 3)),
-                )
-        return data
-    def as_legacy_config(
-        self,
-    ) -> dict[str, Any]:
-        config: dict[str, Any] = {}
-        config["workflows_store_type"] = self.workflows_store_type
-        config["data_store_path"] = str(self.data_store_path)
-        config["workflows_downloader_filter"] = self.workflow_downloader_filter
-        config["cdf_client"] = {}
-        if self.cdf_auth_config.CDF_PROJECT not in {"Missing", "NOT SET"}:
-            config["cdf_client"]["project"] = self.cdf_auth_config.CDF_PROJECT
-        if self.cdf_auth_config.CDF_CLUSTER not in {"Missing", "NOT SET"}:
-            config["cdf_client"]["cluster"] = self.cdf_auth_config.CDF_CLUSTER
-        if self.cdf_auth_config.CDF_URL:
-            config["cdf_client"]["base_url"] = self.cdf_auth_config.CDF_URL
-        if self.cdf_auth_config.IDP_CLIENT_ID:
-            config["cdf_client"]["client_id"] = self.cdf_auth_config.IDP_CLIENT_ID
-        if self.cdf_auth_config.IDP_CLIENT_SECRET:
-            config["cdf_client"]["client_secret"] = self.cdf_auth_config.IDP_CLIENT_SECRET
-        if self.cdf_auth_config.IDP_TOKEN_URL:
-            config["cdf_client"]["token_url"] = self.cdf_auth_config.IDP_TOKEN_URL
-        if self.cdf_auth_config.IDP_SCOPES:
-            config["cdf_client"]["scopes"] = self.cdf_auth_config.idp_scopes
-        if self.cdf_auth_config.CDF_TIMEOUT:
-            config["cdf_client"]["timeout"] = self.cdf_auth_config.CDF_TIMEOUT
-        if self.cdf_auth_config.CDF_MAX_WORKERS:
-            config["cdf_client"]["max_workers"] = self.cdf_auth_config.CDF_MAX_WORKERS
-        config["cdf_default_dataset_id"] = self.cdf_default_dataset_id
-        config["load_examples"] = self.load_examples
-        config["log_level"] = self.log_level
-        config["log_format"] = self.log_format
-        config["download_workflows_from_cdf"] = self.download_workflows_from_cdf
-        config["stop_on_error"] = self.stop_on_error
-        return config
-    @property
-    def _dir_suffix(self) -> str:
-        is_test_running = "pytest" in sys.modules
-        if is_test_running:
-            # Todo change the below to f"-{os.getpid()}" when all tests supports parallel execution.
-            return ""
-        return ""
-    @property
-    def rules_store_path(self) -> Path:
-        return self.data_store_path / f"rules{self._dir_suffix}"
-    @property
-    def workflows_store_path(self) -> Path:
-        return self.data_store_path / f"workflows{self._dir_suffix}"
-    @property
-    def source_graph_path(self) -> Path:
-        return self.data_store_path / f"source-graphs{self._dir_suffix}"
-    @property
-    def staging_path(self) -> Path:
-        return self.data_store_path / f"staging{self._dir_suffix}"
-    @classmethod
-    def from_yaml(cls, filepath: Path) -> Self:
-        return cls(**safe_load(filepath.read_text()))
-    def to_yaml(self, filepath: Path):
-        # Parse as json to avoid Path and Enum objects
-        dump = json.loads(self.model_dump_json())
-        with filepath.open("w") as f:
-            yaml.safe_dump(dump, f)
-    @classmethod
-    def from_env(cls) -> Self:
-        missing = "Missing"
-        # This is to be backwards compatible with the old config
-        base_url: str | None = None
-        if "NEAT_CDF_BASE_URL" in os.environ:
-            base_url = os.environ["NEAT_CDF_BASE_URL"]
-        if isinstance(base_url, str):
-            cluster = base_url.removeprefix("https://").removesuffix(".cognitedata.com")
-        else:
-            cluster = missing
-        variables = EnvironmentVariables(
-            CDF_PROJECT=os.environ.get("NEAT_CDF_PROJECT", missing),
-            CDF_CLUSTER=cluster,
-            CDF_URL=base_url,
-            IDP_CLIENT_ID=os.environ.get("NEAT_CDF_CLIENT_ID"),
-            IDP_CLIENT_SECRET=os.environ.get("NEAT_CDF_CLIENT_SECRET"),
-            IDP_TOKEN_URL=os.environ.get("NEAT_CDF_TOKEN_URL"),
-            IDP_SCOPES=os.environ.get("NEAT_CDF_SCOPES"),
-            CDF_TIMEOUT=int(os.environ["NEAT_CDF_CLIENT_TIMEOUT"] if "NEAT_CDF_CLIENT_TIMEOUT" in os.environ else 60),
-            CDF_MAX_WORKERS=int(
-                os.environ["NEAT_CDF_CLIENT_MAX_WORKERS"] if "NEAT_CDF_CLIENT_MAX_WORKERS" in os.environ else 3
-            ),
-        )
-        if workflow_downloader_filter_value := os.environ.get("NEAT_WORKFLOW_DOWNLOADER_FILTER", None):
-            workflow_downloader_filter = workflow_downloader_filter_value.split(",")
-        else:
-            workflow_downloader_filter = None
-        return cls(
-            cdf_auth_config=variables,
-            workflows_store_type=os.environ.get(  # type: ignore[arg-type]
-                "NEAT_WORKFLOWS_STORE_TYPE", WorkflowsStoreType.FILE
-            ),
-            data_store_path=Path(os.environ.get("NEAT_DATA_PATH", "_app/data")),
-            cdf_default_dataset_id=int(os.environ.get("NEAT_CDF_DEFAULT_DATASET_ID", 6476640149881990)),
-            log_level=cast(
-                Literal["ERROR", "WARNING", "INFO", "DEBUG"],
-                os.environ.get("NEAT_LOG_LEVEL", "INFO"),
-            ),
-            workflow_downloader_filter=workflow_downloader_filter,
-            load_examples=bool(os.environ.get("NEAT_LOAD_EXAMPLES", True) in ["True", "true", "1"]),
-        )
-def copy_examples_to_directory(config: Config):
-    """
-    Copier over all the examples to the target_data_directory,
-    without overwriting
-    Args:
-        target_data_dir : The target directory
-        suffix : The suffix to add to the directory names
-    """
-    print(f"Copying examples into {config.data_store_path}")
-    _copy_examples(EXAMPLE_RULES, config.rules_store_path)
-    _copy_examples(EXAMPLE_GRAPHS, config.source_graph_path)
-    _copy_examples(EXAMPLE_WORKFLOWS, config.workflows_store_path)
-    config.staging_path.mkdir(exist_ok=True, parents=True)
-def create_data_dir_structure(config: Config) -> None:
-    """
-    Create the data directory structure in empty directory
-    Args:
-        target_data_dir : The target directory
-        suffix : The suffix to add to the directory names
-    """
-    for path in (
-        config.rules_store_path,
-        config.source_graph_path,
-        config.staging_path,
-        config.workflows_store_path,
-    ):
-        path.mkdir(exist_ok=True, parents=True)
-def _copy_examples(source_dir: Path, target_dir: Path):
-    for current in source_dir.rglob("*"):
-        if current.is_dir():
-            continue
-        relative = current.relative_to(source_dir)
-        if not (target := target_dir / relative).exists():
-            target.parent.mkdir(exist_ok=True, parents=True)
-            shutil.copy2(current, target)
-def configure_logging(level: str = "DEBUG", log_format: str = LOG_FORMAT):
-    """Configure logging based on config."""
-    logging.basicConfig(format=log_format, level=logging.getLevelName(level), datefmt=LOG_DATE_FORMAT)
+GLOBAL_CONFIG = NeatConfig()

cognite/neat/_graph/extractors/__init__.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from cognite.neat._session.engine._interface import Extractor as EngineExtractor
-from ._base import BaseExtractor
+from ._base import BaseExtractor, KnowledgeGraphExtractor
 from ._classic_cdf._assets import AssetsExtractor
 from ._classic_cdf._classic import ClassicGraphExtractor
 from ._classic_cdf._data_sets import DataSetExtractor
@@ -12,6 +12,7 @@ from ._classic_cdf._sequences import SequencesExtractor
 from ._classic_cdf._timeseries import TimeSeriesExtractor
 from ._dexpi import DexpiExtractor
 from ._dms import DMSExtractor
+from ._dms_graph import DMSGraphExtractor
 from ._iodd import IODDExtractor
 from ._mock_graph_generator import MockGraphGenerator
 from ._rdf_file import RdfFileExtractor
@@ -21,11 +22,13 @@ __all__ = [
     "BaseExtractor",
     "ClassicGraphExtractor",
     "DMSExtractor",
+    "DMSGraphExtractor",
     "DataSetExtractor",
     "DexpiExtractor",
     "EventsExtractor",
     "FilesExtractor",
     "IODDExtractor",
+    "KnowledgeGraphExtractor",
     "LabelsExtractor",
     "MockGraphGenerator",
     "RdfFileExtractor",
@@ -51,6 +54,7 @@ TripleExtractors = (
     | ClassicGraphExtractor
     | DataSetExtractor
     | EngineExtractor
+    | DMSGraphExtractor
 )

cognite/neat/_graph/extractors/_base.py CHANGED Viewed

@@ -1,9 +1,17 @@
 from abc import abstractmethod
 from collections.abc import Iterable
+from typing import TYPE_CHECKING
+from rdflib import URIRef
+from cognite.neat._constants import DEFAULT_NAMESPACE
+from cognite.neat._rules.models import InformationRules
 from cognite.neat._shared import Triple
 from cognite.neat._utils.auxiliary import class_html_doc
+if TYPE_CHECKING:
+    from cognite.neat._store._provenance import Agent as ProvenanceAgent
 class BaseExtractor:
     """This is the base class for all extractors. It defines the interface that
@@ -24,3 +32,27 @@ class BaseExtractor:
     @classmethod
     def _repr_html_(cls) -> str:
         return class_html_doc(cls)
+class KnowledgeGraphExtractor(BaseExtractor):
+    """A knowledge graph extractor extracts triples with a schema"""
+    @abstractmethod
+    def get_information_rules(self) -> InformationRules:
+        """Returns the information rules that the extractor uses."""
+        raise NotImplementedError()
+    @property
+    def description(self) -> str:
+        return self.__doc__.strip().split("\n")[0] if self.__doc__ else "Missing"
+    @property
+    def source_uri(self) -> URIRef:
+        raise NotImplementedError
+    @property
+    def agent(self) -> "ProvenanceAgent":
+        """Provenance agent for the importer."""
+        from cognite.neat._store._provenance import Agent as ProvenanceAgent
+        return ProvenanceAgent(id_=DEFAULT_NAMESPACE[f"agent/{type(self).__name__}"])

cognite/neat/_graph/extractors/_classic_cdf/_base.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import json
 import re
 import sys
+import warnings
 from abc import ABC, abstractmethod
 from collections.abc import Callable, Iterable, Sequence, Set
 from datetime import datetime, timezone
@@ -9,13 +10,16 @@ from typing import Any, Generic, TypeVar
 from cognite.client import CogniteClient
 from cognite.client.data_classes._base import WriteableCogniteResource
+from cognite.client.exceptions import CogniteAPIError
 from pydantic import AnyHttpUrl, ValidationError
 from rdflib import RDF, XSD, Literal, Namespace, URIRef
 from cognite.neat._constants import DEFAULT_NAMESPACE
 from cognite.neat._graph.extractors._base import BaseExtractor
+from cognite.neat._issues.warnings import CDFAuthWarning
 from cognite.neat._shared import Triple
 from cognite.neat._utils.auxiliary import string_to_ideal_type
+from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
 T_CogniteResource = TypeVar("T_CogniteResource", bound=WriteableCogniteResource)
@@ -85,6 +89,7 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
         skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
         camel_case: bool = True,
         as_write: bool = False,
+        prefix: str | None = None,
     ):
         self.namespace = namespace or DEFAULT_NAMESPACE
         self.items = items
@@ -95,20 +100,15 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
         self.skip_metadata_values = skip_metadata_values
         self.camel_case = camel_case
         self.as_write = as_write
+        self.prefix = prefix
     def extract(self) -> Iterable[Triple]:
         """Extracts an asset with the given asset_id."""
-        if self.total:
-            try:
-                from rich.progress import track
-            except ModuleNotFoundError:
-                to_iterate = self.items
-            else:
-                to_iterate = track(
-                    self.items,
-                    total=self.limit or self.total,
-                    description=f"Extracting {type(self).__name__.removesuffix('Extractor')}",
-                )
+        if self.total is not None and self.total > 0:
+            to_iterate = iterate_progress_bar_if_above_config_threshold(
+                self.items, self.total, f"Extracting {type(self).__name__.removesuffix('Extractor')}"
+            )
         else:
             to_iterate = self.items
         for no, asset in enumerate(to_iterate):
@@ -176,6 +176,8 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
         type_ = self._default_rdf_type
         if self.to_type:
             type_ = self.to_type(item) or type_
+        if self.prefix:
+            type_ = f"{self.prefix}{type_}"
         return self._SPACE_PATTERN.sub("_", type_)
     def _as_object(self, raw: Any, key: str) -> Literal | URIRef:
@@ -220,9 +222,12 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
         skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
         camel_case: bool = True,
         as_write: bool = False,
+        prefix: str | None = None,
     ):
-        total, items = cls._from_dataset(client, data_set_external_id)
-        return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)
+        total, items = cls._handle_no_access(lambda: cls._from_dataset(client, data_set_external_id))
+        return cls(
+            items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write, prefix
+        )
     @classmethod
     @abstractmethod
@@ -243,9 +248,12 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
         skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
         camel_case: bool = True,
         as_write: bool = False,
+        prefix: str | None = None,
     ):
-        total, items = cls._from_hierarchy(client, root_asset_external_id)
-        return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)
+        total, items = cls._handle_no_access(lambda: cls._from_hierarchy(client, root_asset_external_id))
+        return cls(
+            items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write, prefix
+        )
     @classmethod
     @abstractmethod
@@ -265,11 +273,29 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
         skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
         camel_case: bool = True,
         as_write: bool = False,
+        prefix: str | None = None,
     ):
         total, items = cls._from_file(file_path)
-        return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)
+        return cls(
+            items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write, prefix
+        )
     @classmethod
     @abstractmethod
     def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[T_CogniteResource]]:
         raise NotImplementedError
+    @classmethod
+    def _handle_no_access(
+        cls, action: Callable[[], tuple[int | None, Iterable[T_CogniteResource]]]
+    ) -> tuple[int | None, Iterable[T_CogniteResource]]:
+        try:
+            return action()
+        except CogniteAPIError as e:
+            if e.code == 403:
+                warnings.warn(
+                    CDFAuthWarning(f"extract {cls.__name__.removesuffix('Extractor').casefold()}", str(e)), stacklevel=2
+                )
+                return 0, []
+            else:
+                raise e

cognite/neat/_graph/extractors/_classic_cdf/_classic.py CHANGED Viewed

@@ -1,18 +1,25 @@
 import warnings
 from collections import defaultdict
 from collections.abc import Iterable, Sequence
-from typing import ClassVar, NamedTuple
+from typing import ClassVar, NamedTuple, cast
 from cognite.client import CogniteClient
 from cognite.client.exceptions import CogniteAPIError
-from rdflib import Namespace
+from rdflib import Namespace, URIRef
-from cognite.neat._constants import CLASSIC_CDF_NAMESPACE
-from cognite.neat._graph.extractors._base import BaseExtractor
+from cognite.neat._constants import CLASSIC_CDF_NAMESPACE, DEFAULT_NAMESPACE, get_default_prefixes_and_namespaces
+from cognite.neat._graph.extractors._base import KnowledgeGraphExtractor
+from cognite.neat._issues.errors import NeatValueError
 from cognite.neat._issues.warnings import CDFAuthWarning
+from cognite.neat._rules._shared import ReadRules
+from cognite.neat._rules.catalog import classic_model
+from cognite.neat._rules.models import InformationInputRules, InformationRules
+from cognite.neat._rules.models._rdfpath import Entity as RDFPathEntity
+from cognite.neat._rules.models._rdfpath import RDFPath, SingleProperty
 from cognite.neat._shared import Triple
 from cognite.neat._utils.collection_ import chunker, iterate_progress_bar
 from cognite.neat._utils.rdf_ import remove_namespace_from_uri
+from cognite.neat._utils.text import to_snake
 from ._assets import AssetsExtractor
 from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix
@@ -37,7 +44,7 @@ class _ClassicCoreType(NamedTuple):
     api_name: str
-class ClassicGraphExtractor(BaseExtractor):
+class ClassicGraphExtractor(KnowledgeGraphExtractor):
     """This extractor extracts all classic CDF Resources.
     The Classic Graph consists of the following core resource type.
@@ -93,6 +100,7 @@ class ClassicGraphExtractor(BaseExtractor):
         root_asset_external_id: str | None = None,
         namespace: Namespace | None = None,
         limit_per_type: int | None = None,
+        prefix: str | None = None,
     ):
         self._client = client
         if sum([bool(data_set_external_id), bool(root_asset_external_id)]) != 1:
@@ -101,8 +109,14 @@ class ClassicGraphExtractor(BaseExtractor):
         self._data_set_external_id = data_set_external_id
         self._namespace = namespace or CLASSIC_CDF_NAMESPACE
         self._extractor_args = dict(
-            namespace=self._namespace, unpack_metadata=False, as_write=True, camel_case=True, limit=limit_per_type
+            namespace=self._namespace,
+            unpack_metadata=False,
+            as_write=True,
+            camel_case=True,
+            limit=limit_per_type,
+            prefix=prefix,
         )
+        self._prefix = prefix
         self._limit_per_type = limit_per_type
         self._source_external_ids_by_type: dict[InstanceIdPrefix, set[str]] = defaultdict(set)
@@ -144,6 +158,59 @@ class ClassicGraphExtractor(BaseExtractor):
         else:
             self._extracted_data_sets = True
+    def get_information_rules(self) -> InformationRules:
+        # To avoid circular imports
+        from cognite.neat._rules.importers import ExcelImporter
+        unverified = cast(ReadRules[InformationInputRules], ExcelImporter(classic_model).to_rules())
+        if unverified.rules is None:
+            raise NeatValueError(f"Could not read the classic model rules from {classic_model}.")
+        verified = unverified.rules.as_verified_rules()
+        prefixes = get_default_prefixes_and_namespaces()
+        instance_prefix: str | None = next((k for k, v in prefixes.items() if v == self._namespace), None)
+        if instance_prefix is None:
+            # We need to add a new prefix
+            instance_prefix = f"prefix_{len(prefixes) + 1}"
+            prefixes[instance_prefix] = self._namespace
+        verified.prefixes = prefixes
+        is_snake_case = self._extractor_args["camel_case"] is False
+        for prop in verified.properties:
+            prop_id = prop.property_
+            if is_snake_case:
+                prop_id = to_snake(prop_id)
+            prop.instance_source = RDFPath(
+                traversal=SingleProperty(
+                    class_=RDFPathEntity(prefix=instance_prefix, suffix=prop.class_.suffix),
+                    property=RDFPathEntity(prefix=instance_prefix, suffix=prop_id),
+                )
+            )
+        return verified
+    @property
+    def description(self) -> str:
+        if self._data_set_external_id:
+            source = f"data set {self._data_set_external_id}."
+        elif self._root_asset_external_id:
+            source = f"root asset {self._root_asset_external_id}."
+        else:
+            source = "unknown source."
+        return f"Extracting clasic CDF Graph (Assets, TimeSeries, Sequences, Events, Files) from {source}."
+    @property
+    def source_uri(self) -> URIRef:
+        if self._data_set_external_id:
+            resource = "dataset"
+            external_id = self._data_set_external_id
+        elif self._root_asset_external_id:
+            resource = "asset"
+            external_id = self._root_asset_external_id
+        else:
+            resource = "unknown"
+            external_id = "unknown"
+        return DEFAULT_NAMESPACE[f"{self._client.config.project}/{resource}/{external_id}"]
     def _extract_core_start_nodes(self):
         for core_node in self._classic_node_types:
             if self._data_set_external_id:
@@ -217,7 +284,7 @@ class ClassicGraphExtractor(BaseExtractor):
                 self._source_external_ids_by_type[resource_type].add(remove_namespace_from_uri(triple[2]))
             elif triple[1] == self._namespace.labels:
                 self._labels.add(remove_namespace_from_uri(triple[2]).removeprefix(InstanceIdPrefix.label))
-            elif triple[1] == self._namespace.datasetId:
+            elif triple[1] == self._namespace.dataSetId:
                 self._data_set_ids.add(
                     int(remove_namespace_from_uri(triple[2]).removeprefix(InstanceIdPrefix.data_set))
                 )
@@ -226,4 +293,7 @@ class ClassicGraphExtractor(BaseExtractor):
     @staticmethod
     def _chunk(items: Sequence, description: str) -> Iterable:
         to_iterate: Iterable = chunker(items, chunk_size=1000)
-        return iterate_progress_bar(to_iterate, (len(items) // 1_000) + 1, description)
+        if items:
+            return iterate_progress_bar(to_iterate, (len(items) // 1_000) + 1, description)
+        else:
+            return to_iterate

cognite/neat/_graph/extractors/_classic_cdf/_relationships.py CHANGED Viewed

@@ -28,6 +28,7 @@ class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
         skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
         camel_case: bool = True,
         as_write: bool = False,
+        prefix: str | None = None,
     ):
         super().__init__(
             items,
@@ -39,6 +40,7 @@ class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
             skip_metadata_values=skip_metadata_values,
             camel_case=camel_case,
             as_write=as_write,
+            prefix=prefix,
         )
         # This is used by the ClassicExtractor to log the target nodes, such
         # that it can extract them.

cognite-neat 0.105.2__py3-none-any.whl → 0.107.0__py3-none-any.whl

Potentially problematic release.

cognite-neat 0.105.2__py3-none-any.whl → 0.107.0__py3-none-any.whl