cognite-neat 0.105.1__py3-none-any.whl → 0.106.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. cognite/neat/_config.py +6 -260
  2. cognite/neat/_graph/extractors/_classic_cdf/_base.py +26 -13
  3. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +4 -1
  4. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +2 -2
  5. cognite/neat/_graph/loaders/_rdf2dms.py +7 -2
  6. cognite/neat/_graph/transformers/_base.py +4 -8
  7. cognite/neat/_graph/transformers/_classic_cdf.py +164 -80
  8. cognite/neat/_graph/transformers/_rdfpath.py +1 -1
  9. cognite/neat/_issues/warnings/_external.py +1 -1
  10. cognite/neat/_rules/importers/_rdf/_inference2rules.py +4 -2
  11. cognite/neat/_rules/models/mapping/_classic2core.yaml +70 -58
  12. cognite/neat/_rules/transformers/_mapping.py +3 -2
  13. cognite/neat/_session/_base.py +6 -7
  14. cognite/neat/_session/_inspect.py +6 -2
  15. cognite/neat/_session/_mapping.py +6 -8
  16. cognite/neat/_session/_prepare.py +9 -10
  17. cognite/neat/_session/_read.py +35 -26
  18. cognite/neat/_session/_set.py +9 -0
  19. cognite/neat/_session/_state.py +3 -1
  20. cognite/neat/_session/_to.py +11 -13
  21. cognite/neat/_store/_graph_store.py +33 -28
  22. cognite/neat/_utils/auth.py +35 -15
  23. cognite/neat/_utils/collection_.py +32 -11
  24. cognite/neat/_version.py +1 -1
  25. {cognite_neat-0.105.1.dist-info → cognite_neat-0.106.0.dist-info}/METADATA +1 -7
  26. {cognite_neat-0.105.1.dist-info → cognite_neat-0.106.0.dist-info}/RECORD +29 -29
  27. {cognite_neat-0.105.1.dist-info → cognite_neat-0.106.0.dist-info}/LICENSE +0 -0
  28. {cognite_neat-0.105.1.dist-info → cognite_neat-0.106.0.dist-info}/WHEEL +0 -0
  29. {cognite_neat-0.105.1.dist-info → cognite_neat-0.106.0.dist-info}/entry_points.txt +0 -0
cognite/neat/_config.py CHANGED
@@ -1,265 +1,11 @@
- import json
- import logging
- import os
- import shutil
- import sys
- from pathlib import Path
- from typing import Any, Literal, cast
+ from typing import Literal

- import yaml
- from pydantic import BaseModel, Field, model_validator
- from yaml import safe_load
+ from pydantic import BaseModel

- from cognite.neat._constants import EXAMPLE_GRAPHS, EXAMPLE_RULES, EXAMPLE_WORKFLOWS
- from cognite.neat._utils.auth import EnvironmentVariables

- if sys.version_info >= (3, 11):
-     from enum import StrEnum
-     from typing import Self
- else:
-     from backports.strenum import StrEnum
-     from typing_extensions import Self
+ class NeatConfig(BaseModel, validate_assignment=True):
+     progress_bar: Literal["tqdm", "rich", "tqdm-notebook", "infer"] | None = "infer"
+     use_iterate_bar_threshold: int | None = 500

- LOG_FORMAT = "%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s"
- LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

-
- class RulesStoreType(StrEnum):
-     """Rules Store type"""
-
-     CDF = "cdf"
-     FILE = "file"
-     URL = "url"
-     GOOGLE_SHEET = "google_sheet"
-
-
- class WorkflowsStoreType(StrEnum):
-     """Workflows Store type"""
-
-     CDF = "cdf"
-     FILE = "file"
-     URL = "url"
-
-
- class Config(BaseModel, arbitrary_types_allowed=True):
-     workflows_store_type: WorkflowsStoreType = WorkflowsStoreType.FILE
-     data_store_path: Path = Field(default_factory=lambda: Path.cwd() / "data")
-
-     workflow_downloader_filter: list[str] | None = Field(
-         description="List of workflow names+tags to filter on when downloading workflows from CDF. "
-         "Example name:workflow_name=version,tag:tag_name",
-         default=None,
-     )
-
-     cdf_auth_config: EnvironmentVariables = Field(default_factory=EnvironmentVariables.default)
-     cdf_default_dataset_id: int = 0
-     load_examples: bool = True
-
-     log_level: Literal["ERROR", "WARNING", "INFO", "DEBUG"] = "INFO"
-     log_format: str = LOG_FORMAT
-     download_workflows_from_cdf: bool = Field(
-         default=False,
-         description="Downloads all workflows from CDF automatically and stores them locally",
-     )
-     stop_on_error: bool = False
-
-     @model_validator(mode="before")
-     def backwards_compatible(cls, data: Any):
-         if not isinstance(data, dict):
-             return data
-         if "cdf_client" in data:
-             cdf_client = data["cdf_client"]
-             if isinstance(cdf_client, dict):
-                 if "base_url" in cdf_client:
-                     base_url = cdf_client["base_url"]
-                     cluster = base_url.removeprefix("https://").removesuffix(".cognitedata.com")
-                 else:
-                     base_url, cluster = "Missing", "Missing"
-                 if "scopes" in cdf_client:
-                     scopes = cdf_client["scopes"]
-                     if isinstance(scopes, list):
-                         scopes = ",".join(scopes)
-                 else:
-                     scopes = "Missing"
-                 data["cdf_auth_config"] = EnvironmentVariables(
-                     CDF_PROJECT=cdf_client.get("project", "Missing"),
-                     CDF_CLUSTER=cluster,
-                     CDF_URL=base_url,
-                     IDP_CLIENT_ID=cdf_client.get("client_id", "Missing"),
-                     IDP_CLIENT_SECRET=cdf_client.get("client_secret", "Missing"),
-                     IDP_TOKEN_URL=cdf_client.get("token_url", "Missing"),
-                     IDP_SCOPES=scopes,
-                     CDF_TIMEOUT=int(cdf_client.get("timeout", 60)),
-                     CDF_MAX_WORKERS=int(cdf_client.get("max_workers", 3)),
-                 )
-         return data
-
-     def as_legacy_config(
-         self,
-     ) -> dict[str, Any]:
-         config: dict[str, Any] = {}
-
-         config["workflows_store_type"] = self.workflows_store_type
-         config["data_store_path"] = str(self.data_store_path)
-         config["workflows_downloader_filter"] = self.workflow_downloader_filter
-
-         config["cdf_client"] = {}
-         if self.cdf_auth_config.CDF_PROJECT not in {"Missing", "NOT SET"}:
-             config["cdf_client"]["project"] = self.cdf_auth_config.CDF_PROJECT
-         if self.cdf_auth_config.CDF_CLUSTER not in {"Missing", "NOT SET"}:
-             config["cdf_client"]["cluster"] = self.cdf_auth_config.CDF_CLUSTER
-         if self.cdf_auth_config.CDF_URL:
-             config["cdf_client"]["base_url"] = self.cdf_auth_config.CDF_URL
-         if self.cdf_auth_config.IDP_CLIENT_ID:
-             config["cdf_client"]["client_id"] = self.cdf_auth_config.IDP_CLIENT_ID
-         if self.cdf_auth_config.IDP_CLIENT_SECRET:
-             config["cdf_client"]["client_secret"] = self.cdf_auth_config.IDP_CLIENT_SECRET
-         if self.cdf_auth_config.IDP_TOKEN_URL:
-             config["cdf_client"]["token_url"] = self.cdf_auth_config.IDP_TOKEN_URL
-         if self.cdf_auth_config.IDP_SCOPES:
-             config["cdf_client"]["scopes"] = self.cdf_auth_config.idp_scopes
-         if self.cdf_auth_config.CDF_TIMEOUT:
-             config["cdf_client"]["timeout"] = self.cdf_auth_config.CDF_TIMEOUT
-         if self.cdf_auth_config.CDF_MAX_WORKERS:
-             config["cdf_client"]["max_workers"] = self.cdf_auth_config.CDF_MAX_WORKERS
-
-         config["cdf_default_dataset_id"] = self.cdf_default_dataset_id
-         config["load_examples"] = self.load_examples
-         config["log_level"] = self.log_level
-         config["log_format"] = self.log_format
-         config["download_workflows_from_cdf"] = self.download_workflows_from_cdf
-         config["stop_on_error"] = self.stop_on_error
-
-         return config
-
-     @property
-     def _dir_suffix(self) -> str:
-         is_test_running = "pytest" in sys.modules
-         if is_test_running:
-             # Todo change the below to f"-{os.getpid()}" when all tests supports parallel execution.
-             return ""
-         return ""
-
-     @property
-     def rules_store_path(self) -> Path:
-         return self.data_store_path / f"rules{self._dir_suffix}"
-
-     @property
-     def workflows_store_path(self) -> Path:
-         return self.data_store_path / f"workflows{self._dir_suffix}"
-
-     @property
-     def source_graph_path(self) -> Path:
-         return self.data_store_path / f"source-graphs{self._dir_suffix}"
-
-     @property
-     def staging_path(self) -> Path:
-         return self.data_store_path / f"staging{self._dir_suffix}"
-
-     @classmethod
-     def from_yaml(cls, filepath: Path) -> Self:
-         return cls(**safe_load(filepath.read_text()))
-
-     def to_yaml(self, filepath: Path):
-         # Parse as json to avoid Path and Enum objects
-         dump = json.loads(self.model_dump_json())
-
-         with filepath.open("w") as f:
-             yaml.safe_dump(dump, f)
-
-     @classmethod
-     def from_env(cls) -> Self:
-         missing = "Missing"
-         # This is to be backwards compatible with the old config
-
-         base_url: str | None = None
-         if "NEAT_CDF_BASE_URL" in os.environ:
-             base_url = os.environ["NEAT_CDF_BASE_URL"]
-         if isinstance(base_url, str):
-             cluster = base_url.removeprefix("https://").removesuffix(".cognitedata.com")
-         else:
-             cluster = missing
-         variables = EnvironmentVariables(
-             CDF_PROJECT=os.environ.get("NEAT_CDF_PROJECT", missing),
-             CDF_CLUSTER=cluster,
-             CDF_URL=base_url,
-             IDP_CLIENT_ID=os.environ.get("NEAT_CDF_CLIENT_ID"),
-             IDP_CLIENT_SECRET=os.environ.get("NEAT_CDF_CLIENT_SECRET"),
-             IDP_TOKEN_URL=os.environ.get("NEAT_CDF_TOKEN_URL"),
-             IDP_SCOPES=os.environ.get("NEAT_CDF_SCOPES"),
-             CDF_TIMEOUT=int(os.environ["NEAT_CDF_CLIENT_TIMEOUT"] if "NEAT_CDF_CLIENT_TIMEOUT" in os.environ else 60),
-             CDF_MAX_WORKERS=int(
-                 os.environ["NEAT_CDF_CLIENT_MAX_WORKERS"] if "NEAT_CDF_CLIENT_MAX_WORKERS" in os.environ else 3
-             ),
-         )
-
-         if workflow_downloader_filter_value := os.environ.get("NEAT_WORKFLOW_DOWNLOADER_FILTER", None):
-             workflow_downloader_filter = workflow_downloader_filter_value.split(",")
-         else:
-             workflow_downloader_filter = None
-
-         return cls(
-             cdf_auth_config=variables,
-             workflows_store_type=os.environ.get(  # type: ignore[arg-type]
-                 "NEAT_WORKFLOWS_STORE_TYPE", WorkflowsStoreType.FILE
-             ),
-             data_store_path=Path(os.environ.get("NEAT_DATA_PATH", "_app/data")),
-             cdf_default_dataset_id=int(os.environ.get("NEAT_CDF_DEFAULT_DATASET_ID", 6476640149881990)),
-             log_level=cast(
-                 Literal["ERROR", "WARNING", "INFO", "DEBUG"],
-                 os.environ.get("NEAT_LOG_LEVEL", "INFO"),
-             ),
-             workflow_downloader_filter=workflow_downloader_filter,
-             load_examples=bool(os.environ.get("NEAT_LOAD_EXAMPLES", True) in ["True", "true", "1"]),
-         )
-
-
- def copy_examples_to_directory(config: Config):
-     """
-     Copier over all the examples to the target_data_directory,
-     without overwriting
-
-     Args:
-         target_data_dir : The target directory
-         suffix : The suffix to add to the directory names
-
-     """
-
-     print(f"Copying examples into {config.data_store_path}")
-     _copy_examples(EXAMPLE_RULES, config.rules_store_path)
-     _copy_examples(EXAMPLE_GRAPHS, config.source_graph_path)
-     _copy_examples(EXAMPLE_WORKFLOWS, config.workflows_store_path)
-     config.staging_path.mkdir(exist_ok=True, parents=True)
-
-
- def create_data_dir_structure(config: Config) -> None:
-     """
-     Create the data directory structure in empty directory
-
-     Args:
-         target_data_dir : The target directory
-         suffix : The suffix to add to the directory names
-
-     """
-     for path in (
-         config.rules_store_path,
-         config.source_graph_path,
-         config.staging_path,
-         config.workflows_store_path,
-     ):
-         path.mkdir(exist_ok=True, parents=True)
-
-
- def _copy_examples(source_dir: Path, target_dir: Path):
-     for current in source_dir.rglob("*"):
-         if current.is_dir():
-             continue
-         relative = current.relative_to(source_dir)
-         if not (target := target_dir / relative).exists():
-             target.parent.mkdir(exist_ok=True, parents=True)
-             shutil.copy2(current, target)
-
-
- def configure_logging(level: str = "DEBUG", log_format: str = LOG_FORMAT):
-     """Configure logging based on config."""
-     logging.basicConfig(format=log_format, level=logging.getLevelName(level), datefmt=LOG_DATE_FORMAT)
+ GLOBAL_CONFIG = NeatConfig()
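The replacement module is much smaller: the legacy workflow and CDF settings are gone, and the file now only defines NeatConfig plus a module-level GLOBAL_CONFIG instance. A minimal usage sketch, assuming GLOBAL_CONFIG is imported directly from cognite.neat._config (whether it is re-exported elsewhere is not visible in this diff):

    # Sketch based on the new cognite/neat/_config.py shown above.
    from cognite.neat._config import GLOBAL_CONFIG

    # Pick a specific progress-bar backend instead of letting neat infer one;
    # validate_assignment=True means an invalid value raises a pydantic ValidationError.
    GLOBAL_CONFIG.progress_bar = "tqdm"

    # Only show iteration progress bars for runs with at least 10,000 rows.
    GLOBAL_CONFIG.use_iterate_bar_threshold = 10_000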
cognite/neat/_graph/extractors/_classic_cdf/_base.py CHANGED
@@ -1,6 +1,7 @@
  import json
  import re
  import sys
+ import warnings
  from abc import ABC, abstractmethod
  from collections.abc import Callable, Iterable, Sequence, Set
  from datetime import datetime, timezone
@@ -9,13 +10,16 @@ from typing import Any, Generic, TypeVar

  from cognite.client import CogniteClient
  from cognite.client.data_classes._base import WriteableCogniteResource
+ from cognite.client.exceptions import CogniteAPIError
  from pydantic import AnyHttpUrl, ValidationError
  from rdflib import RDF, XSD, Literal, Namespace, URIRef

  from cognite.neat._constants import DEFAULT_NAMESPACE
  from cognite.neat._graph.extractors._base import BaseExtractor
+ from cognite.neat._issues.warnings import CDFAuthWarning
  from cognite.neat._shared import Triple
  from cognite.neat._utils.auxiliary import string_to_ideal_type
+ from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold

  T_CogniteResource = TypeVar("T_CogniteResource", bound=WriteableCogniteResource)

@@ -98,17 +102,11 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):

      def extract(self) -> Iterable[Triple]:
          """Extracts an asset with the given asset_id."""
-         if self.total:
-             try:
-                 from rich.progress import track
-             except ModuleNotFoundError:
-                 to_iterate = self.items
-             else:
-                 to_iterate = track(
-                     self.items,
-                     total=self.limit or self.total,
-                     description=f"Extracting {type(self).__name__.removesuffix('Extractor')}",
-                 )
+
+         if self.total is not None and self.total > 0:
+             to_iterate = iterate_progress_bar_if_above_config_threshold(
+                 self.items, self.total, f"Extracting {type(self).__name__.removesuffix('Extractor')}"
+             )
          else:
              to_iterate = self.items
          for no, asset in enumerate(to_iterate):
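extract() now delegates the progress-bar decision to iterate_progress_bar_if_above_config_threshold, a new helper in cognite/neat/_utils/collection_.py (that file also changes in this release, but its diff is not shown in this excerpt). A rough sketch of what such a helper could look like, inferred only from the call sites in this diff and the NeatConfig fields above; the shipped implementation may differ:

    # Sketch only, not the shipped implementation.
    from collections.abc import Iterable
    from typing import TypeVar

    from cognite.neat._config import GLOBAL_CONFIG
    from cognite.neat._utils.collection_ import iterate_progress_bar

    T = TypeVar("T")

    def iterate_progress_bar_if_above_config_threshold(
        iterable: Iterable[T], total: float, description: str
    ) -> Iterable[T]:
        threshold = GLOBAL_CONFIG.use_iterate_bar_threshold
        if threshold is None or total < threshold:
            # Bars disabled, or the iteration is too small to be worth a bar.
            return iterable
        return iterate_progress_bar(iterable, total, description)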
@@ -221,7 +219,7 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
          camel_case: bool = True,
          as_write: bool = False,
      ):
-         total, items = cls._from_dataset(client, data_set_external_id)
+         total, items = cls._handle_no_access(lambda: cls._from_dataset(client, data_set_external_id))
          return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)

      @classmethod
@@ -244,7 +242,7 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
          camel_case: bool = True,
          as_write: bool = False,
      ):
-         total, items = cls._from_hierarchy(client, root_asset_external_id)
+         total, items = cls._handle_no_access(lambda: cls._from_hierarchy(client, root_asset_external_id))
          return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)

      @classmethod
@@ -273,3 +271,18 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
      @abstractmethod
      def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[T_CogniteResource]]:
          raise NotImplementedError
+
+     @classmethod
+     def _handle_no_access(
+         cls, action: Callable[[], tuple[int | None, Iterable[T_CogniteResource]]]
+     ) -> tuple[int | None, Iterable[T_CogniteResource]]:
+         try:
+             return action()
+         except CogniteAPIError as e:
+             if e.code == 403:
+                 warnings.warn(
+                     CDFAuthWarning(f"extract {cls.__name__.removesuffix('Extractor').casefold()}", str(e)), stacklevel=2
+                 )
+                 return 0, []
+             else:
+                 raise e
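The effect of _handle_no_access is that the from_dataset/from_hierarchy constructors no longer raise on a 403 but warn and return an empty extractor. An illustrative sketch of the new behaviour (the AssetsExtractor name and import path are assumptions for illustration; any ClassicCDFBaseExtractor subclass behaves the same way):

    # Illustrative only; assumes client credentials are already configured.
    from cognite.client import CogniteClient
    from cognite.neat._graph.extractors import AssetsExtractor

    client = CogniteClient()

    # If the token lacks read access to the data set, a CDFAuthWarning is emitted
    # instead of a CogniteAPIError (403), and the extractor simply yields nothing.
    extractor = AssetsExtractor.from_dataset(client, data_set_external_id="my_dataset")
    triples = list(extractor.extract())  # [] when access was denied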
cognite/neat/_graph/extractors/_classic_cdf/_classic.py CHANGED
@@ -226,4 +226,7 @@ class ClassicGraphExtractor(BaseExtractor):
      @staticmethod
      def _chunk(items: Sequence, description: str) -> Iterable:
          to_iterate: Iterable = chunker(items, chunk_size=1000)
-         return iterate_progress_bar(to_iterate, (len(items) // 1_000) + 1, description)
+         if items:
+             return iterate_progress_bar(to_iterate, (len(items) // 1_000) + 1, description)
+         else:
+             return to_iterate
cognite/neat/_graph/extractors/_classic_cdf/_sequences.py CHANGED
@@ -73,7 +73,7 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
          as_write: bool = False,
          unpack_columns: bool = False,
      ):
-         total, items = cls._from_dataset(client, data_set_external_id)
+         total, items = cls._handle_no_access(lambda: cls._from_dataset(client, data_set_external_id))
          return cls(
              items,
              namespace,
@@ -101,7 +101,7 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
          as_write: bool = False,
          unpack_columns: bool = False,
      ):
-         total, items = cls._from_hierarchy(client, root_asset_external_id)
+         total, items = cls._handle_no_access(lambda: cls._from_hierarchy(client, root_asset_external_id))
          return cls(
              items,
              namespace,
cognite/neat/_graph/loaders/_rdf2dms.py CHANGED
@@ -37,6 +37,7 @@ from cognite.neat._rules.models.entities._single_value import ViewEntity
  from cognite.neat._shared import InstanceType
  from cognite.neat._store import NeatGraphStore
  from cognite.neat._utils.auxiliary import create_sha256_hash
+ from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
  from cognite.neat._utils.rdf_ import remove_namespace_from_uri
  from cognite.neat._utils.upload import UploadResult

@@ -157,7 +158,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
              view_ids.append(f"{view_id!r} (self)")

          tracker = self._tracker(type(self).__name__, view_ids, "views")
-         for view_id, (view, _) in view_and_count_by_id.items():
+         for view_id, (view, instance_count) in view_and_count_by_id.items():
              pydantic_cls, edge_by_type, issues = self._create_validation_classes(view)  # type: ignore[var-annotated]
              yield from issues
              tracker.issue(issues)
@@ -194,7 +195,11 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
              # this assumes no changes in the suffix of view and class
              reader = self.graph_store.read(view.external_id)

-             for identifier, properties in reader:
+             instance_iterable = iterate_progress_bar_if_above_config_threshold(
+                 reader, instance_count, f"Loading {track_id}"
+             )
+
+             for identifier, properties in instance_iterable:
                  if skip_properties:
                      properties = {k: v for k, v in properties.items() if k not in skip_properties}
                  try:
cognite/neat/_graph/transformers/_base.py CHANGED
@@ -8,7 +8,7 @@ from rdflib.query import ResultRow

  from cognite.neat._issues.warnings import NeatValueWarning
  from cognite.neat._shared import Triple
- from cognite.neat._utils.collection_ import iterate_progress_bar
+ from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
  from cognite.neat._utils.graph_transformations_report import GraphTransformationResult

  To_Add_Triples: TypeAlias = list[Triple]
@@ -42,7 +42,6 @@ class BaseTransformerStandardised(ABC):
      description: str
      _use_only_once: bool = False
      _need_changes: ClassVar[frozenset[str]] = frozenset()
-     _use_iterate_bar_threshold: int = 500

      @abstractmethod
      def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
@@ -99,12 +98,9 @@ class BaseTransformerStandardised(ABC):
              return outcome

          result_iterable = graph.query(self._iterate_query())
-         if iteration_count > self._use_iterate_bar_threshold:
-             result_iterable = iterate_progress_bar(  # type: ignore[misc, assignment]
-                 result_iterable,
-                 total=iteration_count,
-                 description=self.description,
-             )
+         result_iterable = iterate_progress_bar_if_above_config_threshold(
+             result_iterable, iteration_count, self.description
+         )

          for row in result_iterable:
              row = cast(ResultRow, row)