cognite-neat 0.105.2__py3-none-any.whl → 0.107.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. (The original page links to more details here.)

Files changed (54)
  1. cognite/neat/_config.py +6 -260
  2. cognite/neat/_graph/extractors/__init__.py +5 -1
  3. cognite/neat/_graph/extractors/_base.py +32 -0
  4. cognite/neat/_graph/extractors/_classic_cdf/_base.py +42 -16
  5. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +78 -8
  6. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +2 -0
  7. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +10 -3
  8. cognite/neat/_graph/extractors/_dms.py +48 -14
  9. cognite/neat/_graph/extractors/_dms_graph.py +149 -0
  10. cognite/neat/_graph/extractors/_rdf_file.py +32 -5
  11. cognite/neat/_graph/loaders/_rdf2dms.py +119 -20
  12. cognite/neat/_graph/queries/_construct.py +1 -1
  13. cognite/neat/_graph/transformers/__init__.py +5 -0
  14. cognite/neat/_graph/transformers/_base.py +13 -9
  15. cognite/neat/_graph/transformers/_classic_cdf.py +141 -44
  16. cognite/neat/_graph/transformers/_rdfpath.py +4 -4
  17. cognite/neat/_graph/transformers/_value_type.py +54 -44
  18. cognite/neat/_issues/warnings/_external.py +1 -1
  19. cognite/neat/_rules/analysis/_base.py +1 -1
  20. cognite/neat/_rules/analysis/_information.py +14 -13
  21. cognite/neat/_rules/catalog/__init__.py +1 -0
  22. cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
  23. cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
  24. cognite/neat/_rules/importers/_dms2rules.py +7 -5
  25. cognite/neat/_rules/importers/_rdf/_inference2rules.py +5 -3
  26. cognite/neat/_rules/models/_base_rules.py +0 -12
  27. cognite/neat/_rules/models/_types.py +5 -0
  28. cognite/neat/_rules/models/dms/_rules.py +50 -2
  29. cognite/neat/_rules/models/information/_rules.py +48 -5
  30. cognite/neat/_rules/models/information/_rules_input.py +1 -1
  31. cognite/neat/_rules/models/mapping/_classic2core.py +4 -5
  32. cognite/neat/_rules/models/mapping/_classic2core.yaml +70 -58
  33. cognite/neat/_rules/transformers/__init__.py +4 -0
  34. cognite/neat/_rules/transformers/_converters.py +209 -62
  35. cognite/neat/_rules/transformers/_mapping.py +3 -2
  36. cognite/neat/_session/_base.py +8 -13
  37. cognite/neat/_session/_inspect.py +6 -2
  38. cognite/neat/_session/_mapping.py +22 -13
  39. cognite/neat/_session/_prepare.py +9 -57
  40. cognite/neat/_session/_read.py +96 -29
  41. cognite/neat/_session/_set.py +9 -0
  42. cognite/neat/_session/_state.py +10 -1
  43. cognite/neat/_session/_to.py +51 -15
  44. cognite/neat/_session/exceptions.py +7 -3
  45. cognite/neat/_store/_graph_store.py +85 -39
  46. cognite/neat/_store/_rules_store.py +22 -0
  47. cognite/neat/_utils/auth.py +2 -0
  48. cognite/neat/_utils/collection_.py +32 -11
  49. cognite/neat/_version.py +1 -1
  50. {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/METADATA +2 -8
  51. {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/RECORD +54 -52
  52. {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/WHEEL +1 -1
  53. {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/LICENSE +0 -0
  54. {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/entry_points.txt +0 -0
cognite/neat/_config.py CHANGED
@@ -1,265 +1,11 @@
1
- import json
2
- import logging
3
- import os
4
- import shutil
5
- import sys
6
- from pathlib import Path
7
- from typing import Any, Literal, cast
1
+ from typing import Literal
8
2
 
9
- import yaml
10
- from pydantic import BaseModel, Field, model_validator
11
- from yaml import safe_load
3
+ from pydantic import BaseModel
12
4
 
13
- from cognite.neat._constants import EXAMPLE_GRAPHS, EXAMPLE_RULES, EXAMPLE_WORKFLOWS
14
- from cognite.neat._utils.auth import EnvironmentVariables
15
5
 
16
- if sys.version_info >= (3, 11):
17
- from enum import StrEnum
18
- from typing import Self
19
- else:
20
- from backports.strenum import StrEnum
21
- from typing_extensions import Self
6
+ class NeatConfig(BaseModel, validate_assignment=True):
7
+ progress_bar: Literal["tqdm", "rich", "tqdm-notebook", "infer"] | None = "infer"
8
+ use_iterate_bar_threshold: int | None = 500
22
9
 
23
- LOG_FORMAT = "%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s"
24
- LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
25
10
 
26
-
27
- class RulesStoreType(StrEnum):
28
- """Rules Store type"""
29
-
30
- CDF = "cdf"
31
- FILE = "file"
32
- URL = "url"
33
- GOOGLE_SHEET = "google_sheet"
34
-
35
-
36
- class WorkflowsStoreType(StrEnum):
37
- """Workflows Store type"""
38
-
39
- CDF = "cdf"
40
- FILE = "file"
41
- URL = "url"
42
-
43
-
44
- class Config(BaseModel, arbitrary_types_allowed=True):
45
- workflows_store_type: WorkflowsStoreType = WorkflowsStoreType.FILE
46
- data_store_path: Path = Field(default_factory=lambda: Path.cwd() / "data")
47
-
48
- workflow_downloader_filter: list[str] | None = Field(
49
- description="List of workflow names+tags to filter on when downloading workflows from CDF. "
50
- "Example name:workflow_name=version,tag:tag_name",
51
- default=None,
52
- )
53
-
54
- cdf_auth_config: EnvironmentVariables = Field(default_factory=EnvironmentVariables.default)
55
- cdf_default_dataset_id: int = 0
56
- load_examples: bool = True
57
-
58
- log_level: Literal["ERROR", "WARNING", "INFO", "DEBUG"] = "INFO"
59
- log_format: str = LOG_FORMAT
60
- download_workflows_from_cdf: bool = Field(
61
- default=False,
62
- description="Downloads all workflows from CDF automatically and stores them locally",
63
- )
64
- stop_on_error: bool = False
65
-
66
- @model_validator(mode="before")
67
- def backwards_compatible(cls, data: Any):
68
- if not isinstance(data, dict):
69
- return data
70
- if "cdf_client" in data:
71
- cdf_client = data["cdf_client"]
72
- if isinstance(cdf_client, dict):
73
- if "base_url" in cdf_client:
74
- base_url = cdf_client["base_url"]
75
- cluster = base_url.removeprefix("https://").removesuffix(".cognitedata.com")
76
- else:
77
- base_url, cluster = "Missing", "Missing"
78
- if "scopes" in cdf_client:
79
- scopes = cdf_client["scopes"]
80
- if isinstance(scopes, list):
81
- scopes = ",".join(scopes)
82
- else:
83
- scopes = "Missing"
84
- data["cdf_auth_config"] = EnvironmentVariables(
85
- CDF_PROJECT=cdf_client.get("project", "Missing"),
86
- CDF_CLUSTER=cluster,
87
- CDF_URL=base_url,
88
- IDP_CLIENT_ID=cdf_client.get("client_id", "Missing"),
89
- IDP_CLIENT_SECRET=cdf_client.get("client_secret", "Missing"),
90
- IDP_TOKEN_URL=cdf_client.get("token_url", "Missing"),
91
- IDP_SCOPES=scopes,
92
- CDF_TIMEOUT=int(cdf_client.get("timeout", 60)),
93
- CDF_MAX_WORKERS=int(cdf_client.get("max_workers", 3)),
94
- )
95
- return data
96
-
97
- def as_legacy_config(
98
- self,
99
- ) -> dict[str, Any]:
100
- config: dict[str, Any] = {}
101
-
102
- config["workflows_store_type"] = self.workflows_store_type
103
- config["data_store_path"] = str(self.data_store_path)
104
- config["workflows_downloader_filter"] = self.workflow_downloader_filter
105
-
106
- config["cdf_client"] = {}
107
- if self.cdf_auth_config.CDF_PROJECT not in {"Missing", "NOT SET"}:
108
- config["cdf_client"]["project"] = self.cdf_auth_config.CDF_PROJECT
109
- if self.cdf_auth_config.CDF_CLUSTER not in {"Missing", "NOT SET"}:
110
- config["cdf_client"]["cluster"] = self.cdf_auth_config.CDF_CLUSTER
111
- if self.cdf_auth_config.CDF_URL:
112
- config["cdf_client"]["base_url"] = self.cdf_auth_config.CDF_URL
113
- if self.cdf_auth_config.IDP_CLIENT_ID:
114
- config["cdf_client"]["client_id"] = self.cdf_auth_config.IDP_CLIENT_ID
115
- if self.cdf_auth_config.IDP_CLIENT_SECRET:
116
- config["cdf_client"]["client_secret"] = self.cdf_auth_config.IDP_CLIENT_SECRET
117
- if self.cdf_auth_config.IDP_TOKEN_URL:
118
- config["cdf_client"]["token_url"] = self.cdf_auth_config.IDP_TOKEN_URL
119
- if self.cdf_auth_config.IDP_SCOPES:
120
- config["cdf_client"]["scopes"] = self.cdf_auth_config.idp_scopes
121
- if self.cdf_auth_config.CDF_TIMEOUT:
122
- config["cdf_client"]["timeout"] = self.cdf_auth_config.CDF_TIMEOUT
123
- if self.cdf_auth_config.CDF_MAX_WORKERS:
124
- config["cdf_client"]["max_workers"] = self.cdf_auth_config.CDF_MAX_WORKERS
125
-
126
- config["cdf_default_dataset_id"] = self.cdf_default_dataset_id
127
- config["load_examples"] = self.load_examples
128
- config["log_level"] = self.log_level
129
- config["log_format"] = self.log_format
130
- config["download_workflows_from_cdf"] = self.download_workflows_from_cdf
131
- config["stop_on_error"] = self.stop_on_error
132
-
133
- return config
134
-
135
- @property
136
- def _dir_suffix(self) -> str:
137
- is_test_running = "pytest" in sys.modules
138
- if is_test_running:
139
- # Todo change the below to f"-{os.getpid()}" when all tests supports parallel execution.
140
- return ""
141
- return ""
142
-
143
- @property
144
- def rules_store_path(self) -> Path:
145
- return self.data_store_path / f"rules{self._dir_suffix}"
146
-
147
- @property
148
- def workflows_store_path(self) -> Path:
149
- return self.data_store_path / f"workflows{self._dir_suffix}"
150
-
151
- @property
152
- def source_graph_path(self) -> Path:
153
- return self.data_store_path / f"source-graphs{self._dir_suffix}"
154
-
155
- @property
156
- def staging_path(self) -> Path:
157
- return self.data_store_path / f"staging{self._dir_suffix}"
158
-
159
- @classmethod
160
- def from_yaml(cls, filepath: Path) -> Self:
161
- return cls(**safe_load(filepath.read_text()))
162
-
163
- def to_yaml(self, filepath: Path):
164
- # Parse as json to avoid Path and Enum objects
165
- dump = json.loads(self.model_dump_json())
166
-
167
- with filepath.open("w") as f:
168
- yaml.safe_dump(dump, f)
169
-
170
- @classmethod
171
- def from_env(cls) -> Self:
172
- missing = "Missing"
173
- # This is to be backwards compatible with the old config
174
-
175
- base_url: str | None = None
176
- if "NEAT_CDF_BASE_URL" in os.environ:
177
- base_url = os.environ["NEAT_CDF_BASE_URL"]
178
- if isinstance(base_url, str):
179
- cluster = base_url.removeprefix("https://").removesuffix(".cognitedata.com")
180
- else:
181
- cluster = missing
182
- variables = EnvironmentVariables(
183
- CDF_PROJECT=os.environ.get("NEAT_CDF_PROJECT", missing),
184
- CDF_CLUSTER=cluster,
185
- CDF_URL=base_url,
186
- IDP_CLIENT_ID=os.environ.get("NEAT_CDF_CLIENT_ID"),
187
- IDP_CLIENT_SECRET=os.environ.get("NEAT_CDF_CLIENT_SECRET"),
188
- IDP_TOKEN_URL=os.environ.get("NEAT_CDF_TOKEN_URL"),
189
- IDP_SCOPES=os.environ.get("NEAT_CDF_SCOPES"),
190
- CDF_TIMEOUT=int(os.environ["NEAT_CDF_CLIENT_TIMEOUT"] if "NEAT_CDF_CLIENT_TIMEOUT" in os.environ else 60),
191
- CDF_MAX_WORKERS=int(
192
- os.environ["NEAT_CDF_CLIENT_MAX_WORKERS"] if "NEAT_CDF_CLIENT_MAX_WORKERS" in os.environ else 3
193
- ),
194
- )
195
-
196
- if workflow_downloader_filter_value := os.environ.get("NEAT_WORKFLOW_DOWNLOADER_FILTER", None):
197
- workflow_downloader_filter = workflow_downloader_filter_value.split(",")
198
- else:
199
- workflow_downloader_filter = None
200
-
201
- return cls(
202
- cdf_auth_config=variables,
203
- workflows_store_type=os.environ.get( # type: ignore[arg-type]
204
- "NEAT_WORKFLOWS_STORE_TYPE", WorkflowsStoreType.FILE
205
- ),
206
- data_store_path=Path(os.environ.get("NEAT_DATA_PATH", "_app/data")),
207
- cdf_default_dataset_id=int(os.environ.get("NEAT_CDF_DEFAULT_DATASET_ID", 6476640149881990)),
208
- log_level=cast(
209
- Literal["ERROR", "WARNING", "INFO", "DEBUG"],
210
- os.environ.get("NEAT_LOG_LEVEL", "INFO"),
211
- ),
212
- workflow_downloader_filter=workflow_downloader_filter,
213
- load_examples=bool(os.environ.get("NEAT_LOAD_EXAMPLES", True) in ["True", "true", "1"]),
214
- )
215
-
216
-
217
- def copy_examples_to_directory(config: Config):
218
- """
219
- Copier over all the examples to the target_data_directory,
220
- without overwriting
221
-
222
- Args:
223
- target_data_dir : The target directory
224
- suffix : The suffix to add to the directory names
225
-
226
- """
227
-
228
- print(f"Copying examples into {config.data_store_path}")
229
- _copy_examples(EXAMPLE_RULES, config.rules_store_path)
230
- _copy_examples(EXAMPLE_GRAPHS, config.source_graph_path)
231
- _copy_examples(EXAMPLE_WORKFLOWS, config.workflows_store_path)
232
- config.staging_path.mkdir(exist_ok=True, parents=True)
233
-
234
-
235
- def create_data_dir_structure(config: Config) -> None:
236
- """
237
- Create the data directory structure in empty directory
238
-
239
- Args:
240
- target_data_dir : The target directory
241
- suffix : The suffix to add to the directory names
242
-
243
- """
244
- for path in (
245
- config.rules_store_path,
246
- config.source_graph_path,
247
- config.staging_path,
248
- config.workflows_store_path,
249
- ):
250
- path.mkdir(exist_ok=True, parents=True)
251
-
252
-
253
- def _copy_examples(source_dir: Path, target_dir: Path):
254
- for current in source_dir.rglob("*"):
255
- if current.is_dir():
256
- continue
257
- relative = current.relative_to(source_dir)
258
- if not (target := target_dir / relative).exists():
259
- target.parent.mkdir(exist_ok=True, parents=True)
260
- shutil.copy2(current, target)
261
-
262
-
263
- def configure_logging(level: str = "DEBUG", log_format: str = LOG_FORMAT):
264
- """Configure logging based on config."""
265
- logging.basicConfig(format=log_format, level=logging.getLevelName(level), datefmt=LOG_DATE_FORMAT)
11
+ GLOBAL_CONFIG = NeatConfig()
cognite/neat/_graph/extractors/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from cognite.neat._session.engine._interface import Extractor as EngineExtractor
2
2
 
3
- from ._base import BaseExtractor
3
+ from ._base import BaseExtractor, KnowledgeGraphExtractor
4
4
  from ._classic_cdf._assets import AssetsExtractor
5
5
  from ._classic_cdf._classic import ClassicGraphExtractor
6
6
  from ._classic_cdf._data_sets import DataSetExtractor
@@ -12,6 +12,7 @@ from ._classic_cdf._sequences import SequencesExtractor
12
12
  from ._classic_cdf._timeseries import TimeSeriesExtractor
13
13
  from ._dexpi import DexpiExtractor
14
14
  from ._dms import DMSExtractor
15
+ from ._dms_graph import DMSGraphExtractor
15
16
  from ._iodd import IODDExtractor
16
17
  from ._mock_graph_generator import MockGraphGenerator
17
18
  from ._rdf_file import RdfFileExtractor
@@ -21,11 +22,13 @@ __all__ = [
21
22
  "BaseExtractor",
22
23
  "ClassicGraphExtractor",
23
24
  "DMSExtractor",
25
+ "DMSGraphExtractor",
24
26
  "DataSetExtractor",
25
27
  "DexpiExtractor",
26
28
  "EventsExtractor",
27
29
  "FilesExtractor",
28
30
  "IODDExtractor",
31
+ "KnowledgeGraphExtractor",
29
32
  "LabelsExtractor",
30
33
  "MockGraphGenerator",
31
34
  "RdfFileExtractor",
@@ -51,6 +54,7 @@ TripleExtractors = (
51
54
  | ClassicGraphExtractor
52
55
  | DataSetExtractor
53
56
  | EngineExtractor
57
+ | DMSGraphExtractor
54
58
  )
55
59
 
56
60
 
cognite/neat/_graph/extractors/_base.py CHANGED
@@ -1,9 +1,17 @@
1
1
  from abc import abstractmethod
2
2
  from collections.abc import Iterable
3
+ from typing import TYPE_CHECKING
3
4
 
5
+ from rdflib import URIRef
6
+
7
+ from cognite.neat._constants import DEFAULT_NAMESPACE
8
+ from cognite.neat._rules.models import InformationRules
4
9
  from cognite.neat._shared import Triple
5
10
  from cognite.neat._utils.auxiliary import class_html_doc
6
11
 
12
+ if TYPE_CHECKING:
13
+ from cognite.neat._store._provenance import Agent as ProvenanceAgent
14
+
7
15
 
8
16
  class BaseExtractor:
9
17
  """This is the base class for all extractors. It defines the interface that
@@ -24,3 +32,27 @@ class BaseExtractor:
24
32
  @classmethod
25
33
  def _repr_html_(cls) -> str:
26
34
  return class_html_doc(cls)
35
+
36
+
37
+ class KnowledgeGraphExtractor(BaseExtractor):
38
+ """A knowledge graph extractor extracts triples with a schema"""
39
+
40
+ @abstractmethod
41
+ def get_information_rules(self) -> InformationRules:
42
+ """Returns the information rules that the extractor uses."""
43
+ raise NotImplementedError()
44
+
45
+ @property
46
+ def description(self) -> str:
47
+ return self.__doc__.strip().split("\n")[0] if self.__doc__ else "Missing"
48
+
49
+ @property
50
+ def source_uri(self) -> URIRef:
51
+ raise NotImplementedError
52
+
53
+ @property
54
+ def agent(self) -> "ProvenanceAgent":
55
+ """Provenance agent for the importer."""
56
+ from cognite.neat._store._provenance import Agent as ProvenanceAgent
57
+
58
+ return ProvenanceAgent(id_=DEFAULT_NAMESPACE[f"agent/{type(self).__name__}"])
cognite/neat/_graph/extractors/_classic_cdf/_base.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import json
2
2
  import re
3
3
  import sys
4
+ import warnings
4
5
  from abc import ABC, abstractmethod
5
6
  from collections.abc import Callable, Iterable, Sequence, Set
6
7
  from datetime import datetime, timezone
@@ -9,13 +10,16 @@ from typing import Any, Generic, TypeVar
9
10
 
10
11
  from cognite.client import CogniteClient
11
12
  from cognite.client.data_classes._base import WriteableCogniteResource
13
+ from cognite.client.exceptions import CogniteAPIError
12
14
  from pydantic import AnyHttpUrl, ValidationError
13
15
  from rdflib import RDF, XSD, Literal, Namespace, URIRef
14
16
 
15
17
  from cognite.neat._constants import DEFAULT_NAMESPACE
16
18
  from cognite.neat._graph.extractors._base import BaseExtractor
19
+ from cognite.neat._issues.warnings import CDFAuthWarning
17
20
  from cognite.neat._shared import Triple
18
21
  from cognite.neat._utils.auxiliary import string_to_ideal_type
22
+ from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
19
23
 
20
24
  T_CogniteResource = TypeVar("T_CogniteResource", bound=WriteableCogniteResource)
21
25
 
@@ -85,6 +89,7 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
85
89
  skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
86
90
  camel_case: bool = True,
87
91
  as_write: bool = False,
92
+ prefix: str | None = None,
88
93
  ):
89
94
  self.namespace = namespace or DEFAULT_NAMESPACE
90
95
  self.items = items
@@ -95,20 +100,15 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
95
100
  self.skip_metadata_values = skip_metadata_values
96
101
  self.camel_case = camel_case
97
102
  self.as_write = as_write
103
+ self.prefix = prefix
98
104
 
99
105
  def extract(self) -> Iterable[Triple]:
100
106
  """Extracts an asset with the given asset_id."""
101
- if self.total:
102
- try:
103
- from rich.progress import track
104
- except ModuleNotFoundError:
105
- to_iterate = self.items
106
- else:
107
- to_iterate = track(
108
- self.items,
109
- total=self.limit or self.total,
110
- description=f"Extracting {type(self).__name__.removesuffix('Extractor')}",
111
- )
107
+
108
+ if self.total is not None and self.total > 0:
109
+ to_iterate = iterate_progress_bar_if_above_config_threshold(
110
+ self.items, self.total, f"Extracting {type(self).__name__.removesuffix('Extractor')}"
111
+ )
112
112
  else:
113
113
  to_iterate = self.items
114
114
  for no, asset in enumerate(to_iterate):
@@ -176,6 +176,8 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
176
176
  type_ = self._default_rdf_type
177
177
  if self.to_type:
178
178
  type_ = self.to_type(item) or type_
179
+ if self.prefix:
180
+ type_ = f"{self.prefix}{type_}"
179
181
  return self._SPACE_PATTERN.sub("_", type_)
180
182
 
181
183
  def _as_object(self, raw: Any, key: str) -> Literal | URIRef:
@@ -220,9 +222,12 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
220
222
  skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
221
223
  camel_case: bool = True,
222
224
  as_write: bool = False,
225
+ prefix: str | None = None,
223
226
  ):
224
- total, items = cls._from_dataset(client, data_set_external_id)
225
- return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)
227
+ total, items = cls._handle_no_access(lambda: cls._from_dataset(client, data_set_external_id))
228
+ return cls(
229
+ items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write, prefix
230
+ )
226
231
 
227
232
  @classmethod
228
233
  @abstractmethod
@@ -243,9 +248,12 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
243
248
  skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
244
249
  camel_case: bool = True,
245
250
  as_write: bool = False,
251
+ prefix: str | None = None,
246
252
  ):
247
- total, items = cls._from_hierarchy(client, root_asset_external_id)
248
- return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)
253
+ total, items = cls._handle_no_access(lambda: cls._from_hierarchy(client, root_asset_external_id))
254
+ return cls(
255
+ items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write, prefix
256
+ )
249
257
 
250
258
  @classmethod
251
259
  @abstractmethod
@@ -265,11 +273,29 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
265
273
  skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
266
274
  camel_case: bool = True,
267
275
  as_write: bool = False,
276
+ prefix: str | None = None,
268
277
  ):
269
278
  total, items = cls._from_file(file_path)
270
- return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)
279
+ return cls(
280
+ items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write, prefix
281
+ )
271
282
 
272
283
  @classmethod
273
284
  @abstractmethod
274
285
  def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[T_CogniteResource]]:
275
286
  raise NotImplementedError
287
+
288
+ @classmethod
289
+ def _handle_no_access(
290
+ cls, action: Callable[[], tuple[int | None, Iterable[T_CogniteResource]]]
291
+ ) -> tuple[int | None, Iterable[T_CogniteResource]]:
292
+ try:
293
+ return action()
294
+ except CogniteAPIError as e:
295
+ if e.code == 403:
296
+ warnings.warn(
297
+ CDFAuthWarning(f"extract {cls.__name__.removesuffix('Extractor').casefold()}", str(e)), stacklevel=2
298
+ )
299
+ return 0, []
300
+ else:
301
+ raise e
cognite/neat/_graph/extractors/_classic_cdf/_classic.py CHANGED
@@ -1,18 +1,25 @@
1
1
  import warnings
2
2
  from collections import defaultdict
3
3
  from collections.abc import Iterable, Sequence
4
- from typing import ClassVar, NamedTuple
4
+ from typing import ClassVar, NamedTuple, cast
5
5
 
6
6
  from cognite.client import CogniteClient
7
7
  from cognite.client.exceptions import CogniteAPIError
8
- from rdflib import Namespace
8
+ from rdflib import Namespace, URIRef
9
9
 
10
- from cognite.neat._constants import CLASSIC_CDF_NAMESPACE
11
- from cognite.neat._graph.extractors._base import BaseExtractor
10
+ from cognite.neat._constants import CLASSIC_CDF_NAMESPACE, DEFAULT_NAMESPACE, get_default_prefixes_and_namespaces
11
+ from cognite.neat._graph.extractors._base import KnowledgeGraphExtractor
12
+ from cognite.neat._issues.errors import NeatValueError
12
13
  from cognite.neat._issues.warnings import CDFAuthWarning
14
+ from cognite.neat._rules._shared import ReadRules
15
+ from cognite.neat._rules.catalog import classic_model
16
+ from cognite.neat._rules.models import InformationInputRules, InformationRules
17
+ from cognite.neat._rules.models._rdfpath import Entity as RDFPathEntity
18
+ from cognite.neat._rules.models._rdfpath import RDFPath, SingleProperty
13
19
  from cognite.neat._shared import Triple
14
20
  from cognite.neat._utils.collection_ import chunker, iterate_progress_bar
15
21
  from cognite.neat._utils.rdf_ import remove_namespace_from_uri
22
+ from cognite.neat._utils.text import to_snake
16
23
 
17
24
  from ._assets import AssetsExtractor
18
25
  from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix
@@ -37,7 +44,7 @@ class _ClassicCoreType(NamedTuple):
37
44
  api_name: str
38
45
 
39
46
 
40
- class ClassicGraphExtractor(BaseExtractor):
47
+ class ClassicGraphExtractor(KnowledgeGraphExtractor):
41
48
  """This extractor extracts all classic CDF Resources.
42
49
 
43
50
  The Classic Graph consists of the following core resource type.
@@ -93,6 +100,7 @@ class ClassicGraphExtractor(BaseExtractor):
93
100
  root_asset_external_id: str | None = None,
94
101
  namespace: Namespace | None = None,
95
102
  limit_per_type: int | None = None,
103
+ prefix: str | None = None,
96
104
  ):
97
105
  self._client = client
98
106
  if sum([bool(data_set_external_id), bool(root_asset_external_id)]) != 1:
@@ -101,8 +109,14 @@ class ClassicGraphExtractor(BaseExtractor):
101
109
  self._data_set_external_id = data_set_external_id
102
110
  self._namespace = namespace or CLASSIC_CDF_NAMESPACE
103
111
  self._extractor_args = dict(
104
- namespace=self._namespace, unpack_metadata=False, as_write=True, camel_case=True, limit=limit_per_type
112
+ namespace=self._namespace,
113
+ unpack_metadata=False,
114
+ as_write=True,
115
+ camel_case=True,
116
+ limit=limit_per_type,
117
+ prefix=prefix,
105
118
  )
119
+ self._prefix = prefix
106
120
  self._limit_per_type = limit_per_type
107
121
 
108
122
  self._source_external_ids_by_type: dict[InstanceIdPrefix, set[str]] = defaultdict(set)
@@ -144,6 +158,59 @@ class ClassicGraphExtractor(BaseExtractor):
144
158
  else:
145
159
  self._extracted_data_sets = True
146
160
 
161
+ def get_information_rules(self) -> InformationRules:
162
+ # To avoid circular imports
163
+ from cognite.neat._rules.importers import ExcelImporter
164
+
165
+ unverified = cast(ReadRules[InformationInputRules], ExcelImporter(classic_model).to_rules())
166
+ if unverified.rules is None:
167
+ raise NeatValueError(f"Could not read the classic model rules from {classic_model}.")
168
+
169
+ verified = unverified.rules.as_verified_rules()
170
+ prefixes = get_default_prefixes_and_namespaces()
171
+ instance_prefix: str | None = next((k for k, v in prefixes.items() if v == self._namespace), None)
172
+ if instance_prefix is None:
173
+ # We need to add a new prefix
174
+ instance_prefix = f"prefix_{len(prefixes) + 1}"
175
+ prefixes[instance_prefix] = self._namespace
176
+ verified.prefixes = prefixes
177
+
178
+ is_snake_case = self._extractor_args["camel_case"] is False
179
+ for prop in verified.properties:
180
+ prop_id = prop.property_
181
+ if is_snake_case:
182
+ prop_id = to_snake(prop_id)
183
+ prop.instance_source = RDFPath(
184
+ traversal=SingleProperty(
185
+ class_=RDFPathEntity(prefix=instance_prefix, suffix=prop.class_.suffix),
186
+ property=RDFPathEntity(prefix=instance_prefix, suffix=prop_id),
187
+ )
188
+ )
189
+ return verified
190
+
191
+ @property
192
+ def description(self) -> str:
193
+ if self._data_set_external_id:
194
+ source = f"data set {self._data_set_external_id}."
195
+ elif self._root_asset_external_id:
196
+ source = f"root asset {self._root_asset_external_id}."
197
+ else:
198
+ source = "unknown source."
199
+ return f"Extracting clasic CDF Graph (Assets, TimeSeries, Sequences, Events, Files) from {source}."
200
+
201
+ @property
202
+ def source_uri(self) -> URIRef:
203
+ if self._data_set_external_id:
204
+ resource = "dataset"
205
+ external_id = self._data_set_external_id
206
+ elif self._root_asset_external_id:
207
+ resource = "asset"
208
+ external_id = self._root_asset_external_id
209
+ else:
210
+ resource = "unknown"
211
+ external_id = "unknown"
212
+ return DEFAULT_NAMESPACE[f"{self._client.config.project}/{resource}/{external_id}"]
213
+
147
214
  def _extract_core_start_nodes(self):
148
215
  for core_node in self._classic_node_types:
149
216
  if self._data_set_external_id:
@@ -217,7 +284,7 @@ class ClassicGraphExtractor(BaseExtractor):
217
284
  self._source_external_ids_by_type[resource_type].add(remove_namespace_from_uri(triple[2]))
218
285
  elif triple[1] == self._namespace.labels:
219
286
  self._labels.add(remove_namespace_from_uri(triple[2]).removeprefix(InstanceIdPrefix.label))
220
- elif triple[1] == self._namespace.datasetId:
287
+ elif triple[1] == self._namespace.dataSetId:
221
288
  self._data_set_ids.add(
222
289
  int(remove_namespace_from_uri(triple[2]).removeprefix(InstanceIdPrefix.data_set))
223
290
  )
@@ -226,4 +293,7 @@ class ClassicGraphExtractor(BaseExtractor):
226
293
  @staticmethod
227
294
  def _chunk(items: Sequence, description: str) -> Iterable:
228
295
  to_iterate: Iterable = chunker(items, chunk_size=1000)
229
- return iterate_progress_bar(to_iterate, (len(items) // 1_000) + 1, description)
296
+ if items:
297
+ return iterate_progress_bar(to_iterate, (len(items) // 1_000) + 1, description)
298
+ else:
299
+ return to_iterate
cognite/neat/_graph/extractors/_classic_cdf/_relationships.py CHANGED
@@ -28,6 +28,7 @@ class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
28
28
  skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
29
29
  camel_case: bool = True,
30
30
  as_write: bool = False,
31
+ prefix: str | None = None,
31
32
  ):
32
33
  super().__init__(
33
34
  items,
@@ -39,6 +40,7 @@ class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
39
40
  skip_metadata_values=skip_metadata_values,
40
41
  camel_case=camel_case,
41
42
  as_write=as_write,
43
+ prefix=prefix,
42
44
  )
43
45
  # This is used by the ClassicExtractor to log the target nodes, such
44
46
  # that it can extract them.