cognite-neat 0.98.0__py3-none-any.whl → 0.99.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (103)
  1. cognite/neat/_client/__init__.py +4 -0
  2. cognite/neat/_client/_api/data_modeling_loaders.py +585 -0
  3. cognite/neat/_client/_api/schema.py +111 -0
  4. cognite/neat/_client/_api_client.py +17 -0
  5. cognite/neat/_client/data_classes/__init__.py +0 -0
  6. cognite/neat/{_utils/cdf/data_classes.py → _client/data_classes/data_modeling.py} +8 -135
  7. cognite/neat/_client/data_classes/schema.py +495 -0
  8. cognite/neat/_constants.py +27 -4
  9. cognite/neat/_graph/_shared.py +14 -15
  10. cognite/neat/_graph/extractors/_classic_cdf/_assets.py +14 -154
  11. cognite/neat/_graph/extractors/_classic_cdf/_base.py +154 -7
  12. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +25 -14
  13. cognite/neat/_graph/extractors/_classic_cdf/_data_sets.py +17 -92
  14. cognite/neat/_graph/extractors/_classic_cdf/_events.py +13 -162
  15. cognite/neat/_graph/extractors/_classic_cdf/_files.py +15 -179
  16. cognite/neat/_graph/extractors/_classic_cdf/_labels.py +32 -100
  17. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +27 -178
  18. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +14 -139
  19. cognite/neat/_graph/extractors/_classic_cdf/_timeseries.py +15 -173
  20. cognite/neat/_graph/extractors/_rdf_file.py +6 -7
  21. cognite/neat/_graph/loaders/_rdf2dms.py +2 -2
  22. cognite/neat/_graph/queries/_base.py +17 -1
  23. cognite/neat/_graph/transformers/_classic_cdf.py +74 -147
  24. cognite/neat/_graph/transformers/_prune_graph.py +1 -1
  25. cognite/neat/_graph/transformers/_rdfpath.py +1 -1
  26. cognite/neat/_issues/_base.py +26 -17
  27. cognite/neat/_issues/errors/__init__.py +4 -2
  28. cognite/neat/_issues/errors/_external.py +7 -0
  29. cognite/neat/_issues/errors/_properties.py +2 -7
  30. cognite/neat/_issues/errors/_resources.py +1 -1
  31. cognite/neat/_issues/warnings/__init__.py +8 -0
  32. cognite/neat/_issues/warnings/_external.py +16 -0
  33. cognite/neat/_issues/warnings/_properties.py +16 -0
  34. cognite/neat/_issues/warnings/_resources.py +26 -2
  35. cognite/neat/_issues/warnings/user_modeling.py +4 -4
  36. cognite/neat/_rules/_constants.py +8 -11
  37. cognite/neat/_rules/analysis/_base.py +8 -4
  38. cognite/neat/_rules/exporters/_base.py +3 -4
  39. cognite/neat/_rules/exporters/_rules2dms.py +33 -46
  40. cognite/neat/_rules/importers/__init__.py +1 -3
  41. cognite/neat/_rules/importers/_base.py +1 -1
  42. cognite/neat/_rules/importers/_dms2rules.py +6 -29
  43. cognite/neat/_rules/importers/_rdf/__init__.py +5 -0
  44. cognite/neat/_rules/importers/_rdf/_base.py +34 -11
  45. cognite/neat/_rules/importers/_rdf/_imf2rules.py +91 -0
  46. cognite/neat/_rules/importers/_rdf/_inference2rules.py +43 -35
  47. cognite/neat/_rules/importers/_rdf/_owl2rules.py +80 -0
  48. cognite/neat/_rules/importers/_rdf/_shared.py +138 -441
  49. cognite/neat/_rules/models/__init__.py +1 -1
  50. cognite/neat/_rules/models/_base_rules.py +22 -12
  51. cognite/neat/_rules/models/dms/__init__.py +4 -2
  52. cognite/neat/_rules/models/dms/_exporter.py +45 -48
  53. cognite/neat/_rules/models/dms/_rules.py +20 -17
  54. cognite/neat/_rules/models/dms/_rules_input.py +52 -8
  55. cognite/neat/_rules/models/dms/_validation.py +391 -119
  56. cognite/neat/_rules/models/entities/_single_value.py +32 -4
  57. cognite/neat/_rules/models/information/__init__.py +2 -0
  58. cognite/neat/_rules/models/information/_rules.py +0 -67
  59. cognite/neat/_rules/models/information/_validation.py +9 -9
  60. cognite/neat/_rules/models/mapping/__init__.py +2 -3
  61. cognite/neat/_rules/models/mapping/_classic2core.py +36 -146
  62. cognite/neat/_rules/models/mapping/_classic2core.yaml +343 -0
  63. cognite/neat/_rules/transformers/__init__.py +2 -2
  64. cognite/neat/_rules/transformers/_converters.py +110 -11
  65. cognite/neat/_rules/transformers/_mapping.py +105 -30
  66. cognite/neat/_rules/transformers/_pipelines.py +1 -1
  67. cognite/neat/_rules/transformers/_verification.py +31 -3
  68. cognite/neat/_session/_base.py +24 -8
  69. cognite/neat/_session/_drop.py +35 -0
  70. cognite/neat/_session/_inspect.py +17 -5
  71. cognite/neat/_session/_mapping.py +39 -0
  72. cognite/neat/_session/_prepare.py +219 -23
  73. cognite/neat/_session/_read.py +49 -12
  74. cognite/neat/_session/_to.py +8 -5
  75. cognite/neat/_session/exceptions.py +4 -0
  76. cognite/neat/_store/_base.py +27 -24
  77. cognite/neat/_utils/rdf_.py +34 -5
  78. cognite/neat/_version.py +1 -1
  79. cognite/neat/_workflows/steps/lib/current/rules_exporter.py +5 -88
  80. cognite/neat/_workflows/steps/lib/current/rules_importer.py +3 -14
  81. cognite/neat/_workflows/steps/lib/current/rules_validator.py +6 -7
  82. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/METADATA +3 -3
  83. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/RECORD +87 -92
  84. cognite/neat/_rules/importers/_rdf/_imf2rules/__init__.py +0 -3
  85. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2classes.py +0 -86
  86. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2metadata.py +0 -29
  87. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2properties.py +0 -130
  88. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2rules.py +0 -154
  89. cognite/neat/_rules/importers/_rdf/_owl2rules/__init__.py +0 -3
  90. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2classes.py +0 -58
  91. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2metadata.py +0 -65
  92. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2properties.py +0 -59
  93. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2rules.py +0 -39
  94. cognite/neat/_rules/models/dms/_schema.py +0 -1101
  95. cognite/neat/_rules/models/mapping/_base.py +0 -131
  96. cognite/neat/_utils/cdf/loaders/__init__.py +0 -25
  97. cognite/neat/_utils/cdf/loaders/_base.py +0 -54
  98. cognite/neat/_utils/cdf/loaders/_data_modeling.py +0 -339
  99. cognite/neat/_utils/cdf/loaders/_ingestion.py +0 -167
  100. /cognite/neat/{_utils/cdf → _client/_api}/__init__.py +0 -0
  101. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/LICENSE +0 -0
  102. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/WHEEL +0 -0
  103. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/entry_points.txt +0 -0
@@ -1,177 +1,37 @@
1
- from collections.abc import Callable, Iterable, Set
2
- from datetime import datetime, timezone
1
+ from collections.abc import Iterable
3
2
  from pathlib import Path
4
3
  from typing import cast
5
4
 
6
5
  from cognite.client import CogniteClient
7
6
  from cognite.client.data_classes import Asset, AssetFilter, AssetList
8
- from rdflib import RDF, Literal, Namespace
9
7
 
10
- from cognite.neat._shared import Triple
11
-
12
- from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
13
- from ._labels import LabelsExtractor
8
+ from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix
14
9
 
15
10
 
16
11
  class AssetsExtractor(ClassicCDFBaseExtractor[Asset]):
17
- """Extract data from Cognite Data Fusions Assets into Neat.
18
-
19
- Args:
20
- items (Iterable[Asset]): An iterable of assets.
21
- namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
22
- to_type (Callable[[Asset], str | None], optional): A function to convert an asset to a type. Defaults to None.
23
- If None or if the function returns None, the asset will be set to the default type "Asset".
24
- total (int, optional): The total number of assets to load. If passed, you will get a progress bar if rich
25
- is installed. Defaults to None.
26
- limit (int, optional): The maximal number of assets to load. Defaults to None. This is typically used for
27
- testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
28
- limit the extraction to 1000 assets to test the setup.
29
- unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
30
- a JSON string.
31
- skip_metadata_values (set[str] | frozenset[str] | None, optional): A set of values to skip when unpacking
32
- metadata. Defaults to frozenset({"nan", "null", "none", ""}).
33
- """
12
+ """Extract data from Cognite Data Fusions Assets into Neat."""
34
13
 
35
14
  _default_rdf_type = "Asset"
15
+ _instance_id_prefix = InstanceIdPrefix.asset
36
16
 
37
17
  @classmethod
38
- def from_dataset(
39
- cls,
40
- client: CogniteClient,
41
- data_set_external_id: str,
42
- namespace: Namespace | None = None,
43
- to_type: Callable[[Asset], str | None] | None = None,
44
- limit: int | None = None,
45
- unpack_metadata: bool = True,
46
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
47
- ):
18
+ def _from_dataset(cls, client: CogniteClient, data_set_external_id: str) -> tuple[int | None, Iterable[Asset]]:
48
19
  total = client.assets.aggregate_count(filter=AssetFilter(data_set_ids=[{"externalId": data_set_external_id}]))
49
-
50
- return cls(
51
- client.assets(data_set_external_ids=data_set_external_id),
52
- namespace,
53
- to_type,
54
- total,
55
- limit,
56
- unpack_metadata=unpack_metadata,
57
- skip_metadata_values=skip_metadata_values,
58
- )
20
+ items = client.assets(data_set_external_ids=data_set_external_id)
21
+ return total, items
59
22
 
60
23
  @classmethod
61
- def from_hierarchy(
62
- cls,
63
- client: CogniteClient,
64
- root_asset_external_id: str,
65
- namespace: Namespace | None = None,
66
- to_type: Callable[[Asset], str | None] | None = None,
67
- limit: int | None = None,
68
- unpack_metadata: bool = True,
69
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
70
- ):
24
+ def _from_hierarchy(cls, client: CogniteClient, root_asset_external_id: str) -> tuple[int | None, Iterable[Asset]]:
71
25
  total = client.assets.aggregate_count(
72
26
  filter=AssetFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
73
27
  )
74
-
75
- return cls(
76
- cast(
77
- Iterable[Asset],
78
- client.assets(asset_subtree_external_ids=root_asset_external_id),
79
- ),
80
- namespace,
81
- to_type,
82
- total,
83
- limit,
84
- unpack_metadata=unpack_metadata,
85
- skip_metadata_values=skip_metadata_values,
28
+ items = cast(
29
+ Iterable[Asset],
30
+ client.assets(asset_subtree_external_ids=root_asset_external_id),
86
31
  )
32
+ return total, items
87
33
 
88
34
  @classmethod
89
- def from_file(
90
- cls,
91
- file_path: str,
92
- namespace: Namespace | None = None,
93
- to_type: Callable[[Asset], str] | None = None,
94
- limit: int | None = None,
95
- unpack_metadata: bool = True,
96
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
97
- ):
35
+ def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[Asset]]:
98
36
  assets = AssetList.load(Path(file_path).read_text())
99
- return cls(
100
- assets,
101
- namespace,
102
- to_type,
103
- total=len(assets),
104
- limit=limit,
105
- unpack_metadata=unpack_metadata,
106
- skip_metadata_values=skip_metadata_values,
107
- )
108
-
109
- def _item2triples(self, asset: Asset) -> list[Triple]:
110
- """Converts an asset to triples."""
111
- id_ = self.namespace[f"{InstanceIdPrefix.asset}{asset.id}"]
112
-
113
- type_ = self._get_rdf_type(asset)
114
-
115
- triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
116
-
117
- # Create attributes
118
- if asset.name:
119
- triples.append((id_, self.namespace.name, Literal(asset.name)))
120
-
121
- if asset.description:
122
- triples.append((id_, self.namespace.description, Literal(asset.description)))
123
-
124
- if asset.external_id:
125
- triples.append((id_, self.namespace.external_id, Literal(asset.external_id)))
126
-
127
- if asset.source:
128
- triples.append((id_, self.namespace.source, Literal(asset.source)))
129
-
130
- # properties' ref creation and update
131
- triples.append(
132
- (
133
- id_,
134
- self.namespace.created_time,
135
- Literal(datetime.fromtimestamp(asset.created_time / 1000, timezone.utc)),
136
- )
137
- )
138
- triples.append(
139
- (
140
- id_,
141
- self.namespace.last_updated_time,
142
- Literal(datetime.fromtimestamp(asset.last_updated_time / 1000, timezone.utc)),
143
- )
144
- )
145
-
146
- if asset.labels:
147
- for label in asset.labels:
148
- # external_id can create ill-formed URIs, so we create websafe URIs
149
- # since labels do not have internal ids, we use the external_id as the id
150
- triples.append(
151
- (
152
- id_,
153
- self.namespace.label,
154
- self.namespace[f"{InstanceIdPrefix.label}{LabelsExtractor._label_id(label)}"],
155
- )
156
- )
157
-
158
- if asset.metadata:
159
- triples.extend(self._metadata_to_triples(id_, asset.metadata))
160
-
161
- # Create connections:
162
- if asset.parent_id:
163
- triples.append((id_, self.namespace.parent, self.namespace[f"{InstanceIdPrefix.asset}{asset.parent_id}"]))
164
-
165
- if asset.root_id:
166
- triples.append((id_, self.namespace.root, self.namespace[f"{InstanceIdPrefix.asset}{asset.root_id}"]))
167
-
168
- if asset.data_set_id:
169
- triples.append(
170
- (
171
- id_,
172
- self.namespace.dataset,
173
- self.namespace[f"{InstanceIdPrefix.data_set}{asset.data_set_id}"],
174
- )
175
- )
176
-
177
- return triples
37
+ return len(assets), assets
@@ -2,18 +2,22 @@ import json
2
2
  import re
3
3
  import sys
4
4
  from abc import ABC, abstractmethod
5
- from collections.abc import Callable, Iterable, Set
6
- from typing import Generic, TypeVar
5
+ from collections.abc import Callable, Iterable, Sequence, Set
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+ from typing import Any, Generic, TypeVar
7
9
 
8
- from cognite.client.data_classes._base import CogniteResource
9
- from rdflib import XSD, Literal, Namespace, URIRef
10
+ from cognite.client import CogniteClient
11
+ from cognite.client.data_classes._base import WriteableCogniteResource
12
+ from pydantic import AnyHttpUrl, ValidationError
13
+ from rdflib import RDF, XSD, Literal, Namespace, URIRef
10
14
 
11
15
  from cognite.neat._constants import DEFAULT_NAMESPACE
12
16
  from cognite.neat._graph.extractors._base import BaseExtractor
13
17
  from cognite.neat._shared import Triple
14
18
  from cognite.neat._utils.auxiliary import string_to_ideal_type
15
19
 
16
- T_CogniteResource = TypeVar("T_CogniteResource", bound=CogniteResource)
20
+ T_CogniteResource = TypeVar("T_CogniteResource", bound=WriteableCogniteResource)
17
21
 
18
22
  DEFAULT_SKIP_METADATA_VALUES = frozenset({"nan", "null", "none", ""})
19
23
 
@@ -61,9 +65,13 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
61
65
  a JSON string.
62
66
  skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
63
67
  values in this set will be skipped.
68
+ camel_case (bool, optional): Whether to use camelCase instead of snake_case for property names.
69
+ Defaults to True.
70
+ as_write (bool, optional): Whether to use the write/request format of the items. Defaults to False.
64
71
  """
65
72
 
66
73
  _default_rdf_type: str
74
+ _instance_id_prefix: str
67
75
  _SPACE_PATTERN = re.compile(r"\s+")
68
76
 
69
77
  def __init__(
@@ -75,6 +83,8 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
75
83
  limit: int | None = None,
76
84
  unpack_metadata: bool = True,
77
85
  skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
86
+ camel_case: bool = True,
87
+ as_write: bool = False,
78
88
  ):
79
89
  self.namespace = namespace or DEFAULT_NAMESPACE
80
90
  self.items = items
@@ -83,6 +93,8 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
83
93
  self.limit = min(limit, total) if limit and total else limit
84
94
  self.unpack_metadata = unpack_metadata
85
95
  self.skip_metadata_values = skip_metadata_values
96
+ self.camel_case = camel_case
97
+ self.as_write = as_write
86
98
 
87
99
  def extract(self) -> Iterable[Triple]:
88
100
  """Extracts an asset with the given asset_id."""
@@ -104,9 +116,48 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
104
116
  if self.limit and no >= self.limit:
105
117
  break
106
118
 
107
- @abstractmethod
108
119
  def _item2triples(self, item: T_CogniteResource) -> list[Triple]:
109
- raise NotImplementedError()
120
+ id_value: str | None
121
+ if hasattr(item, "id"):
122
+ id_value = str(item.id)
123
+ else:
124
+ id_value = self._fallback_id(item)
125
+ if id_value is None:
126
+ return []
127
+
128
+ id_ = self.namespace[f"{self._instance_id_prefix}{id_value}"]
129
+
130
+ type_ = self._get_rdf_type(item)
131
+
132
+ # Set rdf type
133
+ triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
134
+ if self.as_write:
135
+ item = item.as_write()
136
+ dumped = item.dump(self.camel_case)
137
+ dumped.pop("id", None)
138
+ # We have parentId so we don't need parentExternalId
139
+ dumped.pop("parentExternalId", None)
140
+ if "metadata" in dumped:
141
+ triples.extend(self._metadata_to_triples(id_, dumped.pop("metadata")))
142
+ if "columns" in dumped:
143
+ columns = dumped.pop("columns")
144
+ triples.append(
145
+ (id_, self.namespace.columns, Literal(json.dumps({"columns": columns}), datatype=XSD._NS["json"]))
146
+ )
147
+
148
+ for key, value in dumped.items():
149
+ if value is None or value == []:
150
+ continue
151
+ values = value if isinstance(value, Sequence) and not isinstance(value, str) else [value]
152
+ for raw in values:
153
+ triples.append((id_, self.namespace[key], self._as_object(raw, key)))
154
+ return triples
155
+
156
+ def _fallback_id(self, item: T_CogniteResource) -> str | None:
157
+ raise AttributeError(
158
+ f"Item of type {type(item)} does not have an id attribute. "
159
+ f"Please implement the _fallback_id method in the extractor."
160
+ )
110
161
 
111
162
  def _metadata_to_triples(self, id_: URIRef, metadata: dict[str, str]) -> Iterable[Triple]:
112
163
  if self.unpack_metadata:
@@ -125,3 +176,99 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
125
176
  if self.to_type:
126
177
  type_ = self.to_type(item) or type_
127
178
  return self._SPACE_PATTERN.sub("_", type_)
179
+
180
+ def _as_object(self, raw: Any, key: str) -> Literal | URIRef:
181
+ if key in {"data_set_id", "dataSetId"}:
182
+ return self.namespace[f"{InstanceIdPrefix.data_set}{raw}"]
183
+ elif key in {"assetId", "asset_id", "assetIds", "asset_ids", "parentId", "rootId", "parent_id", "root_id"}:
184
+ return self.namespace[f"{InstanceIdPrefix.asset}{raw}"]
185
+ elif key in {
186
+ "startTime",
187
+ "endTime",
188
+ "createdTime",
189
+ "lastUpdatedTime",
190
+ "start_time",
191
+ "end_time",
192
+ "created_time",
193
+ "last_updated_time",
194
+ } and isinstance(raw, int):
195
+ return Literal(datetime.fromtimestamp(raw / 1000, timezone.utc), datatype=XSD.dateTime)
196
+ elif key == "labels":
197
+ from ._labels import LabelsExtractor
198
+
199
+ return self.namespace[f"{InstanceIdPrefix.label}{LabelsExtractor._label_id(raw)}"]
200
+ elif key in {"sourceType", "targetType", "source_type", "target_type"} and isinstance(raw, str):
201
+ # Relationship types. Titled so they can be looked up.
202
+ return self.namespace[raw.title()]
203
+ elif key in {"unit_external_id", "unitExternalId"}:
204
+ try:
205
+ return URIRef(str(AnyHttpUrl(raw)))
206
+ except ValidationError:
207
+ ...
208
+ return Literal(raw)
209
+
210
+ @classmethod
211
+ def from_dataset(
212
+ cls,
213
+ client: CogniteClient,
214
+ data_set_external_id: str,
215
+ namespace: Namespace | None = None,
216
+ to_type: Callable[[T_CogniteResource], str | None] | None = None,
217
+ limit: int | None = None,
218
+ unpack_metadata: bool = True,
219
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
220
+ camel_case: bool = True,
221
+ as_write: bool = False,
222
+ ):
223
+ total, items = cls._from_dataset(client, data_set_external_id)
224
+ return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)
225
+
226
+ @classmethod
227
+ @abstractmethod
228
+ def _from_dataset(
229
+ cls, client: CogniteClient, data_set_external_id: str
230
+ ) -> tuple[int | None, Iterable[T_CogniteResource]]:
231
+ raise NotImplementedError
232
+
233
+ @classmethod
234
+ def from_hierarchy(
235
+ cls,
236
+ client: CogniteClient,
237
+ root_asset_external_id: str,
238
+ namespace: Namespace | None = None,
239
+ to_type: Callable[[T_CogniteResource], str | None] | None = None,
240
+ limit: int | None = None,
241
+ unpack_metadata: bool = True,
242
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
243
+ camel_case: bool = True,
244
+ as_write: bool = False,
245
+ ):
246
+ total, items = cls._from_hierarchy(client, root_asset_external_id)
247
+ return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)
248
+
249
+ @classmethod
250
+ @abstractmethod
251
+ def _from_hierarchy(
252
+ cls, client: CogniteClient, root_asset_external_id: str
253
+ ) -> tuple[int | None, Iterable[T_CogniteResource]]:
254
+ raise NotImplementedError
255
+
256
+ @classmethod
257
+ def from_file(
258
+ cls,
259
+ file_path: str | Path,
260
+ namespace: Namespace | None = None,
261
+ to_type: Callable[[T_CogniteResource], str | None] | None = None,
262
+ limit: int | None = None,
263
+ unpack_metadata: bool = True,
264
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
265
+ camel_case: bool = True,
266
+ as_write: bool = False,
267
+ ):
268
+ total, items = cls._from_file(file_path)
269
+ return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)
270
+
271
+ @classmethod
272
+ @abstractmethod
273
+ def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[T_CogniteResource]]:
274
+ raise NotImplementedError
@@ -1,12 +1,15 @@
1
+ import warnings
1
2
  from collections import defaultdict
2
3
  from collections.abc import Iterable, Sequence
3
4
  from typing import ClassVar, NamedTuple
4
5
 
5
6
  from cognite.client import CogniteClient
7
+ from cognite.client.exceptions import CogniteAPIError
6
8
  from rdflib import Namespace
7
9
 
8
- from cognite.neat._constants import DEFAULT_NAMESPACE
10
+ from cognite.neat._constants import CLASSIC_CDF_NAMESPACE
9
11
  from cognite.neat._graph.extractors._base import BaseExtractor
12
+ from cognite.neat._issues.warnings import CDFAuthWarning
10
13
  from cognite.neat._shared import Triple
11
14
  from cognite.neat._utils.collection_ import chunker
12
15
  from cognite.neat._utils.rdf_ import remove_namespace_from_uri
@@ -95,7 +98,8 @@ class ClassicGraphExtractor(BaseExtractor):
95
98
  raise ValueError("Exactly one of data_set_external_id or root_asset_external_id must be set.")
96
99
  self._root_asset_external_id = root_asset_external_id
97
100
  self._data_set_external_id = data_set_external_id
98
- self._namespace = namespace or DEFAULT_NAMESPACE
101
+ self._namespace = namespace or CLASSIC_CDF_NAMESPACE
102
+ self._extractor_args = dict(namespace=self._namespace, unpack_metadata=False, as_write=True, camel_case=True)
99
103
 
100
104
  self._source_external_ids_by_type: dict[InstanceIdPrefix, set[str]] = defaultdict(set)
101
105
  self._target_external_ids_by_type: dict[InstanceIdPrefix, set[str]] = defaultdict(set)
@@ -110,18 +114,25 @@ class ClassicGraphExtractor(BaseExtractor):
110
114
 
111
115
  yield from self._extract_core_end_nodes()
112
116
 
113
- yield from self._extract_labels()
114
- yield from self._extract_data_sets()
117
+ try:
118
+ yield from self._extract_labels()
119
+ except CogniteAPIError as e:
120
+ warnings.warn(CDFAuthWarning("extract labels", str(e)), stacklevel=2)
121
+
122
+ try:
123
+ yield from self._extract_data_sets()
124
+ except CogniteAPIError as e:
125
+ warnings.warn(CDFAuthWarning("extract data sets", str(e)), stacklevel=2)
115
126
 
116
127
  def _extract_core_start_nodes(self):
117
128
  for core_node in self._classic_node_types:
118
129
  if self._data_set_external_id:
119
130
  extractor = core_node.extractor_cls.from_dataset(
120
- self._client, self._data_set_external_id, self._namespace, unpack_metadata=False
131
+ self._client, self._data_set_external_id, **self._extractor_args
121
132
  )
122
133
  elif self._root_asset_external_id:
123
134
  extractor = core_node.extractor_cls.from_hierarchy(
124
- self._client, self._root_asset_external_id, self._namespace, unpack_metadata=False
135
+ self._client, self._root_asset_external_id, **self._extractor_args
125
136
  )
126
137
  else:
127
138
  raise ValueError("Exactly one of data_set_external_id or root_asset_external_id must be set.")
@@ -135,7 +146,7 @@ class ClassicGraphExtractor(BaseExtractor):
135
146
  relationship_iterator = self._client.relationships(
136
147
  source_external_ids=list(chunk), source_types=[start_type]
137
148
  )
138
- extractor = RelationshipsExtractor(relationship_iterator, self._namespace, unpack_metadata=False)
149
+ extractor = RelationshipsExtractor(relationship_iterator, **self._extractor_args)
139
150
  # This is a private attribute, but we need to set it to log the target nodes.
140
151
  extractor._log_target_nodes = True
141
152
 
@@ -165,28 +176,28 @@ class ClassicGraphExtractor(BaseExtractor):
165
176
  description=f"Extracting end nodes {core_node.resource_type.removesuffix('_')}",
166
177
  ):
167
178
  resource_iterator = api.retrieve_multiple(external_ids=list(chunk), ignore_unknown_ids=True)
168
- extractor = core_node.extractor_cls(resource_iterator, self._namespace, unpack_metadata=False)
179
+ extractor = core_node.extractor_cls(resource_iterator, **self._extractor_args)
169
180
  yield from self._extract_with_logging_label_dataset(extractor)
170
181
 
171
182
  def _extract_labels(self):
172
183
  for chunk in self._chunk(list(self._labels), description="Extracting labels"):
173
184
  label_iterator = self._client.labels.retrieve(external_id=list(chunk), ignore_unknown_ids=True)
174
- yield from LabelsExtractor(label_iterator, self._namespace).extract()
185
+ yield from LabelsExtractor(label_iterator, **self._extractor_args).extract()
175
186
 
176
187
  def _extract_data_sets(self):
177
188
  for chunk in self._chunk(list(self._data_set_ids), description="Extracting data sets"):
178
189
  data_set_iterator = self._client.data_sets.retrieve_multiple(ids=list(chunk), ignore_unknown_ids=True)
179
- yield from DataSetExtractor(data_set_iterator, self._namespace, unpack_metadata=False).extract()
190
+ yield from DataSetExtractor(data_set_iterator, **self._extractor_args).extract()
180
191
 
181
192
  def _extract_with_logging_label_dataset(
182
193
  self, extractor: ClassicCDFBaseExtractor, resource_type: InstanceIdPrefix | None = None
183
194
  ) -> Iterable[Triple]:
184
195
  for triple in extractor.extract():
185
- if triple[1] == self._namespace.external_id and resource_type is not None:
196
+ if triple[1] == self._namespace.externalId and resource_type is not None:
186
197
  self._source_external_ids_by_type[resource_type].add(remove_namespace_from_uri(triple[2]))
187
- elif triple[1] == self._namespace.label:
198
+ elif triple[1] == self._namespace.labels:
188
199
  self._labels.add(remove_namespace_from_uri(triple[2]).removeprefix(InstanceIdPrefix.label))
189
- elif triple[1] == self._namespace.dataset:
200
+ elif triple[1] == self._namespace.datasetId:
190
201
  self._data_set_ids.add(
191
202
  int(remove_namespace_from_uri(triple[2]).removeprefix(InstanceIdPrefix.data_set))
192
203
  )
@@ -202,7 +213,7 @@ class ClassicGraphExtractor(BaseExtractor):
202
213
  else:
203
214
  to_iterate = track(
204
215
  to_iterate,
205
- total=(len(items) // 1000) + 1,
216
+ total=(len(items) // 1_000) + 1,
206
217
  description=description,
207
218
  )
208
219
  return to_iterate
@@ -1,110 +1,35 @@
1
- from collections.abc import Set
2
- from datetime import datetime, timezone
1
+ from collections.abc import Iterable
3
2
  from pathlib import Path
4
3
 
5
4
  from cognite.client import CogniteClient
6
5
  from cognite.client.data_classes import DataSet, DataSetList
7
6
  from cognite.client.utils.useful_types import SequenceNotStr
8
- from rdflib import RDF, Literal, Namespace
9
7
 
10
- from cognite.neat._shared import Triple
11
-
12
- from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
8
+ from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix, T_CogniteResource
13
9
 
14
10
 
15
11
  class DataSetExtractor(ClassicCDFBaseExtractor[DataSet]):
16
- """Extract DataSets from Cognite Data Fusions into Neat.
17
-
18
- Args:
19
- items (Iterable[Asset]): An iterable of assets.
20
- namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
21
- to_type (Callable[[Asset], str | None], optional): A function to convert an asset to a type. Defaults to None.
22
- If None or if the function returns None, the asset will be set to the default type "Asset".
23
- total (int, optional): The total number of assets to load. If passed, you will get a progress bar if rich
24
- is installed. Defaults to None.
25
- limit (int, optional): The maximal number of assets to load. Defaults to None. This is typically used for
26
- testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
27
- limit the extraction to 1000 assets to test the setup.
28
- unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
29
- a JSON string.
30
- skip_metadata_values (set[str] | frozenset[str] | None, optional): A set of values to skip when unpacking
31
- metadata. Defaults to frozenset({"nan", "null", "none", ""}).
32
- """
12
+ """Extract DataSets from Cognite Data Fusions into Neat."""
33
13
 
34
14
  _default_rdf_type = "DataSet"
15
+ _instance_id_prefix = InstanceIdPrefix.data_set
35
16
 
36
17
  @classmethod
37
- def from_dataset(
18
+ def _from_dataset(
38
19
  cls,
39
20
  client: CogniteClient,
40
- data_set_external_id: SequenceNotStr[str],
41
- namespace: Namespace | None = None,
42
- unpack_metadata: bool = True,
43
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
44
- ):
45
- return cls(
46
- client.data_sets.retrieve_multiple(external_ids=data_set_external_id),
47
- namespace=namespace,
48
- total=len(data_set_external_id),
49
- unpack_metadata=unpack_metadata,
50
- skip_metadata_values=skip_metadata_values,
51
- )
21
+ data_set_external_id: SequenceNotStr[str], # type: ignore[override]
22
+ ) -> tuple[int | None, Iterable[DataSet]]:
23
+ items = client.data_sets.retrieve_multiple(external_ids=data_set_external_id)
24
+ return len(items), items
52
25
 
53
26
  @classmethod
54
- def from_file(
55
- cls,
56
- file_path: str,
57
- namespace: Namespace | None = None,
58
- unpack_metadata: bool = True,
59
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
60
- ):
61
- data_sets = DataSetList.load(Path(file_path).read_text())
62
- return cls(
63
- data_sets,
64
- namespace=namespace,
65
- total=len(data_sets),
66
- unpack_metadata=unpack_metadata,
67
- skip_metadata_values=skip_metadata_values,
68
- )
69
-
70
- def _item2triples(self, item: DataSet) -> list[Triple]:
71
- """Converts an asset to triples."""
72
- id_ = self.namespace[f"{InstanceIdPrefix.data_set}{item.id}"]
73
-
74
- type_ = self._get_rdf_type(item)
75
-
76
- triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
77
-
78
- # Create attributes
79
- if item.name:
80
- triples.append((id_, self.namespace.name, Literal(item.name)))
27
+ def _from_hierarchy(
28
+ cls, client: CogniteClient, root_asset_external_id: str
29
+ ) -> tuple[int | None, Iterable[T_CogniteResource]]:
30
+ raise NotImplementedError("DataSets do not have a hierarchy.")
81
31
 
82
- if item.description:
83
- triples.append((id_, self.namespace.description, Literal(item.description)))
84
-
85
- if item.external_id:
86
- triples.append((id_, self.namespace.external_id, Literal(item.external_id)))
87
-
88
- # properties' ref creation and update
89
- triples.append(
90
- (
91
- id_,
92
- self.namespace.created_time,
93
- Literal(datetime.fromtimestamp(item.created_time / 1000, timezone.utc)),
94
- )
95
- )
96
- triples.append(
97
- (
98
- id_,
99
- self.namespace.last_updated_time,
100
- Literal(datetime.fromtimestamp(item.last_updated_time / 1000, timezone.utc)),
101
- )
102
- )
103
-
104
- if item.write_protected:
105
- triples.append((id_, self.namespace.write_protected, Literal(item.write_protected)))
106
-
107
- if item.metadata:
108
- triples.extend(self._metadata_to_triples(id_, item.metadata))
109
-
110
- return triples
32
+ @classmethod
33
+ def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[DataSet]]:
34
+ data_sets = DataSetList.load(Path(file_path).read_text())
35
+ return len(data_sets), data_sets