cognite-neat: cognite_neat-0.98.0-py3-none-any.whl → cognite_neat-0.99.0-py3-none-any.whl

This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (72)
  1. cognite/neat/_client/__init__.py +4 -0
  2. cognite/neat/_client/_api/data_modeling_loaders.py +512 -0
  3. cognite/neat/_client/_api/schema.py +50 -0
  4. cognite/neat/_client/_api_client.py +17 -0
  5. cognite/neat/_client/data_classes/__init__.py +0 -0
  6. cognite/neat/{_utils/cdf/data_classes.py → _client/data_classes/data_modeling.py} +8 -135
  7. cognite/neat/{_rules/models/dms/_schema.py → _client/data_classes/schema.py} +21 -281
  8. cognite/neat/_graph/_shared.py +14 -15
  9. cognite/neat/_graph/extractors/_classic_cdf/_assets.py +14 -154
  10. cognite/neat/_graph/extractors/_classic_cdf/_base.py +154 -7
  11. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +23 -12
  12. cognite/neat/_graph/extractors/_classic_cdf/_data_sets.py +17 -92
  13. cognite/neat/_graph/extractors/_classic_cdf/_events.py +13 -162
  14. cognite/neat/_graph/extractors/_classic_cdf/_files.py +15 -179
  15. cognite/neat/_graph/extractors/_classic_cdf/_labels.py +32 -100
  16. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +27 -178
  17. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +14 -139
  18. cognite/neat/_graph/extractors/_classic_cdf/_timeseries.py +15 -173
  19. cognite/neat/_graph/extractors/_rdf_file.py +6 -7
  20. cognite/neat/_graph/queries/_base.py +17 -1
  21. cognite/neat/_graph/transformers/_classic_cdf.py +50 -134
  22. cognite/neat/_graph/transformers/_prune_graph.py +1 -1
  23. cognite/neat/_graph/transformers/_rdfpath.py +1 -1
  24. cognite/neat/_issues/warnings/__init__.py +6 -0
  25. cognite/neat/_issues/warnings/_external.py +8 -0
  26. cognite/neat/_issues/warnings/_properties.py +16 -0
  27. cognite/neat/_rules/_constants.py +7 -6
  28. cognite/neat/_rules/analysis/_base.py +8 -4
  29. cognite/neat/_rules/exporters/_base.py +3 -4
  30. cognite/neat/_rules/exporters/_rules2dms.py +29 -40
  31. cognite/neat/_rules/importers/_dms2rules.py +4 -5
  32. cognite/neat/_rules/importers/_rdf/_inference2rules.py +25 -33
  33. cognite/neat/_rules/models/__init__.py +1 -1
  34. cognite/neat/_rules/models/_base_rules.py +22 -12
  35. cognite/neat/_rules/models/dms/__init__.py +2 -2
  36. cognite/neat/_rules/models/dms/_exporter.py +15 -20
  37. cognite/neat/_rules/models/dms/_rules.py +48 -3
  38. cognite/neat/_rules/models/dms/_rules_input.py +52 -8
  39. cognite/neat/_rules/models/dms/_validation.py +10 -5
  40. cognite/neat/_rules/models/entities/_single_value.py +32 -4
  41. cognite/neat/_rules/models/information/_rules.py +0 -8
  42. cognite/neat/_rules/models/mapping/__init__.py +2 -3
  43. cognite/neat/_rules/models/mapping/_classic2core.py +36 -146
  44. cognite/neat/_rules/models/mapping/_classic2core.yaml +339 -0
  45. cognite/neat/_rules/transformers/__init__.py +2 -2
  46. cognite/neat/_rules/transformers/_converters.py +110 -11
  47. cognite/neat/_rules/transformers/_mapping.py +105 -30
  48. cognite/neat/_rules/transformers/_verification.py +5 -2
  49. cognite/neat/_session/_base.py +49 -8
  50. cognite/neat/_session/_drop.py +35 -0
  51. cognite/neat/_session/_inspect.py +17 -5
  52. cognite/neat/_session/_mapping.py +39 -0
  53. cognite/neat/_session/_prepare.py +218 -23
  54. cognite/neat/_session/_read.py +49 -12
  55. cognite/neat/_session/_to.py +3 -3
  56. cognite/neat/_store/_base.py +27 -24
  57. cognite/neat/_utils/rdf_.py +28 -1
  58. cognite/neat/_version.py +1 -1
  59. cognite/neat/_workflows/steps/lib/current/rules_exporter.py +8 -3
  60. cognite/neat/_workflows/steps/lib/current/rules_importer.py +4 -1
  61. cognite/neat/_workflows/steps/lib/current/rules_validator.py +3 -2
  62. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/METADATA +3 -3
  63. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/RECORD +67 -64
  64. cognite/neat/_rules/models/mapping/_base.py +0 -131
  65. cognite/neat/_utils/cdf/loaders/__init__.py +0 -25
  66. cognite/neat/_utils/cdf/loaders/_base.py +0 -54
  67. cognite/neat/_utils/cdf/loaders/_data_modeling.py +0 -339
  68. cognite/neat/_utils/cdf/loaders/_ingestion.py +0 -167
  69. cognite/neat/{_utils/cdf → _client/_api}/__init__.py +0 -0
  70. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/LICENSE +0 -0
  71. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/WHEEL +0 -0
  72. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/entry_points.txt +0 -0
@@ -1,39 +1,21 @@
1
1
  from collections import defaultdict
2
2
  from collections.abc import Callable, Iterable, Set
3
- from datetime import datetime, timezone
4
3
  from pathlib import Path
5
4
 
6
5
  from cognite.client import CogniteClient
7
6
  from cognite.client.data_classes import Relationship, RelationshipList
8
- from rdflib import RDF, Literal, Namespace
7
+ from rdflib import Namespace
9
8
 
10
- from cognite.neat._shared import Triple
11
9
  from cognite.neat._utils.auxiliary import create_sha256_hash
12
10
 
13
- from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
14
- from ._labels import LabelsExtractor
11
+ from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix, T_CogniteResource
15
12
 
16
13
 
17
14
  class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
18
- """Extract data from Cognite Data Fusions Relationships into Neat.
19
-
20
- Args:
21
- items (Iterable[Relationship]): An iterable of items.
22
- namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
23
- to_type (Callable[[Relationship], str | None], optional): A function to convert an item to a type.
24
- Defaults to None. If None or if the function returns None, the asset will be set to the default type.
25
- total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
26
- is installed. Defaults to None.
27
- limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
28
- testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
29
- limit the extraction to 1000 assets to test the setup.
30
- unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
31
- a JSON string.
32
- skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
33
- values in this set will be skipped.
34
- """
15
+ """Extract data from Cognite Data Fusions Relationships into Neat."""
35
16
 
36
17
  _default_rdf_type = "Relationship"
18
+ _instance_id_prefix = InstanceIdPrefix.relationship
37
19
 
38
20
  def __init__(
39
21
  self,
@@ -44,6 +26,8 @@ class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
44
26
  limit: int | None = None,
45
27
  unpack_metadata: bool = True,
46
28
  skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
29
+ camel_case: bool = True,
30
+ as_write: bool = False,
47
31
  ):
48
32
  super().__init__(
49
33
  items,
@@ -53,6 +37,8 @@ class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
53
37
  limit=limit,
54
38
  unpack_metadata=unpack_metadata,
55
39
  skip_metadata_values=skip_metadata_values,
40
+ camel_case=camel_case,
41
+ as_write=as_write,
56
42
  )
57
43
  # This is used by the ClassicExtractor to log the target nodes, such
58
44
  # that it can extract them.
@@ -61,167 +47,30 @@ class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
61
47
  self._target_external_ids_by_type: dict[InstanceIdPrefix, set[str]] = defaultdict(set)
62
48
 
63
49
  @classmethod
64
- def from_dataset(
50
+ def _from_dataset(
65
51
  cls,
66
52
  client: CogniteClient,
67
53
  data_set_external_id: str,
68
- namespace: Namespace | None = None,
69
- to_type: Callable[[Relationship], str | None] | None = None,
70
- limit: int | None = None,
71
- unpack_metadata: bool = True,
72
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
73
- ):
74
- return cls(
75
- client.relationships(data_set_external_ids=data_set_external_id),
76
- namespace=namespace,
77
- to_type=to_type,
78
- limit=limit,
79
- unpack_metadata=unpack_metadata,
80
- skip_metadata_values=skip_metadata_values,
81
- )
54
+ ) -> tuple[int | None, Iterable[Relationship]]:
55
+ items = client.relationships(data_set_external_ids=data_set_external_id)
56
+ return None, items
82
57
 
83
58
  @classmethod
84
- def from_file(
85
- cls,
86
- file_path: str,
87
- namespace: Namespace | None = None,
88
- to_type: Callable[[Relationship], str | None] | None = None,
89
- limit: int | None = None,
90
- unpack_metadata: bool = True,
91
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
92
- ):
93
- relationships = RelationshipList.load(Path(file_path).read_text())
94
- return cls(
95
- relationships,
96
- namespace=namespace,
97
- total=len(relationships),
98
- to_type=to_type,
99
- limit=limit,
100
- unpack_metadata=unpack_metadata,
101
- skip_metadata_values=skip_metadata_values,
102
- )
103
-
104
- def _item2triples(self, relationship: Relationship) -> list[Triple]:
105
- """Converts an asset to triples."""
59
+ def _from_hierarchy(
60
+ cls, client: CogniteClient, root_asset_external_id: str
61
+ ) -> tuple[int | None, Iterable[T_CogniteResource]]:
62
+ raise NotImplementedError("Relationships do not have a hierarchy.")
106
63
 
107
- if relationship.external_id and relationship.source_external_id and relationship.target_external_id:
108
- if self._log_target_nodes and relationship.target_type and relationship.target_external_id:
109
- self._target_external_ids_by_type[InstanceIdPrefix.from_str(relationship.target_type)].add(
110
- relationship.target_external_id
111
- )
112
-
113
- # relationships do not have an internal id, so we generate one
114
- id_ = self.namespace[f"{InstanceIdPrefix.relationship}{create_sha256_hash(relationship.external_id)}"]
115
-
116
- type_ = self._get_rdf_type(relationship)
117
- # Set rdf type
118
- triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
119
-
120
- # Set source and target types
121
- if source_type := relationship.source_type:
122
- triples.append(
123
- (
124
- id_,
125
- self.namespace.source_type,
126
- self.namespace[source_type.title()],
127
- )
128
- )
129
-
130
- if target_type := relationship.target_type:
131
- triples.append(
132
- (
133
- id_,
134
- self.namespace.target_type,
135
- self.namespace[target_type.title()],
136
- )
137
- )
138
-
139
- # Create attributes
140
-
141
- triples.append((id_, self.namespace.external_id, Literal(relationship.external_id)))
142
-
143
- triples.append(
144
- (
145
- id_,
146
- self.namespace.source_external_id,
147
- Literal(relationship.source_external_id),
148
- )
149
- )
150
-
151
- triples.append(
152
- (
153
- id_,
154
- self.namespace.target_external_id,
155
- Literal(relationship.target_external_id),
156
- )
157
- )
158
-
159
- if relationship.start_time:
160
- triples.append(
161
- (
162
- id_,
163
- self.namespace.start_time,
164
- Literal(datetime.fromtimestamp(relationship.start_time / 1000, timezone.utc)),
165
- )
166
- )
167
-
168
- if relationship.end_time:
169
- triples.append(
170
- (
171
- id_,
172
- self.namespace.end_time,
173
- Literal(datetime.fromtimestamp(relationship.end_time / 1000, timezone.utc)),
174
- )
175
- )
176
-
177
- if relationship.created_time:
178
- triples.append(
179
- (
180
- id_,
181
- self.namespace.created_time,
182
- Literal(datetime.fromtimestamp(relationship.created_time / 1000, timezone.utc)),
183
- )
184
- )
185
-
186
- if relationship.last_updated_time:
187
- triples.append(
188
- (
189
- id_,
190
- self.namespace.last_updated_time,
191
- Literal(datetime.fromtimestamp(relationship.last_updated_time / 1000, timezone.utc)),
192
- )
193
- )
194
-
195
- if relationship.confidence:
196
- triples.append(
197
- (
198
- id_,
199
- self.namespace.confidence,
200
- Literal(relationship.confidence),
201
- )
202
- )
203
-
204
- if relationship.labels:
205
- for label in relationship.labels:
206
- # external_id can create ill-formed URIs, so we create websafe URIs
207
- # since labels do not have internal ids, we use the external_id as the id
208
- triples.append(
209
- (
210
- id_,
211
- self.namespace.label,
212
- self.namespace[f"{InstanceIdPrefix.label}{LabelsExtractor._label_id(label)}"],
213
- )
214
- )
64
+ @classmethod
65
+ def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[Relationship]]:
66
+ relationships = RelationshipList.load(Path(file_path).read_text())
67
+ return len(relationships), relationships
215
68
 
216
- # Create connection
217
- if relationship.data_set_id:
218
- triples.append(
219
- (
220
- id_,
221
- self.namespace.dataset,
222
- self.namespace[f"{InstanceIdPrefix.data_set}{relationship.data_set_id}"],
223
- )
69
+ def _fallback_id(self, item: Relationship) -> str | None:
70
+ if item.external_id and item.source_external_id and item.target_external_id:
71
+ if self._log_target_nodes and item.target_type and item.target_external_id:
72
+ self._target_external_ids_by_type[InstanceIdPrefix.from_str(item.target_type)].add(
73
+ item.target_external_id
224
74
  )
225
-
226
- return triples
227
- return []
75
+ return create_sha256_hash(item.external_id)
76
+ return None
@@ -1,162 +1,37 @@
1
- from collections.abc import Callable, Set
2
- from datetime import datetime, timezone
1
+ from collections.abc import Iterable
3
2
  from pathlib import Path
4
3
 
5
4
  from cognite.client import CogniteClient
6
5
  from cognite.client.data_classes import Sequence, SequenceFilter, SequenceList
7
- from rdflib import RDF, Literal, Namespace
8
6
 
9
- from cognite.neat._shared import Triple
10
-
11
- from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
7
+ from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix
12
8
 
13
9
 
14
10
  class SequencesExtractor(ClassicCDFBaseExtractor[Sequence]):
15
- """Extract data from Cognite Data Fusions Sequences into Neat.
16
-
17
- Args:
18
- items (Iterable[Sequence]): An iterable of items.
19
- namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
20
- to_type (Callable[[Sequence], str | None], optional): A function to convert an item to a type.
21
- Defaults to None. If None or if the function returns None, the asset will be set to the default type.
22
- total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
23
- is installed. Defaults to None.
24
- limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
25
- testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
26
- limit the extraction to 1000 assets to test the setup.
27
- unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
28
- a JSON string.
29
- skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
30
- values in this set will be skipped.
31
- """
11
+ """Extract data from Cognite Data Fusions Sequences into Neat."""
32
12
 
33
13
  _default_rdf_type = "Sequence"
14
+ _instance_id_prefix = InstanceIdPrefix.sequence
34
15
 
35
16
  @classmethod
36
- def from_dataset(
37
- cls,
38
- client: CogniteClient,
39
- data_set_external_id: str,
40
- namespace: Namespace | None = None,
41
- to_type: Callable[[Sequence], str | None] | None = None,
42
- limit: int | None = None,
43
- unpack_metadata: bool = True,
44
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
45
- ):
17
+ def _from_dataset(cls, client: CogniteClient, data_set_external_id: str) -> tuple[int | None, Iterable[Sequence]]:
46
18
  total = client.sequences.aggregate_count(
47
19
  filter=SequenceFilter(data_set_ids=[{"externalId": data_set_external_id}])
48
20
  )
49
- return cls(
50
- client.sequences(data_set_external_ids=data_set_external_id),
51
- total=total,
52
- namespace=namespace,
53
- to_type=to_type,
54
- limit=limit,
55
- unpack_metadata=unpack_metadata,
56
- skip_metadata_values=skip_metadata_values,
57
- )
21
+ items = client.sequences(data_set_external_ids=data_set_external_id)
22
+ return total, items
58
23
 
59
24
  @classmethod
60
- def from_hierarchy(
61
- cls,
62
- client: CogniteClient,
63
- root_asset_external_id: str,
64
- namespace: Namespace | None = None,
65
- to_type: Callable[[Sequence], str | None] | None = None,
66
- limit: int | None = None,
67
- unpack_metadata: bool = True,
68
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
69
- ):
25
+ def _from_hierarchy(
26
+ cls, client: CogniteClient, root_asset_external_id: str
27
+ ) -> tuple[int | None, Iterable[Sequence]]:
70
28
  total = client.sequences.aggregate_count(
71
29
  filter=SequenceFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
72
30
  )
73
-
74
- return cls(
75
- client.sequences(asset_subtree_external_ids=[root_asset_external_id]),
76
- namespace,
77
- to_type,
78
- total,
79
- limit,
80
- unpack_metadata=unpack_metadata,
81
- skip_metadata_values=skip_metadata_values,
82
- )
31
+ items = client.sequences(asset_subtree_external_ids=[root_asset_external_id])
32
+ return total, items
83
33
 
84
34
  @classmethod
85
- def from_file(
86
- cls,
87
- file_path: str,
88
- namespace: Namespace | None = None,
89
- to_type: Callable[[Sequence], str | None] | None = None,
90
- limit: int | None = None,
91
- unpack_metadata: bool = True,
92
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
93
- ):
35
+ def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[Sequence]]:
94
36
  sequences = SequenceList.load(Path(file_path).read_text())
95
- return cls(
96
- sequences,
97
- total=len(sequences),
98
- namespace=namespace,
99
- to_type=to_type,
100
- limit=limit,
101
- unpack_metadata=unpack_metadata,
102
- skip_metadata_values=skip_metadata_values,
103
- )
104
-
105
- def _item2triples(self, sequence: Sequence) -> list[Triple]:
106
- id_ = self.namespace[f"{InstanceIdPrefix.sequence}{sequence.id}"]
107
-
108
- type_ = self._get_rdf_type(sequence)
109
- # Set rdf type
110
- triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
111
-
112
- # Create attributes
113
-
114
- if sequence.external_id:
115
- triples.append((id_, self.namespace.external_id, Literal(sequence.external_id)))
116
-
117
- if sequence.name:
118
- triples.append((id_, self.namespace.name, Literal(sequence.name)))
119
-
120
- if sequence.metadata:
121
- triples.extend(self._metadata_to_triples(id_, sequence.metadata))
122
-
123
- if sequence.description:
124
- triples.append((id_, self.namespace.description, Literal(sequence.description)))
125
-
126
- if sequence.created_time:
127
- triples.append(
128
- (
129
- id_,
130
- self.namespace.created_time,
131
- Literal(datetime.fromtimestamp(sequence.created_time / 1000, timezone.utc)),
132
- )
133
- )
134
-
135
- if sequence.last_updated_time:
136
- triples.append(
137
- (
138
- id_,
139
- self.namespace.last_updated_time,
140
- Literal(datetime.fromtimestamp(sequence.last_updated_time / 1000, timezone.utc)),
141
- )
142
- )
143
-
144
- if sequence.data_set_id:
145
- triples.append(
146
- (
147
- id_,
148
- self.namespace.data_set_id,
149
- self.namespace[f"{InstanceIdPrefix.data_set}{sequence.data_set_id}"],
150
- )
151
- )
152
-
153
- if sequence.asset_id:
154
- triples.append(
155
- (
156
- id_,
157
- self.namespace.asset,
158
- self.namespace[f"{InstanceIdPrefix.asset}{sequence.asset_id}"],
159
- )
160
- )
161
-
162
- return triples
37
+ return len(sequences), sequences
@@ -1,199 +1,41 @@
1
- from collections.abc import Callable, Set
2
- from datetime import datetime, timezone
1
+ from collections.abc import Iterable
3
2
  from pathlib import Path
4
3
 
5
4
  from cognite.client import CogniteClient
6
5
  from cognite.client.data_classes import TimeSeries, TimeSeriesFilter, TimeSeriesList
7
- from pydantic import AnyHttpUrl, ValidationError
8
- from rdflib import RDF, Literal, Namespace, URIRef
9
6
 
10
- from cognite.neat._shared import Triple
11
-
12
- from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
7
+ from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix
13
8
 
14
9
 
15
10
  class TimeSeriesExtractor(ClassicCDFBaseExtractor[TimeSeries]):
16
- """Extract data from Cognite Data Fusions TimeSeries into Neat.
17
-
18
- Args:
19
- items (Iterable[TimeSeries]): An iterable of items.
20
- namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
21
- to_type (Callable[[TimeSeries], str | None], optional): A function to convert an item to a type.
22
- Defaults to None. If None or if the function returns None, the asset will be set to the default type.
23
- total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
24
- is installed. Defaults to None.
25
- limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
26
- testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
27
- limit the extraction to 1000 assets to test the setup.
28
- unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
29
- a JSON string.
30
- skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
31
- values in this set will be skipped.
32
- """
11
+ """Extract data from Cognite Data Fusions TimeSeries into Neat."""
33
12
 
34
13
  _default_rdf_type = "TimeSeries"
14
+ _instance_id_prefix = InstanceIdPrefix.time_series
35
15
 
36
16
  @classmethod
37
- def from_dataset(
17
+ def _from_dataset(
38
18
  cls,
39
19
  client: CogniteClient,
40
20
  data_set_external_id: str,
41
- namespace: Namespace | None = None,
42
- to_type: Callable[[TimeSeries], str | None] | None = None,
43
- limit: int | None = None,
44
- unpack_metadata: bool = True,
45
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
46
- ):
21
+ ) -> tuple[int | None, Iterable[TimeSeries]]:
47
22
  total = client.time_series.aggregate_count(
48
23
  filter=TimeSeriesFilter(data_set_ids=[{"externalId": data_set_external_id}])
49
24
  )
50
-
51
- return cls(
52
- client.time_series(data_set_external_ids=data_set_external_id),
53
- total=total,
54
- namespace=namespace,
55
- to_type=to_type,
56
- limit=limit,
57
- unpack_metadata=unpack_metadata,
58
- skip_metadata_values=skip_metadata_values,
59
- )
25
+ items = client.time_series(data_set_external_ids=data_set_external_id)
26
+ return total, items
60
27
 
61
28
  @classmethod
62
- def from_hierarchy(
63
- cls,
64
- client: CogniteClient,
65
- root_asset_external_id: str,
66
- namespace: Namespace | None = None,
67
- to_type: Callable[[TimeSeries], str | None] | None = None,
68
- limit: int | None = None,
69
- unpack_metadata: bool = True,
70
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
71
- ):
29
+ def _from_hierarchy(
30
+ cls, client: CogniteClient, root_asset_external_id: str
31
+ ) -> tuple[int | None, Iterable[TimeSeries]]:
72
32
  total = client.time_series.aggregate_count(
73
33
  filter=TimeSeriesFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
74
34
  )
75
-
76
- return cls(
77
- client.time_series(asset_external_ids=[root_asset_external_id]),
78
- namespace,
79
- to_type,
80
- total,
81
- limit,
82
- unpack_metadata=unpack_metadata,
83
- skip_metadata_values=skip_metadata_values,
84
- )
35
+ items = client.time_series(asset_subtree_external_ids=root_asset_external_id)
36
+ return total, items
85
37
 
86
38
  @classmethod
87
- def from_file(
88
- cls,
89
- file_path: str,
90
- namespace: Namespace | None = None,
91
- to_type: Callable[[TimeSeries], str | None] | None = None,
92
- limit: int | None = None,
93
- unpack_metadata: bool = True,
94
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
95
- ):
39
+ def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[TimeSeries]]:
96
40
  timeseries = TimeSeriesList.load(Path(file_path).read_text())
97
- return cls(
98
- timeseries,
99
- total=len(timeseries),
100
- namespace=namespace,
101
- to_type=to_type,
102
- limit=limit,
103
- unpack_metadata=unpack_metadata,
104
- skip_metadata_values=skip_metadata_values,
105
- )
106
-
107
- def _item2triples(self, timeseries: TimeSeries) -> list[Triple]:
108
- id_ = self.namespace[f"{InstanceIdPrefix.time_series}{timeseries.id}"]
109
-
110
- # Set rdf type
111
- type_ = self._get_rdf_type(timeseries)
112
- triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
113
-
114
- # Create attributes
115
- if timeseries.external_id:
116
- triples.append((id_, self.namespace.external_id, Literal(timeseries.external_id)))
117
-
118
- if timeseries.name:
119
- triples.append((id_, self.namespace.name, Literal(timeseries.name)))
120
-
121
- if timeseries.is_string:
122
- triples.append((id_, self.namespace.is_string, Literal(timeseries.is_string)))
123
-
124
- if timeseries.metadata:
125
- triples.extend(self._metadata_to_triples(id_, timeseries.metadata))
126
-
127
- if timeseries.unit:
128
- triples.append((id_, self.namespace.unit, Literal(timeseries.unit)))
129
-
130
- if self.namespace.is_step:
131
- triples.append((id_, self.namespace.is_step, Literal(timeseries.is_step)))
132
-
133
- if timeseries.description:
134
- triples.append((id_, self.namespace.description, Literal(timeseries.description)))
135
-
136
- if timeseries.security_categories:
137
- for category in timeseries.security_categories:
138
- triples.append((id_, self.namespace.security_categories, Literal(category)))
139
-
140
- if timeseries.created_time:
141
- triples.append(
142
- (
143
- id_,
144
- self.namespace.created_time,
145
- Literal(datetime.fromtimestamp(timeseries.created_time / 1000, timezone.utc)),
146
- )
147
- )
148
-
149
- if timeseries.last_updated_time:
150
- triples.append(
151
- (
152
- id_,
153
- self.namespace.last_updated_time,
154
- Literal(datetime.fromtimestamp(timeseries.last_updated_time / 1000, timezone.utc)),
155
- )
156
- )
157
-
158
- if timeseries.legacy_name:
159
- triples.append((id_, self.namespace.legacy_name, Literal(timeseries.legacy_name)))
160
-
161
- # Create connections
162
- if timeseries.unit_external_id:
163
- # try to create connection to QUDT unit catalog
164
- try:
165
- triples.append(
166
- (
167
- id_,
168
- self.namespace.unit_external_id,
169
- URIRef(str(AnyHttpUrl(timeseries.unit_external_id))),
170
- )
171
- )
172
- except ValidationError:
173
- triples.append(
174
- (
175
- id_,
176
- self.namespace.unit_external_id,
177
- Literal(timeseries.unit_external_id),
178
- )
179
- )
180
-
181
- if timeseries.data_set_id:
182
- triples.append(
183
- (
184
- id_,
185
- self.namespace.dataset,
186
- self.namespace[f"{InstanceIdPrefix.data_set}{timeseries.data_set_id}"],
187
- )
188
- )
189
-
190
- if timeseries.asset_id:
191
- triples.append(
192
- (
193
- id_,
194
- self.namespace.asset,
195
- self.namespace[f"{InstanceIdPrefix.asset}{timeseries.asset_id}"],
196
- )
197
- )
198
-
199
- return triples
41
+ return len(timeseries), timeseries
@@ -1,12 +1,12 @@
1
1
  from collections.abc import Iterable
2
2
  from pathlib import Path
3
- from typing import cast
3
+ from typing import get_args
4
4
 
5
5
  from rdflib import URIRef
6
6
  from rdflib.util import guess_format
7
7
 
8
8
  from cognite.neat._constants import DEFAULT_BASE_URI
9
- from cognite.neat._graph._shared import rdflib_to_mime_types
9
+ from cognite.neat._graph._shared import RDFTypes
10
10
  from cognite.neat._graph.extractors._base import BaseExtractor
11
11
  from cognite.neat._issues._base import IssueList
12
12
  from cognite.neat._issues.errors import FileNotFoundNeatError, FileTypeUnexpectedError
@@ -29,19 +29,18 @@ class RdfFileExtractor(BaseExtractor):
29
29
  issue_list: IssueList | None = None,
30
30
  ):
31
31
  self.issue_list = issue_list or IssueList(title=f"{filepath.name}")
32
-
33
- self.filepath = filepath
34
- self.mime_type = rdflib_to_mime_types(cast(str, guess_format(str(self.filepath))))
35
32
  self.base_uri = base_uri
33
+ self.filepath = filepath
34
+ self.format = guess_format(str(self.filepath))
36
35
 
37
36
  if not self.filepath.exists():
38
37
  self.issue_list.append(FileNotFoundNeatError(self.filepath))
39
38
 
40
- if not self.mime_type:
39
+ if not self.format:
41
40
  self.issue_list.append(
42
41
  FileTypeUnexpectedError(
43
42
  self.filepath,
44
- frozenset([".rdf", ".ttl", ".nt", ".n3", ".owl", ".nq", ".trig"]),
43
+ frozenset(get_args(RDFTypes)),
45
44
  )
46
45
  )
47
46