cognite-neat 0.98.0__py3-none-any.whl → 0.99.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_client/__init__.py +4 -0
- cognite/neat/_client/_api/data_modeling_loaders.py +512 -0
- cognite/neat/_client/_api/schema.py +50 -0
- cognite/neat/_client/_api_client.py +17 -0
- cognite/neat/_client/data_classes/__init__.py +0 -0
- cognite/neat/{_utils/cdf/data_classes.py → _client/data_classes/data_modeling.py} +8 -135
- cognite/neat/{_rules/models/dms/_schema.py → _client/data_classes/schema.py} +21 -281
- cognite/neat/_graph/_shared.py +14 -15
- cognite/neat/_graph/extractors/_classic_cdf/_assets.py +14 -154
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +154 -7
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +23 -12
- cognite/neat/_graph/extractors/_classic_cdf/_data_sets.py +17 -92
- cognite/neat/_graph/extractors/_classic_cdf/_events.py +13 -162
- cognite/neat/_graph/extractors/_classic_cdf/_files.py +15 -179
- cognite/neat/_graph/extractors/_classic_cdf/_labels.py +32 -100
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +27 -178
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +14 -139
- cognite/neat/_graph/extractors/_classic_cdf/_timeseries.py +15 -173
- cognite/neat/_graph/extractors/_rdf_file.py +6 -7
- cognite/neat/_graph/queries/_base.py +17 -1
- cognite/neat/_graph/transformers/_classic_cdf.py +50 -134
- cognite/neat/_graph/transformers/_prune_graph.py +1 -1
- cognite/neat/_graph/transformers/_rdfpath.py +1 -1
- cognite/neat/_issues/warnings/__init__.py +6 -0
- cognite/neat/_issues/warnings/_external.py +8 -0
- cognite/neat/_issues/warnings/_properties.py +16 -0
- cognite/neat/_rules/_constants.py +7 -6
- cognite/neat/_rules/analysis/_base.py +8 -4
- cognite/neat/_rules/exporters/_base.py +3 -4
- cognite/neat/_rules/exporters/_rules2dms.py +29 -40
- cognite/neat/_rules/importers/_dms2rules.py +4 -5
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +25 -33
- cognite/neat/_rules/models/__init__.py +1 -1
- cognite/neat/_rules/models/_base_rules.py +22 -12
- cognite/neat/_rules/models/dms/__init__.py +2 -2
- cognite/neat/_rules/models/dms/_exporter.py +15 -20
- cognite/neat/_rules/models/dms/_rules.py +48 -3
- cognite/neat/_rules/models/dms/_rules_input.py +52 -8
- cognite/neat/_rules/models/dms/_validation.py +10 -5
- cognite/neat/_rules/models/entities/_single_value.py +32 -4
- cognite/neat/_rules/models/information/_rules.py +0 -8
- cognite/neat/_rules/models/mapping/__init__.py +2 -3
- cognite/neat/_rules/models/mapping/_classic2core.py +36 -146
- cognite/neat/_rules/models/mapping/_classic2core.yaml +339 -0
- cognite/neat/_rules/transformers/__init__.py +2 -2
- cognite/neat/_rules/transformers/_converters.py +110 -11
- cognite/neat/_rules/transformers/_mapping.py +105 -30
- cognite/neat/_rules/transformers/_verification.py +5 -2
- cognite/neat/_session/_base.py +49 -8
- cognite/neat/_session/_drop.py +35 -0
- cognite/neat/_session/_inspect.py +17 -5
- cognite/neat/_session/_mapping.py +39 -0
- cognite/neat/_session/_prepare.py +218 -23
- cognite/neat/_session/_read.py +49 -12
- cognite/neat/_session/_to.py +3 -3
- cognite/neat/_store/_base.py +27 -24
- cognite/neat/_utils/rdf_.py +28 -1
- cognite/neat/_version.py +1 -1
- cognite/neat/_workflows/steps/lib/current/rules_exporter.py +8 -3
- cognite/neat/_workflows/steps/lib/current/rules_importer.py +4 -1
- cognite/neat/_workflows/steps/lib/current/rules_validator.py +3 -2
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/METADATA +3 -3
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/RECORD +67 -64
- cognite/neat/_rules/models/mapping/_base.py +0 -131
- cognite/neat/_utils/cdf/loaders/__init__.py +0 -25
- cognite/neat/_utils/cdf/loaders/_base.py +0 -54
- cognite/neat/_utils/cdf/loaders/_data_modeling.py +0 -339
- cognite/neat/_utils/cdf/loaders/_ingestion.py +0 -167
- /cognite/neat/{_utils/cdf → _client/_api}/__init__.py +0 -0
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,39 +1,21 @@
|
|
|
1
1
|
from collections import defaultdict
|
|
2
2
|
from collections.abc import Callable, Iterable, Set
|
|
3
|
-
from datetime import datetime, timezone
|
|
4
3
|
from pathlib import Path
|
|
5
4
|
|
|
6
5
|
from cognite.client import CogniteClient
|
|
7
6
|
from cognite.client.data_classes import Relationship, RelationshipList
|
|
8
|
-
from rdflib import
|
|
7
|
+
from rdflib import Namespace
|
|
9
8
|
|
|
10
|
-
from cognite.neat._shared import Triple
|
|
11
9
|
from cognite.neat._utils.auxiliary import create_sha256_hash
|
|
12
10
|
|
|
13
|
-
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
14
|
-
from ._labels import LabelsExtractor
|
|
11
|
+
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix, T_CogniteResource
|
|
15
12
|
|
|
16
13
|
|
|
17
14
|
class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
|
|
18
|
-
"""Extract data from Cognite Data Fusions Relationships into Neat.
|
|
19
|
-
|
|
20
|
-
Args:
|
|
21
|
-
items (Iterable[Relationship]): An iterable of items.
|
|
22
|
-
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
|
|
23
|
-
to_type (Callable[[Relationship], str | None], optional): A function to convert an item to a type.
|
|
24
|
-
Defaults to None. If None or if the function returns None, the asset will be set to the default type.
|
|
25
|
-
total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
|
|
26
|
-
is installed. Defaults to None.
|
|
27
|
-
limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
|
|
28
|
-
testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
|
|
29
|
-
limit the extraction to 1000 assets to test the setup.
|
|
30
|
-
unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
|
|
31
|
-
a JSON string.
|
|
32
|
-
skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
|
|
33
|
-
values in this set will be skipped.
|
|
34
|
-
"""
|
|
15
|
+
"""Extract data from Cognite Data Fusions Relationships into Neat."""
|
|
35
16
|
|
|
36
17
|
_default_rdf_type = "Relationship"
|
|
18
|
+
_instance_id_prefix = InstanceIdPrefix.relationship
|
|
37
19
|
|
|
38
20
|
def __init__(
|
|
39
21
|
self,
|
|
@@ -44,6 +26,8 @@ class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
|
|
|
44
26
|
limit: int | None = None,
|
|
45
27
|
unpack_metadata: bool = True,
|
|
46
28
|
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
29
|
+
camel_case: bool = True,
|
|
30
|
+
as_write: bool = False,
|
|
47
31
|
):
|
|
48
32
|
super().__init__(
|
|
49
33
|
items,
|
|
@@ -53,6 +37,8 @@ class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
|
|
|
53
37
|
limit=limit,
|
|
54
38
|
unpack_metadata=unpack_metadata,
|
|
55
39
|
skip_metadata_values=skip_metadata_values,
|
|
40
|
+
camel_case=camel_case,
|
|
41
|
+
as_write=as_write,
|
|
56
42
|
)
|
|
57
43
|
# This is used by the ClassicExtractor to log the target nodes, such
|
|
58
44
|
# that it can extract them.
|
|
@@ -61,167 +47,30 @@ class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
|
|
|
61
47
|
self._target_external_ids_by_type: dict[InstanceIdPrefix, set[str]] = defaultdict(set)
|
|
62
48
|
|
|
63
49
|
@classmethod
|
|
64
|
-
def
|
|
50
|
+
def _from_dataset(
|
|
65
51
|
cls,
|
|
66
52
|
client: CogniteClient,
|
|
67
53
|
data_set_external_id: str,
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
unpack_metadata: bool = True,
|
|
72
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
73
|
-
):
|
|
74
|
-
return cls(
|
|
75
|
-
client.relationships(data_set_external_ids=data_set_external_id),
|
|
76
|
-
namespace=namespace,
|
|
77
|
-
to_type=to_type,
|
|
78
|
-
limit=limit,
|
|
79
|
-
unpack_metadata=unpack_metadata,
|
|
80
|
-
skip_metadata_values=skip_metadata_values,
|
|
81
|
-
)
|
|
54
|
+
) -> tuple[int | None, Iterable[Relationship]]:
|
|
55
|
+
items = client.relationships(data_set_external_ids=data_set_external_id)
|
|
56
|
+
return None, items
|
|
82
57
|
|
|
83
58
|
@classmethod
|
|
84
|
-
def
|
|
85
|
-
cls,
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
to_type: Callable[[Relationship], str | None] | None = None,
|
|
89
|
-
limit: int | None = None,
|
|
90
|
-
unpack_metadata: bool = True,
|
|
91
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
92
|
-
):
|
|
93
|
-
relationships = RelationshipList.load(Path(file_path).read_text())
|
|
94
|
-
return cls(
|
|
95
|
-
relationships,
|
|
96
|
-
namespace=namespace,
|
|
97
|
-
total=len(relationships),
|
|
98
|
-
to_type=to_type,
|
|
99
|
-
limit=limit,
|
|
100
|
-
unpack_metadata=unpack_metadata,
|
|
101
|
-
skip_metadata_values=skip_metadata_values,
|
|
102
|
-
)
|
|
103
|
-
|
|
104
|
-
def _item2triples(self, relationship: Relationship) -> list[Triple]:
|
|
105
|
-
"""Converts an asset to triples."""
|
|
59
|
+
def _from_hierarchy(
|
|
60
|
+
cls, client: CogniteClient, root_asset_external_id: str
|
|
61
|
+
) -> tuple[int | None, Iterable[T_CogniteResource]]:
|
|
62
|
+
raise NotImplementedError("Relationships do not have a hierarchy.")
|
|
106
63
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
)
|
|
112
|
-
|
|
113
|
-
# relationships do not have an internal id, so we generate one
|
|
114
|
-
id_ = self.namespace[f"{InstanceIdPrefix.relationship}{create_sha256_hash(relationship.external_id)}"]
|
|
115
|
-
|
|
116
|
-
type_ = self._get_rdf_type(relationship)
|
|
117
|
-
# Set rdf type
|
|
118
|
-
triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
|
|
119
|
-
|
|
120
|
-
# Set source and target types
|
|
121
|
-
if source_type := relationship.source_type:
|
|
122
|
-
triples.append(
|
|
123
|
-
(
|
|
124
|
-
id_,
|
|
125
|
-
self.namespace.source_type,
|
|
126
|
-
self.namespace[source_type.title()],
|
|
127
|
-
)
|
|
128
|
-
)
|
|
129
|
-
|
|
130
|
-
if target_type := relationship.target_type:
|
|
131
|
-
triples.append(
|
|
132
|
-
(
|
|
133
|
-
id_,
|
|
134
|
-
self.namespace.target_type,
|
|
135
|
-
self.namespace[target_type.title()],
|
|
136
|
-
)
|
|
137
|
-
)
|
|
138
|
-
|
|
139
|
-
# Create attributes
|
|
140
|
-
|
|
141
|
-
triples.append((id_, self.namespace.external_id, Literal(relationship.external_id)))
|
|
142
|
-
|
|
143
|
-
triples.append(
|
|
144
|
-
(
|
|
145
|
-
id_,
|
|
146
|
-
self.namespace.source_external_id,
|
|
147
|
-
Literal(relationship.source_external_id),
|
|
148
|
-
)
|
|
149
|
-
)
|
|
150
|
-
|
|
151
|
-
triples.append(
|
|
152
|
-
(
|
|
153
|
-
id_,
|
|
154
|
-
self.namespace.target_external_id,
|
|
155
|
-
Literal(relationship.target_external_id),
|
|
156
|
-
)
|
|
157
|
-
)
|
|
158
|
-
|
|
159
|
-
if relationship.start_time:
|
|
160
|
-
triples.append(
|
|
161
|
-
(
|
|
162
|
-
id_,
|
|
163
|
-
self.namespace.start_time,
|
|
164
|
-
Literal(datetime.fromtimestamp(relationship.start_time / 1000, timezone.utc)),
|
|
165
|
-
)
|
|
166
|
-
)
|
|
167
|
-
|
|
168
|
-
if relationship.end_time:
|
|
169
|
-
triples.append(
|
|
170
|
-
(
|
|
171
|
-
id_,
|
|
172
|
-
self.namespace.end_time,
|
|
173
|
-
Literal(datetime.fromtimestamp(relationship.end_time / 1000, timezone.utc)),
|
|
174
|
-
)
|
|
175
|
-
)
|
|
176
|
-
|
|
177
|
-
if relationship.created_time:
|
|
178
|
-
triples.append(
|
|
179
|
-
(
|
|
180
|
-
id_,
|
|
181
|
-
self.namespace.created_time,
|
|
182
|
-
Literal(datetime.fromtimestamp(relationship.created_time / 1000, timezone.utc)),
|
|
183
|
-
)
|
|
184
|
-
)
|
|
185
|
-
|
|
186
|
-
if relationship.last_updated_time:
|
|
187
|
-
triples.append(
|
|
188
|
-
(
|
|
189
|
-
id_,
|
|
190
|
-
self.namespace.last_updated_time,
|
|
191
|
-
Literal(datetime.fromtimestamp(relationship.last_updated_time / 1000, timezone.utc)),
|
|
192
|
-
)
|
|
193
|
-
)
|
|
194
|
-
|
|
195
|
-
if relationship.confidence:
|
|
196
|
-
triples.append(
|
|
197
|
-
(
|
|
198
|
-
id_,
|
|
199
|
-
self.namespace.confidence,
|
|
200
|
-
Literal(relationship.confidence),
|
|
201
|
-
)
|
|
202
|
-
)
|
|
203
|
-
|
|
204
|
-
if relationship.labels:
|
|
205
|
-
for label in relationship.labels:
|
|
206
|
-
# external_id can create ill-formed URIs, so we create websafe URIs
|
|
207
|
-
# since labels do not have internal ids, we use the external_id as the id
|
|
208
|
-
triples.append(
|
|
209
|
-
(
|
|
210
|
-
id_,
|
|
211
|
-
self.namespace.label,
|
|
212
|
-
self.namespace[f"{InstanceIdPrefix.label}{LabelsExtractor._label_id(label)}"],
|
|
213
|
-
)
|
|
214
|
-
)
|
|
64
|
+
@classmethod
|
|
65
|
+
def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[Relationship]]:
|
|
66
|
+
relationships = RelationshipList.load(Path(file_path).read_text())
|
|
67
|
+
return len(relationships), relationships
|
|
215
68
|
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
self.namespace.dataset,
|
|
222
|
-
self.namespace[f"{InstanceIdPrefix.data_set}{relationship.data_set_id}"],
|
|
223
|
-
)
|
|
69
|
+
def _fallback_id(self, item: Relationship) -> str | None:
|
|
70
|
+
if item.external_id and item.source_external_id and item.target_external_id:
|
|
71
|
+
if self._log_target_nodes and item.target_type and item.target_external_id:
|
|
72
|
+
self._target_external_ids_by_type[InstanceIdPrefix.from_str(item.target_type)].add(
|
|
73
|
+
item.target_external_id
|
|
224
74
|
)
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
return []
|
|
75
|
+
return create_sha256_hash(item.external_id)
|
|
76
|
+
return None
|
|
@@ -1,162 +1,37 @@
|
|
|
1
|
-
from collections.abc import
|
|
2
|
-
from datetime import datetime, timezone
|
|
1
|
+
from collections.abc import Iterable
|
|
3
2
|
from pathlib import Path
|
|
4
3
|
|
|
5
4
|
from cognite.client import CogniteClient
|
|
6
5
|
from cognite.client.data_classes import Sequence, SequenceFilter, SequenceList
|
|
7
|
-
from rdflib import RDF, Literal, Namespace
|
|
8
6
|
|
|
9
|
-
from
|
|
10
|
-
|
|
11
|
-
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
7
|
+
from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
12
8
|
|
|
13
9
|
|
|
14
10
|
class SequencesExtractor(ClassicCDFBaseExtractor[Sequence]):
|
|
15
|
-
"""Extract data from Cognite Data Fusions Sequences into Neat.
|
|
16
|
-
|
|
17
|
-
Args:
|
|
18
|
-
items (Iterable[Sequence]): An iterable of items.
|
|
19
|
-
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
|
|
20
|
-
to_type (Callable[[Sequence], str | None], optional): A function to convert an item to a type.
|
|
21
|
-
Defaults to None. If None or if the function returns None, the asset will be set to the default type.
|
|
22
|
-
total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
|
|
23
|
-
is installed. Defaults to None.
|
|
24
|
-
limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
|
|
25
|
-
testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
|
|
26
|
-
limit the extraction to 1000 assets to test the setup.
|
|
27
|
-
unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
|
|
28
|
-
a JSON string.
|
|
29
|
-
skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
|
|
30
|
-
values in this set will be skipped.
|
|
31
|
-
"""
|
|
11
|
+
"""Extract data from Cognite Data Fusions Sequences into Neat."""
|
|
32
12
|
|
|
33
13
|
_default_rdf_type = "Sequence"
|
|
14
|
+
_instance_id_prefix = InstanceIdPrefix.sequence
|
|
34
15
|
|
|
35
16
|
@classmethod
|
|
36
|
-
def
|
|
37
|
-
cls,
|
|
38
|
-
client: CogniteClient,
|
|
39
|
-
data_set_external_id: str,
|
|
40
|
-
namespace: Namespace | None = None,
|
|
41
|
-
to_type: Callable[[Sequence], str | None] | None = None,
|
|
42
|
-
limit: int | None = None,
|
|
43
|
-
unpack_metadata: bool = True,
|
|
44
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
45
|
-
):
|
|
17
|
+
def _from_dataset(cls, client: CogniteClient, data_set_external_id: str) -> tuple[int | None, Iterable[Sequence]]:
|
|
46
18
|
total = client.sequences.aggregate_count(
|
|
47
19
|
filter=SequenceFilter(data_set_ids=[{"externalId": data_set_external_id}])
|
|
48
20
|
)
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
total=total,
|
|
52
|
-
namespace=namespace,
|
|
53
|
-
to_type=to_type,
|
|
54
|
-
limit=limit,
|
|
55
|
-
unpack_metadata=unpack_metadata,
|
|
56
|
-
skip_metadata_values=skip_metadata_values,
|
|
57
|
-
)
|
|
21
|
+
items = client.sequences(data_set_external_ids=data_set_external_id)
|
|
22
|
+
return total, items
|
|
58
23
|
|
|
59
24
|
@classmethod
|
|
60
|
-
def
|
|
61
|
-
cls,
|
|
62
|
-
|
|
63
|
-
root_asset_external_id: str,
|
|
64
|
-
namespace: Namespace | None = None,
|
|
65
|
-
to_type: Callable[[Sequence], str | None] | None = None,
|
|
66
|
-
limit: int | None = None,
|
|
67
|
-
unpack_metadata: bool = True,
|
|
68
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
69
|
-
):
|
|
25
|
+
def _from_hierarchy(
|
|
26
|
+
cls, client: CogniteClient, root_asset_external_id: str
|
|
27
|
+
) -> tuple[int | None, Iterable[Sequence]]:
|
|
70
28
|
total = client.sequences.aggregate_count(
|
|
71
29
|
filter=SequenceFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
|
|
72
30
|
)
|
|
73
|
-
|
|
74
|
-
return
|
|
75
|
-
client.sequences(asset_subtree_external_ids=[root_asset_external_id]),
|
|
76
|
-
namespace,
|
|
77
|
-
to_type,
|
|
78
|
-
total,
|
|
79
|
-
limit,
|
|
80
|
-
unpack_metadata=unpack_metadata,
|
|
81
|
-
skip_metadata_values=skip_metadata_values,
|
|
82
|
-
)
|
|
31
|
+
items = client.sequences(asset_subtree_external_ids=[root_asset_external_id])
|
|
32
|
+
return total, items
|
|
83
33
|
|
|
84
34
|
@classmethod
|
|
85
|
-
def
|
|
86
|
-
cls,
|
|
87
|
-
file_path: str,
|
|
88
|
-
namespace: Namespace | None = None,
|
|
89
|
-
to_type: Callable[[Sequence], str | None] | None = None,
|
|
90
|
-
limit: int | None = None,
|
|
91
|
-
unpack_metadata: bool = True,
|
|
92
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
93
|
-
):
|
|
35
|
+
def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[Sequence]]:
|
|
94
36
|
sequences = SequenceList.load(Path(file_path).read_text())
|
|
95
|
-
return
|
|
96
|
-
sequences,
|
|
97
|
-
total=len(sequences),
|
|
98
|
-
namespace=namespace,
|
|
99
|
-
to_type=to_type,
|
|
100
|
-
limit=limit,
|
|
101
|
-
unpack_metadata=unpack_metadata,
|
|
102
|
-
skip_metadata_values=skip_metadata_values,
|
|
103
|
-
)
|
|
104
|
-
|
|
105
|
-
def _item2triples(self, sequence: Sequence) -> list[Triple]:
|
|
106
|
-
id_ = self.namespace[f"{InstanceIdPrefix.sequence}{sequence.id}"]
|
|
107
|
-
|
|
108
|
-
type_ = self._get_rdf_type(sequence)
|
|
109
|
-
# Set rdf type
|
|
110
|
-
triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
|
|
111
|
-
|
|
112
|
-
# Create attributes
|
|
113
|
-
|
|
114
|
-
if sequence.external_id:
|
|
115
|
-
triples.append((id_, self.namespace.external_id, Literal(sequence.external_id)))
|
|
116
|
-
|
|
117
|
-
if sequence.name:
|
|
118
|
-
triples.append((id_, self.namespace.name, Literal(sequence.name)))
|
|
119
|
-
|
|
120
|
-
if sequence.metadata:
|
|
121
|
-
triples.extend(self._metadata_to_triples(id_, sequence.metadata))
|
|
122
|
-
|
|
123
|
-
if sequence.description:
|
|
124
|
-
triples.append((id_, self.namespace.description, Literal(sequence.description)))
|
|
125
|
-
|
|
126
|
-
if sequence.created_time:
|
|
127
|
-
triples.append(
|
|
128
|
-
(
|
|
129
|
-
id_,
|
|
130
|
-
self.namespace.created_time,
|
|
131
|
-
Literal(datetime.fromtimestamp(sequence.created_time / 1000, timezone.utc)),
|
|
132
|
-
)
|
|
133
|
-
)
|
|
134
|
-
|
|
135
|
-
if sequence.last_updated_time:
|
|
136
|
-
triples.append(
|
|
137
|
-
(
|
|
138
|
-
id_,
|
|
139
|
-
self.namespace.last_updated_time,
|
|
140
|
-
Literal(datetime.fromtimestamp(sequence.last_updated_time / 1000, timezone.utc)),
|
|
141
|
-
)
|
|
142
|
-
)
|
|
143
|
-
|
|
144
|
-
if sequence.data_set_id:
|
|
145
|
-
triples.append(
|
|
146
|
-
(
|
|
147
|
-
id_,
|
|
148
|
-
self.namespace.data_set_id,
|
|
149
|
-
self.namespace[f"{InstanceIdPrefix.data_set}{sequence.data_set_id}"],
|
|
150
|
-
)
|
|
151
|
-
)
|
|
152
|
-
|
|
153
|
-
if sequence.asset_id:
|
|
154
|
-
triples.append(
|
|
155
|
-
(
|
|
156
|
-
id_,
|
|
157
|
-
self.namespace.asset,
|
|
158
|
-
self.namespace[f"{InstanceIdPrefix.asset}{sequence.asset_id}"],
|
|
159
|
-
)
|
|
160
|
-
)
|
|
161
|
-
|
|
162
|
-
return triples
|
|
37
|
+
return len(sequences), sequences
|
|
@@ -1,199 +1,41 @@
|
|
|
1
|
-
from collections.abc import
|
|
2
|
-
from datetime import datetime, timezone
|
|
1
|
+
from collections.abc import Iterable
|
|
3
2
|
from pathlib import Path
|
|
4
3
|
|
|
5
4
|
from cognite.client import CogniteClient
|
|
6
5
|
from cognite.client.data_classes import TimeSeries, TimeSeriesFilter, TimeSeriesList
|
|
7
|
-
from pydantic import AnyHttpUrl, ValidationError
|
|
8
|
-
from rdflib import RDF, Literal, Namespace, URIRef
|
|
9
6
|
|
|
10
|
-
from
|
|
11
|
-
|
|
12
|
-
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
7
|
+
from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
13
8
|
|
|
14
9
|
|
|
15
10
|
class TimeSeriesExtractor(ClassicCDFBaseExtractor[TimeSeries]):
|
|
16
|
-
"""Extract data from Cognite Data Fusions TimeSeries into Neat.
|
|
17
|
-
|
|
18
|
-
Args:
|
|
19
|
-
items (Iterable[TimeSeries]): An iterable of items.
|
|
20
|
-
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
|
|
21
|
-
to_type (Callable[[TimeSeries], str | None], optional): A function to convert an item to a type.
|
|
22
|
-
Defaults to None. If None or if the function returns None, the asset will be set to the default type.
|
|
23
|
-
total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
|
|
24
|
-
is installed. Defaults to None.
|
|
25
|
-
limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
|
|
26
|
-
testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
|
|
27
|
-
limit the extraction to 1000 assets to test the setup.
|
|
28
|
-
unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
|
|
29
|
-
a JSON string.
|
|
30
|
-
skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
|
|
31
|
-
values in this set will be skipped.
|
|
32
|
-
"""
|
|
11
|
+
"""Extract data from Cognite Data Fusions TimeSeries into Neat."""
|
|
33
12
|
|
|
34
13
|
_default_rdf_type = "TimeSeries"
|
|
14
|
+
_instance_id_prefix = InstanceIdPrefix.time_series
|
|
35
15
|
|
|
36
16
|
@classmethod
|
|
37
|
-
def
|
|
17
|
+
def _from_dataset(
|
|
38
18
|
cls,
|
|
39
19
|
client: CogniteClient,
|
|
40
20
|
data_set_external_id: str,
|
|
41
|
-
|
|
42
|
-
to_type: Callable[[TimeSeries], str | None] | None = None,
|
|
43
|
-
limit: int | None = None,
|
|
44
|
-
unpack_metadata: bool = True,
|
|
45
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
46
|
-
):
|
|
21
|
+
) -> tuple[int | None, Iterable[TimeSeries]]:
|
|
47
22
|
total = client.time_series.aggregate_count(
|
|
48
23
|
filter=TimeSeriesFilter(data_set_ids=[{"externalId": data_set_external_id}])
|
|
49
24
|
)
|
|
50
|
-
|
|
51
|
-
return
|
|
52
|
-
client.time_series(data_set_external_ids=data_set_external_id),
|
|
53
|
-
total=total,
|
|
54
|
-
namespace=namespace,
|
|
55
|
-
to_type=to_type,
|
|
56
|
-
limit=limit,
|
|
57
|
-
unpack_metadata=unpack_metadata,
|
|
58
|
-
skip_metadata_values=skip_metadata_values,
|
|
59
|
-
)
|
|
25
|
+
items = client.time_series(data_set_external_ids=data_set_external_id)
|
|
26
|
+
return total, items
|
|
60
27
|
|
|
61
28
|
@classmethod
|
|
62
|
-
def
|
|
63
|
-
cls,
|
|
64
|
-
|
|
65
|
-
root_asset_external_id: str,
|
|
66
|
-
namespace: Namespace | None = None,
|
|
67
|
-
to_type: Callable[[TimeSeries], str | None] | None = None,
|
|
68
|
-
limit: int | None = None,
|
|
69
|
-
unpack_metadata: bool = True,
|
|
70
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
71
|
-
):
|
|
29
|
+
def _from_hierarchy(
|
|
30
|
+
cls, client: CogniteClient, root_asset_external_id: str
|
|
31
|
+
) -> tuple[int | None, Iterable[TimeSeries]]:
|
|
72
32
|
total = client.time_series.aggregate_count(
|
|
73
33
|
filter=TimeSeriesFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
|
|
74
34
|
)
|
|
75
|
-
|
|
76
|
-
return
|
|
77
|
-
client.time_series(asset_external_ids=[root_asset_external_id]),
|
|
78
|
-
namespace,
|
|
79
|
-
to_type,
|
|
80
|
-
total,
|
|
81
|
-
limit,
|
|
82
|
-
unpack_metadata=unpack_metadata,
|
|
83
|
-
skip_metadata_values=skip_metadata_values,
|
|
84
|
-
)
|
|
35
|
+
items = client.time_series(asset_subtree_external_ids=root_asset_external_id)
|
|
36
|
+
return total, items
|
|
85
37
|
|
|
86
38
|
@classmethod
|
|
87
|
-
def
|
|
88
|
-
cls,
|
|
89
|
-
file_path: str,
|
|
90
|
-
namespace: Namespace | None = None,
|
|
91
|
-
to_type: Callable[[TimeSeries], str | None] | None = None,
|
|
92
|
-
limit: int | None = None,
|
|
93
|
-
unpack_metadata: bool = True,
|
|
94
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
95
|
-
):
|
|
39
|
+
def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[TimeSeries]]:
|
|
96
40
|
timeseries = TimeSeriesList.load(Path(file_path).read_text())
|
|
97
|
-
return
|
|
98
|
-
timeseries,
|
|
99
|
-
total=len(timeseries),
|
|
100
|
-
namespace=namespace,
|
|
101
|
-
to_type=to_type,
|
|
102
|
-
limit=limit,
|
|
103
|
-
unpack_metadata=unpack_metadata,
|
|
104
|
-
skip_metadata_values=skip_metadata_values,
|
|
105
|
-
)
|
|
106
|
-
|
|
107
|
-
def _item2triples(self, timeseries: TimeSeries) -> list[Triple]:
|
|
108
|
-
id_ = self.namespace[f"{InstanceIdPrefix.time_series}{timeseries.id}"]
|
|
109
|
-
|
|
110
|
-
# Set rdf type
|
|
111
|
-
type_ = self._get_rdf_type(timeseries)
|
|
112
|
-
triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
|
|
113
|
-
|
|
114
|
-
# Create attributes
|
|
115
|
-
if timeseries.external_id:
|
|
116
|
-
triples.append((id_, self.namespace.external_id, Literal(timeseries.external_id)))
|
|
117
|
-
|
|
118
|
-
if timeseries.name:
|
|
119
|
-
triples.append((id_, self.namespace.name, Literal(timeseries.name)))
|
|
120
|
-
|
|
121
|
-
if timeseries.is_string:
|
|
122
|
-
triples.append((id_, self.namespace.is_string, Literal(timeseries.is_string)))
|
|
123
|
-
|
|
124
|
-
if timeseries.metadata:
|
|
125
|
-
triples.extend(self._metadata_to_triples(id_, timeseries.metadata))
|
|
126
|
-
|
|
127
|
-
if timeseries.unit:
|
|
128
|
-
triples.append((id_, self.namespace.unit, Literal(timeseries.unit)))
|
|
129
|
-
|
|
130
|
-
if self.namespace.is_step:
|
|
131
|
-
triples.append((id_, self.namespace.is_step, Literal(timeseries.is_step)))
|
|
132
|
-
|
|
133
|
-
if timeseries.description:
|
|
134
|
-
triples.append((id_, self.namespace.description, Literal(timeseries.description)))
|
|
135
|
-
|
|
136
|
-
if timeseries.security_categories:
|
|
137
|
-
for category in timeseries.security_categories:
|
|
138
|
-
triples.append((id_, self.namespace.security_categories, Literal(category)))
|
|
139
|
-
|
|
140
|
-
if timeseries.created_time:
|
|
141
|
-
triples.append(
|
|
142
|
-
(
|
|
143
|
-
id_,
|
|
144
|
-
self.namespace.created_time,
|
|
145
|
-
Literal(datetime.fromtimestamp(timeseries.created_time / 1000, timezone.utc)),
|
|
146
|
-
)
|
|
147
|
-
)
|
|
148
|
-
|
|
149
|
-
if timeseries.last_updated_time:
|
|
150
|
-
triples.append(
|
|
151
|
-
(
|
|
152
|
-
id_,
|
|
153
|
-
self.namespace.last_updated_time,
|
|
154
|
-
Literal(datetime.fromtimestamp(timeseries.last_updated_time / 1000, timezone.utc)),
|
|
155
|
-
)
|
|
156
|
-
)
|
|
157
|
-
|
|
158
|
-
if timeseries.legacy_name:
|
|
159
|
-
triples.append((id_, self.namespace.legacy_name, Literal(timeseries.legacy_name)))
|
|
160
|
-
|
|
161
|
-
# Create connections
|
|
162
|
-
if timeseries.unit_external_id:
|
|
163
|
-
# try to create connection to QUDT unit catalog
|
|
164
|
-
try:
|
|
165
|
-
triples.append(
|
|
166
|
-
(
|
|
167
|
-
id_,
|
|
168
|
-
self.namespace.unit_external_id,
|
|
169
|
-
URIRef(str(AnyHttpUrl(timeseries.unit_external_id))),
|
|
170
|
-
)
|
|
171
|
-
)
|
|
172
|
-
except ValidationError:
|
|
173
|
-
triples.append(
|
|
174
|
-
(
|
|
175
|
-
id_,
|
|
176
|
-
self.namespace.unit_external_id,
|
|
177
|
-
Literal(timeseries.unit_external_id),
|
|
178
|
-
)
|
|
179
|
-
)
|
|
180
|
-
|
|
181
|
-
if timeseries.data_set_id:
|
|
182
|
-
triples.append(
|
|
183
|
-
(
|
|
184
|
-
id_,
|
|
185
|
-
self.namespace.dataset,
|
|
186
|
-
self.namespace[f"{InstanceIdPrefix.data_set}{timeseries.data_set_id}"],
|
|
187
|
-
)
|
|
188
|
-
)
|
|
189
|
-
|
|
190
|
-
if timeseries.asset_id:
|
|
191
|
-
triples.append(
|
|
192
|
-
(
|
|
193
|
-
id_,
|
|
194
|
-
self.namespace.asset,
|
|
195
|
-
self.namespace[f"{InstanceIdPrefix.asset}{timeseries.asset_id}"],
|
|
196
|
-
)
|
|
197
|
-
)
|
|
198
|
-
|
|
199
|
-
return triples
|
|
41
|
+
return len(timeseries), timeseries
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
from collections.abc import Iterable
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import get_args
|
|
4
4
|
|
|
5
5
|
from rdflib import URIRef
|
|
6
6
|
from rdflib.util import guess_format
|
|
7
7
|
|
|
8
8
|
from cognite.neat._constants import DEFAULT_BASE_URI
|
|
9
|
-
from cognite.neat._graph._shared import
|
|
9
|
+
from cognite.neat._graph._shared import RDFTypes
|
|
10
10
|
from cognite.neat._graph.extractors._base import BaseExtractor
|
|
11
11
|
from cognite.neat._issues._base import IssueList
|
|
12
12
|
from cognite.neat._issues.errors import FileNotFoundNeatError, FileTypeUnexpectedError
|
|
@@ -29,19 +29,18 @@ class RdfFileExtractor(BaseExtractor):
|
|
|
29
29
|
issue_list: IssueList | None = None,
|
|
30
30
|
):
|
|
31
31
|
self.issue_list = issue_list or IssueList(title=f"{filepath.name}")
|
|
32
|
-
|
|
33
|
-
self.filepath = filepath
|
|
34
|
-
self.mime_type = rdflib_to_mime_types(cast(str, guess_format(str(self.filepath))))
|
|
35
32
|
self.base_uri = base_uri
|
|
33
|
+
self.filepath = filepath
|
|
34
|
+
self.format = guess_format(str(self.filepath))
|
|
36
35
|
|
|
37
36
|
if not self.filepath.exists():
|
|
38
37
|
self.issue_list.append(FileNotFoundNeatError(self.filepath))
|
|
39
38
|
|
|
40
|
-
if not self.
|
|
39
|
+
if not self.format:
|
|
41
40
|
self.issue_list.append(
|
|
42
41
|
FileTypeUnexpectedError(
|
|
43
42
|
self.filepath,
|
|
44
|
-
frozenset(
|
|
43
|
+
frozenset(get_args(RDFTypes)),
|
|
45
44
|
)
|
|
46
45
|
)
|
|
47
46
|
|