cognite-neat 0.91.0__py3-none-any.whl → 0.92.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognite/neat/_version.py +1 -1
- cognite/neat/graph/extractors/__init__.py +2 -0
- cognite/neat/graph/extractors/_classic_cdf/_assets.py +8 -8
- cognite/neat/graph/extractors/_classic_cdf/_base.py +26 -1
- cognite/neat/graph/extractors/_classic_cdf/_classic.py +208 -0
- cognite/neat/graph/extractors/_classic_cdf/_data_sets.py +110 -0
- cognite/neat/graph/extractors/_classic_cdf/_events.py +30 -5
- cognite/neat/graph/extractors/_classic_cdf/_files.py +33 -8
- cognite/neat/graph/extractors/_classic_cdf/_labels.py +14 -6
- cognite/neat/graph/extractors/_classic_cdf/_relationships.py +38 -7
- cognite/neat/graph/extractors/_classic_cdf/_sequences.py +30 -5
- cognite/neat/graph/extractors/_classic_cdf/_timeseries.py +30 -5
- cognite/neat/utils/collection_.py +4 -3
- {cognite_neat-0.91.0.dist-info → cognite_neat-0.92.1.dist-info}/METADATA +2 -2
- {cognite_neat-0.91.0.dist-info → cognite_neat-0.92.1.dist-info}/RECORD +18 -16
- {cognite_neat-0.91.0.dist-info → cognite_neat-0.92.1.dist-info}/LICENSE +0 -0
- {cognite_neat-0.91.0.dist-info → cognite_neat-0.92.1.dist-info}/WHEEL +0 -0
- {cognite_neat-0.91.0.dist-info → cognite_neat-0.92.1.dist-info}/entry_points.txt +0 -0
cognite/neat/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.
|
|
1
|
+
__version__ = "0.92.1"
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from ._base import BaseExtractor
|
|
2
2
|
from ._classic_cdf._assets import AssetsExtractor
|
|
3
|
+
from ._classic_cdf._classic import ClassicGraphExtractor
|
|
3
4
|
from ._classic_cdf._events import EventsExtractor
|
|
4
5
|
from ._classic_cdf._files import FilesExtractor
|
|
5
6
|
from ._classic_cdf._labels import LabelsExtractor
|
|
@@ -15,6 +16,7 @@ from ._rdf_file import RdfFileExtractor
|
|
|
15
16
|
__all__ = [
|
|
16
17
|
"BaseExtractor",
|
|
17
18
|
"AssetsExtractor",
|
|
19
|
+
"ClassicGraphExtractor",
|
|
18
20
|
"MockGraphGenerator",
|
|
19
21
|
"RelationshipsExtractor",
|
|
20
22
|
"TimeSeriesExtractor",
|
|
@@ -8,12 +8,12 @@ from cognite.client.data_classes import Asset, AssetFilter, AssetList
|
|
|
8
8
|
from rdflib import RDF, Literal, Namespace
|
|
9
9
|
|
|
10
10
|
from cognite.neat.graph.models import Triple
|
|
11
|
-
from cognite.neat.utils.auxiliary import create_sha256_hash
|
|
12
11
|
|
|
13
|
-
from ._base import DEFAULT_SKIP_METADATA_VALUES,
|
|
12
|
+
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
13
|
+
from ._labels import LabelsExtractor
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
class AssetsExtractor(
|
|
16
|
+
class AssetsExtractor(ClassicCDFBaseExtractor[Asset]):
|
|
17
17
|
"""Extract data from Cognite Data Fusions Assets into Neat.
|
|
18
18
|
|
|
19
19
|
Args:
|
|
@@ -108,7 +108,7 @@ class AssetsExtractor(ClassicCDFExtractor[Asset]):
|
|
|
108
108
|
|
|
109
109
|
def _item2triples(self, asset: Asset) -> list[Triple]:
|
|
110
110
|
"""Converts an asset to triples."""
|
|
111
|
-
id_ = self.namespace[f"
|
|
111
|
+
id_ = self.namespace[f"{InstanceIdPrefix.asset}{asset.id}"]
|
|
112
112
|
|
|
113
113
|
type_ = self._get_rdf_type(asset)
|
|
114
114
|
|
|
@@ -151,7 +151,7 @@ class AssetsExtractor(ClassicCDFExtractor[Asset]):
|
|
|
151
151
|
(
|
|
152
152
|
id_,
|
|
153
153
|
self.namespace.label,
|
|
154
|
-
self.namespace[f"
|
|
154
|
+
self.namespace[f"{InstanceIdPrefix.label}{LabelsExtractor._label_id(label)}"],
|
|
155
155
|
)
|
|
156
156
|
)
|
|
157
157
|
|
|
@@ -160,17 +160,17 @@ class AssetsExtractor(ClassicCDFExtractor[Asset]):
|
|
|
160
160
|
|
|
161
161
|
# Create connections:
|
|
162
162
|
if asset.parent_id:
|
|
163
|
-
triples.append((id_, self.namespace.parent, self.namespace[f"
|
|
163
|
+
triples.append((id_, self.namespace.parent, self.namespace[f"{InstanceIdPrefix.asset}{asset.parent_id}"]))
|
|
164
164
|
|
|
165
165
|
if asset.root_id:
|
|
166
|
-
triples.append((id_, self.namespace.root, self.namespace[f"
|
|
166
|
+
triples.append((id_, self.namespace.root, self.namespace[f"{InstanceIdPrefix.asset}{asset.root_id}"]))
|
|
167
167
|
|
|
168
168
|
if asset.data_set_id:
|
|
169
169
|
triples.append(
|
|
170
170
|
(
|
|
171
171
|
id_,
|
|
172
172
|
self.namespace.dataset,
|
|
173
|
-
self.namespace[f"
|
|
173
|
+
self.namespace[f"{InstanceIdPrefix.data_set}{asset.data_set_id}"],
|
|
174
174
|
)
|
|
175
175
|
)
|
|
176
176
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import re
|
|
3
|
+
import sys
|
|
3
4
|
from abc import ABC, abstractmethod
|
|
4
5
|
from collections.abc import Callable, Iterable, Set
|
|
5
6
|
from typing import Generic, TypeVar
|
|
@@ -16,8 +17,32 @@ T_CogniteResource = TypeVar("T_CogniteResource", bound=CogniteResource)
|
|
|
16
17
|
|
|
17
18
|
DEFAULT_SKIP_METADATA_VALUES = frozenset({"nan", "null", "none", ""})
|
|
18
19
|
|
|
20
|
+
if sys.version_info >= (3, 11):
|
|
21
|
+
from enum import StrEnum
|
|
22
|
+
else:
|
|
23
|
+
from backports.strenum import StrEnum
|
|
19
24
|
|
|
20
|
-
|
|
25
|
+
|
|
26
|
+
class InstanceIdPrefix(StrEnum):
|
|
27
|
+
asset = "Asset_"
|
|
28
|
+
label = "Label_"
|
|
29
|
+
relationship = "Relationship_"
|
|
30
|
+
sequence = "Sequence_"
|
|
31
|
+
file = "File_"
|
|
32
|
+
time_series = "TimeSeries_"
|
|
33
|
+
event = "Event_"
|
|
34
|
+
data_set = "DataSet_"
|
|
35
|
+
|
|
36
|
+
@classmethod
|
|
37
|
+
def from_str(cls, raw: str) -> "InstanceIdPrefix":
|
|
38
|
+
raw = raw.title() + "_"
|
|
39
|
+
if raw == "Timeseries_":
|
|
40
|
+
return cls.time_series
|
|
41
|
+
else:
|
|
42
|
+
return cls(raw)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
|
|
21
46
|
"""This is the Base Extractor for all classic CDF resources.
|
|
22
47
|
|
|
23
48
|
A classic resource is recognized in that it has a metadata attribute of type dict[str, str].
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
from collections.abc import Iterable, Sequence
|
|
3
|
+
from typing import ClassVar, NamedTuple
|
|
4
|
+
|
|
5
|
+
from cognite.client import CogniteClient
|
|
6
|
+
from rdflib import Namespace
|
|
7
|
+
|
|
8
|
+
from cognite.neat.constants import DEFAULT_NAMESPACE
|
|
9
|
+
from cognite.neat.graph.extractors._base import BaseExtractor
|
|
10
|
+
from cognite.neat.graph.models import Triple
|
|
11
|
+
from cognite.neat.utils.collection_ import chunker
|
|
12
|
+
from cognite.neat.utils.rdf_ import remove_namespace_from_uri
|
|
13
|
+
|
|
14
|
+
from ._assets import AssetsExtractor
|
|
15
|
+
from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
16
|
+
from ._data_sets import DataSetExtractor
|
|
17
|
+
from ._events import EventsExtractor
|
|
18
|
+
from ._files import FilesExtractor
|
|
19
|
+
from ._labels import LabelsExtractor
|
|
20
|
+
from ._relationships import RelationshipsExtractor
|
|
21
|
+
from ._sequences import SequencesExtractor
|
|
22
|
+
from ._timeseries import TimeSeriesExtractor
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class _ClassicCoreType(NamedTuple):
    """One core resource type of the classic CDF graph and how to reach it."""

    # Extractor class used to turn resources of this type into triples.
    extractor_cls: (
        type[AssetsExtractor] | type[TimeSeriesExtractor] | type[SequencesExtractor]
        | type[EventsExtractor] | type[FilesExtractor]
    )
    # Identifier prefix of the resource type.
    resource_type: InstanceIdPrefix
    # Name of the attribute on the client that exposes the API for this type.
    api_name: str
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ClassicGraphExtractor(BaseExtractor):
    """This extractor extracts all classic CDF Resources.

    The Classic Graph consists of the following core resource types.

    Classic Node CDF Resources:
     - Assets
     - TimeSeries
     - Sequences
     - Events
     - Files

    All the classic node CDF resources can have one or more connections to one or more assets. This
    will match a direct relationship in the data modeling of CDF.

    In addition, you have relationships between the classic node CDF resources. This matches an edge
    in the data modeling of CDF.

    Finally, you have labels and data sets that organize the graph. Data sets have a similar,
    but different, role as a space in data modeling, while labels can be compared to node types in
    data modeling, used to quickly filter and find nodes/edges.

    This extractor will extract the classic CDF graph into Neat starting from either a data set or a root asset.

    It works as follows:

    1. Extract all core nodes (assets, time series, sequences, events, files) filtered by the given data set or
       root asset.
    2. Extract all relationships starting from any of the extracted core nodes.
    3. Extract all core nodes that are targets of the relationships that are not already extracted.
    4. Extract all labels that are connected to the extracted core nodes/relationships.
    5. Extract all data sets that are connected to the extracted core nodes/relationships.

    Args:
        client (CogniteClient): The Cognite client to use.
        data_set_external_id (str, optional): The data set external id to extract from. Defaults to None.
        root_asset_external_id (str, optional): The root asset external id to extract from. Defaults to None.
        namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.

    Raises:
        ValueError: If not exactly one of data_set_external_id and root_asset_external_id is set.
    """

    # These are the core resource types in the classic CDF.
    _classic_node_types: ClassVar[tuple[_ClassicCoreType, ...]] = (
        _ClassicCoreType(AssetsExtractor, InstanceIdPrefix.asset, "assets"),
        _ClassicCoreType(TimeSeriesExtractor, InstanceIdPrefix.time_series, "time_series"),
        _ClassicCoreType(SequencesExtractor, InstanceIdPrefix.sequence, "sequences"),
        _ClassicCoreType(EventsExtractor, InstanceIdPrefix.event, "events"),
        _ClassicCoreType(FilesExtractor, InstanceIdPrefix.file, "files"),
    )

    def __init__(
        self,
        client: CogniteClient,
        data_set_external_id: str | None = None,
        root_asset_external_id: str | None = None,
        namespace: Namespace | None = None,
    ):
        self._client = client
        if sum([bool(data_set_external_id), bool(root_asset_external_id)]) != 1:
            raise ValueError("Exactly one of data_set_external_id or root_asset_external_id must be set.")
        self._root_asset_external_id = root_asset_external_id
        self._data_set_external_id = data_set_external_id
        self._namespace = namespace or DEFAULT_NAMESPACE

        # Bookkeeping that is filled while triples stream through the extractor.
        # External ids of the start nodes, used to look up their relationships.
        self._source_external_ids_by_type: dict[InstanceIdPrefix, set[str]] = defaultdict(set)
        # External ids of relationship targets that were not among the start nodes.
        self._target_external_ids_by_type: dict[InstanceIdPrefix, set[str]] = defaultdict(set)
        # Labels and data sets referenced by anything extracted so far.
        self._labels: set[str] = set()
        self._data_set_ids: set[int] = set()

    def extract(self) -> Iterable[Triple]:
        """Extracts all classic CDF Resources."""
        # The order matters: every step consumes the bookkeeping filled by the previous ones.
        yield from self._extract_core_start_nodes()

        yield from self._extract_start_node_relationships()

        yield from self._extract_core_end_nodes()

        yield from self._extract_labels()
        yield from self._extract_data_sets()

    def _extract_core_start_nodes(self) -> Iterable[Triple]:
        """Yield the triples of all core nodes in the selected data set or asset hierarchy."""
        for core_node in self._classic_node_types:
            if self._data_set_external_id:
                extractor = core_node.extractor_cls.from_dataset(
                    self._client, self._data_set_external_id, self._namespace, unpack_metadata=False
                )
            elif self._root_asset_external_id:
                extractor = core_node.extractor_cls.from_hierarchy(
                    self._client, self._root_asset_external_id, self._namespace, unpack_metadata=False
                )
            else:
                # Guarded against in __init__; kept as a defensive check.
                raise ValueError("Exactly one of data_set_external_id or root_asset_external_id must be set.")

            yield from self._extract_with_logging_label_dataset(extractor, core_node.resource_type)

    def _extract_start_node_relationships(self) -> Iterable[Triple]:
        """Yield the triples of all relationships that start in one of the start nodes."""
        for start_resource_type, source_external_ids in self._source_external_ids_by_type.items():
            start_type = start_resource_type.removesuffix("_")
            for chunk in self._chunk(list(source_external_ids), description=f"Extracting {start_type} relationships"):
                relationship_iterator = self._client.relationships(
                    source_external_ids=list(chunk), source_types=[start_type]
                )
                extractor = RelationshipsExtractor(relationship_iterator, self._namespace, unpack_metadata=False)
                # This is a private attribute, but we need to set it to log the target nodes.
                extractor._log_target_nodes = True

                # Route the triples through the logging helper so that labels and data sets
                # referenced by the relationships themselves are extracted later as well.
                yield from self._extract_with_logging_label_dataset(extractor)

                # After the extraction is done, we need to update all the new target nodes so
                # we can extract them in the next step.
                for end_type, target_external_ids in extractor._target_external_ids_by_type.items():
                    # We only want to extract the target nodes that are not already extracted.
                    # .get() is used on purpose: indexing the defaultdict would create a new key,
                    # and we must not add keys to the dictionary we are currently iterating over.
                    already_extracted = self._source_external_ids_by_type.get(end_type, frozenset())
                    new_targets = target_external_ids - already_extracted
                    if new_targets:
                        self._target_external_ids_by_type[end_type].update(new_targets)

    def _extract_core_end_nodes(self) -> Iterable[Triple]:
        """Yield the triples of relationship targets that were not part of the start nodes."""
        for core_node in self._classic_node_types:
            target_external_ids = self._target_external_ids_by_type[core_node.resource_type]
            api = getattr(self._client, core_node.api_name)
            for chunk in self._chunk(
                list(target_external_ids),
                description=f"Extracting end nodes {core_node.resource_type.removesuffix('_')}",
            ):
                resource_iterator = api.retrieve_multiple(external_ids=list(chunk), ignore_unknown_ids=True)
                extractor = core_node.extractor_cls(resource_iterator, self._namespace, unpack_metadata=False)
                # No resource type is passed: end nodes are not used as relationship sources.
                yield from self._extract_with_logging_label_dataset(extractor)

    def _extract_labels(self) -> Iterable[Triple]:
        """Yield the triples of all labels referenced by the extracted resources."""
        # NOTE(review): label identifiers are percent-encoded by LabelsExtractor._label_id;
        # confirm that the ids collected here equal the raw external ids expected by the API.
        for chunk in self._chunk(list(self._labels), description="Extracting labels"):
            label_iterator = self._client.labels.retrieve(external_id=list(chunk), ignore_unknown_ids=True)
            yield from LabelsExtractor(label_iterator, self._namespace).extract()

    def _extract_data_sets(self) -> Iterable[Triple]:
        """Yield the triples of all data sets referenced by the extracted resources."""
        for chunk in self._chunk(list(self._data_set_ids), description="Extracting data sets"):
            data_set_iterator = self._client.data_sets.retrieve_multiple(ids=list(chunk), ignore_unknown_ids=True)
            yield from DataSetExtractor(data_set_iterator, self._namespace, unpack_metadata=False).extract()

    def _extract_with_logging_label_dataset(
        self, extractor: ClassicCDFBaseExtractor, resource_type: InstanceIdPrefix | None = None
    ) -> Iterable[Triple]:
        """Yield the triples of an extractor while recording ids needed by later steps.

        Args:
            extractor: The extractor to run.
            resource_type: When given, the external ids seen are recorded as start nodes
                of this type. When None, external ids are not recorded.

        Yields:
            Every triple produced by the extractor, unchanged.
        """
        external_id_predicate = self._namespace.external_id
        label_predicate = self._namespace.label
        # The extractors do not agree on the predicate used for the data set reference:
        # assets and relationships use `dataset`, events, files and labels use `data_set_id`.
        data_set_predicates = (self._namespace.dataset, self._namespace.data_set_id)

        for triple in extractor.extract():
            predicate = triple[1]
            if predicate == external_id_predicate and resource_type is not None:
                self._source_external_ids_by_type[resource_type].add(remove_namespace_from_uri(triple[2]))
            elif predicate == label_predicate:
                self._labels.add(remove_namespace_from_uri(triple[2]).removeprefix(InstanceIdPrefix.label))
            elif predicate in data_set_predicates:
                self._data_set_ids.add(
                    int(remove_namespace_from_uri(triple[2]).removeprefix(InstanceIdPrefix.data_set))
                )
            yield triple

    @staticmethod
    def _chunk(items: Sequence, description: str) -> Iterable:
        """Split items into chunks of at most 1000, with a progress bar if rich is installed.

        Args:
            items: The items to split.
            description: The text shown next to the progress bar.

        Returns:
            An iterable over the chunks.
        """
        chunk_size = 1000
        to_iterate: Iterable = chunker(items, chunk_size=chunk_size)
        try:
            from rich.progress import track
        except ModuleNotFoundError:
            # rich is optional; without it the chunks are returned without progress reporting.
            return to_iterate
        # Ceiling division gives the exact number of chunks, also for empty input
        # and for lengths that are an exact multiple of the chunk size.
        number_of_chunks = (len(items) + chunk_size - 1) // chunk_size
        return track(to_iterate, total=number_of_chunks, description=description)
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
from collections.abc import Set
|
|
2
|
+
from datetime import datetime, timezone
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from cognite.client import CogniteClient
|
|
6
|
+
from cognite.client.data_classes import DataSet, DataSetList
|
|
7
|
+
from cognite.client.utils.useful_types import SequenceNotStr
|
|
8
|
+
from rdflib import RDF, Literal, Namespace
|
|
9
|
+
|
|
10
|
+
from cognite.neat.graph.models import Triple
|
|
11
|
+
|
|
12
|
+
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DataSetExtractor(ClassicCDFBaseExtractor[DataSet]):
    """Extract DataSets from Cognite Data Fusions into Neat.

    Args:
        items (Iterable[DataSet]): An iterable of data sets.
        namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
        to_type (Callable[[DataSet], str | None], optional): A function to convert a data set to a type.
            Defaults to None. If None or if the function returns None, the data set will be set to the
            default type "DataSet".
        total (int, optional): The total number of data sets to load. If passed, you will get a progress bar
            if rich is installed. Defaults to None.
        limit (int, optional): The maximal number of data sets to load. Defaults to None. This is typically
            used for testing setup of the extractor.
        unpack_metadata (bool, optional): Whether to unpack metadata. If False, the metadata is yielded as
            a JSON string.
        skip_metadata_values (set[str] | frozenset[str] | None, optional): A set of values to skip when unpacking
            metadata. Defaults to frozenset({"nan", "null", "none", ""}).
    """

    _default_rdf_type = "DataSet"

    @classmethod
    def from_dataset(
        cls,
        client: CogniteClient,
        data_set_external_id: SequenceNotStr[str],
        namespace: Namespace | None = None,
        unpack_metadata: bool = True,
        skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
    ):
        """Create an extractor for the data sets with the given external ids.

        Args:
            client: The Cognite client used to retrieve the data sets.
            data_set_external_id: The external ids of the data sets to extract.
            namespace: The namespace to use.
            unpack_metadata: Whether to unpack metadata.
            skip_metadata_values: Values to skip when unpacking metadata.
        """
        return cls(
            client.data_sets.retrieve_multiple(external_ids=data_set_external_id),
            namespace=namespace,
            total=len(data_set_external_id),
            unpack_metadata=unpack_metadata,
            skip_metadata_values=skip_metadata_values,
        )

    @classmethod
    def from_file(
        cls,
        file_path: str,
        namespace: Namespace | None = None,
        unpack_metadata: bool = True,
        skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
    ):
        """Create an extractor for the data sets stored in a file.

        Args:
            file_path: Path to a file whose text can be loaded with DataSetList.load.
            namespace: The namespace to use.
            unpack_metadata: Whether to unpack metadata.
            skip_metadata_values: Values to skip when unpacking metadata.
        """
        data_sets = DataSetList.load(Path(file_path).read_text())
        return cls(
            data_sets,
            namespace=namespace,
            total=len(data_sets),
            unpack_metadata=unpack_metadata,
            skip_metadata_values=skip_metadata_values,
        )

    def _item2triples(self, item: DataSet) -> list[Triple]:
        """Converts a data set to triples."""
        id_ = self.namespace[f"{InstanceIdPrefix.data_set}{item.id}"]

        type_ = self._get_rdf_type(item)

        triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]

        # Create attributes
        if item.name:
            triples.append((id_, self.namespace.name, Literal(item.name)))

        if item.description:
            triples.append((id_, self.namespace.description, Literal(item.description)))

        if item.external_id:
            triples.append((id_, self.namespace.external_id, Literal(item.external_id)))

        # Timestamps are given in milliseconds since epoch. They may be missing, for example
        # on data sets loaded from a file, and are then skipped instead of failing the conversion.
        timestamps = (
            (self.namespace.created_time, item.created_time),
            (self.namespace.last_updated_time, item.last_updated_time),
        )
        for predicate, milliseconds in timestamps:
            if milliseconds is not None:
                triples.append(
                    (
                        id_,
                        predicate,
                        Literal(datetime.fromtimestamp(milliseconds / 1000, timezone.utc)),
                    )
                )

        if item.write_protected:
            triples.append((id_, self.namespace.write_protected, Literal(item.write_protected)))

        if item.metadata:
            triples.extend(self._metadata_to_triples(id_, item.metadata))

        return triples
|
|
@@ -8,10 +8,10 @@ from rdflib import RDF, Literal, Namespace
|
|
|
8
8
|
|
|
9
9
|
from cognite.neat.graph.models import Triple
|
|
10
10
|
|
|
11
|
-
from ._base import DEFAULT_SKIP_METADATA_VALUES,
|
|
11
|
+
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
class EventsExtractor(
|
|
14
|
+
class EventsExtractor(ClassicCDFBaseExtractor[Event]):
|
|
15
15
|
"""Extract data from Cognite Data Fusions Events into Neat.
|
|
16
16
|
|
|
17
17
|
Args:
|
|
@@ -55,6 +55,31 @@ class EventsExtractor(ClassicCDFExtractor[Event]):
|
|
|
55
55
|
skip_metadata_values=skip_metadata_values,
|
|
56
56
|
)
|
|
57
57
|
|
|
58
|
+
@classmethod
def from_hierarchy(
    cls,
    client: CogniteClient,
    root_asset_external_id: str,
    namespace: Namespace | None = None,
    to_type: Callable[[Event], str | None] | None = None,
    limit: int | None = None,
    unpack_metadata: bool = True,
    skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
):
    """Create an extractor for the events in the subtree of a root asset.

    The number of matching events is requested first and handed to the
    constructor together with the event iterator.

    Args:
        client: The Cognite client used to count and iterate the events.
        root_asset_external_id: External id of the asset at the root of the subtree.
        namespace: The namespace to use.
        to_type: Optional function converting an event to a type.
        limit: Optional maximal number of events to load.
        unpack_metadata: Whether to unpack metadata.
        skip_metadata_values: Values to skip when unpacking metadata.
    """
    subtree_filter = EventFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
    event_count = client.events.aggregate_count(filter=subtree_filter)
    events = client.events(asset_subtree_external_ids=[root_asset_external_id])

    return cls(
        events,
        namespace,
        to_type,
        event_count,
        limit,
        unpack_metadata=unpack_metadata,
        skip_metadata_values=skip_metadata_values,
    )
|
|
82
|
+
|
|
58
83
|
@classmethod
|
|
59
84
|
def from_file(
|
|
60
85
|
cls,
|
|
@@ -78,7 +103,7 @@ class EventsExtractor(ClassicCDFExtractor[Event]):
|
|
|
78
103
|
)
|
|
79
104
|
|
|
80
105
|
def _item2triples(self, event: Event) -> list[Triple]:
|
|
81
|
-
id_ = self.namespace[f"
|
|
106
|
+
id_ = self.namespace[f"{InstanceIdPrefix.event}{event.id}"]
|
|
82
107
|
|
|
83
108
|
type_ = self._get_rdf_type(event)
|
|
84
109
|
|
|
@@ -146,12 +171,12 @@ class EventsExtractor(ClassicCDFExtractor[Event]):
|
|
|
146
171
|
(
|
|
147
172
|
id_,
|
|
148
173
|
self.namespace.data_set_id,
|
|
149
|
-
self.namespace[f"
|
|
174
|
+
self.namespace[f"{InstanceIdPrefix.data_set}{event.data_set_id}"],
|
|
150
175
|
)
|
|
151
176
|
)
|
|
152
177
|
|
|
153
178
|
if event.asset_ids:
|
|
154
179
|
for asset_id in event.asset_ids:
|
|
155
|
-
triples.append((id_, self.namespace.asset, self.namespace[f"
|
|
180
|
+
triples.append((id_, self.namespace.asset, self.namespace[f"{InstanceIdPrefix.asset}{asset_id}"]))
|
|
156
181
|
|
|
157
182
|
return triples
|
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
from collections.abc import Callable, Set
|
|
2
2
|
from datetime import datetime, timezone
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from urllib.parse import quote
|
|
5
4
|
|
|
6
5
|
from cognite.client import CogniteClient
|
|
7
|
-
from cognite.client.data_classes import FileMetadata, FileMetadataList
|
|
6
|
+
from cognite.client.data_classes import FileMetadata, FileMetadataFilter, FileMetadataList
|
|
8
7
|
from rdflib import RDF, Literal, Namespace
|
|
9
8
|
|
|
10
9
|
from cognite.neat.graph.models import Triple
|
|
11
10
|
|
|
12
|
-
from ._base import DEFAULT_SKIP_METADATA_VALUES,
|
|
11
|
+
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
12
|
+
from ._labels import LabelsExtractor
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
class FilesExtractor(
|
|
15
|
+
class FilesExtractor(ClassicCDFBaseExtractor[FileMetadata]):
|
|
16
16
|
"""Extract data from Cognite Data Fusions files metadata into Neat.
|
|
17
17
|
|
|
18
18
|
Args:
|
|
@@ -53,6 +53,31 @@ class FilesExtractor(ClassicCDFExtractor[FileMetadata]):
|
|
|
53
53
|
skip_metadata_values=skip_metadata_values,
|
|
54
54
|
)
|
|
55
55
|
|
|
56
|
+
@classmethod
def from_hierarchy(
    cls,
    client: CogniteClient,
    root_asset_external_id: str,
    namespace: Namespace | None = None,
    to_type: Callable[[FileMetadata], str | None] | None = None,
    limit: int | None = None,
    unpack_metadata: bool = True,
    skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
):
    """Create an extractor for the files in the subtree of a root asset.

    The number of matching files is taken from the first aggregate result and
    handed to the constructor together with the file metadata iterator.

    Args:
        client: The Cognite client used to count and iterate the files.
        root_asset_external_id: External id of the asset at the root of the subtree.
        namespace: The namespace to use.
        to_type: Optional function converting a file to a type.
        limit: Optional maximal number of files to load.
        unpack_metadata: Whether to unpack metadata.
        skip_metadata_values: Values to skip when unpacking metadata.
    """
    subtree_filter = FileMetadataFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
    aggregates = client.files.aggregate(filter=subtree_filter)
    file_count = aggregates[0].count
    files = client.files(asset_subtree_external_ids=[root_asset_external_id])

    return cls(
        files,
        namespace,
        to_type,
        file_count,
        limit,
        unpack_metadata=unpack_metadata,
        skip_metadata_values=skip_metadata_values,
    )
|
|
80
|
+
|
|
56
81
|
@classmethod
|
|
57
82
|
def from_file(
|
|
58
83
|
cls,
|
|
@@ -75,7 +100,7 @@ class FilesExtractor(ClassicCDFExtractor[FileMetadata]):
|
|
|
75
100
|
)
|
|
76
101
|
|
|
77
102
|
def _item2triples(self, file: FileMetadata) -> list[Triple]:
|
|
78
|
-
id_ = self.namespace[f"
|
|
103
|
+
id_ = self.namespace[f"{InstanceIdPrefix.file}{file.id}"]
|
|
79
104
|
|
|
80
105
|
type_ = self._get_rdf_type(file)
|
|
81
106
|
|
|
@@ -153,7 +178,7 @@ class FilesExtractor(ClassicCDFExtractor[FileMetadata]):
|
|
|
153
178
|
(
|
|
154
179
|
id_,
|
|
155
180
|
self.namespace.label,
|
|
156
|
-
self.namespace[f"
|
|
181
|
+
self.namespace[f"{InstanceIdPrefix.label}{LabelsExtractor._label_id(label)}"],
|
|
157
182
|
)
|
|
158
183
|
)
|
|
159
184
|
|
|
@@ -166,12 +191,12 @@ class FilesExtractor(ClassicCDFExtractor[FileMetadata]):
|
|
|
166
191
|
(
|
|
167
192
|
id_,
|
|
168
193
|
self.namespace.data_set_id,
|
|
169
|
-
self.namespace[f"
|
|
194
|
+
self.namespace[f"{InstanceIdPrefix.data_set}{file.data_set_id}"],
|
|
170
195
|
)
|
|
171
196
|
)
|
|
172
197
|
|
|
173
198
|
if file.asset_ids:
|
|
174
199
|
for asset_id in file.asset_ids:
|
|
175
|
-
triples.append((id_, self.namespace.asset, self.namespace[f"
|
|
200
|
+
triples.append((id_, self.namespace.asset, self.namespace[f"{InstanceIdPrefix.asset}{asset_id}"]))
|
|
176
201
|
|
|
177
202
|
return triples
|
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
from collections.abc import Callable, Set
|
|
2
2
|
from datetime import datetime, timezone
|
|
3
3
|
from pathlib import Path
|
|
4
|
+
from urllib.parse import quote
|
|
4
5
|
|
|
5
6
|
from cognite.client import CogniteClient
|
|
6
|
-
from cognite.client.data_classes import LabelDefinition, LabelDefinitionList
|
|
7
|
+
from cognite.client.data_classes import Label, LabelDefinition, LabelDefinitionList
|
|
7
8
|
from rdflib import RDF, Literal, Namespace
|
|
8
9
|
|
|
9
10
|
from cognite.neat.graph.models import Triple
|
|
10
|
-
from cognite.neat.utils.auxiliary import create_sha256_hash
|
|
11
11
|
|
|
12
|
-
from ._base import DEFAULT_SKIP_METADATA_VALUES,
|
|
12
|
+
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
class LabelsExtractor(
|
|
15
|
+
class LabelsExtractor(ClassicCDFBaseExtractor[LabelDefinition]):
|
|
16
16
|
"""Extract data from Cognite Data Fusions Labels into Neat.
|
|
17
17
|
|
|
18
18
|
Args:
|
|
@@ -78,7 +78,7 @@ class LabelsExtractor(ClassicCDFExtractor[LabelDefinition]):
|
|
|
78
78
|
if not label.external_id:
|
|
79
79
|
return []
|
|
80
80
|
|
|
81
|
-
id_ = self.namespace[f"
|
|
81
|
+
id_ = self.namespace[f"{InstanceIdPrefix.label}{self._label_id(label)}"]
|
|
82
82
|
|
|
83
83
|
type_ = self._get_rdf_type(label)
|
|
84
84
|
# Set rdf type
|
|
@@ -107,8 +107,16 @@ class LabelsExtractor(ClassicCDFExtractor[LabelDefinition]):
|
|
|
107
107
|
(
|
|
108
108
|
id_,
|
|
109
109
|
self.namespace.data_set_id,
|
|
110
|
-
self.namespace[f"
|
|
110
|
+
self.namespace[f"{InstanceIdPrefix.data_set}{label.data_set_id}"],
|
|
111
111
|
)
|
|
112
112
|
)
|
|
113
113
|
|
|
114
114
|
return triples
|
|
115
|
+
|
|
116
|
+
@staticmethod
def _label_id(label: Label | LabelDefinition) -> str:
    """Return the identifier used for a label in instance URIs.

    Labels do not have internal ids, so the external id is used. It is
    percent-encoded because a raw external id can create an ill-formed URI.

    Raises:
        ValueError: If the label has no external id.
    """
    external_id = label.external_id
    if external_id is not None:
        return quote(external_id)
    raise ValueError("External id must be set of the label")
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
from collections
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
from collections.abc import Callable, Iterable, Set
|
|
2
3
|
from datetime import datetime, timezone
|
|
3
4
|
from pathlib import Path
|
|
4
|
-
from urllib.parse import quote
|
|
5
5
|
|
|
6
6
|
from cognite.client import CogniteClient
|
|
7
7
|
from cognite.client.data_classes import Relationship, RelationshipList
|
|
@@ -10,10 +10,11 @@ from rdflib import RDF, Literal, Namespace
|
|
|
10
10
|
from cognite.neat.graph.models import Triple
|
|
11
11
|
from cognite.neat.utils.auxiliary import create_sha256_hash
|
|
12
12
|
|
|
13
|
-
from ._base import DEFAULT_SKIP_METADATA_VALUES,
|
|
13
|
+
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
14
|
+
from ._labels import LabelsExtractor
|
|
14
15
|
|
|
15
16
|
|
|
16
|
-
class RelationshipsExtractor(
|
|
17
|
+
class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
|
|
17
18
|
"""Extract data from Cognite Data Fusions Relationships into Neat.
|
|
18
19
|
|
|
19
20
|
Args:
|
|
@@ -34,6 +35,31 @@ class RelationshipsExtractor(ClassicCDFExtractor[Relationship]):
|
|
|
34
35
|
|
|
35
36
|
_default_rdf_type = "Relationship"
|
|
36
37
|
|
|
38
|
+
def __init__(
    self,
    items: Iterable[Relationship],
    namespace: Namespace | None = None,
    to_type: Callable[[Relationship], str | None] | None = None,
    total: int | None = None,
    limit: int | None = None,
    unpack_metadata: bool = True,
    skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
):
    """Set up the extractor and the private bookkeeping for relationship targets.

    All arguments are forwarded unchanged to the base extractor.
    """
    super().__init__(
        items,
        namespace=namespace,
        to_type=to_type,
        total=total,
        limit=limit,
        unpack_metadata=unpack_metadata,
        skip_metadata_values=skip_metadata_values,
    )
    # The two attributes below exist for the ClassicExtractor, which needs to know
    # the target nodes of the relationships in order to extract them afterwards.
    # They are private to avoid exposing them to the user.
    # Target external ids collected per resource type while logging is switched on.
    self._target_external_ids_by_type: dict[InstanceIdPrefix, set[str]] = defaultdict(set)
    # Logging of target nodes is off unless the caller switches it on.
    self._log_target_nodes = False
|
|
62
|
+
|
|
37
63
|
@classmethod
|
|
38
64
|
def from_dataset(
|
|
39
65
|
cls,
|
|
@@ -79,8 +105,13 @@ class RelationshipsExtractor(ClassicCDFExtractor[Relationship]):
|
|
|
79
105
|
"""Converts an asset to triples."""
|
|
80
106
|
|
|
81
107
|
if relationship.external_id and relationship.source_external_id and relationship.target_external_id:
|
|
108
|
+
if self._log_target_nodes and relationship.target_type and relationship.target_external_id:
|
|
109
|
+
self._target_external_ids_by_type[InstanceIdPrefix.from_str(relationship.target_type)].add(
|
|
110
|
+
relationship.target_external_id
|
|
111
|
+
)
|
|
112
|
+
|
|
82
113
|
# relationships do not have an internal id, so we generate one
|
|
83
|
-
id_ = self.namespace[f"
|
|
114
|
+
id_ = self.namespace[f"{InstanceIdPrefix.relationship}{create_sha256_hash(relationship.external_id)}"]
|
|
84
115
|
|
|
85
116
|
type_ = self._get_rdf_type(relationship)
|
|
86
117
|
# Set rdf type
|
|
@@ -178,7 +209,7 @@ class RelationshipsExtractor(ClassicCDFExtractor[Relationship]):
|
|
|
178
209
|
(
|
|
179
210
|
id_,
|
|
180
211
|
self.namespace.label,
|
|
181
|
-
self.namespace[f"
|
|
212
|
+
self.namespace[f"{InstanceIdPrefix.label}{LabelsExtractor._label_id(label)}"],
|
|
182
213
|
)
|
|
183
214
|
)
|
|
184
215
|
|
|
@@ -188,7 +219,7 @@ class RelationshipsExtractor(ClassicCDFExtractor[Relationship]):
|
|
|
188
219
|
(
|
|
189
220
|
id_,
|
|
190
221
|
self.namespace.dataset,
|
|
191
|
-
self.namespace[f"
|
|
222
|
+
self.namespace[f"{InstanceIdPrefix.data_set}{relationship.data_set_id}"],
|
|
192
223
|
)
|
|
193
224
|
)
|
|
194
225
|
|
|
@@ -8,10 +8,10 @@ from rdflib import RDF, Literal, Namespace
|
|
|
8
8
|
|
|
9
9
|
from cognite.neat.graph.models import Triple
|
|
10
10
|
|
|
11
|
-
from ._base import DEFAULT_SKIP_METADATA_VALUES,
|
|
11
|
+
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
class SequencesExtractor(
|
|
14
|
+
class SequencesExtractor(ClassicCDFBaseExtractor[Sequence]):
|
|
15
15
|
"""Extract data from Cognite Data Fusions Sequences into Neat.
|
|
16
16
|
|
|
17
17
|
Args:
|
|
@@ -56,6 +56,31 @@ class SequencesExtractor(ClassicCDFExtractor[Sequence]):
|
|
|
56
56
|
skip_metadata_values=skip_metadata_values,
|
|
57
57
|
)
|
|
58
58
|
|
|
59
|
+
@classmethod
|
|
60
|
+
def from_hierarchy(
|
|
61
|
+
cls,
|
|
62
|
+
client: CogniteClient,
|
|
63
|
+
root_asset_external_id: str,
|
|
64
|
+
namespace: Namespace | None = None,
|
|
65
|
+
to_type: Callable[[Sequence], str | None] | None = None,
|
|
66
|
+
limit: int | None = None,
|
|
67
|
+
unpack_metadata: bool = True,
|
|
68
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
69
|
+
):
|
|
70
|
+
total = client.sequences.aggregate_count(
|
|
71
|
+
filter=SequenceFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
return cls(
|
|
75
|
+
client.sequences(asset_subtree_external_ids=[root_asset_external_id]),
|
|
76
|
+
namespace,
|
|
77
|
+
to_type,
|
|
78
|
+
total,
|
|
79
|
+
limit,
|
|
80
|
+
unpack_metadata=unpack_metadata,
|
|
81
|
+
skip_metadata_values=skip_metadata_values,
|
|
82
|
+
)
|
|
83
|
+
|
|
59
84
|
@classmethod
|
|
60
85
|
def from_file(
|
|
61
86
|
cls,
|
|
@@ -78,7 +103,7 @@ class SequencesExtractor(ClassicCDFExtractor[Sequence]):
|
|
|
78
103
|
)
|
|
79
104
|
|
|
80
105
|
def _item2triples(self, sequence: Sequence) -> list[Triple]:
|
|
81
|
-
id_ = self.namespace[f"
|
|
106
|
+
id_ = self.namespace[f"{InstanceIdPrefix.sequence}{sequence.id}"]
|
|
82
107
|
|
|
83
108
|
type_ = self._get_rdf_type(sequence)
|
|
84
109
|
# Set rdf type
|
|
@@ -121,7 +146,7 @@ class SequencesExtractor(ClassicCDFExtractor[Sequence]):
|
|
|
121
146
|
(
|
|
122
147
|
id_,
|
|
123
148
|
self.namespace.data_set_id,
|
|
124
|
-
self.namespace[f"
|
|
149
|
+
self.namespace[f"{InstanceIdPrefix.data_set}{sequence.data_set_id}"],
|
|
125
150
|
)
|
|
126
151
|
)
|
|
127
152
|
|
|
@@ -130,7 +155,7 @@ class SequencesExtractor(ClassicCDFExtractor[Sequence]):
|
|
|
130
155
|
(
|
|
131
156
|
id_,
|
|
132
157
|
self.namespace.asset,
|
|
133
|
-
self.namespace[f"
|
|
158
|
+
self.namespace[f"{InstanceIdPrefix.asset}{sequence.asset_id}"],
|
|
134
159
|
)
|
|
135
160
|
)
|
|
136
161
|
|
|
@@ -9,10 +9,10 @@ from rdflib import RDF, Literal, Namespace, URIRef
|
|
|
9
9
|
|
|
10
10
|
from cognite.neat.graph.models import Triple
|
|
11
11
|
|
|
12
|
-
from ._base import DEFAULT_SKIP_METADATA_VALUES,
|
|
12
|
+
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
class TimeSeriesExtractor(
|
|
15
|
+
class TimeSeriesExtractor(ClassicCDFBaseExtractor[TimeSeries]):
|
|
16
16
|
"""Extract data from Cognite Data Fusions TimeSeries into Neat.
|
|
17
17
|
|
|
18
18
|
Args:
|
|
@@ -58,6 +58,31 @@ class TimeSeriesExtractor(ClassicCDFExtractor[TimeSeries]):
|
|
|
58
58
|
skip_metadata_values=skip_metadata_values,
|
|
59
59
|
)
|
|
60
60
|
|
|
61
|
+
@classmethod
|
|
62
|
+
def from_hierarchy(
|
|
63
|
+
cls,
|
|
64
|
+
client: CogniteClient,
|
|
65
|
+
root_asset_external_id: str,
|
|
66
|
+
namespace: Namespace | None = None,
|
|
67
|
+
to_type: Callable[[TimeSeries], str | None] | None = None,
|
|
68
|
+
limit: int | None = None,
|
|
69
|
+
unpack_metadata: bool = True,
|
|
70
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
71
|
+
):
|
|
72
|
+
total = client.time_series.aggregate_count(
|
|
73
|
+
filter=TimeSeriesFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
return cls(
|
|
77
|
+
client.time_series(asset_external_ids=[root_asset_external_id]),
|
|
78
|
+
namespace,
|
|
79
|
+
to_type,
|
|
80
|
+
total,
|
|
81
|
+
limit,
|
|
82
|
+
unpack_metadata=unpack_metadata,
|
|
83
|
+
skip_metadata_values=skip_metadata_values,
|
|
84
|
+
)
|
|
85
|
+
|
|
61
86
|
@classmethod
|
|
62
87
|
def from_file(
|
|
63
88
|
cls,
|
|
@@ -80,7 +105,7 @@ class TimeSeriesExtractor(ClassicCDFExtractor[TimeSeries]):
|
|
|
80
105
|
)
|
|
81
106
|
|
|
82
107
|
def _item2triples(self, timeseries: TimeSeries) -> list[Triple]:
|
|
83
|
-
id_ = self.namespace[f"
|
|
108
|
+
id_ = self.namespace[f"{InstanceIdPrefix.time_series}{timeseries.id}"]
|
|
84
109
|
|
|
85
110
|
# Set rdf type
|
|
86
111
|
type_ = self._get_rdf_type(timeseries)
|
|
@@ -158,7 +183,7 @@ class TimeSeriesExtractor(ClassicCDFExtractor[TimeSeries]):
|
|
|
158
183
|
(
|
|
159
184
|
id_,
|
|
160
185
|
self.namespace.dataset,
|
|
161
|
-
self.namespace[f"
|
|
186
|
+
self.namespace[f"{InstanceIdPrefix.data_set}{timeseries.data_set_id}"],
|
|
162
187
|
)
|
|
163
188
|
)
|
|
164
189
|
|
|
@@ -167,7 +192,7 @@ class TimeSeriesExtractor(ClassicCDFExtractor[TimeSeries]):
|
|
|
167
192
|
(
|
|
168
193
|
id_,
|
|
169
194
|
self.namespace.asset,
|
|
170
|
-
self.namespace[f"
|
|
195
|
+
self.namespace[f"{InstanceIdPrefix.asset}{timeseries.asset_id}"],
|
|
171
196
|
)
|
|
172
197
|
)
|
|
173
198
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from collections import Counter
|
|
2
|
-
from collections.abc import Sequence
|
|
2
|
+
from collections.abc import Iterable, Sequence
|
|
3
3
|
from typing import TypeVar
|
|
4
4
|
|
|
5
5
|
T_Element = TypeVar("T_Element")
|
|
@@ -14,5 +14,6 @@ def most_occurring_element(list_of_elements: list[T_Element]) -> T_Element:
|
|
|
14
14
|
return counts.most_common(1)[0][0]
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
def chunker(sequence: Sequence[T_Element], chunk_size: int) ->
|
|
18
|
-
|
|
17
|
+
def chunker(sequence: Sequence[T_Element], chunk_size: int) -> Iterable[Sequence[T_Element]]:
|
|
18
|
+
for i in range(0, len(sequence), chunk_size):
|
|
19
|
+
yield sequence[i : i + chunk_size]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cognite-neat
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.92.1
|
|
4
4
|
Summary: Knowledge graph transformation
|
|
5
5
|
Home-page: https://cognite-neat.readthedocs-hosted.com/
|
|
6
6
|
License: Apache-2.0
|
|
@@ -59,7 +59,7 @@ Description-Content-Type: text/markdown
|
|
|
59
59
|
|
|
60
60
|
# kNowlEdge grAph Transformer (NEAT)
|
|
61
61
|
|
|
62
|
-
[](https://github.com/cognitedata/neat/actions/workflows/release.yaml)
|
|
63
63
|
[](https://cognite-neat.readthedocs-hosted.com/en/latest/?badge=latest)
|
|
64
64
|
[](https://github.com/cognitedata/neat)
|
|
65
65
|
[](https://pypi.org/project/cognite-neat/)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
cognite/neat/__init__.py,sha256=AiexNcHdAHFbrrbo9c65gtil1dqx_SGraDH1PSsXjKE,126
|
|
2
2
|
cognite/neat/_shared.py,sha256=RSaHm2eJceTlvb-hMMe4nHgoHdPYDfN3XcxDXo24k3A,1530
|
|
3
|
-
cognite/neat/_version.py,sha256=
|
|
3
|
+
cognite/neat/_version.py,sha256=UAHzlES5E7AnCRpSS4aAUrJQvopOZoD9QiA0Ws0h2t0,23
|
|
4
4
|
cognite/neat/app/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
cognite/neat/app/api/asgi/metrics.py,sha256=nxFy7L5cChTI0a-zkCiJ59Aq8yLuIJp5c9Dg0wRXtV0,152
|
|
6
6
|
cognite/neat/app/api/configuration.py,sha256=L1DCtLZ1HZku8I2z-JWd5RDsXhIsboFsKwAMhkrm-bY,3600
|
|
@@ -49,17 +49,19 @@ cognite/neat/graph/examples/Knowledge-Graph-Nordic44-dirty.xml,sha256=ujJip6XBs5
|
|
|
49
49
|
cognite/neat/graph/examples/Knowledge-Graph-Nordic44.xml,sha256=U2Ns-M4LRjT1fBkhmRj63ur7jDzlRtHK9yOLf_npZ_g,1437996
|
|
50
50
|
cognite/neat/graph/examples/__init__.py,sha256=yAjHVY3b5jOjmbW-iLbhvu7BG014TpGi3K4igkDqW5I,368
|
|
51
51
|
cognite/neat/graph/examples/skos-capturing-sheet-wind-topics.xlsx,sha256=CV_yK5ZSbYS_ktfIZUPD8Sevs47zpswLXQUDFkGE4Gw,45798
|
|
52
|
-
cognite/neat/graph/extractors/__init__.py,sha256=
|
|
52
|
+
cognite/neat/graph/extractors/__init__.py,sha256=9A_AsBwdbnYDo-20c18wcu__S-h9PHCeQB_fGteK5mo,1858
|
|
53
53
|
cognite/neat/graph/extractors/_base.py,sha256=8IWygpkQTwo0UOmbbwWVI7540_klTVdUVX2JjVPFRIs,498
|
|
54
54
|
cognite/neat/graph/extractors/_classic_cdf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
|
-
cognite/neat/graph/extractors/_classic_cdf/_assets.py,sha256=
|
|
56
|
-
cognite/neat/graph/extractors/_classic_cdf/_base.py,sha256=
|
|
57
|
-
cognite/neat/graph/extractors/_classic_cdf/
|
|
58
|
-
cognite/neat/graph/extractors/_classic_cdf/
|
|
59
|
-
cognite/neat/graph/extractors/_classic_cdf/
|
|
60
|
-
cognite/neat/graph/extractors/_classic_cdf/
|
|
61
|
-
cognite/neat/graph/extractors/_classic_cdf/
|
|
62
|
-
cognite/neat/graph/extractors/_classic_cdf/
|
|
55
|
+
cognite/neat/graph/extractors/_classic_cdf/_assets.py,sha256=3zpwp4LIXf_dQgfXEQCmp0CNprBYGLC-VjNmhMXc-4k,6604
|
|
56
|
+
cognite/neat/graph/extractors/_classic_cdf/_base.py,sha256=5jCegqeqQr_Bxwskt0ns7aiivzz1KyEIxqD5Xojoa7Y,5071
|
|
57
|
+
cognite/neat/graph/extractors/_classic_cdf/_classic.py,sha256=Al4lO-vydyrqFjAt_OanmuIjd2fM4SS1TtukZj3YBM8,10186
|
|
58
|
+
cognite/neat/graph/extractors/_classic_cdf/_data_sets.py,sha256=_jn2YM_87flrJ2pdLUKApQ3I1M02zjnhDSGKhl0vdDU,4297
|
|
59
|
+
cognite/neat/graph/extractors/_classic_cdf/_events.py,sha256=08c1IavgRWdBem8YbSQnzXEYa41QYojmTilzZBbfAR0,6508
|
|
60
|
+
cognite/neat/graph/extractors/_classic_cdf/_files.py,sha256=hi5Zf_-JWbRN1wEoGc-JIT81YOcWGxWwPAh162Urs4s,7554
|
|
61
|
+
cognite/neat/graph/extractors/_classic_cdf/_labels.py,sha256=fNi_eqG2rfxMHyPUzAqRSARp_QRY3drYHH_WcoLzz0M,4766
|
|
62
|
+
cognite/neat/graph/extractors/_classic_cdf/_relationships.py,sha256=wSYQK1QiRD9aYnjOJWXOCjpEnpH6s0LqPcqi3gC9ckU,8792
|
|
63
|
+
cognite/neat/graph/extractors/_classic_cdf/_sequences.py,sha256=HMC-tmGE1Wgc7c1KeDyx0BlnYOxGbyhUHkoOHl1Yz60,6006
|
|
64
|
+
cognite/neat/graph/extractors/_classic_cdf/_timeseries.py,sha256=ZO7-dgIq7EavvmVfo0gAWHHM_WoNcfPJe7zRArt4W6w,7502
|
|
63
65
|
cognite/neat/graph/extractors/_dexpi.py,sha256=s3Ff3s3CMy0Xalixtfvnu-3yrEsCF26-9WVhIIZBZQI,9432
|
|
64
66
|
cognite/neat/graph/extractors/_dms.py,sha256=NbqY1nrisn-dJq8_qeCatpgJwm6-cKFsTISDR5afdbU,6688
|
|
65
67
|
cognite/neat/graph/extractors/_iodd.py,sha256=RiUxhVsV99YxrWjMxPQcLQzYG0xLYqZCEHduCoG6P8Q,6759
|
|
@@ -179,7 +181,7 @@ cognite/neat/utils/cdf/loaders/__init__.py,sha256=s2aPR5XLo6WZ0ybstAJlcGFYkA7CyH
|
|
|
179
181
|
cognite/neat/utils/cdf/loaders/_base.py,sha256=ryNC_AMXIESWXuTVJ-02L-HSVSpD6V49XdLTRYeFg70,1764
|
|
180
182
|
cognite/neat/utils/cdf/loaders/_data_modeling.py,sha256=0jynnwZ7utnG2KIkVRJBNvDshUCzwX29LZuGSlm6qUM,13216
|
|
181
183
|
cognite/neat/utils/cdf/loaders/_ingestion.py,sha256=QbF4ntaGUum6yTRTq9WNg8n49TQpfzF1T7H-Bx740ws,6326
|
|
182
|
-
cognite/neat/utils/collection_.py,sha256=
|
|
184
|
+
cognite/neat/utils/collection_.py,sha256=m5pn78NRQ9vx8l-NdMTAdYDLCTB8CQLX-rqxAWt7vyg,602
|
|
183
185
|
cognite/neat/utils/rdf_.py,sha256=VXDBQUt86vRntiGhejK35PlsbvKCUkuQQa1cMYz4SIc,5656
|
|
184
186
|
cognite/neat/utils/regex_patterns.py,sha256=rXOBW9d-n_eAffm9zVRIPFB7ElLS8VDdsvFDsr01Q-M,2155
|
|
185
187
|
cognite/neat/utils/spreadsheet.py,sha256=LI0c7dlW0zXHkHw0NvB-gg6Df6cDcE3FbiaHBYLXdzQ,2714
|
|
@@ -214,8 +216,8 @@ cognite/neat/workflows/steps_registry.py,sha256=FjMsFBlFFy82ABUzDnWoFidYODV3pp3c
|
|
|
214
216
|
cognite/neat/workflows/tasks.py,sha256=dqlJwKAb0jlkl7abbY8RRz3m7MT4SK8-7cntMWkOYjw,788
|
|
215
217
|
cognite/neat/workflows/triggers.py,sha256=_BLNplzoz0iic367u1mhHMHiUrCwP-SLK6_CZzfODX0,7071
|
|
216
218
|
cognite/neat/workflows/utils.py,sha256=gKdy3RLG7ctRhbCRwaDIWpL9Mi98zm56-d4jfHDqP1E,453
|
|
217
|
-
cognite_neat-0.
|
|
218
|
-
cognite_neat-0.
|
|
219
|
-
cognite_neat-0.
|
|
220
|
-
cognite_neat-0.
|
|
221
|
-
cognite_neat-0.
|
|
219
|
+
cognite_neat-0.92.1.dist-info/LICENSE,sha256=W8VmvFia4WHa3Gqxq1Ygrq85McUNqIGDVgtdvzT-XqA,11351
|
|
220
|
+
cognite_neat-0.92.1.dist-info/METADATA,sha256=YHC8hU6I2-w_BCUtS64Nz2X41ypo6W6dzei3NIhzOUw,9431
|
|
221
|
+
cognite_neat-0.92.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
222
|
+
cognite_neat-0.92.1.dist-info/entry_points.txt,sha256=61FPqiWb25vbqB0KI7znG8nsg_ibLHBvTjYnkPvNFso,50
|
|
223
|
+
cognite_neat-0.92.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|