cognite-neat 0.98.0__py3-none-any.whl → 0.99.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_client/__init__.py +4 -0
- cognite/neat/_client/_api/data_modeling_loaders.py +512 -0
- cognite/neat/_client/_api/schema.py +50 -0
- cognite/neat/_client/_api_client.py +17 -0
- cognite/neat/_client/data_classes/__init__.py +0 -0
- cognite/neat/{_utils/cdf/data_classes.py → _client/data_classes/data_modeling.py} +8 -135
- cognite/neat/{_rules/models/dms/_schema.py → _client/data_classes/schema.py} +21 -281
- cognite/neat/_graph/_shared.py +14 -15
- cognite/neat/_graph/extractors/_classic_cdf/_assets.py +14 -154
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +154 -7
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +23 -12
- cognite/neat/_graph/extractors/_classic_cdf/_data_sets.py +17 -92
- cognite/neat/_graph/extractors/_classic_cdf/_events.py +13 -162
- cognite/neat/_graph/extractors/_classic_cdf/_files.py +15 -179
- cognite/neat/_graph/extractors/_classic_cdf/_labels.py +32 -100
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +27 -178
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +14 -139
- cognite/neat/_graph/extractors/_classic_cdf/_timeseries.py +15 -173
- cognite/neat/_graph/extractors/_rdf_file.py +6 -7
- cognite/neat/_graph/queries/_base.py +17 -1
- cognite/neat/_graph/transformers/_classic_cdf.py +50 -134
- cognite/neat/_graph/transformers/_prune_graph.py +1 -1
- cognite/neat/_graph/transformers/_rdfpath.py +1 -1
- cognite/neat/_issues/warnings/__init__.py +6 -0
- cognite/neat/_issues/warnings/_external.py +8 -0
- cognite/neat/_issues/warnings/_properties.py +16 -0
- cognite/neat/_rules/_constants.py +7 -6
- cognite/neat/_rules/analysis/_base.py +8 -4
- cognite/neat/_rules/exporters/_base.py +3 -4
- cognite/neat/_rules/exporters/_rules2dms.py +29 -40
- cognite/neat/_rules/importers/_dms2rules.py +4 -5
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +25 -33
- cognite/neat/_rules/models/__init__.py +1 -1
- cognite/neat/_rules/models/_base_rules.py +22 -12
- cognite/neat/_rules/models/dms/__init__.py +2 -2
- cognite/neat/_rules/models/dms/_exporter.py +15 -20
- cognite/neat/_rules/models/dms/_rules.py +48 -3
- cognite/neat/_rules/models/dms/_rules_input.py +52 -8
- cognite/neat/_rules/models/dms/_validation.py +10 -5
- cognite/neat/_rules/models/entities/_single_value.py +32 -4
- cognite/neat/_rules/models/information/_rules.py +0 -8
- cognite/neat/_rules/models/mapping/__init__.py +2 -3
- cognite/neat/_rules/models/mapping/_classic2core.py +36 -146
- cognite/neat/_rules/models/mapping/_classic2core.yaml +339 -0
- cognite/neat/_rules/transformers/__init__.py +2 -2
- cognite/neat/_rules/transformers/_converters.py +110 -11
- cognite/neat/_rules/transformers/_mapping.py +105 -30
- cognite/neat/_rules/transformers/_verification.py +5 -2
- cognite/neat/_session/_base.py +49 -8
- cognite/neat/_session/_drop.py +35 -0
- cognite/neat/_session/_inspect.py +17 -5
- cognite/neat/_session/_mapping.py +39 -0
- cognite/neat/_session/_prepare.py +218 -23
- cognite/neat/_session/_read.py +49 -12
- cognite/neat/_session/_to.py +3 -3
- cognite/neat/_store/_base.py +27 -24
- cognite/neat/_utils/rdf_.py +28 -1
- cognite/neat/_version.py +1 -1
- cognite/neat/_workflows/steps/lib/current/rules_exporter.py +8 -3
- cognite/neat/_workflows/steps/lib/current/rules_importer.py +4 -1
- cognite/neat/_workflows/steps/lib/current/rules_validator.py +3 -2
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/METADATA +3 -3
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/RECORD +67 -64
- cognite/neat/_rules/models/mapping/_base.py +0 -131
- cognite/neat/_utils/cdf/loaders/__init__.py +0 -25
- cognite/neat/_utils/cdf/loaders/_base.py +0 -54
- cognite/neat/_utils/cdf/loaders/_data_modeling.py +0 -339
- cognite/neat/_utils/cdf/loaders/_ingestion.py +0 -167
- /cognite/neat/{_utils/cdf → _client/_api}/__init__.py +0 -0
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,182 +1,33 @@
|
|
|
1
|
-
from collections.abc import
|
|
2
|
-
from datetime import datetime, timezone
|
|
1
|
+
from collections.abc import Iterable
|
|
3
2
|
from pathlib import Path
|
|
4
3
|
|
|
5
4
|
from cognite.client import CogniteClient
|
|
6
5
|
from cognite.client.data_classes import Event, EventFilter, EventList
|
|
7
|
-
from rdflib import RDF, Literal, Namespace
|
|
8
6
|
|
|
9
|
-
from
|
|
10
|
-
|
|
11
|
-
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
7
|
+
from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
12
8
|
|
|
13
9
|
|
|
14
10
|
class EventsExtractor(ClassicCDFBaseExtractor[Event]):
|
|
15
|
-
"""Extract data from Cognite Data Fusions Events into Neat.
|
|
16
|
-
|
|
17
|
-
Args:
|
|
18
|
-
items (Iterable[Event]): An iterable of items.
|
|
19
|
-
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
|
|
20
|
-
to_type (Callable[[Event], str | None], optional): A function to convert an item to a type.
|
|
21
|
-
Defaults to None. If None or if the function returns None, the asset will be set to the default type.
|
|
22
|
-
total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
|
|
23
|
-
is installed. Defaults to None.
|
|
24
|
-
limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
|
|
25
|
-
testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
|
|
26
|
-
limit the extraction to 1000 assets to test the setup.
|
|
27
|
-
unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
|
|
28
|
-
a JSON string.
|
|
29
|
-
skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
|
|
30
|
-
values in this set will be skipped.
|
|
31
|
-
"""
|
|
11
|
+
"""Extract data from Cognite Data Fusions Events into Neat."""
|
|
32
12
|
|
|
33
13
|
_default_rdf_type = "Event"
|
|
14
|
+
_instance_id_prefix = InstanceIdPrefix.event
|
|
34
15
|
|
|
35
16
|
@classmethod
|
|
36
|
-
def
|
|
37
|
-
cls,
|
|
38
|
-
client: CogniteClient,
|
|
39
|
-
data_set_external_id: str,
|
|
40
|
-
namespace: Namespace | None = None,
|
|
41
|
-
to_type: Callable[[Event], str | None] | None = None,
|
|
42
|
-
limit: int | None = None,
|
|
43
|
-
unpack_metadata: bool = True,
|
|
44
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
45
|
-
):
|
|
17
|
+
def _from_dataset(cls, client: CogniteClient, data_set_external_id: str) -> tuple[int | None, Iterable[Event]]:
|
|
46
18
|
total = client.events.aggregate_count(filter=EventFilter(data_set_ids=[{"externalId": data_set_external_id}]))
|
|
47
|
-
|
|
48
|
-
return
|
|
49
|
-
client.events(data_set_external_ids=data_set_external_id),
|
|
50
|
-
namespace,
|
|
51
|
-
to_type,
|
|
52
|
-
total=total,
|
|
53
|
-
limit=limit,
|
|
54
|
-
unpack_metadata=unpack_metadata,
|
|
55
|
-
skip_metadata_values=skip_metadata_values,
|
|
56
|
-
)
|
|
19
|
+
items = client.events(data_set_external_ids=data_set_external_id)
|
|
20
|
+
return total, items
|
|
57
21
|
|
|
58
22
|
@classmethod
|
|
59
|
-
def
|
|
60
|
-
cls,
|
|
61
|
-
client: CogniteClient,
|
|
62
|
-
root_asset_external_id: str,
|
|
63
|
-
namespace: Namespace | None = None,
|
|
64
|
-
to_type: Callable[[Event], str | None] | None = None,
|
|
65
|
-
limit: int | None = None,
|
|
66
|
-
unpack_metadata: bool = True,
|
|
67
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
68
|
-
):
|
|
23
|
+
def _from_hierarchy(cls, client: CogniteClient, root_asset_external_id: str) -> tuple[int | None, Iterable[Event]]:
|
|
69
24
|
total = client.events.aggregate_count(
|
|
70
25
|
filter=EventFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
|
|
71
26
|
)
|
|
72
|
-
|
|
73
|
-
return
|
|
74
|
-
client.events(asset_subtree_external_ids=[root_asset_external_id]),
|
|
75
|
-
namespace,
|
|
76
|
-
to_type,
|
|
77
|
-
total,
|
|
78
|
-
limit,
|
|
79
|
-
unpack_metadata=unpack_metadata,
|
|
80
|
-
skip_metadata_values=skip_metadata_values,
|
|
81
|
-
)
|
|
27
|
+
items = client.events(asset_subtree_external_ids=[root_asset_external_id])
|
|
28
|
+
return total, items
|
|
82
29
|
|
|
83
30
|
@classmethod
|
|
84
|
-
def
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
namespace: Namespace | None = None,
|
|
88
|
-
to_type: Callable[[Event], str | None] | None = None,
|
|
89
|
-
limit: int | None = None,
|
|
90
|
-
unpack_metadata: bool = True,
|
|
91
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
92
|
-
):
|
|
93
|
-
events = EventList.load(Path(file_path).read_text())
|
|
94
|
-
|
|
95
|
-
return cls(
|
|
96
|
-
events,
|
|
97
|
-
namespace,
|
|
98
|
-
to_type,
|
|
99
|
-
total=len(events),
|
|
100
|
-
limit=limit,
|
|
101
|
-
unpack_metadata=unpack_metadata,
|
|
102
|
-
skip_metadata_values=skip_metadata_values,
|
|
103
|
-
)
|
|
104
|
-
|
|
105
|
-
def _item2triples(self, event: Event) -> list[Triple]:
|
|
106
|
-
id_ = self.namespace[f"{InstanceIdPrefix.event}{event.id}"]
|
|
107
|
-
|
|
108
|
-
type_ = self._get_rdf_type(event)
|
|
109
|
-
|
|
110
|
-
# Set rdf type
|
|
111
|
-
triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
|
|
112
|
-
|
|
113
|
-
# Create attributes
|
|
114
|
-
|
|
115
|
-
if event.external_id:
|
|
116
|
-
triples.append((id_, self.namespace.external_id, Literal(event.external_id)))
|
|
117
|
-
|
|
118
|
-
if event.source:
|
|
119
|
-
triples.append((id_, self.namespace.type, Literal(event.source)))
|
|
120
|
-
|
|
121
|
-
if event.type:
|
|
122
|
-
triples.append((id_, self.namespace.type, Literal(event.type)))
|
|
123
|
-
|
|
124
|
-
if event.subtype:
|
|
125
|
-
triples.append((id_, self.namespace.subtype, Literal(event.subtype)))
|
|
126
|
-
|
|
127
|
-
if event.metadata:
|
|
128
|
-
triples.extend(self._metadata_to_triples(id_, event.metadata))
|
|
129
|
-
|
|
130
|
-
if event.description:
|
|
131
|
-
triples.append((id_, self.namespace.description, Literal(event.description)))
|
|
132
|
-
|
|
133
|
-
if event.created_time:
|
|
134
|
-
triples.append(
|
|
135
|
-
(
|
|
136
|
-
id_,
|
|
137
|
-
self.namespace.created_time,
|
|
138
|
-
Literal(datetime.fromtimestamp(event.created_time / 1000, timezone.utc)),
|
|
139
|
-
)
|
|
140
|
-
)
|
|
141
|
-
|
|
142
|
-
if event.last_updated_time:
|
|
143
|
-
triples.append(
|
|
144
|
-
(
|
|
145
|
-
id_,
|
|
146
|
-
self.namespace.last_updated_time,
|
|
147
|
-
Literal(datetime.fromtimestamp(event.last_updated_time / 1000, timezone.utc)),
|
|
148
|
-
)
|
|
149
|
-
)
|
|
150
|
-
|
|
151
|
-
if event.start_time:
|
|
152
|
-
triples.append(
|
|
153
|
-
(
|
|
154
|
-
id_,
|
|
155
|
-
self.namespace.start_time,
|
|
156
|
-
Literal(datetime.fromtimestamp(event.start_time / 1000, timezone.utc)),
|
|
157
|
-
)
|
|
158
|
-
)
|
|
159
|
-
|
|
160
|
-
if event.end_time:
|
|
161
|
-
triples.append(
|
|
162
|
-
(
|
|
163
|
-
id_,
|
|
164
|
-
self.namespace.end_time,
|
|
165
|
-
Literal(datetime.fromtimestamp(event.end_time / 1000, timezone.utc)),
|
|
166
|
-
)
|
|
167
|
-
)
|
|
168
|
-
|
|
169
|
-
if event.data_set_id:
|
|
170
|
-
triples.append(
|
|
171
|
-
(
|
|
172
|
-
id_,
|
|
173
|
-
self.namespace.data_set_id,
|
|
174
|
-
self.namespace[f"{InstanceIdPrefix.data_set}{event.data_set_id}"],
|
|
175
|
-
)
|
|
176
|
-
)
|
|
177
|
-
|
|
178
|
-
if event.asset_ids:
|
|
179
|
-
for asset_id in event.asset_ids:
|
|
180
|
-
triples.append((id_, self.namespace.asset, self.namespace[f"{InstanceIdPrefix.asset}{asset_id}"]))
|
|
181
|
-
|
|
182
|
-
return triples
|
|
31
|
+
def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[Event]]:
|
|
32
|
+
assets = EventList.load(Path(file_path).read_text())
|
|
33
|
+
return len(assets), assets
|
|
@@ -1,202 +1,38 @@
|
|
|
1
|
-
from collections.abc import
|
|
2
|
-
from datetime import datetime, timezone
|
|
1
|
+
from collections.abc import Iterable
|
|
3
2
|
from pathlib import Path
|
|
4
3
|
|
|
5
4
|
from cognite.client import CogniteClient
|
|
6
5
|
from cognite.client.data_classes import FileMetadata, FileMetadataFilter, FileMetadataList
|
|
7
|
-
from rdflib import RDF, Literal, Namespace
|
|
8
6
|
|
|
9
|
-
from
|
|
10
|
-
|
|
11
|
-
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
12
|
-
from ._labels import LabelsExtractor
|
|
7
|
+
from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
13
8
|
|
|
14
9
|
|
|
15
10
|
class FilesExtractor(ClassicCDFBaseExtractor[FileMetadata]):
|
|
16
|
-
"""Extract data from Cognite Data Fusions files metadata into Neat.
|
|
17
|
-
|
|
18
|
-
Args:
|
|
19
|
-
items (Iterable[FileMetadata]): An iterable of items.
|
|
20
|
-
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
|
|
21
|
-
to_type (Callable[[FileMetadata], str | None], optional): A function to convert an item to a type.
|
|
22
|
-
Defaults to None. If None or if the function returns None, the asset will be set to the default type.
|
|
23
|
-
total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
|
|
24
|
-
is installed. Defaults to None.
|
|
25
|
-
limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
|
|
26
|
-
testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
|
|
27
|
-
limit the extraction to 1000 assets to test the setup.
|
|
28
|
-
unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
|
|
29
|
-
a JSON string.
|
|
30
|
-
skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
|
|
31
|
-
values in this set will be skipped.
|
|
32
|
-
"""
|
|
11
|
+
"""Extract data from Cognite Data Fusions files metadata into Neat."""
|
|
33
12
|
|
|
34
13
|
_default_rdf_type = "File"
|
|
14
|
+
_instance_id_prefix = InstanceIdPrefix.file
|
|
35
15
|
|
|
36
16
|
@classmethod
|
|
37
|
-
def
|
|
17
|
+
def _from_dataset(
|
|
38
18
|
cls,
|
|
39
19
|
client: CogniteClient,
|
|
40
20
|
data_set_external_id: str,
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
unpack_metadata: bool = True,
|
|
45
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
46
|
-
):
|
|
47
|
-
return cls(
|
|
48
|
-
client.files(data_set_external_ids=data_set_external_id),
|
|
49
|
-
namespace=namespace,
|
|
50
|
-
to_type=to_type,
|
|
51
|
-
limit=limit,
|
|
52
|
-
unpack_metadata=unpack_metadata,
|
|
53
|
-
skip_metadata_values=skip_metadata_values,
|
|
54
|
-
)
|
|
21
|
+
) -> tuple[int | None, Iterable[FileMetadata]]:
|
|
22
|
+
items = client.files(data_set_external_ids=data_set_external_id)
|
|
23
|
+
return None, items
|
|
55
24
|
|
|
56
25
|
@classmethod
|
|
57
|
-
def
|
|
58
|
-
cls,
|
|
59
|
-
|
|
60
|
-
root_asset_external_id: str,
|
|
61
|
-
namespace: Namespace | None = None,
|
|
62
|
-
to_type: Callable[[FileMetadata], str | None] | None = None,
|
|
63
|
-
limit: int | None = None,
|
|
64
|
-
unpack_metadata: bool = True,
|
|
65
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
66
|
-
):
|
|
26
|
+
def _from_hierarchy(
|
|
27
|
+
cls, client: CogniteClient, root_asset_external_id: str
|
|
28
|
+
) -> tuple[int | None, Iterable[FileMetadata]]:
|
|
67
29
|
total = client.files.aggregate(
|
|
68
30
|
filter=FileMetadataFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
|
|
69
31
|
)[0].count
|
|
70
|
-
|
|
71
|
-
return
|
|
72
|
-
client.files(asset_subtree_external_ids=[root_asset_external_id]),
|
|
73
|
-
namespace,
|
|
74
|
-
to_type,
|
|
75
|
-
total,
|
|
76
|
-
limit,
|
|
77
|
-
unpack_metadata=unpack_metadata,
|
|
78
|
-
skip_metadata_values=skip_metadata_values,
|
|
79
|
-
)
|
|
32
|
+
items = client.files(asset_subtree_external_ids=root_asset_external_id)
|
|
33
|
+
return total, items
|
|
80
34
|
|
|
81
35
|
@classmethod
|
|
82
|
-
def
|
|
83
|
-
cls,
|
|
84
|
-
file_path: str,
|
|
85
|
-
namespace: Namespace | None = None,
|
|
86
|
-
to_type: Callable[[FileMetadata], str | None] | None = None,
|
|
87
|
-
limit: int | None = None,
|
|
88
|
-
unpack_metadata: bool = True,
|
|
89
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
90
|
-
):
|
|
36
|
+
def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[FileMetadata]]:
|
|
91
37
|
file_metadata = FileMetadataList.load(Path(file_path).read_text())
|
|
92
|
-
return
|
|
93
|
-
file_metadata,
|
|
94
|
-
namespace=namespace,
|
|
95
|
-
to_type=to_type,
|
|
96
|
-
limit=limit,
|
|
97
|
-
total=len(file_metadata),
|
|
98
|
-
unpack_metadata=unpack_metadata,
|
|
99
|
-
skip_metadata_values=skip_metadata_values,
|
|
100
|
-
)
|
|
101
|
-
|
|
102
|
-
def _item2triples(self, file: FileMetadata) -> list[Triple]:
|
|
103
|
-
id_ = self.namespace[f"{InstanceIdPrefix.file}{file.id}"]
|
|
104
|
-
|
|
105
|
-
type_ = self._get_rdf_type(file)
|
|
106
|
-
|
|
107
|
-
# Set rdf type
|
|
108
|
-
triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
|
|
109
|
-
|
|
110
|
-
# Create attributes
|
|
111
|
-
|
|
112
|
-
if file.external_id:
|
|
113
|
-
triples.append((id_, self.namespace.external_id, Literal(file.external_id)))
|
|
114
|
-
|
|
115
|
-
if file.source:
|
|
116
|
-
triples.append((id_, self.namespace.type, Literal(file.source)))
|
|
117
|
-
|
|
118
|
-
if file.mime_type:
|
|
119
|
-
triples.append((id_, self.namespace.mime_type, Literal(file.mime_type)))
|
|
120
|
-
|
|
121
|
-
if file.uploaded:
|
|
122
|
-
triples.append((id_, self.namespace.uploaded, Literal(file.uploaded)))
|
|
123
|
-
|
|
124
|
-
if file.source:
|
|
125
|
-
triples.append((id_, self.namespace.source, Literal(file.source)))
|
|
126
|
-
|
|
127
|
-
if file.metadata:
|
|
128
|
-
triples.extend(self._metadata_to_triples(id_, file.metadata))
|
|
129
|
-
|
|
130
|
-
if file.source_created_time:
|
|
131
|
-
triples.append(
|
|
132
|
-
(
|
|
133
|
-
id_,
|
|
134
|
-
self.namespace.source_created_time,
|
|
135
|
-
Literal(datetime.fromtimestamp(file.source_created_time / 1000, timezone.utc)),
|
|
136
|
-
)
|
|
137
|
-
)
|
|
138
|
-
if file.source_modified_time:
|
|
139
|
-
triples.append(
|
|
140
|
-
(
|
|
141
|
-
id_,
|
|
142
|
-
self.namespace.source_created_time,
|
|
143
|
-
Literal(datetime.fromtimestamp(file.source_modified_time / 1000, timezone.utc)),
|
|
144
|
-
)
|
|
145
|
-
)
|
|
146
|
-
if file.uploaded_time:
|
|
147
|
-
triples.append(
|
|
148
|
-
(
|
|
149
|
-
id_,
|
|
150
|
-
self.namespace.uploaded_time,
|
|
151
|
-
Literal(datetime.fromtimestamp(file.uploaded_time / 1000, timezone.utc)),
|
|
152
|
-
)
|
|
153
|
-
)
|
|
154
|
-
|
|
155
|
-
if file.created_time:
|
|
156
|
-
triples.append(
|
|
157
|
-
(
|
|
158
|
-
id_,
|
|
159
|
-
self.namespace.created_time,
|
|
160
|
-
Literal(datetime.fromtimestamp(file.created_time / 1000, timezone.utc)),
|
|
161
|
-
)
|
|
162
|
-
)
|
|
163
|
-
|
|
164
|
-
if file.last_updated_time:
|
|
165
|
-
triples.append(
|
|
166
|
-
(
|
|
167
|
-
id_,
|
|
168
|
-
self.namespace.last_updated_time,
|
|
169
|
-
Literal(datetime.fromtimestamp(file.last_updated_time / 1000, timezone.utc)),
|
|
170
|
-
)
|
|
171
|
-
)
|
|
172
|
-
|
|
173
|
-
if file.labels:
|
|
174
|
-
for label in file.labels:
|
|
175
|
-
# external_id can create ill-formed URIs, so we create websafe URIs
|
|
176
|
-
# since labels do not have internal ids, we use the external_id as the id
|
|
177
|
-
triples.append(
|
|
178
|
-
(
|
|
179
|
-
id_,
|
|
180
|
-
self.namespace.label,
|
|
181
|
-
self.namespace[f"{InstanceIdPrefix.label}{LabelsExtractor._label_id(label)}"],
|
|
182
|
-
)
|
|
183
|
-
)
|
|
184
|
-
|
|
185
|
-
if file.security_categories:
|
|
186
|
-
for category in file.security_categories:
|
|
187
|
-
triples.append((id_, self.namespace.security_categories, Literal(category)))
|
|
188
|
-
|
|
189
|
-
if file.data_set_id:
|
|
190
|
-
triples.append(
|
|
191
|
-
(
|
|
192
|
-
id_,
|
|
193
|
-
self.namespace.data_set_id,
|
|
194
|
-
self.namespace[f"{InstanceIdPrefix.data_set}{file.data_set_id}"],
|
|
195
|
-
)
|
|
196
|
-
)
|
|
197
|
-
|
|
198
|
-
if file.asset_ids:
|
|
199
|
-
for asset_id in file.asset_ids:
|
|
200
|
-
triples.append((id_, self.namespace.asset, self.namespace[f"{InstanceIdPrefix.asset}{asset_id}"]))
|
|
201
|
-
|
|
202
|
-
return triples
|
|
38
|
+
return len(file_metadata), file_metadata
|
|
@@ -1,122 +1,54 @@
|
|
|
1
|
-
from collections.abc import
|
|
2
|
-
from datetime import datetime, timezone
|
|
1
|
+
from collections.abc import Iterable
|
|
3
2
|
from pathlib import Path
|
|
4
3
|
from urllib.parse import quote
|
|
5
4
|
|
|
6
5
|
from cognite.client import CogniteClient
|
|
7
6
|
from cognite.client.data_classes import Label, LabelDefinition, LabelDefinitionList
|
|
8
|
-
from rdflib import RDF, Literal, Namespace
|
|
9
7
|
|
|
10
|
-
from
|
|
11
|
-
|
|
12
|
-
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
8
|
+
from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix, T_CogniteResource
|
|
13
9
|
|
|
14
10
|
|
|
15
11
|
class LabelsExtractor(ClassicCDFBaseExtractor[LabelDefinition]):
|
|
16
|
-
"""Extract data from Cognite Data Fusions Labels into Neat.
|
|
17
|
-
|
|
18
|
-
Args:
|
|
19
|
-
items (Iterable[LabelDefinition]): An iterable of items.
|
|
20
|
-
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
|
|
21
|
-
to_type (Callable[[LabelDefinition], str | None], optional): A function to convert an item to a type.
|
|
22
|
-
Defaults to None. If None or if the function returns None, the asset will be set to the default type.
|
|
23
|
-
total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
|
|
24
|
-
is installed. Defaults to None.
|
|
25
|
-
limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
|
|
26
|
-
testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
|
|
27
|
-
limit the extraction to 1000 assets to test the setup.
|
|
28
|
-
unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
|
|
29
|
-
a JSON string.
|
|
30
|
-
skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
|
|
31
|
-
values in this set will be skipped.
|
|
32
|
-
"""
|
|
12
|
+
"""Extract data from Cognite Data Fusions Labels into Neat."""
|
|
33
13
|
|
|
34
14
|
_default_rdf_type = "Label"
|
|
15
|
+
_instance_id_prefix = InstanceIdPrefix.label
|
|
35
16
|
|
|
36
17
|
@classmethod
|
|
37
|
-
def
|
|
38
|
-
cls,
|
|
39
|
-
|
|
40
|
-
data_set_external_id
|
|
41
|
-
|
|
42
|
-
to_type: Callable[[LabelDefinition], str | None] | None = None,
|
|
43
|
-
limit: int | None = None,
|
|
44
|
-
unpack_metadata: bool = True,
|
|
45
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
46
|
-
):
|
|
47
|
-
return cls(
|
|
48
|
-
client.labels(data_set_external_ids=data_set_external_id),
|
|
49
|
-
namespace=namespace,
|
|
50
|
-
to_type=to_type,
|
|
51
|
-
limit=limit,
|
|
52
|
-
unpack_metadata=unpack_metadata,
|
|
53
|
-
skip_metadata_values=skip_metadata_values,
|
|
54
|
-
)
|
|
18
|
+
def _from_dataset(
|
|
19
|
+
cls, client: CogniteClient, data_set_external_id: str
|
|
20
|
+
) -> tuple[int | None, Iterable[LabelDefinition]]:
|
|
21
|
+
items = client.labels(data_set_external_ids=data_set_external_id)
|
|
22
|
+
return None, items
|
|
55
23
|
|
|
56
24
|
@classmethod
|
|
57
|
-
def
|
|
58
|
-
cls,
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
to_type: Callable[[LabelDefinition], str | None] | None = None,
|
|
62
|
-
limit: int | None = None,
|
|
63
|
-
unpack_metadata: bool = True,
|
|
64
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
65
|
-
):
|
|
66
|
-
labels = LabelDefinitionList.load(Path(file_path).read_text())
|
|
67
|
-
return cls(
|
|
68
|
-
labels,
|
|
69
|
-
total=len(labels),
|
|
70
|
-
namespace=namespace,
|
|
71
|
-
to_type=to_type,
|
|
72
|
-
limit=limit,
|
|
73
|
-
unpack_metadata=unpack_metadata,
|
|
74
|
-
skip_metadata_values=skip_metadata_values,
|
|
75
|
-
)
|
|
76
|
-
|
|
77
|
-
def _item2triples(self, label: LabelDefinition) -> list[Triple]:
|
|
78
|
-
if not label.external_id:
|
|
79
|
-
return []
|
|
80
|
-
|
|
81
|
-
id_ = self.namespace[f"{InstanceIdPrefix.label}{self._label_id(label)}"]
|
|
82
|
-
|
|
83
|
-
type_ = self._get_rdf_type(label)
|
|
84
|
-
# Set rdf type
|
|
85
|
-
triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
|
|
25
|
+
def _from_hierarchy(
|
|
26
|
+
cls, client: CogniteClient, root_asset_external_id: str
|
|
27
|
+
) -> tuple[int | None, Iterable[T_CogniteResource]]:
|
|
28
|
+
raise NotImplementedError("Hierarchy is not supported for labels")
|
|
86
29
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
triples.append((id_, self.namespace.name, Literal(label.name)))
|
|
92
|
-
|
|
93
|
-
if label.description:
|
|
94
|
-
triples.append((id_, self.namespace.description, Literal(label.description)))
|
|
95
|
-
|
|
96
|
-
if label.created_time:
|
|
97
|
-
triples.append(
|
|
98
|
-
(
|
|
99
|
-
id_,
|
|
100
|
-
self.namespace.created_time,
|
|
101
|
-
Literal(datetime.fromtimestamp(label.created_time / 1000, timezone.utc)),
|
|
102
|
-
)
|
|
103
|
-
)
|
|
104
|
-
|
|
105
|
-
if label.data_set_id:
|
|
106
|
-
triples.append(
|
|
107
|
-
(
|
|
108
|
-
id_,
|
|
109
|
-
self.namespace.data_set_id,
|
|
110
|
-
self.namespace[f"{InstanceIdPrefix.data_set}{label.data_set_id}"],
|
|
111
|
-
)
|
|
112
|
-
)
|
|
30
|
+
@classmethod
|
|
31
|
+
def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[LabelDefinition]]:
|
|
32
|
+
labels = LabelDefinitionList.load(Path(file_path).read_text())
|
|
33
|
+
return len(labels), labels
|
|
113
34
|
|
|
114
|
-
|
|
35
|
+
def _fallback_id(self, item: LabelDefinition) -> str | None:
|
|
36
|
+
if not item.external_id:
|
|
37
|
+
return None
|
|
38
|
+
return self._label_id(item)
|
|
115
39
|
|
|
116
40
|
@staticmethod
|
|
117
|
-
def _label_id(label: Label | LabelDefinition) -> str:
|
|
41
|
+
def _label_id(label: Label | LabelDefinition | dict) -> str:
|
|
118
42
|
# external_id can create ill-formed URIs, so we create websafe URIs
|
|
119
43
|
# since labels do not have internal ids, we use the external_id as the id
|
|
120
|
-
|
|
44
|
+
external_id: str | None = None
|
|
45
|
+
if isinstance(label, dict):
|
|
46
|
+
if "externalId" in label:
|
|
47
|
+
external_id = label["externalId"]
|
|
48
|
+
elif "external_id" in label:
|
|
49
|
+
external_id = label["external_id"]
|
|
50
|
+
else:
|
|
51
|
+
external_id = label.external_id
|
|
52
|
+
if external_id is None:
|
|
121
53
|
raise ValueError("External id must be set of the label")
|
|
122
|
-
return quote(
|
|
54
|
+
return quote(external_id)
|