cognite-neat 0.85.7__py3-none-any.whl → 0.85.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cognite-neat has been flagged as potentially problematic.
- cognite/neat/_version.py +1 -1
- cognite/neat/graph/extractors/_classic_cdf/_assets.py +116 -24
- cognite/neat/graph/extractors/_classic_cdf/_events.py +56 -26
- cognite/neat/graph/extractors/_classic_cdf/_files.py +73 -29
- cognite/neat/graph/extractors/_classic_cdf/_labels.py +20 -11
- cognite/neat/graph/extractors/_classic_cdf/_relationships.py +35 -20
- cognite/neat/graph/extractors/_classic_cdf/_sequences.py +60 -22
- cognite/neat/graph/extractors/_classic_cdf/_timeseries.py +78 -30
- cognite/neat/rules/importers/_inference2rules.py +89 -23
- cognite/neat/rules/models/data_types.py +1 -1
- {cognite_neat-0.85.7.dist-info → cognite_neat-0.85.9.dist-info}/METADATA +1 -1
- {cognite_neat-0.85.7.dist-info → cognite_neat-0.85.9.dist-info}/RECORD +15 -15
- {cognite_neat-0.85.7.dist-info → cognite_neat-0.85.9.dist-info}/LICENSE +0 -0
- {cognite_neat-0.85.7.dist-info → cognite_neat-0.85.9.dist-info}/WHEEL +0 -0
- {cognite_neat-0.85.7.dist-info → cognite_neat-0.85.9.dist-info}/entry_points.txt +0 -0
cognite/neat/_version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.85.7"
+__version__ = "0.85.9"
cognite/neat/graph/extractors/_classic_cdf/_assets.py
CHANGED

@@ -1,10 +1,12 @@
+import json
+import re
 from collections.abc import Callable, Iterable
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import cast
 
 from cognite.client import CogniteClient
-from cognite.client.data_classes import Asset, AssetList
+from cognite.client.data_classes import Asset, AssetFilter, AssetList
 from rdflib import RDF, Literal, Namespace
 
 from cognite.neat.constants import DEFAULT_NAMESPACE

@@ -21,17 +23,32 @@ class AssetsExtractor(BaseExtractor):
         namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
         to_type (Callable[[Asset], str | None], optional): A function to convert an asset to a type. Defaults to None.
             If None or if the function returns None, the asset will be set to the default type "Asset".
+        total (int, optional): The total number of assets to load. If passed, you will get a progress bar if rich
+            is installed. Defaults to None.
+        limit (int, optional): The maximal number of assets to load. Defaults to None. This is typically used for
+            testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
+            limit the extraction to 1000 assets to test the setup.
+        unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
+            a JSON string.
     """
 
+    _SPACE_PATTERN = re.compile(r"\s+")
+
     def __init__(
         self,
         assets: Iterable[Asset],
         namespace: Namespace | None = None,
         to_type: Callable[[Asset], str | None] | None = None,
+        total: int | None = None,
+        limit: int | None = None,
+        unpack_metadata: bool = True,
     ):
         self.namespace = namespace or DEFAULT_NAMESPACE
         self.assets = assets
         self.to_type = to_type
+        self.total = total
+        self.limit = min(limit, total) if limit and total else limit
+        self.unpack_metadata = unpack_metadata
 
     @classmethod
     def from_dataset(

@@ -40,8 +57,22 @@ class AssetsExtractor(BaseExtractor):
         data_set_external_id: str,
         namespace: Namespace | None = None,
         to_type: Callable[[Asset], str | None] | None = None,
+        limit: int | None = None,
+        unpack_metadata: bool = True,
     ):
-
+        total = client.assets.aggregate_count(filter=AssetFilter(data_set_ids=[{"externalId": data_set_external_id}]))
+
+        return cls(
+            cast(
+                Iterable[Asset],
+                client.assets(data_set_external_ids=data_set_external_id),
+            ),
+            namespace,
+            to_type,
+            total,
+            limit,
+            unpack_metadata=unpack_metadata,
+        )
 
     @classmethod
     def from_hierarchy(

@@ -50,57 +81,99 @@ class AssetsExtractor(BaseExtractor):
         root_asset_external_id: str,
         namespace: Namespace | None = None,
         to_type: Callable[[Asset], str | None] | None = None,
+        limit: int | None = None,
+        unpack_metadata: bool = True,
     ):
+        total = client.assets.aggregate_count(
+            filter=AssetFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
+        )
+
         return cls(
-            cast(
+            cast(
+                Iterable[Asset],
+                client.assets(asset_subtree_external_ids=root_asset_external_id),
+            ),
+            namespace,
+            to_type,
+            total,
+            limit,
+            unpack_metadata=unpack_metadata,
         )
 
     @classmethod
     def from_file(
-        cls,
+        cls,
+        file_path: str,
+        namespace: Namespace | None = None,
+        to_type: Callable[[Asset], str] | None = None,
+        limit: int | None = None,
+        unpack_metadata: bool = True,
     ):
-        return cls(
+        return cls(
+            AssetList.load(Path(file_path).read_text()),
+            namespace,
+            to_type,
+            limit,
+            unpack_metadata=unpack_metadata,
+        )
 
     def extract(self) -> Iterable[Triple]:
         """Extracts an asset with the given asset_id."""
-
-
-
-
+        if self.total:
+            try:
+                from rich.progress import track
+            except ModuleNotFoundError:
+                to_iterate = self.assets
+            else:
+                to_iterate = track(
+                    self.assets,
+                    total=self.limit or self.total,
+                    description="Extracting Assets",
+                )
+        else:
+            to_iterate = self.assets
+        for no, asset in enumerate(to_iterate):
+            yield from self._asset2triples(asset)
+            if self.limit and no >= self.limit:
+                break
+
+    def _asset2triples(self, asset: Asset) -> list[Triple]:
         """Converts an asset to triples."""
-        id_ = namespace[f"Asset_{asset.id}"]
+        id_ = self.namespace[f"Asset_{asset.id}"]
 
         # Set rdf type
         type_ = "Asset"
         if self.to_type:
             type_ = self.to_type(asset) or type_
-
+        type_ = self._SPACE_PATTERN.sub("_", type_)
+
+        triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
 
         # Create attributes
         if asset.name:
-            triples.append((id_, namespace.name, Literal(asset.name)))
+            triples.append((id_, self.namespace.name, Literal(asset.name)))
 
         if asset.description:
-            triples.append((id_, namespace.description, Literal(asset.description)))
+            triples.append((id_, self.namespace.description, Literal(asset.description)))
 
         if asset.external_id:
-            triples.append((id_, namespace.external_id, Literal(asset.external_id)))
+            triples.append((id_, self.namespace.external_id, Literal(asset.external_id)))
 
         if asset.source:
-            triples.append((id_, namespace.source, Literal(asset.source)))
+            triples.append((id_, self.namespace.source, Literal(asset.source)))
 
         # properties ref creation and update
         triples.append(
             (
                 id_,
-                namespace.created_time,
+                self.namespace.created_time,
                 Literal(datetime.fromtimestamp(asset.created_time / 1000, timezone.utc)),
             )
        )
         triples.append(
             (
                 id_,
-                namespace.last_updated_time,
+                self.namespace.last_updated_time,
                 Literal(datetime.fromtimestamp(asset.last_updated_time / 1000, timezone.utc)),
             )
         )

@@ -110,22 +183,41 @@ class AssetsExtractor(BaseExtractor):
                 # external_id can create ill-formed URIs, so we create websafe URIs
                 # since labels do not have internal ids, we use the external_id as the id
                 triples.append(
-                    (
+                    (
+                        id_,
+                        self.namespace.label,
+                        self.namespace[f"Label_{create_sha256_hash(label.dump()['externalId'])}"],
+                    )
                 )
 
         if asset.metadata:
-
-
-
+            if self.unpack_metadata:
+                for key, value in asset.metadata.items():
+                    if value:
+                        triples.append(
+                            (
+                                id_,
+                                self.namespace[key],
+                                Literal(string_to_ideal_type(value)),
+                            )
+                        )
+            else:
+                triples.append((id_, self.namespace.metadata, Literal(json.dumps(asset.metadata))))
 
         # Create connections:
         if asset.parent_id:
-            triples.append((id_, namespace.parent, namespace[f"Asset_{asset.parent_id}"]))
+            triples.append((id_, self.namespace.parent, self.namespace[f"Asset_{asset.parent_id}"]))
 
         if asset.root_id:
-            triples.append((id_, namespace.root, namespace[f"Asset_{asset.root_id}"]))
+            triples.append((id_, self.namespace.root, self.namespace[f"Asset_{asset.root_id}"]))
 
         if asset.data_set_id:
-            triples.append(
+            triples.append(
+                (
+                    id_,
+                    self.namespace.dataset,
+                    self.namespace[f"Dataset_{asset.data_set_id}"],
+                )
+            )
 
         return triples
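Taken together, the assets changes add progress reporting (`total`), early stopping (`limit`), and a choice between unpacked metadata triples and a single JSON-string triple (`unpack_metadata`). A minimal usage sketch, assuming CDF credentials are configured and importing from the module path shown in the file list above (a shorter public import may also exist):

```python
from cognite.client import CogniteClient

from cognite.neat.graph.extractors._classic_cdf._assets import AssetsExtractor

client = CogniteClient()  # assumes auth is configured in the environment

# from_dataset() now computes `total` via assets.aggregate_count, so a rich
# progress bar appears automatically when rich is installed.
extractor = AssetsExtractor.from_dataset(
    client,
    data_set_external_id="my_data_set",  # hypothetical external id
    limit=1000,             # stop after ~1000 assets to smoke-test the setup
    unpack_metadata=False,  # emit metadata as one JSON-string triple instead
)

for subject, predicate, obj in extractor.extract():
    ...  # feed the triples into an rdflib Graph or a triple store
```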
cognite/neat/graph/extractors/_classic_cdf/_events.py
CHANGED

@@ -1,3 +1,4 @@
+import json
 from collections.abc import Iterable
 from datetime import datetime, timezone
 from pathlib import Path

@@ -20,15 +21,19 @@ class EventsExtractor(BaseExtractor):
     Args:
         events (Iterable[Event]): An iterable of events.
         namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
+        unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
+            a JSON string.
     """
 
     def __init__(
         self,
         events: Iterable[Event],
         namespace: Namespace | None = None,
+        unpack_metadata: bool = True,
     ):
         self.namespace = namespace or DEFAULT_NAMESPACE
         self.events = events
+        self.unpack_metadata = unpack_metadata
 
     @classmethod
     def from_dataset(

@@ -36,61 +41,80 @@ class EventsExtractor(BaseExtractor):
         client: CogniteClient,
         data_set_external_id: str,
         namespace: Namespace | None = None,
+        unpack_metadata: bool = True,
     ):
-        return cls(
+        return cls(
+            cast(
+                Iterable[Event],
+                client.events(data_set_external_ids=data_set_external_id),
+            ),
+            namespace,
+            unpack_metadata,
+        )
 
     @classmethod
-    def from_file(
-
+    def from_file(
+        cls,
+        file_path: str,
+        namespace: Namespace | None = None,
+        unpack_metadata: bool = True,
+    ):
+        return cls(EventList.load(Path(file_path).read_text()), namespace, unpack_metadata)
 
     def extract(self) -> Iterable[Triple]:
         """Extract events as triples."""
         for event in self.events:
-            yield from self._event2triples(event
+            yield from self._event2triples(event)
 
-
-
-        id_ = namespace[f"Event_{event.id}"]
+    def _event2triples(self, event: Event) -> list[Triple]:
+        id_ = self.namespace[f"Event_{event.id}"]
 
         # Set rdf type
-        triples: list[Triple] = [(id_, RDF.type, namespace.Event)]
+        triples: list[Triple] = [(id_, RDF.type, self.namespace.Event)]
 
         # Create attributes
 
         if event.external_id:
-            triples.append((id_, namespace.external_id, Literal(event.external_id)))
+            triples.append((id_, self.namespace.external_id, Literal(event.external_id)))
 
         if event.source:
-            triples.append((id_, namespace.type, Literal(event.source)))
+            triples.append((id_, self.namespace.type, Literal(event.source)))
 
         if event.type:
-            triples.append((id_, namespace.type, Literal(event.type)))
+            triples.append((id_, self.namespace.type, Literal(event.type)))
 
         if event.subtype:
-            triples.append((id_, namespace.subtype, Literal(event.subtype)))
+            triples.append((id_, self.namespace.subtype, Literal(event.subtype)))
 
         if event.metadata:
-
-
-
-
-
-
-
+            if self.unpack_metadata:
+                for key, value in event.metadata.items():
+                    if value:
+                        type_aware_value = string_to_ideal_type(value)
+                        try:
+                            triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
+                        except ValidationError:
+                            triples.append((id_, self.namespace[key], Literal(type_aware_value)))
+            else:
+                triples.append((id_, self.namespace.metadata, Literal(json.dumps(event.metadata))))
 
         if event.description:
-            triples.append((id_, namespace.description, Literal(event.description)))
+            triples.append((id_, self.namespace.description, Literal(event.description)))
 
         if event.created_time:
             triples.append(
-                (
+                (
+                    id_,
+                    self.namespace.created_time,
+                    Literal(datetime.fromtimestamp(event.created_time / 1000, timezone.utc)),
+                )
             )
 
         if event.last_updated_time:
             triples.append(
                 (
                     id_,
-                    namespace.last_updated_time,
+                    self.namespace.last_updated_time,
                     Literal(datetime.fromtimestamp(event.last_updated_time / 1000, timezone.utc)),
                 )
             )

@@ -99,7 +123,7 @@ class EventsExtractor(BaseExtractor):
             triples.append(
                 (
                     id_,
-                    namespace.start_time,
+                    self.namespace.start_time,
                     Literal(datetime.fromtimestamp(event.start_time / 1000, timezone.utc)),
                 )
             )

@@ -108,16 +132,22 @@ class EventsExtractor(BaseExtractor):
             triples.append(
                 (
                     id_,
-                    namespace.end_time,
+                    self.namespace.end_time,
                     Literal(datetime.fromtimestamp(event.end_time / 1000, timezone.utc)),
                 )
             )
 
         if event.data_set_id:
-            triples.append(
+            triples.append(
+                (
+                    id_,
+                    self.namespace.data_set_id,
+                    self.namespace[f"Dataset_{event.data_set_id}"],
+                )
+            )
 
         if event.asset_ids:
             for asset_id in event.asset_ids:
-                triples.append((id_, namespace.asset, namespace[f"Asset_{asset_id}"]))
+                triples.append((id_, self.namespace.asset, self.namespace[f"Asset_{asset_id}"]))
 
         return triples
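The new metadata handling in `_event2triples` tries to coerce each unpacked value into an HTTP URL and falls back to a plain literal. A standalone sketch of that pattern, assuming pydantic v2 (where constructing `AnyHttpUrl` validates its input) and string values:

```python
from pydantic import AnyHttpUrl, ValidationError
from rdflib import Literal, URIRef

def metadata_value_to_rdf(value: str):
    """Mirrors the diff's pattern: URL-like values become URIRefs,
    everything else becomes a plain rdflib Literal."""
    try:
        return URIRef(str(AnyHttpUrl(value)))
    except ValidationError:
        return Literal(value)

print(metadata_value_to_rdf("https://example.com/doc"))  # URIRef
print(metadata_value_to_rdf("pump station 42"))          # Literal
```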
cognite/neat/graph/extractors/_classic_cdf/_files.py
CHANGED

@@ -1,3 +1,4 @@
+import json
 from collections.abc import Iterable
 from datetime import datetime, timezone
 from pathlib import Path

@@ -21,15 +22,19 @@ class FilesExtractor(BaseExtractor):
     Args:
         files_metadata (Iterable[FileMetadata]): An iterable of files metadata.
         namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
+        unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
+            a JSON string.
     """
 
     def __init__(
         self,
         files_metadata: Iterable[FileMetadata],
         namespace: Namespace | None = None,
+        unpack_metadata: bool = True,
     ):
         self.namespace = namespace or DEFAULT_NAMESPACE
         self.files_metadata = files_metadata
+        self.unpack_metadata = unpack_metadata
 
     @classmethod
     def from_dataset(

@@ -37,56 +42,75 @@ class FilesExtractor(BaseExtractor):
         client: CogniteClient,
         data_set_external_id: str,
         namespace: Namespace | None = None,
+        unpack_metadata: bool = True,
     ):
-        return cls(
+        return cls(
+            cast(
+                Iterable[FileMetadata],
+                client.files(data_set_external_ids=data_set_external_id),
+            ),
+            namespace,
+            unpack_metadata,
+        )
 
     @classmethod
-    def from_file(
-
+    def from_file(
+        cls,
+        file_path: str,
+        namespace: Namespace | None = None,
+        unpack_metadata: bool = True,
+    ):
+        return cls(
+            FileMetadataList.load(Path(file_path).read_text()),
+            namespace,
+            unpack_metadata,
+        )
 
     def extract(self) -> Iterable[Triple]:
         """Extract files metadata as triples."""
         for event in self.files_metadata:
-            yield from self._file2triples(event
+            yield from self._file2triples(event)
 
-
-
-        id_ = namespace[f"File_{file.id}"]
+    def _file2triples(self, file: FileMetadata) -> list[Triple]:
+        id_ = self.namespace[f"File_{file.id}"]
 
         # Set rdf type
-        triples: list[Triple] = [(id_, RDF.type, namespace.File)]
+        triples: list[Triple] = [(id_, RDF.type, self.namespace.File)]
 
         # Create attributes
 
         if file.external_id:
-            triples.append((id_, namespace.external_id, Literal(file.external_id)))
+            triples.append((id_, self.namespace.external_id, Literal(file.external_id)))
 
         if file.source:
-            triples.append((id_, namespace.type, Literal(file.source)))
+            triples.append((id_, self.namespace.type, Literal(file.source)))
 
         if file.mime_type:
-            triples.append((id_, namespace.mime_type, Literal(file.mime_type)))
+            triples.append((id_, self.namespace.mime_type, Literal(file.mime_type)))
 
         if file.uploaded:
-            triples.append((id_, namespace.uploaded, Literal(file.uploaded)))
+            triples.append((id_, self.namespace.uploaded, Literal(file.uploaded)))
 
         if file.source:
-            triples.append((id_, namespace.source, Literal(file.source)))
+            triples.append((id_, self.namespace.source, Literal(file.source)))
 
         if file.metadata:
-
-
-
-
-
-
-
+            if self.unpack_metadata:
+                for key, value in file.metadata.items():
+                    if value:
+                        type_aware_value = string_to_ideal_type(value)
+                        try:
+                            triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
+                        except ValidationError:
+                            triples.append((id_, self.namespace[key], Literal(type_aware_value)))
+            else:
+                triples.append((id_, self.namespace.metadata, Literal(json.dumps(file.metadata))))
 
         if file.source_created_time:
             triples.append(
                 (
                     id_,
-                    namespace.source_created_time,
+                    self.namespace.source_created_time,
                     Literal(datetime.fromtimestamp(file.source_created_time / 1000, timezone.utc)),
                 )
             )

@@ -94,25 +118,33 @@ class FilesExtractor(BaseExtractor):
             triples.append(
                 (
                     id_,
-                    namespace.source_created_time,
+                    self.namespace.source_created_time,
                     Literal(datetime.fromtimestamp(file.source_modified_time / 1000, timezone.utc)),
                 )
             )
         if file.uploaded_time:
             triples.append(
-                (
+                (
+                    id_,
+                    self.namespace.uploaded_time,
+                    Literal(datetime.fromtimestamp(file.uploaded_time / 1000, timezone.utc)),
+                )
             )
 
         if file.created_time:
             triples.append(
-                (
+                (
+                    id_,
+                    self.namespace.created_time,
+                    Literal(datetime.fromtimestamp(file.created_time / 1000, timezone.utc)),
+                )
             )
 
         if file.last_updated_time:
             triples.append(
                 (
                     id_,
-                    namespace.last_updated_time,
+                    self.namespace.last_updated_time,
                     Literal(datetime.fromtimestamp(file.last_updated_time / 1000, timezone.utc)),
                 )
             )

@@ -121,17 +153,29 @@ class FilesExtractor(BaseExtractor):
             for label in file.labels:
                 # external_id can create ill-formed URIs, so we create websafe URIs
                 # since labels do not have internal ids, we use the external_id as the id
-                triples.append(
+                triples.append(
+                    (
+                        id_,
+                        self.namespace.label,
+                        self.namespace[f"Label_{quote(label.dump()['externalId'])}"],
+                    )
+                )
 
         if file.security_categories:
             for category in file.security_categories:
-                triples.append((id_, namespace.security_categories, Literal(category)))
+                triples.append((id_, self.namespace.security_categories, Literal(category)))
 
         if file.data_set_id:
-            triples.append(
+            triples.append(
+                (
+                    id_,
+                    self.namespace.data_set_id,
+                    self.namespace[f"Dataset_{file.data_set_id}"],
+                )
+            )
 
         if file.asset_ids:
             for asset_id in file.asset_ids:
-                triples.append((id_, namespace.asset, namespace[f"Asset_{asset_id}"]))
+                triples.append((id_, self.namespace.asset, self.namespace[f"Asset_{asset_id}"]))
 
         return triples
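`FilesExtractor.from_file` now accepts the same `unpack_metadata` flag as the other constructors; the JSON file is expected to hold a dumped `FileMetadataList`. A minimal sketch, with a hypothetical file path:

```python
from cognite.neat.graph.extractors._classic_cdf._files import FilesExtractor

# "files_metadata.json" is a hypothetical path holding a dumped FileMetadataList.
extractor = FilesExtractor.from_file(
    "files_metadata.json",
    unpack_metadata=False,  # keep each file's metadata as one JSON-string triple
)
triples = list(extractor.extract())
```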
cognite/neat/graph/extractors/_classic_cdf/_labels.py
CHANGED

@@ -37,7 +37,11 @@ class LabelsExtractor(BaseExtractor):
         namespace: Namespace | None = None,
     ):
         return cls(
-            cast(
+            cast(
+                Iterable[LabelDefinition],
+                client.labels(data_set_external_ids=data_set_external_id),
+            ),
+            namespace,
         )
 
     @classmethod

@@ -47,36 +51,41 @@ class LabelsExtractor(BaseExtractor):
     def extract(self) -> Iterable[Triple]:
         """Extract labels as triples."""
         for label in self.labels:
-            yield from self._labels2triples(label
+            yield from self._labels2triples(label)
 
-
-    def _labels2triples(cls, label: LabelDefinition, namespace: Namespace) -> list[Triple]:
+    def _labels2triples(self, label: LabelDefinition) -> list[Triple]:
         if label.external_id:
-            id_ = namespace[f"Label_{create_sha256_hash(label.external_id)}"]
+            id_ = self.namespace[f"Label_{create_sha256_hash(label.external_id)}"]
 
             # Set rdf type
-            triples: list[Triple] = [(id_, RDF.type, namespace.Label)]
+            triples: list[Triple] = [(id_, RDF.type, self.namespace.Label)]
 
             # Create attributes
-            triples.append((id_, namespace.external_id, Literal(label.external_id)))
+            triples.append((id_, self.namespace.external_id, Literal(label.external_id)))
 
             if label.name:
-                triples.append((id_, namespace.name, Literal(label.name)))
+                triples.append((id_, self.namespace.name, Literal(label.name)))
 
             if label.description:
-                triples.append((id_, namespace.description, Literal(label.description)))
+                triples.append((id_, self.namespace.description, Literal(label.description)))
 
             if label.created_time:
                 triples.append(
                     (
                         id_,
-                        namespace.created_time,
+                        self.namespace.created_time,
                         Literal(datetime.fromtimestamp(label.created_time / 1000, timezone.utc)),
                     )
                 )
 
             if label.data_set_id:
-                triples.append(
+                triples.append(
+                    (
+                        id_,
+                        self.namespace.data_set_id,
+                        self.namespace[f"Dataset_{label.data_set_id}"],
+                    )
+                )
 
             return triples
         return []
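Both the assets and labels diffs build label node ids with `create_sha256_hash` because raw label external ids can contain characters that produce ill-formed URIs. A plain-hashlib stand-in for the idea (the exact implementation of cognite.neat's helper may differ, and the namespace value here is assumed):

```python
import hashlib

from rdflib import Namespace

NEAT = Namespace("http://purl.org/cognite/neat#")  # assumed default namespace

def create_sha256_hash(text: str) -> str:
    # Stand-in for cognite.neat's helper: a hex digest is always URI-safe.
    return hashlib.sha256(text.encode("utf-8")).hexdigest()

label_external_id = "My Label / with spaces & slashes"
label_node = NEAT[f"Label_{create_sha256_hash(label_external_id)}"]
print(label_node)
```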