cognite-neat 0.85.6__py3-none-any.whl → 0.85.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of cognite-neat might be problematic.

Files changed (35)
  1. cognite/neat/_version.py +1 -1
  2. cognite/neat/app/api/routers/data_exploration.py +2 -2
  3. cognite/neat/graph/extractors/_classic_cdf/_assets.py +136 -27
  4. cognite/neat/graph/extractors/_classic_cdf/_events.py +56 -26
  5. cognite/neat/graph/extractors/_classic_cdf/_files.py +73 -29
  6. cognite/neat/graph/extractors/_classic_cdf/_labels.py +20 -11
  7. cognite/neat/graph/extractors/_classic_cdf/_relationships.py +35 -20
  8. cognite/neat/graph/extractors/_classic_cdf/_sequences.py +60 -22
  9. cognite/neat/graph/extractors/_classic_cdf/_timeseries.py +78 -30
  10. cognite/neat/graph/extractors/_mock_graph_generator.py +6 -2
  11. cognite/neat/graph/loaders/_base.py +3 -3
  12. cognite/neat/graph/loaders/_rdf2dms.py +13 -11
  13. cognite/neat/graph/queries/_base.py +16 -6
  14. cognite/neat/graph/queries/_shared.py +2 -2
  15. cognite/neat/legacy/graph/extractors/_mock_graph_generator.py +4 -2
  16. cognite/neat/legacy/graph/loaders/_asset_loader.py +5 -5
  17. cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py +4 -4
  18. cognite/neat/legacy/graph/loaders/core/rdf_to_relationships.py +2 -2
  19. cognite/neat/legacy/graph/transformations/query_generator/sparql.py +2 -2
  20. cognite/neat/legacy/graph/transformations/transformer.py +2 -2
  21. cognite/neat/legacy/rules/exporters/_rules2ontology.py +6 -6
  22. cognite/neat/legacy/rules/importers/_graph2rules.py +4 -4
  23. cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py +3 -3
  24. cognite/neat/legacy/rules/importers/_owl2rules/_owl2properties.py +5 -5
  25. cognite/neat/rules/exporters/_rules2ontology.py +6 -6
  26. cognite/neat/rules/importers/_inference2rules.py +35 -25
  27. cognite/neat/rules/importers/_owl2rules/_owl2classes.py +3 -3
  28. cognite/neat/rules/importers/_owl2rules/_owl2properties.py +5 -5
  29. cognite/neat/utils/__init__.py +2 -2
  30. cognite/neat/utils/utils.py +6 -6
  31. {cognite_neat-0.85.6.dist-info → cognite_neat-0.85.8.dist-info}/METADATA +1 -1
  32. {cognite_neat-0.85.6.dist-info → cognite_neat-0.85.8.dist-info}/RECORD +35 -35
  33. {cognite_neat-0.85.6.dist-info → cognite_neat-0.85.8.dist-info}/LICENSE +0 -0
  34. {cognite_neat-0.85.6.dist-info → cognite_neat-0.85.8.dist-info}/WHEEL +0 -0
  35. {cognite_neat-0.85.6.dist-info → cognite_neat-0.85.8.dist-info}/entry_points.txt +0 -0
cognite/neat/_version.py CHANGED
@@ -1 +1 @@
- __version__ = "0.85.6"
+ __version__ = "0.85.8"
cognite/neat/app/api/routers/data_exploration.py CHANGED
@@ -17,7 +17,7 @@ from cognite.neat.app.api.data_classes.rest import (
  from cognite.neat.app.api.utils.data_mapping import rdf_result_to_api_response
  from cognite.neat.app.api.utils.query_templates import query_templates
  from cognite.neat.legacy.graph.transformations import query_generator
- from cognite.neat.utils.utils import remove_namespace
+ from cognite.neat.utils.utils import remove_namespace_from_uri
  from cognite.neat.workflows.steps.data_contracts import RulesData, SolutionGraph, SourceGraph

  router = APIRouter()
@@ -51,7 +51,7 @@ def get_datatype_properties(request: DatatypePropertyRequest):
          {
              "id": row[rdflib.Variable("property")],
              "count": int(row[rdflib.Variable("occurrence")]),
-             "name": remove_namespace(row[rdflib.Variable("property")]),
+             "name": remove_namespace_from_uri(row[rdflib.Variable("property")]),
          }
          for row in results["rows"]
      ]
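The change here is mechanical: the helper `remove_namespace` was renamed to `remove_namespace_from_uri` in `cognite.neat.utils.utils` (file 30 in the list above), and this call site follows. A minimal sketch of the renamed helper in isolation, assuming it still returns the local name of a URI; the example URI is made up:

```python
from rdflib import URIRef

from cognite.neat.utils.utils import remove_namespace_from_uri

# Hypothetical URI; the helper is expected to strip the namespace part
# (everything up to the final "#" or "/") and return the local name.
uri = URIRef("http://example.org/neat#Asset_42")
print(remove_namespace_from_uri(uri))  # expected: "Asset_42"
```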
cognite/neat/graph/extractors/_classic_cdf/_assets.py CHANGED
@@ -1,10 +1,12 @@
- from collections.abc import Iterable
+ import json
+ import re
+ from collections.abc import Callable, Iterable
  from datetime import datetime, timezone
  from pathlib import Path
  from typing import cast

  from cognite.client import CogniteClient
- from cognite.client.data_classes import Asset, AssetList
+ from cognite.client.data_classes import Asset, AssetFilter, AssetList
  from rdflib import RDF, Literal, Namespace

  from cognite.neat.constants import DEFAULT_NAMESPACE
@@ -19,15 +21,34 @@ class AssetsExtractor(BaseExtractor):
      Args:
          assets (Iterable[Asset]): An iterable of assets.
          namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
+         to_type (Callable[[Asset], str | None], optional): A function to convert an asset to a type. Defaults to None.
+             If None or if the function returns None, the asset will be set to the default type "Asset".
+         total (int, optional): The total number of assets to load. If passed, you will get a progress bar if rich
+             is installed. Defaults to None.
+         limit (int, optional): The maximal number of assets to load. Defaults to None. This is typically used for
+             testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
+             limit the extraction to 1000 assets to test the setup.
+         unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
+             a JSON string.
      """

+     _SPACE_PATTERN = re.compile(r"\s+")
+
      def __init__(
          self,
          assets: Iterable[Asset],
          namespace: Namespace | None = None,
+         to_type: Callable[[Asset], str | None] | None = None,
+         total: int | None = None,
+         limit: int | None = None,
+         unpack_metadata: bool = False,
      ):
          self.namespace = namespace or DEFAULT_NAMESPACE
          self.assets = assets
+         self.to_type = to_type
+         self.total = total
+         self.limit = min(limit, total) if limit and total else limit
+         self.unpack_metadata = unpack_metadata

      @classmethod
      def from_dataset(
@@ -35,55 +56,124 @@ class AssetsExtractor(BaseExtractor):
          client: CogniteClient,
          data_set_external_id: str,
          namespace: Namespace | None = None,
+         to_type: Callable[[Asset], str | None] | None = None,
+         limit: int | None = None,
+         unpack_metadata: bool = False,
      ):
-         return cls(cast(Iterable[Asset], client.assets(data_set_external_ids=data_set_external_id)), namespace)
+         total = client.assets.aggregate_count(filter=AssetFilter(data_set_ids=[{"externalId": data_set_external_id}]))
+
+         return cls(
+             cast(
+                 Iterable[Asset],
+                 client.assets(data_set_external_ids=data_set_external_id),
+             ),
+             namespace,
+             to_type,
+             total,
+             limit,
+             unpack_metadata=unpack_metadata,
+         )

      @classmethod
-     def from_hierarchy(cls, client: CogniteClient, root_asset_external_id: str, namespace: Namespace | None = None):
-         return cls(cast(Iterable[Asset], client.assets(asset_subtree_external_ids=root_asset_external_id)), namespace)
+     def from_hierarchy(
+         cls,
+         client: CogniteClient,
+         root_asset_external_id: str,
+         namespace: Namespace | None = None,
+         to_type: Callable[[Asset], str | None] | None = None,
+         limit: int | None = None,
+         unpack_metadata: bool = False,
+     ):
+         total = client.assets.aggregate_count(
+             filter=AssetFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
+         )
+
+         return cls(
+             cast(
+                 Iterable[Asset],
+                 client.assets(asset_subtree_external_ids=root_asset_external_id),
+             ),
+             namespace,
+             to_type,
+             total,
+             limit,
+             unpack_metadata=unpack_metadata,
+         )

      @classmethod
-     def from_file(cls, file_path: str, namespace: Namespace | None = None):
-         return cls(AssetList.load(Path(file_path).read_text()), namespace)
+     def from_file(
+         cls,
+         file_path: str,
+         namespace: Namespace | None = None,
+         to_type: Callable[[Asset], str] | None = None,
+         limit: int | None = None,
+         unpack_metadata: bool = False,
+     ):
+         return cls(
+             AssetList.load(Path(file_path).read_text()),
+             namespace,
+             to_type,
+             limit,
+             unpack_metadata=unpack_metadata,
+         )

      def extract(self) -> Iterable[Triple]:
          """Extracts an asset with the given asset_id."""
-         for asset in self.assets:
-             yield from self._asset2triples(asset, self.namespace)
-
-     @classmethod
-     def _asset2triples(cls, asset: Asset, namespace: Namespace) -> list[Triple]:
+         if self.total:
+             try:
+                 from rich.progress import track
+             except ModuleNotFoundError:
+                 to_iterate = self.assets
+             else:
+                 to_iterate = track(
+                     self.assets,
+                     total=self.limit or self.total,
+                     description="Extracting Assets",
+                 )
+         else:
+             to_iterate = self.assets
+         for no, asset in enumerate(to_iterate):
+             yield from self._asset2triples(asset)
+             if self.limit and no >= self.limit:
+                 break
+
+     def _asset2triples(self, asset: Asset) -> list[Triple]:
          """Converts an asset to triples."""
-         id_ = namespace[f"Asset_{asset.id}"]
+         id_ = self.namespace[f"Asset_{asset.id}"]

          # Set rdf type
-         triples: list[Triple] = [(id_, RDF.type, namespace["Asset"])]
+         type_ = "Asset"
+         if self.to_type:
+             type_ = self.to_type(asset) or type_
+         type_ = self._SPACE_PATTERN.sub("_", type_)
+
+         triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]

          # Create attributes
          if asset.name:
-             triples.append((id_, namespace.name, Literal(asset.name)))
+             triples.append((id_, self.namespace.name, Literal(asset.name)))

          if asset.description:
-             triples.append((id_, namespace.description, Literal(asset.description)))
+             triples.append((id_, self.namespace.description, Literal(asset.description)))

          if asset.external_id:
-             triples.append((id_, namespace.external_id, Literal(asset.external_id)))
+             triples.append((id_, self.namespace.external_id, Literal(asset.external_id)))

          if asset.source:
-             triples.append((id_, namespace.source, Literal(asset.source)))
+             triples.append((id_, self.namespace.source, Literal(asset.source)))

          # properties ref creation and update
          triples.append(
              (
                  id_,
-                 namespace.created_time,
+                 self.namespace.created_time,
                  Literal(datetime.fromtimestamp(asset.created_time / 1000, timezone.utc)),
              )
          )
          triples.append(
              (
                  id_,
-                 namespace.last_updated_time,
+                 self.namespace.last_updated_time,
                  Literal(datetime.fromtimestamp(asset.last_updated_time / 1000, timezone.utc)),
              )
          )
@@ -93,22 +183,41 @@ class AssetsExtractor(BaseExtractor):
                  # external_id can create ill-formed URIs, so we create websafe URIs
                  # since labels do not have internal ids, we use the external_id as the id
                  triples.append(
-                     (id_, namespace.label, namespace[f"Label_{create_sha256_hash(label.dump()['externalId'])}"])
+                     (
+                         id_,
+                         self.namespace.label,
+                         self.namespace[f"Label_{create_sha256_hash(label.dump()['externalId'])}"],
+                     )
                  )

          if asset.metadata:
-             for key, value in asset.metadata.items():
-                 if value:
-                     triples.append((id_, namespace[key], Literal(string_to_ideal_type(value))))
+             if self.unpack_metadata:
+                 for key, value in asset.metadata.items():
+                     if value:
+                         triples.append(
+                             (
+                                 id_,
+                                 self.namespace[key],
+                                 Literal(string_to_ideal_type(value)),
+                             )
+                         )
+             else:
+                 triples.append((id_, self.namespace.metadata, Literal(json.dumps(asset.metadata))))

          # Create connections:
          if asset.parent_id:
-             triples.append((id_, namespace.parent, namespace[f"Asset_{asset.parent_id}"]))
+             triples.append((id_, self.namespace.parent, self.namespace[f"Asset_{asset.parent_id}"]))

          if asset.root_id:
-             triples.append((id_, namespace.root, namespace[f"Asset_{asset.root_id}"]))
+             triples.append((id_, self.namespace.root, self.namespace[f"Asset_{asset.root_id}"]))

          if asset.data_set_id:
-             triples.append((id_, namespace.dataset, namespace[f"Dataset_{asset.data_set_id}"]))
+             triples.append(
+                 (
+                     id_,
+                     self.namespace.dataset,
+                     self.namespace[f"Dataset_{asset.data_set_id}"],
+                 )
+             )

          return triples
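Taken together, the AssetsExtractor changes add four opt-in knobs: `to_type` (derive the RDF type per asset, with whitespace replaced by `_`), `total` (enables a rich progress bar when rich is installed), `limit` (cap extraction for test runs), and `unpack_metadata` (per-key triples vs. one JSON-string triple). A usage sketch against an in-memory asset list; the import path is an assumption (the class lives in the private `_classic_cdf._assets` module), and the sample assets and `to_type` lambda are made up. `created_time`/`last_updated_time` are set because `_asset2triples` reads them unconditionally:

```python
from cognite.client.data_classes import Asset

from cognite.neat.graph.extractors import AssetsExtractor  # assumed re-export

assets = [
    Asset(id=1, name="Pump 01", created_time=0, last_updated_time=0,
          metadata={"location": "P-101"}),
    Asset(id=2, name="Valve 02", parent_id=1, created_time=0, last_updated_time=0),
]

extractor = AssetsExtractor(
    assets,
    # Type by the first word of the name; None/empty falls back to "Asset",
    # and any remaining whitespace in the type is replaced by "_".
    to_type=lambda asset: (asset.name or "").split(" ")[0] or None,
    total=len(assets),  # enables the progress bar if rich is installed
    limit=1000,  # cap the extraction, e.g. when testing the setup
    unpack_metadata=False,  # metadata becomes a single JSON-string triple
)

for subject, predicate, obj in extractor.extract():
    print(subject, predicate, obj)
```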
cognite/neat/graph/extractors/_classic_cdf/_events.py CHANGED
@@ -1,3 +1,4 @@
+ import json
  from collections.abc import Iterable
  from datetime import datetime, timezone
  from pathlib import Path
@@ -20,15 +21,19 @@ class EventsExtractor(BaseExtractor):
      Args:
          events (Iterable[Event]): An iterable of events.
          namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
+         unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
+             a JSON string.
      """

      def __init__(
          self,
          events: Iterable[Event],
          namespace: Namespace | None = None,
+         unpack_metadata: bool = False,
      ):
          self.namespace = namespace or DEFAULT_NAMESPACE
          self.events = events
+         self.unpack_metadata = unpack_metadata

      @classmethod
      def from_dataset(
@@ -36,61 +41,80 @@ class EventsExtractor(BaseExtractor):
          client: CogniteClient,
          data_set_external_id: str,
          namespace: Namespace | None = None,
+         unpack_metadata: bool = False,
      ):
-         return cls(cast(Iterable[Event], client.events(data_set_external_ids=data_set_external_id)), namespace)
+         return cls(
+             cast(
+                 Iterable[Event],
+                 client.events(data_set_external_ids=data_set_external_id),
+             ),
+             namespace,
+             unpack_metadata,
+         )

      @classmethod
-     def from_file(cls, file_path: str, namespace: Namespace | None = None):
-         return cls(EventList.load(Path(file_path).read_text()), namespace)
+     def from_file(
+         cls,
+         file_path: str,
+         namespace: Namespace | None = None,
+         unpack_metadata: bool = False,
+     ):
+         return cls(EventList.load(Path(file_path).read_text()), namespace, unpack_metadata)

      def extract(self) -> Iterable[Triple]:
          """Extract events as triples."""
          for event in self.events:
-             yield from self._event2triples(event, self.namespace)
+             yield from self._event2triples(event)

-     @classmethod
-     def _event2triples(cls, event: Event, namespace: Namespace) -> list[Triple]:
-         id_ = namespace[f"Event_{event.id}"]
+     def _event2triples(self, event: Event) -> list[Triple]:
+         id_ = self.namespace[f"Event_{event.id}"]

          # Set rdf type
-         triples: list[Triple] = [(id_, RDF.type, namespace.Event)]
+         triples: list[Triple] = [(id_, RDF.type, self.namespace.Event)]

          # Create attributes

          if event.external_id:
-             triples.append((id_, namespace.external_id, Literal(event.external_id)))
+             triples.append((id_, self.namespace.external_id, Literal(event.external_id)))

          if event.source:
-             triples.append((id_, namespace.type, Literal(event.source)))
+             triples.append((id_, self.namespace.type, Literal(event.source)))

          if event.type:
-             triples.append((id_, namespace.type, Literal(event.type)))
+             triples.append((id_, self.namespace.type, Literal(event.type)))

          if event.subtype:
-             triples.append((id_, namespace.subtype, Literal(event.subtype)))
+             triples.append((id_, self.namespace.subtype, Literal(event.subtype)))

          if event.metadata:
-             for key, value in event.metadata.items():
-                 if value:
-                     type_aware_value = string_to_ideal_type(value)
-                     try:
-                         triples.append((id_, namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
-                     except ValidationError:
-                         triples.append((id_, namespace[key], Literal(type_aware_value)))
+             if self.unpack_metadata:
+                 for key, value in event.metadata.items():
+                     if value:
+                         type_aware_value = string_to_ideal_type(value)
+                         try:
+                             triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
+                         except ValidationError:
+                             triples.append((id_, self.namespace[key], Literal(type_aware_value)))
+             else:
+                 triples.append((id_, self.namespace.metadata, Literal(json.dumps(event.metadata))))

          if event.description:
-             triples.append((id_, namespace.description, Literal(event.description)))
+             triples.append((id_, self.namespace.description, Literal(event.description)))

          if event.created_time:
              triples.append(
-                 (id_, namespace.created_time, Literal(datetime.fromtimestamp(event.created_time / 1000, timezone.utc)))
+                 (
+                     id_,
+                     self.namespace.created_time,
+                     Literal(datetime.fromtimestamp(event.created_time / 1000, timezone.utc)),
+                 )
              )

          if event.last_updated_time:
              triples.append(
                  (
                      id_,
-                     namespace.last_updated_time,
+                     self.namespace.last_updated_time,
                      Literal(datetime.fromtimestamp(event.last_updated_time / 1000, timezone.utc)),
                  )
              )
@@ -99,7 +123,7 @@ class EventsExtractor(BaseExtractor):
              triples.append(
                  (
                      id_,
-                     namespace.start_time,
+                     self.namespace.start_time,
                      Literal(datetime.fromtimestamp(event.start_time / 1000, timezone.utc)),
                  )
              )
@@ -108,16 +132,22 @@ class EventsExtractor(BaseExtractor):
              triples.append(
                  (
                      id_,
-                     namespace.end_time,
+                     self.namespace.end_time,
                      Literal(datetime.fromtimestamp(event.end_time / 1000, timezone.utc)),
                  )
              )

          if event.data_set_id:
-             triples.append((id_, namespace.data_set_id, namespace[f"Dataset_{event.data_set_id}"]))
+             triples.append(
+                 (
+                     id_,
+                     self.namespace.data_set_id,
+                     self.namespace[f"Dataset_{event.data_set_id}"],
+                 )
+             )

          if event.asset_ids:
              for asset_id in event.asset_ids:
-                 triples.append((id_, namespace.asset, namespace[f"Asset_{asset_id}"]))
+                 triples.append((id_, self.namespace.asset, self.namespace[f"Asset_{asset_id}"]))

          return triples
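EventsExtractor gains only `unpack_metadata`, defaulting to the new packed behavior. A brief sketch contrasting the two modes, under the same assumed re-export and with a made-up event; the triple counts follow from `_event2triples` above (rdf:type plus the type literal, plus either one metadata triple or one per key):

```python
from cognite.client.data_classes import Event

from cognite.neat.graph.extractors import EventsExtractor  # assumed re-export

event = Event(id=1, type="maintenance", metadata={"order": "WO-123", "priority": "2"})

# Default: one triple carrying the whole metadata dict as a JSON string.
packed = list(EventsExtractor([event]).extract())
# Opt-in: one triple per metadata key, with type-aware literal/URI values.
unpacked = list(EventsExtractor([event], unpack_metadata=True).extract())

print(len(packed), len(unpacked))  # expected: 3 4
```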
cognite/neat/graph/extractors/_classic_cdf/_files.py CHANGED
@@ -1,3 +1,4 @@
+ import json
  from collections.abc import Iterable
  from datetime import datetime, timezone
  from pathlib import Path
@@ -21,15 +22,19 @@ class FilesExtractor(BaseExtractor):
      Args:
          files_metadata (Iterable[FileMetadata]): An iterable of files metadata.
          namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
+         unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
+             a JSON string.
      """

      def __init__(
          self,
          files_metadata: Iterable[FileMetadata],
          namespace: Namespace | None = None,
+         unpack_metadata: bool = False,
      ):
          self.namespace = namespace or DEFAULT_NAMESPACE
          self.files_metadata = files_metadata
+         self.unpack_metadata = unpack_metadata

      @classmethod
      def from_dataset(
@@ -37,56 +42,75 @@ class FilesExtractor(BaseExtractor):
          client: CogniteClient,
          data_set_external_id: str,
          namespace: Namespace | None = None,
+         unpack_metadata: bool = False,
      ):
-         return cls(cast(Iterable[FileMetadata], client.files(data_set_external_ids=data_set_external_id)), namespace)
+         return cls(
+             cast(
+                 Iterable[FileMetadata],
+                 client.files(data_set_external_ids=data_set_external_id),
+             ),
+             namespace,
+             unpack_metadata,
+         )

      @classmethod
-     def from_file(cls, file_path: str, namespace: Namespace | None = None):
-         return cls(FileMetadataList.load(Path(file_path).read_text()), namespace)
+     def from_file(
+         cls,
+         file_path: str,
+         namespace: Namespace | None = None,
+         unpack_metadata: bool = False,
+     ):
+         return cls(
+             FileMetadataList.load(Path(file_path).read_text()),
+             namespace,
+             unpack_metadata,
+         )

      def extract(self) -> Iterable[Triple]:
          """Extract files metadata as triples."""
          for event in self.files_metadata:
-             yield from self._file2triples(event, self.namespace)
+             yield from self._file2triples(event)

-     @classmethod
-     def _file2triples(cls, file: FileMetadata, namespace: Namespace) -> list[Triple]:
-         id_ = namespace[f"File_{file.id}"]
+     def _file2triples(self, file: FileMetadata) -> list[Triple]:
+         id_ = self.namespace[f"File_{file.id}"]

          # Set rdf type
-         triples: list[Triple] = [(id_, RDF.type, namespace.File)]
+         triples: list[Triple] = [(id_, RDF.type, self.namespace.File)]

          # Create attributes

          if file.external_id:
-             triples.append((id_, namespace.external_id, Literal(file.external_id)))
+             triples.append((id_, self.namespace.external_id, Literal(file.external_id)))

          if file.source:
-             triples.append((id_, namespace.type, Literal(file.source)))
+             triples.append((id_, self.namespace.type, Literal(file.source)))

          if file.mime_type:
-             triples.append((id_, namespace.mime_type, Literal(file.mime_type)))
+             triples.append((id_, self.namespace.mime_type, Literal(file.mime_type)))

          if file.uploaded:
-             triples.append((id_, namespace.uploaded, Literal(file.uploaded)))
+             triples.append((id_, self.namespace.uploaded, Literal(file.uploaded)))

          if file.source:
-             triples.append((id_, namespace.source, Literal(file.source)))
+             triples.append((id_, self.namespace.source, Literal(file.source)))

          if file.metadata:
-             for key, value in file.metadata.items():
-                 if value:
-                     type_aware_value = string_to_ideal_type(value)
-                     try:
-                         triples.append((id_, namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
-                     except ValidationError:
-                         triples.append((id_, namespace[key], Literal(type_aware_value)))
+             if self.unpack_metadata:
+                 for key, value in file.metadata.items():
+                     if value:
+                         type_aware_value = string_to_ideal_type(value)
+                         try:
+                             triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
+                         except ValidationError:
+                             triples.append((id_, self.namespace[key], Literal(type_aware_value)))
+             else:
+                 triples.append((id_, self.namespace.metadata, Literal(json.dumps(file.metadata))))

          if file.source_created_time:
              triples.append(
                  (
                      id_,
-                     namespace.source_created_time,
+                     self.namespace.source_created_time,
                      Literal(datetime.fromtimestamp(file.source_created_time / 1000, timezone.utc)),
                  )
              )
@@ -94,25 +118,33 @@ class FilesExtractor(BaseExtractor):
              triples.append(
                  (
                      id_,
-                     namespace.source_created_time,
+                     self.namespace.source_created_time,
                      Literal(datetime.fromtimestamp(file.source_modified_time / 1000, timezone.utc)),
                  )
              )
          if file.uploaded_time:
              triples.append(
-                 (id_, namespace.uploaded_time, Literal(datetime.fromtimestamp(file.uploaded_time / 1000, timezone.utc)))
+                 (
+                     id_,
+                     self.namespace.uploaded_time,
+                     Literal(datetime.fromtimestamp(file.uploaded_time / 1000, timezone.utc)),
+                 )
              )

          if file.created_time:
              triples.append(
-                 (id_, namespace.created_time, Literal(datetime.fromtimestamp(file.created_time / 1000, timezone.utc)))
+                 (
+                     id_,
+                     self.namespace.created_time,
+                     Literal(datetime.fromtimestamp(file.created_time / 1000, timezone.utc)),
+                 )
              )

          if file.last_updated_time:
              triples.append(
                  (
                      id_,
-                     namespace.last_updated_time,
+                     self.namespace.last_updated_time,
                      Literal(datetime.fromtimestamp(file.last_updated_time / 1000, timezone.utc)),
                  )
              )
@@ -121,17 +153,29 @@ class FilesExtractor(BaseExtractor):
              for label in file.labels:
                  # external_id can create ill-formed URIs, so we create websafe URIs
                  # since labels do not have internal ids, we use the external_id as the id
-                 triples.append((id_, namespace.label, namespace[f"Label_{quote(label.dump()['externalId'])}"]))
+                 triples.append(
+                     (
+                         id_,
+                         self.namespace.label,
+                         self.namespace[f"Label_{quote(label.dump()['externalId'])}"],
+                     )
+                 )

          if file.security_categories:
              for category in file.security_categories:
-                 triples.append((id_, namespace.security_categories, Literal(category)))
+                 triples.append((id_, self.namespace.security_categories, Literal(category)))

          if file.data_set_id:
-             triples.append((id_, namespace.data_set_id, namespace[f"Dataset_{file.data_set_id}"]))
+             triples.append(
+                 (
+                     id_,
+                     self.namespace.data_set_id,
+                     self.namespace[f"Dataset_{file.data_set_id}"],
+                 )
+             )

          if file.asset_ids:
              for asset_id in file.asset_ids:
-                 triples.append((id_, namespace.asset, namespace[f"Asset_{asset_id}"]))
+                 triples.append((id_, self.namespace.asset, self.namespace[f"Asset_{asset_id}"]))

          return triples
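FilesExtractor follows the same pattern: `unpack_metadata` is threaded through `__init__`, `from_dataset`, and `from_file`. One last sketch under the same assumed re-export, with made-up file metadata:

```python
from cognite.client.data_classes import FileMetadata

from cognite.neat.graph.extractors import FilesExtractor  # assumed re-export

file = FileMetadata(
    id=1,
    external_id="doc-001",
    mime_type="application/pdf",
    metadata={"system": "DMS", "revision": "3"},
)

# With unpack_metadata=True each metadata key becomes its own triple;
# values that parse as HTTP URLs are emitted as URIRefs, the rest as Literals.
for triple in FilesExtractor([file], unpack_metadata=True).extract():
    print(triple)
```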