cognite-neat 0.85.7__py3-none-any.whl → 0.85.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

cognite/neat/_version.py CHANGED
@@ -1 +1 @@
- __version__ = "0.85.7"
+ __version__ = "0.85.8"

cognite/neat/graph/extractors/_classic_cdf/_assets.py CHANGED
@@ -1,10 +1,12 @@
+ import json
+ import re
  from collections.abc import Callable, Iterable
  from datetime import datetime, timezone
  from pathlib import Path
  from typing import cast

  from cognite.client import CogniteClient
- from cognite.client.data_classes import Asset, AssetList
+ from cognite.client.data_classes import Asset, AssetFilter, AssetList
  from rdflib import RDF, Literal, Namespace

  from cognite.neat.constants import DEFAULT_NAMESPACE
@@ -21,17 +23,32 @@ class AssetsExtractor(BaseExtractor):
  namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
  to_type (Callable[[Asset], str | None], optional): A function to convert an asset to a type. Defaults to None.
  If None or if the function returns None, the asset will be set to the default type "Asset".
+ total (int, optional): The total number of assets to load. If passed, you will get a progress bar if rich
+ is installed. Defaults to None.
+ limit (int, optional): The maximum number of assets to load. Defaults to None. This is typically used for
+ testing the setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
+ limit the extraction to 1000 assets to test the setup.
+ unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
+ a JSON string.
  """

+ _SPACE_PATTERN = re.compile(r"\s+")
+
  def __init__(
  self,
  assets: Iterable[Asset],
  namespace: Namespace | None = None,
  to_type: Callable[[Asset], str | None] | None = None,
+ total: int | None = None,
+ limit: int | None = None,
+ unpack_metadata: bool = False,
  ):
  self.namespace = namespace or DEFAULT_NAMESPACE
  self.assets = assets
  self.to_type = to_type
+ self.total = total
+ self.limit = min(limit, total) if limit and total else limit
+ self.unpack_metadata = unpack_metadata

  @classmethod
  def from_dataset(
@@ -40,8 +57,22 @@ class AssetsExtractor(BaseExtractor):
  data_set_external_id: str,
  namespace: Namespace | None = None,
  to_type: Callable[[Asset], str | None] | None = None,
+ limit: int | None = None,
+ unpack_metadata: bool = False,
  ):
- return cls(cast(Iterable[Asset], client.assets(data_set_external_ids=data_set_external_id)), namespace, to_type)
+ total = client.assets.aggregate_count(filter=AssetFilter(data_set_ids=[{"externalId": data_set_external_id}]))
+
+ return cls(
+ cast(
+ Iterable[Asset],
+ client.assets(data_set_external_ids=data_set_external_id),
+ ),
+ namespace,
+ to_type,
+ total,
+ limit,
+ unpack_metadata=unpack_metadata,
+ )

  @classmethod
  def from_hierarchy(
@@ -50,57 +81,99 @@ class AssetsExtractor(BaseExtractor):
  root_asset_external_id: str,
  namespace: Namespace | None = None,
  to_type: Callable[[Asset], str | None] | None = None,
+ limit: int | None = None,
+ unpack_metadata: bool = False,
  ):
+ total = client.assets.aggregate_count(
+ filter=AssetFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
+ )
+
  return cls(
- cast(Iterable[Asset], client.assets(asset_subtree_external_ids=root_asset_external_id)), namespace, to_type
+ cast(
+ Iterable[Asset],
+ client.assets(asset_subtree_external_ids=root_asset_external_id),
+ ),
+ namespace,
+ to_type,
+ total,
+ limit,
+ unpack_metadata=unpack_metadata,
  )

  @classmethod
  def from_file(
- cls, file_path: str, namespace: Namespace | None = None, to_type: Callable[[Asset], str] | None = None
+ cls,
+ file_path: str,
+ namespace: Namespace | None = None,
+ to_type: Callable[[Asset], str] | None = None,
+ limit: int | None = None,
+ unpack_metadata: bool = False,
  ):
- return cls(AssetList.load(Path(file_path).read_text()), namespace, to_type)
+ return cls(
+ AssetList.load(Path(file_path).read_text()),
+ namespace,
+ to_type,
+ limit,
+ unpack_metadata=unpack_metadata,
+ )

  def extract(self) -> Iterable[Triple]:
  """Extracts an asset with the given asset_id."""
- for asset in self.assets:
- yield from self._asset2triples(asset, self.namespace)
-
- def _asset2triples(self, asset: Asset, namespace: Namespace) -> list[Triple]:
+ if self.total:
+ try:
+ from rich.progress import track
+ except ModuleNotFoundError:
+ to_iterate = self.assets
+ else:
+ to_iterate = track(
+ self.assets,
+ total=self.limit or self.total,
+ description="Extracting Assets",
+ )
+ else:
+ to_iterate = self.assets
+ for no, asset in enumerate(to_iterate):
+ yield from self._asset2triples(asset)
+ if self.limit and no >= self.limit:
+ break
+
+ def _asset2triples(self, asset: Asset) -> list[Triple]:
  """Converts an asset to triples."""
- id_ = namespace[f"Asset_{asset.id}"]
+ id_ = self.namespace[f"Asset_{asset.id}"]

  # Set rdf type
  type_ = "Asset"
  if self.to_type:
  type_ = self.to_type(asset) or type_
- triples: list[Triple] = [(id_, RDF.type, namespace[type_])]
+ type_ = self._SPACE_PATTERN.sub("_", type_)
+
+ triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]

  # Create attributes
  if asset.name:
- triples.append((id_, namespace.name, Literal(asset.name)))
+ triples.append((id_, self.namespace.name, Literal(asset.name)))

  if asset.description:
- triples.append((id_, namespace.description, Literal(asset.description)))
+ triples.append((id_, self.namespace.description, Literal(asset.description)))

  if asset.external_id:
- triples.append((id_, namespace.external_id, Literal(asset.external_id)))
+ triples.append((id_, self.namespace.external_id, Literal(asset.external_id)))

  if asset.source:
- triples.append((id_, namespace.source, Literal(asset.source)))
+ triples.append((id_, self.namespace.source, Literal(asset.source)))

  # properties ref creation and update
  triples.append(
  (
  id_,
- namespace.created_time,
+ self.namespace.created_time,
  Literal(datetime.fromtimestamp(asset.created_time / 1000, timezone.utc)),
  )
  )
  triples.append(
  (
  id_,
- namespace.last_updated_time,
+ self.namespace.last_updated_time,
  Literal(datetime.fromtimestamp(asset.last_updated_time / 1000, timezone.utc)),
  )
  )
@@ -110,22 +183,41 @@ class AssetsExtractor(BaseExtractor):
  # external_id can create ill-formed URIs, so we create websafe URIs
  # since labels do not have internal ids, we use the external_id as the id
  triples.append(
- (id_, namespace.label, namespace[f"Label_{create_sha256_hash(label.dump()['externalId'])}"])
+ (
+ id_,
+ self.namespace.label,
+ self.namespace[f"Label_{create_sha256_hash(label.dump()['externalId'])}"],
+ )
  )

  if asset.metadata:
- for key, value in asset.metadata.items():
- if value:
- triples.append((id_, namespace[key], Literal(string_to_ideal_type(value))))
+ if self.unpack_metadata:
+ for key, value in asset.metadata.items():
+ if value:
+ triples.append(
+ (
+ id_,
+ self.namespace[key],
+ Literal(string_to_ideal_type(value)),
+ )
+ )
+ else:
+ triples.append((id_, self.namespace.metadata, Literal(json.dumps(asset.metadata))))

  # Create connections:
  if asset.parent_id:
- triples.append((id_, namespace.parent, namespace[f"Asset_{asset.parent_id}"]))
+ triples.append((id_, self.namespace.parent, self.namespace[f"Asset_{asset.parent_id}"]))

  if asset.root_id:
- triples.append((id_, namespace.root, namespace[f"Asset_{asset.root_id}"]))
+ triples.append((id_, self.namespace.root, self.namespace[f"Asset_{asset.root_id}"]))

  if asset.data_set_id:
- triples.append((id_, namespace.dataset, namespace[f"Dataset_{asset.data_set_id}"]))
+ triples.append(
+ (
+ id_,
+ self.namespace.dataset,
+ self.namespace[f"Dataset_{asset.data_set_id}"],
+ )
+ )

  return triples
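
The practical effect of the new arguments is easiest to see from the caller's side. Below is a minimal usage sketch, assuming `AssetsExtractor` is re-exported from `cognite.neat.graph.extractors` (the package layout suggests so), a `CogniteClient` configured through the environment, and a hypothetical dataset external id `my-dataset`:

```python
from cognite.client import CogniteClient

from cognite.neat.graph.extractors import AssetsExtractor  # assumed re-export

client = CogniteClient()  # assumes credentials are already configured

# from_dataset() now computes `total` itself via assets.aggregate_count, so a
# progress bar appears automatically when rich is installed. limit=1000 caps
# the run for a quick smoke test, and unpack_metadata=False (the default)
# yields each asset's metadata dict as a single JSON-string triple.
extractor = AssetsExtractor.from_dataset(
    client,
    data_set_external_id="my-dataset",  # hypothetical external id
    limit=1000,
)

for triple in extractor.extract():
    ...  # feed the (subject, predicate, object) triples into a graph store
```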

cognite/neat/graph/extractors/_classic_cdf/_events.py CHANGED
@@ -1,3 +1,4 @@
+ import json
  from collections.abc import Iterable
  from datetime import datetime, timezone
  from pathlib import Path
@@ -20,15 +21,19 @@ class EventsExtractor(BaseExtractor):
  Args:
  events (Iterable[Event]): An iterable of events.
  namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
+ unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
+ a JSON string.
  """

  def __init__(
  self,
  events: Iterable[Event],
  namespace: Namespace | None = None,
+ unpack_metadata: bool = False,
  ):
  self.namespace = namespace or DEFAULT_NAMESPACE
  self.events = events
+ self.unpack_metadata = unpack_metadata

  @classmethod
  def from_dataset(
@@ -36,61 +41,80 @@ class EventsExtractor(BaseExtractor):
  client: CogniteClient,
  data_set_external_id: str,
  namespace: Namespace | None = None,
+ unpack_metadata: bool = False,
  ):
- return cls(cast(Iterable[Event], client.events(data_set_external_ids=data_set_external_id)), namespace)
+ return cls(
+ cast(
+ Iterable[Event],
+ client.events(data_set_external_ids=data_set_external_id),
+ ),
+ namespace,
+ unpack_metadata,
+ )

  @classmethod
- def from_file(cls, file_path: str, namespace: Namespace | None = None):
- return cls(EventList.load(Path(file_path).read_text()), namespace)
+ def from_file(
+ cls,
+ file_path: str,
+ namespace: Namespace | None = None,
+ unpack_metadata: bool = False,
+ ):
+ return cls(EventList.load(Path(file_path).read_text()), namespace, unpack_metadata)

  def extract(self) -> Iterable[Triple]:
  """Extract events as triples."""
  for event in self.events:
- yield from self._event2triples(event, self.namespace)
+ yield from self._event2triples(event)

- @classmethod
- def _event2triples(cls, event: Event, namespace: Namespace) -> list[Triple]:
- id_ = namespace[f"Event_{event.id}"]
+ def _event2triples(self, event: Event) -> list[Triple]:
+ id_ = self.namespace[f"Event_{event.id}"]

  # Set rdf type
- triples: list[Triple] = [(id_, RDF.type, namespace.Event)]
+ triples: list[Triple] = [(id_, RDF.type, self.namespace.Event)]

  # Create attributes

  if event.external_id:
- triples.append((id_, namespace.external_id, Literal(event.external_id)))
+ triples.append((id_, self.namespace.external_id, Literal(event.external_id)))

  if event.source:
- triples.append((id_, namespace.type, Literal(event.source)))
+ triples.append((id_, self.namespace.type, Literal(event.source)))

  if event.type:
- triples.append((id_, namespace.type, Literal(event.type)))
+ triples.append((id_, self.namespace.type, Literal(event.type)))

  if event.subtype:
- triples.append((id_, namespace.subtype, Literal(event.subtype)))
+ triples.append((id_, self.namespace.subtype, Literal(event.subtype)))

  if event.metadata:
- for key, value in event.metadata.items():
- if value:
- type_aware_value = string_to_ideal_type(value)
- try:
- triples.append((id_, namespace[key], URIRef(str(AnyHttpUrl(type_aware_value))))) # type: ignore
- except ValidationError:
- triples.append((id_, namespace[key], Literal(type_aware_value)))
+ if self.unpack_metadata:
+ for key, value in event.metadata.items():
+ if value:
+ type_aware_value = string_to_ideal_type(value)
+ try:
+ triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value))))) # type: ignore
+ except ValidationError:
+ triples.append((id_, self.namespace[key], Literal(type_aware_value)))
+ else:
+ triples.append((id_, self.namespace.metadata, Literal(json.dumps(event.metadata))))

  if event.description:
- triples.append((id_, namespace.description, Literal(event.description)))
+ triples.append((id_, self.namespace.description, Literal(event.description)))

  if event.created_time:
  triples.append(
- (id_, namespace.created_time, Literal(datetime.fromtimestamp(event.created_time / 1000, timezone.utc)))
+ (
+ id_,
+ self.namespace.created_time,
+ Literal(datetime.fromtimestamp(event.created_time / 1000, timezone.utc)),
+ )
  )

  if event.last_updated_time:
  triples.append(
  (
  id_,
- namespace.last_updated_time,
+ self.namespace.last_updated_time,
  Literal(datetime.fromtimestamp(event.last_updated_time / 1000, timezone.utc)),
  )
  )
@@ -99,7 +123,7 @@ class EventsExtractor(BaseExtractor):
  triples.append(
  (
  id_,
- namespace.start_time,
+ self.namespace.start_time,
  Literal(datetime.fromtimestamp(event.start_time / 1000, timezone.utc)),
  )
  )
@@ -108,16 +132,22 @@ class EventsExtractor(BaseExtractor):
  triples.append(
  (
  id_,
- namespace.end_time,
+ self.namespace.end_time,
  Literal(datetime.fromtimestamp(event.end_time / 1000, timezone.utc)),
  )
  )

  if event.data_set_id:
- triples.append((id_, namespace.data_set_id, namespace[f"Dataset_{event.data_set_id}"]))
+ triples.append(
+ (
+ id_,
+ self.namespace.data_set_id,
+ self.namespace[f"Dataset_{event.data_set_id}"],
+ )
+ )

  if event.asset_ids:
  for asset_id in event.asset_ids:
- triples.append((id_, namespace.asset, namespace[f"Asset_{asset_id}"]))
+ triples.append((id_, self.namespace.asset, self.namespace[f"Asset_{asset_id}"]))

  return triples
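
The `unpack_metadata` flag added here follows the same contract in every extractor in this release: unpacked metadata becomes one predicate per non-empty key, while the default packs the whole dict into a single JSON literal under the `metadata` predicate. A self-contained sketch of the two output shapes, with an illustrative namespace and metadata dict:

```python
import json

from rdflib import Literal, Namespace

ns = Namespace("http://example.org/neat#")      # illustrative namespace
metadata = {"unit": "bar", "site": "plant-01"}  # illustrative event metadata
event_id = ns["Event_123"]

# unpack_metadata=True: one triple per non-empty metadata key
unpacked = [(event_id, ns[key], Literal(value)) for key, value in metadata.items() if value]

# unpack_metadata=False (the default): one triple holding the JSON string
packed = (event_id, ns.metadata, Literal(json.dumps(metadata)))

print(unpacked)
print(packed)
```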

cognite/neat/graph/extractors/_classic_cdf/_files.py CHANGED
@@ -1,3 +1,4 @@
+ import json
  from collections.abc import Iterable
  from datetime import datetime, timezone
  from pathlib import Path
@@ -21,15 +22,19 @@ class FilesExtractor(BaseExtractor):
  Args:
  files_metadata (Iterable[FileMetadata]): An iterable of files metadata.
  namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
+ unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
+ a JSON string.
  """

  def __init__(
  self,
  files_metadata: Iterable[FileMetadata],
  namespace: Namespace | None = None,
+ unpack_metadata: bool = False,
  ):
  self.namespace = namespace or DEFAULT_NAMESPACE
  self.files_metadata = files_metadata
+ self.unpack_metadata = unpack_metadata

  @classmethod
  def from_dataset(
@@ -37,56 +42,75 @@ class FilesExtractor(BaseExtractor):
  client: CogniteClient,
  data_set_external_id: str,
  namespace: Namespace | None = None,
+ unpack_metadata: bool = False,
  ):
- return cls(cast(Iterable[FileMetadata], client.files(data_set_external_ids=data_set_external_id)), namespace)
+ return cls(
+ cast(
+ Iterable[FileMetadata],
+ client.files(data_set_external_ids=data_set_external_id),
+ ),
+ namespace,
+ unpack_metadata,
+ )

  @classmethod
- def from_file(cls, file_path: str, namespace: Namespace | None = None):
- return cls(FileMetadataList.load(Path(file_path).read_text()), namespace)
+ def from_file(
+ cls,
+ file_path: str,
+ namespace: Namespace | None = None,
+ unpack_metadata: bool = False,
+ ):
+ return cls(
+ FileMetadataList.load(Path(file_path).read_text()),
+ namespace,
+ unpack_metadata,
+ )

  def extract(self) -> Iterable[Triple]:
  """Extract files metadata as triples."""
  for event in self.files_metadata:
- yield from self._file2triples(event, self.namespace)
+ yield from self._file2triples(event)

- @classmethod
- def _file2triples(cls, file: FileMetadata, namespace: Namespace) -> list[Triple]:
- id_ = namespace[f"File_{file.id}"]
+ def _file2triples(self, file: FileMetadata) -> list[Triple]:
+ id_ = self.namespace[f"File_{file.id}"]

  # Set rdf type
- triples: list[Triple] = [(id_, RDF.type, namespace.File)]
+ triples: list[Triple] = [(id_, RDF.type, self.namespace.File)]

  # Create attributes

  if file.external_id:
- triples.append((id_, namespace.external_id, Literal(file.external_id)))
+ triples.append((id_, self.namespace.external_id, Literal(file.external_id)))

  if file.source:
- triples.append((id_, namespace.type, Literal(file.source)))
+ triples.append((id_, self.namespace.type, Literal(file.source)))

  if file.mime_type:
- triples.append((id_, namespace.mime_type, Literal(file.mime_type)))
+ triples.append((id_, self.namespace.mime_type, Literal(file.mime_type)))

  if file.uploaded:
- triples.append((id_, namespace.uploaded, Literal(file.uploaded)))
+ triples.append((id_, self.namespace.uploaded, Literal(file.uploaded)))

  if file.source:
- triples.append((id_, namespace.source, Literal(file.source)))
+ triples.append((id_, self.namespace.source, Literal(file.source)))

  if file.metadata:
- for key, value in file.metadata.items():
- if value:
- type_aware_value = string_to_ideal_type(value)
- try:
- triples.append((id_, namespace[key], URIRef(str(AnyHttpUrl(type_aware_value))))) # type: ignore
- except ValidationError:
- triples.append((id_, namespace[key], Literal(type_aware_value)))
+ if self.unpack_metadata:
+ for key, value in file.metadata.items():
+ if value:
+ type_aware_value = string_to_ideal_type(value)
+ try:
+ triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value))))) # type: ignore
+ except ValidationError:
+ triples.append((id_, self.namespace[key], Literal(type_aware_value)))
+ else:
+ triples.append((id_, self.namespace.metadata, Literal(json.dumps(file.metadata))))

  if file.source_created_time:
  triples.append(
  (
  id_,
- namespace.source_created_time,
+ self.namespace.source_created_time,
  Literal(datetime.fromtimestamp(file.source_created_time / 1000, timezone.utc)),
  )
  )
@@ -94,25 +118,33 @@ class FilesExtractor(BaseExtractor):
  triples.append(
  (
  id_,
- namespace.source_created_time,
+ self.namespace.source_created_time,
  Literal(datetime.fromtimestamp(file.source_modified_time / 1000, timezone.utc)),
  )
  )
  if file.uploaded_time:
  triples.append(
- (id_, namespace.uploaded_time, Literal(datetime.fromtimestamp(file.uploaded_time / 1000, timezone.utc)))
+ (
+ id_,
+ self.namespace.uploaded_time,
+ Literal(datetime.fromtimestamp(file.uploaded_time / 1000, timezone.utc)),
+ )
  )

  if file.created_time:
  triples.append(
- (id_, namespace.created_time, Literal(datetime.fromtimestamp(file.created_time / 1000, timezone.utc)))
+ (
+ id_,
+ self.namespace.created_time,
+ Literal(datetime.fromtimestamp(file.created_time / 1000, timezone.utc)),
+ )
  )

  if file.last_updated_time:
  triples.append(
  (
  id_,
- namespace.last_updated_time,
+ self.namespace.last_updated_time,
  Literal(datetime.fromtimestamp(file.last_updated_time / 1000, timezone.utc)),
  )
  )
@@ -121,17 +153,29 @@ class FilesExtractor(BaseExtractor):
  for label in file.labels:
  # external_id can create ill-formed URIs, so we create websafe URIs
  # since labels do not have internal ids, we use the external_id as the id
- triples.append((id_, namespace.label, namespace[f"Label_{quote(label.dump()['externalId'])}"]))
+ triples.append(
+ (
+ id_,
+ self.namespace.label,
+ self.namespace[f"Label_{quote(label.dump()['externalId'])}"],
+ )
+ )

  if file.security_categories:
  for category in file.security_categories:
- triples.append((id_, namespace.security_categories, Literal(category)))
+ triples.append((id_, self.namespace.security_categories, Literal(category)))

  if file.data_set_id:
- triples.append((id_, namespace.data_set_id, namespace[f"Dataset_{file.data_set_id}"]))
+ triples.append(
+ (
+ id_,
+ self.namespace.data_set_id,
+ self.namespace[f"Dataset_{file.data_set_id}"],
+ )
+ )

  if file.asset_ids:
  for asset_id in file.asset_ids:
- triples.append((id_, namespace.asset, namespace[f"Asset_{asset_id}"]))
+ triples.append((id_, self.namespace.asset, self.namespace[f"Asset_{asset_id}"]))

  return triples

cognite/neat/graph/extractors/_classic_cdf/_labels.py CHANGED
@@ -37,7 +37,11 @@ class LabelsExtractor(BaseExtractor):
  namespace: Namespace | None = None,
  ):
  return cls(
- cast(Iterable[LabelDefinition], client.labels(data_set_external_ids=data_set_external_id)), namespace
+ cast(
+ Iterable[LabelDefinition],
+ client.labels(data_set_external_ids=data_set_external_id),
+ ),
+ namespace,
  )

  @classmethod
@@ -47,36 +51,41 @@ class LabelsExtractor(BaseExtractor):
  def extract(self) -> Iterable[Triple]:
  """Extract labels as triples."""
  for label in self.labels:
- yield from self._labels2triples(label, self.namespace)
+ yield from self._labels2triples(label)

- @classmethod
- def _labels2triples(cls, label: LabelDefinition, namespace: Namespace) -> list[Triple]:
+ def _labels2triples(self, label: LabelDefinition) -> list[Triple]:
  if label.external_id:
- id_ = namespace[f"Label_{create_sha256_hash(label.external_id)}"]
+ id_ = self.namespace[f"Label_{create_sha256_hash(label.external_id)}"]

  # Set rdf type
- triples: list[Triple] = [(id_, RDF.type, namespace.Label)]
+ triples: list[Triple] = [(id_, RDF.type, self.namespace.Label)]

  # Create attributes
- triples.append((id_, namespace.external_id, Literal(label.external_id)))
+ triples.append((id_, self.namespace.external_id, Literal(label.external_id)))

  if label.name:
- triples.append((id_, namespace.name, Literal(label.name)))
+ triples.append((id_, self.namespace.name, Literal(label.name)))

  if label.description:
- triples.append((id_, namespace.description, Literal(label.description)))
+ triples.append((id_, self.namespace.description, Literal(label.description)))

  if label.created_time:
  triples.append(
  (
  id_,
- namespace.created_time,
+ self.namespace.created_time,
  Literal(datetime.fromtimestamp(label.created_time / 1000, timezone.utc)),
  )
  )

  if label.data_set_id:
- triples.append((id_, namespace.data_set_id, namespace[f"Dataset_{label.data_set_id}"]))
+ triples.append(
+ (
+ id_,
+ self.namespace.data_set_id,
+ self.namespace[f"Dataset_{label.data_set_id}"],
+ )
+ )

  return triples
  return []
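
Label URIs here are built from a SHA-256 hash of the external id, because label external ids can contain characters that produce ill-formed URIs and labels have no internal numeric id to fall back on. A sketch of the idea using hashlib directly; the package's own `create_sha256_hash` helper may differ in detail:

```python
import hashlib

from rdflib import Namespace

ns = Namespace("http://example.org/neat#")  # illustrative namespace

def label_uri(external_id: str):
    """Build a websafe label URI by hashing the raw external id."""
    digest = hashlib.sha256(external_id.encode()).hexdigest()
    return ns[f"Label_{digest}"]

print(label_uri("pump station/valve #3"))  # safe despite spaces, '/', and '#'
```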

cognite/neat/graph/extractors/_classic_cdf/_relationships.py CHANGED
@@ -38,7 +38,11 @@ class RelationshipsExtractor(BaseExtractor):
  namespace: Namespace | None = None,
  ):
  return cls(
- cast(Iterable[Relationship], client.relationships(data_set_external_ids=data_set_external_id)), namespace
+ cast(
+ Iterable[Relationship],
+ client.relationships(data_set_external_ids=data_set_external_id),
+ ),
+ namespace,
  )

  @classmethod
@@ -48,26 +52,25 @@ class RelationshipsExtractor(BaseExtractor):
  def extract(self) -> Iterable[Triple]:
  """Extracts an asset with the given asset_id."""
  for relationship in self.relationships:
- yield from self._relationship2triples(relationship, self.namespace)
+ yield from self._relationship2triples(relationship)

- @classmethod
- def _relationship2triples(cls, relationship: Relationship, namespace: Namespace) -> list[Triple]:
+ def _relationship2triples(self, relationship: Relationship) -> list[Triple]:
  """Converts an asset to triples."""

  if relationship.external_id and relationship.source_external_id and relationship.target_external_id:
  # relationships do not have an internal id, so we generate one
- id_ = namespace[f"Relationship_{create_sha256_hash(relationship.external_id)}"]
+ id_ = self.namespace[f"Relationship_{create_sha256_hash(relationship.external_id)}"]

  # Set rdf type
- triples: list[Triple] = [(id_, RDF.type, namespace["Relationship"])]
+ triples: list[Triple] = [(id_, RDF.type, self.namespace["Relationship"])]

  # Set source and target types
  if source_type := relationship.source_type:
  triples.append(
  (
  id_,
- namespace.source_type,
- namespace[source_type.title()],
+ self.namespace.source_type,
+ self.namespace[source_type.title()],
  )
  )

@@ -75,19 +78,19 @@ class RelationshipsExtractor(BaseExtractor):
  triples.append(
  (
  id_,
- namespace.target_type,
- namespace[target_type.title()],
+ self.namespace.target_type,
+ self.namespace[target_type.title()],
  )
  )

  # Create attributes

- triples.append((id_, namespace.external_id, Literal(relationship.external_id)))
+ triples.append((id_, self.namespace.external_id, Literal(relationship.external_id)))

  triples.append(
  (
  id_,
- namespace.source_external_id,
+ self.namespace.source_external_id,
  Literal(relationship.source_external_id),
  )
  )
@@ -95,7 +98,7 @@ class RelationshipsExtractor(BaseExtractor):
  triples.append(
  (
  id_,
- namespace.target_external_id,
+ self.namespace.target_external_id,
  Literal(relationship.target_external_id),
  )
  )
@@ -104,7 +107,7 @@ class RelationshipsExtractor(BaseExtractor):
  triples.append(
  (
  id_,
- namespace.start_time,
+ self.namespace.start_time,
  Literal(datetime.fromtimestamp(relationship.start_time / 1000, timezone.utc)),
  )
  )
@@ -113,7 +116,7 @@ class RelationshipsExtractor(BaseExtractor):
  triples.append(
  (
  id_,
- namespace.end_time,
+ self.namespace.end_time,
  Literal(datetime.fromtimestamp(relationship.end_time / 1000, timezone.utc)),
  )
  )
@@ -122,7 +125,7 @@ class RelationshipsExtractor(BaseExtractor):
  triples.append(
  (
  id_,
- namespace.created_time,
+ self.namespace.created_time,
  Literal(datetime.fromtimestamp(relationship.created_time / 1000, timezone.utc)),
  )
  )
@@ -131,7 +134,7 @@ class RelationshipsExtractor(BaseExtractor):
  triples.append(
  (
  id_,
- namespace.last_updated_time,
+ self.namespace.last_updated_time,
  Literal(datetime.fromtimestamp(relationship.last_updated_time / 1000, timezone.utc)),
  )
  )
@@ -140,7 +143,7 @@ class RelationshipsExtractor(BaseExtractor):
  triples.append(
  (
  id_,
- namespace.confidence,
+ self.namespace.confidence,
  Literal(relationship.confidence),
  )
  )
@@ -149,11 +152,23 @@ class RelationshipsExtractor(BaseExtractor):
  for label in relationship.labels:
  # external_id can create ill-formed URIs, so we create websafe URIs
  # since labels do not have internal ids, we use the external_id as the id
- triples.append((id_, namespace.label, namespace[f"Label_{quote(label.dump()['externalId'])}"]))
+ triples.append(
+ (
+ id_,
+ self.namespace.label,
+ self.namespace[f"Label_{quote(label.dump()['externalId'])}"],
+ )
+ )

  # Create connection
  if relationship.data_set_id:
- triples.append((id_, namespace.dataset, namespace[f"Dataset_{relationship.data_set_id}"]))
+ triples.append(
+ (
+ id_,
+ self.namespace.dataset,
+ self.namespace[f"Dataset_{relationship.data_set_id}"],
+ )
+ )

  return triples
  return []

cognite/neat/graph/extractors/_classic_cdf/_sequences.py CHANGED
@@ -1,3 +1,4 @@
+ import json
  from collections.abc import Iterable
  from datetime import datetime, timezone
  from pathlib import Path
@@ -20,15 +21,19 @@ class SequencesExtractor(BaseExtractor):
  Args:
  sequence (Iterable[Sequence]): An iterable of sequences.
  namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
+ unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
+ a JSON string.
  """

  def __init__(
  self,
  sequence: Iterable[Sequence],
  namespace: Namespace | None = None,
+ unpack_metadata: bool = False,
  ):
  self.namespace = namespace or DEFAULT_NAMESPACE
  self.sequence = sequence
+ self.unpack_metadata = unpack_metadata

  @classmethod
  def from_dataset(
@@ -36,50 +41,71 @@ class SequencesExtractor(BaseExtractor):
  client: CogniteClient,
  data_set_external_id: str,
  namespace: Namespace | None = None,
+ unpack_metadata: bool = False,
  ):
- return cls(cast(Iterable[Sequence], client.sequences(data_set_external_ids=data_set_external_id)), namespace)
+ return cls(
+ cast(
+ Iterable[Sequence],
+ client.sequences(data_set_external_ids=data_set_external_id),
+ ),
+ namespace,
+ unpack_metadata,
+ )

  @classmethod
- def from_file(cls, file_path: str, namespace: Namespace | None = None):
- return cls(SequenceList.load(Path(file_path).read_text()), namespace)
+ def from_file(
+ cls,
+ file_path: str,
+ namespace: Namespace | None = None,
+ unpack_metadata: bool = False,
+ ):
+ return cls(SequenceList.load(Path(file_path).read_text()), namespace, unpack_metadata)

  def extract(self) -> Iterable[Triple]:
  """Extract sequences as triples."""
  for sequence in self.sequence:
- yield from self._sequence2triples(sequence, self.namespace)
+ yield from self._sequence2triples(sequence)

- @classmethod
- def _sequence2triples(cls, sequence: Sequence, namespace: Namespace) -> list[Triple]:
- id_ = namespace[f"Sequence_{sequence.id}"]
+ def _sequence2triples(self, sequence: Sequence) -> list[Triple]:
+ id_ = self.namespace[f"Sequence_{sequence.id}"]

  # Set rdf type
- triples: list[Triple] = [(id_, RDF.type, namespace.Sequence)]
+ triples: list[Triple] = [(id_, RDF.type, self.namespace.Sequence)]

  # Create attributes

  if sequence.external_id:
- triples.append((id_, namespace.external_id, Literal(sequence.external_id)))
+ triples.append((id_, self.namespace.external_id, Literal(sequence.external_id)))

  if sequence.name:
- triples.append((id_, namespace.name, Literal(sequence.name)))
+ triples.append((id_, self.namespace.name, Literal(sequence.name)))

  if sequence.metadata:
- for key, value in sequence.metadata.items():
- if value:
- type_aware_value = string_to_ideal_type(value)
- try:
- triples.append((id_, namespace[key], URIRef(str(AnyHttpUrl(type_aware_value))))) # type: ignore
- except ValidationError:
- triples.append((id_, namespace[key], Literal(type_aware_value)))
+ if self.unpack_metadata:
+ for key, value in sequence.metadata.items():
+ if value:
+ type_aware_value = string_to_ideal_type(value)
+ try:
+ triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value))))) # type: ignore
+ except ValidationError:
+ triples.append((id_, self.namespace[key], Literal(type_aware_value)))
+ else:
+ triples.append(
+ (
+ id_,
+ self.namespace.metadata,
+ Literal(json.dumps(sequence.metadata)),
+ )
+ )

  if sequence.description:
- triples.append((id_, namespace.description, Literal(sequence.description)))
+ triples.append((id_, self.namespace.description, Literal(sequence.description)))

  if sequence.created_time:
  triples.append(
  (
  id_,
- namespace.created_time,
+ self.namespace.created_time,
  Literal(datetime.fromtimestamp(sequence.created_time / 1000, timezone.utc)),
  )
  )
@@ -88,15 +114,27 @@ class SequencesExtractor(BaseExtractor):
  triples.append(
  (
  id_,
- namespace.last_updated_time,
+ self.namespace.last_updated_time,
  Literal(datetime.fromtimestamp(sequence.last_updated_time / 1000, timezone.utc)),
  )
  )

  if sequence.data_set_id:
- triples.append((id_, namespace.data_set_id, namespace[f"Dataset_{sequence.data_set_id}"]))
+ triples.append(
+ (
+ id_,
+ self.namespace.data_set_id,
+ self.namespace[f"Dataset_{sequence.data_set_id}"],
+ )
+ )

  if sequence.asset_id:
- triples.append((id_, namespace.asset, namespace[f"Asset_{sequence.asset_id}"]))
+ triples.append(
+ (
+ id_,
+ self.namespace.asset,
+ self.namespace[f"Asset_{sequence.asset_id}"],
+ )
+ )

  return triples

cognite/neat/graph/extractors/_classic_cdf/_timeseries.py CHANGED
@@ -1,3 +1,4 @@
+ import json
  from collections.abc import Iterable
  from datetime import datetime, timezone
  from pathlib import Path
@@ -20,15 +21,19 @@ class TimeSeriesExtractor(BaseExtractor):
  Args:
  timeseries (Iterable[TimeSeries]): An iterable of timeseries.
  namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
+ unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
+ a JSON string.
  """

  def __init__(
  self,
  timeseries: Iterable[TimeSeries],
  namespace: Namespace | None = None,
+ unpack_metadata: bool = False,
  ):
  self.namespace = namespace or DEFAULT_NAMESPACE
  self.timeseries = timeseries
+ self.unpack_metadata = unpack_metadata

  @classmethod
  def from_dataset(
@@ -36,65 +41,84 @@ class TimeSeriesExtractor(BaseExtractor):
  client: CogniteClient,
  data_set_external_id: str,
  namespace: Namespace | None = None,
+ unpack_metadata: bool = False,
  ):
  return cls(
- cast(Iterable[TimeSeries], client.time_series(data_set_external_ids=data_set_external_id)), namespace
+ cast(
+ Iterable[TimeSeries],
+ client.time_series(data_set_external_ids=data_set_external_id),
+ ),
+ namespace,
+ unpack_metadata,
  )

  @classmethod
- def from_file(cls, file_path: str, namespace: Namespace | None = None):
- return cls(TimeSeriesList.load(Path(file_path).read_text()), namespace)
+ def from_file(
+ cls,
+ file_path: str,
+ namespace: Namespace | None = None,
+ unpack_metadata: bool = False,
+ ):
+ return cls(TimeSeriesList.load(Path(file_path).read_text()), namespace, unpack_metadata)

  def extract(self) -> Iterable[Triple]:
  """Extract timeseries as triples."""
  for timeseries in self.timeseries:
- yield from self._timeseries2triples(timeseries, self.namespace)
+ yield from self._timeseries2triples(timeseries)

- @classmethod
- def _timeseries2triples(cls, timeseries: TimeSeries, namespace: Namespace) -> list[Triple]:
- id_ = namespace[f"TimeSeries_{timeseries.id}"]
+ def _timeseries2triples(self, timeseries: TimeSeries) -> list[Triple]:
+ id_ = self.namespace[f"TimeSeries_{timeseries.id}"]

  # Set rdf type
- triples: list[Triple] = [(id_, RDF.type, namespace.TimeSeries)]
+ triples: list[Triple] = [(id_, RDF.type, self.namespace.TimeSeries)]

  # Create attributes

  if timeseries.external_id:
- triples.append((id_, namespace.external_id, Literal(timeseries.external_id)))
+ triples.append((id_, self.namespace.external_id, Literal(timeseries.external_id)))

  if timeseries.name:
- triples.append((id_, namespace.name, Literal(timeseries.name)))
+ triples.append((id_, self.namespace.name, Literal(timeseries.name)))

  if timeseries.is_string:
- triples.append((id_, namespace.is_string, Literal(timeseries.is_string)))
+ triples.append((id_, self.namespace.is_string, Literal(timeseries.is_string)))

  if timeseries.metadata:
- for key, value in timeseries.metadata.items():
- if value:
- type_aware_value = string_to_ideal_type(value)
- try:
- triples.append((id_, namespace[key], URIRef(str(AnyHttpUrl(type_aware_value))))) # type: ignore
- except ValidationError:
- triples.append((id_, namespace[key], Literal(type_aware_value)))
+ if self.unpack_metadata:
+ for key, value in timeseries.metadata.items():
+ if value:
+ type_aware_value = string_to_ideal_type(value)
+ try:
+ triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value))))) # type: ignore
+ except ValidationError:
+ triples.append((id_, self.namespace[key], Literal(type_aware_value)))
+ else:
+ triples.append(
+ (
+ id_,
+ self.namespace.metadata,
+ Literal(json.dumps(timeseries.metadata)),
+ )
+ )

  if timeseries.unit:
- triples.append((id_, namespace.unit, Literal(timeseries.unit)))
+ triples.append((id_, self.namespace.unit, Literal(timeseries.unit)))

- if namespace.is_step:
- triples.append((id_, namespace.is_step, Literal(timeseries.is_step)))
+ if self.namespace.is_step:
+ triples.append((id_, self.namespace.is_step, Literal(timeseries.is_step)))

  if timeseries.description:
- triples.append((id_, namespace.description, Literal(timeseries.description)))
+ triples.append((id_, self.namespace.description, Literal(timeseries.description)))

  if timeseries.security_categories:
  for category in timeseries.security_categories:
- triples.append((id_, namespace.security_categories, Literal(category)))
+ triples.append((id_, self.namespace.security_categories, Literal(category)))

  if timeseries.created_time:
  triples.append(
  (
  id_,
- namespace.created_time,
+ self.namespace.created_time,
  Literal(datetime.fromtimestamp(timeseries.created_time / 1000, timezone.utc)),
  )
  )
@@ -103,26 +127,50 @@ class TimeSeriesExtractor(BaseExtractor):
  triples.append(
  (
  id_,
- namespace.last_updated_time,
+ self.namespace.last_updated_time,
  Literal(datetime.fromtimestamp(timeseries.last_updated_time / 1000, timezone.utc)),
  )
  )

  if timeseries.legacy_name:
- triples.append((id_, namespace.legacy_name, Literal(timeseries.legacy_name)))
+ triples.append((id_, self.namespace.legacy_name, Literal(timeseries.legacy_name)))

  # Create connections
  if timeseries.unit_external_id:
  # try to create connection to QUDT unit catalog
  try:
- triples.append((id_, namespace.unit_external_id, URIRef(str(AnyHttpUrl(timeseries.unit_external_id)))))
+ triples.append(
+ (
+ id_,
+ self.namespace.unit_external_id,
+ URIRef(str(AnyHttpUrl(timeseries.unit_external_id))),
+ )
+ )
  except ValidationError:
- triples.append((id_, namespace.unit_external_id, Literal(timeseries.unit_external_id)))
+ triples.append(
+ (
+ id_,
+ self.namespace.unit_external_id,
+ Literal(timeseries.unit_external_id),
+ )
+ )

  if timeseries.data_set_id:
- triples.append((id_, namespace.dataset, namespace[f"Dataset_{timeseries.data_set_id}"]))
+ triples.append(
+ (
+ id_,
+ self.namespace.dataset,
+ self.namespace[f"Dataset_{timeseries.data_set_id}"],
+ )
+ )

  if timeseries.asset_id:
- triples.append((id_, namespace.asset, namespace[f"Asset_{timeseries.asset_id}"]))
+ triples.append(
+ (
+ id_,
+ self.namespace.asset,
+ self.namespace[f"Asset_{timeseries.asset_id}"],
+ )
+ )

  return triples
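
The `unit_external_id` handling keeps its URL-or-literal fallback: a value that parses as an HTTP(S) URL (for example a QUDT unit URI) becomes a `URIRef`, anything else a plain `Literal`. A standalone sketch of that pattern, mirroring the pydantic-based validation the extractors themselves use:

```python
from pydantic import AnyHttpUrl, ValidationError
from rdflib import Literal, URIRef

def unit_node(unit_external_id: str):
    """Return a URIRef for valid HTTP(S) URLs, otherwise a Literal."""
    try:
        return URIRef(str(AnyHttpUrl(unit_external_id)))
    except ValidationError:
        return Literal(unit_external_id)

print(unit_node("https://qudt.org/vocab/unit/BAR"))  # URIRef into the unit catalog
print(unit_node("barg"))                             # falls back to a Literal
```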

cognite/neat/rules/importers/_inference2rules.py CHANGED
@@ -36,28 +36,36 @@ class InferenceImporter(BaseImporter):
  """Infers rules from a triple store.

  Rules inference through analysis of knowledge graph provided in various formats.
- Use the factory methods to create an triples store from sources such as
+ Use the factory methods to create a triple store from sources such as
  RDF files, JSON files, YAML files, XML files, or directly from a graph store.

  Args:
  issue_list: Issue list to store issues
  graph: Knowledge graph
  max_number_of_instance: Maximum number of instances to be used in inference
+ prefix: Prefix to be used for the inferred model
  """

- def __init__(self, issue_list: IssueList, graph: Graph, max_number_of_instance: int = -1):
+ def __init__(
+ self, issue_list: IssueList, graph: Graph, max_number_of_instance: int = -1, prefix: str = "inferred"
+ ) -> None:
  self.issue_list = issue_list
  self.graph = graph
  self.max_number_of_instance = max_number_of_instance
+ self.prefix = prefix

  @classmethod
- def from_graph_store(cls, store: NeatGraphStore, max_number_of_instance: int = -1):
+ def from_graph_store(
+ cls, store: NeatGraphStore, max_number_of_instance: int = -1, prefix: str = "inferred"
+ ) -> "InferenceImporter":
  issue_list = IssueList(title="Inferred from graph store")

- return cls(issue_list, store.graph, max_number_of_instance=max_number_of_instance)
+ return cls(issue_list, store.graph, max_number_of_instance=max_number_of_instance, prefix=prefix)

  @classmethod
- def from_rdf_file(cls, filepath: Path, max_number_of_instance: int = -1):
+ def from_rdf_file(
+ cls, filepath: Path, max_number_of_instance: int = -1, prefix: str = "inferred"
+ ) -> "InferenceImporter":
  issue_list = IssueList(title=f"'{filepath.name}'")

  graph = Graph()
@@ -66,18 +74,24 @@ class InferenceImporter(BaseImporter):
  except Exception:
  issue_list.append(issues.fileread.FileReadError(filepath))

- return cls(issue_list, graph, max_number_of_instance=max_number_of_instance)
+ return cls(issue_list, graph, max_number_of_instance=max_number_of_instance, prefix=prefix)

  @classmethod
- def from_json_file(cls, filepath: Path, max_number_of_instance: int = -1):
+ def from_json_file(
+ cls, filepath: Path, max_number_of_instance: int = -1, prefix: str = "inferred"
+ ) -> "InferenceImporter":
  raise NotImplementedError("JSON file format is not supported yet.")

  @classmethod
- def from_yaml_file(cls, filepath: Path, max_number_of_instance: int = -1):
+ def from_yaml_file(
+ cls, filepath: Path, max_number_of_instance: int = -1, prefix: str = "inferred"
+ ) -> "InferenceImporter":
  raise NotImplementedError("YAML file format is not supported yet.")

  @classmethod
- def from_xml_file(cls, filepath: Path, max_number_of_instance: int = -1):
+ def from_xml_file(
+ cls, filepath: Path, max_number_of_instance: int = -1, prefix: str = "inferred"
+ ) -> "InferenceImporter":
  raise NotImplementedError("JSON file format is not supported yet.")

  @overload
@@ -239,8 +253,7 @@ class InferenceImporter(BaseImporter):
  if Namespace(get_namespace(URI)) not in prefixes.values():
  prefixes[f"prefix-{len(prefixes)+1}"] = Namespace(get_namespace(URI))

- @classmethod
- def _default_metadata(cls):
+ def _default_metadata(self):
  return InformationMetadata(
  name="Inferred Model",
  creator="NEAT",
@@ -248,7 +261,7 @@ class InferenceImporter(BaseImporter):
  created=datetime.now(),
  updated=datetime.now(),
  description="Inferred model from knowledge graph",
- prefix="inferred",
+ prefix=self.prefix,
  namespace=DEFAULT_NAMESPACE,
  )

@@ -262,12 +275,9 @@ class InferenceImporter(BaseImporter):

  @classmethod
  def _read_value_type_occurrence_from_comment(cls, value_type: str, comment: str) -> int:
- return int(
- cast(
- re.Match,
- re.search(
- rf"with value type <{value_type}> which occurs <(\d+)> times in the graph",
- comment,
- ),
- ).group(1)
- )
+ if result := re.search(
+ rf"with value type <{value_type}> which occurs <(\d+)> times in the graph",
+ comment,
+ ):
+ return int(result.group(1))
+ return 0
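
Two behavioural changes are worth noting here: the new `prefix` argument is threaded through every factory method into `_default_metadata`, so inferred models no longer hard-code the prefix "inferred", and `_read_value_type_occurrence_from_comment` now returns 0 instead of raising `AttributeError` when the pattern is absent. A minimal usage sketch, assuming `InferenceImporter` is exported from `cognite.neat.rules.importers` and a hypothetical Turtle file on disk:

```python
from pathlib import Path

from cognite.neat.rules.importers import InferenceImporter  # assumed export

# prefix defaults to "inferred"; overriding it names the inferred model's
# prefix, while max_number_of_instance bounds how many instances inference
# looks at (-1, the default, means all of them).
importer = InferenceImporter.from_rdf_file(
    Path("knowledge_graph.ttl"),  # hypothetical input file
    max_number_of_instance=1000,
    prefix="plant_model",
)
```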

cognite_neat-0.85.8.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: cognite-neat
- Version: 0.85.7
+ Version: 0.85.8
  Summary: Knowledge graph transformation
  Home-page: https://cognite-neat.readthedocs-hosted.com/
  License: Apache-2.0

cognite_neat-0.85.8.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
  cognite/neat/__init__.py,sha256=AiexNcHdAHFbrrbo9c65gtil1dqx_SGraDH1PSsXjKE,126
  cognite/neat/_shared.py,sha256=RSaHm2eJceTlvb-hMMe4nHgoHdPYDfN3XcxDXo24k3A,1530
- cognite/neat/_version.py,sha256=hVJ3xHshW3ChykiL5r0Maacil2KmJncrUYZGRzPoqic,23
+ cognite/neat/_version.py,sha256=XNxvWbbG3Du3rtKNhMT-MnhnUVoPrUHeH989x6qqhYo,23
  cognite/neat/app/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  cognite/neat/app/api/asgi/metrics.py,sha256=nxFy7L5cChTI0a-zkCiJ59Aq8yLuIJp5c9Dg0wRXtV0,152
  cognite/neat/app/api/configuration.py,sha256=2U5M6M252swvQPQyooA1EBzFUZNtcTmuSaywfJDgckM,4232
@@ -57,13 +57,13 @@ cognite/neat/graph/exceptions.py,sha256=R6pyOH774n9w2x_X_nrUr8OMAdjJMf_XPIqAvxIQ
  cognite/neat/graph/extractors/__init__.py,sha256=nXcNp6i3-1HteIkr8Ujxk4b09W5jk27Q3eWuwjcnGnM,1647
  cognite/neat/graph/extractors/_base.py,sha256=8IWygpkQTwo0UOmbbwWVI7540_klTVdUVX2JjVPFRIs,498
  cognite/neat/graph/extractors/_classic_cdf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- cognite/neat/graph/extractors/_classic_cdf/_assets.py,sha256=xrinx9SF9WMrrbbsYr2eSi7ZGOmlia3ltPq573fAoVg,4809
- cognite/neat/graph/extractors/_classic_cdf/_events.py,sha256=SGZWKCxppECIQkwQs5M2e_SoF-eGilCW2KiyXk2PmzM,4230
- cognite/neat/graph/extractors/_classic_cdf/_files.py,sha256=o35K0_ouq7hjR_lAVRjWbuRsuAzlS78S_97am5TFU5A,5129
- cognite/neat/graph/extractors/_classic_cdf/_labels.py,sha256=4JxQHPDciMjbk7F6GxMa-HfhOgAv8LT3VO3mRfEgQ0E,2832
- cognite/neat/graph/extractors/_classic_cdf/_relationships.py,sha256=jgIN__nztlhLwoIJw59s2-Blc9gxIm7YDha5qEoXBSg,5654
- cognite/neat/graph/extractors/_classic_cdf/_sequences.py,sha256=5FuhwpgDiGG51C0bQacQ4LD6KkutUaU1cX2NSy_krhU,3652
- cognite/neat/graph/extractors/_classic_cdf/_timeseries.py,sha256=Ui7WRAvot3KJFwpzqmEYvRs3cN0qh93ocJjYaNLfH30,4811
+ cognite/neat/graph/extractors/_classic_cdf/_assets.py,sha256=ZLLiHlN0RB4Drx0b0uBLLiV8gowlMleamRBSKUrCgyI,7854
+ cognite/neat/graph/extractors/_classic_cdf/_events.py,sha256=UiysU7AGlWMQuEMObaJN1JvMNWWtXmjBzFsk3SLg8V4,5112
+ cognite/neat/graph/extractors/_classic_cdf/_files.py,sha256=TPjDF6VfRImhrJlfFNne3Plm6NmrGwtaGKqP_vZ8s6Y,6289
+ cognite/neat/graph/extractors/_classic_cdf/_labels.py,sha256=9DQ1hblU3gbLTbWuthKLWkSL_eNH69izitzMSQ1FdD8,3011
+ cognite/neat/graph/extractors/_classic_cdf/_relationships.py,sha256=GzxnvIflmHVnWdlNFoLrnxJjM8c2YuMzzMC7IL7Q4cg,6040
+ cognite/neat/graph/extractors/_classic_cdf/_sequences.py,sha256=3L2c7z1I4LQZYgA8CScsOGfvamIdCc19I5ZIzPVVgsM,4680
+ cognite/neat/graph/extractors/_classic_cdf/_timeseries.py,sha256=HCUjTTLqj9rTxb7xemyxNXYRqsKVUzRAXi0BLyo2Ctc,6125
  cognite/neat/graph/extractors/_dexpi.py,sha256=xIw3kSaQ17k_bAuecvrVRic70PUhFHtcyy-ReLt36Q4,9385
  cognite/neat/graph/extractors/_mock_graph_generator.py,sha256=w6lIgwvVwkvK725S9MLFZU8lLxjxXt_621_nChupAkQ,14791
  cognite/neat/graph/extractors/_rdf_file.py,sha256=ialMCLv9WH5k6v1YMfozfcmAYhz8OVo9jVhsKMyQkDA,763
@@ -203,7 +203,7 @@ cognite/neat/rules/importers/_dtdl2rules/_unit_lookup.py,sha256=wW4saKva61Q_i17g
  cognite/neat/rules/importers/_dtdl2rules/dtdl_converter.py,sha256=ysmWUxZ0npwrTB0uiH5jA0v37sfCwowGaYk17IyxPUU,12663
  cognite/neat/rules/importers/_dtdl2rules/dtdl_importer.py,sha256=Psj3C2jembY_Wu7WWJIFIwrMawvjISjeqfBnoRy_csw,6740
  cognite/neat/rules/importers/_dtdl2rules/spec.py,sha256=tim_MfN1J0F3Oeqk3BMgIA82d_MZvhRuRMsLK3B4PYc,11897
- cognite/neat/rules/importers/_inference2rules.py,sha256=TlzTpO1IUyeUjODuDLqnlC-LeLhT-S6V6OpL-5YSQRo,11801
+ cognite/neat/rules/importers/_inference2rules.py,sha256=S_U2M8z5oZaFD3yPDSG__xUJSCmFkQyrKleHixrzYlU,12204
  cognite/neat/rules/importers/_owl2rules/__init__.py,sha256=tdGcrgtozdQyST-pTlxIa4cLBNTLvtk1nNYR4vOdFSw,63
  cognite/neat/rules/importers/_owl2rules/_owl2classes.py,sha256=dACjYeCa_OhpQgqccI4w478dEINbISUMzpVkCOoRRL8,7384
  cognite/neat/rules/importers/_owl2rules/_owl2metadata.py,sha256=nwnUaBNAAYMoBre2UmsnkJXUuaqGEpR3U3txDrH2w6g,7527
@@ -310,8 +310,8 @@ cognite/neat/workflows/steps_registry.py,sha256=fkTX14ZA7_gkUYfWIlx7A1XbCidvqR23
  cognite/neat/workflows/tasks.py,sha256=dqlJwKAb0jlkl7abbY8RRz3m7MT4SK8-7cntMWkOYjw,788
  cognite/neat/workflows/triggers.py,sha256=_BLNplzoz0iic367u1mhHMHiUrCwP-SLK6_CZzfODX0,7071
  cognite/neat/workflows/utils.py,sha256=gKdy3RLG7ctRhbCRwaDIWpL9Mi98zm56-d4jfHDqP1E,453
- cognite_neat-0.85.7.dist-info/LICENSE,sha256=W8VmvFia4WHa3Gqxq1Ygrq85McUNqIGDVgtdvzT-XqA,11351
- cognite_neat-0.85.7.dist-info/METADATA,sha256=FOumx12OP-CCVVbdM2preeoHr-pPzrxnLKzYbaGsETU,9493
- cognite_neat-0.85.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- cognite_neat-0.85.7.dist-info/entry_points.txt,sha256=61FPqiWb25vbqB0KI7znG8nsg_ibLHBvTjYnkPvNFso,50
- cognite_neat-0.85.7.dist-info/RECORD,,
+ cognite_neat-0.85.8.dist-info/LICENSE,sha256=W8VmvFia4WHa3Gqxq1Ygrq85McUNqIGDVgtdvzT-XqA,11351
+ cognite_neat-0.85.8.dist-info/METADATA,sha256=LMGF9Z866Ni01TGc12LHk09EjNbbSxjiZS-gzMtvGsY,9493
+ cognite_neat-0.85.8.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ cognite_neat-0.85.8.dist-info/entry_points.txt,sha256=61FPqiWb25vbqB0KI7znG8nsg_ibLHBvTjYnkPvNFso,50
+ cognite_neat-0.85.8.dist-info/RECORD,,