cognite-neat 0.105.1__py3-none-any.whl → 0.106.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (29)
  1. cognite/neat/_config.py +6 -260
  2. cognite/neat/_graph/extractors/_classic_cdf/_base.py +26 -13
  3. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +4 -1
  4. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +2 -2
  5. cognite/neat/_graph/loaders/_rdf2dms.py +7 -2
  6. cognite/neat/_graph/transformers/_base.py +4 -8
  7. cognite/neat/_graph/transformers/_classic_cdf.py +164 -80
  8. cognite/neat/_graph/transformers/_rdfpath.py +1 -1
  9. cognite/neat/_issues/warnings/_external.py +1 -1
  10. cognite/neat/_rules/importers/_rdf/_inference2rules.py +4 -2
  11. cognite/neat/_rules/models/mapping/_classic2core.yaml +70 -58
  12. cognite/neat/_rules/transformers/_mapping.py +3 -2
  13. cognite/neat/_session/_base.py +6 -7
  14. cognite/neat/_session/_inspect.py +6 -2
  15. cognite/neat/_session/_mapping.py +6 -8
  16. cognite/neat/_session/_prepare.py +9 -10
  17. cognite/neat/_session/_read.py +35 -26
  18. cognite/neat/_session/_set.py +9 -0
  19. cognite/neat/_session/_state.py +3 -1
  20. cognite/neat/_session/_to.py +11 -13
  21. cognite/neat/_store/_graph_store.py +33 -28
  22. cognite/neat/_utils/auth.py +35 -15
  23. cognite/neat/_utils/collection_.py +32 -11
  24. cognite/neat/_version.py +1 -1
  25. {cognite_neat-0.105.1.dist-info → cognite_neat-0.106.0.dist-info}/METADATA +1 -7
  26. {cognite_neat-0.105.1.dist-info → cognite_neat-0.106.0.dist-info}/RECORD +29 -29
  27. {cognite_neat-0.105.1.dist-info → cognite_neat-0.106.0.dist-info}/LICENSE +0 -0
  28. {cognite_neat-0.105.1.dist-info → cognite_neat-0.106.0.dist-info}/WHEEL +0 -0
  29. {cognite_neat-0.105.1.dist-info → cognite_neat-0.106.0.dist-info}/entry_points.txt +0 -0
@@ -1,4 +1,3 @@
1
- import textwrap
2
1
  import warnings
3
2
  from abc import ABC
4
3
  from collections.abc import Callable, Iterable
@@ -15,6 +14,7 @@ from cognite.neat._utils.collection_ import iterate_progress_bar
15
14
  from cognite.neat._utils.rdf_ import (
16
15
  Triple,
17
16
  add_triples_in_batch,
17
+ get_namespace,
18
18
  remove_instance_ids_in_batch,
19
19
  remove_namespace_from_uri,
20
20
  )
@@ -72,91 +72,165 @@ class AddAssetDepth(BaseTransformerStandardised):
72
72
  return row_output
73
73
 
74
74
 
75
- # TODO: standardise
76
- class BaseAssetConnector(BaseTransformer, ABC):
77
- _asset_type: URIRef = DEFAULT_NAMESPACE.Asset
78
- _item_type: URIRef
79
- _default_attribute: URIRef
80
- _connection_type: URIRef
75
+ class BaseAssetConnector(BaseTransformerStandardised, ABC):
76
+ description: str = "Connects assets to other cognite resources, thus forming bi-directional connection"
77
+ _use_only_once: bool = True
81
78
 
82
- _select_item_ids = "SELECT DISTINCT ?item_id WHERE {{?item_id a <{item_type}>}}"
83
- _select_connected_assets: str = textwrap.dedent("""SELECT ?asset_id WHERE {{
84
- <{item_id}> <{attribute}> ?asset_id .
85
- ?asset_id a <{asset_type}>}}""")
79
+ def _count_query(self) -> str:
80
+ query = """SELECT (COUNT(?asset) as ?count)
81
+ WHERE {{
82
+ ?resource a <{resource_type}> .
83
+ ?resource <{connection}> ?asset .
84
+ ?asset a <{asset_type}> .
85
+ }}"""
86
86
 
87
- def __init__(self, attribute: URIRef | None = None) -> None:
88
- self._attribute = attribute or self._default_attribute
87
+ return query.format(
88
+ asset_type=self.asset_type,
89
+ resource_type=self.resource_type,
90
+ connection=self.resource_to_asset_connection,
91
+ )
89
92
 
90
- def transform(self, graph: Graph) -> None:
91
- for item_id, *_ in graph.query(self._select_item_ids.format(item_type=self._item_type)): # type: ignore[misc]
92
- triples: list[Triple] = []
93
- for asset_id, *_ in graph.query( # type: ignore[misc]
94
- self._select_connected_assets.format(
95
- item_id=item_id, attribute=self._attribute, asset_type=self._asset_type
96
- )
97
- ):
98
- triples.append((asset_id, self._connection_type, item_id)) # type: ignore[arg-type]
99
- add_triples_in_batch(graph, triples)
93
+ def _iterate_query(self) -> str:
94
+ query = """SELECT ?asset ?resource
95
+ WHERE {{
96
+ ?resource a <{resource_type}> .
97
+ ?resource <{connection}> ?asset .
98
+ ?asset a <{asset_type}> .
99
+ }}"""
100
+
101
+ return query.format(
102
+ asset_type=self.asset_type,
103
+ resource_type=self.resource_type,
104
+ connection=self.resource_to_asset_connection,
105
+ )
106
+
107
+ def __init__(
108
+ self,
109
+ resource_to_asset_connection: URIRef,
110
+ resource_type: URIRef,
111
+ asset_to_resource_connection: URIRef | None = None,
112
+ asset_type: URIRef | None = None,
113
+ ) -> None:
114
+ self.asset_type = asset_type or DEFAULT_NAMESPACE.Asset
115
+ self.resource_to_asset_connection = resource_to_asset_connection
116
+ self.resource_type = resource_type
117
+
118
+ if asset_to_resource_connection:
119
+ self.asset_to_resource_connection = asset_to_resource_connection
120
+ else:
121
+ namespace = Namespace(get_namespace(resource_type))
122
+ type_ = remove_namespace_from_uri(resource_type)
123
+ self.asset_to_resource_connection = namespace[type_[0].lower() + type_[1:]]
124
+
125
+ def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
126
+ row_output = RowTransformationOutput()
127
+ subject, object = query_result_row
128
+
129
+ row_output.add_triples.append(cast(Triple, (subject, self.asset_to_resource_connection, object)))
130
+
131
+ row_output.instances_modified_count += 1
132
+
133
+ return row_output
100
134
 
101
135
 
102
136
  class AssetTimeSeriesConnector(BaseAssetConnector):
103
137
  description: str = "Connects assets to timeseries, thus forming bi-directional connection"
104
- _use_only_once: bool = True
105
138
  _need_changes = frozenset(
106
139
  {
107
140
  str(extractors.AssetsExtractor.__name__),
108
141
  str(extractors.TimeSeriesExtractor.__name__),
109
142
  }
110
143
  )
111
- _item_type = DEFAULT_NAMESPACE.TimeSeries
112
- _default_attribute = DEFAULT_NAMESPACE.assetId
113
- _connection_type = DEFAULT_NAMESPACE.timeSeries
144
+
145
+ def __init__(
146
+ self,
147
+ resource_to_asset_connection: URIRef | None = None,
148
+ resource_type: URIRef | None = None,
149
+ asset_to_resource_connection: URIRef | None = None,
150
+ asset_type: URIRef | None = None,
151
+ ):
152
+ super().__init__(
153
+ resource_to_asset_connection=resource_to_asset_connection or DEFAULT_NAMESPACE.assetId,
154
+ resource_type=resource_type or DEFAULT_NAMESPACE.TimeSeries,
155
+ asset_to_resource_connection=asset_to_resource_connection or DEFAULT_NAMESPACE.timeSeries,
156
+ asset_type=asset_type or DEFAULT_NAMESPACE.Asset,
157
+ )
114
158
 
115
159
 
116
160
  class AssetSequenceConnector(BaseAssetConnector):
117
161
  description: str = "Connects assets to sequences, thus forming bi-directional connection"
118
- _use_only_once: bool = True
119
162
  _need_changes = frozenset(
120
163
  {
121
164
  str(extractors.AssetsExtractor.__name__),
122
165
  str(extractors.SequencesExtractor.__name__),
123
166
  }
124
167
  )
125
- _item_type = DEFAULT_NAMESPACE.Sequence
126
- _default_attribute = DEFAULT_NAMESPACE.assetId
127
- _connection_type = DEFAULT_NAMESPACE.sequence
168
+
169
+ def __init__(
170
+ self,
171
+ resource_to_asset_connection: URIRef | None = None,
172
+ resource_type: URIRef | None = None,
173
+ asset_to_resource_connection: URIRef | None = None,
174
+ asset_type: URIRef | None = None,
175
+ ):
176
+ super().__init__(
177
+ resource_to_asset_connection=resource_to_asset_connection or DEFAULT_NAMESPACE.assetId,
178
+ resource_type=resource_type or DEFAULT_NAMESPACE.Sequence,
179
+ asset_to_resource_connection=asset_to_resource_connection or DEFAULT_NAMESPACE.sequence,
180
+ asset_type=asset_type or DEFAULT_NAMESPACE.Asset,
181
+ )
128
182
 
129
183
 
130
184
  class AssetFileConnector(BaseAssetConnector):
131
185
  description: str = "Connects assets to files, thus forming bi-directional connection"
132
- _use_only_once: bool = True
133
186
  _need_changes = frozenset(
134
187
  {
135
188
  str(extractors.AssetsExtractor.__name__),
136
189
  str(extractors.FilesExtractor.__name__),
137
190
  }
138
191
  )
139
- _item_type = DEFAULT_NAMESPACE.File
140
- _default_attribute = DEFAULT_NAMESPACE.assetIds
141
- _connection_type = DEFAULT_NAMESPACE.file
192
+
193
+ def __init__(
194
+ self,
195
+ resource_to_asset_connection: URIRef | None = None,
196
+ resource_type: URIRef | None = None,
197
+ asset_to_resource_connection: URIRef | None = None,
198
+ asset_type: URIRef | None = None,
199
+ ):
200
+ super().__init__(
201
+ resource_to_asset_connection=resource_to_asset_connection or DEFAULT_NAMESPACE.assetIds,
202
+ resource_type=resource_type or DEFAULT_NAMESPACE.File,
203
+ asset_to_resource_connection=asset_to_resource_connection or DEFAULT_NAMESPACE.file,
204
+ asset_type=asset_type or DEFAULT_NAMESPACE.Asset,
205
+ )
142
206
 
143
207
 
144
208
  class AssetEventConnector(BaseAssetConnector):
145
209
  description: str = "Connects assets to events, thus forming bi-directional connection"
146
- _use_only_once: bool = True
147
210
  _need_changes = frozenset(
148
211
  {
149
212
  str(extractors.AssetsExtractor.__name__),
150
213
  str(extractors.EventsExtractor.__name__),
151
214
  }
152
215
  )
153
- _item_type = DEFAULT_NAMESPACE.Event
154
- _default_attribute = DEFAULT_NAMESPACE.assetIds
155
- _connection_type = DEFAULT_NAMESPACE.event
216
+
217
+ def __init__(
218
+ self,
219
+ resource_to_asset_connection: URIRef | None = None,
220
+ resource_type: URIRef | None = None,
221
+ asset_to_resource_connection: URIRef | None = None,
222
+ asset_type: URIRef | None = None,
223
+ ):
224
+ super().__init__(
225
+ resource_to_asset_connection=resource_to_asset_connection or DEFAULT_NAMESPACE.assetIds,
226
+ resource_type=resource_type or DEFAULT_NAMESPACE.Event,
227
+ asset_to_resource_connection=asset_to_resource_connection or DEFAULT_NAMESPACE.event,
228
+ asset_type=asset_type or DEFAULT_NAMESPACE.Asset,
229
+ )
156
230
 
157
231
 
158
232
  # TODO: standardise
159
- class AssetRelationshipConnector(BaseTransformer):
233
+ class AssetRelationshipConnector(BaseTransformerStandardised):
160
234
  description: str = "Connects assets via relationships"
161
235
  _use_only_once: bool = True
162
236
  _need_changes = frozenset(
@@ -174,6 +248,44 @@ class AssetRelationshipConnector(BaseTransformer):
174
248
  ?target <{asset_xid_property}> ?target_xid .
175
249
  ?target a <{asset_type}> .}}"""
176
250
 
251
+ def _count_query(self) -> str:
252
+ query = """SELECT (COUNT(?target) as ?count) WHERE {{
253
+ ?relationship a <{relationship_type}> .
254
+ ?relationship <{relationship_source_xid_prop}> ?source_xid .
255
+ ?source <{asset_xid_property}> ?source_xid .
256
+ ?source a <{asset_type}> .
257
+
258
+ ?relationship <{relationship_target_xid_prop}> ?target_xid .
259
+ ?target <{asset_xid_property}> ?target_xid .
260
+ ?target a <{asset_type}> .}}"""
261
+
262
+ return query.format(
263
+ relationship_type=self.relationship_type,
264
+ relationship_source_xid_prop=self.relationship_source_xid_prop,
265
+ relationship_target_xid_prop=self.relationship_target_xid_prop,
266
+ asset_xid_property=self.asset_xid_property,
267
+ asset_type=self.asset_type,
268
+ )
269
+
270
+ def _iterate_query(self) -> str:
271
+ query = """SELECT ?source ?relationship ?target WHERE {{
272
+ ?relationship a <{relationship_type}> .
273
+ ?relationship <{relationship_source_xid_prop}> ?source_xid .
274
+ ?source <{asset_xid_property}> ?source_xid .
275
+ ?source a <{asset_type}> .
276
+
277
+ ?relationship <{relationship_target_xid_prop}> ?target_xid .
278
+ ?target <{asset_xid_property}> ?target_xid .
279
+ ?target a <{asset_type}> .}}"""
280
+
281
+ return query.format(
282
+ relationship_type=self.relationship_type,
283
+ relationship_source_xid_prop=self.relationship_source_xid_prop,
284
+ relationship_target_xid_prop=self.relationship_target_xid_prop,
285
+ asset_xid_property=self.asset_xid_property,
286
+ asset_type=self.asset_type,
287
+ )
288
+
177
289
  def __init__(
178
290
  self,
179
291
  asset_type: URIRef | None = None,
@@ -188,48 +300,20 @@ class AssetRelationshipConnector(BaseTransformer):
188
300
  self.relationship_target_xid_prop = relationship_target_xid_prop or DEFAULT_NAMESPACE.targetExternalId
189
301
  self.asset_xid_property = asset_xid_property or DEFAULT_NAMESPACE.externalId
190
302
 
191
- def transform(self, graph: Graph) -> None:
192
- for relationship_id_result in graph.query(
193
- f"SELECT DISTINCT ?relationship_id WHERE {{?relationship_id a <{self.relationship_type}>}}"
194
- ):
195
- relationship_id: URIRef = cast(tuple, relationship_id_result)[0]
196
-
197
- if assets_id_res := list(
198
- graph.query(
199
- self._asset_template.format(
200
- relationship_id=relationship_id,
201
- asset_xid_property=self.asset_xid_property,
202
- relationship_source_xid_prop=self.relationship_source_xid_prop,
203
- relationship_target_xid_prop=self.relationship_target_xid_prop,
204
- asset_type=self.asset_type,
205
- )
206
- )
207
- ):
208
- # files can be connected to multiple assets in the graph
209
- for source_asset_id, target_asset_id in cast(list[tuple], assets_id_res):
210
- # create a relationship between the two assets
211
- graph.add(
212
- (
213
- source_asset_id,
214
- DEFAULT_NAMESPACE.relationship,
215
- relationship_id,
216
- )
217
- )
218
- graph.add(
219
- (
220
- target_asset_id,
221
- DEFAULT_NAMESPACE.relationship,
222
- relationship_id,
223
- )
224
- )
303
+ def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
304
+ row_output = RowTransformationOutput()
305
+ source, relationship, target = query_result_row
306
+
307
+ row_output.add_triples.append(cast(Triple, (source, DEFAULT_NAMESPACE.relationship, target)))
308
+ row_output.add_triples.append(cast(Triple, (relationship, DEFAULT_NAMESPACE.source, source)))
309
+ row_output.add_triples.append(cast(Triple, (relationship, DEFAULT_NAMESPACE.target, target)))
225
310
 
226
- # add source and target to the relationship
227
- graph.add((relationship_id, DEFAULT_NAMESPACE.source, source_asset_id))
228
- graph.add((relationship_id, DEFAULT_NAMESPACE.target, target_asset_id))
311
+ row_output.remove_triples.append(cast(Triple, (relationship, self.relationship_source_xid_prop, None)))
312
+ row_output.remove_triples.append(cast(Triple, (relationship, self.relationship_target_xid_prop, None)))
229
313
 
230
- # remove properties that are not needed, specifically the external ids
231
- graph.remove((relationship_id, self.relationship_source_xid_prop, None))
232
- graph.remove((relationship_id, self.relationship_target_xid_prop, None))
314
+ row_output.instances_modified_count += 2
315
+
316
+ return row_output
233
317
 
234
318
 
235
319
  # TODO: standardise
@@ -59,7 +59,7 @@ class AddSelfReferenceProperty(BaseTransformer):
59
59
 
60
60
  class MakeConnectionOnExactMatch(BaseTransformerStandardised):
61
61
  description: str = "Adds property that contains id of reference to all references of given class in Rules"
62
- _use_only_once: bool = True
62
+ _use_only_once: bool = False
63
63
  _need_changes = frozenset({})
64
64
 
65
65
  def __init__(
@@ -42,7 +42,7 @@ class FileItemNotSupportedWarning(NeatWarning):
42
42
 
43
43
  @dataclass(unsafe_hash=True)
44
44
  class CDFAuthWarning(NeatWarning):
45
- """Failed to {action} due to {reason}"""
45
+ """Failed to {action}: {reason}"""
46
46
 
47
47
  action: str
48
48
  reason: str
@@ -17,6 +17,7 @@ from cognite.neat._rules.models.information import (
17
17
  )
18
18
  from cognite.neat._store import NeatGraphStore
19
19
  from cognite.neat._store._provenance import INSTANCES_ENTITY
20
+ from cognite.neat._utils.collection_ import iterate_progress_bar
20
21
  from cognite.neat._utils.rdf_ import remove_namespace_from_uri, uri_to_short_form
21
22
 
22
23
  from ._base import DEFAULT_NON_EXISTING_NODE_TYPE, BaseRDFImporter
@@ -27,7 +28,6 @@ ORDERED_CLASSES_QUERY = """SELECT ?class (count(?s) as ?instances )
27
28
  WHERE { ?s a ?class . }
28
29
  group by ?class order by DESC(?instances)"""
29
30
 
30
-
31
31
  INSTANCES_OF_CLASS_QUERY = """SELECT ?s ?propertyCount WHERE { ?s a <class> . BIND ('Unknown' as ?propertyCount) }"""
32
32
 
33
33
 
@@ -171,8 +171,10 @@ class InferenceImporter(BaseRDFImporter):
171
171
  INSTANCES_OF_CLASS_QUERY if self.max_number_of_instance == -1 else INSTANCES_OF_CLASS_RICHNESS_ORDERED_QUERY
172
172
  )
173
173
 
174
+ classes_iterable = iterate_progress_bar(classes.items(), len(classes), "Inferring classes")
175
+
174
176
  # Infers all the properties of the class
175
- for class_id, class_definition in classes.items():
177
+ for class_id, class_definition in classes_iterable:
176
178
  for ( # type: ignore[misc]
177
179
  instance,
178
180
  _,