cognite-neat 0.106.0__py3-none-any.whl → 0.108.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (67)
  1. cognite/neat/_constants.py +35 -1
  2. cognite/neat/_graph/_shared.py +4 -0
  3. cognite/neat/_graph/extractors/__init__.py +5 -1
  4. cognite/neat/_graph/extractors/_base.py +32 -0
  5. cognite/neat/_graph/extractors/_classic_cdf/_base.py +128 -14
  6. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +156 -12
  7. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +50 -12
  8. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +26 -1
  9. cognite/neat/_graph/extractors/_dms.py +196 -47
  10. cognite/neat/_graph/extractors/_dms_graph.py +199 -0
  11. cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
  12. cognite/neat/_graph/extractors/_rdf_file.py +33 -5
  13. cognite/neat/_graph/loaders/__init__.py +1 -3
  14. cognite/neat/_graph/loaders/_rdf2dms.py +123 -19
  15. cognite/neat/_graph/queries/_base.py +140 -84
  16. cognite/neat/_graph/queries/_construct.py +2 -2
  17. cognite/neat/_graph/transformers/__init__.py +8 -1
  18. cognite/neat/_graph/transformers/_base.py +9 -1
  19. cognite/neat/_graph/transformers/_classic_cdf.py +90 -3
  20. cognite/neat/_graph/transformers/_rdfpath.py +3 -3
  21. cognite/neat/_graph/transformers/_value_type.py +106 -45
  22. cognite/neat/_issues/errors/_resources.py +1 -1
  23. cognite/neat/_issues/warnings/__init__.py +0 -2
  24. cognite/neat/_issues/warnings/_models.py +1 -1
  25. cognite/neat/_issues/warnings/_properties.py +0 -8
  26. cognite/neat/_rules/analysis/_base.py +1 -1
  27. cognite/neat/_rules/analysis/_information.py +14 -13
  28. cognite/neat/_rules/catalog/__init__.py +1 -0
  29. cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
  30. cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
  31. cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
  32. cognite/neat/_rules/importers/__init__.py +3 -1
  33. cognite/neat/_rules/importers/_dms2rules.py +7 -5
  34. cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
  35. cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
  36. cognite/neat/_rules/importers/_rdf/_base.py +2 -2
  37. cognite/neat/_rules/importers/_rdf/_inference2rules.py +242 -19
  38. cognite/neat/_rules/models/_base_rules.py +13 -15
  39. cognite/neat/_rules/models/_types.py +5 -0
  40. cognite/neat/_rules/models/dms/_rules.py +51 -10
  41. cognite/neat/_rules/models/dms/_rules_input.py +4 -0
  42. cognite/neat/_rules/models/information/_rules.py +48 -5
  43. cognite/neat/_rules/models/information/_rules_input.py +6 -1
  44. cognite/neat/_rules/models/mapping/_classic2core.py +4 -5
  45. cognite/neat/_rules/transformers/__init__.py +10 -0
  46. cognite/neat/_rules/transformers/_converters.py +300 -62
  47. cognite/neat/_session/_base.py +57 -10
  48. cognite/neat/_session/_drop.py +5 -1
  49. cognite/neat/_session/_inspect.py +3 -2
  50. cognite/neat/_session/_mapping.py +17 -6
  51. cognite/neat/_session/_prepare.py +0 -47
  52. cognite/neat/_session/_read.py +115 -10
  53. cognite/neat/_session/_set.py +27 -0
  54. cognite/neat/_session/_show.py +4 -4
  55. cognite/neat/_session/_state.py +12 -1
  56. cognite/neat/_session/_to.py +43 -2
  57. cognite/neat/_session/_wizard.py +1 -1
  58. cognite/neat/_session/exceptions.py +8 -3
  59. cognite/neat/_store/_graph_store.py +331 -136
  60. cognite/neat/_store/_rules_store.py +130 -1
  61. cognite/neat/_utils/auth.py +3 -1
  62. cognite/neat/_version.py +1 -1
  63. {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/METADATA +2 -2
  64. {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/RECORD +67 -65
  65. {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/WHEEL +1 -1
  66. {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/LICENSE +0 -0
  67. {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/entry_points.txt +0 -0
@@ -3,11 +3,12 @@ from collections import defaultdict
3
3
  from collections.abc import Iterable
4
4
  from typing import Literal, cast, overload
5
5
 
6
- from rdflib import RDF, Graph, Namespace, URIRef
6
+ from rdflib import RDF, Dataset, Graph, Namespace, URIRef
7
7
  from rdflib import Literal as RdfLiteral
8
+ from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
8
9
  from rdflib.query import ResultRow
9
10
 
10
- from cognite.neat._constants import UNKNOWN_TYPE
11
+ from cognite.neat._constants import NEAT
11
12
  from cognite.neat._rules._constants import EntityTypes
12
13
  from cognite.neat._rules.models.entities import ClassEntity
13
14
  from cognite.neat._rules.models.information import InformationRules
@@ -20,11 +21,21 @@ from ._construct import build_construct_query
20
21
  class Queries:
21
22
  """Helper class for storing standard queries for the graph store."""
22
23
 
23
- def __init__(self, graph: Graph, rules: InformationRules | None = None):
24
- self.graph = graph
25
- self.rules = rules
26
-
27
- def summarize_instances(self) -> list[tuple]:
24
+ def __init__(
25
+ self,
26
+ dataset: Dataset,
27
+ rules: dict[URIRef, InformationRules] | None = None,
28
+ default_named_graph: URIRef | None = None,
29
+ ):
30
+ self.dataset = dataset
31
+ self.rules = rules or {}
32
+ self.default_named_graph = default_named_graph or DATASET_DEFAULT_GRAPH_ID
33
+
34
+ def graph(self, named_graph: URIRef | None = None) -> Graph:
35
+ """Get named graph from the dataset to query over"""
36
+ return self.dataset.graph(named_graph or self.default_named_graph)
37
+
38
+ def summarize_instances(self, named_graph: URIRef | None = None) -> list[tuple]:
28
39
  """Summarize instances in the graph store by class and count"""
29
40
 
30
41
  query_statement = """ SELECT ?class (COUNT(?instance) AS ?instanceCount)
@@ -39,36 +50,55 @@ class Queries:
39
50
  remove_namespace_from_uri(cast(URIRef, cast(tuple, res)[0])),
40
51
  cast(RdfLiteral, cast(tuple, res)[1]).value,
41
52
  )
42
- for res in list(self.graph.query(query_statement))
53
+ for res in list(self.graph(named_graph=named_graph).query(query_statement))
43
54
  ]
44
55
 
45
- @property
46
- def types(self) -> dict[URIRef, str]:
56
+ def types(self, named_graph: URIRef | None = None) -> dict[URIRef, str]:
47
57
  """Types and their short form in the graph"""
48
58
  query = """SELECT DISTINCT ?type
49
59
  WHERE {?s a ?type .}"""
50
- return {type_: remove_namespace_from_uri(cast(URIRef, type_)) for (type_,) in list(self.graph.query(query))} # type: ignore[misc, index, arg-type]
51
60
 
52
- def type_uri(self, type_: str) -> list[URIRef]:
61
+ return { # type: ignore[misc, index, arg-type]
62
+ cast(URIRef, type_): remove_namespace_from_uri(cast(URIRef, type_))
63
+ for (type_,) in list(self.graph(named_graph).query(query))
64
+ }
65
+
66
+ def type_uri(self, type_: str, named_graph: URIRef | None = None) -> list[URIRef]:
53
67
  """Get the URIRef of a type"""
54
- return [k for k, v in self.types.items() if v == type_]
68
+ return [k for k, v in self.types(named_graph).items() if v == type_]
69
+
70
+ def properties(self, named_graph: URIRef | None = None) -> dict[URIRef, str]:
71
+ """Properties and their short form in the graph
55
72
 
56
- @property
57
- def properties(self) -> dict[URIRef, str]:
73
+ Args:
74
+ named_graph: Named graph to query over, default None (default graph)
75
+
76
+ """
58
77
  query = """SELECT DISTINCT ?property
59
78
  WHERE {?s ?property ?o . FILTER(?property != rdf:type)}"""
60
- return {type_: remove_namespace_from_uri(cast(URIRef, type_)) for (type_,) in list(self.graph.query(query))} # type: ignore[misc, index, arg-type]
79
+ return { # type: ignore[misc, index, arg-type]
80
+ cast(URIRef, type_): remove_namespace_from_uri(cast(URIRef, type_))
81
+ for (type_,) in list(self.graph(named_graph).query(query))
82
+ }
61
83
 
62
- def property_uri(self, property_: str) -> list[URIRef]:
63
- """Get the URIRef of a type"""
64
- return [k for k, v in self.properties.items() if v == property_]
84
+ def property_uri(self, property_: str, named_graph: URIRef | None = None) -> list[URIRef]:
85
+ """Get the URIRef of a property
65
86
 
66
- def list_instances_ids_of_class(self, class_uri: URIRef, limit: int = -1) -> list[URIRef]:
87
+ Args:
88
+ property_: Property to find URIRef for
89
+ named_graph: Named graph to query over, default None (default graph)
90
+ """
91
+ return [k for k, v in self.properties(named_graph).items() if v == property_]
92
+
93
+ def list_instances_ids_of_class(
94
+ self, class_uri: URIRef, limit: int = -1, named_graph: URIRef | None = None
95
+ ) -> list[URIRef]:
67
96
  """Get instances ids for a given class
68
97
 
69
98
  Args:
70
99
  class_uri: Class for which instances are to be found
71
100
  limit: Max number of instances to return, by default -1 meaning all instances
101
+ named_graph: Named graph to query over, default None (default graph)
72
102
 
73
103
  Returns:
74
104
  List of class instance URIs
@@ -76,16 +106,17 @@ class Queries:
76
106
  query_statement = "SELECT DISTINCT ?subject WHERE { ?subject a <class> .} LIMIT X".replace(
77
107
  "class", class_uri
78
108
  ).replace("LIMIT X", "" if limit == -1 else f"LIMIT {limit}")
79
- return [cast(tuple, res)[0] for res in list(self.graph.query(query_statement))]
109
+ return [cast(tuple, res)[0] for res in list(self.graph(named_graph).query(query_statement))]
80
110
 
81
- def list_instances_of_type(self, class_uri: URIRef) -> list[ResultRow]:
111
+ def list_instances_of_type(self, class_uri: URIRef, named_graph: URIRef | None = None) -> list[ResultRow]:
82
112
  """Get all triples for instances of a given class
83
113
 
84
114
  Args:
85
115
  class_uri: Class for which instances are to be found
116
+ named_graph: Named graph to query over, default None (default graph)
86
117
 
87
118
  Returns:
88
- List of triples for instances of the given class
119
+ List of triples for instances of the given class in the named graph
89
120
  """
90
121
  query = (
91
122
  f"SELECT ?instance ?prop ?value "
@@ -93,17 +124,22 @@ class Queries:
93
124
  )
94
125
 
95
126
  # Select queries gives an iterable of result rows
96
- return cast(list[ResultRow], list(self.graph.query(query)))
127
+ return cast(list[ResultRow], list(self.graph(named_graph).query(query)))
97
128
 
98
- def triples_of_type_instances(self, rdf_type: str | URIRef) -> list[tuple[str, str, str]]:
129
+ def triples_of_type_instances(
130
+ self, rdf_type: str | URIRef, named_graph: URIRef | None = None
131
+ ) -> list[tuple[str, str, str]]:
99
132
  """Get all triples of a given type.
100
133
 
101
- This method assumes the graph has been transformed into the default namespace.
134
+ Args:
135
+ rdf_type: Type URI to query
136
+ named_graph: Named graph to query over, default None (default graph)
102
137
  """
138
+ named_graph = named_graph or self.default_named_graph
103
139
  if isinstance(rdf_type, URIRef):
104
140
  rdf_uri = rdf_type
105
- elif isinstance(rdf_type, str) and self.rules:
106
- rdf_uri = self.rules.metadata.namespace[rdf_type]
141
+ elif isinstance(rdf_type, str) and self.rules and self.rules.get(named_graph):
142
+ rdf_uri = self.rules[named_graph].metadata.namespace[rdf_type]
107
143
  else:
108
144
  warnings.warn(
109
145
  "Unknown namespace. Please either provide a URIRef or set the rules of the store.",
@@ -117,46 +153,50 @@ class Queries:
117
153
  "order by ?instance"
118
154
  )
119
155
 
120
- result = self.graph.query(query)
156
+ result = self.graph(named_graph).query(query)
121
157
 
122
158
  # We cannot include the RDF.type in case there is a neat:type property
123
159
  return [remove_namespace_from_uri(list(triple)) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index, arg-type]
124
160
 
125
- def type_with_property(self, type_: URIRef, property_uri: URIRef) -> bool:
161
+ def type_with_property(self, type_: URIRef, property_uri: URIRef, named_graph: URIRef | None = None) -> bool:
126
162
  """Check if a property exists in the graph store
127
163
 
128
164
  Args:
165
+ type_: Type URI to check
129
166
  property_uri: Property URI to check
167
+ named_graph: Named graph to query over, default None (default graph)
130
168
 
131
169
  Returns:
132
170
  True if property exists, False otherwise
133
171
  """
134
172
  query = f"SELECT ?o WHERE {{ ?s a <{type_}> ; <{property_uri}> ?o .}} Limit 1"
135
- return bool(list(self.graph.query(query)))
173
+ return bool(list(self.graph(named_graph).query(query)))
136
174
 
137
- def has_namespace(self, namespace: Namespace) -> bool:
175
+ def has_namespace(self, namespace: Namespace, named_graph: URIRef | None = None) -> bool:
138
176
  """Check if a namespace exists in the graph store
139
177
 
140
178
  Args:
141
179
  namespace: Namespace to check
180
+ named_graph: Named graph to query over, default None (default graph)
142
181
 
143
182
  Returns:
144
183
  True if namespace exists, False otherwise
145
184
  """
146
185
  query = f"ASK WHERE {{ ?s ?p ?o . FILTER(STRSTARTS(STR(?p), STR(<{namespace}>))) }}"
147
- return bool(self.graph.query(query))
186
+ return bool(self.graph(named_graph).query(query))
148
187
 
149
- def has_type(self, type_: URIRef) -> bool:
188
+ def has_type(self, type_: URIRef, named_graph: URIRef | None = None) -> bool:
150
189
  """Check if a type exists in the graph store
151
190
 
152
191
  Args:
153
192
  type_: Type to check
193
+ named_graph: Named graph to query over, default None (default graph)
154
194
 
155
195
  Returns:
156
196
  True if type exists, False otherwise
157
197
  """
158
198
  query = f"ASK WHERE {{ ?s a <{type_}> }}"
159
- return bool(self.graph.query(query))
199
+ return bool(self.graph(named_graph).query(query))
160
200
 
161
201
  def describe(
162
202
  self,
@@ -164,6 +204,7 @@ class Queries:
164
204
  instance_type: str | None = None,
165
205
  property_renaming_config: dict | None = None,
166
206
  property_types: dict[str, EntityTypes] | None = None,
207
+ named_graph: URIRef | None = None,
167
208
  ) -> tuple[str, dict[str | InstanceType, list[str]]] | None:
168
209
  """DESCRIBE instance for a given class from the graph store
169
210
 
@@ -172,6 +213,7 @@ class Queries:
172
213
  instance_type: Type of the instance, default None (will be inferred from triples)
173
214
  property_renaming_config: Dictionary to rename properties, default None (no renaming)
174
215
  property_types: Dictionary of property types, default None (helper for removal of namespace)
216
+ named_graph: Named graph to query over, default None (default graph)
175
217
 
176
218
 
177
219
  Returns:
@@ -179,7 +221,7 @@ class Queries:
179
221
  """
180
222
  property_values: dict[str, list[str]] = defaultdict(list)
181
223
  identifier = remove_namespace_from_uri(instance_id, validation="prefix")
182
- for _, predicate, object_ in cast(list[ResultRow], self.graph.query(f"DESCRIBE <{instance_id}>")):
224
+ for _, predicate, object_ in cast(list[ResultRow], self.graph(named_graph).query(f"DESCRIBE <{instance_id}>")):
183
225
  if object_.lower() in [
184
226
  "",
185
227
  "none",
@@ -190,45 +232,28 @@ class Queries:
190
232
 
191
233
  # set property
192
234
  if property_renaming_config and predicate != RDF.type:
193
- property_ = property_renaming_config.get(
194
- predicate, remove_namespace_from_uri(predicate, validation="prefix")
195
- )
235
+ property_ = remove_namespace_from_uri(predicate, validation="prefix")
236
+ renamed_property_ = property_renaming_config.get(predicate, property_)
237
+
196
238
  elif not property_renaming_config and predicate != RDF.type:
197
239
  property_ = remove_namespace_from_uri(predicate, validation="prefix")
240
+ renamed_property_ = property_
241
+
198
242
  else:
199
243
  property_ = RDF.type
244
+ renamed_property_ = property_
200
245
 
201
- # set value
202
- # if it is URIRef and property type is object property, we need to remove namespace
203
- # if it URIref but we are doing this into data type property, we do not remove namespace
204
- # case 1 for RDF type we remove namespace
205
- if property_ == RDF.type:
206
- value = remove_namespace_from_uri(object_, validation="prefix")
207
-
208
- # case 2 for define object properties we remove namespace
209
- elif (
210
- isinstance(object_, URIRef)
211
- and property_types
212
- and (
213
- property_types.get(property_, None) == EntityTypes.object_property
214
- or property_types.get(property_, None) == EntityTypes.undefined
215
- )
216
- ):
246
+ if isinstance(object_, URIRef):
217
247
  value = remove_namespace_from_uri(object_, validation="prefix")
218
-
219
- # case 3 when property type is not defined and returned value is URIRef we remove namespace
220
- elif isinstance(object_, URIRef) and not property_types:
221
- value = remove_namespace_from_uri(object_, validation="prefix")
222
-
223
- # case 4 for data type properties we do not remove namespace but keep the entire value
224
- # but we drop the datatype part, and keep everything to be string (data loader will do the conversion)
225
- # for value type it expects (if possible)
248
+ elif isinstance(object_, RdfLiteral):
249
+ value = object_.toPython()
226
250
  else:
251
+ # It is a blank node
227
252
  value = str(object_)
228
253
 
229
254
  # add type to the dictionary
230
255
  if predicate != RDF.type:
231
- property_values[property_].append(value)
256
+ property_values[renamed_property_].append(value)
232
257
  else:
233
258
  # guarding against multiple rdf:type values as this is not allowed in CDF
234
259
  if RDF.type not in property_values:
@@ -249,6 +274,7 @@ class Queries:
249
274
  class_: str,
250
275
  properties_optional: bool = True,
251
276
  instance_id: URIRef | None = None,
277
+ named_graph: URIRef | None = None,
252
278
  ) -> list[tuple[str, str, str]]:
253
279
  """CONSTRUCT instances for a given class from the graph store
254
280
 
@@ -256,21 +282,29 @@ class Queries:
256
282
  class_: Class entity for which we want to generate query
257
283
  properties_optional: Whether to make all properties optional, default True
258
284
  instance_ids: List of instance ids to filter on, default None (all)
285
+ named_graph: Named graph to query over, default None (default graph
259
286
 
260
287
  Returns:
261
288
  List of triples for instances of the given class
262
289
  """
263
-
264
- if self.rules and (
265
- query := build_construct_query(
266
- class_=ClassEntity(prefix=self.rules.metadata.prefix, suffix=class_),
267
- graph=self.graph,
268
- rules=self.rules,
269
- properties_optional=properties_optional,
270
- instance_id=instance_id,
290
+ named_graph = named_graph or self.default_named_graph
291
+ if (
292
+ self.rules
293
+ and self.rules.get(named_graph)
294
+ and (
295
+ query := build_construct_query(
296
+ class_=ClassEntity(
297
+ prefix=self.rules[named_graph].metadata.prefix,
298
+ suffix=class_,
299
+ ),
300
+ graph=self.graph(named_graph),
301
+ rules=self.rules[named_graph],
302
+ properties_optional=properties_optional,
303
+ instance_id=instance_id,
304
+ )
271
305
  )
272
306
  ):
273
- result = self.graph.query(query)
307
+ result = self.graph(named_graph).query(query)
274
308
 
275
309
  # We cannot include the RDF.type in case there is a neat:type property
276
310
  return [remove_namespace_from_uri(cast(ResultRow, triple)) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index, arg-type]
@@ -281,25 +315,36 @@ class Queries:
281
315
  )
282
316
  return []
283
317
 
284
- def list_triples(self, limit: int = 25) -> list[ResultRow]:
318
+ def list_triples(self, limit: int = 25, named_graph: URIRef | None = None) -> list[ResultRow]:
285
319
  """List triples in the graph store
286
320
 
287
321
  Args:
288
322
  limit: Max number of triples to return, by default 25
323
+ named_graph: Named graph to query over, default None (default graph)
289
324
 
290
325
  Returns:
291
326
  List of triples
292
327
  """
293
328
  query = f"SELECT ?subject ?predicate ?object WHERE {{ ?subject ?predicate ?object }} LIMIT {limit}"
294
- return cast(list[ResultRow], list(self.graph.query(query)))
329
+ return cast(list[ResultRow], list(self.graph(named_graph).query(query)))
295
330
 
296
331
  @overload
297
332
  def list_types(self, remove_namespace: Literal[False] = False, limit: int = 25) -> list[ResultRow]: ...
298
333
 
299
334
  @overload
300
- def list_types(self, remove_namespace: Literal[True], limit: int = 25) -> list[str]: ...
335
+ def list_types(
336
+ self,
337
+ remove_namespace: Literal[True],
338
+ limit: int = 25,
339
+ named_graph: URIRef | None = None,
340
+ ) -> list[str]: ...
301
341
 
302
- def list_types(self, remove_namespace: bool = False, limit: int = 25) -> list[ResultRow] | list[str]:
342
+ def list_types(
343
+ self,
344
+ remove_namespace: bool = False,
345
+ limit: int = 25,
346
+ named_graph: URIRef | None = None,
347
+ ) -> list[ResultRow] | list[str]:
303
348
  """List types in the graph store
304
349
 
305
350
  Args:
@@ -310,13 +355,14 @@ class Queries:
310
355
  List of types
311
356
  """
312
357
  query = f"SELECT DISTINCT ?type WHERE {{ ?subject a ?type }} LIMIT {limit}"
313
- result = cast(list[ResultRow], list(self.graph.query(query)))
358
+ result = cast(list[ResultRow], list(self.graph(named_graph).query(query)))
314
359
  if remove_namespace:
315
360
  return [remove_namespace_from_uri(res[0]) for res in result]
316
361
  return result
317
362
 
318
363
  def multi_value_type_property(
319
364
  self,
365
+ named_graph: URIRef | None = None,
320
366
  ) -> Iterable[tuple[URIRef, URIRef, list[URIRef]]]:
321
367
  query = """SELECT ?sourceType ?property
322
368
  (GROUP_CONCAT(DISTINCT STR(?valueType); SEPARATOR=",") AS ?valueTypes)
@@ -341,15 +387,20 @@ class Queries:
341
387
  value_types,
342
388
  ) in cast(
343
389
  ResultRow,
344
- self.graph.query(query.format(unknownType=str(UNKNOWN_TYPE))),
390
+ self.graph(named_graph).query(query.format(unknownType=str(NEAT.UnknownType))),
345
391
  ):
346
392
  yield cast(URIRef, source_type), cast(URIRef, property_), [URIRef(uri) for uri in value_types.split(",")]
347
393
 
348
- def drop_types(self, type_: list[URIRef]) -> dict[URIRef, int]:
394
+ def drop_types(
395
+ self,
396
+ type_: list[URIRef],
397
+ named_graph: URIRef | None = None,
398
+ ) -> dict[URIRef, int]:
349
399
  """Drop types from the graph store
350
400
 
351
401
  Args:
352
402
  type_: List of types to drop
403
+ named_graph: Named graph to query over, default None (default graph
353
404
 
354
405
  Returns:
355
406
  Dictionary of dropped types
@@ -358,11 +409,16 @@ class Queries:
358
409
  for t in type_:
359
410
  instance_ids = self.list_instances_ids_of_class(t)
360
411
  dropped_types[t] = len(instance_ids)
361
- remove_instance_ids_in_batch(self.graph, instance_ids)
412
+ remove_instance_ids_in_batch(self.graph(named_graph), instance_ids)
362
413
  return dropped_types
363
414
 
364
- def multi_type_instances(self) -> dict[str, list[str]]:
365
- """Find instances with multiple types"""
415
+ def multi_type_instances(self, named_graph: URIRef | None = None) -> dict[str, list[str]]:
416
+ """Find instances with multiple types
417
+
418
+ Args:
419
+ named_graph: Named graph to query over, default None (default graph)
420
+
421
+ """
366
422
 
367
423
  query = """
368
424
  SELECT ?instance (GROUP_CONCAT(str(?type); SEPARATOR=",") AS ?types)
@@ -374,7 +430,7 @@ class Queries:
374
430
  """
375
431
 
376
432
  result = {}
377
- for instance, types in self.graph.query(query): # type: ignore
433
+ for instance, types in self.graph(named_graph).query(query): # type: ignore
378
434
  result[remove_namespace_from_uri(instance)] = remove_namespace_from_uri(types.split(","))
379
435
 
380
436
  return result
@@ -106,7 +106,7 @@ def to_construct_triples(
106
106
  non_inherited_starting_rdf_types = []
107
107
 
108
108
  for transformation in transformations:
109
- traversal = cast(RDFPath, transformation.transformation).traversal
109
+ traversal = cast(RDFPath, transformation.instance_source).traversal
110
110
 
111
111
  # keeping track of starting rdf types of non-inherited transformations/properties
112
112
  if isinstance(traversal, Traversal) and not transformation.inherited:
@@ -115,7 +115,7 @@ def to_construct_triples(
115
115
  graph_template_triple = Triple(
116
116
  subject="?instance",
117
117
  predicate=f"{transformation.class_.prefix}:{transformation.property_}",
118
- object=f'?{re.sub(r"[^_a-zA-Z0-9/_]", "_", str(transformation.property_).lower())}',
118
+ object=f"?{re.sub(r'[^_a-zA-Z0-9/_]', '_', str(transformation.property_).lower())}",
119
119
  optional=False,
120
120
  )
121
121
  templates.append(graph_template_triple)
@@ -1,3 +1,4 @@
1
+ from ._base import BaseTransformerStandardised
1
2
  from ._classic_cdf import (
2
3
  AddAssetDepth,
3
4
  AssetEventConnector,
@@ -5,6 +6,7 @@ from ._classic_cdf import (
5
6
  AssetRelationshipConnector,
6
7
  AssetSequenceConnector,
7
8
  AssetTimeSeriesConnector,
9
+ LookupRelationshipSourceTarget,
8
10
  RelationshipAsEdgeTransformer,
9
11
  )
10
12
  from ._prune_graph import (
@@ -15,7 +17,7 @@ from ._prune_graph import (
15
17
  PruneTypes,
16
18
  )
17
19
  from ._rdfpath import AddSelfReferenceProperty, MakeConnectionOnExactMatch
18
- from ._value_type import ConnectionToLiteral, ConvertLiteral, LiteralToEntity, SplitMultiValueProperty
20
+ from ._value_type import ConnectionToLiteral, ConvertLiteral, LiteralToEntity, SetNeatType, SplitMultiValueProperty
19
21
 
20
22
  __all__ = [
21
23
  "AddAssetDepth",
@@ -29,12 +31,14 @@ __all__ = [
29
31
  "ConnectionToLiteral",
30
32
  "ConvertLiteral",
31
33
  "LiteralToEntity",
34
+ "LookupRelationshipSourceTarget",
32
35
  "MakeConnectionOnExactMatch",
33
36
  "PruneDanglingNodes",
34
37
  "PruneDeadEndEdges",
35
38
  "PruneInstancesOfUnknownType",
36
39
  "PruneTypes",
37
40
  "RelationshipAsEdgeTransformer",
41
+ "SetNeatType",
38
42
  "SplitMultiValueProperty",
39
43
  ]
40
44
 
@@ -57,4 +61,7 @@ Transformers = (
57
61
  | ConvertLiteral
58
62
  | LiteralToEntity
59
63
  | ConnectionToLiteral
64
+ | BaseTransformerStandardised
65
+ | LookupRelationshipSourceTarget
66
+ | SetNeatType
60
67
  )
@@ -1,6 +1,7 @@
1
1
  import dataclasses
2
2
  import warnings
3
3
  from abc import ABC, abstractmethod
4
+ from collections.abc import Iterator
4
5
  from typing import ClassVar, TypeAlias, cast
5
6
 
6
7
  from rdflib import Graph
@@ -65,9 +66,16 @@ class BaseTransformerStandardised(ABC):
65
66
  The query to use for extracting target triples from the graph and performing the transformation.
66
67
  Returns:
67
68
  A query string.
69
+
70
+ !!! note "Complex Queries"
71
+ In majority of cases the query should be a simple SELECT query. However, in case
72
+ when there is a need to have one or more sub iterators, one can overwrite the ._iterator() method
68
73
  """
69
74
  raise NotImplementedError()
70
75
 
76
+ def _iterator(self, graph: Graph) -> Iterator:
77
+ yield from graph.query(self._iterate_query())
78
+
71
79
  def _skip_count_query(self) -> str:
72
80
  """
73
81
  The query to use for extracting target triples from the graph and performing the transformation.
@@ -97,7 +105,7 @@ class BaseTransformerStandardised(ABC):
97
105
  if iteration_count == 0:
98
106
  return outcome
99
107
 
100
- result_iterable = graph.query(self._iterate_query())
108
+ result_iterable = self._iterator(graph)
101
109
  result_iterable = iterate_progress_bar_if_above_config_threshold(
102
110
  result_iterable, iteration_count, self.description
103
111
  )
@@ -1,6 +1,7 @@
1
+ import urllib.parse
1
2
  import warnings
2
3
  from abc import ABC
3
- from collections.abc import Callable, Iterable
4
+ from collections.abc import Callable, Iterable, Iterator
4
5
  from functools import lru_cache
5
6
  from typing import cast
6
7
 
@@ -9,6 +10,7 @@ from rdflib.query import ResultRow
9
10
 
10
11
  from cognite.neat._constants import CLASSIC_CDF_NAMESPACE, DEFAULT_NAMESPACE
11
12
  from cognite.neat._graph import extractors
13
+ from cognite.neat._issues.errors import NeatValueError
12
14
  from cognite.neat._issues.warnings import ResourceNotFoundWarning
13
15
  from cognite.neat._utils.collection_ import iterate_progress_bar
14
16
  from cognite.neat._utils.rdf_ import (
@@ -229,7 +231,6 @@ class AssetEventConnector(BaseAssetConnector):
229
231
  )
230
232
 
231
233
 
232
- # TODO: standardise
233
234
  class AssetRelationshipConnector(BaseTransformerStandardised):
234
235
  description: str = "Connects assets via relationships"
235
236
  _use_only_once: bool = True
@@ -465,7 +466,7 @@ WHERE {{
465
466
  ResourceNotFoundWarning(target_source_id, "class", str(relationship_id), "class"), stacklevel=2
466
467
  )
467
468
  return []
468
- edge_id = str(object_by_predicates["externalId"])
469
+ edge_id = urllib.parse.quote(str(object_by_predicates["externalId"]))
469
470
  # If there is properties on the relationship, we create a new intermediate node
470
471
  edge_type = self._namespace[f"{source_type}To{target_type}Edge"]
471
472
  return self._create_edge(
@@ -516,3 +517,89 @@ WHERE {{
516
517
 
517
518
  def _predicate(self, target_type: str) -> URIRef:
518
519
  return self._namespace[f"relationship{target_type.capitalize()}"]
520
+
521
+
522
+ class LookupRelationshipSourceTarget(BaseTransformerStandardised):
523
+ """When relationships are extracted, the source and target are extracted as literals. This transformers
524
+ lookup the externalID of the source and target and replaces the literals with the URIRef of the entity.
525
+ """
526
+
527
+ description = "Lookup relationships source and target externalId"
528
+ _use_only_once: bool = True
529
+ _need_changes = frozenset({extractors.RelationshipsExtractor.__name__})
530
+
531
+ _lookup_entity_query = """SELECT ?entity
532
+ WHERE {{
533
+ ?entity a <{entity_type}> .
534
+ ?entity <{namespace}externalId> "{external_id}" .
535
+ }}"""
536
+
537
+ def __init__(self, namespace: Namespace = CLASSIC_CDF_NAMESPACE, type_prefix: str | None = None) -> None:
538
+ self._namespace = namespace
539
+ self._type_prefix = type_prefix
540
+ self._lookup_entity: Callable[[URIRef, str], URIRef] | None = None
541
+
542
+ def _count_query(self) -> str:
543
+ return f"""SELECT (COUNT(?instance) AS ?instanceCount)
544
+ WHERE {{
545
+ ?instance a <{self._namespace}ClassicRelationship> .
546
+ }}"""
547
+
548
+ def _iterate_query(self) -> str:
549
+ return f"""SELECT ?instance ?source ?sourceType ?target ?targetType
550
+ WHERE {{
551
+ ?instance a <{self._namespace}ClassicRelationship> .
552
+ ?instance <{self._namespace}sourceExternalId> ?source .
553
+ ?instance <{self._namespace}targetExternalId> ?target .
554
+ ?instance <{self._namespace}sourceType> ?sourceType .
555
+ ?instance <{self._namespace}targetType> ?targetType
556
+ }}"""
557
+
558
+ def _iterator(self, graph: Graph) -> Iterator:
559
+ self._lookup_entity = self.create_lookup_entity_with_external_id(graph, self._namespace, self._type_prefix)
560
+ yield from graph.query(self._iterate_query())
561
+
562
+ def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
563
+ output = RowTransformationOutput()
564
+ instance, source, source_type, target, target_type = cast(
565
+ tuple[URIRef, Literal, URIRef, Literal, URIRef], query_result_row
566
+ )
567
+ if self._lookup_entity is None:
568
+ raise NeatValueError(f"{type(self)}: .operation() called before .transform()")
569
+ try:
570
+ source_id = self._lookup_entity(source_type, source.toPython())
571
+ except ValueError:
572
+ warnings.warn(ResourceNotFoundWarning(source, "class", str(instance), "class"), stacklevel=2)
573
+ return output
574
+
575
+ try:
576
+ target_id = self._lookup_entity(target_type, target.toPython())
577
+ except ValueError:
578
+ warnings.warn(ResourceNotFoundWarning(target, "class", str(instance), "class"), stacklevel=2)
579
+ return output
580
+
581
+ output.remove_triples.append((instance, self._namespace.sourceExternalId, source))
582
+ output.remove_triples.append((instance, self._namespace.targetExternalId, target))
583
+ output.add_triples.append((instance, self._namespace.sourceExternalId, source_id))
584
+ output.add_triples.append((instance, self._namespace.targetExternalId, target_id))
585
+ output.instances_modified_count += 1
586
+ return output
587
+
588
+ @staticmethod
589
+ def create_lookup_entity_with_external_id(
590
+ graph: Graph, namespace: Namespace, type_prefix: str | None
591
+ ) -> Callable[[URIRef, str], URIRef]:
592
+ @lru_cache(maxsize=10_000)
593
+ def lookup_entity_with_external_id(entity_type: URIRef, external_id: str) -> URIRef:
594
+ if type_prefix:
595
+ entity_type = namespace[type_prefix + remove_namespace_from_uri(entity_type)]
596
+
597
+ query = LookupRelationshipSourceTarget._lookup_entity_query.format(
598
+ namespace=namespace, entity_type=entity_type, external_id=external_id
599
+ )
600
+ result = list(graph.query(query))
601
+ if len(result) == 1:
602
+ return cast(URIRef, result[0][0]) # type: ignore[index]
603
+ raise ValueError(f"Could not find entity with external_id {external_id} and type {entity_type}")
604
+
605
+ return lookup_entity_with_external_id