cognite-neat 0.107.0__py3-none-any.whl → 0.109.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic.

Files changed (69)
  1. cognite/neat/_constants.py +35 -1
  2. cognite/neat/_graph/_shared.py +4 -0
  3. cognite/neat/_graph/extractors/_classic_cdf/_base.py +115 -14
  4. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +87 -6
  5. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +48 -12
  6. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +19 -1
  7. cognite/neat/_graph/extractors/_dms.py +162 -47
  8. cognite/neat/_graph/extractors/_dms_graph.py +54 -4
  9. cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
  10. cognite/neat/_graph/extractors/_rdf_file.py +3 -2
  11. cognite/neat/_graph/loaders/__init__.py +1 -3
  12. cognite/neat/_graph/loaders/_rdf2dms.py +20 -10
  13. cognite/neat/_graph/queries/_base.py +144 -84
  14. cognite/neat/_graph/queries/_construct.py +1 -1
  15. cognite/neat/_graph/transformers/__init__.py +3 -1
  16. cognite/neat/_graph/transformers/_base.py +4 -4
  17. cognite/neat/_graph/transformers/_classic_cdf.py +13 -13
  18. cognite/neat/_graph/transformers/_prune_graph.py +3 -3
  19. cognite/neat/_graph/transformers/_rdfpath.py +3 -4
  20. cognite/neat/_graph/transformers/_value_type.py +71 -13
  21. cognite/neat/_issues/errors/__init__.py +2 -0
  22. cognite/neat/_issues/errors/_external.py +8 -0
  23. cognite/neat/_issues/errors/_resources.py +1 -1
  24. cognite/neat/_issues/warnings/__init__.py +0 -2
  25. cognite/neat/_issues/warnings/_models.py +1 -1
  26. cognite/neat/_issues/warnings/_properties.py +0 -8
  27. cognite/neat/_issues/warnings/_resources.py +1 -1
  28. cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
  29. cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
  30. cognite/neat/_rules/exporters/_rules2yaml.py +1 -1
  31. cognite/neat/_rules/importers/__init__.py +3 -1
  32. cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
  33. cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
  34. cognite/neat/_rules/importers/_rdf/_base.py +2 -2
  35. cognite/neat/_rules/importers/_rdf/_inference2rules.py +310 -26
  36. cognite/neat/_rules/models/_base_rules.py +22 -11
  37. cognite/neat/_rules/models/dms/_exporter.py +5 -4
  38. cognite/neat/_rules/models/dms/_rules.py +1 -8
  39. cognite/neat/_rules/models/dms/_rules_input.py +4 -0
  40. cognite/neat/_rules/models/information/_rules_input.py +5 -0
  41. cognite/neat/_rules/transformers/__init__.py +10 -3
  42. cognite/neat/_rules/transformers/_base.py +6 -1
  43. cognite/neat/_rules/transformers/_converters.py +530 -364
  44. cognite/neat/_rules/transformers/_mapping.py +4 -4
  45. cognite/neat/_session/_base.py +100 -47
  46. cognite/neat/_session/_create.py +133 -0
  47. cognite/neat/_session/_drop.py +60 -2
  48. cognite/neat/_session/_fix.py +28 -0
  49. cognite/neat/_session/_inspect.py +22 -7
  50. cognite/neat/_session/_mapping.py +8 -8
  51. cognite/neat/_session/_prepare.py +3 -247
  52. cognite/neat/_session/_read.py +138 -17
  53. cognite/neat/_session/_set.py +50 -1
  54. cognite/neat/_session/_show.py +16 -43
  55. cognite/neat/_session/_state.py +53 -52
  56. cognite/neat/_session/_to.py +11 -4
  57. cognite/neat/_session/_wizard.py +1 -1
  58. cognite/neat/_session/exceptions.py +8 -1
  59. cognite/neat/_store/_graph_store.py +301 -146
  60. cognite/neat/_store/_provenance.py +36 -20
  61. cognite/neat/_store/_rules_store.py +253 -267
  62. cognite/neat/_store/exceptions.py +40 -4
  63. cognite/neat/_utils/auth.py +5 -3
  64. cognite/neat/_version.py +1 -1
  65. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/METADATA +1 -1
  66. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/RECORD +69 -67
  67. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/LICENSE +0 -0
  68. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/WHEEL +0 -0
  69. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/entry_points.txt +0 -0
@@ -3,11 +3,12 @@ from collections import defaultdict
3
3
  from collections.abc import Iterable
4
4
  from typing import Literal, cast, overload
5
5
 
6
- from rdflib import RDF, Graph, Namespace, URIRef
6
+ from rdflib import RDF, Dataset, Graph, Namespace, URIRef
7
7
  from rdflib import Literal as RdfLiteral
8
+ from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
8
9
  from rdflib.query import ResultRow
9
10
 
10
- from cognite.neat._constants import UNKNOWN_TYPE
11
+ from cognite.neat._constants import NEAT
11
12
  from cognite.neat._rules._constants import EntityTypes
12
13
  from cognite.neat._rules.models.entities import ClassEntity
13
14
  from cognite.neat._rules.models.information import InformationRules
@@ -20,11 +21,21 @@ from ._construct import build_construct_query
20
21
  class Queries:
21
22
  """Helper class for storing standard queries for the graph store."""
22
23
 
23
- def __init__(self, graph: Graph, rules: InformationRules | None = None):
24
- self.graph = graph
25
- self.rules = rules
26
-
27
- def summarize_instances(self) -> list[tuple]:
24
+ def __init__(
25
+ self,
26
+ dataset: Dataset,
27
+ rules: dict[URIRef, InformationRules] | None = None,
28
+ default_named_graph: URIRef | None = None,
29
+ ):
30
+ self.dataset = dataset
31
+ self.rules = rules or {}
32
+ self.default_named_graph = default_named_graph or DATASET_DEFAULT_GRAPH_ID
33
+
34
+ def graph(self, named_graph: URIRef | None = None) -> Graph:
35
+ """Get named graph from the dataset to query over"""
36
+ return self.dataset.graph(named_graph or self.default_named_graph)
37
+
38
+ def summarize_instances(self, named_graph: URIRef | None = None) -> list[tuple]:
28
39
  """Summarize instances in the graph store by class and count"""
29
40
 
30
41
  query_statement = """ SELECT ?class (COUNT(?instance) AS ?instanceCount)
@@ -39,36 +50,55 @@ class Queries:
39
50
  remove_namespace_from_uri(cast(URIRef, cast(tuple, res)[0])),
40
51
  cast(RdfLiteral, cast(tuple, res)[1]).value,
41
52
  )
42
- for res in list(self.graph.query(query_statement))
53
+ for res in list(self.graph(named_graph=named_graph).query(query_statement))
43
54
  ]
44
55
 
45
- @property
46
- def types(self) -> dict[URIRef, str]:
56
+ def types(self, named_graph: URIRef | None = None) -> dict[URIRef, str]:
47
57
  """Types and their short form in the graph"""
48
58
  query = """SELECT DISTINCT ?type
49
59
  WHERE {?s a ?type .}"""
50
- return {type_: remove_namespace_from_uri(cast(URIRef, type_)) for (type_,) in list(self.graph.query(query))} # type: ignore[misc, index, arg-type]
51
60
 
52
- def type_uri(self, type_: str) -> list[URIRef]:
61
+ return { # type: ignore[misc, index, arg-type]
62
+ cast(URIRef, type_): remove_namespace_from_uri(cast(URIRef, type_))
63
+ for (type_,) in list(self.graph(named_graph).query(query))
64
+ }
65
+
66
+ def type_uri(self, type_: str, named_graph: URIRef | None = None) -> list[URIRef]:
53
67
  """Get the URIRef of a type"""
54
- return [k for k, v in self.types.items() if v == type_]
68
+ return [k for k, v in self.types(named_graph).items() if v == type_]
69
+
70
+ def properties(self, named_graph: URIRef | None = None) -> dict[URIRef, str]:
71
+ """Properties and their short form in the graph
55
72
 
56
- @property
57
- def properties(self) -> dict[URIRef, str]:
73
+ Args:
74
+ named_graph: Named graph to query over, default None (default graph)
75
+
76
+ """
58
77
  query = """SELECT DISTINCT ?property
59
78
  WHERE {?s ?property ?o . FILTER(?property != rdf:type)}"""
60
- return {type_: remove_namespace_from_uri(cast(URIRef, type_)) for (type_,) in list(self.graph.query(query))} # type: ignore[misc, index, arg-type]
79
+ return { # type: ignore[misc, index, arg-type]
80
+ cast(URIRef, type_): remove_namespace_from_uri(cast(URIRef, type_))
81
+ for (type_,) in list(self.graph(named_graph).query(query))
82
+ }
61
83
 
62
- def property_uri(self, property_: str) -> list[URIRef]:
63
- """Get the URIRef of a type"""
64
- return [k for k, v in self.properties.items() if v == property_]
84
+ def property_uri(self, property_: str, named_graph: URIRef | None = None) -> list[URIRef]:
85
+ """Get the URIRef of a property
65
86
 
66
- def list_instances_ids_of_class(self, class_uri: URIRef, limit: int = -1) -> list[URIRef]:
87
+ Args:
88
+ property_: Property to find URIRef for
89
+ named_graph: Named graph to query over, default None (default graph)
90
+ """
91
+ return [k for k, v in self.properties(named_graph).items() if v == property_]
92
+
93
+ def list_instances_ids_of_class(
94
+ self, class_uri: URIRef, limit: int = -1, named_graph: URIRef | None = None
95
+ ) -> list[URIRef]:
67
96
  """Get instances ids for a given class
68
97
 
69
98
  Args:
70
99
  class_uri: Class for which instances are to be found
71
100
  limit: Max number of instances to return, by default -1 meaning all instances
101
+ named_graph: Named graph to query over, default None (default graph)
72
102
 
73
103
  Returns:
74
104
  List of class instance URIs
@@ -76,16 +106,17 @@ class Queries:
76
106
  query_statement = "SELECT DISTINCT ?subject WHERE { ?subject a <class> .} LIMIT X".replace(
77
107
  "class", class_uri
78
108
  ).replace("LIMIT X", "" if limit == -1 else f"LIMIT {limit}")
79
- return [cast(tuple, res)[0] for res in list(self.graph.query(query_statement))]
109
+ return [cast(tuple, res)[0] for res in list(self.graph(named_graph).query(query_statement))]
80
110
 
81
- def list_instances_of_type(self, class_uri: URIRef) -> list[ResultRow]:
111
+ def list_instances_of_type(self, class_uri: URIRef, named_graph: URIRef | None = None) -> list[ResultRow]:
82
112
  """Get all triples for instances of a given class
83
113
 
84
114
  Args:
85
115
  class_uri: Class for which instances are to be found
116
+ named_graph: Named graph to query over, default None (default graph)
86
117
 
87
118
  Returns:
88
- List of triples for instances of the given class
119
+ List of triples for instances of the given class in the named graph
89
120
  """
90
121
  query = (
91
122
  f"SELECT ?instance ?prop ?value "
@@ -93,17 +124,22 @@ class Queries:
93
124
  )
94
125
 
95
126
  # Select queries gives an iterable of result rows
96
- return cast(list[ResultRow], list(self.graph.query(query)))
127
+ return cast(list[ResultRow], list(self.graph(named_graph).query(query)))
97
128
 
98
- def triples_of_type_instances(self, rdf_type: str | URIRef) -> list[tuple[str, str, str]]:
129
+ def triples_of_type_instances(
130
+ self, rdf_type: str | URIRef, named_graph: URIRef | None = None
131
+ ) -> list[tuple[str, str, str]]:
99
132
  """Get all triples of a given type.
100
133
 
101
- This method assumes the graph has been transformed into the default namespace.
134
+ Args:
135
+ rdf_type: Type URI to query
136
+ named_graph: Named graph to query over, default None (default graph)
102
137
  """
138
+ named_graph = named_graph or self.default_named_graph
103
139
  if isinstance(rdf_type, URIRef):
104
140
  rdf_uri = rdf_type
105
- elif isinstance(rdf_type, str) and self.rules:
106
- rdf_uri = self.rules.metadata.namespace[rdf_type]
141
+ elif isinstance(rdf_type, str) and self.rules and self.rules.get(named_graph):
142
+ rdf_uri = self.rules[named_graph].metadata.namespace[rdf_type]
107
143
  else:
108
144
  warnings.warn(
109
145
  "Unknown namespace. Please either provide a URIRef or set the rules of the store.",
@@ -117,46 +153,54 @@ class Queries:
117
153
  "order by ?instance"
118
154
  )
119
155
 
120
- result = self.graph.query(query)
156
+ result = self.graph(named_graph).query(query)
121
157
 
122
158
  # We cannot include the RDF.type in case there is a neat:type property
123
159
  return [remove_namespace_from_uri(list(triple)) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index, arg-type]
124
160
 
125
- def type_with_property(self, type_: URIRef, property_uri: URIRef) -> bool:
161
+ def type_with_property(self, type_: URIRef, property_uri: URIRef, named_graph: URIRef | None = None) -> bool:
126
162
  """Check if a property exists in the graph store
127
163
 
128
164
  Args:
165
+ type_: Type URI to check
129
166
  property_uri: Property URI to check
167
+ named_graph: Named graph to query over, default None (default graph)
130
168
 
131
169
  Returns:
132
170
  True if property exists, False otherwise
133
171
  """
134
172
  query = f"SELECT ?o WHERE {{ ?s a <{type_}> ; <{property_uri}> ?o .}} Limit 1"
135
- return bool(list(self.graph.query(query)))
173
+ return bool(list(self.graph(named_graph).query(query)))
136
174
 
137
- def has_namespace(self, namespace: Namespace) -> bool:
175
+ def has_namespace(self, namespace: Namespace, named_graph: URIRef | None = None) -> bool:
138
176
  """Check if a namespace exists in the graph store
139
177
 
140
178
  Args:
141
179
  namespace: Namespace to check
180
+ named_graph: Named graph to query over, default None (default graph)
142
181
 
143
182
  Returns:
144
183
  True if namespace exists, False otherwise
145
184
  """
146
185
  query = f"ASK WHERE {{ ?s ?p ?o . FILTER(STRSTARTS(STR(?p), STR(<{namespace}>))) }}"
147
- return bool(self.graph.query(query))
186
+ return bool(self.graph(named_graph).query(query))
148
187
 
149
- def has_type(self, type_: URIRef) -> bool:
188
+ def has_data(self) -> bool:
189
+ """Check if the graph store has data"""
190
+ return cast(bool, next(iter(self.dataset.query("ASK WHERE { ?s ?p ?o }"))))
191
+
192
+ def has_type(self, type_: URIRef, named_graph: URIRef | None = None) -> bool:
150
193
  """Check if a type exists in the graph store
151
194
 
152
195
  Args:
153
196
  type_: Type to check
197
+ named_graph: Named graph to query over, default None (default graph)
154
198
 
155
199
  Returns:
156
200
  True if type exists, False otherwise
157
201
  """
158
202
  query = f"ASK WHERE {{ ?s a <{type_}> }}"
159
- return bool(self.graph.query(query))
203
+ return bool(self.graph(named_graph).query(query))
160
204
 
161
205
  def describe(
162
206
  self,
@@ -164,6 +208,7 @@ class Queries:
164
208
  instance_type: str | None = None,
165
209
  property_renaming_config: dict | None = None,
166
210
  property_types: dict[str, EntityTypes] | None = None,
211
+ named_graph: URIRef | None = None,
167
212
  ) -> tuple[str, dict[str | InstanceType, list[str]]] | None:
168
213
  """DESCRIBE instance for a given class from the graph store
169
214
 
@@ -172,6 +217,7 @@ class Queries:
172
217
  instance_type: Type of the instance, default None (will be inferred from triples)
173
218
  property_renaming_config: Dictionary to rename properties, default None (no renaming)
174
219
  property_types: Dictionary of property types, default None (helper for removal of namespace)
220
+ named_graph: Named graph to query over, default None (default graph)
175
221
 
176
222
 
177
223
  Returns:
@@ -179,7 +225,7 @@ class Queries:
179
225
  """
180
226
  property_values: dict[str, list[str]] = defaultdict(list)
181
227
  identifier = remove_namespace_from_uri(instance_id, validation="prefix")
182
- for _, predicate, object_ in cast(list[ResultRow], self.graph.query(f"DESCRIBE <{instance_id}>")):
228
+ for _, predicate, object_ in cast(list[ResultRow], self.graph(named_graph).query(f"DESCRIBE <{instance_id}>")):
183
229
  if object_.lower() in [
184
230
  "",
185
231
  "none",
@@ -190,45 +236,28 @@ class Queries:
190
236
 
191
237
  # set property
192
238
  if property_renaming_config and predicate != RDF.type:
193
- property_ = property_renaming_config.get(
194
- predicate, remove_namespace_from_uri(predicate, validation="prefix")
195
- )
239
+ property_ = remove_namespace_from_uri(predicate, validation="prefix")
240
+ renamed_property_ = property_renaming_config.get(predicate, property_)
241
+
196
242
  elif not property_renaming_config and predicate != RDF.type:
197
243
  property_ = remove_namespace_from_uri(predicate, validation="prefix")
244
+ renamed_property_ = property_
245
+
198
246
  else:
199
247
  property_ = RDF.type
248
+ renamed_property_ = property_
200
249
 
201
- # set value
202
- # if it is URIRef and property type is object property, we need to remove namespace
203
- # if it URIref but we are doing this into data type property, we do not remove namespace
204
- # case 1 for RDF type we remove namespace
205
- if property_ == RDF.type:
250
+ if isinstance(object_, URIRef):
206
251
  value = remove_namespace_from_uri(object_, validation="prefix")
207
-
208
- # case 2 for define object properties we remove namespace
209
- elif (
210
- isinstance(object_, URIRef)
211
- and property_types
212
- and (
213
- property_types.get(property_, None) == EntityTypes.object_property
214
- or property_types.get(property_, None) == EntityTypes.undefined
215
- )
216
- ):
217
- value = remove_namespace_from_uri(object_, validation="prefix")
218
-
219
- # case 3 when property type is not defined and returned value is URIRef we remove namespace
220
- elif isinstance(object_, URIRef) and not property_types:
221
- value = remove_namespace_from_uri(object_, validation="prefix")
222
-
223
- # case 4 for data type properties we do not remove namespace but keep the entire value
224
- # but we drop the datatype part, and keep everything to be string (data loader will do the conversion)
225
- # for value type it expects (if possible)
252
+ elif isinstance(object_, RdfLiteral):
253
+ value = object_.toPython()
226
254
  else:
255
+ # It is a blank node
227
256
  value = str(object_)
228
257
 
229
258
  # add type to the dictionary
230
259
  if predicate != RDF.type:
231
- property_values[property_].append(value)
260
+ property_values[renamed_property_].append(value)
232
261
  else:
233
262
  # guarding against multiple rdf:type values as this is not allowed in CDF
234
263
  if RDF.type not in property_values:
@@ -249,6 +278,7 @@ class Queries:
249
278
  class_: str,
250
279
  properties_optional: bool = True,
251
280
  instance_id: URIRef | None = None,
281
+ named_graph: URIRef | None = None,
252
282
  ) -> list[tuple[str, str, str]]:
253
283
  """CONSTRUCT instances for a given class from the graph store
254
284
 
@@ -256,21 +286,29 @@ class Queries:
256
286
  class_: Class entity for which we want to generate query
257
287
  properties_optional: Whether to make all properties optional, default True
258
288
  instance_ids: List of instance ids to filter on, default None (all)
289
+ named_graph: Named graph to query over, default None (default graph
259
290
 
260
291
  Returns:
261
292
  List of triples for instances of the given class
262
293
  """
263
-
264
- if self.rules and (
265
- query := build_construct_query(
266
- class_=ClassEntity(prefix=self.rules.metadata.prefix, suffix=class_),
267
- graph=self.graph,
268
- rules=self.rules,
269
- properties_optional=properties_optional,
270
- instance_id=instance_id,
294
+ named_graph = named_graph or self.default_named_graph
295
+ if (
296
+ self.rules
297
+ and self.rules.get(named_graph)
298
+ and (
299
+ query := build_construct_query(
300
+ class_=ClassEntity(
301
+ prefix=self.rules[named_graph].metadata.prefix,
302
+ suffix=class_,
303
+ ),
304
+ graph=self.graph(named_graph),
305
+ rules=self.rules[named_graph],
306
+ properties_optional=properties_optional,
307
+ instance_id=instance_id,
308
+ )
271
309
  )
272
310
  ):
273
- result = self.graph.query(query)
311
+ result = self.graph(named_graph).query(query)
274
312
 
275
313
  # We cannot include the RDF.type in case there is a neat:type property
276
314
  return [remove_namespace_from_uri(cast(ResultRow, triple)) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index, arg-type]
@@ -281,25 +319,36 @@ class Queries:
281
319
  )
282
320
  return []
283
321
 
284
- def list_triples(self, limit: int = 25) -> list[ResultRow]:
322
+ def list_triples(self, limit: int = 25, named_graph: URIRef | None = None) -> list[ResultRow]:
285
323
  """List triples in the graph store
286
324
 
287
325
  Args:
288
326
  limit: Max number of triples to return, by default 25
327
+ named_graph: Named graph to query over, default None (default graph)
289
328
 
290
329
  Returns:
291
330
  List of triples
292
331
  """
293
332
  query = f"SELECT ?subject ?predicate ?object WHERE {{ ?subject ?predicate ?object }} LIMIT {limit}"
294
- return cast(list[ResultRow], list(self.graph.query(query)))
333
+ return cast(list[ResultRow], list(self.graph(named_graph).query(query)))
295
334
 
296
335
  @overload
297
336
  def list_types(self, remove_namespace: Literal[False] = False, limit: int = 25) -> list[ResultRow]: ...
298
337
 
299
338
  @overload
300
- def list_types(self, remove_namespace: Literal[True], limit: int = 25) -> list[str]: ...
339
+ def list_types(
340
+ self,
341
+ remove_namespace: Literal[True],
342
+ limit: int = 25,
343
+ named_graph: URIRef | None = None,
344
+ ) -> list[str]: ...
301
345
 
302
- def list_types(self, remove_namespace: bool = False, limit: int = 25) -> list[ResultRow] | list[str]:
346
+ def list_types(
347
+ self,
348
+ remove_namespace: bool = False,
349
+ limit: int = 25,
350
+ named_graph: URIRef | None = None,
351
+ ) -> list[ResultRow] | list[str]:
303
352
  """List types in the graph store
304
353
 
305
354
  Args:
@@ -310,13 +359,14 @@ class Queries:
310
359
  List of types
311
360
  """
312
361
  query = f"SELECT DISTINCT ?type WHERE {{ ?subject a ?type }} LIMIT {limit}"
313
- result = cast(list[ResultRow], list(self.graph.query(query)))
362
+ result = cast(list[ResultRow], list(self.graph(named_graph).query(query)))
314
363
  if remove_namespace:
315
364
  return [remove_namespace_from_uri(res[0]) for res in result]
316
365
  return result
317
366
 
318
367
  def multi_value_type_property(
319
368
  self,
369
+ named_graph: URIRef | None = None,
320
370
  ) -> Iterable[tuple[URIRef, URIRef, list[URIRef]]]:
321
371
  query = """SELECT ?sourceType ?property
322
372
  (GROUP_CONCAT(DISTINCT STR(?valueType); SEPARATOR=",") AS ?valueTypes)
@@ -341,15 +391,20 @@ class Queries:
341
391
  value_types,
342
392
  ) in cast(
343
393
  ResultRow,
344
- self.graph.query(query.format(unknownType=str(UNKNOWN_TYPE))),
394
+ self.graph(named_graph).query(query.format(unknownType=str(NEAT.UnknownType))),
345
395
  ):
346
396
  yield cast(URIRef, source_type), cast(URIRef, property_), [URIRef(uri) for uri in value_types.split(",")]
347
397
 
348
- def drop_types(self, type_: list[URIRef]) -> dict[URIRef, int]:
398
+ def drop_types(
399
+ self,
400
+ type_: list[URIRef],
401
+ named_graph: URIRef | None = None,
402
+ ) -> dict[URIRef, int]:
349
403
  """Drop types from the graph store
350
404
 
351
405
  Args:
352
406
  type_: List of types to drop
407
+ named_graph: Named graph to query over, default None (default graph
353
408
 
354
409
  Returns:
355
410
  Dictionary of dropped types
@@ -358,11 +413,16 @@ class Queries:
358
413
  for t in type_:
359
414
  instance_ids = self.list_instances_ids_of_class(t)
360
415
  dropped_types[t] = len(instance_ids)
361
- remove_instance_ids_in_batch(self.graph, instance_ids)
416
+ remove_instance_ids_in_batch(self.graph(named_graph), instance_ids)
362
417
  return dropped_types
363
418
 
364
- def multi_type_instances(self) -> dict[str, list[str]]:
365
- """Find instances with multiple types"""
419
+ def multi_type_instances(self, named_graph: URIRef | None = None) -> dict[str, list[str]]:
420
+ """Find instances with multiple types
421
+
422
+ Args:
423
+ named_graph: Named graph to query over, default None (default graph)
424
+
425
+ """
366
426
 
367
427
  query = """
368
428
  SELECT ?instance (GROUP_CONCAT(str(?type); SEPARATOR=",") AS ?types)
@@ -374,7 +434,7 @@ class Queries:
374
434
  """
375
435
 
376
436
  result = {}
377
- for instance, types in self.graph.query(query): # type: ignore
437
+ for instance, types in self.graph(named_graph).query(query): # type: ignore
378
438
  result[remove_namespace_from_uri(instance)] = remove_namespace_from_uri(types.split(","))
379
439
 
380
440
  return result
@@ -115,7 +115,7 @@ def to_construct_triples(
115
115
  graph_template_triple = Triple(
116
116
  subject="?instance",
117
117
  predicate=f"{transformation.class_.prefix}:{transformation.property_}",
118
- object=f'?{re.sub(r"[^_a-zA-Z0-9/_]", "_", str(transformation.property_).lower())}',
118
+ object=f"?{re.sub(r'[^_a-zA-Z0-9/_]', '_', str(transformation.property_).lower())}",
119
119
  optional=False,
120
120
  )
121
121
  templates.append(graph_template_triple)
@@ -17,7 +17,7 @@ from ._prune_graph import (
17
17
  PruneTypes,
18
18
  )
19
19
  from ._rdfpath import AddSelfReferenceProperty, MakeConnectionOnExactMatch
20
- from ._value_type import ConnectionToLiteral, ConvertLiteral, LiteralToEntity, SplitMultiValueProperty
20
+ from ._value_type import ConnectionToLiteral, ConvertLiteral, LiteralToEntity, SetType, SplitMultiValueProperty
21
21
 
22
22
  __all__ = [
23
23
  "AddAssetDepth",
@@ -38,6 +38,7 @@ __all__ = [
38
38
  "PruneInstancesOfUnknownType",
39
39
  "PruneTypes",
40
40
  "RelationshipAsEdgeTransformer",
41
+ "SetType",
41
42
  "SplitMultiValueProperty",
42
43
  ]
43
44
 
@@ -62,4 +63,5 @@ Transformers = (
62
63
  | ConnectionToLiteral
63
64
  | BaseTransformerStandardised
64
65
  | LookupRelationshipSourceTarget
66
+ | SetType
65
67
  )
@@ -12,14 +12,14 @@ from cognite.neat._shared import Triple
12
12
  from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
13
13
  from cognite.neat._utils.graph_transformations_report import GraphTransformationResult
14
14
 
15
- To_Add_Triples: TypeAlias = list[Triple]
16
- To_Remove_Triples: TypeAlias = list[Triple]
15
+ To_Add_Triples: TypeAlias = set[Triple]
16
+ To_Remove_Triples: TypeAlias = set[Triple]
17
17
 
18
18
 
19
19
  @dataclasses.dataclass
20
20
  class RowTransformationOutput:
21
- remove_triples: To_Remove_Triples = dataclasses.field(default_factory=list)
22
- add_triples: To_Add_Triples = dataclasses.field(default_factory=list)
21
+ remove_triples: To_Remove_Triples = dataclasses.field(default_factory=set)
22
+ add_triples: To_Add_Triples = dataclasses.field(default_factory=set)
23
23
  instances_removed_count: int = 0
24
24
  instances_added_count: int = 0
25
25
  instances_modified_count: int = 0
@@ -63,11 +63,11 @@ class AddAssetDepth(BaseTransformerStandardised):
63
63
  row_output = RowTransformationOutput()
64
64
  subject, object = query_result_row
65
65
 
66
- row_output.add_triples.append(cast(Triple, (subject, DEFAULT_NAMESPACE.depth, object)))
66
+ row_output.add_triples.add(cast(Triple, (subject, DEFAULT_NAMESPACE.depth, object)))
67
67
 
68
68
  if self.depth_typing and (type_ := self.depth_typing.get(int(object), None)):
69
- row_output.remove_triples.append(cast(Triple, (subject, RDF.type, self.asset_type)))
70
- row_output.add_triples.append(cast(Triple, (subject, RDF.type, DEFAULT_NAMESPACE[type_])))
69
+ row_output.remove_triples.add(cast(Triple, (subject, RDF.type, self.asset_type)))
70
+ row_output.add_triples.add(cast(Triple, (subject, RDF.type, DEFAULT_NAMESPACE[type_])))
71
71
 
72
72
  row_output.instances_modified_count += 1
73
73
 
@@ -128,7 +128,7 @@ class BaseAssetConnector(BaseTransformerStandardised, ABC):
128
128
  row_output = RowTransformationOutput()
129
129
  subject, object = query_result_row
130
130
 
131
- row_output.add_triples.append(cast(Triple, (subject, self.asset_to_resource_connection, object)))
131
+ row_output.add_triples.add(cast(Triple, (subject, self.asset_to_resource_connection, object)))
132
132
 
133
133
  row_output.instances_modified_count += 1
134
134
 
@@ -305,12 +305,12 @@ class AssetRelationshipConnector(BaseTransformerStandardised):
305
305
  row_output = RowTransformationOutput()
306
306
  source, relationship, target = query_result_row
307
307
 
308
- row_output.add_triples.append(cast(Triple, (source, DEFAULT_NAMESPACE.relationship, target)))
309
- row_output.add_triples.append(cast(Triple, (relationship, DEFAULT_NAMESPACE.source, source)))
310
- row_output.add_triples.append(cast(Triple, (relationship, DEFAULT_NAMESPACE.target, target)))
308
+ row_output.add_triples.add(cast(Triple, (source, DEFAULT_NAMESPACE.relationship, target)))
309
+ row_output.add_triples.add(cast(Triple, (relationship, DEFAULT_NAMESPACE.source, source)))
310
+ row_output.add_triples.add(cast(Triple, (relationship, DEFAULT_NAMESPACE.target, target)))
311
311
 
312
- row_output.remove_triples.append(cast(Triple, (relationship, self.relationship_source_xid_prop, None)))
313
- row_output.remove_triples.append(cast(Triple, (relationship, self.relationship_target_xid_prop, None)))
312
+ row_output.remove_triples.add(cast(Triple, (relationship, self.relationship_source_xid_prop, None)))
313
+ row_output.remove_triples.add(cast(Triple, (relationship, self.relationship_target_xid_prop, None)))
314
314
 
315
315
  row_output.instances_modified_count += 2
316
316
 
@@ -578,10 +578,10 @@ WHERE {{
578
578
  warnings.warn(ResourceNotFoundWarning(target, "class", str(instance), "class"), stacklevel=2)
579
579
  return output
580
580
 
581
- output.remove_triples.append((instance, self._namespace.sourceExternalId, source))
582
- output.remove_triples.append((instance, self._namespace.targetExternalId, target))
583
- output.add_triples.append((instance, self._namespace.sourceExternalId, source_id))
584
- output.add_triples.append((instance, self._namespace.targetExternalId, target_id))
581
+ output.remove_triples.add((instance, self._namespace.sourceExternalId, source))
582
+ output.remove_triples.add((instance, self._namespace.targetExternalId, target))
583
+ output.add_triples.add((instance, self._namespace.sourceExternalId, source_id))
584
+ output.add_triples.add((instance, self._namespace.targetExternalId, target_id))
585
585
  output.instances_modified_count += 1
586
586
  return output
587
587
 
@@ -237,7 +237,7 @@ class PruneTypes(BaseTransformerStandardised):
237
237
  row_output = RowTransformationOutput()
238
238
 
239
239
  (subject,) = query_result_row
240
- row_output.remove_triples.append((subject, None, None)) # type: ignore
240
+ row_output.remove_triples.add((subject, None, None)) # type: ignore
241
241
  row_output.instances_removed_count = 1
242
242
 
243
243
  return row_output
@@ -272,7 +272,7 @@ class PruneDeadEndEdges(BaseTransformerStandardised):
272
272
 
273
273
  def operation(self, row: ResultRow) -> RowTransformationOutput:
274
274
  row_output = RowTransformationOutput()
275
- row_output.remove_triples.append(cast(Triple, row))
275
+ row_output.remove_triples.add(cast(Triple, row))
276
276
  row_output.instances_modified_count = 1
277
277
 
278
278
  return row_output
@@ -307,7 +307,7 @@ class PruneInstancesOfUnknownType(BaseTransformerStandardised):
307
307
  def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
308
308
  row_output = RowTransformationOutput()
309
309
  (subject,) = query_result_row
310
- row_output.remove_triples.append(cast(Triple, (subject, None, None)))
310
+ row_output.remove_triples.add(cast(Triple, (subject, None, None)))
311
311
  row_output.instances_removed_count = 1
312
312
 
313
313
  return row_output
@@ -7,7 +7,6 @@ from rdflib.query import ResultRow
7
7
  from cognite.neat._rules.analysis import InformationAnalysis
8
8
  from cognite.neat._rules.models._rdfpath import RDFPath, SingleProperty
9
9
  from cognite.neat._rules.models.information import InformationRules
10
- from cognite.neat._shared import Triple
11
10
  from cognite.neat._utils.rdf_ import get_namespace, remove_namespace_from_uri
12
11
 
13
12
  from ._base import BaseTransformer, BaseTransformerStandardised, RowTransformationOutput
@@ -76,7 +75,7 @@ class MakeConnectionOnExactMatch(BaseTransformerStandardised):
76
75
  self.object_type = object_type
77
76
  self.object_predicate = object_predicate
78
77
  subject_namespace = Namespace(get_namespace(subject_type))
79
- self.connection = (
78
+ self.connection: URIRef = (
80
79
  subject_namespace[quote(connection.strip())]
81
80
  if isinstance(connection, str)
82
81
  else connection or subject_namespace[remove_namespace_from_uri(self.object_type).lower()]
@@ -125,8 +124,8 @@ class MakeConnectionOnExactMatch(BaseTransformerStandardised):
125
124
  def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
126
125
  row_output = RowTransformationOutput()
127
126
 
128
- subject, object = query_result_row
127
+ subject, object = cast(tuple[URIRef, URIRef], query_result_row)
129
128
 
130
- row_output.add_triples.append(cast(Triple, (subject, self.connection, object)))
129
+ row_output.add_triples.add((subject, self.connection, object))
131
130
  row_output.instances_modified_count += 1
132
131
  return row_output