cognite-neat 0.107.0__py3-none-any.whl → 0.108.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (52)
  1. cognite/neat/_constants.py +35 -1
  2. cognite/neat/_graph/_shared.py +4 -0
  3. cognite/neat/_graph/extractors/_classic_cdf/_base.py +115 -14
  4. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +83 -6
  5. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +48 -12
  6. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +19 -1
  7. cognite/neat/_graph/extractors/_dms.py +162 -47
  8. cognite/neat/_graph/extractors/_dms_graph.py +54 -4
  9. cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
  10. cognite/neat/_graph/extractors/_rdf_file.py +3 -2
  11. cognite/neat/_graph/loaders/__init__.py +1 -3
  12. cognite/neat/_graph/loaders/_rdf2dms.py +20 -10
  13. cognite/neat/_graph/queries/_base.py +140 -84
  14. cognite/neat/_graph/queries/_construct.py +1 -1
  15. cognite/neat/_graph/transformers/__init__.py +3 -1
  16. cognite/neat/_graph/transformers/_value_type.py +54 -3
  17. cognite/neat/_issues/errors/_resources.py +1 -1
  18. cognite/neat/_issues/warnings/__init__.py +0 -2
  19. cognite/neat/_issues/warnings/_models.py +1 -1
  20. cognite/neat/_issues/warnings/_properties.py +0 -8
  21. cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
  22. cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
  23. cognite/neat/_rules/importers/__init__.py +3 -1
  24. cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
  25. cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
  26. cognite/neat/_rules/importers/_rdf/_base.py +2 -2
  27. cognite/neat/_rules/importers/_rdf/_inference2rules.py +241 -18
  28. cognite/neat/_rules/models/_base_rules.py +13 -3
  29. cognite/neat/_rules/models/dms/_rules.py +1 -8
  30. cognite/neat/_rules/models/dms/_rules_input.py +4 -0
  31. cognite/neat/_rules/models/information/_rules_input.py +5 -0
  32. cognite/neat/_rules/transformers/__init__.py +6 -0
  33. cognite/neat/_rules/transformers/_converters.py +98 -7
  34. cognite/neat/_session/_base.py +55 -4
  35. cognite/neat/_session/_drop.py +5 -1
  36. cognite/neat/_session/_inspect.py +3 -2
  37. cognite/neat/_session/_read.py +61 -14
  38. cognite/neat/_session/_set.py +27 -0
  39. cognite/neat/_session/_show.py +4 -4
  40. cognite/neat/_session/_state.py +8 -4
  41. cognite/neat/_session/_to.py +4 -1
  42. cognite/neat/_session/_wizard.py +1 -1
  43. cognite/neat/_session/exceptions.py +2 -1
  44. cognite/neat/_store/_graph_store.py +287 -133
  45. cognite/neat/_store/_rules_store.py +108 -1
  46. cognite/neat/_utils/auth.py +1 -1
  47. cognite/neat/_version.py +1 -1
  48. {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/METADATA +1 -1
  49. {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/RECORD +52 -52
  50. {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/LICENSE +0 -0
  51. {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/WHEEL +0 -0
  52. {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/entry_points.txt +0 -0
@@ -3,11 +3,12 @@ from collections import defaultdict
3
3
  from collections.abc import Iterable
4
4
  from typing import Literal, cast, overload
5
5
 
6
- from rdflib import RDF, Graph, Namespace, URIRef
6
+ from rdflib import RDF, Dataset, Graph, Namespace, URIRef
7
7
  from rdflib import Literal as RdfLiteral
8
+ from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
8
9
  from rdflib.query import ResultRow
9
10
 
10
- from cognite.neat._constants import UNKNOWN_TYPE
11
+ from cognite.neat._constants import NEAT
11
12
  from cognite.neat._rules._constants import EntityTypes
12
13
  from cognite.neat._rules.models.entities import ClassEntity
13
14
  from cognite.neat._rules.models.information import InformationRules
@@ -20,11 +21,21 @@ from ._construct import build_construct_query
20
21
  class Queries:
21
22
  """Helper class for storing standard queries for the graph store."""
22
23
 
23
- def __init__(self, graph: Graph, rules: InformationRules | None = None):
24
- self.graph = graph
25
- self.rules = rules
26
-
27
- def summarize_instances(self) -> list[tuple]:
24
+ def __init__(
25
+ self,
26
+ dataset: Dataset,
27
+ rules: dict[URIRef, InformationRules] | None = None,
28
+ default_named_graph: URIRef | None = None,
29
+ ):
30
+ self.dataset = dataset
31
+ self.rules = rules or {}
32
+ self.default_named_graph = default_named_graph or DATASET_DEFAULT_GRAPH_ID
33
+
34
+ def graph(self, named_graph: URIRef | None = None) -> Graph:
35
+ """Get named graph from the dataset to query over"""
36
+ return self.dataset.graph(named_graph or self.default_named_graph)
37
+
38
+ def summarize_instances(self, named_graph: URIRef | None = None) -> list[tuple]:
28
39
  """Summarize instances in the graph store by class and count"""
29
40
 
30
41
  query_statement = """ SELECT ?class (COUNT(?instance) AS ?instanceCount)
@@ -39,36 +50,55 @@ class Queries:
39
50
  remove_namespace_from_uri(cast(URIRef, cast(tuple, res)[0])),
40
51
  cast(RdfLiteral, cast(tuple, res)[1]).value,
41
52
  )
42
- for res in list(self.graph.query(query_statement))
53
+ for res in list(self.graph(named_graph=named_graph).query(query_statement))
43
54
  ]
44
55
 
45
- @property
46
- def types(self) -> dict[URIRef, str]:
56
+ def types(self, named_graph: URIRef | None = None) -> dict[URIRef, str]:
47
57
  """Types and their short form in the graph"""
48
58
  query = """SELECT DISTINCT ?type
49
59
  WHERE {?s a ?type .}"""
50
- return {type_: remove_namespace_from_uri(cast(URIRef, type_)) for (type_,) in list(self.graph.query(query))} # type: ignore[misc, index, arg-type]
51
60
 
52
- def type_uri(self, type_: str) -> list[URIRef]:
61
+ return { # type: ignore[misc, index, arg-type]
62
+ cast(URIRef, type_): remove_namespace_from_uri(cast(URIRef, type_))
63
+ for (type_,) in list(self.graph(named_graph).query(query))
64
+ }
65
+
66
+ def type_uri(self, type_: str, named_graph: URIRef | None = None) -> list[URIRef]:
53
67
  """Get the URIRef of a type"""
54
- return [k for k, v in self.types.items() if v == type_]
68
+ return [k for k, v in self.types(named_graph).items() if v == type_]
69
+
70
+ def properties(self, named_graph: URIRef | None = None) -> dict[URIRef, str]:
71
+ """Properties and their short form in the graph
55
72
 
56
- @property
57
- def properties(self) -> dict[URIRef, str]:
73
+ Args:
74
+ named_graph: Named graph to query over, default None (default graph)
75
+
76
+ """
58
77
  query = """SELECT DISTINCT ?property
59
78
  WHERE {?s ?property ?o . FILTER(?property != rdf:type)}"""
60
- return {type_: remove_namespace_from_uri(cast(URIRef, type_)) for (type_,) in list(self.graph.query(query))} # type: ignore[misc, index, arg-type]
79
+ return { # type: ignore[misc, index, arg-type]
80
+ cast(URIRef, type_): remove_namespace_from_uri(cast(URIRef, type_))
81
+ for (type_,) in list(self.graph(named_graph).query(query))
82
+ }
61
83
 
62
- def property_uri(self, property_: str) -> list[URIRef]:
63
- """Get the URIRef of a type"""
64
- return [k for k, v in self.properties.items() if v == property_]
84
+ def property_uri(self, property_: str, named_graph: URIRef | None = None) -> list[URIRef]:
85
+ """Get the URIRef of a property
65
86
 
66
- def list_instances_ids_of_class(self, class_uri: URIRef, limit: int = -1) -> list[URIRef]:
87
+ Args:
88
+ property_: Property to find URIRef for
89
+ named_graph: Named graph to query over, default None (default graph)
90
+ """
91
+ return [k for k, v in self.properties(named_graph).items() if v == property_]
92
+
93
+ def list_instances_ids_of_class(
94
+ self, class_uri: URIRef, limit: int = -1, named_graph: URIRef | None = None
95
+ ) -> list[URIRef]:
67
96
  """Get instances ids for a given class
68
97
 
69
98
  Args:
70
99
  class_uri: Class for which instances are to be found
71
100
  limit: Max number of instances to return, by default -1 meaning all instances
101
+ named_graph: Named graph to query over, default None (default graph)
72
102
 
73
103
  Returns:
74
104
  List of class instance URIs
@@ -76,16 +106,17 @@ class Queries:
76
106
  query_statement = "SELECT DISTINCT ?subject WHERE { ?subject a <class> .} LIMIT X".replace(
77
107
  "class", class_uri
78
108
  ).replace("LIMIT X", "" if limit == -1 else f"LIMIT {limit}")
79
- return [cast(tuple, res)[0] for res in list(self.graph.query(query_statement))]
109
+ return [cast(tuple, res)[0] for res in list(self.graph(named_graph).query(query_statement))]
80
110
 
81
- def list_instances_of_type(self, class_uri: URIRef) -> list[ResultRow]:
111
+ def list_instances_of_type(self, class_uri: URIRef, named_graph: URIRef | None = None) -> list[ResultRow]:
82
112
  """Get all triples for instances of a given class
83
113
 
84
114
  Args:
85
115
  class_uri: Class for which instances are to be found
116
+ named_graph: Named graph to query over, default None (default graph)
86
117
 
87
118
  Returns:
88
- List of triples for instances of the given class
119
+ List of triples for instances of the given class in the named graph
89
120
  """
90
121
  query = (
91
122
  f"SELECT ?instance ?prop ?value "
@@ -93,17 +124,22 @@ class Queries:
93
124
  )
94
125
 
95
126
  # Select queries gives an iterable of result rows
96
- return cast(list[ResultRow], list(self.graph.query(query)))
127
+ return cast(list[ResultRow], list(self.graph(named_graph).query(query)))
97
128
 
98
- def triples_of_type_instances(self, rdf_type: str | URIRef) -> list[tuple[str, str, str]]:
129
+ def triples_of_type_instances(
130
+ self, rdf_type: str | URIRef, named_graph: URIRef | None = None
131
+ ) -> list[tuple[str, str, str]]:
99
132
  """Get all triples of a given type.
100
133
 
101
- This method assumes the graph has been transformed into the default namespace.
134
+ Args:
135
+ rdf_type: Type URI to query
136
+ named_graph: Named graph to query over, default None (default graph)
102
137
  """
138
+ named_graph = named_graph or self.default_named_graph
103
139
  if isinstance(rdf_type, URIRef):
104
140
  rdf_uri = rdf_type
105
- elif isinstance(rdf_type, str) and self.rules:
106
- rdf_uri = self.rules.metadata.namespace[rdf_type]
141
+ elif isinstance(rdf_type, str) and self.rules and self.rules.get(named_graph):
142
+ rdf_uri = self.rules[named_graph].metadata.namespace[rdf_type]
107
143
  else:
108
144
  warnings.warn(
109
145
  "Unknown namespace. Please either provide a URIRef or set the rules of the store.",
@@ -117,46 +153,50 @@ class Queries:
117
153
  "order by ?instance"
118
154
  )
119
155
 
120
- result = self.graph.query(query)
156
+ result = self.graph(named_graph).query(query)
121
157
 
122
158
  # We cannot include the RDF.type in case there is a neat:type property
123
159
  return [remove_namespace_from_uri(list(triple)) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index, arg-type]
124
160
 
125
- def type_with_property(self, type_: URIRef, property_uri: URIRef) -> bool:
161
+ def type_with_property(self, type_: URIRef, property_uri: URIRef, named_graph: URIRef | None = None) -> bool:
126
162
  """Check if a property exists in the graph store
127
163
 
128
164
  Args:
165
+ type_: Type URI to check
129
166
  property_uri: Property URI to check
167
+ named_graph: Named graph to query over, default None (default graph)
130
168
 
131
169
  Returns:
132
170
  True if property exists, False otherwise
133
171
  """
134
172
  query = f"SELECT ?o WHERE {{ ?s a <{type_}> ; <{property_uri}> ?o .}} Limit 1"
135
- return bool(list(self.graph.query(query)))
173
+ return bool(list(self.graph(named_graph).query(query)))
136
174
 
137
- def has_namespace(self, namespace: Namespace) -> bool:
175
+ def has_namespace(self, namespace: Namespace, named_graph: URIRef | None = None) -> bool:
138
176
  """Check if a namespace exists in the graph store
139
177
 
140
178
  Args:
141
179
  namespace: Namespace to check
180
+ named_graph: Named graph to query over, default None (default graph)
142
181
 
143
182
  Returns:
144
183
  True if namespace exists, False otherwise
145
184
  """
146
185
  query = f"ASK WHERE {{ ?s ?p ?o . FILTER(STRSTARTS(STR(?p), STR(<{namespace}>))) }}"
147
- return bool(self.graph.query(query))
186
+ return bool(self.graph(named_graph).query(query))
148
187
 
149
- def has_type(self, type_: URIRef) -> bool:
188
+ def has_type(self, type_: URIRef, named_graph: URIRef | None = None) -> bool:
150
189
  """Check if a type exists in the graph store
151
190
 
152
191
  Args:
153
192
  type_: Type to check
193
+ named_graph: Named graph to query over, default None (default graph)
154
194
 
155
195
  Returns:
156
196
  True if type exists, False otherwise
157
197
  """
158
198
  query = f"ASK WHERE {{ ?s a <{type_}> }}"
159
- return bool(self.graph.query(query))
199
+ return bool(self.graph(named_graph).query(query))
160
200
 
161
201
  def describe(
162
202
  self,
@@ -164,6 +204,7 @@ class Queries:
164
204
  instance_type: str | None = None,
165
205
  property_renaming_config: dict | None = None,
166
206
  property_types: dict[str, EntityTypes] | None = None,
207
+ named_graph: URIRef | None = None,
167
208
  ) -> tuple[str, dict[str | InstanceType, list[str]]] | None:
168
209
  """DESCRIBE instance for a given class from the graph store
169
210
 
@@ -172,6 +213,7 @@ class Queries:
172
213
  instance_type: Type of the instance, default None (will be inferred from triples)
173
214
  property_renaming_config: Dictionary to rename properties, default None (no renaming)
174
215
  property_types: Dictionary of property types, default None (helper for removal of namespace)
216
+ named_graph: Named graph to query over, default None (default graph)
175
217
 
176
218
 
177
219
  Returns:
@@ -179,7 +221,7 @@ class Queries:
179
221
  """
180
222
  property_values: dict[str, list[str]] = defaultdict(list)
181
223
  identifier = remove_namespace_from_uri(instance_id, validation="prefix")
182
- for _, predicate, object_ in cast(list[ResultRow], self.graph.query(f"DESCRIBE <{instance_id}>")):
224
+ for _, predicate, object_ in cast(list[ResultRow], self.graph(named_graph).query(f"DESCRIBE <{instance_id}>")):
183
225
  if object_.lower() in [
184
226
  "",
185
227
  "none",
@@ -190,45 +232,28 @@ class Queries:
190
232
 
191
233
  # set property
192
234
  if property_renaming_config and predicate != RDF.type:
193
- property_ = property_renaming_config.get(
194
- predicate, remove_namespace_from_uri(predicate, validation="prefix")
195
- )
235
+ property_ = remove_namespace_from_uri(predicate, validation="prefix")
236
+ renamed_property_ = property_renaming_config.get(predicate, property_)
237
+
196
238
  elif not property_renaming_config and predicate != RDF.type:
197
239
  property_ = remove_namespace_from_uri(predicate, validation="prefix")
240
+ renamed_property_ = property_
241
+
198
242
  else:
199
243
  property_ = RDF.type
244
+ renamed_property_ = property_
200
245
 
201
- # set value
202
- # if it is URIRef and property type is object property, we need to remove namespace
203
- # if it URIref but we are doing this into data type property, we do not remove namespace
204
- # case 1 for RDF type we remove namespace
205
- if property_ == RDF.type:
206
- value = remove_namespace_from_uri(object_, validation="prefix")
207
-
208
- # case 2 for define object properties we remove namespace
209
- elif (
210
- isinstance(object_, URIRef)
211
- and property_types
212
- and (
213
- property_types.get(property_, None) == EntityTypes.object_property
214
- or property_types.get(property_, None) == EntityTypes.undefined
215
- )
216
- ):
246
+ if isinstance(object_, URIRef):
217
247
  value = remove_namespace_from_uri(object_, validation="prefix")
218
-
219
- # case 3 when property type is not defined and returned value is URIRef we remove namespace
220
- elif isinstance(object_, URIRef) and not property_types:
221
- value = remove_namespace_from_uri(object_, validation="prefix")
222
-
223
- # case 4 for data type properties we do not remove namespace but keep the entire value
224
- # but we drop the datatype part, and keep everything to be string (data loader will do the conversion)
225
- # for value type it expects (if possible)
248
+ elif isinstance(object_, RdfLiteral):
249
+ value = object_.toPython()
226
250
  else:
251
+ # It is a blank node
227
252
  value = str(object_)
228
253
 
229
254
  # add type to the dictionary
230
255
  if predicate != RDF.type:
231
- property_values[property_].append(value)
256
+ property_values[renamed_property_].append(value)
232
257
  else:
233
258
  # guarding against multiple rdf:type values as this is not allowed in CDF
234
259
  if RDF.type not in property_values:
@@ -249,6 +274,7 @@ class Queries:
249
274
  class_: str,
250
275
  properties_optional: bool = True,
251
276
  instance_id: URIRef | None = None,
277
+ named_graph: URIRef | None = None,
252
278
  ) -> list[tuple[str, str, str]]:
253
279
  """CONSTRUCT instances for a given class from the graph store
254
280
 
@@ -256,21 +282,29 @@ class Queries:
256
282
  class_: Class entity for which we want to generate query
257
283
  properties_optional: Whether to make all properties optional, default True
258
284
  instance_id: Instance id to filter on, default None (all)
285
+ named_graph: Named graph to query over, default None (default graph)
259
286
 
260
287
  Returns:
261
288
  List of triples for instances of the given class
262
289
  """
263
-
264
- if self.rules and (
265
- query := build_construct_query(
266
- class_=ClassEntity(prefix=self.rules.metadata.prefix, suffix=class_),
267
- graph=self.graph,
268
- rules=self.rules,
269
- properties_optional=properties_optional,
270
- instance_id=instance_id,
290
+ named_graph = named_graph or self.default_named_graph
291
+ if (
292
+ self.rules
293
+ and self.rules.get(named_graph)
294
+ and (
295
+ query := build_construct_query(
296
+ class_=ClassEntity(
297
+ prefix=self.rules[named_graph].metadata.prefix,
298
+ suffix=class_,
299
+ ),
300
+ graph=self.graph(named_graph),
301
+ rules=self.rules[named_graph],
302
+ properties_optional=properties_optional,
303
+ instance_id=instance_id,
304
+ )
271
305
  )
272
306
  ):
273
- result = self.graph.query(query)
307
+ result = self.graph(named_graph).query(query)
274
308
 
275
309
  # We cannot include the RDF.type in case there is a neat:type property
276
310
  return [remove_namespace_from_uri(cast(ResultRow, triple)) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index, arg-type]
@@ -281,25 +315,36 @@ class Queries:
281
315
  )
282
316
  return []
283
317
 
284
- def list_triples(self, limit: int = 25) -> list[ResultRow]:
318
+ def list_triples(self, limit: int = 25, named_graph: URIRef | None = None) -> list[ResultRow]:
285
319
  """List triples in the graph store
286
320
 
287
321
  Args:
288
322
  limit: Max number of triples to return, by default 25
323
+ named_graph: Named graph to query over, default None (default graph)
289
324
 
290
325
  Returns:
291
326
  List of triples
292
327
  """
293
328
  query = f"SELECT ?subject ?predicate ?object WHERE {{ ?subject ?predicate ?object }} LIMIT {limit}"
294
- return cast(list[ResultRow], list(self.graph.query(query)))
329
+ return cast(list[ResultRow], list(self.graph(named_graph).query(query)))
295
330
 
296
331
  @overload
297
332
  def list_types(self, remove_namespace: Literal[False] = False, limit: int = 25) -> list[ResultRow]: ...
298
333
 
299
334
  @overload
300
- def list_types(self, remove_namespace: Literal[True], limit: int = 25) -> list[str]: ...
335
+ def list_types(
336
+ self,
337
+ remove_namespace: Literal[True],
338
+ limit: int = 25,
339
+ named_graph: URIRef | None = None,
340
+ ) -> list[str]: ...
301
341
 
302
- def list_types(self, remove_namespace: bool = False, limit: int = 25) -> list[ResultRow] | list[str]:
342
+ def list_types(
343
+ self,
344
+ remove_namespace: bool = False,
345
+ limit: int = 25,
346
+ named_graph: URIRef | None = None,
347
+ ) -> list[ResultRow] | list[str]:
303
348
  """List types in the graph store
304
349
 
305
350
  Args:
@@ -310,13 +355,14 @@ class Queries:
310
355
  List of types
311
356
  """
312
357
  query = f"SELECT DISTINCT ?type WHERE {{ ?subject a ?type }} LIMIT {limit}"
313
- result = cast(list[ResultRow], list(self.graph.query(query)))
358
+ result = cast(list[ResultRow], list(self.graph(named_graph).query(query)))
314
359
  if remove_namespace:
315
360
  return [remove_namespace_from_uri(res[0]) for res in result]
316
361
  return result
317
362
 
318
363
  def multi_value_type_property(
319
364
  self,
365
+ named_graph: URIRef | None = None,
320
366
  ) -> Iterable[tuple[URIRef, URIRef, list[URIRef]]]:
321
367
  query = """SELECT ?sourceType ?property
322
368
  (GROUP_CONCAT(DISTINCT STR(?valueType); SEPARATOR=",") AS ?valueTypes)
@@ -341,15 +387,20 @@ class Queries:
341
387
  value_types,
342
388
  ) in cast(
343
389
  ResultRow,
344
- self.graph.query(query.format(unknownType=str(UNKNOWN_TYPE))),
390
+ self.graph(named_graph).query(query.format(unknownType=str(NEAT.UnknownType))),
345
391
  ):
346
392
  yield cast(URIRef, source_type), cast(URIRef, property_), [URIRef(uri) for uri in value_types.split(",")]
347
393
 
348
- def drop_types(self, type_: list[URIRef]) -> dict[URIRef, int]:
394
+ def drop_types(
395
+ self,
396
+ type_: list[URIRef],
397
+ named_graph: URIRef | None = None,
398
+ ) -> dict[URIRef, int]:
349
399
  """Drop types from the graph store
350
400
 
351
401
  Args:
352
402
  type_: List of types to drop
403
+ named_graph: Named graph to query over, default None (default graph)
353
404
 
354
405
  Returns:
355
406
  Dictionary of dropped types
@@ -358,11 +409,16 @@ class Queries:
358
409
  for t in type_:
359
410
  instance_ids = self.list_instances_ids_of_class(t)
360
411
  dropped_types[t] = len(instance_ids)
361
- remove_instance_ids_in_batch(self.graph, instance_ids)
412
+ remove_instance_ids_in_batch(self.graph(named_graph), instance_ids)
362
413
  return dropped_types
363
414
 
364
- def multi_type_instances(self) -> dict[str, list[str]]:
365
- """Find instances with multiple types"""
415
+ def multi_type_instances(self, named_graph: URIRef | None = None) -> dict[str, list[str]]:
416
+ """Find instances with multiple types
417
+
418
+ Args:
419
+ named_graph: Named graph to query over, default None (default graph)
420
+
421
+ """
366
422
 
367
423
  query = """
368
424
  SELECT ?instance (GROUP_CONCAT(str(?type); SEPARATOR=",") AS ?types)
@@ -374,7 +430,7 @@ class Queries:
374
430
  """
375
431
 
376
432
  result = {}
377
- for instance, types in self.graph.query(query): # type: ignore
433
+ for instance, types in self.graph(named_graph).query(query): # type: ignore
378
434
  result[remove_namespace_from_uri(instance)] = remove_namespace_from_uri(types.split(","))
379
435
 
380
436
  return result
@@ -115,7 +115,7 @@ def to_construct_triples(
115
115
  graph_template_triple = Triple(
116
116
  subject="?instance",
117
117
  predicate=f"{transformation.class_.prefix}:{transformation.property_}",
118
- object=f'?{re.sub(r"[^_a-zA-Z0-9/_]", "_", str(transformation.property_).lower())}',
118
+ object=f"?{re.sub(r'[^_a-zA-Z0-9/_]', '_', str(transformation.property_).lower())}",
119
119
  optional=False,
120
120
  )
121
121
  templates.append(graph_template_triple)
@@ -17,7 +17,7 @@ from ._prune_graph import (
17
17
  PruneTypes,
18
18
  )
19
19
  from ._rdfpath import AddSelfReferenceProperty, MakeConnectionOnExactMatch
20
- from ._value_type import ConnectionToLiteral, ConvertLiteral, LiteralToEntity, SplitMultiValueProperty
20
+ from ._value_type import ConnectionToLiteral, ConvertLiteral, LiteralToEntity, SetNeatType, SplitMultiValueProperty
21
21
 
22
22
  __all__ = [
23
23
  "AddAssetDepth",
@@ -38,6 +38,7 @@ __all__ = [
38
38
  "PruneInstancesOfUnknownType",
39
39
  "PruneTypes",
40
40
  "RelationshipAsEdgeTransformer",
41
+ "SetNeatType",
41
42
  "SplitMultiValueProperty",
42
43
  ]
43
44
 
@@ -62,4 +63,5 @@ Transformers = (
62
63
  | ConnectionToLiteral
63
64
  | BaseTransformerStandardised
64
65
  | LookupRelationshipSourceTarget
66
+ | SetNeatType
65
67
  )
@@ -4,10 +4,10 @@ from typing import Any, cast
4
4
  from urllib.parse import quote
5
5
 
6
6
  import rdflib
7
- from rdflib import RDF, Namespace, URIRef
7
+ from rdflib import RDF, Literal, Namespace, URIRef
8
8
  from rdflib.query import ResultRow
9
9
 
10
- from cognite.neat._constants import UNKNOWN_TYPE
10
+ from cognite.neat._constants import NEAT
11
11
  from cognite.neat._issues.warnings import PropertyDataTypeConversionWarning
12
12
  from cognite.neat._utils.auxiliary import string_to_ideal_type
13
13
  from cognite.neat._utils.rdf_ import Triple, get_namespace, remove_namespace_from_uri
@@ -24,7 +24,7 @@ class SplitMultiValueProperty(BaseTransformerStandardised):
24
24
  _need_changes = frozenset({})
25
25
 
26
26
  def __init__(self, unknown_type: URIRef | None = None) -> None:
27
- self.unknown_type = unknown_type or UNKNOWN_TYPE
27
+ self.unknown_type = unknown_type or NEAT.UnknownType
28
28
 
29
29
  def _iterate_query(self) -> str:
30
30
  query = """SELECT ?subjectType ?property
@@ -305,3 +305,54 @@ class ConnectionToLiteral(BaseTransformerStandardised):
305
305
  row_output.instances_modified_count += 1
306
306
 
307
307
  return row_output
308
+
309
+
310
+ class SetNeatType(BaseTransformerStandardised):
311
+ description = "Set the sub type of an instance based on the property"
312
+
313
+ def __init__(
314
+ self, subject_type: URIRef, subject_predicate: URIRef, drop_property: bool, namespace: Namespace | None = None
315
+ ) -> None:
316
+ self.subject_type = subject_type
317
+ self.subject_predicate = subject_predicate
318
+ self.drop_property = drop_property
319
+ self._namespace = namespace or Namespace(get_namespace(subject_type))
320
+
321
+ def _count_query(self) -> str:
322
+ query = """SELECT (COUNT(?object) AS ?objectCount)
323
+ WHERE {{
324
+ ?instance a <{subject_type}> .
325
+ ?instance <{subject_predicate}> ?object
326
+ FILTER(isLiteral(?object))
327
+ }}"""
328
+ return query.format(subject_type=self.subject_type, subject_predicate=self.subject_predicate)
329
+
330
+ def _skip_count_query(self) -> str:
331
+ query = """SELECT (COUNT(?object) AS ?objectCount)
332
+ WHERE {{
333
+ ?instance a <{subject_type}> .
334
+ ?instance <{subject_predicate}> ?object
335
+ FILTER(isIRI(?object))
336
+ }}"""
337
+ return query.format(subject_type=self.subject_type, subject_predicate=self.subject_predicate)
338
+
339
+ def _iterate_query(self) -> str:
340
+ query = """SELECT ?instance ?object
341
+ WHERE {{
342
+ ?instance a <{subject_type}> .
343
+ ?instance <{subject_predicate}> ?object
344
+ FILTER(isLiteral(?object))
345
+ }}"""
346
+ return query.format(subject_type=self.subject_type, subject_predicate=self.subject_predicate)
347
+
348
+ def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
349
+ row_output = RowTransformationOutput()
350
+
351
+ instance, object_literal = cast(tuple[URIRef, Literal], query_result_row)
352
+ if self.drop_property:
353
+ row_output.remove_triples.append((instance, self.subject_predicate, object_literal))
354
+
355
+ row_output.add_triples.append((instance, NEAT.type, self._namespace[str(object_literal.toPython())]))
356
+ row_output.instances_modified_count += 1
357
+
358
+ return row_output
@@ -30,7 +30,7 @@ class ResourceRetrievalError(ResourceError[T_Identifier]):
30
30
 
31
31
  @dataclass(unsafe_hash=True)
32
32
  class ResourceNotFoundError(ResourceError, Generic[T_Identifier, T_ReferenceIdentifier]):
33
- """The {resource_type} with identifier {identifier} does not exist"""
33
+ """The {resource_type} with identifier '{identifier}' does not exist"""
34
34
 
35
35
  extra = " This is expected by {referred_type} {referred_by}."
36
36
 
@@ -31,7 +31,6 @@ from ._properties import (
31
31
  PropertyDirectRelationLimitWarning,
32
32
  PropertyNotFoundWarning,
33
33
  PropertyOverwritingWarning,
34
- PropertySkippedWarning,
35
34
  PropertyTypeNotSupportedWarning,
36
35
  PropertyValueTypeUndefinedWarning,
37
36
  )
@@ -68,7 +67,6 @@ __all__ = [
68
67
  "PropertyDirectRelationLimitWarning",
69
68
  "PropertyNotFoundWarning",
70
69
  "PropertyOverwritingWarning",
71
- "PropertySkippedWarning",
72
70
  "PropertyTypeNotSupportedWarning",
73
71
  "PropertyValueTypeUndefinedWarning",
74
72
  "RegexViolationWarning",
@@ -74,7 +74,7 @@ class CDFNotSupportedWarning(NeatWarning, ABC):
74
74
  class NotSupportedViewContainerLimitWarning(CDFNotSupportedWarning):
75
75
  """The view {view_id} maps, {count} containers, which is more than the limit {limit}."""
76
76
 
77
- fix = "Reduce the number of containers the view maps to." ""
77
+ fix = "Reduce the number of containers the view maps to."
78
78
 
79
79
  view_id: ViewId
80
80
  count: int
@@ -65,14 +65,6 @@ class PropertyOverwritingWarning(PropertyWarning[T_Identifier]):
65
65
  overwriting: tuple[str, ...]
66
66
 
67
67
 
68
- @dataclass(unsafe_hash=True)
69
- class PropertySkippedWarning(PropertyWarning[T_Identifier]):
70
- """The {resource_type} with identifier {identifier} has a property {property_name}
71
- which is skipped. {reason}."""
72
-
73
- reason: str
74
-
75
-
76
68
  @dataclass(unsafe_hash=True)
77
69
  class PropertyDataTypeConversionWarning(PropertyWarning[T_Identifier]):
78
70
  """The {resource_type} with identifier {identifier} failed to convert the property {property_name}: {error}"""
@@ -96,13 +96,13 @@ class InstanceTemplateExporter(BaseExporter[InformationRules, Workbook]):
96
96
  def _add_index_identifiers(workbook: Workbook, sheet: str, no_rows: int):
97
97
  """Adds index-based auto identifier to a sheet identifier column"""
98
98
  for i in range(no_rows):
99
- workbook[sheet][f"A{i+2}"] = f'=IF(ISBLANK(B{i+2}), "","{sheet}-{i+1}")'
99
+ workbook[sheet][f"A{i + 2}"] = f'=IF(ISBLANK(B{i + 2}), "","{sheet}-{i + 1}")'
100
100
 
101
101
 
102
102
  def _add_uuid_identifiers(workbook: Workbook, sheet: str, no_rows: int):
103
103
  """Adds UUID-based auto identifier to a sheet identifier column"""
104
104
  for i in range(no_rows):
105
- workbook[sheet][f"A{i+2}"] = f'=IF(ISBLANK(B{i+2}), "","{sheet}-{uuid.uuid4()}")'
105
+ workbook[sheet][f"A{i + 2}"] = f'=IF(ISBLANK(B{i + 2}), "","{sheet}-{uuid.uuid4()}")'
106
106
 
107
107
 
108
108
  def _add_drop_down_list(
@@ -122,7 +122,7 @@ def _add_drop_down_list(
122
122
  workbook[sheet].add_data_validation(drop_down_list)
123
123
 
124
124
  for i in range(no_rows):
125
- drop_down_list.add(workbook[sheet][f"{column}{i+2}"])
125
+ drop_down_list.add(workbook[sheet][f"{column}{i + 2}"])
126
126
 
127
127
 
128
128
  def _adjust_column_width(workbook: Workbook):
@@ -1,7 +1,7 @@
1
1
  from ._base import BaseImporter
2
2
  from ._dms2rules import DMSImporter
3
3
  from ._dtdl2rules import DTDLImporter
4
- from ._rdf import IMFImporter, InferenceImporter, OWLImporter
4
+ from ._rdf import IMFImporter, InferenceImporter, OWLImporter, SubclassInferenceImporter
5
5
  from ._spreadsheet2rules import ExcelImporter, GoogleSheetImporter
6
6
  from ._yaml2rules import YAMLImporter
7
7
 
@@ -14,6 +14,7 @@ __all__ = [
14
14
  "IMFImporter",
15
15
  "InferenceImporter",
16
16
  "OWLImporter",
17
+ "SubclassInferenceImporter",
17
18
  "YAMLImporter",
18
19
  ]
19
20
 
@@ -26,6 +27,7 @@ RulesImporters = (
26
27
  | DTDLImporter
27
28
  | YAMLImporter
28
29
  | InferenceImporter
30
+ | SubclassInferenceImporter
29
31
  )
30
32
 
31
33