cognite-neat 0.109.4__py3-none-any.whl → 0.111.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (88)
  1. cognite/neat/_alpha.py +8 -0
  2. cognite/neat/_client/_api/schema.py +43 -1
  3. cognite/neat/_client/data_classes/schema.py +4 -4
  4. cognite/neat/_constants.py +15 -1
  5. cognite/neat/_graph/extractors/__init__.py +4 -0
  6. cognite/neat/_graph/extractors/_classic_cdf/_base.py +8 -16
  7. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +48 -19
  8. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +23 -17
  9. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +15 -17
  10. cognite/neat/_graph/extractors/_dict.py +102 -0
  11. cognite/neat/_graph/extractors/_dms.py +27 -40
  12. cognite/neat/_graph/extractors/_dms_graph.py +30 -3
  13. cognite/neat/_graph/extractors/_iodd.py +3 -3
  14. cognite/neat/_graph/extractors/_mock_graph_generator.py +9 -7
  15. cognite/neat/_graph/extractors/_raw.py +67 -0
  16. cognite/neat/_graph/loaders/_base.py +20 -4
  17. cognite/neat/_graph/loaders/_rdf2dms.py +476 -383
  18. cognite/neat/_graph/queries/_base.py +163 -133
  19. cognite/neat/_graph/transformers/__init__.py +1 -3
  20. cognite/neat/_graph/transformers/_classic_cdf.py +6 -22
  21. cognite/neat/_graph/transformers/_rdfpath.py +2 -49
  22. cognite/neat/_issues/__init__.py +1 -6
  23. cognite/neat/_issues/_base.py +21 -252
  24. cognite/neat/_issues/_contextmanagers.py +46 -0
  25. cognite/neat/_issues/_factory.py +69 -0
  26. cognite/neat/_issues/errors/__init__.py +20 -4
  27. cognite/neat/_issues/errors/_external.py +7 -0
  28. cognite/neat/_issues/errors/_wrapper.py +81 -3
  29. cognite/neat/_issues/formatters.py +4 -4
  30. cognite/neat/_issues/warnings/__init__.py +3 -2
  31. cognite/neat/_issues/warnings/_properties.py +8 -0
  32. cognite/neat/_issues/warnings/user_modeling.py +12 -0
  33. cognite/neat/_rules/_constants.py +12 -0
  34. cognite/neat/_rules/_shared.py +3 -2
  35. cognite/neat/_rules/analysis/__init__.py +2 -3
  36. cognite/neat/_rules/analysis/_base.py +430 -259
  37. cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
  38. cognite/neat/_rules/exporters/_rules2excel.py +3 -9
  39. cognite/neat/_rules/exporters/_rules2instance_template.py +2 -2
  40. cognite/neat/_rules/exporters/_rules2ontology.py +5 -4
  41. cognite/neat/_rules/importers/_base.py +2 -47
  42. cognite/neat/_rules/importers/_dms2rules.py +7 -10
  43. cognite/neat/_rules/importers/_dtdl2rules/dtdl_importer.py +2 -2
  44. cognite/neat/_rules/importers/_rdf/_inference2rules.py +66 -26
  45. cognite/neat/_rules/importers/_rdf/_shared.py +1 -1
  46. cognite/neat/_rules/importers/_spreadsheet2rules.py +12 -9
  47. cognite/neat/_rules/models/_base_rules.py +0 -2
  48. cognite/neat/_rules/models/data_types.py +7 -0
  49. cognite/neat/_rules/models/dms/_exporter.py +9 -8
  50. cognite/neat/_rules/models/dms/_rules.py +29 -2
  51. cognite/neat/_rules/models/dms/_rules_input.py +9 -1
  52. cognite/neat/_rules/models/dms/_validation.py +115 -5
  53. cognite/neat/_rules/models/entities/_loaders.py +1 -1
  54. cognite/neat/_rules/models/entities/_multi_value.py +2 -2
  55. cognite/neat/_rules/models/entities/_single_value.py +8 -3
  56. cognite/neat/_rules/models/entities/_wrapped.py +2 -2
  57. cognite/neat/_rules/models/information/_rules.py +18 -17
  58. cognite/neat/_rules/models/information/_rules_input.py +3 -1
  59. cognite/neat/_rules/models/information/_validation.py +66 -17
  60. cognite/neat/_rules/transformers/__init__.py +8 -2
  61. cognite/neat/_rules/transformers/_converters.py +234 -44
  62. cognite/neat/_rules/transformers/_verification.py +5 -10
  63. cognite/neat/_session/_base.py +6 -4
  64. cognite/neat/_session/_explore.py +39 -0
  65. cognite/neat/_session/_inspect.py +25 -6
  66. cognite/neat/_session/_prepare.py +12 -0
  67. cognite/neat/_session/_read.py +88 -20
  68. cognite/neat/_session/_set.py +7 -1
  69. cognite/neat/_session/_show.py +11 -123
  70. cognite/neat/_session/_state.py +6 -2
  71. cognite/neat/_session/_subset.py +64 -0
  72. cognite/neat/_session/_to.py +177 -19
  73. cognite/neat/_store/_graph_store.py +9 -246
  74. cognite/neat/_utils/rdf_.py +36 -5
  75. cognite/neat/_utils/spreadsheet.py +44 -1
  76. cognite/neat/_utils/text.py +124 -37
  77. cognite/neat/_utils/upload.py +2 -0
  78. cognite/neat/_version.py +2 -2
  79. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/METADATA +1 -1
  80. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/RECORD +83 -82
  81. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/WHEEL +1 -1
  82. cognite/neat/_graph/queries/_construct.py +0 -187
  83. cognite/neat/_graph/queries/_shared.py +0 -173
  84. cognite/neat/_rules/analysis/_dms.py +0 -57
  85. cognite/neat/_rules/analysis/_information.py +0 -249
  86. cognite/neat/_rules/models/_rdfpath.py +0 -372
  87. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/LICENSE +0 -0
  88. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/entry_points.txt +0 -0
@@ -1,22 +1,17 @@
1
- import warnings
1
+ import urllib.parse
2
2
  from collections import defaultdict
3
3
  from collections.abc import Iterable
4
- from typing import Literal, cast, overload
4
+ from typing import Any, Literal, cast, overload
5
5
 
6
- from rdflib import RDF, Dataset, Graph, Namespace, URIRef
6
+ from rdflib import RDF, XSD, Dataset, Graph, Namespace, URIRef
7
7
  from rdflib import Literal as RdfLiteral
8
8
  from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
9
9
  from rdflib.query import ResultRow
10
10
 
11
11
  from cognite.neat._constants import NEAT
12
- from cognite.neat._rules._constants import EntityTypes
13
- from cognite.neat._rules.models.entities import ClassEntity
14
- from cognite.neat._rules.models.information import InformationRules
15
12
  from cognite.neat._shared import InstanceType
16
13
  from cognite.neat._utils.rdf_ import remove_instance_ids_in_batch, remove_namespace_from_uri
17
14
 
18
- from ._construct import build_construct_query
19
-
20
15
 
21
16
  class Queries:
22
17
  """Helper class for storing standard queries for the graph store."""
@@ -24,20 +19,17 @@ class Queries:
24
19
  def __init__(
25
20
  self,
26
21
  dataset: Dataset,
27
- rules: dict[URIRef, InformationRules] | None = None,
28
22
  default_named_graph: URIRef | None = None,
29
23
  ):
30
24
  self.dataset = dataset
31
- self.rules = rules or {}
32
25
  self.default_named_graph = default_named_graph or DATASET_DEFAULT_GRAPH_ID
33
26
 
34
27
  def graph(self, named_graph: URIRef | None = None) -> Graph:
35
28
  """Get named graph from the dataset to query over"""
36
29
  return self.dataset.graph(named_graph or self.default_named_graph)
37
30
 
38
- def summarize_instances(self, named_graph: URIRef | None = None) -> list[tuple]:
31
+ def summarize_instances(self, named_graph: URIRef | None = None) -> list[tuple[str, int]]:
39
32
  """Summarize instances in the graph store by class and count"""
40
-
41
33
  query_statement = """ SELECT ?class (COUNT(?instance) AS ?instanceCount)
42
34
  WHERE {
43
35
  ?instance a ?class .
@@ -45,12 +37,12 @@ class Queries:
45
37
  GROUP BY ?class
46
38
  ORDER BY DESC(?instanceCount) """
47
39
 
48
- return [
40
+ return [ # type: ignore[misc]
49
41
  (
50
- remove_namespace_from_uri(cast(URIRef, cast(tuple, res)[0])),
51
- cast(RdfLiteral, cast(tuple, res)[1]).value,
42
+ remove_namespace_from_uri(cast(URIRef, class_)),
43
+ cast(RdfLiteral, count).value,
52
44
  )
53
- for res in list(self.graph(named_graph=named_graph).query(query_statement))
45
+ for class_, count in self.graph(named_graph=named_graph).query(query_statement)
54
46
  ]
55
47
 
56
48
  def types(self, named_graph: URIRef | None = None) -> dict[URIRef, str]:
@@ -81,6 +73,20 @@ class Queries:
81
73
  for (type_,) in list(self.graph(named_graph).query(query))
82
74
  }
83
75
 
76
+ def properties_by_type(self, named_graph: URIRef | None = None) -> dict[URIRef, dict[URIRef, str]]:
77
+ """Properties and their short form in the graph by type
78
+
79
+ Args:
80
+ named_graph: Named graph to query over, default None (default graph)
81
+
82
+ """
83
+ query = """SELECT DISTINCT ?type ?property
84
+ WHERE {?s a ?type . ?s ?property ?o . FILTER(?property != rdf:type)}"""
85
+ properties_by_type: dict[URIRef, dict[URIRef, str]] = defaultdict(dict)
86
+ for type_, property_ in cast(ResultRow, list(self.graph(named_graph).query(query))):
87
+ properties_by_type[type_][property_] = remove_namespace_from_uri(property_) # type: ignore[index]
88
+ return properties_by_type
89
+
84
90
  def property_uri(self, property_: str, named_graph: URIRef | None = None) -> list[URIRef]:
85
91
  """Get the URIRef of a property
86
92
 
@@ -90,73 +96,38 @@ class Queries:
90
96
  """
91
97
  return [k for k, v in self.properties(named_graph).items() if v == property_]
92
98
 
93
- def list_instances_ids_of_class(
99
+ @overload
100
+ def list_instances_ids(
101
+ self, class_uri: None = None, limit: int = -1, named_graph: URIRef | None = None
102
+ ) -> Iterable[tuple[URIRef, URIRef]]: ...
103
+
104
+ @overload
105
+ def list_instances_ids(
94
106
  self, class_uri: URIRef, limit: int = -1, named_graph: URIRef | None = None
95
- ) -> list[URIRef]:
96
- """Get instances ids for a given class
107
+ ) -> Iterable[URIRef]: ...
108
+
109
+ def list_instances_ids(
110
+ self, class_uri: URIRef | None = None, limit: int = -1, named_graph: URIRef | None = None
111
+ ) -> Iterable[URIRef] | Iterable[tuple[URIRef, URIRef]]:
112
+ """List all instance IDs
97
113
 
98
114
  Args:
99
- class_uri: Class for which instances are to be found
115
+ class_uri: Class for which instances are to be found, default None (all instances)
100
116
  limit: Max number of instances to return, by default -1 meaning all instances
101
117
  named_graph: Named graph to query over, default None (default graph)
102
118
 
103
119
  Returns:
104
120
  List of class instance URIs
105
121
  """
106
- query_statement = "SELECT DISTINCT ?subject WHERE { ?subject a <class> .} LIMIT X".replace(
107
- "class", class_uri
108
- ).replace("LIMIT X", "" if limit == -1 else f"LIMIT {limit}")
109
- return [cast(tuple, res)[0] for res in list(self.graph(named_graph).query(query_statement))]
110
-
111
- def list_instances_of_type(self, class_uri: URIRef, named_graph: URIRef | None = None) -> list[ResultRow]:
112
- """Get all triples for instances of a given class
113
-
114
- Args:
115
- class_uri: Class for which instances are to be found
116
- named_graph: Named graph to query over, default None (default graph)
117
-
118
- Returns:
119
- List of triples for instances of the given class in the named graph
120
- """
121
- query = (
122
- f"SELECT ?instance ?prop ?value "
123
- f"WHERE {{ ?instance rdf:type <{class_uri}> . ?instance ?prop ?value . }} order by ?instance "
124
- )
125
-
126
- # Select queries gives an iterable of result rows
127
- return cast(list[ResultRow], list(self.graph(named_graph).query(query)))
128
-
129
- def triples_of_type_instances(
130
- self, rdf_type: str | URIRef, named_graph: URIRef | None = None
131
- ) -> list[tuple[str, str, str]]:
132
- """Get all triples of a given type.
133
-
134
- Args:
135
- rdf_type: Type URI to query
136
- named_graph: Named graph to query over, default None (default graph)
137
- """
138
- named_graph = named_graph or self.default_named_graph
139
- if isinstance(rdf_type, URIRef):
140
- rdf_uri = rdf_type
141
- elif isinstance(rdf_type, str) and self.rules and self.rules.get(named_graph):
142
- rdf_uri = self.rules[named_graph].metadata.namespace[rdf_type]
122
+ query = "SELECT DISTINCT ?subject"
123
+ if class_uri:
124
+ query += f" WHERE {{ ?subject a <{class_uri}> .}}"
143
125
  else:
144
- warnings.warn(
145
- "Unknown namespace. Please either provide a URIRef or set the rules of the store.",
146
- stacklevel=2,
147
- )
148
- return []
149
-
150
- query = (
151
- "SELECT ?instance ?prop ?value "
152
- f"WHERE {{ ?instance a <{rdf_uri}> . ?instance ?prop ?value . }} "
153
- "order by ?instance"
154
- )
155
-
156
- result = self.graph(named_graph).query(query)
157
-
158
- # We cannot include the RDF.type in case there is a neat:type property
159
- return [remove_namespace_from_uri(list(triple)) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index, arg-type]
126
+ query += " ?type WHERE {{ ?subject a ?type .}}"
127
+ if limit != -1:
128
+ query += f" LIMIT {limit}"
129
+ # MyPy is not very happy with RDFLib, so just ignore the type hinting here
130
+ return (tuple(res) if class_uri is None else res[0] for res in self.graph(named_graph).query(query)) # type: ignore[index, return-value, arg-type]
160
131
 
161
132
  def type_with_property(self, type_: URIRef, property_uri: URIRef, named_graph: URIRef | None = None) -> bool:
162
133
  """Check if a property exists in the graph store
@@ -205,26 +176,25 @@ class Queries:
205
176
  def describe(
206
177
  self,
207
178
  instance_id: URIRef,
208
- instance_type: str | None = None,
179
+ instance_type: URIRef | None = None,
209
180
  property_renaming_config: dict | None = None,
210
- property_types: dict[str, EntityTypes] | None = None,
211
181
  named_graph: URIRef | None = None,
212
- ) -> tuple[str, dict[str | InstanceType, list[str]]] | None:
182
+ remove_uri_namespace: bool = True,
183
+ ) -> tuple[URIRef, dict[str | InstanceType, list[Any]]] | None:
213
184
  """DESCRIBE instance for a given class from the graph store
214
185
 
215
186
  Args:
216
187
  instance_id: Instance id for which we want to generate query
217
188
  instance_type: Type of the instance, default None (will be inferred from triples)
218
189
  property_renaming_config: Dictionary to rename properties, default None (no renaming)
219
- property_types: Dictionary of property types, default None (helper for removal of namespace)
220
190
  named_graph: Named graph to query over, default None (default graph)
191
+ remove_uri_namespace: Whether to remove the namespace from the URI, by default True
221
192
 
222
193
 
223
194
  Returns:
224
195
  Dictionary of instance properties
225
196
  """
226
- property_values: dict[str, list[str]] = defaultdict(list)
227
- identifier = remove_namespace_from_uri(instance_id, validation="prefix")
197
+ property_values: dict[str, list[str] | list[URIRef]] = defaultdict(list)
228
198
  for _, predicate, object_ in cast(list[ResultRow], self.graph(named_graph).query(f"DESCRIBE <{instance_id}>")):
229
199
  if object_.lower() in [
230
200
  "",
@@ -247,78 +217,42 @@ class Queries:
247
217
  property_ = RDF.type
248
218
  renamed_property_ = property_
249
219
 
250
- if isinstance(object_, URIRef):
220
+ value: Any
221
+ if isinstance(object_, URIRef) and remove_uri_namespace:
222
+ # These properties contain the space in the Namespace.
251
223
  value = remove_namespace_from_uri(object_, validation="prefix")
224
+ elif isinstance(object_, URIRef):
225
+ value = object_
252
226
  elif isinstance(object_, RdfLiteral):
253
- value = object_.toPython()
227
+ if object_.datatype == XSD._NS["json"]:
228
+ # For JSON literals, the .toPython() returns a Literal object.
229
+ value = str(object_)
230
+ else:
231
+ value = object_.toPython()
254
232
  else:
255
233
  # It is a blank node
256
234
  value = str(object_)
257
235
 
258
236
  # add type to the dictionary
259
237
  if predicate != RDF.type:
260
- property_values[renamed_property_].append(value)
238
+ property_values[renamed_property_].append(value) # type: ignore[arg-type]
261
239
  else:
262
240
  # guarding against multiple rdf:type values as this is not allowed in CDF
263
241
  if RDF.type not in property_values:
264
- property_values[RDF.type].append(instance_type if instance_type else value)
242
+ property_values[RDF.type].append(
243
+ remove_namespace_from_uri(instance_type, validation="prefix") if instance_type else value # type: ignore[arg-type]
244
+ )
265
245
  else:
266
246
  # we should not have multiple rdf:type values
267
247
  continue
268
248
  if property_values:
269
249
  return (
270
- identifier,
250
+ instance_id,
271
251
  property_values,
272
252
  )
273
253
  else:
274
254
  return None
275
255
 
276
- def construct_instances_of_class(
277
- self,
278
- class_: str,
279
- properties_optional: bool = True,
280
- instance_id: URIRef | None = None,
281
- named_graph: URIRef | None = None,
282
- ) -> list[tuple[str, str, str]]:
283
- """CONSTRUCT instances for a given class from the graph store
284
-
285
- Args:
286
- class_: Class entity for which we want to generate query
287
- properties_optional: Whether to make all properties optional, default True
288
- instance_ids: List of instance ids to filter on, default None (all)
289
- named_graph: Named graph to query over, default None (default graph
290
-
291
- Returns:
292
- List of triples for instances of the given class
293
- """
294
- named_graph = named_graph or self.default_named_graph
295
- if (
296
- self.rules
297
- and self.rules.get(named_graph)
298
- and (
299
- query := build_construct_query(
300
- class_=ClassEntity(
301
- prefix=self.rules[named_graph].metadata.prefix,
302
- suffix=class_,
303
- ),
304
- graph=self.graph(named_graph),
305
- rules=self.rules[named_graph],
306
- properties_optional=properties_optional,
307
- instance_id=instance_id,
308
- )
309
- )
310
- ):
311
- result = self.graph(named_graph).query(query)
312
-
313
- # We cannot include the RDF.type in case there is a neat:type property
314
- return [remove_namespace_from_uri(cast(ResultRow, triple)) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index, arg-type]
315
- else:
316
- warnings.warn(
317
- "No rules found for the graph store, returning empty list.",
318
- stacklevel=2,
319
- )
320
- return []
321
-
322
256
  def list_triples(self, limit: int = 25, named_graph: URIRef | None = None) -> list[ResultRow]:
323
257
  """List triples in the graph store
324
258
 
@@ -346,7 +280,7 @@ class Queries:
346
280
  def list_types(
347
281
  self,
348
282
  remove_namespace: bool = False,
349
- limit: int = 25,
283
+ limit: int | None = 25,
350
284
  named_graph: URIRef | None = None,
351
285
  ) -> list[ResultRow] | list[str]:
352
286
  """List types in the graph store
@@ -358,7 +292,9 @@ class Queries:
358
292
  Returns:
359
293
  List of types
360
294
  """
361
- query = f"SELECT DISTINCT ?type WHERE {{ ?subject a ?type }} LIMIT {limit}"
295
+ query = "SELECT DISTINCT ?type WHERE { ?subject a ?type }"
296
+ if limit is not None:
297
+ query += f" LIMIT {limit}"
362
298
  result = cast(list[ResultRow], list(self.graph(named_graph).query(query)))
363
299
  if remove_namespace:
364
300
  return [remove_namespace_from_uri(res[0]) for res in result]
@@ -411,7 +347,7 @@ class Queries:
411
347
  """
412
348
  dropped_types: dict[URIRef, int] = {}
413
349
  for t in type_:
414
- instance_ids = self.list_instances_ids_of_class(t)
350
+ instance_ids = list(self.list_instances_ids(t))
415
351
  dropped_types[t] = len(instance_ids)
416
352
  remove_instance_ids_in_batch(self.graph(named_graph), instance_ids)
417
353
  return dropped_types
@@ -438,3 +374,97 @@ class Queries:
438
374
  result[remove_namespace_from_uri(instance)] = remove_namespace_from_uri(types.split(","))
439
375
 
440
376
  return result
377
+
378
+ def count_of_type(self, class_uri: URIRef, named_graph: URIRef | None = None) -> int:
379
+ query = f"SELECT (COUNT(?instance) AS ?instanceCount) WHERE {{ ?instance a <{class_uri}> }}"
380
+ return int(next(iter(self.graph(named_graph).query(query)))[0]) # type: ignore[arg-type, index]
381
+
382
+ def types_with_instance_and_property_count(
383
+ self, remove_namespace: bool = True, named_graph: URIRef | None = None
384
+ ) -> list[dict[str, Any]]:
385
+ query = """
386
+ SELECT ?type (COUNT(DISTINCT ?instance) AS ?instanceCount) (COUNT(DISTINCT ?property) AS ?propertyCount)
387
+ WHERE {
388
+ ?instance a ?type .
389
+ ?instance ?property ?value .
390
+ FILTER(?property != rdf:type)
391
+ }
392
+ GROUP BY ?type
393
+ ORDER BY DESC(?instanceCount)"""
394
+ return [
395
+ {
396
+ "type": urllib.parse.unquote(remove_namespace_from_uri(type_)) if remove_namespace else type_,
397
+ "instanceCount": cast(RdfLiteral, instance_count).toPython(),
398
+ "propertyCount": cast(RdfLiteral, property_count).toPython(),
399
+ }
400
+ for type_, instance_count, property_count in list(
401
+ cast(list[ResultRow], self.graph(named_graph).query(query))
402
+ )
403
+ ]
404
+
405
+ def properties_with_count(
406
+ self, remove_namespace: bool = True, named_graph: URIRef | None = None
407
+ ) -> list[dict[str, Any]]:
408
+ instance_count_by_type = {
409
+ entry["type"]: entry["instanceCount"]
410
+ for entry in self.types_with_instance_and_property_count(remove_namespace=False, named_graph=named_graph)
411
+ }
412
+ query = """SELECT ?type ?property (COUNT(DISTINCT ?instance) AS ?instanceCount)
413
+ WHERE {
414
+ ?instance a ?type .
415
+ ?instance ?property ?value .
416
+ FILTER(?property != rdf:type)
417
+ }
418
+ GROUP BY ?type ?property
419
+ ORDER BY ASC(?type) ASC(?property)"""
420
+ return [
421
+ {
422
+ "type": urllib.parse.unquote(remove_namespace_from_uri(type_)) if remove_namespace else type_,
423
+ "property": urllib.parse.unquote(remove_namespace_from_uri(property)) if remove_namespace else property,
424
+ "instanceCount": cast(RdfLiteral, instance_count).toPython(),
425
+ "total": instance_count_by_type[type_],
426
+ }
427
+ for type_, property, instance_count in list(cast(list[ResultRow], self.graph(named_graph).query(query)))
428
+ ]
429
+
430
+ @overload
431
+ def instances_with_properties(
432
+ self, type: URIRef, remove_namespace: Literal[False], named_graph: URIRef | None = None
433
+ ) -> dict[URIRef, set[URIRef]]: ...
434
+
435
+ @overload
436
+ def instances_with_properties(
437
+ self, type: URIRef, remove_namespace: Literal[True], named_graph: URIRef | None = None
438
+ ) -> dict[str, set[str]]: ...
439
+
440
+ def instances_with_properties(
441
+ self, type: URIRef, remove_namespace: bool = True, named_graph: URIRef | None = None
442
+ ) -> dict[str, set[str]] | dict[URIRef, set[URIRef]]:
443
+ query = """SELECT DISTINCT ?instance ?property
444
+ WHERE {{
445
+ ?instance a <{type}> .
446
+ ?instance ?property ?value .
447
+ FILTER(?property != rdf:type)
448
+ }}"""
449
+ result = defaultdict(set)
450
+ for instance, property_ in cast(Iterable[ResultRow], self.graph(named_graph).query(query.format(type=type))):
451
+ instance_str = urllib.parse.unquote(remove_namespace_from_uri(instance)) if remove_namespace else instance
452
+ property_str = urllib.parse.unquote(remove_namespace_from_uri(property_)) if remove_namespace else property_
453
+ result[instance_str].add(property_str)
454
+ return result
455
+
456
+ def list_instances_ids_by_space(
457
+ self, space_property: URIRef, named_graph: URIRef | None = None
458
+ ) -> Iterable[tuple[URIRef, str]]:
459
+ """Returns instance ids by space"""
460
+ query = f"""SELECT DISTINCT ?instance ?space
461
+ WHERE {{?instance <{space_property}> ?space}}"""
462
+
463
+ for result in cast(Iterable[ResultRow], self.graph(named_graph).query(query)):
464
+ instance_id, space = cast(tuple[URIRef, URIRef | RdfLiteral], result)
465
+ if isinstance(space, URIRef):
466
+ yield instance_id, remove_namespace_from_uri(space)
467
+ elif isinstance(space, RdfLiteral):
468
+ yield instance_id, str(space.toPython())
469
+ else:
470
+ yield instance_id, str(space)
@@ -16,12 +16,11 @@ from ._prune_graph import (
16
16
  PruneInstancesOfUnknownType,
17
17
  PruneTypes,
18
18
  )
19
- from ._rdfpath import AddSelfReferenceProperty, MakeConnectionOnExactMatch
19
+ from ._rdfpath import MakeConnectionOnExactMatch
20
20
  from ._value_type import ConnectionToLiteral, ConvertLiteral, LiteralToEntity, SetType, SplitMultiValueProperty
21
21
 
22
22
  __all__ = [
23
23
  "AddAssetDepth",
24
- "AddSelfReferenceProperty",
25
24
  "AssetEventConnector",
26
25
  "AssetFileConnector",
27
26
  "AssetRelationshipConnector",
@@ -49,7 +48,6 @@ Transformers = (
49
48
  | AssetFileConnector
50
49
  | AssetEventConnector
51
50
  | AssetRelationshipConnector
52
- | AddSelfReferenceProperty
53
51
  | SplitMultiValueProperty
54
52
  | RelationshipAsEdgeTransformer
55
53
  | MakeConnectionOnExactMatch
@@ -240,50 +240,36 @@ class AssetRelationshipConnector(BaseTransformerStandardised):
240
240
  str(extractors.RelationshipsExtractor.__name__),
241
241
  }
242
242
  )
243
- _asset_template: str = """SELECT ?source ?target WHERE {{
244
- <{relationship_id}> <{relationship_source_xid_prop}> ?source_xid .
245
- ?source <{asset_xid_property}> ?source_xid .
246
- ?source a <{asset_type}> .
247
-
248
- <{relationship_id}> <{relationship_target_xid_prop}> ?target_xid .
249
- ?target <{asset_xid_property}> ?target_xid .
250
- ?target a <{asset_type}> .}}"""
251
243
 
252
244
  def _count_query(self) -> str:
253
- query = """SELECT (COUNT(?target) as ?count) WHERE {{
245
+ query = """SELECT (COUNT(?target_xid) as ?count) WHERE {{
254
246
  ?relationship a <{relationship_type}> .
255
247
  ?relationship <{relationship_source_xid_prop}> ?source_xid .
256
- ?source <{asset_xid_property}> ?source_xid .
257
- ?source a <{asset_type}> .
248
+ ?source_xid a <{asset_type}> .
258
249
 
259
250
  ?relationship <{relationship_target_xid_prop}> ?target_xid .
260
- ?target <{asset_xid_property}> ?target_xid .
261
- ?target a <{asset_type}> .}}"""
251
+ ?target_xid a <{asset_type}> .}}"""
262
252
 
263
253
  return query.format(
264
254
  relationship_type=self.relationship_type,
265
255
  relationship_source_xid_prop=self.relationship_source_xid_prop,
266
256
  relationship_target_xid_prop=self.relationship_target_xid_prop,
267
- asset_xid_property=self.asset_xid_property,
268
257
  asset_type=self.asset_type,
269
258
  )
270
259
 
271
260
  def _iterate_query(self) -> str:
272
- query = """SELECT ?source ?relationship ?target WHERE {{
261
+ query = """SELECT ?source_xid ?relationship ?target_xid WHERE {{
273
262
  ?relationship a <{relationship_type}> .
274
263
  ?relationship <{relationship_source_xid_prop}> ?source_xid .
275
- ?source <{asset_xid_property}> ?source_xid .
276
- ?source a <{asset_type}> .
264
+ ?source_xid a <{asset_type}> .
277
265
 
278
266
  ?relationship <{relationship_target_xid_prop}> ?target_xid .
279
- ?target <{asset_xid_property}> ?target_xid .
280
- ?target a <{asset_type}> .}}"""
267
+ ?target_xid a <{asset_type}> .}}"""
281
268
 
282
269
  return query.format(
283
270
  relationship_type=self.relationship_type,
284
271
  relationship_source_xid_prop=self.relationship_source_xid_prop,
285
272
  relationship_target_xid_prop=self.relationship_target_xid_prop,
286
- asset_xid_property=self.asset_xid_property,
287
273
  asset_type=self.asset_type,
288
274
  )
289
275
 
@@ -293,13 +279,11 @@ class AssetRelationshipConnector(BaseTransformerStandardised):
293
279
  relationship_type: URIRef | None = None,
294
280
  relationship_source_xid_prop: URIRef | None = None,
295
281
  relationship_target_xid_prop: URIRef | None = None,
296
- asset_xid_property: URIRef | None = None,
297
282
  ):
298
283
  self.asset_type = asset_type or DEFAULT_NAMESPACE.Asset
299
284
  self.relationship_type = relationship_type or DEFAULT_NAMESPACE.Relationship
300
285
  self.relationship_source_xid_prop = relationship_source_xid_prop or DEFAULT_NAMESPACE.sourceExternalId
301
286
  self.relationship_target_xid_prop = relationship_target_xid_prop or DEFAULT_NAMESPACE.targetExternalId
302
- self.asset_xid_property = asset_xid_property or DEFAULT_NAMESPACE.externalId
303
287
 
304
288
  def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
305
289
  row_output = RowTransformationOutput()
@@ -1,59 +1,12 @@
1
1
  from typing import cast
2
2
  from urllib.parse import quote
3
3
 
4
- from rdflib import Graph, Namespace, URIRef
4
+ from rdflib import Namespace, URIRef
5
5
  from rdflib.query import ResultRow
6
6
 
7
- from cognite.neat._rules.analysis import InformationAnalysis
8
- from cognite.neat._rules.models._rdfpath import RDFPath, SingleProperty
9
- from cognite.neat._rules.models.information import InformationRules
10
7
  from cognite.neat._utils.rdf_ import get_namespace, remove_namespace_from_uri
11
8
 
12
- from ._base import BaseTransformer, BaseTransformerStandardised, RowTransformationOutput
13
-
14
-
15
- class ReduceHopTraversal(BaseTransformer):
16
- """ReduceHopTraversal is a transformer that reduces the number of hops to direct connection."""
17
-
18
- ...
19
-
20
-
21
- # TODO: Standardise
22
- class AddSelfReferenceProperty(BaseTransformer):
23
- description: str = "Adds property that contains id of reference to all references of given class in Rules"
24
- _use_only_once: bool = True
25
- _need_changes = frozenset({})
26
- _ref_template: str = """SELECT ?s WHERE {{?s a <{type_}>}}"""
27
-
28
- def __init__(
29
- self,
30
- rules: InformationRules,
31
- ):
32
- self.rules = rules
33
- self.properties = InformationAnalysis(rules).all_reference_transformations()
34
-
35
- def transform(self, graph: Graph) -> None:
36
- for property_ in self.properties:
37
- prefix = property_.instance_source.traversal.class_.prefix
38
- suffix = property_.instance_source.traversal.class_.suffix
39
-
40
- namespace = self.rules.prefixes[prefix] if prefix in self.rules.prefixes else self.rules.metadata.namespace
41
-
42
- for (reference,) in graph.query(self._ref_template.format(type_=namespace[suffix])): # type: ignore [misc]
43
- graph.add(
44
- (
45
- reference,
46
- self.rules.metadata.namespace[property_.property_],
47
- reference,
48
- )
49
- )
50
-
51
- traversal = SingleProperty.from_string(
52
- class_=property_.view.id,
53
- property_=f"{self.rules.metadata.prefix}:{property_.property_}",
54
- )
55
-
56
- property_.instance_source = RDFPath(traversal=traversal)
9
+ from ._base import BaseTransformerStandardised, RowTransformationOutput
57
10
 
58
11
 
59
12
  class MakeConnectionOnExactMatch(BaseTransformerStandardised):
@@ -2,24 +2,19 @@
2
2
  as some helper classes to handle them like NeatIssueList"""
3
3
 
4
4
  from ._base import (
5
- DefaultWarning,
6
5
  IssueList,
7
6
  MultiValueError,
8
7
  NeatError,
9
8
  NeatIssue,
10
- NeatIssueList,
11
9
  NeatWarning,
12
- catch_issues,
13
- catch_warnings,
14
10
  )
11
+ from ._contextmanagers import catch_issues, catch_warnings
15
12
 
16
13
  __all__ = [
17
- "DefaultWarning",
18
14
  "IssueList",
19
15
  "MultiValueError",
20
16
  "NeatError",
21
17
  "NeatIssue",
22
- "NeatIssueList",
23
18
  "NeatWarning",
24
19
  "catch_issues",
25
20
  "catch_warnings",