cognite-neat 0.119.2__py3-none-any.whl → 0.119.4__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (30)
  1. cognite/neat/_constants.py +34 -70
  2. cognite/neat/_graph/extractors/__init__.py +0 -6
  3. cognite/neat/_graph/loaders/_rdf2dms.py +5 -5
  4. cognite/neat/_graph/queries/__init__.py +1 -1
  5. cognite/neat/_graph/queries/_base.py +2 -456
  6. cognite/neat/_graph/queries/_queries.py +16 -0
  7. cognite/neat/_graph/queries/_select.py +440 -0
  8. cognite/neat/_graph/queries/_update.py +37 -0
  9. cognite/neat/_rules/exporters/_rules2excel.py +240 -107
  10. cognite/neat/_rules/models/_base_rules.py +16 -1
  11. cognite/neat/_rules/models/dms/_validation.py +10 -1
  12. cognite/neat/_rules/transformers/_converters.py +16 -6
  13. cognite/neat/_session/_drop.py +2 -2
  14. cognite/neat/_session/_explore.py +4 -4
  15. cognite/neat/_session/_prepare.py +5 -5
  16. cognite/neat/_session/_read.py +6 -0
  17. cognite/neat/_session/_set.py +3 -3
  18. cognite/neat/_session/_show.py +1 -1
  19. cognite/neat/_session/_template.py +24 -5
  20. cognite/neat/_state/README.md +23 -0
  21. cognite/neat/_store/_graph_store.py +38 -39
  22. cognite/neat/_version.py +1 -1
  23. {cognite_neat-0.119.2.dist-info → cognite_neat-0.119.4.dist-info}/METADATA +37 -2
  24. {cognite_neat-0.119.2.dist-info → cognite_neat-0.119.4.dist-info}/RECORD +27 -26
  25. cognite/neat/_graph/extractors/_dexpi.py +0 -234
  26. cognite/neat/_graph/extractors/_iodd.py +0 -403
  27. cognite/neat/_graph/transformers/_iodd.py +0 -30
  28. {cognite_neat-0.119.2.dist-info → cognite_neat-0.119.4.dist-info}/LICENSE +0 -0
  29. {cognite_neat-0.119.2.dist-info → cognite_neat-0.119.4.dist-info}/WHEEL +0 -0
  30. {cognite_neat-0.119.2.dist-info → cognite_neat-0.119.4.dist-info}/entry_points.txt +0 -0
@@ -65,76 +65,40 @@ COGNITE_SPACES = frozenset(
65
65
  }
66
66
  )
67
67
 
68
- COGNITE_CORE_CONCEPTS = frozenset(
69
- {
70
- "CogniteFile",
71
- "CogniteCubeMap",
72
- "CogniteCADRevision",
73
- "CognitePointCloudVolume",
74
- "Cognite360ImageAnnotation",
75
- "CogniteAnnotation",
76
- "CogniteUnit",
77
- "CogniteAsset",
78
- "Cognite3DObject",
79
- "Cognite3DRevision",
80
- "Cognite360Image",
81
- "CogniteDiagramAnnotation",
82
- "Cognite360ImageCollection",
83
- "Cognite360ImageStation",
84
- "CognitePointCloudModel",
85
- "CogniteTimeSeries",
86
- "Cognite3DTransformation",
87
- "CogniteEquipment",
88
- "Cognite360ImageModel",
89
- "CogniteAssetClass",
90
- "CogniteAssetType",
91
- "CogniteEquipmentType",
92
- "Cognite3DModel",
93
- "CogniteCADModel",
94
- "CognitePointCloudRevision",
95
- "CogniteCADNode",
96
- "CogniteFileCategory",
97
- "CogniteActivity",
98
- }
99
- )
100
-
101
-
102
- COGNITE_CORE_FEATURES = frozenset(
103
- {
104
- "CogniteDescribable",
105
- "CogniteSourceable",
106
- "CogniteSourceSystem",
107
- "CogniteSchedulable",
108
- "CogniteVisualizable",
109
- }
110
- )
111
-
112
- COGNITE_3D_CONCEPTS = frozenset(
113
- {
114
- "Cognite3DModel",
115
- "Cognite3DObject",
116
- "Cognite3DRevision",
117
- "Cognite3DTransformation",
118
- "Cognite360Image",
119
- "Cognite360ImageAnnotation",
120
- "Cognite360ImageCollection",
121
- "Cognite360ImageModel",
122
- "Cognite360ImageStation",
123
- "CogniteCADModel",
124
- "CogniteCADNode",
125
- "CogniteCADRevision",
126
- "CogniteCubeMap",
127
- "CognitePointCloudModel",
128
- "CognitePointCloudRevision",
129
- "CognitePointCloudVolume",
130
- }
131
- )
132
-
133
- COGNITE_ANNOTATION = frozenset(
134
- {
135
- "CogniteAnnotation",
136
- "CogniteDiagramAnnotation",
137
- }
68
+ COGNITE_CONCEPTS = (
69
+ "CogniteAsset",
70
+ "CogniteEquipment",
71
+ "CogniteActivity",
72
+ "CogniteTimeSeries",
73
+ "CogniteFile",
74
+ "CogniteUnit",
75
+ "CogniteAssetClass",
76
+ "CogniteAssetType",
77
+ "CogniteEquipmentType",
78
+ "CogniteFileCategory",
79
+ "CogniteDescribable",
80
+ "CogniteSourceable",
81
+ "CogniteSourceSystem",
82
+ "CogniteSchedulable",
83
+ "CogniteVisualizable",
84
+ "CogniteAnnotation",
85
+ "CogniteDiagramAnnotation",
86
+ "CogniteCubeMap",
87
+ "CogniteCADRevision",
88
+ "CognitePointCloudVolume",
89
+ "Cognite360ImageAnnotation",
90
+ "Cognite3DObject",
91
+ "Cognite3DRevision",
92
+ "Cognite360Image",
93
+ "Cognite360ImageCollection",
94
+ "Cognite360ImageStation",
95
+ "CognitePointCloudModel",
96
+ "Cognite3DTransformation",
97
+ "Cognite360ImageModel",
98
+ "Cognite3DModel",
99
+ "CogniteCADModel",
100
+ "CognitePointCloudRevision",
101
+ "CogniteCADNode",
138
102
  )
139
103
 
140
104
  DMS_LISTABLE_PROPERTY_LIMIT = 1000
@@ -10,11 +10,9 @@ from ._classic_cdf._labels import LabelsExtractor
10
10
  from ._classic_cdf._relationships import RelationshipsExtractor
11
11
  from ._classic_cdf._sequences import SequencesExtractor
12
12
  from ._classic_cdf._timeseries import TimeSeriesExtractor
13
- from ._dexpi import DexpiExtractor
14
13
  from ._dict import DictExtractor
15
14
  from ._dms import DMSExtractor
16
15
  from ._dms_graph import DMSGraphExtractor
17
- from ._iodd import IODDExtractor
18
16
  from ._mock_graph_generator import MockGraphGenerator
19
17
  from ._raw import RAWExtractor
20
18
  from ._rdf_file import RdfFileExtractor
@@ -26,11 +24,9 @@ __all__ = [
26
24
  "DMSExtractor",
27
25
  "DMSGraphExtractor",
28
26
  "DataSetExtractor",
29
- "DexpiExtractor",
30
27
  "DictExtractor",
31
28
  "EventsExtractor",
32
29
  "FilesExtractor",
33
- "IODDExtractor",
34
30
  "KnowledgeGraphExtractor",
35
31
  "LabelsExtractor",
36
32
  "MockGraphGenerator",
@@ -52,8 +48,6 @@ TripleExtractors = (
52
48
  | FilesExtractor
53
49
  | LabelsExtractor
54
50
  | RdfFileExtractor
55
- | DexpiExtractor
56
- | IODDExtractor
57
51
  | DMSExtractor
58
52
  | ClassicGraphExtractor
59
53
  | DataSetExtractor
@@ -259,7 +259,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
259
259
  """Selects the views with data."""
260
260
  view_iterations: dict[dm.ViewId, _ViewIterator] = {}
261
261
  for view_id, query in view_query_by_id.items():
262
- count = self.graph_store.queries.count_of_type(query.rdf_type)
262
+ count = self.graph_store.queries.select.count_of_type(query.rdf_type)
263
263
  if count > 0:
264
264
  view_iterations[view_id] = _ViewIterator(view_id, count, query)
265
265
  return view_iterations
@@ -269,7 +269,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
269
269
  if self._space_property is None:
270
270
  return issues
271
271
  total = sum(it.instance_count for it in view_iterations)
272
- properties_by_uriref = self.graph_store.queries.properties()
272
+ properties_by_uriref = self.graph_store.queries.select.properties()
273
273
  space_property_uri = next((k for k, v in properties_by_uriref.items() if v == self._space_property), None)
274
274
  if space_property_uri is None:
275
275
  error: ResourceNotFoundError[str, str] = ResourceNotFoundError(
@@ -282,7 +282,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
282
282
  issues.append(error)
283
283
  return issues
284
284
 
285
- instance_iterable = self.graph_store.queries.list_instances_ids_by_space(space_property_uri)
285
+ instance_iterable = self.graph_store.queries.select.list_instances_ids_by_space(space_property_uri)
286
286
  instance_iterable = iterate_progress_bar_if_above_config_threshold(
287
287
  instance_iterable, total, f"Looking up spaces for {total} instances..."
288
288
  )
@@ -308,8 +308,8 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
308
308
  if not self.neat_prefix_by_type_uri:
309
309
  return
310
310
 
311
- count = sum(count for _, count in self.graph_store.queries.summarize_instances())
312
- instance_iterable = self.graph_store.queries.list_instances_ids()
311
+ count = sum(count for _, count in self.graph_store.queries.select.summarize_instances())
312
+ instance_iterable = self.graph_store.queries.select.list_instances_ids()
313
313
  instance_iterable = iterate_progress_bar_if_above_config_threshold(
314
314
  instance_iterable, count, f"Looking up identifiers for {count} instances..."
315
315
  )
@@ -1,3 +1,3 @@
1
- from ._base import Queries
1
+ from ._queries import Queries
2
2
 
3
3
  __all__ = ["Queries"]
@@ -1,21 +1,8 @@
1
- import urllib.parse
2
- from collections import defaultdict
3
- from collections.abc import Iterable
4
- from typing import Any, Literal, cast, overload
5
-
6
- from rdflib import RDF, XSD, Dataset, Graph, Namespace, URIRef
7
- from rdflib import Literal as RdfLiteral
1
+ from rdflib import Dataset, Graph, URIRef
8
2
  from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
9
- from rdflib.query import ResultRow
10
-
11
- from cognite.neat._constants import NEAT
12
- from cognite.neat._shared import InstanceType
13
- from cognite.neat._utils.rdf_ import remove_instance_ids_in_batch, remove_namespace_from_uri
14
-
15
3
 
16
- class Queries:
17
- """Helper class for storing standard queries for the graph store."""
18
4
 
5
+ class BaseQuery:
19
6
  def __init__(
20
7
  self,
21
8
  dataset: Dataset,
@@ -27,444 +14,3 @@ class Queries:
27
14
  def graph(self, named_graph: URIRef | None = None) -> Graph:
28
15
  """Get named graph from the dataset to query over"""
29
16
  return self.dataset.graph(named_graph or self.default_named_graph)
30
-
31
- def summarize_instances(self, named_graph: URIRef | None = None) -> list[tuple[str, int]]:
32
- """Summarize instances in the graph store by class and count"""
33
- query_statement = """ SELECT ?class (COUNT(?instance) AS ?instanceCount)
34
- WHERE {
35
- ?instance a ?class .
36
- }
37
- GROUP BY ?class
38
- ORDER BY DESC(?instanceCount) """
39
-
40
- return [ # type: ignore[misc]
41
- (
42
- remove_namespace_from_uri(cast(URIRef, class_)),
43
- cast(RdfLiteral, count).value,
44
- )
45
- for class_, count in self.graph(named_graph=named_graph).query(query_statement)
46
- ]
47
-
48
- def types(self, named_graph: URIRef | None = None) -> dict[URIRef, str]:
49
- """Types and their short form in the graph"""
50
- query = """SELECT DISTINCT ?type
51
- WHERE {?s a ?type .}"""
52
-
53
- return { # type: ignore[misc, index, arg-type]
54
- cast(URIRef, type_): remove_namespace_from_uri(cast(URIRef, type_))
55
- for (type_,) in list(self.graph(named_graph).query(query))
56
- }
57
-
58
- def type_uri(self, type_: str, named_graph: URIRef | None = None) -> list[URIRef]:
59
- """Get the URIRef of a type"""
60
- return [k for k, v in self.types(named_graph).items() if v == type_]
61
-
62
- def properties(self, named_graph: URIRef | None = None) -> dict[URIRef, str]:
63
- """Properties and their short form in the graph
64
-
65
- Args:
66
- named_graph: Named graph to query over, default None (default graph)
67
-
68
- """
69
- query = """SELECT DISTINCT ?property
70
- WHERE {?s ?property ?o . FILTER(?property != rdf:type)}"""
71
- return { # type: ignore[misc, index, arg-type]
72
- cast(URIRef, type_): remove_namespace_from_uri(cast(URIRef, type_))
73
- for (type_,) in list(self.graph(named_graph).query(query))
74
- }
75
-
76
- def properties_by_type(self, named_graph: URIRef | None = None) -> dict[URIRef, dict[URIRef, str]]:
77
- """Properties and their short form in the graph by type
78
-
79
- Args:
80
- named_graph: Named graph to query over, default None (default graph)
81
-
82
- """
83
- query = """SELECT DISTINCT ?type ?property
84
- WHERE {?s a ?type . ?s ?property ?o . FILTER(?property != rdf:type)}"""
85
- properties_by_type: dict[URIRef, dict[URIRef, str]] = defaultdict(dict)
86
- for type_, property_ in cast(ResultRow, list(self.graph(named_graph).query(query))):
87
- properties_by_type[type_][property_] = remove_namespace_from_uri(property_) # type: ignore[index]
88
- return properties_by_type
89
-
90
- def property_uri(self, property_: str, named_graph: URIRef | None = None) -> list[URIRef]:
91
- """Get the URIRef of a property
92
-
93
- Args:
94
- property_: Property to find URIRef for
95
- named_graph: Named graph to query over, default None (default graph)
96
- """
97
- return [k for k, v in self.properties(named_graph).items() if v == property_]
98
-
99
- @overload
100
- def list_instances_ids(
101
- self, class_uri: None = None, limit: int = -1, named_graph: URIRef | None = None
102
- ) -> Iterable[tuple[URIRef, URIRef]]: ...
103
-
104
- @overload
105
- def list_instances_ids(
106
- self, class_uri: URIRef, limit: int = -1, named_graph: URIRef | None = None
107
- ) -> Iterable[URIRef]: ...
108
-
109
- def list_instances_ids(
110
- self, class_uri: URIRef | None = None, limit: int = -1, named_graph: URIRef | None = None
111
- ) -> Iterable[URIRef] | Iterable[tuple[URIRef, URIRef]]:
112
- """List all instance IDs
113
-
114
- Args:
115
- class_uri: Class for which instances are to be found, default None (all instances)
116
- limit: Max number of instances to return, by default -1 meaning all instances
117
- named_graph: Named graph to query over, default None (default graph)
118
-
119
- Returns:
120
- List of class instance URIs
121
- """
122
- query = "SELECT DISTINCT ?subject"
123
- if class_uri:
124
- query += f" WHERE {{ ?subject a <{class_uri}> .}}"
125
- else:
126
- query += " ?type WHERE {{ ?subject a ?type .}}"
127
- if limit != -1:
128
- query += f" LIMIT {limit}"
129
- # MyPy is not very happy with RDFLib, so just ignore the type hinting here
130
- return (tuple(res) if class_uri is None else res[0] for res in self.graph(named_graph).query(query)) # type: ignore[index, return-value, arg-type]
131
-
132
- def type_with_property(self, type_: URIRef, property_uri: URIRef, named_graph: URIRef | None = None) -> bool:
133
- """Check if a property exists in the graph store
134
-
135
- Args:
136
- type_: Type URI to check
137
- property_uri: Property URI to check
138
- named_graph: Named graph to query over, default None (default graph)
139
-
140
- Returns:
141
- True if property exists, False otherwise
142
- """
143
- query = f"SELECT ?o WHERE {{ ?s a <{type_}> ; <{property_uri}> ?o .}} Limit 1"
144
- return bool(list(self.graph(named_graph).query(query)))
145
-
146
- def has_namespace(self, namespace: Namespace, named_graph: URIRef | None = None) -> bool:
147
- """Check if a namespace exists in the graph store
148
-
149
- Args:
150
- namespace: Namespace to check
151
- named_graph: Named graph to query over, default None (default graph)
152
-
153
- Returns:
154
- True if namespace exists, False otherwise
155
- """
156
- query = f"ASK WHERE {{ ?s ?p ?o . FILTER(STRSTARTS(STR(?p), STR(<{namespace}>))) }}"
157
- return bool(self.graph(named_graph).query(query))
158
-
159
- def has_data(self) -> bool:
160
- """Check if the graph store has data"""
161
- return cast(bool, next(iter(self.dataset.query("ASK WHERE { ?s ?p ?o }"))))
162
-
163
- def has_type(self, type_: URIRef, named_graph: URIRef | None = None) -> bool:
164
- """Check if a type exists in the graph store
165
-
166
- Args:
167
- type_: Type to check
168
- named_graph: Named graph to query over, default None (default graph)
169
-
170
- Returns:
171
- True if type exists, False otherwise
172
- """
173
- query = f"ASK WHERE {{ ?s a <{type_}> }}"
174
- return bool(self.graph(named_graph).query(query))
175
-
176
- def describe(
177
- self,
178
- instance_id: URIRef,
179
- instance_type: URIRef | None = None,
180
- property_renaming_config: dict | None = None,
181
- named_graph: URIRef | None = None,
182
- remove_uri_namespace: bool = True,
183
- ) -> tuple[URIRef, dict[str | InstanceType, list[Any]]] | None:
184
- """DESCRIBE instance for a given class from the graph store
185
-
186
- Args:
187
- instance_id: Instance id for which we want to generate query
188
- instance_type: Type of the instance, default None (will be inferred from triples)
189
- property_renaming_config: Dictionary to rename properties, default None (no renaming)
190
- named_graph: Named graph to query over, default None (default graph)
191
- remove_uri_namespace: Whether to remove the namespace from the URI, by default True
192
-
193
-
194
- Returns:
195
- Dictionary of instance properties
196
- """
197
- property_values: dict[str, list[str] | list[URIRef]] = defaultdict(list)
198
- for _, predicate, object_ in cast(list[ResultRow], self.graph(named_graph).query(f"DESCRIBE <{instance_id}>")):
199
- if object_.lower() in [
200
- "",
201
- "none",
202
- "nan",
203
- "null",
204
- ]:
205
- continue
206
-
207
- # set property
208
- if property_renaming_config and predicate != RDF.type:
209
- property_ = remove_namespace_from_uri(predicate, validation="prefix")
210
- renamed_property_ = property_renaming_config.get(predicate, property_)
211
-
212
- elif not property_renaming_config and predicate != RDF.type:
213
- property_ = remove_namespace_from_uri(predicate, validation="prefix")
214
- renamed_property_ = property_
215
-
216
- else:
217
- property_ = RDF.type
218
- renamed_property_ = property_
219
-
220
- value: Any
221
- if isinstance(object_, URIRef) and remove_uri_namespace:
222
- # These properties contain the space in the Namespace.
223
- value = remove_namespace_from_uri(object_, validation="prefix")
224
- elif isinstance(object_, URIRef):
225
- value = object_
226
- elif isinstance(object_, RdfLiteral):
227
- if object_.datatype == XSD._NS["json"]:
228
- # For JSON literals, the .toPython() returns a Literal object.
229
- value = str(object_)
230
- else:
231
- value = object_.toPython()
232
- else:
233
- # It is a blank node
234
- value = str(object_)
235
-
236
- # add type to the dictionary
237
- if predicate != RDF.type:
238
- property_values[renamed_property_].append(value) # type: ignore[arg-type]
239
- else:
240
- # guarding against multiple rdf:type values as this is not allowed in CDF
241
- if RDF.type not in property_values:
242
- property_values[RDF.type].append(
243
- remove_namespace_from_uri(instance_type, validation="prefix") if instance_type else value # type: ignore[arg-type]
244
- )
245
- else:
246
- # we should not have multiple rdf:type values
247
- continue
248
- if property_values:
249
- return (
250
- instance_id,
251
- property_values,
252
- )
253
- else:
254
- return None
255
-
256
- def list_triples(self, limit: int = 25, named_graph: URIRef | None = None) -> list[ResultRow]:
257
- """List triples in the graph store
258
-
259
- Args:
260
- limit: Max number of triples to return, by default 25
261
- named_graph: Named graph to query over, default None (default graph)
262
-
263
- Returns:
264
- List of triples
265
- """
266
- query = f"SELECT ?subject ?predicate ?object WHERE {{ ?subject ?predicate ?object }} LIMIT {limit}"
267
- return cast(list[ResultRow], list(self.graph(named_graph).query(query)))
268
-
269
- @overload
270
- def list_types(self, remove_namespace: Literal[False] = False, limit: int = 25) -> list[ResultRow]: ...
271
-
272
- @overload
273
- def list_types(
274
- self,
275
- remove_namespace: Literal[True],
276
- limit: int = 25,
277
- named_graph: URIRef | None = None,
278
- ) -> list[str]: ...
279
-
280
- def list_types(
281
- self,
282
- remove_namespace: bool = False,
283
- limit: int | None = 25,
284
- named_graph: URIRef | None = None,
285
- ) -> list[ResultRow] | list[str]:
286
- """List types in the graph store
287
-
288
- Args:
289
- limit: Max number of types to return, by default 25
290
- remove_namespace: Whether to remove the namespace from the type, by default False
291
-
292
- Returns:
293
- List of types
294
- """
295
- query = "SELECT DISTINCT ?type WHERE { ?subject a ?type }"
296
- if limit is not None:
297
- query += f" LIMIT {limit}"
298
- result = cast(list[ResultRow], list(self.graph(named_graph).query(query)))
299
- if remove_namespace:
300
- return [remove_namespace_from_uri(res[0]) for res in result]
301
- return result
302
-
303
- def multi_value_type_property(
304
- self,
305
- named_graph: URIRef | None = None,
306
- ) -> Iterable[tuple[URIRef, URIRef, list[URIRef]]]:
307
- query = """SELECT ?sourceType ?property
308
- (GROUP_CONCAT(DISTINCT STR(?valueType); SEPARATOR=",") AS ?valueTypes)
309
-
310
- WHERE {{
311
- ?s ?property ?o .
312
- ?s a ?sourceType .
313
- OPTIONAL {{ ?o a ?type }}
314
-
315
- # Key part to determine value type: either object, data or unknown
316
- BIND( IF(isLiteral(?o),DATATYPE(?o),
317
- IF(BOUND(?type), ?type,
318
- <{unknownType}>)) AS ?valueType)
319
- }}
320
-
321
- GROUP BY ?sourceType ?property
322
- HAVING (COUNT(DISTINCT ?valueType) > 1)"""
323
-
324
- for (
325
- source_type,
326
- property_,
327
- value_types,
328
- ) in cast(
329
- ResultRow,
330
- self.graph(named_graph).query(query.format(unknownType=str(NEAT.UnknownType))),
331
- ):
332
- yield cast(URIRef, source_type), cast(URIRef, property_), [URIRef(uri) for uri in value_types.split(",")]
333
-
334
- def drop_types(
335
- self,
336
- type_: list[URIRef],
337
- named_graph: URIRef | None = None,
338
- ) -> dict[URIRef, int]:
339
- """Drop types from the graph store
340
-
341
- Args:
342
- type_: List of types to drop
343
- named_graph: Named graph to query over, default None (default graph
344
-
345
- Returns:
346
- Dictionary of dropped types
347
- """
348
- dropped_types: dict[URIRef, int] = {}
349
- for t in type_:
350
- instance_ids = list(self.list_instances_ids(t))
351
- dropped_types[t] = len(instance_ids)
352
- remove_instance_ids_in_batch(self.graph(named_graph), instance_ids)
353
- return dropped_types
354
-
355
- def multi_type_instances(self, named_graph: URIRef | None = None) -> dict[str, list[str]]:
356
- """Find instances with multiple types
357
-
358
- Args:
359
- named_graph: Named graph to query over, default None (default graph)
360
-
361
- """
362
-
363
- query = """
364
- SELECT ?instance (GROUP_CONCAT(str(?type); SEPARATOR=",") AS ?types)
365
- WHERE {
366
- ?instance a ?type .
367
- }
368
- GROUP BY ?instance
369
- HAVING (COUNT(?type) > 1)
370
- """
371
-
372
- result = {}
373
- for instance, types in self.graph(named_graph).query(query): # type: ignore
374
- result[remove_namespace_from_uri(instance)] = remove_namespace_from_uri(types.split(","))
375
-
376
- return result
377
-
378
- def count_of_type(self, class_uri: URIRef, named_graph: URIRef | None = None) -> int:
379
- query = f"SELECT (COUNT(?instance) AS ?instanceCount) WHERE {{ ?instance a <{class_uri}> }}"
380
- return int(next(iter(self.graph(named_graph).query(query)))[0]) # type: ignore[arg-type, index]
381
-
382
- def types_with_instance_and_property_count(
383
- self, remove_namespace: bool = True, named_graph: URIRef | None = None
384
- ) -> list[dict[str, Any]]:
385
- query = """
386
- SELECT ?type (COUNT(DISTINCT ?instance) AS ?instanceCount) (COUNT(DISTINCT ?property) AS ?propertyCount)
387
- WHERE {
388
- ?instance a ?type .
389
- ?instance ?property ?value .
390
- FILTER(?property != rdf:type)
391
- }
392
- GROUP BY ?type
393
- ORDER BY DESC(?instanceCount)"""
394
- return [
395
- {
396
- "type": urllib.parse.unquote(remove_namespace_from_uri(type_)) if remove_namespace else type_,
397
- "instanceCount": cast(RdfLiteral, instance_count).toPython(),
398
- "propertyCount": cast(RdfLiteral, property_count).toPython(),
399
- }
400
- for type_, instance_count, property_count in list(
401
- cast(list[ResultRow], self.graph(named_graph).query(query))
402
- )
403
- ]
404
-
405
- def properties_with_count(
406
- self, remove_namespace: bool = True, named_graph: URIRef | None = None
407
- ) -> list[dict[str, Any]]:
408
- instance_count_by_type = {
409
- entry["type"]: entry["instanceCount"]
410
- for entry in self.types_with_instance_and_property_count(remove_namespace=False, named_graph=named_graph)
411
- }
412
- query = """SELECT ?type ?property (COUNT(DISTINCT ?instance) AS ?instanceCount)
413
- WHERE {
414
- ?instance a ?type .
415
- ?instance ?property ?value .
416
- FILTER(?property != rdf:type)
417
- }
418
- GROUP BY ?type ?property
419
- ORDER BY ASC(?type) ASC(?property)"""
420
- return [
421
- {
422
- "type": urllib.parse.unquote(remove_namespace_from_uri(type_)) if remove_namespace else type_,
423
- "property": urllib.parse.unquote(remove_namespace_from_uri(property)) if remove_namespace else property,
424
- "instanceCount": cast(RdfLiteral, instance_count).toPython(),
425
- "total": instance_count_by_type[type_],
426
- }
427
- for type_, property, instance_count in list(cast(list[ResultRow], self.graph(named_graph).query(query)))
428
- ]
429
-
430
- @overload
431
- def instances_with_properties(
432
- self, type: URIRef, remove_namespace: Literal[False], named_graph: URIRef | None = None
433
- ) -> dict[URIRef, set[URIRef]]: ...
434
-
435
- @overload
436
- def instances_with_properties(
437
- self, type: URIRef, remove_namespace: Literal[True], named_graph: URIRef | None = None
438
- ) -> dict[str, set[str]]: ...
439
-
440
- def instances_with_properties(
441
- self, type: URIRef, remove_namespace: bool = True, named_graph: URIRef | None = None
442
- ) -> dict[str, set[str]] | dict[URIRef, set[URIRef]]:
443
- query = """SELECT DISTINCT ?instance ?property
444
- WHERE {{
445
- ?instance a <{type}> .
446
- ?instance ?property ?value .
447
- FILTER(?property != rdf:type)
448
- }}"""
449
- result = defaultdict(set)
450
- for instance, property_ in cast(Iterable[ResultRow], self.graph(named_graph).query(query.format(type=type))):
451
- instance_str = urllib.parse.unquote(remove_namespace_from_uri(instance)) if remove_namespace else instance
452
- property_str = urllib.parse.unquote(remove_namespace_from_uri(property_)) if remove_namespace else property_
453
- result[instance_str].add(property_str)
454
- return result
455
-
456
- def list_instances_ids_by_space(
457
- self, space_property: URIRef, named_graph: URIRef | None = None
458
- ) -> Iterable[tuple[URIRef, str]]:
459
- """Returns instance ids by space"""
460
- query = f"""SELECT DISTINCT ?instance ?space
461
- WHERE {{?instance <{space_property}> ?space}}"""
462
-
463
- for result in cast(Iterable[ResultRow], self.graph(named_graph).query(query)):
464
- instance_id, space = cast(tuple[URIRef, URIRef | RdfLiteral], result)
465
- if isinstance(space, URIRef):
466
- yield instance_id, remove_namespace_from_uri(space)
467
- elif isinstance(space, RdfLiteral):
468
- yield instance_id, str(space.toPython())
469
- else:
470
- yield instance_id, str(space)
@@ -0,0 +1,16 @@
1
+ from rdflib import Dataset, URIRef
2
+
3
+ from ._select import SelectQueries
4
+ from ._update import UpdateQueries
5
+
6
+
7
+ class Queries:
8
+ """Helper class for storing standard queries for the graph store."""
9
+
10
+ def __init__(
11
+ self,
12
+ dataset: Dataset,
13
+ default_named_graph: URIRef | None = None,
14
+ ) -> None:
15
+ self.select = SelectQueries(dataset, default_named_graph)
16
+ self.update = UpdateQueries(self.select, dataset, default_named_graph)