cognite-neat 0.119.1__py3-none-any.whl → 0.119.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_constants.py +34 -70
- cognite/neat/_graph/extractors/__init__.py +0 -6
- cognite/neat/_graph/loaders/_rdf2dms.py +5 -5
- cognite/neat/_graph/queries/__init__.py +1 -1
- cognite/neat/_graph/queries/_base.py +2 -456
- cognite/neat/_graph/queries/_queries.py +16 -0
- cognite/neat/_graph/queries/_select.py +440 -0
- cognite/neat/_graph/queries/_update.py +37 -0
- cognite/neat/_issues/errors/_external.py +4 -2
- cognite/neat/_rules/exporters/_rules2excel.py +240 -107
- cognite/neat/_rules/importers/_yaml2rules.py +7 -1
- cognite/neat/_rules/models/_base_rules.py +16 -1
- cognite/neat/_rules/models/dms/_validation.py +11 -2
- cognite/neat/_rules/transformers/_converters.py +16 -6
- cognite/neat/_session/_drop.py +2 -2
- cognite/neat/_session/_explore.py +4 -4
- cognite/neat/_session/_prepare.py +5 -5
- cognite/neat/_session/_read.py +6 -0
- cognite/neat/_session/_set.py +3 -3
- cognite/neat/_session/_show.py +1 -1
- cognite/neat/_session/_template.py +21 -2
- cognite/neat/_state/README.md +23 -0
- cognite/neat/_store/_graph_store.py +5 -5
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.119.1.dist-info → cognite_neat-0.119.3.dist-info}/METADATA +37 -2
- {cognite_neat-0.119.1.dist-info → cognite_neat-0.119.3.dist-info}/RECORD +29 -28
- cognite/neat/_graph/extractors/_dexpi.py +0 -234
- cognite/neat/_graph/extractors/_iodd.py +0 -403
- cognite/neat/_graph/transformers/_iodd.py +0 -30
- {cognite_neat-0.119.1.dist-info → cognite_neat-0.119.3.dist-info}/LICENSE +0 -0
- {cognite_neat-0.119.1.dist-info → cognite_neat-0.119.3.dist-info}/WHEEL +0 -0
- {cognite_neat-0.119.1.dist-info → cognite_neat-0.119.3.dist-info}/entry_points.txt +0 -0
cognite/neat/_constants.py
CHANGED
|
@@ -65,76 +65,40 @@ COGNITE_SPACES = frozenset(
|
|
|
65
65
|
}
|
|
66
66
|
)
|
|
67
67
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
COGNITE_CORE_FEATURES = frozenset(
|
|
103
|
-
{
|
|
104
|
-
"CogniteDescribable",
|
|
105
|
-
"CogniteSourceable",
|
|
106
|
-
"CogniteSourceSystem",
|
|
107
|
-
"CogniteSchedulable",
|
|
108
|
-
"CogniteVisualizable",
|
|
109
|
-
}
|
|
110
|
-
)
|
|
111
|
-
|
|
112
|
-
COGNITE_3D_CONCEPTS = frozenset(
|
|
113
|
-
{
|
|
114
|
-
"Cognite3DModel",
|
|
115
|
-
"Cognite3DObject",
|
|
116
|
-
"Cognite3DRevision",
|
|
117
|
-
"Cognite3DTransformation",
|
|
118
|
-
"Cognite360Image",
|
|
119
|
-
"Cognite360ImageAnnotation",
|
|
120
|
-
"Cognite360ImageCollection",
|
|
121
|
-
"Cognite360ImageModel",
|
|
122
|
-
"Cognite360ImageStation",
|
|
123
|
-
"CogniteCADModel",
|
|
124
|
-
"CogniteCADNode",
|
|
125
|
-
"CogniteCADRevision",
|
|
126
|
-
"CogniteCubeMap",
|
|
127
|
-
"CognitePointCloudModel",
|
|
128
|
-
"CognitePointCloudRevision",
|
|
129
|
-
"CognitePointCloudVolume",
|
|
130
|
-
}
|
|
131
|
-
)
|
|
132
|
-
|
|
133
|
-
COGNITE_ANNOTATION = frozenset(
|
|
134
|
-
{
|
|
135
|
-
"CogniteAnnotation",
|
|
136
|
-
"CogniteDiagramAnnotation",
|
|
137
|
-
}
|
|
68
|
+
COGNITE_CONCEPTS = (
|
|
69
|
+
"CogniteAsset",
|
|
70
|
+
"CogniteEquipment",
|
|
71
|
+
"CogniteActivity",
|
|
72
|
+
"CogniteTimeSeries",
|
|
73
|
+
"CogniteFile",
|
|
74
|
+
"CogniteUnit",
|
|
75
|
+
"CogniteAssetClass",
|
|
76
|
+
"CogniteAssetType",
|
|
77
|
+
"CogniteEquipmentType",
|
|
78
|
+
"CogniteFileCategory",
|
|
79
|
+
"CogniteDescribable",
|
|
80
|
+
"CogniteSourceable",
|
|
81
|
+
"CogniteSourceSystem",
|
|
82
|
+
"CogniteSchedulable",
|
|
83
|
+
"CogniteVisualizable",
|
|
84
|
+
"CogniteAnnotation",
|
|
85
|
+
"CogniteDiagramAnnotation",
|
|
86
|
+
"CogniteCubeMap",
|
|
87
|
+
"CogniteCADRevision",
|
|
88
|
+
"CognitePointCloudVolume",
|
|
89
|
+
"Cognite360ImageAnnotation",
|
|
90
|
+
"Cognite3DObject",
|
|
91
|
+
"Cognite3DRevision",
|
|
92
|
+
"Cognite360Image",
|
|
93
|
+
"Cognite360ImageCollection",
|
|
94
|
+
"Cognite360ImageStation",
|
|
95
|
+
"CognitePointCloudModel",
|
|
96
|
+
"Cognite3DTransformation",
|
|
97
|
+
"Cognite360ImageModel",
|
|
98
|
+
"Cognite3DModel",
|
|
99
|
+
"CogniteCADModel",
|
|
100
|
+
"CognitePointCloudRevision",
|
|
101
|
+
"CogniteCADNode",
|
|
138
102
|
)
|
|
139
103
|
|
|
140
104
|
DMS_LISTABLE_PROPERTY_LIMIT = 1000
|
|
@@ -10,11 +10,9 @@ from ._classic_cdf._labels import LabelsExtractor
|
|
|
10
10
|
from ._classic_cdf._relationships import RelationshipsExtractor
|
|
11
11
|
from ._classic_cdf._sequences import SequencesExtractor
|
|
12
12
|
from ._classic_cdf._timeseries import TimeSeriesExtractor
|
|
13
|
-
from ._dexpi import DexpiExtractor
|
|
14
13
|
from ._dict import DictExtractor
|
|
15
14
|
from ._dms import DMSExtractor
|
|
16
15
|
from ._dms_graph import DMSGraphExtractor
|
|
17
|
-
from ._iodd import IODDExtractor
|
|
18
16
|
from ._mock_graph_generator import MockGraphGenerator
|
|
19
17
|
from ._raw import RAWExtractor
|
|
20
18
|
from ._rdf_file import RdfFileExtractor
|
|
@@ -26,11 +24,9 @@ __all__ = [
|
|
|
26
24
|
"DMSExtractor",
|
|
27
25
|
"DMSGraphExtractor",
|
|
28
26
|
"DataSetExtractor",
|
|
29
|
-
"DexpiExtractor",
|
|
30
27
|
"DictExtractor",
|
|
31
28
|
"EventsExtractor",
|
|
32
29
|
"FilesExtractor",
|
|
33
|
-
"IODDExtractor",
|
|
34
30
|
"KnowledgeGraphExtractor",
|
|
35
31
|
"LabelsExtractor",
|
|
36
32
|
"MockGraphGenerator",
|
|
@@ -52,8 +48,6 @@ TripleExtractors = (
|
|
|
52
48
|
| FilesExtractor
|
|
53
49
|
| LabelsExtractor
|
|
54
50
|
| RdfFileExtractor
|
|
55
|
-
| DexpiExtractor
|
|
56
|
-
| IODDExtractor
|
|
57
51
|
| DMSExtractor
|
|
58
52
|
| ClassicGraphExtractor
|
|
59
53
|
| DataSetExtractor
|
|
@@ -259,7 +259,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
259
259
|
"""Selects the views with data."""
|
|
260
260
|
view_iterations: dict[dm.ViewId, _ViewIterator] = {}
|
|
261
261
|
for view_id, query in view_query_by_id.items():
|
|
262
|
-
count = self.graph_store.queries.count_of_type(query.rdf_type)
|
|
262
|
+
count = self.graph_store.queries.select.count_of_type(query.rdf_type)
|
|
263
263
|
if count > 0:
|
|
264
264
|
view_iterations[view_id] = _ViewIterator(view_id, count, query)
|
|
265
265
|
return view_iterations
|
|
@@ -269,7 +269,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
269
269
|
if self._space_property is None:
|
|
270
270
|
return issues
|
|
271
271
|
total = sum(it.instance_count for it in view_iterations)
|
|
272
|
-
properties_by_uriref = self.graph_store.queries.properties()
|
|
272
|
+
properties_by_uriref = self.graph_store.queries.select.properties()
|
|
273
273
|
space_property_uri = next((k for k, v in properties_by_uriref.items() if v == self._space_property), None)
|
|
274
274
|
if space_property_uri is None:
|
|
275
275
|
error: ResourceNotFoundError[str, str] = ResourceNotFoundError(
|
|
@@ -282,7 +282,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
282
282
|
issues.append(error)
|
|
283
283
|
return issues
|
|
284
284
|
|
|
285
|
-
instance_iterable = self.graph_store.queries.list_instances_ids_by_space(space_property_uri)
|
|
285
|
+
instance_iterable = self.graph_store.queries.select.list_instances_ids_by_space(space_property_uri)
|
|
286
286
|
instance_iterable = iterate_progress_bar_if_above_config_threshold(
|
|
287
287
|
instance_iterable, total, f"Looking up spaces for {total} instances..."
|
|
288
288
|
)
|
|
@@ -308,8 +308,8 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
308
308
|
if not self.neat_prefix_by_type_uri:
|
|
309
309
|
return
|
|
310
310
|
|
|
311
|
-
count = sum(count for _, count in self.graph_store.queries.summarize_instances())
|
|
312
|
-
instance_iterable = self.graph_store.queries.list_instances_ids()
|
|
311
|
+
count = sum(count for _, count in self.graph_store.queries.select.summarize_instances())
|
|
312
|
+
instance_iterable = self.graph_store.queries.select.list_instances_ids()
|
|
313
313
|
instance_iterable = iterate_progress_bar_if_above_config_threshold(
|
|
314
314
|
instance_iterable, count, f"Looking up identifiers for {count} instances..."
|
|
315
315
|
)
|
|
@@ -1,21 +1,8 @@
|
|
|
1
|
-
import urllib.parse
|
|
2
|
-
from collections import defaultdict
|
|
3
|
-
from collections.abc import Iterable
|
|
4
|
-
from typing import Any, Literal, cast, overload
|
|
5
|
-
|
|
6
|
-
from rdflib import RDF, XSD, Dataset, Graph, Namespace, URIRef
|
|
7
|
-
from rdflib import Literal as RdfLiteral
|
|
1
|
+
from rdflib import Dataset, Graph, URIRef
|
|
8
2
|
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
|
|
9
|
-
from rdflib.query import ResultRow
|
|
10
|
-
|
|
11
|
-
from cognite.neat._constants import NEAT
|
|
12
|
-
from cognite.neat._shared import InstanceType
|
|
13
|
-
from cognite.neat._utils.rdf_ import remove_instance_ids_in_batch, remove_namespace_from_uri
|
|
14
|
-
|
|
15
3
|
|
|
16
|
-
class Queries:
|
|
17
|
-
"""Helper class for storing standard queries for the graph store."""
|
|
18
4
|
|
|
5
|
+
class BaseQuery:
|
|
19
6
|
def __init__(
|
|
20
7
|
self,
|
|
21
8
|
dataset: Dataset,
|
|
@@ -27,444 +14,3 @@ class Queries:
|
|
|
27
14
|
def graph(self, named_graph: URIRef | None = None) -> Graph:
|
|
28
15
|
"""Get named graph from the dataset to query over"""
|
|
29
16
|
return self.dataset.graph(named_graph or self.default_named_graph)
|
|
30
|
-
|
|
31
|
-
def summarize_instances(self, named_graph: URIRef | None = None) -> list[tuple[str, int]]:
|
|
32
|
-
"""Summarize instances in the graph store by class and count"""
|
|
33
|
-
query_statement = """ SELECT ?class (COUNT(?instance) AS ?instanceCount)
|
|
34
|
-
WHERE {
|
|
35
|
-
?instance a ?class .
|
|
36
|
-
}
|
|
37
|
-
GROUP BY ?class
|
|
38
|
-
ORDER BY DESC(?instanceCount) """
|
|
39
|
-
|
|
40
|
-
return [ # type: ignore[misc]
|
|
41
|
-
(
|
|
42
|
-
remove_namespace_from_uri(cast(URIRef, class_)),
|
|
43
|
-
cast(RdfLiteral, count).value,
|
|
44
|
-
)
|
|
45
|
-
for class_, count in self.graph(named_graph=named_graph).query(query_statement)
|
|
46
|
-
]
|
|
47
|
-
|
|
48
|
-
def types(self, named_graph: URIRef | None = None) -> dict[URIRef, str]:
|
|
49
|
-
"""Types and their short form in the graph"""
|
|
50
|
-
query = """SELECT DISTINCT ?type
|
|
51
|
-
WHERE {?s a ?type .}"""
|
|
52
|
-
|
|
53
|
-
return { # type: ignore[misc, index, arg-type]
|
|
54
|
-
cast(URIRef, type_): remove_namespace_from_uri(cast(URIRef, type_))
|
|
55
|
-
for (type_,) in list(self.graph(named_graph).query(query))
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
def type_uri(self, type_: str, named_graph: URIRef | None = None) -> list[URIRef]:
|
|
59
|
-
"""Get the URIRef of a type"""
|
|
60
|
-
return [k for k, v in self.types(named_graph).items() if v == type_]
|
|
61
|
-
|
|
62
|
-
def properties(self, named_graph: URIRef | None = None) -> dict[URIRef, str]:
|
|
63
|
-
"""Properties and their short form in the graph
|
|
64
|
-
|
|
65
|
-
Args:
|
|
66
|
-
named_graph: Named graph to query over, default None (default graph)
|
|
67
|
-
|
|
68
|
-
"""
|
|
69
|
-
query = """SELECT DISTINCT ?property
|
|
70
|
-
WHERE {?s ?property ?o . FILTER(?property != rdf:type)}"""
|
|
71
|
-
return { # type: ignore[misc, index, arg-type]
|
|
72
|
-
cast(URIRef, type_): remove_namespace_from_uri(cast(URIRef, type_))
|
|
73
|
-
for (type_,) in list(self.graph(named_graph).query(query))
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
def properties_by_type(self, named_graph: URIRef | None = None) -> dict[URIRef, dict[URIRef, str]]:
|
|
77
|
-
"""Properties and their short form in the graph by type
|
|
78
|
-
|
|
79
|
-
Args:
|
|
80
|
-
named_graph: Named graph to query over, default None (default graph)
|
|
81
|
-
|
|
82
|
-
"""
|
|
83
|
-
query = """SELECT DISTINCT ?type ?property
|
|
84
|
-
WHERE {?s a ?type . ?s ?property ?o . FILTER(?property != rdf:type)}"""
|
|
85
|
-
properties_by_type: dict[URIRef, dict[URIRef, str]] = defaultdict(dict)
|
|
86
|
-
for type_, property_ in cast(ResultRow, list(self.graph(named_graph).query(query))):
|
|
87
|
-
properties_by_type[type_][property_] = remove_namespace_from_uri(property_) # type: ignore[index]
|
|
88
|
-
return properties_by_type
|
|
89
|
-
|
|
90
|
-
def property_uri(self, property_: str, named_graph: URIRef | None = None) -> list[URIRef]:
|
|
91
|
-
"""Get the URIRef of a property
|
|
92
|
-
|
|
93
|
-
Args:
|
|
94
|
-
property_: Property to find URIRef for
|
|
95
|
-
named_graph: Named graph to query over, default None (default graph)
|
|
96
|
-
"""
|
|
97
|
-
return [k for k, v in self.properties(named_graph).items() if v == property_]
|
|
98
|
-
|
|
99
|
-
@overload
|
|
100
|
-
def list_instances_ids(
|
|
101
|
-
self, class_uri: None = None, limit: int = -1, named_graph: URIRef | None = None
|
|
102
|
-
) -> Iterable[tuple[URIRef, URIRef]]: ...
|
|
103
|
-
|
|
104
|
-
@overload
|
|
105
|
-
def list_instances_ids(
|
|
106
|
-
self, class_uri: URIRef, limit: int = -1, named_graph: URIRef | None = None
|
|
107
|
-
) -> Iterable[URIRef]: ...
|
|
108
|
-
|
|
109
|
-
def list_instances_ids(
|
|
110
|
-
self, class_uri: URIRef | None = None, limit: int = -1, named_graph: URIRef | None = None
|
|
111
|
-
) -> Iterable[URIRef] | Iterable[tuple[URIRef, URIRef]]:
|
|
112
|
-
"""List all instance IDs
|
|
113
|
-
|
|
114
|
-
Args:
|
|
115
|
-
class_uri: Class for which instances are to be found, default None (all instances)
|
|
116
|
-
limit: Max number of instances to return, by default -1 meaning all instances
|
|
117
|
-
named_graph: Named graph to query over, default None (default graph)
|
|
118
|
-
|
|
119
|
-
Returns:
|
|
120
|
-
List of class instance URIs
|
|
121
|
-
"""
|
|
122
|
-
query = "SELECT DISTINCT ?subject"
|
|
123
|
-
if class_uri:
|
|
124
|
-
query += f" WHERE {{ ?subject a <{class_uri}> .}}"
|
|
125
|
-
else:
|
|
126
|
-
query += " ?type WHERE {{ ?subject a ?type .}}"
|
|
127
|
-
if limit != -1:
|
|
128
|
-
query += f" LIMIT {limit}"
|
|
129
|
-
# MyPy is not very happy with RDFLib, so just ignore the type hinting here
|
|
130
|
-
return (tuple(res) if class_uri is None else res[0] for res in self.graph(named_graph).query(query)) # type: ignore[index, return-value, arg-type]
|
|
131
|
-
|
|
132
|
-
def type_with_property(self, type_: URIRef, property_uri: URIRef, named_graph: URIRef | None = None) -> bool:
|
|
133
|
-
"""Check if a property exists in the graph store
|
|
134
|
-
|
|
135
|
-
Args:
|
|
136
|
-
type_: Type URI to check
|
|
137
|
-
property_uri: Property URI to check
|
|
138
|
-
named_graph: Named graph to query over, default None (default graph)
|
|
139
|
-
|
|
140
|
-
Returns:
|
|
141
|
-
True if property exists, False otherwise
|
|
142
|
-
"""
|
|
143
|
-
query = f"SELECT ?o WHERE {{ ?s a <{type_}> ; <{property_uri}> ?o .}} Limit 1"
|
|
144
|
-
return bool(list(self.graph(named_graph).query(query)))
|
|
145
|
-
|
|
146
|
-
def has_namespace(self, namespace: Namespace, named_graph: URIRef | None = None) -> bool:
|
|
147
|
-
"""Check if a namespace exists in the graph store
|
|
148
|
-
|
|
149
|
-
Args:
|
|
150
|
-
namespace: Namespace to check
|
|
151
|
-
named_graph: Named graph to query over, default None (default graph)
|
|
152
|
-
|
|
153
|
-
Returns:
|
|
154
|
-
True if namespace exists, False otherwise
|
|
155
|
-
"""
|
|
156
|
-
query = f"ASK WHERE {{ ?s ?p ?o . FILTER(STRSTARTS(STR(?p), STR(<{namespace}>))) }}"
|
|
157
|
-
return bool(self.graph(named_graph).query(query))
|
|
158
|
-
|
|
159
|
-
def has_data(self) -> bool:
|
|
160
|
-
"""Check if the graph store has data"""
|
|
161
|
-
return cast(bool, next(iter(self.dataset.query("ASK WHERE { ?s ?p ?o }"))))
|
|
162
|
-
|
|
163
|
-
def has_type(self, type_: URIRef, named_graph: URIRef | None = None) -> bool:
|
|
164
|
-
"""Check if a type exists in the graph store
|
|
165
|
-
|
|
166
|
-
Args:
|
|
167
|
-
type_: Type to check
|
|
168
|
-
named_graph: Named graph to query over, default None (default graph)
|
|
169
|
-
|
|
170
|
-
Returns:
|
|
171
|
-
True if type exists, False otherwise
|
|
172
|
-
"""
|
|
173
|
-
query = f"ASK WHERE {{ ?s a <{type_}> }}"
|
|
174
|
-
return bool(self.graph(named_graph).query(query))
|
|
175
|
-
|
|
176
|
-
def describe(
|
|
177
|
-
self,
|
|
178
|
-
instance_id: URIRef,
|
|
179
|
-
instance_type: URIRef | None = None,
|
|
180
|
-
property_renaming_config: dict | None = None,
|
|
181
|
-
named_graph: URIRef | None = None,
|
|
182
|
-
remove_uri_namespace: bool = True,
|
|
183
|
-
) -> tuple[URIRef, dict[str | InstanceType, list[Any]]] | None:
|
|
184
|
-
"""DESCRIBE instance for a given class from the graph store
|
|
185
|
-
|
|
186
|
-
Args:
|
|
187
|
-
instance_id: Instance id for which we want to generate query
|
|
188
|
-
instance_type: Type of the instance, default None (will be inferred from triples)
|
|
189
|
-
property_renaming_config: Dictionary to rename properties, default None (no renaming)
|
|
190
|
-
named_graph: Named graph to query over, default None (default graph)
|
|
191
|
-
remove_uri_namespace: Whether to remove the namespace from the URI, by default True
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
Returns:
|
|
195
|
-
Dictionary of instance properties
|
|
196
|
-
"""
|
|
197
|
-
property_values: dict[str, list[str] | list[URIRef]] = defaultdict(list)
|
|
198
|
-
for _, predicate, object_ in cast(list[ResultRow], self.graph(named_graph).query(f"DESCRIBE <{instance_id}>")):
|
|
199
|
-
if object_.lower() in [
|
|
200
|
-
"",
|
|
201
|
-
"none",
|
|
202
|
-
"nan",
|
|
203
|
-
"null",
|
|
204
|
-
]:
|
|
205
|
-
continue
|
|
206
|
-
|
|
207
|
-
# set property
|
|
208
|
-
if property_renaming_config and predicate != RDF.type:
|
|
209
|
-
property_ = remove_namespace_from_uri(predicate, validation="prefix")
|
|
210
|
-
renamed_property_ = property_renaming_config.get(predicate, property_)
|
|
211
|
-
|
|
212
|
-
elif not property_renaming_config and predicate != RDF.type:
|
|
213
|
-
property_ = remove_namespace_from_uri(predicate, validation="prefix")
|
|
214
|
-
renamed_property_ = property_
|
|
215
|
-
|
|
216
|
-
else:
|
|
217
|
-
property_ = RDF.type
|
|
218
|
-
renamed_property_ = property_
|
|
219
|
-
|
|
220
|
-
value: Any
|
|
221
|
-
if isinstance(object_, URIRef) and remove_uri_namespace:
|
|
222
|
-
# These properties contain the space in the Namespace.
|
|
223
|
-
value = remove_namespace_from_uri(object_, validation="prefix")
|
|
224
|
-
elif isinstance(object_, URIRef):
|
|
225
|
-
value = object_
|
|
226
|
-
elif isinstance(object_, RdfLiteral):
|
|
227
|
-
if object_.datatype == XSD._NS["json"]:
|
|
228
|
-
# For JSON literals, the .toPython() returns a Literal object.
|
|
229
|
-
value = str(object_)
|
|
230
|
-
else:
|
|
231
|
-
value = object_.toPython()
|
|
232
|
-
else:
|
|
233
|
-
# It is a blank node
|
|
234
|
-
value = str(object_)
|
|
235
|
-
|
|
236
|
-
# add type to the dictionary
|
|
237
|
-
if predicate != RDF.type:
|
|
238
|
-
property_values[renamed_property_].append(value) # type: ignore[arg-type]
|
|
239
|
-
else:
|
|
240
|
-
# guarding against multiple rdf:type values as this is not allowed in CDF
|
|
241
|
-
if RDF.type not in property_values:
|
|
242
|
-
property_values[RDF.type].append(
|
|
243
|
-
remove_namespace_from_uri(instance_type, validation="prefix") if instance_type else value # type: ignore[arg-type]
|
|
244
|
-
)
|
|
245
|
-
else:
|
|
246
|
-
# we should not have multiple rdf:type values
|
|
247
|
-
continue
|
|
248
|
-
if property_values:
|
|
249
|
-
return (
|
|
250
|
-
instance_id,
|
|
251
|
-
property_values,
|
|
252
|
-
)
|
|
253
|
-
else:
|
|
254
|
-
return None
|
|
255
|
-
|
|
256
|
-
def list_triples(self, limit: int = 25, named_graph: URIRef | None = None) -> list[ResultRow]:
|
|
257
|
-
"""List triples in the graph store
|
|
258
|
-
|
|
259
|
-
Args:
|
|
260
|
-
limit: Max number of triples to return, by default 25
|
|
261
|
-
named_graph: Named graph to query over, default None (default graph)
|
|
262
|
-
|
|
263
|
-
Returns:
|
|
264
|
-
List of triples
|
|
265
|
-
"""
|
|
266
|
-
query = f"SELECT ?subject ?predicate ?object WHERE {{ ?subject ?predicate ?object }} LIMIT {limit}"
|
|
267
|
-
return cast(list[ResultRow], list(self.graph(named_graph).query(query)))
|
|
268
|
-
|
|
269
|
-
@overload
|
|
270
|
-
def list_types(self, remove_namespace: Literal[False] = False, limit: int = 25) -> list[ResultRow]: ...
|
|
271
|
-
|
|
272
|
-
@overload
|
|
273
|
-
def list_types(
|
|
274
|
-
self,
|
|
275
|
-
remove_namespace: Literal[True],
|
|
276
|
-
limit: int = 25,
|
|
277
|
-
named_graph: URIRef | None = None,
|
|
278
|
-
) -> list[str]: ...
|
|
279
|
-
|
|
280
|
-
def list_types(
|
|
281
|
-
self,
|
|
282
|
-
remove_namespace: bool = False,
|
|
283
|
-
limit: int | None = 25,
|
|
284
|
-
named_graph: URIRef | None = None,
|
|
285
|
-
) -> list[ResultRow] | list[str]:
|
|
286
|
-
"""List types in the graph store
|
|
287
|
-
|
|
288
|
-
Args:
|
|
289
|
-
limit: Max number of types to return, by default 25
|
|
290
|
-
remove_namespace: Whether to remove the namespace from the type, by default False
|
|
291
|
-
|
|
292
|
-
Returns:
|
|
293
|
-
List of types
|
|
294
|
-
"""
|
|
295
|
-
query = "SELECT DISTINCT ?type WHERE { ?subject a ?type }"
|
|
296
|
-
if limit is not None:
|
|
297
|
-
query += f" LIMIT {limit}"
|
|
298
|
-
result = cast(list[ResultRow], list(self.graph(named_graph).query(query)))
|
|
299
|
-
if remove_namespace:
|
|
300
|
-
return [remove_namespace_from_uri(res[0]) for res in result]
|
|
301
|
-
return result
|
|
302
|
-
|
|
303
|
-
def multi_value_type_property(
|
|
304
|
-
self,
|
|
305
|
-
named_graph: URIRef | None = None,
|
|
306
|
-
) -> Iterable[tuple[URIRef, URIRef, list[URIRef]]]:
|
|
307
|
-
query = """SELECT ?sourceType ?property
|
|
308
|
-
(GROUP_CONCAT(DISTINCT STR(?valueType); SEPARATOR=",") AS ?valueTypes)
|
|
309
|
-
|
|
310
|
-
WHERE {{
|
|
311
|
-
?s ?property ?o .
|
|
312
|
-
?s a ?sourceType .
|
|
313
|
-
OPTIONAL {{ ?o a ?type }}
|
|
314
|
-
|
|
315
|
-
# Key part to determine value type: either object, data or unknown
|
|
316
|
-
BIND( IF(isLiteral(?o),DATATYPE(?o),
|
|
317
|
-
IF(BOUND(?type), ?type,
|
|
318
|
-
<{unknownType}>)) AS ?valueType)
|
|
319
|
-
}}
|
|
320
|
-
|
|
321
|
-
GROUP BY ?sourceType ?property
|
|
322
|
-
HAVING (COUNT(DISTINCT ?valueType) > 1)"""
|
|
323
|
-
|
|
324
|
-
for (
|
|
325
|
-
source_type,
|
|
326
|
-
property_,
|
|
327
|
-
value_types,
|
|
328
|
-
) in cast(
|
|
329
|
-
ResultRow,
|
|
330
|
-
self.graph(named_graph).query(query.format(unknownType=str(NEAT.UnknownType))),
|
|
331
|
-
):
|
|
332
|
-
yield cast(URIRef, source_type), cast(URIRef, property_), [URIRef(uri) for uri in value_types.split(",")]
|
|
333
|
-
|
|
334
|
-
def drop_types(
|
|
335
|
-
self,
|
|
336
|
-
type_: list[URIRef],
|
|
337
|
-
named_graph: URIRef | None = None,
|
|
338
|
-
) -> dict[URIRef, int]:
|
|
339
|
-
"""Drop types from the graph store
|
|
340
|
-
|
|
341
|
-
Args:
|
|
342
|
-
type_: List of types to drop
|
|
343
|
-
named_graph: Named graph to query over, default None (default graph
|
|
344
|
-
|
|
345
|
-
Returns:
|
|
346
|
-
Dictionary of dropped types
|
|
347
|
-
"""
|
|
348
|
-
dropped_types: dict[URIRef, int] = {}
|
|
349
|
-
for t in type_:
|
|
350
|
-
instance_ids = list(self.list_instances_ids(t))
|
|
351
|
-
dropped_types[t] = len(instance_ids)
|
|
352
|
-
remove_instance_ids_in_batch(self.graph(named_graph), instance_ids)
|
|
353
|
-
return dropped_types
|
|
354
|
-
|
|
355
|
-
def multi_type_instances(self, named_graph: URIRef | None = None) -> dict[str, list[str]]:
|
|
356
|
-
"""Find instances with multiple types
|
|
357
|
-
|
|
358
|
-
Args:
|
|
359
|
-
named_graph: Named graph to query over, default None (default graph)
|
|
360
|
-
|
|
361
|
-
"""
|
|
362
|
-
|
|
363
|
-
query = """
|
|
364
|
-
SELECT ?instance (GROUP_CONCAT(str(?type); SEPARATOR=",") AS ?types)
|
|
365
|
-
WHERE {
|
|
366
|
-
?instance a ?type .
|
|
367
|
-
}
|
|
368
|
-
GROUP BY ?instance
|
|
369
|
-
HAVING (COUNT(?type) > 1)
|
|
370
|
-
"""
|
|
371
|
-
|
|
372
|
-
result = {}
|
|
373
|
-
for instance, types in self.graph(named_graph).query(query): # type: ignore
|
|
374
|
-
result[remove_namespace_from_uri(instance)] = remove_namespace_from_uri(types.split(","))
|
|
375
|
-
|
|
376
|
-
return result
|
|
377
|
-
|
|
378
|
-
def count_of_type(self, class_uri: URIRef, named_graph: URIRef | None = None) -> int:
|
|
379
|
-
query = f"SELECT (COUNT(?instance) AS ?instanceCount) WHERE {{ ?instance a <{class_uri}> }}"
|
|
380
|
-
return int(next(iter(self.graph(named_graph).query(query)))[0]) # type: ignore[arg-type, index]
|
|
381
|
-
|
|
382
|
-
def types_with_instance_and_property_count(
|
|
383
|
-
self, remove_namespace: bool = True, named_graph: URIRef | None = None
|
|
384
|
-
) -> list[dict[str, Any]]:
|
|
385
|
-
query = """
|
|
386
|
-
SELECT ?type (COUNT(DISTINCT ?instance) AS ?instanceCount) (COUNT(DISTINCT ?property) AS ?propertyCount)
|
|
387
|
-
WHERE {
|
|
388
|
-
?instance a ?type .
|
|
389
|
-
?instance ?property ?value .
|
|
390
|
-
FILTER(?property != rdf:type)
|
|
391
|
-
}
|
|
392
|
-
GROUP BY ?type
|
|
393
|
-
ORDER BY DESC(?instanceCount)"""
|
|
394
|
-
return [
|
|
395
|
-
{
|
|
396
|
-
"type": urllib.parse.unquote(remove_namespace_from_uri(type_)) if remove_namespace else type_,
|
|
397
|
-
"instanceCount": cast(RdfLiteral, instance_count).toPython(),
|
|
398
|
-
"propertyCount": cast(RdfLiteral, property_count).toPython(),
|
|
399
|
-
}
|
|
400
|
-
for type_, instance_count, property_count in list(
|
|
401
|
-
cast(list[ResultRow], self.graph(named_graph).query(query))
|
|
402
|
-
)
|
|
403
|
-
]
|
|
404
|
-
|
|
405
|
-
def properties_with_count(
|
|
406
|
-
self, remove_namespace: bool = True, named_graph: URIRef | None = None
|
|
407
|
-
) -> list[dict[str, Any]]:
|
|
408
|
-
instance_count_by_type = {
|
|
409
|
-
entry["type"]: entry["instanceCount"]
|
|
410
|
-
for entry in self.types_with_instance_and_property_count(remove_namespace=False, named_graph=named_graph)
|
|
411
|
-
}
|
|
412
|
-
query = """SELECT ?type ?property (COUNT(DISTINCT ?instance) AS ?instanceCount)
|
|
413
|
-
WHERE {
|
|
414
|
-
?instance a ?type .
|
|
415
|
-
?instance ?property ?value .
|
|
416
|
-
FILTER(?property != rdf:type)
|
|
417
|
-
}
|
|
418
|
-
GROUP BY ?type ?property
|
|
419
|
-
ORDER BY ASC(?type) ASC(?property)"""
|
|
420
|
-
return [
|
|
421
|
-
{
|
|
422
|
-
"type": urllib.parse.unquote(remove_namespace_from_uri(type_)) if remove_namespace else type_,
|
|
423
|
-
"property": urllib.parse.unquote(remove_namespace_from_uri(property)) if remove_namespace else property,
|
|
424
|
-
"instanceCount": cast(RdfLiteral, instance_count).toPython(),
|
|
425
|
-
"total": instance_count_by_type[type_],
|
|
426
|
-
}
|
|
427
|
-
for type_, property, instance_count in list(cast(list[ResultRow], self.graph(named_graph).query(query)))
|
|
428
|
-
]
|
|
429
|
-
|
|
430
|
-
@overload
|
|
431
|
-
def instances_with_properties(
|
|
432
|
-
self, type: URIRef, remove_namespace: Literal[False], named_graph: URIRef | None = None
|
|
433
|
-
) -> dict[URIRef, set[URIRef]]: ...
|
|
434
|
-
|
|
435
|
-
@overload
|
|
436
|
-
def instances_with_properties(
|
|
437
|
-
self, type: URIRef, remove_namespace: Literal[True], named_graph: URIRef | None = None
|
|
438
|
-
) -> dict[str, set[str]]: ...
|
|
439
|
-
|
|
440
|
-
def instances_with_properties(
|
|
441
|
-
self, type: URIRef, remove_namespace: bool = True, named_graph: URIRef | None = None
|
|
442
|
-
) -> dict[str, set[str]] | dict[URIRef, set[URIRef]]:
|
|
443
|
-
query = """SELECT DISTINCT ?instance ?property
|
|
444
|
-
WHERE {{
|
|
445
|
-
?instance a <{type}> .
|
|
446
|
-
?instance ?property ?value .
|
|
447
|
-
FILTER(?property != rdf:type)
|
|
448
|
-
}}"""
|
|
449
|
-
result = defaultdict(set)
|
|
450
|
-
for instance, property_ in cast(Iterable[ResultRow], self.graph(named_graph).query(query.format(type=type))):
|
|
451
|
-
instance_str = urllib.parse.unquote(remove_namespace_from_uri(instance)) if remove_namespace else instance
|
|
452
|
-
property_str = urllib.parse.unquote(remove_namespace_from_uri(property_)) if remove_namespace else property_
|
|
453
|
-
result[instance_str].add(property_str)
|
|
454
|
-
return result
|
|
455
|
-
|
|
456
|
-
def list_instances_ids_by_space(
|
|
457
|
-
self, space_property: URIRef, named_graph: URIRef | None = None
|
|
458
|
-
) -> Iterable[tuple[URIRef, str]]:
|
|
459
|
-
"""Returns instance ids by space"""
|
|
460
|
-
query = f"""SELECT DISTINCT ?instance ?space
|
|
461
|
-
WHERE {{?instance <{space_property}> ?space}}"""
|
|
462
|
-
|
|
463
|
-
for result in cast(Iterable[ResultRow], self.graph(named_graph).query(query)):
|
|
464
|
-
instance_id, space = cast(tuple[URIRef, URIRef | RdfLiteral], result)
|
|
465
|
-
if isinstance(space, URIRef):
|
|
466
|
-
yield instance_id, remove_namespace_from_uri(space)
|
|
467
|
-
elif isinstance(space, RdfLiteral):
|
|
468
|
-
yield instance_id, str(space.toPython())
|
|
469
|
-
else:
|
|
470
|
-
yield instance_id, str(space)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from rdflib import Dataset, URIRef
|
|
2
|
+
|
|
3
|
+
from ._select import SelectQueries
|
|
4
|
+
from ._update import UpdateQueries
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Queries:
|
|
8
|
+
"""Helper class for storing standard queries for the graph store."""
|
|
9
|
+
|
|
10
|
+
def __init__(
|
|
11
|
+
self,
|
|
12
|
+
dataset: Dataset,
|
|
13
|
+
default_named_graph: URIRef | None = None,
|
|
14
|
+
) -> None:
|
|
15
|
+
self.select = SelectQueries(dataset, default_named_graph)
|
|
16
|
+
self.update = UpdateQueries(self.select, dataset, default_named_graph)
|