cognite-neat 0.109.4__py3-none-any.whl → 0.111.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_alpha.py +8 -0
- cognite/neat/_client/_api/schema.py +43 -1
- cognite/neat/_client/data_classes/schema.py +4 -4
- cognite/neat/_constants.py +15 -1
- cognite/neat/_graph/extractors/__init__.py +4 -0
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +8 -16
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +48 -19
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +23 -17
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +15 -17
- cognite/neat/_graph/extractors/_dict.py +102 -0
- cognite/neat/_graph/extractors/_dms.py +27 -40
- cognite/neat/_graph/extractors/_dms_graph.py +30 -3
- cognite/neat/_graph/extractors/_iodd.py +3 -3
- cognite/neat/_graph/extractors/_mock_graph_generator.py +9 -7
- cognite/neat/_graph/extractors/_raw.py +67 -0
- cognite/neat/_graph/loaders/_base.py +20 -4
- cognite/neat/_graph/loaders/_rdf2dms.py +476 -383
- cognite/neat/_graph/queries/_base.py +163 -133
- cognite/neat/_graph/transformers/__init__.py +1 -3
- cognite/neat/_graph/transformers/_classic_cdf.py +6 -22
- cognite/neat/_graph/transformers/_rdfpath.py +2 -49
- cognite/neat/_issues/__init__.py +1 -6
- cognite/neat/_issues/_base.py +21 -252
- cognite/neat/_issues/_contextmanagers.py +46 -0
- cognite/neat/_issues/_factory.py +69 -0
- cognite/neat/_issues/errors/__init__.py +20 -4
- cognite/neat/_issues/errors/_external.py +7 -0
- cognite/neat/_issues/errors/_wrapper.py +81 -3
- cognite/neat/_issues/formatters.py +4 -4
- cognite/neat/_issues/warnings/__init__.py +3 -2
- cognite/neat/_issues/warnings/_properties.py +8 -0
- cognite/neat/_issues/warnings/user_modeling.py +12 -0
- cognite/neat/_rules/_constants.py +12 -0
- cognite/neat/_rules/_shared.py +3 -2
- cognite/neat/_rules/analysis/__init__.py +2 -3
- cognite/neat/_rules/analysis/_base.py +430 -259
- cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
- cognite/neat/_rules/exporters/_rules2excel.py +3 -9
- cognite/neat/_rules/exporters/_rules2instance_template.py +2 -2
- cognite/neat/_rules/exporters/_rules2ontology.py +5 -4
- cognite/neat/_rules/importers/_base.py +2 -47
- cognite/neat/_rules/importers/_dms2rules.py +7 -10
- cognite/neat/_rules/importers/_dtdl2rules/dtdl_importer.py +2 -2
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +66 -26
- cognite/neat/_rules/importers/_rdf/_shared.py +1 -1
- cognite/neat/_rules/importers/_spreadsheet2rules.py +12 -9
- cognite/neat/_rules/models/_base_rules.py +0 -2
- cognite/neat/_rules/models/data_types.py +7 -0
- cognite/neat/_rules/models/dms/_exporter.py +9 -8
- cognite/neat/_rules/models/dms/_rules.py +29 -2
- cognite/neat/_rules/models/dms/_rules_input.py +9 -1
- cognite/neat/_rules/models/dms/_validation.py +115 -5
- cognite/neat/_rules/models/entities/_loaders.py +1 -1
- cognite/neat/_rules/models/entities/_multi_value.py +2 -2
- cognite/neat/_rules/models/entities/_single_value.py +8 -3
- cognite/neat/_rules/models/entities/_wrapped.py +2 -2
- cognite/neat/_rules/models/information/_rules.py +18 -17
- cognite/neat/_rules/models/information/_rules_input.py +3 -1
- cognite/neat/_rules/models/information/_validation.py +66 -17
- cognite/neat/_rules/transformers/__init__.py +8 -2
- cognite/neat/_rules/transformers/_converters.py +234 -44
- cognite/neat/_rules/transformers/_verification.py +5 -10
- cognite/neat/_session/_base.py +6 -4
- cognite/neat/_session/_explore.py +39 -0
- cognite/neat/_session/_inspect.py +25 -6
- cognite/neat/_session/_prepare.py +12 -0
- cognite/neat/_session/_read.py +88 -20
- cognite/neat/_session/_set.py +7 -1
- cognite/neat/_session/_show.py +11 -123
- cognite/neat/_session/_state.py +6 -2
- cognite/neat/_session/_subset.py +64 -0
- cognite/neat/_session/_to.py +177 -19
- cognite/neat/_store/_graph_store.py +9 -246
- cognite/neat/_utils/rdf_.py +36 -5
- cognite/neat/_utils/spreadsheet.py +44 -1
- cognite/neat/_utils/text.py +124 -37
- cognite/neat/_utils/upload.py +2 -0
- cognite/neat/_version.py +2 -2
- {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/RECORD +83 -82
- {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/WHEEL +1 -1
- cognite/neat/_graph/queries/_construct.py +0 -187
- cognite/neat/_graph/queries/_shared.py +0 -173
- cognite/neat/_rules/analysis/_dms.py +0 -57
- cognite/neat/_rules/analysis/_information.py +0 -249
- cognite/neat/_rules/models/_rdfpath.py +0 -372
- {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,22 +1,17 @@
|
|
|
1
|
-
import
|
|
1
|
+
import urllib.parse
|
|
2
2
|
from collections import defaultdict
|
|
3
3
|
from collections.abc import Iterable
|
|
4
|
-
from typing import Literal, cast, overload
|
|
4
|
+
from typing import Any, Literal, cast, overload
|
|
5
5
|
|
|
6
|
-
from rdflib import RDF, Dataset, Graph, Namespace, URIRef
|
|
6
|
+
from rdflib import RDF, XSD, Dataset, Graph, Namespace, URIRef
|
|
7
7
|
from rdflib import Literal as RdfLiteral
|
|
8
8
|
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
|
|
9
9
|
from rdflib.query import ResultRow
|
|
10
10
|
|
|
11
11
|
from cognite.neat._constants import NEAT
|
|
12
|
-
from cognite.neat._rules._constants import EntityTypes
|
|
13
|
-
from cognite.neat._rules.models.entities import ClassEntity
|
|
14
|
-
from cognite.neat._rules.models.information import InformationRules
|
|
15
12
|
from cognite.neat._shared import InstanceType
|
|
16
13
|
from cognite.neat._utils.rdf_ import remove_instance_ids_in_batch, remove_namespace_from_uri
|
|
17
14
|
|
|
18
|
-
from ._construct import build_construct_query
|
|
19
|
-
|
|
20
15
|
|
|
21
16
|
class Queries:
|
|
22
17
|
"""Helper class for storing standard queries for the graph store."""
|
|
@@ -24,20 +19,17 @@ class Queries:
|
|
|
24
19
|
def __init__(
|
|
25
20
|
self,
|
|
26
21
|
dataset: Dataset,
|
|
27
|
-
rules: dict[URIRef, InformationRules] | None = None,
|
|
28
22
|
default_named_graph: URIRef | None = None,
|
|
29
23
|
):
|
|
30
24
|
self.dataset = dataset
|
|
31
|
-
self.rules = rules or {}
|
|
32
25
|
self.default_named_graph = default_named_graph or DATASET_DEFAULT_GRAPH_ID
|
|
33
26
|
|
|
34
27
|
def graph(self, named_graph: URIRef | None = None) -> Graph:
|
|
35
28
|
"""Get named graph from the dataset to query over"""
|
|
36
29
|
return self.dataset.graph(named_graph or self.default_named_graph)
|
|
37
30
|
|
|
38
|
-
def summarize_instances(self, named_graph: URIRef | None = None) -> list[tuple]:
|
|
31
|
+
def summarize_instances(self, named_graph: URIRef | None = None) -> list[tuple[str, int]]:
|
|
39
32
|
"""Summarize instances in the graph store by class and count"""
|
|
40
|
-
|
|
41
33
|
query_statement = """ SELECT ?class (COUNT(?instance) AS ?instanceCount)
|
|
42
34
|
WHERE {
|
|
43
35
|
?instance a ?class .
|
|
@@ -45,12 +37,12 @@ class Queries:
|
|
|
45
37
|
GROUP BY ?class
|
|
46
38
|
ORDER BY DESC(?instanceCount) """
|
|
47
39
|
|
|
48
|
-
return [
|
|
40
|
+
return [ # type: ignore[misc]
|
|
49
41
|
(
|
|
50
|
-
remove_namespace_from_uri(cast(URIRef,
|
|
51
|
-
cast(RdfLiteral,
|
|
42
|
+
remove_namespace_from_uri(cast(URIRef, class_)),
|
|
43
|
+
cast(RdfLiteral, count).value,
|
|
52
44
|
)
|
|
53
|
-
for
|
|
45
|
+
for class_, count in self.graph(named_graph=named_graph).query(query_statement)
|
|
54
46
|
]
|
|
55
47
|
|
|
56
48
|
def types(self, named_graph: URIRef | None = None) -> dict[URIRef, str]:
|
|
@@ -81,6 +73,20 @@ class Queries:
|
|
|
81
73
|
for (type_,) in list(self.graph(named_graph).query(query))
|
|
82
74
|
}
|
|
83
75
|
|
|
76
|
+
def properties_by_type(self, named_graph: URIRef | None = None) -> dict[URIRef, dict[URIRef, str]]:
|
|
77
|
+
"""Properties and their short form in the graph by type
|
|
78
|
+
|
|
79
|
+
Args:
|
|
80
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
81
|
+
|
|
82
|
+
"""
|
|
83
|
+
query = """SELECT DISTINCT ?type ?property
|
|
84
|
+
WHERE {?s a ?type . ?s ?property ?o . FILTER(?property != rdf:type)}"""
|
|
85
|
+
properties_by_type: dict[URIRef, dict[URIRef, str]] = defaultdict(dict)
|
|
86
|
+
for type_, property_ in cast(ResultRow, list(self.graph(named_graph).query(query))):
|
|
87
|
+
properties_by_type[type_][property_] = remove_namespace_from_uri(property_) # type: ignore[index]
|
|
88
|
+
return properties_by_type
|
|
89
|
+
|
|
84
90
|
def property_uri(self, property_: str, named_graph: URIRef | None = None) -> list[URIRef]:
|
|
85
91
|
"""Get the URIRef of a property
|
|
86
92
|
|
|
@@ -90,73 +96,38 @@ class Queries:
|
|
|
90
96
|
"""
|
|
91
97
|
return [k for k, v in self.properties(named_graph).items() if v == property_]
|
|
92
98
|
|
|
93
|
-
|
|
99
|
+
@overload
|
|
100
|
+
def list_instances_ids(
|
|
101
|
+
self, class_uri: None = None, limit: int = -1, named_graph: URIRef | None = None
|
|
102
|
+
) -> Iterable[tuple[URIRef, URIRef]]: ...
|
|
103
|
+
|
|
104
|
+
@overload
|
|
105
|
+
def list_instances_ids(
|
|
94
106
|
self, class_uri: URIRef, limit: int = -1, named_graph: URIRef | None = None
|
|
95
|
-
) ->
|
|
96
|
-
|
|
107
|
+
) -> Iterable[URIRef]: ...
|
|
108
|
+
|
|
109
|
+
def list_instances_ids(
|
|
110
|
+
self, class_uri: URIRef | None = None, limit: int = -1, named_graph: URIRef | None = None
|
|
111
|
+
) -> Iterable[URIRef] | Iterable[tuple[URIRef, URIRef]]:
|
|
112
|
+
"""List all instance IDs
|
|
97
113
|
|
|
98
114
|
Args:
|
|
99
|
-
class_uri: Class for which instances are to be found
|
|
115
|
+
class_uri: Class for which instances are to be found, default None (all instances)
|
|
100
116
|
limit: Max number of instances to return, by default -1 meaning all instances
|
|
101
117
|
named_graph: Named graph to query over, default None (default graph)
|
|
102
118
|
|
|
103
119
|
Returns:
|
|
104
120
|
List of class instance URIs
|
|
105
121
|
"""
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
return [cast(tuple, res)[0] for res in list(self.graph(named_graph).query(query_statement))]
|
|
110
|
-
|
|
111
|
-
def list_instances_of_type(self, class_uri: URIRef, named_graph: URIRef | None = None) -> list[ResultRow]:
|
|
112
|
-
"""Get all triples for instances of a given class
|
|
113
|
-
|
|
114
|
-
Args:
|
|
115
|
-
class_uri: Class for which instances are to be found
|
|
116
|
-
named_graph: Named graph to query over, default None (default graph)
|
|
117
|
-
|
|
118
|
-
Returns:
|
|
119
|
-
List of triples for instances of the given class in the named graph
|
|
120
|
-
"""
|
|
121
|
-
query = (
|
|
122
|
-
f"SELECT ?instance ?prop ?value "
|
|
123
|
-
f"WHERE {{ ?instance rdf:type <{class_uri}> . ?instance ?prop ?value . }} order by ?instance "
|
|
124
|
-
)
|
|
125
|
-
|
|
126
|
-
# Select queries gives an iterable of result rows
|
|
127
|
-
return cast(list[ResultRow], list(self.graph(named_graph).query(query)))
|
|
128
|
-
|
|
129
|
-
def triples_of_type_instances(
|
|
130
|
-
self, rdf_type: str | URIRef, named_graph: URIRef | None = None
|
|
131
|
-
) -> list[tuple[str, str, str]]:
|
|
132
|
-
"""Get all triples of a given type.
|
|
133
|
-
|
|
134
|
-
Args:
|
|
135
|
-
rdf_type: Type URI to query
|
|
136
|
-
named_graph: Named graph to query over, default None (default graph)
|
|
137
|
-
"""
|
|
138
|
-
named_graph = named_graph or self.default_named_graph
|
|
139
|
-
if isinstance(rdf_type, URIRef):
|
|
140
|
-
rdf_uri = rdf_type
|
|
141
|
-
elif isinstance(rdf_type, str) and self.rules and self.rules.get(named_graph):
|
|
142
|
-
rdf_uri = self.rules[named_graph].metadata.namespace[rdf_type]
|
|
122
|
+
query = "SELECT DISTINCT ?subject"
|
|
123
|
+
if class_uri:
|
|
124
|
+
query += f" WHERE {{ ?subject a <{class_uri}> .}}"
|
|
143
125
|
else:
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
query = (
|
|
151
|
-
"SELECT ?instance ?prop ?value "
|
|
152
|
-
f"WHERE {{ ?instance a <{rdf_uri}> . ?instance ?prop ?value . }} "
|
|
153
|
-
"order by ?instance"
|
|
154
|
-
)
|
|
155
|
-
|
|
156
|
-
result = self.graph(named_graph).query(query)
|
|
157
|
-
|
|
158
|
-
# We cannot include the RDF.type in case there is a neat:type property
|
|
159
|
-
return [remove_namespace_from_uri(list(triple)) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index, arg-type]
|
|
126
|
+
query += " ?type WHERE {{ ?subject a ?type .}}"
|
|
127
|
+
if limit != -1:
|
|
128
|
+
query += f" LIMIT {limit}"
|
|
129
|
+
# MyPy is not very happy with RDFLib, so just ignore the type hinting here
|
|
130
|
+
return (tuple(res) if class_uri is None else res[0] for res in self.graph(named_graph).query(query)) # type: ignore[index, return-value, arg-type]
|
|
160
131
|
|
|
161
132
|
def type_with_property(self, type_: URIRef, property_uri: URIRef, named_graph: URIRef | None = None) -> bool:
|
|
162
133
|
"""Check if a property exists in the graph store
|
|
@@ -205,26 +176,25 @@ class Queries:
|
|
|
205
176
|
def describe(
|
|
206
177
|
self,
|
|
207
178
|
instance_id: URIRef,
|
|
208
|
-
instance_type:
|
|
179
|
+
instance_type: URIRef | None = None,
|
|
209
180
|
property_renaming_config: dict | None = None,
|
|
210
|
-
property_types: dict[str, EntityTypes] | None = None,
|
|
211
181
|
named_graph: URIRef | None = None,
|
|
212
|
-
|
|
182
|
+
remove_uri_namespace: bool = True,
|
|
183
|
+
) -> tuple[URIRef, dict[str | InstanceType, list[Any]]] | None:
|
|
213
184
|
"""DESCRIBE instance for a given class from the graph store
|
|
214
185
|
|
|
215
186
|
Args:
|
|
216
187
|
instance_id: Instance id for which we want to generate query
|
|
217
188
|
instance_type: Type of the instance, default None (will be inferred from triples)
|
|
218
189
|
property_renaming_config: Dictionary to rename properties, default None (no renaming)
|
|
219
|
-
property_types: Dictionary of property types, default None (helper for removal of namespace)
|
|
220
190
|
named_graph: Named graph to query over, default None (default graph)
|
|
191
|
+
remove_uri_namespace: Whether to remove the namespace from the URI, by default True
|
|
221
192
|
|
|
222
193
|
|
|
223
194
|
Returns:
|
|
224
195
|
Dictionary of instance properties
|
|
225
196
|
"""
|
|
226
|
-
property_values: dict[str, list[str]] = defaultdict(list)
|
|
227
|
-
identifier = remove_namespace_from_uri(instance_id, validation="prefix")
|
|
197
|
+
property_values: dict[str, list[str] | list[URIRef]] = defaultdict(list)
|
|
228
198
|
for _, predicate, object_ in cast(list[ResultRow], self.graph(named_graph).query(f"DESCRIBE <{instance_id}>")):
|
|
229
199
|
if object_.lower() in [
|
|
230
200
|
"",
|
|
@@ -247,78 +217,42 @@ class Queries:
|
|
|
247
217
|
property_ = RDF.type
|
|
248
218
|
renamed_property_ = property_
|
|
249
219
|
|
|
250
|
-
|
|
220
|
+
value: Any
|
|
221
|
+
if isinstance(object_, URIRef) and remove_uri_namespace:
|
|
222
|
+
# These properties contain the space in the Namespace.
|
|
251
223
|
value = remove_namespace_from_uri(object_, validation="prefix")
|
|
224
|
+
elif isinstance(object_, URIRef):
|
|
225
|
+
value = object_
|
|
252
226
|
elif isinstance(object_, RdfLiteral):
|
|
253
|
-
|
|
227
|
+
if object_.datatype == XSD._NS["json"]:
|
|
228
|
+
# For JSON literals, the .toPython() returns a Literal object.
|
|
229
|
+
value = str(object_)
|
|
230
|
+
else:
|
|
231
|
+
value = object_.toPython()
|
|
254
232
|
else:
|
|
255
233
|
# It is a blank node
|
|
256
234
|
value = str(object_)
|
|
257
235
|
|
|
258
236
|
# add type to the dictionary
|
|
259
237
|
if predicate != RDF.type:
|
|
260
|
-
property_values[renamed_property_].append(value)
|
|
238
|
+
property_values[renamed_property_].append(value) # type: ignore[arg-type]
|
|
261
239
|
else:
|
|
262
240
|
# guarding against multiple rdf:type values as this is not allowed in CDF
|
|
263
241
|
if RDF.type not in property_values:
|
|
264
|
-
property_values[RDF.type].append(
|
|
242
|
+
property_values[RDF.type].append(
|
|
243
|
+
remove_namespace_from_uri(instance_type, validation="prefix") if instance_type else value # type: ignore[arg-type]
|
|
244
|
+
)
|
|
265
245
|
else:
|
|
266
246
|
# we should not have multiple rdf:type values
|
|
267
247
|
continue
|
|
268
248
|
if property_values:
|
|
269
249
|
return (
|
|
270
|
-
|
|
250
|
+
instance_id,
|
|
271
251
|
property_values,
|
|
272
252
|
)
|
|
273
253
|
else:
|
|
274
254
|
return None
|
|
275
255
|
|
|
276
|
-
def construct_instances_of_class(
|
|
277
|
-
self,
|
|
278
|
-
class_: str,
|
|
279
|
-
properties_optional: bool = True,
|
|
280
|
-
instance_id: URIRef | None = None,
|
|
281
|
-
named_graph: URIRef | None = None,
|
|
282
|
-
) -> list[tuple[str, str, str]]:
|
|
283
|
-
"""CONSTRUCT instances for a given class from the graph store
|
|
284
|
-
|
|
285
|
-
Args:
|
|
286
|
-
class_: Class entity for which we want to generate query
|
|
287
|
-
properties_optional: Whether to make all properties optional, default True
|
|
288
|
-
instance_ids: List of instance ids to filter on, default None (all)
|
|
289
|
-
named_graph: Named graph to query over, default None (default graph
|
|
290
|
-
|
|
291
|
-
Returns:
|
|
292
|
-
List of triples for instances of the given class
|
|
293
|
-
"""
|
|
294
|
-
named_graph = named_graph or self.default_named_graph
|
|
295
|
-
if (
|
|
296
|
-
self.rules
|
|
297
|
-
and self.rules.get(named_graph)
|
|
298
|
-
and (
|
|
299
|
-
query := build_construct_query(
|
|
300
|
-
class_=ClassEntity(
|
|
301
|
-
prefix=self.rules[named_graph].metadata.prefix,
|
|
302
|
-
suffix=class_,
|
|
303
|
-
),
|
|
304
|
-
graph=self.graph(named_graph),
|
|
305
|
-
rules=self.rules[named_graph],
|
|
306
|
-
properties_optional=properties_optional,
|
|
307
|
-
instance_id=instance_id,
|
|
308
|
-
)
|
|
309
|
-
)
|
|
310
|
-
):
|
|
311
|
-
result = self.graph(named_graph).query(query)
|
|
312
|
-
|
|
313
|
-
# We cannot include the RDF.type in case there is a neat:type property
|
|
314
|
-
return [remove_namespace_from_uri(cast(ResultRow, triple)) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index, arg-type]
|
|
315
|
-
else:
|
|
316
|
-
warnings.warn(
|
|
317
|
-
"No rules found for the graph store, returning empty list.",
|
|
318
|
-
stacklevel=2,
|
|
319
|
-
)
|
|
320
|
-
return []
|
|
321
|
-
|
|
322
256
|
def list_triples(self, limit: int = 25, named_graph: URIRef | None = None) -> list[ResultRow]:
|
|
323
257
|
"""List triples in the graph store
|
|
324
258
|
|
|
@@ -346,7 +280,7 @@ class Queries:
|
|
|
346
280
|
def list_types(
|
|
347
281
|
self,
|
|
348
282
|
remove_namespace: bool = False,
|
|
349
|
-
limit: int = 25,
|
|
283
|
+
limit: int | None = 25,
|
|
350
284
|
named_graph: URIRef | None = None,
|
|
351
285
|
) -> list[ResultRow] | list[str]:
|
|
352
286
|
"""List types in the graph store
|
|
@@ -358,7 +292,9 @@ class Queries:
|
|
|
358
292
|
Returns:
|
|
359
293
|
List of types
|
|
360
294
|
"""
|
|
361
|
-
query =
|
|
295
|
+
query = "SELECT DISTINCT ?type WHERE { ?subject a ?type }"
|
|
296
|
+
if limit is not None:
|
|
297
|
+
query += f" LIMIT {limit}"
|
|
362
298
|
result = cast(list[ResultRow], list(self.graph(named_graph).query(query)))
|
|
363
299
|
if remove_namespace:
|
|
364
300
|
return [remove_namespace_from_uri(res[0]) for res in result]
|
|
@@ -411,7 +347,7 @@ class Queries:
|
|
|
411
347
|
"""
|
|
412
348
|
dropped_types: dict[URIRef, int] = {}
|
|
413
349
|
for t in type_:
|
|
414
|
-
instance_ids = self.
|
|
350
|
+
instance_ids = list(self.list_instances_ids(t))
|
|
415
351
|
dropped_types[t] = len(instance_ids)
|
|
416
352
|
remove_instance_ids_in_batch(self.graph(named_graph), instance_ids)
|
|
417
353
|
return dropped_types
|
|
@@ -438,3 +374,97 @@ class Queries:
|
|
|
438
374
|
result[remove_namespace_from_uri(instance)] = remove_namespace_from_uri(types.split(","))
|
|
439
375
|
|
|
440
376
|
return result
|
|
377
|
+
|
|
378
|
+
def count_of_type(self, class_uri: URIRef, named_graph: URIRef | None = None) -> int:
|
|
379
|
+
query = f"SELECT (COUNT(?instance) AS ?instanceCount) WHERE {{ ?instance a <{class_uri}> }}"
|
|
380
|
+
return int(next(iter(self.graph(named_graph).query(query)))[0]) # type: ignore[arg-type, index]
|
|
381
|
+
|
|
382
|
+
def types_with_instance_and_property_count(
|
|
383
|
+
self, remove_namespace: bool = True, named_graph: URIRef | None = None
|
|
384
|
+
) -> list[dict[str, Any]]:
|
|
385
|
+
query = """
|
|
386
|
+
SELECT ?type (COUNT(DISTINCT ?instance) AS ?instanceCount) (COUNT(DISTINCT ?property) AS ?propertyCount)
|
|
387
|
+
WHERE {
|
|
388
|
+
?instance a ?type .
|
|
389
|
+
?instance ?property ?value .
|
|
390
|
+
FILTER(?property != rdf:type)
|
|
391
|
+
}
|
|
392
|
+
GROUP BY ?type
|
|
393
|
+
ORDER BY DESC(?instanceCount)"""
|
|
394
|
+
return [
|
|
395
|
+
{
|
|
396
|
+
"type": urllib.parse.unquote(remove_namespace_from_uri(type_)) if remove_namespace else type_,
|
|
397
|
+
"instanceCount": cast(RdfLiteral, instance_count).toPython(),
|
|
398
|
+
"propertyCount": cast(RdfLiteral, property_count).toPython(),
|
|
399
|
+
}
|
|
400
|
+
for type_, instance_count, property_count in list(
|
|
401
|
+
cast(list[ResultRow], self.graph(named_graph).query(query))
|
|
402
|
+
)
|
|
403
|
+
]
|
|
404
|
+
|
|
405
|
+
def properties_with_count(
|
|
406
|
+
self, remove_namespace: bool = True, named_graph: URIRef | None = None
|
|
407
|
+
) -> list[dict[str, Any]]:
|
|
408
|
+
instance_count_by_type = {
|
|
409
|
+
entry["type"]: entry["instanceCount"]
|
|
410
|
+
for entry in self.types_with_instance_and_property_count(remove_namespace=False, named_graph=named_graph)
|
|
411
|
+
}
|
|
412
|
+
query = """SELECT ?type ?property (COUNT(DISTINCT ?instance) AS ?instanceCount)
|
|
413
|
+
WHERE {
|
|
414
|
+
?instance a ?type .
|
|
415
|
+
?instance ?property ?value .
|
|
416
|
+
FILTER(?property != rdf:type)
|
|
417
|
+
}
|
|
418
|
+
GROUP BY ?type ?property
|
|
419
|
+
ORDER BY ASC(?type) ASC(?property)"""
|
|
420
|
+
return [
|
|
421
|
+
{
|
|
422
|
+
"type": urllib.parse.unquote(remove_namespace_from_uri(type_)) if remove_namespace else type_,
|
|
423
|
+
"property": urllib.parse.unquote(remove_namespace_from_uri(property)) if remove_namespace else property,
|
|
424
|
+
"instanceCount": cast(RdfLiteral, instance_count).toPython(),
|
|
425
|
+
"total": instance_count_by_type[type_],
|
|
426
|
+
}
|
|
427
|
+
for type_, property, instance_count in list(cast(list[ResultRow], self.graph(named_graph).query(query)))
|
|
428
|
+
]
|
|
429
|
+
|
|
430
|
+
@overload
|
|
431
|
+
def instances_with_properties(
|
|
432
|
+
self, type: URIRef, remove_namespace: Literal[False], named_graph: URIRef | None = None
|
|
433
|
+
) -> dict[URIRef, set[URIRef]]: ...
|
|
434
|
+
|
|
435
|
+
@overload
|
|
436
|
+
def instances_with_properties(
|
|
437
|
+
self, type: URIRef, remove_namespace: Literal[True], named_graph: URIRef | None = None
|
|
438
|
+
) -> dict[str, set[str]]: ...
|
|
439
|
+
|
|
440
|
+
def instances_with_properties(
|
|
441
|
+
self, type: URIRef, remove_namespace: bool = True, named_graph: URIRef | None = None
|
|
442
|
+
) -> dict[str, set[str]] | dict[URIRef, set[URIRef]]:
|
|
443
|
+
query = """SELECT DISTINCT ?instance ?property
|
|
444
|
+
WHERE {{
|
|
445
|
+
?instance a <{type}> .
|
|
446
|
+
?instance ?property ?value .
|
|
447
|
+
FILTER(?property != rdf:type)
|
|
448
|
+
}}"""
|
|
449
|
+
result = defaultdict(set)
|
|
450
|
+
for instance, property_ in cast(Iterable[ResultRow], self.graph(named_graph).query(query.format(type=type))):
|
|
451
|
+
instance_str = urllib.parse.unquote(remove_namespace_from_uri(instance)) if remove_namespace else instance
|
|
452
|
+
property_str = urllib.parse.unquote(remove_namespace_from_uri(property_)) if remove_namespace else property_
|
|
453
|
+
result[instance_str].add(property_str)
|
|
454
|
+
return result
|
|
455
|
+
|
|
456
|
+
def list_instances_ids_by_space(
|
|
457
|
+
self, space_property: URIRef, named_graph: URIRef | None = None
|
|
458
|
+
) -> Iterable[tuple[URIRef, str]]:
|
|
459
|
+
"""Returns instance ids by space"""
|
|
460
|
+
query = f"""SELECT DISTINCT ?instance ?space
|
|
461
|
+
WHERE {{?instance <{space_property}> ?space}}"""
|
|
462
|
+
|
|
463
|
+
for result in cast(Iterable[ResultRow], self.graph(named_graph).query(query)):
|
|
464
|
+
instance_id, space = cast(tuple[URIRef, URIRef | RdfLiteral], result)
|
|
465
|
+
if isinstance(space, URIRef):
|
|
466
|
+
yield instance_id, remove_namespace_from_uri(space)
|
|
467
|
+
elif isinstance(space, RdfLiteral):
|
|
468
|
+
yield instance_id, str(space.toPython())
|
|
469
|
+
else:
|
|
470
|
+
yield instance_id, str(space)
|
|
@@ -16,12 +16,11 @@ from ._prune_graph import (
|
|
|
16
16
|
PruneInstancesOfUnknownType,
|
|
17
17
|
PruneTypes,
|
|
18
18
|
)
|
|
19
|
-
from ._rdfpath import
|
|
19
|
+
from ._rdfpath import MakeConnectionOnExactMatch
|
|
20
20
|
from ._value_type import ConnectionToLiteral, ConvertLiteral, LiteralToEntity, SetType, SplitMultiValueProperty
|
|
21
21
|
|
|
22
22
|
__all__ = [
|
|
23
23
|
"AddAssetDepth",
|
|
24
|
-
"AddSelfReferenceProperty",
|
|
25
24
|
"AssetEventConnector",
|
|
26
25
|
"AssetFileConnector",
|
|
27
26
|
"AssetRelationshipConnector",
|
|
@@ -49,7 +48,6 @@ Transformers = (
|
|
|
49
48
|
| AssetFileConnector
|
|
50
49
|
| AssetEventConnector
|
|
51
50
|
| AssetRelationshipConnector
|
|
52
|
-
| AddSelfReferenceProperty
|
|
53
51
|
| SplitMultiValueProperty
|
|
54
52
|
| RelationshipAsEdgeTransformer
|
|
55
53
|
| MakeConnectionOnExactMatch
|
|
@@ -240,50 +240,36 @@ class AssetRelationshipConnector(BaseTransformerStandardised):
|
|
|
240
240
|
str(extractors.RelationshipsExtractor.__name__),
|
|
241
241
|
}
|
|
242
242
|
)
|
|
243
|
-
_asset_template: str = """SELECT ?source ?target WHERE {{
|
|
244
|
-
<{relationship_id}> <{relationship_source_xid_prop}> ?source_xid .
|
|
245
|
-
?source <{asset_xid_property}> ?source_xid .
|
|
246
|
-
?source a <{asset_type}> .
|
|
247
|
-
|
|
248
|
-
<{relationship_id}> <{relationship_target_xid_prop}> ?target_xid .
|
|
249
|
-
?target <{asset_xid_property}> ?target_xid .
|
|
250
|
-
?target a <{asset_type}> .}}"""
|
|
251
243
|
|
|
252
244
|
def _count_query(self) -> str:
|
|
253
|
-
query = """SELECT (COUNT(?
|
|
245
|
+
query = """SELECT (COUNT(?target_xid) as ?count) WHERE {{
|
|
254
246
|
?relationship a <{relationship_type}> .
|
|
255
247
|
?relationship <{relationship_source_xid_prop}> ?source_xid .
|
|
256
|
-
?
|
|
257
|
-
?source a <{asset_type}> .
|
|
248
|
+
?source_xid a <{asset_type}> .
|
|
258
249
|
|
|
259
250
|
?relationship <{relationship_target_xid_prop}> ?target_xid .
|
|
260
|
-
?
|
|
261
|
-
?target a <{asset_type}> .}}"""
|
|
251
|
+
?target_xid a <{asset_type}> .}}"""
|
|
262
252
|
|
|
263
253
|
return query.format(
|
|
264
254
|
relationship_type=self.relationship_type,
|
|
265
255
|
relationship_source_xid_prop=self.relationship_source_xid_prop,
|
|
266
256
|
relationship_target_xid_prop=self.relationship_target_xid_prop,
|
|
267
|
-
asset_xid_property=self.asset_xid_property,
|
|
268
257
|
asset_type=self.asset_type,
|
|
269
258
|
)
|
|
270
259
|
|
|
271
260
|
def _iterate_query(self) -> str:
|
|
272
|
-
query = """SELECT ?
|
|
261
|
+
query = """SELECT ?source_xid ?relationship ?target_xid WHERE {{
|
|
273
262
|
?relationship a <{relationship_type}> .
|
|
274
263
|
?relationship <{relationship_source_xid_prop}> ?source_xid .
|
|
275
|
-
?
|
|
276
|
-
?source a <{asset_type}> .
|
|
264
|
+
?source_xid a <{asset_type}> .
|
|
277
265
|
|
|
278
266
|
?relationship <{relationship_target_xid_prop}> ?target_xid .
|
|
279
|
-
?
|
|
280
|
-
?target a <{asset_type}> .}}"""
|
|
267
|
+
?target_xid a <{asset_type}> .}}"""
|
|
281
268
|
|
|
282
269
|
return query.format(
|
|
283
270
|
relationship_type=self.relationship_type,
|
|
284
271
|
relationship_source_xid_prop=self.relationship_source_xid_prop,
|
|
285
272
|
relationship_target_xid_prop=self.relationship_target_xid_prop,
|
|
286
|
-
asset_xid_property=self.asset_xid_property,
|
|
287
273
|
asset_type=self.asset_type,
|
|
288
274
|
)
|
|
289
275
|
|
|
@@ -293,13 +279,11 @@ class AssetRelationshipConnector(BaseTransformerStandardised):
|
|
|
293
279
|
relationship_type: URIRef | None = None,
|
|
294
280
|
relationship_source_xid_prop: URIRef | None = None,
|
|
295
281
|
relationship_target_xid_prop: URIRef | None = None,
|
|
296
|
-
asset_xid_property: URIRef | None = None,
|
|
297
282
|
):
|
|
298
283
|
self.asset_type = asset_type or DEFAULT_NAMESPACE.Asset
|
|
299
284
|
self.relationship_type = relationship_type or DEFAULT_NAMESPACE.Relationship
|
|
300
285
|
self.relationship_source_xid_prop = relationship_source_xid_prop or DEFAULT_NAMESPACE.sourceExternalId
|
|
301
286
|
self.relationship_target_xid_prop = relationship_target_xid_prop or DEFAULT_NAMESPACE.targetExternalId
|
|
302
|
-
self.asset_xid_property = asset_xid_property or DEFAULT_NAMESPACE.externalId
|
|
303
287
|
|
|
304
288
|
def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
|
|
305
289
|
row_output = RowTransformationOutput()
|
|
@@ -1,59 +1,12 @@
|
|
|
1
1
|
from typing import cast
|
|
2
2
|
from urllib.parse import quote
|
|
3
3
|
|
|
4
|
-
from rdflib import
|
|
4
|
+
from rdflib import Namespace, URIRef
|
|
5
5
|
from rdflib.query import ResultRow
|
|
6
6
|
|
|
7
|
-
from cognite.neat._rules.analysis import InformationAnalysis
|
|
8
|
-
from cognite.neat._rules.models._rdfpath import RDFPath, SingleProperty
|
|
9
|
-
from cognite.neat._rules.models.information import InformationRules
|
|
10
7
|
from cognite.neat._utils.rdf_ import get_namespace, remove_namespace_from_uri
|
|
11
8
|
|
|
12
|
-
from ._base import
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class ReduceHopTraversal(BaseTransformer):
|
|
16
|
-
"""ReduceHopTraversal is a transformer that reduces the number of hops to direct connection."""
|
|
17
|
-
|
|
18
|
-
...
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
# TODO: Standardise
|
|
22
|
-
class AddSelfReferenceProperty(BaseTransformer):
|
|
23
|
-
description: str = "Adds property that contains id of reference to all references of given class in Rules"
|
|
24
|
-
_use_only_once: bool = True
|
|
25
|
-
_need_changes = frozenset({})
|
|
26
|
-
_ref_template: str = """SELECT ?s WHERE {{?s a <{type_}>}}"""
|
|
27
|
-
|
|
28
|
-
def __init__(
|
|
29
|
-
self,
|
|
30
|
-
rules: InformationRules,
|
|
31
|
-
):
|
|
32
|
-
self.rules = rules
|
|
33
|
-
self.properties = InformationAnalysis(rules).all_reference_transformations()
|
|
34
|
-
|
|
35
|
-
def transform(self, graph: Graph) -> None:
|
|
36
|
-
for property_ in self.properties:
|
|
37
|
-
prefix = property_.instance_source.traversal.class_.prefix
|
|
38
|
-
suffix = property_.instance_source.traversal.class_.suffix
|
|
39
|
-
|
|
40
|
-
namespace = self.rules.prefixes[prefix] if prefix in self.rules.prefixes else self.rules.metadata.namespace
|
|
41
|
-
|
|
42
|
-
for (reference,) in graph.query(self._ref_template.format(type_=namespace[suffix])): # type: ignore [misc]
|
|
43
|
-
graph.add(
|
|
44
|
-
(
|
|
45
|
-
reference,
|
|
46
|
-
self.rules.metadata.namespace[property_.property_],
|
|
47
|
-
reference,
|
|
48
|
-
)
|
|
49
|
-
)
|
|
50
|
-
|
|
51
|
-
traversal = SingleProperty.from_string(
|
|
52
|
-
class_=property_.view.id,
|
|
53
|
-
property_=f"{self.rules.metadata.prefix}:{property_.property_}",
|
|
54
|
-
)
|
|
55
|
-
|
|
56
|
-
property_.instance_source = RDFPath(traversal=traversal)
|
|
9
|
+
from ._base import BaseTransformerStandardised, RowTransformationOutput
|
|
57
10
|
|
|
58
11
|
|
|
59
12
|
class MakeConnectionOnExactMatch(BaseTransformerStandardised):
|
cognite/neat/_issues/__init__.py
CHANGED
|
@@ -2,24 +2,19 @@
|
|
|
2
2
|
as some helper classes to handle them like NeatIssueList"""
|
|
3
3
|
|
|
4
4
|
from ._base import (
|
|
5
|
-
DefaultWarning,
|
|
6
5
|
IssueList,
|
|
7
6
|
MultiValueError,
|
|
8
7
|
NeatError,
|
|
9
8
|
NeatIssue,
|
|
10
|
-
NeatIssueList,
|
|
11
9
|
NeatWarning,
|
|
12
|
-
catch_issues,
|
|
13
|
-
catch_warnings,
|
|
14
10
|
)
|
|
11
|
+
from ._contextmanagers import catch_issues, catch_warnings
|
|
15
12
|
|
|
16
13
|
__all__ = [
|
|
17
|
-
"DefaultWarning",
|
|
18
14
|
"IssueList",
|
|
19
15
|
"MultiValueError",
|
|
20
16
|
"NeatError",
|
|
21
17
|
"NeatIssue",
|
|
22
|
-
"NeatIssueList",
|
|
23
18
|
"NeatWarning",
|
|
24
19
|
"catch_issues",
|
|
25
20
|
"catch_warnings",
|