cognite-neat 0.107.0__py3-none-any.whl → 0.108.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_constants.py +35 -1
- cognite/neat/_graph/_shared.py +4 -0
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +115 -14
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +83 -6
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +48 -12
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +19 -1
- cognite/neat/_graph/extractors/_dms.py +162 -47
- cognite/neat/_graph/extractors/_dms_graph.py +54 -4
- cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
- cognite/neat/_graph/extractors/_rdf_file.py +3 -2
- cognite/neat/_graph/loaders/__init__.py +1 -3
- cognite/neat/_graph/loaders/_rdf2dms.py +20 -10
- cognite/neat/_graph/queries/_base.py +140 -84
- cognite/neat/_graph/queries/_construct.py +1 -1
- cognite/neat/_graph/transformers/__init__.py +3 -1
- cognite/neat/_graph/transformers/_value_type.py +54 -3
- cognite/neat/_issues/errors/_resources.py +1 -1
- cognite/neat/_issues/warnings/__init__.py +0 -2
- cognite/neat/_issues/warnings/_models.py +1 -1
- cognite/neat/_issues/warnings/_properties.py +0 -8
- cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
- cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
- cognite/neat/_rules/importers/__init__.py +3 -1
- cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
- cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
- cognite/neat/_rules/importers/_rdf/_base.py +2 -2
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +241 -18
- cognite/neat/_rules/models/_base_rules.py +13 -3
- cognite/neat/_rules/models/dms/_rules.py +1 -8
- cognite/neat/_rules/models/dms/_rules_input.py +4 -0
- cognite/neat/_rules/models/information/_rules_input.py +5 -0
- cognite/neat/_rules/transformers/__init__.py +6 -0
- cognite/neat/_rules/transformers/_converters.py +98 -7
- cognite/neat/_session/_base.py +55 -4
- cognite/neat/_session/_drop.py +5 -1
- cognite/neat/_session/_inspect.py +3 -2
- cognite/neat/_session/_read.py +61 -14
- cognite/neat/_session/_set.py +27 -0
- cognite/neat/_session/_show.py +4 -4
- cognite/neat/_session/_state.py +8 -4
- cognite/neat/_session/_to.py +4 -1
- cognite/neat/_session/_wizard.py +1 -1
- cognite/neat/_session/exceptions.py +2 -1
- cognite/neat/_store/_graph_store.py +287 -133
- cognite/neat/_store/_rules_store.py +108 -1
- cognite/neat/_utils/auth.py +1 -1
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/RECORD +52 -52
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/entry_points.txt +0 -0
|
@@ -3,11 +3,12 @@ from collections import defaultdict
|
|
|
3
3
|
from collections.abc import Iterable
|
|
4
4
|
from typing import Literal, cast, overload
|
|
5
5
|
|
|
6
|
-
from rdflib import RDF, Graph, Namespace, URIRef
|
|
6
|
+
from rdflib import RDF, Dataset, Graph, Namespace, URIRef
|
|
7
7
|
from rdflib import Literal as RdfLiteral
|
|
8
|
+
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
|
|
8
9
|
from rdflib.query import ResultRow
|
|
9
10
|
|
|
10
|
-
from cognite.neat._constants import
|
|
11
|
+
from cognite.neat._constants import NEAT
|
|
11
12
|
from cognite.neat._rules._constants import EntityTypes
|
|
12
13
|
from cognite.neat._rules.models.entities import ClassEntity
|
|
13
14
|
from cognite.neat._rules.models.information import InformationRules
|
|
@@ -20,11 +21,21 @@ from ._construct import build_construct_query
|
|
|
20
21
|
class Queries:
|
|
21
22
|
"""Helper class for storing standard queries for the graph store."""
|
|
22
23
|
|
|
23
|
-
def __init__(
|
|
24
|
-
self
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
24
|
+
def __init__(
|
|
25
|
+
self,
|
|
26
|
+
dataset: Dataset,
|
|
27
|
+
rules: dict[URIRef, InformationRules] | None = None,
|
|
28
|
+
default_named_graph: URIRef | None = None,
|
|
29
|
+
):
|
|
30
|
+
self.dataset = dataset
|
|
31
|
+
self.rules = rules or {}
|
|
32
|
+
self.default_named_graph = default_named_graph or DATASET_DEFAULT_GRAPH_ID
|
|
33
|
+
|
|
34
|
+
def graph(self, named_graph: URIRef | None = None) -> Graph:
|
|
35
|
+
"""Get named graph from the dataset to query over"""
|
|
36
|
+
return self.dataset.graph(named_graph or self.default_named_graph)
|
|
37
|
+
|
|
38
|
+
def summarize_instances(self, named_graph: URIRef | None = None) -> list[tuple]:
|
|
28
39
|
"""Summarize instances in the graph store by class and count"""
|
|
29
40
|
|
|
30
41
|
query_statement = """ SELECT ?class (COUNT(?instance) AS ?instanceCount)
|
|
@@ -39,36 +50,55 @@ class Queries:
|
|
|
39
50
|
remove_namespace_from_uri(cast(URIRef, cast(tuple, res)[0])),
|
|
40
51
|
cast(RdfLiteral, cast(tuple, res)[1]).value,
|
|
41
52
|
)
|
|
42
|
-
for res in list(self.graph.query(query_statement))
|
|
53
|
+
for res in list(self.graph(named_graph=named_graph).query(query_statement))
|
|
43
54
|
]
|
|
44
55
|
|
|
45
|
-
|
|
46
|
-
def types(self) -> dict[URIRef, str]:
|
|
56
|
+
def types(self, named_graph: URIRef | None = None) -> dict[URIRef, str]:
|
|
47
57
|
"""Types and their short form in the graph"""
|
|
48
58
|
query = """SELECT DISTINCT ?type
|
|
49
59
|
WHERE {?s a ?type .}"""
|
|
50
|
-
return {type_: remove_namespace_from_uri(cast(URIRef, type_)) for (type_,) in list(self.graph.query(query))} # type: ignore[misc, index, arg-type]
|
|
51
60
|
|
|
52
|
-
|
|
61
|
+
return { # type: ignore[misc, index, arg-type]
|
|
62
|
+
cast(URIRef, type_): remove_namespace_from_uri(cast(URIRef, type_))
|
|
63
|
+
for (type_,) in list(self.graph(named_graph).query(query))
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
def type_uri(self, type_: str, named_graph: URIRef | None = None) -> list[URIRef]:
|
|
53
67
|
"""Get the URIRef of a type"""
|
|
54
|
-
return [k for k, v in self.types.items() if v == type_]
|
|
68
|
+
return [k for k, v in self.types(named_graph).items() if v == type_]
|
|
69
|
+
|
|
70
|
+
def properties(self, named_graph: URIRef | None = None) -> dict[URIRef, str]:
|
|
71
|
+
"""Properties and their short form in the graph
|
|
55
72
|
|
|
56
|
-
|
|
57
|
-
|
|
73
|
+
Args:
|
|
74
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
75
|
+
|
|
76
|
+
"""
|
|
58
77
|
query = """SELECT DISTINCT ?property
|
|
59
78
|
WHERE {?s ?property ?o . FILTER(?property != rdf:type)}"""
|
|
60
|
-
return {
|
|
79
|
+
return { # type: ignore[misc, index, arg-type]
|
|
80
|
+
cast(URIRef, type_): remove_namespace_from_uri(cast(URIRef, type_))
|
|
81
|
+
for (type_,) in list(self.graph(named_graph).query(query))
|
|
82
|
+
}
|
|
61
83
|
|
|
62
|
-
def property_uri(self, property_: str) -> list[URIRef]:
|
|
63
|
-
"""Get the URIRef of a
|
|
64
|
-
return [k for k, v in self.properties.items() if v == property_]
|
|
84
|
+
def property_uri(self, property_: str, named_graph: URIRef | None = None) -> list[URIRef]:
|
|
85
|
+
"""Get the URIRef of a property
|
|
65
86
|
|
|
66
|
-
|
|
87
|
+
Args:
|
|
88
|
+
property_: Property to find URIRef for
|
|
89
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
90
|
+
"""
|
|
91
|
+
return [k for k, v in self.properties(named_graph).items() if v == property_]
|
|
92
|
+
|
|
93
|
+
def list_instances_ids_of_class(
|
|
94
|
+
self, class_uri: URIRef, limit: int = -1, named_graph: URIRef | None = None
|
|
95
|
+
) -> list[URIRef]:
|
|
67
96
|
"""Get instances ids for a given class
|
|
68
97
|
|
|
69
98
|
Args:
|
|
70
99
|
class_uri: Class for which instances are to be found
|
|
71
100
|
limit: Max number of instances to return, by default -1 meaning all instances
|
|
101
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
72
102
|
|
|
73
103
|
Returns:
|
|
74
104
|
List of class instance URIs
|
|
@@ -76,16 +106,17 @@ class Queries:
|
|
|
76
106
|
query_statement = "SELECT DISTINCT ?subject WHERE { ?subject a <class> .} LIMIT X".replace(
|
|
77
107
|
"class", class_uri
|
|
78
108
|
).replace("LIMIT X", "" if limit == -1 else f"LIMIT {limit}")
|
|
79
|
-
return [cast(tuple, res)[0] for res in list(self.graph.query(query_statement))]
|
|
109
|
+
return [cast(tuple, res)[0] for res in list(self.graph(named_graph).query(query_statement))]
|
|
80
110
|
|
|
81
|
-
def list_instances_of_type(self, class_uri: URIRef) -> list[ResultRow]:
|
|
111
|
+
def list_instances_of_type(self, class_uri: URIRef, named_graph: URIRef | None = None) -> list[ResultRow]:
|
|
82
112
|
"""Get all triples for instances of a given class
|
|
83
113
|
|
|
84
114
|
Args:
|
|
85
115
|
class_uri: Class for which instances are to be found
|
|
116
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
86
117
|
|
|
87
118
|
Returns:
|
|
88
|
-
List of triples for instances of the given class
|
|
119
|
+
List of triples for instances of the given class in the named graph
|
|
89
120
|
"""
|
|
90
121
|
query = (
|
|
91
122
|
f"SELECT ?instance ?prop ?value "
|
|
@@ -93,17 +124,22 @@ class Queries:
|
|
|
93
124
|
)
|
|
94
125
|
|
|
95
126
|
# Select queries gives an iterable of result rows
|
|
96
|
-
return cast(list[ResultRow], list(self.graph.query(query)))
|
|
127
|
+
return cast(list[ResultRow], list(self.graph(named_graph).query(query)))
|
|
97
128
|
|
|
98
|
-
def triples_of_type_instances(
|
|
129
|
+
def triples_of_type_instances(
|
|
130
|
+
self, rdf_type: str | URIRef, named_graph: URIRef | None = None
|
|
131
|
+
) -> list[tuple[str, str, str]]:
|
|
99
132
|
"""Get all triples of a given type.
|
|
100
133
|
|
|
101
|
-
|
|
134
|
+
Args:
|
|
135
|
+
rdf_type: Type URI to query
|
|
136
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
102
137
|
"""
|
|
138
|
+
named_graph = named_graph or self.default_named_graph
|
|
103
139
|
if isinstance(rdf_type, URIRef):
|
|
104
140
|
rdf_uri = rdf_type
|
|
105
|
-
elif isinstance(rdf_type, str) and self.rules:
|
|
106
|
-
rdf_uri = self.rules.metadata.namespace[rdf_type]
|
|
141
|
+
elif isinstance(rdf_type, str) and self.rules and self.rules.get(named_graph):
|
|
142
|
+
rdf_uri = self.rules[named_graph].metadata.namespace[rdf_type]
|
|
107
143
|
else:
|
|
108
144
|
warnings.warn(
|
|
109
145
|
"Unknown namespace. Please either provide a URIRef or set the rules of the store.",
|
|
@@ -117,46 +153,50 @@ class Queries:
|
|
|
117
153
|
"order by ?instance"
|
|
118
154
|
)
|
|
119
155
|
|
|
120
|
-
result = self.graph.query(query)
|
|
156
|
+
result = self.graph(named_graph).query(query)
|
|
121
157
|
|
|
122
158
|
# We cannot include the RDF.type in case there is a neat:type property
|
|
123
159
|
return [remove_namespace_from_uri(list(triple)) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index, arg-type]
|
|
124
160
|
|
|
125
|
-
def type_with_property(self, type_: URIRef, property_uri: URIRef) -> bool:
|
|
161
|
+
def type_with_property(self, type_: URIRef, property_uri: URIRef, named_graph: URIRef | None = None) -> bool:
|
|
126
162
|
"""Check if a property exists in the graph store
|
|
127
163
|
|
|
128
164
|
Args:
|
|
165
|
+
type_: Type URI to check
|
|
129
166
|
property_uri: Property URI to check
|
|
167
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
130
168
|
|
|
131
169
|
Returns:
|
|
132
170
|
True if property exists, False otherwise
|
|
133
171
|
"""
|
|
134
172
|
query = f"SELECT ?o WHERE {{ ?s a <{type_}> ; <{property_uri}> ?o .}} Limit 1"
|
|
135
|
-
return bool(list(self.graph.query(query)))
|
|
173
|
+
return bool(list(self.graph(named_graph).query(query)))
|
|
136
174
|
|
|
137
|
-
def has_namespace(self, namespace: Namespace) -> bool:
|
|
175
|
+
def has_namespace(self, namespace: Namespace, named_graph: URIRef | None = None) -> bool:
|
|
138
176
|
"""Check if a namespace exists in the graph store
|
|
139
177
|
|
|
140
178
|
Args:
|
|
141
179
|
namespace: Namespace to check
|
|
180
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
142
181
|
|
|
143
182
|
Returns:
|
|
144
183
|
True if namespace exists, False otherwise
|
|
145
184
|
"""
|
|
146
185
|
query = f"ASK WHERE {{ ?s ?p ?o . FILTER(STRSTARTS(STR(?p), STR(<{namespace}>))) }}"
|
|
147
|
-
return bool(self.graph.query(query))
|
|
186
|
+
return bool(self.graph(named_graph).query(query))
|
|
148
187
|
|
|
149
|
-
def has_type(self, type_: URIRef) -> bool:
|
|
188
|
+
def has_type(self, type_: URIRef, named_graph: URIRef | None = None) -> bool:
|
|
150
189
|
"""Check if a type exists in the graph store
|
|
151
190
|
|
|
152
191
|
Args:
|
|
153
192
|
type_: Type to check
|
|
193
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
154
194
|
|
|
155
195
|
Returns:
|
|
156
196
|
True if type exists, False otherwise
|
|
157
197
|
"""
|
|
158
198
|
query = f"ASK WHERE {{ ?s a <{type_}> }}"
|
|
159
|
-
return bool(self.graph.query(query))
|
|
199
|
+
return bool(self.graph(named_graph).query(query))
|
|
160
200
|
|
|
161
201
|
def describe(
|
|
162
202
|
self,
|
|
@@ -164,6 +204,7 @@ class Queries:
|
|
|
164
204
|
instance_type: str | None = None,
|
|
165
205
|
property_renaming_config: dict | None = None,
|
|
166
206
|
property_types: dict[str, EntityTypes] | None = None,
|
|
207
|
+
named_graph: URIRef | None = None,
|
|
167
208
|
) -> tuple[str, dict[str | InstanceType, list[str]]] | None:
|
|
168
209
|
"""DESCRIBE instance for a given class from the graph store
|
|
169
210
|
|
|
@@ -172,6 +213,7 @@ class Queries:
|
|
|
172
213
|
instance_type: Type of the instance, default None (will be inferred from triples)
|
|
173
214
|
property_renaming_config: Dictionary to rename properties, default None (no renaming)
|
|
174
215
|
property_types: Dictionary of property types, default None (helper for removal of namespace)
|
|
216
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
175
217
|
|
|
176
218
|
|
|
177
219
|
Returns:
|
|
@@ -179,7 +221,7 @@ class Queries:
|
|
|
179
221
|
"""
|
|
180
222
|
property_values: dict[str, list[str]] = defaultdict(list)
|
|
181
223
|
identifier = remove_namespace_from_uri(instance_id, validation="prefix")
|
|
182
|
-
for _, predicate, object_ in cast(list[ResultRow], self.graph.query(f"DESCRIBE <{instance_id}>")):
|
|
224
|
+
for _, predicate, object_ in cast(list[ResultRow], self.graph(named_graph).query(f"DESCRIBE <{instance_id}>")):
|
|
183
225
|
if object_.lower() in [
|
|
184
226
|
"",
|
|
185
227
|
"none",
|
|
@@ -190,45 +232,28 @@ class Queries:
|
|
|
190
232
|
|
|
191
233
|
# set property
|
|
192
234
|
if property_renaming_config and predicate != RDF.type:
|
|
193
|
-
property_ =
|
|
194
|
-
|
|
195
|
-
|
|
235
|
+
property_ = remove_namespace_from_uri(predicate, validation="prefix")
|
|
236
|
+
renamed_property_ = property_renaming_config.get(predicate, property_)
|
|
237
|
+
|
|
196
238
|
elif not property_renaming_config and predicate != RDF.type:
|
|
197
239
|
property_ = remove_namespace_from_uri(predicate, validation="prefix")
|
|
240
|
+
renamed_property_ = property_
|
|
241
|
+
|
|
198
242
|
else:
|
|
199
243
|
property_ = RDF.type
|
|
244
|
+
renamed_property_ = property_
|
|
200
245
|
|
|
201
|
-
|
|
202
|
-
# if it is URIRef and property type is object property, we need to remove namespace
|
|
203
|
-
# if it URIref but we are doing this into data type property, we do not remove namespace
|
|
204
|
-
# case 1 for RDF type we remove namespace
|
|
205
|
-
if property_ == RDF.type:
|
|
206
|
-
value = remove_namespace_from_uri(object_, validation="prefix")
|
|
207
|
-
|
|
208
|
-
# case 2 for define object properties we remove namespace
|
|
209
|
-
elif (
|
|
210
|
-
isinstance(object_, URIRef)
|
|
211
|
-
and property_types
|
|
212
|
-
and (
|
|
213
|
-
property_types.get(property_, None) == EntityTypes.object_property
|
|
214
|
-
or property_types.get(property_, None) == EntityTypes.undefined
|
|
215
|
-
)
|
|
216
|
-
):
|
|
246
|
+
if isinstance(object_, URIRef):
|
|
217
247
|
value = remove_namespace_from_uri(object_, validation="prefix")
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
elif isinstance(object_, URIRef) and not property_types:
|
|
221
|
-
value = remove_namespace_from_uri(object_, validation="prefix")
|
|
222
|
-
|
|
223
|
-
# case 4 for data type properties we do not remove namespace but keep the entire value
|
|
224
|
-
# but we drop the datatype part, and keep everything to be string (data loader will do the conversion)
|
|
225
|
-
# for value type it expects (if possible)
|
|
248
|
+
elif isinstance(object_, RdfLiteral):
|
|
249
|
+
value = object_.toPython()
|
|
226
250
|
else:
|
|
251
|
+
# It is a blank node
|
|
227
252
|
value = str(object_)
|
|
228
253
|
|
|
229
254
|
# add type to the dictionary
|
|
230
255
|
if predicate != RDF.type:
|
|
231
|
-
property_values[
|
|
256
|
+
property_values[renamed_property_].append(value)
|
|
232
257
|
else:
|
|
233
258
|
# guarding against multiple rdf:type values as this is not allowed in CDF
|
|
234
259
|
if RDF.type not in property_values:
|
|
@@ -249,6 +274,7 @@ class Queries:
|
|
|
249
274
|
class_: str,
|
|
250
275
|
properties_optional: bool = True,
|
|
251
276
|
instance_id: URIRef | None = None,
|
|
277
|
+
named_graph: URIRef | None = None,
|
|
252
278
|
) -> list[tuple[str, str, str]]:
|
|
253
279
|
"""CONSTRUCT instances for a given class from the graph store
|
|
254
280
|
|
|
@@ -256,21 +282,29 @@ class Queries:
|
|
|
256
282
|
class_: Class entity for which we want to generate query
|
|
257
283
|
properties_optional: Whether to make all properties optional, default True
|
|
258
284
|
instance_id: Instance id to filter on, default None (all)
|
|
285
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
259
286
|
|
|
260
287
|
Returns:
|
|
261
288
|
List of triples for instances of the given class
|
|
262
289
|
"""
|
|
263
|
-
|
|
264
|
-
if
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
290
|
+
named_graph = named_graph or self.default_named_graph
|
|
291
|
+
if (
|
|
292
|
+
self.rules
|
|
293
|
+
and self.rules.get(named_graph)
|
|
294
|
+
and (
|
|
295
|
+
query := build_construct_query(
|
|
296
|
+
class_=ClassEntity(
|
|
297
|
+
prefix=self.rules[named_graph].metadata.prefix,
|
|
298
|
+
suffix=class_,
|
|
299
|
+
),
|
|
300
|
+
graph=self.graph(named_graph),
|
|
301
|
+
rules=self.rules[named_graph],
|
|
302
|
+
properties_optional=properties_optional,
|
|
303
|
+
instance_id=instance_id,
|
|
304
|
+
)
|
|
271
305
|
)
|
|
272
306
|
):
|
|
273
|
-
result = self.graph.query(query)
|
|
307
|
+
result = self.graph(named_graph).query(query)
|
|
274
308
|
|
|
275
309
|
# We cannot include the RDF.type in case there is a neat:type property
|
|
276
310
|
return [remove_namespace_from_uri(cast(ResultRow, triple)) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index, arg-type]
|
|
@@ -281,25 +315,36 @@ class Queries:
|
|
|
281
315
|
)
|
|
282
316
|
return []
|
|
283
317
|
|
|
284
|
-
def list_triples(self, limit: int = 25) -> list[ResultRow]:
|
|
318
|
+
def list_triples(self, limit: int = 25, named_graph: URIRef | None = None) -> list[ResultRow]:
|
|
285
319
|
"""List triples in the graph store
|
|
286
320
|
|
|
287
321
|
Args:
|
|
288
322
|
limit: Max number of triples to return, by default 25
|
|
323
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
289
324
|
|
|
290
325
|
Returns:
|
|
291
326
|
List of triples
|
|
292
327
|
"""
|
|
293
328
|
query = f"SELECT ?subject ?predicate ?object WHERE {{ ?subject ?predicate ?object }} LIMIT {limit}"
|
|
294
|
-
return cast(list[ResultRow], list(self.graph.query(query)))
|
|
329
|
+
return cast(list[ResultRow], list(self.graph(named_graph).query(query)))
|
|
295
330
|
|
|
296
331
|
@overload
|
|
297
332
|
def list_types(self, remove_namespace: Literal[False] = False, limit: int = 25) -> list[ResultRow]: ...
|
|
298
333
|
|
|
299
334
|
@overload
|
|
300
|
-
def list_types(
|
|
335
|
+
def list_types(
|
|
336
|
+
self,
|
|
337
|
+
remove_namespace: Literal[True],
|
|
338
|
+
limit: int = 25,
|
|
339
|
+
named_graph: URIRef | None = None,
|
|
340
|
+
) -> list[str]: ...
|
|
301
341
|
|
|
302
|
-
def list_types(
|
|
342
|
+
def list_types(
|
|
343
|
+
self,
|
|
344
|
+
remove_namespace: bool = False,
|
|
345
|
+
limit: int = 25,
|
|
346
|
+
named_graph: URIRef | None = None,
|
|
347
|
+
) -> list[ResultRow] | list[str]:
|
|
303
348
|
"""List types in the graph store
|
|
304
349
|
|
|
305
350
|
Args:
|
|
@@ -310,13 +355,14 @@ class Queries:
|
|
|
310
355
|
List of types
|
|
311
356
|
"""
|
|
312
357
|
query = f"SELECT DISTINCT ?type WHERE {{ ?subject a ?type }} LIMIT {limit}"
|
|
313
|
-
result = cast(list[ResultRow], list(self.graph.query(query)))
|
|
358
|
+
result = cast(list[ResultRow], list(self.graph(named_graph).query(query)))
|
|
314
359
|
if remove_namespace:
|
|
315
360
|
return [remove_namespace_from_uri(res[0]) for res in result]
|
|
316
361
|
return result
|
|
317
362
|
|
|
318
363
|
def multi_value_type_property(
|
|
319
364
|
self,
|
|
365
|
+
named_graph: URIRef | None = None,
|
|
320
366
|
) -> Iterable[tuple[URIRef, URIRef, list[URIRef]]]:
|
|
321
367
|
query = """SELECT ?sourceType ?property
|
|
322
368
|
(GROUP_CONCAT(DISTINCT STR(?valueType); SEPARATOR=",") AS ?valueTypes)
|
|
@@ -341,15 +387,20 @@ class Queries:
|
|
|
341
387
|
value_types,
|
|
342
388
|
) in cast(
|
|
343
389
|
ResultRow,
|
|
344
|
-
self.graph.query(query.format(unknownType=str(
|
|
390
|
+
self.graph(named_graph).query(query.format(unknownType=str(NEAT.UnknownType))),
|
|
345
391
|
):
|
|
346
392
|
yield cast(URIRef, source_type), cast(URIRef, property_), [URIRef(uri) for uri in value_types.split(",")]
|
|
347
393
|
|
|
348
|
-
def drop_types(
|
|
394
|
+
def drop_types(
|
|
395
|
+
self,
|
|
396
|
+
type_: list[URIRef],
|
|
397
|
+
named_graph: URIRef | None = None,
|
|
398
|
+
) -> dict[URIRef, int]:
|
|
349
399
|
"""Drop types from the graph store
|
|
350
400
|
|
|
351
401
|
Args:
|
|
352
402
|
type_: List of types to drop
|
|
403
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
353
404
|
|
|
354
405
|
Returns:
|
|
355
406
|
Dictionary of dropped types
|
|
@@ -358,11 +409,16 @@ class Queries:
|
|
|
358
409
|
for t in type_:
|
|
359
410
|
instance_ids = self.list_instances_ids_of_class(t)
|
|
360
411
|
dropped_types[t] = len(instance_ids)
|
|
361
|
-
remove_instance_ids_in_batch(self.graph, instance_ids)
|
|
412
|
+
remove_instance_ids_in_batch(self.graph(named_graph), instance_ids)
|
|
362
413
|
return dropped_types
|
|
363
414
|
|
|
364
|
-
def multi_type_instances(self) -> dict[str, list[str]]:
|
|
365
|
-
"""Find instances with multiple types
|
|
415
|
+
def multi_type_instances(self, named_graph: URIRef | None = None) -> dict[str, list[str]]:
|
|
416
|
+
"""Find instances with multiple types
|
|
417
|
+
|
|
418
|
+
Args:
|
|
419
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
420
|
+
|
|
421
|
+
"""
|
|
366
422
|
|
|
367
423
|
query = """
|
|
368
424
|
SELECT ?instance (GROUP_CONCAT(str(?type); SEPARATOR=",") AS ?types)
|
|
@@ -374,7 +430,7 @@ class Queries:
|
|
|
374
430
|
"""
|
|
375
431
|
|
|
376
432
|
result = {}
|
|
377
|
-
for instance, types in self.graph.query(query): # type: ignore
|
|
433
|
+
for instance, types in self.graph(named_graph).query(query): # type: ignore
|
|
378
434
|
result[remove_namespace_from_uri(instance)] = remove_namespace_from_uri(types.split(","))
|
|
379
435
|
|
|
380
436
|
return result
|
|
@@ -115,7 +115,7 @@ def to_construct_triples(
|
|
|
115
115
|
graph_template_triple = Triple(
|
|
116
116
|
subject="?instance",
|
|
117
117
|
predicate=f"{transformation.class_.prefix}:{transformation.property_}",
|
|
118
|
-
object=f
|
|
118
|
+
object=f"?{re.sub(r'[^_a-zA-Z0-9/_]', '_', str(transformation.property_).lower())}",
|
|
119
119
|
optional=False,
|
|
120
120
|
)
|
|
121
121
|
templates.append(graph_template_triple)
|
|
@@ -17,7 +17,7 @@ from ._prune_graph import (
|
|
|
17
17
|
PruneTypes,
|
|
18
18
|
)
|
|
19
19
|
from ._rdfpath import AddSelfReferenceProperty, MakeConnectionOnExactMatch
|
|
20
|
-
from ._value_type import ConnectionToLiteral, ConvertLiteral, LiteralToEntity, SplitMultiValueProperty
|
|
20
|
+
from ._value_type import ConnectionToLiteral, ConvertLiteral, LiteralToEntity, SetNeatType, SplitMultiValueProperty
|
|
21
21
|
|
|
22
22
|
__all__ = [
|
|
23
23
|
"AddAssetDepth",
|
|
@@ -38,6 +38,7 @@ __all__ = [
|
|
|
38
38
|
"PruneInstancesOfUnknownType",
|
|
39
39
|
"PruneTypes",
|
|
40
40
|
"RelationshipAsEdgeTransformer",
|
|
41
|
+
"SetNeatType",
|
|
41
42
|
"SplitMultiValueProperty",
|
|
42
43
|
]
|
|
43
44
|
|
|
@@ -62,4 +63,5 @@ Transformers = (
|
|
|
62
63
|
| ConnectionToLiteral
|
|
63
64
|
| BaseTransformerStandardised
|
|
64
65
|
| LookupRelationshipSourceTarget
|
|
66
|
+
| SetNeatType
|
|
65
67
|
)
|
|
@@ -4,10 +4,10 @@ from typing import Any, cast
|
|
|
4
4
|
from urllib.parse import quote
|
|
5
5
|
|
|
6
6
|
import rdflib
|
|
7
|
-
from rdflib import RDF, Namespace, URIRef
|
|
7
|
+
from rdflib import RDF, Literal, Namespace, URIRef
|
|
8
8
|
from rdflib.query import ResultRow
|
|
9
9
|
|
|
10
|
-
from cognite.neat._constants import
|
|
10
|
+
from cognite.neat._constants import NEAT
|
|
11
11
|
from cognite.neat._issues.warnings import PropertyDataTypeConversionWarning
|
|
12
12
|
from cognite.neat._utils.auxiliary import string_to_ideal_type
|
|
13
13
|
from cognite.neat._utils.rdf_ import Triple, get_namespace, remove_namespace_from_uri
|
|
@@ -24,7 +24,7 @@ class SplitMultiValueProperty(BaseTransformerStandardised):
|
|
|
24
24
|
_need_changes = frozenset({})
|
|
25
25
|
|
|
26
26
|
def __init__(self, unknown_type: URIRef | None = None) -> None:
|
|
27
|
-
self.unknown_type = unknown_type or
|
|
27
|
+
self.unknown_type = unknown_type or NEAT.UnknownType
|
|
28
28
|
|
|
29
29
|
def _iterate_query(self) -> str:
|
|
30
30
|
query = """SELECT ?subjectType ?property
|
|
@@ -305,3 +305,54 @@ class ConnectionToLiteral(BaseTransformerStandardised):
|
|
|
305
305
|
row_output.instances_modified_count += 1
|
|
306
306
|
|
|
307
307
|
return row_output
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
class SetNeatType(BaseTransformerStandardised):
|
|
311
|
+
description = "Set the sub type of an instance based on the property"
|
|
312
|
+
|
|
313
|
+
def __init__(
|
|
314
|
+
self, subject_type: URIRef, subject_predicate: URIRef, drop_property: bool, namespace: Namespace | None = None
|
|
315
|
+
) -> None:
|
|
316
|
+
self.subject_type = subject_type
|
|
317
|
+
self.subject_predicate = subject_predicate
|
|
318
|
+
self.drop_property = drop_property
|
|
319
|
+
self._namespace = namespace or Namespace(get_namespace(subject_type))
|
|
320
|
+
|
|
321
|
+
def _count_query(self) -> str:
|
|
322
|
+
query = """SELECT (COUNT(?object) AS ?objectCount)
|
|
323
|
+
WHERE {{
|
|
324
|
+
?instance a <{subject_type}> .
|
|
325
|
+
?instance <{subject_predicate}> ?object
|
|
326
|
+
FILTER(isLiteral(?object))
|
|
327
|
+
}}"""
|
|
328
|
+
return query.format(subject_type=self.subject_type, subject_predicate=self.subject_predicate)
|
|
329
|
+
|
|
330
|
+
def _skip_count_query(self) -> str:
|
|
331
|
+
query = """SELECT (COUNT(?object) AS ?objectCount)
|
|
332
|
+
WHERE {{
|
|
333
|
+
?instance a <{subject_type}> .
|
|
334
|
+
?instance <{subject_predicate}> ?object
|
|
335
|
+
FILTER(isIRI(?object))
|
|
336
|
+
}}"""
|
|
337
|
+
return query.format(subject_type=self.subject_type, subject_predicate=self.subject_predicate)
|
|
338
|
+
|
|
339
|
+
def _iterate_query(self) -> str:
|
|
340
|
+
query = """SELECT ?instance ?object
|
|
341
|
+
WHERE {{
|
|
342
|
+
?instance a <{subject_type}> .
|
|
343
|
+
?instance <{subject_predicate}> ?object
|
|
344
|
+
FILTER(isLiteral(?object))
|
|
345
|
+
}}"""
|
|
346
|
+
return query.format(subject_type=self.subject_type, subject_predicate=self.subject_predicate)
|
|
347
|
+
|
|
348
|
+
def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
|
|
349
|
+
row_output = RowTransformationOutput()
|
|
350
|
+
|
|
351
|
+
instance, object_literal = cast(tuple[URIRef, Literal], query_result_row)
|
|
352
|
+
if self.drop_property:
|
|
353
|
+
row_output.remove_triples.append((instance, self.subject_predicate, object_literal))
|
|
354
|
+
|
|
355
|
+
row_output.add_triples.append((instance, NEAT.type, self._namespace[str(object_literal.toPython())]))
|
|
356
|
+
row_output.instances_modified_count += 1
|
|
357
|
+
|
|
358
|
+
return row_output
|
|
@@ -30,7 +30,7 @@ class ResourceRetrievalError(ResourceError[T_Identifier]):
|
|
|
30
30
|
|
|
31
31
|
@dataclass(unsafe_hash=True)
|
|
32
32
|
class ResourceNotFoundError(ResourceError, Generic[T_Identifier, T_ReferenceIdentifier]):
|
|
33
|
-
"""The {resource_type} with identifier {identifier} does not exist"""
|
|
33
|
+
"""The {resource_type} with identifier '{identifier}' does not exist"""
|
|
34
34
|
|
|
35
35
|
extra = " This is expected by {referred_type} {referred_by}."
|
|
36
36
|
|
|
@@ -31,7 +31,6 @@ from ._properties import (
|
|
|
31
31
|
PropertyDirectRelationLimitWarning,
|
|
32
32
|
PropertyNotFoundWarning,
|
|
33
33
|
PropertyOverwritingWarning,
|
|
34
|
-
PropertySkippedWarning,
|
|
35
34
|
PropertyTypeNotSupportedWarning,
|
|
36
35
|
PropertyValueTypeUndefinedWarning,
|
|
37
36
|
)
|
|
@@ -68,7 +67,6 @@ __all__ = [
|
|
|
68
67
|
"PropertyDirectRelationLimitWarning",
|
|
69
68
|
"PropertyNotFoundWarning",
|
|
70
69
|
"PropertyOverwritingWarning",
|
|
71
|
-
"PropertySkippedWarning",
|
|
72
70
|
"PropertyTypeNotSupportedWarning",
|
|
73
71
|
"PropertyValueTypeUndefinedWarning",
|
|
74
72
|
"RegexViolationWarning",
|
|
@@ -74,7 +74,7 @@ class CDFNotSupportedWarning(NeatWarning, ABC):
|
|
|
74
74
|
class NotSupportedViewContainerLimitWarning(CDFNotSupportedWarning):
|
|
75
75
|
"""The view {view_id} maps, {count} containers, which is more than the limit {limit}."""
|
|
76
76
|
|
|
77
|
-
fix = "Reduce the number of containers the view maps to."
|
|
77
|
+
fix = "Reduce the number of containers the view maps to."
|
|
78
78
|
|
|
79
79
|
view_id: ViewId
|
|
80
80
|
count: int
|
|
@@ -65,14 +65,6 @@ class PropertyOverwritingWarning(PropertyWarning[T_Identifier]):
|
|
|
65
65
|
overwriting: tuple[str, ...]
|
|
66
66
|
|
|
67
67
|
|
|
68
|
-
@dataclass(unsafe_hash=True)
|
|
69
|
-
class PropertySkippedWarning(PropertyWarning[T_Identifier]):
|
|
70
|
-
"""The {resource_type} with identifier {identifier} has a property {property_name}
|
|
71
|
-
which is skipped. {reason}."""
|
|
72
|
-
|
|
73
|
-
reason: str
|
|
74
|
-
|
|
75
|
-
|
|
76
68
|
@dataclass(unsafe_hash=True)
|
|
77
69
|
class PropertyDataTypeConversionWarning(PropertyWarning[T_Identifier]):
|
|
78
70
|
"""The {resource_type} with identifier {identifier} failed to convert the property {property_name}: {error}"""
|
|
Binary file
|
|
@@ -96,13 +96,13 @@ class InstanceTemplateExporter(BaseExporter[InformationRules, Workbook]):
|
|
|
96
96
|
def _add_index_identifiers(workbook: Workbook, sheet: str, no_rows: int):
|
|
97
97
|
"""Adds index-based auto identifier to a sheet identifier column"""
|
|
98
98
|
for i in range(no_rows):
|
|
99
|
-
workbook[sheet][f"A{i+2}"] = f'=IF(ISBLANK(B{i+2}), "","{sheet}-{i+1}")'
|
|
99
|
+
workbook[sheet][f"A{i + 2}"] = f'=IF(ISBLANK(B{i + 2}), "","{sheet}-{i + 1}")'
|
|
100
100
|
|
|
101
101
|
|
|
102
102
|
def _add_uuid_identifiers(workbook: Workbook, sheet: str, no_rows: int):
|
|
103
103
|
"""Adds UUID-based auto identifier to a sheet identifier column"""
|
|
104
104
|
for i in range(no_rows):
|
|
105
|
-
workbook[sheet][f"A{i+2}"] = f'=IF(ISBLANK(B{i+2}), "","{sheet}-{uuid.uuid4()}")'
|
|
105
|
+
workbook[sheet][f"A{i + 2}"] = f'=IF(ISBLANK(B{i + 2}), "","{sheet}-{uuid.uuid4()}")'
|
|
106
106
|
|
|
107
107
|
|
|
108
108
|
def _add_drop_down_list(
|
|
@@ -122,7 +122,7 @@ def _add_drop_down_list(
|
|
|
122
122
|
workbook[sheet].add_data_validation(drop_down_list)
|
|
123
123
|
|
|
124
124
|
for i in range(no_rows):
|
|
125
|
-
drop_down_list.add(workbook[sheet][f"{column}{i+2}"])
|
|
125
|
+
drop_down_list.add(workbook[sheet][f"{column}{i + 2}"])
|
|
126
126
|
|
|
127
127
|
|
|
128
128
|
def _adjust_column_width(workbook: Workbook):
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from ._base import BaseImporter
|
|
2
2
|
from ._dms2rules import DMSImporter
|
|
3
3
|
from ._dtdl2rules import DTDLImporter
|
|
4
|
-
from ._rdf import IMFImporter, InferenceImporter, OWLImporter
|
|
4
|
+
from ._rdf import IMFImporter, InferenceImporter, OWLImporter, SubclassInferenceImporter
|
|
5
5
|
from ._spreadsheet2rules import ExcelImporter, GoogleSheetImporter
|
|
6
6
|
from ._yaml2rules import YAMLImporter
|
|
7
7
|
|
|
@@ -14,6 +14,7 @@ __all__ = [
|
|
|
14
14
|
"IMFImporter",
|
|
15
15
|
"InferenceImporter",
|
|
16
16
|
"OWLImporter",
|
|
17
|
+
"SubclassInferenceImporter",
|
|
17
18
|
"YAMLImporter",
|
|
18
19
|
]
|
|
19
20
|
|
|
@@ -26,6 +27,7 @@ RulesImporters = (
|
|
|
26
27
|
| DTDLImporter
|
|
27
28
|
| YAMLImporter
|
|
28
29
|
| InferenceImporter
|
|
30
|
+
| SubclassInferenceImporter
|
|
29
31
|
)
|
|
30
32
|
|
|
31
33
|
|