cognite-neat 0.107.0__py3-none-any.whl → 0.109.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_constants.py +35 -1
- cognite/neat/_graph/_shared.py +4 -0
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +115 -14
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +87 -6
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +48 -12
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +19 -1
- cognite/neat/_graph/extractors/_dms.py +162 -47
- cognite/neat/_graph/extractors/_dms_graph.py +54 -4
- cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
- cognite/neat/_graph/extractors/_rdf_file.py +3 -2
- cognite/neat/_graph/loaders/__init__.py +1 -3
- cognite/neat/_graph/loaders/_rdf2dms.py +20 -10
- cognite/neat/_graph/queries/_base.py +144 -84
- cognite/neat/_graph/queries/_construct.py +1 -1
- cognite/neat/_graph/transformers/__init__.py +3 -1
- cognite/neat/_graph/transformers/_base.py +4 -4
- cognite/neat/_graph/transformers/_classic_cdf.py +13 -13
- cognite/neat/_graph/transformers/_prune_graph.py +3 -3
- cognite/neat/_graph/transformers/_rdfpath.py +3 -4
- cognite/neat/_graph/transformers/_value_type.py +71 -13
- cognite/neat/_issues/errors/__init__.py +2 -0
- cognite/neat/_issues/errors/_external.py +8 -0
- cognite/neat/_issues/errors/_resources.py +1 -1
- cognite/neat/_issues/warnings/__init__.py +0 -2
- cognite/neat/_issues/warnings/_models.py +1 -1
- cognite/neat/_issues/warnings/_properties.py +0 -8
- cognite/neat/_issues/warnings/_resources.py +1 -1
- cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
- cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
- cognite/neat/_rules/exporters/_rules2yaml.py +1 -1
- cognite/neat/_rules/importers/__init__.py +3 -1
- cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
- cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
- cognite/neat/_rules/importers/_rdf/_base.py +2 -2
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +310 -26
- cognite/neat/_rules/models/_base_rules.py +22 -11
- cognite/neat/_rules/models/dms/_exporter.py +5 -4
- cognite/neat/_rules/models/dms/_rules.py +1 -8
- cognite/neat/_rules/models/dms/_rules_input.py +4 -0
- cognite/neat/_rules/models/information/_rules_input.py +5 -0
- cognite/neat/_rules/transformers/__init__.py +10 -3
- cognite/neat/_rules/transformers/_base.py +6 -1
- cognite/neat/_rules/transformers/_converters.py +530 -364
- cognite/neat/_rules/transformers/_mapping.py +4 -4
- cognite/neat/_session/_base.py +100 -47
- cognite/neat/_session/_create.py +133 -0
- cognite/neat/_session/_drop.py +60 -2
- cognite/neat/_session/_fix.py +28 -0
- cognite/neat/_session/_inspect.py +22 -7
- cognite/neat/_session/_mapping.py +8 -8
- cognite/neat/_session/_prepare.py +3 -247
- cognite/neat/_session/_read.py +138 -17
- cognite/neat/_session/_set.py +50 -1
- cognite/neat/_session/_show.py +16 -43
- cognite/neat/_session/_state.py +53 -52
- cognite/neat/_session/_to.py +11 -4
- cognite/neat/_session/_wizard.py +1 -1
- cognite/neat/_session/exceptions.py +8 -1
- cognite/neat/_store/_graph_store.py +301 -146
- cognite/neat/_store/_provenance.py +36 -20
- cognite/neat/_store/_rules_store.py +253 -267
- cognite/neat/_store/exceptions.py +40 -4
- cognite/neat/_utils/auth.py +5 -3
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/RECORD +69 -67
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/entry_points.txt +0 -0
|
@@ -3,11 +3,12 @@ from collections import defaultdict
|
|
|
3
3
|
from collections.abc import Iterable
|
|
4
4
|
from typing import Literal, cast, overload
|
|
5
5
|
|
|
6
|
-
from rdflib import RDF, Graph, Namespace, URIRef
|
|
6
|
+
from rdflib import RDF, Dataset, Graph, Namespace, URIRef
|
|
7
7
|
from rdflib import Literal as RdfLiteral
|
|
8
|
+
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
|
|
8
9
|
from rdflib.query import ResultRow
|
|
9
10
|
|
|
10
|
-
from cognite.neat._constants import
|
|
11
|
+
from cognite.neat._constants import NEAT
|
|
11
12
|
from cognite.neat._rules._constants import EntityTypes
|
|
12
13
|
from cognite.neat._rules.models.entities import ClassEntity
|
|
13
14
|
from cognite.neat._rules.models.information import InformationRules
|
|
@@ -20,11 +21,21 @@ from ._construct import build_construct_query
|
|
|
20
21
|
class Queries:
|
|
21
22
|
"""Helper class for storing standard queries for the graph store."""
|
|
22
23
|
|
|
23
|
-
def __init__(
|
|
24
|
-
self
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
24
|
+
def __init__(
|
|
25
|
+
self,
|
|
26
|
+
dataset: Dataset,
|
|
27
|
+
rules: dict[URIRef, InformationRules] | None = None,
|
|
28
|
+
default_named_graph: URIRef | None = None,
|
|
29
|
+
):
|
|
30
|
+
self.dataset = dataset
|
|
31
|
+
self.rules = rules or {}
|
|
32
|
+
self.default_named_graph = default_named_graph or DATASET_DEFAULT_GRAPH_ID
|
|
33
|
+
|
|
34
|
+
def graph(self, named_graph: URIRef | None = None) -> Graph:
|
|
35
|
+
"""Get named graph from the dataset to query over"""
|
|
36
|
+
return self.dataset.graph(named_graph or self.default_named_graph)
|
|
37
|
+
|
|
38
|
+
def summarize_instances(self, named_graph: URIRef | None = None) -> list[tuple]:
|
|
28
39
|
"""Summarize instances in the graph store by class and count"""
|
|
29
40
|
|
|
30
41
|
query_statement = """ SELECT ?class (COUNT(?instance) AS ?instanceCount)
|
|
@@ -39,36 +50,55 @@ class Queries:
|
|
|
39
50
|
remove_namespace_from_uri(cast(URIRef, cast(tuple, res)[0])),
|
|
40
51
|
cast(RdfLiteral, cast(tuple, res)[1]).value,
|
|
41
52
|
)
|
|
42
|
-
for res in list(self.graph.query(query_statement))
|
|
53
|
+
for res in list(self.graph(named_graph=named_graph).query(query_statement))
|
|
43
54
|
]
|
|
44
55
|
|
|
45
|
-
|
|
46
|
-
def types(self) -> dict[URIRef, str]:
|
|
56
|
+
def types(self, named_graph: URIRef | None = None) -> dict[URIRef, str]:
|
|
47
57
|
"""Types and their short form in the graph"""
|
|
48
58
|
query = """SELECT DISTINCT ?type
|
|
49
59
|
WHERE {?s a ?type .}"""
|
|
50
|
-
return {type_: remove_namespace_from_uri(cast(URIRef, type_)) for (type_,) in list(self.graph.query(query))} # type: ignore[misc, index, arg-type]
|
|
51
60
|
|
|
52
|
-
|
|
61
|
+
return { # type: ignore[misc, index, arg-type]
|
|
62
|
+
cast(URIRef, type_): remove_namespace_from_uri(cast(URIRef, type_))
|
|
63
|
+
for (type_,) in list(self.graph(named_graph).query(query))
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
def type_uri(self, type_: str, named_graph: URIRef | None = None) -> list[URIRef]:
|
|
53
67
|
"""Get the URIRef of a type"""
|
|
54
|
-
return [k for k, v in self.types.items() if v == type_]
|
|
68
|
+
return [k for k, v in self.types(named_graph).items() if v == type_]
|
|
69
|
+
|
|
70
|
+
def properties(self, named_graph: URIRef | None = None) -> dict[URIRef, str]:
|
|
71
|
+
"""Properties and their short form in the graph
|
|
55
72
|
|
|
56
|
-
|
|
57
|
-
|
|
73
|
+
Args:
|
|
74
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
75
|
+
|
|
76
|
+
"""
|
|
58
77
|
query = """SELECT DISTINCT ?property
|
|
59
78
|
WHERE {?s ?property ?o . FILTER(?property != rdf:type)}"""
|
|
60
|
-
return {
|
|
79
|
+
return { # type: ignore[misc, index, arg-type]
|
|
80
|
+
cast(URIRef, type_): remove_namespace_from_uri(cast(URIRef, type_))
|
|
81
|
+
for (type_,) in list(self.graph(named_graph).query(query))
|
|
82
|
+
}
|
|
61
83
|
|
|
62
|
-
def property_uri(self, property_: str) -> list[URIRef]:
|
|
63
|
-
"""Get the URIRef of a
|
|
64
|
-
return [k for k, v in self.properties.items() if v == property_]
|
|
84
|
+
def property_uri(self, property_: str, named_graph: URIRef | None = None) -> list[URIRef]:
|
|
85
|
+
"""Get the URIRef of a property
|
|
65
86
|
|
|
66
|
-
|
|
87
|
+
Args:
|
|
88
|
+
property_: Property to find URIRef for
|
|
89
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
90
|
+
"""
|
|
91
|
+
return [k for k, v in self.properties(named_graph).items() if v == property_]
|
|
92
|
+
|
|
93
|
+
def list_instances_ids_of_class(
|
|
94
|
+
self, class_uri: URIRef, limit: int = -1, named_graph: URIRef | None = None
|
|
95
|
+
) -> list[URIRef]:
|
|
67
96
|
"""Get instances ids for a given class
|
|
68
97
|
|
|
69
98
|
Args:
|
|
70
99
|
class_uri: Class for which instances are to be found
|
|
71
100
|
limit: Max number of instances to return, by default -1 meaning all instances
|
|
101
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
72
102
|
|
|
73
103
|
Returns:
|
|
74
104
|
List of class instance URIs
|
|
@@ -76,16 +106,17 @@ class Queries:
|
|
|
76
106
|
query_statement = "SELECT DISTINCT ?subject WHERE { ?subject a <class> .} LIMIT X".replace(
|
|
77
107
|
"class", class_uri
|
|
78
108
|
).replace("LIMIT X", "" if limit == -1 else f"LIMIT {limit}")
|
|
79
|
-
return [cast(tuple, res)[0] for res in list(self.graph.query(query_statement))]
|
|
109
|
+
return [cast(tuple, res)[0] for res in list(self.graph(named_graph).query(query_statement))]
|
|
80
110
|
|
|
81
|
-
def list_instances_of_type(self, class_uri: URIRef) -> list[ResultRow]:
|
|
111
|
+
def list_instances_of_type(self, class_uri: URIRef, named_graph: URIRef | None = None) -> list[ResultRow]:
|
|
82
112
|
"""Get all triples for instances of a given class
|
|
83
113
|
|
|
84
114
|
Args:
|
|
85
115
|
class_uri: Class for which instances are to be found
|
|
116
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
86
117
|
|
|
87
118
|
Returns:
|
|
88
|
-
List of triples for instances of the given class
|
|
119
|
+
List of triples for instances of the given class in the named graph
|
|
89
120
|
"""
|
|
90
121
|
query = (
|
|
91
122
|
f"SELECT ?instance ?prop ?value "
|
|
@@ -93,17 +124,22 @@ class Queries:
|
|
|
93
124
|
)
|
|
94
125
|
|
|
95
126
|
# Select queries gives an iterable of result rows
|
|
96
|
-
return cast(list[ResultRow], list(self.graph.query(query)))
|
|
127
|
+
return cast(list[ResultRow], list(self.graph(named_graph).query(query)))
|
|
97
128
|
|
|
98
|
-
def triples_of_type_instances(
|
|
129
|
+
def triples_of_type_instances(
|
|
130
|
+
self, rdf_type: str | URIRef, named_graph: URIRef | None = None
|
|
131
|
+
) -> list[tuple[str, str, str]]:
|
|
99
132
|
"""Get all triples of a given type.
|
|
100
133
|
|
|
101
|
-
|
|
134
|
+
Args:
|
|
135
|
+
rdf_type: Type URI to query
|
|
136
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
102
137
|
"""
|
|
138
|
+
named_graph = named_graph or self.default_named_graph
|
|
103
139
|
if isinstance(rdf_type, URIRef):
|
|
104
140
|
rdf_uri = rdf_type
|
|
105
|
-
elif isinstance(rdf_type, str) and self.rules:
|
|
106
|
-
rdf_uri = self.rules.metadata.namespace[rdf_type]
|
|
141
|
+
elif isinstance(rdf_type, str) and self.rules and self.rules.get(named_graph):
|
|
142
|
+
rdf_uri = self.rules[named_graph].metadata.namespace[rdf_type]
|
|
107
143
|
else:
|
|
108
144
|
warnings.warn(
|
|
109
145
|
"Unknown namespace. Please either provide a URIRef or set the rules of the store.",
|
|
@@ -117,46 +153,54 @@ class Queries:
|
|
|
117
153
|
"order by ?instance"
|
|
118
154
|
)
|
|
119
155
|
|
|
120
|
-
result = self.graph.query(query)
|
|
156
|
+
result = self.graph(named_graph).query(query)
|
|
121
157
|
|
|
122
158
|
# We cannot include the RDF.type in case there is a neat:type property
|
|
123
159
|
return [remove_namespace_from_uri(list(triple)) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index, arg-type]
|
|
124
160
|
|
|
125
|
-
def type_with_property(self, type_: URIRef, property_uri: URIRef) -> bool:
|
|
161
|
+
def type_with_property(self, type_: URIRef, property_uri: URIRef, named_graph: URIRef | None = None) -> bool:
|
|
126
162
|
"""Check if a property exists in the graph store
|
|
127
163
|
|
|
128
164
|
Args:
|
|
165
|
+
type_: Type URI to check
|
|
129
166
|
property_uri: Property URI to check
|
|
167
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
130
168
|
|
|
131
169
|
Returns:
|
|
132
170
|
True if property exists, False otherwise
|
|
133
171
|
"""
|
|
134
172
|
query = f"SELECT ?o WHERE {{ ?s a <{type_}> ; <{property_uri}> ?o .}} Limit 1"
|
|
135
|
-
return bool(list(self.graph.query(query)))
|
|
173
|
+
return bool(list(self.graph(named_graph).query(query)))
|
|
136
174
|
|
|
137
|
-
def has_namespace(self, namespace: Namespace) -> bool:
|
|
175
|
+
def has_namespace(self, namespace: Namespace, named_graph: URIRef | None = None) -> bool:
|
|
138
176
|
"""Check if a namespace exists in the graph store
|
|
139
177
|
|
|
140
178
|
Args:
|
|
141
179
|
namespace: Namespace to check
|
|
180
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
142
181
|
|
|
143
182
|
Returns:
|
|
144
183
|
True if namespace exists, False otherwise
|
|
145
184
|
"""
|
|
146
185
|
query = f"ASK WHERE {{ ?s ?p ?o . FILTER(STRSTARTS(STR(?p), STR(<{namespace}>))) }}"
|
|
147
|
-
return bool(self.graph.query(query))
|
|
186
|
+
return bool(self.graph(named_graph).query(query))
|
|
148
187
|
|
|
149
|
-
def
|
|
188
|
+
def has_data(self) -> bool:
|
|
189
|
+
"""Check if the graph store has data"""
|
|
190
|
+
return cast(bool, next(iter(self.dataset.query("ASK WHERE { ?s ?p ?o }"))))
|
|
191
|
+
|
|
192
|
+
def has_type(self, type_: URIRef, named_graph: URIRef | None = None) -> bool:
|
|
150
193
|
"""Check if a type exists in the graph store
|
|
151
194
|
|
|
152
195
|
Args:
|
|
153
196
|
type_: Type to check
|
|
197
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
154
198
|
|
|
155
199
|
Returns:
|
|
156
200
|
True if type exists, False otherwise
|
|
157
201
|
"""
|
|
158
202
|
query = f"ASK WHERE {{ ?s a <{type_}> }}"
|
|
159
|
-
return bool(self.graph.query(query))
|
|
203
|
+
return bool(self.graph(named_graph).query(query))
|
|
160
204
|
|
|
161
205
|
def describe(
|
|
162
206
|
self,
|
|
@@ -164,6 +208,7 @@ class Queries:
|
|
|
164
208
|
instance_type: str | None = None,
|
|
165
209
|
property_renaming_config: dict | None = None,
|
|
166
210
|
property_types: dict[str, EntityTypes] | None = None,
|
|
211
|
+
named_graph: URIRef | None = None,
|
|
167
212
|
) -> tuple[str, dict[str | InstanceType, list[str]]] | None:
|
|
168
213
|
"""DESCRIBE instance for a given class from the graph store
|
|
169
214
|
|
|
@@ -172,6 +217,7 @@ class Queries:
|
|
|
172
217
|
instance_type: Type of the instance, default None (will be inferred from triples)
|
|
173
218
|
property_renaming_config: Dictionary to rename properties, default None (no renaming)
|
|
174
219
|
property_types: Dictionary of property types, default None (helper for removal of namespace)
|
|
220
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
175
221
|
|
|
176
222
|
|
|
177
223
|
Returns:
|
|
@@ -179,7 +225,7 @@ class Queries:
|
|
|
179
225
|
"""
|
|
180
226
|
property_values: dict[str, list[str]] = defaultdict(list)
|
|
181
227
|
identifier = remove_namespace_from_uri(instance_id, validation="prefix")
|
|
182
|
-
for _, predicate, object_ in cast(list[ResultRow], self.graph.query(f"DESCRIBE <{instance_id}>")):
|
|
228
|
+
for _, predicate, object_ in cast(list[ResultRow], self.graph(named_graph).query(f"DESCRIBE <{instance_id}>")):
|
|
183
229
|
if object_.lower() in [
|
|
184
230
|
"",
|
|
185
231
|
"none",
|
|
@@ -190,45 +236,28 @@ class Queries:
|
|
|
190
236
|
|
|
191
237
|
# set property
|
|
192
238
|
if property_renaming_config and predicate != RDF.type:
|
|
193
|
-
property_ =
|
|
194
|
-
|
|
195
|
-
|
|
239
|
+
property_ = remove_namespace_from_uri(predicate, validation="prefix")
|
|
240
|
+
renamed_property_ = property_renaming_config.get(predicate, property_)
|
|
241
|
+
|
|
196
242
|
elif not property_renaming_config and predicate != RDF.type:
|
|
197
243
|
property_ = remove_namespace_from_uri(predicate, validation="prefix")
|
|
244
|
+
renamed_property_ = property_
|
|
245
|
+
|
|
198
246
|
else:
|
|
199
247
|
property_ = RDF.type
|
|
248
|
+
renamed_property_ = property_
|
|
200
249
|
|
|
201
|
-
|
|
202
|
-
# if it is URIRef and property type is object property, we need to remove namespace
|
|
203
|
-
# if it URIref but we are doing this into data type property, we do not remove namespace
|
|
204
|
-
# case 1 for RDF type we remove namespace
|
|
205
|
-
if property_ == RDF.type:
|
|
250
|
+
if isinstance(object_, URIRef):
|
|
206
251
|
value = remove_namespace_from_uri(object_, validation="prefix")
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
elif (
|
|
210
|
-
isinstance(object_, URIRef)
|
|
211
|
-
and property_types
|
|
212
|
-
and (
|
|
213
|
-
property_types.get(property_, None) == EntityTypes.object_property
|
|
214
|
-
or property_types.get(property_, None) == EntityTypes.undefined
|
|
215
|
-
)
|
|
216
|
-
):
|
|
217
|
-
value = remove_namespace_from_uri(object_, validation="prefix")
|
|
218
|
-
|
|
219
|
-
# case 3 when property type is not defined and returned value is URIRef we remove namespace
|
|
220
|
-
elif isinstance(object_, URIRef) and not property_types:
|
|
221
|
-
value = remove_namespace_from_uri(object_, validation="prefix")
|
|
222
|
-
|
|
223
|
-
# case 4 for data type properties we do not remove namespace but keep the entire value
|
|
224
|
-
# but we drop the datatype part, and keep everything to be string (data loader will do the conversion)
|
|
225
|
-
# for value type it expects (if possible)
|
|
252
|
+
elif isinstance(object_, RdfLiteral):
|
|
253
|
+
value = object_.toPython()
|
|
226
254
|
else:
|
|
255
|
+
# It is a blank node
|
|
227
256
|
value = str(object_)
|
|
228
257
|
|
|
229
258
|
# add type to the dictionary
|
|
230
259
|
if predicate != RDF.type:
|
|
231
|
-
property_values[
|
|
260
|
+
property_values[renamed_property_].append(value)
|
|
232
261
|
else:
|
|
233
262
|
# guarding against multiple rdf:type values as this is not allowed in CDF
|
|
234
263
|
if RDF.type not in property_values:
|
|
@@ -249,6 +278,7 @@ class Queries:
|
|
|
249
278
|
class_: str,
|
|
250
279
|
properties_optional: bool = True,
|
|
251
280
|
instance_id: URIRef | None = None,
|
|
281
|
+
named_graph: URIRef | None = None,
|
|
252
282
|
) -> list[tuple[str, str, str]]:
|
|
253
283
|
"""CONSTRUCT instances for a given class from the graph store
|
|
254
284
|
|
|
@@ -256,21 +286,29 @@ class Queries:
|
|
|
256
286
|
class_: Class entity for which we want to generate query
|
|
257
287
|
properties_optional: Whether to make all properties optional, default True
|
|
258
288
|
instance_id: Instance id to filter on, default None (all)
|
|
289
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
259
290
|
|
|
260
291
|
Returns:
|
|
261
292
|
List of triples for instances of the given class
|
|
262
293
|
"""
|
|
263
|
-
|
|
264
|
-
if
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
294
|
+
named_graph = named_graph or self.default_named_graph
|
|
295
|
+
if (
|
|
296
|
+
self.rules
|
|
297
|
+
and self.rules.get(named_graph)
|
|
298
|
+
and (
|
|
299
|
+
query := build_construct_query(
|
|
300
|
+
class_=ClassEntity(
|
|
301
|
+
prefix=self.rules[named_graph].metadata.prefix,
|
|
302
|
+
suffix=class_,
|
|
303
|
+
),
|
|
304
|
+
graph=self.graph(named_graph),
|
|
305
|
+
rules=self.rules[named_graph],
|
|
306
|
+
properties_optional=properties_optional,
|
|
307
|
+
instance_id=instance_id,
|
|
308
|
+
)
|
|
271
309
|
)
|
|
272
310
|
):
|
|
273
|
-
result = self.graph.query(query)
|
|
311
|
+
result = self.graph(named_graph).query(query)
|
|
274
312
|
|
|
275
313
|
# We cannot include the RDF.type in case there is a neat:type property
|
|
276
314
|
return [remove_namespace_from_uri(cast(ResultRow, triple)) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index, arg-type]
|
|
@@ -281,25 +319,36 @@ class Queries:
|
|
|
281
319
|
)
|
|
282
320
|
return []
|
|
283
321
|
|
|
284
|
-
def list_triples(self, limit: int = 25) -> list[ResultRow]:
|
|
322
|
+
def list_triples(self, limit: int = 25, named_graph: URIRef | None = None) -> list[ResultRow]:
|
|
285
323
|
"""List triples in the graph store
|
|
286
324
|
|
|
287
325
|
Args:
|
|
288
326
|
limit: Max number of triples to return, by default 25
|
|
327
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
289
328
|
|
|
290
329
|
Returns:
|
|
291
330
|
List of triples
|
|
292
331
|
"""
|
|
293
332
|
query = f"SELECT ?subject ?predicate ?object WHERE {{ ?subject ?predicate ?object }} LIMIT {limit}"
|
|
294
|
-
return cast(list[ResultRow], list(self.graph.query(query)))
|
|
333
|
+
return cast(list[ResultRow], list(self.graph(named_graph).query(query)))
|
|
295
334
|
|
|
296
335
|
@overload
|
|
297
336
|
def list_types(self, remove_namespace: Literal[False] = False, limit: int = 25) -> list[ResultRow]: ...
|
|
298
337
|
|
|
299
338
|
@overload
|
|
300
|
-
def list_types(
|
|
339
|
+
def list_types(
|
|
340
|
+
self,
|
|
341
|
+
remove_namespace: Literal[True],
|
|
342
|
+
limit: int = 25,
|
|
343
|
+
named_graph: URIRef | None = None,
|
|
344
|
+
) -> list[str]: ...
|
|
301
345
|
|
|
302
|
-
def list_types(
|
|
346
|
+
def list_types(
|
|
347
|
+
self,
|
|
348
|
+
remove_namespace: bool = False,
|
|
349
|
+
limit: int = 25,
|
|
350
|
+
named_graph: URIRef | None = None,
|
|
351
|
+
) -> list[ResultRow] | list[str]:
|
|
303
352
|
"""List types in the graph store
|
|
304
353
|
|
|
305
354
|
Args:
|
|
@@ -310,13 +359,14 @@ class Queries:
|
|
|
310
359
|
List of types
|
|
311
360
|
"""
|
|
312
361
|
query = f"SELECT DISTINCT ?type WHERE {{ ?subject a ?type }} LIMIT {limit}"
|
|
313
|
-
result = cast(list[ResultRow], list(self.graph.query(query)))
|
|
362
|
+
result = cast(list[ResultRow], list(self.graph(named_graph).query(query)))
|
|
314
363
|
if remove_namespace:
|
|
315
364
|
return [remove_namespace_from_uri(res[0]) for res in result]
|
|
316
365
|
return result
|
|
317
366
|
|
|
318
367
|
def multi_value_type_property(
|
|
319
368
|
self,
|
|
369
|
+
named_graph: URIRef | None = None,
|
|
320
370
|
) -> Iterable[tuple[URIRef, URIRef, list[URIRef]]]:
|
|
321
371
|
query = """SELECT ?sourceType ?property
|
|
322
372
|
(GROUP_CONCAT(DISTINCT STR(?valueType); SEPARATOR=",") AS ?valueTypes)
|
|
@@ -341,15 +391,20 @@ class Queries:
|
|
|
341
391
|
value_types,
|
|
342
392
|
) in cast(
|
|
343
393
|
ResultRow,
|
|
344
|
-
self.graph.query(query.format(unknownType=str(
|
|
394
|
+
self.graph(named_graph).query(query.format(unknownType=str(NEAT.UnknownType))),
|
|
345
395
|
):
|
|
346
396
|
yield cast(URIRef, source_type), cast(URIRef, property_), [URIRef(uri) for uri in value_types.split(",")]
|
|
347
397
|
|
|
348
|
-
def drop_types(
|
|
398
|
+
def drop_types(
|
|
399
|
+
self,
|
|
400
|
+
type_: list[URIRef],
|
|
401
|
+
named_graph: URIRef | None = None,
|
|
402
|
+
) -> dict[URIRef, int]:
|
|
349
403
|
"""Drop types from the graph store
|
|
350
404
|
|
|
351
405
|
Args:
|
|
352
406
|
type_: List of types to drop
|
|
407
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
353
408
|
|
|
354
409
|
Returns:
|
|
355
410
|
Dictionary of dropped types
|
|
@@ -358,11 +413,16 @@ class Queries:
|
|
|
358
413
|
for t in type_:
|
|
359
414
|
instance_ids = self.list_instances_ids_of_class(t)
|
|
360
415
|
dropped_types[t] = len(instance_ids)
|
|
361
|
-
remove_instance_ids_in_batch(self.graph, instance_ids)
|
|
416
|
+
remove_instance_ids_in_batch(self.graph(named_graph), instance_ids)
|
|
362
417
|
return dropped_types
|
|
363
418
|
|
|
364
|
-
def multi_type_instances(self) -> dict[str, list[str]]:
|
|
365
|
-
"""Find instances with multiple types
|
|
419
|
+
def multi_type_instances(self, named_graph: URIRef | None = None) -> dict[str, list[str]]:
|
|
420
|
+
"""Find instances with multiple types
|
|
421
|
+
|
|
422
|
+
Args:
|
|
423
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
424
|
+
|
|
425
|
+
"""
|
|
366
426
|
|
|
367
427
|
query = """
|
|
368
428
|
SELECT ?instance (GROUP_CONCAT(str(?type); SEPARATOR=",") AS ?types)
|
|
@@ -374,7 +434,7 @@ class Queries:
|
|
|
374
434
|
"""
|
|
375
435
|
|
|
376
436
|
result = {}
|
|
377
|
-
for instance, types in self.graph.query(query): # type: ignore
|
|
437
|
+
for instance, types in self.graph(named_graph).query(query): # type: ignore
|
|
378
438
|
result[remove_namespace_from_uri(instance)] = remove_namespace_from_uri(types.split(","))
|
|
379
439
|
|
|
380
440
|
return result
|
|
@@ -115,7 +115,7 @@ def to_construct_triples(
|
|
|
115
115
|
graph_template_triple = Triple(
|
|
116
116
|
subject="?instance",
|
|
117
117
|
predicate=f"{transformation.class_.prefix}:{transformation.property_}",
|
|
118
|
-
object=f
|
|
118
|
+
object=f"?{re.sub(r'[^_a-zA-Z0-9/_]', '_', str(transformation.property_).lower())}",
|
|
119
119
|
optional=False,
|
|
120
120
|
)
|
|
121
121
|
templates.append(graph_template_triple)
|
|
@@ -17,7 +17,7 @@ from ._prune_graph import (
|
|
|
17
17
|
PruneTypes,
|
|
18
18
|
)
|
|
19
19
|
from ._rdfpath import AddSelfReferenceProperty, MakeConnectionOnExactMatch
|
|
20
|
-
from ._value_type import ConnectionToLiteral, ConvertLiteral, LiteralToEntity, SplitMultiValueProperty
|
|
20
|
+
from ._value_type import ConnectionToLiteral, ConvertLiteral, LiteralToEntity, SetType, SplitMultiValueProperty
|
|
21
21
|
|
|
22
22
|
__all__ = [
|
|
23
23
|
"AddAssetDepth",
|
|
@@ -38,6 +38,7 @@ __all__ = [
|
|
|
38
38
|
"PruneInstancesOfUnknownType",
|
|
39
39
|
"PruneTypes",
|
|
40
40
|
"RelationshipAsEdgeTransformer",
|
|
41
|
+
"SetType",
|
|
41
42
|
"SplitMultiValueProperty",
|
|
42
43
|
]
|
|
43
44
|
|
|
@@ -62,4 +63,5 @@ Transformers = (
|
|
|
62
63
|
| ConnectionToLiteral
|
|
63
64
|
| BaseTransformerStandardised
|
|
64
65
|
| LookupRelationshipSourceTarget
|
|
66
|
+
| SetType
|
|
65
67
|
)
|
|
@@ -12,14 +12,14 @@ from cognite.neat._shared import Triple
|
|
|
12
12
|
from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
|
|
13
13
|
from cognite.neat._utils.graph_transformations_report import GraphTransformationResult
|
|
14
14
|
|
|
15
|
-
To_Add_Triples: TypeAlias =
|
|
16
|
-
To_Remove_Triples: TypeAlias =
|
|
15
|
+
To_Add_Triples: TypeAlias = set[Triple]
|
|
16
|
+
To_Remove_Triples: TypeAlias = set[Triple]
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
@dataclasses.dataclass
|
|
20
20
|
class RowTransformationOutput:
|
|
21
|
-
remove_triples: To_Remove_Triples = dataclasses.field(default_factory=
|
|
22
|
-
add_triples: To_Add_Triples = dataclasses.field(default_factory=
|
|
21
|
+
remove_triples: To_Remove_Triples = dataclasses.field(default_factory=set)
|
|
22
|
+
add_triples: To_Add_Triples = dataclasses.field(default_factory=set)
|
|
23
23
|
instances_removed_count: int = 0
|
|
24
24
|
instances_added_count: int = 0
|
|
25
25
|
instances_modified_count: int = 0
|
|
@@ -63,11 +63,11 @@ class AddAssetDepth(BaseTransformerStandardised):
|
|
|
63
63
|
row_output = RowTransformationOutput()
|
|
64
64
|
subject, object = query_result_row
|
|
65
65
|
|
|
66
|
-
row_output.add_triples.
|
|
66
|
+
row_output.add_triples.add(cast(Triple, (subject, DEFAULT_NAMESPACE.depth, object)))
|
|
67
67
|
|
|
68
68
|
if self.depth_typing and (type_ := self.depth_typing.get(int(object), None)):
|
|
69
|
-
row_output.remove_triples.
|
|
70
|
-
row_output.add_triples.
|
|
69
|
+
row_output.remove_triples.add(cast(Triple, (subject, RDF.type, self.asset_type)))
|
|
70
|
+
row_output.add_triples.add(cast(Triple, (subject, RDF.type, DEFAULT_NAMESPACE[type_])))
|
|
71
71
|
|
|
72
72
|
row_output.instances_modified_count += 1
|
|
73
73
|
|
|
@@ -128,7 +128,7 @@ class BaseAssetConnector(BaseTransformerStandardised, ABC):
|
|
|
128
128
|
row_output = RowTransformationOutput()
|
|
129
129
|
subject, object = query_result_row
|
|
130
130
|
|
|
131
|
-
row_output.add_triples.
|
|
131
|
+
row_output.add_triples.add(cast(Triple, (subject, self.asset_to_resource_connection, object)))
|
|
132
132
|
|
|
133
133
|
row_output.instances_modified_count += 1
|
|
134
134
|
|
|
@@ -305,12 +305,12 @@ class AssetRelationshipConnector(BaseTransformerStandardised):
|
|
|
305
305
|
row_output = RowTransformationOutput()
|
|
306
306
|
source, relationship, target = query_result_row
|
|
307
307
|
|
|
308
|
-
row_output.add_triples.
|
|
309
|
-
row_output.add_triples.
|
|
310
|
-
row_output.add_triples.
|
|
308
|
+
row_output.add_triples.add(cast(Triple, (source, DEFAULT_NAMESPACE.relationship, target)))
|
|
309
|
+
row_output.add_triples.add(cast(Triple, (relationship, DEFAULT_NAMESPACE.source, source)))
|
|
310
|
+
row_output.add_triples.add(cast(Triple, (relationship, DEFAULT_NAMESPACE.target, target)))
|
|
311
311
|
|
|
312
|
-
row_output.remove_triples.
|
|
313
|
-
row_output.remove_triples.
|
|
312
|
+
row_output.remove_triples.add(cast(Triple, (relationship, self.relationship_source_xid_prop, None)))
|
|
313
|
+
row_output.remove_triples.add(cast(Triple, (relationship, self.relationship_target_xid_prop, None)))
|
|
314
314
|
|
|
315
315
|
row_output.instances_modified_count += 2
|
|
316
316
|
|
|
@@ -578,10 +578,10 @@ WHERE {{
|
|
|
578
578
|
warnings.warn(ResourceNotFoundWarning(target, "class", str(instance), "class"), stacklevel=2)
|
|
579
579
|
return output
|
|
580
580
|
|
|
581
|
-
output.remove_triples.
|
|
582
|
-
output.remove_triples.
|
|
583
|
-
output.add_triples.
|
|
584
|
-
output.add_triples.
|
|
581
|
+
output.remove_triples.add((instance, self._namespace.sourceExternalId, source))
|
|
582
|
+
output.remove_triples.add((instance, self._namespace.targetExternalId, target))
|
|
583
|
+
output.add_triples.add((instance, self._namespace.sourceExternalId, source_id))
|
|
584
|
+
output.add_triples.add((instance, self._namespace.targetExternalId, target_id))
|
|
585
585
|
output.instances_modified_count += 1
|
|
586
586
|
return output
|
|
587
587
|
|
|
@@ -237,7 +237,7 @@ class PruneTypes(BaseTransformerStandardised):
|
|
|
237
237
|
row_output = RowTransformationOutput()
|
|
238
238
|
|
|
239
239
|
(subject,) = query_result_row
|
|
240
|
-
row_output.remove_triples.
|
|
240
|
+
row_output.remove_triples.add((subject, None, None)) # type: ignore
|
|
241
241
|
row_output.instances_removed_count = 1
|
|
242
242
|
|
|
243
243
|
return row_output
|
|
@@ -272,7 +272,7 @@ class PruneDeadEndEdges(BaseTransformerStandardised):
|
|
|
272
272
|
|
|
273
273
|
def operation(self, row: ResultRow) -> RowTransformationOutput:
|
|
274
274
|
row_output = RowTransformationOutput()
|
|
275
|
-
row_output.remove_triples.
|
|
275
|
+
row_output.remove_triples.add(cast(Triple, row))
|
|
276
276
|
row_output.instances_modified_count = 1
|
|
277
277
|
|
|
278
278
|
return row_output
|
|
@@ -307,7 +307,7 @@ class PruneInstancesOfUnknownType(BaseTransformerStandardised):
|
|
|
307
307
|
def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
|
|
308
308
|
row_output = RowTransformationOutput()
|
|
309
309
|
(subject,) = query_result_row
|
|
310
|
-
row_output.remove_triples.
|
|
310
|
+
row_output.remove_triples.add(cast(Triple, (subject, None, None)))
|
|
311
311
|
row_output.instances_removed_count = 1
|
|
312
312
|
|
|
313
313
|
return row_output
|
|
@@ -7,7 +7,6 @@ from rdflib.query import ResultRow
|
|
|
7
7
|
from cognite.neat._rules.analysis import InformationAnalysis
|
|
8
8
|
from cognite.neat._rules.models._rdfpath import RDFPath, SingleProperty
|
|
9
9
|
from cognite.neat._rules.models.information import InformationRules
|
|
10
|
-
from cognite.neat._shared import Triple
|
|
11
10
|
from cognite.neat._utils.rdf_ import get_namespace, remove_namespace_from_uri
|
|
12
11
|
|
|
13
12
|
from ._base import BaseTransformer, BaseTransformerStandardised, RowTransformationOutput
|
|
@@ -76,7 +75,7 @@ class MakeConnectionOnExactMatch(BaseTransformerStandardised):
|
|
|
76
75
|
self.object_type = object_type
|
|
77
76
|
self.object_predicate = object_predicate
|
|
78
77
|
subject_namespace = Namespace(get_namespace(subject_type))
|
|
79
|
-
self.connection = (
|
|
78
|
+
self.connection: URIRef = (
|
|
80
79
|
subject_namespace[quote(connection.strip())]
|
|
81
80
|
if isinstance(connection, str)
|
|
82
81
|
else connection or subject_namespace[remove_namespace_from_uri(self.object_type).lower()]
|
|
@@ -125,8 +124,8 @@ class MakeConnectionOnExactMatch(BaseTransformerStandardised):
|
|
|
125
124
|
def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
|
|
126
125
|
row_output = RowTransformationOutput()
|
|
127
126
|
|
|
128
|
-
subject, object = query_result_row
|
|
127
|
+
subject, object = cast(tuple[URIRef, URIRef], query_result_row)
|
|
129
128
|
|
|
130
|
-
row_output.add_triples.
|
|
129
|
+
row_output.add_triples.add((subject, self.connection, object))
|
|
131
130
|
row_output.instances_modified_count += 1
|
|
132
131
|
return row_output
|