cognite-neat 0.110.0__py3-none-any.whl → 0.111.0__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_alpha.py +6 -0
- cognite/neat/_client/_api/schema.py +26 -0
- cognite/neat/_client/data_classes/schema.py +1 -1
- cognite/neat/_constants.py +4 -1
- cognite/neat/_graph/extractors/__init__.py +4 -0
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +8 -16
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +39 -9
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +23 -17
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +15 -17
- cognite/neat/_graph/extractors/_dict.py +102 -0
- cognite/neat/_graph/extractors/_dms.py +27 -40
- cognite/neat/_graph/extractors/_dms_graph.py +30 -3
- cognite/neat/_graph/extractors/_raw.py +67 -0
- cognite/neat/_graph/loaders/_base.py +20 -4
- cognite/neat/_graph/loaders/_rdf2dms.py +243 -89
- cognite/neat/_graph/queries/_base.py +137 -43
- cognite/neat/_graph/transformers/_classic_cdf.py +6 -22
- cognite/neat/_issues/_factory.py +9 -1
- cognite/neat/_issues/errors/__init__.py +2 -0
- cognite/neat/_issues/errors/_external.py +7 -0
- cognite/neat/_issues/warnings/user_modeling.py +12 -0
- cognite/neat/_rules/_constants.py +3 -0
- cognite/neat/_rules/analysis/_base.py +29 -50
- cognite/neat/_rules/exporters/_rules2excel.py +1 -1
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +16 -10
- cognite/neat/_rules/models/_base_rules.py +0 -2
- cognite/neat/_rules/models/data_types.py +7 -0
- cognite/neat/_rules/models/dms/_exporter.py +9 -8
- cognite/neat/_rules/models/dms/_rules.py +26 -1
- cognite/neat/_rules/models/dms/_rules_input.py +5 -1
- cognite/neat/_rules/models/dms/_validation.py +101 -1
- cognite/neat/_rules/models/entities/_single_value.py +8 -3
- cognite/neat/_rules/models/entities/_wrapped.py +2 -2
- cognite/neat/_rules/models/information/_rules_input.py +1 -0
- cognite/neat/_rules/models/information/_validation.py +64 -17
- cognite/neat/_rules/transformers/_converters.py +7 -2
- cognite/neat/_session/_base.py +2 -0
- cognite/neat/_session/_explore.py +39 -0
- cognite/neat/_session/_inspect.py +25 -6
- cognite/neat/_session/_read.py +67 -3
- cognite/neat/_session/_set.py +7 -1
- cognite/neat/_session/_state.py +6 -0
- cognite/neat/_session/_to.py +115 -8
- cognite/neat/_store/_graph_store.py +8 -4
- cognite/neat/_utils/rdf_.py +34 -3
- cognite/neat/_utils/text.py +72 -4
- cognite/neat/_utils/upload.py +2 -0
- cognite/neat/_version.py +2 -2
- {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.0.dist-info}/RECORD +53 -50
- {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
import urllib.parse
|
|
1
2
|
from collections import defaultdict
|
|
2
3
|
from collections.abc import Iterable
|
|
3
|
-
from typing import Literal, cast, overload
|
|
4
|
+
from typing import Any, Literal, cast, overload
|
|
4
5
|
|
|
5
|
-
from rdflib import RDF, Dataset, Graph, Namespace, URIRef
|
|
6
|
+
from rdflib import RDF, XSD, Dataset, Graph, Namespace, URIRef
|
|
6
7
|
from rdflib import Literal as RdfLiteral
|
|
7
8
|
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
|
|
8
9
|
from rdflib.query import ResultRow
|
|
@@ -27,9 +28,8 @@ class Queries:
|
|
|
27
28
|
"""Get named graph from the dataset to query over"""
|
|
28
29
|
return self.dataset.graph(named_graph or self.default_named_graph)
|
|
29
30
|
|
|
30
|
-
def summarize_instances(self, named_graph: URIRef | None = None) -> list[tuple]:
|
|
31
|
+
def summarize_instances(self, named_graph: URIRef | None = None) -> list[tuple[str, int]]:
|
|
31
32
|
"""Summarize instances in the graph store by class and count"""
|
|
32
|
-
|
|
33
33
|
query_statement = """ SELECT ?class (COUNT(?instance) AS ?instanceCount)
|
|
34
34
|
WHERE {
|
|
35
35
|
?instance a ?class .
|
|
@@ -37,12 +37,12 @@ class Queries:
|
|
|
37
37
|
GROUP BY ?class
|
|
38
38
|
ORDER BY DESC(?instanceCount) """
|
|
39
39
|
|
|
40
|
-
return [
|
|
40
|
+
return [ # type: ignore[misc]
|
|
41
41
|
(
|
|
42
|
-
remove_namespace_from_uri(cast(URIRef,
|
|
43
|
-
cast(RdfLiteral,
|
|
42
|
+
remove_namespace_from_uri(cast(URIRef, class_)),
|
|
43
|
+
cast(RdfLiteral, count).value,
|
|
44
44
|
)
|
|
45
|
-
for
|
|
45
|
+
for class_, count in self.graph(named_graph=named_graph).query(query_statement)
|
|
46
46
|
]
|
|
47
47
|
|
|
48
48
|
def types(self, named_graph: URIRef | None = None) -> dict[URIRef, str]:
|
|
@@ -73,6 +73,20 @@ class Queries:
|
|
|
73
73
|
for (type_,) in list(self.graph(named_graph).query(query))
|
|
74
74
|
}
|
|
75
75
|
|
|
76
|
+
def properties_by_type(self, named_graph: URIRef | None = None) -> dict[URIRef, dict[URIRef, str]]:
|
|
77
|
+
"""Properties and their short form in the graph by type
|
|
78
|
+
|
|
79
|
+
Args:
|
|
80
|
+
named_graph: Named graph to query over, default None (default graph)
|
|
81
|
+
|
|
82
|
+
"""
|
|
83
|
+
query = """SELECT DISTINCT ?type ?property
|
|
84
|
+
WHERE {?s a ?type . ?s ?property ?o . FILTER(?property != rdf:type)}"""
|
|
85
|
+
properties_by_type: dict[URIRef, dict[URIRef, str]] = defaultdict(dict)
|
|
86
|
+
for type_, property_ in cast(ResultRow, list(self.graph(named_graph).query(query))):
|
|
87
|
+
properties_by_type[type_][property_] = remove_namespace_from_uri(property_) # type: ignore[index]
|
|
88
|
+
return properties_by_type
|
|
89
|
+
|
|
76
90
|
def property_uri(self, property_: str, named_graph: URIRef | None = None) -> list[URIRef]:
|
|
77
91
|
"""Get the URIRef of a property
|
|
78
92
|
|
|
@@ -82,41 +96,38 @@ class Queries:
|
|
|
82
96
|
"""
|
|
83
97
|
return [k for k, v in self.properties(named_graph).items() if v == property_]
|
|
84
98
|
|
|
85
|
-
|
|
99
|
+
@overload
|
|
100
|
+
def list_instances_ids(
|
|
101
|
+
self, class_uri: None = None, limit: int = -1, named_graph: URIRef | None = None
|
|
102
|
+
) -> Iterable[tuple[URIRef, URIRef]]: ...
|
|
103
|
+
|
|
104
|
+
@overload
|
|
105
|
+
def list_instances_ids(
|
|
86
106
|
self, class_uri: URIRef, limit: int = -1, named_graph: URIRef | None = None
|
|
87
|
-
) ->
|
|
88
|
-
|
|
107
|
+
) -> Iterable[URIRef]: ...
|
|
108
|
+
|
|
109
|
+
def list_instances_ids(
|
|
110
|
+
self, class_uri: URIRef | None = None, limit: int = -1, named_graph: URIRef | None = None
|
|
111
|
+
) -> Iterable[URIRef] | Iterable[tuple[URIRef, URIRef]]:
|
|
112
|
+
"""List all instance IDs
|
|
89
113
|
|
|
90
114
|
Args:
|
|
91
|
-
class_uri: Class for which instances are to be found
|
|
115
|
+
class_uri: Class for which instances are to be found, default None (all instances)
|
|
92
116
|
limit: Max number of instances to return, by default -1 meaning all instances
|
|
93
117
|
named_graph: Named graph to query over, default None (default graph)
|
|
94
118
|
|
|
95
119
|
Returns:
|
|
96
120
|
List of class instance URIs
|
|
97
121
|
"""
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
class_uri: Class for which instances are to be found
|
|
108
|
-
named_graph: Named graph to query over, default None (default graph)
|
|
109
|
-
|
|
110
|
-
Returns:
|
|
111
|
-
List of triples for instances of the given class in the named graph
|
|
112
|
-
"""
|
|
113
|
-
query = (
|
|
114
|
-
f"SELECT ?instance ?prop ?value "
|
|
115
|
-
f"WHERE {{ ?instance rdf:type <{class_uri}> . ?instance ?prop ?value . }} order by ?instance "
|
|
116
|
-
)
|
|
117
|
-
|
|
118
|
-
# Select queries gives an iterable of result rows
|
|
119
|
-
return cast(list[ResultRow], list(self.graph(named_graph).query(query)))
|
|
122
|
+
query = "SELECT DISTINCT ?subject"
|
|
123
|
+
if class_uri:
|
|
124
|
+
query += f" WHERE {{ ?subject a <{class_uri}> .}}"
|
|
125
|
+
else:
|
|
126
|
+
query += " ?type WHERE {{ ?subject a ?type .}}"
|
|
127
|
+
if limit != -1:
|
|
128
|
+
query += f" LIMIT {limit}"
|
|
129
|
+
# MyPy is not very happy with RDFLib, so just ignore the type hinting here
|
|
130
|
+
return (tuple(res) if class_uri is None else res[0] for res in self.graph(named_graph).query(query)) # type: ignore[index, return-value, arg-type]
|
|
120
131
|
|
|
121
132
|
def type_with_property(self, type_: URIRef, property_uri: URIRef, named_graph: URIRef | None = None) -> bool:
|
|
122
133
|
"""Check if a property exists in the graph store
|
|
@@ -168,7 +179,8 @@ class Queries:
|
|
|
168
179
|
instance_type: URIRef | None = None,
|
|
169
180
|
property_renaming_config: dict | None = None,
|
|
170
181
|
named_graph: URIRef | None = None,
|
|
171
|
-
|
|
182
|
+
remove_uri_namespace: bool = True,
|
|
183
|
+
) -> tuple[URIRef, dict[str | InstanceType, list[Any]]] | None:
|
|
172
184
|
"""DESCRIBE instance for a given class from the graph store
|
|
173
185
|
|
|
174
186
|
Args:
|
|
@@ -176,13 +188,13 @@ class Queries:
|
|
|
176
188
|
instance_type: Type of the instance, default None (will be inferred from triples)
|
|
177
189
|
property_renaming_config: Dictionary to rename properties, default None (no renaming)
|
|
178
190
|
named_graph: Named graph to query over, default None (default graph)
|
|
191
|
+
remove_uri_namespace: Whether to remove the namespace from the URI, by default True
|
|
179
192
|
|
|
180
193
|
|
|
181
194
|
Returns:
|
|
182
195
|
Dictionary of instance properties
|
|
183
196
|
"""
|
|
184
|
-
property_values: dict[str, list[str]] = defaultdict(list)
|
|
185
|
-
identifier = remove_namespace_from_uri(instance_id, validation="prefix")
|
|
197
|
+
property_values: dict[str, list[str] | list[URIRef]] = defaultdict(list)
|
|
186
198
|
for _, predicate, object_ in cast(list[ResultRow], self.graph(named_graph).query(f"DESCRIBE <{instance_id}>")):
|
|
187
199
|
if object_.lower() in [
|
|
188
200
|
"",
|
|
@@ -205,29 +217,37 @@ class Queries:
|
|
|
205
217
|
property_ = RDF.type
|
|
206
218
|
renamed_property_ = property_
|
|
207
219
|
|
|
208
|
-
|
|
220
|
+
value: Any
|
|
221
|
+
if isinstance(object_, URIRef) and remove_uri_namespace:
|
|
222
|
+
# These properties contain the space in the Namespace.
|
|
209
223
|
value = remove_namespace_from_uri(object_, validation="prefix")
|
|
224
|
+
elif isinstance(object_, URIRef):
|
|
225
|
+
value = object_
|
|
210
226
|
elif isinstance(object_, RdfLiteral):
|
|
211
|
-
|
|
227
|
+
if object_.datatype == XSD._NS["json"]:
|
|
228
|
+
# For JSON literals, the .toPython() returns a Literal object.
|
|
229
|
+
value = str(object_)
|
|
230
|
+
else:
|
|
231
|
+
value = object_.toPython()
|
|
212
232
|
else:
|
|
213
233
|
# It is a blank node
|
|
214
234
|
value = str(object_)
|
|
215
235
|
|
|
216
236
|
# add type to the dictionary
|
|
217
237
|
if predicate != RDF.type:
|
|
218
|
-
property_values[renamed_property_].append(value)
|
|
238
|
+
property_values[renamed_property_].append(value) # type: ignore[arg-type]
|
|
219
239
|
else:
|
|
220
240
|
# guarding against multiple rdf:type values as this is not allowed in CDF
|
|
221
241
|
if RDF.type not in property_values:
|
|
222
242
|
property_values[RDF.type].append(
|
|
223
|
-
remove_namespace_from_uri(instance_type, validation="prefix") if instance_type else value
|
|
243
|
+
remove_namespace_from_uri(instance_type, validation="prefix") if instance_type else value # type: ignore[arg-type]
|
|
224
244
|
)
|
|
225
245
|
else:
|
|
226
246
|
# we should not have multiple rdf:type values
|
|
227
247
|
continue
|
|
228
248
|
if property_values:
|
|
229
249
|
return (
|
|
230
|
-
|
|
250
|
+
instance_id,
|
|
231
251
|
property_values,
|
|
232
252
|
)
|
|
233
253
|
else:
|
|
@@ -327,7 +347,7 @@ class Queries:
|
|
|
327
347
|
"""
|
|
328
348
|
dropped_types: dict[URIRef, int] = {}
|
|
329
349
|
for t in type_:
|
|
330
|
-
instance_ids = self.
|
|
350
|
+
instance_ids = list(self.list_instances_ids(t))
|
|
331
351
|
dropped_types[t] = len(instance_ids)
|
|
332
352
|
remove_instance_ids_in_batch(self.graph(named_graph), instance_ids)
|
|
333
353
|
return dropped_types
|
|
@@ -359,6 +379,80 @@ class Queries:
|
|
|
359
379
|
query = f"SELECT (COUNT(?instance) AS ?instanceCount) WHERE {{ ?instance a <{class_uri}> }}"
|
|
360
380
|
return int(next(iter(self.graph(named_graph).query(query)))[0]) # type: ignore[arg-type, index]
|
|
361
381
|
|
|
382
|
+
def types_with_instance_and_property_count(
|
|
383
|
+
self, remove_namespace: bool = True, named_graph: URIRef | None = None
|
|
384
|
+
) -> list[dict[str, Any]]:
|
|
385
|
+
query = """
|
|
386
|
+
SELECT ?type (COUNT(DISTINCT ?instance) AS ?instanceCount) (COUNT(DISTINCT ?property) AS ?propertyCount)
|
|
387
|
+
WHERE {
|
|
388
|
+
?instance a ?type .
|
|
389
|
+
?instance ?property ?value .
|
|
390
|
+
FILTER(?property != rdf:type)
|
|
391
|
+
}
|
|
392
|
+
GROUP BY ?type
|
|
393
|
+
ORDER BY DESC(?instanceCount)"""
|
|
394
|
+
return [
|
|
395
|
+
{
|
|
396
|
+
"type": urllib.parse.unquote(remove_namespace_from_uri(type_)) if remove_namespace else type_,
|
|
397
|
+
"instanceCount": cast(RdfLiteral, instance_count).toPython(),
|
|
398
|
+
"propertyCount": cast(RdfLiteral, property_count).toPython(),
|
|
399
|
+
}
|
|
400
|
+
for type_, instance_count, property_count in list(
|
|
401
|
+
cast(list[ResultRow], self.graph(named_graph).query(query))
|
|
402
|
+
)
|
|
403
|
+
]
|
|
404
|
+
|
|
405
|
+
def properties_with_count(
|
|
406
|
+
self, remove_namespace: bool = True, named_graph: URIRef | None = None
|
|
407
|
+
) -> list[dict[str, Any]]:
|
|
408
|
+
instance_count_by_type = {
|
|
409
|
+
entry["type"]: entry["instanceCount"]
|
|
410
|
+
for entry in self.types_with_instance_and_property_count(remove_namespace=False, named_graph=named_graph)
|
|
411
|
+
}
|
|
412
|
+
query = """SELECT ?type ?property (COUNT(DISTINCT ?instance) AS ?instanceCount)
|
|
413
|
+
WHERE {
|
|
414
|
+
?instance a ?type .
|
|
415
|
+
?instance ?property ?value .
|
|
416
|
+
FILTER(?property != rdf:type)
|
|
417
|
+
}
|
|
418
|
+
GROUP BY ?type ?property
|
|
419
|
+
ORDER BY ASC(?type) ASC(?property)"""
|
|
420
|
+
return [
|
|
421
|
+
{
|
|
422
|
+
"type": urllib.parse.unquote(remove_namespace_from_uri(type_)) if remove_namespace else type_,
|
|
423
|
+
"property": urllib.parse.unquote(remove_namespace_from_uri(property)) if remove_namespace else property,
|
|
424
|
+
"instanceCount": cast(RdfLiteral, instance_count).toPython(),
|
|
425
|
+
"total": instance_count_by_type[type_],
|
|
426
|
+
}
|
|
427
|
+
for type_, property, instance_count in list(cast(list[ResultRow], self.graph(named_graph).query(query)))
|
|
428
|
+
]
|
|
429
|
+
|
|
430
|
+
@overload
|
|
431
|
+
def instances_with_properties(
|
|
432
|
+
self, type: URIRef, remove_namespace: Literal[False], named_graph: URIRef | None = None
|
|
433
|
+
) -> dict[URIRef, set[URIRef]]: ...
|
|
434
|
+
|
|
435
|
+
@overload
|
|
436
|
+
def instances_with_properties(
|
|
437
|
+
self, type: URIRef, remove_namespace: Literal[True], named_graph: URIRef | None = None
|
|
438
|
+
) -> dict[str, set[str]]: ...
|
|
439
|
+
|
|
440
|
+
def instances_with_properties(
|
|
441
|
+
self, type: URIRef, remove_namespace: bool = True, named_graph: URIRef | None = None
|
|
442
|
+
) -> dict[str, set[str]] | dict[URIRef, set[URIRef]]:
|
|
443
|
+
query = """SELECT DISTINCT ?instance ?property
|
|
444
|
+
WHERE {{
|
|
445
|
+
?instance a <{type}> .
|
|
446
|
+
?instance ?property ?value .
|
|
447
|
+
FILTER(?property != rdf:type)
|
|
448
|
+
}}"""
|
|
449
|
+
result = defaultdict(set)
|
|
450
|
+
for instance, property_ in cast(Iterable[ResultRow], self.graph(named_graph).query(query.format(type=type))):
|
|
451
|
+
instance_str = urllib.parse.unquote(remove_namespace_from_uri(instance)) if remove_namespace else instance
|
|
452
|
+
property_str = urllib.parse.unquote(remove_namespace_from_uri(property_)) if remove_namespace else property_
|
|
453
|
+
result[instance_str].add(property_str)
|
|
454
|
+
return result
|
|
455
|
+
|
|
362
456
|
def list_instances_ids_by_space(
|
|
363
457
|
self, space_property: URIRef, named_graph: URIRef | None = None
|
|
364
458
|
) -> Iterable[tuple[URIRef, str]]:
|
|
@@ -240,50 +240,36 @@ class AssetRelationshipConnector(BaseTransformerStandardised):
|
|
|
240
240
|
str(extractors.RelationshipsExtractor.__name__),
|
|
241
241
|
}
|
|
242
242
|
)
|
|
243
|
-
_asset_template: str = """SELECT ?source ?target WHERE {{
|
|
244
|
-
<{relationship_id}> <{relationship_source_xid_prop}> ?source_xid .
|
|
245
|
-
?source <{asset_xid_property}> ?source_xid .
|
|
246
|
-
?source a <{asset_type}> .
|
|
247
|
-
|
|
248
|
-
<{relationship_id}> <{relationship_target_xid_prop}> ?target_xid .
|
|
249
|
-
?target <{asset_xid_property}> ?target_xid .
|
|
250
|
-
?target a <{asset_type}> .}}"""
|
|
251
243
|
|
|
252
244
|
def _count_query(self) -> str:
|
|
253
|
-
query = """SELECT (COUNT(?
|
|
245
|
+
query = """SELECT (COUNT(?target_xid) as ?count) WHERE {{
|
|
254
246
|
?relationship a <{relationship_type}> .
|
|
255
247
|
?relationship <{relationship_source_xid_prop}> ?source_xid .
|
|
256
|
-
?
|
|
257
|
-
?source a <{asset_type}> .
|
|
248
|
+
?source_xid a <{asset_type}> .
|
|
258
249
|
|
|
259
250
|
?relationship <{relationship_target_xid_prop}> ?target_xid .
|
|
260
|
-
?
|
|
261
|
-
?target a <{asset_type}> .}}"""
|
|
251
|
+
?target_xid a <{asset_type}> .}}"""
|
|
262
252
|
|
|
263
253
|
return query.format(
|
|
264
254
|
relationship_type=self.relationship_type,
|
|
265
255
|
relationship_source_xid_prop=self.relationship_source_xid_prop,
|
|
266
256
|
relationship_target_xid_prop=self.relationship_target_xid_prop,
|
|
267
|
-
asset_xid_property=self.asset_xid_property,
|
|
268
257
|
asset_type=self.asset_type,
|
|
269
258
|
)
|
|
270
259
|
|
|
271
260
|
def _iterate_query(self) -> str:
|
|
272
|
-
query = """SELECT ?
|
|
261
|
+
query = """SELECT ?source_xid ?relationship ?target_xid WHERE {{
|
|
273
262
|
?relationship a <{relationship_type}> .
|
|
274
263
|
?relationship <{relationship_source_xid_prop}> ?source_xid .
|
|
275
|
-
?
|
|
276
|
-
?source a <{asset_type}> .
|
|
264
|
+
?source_xid a <{asset_type}> .
|
|
277
265
|
|
|
278
266
|
?relationship <{relationship_target_xid_prop}> ?target_xid .
|
|
279
|
-
?
|
|
280
|
-
?target a <{asset_type}> .}}"""
|
|
267
|
+
?target_xid a <{asset_type}> .}}"""
|
|
281
268
|
|
|
282
269
|
return query.format(
|
|
283
270
|
relationship_type=self.relationship_type,
|
|
284
271
|
relationship_source_xid_prop=self.relationship_source_xid_prop,
|
|
285
272
|
relationship_target_xid_prop=self.relationship_target_xid_prop,
|
|
286
|
-
asset_xid_property=self.asset_xid_property,
|
|
287
273
|
asset_type=self.asset_type,
|
|
288
274
|
)
|
|
289
275
|
|
|
@@ -293,13 +279,11 @@ class AssetRelationshipConnector(BaseTransformerStandardised):
|
|
|
293
279
|
relationship_type: URIRef | None = None,
|
|
294
280
|
relationship_source_xid_prop: URIRef | None = None,
|
|
295
281
|
relationship_target_xid_prop: URIRef | None = None,
|
|
296
|
-
asset_xid_property: URIRef | None = None,
|
|
297
282
|
):
|
|
298
283
|
self.asset_type = asset_type or DEFAULT_NAMESPACE.Asset
|
|
299
284
|
self.relationship_type = relationship_type or DEFAULT_NAMESPACE.Relationship
|
|
300
285
|
self.relationship_source_xid_prop = relationship_source_xid_prop or DEFAULT_NAMESPACE.sourceExternalId
|
|
301
286
|
self.relationship_target_xid_prop = relationship_target_xid_prop or DEFAULT_NAMESPACE.targetExternalId
|
|
302
|
-
self.asset_xid_property = asset_xid_property or DEFAULT_NAMESPACE.externalId
|
|
303
287
|
|
|
304
288
|
def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
|
|
305
289
|
row_output = RowTransformationOutput()
|
cognite/neat/_issues/_factory.py
CHANGED
|
@@ -35,7 +35,15 @@ def from_warning(warning: WarningMessage) -> NeatWarning:
|
|
|
35
35
|
def _from_pydantic_error(error: ErrorDetails, read_info_by_sheet: dict[str, SpreadsheetRead]) -> NeatError:
|
|
36
36
|
neat_error = _create_neat_value_error(error)
|
|
37
37
|
location = error["loc"]
|
|
38
|
-
|
|
38
|
+
|
|
39
|
+
# only errors caused in model_validate will have location information
|
|
40
|
+
if location:
|
|
41
|
+
return SpreadsheetError.create(location, neat_error, read_info_by_sheet.get(cast(str, location[0])))
|
|
42
|
+
|
|
43
|
+
# errors that occur while for example parsing spreadsheet in input rules
|
|
44
|
+
# will not have location information so we return neat_error as is
|
|
45
|
+
# this is workaround until more elegant solution is found
|
|
46
|
+
return neat_error
|
|
39
47
|
|
|
40
48
|
|
|
41
49
|
def _create_neat_value_error(error: ErrorDetails) -> NeatValueError:
|
|
@@ -3,6 +3,7 @@ from cognite.neat._issues._base import NeatError, _get_subclasses
|
|
|
3
3
|
from ._external import (
|
|
4
4
|
AuthorizationError,
|
|
5
5
|
CDFMissingClientError,
|
|
6
|
+
CDFMissingResourcesError,
|
|
6
7
|
FileMissingRequiredFieldError,
|
|
7
8
|
FileNotAFileError,
|
|
8
9
|
FileNotFoundNeatError,
|
|
@@ -45,6 +46,7 @@ from ._wrapper import (
|
|
|
45
46
|
__all__ = [
|
|
46
47
|
"AuthorizationError",
|
|
47
48
|
"CDFMissingClientError",
|
|
49
|
+
"CDFMissingResourcesError",
|
|
48
50
|
"ClassValueError",
|
|
49
51
|
"ContainerValueError",
|
|
50
52
|
"EnumValueError",
|
|
@@ -80,3 +80,10 @@ class CDFMissingClientError(NeatError, RuntimeError):
|
|
|
80
80
|
"""CDF client is required: {reason}"""
|
|
81
81
|
|
|
82
82
|
reason: str
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass(unsafe_hash=True)
|
|
86
|
+
class CDFMissingResourcesError(NeatError, RuntimeError):
|
|
87
|
+
"""Following CDF resources are missing: {resources}"""
|
|
88
|
+
|
|
89
|
+
resources: str
|
|
@@ -20,9 +20,21 @@ __all__ = [
|
|
|
20
20
|
"NodeTypeFilterOnParentViewWarning",
|
|
21
21
|
"NotNeatSupportedFilterWarning",
|
|
22
22
|
"ParentInDifferentSpaceWarning",
|
|
23
|
+
"ViewsAndDataModelNotInSameSpaceWarning",
|
|
23
24
|
]
|
|
24
25
|
|
|
25
26
|
|
|
27
|
+
@dataclass(unsafe_hash=True)
|
|
28
|
+
class ViewsAndDataModelNotInSameSpaceWarning(UserModelingWarning):
|
|
29
|
+
"""The data model is in {data_model_space}, while views are in {views_spaces} space(s).
|
|
30
|
+
This is strongly discouraged as it can lead to confusion and unnecessary complexity.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
fix = "Ensure that views and data model are in the same space and have same version"
|
|
34
|
+
data_model_space: str
|
|
35
|
+
views_spaces: str
|
|
36
|
+
|
|
37
|
+
|
|
26
38
|
@dataclass(unsafe_hash=True)
|
|
27
39
|
class DirectRelationMissingSourceWarning(UserModelingWarning):
|
|
28
40
|
"""The view {view_id}.{prop_name} is a direct relation without a source.
|
|
@@ -117,6 +117,9 @@ SPLIT_ON_COMMA_PATTERN = re.compile(r",(?![^(]*\))")
|
|
|
117
117
|
# This pattern ignores equal signs inside brackets
|
|
118
118
|
SPLIT_ON_EQUAL_PATTERN = re.compile(r"=(?![^(]*\))")
|
|
119
119
|
|
|
120
|
+
# Very special Edge Entity parsing
|
|
121
|
+
SPLIT_ON_EDGE_ENTITY_ARGS_PATTERN = re.compile(r"(\btype\b|\bproperties\b|\bdirection\b)\s*=\s*([^,]+)")
|
|
122
|
+
|
|
120
123
|
|
|
121
124
|
class _Patterns:
|
|
122
125
|
@cached_property
|
|
@@ -487,43 +487,26 @@ class RulesAnalysis:
|
|
|
487
487
|
|
|
488
488
|
rules = self.dms
|
|
489
489
|
|
|
490
|
-
# Views with properties or used as ValueType
|
|
491
|
-
# If a view is not used in properties or as ValueType, it is not added to the graph
|
|
492
|
-
# as we typically do not have the properties for it.
|
|
493
|
-
used_views = {prop_.view for prop_ in rules.properties} | {
|
|
494
|
-
prop_.value_type for prop_ in rules.properties if isinstance(prop_.value_type, ViewEntity)
|
|
495
|
-
}
|
|
496
|
-
|
|
497
490
|
# Add nodes and edges from Views sheet
|
|
498
491
|
for view in rules.views:
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
di_graph.add_edge(
|
|
511
|
-
view.view.suffix,
|
|
512
|
-
implement.suffix,
|
|
513
|
-
label="implements",
|
|
514
|
-
dashes=True,
|
|
515
|
-
)
|
|
492
|
+
di_graph.add_node(view.view.suffix, label=view.view.suffix)
|
|
493
|
+
|
|
494
|
+
if format == "implements" and view.implements:
|
|
495
|
+
for implement in view.implements:
|
|
496
|
+
di_graph.add_node(implement.suffix, label=implement.suffix)
|
|
497
|
+
di_graph.add_edge(
|
|
498
|
+
view.view.suffix,
|
|
499
|
+
implement.suffix,
|
|
500
|
+
label="implements",
|
|
501
|
+
dashes=True,
|
|
502
|
+
)
|
|
516
503
|
|
|
517
504
|
if format == "data-model":
|
|
518
505
|
# Add nodes and edges from Properties sheet
|
|
519
506
|
for prop_ in rules.properties:
|
|
520
507
|
if prop_.connection and isinstance(prop_.value_type, ViewEntity):
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
if not di_graph.has_node(prop_.value_type.suffix):
|
|
525
|
-
di_graph.add_node(prop_.value_type.suffix, label=prop_.value_type.suffix)
|
|
526
|
-
|
|
508
|
+
di_graph.add_node(prop_.view.suffix, label=prop_.view.suffix)
|
|
509
|
+
di_graph.add_node(prop_.value_type.suffix, label=prop_.value_type.suffix)
|
|
527
510
|
di_graph.add_edge(
|
|
528
511
|
prop_.view.suffix,
|
|
529
512
|
prop_.value_type.suffix,
|
|
@@ -541,32 +524,28 @@ class RulesAnalysis:
|
|
|
541
524
|
# Add nodes and edges from Views sheet
|
|
542
525
|
for class_ in rules.classes:
|
|
543
526
|
# if possible use human readable label coming from the view name
|
|
544
|
-
if not di_graph.has_node(class_.class_.suffix):
|
|
545
|
-
di_graph.add_node(
|
|
546
|
-
class_.class_.suffix,
|
|
547
|
-
label=class_.name or class_.class_.suffix,
|
|
548
|
-
)
|
|
549
527
|
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
528
|
+
di_graph.add_node(
|
|
529
|
+
class_.class_.suffix,
|
|
530
|
+
label=class_.name or class_.class_.suffix,
|
|
531
|
+
)
|
|
532
|
+
|
|
533
|
+
if format == "implements" and class_.implements:
|
|
534
|
+
for parent in class_.implements:
|
|
535
|
+
di_graph.add_node(parent.suffix, label=parent.suffix)
|
|
536
|
+
di_graph.add_edge(
|
|
537
|
+
class_.class_.suffix,
|
|
538
|
+
parent.suffix,
|
|
539
|
+
label="implements",
|
|
540
|
+
dashes=True,
|
|
541
|
+
)
|
|
560
542
|
|
|
561
543
|
if format == "data-model":
|
|
562
544
|
# Add nodes and edges from Properties sheet
|
|
563
545
|
for prop_ in rules.properties:
|
|
564
546
|
if isinstance(prop_.value_type, ClassEntity) and not isinstance(prop_.value_type, UnknownEntity):
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
if not di_graph.has_node(prop_.value_type.suffix):
|
|
569
|
-
di_graph.add_node(prop_.value_type.suffix, label=prop_.value_type.suffix)
|
|
547
|
+
di_graph.add_node(prop_.class_.suffix, label=prop_.class_.suffix)
|
|
548
|
+
di_graph.add_node(prop_.value_type.suffix, label=prop_.value_type.suffix)
|
|
570
549
|
|
|
571
550
|
di_graph.add_edge(
|
|
572
551
|
prop_.class_.suffix,
|
|
@@ -55,7 +55,7 @@ class ExcelExporter(BaseExporter[VerifiedRules, Workbook]):
|
|
|
55
55
|
Style = Literal["none", "minimal", "default", "maximal"]
|
|
56
56
|
DumpOptions = Literal["user", "last", "reference"]
|
|
57
57
|
_main_header_by_sheet_name: ClassVar[dict[str, str]] = {
|
|
58
|
-
"Properties": "Definition of Properties
|
|
58
|
+
"Properties": "Definition of Properties",
|
|
59
59
|
"Classes": "Definition of Classes",
|
|
60
60
|
"Views": "Definition of Views",
|
|
61
61
|
"Containers": "Definition of Containers",
|
|
@@ -10,6 +10,7 @@ from cognite.client import data_modeling as dm
|
|
|
10
10
|
from rdflib import RDF, RDFS, Graph, Namespace, URIRef
|
|
11
11
|
from rdflib import Literal as RdfLiteral
|
|
12
12
|
|
|
13
|
+
from cognite.neat._config import GLOBAL_CONFIG
|
|
13
14
|
from cognite.neat._constants import NEAT, get_default_prefixes_and_namespaces
|
|
14
15
|
from cognite.neat._issues import IssueList
|
|
15
16
|
from cognite.neat._issues.warnings import PropertyValueTypeUndefinedWarning
|
|
@@ -27,6 +28,7 @@ from cognite.neat._store import NeatGraphStore
|
|
|
27
28
|
from cognite.neat._store._provenance import INSTANCES_ENTITY
|
|
28
29
|
from cognite.neat._utils.collection_ import iterate_progress_bar
|
|
29
30
|
from cognite.neat._utils.rdf_ import remove_namespace_from_uri, uri_to_short_form
|
|
31
|
+
from cognite.neat._utils.text import NamingStandardization
|
|
30
32
|
|
|
31
33
|
from ._base import DEFAULT_NON_EXISTING_NODE_TYPE, BaseRDFImporter
|
|
32
34
|
|
|
@@ -403,7 +405,7 @@ class SubclassInferenceImporter(BaseRDFImporter):
|
|
|
403
405
|
else:
|
|
404
406
|
existing_classes = {}
|
|
405
407
|
classes: list[InformationInputClass] = []
|
|
406
|
-
|
|
408
|
+
properties_by_class_suffix_by_property_id: dict[str, dict[str, InformationInputProperty]] = {}
|
|
407
409
|
|
|
408
410
|
# Help for IDE
|
|
409
411
|
type_uri: URIRef
|
|
@@ -455,7 +457,8 @@ class SubclassInferenceImporter(BaseRDFImporter):
|
|
|
455
457
|
continue
|
|
456
458
|
property_id = remove_namespace_from_uri(property_uri)
|
|
457
459
|
self._add_uri_namespace_to_prefixes(property_uri, prefixes)
|
|
458
|
-
|
|
460
|
+
property_id_standardized = NamingStandardization.standardize_property_str(property_uri)
|
|
461
|
+
if existing_prop := properties_by_id.get(property_id_standardized):
|
|
459
462
|
if not isinstance(existing_prop.instance_source, list):
|
|
460
463
|
existing_prop.instance_source = (
|
|
461
464
|
[existing_prop.instance_source] if existing_prop.instance_source else []
|
|
@@ -463,29 +466,28 @@ class SubclassInferenceImporter(BaseRDFImporter):
|
|
|
463
466
|
existing_prop.instance_source.append(property_uri)
|
|
464
467
|
continue
|
|
465
468
|
else:
|
|
466
|
-
properties_by_id[
|
|
469
|
+
properties_by_id[property_id_standardized] = self._create_property(
|
|
467
470
|
read_properties, class_suffix, property_uri, property_id, prefixes
|
|
468
471
|
)
|
|
469
|
-
|
|
472
|
+
properties_by_class_suffix_by_property_id[class_suffix] = properties_by_id
|
|
470
473
|
if parent_suffix:
|
|
471
474
|
properties_by_id = {}
|
|
472
475
|
for property_uri, read_properties in shared_properties.items():
|
|
473
476
|
property_id = remove_namespace_from_uri(property_uri)
|
|
474
477
|
self._add_uri_namespace_to_prefixes(property_uri, prefixes)
|
|
475
|
-
|
|
478
|
+
property_id_standardized = NamingStandardization.standardize_property_str(property_uri)
|
|
479
|
+
if existing_prop := properties_by_id.get(property_id_standardized):
|
|
476
480
|
if not isinstance(existing_prop.instance_source, list):
|
|
477
481
|
existing_prop.instance_source = (
|
|
478
482
|
[existing_prop.instance_source] if existing_prop.instance_source else []
|
|
479
483
|
)
|
|
480
484
|
existing_prop.instance_source.append(property_uri)
|
|
481
485
|
else:
|
|
482
|
-
properties_by_id[
|
|
486
|
+
properties_by_id[property_id_standardized] = self._create_property(
|
|
483
487
|
read_properties, parent_suffix, property_uri, property_id, prefixes
|
|
484
488
|
)
|
|
485
489
|
return classes, [
|
|
486
|
-
prop
|
|
487
|
-
for properties in properties_by_class_suffix_by_property_id_lowered.values()
|
|
488
|
-
for prop in properties.values()
|
|
490
|
+
prop for properties in properties_by_class_suffix_by_property_id.values() for prop in properties.values()
|
|
489
491
|
]
|
|
490
492
|
|
|
491
493
|
@staticmethod
|
|
@@ -522,7 +524,11 @@ class SubclassInferenceImporter(BaseRDFImporter):
|
|
|
522
524
|
existing_classes = {}
|
|
523
525
|
properties_by_class_by_subclass: list[_ReadProperties] = []
|
|
524
526
|
existing_class: InformationClass | None
|
|
525
|
-
|
|
527
|
+
total_instance_count = sum(count_by_type.values())
|
|
528
|
+
iterable = count_by_type.items()
|
|
529
|
+
if GLOBAL_CONFIG.use_iterate_bar_threshold and total_instance_count > GLOBAL_CONFIG.use_iterate_bar_threshold:
|
|
530
|
+
iterable = iterate_progress_bar(iterable, len(count_by_type), "Inferring types...") # type: ignore[assignment]
|
|
531
|
+
for type_uri, instance_count in iterable:
|
|
526
532
|
property_query = self._properties_query.format(type=type_uri, unknown_type=NEAT.UnknownType)
|
|
527
533
|
class_suffix = remove_namespace_from_uri(type_uri)
|
|
528
534
|
if (existing_class := existing_classes.get(class_suffix)) and existing_class.instance_source is None:
|
|
@@ -126,7 +126,6 @@ class SchemaModel(BaseModel):
|
|
|
126
126
|
extra="ignore",
|
|
127
127
|
use_enum_values=True,
|
|
128
128
|
)
|
|
129
|
-
validators_to_skip: set[str] = Field(default_factory=set, exclude=True)
|
|
130
129
|
|
|
131
130
|
@classmethod
|
|
132
131
|
def mandatory_fields(cls, use_alias=False) -> set[str]:
|
|
@@ -256,7 +255,6 @@ class BaseRules(SchemaModel, ABC):
|
|
|
256
255
|
|
|
257
256
|
Args:
|
|
258
257
|
metadata: Data model metadata
|
|
259
|
-
validators_to_skip: List of validators to skip. Defaults to []
|
|
260
258
|
"""
|
|
261
259
|
|
|
262
260
|
metadata: BaseMetadata
|