cognite-neat 0.105.2__py3-none-any.whl → 0.107.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_config.py +6 -260
- cognite/neat/_graph/extractors/__init__.py +5 -1
- cognite/neat/_graph/extractors/_base.py +32 -0
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +42 -16
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +78 -8
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +2 -0
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +10 -3
- cognite/neat/_graph/extractors/_dms.py +48 -14
- cognite/neat/_graph/extractors/_dms_graph.py +149 -0
- cognite/neat/_graph/extractors/_rdf_file.py +32 -5
- cognite/neat/_graph/loaders/_rdf2dms.py +119 -20
- cognite/neat/_graph/queries/_construct.py +1 -1
- cognite/neat/_graph/transformers/__init__.py +5 -0
- cognite/neat/_graph/transformers/_base.py +13 -9
- cognite/neat/_graph/transformers/_classic_cdf.py +141 -44
- cognite/neat/_graph/transformers/_rdfpath.py +4 -4
- cognite/neat/_graph/transformers/_value_type.py +54 -44
- cognite/neat/_issues/warnings/_external.py +1 -1
- cognite/neat/_rules/analysis/_base.py +1 -1
- cognite/neat/_rules/analysis/_information.py +14 -13
- cognite/neat/_rules/catalog/__init__.py +1 -0
- cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
- cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
- cognite/neat/_rules/importers/_dms2rules.py +7 -5
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +5 -3
- cognite/neat/_rules/models/_base_rules.py +0 -12
- cognite/neat/_rules/models/_types.py +5 -0
- cognite/neat/_rules/models/dms/_rules.py +50 -2
- cognite/neat/_rules/models/information/_rules.py +48 -5
- cognite/neat/_rules/models/information/_rules_input.py +1 -1
- cognite/neat/_rules/models/mapping/_classic2core.py +4 -5
- cognite/neat/_rules/models/mapping/_classic2core.yaml +70 -58
- cognite/neat/_rules/transformers/__init__.py +4 -0
- cognite/neat/_rules/transformers/_converters.py +209 -62
- cognite/neat/_rules/transformers/_mapping.py +3 -2
- cognite/neat/_session/_base.py +8 -13
- cognite/neat/_session/_inspect.py +6 -2
- cognite/neat/_session/_mapping.py +22 -13
- cognite/neat/_session/_prepare.py +9 -57
- cognite/neat/_session/_read.py +96 -29
- cognite/neat/_session/_set.py +9 -0
- cognite/neat/_session/_state.py +10 -1
- cognite/neat/_session/_to.py +51 -15
- cognite/neat/_session/exceptions.py +7 -3
- cognite/neat/_store/_graph_store.py +85 -39
- cognite/neat/_store/_rules_store.py +22 -0
- cognite/neat/_utils/auth.py +2 -0
- cognite/neat/_utils/collection_.py +32 -11
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/METADATA +2 -8
- {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/RECORD +54 -52
- {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/WHEEL +1 -1
- {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from ._base import BaseTransformerStandardised
|
|
1
2
|
from ._classic_cdf import (
|
|
2
3
|
AddAssetDepth,
|
|
3
4
|
AssetEventConnector,
|
|
@@ -5,6 +6,7 @@ from ._classic_cdf import (
|
|
|
5
6
|
AssetRelationshipConnector,
|
|
6
7
|
AssetSequenceConnector,
|
|
7
8
|
AssetTimeSeriesConnector,
|
|
9
|
+
LookupRelationshipSourceTarget,
|
|
8
10
|
RelationshipAsEdgeTransformer,
|
|
9
11
|
)
|
|
10
12
|
from ._prune_graph import (
|
|
@@ -29,6 +31,7 @@ __all__ = [
|
|
|
29
31
|
"ConnectionToLiteral",
|
|
30
32
|
"ConvertLiteral",
|
|
31
33
|
"LiteralToEntity",
|
|
34
|
+
"LookupRelationshipSourceTarget",
|
|
32
35
|
"MakeConnectionOnExactMatch",
|
|
33
36
|
"PruneDanglingNodes",
|
|
34
37
|
"PruneDeadEndEdges",
|
|
@@ -57,4 +60,6 @@ Transformers = (
|
|
|
57
60
|
| ConvertLiteral
|
|
58
61
|
| LiteralToEntity
|
|
59
62
|
| ConnectionToLiteral
|
|
63
|
+
| BaseTransformerStandardised
|
|
64
|
+
| LookupRelationshipSourceTarget
|
|
60
65
|
)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import warnings
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
|
+
from collections.abc import Iterator
|
|
4
5
|
from typing import ClassVar, TypeAlias, cast
|
|
5
6
|
|
|
6
7
|
from rdflib import Graph
|
|
@@ -8,7 +9,7 @@ from rdflib.query import ResultRow
|
|
|
8
9
|
|
|
9
10
|
from cognite.neat._issues.warnings import NeatValueWarning
|
|
10
11
|
from cognite.neat._shared import Triple
|
|
11
|
-
from cognite.neat._utils.collection_ import
|
|
12
|
+
from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
|
|
12
13
|
from cognite.neat._utils.graph_transformations_report import GraphTransformationResult
|
|
13
14
|
|
|
14
15
|
To_Add_Triples: TypeAlias = list[Triple]
|
|
@@ -42,7 +43,6 @@ class BaseTransformerStandardised(ABC):
|
|
|
42
43
|
description: str
|
|
43
44
|
_use_only_once: bool = False
|
|
44
45
|
_need_changes: ClassVar[frozenset[str]] = frozenset()
|
|
45
|
-
_use_iterate_bar_threshold: int = 500
|
|
46
46
|
|
|
47
47
|
@abstractmethod
|
|
48
48
|
def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
|
|
@@ -66,9 +66,16 @@ class BaseTransformerStandardised(ABC):
|
|
|
66
66
|
The query to use for extracting target triples from the graph and performing the transformation.
|
|
67
67
|
Returns:
|
|
68
68
|
A query string.
|
|
69
|
+
|
|
70
|
+
!!! note "Complex Queries"
|
|
71
|
+
In the majority of cases the query should be a simple SELECT query. However, in cases
|
|
72
|
+
when one or more sub-iterators are needed, one can override the ._iterator() method
|
|
69
73
|
"""
|
|
70
74
|
raise NotImplementedError()
|
|
71
75
|
|
|
76
|
+
def _iterator(self, graph: Graph) -> Iterator:
|
|
77
|
+
yield from graph.query(self._iterate_query())
|
|
78
|
+
|
|
72
79
|
def _skip_count_query(self) -> str:
|
|
73
80
|
"""
|
|
74
81
|
The query to use for extracting target triples from the graph and performing the transformation.
|
|
@@ -98,13 +105,10 @@ class BaseTransformerStandardised(ABC):
|
|
|
98
105
|
if iteration_count == 0:
|
|
99
106
|
return outcome
|
|
100
107
|
|
|
101
|
-
result_iterable =
|
|
102
|
-
|
|
103
|
-
result_iterable
|
|
104
|
-
|
|
105
|
-
total=iteration_count,
|
|
106
|
-
description=self.description,
|
|
107
|
-
)
|
|
108
|
+
result_iterable = self._iterator(graph)
|
|
109
|
+
result_iterable = iterate_progress_bar_if_above_config_threshold(
|
|
110
|
+
result_iterable, iteration_count, self.description
|
|
111
|
+
)
|
|
108
112
|
|
|
109
113
|
for row in result_iterable:
|
|
110
114
|
row = cast(ResultRow, row)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
import urllib.parse
|
|
1
2
|
import warnings
|
|
2
3
|
from abc import ABC
|
|
3
|
-
from collections.abc import Callable, Iterable
|
|
4
|
+
from collections.abc import Callable, Iterable, Iterator
|
|
4
5
|
from functools import lru_cache
|
|
5
6
|
from typing import cast
|
|
6
7
|
|
|
@@ -9,6 +10,7 @@ from rdflib.query import ResultRow
|
|
|
9
10
|
|
|
10
11
|
from cognite.neat._constants import CLASSIC_CDF_NAMESPACE, DEFAULT_NAMESPACE
|
|
11
12
|
from cognite.neat._graph import extractors
|
|
13
|
+
from cognite.neat._issues.errors import NeatValueError
|
|
12
14
|
from cognite.neat._issues.warnings import ResourceNotFoundWarning
|
|
13
15
|
from cognite.neat._utils.collection_ import iterate_progress_bar
|
|
14
16
|
from cognite.neat._utils.rdf_ import (
|
|
@@ -229,8 +231,7 @@ class AssetEventConnector(BaseAssetConnector):
|
|
|
229
231
|
)
|
|
230
232
|
|
|
231
233
|
|
|
232
|
-
|
|
233
|
-
class AssetRelationshipConnector(BaseTransformer):
|
|
234
|
+
class AssetRelationshipConnector(BaseTransformerStandardised):
|
|
234
235
|
description: str = "Connects assets via relationships"
|
|
235
236
|
_use_only_once: bool = True
|
|
236
237
|
_need_changes = frozenset(
|
|
@@ -248,6 +249,44 @@ class AssetRelationshipConnector(BaseTransformer):
|
|
|
248
249
|
?target <{asset_xid_property}> ?target_xid .
|
|
249
250
|
?target a <{asset_type}> .}}"""
|
|
250
251
|
|
|
252
|
+
def _count_query(self) -> str:
|
|
253
|
+
query = """SELECT (COUNT(?target) as ?count) WHERE {{
|
|
254
|
+
?relationship a <{relationship_type}> .
|
|
255
|
+
?relationship <{relationship_source_xid_prop}> ?source_xid .
|
|
256
|
+
?source <{asset_xid_property}> ?source_xid .
|
|
257
|
+
?source a <{asset_type}> .
|
|
258
|
+
|
|
259
|
+
?relationship <{relationship_target_xid_prop}> ?target_xid .
|
|
260
|
+
?target <{asset_xid_property}> ?target_xid .
|
|
261
|
+
?target a <{asset_type}> .}}"""
|
|
262
|
+
|
|
263
|
+
return query.format(
|
|
264
|
+
relationship_type=self.relationship_type,
|
|
265
|
+
relationship_source_xid_prop=self.relationship_source_xid_prop,
|
|
266
|
+
relationship_target_xid_prop=self.relationship_target_xid_prop,
|
|
267
|
+
asset_xid_property=self.asset_xid_property,
|
|
268
|
+
asset_type=self.asset_type,
|
|
269
|
+
)
|
|
270
|
+
|
|
271
|
+
def _iterate_query(self) -> str:
|
|
272
|
+
query = """SELECT ?source ?relationship ?target WHERE {{
|
|
273
|
+
?relationship a <{relationship_type}> .
|
|
274
|
+
?relationship <{relationship_source_xid_prop}> ?source_xid .
|
|
275
|
+
?source <{asset_xid_property}> ?source_xid .
|
|
276
|
+
?source a <{asset_type}> .
|
|
277
|
+
|
|
278
|
+
?relationship <{relationship_target_xid_prop}> ?target_xid .
|
|
279
|
+
?target <{asset_xid_property}> ?target_xid .
|
|
280
|
+
?target a <{asset_type}> .}}"""
|
|
281
|
+
|
|
282
|
+
return query.format(
|
|
283
|
+
relationship_type=self.relationship_type,
|
|
284
|
+
relationship_source_xid_prop=self.relationship_source_xid_prop,
|
|
285
|
+
relationship_target_xid_prop=self.relationship_target_xid_prop,
|
|
286
|
+
asset_xid_property=self.asset_xid_property,
|
|
287
|
+
asset_type=self.asset_type,
|
|
288
|
+
)
|
|
289
|
+
|
|
251
290
|
def __init__(
|
|
252
291
|
self,
|
|
253
292
|
asset_type: URIRef | None = None,
|
|
@@ -262,48 +301,20 @@ class AssetRelationshipConnector(BaseTransformer):
|
|
|
262
301
|
self.relationship_target_xid_prop = relationship_target_xid_prop or DEFAULT_NAMESPACE.targetExternalId
|
|
263
302
|
self.asset_xid_property = asset_xid_property or DEFAULT_NAMESPACE.externalId
|
|
264
303
|
|
|
265
|
-
def
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
):
|
|
269
|
-
relationship_id: URIRef = cast(tuple, relationship_id_result)[0]
|
|
270
|
-
|
|
271
|
-
if assets_id_res := list(
|
|
272
|
-
graph.query(
|
|
273
|
-
self._asset_template.format(
|
|
274
|
-
relationship_id=relationship_id,
|
|
275
|
-
asset_xid_property=self.asset_xid_property,
|
|
276
|
-
relationship_source_xid_prop=self.relationship_source_xid_prop,
|
|
277
|
-
relationship_target_xid_prop=self.relationship_target_xid_prop,
|
|
278
|
-
asset_type=self.asset_type,
|
|
279
|
-
)
|
|
280
|
-
)
|
|
281
|
-
):
|
|
282
|
-
# files can be connected to multiple assets in the graph
|
|
283
|
-
for source_asset_id, target_asset_id in cast(list[tuple], assets_id_res):
|
|
284
|
-
# create a relationship between the two assets
|
|
285
|
-
graph.add(
|
|
286
|
-
(
|
|
287
|
-
source_asset_id,
|
|
288
|
-
DEFAULT_NAMESPACE.relationship,
|
|
289
|
-
relationship_id,
|
|
290
|
-
)
|
|
291
|
-
)
|
|
292
|
-
graph.add(
|
|
293
|
-
(
|
|
294
|
-
target_asset_id,
|
|
295
|
-
DEFAULT_NAMESPACE.relationship,
|
|
296
|
-
relationship_id,
|
|
297
|
-
)
|
|
298
|
-
)
|
|
304
|
+
def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
|
|
305
|
+
row_output = RowTransformationOutput()
|
|
306
|
+
source, relationship, target = query_result_row
|
|
299
307
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
308
|
+
row_output.add_triples.append(cast(Triple, (source, DEFAULT_NAMESPACE.relationship, target)))
|
|
309
|
+
row_output.add_triples.append(cast(Triple, (relationship, DEFAULT_NAMESPACE.source, source)))
|
|
310
|
+
row_output.add_triples.append(cast(Triple, (relationship, DEFAULT_NAMESPACE.target, target)))
|
|
303
311
|
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
312
|
+
row_output.remove_triples.append(cast(Triple, (relationship, self.relationship_source_xid_prop, None)))
|
|
313
|
+
row_output.remove_triples.append(cast(Triple, (relationship, self.relationship_target_xid_prop, None)))
|
|
314
|
+
|
|
315
|
+
row_output.instances_modified_count += 2
|
|
316
|
+
|
|
317
|
+
return row_output
|
|
307
318
|
|
|
308
319
|
|
|
309
320
|
# TODO: standardise
|
|
@@ -455,7 +466,7 @@ WHERE {{
|
|
|
455
466
|
ResourceNotFoundWarning(target_source_id, "class", str(relationship_id), "class"), stacklevel=2
|
|
456
467
|
)
|
|
457
468
|
return []
|
|
458
|
-
edge_id = str(object_by_predicates["externalId"])
|
|
469
|
+
edge_id = urllib.parse.quote(str(object_by_predicates["externalId"]))
|
|
459
470
|
# If there are properties on the relationship, we create a new intermediate node
|
|
460
471
|
edge_type = self._namespace[f"{source_type}To{target_type}Edge"]
|
|
461
472
|
return self._create_edge(
|
|
@@ -506,3 +517,89 @@ WHERE {{
|
|
|
506
517
|
|
|
507
518
|
def _predicate(self, target_type: str) -> URIRef:
|
|
508
519
|
return self._namespace[f"relationship{target_type.capitalize()}"]
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
class LookupRelationshipSourceTarget(BaseTransformerStandardised):
|
|
523
|
+
"""When relationships are extracted, the source and target are extracted as literals. This transformer
|
|
524
|
+
looks up the externalId of the source and target and replaces the literals with the URIRef of the entity.
|
|
525
|
+
"""
|
|
526
|
+
|
|
527
|
+
description = "Lookup relationships source and target externalId"
|
|
528
|
+
_use_only_once: bool = True
|
|
529
|
+
_need_changes = frozenset({extractors.RelationshipsExtractor.__name__})
|
|
530
|
+
|
|
531
|
+
_lookup_entity_query = """SELECT ?entity
|
|
532
|
+
WHERE {{
|
|
533
|
+
?entity a <{entity_type}> .
|
|
534
|
+
?entity <{namespace}externalId> "{external_id}" .
|
|
535
|
+
}}"""
|
|
536
|
+
|
|
537
|
+
def __init__(self, namespace: Namespace = CLASSIC_CDF_NAMESPACE, type_prefix: str | None = None) -> None:
|
|
538
|
+
self._namespace = namespace
|
|
539
|
+
self._type_prefix = type_prefix
|
|
540
|
+
self._lookup_entity: Callable[[URIRef, str], URIRef] | None = None
|
|
541
|
+
|
|
542
|
+
def _count_query(self) -> str:
|
|
543
|
+
return f"""SELECT (COUNT(?instance) AS ?instanceCount)
|
|
544
|
+
WHERE {{
|
|
545
|
+
?instance a <{self._namespace}ClassicRelationship> .
|
|
546
|
+
}}"""
|
|
547
|
+
|
|
548
|
+
def _iterate_query(self) -> str:
|
|
549
|
+
return f"""SELECT ?instance ?source ?sourceType ?target ?targetType
|
|
550
|
+
WHERE {{
|
|
551
|
+
?instance a <{self._namespace}ClassicRelationship> .
|
|
552
|
+
?instance <{self._namespace}sourceExternalId> ?source .
|
|
553
|
+
?instance <{self._namespace}targetExternalId> ?target .
|
|
554
|
+
?instance <{self._namespace}sourceType> ?sourceType .
|
|
555
|
+
?instance <{self._namespace}targetType> ?targetType
|
|
556
|
+
}}"""
|
|
557
|
+
|
|
558
|
+
def _iterator(self, graph: Graph) -> Iterator:
|
|
559
|
+
self._lookup_entity = self.create_lookup_entity_with_external_id(graph, self._namespace, self._type_prefix)
|
|
560
|
+
yield from graph.query(self._iterate_query())
|
|
561
|
+
|
|
562
|
+
def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
|
|
563
|
+
output = RowTransformationOutput()
|
|
564
|
+
instance, source, source_type, target, target_type = cast(
|
|
565
|
+
tuple[URIRef, Literal, URIRef, Literal, URIRef], query_result_row
|
|
566
|
+
)
|
|
567
|
+
if self._lookup_entity is None:
|
|
568
|
+
raise NeatValueError(f"{type(self)}: .operation() called before .transform()")
|
|
569
|
+
try:
|
|
570
|
+
source_id = self._lookup_entity(source_type, source.toPython())
|
|
571
|
+
except ValueError:
|
|
572
|
+
warnings.warn(ResourceNotFoundWarning(source, "class", str(instance), "class"), stacklevel=2)
|
|
573
|
+
return output
|
|
574
|
+
|
|
575
|
+
try:
|
|
576
|
+
target_id = self._lookup_entity(target_type, target.toPython())
|
|
577
|
+
except ValueError:
|
|
578
|
+
warnings.warn(ResourceNotFoundWarning(target, "class", str(instance), "class"), stacklevel=2)
|
|
579
|
+
return output
|
|
580
|
+
|
|
581
|
+
output.remove_triples.append((instance, self._namespace.sourceExternalId, source))
|
|
582
|
+
output.remove_triples.append((instance, self._namespace.targetExternalId, target))
|
|
583
|
+
output.add_triples.append((instance, self._namespace.sourceExternalId, source_id))
|
|
584
|
+
output.add_triples.append((instance, self._namespace.targetExternalId, target_id))
|
|
585
|
+
output.instances_modified_count += 1
|
|
586
|
+
return output
|
|
587
|
+
|
|
588
|
+
@staticmethod
|
|
589
|
+
def create_lookup_entity_with_external_id(
|
|
590
|
+
graph: Graph, namespace: Namespace, type_prefix: str | None
|
|
591
|
+
) -> Callable[[URIRef, str], URIRef]:
|
|
592
|
+
@lru_cache(maxsize=10_000)
|
|
593
|
+
def lookup_entity_with_external_id(entity_type: URIRef, external_id: str) -> URIRef:
|
|
594
|
+
if type_prefix:
|
|
595
|
+
entity_type = namespace[type_prefix + remove_namespace_from_uri(entity_type)]
|
|
596
|
+
|
|
597
|
+
query = LookupRelationshipSourceTarget._lookup_entity_query.format(
|
|
598
|
+
namespace=namespace, entity_type=entity_type, external_id=external_id
|
|
599
|
+
)
|
|
600
|
+
result = list(graph.query(query))
|
|
601
|
+
if len(result) == 1:
|
|
602
|
+
return cast(URIRef, result[0][0]) # type: ignore[index]
|
|
603
|
+
raise ValueError(f"Could not find entity with external_id {external_id} and type {entity_type}")
|
|
604
|
+
|
|
605
|
+
return lookup_entity_with_external_id
|
|
@@ -35,8 +35,8 @@ class AddSelfReferenceProperty(BaseTransformer):
|
|
|
35
35
|
|
|
36
36
|
def transform(self, graph: Graph) -> None:
|
|
37
37
|
for property_ in self.properties:
|
|
38
|
-
prefix = property_.
|
|
39
|
-
suffix = property_.
|
|
38
|
+
prefix = property_.instance_source.traversal.class_.prefix
|
|
39
|
+
suffix = property_.instance_source.traversal.class_.suffix
|
|
40
40
|
|
|
41
41
|
namespace = self.rules.prefixes[prefix] if prefix in self.rules.prefixes else self.rules.metadata.namespace
|
|
42
42
|
|
|
@@ -54,12 +54,12 @@ class AddSelfReferenceProperty(BaseTransformer):
|
|
|
54
54
|
property_=f"{self.rules.metadata.prefix}:{property_.property_}",
|
|
55
55
|
)
|
|
56
56
|
|
|
57
|
-
property_.
|
|
57
|
+
property_.instance_source = RDFPath(traversal=traversal)
|
|
58
58
|
|
|
59
59
|
|
|
60
60
|
class MakeConnectionOnExactMatch(BaseTransformerStandardised):
|
|
61
61
|
description: str = "Adds property that contains id of reference to all references of given class in Rules"
|
|
62
|
-
_use_only_once: bool =
|
|
62
|
+
_use_only_once: bool = False
|
|
63
63
|
_need_changes = frozenset({})
|
|
64
64
|
|
|
65
65
|
def __init__(
|
|
@@ -1,23 +1,21 @@
|
|
|
1
1
|
import warnings
|
|
2
|
-
from collections.abc import Callable
|
|
2
|
+
from collections.abc import Callable, Iterator
|
|
3
3
|
from typing import Any, cast
|
|
4
4
|
from urllib.parse import quote
|
|
5
5
|
|
|
6
6
|
import rdflib
|
|
7
|
-
from rdflib import RDF,
|
|
7
|
+
from rdflib import RDF, Namespace, URIRef
|
|
8
8
|
from rdflib.query import ResultRow
|
|
9
9
|
|
|
10
10
|
from cognite.neat._constants import UNKNOWN_TYPE
|
|
11
|
-
from cognite.neat._graph.queries import Queries
|
|
12
11
|
from cognite.neat._issues.warnings import PropertyDataTypeConversionWarning
|
|
13
12
|
from cognite.neat._utils.auxiliary import string_to_ideal_type
|
|
14
|
-
from cognite.neat._utils.rdf_ import get_namespace, remove_namespace_from_uri
|
|
13
|
+
from cognite.neat._utils.rdf_ import Triple, get_namespace, remove_namespace_from_uri
|
|
15
14
|
|
|
16
|
-
from ._base import
|
|
15
|
+
from ._base import BaseTransformerStandardised, RowTransformationOutput
|
|
17
16
|
|
|
18
17
|
|
|
19
|
-
|
|
20
|
-
class SplitMultiValueProperty(BaseTransformer):
|
|
18
|
+
class SplitMultiValueProperty(BaseTransformerStandardised):
|
|
21
19
|
description: str = (
|
|
22
20
|
"SplitMultiValueProperty is a transformer that splits a "
|
|
23
21
|
"multi-value property into multiple single-value properties."
|
|
@@ -25,55 +23,67 @@ class SplitMultiValueProperty(BaseTransformer):
|
|
|
25
23
|
_use_only_once: bool = True
|
|
26
24
|
_need_changes = frozenset({})
|
|
27
25
|
|
|
28
|
-
|
|
26
|
+
def __init__(self, unknown_type: URIRef | None = None) -> None:
|
|
27
|
+
self.unknown_type = unknown_type or UNKNOWN_TYPE
|
|
29
28
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
29
|
+
def _iterate_query(self) -> str:
|
|
30
|
+
query = """SELECT ?subjectType ?property
|
|
31
|
+
(GROUP_CONCAT(DISTINCT STR(?valueType); SEPARATOR=",") AS ?valueTypes)
|
|
32
|
+
|
|
33
|
+
WHERE {{
|
|
34
|
+
?s ?property ?o .
|
|
35
|
+
?s a ?subjectType .
|
|
36
|
+
OPTIONAL {{ ?o a ?type }}
|
|
37
|
+
|
|
38
|
+
# Key part to determine value type: either object, data or unknown
|
|
39
|
+
BIND( IF(isLiteral(?o),DATATYPE(?o),
|
|
40
|
+
IF(BOUND(?type), ?type,
|
|
41
|
+
<{unknownType}>)) AS ?valueType)
|
|
42
|
+
}}
|
|
43
|
+
|
|
44
|
+
GROUP BY ?subjectType ?property
|
|
45
|
+
HAVING (COUNT(DISTINCT ?valueType) > 1)"""
|
|
46
|
+
|
|
47
|
+
return query.format(unknownType=self.unknown_type)
|
|
48
|
+
|
|
49
|
+
def _count_query(self) -> str:
|
|
50
|
+
query = """SELECT (COUNT(*) AS ?tripleCount)
|
|
51
|
+
WHERE {?s ?p ?o .}"""
|
|
52
|
+
return query
|
|
53
|
+
|
|
54
|
+
def _sub_iterate_query(self, type_: URIRef, property_: URIRef) -> str:
|
|
55
|
+
query = """ SELECT ?s ?p ?o ?valueType WHERE {{
|
|
56
|
+
?s a <{subject_uri}> .
|
|
57
|
+
?s <{property_uri}> ?o .
|
|
33
58
|
|
|
34
|
-
|
|
59
|
+
OPTIONAL {{ ?o a ?type }}
|
|
35
60
|
|
|
36
|
-
|
|
61
|
+
BIND(<{property_uri}> AS ?p)
|
|
37
62
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
63
|
+
BIND(IF(isLiteral(?o), DATATYPE(?o),
|
|
64
|
+
IF(BOUND(?type),?type,
|
|
65
|
+
<{unknownType}>)) AS ?valueType)
|
|
41
66
|
|
|
42
|
-
|
|
67
|
+
}} """
|
|
43
68
|
|
|
44
|
-
|
|
69
|
+
return query.format(unknownType=self.unknown_type, subject_uri=type_, property_uri=property_)
|
|
45
70
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
}}"""
|
|
71
|
+
def _iterator(self, graph) -> Iterator:
|
|
72
|
+
for type_, property_, _ in graph.query(self._iterate_query()):
|
|
73
|
+
yield from graph.query(self._sub_iterate_query(type_, property_))
|
|
50
74
|
|
|
51
|
-
def
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
for value_type_uri in value_types:
|
|
55
|
-
_args = {
|
|
56
|
-
"subject_uri": subject_uri,
|
|
57
|
-
"property_uri": property_uri,
|
|
58
|
-
"object_uri": value_type_uri,
|
|
59
|
-
}
|
|
75
|
+
def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
|
|
76
|
+
row_output = RowTransformationOutput()
|
|
77
|
+
subject, old_property, object, value_type = query_result_row
|
|
60
78
|
|
|
61
|
-
|
|
62
|
-
if value_type_uri == UNKNOWN_TYPE:
|
|
63
|
-
iterator = graph.query(self._unknown_property_template.format(**_args))
|
|
79
|
+
new_property = URIRef(f"{old_property}_{remove_namespace_from_uri(value_type)}")
|
|
64
80
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
iterator = graph.query(self._datatype_property_template.format(**_args))
|
|
81
|
+
row_output.add_triples.append(cast(Triple, (subject, new_property, object)))
|
|
82
|
+
row_output.remove_triples.append(cast(Triple, (subject, old_property, object)))
|
|
68
83
|
|
|
69
|
-
|
|
70
|
-
else:
|
|
71
|
-
iterator = graph.query(self._object_property_template.format(**_args))
|
|
84
|
+
row_output.instances_modified_count += 1
|
|
72
85
|
|
|
73
|
-
|
|
74
|
-
graph.remove((s, property_uri, o))
|
|
75
|
-
new_property = URIRef(f"{property_uri}_{remove_namespace_from_uri(value_type_uri)}")
|
|
76
|
-
graph.add((s, new_property, o))
|
|
86
|
+
return row_output
|
|
77
87
|
|
|
78
88
|
|
|
79
89
|
class ConvertLiteral(BaseTransformerStandardised):
|
|
@@ -251,7 +251,7 @@ class BaseAnalysis(ABC, Generic[T_Rules, T_Class, T_Property, T_ClassEntity, T_P
|
|
|
251
251
|
if (
|
|
252
252
|
only_rdfpath
|
|
253
253
|
and isinstance(property_, InformationProperty)
|
|
254
|
-
and isinstance(property_.
|
|
254
|
+
and isinstance(property_.instance_source, RDFPath)
|
|
255
255
|
) or not only_rdfpath:
|
|
256
256
|
processed_properties[prop_entity] = property_
|
|
257
257
|
class_property_pairs[class_] = processed_properties
|
|
@@ -55,12 +55,13 @@ class InformationAnalysis(BaseAnalysis[InformationRules, InformationClass, Infor
|
|
|
55
55
|
|
|
56
56
|
def has_hop_transformations(self):
|
|
57
57
|
return any(
|
|
58
|
-
prop_.
|
|
58
|
+
prop_.instance_source and isinstance(prop_.instance_source.traversal, Hop)
|
|
59
|
+
for prop_ in self.rules.properties
|
|
59
60
|
)
|
|
60
61
|
|
|
61
62
|
def has_self_reference_property_transformations(self):
|
|
62
63
|
return any(
|
|
63
|
-
prop_.
|
|
64
|
+
prop_.instance_source and isinstance(prop_.instance_source.traversal, SelfReferenceProperty)
|
|
64
65
|
for prop_ in self.rules.properties
|
|
65
66
|
)
|
|
66
67
|
|
|
@@ -68,7 +69,7 @@ class InformationAnalysis(BaseAnalysis[InformationRules, InformationClass, Infor
|
|
|
68
69
|
return [
|
|
69
70
|
prop_
|
|
70
71
|
for prop_ in self.rules.properties
|
|
71
|
-
if prop_.
|
|
72
|
+
if prop_.instance_source and isinstance(prop_.instance_source.traversal, SelfReferenceProperty)
|
|
72
73
|
]
|
|
73
74
|
|
|
74
75
|
def define_property_renaming_config(self, class_: ClassEntity) -> dict[str | URIRef, str]:
|
|
@@ -76,7 +77,7 @@ class InformationAnalysis(BaseAnalysis[InformationRules, InformationClass, Infor
|
|
|
76
77
|
|
|
77
78
|
if definitions := self.class_property_pairs(only_rdfpath=True, consider_inheritance=True).get(class_, None):
|
|
78
79
|
for property_id, definition in definitions.items():
|
|
79
|
-
transformation = cast(RDFPath, definition.
|
|
80
|
+
transformation = cast(RDFPath, definition.instance_source)
|
|
80
81
|
|
|
81
82
|
# use case we have a single property rdf path, and defined prefix
|
|
82
83
|
# in either metadata or prefixes of rules
|
|
@@ -101,26 +102,26 @@ class InformationAnalysis(BaseAnalysis[InformationRules, InformationClass, Infor
|
|
|
101
102
|
|
|
102
103
|
return property_renaming_configuration
|
|
103
104
|
|
|
104
|
-
def
|
|
105
|
+
def neat_id_to_instance_source_property_uri(self, property_neat_id: URIRef) -> URIRef | None:
|
|
105
106
|
if (
|
|
106
107
|
(property_ := self.properties_by_neat_id.get(property_neat_id))
|
|
107
|
-
and property_.
|
|
108
|
+
and property_.instance_source
|
|
108
109
|
and isinstance(
|
|
109
|
-
property_.
|
|
110
|
+
property_.instance_source.traversal,
|
|
110
111
|
SingleProperty,
|
|
111
112
|
)
|
|
112
113
|
and (
|
|
113
|
-
property_.
|
|
114
|
-
or property_.
|
|
114
|
+
property_.instance_source.traversal.property.prefix in self.rules.prefixes
|
|
115
|
+
or property_.instance_source.traversal.property.prefix == self.rules.metadata.prefix
|
|
115
116
|
)
|
|
116
117
|
):
|
|
117
118
|
namespace = (
|
|
118
119
|
self.rules.metadata.namespace
|
|
119
|
-
if property_.
|
|
120
|
-
else self.rules.prefixes[property_.
|
|
120
|
+
if property_.instance_source.traversal.property.prefix == self.rules.metadata.prefix
|
|
121
|
+
else self.rules.prefixes[property_.instance_source.traversal.property.prefix]
|
|
121
122
|
)
|
|
122
123
|
|
|
123
|
-
return namespace[property_.
|
|
124
|
+
return namespace[property_.instance_source.traversal.property.suffix]
|
|
124
125
|
return None
|
|
125
126
|
|
|
126
127
|
def property_types(self, class_: ClassEntity) -> dict[str, EntityTypes]:
|
|
@@ -137,7 +138,7 @@ class InformationAnalysis(BaseAnalysis[InformationRules, InformationClass, Infor
|
|
|
137
138
|
class_, None
|
|
138
139
|
):
|
|
139
140
|
for property_ in class_property_pairs.values():
|
|
140
|
-
classes.append(cast(RDFPath, property_.
|
|
141
|
+
classes.append(cast(RDFPath, property_.instance_source).traversal.class_)
|
|
141
142
|
|
|
142
143
|
return cast(ClassEntity, most_occurring_element(classes))
|
|
143
144
|
else:
|
|
Binary file
|
|
Binary file
|
|
@@ -115,8 +115,8 @@ class DMSImporter(BaseImporter[DMSInputRules]):
|
|
|
115
115
|
data_model_ids = [data_model_id]
|
|
116
116
|
data_models = client.data_modeling.data_models.retrieve(data_model_ids, inline_views=True)
|
|
117
117
|
|
|
118
|
-
|
|
119
|
-
if len(
|
|
118
|
+
retrieved_models = cls._find_model_in_list(data_models, data_model_id)
|
|
119
|
+
if len(retrieved_models) == 0:
|
|
120
120
|
return cls(
|
|
121
121
|
DMSSchema(),
|
|
122
122
|
[
|
|
@@ -127,16 +127,18 @@ class DMSImporter(BaseImporter[DMSInputRules]):
|
|
|
127
127
|
)
|
|
128
128
|
],
|
|
129
129
|
)
|
|
130
|
-
|
|
130
|
+
return cls.from_data_model(client, retrieved_models.latest_version())
|
|
131
131
|
|
|
132
|
+
@classmethod
|
|
133
|
+
def from_data_model(cls, client: NeatClient, model: dm.DataModel[dm.View]) -> "DMSImporter":
|
|
132
134
|
issue_list = IssueList()
|
|
133
135
|
with _handle_issues(issue_list) as result:
|
|
134
|
-
schema =
|
|
136
|
+
schema = client.schema.retrieve_data_model(model)
|
|
135
137
|
|
|
136
138
|
if result.result == "failure" or issue_list.has_errors:
|
|
137
139
|
return cls(DMSSchema(), issue_list)
|
|
138
140
|
|
|
139
|
-
metadata = cls._create_metadata_from_model(
|
|
141
|
+
metadata = cls._create_metadata_from_model(model)
|
|
140
142
|
|
|
141
143
|
return cls(
|
|
142
144
|
schema,
|
|
@@ -17,6 +17,7 @@ from cognite.neat._rules.models.information import (
|
|
|
17
17
|
)
|
|
18
18
|
from cognite.neat._store import NeatGraphStore
|
|
19
19
|
from cognite.neat._store._provenance import INSTANCES_ENTITY
|
|
20
|
+
from cognite.neat._utils.collection_ import iterate_progress_bar
|
|
20
21
|
from cognite.neat._utils.rdf_ import remove_namespace_from_uri, uri_to_short_form
|
|
21
22
|
|
|
22
23
|
from ._base import DEFAULT_NON_EXISTING_NODE_TYPE, BaseRDFImporter
|
|
@@ -27,7 +28,6 @@ ORDERED_CLASSES_QUERY = """SELECT ?class (count(?s) as ?instances )
|
|
|
27
28
|
WHERE { ?s a ?class . }
|
|
28
29
|
group by ?class order by DESC(?instances)"""
|
|
29
30
|
|
|
30
|
-
|
|
31
31
|
INSTANCES_OF_CLASS_QUERY = """SELECT ?s ?propertyCount WHERE { ?s a <class> . BIND ('Unknown' as ?propertyCount) }"""
|
|
32
32
|
|
|
33
33
|
|
|
@@ -171,8 +171,10 @@ class InferenceImporter(BaseRDFImporter):
|
|
|
171
171
|
INSTANCES_OF_CLASS_QUERY if self.max_number_of_instance == -1 else INSTANCES_OF_CLASS_RICHNESS_ORDERED_QUERY
|
|
172
172
|
)
|
|
173
173
|
|
|
174
|
+
classes_iterable = iterate_progress_bar(classes.items(), len(classes), "Inferring classes")
|
|
175
|
+
|
|
174
176
|
# Infers all the properties of the class
|
|
175
|
-
for class_id, class_definition in
|
|
177
|
+
for class_id, class_definition in classes_iterable:
|
|
176
178
|
for ( # type: ignore[misc]
|
|
177
179
|
instance,
|
|
178
180
|
_,
|
|
@@ -231,7 +233,7 @@ class InferenceImporter(BaseRDFImporter):
|
|
|
231
233
|
"property_": property_id,
|
|
232
234
|
"max_count": cast(RdfLiteral, occurrence).value,
|
|
233
235
|
"value_type": value_type_id,
|
|
234
|
-
"
|
|
236
|
+
"instance_source": (
|
|
235
237
|
f"{uri_to_short_form(class_definition['uri'], prefixes)}"
|
|
236
238
|
f"({uri_to_short_form(cast(URIRef, property_uri), prefixes)})"
|
|
237
239
|
),
|