cognite-neat 0.106.0__py3-none-any.whl → 0.107.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_graph/extractors/__init__.py +5 -1
- cognite/neat/_graph/extractors/_base.py +32 -0
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +16 -3
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +74 -7
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +2 -0
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +8 -1
- cognite/neat/_graph/extractors/_dms.py +48 -14
- cognite/neat/_graph/extractors/_dms_graph.py +149 -0
- cognite/neat/_graph/extractors/_rdf_file.py +32 -5
- cognite/neat/_graph/loaders/_rdf2dms.py +112 -18
- cognite/neat/_graph/queries/_construct.py +1 -1
- cognite/neat/_graph/transformers/__init__.py +5 -0
- cognite/neat/_graph/transformers/_base.py +9 -1
- cognite/neat/_graph/transformers/_classic_cdf.py +90 -3
- cognite/neat/_graph/transformers/_rdfpath.py +3 -3
- cognite/neat/_graph/transformers/_value_type.py +54 -44
- cognite/neat/_rules/analysis/_base.py +1 -1
- cognite/neat/_rules/analysis/_information.py +14 -13
- cognite/neat/_rules/catalog/__init__.py +1 -0
- cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
- cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
- cognite/neat/_rules/importers/_dms2rules.py +7 -5
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +1 -1
- cognite/neat/_rules/models/_base_rules.py +0 -12
- cognite/neat/_rules/models/_types.py +5 -0
- cognite/neat/_rules/models/dms/_rules.py +50 -2
- cognite/neat/_rules/models/information/_rules.py +48 -5
- cognite/neat/_rules/models/information/_rules_input.py +1 -1
- cognite/neat/_rules/models/mapping/_classic2core.py +4 -5
- cognite/neat/_rules/transformers/__init__.py +4 -0
- cognite/neat/_rules/transformers/_converters.py +209 -62
- cognite/neat/_session/_base.py +2 -6
- cognite/neat/_session/_mapping.py +17 -6
- cognite/neat/_session/_prepare.py +0 -47
- cognite/neat/_session/_read.py +63 -5
- cognite/neat/_session/_state.py +7 -0
- cognite/neat/_session/_to.py +40 -2
- cognite/neat/_session/exceptions.py +7 -3
- cognite/neat/_store/_graph_store.py +52 -11
- cognite/neat/_store/_rules_store.py +22 -0
- cognite/neat/_utils/auth.py +2 -0
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.107.0.dist-info}/METADATA +2 -2
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.107.0.dist-info}/RECORD +47 -45
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.107.0.dist-info}/WHEEL +1 -1
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.107.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.107.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
import zipfile
|
|
1
2
|
from collections.abc import Iterable
|
|
2
3
|
from pathlib import Path
|
|
3
|
-
from typing import get_args
|
|
4
|
+
from typing import cast, get_args
|
|
4
5
|
|
|
5
6
|
from rdflib import URIRef
|
|
6
7
|
from rdflib.util import guess_format
|
|
@@ -10,6 +11,7 @@ from cognite.neat._graph._shared import RDFTypes
|
|
|
10
11
|
from cognite.neat._graph.extractors._base import BaseExtractor
|
|
11
12
|
from cognite.neat._issues._base import IssueList
|
|
12
13
|
from cognite.neat._issues.errors import FileNotFoundNeatError, FileTypeUnexpectedError
|
|
14
|
+
from cognite.neat._issues.errors._general import NeatValueError
|
|
13
15
|
from cognite.neat._shared import Triple
|
|
14
16
|
|
|
15
17
|
|
|
@@ -24,25 +26,50 @@ class RdfFileExtractor(BaseExtractor):
|
|
|
24
26
|
|
|
25
27
|
def __init__(
    self,
    filepath: Path | zipfile.ZipExtFile,
    base_uri: URIRef = DEFAULT_BASE_URI,
    issue_list: IssueList | None = None,
):
    """Record the RDF source and pre-validate it, collecting problems as issues.

    Args:
        filepath: Either a path on disk or an already opened member of a zip
            archive (``zipfile.ZipExtFile``).
        base_uri: Stored on the instance as ``self.base_uri``.
        issue_list: Issue collection to append to. A new ``IssueList`` titled
            with the file name is created when ``None`` is passed.

    Nothing is raised here: a missing file or an unrecognised RDF format is
    appended to ``self.issue_list`` instead.
    """
    # Compare against None explicitly so a caller-supplied list that is still
    # empty is not swapped for a fresh one (matters if IssueList is falsy when
    # empty -- TODO confirm against its definition).
    self.issue_list = issue_list if issue_list is not None else IssueList(title=f"{filepath.name}")
    self.base_uri = base_uri
    self.filepath = filepath

    # guess_format works on a file name: use the path string for a Path and
    # the member name for an opened zip member.
    source_name = str(filepath) if isinstance(filepath, Path) else filepath.name
    self.format = guess_format(source_name)

    # Only a real path can be checked for existence; a ZipExtFile is already open.
    if isinstance(filepath, Path) and not filepath.exists():
        self.issue_list.append(FileNotFoundNeatError(filepath))

    if not self.format:
        self.issue_list.append(
            FileTypeUnexpectedError(
                filepath if isinstance(filepath, Path) else Path(filepath.name),
                frozenset(get_args(RDFTypes)),
            )
        )
|
|
46
50
|
|
|
47
51
|
def extract(self) -> Iterable[Triple]:
    """Triple extraction is not implemented here; always raises NotImplementedError."""
    raise NotImplementedError
|
|
53
|
+
|
|
54
|
+
@classmethod
def from_zip(
    cls,
    filepath: Path,
    filename: str = "neat-session/instances/instances.ttl",
    base_uri: URIRef = DEFAULT_BASE_URI,
    issue_list: IssueList | None = None,
):
    """Create an extractor that reads one member of a zip archive.

    Args:
        filepath: Path to the ``.zip`` archive.
        filename: Name of the archive member to open.
        base_uri: Passed through to the constructor.
        issue_list: Passed through to the constructor.

    Returns:
        An instance of ``cls`` whose ``filepath`` is the opened archive member.

    Raises:
        FileNotFoundNeatError: If ``filepath`` does not exist.
        NeatValueError: If ``filepath`` does not have the ``.zip`` suffix, or
            the archive has no member called ``filename``.
    """
    if not filepath.exists():
        raise FileNotFoundNeatError(filepath)
    if filepath.suffix != ".zip":
        raise NeatValueError(f"Expected a zip file, got {filepath.suffix}")

    with zipfile.ZipFile(filepath, "r") as zip_ref:
        for file_info in zip_ref.infolist():
            if file_info.filename == filename:
                # The member is opened here and handed to the extractor; it is
                # meant to be closed once the triples have been extracted.
                file = zip_ref.open(file_info)
                return cls(cast(zipfile.ZipExtFile, file), base_uri, issue_list)

    raise NeatValueError(f"Cannot extract {filename} from zip file {filepath}")
|
|
@@ -32,7 +32,7 @@ from cognite.neat._issues.errors import (
|
|
|
32
32
|
from cognite.neat._issues.warnings import PropertyDirectRelationLimitWarning, PropertyTypeNotSupportedWarning
|
|
33
33
|
from cognite.neat._rules.analysis._dms import DMSAnalysis
|
|
34
34
|
from cognite.neat._rules.models import DMSRules
|
|
35
|
-
from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE, Json
|
|
35
|
+
from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE, Json, String
|
|
36
36
|
from cognite.neat._rules.models.entities._single_value import ViewEntity
|
|
37
37
|
from cognite.neat._shared import InstanceType
|
|
38
38
|
from cognite.neat._store import NeatGraphStore
|
|
@@ -159,7 +159,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
159
159
|
|
|
160
160
|
tracker = self._tracker(type(self).__name__, view_ids, "views")
|
|
161
161
|
for view_id, (view, instance_count) in view_and_count_by_id.items():
|
|
162
|
-
pydantic_cls, edge_by_type, issues = self._create_validation_classes(view) # type: ignore[var-annotated]
|
|
162
|
+
pydantic_cls, edge_by_type, edge_by_prop_id, issues = self._create_validation_classes(view) # type: ignore[var-annotated]
|
|
163
163
|
yield from issues
|
|
164
164
|
tracker.issue(issues)
|
|
165
165
|
|
|
@@ -200,20 +200,61 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
200
200
|
)
|
|
201
201
|
|
|
202
202
|
for identifier, properties in instance_iterable:
|
|
203
|
+
start_node, end_node = self._pop_start_end_node(properties)
|
|
204
|
+
is_edge = start_node and end_node
|
|
205
|
+
if (is_edge and view.used_for == "node") or (not is_edge and view.used_for == "edge"):
|
|
206
|
+
instance_type = "edge" if is_edge else "node"
|
|
207
|
+
creation_error = ResourceCreationError(
|
|
208
|
+
identifier,
|
|
209
|
+
instance_type,
|
|
210
|
+
error=f"{instance_type.capitalize()} found in {view.used_for} view",
|
|
211
|
+
)
|
|
212
|
+
tracker.issue(creation_error)
|
|
213
|
+
if stop_on_exception:
|
|
214
|
+
raise creation_error
|
|
215
|
+
yield creation_error
|
|
216
|
+
continue
|
|
217
|
+
|
|
203
218
|
if skip_properties:
|
|
204
219
|
properties = {k: v for k, v in properties.items() if k not in skip_properties}
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
220
|
+
|
|
221
|
+
if start_node and end_node:
|
|
222
|
+
# Is an edge
|
|
223
|
+
try:
|
|
224
|
+
yield self._create_edge_with_properties(
|
|
225
|
+
identifier, properties, start_node, end_node, pydantic_cls, view_id
|
|
226
|
+
)
|
|
227
|
+
except ValueError as e:
|
|
228
|
+
error_edge = ResourceCreationError(identifier, "edge", error=str(e))
|
|
229
|
+
tracker.issue(error_edge)
|
|
230
|
+
if stop_on_exception:
|
|
231
|
+
raise error_edge from e
|
|
232
|
+
yield error_edge
|
|
233
|
+
else:
|
|
234
|
+
try:
|
|
235
|
+
yield self._create_node(identifier, properties, pydantic_cls, view_id)
|
|
236
|
+
except ValueError as e:
|
|
237
|
+
error_node = ResourceCreationError(identifier, "node", error=str(e))
|
|
238
|
+
tracker.issue(error_node)
|
|
239
|
+
if stop_on_exception:
|
|
240
|
+
raise error_node from e
|
|
241
|
+
yield error_node
|
|
242
|
+
yield from self._create_edges_without_properties(
|
|
243
|
+
identifier, properties, edge_by_type, edge_by_prop_id, tracker
|
|
244
|
+
)
|
|
214
245
|
tracker.finish(track_id)
|
|
215
246
|
yield _END_OF_CLASS
|
|
216
247
|
|
|
248
|
+
@staticmethod
|
|
249
|
+
def _pop_start_end_node(properties: dict[str | InstanceType, list[str]]) -> tuple[str | None, str | None]:
|
|
250
|
+
start_node = properties.pop("startNode", [None])[0]
|
|
251
|
+
if not start_node:
|
|
252
|
+
start_node = properties.pop("start_node", [None])[0]
|
|
253
|
+
end_node = properties.pop("endNode", [None])[0]
|
|
254
|
+
if not end_node:
|
|
255
|
+
end_node = properties.pop("end_node", [None])[0]
|
|
256
|
+
return start_node, end_node
|
|
257
|
+
|
|
217
258
|
def write_to_file(self, filepath: Path) -> None:
|
|
218
259
|
if filepath.suffix not in [".json", ".yaml", ".yml"]:
|
|
219
260
|
raise ValueError(f"File format {filepath.suffix} is not supported")
|
|
@@ -298,17 +339,30 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
298
339
|
|
|
299
340
|
def _create_validation_classes(
|
|
300
341
|
self, view: dm.View
|
|
301
|
-
) -> tuple[
|
|
342
|
+
) -> tuple[
|
|
343
|
+
type[BaseModel],
|
|
344
|
+
dict[str, tuple[str, dm.EdgeConnection]],
|
|
345
|
+
dict[str, tuple[str, dm.EdgeConnection]],
|
|
346
|
+
NeatIssueList,
|
|
347
|
+
]:
|
|
302
348
|
issues = IssueList()
|
|
303
349
|
field_definitions: dict[str, tuple[type, Any]] = {}
|
|
304
|
-
|
|
350
|
+
edge_by_type: dict[str, tuple[str, dm.EdgeConnection]] = {}
|
|
351
|
+
edge_by_prop_id: dict[str, tuple[str, dm.EdgeConnection]] = {}
|
|
305
352
|
validators: dict[str, classmethod] = {}
|
|
306
353
|
direct_relation_by_property: dict[str, dm.DirectRelation] = {}
|
|
307
354
|
unit_properties: list[str] = []
|
|
308
355
|
json_fields: list[str] = []
|
|
356
|
+
text_fields: list[str] = []
|
|
309
357
|
for prop_id, prop in view.properties.items():
|
|
310
358
|
if isinstance(prop, dm.EdgeConnection):
|
|
311
|
-
|
|
359
|
+
if prop.edge_source:
|
|
360
|
+
# Edges with properties are created separately
|
|
361
|
+
continue
|
|
362
|
+
|
|
363
|
+
edge_by_type[prop.type.external_id] = prop_id, prop
|
|
364
|
+
edge_by_prop_id[prop_id] = prop_id, prop
|
|
365
|
+
|
|
312
366
|
if isinstance(prop, dm.MappedProperty):
|
|
313
367
|
if is_readonly_property(prop.container, prop.container_property_identifier):
|
|
314
368
|
continue
|
|
@@ -334,6 +388,8 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
334
388
|
|
|
335
389
|
if data_type == Json:
|
|
336
390
|
json_fields.append(prop_id)
|
|
391
|
+
elif data_type == String:
|
|
392
|
+
text_fields.append(prop_id)
|
|
337
393
|
python_type = data_type.python
|
|
338
394
|
if isinstance(prop.type, ListablePropertyType) and prop.type.is_list:
|
|
339
395
|
python_type = list[python_type]
|
|
@@ -369,11 +425,20 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
369
425
|
else:
|
|
370
426
|
raise ValueError(f"Expect valid JSON string or dict for {info.field_name}: {value}")
|
|
371
427
|
|
|
428
|
+
def parse_text(cls, value: Any, info: ValidationInfo) -> Any:
    """Apply ``remove_namespace_from_uri`` to a value, or to each item of a list."""
    if not isinstance(value, list):
        return remove_namespace_from_uri(value)
    return [remove_namespace_from_uri(item) for item in value]
|
|
433
|
+
|
|
372
434
|
if json_fields:
|
|
373
435
|
validators["parse_json_string"] = field_validator(*json_fields, mode="before")(parse_json_string) # type: ignore[assignment, arg-type]
|
|
374
436
|
|
|
375
437
|
validators["parse_list"] = field_validator("*", mode="before")(parse_list) # type: ignore[assignment, arg-type]
|
|
376
438
|
|
|
439
|
+
if text_fields:
|
|
440
|
+
validators["parse_text"] = field_validator(*text_fields, mode="before")(parse_text) # type: ignore[assignment, arg-type]
|
|
441
|
+
|
|
377
442
|
if direct_relation_by_property:
|
|
378
443
|
|
|
379
444
|
def parse_direct_relation(cls, value: list, info: ValidationInfo) -> dict | list[dict]:
|
|
@@ -414,7 +479,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
414
479
|
)
|
|
415
480
|
|
|
416
481
|
pydantic_cls = create_model(view.external_id, __validators__=validators, **field_definitions) # type: ignore[arg-type, call-overload]
|
|
417
|
-
return pydantic_cls,
|
|
482
|
+
return pydantic_cls, edge_by_type, edge_by_prop_id, issues
|
|
418
483
|
|
|
419
484
|
def _create_node(
|
|
420
485
|
self,
|
|
@@ -435,17 +500,46 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
435
500
|
],
|
|
436
501
|
)
|
|
437
502
|
|
|
438
|
-
def
|
|
503
|
+
def _create_edge_with_properties(
    self,
    identifier: str,
    properties: dict[str | InstanceType, list[str]],
    start_node: str,
    end_node: str,
    pydantic_cls: type[BaseModel],
    view_id: dm.ViewId,
) -> dm.EdgeApply:
    """Build an ``EdgeApply`` that carries properties validated by ``pydantic_cls``.

    The ``RDF.type`` entry is removed from ``properties`` before validation.
    It is only checked for presence; the edge type itself is taken from
    ``view_id``. Both endpoints are placed in ``self.instance_space``.

    Raises:
        ValueError: If ``properties`` holds no ``RDF.type`` value. Errors
            raised by ``pydantic_cls.model_validate`` propagate unchanged.
    """
    rdf_type = properties.pop(RDF.type, [None])[0]
    validated = pydantic_cls.model_validate(properties)
    if rdf_type is None:
        raise ValueError(f"Missing type for edge {identifier}")

    space = self.instance_space
    edge_type = dm.DirectRelationReference(view_id.space, view_id.external_id)
    source_ref = dm.DirectRelationReference(space, start_node)
    target_ref = dm.DirectRelationReference(space, end_node)
    # Only fields that were explicitly set are written to the view.
    payload = dict(validated.model_dump(exclude_unset=True))
    return dm.EdgeApply(
        space=space,
        external_id=identifier,
        type=edge_type,
        start_node=source_ref,
        end_node=target_ref,
        sources=[dm.NodeOrEdgeData(source=view_id, properties=payload)],
    )
|
|
527
|
+
|
|
528
|
+
def _create_edges_without_properties(
|
|
439
529
|
self,
|
|
440
530
|
identifier: str,
|
|
441
531
|
properties: dict[str, list[str]],
|
|
442
532
|
edge_by_type: dict[str, tuple[str, dm.EdgeConnection]],
|
|
533
|
+
edge_by_prop_id: dict[str, tuple[str, dm.EdgeConnection]],
|
|
443
534
|
tracker: Tracker,
|
|
444
535
|
) -> Iterable[dm.EdgeApply | NeatIssue]:
|
|
445
536
|
for predicate, values in properties.items():
|
|
446
|
-
if predicate
|
|
537
|
+
if predicate in edge_by_type:
|
|
538
|
+
prop_id, edge = edge_by_type[predicate]
|
|
539
|
+
elif predicate in edge_by_prop_id:
|
|
540
|
+
prop_id, edge = edge_by_prop_id[predicate]
|
|
541
|
+
else:
|
|
447
542
|
continue
|
|
448
|
-
prop_id, edge = edge_by_type[predicate]
|
|
449
543
|
if isinstance(edge, SingleEdgeConnection) and len(values) > 1:
|
|
450
544
|
error = ResourceDuplicatedError(
|
|
451
545
|
resource_type="edge",
|
|
@@ -106,7 +106,7 @@ def to_construct_triples(
|
|
|
106
106
|
non_inherited_starting_rdf_types = []
|
|
107
107
|
|
|
108
108
|
for transformation in transformations:
|
|
109
|
-
traversal = cast(RDFPath, transformation.
|
|
109
|
+
traversal = cast(RDFPath, transformation.instance_source).traversal
|
|
110
110
|
|
|
111
111
|
# keeping track of starting rdf types of non-inherited transformations/properties
|
|
112
112
|
if isinstance(traversal, Traversal) and not transformation.inherited:
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from ._base import BaseTransformerStandardised
|
|
1
2
|
from ._classic_cdf import (
|
|
2
3
|
AddAssetDepth,
|
|
3
4
|
AssetEventConnector,
|
|
@@ -5,6 +6,7 @@ from ._classic_cdf import (
|
|
|
5
6
|
AssetRelationshipConnector,
|
|
6
7
|
AssetSequenceConnector,
|
|
7
8
|
AssetTimeSeriesConnector,
|
|
9
|
+
LookupRelationshipSourceTarget,
|
|
8
10
|
RelationshipAsEdgeTransformer,
|
|
9
11
|
)
|
|
10
12
|
from ._prune_graph import (
|
|
@@ -29,6 +31,7 @@ __all__ = [
|
|
|
29
31
|
"ConnectionToLiteral",
|
|
30
32
|
"ConvertLiteral",
|
|
31
33
|
"LiteralToEntity",
|
|
34
|
+
"LookupRelationshipSourceTarget",
|
|
32
35
|
"MakeConnectionOnExactMatch",
|
|
33
36
|
"PruneDanglingNodes",
|
|
34
37
|
"PruneDeadEndEdges",
|
|
@@ -57,4 +60,6 @@ Transformers = (
|
|
|
57
60
|
| ConvertLiteral
|
|
58
61
|
| LiteralToEntity
|
|
59
62
|
| ConnectionToLiteral
|
|
63
|
+
| BaseTransformerStandardised
|
|
64
|
+
| LookupRelationshipSourceTarget
|
|
60
65
|
)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import warnings
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
|
+
from collections.abc import Iterator
|
|
4
5
|
from typing import ClassVar, TypeAlias, cast
|
|
5
6
|
|
|
6
7
|
from rdflib import Graph
|
|
@@ -65,9 +66,16 @@ class BaseTransformerStandardised(ABC):
|
|
|
65
66
|
The query to use for extracting target triples from the graph and performing the transformation.
|
|
66
67
|
Returns:
|
|
67
68
|
A query string.
|
|
69
|
+
|
|
70
|
+
!!! note "Complex Queries"
|
|
71
|
+
In the majority of cases the query should be a simple SELECT query. However, in case
|
|
72
|
+
when there is a need to have one or more sub-iterators, one can override the ._iterator() method.
|
|
68
73
|
"""
|
|
69
74
|
raise NotImplementedError()
|
|
70
75
|
|
|
76
|
+
def _iterator(self, graph: Graph) -> Iterator:
|
|
77
|
+
yield from graph.query(self._iterate_query())
|
|
78
|
+
|
|
71
79
|
def _skip_count_query(self) -> str:
|
|
72
80
|
"""
|
|
73
81
|
The query to use for extracting target triples from the graph and performing the transformation.
|
|
@@ -97,7 +105,7 @@ class BaseTransformerStandardised(ABC):
|
|
|
97
105
|
if iteration_count == 0:
|
|
98
106
|
return outcome
|
|
99
107
|
|
|
100
|
-
result_iterable =
|
|
108
|
+
result_iterable = self._iterator(graph)
|
|
101
109
|
result_iterable = iterate_progress_bar_if_above_config_threshold(
|
|
102
110
|
result_iterable, iteration_count, self.description
|
|
103
111
|
)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
import urllib.parse
|
|
1
2
|
import warnings
|
|
2
3
|
from abc import ABC
|
|
3
|
-
from collections.abc import Callable, Iterable
|
|
4
|
+
from collections.abc import Callable, Iterable, Iterator
|
|
4
5
|
from functools import lru_cache
|
|
5
6
|
from typing import cast
|
|
6
7
|
|
|
@@ -9,6 +10,7 @@ from rdflib.query import ResultRow
|
|
|
9
10
|
|
|
10
11
|
from cognite.neat._constants import CLASSIC_CDF_NAMESPACE, DEFAULT_NAMESPACE
|
|
11
12
|
from cognite.neat._graph import extractors
|
|
13
|
+
from cognite.neat._issues.errors import NeatValueError
|
|
12
14
|
from cognite.neat._issues.warnings import ResourceNotFoundWarning
|
|
13
15
|
from cognite.neat._utils.collection_ import iterate_progress_bar
|
|
14
16
|
from cognite.neat._utils.rdf_ import (
|
|
@@ -229,7 +231,6 @@ class AssetEventConnector(BaseAssetConnector):
|
|
|
229
231
|
)
|
|
230
232
|
|
|
231
233
|
|
|
232
|
-
# TODO: standardise
|
|
233
234
|
class AssetRelationshipConnector(BaseTransformerStandardised):
|
|
234
235
|
description: str = "Connects assets via relationships"
|
|
235
236
|
_use_only_once: bool = True
|
|
@@ -465,7 +466,7 @@ WHERE {{
|
|
|
465
466
|
ResourceNotFoundWarning(target_source_id, "class", str(relationship_id), "class"), stacklevel=2
|
|
466
467
|
)
|
|
467
468
|
return []
|
|
468
|
-
edge_id = str(object_by_predicates["externalId"])
|
|
469
|
+
edge_id = urllib.parse.quote(str(object_by_predicates["externalId"]))
|
|
469
470
|
# If there is properties on the relationship, we create a new intermediate node
|
|
470
471
|
edge_type = self._namespace[f"{source_type}To{target_type}Edge"]
|
|
471
472
|
return self._create_edge(
|
|
@@ -516,3 +517,89 @@ WHERE {{
|
|
|
516
517
|
|
|
517
518
|
def _predicate(self, target_type: str) -> URIRef:
|
|
518
519
|
return self._namespace[f"relationship{target_type.capitalize()}"]
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
class LookupRelationshipSourceTarget(BaseTransformerStandardised):
    """Replace relationship source/target literals with the URIRef of the entity.

    When relationships are extracted, the source and target are stored as
    literals. This transformer looks up the entity that has the given
    externalId and type, and replaces each literal with the URIRef of that
    entity. A relationship is left untouched, and a warning is issued, when
    either lookup does not find exactly one entity.
    """

    description = "Lookup relationships source and target externalId"
    _use_only_once: bool = True
    _need_changes = frozenset({extractors.RelationshipsExtractor.__name__})

    # Template for finding the entity of a given type with a given externalId.
    # ``external_id`` must be escaped before substitution (see _escape_sparql_string).
    _lookup_entity_query = """SELECT ?entity
    WHERE {{
        ?entity a <{entity_type}> .
        ?entity <{namespace}externalId> "{external_id}" .
    }}"""

    def __init__(self, namespace: Namespace = CLASSIC_CDF_NAMESPACE, type_prefix: str | None = None) -> None:
        """Store the namespace and the optional prefix prepended to entity type names."""
        self._namespace = namespace
        self._type_prefix = type_prefix
        # Created by _iterator() just before iteration starts; operation() requires it.
        self._lookup_entity: Callable[[URIRef, str], URIRef] | None = None

    def _count_query(self) -> str:
        """Return the query counting all ClassicRelationship instances."""
        return f"""SELECT (COUNT(?instance) AS ?instanceCount)
        WHERE {{
            ?instance a <{self._namespace}ClassicRelationship> .
        }}"""

    def _iterate_query(self) -> str:
        """Return the query selecting each relationship with its source/target id and type."""
        return f"""SELECT ?instance ?source ?sourceType ?target ?targetType
        WHERE {{
            ?instance a <{self._namespace}ClassicRelationship> .
            ?instance <{self._namespace}sourceExternalId> ?source .
            ?instance <{self._namespace}targetExternalId> ?target .
            ?instance <{self._namespace}sourceType> ?sourceType .
            ?instance <{self._namespace}targetType> ?targetType
        }}"""

    def _iterator(self, graph: Graph) -> Iterator:
        """Bind the cached lookup function to ``graph``, then yield the query rows."""
        self._lookup_entity = self.create_lookup_entity_with_external_id(graph, self._namespace, self._type_prefix)
        yield from graph.query(self._iterate_query())

    def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
        """Swap the source and target literals of one relationship for entity URIRefs.

        Returns:
            The triples to remove and add. The output is empty when the source
            or the target cannot be resolved.

        Raises:
            NeatValueError: If called before ``_iterator`` has set up the lookup.
        """
        output = RowTransformationOutput()
        instance, source, source_type, target, target_type = cast(
            tuple[URIRef, Literal, URIRef, Literal, URIRef], query_result_row
        )
        if self._lookup_entity is None:
            raise NeatValueError(f"{type(self)}: .operation() called before .transform()")
        try:
            source_id = self._lookup_entity(source_type, source.toPython())
        except ValueError:
            warnings.warn(ResourceNotFoundWarning(source, "class", str(instance), "class"), stacklevel=2)
            return output

        try:
            target_id = self._lookup_entity(target_type, target.toPython())
        except ValueError:
            warnings.warn(ResourceNotFoundWarning(target, "class", str(instance), "class"), stacklevel=2)
            return output

        output.remove_triples.append((instance, self._namespace.sourceExternalId, source))
        output.remove_triples.append((instance, self._namespace.targetExternalId, target))
        output.add_triples.append((instance, self._namespace.sourceExternalId, source_id))
        output.add_triples.append((instance, self._namespace.targetExternalId, target_id))
        output.instances_modified_count += 1
        return output

    @staticmethod
    def _escape_sparql_string(value: str) -> str:
        """Escape ``value`` so it can sit inside a double-quoted SPARQL string literal."""
        # The backslash must be handled first so later escapes are not doubled.
        return value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n").replace("\r", "\\r")

    @staticmethod
    def create_lookup_entity_with_external_id(
        graph: Graph, namespace: Namespace, type_prefix: str | None
    ) -> Callable[[URIRef, str], URIRef]:
        """Create a cached function resolving ``(entity_type, external_id)`` to an entity.

        The returned function raises ``ValueError`` unless exactly one entity matches.
        """

        @lru_cache(maxsize=10_000)
        def lookup_entity_with_external_id(entity_type: URIRef, external_id: str) -> URIRef:
            if type_prefix:
                entity_type = namespace[type_prefix + remove_namespace_from_uri(entity_type)]

            # External ids are free text; unescaped quotes, backslashes or line
            # breaks would break or alter the query.
            query = LookupRelationshipSourceTarget._lookup_entity_query.format(
                namespace=namespace,
                entity_type=entity_type,
                external_id=LookupRelationshipSourceTarget._escape_sparql_string(str(external_id)),
            )
            result = list(graph.query(query))
            if len(result) == 1:
                return cast(URIRef, result[0][0])  # type: ignore[index]
            raise ValueError(f"Could not find entity with external_id {external_id} and type {entity_type}")

        return lookup_entity_with_external_id
|
|
@@ -35,8 +35,8 @@ class AddSelfReferenceProperty(BaseTransformer):
|
|
|
35
35
|
|
|
36
36
|
def transform(self, graph: Graph) -> None:
|
|
37
37
|
for property_ in self.properties:
|
|
38
|
-
prefix = property_.
|
|
39
|
-
suffix = property_.
|
|
38
|
+
prefix = property_.instance_source.traversal.class_.prefix
|
|
39
|
+
suffix = property_.instance_source.traversal.class_.suffix
|
|
40
40
|
|
|
41
41
|
namespace = self.rules.prefixes[prefix] if prefix in self.rules.prefixes else self.rules.metadata.namespace
|
|
42
42
|
|
|
@@ -54,7 +54,7 @@ class AddSelfReferenceProperty(BaseTransformer):
|
|
|
54
54
|
property_=f"{self.rules.metadata.prefix}:{property_.property_}",
|
|
55
55
|
)
|
|
56
56
|
|
|
57
|
-
property_.
|
|
57
|
+
property_.instance_source = RDFPath(traversal=traversal)
|
|
58
58
|
|
|
59
59
|
|
|
60
60
|
class MakeConnectionOnExactMatch(BaseTransformerStandardised):
|
|
@@ -1,23 +1,21 @@
|
|
|
1
1
|
import warnings
|
|
2
|
-
from collections.abc import Callable
|
|
2
|
+
from collections.abc import Callable, Iterator
|
|
3
3
|
from typing import Any, cast
|
|
4
4
|
from urllib.parse import quote
|
|
5
5
|
|
|
6
6
|
import rdflib
|
|
7
|
-
from rdflib import RDF,
|
|
7
|
+
from rdflib import RDF, Namespace, URIRef
|
|
8
8
|
from rdflib.query import ResultRow
|
|
9
9
|
|
|
10
10
|
from cognite.neat._constants import UNKNOWN_TYPE
|
|
11
|
-
from cognite.neat._graph.queries import Queries
|
|
12
11
|
from cognite.neat._issues.warnings import PropertyDataTypeConversionWarning
|
|
13
12
|
from cognite.neat._utils.auxiliary import string_to_ideal_type
|
|
14
|
-
from cognite.neat._utils.rdf_ import get_namespace, remove_namespace_from_uri
|
|
13
|
+
from cognite.neat._utils.rdf_ import Triple, get_namespace, remove_namespace_from_uri
|
|
15
14
|
|
|
16
|
-
from ._base import
|
|
15
|
+
from ._base import BaseTransformerStandardised, RowTransformationOutput
|
|
17
16
|
|
|
18
17
|
|
|
19
|
-
|
|
20
|
-
class SplitMultiValueProperty(BaseTransformer):
|
|
18
|
+
class SplitMultiValueProperty(BaseTransformerStandardised):
|
|
21
19
|
description: str = (
|
|
22
20
|
"SplitMultiValueProperty is a transformer that splits a "
|
|
23
21
|
"multi-value property into multiple single-value properties."
|
|
@@ -25,55 +23,67 @@ class SplitMultiValueProperty(BaseTransformer):
|
|
|
25
23
|
_use_only_once: bool = True
|
|
26
24
|
_need_changes = frozenset({})
|
|
27
25
|
|
|
28
|
-
|
|
26
|
+
def __init__(self, unknown_type: URIRef | None = None) -> None:
    """Store the type used for values with no known type; falls back to ``UNKNOWN_TYPE``."""
    self.unknown_type = unknown_type if unknown_type else UNKNOWN_TYPE
|
|
29
28
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
29
|
+
def _iterate_query(self) -> str:
|
|
30
|
+
query = """SELECT ?subjectType ?property
|
|
31
|
+
(GROUP_CONCAT(DISTINCT STR(?valueType); SEPARATOR=",") AS ?valueTypes)
|
|
32
|
+
|
|
33
|
+
WHERE {{
|
|
34
|
+
?s ?property ?o .
|
|
35
|
+
?s a ?subjectType .
|
|
36
|
+
OPTIONAL {{ ?o a ?type }}
|
|
37
|
+
|
|
38
|
+
# Key part to determine value type: either object, data or unknown
|
|
39
|
+
BIND( IF(isLiteral(?o),DATATYPE(?o),
|
|
40
|
+
IF(BOUND(?type), ?type,
|
|
41
|
+
<{unknownType}>)) AS ?valueType)
|
|
42
|
+
}}
|
|
43
|
+
|
|
44
|
+
GROUP BY ?subjectType ?property
|
|
45
|
+
HAVING (COUNT(DISTINCT ?valueType) > 1)"""
|
|
46
|
+
|
|
47
|
+
return query.format(unknownType=self.unknown_type)
|
|
48
|
+
|
|
49
|
+
def _count_query(self) -> str:
|
|
50
|
+
query = """SELECT (COUNT(*) AS ?tripleCount)
|
|
51
|
+
WHERE {?s ?p ?o .}"""
|
|
52
|
+
return query
|
|
53
|
+
|
|
54
|
+
def _sub_iterate_query(self, type_: URIRef, property_: URIRef) -> str:
|
|
55
|
+
query = """ SELECT ?s ?p ?o ?valueType WHERE {{
|
|
56
|
+
?s a <{subject_uri}> .
|
|
57
|
+
?s <{property_uri}> ?o .
|
|
33
58
|
|
|
34
|
-
|
|
59
|
+
OPTIONAL {{ ?o a ?type }}
|
|
35
60
|
|
|
36
|
-
|
|
61
|
+
BIND(<{property_uri}> AS ?p)
|
|
37
62
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
63
|
+
BIND(IF(isLiteral(?o), DATATYPE(?o),
|
|
64
|
+
IF(BOUND(?type),?type,
|
|
65
|
+
<{unknownType}>)) AS ?valueType)
|
|
41
66
|
|
|
42
|
-
|
|
67
|
+
}} """
|
|
43
68
|
|
|
44
|
-
|
|
69
|
+
return query.format(unknownType=self.unknown_type, subject_uri=type_, property_uri=property_)
|
|
45
70
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
}}"""
|
|
71
|
+
def _iterator(self, graph) -> Iterator:
|
|
72
|
+
for type_, property_, _ in graph.query(self._iterate_query()):
|
|
73
|
+
yield from graph.query(self._sub_iterate_query(type_, property_))
|
|
50
74
|
|
|
51
|
-
def
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
for value_type_uri in value_types:
|
|
55
|
-
_args = {
|
|
56
|
-
"subject_uri": subject_uri,
|
|
57
|
-
"property_uri": property_uri,
|
|
58
|
-
"object_uri": value_type_uri,
|
|
59
|
-
}
|
|
75
|
+
def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
    """Move one triple onto a property whose name carries the value type.

    The row is unpacked as ``(subject, property, object, value type)``. The
    new property is the old property URI followed by ``_`` and the value type
    with its namespace removed. The returned output adds the triple under the
    new property and removes it from the old one.
    """
    row_output = RowTransformationOutput()
    # ``object_`` rather than ``object`` so the builtin is not shadowed.
    subject, old_property, object_, value_type = query_result_row

    new_property = URIRef(f"{old_property}_{remove_namespace_from_uri(value_type)}")

    row_output.add_triples.append(cast(Triple, (subject, new_property, object_)))
    row_output.remove_triples.append(cast(Triple, (subject, old_property, object_)))

    row_output.instances_modified_count += 1

    return row_output
|
|
77
87
|
|
|
78
88
|
|
|
79
89
|
class ConvertLiteral(BaseTransformerStandardised):
|
|
@@ -251,7 +251,7 @@ class BaseAnalysis(ABC, Generic[T_Rules, T_Class, T_Property, T_ClassEntity, T_P
|
|
|
251
251
|
if (
|
|
252
252
|
only_rdfpath
|
|
253
253
|
and isinstance(property_, InformationProperty)
|
|
254
|
-
and isinstance(property_.
|
|
254
|
+
and isinstance(property_.instance_source, RDFPath)
|
|
255
255
|
) or not only_rdfpath:
|
|
256
256
|
processed_properties[prop_entity] = property_
|
|
257
257
|
class_property_pairs[class_] = processed_properties
|