cognite-neat 0.106.0__py3-none-any.whl → 0.108.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_constants.py +35 -1
- cognite/neat/_graph/_shared.py +4 -0
- cognite/neat/_graph/extractors/__init__.py +5 -1
- cognite/neat/_graph/extractors/_base.py +32 -0
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +128 -14
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +156 -12
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +50 -12
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +26 -1
- cognite/neat/_graph/extractors/_dms.py +196 -47
- cognite/neat/_graph/extractors/_dms_graph.py +199 -0
- cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
- cognite/neat/_graph/extractors/_rdf_file.py +33 -5
- cognite/neat/_graph/loaders/__init__.py +1 -3
- cognite/neat/_graph/loaders/_rdf2dms.py +123 -19
- cognite/neat/_graph/queries/_base.py +140 -84
- cognite/neat/_graph/queries/_construct.py +2 -2
- cognite/neat/_graph/transformers/__init__.py +8 -1
- cognite/neat/_graph/transformers/_base.py +9 -1
- cognite/neat/_graph/transformers/_classic_cdf.py +90 -3
- cognite/neat/_graph/transformers/_rdfpath.py +3 -3
- cognite/neat/_graph/transformers/_value_type.py +106 -45
- cognite/neat/_issues/errors/_resources.py +1 -1
- cognite/neat/_issues/warnings/__init__.py +0 -2
- cognite/neat/_issues/warnings/_models.py +1 -1
- cognite/neat/_issues/warnings/_properties.py +0 -8
- cognite/neat/_rules/analysis/_base.py +1 -1
- cognite/neat/_rules/analysis/_information.py +14 -13
- cognite/neat/_rules/catalog/__init__.py +1 -0
- cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
- cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
- cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
- cognite/neat/_rules/importers/__init__.py +3 -1
- cognite/neat/_rules/importers/_dms2rules.py +7 -5
- cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
- cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
- cognite/neat/_rules/importers/_rdf/_base.py +2 -2
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +242 -19
- cognite/neat/_rules/models/_base_rules.py +13 -15
- cognite/neat/_rules/models/_types.py +5 -0
- cognite/neat/_rules/models/dms/_rules.py +51 -10
- cognite/neat/_rules/models/dms/_rules_input.py +4 -0
- cognite/neat/_rules/models/information/_rules.py +48 -5
- cognite/neat/_rules/models/information/_rules_input.py +6 -1
- cognite/neat/_rules/models/mapping/_classic2core.py +4 -5
- cognite/neat/_rules/transformers/__init__.py +10 -0
- cognite/neat/_rules/transformers/_converters.py +300 -62
- cognite/neat/_session/_base.py +57 -10
- cognite/neat/_session/_drop.py +5 -1
- cognite/neat/_session/_inspect.py +3 -2
- cognite/neat/_session/_mapping.py +17 -6
- cognite/neat/_session/_prepare.py +0 -47
- cognite/neat/_session/_read.py +115 -10
- cognite/neat/_session/_set.py +27 -0
- cognite/neat/_session/_show.py +4 -4
- cognite/neat/_session/_state.py +12 -1
- cognite/neat/_session/_to.py +43 -2
- cognite/neat/_session/_wizard.py +1 -1
- cognite/neat/_session/exceptions.py +8 -3
- cognite/neat/_store/_graph_store.py +331 -136
- cognite/neat/_store/_rules_store.py +130 -1
- cognite/neat/_utils/auth.py +3 -1
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/METADATA +2 -2
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/RECORD +67 -65
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/WHEEL +1 -1
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,11 +1,16 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
import warnings
|
|
1
3
|
from collections import defaultdict
|
|
2
4
|
from collections.abc import Callable, Iterable, Set
|
|
3
5
|
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
4
7
|
|
|
5
8
|
from cognite.client import CogniteClient
|
|
6
9
|
from cognite.client.data_classes import Relationship, RelationshipList
|
|
7
|
-
from rdflib import Namespace
|
|
10
|
+
from rdflib import Namespace, URIRef
|
|
8
11
|
|
|
12
|
+
from cognite.neat._issues.warnings import NeatValueWarning
|
|
13
|
+
from cognite.neat._shared import Triple
|
|
9
14
|
from cognite.neat._utils.auxiliary import create_sha256_hash
|
|
10
15
|
|
|
11
16
|
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix, T_CogniteResource
|
|
@@ -28,9 +33,18 @@ class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
|
|
|
28
33
|
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
29
34
|
camel_case: bool = True,
|
|
30
35
|
as_write: bool = False,
|
|
36
|
+
prefix: str | None = None,
|
|
37
|
+
identifier: typing.Literal["id", "externalId"] = "id",
|
|
31
38
|
):
|
|
39
|
+
# This is used by the ClassicExtractor to log the target nodes, such
|
|
40
|
+
# that it can extract them.
|
|
41
|
+
# It is private to avoid exposing it to the user.
|
|
42
|
+
self._target_external_ids_by_type: dict[InstanceIdPrefix, set[str]] = defaultdict(set)
|
|
43
|
+
self._log_target_nodes = False
|
|
44
|
+
# Ensure that this becomes an iterator, even if it is a list.
|
|
45
|
+
to_iterate = (self._log_target_nodes_if_set(item) for item in items)
|
|
32
46
|
super().__init__(
|
|
33
|
-
|
|
47
|
+
to_iterate,
|
|
34
48
|
namespace=namespace,
|
|
35
49
|
to_type=to_type,
|
|
36
50
|
total=total,
|
|
@@ -39,12 +53,40 @@ class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
|
|
|
39
53
|
skip_metadata_values=skip_metadata_values,
|
|
40
54
|
camel_case=camel_case,
|
|
41
55
|
as_write=as_write,
|
|
56
|
+
prefix=prefix,
|
|
57
|
+
identifier=identifier,
|
|
42
58
|
)
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
self.
|
|
59
|
+
self._uri_by_external_id_by_by_type: dict[InstanceIdPrefix, dict[str, URIRef]] = defaultdict(dict)
|
|
60
|
+
self._target_triples: list[tuple[URIRef, URIRef, str, str]] = []
|
|
61
|
+
|
|
62
|
+
def _log_target_nodes_if_set(self, item: Relationship) -> Relationship:
|
|
63
|
+
if not self._log_target_nodes:
|
|
64
|
+
return item
|
|
65
|
+
if item.target_type and item.target_external_id:
|
|
66
|
+
self._target_external_ids_by_type[InstanceIdPrefix.from_str(item.target_type)].add(item.target_external_id)
|
|
67
|
+
return item
|
|
68
|
+
|
|
69
|
+
def _item2triples_special_cases(self, id_: URIRef, dumped: dict[str, Any]) -> list[Triple]:
|
|
70
|
+
if self.identifier == "externalId":
|
|
71
|
+
return []
|
|
72
|
+
triples: list[Triple] = []
|
|
73
|
+
if (source_external_id := dumped.pop("sourceExternalId")) and "sourceType" in dumped:
|
|
74
|
+
source_type = dumped["sourceType"]
|
|
75
|
+
try:
|
|
76
|
+
source_uri = self._uri_by_external_id_by_by_type[InstanceIdPrefix.from_str(source_type)][
|
|
77
|
+
source_external_id
|
|
78
|
+
]
|
|
79
|
+
except KeyError:
|
|
80
|
+
warnings.warn(
|
|
81
|
+
NeatValueWarning(f"Missing externalId {source_external_id} for {source_type}"), stacklevel=2
|
|
82
|
+
)
|
|
83
|
+
else:
|
|
84
|
+
triples.append((id_, self.namespace["sourceExternalId"], source_uri))
|
|
85
|
+
if (target_external_id := dumped.pop("targetExternalId")) and "targetType" in dumped:
|
|
86
|
+
target_type = dumped["targetType"]
|
|
87
|
+
# We do not yet have the target nodes, so we log them for later extraction.
|
|
88
|
+
self._target_triples.append((id_, self.namespace["targetExternalId"], target_type, target_external_id))
|
|
89
|
+
return triples
|
|
48
90
|
|
|
49
91
|
@classmethod
|
|
50
92
|
def _from_dataset(
|
|
@@ -67,10 +109,6 @@ class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
|
|
|
67
109
|
return len(relationships), relationships
|
|
68
110
|
|
|
69
111
|
def _fallback_id(self, item: Relationship) -> str | None:
|
|
70
|
-
if item.external_id
|
|
71
|
-
if self._log_target_nodes and item.target_type and item.target_external_id:
|
|
72
|
-
self._target_external_ids_by_type[InstanceIdPrefix.from_str(item.target_type)].add(
|
|
73
|
-
item.target_external_id
|
|
74
|
-
)
|
|
112
|
+
if item.external_id:
|
|
75
113
|
return create_sha256_hash(item.external_id)
|
|
76
114
|
return None
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import itertools
|
|
2
2
|
import json
|
|
3
|
+
import typing
|
|
3
4
|
from collections.abc import Callable, Iterable, Set
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
from typing import Any
|
|
@@ -52,10 +53,22 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
|
|
|
52
53
|
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
53
54
|
camel_case: bool = True,
|
|
54
55
|
as_write: bool = False,
|
|
56
|
+
prefix: str | None = None,
|
|
57
|
+
identifier: typing.Literal["id", "externalId"] = "id",
|
|
55
58
|
unpack_columns: bool = False,
|
|
56
59
|
):
|
|
57
60
|
super().__init__(
|
|
58
|
-
items,
|
|
61
|
+
items,
|
|
62
|
+
namespace,
|
|
63
|
+
to_type,
|
|
64
|
+
total,
|
|
65
|
+
limit,
|
|
66
|
+
unpack_metadata,
|
|
67
|
+
skip_metadata_values,
|
|
68
|
+
camel_case,
|
|
69
|
+
as_write,
|
|
70
|
+
prefix,
|
|
71
|
+
identifier,
|
|
59
72
|
)
|
|
60
73
|
self.unpack_columns = unpack_columns
|
|
61
74
|
|
|
@@ -71,6 +84,8 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
|
|
|
71
84
|
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
72
85
|
camel_case: bool = True,
|
|
73
86
|
as_write: bool = False,
|
|
87
|
+
prefix: str | None = None,
|
|
88
|
+
identifier: typing.Literal["id", "externalId"] = "id",
|
|
74
89
|
unpack_columns: bool = False,
|
|
75
90
|
):
|
|
76
91
|
total, items = cls._handle_no_access(lambda: cls._from_dataset(client, data_set_external_id))
|
|
@@ -84,6 +99,8 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
|
|
|
84
99
|
skip_metadata_values,
|
|
85
100
|
camel_case,
|
|
86
101
|
as_write,
|
|
102
|
+
prefix,
|
|
103
|
+
identifier,
|
|
87
104
|
unpack_columns,
|
|
88
105
|
)
|
|
89
106
|
|
|
@@ -99,6 +116,8 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
|
|
|
99
116
|
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
100
117
|
camel_case: bool = True,
|
|
101
118
|
as_write: bool = False,
|
|
119
|
+
prefix: str | None = None,
|
|
120
|
+
identifier: typing.Literal["id", "externalId"] = "id",
|
|
102
121
|
unpack_columns: bool = False,
|
|
103
122
|
):
|
|
104
123
|
total, items = cls._handle_no_access(lambda: cls._from_hierarchy(client, root_asset_external_id))
|
|
@@ -112,6 +131,8 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
|
|
|
112
131
|
skip_metadata_values,
|
|
113
132
|
camel_case,
|
|
114
133
|
as_write,
|
|
134
|
+
prefix,
|
|
135
|
+
identifier,
|
|
115
136
|
unpack_columns,
|
|
116
137
|
)
|
|
117
138
|
|
|
@@ -126,6 +147,8 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
|
|
|
126
147
|
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
127
148
|
camel_case: bool = True,
|
|
128
149
|
as_write: bool = False,
|
|
150
|
+
prefix: str | None = None,
|
|
151
|
+
identifier: typing.Literal["id", "externalId"] = "id",
|
|
129
152
|
unpack_columns: bool = False,
|
|
130
153
|
):
|
|
131
154
|
total, items = cls._from_file(file_path)
|
|
@@ -139,6 +162,8 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
|
|
|
139
162
|
skip_metadata_values,
|
|
140
163
|
camel_case,
|
|
141
164
|
as_write,
|
|
165
|
+
prefix,
|
|
166
|
+
identifier,
|
|
142
167
|
unpack_columns,
|
|
143
168
|
)
|
|
144
169
|
|
|
@@ -1,44 +1,68 @@
|
|
|
1
|
-
|
|
1
|
+
import urllib.parse
|
|
2
|
+
from collections.abc import Iterable, Iterator, Set
|
|
3
|
+
from functools import cached_property
|
|
2
4
|
from typing import cast
|
|
3
5
|
|
|
4
6
|
from cognite.client import CogniteClient
|
|
5
7
|
from cognite.client import data_modeling as dm
|
|
6
8
|
from cognite.client.data_classes.data_modeling import DataModelIdentifier
|
|
7
9
|
from cognite.client.data_classes.data_modeling.instances import Instance, PropertyValue
|
|
10
|
+
from cognite.client.utils.useful_types import SequenceNotStr
|
|
8
11
|
from rdflib import RDF, XSD, Literal, Namespace, URIRef
|
|
9
12
|
|
|
10
|
-
from cognite.neat.
|
|
13
|
+
from cognite.neat._config import GLOBAL_CONFIG
|
|
14
|
+
from cognite.neat._constants import DEFAULT_SPACE_URI, is_readonly_property
|
|
11
15
|
from cognite.neat._issues.errors import ResourceRetrievalError
|
|
12
16
|
from cognite.neat._shared import Triple
|
|
17
|
+
from cognite.neat._utils.auxiliary import string_to_ideal_type
|
|
18
|
+
from cognite.neat._utils.collection_ import iterate_progress_bar
|
|
13
19
|
|
|
14
20
|
from ._base import BaseExtractor
|
|
15
21
|
|
|
22
|
+
DEFAULT_EMPTY_VALUES = frozenset({"nan", "null", "none", "", " ", "nil", "n/a", "na", "unknown", "undefined"})
|
|
23
|
+
|
|
16
24
|
|
|
17
25
|
class DMSExtractor(BaseExtractor):
|
|
18
26
|
"""Extract data from Cognite Data Fusion DMS instances into Neat.
|
|
19
27
|
|
|
20
28
|
Args:
|
|
21
|
-
|
|
22
|
-
|
|
29
|
+
total_instances_pair_by_view: A dictionary where the key is the view id and the value is a tuple with the total
|
|
30
|
+
number of instances and an iterable of instances.
|
|
23
31
|
limit: The maximum number of items to extract.
|
|
24
32
|
overwrite_namespace: If provided, this will overwrite the space of the extracted items.
|
|
33
|
+
unpack_json: If True, JSON objects will be unpacked into RDF literals.
|
|
34
|
+
empty_values: If unpack_json is True, when unpacking JSON objects, if a key has a value in this set, it will be
|
|
35
|
+
considered as an empty value and skipped.
|
|
36
|
+
str_to_ideal_type: If unpack_json is True, when unpacking JSON objects, if the value is a string, the extractor
|
|
37
|
+
will try to convert it to the ideal type.
|
|
25
38
|
"""
|
|
26
39
|
|
|
27
40
|
def __init__(
|
|
28
41
|
self,
|
|
29
|
-
|
|
30
|
-
total: int | None = None,
|
|
42
|
+
total_instances_pair_by_view: dict[dm.ViewId, tuple[int | None, Iterable[Instance]]],
|
|
31
43
|
limit: int | None = None,
|
|
32
44
|
overwrite_namespace: Namespace | None = None,
|
|
45
|
+
unpack_json: bool = False,
|
|
46
|
+
empty_values: Set[str] = DEFAULT_EMPTY_VALUES,
|
|
47
|
+
str_to_ideal_type: bool = False,
|
|
33
48
|
) -> None:
|
|
34
|
-
self.
|
|
35
|
-
self.total = total
|
|
49
|
+
self.total_instances_pair_by_view = total_instances_pair_by_view
|
|
36
50
|
self.limit = limit
|
|
37
51
|
self.overwrite_namespace = overwrite_namespace
|
|
52
|
+
self.unpack_json = unpack_json
|
|
53
|
+
self.empty_values = empty_values
|
|
54
|
+
self.str_to_ideal_type = str_to_ideal_type
|
|
38
55
|
|
|
39
56
|
@classmethod
|
|
40
57
|
def from_data_model(
|
|
41
|
-
cls,
|
|
58
|
+
cls,
|
|
59
|
+
client: CogniteClient,
|
|
60
|
+
data_model: DataModelIdentifier,
|
|
61
|
+
limit: int | None = None,
|
|
62
|
+
overwrite_namespace: Namespace | None = None,
|
|
63
|
+
instance_space: str | SequenceNotStr[str] | None = None,
|
|
64
|
+
unpack_json: bool = False,
|
|
65
|
+
str_to_ideal_type: bool = False,
|
|
42
66
|
) -> "DMSExtractor":
|
|
43
67
|
"""Create an extractor from a data model.
|
|
44
68
|
|
|
@@ -46,28 +70,79 @@ class DMSExtractor(BaseExtractor):
|
|
|
46
70
|
client: The Cognite client to use.
|
|
47
71
|
data_model: The data model to extract.
|
|
48
72
|
limit: The maximum number of instances to extract.
|
|
73
|
+
overwrite_namespace: If provided, this will overwrite the space of the extracted items.
|
|
74
|
+
instance_space: The space to extract instances from.
|
|
75
|
+
unpack_json: If True, JSON objects will be unpacked into RDF literals.
|
|
49
76
|
"""
|
|
50
77
|
retrieved = client.data_modeling.data_models.retrieve(data_model, inline_views=True)
|
|
51
78
|
if not retrieved:
|
|
52
79
|
raise ResourceRetrievalError(dm.DataModelId.load(data_model), "data model", "Data Model is missing in CDF")
|
|
53
|
-
return cls.from_views(
|
|
80
|
+
return cls.from_views(
|
|
81
|
+
client,
|
|
82
|
+
retrieved.latest_version().views,
|
|
83
|
+
limit,
|
|
84
|
+
overwrite_namespace,
|
|
85
|
+
instance_space,
|
|
86
|
+
unpack_json,
|
|
87
|
+
str_to_ideal_type,
|
|
88
|
+
)
|
|
54
89
|
|
|
55
90
|
@classmethod
|
|
56
|
-
def from_views(
|
|
91
|
+
def from_views(
|
|
92
|
+
cls,
|
|
93
|
+
client: CogniteClient,
|
|
94
|
+
views: Iterable[dm.View],
|
|
95
|
+
limit: int | None = None,
|
|
96
|
+
overwrite_namespace: Namespace | None = None,
|
|
97
|
+
instance_space: str | SequenceNotStr[str] | None = None,
|
|
98
|
+
unpack_json: bool = False,
|
|
99
|
+
str_to_ideal_type: bool = False,
|
|
100
|
+
) -> "DMSExtractor":
|
|
57
101
|
"""Create an extractor from a set of views.
|
|
58
102
|
|
|
59
103
|
Args:
|
|
60
104
|
client: The Cognite client to use.
|
|
61
105
|
views: The views to extract.
|
|
62
106
|
limit: The maximum number of instances to extract.
|
|
107
|
+
overwrite_namespace: If provided, this will overwrite the space of the extracted items.
|
|
108
|
+
instance_space: The space to extract instances from.
|
|
109
|
+
unpack_json: If True, JSON objects will be unpacked into RDF literals.
|
|
110
|
+
str_to_ideal_type: If True, when unpacking JSON objects, if the value is a string, the extractor will try to
|
|
111
|
+
convert it to the ideal type.
|
|
63
112
|
"""
|
|
64
|
-
|
|
113
|
+
total_instances_pair_by_view: dict[dm.ViewId, tuple[int | None, Iterable[Instance]]] = {}
|
|
114
|
+
for view in views:
|
|
115
|
+
instance_iterator = _ViewInstanceIterator(client, view, instance_space)
|
|
116
|
+
total_instances_pair_by_view[view.as_id()] = (instance_iterator.count, instance_iterator)
|
|
117
|
+
|
|
118
|
+
return cls(
|
|
119
|
+
total_instances_pair_by_view=total_instances_pair_by_view,
|
|
120
|
+
limit=limit,
|
|
121
|
+
overwrite_namespace=overwrite_namespace,
|
|
122
|
+
unpack_json=unpack_json,
|
|
123
|
+
str_to_ideal_type=str_to_ideal_type,
|
|
124
|
+
)
|
|
65
125
|
|
|
66
126
|
def extract(self) -> Iterable[Triple]:
|
|
67
|
-
for
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
127
|
+
total_instances = sum(total for total, _ in self.total_instances_pair_by_view.values() if total is not None)
|
|
128
|
+
use_progress_bar = (
|
|
129
|
+
GLOBAL_CONFIG.use_iterate_bar_threshold and total_instances > GLOBAL_CONFIG.use_iterate_bar_threshold
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
for view_id, (total, instances) in self.total_instances_pair_by_view.items():
|
|
133
|
+
if total == 0:
|
|
134
|
+
continue
|
|
135
|
+
if use_progress_bar and total is not None:
|
|
136
|
+
instances = iterate_progress_bar(
|
|
137
|
+
instances,
|
|
138
|
+
total,
|
|
139
|
+
f"Extracting instances from {view_id.space}:{view_id.external_id}(version={view_id.version})",
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
for count, item in enumerate(instances, 1):
|
|
143
|
+
if self.limit and count > self.limit:
|
|
144
|
+
break
|
|
145
|
+
yield from self._extract_instance(item)
|
|
71
146
|
|
|
72
147
|
def _extract_instance(self, instance: Instance) -> Iterable[Triple]:
|
|
73
148
|
if isinstance(instance, dm.Edge):
|
|
@@ -82,7 +157,6 @@ class DMSExtractor(BaseExtractor):
|
|
|
82
157
|
# If the edge has properties, we create a node for the edge and connect it to the start and end nodes.
|
|
83
158
|
id_ = self._as_uri_ref(instance)
|
|
84
159
|
yield id_, RDF.type, self._as_uri_ref(instance.type)
|
|
85
|
-
yield id_, RDF.type, self._get_namespace(instance.space).Edge
|
|
86
160
|
yield (
|
|
87
161
|
id_,
|
|
88
162
|
self._as_uri_ref(dm.DirectRelationReference(instance.space, "startNode")),
|
|
@@ -98,6 +172,9 @@ class DMSExtractor(BaseExtractor):
|
|
|
98
172
|
id_ = self._as_uri_ref(instance)
|
|
99
173
|
if instance.type:
|
|
100
174
|
type_ = self._as_uri_ref(cast(dm.DirectRelationReference, instance.type))
|
|
175
|
+
elif len(instance.properties) == 1:
|
|
176
|
+
view_id = next(iter(instance.properties.keys()))
|
|
177
|
+
type_ = self._get_namespace(view_id.space)[urllib.parse.quote(view_id.external_id)]
|
|
101
178
|
else:
|
|
102
179
|
type_ = self._get_namespace(instance.space).Node
|
|
103
180
|
|
|
@@ -105,54 +182,126 @@ class DMSExtractor(BaseExtractor):
|
|
|
105
182
|
else:
|
|
106
183
|
raise NotImplementedError(f"Unknown instance type {type(instance)}")
|
|
107
184
|
|
|
185
|
+
if self.overwrite_namespace:
|
|
186
|
+
# If the namespace is overwritten, keep the original space as a property to avoid losing information.
|
|
187
|
+
yield id_, self._get_namespace(instance.space)["space"], Literal(instance.space)
|
|
188
|
+
|
|
108
189
|
for view_id, properties in instance.properties.items():
|
|
109
190
|
namespace = self._get_namespace(view_id.space)
|
|
110
191
|
for key, value in properties.items():
|
|
111
|
-
for object_ in self.
|
|
112
|
-
yield id_, namespace[
|
|
192
|
+
for predicate_str, object_ in self._get_predicate_objects_pair(key, value):
|
|
193
|
+
yield id_, namespace[urllib.parse.quote(predicate_str)], object_
|
|
113
194
|
|
|
114
|
-
def
|
|
195
|
+
def _get_predicate_objects_pair(self, key: str, value: PropertyValue) -> Iterable[tuple[str, Literal | URIRef]]:
|
|
115
196
|
if isinstance(value, str | float | bool | int):
|
|
116
|
-
yield Literal(value)
|
|
197
|
+
yield key, Literal(value)
|
|
117
198
|
elif isinstance(value, dict) and "space" in value and "externalId" in value:
|
|
118
|
-
yield self._as_uri_ref(dm.DirectRelationReference.load(value))
|
|
199
|
+
yield key, self._as_uri_ref(dm.DirectRelationReference.load(value))
|
|
200
|
+
elif isinstance(value, dict) and self.unpack_json:
|
|
201
|
+
for sub_key, sub_value in value.items():
|
|
202
|
+
if isinstance(sub_value, str):
|
|
203
|
+
if sub_value.casefold() in self.empty_values:
|
|
204
|
+
continue
|
|
205
|
+
if self.str_to_ideal_type:
|
|
206
|
+
yield sub_key, Literal(string_to_ideal_type(sub_value))
|
|
207
|
+
else:
|
|
208
|
+
yield sub_key, Literal(sub_value)
|
|
209
|
+
elif isinstance(sub_value, int | float | bool):
|
|
210
|
+
yield sub_key, Literal(sub_value)
|
|
211
|
+
elif isinstance(sub_value, dict):
|
|
212
|
+
yield from self._get_predicate_objects_pair(f"{key}_{sub_key}", sub_value)
|
|
213
|
+
elif isinstance(sub_value, list):
|
|
214
|
+
for item in sub_value:
|
|
215
|
+
yield from self._get_predicate_objects_pair(f"{key}_{sub_key}", item)
|
|
216
|
+
else:
|
|
217
|
+
yield sub_key, Literal(str(sub_value))
|
|
119
218
|
elif isinstance(value, dict):
|
|
120
219
|
# This object is a json object.
|
|
121
|
-
yield Literal(str(value), datatype=XSD._NS["json"])
|
|
220
|
+
yield key, Literal(str(value), datatype=XSD._NS["json"])
|
|
122
221
|
elif isinstance(value, list):
|
|
123
222
|
for item in value:
|
|
124
|
-
yield from self.
|
|
223
|
+
yield from self._get_predicate_objects_pair(key, item)
|
|
125
224
|
|
|
126
225
|
def _as_uri_ref(self, instance: Instance | dm.DirectRelationReference) -> URIRef:
|
|
127
|
-
return self._get_namespace(instance.space)[instance.external_id]
|
|
226
|
+
return self._get_namespace(instance.space)[urllib.parse.quote(instance.external_id)]
|
|
128
227
|
|
|
129
228
|
def _get_namespace(self, space: str) -> Namespace:
|
|
130
229
|
if self.overwrite_namespace:
|
|
131
230
|
return self.overwrite_namespace
|
|
132
|
-
return Namespace(DEFAULT_SPACE_URI.format(space=space))
|
|
231
|
+
return Namespace(DEFAULT_SPACE_URI.format(space=urllib.parse.quote(space)))
|
|
133
232
|
|
|
134
233
|
|
|
135
|
-
class
|
|
136
|
-
def __init__(self, client: CogniteClient,
|
|
234
|
+
class _ViewInstanceIterator(Iterable[Instance]):
|
|
235
|
+
def __init__(self, client: CogniteClient, view: dm.View, instance_space: str | SequenceNotStr[str] | None = None):
|
|
137
236
|
self.client = client
|
|
138
|
-
self.
|
|
237
|
+
self.view = view
|
|
238
|
+
self.instance_space = instance_space
|
|
239
|
+
|
|
240
|
+
@cached_property
|
|
241
|
+
def count(self) -> int:
|
|
242
|
+
node_count = edge_count = 0
|
|
243
|
+
if self.view.used_for in ("node", "all"):
|
|
244
|
+
node_count = int(
|
|
245
|
+
self.client.data_modeling.instances.aggregate(
|
|
246
|
+
view=self.view.as_id(),
|
|
247
|
+
aggregates=dm.aggregations.Count("externalId"),
|
|
248
|
+
instance_type="node",
|
|
249
|
+
space=self.instance_space,
|
|
250
|
+
).value
|
|
251
|
+
)
|
|
252
|
+
if self.view.used_for in ("edge", "all"):
|
|
253
|
+
edge_count = int(
|
|
254
|
+
self.client.data_modeling.instances.aggregate(
|
|
255
|
+
view=self.view.as_id(),
|
|
256
|
+
aggregates=dm.aggregations.Count("externalId"),
|
|
257
|
+
instance_type="edge",
|
|
258
|
+
space=self.instance_space,
|
|
259
|
+
).value
|
|
260
|
+
)
|
|
261
|
+
return node_count + edge_count
|
|
139
262
|
|
|
140
263
|
def __iter__(self) -> Iterator[Instance]:
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
264
|
+
view_id = self.view.as_id()
|
|
265
|
+
read_only_properties = {
|
|
266
|
+
prop_id
|
|
267
|
+
for prop_id, prop in self.view.properties.items()
|
|
268
|
+
if isinstance(prop, dm.MappedProperty)
|
|
269
|
+
and is_readonly_property(prop.container, prop.container_property_identifier)
|
|
270
|
+
}
|
|
271
|
+
# All nodes and edges with properties
|
|
272
|
+
if self.view.used_for in ("node", "all"):
|
|
273
|
+
node_iterable: Iterable[Instance] = self.client.data_modeling.instances(
|
|
274
|
+
chunk_size=None, instance_type="node", sources=[view_id], space=self.instance_space
|
|
275
|
+
)
|
|
276
|
+
if read_only_properties:
|
|
277
|
+
node_iterable = self._remove_read_only_properties(node_iterable, read_only_properties, view_id)
|
|
278
|
+
yield from node_iterable
|
|
279
|
+
|
|
280
|
+
if self.view.used_for in ("edge", "all"):
|
|
281
|
+
yield from self.client.data_modeling.instances(
|
|
282
|
+
chunk_size=None, instance_type="edge", sources=[view_id], space=self.instance_space
|
|
283
|
+
)
|
|
284
|
+
|
|
285
|
+
for prop in self.view.properties.values():
|
|
286
|
+
if isinstance(prop, dm.EdgeConnection):
|
|
287
|
+
if prop.edge_source:
|
|
288
|
+
# All edges with properties are extracted from the edge source
|
|
289
|
+
continue
|
|
290
|
+
yield from self.client.data_modeling.instances(
|
|
291
|
+
chunk_size=None,
|
|
292
|
+
instance_type="edge",
|
|
293
|
+
filter=dm.filters.Equals(
|
|
294
|
+
["edge", "type"], {"space": prop.type.space, "externalId": prop.type.external_id}
|
|
295
|
+
),
|
|
296
|
+
space=self.instance_space,
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
@staticmethod
|
|
300
|
+
def _remove_read_only_properties(
|
|
301
|
+
nodes: Iterable[Instance], read_only_properties: Set[str], view_id: dm.ViewId
|
|
302
|
+
) -> Iterable[Instance]:
|
|
303
|
+
for node in nodes:
|
|
304
|
+
if properties := node.properties.get(view_id):
|
|
305
|
+
for read_only in read_only_properties:
|
|
306
|
+
properties.pop(read_only, None)
|
|
307
|
+
yield node
|