cognite-neat 0.99.1__py3-none-any.whl → 0.100.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_client/_api/data_modeling_loaders.py +403 -182
- cognite/neat/_client/data_classes/data_modeling.py +4 -0
- cognite/neat/_graph/extractors/_base.py +7 -0
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +23 -13
- cognite/neat/_graph/loaders/_rdf2dms.py +50 -11
- cognite/neat/_graph/transformers/__init__.py +3 -3
- cognite/neat/_graph/transformers/_classic_cdf.py +120 -52
- cognite/neat/_issues/warnings/__init__.py +2 -0
- cognite/neat/_issues/warnings/_resources.py +15 -0
- cognite/neat/_rules/analysis/_base.py +15 -5
- cognite/neat/_rules/analysis/_dms.py +20 -0
- cognite/neat/_rules/analysis/_information.py +22 -0
- cognite/neat/_rules/exporters/_base.py +3 -5
- cognite/neat/_rules/exporters/_rules2dms.py +192 -200
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +22 -5
- cognite/neat/_rules/models/_base_rules.py +19 -0
- cognite/neat/_rules/models/_types.py +5 -0
- cognite/neat/_rules/models/dms/_exporter.py +215 -93
- cognite/neat/_rules/models/dms/_rules.py +4 -4
- cognite/neat/_rules/models/dms/_rules_input.py +8 -3
- cognite/neat/_rules/models/dms/_validation.py +42 -11
- cognite/neat/_rules/models/entities/_multi_value.py +3 -0
- cognite/neat/_rules/models/information/_rules.py +17 -2
- cognite/neat/_rules/models/information/_rules_input.py +11 -2
- cognite/neat/_rules/models/information/_validation.py +99 -3
- cognite/neat/_rules/models/mapping/_classic2core.yaml +1 -1
- cognite/neat/_rules/transformers/__init__.py +2 -1
- cognite/neat/_rules/transformers/_converters.py +163 -61
- cognite/neat/_rules/transformers/_mapping.py +132 -2
- cognite/neat/_session/_base.py +42 -31
- cognite/neat/_session/_mapping.py +105 -5
- cognite/neat/_session/_prepare.py +43 -9
- cognite/neat/_session/_read.py +50 -4
- cognite/neat/_session/_set.py +1 -0
- cognite/neat/_session/_to.py +36 -13
- cognite/neat/_session/_wizard.py +5 -0
- cognite/neat/_session/engine/_interface.py +3 -2
- cognite/neat/_store/_base.py +79 -19
- cognite/neat/_utils/collection_.py +22 -0
- cognite/neat/_utils/rdf_.py +24 -0
- cognite/neat/_version.py +2 -2
- cognite/neat/_workflows/steps/lib/current/rules_exporter.py +3 -3
- {cognite_neat-0.99.1.dist-info → cognite_neat-0.100.1.dist-info}/METADATA +1 -1
- {cognite_neat-0.99.1.dist-info → cognite_neat-0.100.1.dist-info}/RECORD +47 -47
- {cognite_neat-0.99.1.dist-info → cognite_neat-0.100.1.dist-info}/LICENSE +0 -0
- {cognite_neat-0.99.1.dist-info → cognite_neat-0.100.1.dist-info}/WHEEL +0 -0
- {cognite_neat-0.99.1.dist-info → cognite_neat-0.100.1.dist-info}/entry_points.txt +0 -0
|
@@ -2,6 +2,8 @@ from abc import ABC, abstractmethod
|
|
|
2
2
|
from collections.abc import Hashable, ItemsView, Iterable, Iterator, KeysView, Mapping, MutableMapping, ValuesView
|
|
3
3
|
from typing import (
|
|
4
4
|
Any,
|
|
5
|
+
Literal,
|
|
6
|
+
TypeAlias,
|
|
5
7
|
TypeVar,
|
|
6
8
|
cast,
|
|
7
9
|
final,
|
|
@@ -24,6 +26,8 @@ from cognite.client.data_classes.data_modeling import (
|
|
|
24
26
|
from cognite.client.utils._auxiliary import load_yaml_or_json
|
|
25
27
|
from cognite.client.utils._pandas_helpers import convert_nullable_int_cols
|
|
26
28
|
|
|
29
|
+
Component: TypeAlias = Literal["spaces", "data_models", "views", "containers", "node_types"]
|
|
30
|
+
|
|
27
31
|
T_ID = TypeVar("T_ID", bound=Hashable)
|
|
28
32
|
|
|
29
33
|
|
|
@@ -10,6 +10,13 @@ class BaseExtractor:
|
|
|
10
10
|
extractors must implement.
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
|
+
def _get_activity_names(self) -> list[str]:
|
|
14
|
+
"""Returns the name of the activities that the extractor performs,
|
|
15
|
+
i.e., the actions that it performs when you call extract().."""
|
|
16
|
+
# This method can be overridden by subclasses that runs multiple extractors
|
|
17
|
+
# for example the ClassicGraphExtractor
|
|
18
|
+
return [type(self).__name__]
|
|
19
|
+
|
|
13
20
|
@abstractmethod
|
|
14
21
|
def extract(self) -> Iterable[Triple]:
|
|
15
22
|
raise NotImplementedError()
|
|
@@ -11,7 +11,7 @@ from cognite.neat._constants import CLASSIC_CDF_NAMESPACE
|
|
|
11
11
|
from cognite.neat._graph.extractors._base import BaseExtractor
|
|
12
12
|
from cognite.neat._issues.warnings import CDFAuthWarning
|
|
13
13
|
from cognite.neat._shared import Triple
|
|
14
|
-
from cognite.neat._utils.collection_ import chunker
|
|
14
|
+
from cognite.neat._utils.collection_ import chunker, iterate_progress_bar
|
|
15
15
|
from cognite.neat._utils.rdf_ import remove_namespace_from_uri
|
|
16
16
|
|
|
17
17
|
from ._assets import AssetsExtractor
|
|
@@ -92,6 +92,7 @@ class ClassicGraphExtractor(BaseExtractor):
|
|
|
92
92
|
data_set_external_id: str | None = None,
|
|
93
93
|
root_asset_external_id: str | None = None,
|
|
94
94
|
namespace: Namespace | None = None,
|
|
95
|
+
limit_per_type: int | None = None,
|
|
95
96
|
):
|
|
96
97
|
self._client = client
|
|
97
98
|
if sum([bool(data_set_external_id), bool(root_asset_external_id)]) != 1:
|
|
@@ -99,12 +100,27 @@ class ClassicGraphExtractor(BaseExtractor):
|
|
|
99
100
|
self._root_asset_external_id = root_asset_external_id
|
|
100
101
|
self._data_set_external_id = data_set_external_id
|
|
101
102
|
self._namespace = namespace or CLASSIC_CDF_NAMESPACE
|
|
102
|
-
self._extractor_args = dict(
|
|
103
|
+
self._extractor_args = dict(
|
|
104
|
+
namespace=self._namespace, unpack_metadata=False, as_write=True, camel_case=True, limit=limit_per_type
|
|
105
|
+
)
|
|
106
|
+
self._limit_per_type = limit_per_type
|
|
103
107
|
|
|
104
108
|
self._source_external_ids_by_type: dict[InstanceIdPrefix, set[str]] = defaultdict(set)
|
|
105
109
|
self._target_external_ids_by_type: dict[InstanceIdPrefix, set[str]] = defaultdict(set)
|
|
106
110
|
self._labels: set[str] = set()
|
|
107
111
|
self._data_set_ids: set[int] = set()
|
|
112
|
+
self._extracted_labels = False
|
|
113
|
+
self._extracted_data_sets = False
|
|
114
|
+
|
|
115
|
+
def _get_activity_names(self) -> list[str]:
|
|
116
|
+
activities = [data_access_object.extractor_cls.__name__ for data_access_object in self._classic_node_types] + [
|
|
117
|
+
RelationshipsExtractor.__name__,
|
|
118
|
+
]
|
|
119
|
+
if self._extracted_labels:
|
|
120
|
+
activities.append(LabelsExtractor.__name__)
|
|
121
|
+
if self._extracted_data_sets:
|
|
122
|
+
activities.append(DataSetExtractor.__name__)
|
|
123
|
+
return activities
|
|
108
124
|
|
|
109
125
|
def extract(self) -> Iterable[Triple]:
|
|
110
126
|
"""Extracts all classic CDF Resources."""
|
|
@@ -118,11 +134,15 @@ class ClassicGraphExtractor(BaseExtractor):
|
|
|
118
134
|
yield from self._extract_labels()
|
|
119
135
|
except CogniteAPIError as e:
|
|
120
136
|
warnings.warn(CDFAuthWarning("extract labels", str(e)), stacklevel=2)
|
|
137
|
+
else:
|
|
138
|
+
self._extracted_labels = True
|
|
121
139
|
|
|
122
140
|
try:
|
|
123
141
|
yield from self._extract_data_sets()
|
|
124
142
|
except CogniteAPIError as e:
|
|
125
143
|
warnings.warn(CDFAuthWarning("extract data sets", str(e)), stacklevel=2)
|
|
144
|
+
else:
|
|
145
|
+
self._extracted_data_sets = True
|
|
126
146
|
|
|
127
147
|
def _extract_core_start_nodes(self):
|
|
128
148
|
for core_node in self._classic_node_types:
|
|
@@ -206,14 +226,4 @@ class ClassicGraphExtractor(BaseExtractor):
|
|
|
206
226
|
@staticmethod
|
|
207
227
|
def _chunk(items: Sequence, description: str) -> Iterable:
|
|
208
228
|
to_iterate: Iterable = chunker(items, chunk_size=1000)
|
|
209
|
-
|
|
210
|
-
from rich.progress import track
|
|
211
|
-
except ModuleNotFoundError:
|
|
212
|
-
...
|
|
213
|
-
else:
|
|
214
|
-
to_iterate = track(
|
|
215
|
-
to_iterate,
|
|
216
|
-
total=(len(items) // 1_000) + 1,
|
|
217
|
-
description=description,
|
|
218
|
-
)
|
|
219
|
-
return to_iterate
|
|
229
|
+
return iterate_progress_bar(to_iterate, (len(items) // 1_000) + 1, description)
|
|
@@ -14,7 +14,7 @@ from cognite.client.data_classes.data_modeling.ids import InstanceId
|
|
|
14
14
|
from cognite.client.data_classes.data_modeling.views import SingleEdgeConnection
|
|
15
15
|
from cognite.client.exceptions import CogniteAPIError
|
|
16
16
|
from pydantic import BaseModel, ValidationInfo, create_model, field_validator
|
|
17
|
-
from rdflib import RDF
|
|
17
|
+
from rdflib import RDF, URIRef
|
|
18
18
|
|
|
19
19
|
from cognite.neat._graph._tracking import LogTracker, Tracker
|
|
20
20
|
from cognite.neat._issues import IssueList, NeatIssue, NeatIssueList
|
|
@@ -25,8 +25,10 @@ from cognite.neat._issues.errors import (
|
|
|
25
25
|
ResourceRetrievalError,
|
|
26
26
|
)
|
|
27
27
|
from cognite.neat._issues.warnings import PropertyTypeNotSupportedWarning
|
|
28
|
+
from cognite.neat._rules.analysis._dms import DMSAnalysis
|
|
28
29
|
from cognite.neat._rules.models import DMSRules
|
|
29
30
|
from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE, Json
|
|
31
|
+
from cognite.neat._rules.models.entities._single_value import ViewEntity
|
|
30
32
|
from cognite.neat._shared import InstanceType
|
|
31
33
|
from cognite.neat._store import NeatGraphStore
|
|
32
34
|
from cognite.neat._utils.auxiliary import create_sha256_hash
|
|
@@ -52,16 +54,18 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
52
54
|
graph_store: NeatGraphStore,
|
|
53
55
|
data_model: dm.DataModel[dm.View] | None,
|
|
54
56
|
instance_space: str,
|
|
55
|
-
|
|
57
|
+
class_neat_id_by_view_id: dict[ViewId, URIRef] | None = None,
|
|
56
58
|
create_issues: Sequence[NeatIssue] | None = None,
|
|
57
59
|
tracker: type[Tracker] | None = None,
|
|
60
|
+
rules: DMSRules | None = None,
|
|
58
61
|
):
|
|
59
62
|
super().__init__(graph_store)
|
|
60
63
|
self.data_model = data_model
|
|
61
64
|
self.instance_space = instance_space
|
|
62
|
-
self.
|
|
65
|
+
self.class_neat_id_by_view_id = class_neat_id_by_view_id or {}
|
|
63
66
|
self._issues = IssueList(create_issues or [])
|
|
64
67
|
self._tracker: type[Tracker] = tracker or LogTracker
|
|
68
|
+
self.rules = rules
|
|
65
69
|
|
|
66
70
|
@classmethod
|
|
67
71
|
def from_data_model_id(
|
|
@@ -95,7 +99,17 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
95
99
|
reason=str(e),
|
|
96
100
|
)
|
|
97
101
|
)
|
|
98
|
-
|
|
102
|
+
|
|
103
|
+
class_neat_id_by_view_id = {view.view.as_id(): view.logical for view in rules.views if view.logical}
|
|
104
|
+
|
|
105
|
+
return cls(
|
|
106
|
+
graph_store,
|
|
107
|
+
data_model,
|
|
108
|
+
instance_space,
|
|
109
|
+
class_neat_id_by_view_id,
|
|
110
|
+
issues,
|
|
111
|
+
rules=rules,
|
|
112
|
+
)
|
|
99
113
|
|
|
100
114
|
def _load(self, stop_on_exception: bool = False) -> Iterable[dm.InstanceApply | NeatIssue]:
|
|
101
115
|
if self._issues.has_errors and stop_on_exception:
|
|
@@ -106,6 +120,13 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
106
120
|
if not self.data_model:
|
|
107
121
|
# There should already be an error in this case.
|
|
108
122
|
return
|
|
123
|
+
|
|
124
|
+
views_with_linked_properties = (
|
|
125
|
+
DMSAnalysis(self.rules).views_with_properties_linked_to_classes(consider_inheritance=True)
|
|
126
|
+
if self.rules and self.rules.metadata.logical
|
|
127
|
+
else None
|
|
128
|
+
)
|
|
129
|
+
|
|
109
130
|
view_ids = [repr(v.as_id()) for v in self.data_model.views]
|
|
110
131
|
tracker = self._tracker(type(self).__name__, view_ids, "views")
|
|
111
132
|
for view in self.data_model.views:
|
|
@@ -114,17 +135,35 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
114
135
|
pydantic_cls, edge_by_type, issues = self._create_validation_classes(view) # type: ignore[var-annotated]
|
|
115
136
|
yield from issues
|
|
116
137
|
tracker.issue(issues)
|
|
117
|
-
class_name = self.class_by_view_id.get(view.as_id(), view.external_id)
|
|
118
138
|
|
|
119
|
-
|
|
139
|
+
# this assumes no changes in the suffix of view and class
|
|
140
|
+
|
|
141
|
+
if views_with_linked_properties:
|
|
142
|
+
# we need graceful exit if the view is not in the view_property_pairs
|
|
143
|
+
property_link_pairs = views_with_linked_properties.get(ViewEntity.from_id(view_id))
|
|
144
|
+
|
|
145
|
+
if class_neat_id := self.class_neat_id_by_view_id.get(view_id):
|
|
146
|
+
reader = self.graph_store._read_via_rules_linkage(class_neat_id, property_link_pairs)
|
|
147
|
+
else:
|
|
148
|
+
error_view = ResourceRetrievalError(view_id, "view", "View not linked to class")
|
|
149
|
+
tracker.issue(error_view)
|
|
150
|
+
if stop_on_exception:
|
|
151
|
+
raise error_view
|
|
152
|
+
yield error_view
|
|
153
|
+
|
|
154
|
+
else:
|
|
155
|
+
reader = self.graph_store.read(view.external_id)
|
|
156
|
+
|
|
157
|
+
for identifier, properties in reader:
|
|
120
158
|
try:
|
|
159
|
+
print(view_id)
|
|
121
160
|
yield self._create_node(identifier, properties, pydantic_cls, view_id)
|
|
122
161
|
except ValueError as e:
|
|
123
|
-
|
|
124
|
-
tracker.issue(
|
|
162
|
+
error_node = ResourceCreationError(identifier, "node", error=str(e))
|
|
163
|
+
tracker.issue(error_node)
|
|
125
164
|
if stop_on_exception:
|
|
126
|
-
raise
|
|
127
|
-
yield
|
|
165
|
+
raise error_node from e
|
|
166
|
+
yield error_node
|
|
128
167
|
yield from self._create_edges(identifier, properties, edge_by_type, tracker)
|
|
129
168
|
tracker.finish(repr(view_id))
|
|
130
169
|
|
|
@@ -244,7 +283,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
244
283
|
return dm.NodeApply(
|
|
245
284
|
space=self.instance_space,
|
|
246
285
|
external_id=identifier,
|
|
247
|
-
type=dm.DirectRelationReference(view_id.space,
|
|
286
|
+
type=(dm.DirectRelationReference(view_id.space, view_id.external_id) if type_ is not None else None),
|
|
248
287
|
sources=[dm.NodeOrEdgeData(source=view_id, properties=dict(created.model_dump().items()))],
|
|
249
288
|
)
|
|
250
289
|
|
|
@@ -5,7 +5,7 @@ from ._classic_cdf import (
|
|
|
5
5
|
AssetRelationshipConnector,
|
|
6
6
|
AssetSequenceConnector,
|
|
7
7
|
AssetTimeSeriesConnector,
|
|
8
|
-
|
|
8
|
+
RelationshipAsEdgeTransformer,
|
|
9
9
|
)
|
|
10
10
|
from ._rdfpath import AddSelfReferenceProperty, MakeConnectionOnExactMatch
|
|
11
11
|
from ._value_type import SplitMultiValueProperty
|
|
@@ -19,7 +19,7 @@ __all__ = [
|
|
|
19
19
|
"AssetRelationshipConnector",
|
|
20
20
|
"AddSelfReferenceProperty",
|
|
21
21
|
"SplitMultiValueProperty",
|
|
22
|
-
"
|
|
22
|
+
"RelationshipAsEdgeTransformer",
|
|
23
23
|
"MakeConnectionOnExactMatch",
|
|
24
24
|
]
|
|
25
25
|
|
|
@@ -32,6 +32,6 @@ Transformers = (
|
|
|
32
32
|
| AssetRelationshipConnector
|
|
33
33
|
| AddSelfReferenceProperty
|
|
34
34
|
| SplitMultiValueProperty
|
|
35
|
-
|
|
|
35
|
+
| RelationshipAsEdgeTransformer
|
|
36
36
|
| MakeConnectionOnExactMatch
|
|
37
37
|
)
|
|
@@ -1,15 +1,22 @@
|
|
|
1
1
|
import textwrap
|
|
2
2
|
import warnings
|
|
3
3
|
from abc import ABC
|
|
4
|
+
from collections.abc import Callable, Iterable
|
|
5
|
+
from functools import lru_cache
|
|
4
6
|
from typing import cast
|
|
5
7
|
|
|
6
8
|
from rdflib import RDF, Graph, Literal, Namespace, URIRef
|
|
7
|
-
from rdflib.query import ResultRow
|
|
8
9
|
|
|
9
10
|
from cognite.neat._constants import CLASSIC_CDF_NAMESPACE, DEFAULT_NAMESPACE
|
|
10
11
|
from cognite.neat._graph import extractors
|
|
11
12
|
from cognite.neat._issues.warnings import ResourceNotFoundWarning
|
|
12
|
-
from cognite.neat._utils.
|
|
13
|
+
from cognite.neat._utils.collection_ import iterate_progress_bar
|
|
14
|
+
from cognite.neat._utils.rdf_ import (
|
|
15
|
+
Triple,
|
|
16
|
+
add_triples_in_batch,
|
|
17
|
+
remove_instance_ids_in_batch,
|
|
18
|
+
remove_namespace_from_uri,
|
|
19
|
+
)
|
|
13
20
|
|
|
14
21
|
from ._base import BaseTransformer
|
|
15
22
|
|
|
@@ -235,31 +242,40 @@ class AssetRelationshipConnector(BaseTransformer):
|
|
|
235
242
|
graph.remove((relationship_id, self.relationship_target_xid_prop, None))
|
|
236
243
|
|
|
237
244
|
|
|
238
|
-
class
|
|
239
|
-
"""
|
|
245
|
+
class RelationshipAsEdgeTransformer(BaseTransformer):
|
|
246
|
+
"""Converts relationships into edges in the graph.
|
|
240
247
|
|
|
241
|
-
This transformer
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
properties are replaced by a schema that contains the properties as attributes.
|
|
248
|
+
This transformer converts relationships into edges in the graph. This is useful as the
|
|
249
|
+
edges will be picked up as part of the schema connected to Assets, Events, Files, Sequenses,
|
|
250
|
+
and TimeSeries in the InferenceImporter.
|
|
245
251
|
|
|
246
252
|
Args:
|
|
247
|
-
|
|
248
|
-
to
|
|
253
|
+
min_relationship_types: The minimum number of relationship types that must exists to convert those
|
|
254
|
+
relationships to edges. For example, if there is only 5 relationships between Assets and TimeSeries,
|
|
255
|
+
and limit is 10, those relationships will not be converted to edges.
|
|
256
|
+
limit_per_type: The number of conversions to perform per relationship type. For example, if there are 10
|
|
257
|
+
relationships between Assets and TimeSeries, and limit_per_type is 1, only 1 of those relationships
|
|
258
|
+
will be converted to an edge. If None, all relationships will be converted.
|
|
249
259
|
|
|
250
260
|
"""
|
|
251
261
|
|
|
252
|
-
def __init__(
|
|
253
|
-
self
|
|
262
|
+
def __init__(
|
|
263
|
+
self,
|
|
264
|
+
min_relationship_types: int = 1,
|
|
265
|
+
limit_per_type: int | None = None,
|
|
266
|
+
namespace: Namespace = CLASSIC_CDF_NAMESPACE,
|
|
267
|
+
) -> None:
|
|
268
|
+
self._min_relationship_types = min_relationship_types
|
|
269
|
+
self._limit_per_type = limit_per_type
|
|
254
270
|
self._namespace = namespace
|
|
255
271
|
|
|
256
272
|
_NOT_PROPERTIES: frozenset[str] = frozenset(
|
|
257
273
|
{"sourceExternalId", "targetExternalId", "externalId", "sourceType", "targetType"}
|
|
258
274
|
)
|
|
259
275
|
_RELATIONSHIP_NODE_TYPES: tuple[str, ...] = tuple(["Asset", "Event", "File", "Sequence", "TimeSeries"])
|
|
260
|
-
description = "
|
|
276
|
+
description = "Converts relationships to edge"
|
|
261
277
|
_use_only_once: bool = True
|
|
262
|
-
_need_changes = frozenset({
|
|
278
|
+
_need_changes = frozenset({extractors.RelationshipsExtractor.__name__})
|
|
263
279
|
|
|
264
280
|
_count_by_source_target = """PREFIX classic: <{namespace}>
|
|
265
281
|
|
|
@@ -286,47 +302,99 @@ WHERE {{
|
|
|
286
302
|
?entity classic:externalId "{external_id}" .
|
|
287
303
|
}}"""
|
|
288
304
|
|
|
305
|
+
@staticmethod
|
|
306
|
+
def create_lookup_entity_with_external_id(graph: Graph, namespace: Namespace) -> Callable[[str, str], URIRef]:
|
|
307
|
+
@lru_cache(maxsize=10_000)
|
|
308
|
+
def lookup_entity_with_external_id(entity_type: str, external_id: str) -> URIRef:
|
|
309
|
+
query = RelationshipAsEdgeTransformer._lookup_entity_query.format(
|
|
310
|
+
namespace=namespace, entity_type=entity_type, external_id=external_id
|
|
311
|
+
)
|
|
312
|
+
result = list(graph.query(query))
|
|
313
|
+
if len(result) == 1:
|
|
314
|
+
return cast(URIRef, result[0][0]) # type: ignore[index]
|
|
315
|
+
raise ValueError(f"Could not find entity with external_id {external_id} and type {entity_type}")
|
|
316
|
+
|
|
317
|
+
return lookup_entity_with_external_id
|
|
318
|
+
|
|
289
319
|
def transform(self, graph: Graph) -> None:
|
|
320
|
+
lookup_entity_with_external_id = self.create_lookup_entity_with_external_id(graph, self._namespace)
|
|
290
321
|
for source_type in self._RELATIONSHIP_NODE_TYPES:
|
|
291
322
|
for target_type in self._RELATIONSHIP_NODE_TYPES:
|
|
292
323
|
query = self._count_by_source_target.format(
|
|
293
324
|
namespace=self._namespace, source_type=source_type, target_type=target_type
|
|
294
325
|
)
|
|
295
|
-
for
|
|
296
|
-
|
|
326
|
+
for instance_count_res in graph.query(query):
|
|
327
|
+
instance_count = int(instance_count_res[0]) # type: ignore[index, arg-type]
|
|
328
|
+
if instance_count < self._min_relationship_types:
|
|
297
329
|
continue
|
|
298
|
-
|
|
299
|
-
|
|
330
|
+
edge_triples = self._edge_triples(
|
|
331
|
+
graph, source_type, target_type, instance_count, lookup_entity_with_external_id
|
|
300
332
|
)
|
|
301
|
-
|
|
302
|
-
instance_id = cast(URIRef, result[0]) # type: ignore[index, misc]
|
|
303
|
-
self._convert_relationship_to_schema(graph, instance_id, source_type, target_type)
|
|
333
|
+
add_triples_in_batch(graph, edge_triples)
|
|
304
334
|
|
|
305
|
-
def
|
|
306
|
-
self,
|
|
307
|
-
|
|
308
|
-
|
|
335
|
+
def _edge_triples(
|
|
336
|
+
self,
|
|
337
|
+
graph: Graph,
|
|
338
|
+
source_type: str,
|
|
339
|
+
target_type: str,
|
|
340
|
+
instance_count: int,
|
|
341
|
+
lookup_entity_with_external_id: Callable[[str, str], URIRef],
|
|
342
|
+
) -> Iterable[Triple]:
|
|
343
|
+
query = self._instances.format(namespace=self._namespace, source_type=source_type, target_type=target_type)
|
|
344
|
+
total_instance_count = instance_count if self._limit_per_type is None else self._limit_per_type
|
|
345
|
+
|
|
346
|
+
converted_relationships: list[URIRef] = []
|
|
347
|
+
for no, result in enumerate(
|
|
348
|
+
iterate_progress_bar(graph.query(query), total=total_instance_count, description="Relationships to edges")
|
|
349
|
+
):
|
|
350
|
+
if self._limit_per_type is not None and no >= self._limit_per_type:
|
|
351
|
+
break
|
|
352
|
+
relationship_id = cast(URIRef, result[0]) # type: ignore[index, misc]
|
|
353
|
+
yield from self._relationship_as_edge(
|
|
354
|
+
graph, relationship_id, source_type, target_type, lookup_entity_with_external_id
|
|
355
|
+
)
|
|
356
|
+
converted_relationships.append(relationship_id)
|
|
357
|
+
|
|
358
|
+
if len(converted_relationships) >= 1_000:
|
|
359
|
+
remove_instance_ids_in_batch(graph, converted_relationships)
|
|
360
|
+
converted_relationships = []
|
|
361
|
+
|
|
362
|
+
remove_instance_ids_in_batch(graph, converted_relationships)
|
|
363
|
+
|
|
364
|
+
def _relationship_as_edge(
|
|
365
|
+
self,
|
|
366
|
+
graph: Graph,
|
|
367
|
+
relationship_id: URIRef,
|
|
368
|
+
source_type: str,
|
|
369
|
+
target_type: str,
|
|
370
|
+
lookup_entity_with_external_id: Callable[[str, str], URIRef],
|
|
371
|
+
) -> list[Triple]:
|
|
372
|
+
relationship_triples = cast(list[Triple], list(graph.query(f"DESCRIBE <{relationship_id}>")))
|
|
309
373
|
object_by_predicates = cast(
|
|
310
|
-
dict[str, URIRef | Literal], {remove_namespace_from_uri(row[1]): row[2] for row in
|
|
374
|
+
dict[str, URIRef | Literal], {remove_namespace_from_uri(row[1]): row[2] for row in relationship_triples}
|
|
311
375
|
)
|
|
312
376
|
source_external_id = cast(URIRef, object_by_predicates["sourceExternalId"])
|
|
313
377
|
target_source_id = cast(URIRef, object_by_predicates["targetExternalId"])
|
|
314
378
|
try:
|
|
315
|
-
source_id =
|
|
379
|
+
source_id = lookup_entity_with_external_id(source_type, source_external_id)
|
|
316
380
|
except ValueError:
|
|
317
|
-
warnings.warn(
|
|
318
|
-
|
|
381
|
+
warnings.warn(
|
|
382
|
+
ResourceNotFoundWarning(source_external_id, "class", str(relationship_id), "class"), stacklevel=2
|
|
383
|
+
)
|
|
384
|
+
return []
|
|
319
385
|
try:
|
|
320
|
-
target_id =
|
|
386
|
+
target_id = lookup_entity_with_external_id(target_type, target_source_id)
|
|
321
387
|
except ValueError:
|
|
322
|
-
warnings.warn(
|
|
323
|
-
|
|
324
|
-
|
|
388
|
+
warnings.warn(
|
|
389
|
+
ResourceNotFoundWarning(target_source_id, "class", str(relationship_id), "class"), stacklevel=2
|
|
390
|
+
)
|
|
391
|
+
return []
|
|
392
|
+
edge_id = str(object_by_predicates["externalId"])
|
|
325
393
|
# If there is properties on the relationship, we create a new intermediate node
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
394
|
+
edge_type = self._namespace[f"{source_type}To{target_type}Edge"]
|
|
395
|
+
return self._create_edge(
|
|
396
|
+
object_by_predicates, edge_id, source_id, target_id, self._predicate(target_type), edge_type
|
|
397
|
+
)
|
|
330
398
|
|
|
331
399
|
def _lookup_entity(self, graph: Graph, entity_type: str, external_id: str) -> URIRef:
|
|
332
400
|
query = self._lookup_entity_query.format(
|
|
@@ -337,38 +405,38 @@ WHERE {{
|
|
|
337
405
|
return cast(URIRef, result[0][0]) # type: ignore[index]
|
|
338
406
|
raise ValueError(f"Could not find entity with external_id {external_id} and type {entity_type}")
|
|
339
407
|
|
|
340
|
-
def
|
|
408
|
+
def _create_edge(
|
|
341
409
|
self,
|
|
342
|
-
graph: Graph,
|
|
343
410
|
objects_by_predicates: dict[str, URIRef | Literal],
|
|
344
411
|
external_id: str,
|
|
345
412
|
source_id: URIRef,
|
|
346
413
|
target_id: URIRef,
|
|
347
414
|
predicate: URIRef,
|
|
348
|
-
|
|
415
|
+
edge_type: URIRef,
|
|
416
|
+
) -> list[Triple]:
|
|
349
417
|
"""Creates a new intermediate node for the relationship with properties."""
|
|
350
418
|
# Create the entity with the properties
|
|
351
|
-
|
|
352
|
-
|
|
419
|
+
edge_triples: list[Triple] = []
|
|
420
|
+
edge_id = self._namespace[external_id]
|
|
421
|
+
|
|
422
|
+
edge_triples.append((edge_id, RDF.type, edge_type))
|
|
353
423
|
for prop_name, object_ in objects_by_predicates.items():
|
|
354
424
|
if prop_name in self._NOT_PROPERTIES:
|
|
355
425
|
continue
|
|
356
|
-
|
|
426
|
+
edge_triples.append((edge_id, self._namespace[prop_name], object_))
|
|
357
427
|
|
|
358
428
|
# Target and Source IDs will always be a combination of Asset, Sequence, Event, TimeSeries, and File.
|
|
359
429
|
# If we assume source ID is an asset and target ID is a time series, then
|
|
360
430
|
# before we had relationship pointing to both: timeseries <- relationship -> asset
|
|
361
|
-
# After, we want asset
|
|
431
|
+
# After, we want asset <-> Edge -> TimeSeries
|
|
362
432
|
# and the new edge will point to the asset and the timeseries through startNode and endNode
|
|
363
433
|
|
|
364
|
-
# Link the
|
|
365
|
-
|
|
366
|
-
#
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
# Link the source to the edge properties
|
|
371
|
-
graph.add((source_id, self._namespace["edgeSource"], instance_id))
|
|
434
|
+
# Link the source to the new edge
|
|
435
|
+
edge_triples.append((source_id, predicate, edge_id))
|
|
436
|
+
# Link the edge to the source and target
|
|
437
|
+
edge_triples.append((edge_id, self._namespace["startNode"], source_id))
|
|
438
|
+
edge_triples.append((edge_id, self._namespace["endNode"], target_id))
|
|
439
|
+
return edge_triples
|
|
372
440
|
|
|
373
441
|
def _predicate(self, target_type: str) -> URIRef:
|
|
374
442
|
return self._namespace[f"relationship{target_type.capitalize()}"]
|
|
@@ -36,6 +36,7 @@ from ._properties import (
|
|
|
36
36
|
from ._resources import (
|
|
37
37
|
ResourceNeatWarning,
|
|
38
38
|
ResourceNotFoundWarning,
|
|
39
|
+
ResourceRegexViolationWarning,
|
|
39
40
|
ResourceRetrievalWarning,
|
|
40
41
|
ResourcesDuplicatedWarning,
|
|
41
42
|
ResourceTypeNotSupportedWarning,
|
|
@@ -64,6 +65,7 @@ __all__ = [
|
|
|
64
65
|
"ResourceNotFoundWarning",
|
|
65
66
|
"ResourceTypeNotSupportedWarning",
|
|
66
67
|
"ResourceRetrievalWarning",
|
|
68
|
+
"ResourceRegexViolationWarning",
|
|
67
69
|
"PrincipleOneModelOneSpaceWarning",
|
|
68
70
|
"PrincipleMatchingSpaceAndVersionWarning",
|
|
69
71
|
"PrincipleSolutionBuildsOnEnterpriseWarning",
|
|
@@ -13,6 +13,21 @@ class ResourceNeatWarning(NeatWarning, Generic[T_Identifier]):
|
|
|
13
13
|
resource_type: ResourceType
|
|
14
14
|
|
|
15
15
|
|
|
16
|
+
@dataclass(unsafe_hash=True)
|
|
17
|
+
class ResourceRegexViolationWarning(ResourceNeatWarning):
|
|
18
|
+
"""The {resource_type} with identifier {identifier} in the {location} is violating
|
|
19
|
+
the CDF regex {regex}. This will lead to errors when converting to DMS data model.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
fix = (
|
|
23
|
+
"Either export the data model and make the necessary changes manually"
|
|
24
|
+
" or run prepare.cdf_compliant_external_ids."
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
location: str
|
|
28
|
+
regex: str
|
|
29
|
+
|
|
30
|
+
|
|
16
31
|
@dataclass(unsafe_hash=True)
|
|
17
32
|
class ResourceNotFoundWarning(ResourceNeatWarning, Generic[T_Identifier, T_ReferenceIdentifier]):
|
|
18
33
|
"""The {resource_type} with identifier {identifier} referred by {referred_type} {referred_by} does not exist.
|
|
@@ -4,23 +4,25 @@ from abc import ABC, abstractmethod
|
|
|
4
4
|
from collections import defaultdict
|
|
5
5
|
from collections.abc import Set
|
|
6
6
|
from dataclasses import dataclass
|
|
7
|
-
from typing import Generic, TypeVar
|
|
7
|
+
from typing import Generic, TypeVar, cast
|
|
8
8
|
|
|
9
9
|
import pandas as pd
|
|
10
|
-
from
|
|
10
|
+
from rdflib import URIRef
|
|
11
11
|
|
|
12
12
|
from cognite.neat._rules.models._base_rules import BaseRules
|
|
13
13
|
from cognite.neat._rules.models._rdfpath import RDFPath
|
|
14
|
+
from cognite.neat._rules.models.dms._rules import DMSProperty, DMSView
|
|
14
15
|
from cognite.neat._rules.models.entities import (
|
|
15
16
|
ClassEntity,
|
|
16
17
|
Entity,
|
|
17
18
|
)
|
|
18
19
|
from cognite.neat._rules.models.information import InformationProperty
|
|
20
|
+
from cognite.neat._rules.models.information._rules import InformationClass
|
|
19
21
|
from cognite.neat._utils.rdf_ import get_inheritance_path
|
|
20
22
|
|
|
21
23
|
T_Rules = TypeVar("T_Rules", bound=BaseRules)
|
|
22
|
-
T_Property = TypeVar("T_Property", bound=
|
|
23
|
-
T_Class = TypeVar("T_Class", bound=
|
|
24
|
+
T_Property = TypeVar("T_Property", bound=InformationProperty | DMSProperty)
|
|
25
|
+
T_Class = TypeVar("T_Class", bound=InformationClass | DMSView)
|
|
24
26
|
T_ClassEntity = TypeVar("T_ClassEntity", bound=Entity)
|
|
25
27
|
T_PropertyEntity = TypeVar("T_PropertyEntity", bound=Entity | str)
|
|
26
28
|
|
|
@@ -108,6 +110,14 @@ class BaseAnalysis(ABC, Generic[T_Rules, T_Class, T_Property, T_ClassEntity, T_P
|
|
|
108
110
|
def inherited_referred_classes(self) -> set[ClassEntity]:
|
|
109
111
|
raise NotImplementedError
|
|
110
112
|
|
|
113
|
+
@property
|
|
114
|
+
def properties_by_neat_id(self) -> dict[URIRef, T_Property]:
|
|
115
|
+
return {cast(URIRef, prop.neatId): prop for prop in self._get_properties()}
|
|
116
|
+
|
|
117
|
+
@property
|
|
118
|
+
def classes_by_neat_id(self) -> dict[URIRef, T_Class]:
|
|
119
|
+
return {cast(URIRef, class_.neatId): class_ for class_ in self._get_classes()}
|
|
120
|
+
|
|
111
121
|
# Todo Lru cache this method.
|
|
112
122
|
def class_parent_pairs(self, allow_different_space: bool = False) -> dict[T_ClassEntity, list[T_ClassEntity]]:
|
|
113
123
|
"""This only returns class - parent pairs only if parent is in the same data model"""
|
|
@@ -176,7 +186,7 @@ class BaseAnalysis(ABC, Generic[T_Rules, T_Class, T_Property, T_ClassEntity, T_P
|
|
|
176
186
|
# ParentClassEntity -> ClassEntity to match the type of class_property_pairs
|
|
177
187
|
if parent in class_property_pairs:
|
|
178
188
|
for property_ in class_property_pairs[parent]:
|
|
179
|
-
property_ = property_.model_copy()
|
|
189
|
+
property_ = property_.model_copy() # type: ignore
|
|
180
190
|
|
|
181
191
|
# This corresponds to importing properties from parent class
|
|
182
192
|
# making sure that the property is attached to desired child class
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
|
|
3
|
+
from rdflib import URIRef
|
|
4
|
+
|
|
1
5
|
from cognite.neat._constants import DMS_LISTABLE_PROPERTY_LIMIT
|
|
2
6
|
from cognite.neat._rules.models.dms import DMSProperty, DMSRules, DMSView
|
|
3
7
|
from cognite.neat._rules.models.entities import ViewEntity
|
|
@@ -35,3 +39,19 @@ class DMSAnalysis(BaseAnalysis[DMSRules, DMSView, DMSProperty, ViewEntity, str])
|
|
|
35
39
|
|
|
36
40
|
def _get_prop_entity(self, property_: DMSProperty) -> str:
|
|
37
41
|
return property_.view_property
|
|
42
|
+
|
|
43
|
+
def views_with_properties_linked_to_classes(
|
|
44
|
+
self,
|
|
45
|
+
consider_inheritance: bool = False,
|
|
46
|
+
allow_different_namespace: bool = False,
|
|
47
|
+
) -> dict[ViewEntity, dict[str, URIRef]]:
|
|
48
|
+
view_property_pairs = self.classes_with_properties(consider_inheritance, allow_different_namespace)
|
|
49
|
+
|
|
50
|
+
view_and_properties_with_links: dict[ViewEntity, dict[str, URIRef]] = defaultdict(dict)
|
|
51
|
+
|
|
52
|
+
for view, properties in view_property_pairs.items():
|
|
53
|
+
view_and_properties_with_links[view] = {
|
|
54
|
+
prop.view_property: prop.logical for prop in properties if prop.logical
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
return view_and_properties_with_links
|