cognite-neat 0.109.4__py3-none-any.whl → 0.110.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_alpha.py +2 -0
- cognite/neat/_client/_api/schema.py +17 -1
- cognite/neat/_client/data_classes/schema.py +3 -3
- cognite/neat/_constants.py +11 -0
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +9 -10
- cognite/neat/_graph/extractors/_iodd.py +3 -3
- cognite/neat/_graph/extractors/_mock_graph_generator.py +9 -7
- cognite/neat/_graph/loaders/_rdf2dms.py +285 -346
- cognite/neat/_graph/queries/_base.py +28 -92
- cognite/neat/_graph/transformers/__init__.py +1 -3
- cognite/neat/_graph/transformers/_rdfpath.py +2 -49
- cognite/neat/_issues/__init__.py +1 -6
- cognite/neat/_issues/_base.py +21 -252
- cognite/neat/_issues/_contextmanagers.py +46 -0
- cognite/neat/_issues/_factory.py +61 -0
- cognite/neat/_issues/errors/__init__.py +18 -4
- cognite/neat/_issues/errors/_wrapper.py +81 -3
- cognite/neat/_issues/formatters.py +4 -4
- cognite/neat/_issues/warnings/__init__.py +3 -2
- cognite/neat/_issues/warnings/_properties.py +8 -0
- cognite/neat/_rules/_constants.py +9 -0
- cognite/neat/_rules/_shared.py +3 -2
- cognite/neat/_rules/analysis/__init__.py +2 -3
- cognite/neat/_rules/analysis/_base.py +450 -258
- cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
- cognite/neat/_rules/exporters/_rules2excel.py +2 -8
- cognite/neat/_rules/exporters/_rules2instance_template.py +2 -2
- cognite/neat/_rules/exporters/_rules2ontology.py +5 -4
- cognite/neat/_rules/importers/_base.py +2 -47
- cognite/neat/_rules/importers/_dms2rules.py +7 -10
- cognite/neat/_rules/importers/_dtdl2rules/dtdl_importer.py +2 -2
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +59 -25
- cognite/neat/_rules/importers/_rdf/_shared.py +1 -1
- cognite/neat/_rules/importers/_spreadsheet2rules.py +12 -9
- cognite/neat/_rules/models/dms/_rules.py +3 -1
- cognite/neat/_rules/models/dms/_rules_input.py +4 -0
- cognite/neat/_rules/models/dms/_validation.py +14 -4
- cognite/neat/_rules/models/entities/_loaders.py +1 -1
- cognite/neat/_rules/models/entities/_multi_value.py +2 -2
- cognite/neat/_rules/models/information/_rules.py +18 -17
- cognite/neat/_rules/models/information/_rules_input.py +2 -1
- cognite/neat/_rules/models/information/_validation.py +3 -1
- cognite/neat/_rules/transformers/__init__.py +8 -2
- cognite/neat/_rules/transformers/_converters.py +228 -43
- cognite/neat/_rules/transformers/_verification.py +5 -10
- cognite/neat/_session/_base.py +4 -4
- cognite/neat/_session/_prepare.py +12 -0
- cognite/neat/_session/_read.py +21 -17
- cognite/neat/_session/_show.py +11 -123
- cognite/neat/_session/_state.py +0 -2
- cognite/neat/_session/_subset.py +64 -0
- cognite/neat/_session/_to.py +63 -12
- cognite/neat/_store/_graph_store.py +5 -246
- cognite/neat/_utils/rdf_.py +2 -2
- cognite/neat/_utils/spreadsheet.py +44 -1
- cognite/neat/_utils/text.py +51 -32
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.109.4.dist-info → cognite_neat-0.110.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.109.4.dist-info → cognite_neat-0.110.0.dist-info}/RECORD +62 -64
- {cognite_neat-0.109.4.dist-info → cognite_neat-0.110.0.dist-info}/WHEEL +1 -1
- cognite/neat/_graph/queries/_construct.py +0 -187
- cognite/neat/_graph/queries/_shared.py +0 -173
- cognite/neat/_rules/analysis/_dms.py +0 -57
- cognite/neat/_rules/analysis/_information.py +0 -249
- cognite/neat/_rules/models/_rdfpath.py +0 -372
- {cognite_neat-0.109.4.dist-info → cognite_neat-0.110.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.109.4.dist-info → cognite_neat-0.110.0.dist-info}/entry_points.txt +0 -0
|
@@ -4,268 +4,114 @@ import urllib.parse
|
|
|
4
4
|
import warnings
|
|
5
5
|
from collections import defaultdict
|
|
6
6
|
from collections.abc import Iterable, Sequence
|
|
7
|
-
from
|
|
7
|
+
from dataclasses import dataclass
|
|
8
8
|
from pathlib import Path
|
|
9
|
-
from typing import Any, cast, get_args
|
|
9
|
+
from typing import Any, Literal, cast, get_args
|
|
10
10
|
|
|
11
11
|
import yaml
|
|
12
12
|
from cognite.client import CogniteClient
|
|
13
13
|
from cognite.client import data_modeling as dm
|
|
14
14
|
from cognite.client.data_classes.capabilities import Capability, DataModelInstancesAcl
|
|
15
|
-
from cognite.client.data_classes.data_modeling import ViewId
|
|
16
15
|
from cognite.client.data_classes.data_modeling.data_types import ListablePropertyType
|
|
17
16
|
from cognite.client.data_classes.data_modeling.ids import InstanceId
|
|
18
17
|
from cognite.client.data_classes.data_modeling.views import SingleEdgeConnection
|
|
19
18
|
from cognite.client.exceptions import CogniteAPIError
|
|
20
19
|
from pydantic import BaseModel, ValidationInfo, create_model, field_validator
|
|
21
|
-
from rdflib import RDF
|
|
20
|
+
from rdflib import RDF
|
|
22
21
|
|
|
23
22
|
from cognite.neat._client import NeatClient
|
|
23
|
+
from cognite.neat._client._api_client import SchemaAPI
|
|
24
24
|
from cognite.neat._constants import DMS_DIRECT_RELATION_LIST_LIMIT, is_readonly_property
|
|
25
|
-
from cognite.neat.
|
|
26
|
-
from cognite.neat._issues import
|
|
27
|
-
from cognite.neat._issues.
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
25
|
+
from cognite.neat._issues import IssueList, NeatIssue, catch_issues
|
|
26
|
+
from cognite.neat._issues.errors import ResourceCreationError, ResourceDuplicatedError, ResourceNotFoundError
|
|
27
|
+
from cognite.neat._issues.warnings import (
|
|
28
|
+
PropertyDirectRelationLimitWarning,
|
|
29
|
+
PropertyMultipleValueWarning,
|
|
30
|
+
PropertyTypeNotSupportedWarning,
|
|
31
|
+
ResourceNeatWarning,
|
|
32
32
|
)
|
|
33
|
-
from cognite.neat.
|
|
34
|
-
from cognite.neat._rules.analysis.
|
|
33
|
+
from cognite.neat._rules.analysis import RulesAnalysis
|
|
34
|
+
from cognite.neat._rules.analysis._base import ViewQuery, ViewQueryDict
|
|
35
35
|
from cognite.neat._rules.models import DMSRules
|
|
36
36
|
from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE, Json, String
|
|
37
|
-
from cognite.neat._rules.models.
|
|
37
|
+
from cognite.neat._rules.models.information._rules import InformationRules
|
|
38
38
|
from cognite.neat._shared import InstanceType
|
|
39
39
|
from cognite.neat._store import NeatGraphStore
|
|
40
40
|
from cognite.neat._utils.auxiliary import create_sha256_hash
|
|
41
41
|
from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
|
|
42
42
|
from cognite.neat._utils.rdf_ import remove_namespace_from_uri
|
|
43
|
+
from cognite.neat._utils.text import humanize_collection
|
|
43
44
|
from cognite.neat._utils.upload import UploadResult
|
|
44
45
|
|
|
45
46
|
from ._base import _END_OF_CLASS, CDFLoader
|
|
46
47
|
|
|
47
48
|
|
|
49
|
+
@dataclass
|
|
50
|
+
class _ViewIterator:
|
|
51
|
+
"""This is a helper class to iterate over the views
|
|
52
|
+
|
|
53
|
+
Args:
|
|
54
|
+
view_id: The view to iterate over
|
|
55
|
+
instance_count: The number of instances in the view
|
|
56
|
+
hierarchical_properties: The properties that are hierarchical, meaning they point to the same instances.
|
|
57
|
+
query: The query to get the instances from the store.
|
|
58
|
+
view: The view object from the client.
|
|
59
|
+
"""
|
|
60
|
+
|
|
61
|
+
view_id: dm.ViewId
|
|
62
|
+
instance_count: int
|
|
63
|
+
hierarchical_properties: set[str]
|
|
64
|
+
query: ViewQuery
|
|
65
|
+
view: dm.View | None = None
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclass
|
|
69
|
+
class _Projection:
|
|
70
|
+
"""This is a helper class to project triples to a node and/or edge(s)"""
|
|
71
|
+
|
|
72
|
+
view_id: dm.ViewId
|
|
73
|
+
used_for: Literal["node", "edge", "all"]
|
|
74
|
+
pydantic_cls: type[BaseModel]
|
|
75
|
+
edge_by_type: dict[str, tuple[str, dm.EdgeConnection]]
|
|
76
|
+
edge_by_prop_id: dict[str, tuple[str, dm.EdgeConnection]]
|
|
77
|
+
|
|
78
|
+
|
|
48
79
|
class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
49
80
|
"""Loads Instances to Cognite Data Fusion Data Model Service from NeatGraph.
|
|
50
81
|
|
|
51
82
|
Args:
|
|
52
|
-
|
|
53
|
-
|
|
83
|
+
dms_rules (DMSRules): The DMS rules used by the data model.
|
|
84
|
+
info_rules (InformationRules): The information rules used by the data model, used to
|
|
85
|
+
look up the instances in the store.
|
|
86
|
+
graph_store (NeatGraphStore): The graph store to load the data from.
|
|
54
87
|
instance_space (str): The instance space to load the data into.
|
|
55
|
-
class_neat_id_by_view_id (dict[ViewId, URIRef] | None): A mapping from view id to class name. Defaults to None.
|
|
56
88
|
create_issues (Sequence[NeatIssue] | None): A list of issues that occurred during reading. Defaults to None.
|
|
57
|
-
tracker (type[Tracker] | None): The tracker to use. Defaults to None.
|
|
58
|
-
rules (DMSRules | None): The DMS rules used by the data model. This is used to lookup the
|
|
59
|
-
instances in the store. Defaults to None.
|
|
60
89
|
client (NeatClient | None): This is used to lookup containers such that the loader
|
|
61
90
|
creates instances in accordance with required constraints. Defaults to None.
|
|
91
|
+
unquote_external_ids (bool): If True, the loader will unquote external ids before creating the instances.
|
|
62
92
|
"""
|
|
63
93
|
|
|
64
94
|
def __init__(
|
|
65
95
|
self,
|
|
96
|
+
dms_rules: DMSRules,
|
|
97
|
+
info_rules: InformationRules,
|
|
66
98
|
graph_store: NeatGraphStore,
|
|
67
|
-
data_model: dm.DataModel[dm.View] | None,
|
|
68
99
|
instance_space: str,
|
|
69
|
-
|
|
70
|
-
create_issues: Sequence[NeatIssue] | None = None,
|
|
71
|
-
tracker: type[Tracker] | None = None,
|
|
72
|
-
rules: DMSRules | None = None,
|
|
100
|
+
space_property: str | None = None,
|
|
73
101
|
client: NeatClient | None = None,
|
|
102
|
+
create_issues: Sequence[NeatIssue] | None = None,
|
|
74
103
|
unquote_external_ids: bool = False,
|
|
75
104
|
):
|
|
76
105
|
super().__init__(graph_store)
|
|
77
|
-
self.
|
|
78
|
-
self.
|
|
79
|
-
self.
|
|
106
|
+
self.dms_rules = dms_rules
|
|
107
|
+
self.info_rules = info_rules
|
|
108
|
+
self._instance_space = instance_space
|
|
109
|
+
self._space_property = space_property
|
|
110
|
+
self._space_by_uri: dict[str, str] = defaultdict(lambda: instance_space)
|
|
80
111
|
self._issues = IssueList(create_issues or [])
|
|
81
|
-
self._tracker: type[Tracker] = tracker or LogTracker
|
|
82
|
-
self.rules = rules
|
|
83
112
|
self._client = client
|
|
84
113
|
self._unquote_external_ids = unquote_external_ids
|
|
85
114
|
|
|
86
|
-
@classmethod
|
|
87
|
-
def from_data_model_id(
|
|
88
|
-
cls,
|
|
89
|
-
client: NeatClient,
|
|
90
|
-
data_model_id: dm.DataModelId,
|
|
91
|
-
graph_store: NeatGraphStore,
|
|
92
|
-
instance_space: str,
|
|
93
|
-
) -> "DMSLoader":
|
|
94
|
-
issues: list[NeatIssue] = []
|
|
95
|
-
data_model: dm.DataModel[dm.View] | None = None
|
|
96
|
-
try:
|
|
97
|
-
data_model = client.data_modeling.data_models.retrieve(data_model_id, inline_views=True).latest_version()
|
|
98
|
-
except Exception as e:
|
|
99
|
-
issues.append(ResourceRetrievalError(data_model_id, "data model", str(e)))
|
|
100
|
-
|
|
101
|
-
return cls(graph_store, data_model, instance_space, {}, issues, client=client)
|
|
102
|
-
|
|
103
|
-
@classmethod
|
|
104
|
-
def from_rules(
|
|
105
|
-
cls,
|
|
106
|
-
rules: DMSRules,
|
|
107
|
-
graph_store: NeatGraphStore,
|
|
108
|
-
instance_space: str,
|
|
109
|
-
client: NeatClient | None = None,
|
|
110
|
-
unquote_external_ids: bool = False,
|
|
111
|
-
) -> "DMSLoader":
|
|
112
|
-
issues: list[NeatIssue] = []
|
|
113
|
-
data_model: dm.DataModel[dm.View] | None = None
|
|
114
|
-
try:
|
|
115
|
-
data_model = rules.as_schema().as_read_model()
|
|
116
|
-
except Exception as e:
|
|
117
|
-
issues.append(
|
|
118
|
-
ResourceConversionError(
|
|
119
|
-
identifier=rules.metadata.as_identifier(),
|
|
120
|
-
resource_type="DMS Rules",
|
|
121
|
-
target_format="read DMS model",
|
|
122
|
-
reason=str(e),
|
|
123
|
-
)
|
|
124
|
-
)
|
|
125
|
-
|
|
126
|
-
class_neat_id_by_view_id = {view.view.as_id(): view.logical for view in rules.views if view.logical}
|
|
127
|
-
|
|
128
|
-
return cls(
|
|
129
|
-
graph_store,
|
|
130
|
-
data_model,
|
|
131
|
-
instance_space,
|
|
132
|
-
class_neat_id_by_view_id,
|
|
133
|
-
issues,
|
|
134
|
-
rules=rules,
|
|
135
|
-
client=client,
|
|
136
|
-
unquote_external_ids=unquote_external_ids,
|
|
137
|
-
)
|
|
138
|
-
|
|
139
|
-
def _load(self, stop_on_exception: bool = False) -> Iterable[dm.InstanceApply | NeatIssue | type[_END_OF_CLASS]]:
|
|
140
|
-
if self._issues.has_errors and stop_on_exception:
|
|
141
|
-
raise self._issues.as_exception()
|
|
142
|
-
elif self._issues.has_errors:
|
|
143
|
-
yield from self._issues
|
|
144
|
-
return
|
|
145
|
-
if not self.data_model:
|
|
146
|
-
# There should already be an error in this case.
|
|
147
|
-
return
|
|
148
|
-
|
|
149
|
-
views_with_linked_properties = (
|
|
150
|
-
DMSAnalysis(self.rules).views_with_properties_linked_to_classes(consider_inheritance=True)
|
|
151
|
-
if self.rules and self.rules.metadata.logical
|
|
152
|
-
else None
|
|
153
|
-
)
|
|
154
|
-
|
|
155
|
-
view_and_count_by_id = self._select_views_with_instances(self.data_model.views)
|
|
156
|
-
|
|
157
|
-
if self._client:
|
|
158
|
-
view_and_count_by_id, properties_point_to_self = self._sort_by_direct_relation_dependencies(
|
|
159
|
-
view_and_count_by_id
|
|
160
|
-
)
|
|
161
|
-
else:
|
|
162
|
-
properties_point_to_self = {}
|
|
163
|
-
|
|
164
|
-
view_ids: list[str] = []
|
|
165
|
-
for view_id in view_and_count_by_id.keys():
|
|
166
|
-
view_ids.append(repr(view_id))
|
|
167
|
-
if view_id in properties_point_to_self:
|
|
168
|
-
# If the views have a dependency on themselves, we need to run it twice.
|
|
169
|
-
view_ids.append(f"{view_id!r} (self)")
|
|
170
|
-
|
|
171
|
-
tracker = self._tracker(type(self).__name__, view_ids, "views")
|
|
172
|
-
for view_id, (view, instance_count) in view_and_count_by_id.items():
|
|
173
|
-
pydantic_cls, edge_by_type, edge_by_prop_id, issues = self._create_validation_classes(view) # type: ignore[var-annotated]
|
|
174
|
-
yield from issues
|
|
175
|
-
tracker.issue(issues)
|
|
176
|
-
|
|
177
|
-
if view_id in properties_point_to_self:
|
|
178
|
-
# If the view has a dependency on itself, we need to run it twice.
|
|
179
|
-
# First, to ensure that all nodes are created, and then to add the direct relations.
|
|
180
|
-
# This only applies if there is a require constraint on the container, if not
|
|
181
|
-
# we can create an empty node on the fly.
|
|
182
|
-
iterations = [properties_point_to_self[view_id], set()]
|
|
183
|
-
else:
|
|
184
|
-
iterations = [set()]
|
|
185
|
-
|
|
186
|
-
for skip_properties in iterations:
|
|
187
|
-
if skip_properties:
|
|
188
|
-
track_id = f"{view_id} (self)"
|
|
189
|
-
else:
|
|
190
|
-
track_id = repr(view_id)
|
|
191
|
-
tracker.start(track_id)
|
|
192
|
-
if views_with_linked_properties:
|
|
193
|
-
# we need graceful exit if the view is not in the view_property_pairs
|
|
194
|
-
property_link_pairs = views_with_linked_properties.get(ViewEntity.from_id(view_id))
|
|
195
|
-
|
|
196
|
-
if class_neat_id := self.class_neat_id_by_view_id.get(view_id):
|
|
197
|
-
reader = self.graph_store._read_via_rules_linkage(class_neat_id, property_link_pairs)
|
|
198
|
-
else:
|
|
199
|
-
error_view = ResourceRetrievalError(view_id, "view", "View not linked to class")
|
|
200
|
-
tracker.issue(error_view)
|
|
201
|
-
if stop_on_exception:
|
|
202
|
-
raise error_view
|
|
203
|
-
yield error_view
|
|
204
|
-
continue
|
|
205
|
-
else:
|
|
206
|
-
# this assumes no changes in the suffix of view and class
|
|
207
|
-
reader = self.graph_store.read(view.external_id)
|
|
208
|
-
|
|
209
|
-
instance_iterable = iterate_progress_bar_if_above_config_threshold(
|
|
210
|
-
reader, instance_count, f"Loading {track_id}"
|
|
211
|
-
)
|
|
212
|
-
|
|
213
|
-
for identifier, properties in instance_iterable:
|
|
214
|
-
start_node, end_node = self._pop_start_end_node(properties)
|
|
215
|
-
is_edge = start_node and end_node
|
|
216
|
-
if (is_edge and view.used_for == "node") or (not is_edge and view.used_for == "edge"):
|
|
217
|
-
instance_type = "edge" if is_edge else "node"
|
|
218
|
-
creation_error = ResourceCreationError(
|
|
219
|
-
identifier,
|
|
220
|
-
instance_type,
|
|
221
|
-
error=f"{instance_type.capitalize()} found in {view.used_for} view",
|
|
222
|
-
)
|
|
223
|
-
tracker.issue(creation_error)
|
|
224
|
-
if stop_on_exception:
|
|
225
|
-
raise creation_error
|
|
226
|
-
yield creation_error
|
|
227
|
-
continue
|
|
228
|
-
|
|
229
|
-
if skip_properties:
|
|
230
|
-
properties = {k: v for k, v in properties.items() if k not in skip_properties}
|
|
231
|
-
|
|
232
|
-
if start_node and end_node:
|
|
233
|
-
# Is an edge
|
|
234
|
-
try:
|
|
235
|
-
yield self._create_edge_with_properties(
|
|
236
|
-
identifier, properties, start_node, end_node, pydantic_cls, view_id
|
|
237
|
-
)
|
|
238
|
-
except ValueError as e:
|
|
239
|
-
error_edge = ResourceCreationError(identifier, "edge", error=str(e))
|
|
240
|
-
tracker.issue(error_edge)
|
|
241
|
-
if stop_on_exception:
|
|
242
|
-
raise error_edge from e
|
|
243
|
-
yield error_edge
|
|
244
|
-
else:
|
|
245
|
-
try:
|
|
246
|
-
yield self._create_node(identifier, properties, pydantic_cls, view_id)
|
|
247
|
-
except ValueError as e:
|
|
248
|
-
error_node = ResourceCreationError(identifier, "node", error=str(e))
|
|
249
|
-
tracker.issue(error_node)
|
|
250
|
-
if stop_on_exception:
|
|
251
|
-
raise error_node from e
|
|
252
|
-
yield error_node
|
|
253
|
-
yield from self._create_edges_without_properties(
|
|
254
|
-
identifier, properties, edge_by_type, edge_by_prop_id, tracker
|
|
255
|
-
)
|
|
256
|
-
tracker.finish(track_id)
|
|
257
|
-
yield _END_OF_CLASS
|
|
258
|
-
|
|
259
|
-
@staticmethod
|
|
260
|
-
def _pop_start_end_node(properties: dict[str | InstanceType, list[str]]) -> tuple[str | None, str | None]:
|
|
261
|
-
start_node = properties.pop("startNode", [None])[0]
|
|
262
|
-
if not start_node:
|
|
263
|
-
start_node = properties.pop("start_node", [None])[0]
|
|
264
|
-
end_node = properties.pop("endNode", [None])[0]
|
|
265
|
-
if not end_node:
|
|
266
|
-
end_node = properties.pop("end_node", [None])[0]
|
|
267
|
-
return start_node, end_node
|
|
268
|
-
|
|
269
115
|
def write_to_file(self, filepath: Path) -> None:
|
|
270
116
|
if filepath.suffix not in [".json", ".yaml", ".yml"]:
|
|
271
117
|
raise ValueError(f"File format {filepath.suffix} is not supported")
|
|
@@ -286,76 +132,124 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
286
132
|
else:
|
|
287
133
|
yaml.safe_dump(dumped, f, sort_keys=False)
|
|
288
134
|
|
|
289
|
-
def
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
135
|
+
def _load(self, stop_on_exception: bool = False) -> Iterable[dm.InstanceApply | NeatIssue | type[_END_OF_CLASS]]:
|
|
136
|
+
if self._issues.has_errors and stop_on_exception:
|
|
137
|
+
raise self._issues.as_exception()
|
|
138
|
+
elif self._issues.has_errors:
|
|
139
|
+
yield from self._issues
|
|
140
|
+
return
|
|
141
|
+
view_iterations, issues = self._create_view_iterations()
|
|
142
|
+
yield from issues
|
|
143
|
+
if self._space_property:
|
|
144
|
+
yield from self._lookup_space_by_uri(view_iterations, stop_on_exception)
|
|
145
|
+
|
|
146
|
+
for it in view_iterations:
|
|
147
|
+
view = it.view
|
|
148
|
+
if view is None:
|
|
149
|
+
yield ResourceNotFoundError(it.view_id, "view", more=f"Skipping {it.instance_count} instances...")
|
|
150
|
+
continue
|
|
151
|
+
projection, issues = self._create_projection(view)
|
|
152
|
+
yield from issues
|
|
153
|
+
query = it.query
|
|
154
|
+
reader = self.graph_store.read(query.rdf_type, property_renaming_config=query.property_renaming_config)
|
|
155
|
+
instance_iterable = iterate_progress_bar_if_above_config_threshold(
|
|
156
|
+
reader, it.instance_count, f"Loading {it.view_id!r}"
|
|
157
|
+
)
|
|
158
|
+
for identifier, properties in instance_iterable:
|
|
159
|
+
yield from self._create_instances(
|
|
160
|
+
identifier, properties, projection, stop_on_exception, exclude=it.hierarchical_properties
|
|
161
|
+
)
|
|
162
|
+
if it.hierarchical_properties:
|
|
163
|
+
# Force the creation of instances, before we create the hierarchical properties.
|
|
164
|
+
yield _END_OF_CLASS
|
|
165
|
+
yield from self._create_hierarchical_properties(it, projection, stop_on_exception)
|
|
166
|
+
|
|
167
|
+
yield _END_OF_CLASS
|
|
168
|
+
|
|
169
|
+
def _create_hierarchical_properties(
|
|
170
|
+
self, it: _ViewIterator, projection: _Projection, stop_on_exception: bool
|
|
171
|
+
) -> Iterable[dm.InstanceApply | NeatIssue]:
|
|
172
|
+
reader = self.graph_store.read(it.query.rdf_type, property_renaming_config=it.query.property_renaming_config)
|
|
173
|
+
instance_iterable = iterate_progress_bar_if_above_config_threshold(
|
|
174
|
+
reader,
|
|
175
|
+
it.instance_count,
|
|
176
|
+
f"Loading {it.view_id!r} hierarchical properties: {humanize_collection(it.hierarchical_properties)}",
|
|
177
|
+
)
|
|
178
|
+
for identifier, properties in instance_iterable:
|
|
179
|
+
yield from self._create_instances(
|
|
180
|
+
identifier, properties, projection, stop_on_exception, include=it.hierarchical_properties
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
def _create_view_iterations(self) -> tuple[list[_ViewIterator], IssueList]:
|
|
184
|
+
view_query_by_id = RulesAnalysis(self.info_rules, self.dms_rules).view_query_by_id
|
|
185
|
+
iterations_by_view_id = self._select_views_with_instances(view_query_by_id)
|
|
186
|
+
if self._client:
|
|
187
|
+
issues = IssueList()
|
|
188
|
+
views = self._client.data_modeling.views.retrieve(
|
|
189
|
+
list(iterations_by_view_id.keys()), include_inherited_properties=True
|
|
190
|
+
)
|
|
191
|
+
else:
|
|
192
|
+
views = dm.ViewList([])
|
|
193
|
+
with catch_issues() as issues:
|
|
194
|
+
read_model = self.dms_rules.as_schema().as_read_model()
|
|
195
|
+
views.extend(read_model.views)
|
|
196
|
+
if issues.has_errors:
|
|
197
|
+
return [], issues
|
|
198
|
+
views_by_id = {view.as_id(): view for view in views}
|
|
199
|
+
hierarchical_properties_by_view_id = SchemaAPI.get_hierarchical_properties(views)
|
|
200
|
+
|
|
201
|
+
def sort_by_instance_type(id_: dm.ViewId) -> int:
|
|
202
|
+
if id_ not in views_by_id:
|
|
203
|
+
return 0
|
|
204
|
+
return {"node": 1, "all": 2, "edge": 3}.get(views_by_id[id_].used_for, 0)
|
|
205
|
+
|
|
206
|
+
ordered_view_ids = sorted(iterations_by_view_id.keys(), key=sort_by_instance_type)
|
|
207
|
+
view_iterations: list[_ViewIterator] = []
|
|
208
|
+
for view_id in ordered_view_ids:
|
|
209
|
+
if view_id not in iterations_by_view_id:
|
|
304
210
|
continue
|
|
211
|
+
view_iteration = iterations_by_view_id[view_id]
|
|
212
|
+
view_iteration.view = views_by_id.get(view_id)
|
|
213
|
+
view_iteration.hierarchical_properties = hierarchical_properties_by_view_id.get(view_id, set())
|
|
214
|
+
view_iterations.append(view_iteration)
|
|
215
|
+
return view_iterations, issues
|
|
216
|
+
|
|
217
|
+
def _select_views_with_instances(self, view_query_by_id: ViewQueryDict) -> dict[dm.ViewId, _ViewIterator]:
|
|
218
|
+
"""Selects the views with data."""
|
|
219
|
+
view_iterations: dict[dm.ViewId, _ViewIterator] = {}
|
|
220
|
+
for view_id, query in view_query_by_id.items():
|
|
221
|
+
count = self.graph_store.queries.count_of_type(query.rdf_type)
|
|
305
222
|
if count > 0:
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
self
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
223
|
+
view_iterations[view_id] = _ViewIterator(view_id, count, set(), query)
|
|
224
|
+
return view_iterations
|
|
225
|
+
|
|
226
|
+
def _lookup_space_by_uri(self, view_iterations: list[_ViewIterator], stop_on_exception: bool = False) -> IssueList:
|
|
227
|
+
issues = IssueList()
|
|
228
|
+
if self._space_property is None:
|
|
229
|
+
return issues
|
|
230
|
+
total = sum(it.instance_count for it in view_iterations)
|
|
231
|
+
properties_by_uriref = self.graph_store.queries.properties()
|
|
232
|
+
space_property_uri = next((k for k, v in properties_by_uriref.items() if v == self._space_property), None)
|
|
233
|
+
if space_property_uri is None:
|
|
234
|
+
error: ResourceNotFoundError[str, str] = ResourceNotFoundError(
|
|
235
|
+
self._space_property,
|
|
236
|
+
"property",
|
|
237
|
+
more=f"Could not find the {self._space_property} in the graph.",
|
|
238
|
+
)
|
|
239
|
+
if stop_on_exception:
|
|
240
|
+
raise error
|
|
241
|
+
issues.append(error)
|
|
242
|
+
return issues
|
|
243
|
+
|
|
244
|
+
instance_iterable = self.graph_store.queries.list_instances_ids_by_space(space_property_uri)
|
|
245
|
+
instance_iterable = iterate_progress_bar_if_above_config_threshold(
|
|
246
|
+
instance_iterable, total, f"Looking up spaces for {total} instances..."
|
|
320
247
|
)
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
container_by_id = {container.as_id(): container for container in containers}
|
|
327
|
-
|
|
328
|
-
dependency_on_self: dict[dm.ViewId, set[str]] = defaultdict(set)
|
|
329
|
-
view_id_by_dependencies: dict[dm.ViewId, set[dm.ViewId]] = {}
|
|
330
|
-
for view in views:
|
|
331
|
-
view_id = view.as_id()
|
|
332
|
-
dependencies = set()
|
|
333
|
-
for prop_id, prop in view.properties.items():
|
|
334
|
-
if isinstance(prop, dm.MappedProperty) and isinstance(prop.type, dm.DirectRelation) and prop.source:
|
|
335
|
-
container = container_by_id[prop.container]
|
|
336
|
-
has_require_constraint = any(
|
|
337
|
-
isinstance(constraint, dm.RequiresConstraint) for constraint in container.constraints.values()
|
|
338
|
-
)
|
|
339
|
-
if has_require_constraint and prop.source == view_id:
|
|
340
|
-
dependency_on_self[view_id].add(prop_id)
|
|
341
|
-
elif has_require_constraint:
|
|
342
|
-
dependencies.add(prop.source)
|
|
343
|
-
view_id_by_dependencies[view_id] = dependencies
|
|
344
|
-
|
|
345
|
-
ordered_view_ids = TopologicalSorter(view_id_by_dependencies).static_order()
|
|
346
|
-
|
|
347
|
-
return {
|
|
348
|
-
view_id: view_and_count_by_id[view_id] for view_id in ordered_view_ids if view_id in view_and_count_by_id
|
|
349
|
-
}, dict(dependency_on_self)
|
|
350
|
-
|
|
351
|
-
def _create_validation_classes(
|
|
352
|
-
self, view: dm.View
|
|
353
|
-
) -> tuple[
|
|
354
|
-
type[BaseModel],
|
|
355
|
-
dict[str, tuple[str, dm.EdgeConnection]],
|
|
356
|
-
dict[str, tuple[str, dm.EdgeConnection]],
|
|
357
|
-
NeatIssueList,
|
|
358
|
-
]:
|
|
248
|
+
for instance, space in instance_iterable:
|
|
249
|
+
self._space_by_uri[remove_namespace_from_uri(instance)] = space
|
|
250
|
+
return issues
|
|
251
|
+
|
|
252
|
+
def _create_projection(self, view: dm.View) -> tuple[_Projection, IssueList]:
|
|
359
253
|
issues = IssueList()
|
|
360
254
|
field_definitions: dict[str, tuple[type, Any]] = {}
|
|
361
255
|
edge_by_type: dict[str, tuple[str, dm.EdgeConnection]] = {}
|
|
@@ -414,9 +308,15 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
414
308
|
|
|
415
309
|
def parse_list(cls, value: Any, info: ValidationInfo) -> list[str]:
|
|
416
310
|
if isinstance(value, list) and list.__name__ not in _get_field_value_types(cls, info):
|
|
417
|
-
if len(value)
|
|
418
|
-
|
|
419
|
-
|
|
311
|
+
if len(value) > 1:
|
|
312
|
+
warnings.warn(
|
|
313
|
+
# the identifier is unknown here; it will be set in the create_instances method
|
|
314
|
+
PropertyMultipleValueWarning("", "property", str(info.field_name), value=str(value[0])),
|
|
315
|
+
stacklevel=2,
|
|
316
|
+
)
|
|
317
|
+
elif not value:
|
|
318
|
+
return None # type: ignore[return-value]
|
|
319
|
+
return value[0]
|
|
420
320
|
|
|
421
321
|
return value
|
|
422
322
|
|
|
@@ -446,7 +346,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
446
346
|
def parse_direct_relation(cls, value: list, info: ValidationInfo) -> dict | list[dict]:
|
|
447
347
|
# We validate above that we only get one value for single direct relations.
|
|
448
348
|
if list.__name__ in _get_field_value_types(cls, info):
|
|
449
|
-
|
|
349
|
+
external_ids = (remove_namespace_from_uri(v) for v in value)
|
|
350
|
+
result = [{"space": self._space_by_uri[e], "externalId": e} for e in external_ids]
|
|
351
|
+
# Todo: Account for max_list_limit
|
|
450
352
|
if len(result) <= DMS_DIRECT_RELATION_LIST_LIMIT:
|
|
451
353
|
return result
|
|
452
354
|
warnings.warn(
|
|
@@ -462,7 +364,8 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
462
364
|
result.sort(key=lambda x: (x["space"], x["externalId"]))
|
|
463
365
|
return result[:DMS_DIRECT_RELATION_LIST_LIMIT]
|
|
464
366
|
elif value:
|
|
465
|
-
|
|
367
|
+
external_id = remove_namespace_from_uri(value[0])
|
|
368
|
+
return {"space": self._space_by_uri[external_id], "externalId": external_id}
|
|
466
369
|
return {}
|
|
467
370
|
|
|
468
371
|
validators["parse_direct_relation"] = field_validator(*direct_relation_by_property.keys(), mode="before")( # type: ignore[assignment]
|
|
@@ -481,70 +384,91 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
481
384
|
)
|
|
482
385
|
|
|
483
386
|
pydantic_cls = create_model(view.external_id, __validators__=validators, **field_definitions) # type: ignore[arg-type, call-overload]
|
|
484
|
-
return pydantic_cls, edge_by_type, edge_by_prop_id, issues
|
|
485
|
-
|
|
486
|
-
def _create_node(
|
|
487
|
-
self,
|
|
488
|
-
identifier: str,
|
|
489
|
-
properties: dict[str | InstanceType, list[str]],
|
|
490
|
-
pydantic_cls: type[BaseModel],
|
|
491
|
-
view_id: dm.ViewId,
|
|
492
|
-
) -> dm.InstanceApply:
|
|
493
|
-
type_ = properties.pop(RDF.type, [None])[0]
|
|
494
|
-
created = pydantic_cls.model_validate(properties)
|
|
495
|
-
if self._unquote_external_ids:
|
|
496
|
-
identifier = urllib.parse.unquote(identifier)
|
|
497
|
-
|
|
498
|
-
return dm.NodeApply(
|
|
499
|
-
space=self.instance_space,
|
|
500
|
-
external_id=identifier,
|
|
501
|
-
type=(dm.DirectRelationReference(view_id.space, view_id.external_id) if type_ is not None else None),
|
|
502
|
-
sources=[
|
|
503
|
-
dm.NodeOrEdgeData(source=view_id, properties=dict(created.model_dump(exclude_unset=True).items()))
|
|
504
|
-
],
|
|
505
|
-
)
|
|
387
|
+
return _Projection(view.as_id(), view.used_for, pydantic_cls, edge_by_type, edge_by_prop_id), issues
|
|
506
388
|
|
|
507
|
-
def
|
|
389
|
+
def _create_instances(
|
|
508
390
|
self,
|
|
509
391
|
identifier: str,
|
|
510
392
|
properties: dict[str | InstanceType, list[str]],
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
) -> dm.
|
|
516
|
-
type_ = properties.pop(RDF.type, [None])[0]
|
|
517
|
-
created = pydantic_cls.model_validate(properties)
|
|
518
|
-
if type_ is None:
|
|
519
|
-
raise ValueError(f"Missing type for edge {identifier}")
|
|
520
|
-
|
|
393
|
+
projection: _Projection,
|
|
394
|
+
stop_on_exception: bool = False,
|
|
395
|
+
exclude: set[str] | None = None,
|
|
396
|
+
include: set[str] | None = None,
|
|
397
|
+
) -> Iterable[dm.InstanceApply | NeatIssue]:
|
|
521
398
|
if self._unquote_external_ids:
|
|
522
399
|
identifier = urllib.parse.unquote(identifier)
|
|
400
|
+
start_node, end_node = self._pop_start_end_node(properties)
|
|
401
|
+
is_edge = start_node and end_node
|
|
402
|
+
instance_type = "edge" if is_edge else "node"
|
|
403
|
+
if (projection.used_for == "node" and is_edge) or (projection.used_for == "edge" and not is_edge):
|
|
404
|
+
creation_error = ResourceCreationError(
|
|
405
|
+
identifier,
|
|
406
|
+
instance_type,
|
|
407
|
+
f"View used for {projection.used_for} instance {identifier!s} but is {instance_type}",
|
|
408
|
+
)
|
|
409
|
+
if stop_on_exception:
|
|
410
|
+
raise creation_error from None
|
|
411
|
+
yield creation_error
|
|
412
|
+
return
|
|
523
413
|
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
414
|
+
if RDF.type not in properties:
|
|
415
|
+
error = ResourceCreationError(identifier, instance_type, "No rdf:type found")
|
|
416
|
+
if stop_on_exception:
|
|
417
|
+
raise error from None
|
|
418
|
+
yield error
|
|
419
|
+
return
|
|
420
|
+
_ = properties.pop(RDF.type)[0]
|
|
421
|
+
if start_node and self._unquote_external_ids:
|
|
422
|
+
start_node = urllib.parse.unquote(start_node)
|
|
423
|
+
if end_node and self._unquote_external_ids:
|
|
424
|
+
end_node = urllib.parse.unquote(end_node)
|
|
425
|
+
|
|
426
|
+
if exclude:
|
|
427
|
+
properties = {k: v for k, v in properties.items() if k not in exclude}
|
|
428
|
+
if include:
|
|
429
|
+
properties = {k: v for k, v in properties.items() if k in include}
|
|
430
|
+
|
|
431
|
+
with catch_issues() as property_issues:
|
|
432
|
+
sources = [
|
|
433
|
+
dm.NodeOrEdgeData(
|
|
434
|
+
projection.view_id,
|
|
435
|
+
projection.pydantic_cls.model_validate(properties).model_dump(exclude_unset=True),
|
|
436
|
+
)
|
|
437
|
+
]
|
|
438
|
+
for issue in property_issues:
|
|
439
|
+
if isinstance(issue, ResourceNeatWarning):
|
|
440
|
+
issue.identifier = identifier
|
|
441
|
+
|
|
442
|
+
if property_issues.has_errors and stop_on_exception:
|
|
443
|
+
raise property_issues.as_exception()
|
|
444
|
+
yield from property_issues
|
|
445
|
+
|
|
446
|
+
if start_node and end_node:
|
|
447
|
+
yield dm.EdgeApply(
|
|
448
|
+
space=self._space_by_uri[identifier],
|
|
449
|
+
external_id=identifier,
|
|
450
|
+
type=(projection.view_id.space, projection.view_id.external_id),
|
|
451
|
+
start_node=(self._space_by_uri[start_node], start_node),
|
|
452
|
+
end_node=(self._space_by_uri[end_node], end_node),
|
|
453
|
+
sources=sources,
|
|
454
|
+
)
|
|
455
|
+
else:
|
|
456
|
+
yield dm.NodeApply(
|
|
457
|
+
space=self._space_by_uri[identifier],
|
|
458
|
+
external_id=identifier,
|
|
459
|
+
type=(projection.view_id.space, projection.view_id.external_id),
|
|
460
|
+
sources=sources,
|
|
461
|
+
)
|
|
462
|
+
yield from self._create_edges_without_properties(identifier, properties, projection)
|
|
534
463
|
|
|
535
464
|
def _create_edges_without_properties(
|
|
536
|
-
self,
|
|
537
|
-
identifier: str,
|
|
538
|
-
properties: dict[str, list[str]],
|
|
539
|
-
edge_by_type: dict[str, tuple[str, dm.EdgeConnection]],
|
|
540
|
-
edge_by_prop_id: dict[str, tuple[str, dm.EdgeConnection]],
|
|
541
|
-
tracker: Tracker,
|
|
465
|
+
self, identifier: str, properties: dict[str | InstanceType, list[str]], projection: _Projection
|
|
542
466
|
) -> Iterable[dm.EdgeApply | NeatIssue]:
|
|
543
467
|
for predicate, values in properties.items():
|
|
544
|
-
if predicate in edge_by_type:
|
|
545
|
-
prop_id, edge = edge_by_type[predicate]
|
|
546
|
-
elif predicate in edge_by_prop_id:
|
|
547
|
-
prop_id, edge = edge_by_prop_id[predicate]
|
|
468
|
+
if predicate in projection.edge_by_type:
|
|
469
|
+
prop_id, edge = projection.edge_by_type[predicate]
|
|
470
|
+
elif predicate in projection.edge_by_prop_id:
|
|
471
|
+
prop_id, edge = projection.edge_by_prop_id[predicate]
|
|
548
472
|
else:
|
|
549
473
|
continue
|
|
550
474
|
if isinstance(edge, SingleEdgeConnection) and len(values) > 1:
|
|
@@ -553,21 +477,36 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
553
477
|
identifier=identifier,
|
|
554
478
|
location=f"Multiple values for single edge {edge}. Expected only one.",
|
|
555
479
|
)
|
|
556
|
-
tracker.issue(error)
|
|
557
480
|
yield error
|
|
481
|
+
continue
|
|
558
482
|
for target in values:
|
|
559
483
|
external_id = f"{identifier}.{prop_id}.{target}"
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
484
|
+
start_node, end_node = (
|
|
485
|
+
(self._space_by_uri[identifier], identifier),
|
|
486
|
+
(self._space_by_uri[target], target),
|
|
487
|
+
)
|
|
488
|
+
if edge.direction == "inwards":
|
|
489
|
+
start_node, end_node = end_node, start_node
|
|
563
490
|
yield dm.EdgeApply(
|
|
564
|
-
space=self.
|
|
491
|
+
space=self._space_by_uri[identifier],
|
|
565
492
|
external_id=(external_id if len(external_id) < 256 else create_sha256_hash(external_id)),
|
|
566
493
|
type=edge.type,
|
|
567
|
-
start_node=
|
|
568
|
-
end_node=
|
|
494
|
+
start_node=start_node,
|
|
495
|
+
end_node=end_node,
|
|
569
496
|
)
|
|
570
497
|
|
|
498
|
+
@staticmethod
|
|
499
|
+
def _pop_start_end_node(properties: dict[str | InstanceType, list[str]]) -> tuple[str, str] | tuple[None, None]:
|
|
500
|
+
start_node = properties.pop("startNode", [None])[0]
|
|
501
|
+
if not start_node:
|
|
502
|
+
start_node = properties.pop("start_node", [None])[0]
|
|
503
|
+
end_node = properties.pop("endNode", [None])[0]
|
|
504
|
+
if not end_node:
|
|
505
|
+
end_node = properties.pop("end_node", [None])[0]
|
|
506
|
+
if start_node and end_node:
|
|
507
|
+
return start_node, end_node
|
|
508
|
+
return None, None
|
|
509
|
+
|
|
571
510
|
def _get_required_capabilities(self) -> list[Capability]:
|
|
572
511
|
return [
|
|
573
512
|
DataModelInstancesAcl(
|
|
@@ -576,7 +515,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
576
515
|
DataModelInstancesAcl.Action.Write_Properties,
|
|
577
516
|
DataModelInstancesAcl.Action.Read,
|
|
578
517
|
],
|
|
579
|
-
scope=DataModelInstancesAcl.Scope.SpaceID([self.
|
|
518
|
+
scope=DataModelInstancesAcl.Scope.SpaceID([self._instance_space]),
|
|
580
519
|
)
|
|
581
520
|
]
|
|
582
521
|
|