cognite-neat 0.110.0__py3-none-any.whl → 0.111.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_alpha.py +6 -0
- cognite/neat/_client/_api/schema.py +26 -0
- cognite/neat/_client/data_classes/schema.py +1 -1
- cognite/neat/_constants.py +4 -1
- cognite/neat/_graph/extractors/__init__.py +4 -0
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +8 -16
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +39 -9
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +23 -17
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +15 -17
- cognite/neat/_graph/extractors/_dict.py +102 -0
- cognite/neat/_graph/extractors/_dms.py +27 -40
- cognite/neat/_graph/extractors/_dms_graph.py +30 -3
- cognite/neat/_graph/extractors/_raw.py +67 -0
- cognite/neat/_graph/loaders/_base.py +20 -4
- cognite/neat/_graph/loaders/_rdf2dms.py +243 -89
- cognite/neat/_graph/queries/_base.py +137 -43
- cognite/neat/_graph/transformers/_classic_cdf.py +6 -22
- cognite/neat/_issues/_factory.py +9 -1
- cognite/neat/_issues/errors/__init__.py +2 -0
- cognite/neat/_issues/errors/_external.py +7 -0
- cognite/neat/_issues/warnings/user_modeling.py +12 -0
- cognite/neat/_rules/_constants.py +3 -0
- cognite/neat/_rules/analysis/_base.py +29 -50
- cognite/neat/_rules/exporters/_rules2excel.py +1 -1
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +16 -10
- cognite/neat/_rules/models/_base_rules.py +0 -2
- cognite/neat/_rules/models/data_types.py +7 -0
- cognite/neat/_rules/models/dms/_exporter.py +9 -8
- cognite/neat/_rules/models/dms/_rules.py +26 -1
- cognite/neat/_rules/models/dms/_rules_input.py +5 -1
- cognite/neat/_rules/models/dms/_validation.py +101 -1
- cognite/neat/_rules/models/entities/_single_value.py +8 -3
- cognite/neat/_rules/models/entities/_wrapped.py +2 -2
- cognite/neat/_rules/models/information/_rules_input.py +1 -0
- cognite/neat/_rules/models/information/_validation.py +64 -17
- cognite/neat/_rules/transformers/_converters.py +7 -2
- cognite/neat/_session/_base.py +2 -0
- cognite/neat/_session/_explore.py +39 -0
- cognite/neat/_session/_inspect.py +25 -6
- cognite/neat/_session/_read.py +67 -3
- cognite/neat/_session/_set.py +7 -1
- cognite/neat/_session/_state.py +6 -0
- cognite/neat/_session/_to.py +115 -8
- cognite/neat/_store/_graph_store.py +8 -4
- cognite/neat/_utils/rdf_.py +34 -3
- cognite/neat/_utils/text.py +72 -4
- cognite/neat/_utils/upload.py +2 -0
- cognite/neat/_version.py +2 -2
- {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.0.dist-info}/RECORD +53 -50
- {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.0.dist-info}/entry_points.txt +0 -0
|
@@ -6,7 +6,7 @@ from collections import defaultdict
|
|
|
6
6
|
from collections.abc import Iterable, Sequence
|
|
7
7
|
from dataclasses import dataclass
|
|
8
8
|
from pathlib import Path
|
|
9
|
-
from typing import Any, Literal, cast, get_args
|
|
9
|
+
from typing import Any, Literal, cast, get_args, overload
|
|
10
10
|
|
|
11
11
|
import yaml
|
|
12
12
|
from cognite.client import CogniteClient
|
|
@@ -17,14 +17,20 @@ from cognite.client.data_classes.data_modeling.ids import InstanceId
|
|
|
17
17
|
from cognite.client.data_classes.data_modeling.views import SingleEdgeConnection
|
|
18
18
|
from cognite.client.exceptions import CogniteAPIError
|
|
19
19
|
from pydantic import BaseModel, ValidationInfo, create_model, field_validator
|
|
20
|
-
from rdflib import RDF
|
|
20
|
+
from rdflib import RDF, URIRef
|
|
21
21
|
|
|
22
22
|
from cognite.neat._client import NeatClient
|
|
23
23
|
from cognite.neat._client._api_client import SchemaAPI
|
|
24
24
|
from cognite.neat._constants import DMS_DIRECT_RELATION_LIST_LIMIT, is_readonly_property
|
|
25
|
-
from cognite.neat._issues import IssueList, NeatIssue, catch_issues
|
|
26
|
-
from cognite.neat._issues.errors import
|
|
25
|
+
from cognite.neat._issues import IssueList, NeatError, NeatIssue, catch_issues
|
|
26
|
+
from cognite.neat._issues.errors import (
|
|
27
|
+
AuthorizationError,
|
|
28
|
+
ResourceCreationError,
|
|
29
|
+
ResourceDuplicatedError,
|
|
30
|
+
ResourceNotFoundError,
|
|
31
|
+
)
|
|
27
32
|
from cognite.neat._issues.warnings import (
|
|
33
|
+
NeatValueWarning,
|
|
28
34
|
PropertyDirectRelationLimitWarning,
|
|
29
35
|
PropertyMultipleValueWarning,
|
|
30
36
|
PropertyTypeNotSupportedWarning,
|
|
@@ -39,11 +45,11 @@ from cognite.neat._shared import InstanceType
|
|
|
39
45
|
from cognite.neat._store import NeatGraphStore
|
|
40
46
|
from cognite.neat._utils.auxiliary import create_sha256_hash
|
|
41
47
|
from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
|
|
42
|
-
from cognite.neat._utils.rdf_ import remove_namespace_from_uri
|
|
43
|
-
from cognite.neat._utils.text import humanize_collection
|
|
48
|
+
from cognite.neat._utils.rdf_ import namespace_as_space, remove_namespace_from_uri, split_uri
|
|
49
|
+
from cognite.neat._utils.text import NamingStandardization, humanize_collection
|
|
44
50
|
from cognite.neat._utils.upload import UploadResult
|
|
45
51
|
|
|
46
|
-
from ._base import _END_OF_CLASS, CDFLoader
|
|
52
|
+
from ._base import _END_OF_CLASS, _START_OF_CLASS, CDFLoader
|
|
47
53
|
|
|
48
54
|
|
|
49
55
|
@dataclass
|
|
@@ -89,6 +95,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
89
95
|
client (NeatClient | None): This is used to lookup containers such that the loader
|
|
90
96
|
creates instances in accordance with required constraints. Defaults to None.
|
|
91
97
|
unquote_external_ids (bool): If True, the loader will unquote external ids before creating the instances.
|
|
98
|
+
neat_prefix_by_predicate_uri (dict[URIRef, str] | None): A dictionary that maps a predicate URIRef to a
|
|
99
|
+
prefix that Neat added to the object upon extraction. This is used to remove the prefix from the
|
|
100
|
+
object before creating the instance.
|
|
92
101
|
"""
|
|
93
102
|
|
|
94
103
|
def __init__(
|
|
@@ -98,16 +107,23 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
98
107
|
graph_store: NeatGraphStore,
|
|
99
108
|
instance_space: str,
|
|
100
109
|
space_property: str | None = None,
|
|
110
|
+
use_source_space: bool = False,
|
|
101
111
|
client: NeatClient | None = None,
|
|
102
112
|
create_issues: Sequence[NeatIssue] | None = None,
|
|
103
113
|
unquote_external_ids: bool = False,
|
|
114
|
+
neat_prefix_by_predicate_uri: dict[URIRef, str] | None = None,
|
|
115
|
+
neat_prefix_by_type_uri: dict[URIRef, str] | None = None,
|
|
104
116
|
):
|
|
105
117
|
super().__init__(graph_store)
|
|
106
118
|
self.dms_rules = dms_rules
|
|
107
119
|
self.info_rules = info_rules
|
|
120
|
+
self.neat_prefix_by_predicate_uri = neat_prefix_by_predicate_uri or {}
|
|
121
|
+
self.neat_prefix_by_type_uri = neat_prefix_by_type_uri or {}
|
|
108
122
|
self._instance_space = instance_space
|
|
109
123
|
self._space_property = space_property
|
|
110
|
-
self.
|
|
124
|
+
self._use_source_space = use_source_space
|
|
125
|
+
self._space_by_instance_uri: dict[URIRef, str] = defaultdict(lambda: instance_space)
|
|
126
|
+
self._external_id_by_uri: dict[URIRef, str] = {}
|
|
111
127
|
self._issues = IssueList(create_issues or [])
|
|
112
128
|
self._client = client
|
|
113
129
|
self._unquote_external_ids = unquote_external_ids
|
|
@@ -132,7 +148,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
132
148
|
else:
|
|
133
149
|
yaml.safe_dump(dumped, f, sort_keys=False)
|
|
134
150
|
|
|
135
|
-
def _load(
|
|
151
|
+
def _load(
|
|
152
|
+
self, stop_on_exception: bool = False
|
|
153
|
+
) -> Iterable[dm.InstanceApply | NeatIssue | type[_END_OF_CLASS] | _START_OF_CLASS]:
|
|
136
154
|
if self._issues.has_errors and stop_on_exception:
|
|
137
155
|
raise self._issues.as_exception()
|
|
138
156
|
elif self._issues.has_errors:
|
|
@@ -143,6 +161,16 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
143
161
|
if self._space_property:
|
|
144
162
|
yield from self._lookup_space_by_uri(view_iterations, stop_on_exception)
|
|
145
163
|
|
|
164
|
+
if self._client:
|
|
165
|
+
space_creation = self._create_instance_space_if_not_exists()
|
|
166
|
+
yield from space_creation.warnings
|
|
167
|
+
if space_creation.has_errors and stop_on_exception:
|
|
168
|
+
raise space_creation.as_exception()
|
|
169
|
+
yield from space_creation.errors
|
|
170
|
+
|
|
171
|
+
if self.neat_prefix_by_type_uri:
|
|
172
|
+
self._lookup_identifier_by_uri()
|
|
173
|
+
|
|
146
174
|
for it in view_iterations:
|
|
147
175
|
view = it.view
|
|
148
176
|
if view is None:
|
|
@@ -151,10 +179,15 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
151
179
|
projection, issues = self._create_projection(view)
|
|
152
180
|
yield from issues
|
|
153
181
|
query = it.query
|
|
154
|
-
reader = self.graph_store.read(
|
|
182
|
+
reader = self.graph_store.read(
|
|
183
|
+
query.rdf_type,
|
|
184
|
+
property_renaming_config=query.property_renaming_config,
|
|
185
|
+
remove_uri_namespace=False,
|
|
186
|
+
)
|
|
155
187
|
instance_iterable = iterate_progress_bar_if_above_config_threshold(
|
|
156
188
|
reader, it.instance_count, f"Loading {it.view_id!r}"
|
|
157
189
|
)
|
|
190
|
+
yield _START_OF_CLASS(view.external_id)
|
|
158
191
|
for identifier, properties in instance_iterable:
|
|
159
192
|
yield from self._create_instances(
|
|
160
193
|
identifier, properties, projection, stop_on_exception, exclude=it.hierarchical_properties
|
|
@@ -162,7 +195,10 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
162
195
|
if it.hierarchical_properties:
|
|
163
196
|
# Force the creation of instances, before we create the hierarchical properties.
|
|
164
197
|
yield _END_OF_CLASS
|
|
198
|
+
yield _START_OF_CLASS(f"{view.external_id} ({humanize_collection(it.hierarchical_properties)})")
|
|
165
199
|
yield from self._create_hierarchical_properties(it, projection, stop_on_exception)
|
|
200
|
+
if reader is not instance_iterable:
|
|
201
|
+
print(f"Loaded {it.instance_count} instances for {it.view_id!r}")
|
|
166
202
|
|
|
167
203
|
yield _END_OF_CLASS
|
|
168
204
|
|
|
@@ -188,6 +224,12 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
188
224
|
views = self._client.data_modeling.views.retrieve(
|
|
189
225
|
list(iterations_by_view_id.keys()), include_inherited_properties=True
|
|
190
226
|
)
|
|
227
|
+
if missing := set(iterations_by_view_id) - {view.as_id() for view in views}:
|
|
228
|
+
for missing_view in missing:
|
|
229
|
+
issues.append(ResourceNotFoundError(missing_view, "view", more="The view is not found in CDF."))
|
|
230
|
+
return [], issues
|
|
231
|
+
# Todo: Remove if this turns out to be unnecessary.
|
|
232
|
+
hierarchical_properties_by_view_id: dict[dm.ViewId, set[str]] = {}
|
|
191
233
|
else:
|
|
192
234
|
views = dm.ViewList([])
|
|
193
235
|
with catch_issues() as issues:
|
|
@@ -195,15 +237,17 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
195
237
|
views.extend(read_model.views)
|
|
196
238
|
if issues.has_errors:
|
|
197
239
|
return [], issues
|
|
240
|
+
hierarchical_properties_by_view_id = {}
|
|
198
241
|
views_by_id = {view.as_id(): view for view in views}
|
|
199
|
-
hierarchical_properties_by_view_id = SchemaAPI.get_hierarchical_properties(views)
|
|
200
242
|
|
|
201
243
|
def sort_by_instance_type(id_: dm.ViewId) -> int:
|
|
202
244
|
if id_ not in views_by_id:
|
|
203
245
|
return 0
|
|
204
|
-
return {"node": 1, "all":
|
|
246
|
+
return {"node": 1, "all": 1, "edge": 3}.get(views_by_id[id_].used_for, 0)
|
|
205
247
|
|
|
206
|
-
ordered_view_ids =
|
|
248
|
+
ordered_view_ids = SchemaAPI.get_view_order_by_direct_relation_constraints(views)
|
|
249
|
+
# Sort is stable in Python, so we will keep the order of the views:
|
|
250
|
+
ordered_view_ids.sort(key=sort_by_instance_type)
|
|
207
251
|
view_iterations: list[_ViewIterator] = []
|
|
208
252
|
for view_id in ordered_view_ids:
|
|
209
253
|
if view_id not in iterations_by_view_id:
|
|
@@ -245,8 +289,59 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
245
289
|
instance_iterable = iterate_progress_bar_if_above_config_threshold(
|
|
246
290
|
instance_iterable, total, f"Looking up spaces for {total} instances..."
|
|
247
291
|
)
|
|
292
|
+
neat_prefix = self.neat_prefix_by_predicate_uri.get(space_property_uri)
|
|
293
|
+
warned_spaces: set[str] = set()
|
|
248
294
|
for instance, space in instance_iterable:
|
|
249
|
-
|
|
295
|
+
if neat_prefix:
|
|
296
|
+
space = space.removeprefix(neat_prefix)
|
|
297
|
+
|
|
298
|
+
clean_space = NamingStandardization.standardize_space_str(space)
|
|
299
|
+
if clean_space != space and space not in warned_spaces:
|
|
300
|
+
issues.append(
|
|
301
|
+
NeatValueWarning(
|
|
302
|
+
f"Invalid space in property {self._space_property}: {space}. Fixed to {clean_space}"
|
|
303
|
+
)
|
|
304
|
+
)
|
|
305
|
+
warned_spaces.add(space)
|
|
306
|
+
|
|
307
|
+
self._space_by_instance_uri[instance] = clean_space
|
|
308
|
+
return issues
|
|
309
|
+
|
|
310
|
+
def _lookup_identifier_by_uri(self) -> None:
|
|
311
|
+
if not self.neat_prefix_by_type_uri:
|
|
312
|
+
return
|
|
313
|
+
|
|
314
|
+
count = sum(count for _, count in self.graph_store.queries.summarize_instances())
|
|
315
|
+
instance_iterable = self.graph_store.queries.list_instances_ids()
|
|
316
|
+
instance_iterable = iterate_progress_bar_if_above_config_threshold(
|
|
317
|
+
instance_iterable, count, f"Looking up identifiers for {count} instances..."
|
|
318
|
+
)
|
|
319
|
+
count_by_identifier: dict[str, list[URIRef]] = defaultdict(list)
|
|
320
|
+
for instance_uri, type in instance_iterable:
|
|
321
|
+
if type not in self.neat_prefix_by_type_uri:
|
|
322
|
+
continue
|
|
323
|
+
prefix = self.neat_prefix_by_type_uri[type]
|
|
324
|
+
identifier = remove_namespace_from_uri(instance_uri)
|
|
325
|
+
if self._unquote_external_ids:
|
|
326
|
+
identifier = urllib.parse.unquote(identifier)
|
|
327
|
+
count_by_identifier[identifier.removeprefix(prefix)].append(instance_uri)
|
|
328
|
+
|
|
329
|
+
for identifier, uris in count_by_identifier.items():
|
|
330
|
+
if len(uris) == 1:
|
|
331
|
+
self._external_id_by_uri[uris[0]] = identifier
|
|
332
|
+
|
|
333
|
+
def _create_instance_space_if_not_exists(self) -> IssueList:
|
|
334
|
+
issues = IssueList()
|
|
335
|
+
if not self._client:
|
|
336
|
+
return issues
|
|
337
|
+
|
|
338
|
+
instance_spaces = set(self._space_by_instance_uri.values()) - {self._instance_space}
|
|
339
|
+
existing_spaces = {space.space for space in self._client.data_modeling.spaces.retrieve(list(instance_spaces))}
|
|
340
|
+
if missing_spaces := (instance_spaces - existing_spaces):
|
|
341
|
+
try:
|
|
342
|
+
self._client.data_modeling.spaces.apply([dm.SpaceApply(space=space) for space in missing_spaces])
|
|
343
|
+
except CogniteAPIError as e:
|
|
344
|
+
issues.append(AuthorizationError(f"Creating {len(missing_spaces)} instance spaces.", str(e)))
|
|
250
345
|
return issues
|
|
251
346
|
|
|
252
347
|
def _create_projection(self, view: dm.View) -> tuple[_Projection, IssueList]:
|
|
@@ -346,8 +441,8 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
346
441
|
def parse_direct_relation(cls, value: list, info: ValidationInfo) -> dict | list[dict]:
|
|
347
442
|
# We validate above that we only get one value for single direct relations.
|
|
348
443
|
if list.__name__ in _get_field_value_types(cls, info):
|
|
349
|
-
|
|
350
|
-
result = [
|
|
444
|
+
ids = (self._create_instance_id(v, "node", stop_on_exception=True) for v in value)
|
|
445
|
+
result = [id_.dump(camel_case=True, include_instance_type=False) for id_ in ids]
|
|
351
446
|
# Todo: Account for max_list_limit
|
|
352
447
|
if len(result) <= DMS_DIRECT_RELATION_LIST_LIMIT:
|
|
353
448
|
return result
|
|
@@ -364,8 +459,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
364
459
|
result.sort(key=lambda x: (x["space"], x["externalId"]))
|
|
365
460
|
return result[:DMS_DIRECT_RELATION_LIST_LIMIT]
|
|
366
461
|
elif value:
|
|
367
|
-
|
|
368
|
-
|
|
462
|
+
return self._create_instance_id(value[0], "node", stop_on_exception=True).dump(
|
|
463
|
+
camel_case=True, include_instance_type=False
|
|
464
|
+
)
|
|
369
465
|
return {}
|
|
370
466
|
|
|
371
467
|
validators["parse_direct_relation"] = field_validator(*direct_relation_by_property.keys(), mode="before")( # type: ignore[assignment]
|
|
@@ -376,35 +472,50 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
376
472
|
|
|
377
473
|
def parse_direct_relation_to_unit(cls, value: Any, info: ValidationInfo) -> dict | list[dict]:
|
|
378
474
|
if value:
|
|
379
|
-
|
|
475
|
+
external_id = remove_namespace_from_uri(value[0])
|
|
476
|
+
if self._unquote_external_ids:
|
|
477
|
+
external_id = urllib.parse.unquote(external_id)
|
|
478
|
+
return {"space": "cdf_cdm_units", "externalId": external_id}
|
|
380
479
|
return {}
|
|
381
480
|
|
|
382
481
|
validators["parse_direct_relation_to_unit"] = field_validator(*unit_properties, mode="before")( # type: ignore[assignment]
|
|
383
482
|
parse_direct_relation_to_unit # type: ignore[arg-type]
|
|
384
483
|
)
|
|
385
484
|
|
|
485
|
+
if text_fields:
|
|
486
|
+
|
|
487
|
+
def parse_text(cls, value: Any, info: ValidationInfo) -> str | list[str]:
|
|
488
|
+
if isinstance(value, list):
|
|
489
|
+
return [remove_namespace_from_uri(v) if isinstance(v, URIRef) else str(v) for v in value]
|
|
490
|
+
return remove_namespace_from_uri(value) if isinstance(value, URIRef) else str(value)
|
|
491
|
+
|
|
492
|
+
validators["parse_text"] = field_validator(*text_fields, mode="before")(parse_text) # type: ignore[assignment, arg-type]
|
|
493
|
+
|
|
386
494
|
pydantic_cls = create_model(view.external_id, __validators__=validators, **field_definitions) # type: ignore[arg-type, call-overload]
|
|
387
495
|
return _Projection(view.as_id(), view.used_for, pydantic_cls, edge_by_type, edge_by_prop_id), issues
|
|
388
496
|
|
|
389
497
|
def _create_instances(
|
|
390
498
|
self,
|
|
391
|
-
|
|
392
|
-
properties: dict[str | InstanceType, list[
|
|
499
|
+
instance_uri: URIRef,
|
|
500
|
+
properties: dict[str | InstanceType, list[Any]],
|
|
393
501
|
projection: _Projection,
|
|
394
|
-
stop_on_exception:
|
|
502
|
+
stop_on_exception: Literal[True, False] = False,
|
|
395
503
|
exclude: set[str] | None = None,
|
|
396
504
|
include: set[str] | None = None,
|
|
397
505
|
) -> Iterable[dm.InstanceApply | NeatIssue]:
|
|
398
|
-
|
|
399
|
-
|
|
506
|
+
instance_id = self._create_instance_id(instance_uri, "node", stop_on_exception)
|
|
507
|
+
if not isinstance(instance_id, InstanceId):
|
|
508
|
+
yield instance_id
|
|
509
|
+
return
|
|
510
|
+
space, external_id = instance_id.space, instance_id.external_id
|
|
400
511
|
start_node, end_node = self._pop_start_end_node(properties)
|
|
401
512
|
is_edge = start_node and end_node
|
|
402
513
|
instance_type = "edge" if is_edge else "node"
|
|
403
514
|
if (projection.used_for == "node" and is_edge) or (projection.used_for == "edge" and not is_edge):
|
|
404
515
|
creation_error = ResourceCreationError(
|
|
405
|
-
|
|
516
|
+
external_id,
|
|
406
517
|
instance_type,
|
|
407
|
-
f"View used for {projection.used_for} instance {
|
|
518
|
+
f"View used for {projection.used_for} instance {external_id!s} but is {instance_type}",
|
|
408
519
|
)
|
|
409
520
|
if stop_on_exception:
|
|
410
521
|
raise creation_error from None
|
|
@@ -412,22 +523,18 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
412
523
|
return
|
|
413
524
|
|
|
414
525
|
if RDF.type not in properties:
|
|
415
|
-
error = ResourceCreationError(
|
|
526
|
+
error = ResourceCreationError(external_id, instance_type, "No rdf:type found")
|
|
416
527
|
if stop_on_exception:
|
|
417
528
|
raise error from None
|
|
418
529
|
yield error
|
|
419
530
|
return
|
|
420
531
|
_ = properties.pop(RDF.type)[0]
|
|
421
|
-
if start_node and self._unquote_external_ids:
|
|
422
|
-
start_node = urllib.parse.unquote(start_node)
|
|
423
|
-
if end_node and self._unquote_external_ids:
|
|
424
|
-
end_node = urllib.parse.unquote(end_node)
|
|
425
|
-
|
|
426
532
|
if exclude:
|
|
427
533
|
properties = {k: v for k, v in properties.items() if k not in exclude}
|
|
428
534
|
if include:
|
|
429
535
|
properties = {k: v for k, v in properties.items() if k in include}
|
|
430
536
|
|
|
537
|
+
sources = []
|
|
431
538
|
with catch_issues() as property_issues:
|
|
432
539
|
sources = [
|
|
433
540
|
dm.NodeOrEdgeData(
|
|
@@ -437,32 +544,46 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
437
544
|
]
|
|
438
545
|
for issue in property_issues:
|
|
439
546
|
if isinstance(issue, ResourceNeatWarning):
|
|
440
|
-
issue.identifier =
|
|
547
|
+
issue.identifier = external_id
|
|
441
548
|
|
|
442
549
|
if property_issues.has_errors and stop_on_exception:
|
|
443
550
|
raise property_issues.as_exception()
|
|
444
551
|
yield from property_issues
|
|
552
|
+
if not sources:
|
|
553
|
+
return
|
|
445
554
|
|
|
446
555
|
if start_node and end_node:
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
556
|
+
start = self._create_instance_id(start_node, "edge", stop_on_exception)
|
|
557
|
+
end = self._create_instance_id(end_node, "edge", stop_on_exception)
|
|
558
|
+
if isinstance(start, NeatError):
|
|
559
|
+
yield start
|
|
560
|
+
if isinstance(end, NeatError):
|
|
561
|
+
yield end
|
|
562
|
+
if isinstance(start, InstanceId) and isinstance(end, InstanceId):
|
|
563
|
+
yield dm.EdgeApply(
|
|
564
|
+
space=space,
|
|
565
|
+
external_id=external_id,
|
|
566
|
+
type=(projection.view_id.space, projection.view_id.external_id),
|
|
567
|
+
start_node=start.as_tuple(),
|
|
568
|
+
end_node=end.as_tuple(),
|
|
569
|
+
sources=sources,
|
|
570
|
+
)
|
|
455
571
|
else:
|
|
456
572
|
yield dm.NodeApply(
|
|
457
|
-
space=
|
|
458
|
-
external_id=
|
|
573
|
+
space=space,
|
|
574
|
+
external_id=external_id,
|
|
459
575
|
type=(projection.view_id.space, projection.view_id.external_id),
|
|
460
576
|
sources=sources,
|
|
461
577
|
)
|
|
462
|
-
yield from self._create_edges_without_properties(
|
|
578
|
+
yield from self._create_edges_without_properties(space, external_id, properties, projection, stop_on_exception)
|
|
463
579
|
|
|
464
580
|
def _create_edges_without_properties(
|
|
465
|
-
self,
|
|
581
|
+
self,
|
|
582
|
+
space: str,
|
|
583
|
+
identifier: str,
|
|
584
|
+
properties: dict[str | InstanceType, list[str] | list[URIRef]],
|
|
585
|
+
projection: _Projection,
|
|
586
|
+
stop_on_exception: Literal[True, False],
|
|
466
587
|
) -> Iterable[dm.EdgeApply | NeatIssue]:
|
|
467
588
|
for predicate, values in properties.items():
|
|
468
589
|
if predicate in projection.edge_by_type:
|
|
@@ -480,15 +601,22 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
480
601
|
yield error
|
|
481
602
|
continue
|
|
482
603
|
for target in values:
|
|
604
|
+
target_id = self._create_instance_id(target, "edge", stop_on_exception) # type: ignore[call-overload]
|
|
605
|
+
if not isinstance(target_id, InstanceId):
|
|
606
|
+
yield target_id
|
|
607
|
+
continue
|
|
608
|
+
if isinstance(target, URIRef):
|
|
609
|
+
target = remove_namespace_from_uri(target)
|
|
483
610
|
external_id = f"{identifier}.{prop_id}.{target}"
|
|
611
|
+
|
|
484
612
|
start_node, end_node = (
|
|
485
|
-
(
|
|
486
|
-
(
|
|
613
|
+
(space, identifier),
|
|
614
|
+
target_id.as_tuple(),
|
|
487
615
|
)
|
|
488
616
|
if edge.direction == "inwards":
|
|
489
617
|
start_node, end_node = end_node, start_node
|
|
490
618
|
yield dm.EdgeApply(
|
|
491
|
-
space=
|
|
619
|
+
space=space,
|
|
492
620
|
external_id=(external_id if len(external_id) < 256 else create_sha256_hash(external_id)),
|
|
493
621
|
type=edge.type,
|
|
494
622
|
start_node=start_node,
|
|
@@ -496,7 +624,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
496
624
|
)
|
|
497
625
|
|
|
498
626
|
@staticmethod
|
|
499
|
-
def _pop_start_end_node(
|
|
627
|
+
def _pop_start_end_node(
|
|
628
|
+
properties: dict[str | InstanceType, list[str] | list[URIRef]],
|
|
629
|
+
) -> tuple[URIRef, URIRef] | tuple[None, None]:
|
|
500
630
|
start_node = properties.pop("startNode", [None])[0]
|
|
501
631
|
if not start_node:
|
|
502
632
|
start_node = properties.pop("start_node", [None])[0]
|
|
@@ -504,9 +634,47 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
504
634
|
if not end_node:
|
|
505
635
|
end_node = properties.pop("end_node", [None])[0]
|
|
506
636
|
if start_node and end_node:
|
|
507
|
-
return start_node, end_node
|
|
637
|
+
return start_node, end_node # type: ignore[return-value]
|
|
508
638
|
return None, None
|
|
509
639
|
|
|
640
|
+
@overload
|
|
641
|
+
def _create_instance_id(
|
|
642
|
+
self, uri: URIRef, instance_type: str, stop_on_exception: Literal[False] = False
|
|
643
|
+
) -> InstanceId | NeatError: ...
|
|
644
|
+
|
|
645
|
+
@overload
|
|
646
|
+
def _create_instance_id(
|
|
647
|
+
self, uri: URIRef, instance_type: str, stop_on_exception: Literal[True] = True
|
|
648
|
+
) -> InstanceId: ...
|
|
649
|
+
|
|
650
|
+
def _create_instance_id(
|
|
651
|
+
self, uri: URIRef, instance_type: str, stop_on_exception: bool = False
|
|
652
|
+
) -> InstanceId | NeatError:
|
|
653
|
+
space: str | None = None
|
|
654
|
+
external_id: str | None = None
|
|
655
|
+
error: NeatError | None = None
|
|
656
|
+
if self._use_source_space:
|
|
657
|
+
namespace, external_id = split_uri(uri)
|
|
658
|
+
space = namespace_as_space(namespace)
|
|
659
|
+
if space is None:
|
|
660
|
+
error = ResourceCreationError(uri, instance_type, f"Could not find space for {uri!s}.")
|
|
661
|
+
else:
|
|
662
|
+
space = self._space_by_instance_uri[uri]
|
|
663
|
+
if uri in self._external_id_by_uri:
|
|
664
|
+
external_id = self._external_id_by_uri[uri]
|
|
665
|
+
else:
|
|
666
|
+
external_id = remove_namespace_from_uri(uri)
|
|
667
|
+
|
|
668
|
+
if external_id and self._unquote_external_ids:
|
|
669
|
+
external_id = urllib.parse.unquote(external_id)
|
|
670
|
+
if space and external_id:
|
|
671
|
+
return InstanceId(space, external_id)
|
|
672
|
+
if error is None:
|
|
673
|
+
raise ValueError(f"Bug in neat. Failed to create instance ID and determine error for {uri!r}")
|
|
674
|
+
if stop_on_exception:
|
|
675
|
+
raise error
|
|
676
|
+
return error
|
|
677
|
+
|
|
510
678
|
def _get_required_capabilities(self) -> list[Capability]:
|
|
511
679
|
return [
|
|
512
680
|
DataModelInstancesAcl(
|
|
@@ -525,24 +693,11 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
525
693
|
items: list[dm.InstanceApply],
|
|
526
694
|
dry_run: bool,
|
|
527
695
|
read_issues: IssueList,
|
|
696
|
+
class_name: str | None = None,
|
|
528
697
|
) -> Iterable[UploadResult]:
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
source_by_edge_id: dict[dm.EdgeId, str] = {}
|
|
533
|
-
for item in items:
|
|
534
|
-
if isinstance(item, dm.NodeApply):
|
|
535
|
-
nodes.append(item)
|
|
536
|
-
if item.sources:
|
|
537
|
-
source_by_node_id[item.as_id()] = item.sources[0].source.external_id
|
|
538
|
-
else:
|
|
539
|
-
source_by_node_id[item.as_id()] = "node"
|
|
540
|
-
elif isinstance(item, dm.EdgeApply):
|
|
541
|
-
edges.append(item)
|
|
542
|
-
if item.sources:
|
|
543
|
-
source_by_edge_id[item.as_id()] = item.sources[0].source.external_id
|
|
544
|
-
else:
|
|
545
|
-
source_by_edge_id[item.as_id()] = "edge"
|
|
698
|
+
name = class_name or "Instances"
|
|
699
|
+
nodes = [item for item in items if isinstance(item, dm.NodeApply)]
|
|
700
|
+
edges = [item for item in items if isinstance(item, dm.EdgeApply)]
|
|
546
701
|
try:
|
|
547
702
|
upserted = client.data_modeling.instances.apply(
|
|
548
703
|
nodes,
|
|
@@ -552,29 +707,28 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
552
707
|
skip_on_version_conflict=True,
|
|
553
708
|
)
|
|
554
709
|
except CogniteAPIError as e:
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
710
|
+
if len(items) == 1:
|
|
711
|
+
yield UploadResult(
|
|
712
|
+
name=name,
|
|
713
|
+
issues=read_issues,
|
|
714
|
+
failed_items=items,
|
|
715
|
+
error_messages=[str(e)],
|
|
716
|
+
failed_upserted={item.as_id() for item in items}, # type: ignore[attr-defined]
|
|
717
|
+
)
|
|
718
|
+
else:
|
|
719
|
+
half = len(items) // 2
|
|
720
|
+
yield from self._upload_to_cdf(client, items[:half], dry_run, read_issues, class_name)
|
|
721
|
+
yield from self._upload_to_cdf(client, items[half:], dry_run, read_issues, class_name)
|
|
560
722
|
else:
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
for instance in subinstances: # type: ignore[attr-defined]
|
|
571
|
-
if instance.was_modified and instance.created_time == instance.last_updated_time:
|
|
572
|
-
result.created.add(instance.as_id())
|
|
573
|
-
elif instance.was_modified:
|
|
574
|
-
result.changed.add(instance.as_id())
|
|
575
|
-
else:
|
|
576
|
-
result.unchanged.add(instance.as_id())
|
|
577
|
-
yield result
|
|
723
|
+
result = UploadResult(name=name, issues=read_issues) # type: ignore[var-annotated]
|
|
724
|
+
for instance in itertools.chain(upserted.nodes, upserted.edges): # type: ignore[attr-defined]
|
|
725
|
+
if instance.was_modified and instance.created_time == instance.last_updated_time:
|
|
726
|
+
result.created.add(instance.as_id())
|
|
727
|
+
elif instance.was_modified:
|
|
728
|
+
result.changed.add(instance.as_id())
|
|
729
|
+
else:
|
|
730
|
+
result.unchanged.add(instance.as_id())
|
|
731
|
+
yield result
|
|
578
732
|
|
|
579
733
|
|
|
580
734
|
def _get_field_value_types(cls, info):
|