cognite-neat 0.110.0__py3-none-any.whl → 0.111.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (53) hide show
  1. cognite/neat/_alpha.py +6 -0
  2. cognite/neat/_client/_api/schema.py +26 -0
  3. cognite/neat/_client/data_classes/schema.py +1 -1
  4. cognite/neat/_constants.py +4 -1
  5. cognite/neat/_graph/extractors/__init__.py +4 -0
  6. cognite/neat/_graph/extractors/_classic_cdf/_base.py +8 -16
  7. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +39 -9
  8. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +23 -17
  9. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +15 -17
  10. cognite/neat/_graph/extractors/_dict.py +102 -0
  11. cognite/neat/_graph/extractors/_dms.py +27 -40
  12. cognite/neat/_graph/extractors/_dms_graph.py +30 -3
  13. cognite/neat/_graph/extractors/_raw.py +67 -0
  14. cognite/neat/_graph/loaders/_base.py +20 -4
  15. cognite/neat/_graph/loaders/_rdf2dms.py +243 -89
  16. cognite/neat/_graph/queries/_base.py +137 -43
  17. cognite/neat/_graph/transformers/_classic_cdf.py +6 -22
  18. cognite/neat/_issues/_factory.py +9 -1
  19. cognite/neat/_issues/errors/__init__.py +2 -0
  20. cognite/neat/_issues/errors/_external.py +7 -0
  21. cognite/neat/_issues/warnings/user_modeling.py +12 -0
  22. cognite/neat/_rules/_constants.py +3 -0
  23. cognite/neat/_rules/analysis/_base.py +29 -50
  24. cognite/neat/_rules/exporters/_rules2excel.py +1 -1
  25. cognite/neat/_rules/importers/_rdf/_inference2rules.py +16 -10
  26. cognite/neat/_rules/models/_base_rules.py +0 -2
  27. cognite/neat/_rules/models/data_types.py +7 -0
  28. cognite/neat/_rules/models/dms/_exporter.py +9 -8
  29. cognite/neat/_rules/models/dms/_rules.py +26 -1
  30. cognite/neat/_rules/models/dms/_rules_input.py +5 -1
  31. cognite/neat/_rules/models/dms/_validation.py +101 -1
  32. cognite/neat/_rules/models/entities/_single_value.py +8 -3
  33. cognite/neat/_rules/models/entities/_wrapped.py +2 -2
  34. cognite/neat/_rules/models/information/_rules_input.py +1 -0
  35. cognite/neat/_rules/models/information/_validation.py +64 -17
  36. cognite/neat/_rules/transformers/_converters.py +7 -2
  37. cognite/neat/_session/_base.py +2 -0
  38. cognite/neat/_session/_explore.py +39 -0
  39. cognite/neat/_session/_inspect.py +25 -6
  40. cognite/neat/_session/_read.py +67 -3
  41. cognite/neat/_session/_set.py +7 -1
  42. cognite/neat/_session/_state.py +6 -0
  43. cognite/neat/_session/_to.py +115 -8
  44. cognite/neat/_store/_graph_store.py +8 -4
  45. cognite/neat/_utils/rdf_.py +34 -3
  46. cognite/neat/_utils/text.py +72 -4
  47. cognite/neat/_utils/upload.py +2 -0
  48. cognite/neat/_version.py +2 -2
  49. {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.0.dist-info}/METADATA +1 -1
  50. {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.0.dist-info}/RECORD +53 -50
  51. {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.0.dist-info}/LICENSE +0 -0
  52. {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.0.dist-info}/WHEEL +0 -0
  53. {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.0.dist-info}/entry_points.txt +0 -0
@@ -6,7 +6,7 @@ from collections import defaultdict
6
6
  from collections.abc import Iterable, Sequence
7
7
  from dataclasses import dataclass
8
8
  from pathlib import Path
9
- from typing import Any, Literal, cast, get_args
9
+ from typing import Any, Literal, cast, get_args, overload
10
10
 
11
11
  import yaml
12
12
  from cognite.client import CogniteClient
@@ -17,14 +17,20 @@ from cognite.client.data_classes.data_modeling.ids import InstanceId
17
17
  from cognite.client.data_classes.data_modeling.views import SingleEdgeConnection
18
18
  from cognite.client.exceptions import CogniteAPIError
19
19
  from pydantic import BaseModel, ValidationInfo, create_model, field_validator
20
- from rdflib import RDF
20
+ from rdflib import RDF, URIRef
21
21
 
22
22
  from cognite.neat._client import NeatClient
23
23
  from cognite.neat._client._api_client import SchemaAPI
24
24
  from cognite.neat._constants import DMS_DIRECT_RELATION_LIST_LIMIT, is_readonly_property
25
- from cognite.neat._issues import IssueList, NeatIssue, catch_issues
26
- from cognite.neat._issues.errors import ResourceCreationError, ResourceDuplicatedError, ResourceNotFoundError
25
+ from cognite.neat._issues import IssueList, NeatError, NeatIssue, catch_issues
26
+ from cognite.neat._issues.errors import (
27
+ AuthorizationError,
28
+ ResourceCreationError,
29
+ ResourceDuplicatedError,
30
+ ResourceNotFoundError,
31
+ )
27
32
  from cognite.neat._issues.warnings import (
33
+ NeatValueWarning,
28
34
  PropertyDirectRelationLimitWarning,
29
35
  PropertyMultipleValueWarning,
30
36
  PropertyTypeNotSupportedWarning,
@@ -39,11 +45,11 @@ from cognite.neat._shared import InstanceType
39
45
  from cognite.neat._store import NeatGraphStore
40
46
  from cognite.neat._utils.auxiliary import create_sha256_hash
41
47
  from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
42
- from cognite.neat._utils.rdf_ import remove_namespace_from_uri
43
- from cognite.neat._utils.text import humanize_collection
48
+ from cognite.neat._utils.rdf_ import namespace_as_space, remove_namespace_from_uri, split_uri
49
+ from cognite.neat._utils.text import NamingStandardization, humanize_collection
44
50
  from cognite.neat._utils.upload import UploadResult
45
51
 
46
- from ._base import _END_OF_CLASS, CDFLoader
52
+ from ._base import _END_OF_CLASS, _START_OF_CLASS, CDFLoader
47
53
 
48
54
 
49
55
  @dataclass
@@ -89,6 +95,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
89
95
  client (NeatClient | None): This is used to lookup containers such that the loader
90
96
  creates instances in accordance with required constraints. Defaults to None.
91
97
  unquote_external_ids (bool): If True, the loader will unquote external ids before creating the instances.
98
+ neat_prefix_by_predicate_uri (dict[URIRef, str] | None): A dictionary that maps a predicate URIRef to a
99
+ prefix that Neat added to the object upon extraction. This is used to remove the prefix from the
100
+ object before creating the instance.
92
101
  """
93
102
 
94
103
  def __init__(
@@ -98,16 +107,23 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
98
107
  graph_store: NeatGraphStore,
99
108
  instance_space: str,
100
109
  space_property: str | None = None,
110
+ use_source_space: bool = False,
101
111
  client: NeatClient | None = None,
102
112
  create_issues: Sequence[NeatIssue] | None = None,
103
113
  unquote_external_ids: bool = False,
114
+ neat_prefix_by_predicate_uri: dict[URIRef, str] | None = None,
115
+ neat_prefix_by_type_uri: dict[URIRef, str] | None = None,
104
116
  ):
105
117
  super().__init__(graph_store)
106
118
  self.dms_rules = dms_rules
107
119
  self.info_rules = info_rules
120
+ self.neat_prefix_by_predicate_uri = neat_prefix_by_predicate_uri or {}
121
+ self.neat_prefix_by_type_uri = neat_prefix_by_type_uri or {}
108
122
  self._instance_space = instance_space
109
123
  self._space_property = space_property
110
- self._space_by_uri: dict[str, str] = defaultdict(lambda: instance_space)
124
+ self._use_source_space = use_source_space
125
+ self._space_by_instance_uri: dict[URIRef, str] = defaultdict(lambda: instance_space)
126
+ self._external_id_by_uri: dict[URIRef, str] = {}
111
127
  self._issues = IssueList(create_issues or [])
112
128
  self._client = client
113
129
  self._unquote_external_ids = unquote_external_ids
@@ -132,7 +148,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
132
148
  else:
133
149
  yaml.safe_dump(dumped, f, sort_keys=False)
134
150
 
135
- def _load(self, stop_on_exception: bool = False) -> Iterable[dm.InstanceApply | NeatIssue | type[_END_OF_CLASS]]:
151
+ def _load(
152
+ self, stop_on_exception: bool = False
153
+ ) -> Iterable[dm.InstanceApply | NeatIssue | type[_END_OF_CLASS] | _START_OF_CLASS]:
136
154
  if self._issues.has_errors and stop_on_exception:
137
155
  raise self._issues.as_exception()
138
156
  elif self._issues.has_errors:
@@ -143,6 +161,16 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
143
161
  if self._space_property:
144
162
  yield from self._lookup_space_by_uri(view_iterations, stop_on_exception)
145
163
 
164
+ if self._client:
165
+ space_creation = self._create_instance_space_if_not_exists()
166
+ yield from space_creation.warnings
167
+ if space_creation.has_errors and stop_on_exception:
168
+ raise space_creation.as_exception()
169
+ yield from space_creation.errors
170
+
171
+ if self.neat_prefix_by_type_uri:
172
+ self._lookup_identifier_by_uri()
173
+
146
174
  for it in view_iterations:
147
175
  view = it.view
148
176
  if view is None:
@@ -151,10 +179,15 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
151
179
  projection, issues = self._create_projection(view)
152
180
  yield from issues
153
181
  query = it.query
154
- reader = self.graph_store.read(query.rdf_type, property_renaming_config=query.property_renaming_config)
182
+ reader = self.graph_store.read(
183
+ query.rdf_type,
184
+ property_renaming_config=query.property_renaming_config,
185
+ remove_uri_namespace=False,
186
+ )
155
187
  instance_iterable = iterate_progress_bar_if_above_config_threshold(
156
188
  reader, it.instance_count, f"Loading {it.view_id!r}"
157
189
  )
190
+ yield _START_OF_CLASS(view.external_id)
158
191
  for identifier, properties in instance_iterable:
159
192
  yield from self._create_instances(
160
193
  identifier, properties, projection, stop_on_exception, exclude=it.hierarchical_properties
@@ -162,7 +195,10 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
162
195
  if it.hierarchical_properties:
163
196
  # Force the creation of instances, before we create the hierarchical properties.
164
197
  yield _END_OF_CLASS
198
+ yield _START_OF_CLASS(f"{view.external_id} ({humanize_collection(it.hierarchical_properties)})")
165
199
  yield from self._create_hierarchical_properties(it, projection, stop_on_exception)
200
+ if reader is not instance_iterable:
201
+ print(f"Loaded {it.instance_count} instances for {it.view_id!r}")
166
202
 
167
203
  yield _END_OF_CLASS
168
204
 
@@ -188,6 +224,12 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
188
224
  views = self._client.data_modeling.views.retrieve(
189
225
  list(iterations_by_view_id.keys()), include_inherited_properties=True
190
226
  )
227
+ if missing := set(iterations_by_view_id) - {view.as_id() for view in views}:
228
+ for missing_view in missing:
229
+ issues.append(ResourceNotFoundError(missing_view, "view", more="The view is not found in CDF."))
230
+ return [], issues
231
+ # Todo: Remove if this turns out to be unnecessary.
232
+ hierarchical_properties_by_view_id: dict[dm.ViewId, set[str]] = {}
191
233
  else:
192
234
  views = dm.ViewList([])
193
235
  with catch_issues() as issues:
@@ -195,15 +237,17 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
195
237
  views.extend(read_model.views)
196
238
  if issues.has_errors:
197
239
  return [], issues
240
+ hierarchical_properties_by_view_id = {}
198
241
  views_by_id = {view.as_id(): view for view in views}
199
- hierarchical_properties_by_view_id = SchemaAPI.get_hierarchical_properties(views)
200
242
 
201
243
  def sort_by_instance_type(id_: dm.ViewId) -> int:
202
244
  if id_ not in views_by_id:
203
245
  return 0
204
- return {"node": 1, "all": 2, "edge": 3}.get(views_by_id[id_].used_for, 0)
246
+ return {"node": 1, "all": 1, "edge": 3}.get(views_by_id[id_].used_for, 0)
205
247
 
206
- ordered_view_ids = sorted(iterations_by_view_id.keys(), key=sort_by_instance_type)
248
+ ordered_view_ids = SchemaAPI.get_view_order_by_direct_relation_constraints(views)
249
+ # Sort is stable in Python, so we will keep the order of the views:
250
+ ordered_view_ids.sort(key=sort_by_instance_type)
207
251
  view_iterations: list[_ViewIterator] = []
208
252
  for view_id in ordered_view_ids:
209
253
  if view_id not in iterations_by_view_id:
@@ -245,8 +289,59 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
245
289
  instance_iterable = iterate_progress_bar_if_above_config_threshold(
246
290
  instance_iterable, total, f"Looking up spaces for {total} instances..."
247
291
  )
292
+ neat_prefix = self.neat_prefix_by_predicate_uri.get(space_property_uri)
293
+ warned_spaces: set[str] = set()
248
294
  for instance, space in instance_iterable:
249
- self._space_by_uri[remove_namespace_from_uri(instance)] = space
295
+ if neat_prefix:
296
+ space = space.removeprefix(neat_prefix)
297
+
298
+ clean_space = NamingStandardization.standardize_space_str(space)
299
+ if clean_space != space and space not in warned_spaces:
300
+ issues.append(
301
+ NeatValueWarning(
302
+ f"Invalid space in property {self._space_property}: {space}. Fixed to {clean_space}"
303
+ )
304
+ )
305
+ warned_spaces.add(space)
306
+
307
+ self._space_by_instance_uri[instance] = clean_space
308
+ return issues
309
+
310
+ def _lookup_identifier_by_uri(self) -> None:
311
+ if not self.neat_prefix_by_type_uri:
312
+ return
313
+
314
+ count = sum(count for _, count in self.graph_store.queries.summarize_instances())
315
+ instance_iterable = self.graph_store.queries.list_instances_ids()
316
+ instance_iterable = iterate_progress_bar_if_above_config_threshold(
317
+ instance_iterable, count, f"Looking up identifiers for {count} instances..."
318
+ )
319
+ count_by_identifier: dict[str, list[URIRef]] = defaultdict(list)
320
+ for instance_uri, type in instance_iterable:
321
+ if type not in self.neat_prefix_by_type_uri:
322
+ continue
323
+ prefix = self.neat_prefix_by_type_uri[type]
324
+ identifier = remove_namespace_from_uri(instance_uri)
325
+ if self._unquote_external_ids:
326
+ identifier = urllib.parse.unquote(identifier)
327
+ count_by_identifier[identifier.removeprefix(prefix)].append(instance_uri)
328
+
329
+ for identifier, uris in count_by_identifier.items():
330
+ if len(uris) == 1:
331
+ self._external_id_by_uri[uris[0]] = identifier
332
+
333
+ def _create_instance_space_if_not_exists(self) -> IssueList:
334
+ issues = IssueList()
335
+ if not self._client:
336
+ return issues
337
+
338
+ instance_spaces = set(self._space_by_instance_uri.values()) - {self._instance_space}
339
+ existing_spaces = {space.space for space in self._client.data_modeling.spaces.retrieve(list(instance_spaces))}
340
+ if missing_spaces := (instance_spaces - existing_spaces):
341
+ try:
342
+ self._client.data_modeling.spaces.apply([dm.SpaceApply(space=space) for space in missing_spaces])
343
+ except CogniteAPIError as e:
344
+ issues.append(AuthorizationError(f"Creating {len(missing_spaces)} instance spaces.", str(e)))
250
345
  return issues
251
346
 
252
347
  def _create_projection(self, view: dm.View) -> tuple[_Projection, IssueList]:
@@ -346,8 +441,8 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
346
441
  def parse_direct_relation(cls, value: list, info: ValidationInfo) -> dict | list[dict]:
347
442
  # We validate above that we only get one value for single direct relations.
348
443
  if list.__name__ in _get_field_value_types(cls, info):
349
- external_ids = (remove_namespace_from_uri(v) for v in value)
350
- result = [{"space": self._space_by_uri[e], "externalId": e} for e in external_ids]
444
+ ids = (self._create_instance_id(v, "node", stop_on_exception=True) for v in value)
445
+ result = [id_.dump(camel_case=True, include_instance_type=False) for id_ in ids]
351
446
  # Todo: Account for max_list_limit
352
447
  if len(result) <= DMS_DIRECT_RELATION_LIST_LIMIT:
353
448
  return result
@@ -364,8 +459,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
364
459
  result.sort(key=lambda x: (x["space"], x["externalId"]))
365
460
  return result[:DMS_DIRECT_RELATION_LIST_LIMIT]
366
461
  elif value:
367
- external_id = remove_namespace_from_uri(value[0])
368
- return {"space": self._space_by_uri[external_id], "externalId": external_id}
462
+ return self._create_instance_id(value[0], "node", stop_on_exception=True).dump(
463
+ camel_case=True, include_instance_type=False
464
+ )
369
465
  return {}
370
466
 
371
467
  validators["parse_direct_relation"] = field_validator(*direct_relation_by_property.keys(), mode="before")( # type: ignore[assignment]
@@ -376,35 +472,50 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
376
472
 
377
473
  def parse_direct_relation_to_unit(cls, value: Any, info: ValidationInfo) -> dict | list[dict]:
378
474
  if value:
379
- return {"space": "cdf_cdm_units", "externalId": remove_namespace_from_uri(value[0])}
475
+ external_id = remove_namespace_from_uri(value[0])
476
+ if self._unquote_external_ids:
477
+ external_id = urllib.parse.unquote(external_id)
478
+ return {"space": "cdf_cdm_units", "externalId": external_id}
380
479
  return {}
381
480
 
382
481
  validators["parse_direct_relation_to_unit"] = field_validator(*unit_properties, mode="before")( # type: ignore[assignment]
383
482
  parse_direct_relation_to_unit # type: ignore[arg-type]
384
483
  )
385
484
 
485
+ if text_fields:
486
+
487
+ def parse_text(cls, value: Any, info: ValidationInfo) -> str | list[str]:
488
+ if isinstance(value, list):
489
+ return [remove_namespace_from_uri(v) if isinstance(v, URIRef) else str(v) for v in value]
490
+ return remove_namespace_from_uri(value) if isinstance(value, URIRef) else str(value)
491
+
492
+ validators["parse_text"] = field_validator(*text_fields, mode="before")(parse_text) # type: ignore[assignment, arg-type]
493
+
386
494
  pydantic_cls = create_model(view.external_id, __validators__=validators, **field_definitions) # type: ignore[arg-type, call-overload]
387
495
  return _Projection(view.as_id(), view.used_for, pydantic_cls, edge_by_type, edge_by_prop_id), issues
388
496
 
389
497
  def _create_instances(
390
498
  self,
391
- identifier: str,
392
- properties: dict[str | InstanceType, list[str]],
499
+ instance_uri: URIRef,
500
+ properties: dict[str | InstanceType, list[Any]],
393
501
  projection: _Projection,
394
- stop_on_exception: bool = False,
502
+ stop_on_exception: Literal[True, False] = False,
395
503
  exclude: set[str] | None = None,
396
504
  include: set[str] | None = None,
397
505
  ) -> Iterable[dm.InstanceApply | NeatIssue]:
398
- if self._unquote_external_ids:
399
- identifier = urllib.parse.unquote(identifier)
506
+ instance_id = self._create_instance_id(instance_uri, "node", stop_on_exception)
507
+ if not isinstance(instance_id, InstanceId):
508
+ yield instance_id
509
+ return
510
+ space, external_id = instance_id.space, instance_id.external_id
400
511
  start_node, end_node = self._pop_start_end_node(properties)
401
512
  is_edge = start_node and end_node
402
513
  instance_type = "edge" if is_edge else "node"
403
514
  if (projection.used_for == "node" and is_edge) or (projection.used_for == "edge" and not is_edge):
404
515
  creation_error = ResourceCreationError(
405
- identifier,
516
+ external_id,
406
517
  instance_type,
407
- f"View used for {projection.used_for} instance {identifier!s} but is {instance_type}",
518
+ f"View used for {projection.used_for} instance {external_id!s} but is {instance_type}",
408
519
  )
409
520
  if stop_on_exception:
410
521
  raise creation_error from None
@@ -412,22 +523,18 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
412
523
  return
413
524
 
414
525
  if RDF.type not in properties:
415
- error = ResourceCreationError(identifier, instance_type, "No rdf:type found")
526
+ error = ResourceCreationError(external_id, instance_type, "No rdf:type found")
416
527
  if stop_on_exception:
417
528
  raise error from None
418
529
  yield error
419
530
  return
420
531
  _ = properties.pop(RDF.type)[0]
421
- if start_node and self._unquote_external_ids:
422
- start_node = urllib.parse.unquote(start_node)
423
- if end_node and self._unquote_external_ids:
424
- end_node = urllib.parse.unquote(end_node)
425
-
426
532
  if exclude:
427
533
  properties = {k: v for k, v in properties.items() if k not in exclude}
428
534
  if include:
429
535
  properties = {k: v for k, v in properties.items() if k in include}
430
536
 
537
+ sources = []
431
538
  with catch_issues() as property_issues:
432
539
  sources = [
433
540
  dm.NodeOrEdgeData(
@@ -437,32 +544,46 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
437
544
  ]
438
545
  for issue in property_issues:
439
546
  if isinstance(issue, ResourceNeatWarning):
440
- issue.identifier = identifier
547
+ issue.identifier = external_id
441
548
 
442
549
  if property_issues.has_errors and stop_on_exception:
443
550
  raise property_issues.as_exception()
444
551
  yield from property_issues
552
+ if not sources:
553
+ return
445
554
 
446
555
  if start_node and end_node:
447
- yield dm.EdgeApply(
448
- space=self._space_by_uri[identifier],
449
- external_id=identifier,
450
- type=(projection.view_id.space, projection.view_id.external_id),
451
- start_node=(self._space_by_uri[start_node], start_node),
452
- end_node=(self._space_by_uri[end_node], end_node),
453
- sources=sources,
454
- )
556
+ start = self._create_instance_id(start_node, "edge", stop_on_exception)
557
+ end = self._create_instance_id(end_node, "edge", stop_on_exception)
558
+ if isinstance(start, NeatError):
559
+ yield start
560
+ if isinstance(end, NeatError):
561
+ yield end
562
+ if isinstance(start, InstanceId) and isinstance(end, InstanceId):
563
+ yield dm.EdgeApply(
564
+ space=space,
565
+ external_id=external_id,
566
+ type=(projection.view_id.space, projection.view_id.external_id),
567
+ start_node=start.as_tuple(),
568
+ end_node=end.as_tuple(),
569
+ sources=sources,
570
+ )
455
571
  else:
456
572
  yield dm.NodeApply(
457
- space=self._space_by_uri[identifier],
458
- external_id=identifier,
573
+ space=space,
574
+ external_id=external_id,
459
575
  type=(projection.view_id.space, projection.view_id.external_id),
460
576
  sources=sources,
461
577
  )
462
- yield from self._create_edges_without_properties(identifier, properties, projection)
578
+ yield from self._create_edges_without_properties(space, external_id, properties, projection, stop_on_exception)
463
579
 
464
580
  def _create_edges_without_properties(
465
- self, identifier: str, properties: dict[str | InstanceType, list[str]], projection: _Projection
581
+ self,
582
+ space: str,
583
+ identifier: str,
584
+ properties: dict[str | InstanceType, list[str] | list[URIRef]],
585
+ projection: _Projection,
586
+ stop_on_exception: Literal[True, False],
466
587
  ) -> Iterable[dm.EdgeApply | NeatIssue]:
467
588
  for predicate, values in properties.items():
468
589
  if predicate in projection.edge_by_type:
@@ -480,15 +601,22 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
480
601
  yield error
481
602
  continue
482
603
  for target in values:
604
+ target_id = self._create_instance_id(target, "edge", stop_on_exception) # type: ignore[call-overload]
605
+ if not isinstance(target_id, InstanceId):
606
+ yield target_id
607
+ continue
608
+ if isinstance(target, URIRef):
609
+ target = remove_namespace_from_uri(target)
483
610
  external_id = f"{identifier}.{prop_id}.{target}"
611
+
484
612
  start_node, end_node = (
485
- (self._space_by_uri[identifier], identifier),
486
- (self._space_by_uri[target], target),
613
+ (space, identifier),
614
+ target_id.as_tuple(),
487
615
  )
488
616
  if edge.direction == "inwards":
489
617
  start_node, end_node = end_node, start_node
490
618
  yield dm.EdgeApply(
491
- space=self._space_by_uri[identifier],
619
+ space=space,
492
620
  external_id=(external_id if len(external_id) < 256 else create_sha256_hash(external_id)),
493
621
  type=edge.type,
494
622
  start_node=start_node,
@@ -496,7 +624,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
496
624
  )
497
625
 
498
626
  @staticmethod
499
- def _pop_start_end_node(properties: dict[str | InstanceType, list[str]]) -> tuple[str, str] | tuple[None, None]:
627
+ def _pop_start_end_node(
628
+ properties: dict[str | InstanceType, list[str] | list[URIRef]],
629
+ ) -> tuple[URIRef, URIRef] | tuple[None, None]:
500
630
  start_node = properties.pop("startNode", [None])[0]
501
631
  if not start_node:
502
632
  start_node = properties.pop("start_node", [None])[0]
@@ -504,9 +634,47 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
504
634
  if not end_node:
505
635
  end_node = properties.pop("end_node", [None])[0]
506
636
  if start_node and end_node:
507
- return start_node, end_node
637
+ return start_node, end_node # type: ignore[return-value]
508
638
  return None, None
509
639
 
640
+ @overload
641
+ def _create_instance_id(
642
+ self, uri: URIRef, instance_type: str, stop_on_exception: Literal[False] = False
643
+ ) -> InstanceId | NeatError: ...
644
+
645
+ @overload
646
+ def _create_instance_id(
647
+ self, uri: URIRef, instance_type: str, stop_on_exception: Literal[True] = True
648
+ ) -> InstanceId: ...
649
+
650
+ def _create_instance_id(
651
+ self, uri: URIRef, instance_type: str, stop_on_exception: bool = False
652
+ ) -> InstanceId | NeatError:
653
+ space: str | None = None
654
+ external_id: str | None = None
655
+ error: NeatError | None = None
656
+ if self._use_source_space:
657
+ namespace, external_id = split_uri(uri)
658
+ space = namespace_as_space(namespace)
659
+ if space is None:
660
+ error = ResourceCreationError(uri, instance_type, f"Could not find space for {uri!s}.")
661
+ else:
662
+ space = self._space_by_instance_uri[uri]
663
+ if uri in self._external_id_by_uri:
664
+ external_id = self._external_id_by_uri[uri]
665
+ else:
666
+ external_id = remove_namespace_from_uri(uri)
667
+
668
+ if external_id and self._unquote_external_ids:
669
+ external_id = urllib.parse.unquote(external_id)
670
+ if space and external_id:
671
+ return InstanceId(space, external_id)
672
+ if error is None:
673
+ raise ValueError(f"Bug in neat. Failed to create instance ID and determine error for {uri!r}")
674
+ if stop_on_exception:
675
+ raise error
676
+ return error
677
+
510
678
  def _get_required_capabilities(self) -> list[Capability]:
511
679
  return [
512
680
  DataModelInstancesAcl(
@@ -525,24 +693,11 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
525
693
  items: list[dm.InstanceApply],
526
694
  dry_run: bool,
527
695
  read_issues: IssueList,
696
+ class_name: str | None = None,
528
697
  ) -> Iterable[UploadResult]:
529
- nodes: list[dm.NodeApply] = []
530
- edges: list[dm.EdgeApply] = []
531
- source_by_node_id: dict[dm.NodeId, str] = {}
532
- source_by_edge_id: dict[dm.EdgeId, str] = {}
533
- for item in items:
534
- if isinstance(item, dm.NodeApply):
535
- nodes.append(item)
536
- if item.sources:
537
- source_by_node_id[item.as_id()] = item.sources[0].source.external_id
538
- else:
539
- source_by_node_id[item.as_id()] = "node"
540
- elif isinstance(item, dm.EdgeApply):
541
- edges.append(item)
542
- if item.sources:
543
- source_by_edge_id[item.as_id()] = item.sources[0].source.external_id
544
- else:
545
- source_by_edge_id[item.as_id()] = "edge"
698
+ name = class_name or "Instances"
699
+ nodes = [item for item in items if isinstance(item, dm.NodeApply)]
700
+ edges = [item for item in items if isinstance(item, dm.EdgeApply)]
546
701
  try:
547
702
  upserted = client.data_modeling.instances.apply(
548
703
  nodes,
@@ -552,29 +707,28 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
552
707
  skip_on_version_conflict=True,
553
708
  )
554
709
  except CogniteAPIError as e:
555
- result = UploadResult[InstanceId](name="Instances", issues=read_issues)
556
- result.error_messages.append(str(e))
557
- result.failed_upserted.update(item.as_id() for item in e.failed + e.unknown)
558
- result.created.update(item.as_id() for item in e.successful)
559
- yield result
710
+ if len(items) == 1:
711
+ yield UploadResult(
712
+ name=name,
713
+ issues=read_issues,
714
+ failed_items=items,
715
+ error_messages=[str(e)],
716
+ failed_upserted={item.as_id() for item in items}, # type: ignore[attr-defined]
717
+ )
718
+ else:
719
+ half = len(items) // 2
720
+ yield from self._upload_to_cdf(client, items[:half], dry_run, read_issues, class_name)
721
+ yield from self._upload_to_cdf(client, items[half:], dry_run, read_issues, class_name)
560
722
  else:
561
- for instances, ids_by_source in [
562
- (upserted.nodes, source_by_node_id),
563
- (upserted.edges, source_by_edge_id),
564
- ]:
565
- for name, subinstances in itertools.groupby(
566
- sorted(instances, key=lambda i: ids_by_source.get(i.as_id(), "")), # type: ignore[call-overload, index, attr-defined]
567
- key=lambda i: ids_by_source.get(i.as_id(), ""), # type: ignore[index, attr-defined]
568
- ):
569
- result = UploadResult(name=name, issues=read_issues)
570
- for instance in subinstances: # type: ignore[attr-defined]
571
- if instance.was_modified and instance.created_time == instance.last_updated_time:
572
- result.created.add(instance.as_id())
573
- elif instance.was_modified:
574
- result.changed.add(instance.as_id())
575
- else:
576
- result.unchanged.add(instance.as_id())
577
- yield result
723
+ result = UploadResult(name=name, issues=read_issues) # type: ignore[var-annotated]
724
+ for instance in itertools.chain(upserted.nodes, upserted.edges): # type: ignore[attr-defined]
725
+ if instance.was_modified and instance.created_time == instance.last_updated_time:
726
+ result.created.add(instance.as_id())
727
+ elif instance.was_modified:
728
+ result.changed.add(instance.as_id())
729
+ else:
730
+ result.unchanged.add(instance.as_id())
731
+ yield result
578
732
 
579
733
 
580
734
  def _get_field_value_types(cls, info):