cognite-neat 0.109.4__py3-none-any.whl → 0.111.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (88)
  1. cognite/neat/_alpha.py +8 -0
  2. cognite/neat/_client/_api/schema.py +43 -1
  3. cognite/neat/_client/data_classes/schema.py +4 -4
  4. cognite/neat/_constants.py +15 -1
  5. cognite/neat/_graph/extractors/__init__.py +4 -0
  6. cognite/neat/_graph/extractors/_classic_cdf/_base.py +8 -16
  7. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +48 -19
  8. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +23 -17
  9. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +15 -17
  10. cognite/neat/_graph/extractors/_dict.py +102 -0
  11. cognite/neat/_graph/extractors/_dms.py +27 -40
  12. cognite/neat/_graph/extractors/_dms_graph.py +30 -3
  13. cognite/neat/_graph/extractors/_iodd.py +3 -3
  14. cognite/neat/_graph/extractors/_mock_graph_generator.py +9 -7
  15. cognite/neat/_graph/extractors/_raw.py +67 -0
  16. cognite/neat/_graph/loaders/_base.py +20 -4
  17. cognite/neat/_graph/loaders/_rdf2dms.py +476 -383
  18. cognite/neat/_graph/queries/_base.py +163 -133
  19. cognite/neat/_graph/transformers/__init__.py +1 -3
  20. cognite/neat/_graph/transformers/_classic_cdf.py +6 -22
  21. cognite/neat/_graph/transformers/_rdfpath.py +2 -49
  22. cognite/neat/_issues/__init__.py +1 -6
  23. cognite/neat/_issues/_base.py +21 -252
  24. cognite/neat/_issues/_contextmanagers.py +46 -0
  25. cognite/neat/_issues/_factory.py +69 -0
  26. cognite/neat/_issues/errors/__init__.py +20 -4
  27. cognite/neat/_issues/errors/_external.py +7 -0
  28. cognite/neat/_issues/errors/_wrapper.py +81 -3
  29. cognite/neat/_issues/formatters.py +4 -4
  30. cognite/neat/_issues/warnings/__init__.py +3 -2
  31. cognite/neat/_issues/warnings/_properties.py +8 -0
  32. cognite/neat/_issues/warnings/user_modeling.py +12 -0
  33. cognite/neat/_rules/_constants.py +12 -0
  34. cognite/neat/_rules/_shared.py +3 -2
  35. cognite/neat/_rules/analysis/__init__.py +2 -3
  36. cognite/neat/_rules/analysis/_base.py +430 -259
  37. cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
  38. cognite/neat/_rules/exporters/_rules2excel.py +3 -9
  39. cognite/neat/_rules/exporters/_rules2instance_template.py +2 -2
  40. cognite/neat/_rules/exporters/_rules2ontology.py +5 -4
  41. cognite/neat/_rules/importers/_base.py +2 -47
  42. cognite/neat/_rules/importers/_dms2rules.py +7 -10
  43. cognite/neat/_rules/importers/_dtdl2rules/dtdl_importer.py +2 -2
  44. cognite/neat/_rules/importers/_rdf/_inference2rules.py +66 -26
  45. cognite/neat/_rules/importers/_rdf/_shared.py +1 -1
  46. cognite/neat/_rules/importers/_spreadsheet2rules.py +12 -9
  47. cognite/neat/_rules/models/_base_rules.py +0 -2
  48. cognite/neat/_rules/models/data_types.py +7 -0
  49. cognite/neat/_rules/models/dms/_exporter.py +9 -8
  50. cognite/neat/_rules/models/dms/_rules.py +29 -2
  51. cognite/neat/_rules/models/dms/_rules_input.py +9 -1
  52. cognite/neat/_rules/models/dms/_validation.py +115 -5
  53. cognite/neat/_rules/models/entities/_loaders.py +1 -1
  54. cognite/neat/_rules/models/entities/_multi_value.py +2 -2
  55. cognite/neat/_rules/models/entities/_single_value.py +8 -3
  56. cognite/neat/_rules/models/entities/_wrapped.py +2 -2
  57. cognite/neat/_rules/models/information/_rules.py +18 -17
  58. cognite/neat/_rules/models/information/_rules_input.py +3 -1
  59. cognite/neat/_rules/models/information/_validation.py +66 -17
  60. cognite/neat/_rules/transformers/__init__.py +8 -2
  61. cognite/neat/_rules/transformers/_converters.py +234 -44
  62. cognite/neat/_rules/transformers/_verification.py +5 -10
  63. cognite/neat/_session/_base.py +6 -4
  64. cognite/neat/_session/_explore.py +39 -0
  65. cognite/neat/_session/_inspect.py +25 -6
  66. cognite/neat/_session/_prepare.py +12 -0
  67. cognite/neat/_session/_read.py +88 -20
  68. cognite/neat/_session/_set.py +7 -1
  69. cognite/neat/_session/_show.py +11 -123
  70. cognite/neat/_session/_state.py +6 -2
  71. cognite/neat/_session/_subset.py +64 -0
  72. cognite/neat/_session/_to.py +177 -19
  73. cognite/neat/_store/_graph_store.py +9 -246
  74. cognite/neat/_utils/rdf_.py +36 -5
  75. cognite/neat/_utils/spreadsheet.py +44 -1
  76. cognite/neat/_utils/text.py +124 -37
  77. cognite/neat/_utils/upload.py +2 -0
  78. cognite/neat/_version.py +2 -2
  79. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/METADATA +1 -1
  80. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/RECORD +83 -82
  81. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/WHEEL +1 -1
  82. cognite/neat/_graph/queries/_construct.py +0 -187
  83. cognite/neat/_graph/queries/_shared.py +0 -173
  84. cognite/neat/_rules/analysis/_dms.py +0 -57
  85. cognite/neat/_rules/analysis/_information.py +0 -249
  86. cognite/neat/_rules/models/_rdfpath.py +0 -372
  87. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/LICENSE +0 -0
  88. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/entry_points.txt +0 -0
@@ -4,15 +4,14 @@ import urllib.parse
4
4
  import warnings
5
5
  from collections import defaultdict
6
6
  from collections.abc import Iterable, Sequence
7
- from graphlib import TopologicalSorter
7
+ from dataclasses import dataclass
8
8
  from pathlib import Path
9
- from typing import Any, cast, get_args
9
+ from typing import Any, Literal, cast, get_args, overload
10
10
 
11
11
  import yaml
12
12
  from cognite.client import CogniteClient
13
13
  from cognite.client import data_modeling as dm
14
14
  from cognite.client.data_classes.capabilities import Capability, DataModelInstancesAcl
15
- from cognite.client.data_classes.data_modeling import ViewId
16
15
  from cognite.client.data_classes.data_modeling.data_types import ListablePropertyType
17
16
  from cognite.client.data_classes.data_modeling.ids import InstanceId
18
17
  from cognite.client.data_classes.data_modeling.views import SingleEdgeConnection
@@ -21,251 +20,114 @@ from pydantic import BaseModel, ValidationInfo, create_model, field_validator
21
20
  from rdflib import RDF, URIRef
22
21
 
23
22
  from cognite.neat._client import NeatClient
23
+ from cognite.neat._client._api_client import SchemaAPI
24
24
  from cognite.neat._constants import DMS_DIRECT_RELATION_LIST_LIMIT, is_readonly_property
25
- from cognite.neat._graph._tracking import LogTracker, Tracker
26
- from cognite.neat._issues import IssueList, NeatIssue, NeatIssueList
25
+ from cognite.neat._issues import IssueList, NeatError, NeatIssue, catch_issues
27
26
  from cognite.neat._issues.errors import (
28
- ResourceConversionError,
27
+ AuthorizationError,
29
28
  ResourceCreationError,
30
29
  ResourceDuplicatedError,
31
- ResourceRetrievalError,
30
+ ResourceNotFoundError,
32
31
  )
33
- from cognite.neat._issues.warnings import PropertyDirectRelationLimitWarning, PropertyTypeNotSupportedWarning
34
- from cognite.neat._rules.analysis._dms import DMSAnalysis
32
+ from cognite.neat._issues.warnings import (
33
+ NeatValueWarning,
34
+ PropertyDirectRelationLimitWarning,
35
+ PropertyMultipleValueWarning,
36
+ PropertyTypeNotSupportedWarning,
37
+ ResourceNeatWarning,
38
+ )
39
+ from cognite.neat._rules.analysis import RulesAnalysis
40
+ from cognite.neat._rules.analysis._base import ViewQuery, ViewQueryDict
35
41
  from cognite.neat._rules.models import DMSRules
36
42
  from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE, Json, String
37
- from cognite.neat._rules.models.entities._single_value import ViewEntity
43
+ from cognite.neat._rules.models.information._rules import InformationRules
38
44
  from cognite.neat._shared import InstanceType
39
45
  from cognite.neat._store import NeatGraphStore
40
46
  from cognite.neat._utils.auxiliary import create_sha256_hash
41
47
  from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
42
- from cognite.neat._utils.rdf_ import remove_namespace_from_uri
48
+ from cognite.neat._utils.rdf_ import namespace_as_space, remove_namespace_from_uri, split_uri
49
+ from cognite.neat._utils.text import NamingStandardization, humanize_collection
43
50
  from cognite.neat._utils.upload import UploadResult
44
51
 
45
- from ._base import _END_OF_CLASS, CDFLoader
52
+ from ._base import _END_OF_CLASS, _START_OF_CLASS, CDFLoader
53
+
54
+
55
+ @dataclass
56
+ class _ViewIterator:
57
+ """This is a helper class to iterate over the views
58
+
59
+ Args:
60
+ view_id: The view to iterate over
61
+ instance_count: The number of instances in the view
62
+ hierarchical_properties: The properties that are hierarchical, meaning they point to the same instances.
63
+ query: The query to get the instances from the store.
64
+ view: The view object from the client.
65
+ """
66
+
67
+ view_id: dm.ViewId
68
+ instance_count: int
69
+ hierarchical_properties: set[str]
70
+ query: ViewQuery
71
+ view: dm.View | None = None
72
+
73
+
74
+ @dataclass
75
+ class _Projection:
76
+ """This is a helper class to project triples to a node and/or edge(s)"""
77
+
78
+ view_id: dm.ViewId
79
+ used_for: Literal["node", "edge", "all"]
80
+ pydantic_cls: type[BaseModel]
81
+ edge_by_type: dict[str, tuple[str, dm.EdgeConnection]]
82
+ edge_by_prop_id: dict[str, tuple[str, dm.EdgeConnection]]
46
83
 
47
84
 
48
85
  class DMSLoader(CDFLoader[dm.InstanceApply]):
49
86
  """Loads Instances to Cognite Data Fusion Data Model Service from NeatGraph.
50
87
 
51
88
  Args:
52
- graph_store (NeatGraphStore): The graph store to load the data into.
53
- data_model (dm.DataModel[dm.View] | None): The data model to load.
89
+ dms_rules (DMSRules): The DMS rules used by the data model.
90
+ info_rules (InformationRules): The information rules used by the data model, used to
91
+ look up the instances in the store.
92
+ graph_store (NeatGraphStore): The graph store to load the data from.
54
93
  instance_space (str): The instance space to load the data into.
55
- class_neat_id_by_view_id (dict[ViewId, URIRef] | None): A mapping from view id to class name. Defaults to None.
56
94
  create_issues (Sequence[NeatIssue] | None): A list of issues that occurred during reading. Defaults to None.
57
- tracker (type[Tracker] | None): The tracker to use. Defaults to None.
58
- rules (DMSRules | None): The DMS rules used by the data model. This is used to lookup the
59
- instances in the store. Defaults to None.
60
95
  client (NeatClient | None): This is used to lookup containers such that the loader
61
96
  creates instances in accordance with required constraints. Defaults to None.
97
+ unquote_external_ids (bool): If True, the loader will unquote external ids before creating the instances.
98
+ neat_prefix_by_predicate_uri (dict[URIRef, str] | None): A dictionary that maps a predicate URIRef to a
99
+ prefix that Neat added to the object upon extraction. This is used to remove the prefix from the
100
+ object before creating the instance.
62
101
  """
63
102
 
64
103
  def __init__(
65
104
  self,
105
+ dms_rules: DMSRules,
106
+ info_rules: InformationRules,
66
107
  graph_store: NeatGraphStore,
67
- data_model: dm.DataModel[dm.View] | None,
68
108
  instance_space: str,
69
- class_neat_id_by_view_id: dict[ViewId, URIRef] | None = None,
70
- create_issues: Sequence[NeatIssue] | None = None,
71
- tracker: type[Tracker] | None = None,
72
- rules: DMSRules | None = None,
109
+ space_property: str | None = None,
110
+ use_source_space: bool = False,
73
111
  client: NeatClient | None = None,
112
+ create_issues: Sequence[NeatIssue] | None = None,
74
113
  unquote_external_ids: bool = False,
114
+ neat_prefix_by_predicate_uri: dict[URIRef, str] | None = None,
115
+ neat_prefix_by_type_uri: dict[URIRef, str] | None = None,
75
116
  ):
76
117
  super().__init__(graph_store)
77
- self.data_model = data_model
78
- self.instance_space = instance_space
79
- self.class_neat_id_by_view_id = class_neat_id_by_view_id or {}
118
+ self.dms_rules = dms_rules
119
+ self.info_rules = info_rules
120
+ self.neat_prefix_by_predicate_uri = neat_prefix_by_predicate_uri or {}
121
+ self.neat_prefix_by_type_uri = neat_prefix_by_type_uri or {}
122
+ self._instance_space = instance_space
123
+ self._space_property = space_property
124
+ self._use_source_space = use_source_space
125
+ self._space_by_instance_uri: dict[URIRef, str] = defaultdict(lambda: instance_space)
126
+ self._external_id_by_uri: dict[URIRef, str] = {}
80
127
  self._issues = IssueList(create_issues or [])
81
- self._tracker: type[Tracker] = tracker or LogTracker
82
- self.rules = rules
83
128
  self._client = client
84
129
  self._unquote_external_ids = unquote_external_ids
85
130
 
86
- @classmethod
87
- def from_data_model_id(
88
- cls,
89
- client: NeatClient,
90
- data_model_id: dm.DataModelId,
91
- graph_store: NeatGraphStore,
92
- instance_space: str,
93
- ) -> "DMSLoader":
94
- issues: list[NeatIssue] = []
95
- data_model: dm.DataModel[dm.View] | None = None
96
- try:
97
- data_model = client.data_modeling.data_models.retrieve(data_model_id, inline_views=True).latest_version()
98
- except Exception as e:
99
- issues.append(ResourceRetrievalError(data_model_id, "data model", str(e)))
100
-
101
- return cls(graph_store, data_model, instance_space, {}, issues, client=client)
102
-
103
- @classmethod
104
- def from_rules(
105
- cls,
106
- rules: DMSRules,
107
- graph_store: NeatGraphStore,
108
- instance_space: str,
109
- client: NeatClient | None = None,
110
- unquote_external_ids: bool = False,
111
- ) -> "DMSLoader":
112
- issues: list[NeatIssue] = []
113
- data_model: dm.DataModel[dm.View] | None = None
114
- try:
115
- data_model = rules.as_schema().as_read_model()
116
- except Exception as e:
117
- issues.append(
118
- ResourceConversionError(
119
- identifier=rules.metadata.as_identifier(),
120
- resource_type="DMS Rules",
121
- target_format="read DMS model",
122
- reason=str(e),
123
- )
124
- )
125
-
126
- class_neat_id_by_view_id = {view.view.as_id(): view.logical for view in rules.views if view.logical}
127
-
128
- return cls(
129
- graph_store,
130
- data_model,
131
- instance_space,
132
- class_neat_id_by_view_id,
133
- issues,
134
- rules=rules,
135
- client=client,
136
- unquote_external_ids=unquote_external_ids,
137
- )
138
-
139
- def _load(self, stop_on_exception: bool = False) -> Iterable[dm.InstanceApply | NeatIssue | type[_END_OF_CLASS]]:
140
- if self._issues.has_errors and stop_on_exception:
141
- raise self._issues.as_exception()
142
- elif self._issues.has_errors:
143
- yield from self._issues
144
- return
145
- if not self.data_model:
146
- # There should already be an error in this case.
147
- return
148
-
149
- views_with_linked_properties = (
150
- DMSAnalysis(self.rules).views_with_properties_linked_to_classes(consider_inheritance=True)
151
- if self.rules and self.rules.metadata.logical
152
- else None
153
- )
154
-
155
- view_and_count_by_id = self._select_views_with_instances(self.data_model.views)
156
-
157
- if self._client:
158
- view_and_count_by_id, properties_point_to_self = self._sort_by_direct_relation_dependencies(
159
- view_and_count_by_id
160
- )
161
- else:
162
- properties_point_to_self = {}
163
-
164
- view_ids: list[str] = []
165
- for view_id in view_and_count_by_id.keys():
166
- view_ids.append(repr(view_id))
167
- if view_id in properties_point_to_self:
168
- # If the views have a dependency on themselves, we need to run it twice.
169
- view_ids.append(f"{view_id!r} (self)")
170
-
171
- tracker = self._tracker(type(self).__name__, view_ids, "views")
172
- for view_id, (view, instance_count) in view_and_count_by_id.items():
173
- pydantic_cls, edge_by_type, edge_by_prop_id, issues = self._create_validation_classes(view) # type: ignore[var-annotated]
174
- yield from issues
175
- tracker.issue(issues)
176
-
177
- if view_id in properties_point_to_self:
178
- # If the view has a dependency on itself, we need to run it twice.
179
- # First, to ensure that all nodes are created, and then to add the direct relations.
180
- # This only applies if there is a require constraint on the container, if not
181
- # we can create an empty node on the fly.
182
- iterations = [properties_point_to_self[view_id], set()]
183
- else:
184
- iterations = [set()]
185
-
186
- for skip_properties in iterations:
187
- if skip_properties:
188
- track_id = f"{view_id} (self)"
189
- else:
190
- track_id = repr(view_id)
191
- tracker.start(track_id)
192
- if views_with_linked_properties:
193
- # we need graceful exit if the view is not in the view_property_pairs
194
- property_link_pairs = views_with_linked_properties.get(ViewEntity.from_id(view_id))
195
-
196
- if class_neat_id := self.class_neat_id_by_view_id.get(view_id):
197
- reader = self.graph_store._read_via_rules_linkage(class_neat_id, property_link_pairs)
198
- else:
199
- error_view = ResourceRetrievalError(view_id, "view", "View not linked to class")
200
- tracker.issue(error_view)
201
- if stop_on_exception:
202
- raise error_view
203
- yield error_view
204
- continue
205
- else:
206
- # this assumes no changes in the suffix of view and class
207
- reader = self.graph_store.read(view.external_id)
208
-
209
- instance_iterable = iterate_progress_bar_if_above_config_threshold(
210
- reader, instance_count, f"Loading {track_id}"
211
- )
212
-
213
- for identifier, properties in instance_iterable:
214
- start_node, end_node = self._pop_start_end_node(properties)
215
- is_edge = start_node and end_node
216
- if (is_edge and view.used_for == "node") or (not is_edge and view.used_for == "edge"):
217
- instance_type = "edge" if is_edge else "node"
218
- creation_error = ResourceCreationError(
219
- identifier,
220
- instance_type,
221
- error=f"{instance_type.capitalize()} found in {view.used_for} view",
222
- )
223
- tracker.issue(creation_error)
224
- if stop_on_exception:
225
- raise creation_error
226
- yield creation_error
227
- continue
228
-
229
- if skip_properties:
230
- properties = {k: v for k, v in properties.items() if k not in skip_properties}
231
-
232
- if start_node and end_node:
233
- # Is an edge
234
- try:
235
- yield self._create_edge_with_properties(
236
- identifier, properties, start_node, end_node, pydantic_cls, view_id
237
- )
238
- except ValueError as e:
239
- error_edge = ResourceCreationError(identifier, "edge", error=str(e))
240
- tracker.issue(error_edge)
241
- if stop_on_exception:
242
- raise error_edge from e
243
- yield error_edge
244
- else:
245
- try:
246
- yield self._create_node(identifier, properties, pydantic_cls, view_id)
247
- except ValueError as e:
248
- error_node = ResourceCreationError(identifier, "node", error=str(e))
249
- tracker.issue(error_node)
250
- if stop_on_exception:
251
- raise error_node from e
252
- yield error_node
253
- yield from self._create_edges_without_properties(
254
- identifier, properties, edge_by_type, edge_by_prop_id, tracker
255
- )
256
- tracker.finish(track_id)
257
- yield _END_OF_CLASS
258
-
259
- @staticmethod
260
- def _pop_start_end_node(properties: dict[str | InstanceType, list[str]]) -> tuple[str | None, str | None]:
261
- start_node = properties.pop("startNode", [None])[0]
262
- if not start_node:
263
- start_node = properties.pop("start_node", [None])[0]
264
- end_node = properties.pop("endNode", [None])[0]
265
- if not end_node:
266
- end_node = properties.pop("end_node", [None])[0]
267
- return start_node, end_node
268
-
269
131
  def write_to_file(self, filepath: Path) -> None:
270
132
  if filepath.suffix not in [".json", ".yaml", ".yml"]:
271
133
  raise ValueError(f"File format {filepath.suffix} is not supported")
@@ -286,76 +148,203 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
286
148
  else:
287
149
  yaml.safe_dump(dumped, f, sort_keys=False)
288
150
 
289
- def _select_views_with_instances(self, views: list[dm.View]) -> dict[dm.ViewId, tuple[dm.View, int]]:
290
- """Selects the views with data."""
291
- view_and_count_by_id: dict[dm.ViewId, tuple[dm.View, int]] = {}
292
- uri_by_type: dict[str, URIRef] = {
293
- remove_namespace_from_uri(uri[0]): uri[0] # type: ignore[misc]
294
- for uri in self.graph_store.queries.list_types()
295
- }
296
- for view in views:
297
- view_id = view.as_id()
298
- neat_id = self.class_neat_id_by_view_id.get(view_id)
299
- if neat_id is not None:
300
- count = self.graph_store.count_of_id(neat_id)
301
- elif view.external_id in uri_by_type:
302
- count = self.graph_store.count_of_type(uri_by_type[view.external_id])
303
- else:
151
+ def _load(
152
+ self, stop_on_exception: bool = False
153
+ ) -> Iterable[dm.InstanceApply | NeatIssue | type[_END_OF_CLASS] | _START_OF_CLASS]:
154
+ if self._issues.has_errors and stop_on_exception:
155
+ raise self._issues.as_exception()
156
+ elif self._issues.has_errors:
157
+ yield from self._issues
158
+ return
159
+ view_iterations, issues = self._create_view_iterations()
160
+ yield from issues
161
+ if self._space_property:
162
+ yield from self._lookup_space_by_uri(view_iterations, stop_on_exception)
163
+
164
+ if self._client:
165
+ space_creation = self._create_instance_space_if_not_exists()
166
+ yield from space_creation.warnings
167
+ if space_creation.has_errors and stop_on_exception:
168
+ raise space_creation.as_exception()
169
+ yield from space_creation.errors
170
+
171
+ if self.neat_prefix_by_type_uri:
172
+ self._lookup_identifier_by_uri()
173
+
174
+ for it in view_iterations:
175
+ view = it.view
176
+ if view is None:
177
+ yield ResourceNotFoundError(it.view_id, "view", more=f"Skipping {it.instance_count} instances...")
304
178
  continue
305
- if count > 0:
306
- view_and_count_by_id[view_id] = view, count
179
+ projection, issues = self._create_projection(view)
180
+ yield from issues
181
+ query = it.query
182
+ reader = self.graph_store.read(
183
+ query.rdf_type,
184
+ property_renaming_config=query.property_renaming_config,
185
+ remove_uri_namespace=False,
186
+ )
187
+ instance_iterable = iterate_progress_bar_if_above_config_threshold(
188
+ reader, it.instance_count, f"Loading {it.view_id!r}"
189
+ )
190
+ yield _START_OF_CLASS(view.external_id)
191
+ for identifier, properties in instance_iterable:
192
+ yield from self._create_instances(
193
+ identifier, properties, projection, stop_on_exception, exclude=it.hierarchical_properties
194
+ )
195
+ if it.hierarchical_properties:
196
+ # Force the creation of instances, before we create the hierarchical properties.
197
+ yield _END_OF_CLASS
198
+ yield _START_OF_CLASS(f"{view.external_id} ({humanize_collection(it.hierarchical_properties)})")
199
+ yield from self._create_hierarchical_properties(it, projection, stop_on_exception)
200
+ if reader is not instance_iterable:
201
+ print(f"Loaded {it.instance_count} instances for {it.view_id!r}")
202
+
203
+ yield _END_OF_CLASS
204
+
205
+ def _create_hierarchical_properties(
206
+ self, it: _ViewIterator, projection: _Projection, stop_on_exception: bool
207
+ ) -> Iterable[dm.InstanceApply | NeatIssue]:
208
+ reader = self.graph_store.read(it.query.rdf_type, property_renaming_config=it.query.property_renaming_config)
209
+ instance_iterable = iterate_progress_bar_if_above_config_threshold(
210
+ reader,
211
+ it.instance_count,
212
+ f"Loading {it.view_id!r} hierarchical properties: {humanize_collection(it.hierarchical_properties)}",
213
+ )
214
+ for identifier, properties in instance_iterable:
215
+ yield from self._create_instances(
216
+ identifier, properties, projection, stop_on_exception, include=it.hierarchical_properties
217
+ )
307
218
 
308
- return view_and_count_by_id
219
+ def _create_view_iterations(self) -> tuple[list[_ViewIterator], IssueList]:
220
+ view_query_by_id = RulesAnalysis(self.info_rules, self.dms_rules).view_query_by_id
221
+ iterations_by_view_id = self._select_views_with_instances(view_query_by_id)
222
+ if self._client:
223
+ issues = IssueList()
224
+ views = self._client.data_modeling.views.retrieve(
225
+ list(iterations_by_view_id.keys()), include_inherited_properties=True
226
+ )
227
+ if missing := set(iterations_by_view_id) - {view.as_id() for view in views}:
228
+ for missing_view in missing:
229
+ issues.append(ResourceNotFoundError(missing_view, "view", more="The view is not found in CDF."))
230
+ return [], issues
231
+ # Todo: Remove if this turns out to be unnecessary.
232
+ hierarchical_properties_by_view_id: dict[dm.ViewId, set[str]] = {}
233
+ else:
234
+ views = dm.ViewList([])
235
+ with catch_issues() as issues:
236
+ read_model = self.dms_rules.as_schema().as_read_model()
237
+ views.extend(read_model.views)
238
+ if issues.has_errors:
239
+ return [], issues
240
+ hierarchical_properties_by_view_id = {}
241
+ views_by_id = {view.as_id(): view for view in views}
242
+
243
+ def sort_by_instance_type(id_: dm.ViewId) -> int:
244
+ if id_ not in views_by_id:
245
+ return 0
246
+ return {"node": 1, "all": 1, "edge": 3}.get(views_by_id[id_].used_for, 0)
247
+
248
+ ordered_view_ids = SchemaAPI.get_view_order_by_direct_relation_constraints(views)
249
+ # Sort is stable in Python, so we will keep the order of the views:
250
+ ordered_view_ids.sort(key=sort_by_instance_type)
251
+ view_iterations: list[_ViewIterator] = []
252
+ for view_id in ordered_view_ids:
253
+ if view_id not in iterations_by_view_id:
254
+ continue
255
+ view_iteration = iterations_by_view_id[view_id]
256
+ view_iteration.view = views_by_id.get(view_id)
257
+ view_iteration.hierarchical_properties = hierarchical_properties_by_view_id.get(view_id, set())
258
+ view_iterations.append(view_iteration)
259
+ return view_iterations, issues
309
260
 
310
- def _sort_by_direct_relation_dependencies(
311
- self, view_and_count_by_id: dict[dm.ViewId, tuple[dm.View, int]]
312
- ) -> tuple[dict[dm.ViewId, tuple[dm.View, int]], dict[dm.ViewId, set[str]]]:
313
- """Sorts the views by container constraints."""
314
- if not self._client:
315
- return view_and_count_by_id, {}
316
- # We need to retrieve the views to ensure we get all properties, such that we can find all
317
- # the containers that the view is linked to.
318
- views = self._client.data_modeling.views.retrieve(
319
- list(view_and_count_by_id.keys()), include_inherited_properties=True
261
+ def _select_views_with_instances(self, view_query_by_id: ViewQueryDict) -> dict[dm.ViewId, _ViewIterator]:
262
+ """Selects the views with data."""
263
+ view_iterations: dict[dm.ViewId, _ViewIterator] = {}
264
+ for view_id, query in view_query_by_id.items():
265
+ count = self.graph_store.queries.count_of_type(query.rdf_type)
266
+ if count > 0:
267
+ view_iterations[view_id] = _ViewIterator(view_id, count, set(), query)
268
+ return view_iterations
269
+
270
+ def _lookup_space_by_uri(self, view_iterations: list[_ViewIterator], stop_on_exception: bool = False) -> IssueList:
271
+ issues = IssueList()
272
+ if self._space_property is None:
273
+ return issues
274
+ total = sum(it.instance_count for it in view_iterations)
275
+ properties_by_uriref = self.graph_store.queries.properties()
276
+ space_property_uri = next((k for k, v in properties_by_uriref.items() if v == self._space_property), None)
277
+ if space_property_uri is None:
278
+ error: ResourceNotFoundError[str, str] = ResourceNotFoundError(
279
+ self._space_property,
280
+ "property",
281
+ more=f"Could not find the {self._space_property} in the graph.",
282
+ )
283
+ if stop_on_exception:
284
+ raise error
285
+ issues.append(error)
286
+ return issues
287
+
288
+ instance_iterable = self.graph_store.queries.list_instances_ids_by_space(space_property_uri)
289
+ instance_iterable = iterate_progress_bar_if_above_config_threshold(
290
+ instance_iterable, total, f"Looking up spaces for {total} instances..."
320
291
  )
321
- container_ids_by_view_id = {view.as_id(): view.referenced_containers() for view in views}
322
- referenced_containers = {
323
- container for containers in container_ids_by_view_id.values() for container in containers
324
- }
325
- containers = self._client.data_modeling.containers.retrieve(list(referenced_containers))
326
- container_by_id = {container.as_id(): container for container in containers}
327
-
328
- dependency_on_self: dict[dm.ViewId, set[str]] = defaultdict(set)
329
- view_id_by_dependencies: dict[dm.ViewId, set[dm.ViewId]] = {}
330
- for view in views:
331
- view_id = view.as_id()
332
- dependencies = set()
333
- for prop_id, prop in view.properties.items():
334
- if isinstance(prop, dm.MappedProperty) and isinstance(prop.type, dm.DirectRelation) and prop.source:
335
- container = container_by_id[prop.container]
336
- has_require_constraint = any(
337
- isinstance(constraint, dm.RequiresConstraint) for constraint in container.constraints.values()
292
+ neat_prefix = self.neat_prefix_by_predicate_uri.get(space_property_uri)
293
+ warned_spaces: set[str] = set()
294
+ for instance, space in instance_iterable:
295
+ if neat_prefix:
296
+ space = space.removeprefix(neat_prefix)
297
+
298
+ clean_space = NamingStandardization.standardize_space_str(space)
299
+ if clean_space != space and space not in warned_spaces:
300
+ issues.append(
301
+ NeatValueWarning(
302
+ f"Invalid space in property {self._space_property}: {space}. Fixed to {clean_space}"
338
303
  )
339
- if has_require_constraint and prop.source == view_id:
340
- dependency_on_self[view_id].add(prop_id)
341
- elif has_require_constraint:
342
- dependencies.add(prop.source)
343
- view_id_by_dependencies[view_id] = dependencies
344
-
345
- ordered_view_ids = TopologicalSorter(view_id_by_dependencies).static_order()
346
-
347
- return {
348
- view_id: view_and_count_by_id[view_id] for view_id in ordered_view_ids if view_id in view_and_count_by_id
349
- }, dict(dependency_on_self)
350
-
351
- def _create_validation_classes(
352
- self, view: dm.View
353
- ) -> tuple[
354
- type[BaseModel],
355
- dict[str, tuple[str, dm.EdgeConnection]],
356
- dict[str, tuple[str, dm.EdgeConnection]],
357
- NeatIssueList,
358
- ]:
304
+ )
305
+ warned_spaces.add(space)
306
+
307
+ self._space_by_instance_uri[instance] = clean_space
308
+ return issues
309
+
310
+ def _lookup_identifier_by_uri(self) -> None:
311
+ if not self.neat_prefix_by_type_uri:
312
+ return
313
+
314
+ count = sum(count for _, count in self.graph_store.queries.summarize_instances())
315
+ instance_iterable = self.graph_store.queries.list_instances_ids()
316
+ instance_iterable = iterate_progress_bar_if_above_config_threshold(
317
+ instance_iterable, count, f"Looking up identifiers for {count} instances..."
318
+ )
319
+ count_by_identifier: dict[str, list[URIRef]] = defaultdict(list)
320
+ for instance_uri, type in instance_iterable:
321
+ if type not in self.neat_prefix_by_type_uri:
322
+ continue
323
+ prefix = self.neat_prefix_by_type_uri[type]
324
+ identifier = remove_namespace_from_uri(instance_uri)
325
+ if self._unquote_external_ids:
326
+ identifier = urllib.parse.unquote(identifier)
327
+ count_by_identifier[identifier.removeprefix(prefix)].append(instance_uri)
328
+
329
+ for identifier, uris in count_by_identifier.items():
330
+ if len(uris) == 1:
331
+ self._external_id_by_uri[uris[0]] = identifier
332
+
333
+ def _create_instance_space_if_not_exists(self) -> IssueList:
334
+ issues = IssueList()
335
+ if not self._client:
336
+ return issues
337
+
338
+ instance_spaces = set(self._space_by_instance_uri.values()) - {self._instance_space}
339
+ existing_spaces = {space.space for space in self._client.data_modeling.spaces.retrieve(list(instance_spaces))}
340
+ if missing_spaces := (instance_spaces - existing_spaces):
341
+ try:
342
+ self._client.data_modeling.spaces.apply([dm.SpaceApply(space=space) for space in missing_spaces])
343
+ except CogniteAPIError as e:
344
+ issues.append(AuthorizationError(f"Creating {len(missing_spaces)} instance spaces.", str(e)))
345
+ return issues
346
+
347
+ def _create_projection(self, view: dm.View) -> tuple[_Projection, IssueList]:
359
348
  issues = IssueList()
360
349
  field_definitions: dict[str, tuple[type, Any]] = {}
361
350
  edge_by_type: dict[str, tuple[str, dm.EdgeConnection]] = {}
@@ -414,9 +403,15 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
414
403
 
415
404
  def parse_list(cls, value: Any, info: ValidationInfo) -> list[str]:
416
405
  if isinstance(value, list) and list.__name__ not in _get_field_value_types(cls, info):
417
- if len(value) == 1:
418
- return value[0]
419
- raise ValueError(f"Got multiple values for {info.field_name}: {value}")
406
+ if len(value) > 1:
407
+ warnings.warn(
408
+ # the identifier is unknown, it will be set in the _create_instances method
409
+ PropertyMultipleValueWarning("", "property", str(info.field_name), value=str(value[0])),
410
+ stacklevel=2,
411
+ )
412
+ elif not value:
413
+ return None # type: ignore[return-value]
414
+ return value[0]
420
415
 
421
416
  return value
422
417
 
@@ -446,7 +441,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
446
441
  def parse_direct_relation(cls, value: list, info: ValidationInfo) -> dict | list[dict]:
447
442
  # We validate above that we only get one value for single direct relations.
448
443
  if list.__name__ in _get_field_value_types(cls, info):
449
- result = [{"space": self.instance_space, "externalId": remove_namespace_from_uri(v)} for v in value]
444
+ ids = (self._create_instance_id(v, "node", stop_on_exception=True) for v in value)
445
+ result = [id_.dump(camel_case=True, include_instance_type=False) for id_ in ids]
446
+ # Todo: Account for max_list_limit
450
447
  if len(result) <= DMS_DIRECT_RELATION_LIST_LIMIT:
451
448
  return result
452
449
  warnings.warn(
@@ -462,7 +459,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
462
459
  result.sort(key=lambda x: (x["space"], x["externalId"]))
463
460
  return result[:DMS_DIRECT_RELATION_LIST_LIMIT]
464
461
  elif value:
465
- return {"space": self.instance_space, "externalId": remove_namespace_from_uri(value[0])}
462
+ return self._create_instance_id(value[0], "node", stop_on_exception=True).dump(
463
+ camel_case=True, include_instance_type=False
464
+ )
466
465
  return {}
467
466
 
468
467
  validators["parse_direct_relation"] = field_validator(*direct_relation_by_property.keys(), mode="before")( # type: ignore[assignment]
@@ -473,78 +472,124 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
473
472
 
474
473
  def parse_direct_relation_to_unit(cls, value: Any, info: ValidationInfo) -> dict | list[dict]:
475
474
  if value:
476
- return {"space": "cdf_cdm_units", "externalId": remove_namespace_from_uri(value[0])}
475
+ external_id = remove_namespace_from_uri(value[0])
476
+ if self._unquote_external_ids:
477
+ external_id = urllib.parse.unquote(external_id)
478
+ return {"space": "cdf_cdm_units", "externalId": external_id}
477
479
  return {}
478
480
 
479
481
  validators["parse_direct_relation_to_unit"] = field_validator(*unit_properties, mode="before")( # type: ignore[assignment]
480
482
  parse_direct_relation_to_unit # type: ignore[arg-type]
481
483
  )
482
484
 
485
+ if text_fields:
486
+
487
+ def parse_text(cls, value: Any, info: ValidationInfo) -> str | list[str]:
488
+ if isinstance(value, list):
489
+ return [remove_namespace_from_uri(v) if isinstance(v, URIRef) else str(v) for v in value]
490
+ return remove_namespace_from_uri(value) if isinstance(value, URIRef) else str(value)
491
+
492
+ validators["parse_text"] = field_validator(*text_fields, mode="before")(parse_text) # type: ignore[assignment, arg-type]
493
+
483
494
  pydantic_cls = create_model(view.external_id, __validators__=validators, **field_definitions) # type: ignore[arg-type, call-overload]
484
- return pydantic_cls, edge_by_type, edge_by_prop_id, issues
495
+ return _Projection(view.as_id(), view.used_for, pydantic_cls, edge_by_type, edge_by_prop_id), issues
485
496
 
486
- def _create_node(
497
+ def _create_instances(
487
498
  self,
488
- identifier: str,
489
- properties: dict[str | InstanceType, list[str]],
490
- pydantic_cls: type[BaseModel],
491
- view_id: dm.ViewId,
492
- ) -> dm.InstanceApply:
493
- type_ = properties.pop(RDF.type, [None])[0]
494
- created = pydantic_cls.model_validate(properties)
495
- if self._unquote_external_ids:
496
- identifier = urllib.parse.unquote(identifier)
497
-
498
- return dm.NodeApply(
499
- space=self.instance_space,
500
- external_id=identifier,
501
- type=(dm.DirectRelationReference(view_id.space, view_id.external_id) if type_ is not None else None),
502
- sources=[
503
- dm.NodeOrEdgeData(source=view_id, properties=dict(created.model_dump(exclude_unset=True).items()))
504
- ],
505
- )
499
+ instance_uri: URIRef,
500
+ properties: dict[str | InstanceType, list[Any]],
501
+ projection: _Projection,
502
+ stop_on_exception: Literal[True, False] = False,
503
+ exclude: set[str] | None = None,
504
+ include: set[str] | None = None,
505
+ ) -> Iterable[dm.InstanceApply | NeatIssue]:
506
+ instance_id = self._create_instance_id(instance_uri, "node", stop_on_exception)
507
+ if not isinstance(instance_id, InstanceId):
508
+ yield instance_id
509
+ return
510
+ space, external_id = instance_id.space, instance_id.external_id
511
+ start_node, end_node = self._pop_start_end_node(properties)
512
+ is_edge = start_node and end_node
513
+ instance_type = "edge" if is_edge else "node"
514
+ if (projection.used_for == "node" and is_edge) or (projection.used_for == "edge" and not is_edge):
515
+ creation_error = ResourceCreationError(
516
+ external_id,
517
+ instance_type,
518
+ f"View used for {projection.used_for} instance {external_id!s} but is {instance_type}",
519
+ )
520
+ if stop_on_exception:
521
+ raise creation_error from None
522
+ yield creation_error
523
+ return
506
524
 
507
- def _create_edge_with_properties(
508
- self,
509
- identifier: str,
510
- properties: dict[str | InstanceType, list[str]],
511
- start_node: str,
512
- end_node: str,
513
- pydantic_cls: type[BaseModel],
514
- view_id: dm.ViewId,
515
- ) -> dm.EdgeApply:
516
- type_ = properties.pop(RDF.type, [None])[0]
517
- created = pydantic_cls.model_validate(properties)
518
- if type_ is None:
519
- raise ValueError(f"Missing type for edge {identifier}")
520
-
521
- if self._unquote_external_ids:
522
- identifier = urllib.parse.unquote(identifier)
523
-
524
- return dm.EdgeApply(
525
- space=self.instance_space,
526
- external_id=identifier,
527
- type=dm.DirectRelationReference(view_id.space, view_id.external_id),
528
- start_node=dm.DirectRelationReference(self.instance_space, start_node),
529
- end_node=dm.DirectRelationReference(self.instance_space, end_node),
530
- sources=[
531
- dm.NodeOrEdgeData(source=view_id, properties=dict(created.model_dump(exclude_unset=True).items()))
532
- ],
533
- )
525
+ if RDF.type not in properties:
526
+ error = ResourceCreationError(external_id, instance_type, "No rdf:type found")
527
+ if stop_on_exception:
528
+ raise error from None
529
+ yield error
530
+ return
531
+ _ = properties.pop(RDF.type)[0]
532
+ if exclude:
533
+ properties = {k: v for k, v in properties.items() if k not in exclude}
534
+ if include:
535
+ properties = {k: v for k, v in properties.items() if k in include}
536
+
537
+ sources = []
538
+ with catch_issues() as property_issues:
539
+ sources = [
540
+ dm.NodeOrEdgeData(
541
+ projection.view_id,
542
+ projection.pydantic_cls.model_validate(properties).model_dump(exclude_unset=True),
543
+ )
544
+ ]
545
+ for issue in property_issues:
546
+ if isinstance(issue, ResourceNeatWarning):
547
+ issue.identifier = external_id
548
+
549
+ if property_issues.has_errors and stop_on_exception:
550
+ raise property_issues.as_exception()
551
+ yield from property_issues
552
+ if not sources:
553
+ return
554
+
555
+ if start_node and end_node:
556
+ start = self._create_instance_id(start_node, "edge", stop_on_exception)
557
+ end = self._create_instance_id(end_node, "edge", stop_on_exception)
558
+ if isinstance(start, NeatError):
559
+ yield start
560
+ if isinstance(end, NeatError):
561
+ yield end
562
+ if isinstance(start, InstanceId) and isinstance(end, InstanceId):
563
+ yield dm.EdgeApply(
564
+ space=space,
565
+ external_id=external_id,
566
+ type=(projection.view_id.space, projection.view_id.external_id),
567
+ start_node=start.as_tuple(),
568
+ end_node=end.as_tuple(),
569
+ sources=sources,
570
+ )
571
+ else:
572
+ yield dm.NodeApply(
573
+ space=space,
574
+ external_id=external_id,
575
+ type=(projection.view_id.space, projection.view_id.external_id),
576
+ sources=sources,
577
+ )
578
+ yield from self._create_edges_without_properties(space, external_id, properties, projection, stop_on_exception)
534
579
 
535
580
  def _create_edges_without_properties(
536
581
  self,
582
+ space: str,
537
583
  identifier: str,
538
- properties: dict[str, list[str]],
539
- edge_by_type: dict[str, tuple[str, dm.EdgeConnection]],
540
- edge_by_prop_id: dict[str, tuple[str, dm.EdgeConnection]],
541
- tracker: Tracker,
584
+ properties: dict[str | InstanceType, list[str] | list[URIRef]],
585
+ projection: _Projection,
586
+ stop_on_exception: Literal[True, False],
542
587
  ) -> Iterable[dm.EdgeApply | NeatIssue]:
543
588
  for predicate, values in properties.items():
544
- if predicate in edge_by_type:
545
- prop_id, edge = edge_by_type[predicate]
546
- elif predicate in edge_by_prop_id:
547
- prop_id, edge = edge_by_prop_id[predicate]
589
+ if predicate in projection.edge_by_type:
590
+ prop_id, edge = projection.edge_by_type[predicate]
591
+ elif predicate in projection.edge_by_prop_id:
592
+ prop_id, edge = projection.edge_by_prop_id[predicate]
548
593
  else:
549
594
  continue
550
595
  if isinstance(edge, SingleEdgeConnection) and len(values) > 1:
@@ -553,21 +598,83 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
553
598
  identifier=identifier,
554
599
  location=f"Multiple values for single edge {edge}. Expected only one.",
555
600
  )
556
- tracker.issue(error)
557
601
  yield error
602
+ continue
558
603
  for target in values:
604
+ target_id = self._create_instance_id(target, "edge", stop_on_exception) # type: ignore[call-overload]
605
+ if not isinstance(target_id, InstanceId):
606
+ yield target_id
607
+ continue
608
+ if isinstance(target, URIRef):
609
+ target = remove_namespace_from_uri(target)
559
610
  external_id = f"{identifier}.{prop_id}.{target}"
560
- if self._unquote_external_ids:
561
- external_id = urllib.parse.unquote(external_id)
562
611
 
612
+ start_node, end_node = (
613
+ (space, identifier),
614
+ target_id.as_tuple(),
615
+ )
616
+ if edge.direction == "inwards":
617
+ start_node, end_node = end_node, start_node
563
618
  yield dm.EdgeApply(
564
- space=self.instance_space,
619
+ space=space,
565
620
  external_id=(external_id if len(external_id) < 256 else create_sha256_hash(external_id)),
566
621
  type=edge.type,
567
- start_node=dm.DirectRelationReference(self.instance_space, identifier),
568
- end_node=dm.DirectRelationReference(self.instance_space, target),
622
+ start_node=start_node,
623
+ end_node=end_node,
569
624
  )
570
625
 
626
+ @staticmethod
627
+ def _pop_start_end_node(
628
+ properties: dict[str | InstanceType, list[str] | list[URIRef]],
629
+ ) -> tuple[URIRef, URIRef] | tuple[None, None]:
630
+ start_node = properties.pop("startNode", [None])[0]
631
+ if not start_node:
632
+ start_node = properties.pop("start_node", [None])[0]
633
+ end_node = properties.pop("endNode", [None])[0]
634
+ if not end_node:
635
+ end_node = properties.pop("end_node", [None])[0]
636
+ if start_node and end_node:
637
+ return start_node, end_node # type: ignore[return-value]
638
+ return None, None
639
+
640
+ @overload
641
+ def _create_instance_id(
642
+ self, uri: URIRef, instance_type: str, stop_on_exception: Literal[False] = False
643
+ ) -> InstanceId | NeatError: ...
644
+
645
+ @overload
646
+ def _create_instance_id(
647
+ self, uri: URIRef, instance_type: str, stop_on_exception: Literal[True] = True
648
+ ) -> InstanceId: ...
649
+
650
+ def _create_instance_id(
651
+ self, uri: URIRef, instance_type: str, stop_on_exception: bool = False
652
+ ) -> InstanceId | NeatError:
653
+ space: str | None = None
654
+ external_id: str | None = None
655
+ error: NeatError | None = None
656
+ if self._use_source_space:
657
+ namespace, external_id = split_uri(uri)
658
+ space = namespace_as_space(namespace)
659
+ if space is None:
660
+ error = ResourceCreationError(uri, instance_type, f"Could not find space for {uri!s}.")
661
+ else:
662
+ space = self._space_by_instance_uri[uri]
663
+ if uri in self._external_id_by_uri:
664
+ external_id = self._external_id_by_uri[uri]
665
+ else:
666
+ external_id = remove_namespace_from_uri(uri)
667
+
668
+ if external_id and self._unquote_external_ids:
669
+ external_id = urllib.parse.unquote(external_id)
670
+ if space and external_id:
671
+ return InstanceId(space, external_id)
672
+ if error is None:
673
+ raise ValueError(f"Bug in neat. Failed to create instance ID and determine error for {uri!r}")
674
+ if stop_on_exception:
675
+ raise error
676
+ return error
677
+
571
678
  def _get_required_capabilities(self) -> list[Capability]:
572
679
  return [
573
680
  DataModelInstancesAcl(
@@ -576,7 +683,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
576
683
  DataModelInstancesAcl.Action.Write_Properties,
577
684
  DataModelInstancesAcl.Action.Read,
578
685
  ],
579
- scope=DataModelInstancesAcl.Scope.SpaceID([self.instance_space]),
686
+ scope=DataModelInstancesAcl.Scope.SpaceID([self._instance_space]),
580
687
  )
581
688
  ]
582
689
 
@@ -586,24 +693,11 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
586
693
  items: list[dm.InstanceApply],
587
694
  dry_run: bool,
588
695
  read_issues: IssueList,
696
+ class_name: str | None = None,
589
697
  ) -> Iterable[UploadResult]:
590
- nodes: list[dm.NodeApply] = []
591
- edges: list[dm.EdgeApply] = []
592
- source_by_node_id: dict[dm.NodeId, str] = {}
593
- source_by_edge_id: dict[dm.EdgeId, str] = {}
594
- for item in items:
595
- if isinstance(item, dm.NodeApply):
596
- nodes.append(item)
597
- if item.sources:
598
- source_by_node_id[item.as_id()] = item.sources[0].source.external_id
599
- else:
600
- source_by_node_id[item.as_id()] = "node"
601
- elif isinstance(item, dm.EdgeApply):
602
- edges.append(item)
603
- if item.sources:
604
- source_by_edge_id[item.as_id()] = item.sources[0].source.external_id
605
- else:
606
- source_by_edge_id[item.as_id()] = "edge"
698
+ name = class_name or "Instances"
699
+ nodes = [item for item in items if isinstance(item, dm.NodeApply)]
700
+ edges = [item for item in items if isinstance(item, dm.EdgeApply)]
607
701
  try:
608
702
  upserted = client.data_modeling.instances.apply(
609
703
  nodes,
@@ -613,29 +707,28 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
613
707
  skip_on_version_conflict=True,
614
708
  )
615
709
  except CogniteAPIError as e:
616
- result = UploadResult[InstanceId](name="Instances", issues=read_issues)
617
- result.error_messages.append(str(e))
618
- result.failed_upserted.update(item.as_id() for item in e.failed + e.unknown)
619
- result.created.update(item.as_id() for item in e.successful)
620
- yield result
710
+ if len(items) == 1:
711
+ yield UploadResult(
712
+ name=name,
713
+ issues=read_issues,
714
+ failed_items=items,
715
+ error_messages=[str(e)],
716
+ failed_upserted={item.as_id() for item in items}, # type: ignore[attr-defined]
717
+ )
718
+ else:
719
+ half = len(items) // 2
720
+ yield from self._upload_to_cdf(client, items[:half], dry_run, read_issues, class_name)
721
+ yield from self._upload_to_cdf(client, items[half:], dry_run, read_issues, class_name)
621
722
  else:
622
- for instances, ids_by_source in [
623
- (upserted.nodes, source_by_node_id),
624
- (upserted.edges, source_by_edge_id),
625
- ]:
626
- for name, subinstances in itertools.groupby(
627
- sorted(instances, key=lambda i: ids_by_source.get(i.as_id(), "")), # type: ignore[call-overload, index, attr-defined]
628
- key=lambda i: ids_by_source.get(i.as_id(), ""), # type: ignore[index, attr-defined]
629
- ):
630
- result = UploadResult(name=name, issues=read_issues)
631
- for instance in subinstances: # type: ignore[attr-defined]
632
- if instance.was_modified and instance.created_time == instance.last_updated_time:
633
- result.created.add(instance.as_id())
634
- elif instance.was_modified:
635
- result.changed.add(instance.as_id())
636
- else:
637
- result.unchanged.add(instance.as_id())
638
- yield result
723
+ result = UploadResult(name=name, issues=read_issues) # type: ignore[var-annotated]
724
+ for instance in itertools.chain(upserted.nodes, upserted.edges): # type: ignore[attr-defined]
725
+ if instance.was_modified and instance.created_time == instance.last_updated_time:
726
+ result.created.add(instance.as_id())
727
+ elif instance.was_modified:
728
+ result.changed.add(instance.as_id())
729
+ else:
730
+ result.unchanged.add(instance.as_id())
731
+ yield result
639
732
 
640
733
 
641
734
  def _get_field_value_types(cls, info):