cognite-neat 0.109.3__py3-none-any.whl → 0.110.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (67) hide show
  1. cognite/neat/_alpha.py +2 -0
  2. cognite/neat/_client/_api/schema.py +17 -1
  3. cognite/neat/_client/data_classes/schema.py +3 -3
  4. cognite/neat/_constants.py +11 -0
  5. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +9 -10
  6. cognite/neat/_graph/extractors/_iodd.py +3 -3
  7. cognite/neat/_graph/extractors/_mock_graph_generator.py +9 -7
  8. cognite/neat/_graph/loaders/_rdf2dms.py +285 -346
  9. cognite/neat/_graph/queries/_base.py +28 -92
  10. cognite/neat/_graph/transformers/__init__.py +1 -3
  11. cognite/neat/_graph/transformers/_rdfpath.py +2 -49
  12. cognite/neat/_issues/__init__.py +1 -6
  13. cognite/neat/_issues/_base.py +21 -252
  14. cognite/neat/_issues/_contextmanagers.py +46 -0
  15. cognite/neat/_issues/_factory.py +61 -0
  16. cognite/neat/_issues/errors/__init__.py +18 -4
  17. cognite/neat/_issues/errors/_wrapper.py +81 -3
  18. cognite/neat/_issues/formatters.py +4 -4
  19. cognite/neat/_issues/warnings/__init__.py +3 -2
  20. cognite/neat/_issues/warnings/_properties.py +8 -0
  21. cognite/neat/_rules/_constants.py +9 -0
  22. cognite/neat/_rules/_shared.py +3 -2
  23. cognite/neat/_rules/analysis/__init__.py +2 -3
  24. cognite/neat/_rules/analysis/_base.py +450 -258
  25. cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
  26. cognite/neat/_rules/exporters/_rules2excel.py +2 -8
  27. cognite/neat/_rules/exporters/_rules2instance_template.py +2 -2
  28. cognite/neat/_rules/exporters/_rules2ontology.py +5 -4
  29. cognite/neat/_rules/importers/_base.py +2 -47
  30. cognite/neat/_rules/importers/_dms2rules.py +7 -10
  31. cognite/neat/_rules/importers/_dtdl2rules/dtdl_importer.py +2 -2
  32. cognite/neat/_rules/importers/_rdf/_inference2rules.py +59 -25
  33. cognite/neat/_rules/importers/_rdf/_shared.py +1 -1
  34. cognite/neat/_rules/importers/_spreadsheet2rules.py +12 -9
  35. cognite/neat/_rules/models/dms/_rules.py +3 -1
  36. cognite/neat/_rules/models/dms/_rules_input.py +4 -0
  37. cognite/neat/_rules/models/dms/_validation.py +14 -4
  38. cognite/neat/_rules/models/entities/_loaders.py +1 -1
  39. cognite/neat/_rules/models/entities/_multi_value.py +2 -2
  40. cognite/neat/_rules/models/information/_rules.py +18 -17
  41. cognite/neat/_rules/models/information/_rules_input.py +2 -1
  42. cognite/neat/_rules/models/information/_validation.py +3 -1
  43. cognite/neat/_rules/transformers/__init__.py +8 -2
  44. cognite/neat/_rules/transformers/_converters.py +242 -43
  45. cognite/neat/_rules/transformers/_verification.py +5 -10
  46. cognite/neat/_session/_base.py +4 -4
  47. cognite/neat/_session/_prepare.py +12 -0
  48. cognite/neat/_session/_read.py +21 -17
  49. cognite/neat/_session/_show.py +11 -123
  50. cognite/neat/_session/_state.py +0 -2
  51. cognite/neat/_session/_subset.py +64 -0
  52. cognite/neat/_session/_to.py +63 -12
  53. cognite/neat/_store/_graph_store.py +5 -246
  54. cognite/neat/_utils/rdf_.py +2 -2
  55. cognite/neat/_utils/spreadsheet.py +44 -1
  56. cognite/neat/_utils/text.py +51 -32
  57. cognite/neat/_version.py +1 -1
  58. {cognite_neat-0.109.3.dist-info → cognite_neat-0.110.0.dist-info}/METADATA +1 -1
  59. {cognite_neat-0.109.3.dist-info → cognite_neat-0.110.0.dist-info}/RECORD +62 -64
  60. {cognite_neat-0.109.3.dist-info → cognite_neat-0.110.0.dist-info}/WHEEL +1 -1
  61. cognite/neat/_graph/queries/_construct.py +0 -187
  62. cognite/neat/_graph/queries/_shared.py +0 -173
  63. cognite/neat/_rules/analysis/_dms.py +0 -57
  64. cognite/neat/_rules/analysis/_information.py +0 -249
  65. cognite/neat/_rules/models/_rdfpath.py +0 -372
  66. {cognite_neat-0.109.3.dist-info → cognite_neat-0.110.0.dist-info}/LICENSE +0 -0
  67. {cognite_neat-0.109.3.dist-info → cognite_neat-0.110.0.dist-info}/entry_points.txt +0 -0
@@ -4,268 +4,114 @@ import urllib.parse
4
4
  import warnings
5
5
  from collections import defaultdict
6
6
  from collections.abc import Iterable, Sequence
7
- from graphlib import TopologicalSorter
7
+ from dataclasses import dataclass
8
8
  from pathlib import Path
9
- from typing import Any, cast, get_args
9
+ from typing import Any, Literal, cast, get_args
10
10
 
11
11
  import yaml
12
12
  from cognite.client import CogniteClient
13
13
  from cognite.client import data_modeling as dm
14
14
  from cognite.client.data_classes.capabilities import Capability, DataModelInstancesAcl
15
- from cognite.client.data_classes.data_modeling import ViewId
16
15
  from cognite.client.data_classes.data_modeling.data_types import ListablePropertyType
17
16
  from cognite.client.data_classes.data_modeling.ids import InstanceId
18
17
  from cognite.client.data_classes.data_modeling.views import SingleEdgeConnection
19
18
  from cognite.client.exceptions import CogniteAPIError
20
19
  from pydantic import BaseModel, ValidationInfo, create_model, field_validator
21
- from rdflib import RDF, URIRef
20
+ from rdflib import RDF
22
21
 
23
22
  from cognite.neat._client import NeatClient
23
+ from cognite.neat._client._api_client import SchemaAPI
24
24
  from cognite.neat._constants import DMS_DIRECT_RELATION_LIST_LIMIT, is_readonly_property
25
- from cognite.neat._graph._tracking import LogTracker, Tracker
26
- from cognite.neat._issues import IssueList, NeatIssue, NeatIssueList
27
- from cognite.neat._issues.errors import (
28
- ResourceConversionError,
29
- ResourceCreationError,
30
- ResourceDuplicatedError,
31
- ResourceRetrievalError,
25
+ from cognite.neat._issues import IssueList, NeatIssue, catch_issues
26
+ from cognite.neat._issues.errors import ResourceCreationError, ResourceDuplicatedError, ResourceNotFoundError
27
+ from cognite.neat._issues.warnings import (
28
+ PropertyDirectRelationLimitWarning,
29
+ PropertyMultipleValueWarning,
30
+ PropertyTypeNotSupportedWarning,
31
+ ResourceNeatWarning,
32
32
  )
33
- from cognite.neat._issues.warnings import PropertyDirectRelationLimitWarning, PropertyTypeNotSupportedWarning
34
- from cognite.neat._rules.analysis._dms import DMSAnalysis
33
+ from cognite.neat._rules.analysis import RulesAnalysis
34
+ from cognite.neat._rules.analysis._base import ViewQuery, ViewQueryDict
35
35
  from cognite.neat._rules.models import DMSRules
36
36
  from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE, Json, String
37
- from cognite.neat._rules.models.entities._single_value import ViewEntity
37
+ from cognite.neat._rules.models.information._rules import InformationRules
38
38
  from cognite.neat._shared import InstanceType
39
39
  from cognite.neat._store import NeatGraphStore
40
40
  from cognite.neat._utils.auxiliary import create_sha256_hash
41
41
  from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
42
42
  from cognite.neat._utils.rdf_ import remove_namespace_from_uri
43
+ from cognite.neat._utils.text import humanize_collection
43
44
  from cognite.neat._utils.upload import UploadResult
44
45
 
45
46
  from ._base import _END_OF_CLASS, CDFLoader
46
47
 
47
48
 
49
+ @dataclass
50
+ class _ViewIterator:
51
+ """This is a helper class to iterate over the views
52
+
53
+ Args:
54
+ view_id: The view to iterate over
55
+ instance_count: The number of instances in the view
56
+ hierarchical_properties: The properties that are hierarchical, meaning they point to instances of the same view.
57
+ query: The query to get the instances from the store.
58
+ view: The view object from the client.
59
+ """
60
+
61
+ view_id: dm.ViewId
62
+ instance_count: int
63
+ hierarchical_properties: set[str]
64
+ query: ViewQuery
65
+ view: dm.View | None = None
66
+
67
+
68
+ @dataclass
69
+ class _Projection:
70
+ """This is a helper class to project triples to a node and/or edge(s)"""
71
+
72
+ view_id: dm.ViewId
73
+ used_for: Literal["node", "edge", "all"]
74
+ pydantic_cls: type[BaseModel]
75
+ edge_by_type: dict[str, tuple[str, dm.EdgeConnection]]
76
+ edge_by_prop_id: dict[str, tuple[str, dm.EdgeConnection]]
77
+
78
+
48
79
  class DMSLoader(CDFLoader[dm.InstanceApply]):
49
80
  """Loads Instances to Cognite Data Fusion Data Model Service from NeatGraph.
50
81
 
51
82
  Args:
52
- graph_store (NeatGraphStore): The graph store to load the data into.
53
- data_model (dm.DataModel[dm.View] | None): The data model to load.
83
+ dms_rules (DMSRules): The DMS rules used by the data model.
84
+ info_rules (InformationRules): The information rules used by the data model, used to
85
+ look up the instances in the store.
86
+ graph_store (NeatGraphStore): The graph store to load the data from.
54
87
  instance_space (str): The instance space to load the data into.
55
- class_neat_id_by_view_id (dict[ViewId, URIRef] | None): A mapping from view id to class name. Defaults to None.
56
88
  create_issues (Sequence[NeatIssue] | None): A list of issues that occurred during reading. Defaults to None.
57
- tracker (type[Tracker] | None): The tracker to use. Defaults to None.
58
- rules (DMSRules | None): The DMS rules used by the data model. This is used to lookup the
59
- instances in the store. Defaults to None.
60
89
  client (NeatClient | None): This is used to lookup containers such that the loader
61
90
  creates instances in accordance with required constraints. Defaults to None.
91
+ unquote_external_ids (bool): If True, the loader will unquote external ids before creating the instances.
62
92
  """
63
93
 
64
94
  def __init__(
65
95
  self,
96
+ dms_rules: DMSRules,
97
+ info_rules: InformationRules,
66
98
  graph_store: NeatGraphStore,
67
- data_model: dm.DataModel[dm.View] | None,
68
99
  instance_space: str,
69
- class_neat_id_by_view_id: dict[ViewId, URIRef] | None = None,
70
- create_issues: Sequence[NeatIssue] | None = None,
71
- tracker: type[Tracker] | None = None,
72
- rules: DMSRules | None = None,
100
+ space_property: str | None = None,
73
101
  client: NeatClient | None = None,
102
+ create_issues: Sequence[NeatIssue] | None = None,
74
103
  unquote_external_ids: bool = False,
75
104
  ):
76
105
  super().__init__(graph_store)
77
- self.data_model = data_model
78
- self.instance_space = instance_space
79
- self.class_neat_id_by_view_id = class_neat_id_by_view_id or {}
106
+ self.dms_rules = dms_rules
107
+ self.info_rules = info_rules
108
+ self._instance_space = instance_space
109
+ self._space_property = space_property
110
+ self._space_by_uri: dict[str, str] = defaultdict(lambda: instance_space)
80
111
  self._issues = IssueList(create_issues or [])
81
- self._tracker: type[Tracker] = tracker or LogTracker
82
- self.rules = rules
83
112
  self._client = client
84
113
  self._unquote_external_ids = unquote_external_ids
85
114
 
86
- @classmethod
87
- def from_data_model_id(
88
- cls,
89
- client: NeatClient,
90
- data_model_id: dm.DataModelId,
91
- graph_store: NeatGraphStore,
92
- instance_space: str,
93
- ) -> "DMSLoader":
94
- issues: list[NeatIssue] = []
95
- data_model: dm.DataModel[dm.View] | None = None
96
- try:
97
- data_model = client.data_modeling.data_models.retrieve(data_model_id, inline_views=True).latest_version()
98
- except Exception as e:
99
- issues.append(ResourceRetrievalError(data_model_id, "data model", str(e)))
100
-
101
- return cls(graph_store, data_model, instance_space, {}, issues, client=client)
102
-
103
- @classmethod
104
- def from_rules(
105
- cls,
106
- rules: DMSRules,
107
- graph_store: NeatGraphStore,
108
- instance_space: str,
109
- client: NeatClient | None = None,
110
- unquote_external_ids: bool = False,
111
- ) -> "DMSLoader":
112
- issues: list[NeatIssue] = []
113
- data_model: dm.DataModel[dm.View] | None = None
114
- try:
115
- data_model = rules.as_schema().as_read_model()
116
- except Exception as e:
117
- issues.append(
118
- ResourceConversionError(
119
- identifier=rules.metadata.as_identifier(),
120
- resource_type="DMS Rules",
121
- target_format="read DMS model",
122
- reason=str(e),
123
- )
124
- )
125
-
126
- class_neat_id_by_view_id = {view.view.as_id(): view.logical for view in rules.views if view.logical}
127
-
128
- return cls(
129
- graph_store,
130
- data_model,
131
- instance_space,
132
- class_neat_id_by_view_id,
133
- issues,
134
- rules=rules,
135
- client=client,
136
- unquote_external_ids=unquote_external_ids,
137
- )
138
-
139
- def _load(self, stop_on_exception: bool = False) -> Iterable[dm.InstanceApply | NeatIssue | type[_END_OF_CLASS]]:
140
- if self._issues.has_errors and stop_on_exception:
141
- raise self._issues.as_exception()
142
- elif self._issues.has_errors:
143
- yield from self._issues
144
- return
145
- if not self.data_model:
146
- # There should already be an error in this case.
147
- return
148
-
149
- views_with_linked_properties = (
150
- DMSAnalysis(self.rules).views_with_properties_linked_to_classes(consider_inheritance=True)
151
- if self.rules and self.rules.metadata.logical
152
- else None
153
- )
154
-
155
- view_and_count_by_id = self._select_views_with_instances(self.data_model.views)
156
-
157
- if self._client:
158
- view_and_count_by_id, properties_point_to_self = self._sort_by_direct_relation_dependencies(
159
- view_and_count_by_id
160
- )
161
- else:
162
- properties_point_to_self = {}
163
-
164
- view_ids: list[str] = []
165
- for view_id in view_and_count_by_id.keys():
166
- view_ids.append(repr(view_id))
167
- if view_id in properties_point_to_self:
168
- # If the views have a dependency on themselves, we need to run it twice.
169
- view_ids.append(f"{view_id!r} (self)")
170
-
171
- tracker = self._tracker(type(self).__name__, view_ids, "views")
172
- for view_id, (view, instance_count) in view_and_count_by_id.items():
173
- pydantic_cls, edge_by_type, edge_by_prop_id, issues = self._create_validation_classes(view) # type: ignore[var-annotated]
174
- yield from issues
175
- tracker.issue(issues)
176
-
177
- if view_id in properties_point_to_self:
178
- # If the view has a dependency on itself, we need to run it twice.
179
- # First, to ensure that all nodes are created, and then to add the direct relations.
180
- # This only applies if there is a require constraint on the container, if not
181
- # we can create an empty node on the fly.
182
- iterations = [properties_point_to_self[view_id], set()]
183
- else:
184
- iterations = [set()]
185
-
186
- for skip_properties in iterations:
187
- if skip_properties:
188
- track_id = f"{view_id} (self)"
189
- else:
190
- track_id = repr(view_id)
191
- tracker.start(track_id)
192
- if views_with_linked_properties:
193
- # we need graceful exit if the view is not in the view_property_pairs
194
- property_link_pairs = views_with_linked_properties.get(ViewEntity.from_id(view_id))
195
-
196
- if class_neat_id := self.class_neat_id_by_view_id.get(view_id):
197
- reader = self.graph_store._read_via_rules_linkage(class_neat_id, property_link_pairs)
198
- else:
199
- error_view = ResourceRetrievalError(view_id, "view", "View not linked to class")
200
- tracker.issue(error_view)
201
- if stop_on_exception:
202
- raise error_view
203
- yield error_view
204
- continue
205
- else:
206
- # this assumes no changes in the suffix of view and class
207
- reader = self.graph_store.read(view.external_id)
208
-
209
- instance_iterable = iterate_progress_bar_if_above_config_threshold(
210
- reader, instance_count, f"Loading {track_id}"
211
- )
212
-
213
- for identifier, properties in instance_iterable:
214
- start_node, end_node = self._pop_start_end_node(properties)
215
- is_edge = start_node and end_node
216
- if (is_edge and view.used_for == "node") or (not is_edge and view.used_for == "edge"):
217
- instance_type = "edge" if is_edge else "node"
218
- creation_error = ResourceCreationError(
219
- identifier,
220
- instance_type,
221
- error=f"{instance_type.capitalize()} found in {view.used_for} view",
222
- )
223
- tracker.issue(creation_error)
224
- if stop_on_exception:
225
- raise creation_error
226
- yield creation_error
227
- continue
228
-
229
- if skip_properties:
230
- properties = {k: v for k, v in properties.items() if k not in skip_properties}
231
-
232
- if start_node and end_node:
233
- # Is an edge
234
- try:
235
- yield self._create_edge_with_properties(
236
- identifier, properties, start_node, end_node, pydantic_cls, view_id
237
- )
238
- except ValueError as e:
239
- error_edge = ResourceCreationError(identifier, "edge", error=str(e))
240
- tracker.issue(error_edge)
241
- if stop_on_exception:
242
- raise error_edge from e
243
- yield error_edge
244
- else:
245
- try:
246
- yield self._create_node(identifier, properties, pydantic_cls, view_id)
247
- except ValueError as e:
248
- error_node = ResourceCreationError(identifier, "node", error=str(e))
249
- tracker.issue(error_node)
250
- if stop_on_exception:
251
- raise error_node from e
252
- yield error_node
253
- yield from self._create_edges_without_properties(
254
- identifier, properties, edge_by_type, edge_by_prop_id, tracker
255
- )
256
- tracker.finish(track_id)
257
- yield _END_OF_CLASS
258
-
259
- @staticmethod
260
- def _pop_start_end_node(properties: dict[str | InstanceType, list[str]]) -> tuple[str | None, str | None]:
261
- start_node = properties.pop("startNode", [None])[0]
262
- if not start_node:
263
- start_node = properties.pop("start_node", [None])[0]
264
- end_node = properties.pop("endNode", [None])[0]
265
- if not end_node:
266
- end_node = properties.pop("end_node", [None])[0]
267
- return start_node, end_node
268
-
269
115
  def write_to_file(self, filepath: Path) -> None:
270
116
  if filepath.suffix not in [".json", ".yaml", ".yml"]:
271
117
  raise ValueError(f"File format {filepath.suffix} is not supported")
@@ -286,76 +132,124 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
286
132
  else:
287
133
  yaml.safe_dump(dumped, f, sort_keys=False)
288
134
 
289
- def _select_views_with_instances(self, views: list[dm.View]) -> dict[dm.ViewId, tuple[dm.View, int]]:
290
- """Selects the views with data."""
291
- view_and_count_by_id: dict[dm.ViewId, tuple[dm.View, int]] = {}
292
- uri_by_type: dict[str, URIRef] = {
293
- remove_namespace_from_uri(uri[0]): uri[0] # type: ignore[misc]
294
- for uri in self.graph_store.queries.list_types()
295
- }
296
- for view in views:
297
- view_id = view.as_id()
298
- neat_id = self.class_neat_id_by_view_id.get(view_id)
299
- if neat_id is not None:
300
- count = self.graph_store.count_of_id(neat_id)
301
- elif view.external_id in uri_by_type:
302
- count = self.graph_store.count_of_type(uri_by_type[view.external_id])
303
- else:
135
+ def _load(self, stop_on_exception: bool = False) -> Iterable[dm.InstanceApply | NeatIssue | type[_END_OF_CLASS]]:
136
+ if self._issues.has_errors and stop_on_exception:
137
+ raise self._issues.as_exception()
138
+ elif self._issues.has_errors:
139
+ yield from self._issues
140
+ return
141
+ view_iterations, issues = self._create_view_iterations()
142
+ yield from issues
143
+ if self._space_property:
144
+ yield from self._lookup_space_by_uri(view_iterations, stop_on_exception)
145
+
146
+ for it in view_iterations:
147
+ view = it.view
148
+ if view is None:
149
+ yield ResourceNotFoundError(it.view_id, "view", more=f"Skipping {it.instance_count} instances...")
150
+ continue
151
+ projection, issues = self._create_projection(view)
152
+ yield from issues
153
+ query = it.query
154
+ reader = self.graph_store.read(query.rdf_type, property_renaming_config=query.property_renaming_config)
155
+ instance_iterable = iterate_progress_bar_if_above_config_threshold(
156
+ reader, it.instance_count, f"Loading {it.view_id!r}"
157
+ )
158
+ for identifier, properties in instance_iterable:
159
+ yield from self._create_instances(
160
+ identifier, properties, projection, stop_on_exception, exclude=it.hierarchical_properties
161
+ )
162
+ if it.hierarchical_properties:
163
+ # Force the creation of instances, before we create the hierarchical properties.
164
+ yield _END_OF_CLASS
165
+ yield from self._create_hierarchical_properties(it, projection, stop_on_exception)
166
+
167
+ yield _END_OF_CLASS
168
+
169
+ def _create_hierarchical_properties(
170
+ self, it: _ViewIterator, projection: _Projection, stop_on_exception: bool
171
+ ) -> Iterable[dm.InstanceApply | NeatIssue]:
172
+ reader = self.graph_store.read(it.query.rdf_type, property_renaming_config=it.query.property_renaming_config)
173
+ instance_iterable = iterate_progress_bar_if_above_config_threshold(
174
+ reader,
175
+ it.instance_count,
176
+ f"Loading {it.view_id!r} hierarchical properties: {humanize_collection(it.hierarchical_properties)}",
177
+ )
178
+ for identifier, properties in instance_iterable:
179
+ yield from self._create_instances(
180
+ identifier, properties, projection, stop_on_exception, include=it.hierarchical_properties
181
+ )
182
+
183
+ def _create_view_iterations(self) -> tuple[list[_ViewIterator], IssueList]:
184
+ view_query_by_id = RulesAnalysis(self.info_rules, self.dms_rules).view_query_by_id
185
+ iterations_by_view_id = self._select_views_with_instances(view_query_by_id)
186
+ if self._client:
187
+ issues = IssueList()
188
+ views = self._client.data_modeling.views.retrieve(
189
+ list(iterations_by_view_id.keys()), include_inherited_properties=True
190
+ )
191
+ else:
192
+ views = dm.ViewList([])
193
+ with catch_issues() as issues:
194
+ read_model = self.dms_rules.as_schema().as_read_model()
195
+ views.extend(read_model.views)
196
+ if issues.has_errors:
197
+ return [], issues
198
+ views_by_id = {view.as_id(): view for view in views}
199
+ hierarchical_properties_by_view_id = SchemaAPI.get_hierarchical_properties(views)
200
+
201
+ def sort_by_instance_type(id_: dm.ViewId) -> int:
202
+ if id_ not in views_by_id:
203
+ return 0
204
+ return {"node": 1, "all": 2, "edge": 3}.get(views_by_id[id_].used_for, 0)
205
+
206
+ ordered_view_ids = sorted(iterations_by_view_id.keys(), key=sort_by_instance_type)
207
+ view_iterations: list[_ViewIterator] = []
208
+ for view_id in ordered_view_ids:
209
+ if view_id not in iterations_by_view_id:
304
210
  continue
211
+ view_iteration = iterations_by_view_id[view_id]
212
+ view_iteration.view = views_by_id.get(view_id)
213
+ view_iteration.hierarchical_properties = hierarchical_properties_by_view_id.get(view_id, set())
214
+ view_iterations.append(view_iteration)
215
+ return view_iterations, issues
216
+
217
+ def _select_views_with_instances(self, view_query_by_id: ViewQueryDict) -> dict[dm.ViewId, _ViewIterator]:
218
+ """Selects the views with data."""
219
+ view_iterations: dict[dm.ViewId, _ViewIterator] = {}
220
+ for view_id, query in view_query_by_id.items():
221
+ count = self.graph_store.queries.count_of_type(query.rdf_type)
305
222
  if count > 0:
306
- view_and_count_by_id[view_id] = view, count
307
-
308
- return view_and_count_by_id
309
-
310
- def _sort_by_direct_relation_dependencies(
311
- self, view_and_count_by_id: dict[dm.ViewId, tuple[dm.View, int]]
312
- ) -> tuple[dict[dm.ViewId, tuple[dm.View, int]], dict[dm.ViewId, set[str]]]:
313
- """Sorts the views by container constraints."""
314
- if not self._client:
315
- return view_and_count_by_id, {}
316
- # We need to retrieve the views to ensure we get all properties, such that we can find all
317
- # the containers that the view is linked to.
318
- views = self._client.data_modeling.views.retrieve(
319
- list(view_and_count_by_id.keys()), include_inherited_properties=True
223
+ view_iterations[view_id] = _ViewIterator(view_id, count, set(), query)
224
+ return view_iterations
225
+
226
+ def _lookup_space_by_uri(self, view_iterations: list[_ViewIterator], stop_on_exception: bool = False) -> IssueList:
227
+ issues = IssueList()
228
+ if self._space_property is None:
229
+ return issues
230
+ total = sum(it.instance_count for it in view_iterations)
231
+ properties_by_uriref = self.graph_store.queries.properties()
232
+ space_property_uri = next((k for k, v in properties_by_uriref.items() if v == self._space_property), None)
233
+ if space_property_uri is None:
234
+ error: ResourceNotFoundError[str, str] = ResourceNotFoundError(
235
+ self._space_property,
236
+ "property",
237
+ more=f"Could not find the {self._space_property} in the graph.",
238
+ )
239
+ if stop_on_exception:
240
+ raise error
241
+ issues.append(error)
242
+ return issues
243
+
244
+ instance_iterable = self.graph_store.queries.list_instances_ids_by_space(space_property_uri)
245
+ instance_iterable = iterate_progress_bar_if_above_config_threshold(
246
+ instance_iterable, total, f"Looking up spaces for {total} instances..."
320
247
  )
321
- container_ids_by_view_id = {view.as_id(): view.referenced_containers() for view in views}
322
- referenced_containers = {
323
- container for containers in container_ids_by_view_id.values() for container in containers
324
- }
325
- containers = self._client.data_modeling.containers.retrieve(list(referenced_containers))
326
- container_by_id = {container.as_id(): container for container in containers}
327
-
328
- dependency_on_self: dict[dm.ViewId, set[str]] = defaultdict(set)
329
- view_id_by_dependencies: dict[dm.ViewId, set[dm.ViewId]] = {}
330
- for view in views:
331
- view_id = view.as_id()
332
- dependencies = set()
333
- for prop_id, prop in view.properties.items():
334
- if isinstance(prop, dm.MappedProperty) and isinstance(prop.type, dm.DirectRelation) and prop.source:
335
- container = container_by_id[prop.container]
336
- has_require_constraint = any(
337
- isinstance(constraint, dm.RequiresConstraint) for constraint in container.constraints.values()
338
- )
339
- if has_require_constraint and prop.source == view_id:
340
- dependency_on_self[view_id].add(prop_id)
341
- elif has_require_constraint:
342
- dependencies.add(prop.source)
343
- view_id_by_dependencies[view_id] = dependencies
344
-
345
- ordered_view_ids = TopologicalSorter(view_id_by_dependencies).static_order()
346
-
347
- return {
348
- view_id: view_and_count_by_id[view_id] for view_id in ordered_view_ids if view_id in view_and_count_by_id
349
- }, dict(dependency_on_self)
350
-
351
- def _create_validation_classes(
352
- self, view: dm.View
353
- ) -> tuple[
354
- type[BaseModel],
355
- dict[str, tuple[str, dm.EdgeConnection]],
356
- dict[str, tuple[str, dm.EdgeConnection]],
357
- NeatIssueList,
358
- ]:
248
+ for instance, space in instance_iterable:
249
+ self._space_by_uri[remove_namespace_from_uri(instance)] = space
250
+ return issues
251
+
252
+ def _create_projection(self, view: dm.View) -> tuple[_Projection, IssueList]:
359
253
  issues = IssueList()
360
254
  field_definitions: dict[str, tuple[type, Any]] = {}
361
255
  edge_by_type: dict[str, tuple[str, dm.EdgeConnection]] = {}
@@ -414,9 +308,15 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
414
308
 
415
309
  def parse_list(cls, value: Any, info: ValidationInfo) -> list[str]:
416
310
  if isinstance(value, list) and list.__name__ not in _get_field_value_types(cls, info):
417
- if len(value) == 1:
418
- return value[0]
419
- raise ValueError(f"Got multiple values for {info.field_name}: {value}")
311
+ if len(value) > 1:
312
+ warnings.warn(
313
+ # the identifier is unknown here; it will be set in the _create_instances method
314
+ PropertyMultipleValueWarning("", "property", str(info.field_name), value=str(value[0])),
315
+ stacklevel=2,
316
+ )
317
+ elif not value:
318
+ return None # type: ignore[return-value]
319
+ return value[0]
420
320
 
421
321
  return value
422
322
 
@@ -446,7 +346,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
446
346
  def parse_direct_relation(cls, value: list, info: ValidationInfo) -> dict | list[dict]:
447
347
  # We validate above that we only get one value for single direct relations.
448
348
  if list.__name__ in _get_field_value_types(cls, info):
449
- result = [{"space": self.instance_space, "externalId": remove_namespace_from_uri(v)} for v in value]
349
+ external_ids = (remove_namespace_from_uri(v) for v in value)
350
+ result = [{"space": self._space_by_uri[e], "externalId": e} for e in external_ids]
351
+ # Todo: Account for max_list_limit
450
352
  if len(result) <= DMS_DIRECT_RELATION_LIST_LIMIT:
451
353
  return result
452
354
  warnings.warn(
@@ -462,7 +364,8 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
462
364
  result.sort(key=lambda x: (x["space"], x["externalId"]))
463
365
  return result[:DMS_DIRECT_RELATION_LIST_LIMIT]
464
366
  elif value:
465
- return {"space": self.instance_space, "externalId": remove_namespace_from_uri(value[0])}
367
+ external_id = remove_namespace_from_uri(value[0])
368
+ return {"space": self._space_by_uri[external_id], "externalId": external_id}
466
369
  return {}
467
370
 
468
371
  validators["parse_direct_relation"] = field_validator(*direct_relation_by_property.keys(), mode="before")( # type: ignore[assignment]
@@ -481,70 +384,91 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
481
384
  )
482
385
 
483
386
  pydantic_cls = create_model(view.external_id, __validators__=validators, **field_definitions) # type: ignore[arg-type, call-overload]
484
- return pydantic_cls, edge_by_type, edge_by_prop_id, issues
485
-
486
- def _create_node(
487
- self,
488
- identifier: str,
489
- properties: dict[str | InstanceType, list[str]],
490
- pydantic_cls: type[BaseModel],
491
- view_id: dm.ViewId,
492
- ) -> dm.InstanceApply:
493
- type_ = properties.pop(RDF.type, [None])[0]
494
- created = pydantic_cls.model_validate(properties)
495
- if self._unquote_external_ids:
496
- identifier = urllib.parse.unquote(identifier)
497
-
498
- return dm.NodeApply(
499
- space=self.instance_space,
500
- external_id=identifier,
501
- type=(dm.DirectRelationReference(view_id.space, view_id.external_id) if type_ is not None else None),
502
- sources=[
503
- dm.NodeOrEdgeData(source=view_id, properties=dict(created.model_dump(exclude_unset=True).items()))
504
- ],
505
- )
387
+ return _Projection(view.as_id(), view.used_for, pydantic_cls, edge_by_type, edge_by_prop_id), issues
506
388
 
507
- def _create_edge_with_properties(
389
+ def _create_instances(
508
390
  self,
509
391
  identifier: str,
510
392
  properties: dict[str | InstanceType, list[str]],
511
- start_node: str,
512
- end_node: str,
513
- pydantic_cls: type[BaseModel],
514
- view_id: dm.ViewId,
515
- ) -> dm.EdgeApply:
516
- type_ = properties.pop(RDF.type, [None])[0]
517
- created = pydantic_cls.model_validate(properties)
518
- if type_ is None:
519
- raise ValueError(f"Missing type for edge {identifier}")
520
-
393
+ projection: _Projection,
394
+ stop_on_exception: bool = False,
395
+ exclude: set[str] | None = None,
396
+ include: set[str] | None = None,
397
+ ) -> Iterable[dm.InstanceApply | NeatIssue]:
521
398
  if self._unquote_external_ids:
522
399
  identifier = urllib.parse.unquote(identifier)
400
+ start_node, end_node = self._pop_start_end_node(properties)
401
+ is_edge = start_node and end_node
402
+ instance_type = "edge" if is_edge else "node"
403
+ if (projection.used_for == "node" and is_edge) or (projection.used_for == "edge" and not is_edge):
404
+ creation_error = ResourceCreationError(
405
+ identifier,
406
+ instance_type,
407
+ f"View used for {projection.used_for} instance {identifier!s} but is {instance_type}",
408
+ )
409
+ if stop_on_exception:
410
+ raise creation_error from None
411
+ yield creation_error
412
+ return
523
413
 
524
- return dm.EdgeApply(
525
- space=self.instance_space,
526
- external_id=identifier,
527
- type=dm.DirectRelationReference(view_id.space, view_id.external_id),
528
- start_node=dm.DirectRelationReference(self.instance_space, start_node),
529
- end_node=dm.DirectRelationReference(self.instance_space, end_node),
530
- sources=[
531
- dm.NodeOrEdgeData(source=view_id, properties=dict(created.model_dump(exclude_unset=True).items()))
532
- ],
533
- )
414
+ if RDF.type not in properties:
415
+ error = ResourceCreationError(identifier, instance_type, "No rdf:type found")
416
+ if stop_on_exception:
417
+ raise error from None
418
+ yield error
419
+ return
420
+ _ = properties.pop(RDF.type)[0]
421
+ if start_node and self._unquote_external_ids:
422
+ start_node = urllib.parse.unquote(start_node)
423
+ if end_node and self._unquote_external_ids:
424
+ end_node = urllib.parse.unquote(end_node)
425
+
426
+ if exclude:
427
+ properties = {k: v for k, v in properties.items() if k not in exclude}
428
+ if include:
429
+ properties = {k: v for k, v in properties.items() if k in include}
430
+
431
+ with catch_issues() as property_issues:
432
+ sources = [
433
+ dm.NodeOrEdgeData(
434
+ projection.view_id,
435
+ projection.pydantic_cls.model_validate(properties).model_dump(exclude_unset=True),
436
+ )
437
+ ]
438
+ for issue in property_issues:
439
+ if isinstance(issue, ResourceNeatWarning):
440
+ issue.identifier = identifier
441
+
442
+ if property_issues.has_errors and stop_on_exception:
443
+ raise property_issues.as_exception()
444
+ yield from property_issues
445
+
446
+ if start_node and end_node:
447
+ yield dm.EdgeApply(
448
+ space=self._space_by_uri[identifier],
449
+ external_id=identifier,
450
+ type=(projection.view_id.space, projection.view_id.external_id),
451
+ start_node=(self._space_by_uri[start_node], start_node),
452
+ end_node=(self._space_by_uri[end_node], end_node),
453
+ sources=sources,
454
+ )
455
+ else:
456
+ yield dm.NodeApply(
457
+ space=self._space_by_uri[identifier],
458
+ external_id=identifier,
459
+ type=(projection.view_id.space, projection.view_id.external_id),
460
+ sources=sources,
461
+ )
462
+ yield from self._create_edges_without_properties(identifier, properties, projection)
534
463
 
535
464
  def _create_edges_without_properties(
536
- self,
537
- identifier: str,
538
- properties: dict[str, list[str]],
539
- edge_by_type: dict[str, tuple[str, dm.EdgeConnection]],
540
- edge_by_prop_id: dict[str, tuple[str, dm.EdgeConnection]],
541
- tracker: Tracker,
465
+ self, identifier: str, properties: dict[str | InstanceType, list[str]], projection: _Projection
542
466
  ) -> Iterable[dm.EdgeApply | NeatIssue]:
543
467
  for predicate, values in properties.items():
544
- if predicate in edge_by_type:
545
- prop_id, edge = edge_by_type[predicate]
546
- elif predicate in edge_by_prop_id:
547
- prop_id, edge = edge_by_prop_id[predicate]
468
+ if predicate in projection.edge_by_type:
469
+ prop_id, edge = projection.edge_by_type[predicate]
470
+ elif predicate in projection.edge_by_prop_id:
471
+ prop_id, edge = projection.edge_by_prop_id[predicate]
548
472
  else:
549
473
  continue
550
474
  if isinstance(edge, SingleEdgeConnection) and len(values) > 1:
@@ -553,21 +477,36 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
553
477
  identifier=identifier,
554
478
  location=f"Multiple values for single edge {edge}. Expected only one.",
555
479
  )
556
- tracker.issue(error)
557
480
  yield error
481
+ continue
558
482
  for target in values:
559
483
  external_id = f"{identifier}.{prop_id}.{target}"
560
- if self._unquote_external_ids:
561
- external_id = urllib.parse.unquote(external_id)
562
-
484
+ start_node, end_node = (
485
+ (self._space_by_uri[identifier], identifier),
486
+ (self._space_by_uri[target], target),
487
+ )
488
+ if edge.direction == "inwards":
489
+ start_node, end_node = end_node, start_node
563
490
  yield dm.EdgeApply(
564
- space=self.instance_space,
491
+ space=self._space_by_uri[identifier],
565
492
  external_id=(external_id if len(external_id) < 256 else create_sha256_hash(external_id)),
566
493
  type=edge.type,
567
- start_node=dm.DirectRelationReference(self.instance_space, identifier),
568
- end_node=dm.DirectRelationReference(self.instance_space, target),
494
+ start_node=start_node,
495
+ end_node=end_node,
569
496
  )
570
497
 
498
+ @staticmethod
499
+ def _pop_start_end_node(properties: dict[str | InstanceType, list[str]]) -> tuple[str, str] | tuple[None, None]:
500
+ start_node = properties.pop("startNode", [None])[0]
501
+ if not start_node:
502
+ start_node = properties.pop("start_node", [None])[0]
503
+ end_node = properties.pop("endNode", [None])[0]
504
+ if not end_node:
505
+ end_node = properties.pop("end_node", [None])[0]
506
+ if start_node and end_node:
507
+ return start_node, end_node
508
+ return None, None
509
+
571
510
  def _get_required_capabilities(self) -> list[Capability]:
572
511
  return [
573
512
  DataModelInstancesAcl(
@@ -576,7 +515,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
576
515
  DataModelInstancesAcl.Action.Write_Properties,
577
516
  DataModelInstancesAcl.Action.Read,
578
517
  ],
579
- scope=DataModelInstancesAcl.Scope.SpaceID([self.instance_space]),
518
+ scope=DataModelInstancesAcl.Scope.SpaceID([self._instance_space]),
580
519
  )
581
520
  ]
582
521