cognite-neat 0.98.0__py3-none-any.whl → 0.99.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release has been flagged as potentially problematic.

Files changed (103)
  1. cognite/neat/_client/__init__.py +4 -0
  2. cognite/neat/_client/_api/data_modeling_loaders.py +585 -0
  3. cognite/neat/_client/_api/schema.py +111 -0
  4. cognite/neat/_client/_api_client.py +17 -0
  5. cognite/neat/_client/data_classes/__init__.py +0 -0
  6. cognite/neat/{_utils/cdf/data_classes.py → _client/data_classes/data_modeling.py} +8 -135
  7. cognite/neat/_client/data_classes/schema.py +495 -0
  8. cognite/neat/_constants.py +27 -4
  9. cognite/neat/_graph/_shared.py +14 -15
  10. cognite/neat/_graph/extractors/_classic_cdf/_assets.py +14 -154
  11. cognite/neat/_graph/extractors/_classic_cdf/_base.py +154 -7
  12. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +25 -14
  13. cognite/neat/_graph/extractors/_classic_cdf/_data_sets.py +17 -92
  14. cognite/neat/_graph/extractors/_classic_cdf/_events.py +13 -162
  15. cognite/neat/_graph/extractors/_classic_cdf/_files.py +15 -179
  16. cognite/neat/_graph/extractors/_classic_cdf/_labels.py +32 -100
  17. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +27 -178
  18. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +14 -139
  19. cognite/neat/_graph/extractors/_classic_cdf/_timeseries.py +15 -173
  20. cognite/neat/_graph/extractors/_rdf_file.py +6 -7
  21. cognite/neat/_graph/loaders/_rdf2dms.py +2 -2
  22. cognite/neat/_graph/queries/_base.py +17 -1
  23. cognite/neat/_graph/transformers/_classic_cdf.py +74 -147
  24. cognite/neat/_graph/transformers/_prune_graph.py +1 -1
  25. cognite/neat/_graph/transformers/_rdfpath.py +1 -1
  26. cognite/neat/_issues/_base.py +26 -17
  27. cognite/neat/_issues/errors/__init__.py +4 -2
  28. cognite/neat/_issues/errors/_external.py +7 -0
  29. cognite/neat/_issues/errors/_properties.py +2 -7
  30. cognite/neat/_issues/errors/_resources.py +1 -1
  31. cognite/neat/_issues/warnings/__init__.py +8 -0
  32. cognite/neat/_issues/warnings/_external.py +16 -0
  33. cognite/neat/_issues/warnings/_properties.py +16 -0
  34. cognite/neat/_issues/warnings/_resources.py +26 -2
  35. cognite/neat/_issues/warnings/user_modeling.py +4 -4
  36. cognite/neat/_rules/_constants.py +8 -11
  37. cognite/neat/_rules/analysis/_base.py +8 -4
  38. cognite/neat/_rules/exporters/_base.py +3 -4
  39. cognite/neat/_rules/exporters/_rules2dms.py +33 -46
  40. cognite/neat/_rules/importers/__init__.py +1 -3
  41. cognite/neat/_rules/importers/_base.py +1 -1
  42. cognite/neat/_rules/importers/_dms2rules.py +6 -29
  43. cognite/neat/_rules/importers/_rdf/__init__.py +5 -0
  44. cognite/neat/_rules/importers/_rdf/_base.py +34 -11
  45. cognite/neat/_rules/importers/_rdf/_imf2rules.py +91 -0
  46. cognite/neat/_rules/importers/_rdf/_inference2rules.py +43 -35
  47. cognite/neat/_rules/importers/_rdf/_owl2rules.py +80 -0
  48. cognite/neat/_rules/importers/_rdf/_shared.py +138 -441
  49. cognite/neat/_rules/models/__init__.py +1 -1
  50. cognite/neat/_rules/models/_base_rules.py +22 -12
  51. cognite/neat/_rules/models/dms/__init__.py +4 -2
  52. cognite/neat/_rules/models/dms/_exporter.py +45 -48
  53. cognite/neat/_rules/models/dms/_rules.py +20 -17
  54. cognite/neat/_rules/models/dms/_rules_input.py +52 -8
  55. cognite/neat/_rules/models/dms/_validation.py +391 -119
  56. cognite/neat/_rules/models/entities/_single_value.py +32 -4
  57. cognite/neat/_rules/models/information/__init__.py +2 -0
  58. cognite/neat/_rules/models/information/_rules.py +0 -67
  59. cognite/neat/_rules/models/information/_validation.py +9 -9
  60. cognite/neat/_rules/models/mapping/__init__.py +2 -3
  61. cognite/neat/_rules/models/mapping/_classic2core.py +36 -146
  62. cognite/neat/_rules/models/mapping/_classic2core.yaml +343 -0
  63. cognite/neat/_rules/transformers/__init__.py +2 -2
  64. cognite/neat/_rules/transformers/_converters.py +110 -11
  65. cognite/neat/_rules/transformers/_mapping.py +105 -30
  66. cognite/neat/_rules/transformers/_pipelines.py +1 -1
  67. cognite/neat/_rules/transformers/_verification.py +31 -3
  68. cognite/neat/_session/_base.py +24 -8
  69. cognite/neat/_session/_drop.py +35 -0
  70. cognite/neat/_session/_inspect.py +17 -5
  71. cognite/neat/_session/_mapping.py +39 -0
  72. cognite/neat/_session/_prepare.py +219 -23
  73. cognite/neat/_session/_read.py +49 -12
  74. cognite/neat/_session/_to.py +8 -5
  75. cognite/neat/_session/exceptions.py +4 -0
  76. cognite/neat/_store/_base.py +27 -24
  77. cognite/neat/_utils/rdf_.py +34 -5
  78. cognite/neat/_version.py +1 -1
  79. cognite/neat/_workflows/steps/lib/current/rules_exporter.py +5 -88
  80. cognite/neat/_workflows/steps/lib/current/rules_importer.py +3 -14
  81. cognite/neat/_workflows/steps/lib/current/rules_validator.py +6 -7
  82. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/METADATA +3 -3
  83. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/RECORD +87 -92
  84. cognite/neat/_rules/importers/_rdf/_imf2rules/__init__.py +0 -3
  85. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2classes.py +0 -86
  86. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2metadata.py +0 -29
  87. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2properties.py +0 -130
  88. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2rules.py +0 -154
  89. cognite/neat/_rules/importers/_rdf/_owl2rules/__init__.py +0 -3
  90. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2classes.py +0 -58
  91. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2metadata.py +0 -65
  92. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2properties.py +0 -59
  93. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2rules.py +0 -39
  94. cognite/neat/_rules/models/dms/_schema.py +0 -1101
  95. cognite/neat/_rules/models/mapping/_base.py +0 -131
  96. cognite/neat/_utils/cdf/loaders/__init__.py +0 -25
  97. cognite/neat/_utils/cdf/loaders/_base.py +0 -54
  98. cognite/neat/_utils/cdf/loaders/_data_modeling.py +0 -339
  99. cognite/neat/_utils/cdf/loaders/_ingestion.py +0 -167
  100. cognite/neat/{_utils/cdf → _client/_api}/__init__.py +0 -0
  101. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/LICENSE +0 -0
  102. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/WHEEL +0 -0
  103. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/entry_points.txt +0 -0
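
The largest single change in this release is the removal of cognite/neat/_rules/models/dms/_schema.py (1101 lines, file 94 above), with a new cognite/neat/_client/data_classes/schema.py (+495 lines, file 7) appearing in its place. Code that imports DMSSchema from the old module will break on upgrade. Below is a minimal compatibility sketch, assuming the class keeps its name after the move to the new _client package; the 0.99.1 import path is inferred from the file list, not confirmed by this diff:

    # Hypothetical compatibility shim; the 0.99.1 path is an assumption
    # inferred from the file list above, not confirmed by this diff.
    try:
        from cognite.neat._client.data_classes.schema import DMSSchema  # 0.99.1 (assumed)
    except ImportError:
        from cognite.neat._rules.models.dms._schema import DMSSchema  # 0.98.0 (removed below)

Note that both paths are private (underscore-prefixed) modules, so the import locations may change again without notice. The full content of the removed cognite/neat/_rules/models/dms/_schema.py follows: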
@@ -1,1101 +0,0 @@
- import json
- import sys
- import warnings
- import zipfile
- from collections import ChainMap, Counter, defaultdict
- from collections.abc import Iterable, MutableMapping
- from dataclasses import Field, dataclass, field, fields
- from pathlib import Path
- from typing import Any, ClassVar, Literal, cast
-
- import yaml
- from cognite.client import CogniteClient
- from cognite.client import data_modeling as dm
- from cognite.client.data_classes import DatabaseWrite, DatabaseWriteList, TransformationWrite, TransformationWriteList
- from cognite.client.data_classes.data_modeling import ViewApply
- from cognite.client.data_classes.data_modeling.views import (
-     ReverseDirectRelation,
-     ReverseDirectRelationApply,
-     SingleEdgeConnection,
-     SingleEdgeConnectionApply,
-     SingleReverseDirectRelation,
-     SingleReverseDirectRelationApply,
-     ViewProperty,
-     ViewPropertyApply,
- )
- from cognite.client.data_classes.transformations.common import Edges, EdgeType, Nodes, ViewInfo
-
- from cognite.neat._issues import NeatError
- from cognite.neat._issues.errors import (
-     NeatYamlError,
-     PropertyMappingDuplicatedError,
-     PropertyNotFoundError,
-     ResourceDuplicatedError,
-     ResourceNotFoundError,
- )
- from cognite.neat._issues.warnings import (
-     FileTypeUnexpectedWarning,
-     ResourceNotFoundWarning,
-     ResourceRetrievalWarning,
-     ResourcesDuplicatedWarning,
- )
- from cognite.neat._issues.warnings.user_modeling import DirectRelationMissingSourceWarning
- from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE
- from cognite.neat._utils.cdf.data_classes import (
-     CogniteResourceDict,
-     ContainerApplyDict,
-     NodeApplyDict,
-     RawTableWrite,
-     RawTableWriteList,
-     SpaceApplyDict,
-     ViewApplyDict,
- )
- from cognite.neat._utils.cdf.loaders import ViewLoader
- from cognite.neat._utils.rdf_ import get_inheritance_path
- from cognite.neat._utils.text import to_camel
-
- if sys.version_info >= (3, 11):
-     from typing import Self
- else:
-     from typing_extensions import Self
-
-
- @dataclass
- class DMSSchema:
-     data_model: dm.DataModelApply | None = None
-     spaces: SpaceApplyDict = field(default_factory=SpaceApplyDict)
-     views: ViewApplyDict = field(default_factory=ViewApplyDict)
-     containers: ContainerApplyDict = field(default_factory=ContainerApplyDict)
-     node_types: NodeApplyDict = field(default_factory=NodeApplyDict)
-     # The last schema is the previous version of the data model. In the case, extension=addition, this
-     # should not be modified.
-     last: "DMSSchema | None" = None
-     # Reference is typically the Enterprise model, while this is the solution model.
-     reference: "DMSSchema | None" = None
-
-     _FIELD_NAME_BY_RESOURCE_TYPE: ClassVar[dict[str, str]] = {
-         "container": "containers",
-         "view": "views",
-         "datamodel": "data_model",
-         "space": "spaces",
-         "node": "node_types",
-     }
-
-     def _get_mapped_container_from_view(self, view_id: dm.ViewId) -> set[dm.ContainerId]:
-         # index all views, including ones from reference
-         view_by_id = self.views.copy()
-         if self.reference:
-             view_by_id.update(self.reference.views)
-
-         if view_id not in view_by_id:
-             raise ValueError(f"View {view_id} not found")
-
-         indexed_implemented_views = {id_: view.implements for id_, view in view_by_id.items()}
-         view_inheritance = get_inheritance_path(view_id, indexed_implemented_views)
-
-         directly_referenced_containers = view_by_id[view_id].referenced_containers()
-         inherited_referenced_containers = set()
-
-         for parent_id in view_inheritance:
-             if implemented_view := view_by_id.get(parent_id):
-                 inherited_referenced_containers |= implemented_view.referenced_containers()
-             else:
-                 raise ResourceNotFoundError(parent_id, "view", view_id, "view")
-
-         return directly_referenced_containers | inherited_referenced_containers
-
-     @classmethod
-     def from_model_id(cls, client: CogniteClient, data_model_id: dm.DataModelIdentifier) -> "DMSSchema":
-         data_models = client.data_modeling.data_models.retrieve(data_model_id, inline_views=True)
-         if len(data_models) == 0:
-             raise ValueError(f"Data model {data_model_id} not found")
-         data_model = data_models.latest_version()
-         return cls.from_data_model(client, data_model)
-
-     @classmethod
-     def from_data_model(
-         cls,
-         client: CogniteClient,
-         data_model: dm.DataModel[dm.View],
-         reference_model: dm.DataModel[dm.View] | None = None,
-     ) -> "DMSSchema":
-         """Create a schema from a data model.
-
-         If a reference model is provided, the schema will include a reference schema. To determine which views,
-         and containers to put in the reference schema, the following rule is applied:
-
-             If a view or container space is different from the data model space,
-             it will be included in the reference schema.*
-
-         *One exception to this rule is if a view is directly referenced by the data model. In this case, the view will
-         be included in the data model schema, even if the space is different.
-
-         Args:
-             client: The Cognite client used for retrieving components referenced by the data model.
-             data_model: The data model to create the schema from.
-             reference_model: (Optional) The reference model to include in the schema.
-                 This is typically the Enterprise model.
-
-         Returns:
-             DMSSchema: The schema created from the data model.
-         """
-         views = dm.ViewList(data_model.views)
-
-         data_model_write = data_model.as_write()
-         data_model_write.views = list(views.as_ids())
-
-         if reference_model:
-             views.extend(reference_model.views)
-
-         container_ids = views.referenced_containers()
-         containers = client.data_modeling.containers.retrieve(list(container_ids))
-         cls._append_referenced_containers(client, containers)
-
-         space_ids = [data_model.space, reference_model.space] if reference_model else [data_model.space]
-         space_read = client.data_modeling.spaces.retrieve(space_ids)
-         if len(space_read) != len(space_ids):
-             raise ValueError(f"Space(s) {space_read} not found")
-         space_write = space_read.as_write()
-
-         view_loader = ViewLoader(client)
-
-         existing_view_ids = set(views.as_ids())
-
-         # We need to include all views the edges/direct relations are pointing to have a complete schema.
-         connection_referenced_view_ids: set[dm.ViewId] = set()
-         for view in views:
-             connection_referenced_view_ids |= cls._connection_references(view)
-         connection_referenced_view_ids = connection_referenced_view_ids - existing_view_ids
-         if connection_referenced_view_ids:
-             for view_id in connection_referenced_view_ids:
-                 warnings.warn(
-                     ResourceNotFoundWarning(view_id, "view", data_model_write.as_id(), "data model"),
-                     stacklevel=2,
-                 )
-             connection_referenced_views = view_loader.retrieve(list(connection_referenced_view_ids))
-             if failed := connection_referenced_view_ids - set(connection_referenced_views.as_ids()):
-                 warnings.warn(ResourceRetrievalWarning(frozenset(failed), "view"), stacklevel=2)
-             views.extend(connection_referenced_views)
-
-         # We need to include parent views in the schema to make sure that the schema is valid.
-         parent_view_ids = {parent for view in views for parent in view.implements or []}
-         parents = view_loader.retrieve_all_parents(list(parent_view_ids - existing_view_ids))
-         views.extend([parent for parent in parents if parent.as_id() not in existing_view_ids])
-
-         # Converting views from read to write format requires to account for parents (implements)
-         # as the read format contains all properties from all parents, while the write formate should not contain
-         # properties from any parents.
-         # The ViewLoader as_write method looks up parents and remove properties from them.
-         view_write = ViewApplyDict([view_loader.as_write(view) for view in views])
-
-         container_write = ContainerApplyDict(containers.as_write())
-         user_space = data_model.space
-         if reference_model:
-             user_model_view_ids = set(data_model_write.views)
-             ref_model_write = reference_model.as_write()
-             ref_model_write.views = [view.as_id() for view in reference_model.views]
-
-             ref_views = ViewApplyDict(
-                 [
-                     view
-                     for view_id, view in view_write.items()
-                     if (view.space != user_space) or (view_id not in user_model_view_ids)
-                 ]
-             )
-             view_write = ViewApplyDict(
-                 [
-                     view
-                     for view_id, view in view_write.items()
-                     if view.space == user_space or view_id in user_model_view_ids
-                 ]
-             )
-
-             ref_containers = ContainerApplyDict(
-                 [container for container in container_write.values() if container.space != user_space]
-             )
-             container_write = ContainerApplyDict(
-                 [container for container in container_write.values() if container.space == user_space]
-             )
-
-             ref_schema: DMSSchema | None = cls(
-                 spaces=SpaceApplyDict([s for s in space_write if s.space != user_space]),
-                 data_model=ref_model_write,
-                 views=ref_views,
-                 containers=ref_containers,
-             )
-         else:
-             ref_schema = None
-         return cls(
-             spaces=SpaceApplyDict([s for s in space_write if s.space == user_space]),
-             data_model=data_model_write,
-             views=view_write,
-             containers=container_write,
-             reference=ref_schema,
-         )
-
-     @classmethod
-     def _connection_references(cls, view: dm.View) -> set[dm.ViewId]:
-         view_ids: set[dm.ViewId] = set()
-         for prop in (view.properties or {}).values():
-             if isinstance(prop, dm.MappedProperty) and isinstance(prop.type, dm.DirectRelation):
-                 if prop.source:
-                     view_ids.add(prop.source)
-             elif isinstance(prop, dm.EdgeConnection):
-                 view_ids.add(prop.source)
-                 if prop.edge_source:
-                     view_ids.add(prop.edge_source)
-             elif isinstance(prop, ReverseDirectRelation):
-                 view_ids.add(prop.source)
-         return view_ids
-
-     @classmethod
-     def from_directory(cls, directory: str | Path) -> Self:
-         """Load a schema from a directory containing YAML files.
-
-         The directory is expected to follow the Cognite-Toolkit convention
-         where each file is named as `resource_type.resource_name.yaml`.
-         """
-         data, context = cls._read_directory(Path(directory))
-         return cls.load(data, context)
-
-     @classmethod
-     def _read_directory(cls, directory: Path) -> tuple[dict[str, list[Any]], dict[str, list[Path]]]:
-         data: dict[str, Any] = {}
-         context: dict[str, list[Path]] = {}
-         for yaml_file in directory.rglob("*.yaml"):
-             if "." in yaml_file.stem:
-                 resource_type = yaml_file.stem.rsplit(".", 1)[-1]
-                 if attr_name := cls._FIELD_NAME_BY_RESOURCE_TYPE.get(resource_type):
-                     data.setdefault(attr_name, [])
-                     context.setdefault(attr_name, [])
-                     try:
-                         loaded = yaml.safe_load(yaml_file.read_text())
-                     except Exception as e:
-                         warnings.warn(
-                             FileTypeUnexpectedWarning(yaml_file, frozenset([".yaml", ".yml"]), str(e)), stacklevel=2
-                         )
-                         continue
-
-                     if isinstance(loaded, list):
-                         data[attr_name].extend(loaded)
-                         context[attr_name].extend([yaml_file] * len(loaded))
-                     else:
-                         data[attr_name].append(loaded)
-                         context[attr_name].append(yaml_file)
-         return data, context
-
-     def to_directory(
-         self,
-         directory: str | Path,
-         exclude: set[str] | None = None,
-         new_line: str | None = "\n",
-         encoding: str | None = "utf-8",
-     ) -> None:
-         """Save the schema to a directory as YAML files. This is compatible with the Cognite-Toolkit convention.
-
-         Args:
-             directory (str | Path): The directory to save the schema to.
-             exclude (set[str]): A set of attributes to exclude from the output.
-             new_line (str): The line endings to use in the output files. Defaults to "\n".
-             encoding (str): The encoding to use in the output files. Defaults to "utf-8".
-         """
-         path_dir = Path(directory)
-         exclude_set = exclude or set()
-         data_models = path_dir / "data_models"
-         data_models.mkdir(parents=True, exist_ok=True)
-         if "spaces" not in exclude_set:
-             for space in self.spaces.values():
-                 (data_models / f"{space.space}.space.yaml").write_text(
-                     space.dump_yaml(), newline=new_line, encoding=encoding
-                 )
-         if "data_models" not in exclude_set and self.data_model:
-             (data_models / f"{self.data_model.external_id}.datamodel.yaml").write_text(
-                 self.data_model.dump_yaml(), newline=new_line, encoding=encoding
-             )
-         if "views" not in exclude_set and self.views:
-             view_dir = data_models / "views"
-             view_dir.mkdir(parents=True, exist_ok=True)
-             for view in self.views.values():
-                 (view_dir / f"{view.external_id}.view.yaml").write_text(
-                     view.dump_yaml(), newline=new_line, encoding=encoding
-                 )
-         if "containers" not in exclude_set and self.containers:
-             container_dir = data_models / "containers"
-             container_dir.mkdir(parents=True, exist_ok=True)
-             for container in self.containers.values():
-                 (container_dir / f"{container.external_id}.container.yaml").write_text(
-                     container.dump_yaml(), newline=new_line, encoding=encoding
-                 )
-         if "node_types" not in exclude_set and self.node_types:
-             node_dir = data_models / "nodes"
-             node_dir.mkdir(parents=True, exist_ok=True)
-             for node in self.node_types.values():
-                 (node_dir / f"{node.external_id}.node.yaml").write_text(
-                     node.dump_yaml(), newline=new_line, encoding=encoding
-                 )
-
-     @classmethod
-     def from_zip(cls, zip_file: str | Path) -> Self:
-         """Load a schema from a ZIP file containing YAML files.
-
-         The ZIP file is expected to follow the Cognite-Toolkit convention
-         where each file is named as `resource_type.resource_name.yaml`.
-         """
-         data, context = cls._read_zip(Path(zip_file))
-         return cls.load(data, context)
-
-     @classmethod
-     def _read_zip(cls, zip_file: Path) -> tuple[dict[str, list[Any]], dict[str, list[Path]]]:
-         data: dict[str, list[Any]] = {}
-         context: dict[str, list[Path]] = {}
-         with zipfile.ZipFile(zip_file, "r") as zip_ref:
-             for file_info in zip_ref.infolist():
-                 if file_info.filename.endswith(".yaml"):
-                     if "/" not in file_info.filename:
-                         continue
-                     filename = Path(file_info.filename.split("/")[-1])
-                     if "." not in filename.stem:
-                         continue
-                     resource_type = filename.stem.rsplit(".", 1)[-1]
-                     if attr_name := cls._FIELD_NAME_BY_RESOURCE_TYPE.get(resource_type):
-                         data.setdefault(attr_name, [])
-                         context.setdefault(attr_name, [])
-                         try:
-                             loaded = yaml.safe_load(zip_ref.read(file_info).decode())
-                         except Exception as e:
-                             warnings.warn(
-                                 FileTypeUnexpectedWarning(filename, frozenset([".yaml", ".yml"]), str(e)), stacklevel=2
-                             )
-                             continue
-                         if isinstance(loaded, list):
-                             data[attr_name].extend(loaded)
-                             context[attr_name].extend([filename] * len(loaded))
-                         else:
-                             data[attr_name].append(loaded)
-                             context[attr_name].append(filename)
-         return data, context
-
-     def to_zip(self, zip_file: str | Path, exclude: set[str] | None = None) -> None:
-         """Save the schema to a ZIP file as YAML files. This is compatible with the Cognite-Toolkit convention.
-
-         Args:
-             zip_file (str | Path): The ZIP file to save the schema to.
-             exclude (set[str]): A set of attributes to exclude from the output.
-         """
-         exclude_set = exclude or set()
-         with zipfile.ZipFile(zip_file, "w") as zip_ref:
-             if "spaces" not in exclude_set:
-                 for space in self.spaces.values():
-                     zip_ref.writestr(f"data_models/{space.space}.space.yaml", space.dump_yaml())
-             if "data_models" not in exclude_set and self.data_model:
-                 zip_ref.writestr(
-                     f"data_models/{self.data_model.external_id}.datamodel.yaml", self.data_model.dump_yaml()
-                 )
-             if "views" not in exclude_set:
-                 for view in self.views.values():
-                     zip_ref.writestr(f"data_models/views/{view.external_id}.view.yaml", view.dump_yaml())
-             if "containers" not in exclude_set:
-                 for container in self.containers.values():
-                     zip_ref.writestr(
-                         f"data_models/containers{container.external_id}.container.yaml", container.dump_yaml()
-                     )
-             if "node_types" not in exclude_set:
-                 for node in self.node_types.values():
-                     zip_ref.writestr(f"data_models/nodes/{node.external_id}.node.yaml", node.dump_yaml())
-
-     @classmethod
-     def load(cls, data: str | dict[str, list[Any]], context: dict[str, list[Path]] | None = None) -> Self:
-         """Loads a schema from a dictionary or a YAML or JSON formatted string.
-
-         Args:
-             data: The data to load the schema from. This can be a dictionary, a YAML or JSON formatted string.
-             context: This provides linage for where the data was loaded from. This is used in Warnings
-                 if a single item fails to load.
-
-         Returns:
-             DMSSchema: The loaded schema.
-         """
-         context = context or {}
-         if isinstance(data, str):
-             # YAML is a superset of JSON, so we can use the same parser
-             try:
-                 data_dict = yaml.safe_load(data)
-             except Exception as e:
-                 raise NeatYamlError(str(e)) from None
-             if not isinstance(data_dict, dict) and all(isinstance(v, list) for v in data_dict.values()):
-                 raise NeatYamlError(f"Invalid data structure: {type(data)}", "dict[str, list[Any]]") from None
-         else:
-             data_dict = data
-         loaded: dict[str, Any] = {}
-         for attr in fields(cls):
-             if items := data_dict.get(attr.name) or data_dict.get(to_camel(attr.name)):
-                 if attr.name == "data_model":
-                     if isinstance(items, list) and len(items) > 1:
-                         try:
-                             data_model_ids = [dm.DataModelId.load(item) for item in items]
-                         except Exception as e:
-                             data_model_file = context.get(attr.name, [Path("UNKNOWN")])[0]
-                             warnings.warn(
-                                 FileTypeUnexpectedWarning(
-                                     data_model_file, frozenset([dm.DataModelApply.__name__]), str(e)
-                                 ),
-                                 stacklevel=2,
-                             )
-                         else:
-                             warnings.warn(
-                                 ResourcesDuplicatedWarning(
-                                     frozenset(data_model_ids),
-                                     "data model",
-                                     "Will use the first DataModel.",
-                                 ),
-                                 stacklevel=2,
-                             )
-                     item = items[0] if isinstance(items, list) else items
-                     try:
-                         loaded[attr.name] = dm.DataModelApply.load(item)
-                     except Exception as e:
-                         data_model_file = context.get(attr.name, [Path("UNKNOWN")])[0]
-                         warnings.warn(
-                             FileTypeUnexpectedWarning(data_model_file, frozenset([dm.DataModelApply.__name__]), str(e)),
-                             stacklevel=2,
-                         )
-                 else:
-                     try:
-                         loaded[attr.name] = attr.type.load(items) # type: ignore[union-attr]
-                     except Exception as e:
-                         loaded[attr.name] = cls._load_individual_resources(
-                             items, attr, str(e), context.get(attr.name, [])
-                         )
-         return cls(**loaded)
-
-     @classmethod
-     def _load_individual_resources(cls, items: list, attr: Field, trigger_error: str, resource_context) -> list[Any]:
-         type_ = cast(type, attr.type)
-         resources = type_([])
-         if not hasattr(type_, "_RESOURCE"):
-             warnings.warn(
-                 FileTypeUnexpectedWarning(Path("UNKNOWN"), frozenset([type_.__name__]), trigger_error), stacklevel=2
-             )
-             return resources
-         # Fallback to load individual resources.
-         single_cls = type_._RESOURCE
-         for no, item in enumerate(items):
-             try:
-                 loaded_instance = single_cls.load(item)
-             except Exception as e:
-                 try:
-                     filepath = resource_context[no]
-                 except IndexError:
-                     filepath = Path("UNKNOWN")
-                 # We use repr(e) instead of str(e) to include the exception type in the warning message
-                 warnings.warn(
-                     FileTypeUnexpectedWarning(filepath, frozenset([single_cls.__name__]), repr(e)), stacklevel=2
-                 )
-             else:
-                 resources.append(loaded_instance)
-         return resources
-
-     def dump(self, camel_case: bool = True, sort: bool = True) -> dict[str, Any]:
-         """Dump the schema to a dictionary that can be serialized to JSON.
-
-         Args:
-             camel_case (bool): If True, the keys in the output dictionary will be in camel case.
-             sort (bool): If True, the items in the output dictionary will be sorted by their ID.
-                 This is useful for deterministic output which is useful for comparing schemas.
-
-         Returns:
-             dict: The schema as a dictionary.
-         """
-         output: dict[str, Any] = {}
-         cls_fields = sorted(fields(self), key=lambda f: f.name) if sort else fields(self)
-         for attr in cls_fields:
-             if items := getattr(self, attr.name):
-                 key = to_camel(attr.name) if camel_case else attr.name
-                 if isinstance(items, CogniteResourceDict):
-                     if sort:
-                         output[key] = [
-                             item.dump(camel_case) for item in sorted(items.values(), key=self._to_sortable_identifier)
-                         ]
-                     else:
-                         output[key] = items.dump(camel_case)
-                 else:
-                     output[key] = items.dump(camel_case=camel_case)
-         return output
-
-     @classmethod
-     def _to_sortable_identifier(cls, item: Any) -> str | tuple[str, str] | tuple[str, str, str]:
-         if isinstance(item, dm.ContainerApply | dm.ViewApply | dm.DataModelApply | dm.NodeApply | RawTableWrite):
-             identifier = item.as_id().as_tuple()
-             if len(identifier) == 3 and identifier[2] is None:
-                 return identifier[:2] # type: ignore[misc]
-             return cast(tuple[str, str] | tuple[str, str, str], identifier)
-         elif isinstance(item, dm.SpaceApply):
-             return item.space
-         elif isinstance(item, TransformationWrite):
-             return item.external_id or ""
-         elif isinstance(item, DatabaseWrite):
-             return item.name or ""
-         else:
-             raise ValueError(f"Cannot sort item of type {type(item)}")
-
-     def validate(self) -> list[NeatError]:
-         # TODO: This type of validation should be done in NeatSession where all the
-         # schema components which are not part of Rules are imported and the model as
-         # the whole is validated.
-
-         errors: set[NeatError] = set()
-         defined_spaces = self.spaces.copy()
-         defined_containers = self.containers.copy()
-         defined_views = self.views.copy()
-         for other_schema in [self.reference, self.last]:
-             if other_schema:
-                 defined_spaces |= other_schema.spaces
-                 defined_containers |= other_schema.containers
-                 defined_views |= other_schema.views
-
-         for container in self.containers.values():
-             if container.space not in defined_spaces:
-                 errors.add(
-                     ResourceNotFoundError[str, dm.ContainerId](container.space, "space", container.as_id(), "container")
-                 )
-
-         for view in self.views.values():
-             view_id = view.as_id()
-             if view.space not in defined_spaces:
-                 errors.add(ResourceNotFoundError(view.space, "space", view_id, "view"))
-
-             for parent in view.implements or []:
-                 if parent not in defined_views:
-                     errors.add(PropertyNotFoundError(parent, "view", "implements", view_id, "view"))
-
-             for prop_name, prop in (view.properties or {}).items():
-                 if isinstance(prop, dm.MappedPropertyApply):
-                     ref_container = defined_containers.get(prop.container)
-                     if ref_container is None:
-                         errors.add(ResourceNotFoundError(prop.container, "container", view_id, "view"))
-                     elif prop.container_property_identifier not in ref_container.properties:
-                         errors.add(
-                             PropertyNotFoundError(
-                                 prop.container,
-                                 "container",
-                                 prop.container_property_identifier,
-                                 view_id,
-                                 "view",
-                             )
-                         )
-                     else:
-                         container_property = ref_container.properties[prop.container_property_identifier]
-
-                         if isinstance(container_property.type, dm.DirectRelation) and prop.source is None:
-                             warnings.warn(
-                                 DirectRelationMissingSourceWarning(view_id, prop_name),
-                                 stacklevel=2,
-                             )
-
-                 if isinstance(prop, dm.EdgeConnectionApply) and prop.source not in defined_views:
-                     errors.add(PropertyNotFoundError(prop.source, "view", prop_name, view_id, "view"))
-
-                 if (
-                     isinstance(prop, dm.EdgeConnectionApply)
-                     and prop.edge_source is not None
-                     and prop.edge_source not in defined_views
-                 ):
-                     errors.add(PropertyNotFoundError(prop.edge_source, "view", prop_name, view_id, "view"))
-
-             # This allows for multiple view properties to be mapped to the same container property,
-             # as long as they have different external_id, otherwise this will lead to raising
-             # error ContainerPropertyUsedMultipleTimesError
-             property_count = Counter(
-                 (prop.container, prop.container_property_identifier, view_property_identifier)
-                 for view_property_identifier, prop in (view.properties or {}).items()
-                 if isinstance(prop, dm.MappedPropertyApply)
-             )
-
-             for (
-                 container_id,
-                 container_property_identifier,
-                 _,
-             ), count in property_count.items():
-                 if count > 1:
-                     view_properties = [
-                         prop_name
-                         for prop_name, prop in (view.properties or {}).items()
-                         if isinstance(prop, dm.MappedPropertyApply)
-                         and (prop.container, prop.container_property_identifier)
-                         == (container_id, container_property_identifier)
-                     ]
-                     errors.add(
-                         PropertyMappingDuplicatedError(
-                             container_id,
-                             "container",
-                             container_property_identifier,
-                             frozenset({dm.PropertyId(view_id, prop_name) for prop_name in view_properties}),
-                             "view property",
-                         )
-                     )
-
-         if self.data_model:
-             model = self.data_model
-             if model.space not in defined_spaces:
-                 errors.add(ResourceNotFoundError(model.space, "space", model.as_id(), "data model"))
-
-             view_counts: dict[dm.ViewId, int] = defaultdict(int)
-             for view_id_or_class in model.views or []:
-                 view_id = view_id_or_class if isinstance(view_id_or_class, dm.ViewId) else view_id_or_class.as_id()
-                 if view_id not in defined_views:
-                     errors.add(ResourceNotFoundError(view_id, "view", model.as_id(), "data model"))
-                 view_counts[view_id] += 1
-
-             for view_id, count in view_counts.items():
-                 if count > 1:
-                     errors.add(
-                         ResourceDuplicatedError(
-                             view_id,
-                             "view",
-                             repr(model.as_id()),
-                         )
-                     )
-
-         return list(errors)
-
-     @classmethod
-     def _append_referenced_containers(cls, client: CogniteClient, containers: dm.ContainerList) -> None:
-         """Containers can reference each other through the 'requires' constraint.
-
-         This method retrieves all containers that are referenced by other containers through the 'requires' constraint,
-         including their parents.
-
-         """
-         for _ in range(10): # Limiting the number of iterations to avoid infinite loops
-             referenced_containers = {
-                 const.require
-                 for container in containers
-                 for const in (container.constraints or {}).values()
-                 if isinstance(const, dm.RequiresConstraint)
-             }
-             missing_containers = referenced_containers - set(containers.as_ids())
-             if not missing_containers:
-                 break
-             found_containers = client.data_modeling.containers.retrieve(list(missing_containers))
-             containers.extend(found_containers)
-             if len(found_containers) != len(missing_containers):
-                 break
-         else:
-             warnings.warn(
-                 "The maximum number of iterations was reached while resolving referenced containers."
-                 "There might be referenced containers that are not included in the list of containers.",
-                 RuntimeWarning,
-                 stacklevel=2,
-             )
-         return None
-
-     def referenced_spaces(self, include_indirect_references: bool = True) -> set[str]:
-         """Get the spaces referenced by the schema.
-
-         Args:
-             include_indirect_references (bool): If True, the spaces referenced by as view.implements, and
-                 view.referenced_containers will be included in the output.
-         Returns:
-             set[str]: The spaces referenced by the schema.
-         """
-         referenced_spaces = {view.space for view in self.views.values()}
-         referenced_spaces |= {container.space for container in self.containers.values()}
-         if include_indirect_references:
-             referenced_spaces |= {
-                 container.space for view in self.views.values() for container in view.referenced_containers()
-             }
-             referenced_spaces |= {parent.space for view in self.views.values() for parent in view.implements or []}
-         referenced_spaces |= {node.space for node in self.node_types.values()}
-         if self.data_model:
-             referenced_spaces |= {self.data_model.space}
-             referenced_spaces |= {view.space for view in self.data_model.views or []}
-         referenced_spaces |= {s.space for s in self.spaces.values()}
-         return referenced_spaces
-
-     def referenced_container(self) -> set[dm.ContainerId]:
-         referenced_containers = {
-             container for view in self.views.values() for container in view.referenced_containers()
-         }
-         referenced_containers |= set(self.containers.keys())
-         return referenced_containers
-
-     def as_read_model(self) -> dm.DataModel[dm.View]:
-         if self.data_model is None:
-             raise ValueError("Data model is not defined")
-         all_containers = self.containers.copy()
-         all_views = self.views.copy()
-         for other_schema in [self.reference, self.last]:
-             if other_schema:
-                 all_containers |= other_schema.containers
-                 all_views |= other_schema.views
-
-         views: list[dm.View] = []
-         for view in self.views.values():
-             referenced_containers = ContainerApplyDict()
-             properties: dict[str, ViewProperty] = {}
-             # ChainMap is used to merge properties from the view and its parents
-             # Note that the order of the ChainMap is important, as the first dictionary has the highest priority
-             # So if a child and parent have the same property, the child property will be used.
-             write_properties = ChainMap(view.properties, *(all_views[v].properties for v in view.implements or [])) # type: ignore[arg-type]
-             for prop_name, prop in write_properties.items():
-                 read_prop = self._as_read_properties(prop, all_containers)
-                 if isinstance(read_prop, dm.MappedProperty) and read_prop.container not in referenced_containers:
-                     referenced_containers[read_prop.container] = all_containers[read_prop.container]
-                 properties[prop_name] = read_prop
-
-             read_view = dm.View(
-                 space=view.space,
-                 external_id=view.external_id,
-                 version=view.version,
-                 description=view.description,
-                 name=view.name,
-                 filter=view.filter,
-                 implements=view.implements.copy(),
-                 used_for=self._used_for(referenced_containers.values()),
-                 writable=self._writable(properties.values(), referenced_containers.values()),
-                 properties=properties,
-                 is_global=False,
-                 last_updated_time=0,
-                 created_time=0,
-             )
-             views.append(read_view)
-
-         return dm.DataModel(
-             space=self.data_model.space,
-             external_id=self.data_model.external_id,
-             version=self.data_model.version,
-             name=self.data_model.name,
-             description=self.data_model.description,
-             views=views,
-             is_global=False,
-             last_updated_time=0,
-             created_time=0,
-         )
-
-     @staticmethod
-     def _as_read_properties(
-         write: ViewPropertyApply, all_containers: MutableMapping[dm.ContainerId, dm.ContainerApply]
-     ) -> ViewProperty:
-         if isinstance(write, dm.MappedPropertyApply):
-             container_prop = all_containers[write.container].properties[write.container_property_identifier]
-             return dm.MappedProperty(
-                 container=write.container,
-                 container_property_identifier=write.container_property_identifier,
-                 name=write.name,
-                 description=write.description,
-                 source=write.source,
-                 type=container_prop.type,
-                 nullable=container_prop.nullable,
-                 auto_increment=container_prop.auto_increment,
-                 immutable=container_prop.immutable,
-                 # Likely bug in SDK.
-                 default_value=container_prop.default_value, # type: ignore[arg-type]
-             )
-         if isinstance(write, dm.EdgeConnectionApply):
-             edge_cls = SingleEdgeConnection if isinstance(write, SingleEdgeConnectionApply) else dm.MultiEdgeConnection
-             return edge_cls(
-                 type=write.type,
-                 source=write.source,
-                 name=write.name,
-                 description=write.description,
-                 edge_source=write.edge_source,
-                 direction=write.direction,
-             )
-         if isinstance(write, ReverseDirectRelationApply):
-             relation_cls = (
-                 SingleReverseDirectRelation
-                 if isinstance(write, SingleReverseDirectRelationApply)
-                 else dm.MultiReverseDirectRelation
-             )
-             return relation_cls(
-                 source=write.source,
-                 through=write.through,
-                 name=write.name,
-                 description=write.description,
-             )
-         raise ValueError(f"Cannot convert {write} to read format")
-
-     @staticmethod
-     def _used_for(containers: Iterable[dm.ContainerApply]) -> Literal["node", "edge", "all"]:
-         used_for = {container.used_for for container in containers}
-         if used_for == {"node"}:
-             return "node"
-         if used_for == {"edge"}:
-             return "edge"
-         return "all"
-
-     @staticmethod
-     def _writable(properties: Iterable[ViewProperty], containers: Iterable[dm.ContainerApply]) -> bool:
-         used_properties = {
-             (prop.container, prop.container_property_identifier)
-             for prop in properties
-             if isinstance(prop, dm.MappedProperty)
-         }
-         required_properties = {
-             (container.as_id(), prop_id)
-             for container in containers
-             for prop_id, prop in container.properties.items()
-             if not prop.nullable
-         }
-         # If a container has a required property that is not used by the view, the view is not writable
-         return not bool(required_properties - used_properties)
-
-
- @dataclass
- class PipelineSchema(DMSSchema):
-     transformations: TransformationWriteList = field(default_factory=lambda: TransformationWriteList([]))
-     databases: DatabaseWriteList = field(default_factory=lambda: DatabaseWriteList([]))
-     raw_tables: RawTableWriteList = field(default_factory=lambda: RawTableWriteList([]))
-
-     _FIELD_NAME_BY_RESOURCE_TYPE: ClassVar[dict[str, str]] = {
-         **DMSSchema._FIELD_NAME_BY_RESOURCE_TYPE,
-         "raw": "raw_tables",
-     }
-
-     def __post_init__(self):
-         existing_databases = {database.name for database in self.databases}
-         table_database = {table.database for table in self.raw_tables}
-         if missing := table_database - existing_databases:
-             self.databases.extend([DatabaseWrite(name=database) for database in missing])
-
-     @classmethod
-     def _read_directory(cls, directory: Path) -> tuple[dict[str, list[Any]], dict[str, list[Path]]]:
-         data, context = super()._read_directory(directory)
-         for yaml_file in directory.rglob("*.yaml"):
-             if yaml_file.parent.name in ("transformations", "raw"):
-                 attr_name = cls._FIELD_NAME_BY_RESOURCE_TYPE.get(yaml_file.parent.name, yaml_file.parent.name)
-                 data.setdefault(attr_name, [])
-                 context.setdefault(attr_name, [])
-                 try:
-                     loaded = yaml.safe_load(yaml_file.read_text())
-                 except Exception as e:
-                     warnings.warn(
-                         FileTypeUnexpectedWarning(yaml_file, frozenset([".yaml", ".yml"]), str(e)), stacklevel=2
-                     )
-                     continue
-                 if isinstance(loaded, list):
-                     data[attr_name].extend(loaded)
-                     context[attr_name].extend([yaml_file] * len(loaded))
-                 else:
-                     data[attr_name].append(loaded)
-                     context[attr_name].append(yaml_file)
-         return data, context
-
-     def to_directory(
-         self,
-         directory: str | Path,
-         exclude: set[str] | None = None,
-         new_line: str | None = "\n",
-         encoding: str | None = "utf-8",
-     ) -> None:
-         super().to_directory(directory, exclude)
-         exclude_set = exclude or set()
-         path_dir = Path(directory)
-         if "transformations" not in exclude_set and self.transformations:
-             transformation_dir = path_dir / "transformations"
-             transformation_dir.mkdir(exist_ok=True, parents=True)
-             for transformation in self.transformations:
-                 (transformation_dir / f"{transformation.external_id}.yaml").write_text(
-                     transformation.dump_yaml(), newline=new_line, encoding=encoding
-                 )
-         if "raw" not in exclude_set and self.raw_tables:
-             # The RAW Databases are not written to file. This is because cognite-toolkit expects the RAW databases
-             # to be in the same file as the RAW tables.
-             raw_dir = path_dir / "raw"
-             raw_dir.mkdir(exist_ok=True, parents=True)
-             for raw_table in self.raw_tables:
-                 (raw_dir / f"{raw_table.name}.yaml").write_text(
-                     raw_table.dump_yaml(), newline=new_line, encoding=encoding
-                 )
-
-     def to_zip(self, zip_file: str | Path, exclude: set[str] | None = None) -> None:
-         super().to_zip(zip_file, exclude)
-         exclude_set = exclude or set()
-         with zipfile.ZipFile(zip_file, "a") as zip_ref:
-             if "transformations" not in exclude_set:
-                 for transformation in self.transformations:
-                     zip_ref.writestr(f"transformations/{transformation.external_id}.yaml", transformation.dump_yaml())
-             if "raw" not in exclude_set:
-                 # The RAW Databases are not written to file. This is because cognite-toolkit expects the RAW databases
-                 # to be in the same file as the RAW tables.
-                 for raw_table in self.raw_tables:
-                     zip_ref.writestr(f"raw/{raw_table.name}.yaml", raw_table.dump_yaml())
-
-     @classmethod
-     def _read_zip(cls, zip_file: Path) -> tuple[dict[str, list[Any]], dict[str, list[Path]]]:
-         data, context = super()._read_zip(zip_file)
-         with zipfile.ZipFile(zip_file, "r") as zip_ref:
-             for file_info in zip_ref.infolist():
-                 if file_info.filename.endswith(".yaml"):
-                     if "/" not in file_info.filename:
-                         continue
-                     filepath = Path(file_info.filename)
-                     if (parent := filepath.parent.name) in ("transformations", "raw"):
-                         attr_name = cls._FIELD_NAME_BY_RESOURCE_TYPE.get(parent, parent)
-                         data.setdefault(attr_name, [])
-                         context.setdefault(attr_name, [])
-                         try:
-                             loaded = yaml.safe_load(zip_ref.read(file_info).decode())
-                         except Exception as e:
-                             warnings.warn(
-                                 FileTypeUnexpectedWarning(filepath, frozenset([".yaml", ".yml"]), str(e)), stacklevel=2
-                             )
-                             continue
-                         if isinstance(loaded, list):
-                             data[attr_name].extend(loaded)
-                             context[attr_name].extend([filepath] * len(loaded))
-                         else:
-                             data[attr_name].append(loaded)
-                             context[attr_name].append(filepath)
-         return data, context
-
-     @classmethod
-     def from_dms(cls, schema: DMSSchema, instance_space: str | None = None) -> "PipelineSchema":
-         if not schema.data_model:
-             raise ValueError("PipelineSchema must contain at least one data model")
-         first_data_model = schema.data_model
-         # The database name is limited to 32 characters
-         database_name = first_data_model.external_id[:32]
-         instance_space = instance_space or first_data_model.space
-         database = DatabaseWrite(name=database_name)
-         parent_views = {parent for view in schema.views.values() for parent in view.implements or []}
-         container_by_id = schema.containers.copy()
-
-         transformations = TransformationWriteList([])
-         raw_tables = RawTableWriteList([])
-         for view in schema.views.values():
-             if view.as_id() in parent_views:
-                 # Skipping parents as they do not have their own data
-                 continue
-             mapped_properties = {
-                 prop_name: prop
-                 for prop_name, prop in (view.properties or {}).items()
-                 if isinstance(prop, dm.MappedPropertyApply)
-             }
-             if mapped_properties:
-                 view_table = RawTableWrite(name=f"{view.external_id}Properties", database=database_name)
-                 raw_tables.append(view_table)
-                 transformation = cls._create_property_transformation(
-                     mapped_properties, view, view_table, container_by_id, instance_space
-                 )
-                 transformations.append(transformation)
-             connection_properties = {
-                 prop_name: prop
-                 for prop_name, prop in (view.properties or {}).items()
-                 if isinstance(prop, dm.EdgeConnectionApply)
-             }
-             for prop_name, connection_property in connection_properties.items():
-                 view_table = RawTableWrite(name=f"{view.external_id}.{prop_name}Edge", database=database_name)
-                 raw_tables.append(view_table)
-                 transformation = cls._create_edge_transformation(connection_property, view, view_table, instance_space)
-                 transformations.append(transformation)
-
-         return cls(
-             spaces=schema.spaces,
-             data_model=schema.data_model,
-             views=schema.views,
-             containers=schema.containers,
-             transformations=transformations,
-             databases=DatabaseWriteList([database]),
-             raw_tables=raw_tables,
-         )
-
-     @classmethod
-     def _create_property_transformation(
-         cls,
-         properties: dict[str, dm.MappedPropertyApply],
-         view: ViewApply,
-         table: RawTableWrite,
-         container_by_id: dict[dm.ContainerId, dm.ContainerApply],
-         instance_space: str,
-     ) -> TransformationWrite:
-         mapping_mode = {
-             "version": 1,
-             "sourceType": "raw",
-             # 'mappings' is set here and overwritten further down to ensure the correct order
-             "mappings": [],
-             "sourceLevel1": table.database,
-             "sourceLevel2": table.name,
-         }
-         mappings = [
-             {"from": "externalId", "to": "externalId", "asType": "STRING"},
-         ]
-         select_rows = ["cast(`externalId` as STRING) as externalId"]
-         for prop_name, prop in properties.items():
-             container = container_by_id.get(prop.container)
-             if container is not None:
-                 dms_type = container.properties[prop.container_property_identifier].type._type
-                 if dms_type in _DATA_TYPE_BY_DMS_TYPE:
-                     sql_type = _DATA_TYPE_BY_DMS_TYPE[dms_type].sql
-                 else:
-                     warnings.warn(
-                         f"Unknown DMS type '{dms_type}' for property '{prop_name}'", RuntimeWarning, stacklevel=2
-                     )
-                     sql_type = "STRING"
-             else:
-                 sql_type = "STRING"
-             select_rows.append(f"cast(`{prop_name}` as {sql_type}) as {prop_name}")
-             mappings.append({"from": prop_name, "to": prop_name, "asType": sql_type})
-         mapping_mode["mappings"] = mappings
-         select = ",\n ".join(select_rows)
-
-         return TransformationWrite(
-             external_id=f"{table.name}Transformation",
-             name=f"{table.name}Transformation",
-             ignore_null_fields=True,
-             destination=Nodes(
-                 view=ViewInfo(view.space, view.external_id, view.version),
-                 instance_space=instance_space,
-             ),
-             conflict_mode="upsert",
-             query=f"""/* MAPPING_MODE_ENABLED: true */
- /* {json.dumps(mapping_mode)} */
- select
- {select}
- from
- `{table.database}`.`{table.name}`;
- """,
-         )
-
-     @classmethod
-     def _create_edge_transformation(
-         cls, property_: dm.EdgeConnectionApply, view: ViewApply, table: RawTableWrite, instance_space: str
-     ) -> TransformationWrite:
-         start, end = view.external_id, property_.source.external_id
-         if property_.direction == "inwards":
-             start, end = end, start
-         mapping_mode = {
-             "version": 1,
-             "sourceType": "raw",
-             "mappings": [
-                 {"from": "externalId", "to": "externalId", "asType": "STRING"},
-                 {"from": start, "to": "startNode", "asType": "STRUCT<`space`:STRING, `externalId`:STRING>"},
-                 {"from": end, "to": "endNode", "asType": "STRUCT<`space`:STRING, `externalId`:STRING>"},
-             ],
-             "sourceLevel1": table.database,
-             "sourceLevel2": table.name,
-         }
-         select_rows = [
-             "cast(`externalId` as STRING) as externalId",
-             f"node_reference('{instance_space}', `{start}`) as startNode",
-             f"node_reference('{instance_space}', `{end}`) as endNode",
-         ]
-         select = ",\n ".join(select_rows)
-
-         return TransformationWrite(
-             external_id=f"{table.name}Transformation",
-             name=f"{table.name}Transformation",
-             ignore_null_fields=True,
-             destination=Edges(
-                 instance_space=instance_space,
-                 edge_type=EdgeType(space=property_.type.space, external_id=property_.type.external_id),
-             ),
-             conflict_mode="upsert",
-             query=f"""/* MAPPING_MODE_ENABLED: true */
- /* {json.dumps(mapping_mode)} */
- select
- {select}
- from
- `{table.database}`.`{table.name}`;
- """,
-         )
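
For reference, a minimal usage sketch of the two removed classes, using only methods defined in the file above; the directory name, ZIP name, and instance space are illustrative:

    from pathlib import Path

    from cognite.neat._rules.models.dms._schema import DMSSchema, PipelineSchema  # 0.98.0 location

    # Round-trip a schema through the Cognite-Toolkit layout
    # (resource_type.resource_name.yaml files), then validate it.
    schema = DMSSchema.from_directory(Path("my_model"))
    errors = schema.validate()  # list[NeatError]; empty means the schema is internally consistent
    schema.to_zip("my_model.zip", exclude={"node_types"})

    # Derive RAW tables and Transformations for populating the model.
    pipeline = PipelineSchema.from_dms(schema, instance_space="my_instances")
    pipeline.to_directory("build")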