cognite-neat 0.98.0__py3-none-any.whl → 0.99.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_client/__init__.py +4 -0
- cognite/neat/_client/_api/data_modeling_loaders.py +585 -0
- cognite/neat/_client/_api/schema.py +111 -0
- cognite/neat/_client/_api_client.py +17 -0
- cognite/neat/_client/data_classes/__init__.py +0 -0
- cognite/neat/{_utils/cdf/data_classes.py → _client/data_classes/data_modeling.py} +8 -135
- cognite/neat/_client/data_classes/schema.py +495 -0
- cognite/neat/_constants.py +27 -4
- cognite/neat/_graph/_shared.py +14 -15
- cognite/neat/_graph/extractors/_classic_cdf/_assets.py +14 -154
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +154 -7
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +25 -14
- cognite/neat/_graph/extractors/_classic_cdf/_data_sets.py +17 -92
- cognite/neat/_graph/extractors/_classic_cdf/_events.py +13 -162
- cognite/neat/_graph/extractors/_classic_cdf/_files.py +15 -179
- cognite/neat/_graph/extractors/_classic_cdf/_labels.py +32 -100
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +27 -178
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +14 -139
- cognite/neat/_graph/extractors/_classic_cdf/_timeseries.py +15 -173
- cognite/neat/_graph/extractors/_rdf_file.py +6 -7
- cognite/neat/_graph/loaders/_rdf2dms.py +2 -2
- cognite/neat/_graph/queries/_base.py +17 -1
- cognite/neat/_graph/transformers/_classic_cdf.py +74 -147
- cognite/neat/_graph/transformers/_prune_graph.py +1 -1
- cognite/neat/_graph/transformers/_rdfpath.py +1 -1
- cognite/neat/_issues/_base.py +26 -17
- cognite/neat/_issues/errors/__init__.py +4 -2
- cognite/neat/_issues/errors/_external.py +7 -0
- cognite/neat/_issues/errors/_properties.py +2 -7
- cognite/neat/_issues/errors/_resources.py +1 -1
- cognite/neat/_issues/warnings/__init__.py +8 -0
- cognite/neat/_issues/warnings/_external.py +16 -0
- cognite/neat/_issues/warnings/_properties.py +16 -0
- cognite/neat/_issues/warnings/_resources.py +26 -2
- cognite/neat/_issues/warnings/user_modeling.py +4 -4
- cognite/neat/_rules/_constants.py +8 -11
- cognite/neat/_rules/analysis/_base.py +8 -4
- cognite/neat/_rules/exporters/_base.py +3 -4
- cognite/neat/_rules/exporters/_rules2dms.py +33 -46
- cognite/neat/_rules/importers/__init__.py +1 -3
- cognite/neat/_rules/importers/_base.py +1 -1
- cognite/neat/_rules/importers/_dms2rules.py +6 -29
- cognite/neat/_rules/importers/_rdf/__init__.py +5 -0
- cognite/neat/_rules/importers/_rdf/_base.py +34 -11
- cognite/neat/_rules/importers/_rdf/_imf2rules.py +91 -0
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +43 -35
- cognite/neat/_rules/importers/_rdf/_owl2rules.py +80 -0
- cognite/neat/_rules/importers/_rdf/_shared.py +138 -441
- cognite/neat/_rules/models/__init__.py +1 -1
- cognite/neat/_rules/models/_base_rules.py +22 -12
- cognite/neat/_rules/models/dms/__init__.py +4 -2
- cognite/neat/_rules/models/dms/_exporter.py +45 -48
- cognite/neat/_rules/models/dms/_rules.py +20 -17
- cognite/neat/_rules/models/dms/_rules_input.py +52 -8
- cognite/neat/_rules/models/dms/_validation.py +391 -119
- cognite/neat/_rules/models/entities/_single_value.py +32 -4
- cognite/neat/_rules/models/information/__init__.py +2 -0
- cognite/neat/_rules/models/information/_rules.py +0 -67
- cognite/neat/_rules/models/information/_validation.py +9 -9
- cognite/neat/_rules/models/mapping/__init__.py +2 -3
- cognite/neat/_rules/models/mapping/_classic2core.py +36 -146
- cognite/neat/_rules/models/mapping/_classic2core.yaml +343 -0
- cognite/neat/_rules/transformers/__init__.py +2 -2
- cognite/neat/_rules/transformers/_converters.py +110 -11
- cognite/neat/_rules/transformers/_mapping.py +105 -30
- cognite/neat/_rules/transformers/_pipelines.py +1 -1
- cognite/neat/_rules/transformers/_verification.py +31 -3
- cognite/neat/_session/_base.py +24 -8
- cognite/neat/_session/_drop.py +35 -0
- cognite/neat/_session/_inspect.py +17 -5
- cognite/neat/_session/_mapping.py +39 -0
- cognite/neat/_session/_prepare.py +219 -23
- cognite/neat/_session/_read.py +49 -12
- cognite/neat/_session/_to.py +8 -5
- cognite/neat/_session/exceptions.py +4 -0
- cognite/neat/_store/_base.py +27 -24
- cognite/neat/_utils/rdf_.py +34 -5
- cognite/neat/_version.py +1 -1
- cognite/neat/_workflows/steps/lib/current/rules_exporter.py +5 -88
- cognite/neat/_workflows/steps/lib/current/rules_importer.py +3 -14
- cognite/neat/_workflows/steps/lib/current/rules_validator.py +6 -7
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/METADATA +3 -3
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/RECORD +87 -92
- cognite/neat/_rules/importers/_rdf/_imf2rules/__init__.py +0 -3
- cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2classes.py +0 -86
- cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2metadata.py +0 -29
- cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2properties.py +0 -130
- cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2rules.py +0 -154
- cognite/neat/_rules/importers/_rdf/_owl2rules/__init__.py +0 -3
- cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2classes.py +0 -58
- cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2metadata.py +0 -65
- cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2properties.py +0 -59
- cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2rules.py +0 -39
- cognite/neat/_rules/models/dms/_schema.py +0 -1101
- cognite/neat/_rules/models/mapping/_base.py +0 -131
- cognite/neat/_utils/cdf/loaders/__init__.py +0 -25
- cognite/neat/_utils/cdf/loaders/_base.py +0 -54
- cognite/neat/_utils/cdf/loaders/_data_modeling.py +0 -339
- cognite/neat/_utils/cdf/loaders/_ingestion.py +0 -167
- /cognite/neat/{_utils/cdf → _client/_api}/__init__.py +0 -0
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/LICENSE +0 -0
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/WHEEL +0 -0
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/entry_points.txt +0 -0
|
@@ -1,1101 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import sys
|
|
3
|
-
import warnings
|
|
4
|
-
import zipfile
|
|
5
|
-
from collections import ChainMap, Counter, defaultdict
|
|
6
|
-
from collections.abc import Iterable, MutableMapping
|
|
7
|
-
from dataclasses import Field, dataclass, field, fields
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
from typing import Any, ClassVar, Literal, cast
|
|
10
|
-
|
|
11
|
-
import yaml
|
|
12
|
-
from cognite.client import CogniteClient
|
|
13
|
-
from cognite.client import data_modeling as dm
|
|
14
|
-
from cognite.client.data_classes import DatabaseWrite, DatabaseWriteList, TransformationWrite, TransformationWriteList
|
|
15
|
-
from cognite.client.data_classes.data_modeling import ViewApply
|
|
16
|
-
from cognite.client.data_classes.data_modeling.views import (
|
|
17
|
-
ReverseDirectRelation,
|
|
18
|
-
ReverseDirectRelationApply,
|
|
19
|
-
SingleEdgeConnection,
|
|
20
|
-
SingleEdgeConnectionApply,
|
|
21
|
-
SingleReverseDirectRelation,
|
|
22
|
-
SingleReverseDirectRelationApply,
|
|
23
|
-
ViewProperty,
|
|
24
|
-
ViewPropertyApply,
|
|
25
|
-
)
|
|
26
|
-
from cognite.client.data_classes.transformations.common import Edges, EdgeType, Nodes, ViewInfo
|
|
27
|
-
|
|
28
|
-
from cognite.neat._issues import NeatError
|
|
29
|
-
from cognite.neat._issues.errors import (
|
|
30
|
-
NeatYamlError,
|
|
31
|
-
PropertyMappingDuplicatedError,
|
|
32
|
-
PropertyNotFoundError,
|
|
33
|
-
ResourceDuplicatedError,
|
|
34
|
-
ResourceNotFoundError,
|
|
35
|
-
)
|
|
36
|
-
from cognite.neat._issues.warnings import (
|
|
37
|
-
FileTypeUnexpectedWarning,
|
|
38
|
-
ResourceNotFoundWarning,
|
|
39
|
-
ResourceRetrievalWarning,
|
|
40
|
-
ResourcesDuplicatedWarning,
|
|
41
|
-
)
|
|
42
|
-
from cognite.neat._issues.warnings.user_modeling import DirectRelationMissingSourceWarning
|
|
43
|
-
from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE
|
|
44
|
-
from cognite.neat._utils.cdf.data_classes import (
|
|
45
|
-
CogniteResourceDict,
|
|
46
|
-
ContainerApplyDict,
|
|
47
|
-
NodeApplyDict,
|
|
48
|
-
RawTableWrite,
|
|
49
|
-
RawTableWriteList,
|
|
50
|
-
SpaceApplyDict,
|
|
51
|
-
ViewApplyDict,
|
|
52
|
-
)
|
|
53
|
-
from cognite.neat._utils.cdf.loaders import ViewLoader
|
|
54
|
-
from cognite.neat._utils.rdf_ import get_inheritance_path
|
|
55
|
-
from cognite.neat._utils.text import to_camel
|
|
56
|
-
|
|
57
|
-
if sys.version_info >= (3, 11):
|
|
58
|
-
from typing import Self
|
|
59
|
-
else:
|
|
60
|
-
from typing_extensions import Self
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
@dataclass
|
|
64
|
-
class DMSSchema:
|
|
65
|
-
data_model: dm.DataModelApply | None = None
|
|
66
|
-
spaces: SpaceApplyDict = field(default_factory=SpaceApplyDict)
|
|
67
|
-
views: ViewApplyDict = field(default_factory=ViewApplyDict)
|
|
68
|
-
containers: ContainerApplyDict = field(default_factory=ContainerApplyDict)
|
|
69
|
-
node_types: NodeApplyDict = field(default_factory=NodeApplyDict)
|
|
70
|
-
# The last schema is the previous version of the data model. In the case, extension=addition, this
|
|
71
|
-
# should not be modified.
|
|
72
|
-
last: "DMSSchema | None" = None
|
|
73
|
-
# Reference is typically the Enterprise model, while this is the solution model.
|
|
74
|
-
reference: "DMSSchema | None" = None
|
|
75
|
-
|
|
76
|
-
_FIELD_NAME_BY_RESOURCE_TYPE: ClassVar[dict[str, str]] = {
|
|
77
|
-
"container": "containers",
|
|
78
|
-
"view": "views",
|
|
79
|
-
"datamodel": "data_model",
|
|
80
|
-
"space": "spaces",
|
|
81
|
-
"node": "node_types",
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
def _get_mapped_container_from_view(self, view_id: dm.ViewId) -> set[dm.ContainerId]:
|
|
85
|
-
# index all views, including ones from reference
|
|
86
|
-
view_by_id = self.views.copy()
|
|
87
|
-
if self.reference:
|
|
88
|
-
view_by_id.update(self.reference.views)
|
|
89
|
-
|
|
90
|
-
if view_id not in view_by_id:
|
|
91
|
-
raise ValueError(f"View {view_id} not found")
|
|
92
|
-
|
|
93
|
-
indexed_implemented_views = {id_: view.implements for id_, view in view_by_id.items()}
|
|
94
|
-
view_inheritance = get_inheritance_path(view_id, indexed_implemented_views)
|
|
95
|
-
|
|
96
|
-
directly_referenced_containers = view_by_id[view_id].referenced_containers()
|
|
97
|
-
inherited_referenced_containers = set()
|
|
98
|
-
|
|
99
|
-
for parent_id in view_inheritance:
|
|
100
|
-
if implemented_view := view_by_id.get(parent_id):
|
|
101
|
-
inherited_referenced_containers |= implemented_view.referenced_containers()
|
|
102
|
-
else:
|
|
103
|
-
raise ResourceNotFoundError(parent_id, "view", view_id, "view")
|
|
104
|
-
|
|
105
|
-
return directly_referenced_containers | inherited_referenced_containers
|
|
106
|
-
|
|
107
|
-
@classmethod
|
|
108
|
-
def from_model_id(cls, client: CogniteClient, data_model_id: dm.DataModelIdentifier) -> "DMSSchema":
|
|
109
|
-
data_models = client.data_modeling.data_models.retrieve(data_model_id, inline_views=True)
|
|
110
|
-
if len(data_models) == 0:
|
|
111
|
-
raise ValueError(f"Data model {data_model_id} not found")
|
|
112
|
-
data_model = data_models.latest_version()
|
|
113
|
-
return cls.from_data_model(client, data_model)
|
|
114
|
-
|
|
115
|
-
@classmethod
|
|
116
|
-
def from_data_model(
|
|
117
|
-
cls,
|
|
118
|
-
client: CogniteClient,
|
|
119
|
-
data_model: dm.DataModel[dm.View],
|
|
120
|
-
reference_model: dm.DataModel[dm.View] | None = None,
|
|
121
|
-
) -> "DMSSchema":
|
|
122
|
-
"""Create a schema from a data model.
|
|
123
|
-
|
|
124
|
-
If a reference model is provided, the schema will include a reference schema. To determine which views,
|
|
125
|
-
and containers to put in the reference schema, the following rule is applied:
|
|
126
|
-
|
|
127
|
-
If a view or container space is different from the data model space,
|
|
128
|
-
it will be included in the reference schema.*
|
|
129
|
-
|
|
130
|
-
*One exception to this rule is if a view is directly referenced by the data model. In this case, the view will
|
|
131
|
-
be included in the data model schema, even if the space is different.
|
|
132
|
-
|
|
133
|
-
Args:
|
|
134
|
-
client: The Cognite client used for retrieving components referenced by the data model.
|
|
135
|
-
data_model: The data model to create the schema from.
|
|
136
|
-
reference_model: (Optional) The reference model to include in the schema.
|
|
137
|
-
This is typically the Enterprise model.
|
|
138
|
-
|
|
139
|
-
Returns:
|
|
140
|
-
DMSSchema: The schema created from the data model.
|
|
141
|
-
"""
|
|
142
|
-
views = dm.ViewList(data_model.views)
|
|
143
|
-
|
|
144
|
-
data_model_write = data_model.as_write()
|
|
145
|
-
data_model_write.views = list(views.as_ids())
|
|
146
|
-
|
|
147
|
-
if reference_model:
|
|
148
|
-
views.extend(reference_model.views)
|
|
149
|
-
|
|
150
|
-
container_ids = views.referenced_containers()
|
|
151
|
-
containers = client.data_modeling.containers.retrieve(list(container_ids))
|
|
152
|
-
cls._append_referenced_containers(client, containers)
|
|
153
|
-
|
|
154
|
-
space_ids = [data_model.space, reference_model.space] if reference_model else [data_model.space]
|
|
155
|
-
space_read = client.data_modeling.spaces.retrieve(space_ids)
|
|
156
|
-
if len(space_read) != len(space_ids):
|
|
157
|
-
raise ValueError(f"Space(s) {space_read} not found")
|
|
158
|
-
space_write = space_read.as_write()
|
|
159
|
-
|
|
160
|
-
view_loader = ViewLoader(client)
|
|
161
|
-
|
|
162
|
-
existing_view_ids = set(views.as_ids())
|
|
163
|
-
|
|
164
|
-
# We need to include all views the edges/direct relations are pointing to have a complete schema.
|
|
165
|
-
connection_referenced_view_ids: set[dm.ViewId] = set()
|
|
166
|
-
for view in views:
|
|
167
|
-
connection_referenced_view_ids |= cls._connection_references(view)
|
|
168
|
-
connection_referenced_view_ids = connection_referenced_view_ids - existing_view_ids
|
|
169
|
-
if connection_referenced_view_ids:
|
|
170
|
-
for view_id in connection_referenced_view_ids:
|
|
171
|
-
warnings.warn(
|
|
172
|
-
ResourceNotFoundWarning(view_id, "view", data_model_write.as_id(), "data model"),
|
|
173
|
-
stacklevel=2,
|
|
174
|
-
)
|
|
175
|
-
connection_referenced_views = view_loader.retrieve(list(connection_referenced_view_ids))
|
|
176
|
-
if failed := connection_referenced_view_ids - set(connection_referenced_views.as_ids()):
|
|
177
|
-
warnings.warn(ResourceRetrievalWarning(frozenset(failed), "view"), stacklevel=2)
|
|
178
|
-
views.extend(connection_referenced_views)
|
|
179
|
-
|
|
180
|
-
# We need to include parent views in the schema to make sure that the schema is valid.
|
|
181
|
-
parent_view_ids = {parent for view in views for parent in view.implements or []}
|
|
182
|
-
parents = view_loader.retrieve_all_parents(list(parent_view_ids - existing_view_ids))
|
|
183
|
-
views.extend([parent for parent in parents if parent.as_id() not in existing_view_ids])
|
|
184
|
-
|
|
185
|
-
# Converting views from read to write format requires to account for parents (implements)
|
|
186
|
-
# as the read format contains all properties from all parents, while the write formate should not contain
|
|
187
|
-
# properties from any parents.
|
|
188
|
-
# The ViewLoader as_write method looks up parents and remove properties from them.
|
|
189
|
-
view_write = ViewApplyDict([view_loader.as_write(view) for view in views])
|
|
190
|
-
|
|
191
|
-
container_write = ContainerApplyDict(containers.as_write())
|
|
192
|
-
user_space = data_model.space
|
|
193
|
-
if reference_model:
|
|
194
|
-
user_model_view_ids = set(data_model_write.views)
|
|
195
|
-
ref_model_write = reference_model.as_write()
|
|
196
|
-
ref_model_write.views = [view.as_id() for view in reference_model.views]
|
|
197
|
-
|
|
198
|
-
ref_views = ViewApplyDict(
|
|
199
|
-
[
|
|
200
|
-
view
|
|
201
|
-
for view_id, view in view_write.items()
|
|
202
|
-
if (view.space != user_space) or (view_id not in user_model_view_ids)
|
|
203
|
-
]
|
|
204
|
-
)
|
|
205
|
-
view_write = ViewApplyDict(
|
|
206
|
-
[
|
|
207
|
-
view
|
|
208
|
-
for view_id, view in view_write.items()
|
|
209
|
-
if view.space == user_space or view_id in user_model_view_ids
|
|
210
|
-
]
|
|
211
|
-
)
|
|
212
|
-
|
|
213
|
-
ref_containers = ContainerApplyDict(
|
|
214
|
-
[container for container in container_write.values() if container.space != user_space]
|
|
215
|
-
)
|
|
216
|
-
container_write = ContainerApplyDict(
|
|
217
|
-
[container for container in container_write.values() if container.space == user_space]
|
|
218
|
-
)
|
|
219
|
-
|
|
220
|
-
ref_schema: DMSSchema | None = cls(
|
|
221
|
-
spaces=SpaceApplyDict([s for s in space_write if s.space != user_space]),
|
|
222
|
-
data_model=ref_model_write,
|
|
223
|
-
views=ref_views,
|
|
224
|
-
containers=ref_containers,
|
|
225
|
-
)
|
|
226
|
-
else:
|
|
227
|
-
ref_schema = None
|
|
228
|
-
return cls(
|
|
229
|
-
spaces=SpaceApplyDict([s for s in space_write if s.space == user_space]),
|
|
230
|
-
data_model=data_model_write,
|
|
231
|
-
views=view_write,
|
|
232
|
-
containers=container_write,
|
|
233
|
-
reference=ref_schema,
|
|
234
|
-
)
|
|
235
|
-
|
|
236
|
-
@classmethod
|
|
237
|
-
def _connection_references(cls, view: dm.View) -> set[dm.ViewId]:
|
|
238
|
-
view_ids: set[dm.ViewId] = set()
|
|
239
|
-
for prop in (view.properties or {}).values():
|
|
240
|
-
if isinstance(prop, dm.MappedProperty) and isinstance(prop.type, dm.DirectRelation):
|
|
241
|
-
if prop.source:
|
|
242
|
-
view_ids.add(prop.source)
|
|
243
|
-
elif isinstance(prop, dm.EdgeConnection):
|
|
244
|
-
view_ids.add(prop.source)
|
|
245
|
-
if prop.edge_source:
|
|
246
|
-
view_ids.add(prop.edge_source)
|
|
247
|
-
elif isinstance(prop, ReverseDirectRelation):
|
|
248
|
-
view_ids.add(prop.source)
|
|
249
|
-
return view_ids
|
|
250
|
-
|
|
251
|
-
@classmethod
|
|
252
|
-
def from_directory(cls, directory: str | Path) -> Self:
|
|
253
|
-
"""Load a schema from a directory containing YAML files.
|
|
254
|
-
|
|
255
|
-
The directory is expected to follow the Cognite-Toolkit convention
|
|
256
|
-
where each file is named as `resource_type.resource_name.yaml`.
|
|
257
|
-
"""
|
|
258
|
-
data, context = cls._read_directory(Path(directory))
|
|
259
|
-
return cls.load(data, context)
|
|
260
|
-
|
|
261
|
-
@classmethod
|
|
262
|
-
def _read_directory(cls, directory: Path) -> tuple[dict[str, list[Any]], dict[str, list[Path]]]:
|
|
263
|
-
data: dict[str, Any] = {}
|
|
264
|
-
context: dict[str, list[Path]] = {}
|
|
265
|
-
for yaml_file in directory.rglob("*.yaml"):
|
|
266
|
-
if "." in yaml_file.stem:
|
|
267
|
-
resource_type = yaml_file.stem.rsplit(".", 1)[-1]
|
|
268
|
-
if attr_name := cls._FIELD_NAME_BY_RESOURCE_TYPE.get(resource_type):
|
|
269
|
-
data.setdefault(attr_name, [])
|
|
270
|
-
context.setdefault(attr_name, [])
|
|
271
|
-
try:
|
|
272
|
-
loaded = yaml.safe_load(yaml_file.read_text())
|
|
273
|
-
except Exception as e:
|
|
274
|
-
warnings.warn(
|
|
275
|
-
FileTypeUnexpectedWarning(yaml_file, frozenset([".yaml", ".yml"]), str(e)), stacklevel=2
|
|
276
|
-
)
|
|
277
|
-
continue
|
|
278
|
-
|
|
279
|
-
if isinstance(loaded, list):
|
|
280
|
-
data[attr_name].extend(loaded)
|
|
281
|
-
context[attr_name].extend([yaml_file] * len(loaded))
|
|
282
|
-
else:
|
|
283
|
-
data[attr_name].append(loaded)
|
|
284
|
-
context[attr_name].append(yaml_file)
|
|
285
|
-
return data, context
|
|
286
|
-
|
|
287
|
-
def to_directory(
|
|
288
|
-
self,
|
|
289
|
-
directory: str | Path,
|
|
290
|
-
exclude: set[str] | None = None,
|
|
291
|
-
new_line: str | None = "\n",
|
|
292
|
-
encoding: str | None = "utf-8",
|
|
293
|
-
) -> None:
|
|
294
|
-
"""Save the schema to a directory as YAML files. This is compatible with the Cognite-Toolkit convention.
|
|
295
|
-
|
|
296
|
-
Args:
|
|
297
|
-
directory (str | Path): The directory to save the schema to.
|
|
298
|
-
exclude (set[str]): A set of attributes to exclude from the output.
|
|
299
|
-
new_line (str): The line endings to use in the output files. Defaults to "\n".
|
|
300
|
-
encoding (str): The encoding to use in the output files. Defaults to "utf-8".
|
|
301
|
-
"""
|
|
302
|
-
path_dir = Path(directory)
|
|
303
|
-
exclude_set = exclude or set()
|
|
304
|
-
data_models = path_dir / "data_models"
|
|
305
|
-
data_models.mkdir(parents=True, exist_ok=True)
|
|
306
|
-
if "spaces" not in exclude_set:
|
|
307
|
-
for space in self.spaces.values():
|
|
308
|
-
(data_models / f"{space.space}.space.yaml").write_text(
|
|
309
|
-
space.dump_yaml(), newline=new_line, encoding=encoding
|
|
310
|
-
)
|
|
311
|
-
if "data_models" not in exclude_set and self.data_model:
|
|
312
|
-
(data_models / f"{self.data_model.external_id}.datamodel.yaml").write_text(
|
|
313
|
-
self.data_model.dump_yaml(), newline=new_line, encoding=encoding
|
|
314
|
-
)
|
|
315
|
-
if "views" not in exclude_set and self.views:
|
|
316
|
-
view_dir = data_models / "views"
|
|
317
|
-
view_dir.mkdir(parents=True, exist_ok=True)
|
|
318
|
-
for view in self.views.values():
|
|
319
|
-
(view_dir / f"{view.external_id}.view.yaml").write_text(
|
|
320
|
-
view.dump_yaml(), newline=new_line, encoding=encoding
|
|
321
|
-
)
|
|
322
|
-
if "containers" not in exclude_set and self.containers:
|
|
323
|
-
container_dir = data_models / "containers"
|
|
324
|
-
container_dir.mkdir(parents=True, exist_ok=True)
|
|
325
|
-
for container in self.containers.values():
|
|
326
|
-
(container_dir / f"{container.external_id}.container.yaml").write_text(
|
|
327
|
-
container.dump_yaml(), newline=new_line, encoding=encoding
|
|
328
|
-
)
|
|
329
|
-
if "node_types" not in exclude_set and self.node_types:
|
|
330
|
-
node_dir = data_models / "nodes"
|
|
331
|
-
node_dir.mkdir(parents=True, exist_ok=True)
|
|
332
|
-
for node in self.node_types.values():
|
|
333
|
-
(node_dir / f"{node.external_id}.node.yaml").write_text(
|
|
334
|
-
node.dump_yaml(), newline=new_line, encoding=encoding
|
|
335
|
-
)
|
|
336
|
-
|
|
337
|
-
@classmethod
|
|
338
|
-
def from_zip(cls, zip_file: str | Path) -> Self:
|
|
339
|
-
"""Load a schema from a ZIP file containing YAML files.
|
|
340
|
-
|
|
341
|
-
The ZIP file is expected to follow the Cognite-Toolkit convention
|
|
342
|
-
where each file is named as `resource_type.resource_name.yaml`.
|
|
343
|
-
"""
|
|
344
|
-
data, context = cls._read_zip(Path(zip_file))
|
|
345
|
-
return cls.load(data, context)
|
|
346
|
-
|
|
347
|
-
@classmethod
|
|
348
|
-
def _read_zip(cls, zip_file: Path) -> tuple[dict[str, list[Any]], dict[str, list[Path]]]:
|
|
349
|
-
data: dict[str, list[Any]] = {}
|
|
350
|
-
context: dict[str, list[Path]] = {}
|
|
351
|
-
with zipfile.ZipFile(zip_file, "r") as zip_ref:
|
|
352
|
-
for file_info in zip_ref.infolist():
|
|
353
|
-
if file_info.filename.endswith(".yaml"):
|
|
354
|
-
if "/" not in file_info.filename:
|
|
355
|
-
continue
|
|
356
|
-
filename = Path(file_info.filename.split("/")[-1])
|
|
357
|
-
if "." not in filename.stem:
|
|
358
|
-
continue
|
|
359
|
-
resource_type = filename.stem.rsplit(".", 1)[-1]
|
|
360
|
-
if attr_name := cls._FIELD_NAME_BY_RESOURCE_TYPE.get(resource_type):
|
|
361
|
-
data.setdefault(attr_name, [])
|
|
362
|
-
context.setdefault(attr_name, [])
|
|
363
|
-
try:
|
|
364
|
-
loaded = yaml.safe_load(zip_ref.read(file_info).decode())
|
|
365
|
-
except Exception as e:
|
|
366
|
-
warnings.warn(
|
|
367
|
-
FileTypeUnexpectedWarning(filename, frozenset([".yaml", ".yml"]), str(e)), stacklevel=2
|
|
368
|
-
)
|
|
369
|
-
continue
|
|
370
|
-
if isinstance(loaded, list):
|
|
371
|
-
data[attr_name].extend(loaded)
|
|
372
|
-
context[attr_name].extend([filename] * len(loaded))
|
|
373
|
-
else:
|
|
374
|
-
data[attr_name].append(loaded)
|
|
375
|
-
context[attr_name].append(filename)
|
|
376
|
-
return data, context
|
|
377
|
-
|
|
378
|
-
def to_zip(self, zip_file: str | Path, exclude: set[str] | None = None) -> None:
|
|
379
|
-
"""Save the schema to a ZIP file as YAML files. This is compatible with the Cognite-Toolkit convention.
|
|
380
|
-
|
|
381
|
-
Args:
|
|
382
|
-
zip_file (str | Path): The ZIP file to save the schema to.
|
|
383
|
-
exclude (set[str]): A set of attributes to exclude from the output.
|
|
384
|
-
"""
|
|
385
|
-
exclude_set = exclude or set()
|
|
386
|
-
with zipfile.ZipFile(zip_file, "w") as zip_ref:
|
|
387
|
-
if "spaces" not in exclude_set:
|
|
388
|
-
for space in self.spaces.values():
|
|
389
|
-
zip_ref.writestr(f"data_models/{space.space}.space.yaml", space.dump_yaml())
|
|
390
|
-
if "data_models" not in exclude_set and self.data_model:
|
|
391
|
-
zip_ref.writestr(
|
|
392
|
-
f"data_models/{self.data_model.external_id}.datamodel.yaml", self.data_model.dump_yaml()
|
|
393
|
-
)
|
|
394
|
-
if "views" not in exclude_set:
|
|
395
|
-
for view in self.views.values():
|
|
396
|
-
zip_ref.writestr(f"data_models/views/{view.external_id}.view.yaml", view.dump_yaml())
|
|
397
|
-
if "containers" not in exclude_set:
|
|
398
|
-
for container in self.containers.values():
|
|
399
|
-
zip_ref.writestr(
|
|
400
|
-
f"data_models/containers{container.external_id}.container.yaml", container.dump_yaml()
|
|
401
|
-
)
|
|
402
|
-
if "node_types" not in exclude_set:
|
|
403
|
-
for node in self.node_types.values():
|
|
404
|
-
zip_ref.writestr(f"data_models/nodes/{node.external_id}.node.yaml", node.dump_yaml())
|
|
405
|
-
|
|
406
|
-
@classmethod
|
|
407
|
-
def load(cls, data: str | dict[str, list[Any]], context: dict[str, list[Path]] | None = None) -> Self:
|
|
408
|
-
"""Loads a schema from a dictionary or a YAML or JSON formatted string.
|
|
409
|
-
|
|
410
|
-
Args:
|
|
411
|
-
data: The data to load the schema from. This can be a dictionary, a YAML or JSON formatted string.
|
|
412
|
-
context: This provides linage for where the data was loaded from. This is used in Warnings
|
|
413
|
-
if a single item fails to load.
|
|
414
|
-
|
|
415
|
-
Returns:
|
|
416
|
-
DMSSchema: The loaded schema.
|
|
417
|
-
"""
|
|
418
|
-
context = context or {}
|
|
419
|
-
if isinstance(data, str):
|
|
420
|
-
# YAML is a superset of JSON, so we can use the same parser
|
|
421
|
-
try:
|
|
422
|
-
data_dict = yaml.safe_load(data)
|
|
423
|
-
except Exception as e:
|
|
424
|
-
raise NeatYamlError(str(e)) from None
|
|
425
|
-
if not isinstance(data_dict, dict) and all(isinstance(v, list) for v in data_dict.values()):
|
|
426
|
-
raise NeatYamlError(f"Invalid data structure: {type(data)}", "dict[str, list[Any]]") from None
|
|
427
|
-
else:
|
|
428
|
-
data_dict = data
|
|
429
|
-
loaded: dict[str, Any] = {}
|
|
430
|
-
for attr in fields(cls):
|
|
431
|
-
if items := data_dict.get(attr.name) or data_dict.get(to_camel(attr.name)):
|
|
432
|
-
if attr.name == "data_model":
|
|
433
|
-
if isinstance(items, list) and len(items) > 1:
|
|
434
|
-
try:
|
|
435
|
-
data_model_ids = [dm.DataModelId.load(item) for item in items]
|
|
436
|
-
except Exception as e:
|
|
437
|
-
data_model_file = context.get(attr.name, [Path("UNKNOWN")])[0]
|
|
438
|
-
warnings.warn(
|
|
439
|
-
FileTypeUnexpectedWarning(
|
|
440
|
-
data_model_file, frozenset([dm.DataModelApply.__name__]), str(e)
|
|
441
|
-
),
|
|
442
|
-
stacklevel=2,
|
|
443
|
-
)
|
|
444
|
-
else:
|
|
445
|
-
warnings.warn(
|
|
446
|
-
ResourcesDuplicatedWarning(
|
|
447
|
-
frozenset(data_model_ids),
|
|
448
|
-
"data model",
|
|
449
|
-
"Will use the first DataModel.",
|
|
450
|
-
),
|
|
451
|
-
stacklevel=2,
|
|
452
|
-
)
|
|
453
|
-
item = items[0] if isinstance(items, list) else items
|
|
454
|
-
try:
|
|
455
|
-
loaded[attr.name] = dm.DataModelApply.load(item)
|
|
456
|
-
except Exception as e:
|
|
457
|
-
data_model_file = context.get(attr.name, [Path("UNKNOWN")])[0]
|
|
458
|
-
warnings.warn(
|
|
459
|
-
FileTypeUnexpectedWarning(data_model_file, frozenset([dm.DataModelApply.__name__]), str(e)),
|
|
460
|
-
stacklevel=2,
|
|
461
|
-
)
|
|
462
|
-
else:
|
|
463
|
-
try:
|
|
464
|
-
loaded[attr.name] = attr.type.load(items) # type: ignore[union-attr]
|
|
465
|
-
except Exception as e:
|
|
466
|
-
loaded[attr.name] = cls._load_individual_resources(
|
|
467
|
-
items, attr, str(e), context.get(attr.name, [])
|
|
468
|
-
)
|
|
469
|
-
return cls(**loaded)
|
|
470
|
-
|
|
471
|
-
@classmethod
|
|
472
|
-
def _load_individual_resources(cls, items: list, attr: Field, trigger_error: str, resource_context) -> list[Any]:
|
|
473
|
-
type_ = cast(type, attr.type)
|
|
474
|
-
resources = type_([])
|
|
475
|
-
if not hasattr(type_, "_RESOURCE"):
|
|
476
|
-
warnings.warn(
|
|
477
|
-
FileTypeUnexpectedWarning(Path("UNKNOWN"), frozenset([type_.__name__]), trigger_error), stacklevel=2
|
|
478
|
-
)
|
|
479
|
-
return resources
|
|
480
|
-
# Fallback to load individual resources.
|
|
481
|
-
single_cls = type_._RESOURCE
|
|
482
|
-
for no, item in enumerate(items):
|
|
483
|
-
try:
|
|
484
|
-
loaded_instance = single_cls.load(item)
|
|
485
|
-
except Exception as e:
|
|
486
|
-
try:
|
|
487
|
-
filepath = resource_context[no]
|
|
488
|
-
except IndexError:
|
|
489
|
-
filepath = Path("UNKNOWN")
|
|
490
|
-
# We use repr(e) instead of str(e) to include the exception type in the warning message
|
|
491
|
-
warnings.warn(
|
|
492
|
-
FileTypeUnexpectedWarning(filepath, frozenset([single_cls.__name__]), repr(e)), stacklevel=2
|
|
493
|
-
)
|
|
494
|
-
else:
|
|
495
|
-
resources.append(loaded_instance)
|
|
496
|
-
return resources
|
|
497
|
-
|
|
498
|
-
def dump(self, camel_case: bool = True, sort: bool = True) -> dict[str, Any]:
|
|
499
|
-
"""Dump the schema to a dictionary that can be serialized to JSON.
|
|
500
|
-
|
|
501
|
-
Args:
|
|
502
|
-
camel_case (bool): If True, the keys in the output dictionary will be in camel case.
|
|
503
|
-
sort (bool): If True, the items in the output dictionary will be sorted by their ID.
|
|
504
|
-
This is useful for deterministic output which is useful for comparing schemas.
|
|
505
|
-
|
|
506
|
-
Returns:
|
|
507
|
-
dict: The schema as a dictionary.
|
|
508
|
-
"""
|
|
509
|
-
output: dict[str, Any] = {}
|
|
510
|
-
cls_fields = sorted(fields(self), key=lambda f: f.name) if sort else fields(self)
|
|
511
|
-
for attr in cls_fields:
|
|
512
|
-
if items := getattr(self, attr.name):
|
|
513
|
-
key = to_camel(attr.name) if camel_case else attr.name
|
|
514
|
-
if isinstance(items, CogniteResourceDict):
|
|
515
|
-
if sort:
|
|
516
|
-
output[key] = [
|
|
517
|
-
item.dump(camel_case) for item in sorted(items.values(), key=self._to_sortable_identifier)
|
|
518
|
-
]
|
|
519
|
-
else:
|
|
520
|
-
output[key] = items.dump(camel_case)
|
|
521
|
-
else:
|
|
522
|
-
output[key] = items.dump(camel_case=camel_case)
|
|
523
|
-
return output
|
|
524
|
-
|
|
525
|
-
@classmethod
|
|
526
|
-
def _to_sortable_identifier(cls, item: Any) -> str | tuple[str, str] | tuple[str, str, str]:
|
|
527
|
-
if isinstance(item, dm.ContainerApply | dm.ViewApply | dm.DataModelApply | dm.NodeApply | RawTableWrite):
|
|
528
|
-
identifier = item.as_id().as_tuple()
|
|
529
|
-
if len(identifier) == 3 and identifier[2] is None:
|
|
530
|
-
return identifier[:2] # type: ignore[misc]
|
|
531
|
-
return cast(tuple[str, str] | tuple[str, str, str], identifier)
|
|
532
|
-
elif isinstance(item, dm.SpaceApply):
|
|
533
|
-
return item.space
|
|
534
|
-
elif isinstance(item, TransformationWrite):
|
|
535
|
-
return item.external_id or ""
|
|
536
|
-
elif isinstance(item, DatabaseWrite):
|
|
537
|
-
return item.name or ""
|
|
538
|
-
else:
|
|
539
|
-
raise ValueError(f"Cannot sort item of type {type(item)}")
|
|
540
|
-
|
|
541
|
-
def validate(self) -> list[NeatError]:
    """Validate internal consistency of the schema.

    Checks that every container, view, and the data model reference only
    spaces, views, and container properties that are defined in this schema
    (or in the ``reference``/``last`` schemas, which count as known context).

    Returns:
        list[NeatError]: The collected errors. Errors are deduplicated via a
            set, so the order of the returned list is not deterministic.
    """
    # TODO: This type of validation should be done in NeatSession where all the
    # schema components which are not part of Rules are imported and the model as
    # the whole is validated.

    errors: set[NeatError] = set()
    # Resources defined in the reference/last schemas are treated as "known",
    # so references into them are not reported as missing.
    defined_spaces = self.spaces.copy()
    defined_containers = self.containers.copy()
    defined_views = self.views.copy()
    for other_schema in [self.reference, self.last]:
        if other_schema:
            defined_spaces |= other_schema.spaces
            defined_containers |= other_schema.containers
            defined_views |= other_schema.views

    for container in self.containers.values():
        if container.space not in defined_spaces:
            errors.add(
                ResourceNotFoundError[str, dm.ContainerId](container.space, "space", container.as_id(), "container")
            )

    for view in self.views.values():
        view_id = view.as_id()
        if view.space not in defined_spaces:
            errors.add(ResourceNotFoundError(view.space, "space", view_id, "view"))

        # Every parent in the implements list must itself be a known view.
        for parent in view.implements or []:
            if parent not in defined_views:
                errors.add(PropertyNotFoundError(parent, "view", "implements", view_id, "view"))

        for prop_name, prop in (view.properties or {}).items():
            if isinstance(prop, dm.MappedPropertyApply):
                # A mapped property must point at an existing container property.
                ref_container = defined_containers.get(prop.container)
                if ref_container is None:
                    errors.add(ResourceNotFoundError(prop.container, "container", view_id, "view"))
                elif prop.container_property_identifier not in ref_container.properties:
                    errors.add(
                        PropertyNotFoundError(
                            prop.container,
                            "container",
                            prop.container_property_identifier,
                            view_id,
                            "view",
                        )
                    )
                else:
                    container_property = ref_container.properties[prop.container_property_identifier]

                    # A direct relation without a source view is legal but loses
                    # type information, so it is surfaced as a warning, not an error.
                    if isinstance(container_property.type, dm.DirectRelation) and prop.source is None:
                        warnings.warn(
                            DirectRelationMissingSourceWarning(view_id, prop_name),
                            stacklevel=2,
                        )

            # Edge connections must point at known views on both ends.
            if isinstance(prop, dm.EdgeConnectionApply) and prop.source not in defined_views:
                errors.add(PropertyNotFoundError(prop.source, "view", prop_name, view_id, "view"))

            if (
                isinstance(prop, dm.EdgeConnectionApply)
                and prop.edge_source is not None
                and prop.edge_source not in defined_views
            ):
                errors.add(PropertyNotFoundError(prop.edge_source, "view", prop_name, view_id, "view"))

        # This allows for multiple view properties to be mapped to the same container property,
        # as long as they have different external_id, otherwise this will lead to raising
        # error ContainerPropertyUsedMultipleTimesError
        # NOTE(review): the key includes the view property identifier, which is unique
        # per view, so count can never exceed 1 here — the duplicate check below is
        # effectively disabled on purpose (per the comment above).
        property_count = Counter(
            (prop.container, prop.container_property_identifier, view_property_identifier)
            for view_property_identifier, prop in (view.properties or {}).items()
            if isinstance(prop, dm.MappedPropertyApply)
        )

        for (
            container_id,
            container_property_identifier,
            _,
        ), count in property_count.items():
            if count > 1:
                view_properties = [
                    prop_name
                    for prop_name, prop in (view.properties or {}).items()
                    if isinstance(prop, dm.MappedPropertyApply)
                    and (prop.container, prop.container_property_identifier)
                    == (container_id, container_property_identifier)
                ]
                errors.add(
                    PropertyMappingDuplicatedError(
                        container_id,
                        "container",
                        container_property_identifier,
                        frozenset({dm.PropertyId(view_id, prop_name) for prop_name in view_properties}),
                        "view property",
                    )
                )

    if self.data_model:
        model = self.data_model
        if model.space not in defined_spaces:
            errors.add(ResourceNotFoundError(model.space, "space", model.as_id(), "data model"))

        # Each view may only appear once in the data model's view list.
        view_counts: dict[dm.ViewId, int] = defaultdict(int)
        for view_id_or_class in model.views or []:
            view_id = view_id_or_class if isinstance(view_id_or_class, dm.ViewId) else view_id_or_class.as_id()
            if view_id not in defined_views:
                errors.add(ResourceNotFoundError(view_id, "view", model.as_id(), "data model"))
            view_counts[view_id] += 1

        for view_id, count in view_counts.items():
            if count > 1:
                errors.add(
                    ResourceDuplicatedError(
                        view_id,
                        "view",
                        repr(model.as_id()),
                    )
                )

    return list(errors)
|
|
661
|
-
@classmethod
def _append_referenced_containers(cls, client: CogniteClient, containers: dm.ContainerList) -> None:
    """Containers can reference each other through the 'requires' constraint.

    This method retrieves all containers that are referenced by other containers through the 'requires' constraint,
    including their parents.

    Mutates ``containers`` in place by extending it with any containers fetched
    from CDF. Iterates until no new references are found, a referenced container
    cannot be retrieved, or the iteration cap is hit.
    """
    for _ in range(10):  # Limiting the number of iterations to avoid infinite loops
        # Collect every container ID mentioned in a 'requires' constraint.
        referenced_containers = {
            const.require
            for container in containers
            for const in (container.constraints or {}).values()
            if isinstance(const, dm.RequiresConstraint)
        }
        missing_containers = referenced_containers - set(containers.as_ids())
        if not missing_containers:
            break
        found_containers = client.data_modeling.containers.retrieve(list(missing_containers))
        containers.extend(found_containers)
        # If some referenced containers do not exist in CDF, retrying would
        # loop forever on the same missing IDs — stop here.
        if len(found_containers) != len(missing_containers):
            break
    else:
        # for/else: only reached when the loop exhausted all 10 iterations
        # without breaking, i.e. references kept expanding.
        warnings.warn(
            "The maximum number of iterations was reached while resolving referenced containers."
            "There might be referenced containers that are not included in the list of containers.",
            RuntimeWarning,
            stacklevel=2,
        )
    return None
|
|
692
|
-
def referenced_spaces(self, include_indirect_references: bool = True) -> set[str]:
    """Get the spaces referenced by the schema.

    Args:
        include_indirect_references (bool): If True, the spaces referenced by as view.implements, and
            view.referenced_containers will be included in the output.
    Returns:
        set[str]: The spaces referenced by the schema.
    """
    spaces: set[str] = set()
    for view in self.views.values():
        spaces.add(view.space)
    for container in self.containers.values():
        spaces.add(container.space)
    if include_indirect_references:
        # Spaces reachable only through view -> container mappings or
        # view -> parent (implements) links.
        for view in self.views.values():
            for referenced in view.referenced_containers():
                spaces.add(referenced.space)
            for parent in view.implements or []:
                spaces.add(parent.space)
    for node in self.node_types.values():
        spaces.add(node.space)
    if self.data_model:
        spaces.add(self.data_model.space)
        for view_ref in self.data_model.views or []:
            spaces.add(view_ref.space)
    for space_apply in self.spaces.values():
        spaces.add(space_apply.space)
    return spaces
|
|
715
|
-
def referenced_container(self) -> set[dm.ContainerId]:
    """Return the IDs of all containers the schema defines or maps to.

    Includes every container declared in the schema plus every container
    referenced by a view's property mappings.
    """
    referenced: set[dm.ContainerId] = set(self.containers.keys())
    for view in self.views.values():
        referenced.update(view.referenced_containers())
    return referenced
|
|
722
|
-
def as_read_model(self) -> dm.DataModel[dm.View]:
    """Convert this write-format schema into a read-format data model.

    Resolves each view's inherited properties (via ``implements``) and each
    mapped property's container definition to produce fully-populated
    ``dm.View`` objects. Server-assigned fields (``is_global``, timestamps)
    are filled with placeholder values.

    Returns:
        dm.DataModel[dm.View]: The read-format data model.

    Raises:
        ValueError: If the schema has no data model defined.
        KeyError: If a parent view or referenced container is not present in
            this schema or the reference/last schemas.
    """
    if self.data_model is None:
        raise ValueError("Data model is not defined")
    # Lookups may resolve into the reference/last schemas as well.
    all_containers = self.containers.copy()
    all_views = self.views.copy()
    for other_schema in [self.reference, self.last]:
        if other_schema:
            all_containers |= other_schema.containers
            all_views |= other_schema.views

    views: list[dm.View] = []
    for view in self.views.values():
        referenced_containers = ContainerApplyDict()
        properties: dict[str, ViewProperty] = {}
        # ChainMap is used to merge properties from the view and its parents
        # Note that the order of the ChainMap is important, as the first dictionary has the highest priority
        # So if a child and parent have the same property, the child property will be used.
        write_properties = ChainMap(view.properties, *(all_views[v].properties for v in view.implements or []))  # type: ignore[arg-type]
        for prop_name, prop in write_properties.items():
            read_prop = self._as_read_properties(prop, all_containers)
            # Track which containers this view actually maps into; these drive
            # the used_for/writable computations below.
            if isinstance(read_prop, dm.MappedProperty) and read_prop.container not in referenced_containers:
                referenced_containers[read_prop.container] = all_containers[read_prop.container]
            properties[prop_name] = read_prop

        read_view = dm.View(
            space=view.space,
            external_id=view.external_id,
            version=view.version,
            description=view.description,
            name=view.name,
            filter=view.filter,
            implements=view.implements.copy(),
            used_for=self._used_for(referenced_containers.values()),
            writable=self._writable(properties.values(), referenced_containers.values()),
            properties=properties,
            # Placeholder server-side metadata; not meaningful locally.
            is_global=False,
            last_updated_time=0,
            created_time=0,
        )
        views.append(read_view)

    return dm.DataModel(
        space=self.data_model.space,
        external_id=self.data_model.external_id,
        version=self.data_model.version,
        name=self.data_model.name,
        description=self.data_model.description,
        views=views,
        # Placeholder server-side metadata; not meaningful locally.
        is_global=False,
        last_updated_time=0,
        created_time=0,
    )
|
|
775
|
-
@staticmethod
def _as_read_properties(
    write: ViewPropertyApply, all_containers: MutableMapping[dm.ContainerId, dm.ContainerApply]
) -> ViewProperty:
    """Convert a single write-format view property to its read format.

    For mapped properties, type/nullability/defaults are copied from the
    backing container property; connection properties are converted to the
    corresponding single/multi read classes.

    Args:
        write: The write-format property to convert.
        all_containers: Lookup for container definitions (must contain the
            container referenced by a mapped property).

    Returns:
        ViewProperty: The read-format equivalent.

    Raises:
        ValueError: If the property type is not supported.
    """
    if isinstance(write, dm.MappedPropertyApply):
        container_prop = all_containers[write.container].properties[write.container_property_identifier]
        return dm.MappedProperty(
            container=write.container,
            container_property_identifier=write.container_property_identifier,
            name=write.name,
            description=write.description,
            source=write.source,
            # The type-level attributes come from the container, not the view.
            type=container_prop.type,
            nullable=container_prop.nullable,
            auto_increment=container_prop.auto_increment,
            immutable=container_prop.immutable,
            # Likely bug in SDK.
            default_value=container_prop.default_value,  # type: ignore[arg-type]
        )
    if isinstance(write, dm.EdgeConnectionApply):
        # Preserve single vs. multi cardinality when converting.
        edge_cls = SingleEdgeConnection if isinstance(write, SingleEdgeConnectionApply) else dm.MultiEdgeConnection
        return edge_cls(
            type=write.type,
            source=write.source,
            name=write.name,
            description=write.description,
            edge_source=write.edge_source,
            direction=write.direction,
        )
    if isinstance(write, ReverseDirectRelationApply):
        # Preserve single vs. multi cardinality when converting.
        relation_cls = (
            SingleReverseDirectRelation
            if isinstance(write, SingleReverseDirectRelationApply)
            else dm.MultiReverseDirectRelation
        )
        return relation_cls(
            source=write.source,
            through=write.through,
            name=write.name,
            description=write.description,
        )
    raise ValueError(f"Cannot convert {write} to read format")
|
|
818
|
-
@staticmethod
def _used_for(containers: Iterable[dm.ContainerApply]) -> Literal["node", "edge", "all"]:
    """Derive a view's ``used_for`` value from its backing containers.

    Returns "node" or "edge" only when every container agrees on that single
    usage; any mix (or no containers at all) yields "all".
    """
    usages = {container.used_for for container in containers}
    if usages == {"node"}:
        return "node"
    if usages == {"edge"}:
        return "edge"
    return "all"
|
|
827
|
-
@staticmethod
def _writable(properties: Iterable[ViewProperty], containers: Iterable[dm.ContainerApply]) -> bool:
    """Decide whether a view is writable.

    A view is writable only if it exposes (maps) every non-nullable property
    of every container it references; otherwise an instance written through
    the view could not satisfy the container's required fields.
    """
    mapped = {
        (prop.container, prop.container_property_identifier)
        for prop in properties
        if isinstance(prop, dm.MappedProperty)
    }
    for container in containers:
        container_id = container.as_id()
        for prop_id, container_prop in container.properties.items():
            # A required container property not exposed by the view makes
            # the view read-only.
            if not container_prop.nullable and (container_id, prop_id) not in mapped:
                return False
    return True
|
|
843
|
-
|
|
844
|
-
@dataclass
class PipelineSchema(DMSSchema):
    """A DMS schema extended with the ingestion pipeline resources.

    In addition to the data-modeling resources of :class:`DMSSchema`, this
    carries the transformations, RAW databases, and RAW tables needed to
    populate the model with data.
    """

    # Pipeline resources; each defaults to an empty write-list.
    transformations: TransformationWriteList = field(default_factory=lambda: TransformationWriteList([]))
    databases: DatabaseWriteList = field(default_factory=lambda: DatabaseWriteList([]))
    raw_tables: RawTableWriteList = field(default_factory=lambda: RawTableWriteList([]))

    # Extends the parent mapping so the 'raw' directory/zip folder maps to
    # the raw_tables field.
    _FIELD_NAME_BY_RESOURCE_TYPE: ClassVar[dict[str, str]] = {
        **DMSSchema._FIELD_NAME_BY_RESOURCE_TYPE,
        "raw": "raw_tables",
    }

    def __post_init__(self):
        # Ensure every database referenced by a RAW table exists in the
        # databases list, creating missing DatabaseWrite entries.
        existing_databases = {database.name for database in self.databases}
        table_database = {table.database for table in self.raw_tables}
        if missing := table_database - existing_databases:
            self.databases.extend([DatabaseWrite(name=database) for database in missing])

    @classmethod
    def _read_directory(cls, directory: Path) -> tuple[dict[str, list[Any]], dict[str, list[Path]]]:
        """Read the parent's resources plus transformations/raw YAML files.

        Returns:
            (data, context): parsed YAML payloads per field name, and the
            source file path(s) for each payload (for error reporting).
        """
        data, context = super()._read_directory(directory)
        for yaml_file in directory.rglob("*.yaml"):
            if yaml_file.parent.name in ("transformations", "raw"):
                attr_name = cls._FIELD_NAME_BY_RESOURCE_TYPE.get(yaml_file.parent.name, yaml_file.parent.name)
                data.setdefault(attr_name, [])
                context.setdefault(attr_name, [])
                try:
                    loaded = yaml.safe_load(yaml_file.read_text())
                except Exception as e:
                    # Unparseable files are reported as warnings, not errors,
                    # so one bad file does not abort the whole read.
                    warnings.warn(
                        FileTypeUnexpectedWarning(yaml_file, frozenset([".yaml", ".yml"]), str(e)), stacklevel=2
                    )
                    continue
                # A file may hold one resource or a list of resources; keep the
                # context list aligned element-for-element with the data list.
                if isinstance(loaded, list):
                    data[attr_name].extend(loaded)
                    context[attr_name].extend([yaml_file] * len(loaded))
                else:
                    data[attr_name].append(loaded)
                    context[attr_name].append(yaml_file)
        return data, context

    def to_directory(
        self,
        directory: str | Path,
        exclude: set[str] | None = None,
        new_line: str | None = "\n",
        encoding: str | None = "utf-8",
    ) -> None:
        """Write the schema to ``directory``, adding transformation and RAW files.

        Args:
            directory: Target directory.
            exclude: Resource-folder names to skip (e.g. {"transformations", "raw"}).
            new_line: Newline convention for written files.
            encoding: Text encoding for written files.
        """
        super().to_directory(directory, exclude)
        exclude_set = exclude or set()
        path_dir = Path(directory)
        if "transformations" not in exclude_set and self.transformations:
            transformation_dir = path_dir / "transformations"
            transformation_dir.mkdir(exist_ok=True, parents=True)
            for transformation in self.transformations:
                (transformation_dir / f"{transformation.external_id}.yaml").write_text(
                    transformation.dump_yaml(), newline=new_line, encoding=encoding
                )
        if "raw" not in exclude_set and self.raw_tables:
            # The RAW Databases are not written to file. This is because cognite-toolkit expects the RAW databases
            # to be in the same file as the RAW tables.
            raw_dir = path_dir / "raw"
            raw_dir.mkdir(exist_ok=True, parents=True)
            for raw_table in self.raw_tables:
                (raw_dir / f"{raw_table.name}.yaml").write_text(
                    raw_table.dump_yaml(), newline=new_line, encoding=encoding
                )

    def to_zip(self, zip_file: str | Path, exclude: set[str] | None = None) -> None:
        """Write the schema to a zip archive, appending transformation and RAW entries.

        Args:
            zip_file: Target zip path; opened in append mode after the parent write.
            exclude: Resource-folder names to skip.
        """
        super().to_zip(zip_file, exclude)
        exclude_set = exclude or set()
        with zipfile.ZipFile(zip_file, "a") as zip_ref:
            if "transformations" not in exclude_set:
                for transformation in self.transformations:
                    zip_ref.writestr(f"transformations/{transformation.external_id}.yaml", transformation.dump_yaml())
            if "raw" not in exclude_set:
                # The RAW Databases are not written to file. This is because cognite-toolkit expects the RAW databases
                # to be in the same file as the RAW tables.
                for raw_table in self.raw_tables:
                    zip_ref.writestr(f"raw/{raw_table.name}.yaml", raw_table.dump_yaml())

    @classmethod
    def _read_zip(cls, zip_file: Path) -> tuple[dict[str, list[Any]], dict[str, list[Path]]]:
        """Read the parent's resources plus transformations/raw entries from a zip.

        Mirrors ``_read_directory`` but sources the YAML from zip members.
        """
        data, context = super()._read_zip(zip_file)
        with zipfile.ZipFile(zip_file, "r") as zip_ref:
            for file_info in zip_ref.infolist():
                if file_info.filename.endswith(".yaml"):
                    # Top-level files (no folder) carry no resource type; skip.
                    if "/" not in file_info.filename:
                        continue
                    filepath = Path(file_info.filename)
                    if (parent := filepath.parent.name) in ("transformations", "raw"):
                        attr_name = cls._FIELD_NAME_BY_RESOURCE_TYPE.get(parent, parent)
                        data.setdefault(attr_name, [])
                        context.setdefault(attr_name, [])
                        try:
                            loaded = yaml.safe_load(zip_ref.read(file_info).decode())
                        except Exception as e:
                            warnings.warn(
                                FileTypeUnexpectedWarning(filepath, frozenset([".yaml", ".yml"]), str(e)), stacklevel=2
                            )
                            continue
                        if isinstance(loaded, list):
                            data[attr_name].extend(loaded)
                            context[attr_name].extend([filepath] * len(loaded))
                        else:
                            data[attr_name].append(loaded)
                            context[attr_name].append(filepath)
        return data, context

    @classmethod
    def from_dms(cls, schema: DMSSchema, instance_space: str | None = None) -> "PipelineSchema":
        """Build a full pipeline schema (RAW tables + transformations) from a DMS schema.

        One RAW table and one transformation is generated per leaf view's
        mapped properties, and one per edge connection.

        Args:
            schema: The source DMS schema; must have a data model.
            instance_space: Space for the created instances; defaults to the
                data model's space.

        Raises:
            ValueError: If the schema has no data model.
        """
        if not schema.data_model:
            raise ValueError("PipelineSchema must contain at least one data model")
        first_data_model = schema.data_model
        # The database name is limited to 32 characters
        database_name = first_data_model.external_id[:32]
        instance_space = instance_space or first_data_model.space
        database = DatabaseWrite(name=database_name)
        parent_views = {parent for view in schema.views.values() for parent in view.implements or []}
        container_by_id = schema.containers.copy()

        transformations = TransformationWriteList([])
        raw_tables = RawTableWriteList([])
        for view in schema.views.values():
            if view.as_id() in parent_views:
                # Skipping parents as they do not have their own data
                continue
            mapped_properties = {
                prop_name: prop
                for prop_name, prop in (view.properties or {}).items()
                if isinstance(prop, dm.MappedPropertyApply)
            }
            if mapped_properties:
                view_table = RawTableWrite(name=f"{view.external_id}Properties", database=database_name)
                raw_tables.append(view_table)
                transformation = cls._create_property_transformation(
                    mapped_properties, view, view_table, container_by_id, instance_space
                )
                transformations.append(transformation)
            connection_properties = {
                prop_name: prop
                for prop_name, prop in (view.properties or {}).items()
                if isinstance(prop, dm.EdgeConnectionApply)
            }
            # Edges get one table + transformation per connection property.
            for prop_name, connection_property in connection_properties.items():
                view_table = RawTableWrite(name=f"{view.external_id}.{prop_name}Edge", database=database_name)
                raw_tables.append(view_table)
                transformation = cls._create_edge_transformation(connection_property, view, view_table, instance_space)
                transformations.append(transformation)

        return cls(
            spaces=schema.spaces,
            data_model=schema.data_model,
            views=schema.views,
            containers=schema.containers,
            transformations=transformations,
            databases=DatabaseWriteList([database]),
            raw_tables=raw_tables,
        )

    @classmethod
    def _create_property_transformation(
        cls,
        properties: dict[str, dm.MappedPropertyApply],
        view: ViewApply,
        table: RawTableWrite,
        container_by_id: dict[dm.ContainerId, dm.ContainerApply],
        instance_space: str,
    ) -> TransformationWrite:
        """Create a RAW -> nodes transformation for a view's mapped properties.

        Casts each RAW column to the SQL type matching the container
        property's DMS type, falling back to STRING for unknown types.
        """
        mapping_mode = {
            "version": 1,
            "sourceType": "raw",
            # 'mappings' is set here and overwritten further down to ensure the correct order
            "mappings": [],
            "sourceLevel1": table.database,
            "sourceLevel2": table.name,
        }
        mappings = [
            {"from": "externalId", "to": "externalId", "asType": "STRING"},
        ]
        select_rows = ["cast(`externalId` as STRING) as externalId"]
        for prop_name, prop in properties.items():
            container = container_by_id.get(prop.container)
            if container is not None:
                dms_type = container.properties[prop.container_property_identifier].type._type
                if dms_type in _DATA_TYPE_BY_DMS_TYPE:
                    sql_type = _DATA_TYPE_BY_DMS_TYPE[dms_type].sql
                else:
                    warnings.warn(
                        f"Unknown DMS type '{dms_type}' for property '{prop_name}'", RuntimeWarning, stacklevel=2
                    )
                    sql_type = "STRING"
            else:
                # Container unknown locally: cast to STRING as the safe default.
                sql_type = "STRING"
            select_rows.append(f"cast(`{prop_name}` as {sql_type}) as {prop_name}")
            mappings.append({"from": prop_name, "to": prop_name, "asType": sql_type})
        mapping_mode["mappings"] = mappings
        select = ",\n ".join(select_rows)

        return TransformationWrite(
            external_id=f"{table.name}Transformation",
            name=f"{table.name}Transformation",
            ignore_null_fields=True,
            destination=Nodes(
                view=ViewInfo(view.space, view.external_id, view.version),
                instance_space=instance_space,
            ),
            conflict_mode="upsert",
            query=f"""/* MAPPING_MODE_ENABLED: true */
/* {json.dumps(mapping_mode)} */
select
 {select}
from
 `{table.database}`.`{table.name}`;
""",
        )

    @classmethod
    def _create_edge_transformation(
        cls, property_: dm.EdgeConnectionApply, view: ViewApply, table: RawTableWrite, instance_space: str
    ) -> TransformationWrite:
        """Create a RAW -> edges transformation for a single edge connection.

        Uses the view and the connection source as start/end node columns,
        swapped when the connection direction is inwards.
        """
        start, end = view.external_id, property_.source.external_id
        if property_.direction == "inwards":
            start, end = end, start
        mapping_mode = {
            "version": 1,
            "sourceType": "raw",
            "mappings": [
                {"from": "externalId", "to": "externalId", "asType": "STRING"},
                {"from": start, "to": "startNode", "asType": "STRUCT<`space`:STRING, `externalId`:STRING>"},
                {"from": end, "to": "endNode", "asType": "STRUCT<`space`:STRING, `externalId`:STRING>"},
            ],
            "sourceLevel1": table.database,
            "sourceLevel2": table.name,
        }
        select_rows = [
            "cast(`externalId` as STRING) as externalId",
            f"node_reference('{instance_space}', `{start}`) as startNode",
            f"node_reference('{instance_space}', `{end}`) as endNode",
        ]
        select = ",\n ".join(select_rows)

        return TransformationWrite(
            external_id=f"{table.name}Transformation",
            name=f"{table.name}Transformation",
            ignore_null_fields=True,
            destination=Edges(
                instance_space=instance_space,
                edge_type=EdgeType(space=property_.type.space, external_id=property_.type.external_id),
            ),
            conflict_mode="upsert",
            query=f"""/* MAPPING_MODE_ENABLED: true */
/* {json.dumps(mapping_mode)} */
select
 {select}
from
 `{table.database}`.`{table.name}`;
""",
        )