cognite-toolkit 0.6.88__py3-none-any.whl → 0.6.89__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognite_toolkit/_cdf_tk/commands/_migrate/canvas.py +60 -5
- cognite_toolkit/_cdf_tk/commands/_migrate/command.py +4 -2
- cognite_toolkit/_cdf_tk/commands/_migrate/conversion.py +161 -44
- cognite_toolkit/_cdf_tk/commands/_migrate/data_classes.py +10 -10
- cognite_toolkit/_cdf_tk/commands/_migrate/data_mapper.py +7 -3
- cognite_toolkit/_cdf_tk/commands/_migrate/migration_io.py +8 -10
- cognite_toolkit/_cdf_tk/storageio/__init__.py +2 -0
- cognite_toolkit/_cdf_tk/storageio/_annotations.py +102 -0
- cognite_toolkit/_cdf_tk/tracker.py +6 -6
- cognite_toolkit/_cdf_tk/utils/fileio/_readers.py +90 -44
- cognite_toolkit/_cdf_tk/utils/http_client/_client.py +6 -4
- cognite_toolkit/_cdf_tk/utils/http_client/_data_classes.py +2 -0
- cognite_toolkit/_cdf_tk/utils/useful_types.py +7 -4
- cognite_toolkit/_repo_files/GitHub/.github/workflows/deploy.yaml +1 -1
- cognite_toolkit/_repo_files/GitHub/.github/workflows/dry-run.yaml +1 -1
- cognite_toolkit/_resources/cdf.toml +1 -1
- cognite_toolkit/_version.py +1 -1
- {cognite_toolkit-0.6.88.dist-info → cognite_toolkit-0.6.89.dist-info}/METADATA +1 -1
- {cognite_toolkit-0.6.88.dist-info → cognite_toolkit-0.6.89.dist-info}/RECORD +22 -22
- cognite_toolkit/_cdf_tk/commands/_migrate/base.py +0 -106
- {cognite_toolkit-0.6.88.dist-info → cognite_toolkit-0.6.89.dist-info}/WHEEL +0 -0
- {cognite_toolkit-0.6.88.dist-info → cognite_toolkit-0.6.89.dist-info}/entry_points.txt +0 -0
- {cognite_toolkit-0.6.88.dist-info → cognite_toolkit-0.6.89.dist-info}/licenses/LICENSE +0 -0
cognite_toolkit/_cdf_tk/commands/_migrate/canvas.py
CHANGED
@@ -1,5 +1,11 @@
 from uuid import uuid4
 
+from cognite.client.data_classes.capabilities import (
+    Capability,
+    DataModelInstancesAcl,
+    DataModelsAcl,
+    SpaceIDScope,
+)
 from cognite.client.exceptions import CogniteException
 
 from cognite_toolkit._cdf_tk.client import ToolkitClient
@@ -10,16 +16,19 @@ from cognite_toolkit._cdf_tk.client.data_classes.canvas import (
     FdmInstanceContainerReferenceApply,
 )
 from cognite_toolkit._cdf_tk.client.data_classes.migration import InstanceSource
-from cognite_toolkit._cdf_tk.
+from cognite_toolkit._cdf_tk.commands._base import ToolkitCommand
+from cognite_toolkit._cdf_tk.commands._migrate.data_model import (
+    INSTANCE_SOURCE_VIEW_ID,
+    MODEL_ID,
+    RESOURCE_VIEW_MAPPING_VIEW_ID,
+)
+from cognite_toolkit._cdf_tk.exceptions import AuthenticationError, ToolkitMigrationError
 from cognite_toolkit._cdf_tk.tk_warnings import HighSeverityWarning, LowSeverityWarning, MediumSeverityWarning
 from cognite_toolkit._cdf_tk.utils import humanize_collection
 from cognite_toolkit._cdf_tk.utils.interactive_select import InteractiveCanvasSelect
 
-from .base import BaseMigrateCommand
-from .data_model import INSTANCE_SOURCE_VIEW_ID
 
-
-class MigrationCanvasCommand(BaseMigrateCommand):
+class MigrationCanvasCommand(ToolkitCommand):
     canvas_schema_space = Canvas.get_source().space
     # Note sequences are not supported in Canvas, so we do not include them here.
     asset_centric_resource_types = frozenset({"asset", "event", "file", "timeseries"})
@@ -144,3 +153,49 @@ class MigrationCanvasCommand(BaseMigrateCommand):
             max_width=reference.max_width,
             max_height=reference.max_height,
         )
+
+    @staticmethod
+    def validate_access(
+        client: ToolkitClient,
+        instance_spaces: list[str] | None = None,
+        schema_spaces: list[str] | None = None,
+    ) -> None:
+        required_capabilities: list[Capability] = []
+        if instance_spaces is not None:
+            required_capabilities.append(
+                DataModelInstancesAcl(
+                    actions=[
+                        DataModelInstancesAcl.Action.Read,
+                        DataModelInstancesAcl.Action.Write,
+                        DataModelInstancesAcl.Action.Write_Properties,
+                    ],
+                    scope=SpaceIDScope(instance_spaces),
+                )
+            )
+        if schema_spaces is not None:
+            required_capabilities.append(
+                DataModelsAcl(actions=[DataModelsAcl.Action.Read], scope=SpaceIDScope(schema_spaces)),
+            )
+        if missing := client.iam.verify_capabilities(required_capabilities):
+            raise AuthenticationError(f"Missing required capabilities: {humanize_collection(missing)}.", missing)
+
+    @staticmethod
+    def validate_migration_model_available(client: ToolkitClient) -> None:
+        models = client.data_modeling.data_models.retrieve([MODEL_ID], inline_views=False)
+        if not models:
+            raise ToolkitMigrationError(
+                f"The migration data model {MODEL_ID!r} does not exist. "
+                "Please run the `cdf migrate prepare` command to deploy the migration data model."
+            )
+        elif len(models) > 1:
+            raise ToolkitMigrationError(
+                f"Multiple migration models {MODEL_ID!r}. "
+                "Please delete the duplicate models before proceeding with the migration."
+            )
+        model = models[0]
+        missing_views = {INSTANCE_SOURCE_VIEW_ID, RESOURCE_VIEW_MAPPING_VIEW_ID} - set(model.views or [])
+        if missing_views:
+            raise ToolkitMigrationError(
+                f"Invalid migration model. Missing views {humanize_collection(missing_views)}. "
+                f"Please run the `cdf migrate prepare` command to deploy the migration data model."
+            )
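
The two static validators above replace checks previously inherited from `BaseMigrateCommand` (removed in this release; see the end of this diff). A minimal usage sketch, assuming a `ToolkitClient` that picks up credentials from the environment and a hypothetical instance-space name:

```python
from cognite_toolkit._cdf_tk.client import ToolkitClient
from cognite_toolkit._cdf_tk.commands._migrate.canvas import MigrationCanvasCommand

client = ToolkitClient()  # assumption: credentials are resolved from the environment

# Raises AuthenticationError if read/write capabilities are missing on the spaces.
MigrationCanvasCommand.validate_access(
    client,
    instance_spaces=["my_instance_space"],  # hypothetical space name
    schema_spaces=[MigrationCanvasCommand.canvas_schema_space],
)
# Raises ToolkitMigrationError unless `cdf migrate prepare` has deployed the model.
MigrationCanvasCommand.validate_migration_model_available(client)
```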
cognite_toolkit/_cdf_tk/commands/_migrate/command.py
CHANGED
@@ -162,12 +162,14 @@ class MigrationCommand(ToolkitCommand):
         for item in source:
             target, issue = mapper.map(item)
             id_ = data.as_id(item)
-
+            result: Status = "failed" if target is None else "success"
+            tracker.set_progress(id_, step=self.Steps.CONVERT, status=result)
 
             if issue.has_issues:
                 # MyPy fails to understand that dict[str, JsonVal] is a Chunk
                 issues.append(issue.dump())  # type: ignore[arg-type]
-
+            if target is not None:
+                targets.append(UploadItem(source_id=id_, item=target))
         if issues:
             log_file.write_chunks(issues)
         return targets
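
The mapper can now return `None` for items that fail conversion; the command records a per-item status and only queues successful targets for upload. A simplified, self-contained sketch of the pattern (`Status`, the tracker, and the upload container are stand-ins for Toolkit internals, not the real classes):

```python
from typing import Literal

Status = Literal["success", "failed"]

def convert_items(source, mapper, tracker, data, log_file):
    targets, issues = [], []
    for item in source:
        target, issue = mapper.map(item)  # target is None when conversion fails
        id_ = data.as_id(item)
        result: Status = "failed" if target is None else "success"
        tracker.set_progress(id_, step="CONVERT", status=result)
        if issue.has_issues:
            issues.append(issue.dump())
        if target is not None:  # failed items are logged but never uploaded
            targets.append((id_, target))
    if issues:
        log_file.write_chunks(issues)
    return targets
```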
cognite_toolkit/_cdf_tk/commands/_migrate/conversion.py
CHANGED
@@ -1,21 +1,29 @@
 from collections.abc import Mapping, Set
 from dataclasses import dataclass
-from typing import Any, ClassVar
+from typing import Any, ClassVar, overload
 
-from cognite.client.data_classes import Asset, Event, FileMetadata,
-from cognite.client.data_classes.
-
-
+from cognite.client.data_classes import Annotation, Asset, Event, FileMetadata, TimeSeries
+from cognite.client.data_classes.data_modeling import (
+    DirectRelation,
+    DirectRelationReference,
+    EdgeId,
+    MappedProperty,
+    NodeApply,
+    NodeId,
+)
+from cognite.client.data_classes.data_modeling.instances import EdgeApply, NodeOrEdgeData, PropertyValueWrite
 from cognite.client.data_classes.data_modeling.views import ViewProperty
 
-from cognite_toolkit._cdf_tk.client.data_classes.extended_filemetadata import ExtendedFileMetadata
-from cognite_toolkit._cdf_tk.client.data_classes.extended_timeseries import ExtendedTimeSeries
 from cognite_toolkit._cdf_tk.client.data_classes.migration import AssetCentricId, ResourceViewMapping
 from cognite_toolkit._cdf_tk.utils.collection import flatten_dict_json_path
 from cognite_toolkit._cdf_tk.utils.dtype_conversion import (
     asset_centric_convert_to_primary_property,
+    convert_to_primary_property,
+)
+from cognite_toolkit._cdf_tk.utils.useful_types import (
+    AssetCentricResourceExtended,
+    AssetCentricType,
 )
-from cognite_toolkit._cdf_tk.utils.useful_types import AssetCentricType
 
 from .data_model import INSTANCE_SOURCE_VIEW_ID
 from .issues import ConversionIssue, FailedConversion, InvalidPropertyDataType
@@ -49,37 +57,71 @@ class DirectRelationCache:
         ("event", "assetIds"),
         ("sequence", "assetId"),
         ("asset", "parentId"),
+        ("fileAnnotation", "data.assetRef.id"),
     }
     SOURCE_REFERENCE_PROPERTIES: ClassVar[Set[tuple[AssetCentricType, str]]] = {
         ("asset", "source"),
         ("event", "source"),
         ("file", "source"),
     }
+    FILE_REFERENCE_PROPERTIES: ClassVar[Set[tuple[AssetCentricType, str]]] = {
+        ("fileAnnotation", "data.fileRef.id"),
+        ("fileAnnotation", "annotatedResourceId"),
+    }
 
     asset: Mapping[int, DirectRelationReference]
     source: Mapping[str, DirectRelationReference]
+    file: Mapping[int, DirectRelationReference]
 
     def get(self, resource_type: AssetCentricType, property_id: str) -> Mapping[str | int, DirectRelationReference]:
-
+        key = resource_type, property_id
+        if key in self.ASSET_REFERENCE_PROPERTIES:
            return self.asset  # type: ignore[return-value]
-        if
+        if key in self.SOURCE_REFERENCE_PROPERTIES:
            return self.source  # type: ignore[return-value]
+        if key in self.FILE_REFERENCE_PROPERTIES:
+            return self.file  # type: ignore[return-value]
        return {}
 
 
+@overload
 def asset_centric_to_dm(
-    resource:
+    resource: AssetCentricResourceExtended,
     instance_id: NodeId,
     view_source: ResourceViewMapping,
     view_properties: dict[str, ViewProperty],
     asset_instance_id_by_id: Mapping[int, DirectRelationReference],
     source_instance_id_by_external_id: Mapping[str, DirectRelationReference],
-
+    file_instance_id_by_id: Mapping[int, DirectRelationReference],
+) -> tuple[NodeApply | None, ConversionIssue]: ...
+
+
+@overload
+def asset_centric_to_dm(
+    resource: AssetCentricResourceExtended,
+    instance_id: EdgeId,
+    view_source: ResourceViewMapping,
+    view_properties: dict[str, ViewProperty],
+    asset_instance_id_by_id: Mapping[int, DirectRelationReference],
+    source_instance_id_by_external_id: Mapping[str, DirectRelationReference],
+    file_instance_id_by_id: Mapping[int, DirectRelationReference],
+) -> tuple[EdgeApply | None, ConversionIssue]: ...
+
+
+def asset_centric_to_dm(
+    resource: AssetCentricResourceExtended,
+    instance_id: NodeId | EdgeId,
+    view_source: ResourceViewMapping,
+    view_properties: dict[str, ViewProperty],
+    asset_instance_id_by_id: Mapping[int, DirectRelationReference],
+    source_instance_id_by_external_id: Mapping[str, DirectRelationReference],
+    file_instance_id_by_id: Mapping[int, DirectRelationReference],
+) -> tuple[NodeApply | EdgeApply | None, ConversionIssue]:
     """Convert an asset-centric resource to a data model instance.
 
     Args:
         resource (CogniteResource): The asset-centric resource to convert.
-        instance_id (NodeId): The ID of the instance to create or update.
+        instance_id (NodeId | EdgeApply): The ID of the instance to create or update.
         view_source (ResourceViewMapping): The view source defining how to map the resource to the data model.
         view_properties (dict[str, ViewProperty]): The defined properties referenced in the view source mapping.
         asset_instance_id_by_id (dict[int, DirectRelationReference]): A mapping from asset IDs to their corresponding
@@ -88,12 +130,17 @@ def asset_centric_to_dm(
         source_instance_id_by_external_id (dict[str, DirectRelationReference]): A mapping from source strings to their
             corresponding DirectRelationReference in the data model. This is used to create direct relations for resources
             that reference sources.
+        file_instance_id_by_id (dict[int, DirectRelationReference]): A mapping from file IDs to their corresponding
+            DirectRelationReference in the data model. This is used to create direct relations for resources that
+            reference files.
 
     Returns:
-        tuple[NodeApply, ConversionIssue]: A tuple containing the converted NodeApply and any ConversionIssue encountered.
+        tuple[NodeApply | EdgeApply, ConversionIssue]: A tuple containing the converted NodeApply and any ConversionIssue encountered.
     """
-    cache = DirectRelationCache(
-
+    cache = DirectRelationCache(
+        asset=asset_instance_id_by_id, source=source_instance_id_by_external_id, file=file_instance_id_by_id
+    )
+    resource_type = _lookup_resource_type(resource)
     dumped = resource.dump()
     try:
         id_ = dumped.pop("id")
@@ -117,37 +164,54 @@ def asset_centric_to_dm(
     sources: list[NodeOrEdgeData] = []
     if properties:
         sources.append(NodeOrEdgeData(source=view_source.view_id, properties=properties))
-    instance_source_properties = {
-        "resourceType": resource_type,
-        "id": id_,
-        "dataSetId": data_set_id,
-        "classicExternalId": external_id,
-    }
-    sources.append(NodeOrEdgeData(source=INSTANCE_SOURCE_VIEW_ID, properties=instance_source_properties))
 
-
-
-
-
-
+    if resource_type != "fileAnnotation":
+        instance_source_properties = {
+            "resourceType": resource_type,
+            "id": id_,
+            "dataSetId": data_set_id,
+            "classicExternalId": external_id,
+        }
+        sources.append(NodeOrEdgeData(source=INSTANCE_SOURCE_VIEW_ID, properties=instance_source_properties))
 
-
+    instance: NodeApply | EdgeApply
+    if isinstance(instance_id, EdgeId):
+        edge_properties = create_edge_properties(
+            dumped, view_source.property_mapping, resource_type, issue, cache, instance_id.space
+        )
+        if any(key not in edge_properties for key in ("start_node", "end_node", "type")):
+            # Failed conversion of edge properties
+            return None, issue
+        instance = EdgeApply(
+            space=instance_id.space,
+            external_id=instance_id.external_id,
+            sources=sources,
+            **edge_properties,  # type: ignore[arg-type]
+        )
+    elif isinstance(instance_id, NodeId):
+        instance = NodeApply(space=instance_id.space, external_id=instance_id.external_id, sources=sources)
+    else:
+        raise RuntimeError(f"Unexpected instance_id type {type(instance_id)}")
 
+    return instance, issue
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+def _lookup_resource_type(resource_type: AssetCentricResourceExtended) -> AssetCentricType:
+    if isinstance(resource_type, Asset):
+        return "asset"
+    elif isinstance(resource_type, FileMetadata):
+        return "file"
+    elif isinstance(resource_type, Event):
+        return "event"
+    elif isinstance(resource_type, TimeSeries):
+        return "timeseries"
+    elif isinstance(resource_type, Annotation):
+        if resource_type.annotated_resource_type == "file" and resource_type.annotation_type in (
+            "diagrams.AssetLink",
+            "diagrams.FileLink",
+        ):
+            return "fileAnnotation"
+    raise ValueError(f"Unsupported resource type: {resource_type}")
 
 
 def create_properties(
@@ -210,5 +274,58 @@
         (set(flatten_dump.keys()) - set(property_mapping.keys())) | ignored_asset_centric_properties
     )
     issue.missing_asset_centric_properties = sorted(set(property_mapping.keys()) - set(flatten_dump.keys()))
-
+    # Node and edge properties are handled separately
+    issue.missing_instance_properties = sorted(
+        {
+            prop_id
+            for prop_id in property_mapping.values()
+            if not (prop_id.startswith("edge.") or prop_id.startswith("node."))
+        }
+        - set(view_properties.keys())
+    )
     return properties
+
+
+def create_edge_properties(
+    dumped: dict[str, Any],
+    property_mapping: dict[str, str],
+    resource_type: AssetCentricType,
+    issue: ConversionIssue,
+    direct_relation_cache: DirectRelationCache,
+    default_instance_space: str,
+) -> dict[str, DirectRelationReference]:
+    flatten_dump = flatten_dict_json_path(dumped)
+    edge_properties: dict[str, DirectRelationReference] = {}
+    for prop_json_path, prop_id in property_mapping.items():
+        if not prop_id.startswith("edge."):
+            continue
+        if prop_json_path not in flatten_dump:
+            continue
+        edge_prop_id = prop_id.removeprefix("edge.")
+        if edge_prop_id in ("startNode", "endNode", "type"):
+            # DirectRelation lookup.
+            try:
+                value = convert_to_primary_property(
+                    flatten_dump[prop_json_path],
+                    DirectRelation(),
+                    False,
+                    direct_relation_lookup=direct_relation_cache.get(resource_type, prop_json_path),
+                )
+            except (ValueError, TypeError, NotImplementedError) as e:
+                issue.failed_conversions.append(
+                    FailedConversion(property_id=prop_json_path, value=flatten_dump[prop_json_path], error=str(e))
+                )
+                continue
+        elif edge_prop_id.endswith(".externalId"):
+            # Just an external ID string.
+            edge_prop_id = edge_prop_id.removesuffix(".externalId")
+            value = DirectRelationReference(default_instance_space, str(flatten_dump[prop_json_path]))
+        else:
+            issue.invalid_instance_property_types.append(
+                InvalidPropertyDataType(property_id=prop_id, expected_type="EdgeProperty")
+            )
+            continue
+        # We know that value is DirectRelationReference here
+        edge_properties[edge_prop_id.replace("Node", "_node")] = value  # type: ignore[assignment]
+
+    return edge_properties
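
The `@overload` pair lets type checkers tie the return type of `asset_centric_to_dm` to the ID type passed in: `NodeId` in, `NodeApply | None` out; `EdgeId` in, `EdgeApply | None` out. A self-contained illustration of the same pattern, using local stand-in classes rather than the SDK types:

```python
from dataclasses import dataclass
from typing import overload


@dataclass
class MyNodeId:
    space: str
    external_id: str


@dataclass
class MyEdgeId:
    space: str
    external_id: str


@overload
def describe(instance_id: MyNodeId) -> str: ...
@overload
def describe(instance_id: MyEdgeId) -> list[str]: ...


def describe(instance_id: MyNodeId | MyEdgeId) -> str | list[str]:
    # Runtime dispatch mirrors the isinstance checks in asset_centric_to_dm.
    if isinstance(instance_id, MyEdgeId):
        return [instance_id.space, instance_id.external_id]
    return f"{instance_id.space}:{instance_id.external_id}"


node_desc = describe(MyNodeId("sp", "x"))  # checker infers: str
edge_desc = describe(MyEdgeId("sp", "y"))  # checker infers: list[str]
```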
cognite_toolkit/_cdf_tk/commands/_migrate/data_classes.py
CHANGED
@@ -3,7 +3,6 @@ from pathlib import Path
 from typing import Any, Generic, Literal
 
 from cognite.client.data_classes._base import (
-    T_WritableCogniteResource,
     WriteableCogniteResource,
     WriteableCogniteResourceList,
 )
@@ -15,11 +14,14 @@ from cognite_toolkit._cdf_tk.client.data_classes.instances import InstanceApplyL
 from cognite_toolkit._cdf_tk.client.data_classes.migration import AssetCentricId
 from cognite_toolkit._cdf_tk.client.data_classes.pending_instances_ids import PendingInstanceId
 from cognite_toolkit._cdf_tk.commands._migrate.default_mappings import create_default_mappings
-from cognite_toolkit._cdf_tk.exceptions import
-    ToolkitValueError,
-)
+from cognite_toolkit._cdf_tk.exceptions import ToolkitValueError
 from cognite_toolkit._cdf_tk.storageio._data_classes import ModelList
-from cognite_toolkit._cdf_tk.utils.useful_types import
+from cognite_toolkit._cdf_tk.utils.useful_types import (
+    AssetCentricKind,
+    AssetCentricType,
+    JsonVal,
+    T_AssetCentricResource,
+)
 
 
 class MigrationMapping(BaseModel, alias_generator=to_camel_case, extra="ignore", populate_by_name=True):
@@ -186,9 +188,9 @@ class TimeSeriesMigrationMappingList(MigrationMappingList):
 
 
 @dataclass
-class AssetCentricMapping(Generic[
+class AssetCentricMapping(Generic[T_AssetCentricResource], WriteableCogniteResource[InstanceApply]):
     mapping: MigrationMapping
-    resource:
+    resource: T_AssetCentricResource
 
     def as_write(self) -> InstanceApply:
         raise NotImplementedError()
@@ -203,9 +205,7 @@ class AssetCentricMapping(Generic[T_WritableCogniteResource], WriteableCogniteRe
     }
 
 
-class AssetCentricMappingList(
-    WriteableCogniteResourceList[InstanceApply, AssetCentricMapping[T_WritableCogniteResource]]
-):
+class AssetCentricMappingList(WriteableCogniteResourceList[InstanceApply, AssetCentricMapping[T_AssetCentricResource]]):
     _RESOURCE: type = AssetCentricMapping
 
     def as_write(self) -> InstanceApplyList:
cognite_toolkit/_cdf_tk/commands/_migrate/data_mapper.py
CHANGED
@@ -16,6 +16,7 @@ from cognite_toolkit._cdf_tk.constants import MISSING_INSTANCE_SPACE
 from cognite_toolkit._cdf_tk.exceptions import ToolkitValueError
 from cognite_toolkit._cdf_tk.storageio._base import T_Selector, T_WriteCogniteResource
 from cognite_toolkit._cdf_tk.utils import humanize_collection
+from cognite_toolkit._cdf_tk.utils.useful_types import T_AssetCentricResource
 
 
 class DataMapper(Generic[T_Selector, T_CogniteResource, T_WriteCogniteResource], ABC):
@@ -30,7 +31,7 @@ class DataMapper(Generic[T_Selector, T_CogniteResource, T_WriteCogniteResource],
         pass
 
     @abstractmethod
-    def map(self, source: T_CogniteResource) -> tuple[T_WriteCogniteResource, MigrationIssue]:
+    def map(self, source: T_CogniteResource) -> tuple[T_WriteCogniteResource | None, MigrationIssue]:
         """Map a chunk of source data to the target format.
 
         Args:
@@ -43,7 +44,9 @@ class DataMapper(Generic[T_Selector, T_CogniteResource, T_WriteCogniteResource],
         raise NotImplementedError("Subclasses must implement this method.")
 
 
-class AssetCentricMapper(
+class AssetCentricMapper(
+    DataMapper[AssetCentricMigrationSelector, AssetCentricMapping[T_AssetCentricResource], InstanceApply]
+):
     def __init__(self, client: ToolkitClient) -> None:
         self.client = client
         self._ingestion_view_by_id: dict[ViewId, View] = {}
@@ -84,7 +87,7 @@ class AssetCentricMapper(DataMapper[AssetCentricMigrationSelector, AssetCentricM
         asset_mappings = self.client.migration.instance_source.list(resource_type="asset", limit=-1)
         self._asset_mapping_by_id = {mapping.id_: mapping.as_direct_relation_reference() for mapping in asset_mappings}
 
-    def map(self, source: AssetCentricMapping) -> tuple[InstanceApply, ConversionIssue]:
+    def map(self, source: AssetCentricMapping[T_AssetCentricResource]) -> tuple[InstanceApply | None, ConversionIssue]:
         """Map a chunk of asset-centric data to InstanceApplyList format."""
         mapping = source.mapping
         ingestion_view = mapping.get_ingestion_view()
@@ -102,6 +105,7 @@ class AssetCentricMapper(DataMapper[AssetCentricMigrationSelector, AssetCentricM
             view_properties=view_properties,
             asset_instance_id_by_id=self._asset_mapping_by_id,
             source_instance_id_by_external_id=self._source_system_mapping_by_id,
+            file_instance_id_by_id={},  # Todo implement file direct relations
         )
         if mapping.instance_id.space == MISSING_INSTANCE_SPACE:
             conversion_issue.missing_instance_space = f"Missing instance space for dataset ID {mapping.data_set_id!r}"
cognite_toolkit/_cdf_tk/commands/_migrate/migration_io.py
CHANGED
@@ -1,9 +1,6 @@
 from collections.abc import Iterator, Mapping, Sequence
 from typing import ClassVar, cast
 
-from cognite.client.data_classes._base import (
-    T_WritableCogniteResource,
-)
 from cognite.client.data_classes.data_modeling import InstanceApply, NodeId
 
 from cognite_toolkit._cdf_tk.client import ToolkitClient
@@ -22,6 +19,7 @@ from cognite_toolkit._cdf_tk.utils.useful_types import (
     AssetCentricKind,
     AssetCentricType,
     JsonVal,
+    T_AssetCentricResource,
 )
 
 from .data_classes import AssetCentricMapping, AssetCentricMappingList, MigrationMapping, MigrationMappingList
@@ -30,7 +28,7 @@ from .selectors import AssetCentricMigrationSelector, MigrateDataSetSelector, Mi
 
 
 class AssetCentricMigrationIO(
-    UploadableStorageIO[AssetCentricMigrationSelector, AssetCentricMapping[
+    UploadableStorageIO[AssetCentricMigrationSelector, AssetCentricMapping[T_AssetCentricResource], InstanceApply]
 ):
     KIND = "AssetCentricMigration"
     SUPPORTED_DOWNLOAD_FORMATS = frozenset({".parquet", ".csv", ".ndjson"})
@@ -63,11 +61,11 @@
 
     def _stream_from_csv(
         self, selector: MigrationCSVFileSelector, limit: int | None = None
-    ) -> Iterator[Sequence[AssetCentricMapping[
+    ) -> Iterator[Sequence[AssetCentricMapping[T_AssetCentricResource]]]:
         items = selector.items
         if limit is not None:
             items = MigrationMappingList(items[:limit])
-        chunk: list[AssetCentricMapping[
+        chunk: list[AssetCentricMapping[T_AssetCentricResource]] = []
         for current_batch in chunker_sequence(items, self.CHUNK_SIZE):
             resources = self.hierarchy.get_resource_io(selector.kind).retrieve(current_batch.get_ids())
             for mapping, resource in zip(current_batch, resources, strict=True):
@@ -86,12 +84,12 @@
 
     def _stream_given_dataset(
         self, selector: MigrateDataSetSelector, limit: int | None = None
-    ) -> Iterator[Sequence[AssetCentricMapping[
+    ) -> Iterator[Sequence[AssetCentricMapping[T_AssetCentricResource]]]:
         asset_centric_selector = selector.as_asset_centric_selector()
         for data_chunk in self.hierarchy.stream_data(asset_centric_selector, limit):
-            mapping_list = AssetCentricMappingList[
+            mapping_list = AssetCentricMappingList[T_AssetCentricResource]([])
             for resource in data_chunk.items:
-                # We
+                # We got the resource from a dataset selector, so we know it is there
                 data_set_id = cast(int, resource.data_set_id)
                 space_source = self.client.migration.space_source.retrieve(data_set_id=data_set_id)
                 instance_space = space_source.instance_space if space_source else None
@@ -129,7 +127,7 @@
 
     def data_to_json_chunk(
         self,
-        data_chunk: Sequence[AssetCentricMapping[
+        data_chunk: Sequence[AssetCentricMapping[T_AssetCentricResource]],
         selector: AssetCentricMigrationSelector | None = None,
     ) -> list[dict[str, JsonVal]]:
         return [item.dump() for item in data_chunk]
cognite_toolkit/_cdf_tk/storageio/__init__.py
CHANGED
@@ -3,6 +3,7 @@ from pathlib import Path
 from cognite_toolkit._cdf_tk.utils._auxiliary import get_concrete_subclasses
 from cognite_toolkit._cdf_tk.utils.fileio import COMPRESSION_BY_SUFFIX
 
+from ._annotations import FileAnnotationIO
 from ._applications import CanvasIO, ChartIO
 from ._asset_centric import AssetIO, BaseAssetCentricIO, EventIO, FileMetadataIO, HierarchyIO, TimeSeriesIO
 from ._base import (
@@ -50,6 +51,7 @@ __all__ = [
     "ChartIO",
     "ConfigurableStorageIO",
     "EventIO",
+    "FileAnnotationIO",
     "FileMetadataIO",
     "HierarchyIO",
     "InstanceIO",
cognite_toolkit/_cdf_tk/storageio/_annotations.py
ADDED
@@ -0,0 +1,102 @@
+from collections.abc import Iterable, Sequence
+from typing import Any
+
+from cognite.client.data_classes import Annotation, AnnotationFilter
+
+from cognite_toolkit._cdf_tk.utils.collection import chunker_sequence
+from cognite_toolkit._cdf_tk.utils.useful_types import JsonVal
+
+from ._asset_centric import FileMetadataIO
+from ._base import Page, StorageIO
+from .selectors import AssetCentricSelector
+
+
+class FileAnnotationIO(StorageIO[AssetCentricSelector, Annotation]):
+    SUPPORTED_DOWNLOAD_FORMATS = frozenset({".ndjson"})
+    SUPPORTED_COMPRESSIONS = frozenset({".gz"})
+    CHUNK_SIZE = 1000
+    BASE_SELECTOR = AssetCentricSelector
+
+    MISSING_ID = "<MISSING_RESOURCE_ID>"
+
+    def as_id(self, item: Annotation) -> str:
+        project = item._cognite_client.config.project
+        return f"INTERNAL_ID_project_{project}_{item.id!s}"
+
+    def stream_data(self, selector: AssetCentricSelector, limit: int | None = None) -> Iterable[Page]:
+        total = 0
+        for file_chunk in FileMetadataIO(self.client).stream_data(selector, None):
+            # Todo Support pagination. This is missing in the SDK.
+            results = self.client.annotations.list(
+                filter=AnnotationFilter(
+                    annotated_resource_type="file",
+                    annotated_resource_ids=[{"id": file_metadata.id} for file_metadata in file_chunk.items],
+                )
+            )
+            if limit is not None and total + len(results) > limit:
+                results = results[: limit - total]
+
+            for chunk in chunker_sequence(results, self.CHUNK_SIZE):
+                yield Page(worker_id="main", items=chunk)
+                total += len(chunk)
+                if limit is not None and total >= limit:
+                    break
+
+    def count(self, selector: AssetCentricSelector) -> int | None:
+        """There is no efficient way to count annotations in CDF."""
+        return None
+
+    def data_to_json_chunk(
+        self, data_chunk: Sequence[Annotation], selector: AssetCentricSelector | None = None
+    ) -> list[dict[str, JsonVal]]:
+        files_ids: set[int] = set()
+        for item in data_chunk:
+            if item.annotated_resource_type == "file" and item.annotated_resource_id is not None:
+                files_ids.add(item.annotated_resource_id)
+            if file_id := self._get_file_id(item.data):
+                files_ids.add(file_id)
+        self.client.lookup.files.external_id(list(files_ids))  # Preload file external IDs
+        asset_ids = {asset_id for item in data_chunk if (asset_id := self._get_asset_id(item.data))}
+        self.client.lookup.assets.external_id(list(asset_ids))  # Preload asset external IDs
+        return [self.dump_annotation_to_json(item) for item in data_chunk]
+
+    def dump_annotation_to_json(self, annotation: Annotation) -> dict[str, JsonVal]:
+        """Dump annotations to a list of JSON serializable dictionaries.
+
+        Args:
+            annotation: The annotations to dump.
+
+        Returns:
+            A list of JSON serializable dictionaries representing the annotations.
+        """
+        dumped = annotation.as_write().dump()
+        if isinstance(annotated_resource_id := dumped.pop("annotatedResourceId", None), int):
+            external_id = self.client.lookup.files.external_id(annotated_resource_id)
+            dumped["annotatedResourceExternalId"] = self.MISSING_ID if external_id is None else external_id
+
+        if isinstance(data := dumped.get("data"), dict):
+            if isinstance(file_ref := data.get("fileRef"), dict) and isinstance(file_ref.get("id"), int):
+                external_id = self.client.lookup.files.external_id(file_ref.pop("id"))
+                file_ref["externalId"] = self.MISSING_ID if external_id is None else external_id
+            if isinstance(asset_ref := data.get("assetRef"), dict) and isinstance(asset_ref.get("id"), int):
+                external_id = self.client.lookup.assets.external_id(asset_ref.pop("id"))
+                asset_ref["externalId"] = self.MISSING_ID if external_id is None else external_id
+        return dumped
+
+    @classmethod
+    def _get_file_id(cls, data: dict[str, Any]) -> int | None:
+        file_ref = data.get("fileRef")
+        if isinstance(file_ref, dict):
+            id_ = file_ref.get("id")
+            if isinstance(id_, int):
+                return id_
+        return None
+
+    @classmethod
+    def _get_asset_id(cls, data: dict[str, Any]) -> int | None:
+        asset_ref = data.get("assetRef")
+        if isinstance(asset_ref, dict):
+            id_ = asset_ref.get("id")
+            if isinstance(id_, int):
+                return id_
+        return None
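
A hedged usage sketch of the new `FileAnnotationIO`. Here `client` and `selector` are assumed to be a configured `ToolkitClient` and an `AssetCentricSelector` defined earlier, and the constructor signature is inferred from the other `StorageIO` subclasses:

```python
from cognite_toolkit._cdf_tk.storageio import FileAnnotationIO

io = FileAnnotationIO(client)  # assumption: StorageIO subclasses are constructed with the client
for page in io.stream_data(selector, limit=100):
    # Internal file/asset ids are resolved to external ids before dumping.
    for row in io.data_to_json_chunk(page.items):
        print(row)
```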
cognite_toolkit/_cdf_tk/tracker.py
CHANGED
@@ -58,7 +58,7 @@ class Tracker:
             warning_details[f"warningMostCommon{no}Count"] = count
             warning_details[f"warningMostCommon{no}Name"] = warning
 
-
+        subcommands, optional_args = self._parse_sys_args()
         event_information = {
             "userInput": self.user_command,
             "toolkitVersion": __version__,
@@ -69,7 +69,7 @@ class Tracker:
             **warning_details,
             "result": type(result).__name__ if isinstance(result, Exception) else result,
             "error": str(result) if isinstance(result, Exception) else "",
-
+            "subcommands": subcommands,
             **optional_args,
             "alphaFlags": [name for name, value in self._cdf_toml.alpha_flags.items() if value],
             "plugins": [name for name, value in self._cdf_toml.plugins.items() if value],
@@ -128,9 +128,9 @@ class Tracker:
         return distinct_id
 
     @staticmethod
-    def _parse_sys_args() -> tuple[
+    def _parse_sys_args() -> tuple[list[str], dict[str, str | bool]]:
         optional_args: dict[str, str | bool] = {}
-
+        subcommands: list[str] = []
         last_key: str | None = None
         if sys.argv and len(sys.argv) > 1:
             for arg in sys.argv[1:]:
@@ -147,11 +147,11 @@ class Tracker:
                     optional_args[last_key] = arg
                     last_key = None
                 else:
-
+                    subcommands.append(arg)
 
         if last_key:
             optional_args[last_key] = True
-        return
+        return subcommands, optional_args
 
     @property
     def _cicd(self) -> str:
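
The new `_parse_sys_args` splits `sys.argv` into positional subcommands and optional flags. A standalone sketch of the same splitting logic (the exact flag handling inside `Tracker` may differ in details):

```python
def parse_args(argv: list[str]) -> tuple[list[str], dict[str, str | bool]]:
    subcommands: list[str] = []
    optional_args: dict[str, str | bool] = {}
    last_key: str | None = None
    for arg in argv:
        if arg.startswith("-"):
            if last_key:  # previous flag had no value, treat it as a switch
                optional_args[last_key] = True
            last_key = arg.lstrip("-")
        elif last_key:
            optional_args[last_key] = arg
            last_key = None
        else:
            subcommands.append(arg)
    if last_key:
        optional_args[last_key] = True
    return subcommands, optional_args


# parse_args(["deploy", "--env", "prod", "--dry-run"])
# -> (["deploy"], {"env": "prod", "dry-run": True})
```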
cognite_toolkit/_cdf_tk/utils/fileio/_readers.py
CHANGED
@@ -7,6 +7,7 @@ from dataclasses import dataclass
 from functools import partial
 from io import TextIOWrapper
 from pathlib import Path
+from typing import Any
 
 import yaml
 
@@ -87,26 +88,20 @@ class FailedParsing:
     error: str
 
 
-class TableReader(FileReader, ABC):
-
-
-class CSVReader(TableReader):
-    """Reads CSV files and yields each row as a dictionary.
+class TableReader(FileReader, ABC):
+    """Reads table-like files and yields each row as a dictionary.
 
     Args:
-        input_file (Path): The path to the
+        input_file (Path): The path to the table file to read.
         sniff_rows (int | None): Optional number of rows to sniff for
             schema detection. If None, no schema is detected. If a schema is sniffed
-            from the first `sniff_rows` rows, it will be used to parse the
+            from the first `sniff_rows` rows, it will be used to parse the table.
         schema (Sequence[SchemaColumn] | None): Optional schema to use for parsing.
             You can either provide a schema or use `sniff_rows` to detect it.
         keep_failed_cells (bool): If True, failed cells will be kept in the
             `failed_cell` attribute. If False, they will be ignored.
-
     """
 
-    format = ".csv"
-
     def __init__(
         self,
         input_file: Path,
@@ -152,18 +147,19 @@ class CSVReader(TableReader):
     @classmethod
     def sniff_schema(cls, input_file: Path, sniff_rows: int = 100) -> list[SchemaColumn]:
         """
-        Sniff the schema from the first `sniff_rows` rows of the
+        Sniff the schema from the first `sniff_rows` rows of the file.
 
         Args:
-            input_file (Path): The path to the
+            input_file (Path): The path to the tabular file.
             sniff_rows (int): The number of rows to read for sniffing the schema.
 
         Returns:
             list[SchemaColumn]: The inferred schema as a list of SchemaColumn objects.
+
        Raises:
            ValueError: If `sniff_rows` is not a positive integer.
            ToolkitFileNotFoundError: If the file does not exist.
-            ToolkitValueError: If the file is not
+            ToolkitValueError: If the file is not the correct format or if there are issues with the content.
 
         """
         if sniff_rows <= 0:
@@ -171,43 +167,50 @@ class CSVReader(TableReader):
 
         if not input_file.exists():
             raise ToolkitFileNotFoundError(f"File not found: {input_file.as_posix()!r}.")
-        if input_file.suffix !=
-            raise ToolkitValueError(f"Expected a .
+        if input_file.suffix != cls.format:
+            raise ToolkitValueError(f"Expected a {cls.format} file got a {input_file.suffix!r} file instead.")
 
-
-
-
-        if duplicated := [name for name, count in column_names.items() if count > 1]:
-            raise ToolkitValueError(f"CSV file contains duplicate headers: {humanize_collection(duplicated)}")
-        sample_rows: list[dict[str, str]] = []
-        for no, row in enumerate(reader):
-            if no >= sniff_rows:
-                break
-            sample_rows.append(row)
+        column_names, sample_rows = cls._read_sample_rows(input_file, sniff_rows)
+        cls._check_column_names(column_names)
+        return cls._infer_schema(sample_rows, column_names)
 
-
-
+    @classmethod
+    @abstractmethod
+    def _read_sample_rows(cls, input_file: Path, sniff_rows: int) -> tuple[Sequence[str], list[dict[str, str]]]: ...
 
-
-
-
-
-
+    @classmethod
+    def _infer_schema(cls, sample_rows: list[dict[str, Any]], column_names: Sequence[str]) -> list[SchemaColumn]:
+        schema: list[SchemaColumn] = []
+        for column_name in column_names:
+            sample_values = [row[column_name] for row in sample_rows if column_name in row]
+            if not sample_values:
+                column = SchemaColumn(name=column_name, type="string")
+            else:
+                data_types = Counter(
+                    infer_data_type_from_value(value, dtype="Json")[0] for value in sample_values if value is not None
+                )
+                if not data_types:
+                    inferred_type = "string"
                 else:
-
-
-
-
-            )
-            if not data_types:
-                inferred_type = "string"
-            else:
-                inferred_type = data_types.most_common()[0][0]
-            # Json dtype is a subset of Datatype that SchemaColumn accepts
-            column = SchemaColumn(name=column_name, type=inferred_type)  # type: ignore[arg-type]
-            schema.append(column)
+                    inferred_type = data_types.most_common()[0][0]
+            # Json dtype is a subset of Datatype that SchemaColumn accepts
+            column = SchemaColumn(name=column_name, type=inferred_type)  # type: ignore[arg-type]
+            schema.append(column)
         return schema
 
+    @classmethod
+    def _check_column_names(cls, column_names: Sequence[str]) -> None:
+        """Check for duplicate column names."""
+        duplicates = [name for name, count in Counter(column_names).items() if count > 1]
+        if duplicates:
+            raise ToolkitValueError(f"Duplicate column names found: {humanize_collection(duplicates)}.")
+
+
+class CSVReader(TableReader):
+    """Reads CSV files and yields each row as a dictionary."""
+
+    format = ".csv"
+
     def _read_chunks_from_file(self, file: TextIOWrapper) -> Iterator[dict[str, JsonVal]]:
         if self.keep_failed_cells and self.failed_cell:
             self.failed_cell.clear()
@@ -231,10 +234,31 @@ class CSVReader(TableReader):
         with compression.open("r") as file:
             yield from csv.DictReader(file)
 
+    @classmethod
+    def _read_sample_rows(cls, input_file: Path, sniff_rows: int) -> tuple[Sequence[str], list[dict[str, str]]]:
+        column_names: Sequence[str] = []
+        compression = Compression.from_filepath(input_file)
+        with compression.open("r") as file:
+            reader = csv.DictReader(file)
+            column_names = reader.fieldnames or []
+            sample_rows: list[dict[str, str]] = []
+            for no, row in enumerate(reader):
+                if no >= sniff_rows:
+                    break
+                sample_rows.append(row)
+
+        if not sample_rows:
+            raise ToolkitValueError(f"No data found in the file: {input_file.as_posix()!r}.")
+        return column_names, sample_rows
+
 
 class ParquetReader(TableReader):
     format = ".parquet"
 
+    def __init__(self, input_file: Path) -> None:
+        # Parquet files have their own schema, so we don't need to sniff or provide one.
+        super().__init__(input_file, sniff_rows=None, schema=None, keep_failed_cells=False)
+
     def read_chunks(self) -> Iterator[dict[str, JsonVal]]:
         import pyarrow.parquet as pq
 
@@ -258,6 +282,28 @@ class ParquetReader(TableReader):
                 return value
         return value
 
+    @classmethod
+    def _read_sample_rows(cls, input_file: Path, sniff_rows: int) -> tuple[Sequence[str], list[dict[str, str]]]:
+        import pyarrow.parquet as pq
+
+        column_names: Sequence[str] = []
+        sample_rows: list[dict[str, str]] = []
+        with pq.ParquetFile(input_file) as parquet_file:
+            column_names = parquet_file.schema.names
+            row_count = min(sniff_rows, parquet_file.metadata.num_rows)
+            row_iter = parquet_file.iter_batches(batch_size=row_count)
+            try:
+                batch = next(row_iter)
+                for row in batch.to_pylist():
+                    str_row = {key: (str(value) if value is not None else "") for key, value in row.items()}
+                    sample_rows.append(str_row)
+            except StopIteration:
+                pass
+
+        if not sample_rows:
+            raise ToolkitValueError(f"No data found in the file: {input_file.as_posix()!r}.")
+        return column_names, sample_rows
+
 
 FILE_READ_CLS_BY_FORMAT: Mapping[str, type[FileReader]] = {}
 TABLE_READ_CLS_BY_FORMAT: Mapping[str, type[TableReader]] = {}
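
With sniffing hoisted into `TableReader`, schema detection now works uniformly for CSV and Parquet. A hedged sketch with hypothetical file names; the import path targets the `_readers` module directly since it is unclear whether the readers are re-exported from the `fileio` package:

```python
from pathlib import Path

from cognite_toolkit._cdf_tk.utils.fileio._readers import CSVReader, ParquetReader

csv_schema = CSVReader.sniff_schema(Path("data.csv"), sniff_rows=50)
parquet_schema = ParquetReader.sniff_schema(Path("data.parquet"))  # defaults to 100 rows
for column in csv_schema:
    print(column.name, column.type)
```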
cognite_toolkit/_cdf_tk/utils/http_client/_client.py
CHANGED
@@ -147,13 +147,15 @@ class HTTPClient:
             timeout=self.config.timeout,
         )
 
-    def _create_headers(
+    def _create_headers(
+        self, api_version: str | None = None, content_type: str = "application/json", accept: str = "application/json"
+    ) -> MutableMapping[str, str]:
         headers: MutableMapping[str, str] = {}
         headers["User-Agent"] = f"httpx/{httpx.__version__} {get_user_agent()}"
         auth_name, auth_value = self.config.credentials.authorization_header()
         headers[auth_name] = auth_value
-        headers["content-type"] =
-        headers["accept"] =
+        headers["content-type"] = content_type
+        headers["accept"] = accept
         headers["x-cdp-sdk"] = f"CogniteToolkit:{get_current_toolkit_version()}"
         headers["x-cdp-app"] = self.config.client_name
         headers["cdf-version"] = api_version or self.config.api_subversion
@@ -162,7 +164,7 @@ class HTTPClient:
         return headers
 
     def _make_request(self, item: RequestMessage) -> httpx.Response:
-        headers = self._create_headers(item.api_version)
+        headers = self._create_headers(item.api_version, item.content_type, item.accept)
         params: dict[str, PrimitiveType] | None = None
         if isinstance(item, ParamRequest):
             params = item.parameters
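
Content negotiation is now parameterized per request instead of hard-coded to JSON (the `+2` lines in `http_client/_data_classes.py`, not rendered above, presumably add the matching `content_type`/`accept` fields to the request message). A standalone sketch of the header-builder change, not the Toolkit class itself:

```python
def create_headers(
    api_version: str | None = None,
    content_type: str = "application/json",
    accept: str = "application/json",
) -> dict[str, str]:
    # Simplified: the real method also adds auth, user-agent and cdf-* headers.
    headers = {"content-type": content_type, "accept": accept}
    if api_version is not None:
        headers["cdf-version"] = api_version
    return headers


# A CSV download could now request: create_headers(accept="text/csv")
```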
cognite_toolkit/_cdf_tk/utils/useful_types.py
CHANGED
@@ -2,15 +2,17 @@ from collections.abc import Hashable
 from datetime import date, datetime
 from typing import Any, Literal, TypeAlias, TypeVar, get_args
 
-from cognite.client.data_classes import Asset, Event, FileMetadata,
+from cognite.client.data_classes import Annotation, Asset, Event, FileMetadata, TimeSeries
 from cognite.client.data_classes._base import CogniteObject, WriteableCogniteResourceList
 
 JsonVal: TypeAlias = None | str | int | float | bool | dict[str, "JsonVal"] | list["JsonVal"]
 
 AssetCentricDestinationType: TypeAlias = Literal["assets", "files", "events", "timeseries", "sequences"]
-AssetCentricType: TypeAlias = Literal["asset", "file", "event", "timeseries", "sequence"]
-AssetCentricResource: TypeAlias = Asset | FileMetadata | Event | TimeSeries
-
+AssetCentricType: TypeAlias = Literal["asset", "file", "event", "timeseries", "sequence", "fileAnnotation"]
+AssetCentricResource: TypeAlias = Asset | FileMetadata | Event | TimeSeries
+AssetCentricResourceExtended: TypeAlias = Asset | FileMetadata | Event | TimeSeries | Annotation
+AssetCentricKind: TypeAlias = Literal["Assets", "Events", "TimeSeries", "FileMetadata", "FileAnnotations"]
+
 DataType: TypeAlias = Literal["string", "integer", "float", "boolean", "json", "date", "timestamp"]
 PythonTypes: TypeAlias = str | int | float | bool | datetime | date | dict[str, Any] | list[Any]
 
@@ -22,3 +24,4 @@ T_Value = TypeVar("T_Value")
 PrimitiveType: TypeAlias = str | int | float | bool
 
 T_WriteCogniteResource = TypeVar("T_WriteCogniteResource", bound=CogniteObject)
+T_AssetCentricResource = TypeVar("T_AssetCentricResource", bound=AssetCentricResource)
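
The new `T_AssetCentricResource` is a bound `TypeVar`, so generic containers such as `AssetCentricMapping` keep the concrete resource type they carry. A minimal illustration with a local stand-in class (not the Toolkit one):

```python
from dataclasses import dataclass
from typing import Generic, TypeVar

from cognite.client.data_classes import Asset, Event, FileMetadata, TimeSeries

AssetCentricResource = Asset | FileMetadata | Event | TimeSeries
T_AssetCentricResource = TypeVar("T_AssetCentricResource", bound=AssetCentricResource)


@dataclass
class Holder(Generic[T_AssetCentricResource]):
    resource: T_AssetCentricResource

    def get(self) -> T_AssetCentricResource:
        # The concrete type survives: Holder(Asset(...)).get() is an Asset.
        return self.resource


asset = Holder(Asset(name="pump")).get()  # checker infers: Asset
```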
@@ -4,7 +4,7 @@ default_env = "<DEFAULT_ENV_PLACEHOLDER>"
|
|
|
4
4
|
[modules]
|
|
5
5
|
# This is the version of the modules. It should not be changed manually.
|
|
6
6
|
# It will be updated by the 'cdf modules upgrade' command.
|
|
7
|
-
version = "0.6.
|
|
7
|
+
version = "0.6.89"
|
|
8
8
|
|
|
9
9
|
[alpha_flags]
|
|
10
10
|
external-libraries = true
|
cognite_toolkit/_version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.6.88"
+__version__ = "0.6.89"
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cognite_toolkit
|
|
3
|
-
Version: 0.6.
|
|
3
|
+
Version: 0.6.89
|
|
4
4
|
Summary: Official Cognite Data Fusion tool for project templates and configuration deployment
|
|
5
5
|
Project-URL: Homepage, https://docs.cognite.com/cdf/deploy/cdf_toolkit/
|
|
6
6
|
Project-URL: Changelog, https://github.com/cognitedata/toolkit/releases
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
cognite_toolkit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
cognite_toolkit/_cdf.py,sha256=1OSAvbOeuIrnsczEG2BtGqRP3L3sq0VMPthmugnqCUw,5821
|
|
3
|
-
cognite_toolkit/_version.py,sha256=
|
|
3
|
+
cognite_toolkit/_version.py,sha256=Zx4pn6mGv-GP7UL77bykGBRh05hpWmRUnqQ8PXKNLcc,23
|
|
4
4
|
cognite_toolkit/_cdf_tk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
cognite_toolkit/_cdf_tk/cdf_toml.py,sha256=IjmzNVLxsOV6tsMDgmJmXsy-LQru-8IEQdFzGW5DxVk,8117
|
|
6
6
|
cognite_toolkit/_cdf_tk/constants.py,sha256=e9XmGvQCqGq7zYQrNoopU5e2KnYZYBPyUC5raGShK7k,6364
|
|
@@ -8,7 +8,7 @@ cognite_toolkit/_cdf_tk/exceptions.py,sha256=xG0jMwi5A20nvPvyo6sCyz_cyKycynPyIzp
|
|
|
8
8
|
cognite_toolkit/_cdf_tk/feature_flags.py,sha256=oKvUHcNTtt8zp31eZ1eSCxfSIelm0L5B0xAQOskr1hc,2892
|
|
9
9
|
cognite_toolkit/_cdf_tk/hints.py,sha256=UI1ymi2T5wCcYOpEbKbVaDnlyFReFy8TDtMVt-5E1h8,6493
|
|
10
10
|
cognite_toolkit/_cdf_tk/plugins.py,sha256=yL7Q4k9UGnoHP9Ucrno02_qi1L3DrE6ggBiQI-wQKiU,783
|
|
11
|
-
cognite_toolkit/_cdf_tk/tracker.py,sha256=
|
|
11
|
+
cognite_toolkit/_cdf_tk/tracker.py,sha256=ybazaYDMgrtmAaCEb1nlKAQzjcN352-U-om4NBGV328,5965
|
|
12
12
|
cognite_toolkit/_cdf_tk/validation.py,sha256=KFdPgnNIbVM0yjFF0cqmpBB8MI8e-U-YbBYrP4IiClE,8441
|
|
13
13
|
cognite_toolkit/_cdf_tk/apps/__init__.py,sha256=nNQymHhwxjXNpY9N9xDmnvSPLCMwQkn_t9oRkgDWofI,659
|
|
14
14
|
cognite_toolkit/_cdf_tk/apps/_auth_app.py,sha256=ER7uYb3ViwsHMXiQEZpyhwU6TIjKaB9aEy32VI4MPpg,3397
|
|
@@ -115,17 +115,16 @@ cognite_toolkit/_cdf_tk/commands/pull.py,sha256=2Zf6IOXxSxZ-5XkNE80FlrXBuNejAWrA
|
|
|
115
115
|
cognite_toolkit/_cdf_tk/commands/repo.py,sha256=MNy8MWphTklIZHvQOROCweq8_SYxGv6BaqnLpkFFnuk,3845
|
|
116
116
|
cognite_toolkit/_cdf_tk/commands/run.py,sha256=JyX9jLEQej9eRrHVCCNlw4GuF80qETSol3-T5CCofgw,37331
|
|
117
117
|
cognite_toolkit/_cdf_tk/commands/_migrate/__init__.py,sha256=i5ldcTah59K0E4fH5gHTV0GRvtDCEvVses9WQzn9Lno,226
|
|
118
|
-
cognite_toolkit/_cdf_tk/commands/_migrate/
|
|
119
|
-
cognite_toolkit/_cdf_tk/commands/_migrate/
|
|
120
|
-
cognite_toolkit/_cdf_tk/commands/_migrate/
|
|
121
|
-
cognite_toolkit/_cdf_tk/commands/_migrate/conversion.py,sha256=Eol-0ruQ14fwS-bx2pEmbXdICodfknSJ_OsAASa6jkY,9592
|
|
118
|
+
cognite_toolkit/_cdf_tk/commands/_migrate/canvas.py,sha256=R-z0yfOFcJZj-zRLhN-7z_-SLxqzSmONMgrbzNF9dGs,8843
|
|
119
|
+
cognite_toolkit/_cdf_tk/commands/_migrate/command.py,sha256=059GfizsxZzsO7CJP3uEtyvfbeKt2F5eYnvq9GvHxNE,14212
|
|
120
|
+
cognite_toolkit/_cdf_tk/commands/_migrate/conversion.py,sha256=ElsZeZNRPkxBm2VdTe0nCvyq75qfGLOLFY54fCuaxFY,14333
|
|
122
121
|
cognite_toolkit/_cdf_tk/commands/_migrate/creators.py,sha256=FTu7w3G8KyPY8pagG3KdPpOmpLcjehaAg2auEy6iM7A,9605
|
|
123
|
-
cognite_toolkit/_cdf_tk/commands/_migrate/data_classes.py,sha256=
|
|
124
|
-
cognite_toolkit/_cdf_tk/commands/_migrate/data_mapper.py,sha256=
|
|
122
|
+
cognite_toolkit/_cdf_tk/commands/_migrate/data_classes.py,sha256=S5MWfkaYAHfV6rc6RA4KR7WRETdcAKdpe6s4ZdwB0F0,8562
|
|
123
|
+
cognite_toolkit/_cdf_tk/commands/_migrate/data_mapper.py,sha256=7m9uj_W11iokGmbWD979z5UK5KwOYv7L9HiY5PcpIN4,6231
|
|
125
124
|
cognite_toolkit/_cdf_tk/commands/_migrate/data_model.py,sha256=i1eUsNX6Dueol9STIEwyksBnBsWUk13O8qHIjW964pM,7860
|
|
126
125
|
cognite_toolkit/_cdf_tk/commands/_migrate/default_mappings.py,sha256=KkSq_4R6hQ15ccG-jHy7vVgPwC5IDd5OaXZLvz5mIZs,5547
|
|
127
126
|
cognite_toolkit/_cdf_tk/commands/_migrate/issues.py,sha256=lWSnuS3CfRDbA7i1g12gJ2reJnQcLmZWxHDK19-Wxkk,5772
|
|
128
|
-
cognite_toolkit/_cdf_tk/commands/_migrate/migration_io.py,sha256=
|
|
127
|
+
cognite_toolkit/_cdf_tk/commands/_migrate/migration_io.py,sha256=obtdne1XqzOhlWuw2QnFs7vub5_FTaYOf086INB5d4I,9813
|
|
129
128
|
cognite_toolkit/_cdf_tk/commands/_migrate/prepare.py,sha256=RfqaNoso5CyBwc-p6ckwcYqBfZXKhdJgdGIyd0TATaI,2635
|
|
130
129
|
cognite_toolkit/_cdf_tk/commands/_migrate/selectors.py,sha256=CYle1Gz69HHnKF4onTIFxrpiOoDApvVK01SFuQuHzP0,2130
|
|
 cognite_toolkit/_cdf_tk/cruds/__init__.py,sha256=j0yxDCwd4Cl9KG7SvGTDQg4Y2bHfYVEDv8CBxbFTWUM,6070
@@ -226,7 +225,8 @@ cognite_toolkit/_cdf_tk/resource_classes/robotics/data_postprocessing.py,sha256=
 cognite_toolkit/_cdf_tk/resource_classes/robotics/frame.py,sha256=XmDqJ0pAxe_vAP0Dhktba1f9o2zg_ORCJ3Hz8cyJMrk,899
 cognite_toolkit/_cdf_tk/resource_classes/robotics/location.py,sha256=dbc9HT-bc2Qt15hHoR63SM7pg321BhNuTNjI7HHCwSA,468
 cognite_toolkit/_cdf_tk/resource_classes/robotics/map.py,sha256=j77z7CzCMiMj8r94BdUKCum9EuZRUjaSlUAy9K9DL_Q,942
-cognite_toolkit/_cdf_tk/storageio/__init__.py,sha256
+cognite_toolkit/_cdf_tk/storageio/__init__.py,sha256=-OhPPhl9z1ynYcFlRVYfOPD246HhQydmn4VzByJh7C0,2355
+cognite_toolkit/_cdf_tk/storageio/_annotations.py,sha256=wAMkgM-IpgXuY7_1KbtiTv8VdA555ywKjntD_boOBPk,4647
 cognite_toolkit/_cdf_tk/storageio/_applications.py,sha256=bhyG1d2_9duPkX-otC2brVcpChvdXSPkYhBHS5T_72g,4343
 cognite_toolkit/_cdf_tk/storageio/_asset_centric.py,sha256=Rhy64zUW4oxacq_vYomDeTRPmF6Vx-1mkYAFAqJE9vk,28312
 cognite_toolkit/_cdf_tk/storageio/_base.py,sha256=NWXPdgzUnpBiav5Hi8XGHkWU9QiMjNzBQTxMcuxF-LA,11017
@@ -268,16 +268,16 @@ cognite_toolkit/_cdf_tk/utils/sql_parser.py,sha256=jernu2amPQ54cQZ4vFZm1gEhFZfGc
 cognite_toolkit/_cdf_tk/utils/table_writers.py,sha256=Rxp_CZDDWrNPERNq6u1xsAX1OvzownwMMnpwNu8KdH0,17861
 cognite_toolkit/_cdf_tk/utils/text.py,sha256=EpIXjaQ5C5q5fjbUjAW7tncXpdJfiQeV7CYSbr70Bl0,3106
 cognite_toolkit/_cdf_tk/utils/thread_safe_dict.py,sha256=NbRHcZvWpF9xHP5OkOMGFpxrPNbi0Q3Eea6PUNbGlt4,3426
-cognite_toolkit/_cdf_tk/utils/useful_types.py,sha256=
+cognite_toolkit/_cdf_tk/utils/useful_types.py,sha256=ITiaJYcJrQZTR-CejtnSRPdseg86TxtWv8TqIkLi09c,1565
 cognite_toolkit/_cdf_tk/utils/validate_access.py,sha256=1puswcpgEDNCwdk91dhLqCBSu_aaUAd3Hsw21d-YVFs,21955
 cognite_toolkit/_cdf_tk/utils/fileio/__init__.py,sha256=ts5kYu_1Ks7xjnM6pIrVUrZe0nkYI6euYXeE4ox34xk,1199
 cognite_toolkit/_cdf_tk/utils/fileio/_base.py,sha256=MpWaD3lR9vrJ-kGzTiDOtChXhvFD7-xrP-Pzp7vjnLY,756
 cognite_toolkit/_cdf_tk/utils/fileio/_compression.py,sha256=8BAPgg5OKc3vkEEkqOvYsuyh12iXVNuEmC0omWwyJNQ,2355
-cognite_toolkit/_cdf_tk/utils/fileio/_readers.py,sha256=
+cognite_toolkit/_cdf_tk/utils/fileio/_readers.py,sha256=nGfsSfpXDYUncncsFuJD9-xYPJ5635mSFUJfuCyQ3no,13724
 cognite_toolkit/_cdf_tk/utils/fileio/_writers.py,sha256=ghNGBZjkISAlbxe8o5YWWloLXG9QKOtF_qGA9JkvYss,17712
 cognite_toolkit/_cdf_tk/utils/http_client/__init__.py,sha256=H1T-cyIoVaPL4MvN1IuG-cHgj-cqB7eszu2kIN939lw,813
-cognite_toolkit/_cdf_tk/utils/http_client/_client.py,sha256=
-cognite_toolkit/_cdf_tk/utils/http_client/_data_classes.py,sha256=
+cognite_toolkit/_cdf_tk/utils/http_client/_client.py,sha256=sN1Sizxv_rAcOSRd-2lKSs0p-SO1rA4eHL8pHMTBx54,11018
+cognite_toolkit/_cdf_tk/utils/http_client/_data_classes.py,sha256=7mr1goZKx5oR877EVu5MWu1qplm-lfCKCAMVZZ5Tvm0,12611
 cognite_toolkit/_cdf_tk/utils/http_client/_exception.py,sha256=fC9oW6BN0HbUe2AkYABMP7Kj0-9dNYXVFBY5RQztq2c,126
 cognite_toolkit/_cdf_tk/utils/http_client/_tracker.py,sha256=EBBnd-JZ7nc_jYNFJokCHN2UZ9sx0McFLZvlceUYYic,1215
 cognite_toolkit/_repo_files/.env.tmpl,sha256=UmgKZVvIp-OzD8oOcYuwb_6c7vSJsqkLhuFaiVgK7RI,972
@@ -285,13 +285,13 @@ cognite_toolkit/_repo_files/.gitignore,sha256=ip9kf9tcC5OguF4YF4JFEApnKYw0nG0vPi
 cognite_toolkit/_repo_files/AzureDevOps/.devops/README.md,sha256=OLA0D7yCX2tACpzvkA0IfkgQ4_swSd-OlJ1tYcTBpsA,240
 cognite_toolkit/_repo_files/AzureDevOps/.devops/deploy-pipeline.yml,sha256=brULcs8joAeBC_w_aoWjDDUHs3JheLMIR9ajPUK96nc,693
 cognite_toolkit/_repo_files/AzureDevOps/.devops/dry-run-pipeline.yml,sha256=OBFDhFWK1mlT4Dc6mDUE2Es834l8sAlYG50-5RxRtHk,723
-cognite_toolkit/_repo_files/GitHub/.github/workflows/deploy.yaml,sha256=
-cognite_toolkit/_repo_files/GitHub/.github/workflows/dry-run.yaml,sha256=
-cognite_toolkit/_resources/cdf.toml,sha256=
+cognite_toolkit/_repo_files/GitHub/.github/workflows/deploy.yaml,sha256=4lnerKrn16H3_STBcvu5fODE5cVtxaZEloaTtvCpxXc,667
+cognite_toolkit/_repo_files/GitHub/.github/workflows/dry-run.yaml,sha256=ppSA3aVmcoucLy8pNBfEKscrIvsPgyFwnb0Wv09dLNY,2430
+cognite_toolkit/_resources/cdf.toml,sha256=YVocnddLa-tvLk7NRmUshNUS1v8EbDyHHYYJY4or2WU,487
 cognite_toolkit/demo/__init__.py,sha256=-m1JoUiwRhNCL18eJ6t7fZOL7RPfowhCuqhYFtLgrss,72
 cognite_toolkit/demo/_base.py,sha256=6xKBUQpXZXGQ3fJ5f7nj7oT0s2n7OTAGIa17ZlKHZ5U,8052
-cognite_toolkit-0.6.
-cognite_toolkit-0.6.
-cognite_toolkit-0.6.
-cognite_toolkit-0.6.
-cognite_toolkit-0.6.
+cognite_toolkit-0.6.89.dist-info/METADATA,sha256=BctpEwbWImcRv7ez5PS2z0R9qS4SmL8VRc4Ryv_GVoQ,4501
+cognite_toolkit-0.6.89.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+cognite_toolkit-0.6.89.dist-info/entry_points.txt,sha256=JlR7MH1_UMogC3QOyN4-1l36VbrCX9xUdQoHGkuJ6-4,83
+cognite_toolkit-0.6.89.dist-info/licenses/LICENSE,sha256=CW0DRcx5tL-pCxLEN7ts2S9g2sLRAsWgHVEX4SN9_Mc,752
+cognite_toolkit-0.6.89.dist-info/RECORD,,
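Each RECORD entry above follows the standard wheel format `path,sha256=<digest>,<size>`, where the digest is the unpadded URL-safe base64 encoding of the file's SHA-256 hash. A minimal sketch of recomputing one entry locally for verification (the path is illustrative, not taken from this diff):

import base64
import hashlib
from pathlib import Path

def record_entry(path: Path) -> str:
    # RECORD digests are URL-safe base64 SHA-256 with trailing '=' padding stripped.
    digest = base64.urlsafe_b64encode(hashlib.sha256(path.read_bytes()).digest()).rstrip(b"=")
    return f"{path.as_posix()},sha256={digest.decode()},{path.stat().st_size}"

# Compare the output against the corresponding RECORD line of the unpacked wheel.
print(record_entry(Path("cognite_toolkit/_version.py")))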
@@ -1,106 +0,0 @@
-from abc import ABC
-from typing import TypeVar
-
-from cognite.client.data_classes import Asset, Event, FileMetadata, Sequence, TimeSeries
-from cognite.client.data_classes.capabilities import (
-    Capability,
-    DataModelInstancesAcl,
-    DataModelsAcl,
-    SpaceIDScope,
-)
-
-from cognite_toolkit._cdf_tk.client import ToolkitClient
-from cognite_toolkit._cdf_tk.commands._base import ToolkitCommand
-from cognite_toolkit._cdf_tk.constants import DMS_INSTANCE_LIMIT_MARGIN
-from cognite_toolkit._cdf_tk.exceptions import (
-    AuthenticationError,
-    ToolkitMigrationError,
-    ToolkitValueError,
-)
-from cognite_toolkit._cdf_tk.utils import humanize_collection
-
-from .data_model import INSTANCE_SOURCE_VIEW_ID, MODEL_ID, RESOURCE_VIEW_MAPPING_VIEW_ID
-
-T_AssetCentricResource = TypeVar("T_AssetCentricResource", bound=Asset | Event | FileMetadata | TimeSeries | Sequence)
-
-
-class BaseMigrateCommand(ToolkitCommand, ABC):
-    def source_acl(self, data_set_id: list[int]) -> Capability:
-        """Return the source ACL for the given data set IDs."""
-        # This method should be implemented in subclasses that needs access to a specific source ACL.
-        # such as TimeSeries, Files, Assets, and so on.
-        raise ValueError(
-            "Bug in Toolkit: the source ACL is not defined for this migration command. "
-            "Please implement the source_acl method."
-        )
-
-    def validate_access(
-        self,
-        client: ToolkitClient,
-        instance_spaces: list[str] | None = None,
-        schema_spaces: list[str] | None = None,
-        data_set_ids: list[int] | None = None,
-    ) -> None:
-        required_capabilities: list[Capability] = []
-        if instance_spaces is not None:
-            required_capabilities.append(
-                DataModelInstancesAcl(
-                    actions=[
-                        DataModelInstancesAcl.Action.Read,
-                        DataModelInstancesAcl.Action.Write,
-                        DataModelInstancesAcl.Action.Write_Properties,
-                    ],
-                    scope=SpaceIDScope(instance_spaces),
-                )
-            )
-        if schema_spaces is not None:
-            required_capabilities.append(
-                DataModelsAcl(actions=[DataModelsAcl.Action.Read], scope=SpaceIDScope(schema_spaces)),
-            )
-
-        if data_set_ids is not None:
-            source_acl = self.source_acl(data_set_ids)
-            required_capabilities.append(source_acl)
-        if missing := client.iam.verify_capabilities(required_capabilities):
-            raise AuthenticationError(f"Missing required capabilities: {humanize_collection(missing)}.", missing)
-
-    @staticmethod
-    def validate_migration_model_available(client: ToolkitClient) -> None:
-        models = client.data_modeling.data_models.retrieve([MODEL_ID], inline_views=False)
-        if not models:
-            raise ToolkitMigrationError(
-                f"The migration data model {MODEL_ID!r} does not exist. "
-                "Please run the `cdf migrate prepare` command to deploy the migration data model."
-            )
-        elif len(models) > 1:
-            raise ToolkitMigrationError(
-                f"Multiple migration models {MODEL_ID!r}. "
-                "Please delete the duplicate models before proceeding with the migration."
-            )
-        model = models[0]
-        missing_views = {INSTANCE_SOURCE_VIEW_ID, RESOURCE_VIEW_MAPPING_VIEW_ID} - set(model.views or [])
-        if missing_views:
-            raise ToolkitMigrationError(
-                f"Invalid migration model. Missing views {humanize_collection(missing_views)}. "
-                f"Please run the `cdf migrate prepare` command to deploy the migration data model."
-            )
-
-    def validate_available_capacity(self, client: ToolkitClient, instance_count: int) -> None:
-        """Validate that the project has enough capacity to accommodate the migration."""
-
-        stats = client.data_modeling.statistics.project()
-
-        available_capacity = stats.instances.instances_limit - stats.instances.instances
-        available_capacity_after = available_capacity - instance_count
-
-        if available_capacity_after < DMS_INSTANCE_LIMIT_MARGIN:
-            raise ToolkitValueError(
-                "Cannot proceed with migration, not enough instance capacity available. Total capacity after migration"
-                f" would be {available_capacity_after:,} instances, which is less than the required margin of"
-                f" {DMS_INSTANCE_LIMIT_MARGIN:,} instances. Please increase the instance capacity in your CDF project"
-                f" or delete some existing instances before proceeding with the migration of {instance_count:,} assets."
-            )
-        total_instances = stats.instances.instances + instance_count
-        self.console(
-            f"Project has enough capacity for migration. Total instances after migration: {total_instances:,}."
-        )
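The deleted BaseMigrateCommand above bundled IAM checks with migration-specific validation, but the capability verification itself is ordinary SDK usage and works outside the class. A minimal standalone sketch of the same check, assuming a client whose credentials are already configured and with hypothetical space names:

from cognite.client.data_classes.capabilities import (
    DataModelInstancesAcl,
    DataModelsAcl,
    SpaceIDScope,
)

from cognite_toolkit._cdf_tk.client import ToolkitClient

client = ToolkitClient()  # assumes credentials for the target CDF project are configured

# Hypothetical space identifiers, for illustration only.
required = [
    DataModelInstancesAcl(
        actions=[
            DataModelInstancesAcl.Action.Read,
            DataModelInstancesAcl.Action.Write,
            DataModelInstancesAcl.Action.Write_Properties,
        ],
        scope=SpaceIDScope(["my_instance_space"]),
    ),
    DataModelsAcl(actions=[DataModelsAcl.Action.Read], scope=SpaceIDScope(["my_schema_space"])),
]

# verify_capabilities returns the capabilities the caller lacks; an empty result means access is sufficient.
if missing := client.iam.verify_capabilities(required):
    raise SystemExit(f"Missing required capabilities: {missing}")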
File without changes
File without changes
File without changes