cognite-toolkit 0.6.97__py3-none-any.whl → 0.7.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognite_toolkit/_cdf.py +16 -17
- cognite_toolkit/_cdf_tk/apps/__init__.py +2 -0
- cognite_toolkit/_cdf_tk/apps/_core_app.py +13 -5
- cognite_toolkit/_cdf_tk/apps/_data_app.py +1 -1
- cognite_toolkit/_cdf_tk/apps/_dev_app.py +86 -0
- cognite_toolkit/_cdf_tk/apps/_download_app.py +692 -24
- cognite_toolkit/_cdf_tk/apps/_dump_app.py +43 -101
- cognite_toolkit/_cdf_tk/apps/_landing_app.py +18 -4
- cognite_toolkit/_cdf_tk/apps/_migrate_app.py +249 -9
- cognite_toolkit/_cdf_tk/apps/_modules_app.py +0 -3
- cognite_toolkit/_cdf_tk/apps/_purge.py +15 -43
- cognite_toolkit/_cdf_tk/apps/_run.py +11 -0
- cognite_toolkit/_cdf_tk/apps/_upload_app.py +45 -6
- cognite_toolkit/_cdf_tk/builders/__init__.py +2 -2
- cognite_toolkit/_cdf_tk/builders/_base.py +28 -42
- cognite_toolkit/_cdf_tk/cdf_toml.py +20 -1
- cognite_toolkit/_cdf_tk/client/_toolkit_client.py +23 -3
- cognite_toolkit/_cdf_tk/client/api/extended_functions.py +6 -9
- cognite_toolkit/_cdf_tk/client/api/infield.py +93 -1
- cognite_toolkit/_cdf_tk/client/api/migration.py +175 -1
- cognite_toolkit/_cdf_tk/client/api/streams.py +84 -0
- cognite_toolkit/_cdf_tk/client/api/three_d.py +50 -0
- cognite_toolkit/_cdf_tk/client/data_classes/base.py +25 -1
- cognite_toolkit/_cdf_tk/client/data_classes/canvas.py +46 -3
- cognite_toolkit/_cdf_tk/client/data_classes/charts.py +3 -3
- cognite_toolkit/_cdf_tk/client/data_classes/charts_data.py +95 -213
- cognite_toolkit/_cdf_tk/client/data_classes/infield.py +32 -18
- cognite_toolkit/_cdf_tk/client/data_classes/migration.py +10 -2
- cognite_toolkit/_cdf_tk/client/data_classes/streams.py +90 -0
- cognite_toolkit/_cdf_tk/client/data_classes/three_d.py +47 -0
- cognite_toolkit/_cdf_tk/client/testing.py +18 -2
- cognite_toolkit/_cdf_tk/commands/__init__.py +6 -6
- cognite_toolkit/_cdf_tk/commands/_changes.py +3 -42
- cognite_toolkit/_cdf_tk/commands/_download.py +21 -11
- cognite_toolkit/_cdf_tk/commands/_migrate/__init__.py +0 -2
- cognite_toolkit/_cdf_tk/commands/_migrate/command.py +22 -20
- cognite_toolkit/_cdf_tk/commands/_migrate/conversion.py +133 -91
- cognite_toolkit/_cdf_tk/commands/_migrate/data_classes.py +73 -22
- cognite_toolkit/_cdf_tk/commands/_migrate/data_mapper.py +311 -43
- cognite_toolkit/_cdf_tk/commands/_migrate/default_mappings.py +5 -5
- cognite_toolkit/_cdf_tk/commands/_migrate/issues.py +33 -0
- cognite_toolkit/_cdf_tk/commands/_migrate/migration_io.py +157 -8
- cognite_toolkit/_cdf_tk/commands/_migrate/selectors.py +9 -4
- cognite_toolkit/_cdf_tk/commands/_purge.py +27 -28
- cognite_toolkit/_cdf_tk/commands/_questionary_style.py +16 -0
- cognite_toolkit/_cdf_tk/commands/_upload.py +109 -86
- cognite_toolkit/_cdf_tk/commands/about.py +221 -0
- cognite_toolkit/_cdf_tk/commands/auth.py +19 -12
- cognite_toolkit/_cdf_tk/commands/build_cmd.py +15 -61
- cognite_toolkit/_cdf_tk/commands/clean.py +63 -16
- cognite_toolkit/_cdf_tk/commands/deploy.py +20 -17
- cognite_toolkit/_cdf_tk/commands/dump_resource.py +6 -4
- cognite_toolkit/_cdf_tk/commands/init.py +225 -3
- cognite_toolkit/_cdf_tk/commands/modules.py +20 -44
- cognite_toolkit/_cdf_tk/commands/pull.py +6 -19
- cognite_toolkit/_cdf_tk/commands/resources.py +179 -0
- cognite_toolkit/_cdf_tk/constants.py +20 -1
- cognite_toolkit/_cdf_tk/cruds/__init__.py +19 -5
- cognite_toolkit/_cdf_tk/cruds/_base_cruds.py +14 -70
- cognite_toolkit/_cdf_tk/cruds/_data_cruds.py +8 -17
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/__init__.py +4 -1
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/agent.py +11 -9
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/auth.py +4 -14
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/classic.py +44 -43
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/configuration.py +4 -11
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/data_organization.py +4 -13
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/datamodel.py +205 -66
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/extraction_pipeline.py +5 -17
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/fieldops.py +116 -27
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/file.py +6 -27
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/function.py +9 -28
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/hosted_extractors.py +12 -30
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/industrial_tool.py +3 -7
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/location.py +3 -15
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/migration.py +4 -12
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/raw.py +4 -10
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/relationship.py +3 -8
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/robotics.py +15 -44
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/streams.py +94 -0
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/three_d_model.py +3 -7
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/timeseries.py +5 -15
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/transformation.py +39 -31
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/workflow.py +20 -40
- cognite_toolkit/_cdf_tk/cruds/_worker.py +24 -36
- cognite_toolkit/_cdf_tk/feature_flags.py +16 -36
- cognite_toolkit/_cdf_tk/plugins.py +2 -1
- cognite_toolkit/_cdf_tk/resource_classes/__init__.py +4 -0
- cognite_toolkit/_cdf_tk/resource_classes/capabilities.py +12 -0
- cognite_toolkit/_cdf_tk/resource_classes/functions.py +3 -1
- cognite_toolkit/_cdf_tk/resource_classes/infield_cdm_location_config.py +109 -0
- cognite_toolkit/_cdf_tk/resource_classes/migration.py +8 -17
- cognite_toolkit/_cdf_tk/resource_classes/streams.py +29 -0
- cognite_toolkit/_cdf_tk/storageio/__init__.py +9 -21
- cognite_toolkit/_cdf_tk/storageio/_annotations.py +19 -16
- cognite_toolkit/_cdf_tk/storageio/_applications.py +338 -26
- cognite_toolkit/_cdf_tk/storageio/_asset_centric.py +67 -104
- cognite_toolkit/_cdf_tk/storageio/_base.py +61 -29
- cognite_toolkit/_cdf_tk/storageio/_datapoints.py +276 -20
- cognite_toolkit/_cdf_tk/storageio/_file_content.py +436 -0
- cognite_toolkit/_cdf_tk/storageio/_instances.py +34 -2
- cognite_toolkit/_cdf_tk/storageio/_raw.py +26 -0
- cognite_toolkit/_cdf_tk/storageio/selectors/__init__.py +62 -4
- cognite_toolkit/_cdf_tk/storageio/selectors/_base.py +14 -2
- cognite_toolkit/_cdf_tk/storageio/selectors/_canvas.py +14 -0
- cognite_toolkit/_cdf_tk/storageio/selectors/_charts.py +14 -0
- cognite_toolkit/_cdf_tk/storageio/selectors/_datapoints.py +23 -3
- cognite_toolkit/_cdf_tk/storageio/selectors/_file_content.py +164 -0
- cognite_toolkit/_cdf_tk/tk_warnings/other.py +4 -0
- cognite_toolkit/_cdf_tk/tracker.py +2 -2
- cognite_toolkit/_cdf_tk/utils/dtype_conversion.py +9 -3
- cognite_toolkit/_cdf_tk/utils/fileio/__init__.py +2 -0
- cognite_toolkit/_cdf_tk/utils/fileio/_base.py +5 -1
- cognite_toolkit/_cdf_tk/utils/fileio/_readers.py +112 -20
- cognite_toolkit/_cdf_tk/utils/fileio/_writers.py +15 -15
- cognite_toolkit/_cdf_tk/utils/http_client/_client.py +284 -18
- cognite_toolkit/_cdf_tk/utils/http_client/_data_classes.py +50 -4
- cognite_toolkit/_cdf_tk/utils/http_client/_data_classes2.py +187 -0
- cognite_toolkit/_cdf_tk/utils/interactive_select.py +9 -14
- cognite_toolkit/_cdf_tk/utils/sql_parser.py +2 -3
- cognite_toolkit/_cdf_tk/utils/useful_types.py +6 -2
- cognite_toolkit/_cdf_tk/validation.py +79 -1
- cognite_toolkit/_repo_files/GitHub/.github/workflows/deploy.yaml +1 -1
- cognite_toolkit/_repo_files/GitHub/.github/workflows/dry-run.yaml +1 -1
- cognite_toolkit/_resources/cdf.toml +5 -4
- cognite_toolkit/_version.py +1 -1
- cognite_toolkit/config.dev.yaml +13 -0
- {cognite_toolkit-0.6.97.dist-info → cognite_toolkit-0.7.30.dist-info}/METADATA +24 -24
- {cognite_toolkit-0.6.97.dist-info → cognite_toolkit-0.7.30.dist-info}/RECORD +153 -143
- cognite_toolkit-0.7.30.dist-info/WHEEL +4 -0
- {cognite_toolkit-0.6.97.dist-info → cognite_toolkit-0.7.30.dist-info}/entry_points.txt +1 -0
- cognite_toolkit/_cdf_tk/commands/_migrate/canvas.py +0 -201
- cognite_toolkit/_cdf_tk/commands/dump_data.py +0 -489
- cognite_toolkit/_cdf_tk/commands/featureflag.py +0 -27
- cognite_toolkit/_cdf_tk/utils/table_writers.py +0 -434
- cognite_toolkit-0.6.97.dist-info/WHEEL +0 -4
- cognite_toolkit-0.6.97.dist-info/licenses/LICENSE +0 -18
|
@@ -1,29 +1,39 @@
|
|
|
1
|
-
from collections.abc import Iterator, Mapping, Sequence
|
|
1
|
+
from collections.abc import Iterable, Iterator, Mapping, Sequence
|
|
2
2
|
from typing import ClassVar, cast
|
|
3
3
|
|
|
4
|
-
from cognite.client.data_classes
|
|
4
|
+
from cognite.client.data_classes import Annotation
|
|
5
|
+
from cognite.client.data_classes.data_modeling import EdgeId, InstanceApply, NodeId
|
|
5
6
|
|
|
6
7
|
from cognite_toolkit._cdf_tk.client import ToolkitClient
|
|
7
8
|
from cognite_toolkit._cdf_tk.client.data_classes.pending_instances_ids import PendingInstanceId
|
|
8
9
|
from cognite_toolkit._cdf_tk.constants import MISSING_EXTERNAL_ID, MISSING_INSTANCE_SPACE
|
|
9
|
-
from cognite_toolkit._cdf_tk.exceptions import ToolkitNotImplementedError
|
|
10
|
+
from cognite_toolkit._cdf_tk.exceptions import ToolkitNotImplementedError, ToolkitValueError
|
|
10
11
|
from cognite_toolkit._cdf_tk.storageio import (
|
|
12
|
+
AnnotationIO,
|
|
11
13
|
HierarchyIO,
|
|
12
14
|
InstanceIO,
|
|
13
15
|
UploadableStorageIO,
|
|
14
16
|
)
|
|
15
17
|
from cognite_toolkit._cdf_tk.storageio._base import Page, UploadItem
|
|
18
|
+
from cognite_toolkit._cdf_tk.tk_warnings import MediumSeverityWarning
|
|
16
19
|
from cognite_toolkit._cdf_tk.utils.collection import chunker_sequence
|
|
17
20
|
from cognite_toolkit._cdf_tk.utils.http_client import HTTPClient, HTTPMessage, ItemsRequest, SuccessResponseItems
|
|
18
21
|
from cognite_toolkit._cdf_tk.utils.useful_types import (
|
|
19
|
-
|
|
22
|
+
AssetCentricKindExtended,
|
|
20
23
|
AssetCentricType,
|
|
21
24
|
JsonVal,
|
|
22
25
|
T_AssetCentricResource,
|
|
23
26
|
)
|
|
24
27
|
|
|
25
|
-
from .data_classes import
|
|
28
|
+
from .data_classes import (
|
|
29
|
+
AnnotationMapping,
|
|
30
|
+
AssetCentricMapping,
|
|
31
|
+
AssetCentricMappingList,
|
|
32
|
+
MigrationMapping,
|
|
33
|
+
MigrationMappingList,
|
|
34
|
+
)
|
|
26
35
|
from .data_model import INSTANCE_SOURCE_VIEW_ID
|
|
36
|
+
from .default_mappings import ASSET_ANNOTATIONS_ID, FILE_ANNOTATIONS_ID
|
|
27
37
|
from .selectors import AssetCentricMigrationSelector, MigrateDataSetSelector, MigrationCSVFileSelector
|
|
28
38
|
|
|
29
39
|
|
|
@@ -37,7 +47,7 @@ class AssetCentricMigrationIO(
|
|
|
37
47
|
CHUNK_SIZE = 1000
|
|
38
48
|
UPLOAD_ENDPOINT = InstanceIO.UPLOAD_ENDPOINT
|
|
39
49
|
|
|
40
|
-
PENDING_INSTANCE_ID_ENDPOINT_BY_KIND: ClassVar[Mapping[
|
|
50
|
+
PENDING_INSTANCE_ID_ENDPOINT_BY_KIND: ClassVar[Mapping[AssetCentricKindExtended, str]] = {
|
|
41
51
|
"TimeSeries": "/timeseries/set-pending-instance-ids",
|
|
42
52
|
"FileMetadata": "/files/set-pending-instance-ids",
|
|
43
53
|
}
|
|
@@ -113,8 +123,8 @@ class AssetCentricMigrationIO(
|
|
|
113
123
|
yield mapping_list
|
|
114
124
|
|
|
115
125
|
@staticmethod
|
|
116
|
-
def _kind_to_resource_type(kind:
|
|
117
|
-
mapping: dict[
|
|
126
|
+
def _kind_to_resource_type(kind: AssetCentricKindExtended) -> AssetCentricType:
|
|
127
|
+
mapping: dict[AssetCentricKindExtended, AssetCentricType] = {
|
|
118
128
|
"Assets": "asset",
|
|
119
129
|
"Events": "event",
|
|
120
130
|
"TimeSeries": "timeseries",
|
|
@@ -199,3 +209,142 @@ class AssetCentricMigrationIO(
|
|
|
199
209
|
pending_instance_id=NodeId(item.space, item.external_id),
|
|
200
210
|
id=id_,
|
|
201
211
|
)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
class AnnotationMigrationIO(
    UploadableStorageIO[AssetCentricMigrationSelector, AssetCentricMapping[Annotation], InstanceApply]
):
    """Storage IO used when migrating Annotations.

    Only annotations whose type is listed in ``SUPPORTED_ANNOTATION_TYPES`` are streamed;
    every other annotation is skipped.

    Args:
        client: The ToolkitClient to use for CDF interactions.
        instance_space: The instance space to use for the migrated annotations. It must be set
            when streaming from a data set selector.
        default_asset_annotation_mapping: The default ingestion mapping to use for asset-linked
            annotations. Falls back to ``ASSET_ANNOTATIONS_ID`` when not given.
        default_file_annotation_mapping: The default ingestion mapping to use for file-linked
            annotations. Falls back to ``FILE_ANNOTATIONS_ID`` when not given.
    """

    KIND = "AnnotationMigration"
    SUPPORTED_DOWNLOAD_FORMATS = frozenset({".parquet", ".csv", ".ndjson"})
    SUPPORTED_COMPRESSIONS = frozenset({".gz"})
    SUPPORTED_READ_FORMATS = frozenset({".parquet", ".csv", ".ndjson", ".yaml", ".yml"})
    CHUNK_SIZE = 1000
    UPLOAD_ENDPOINT = InstanceIO.UPLOAD_ENDPOINT

    SUPPORTED_ANNOTATION_TYPES = frozenset({"diagrams.AssetLink", "diagrams.FileLink"})

    def __init__(
        self,
        client: ToolkitClient,
        instance_space: str | None = None,
        default_asset_annotation_mapping: str | None = None,
        default_file_annotation_mapping: str | None = None,
    ) -> None:
        super().__init__(client)
        self.annotation_io = AnnotationIO(client)
        self.instance_space = instance_space
        # A missing (or empty) default falls back to the built-in mapping identifiers.
        self.default_asset_annotation_mapping = default_asset_annotation_mapping or ASSET_ANNOTATIONS_ID
        self.default_file_annotation_mapping = default_file_annotation_mapping or FILE_ANNOTATIONS_ID

    def as_id(self, item: AssetCentricMapping[Annotation]) -> str:
        """Return the identifier of an item, derived from the id stored on its mapping."""
        return f"Annotation_{item.mapping.id}"

    def count(self, selector: AssetCentricMigrationSelector) -> int | None:
        """Return the number of items for a CSV selector, or None for any other selector."""
        if not isinstance(selector, MigrationCSVFileSelector):
            # There is no efficient way to count annotations in CDF.
            return None
        return len(selector.items)

    def stream_data(self, selector: AssetCentricMigrationSelector, limit: int | None = None) -> Iterable[Page]:
        """Yield pages of annotation mappings for the given selector.

        Args:
            selector: Either a data set selector or a CSV file selector.
            limit: Optional limit forwarded to the underlying stream.

        Raises:
            ToolkitNotImplementedError: If the selector type is not supported.
        """
        if isinstance(selector, MigrateDataSetSelector):
            batches = self._stream_from_dataset(selector, limit)
        elif isinstance(selector, MigrationCSVFileSelector):
            batches = self._stream_from_csv(selector, limit)
        else:
            raise ToolkitNotImplementedError(f"Selector {type(selector)} is not supported for stream_data")
        for batch in batches:
            yield Page(worker_id="main", items=batch)

    def _stream_from_dataset(
        self, selector: MigrateDataSetSelector, limit: int | None = None
    ) -> Iterator[Sequence[AssetCentricMapping[Annotation]]]:
        """Stream annotations through ``annotation_io`` and pair each one with a new mapping.

        Raises:
            ToolkitValueError: If no instance space was configured.
        """
        if self.instance_space is None:
            raise ToolkitValueError("Instance space must be provided for dataset-based annotation migration.")
        source_selector = selector.as_asset_centric_selector()
        for page in self.annotation_io.stream_data(source_selector, limit):
            migrated = AssetCentricMappingList[Annotation]([])
            for annotation in page.items:
                # The annotation_io should already filter out unsupported types;
                # this check is only a safeguard.
                if annotation.annotation_type in self.SUPPORTED_ANNOTATION_TYPES:
                    annotation_mapping = AnnotationMapping(
                        instance_id=EdgeId(space=self.instance_space, external_id=f"annotation_{annotation.id!r}"),
                        id=annotation.id,
                        ingestion_view=self._get_mapping(selector.ingestion_mapping, annotation),
                        preferred_consumer_view=selector.preferred_consumer_view,
                        # The PySDK is poorly typed.
                        annotation_type=annotation.annotation_type,  # type: ignore[arg-type]
                    )
                    migrated.append(AssetCentricMapping(mapping=annotation_mapping, resource=annotation))
            yield migrated

    def _stream_from_csv(
        self, selector: MigrationCSVFileSelector, limit: int | None = None
    ) -> Iterator[Sequence[AssetCentricMapping[Annotation]]]:
        """Look up the annotations referenced by the CSV selector, one batch at a time.

        Annotations that cannot be found or that have an unsupported type are skipped, and a
        warning with the number of skipped annotations is printed for each batch.
        """
        selected = selector.items
        if limit is not None:
            selected = MigrationMappingList(selected[:limit])
        for batch in chunker_sequence(selected, self.CHUNK_SIZE):
            retrieved = self.client.annotations.retrieve_multiple(batch.get_ids())
            annotation_by_id = {annotation.id: annotation for annotation in retrieved}
            missing_count = 0
            unsupported_count = 0
            found: list[AssetCentricMapping[Annotation]] = []
            for csv_mapping in batch:
                annotation = annotation_by_id.get(csv_mapping.id)
                if annotation is None:
                    missing_count += 1
                elif annotation.annotation_type not in self.SUPPORTED_ANNOTATION_TYPES:
                    unsupported_count += 1
                else:
                    csv_mapping.ingestion_view = self._get_mapping(csv_mapping.ingestion_view, annotation)
                    found.append(AssetCentricMapping(mapping=csv_mapping, resource=annotation))
            if found:
                yield found
            # Warnings are emitted after the batch has been handed to the consumer.
            if missing_count:
                MediumSeverityWarning(
                    f"Could not find {missing_count} annotations referenced in the CSV file. They will be skipped during migration."
                ).print_warning(include_timestamp=True, console=self.client.console)
            if unsupported_count:
                MediumSeverityWarning(
                    f"Found {unsupported_count} annotations with unsupported types. Only 'diagrams.AssetLink' and "
                    "'diagrams.FileLink' are supported. These annotations will be skipped during migration."
                ).print_warning(include_timestamp=True, console=self.client.console)

    def _get_mapping(self, current_mapping: str | None, resource: Annotation) -> str:
        """Return ``current_mapping`` if set, otherwise the default mapping for the annotation type.

        Raises:
            ToolkitValueError: If no mapping is given and the annotation type has no default.
        """
        if current_mapping:
            return current_mapping
        default_by_type = {
            "diagrams.AssetLink": self.default_asset_annotation_mapping,
            "diagrams.FileLink": self.default_file_annotation_mapping,
        }
        try:
            return default_by_type[resource.annotation_type]
        except KeyError as e:
            raise ToolkitValueError(
                f"Could not determine default ingestion view for annotation type '{resource.annotation_type}'. "
                "Please specify the ingestion view explicitly in the CSV file."
            ) from e

    def json_to_resource(self, item_json: dict[str, JsonVal]) -> InstanceApply:
        """Not supported for annotation migrations; always raises NotImplementedError."""
        raise NotImplementedError("Deserializing Annotation Migrations from JSON is not supported.")

    def data_to_json_chunk(
        self,
        data_chunk: Sequence[AssetCentricMapping[Annotation]],
        selector: AssetCentricMigrationSelector | None = None,
    ) -> list[dict[str, JsonVal]]:
        """Not supported for annotation migrations; always raises NotImplementedError."""
        raise NotImplementedError("Serializing Annotation Migrations to JSON is not supported.")
|
|
@@ -8,11 +8,11 @@ from cognite.client.data_classes.data_modeling import ViewId
|
|
|
8
8
|
from cognite_toolkit._cdf_tk.commands._migrate.data_classes import MigrationMappingList
|
|
9
9
|
from cognite_toolkit._cdf_tk.storageio import DataSelector
|
|
10
10
|
from cognite_toolkit._cdf_tk.storageio.selectors import DataSetSelector
|
|
11
|
-
from cognite_toolkit._cdf_tk.utils.useful_types import
|
|
11
|
+
from cognite_toolkit._cdf_tk.utils.useful_types import AssetCentricKindExtended
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class AssetCentricMigrationSelector(DataSelector, ABC):
|
|
15
|
-
kind:
|
|
15
|
+
kind: AssetCentricKindExtended
|
|
16
16
|
|
|
17
17
|
@abstractmethod
|
|
18
18
|
def get_ingestion_mappings(self) -> list[str]:
|
|
@@ -41,7 +41,7 @@ class MigrationCSVFileSelector(AssetCentricMigrationSelector):
|
|
|
41
41
|
|
|
42
42
|
class MigrateDataSetSelector(AssetCentricMigrationSelector):
|
|
43
43
|
type: Literal["migrateDataSet"] = "migrateDataSet"
|
|
44
|
-
kind:
|
|
44
|
+
kind: AssetCentricKindExtended
|
|
45
45
|
data_set_external_id: str
|
|
46
46
|
ingestion_mapping: str | None = None
|
|
47
47
|
preferred_consumer_view: ViewId | None = None
|
|
@@ -63,4 +63,9 @@ class MigrateDataSetSelector(AssetCentricMigrationSelector):
|
|
|
63
63
|
return [self.ingestion_mapping] if self.ingestion_mapping else []
|
|
64
64
|
|
|
65
65
|
def as_asset_centric_selector(self) -> DataSetSelector:
    """Return the data set selector used to download the resources behind this selector."""
    # Annotations are connected to file metadata, so for annotations we download the
    # file metadata and look up the annotations connected to each file metadata.
    kind = "FileMetadata" if self.kind == "Annotations" else self.kind
    return DataSetSelector(data_set_external_id=self.data_set_external_id, kind=kind)
|
|
@@ -7,12 +7,7 @@ from typing import Literal, cast
|
|
|
7
7
|
|
|
8
8
|
import questionary
|
|
9
9
|
from cognite.client.data_classes import DataSetUpdate
|
|
10
|
-
from cognite.client.data_classes.
|
|
11
|
-
from cognite.client.data_classes.data_modeling import (
|
|
12
|
-
EdgeList,
|
|
13
|
-
NodeId,
|
|
14
|
-
NodeList,
|
|
15
|
-
)
|
|
10
|
+
from cognite.client.data_classes.data_modeling import Edge, NodeId
|
|
16
11
|
from cognite.client.data_classes.data_modeling.statistics import SpaceStatistics
|
|
17
12
|
from cognite.client.exceptions import CogniteAPIError
|
|
18
13
|
from cognite.client.utils._identifier import InstanceId
|
|
@@ -46,6 +41,7 @@ from cognite_toolkit._cdf_tk.exceptions import (
|
|
|
46
41
|
AuthorizationError,
|
|
47
42
|
ToolkitMissingResourceError,
|
|
48
43
|
)
|
|
44
|
+
from cognite_toolkit._cdf_tk.protocols import ResourceResponseProtocol
|
|
49
45
|
from cognite_toolkit._cdf_tk.storageio import InstanceIO
|
|
50
46
|
from cognite_toolkit._cdf_tk.storageio.selectors import InstanceSelector
|
|
51
47
|
from cognite_toolkit._cdf_tk.tk_warnings import (
|
|
@@ -107,7 +103,7 @@ class ToDelete(ABC):
|
|
|
107
103
|
@abstractmethod
def get_process_function(
    self, client: ToolkitClient, console: Console, verbose: bool, process_results: ResourceDeployResult
) -> Callable[[list[ResourceResponseProtocol]], list[JsonVal]]:
    """Return the callable that turns a batch of resources into delete payload items.

    Subclasses must override this method; this base implementation only raises.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError
|
|
112
108
|
|
|
113
109
|
def get_extra_fields(self) -> dict[str, JsonVal]:
|
|
@@ -118,9 +114,10 @@ class ToDelete(ABC):
|
|
|
118
114
|
class DataModelingToDelete(ToDelete):
|
|
119
115
|
def get_process_function(
    self, client: ToolkitClient, console: Console, verbose: bool, process_results: ResourceDeployResult
) -> Callable[[list[ResourceResponseProtocol]], list[JsonVal]]:
    """Return a callable that dumps the identifier of every resource in a batch.

    None of the arguments are used by this implementation.

    Returns:
        A function mapping a list of resources to the list of their dumped ids,
        where each id is dumped with ``include_type=False``.
    """

    def as_id(chunk: list[ResourceResponseProtocol]) -> list[JsonVal]:
        dumped_ids: list[JsonVal] = []
        for resource in chunk:
            # We know that all data modeling resources implement as_id
            identifier = resource.as_id()  # type: ignore[attr-defined]
            dumped_ids.append(identifier.dump(include_type=False))
        return dumped_ids

    return as_id
|
|
126
123
|
|
|
@@ -129,11 +126,11 @@ class DataModelingToDelete(ToDelete):
|
|
|
129
126
|
class EdgeToDelete(ToDelete):
|
|
130
127
|
def get_process_function(
    self, client: ToolkitClient, console: Console, verbose: bool, process_results: ResourceDeployResult
) -> Callable[[list[ResourceResponseProtocol]], list[JsonVal]]:
    """Return a callable that converts a batch of edges into delete payload items.

    None of the arguments are used by this implementation.

    Returns:
        A function mapping a list of edges to dictionaries with the keys
        ``space``, ``externalId`` and ``instanceType`` (always ``"edge"``).
    """

    def as_id(chunk: list[ResourceResponseProtocol]) -> list[JsonVal]:
        # The cast only narrows the type for the type checker; it has no runtime effect.
        edges = cast("list[Edge]", chunk)
        payload: list[JsonVal] = []
        for edge in edges:
            payload.append({"space": edge.space, "externalId": edge.external_id, "instanceType": "edge"})
        return payload

    return as_id
|
|
@@ -146,9 +143,10 @@ class NodesToDelete(ToDelete):
|
|
|
146
143
|
|
|
147
144
|
def get_process_function(
|
|
148
145
|
self, client: ToolkitClient, console: Console, verbose: bool, process_results: ResourceDeployResult
|
|
149
|
-
) -> Callable[[
|
|
150
|
-
def check_for_data(chunk:
|
|
151
|
-
|
|
146
|
+
) -> Callable[[list[ResourceResponseProtocol]], list[JsonVal]]:
|
|
147
|
+
def check_for_data(chunk: list[ResourceResponseProtocol]) -> list[JsonVal]:
|
|
148
|
+
# We know that all node resources implement as_id
|
|
149
|
+
node_ids = [item.as_id() for item in chunk] # type: ignore[attr-defined]
|
|
152
150
|
found_ids: set[InstanceId] = set()
|
|
153
151
|
if not self.delete_datapoints:
|
|
154
152
|
timeseries = client.time_series.retrieve_multiple(instance_ids=node_ids, ignore_unknown_ids=True)
|
|
@@ -164,8 +162,7 @@ class NodesToDelete(ToDelete):
|
|
|
164
162
|
dumped = node_id.dump(include_instance_type=True)
|
|
165
163
|
# The delete endpoint expects "instanceType" instead of "type"
|
|
166
164
|
dumped["instanceType"] = dumped.pop("type")
|
|
167
|
-
|
|
168
|
-
result.append(dumped) # type: ignore[arg-type]
|
|
165
|
+
result.append(dumped)
|
|
169
166
|
return result
|
|
170
167
|
|
|
171
168
|
return check_for_data
|
|
@@ -175,9 +172,10 @@ class NodesToDelete(ToDelete):
|
|
|
175
172
|
class IdResourceToDelete(ToDelete):
|
|
176
173
|
def get_process_function(
    self, client: ToolkitClient, console: Console, verbose: bool, process_results: ResourceDeployResult
) -> Callable[[list[ResourceResponseProtocol]], list[JsonVal]]:
    """Return a callable that converts a batch of resources into ``{"id": ...}`` items.

    None of the arguments are used by this implementation.

    Returns:
        A function mapping a list of resources to one ``{"id": <resource.id>}``
        dictionary per resource.
    """

    def as_id(chunk: list[ResourceResponseProtocol]) -> list[JsonVal]:
        payload: list[JsonVal] = []
        for resource in chunk:
            # We know that all id resources have an id attribute
            payload.append({"id": resource.id})  # type: ignore[attr-defined]
        return payload

    return as_id
|
|
183
181
|
|
|
@@ -186,9 +184,10 @@ class IdResourceToDelete(ToDelete):
|
|
|
186
184
|
class ExternalIdToDelete(ToDelete):
|
|
187
185
|
def get_process_function(
    self, client: ToolkitClient, console: Console, verbose: bool, process_results: ResourceDeployResult
) -> Callable[[list[ResourceResponseProtocol]], list[JsonVal]]:
    """Return a callable that converts a batch of resources into ``{"externalId": ...}`` items.

    None of the arguments are used by this implementation.

    Returns:
        A function mapping a list of resources to one
        ``{"externalId": <resource.external_id>}`` dictionary per resource.
    """

    def as_external_id(chunk: list[ResourceResponseProtocol]) -> list[JsonVal]:
        payload: list[JsonVal] = []
        for resource in chunk:
            # We know that all external id resources have an external_id attribute
            payload.append({"externalId": resource.external_id})  # type: ignore[attr-defined]
        return payload

    return as_external_id
|
|
194
193
|
|
|
@@ -321,7 +320,7 @@ class PurgeCommand(ToolkitCommand):
|
|
|
321
320
|
iteration_count = item.total // self.BATCH_SIZE_DM + (
|
|
322
321
|
1 if item.total % self.BATCH_SIZE_DM > 0 else 0
|
|
323
322
|
)
|
|
324
|
-
executor = ProducerWorkerExecutor[
|
|
323
|
+
executor = ProducerWorkerExecutor[list[ResourceResponseProtocol], list[JsonVal]](
|
|
325
324
|
download_iterable=self._iterate_batch(
|
|
326
325
|
item.crud, space, data_set_external_id, batch_size=self.BATCH_SIZE_DM
|
|
327
326
|
),
|
|
@@ -348,13 +347,13 @@ class PurgeCommand(ToolkitCommand):
|
|
|
348
347
|
@staticmethod
|
|
349
348
|
def _iterate_batch(
|
|
350
349
|
crud: ResourceCRUD, selected_space: str | None, data_set_external_id: str | None, batch_size: int
|
|
351
|
-
) -> Iterable[
|
|
352
|
-
batch =
|
|
350
|
+
) -> Iterable[list[ResourceResponseProtocol]]:
|
|
351
|
+
batch: list[ResourceResponseProtocol] = []
|
|
353
352
|
for resource in crud.iterate(space=selected_space, data_set_external_id=data_set_external_id):
|
|
354
353
|
batch.append(resource)
|
|
355
354
|
if len(batch) >= batch_size:
|
|
356
355
|
yield batch
|
|
357
|
-
batch =
|
|
356
|
+
batch = []
|
|
358
357
|
if batch:
|
|
359
358
|
yield batch
|
|
360
359
|
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import questionary
|
|
2
|
+
|
|
3
|
+
# Shared questionary style. Each entry pairs a prompt element with its style string;
# an empty style string leaves that element unstyled.
custom_style_fancy = questionary.Style(
    [
        # Token in front of the question.
        ("qmark", "fg:#673ab7"),
        # Question text.
        ("question", "bold"),
        # Submitted answer text behind the question.
        ("answer", "fg:#f44336 bold"),
        # Pointer used in select and checkbox prompts.
        ("pointer", "fg:#673ab7 bold"),
        # Pointed-at choice in select and checkbox prompts.
        ("highlighted", "fg:#673ab7 bold"),
        # Selected item of a checkbox.
        ("selected", "fg:#673ab7"),
        # Separator in lists.
        ("separator", "fg:#cc5454"),
        # User instructions for select, rawselect, checkbox.
        ("instruction", ""),
        # Plain text.
        ("text", ""),
        # Disabled choices for select and checkbox prompts.
        ("disabled", "fg:#858585 italic"),
    ]
)
|
|
@@ -1,32 +1,33 @@
|
|
|
1
|
+
from collections import Counter
|
|
1
2
|
from collections.abc import Sequence
|
|
2
3
|
from functools import partial
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
|
|
5
|
-
from cognite.client.data_classes.
|
|
6
|
-
|
|
6
|
+
from cognite.client.data_classes.data_modeling import (
|
|
7
|
+
ViewId,
|
|
8
|
+
)
|
|
7
9
|
from rich.console import Console
|
|
8
10
|
|
|
9
11
|
from cognite_toolkit._cdf_tk.client import ToolkitClient
|
|
10
|
-
from cognite_toolkit._cdf_tk.constants import
|
|
12
|
+
from cognite_toolkit._cdf_tk.constants import DATA_MANIFEST_SUFFIX, DATA_RESOURCE_DIR
|
|
13
|
+
from cognite_toolkit._cdf_tk.cruds import ViewCRUD
|
|
11
14
|
from cognite_toolkit._cdf_tk.exceptions import ToolkitValueError
|
|
15
|
+
from cognite_toolkit._cdf_tk.protocols import T_ResourceRequest, T_ResourceResponse
|
|
12
16
|
from cognite_toolkit._cdf_tk.storageio import (
|
|
13
17
|
T_Selector,
|
|
14
18
|
UploadableStorageIO,
|
|
15
|
-
are_same_kind,
|
|
16
19
|
get_upload_io,
|
|
17
20
|
)
|
|
18
|
-
from cognite_toolkit._cdf_tk.storageio._base import
|
|
19
|
-
from cognite_toolkit._cdf_tk.storageio.selectors import Selector,
|
|
20
|
-
from cognite_toolkit._cdf_tk.
|
|
21
|
-
from cognite_toolkit._cdf_tk.tk_warnings
|
|
21
|
+
from cognite_toolkit._cdf_tk.storageio._base import TableUploadableStorageIO, UploadItem
|
|
22
|
+
from cognite_toolkit._cdf_tk.storageio.selectors import Selector, load_selector
|
|
23
|
+
from cognite_toolkit._cdf_tk.storageio.selectors._instances import InstanceSpaceSelector
|
|
24
|
+
from cognite_toolkit._cdf_tk.tk_warnings import HighSeverityWarning, MediumSeverityWarning, ToolkitWarning
|
|
22
25
|
from cognite_toolkit._cdf_tk.utils.auth import EnvironmentVariables
|
|
23
|
-
from cognite_toolkit._cdf_tk.utils.
|
|
24
|
-
from cognite_toolkit._cdf_tk.utils.fileio import TABLE_READ_CLS_BY_FORMAT, FileReader
|
|
26
|
+
from cognite_toolkit._cdf_tk.utils.fileio import MultiFileReader
|
|
25
27
|
from cognite_toolkit._cdf_tk.utils.http_client import HTTPClient, ItemMessage, SuccessResponseItems
|
|
26
28
|
from cognite_toolkit._cdf_tk.utils.producer_worker import ProducerWorkerExecutor
|
|
27
29
|
from cognite_toolkit._cdf_tk.utils.progress_tracker import ProgressTracker
|
|
28
30
|
from cognite_toolkit._cdf_tk.utils.useful_types import JsonVal
|
|
29
|
-
from cognite_toolkit._cdf_tk.validation import humanize_validation_error
|
|
30
31
|
|
|
31
32
|
from ._base import ToolkitCommand
|
|
32
33
|
from .deploy import DeployCommand
|
|
@@ -76,53 +77,76 @@ class UploadCommand(ToolkitCommand):
|
|
|
76
77
|
│ │ └── table2.Table.yaml
|
|
77
78
|
│ └── ...
|
|
78
79
|
├── datafile1.kind.ndjson # Data file of a specific kind
|
|
79
|
-
├── datafile1.
|
|
80
|
+
├── datafile1.Manifest.yaml # Manifest for datafile1
|
|
80
81
|
├── datafile2.kind2.ndjson # Another data file of the same or different kind
|
|
81
|
-
├── datafile2.
|
|
82
|
+
├── datafile2.Manifest.yaml # Manifest file for datafile2
|
|
82
83
|
└── ...
|
|
83
84
|
"""
|
|
84
|
-
console =
|
|
85
|
-
data_files_by_selector = self._find_data_files(input_dir
|
|
85
|
+
console = client.console
|
|
86
|
+
data_files_by_selector = self._find_data_files(input_dir)
|
|
86
87
|
|
|
87
88
|
self._deploy_resource_folder(input_dir / DATA_RESOURCE_DIR, deploy_resources, client, console, dry_run, verbose)
|
|
88
89
|
|
|
90
|
+
data_files_by_selector = self._topological_sort_if_instance_selector(data_files_by_selector, client)
|
|
91
|
+
|
|
89
92
|
self._upload_data(data_files_by_selector, client, dry_run, input_dir, console, verbose)
|
|
90
93
|
|
|
94
|
+
def _topological_sort_if_instance_selector(
|
|
95
|
+
self, data_files_by_selector: dict[Selector, list[Path]], client: ToolkitClient
|
|
96
|
+
) -> dict[Selector, list[Path]]:
|
|
97
|
+
"""Topologically sorts InstanceSpaceSelectors (if they are present) to determine the order of upload based on container dependencies from the views.
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
data_files_by_selector: A dictionary mapping selectors to their data files.
|
|
101
|
+
client: The cognite client to use for the upload.
|
|
102
|
+
|
|
103
|
+
Returns:
|
|
104
|
+
A dictionary mapping selectors to their data files with necessary preprocessing.
|
|
105
|
+
"""
|
|
106
|
+
counts = Counter(type(selector) for selector in data_files_by_selector.keys())
|
|
107
|
+
if counts[InstanceSpaceSelector] <= 1:
|
|
108
|
+
return data_files_by_selector
|
|
109
|
+
|
|
110
|
+
selector_by_view_id: dict[ViewId, Selector] = {}
|
|
111
|
+
for selector in data_files_by_selector:
|
|
112
|
+
if isinstance(selector, InstanceSpaceSelector) and selector.view is not None:
|
|
113
|
+
selector_by_view_id[selector.view.as_id()] = selector
|
|
114
|
+
|
|
115
|
+
view_dependencies = ViewCRUD.create_loader(client).topological_sort_container_constraints(
|
|
116
|
+
list(selector_by_view_id.keys())
|
|
117
|
+
)
|
|
118
|
+
prepared_selectors: dict[Selector, list[Path]] = {}
|
|
119
|
+
|
|
120
|
+
# Reorder selectors according to the dependency-sorted view list
|
|
121
|
+
for view_id in view_dependencies:
|
|
122
|
+
selector = selector_by_view_id[view_id]
|
|
123
|
+
prepared_selectors[selector] = data_files_by_selector[selector]
|
|
124
|
+
|
|
125
|
+
# Preserve selectors that aren't affected by view dependencies
|
|
126
|
+
# (e.g., raw tables, time series, non-view instance data)
|
|
127
|
+
for selector in data_files_by_selector.keys():
|
|
128
|
+
if selector not in prepared_selectors:
|
|
129
|
+
prepared_selectors[selector] = data_files_by_selector[selector]
|
|
130
|
+
|
|
131
|
+
return prepared_selectors
|
|
132
|
+
|
|
91
133
|
def _find_data_files(
|
|
92
134
|
self,
|
|
93
135
|
input_dir: Path,
|
|
94
|
-
kind: str | None = None,
|
|
95
136
|
) -> dict[Selector, list[Path]]:
|
|
96
137
|
"""Finds data files and their corresponding metadata files in the input directory."""
|
|
97
|
-
manifest_file_endswith = f".{DATA_MANIFEST_STEM}.yaml"
|
|
98
138
|
data_files_by_metadata: dict[Selector, list[Path]] = {}
|
|
99
|
-
for
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
if kind is not None and data_files:
|
|
107
|
-
data_files = [data_file for data_file in data_files if are_same_kind(kind, data_file)]
|
|
108
|
-
if not data_files:
|
|
109
|
-
continue
|
|
139
|
+
for manifest_file in input_dir.glob(f"*{DATA_MANIFEST_SUFFIX}"):
|
|
140
|
+
selector_or_warning = load_selector(manifest_file)
|
|
141
|
+
if isinstance(selector_or_warning, ToolkitWarning):
|
|
142
|
+
self.warn(selector_or_warning)
|
|
143
|
+
continue
|
|
144
|
+
selector: Selector = selector_or_warning
|
|
145
|
+
data_files = selector.find_data_files(input_dir, manifest_file)
|
|
110
146
|
if not data_files:
|
|
111
147
|
self.warn(
|
|
112
148
|
MediumSeverityWarning(
|
|
113
|
-
f"Metadata file {
|
|
114
|
-
)
|
|
115
|
-
)
|
|
116
|
-
continue
|
|
117
|
-
|
|
118
|
-
selector_dict = read_yaml_file(metadata_file, expected_output="dict")
|
|
119
|
-
try:
|
|
120
|
-
selector = SelectorAdapter.validate_python(selector_dict)
|
|
121
|
-
except ValidationError as e:
|
|
122
|
-
errors = humanize_validation_error(e)
|
|
123
|
-
self.warn(
|
|
124
|
-
ResourceFormatWarning(
|
|
125
|
-
metadata_file, tuple(errors), text="Invalid selector in metadata file, skipping."
|
|
149
|
+
f"Metadata file {manifest_file.as_posix()!r} has no corresponding data files, skipping.",
|
|
126
150
|
)
|
|
127
151
|
)
|
|
128
152
|
continue
|
|
@@ -178,49 +202,48 @@ class UploadCommand(ToolkitCommand):
|
|
|
178
202
|
io = self._create_selected_io(selector, datafiles[0], client)
|
|
179
203
|
if io is None:
|
|
180
204
|
continue
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
tracker=tracker,
|
|
202
|
-
console=console,
|
|
203
|
-
),
|
|
204
|
-
iteration_count=None,
|
|
205
|
-
max_queue_size=self._MAX_QUEUE_SIZE,
|
|
206
|
-
download_description=f"Reading {file_count:,}/{total_file_count + 1:,}: {file_display.as_posix()!s}",
|
|
207
|
-
process_description="Processing",
|
|
208
|
-
write_description=f"{action} {selector.display_name!r}",
|
|
205
|
+
reader = MultiFileReader(datafiles)
|
|
206
|
+
if reader.is_table and not isinstance(io, TableUploadableStorageIO):
|
|
207
|
+
raise ToolkitValueError(f"{selector.display_name} does not support {reader.format!r} files.")
|
|
208
|
+
|
|
209
|
+
chunk_count = io.count_chunks(reader)
|
|
210
|
+
iteration_count = chunk_count // io.CHUNK_SIZE + (1 if chunk_count % io.CHUNK_SIZE > 0 else 0)
|
|
211
|
+
|
|
212
|
+
tracker = ProgressTracker[str]([self._UPLOAD])
|
|
213
|
+
executor = ProducerWorkerExecutor[list[tuple[str, dict[str, JsonVal]]], Sequence[UploadItem]](
|
|
214
|
+
download_iterable=io.read_chunks(reader, selector),
|
|
215
|
+
process=partial(io.rows_to_data, selector=selector)
|
|
216
|
+
if reader.is_table and isinstance(io, TableUploadableStorageIO)
|
|
217
|
+
else io.json_chunk_to_data,
|
|
218
|
+
write=partial(
|
|
219
|
+
self._upload_items,
|
|
220
|
+
upload_client=upload_client,
|
|
221
|
+
io=io,
|
|
222
|
+
dry_run=dry_run,
|
|
223
|
+
selector=selector,
|
|
224
|
+
tracker=tracker,
|
|
209
225
|
console=console,
|
|
210
|
-
)
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
226
|
+
),
|
|
227
|
+
iteration_count=iteration_count,
|
|
228
|
+
max_queue_size=self._MAX_QUEUE_SIZE,
|
|
229
|
+
download_description=f"Reading {selector.display_name!r} files",
|
|
230
|
+
process_description="Processing",
|
|
231
|
+
write_description=f"{action} {selector.display_name!r}",
|
|
232
|
+
console=console,
|
|
233
|
+
)
|
|
234
|
+
executor.run()
|
|
235
|
+
file_count += len(datafiles)
|
|
236
|
+
executor.raise_on_error()
|
|
237
|
+
final_action = "Uploaded" if not dry_run else "Would upload"
|
|
238
|
+
suffix = " successfully" if not dry_run else ""
|
|
239
|
+
results = tracker.aggregate()
|
|
240
|
+
success = results.get((self._UPLOAD, "success"), 0)
|
|
241
|
+
failed = results.get((self._UPLOAD, "failed"), 0)
|
|
242
|
+
if failed > 0:
|
|
243
|
+
suffix += f", {failed:,} failed"
|
|
244
|
+
console.print(
|
|
245
|
+
f"{final_action} {success:,} {selector.display_name} from {len(datafiles)} files{suffix}."
|
|
246
|
+
)
|
|
224
247
|
|
|
225
248
|
@staticmethod
|
|
226
249
|
def _path_as_display_name(input_path: Path, cwd: Path = Path.cwd()) -> Path:
|
|
@@ -233,7 +256,7 @@ class UploadCommand(ToolkitCommand):
|
|
|
233
256
|
self, selector: Selector, data_file: Path, client: ToolkitClient
|
|
234
257
|
) -> UploadableStorageIO | None:
|
|
235
258
|
try:
|
|
236
|
-
io_cls = get_upload_io(
|
|
259
|
+
io_cls = get_upload_io(selector)
|
|
237
260
|
except ValueError as e:
|
|
238
261
|
self.warn(HighSeverityWarning(f"Could not find StorageIO for selector {selector}: {e}"))
|
|
239
262
|
return None
|
|
@@ -242,9 +265,9 @@ class UploadCommand(ToolkitCommand):
|
|
|
242
265
|
@classmethod
|
|
243
266
|
def _upload_items(
|
|
244
267
|
cls,
|
|
245
|
-
data_chunk: Sequence[UploadItem],
|
|
268
|
+
data_chunk: Sequence[UploadItem[T_ResourceRequest]],
|
|
246
269
|
upload_client: HTTPClient,
|
|
247
|
-
io: UploadableStorageIO[T_Selector,
|
|
270
|
+
io: UploadableStorageIO[T_Selector, T_ResourceResponse, T_ResourceRequest],
|
|
248
271
|
selector: T_Selector,
|
|
249
272
|
dry_run: bool,
|
|
250
273
|
tracker: ProgressTracker[str],
|