cognite-toolkit 0.6.97__py3-none-any.whl → 0.7.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. cognite_toolkit/_cdf.py +16 -17
  2. cognite_toolkit/_cdf_tk/apps/__init__.py +2 -0
  3. cognite_toolkit/_cdf_tk/apps/_core_app.py +13 -5
  4. cognite_toolkit/_cdf_tk/apps/_data_app.py +1 -1
  5. cognite_toolkit/_cdf_tk/apps/_dev_app.py +86 -0
  6. cognite_toolkit/_cdf_tk/apps/_download_app.py +692 -24
  7. cognite_toolkit/_cdf_tk/apps/_dump_app.py +43 -101
  8. cognite_toolkit/_cdf_tk/apps/_landing_app.py +18 -4
  9. cognite_toolkit/_cdf_tk/apps/_migrate_app.py +249 -9
  10. cognite_toolkit/_cdf_tk/apps/_modules_app.py +0 -3
  11. cognite_toolkit/_cdf_tk/apps/_purge.py +15 -43
  12. cognite_toolkit/_cdf_tk/apps/_run.py +11 -0
  13. cognite_toolkit/_cdf_tk/apps/_upload_app.py +45 -6
  14. cognite_toolkit/_cdf_tk/builders/__init__.py +2 -2
  15. cognite_toolkit/_cdf_tk/builders/_base.py +28 -42
  16. cognite_toolkit/_cdf_tk/cdf_toml.py +20 -1
  17. cognite_toolkit/_cdf_tk/client/_toolkit_client.py +23 -3
  18. cognite_toolkit/_cdf_tk/client/api/extended_functions.py +6 -9
  19. cognite_toolkit/_cdf_tk/client/api/infield.py +93 -1
  20. cognite_toolkit/_cdf_tk/client/api/migration.py +175 -1
  21. cognite_toolkit/_cdf_tk/client/api/streams.py +84 -0
  22. cognite_toolkit/_cdf_tk/client/api/three_d.py +50 -0
  23. cognite_toolkit/_cdf_tk/client/data_classes/base.py +25 -1
  24. cognite_toolkit/_cdf_tk/client/data_classes/canvas.py +46 -3
  25. cognite_toolkit/_cdf_tk/client/data_classes/charts.py +3 -3
  26. cognite_toolkit/_cdf_tk/client/data_classes/charts_data.py +95 -213
  27. cognite_toolkit/_cdf_tk/client/data_classes/infield.py +32 -18
  28. cognite_toolkit/_cdf_tk/client/data_classes/migration.py +10 -2
  29. cognite_toolkit/_cdf_tk/client/data_classes/streams.py +90 -0
  30. cognite_toolkit/_cdf_tk/client/data_classes/three_d.py +47 -0
  31. cognite_toolkit/_cdf_tk/client/testing.py +18 -2
  32. cognite_toolkit/_cdf_tk/commands/__init__.py +6 -6
  33. cognite_toolkit/_cdf_tk/commands/_changes.py +3 -42
  34. cognite_toolkit/_cdf_tk/commands/_download.py +21 -11
  35. cognite_toolkit/_cdf_tk/commands/_migrate/__init__.py +0 -2
  36. cognite_toolkit/_cdf_tk/commands/_migrate/command.py +22 -20
  37. cognite_toolkit/_cdf_tk/commands/_migrate/conversion.py +133 -91
  38. cognite_toolkit/_cdf_tk/commands/_migrate/data_classes.py +73 -22
  39. cognite_toolkit/_cdf_tk/commands/_migrate/data_mapper.py +311 -43
  40. cognite_toolkit/_cdf_tk/commands/_migrate/default_mappings.py +5 -5
  41. cognite_toolkit/_cdf_tk/commands/_migrate/issues.py +33 -0
  42. cognite_toolkit/_cdf_tk/commands/_migrate/migration_io.py +157 -8
  43. cognite_toolkit/_cdf_tk/commands/_migrate/selectors.py +9 -4
  44. cognite_toolkit/_cdf_tk/commands/_purge.py +27 -28
  45. cognite_toolkit/_cdf_tk/commands/_questionary_style.py +16 -0
  46. cognite_toolkit/_cdf_tk/commands/_upload.py +109 -86
  47. cognite_toolkit/_cdf_tk/commands/about.py +221 -0
  48. cognite_toolkit/_cdf_tk/commands/auth.py +19 -12
  49. cognite_toolkit/_cdf_tk/commands/build_cmd.py +15 -61
  50. cognite_toolkit/_cdf_tk/commands/clean.py +63 -16
  51. cognite_toolkit/_cdf_tk/commands/deploy.py +20 -17
  52. cognite_toolkit/_cdf_tk/commands/dump_resource.py +6 -4
  53. cognite_toolkit/_cdf_tk/commands/init.py +225 -3
  54. cognite_toolkit/_cdf_tk/commands/modules.py +20 -44
  55. cognite_toolkit/_cdf_tk/commands/pull.py +6 -19
  56. cognite_toolkit/_cdf_tk/commands/resources.py +179 -0
  57. cognite_toolkit/_cdf_tk/constants.py +20 -1
  58. cognite_toolkit/_cdf_tk/cruds/__init__.py +19 -5
  59. cognite_toolkit/_cdf_tk/cruds/_base_cruds.py +14 -70
  60. cognite_toolkit/_cdf_tk/cruds/_data_cruds.py +8 -17
  61. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/__init__.py +4 -1
  62. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/agent.py +11 -9
  63. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/auth.py +4 -14
  64. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/classic.py +44 -43
  65. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/configuration.py +4 -11
  66. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/data_organization.py +4 -13
  67. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/datamodel.py +205 -66
  68. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/extraction_pipeline.py +5 -17
  69. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/fieldops.py +116 -27
  70. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/file.py +6 -27
  71. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/function.py +9 -28
  72. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/hosted_extractors.py +12 -30
  73. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/industrial_tool.py +3 -7
  74. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/location.py +3 -15
  75. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/migration.py +4 -12
  76. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/raw.py +4 -10
  77. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/relationship.py +3 -8
  78. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/robotics.py +15 -44
  79. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/streams.py +94 -0
  80. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/three_d_model.py +3 -7
  81. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/timeseries.py +5 -15
  82. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/transformation.py +39 -31
  83. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/workflow.py +20 -40
  84. cognite_toolkit/_cdf_tk/cruds/_worker.py +24 -36
  85. cognite_toolkit/_cdf_tk/feature_flags.py +16 -36
  86. cognite_toolkit/_cdf_tk/plugins.py +2 -1
  87. cognite_toolkit/_cdf_tk/resource_classes/__init__.py +4 -0
  88. cognite_toolkit/_cdf_tk/resource_classes/capabilities.py +12 -0
  89. cognite_toolkit/_cdf_tk/resource_classes/functions.py +3 -1
  90. cognite_toolkit/_cdf_tk/resource_classes/infield_cdm_location_config.py +109 -0
  91. cognite_toolkit/_cdf_tk/resource_classes/migration.py +8 -17
  92. cognite_toolkit/_cdf_tk/resource_classes/streams.py +29 -0
  93. cognite_toolkit/_cdf_tk/storageio/__init__.py +9 -21
  94. cognite_toolkit/_cdf_tk/storageio/_annotations.py +19 -16
  95. cognite_toolkit/_cdf_tk/storageio/_applications.py +338 -26
  96. cognite_toolkit/_cdf_tk/storageio/_asset_centric.py +67 -104
  97. cognite_toolkit/_cdf_tk/storageio/_base.py +61 -29
  98. cognite_toolkit/_cdf_tk/storageio/_datapoints.py +276 -20
  99. cognite_toolkit/_cdf_tk/storageio/_file_content.py +436 -0
  100. cognite_toolkit/_cdf_tk/storageio/_instances.py +34 -2
  101. cognite_toolkit/_cdf_tk/storageio/_raw.py +26 -0
  102. cognite_toolkit/_cdf_tk/storageio/selectors/__init__.py +62 -4
  103. cognite_toolkit/_cdf_tk/storageio/selectors/_base.py +14 -2
  104. cognite_toolkit/_cdf_tk/storageio/selectors/_canvas.py +14 -0
  105. cognite_toolkit/_cdf_tk/storageio/selectors/_charts.py +14 -0
  106. cognite_toolkit/_cdf_tk/storageio/selectors/_datapoints.py +23 -3
  107. cognite_toolkit/_cdf_tk/storageio/selectors/_file_content.py +164 -0
  108. cognite_toolkit/_cdf_tk/tk_warnings/other.py +4 -0
  109. cognite_toolkit/_cdf_tk/tracker.py +2 -2
  110. cognite_toolkit/_cdf_tk/utils/dtype_conversion.py +9 -3
  111. cognite_toolkit/_cdf_tk/utils/fileio/__init__.py +2 -0
  112. cognite_toolkit/_cdf_tk/utils/fileio/_base.py +5 -1
  113. cognite_toolkit/_cdf_tk/utils/fileio/_readers.py +112 -20
  114. cognite_toolkit/_cdf_tk/utils/fileio/_writers.py +15 -15
  115. cognite_toolkit/_cdf_tk/utils/http_client/_client.py +284 -18
  116. cognite_toolkit/_cdf_tk/utils/http_client/_data_classes.py +50 -4
  117. cognite_toolkit/_cdf_tk/utils/http_client/_data_classes2.py +187 -0
  118. cognite_toolkit/_cdf_tk/utils/interactive_select.py +9 -14
  119. cognite_toolkit/_cdf_tk/utils/sql_parser.py +2 -3
  120. cognite_toolkit/_cdf_tk/utils/useful_types.py +6 -2
  121. cognite_toolkit/_cdf_tk/validation.py +79 -1
  122. cognite_toolkit/_repo_files/GitHub/.github/workflows/deploy.yaml +1 -1
  123. cognite_toolkit/_repo_files/GitHub/.github/workflows/dry-run.yaml +1 -1
  124. cognite_toolkit/_resources/cdf.toml +5 -4
  125. cognite_toolkit/_version.py +1 -1
  126. cognite_toolkit/config.dev.yaml +13 -0
  127. {cognite_toolkit-0.6.97.dist-info → cognite_toolkit-0.7.30.dist-info}/METADATA +24 -24
  128. {cognite_toolkit-0.6.97.dist-info → cognite_toolkit-0.7.30.dist-info}/RECORD +153 -143
  129. cognite_toolkit-0.7.30.dist-info/WHEEL +4 -0
  130. {cognite_toolkit-0.6.97.dist-info → cognite_toolkit-0.7.30.dist-info}/entry_points.txt +1 -0
  131. cognite_toolkit/_cdf_tk/commands/_migrate/canvas.py +0 -201
  132. cognite_toolkit/_cdf_tk/commands/dump_data.py +0 -489
  133. cognite_toolkit/_cdf_tk/commands/featureflag.py +0 -27
  134. cognite_toolkit/_cdf_tk/utils/table_writers.py +0 -434
  135. cognite_toolkit-0.6.97.dist-info/WHEEL +0 -4
  136. cognite_toolkit-0.6.97.dist-info/licenses/LICENSE +0 -18
cognite_toolkit/_cdf_tk/commands/_migrate/migration_io.py

@@ -1,29 +1,39 @@
-from collections.abc import Iterator, Mapping, Sequence
+from collections.abc import Iterable, Iterator, Mapping, Sequence
 from typing import ClassVar, cast

-from cognite.client.data_classes.data_modeling import InstanceApply, NodeId
+from cognite.client.data_classes import Annotation
+from cognite.client.data_classes.data_modeling import EdgeId, InstanceApply, NodeId

 from cognite_toolkit._cdf_tk.client import ToolkitClient
 from cognite_toolkit._cdf_tk.client.data_classes.pending_instances_ids import PendingInstanceId
 from cognite_toolkit._cdf_tk.constants import MISSING_EXTERNAL_ID, MISSING_INSTANCE_SPACE
-from cognite_toolkit._cdf_tk.exceptions import ToolkitNotImplementedError
+from cognite_toolkit._cdf_tk.exceptions import ToolkitNotImplementedError, ToolkitValueError
 from cognite_toolkit._cdf_tk.storageio import (
+    AnnotationIO,
     HierarchyIO,
     InstanceIO,
     UploadableStorageIO,
 )
 from cognite_toolkit._cdf_tk.storageio._base import Page, UploadItem
+from cognite_toolkit._cdf_tk.tk_warnings import MediumSeverityWarning
 from cognite_toolkit._cdf_tk.utils.collection import chunker_sequence
 from cognite_toolkit._cdf_tk.utils.http_client import HTTPClient, HTTPMessage, ItemsRequest, SuccessResponseItems
 from cognite_toolkit._cdf_tk.utils.useful_types import (
-    AssetCentricKind,
+    AssetCentricKindExtended,
     AssetCentricType,
     JsonVal,
     T_AssetCentricResource,
 )

-from .data_classes import AssetCentricMapping, AssetCentricMappingList, MigrationMapping, MigrationMappingList
+from .data_classes import (
+    AnnotationMapping,
+    AssetCentricMapping,
+    AssetCentricMappingList,
+    MigrationMapping,
+    MigrationMappingList,
+)
 from .data_model import INSTANCE_SOURCE_VIEW_ID
+from .default_mappings import ASSET_ANNOTATIONS_ID, FILE_ANNOTATIONS_ID
 from .selectors import AssetCentricMigrationSelector, MigrateDataSetSelector, MigrationCSVFileSelector


@@ -37,7 +47,7 @@ class AssetCentricMigrationIO(
     CHUNK_SIZE = 1000
     UPLOAD_ENDPOINT = InstanceIO.UPLOAD_ENDPOINT

-    PENDING_INSTANCE_ID_ENDPOINT_BY_KIND: ClassVar[Mapping[AssetCentricKind, str]] = {
+    PENDING_INSTANCE_ID_ENDPOINT_BY_KIND: ClassVar[Mapping[AssetCentricKindExtended, str]] = {
         "TimeSeries": "/timeseries/set-pending-instance-ids",
         "FileMetadata": "/files/set-pending-instance-ids",
     }
@@ -113,8 +123,8 @@ class AssetCentricMigrationIO(
             yield mapping_list

     @staticmethod
-    def _kind_to_resource_type(kind: AssetCentricKind) -> AssetCentricType:
-        mapping: dict[AssetCentricKind, AssetCentricType] = {
+    def _kind_to_resource_type(kind: AssetCentricKindExtended) -> AssetCentricType:
+        mapping: dict[AssetCentricKindExtended, AssetCentricType] = {
             "Assets": "asset",
             "Events": "event",
             "TimeSeries": "timeseries",
@@ -199,3 +209,142 @@ class AssetCentricMigrationIO(
             pending_instance_id=NodeId(item.space, item.external_id),
             id=id_,
         )
+
+
+class AnnotationMigrationIO(
+    UploadableStorageIO[AssetCentricMigrationSelector, AssetCentricMapping[Annotation], InstanceApply]
+):
+    """IO class for migrating Annotations.
+
+    Args:
+        client: The ToolkitClient to use for CDF interactions.
+        instance_space: The instance space to use for the migrated annotations.
+        default_asset_annotation_mapping: The default ingestion mapping to use for asset-linked annotations.
+        default_file_annotation_mapping: The default ingestion mapping to use for file-linked annotations.
+
+    """
+
+    KIND = "AnnotationMigration"
+    SUPPORTED_DOWNLOAD_FORMATS = frozenset({".parquet", ".csv", ".ndjson"})
+    SUPPORTED_COMPRESSIONS = frozenset({".gz"})
+    SUPPORTED_READ_FORMATS = frozenset({".parquet", ".csv", ".ndjson", ".yaml", ".yml"})
+    CHUNK_SIZE = 1000
+    UPLOAD_ENDPOINT = InstanceIO.UPLOAD_ENDPOINT
+
+    SUPPORTED_ANNOTATION_TYPES = frozenset({"diagrams.AssetLink", "diagrams.FileLink"})
+
+    def __init__(
+        self,
+        client: ToolkitClient,
+        instance_space: str | None = None,
+        default_asset_annotation_mapping: str | None = None,
+        default_file_annotation_mapping: str | None = None,
+    ) -> None:
+        super().__init__(client)
+        self.annotation_io = AnnotationIO(client)
+        self.instance_space = instance_space
+        self.default_asset_annotation_mapping = default_asset_annotation_mapping or ASSET_ANNOTATIONS_ID
+        self.default_file_annotation_mapping = default_file_annotation_mapping or FILE_ANNOTATIONS_ID
+
+    def as_id(self, item: AssetCentricMapping[Annotation]) -> str:
+        return f"Annotation_{item.mapping.id}"
+
+    def count(self, selector: AssetCentricMigrationSelector) -> int | None:
+        if isinstance(selector, MigrationCSVFileSelector):
+            return len(selector.items)
+        else:
+            # There is no efficient way to count annotations in CDF.
+            return None
+
+    def stream_data(self, selector: AssetCentricMigrationSelector, limit: int | None = None) -> Iterable[Page]:
+        if isinstance(selector, MigrateDataSetSelector):
+            iterator = self._stream_from_dataset(selector, limit)
+        elif isinstance(selector, MigrationCSVFileSelector):
+            iterator = self._stream_from_csv(selector, limit)
+        else:
+            raise ToolkitNotImplementedError(f"Selector {type(selector)} is not supported for stream_data")
+        yield from (Page(worker_id="main", items=items) for items in iterator)
+
+    def _stream_from_dataset(
+        self, selector: MigrateDataSetSelector, limit: int | None = None
+    ) -> Iterator[Sequence[AssetCentricMapping[Annotation]]]:
+        if self.instance_space is None:
+            raise ToolkitValueError("Instance space must be provided for dataset-based annotation migration.")
+        asset_centric_selector = selector.as_asset_centric_selector()
+        for data_chunk in self.annotation_io.stream_data(asset_centric_selector, limit):
+            mapping_list = AssetCentricMappingList[Annotation]([])
+            for resource in data_chunk.items:
+                if resource.annotation_type not in self.SUPPORTED_ANNOTATION_TYPES:
+                    # This should not happen, as the annotation_io should already filter these out.
+                    # This is just in case.
+                    continue
+                mapping = AnnotationMapping(
+                    instance_id=EdgeId(space=self.instance_space, external_id=f"annotation_{resource.id!r}"),
+                    id=resource.id,
+                    ingestion_view=self._get_mapping(selector.ingestion_mapping, resource),
+                    preferred_consumer_view=selector.preferred_consumer_view,
+                    # The PySDK is poorly typed.
+                    annotation_type=resource.annotation_type,  # type: ignore[arg-type]
+                )
+                mapping_list.append(AssetCentricMapping(mapping=mapping, resource=resource))
+            yield mapping_list
+
+    def _stream_from_csv(
+        self, selector: MigrationCSVFileSelector, limit: int | None = None
+    ) -> Iterator[Sequence[AssetCentricMapping[Annotation]]]:
+        items = selector.items
+        if limit is not None:
+            items = MigrationMappingList(items[:limit])
+        chunk: list[AssetCentricMapping[Annotation]] = []
+        for current_batch in chunker_sequence(items, self.CHUNK_SIZE):
+            resources = self.client.annotations.retrieve_multiple(current_batch.get_ids())
+            resources_by_id = {resource.id: resource for resource in resources}
+            not_found = 0
+            incorrect_type_count = 0
+            for mapping in current_batch:
+                resource = resources_by_id.get(mapping.id)
+                if resource is None:
+                    not_found += 1
+                    continue
+                if resource.annotation_type not in self.SUPPORTED_ANNOTATION_TYPES:
+                    incorrect_type_count += 1
+                    continue
+                mapping.ingestion_view = self._get_mapping(mapping.ingestion_view, resource)
+                chunk.append(AssetCentricMapping(mapping=mapping, resource=resource))
+            if chunk:
+                yield chunk
+                chunk = []
+            if not_found:
+                MediumSeverityWarning(
+                    f"Could not find {not_found} annotations referenced in the CSV file. They will be skipped during migration."
+                ).print_warning(include_timestamp=True, console=self.client.console)
+            if incorrect_type_count:
+                MediumSeverityWarning(
+                    f"Found {incorrect_type_count} annotations with unsupported types. Only 'diagrams.AssetLink' and "
+                    "'diagrams.FileLink' are supported. These annotations will be skipped during migration."
+                ).print_warning(include_timestamp=True, console=self.client.console)
+
+    def _get_mapping(self, current_mapping: str | None, resource: Annotation) -> str:
+        try:
+            return (
+                current_mapping
+                or {
+                    "diagrams.AssetLink": self.default_asset_annotation_mapping,
+                    "diagrams.FileLink": self.default_file_annotation_mapping,
+                }[resource.annotation_type]
+            )
+        except KeyError as e:
+            raise ToolkitValueError(
+                f"Could not determine default ingestion view for annotation type '{resource.annotation_type}'. "
+                "Please specify the ingestion view explicitly in the CSV file."
+            ) from e
+
+    def json_to_resource(self, item_json: dict[str, JsonVal]) -> InstanceApply:
+        raise NotImplementedError("Deserializing Annotation Migrations from JSON is not supported.")
+
+    def data_to_json_chunk(
+        self,
+        data_chunk: Sequence[AssetCentricMapping[Annotation]],
+        selector: AssetCentricMigrationSelector | None = None,
+    ) -> list[dict[str, JsonVal]]:
+        raise NotImplementedError("Serializing Annotation Migrations to JSON is not supported.")
cognite_toolkit/_cdf_tk/commands/_migrate/selectors.py

@@ -8,11 +8,11 @@ from cognite.client.data_classes.data_modeling import ViewId
 from cognite_toolkit._cdf_tk.commands._migrate.data_classes import MigrationMappingList
 from cognite_toolkit._cdf_tk.storageio import DataSelector
 from cognite_toolkit._cdf_tk.storageio.selectors import DataSetSelector
-from cognite_toolkit._cdf_tk.utils.useful_types import AssetCentricKind
+from cognite_toolkit._cdf_tk.utils.useful_types import AssetCentricKindExtended


 class AssetCentricMigrationSelector(DataSelector, ABC):
-    kind: AssetCentricKind
+    kind: AssetCentricKindExtended

     @abstractmethod
     def get_ingestion_mappings(self) -> list[str]:
@@ -41,7 +41,7 @@ class MigrationCSVFileSelector(AssetCentricMigrationSelector):

 class MigrateDataSetSelector(AssetCentricMigrationSelector):
     type: Literal["migrateDataSet"] = "migrateDataSet"
-    kind: AssetCentricKind
+    kind: AssetCentricKindExtended
     data_set_external_id: str
     ingestion_mapping: str | None = None
     preferred_consumer_view: ViewId | None = None
@@ -63,4 +63,9 @@ class MigrateDataSetSelector(AssetCentricMigrationSelector):
         return [self.ingestion_mapping] if self.ingestion_mapping else []

     def as_asset_centric_selector(self) -> DataSetSelector:
-        return DataSetSelector(data_set_external_id=self.data_set_external_id, kind=self.kind)
+        if self.kind == "Annotations":
+            # Annotations are connected to file metadata, so we need to download the file metadata
+            # and look up the annotations connected to each file metadata.
+            return DataSetSelector(data_set_external_id=self.data_set_external_id, kind="FileMetadata")
+        else:
+            return DataSetSelector(data_set_external_id=self.data_set_external_id, kind=self.kind)
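
The `"Annotations"` branch above means a dataset-driven annotation migration actually iterates file metadata and resolves annotations per file. A small illustration of the remap, assuming the pydantic fields shown in this hunk and a made-up data set external ID:

    selector = MigrateDataSetSelector(kind="Annotations", data_set_external_id="ds_example")

    # Annotations cannot be listed by data set directly, so the selector
    # falls back to file metadata; annotations are then looked up per file.
    asset_centric = selector.as_asset_centric_selector()
    assert asset_centric.kind == "FileMetadata"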
cognite_toolkit/_cdf_tk/commands/_purge.py

@@ -7,12 +7,7 @@ from typing import Literal, cast

 import questionary
 from cognite.client.data_classes import DataSetUpdate
-from cognite.client.data_classes._base import CogniteResourceList
-from cognite.client.data_classes.data_modeling import (
-    EdgeList,
-    NodeId,
-    NodeList,
-)
+from cognite.client.data_classes.data_modeling import Edge, NodeId
 from cognite.client.data_classes.data_modeling.statistics import SpaceStatistics
 from cognite.client.exceptions import CogniteAPIError
 from cognite.client.utils._identifier import InstanceId
@@ -46,6 +41,7 @@ from cognite_toolkit._cdf_tk.exceptions import (
     AuthorizationError,
     ToolkitMissingResourceError,
 )
+from cognite_toolkit._cdf_tk.protocols import ResourceResponseProtocol
 from cognite_toolkit._cdf_tk.storageio import InstanceIO
 from cognite_toolkit._cdf_tk.storageio.selectors import InstanceSelector
 from cognite_toolkit._cdf_tk.tk_warnings import (
@@ -107,7 +103,7 @@ class ToDelete(ABC):
     @abstractmethod
     def get_process_function(
         self, client: ToolkitClient, console: Console, verbose: bool, process_results: ResourceDeployResult
-    ) -> Callable[[CogniteResourceList], list[JsonVal]]:
+    ) -> Callable[[list[ResourceResponseProtocol]], list[JsonVal]]:
         raise NotImplementedError()

     def get_extra_fields(self) -> dict[str, JsonVal]:
@@ -118,9 +114,10 @@
 class DataModelingToDelete(ToDelete):
     def get_process_function(
         self, client: ToolkitClient, console: Console, verbose: bool, process_results: ResourceDeployResult
-    ) -> Callable[[CogniteResourceList], list[JsonVal]]:
-        def as_id(chunk: CogniteResourceList) -> list[JsonVal]:
-            return [item.as_id().dump(include_type=False) for item in chunk]
+    ) -> Callable[[list[ResourceResponseProtocol]], list[JsonVal]]:
+        def as_id(chunk: list[ResourceResponseProtocol]) -> list[JsonVal]:
+            # We know that all data modeling resources implement as_id
+            return [item.as_id().dump(include_type=False) for item in chunk]  # type: ignore[attr-defined]

         return as_id

@@ -129,11 +126,11 @@
 class EdgeToDelete(ToDelete):
     def get_process_function(
         self, client: ToolkitClient, console: Console, verbose: bool, process_results: ResourceDeployResult
-    ) -> Callable[[CogniteResourceList], list[JsonVal]]:
-        def as_id(chunk: CogniteResourceList) -> list[JsonVal]:
+    ) -> Callable[[list[ResourceResponseProtocol]], list[JsonVal]]:
+        def as_id(chunk: list[ResourceResponseProtocol]) -> list[JsonVal]:
             return [
                 {"space": item.space, "externalId": item.external_id, "instanceType": "edge"}
-                for item in cast(EdgeList, chunk)
+                for item in cast(list[Edge], chunk)
             ]

         return as_id
@@ -146,9 +143,10 @@ class NodesToDelete(ToDelete):

     def get_process_function(
         self, client: ToolkitClient, console: Console, verbose: bool, process_results: ResourceDeployResult
-    ) -> Callable[[CogniteResourceList], list[JsonVal]]:
-        def check_for_data(chunk: CogniteResourceList) -> list[JsonVal]:
-            node_ids = cast(NodeList, chunk).as_ids()
+    ) -> Callable[[list[ResourceResponseProtocol]], list[JsonVal]]:
+        def check_for_data(chunk: list[ResourceResponseProtocol]) -> list[JsonVal]:
+            # We know that all node resources implement as_id
+            node_ids = [item.as_id() for item in chunk]  # type: ignore[attr-defined]
             found_ids: set[InstanceId] = set()
             if not self.delete_datapoints:
                 timeseries = client.time_series.retrieve_multiple(instance_ids=node_ids, ignore_unknown_ids=True)
@@ -164,8 +162,7 @@
                 dumped = node_id.dump(include_instance_type=True)
                 # The delete endpoint expects "instanceType" instead of "type"
                 dumped["instanceType"] = dumped.pop("type")
-                # MyPy think complains about invariant here, even though dict[str, str] is a type of JsonVal
-                result.append(dumped)  # type: ignore[arg-type]
+                result.append(dumped)
             return result

         return check_for_data
@@ -175,9 +172,10 @@
 class IdResourceToDelete(ToDelete):
     def get_process_function(
         self, client: ToolkitClient, console: Console, verbose: bool, process_results: ResourceDeployResult
-    ) -> Callable[[CogniteResourceList], list[JsonVal]]:
-        def as_id(chunk: CogniteResourceList) -> list[JsonVal]:
-            return [{"id": item.id} for item in chunk]
+    ) -> Callable[[list[ResourceResponseProtocol]], list[JsonVal]]:
+        def as_id(chunk: list[ResourceResponseProtocol]) -> list[JsonVal]:
+            # We know that all id resources have an id attribute
+            return [{"id": item.id} for item in chunk]  # type: ignore[attr-defined]

         return as_id

@@ -186,9 +184,10 @@
 class ExternalIdToDelete(ToDelete):
     def get_process_function(
         self, client: ToolkitClient, console: Console, verbose: bool, process_results: ResourceDeployResult
-    ) -> Callable[[CogniteResourceList], list[JsonVal]]:
-        def as_external_id(chunk: CogniteResourceList) -> list[JsonVal]:
-            return [{"externalId": item.external_id} for item in chunk]
+    ) -> Callable[[list[ResourceResponseProtocol]], list[JsonVal]]:
+        def as_external_id(chunk: list[ResourceResponseProtocol]) -> list[JsonVal]:
+            # We know that all external id resources have an external_id attribute
+            return [{"externalId": item.external_id} for item in chunk]  # type: ignore[attr-defined]

         return as_external_id

@@ -321,7 +320,7 @@ class PurgeCommand(ToolkitCommand):
                 iteration_count = item.total // self.BATCH_SIZE_DM + (
                     1 if item.total % self.BATCH_SIZE_DM > 0 else 0
                 )
-                executor = ProducerWorkerExecutor[CogniteResourceList, list[JsonVal]](
+                executor = ProducerWorkerExecutor[list[ResourceResponseProtocol], list[JsonVal]](
                     download_iterable=self._iterate_batch(
                         item.crud, space, data_set_external_id, batch_size=self.BATCH_SIZE_DM
                     ),
@@ -348,13 +347,13 @@
     @staticmethod
     def _iterate_batch(
         crud: ResourceCRUD, selected_space: str | None, data_set_external_id: str | None, batch_size: int
-    ) -> Iterable[CogniteResourceList]:
-        batch = crud.list_cls([])
+    ) -> Iterable[list[ResourceResponseProtocol]]:
+        batch: list[ResourceResponseProtocol] = []
         for resource in crud.iterate(space=selected_space, data_set_external_id=data_set_external_id):
             batch.append(resource)
             if len(batch) >= batch_size:
                 yield batch
-                batch = crud.list_cls([])
+                batch = []
         if batch:
             yield batch

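The reworked `_iterate_batch` above is the standard accumulate-and-flush batching generator, now over plain lists instead of `CogniteResourceList`. A self-contained version of the same idea:

    from collections.abc import Iterable, Iterator
    from typing import TypeVar

    T = TypeVar("T")


    def iterate_batches(resources: Iterable[T], batch_size: int) -> Iterator[list[T]]:
        """Yield lists of at most batch_size items, flushing the remainder at the end."""
        batch: list[T] = []
        for resource in resources:
            batch.append(resource)
            if len(batch) >= batch_size:
                yield batch
                batch = []
        if batch:
            yield batch


    assert list(iterate_batches(range(5), 2)) == [[0, 1], [2, 3], [4]]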
cognite_toolkit/_cdf_tk/commands/_questionary_style.py (new file)

@@ -0,0 +1,16 @@
+import questionary
+
+custom_style_fancy = questionary.Style(
+    [
+        ("qmark", "fg:#673ab7"),  # token in front of the question
+        ("question", "bold"),  # question text
+        ("answer", "fg:#f44336 bold"),  # submitted answer text behind the question
+        ("pointer", "fg:#673ab7 bold"),  # pointer used in select and checkbox prompts
+        ("highlighted", "fg:#673ab7 bold"),  # pointed-at choice in select and checkbox prompts
+        ("selected", "fg:#673ab7"),  # style for a selected item of a checkbox
+        ("separator", "fg:#cc5454"),  # separator in lists
+        ("instruction", ""),  # user instructions for select, rawselect, checkbox
+        ("text", ""),  # plain text
+        ("disabled", "fg:#858585 italic"),  # disabled choices for select and checkbox prompts
+    ]
+)
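
For context, a `questionary.Style` is applied by passing it to a prompt's `style` parameter. A hypothetical usage of the new shared style (the question and choices are made up):

    import questionary

    from cognite_toolkit._cdf_tk.commands._questionary_style import custom_style_fancy

    # Hypothetical prompt demonstrating how the shared style is applied.
    module = questionary.select(
        "Which module do you want to deploy?",
        choices=["cdf_common", "cdf_infield"],
        style=custom_style_fancy,
    ).ask()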
cognite_toolkit/_cdf_tk/commands/_upload.py

@@ -1,32 +1,33 @@
+from collections import Counter
 from collections.abc import Sequence
 from functools import partial
 from pathlib import Path

-from cognite.client.data_classes._base import T_CogniteResource
-from pydantic import ValidationError
+from cognite.client.data_classes.data_modeling import (
+    ViewId,
+)
 from rich.console import Console

 from cognite_toolkit._cdf_tk.client import ToolkitClient
-from cognite_toolkit._cdf_tk.constants import DATA_MANIFEST_STEM, DATA_RESOURCE_DIR
+from cognite_toolkit._cdf_tk.constants import DATA_MANIFEST_SUFFIX, DATA_RESOURCE_DIR
+from cognite_toolkit._cdf_tk.cruds import ViewCRUD
 from cognite_toolkit._cdf_tk.exceptions import ToolkitValueError
+from cognite_toolkit._cdf_tk.protocols import T_ResourceRequest, T_ResourceResponse
 from cognite_toolkit._cdf_tk.storageio import (
     T_Selector,
     UploadableStorageIO,
-    are_same_kind,
     get_upload_io,
 )
-from cognite_toolkit._cdf_tk.storageio._base import T_WriteCogniteResource, TableUploadableStorageIO, UploadItem
-from cognite_toolkit._cdf_tk.storageio.selectors import Selector, SelectorAdapter
-from cognite_toolkit._cdf_tk.tk_warnings import HighSeverityWarning, MediumSeverityWarning
-from cognite_toolkit._cdf_tk.tk_warnings.fileread import ResourceFormatWarning
+from cognite_toolkit._cdf_tk.storageio._base import TableUploadableStorageIO, UploadItem
+from cognite_toolkit._cdf_tk.storageio.selectors import Selector, load_selector
+from cognite_toolkit._cdf_tk.storageio.selectors._instances import InstanceSpaceSelector
+from cognite_toolkit._cdf_tk.tk_warnings import HighSeverityWarning, MediumSeverityWarning, ToolkitWarning
 from cognite_toolkit._cdf_tk.utils.auth import EnvironmentVariables
-from cognite_toolkit._cdf_tk.utils.file import read_yaml_file
-from cognite_toolkit._cdf_tk.utils.fileio import TABLE_READ_CLS_BY_FORMAT, FileReader
+from cognite_toolkit._cdf_tk.utils.fileio import MultiFileReader
 from cognite_toolkit._cdf_tk.utils.http_client import HTTPClient, ItemMessage, SuccessResponseItems
 from cognite_toolkit._cdf_tk.utils.producer_worker import ProducerWorkerExecutor
 from cognite_toolkit._cdf_tk.utils.progress_tracker import ProgressTracker
 from cognite_toolkit._cdf_tk.utils.useful_types import JsonVal
-from cognite_toolkit._cdf_tk.validation import humanize_validation_error

 from ._base import ToolkitCommand
 from .deploy import DeployCommand
@@ -76,53 +77,76 @@ class UploadCommand(ToolkitCommand):
        │   │   └── table2.Table.yaml
        │   └── ...
        ├── datafile1.kind.ndjson  # Data file of a specific kind
-       ├── datafile1.Metadata.yaml  # Metadata file for datafile1
+       ├── datafile1.Manifest.yaml  # Manifest for datafile1
        ├── datafile2.kind2.ndjson  # Another data file of the same or different kind
-       ├── datafile2.Metadata.yaml  # Metadata file for datafile2
+       ├── datafile2.Manifest.yaml  # Manifest file for datafile2
        └── ...
        """
-        console = Console()
-        data_files_by_selector = self._find_data_files(input_dir, kind)
+        console = client.console
+        data_files_by_selector = self._find_data_files(input_dir)

         self._deploy_resource_folder(input_dir / DATA_RESOURCE_DIR, deploy_resources, client, console, dry_run, verbose)

+        data_files_by_selector = self._topological_sort_if_instance_selector(data_files_by_selector, client)
+
         self._upload_data(data_files_by_selector, client, dry_run, input_dir, console, verbose)

+    def _topological_sort_if_instance_selector(
+        self, data_files_by_selector: dict[Selector, list[Path]], client: ToolkitClient
+    ) -> dict[Selector, list[Path]]:
+        """Topologically sorts InstanceSpaceSelectors (if present) to determine the upload order based on container dependencies from the views.
+
+        Args:
+            data_files_by_selector: A dictionary mapping selectors to their data files.
+            client: The cognite client to use for the upload.
+
+        Returns:
+            A dictionary mapping selectors to their data files with the necessary preprocessing.
+        """
+        counts = Counter(type(selector) for selector in data_files_by_selector.keys())
+        if counts[InstanceSpaceSelector] <= 1:
+            return data_files_by_selector
+
+        selector_by_view_id: dict[ViewId, Selector] = {}
+        for selector in data_files_by_selector:
+            if isinstance(selector, InstanceSpaceSelector) and selector.view is not None:
+                selector_by_view_id[selector.view.as_id()] = selector
+
+        view_dependencies = ViewCRUD.create_loader(client).topological_sort_container_constraints(
+            list(selector_by_view_id.keys())
+        )
+        prepared_selectors: dict[Selector, list[Path]] = {}
+
+        # Reorder selectors according to the dependency-sorted view list
+        for view_id in view_dependencies:
+            selector = selector_by_view_id[view_id]
+            prepared_selectors[selector] = data_files_by_selector[selector]
+
+        # Preserve selectors that aren't affected by view dependencies
+        # (e.g., raw tables, time series, non-view instance data)
+        for selector in data_files_by_selector.keys():
+            if selector not in prepared_selectors:
+                prepared_selectors[selector] = data_files_by_selector[selector]
+
+        return prepared_selectors
+
     def _find_data_files(
         self,
         input_dir: Path,
-        kind: str | None = None,
     ) -> dict[Selector, list[Path]]:
         """Finds data files and their corresponding metadata files in the input directory."""
-        manifest_file_endswith = f".{DATA_MANIFEST_STEM}.yaml"
         data_files_by_metadata: dict[Selector, list[Path]] = {}
-        for metadata_file in input_dir.glob(f"*{manifest_file_endswith}"):
-            data_file_prefix = metadata_file.name.removesuffix(manifest_file_endswith)
-            data_files = [
-                file
-                for file in input_dir.glob(f"{data_file_prefix}*")
-                if not file.name.endswith(manifest_file_endswith)
-            ]
-            if kind is not None and data_files:
-                data_files = [data_file for data_file in data_files if are_same_kind(kind, data_file)]
-                if not data_files:
-                    continue
+        for manifest_file in input_dir.glob(f"*{DATA_MANIFEST_SUFFIX}"):
+            selector_or_warning = load_selector(manifest_file)
+            if isinstance(selector_or_warning, ToolkitWarning):
+                self.warn(selector_or_warning)
+                continue
+            selector: Selector = selector_or_warning
+            data_files = selector.find_data_files(input_dir, manifest_file)
             if not data_files:
                 self.warn(
                     MediumSeverityWarning(
-                        f"Metadata file {metadata_file.as_posix()!r} has no corresponding data files, skipping.",
-                    )
-                )
-                continue
-
-            selector_dict = read_yaml_file(metadata_file, expected_output="dict")
-            try:
-                selector = SelectorAdapter.validate_python(selector_dict)
-            except ValidationError as e:
-                errors = humanize_validation_error(e)
-                self.warn(
-                    ResourceFormatWarning(
-                        metadata_file, tuple(errors), text="Invalid selector in metadata file, skipping."
+                        f"Metadata file {manifest_file.as_posix()!r} has no corresponding data files, skipping.",
                     )
                 )
                 continue
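
The new `_topological_sort_if_instance_selector` above orders instance uploads so that views whose containers others depend on come first; `topological_sort_container_constraints` resolves that ordering. Conceptually this is a plain topological sort, which the standard library can illustrate (the view names and dependency graph below are made up):

    from graphlib import TopologicalSorter

    # Each view maps to the set of views whose containers it requires,
    # and which must therefore be uploaded before it.
    dependencies = {
        "PumpView": {"EquipmentView"},
        "EquipmentView": set(),
    }

    upload_order = list(TopologicalSorter(dependencies).static_order())
    assert upload_order == ["EquipmentView", "PumpView"]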
@@ -178,49 +202,48 @@ class UploadCommand(ToolkitCommand):
             io = self._create_selected_io(selector, datafiles[0], client)
             if io is None:
                 continue
-            for data_file in datafiles:
-                file_display = self._path_as_display_name(data_file)
-                if verbose:
-                    console.print(f"{action} {selector.display_name} from {file_display.as_posix()!r}")
-                reader = FileReader.from_filepath(data_file)
-                is_table = reader.format in TABLE_READ_CLS_BY_FORMAT
-                if is_table and not isinstance(io, TableUploadableStorageIO):
-                    raise ToolkitValueError(f"{selector.display_name} does not support {reader.format!r} files.")
-                tracker = ProgressTracker[str]([self._UPLOAD])
-                executor = ProducerWorkerExecutor[list[tuple[str, dict[str, JsonVal]]], Sequence[UploadItem]](
-                    download_iterable=io.read_chunks(reader),
-                    process=partial(io.rows_to_data, selector=selector)
-                    if is_table and isinstance(io, TableUploadableStorageIO)
-                    else io.json_chunk_to_data,
-                    write=partial(
-                        self._upload_items,
-                        upload_client=upload_client,
-                        io=io,
-                        dry_run=dry_run,
-                        selector=selector,
-                        tracker=tracker,
-                        console=console,
-                    ),
-                    iteration_count=None,
-                    max_queue_size=self._MAX_QUEUE_SIZE,
-                    download_description=f"Reading {file_count:,}/{total_file_count + 1:,}: {file_display.as_posix()!s}",
-                    process_description="Processing",
-                    write_description=f"{action} {selector.display_name!r}",
+            reader = MultiFileReader(datafiles)
+            if reader.is_table and not isinstance(io, TableUploadableStorageIO):
+                raise ToolkitValueError(f"{selector.display_name} does not support {reader.format!r} files.")
+
+            chunk_count = io.count_chunks(reader)
+            iteration_count = chunk_count // io.CHUNK_SIZE + (1 if chunk_count % io.CHUNK_SIZE > 0 else 0)
+
+            tracker = ProgressTracker[str]([self._UPLOAD])
+            executor = ProducerWorkerExecutor[list[tuple[str, dict[str, JsonVal]]], Sequence[UploadItem]](
+                download_iterable=io.read_chunks(reader, selector),
+                process=partial(io.rows_to_data, selector=selector)
+                if reader.is_table and isinstance(io, TableUploadableStorageIO)
+                else io.json_chunk_to_data,
+                write=partial(
+                    self._upload_items,
+                    upload_client=upload_client,
+                    io=io,
+                    dry_run=dry_run,
+                    selector=selector,
+                    tracker=tracker,
                     console=console,
-                )
-                executor.run()
-                file_count += 1
-                executor.raise_on_error()
-                final_action = "Uploaded" if not dry_run else "Would upload"
-                suffix = " successfully" if not dry_run else ""
-                results = tracker.aggregate()
-                success = results.get((self._UPLOAD, "success"), 0)
-                failed = results.get((self._UPLOAD, "failed"), 0)
-                if failed > 0:
-                    suffix += f", {failed:,} failed"
-                console.print(
-                    f"{final_action} {success:,} {selector.display_name} from {file_display.as_posix()!r}{suffix}."
-                )
+                ),
+                iteration_count=iteration_count,
+                max_queue_size=self._MAX_QUEUE_SIZE,
+                download_description=f"Reading {selector.display_name!r} files",
+                process_description="Processing",
+                write_description=f"{action} {selector.display_name!r}",
+                console=console,
+            )
+            executor.run()
+            file_count += len(datafiles)
+            executor.raise_on_error()
+            final_action = "Uploaded" if not dry_run else "Would upload"
+            suffix = " successfully" if not dry_run else ""
+            results = tracker.aggregate()
+            success = results.get((self._UPLOAD, "success"), 0)
+            failed = results.get((self._UPLOAD, "failed"), 0)
+            if failed > 0:
+                suffix += f", {failed:,} failed"
+            console.print(
+                f"{final_action} {success:,} {selector.display_name} from {len(datafiles)} files{suffix}."
+            )

     @staticmethod
     def _path_as_display_name(input_path: Path, cwd: Path = Path.cwd()) -> Path:
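
The `iteration_count` computed in this hunk is integer ceiling division of the counted items by `CHUNK_SIZE`. A quick sanity check of the expression with made-up numbers:

    import math

    chunk_count, chunk_size = 2501, 1000
    iterations = chunk_count // chunk_size + (1 if chunk_count % chunk_size > 0 else 0)
    assert iterations == math.ceil(chunk_count / chunk_size) == 3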
@@ -233,7 +256,7 @@ class UploadCommand(ToolkitCommand):
         self, selector: Selector, data_file: Path, client: ToolkitClient
     ) -> UploadableStorageIO | None:
         try:
-            io_cls = get_upload_io(type(selector), kind=data_file)
+            io_cls = get_upload_io(selector)
         except ValueError as e:
             self.warn(HighSeverityWarning(f"Could not find StorageIO for selector {selector}: {e}"))
             return None
@@ -242,9 +265,9 @@
     @classmethod
     def _upload_items(
         cls,
-        data_chunk: Sequence[UploadItem],
+        data_chunk: Sequence[UploadItem[T_ResourceRequest]],
         upload_client: HTTPClient,
-        io: UploadableStorageIO[T_Selector, T_CogniteResource, T_WriteCogniteResource],
+        io: UploadableStorageIO[T_Selector, T_ResourceResponse, T_ResourceRequest],
         selector: T_Selector,
         dry_run: bool,
         tracker: ProgressTracker[str],