cognite-toolkit 0.6.111__py3-none-any.whl → 0.6.112__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. cognite_toolkit/_cdf_tk/apps/_download_app.py +307 -25
  2. cognite_toolkit/_cdf_tk/client/data_classes/base.py +25 -1
  3. cognite_toolkit/_cdf_tk/client/data_classes/infield.py +6 -21
  4. cognite_toolkit/_cdf_tk/client/data_classes/streams.py +10 -39
  5. cognite_toolkit/_cdf_tk/commands/_upload.py +15 -28
  6. cognite_toolkit/_cdf_tk/constants.py +1 -0
  7. cognite_toolkit/_cdf_tk/storageio/__init__.py +4 -16
  8. cognite_toolkit/_cdf_tk/storageio/_asset_centric.py +4 -23
  9. cognite_toolkit/_cdf_tk/storageio/_base.py +3 -1
  10. cognite_toolkit/_cdf_tk/storageio/_datapoints.py +3 -1
  11. cognite_toolkit/_cdf_tk/storageio/_file_content.py +149 -0
  12. cognite_toolkit/_cdf_tk/storageio/selectors/__init__.py +13 -1
  13. cognite_toolkit/_cdf_tk/storageio/selectors/_base.py +14 -2
  14. cognite_toolkit/_cdf_tk/storageio/selectors/_file_content.py +95 -0
  15. cognite_toolkit/_cdf_tk/utils/fileio/_readers.py +11 -3
  16. cognite_toolkit/_cdf_tk/utils/http_client/_data_classes.py +19 -1
  17. cognite_toolkit/_repo_files/GitHub/.github/workflows/deploy.yaml +1 -1
  18. cognite_toolkit/_repo_files/GitHub/.github/workflows/dry-run.yaml +1 -1
  19. cognite_toolkit/_resources/cdf.toml +1 -1
  20. cognite_toolkit/_version.py +1 -1
  21. {cognite_toolkit-0.6.111.dist-info → cognite_toolkit-0.6.112.dist-info}/METADATA +1 -1
  22. {cognite_toolkit-0.6.111.dist-info → cognite_toolkit-0.6.112.dist-info}/RECORD +25 -23
  23. {cognite_toolkit-0.6.111.dist-info → cognite_toolkit-0.6.112.dist-info}/WHEEL +0 -0
  24. {cognite_toolkit-0.6.111.dist-info → cognite_toolkit-0.6.112.dist-info}/entry_points.txt +0 -0
  25. {cognite_toolkit-0.6.111.dist-info → cognite_toolkit-0.6.112.dist-info}/licenses/LICENSE +0 -0
@@ -2,7 +2,9 @@ from enum import Enum
2
2
  from pathlib import Path
3
3
  from typing import Annotated, Any
4
4
 
5
+ import questionary
5
6
  import typer
7
+ from questionary import Choice
6
8
  from rich import print
7
9
 
8
10
  from cognite_toolkit._cdf_tk.client.data_classes.raw import RawTable
@@ -11,12 +13,14 @@ from cognite_toolkit._cdf_tk.constants import DATA_DEFAULT_DIR
11
13
  from cognite_toolkit._cdf_tk.storageio import (
12
14
  AssetIO,
13
15
  ChartIO,
16
+ EventIO,
17
+ FileMetadataIO,
14
18
  HierarchyIO,
15
19
  InstanceIO,
16
20
  RawIO,
21
+ TimeSeriesIO,
17
22
  )
18
23
  from cognite_toolkit._cdf_tk.storageio.selectors import (
19
- AssetCentricSelector,
20
24
  AssetSubtreeSelector,
21
25
  ChartExternalIdSelector,
22
26
  ChartSelector,
@@ -28,11 +32,16 @@ from cognite_toolkit._cdf_tk.storageio.selectors import (
28
32
  )
29
33
  from cognite_toolkit._cdf_tk.utils.auth import EnvironmentVariables
30
34
  from cognite_toolkit._cdf_tk.utils.interactive_select import (
35
+ AssetCentricInteractiveSelect,
31
36
  AssetInteractiveSelect,
32
37
  DataModelingSelect,
38
+ EventInteractiveSelect,
39
+ FileMetadataInteractiveSelect,
33
40
  InteractiveChartSelect,
34
41
  RawTableInteractiveSelect,
42
+ TimeSeriesInteractiveSelect,
35
43
  )
44
+ from cognite_toolkit._cdf_tk.utils.useful_types import AssetCentricKind
36
45
 
37
46
 
38
47
  class RawFormats(str, Enum):
@@ -77,6 +86,9 @@ class DownloadApp(typer.Typer):
77
86
  self.callback(invoke_without_command=True)(self.download_main)
78
87
  self.command("raw")(self.download_raw_cmd)
79
88
  self.command("assets")(self.download_assets_cmd)
89
+ self.command("timeseries")(self.download_timeseries_cmd)
90
+ self.command("events")(self.download_events_cmd)
91
+ self.command("files")(self.download_files_cmd)
80
92
  self.command("hierarchy")(self.download_hierarchy_cmd)
81
93
  self.command("instances")(self.download_instances_cmd)
82
94
  self.command("charts")(self.download_charts_cmd)
@@ -178,23 +190,15 @@ class DownloadApp(typer.Typer):
178
190
  )
179
191
  )
180
192
 
181
- @staticmethod
182
193
  def download_assets_cmd(
194
+ self,
183
195
  ctx: typer.Context,
184
196
  data_sets: Annotated[
185
197
  list[str] | None,
186
198
  typer.Option(
187
199
  "--data-set",
188
200
  "-d",
189
- help="List of data sets to download assets from. If this and hierarchy are not provided, an interactive selection will be made.",
190
- ),
191
- ] = None,
192
- hierarchy: Annotated[
193
- list[str] | None,
194
- typer.Option(
195
- "--hierarchy",
196
- "-r",
197
- help="List of asset hierarchies to download assets from. If this and data sets are not provided, an interactive selection will be made.",
201
+ help="List of data sets to download assets from. If this is not provided, an interactive selection will be made.",
198
202
  ),
199
203
  ] = None,
200
204
  file_format: Annotated[
@@ -241,20 +245,17 @@ class DownloadApp(typer.Typer):
241
245
  ) -> None:
242
246
  """This command will download assets from CDF into a temporary directory."""
243
247
  client = EnvironmentVariables.create_from_environment().get_client()
244
- is_interactive = not data_sets and not hierarchy
245
- if is_interactive:
246
- interactive = AssetInteractiveSelect(client, "download assets")
247
- selector_type = interactive.select_hierarchies_or_data_sets()
248
- if selector_type == "Data Set":
249
- data_sets = interactive.select_data_sets()
250
- else:
251
- hierarchy = interactive.select_hierarchies()
252
-
253
- selectors: list[AssetCentricSelector] = []
254
- if data_sets:
255
- selectors.extend([DataSetSelector(data_set_external_id=ds, kind="Assets") for ds in data_sets])
256
- if hierarchy:
257
- selectors.extend([AssetSubtreeSelector(hierarchy=h, kind="Assets") for h in hierarchy])
248
+ if data_sets is None:
249
+ data_sets, file_format, compression, output_dir, limit = self._asset_centric_interactive(
250
+ AssetInteractiveSelect(client, "download"),
251
+ file_format,
252
+ compression,
253
+ output_dir,
254
+ limit,
255
+ "Assets",
256
+ )
257
+
258
+ selectors = [DataSetSelector(kind="Assets", data_set_external_id=data_set) for data_set in data_sets]
258
259
  cmd = DownloadCommand()
259
260
  cmd.run(
260
261
  lambda: cmd.download(
@@ -268,6 +269,287 @@ class DownloadApp(typer.Typer):
268
269
  )
269
270
  )
270
271
 
272
+ @classmethod
273
+ def _asset_centric_interactive(
274
+ cls,
275
+ selector: AssetCentricInteractiveSelect,
276
+ file_format: AssetCentricFormats,
277
+ compression: CompressionFormat,
278
+ output_dir: Path,
279
+ limit: int,
280
+ kind: AssetCentricKind,
281
+ ) -> tuple[list[str], AssetCentricFormats, CompressionFormat, Path, int]:
282
+ data_sets = selector.select_data_sets()
283
+ display_name = kind.casefold() + "s"
284
+ file_format = questionary.select(
285
+ f"Select format to download the {display_name} in:",
286
+ choices=[Choice(title=format_.value, value=format_) for format_ in AssetCentricFormats],
287
+ default=file_format,
288
+ ).ask()
289
+ compression = questionary.select(
290
+ f"Select compression format to use when downloading the {display_name}:",
291
+ choices=[Choice(title=comp.value, value=comp) for comp in CompressionFormat],
292
+ default=compression,
293
+ ).ask()
294
+ output_dir = Path(
295
+ questionary.path(
296
+ "Where to download the assets:",
297
+ default=str(output_dir),
298
+ only_directories=True,
299
+ ).ask()
300
+ )
301
+ while True:
302
+ limit_str = questionary.text(
303
+ f"The maximum number of {display_name} to download from each dataset. Use -1 to download all {display_name}.",
304
+ default=str(limit),
305
+ ).ask()
306
+ if limit_str is None:
307
+ raise typer.Abort()
308
+ try:
309
+ limit = int(limit_str)
310
+ break
311
+ except ValueError:
312
+ print("[red]Please enter a valid integer for the limit.[/]")
313
+ return data_sets, file_format, compression, output_dir, limit
314
+
315
+ def download_timeseries_cmd(
316
+ self,
317
+ ctx: typer.Context,
318
+ data_sets: Annotated[
319
+ list[str] | None,
320
+ typer.Option(
321
+ "--data-set",
322
+ "-d",
323
+ help="List of data sets to download time series from. If this is not provided, an interactive selection will be made.",
324
+ ),
325
+ ] = None,
326
+ file_format: Annotated[
327
+ AssetCentricFormats,
328
+ typer.Option(
329
+ "--format",
330
+ "-f",
331
+ help="Format to download the time series in.",
332
+ ),
333
+ ] = AssetCentricFormats.csv,
334
+ compression: Annotated[
335
+ CompressionFormat,
336
+ typer.Option(
337
+ "--compression",
338
+ "-z",
339
+ help="Compression format to use when downloading the time series.",
340
+ ),
341
+ ] = CompressionFormat.none,
342
+ output_dir: Annotated[
343
+ Path,
344
+ typer.Option(
345
+ "--output-dir",
346
+ "-o",
347
+ help="Where to download the time series.",
348
+ allow_dash=True,
349
+ ),
350
+ ] = DEFAULT_DOWNLOAD_DIR,
351
+ limit: Annotated[
352
+ int,
353
+ typer.Option(
354
+ "--limit",
355
+ "-l",
356
+ help="The maximum number of time series to download from each dataset. Use -1 to download all time series.",
357
+ ),
358
+ ] = 100_000,
359
+ verbose: Annotated[
360
+ bool,
361
+ typer.Option(
362
+ "--verbose",
363
+ "-v",
364
+ help="Turn on to get more verbose output when running the command",
365
+ ),
366
+ ] = False,
367
+ ) -> None:
368
+ """This command will download time series from CDF into a temporary directory."""
369
+ client = EnvironmentVariables.create_from_environment().get_client()
370
+ if data_sets is None:
371
+ data_sets, file_format, compression, output_dir, limit = self._asset_centric_interactive(
372
+ TimeSeriesInteractiveSelect(client, "download"),
373
+ file_format,
374
+ compression,
375
+ output_dir,
376
+ limit,
377
+ "TimeSeries",
378
+ )
379
+
380
+ selectors = [DataSetSelector(kind="TimeSeries", data_set_external_id=data_set) for data_set in data_sets]
381
+ cmd = DownloadCommand()
382
+ cmd.run(
383
+ lambda: cmd.download(
384
+ selectors=selectors,
385
+ io=TimeSeriesIO(client),
386
+ output_dir=output_dir,
387
+ file_format=f".{file_format.value}",
388
+ compression=compression.value,
389
+ limit=limit if limit != -1 else None,
390
+ verbose=verbose,
391
+ )
392
+ )
393
+
394
+ def download_events_cmd(
395
+ self,
396
+ ctx: typer.Context,
397
+ data_sets: Annotated[
398
+ list[str] | None,
399
+ typer.Option(
400
+ "--data-set",
401
+ "-d",
402
+ help="List of data sets to download events from. If this is not provided, an interactive selection will be made.",
403
+ ),
404
+ ] = None,
405
+ file_format: Annotated[
406
+ AssetCentricFormats,
407
+ typer.Option(
408
+ "--format",
409
+ "-f",
410
+ help="Format to download the events in.",
411
+ ),
412
+ ] = AssetCentricFormats.csv,
413
+ compression: Annotated[
414
+ CompressionFormat,
415
+ typer.Option(
416
+ "--compression",
417
+ "-z",
418
+ help="Compression format to use when downloading the events.",
419
+ ),
420
+ ] = CompressionFormat.none,
421
+ output_dir: Annotated[
422
+ Path,
423
+ typer.Option(
424
+ "--output-dir",
425
+ "-o",
426
+ help="Where to download the events.",
427
+ allow_dash=True,
428
+ ),
429
+ ] = DEFAULT_DOWNLOAD_DIR,
430
+ limit: Annotated[
431
+ int,
432
+ typer.Option(
433
+ "--limit",
434
+ "-l",
435
+ help="The maximum number of events to download from each dataset. Use -1 to download all events.",
436
+ ),
437
+ ] = 100_000,
438
+ verbose: Annotated[
439
+ bool,
440
+ typer.Option(
441
+ "--verbose",
442
+ "-v",
443
+ help="Turn on to get more verbose output when running the command",
444
+ ),
445
+ ] = False,
446
+ ) -> None:
447
+ """This command will download events from CDF into a temporary directory."""
448
+ client = EnvironmentVariables.create_from_environment().get_client()
449
+ if data_sets is None:
450
+ data_sets, file_format, compression, output_dir, limit = self._asset_centric_interactive(
451
+ EventInteractiveSelect(client, "download"),
452
+ file_format,
453
+ compression,
454
+ output_dir,
455
+ limit,
456
+ "Events",
457
+ )
458
+
459
+ selectors = [DataSetSelector(kind="Events", data_set_external_id=data_set) for data_set in data_sets]
460
+ cmd = DownloadCommand()
461
+
462
+ cmd.run(
463
+ lambda: cmd.download(
464
+ selectors=selectors,
465
+ io=EventIO(client),
466
+ output_dir=output_dir,
467
+ file_format=f".{file_format.value}",
468
+ compression=compression.value,
469
+ limit=limit if limit != -1 else None,
470
+ verbose=verbose,
471
+ )
472
+ )
473
+
474
+ def download_files_cmd(
475
+ self,
476
+ ctx: typer.Context,
477
+ data_sets: Annotated[
478
+ list[str] | None,
479
+ typer.Option(
480
+ "--data-set",
481
+ "-d",
482
+ help="List of data sets to download file metadata from. If this is not provided, an interactive selection will be made.",
483
+ ),
484
+ ] = None,
485
+ file_format: Annotated[
486
+ AssetCentricFormats,
487
+ typer.Option(
488
+ "--format",
489
+ "-f",
490
+ help="Format to download the file metadata in.",
491
+ ),
492
+ ] = AssetCentricFormats.csv,
493
+ compression: Annotated[
494
+ CompressionFormat,
495
+ typer.Option(
496
+ "--compression",
497
+ "-z",
498
+ help="Compression format to use when downloading the file metadata.",
499
+ ),
500
+ ] = CompressionFormat.none,
501
+ output_dir: Annotated[
502
+ Path,
503
+ typer.Option(
504
+ "--output-dir",
505
+ "-o",
506
+ help="Where to download the file metadata.",
507
+ allow_dash=True,
508
+ ),
509
+ ] = DEFAULT_DOWNLOAD_DIR,
510
+ limit: Annotated[
511
+ int,
512
+ typer.Option(
513
+ "--limit",
514
+ "-l",
515
+ help="The maximum number of file metadata to download from each dataset. Use -1 to download all file metadata.",
516
+ ),
517
+ ] = 100_000,
518
+ verbose: Annotated[
519
+ bool,
520
+ typer.Option(
521
+ "--verbose",
522
+ "-v",
523
+ help="Turn on to get more verbose output when running the command",
524
+ ),
525
+ ] = False,
526
+ ) -> None:
527
+ """This command will download file metadata from CDF into a temporary directory."""
528
+ client = EnvironmentVariables.create_from_environment().get_client()
529
+ if data_sets is None:
530
+ data_sets, file_format, compression, output_dir, limit = self._asset_centric_interactive(
531
+ FileMetadataInteractiveSelect(client, "download"),
532
+ file_format,
533
+ compression,
534
+ output_dir,
535
+ limit,
536
+ "FileMetadata",
537
+ )
538
+
539
+ selectors = [DataSetSelector(kind="FileMetadata", data_set_external_id=data_set) for data_set in data_sets]
540
+ cmd = DownloadCommand()
541
+ cmd.run(
542
+ lambda: cmd.download(
543
+ selectors=selectors,
544
+ io=FileMetadataIO(client),
545
+ output_dir=output_dir,
546
+ file_format=f".{file_format.value}",
547
+ compression=compression.value,
548
+ limit=limit if limit != -1 else None,
549
+ verbose=verbose,
550
+ )
551
+ )
552
+
271
553
  @staticmethod
272
554
  def download_hierarchy_cmd(
273
555
  ctx: typer.Context,
@@ -1,10 +1,14 @@
1
1
  import sys
2
2
  from abc import ABC, abstractmethod
3
- from typing import Any, Generic, TypeVar
3
+ from collections import UserList
4
+ from typing import TYPE_CHECKING, Any, Generic, TypeVar
4
5
 
5
6
  from pydantic import BaseModel, ConfigDict
6
7
  from pydantic.alias_generators import to_camel
7
8
 
9
+ if TYPE_CHECKING:
10
+ from cognite.client import CogniteClient
11
+
8
12
  if sys.version_info >= (3, 11):
9
13
  from typing import Self
10
14
  else:
@@ -61,3 +65,23 @@ class Identifier(BaseModel):
61
65
 
62
66
  def as_id(self) -> Self:
63
67
  return self
68
+
69
+
70
+ T_Resource = TypeVar("T_Resource", bound=RequestResource | ResponseResource)
71
+
72
+
73
+ class BaseResourceList(UserList[T_Resource]):
74
+ """Base class for resource lists."""
75
+
76
+ _RESOURCE: type[T_Resource]
77
+
78
+ def __init__(self, initlist: list[T_Resource] | None = None, **_: Any) -> None:
79
+ super().__init__(initlist or [])
80
+
81
+ def dump(self, camel_case: bool = True) -> list[dict[str, Any]]:
82
+ return [item.dump(camel_case) for item in self.data]
83
+
84
+ @classmethod
85
+ def load(cls, data: list[dict[str, Any]], cognite_client: "CogniteClient | None" = None) -> Self:
86
+ items = [cls._RESOURCE.model_validate(item) for item in data]
87
+ return cls(items) # type: ignore[arg-type]
@@ -1,15 +1,16 @@
1
1
  import sys
2
- from collections import UserList
3
2
  from typing import Any, ClassVar, Literal
4
3
 
5
- from cognite.client import CogniteClient
6
4
  from pydantic import JsonValue, field_validator
7
5
  from pydantic_core.core_schema import ValidationInfo
8
6
 
9
- from cognite_toolkit._cdf_tk.protocols import ResourceRequestListProtocol, ResourceResponseListProtocol
7
+ from cognite_toolkit._cdf_tk.protocols import (
8
+ ResourceRequestListProtocol,
9
+ ResourceResponseListProtocol,
10
+ )
10
11
  from cognite_toolkit._cdf_tk.utils.text import sanitize_instance_external_id
11
12
 
12
- from .base import ResponseResource
13
+ from .base import BaseResourceList, ResponseResource
13
14
  from .instance_api import InstanceRequestResource, ViewReference
14
15
 
15
16
  if sys.version_info >= (3, 11):
@@ -74,29 +75,13 @@ class InfieldLocationConfig(
74
75
 
75
76
 
76
77
  class InfieldLocationConfigList(
77
- UserList[InfieldLocationConfig],
78
+ BaseResourceList[InfieldLocationConfig],
78
79
  ResourceResponseListProtocol,
79
80
  ResourceRequestListProtocol,
80
81
  ):
81
82
  """A list of InfieldLocationConfig objects."""
82
83
 
83
84
  _RESOURCE = InfieldLocationConfig
84
- data: list[InfieldLocationConfig]
85
-
86
- def __init__(self, initlist: list[InfieldLocationConfig] | None = None, **_: Any) -> None:
87
- super().__init__(initlist or [])
88
-
89
- def dump(self, camel_case: bool = True) -> list[dict[str, Any]]:
90
- """Serialize the list of InfieldLocationConfig objects to a list of dictionaries."""
91
- return [item.dump(camel_case) for item in self.data]
92
-
93
- @classmethod
94
- def load(
95
- cls, data: list[dict[str, Any]], cognite_client: CogniteClient | None = None
96
- ) -> "InfieldLocationConfigList":
97
- """Deserialize a list of dictionaries to an InfieldLocationConfigList."""
98
- items = [InfieldLocationConfig.model_validate(item) for item in data]
99
- return cls(items)
100
85
 
101
86
  def as_write(self) -> Self:
102
87
  return self
@@ -1,18 +1,12 @@
1
- import sys
2
- from collections import UserList
3
- from typing import Any, Literal
4
-
5
- from cognite.client import CogniteClient
1
+ from typing import Literal
6
2
 
7
3
  from cognite_toolkit._cdf_tk.constants import StreamTemplateName
8
- from cognite_toolkit._cdf_tk.protocols import ResourceRequestListProtocol, ResourceResponseListProtocol
9
-
10
- from .base import BaseModelObject, RequestResource, ResponseResource
4
+ from cognite_toolkit._cdf_tk.protocols import (
5
+ ResourceRequestListProtocol,
6
+ ResourceResponseListProtocol,
7
+ )
11
8
 
12
- if sys.version_info >= (3, 11):
13
- from typing import Self
14
- else:
15
- from typing_extensions import Self
9
+ from .base import BaseModelObject, BaseResourceList, RequestResource, ResponseResource
16
10
 
17
11
 
18
12
  class StreamRequest(RequestResource):
@@ -25,22 +19,11 @@ class StreamRequest(RequestResource):
25
19
  return self.external_id
26
20
 
27
21
 
28
- class StreamRequestList(UserList[StreamRequest], ResourceRequestListProtocol):
22
+ class StreamRequestList(BaseResourceList[StreamRequest], ResourceRequestListProtocol):
29
23
  """List of Stream request resources."""
30
24
 
31
25
  _RESOURCE = StreamRequest
32
26
 
33
- def __init__(self, initlist: list[StreamRequest] | None = None, **_: Any) -> None:
34
- super().__init__(initlist or [])
35
-
36
- def dump(self, camel_case: bool = True) -> list[dict[str, Any]]:
37
- return [item.dump(camel_case) for item in self.data]
38
-
39
- @classmethod
40
- def load(cls, data: list[dict[str, Any]], cognite_client: CogniteClient | None = None) -> "StreamRequestList":
41
- items = [StreamRequest.model_validate(item) for item in data]
42
- return cls(items)
43
-
44
27
 
45
28
  class LifecycleObject(BaseModelObject):
46
29
  """Lifecycle object."""
@@ -98,22 +81,10 @@ class StreamResponse(ResponseResource["StreamRequest"]):
98
81
  )
99
82
 
100
83
 
101
- class StreamResponseList(UserList[StreamResponse], ResourceResponseListProtocol):
84
+ class StreamResponseList(BaseResourceList[StreamResponse], ResourceResponseListProtocol):
102
85
  """List of Stream response resources."""
103
86
 
104
87
  _RESOURCE = StreamResponse
105
- data: list[StreamResponse]
106
-
107
- def __init__(self, initlist: list[StreamResponse] | None = None, **_: Any) -> None:
108
- super().__init__(initlist or [])
109
-
110
- def dump(self, camel_case: bool = True) -> list[dict[str, Any]]:
111
- return [item.dump(camel_case) for item in self.data]
112
-
113
- @classmethod
114
- def load(cls, data: list[dict[str, Any]], cognite_client: CogniteClient | None = None) -> "StreamResponseList":
115
- items = [StreamResponse.model_validate(item) for item in data]
116
- return cls(items)
117
88
 
118
- def as_write(self) -> Self:
119
- return self
89
+ def as_write(self) -> StreamRequestList:
90
+ return StreamRequestList([item.as_write() for item in self.data])
@@ -11,13 +11,12 @@ from pydantic import ValidationError
11
11
  from rich.console import Console
12
12
 
13
13
  from cognite_toolkit._cdf_tk.client import ToolkitClient
14
- from cognite_toolkit._cdf_tk.constants import DATA_MANIFEST_STEM, DATA_RESOURCE_DIR
14
+ from cognite_toolkit._cdf_tk.constants import DATA_MANIFEST_SUFFIX, DATA_RESOURCE_DIR
15
15
  from cognite_toolkit._cdf_tk.cruds import ViewCRUD
16
16
  from cognite_toolkit._cdf_tk.exceptions import ToolkitValueError
17
17
  from cognite_toolkit._cdf_tk.storageio import (
18
18
  T_Selector,
19
19
  UploadableStorageIO,
20
- are_same_kind,
21
20
  get_upload_io,
22
21
  )
23
22
  from cognite_toolkit._cdf_tk.storageio._base import T_WriteCogniteResource, TableUploadableStorageIO, UploadItem
@@ -88,7 +87,7 @@ class UploadCommand(ToolkitCommand):
88
87
  └── ...
89
88
  """
90
89
  console = client.console
91
- data_files_by_selector = self._find_data_files(input_dir, kind)
90
+ data_files_by_selector = self._find_data_files(input_dir)
92
91
 
93
92
  self._deploy_resource_folder(input_dir / DATA_RESOURCE_DIR, deploy_resources, client, console, dry_run, verbose)
94
93
 
@@ -138,38 +137,26 @@ class UploadCommand(ToolkitCommand):
138
137
  def _find_data_files(
139
138
  self,
140
139
  input_dir: Path,
141
- kind: str | None = None,
142
140
  ) -> dict[Selector, list[Path]]:
143
141
  """Finds data files and their corresponding metadata files in the input directory."""
144
- manifest_file_endswith = f".{DATA_MANIFEST_STEM}.yaml"
145
142
  data_files_by_metadata: dict[Selector, list[Path]] = {}
146
- for metadata_file in input_dir.glob(f"*{manifest_file_endswith}"):
147
- data_file_prefix = metadata_file.name.removesuffix(manifest_file_endswith)
148
- data_files = [
149
- file
150
- for file in input_dir.glob(f"{data_file_prefix}*")
151
- if not file.name.endswith(manifest_file_endswith)
152
- ]
153
- if kind is not None and data_files:
154
- data_files = [data_file for data_file in data_files if are_same_kind(kind, data_file)]
155
- if not data_files:
156
- continue
157
- if not data_files:
158
- self.warn(
159
- MediumSeverityWarning(
160
- f"Metadata file {metadata_file.as_posix()!r} has no corresponding data files, skipping.",
161
- )
162
- )
163
- continue
164
-
165
- selector_dict = read_yaml_file(metadata_file, expected_output="dict")
143
+ for manifest_file in input_dir.glob(f"*{DATA_MANIFEST_SUFFIX}"):
144
+ selector_dict = read_yaml_file(manifest_file, expected_output="dict")
166
145
  try:
167
146
  selector = SelectorAdapter.validate_python(selector_dict)
168
147
  except ValidationError as e:
169
148
  errors = humanize_validation_error(e)
170
149
  self.warn(
171
150
  ResourceFormatWarning(
172
- metadata_file, tuple(errors), text="Invalid selector in metadata file, skipping."
151
+ manifest_file, tuple(errors), text="Invalid selector in metadata file, skipping."
152
+ )
153
+ )
154
+ continue
155
+ data_files = selector.find_data_files(input_dir, manifest_file)
156
+ if not data_files:
157
+ self.warn(
158
+ MediumSeverityWarning(
159
+ f"Metadata file {manifest_file.as_posix()!r} has no corresponding data files, skipping.",
173
160
  )
174
161
  )
175
162
  continue
@@ -230,7 +217,7 @@ class UploadCommand(ToolkitCommand):
230
217
  raise ToolkitValueError(f"{selector.display_name} does not support {reader.format!r} files.")
231
218
  tracker = ProgressTracker[str]([self._UPLOAD])
232
219
  executor = ProducerWorkerExecutor[list[tuple[str, dict[str, JsonVal]]], Sequence[UploadItem]](
233
- download_iterable=io.read_chunks(reader),
220
+ download_iterable=io.read_chunks(reader, selector),
234
221
  process=partial(io.rows_to_data, selector=selector)
235
222
  if reader.is_table and isinstance(io, TableUploadableStorageIO)
236
223
  else io.json_chunk_to_data,
@@ -275,7 +262,7 @@ class UploadCommand(ToolkitCommand):
275
262
  self, selector: Selector, data_file: Path, client: ToolkitClient
276
263
  ) -> UploadableStorageIO | None:
277
264
  try:
278
- io_cls = get_upload_io(type(selector), kind=data_file)
265
+ io_cls = get_upload_io(type(selector))
279
266
  except ValueError as e:
280
267
  self.warn(HighSeverityWarning(f"Could not find StorageIO for selector {selector}: {e}"))
281
268
  return None
@@ -176,6 +176,7 @@ READONLY_CONTAINER_PROPERTIES = {
176
176
  DATA_DEFAULT_DIR = "data"
177
177
  DATA_RESOURCE_DIR = "resources"
178
178
  DATA_MANIFEST_STEM = "Manifest"
179
+ DATA_MANIFEST_SUFFIX = f".{DATA_MANIFEST_STEM}.yaml"
179
180
 
180
181
  # Migration Constants
181
182
  MISSING_INSTANCE_SPACE = "<InstanceSpaceMissing>"