cognite-toolkit 0.6.111__py3-none-any.whl → 0.6.113__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (28)
  1. cognite_toolkit/_cdf_tk/apps/_download_app.py +307 -25
  2. cognite_toolkit/_cdf_tk/client/data_classes/base.py +25 -1
  3. cognite_toolkit/_cdf_tk/client/data_classes/infield.py +6 -21
  4. cognite_toolkit/_cdf_tk/client/data_classes/streams.py +10 -39
  5. cognite_toolkit/_cdf_tk/commands/_upload.py +15 -28
  6. cognite_toolkit/_cdf_tk/commands/init.py +16 -12
  7. cognite_toolkit/_cdf_tk/commands/modules.py +1 -0
  8. cognite_toolkit/_cdf_tk/constants.py +1 -0
  9. cognite_toolkit/_cdf_tk/feature_flags.py +4 -0
  10. cognite_toolkit/_cdf_tk/storageio/__init__.py +4 -16
  11. cognite_toolkit/_cdf_tk/storageio/_asset_centric.py +4 -23
  12. cognite_toolkit/_cdf_tk/storageio/_base.py +3 -1
  13. cognite_toolkit/_cdf_tk/storageio/_datapoints.py +3 -1
  14. cognite_toolkit/_cdf_tk/storageio/_file_content.py +149 -0
  15. cognite_toolkit/_cdf_tk/storageio/selectors/__init__.py +13 -1
  16. cognite_toolkit/_cdf_tk/storageio/selectors/_base.py +14 -2
  17. cognite_toolkit/_cdf_tk/storageio/selectors/_file_content.py +95 -0
  18. cognite_toolkit/_cdf_tk/utils/fileio/_readers.py +11 -3
  19. cognite_toolkit/_cdf_tk/utils/http_client/_data_classes.py +19 -1
  20. cognite_toolkit/_repo_files/GitHub/.github/workflows/deploy.yaml +1 -1
  21. cognite_toolkit/_repo_files/GitHub/.github/workflows/dry-run.yaml +1 -1
  22. cognite_toolkit/_resources/cdf.toml +1 -1
  23. cognite_toolkit/_version.py +1 -1
  24. {cognite_toolkit-0.6.111.dist-info → cognite_toolkit-0.6.113.dist-info}/METADATA +1 -1
  25. {cognite_toolkit-0.6.111.dist-info → cognite_toolkit-0.6.113.dist-info}/RECORD +28 -26
  26. {cognite_toolkit-0.6.111.dist-info → cognite_toolkit-0.6.113.dist-info}/WHEEL +0 -0
  27. {cognite_toolkit-0.6.111.dist-info → cognite_toolkit-0.6.113.dist-info}/entry_points.txt +0 -0
  28. {cognite_toolkit-0.6.111.dist-info → cognite_toolkit-0.6.113.dist-info}/licenses/LICENSE +0 -0
cognite_toolkit/_cdf_tk/commands/init.py
@@ -19,7 +19,7 @@ from cognite_toolkit._cdf_tk.commands.collect import CollectCommand
  from cognite_toolkit._cdf_tk.commands.modules import ModulesCommand
  from cognite_toolkit._cdf_tk.commands.repo import RepoCommand
  from cognite_toolkit._cdf_tk.exceptions import ToolkitError
- from cognite_toolkit._cdf_tk.feature_flags import Flags
+ from cognite_toolkit._cdf_tk.feature_flags import FeatureFlag, Flags


  class InitItemStatus(Enum):
@@ -32,8 +32,6 @@ class InitItemStatus(Enum):

  @dataclass
  class InitChecklistItem:
- """Represents an item in the init checklist"""
-
  name: str
  description: str
  function: Callable[[], None]
@@ -41,7 +39,6 @@ class InitChecklistItem:
  mandatory: bool = False

  def get_status_display(self) -> str:
- """Get a display string for the status"""
  if self.status == InitItemStatus.SUCCESSFUL:
  return "✓"
  elif self.status == InitItemStatus.FAILED:
@@ -50,12 +47,17 @@ class InitChecklistItem:
  return "○"

  def get_choice_title(self) -> str:
- """Get the title for the questionary choice"""
  status_icon = self.get_status_display()
  return f"{status_icon} {self.description} (required)" if self.mandatory else f"{status_icon} {self.description}"


  class InitCommand(ToolkitCommand):
+ organization_dir: Path | None
+
+ def __init__(self, print_warning: bool = True, skip_tracking: bool = False, silent: bool = False) -> None:
+ super().__init__(print_warning, skip_tracking, silent)
+ self.organization_dir = None
+
  def execute(self, dry_run: bool = False, emulate_dot_seven: bool = False) -> None:
  if not Flags.v07.is_enabled() and not emulate_dot_seven:
  print("This command is deprecated. Use 'cdf modules init' instead.")
@@ -140,7 +142,6 @@ class InitCommand(ToolkitCommand):
  if selected == "__exit__":
  if all_mandatory_complete:
  print("Setup complete!")
- print("You can now start using the Cognite Toolkit.")
  break
  else:
  incomplete_mandatory = [
@@ -168,7 +169,6 @@ class InitCommand(ToolkitCommand):
  if not confirm:
  continue

- # Run the function
  try:
  selected_item.function()
  selected_item.status = InitItemStatus.SUCCESSFUL
@@ -188,12 +188,14 @@ class InitCommand(ToolkitCommand):
  print(f"Unexpected error occurred. Full traceback:\n{traceback.format_exc()}")

  def _init_toml(self, dry_run: bool = False) -> None:
- organization_dir = ModulesCommand._prompt_organization_dir()
+ if self.organization_dir is None:
+ self.organization_dir = ModulesCommand._prompt_organization_dir()
  if dry_run:
  print("Would initialize cdf.toml configuration file")
  return
- CDFToml.write(organization_dir, "dev")
- print(f"cdf.toml configuration file initialized in {organization_dir}")
+ CDFToml.write(self.organization_dir, "dev")
+ FeatureFlag.flush()
+ print(f"cdf.toml configuration file initialized in {self.organization_dir}")

  def _init_auth(self, dry_run: bool = False) -> None:
  auth_command = AuthCommand()
@@ -201,12 +203,14 @@ class InitCommand(ToolkitCommand):

  def _init_modules(self, dry_run: bool = False) -> None:
  with ModulesCommand() as modules_command:
+ if self.organization_dir is None:
+ self.organization_dir = ModulesCommand._prompt_organization_dir()
  if dry_run:
  organization_dir = Path(tempfile.mkdtemp(prefix="init_modules_", suffix=".tmp", dir=Path.cwd()))
  modules_command.run(lambda: modules_command.init(organization_dir=organization_dir))
  shutil.rmtree(organization_dir)
  else:
- modules_command.run(lambda: modules_command.init())
+ modules_command.run(lambda: modules_command.init(organization_dir=self.organization_dir))

  def _init_repo(self, dry_run: bool = False) -> None:
  repo_command = RepoCommand()
@@ -216,7 +220,7 @@ class InitCommand(ToolkitCommand):
  """Opt in to collect usage statistics"""

  opt_in = questionary.confirm(
- "Do you want to opt in to collect usage statistics?",
+ "Do you want to opt in to collect usage statistics? This will help us improve the Toolkit.",
  default=True,
  ).ask()
  if dry_run:
cognite_toolkit/_cdf_tk/commands/modules.py
@@ -756,6 +756,7 @@ class ModulesCommand(ToolkitCommand):
  """

  cdf_toml = CDFToml.load()
+
  if (Flags.EXTERNAL_LIBRARIES.is_enabled() or user_library) and self._module_source_dir is None:
  libraries = {"userdefined": user_library} if user_library else cdf_toml.libraries

cognite_toolkit/_cdf_tk/constants.py
@@ -176,6 +176,7 @@ READONLY_CONTAINER_PROPERTIES = {
  DATA_DEFAULT_DIR = "data"
  DATA_RESOURCE_DIR = "resources"
  DATA_MANIFEST_STEM = "Manifest"
+ DATA_MANIFEST_SUFFIX = f".{DATA_MANIFEST_STEM}.yaml"

  # Migration Constants
  MISSING_INSTANCE_SPACE = "<InstanceSpaceMissing>"
cognite_toolkit/_cdf_tk/feature_flags.py
@@ -99,3 +99,7 @@ class FeatureFlag:
  @lru_cache(typed=True)
  def is_enabled(flag: Flags) -> bool:
  return CDFToml.load().alpha_flags.get(clean_name(flag.name), False)
+
+ @staticmethod
+ def flush() -> None:
+ FeatureFlag.is_enabled.cache_clear()
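The new flush() matters because is_enabled() is memoized with lru_cache: _init_toml now calls flush() right after writing a fresh cdf.toml, presumably so flag values cached earlier in the same process do not go stale. A self-contained sketch of that interaction (the dict-based config below is a stand-in, not the Toolkit's CDFToml loader):

    from functools import lru_cache

    _config: dict[str, bool] = {}  # stand-in for the alpha_flags section of cdf.toml

    class FeatureFlag:
        @staticmethod
        @lru_cache(typed=True)
        def is_enabled(flag: str) -> bool:
            return _config.get(flag, False)

        @staticmethod
        def flush() -> None:
            FeatureFlag.is_enabled.cache_clear()

    print(FeatureFlag.is_enabled("v07"))  # False, and now cached
    _config["v07"] = True                 # cdf.toml rewritten, e.g. by _init_toml
    print(FeatureFlag.is_enabled("v07"))  # still False: the cached answer is returned
    FeatureFlag.flush()
    print(FeatureFlag.is_enabled("v07"))  # True once the cache is cleared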
cognite_toolkit/_cdf_tk/storageio/__init__.py
@@ -1,7 +1,4 @@
- from pathlib import Path
-
  from cognite_toolkit._cdf_tk.utils._auxiliary import get_concrete_subclasses
- from cognite_toolkit._cdf_tk.utils.fileio import COMPRESSION_BY_SUFFIX

  from ._annotations import AnnotationIO
  from ._applications import CanvasIO, ChartIO
@@ -25,6 +22,7 @@ from ._base import (
  )
  from ._data_classes import InstanceIdCSVList, InstanceIdRow, ModelList
  from ._datapoints import DatapointsIO
+ from ._file_content import FileContentIO
  from ._instances import InstanceIO
  from ._raw import RawIO
  from .selectors._base import DataSelector
@@ -34,24 +32,14 @@ STORAGE_IO_CLASSES = get_concrete_subclasses(StorageIO) # type: ignore[type-abstract]
  UPLOAD_IO_CLASSES = get_concrete_subclasses(UploadableStorageIO) # type: ignore[type-abstract]


- def get_upload_io(selector_cls: type[DataSelector], kind: str | Path) -> type[UploadableStorageIO]:
+ def get_upload_io(selector_cls: type[DataSelector]) -> type[UploadableStorageIO]:
  """Get the appropriate UploadableStorageIO class based on the type of the provided selector."""
  for cls in UPLOAD_IO_CLASSES:
- if issubclass(selector_cls, cls.BASE_SELECTOR) and are_same_kind(cls.KIND, kind):
+ if issubclass(selector_cls, cls.BASE_SELECTOR):
  return cls
  raise ValueError(f"No UploadableStorageIO found for selector of type {selector_cls.__name__}")


- def are_same_kind(kind: str, kind_or_path: str | Path, /) -> bool:
- """Check if two kinds are the same, ignoring case and compression suffixes."""
- if not isinstance(kind_or_path, Path):
- return kind.casefold() == kind_or_path.casefold()
- stem = kind_or_path.stem
- if kind_or_path.suffix in COMPRESSION_BY_SUFFIX:
- stem = Path(stem).stem
- return stem.lower().endswith(kind.casefold())
-
-
  __all__ = [
  "AnnotationIO",
  "AssetIO",
@@ -61,6 +49,7 @@ __all__ = [
  "ConfigurableStorageIO",
  "DatapointsIO",
  "EventIO",
+ "FileContentIO",
  "FileMetadataIO",
  "HierarchyIO",
  "InstanceIO",
@@ -76,6 +65,5 @@ __all__ = [
  "TimeSeriesIO",
  "UploadItem",
  "UploadableStorageIO",
- "are_same_kind",
  "get_upload_io",
  ]
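With the kind argument removed, get_upload_io dispatches purely on which selector base class a selector derives from. A toy sketch of that lookup (the class hierarchy is abbreviated; only FileContentIO's BASE_SELECTOR assignment is taken from this diff, the rest are illustrative stand-ins):

    class DataSelector: ...
    class FileContentSelector(DataSelector): ...
    class FileMetadataTemplateSelector(FileContentSelector): ...
    class AssetCentricSelector(DataSelector): ...

    class UploadableStorageIO:
        BASE_SELECTOR: type[DataSelector] = DataSelector

    class FileContentIO(UploadableStorageIO):
        BASE_SELECTOR = FileContentSelector

    class AssetIO(UploadableStorageIO):
        BASE_SELECTOR = AssetCentricSelector

    UPLOAD_IO_CLASSES = [FileContentIO, AssetIO]

    def get_upload_io(selector_cls: type) -> type[UploadableStorageIO]:
        # Pick the first IO whose BASE_SELECTOR the selector class derives from.
        for cls in UPLOAD_IO_CLASSES:
            if issubclass(selector_cls, cls.BASE_SELECTOR):
                return cls
        raise ValueError(f"No UploadableStorageIO found for selector of type {selector_cls.__name__}")

    assert get_upload_io(FileMetadataTemplateSelector) is FileContentIO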
cognite_toolkit/_cdf_tk/storageio/_asset_centric.py
@@ -51,15 +51,9 @@ from cognite_toolkit._cdf_tk.utils.cdf import metadata_key_counts
  from cognite_toolkit._cdf_tk.utils.fileio import FileReader, SchemaColumn
  from cognite_toolkit._cdf_tk.utils.fileio._readers import TableReader
  from cognite_toolkit._cdf_tk.utils.http_client import (
- FailedRequestItems,
- FailedRequestMessage,
- FailedResponse,
- FailedResponseItems,
  HTTPClient,
  HTTPMessage,
  SimpleBodyRequest,
- SuccessResponse,
- SuccessResponseItems,
  )
  from cognite_toolkit._cdf_tk.utils.useful_types import (
  T_ID,
@@ -347,7 +341,9 @@ class AssetIO(BaseAssetCentricIO[str, AssetWrite, Asset, AssetWriteList, AssetLi
  return self.client.assets.retrieve_multiple(ids)

  @classmethod
- def read_chunks(cls, reader: FileReader) -> Iterable[list[tuple[str, dict[str, JsonVal]]]]:
+ def read_chunks(
+ cls, reader: FileReader, selector: AssetCentricSelector
+ ) -> Iterable[list[tuple[str, dict[str, JsonVal]]]]:
  """Assets require special handling when reading data to ensure parent assets are created first."""
  current_depth = max_depth = 0
  data_name = "row" if isinstance(reader, TableReader) else "line"
@@ -460,22 +456,7 @@ class FileMetadataIO(BaseAssetCentricIO[str, FileMetadataWrite, FileMetadata, Fi
  body_content=item.dump(), # type: ignore[arg-type]
  )
  )
- # Convert the responses to per-item responses
- for message in responses:
- if isinstance(message, SuccessResponse):
- results.append(
- SuccessResponseItems(status_code=message.status_code, ids=[item.as_id()], body=message.body)
- )
- elif isinstance(message, FailedResponse):
- results.append(
- FailedResponseItems(
- status_code=message.status_code, ids=[item.as_id()], body=message.body, error=message.error
- )
- )
- elif isinstance(message, FailedRequestMessage):
- results.append(FailedRequestItems(ids=[item.as_id()], error=message.error))
- else:
- results.append(message)
+ results.extend(responses.as_item_responses(item.as_id()))
  return results

  def retrieve(self, ids: Sequence[int]) -> FileMetadataList:
cognite_toolkit/_cdf_tk/storageio/_base.py
@@ -216,7 +216,9 @@ class UploadableStorageIO(
  raise NotImplementedError()

  @classmethod
- def read_chunks(cls, reader: MultiFileReader) -> Iterable[list[tuple[str, dict[str, JsonVal]]]]:
+ def read_chunks(
+ cls, reader: MultiFileReader, selector: T_Selector
+ ) -> Iterable[list[tuple[str, dict[str, JsonVal]]]]:
  data_name = "row" if reader.is_table else "line"
  # Include name of line for better error messages
  iterable = ((f"{data_name} {line_no}", item) for line_no, item in reader.read_chunks_with_line_numbers())
cognite_toolkit/_cdf_tk/storageio/_datapoints.py
@@ -164,7 +164,9 @@ class DatapointsIO(TableUploadableStorageIO[DataPointsFileSelector, DataPointLis
  )

  @classmethod
- def read_chunks(cls, reader: MultiFileReader) -> Iterable[list[tuple[str, dict[str, JsonVal]]]]:
+ def read_chunks(
+ cls, reader: MultiFileReader, selector: DataPointsFileSelector
+ ) -> Iterable[list[tuple[str, dict[str, JsonVal]]]]:
  if not reader.is_table:
  raise RuntimeError("DatapointsIO can only read from TableReader instances.")
  iterator = iter(reader.read_chunks_with_line_numbers())
cognite_toolkit/_cdf_tk/storageio/_file_content.py
@@ -0,0 +1,149 @@
+ import json
+ import mimetypes
+ from collections.abc import Iterable, MutableSequence, Sequence
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import cast
+
+ from cognite.client.data_classes import FileMetadata, FileMetadataWrite
+
+ from cognite_toolkit._cdf_tk.client import ToolkitClient
+ from cognite_toolkit._cdf_tk.cruds import FileMetadataCRUD
+ from cognite_toolkit._cdf_tk.exceptions import ToolkitNotImplementedError
+ from cognite_toolkit._cdf_tk.utils.collection import chunker_sequence
+ from cognite_toolkit._cdf_tk.utils.fileio import MultiFileReader
+ from cognite_toolkit._cdf_tk.utils.http_client import (
+ DataBodyRequest,
+ ErrorDetails,
+ FailedResponseItems,
+ HTTPClient,
+ HTTPMessage,
+ SimpleBodyRequest,
+ )
+ from cognite_toolkit._cdf_tk.utils.useful_types import JsonVal
+
+ from ._base import Page, UploadableStorageIO, UploadItem
+ from .selectors import FileContentSelector, FileMetadataTemplateSelector
+ from .selectors._file_content import FILEPATH
+
+
+ @dataclass
+ class UploadFileContentItem(UploadItem[FileMetadataWrite]):
+ file_path: Path
+ mime_type: str
+
+
+ class FileContentIO(UploadableStorageIO[FileContentSelector, FileMetadata, FileMetadataWrite]):
+ SUPPORTED_DOWNLOAD_FORMATS = frozenset({".ndjson"})
+ SUPPORTED_COMPRESSIONS = frozenset({".gz"})
+ CHUNK_SIZE = 10
+ BASE_SELECTOR = FileContentSelector
+ KIND = "FileContent"
+ SUPPORTED_READ_FORMATS = frozenset({".ndjson"})
+ UPLOAD_ENDPOINT = "/files"
+
+ def __init__(self, client: ToolkitClient) -> None:
+ super().__init__(client)
+ self._crud = FileMetadataCRUD(client, None, None)
+
+ def as_id(self, item: FileMetadata) -> str:
+ return item.external_id or str(item.id)
+
+ def stream_data(self, selector: FileContentSelector, limit: int | None = None) -> Iterable[Page]:
+ raise NotImplementedError("Download of FileContent is not yet supported")
+
+ def count(self, selector: FileContentSelector) -> int | None:
+ return None
+
+ def data_to_json_chunk(
+ self, data_chunk: Sequence[FileMetadata], selector: FileContentSelector | None = None
+ ) -> list[dict[str, JsonVal]]:
+ raise NotImplementedError("Download of FileContent is not yet supported")
+
+ def json_chunk_to_data(self, data_chunk: list[tuple[str, dict[str, JsonVal]]]) -> Sequence[UploadFileContentItem]:
+ """Convert a JSON-compatible chunk of data back to a writable Cognite resource list.
+
+ Args:
+ data_chunk: A list of tuples, each containing a source ID and a dictionary representing
+ the data in a JSON-compatible format.
+ Returns:
+ A writable Cognite resource list representing the data.
+ """
+ result: list[UploadFileContentItem] = []
+ for source_id, item_json in data_chunk:
+ item = self.json_to_resource(item_json)
+ filepath = cast(Path, item_json[FILEPATH])
+ mime_type, _ = mimetypes.guess_type(filepath)
+ # application/octet-stream is the standard fallback for binary data when the type is unknown. (at least Claude thinks so)
+ result.append(
+ UploadFileContentItem(
+ source_id=source_id,
+ item=item,
+ file_path=filepath,
+ mime_type=mime_type or "application/octet-stream",
+ )
+ )
+ return result
+
+ def json_to_resource(self, item_json: dict[str, JsonVal]) -> FileMetadataWrite:
+ return self._crud.load_resource(item_json)
+
+ def upload_items(
+ self,
+ data_chunk: Sequence[UploadItem[FileMetadataWrite]],
+ http_client: HTTPClient,
+ selector: FileContentSelector | None = None,
+ ) -> Sequence[HTTPMessage]:
+ if not isinstance(selector, FileMetadataTemplateSelector):
+ raise ToolkitNotImplementedError("Only uploading of file metadata is currently supported.")
+ config = http_client.config
+ results: MutableSequence[HTTPMessage] = []
+ for item in cast(Sequence[UploadFileContentItem], data_chunk):
+ responses = http_client.request_with_retries(
+ message=SimpleBodyRequest(
+ endpoint_url=config.create_api_url(self.UPLOAD_ENDPOINT),
+ method="POST",
+ # MyPy does not understand that .dump is valid json
+ body_content=item.dump(), # type: ignore[arg-type]
+ )
+ )
+ try:
+ body = responses.get_first_body()
+ except ValueError:
+ results.extend(responses.as_item_responses(item.as_id()))
+ continue
+ try:
+ upload_url = cast(str, body["uploadUrl"])
+ except (KeyError, IndexError):
+ results.append(
+ FailedResponseItems(
+ status_code=200,
+ body=json.dumps(body),
+ error=ErrorDetails(code=200, message="Malformed response"),
+ ids=[item.as_id()],
+ )
+ )
+ continue
+
+ upload_response = http_client.request_with_retries(
+ message=DataBodyRequest(
+ endpoint_url=upload_url,
+ method="PUT",
+ content_type=item.mime_type,
+ data_content=item.file_path.read_bytes(),
+ )
+ )
+ results.extend(upload_response.as_item_responses(item.as_id()))
+ return results
+
+ @classmethod
+ def read_chunks(
+ cls, reader: MultiFileReader, selector: FileContentSelector
+ ) -> Iterable[list[tuple[str, dict[str, JsonVal]]]]:
+ for chunk in chunker_sequence(reader.input_files, cls.CHUNK_SIZE):
+ batch: list[tuple[str, dict[str, JsonVal]]] = []
+ for file_path in chunk:
+ metadata = selector.create_instance(file_path)
+ metadata[FILEPATH] = file_path
+ batch.append((str(file_path), metadata))
+ yield batch
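upload_items performs a two-step upload per file: the metadata is POSTed to the /files endpoint, the uploadUrl is extracted from the response body, and the raw bytes are PUT to that URL with the guessed MIME type. A simplified sketch of the same flow using the requests library (base_url, token, and the lack of retries are illustrative; the Toolkit itself routes both calls through its HTTPClient):

    import mimetypes
    from pathlib import Path

    import requests

    def upload_file(base_url: str, token: str, file_path: Path, metadata: dict) -> None:
        headers = {"Authorization": f"Bearer {token}"}
        # Step 1: create the file metadata; the response carries a pre-signed uploadUrl.
        created = requests.post(f"{base_url}/files", json=metadata, headers=headers)
        created.raise_for_status()
        upload_url = created.json()["uploadUrl"]
        # Step 2: PUT the raw bytes to that URL, falling back to application/octet-stream.
        mime_type, _ = mimetypes.guess_type(file_path)
        resp = requests.put(
            upload_url,
            data=file_path.read_bytes(),
            headers={"Content-Type": mime_type or "application/octet-stream"},
        )
        resp.raise_for_status()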
cognite_toolkit/_cdf_tk/storageio/selectors/__init__.py
@@ -13,6 +13,12 @@ from ._datapoints import (
  InternalIdColumn,
  TimeSeriesColumn,
  )
+ from ._file_content import (
+ FileContentSelector,
+ FileDataModelingTemplateSelector,
+ FileMetadataTemplate,
+ FileMetadataTemplateSelector,
+ )
  from ._instances import (
  InstanceFileSelector,
  InstanceSelector,
@@ -33,7 +39,9 @@ Selector = Annotated[
  | AssetCentricFileSelector
  | DataSetSelector
  | DataPointsFileSelector
- | ChartExternalIdSelector,
+ | ChartExternalIdSelector
+ | FileMetadataTemplateSelector
+ | FileDataModelingTemplateSelector,
  Field(discriminator="type"),
  ]

@@ -53,6 +61,10 @@ __all__ = [
  "DataSelector",
  "DataSetSelector",
  "ExternalIdColumn",
+ "FileContentSelector",
+ "FileDataModelingTemplateSelector",
+ "FileMetadataTemplate",
+ "FileMetadataTemplateSelector",
  "InstanceColumn",
  "InstanceFileSelector",
  "InstanceSelector",
cognite_toolkit/_cdf_tk/storageio/selectors/_base.py
@@ -4,7 +4,7 @@ from pathlib import Path
  from pydantic import BaseModel, ConfigDict
  from pydantic.alias_generators import to_camel

- from cognite_toolkit._cdf_tk.constants import DATA_MANIFEST_STEM
+ from cognite_toolkit._cdf_tk.constants import DATA_MANIFEST_SUFFIX
  from cognite_toolkit._cdf_tk.utils.file import safe_write, sanitize_filename, yaml_safe_dump
  from cognite_toolkit._cdf_tk.utils.text import to_sentence_case
  from cognite_toolkit._cdf_tk.utils.useful_types import JsonVal
@@ -41,7 +41,7 @@ class DataSelector(SelectorObject, ABC):
  directory: The directory where the YAML file will be saved.
  """

- filepath = directory / f"{sanitize_filename(str(self))}.{DATA_MANIFEST_STEM}.yaml"
+ filepath = directory / f"{sanitize_filename(str(self))}{DATA_MANIFEST_SUFFIX}"
  filepath.parent.mkdir(parents=True, exist_ok=True)
  safe_write(file=filepath, content=yaml_safe_dump(self.model_dump(mode="json", by_alias=True)), encoding="utf-8")
  return filepath
@@ -66,3 +66,15 @@ class DataSelector(SelectorObject, ABC):
  def __str__(self) -> str:
  # We want to force subclasses to implement __str__
  raise NotImplementedError()
+
+ def find_data_files(self, input_dir: Path, manifest_file: Path) -> list[Path]:
+ """Find data files in the specified input directory that match this selector.
+
+ Args:
+ input_dir: The directory to search for data files.
+ manifest_file: The manifest file that describes the data files.
+ Returns:
+ A list of Paths to the data files that match this selector.
+ """
+ data_file_prefix = manifest_file.name.removesuffix(DATA_MANIFEST_SUFFIX)
+ return [file for file in input_dir.glob(f"{data_file_prefix}*") if not file.name.endswith(DATA_MANIFEST_SUFFIX)]
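The default find_data_files pairs a manifest with its data files purely by shared file-name prefix, using the new DATA_MANIFEST_SUFFIX constant. A small worked example with a hypothetical directory layout (file names are illustrative):

    from pathlib import Path

    DATA_MANIFEST_SUFFIX = ".Manifest.yaml"

    # Hypothetical download directory contents:
    #   assets.Manifest.yaml     <- the manifest itself
    #   assets.ndjson.gz         <- shares the "assets" prefix, returned
    #   assets.part2.ndjson.gz   <- also returned
    #   events.Manifest.yaml     <- different prefix, ignored
    manifest_file = Path("assets.Manifest.yaml")
    prefix = manifest_file.name.removesuffix(DATA_MANIFEST_SUFFIX)
    print(prefix)  # "assets": glob pattern "assets*", minus anything ending in .Manifest.yaml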
cognite_toolkit/_cdf_tk/storageio/selectors/_file_content.py
@@ -0,0 +1,95 @@
+ import json
+ from abc import ABC, abstractmethod
+ from pathlib import Path
+ from typing import Any, Literal
+
+ from pydantic import ConfigDict, field_validator
+
+ from ._base import DataSelector, SelectorObject
+ from ._instances import SelectedView
+
+ FILENAME_VARIABLE = "$FILENAME"
+ FILEPATH = "$FILEPATH"
+
+
+ class FileContentSelector(DataSelector, ABC):
+ kind: Literal["FileContent"] = "FileContent"
+ file_directory: Path
+
+ def find_data_files(self, input_dir: Path, manifest_file: Path) -> list[Path]:
+ file_dir = input_dir / self.file_directory
+ if not file_dir.is_dir():
+ return []
+ return [file for file in file_dir.iterdir() if file.is_file()]
+
+ @abstractmethod
+ def create_instance(self, filepath: Path) -> dict[str, Any]: ...
+
+
+ class FileTemplate(SelectorObject):
+ model_config = ConfigDict(extra="allow")
+
+ def create_instance(self, filename: str) -> dict[str, Any]:
+ json_str = self.model_dump_json(by_alias=True)
+ return json.loads(json_str.replace(FILENAME_VARIABLE, filename))
+
+
+ class FileMetadataTemplate(FileTemplate):
+ name: str
+ external_id: str
+
+ @field_validator("name", "external_id")
+ @classmethod
+ def _validate_filename_in_fields(cls, v: str) -> str:
+ if FILENAME_VARIABLE not in v:
+ raise ValueError(
+ f"{FILENAME_VARIABLE!s} must be present in 'name' and 'external_id' fields. "
+ f"This allows for dynamic substitution based on the file name."
+ )
+ return v
+
+
+ class FileMetadataTemplateSelector(FileContentSelector):
+ type: Literal["fileMetadataTemplate"] = "fileMetadataTemplate"
+ template: FileMetadataTemplate
+
+ @property
+ def group(self) -> str:
+ return "FileMetadata"
+
+ def __str__(self) -> str:
+ return "metadata_template"
+
+ def create_instance(self, filepath: Path) -> dict[str, Any]:
+ return self.template.create_instance(filepath.name)
+
+
+ class FileDataModelingTemplate(FileTemplate):
+ space: str
+ external_id: str
+
+ @field_validator("external_id")
+ @classmethod
+ def _validate_filename_in_fields(cls, v: str) -> str:
+ if FILENAME_VARIABLE not in v:
+ raise ValueError(
+ f"{FILENAME_VARIABLE!s} must be present in 'external_id' field. "
+ f"This allows for dynamic substitution based on the file name."
+ )
+ return v
+
+
+ class FileDataModelingTemplateSelector(FileContentSelector):
+ type: Literal["fileDataModelingTemplate"] = "fileDataModelingTemplate"
+ view_id: SelectedView
+ template: FileDataModelingTemplate
+
+ @property
+ def group(self) -> str:
+ return "FileDataModeling"
+
+ def __str__(self) -> str:
+ return "data_modeling_template"
+
+ def create_instance(self, filepath: Path) -> dict[str, Any]:
+ return self.template.create_instance(filepath.name)
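Both template selectors build one metadata dictionary per file in file_directory by dumping the template to JSON and replacing every occurrence of $FILENAME with the actual file name, which is why the validators insist the placeholder appears in name/external_id. A minimal sketch of that substitution without pydantic (the field values and the extra "source" field are illustrative):

    import json

    FILENAME_VARIABLE = "$FILENAME"

    def create_instance(template: dict, filename: str) -> dict:
        # Same trick as FileTemplate.create_instance: dump to JSON, substitute, reload.
        return json.loads(json.dumps(template).replace(FILENAME_VARIABLE, filename))

    template = {"name": "$FILENAME", "externalId": "doc_$FILENAME", "source": "field-reports"}
    print(create_instance(template, "P-1234.pdf"))
    # {'name': 'P-1234.pdf', 'externalId': 'doc_P-1234.pdf', 'source': 'field-reports'}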
cognite_toolkit/_cdf_tk/utils/fileio/_readers.py
@@ -5,7 +5,7 @@ from abc import ABC, abstractmethod
  from collections import Counter, defaultdict
  from collections.abc import Callable, Iterator, Mapping, Sequence
  from dataclasses import dataclass
- from functools import partial
+ from functools import cached_property, partial
  from io import TextIOWrapper
  from pathlib import Path
  from typing import Any
@@ -75,17 +75,25 @@ class MultiFileReader(FileReader):
  def __init__(self, input_files: Sequence[Path]) -> None:
  super().__init__(input_file=input_files[0])
  self.input_files = input_files
+
+ @cached_property
+ def reader_class(self) -> type[FileReader]:
+ """Determine the reader class based on the input files."""
  reader_classes = Counter([FileReader.from_filepath(input_file) for input_file in self.input_files])
  if len(reader_classes) > 1:
  raise ToolkitValueError(
  "All input files must be of the same format. "
  f"Found formats: {humanize_collection([cls.FORMAT for cls in reader_classes.keys()])}."
  )
- self.reader_class = reader_classes.most_common(1)[0][0]
+ return reader_classes.most_common(1)[0][0]

  @property
  def is_table(self) -> bool:
- return issubclass(self.reader_class, TableReader)
+ try:
+ return issubclass(self.reader_class, TableReader)
+ except ValueError:
+ # The input files are not a known format, so it is not a table.
+ return False

  @property
  def format(self) -> str:
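Turning reader_class into a cached_property moves format detection out of __init__, so a MultiFileReader over arbitrary files (which FileContentIO only iterates via input_files) can be constructed without raising; an unknown format now surfaces only if reader_class is actually accessed, and is_table converts it to False. A simplified sketch of that lazy behaviour (the stand-in class is illustrative, not the real reader):

    from functools import cached_property

    class LazyReaderSketch:
        def __init__(self, input_files: list[str]) -> None:
            self.input_files = input_files  # no format detection here any more

        @cached_property
        def reader_class(self) -> type:
            # Stand-in for FileReader.from_filepath rejecting an unknown suffix.
            raise ValueError("unknown file format")

        @property
        def is_table(self) -> bool:
            try:
                return issubclass(self.reader_class, dict)  # placeholder table check
            except ValueError:
                return False

    reader = LazyReaderSketch(["report.pdf"])  # constructing succeeds
    print(reader.is_table)                     # False, instead of failing in __init__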
cognite_toolkit/_cdf_tk/utils/http_client/_data_classes.py
@@ -1,6 +1,6 @@
  from abc import ABC, abstractmethod
  from collections import UserList
- from collections.abc import Sequence
+ from collections.abc import Hashable, Sequence
  from dataclasses import dataclass, field
  from typing import Generic, Literal, Protocol, TypeAlias, TypeVar

@@ -352,6 +352,24 @@ class ResponseList(UserList[ResponseMessage | FailedRequestMessage]):
  return _json.loads(resp.body)
  raise ValueError("No successful responses with a body found.")

+ def as_item_responses(self, item_id: Hashable) -> list[ResponseMessage | FailedRequestMessage]:
+ # Convert the responses to per-item responses
+ results: list[ResponseMessage | FailedRequestMessage] = []
+ for message in self.data:
+ if isinstance(message, SuccessResponse):
+ results.append(SuccessResponseItems(status_code=message.status_code, ids=[item_id], body=message.body))
+ elif isinstance(message, FailedResponse):
+ results.append(
+ FailedResponseItems(
+ status_code=message.status_code, ids=[item_id], body=message.body, error=message.error
+ )
+ )
+ elif isinstance(message, FailedRequestMessage):
+ results.append(FailedRequestItems(ids=[item_id], error=message.error))
+ else:
+ results.append(message)
+ return results
+

  def _dump_body(body: dict[str, JsonVal]) -> str:
  try:
cognite_toolkit/_repo_files/GitHub/.github/workflows/deploy.yaml
@@ -12,7 +12,7 @@ jobs:
  environment: dev
  name: Deploy
  container:
- image: cognite/toolkit:0.6.111
+ image: cognite/toolkit:0.6.113
  env:
  CDF_CLUSTER: ${{ vars.CDF_CLUSTER }}
  CDF_PROJECT: ${{ vars.CDF_PROJECT }}
cognite_toolkit/_repo_files/GitHub/.github/workflows/dry-run.yaml
@@ -10,7 +10,7 @@ jobs:
  environment: dev
  name: Deploy Dry Run
  container:
- image: cognite/toolkit:0.6.111
+ image: cognite/toolkit:0.6.113
  env:
  CDF_CLUSTER: ${{ vars.CDF_CLUSTER }}
  CDF_PROJECT: ${{ vars.CDF_PROJECT }}
cognite_toolkit/_resources/cdf.toml
@@ -4,7 +4,7 @@ default_env = "<DEFAULT_ENV_PLACEHOLDER>"
  [modules]
  # This is the version of the modules. It should not be changed manually.
  # It will be updated by the 'cdf modules upgrade' command.
- version = "0.6.111"
+ version = "0.6.113"

  [alpha_flags]
  external-libraries = true