cognite-toolkit 0.6.111__py3-none-any.whl → 0.6.112__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognite_toolkit/_cdf_tk/apps/_download_app.py +307 -25
- cognite_toolkit/_cdf_tk/client/data_classes/base.py +25 -1
- cognite_toolkit/_cdf_tk/client/data_classes/infield.py +6 -21
- cognite_toolkit/_cdf_tk/client/data_classes/streams.py +10 -39
- cognite_toolkit/_cdf_tk/commands/_upload.py +15 -28
- cognite_toolkit/_cdf_tk/constants.py +1 -0
- cognite_toolkit/_cdf_tk/storageio/__init__.py +4 -16
- cognite_toolkit/_cdf_tk/storageio/_asset_centric.py +4 -23
- cognite_toolkit/_cdf_tk/storageio/_base.py +3 -1
- cognite_toolkit/_cdf_tk/storageio/_datapoints.py +3 -1
- cognite_toolkit/_cdf_tk/storageio/_file_content.py +149 -0
- cognite_toolkit/_cdf_tk/storageio/selectors/__init__.py +13 -1
- cognite_toolkit/_cdf_tk/storageio/selectors/_base.py +14 -2
- cognite_toolkit/_cdf_tk/storageio/selectors/_file_content.py +95 -0
- cognite_toolkit/_cdf_tk/utils/fileio/_readers.py +11 -3
- cognite_toolkit/_cdf_tk/utils/http_client/_data_classes.py +19 -1
- cognite_toolkit/_repo_files/GitHub/.github/workflows/deploy.yaml +1 -1
- cognite_toolkit/_repo_files/GitHub/.github/workflows/dry-run.yaml +1 -1
- cognite_toolkit/_resources/cdf.toml +1 -1
- cognite_toolkit/_version.py +1 -1
- {cognite_toolkit-0.6.111.dist-info → cognite_toolkit-0.6.112.dist-info}/METADATA +1 -1
- {cognite_toolkit-0.6.111.dist-info → cognite_toolkit-0.6.112.dist-info}/RECORD +25 -23
- {cognite_toolkit-0.6.111.dist-info → cognite_toolkit-0.6.112.dist-info}/WHEEL +0 -0
- {cognite_toolkit-0.6.111.dist-info → cognite_toolkit-0.6.112.dist-info}/entry_points.txt +0 -0
- {cognite_toolkit-0.6.111.dist-info → cognite_toolkit-0.6.112.dist-info}/licenses/LICENSE +0 -0
cognite_toolkit/_cdf_tk/storageio/__init__.py CHANGED

@@ -1,7 +1,4 @@
-from pathlib import Path
-
 from cognite_toolkit._cdf_tk.utils._auxiliary import get_concrete_subclasses
-from cognite_toolkit._cdf_tk.utils.fileio import COMPRESSION_BY_SUFFIX
 
 from ._annotations import AnnotationIO
 from ._applications import CanvasIO, ChartIO
@@ -25,6 +22,7 @@ from ._base import (
 )
 from ._data_classes import InstanceIdCSVList, InstanceIdRow, ModelList
 from ._datapoints import DatapointsIO
+from ._file_content import FileContentIO
 from ._instances import InstanceIO
 from ._raw import RawIO
 from .selectors._base import DataSelector
@@ -34,24 +32,14 @@ STORAGE_IO_CLASSES = get_concrete_subclasses(StorageIO)  # type: ignore[type-abs
 UPLOAD_IO_CLASSES = get_concrete_subclasses(UploadableStorageIO)  # type: ignore[type-abstract]
 
 
-def get_upload_io(selector_cls: type[DataSelector]
+def get_upload_io(selector_cls: type[DataSelector]) -> type[UploadableStorageIO]:
     """Get the appropriate UploadableStorageIO class based on the type of the provided selector."""
     for cls in UPLOAD_IO_CLASSES:
-        if issubclass(selector_cls, cls.BASE_SELECTOR)
+        if issubclass(selector_cls, cls.BASE_SELECTOR):
             return cls
     raise ValueError(f"No UploadableStorageIO found for selector of type {selector_cls.__name__}")
 
 
-def are_same_kind(kind: str, kind_or_path: str | Path, /) -> bool:
-    """Check if two kinds are the same, ignoring case and compression suffixes."""
-    if not isinstance(kind_or_path, Path):
-        return kind.casefold() == kind_or_path.casefold()
-    stem = kind_or_path.stem
-    if kind_or_path.suffix in COMPRESSION_BY_SUFFIX:
-        stem = Path(stem).stem
-    return stem.lower().endswith(kind.casefold())
-
-
 __all__ = [
     "AnnotationIO",
     "AssetIO",
@@ -61,6 +49,7 @@ __all__ = [
     "ConfigurableStorageIO",
     "DatapointsIO",
     "EventIO",
+    "FileContentIO",
     "FileMetadataIO",
     "HierarchyIO",
     "InstanceIO",
@@ -76,6 +65,5 @@ __all__ = [
     "TimeSeriesIO",
     "UploadItem",
     "UploadableStorageIO",
-    "are_same_kind",
     "get_upload_io",
 ]
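
With are_same_kind removed, get_upload_io now resolves the uploader from the selector type alone. A minimal sketch of the new dispatch, using only names this package exports; given the classes added in this release, the lookup should land on FileContentIO, since FileMetadataTemplateSelector subclasses its BASE_SELECTOR (FileContentSelector):

# Sketch: dispatch is a plain issubclass() scan over the concrete UploadableStorageIO subclasses.
from cognite_toolkit._cdf_tk.storageio import get_upload_io
from cognite_toolkit._cdf_tk.storageio.selectors import FileMetadataTemplateSelector

io_cls = get_upload_io(FileMetadataTemplateSelector)
print(io_cls.__name__)  # expected: FileContentIO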
cognite_toolkit/_cdf_tk/storageio/_asset_centric.py CHANGED

@@ -51,15 +51,9 @@ from cognite_toolkit._cdf_tk.utils.cdf import metadata_key_counts
 from cognite_toolkit._cdf_tk.utils.fileio import FileReader, SchemaColumn
 from cognite_toolkit._cdf_tk.utils.fileio._readers import TableReader
 from cognite_toolkit._cdf_tk.utils.http_client import (
-    FailedRequestItems,
-    FailedRequestMessage,
-    FailedResponse,
-    FailedResponseItems,
     HTTPClient,
     HTTPMessage,
     SimpleBodyRequest,
-    SuccessResponse,
-    SuccessResponseItems,
 )
 from cognite_toolkit._cdf_tk.utils.useful_types import (
     T_ID,
@@ -347,7 +341,9 @@ class AssetIO(BaseAssetCentricIO[str, AssetWrite, Asset, AssetWriteList, AssetLi
         return self.client.assets.retrieve_multiple(ids)
 
     @classmethod
-    def read_chunks(
+    def read_chunks(
+        cls, reader: FileReader, selector: AssetCentricSelector
+    ) -> Iterable[list[tuple[str, dict[str, JsonVal]]]]:
         """Assets require special handling when reading data to ensure parent assets are created first."""
         current_depth = max_depth = 0
         data_name = "row" if isinstance(reader, TableReader) else "line"
@@ -460,22 +456,7 @@ class FileMetadataIO(BaseAssetCentricIO[str, FileMetadataWrite, FileMetadata, Fi
                     body_content=item.dump(),  # type: ignore[arg-type]
                 )
             )
-
-            for message in responses:
-                if isinstance(message, SuccessResponse):
-                    results.append(
-                        SuccessResponseItems(status_code=message.status_code, ids=[item.as_id()], body=message.body)
-                    )
-                elif isinstance(message, FailedResponse):
-                    results.append(
-                        FailedResponseItems(
-                            status_code=message.status_code, ids=[item.as_id()], body=message.body, error=message.error
-                        )
-                    )
-                elif isinstance(message, FailedRequestMessage):
-                    results.append(FailedRequestItems(ids=[item.as_id()], error=message.error))
-                else:
-                    results.append(message)
+            results.extend(responses.as_item_responses(item.as_id()))
         return results
 
     def retrieve(self, ids: Sequence[int]) -> FileMetadataList:
cognite_toolkit/_cdf_tk/storageio/_base.py CHANGED

@@ -216,7 +216,9 @@ class UploadableStorageIO(
         raise NotImplementedError()
 
     @classmethod
-    def read_chunks(
+    def read_chunks(
+        cls, reader: MultiFileReader, selector: T_Selector
+    ) -> Iterable[list[tuple[str, dict[str, JsonVal]]]]:
         data_name = "row" if reader.is_table else "line"
         # Include name of line for better error messages
         iterable = ((f"{data_name} {line_no}", item) for line_no, item in reader.read_chunks_with_line_numbers())
cognite_toolkit/_cdf_tk/storageio/_datapoints.py CHANGED

@@ -164,7 +164,9 @@ class DatapointsIO(TableUploadableStorageIO[DataPointsFileSelector, DataPointLis
         )
 
     @classmethod
-    def read_chunks(
+    def read_chunks(
+        cls, reader: MultiFileReader, selector: DataPointsFileSelector
+    ) -> Iterable[list[tuple[str, dict[str, JsonVal]]]]:
         if not reader.is_table:
             raise RuntimeError("DatapointsIO can only read from TableReader instances.")
         iterator = iter(reader.read_chunks_with_line_numbers())
cognite_toolkit/_cdf_tk/storageio/_file_content.py ADDED

@@ -0,0 +1,149 @@
+import json
+import mimetypes
+from collections.abc import Iterable, MutableSequence, Sequence
+from dataclasses import dataclass
+from pathlib import Path
+from typing import cast
+
+from cognite.client.data_classes import FileMetadata, FileMetadataWrite
+
+from cognite_toolkit._cdf_tk.client import ToolkitClient
+from cognite_toolkit._cdf_tk.cruds import FileMetadataCRUD
+from cognite_toolkit._cdf_tk.exceptions import ToolkitNotImplementedError
+from cognite_toolkit._cdf_tk.utils.collection import chunker_sequence
+from cognite_toolkit._cdf_tk.utils.fileio import MultiFileReader
+from cognite_toolkit._cdf_tk.utils.http_client import (
+    DataBodyRequest,
+    ErrorDetails,
+    FailedResponseItems,
+    HTTPClient,
+    HTTPMessage,
+    SimpleBodyRequest,
+)
+from cognite_toolkit._cdf_tk.utils.useful_types import JsonVal
+
+from ._base import Page, UploadableStorageIO, UploadItem
+from .selectors import FileContentSelector, FileMetadataTemplateSelector
+from .selectors._file_content import FILEPATH
+
+
+@dataclass
+class UploadFileContentItem(UploadItem[FileMetadataWrite]):
+    file_path: Path
+    mime_type: str
+
+
+class FileContentIO(UploadableStorageIO[FileContentSelector, FileMetadata, FileMetadataWrite]):
+    SUPPORTED_DOWNLOAD_FORMATS = frozenset({".ndjson"})
+    SUPPORTED_COMPRESSIONS = frozenset({".gz"})
+    CHUNK_SIZE = 10
+    BASE_SELECTOR = FileContentSelector
+    KIND = "FileContent"
+    SUPPORTED_READ_FORMATS = frozenset({".ndjson"})
+    UPLOAD_ENDPOINT = "/files"
+
+    def __init__(self, client: ToolkitClient) -> None:
+        super().__init__(client)
+        self._crud = FileMetadataCRUD(client, None, None)
+
+    def as_id(self, item: FileMetadata) -> str:
+        return item.external_id or str(item.id)
+
+    def stream_data(self, selector: FileContentSelector, limit: int | None = None) -> Iterable[Page]:
+        raise NotImplementedError("Download of FileContent is not yet supported")
+
+    def count(self, selector: FileContentSelector) -> int | None:
+        return None
+
+    def data_to_json_chunk(
+        self, data_chunk: Sequence[FileMetadata], selector: FileContentSelector | None = None
+    ) -> list[dict[str, JsonVal]]:
+        raise NotImplementedError("Download of FileContent is not yet supported")
+
+    def json_chunk_to_data(self, data_chunk: list[tuple[str, dict[str, JsonVal]]]) -> Sequence[UploadFileContentItem]:
+        """Convert a JSON-compatible chunk of data back to a writable Cognite resource list.
+
+        Args:
+            data_chunk: A list of tuples, each containing a source ID and a dictionary representing
+                the data in a JSON-compatible format.
+        Returns:
+            A writable Cognite resource list representing the data.
+        """
+        result: list[UploadFileContentItem] = []
+        for source_id, item_json in data_chunk:
+            item = self.json_to_resource(item_json)
+            filepath = cast(Path, item_json[FILEPATH])
+            mime_type, _ = mimetypes.guess_type(filepath)
+            # application/octet-stream is the standard fallback for binary data when the type is unknown. (at least Claude thinks so)
+            result.append(
+                UploadFileContentItem(
+                    source_id=source_id,
+                    item=item,
+                    file_path=filepath,
+                    mime_type=mime_type or "application/octet-stream",
+                )
+            )
+        return result
+
+    def json_to_resource(self, item_json: dict[str, JsonVal]) -> FileMetadataWrite:
+        return self._crud.load_resource(item_json)
+
+    def upload_items(
+        self,
+        data_chunk: Sequence[UploadItem[FileMetadataWrite]],
+        http_client: HTTPClient,
+        selector: FileContentSelector | None = None,
+    ) -> Sequence[HTTPMessage]:
+        if not isinstance(selector, FileMetadataTemplateSelector):
+            raise ToolkitNotImplementedError("Only uploading of file metadata is currently supported.")
+        config = http_client.config
+        results: MutableSequence[HTTPMessage] = []
+        for item in cast(Sequence[UploadFileContentItem], data_chunk):
+            responses = http_client.request_with_retries(
+                message=SimpleBodyRequest(
+                    endpoint_url=config.create_api_url(self.UPLOAD_ENDPOINT),
+                    method="POST",
+                    # MyPy does not understand that .dump is valid json
+                    body_content=item.dump(),  # type: ignore[arg-type]
+                )
+            )
+            try:
+                body = responses.get_first_body()
+            except ValueError:
+                results.extend(responses.as_item_responses(item.as_id()))
+                continue
+            try:
+                upload_url = cast(str, body["uploadUrl"])
+            except (KeyError, IndexError):
+                results.append(
+                    FailedResponseItems(
+                        status_code=200,
+                        body=json.dumps(body),
+                        error=ErrorDetails(code=200, message="Malformed response"),
+                        ids=[item.as_id()],
+                    )
+                )
+                continue
+
+            upload_response = http_client.request_with_retries(
+                message=DataBodyRequest(
+                    endpoint_url=upload_url,
+                    method="PUT",
+                    content_type=item.mime_type,
+                    data_content=item.file_path.read_bytes(),
+                )
+            )
+            results.extend(upload_response.as_item_responses(item.as_id()))
+        return results
+
+    @classmethod
+    def read_chunks(
+        cls, reader: MultiFileReader, selector: FileContentSelector
+    ) -> Iterable[list[tuple[str, dict[str, JsonVal]]]]:
+        for chunk in chunker_sequence(reader.input_files, cls.CHUNK_SIZE):
+            batch: list[tuple[str, dict[str, JsonVal]]] = []
+            for file_path in chunk:
+                metadata = selector.create_instance(file_path)
+                metadata[FILEPATH] = file_path
+                batch.append((str(file_path), metadata))
+            yield batch
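
The new FileContentIO.upload_items performs a two-step upload per file: POST the file metadata to the /files endpoint to obtain an uploadUrl, then PUT the raw bytes to that URL with the guessed MIME type. A rough standalone illustration of the same flow against the CDF Files API using plain requests; the base URL, project, and token are placeholders, the API path prefix is an assumption, and the retry and per-item error handling of the real implementation are omitted:

import mimetypes
from pathlib import Path

import requests

BASE_URL = "https://api.cognitedata.com"  # placeholder cluster URL
PROJECT = "my-project"                    # placeholder project name
HEADERS = {"Authorization": "Bearer <TOKEN>"}  # placeholder credentials


def upload_one(file_path: Path, metadata: dict) -> None:
    # Step 1: create the file metadata; the response carries an uploadUrl.
    created = requests.post(f"{BASE_URL}/api/v1/projects/{PROJECT}/files", json=metadata, headers=HEADERS)
    created.raise_for_status()
    upload_url = created.json()["uploadUrl"]
    # Step 2: PUT the raw bytes, falling back to application/octet-stream when the type is unknown.
    mime_type, _ = mimetypes.guess_type(file_path)
    requests.put(
        upload_url,
        data=file_path.read_bytes(),
        headers={"Content-Type": mime_type or "application/octet-stream"},
    ).raise_for_status()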
cognite_toolkit/_cdf_tk/storageio/selectors/__init__.py CHANGED

@@ -13,6 +13,12 @@ from ._datapoints import (
     InternalIdColumn,
     TimeSeriesColumn,
 )
+from ._file_content import (
+    FileContentSelector,
+    FileDataModelingTemplateSelector,
+    FileMetadataTemplate,
+    FileMetadataTemplateSelector,
+)
 from ._instances import (
     InstanceFileSelector,
     InstanceSelector,
@@ -33,7 +39,9 @@ Selector = Annotated[
     | AssetCentricFileSelector
     | DataSetSelector
     | DataPointsFileSelector
-    | ChartExternalIdSelector
+    | ChartExternalIdSelector
+    | FileMetadataTemplateSelector
+    | FileDataModelingTemplateSelector,
     Field(discriminator="type"),
 ]
 
@@ -53,6 +61,10 @@ __all__ = [
     "DataSelector",
     "DataSetSelector",
     "ExternalIdColumn",
+    "FileContentSelector",
+    "FileDataModelingTemplateSelector",
+    "FileMetadataTemplate",
+    "FileMetadataTemplateSelector",
     "InstanceColumn",
     "InstanceFileSelector",
     "InstanceSelector",
cognite_toolkit/_cdf_tk/storageio/selectors/_base.py CHANGED

@@ -4,7 +4,7 @@ from pathlib import Path
 from pydantic import BaseModel, ConfigDict
 from pydantic.alias_generators import to_camel
 
-from cognite_toolkit._cdf_tk.constants import
+from cognite_toolkit._cdf_tk.constants import DATA_MANIFEST_SUFFIX
 from cognite_toolkit._cdf_tk.utils.file import safe_write, sanitize_filename, yaml_safe_dump
 from cognite_toolkit._cdf_tk.utils.text import to_sentence_case
 from cognite_toolkit._cdf_tk.utils.useful_types import JsonVal
@@ -41,7 +41,7 @@ class DataSelector(SelectorObject, ABC):
             directory: The directory where the YAML file will be saved.
         """
 
-        filepath = directory / f"{sanitize_filename(str(self))}
+        filepath = directory / f"{sanitize_filename(str(self))}{DATA_MANIFEST_SUFFIX}"
         filepath.parent.mkdir(parents=True, exist_ok=True)
         safe_write(file=filepath, content=yaml_safe_dump(self.model_dump(mode="json", by_alias=True)), encoding="utf-8")
         return filepath
@@ -66,3 +66,15 @@ class DataSelector(SelectorObject, ABC):
     def __str__(self) -> str:
         # We want to force subclasses to implement __str__
         raise NotImplementedError()
+
+    def find_data_files(self, input_dir: Path, manifest_file: Path) -> list[Path]:
+        """Find data files in the specified input directory that match this selector.
+
+        Args:
+            input_dir: The directory to search for data files.
+            manifest_file: The manifest file that describes the data files.
+        Returns:
+            A list of Paths to the data files that match this selector.
+        """
+        data_file_prefix = manifest_file.name.removesuffix(DATA_MANIFEST_SUFFIX)
+        return [file for file in input_dir.glob(f"{data_file_prefix}*") if not file.name.endswith(DATA_MANIFEST_SUFFIX)]
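
The new default DataSelector.find_data_files pairs a data manifest with its data files purely by filename prefix. A small sketch of that matching; DATA_MANIFEST_SUFFIX is added in constants.py but its value is not shown in this diff, so the value below is an assumption for illustration:

from pathlib import Path

DATA_MANIFEST_SUFFIX = ".DataManifest.yaml"  # assumed value, for illustration only


def find_data_files(input_dir: Path, manifest_file: Path) -> list[Path]:
    # The manifest name minus the suffix is the prefix shared by its data files,
    # e.g. "Asset.DataManifest.yaml" would pair with "Asset.ndjson.gz" and "Asset-part2.ndjson.gz".
    prefix = manifest_file.name.removesuffix(DATA_MANIFEST_SUFFIX)
    return [f for f in input_dir.glob(f"{prefix}*") if not f.name.endswith(DATA_MANIFEST_SUFFIX)]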
cognite_toolkit/_cdf_tk/storageio/selectors/_file_content.py ADDED

@@ -0,0 +1,95 @@
+import json
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import Any, Literal
+
+from pydantic import ConfigDict, field_validator
+
+from ._base import DataSelector, SelectorObject
+from ._instances import SelectedView
+
+FILENAME_VARIABLE = "$FILENAME"
+FILEPATH = "$FILEPATH"
+
+
+class FileContentSelector(DataSelector, ABC):
+    kind: Literal["FileContent"] = "FileContent"
+    file_directory: Path
+
+    def find_data_files(self, input_dir: Path, manifest_file: Path) -> list[Path]:
+        file_dir = input_dir / self.file_directory
+        if not file_dir.is_dir():
+            return []
+        return [file for file in file_dir.iterdir() if file.is_file()]
+
+    @abstractmethod
+    def create_instance(self, filepath: Path) -> dict[str, Any]: ...
+
+
+class FileTemplate(SelectorObject):
+    model_config = ConfigDict(extra="allow")
+
+    def create_instance(self, filename: str) -> dict[str, Any]:
+        json_str = self.model_dump_json(by_alias=True)
+        return json.loads(json_str.replace(FILENAME_VARIABLE, filename))
+
+
+class FileMetadataTemplate(FileTemplate):
+    name: str
+    external_id: str
+
+    @field_validator("name", "external_id")
+    @classmethod
+    def _validate_filename_in_fields(cls, v: str) -> str:
+        if FILENAME_VARIABLE not in v:
+            raise ValueError(
+                f"{FILENAME_VARIABLE!s} must be present in 'name' and 'external_id' fields. "
+                f"This allows for dynamic substitution based on the file name."
+            )
+        return v
+
+
+class FileMetadataTemplateSelector(FileContentSelector):
+    type: Literal["fileMetadataTemplate"] = "fileMetadataTemplate"
+    template: FileMetadataTemplate
+
+    @property
+    def group(self) -> str:
+        return "FileMetadata"
+
+    def __str__(self) -> str:
+        return "metadata_template"
+
+    def create_instance(self, filepath: Path) -> dict[str, Any]:
+        return self.template.create_instance(filepath.name)
+
+
+class FileDataModelingTemplate(FileTemplate):
+    space: str
+    external_id: str
+
+    @field_validator("external_id")
+    @classmethod
+    def _validate_filename_in_fields(cls, v: str) -> str:
+        if FILENAME_VARIABLE not in v:
+            raise ValueError(
+                f"{FILENAME_VARIABLE!s} must be present in 'external_id' field. "
+                f"This allows for dynamic substitution based on the file name."
+            )
+        return v
+
+
+class FileDataModelingTemplateSelector(FileContentSelector):
+    type: Literal["fileDataModelingTemplate"] = "fileDataModelingTemplate"
+    view_id: SelectedView
+    template: FileDataModelingTemplate
+
+    @property
+    def group(self) -> str:
+        return "FileDataModeling"
+
+    def __str__(self) -> str:
+        return "data_modeling_template"
+
+    def create_instance(self, filepath: Path) -> dict[str, Any]:
+        return self.template.create_instance(filepath.name)
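
A quick sketch of how the $FILENAME placeholder is expanded by FileMetadataTemplate.create_instance. It assumes SelectorObject applies the camelCase aliasing (to_camel) imported in selectors/_base.py; the field values are purely illustrative:

from cognite_toolkit._cdf_tk.storageio.selectors import FileMetadataTemplate

template = FileMetadataTemplate.model_validate(
    {
        "name": "$FILENAME",
        "externalId": "doc_$FILENAME",
        "source": "local-drive",  # extra="allow" lets additional FileMetadata fields pass through
    }
)
print(template.create_instance("report.pdf"))
# expected: {"name": "report.pdf", "externalId": "doc_report.pdf", "source": "local-drive"}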
cognite_toolkit/_cdf_tk/utils/fileio/_readers.py CHANGED

@@ -5,7 +5,7 @@ from abc import ABC, abstractmethod
 from collections import Counter, defaultdict
 from collections.abc import Callable, Iterator, Mapping, Sequence
 from dataclasses import dataclass
-from functools import partial
+from functools import cached_property, partial
 from io import TextIOWrapper
 from pathlib import Path
 from typing import Any
@@ -75,17 +75,25 @@ class MultiFileReader(FileReader):
     def __init__(self, input_files: Sequence[Path]) -> None:
         super().__init__(input_file=input_files[0])
         self.input_files = input_files
+
+    @cached_property
+    def reader_class(self) -> type[FileReader]:
+        """Determine the reader class based on the input files."""
         reader_classes = Counter([FileReader.from_filepath(input_file) for input_file in self.input_files])
         if len(reader_classes) > 1:
             raise ToolkitValueError(
                 "All input files must be of the same format. "
                 f"Found formats: {humanize_collection([cls.FORMAT for cls in reader_classes.keys()])}."
             )
-
+        return reader_classes.most_common(1)[0][0]
 
     @property
     def is_table(self) -> bool:
-
+        try:
+            return issubclass(self.reader_class, TableReader)
+        except ValueError:
+            # The input files are not a known format, so it is not a table.
+            return False
 
     @property
     def format(self) -> str:
cognite_toolkit/_cdf_tk/utils/http_client/_data_classes.py CHANGED

@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
 from collections import UserList
-from collections.abc import Sequence
+from collections.abc import Hashable, Sequence
 from dataclasses import dataclass, field
 from typing import Generic, Literal, Protocol, TypeAlias, TypeVar
 
@@ -352,6 +352,24 @@ class ResponseList(UserList[ResponseMessage | FailedRequestMessage]):
                 return _json.loads(resp.body)
         raise ValueError("No successful responses with a body found.")
 
+    def as_item_responses(self, item_id: Hashable) -> list[ResponseMessage | FailedRequestMessage]:
+        # Convert the responses to per-item responses
+        results: list[ResponseMessage | FailedRequestMessage] = []
+        for message in self.data:
+            if isinstance(message, SuccessResponse):
+                results.append(SuccessResponseItems(status_code=message.status_code, ids=[item_id], body=message.body))
+            elif isinstance(message, FailedResponse):
+                results.append(
+                    FailedResponseItems(
+                        status_code=message.status_code, ids=[item_id], body=message.body, error=message.error
+                    )
+                )
+            elif isinstance(message, FailedRequestMessage):
+                results.append(FailedRequestItems(ids=[item_id], error=message.error))
+            else:
+                results.append(message)
+        return results
+
 
 def _dump_body(body: dict[str, JsonVal]) -> str:
     try:
cognite_toolkit/_resources/cdf.toml CHANGED

@@ -4,7 +4,7 @@ default_env = "<DEFAULT_ENV_PLACEHOLDER>"
 [modules]
 # This is the version of the modules. It should not be changed manually.
 # It will be updated by the 'cdf modules upgrade' command.
-version = "0.6.111"
+version = "0.6.112"
 
 [alpha_flags]
 external-libraries = true
cognite_toolkit/_version.py CHANGED

@@ -1 +1 @@
-__version__ = "0.6.111"
+__version__ = "0.6.112"
{cognite_toolkit-0.6.111.dist-info → cognite_toolkit-0.6.112.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cognite_toolkit
-Version: 0.6.111
+Version: 0.6.112
 Summary: Official Cognite Data Fusion tool for project templates and configuration deployment
 Project-URL: Homepage, https://docs.cognite.com/cdf/deploy/cdf_toolkit/
 Project-URL: Changelog, https://github.com/cognitedata/toolkit/releases