cledar-sdk 2.0.2__py3-none-any.whl → 2.0.3__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- cledar/__init__.py +0 -0
- cledar/kafka/README.md +239 -0
- cledar/kafka/__init__.py +40 -0
- cledar/kafka/clients/base.py +98 -0
- cledar/kafka/clients/consumer.py +110 -0
- cledar/kafka/clients/producer.py +80 -0
- cledar/kafka/config/schemas.py +178 -0
- cledar/kafka/exceptions.py +22 -0
- cledar/kafka/handlers/dead_letter.py +82 -0
- cledar/kafka/handlers/parser.py +49 -0
- cledar/kafka/logger.py +3 -0
- cledar/kafka/models/input.py +13 -0
- cledar/kafka/models/message.py +10 -0
- cledar/kafka/models/output.py +8 -0
- cledar/kafka/tests/.env.test.kafka +3 -0
- cledar/kafka/tests/README.md +216 -0
- cledar/kafka/tests/conftest.py +104 -0
- cledar/kafka/tests/integration/__init__.py +1 -0
- cledar/kafka/tests/integration/conftest.py +78 -0
- cledar/kafka/tests/integration/helpers.py +47 -0
- cledar/kafka/tests/integration/test_consumer_integration.py +375 -0
- cledar/kafka/tests/integration/test_integration.py +394 -0
- cledar/kafka/tests/integration/test_producer_consumer_interaction.py +388 -0
- cledar/kafka/tests/integration/test_producer_integration.py +217 -0
- cledar/kafka/tests/unit/__init__.py +1 -0
- cledar/kafka/tests/unit/test_base_kafka_client.py +391 -0
- cledar/kafka/tests/unit/test_config_validation.py +609 -0
- cledar/kafka/tests/unit/test_dead_letter_handler.py +443 -0
- cledar/kafka/tests/unit/test_error_handling.py +674 -0
- cledar/kafka/tests/unit/test_input_parser.py +310 -0
- cledar/kafka/tests/unit/test_input_parser_comprehensive.py +489 -0
- cledar/kafka/tests/unit/test_utils.py +25 -0
- cledar/kafka/tests/unit/test_utils_comprehensive.py +408 -0
- cledar/kafka/utils/callbacks.py +19 -0
- cledar/kafka/utils/messages.py +28 -0
- cledar/kafka/utils/topics.py +2 -0
- cledar/kserve/README.md +352 -0
- cledar/kserve/__init__.py +3 -0
- cledar/kserve/tests/__init__.py +0 -0
- cledar/kserve/tests/test_utils.py +64 -0
- cledar/kserve/utils.py +27 -0
- cledar/logging/README.md +53 -0
- cledar/logging/__init__.py +3 -0
- cledar/logging/tests/test_universal_plaintext_formatter.py +249 -0
- cledar/logging/universal_plaintext_formatter.py +94 -0
- cledar/monitoring/README.md +71 -0
- cledar/monitoring/__init__.py +3 -0
- cledar/monitoring/monitoring_server.py +112 -0
- cledar/monitoring/tests/integration/test_monitoring_server_int.py +162 -0
- cledar/monitoring/tests/test_monitoring_server.py +59 -0
- cledar/nonce/README.md +99 -0
- cledar/nonce/__init__.py +3 -0
- cledar/nonce/nonce_service.py +36 -0
- cledar/nonce/tests/__init__.py +0 -0
- cledar/nonce/tests/test_nonce_service.py +136 -0
- cledar/redis/README.md +536 -0
- cledar/redis/__init__.py +15 -0
- cledar/redis/async_example.py +111 -0
- cledar/redis/example.py +37 -0
- cledar/redis/exceptions.py +22 -0
- cledar/redis/logger.py +3 -0
- cledar/redis/model.py +10 -0
- cledar/redis/redis.py +525 -0
- cledar/redis/redis_config_store.py +252 -0
- cledar/redis/tests/test_async_integration_redis.py +158 -0
- cledar/redis/tests/test_async_redis_service.py +380 -0
- cledar/redis/tests/test_integration_redis.py +119 -0
- cledar/redis/tests/test_redis_service.py +319 -0
- cledar/storage/README.md +529 -0
- cledar/storage/__init__.py +4 -0
- cledar/storage/constants.py +3 -0
- cledar/storage/exceptions.py +50 -0
- cledar/storage/models.py +19 -0
- cledar/storage/object_storage.py +955 -0
- cledar/storage/tests/conftest.py +18 -0
- cledar/storage/tests/test_abfs.py +164 -0
- cledar/storage/tests/test_integration_filesystem.py +359 -0
- cledar/storage/tests/test_integration_s3.py +453 -0
- cledar/storage/tests/test_local.py +384 -0
- cledar/storage/tests/test_s3.py +521 -0
- {cledar_sdk-2.0.2.dist-info → cledar_sdk-2.0.3.dist-info}/METADATA +1 -1
- cledar_sdk-2.0.3.dist-info/RECORD +84 -0
- cledar_sdk-2.0.2.dist-info/RECORD +0 -4
- {cledar_sdk-2.0.2.dist-info → cledar_sdk-2.0.3.dist-info}/WHEEL +0 -0
- {cledar_sdk-2.0.2.dist-info → cledar_sdk-2.0.3.dist-info}/licenses/LICENSE +0 -0
cledar/storage/object_storage.py
@@ -0,0 +1,955 @@
+import io
+import logging
+from typing import Any, cast
+
+import fsspec
+from fsspec.exceptions import FSTimeoutError
+
+from .constants import (
+    ABFS_PATH_PREFIX,
+    ABFSS_PATH_PREFIX,
+    S3_PATH_PREFIX,
+)
+from .exceptions import (
+    CheckFileExistenceError,
+    CopyFileError,
+    DeleteFileError,
+    DownloadFileError,
+    GetFileInfoError,
+    GetFileSizeError,
+    ListObjectsError,
+    MoveFileError,
+    ReadFileError,
+    RequiredBucketNotFoundError,
+    UploadBufferError,
+    UploadFileError,
+)
+from .models import ObjectStorageServiceConfig, TransferPath
+
+logger = logging.getLogger("object_storage_service")
+
+
+class ObjectStorageService:
+    client: Any = None
+
+    def __init__(self, config: ObjectStorageServiceConfig) -> None:
+        self.client = fsspec.filesystem(
+            "s3",
+            key=config.s3_access_key,
+            secret=config.s3_secret_key,
+            client_kwargs={"endpoint_url": config.s3_endpoint_url},
+            max_concurrency=config.s3_max_concurrency,
+        )
+        logger.info(
+            "Initiated filesystem", extra={"endpoint_url": config.s3_endpoint_url}
+        )
+        self.local_client = fsspec.filesystem("file")
+
+        if config.azure_account_name and config.azure_account_key:
+            self.azure_client = fsspec.filesystem(
+                "abfs",
+                account_name=config.azure_account_name,
+                account_key=config.azure_account_key,
+            )
+        else:
+            self.azure_client = None
+
+    @staticmethod
+    def _is_s3_path(path: str | None) -> bool:
+        if path is None:
+            return False
+        return path.lower().startswith(S3_PATH_PREFIX)
+
+    @staticmethod
+    def _is_abfs_path(path: str | None) -> bool:
+        if path is None:
+            return False
+        lower = path.lower()
+        return lower.startswith((ABFS_PATH_PREFIX, ABFSS_PATH_PREFIX))
+
+    def is_alive(self) -> bool:
+        try:
+            self.client.ls(path="")
+            return True
+        except (OSError, PermissionError, TimeoutError, FSTimeoutError):
+            return False
+
+    def _write_buffer_to_s3_key(
+        self, buffer: io.BytesIO, bucket: str, key: str
+    ) -> None:
+        buffer.seek(0)
+        with self.client.open(
+            path=f"{S3_PATH_PREFIX}{bucket}/{key}", mode="wb"
+        ) as fobj:
+            fobj.write(buffer.getbuffer())
+
+    def _write_buffer_to_s3_path(
+        self, buffer: io.BytesIO, destination_path: str
+    ) -> None:
+        buffer.seek(0)
+        with self.client.open(path=destination_path, mode="wb") as fobj:
+            fobj.write(buffer.getbuffer())
+
+    def _write_buffer_to_abfs_path(
+        self, buffer: io.BytesIO, destination_path: str
+    ) -> None:
+        buffer.seek(0)
+        with self.azure_client.open(path=destination_path, mode="wb") as fobj:
+            fobj.write(buffer.getbuffer())
+
+    def _write_buffer_to_local_path(
+        self, buffer: io.BytesIO, destination_path: str
+    ) -> None:
+        buffer.seek(0)
+        with self.local_client.open(path=destination_path, mode="wb") as fobj:
+            fobj.write(buffer.getbuffer())
+
+    def _read_from_s3_key(self, bucket: str, key: str) -> bytes:
+        with self.client.open(
+            path=f"{S3_PATH_PREFIX}{bucket}/{key}", mode="rb"
+        ) as fobj:
+            data: bytes = fobj.read()
+        return data
+
+    def _read_from_s3_path(self, path: str) -> bytes:
+        with self.client.open(path=path, mode="rb") as fobj:
+            data: bytes = fobj.read()
+        return data
+
+    def _read_from_abfs_path(self, path: str) -> bytes:
+        with self.azure_client.open(path=path, mode="rb") as fobj:
+            data: bytes = fobj.read()
+        return data
+
+    def _read_from_local_path(self, path: str) -> bytes:
+        with self.local_client.open(path=path, mode="rb") as fobj:
+            data: bytes = fobj.read()
+        return data
+
+    def _put_file(self, fs: Any, lpath: str, rpath: str) -> None:
+        fs.put(lpath=lpath, rpath=rpath)
+
+    def _get_file(self, fs: Any, src: str, dst: str) -> None:
+        fs.get(src, dst)
+
+    def _list_via_find_or_ls(self, fs: Any, path: str, recursive: bool) -> list[str]:
+        if recursive:
+            return cast(list[str], fs.find(path))
+        return cast(list[str], fs.ls(path, detail=False))
+
+    def _normalize_s3_keys(self, bucket: str, objects: list[str]) -> list[str]:
+        keys: list[str] = []
+        for obj in objects:
+            if obj.startswith(f"{S3_PATH_PREFIX}{bucket}/"):
+                keys.append(obj.replace(f"{S3_PATH_PREFIX}{bucket}/", ""))
+            elif obj.startswith(f"{bucket}/"):
+                keys.append(obj.replace(f"{bucket}/", ""))
+            else:
+                keys.append(obj)
+        return keys
+
+    def _size_from_info(self, info: dict[str, Any]) -> int:
+        return int(info.get("size", 0))
+
+    def _copy_with_backend(self, backend: str, src: str, dst: str) -> None:
+        if backend == "s3":
+            self.client.copy(src, dst)
+            return
+        if backend == "abfs":
+            self.azure_client.copy(src, dst)
+            return
+        if backend == "local":
+            self.local_client.copy(src, dst)
+            return
+        with (
+            fsspec.open(src, mode="rb") as src_f,
+            fsspec.open(dst, mode="wb") as dst_f,
+        ):
+            dst_f.write(src_f.read())
+
+    def _move_with_backend(self, backend: str, src: str, dst: str) -> None:
+        if backend == "s3":
+            self.client.move(src, dst)
+            return
+        if backend == "abfs":
+            self.azure_client.move(src, dst)
+            return
+        if backend == "local":
+            self.local_client.move(src, dst)
+            return
+        with (
+            fsspec.open(src, mode="rb") as src_f,
+            fsspec.open(dst, mode="wb") as dst_f,
+        ):
+            dst_f.write(src_f.read())
+        if self._is_s3_path(src):
+            self.client.rm(src)
+        elif self._is_abfs_path(src):
+            self.azure_client.rm(src)
+        else:
+            self.local_client.rm(src)
+
+    def _get_fs_for_backend(self, backend: str) -> Any:
+        if backend == "s3":
+            return self.client
+        if backend == "abfs":
+            return self.azure_client
+        return self.local_client
+
+    def _resolve_source_backend_and_path(
+        self, bucket: str | None, key: str | None, path: str | None
+    ) -> TransferPath:
+        if bucket and key:
+            return TransferPath(backend="s3", path=f"{S3_PATH_PREFIX}{bucket}/{key}")
+        if path and self._is_s3_path(path):
+            return TransferPath(backend="s3", path=path)
+        if path and self._is_abfs_path(path):
+            return TransferPath(backend="abfs", path=path)
+        if path:
+            return TransferPath(backend="local", path=path)
+        raise ValueError("Either path or bucket and key must be provided")
+
+    def _resolve_dest_backend_and_path(
+        self, bucket: str | None, key: str | None, destination_path: str | None
+    ) -> TransferPath:
+        if bucket and key:
+            return TransferPath(backend="s3", path=f"{S3_PATH_PREFIX}{bucket}/{key}")
+        if destination_path and self._is_s3_path(destination_path):
+            return TransferPath(backend="s3", path=destination_path)
+        if destination_path and self._is_abfs_path(destination_path):
+            return TransferPath(backend="abfs", path=destination_path)
+        if destination_path:
+            return TransferPath(backend="local", path=destination_path)
+        raise ValueError("Either destination_path or bucket and key must be provided")
+
+    def _resolve_path_backend(self, path: str | None) -> TransferPath:
+        if path and self._is_s3_path(path):
+            return TransferPath(backend="s3", path=path)
+        if path and self._is_abfs_path(path):
+            return TransferPath(backend="abfs", path=path)
+        if path:
+            return TransferPath(backend="local", path=path)
+        raise ValueError("Path must be provided")
+
+    def _read_from_backend_path(self, backend: str, src_path: str) -> bytes:
+        if backend == "s3":
+            return self._read_from_s3_path(src_path)
+        if backend == "abfs":
+            return self._read_from_abfs_path(src_path)
+        return self._read_from_local_path(src_path)
+
+    def has_bucket(self, bucket: str, throw: bool = False) -> bool:
+        try:
+            self.client.ls(path=bucket)
+            return True
+        except (
+            FileNotFoundError,
+            PermissionError,
+            OSError,
+            TimeoutError,
+            FSTimeoutError,
+        ) as exception:
+            if throw:
+                logger.exception("Bucket not found", extra={"bucket": bucket})
+                raise RequiredBucketNotFoundError from exception
+            return False
+
+    def upload_buffer(
+        self,
+        buffer: io.BytesIO,
+        bucket: str | None = None,
+        key: str | None = None,
+        destination_path: str | None = None,
+    ) -> None:
+        try:
+            if bucket and key:
+                self._write_buffer_to_s3_key(buffer=buffer, bucket=bucket, key=key)
+                logger.debug(
+                    "Uploaded file from buffer", extra={"bucket": bucket, "key": key}
+                )
+            elif destination_path and self._is_s3_path(destination_path):
+                logger.debug(
+                    "Uploading file from buffer to S3 via path",
+                    extra={"destination_path": destination_path},
+                )
+                self._write_buffer_to_s3_path(
+                    buffer=buffer, destination_path=destination_path
+                )
+            elif destination_path and self._is_abfs_path(destination_path):
+                logger.debug(
+                    "Uploading file from buffer to ABFS via path",
+                    extra={"destination_path": destination_path},
+                )
+                self._write_buffer_to_abfs_path(
+                    buffer=buffer, destination_path=destination_path
+                )
+            elif destination_path:
+                logger.debug(
+                    "Uploading file from buffer to local filesystem",
+                    extra={"destination_path": destination_path},
+                )
+                self._write_buffer_to_local_path(
+                    buffer=buffer, destination_path=destination_path
+                )
+            else:
+                raise ValueError(
+                    "Either destination_path or bucket and key must be provided"
+                )
+        except (OSError, PermissionError, TimeoutError, FSTimeoutError) as exception:
+            logger.exception(
+                "Failed to upload buffer",
+                extra={
+                    "bucket": bucket,
+                    "key": key,
+                    "destination_path": destination_path,
+                },
+            )
+            raise UploadBufferError(
+                f"Failed to upload buffer (bucket={bucket}, key={key}, "
+                f"destination_path={destination_path})"
+            ) from exception
+
+    def read_file(
+        self,
+        bucket: str | None = None,
+        key: str | None = None,
+        path: str | None = None,
+        max_tries: int = 3,
+    ) -> bytes:
+        transfer_path: TransferPath = self._resolve_source_backend_and_path(
+            bucket=bucket, key=key, path=path
+        )
+        backend_name: str = transfer_path.backend
+        src_path: str = transfer_path.path
+        for attempt in range(max_tries):
+            try:
+                logger.debug(
+                    "Reading file",
+                    extra={"backend": backend_name, "source": src_path},
+                )
+                content = self._read_from_backend_path(backend_name, src_path)
+                logger.debug(
+                    "File read",
+                    extra={"backend": backend_name, "source": src_path},
+                )
+                return content
+            except OSError as exception:
+                if attempt == max_tries - 1:
+                    logger.exception(
+                        "Failed to read file after %d retries",
+                        max_tries,
+                        extra={"bucket": bucket, "key": key, "path": path},
+                    )
+                    raise ReadFileError(
+                        f"Failed to read file after {max_tries} retries "
+                        f"(bucket={bucket}, key={key}, path={path})"
+                    ) from exception
+                logger.warning(
+                    "Failed to read file, retrying...",
+                    extra={"attempt": attempt + 1},
+                )
+        raise NotImplementedError("This should never be reached")
+
+    def upload_file(
+        self,
+        file_path: str,
+        bucket: str | None = None,
+        key: str | None = None,
+        destination_path: str | None = None,
+    ) -> None:
+        try:
+            transfer_path: TransferPath = self._resolve_dest_backend_and_path(
+                bucket=bucket, key=key, destination_path=destination_path
+            )
+            backend_name: str = transfer_path.backend
+            dst_path: str = transfer_path.path
+            logger.debug(
+                "Uploading file",
+                extra={
+                    "backend": backend_name,
+                    "destination": dst_path,
+                    "file": file_path,
+                },
+            )
+            fs = self._get_fs_for_backend(backend_name)
+            self._put_file(fs, lpath=file_path, rpath=dst_path)
+            logger.debug(
+                "Uploaded file",
+                extra={
+                    "backend": backend_name,
+                    "destination": dst_path,
+                    "file": file_path,
+                },
+            )
+        except (OSError, PermissionError, TimeoutError, FSTimeoutError) as exception:
+            logger.exception(
+                "Failed to upload file",
+                extra={
+                    "bucket": bucket,
+                    "key": key,
+                    "destination_path": destination_path,
+                    "file_path": file_path,
+                },
+            )
+            raise UploadFileError(
+                f"Failed to upload file {file_path} "
+                f"(bucket={bucket}, key={key}, destination_path={destination_path})"
+            ) from exception
+
+    def list_objects(
+        self,
+        bucket: str | None = None,
+        prefix: str = "",
+        path: str | None = None,
+        recursive: bool = True,
+    ) -> list[str]:
+        """
+        List objects in storage with optional prefix filtering.
+
+        Args:
+            bucket: The bucket name (for S3)
+            prefix: Optional prefix to filter objects (for S3)
+            path: The filesystem path. Uses S3 if starts with s3://, otherwise local
+            recursive: If True, list all objects recursively
+
+        Returns:
+            List of object keys/paths
+        """
+        try:
+            if path:
+                transfer_path: TransferPath = self._resolve_path_backend(path)
+                backend_name: str = transfer_path.backend
+                resolved_path: str = transfer_path.path
+                logger.debug(
+                    "Listing objects",
+                    extra={
+                        "backend": backend_name,
+                        "path": resolved_path,
+                        "recursive": recursive,
+                    },
+                )
+                fs = self._get_fs_for_backend(backend_name)
+                objects = self._list_via_find_or_ls(fs, resolved_path, recursive)
+                logger.debug(
+                    "Listed objects",
+                    extra={
+                        "backend": backend_name,
+                        "path": resolved_path,
+                        "count": len(objects),
+                    },
+                )
+                return objects
+            if bucket:
+                s3_path = (
+                    f"{S3_PATH_PREFIX}{bucket}/{prefix}"
+                    if prefix
+                    else f"{S3_PATH_PREFIX}{bucket}/"
+                )
+                logger.debug(
+                    "Listing objects from S3",
+                    extra={"bucket": bucket, "prefix": prefix, "recursive": recursive},
+                )
+                objects = self._list_via_find_or_ls(self.client, s3_path, recursive)
+                keys = self._normalize_s3_keys(bucket, objects)
+                logger.debug(
+                    "Listed objects from S3",
+                    extra={
+                        "bucket": bucket,
+                        "prefix": prefix,
+                        "count": len(keys),
+                    },
+                )
+                return keys
+            raise ValueError("Either path or bucket must be provided")
+        except (
+            FileNotFoundError,
+            PermissionError,
+            OSError,
+            TimeoutError,
+            FSTimeoutError,
+        ) as exception:
+            logger.exception(
+                "Failed to list objects",
+                extra={"bucket": bucket, "prefix": prefix, "path": path},
+            )
+            raise ListObjectsError(
+                f"Failed to list objects (bucket={bucket}, prefix={prefix}, "
+                f"path={path})"
+            ) from exception
+
+    def delete_file(
+        self, bucket: str | None = None, key: str | None = None, path: str | None = None
+    ) -> None:
+        """
+        Delete a single object from storage.
+
+        Args:
+            bucket: The bucket name (for S3)
+            key: The object key to delete (for S3)
+            path: The filesystem path. Uses S3 if starts with s3://, otherwise local
+        """
+        try:
+            if bucket and key:
+                s3_path = f"{S3_PATH_PREFIX}{bucket}/{key}"
+                logger.debug(
+                    "Deleting file from S3", extra={"bucket": bucket, "key": key}
+                )
+                self.client.rm(s3_path)
+                logger.debug(
+                    "Deleted file from S3", extra={"bucket": bucket, "key": key}
+                )
+            elif path and self._is_s3_path(path):
+                logger.debug("Deleting file from S3 via path", extra={"path": path})
+                self.client.rm(path)
+                logger.debug("Deleted file from S3 via path", extra={"path": path})
+            elif path and self._is_abfs_path(path):
+                logger.debug("Deleting file from ABFS via path", extra={"path": path})
+                self.azure_client.rm(path)
+                logger.debug("Deleted file from ABFS via path", extra={"path": path})
+            elif path:
+                logger.debug(
+                    "Deleting file from local filesystem", extra={"path": path}
+                )
+                self.local_client.rm(path)
+                logger.debug("Deleted file from local filesystem", extra={"path": path})
+            else:
+                raise ValueError("Either path or bucket and key must be provided")
+        except (
+            FileNotFoundError,
+            PermissionError,
+            OSError,
+            TimeoutError,
+            FSTimeoutError,
+        ) as exception:
+            logger.exception(
+                "Failed to delete file",
+                extra={"bucket": bucket, "key": key, "path": path},
+            )
+            raise DeleteFileError(
+                f"Failed to delete file (bucket={bucket}, key={key}, path={path})"
+            ) from exception
+
+    def file_exists(
+        self, bucket: str | None = None, key: str | None = None, path: str | None = None
+    ) -> bool:
+        """
+        Check if a specific file exists in storage.
+
+        Args:
+            bucket: The bucket name (for S3)
+            key: The object key to check (for S3)
+            path: The filesystem path. Uses S3 if starts with s3://, otherwise local
+
+        Returns:
+            True if the file exists, False otherwise
+        """
+        try:
+            if bucket and key:
+                s3_path = f"{S3_PATH_PREFIX}{bucket}/{key}"
+                exists = self.client.exists(s3_path)
+                logger.debug(
+                    "Checked file existence in S3",
+                    extra={"bucket": bucket, "key": key, "exists": exists},
+                )
+                return bool(exists)
+            if path and self._is_s3_path(path):
+                exists = self.client.exists(path)
+                logger.debug(
+                    "Checked file existence in S3 via path",
+                    extra={"path": path, "exists": exists},
+                )
+                return bool(exists)
+            if path and self._is_abfs_path(path):
+                exists = self.azure_client.exists(path)
+                logger.debug(
+                    "Checked file existence in ABFS via path",
+                    extra={"path": path, "exists": exists},
+                )
+                return bool(exists)
+            if path:
+                exists = self.local_client.exists(path)
+                logger.debug(
+                    "Checked file existence in local filesystem",
+                    extra={"path": path, "exists": exists},
+                )
+                return bool(exists)
+            raise ValueError("Either path or bucket and key must be provided")
+        except (OSError, PermissionError, TimeoutError, FSTimeoutError) as exception:
+            logger.exception(
+                "Failed to check file existence",
+                extra={"bucket": bucket, "key": key, "path": path},
+            )
+            raise CheckFileExistenceError(
+                f"Failed to check file existence (bucket={bucket}, key={key}, "
+                f"path={path})"
+            ) from exception
+
+    def download_file(
+        self,
+        dest_path: str,
+        bucket: str | None = None,
+        key: str | None = None,
+        source_path: str | None = None,
+        max_tries: int = 3,
+    ) -> None:
+        """
+        Download a file from storage to local filesystem.
+
+        Args:
+            dest_path: The destination local path where the file should be saved
+            bucket: The bucket name (for S3)
+            key: The object key to download (for S3)
+            source_path: The source path. Uses S3 if starts with s3://, otherwise local
+            max_tries: Number of retry attempts on failure
+        """
+        transfer_path: TransferPath = self._resolve_source_backend_and_path(
+            bucket=bucket, key=key, path=source_path
+        )
+        backend_name: str = transfer_path.backend
+        src_path: str = transfer_path.path
+        for attempt in range(max_tries):
+            try:
+                logger.debug(
+                    "Downloading file",
+                    extra={
+                        "backend": backend_name,
+                        "source": src_path,
+                        "dest_path": dest_path,
+                    },
+                )
+                fs = self._get_fs_for_backend(backend_name)
+                self._get_file(fs, src_path, dest_path)
+                logger.debug(
+                    "Downloaded file",
+                    extra={
+                        "backend": backend_name,
+                        "source": src_path,
+                        "dest_path": dest_path,
+                    },
+                )
+                return
+            except OSError as exception:
+                if attempt == max_tries - 1:
+                    logger.exception(
+                        "Failed to download file after %d retries",
+                        max_tries,
+                        extra={
+                            "bucket": bucket,
+                            "key": key,
+                            "source_path": source_path,
+                            "dest_path": dest_path,
+                        },
+                    )
+                    raise DownloadFileError(
+                        f"Failed to download file after {max_tries} retries "
+                        f"(bucket={bucket}, key={key}, source_path={source_path}, "
+                        f"dest_path={dest_path})"
+                    ) from exception
+                logger.warning(
+                    "Failed to download file, retrying...",
+                    extra={"attempt": attempt + 1},
+                )
+
+    def get_file_size(
+        self, bucket: str | None = None, key: str | None = None, path: str | None = None
+    ) -> int:
+        """
+        Get the size of a file without downloading it.
+
+        Args:
+            bucket: The bucket name (for S3)
+            key: The object key (for S3)
+            path: The filesystem path. Uses S3 if starts with s3://, otherwise local
+
+        Returns:
+            File size in bytes
+        """
+        try:
+            if bucket and key:
+                s3_path = f"s3://{bucket}/{key}"
+                logger.debug(
+                    "Getting file size from S3", extra={"bucket": bucket, "key": key}
+                )
+                info = cast(dict[str, Any], self.client.info(s3_path))
+                size = self._size_from_info(info)
+                logger.debug(
+                    "Got file size from S3",
+                    extra={"bucket": bucket, "key": key, "size": size},
+                )
+                return size
+            if path and self._is_s3_path(path):
+                logger.debug("Getting file size from S3 via path", extra={"path": path})
+                info = cast(dict[str, Any], self.client.info(path))
+                size = self._size_from_info(info)
+                logger.debug(
+                    "Got file size from S3 via path",
+                    extra={"path": path, "size": size},
+                )
+                return size
+            if path and self._is_abfs_path(path):
+                logger.debug(
+                    "Getting file size from ABFS via path", extra={"path": path}
+                )
+                info = self.azure_client.info(path)
+                size = self._size_from_info(info)
+                logger.debug(
+                    "Got file size from ABFS via path",
+                    extra={"path": path, "size": size},
+                )
+                return size
+            if path:
+                logger.debug(
+                    "Getting file size from local filesystem", extra={"path": path}
+                )
+                info = cast(dict[str, Any], self.local_client.info(path))
+                size = self._size_from_info(info)
+                logger.debug(
+                    "Got file size from local filesystem",
+                    extra={"path": path, "size": size},
+                )
+                return size
+
+            raise ValueError("Either path or bucket and key must be provided")
+        except (
+            FileNotFoundError,
+            PermissionError,
+            OSError,
+            TimeoutError,
+            FSTimeoutError,
+        ) as exception:
+            logger.exception(
+                "Failed to get file size",
+                extra={"bucket": bucket, "key": key, "path": path},
+            )
+            raise GetFileSizeError(
+                f"Failed to get file size (bucket={bucket}, key={key}, path={path})"
+            ) from exception
+
+    def get_file_info(
+        self, bucket: str | None = None, key: str | None = None, path: str | None = None
+    ) -> dict[str, Any]:
+        """
+        Get metadata information about a file.
+
+        Args:
+            bucket: The bucket name (for S3)
+            key: The object key (for S3)
+            path: The filesystem path. Uses S3 if starts with s3://, otherwise local
+
+        Returns:
+            Dictionary containing file metadata (size, modified time, etc.)
+        """
+        try:
+            if bucket and key:
+                s3_path = f"{S3_PATH_PREFIX}{bucket}/{key}"
+                logger.debug(
+                    "Getting file info from S3", extra={"bucket": bucket, "key": key}
+                )
+                info = cast(dict[str, Any], self.client.info(s3_path))
+                logger.debug(
+                    "Got file info from S3",
+                    extra={"bucket": bucket, "key": key},
+                )
+                return info
+            if path and self._is_s3_path(path):
+                logger.debug("Getting file info from S3 via path", extra={"path": path})
+                info = cast(dict[str, Any], self.client.info(path))
+                logger.debug(
+                    "Got file info from S3 via path",
+                    extra={"path": path},
+                )
+                return info
+            if path and self._is_abfs_path(path):
+                logger.debug(
+                    "Getting file info from ABFS via path", extra={"path": path}
+                )
+                info = cast(dict[str, Any], self.azure_client.info(path))
+                logger.debug(
+                    "Got file info from ABFS via path",
+                    extra={"path": path},
+                )
+                return info
+            if path:
+                logger.debug(
+                    "Getting file info from local filesystem", extra={"path": path}
+                )
+                info = cast(dict[str, Any], self.local_client.info(path))
+                logger.debug(
+                    "Got file info from local filesystem",
+                    extra={"path": path},
+                )
+                return info
+
+            raise ValueError("Either path or bucket and key must be provided")
+        except (
+            FileNotFoundError,
+            PermissionError,
+            OSError,
+            TimeoutError,
+            FSTimeoutError,
+        ) as exception:
+            logger.exception(
+                "Failed to get file info",
+                extra={"bucket": bucket, "key": key, "path": path},
+            )
+            raise GetFileInfoError(
+                f"Failed to get file info (bucket={bucket}, key={key}, path={path})"
+            ) from exception
+
+    def _resolve_transfer_paths(
+        self,
+        source_bucket: str | None,
+        source_key: str | None,
+        source_path: str | None,
+        dest_bucket: str | None,
+        dest_key: str | None,
+        dest_path: str | None,
+    ) -> tuple[str, str, str]:
+        """
+        Resolve source and destination paths for copy/move operations and
+        identify which backend to use (s3, abfs, local, or mixed).
+        """
+        src_is_s3 = False
+        src_is_abfs = False
+        if source_bucket and source_key:
+            src: str = f"{S3_PATH_PREFIX}{source_bucket}/{source_key}"
+            src_is_s3 = True
+        elif self._is_s3_path(source_path):
+            src = cast(str, source_path)
+            src_is_s3 = True
+        elif self._is_abfs_path(source_path):
+            src = cast(str, source_path)
+            src_is_abfs = True
+        elif source_path:
+            src = source_path
+        else:
+            raise ValueError(
+                "Either source_path or source_bucket and source_key must be provided"
+            )
+
+        dst_is_s3 = False
+        dst_is_abfs = False
+        if dest_bucket and dest_key:
+            dst: str = f"{S3_PATH_PREFIX}{dest_bucket}/{dest_key}"
+            dst_is_s3 = True
+        elif self._is_s3_path(dest_path):
+            dst = cast(str, dest_path)
+            dst_is_s3 = True
+        elif self._is_abfs_path(dest_path):
+            dst = cast(str, dest_path)
+            dst_is_abfs = True
+        elif dest_path:
+            dst = dest_path
+        else:
+            raise ValueError(
+                "Either dest_path or dest_bucket and dest_key must be provided"
+            )
+
+        if (src_is_s3 or dst_is_s3) and not (src_is_abfs or dst_is_abfs):
+            backend = "s3"
+        elif (src_is_abfs or dst_is_abfs) and not (src_is_s3 or dst_is_s3):
+            backend = "abfs"
+        elif (src_is_s3 or dst_is_s3) and (src_is_abfs or dst_is_abfs):
+            backend = "mixed"
+        else:
+            backend = "local"
+
+        return src, dst, backend
+
+    def copy_file(
+        self,
+        source_bucket: str | None = None,
+        source_key: str | None = None,
+        source_path: str | None = None,
+        dest_bucket: str | None = None,
+        dest_key: str | None = None,
+        dest_path: str | None = None,
+    ) -> None:
+        """
+        Copy a file from one location to another.
+
+        Args:
+            source_bucket: Source bucket name (for S3 source)
+            source_key: Source object key (for S3 source)
+            source_path: Source path. Uses S3 if starts with s3://, otherwise local
+            dest_bucket: Destination bucket name (for S3 destination)
+            dest_key: Destination object key (for S3 destination)
+            dest_path: Destination path. Uses S3 if starts with s3://, otherwise local
+        """
+        try:
+            src, dst, backend = self._resolve_transfer_paths(
+                source_bucket=source_bucket,
+                source_key=source_key,
+                source_path=source_path,
+                dest_bucket=dest_bucket,
+                dest_key=dest_key,
+                dest_path=dest_path,
+            )
+
+            logger.debug("Copying file", extra={"source": src, "destination": dst})
+            self._copy_with_backend(backend=backend, src=src, dst=dst)
+
+            logger.debug("Copied file", extra={"source": src, "destination": dst})
+        except (
+            FileNotFoundError,
+            PermissionError,
+            OSError,
+            TimeoutError,
+            FSTimeoutError,
+        ) as exception:
+            logger.exception(
+                "Failed to copy file",
+                extra={"source": src, "destination": dst},
+            )
+            raise CopyFileError(
+                f"Failed to copy file (source={src}, destination={dst})"
+            ) from exception
+
+    def move_file(
+        self,
+        source_bucket: str | None = None,
+        source_key: str | None = None,
+        source_path: str | None = None,
+        dest_bucket: str | None = None,
+        dest_key: str | None = None,
+        dest_path: str | None = None,
+    ) -> None:
+        """
+        Move/rename a file from one location to another.
+
+        Args:
+            source_bucket: Source bucket name (for S3 source)
+            source_key: Source object key (for S3 source)
+            source_path: Source path. Uses S3 if starts with s3://, otherwise local
+            dest_bucket: Destination bucket name (for S3 destination)
+            dest_key: Destination object key (for S3 destination)
+            dest_path: Destination path. Uses S3 if starts with s3://, otherwise local
+        """
+        try:
+            src, dst, backend = self._resolve_transfer_paths(
+                source_bucket=source_bucket,
+                source_key=source_key,
+                source_path=source_path,
+                dest_bucket=dest_bucket,
+                dest_key=dest_key,
+                dest_path=dest_path,
+            )
+
+            logger.debug("Moving file", extra={"source": src, "destination": dst})
+            self._move_with_backend(backend=backend, src=src, dst=dst)
+
+            logger.debug("Moved file", extra={"source": src, "destination": dst})
+        except (
+            FileNotFoundError,
+            PermissionError,
+            OSError,
+            TimeoutError,
+            FSTimeoutError,
+        ) as exception:
+            logger.exception(
+                "Failed to move file",
+                extra={"source": src, "destination": dst},
+            )
+            raise MoveFileError(
+                f"Failed to move file (source={src}, destination={dst})"
+            ) from exception
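
Below is a minimal usage sketch of the ObjectStorageService class added in this release. It is illustrative only: it assumes ObjectStorageServiceConfig (cledar/storage/models.py) accepts, as keyword arguments, the fields that __init__ reads above, and the endpoint, credentials, bucket, and key values are placeholders rather than anything shipped with the package. The two addressing styles come straight from the method signatures: explicit bucket/key pairs target S3, while a single path is routed to S3, ABFS, or the local filesystem based on its s3:// or abfs:// prefix.

    import io

    from cledar.storage.models import ObjectStorageServiceConfig
    from cledar.storage.object_storage import ObjectStorageService

    # Placeholder credentials and endpoint; the field names follow the attributes
    # read in ObjectStorageService.__init__ and are assumed to be accepted as
    # keyword arguments by the config model.
    config = ObjectStorageServiceConfig(
        s3_access_key="example-access-key",
        s3_secret_key="example-secret-key",
        s3_endpoint_url="http://localhost:9000",
        s3_max_concurrency=4,
        azure_account_name=None,  # leaving these unset skips the abfs client
        azure_account_key=None,
    )
    storage = ObjectStorageService(config)

    if storage.is_alive() and storage.has_bucket("example-bucket"):
        # Bucket/key addressing writes to s3://example-bucket/demo/hello.txt.
        storage.upload_buffer(
            io.BytesIO(b"hello"), bucket="example-bucket", key="demo/hello.txt"
        )
        # Path addressing: the s3:// prefix routes the call to the S3 client.
        print(storage.file_exists(path="s3://example-bucket/demo/hello.txt"))
        print(storage.read_file(bucket="example-bucket", key="demo/hello.txt"))
        print(storage.list_objects(bucket="example-bucket", prefix="demo/"))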