cledar-sdk 2.0.2__py3-none-any.whl → 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. cledar/__init__.py +0 -0
  2. cledar/kafka/README.md +239 -0
  3. cledar/kafka/__init__.py +40 -0
  4. cledar/kafka/clients/base.py +98 -0
  5. cledar/kafka/clients/consumer.py +110 -0
  6. cledar/kafka/clients/producer.py +80 -0
  7. cledar/kafka/config/schemas.py +178 -0
  8. cledar/kafka/exceptions.py +22 -0
  9. cledar/kafka/handlers/dead_letter.py +82 -0
  10. cledar/kafka/handlers/parser.py +49 -0
  11. cledar/kafka/logger.py +3 -0
  12. cledar/kafka/models/input.py +13 -0
  13. cledar/kafka/models/message.py +10 -0
  14. cledar/kafka/models/output.py +8 -0
  15. cledar/kafka/tests/.env.test.kafka +3 -0
  16. cledar/kafka/tests/README.md +216 -0
  17. cledar/kafka/tests/conftest.py +104 -0
  18. cledar/kafka/tests/integration/__init__.py +1 -0
  19. cledar/kafka/tests/integration/conftest.py +78 -0
  20. cledar/kafka/tests/integration/helpers.py +47 -0
  21. cledar/kafka/tests/integration/test_consumer_integration.py +375 -0
  22. cledar/kafka/tests/integration/test_integration.py +394 -0
  23. cledar/kafka/tests/integration/test_producer_consumer_interaction.py +388 -0
  24. cledar/kafka/tests/integration/test_producer_integration.py +217 -0
  25. cledar/kafka/tests/unit/__init__.py +1 -0
  26. cledar/kafka/tests/unit/test_base_kafka_client.py +391 -0
  27. cledar/kafka/tests/unit/test_config_validation.py +609 -0
  28. cledar/kafka/tests/unit/test_dead_letter_handler.py +443 -0
  29. cledar/kafka/tests/unit/test_error_handling.py +674 -0
  30. cledar/kafka/tests/unit/test_input_parser.py +310 -0
  31. cledar/kafka/tests/unit/test_input_parser_comprehensive.py +489 -0
  32. cledar/kafka/tests/unit/test_utils.py +25 -0
  33. cledar/kafka/tests/unit/test_utils_comprehensive.py +408 -0
  34. cledar/kafka/utils/callbacks.py +19 -0
  35. cledar/kafka/utils/messages.py +28 -0
  36. cledar/kafka/utils/topics.py +2 -0
  37. cledar/kserve/README.md +352 -0
  38. cledar/kserve/__init__.py +3 -0
  39. cledar/kserve/tests/__init__.py +0 -0
  40. cledar/kserve/tests/test_utils.py +64 -0
  41. cledar/kserve/utils.py +27 -0
  42. cledar/logging/README.md +53 -0
  43. cledar/logging/__init__.py +3 -0
  44. cledar/logging/tests/test_universal_plaintext_formatter.py +249 -0
  45. cledar/logging/universal_plaintext_formatter.py +94 -0
  46. cledar/monitoring/README.md +71 -0
  47. cledar/monitoring/__init__.py +3 -0
  48. cledar/monitoring/monitoring_server.py +112 -0
  49. cledar/monitoring/tests/integration/test_monitoring_server_int.py +162 -0
  50. cledar/monitoring/tests/test_monitoring_server.py +59 -0
  51. cledar/nonce/README.md +99 -0
  52. cledar/nonce/__init__.py +3 -0
  53. cledar/nonce/nonce_service.py +36 -0
  54. cledar/nonce/tests/__init__.py +0 -0
  55. cledar/nonce/tests/test_nonce_service.py +136 -0
  56. cledar/redis/README.md +536 -0
  57. cledar/redis/__init__.py +15 -0
  58. cledar/redis/async_example.py +111 -0
  59. cledar/redis/example.py +37 -0
  60. cledar/redis/exceptions.py +22 -0
  61. cledar/redis/logger.py +3 -0
  62. cledar/redis/model.py +10 -0
  63. cledar/redis/redis.py +525 -0
  64. cledar/redis/redis_config_store.py +252 -0
  65. cledar/redis/tests/test_async_integration_redis.py +158 -0
  66. cledar/redis/tests/test_async_redis_service.py +380 -0
  67. cledar/redis/tests/test_integration_redis.py +119 -0
  68. cledar/redis/tests/test_redis_service.py +319 -0
  69. cledar/storage/README.md +529 -0
  70. cledar/storage/__init__.py +4 -0
  71. cledar/storage/constants.py +3 -0
  72. cledar/storage/exceptions.py +50 -0
  73. cledar/storage/models.py +19 -0
  74. cledar/storage/object_storage.py +955 -0
  75. cledar/storage/tests/conftest.py +18 -0
  76. cledar/storage/tests/test_abfs.py +164 -0
  77. cledar/storage/tests/test_integration_filesystem.py +359 -0
  78. cledar/storage/tests/test_integration_s3.py +453 -0
  79. cledar/storage/tests/test_local.py +384 -0
  80. cledar/storage/tests/test_s3.py +521 -0
  81. {cledar_sdk-2.0.2.dist-info → cledar_sdk-2.0.3.dist-info}/METADATA +1 -1
  82. cledar_sdk-2.0.3.dist-info/RECORD +84 -0
  83. cledar_sdk-2.0.2.dist-info/RECORD +0 -4
  84. {cledar_sdk-2.0.2.dist-info → cledar_sdk-2.0.3.dist-info}/WHEEL +0 -0
  85. {cledar_sdk-2.0.2.dist-info → cledar_sdk-2.0.3.dist-info}/licenses/LICENSE +0 -0
cledar/storage/object_storage.py (new file)
@@ -0,0 +1,955 @@
+ import io
+ import logging
+ from typing import Any, cast
+
+ import fsspec
+ from fsspec.exceptions import FSTimeoutError
+
+ from .constants import (
+     ABFS_PATH_PREFIX,
+     ABFSS_PATH_PREFIX,
+     S3_PATH_PREFIX,
+ )
+ from .exceptions import (
+     CheckFileExistenceError,
+     CopyFileError,
+     DeleteFileError,
+     DownloadFileError,
+     GetFileInfoError,
+     GetFileSizeError,
+     ListObjectsError,
+     MoveFileError,
+     ReadFileError,
+     RequiredBucketNotFoundError,
+     UploadBufferError,
+     UploadFileError,
+ )
+ from .models import ObjectStorageServiceConfig, TransferPath
+
+ logger = logging.getLogger("object_storage_service")
+
+
+ class ObjectStorageService:
+     client: Any = None
+
+     def __init__(self, config: ObjectStorageServiceConfig) -> None:
+         self.client = fsspec.filesystem(
+             "s3",
+             key=config.s3_access_key,
+             secret=config.s3_secret_key,
+             client_kwargs={"endpoint_url": config.s3_endpoint_url},
+             max_concurrency=config.s3_max_concurrency,
+         )
+         logger.info(
+             "Initiated filesystem", extra={"endpoint_url": config.s3_endpoint_url}
+         )
+         self.local_client = fsspec.filesystem("file")
+
+         if config.azure_account_name and config.azure_account_key:
+             self.azure_client = fsspec.filesystem(
+                 "abfs",
+                 account_name=config.azure_account_name,
+                 account_key=config.azure_account_key,
+             )
+         else:
+             self.azure_client = None
+
+     @staticmethod
+     def _is_s3_path(path: str | None) -> bool:
+         if path is None:
+             return False
+         return path.lower().startswith(S3_PATH_PREFIX)
+
+     @staticmethod
+     def _is_abfs_path(path: str | None) -> bool:
+         if path is None:
+             return False
+         lower = path.lower()
+         return lower.startswith((ABFS_PATH_PREFIX, ABFSS_PATH_PREFIX))
+
+     def is_alive(self) -> bool:
+         try:
+             self.client.ls(path="")
+             return True
+         except (OSError, PermissionError, TimeoutError, FSTimeoutError):
+             return False
+
+     def _write_buffer_to_s3_key(
+         self, buffer: io.BytesIO, bucket: str, key: str
+     ) -> None:
+         buffer.seek(0)
+         with self.client.open(
+             path=f"{S3_PATH_PREFIX}{bucket}/{key}", mode="wb"
+         ) as fobj:
+             fobj.write(buffer.getbuffer())
+
+     def _write_buffer_to_s3_path(
+         self, buffer: io.BytesIO, destination_path: str
+     ) -> None:
+         buffer.seek(0)
+         with self.client.open(path=destination_path, mode="wb") as fobj:
+             fobj.write(buffer.getbuffer())
+
+     def _write_buffer_to_abfs_path(
+         self, buffer: io.BytesIO, destination_path: str
+     ) -> None:
+         buffer.seek(0)
+         with self.azure_client.open(path=destination_path, mode="wb") as fobj:
+             fobj.write(buffer.getbuffer())
+
+     def _write_buffer_to_local_path(
+         self, buffer: io.BytesIO, destination_path: str
+     ) -> None:
+         buffer.seek(0)
+         with self.local_client.open(path=destination_path, mode="wb") as fobj:
+             fobj.write(buffer.getbuffer())
+
+     def _read_from_s3_key(self, bucket: str, key: str) -> bytes:
+         with self.client.open(
+             path=f"{S3_PATH_PREFIX}{bucket}/{key}", mode="rb"
+         ) as fobj:
+             data: bytes = fobj.read()
+         return data
+
+     def _read_from_s3_path(self, path: str) -> bytes:
+         with self.client.open(path=path, mode="rb") as fobj:
+             data: bytes = fobj.read()
+         return data
+
+     def _read_from_abfs_path(self, path: str) -> bytes:
+         with self.azure_client.open(path=path, mode="rb") as fobj:
+             data: bytes = fobj.read()
+         return data
+
+     def _read_from_local_path(self, path: str) -> bytes:
+         with self.local_client.open(path=path, mode="rb") as fobj:
+             data: bytes = fobj.read()
+         return data
+
+     def _put_file(self, fs: Any, lpath: str, rpath: str) -> None:
+         fs.put(lpath=lpath, rpath=rpath)
+
+     def _get_file(self, fs: Any, src: str, dst: str) -> None:
+         fs.get(src, dst)
+
+     def _list_via_find_or_ls(self, fs: Any, path: str, recursive: bool) -> list[str]:
+         if recursive:
+             return cast(list[str], fs.find(path))
+         return cast(list[str], fs.ls(path, detail=False))
+
+     def _normalize_s3_keys(self, bucket: str, objects: list[str]) -> list[str]:
+         keys: list[str] = []
+         for obj in objects:
+             if obj.startswith(f"{S3_PATH_PREFIX}{bucket}/"):
+                 keys.append(obj.replace(f"{S3_PATH_PREFIX}{bucket}/", ""))
+             elif obj.startswith(f"{bucket}/"):
+                 keys.append(obj.replace(f"{bucket}/", ""))
+             else:
+                 keys.append(obj)
+         return keys
+
+     def _size_from_info(self, info: dict[str, Any]) -> int:
+         return int(info.get("size", 0))
+
+     def _copy_with_backend(self, backend: str, src: str, dst: str) -> None:
+         if backend == "s3":
+             self.client.copy(src, dst)
+             return
+         if backend == "abfs":
+             self.azure_client.copy(src, dst)
+             return
+         if backend == "local":
+             self.local_client.copy(src, dst)
+             return
+         with (
+             fsspec.open(src, mode="rb") as src_f,
+             fsspec.open(dst, mode="wb") as dst_f,
+         ):
+             dst_f.write(src_f.read())
+
+     def _move_with_backend(self, backend: str, src: str, dst: str) -> None:
+         if backend == "s3":
+             self.client.move(src, dst)
+             return
+         if backend == "abfs":
+             self.azure_client.move(src, dst)
+             return
+         if backend == "local":
+             self.local_client.move(src, dst)
+             return
+         with (
+             fsspec.open(src, mode="rb") as src_f,
+             fsspec.open(dst, mode="wb") as dst_f,
+         ):
+             dst_f.write(src_f.read())
+         if self._is_s3_path(src):
+             self.client.rm(src)
+         elif self._is_abfs_path(src):
+             self.azure_client.rm(src)
+         else:
+             self.local_client.rm(src)
+
+     def _get_fs_for_backend(self, backend: str) -> Any:
+         if backend == "s3":
+             return self.client
+         if backend == "abfs":
+             return self.azure_client
+         return self.local_client
+
+     def _resolve_source_backend_and_path(
+         self, bucket: str | None, key: str | None, path: str | None
+     ) -> TransferPath:
+         if bucket and key:
+             return TransferPath(backend="s3", path=f"{S3_PATH_PREFIX}{bucket}/{key}")
+         if path and self._is_s3_path(path):
+             return TransferPath(backend="s3", path=path)
+         if path and self._is_abfs_path(path):
+             return TransferPath(backend="abfs", path=path)
+         if path:
+             return TransferPath(backend="local", path=path)
+         raise ValueError("Either path or bucket and key must be provided")
+
+     def _resolve_dest_backend_and_path(
+         self, bucket: str | None, key: str | None, destination_path: str | None
+     ) -> TransferPath:
+         if bucket and key:
+             return TransferPath(backend="s3", path=f"{S3_PATH_PREFIX}{bucket}/{key}")
+         if destination_path and self._is_s3_path(destination_path):
+             return TransferPath(backend="s3", path=destination_path)
+         if destination_path and self._is_abfs_path(destination_path):
+             return TransferPath(backend="abfs", path=destination_path)
+         if destination_path:
+             return TransferPath(backend="local", path=destination_path)
+         raise ValueError("Either destination_path or bucket and key must be provided")
+
+     def _resolve_path_backend(self, path: str | None) -> TransferPath:
+         if path and self._is_s3_path(path):
+             return TransferPath(backend="s3", path=path)
+         if path and self._is_abfs_path(path):
+             return TransferPath(backend="abfs", path=path)
+         if path:
+             return TransferPath(backend="local", path=path)
+         raise ValueError("Path must be provided")
+
+     def _read_from_backend_path(self, backend: str, src_path: str) -> bytes:
+         if backend == "s3":
+             return self._read_from_s3_path(src_path)
+         if backend == "abfs":
+             return self._read_from_abfs_path(src_path)
+         return self._read_from_local_path(src_path)
+
+     def has_bucket(self, bucket: str, throw: bool = False) -> bool:
+         try:
+             self.client.ls(path=bucket)
+             return True
+         except (
+             FileNotFoundError,
+             PermissionError,
+             OSError,
+             TimeoutError,
+             FSTimeoutError,
+         ) as exception:
+             if throw:
+                 logger.exception("Bucket not found", extra={"bucket": bucket})
+                 raise RequiredBucketNotFoundError from exception
+             return False
+
+     def upload_buffer(
+         self,
+         buffer: io.BytesIO,
+         bucket: str | None = None,
+         key: str | None = None,
+         destination_path: str | None = None,
+     ) -> None:
+         try:
+             if bucket and key:
+                 self._write_buffer_to_s3_key(buffer=buffer, bucket=bucket, key=key)
+                 logger.debug(
+                     "Uploaded file from buffer", extra={"bucket": bucket, "key": key}
+                 )
+             elif destination_path and self._is_s3_path(destination_path):
+                 logger.debug(
+                     "Uploading file from buffer to S3 via path",
+                     extra={"destination_path": destination_path},
+                 )
+                 self._write_buffer_to_s3_path(
+                     buffer=buffer, destination_path=destination_path
+                 )
+             elif destination_path and self._is_abfs_path(destination_path):
+                 logger.debug(
+                     "Uploading file from buffer to ABFS via path",
+                     extra={"destination_path": destination_path},
+                 )
+                 self._write_buffer_to_abfs_path(
+                     buffer=buffer, destination_path=destination_path
+                 )
+             elif destination_path:
+                 logger.debug(
+                     "Uploading file from buffer to local filesystem",
+                     extra={"destination_path": destination_path},
+                 )
+                 self._write_buffer_to_local_path(
+                     buffer=buffer, destination_path=destination_path
+                 )
+             else:
+                 raise ValueError(
+                     "Either destination_path or bucket and key must be provided"
+                 )
+         except (OSError, PermissionError, TimeoutError, FSTimeoutError) as exception:
+             logger.exception(
+                 "Failed to upload buffer",
+                 extra={
+                     "bucket": bucket,
+                     "key": key,
+                     "destination_path": destination_path,
+                 },
+             )
+             raise UploadBufferError(
+                 f"Failed to upload buffer (bucket={bucket}, key={key}, "
+                 f"destination_path={destination_path})"
+             ) from exception
+
+     def read_file(
+         self,
+         bucket: str | None = None,
+         key: str | None = None,
+         path: str | None = None,
+         max_tries: int = 3,
+     ) -> bytes:
+         transfer_path: TransferPath = self._resolve_source_backend_and_path(
+             bucket=bucket, key=key, path=path
+         )
+         backend_name: str = transfer_path.backend
+         src_path: str = transfer_path.path
+         for attempt in range(max_tries):
+             try:
+                 logger.debug(
+                     "Reading file",
+                     extra={"backend": backend_name, "source": src_path},
+                 )
+                 content = self._read_from_backend_path(backend_name, src_path)
+                 logger.debug(
+                     "File read",
+                     extra={"backend": backend_name, "source": src_path},
+                 )
+                 return content
+             except OSError as exception:
+                 if attempt == max_tries - 1:
+                     logger.exception(
+                         "Failed to read file after %d retries",
+                         max_tries,
+                         extra={"bucket": bucket, "key": key, "path": path},
+                     )
+                     raise ReadFileError(
+                         f"Failed to read file after {max_tries} retries "
+                         f"(bucket={bucket}, key={key}, path={path})"
+                     ) from exception
+                 logger.warning(
+                     "Failed to read file, retrying...",
+                     extra={"attempt": attempt + 1},
+                 )
+         raise NotImplementedError("This should never be reached")
+
+     def upload_file(
+         self,
+         file_path: str,
+         bucket: str | None = None,
+         key: str | None = None,
+         destination_path: str | None = None,
+     ) -> None:
+         try:
+             transfer_path: TransferPath = self._resolve_dest_backend_and_path(
+                 bucket=bucket, key=key, destination_path=destination_path
+             )
+             backend_name: str = transfer_path.backend
+             dst_path: str = transfer_path.path
+             logger.debug(
+                 "Uploading file",
+                 extra={
+                     "backend": backend_name,
+                     "destination": dst_path,
+                     "file": file_path,
+                 },
+             )
+             fs = self._get_fs_for_backend(backend_name)
+             self._put_file(fs, lpath=file_path, rpath=dst_path)
+             logger.debug(
+                 "Uploaded file",
+                 extra={
+                     "backend": backend_name,
+                     "destination": dst_path,
+                     "file": file_path,
+                 },
+             )
+         except (OSError, PermissionError, TimeoutError, FSTimeoutError) as exception:
+             logger.exception(
+                 "Failed to upload file",
+                 extra={
+                     "bucket": bucket,
+                     "key": key,
+                     "destination_path": destination_path,
+                     "file_path": file_path,
+                 },
+             )
+             raise UploadFileError(
+                 f"Failed to upload file {file_path} "
+                 f"(bucket={bucket}, key={key}, destination_path={destination_path})"
+             ) from exception
+
+     def list_objects(
+         self,
+         bucket: str | None = None,
+         prefix: str = "",
+         path: str | None = None,
+         recursive: bool = True,
+     ) -> list[str]:
+         """
+         List objects in storage with optional prefix filtering.
+
+         Args:
+             bucket: The bucket name (for S3)
+             prefix: Optional prefix to filter objects (for S3)
+             path: The filesystem path. Uses S3 if starts with s3://, otherwise local
+             recursive: If True, list all objects recursively
+
+         Returns:
+             List of object keys/paths
+         """
+         try:
+             if path:
+                 transfer_path: TransferPath = self._resolve_path_backend(path)
+                 backend_name: str = transfer_path.backend
+                 resolved_path: str = transfer_path.path
+                 logger.debug(
+                     "Listing objects",
+                     extra={
+                         "backend": backend_name,
+                         "path": resolved_path,
+                         "recursive": recursive,
+                     },
+                 )
+                 fs = self._get_fs_for_backend(backend_name)
+                 objects = self._list_via_find_or_ls(fs, resolved_path, recursive)
+                 logger.debug(
+                     "Listed objects",
+                     extra={
+                         "backend": backend_name,
+                         "path": resolved_path,
+                         "count": len(objects),
+                     },
+                 )
+                 return objects
+             if bucket:
+                 s3_path = (
+                     f"{S3_PATH_PREFIX}{bucket}/{prefix}"
+                     if prefix
+                     else f"{S3_PATH_PREFIX}{bucket}/"
+                 )
+                 logger.debug(
+                     "Listing objects from S3",
+                     extra={"bucket": bucket, "prefix": prefix, "recursive": recursive},
+                 )
+                 objects = self._list_via_find_or_ls(self.client, s3_path, recursive)
+                 keys = self._normalize_s3_keys(bucket, objects)
+                 logger.debug(
+                     "Listed objects from S3",
+                     extra={
+                         "bucket": bucket,
+                         "prefix": prefix,
+                         "count": len(keys),
+                     },
+                 )
+                 return keys
+             raise ValueError("Either path or bucket must be provided")
+         except (
+             FileNotFoundError,
+             PermissionError,
+             OSError,
+             TimeoutError,
+             FSTimeoutError,
+         ) as exception:
+             logger.exception(
+                 "Failed to list objects",
+                 extra={"bucket": bucket, "prefix": prefix, "path": path},
+             )
+             raise ListObjectsError(
+                 f"Failed to list objects (bucket={bucket}, prefix={prefix}, "
+                 f"path={path})"
+             ) from exception
+
+     def delete_file(
+         self, bucket: str | None = None, key: str | None = None, path: str | None = None
+     ) -> None:
+         """
+         Delete a single object from storage.
+
+         Args:
+             bucket: The bucket name (for S3)
+             key: The object key to delete (for S3)
+             path: The filesystem path. Uses S3 if starts with s3://, otherwise local
+         """
+         try:
+             if bucket and key:
+                 s3_path = f"{S3_PATH_PREFIX}{bucket}/{key}"
+                 logger.debug(
+                     "Deleting file from S3", extra={"bucket": bucket, "key": key}
+                 )
+                 self.client.rm(s3_path)
+                 logger.debug(
+                     "Deleted file from S3", extra={"bucket": bucket, "key": key}
+                 )
+             elif path and self._is_s3_path(path):
+                 logger.debug("Deleting file from S3 via path", extra={"path": path})
+                 self.client.rm(path)
+                 logger.debug("Deleted file from S3 via path", extra={"path": path})
+             elif path and self._is_abfs_path(path):
+                 logger.debug("Deleting file from ABFS via path", extra={"path": path})
+                 self.azure_client.rm(path)
+                 logger.debug("Deleted file from ABFS via path", extra={"path": path})
+             elif path:
+                 logger.debug(
+                     "Deleting file from local filesystem", extra={"path": path}
+                 )
+                 self.local_client.rm(path)
+                 logger.debug("Deleted file from local filesystem", extra={"path": path})
+             else:
+                 raise ValueError("Either path or bucket and key must be provided")
+         except (
+             FileNotFoundError,
+             PermissionError,
+             OSError,
+             TimeoutError,
+             FSTimeoutError,
+         ) as exception:
+             logger.exception(
+                 "Failed to delete file",
+                 extra={"bucket": bucket, "key": key, "path": path},
+             )
+             raise DeleteFileError(
+                 f"Failed to delete file (bucket={bucket}, key={key}, path={path})"
+             ) from exception
+
+     def file_exists(
+         self, bucket: str | None = None, key: str | None = None, path: str | None = None
+     ) -> bool:
+         """
+         Check if a specific file exists in storage.
+
+         Args:
+             bucket: The bucket name (for S3)
+             key: The object key to check (for S3)
+             path: The filesystem path. Uses S3 if starts with s3://, otherwise local
+
+         Returns:
+             True if the file exists, False otherwise
+         """
+         try:
+             if bucket and key:
+                 s3_path = f"{S3_PATH_PREFIX}{bucket}/{key}"
+                 exists = self.client.exists(s3_path)
+                 logger.debug(
+                     "Checked file existence in S3",
+                     extra={"bucket": bucket, "key": key, "exists": exists},
+                 )
+                 return bool(exists)
+             if path and self._is_s3_path(path):
+                 exists = self.client.exists(path)
+                 logger.debug(
+                     "Checked file existence in S3 via path",
+                     extra={"path": path, "exists": exists},
+                 )
+                 return bool(exists)
+             if path and self._is_abfs_path(path):
+                 exists = self.azure_client.exists(path)
+                 logger.debug(
+                     "Checked file existence in ABFS via path",
+                     extra={"path": path, "exists": exists},
+                 )
+                 return bool(exists)
+             if path:
+                 exists = self.local_client.exists(path)
+                 logger.debug(
+                     "Checked file existence in local filesystem",
+                     extra={"path": path, "exists": exists},
+                 )
+                 return bool(exists)
+             raise ValueError("Either path or bucket and key must be provided")
+         except (OSError, PermissionError, TimeoutError, FSTimeoutError) as exception:
+             logger.exception(
+                 "Failed to check file existence",
+                 extra={"bucket": bucket, "key": key, "path": path},
+             )
+             raise CheckFileExistenceError(
+                 f"Failed to check file existence (bucket={bucket}, key={key}, "
+                 f"path={path})"
+             ) from exception
+
+     def download_file(
+         self,
+         dest_path: str,
+         bucket: str | None = None,
+         key: str | None = None,
+         source_path: str | None = None,
+         max_tries: int = 3,
+     ) -> None:
+         """
+         Download a file from storage to local filesystem.
+
+         Args:
+             dest_path: The destination local path where the file should be saved
+             bucket: The bucket name (for S3)
+             key: The object key to download (for S3)
+             source_path: The source path. Uses S3 if starts with s3://, otherwise local
+             max_tries: Number of retry attempts on failure
+         """
+         transfer_path: TransferPath = self._resolve_source_backend_and_path(
+             bucket=bucket, key=key, path=source_path
+         )
+         backend_name: str = transfer_path.backend
+         src_path: str = transfer_path.path
+         for attempt in range(max_tries):
+             try:
+                 logger.debug(
+                     "Downloading file",
+                     extra={
+                         "backend": backend_name,
+                         "source": src_path,
+                         "dest_path": dest_path,
+                     },
+                 )
+                 fs = self._get_fs_for_backend(backend_name)
+                 self._get_file(fs, src_path, dest_path)
+                 logger.debug(
+                     "Downloaded file",
+                     extra={
+                         "backend": backend_name,
+                         "source": src_path,
+                         "dest_path": dest_path,
+                     },
+                 )
+                 return
+             except OSError as exception:
+                 if attempt == max_tries - 1:
+                     logger.exception(
+                         "Failed to download file after %d retries",
+                         max_tries,
+                         extra={
+                             "bucket": bucket,
+                             "key": key,
+                             "source_path": source_path,
+                             "dest_path": dest_path,
+                         },
+                     )
+                     raise DownloadFileError(
+                         f"Failed to download file after {max_tries} retries "
+                         f"(bucket={bucket}, key={key}, source_path={source_path}, "
+                         f"dest_path={dest_path})"
+                     ) from exception
+                 logger.warning(
+                     "Failed to download file, retrying...",
+                     extra={"attempt": attempt + 1},
+                 )
+
+     def get_file_size(
+         self, bucket: str | None = None, key: str | None = None, path: str | None = None
+     ) -> int:
+         """
+         Get the size of a file without downloading it.
+
+         Args:
+             bucket: The bucket name (for S3)
+             key: The object key (for S3)
+             path: The filesystem path. Uses S3 if starts with s3://, otherwise local
+
+         Returns:
+             File size in bytes
+         """
+         try:
+             if bucket and key:
+                 s3_path = f"s3://{bucket}/{key}"
+                 logger.debug(
+                     "Getting file size from S3", extra={"bucket": bucket, "key": key}
+                 )
+                 info = cast(dict[str, Any], self.client.info(s3_path))
+                 size = self._size_from_info(info)
+                 logger.debug(
+                     "Got file size from S3",
+                     extra={"bucket": bucket, "key": key, "size": size},
+                 )
+                 return size
+             if path and self._is_s3_path(path):
+                 logger.debug("Getting file size from S3 via path", extra={"path": path})
+                 info = cast(dict[str, Any], self.client.info(path))
+                 size = self._size_from_info(info)
+                 logger.debug(
+                     "Got file size from S3 via path",
+                     extra={"path": path, "size": size},
+                 )
+                 return size
+             if path and self._is_abfs_path(path):
+                 logger.debug(
+                     "Getting file size from ABFS via path", extra={"path": path}
+                 )
+                 info = self.azure_client.info(path)
+                 size = self._size_from_info(info)
+                 logger.debug(
+                     "Got file size from ABFS via path",
+                     extra={"path": path, "size": size},
+                 )
+                 return size
+             if path:
+                 logger.debug(
+                     "Getting file size from local filesystem", extra={"path": path}
+                 )
+                 info = cast(dict[str, Any], self.local_client.info(path))
+                 size = self._size_from_info(info)
+                 logger.debug(
+                     "Got file size from local filesystem",
+                     extra={"path": path, "size": size},
+                 )
+                 return size
+
+             raise ValueError("Either path or bucket and key must be provided")
+         except (
+             FileNotFoundError,
+             PermissionError,
+             OSError,
+             TimeoutError,
+             FSTimeoutError,
+         ) as exception:
+             logger.exception(
+                 "Failed to get file size",
+                 extra={"bucket": bucket, "key": key, "path": path},
+             )
+             raise GetFileSizeError(
+                 f"Failed to get file size (bucket={bucket}, key={key}, path={path})"
+             ) from exception
+
+     def get_file_info(
+         self, bucket: str | None = None, key: str | None = None, path: str | None = None
+     ) -> dict[str, Any]:
+         """
+         Get metadata information about a file.
+
+         Args:
+             bucket: The bucket name (for S3)
+             key: The object key (for S3)
+             path: The filesystem path. Uses S3 if starts with s3://, otherwise local
+
+         Returns:
+             Dictionary containing file metadata (size, modified time, etc.)
+         """
+         try:
+             if bucket and key:
+                 s3_path = f"{S3_PATH_PREFIX}{bucket}/{key}"
+                 logger.debug(
+                     "Getting file info from S3", extra={"bucket": bucket, "key": key}
+                 )
+                 info = cast(dict[str, Any], self.client.info(s3_path))
+                 logger.debug(
+                     "Got file info from S3",
+                     extra={"bucket": bucket, "key": key},
+                 )
+                 return info
+             if path and self._is_s3_path(path):
+                 logger.debug("Getting file info from S3 via path", extra={"path": path})
+                 info = cast(dict[str, Any], self.client.info(path))
+                 logger.debug(
+                     "Got file info from S3 via path",
+                     extra={"path": path},
+                 )
+                 return info
+             if path and self._is_abfs_path(path):
+                 logger.debug(
+                     "Getting file info from ABFS via path", extra={"path": path}
+                 )
+                 info = cast(dict[str, Any], self.azure_client.info(path))
+                 logger.debug(
+                     "Got file info from ABFS via path",
+                     extra={"path": path},
+                 )
+                 return info
+             if path:
+                 logger.debug(
+                     "Getting file info from local filesystem", extra={"path": path}
+                 )
+                 info = cast(dict[str, Any], self.local_client.info(path))
+                 logger.debug(
+                     "Got file info from local filesystem",
+                     extra={"path": path},
+                 )
+                 return info
+
+             raise ValueError("Either path or bucket and key must be provided")
+         except (
+             FileNotFoundError,
+             PermissionError,
+             OSError,
+             TimeoutError,
+             FSTimeoutError,
+         ) as exception:
+             logger.exception(
+                 "Failed to get file info",
+                 extra={"bucket": bucket, "key": key, "path": path},
+             )
+             raise GetFileInfoError(
+                 f"Failed to get file info (bucket={bucket}, key={key}, path={path})"
+             ) from exception
+
+     def _resolve_transfer_paths(
+         self,
+         source_bucket: str | None,
+         source_key: str | None,
+         source_path: str | None,
+         dest_bucket: str | None,
+         dest_key: str | None,
+         dest_path: str | None,
+     ) -> tuple[str, str, str]:
+         """
+         Resolve source and destination paths for copy/move operations and
+         identify which backend to use (s3, abfs, local, or mixed).
+         """
+         src_is_s3 = False
+         src_is_abfs = False
+         if source_bucket and source_key:
+             src: str = f"{S3_PATH_PREFIX}{source_bucket}/{source_key}"
+             src_is_s3 = True
+         elif self._is_s3_path(source_path):
+             src = cast(str, source_path)
+             src_is_s3 = True
+         elif self._is_abfs_path(source_path):
+             src = cast(str, source_path)
+             src_is_abfs = True
+         elif source_path:
+             src = source_path
+         else:
+             raise ValueError(
+                 "Either source_path or source_bucket and source_key must be provided"
+             )
+
+         dst_is_s3 = False
+         dst_is_abfs = False
+         if dest_bucket and dest_key:
+             dst: str = f"{S3_PATH_PREFIX}{dest_bucket}/{dest_key}"
+             dst_is_s3 = True
+         elif self._is_s3_path(dest_path):
+             dst = cast(str, dest_path)
+             dst_is_s3 = True
+         elif self._is_abfs_path(dest_path):
+             dst = cast(str, dest_path)
+             dst_is_abfs = True
+         elif dest_path:
+             dst = dest_path
+         else:
+             raise ValueError(
+                 "Either dest_path or dest_bucket and dest_key must be provided"
+             )
+
+         if (src_is_s3 or dst_is_s3) and not (src_is_abfs or dst_is_abfs):
+             backend = "s3"
+         elif (src_is_abfs or dst_is_abfs) and not (src_is_s3 or dst_is_s3):
+             backend = "abfs"
+         elif (src_is_s3 or dst_is_s3) and (src_is_abfs or dst_is_abfs):
+             backend = "mixed"
+         else:
+             backend = "local"
+
+         return src, dst, backend
+
+     def copy_file(
+         self,
+         source_bucket: str | None = None,
+         source_key: str | None = None,
+         source_path: str | None = None,
+         dest_bucket: str | None = None,
+         dest_key: str | None = None,
+         dest_path: str | None = None,
+     ) -> None:
+         """
+         Copy a file from one location to another.
+
+         Args:
+             source_bucket: Source bucket name (for S3 source)
+             source_key: Source object key (for S3 source)
+             source_path: Source path. Uses S3 if starts with s3://, otherwise local
+             dest_bucket: Destination bucket name (for S3 destination)
+             dest_key: Destination object key (for S3 destination)
+             dest_path: Destination path. Uses S3 if starts with s3://, otherwise local
+         """
+         try:
+             src, dst, backend = self._resolve_transfer_paths(
+                 source_bucket=source_bucket,
+                 source_key=source_key,
+                 source_path=source_path,
+                 dest_bucket=dest_bucket,
+                 dest_key=dest_key,
+                 dest_path=dest_path,
+             )
+
+             logger.debug("Copying file", extra={"source": src, "destination": dst})
+             self._copy_with_backend(backend=backend, src=src, dst=dst)
+
+             logger.debug("Copied file", extra={"source": src, "destination": dst})
+         except (
+             FileNotFoundError,
+             PermissionError,
+             OSError,
+             TimeoutError,
+             FSTimeoutError,
+         ) as exception:
+             logger.exception(
+                 "Failed to copy file",
+                 extra={"source": src, "destination": dst},
+             )
+             raise CopyFileError(
+                 f"Failed to copy file (source={src}, destination={dst})"
+             ) from exception
+
+     def move_file(
+         self,
+         source_bucket: str | None = None,
+         source_key: str | None = None,
+         source_path: str | None = None,
+         dest_bucket: str | None = None,
+         dest_key: str | None = None,
+         dest_path: str | None = None,
+     ) -> None:
+         """
+         Move/rename a file from one location to another.
+
+         Args:
+             source_bucket: Source bucket name (for S3 source)
+             source_key: Source object key (for S3 source)
+             source_path: Source path. Uses S3 if starts with s3://, otherwise local
+             dest_bucket: Destination bucket name (for S3 destination)
+             dest_key: Destination object key (for S3 destination)
+             dest_path: Destination path. Uses S3 if starts with s3://, otherwise local
+         """
+         try:
+             src, dst, backend = self._resolve_transfer_paths(
+                 source_bucket=source_bucket,
+                 source_key=source_key,
+                 source_path=source_path,
+                 dest_bucket=dest_bucket,
+                 dest_key=dest_key,
+                 dest_path=dest_path,
+             )
+
+             logger.debug("Moving file", extra={"source": src, "destination": dst})
+             self._move_with_backend(backend=backend, src=src, dst=dst)
+
+             logger.debug("Moved file", extra={"source": src, "destination": dst})
+         except (
+             FileNotFoundError,
+             PermissionError,
+             OSError,
+             TimeoutError,
+             FSTimeoutError,
+         ) as exception:
+             logger.exception(
+                 "Failed to move file",
+                 extra={"source": src, "destination": dst},
+             )
+             raise MoveFileError(
+                 f"Failed to move file (source={src}, destination={dst})"
+             ) from exception
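
For orientation, a minimal usage sketch of the ObjectStorageService added above. It is illustrative only: the import paths and the ObjectStorageServiceConfig constructor are inferred from the __init__ shown in this diff (the model itself lives in cledar/storage/models.py, which is not expanded here), so treat the exact field names and module paths as assumptions rather than documented API.

import io

# Assumed import locations; cledar/storage/__init__.py may re-export these.
from cledar.storage.models import ObjectStorageServiceConfig
from cledar.storage.object_storage import ObjectStorageService

# Config fields mirror the attributes read in __init__ above; values are hypothetical.
config = ObjectStorageServiceConfig(
    s3_access_key="minio",
    s3_secret_key="minio123",
    s3_endpoint_url="http://localhost:9000",
    s3_max_concurrency=8,
    azure_account_name=None,
    azure_account_key=None,
)
storage = ObjectStorageService(config)

# Upload an in-memory buffer, then read it back through an s3:// path.
storage.upload_buffer(io.BytesIO(b"hello"), bucket="my-bucket", key="demo/hello.txt")
data = storage.read_file(path="s3://my-bucket/demo/hello.txt")
assert data == b"hello"

# The same methods accept abfs://... or plain local paths and dispatch to the
# matching fsspec filesystem, as the backend-resolution helpers above show.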