adapta 2.11.9__py3-none-any.whl → 3.5.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adapta/__init__.py +1 -1
- adapta/_version.py +1 -1
- adapta/connectors/__init__.py +1 -1
- adapta/connectors/service_bus/__init__.py +1 -1
- adapta/connectors/service_bus/_connector.py +2 -3
- adapta/logs/__init__.py +1 -1
- adapta/logs/_async_logger.py +38 -24
- adapta/logs/_base.py +21 -21
- adapta/logs/_internal.py +6 -7
- adapta/logs/_internal_logger.py +113 -41
- adapta/logs/_logger_interface.py +9 -10
- adapta/logs/handlers/__init__.py +1 -1
- adapta/logs/handlers/datadog_api_handler.py +7 -7
- adapta/logs/handlers/safe_stream_handler.py +4 -4
- adapta/logs/models/__init__.py +1 -1
- adapta/logs/models/_log_level.py +1 -1
- adapta/logs/models/_logs_metadata.py +4 -5
- adapta/metrics/__init__.py +1 -1
- adapta/metrics/_base.py +14 -15
- adapta/metrics/providers/__init__.py +1 -1
- adapta/metrics/providers/datadog_provider.py +21 -22
- adapta/metrics/providers/void_provider.py +34 -0
- adapta/ml/__init__.py +1 -1
- adapta/ml/_model.py +1 -1
- adapta/ml/mlflow/__init__.py +1 -1
- adapta/ml/mlflow/_client.py +101 -5
- adapta/ml/mlflow/_functions.py +44 -13
- adapta/process_communication/__init__.py +1 -1
- adapta/process_communication/_models.py +8 -6
- adapta/schema_management/README.md +0 -1
- adapta/schema_management/__init__.py +1 -1
- adapta/schema_management/schema_entity.py +3 -3
- adapta/security/__init__.py +1 -1
- adapta/security/clients/__init__.py +1 -1
- adapta/security/clients/_azure_client.py +14 -12
- adapta/security/clients/_base.py +11 -6
- adapta/security/clients/_local_client.py +6 -6
- adapta/security/clients/aws/__init__.py +1 -1
- adapta/security/clients/aws/_aws_client.py +12 -10
- adapta/security/clients/aws/_aws_credentials.py +7 -8
- adapta/security/clients/hashicorp_vault/__init__.py +1 -1
- adapta/security/clients/hashicorp_vault/hashicorp_vault_client.py +7 -6
- adapta/security/clients/hashicorp_vault/kubernetes_client.py +2 -2
- adapta/security/clients/hashicorp_vault/oidc_client.py +2 -2
- adapta/security/clients/hashicorp_vault/token_client.py +2 -2
- adapta/storage/__init__.py +1 -1
- adapta/storage/blob/README.md +14 -10
- adapta/storage/blob/__init__.py +1 -1
- adapta/storage/blob/azure_storage_client.py +76 -24
- adapta/storage/blob/base.py +15 -13
- adapta/storage/blob/local_storage_client.py +28 -16
- adapta/storage/blob/s3_storage_client.py +19 -24
- adapta/storage/cache/__init__.py +1 -1
- adapta/storage/cache/_base.py +5 -5
- adapta/storage/cache/redis_cache.py +5 -5
- adapta/storage/database/__init__.py +4 -1
- adapta/storage/database/{README.md → v2/README.md} +2 -0
- adapta/storage/database/v2/__init__.py +17 -0
- adapta/storage/database/v2/azure_sql.py +143 -0
- adapta/storage/{distributed_object_store/datastax_astra → database/v2/models}/__init__.py +5 -5
- adapta/storage/database/v2/models/_models.py +53 -0
- adapta/storage/database/{odbc.py → v2/odbc.py} +22 -13
- adapta/storage/database/{snowflake_sql.py → v2/snowflake_sql.py} +20 -12
- adapta/storage/database/{trino_sql.py → v2/trino_sql.py} +15 -6
- adapta/storage/database/v3/README.md +109 -0
- adapta/storage/database/v3/__init__.py +14 -0
- adapta/storage/database/{azure_sql.py → v3/azure_sql.py} +7 -9
- adapta/storage/database/v3/models/__init__.py +19 -0
- adapta/storage/database/{models → v3/models}/_models.py +2 -3
- adapta/storage/database/v3/odbc.py +217 -0
- adapta/storage/database/v3/snowflake_sql.py +241 -0
- adapta/storage/database/v3/trino_sql.py +154 -0
- adapta/storage/delta_lake/__init__.py +2 -3
- adapta/storage/delta_lake/{README.md → v2/README.md} +2 -0
- adapta/storage/delta_lake/v2/__init__.py +19 -0
- adapta/storage/delta_lake/{_functions.py → v2/_functions.py} +43 -27
- adapta/storage/delta_lake/v2/_models.py +72 -0
- adapta/storage/delta_lake/v3/README.md +147 -0
- adapta/storage/delta_lake/v3/__init__.py +20 -0
- adapta/storage/delta_lake/v3/_functions.py +315 -0
- adapta/storage/delta_lake/{_models.py → v3/_models.py} +4 -5
- adapta/storage/distributed_object_store/__init__.py +3 -1
- adapta/storage/distributed_object_store/v2/__init__.py +18 -0
- adapta/storage/distributed_object_store/{datastax_astra → v2/datastax_astra}/README.md +2 -0
- adapta/storage/distributed_object_store/v2/datastax_astra/__init__.py +20 -0
- adapta/storage/distributed_object_store/{datastax_astra → v2/datastax_astra}/_models.py +16 -0
- adapta/storage/distributed_object_store/{datastax_astra → v2/datastax_astra}/astra_client.py +61 -52
- adapta/storage/{database/models → distributed_object_store/v3}/__init__.py +4 -5
- adapta/storage/distributed_object_store/v3/datastax_astra/README.md +277 -0
- adapta/storage/distributed_object_store/v3/datastax_astra/__init__.py +20 -0
- adapta/storage/distributed_object_store/v3/datastax_astra/_model_mappers.py +469 -0
- adapta/storage/distributed_object_store/v3/datastax_astra/_models.py +134 -0
- adapta/storage/distributed_object_store/v3/datastax_astra/astra_client.py +569 -0
- adapta/storage/exceptions.py +1 -1
- adapta/storage/models/__init__.py +1 -1
- adapta/storage/models/_functions.py +5 -5
- adapta/storage/models/astra.py +4 -4
- adapta/storage/models/aws.py +1 -1
- adapta/storage/models/azure.py +2 -3
- adapta/storage/models/base.py +9 -1
- adapta/storage/models/enum.py +19 -0
- adapta/storage/models/filter_expression.py +124 -10
- adapta/storage/models/format.py +16 -205
- adapta/storage/models/formatters/__init__.py +36 -0
- adapta/storage/models/formatters/dict.py +43 -0
- adapta/storage/models/formatters/exceptions.py +7 -0
- adapta/storage/models/formatters/metaframe.py +48 -0
- adapta/storage/models/formatters/pandas.py +139 -0
- adapta/storage/models/formatters/pickle.py +36 -0
- adapta/storage/models/formatters/polars.py +240 -0
- adapta/storage/models/formatters/unit.py +26 -0
- adapta/storage/models/hive.py +24 -16
- adapta/storage/models/local.py +1 -1
- adapta/storage/models/trino.py +56 -0
- adapta/storage/query_enabled_store/README.md +1 -1
- adapta/storage/query_enabled_store/__init__.py +7 -1
- adapta/storage/query_enabled_store/_models.py +42 -13
- adapta/storage/query_enabled_store/_qes_astra.py +27 -14
- adapta/storage/query_enabled_store/_qes_delta.py +32 -10
- adapta/storage/query_enabled_store/_qes_local.py +81 -0
- adapta/storage/query_enabled_store/_qes_trino.py +133 -0
- adapta/storage/secrets/__init__.py +1 -1
- adapta/storage/secrets/_base.py +5 -4
- adapta/storage/secrets/azure_secret_client.py +3 -4
- adapta/storage/secrets/hashicorp_vault_secret_storage_client.py +5 -5
- adapta/utils/README.md +92 -0
- adapta/utils/__init__.py +2 -1
- adapta/utils/_common.py +50 -17
- adapta/utils/_requests.py +53 -0
- adapta/utils/concurrent_task_runner.py +10 -9
- adapta/utils/data_structures/_functions.py +6 -6
- adapta/utils/decorators/_logging.py +3 -3
- adapta/utils/decorators/_rate_limit.py +2 -2
- adapta/utils/metaframe.py +172 -0
- adapta/utils/python_typing/_functions.py +5 -10
- {adapta-2.11.9.dist-info → adapta-3.5.13.dist-info}/METADATA +18 -14
- adapta-3.5.13.dist-info/RECORD +146 -0
- {adapta-2.11.9.dist-info → adapta-3.5.13.dist-info}/WHEEL +1 -1
- adapta-2.11.9.dist-info/RECORD +0 -110
- {adapta-2.11.9.dist-info → adapta-3.5.13.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Storage Client implementation for Azure Cloud.
|
|
3
3
|
"""
|
|
4
|
-
# Copyright (c) 2023-
|
|
4
|
+
# Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
|
|
5
5
|
#
|
|
6
6
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
7
|
# you may not use this file except in compliance with the License.
|
|
@@ -21,7 +21,8 @@ from datetime import datetime, timedelta
|
|
|
21
21
|
from functools import partial
|
|
22
22
|
import signal
|
|
23
23
|
from threading import Thread
|
|
24
|
-
from typing import
|
|
24
|
+
from typing import TypeVar, final
|
|
25
|
+
from collections.abc import Iterator, Callable
|
|
25
26
|
|
|
26
27
|
from azure.core.paging import ItemPaged
|
|
27
28
|
from azure.storage.blob import (
|
|
@@ -32,6 +33,7 @@ from azure.storage.blob import (
|
|
|
32
33
|
BlobProperties,
|
|
33
34
|
ExponentialRetry,
|
|
34
35
|
ContainerClient,
|
|
36
|
+
BlobLeaseClient,
|
|
35
37
|
)
|
|
36
38
|
|
|
37
39
|
from adapta.storage.blob.base import StorageClient
|
|
@@ -51,7 +53,7 @@ class AzureStorageClient(StorageClient):
|
|
|
51
53
|
Azure Storage (Blob and ADLS) Client.
|
|
52
54
|
"""
|
|
53
55
|
|
|
54
|
-
def __init__(self, *, base_client: AzureClient, path:
|
|
56
|
+
def __init__(self, *, base_client: AzureClient, path: AdlsGen2Path | WasbPath, implicit_login=True):
|
|
55
57
|
super().__init__(base_client=base_client)
|
|
56
58
|
|
|
57
59
|
# overrides default ExponentialRetry
|
|
@@ -69,18 +71,32 @@ class AzureStorageClient(StorageClient):
|
|
|
69
71
|
self._storage_options = None
|
|
70
72
|
else:
|
|
71
73
|
self._storage_options = self._base_client.connect_storage(path)
|
|
72
|
-
|
|
73
|
-
f"
|
|
74
|
-
f"AccountName={self._storage_options['AZURE_STORAGE_ACCOUNT_NAME']};"
|
|
75
|
-
f"AccountKey={self._storage_options['AZURE_STORAGE_ACCOUNT_KEY']};"
|
|
76
|
-
f"BlobEndpoint=https://{self._storage_options['AZURE_STORAGE_ACCOUNT_NAME']}.blob.core.windows.net/;"
|
|
74
|
+
blob_endpoint = (
|
|
75
|
+
f"BlobEndpoint=https://{self._storage_options['AZURE_STORAGE_ACCOUNT_NAME']}.blob.core.windows.net/"
|
|
77
76
|
)
|
|
77
|
+
endpoint_protocol = "DefaultEndpointsProtocol=https"
|
|
78
|
+
|
|
79
|
+
if "ADAPTA__AZURE_STORAGE_BLOB_ENDPOINT" in os.environ:
|
|
80
|
+
blob_endpoint = f'BlobEndpoint={os.environ["ADAPTA__AZURE_STORAGE_BLOB_ENDPOINT"]}'
|
|
81
|
+
|
|
82
|
+
if "ADAPTA__AZURE_STORAGE_DEFAULT_PROTOCOL" in os.environ:
|
|
83
|
+
endpoint_protocol = f"DefaultEndpointsProtocol={os.environ['ADAPTA__AZURE_STORAGE_DEFAULT_PROTOCOL']}"
|
|
84
|
+
|
|
85
|
+
connection_string = ";".join(
|
|
86
|
+
[
|
|
87
|
+
endpoint_protocol,
|
|
88
|
+
f"AccountName={self._storage_options['AZURE_STORAGE_ACCOUNT_NAME']}",
|
|
89
|
+
f"AccountKey={self._storage_options['AZURE_STORAGE_ACCOUNT_KEY']}",
|
|
90
|
+
blob_endpoint,
|
|
91
|
+
]
|
|
92
|
+
)
|
|
93
|
+
|
|
78
94
|
self._blob_service_client: BlobServiceClient = BlobServiceClient.from_connection_string(
|
|
79
95
|
connection_string, retry_policy=retry_policy
|
|
80
96
|
)
|
|
81
97
|
|
|
82
98
|
@classmethod
|
|
83
|
-
def create(cls, auth: AzureClient, endpoint_url:
|
|
99
|
+
def create(cls, auth: AzureClient, endpoint_url: str | None = None):
|
|
84
100
|
"""
|
|
85
101
|
Not used in Azure.
|
|
86
102
|
:return:
|
|
@@ -120,14 +136,14 @@ class AzureStorageClient(StorageClient):
|
|
|
120
136
|
self,
|
|
121
137
|
data: T,
|
|
122
138
|
blob_path: DataPath,
|
|
123
|
-
serialization_format:
|
|
124
|
-
metadata:
|
|
139
|
+
serialization_format: type[SerializationFormat[T]],
|
|
140
|
+
metadata: dict[str, str] | None = None,
|
|
125
141
|
overwrite: bool = False,
|
|
126
142
|
) -> None:
|
|
127
143
|
bytes_ = serialization_format().serialize(data)
|
|
128
144
|
self._get_blob_client(blob_path).upload_blob(bytes_, metadata=metadata, overwrite=overwrite)
|
|
129
145
|
|
|
130
|
-
def get_blob_uri(self, blob_path: DataPath, **kwargs) -> str:
|
|
146
|
+
def get_blob_uri(self, blob_path: DataPath, expires_in_seconds: float = 3600.0, **kwargs) -> str:
|
|
131
147
|
blob_client = self._get_blob_client(blob_path)
|
|
132
148
|
azure_path = cast_path(blob_path)
|
|
133
149
|
|
|
@@ -137,7 +153,7 @@ class AzureStorageClient(StorageClient):
|
|
|
137
153
|
container_name=azure_path.container,
|
|
138
154
|
account_name=azure_path.account,
|
|
139
155
|
permission=kwargs.get("permission", BlobSasPermissions(read=True)),
|
|
140
|
-
expiry=
|
|
156
|
+
expiry=datetime.utcnow() + timedelta(seconds=expires_in_seconds),
|
|
141
157
|
)
|
|
142
158
|
|
|
143
159
|
sas_token = (
|
|
@@ -148,18 +164,17 @@ class AzureStorageClient(StorageClient):
|
|
|
148
164
|
else base_call(
|
|
149
165
|
user_delegation_key=self._blob_service_client.get_user_delegation_key(
|
|
150
166
|
key_start_time=datetime.utcnow() - timedelta(minutes=1),
|
|
151
|
-
key_expiry_time=
|
|
167
|
+
key_expiry_time=datetime.utcnow() + timedelta(seconds=expires_in_seconds),
|
|
152
168
|
),
|
|
153
169
|
)
|
|
154
170
|
)
|
|
155
171
|
|
|
156
|
-
|
|
157
|
-
return sas_uri
|
|
172
|
+
return f"{blob_client.url}?{sas_token}"
|
|
158
173
|
|
|
159
174
|
def blob_exists(self, blob_path: DataPath) -> bool:
|
|
160
175
|
return self._get_blob_client(blob_path).exists()
|
|
161
176
|
|
|
162
|
-
def _list_blobs(self, blob_path: DataPath) -> (ItemPaged[BlobProperties],
|
|
177
|
+
def _list_blobs(self, blob_path: DataPath) -> (ItemPaged[BlobProperties], AdlsGen2Path | WasbPath):
|
|
163
178
|
azure_path = cast_path(blob_path)
|
|
164
179
|
|
|
165
180
|
return (
|
|
@@ -170,8 +185,8 @@ class AzureStorageClient(StorageClient):
|
|
|
170
185
|
def read_blobs(
|
|
171
186
|
self,
|
|
172
187
|
blob_path: DataPath,
|
|
173
|
-
serialization_format:
|
|
174
|
-
filter_predicate:
|
|
188
|
+
serialization_format: type[SerializationFormat[T]],
|
|
189
|
+
filter_predicate: Callable[[BlobProperties], bool] | None = None,
|
|
175
190
|
) -> Iterator[T]:
|
|
176
191
|
blobs_on_path, azure_path = self._list_blobs(blob_path)
|
|
177
192
|
|
|
@@ -211,8 +226,8 @@ class AzureStorageClient(StorageClient):
|
|
|
211
226
|
self,
|
|
212
227
|
blob_path: DataPath,
|
|
213
228
|
local_path: str,
|
|
214
|
-
threads:
|
|
215
|
-
filter_predicate:
|
|
229
|
+
threads: int | None = None,
|
|
230
|
+
filter_predicate: Callable[[BlobProperties], bool] | None = None,
|
|
216
231
|
) -> None:
|
|
217
232
|
def download_blob(blob: BlobProperties, container: str) -> None:
|
|
218
233
|
write_path = os.path.join(local_path, blob.name)
|
|
@@ -229,7 +244,7 @@ class AzureStorageClient(StorageClient):
|
|
|
229
244
|
.readall()
|
|
230
245
|
)
|
|
231
246
|
|
|
232
|
-
def download_blob_list(blob_list:
|
|
247
|
+
def download_blob_list(blob_list: list[BlobProperties], container: str) -> None:
|
|
233
248
|
for blob_from_list in blob_list:
|
|
234
249
|
if blob_from_list:
|
|
235
250
|
download_blob(blob_from_list, container)
|
|
@@ -251,7 +266,7 @@ class AzureStorageClient(StorageClient):
|
|
|
251
266
|
for blob_dir in blob_dirs:
|
|
252
267
|
os.makedirs(os.path.join(local_path, blob_dir.name), exist_ok=True)
|
|
253
268
|
|
|
254
|
-
blob_lists:
|
|
269
|
+
blob_lists: list[list[BlobProperties]] = chunk_list(blob_files, threads)
|
|
255
270
|
thread_list = [
|
|
256
271
|
Thread(target=download_blob_list, args=(blob_list, azure_path.container)) for blob_list in blob_lists
|
|
257
272
|
]
|
|
@@ -263,7 +278,7 @@ class AzureStorageClient(StorageClient):
|
|
|
263
278
|
def list_blobs(
|
|
264
279
|
self,
|
|
265
280
|
blob_path: DataPath,
|
|
266
|
-
filter_predicate:
|
|
281
|
+
filter_predicate: Callable[[BlobProperties], bool] | None = lambda blob: blob.size != 0, # Skip folders
|
|
267
282
|
) -> Iterator[DataPath]:
|
|
268
283
|
blobs_on_path, azure_path = self._list_blobs(blob_path)
|
|
269
284
|
|
|
@@ -275,6 +290,31 @@ class AzureStorageClient(StorageClient):
|
|
|
275
290
|
path=blob.name,
|
|
276
291
|
)
|
|
277
292
|
|
|
293
|
+
def list_matching_prefixes(
|
|
294
|
+
self, blob_path: DataPath, delimiter: str = "/", timeout_seconds: int = 3600
|
|
295
|
+
) -> Iterator[DataPath]:
|
|
296
|
+
"""
|
|
297
|
+
List blobs in accordance with a hierarchy, as delimited by the specified delimiter character.
|
|
298
|
+
For example, calling list_matching_prefixes(AdlsGen2Path.from_hdfs_path(path), delimiter="/"),
|
|
299
|
+
where path=abfss://c@a.dfs.core.windows.net/my/pre will return:
|
|
300
|
+
abfss://c@a.dfs.core.windows.net/my/pre1
|
|
301
|
+
abfss://c@a.dfs.core.windows.net/my/preadad
|
|
302
|
+
abfss://c@a.dfs.core.windows.net/my/preeqweq
|
|
303
|
+
|
|
304
|
+
but will not return abfss://c@a.dfs.core.windows.net/my/pre1/pre2
|
|
305
|
+
"""
|
|
306
|
+
azure_path = cast_path(blob_path)
|
|
307
|
+
for prefix in self._get_container_client(azure_path).walk_blobs(
|
|
308
|
+
name_starts_with=blob_path.path,
|
|
309
|
+
delimiter=delimiter,
|
|
310
|
+
timeout=timeout_seconds,
|
|
311
|
+
):
|
|
312
|
+
yield AdlsGen2Path(
|
|
313
|
+
account=azure_path.account,
|
|
314
|
+
container=azure_path.container,
|
|
315
|
+
path=prefix.name,
|
|
316
|
+
)
|
|
317
|
+
|
|
278
318
|
def delete_blob(
|
|
279
319
|
self,
|
|
280
320
|
blob_path: DataPath,
|
|
@@ -283,6 +323,18 @@ class AzureStorageClient(StorageClient):
|
|
|
283
323
|
|
|
284
324
|
self._get_container_client(azure_path).delete_blob(blob_path.path)
|
|
285
325
|
|
|
326
|
+
def delete_leased_blob(self, blob_path: DataPath) -> None:
|
|
327
|
+
"""
|
|
328
|
+
Azure specific deletion that takes care of a leased blob
|
|
329
|
+
"""
|
|
330
|
+
azure_path = cast_path(blob_path)
|
|
331
|
+
blob_client = self._get_blob_client(azure_path)
|
|
332
|
+
|
|
333
|
+
if blob_client.get_blob_properties().lease.state == "leased":
|
|
334
|
+
BlobLeaseClient(blob_client).break_lease()
|
|
335
|
+
|
|
336
|
+
self._get_container_client(azure_path).delete_blob(blob_path.path)
|
|
337
|
+
|
|
286
338
|
def copy_blob(self, blob_path: DataPath, target_blob_path: DataPath, doze_period_ms=1000) -> None:
|
|
287
339
|
source_url = self.get_blob_uri(blob_path)
|
|
288
340
|
self._get_blob_client(target_blob_path).start_copy_from_url(source_url)
|
adapta/storage/blob/base.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Abstraction for storage operations.
|
|
3
3
|
"""
|
|
4
|
-
# Copyright (c) 2023-
|
|
4
|
+
# Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
|
|
5
5
|
#
|
|
6
6
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
7
|
# you may not use this file except in compliance with the License.
|
|
@@ -17,7 +17,8 @@
|
|
|
17
17
|
#
|
|
18
18
|
|
|
19
19
|
from abc import ABC, abstractmethod
|
|
20
|
-
from typing import
|
|
20
|
+
from typing import TypeVar
|
|
21
|
+
from collections.abc import Iterator, Callable
|
|
21
22
|
|
|
22
23
|
from adapta.security.clients import AuthenticationClient
|
|
23
24
|
from adapta.storage.models.base import DataPath
|
|
@@ -36,7 +37,7 @@ class StorageClient(ABC):
|
|
|
36
37
|
self._base_client = base_client
|
|
37
38
|
|
|
38
39
|
@classmethod
|
|
39
|
-
def create(cls, auth: AuthenticationClient, endpoint_url:
|
|
40
|
+
def create(cls, auth: AuthenticationClient, endpoint_url: str | None = None):
|
|
40
41
|
"""
|
|
41
42
|
Creates a Storage client using the AuthenticationClient to set up its session.
|
|
42
43
|
|
|
@@ -47,12 +48,13 @@ class StorageClient(ABC):
|
|
|
47
48
|
"""
|
|
48
49
|
|
|
49
50
|
@abstractmethod
|
|
50
|
-
def get_blob_uri(self, blob_path: DataPath, **kwargs) -> str:
|
|
51
|
+
def get_blob_uri(self, blob_path: DataPath, expires_in_seconds: float = 3600.0, **kwargs) -> str:
|
|
51
52
|
"""
|
|
52
53
|
Generates a URL which can be used to download this blob.
|
|
53
54
|
|
|
54
|
-
:param blob_path:
|
|
55
|
-
:param
|
|
55
|
+
:param blob_path: Path to the blob.
|
|
56
|
+
:param expires_in_seconds: Expiration time in seconds.
|
|
57
|
+
:param kwargs: Optional parameters to pass to the signing client.
|
|
56
58
|
:return:
|
|
57
59
|
"""
|
|
58
60
|
|
|
@@ -70,8 +72,8 @@ class StorageClient(ABC):
|
|
|
70
72
|
self,
|
|
71
73
|
data: T,
|
|
72
74
|
blob_path: DataPath,
|
|
73
|
-
serialization_format:
|
|
74
|
-
metadata:
|
|
75
|
+
serialization_format: type[SerializationFormat[T]],
|
|
76
|
+
metadata: dict[str, str] | None = None,
|
|
75
77
|
overwrite: bool = False,
|
|
76
78
|
) -> None:
|
|
77
79
|
"""
|
|
@@ -101,7 +103,7 @@ class StorageClient(ABC):
|
|
|
101
103
|
def list_blobs(
|
|
102
104
|
self,
|
|
103
105
|
blob_path: DataPath,
|
|
104
|
-
filter_predicate:
|
|
106
|
+
filter_predicate: Callable[[...], bool] | None = None,
|
|
105
107
|
) -> Iterator[DataPath]:
|
|
106
108
|
"""
|
|
107
109
|
Lists blobs in blob_path
|
|
@@ -115,8 +117,8 @@ class StorageClient(ABC):
|
|
|
115
117
|
def read_blobs(
|
|
116
118
|
self,
|
|
117
119
|
blob_path: DataPath,
|
|
118
|
-
serialization_format:
|
|
119
|
-
filter_predicate:
|
|
120
|
+
serialization_format: type[SerializationFormat[T]],
|
|
121
|
+
filter_predicate: Callable[[...], bool] | None = None,
|
|
120
122
|
) -> Iterator[T]:
|
|
121
123
|
"""
|
|
122
124
|
Reads data under provided path into the given format.
|
|
@@ -132,8 +134,8 @@ class StorageClient(ABC):
|
|
|
132
134
|
self,
|
|
133
135
|
blob_path: DataPath,
|
|
134
136
|
local_path: str,
|
|
135
|
-
threads:
|
|
136
|
-
filter_predicate:
|
|
137
|
+
threads: int | None = None,
|
|
138
|
+
filter_predicate: Callable[[...], bool] | None = None,
|
|
137
139
|
) -> None:
|
|
138
140
|
"""
|
|
139
141
|
Downloads blobs under the provided path to a local path.
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
Storage Client implementation for a regular filesystem.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
# Copyright (c) 2023-
|
|
5
|
+
# Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
|
|
6
6
|
#
|
|
7
7
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
8
8
|
# you may not use this file except in compliance with the License.
|
|
@@ -20,9 +20,10 @@
|
|
|
20
20
|
import os.path
|
|
21
21
|
import shutil
|
|
22
22
|
|
|
23
|
-
from typing import final
|
|
23
|
+
from typing import final
|
|
24
|
+
from collections.abc import Callable, Iterator
|
|
24
25
|
|
|
25
|
-
from adapta.security.clients import LocalClient
|
|
26
|
+
from adapta.security.clients import LocalClient, AuthenticationClient
|
|
26
27
|
from adapta.storage.blob.base import StorageClient, T
|
|
27
28
|
from adapta.storage.models import DataPath, LocalPath, parse_data_path
|
|
28
29
|
from adapta.storage.models.format import SerializationFormat
|
|
@@ -34,11 +35,14 @@ class LocalStorageClient(StorageClient):
|
|
|
34
35
|
Local Storage Client, primarily for unit tests.
|
|
35
36
|
"""
|
|
36
37
|
|
|
38
|
+
def __init__(self):
|
|
39
|
+
super().__init__(base_client=LocalClient())
|
|
40
|
+
|
|
37
41
|
@classmethod
|
|
38
|
-
def create(cls, auth:
|
|
39
|
-
|
|
42
|
+
def create(cls, auth: AuthenticationClient, endpoint_url: str | None = None):
|
|
43
|
+
return cls()
|
|
40
44
|
|
|
41
|
-
def get_blob_uri(self, blob_path: DataPath, **kwargs) -> str:
|
|
45
|
+
def get_blob_uri(self, blob_path: DataPath, expires_in_seconds: float = 3600.0, **kwargs) -> str:
|
|
42
46
|
return cast_path(blob_path).path
|
|
43
47
|
|
|
44
48
|
def blob_exists(self, blob_path: DataPath) -> bool:
|
|
@@ -48,8 +52,8 @@ class LocalStorageClient(StorageClient):
|
|
|
48
52
|
self,
|
|
49
53
|
data: T,
|
|
50
54
|
blob_path: DataPath,
|
|
51
|
-
serialization_format:
|
|
52
|
-
metadata:
|
|
55
|
+
serialization_format: type[SerializationFormat[T]],
|
|
56
|
+
metadata: dict[str, str] | None = None,
|
|
53
57
|
overwrite: bool = False,
|
|
54
58
|
) -> None:
|
|
55
59
|
bytes_ = serialization_format().serialize(data)
|
|
@@ -64,28 +68,36 @@ class LocalStorageClient(StorageClient):
|
|
|
64
68
|
os.remove(cast_path(blob_path).path)
|
|
65
69
|
|
|
66
70
|
def list_blobs(
|
|
67
|
-
self, blob_path: DataPath, filter_predicate:
|
|
71
|
+
self, blob_path: DataPath, filter_predicate: Callable[[...], bool] | None = None
|
|
68
72
|
) -> Iterator[DataPath]:
|
|
69
73
|
for blob in os.listdir(cast_path(blob_path).path):
|
|
74
|
+
if filter_predicate is not None and not filter_predicate(blob):
|
|
75
|
+
continue
|
|
70
76
|
yield LocalPath(path=blob)
|
|
71
77
|
|
|
72
78
|
def read_blobs(
|
|
73
79
|
self,
|
|
74
80
|
blob_path: DataPath,
|
|
75
|
-
serialization_format:
|
|
76
|
-
filter_predicate:
|
|
81
|
+
serialization_format: type[SerializationFormat[T]],
|
|
82
|
+
filter_predicate: Callable[[...], bool] | None = None,
|
|
77
83
|
) -> Iterator[T]:
|
|
78
84
|
dir_path = cast_path(blob_path).path
|
|
79
|
-
|
|
80
|
-
|
|
85
|
+
if os.path.isdir(dir_path):
|
|
86
|
+
for blob in os.listdir(dir_path):
|
|
87
|
+
if filter_predicate is not None and not filter_predicate(blob):
|
|
88
|
+
continue
|
|
89
|
+
with open(os.path.join(dir_path, blob), "rb") as blob_file:
|
|
90
|
+
yield serialization_format().deserialize(blob_file.read())
|
|
91
|
+
else:
|
|
92
|
+
with open(dir_path, "rb") as blob_file:
|
|
81
93
|
yield serialization_format().deserialize(blob_file.read())
|
|
82
94
|
|
|
83
95
|
def download_blobs(
|
|
84
96
|
self,
|
|
85
97
|
blob_path: DataPath,
|
|
86
98
|
local_path: str,
|
|
87
|
-
threads:
|
|
88
|
-
filter_predicate:
|
|
99
|
+
threads: int | None = None,
|
|
100
|
+
filter_predicate: Callable[[...], bool] | None = None,
|
|
89
101
|
) -> None:
|
|
90
102
|
raise NotImplementedError("Not supported by this client")
|
|
91
103
|
|
|
@@ -98,7 +110,7 @@ class LocalStorageClient(StorageClient):
|
|
|
98
110
|
@classmethod
|
|
99
111
|
def for_storage_path(cls, path: str) -> "StorageClient":
|
|
100
112
|
_ = cast_path(parse_data_path(path))
|
|
101
|
-
return cls(
|
|
113
|
+
return cls()
|
|
102
114
|
|
|
103
115
|
|
|
104
116
|
def cast_path(blob_path: DataPath) -> LocalPath:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Storage Client implementation for AWS S3.
|
|
3
3
|
"""
|
|
4
|
-
# Copyright (c) 2023-
|
|
4
|
+
# Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
|
|
5
5
|
#
|
|
6
6
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
7
|
# you may not use this file except in compliance with the License.
|
|
@@ -18,8 +18,8 @@
|
|
|
18
18
|
|
|
19
19
|
import os
|
|
20
20
|
|
|
21
|
-
from typing import
|
|
22
|
-
from
|
|
21
|
+
from typing import TypeVar, final
|
|
22
|
+
from collections.abc import Callable, Iterator
|
|
23
23
|
from boto3 import Session
|
|
24
24
|
from botocore.config import Config
|
|
25
25
|
from botocore.exceptions import ClientError
|
|
@@ -41,7 +41,7 @@ class S3StorageClient(StorageClient):
|
|
|
41
41
|
S3 Storage Client.
|
|
42
42
|
"""
|
|
43
43
|
|
|
44
|
-
def __init__(self, *, base_client: AwsClient, s3_resource:
|
|
44
|
+
def __init__(self, *, base_client: AwsClient, s3_resource: Session | None = None):
|
|
45
45
|
super().__init__(base_client=base_client)
|
|
46
46
|
self._base_client = base_client
|
|
47
47
|
self._s3_resource = s3_resource if s3_resource is not None else base_client.session.resource("s3")
|
|
@@ -50,10 +50,10 @@ class S3StorageClient(StorageClient):
|
|
|
50
50
|
def create(
|
|
51
51
|
cls,
|
|
52
52
|
auth: AwsClient,
|
|
53
|
-
endpoint_url:
|
|
54
|
-
session_callable:
|
|
53
|
+
endpoint_url: str | None = None,
|
|
54
|
+
session_callable: Callable[[], Session] | None = None,
|
|
55
55
|
):
|
|
56
|
-
def _get_endpoint_url() ->
|
|
56
|
+
def _get_endpoint_url() -> str | None:
|
|
57
57
|
if endpoint_url:
|
|
58
58
|
return endpoint_url
|
|
59
59
|
if auth.get_credentials():
|
|
@@ -65,19 +65,12 @@ class S3StorageClient(StorageClient):
|
|
|
65
65
|
|
|
66
66
|
return cls(base_client=auth, s3_resource=auth.session.resource("s3", endpoint_url=_get_endpoint_url()))
|
|
67
67
|
|
|
68
|
-
def get_blob_uri(self, blob_path: DataPath, **kwargs) -> str:
|
|
69
|
-
"""Returns a signed URL for a blob in S3 storage.
|
|
70
|
-
|
|
71
|
-
:param blob_path: Path to blob
|
|
72
|
-
|
|
73
|
-
:return: The signed URL for the given blob path
|
|
74
|
-
"""
|
|
68
|
+
def get_blob_uri(self, blob_path: DataPath, expires_in_seconds: float = 3600.0, **kwargs) -> str:
|
|
75
69
|
s3_path = cast_path(blob_path)
|
|
76
70
|
params = {
|
|
77
71
|
"Bucket": s3_path.bucket,
|
|
78
72
|
"Key": s3_path.path,
|
|
79
73
|
}
|
|
80
|
-
expiry_time = int(kwargs.get("expiry", timedelta(hours=1).total_seconds()))
|
|
81
74
|
signature_version = kwargs.get("signature_version", "s3v4")
|
|
82
75
|
signing_client = self._base_client.session.client(
|
|
83
76
|
"s3",
|
|
@@ -85,7 +78,9 @@ class S3StorageClient(StorageClient):
|
|
|
85
78
|
config=Config(signature_version=signature_version),
|
|
86
79
|
)
|
|
87
80
|
|
|
88
|
-
return signing_client.generate_presigned_url(
|
|
81
|
+
return signing_client.generate_presigned_url(
|
|
82
|
+
ClientMethod="get_object", Params=params, ExpiresIn=int(expires_in_seconds)
|
|
83
|
+
)
|
|
89
84
|
|
|
90
85
|
def blob_exists(self, blob_path: DataPath) -> bool:
|
|
91
86
|
"""Checks if blob located at blob_path exists
|
|
@@ -105,8 +100,8 @@ class S3StorageClient(StorageClient):
|
|
|
105
100
|
self,
|
|
106
101
|
data: T,
|
|
107
102
|
blob_path: DataPath,
|
|
108
|
-
serialization_format:
|
|
109
|
-
metadata:
|
|
103
|
+
serialization_format: type[SerializationFormat[T]],
|
|
104
|
+
metadata: dict[str, str] | None = None,
|
|
110
105
|
overwrite: bool = False,
|
|
111
106
|
) -> None:
|
|
112
107
|
"""
|
|
@@ -140,7 +135,7 @@ class S3StorageClient(StorageClient):
|
|
|
140
135
|
self._s3_resource.Bucket(s3_path.bucket).Object(blob_path.path).delete()
|
|
141
136
|
|
|
142
137
|
def list_blobs(
|
|
143
|
-
self, blob_path: DataPath, filter_predicate:
|
|
138
|
+
self, blob_path: DataPath, filter_predicate: Callable[[...], bool] | None = None
|
|
144
139
|
) -> Iterator[DataPath]:
|
|
145
140
|
"""
|
|
146
141
|
Lists blobs in S3 storage.
|
|
@@ -173,8 +168,8 @@ class S3StorageClient(StorageClient):
|
|
|
173
168
|
def read_blobs(
|
|
174
169
|
self,
|
|
175
170
|
blob_path: DataPath,
|
|
176
|
-
serialization_format:
|
|
177
|
-
filter_predicate:
|
|
171
|
+
serialization_format: type[SerializationFormat[T]],
|
|
172
|
+
filter_predicate: Callable[[...], bool] | None = None,
|
|
178
173
|
) -> Iterator[T]:
|
|
179
174
|
"""
|
|
180
175
|
Reads data under provided path into the given format.
|
|
@@ -194,8 +189,8 @@ class S3StorageClient(StorageClient):
|
|
|
194
189
|
self,
|
|
195
190
|
blob_path: DataPath,
|
|
196
191
|
local_path: str,
|
|
197
|
-
threads:
|
|
198
|
-
filter_predicate:
|
|
192
|
+
threads: int | None = None,
|
|
193
|
+
filter_predicate: Callable[[...], bool] | None = None,
|
|
199
194
|
) -> None:
|
|
200
195
|
"""
|
|
201
196
|
Downloads blobs from S3 storage to a local path.
|
|
@@ -286,4 +281,4 @@ class S3StorageClient(StorageClient):
|
|
|
286
281
|
Generate client instance that can operate on the provided path. Always uses EnvironmentCredentials.
|
|
287
282
|
"""
|
|
288
283
|
_ = cast_path(parse_data_path(path))
|
|
289
|
-
return cls(
|
|
284
|
+
return cls.create(auth=AwsClient())
|
adapta/storage/cache/__init__.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
Import index.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
# Copyright (c) 2023-
|
|
5
|
+
# Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
|
|
6
6
|
#
|
|
7
7
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
8
8
|
# you may not use this file except in compliance with the License.
|
adapta/storage/cache/_base.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Generic key-value cache.
|
|
3
3
|
"""
|
|
4
|
-
# Copyright (c) 2023-
|
|
4
|
+
# Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
|
|
5
5
|
#
|
|
6
6
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
7
|
# you may not use this file except in compliance with the License.
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
|
|
19
19
|
from abc import ABC, abstractmethod
|
|
20
20
|
from datetime import timedelta
|
|
21
|
-
from typing import Any
|
|
21
|
+
from typing import Any
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class KeyValueCache(ABC):
|
|
@@ -37,7 +37,7 @@ class KeyValueCache(ABC):
|
|
|
37
37
|
"""
|
|
38
38
|
|
|
39
39
|
@abstractmethod
|
|
40
|
-
def exists(self, key: str, attribute:
|
|
40
|
+
def exists(self, key: str, attribute: str | None = None) -> bool:
|
|
41
41
|
"""
|
|
42
42
|
Checks if a cache key is present. If an attribute is provided, should also check
|
|
43
43
|
if the value possesses this attribute.
|
|
@@ -48,7 +48,7 @@ class KeyValueCache(ABC):
|
|
|
48
48
|
"""
|
|
49
49
|
|
|
50
50
|
@abstractmethod
|
|
51
|
-
def multi_exists(self, keys:
|
|
51
|
+
def multi_exists(self, keys: list[str]) -> bool:
|
|
52
52
|
"""
|
|
53
53
|
Checks if all keys exist
|
|
54
54
|
|
|
@@ -57,7 +57,7 @@ class KeyValueCache(ABC):
|
|
|
57
57
|
"""
|
|
58
58
|
|
|
59
59
|
@abstractmethod
|
|
60
|
-
def multi_get(self, keys:
|
|
60
|
+
def multi_get(self, keys: list[str]) -> list[Any]:
|
|
61
61
|
"""
|
|
62
62
|
Reads multiple keys in a single call.
|
|
63
63
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Key-value cache based on Redis.
|
|
3
3
|
"""
|
|
4
|
-
# Copyright (c) 2023-
|
|
4
|
+
# Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
|
|
5
5
|
#
|
|
6
6
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
7
|
# you may not use this file except in compliance with the License.
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
import os
|
|
20
20
|
import ssl
|
|
21
21
|
from datetime import timedelta
|
|
22
|
-
from typing import Any
|
|
22
|
+
from typing import Any
|
|
23
23
|
|
|
24
24
|
import redis
|
|
25
25
|
from redis import default_backoff
|
|
@@ -65,13 +65,13 @@ class RedisCache(KeyValueCache):
|
|
|
65
65
|
ssl=True,
|
|
66
66
|
)
|
|
67
67
|
|
|
68
|
-
def multi_exists(self, keys:
|
|
68
|
+
def multi_exists(self, keys: list[str]) -> bool:
|
|
69
69
|
return self._redis.exists(*keys) == len(keys)
|
|
70
70
|
|
|
71
71
|
def evict(self, key: str) -> None:
|
|
72
72
|
self._redis.delete(key)
|
|
73
73
|
|
|
74
|
-
def exists(self, key: str, attribute:
|
|
74
|
+
def exists(self, key: str, attribute: str | None = None) -> bool:
|
|
75
75
|
if not attribute:
|
|
76
76
|
return self._redis.exists(key) == 1
|
|
77
77
|
|
|
@@ -83,7 +83,7 @@ class RedisCache(KeyValueCache):
|
|
|
83
83
|
|
|
84
84
|
return self._redis.hgetall(key)
|
|
85
85
|
|
|
86
|
-
def multi_get(self, keys:
|
|
86
|
+
def multi_get(self, keys: list[str]) -> list[Any]:
|
|
87
87
|
return self._redis.mget(keys)
|
|
88
88
|
|
|
89
89
|
def set(
|
|
@@ -1,4 +1,7 @@
|
|
|
1
|
-
|
|
1
|
+
"""
|
|
2
|
+
Import index
|
|
3
|
+
"""
|
|
4
|
+
# Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
|
|
2
5
|
#
|
|
3
6
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
7
|
# you may not use this file except in compliance with the License.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Import index
|
|
3
|
+
"""
|
|
4
|
+
# Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
|
|
5
|
+
#
|
|
6
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
# you may not use this file except in compliance with the License.
|
|
8
|
+
# You may obtain a copy of the License at
|
|
9
|
+
#
|
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
#
|
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
# See the License for the specific language governing permissions and
|
|
16
|
+
# limitations under the License.
|
|
17
|
+
#
|