adapta 2.11.9__py3-none-any.whl → 3.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. adapta/__init__.py +1 -1
  2. adapta/_version.py +1 -1
  3. adapta/connectors/__init__.py +1 -1
  4. adapta/connectors/service_bus/__init__.py +1 -1
  5. adapta/connectors/service_bus/_connector.py +2 -3
  6. adapta/logs/__init__.py +1 -1
  7. adapta/logs/_async_logger.py +38 -24
  8. adapta/logs/_base.py +21 -21
  9. adapta/logs/_internal.py +6 -7
  10. adapta/logs/_internal_logger.py +113 -41
  11. adapta/logs/_logger_interface.py +9 -10
  12. adapta/logs/handlers/__init__.py +1 -1
  13. adapta/logs/handlers/datadog_api_handler.py +7 -7
  14. adapta/logs/handlers/safe_stream_handler.py +4 -4
  15. adapta/logs/models/__init__.py +1 -1
  16. adapta/logs/models/_log_level.py +1 -1
  17. adapta/logs/models/_logs_metadata.py +4 -5
  18. adapta/metrics/__init__.py +1 -1
  19. adapta/metrics/_base.py +14 -15
  20. adapta/metrics/providers/__init__.py +1 -1
  21. adapta/metrics/providers/datadog_provider.py +21 -22
  22. adapta/metrics/providers/void_provider.py +34 -0
  23. adapta/ml/__init__.py +1 -1
  24. adapta/ml/_model.py +1 -1
  25. adapta/ml/mlflow/__init__.py +1 -1
  26. adapta/ml/mlflow/_client.py +101 -5
  27. adapta/ml/mlflow/_functions.py +44 -13
  28. adapta/process_communication/__init__.py +1 -1
  29. adapta/process_communication/_models.py +8 -6
  30. adapta/schema_management/README.md +0 -1
  31. adapta/schema_management/__init__.py +1 -1
  32. adapta/schema_management/schema_entity.py +3 -3
  33. adapta/security/__init__.py +1 -1
  34. adapta/security/clients/__init__.py +1 -1
  35. adapta/security/clients/_azure_client.py +14 -12
  36. adapta/security/clients/_base.py +11 -6
  37. adapta/security/clients/_local_client.py +6 -6
  38. adapta/security/clients/aws/__init__.py +1 -1
  39. adapta/security/clients/aws/_aws_client.py +12 -10
  40. adapta/security/clients/aws/_aws_credentials.py +7 -8
  41. adapta/security/clients/hashicorp_vault/__init__.py +1 -1
  42. adapta/security/clients/hashicorp_vault/hashicorp_vault_client.py +7 -6
  43. adapta/security/clients/hashicorp_vault/kubernetes_client.py +2 -2
  44. adapta/security/clients/hashicorp_vault/oidc_client.py +2 -2
  45. adapta/security/clients/hashicorp_vault/token_client.py +2 -2
  46. adapta/storage/__init__.py +1 -1
  47. adapta/storage/blob/README.md +14 -10
  48. adapta/storage/blob/__init__.py +1 -1
  49. adapta/storage/blob/azure_storage_client.py +76 -24
  50. adapta/storage/blob/base.py +15 -13
  51. adapta/storage/blob/local_storage_client.py +28 -16
  52. adapta/storage/blob/s3_storage_client.py +19 -24
  53. adapta/storage/cache/__init__.py +1 -1
  54. adapta/storage/cache/_base.py +5 -5
  55. adapta/storage/cache/redis_cache.py +5 -5
  56. adapta/storage/database/__init__.py +4 -1
  57. adapta/storage/database/{README.md → v2/README.md} +2 -0
  58. adapta/storage/database/v2/__init__.py +17 -0
  59. adapta/storage/database/v2/azure_sql.py +143 -0
  60. adapta/storage/{distributed_object_store/datastax_astra → database/v2/models}/__init__.py +5 -5
  61. adapta/storage/database/v2/models/_models.py +53 -0
  62. adapta/storage/database/{odbc.py → v2/odbc.py} +22 -13
  63. adapta/storage/database/{snowflake_sql.py → v2/snowflake_sql.py} +20 -12
  64. adapta/storage/database/{trino_sql.py → v2/trino_sql.py} +15 -6
  65. adapta/storage/database/v3/README.md +109 -0
  66. adapta/storage/database/v3/__init__.py +14 -0
  67. adapta/storage/database/{azure_sql.py → v3/azure_sql.py} +7 -9
  68. adapta/storage/database/v3/models/__init__.py +19 -0
  69. adapta/storage/database/{models → v3/models}/_models.py +2 -3
  70. adapta/storage/database/v3/odbc.py +217 -0
  71. adapta/storage/database/v3/snowflake_sql.py +241 -0
  72. adapta/storage/database/v3/trino_sql.py +154 -0
  73. adapta/storage/delta_lake/__init__.py +2 -3
  74. adapta/storage/delta_lake/{README.md → v2/README.md} +2 -0
  75. adapta/storage/delta_lake/v2/__init__.py +19 -0
  76. adapta/storage/delta_lake/{_functions.py → v2/_functions.py} +43 -27
  77. adapta/storage/delta_lake/v2/_models.py +72 -0
  78. adapta/storage/delta_lake/v3/README.md +147 -0
  79. adapta/storage/delta_lake/v3/__init__.py +20 -0
  80. adapta/storage/delta_lake/v3/_functions.py +315 -0
  81. adapta/storage/delta_lake/{_models.py → v3/_models.py} +4 -5
  82. adapta/storage/distributed_object_store/__init__.py +3 -1
  83. adapta/storage/distributed_object_store/v2/__init__.py +18 -0
  84. adapta/storage/distributed_object_store/{datastax_astra → v2/datastax_astra}/README.md +2 -0
  85. adapta/storage/distributed_object_store/v2/datastax_astra/__init__.py +20 -0
  86. adapta/storage/distributed_object_store/{datastax_astra → v2/datastax_astra}/_models.py +16 -0
  87. adapta/storage/distributed_object_store/{datastax_astra → v2/datastax_astra}/astra_client.py +61 -52
  88. adapta/storage/{database/models → distributed_object_store/v3}/__init__.py +4 -5
  89. adapta/storage/distributed_object_store/v3/datastax_astra/README.md +277 -0
  90. adapta/storage/distributed_object_store/v3/datastax_astra/__init__.py +20 -0
  91. adapta/storage/distributed_object_store/v3/datastax_astra/_model_mappers.py +469 -0
  92. adapta/storage/distributed_object_store/v3/datastax_astra/_models.py +134 -0
  93. adapta/storage/distributed_object_store/v3/datastax_astra/astra_client.py +569 -0
  94. adapta/storage/exceptions.py +1 -1
  95. adapta/storage/models/__init__.py +1 -1
  96. adapta/storage/models/_functions.py +5 -5
  97. adapta/storage/models/astra.py +4 -4
  98. adapta/storage/models/aws.py +1 -1
  99. adapta/storage/models/azure.py +2 -3
  100. adapta/storage/models/base.py +9 -1
  101. adapta/storage/models/enum.py +19 -0
  102. adapta/storage/models/filter_expression.py +124 -10
  103. adapta/storage/models/format.py +16 -205
  104. adapta/storage/models/formatters/__init__.py +36 -0
  105. adapta/storage/models/formatters/dict.py +43 -0
  106. adapta/storage/models/formatters/exceptions.py +7 -0
  107. adapta/storage/models/formatters/metaframe.py +48 -0
  108. adapta/storage/models/formatters/pandas.py +139 -0
  109. adapta/storage/models/formatters/pickle.py +36 -0
  110. adapta/storage/models/formatters/polars.py +240 -0
  111. adapta/storage/models/formatters/unit.py +26 -0
  112. adapta/storage/models/hive.py +24 -16
  113. adapta/storage/models/local.py +1 -1
  114. adapta/storage/models/trino.py +56 -0
  115. adapta/storage/query_enabled_store/README.md +1 -1
  116. adapta/storage/query_enabled_store/__init__.py +7 -1
  117. adapta/storage/query_enabled_store/_models.py +42 -13
  118. adapta/storage/query_enabled_store/_qes_astra.py +27 -14
  119. adapta/storage/query_enabled_store/_qes_delta.py +32 -10
  120. adapta/storage/query_enabled_store/_qes_local.py +81 -0
  121. adapta/storage/query_enabled_store/_qes_trino.py +133 -0
  122. adapta/storage/secrets/__init__.py +1 -1
  123. adapta/storage/secrets/_base.py +5 -4
  124. adapta/storage/secrets/azure_secret_client.py +3 -4
  125. adapta/storage/secrets/hashicorp_vault_secret_storage_client.py +5 -5
  126. adapta/utils/README.md +92 -0
  127. adapta/utils/__init__.py +2 -1
  128. adapta/utils/_common.py +50 -17
  129. adapta/utils/_requests.py +53 -0
  130. adapta/utils/concurrent_task_runner.py +10 -9
  131. adapta/utils/data_structures/_functions.py +6 -6
  132. adapta/utils/decorators/_logging.py +3 -3
  133. adapta/utils/decorators/_rate_limit.py +2 -2
  134. adapta/utils/metaframe.py +172 -0
  135. adapta/utils/python_typing/_functions.py +5 -10
  136. {adapta-2.11.9.dist-info → adapta-3.5.13.dist-info}/METADATA +18 -14
  137. adapta-3.5.13.dist-info/RECORD +146 -0
  138. {adapta-2.11.9.dist-info → adapta-3.5.13.dist-info}/WHEEL +1 -1
  139. adapta-2.11.9.dist-info/RECORD +0 -110
  140. {adapta-2.11.9.dist-info → adapta-3.5.13.dist-info/licenses}/LICENSE +0 -0
@@ -1,7 +1,7 @@
1
1
  """
2
2
  Storage Client implementation for Azure Cloud.
3
3
  """
4
- # Copyright (c) 2023-2024. ECCO Sneaks & Data
4
+ # Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -21,7 +21,8 @@ from datetime import datetime, timedelta
21
21
  from functools import partial
22
22
  import signal
23
23
  from threading import Thread
24
- from typing import Union, Optional, Dict, Type, TypeVar, Iterator, List, Callable, final
24
+ from typing import TypeVar, final
25
+ from collections.abc import Iterator, Callable
25
26
 
26
27
  from azure.core.paging import ItemPaged
27
28
  from azure.storage.blob import (
@@ -32,6 +33,7 @@ from azure.storage.blob import (
32
33
  BlobProperties,
33
34
  ExponentialRetry,
34
35
  ContainerClient,
36
+ BlobLeaseClient,
35
37
  )
36
38
 
37
39
  from adapta.storage.blob.base import StorageClient
@@ -51,7 +53,7 @@ class AzureStorageClient(StorageClient):
51
53
  Azure Storage (Blob and ADLS) Client.
52
54
  """
53
55
 
54
- def __init__(self, *, base_client: AzureClient, path: Union[AdlsGen2Path, WasbPath], implicit_login=True):
56
+ def __init__(self, *, base_client: AzureClient, path: AdlsGen2Path | WasbPath, implicit_login=True):
55
57
  super().__init__(base_client=base_client)
56
58
 
57
59
  # overrides default ExponentialRetry
@@ -69,18 +71,32 @@ class AzureStorageClient(StorageClient):
69
71
  self._storage_options = None
70
72
  else:
71
73
  self._storage_options = self._base_client.connect_storage(path)
72
- connection_string = (
73
- f"DefaultEndpointsProtocol=https;"
74
- f"AccountName={self._storage_options['AZURE_STORAGE_ACCOUNT_NAME']};"
75
- f"AccountKey={self._storage_options['AZURE_STORAGE_ACCOUNT_KEY']};"
76
- f"BlobEndpoint=https://{self._storage_options['AZURE_STORAGE_ACCOUNT_NAME']}.blob.core.windows.net/;"
74
+ blob_endpoint = (
75
+ f"BlobEndpoint=https://{self._storage_options['AZURE_STORAGE_ACCOUNT_NAME']}.blob.core.windows.net/"
77
76
  )
77
+ endpoint_protocol = "DefaultEndpointsProtocol=https"
78
+
79
+ if "ADAPTA__AZURE_STORAGE_BLOB_ENDPOINT" in os.environ:
80
+ blob_endpoint = f'BlobEndpoint={os.environ["ADAPTA__AZURE_STORAGE_BLOB_ENDPOINT"]}'
81
+
82
+ if "ADAPTA__AZURE_STORAGE_DEFAULT_PROTOCOL" in os.environ:
83
+ endpoint_protocol = f"DefaultEndpointsProtocol={os.environ['ADAPTA__AZURE_STORAGE_DEFAULT_PROTOCOL']}"
84
+
85
+ connection_string = ";".join(
86
+ [
87
+ endpoint_protocol,
88
+ f"AccountName={self._storage_options['AZURE_STORAGE_ACCOUNT_NAME']}",
89
+ f"AccountKey={self._storage_options['AZURE_STORAGE_ACCOUNT_KEY']}",
90
+ blob_endpoint,
91
+ ]
92
+ )
93
+
78
94
  self._blob_service_client: BlobServiceClient = BlobServiceClient.from_connection_string(
79
95
  connection_string, retry_policy=retry_policy
80
96
  )
81
97
 
82
98
  @classmethod
83
- def create(cls, auth: AzureClient, endpoint_url: Optional[str] = None):
99
+ def create(cls, auth: AzureClient, endpoint_url: str | None = None):
84
100
  """
85
101
  Not used in Azure.
86
102
  :return:
@@ -120,14 +136,14 @@ class AzureStorageClient(StorageClient):
120
136
  self,
121
137
  data: T,
122
138
  blob_path: DataPath,
123
- serialization_format: Type[SerializationFormat[T]],
124
- metadata: Optional[Dict[str, str]] = None,
139
+ serialization_format: type[SerializationFormat[T]],
140
+ metadata: dict[str, str] | None = None,
125
141
  overwrite: bool = False,
126
142
  ) -> None:
127
143
  bytes_ = serialization_format().serialize(data)
128
144
  self._get_blob_client(blob_path).upload_blob(bytes_, metadata=metadata, overwrite=overwrite)
129
145
 
130
- def get_blob_uri(self, blob_path: DataPath, **kwargs) -> str:
146
+ def get_blob_uri(self, blob_path: DataPath, expires_in_seconds: float = 3600.0, **kwargs) -> str:
131
147
  blob_client = self._get_blob_client(blob_path)
132
148
  azure_path = cast_path(blob_path)
133
149
 
@@ -137,7 +153,7 @@ class AzureStorageClient(StorageClient):
137
153
  container_name=azure_path.container,
138
154
  account_name=azure_path.account,
139
155
  permission=kwargs.get("permission", BlobSasPermissions(read=True)),
140
- expiry=kwargs.get("expiry", datetime.utcnow() + timedelta(hours=1)),
156
+ expiry=datetime.utcnow() + timedelta(seconds=expires_in_seconds),
141
157
  )
142
158
 
143
159
  sas_token = (
@@ -148,18 +164,17 @@ class AzureStorageClient(StorageClient):
148
164
  else base_call(
149
165
  user_delegation_key=self._blob_service_client.get_user_delegation_key(
150
166
  key_start_time=datetime.utcnow() - timedelta(minutes=1),
151
- key_expiry_time=kwargs.get("expiry", datetime.utcnow() + timedelta(hours=1)),
167
+ key_expiry_time=datetime.utcnow() + timedelta(seconds=expires_in_seconds),
152
168
  ),
153
169
  )
154
170
  )
155
171
 
156
- sas_uri = f"{blob_client.url}?{sas_token}"
157
- return sas_uri
172
+ return f"{blob_client.url}?{sas_token}"
158
173
 
159
174
  def blob_exists(self, blob_path: DataPath) -> bool:
160
175
  return self._get_blob_client(blob_path).exists()
161
176
 
162
- def _list_blobs(self, blob_path: DataPath) -> (ItemPaged[BlobProperties], Union[AdlsGen2Path, WasbPath]):
177
+ def _list_blobs(self, blob_path: DataPath) -> (ItemPaged[BlobProperties], AdlsGen2Path | WasbPath):
163
178
  azure_path = cast_path(blob_path)
164
179
 
165
180
  return (
@@ -170,8 +185,8 @@ class AzureStorageClient(StorageClient):
170
185
  def read_blobs(
171
186
  self,
172
187
  blob_path: DataPath,
173
- serialization_format: Type[SerializationFormat[T]],
174
- filter_predicate: Optional[Callable[[BlobProperties], bool]] = None,
188
+ serialization_format: type[SerializationFormat[T]],
189
+ filter_predicate: Callable[[BlobProperties], bool] | None = None,
175
190
  ) -> Iterator[T]:
176
191
  blobs_on_path, azure_path = self._list_blobs(blob_path)
177
192
 
@@ -211,8 +226,8 @@ class AzureStorageClient(StorageClient):
211
226
  self,
212
227
  blob_path: DataPath,
213
228
  local_path: str,
214
- threads: Optional[int] = None,
215
- filter_predicate: Optional[Callable[[BlobProperties], bool]] = None,
229
+ threads: int | None = None,
230
+ filter_predicate: Callable[[BlobProperties], bool] | None = None,
216
231
  ) -> None:
217
232
  def download_blob(blob: BlobProperties, container: str) -> None:
218
233
  write_path = os.path.join(local_path, blob.name)
@@ -229,7 +244,7 @@ class AzureStorageClient(StorageClient):
229
244
  .readall()
230
245
  )
231
246
 
232
- def download_blob_list(blob_list: List[BlobProperties], container: str) -> None:
247
+ def download_blob_list(blob_list: list[BlobProperties], container: str) -> None:
233
248
  for blob_from_list in blob_list:
234
249
  if blob_from_list:
235
250
  download_blob(blob_from_list, container)
@@ -251,7 +266,7 @@ class AzureStorageClient(StorageClient):
251
266
  for blob_dir in blob_dirs:
252
267
  os.makedirs(os.path.join(local_path, blob_dir.name), exist_ok=True)
253
268
 
254
- blob_lists: List[List[BlobProperties]] = chunk_list(blob_files, threads)
269
+ blob_lists: list[list[BlobProperties]] = chunk_list(blob_files, threads)
255
270
  thread_list = [
256
271
  Thread(target=download_blob_list, args=(blob_list, azure_path.container)) for blob_list in blob_lists
257
272
  ]
@@ -263,7 +278,7 @@ class AzureStorageClient(StorageClient):
263
278
  def list_blobs(
264
279
  self,
265
280
  blob_path: DataPath,
266
- filter_predicate: Optional[Callable[[BlobProperties], bool]] = lambda blob: blob.size != 0, # Skip folders
281
+ filter_predicate: Callable[[BlobProperties], bool] | None = lambda blob: blob.size != 0, # Skip folders
267
282
  ) -> Iterator[DataPath]:
268
283
  blobs_on_path, azure_path = self._list_blobs(blob_path)
269
284
 
@@ -275,6 +290,31 @@ class AzureStorageClient(StorageClient):
275
290
  path=blob.name,
276
291
  )
277
292
 
293
+ def list_matching_prefixes(
294
+ self, blob_path: DataPath, delimiter: str = "/", timeout_seconds: int = 3600
295
+ ) -> Iterator[DataPath]:
296
+ """
297
+ List blobs in accordance with a hierarchy, as delimited by the specified delimiter character.
298
+ For example, calling list_matching_prefixes(AldsGen2Path.from_hdfs_path(path), delimiter="/"),
299
+ where path=abfss://c@a.dfs.core.windows.net/my/pre will return:
300
+ abfss://c@a.dfs.core.windows.net/my/pre1
301
+ abfss://c@a.dfs.core.windows.net/my/preadad
302
+ abfss://c@a.dfs.core.windows.net/my/preeqweq
303
+
304
+ but will not return abfss://c@a.dfs.core.windows.net/my/pre1/pre2
305
+ """
306
+ azure_path = cast_path(blob_path)
307
+ for prefix in self._get_container_client(azure_path).walk_blobs(
308
+ name_starts_with=blob_path.path,
309
+ delimiter=delimiter,
310
+ timeout=timeout_seconds,
311
+ ):
312
+ yield AdlsGen2Path(
313
+ account=azure_path.account,
314
+ container=azure_path.container,
315
+ path=prefix.name,
316
+ )
317
+
278
318
  def delete_blob(
279
319
  self,
280
320
  blob_path: DataPath,
@@ -283,6 +323,18 @@ class AzureStorageClient(StorageClient):
283
323
 
284
324
  self._get_container_client(azure_path).delete_blob(blob_path.path)
285
325
 
326
+ def delete_leased_blob(self, blob_path: DataPath) -> None:
327
+ """
328
+ Azure specific deletion that takes care of a leased blob
329
+ """
330
+ azure_path = cast_path(blob_path)
331
+ blob_client = self._get_blob_client(azure_path)
332
+
333
+ if blob_client.get_blob_properties().lease.state == "leased":
334
+ BlobLeaseClient(blob_client).break_lease()
335
+
336
+ self._get_container_client(azure_path).delete_blob(blob_path.path)
337
+
286
338
  def copy_blob(self, blob_path: DataPath, target_blob_path: DataPath, doze_period_ms=1000) -> None:
287
339
  source_url = self.get_blob_uri(blob_path)
288
340
  self._get_blob_client(target_blob_path).start_copy_from_url(source_url)
@@ -1,7 +1,7 @@
1
1
  """
2
2
  Abstraction for storage operations.
3
3
  """
4
- # Copyright (c) 2023-2024. ECCO Sneaks & Data
4
+ # Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -17,7 +17,8 @@
17
17
  #
18
18
 
19
19
  from abc import ABC, abstractmethod
20
- from typing import Optional, Dict, Type, TypeVar, Iterator, Callable
20
+ from typing import TypeVar
21
+ from collections.abc import Iterator, Callable
21
22
 
22
23
  from adapta.security.clients import AuthenticationClient
23
24
  from adapta.storage.models.base import DataPath
@@ -36,7 +37,7 @@ class StorageClient(ABC):
36
37
  self._base_client = base_client
37
38
 
38
39
  @classmethod
39
- def create(cls, auth: AuthenticationClient, endpoint_url: Optional[str] = None):
40
+ def create(cls, auth: AuthenticationClient, endpoint_url: str | None = None):
40
41
  """
41
42
  Creates a Storage client using the AuthenticationClient to set up its session.
42
43
 
@@ -47,12 +48,13 @@ class StorageClient(ABC):
47
48
  """
48
49
 
49
50
  @abstractmethod
50
- def get_blob_uri(self, blob_path: DataPath, **kwargs) -> str:
51
+ def get_blob_uri(self, blob_path: DataPath, expires_in_seconds: float = 3600.0, **kwargs) -> str:
51
52
  """
52
53
  Generates a URL which can be used to download this blob.
53
54
 
54
- :param blob_path:
55
- :param kwargs:
55
+ :param blob_path: Path to the blob.
56
+ :param expires_in_seconds: Expiration time in seconds.
57
+ :param kwargs: Optional parameters to pass to the signing client.
56
58
  :return:
57
59
  """
58
60
 
@@ -70,8 +72,8 @@ class StorageClient(ABC):
70
72
  self,
71
73
  data: T,
72
74
  blob_path: DataPath,
73
- serialization_format: Type[SerializationFormat[T]],
74
- metadata: Optional[Dict[str, str]] = None,
75
+ serialization_format: type[SerializationFormat[T]],
76
+ metadata: dict[str, str] | None = None,
75
77
  overwrite: bool = False,
76
78
  ) -> None:
77
79
  """
@@ -101,7 +103,7 @@ class StorageClient(ABC):
101
103
  def list_blobs(
102
104
  self,
103
105
  blob_path: DataPath,
104
- filter_predicate: Optional[Callable[[...], bool]] = None,
106
+ filter_predicate: Callable[[...], bool] | None = None,
105
107
  ) -> Iterator[DataPath]:
106
108
  """
107
109
  Lists blobs in blob_path
@@ -115,8 +117,8 @@ class StorageClient(ABC):
115
117
  def read_blobs(
116
118
  self,
117
119
  blob_path: DataPath,
118
- serialization_format: Type[SerializationFormat[T]],
119
- filter_predicate: Optional[Callable[[...], bool]] = None,
120
+ serialization_format: type[SerializationFormat[T]],
121
+ filter_predicate: Callable[[...], bool] | None = None,
120
122
  ) -> Iterator[T]:
121
123
  """
122
124
  Reads data under provided path into the given format.
@@ -132,8 +134,8 @@ class StorageClient(ABC):
132
134
  self,
133
135
  blob_path: DataPath,
134
136
  local_path: str,
135
- threads: Optional[int] = None,
136
- filter_predicate: Optional[Callable[[...], bool]] = None,
137
+ threads: int | None = None,
138
+ filter_predicate: Callable[[...], bool] | None = None,
137
139
  ) -> None:
138
140
  """
139
141
  Reads data under provided path into the given format.
@@ -2,7 +2,7 @@
2
2
  Storage Client implementation for a regular filesystem.
3
3
  """
4
4
 
5
- # Copyright (c) 2023-2024. ECCO Sneaks & Data
5
+ # Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
6
6
  #
7
7
  # Licensed under the Apache License, Version 2.0 (the "License");
8
8
  # you may not use this file except in compliance with the License.
@@ -20,9 +20,10 @@
20
20
  import os.path
21
21
  import shutil
22
22
 
23
- from typing import final, Optional, Callable, Type, Iterator, Dict
23
+ from typing import final
24
+ from collections.abc import Callable, Iterator
24
25
 
25
- from adapta.security.clients import LocalClient
26
+ from adapta.security.clients import LocalClient, AuthenticationClient
26
27
  from adapta.storage.blob.base import StorageClient, T
27
28
  from adapta.storage.models import DataPath, LocalPath, parse_data_path
28
29
  from adapta.storage.models.format import SerializationFormat
@@ -34,11 +35,14 @@ class LocalStorageClient(StorageClient):
34
35
  Local Storage Client, primarily for unit tests.
35
36
  """
36
37
 
38
+ def __init__(self):
39
+ super().__init__(base_client=LocalClient())
40
+
37
41
  @classmethod
38
- def create(cls, auth: StorageClient, endpoint_url: Optional[str] = None):
39
- raise NotImplementedError("Not supported by this client")
42
+ def create(cls, auth: AuthenticationClient, endpoint_url: str | None = None):
43
+ return cls()
40
44
 
41
- def get_blob_uri(self, blob_path: DataPath, **kwargs) -> str:
45
+ def get_blob_uri(self, blob_path: DataPath, expires_in_seconds: float = 3600.0, **kwargs) -> str:
42
46
  return cast_path(blob_path).path
43
47
 
44
48
  def blob_exists(self, blob_path: DataPath) -> bool:
@@ -48,8 +52,8 @@ class LocalStorageClient(StorageClient):
48
52
  self,
49
53
  data: T,
50
54
  blob_path: DataPath,
51
- serialization_format: Type[SerializationFormat[T]],
52
- metadata: Optional[Dict[str, str]] = None,
55
+ serialization_format: type[SerializationFormat[T]],
56
+ metadata: dict[str, str] | None = None,
53
57
  overwrite: bool = False,
54
58
  ) -> None:
55
59
  bytes_ = serialization_format().serialize(data)
@@ -64,28 +68,36 @@ class LocalStorageClient(StorageClient):
64
68
  os.remove(cast_path(blob_path).path)
65
69
 
66
70
  def list_blobs(
67
- self, blob_path: DataPath, filter_predicate: Optional[Callable[[...], bool]] = None
71
+ self, blob_path: DataPath, filter_predicate: Callable[[...], bool] | None = None
68
72
  ) -> Iterator[DataPath]:
69
73
  for blob in os.listdir(cast_path(blob_path).path):
74
+ if filter_predicate is not None and not filter_predicate(blob):
75
+ continue
70
76
  yield LocalPath(path=blob)
71
77
 
72
78
  def read_blobs(
73
79
  self,
74
80
  blob_path: DataPath,
75
- serialization_format: Type[SerializationFormat[T]],
76
- filter_predicate: Optional[Callable[[...], bool]] = None,
81
+ serialization_format: type[SerializationFormat[T]],
82
+ filter_predicate: Callable[[...], bool] | None = None,
77
83
  ) -> Iterator[T]:
78
84
  dir_path = cast_path(blob_path).path
79
- for blob in os.listdir(dir_path):
80
- with open(os.path.join(dir_path, blob), "rb") as blob_file:
85
+ if os.path.isdir(dir_path):
86
+ for blob in os.listdir(dir_path):
87
+ if filter_predicate is not None and not filter_predicate(blob):
88
+ continue
89
+ with open(os.path.join(dir_path, blob), "rb") as blob_file:
90
+ yield serialization_format().deserialize(blob_file.read())
91
+ else:
92
+ with open(dir_path, "rb") as blob_file:
81
93
  yield serialization_format().deserialize(blob_file.read())
82
94
 
83
95
  def download_blobs(
84
96
  self,
85
97
  blob_path: DataPath,
86
98
  local_path: str,
87
- threads: Optional[int] = None,
88
- filter_predicate: Optional[Callable[[...], bool]] = None,
99
+ threads: int | None = None,
100
+ filter_predicate: Callable[[...], bool] | None = None,
89
101
  ) -> None:
90
102
  raise NotImplementedError("Not supported by this client")
91
103
 
@@ -98,7 +110,7 @@ class LocalStorageClient(StorageClient):
98
110
  @classmethod
99
111
  def for_storage_path(cls, path: str) -> "StorageClient":
100
112
  _ = cast_path(parse_data_path(path))
101
- return cls(base_client=LocalClient())
113
+ return cls()
102
114
 
103
115
 
104
116
  def cast_path(blob_path: DataPath) -> LocalPath:
@@ -1,7 +1,7 @@
1
1
  """
2
2
  Storage Client implementation for AWS S3.
3
3
  """
4
- # Copyright (c) 2023-2024. ECCO Sneaks & Data
4
+ # Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -18,8 +18,8 @@
18
18
 
19
19
  import os
20
20
 
21
- from typing import Optional, Callable, Type, Iterator, Dict, TypeVar, final
22
- from datetime import timedelta
21
+ from typing import TypeVar, final
22
+ from collections.abc import Callable, Iterator
23
23
  from boto3 import Session
24
24
  from botocore.config import Config
25
25
  from botocore.exceptions import ClientError
@@ -41,7 +41,7 @@ class S3StorageClient(StorageClient):
41
41
  S3 Storage Client.
42
42
  """
43
43
 
44
- def __init__(self, *, base_client: AwsClient, s3_resource: Optional[Session] = None):
44
+ def __init__(self, *, base_client: AwsClient, s3_resource: Session | None = None):
45
45
  super().__init__(base_client=base_client)
46
46
  self._base_client = base_client
47
47
  self._s3_resource = s3_resource if s3_resource is not None else base_client.session.resource("s3")
@@ -50,10 +50,10 @@ class S3StorageClient(StorageClient):
50
50
  def create(
51
51
  cls,
52
52
  auth: AwsClient,
53
- endpoint_url: Optional[str] = None,
54
- session_callable: Optional[Callable[[], Session]] = None,
53
+ endpoint_url: str | None = None,
54
+ session_callable: Callable[[], Session] | None = None,
55
55
  ):
56
- def _get_endpoint_url() -> Optional[str]:
56
+ def _get_endpoint_url() -> str | None:
57
57
  if endpoint_url:
58
58
  return endpoint_url
59
59
  if auth.get_credentials():
@@ -65,19 +65,12 @@ class S3StorageClient(StorageClient):
65
65
 
66
66
  return cls(base_client=auth, s3_resource=auth.session.resource("s3", endpoint_url=_get_endpoint_url()))
67
67
 
68
- def get_blob_uri(self, blob_path: DataPath, **kwargs) -> str:
69
- """Returns a signed URL for a blob in S3 storage.
70
-
71
- :param blob_path: Path to blob
72
-
73
- :return: The signed URL for the given blob path
74
- """
68
+ def get_blob_uri(self, blob_path: DataPath, expires_in_seconds: float = 3600.0, **kwargs) -> str:
75
69
  s3_path = cast_path(blob_path)
76
70
  params = {
77
71
  "Bucket": s3_path.bucket,
78
72
  "Key": s3_path.path,
79
73
  }
80
- expiry_time = int(kwargs.get("expiry", timedelta(hours=1).total_seconds()))
81
74
  signature_version = kwargs.get("signature_version", "s3v4")
82
75
  signing_client = self._base_client.session.client(
83
76
  "s3",
@@ -85,7 +78,9 @@ class S3StorageClient(StorageClient):
85
78
  config=Config(signature_version=signature_version),
86
79
  )
87
80
 
88
- return signing_client.generate_presigned_url(ClientMethod="get_object", Params=params, ExpiresIn=expiry_time)
81
+ return signing_client.generate_presigned_url(
82
+ ClientMethod="get_object", Params=params, ExpiresIn=int(expires_in_seconds)
83
+ )
89
84
 
90
85
  def blob_exists(self, blob_path: DataPath) -> bool:
91
86
  """Checks if blob located at blob_path exists
@@ -105,8 +100,8 @@ class S3StorageClient(StorageClient):
105
100
  self,
106
101
  data: T,
107
102
  blob_path: DataPath,
108
- serialization_format: Type[SerializationFormat[T]],
109
- metadata: Optional[Dict[str, str]] = None,
103
+ serialization_format: type[SerializationFormat[T]],
104
+ metadata: dict[str, str] | None = None,
110
105
  overwrite: bool = False,
111
106
  ) -> None:
112
107
  """
@@ -140,7 +135,7 @@ class S3StorageClient(StorageClient):
140
135
  self._s3_resource.Bucket(s3_path.bucket).Object(blob_path.path).delete()
141
136
 
142
137
  def list_blobs(
143
- self, blob_path: DataPath, filter_predicate: Optional[Callable[[...], bool]] = None
138
+ self, blob_path: DataPath, filter_predicate: Callable[[...], bool] | None = None
144
139
  ) -> Iterator[DataPath]:
145
140
  """
146
141
  Lists blobs in S3 storage.
@@ -173,8 +168,8 @@ class S3StorageClient(StorageClient):
173
168
  def read_blobs(
174
169
  self,
175
170
  blob_path: DataPath,
176
- serialization_format: Type[SerializationFormat[T]],
177
- filter_predicate: Optional[Callable[[...], bool]] = None,
171
+ serialization_format: type[SerializationFormat[T]],
172
+ filter_predicate: Callable[[...], bool] | None = None,
178
173
  ) -> Iterator[T]:
179
174
  """
180
175
  Reads data under provided path into the given format.
@@ -194,8 +189,8 @@ class S3StorageClient(StorageClient):
194
189
  self,
195
190
  blob_path: DataPath,
196
191
  local_path: str,
197
- threads: Optional[int] = None,
198
- filter_predicate: Optional[Callable[[...], bool]] = None,
192
+ threads: int | None = None,
193
+ filter_predicate: Callable[[...], bool] | None = None,
199
194
  ) -> None:
200
195
  """
201
196
  Downloads blobs from S3 storage to a local path.
@@ -286,4 +281,4 @@ class S3StorageClient(StorageClient):
286
281
  Generate client instance that can operate on the provided path. Always uses EnvironmentCredentials/
287
282
  """
288
283
  _ = cast_path(parse_data_path(path))
289
- return cls(base_client=AwsClient())
284
+ return cls.create(auth=AwsClient())
@@ -2,7 +2,7 @@
2
2
  Import index.
3
3
  """
4
4
 
5
- # Copyright (c) 2023-2024. ECCO Sneaks & Data
5
+ # Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
6
6
  #
7
7
  # Licensed under the Apache License, Version 2.0 (the "License");
8
8
  # you may not use this file except in compliance with the License.
@@ -1,7 +1,7 @@
1
1
  """
2
2
  Generic key-value cache.
3
3
  """
4
- # Copyright (c) 2023-2024. ECCO Sneaks & Data
4
+ # Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -18,7 +18,7 @@
18
18
 
19
19
  from abc import ABC, abstractmethod
20
20
  from datetime import timedelta
21
- from typing import Any, List, Optional
21
+ from typing import Any
22
22
 
23
23
 
24
24
  class KeyValueCache(ABC):
@@ -37,7 +37,7 @@ class KeyValueCache(ABC):
37
37
  """
38
38
 
39
39
  @abstractmethod
40
- def exists(self, key: str, attribute: Optional[str] = None) -> bool:
40
+ def exists(self, key: str, attribute: str | None = None) -> bool:
41
41
  """
42
42
  Checks if a cache key is present. If an attribute is provided, should also check
43
43
  if a value possesses this attributes.
@@ -48,7 +48,7 @@ class KeyValueCache(ABC):
48
48
  """
49
49
 
50
50
  @abstractmethod
51
- def multi_exists(self, keys: List[str]) -> bool:
51
+ def multi_exists(self, keys: list[str]) -> bool:
52
52
  """
53
53
  Checks if all keys exist
54
54
 
@@ -57,7 +57,7 @@ class KeyValueCache(ABC):
57
57
  """
58
58
 
59
59
  @abstractmethod
60
- def multi_get(self, keys: List[str]) -> List[Any]:
60
+ def multi_get(self, keys: list[str]) -> list[Any]:
61
61
  """
62
62
  Reads multiple keys in a single call.
63
63
 
@@ -1,7 +1,7 @@
1
1
  """
2
2
  Key-value cache based on Redis.
3
3
  """
4
- # Copyright (c) 2023-2024. ECCO Sneaks & Data
4
+ # Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -19,7 +19,7 @@
19
19
  import os
20
20
  import ssl
21
21
  from datetime import timedelta
22
- from typing import Any, List, Optional
22
+ from typing import Any
23
23
 
24
24
  import redis
25
25
  from redis import default_backoff
@@ -65,13 +65,13 @@ class RedisCache(KeyValueCache):
65
65
  ssl=True,
66
66
  )
67
67
 
68
- def multi_exists(self, keys: List[str]) -> bool:
68
+ def multi_exists(self, keys: list[str]) -> bool:
69
69
  return self._redis.exists(*keys) == len(keys)
70
70
 
71
71
  def evict(self, key: str) -> None:
72
72
  self._redis.delete(key)
73
73
 
74
- def exists(self, key: str, attribute: Optional[str] = None) -> bool:
74
+ def exists(self, key: str, attribute: str | None = None) -> bool:
75
75
  if not attribute:
76
76
  return self._redis.exists(key) == 1
77
77
 
@@ -83,7 +83,7 @@ class RedisCache(KeyValueCache):
83
83
 
84
84
  return self._redis.hgetall(key)
85
85
 
86
- def multi_get(self, keys: List[str]) -> List[Any]:
86
+ def multi_get(self, keys: list[str]) -> list[Any]:
87
87
  return self._redis.mget(keys)
88
88
 
89
89
  def set(
@@ -1,4 +1,7 @@
1
- # Copyright (c) 2023-2024. ECCO Sneaks & Data
1
+ """
2
+ Import index
3
+ """
4
+ # Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
2
5
  #
3
6
  # Licensed under the Apache License, Version 2.0 (the "License");
4
7
  # you may not use this file except in compliance with the License.
@@ -1,5 +1,7 @@
1
1
  # Database Clients
2
2
 
3
+ **This is a deprecated module. Please use the new module `adapta.storage.database.v3` instead.**
4
+
3
5
  Supported clients:
4
6
 
5
7
  - Generic ODBC
@@ -0,0 +1,17 @@
1
+ """
2
+ Import index
3
+ """
4
+ # Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ #