adapta 2.11.9__py3-none-any.whl → 3.5.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adapta/__init__.py +1 -1
- adapta/_version.py +1 -1
- adapta/connectors/__init__.py +1 -1
- adapta/connectors/service_bus/__init__.py +1 -1
- adapta/connectors/service_bus/_connector.py +2 -3
- adapta/logs/__init__.py +1 -1
- adapta/logs/_async_logger.py +38 -24
- adapta/logs/_base.py +21 -21
- adapta/logs/_internal.py +6 -7
- adapta/logs/_internal_logger.py +113 -41
- adapta/logs/_logger_interface.py +9 -10
- adapta/logs/handlers/__init__.py +1 -1
- adapta/logs/handlers/datadog_api_handler.py +7 -7
- adapta/logs/handlers/safe_stream_handler.py +4 -4
- adapta/logs/models/__init__.py +1 -1
- adapta/logs/models/_log_level.py +1 -1
- adapta/logs/models/_logs_metadata.py +4 -5
- adapta/metrics/__init__.py +1 -1
- adapta/metrics/_base.py +14 -15
- adapta/metrics/providers/__init__.py +1 -1
- adapta/metrics/providers/datadog_provider.py +21 -22
- adapta/metrics/providers/void_provider.py +34 -0
- adapta/ml/__init__.py +1 -1
- adapta/ml/_model.py +1 -1
- adapta/ml/mlflow/__init__.py +1 -1
- adapta/ml/mlflow/_client.py +101 -5
- adapta/ml/mlflow/_functions.py +44 -13
- adapta/process_communication/__init__.py +1 -1
- adapta/process_communication/_models.py +8 -6
- adapta/schema_management/README.md +0 -1
- adapta/schema_management/__init__.py +1 -1
- adapta/schema_management/schema_entity.py +3 -3
- adapta/security/__init__.py +1 -1
- adapta/security/clients/__init__.py +1 -1
- adapta/security/clients/_azure_client.py +14 -12
- adapta/security/clients/_base.py +11 -6
- adapta/security/clients/_local_client.py +6 -6
- adapta/security/clients/aws/__init__.py +1 -1
- adapta/security/clients/aws/_aws_client.py +12 -10
- adapta/security/clients/aws/_aws_credentials.py +7 -8
- adapta/security/clients/hashicorp_vault/__init__.py +1 -1
- adapta/security/clients/hashicorp_vault/hashicorp_vault_client.py +7 -6
- adapta/security/clients/hashicorp_vault/kubernetes_client.py +2 -2
- adapta/security/clients/hashicorp_vault/oidc_client.py +2 -2
- adapta/security/clients/hashicorp_vault/token_client.py +2 -2
- adapta/storage/__init__.py +1 -1
- adapta/storage/blob/README.md +14 -10
- adapta/storage/blob/__init__.py +1 -1
- adapta/storage/blob/azure_storage_client.py +76 -24
- adapta/storage/blob/base.py +15 -13
- adapta/storage/blob/local_storage_client.py +28 -16
- adapta/storage/blob/s3_storage_client.py +19 -24
- adapta/storage/cache/__init__.py +1 -1
- adapta/storage/cache/_base.py +5 -5
- adapta/storage/cache/redis_cache.py +5 -5
- adapta/storage/database/__init__.py +4 -1
- adapta/storage/database/{README.md → v2/README.md} +2 -0
- adapta/storage/database/v2/__init__.py +17 -0
- adapta/storage/database/v2/azure_sql.py +143 -0
- adapta/storage/{distributed_object_store/datastax_astra → database/v2/models}/__init__.py +5 -5
- adapta/storage/database/v2/models/_models.py +53 -0
- adapta/storage/database/{odbc.py → v2/odbc.py} +22 -13
- adapta/storage/database/{snowflake_sql.py → v2/snowflake_sql.py} +20 -12
- adapta/storage/database/{trino_sql.py → v2/trino_sql.py} +15 -6
- adapta/storage/database/v3/README.md +109 -0
- adapta/storage/database/v3/__init__.py +14 -0
- adapta/storage/database/{azure_sql.py → v3/azure_sql.py} +7 -9
- adapta/storage/database/v3/models/__init__.py +19 -0
- adapta/storage/database/{models → v3/models}/_models.py +2 -3
- adapta/storage/database/v3/odbc.py +217 -0
- adapta/storage/database/v3/snowflake_sql.py +241 -0
- adapta/storage/database/v3/trino_sql.py +154 -0
- adapta/storage/delta_lake/__init__.py +2 -3
- adapta/storage/delta_lake/{README.md → v2/README.md} +2 -0
- adapta/storage/delta_lake/v2/__init__.py +19 -0
- adapta/storage/delta_lake/{_functions.py → v2/_functions.py} +43 -27
- adapta/storage/delta_lake/v2/_models.py +72 -0
- adapta/storage/delta_lake/v3/README.md +147 -0
- adapta/storage/delta_lake/v3/__init__.py +20 -0
- adapta/storage/delta_lake/v3/_functions.py +315 -0
- adapta/storage/delta_lake/{_models.py → v3/_models.py} +4 -5
- adapta/storage/distributed_object_store/__init__.py +3 -1
- adapta/storage/distributed_object_store/v2/__init__.py +18 -0
- adapta/storage/distributed_object_store/{datastax_astra → v2/datastax_astra}/README.md +2 -0
- adapta/storage/distributed_object_store/v2/datastax_astra/__init__.py +20 -0
- adapta/storage/distributed_object_store/{datastax_astra → v2/datastax_astra}/_models.py +16 -0
- adapta/storage/distributed_object_store/{datastax_astra → v2/datastax_astra}/astra_client.py +61 -52
- adapta/storage/{database/models → distributed_object_store/v3}/__init__.py +4 -5
- adapta/storage/distributed_object_store/v3/datastax_astra/README.md +277 -0
- adapta/storage/distributed_object_store/v3/datastax_astra/__init__.py +20 -0
- adapta/storage/distributed_object_store/v3/datastax_astra/_model_mappers.py +469 -0
- adapta/storage/distributed_object_store/v3/datastax_astra/_models.py +134 -0
- adapta/storage/distributed_object_store/v3/datastax_astra/astra_client.py +569 -0
- adapta/storage/exceptions.py +1 -1
- adapta/storage/models/__init__.py +1 -1
- adapta/storage/models/_functions.py +5 -5
- adapta/storage/models/astra.py +4 -4
- adapta/storage/models/aws.py +1 -1
- adapta/storage/models/azure.py +2 -3
- adapta/storage/models/base.py +9 -1
- adapta/storage/models/enum.py +19 -0
- adapta/storage/models/filter_expression.py +124 -10
- adapta/storage/models/format.py +16 -205
- adapta/storage/models/formatters/__init__.py +36 -0
- adapta/storage/models/formatters/dict.py +43 -0
- adapta/storage/models/formatters/exceptions.py +7 -0
- adapta/storage/models/formatters/metaframe.py +48 -0
- adapta/storage/models/formatters/pandas.py +139 -0
- adapta/storage/models/formatters/pickle.py +36 -0
- adapta/storage/models/formatters/polars.py +240 -0
- adapta/storage/models/formatters/unit.py +26 -0
- adapta/storage/models/hive.py +24 -16
- adapta/storage/models/local.py +1 -1
- adapta/storage/models/trino.py +56 -0
- adapta/storage/query_enabled_store/README.md +1 -1
- adapta/storage/query_enabled_store/__init__.py +7 -1
- adapta/storage/query_enabled_store/_models.py +42 -13
- adapta/storage/query_enabled_store/_qes_astra.py +27 -14
- adapta/storage/query_enabled_store/_qes_delta.py +32 -10
- adapta/storage/query_enabled_store/_qes_local.py +81 -0
- adapta/storage/query_enabled_store/_qes_trino.py +133 -0
- adapta/storage/secrets/__init__.py +1 -1
- adapta/storage/secrets/_base.py +5 -4
- adapta/storage/secrets/azure_secret_client.py +3 -4
- adapta/storage/secrets/hashicorp_vault_secret_storage_client.py +5 -5
- adapta/utils/README.md +92 -0
- adapta/utils/__init__.py +2 -1
- adapta/utils/_common.py +50 -17
- adapta/utils/_requests.py +53 -0
- adapta/utils/concurrent_task_runner.py +10 -9
- adapta/utils/data_structures/_functions.py +6 -6
- adapta/utils/decorators/_logging.py +3 -3
- adapta/utils/decorators/_rate_limit.py +2 -2
- adapta/utils/metaframe.py +172 -0
- adapta/utils/python_typing/_functions.py +5 -10
- {adapta-2.11.9.dist-info → adapta-3.5.13.dist-info}/METADATA +18 -14
- adapta-3.5.13.dist-info/RECORD +146 -0
- {adapta-2.11.9.dist-info → adapta-3.5.13.dist-info}/WHEEL +1 -1
- adapta-2.11.9.dist-info/RECORD +0 -110
- {adapta-2.11.9.dist-info → adapta-3.5.13.dist-info/licenses}/LICENSE +0 -0
adapta/storage/query_enabled_store/_models.py
CHANGED

@@ -2,7 +2,7 @@
 Query Enabled Store Connection interface.
 """
 
-# Copyright (c) 2023-
+# Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

@@ -21,12 +21,13 @@ import re
 from abc import ABC, abstractmethod
 from enum import Enum
 from pydoc import locate
-from typing import TypeVar, Generic,
-
-from pandas import DataFrame
+from typing import TypeVar, Generic, final
+from collections.abc import Iterator
 
 from adapta.storage.models.base import DataPath
 from adapta.storage.models.filter_expression import Expression
+from adapta.storage.models.enum import QueryEnabledStoreOptions
+from adapta.utils.metaframe import MetaFrame
 
 TCredential = TypeVar("TCredential")  # pylint: disable=C0103
 TSettings = TypeVar("TSettings")  # pylint: disable=C0103

@@ -42,6 +43,8 @@ class BundledQueryEnabledStores(Enum):
 
     DELTA = "adapta.storage.query_enabled_store.DeltaQueryEnabledStore"
     ASTRA = "adapta.storage.query_enabled_store.AstraQueryEnabledStore"
+    LOCAL = "adapta.storage.query_enabled_store.LocalQueryEnabledStore"
+    TRINO = "adapta.storage.query_enabled_store.TrinoQueryEnabledStore"
 
 
 BUNDLED_STORES = {store.name: store.value for store in BundledQueryEnabledStores}

@@ -84,16 +87,21 @@ class QueryEnabledStore(Generic[TCredential, TSettings], ABC):
 
     @abstractmethod
     def _apply_filter(
-        self,
-
+        self,
+        path: DataPath,
+        filter_expression: Expression,
+        columns: list[str],
+        options: dict[QueryEnabledStoreOptions, any] | None = None,
+        limit: int | None = None,
+    ) -> MetaFrame | Iterator[MetaFrame]:
         """
-        Applies the provided filter expression to this Store and returns the result in a
+        Applies the provided filter expression to this Store and returns the result in a MetaFrame
         """
 
     @abstractmethod
-    def _apply_query(self, query: str) ->
+    def _apply_query(self, query: str) -> MetaFrame | Iterator[MetaFrame]:
         """
-        Applies a plaintext query to this Store and returns the result in a
+        Applies a plaintext query to this Store and returns the result in a MetaFrame
         """
 
     @classmethod

@@ -117,7 +125,7 @@ class QueryEnabledStore(Generic[TCredential, TSettings], ABC):
         :param: lazy_init: Whether to set this instance QES for querying eagerly or lazily.
         """
 
-        def get_qes_class(name: str) ->
+        def get_qes_class(name: str) -> type[QueryEnabledStore[TCredential, TSettings]]:
             return locate(BUNDLED_STORES.get(name, name))
 
         class_name, _, _ = re.findall(re.compile(CONNECTION_STRING_REGEX), connection_string)[0]

@@ -138,8 +146,10 @@ class QueryConfigurationBuilder:
     def __init__(self, store: QueryEnabledStore, path: DataPath):
         self._store = store
         self._path = path
-        self._filter_expression:
+        self._filter_expression: Expression | None = None
         self._columns: list[str] = []
+        self._options: dict[QueryEnabledStoreOptions, any] = {}
+        self._limit = None
 
     def filter(self, filter_expression: Expression) -> "QueryConfigurationBuilder":
         """

@@ -157,10 +167,29 @@ class QueryConfigurationBuilder:
         self._columns = list(columns)
         return self
 
-    def
+    def add_options(self, option_key: QueryEnabledStoreOptions, option_value: any) -> "QueryConfigurationBuilder":
+        """
+        Use the provided options when querying the underlying storage.
+        """
+
+        self._options[option_key] = option_value
+        return self
+
+    def limit(self, limit: int | None) -> "QueryConfigurationBuilder":
+        """
+        Limit the number of results returned by the underlying store.
+        """
+        self._limit = limit
+        return self
+
+    def read(self) -> MetaFrame | Iterator[MetaFrame]:
         """
         Execute the query on the underlying store.
         """
         return self._store._apply_filter(
-            path=self._path,
+            path=self._path,
+            filter_expression=self._filter_expression,
+            columns=self._columns,
+            options=self._options,
+            limit=self._limit,
         )
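The builder now forwards options and a row limit into `_apply_filter`, and the result type moves from pandas to `MetaFrame`. Below is a minimal usage sketch of the extended builder; `store`, `path`, and `expr` are placeholders for an already constructed `QueryEnabledStore`, `DataPath`, and filter `Expression`, and the `BATCH_SIZE` option shown here is only honoured by stores that support it (the Trino QES in this release).

```python
# Illustrative sketch only: `store`, `path` and `expr` are assumed to exist already.
from adapta.storage.models.enum import QueryEnabledStoreOptions
from adapta.storage.query_enabled_store._models import QueryConfigurationBuilder

result = (
    QueryConfigurationBuilder(store=store, path=path)
    .filter(expr)                                              # becomes filter_expression=...
    .add_options(QueryEnabledStoreOptions.BATCH_SIZE, 1000)    # forwarded as options={...}
    .limit(100)                                                # forwarded as limit=100
    .read()                                                    # calls store._apply_filter(...)
)
# `result` is a MetaFrame, or an Iterator[MetaFrame] when the store streams batches.
```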
adapta/storage/query_enabled_store/_qes_astra.py
CHANGED

@@ -4,19 +4,23 @@
 import os
 import re
 from dataclasses import dataclass
-from typing import final
-
-from pandas import DataFrame
+from typing import final
+from collections.abc import Iterator
 
 from dataclasses_json import DataClassJsonMixin
 
 from adapta._version import __version__
-from adapta.storage.distributed_object_store.datastax_astra
+from adapta.storage.distributed_object_store.v3.datastax_astra import AstraClient
 from adapta.storage.models.astra import AstraPath
 from adapta.storage.models.base import DataPath
 from adapta.storage.models.filter_expression import Expression
 
-from adapta.storage.query_enabled_store._models import
+from adapta.storage.query_enabled_store._models import (
+    QueryEnabledStore,
+    CONNECTION_STRING_REGEX,
+)
+from adapta.storage.models.enum import QueryEnabledStoreOptions
+from adapta.utils.metaframe import MetaFrame
 
 
 @dataclass

@@ -25,9 +29,9 @@ class AstraCredential(DataClassJsonMixin):
     Astra DB credential helper for QES.
     """
 
-    secret_connection_bundle_bytes:
-    client_id:
-    client_secret:
+    secret_connection_bundle_bytes: str | None = None
+    client_id: str | None = None
+    client_secret: str | None = None
 
     def __post_init__(self):
         self.secret_connection_bundle_bytes = self.secret_connection_bundle_bytes or os.getenv(

@@ -36,7 +40,7 @@ class AstraCredential(DataClassJsonMixin):
         self.client_id = self.client_id or os.getenv("PROTEUS__ASTRA_CLIENT_ID")
         self.client_secret = self.client_secret or os.getenv("PROTEUS__ASTRA_CLIENT_SECRET")
 
-        if not all([self.secret_connection_bundle_bytes, self.
+        if not all([self.secret_connection_bundle_bytes, self.client_id, self.client_secret]):
             raise RuntimeError(
                 "Authentication information provided is insufficient. Please verify you are supplying bundle bytes, client id and secret either via connection string or via environment variables."
             )

@@ -48,8 +52,8 @@ class AstraSettings(DataClassJsonMixin):
     Astra DB connection settings for QES.
     """
 
-    client_name:
-    keyspace:
+    client_name: str | None = None
+    keyspace: str | None = None
 
     def __post_init__(self):
         self.client_name = self.client_name or f"Adapta Client {__version__}"

@@ -79,8 +83,13 @@ class AstraQueryEnabledStore(QueryEnabledStore[AstraCredential, AstraSettings]):
         self._astra_client.connect()
 
     def _apply_filter(
-        self,
-
+        self,
+        path: DataPath,
+        filter_expression: Expression,
+        columns: list[str],
+        options: dict[QueryEnabledStoreOptions, any] | None = None,
+        limit: int | None = 10000,
+    ) -> MetaFrame | Iterator[MetaFrame]:
         assert isinstance(path, AstraPath)
         astra_path: AstraPath = path
         if self._lazy:

@@ -92,6 +101,8 @@ class AstraQueryEnabledStore(QueryEnabledStore[AstraCredential, AstraSettings]):
                     table_name=astra_path.table,
                     select_columns=columns,
                     num_threads=-1,  # auto-infer, see method documentation
+                    options=options,
+                    limit=limit,
                 )
 
         return self._astra_client.filter_entities(

@@ -101,9 +112,11 @@ class AstraQueryEnabledStore(QueryEnabledStore[AstraCredential, AstraSettings]):
             table_name=astra_path.table,
             select_columns=columns,
             num_threads=-1,  # auto-infer, see method documentation
+            options=options,
+            limit=limit,
         )
 
-    def _apply_query(self, query: str) ->
+    def _apply_query(self, query: str) -> MetaFrame | Iterator[MetaFrame]:
         if self._lazy:
             with self._astra_client as astra_client:
                 return astra_client.get_entities_raw(query)
adapta/storage/query_enabled_store/_qes_delta.py
CHANGED

@@ -4,15 +4,21 @@
 import re
 from dataclasses import dataclass
 from pydoc import locate
-from typing import final
+from typing import final
+from collections.abc import Iterator
 
-from pandas import DataFrame
 from dataclasses_json import DataClassJsonMixin
 
-from adapta.
+from adapta.security.clients import AuthenticationClient
+from adapta.storage.delta_lake.v3 import load
 from adapta.storage.models.base import DataPath
 from adapta.storage.models.filter_expression import Expression
-from adapta.storage.query_enabled_store._models import
+from adapta.storage.query_enabled_store._models import (
+    QueryEnabledStore,
+    CONNECTION_STRING_REGEX,
+)
+from adapta.storage.models.enum import QueryEnabledStoreOptions
+from adapta.utils.metaframe import MetaFrame
 
 
 @dataclass

@@ -22,16 +28,25 @@ class DeltaCredential(DataClassJsonMixin):
     """
 
     auth_client_class: str
+    auth_client_credentials_class: str | None = None
+
+    auth_client: AuthenticationClient | None = None
+    auth_client_credentials: type | None = None
 
     def __post_init__(self):
         if not self.auth_client_class:
             raise ValueError("Authentication plugin class name not provided but is required")
 
-
+        self.auth_client = locate(self.auth_client_class)
+
+        if self.auth_client is None:
             raise ModuleNotFoundError(
                 "Authentication plugin class name cannot be loaded. Please check the spelling and make sure your application can resolve the import"
             )
 
+        if self.auth_client_credentials_class:
+            self.auth_client_credentials = locate(self.auth_client_credentials_class)
+
 
 @dataclass
 class DeltaSettings(DataClassJsonMixin):

@@ -57,14 +72,21 @@ class DeltaQueryEnabledStore(QueryEnabledStore[DeltaCredential, DeltaSettings]):
         return cls(credentials=DeltaCredential.from_json(credentials), settings=DeltaSettings.from_json(settings))
 
     def _apply_filter(
-        self,
-
+        self,
+        path: DataPath,
+        filter_expression: Expression,
+        columns: list[str],
+        options: dict[QueryEnabledStoreOptions, any] | None = None,
+        limit: int | None = None,
+    ) -> MetaFrame | Iterator[MetaFrame]:
         return load(
-            auth_client=
+            auth_client=self.credentials.auth_client(credentials=self.credentials.auth_client_credentials()),
            path=path,
            row_filter=filter_expression,
-            columns=columns,
+            columns=columns if columns else None,
+            limit=limit,
+            timeout=options.get(QueryEnabledStoreOptions.TIMEOUT, None),
        )
 
-    def _apply_query(self, query: str) ->
+    def _apply_query(self, query: str) -> MetaFrame | Iterator[MetaFrame]:
         raise NotImplementedError("Text queries are not supported by Delta QES")
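`DeltaCredential` now resolves both the authentication client class and, optionally, its credentials class at runtime via `pydoc.locate`. The snippet below is a standalone illustration of that lookup pattern, not adapta code; a stdlib class path is used so it runs anywhere, where the Delta QES would pass the dotted path from `auth_client_class` instead.

```python
from pydoc import locate

# locate() resolves a dotted import path to the object it names, or returns None.
plugin_class_path = "json.JSONDecoder"  # stand-in for a real auth_client_class value
resolved = locate(plugin_class_path)

if resolved is None:
    # DeltaCredential.__post_init__ raises ModuleNotFoundError in this case.
    raise ModuleNotFoundError(f"Cannot resolve {plugin_class_path}; check spelling and imports")

instance = resolved()  # DeltaQueryEnabledStore instantiates the resolved class the same way
print(type(instance))
```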
adapta/storage/query_enabled_store/_qes_local.py
ADDED

@@ -0,0 +1,81 @@
+"""Local Query Enabled Store (QES) for reading local files."""
+import re
+from dataclasses import dataclass
+from typing import final
+from collections.abc import Iterator
+
+from dataclasses_json import DataClassJsonMixin
+from pyarrow.parquet import read_table
+
+from adapta.storage.models import DataPath
+from adapta.storage.models.filter_expression import Expression, compile_expression, ArrowFilterExpression
+from adapta.storage.query_enabled_store._models import (
+    QueryEnabledStore,
+    CONNECTION_STRING_REGEX,
+)
+from adapta.storage.models.enum import QueryEnabledStoreOptions
+from adapta.utils.metaframe import MetaFrame
+
+
+@dataclass
+class LocalCredential(DataClassJsonMixin):
+    """
+    Local credential helper for QES.
+    No authentication is required for local files.
+    """
+
+
+@dataclass
+class LocalSettings(DataClassJsonMixin):
+    """
+    Settings for local QES
+    """
+
+
+@final
+class LocalQueryEnabledStore(QueryEnabledStore[LocalCredential, LocalSettings]):
+    """
+    QES Client for local file reads (e.g., Parquet) using PyArrow.
+    """
+
+    def close(self) -> None:
+        pass
+
+    @classmethod
+    def _from_connection_string(
+        cls, connection_string: str, lazy_init: bool = False
+    ) -> "QueryEnabledStore[LocalCredential, LocalSettings]":
+        """
+        Parses a connection string for local files.
+        """
+        _, credentials, settings = re.findall(re.compile(CONNECTION_STRING_REGEX), connection_string)[0]
+        return cls(credentials=LocalCredential.from_json(credentials), settings=LocalSettings.from_json(settings))
+
+    def _apply_filter(
+        self,
+        path: DataPath,
+        filter_expression: Expression,
+        columns: list[str],
+        options: dict[QueryEnabledStoreOptions, any] | None = None,
+        limit: int = None,
+    ) -> MetaFrame | Iterator[MetaFrame]:
+        """
+        Applies a filter to a local file
+        """
+        row_filter = compile_expression(filter_expression, ArrowFilterExpression) if filter_expression else None
+
+        pyarrow_table = read_table(
+            path.path,
+            columns=columns if columns else None,
+            filters=row_filter,
+        )
+
+        return MetaFrame.from_arrow(
+            data=pyarrow_table,
+        )
+
+    def _apply_query(self, query: str) -> MetaFrame | Iterator[MetaFrame]:
+        """
+        Local QES does not natively support SQL-like queries.
+        """
+        raise NotImplementedError("Text queries are currently not supported by Local QES")
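Under the hood the new Local QES is a thin wrapper over `pyarrow.parquet.read_table`. The sketch below reproduces the same read outside QES; the file path, column names, and filter are placeholders, and the filter is written as a DNF tuple where the QES compiles an adapta `Expression` to an Arrow filter instead.

```python
import pyarrow.parquet as pq

# Equivalent of LocalQueryEnabledStore._apply_filter for a plain local read:
# read_table accepts an optional column projection and a row filter
# (DNF tuples or a pyarrow.compute expression).
table = pq.read_table(
    "/tmp/example.parquet",          # placeholder path
    columns=["id", "value"],         # placeholder projection; None selects all columns
    filters=[("value", ">", 10)],    # placeholder filter; QES derives this from Expression
)
print(table.num_rows)
```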
adapta/storage/query_enabled_store/_qes_trino.py
ADDED

@@ -0,0 +1,133 @@
+"""
+QES implementations for Trino.
+"""
+import os
+import re
+from dataclasses import dataclass
+from typing import final
+from collections.abc import Iterator
+
+from dataclasses_json import DataClassJsonMixin
+
+from adapta.storage.database.v3.trino_sql import TrinoClient
+from adapta.storage.models import TrinoPath
+from adapta.storage.models.filter_expression import (
+    Expression,
+    compile_expression,
+    TrinoFilterExpression,
+)
+from adapta.storage.query_enabled_store._models import (
+    QueryEnabledStore,
+    CONNECTION_STRING_REGEX,
+)
+from adapta.storage.models.enum import QueryEnabledStoreOptions
+from adapta.utils.metaframe import MetaFrame, concat
+
+
+@dataclass
+class TrinoCredential(DataClassJsonMixin):
+    """
+    Trino credential helper for QES.
+
+    Trino credentials can either be provided via the oauth2_username or via the following environment variables, which
+    is handled inside the TrinoClient:
+    - (ADAPTA__TRINO_USERNAME, ADAPTA__TRINO_PASSWORD)
+
+    Currently, we don't support the credentials_provider option of the TrinoClient.
+    """
+
+    oauth2_username: str | None = None
+
+    def __post_init__(self):
+        self.oauth2_username = self.oauth2_username or os.getenv("ADAPTA__TRINO_OAUTH2_USERNAME")
+
+
+@dataclass
+class TrinoSettings(DataClassJsonMixin):
+    """
+    Trino connection settings for QES.
+    """
+
+    host: str | None = None
+    port: int | None = None
+
+    def __post_init__(self):
+        self.host = self.host or os.getenv("PROTEUS__TRINO_HOST")
+        if not self.host:
+            raise RuntimeError(
+                "Trino host not provided. Please provide it via connection string or via environment variable PROTEUS__TRINO_HOST."
+            )
+        self.port = self.port or int(os.getenv("PROTEUS__TRINO_PORT", "443"))
+
+
+@final
+class TrinoQueryEnabledStore(QueryEnabledStore[TrinoCredential, TrinoSettings]):
+    """
+    QES Client for Trino queries.
+    """
+
+    def close(self) -> None:
+        pass
+
+    def __init__(self, credentials: TrinoCredential, settings: TrinoSettings):
+        super().__init__(credentials, settings)
+        self._trino_client = TrinoClient(
+            host=self.settings.host,
+            port=self.settings.port,
+            oauth2_username=self.credentials.oauth2_username,
+        )
+
+    def _apply_filter(
+        self,
+        path: TrinoPath,
+        filter_expression: Expression,
+        columns: list[str],
+        options: dict[QueryEnabledStoreOptions, any] | None = None,
+        limit: int | None = None,
+    ) -> MetaFrame | Iterator[MetaFrame]:
+        query = self._build_query(query=path.query, filter_expression=filter_expression, columns=columns, limit=limit)
+
+        with self._trino_client as trino_client:
+            if QueryEnabledStoreOptions.BATCH_SIZE in options:
+                data = concat(
+                    trino_client.query(
+                        query=query,
+                        batch_size=options[QueryEnabledStoreOptions.BATCH_SIZE],
+                    )
+                )
+            else:
+                data = concat(trino_client.query(query=query))
+
+        return data
+
+    def _apply_query(self, query: str) -> MetaFrame | Iterator[MetaFrame]:
+        raise NotImplementedError("Text queries are not supported by Trino QES")
+
+    @classmethod
+    def _from_connection_string(
+        cls, connection_string: str, lazy_init: bool = False
+    ) -> "QueryEnabledStore[TrinoCredential, TrinoSettings]":
+        _, credentials, settings = re.findall(re.compile(CONNECTION_STRING_REGEX), connection_string)[0]
+        return cls(
+            credentials=TrinoCredential.from_json(credentials),
+            settings=TrinoSettings.from_json(settings),
+        )
+
+    @staticmethod
+    def _build_query(query: str, filter_expression: Expression, columns: list[str], limit: int | None) -> str:
+        """
+        Build the final query by applying the filter expression, selected columns, and limit to the base query.
+        """
+
+        if filter_expression or columns or limit:
+            columns_to_select = ", ".join(columns) if columns else "*"
+            query = f"SELECT {columns_to_select} FROM ({query})"
+
+        if filter_expression:
+            compiled_expression = compile_expression(expression=filter_expression, target=TrinoFilterExpression)
+            query = f"{query} WHERE {compiled_expression}"
+
+        if limit:
+            query = f"{query} LIMIT {limit}"
+
+        return query
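The query rewriting in `_build_query` wraps the path's base query in a derived-table SELECT and appends WHERE and LIMIT clauses as needed. The example below traces that logic with a placeholder base query and a pre-compiled filter string; in the real client the filter text comes from `compile_expression(..., TrinoFilterExpression)`.

```python
def build_query(query: str, compiled_filter: str | None, columns: list[str], limit: int | None) -> str:
    # Mirrors TrinoQueryEnabledStore._build_query, with the filter already compiled to SQL text.
    if compiled_filter or columns or limit:
        columns_to_select = ", ".join(columns) if columns else "*"
        query = f"SELECT {columns_to_select} FROM ({query})"

    if compiled_filter:
        query = f"{query} WHERE {compiled_filter}"

    if limit:
        query = f"{query} LIMIT {limit}"

    return query


print(build_query("SELECT * FROM catalog.schema.table", "col_a > 10", ["col_a", "col_b"], 100))
# SELECT col_a, col_b FROM (SELECT * FROM catalog.schema.table) WHERE col_a > 10 LIMIT 100
```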
adapta/storage/query_enabled_store/__init__.py (per the change list order, this copyright-only hunk belongs to adapta/storage/secrets/__init__.py)
CHANGED

@@ -2,7 +2,7 @@
 Import index.
 """
 
-# Copyright (c) 2023-
+# Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
adapta/storage/secrets/_base.py
CHANGED

@@ -2,7 +2,7 @@
 Abstraction for secret storage operations.
 """
 
-# Copyright (c) 2023-
+# Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

@@ -18,7 +18,8 @@
 #
 
 from abc import ABC, abstractmethod
-from typing import
+from typing import Any
+from collections.abc import Iterable
 
 from adapta.security.clients import AuthenticationClient
 

@@ -32,7 +33,7 @@ class SecretStorageClient(ABC):
         self._base_client = base_client
 
     @abstractmethod
-    def read_secret(self, storage_name: str, secret_name: str) ->
+    def read_secret(self, storage_name: str, secret_name: str) -> bytes | str | dict[str, str]:
         """
         Reads a secret from the specified storage.
 

@@ -46,7 +47,7 @@ class SecretStorageClient(ABC):
         self,
         storage_name: str,
         secret_name: str,
-        secret_value:
+        secret_value: str | dict[str, str],
         b64_encode=False,
     ) -> None:
         """
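With the typing module unions replaced by built-in `|` annotations, a custom secret backend only needs to honour the `bytes | str | dict[str, str]` contract on `read_secret`. Below is a hypothetical environment-variable-backed client sketched against the abstract methods visible in this hunk; the import path and keyword name `base_client` are assumptions taken from this diff, and any further abstract members of `SecretStorageClient` not shown here would also need implementing.

```python
import os

from adapta.security.clients import AuthenticationClient
from adapta.storage.secrets import SecretStorageClient  # assumed export path; may live in adapta.storage.secrets._base


class EnvSecretStorageClient(SecretStorageClient):
    """Hypothetical client that reads secrets from environment variables."""

    def __init__(self, *, base_client: AuthenticationClient):
        # `base_client` keyword assumed from the _base.py hunk above.
        super().__init__(base_client=base_client)

    def read_secret(self, storage_name: str, secret_name: str) -> bytes | str | dict[str, str]:
        # storage_name acts as a prefix, e.g. MYAPP__DB_PASSWORD
        return os.environ[f"{storage_name}__{secret_name}"]

    def create_secret(
        self,
        storage_name: str,
        secret_name: str,
        secret_value: str | dict[str, str],
        b64_encode=False,
    ) -> None:
        if not isinstance(secret_value, str):
            raise ValueError("Only string secrets are supported by this sketch")
        os.environ[f"{storage_name}__{secret_name}"] = secret_value
```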
adapta/storage/secrets/azure_secret_client.py
CHANGED

@@ -1,7 +1,7 @@
 """
 Azure Secret Storage Client (KeyVault).
 """
-# Copyright (c) 2023-
+# Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

@@ -17,7 +17,6 @@
 #
 
 import base64
-from typing import Union, Dict
 
 from azure.keyvault.secrets import SecretClient
 

@@ -43,14 +42,14 @@ class AzureSecretStorageClient(SecretStorageClient):
         kv_uri = f"https://{keyvault}.vault.azure.net"
         return SecretClient(kv_uri, self._base_client.get_credentials())
 
-    def read_secret(self, storage_name: str, secret_name: str) ->
+    def read_secret(self, storage_name: str, secret_name: str) -> bytes | str | dict[str, str]:
         return self._get_keyvault(storage_name).get_secret(secret_name).value
 
     def create_secret(
         self,
         storage_name: str,
         secret_name: str,
-        secret_value:
+        secret_value: str | dict[str, str],
         b64_encode=False,
     ) -> None:
         if not isinstance(secret_value, str):
adapta/storage/secrets/hashicorp_vault_secret_storage_client.py
CHANGED

@@ -1,7 +1,7 @@
 """
 Hashicorp Vault Secret storage client
 """
-# Copyright (c) 2023-
+# Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

@@ -16,7 +16,7 @@
 # limitations under the License.
 #
 
-from
+from collections.abc import Iterable
 
 import hvac
 

@@ -41,7 +41,7 @@ class HashicorpSecretStorageClient(SecretStorageClient):
         self.client = hvac.Client(self._base_client.vault_address, self._access_token)
         self._role = role
 
-    def read_secret(self, storage_name: str, secret_name: str) ->
+    def read_secret(self, storage_name: str, secret_name: str) -> bytes | str | dict[str, str]:
         secret = self.client.secrets.kv.v2.read_secret_version(path=secret_name)
         return secret["data"]["data"]
 

@@ -49,10 +49,10 @@ class HashicorpSecretStorageClient(SecretStorageClient):
         self,
         storage_name: str,
         secret_name: str,
-        secret_value:
+        secret_value: str | dict[str, str],
         b64_encode=False,
     ) -> None:
-        if not isinstance(secret_value,
+        if not isinstance(secret_value, dict):
             raise ValueError(
                 f"Only Dict secret type supported in HashicorpSecretStorageClient but was: {type(secret_value)}"
             )