adapta 2.11.9__py3-none-any.whl → 3.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. adapta/__init__.py +1 -1
  2. adapta/_version.py +1 -1
  3. adapta/connectors/__init__.py +1 -1
  4. adapta/connectors/service_bus/__init__.py +1 -1
  5. adapta/connectors/service_bus/_connector.py +2 -3
  6. adapta/logs/__init__.py +1 -1
  7. adapta/logs/_async_logger.py +38 -24
  8. adapta/logs/_base.py +21 -21
  9. adapta/logs/_internal.py +6 -7
  10. adapta/logs/_internal_logger.py +113 -41
  11. adapta/logs/_logger_interface.py +9 -10
  12. adapta/logs/handlers/__init__.py +1 -1
  13. adapta/logs/handlers/datadog_api_handler.py +7 -7
  14. adapta/logs/handlers/safe_stream_handler.py +4 -4
  15. adapta/logs/models/__init__.py +1 -1
  16. adapta/logs/models/_log_level.py +1 -1
  17. adapta/logs/models/_logs_metadata.py +4 -5
  18. adapta/metrics/__init__.py +1 -1
  19. adapta/metrics/_base.py +14 -15
  20. adapta/metrics/providers/__init__.py +1 -1
  21. adapta/metrics/providers/datadog_provider.py +21 -22
  22. adapta/metrics/providers/void_provider.py +34 -0
  23. adapta/ml/__init__.py +1 -1
  24. adapta/ml/_model.py +1 -1
  25. adapta/ml/mlflow/__init__.py +1 -1
  26. adapta/ml/mlflow/_client.py +101 -5
  27. adapta/ml/mlflow/_functions.py +44 -13
  28. adapta/process_communication/__init__.py +1 -1
  29. adapta/process_communication/_models.py +8 -6
  30. adapta/schema_management/README.md +0 -1
  31. adapta/schema_management/__init__.py +1 -1
  32. adapta/schema_management/schema_entity.py +3 -3
  33. adapta/security/__init__.py +1 -1
  34. adapta/security/clients/__init__.py +1 -1
  35. adapta/security/clients/_azure_client.py +14 -12
  36. adapta/security/clients/_base.py +11 -6
  37. adapta/security/clients/_local_client.py +6 -6
  38. adapta/security/clients/aws/__init__.py +1 -1
  39. adapta/security/clients/aws/_aws_client.py +12 -10
  40. adapta/security/clients/aws/_aws_credentials.py +7 -8
  41. adapta/security/clients/hashicorp_vault/__init__.py +1 -1
  42. adapta/security/clients/hashicorp_vault/hashicorp_vault_client.py +7 -6
  43. adapta/security/clients/hashicorp_vault/kubernetes_client.py +2 -2
  44. adapta/security/clients/hashicorp_vault/oidc_client.py +2 -2
  45. adapta/security/clients/hashicorp_vault/token_client.py +2 -2
  46. adapta/storage/__init__.py +1 -1
  47. adapta/storage/blob/README.md +14 -10
  48. adapta/storage/blob/__init__.py +1 -1
  49. adapta/storage/blob/azure_storage_client.py +76 -24
  50. adapta/storage/blob/base.py +15 -13
  51. adapta/storage/blob/local_storage_client.py +28 -16
  52. adapta/storage/blob/s3_storage_client.py +19 -24
  53. adapta/storage/cache/__init__.py +1 -1
  54. adapta/storage/cache/_base.py +5 -5
  55. adapta/storage/cache/redis_cache.py +5 -5
  56. adapta/storage/database/__init__.py +4 -1
  57. adapta/storage/database/{README.md → v2/README.md} +2 -0
  58. adapta/storage/database/v2/__init__.py +17 -0
  59. adapta/storage/database/v2/azure_sql.py +143 -0
  60. adapta/storage/{distributed_object_store/datastax_astra → database/v2/models}/__init__.py +5 -5
  61. adapta/storage/database/v2/models/_models.py +53 -0
  62. adapta/storage/database/{odbc.py → v2/odbc.py} +22 -13
  63. adapta/storage/database/{snowflake_sql.py → v2/snowflake_sql.py} +20 -12
  64. adapta/storage/database/{trino_sql.py → v2/trino_sql.py} +15 -6
  65. adapta/storage/database/v3/README.md +109 -0
  66. adapta/storage/database/v3/__init__.py +14 -0
  67. adapta/storage/database/{azure_sql.py → v3/azure_sql.py} +7 -9
  68. adapta/storage/database/v3/models/__init__.py +19 -0
  69. adapta/storage/database/{models → v3/models}/_models.py +2 -3
  70. adapta/storage/database/v3/odbc.py +217 -0
  71. adapta/storage/database/v3/snowflake_sql.py +241 -0
  72. adapta/storage/database/v3/trino_sql.py +154 -0
  73. adapta/storage/delta_lake/__init__.py +2 -3
  74. adapta/storage/delta_lake/{README.md → v2/README.md} +2 -0
  75. adapta/storage/delta_lake/v2/__init__.py +19 -0
  76. adapta/storage/delta_lake/{_functions.py → v2/_functions.py} +43 -27
  77. adapta/storage/delta_lake/v2/_models.py +72 -0
  78. adapta/storage/delta_lake/v3/README.md +147 -0
  79. adapta/storage/delta_lake/v3/__init__.py +20 -0
  80. adapta/storage/delta_lake/v3/_functions.py +315 -0
  81. adapta/storage/delta_lake/{_models.py → v3/_models.py} +4 -5
  82. adapta/storage/distributed_object_store/__init__.py +3 -1
  83. adapta/storage/distributed_object_store/v2/__init__.py +18 -0
  84. adapta/storage/distributed_object_store/{datastax_astra → v2/datastax_astra}/README.md +2 -0
  85. adapta/storage/distributed_object_store/v2/datastax_astra/__init__.py +20 -0
  86. adapta/storage/distributed_object_store/{datastax_astra → v2/datastax_astra}/_models.py +16 -0
  87. adapta/storage/distributed_object_store/{datastax_astra → v2/datastax_astra}/astra_client.py +61 -52
  88. adapta/storage/{database/models → distributed_object_store/v3}/__init__.py +4 -5
  89. adapta/storage/distributed_object_store/v3/datastax_astra/README.md +277 -0
  90. adapta/storage/distributed_object_store/v3/datastax_astra/__init__.py +20 -0
  91. adapta/storage/distributed_object_store/v3/datastax_astra/_model_mappers.py +469 -0
  92. adapta/storage/distributed_object_store/v3/datastax_astra/_models.py +134 -0
  93. adapta/storage/distributed_object_store/v3/datastax_astra/astra_client.py +569 -0
  94. adapta/storage/exceptions.py +1 -1
  95. adapta/storage/models/__init__.py +1 -1
  96. adapta/storage/models/_functions.py +5 -5
  97. adapta/storage/models/astra.py +4 -4
  98. adapta/storage/models/aws.py +1 -1
  99. adapta/storage/models/azure.py +2 -3
  100. adapta/storage/models/base.py +9 -1
  101. adapta/storage/models/enum.py +19 -0
  102. adapta/storage/models/filter_expression.py +124 -10
  103. adapta/storage/models/format.py +16 -205
  104. adapta/storage/models/formatters/__init__.py +36 -0
  105. adapta/storage/models/formatters/dict.py +43 -0
  106. adapta/storage/models/formatters/exceptions.py +7 -0
  107. adapta/storage/models/formatters/metaframe.py +48 -0
  108. adapta/storage/models/formatters/pandas.py +139 -0
  109. adapta/storage/models/formatters/pickle.py +36 -0
  110. adapta/storage/models/formatters/polars.py +240 -0
  111. adapta/storage/models/formatters/unit.py +26 -0
  112. adapta/storage/models/hive.py +24 -16
  113. adapta/storage/models/local.py +1 -1
  114. adapta/storage/models/trino.py +56 -0
  115. adapta/storage/query_enabled_store/README.md +1 -1
  116. adapta/storage/query_enabled_store/__init__.py +7 -1
  117. adapta/storage/query_enabled_store/_models.py +42 -13
  118. adapta/storage/query_enabled_store/_qes_astra.py +27 -14
  119. adapta/storage/query_enabled_store/_qes_delta.py +32 -10
  120. adapta/storage/query_enabled_store/_qes_local.py +81 -0
  121. adapta/storage/query_enabled_store/_qes_trino.py +133 -0
  122. adapta/storage/secrets/__init__.py +1 -1
  123. adapta/storage/secrets/_base.py +5 -4
  124. adapta/storage/secrets/azure_secret_client.py +3 -4
  125. adapta/storage/secrets/hashicorp_vault_secret_storage_client.py +5 -5
  126. adapta/utils/README.md +92 -0
  127. adapta/utils/__init__.py +2 -1
  128. adapta/utils/_common.py +50 -17
  129. adapta/utils/_requests.py +53 -0
  130. adapta/utils/concurrent_task_runner.py +10 -9
  131. adapta/utils/data_structures/_functions.py +6 -6
  132. adapta/utils/decorators/_logging.py +3 -3
  133. adapta/utils/decorators/_rate_limit.py +2 -2
  134. adapta/utils/metaframe.py +172 -0
  135. adapta/utils/python_typing/_functions.py +5 -10
  136. {adapta-2.11.9.dist-info → adapta-3.5.13.dist-info}/METADATA +18 -14
  137. adapta-3.5.13.dist-info/RECORD +146 -0
  138. {adapta-2.11.9.dist-info → adapta-3.5.13.dist-info}/WHEEL +1 -1
  139. adapta-2.11.9.dist-info/RECORD +0 -110
  140. {adapta-2.11.9.dist-info → adapta-3.5.13.dist-info/licenses}/LICENSE +0 -0
@@ -2,7 +2,7 @@
2
2
  Query Enabled Store Connection interface.
3
3
  """
4
4
 
5
- # Copyright (c) 2023-2024. ECCO Sneaks & Data
5
+ # Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
6
6
  #
7
7
  # Licensed under the Apache License, Version 2.0 (the "License");
8
8
  # you may not use this file except in compliance with the License.
@@ -21,12 +21,13 @@ import re
21
21
  from abc import ABC, abstractmethod
22
22
  from enum import Enum
23
23
  from pydoc import locate
24
- from typing import TypeVar, Generic, Type, Iterator, Union, final, Optional
25
-
26
- from pandas import DataFrame
24
+ from typing import TypeVar, Generic, final
25
+ from collections.abc import Iterator
27
26
 
28
27
  from adapta.storage.models.base import DataPath
29
28
  from adapta.storage.models.filter_expression import Expression
29
+ from adapta.storage.models.enum import QueryEnabledStoreOptions
30
+ from adapta.utils.metaframe import MetaFrame
30
31
 
31
32
  TCredential = TypeVar("TCredential") # pylint: disable=C0103
32
33
  TSettings = TypeVar("TSettings") # pylint: disable=C0103
@@ -42,6 +43,8 @@ class BundledQueryEnabledStores(Enum):
42
43
 
43
44
  DELTA = "adapta.storage.query_enabled_store.DeltaQueryEnabledStore"
44
45
  ASTRA = "adapta.storage.query_enabled_store.AstraQueryEnabledStore"
46
+ LOCAL = "adapta.storage.query_enabled_store.LocalQueryEnabledStore"
47
+ TRINO = "adapta.storage.query_enabled_store.TrinoQueryEnabledStore"
45
48
 
46
49
 
47
50
  BUNDLED_STORES = {store.name: store.value for store in BundledQueryEnabledStores}
@@ -84,16 +87,21 @@ class QueryEnabledStore(Generic[TCredential, TSettings], ABC):
84
87
 
85
88
  @abstractmethod
86
89
  def _apply_filter(
87
- self, path: DataPath, filter_expression: Expression, columns: list[str]
88
- ) -> Union[DataFrame, Iterator[DataFrame]]:
90
+ self,
91
+ path: DataPath,
92
+ filter_expression: Expression,
93
+ columns: list[str],
94
+ options: dict[QueryEnabledStoreOptions, any] | None = None,
95
+ limit: int | None = None,
96
+ ) -> MetaFrame | Iterator[MetaFrame]:
89
97
  """
90
- Applies the provided filter expression to this Store and returns the result in a pandas DataFrame
98
+ Applies the provided filter expression to this Store and returns the result in a MetaFrame
91
99
  """
92
100
 
93
101
  @abstractmethod
94
- def _apply_query(self, query: str) -> Union[DataFrame, Iterator[DataFrame]]:
102
+ def _apply_query(self, query: str) -> MetaFrame | Iterator[MetaFrame]:
95
103
  """
96
- Applies a plaintext query to this Store and returns the result in a pandas DataFrame
104
+ Applies a plaintext query to this Store and returns the result in a MetaFrame
97
105
  """
98
106
 
99
107
  @classmethod
@@ -117,7 +125,7 @@ class QueryEnabledStore(Generic[TCredential, TSettings], ABC):
117
125
  :param: lazy_init: Whether to set this instance QES for querying eagerly or lazily.
118
126
  """
119
127
 
120
- def get_qes_class(name: str) -> Type[QueryEnabledStore[TCredential, TSettings]]:
128
+ def get_qes_class(name: str) -> type[QueryEnabledStore[TCredential, TSettings]]:
121
129
  return locate(BUNDLED_STORES.get(name, name))
122
130
 
123
131
  class_name, _, _ = re.findall(re.compile(CONNECTION_STRING_REGEX), connection_string)[0]
@@ -138,8 +146,10 @@ class QueryConfigurationBuilder:
138
146
  def __init__(self, store: QueryEnabledStore, path: DataPath):
139
147
  self._store = store
140
148
  self._path = path
141
- self._filter_expression: Optional[Expression] = None
149
+ self._filter_expression: Expression | None = None
142
150
  self._columns: list[str] = []
151
+ self._options: dict[QueryEnabledStoreOptions, any] = {}
152
+ self._limit = None
143
153
 
144
154
  def filter(self, filter_expression: Expression) -> "QueryConfigurationBuilder":
145
155
  """
@@ -157,10 +167,29 @@ class QueryConfigurationBuilder:
157
167
  self._columns = list(columns)
158
168
  return self
159
169
 
160
- def read(self) -> Union[DataFrame, Iterator[DataFrame]]:
170
+ def add_options(self, option_key: QueryEnabledStoreOptions, option_value: any) -> "QueryConfigurationBuilder":
171
+ """
172
+ Use the provided options when querying the underlying storage.
173
+ """
174
+
175
+ self._options[option_key] = option_value
176
+ return self
177
+
178
+ def limit(self, limit: int | None) -> "QueryConfigurationBuilder":
179
+ """
180
+ Limit the number of results returned by the underlying store.
181
+ """
182
+ self._limit = limit
183
+ return self
184
+
185
+ def read(self) -> MetaFrame | Iterator[MetaFrame]:
161
186
  """
162
187
  Execute the query on the underlying store.
163
188
  """
164
189
  return self._store._apply_filter(
165
- path=self._path, filter_expression=self._filter_expression, columns=self._columns
190
+ path=self._path,
191
+ filter_expression=self._filter_expression,
192
+ columns=self._columns,
193
+ options=self._options,
194
+ limit=self._limit,
166
195
  )
@@ -4,19 +4,23 @@
4
4
  import os
5
5
  import re
6
6
  from dataclasses import dataclass
7
- from typing import final, Optional, Union, Iterator
8
-
9
- from pandas import DataFrame
7
+ from typing import final
8
+ from collections.abc import Iterator
10
9
 
11
10
  from dataclasses_json import DataClassJsonMixin
12
11
 
13
12
  from adapta._version import __version__
14
- from adapta.storage.distributed_object_store.datastax_astra.astra_client import AstraClient
13
+ from adapta.storage.distributed_object_store.v3.datastax_astra import AstraClient
15
14
  from adapta.storage.models.astra import AstraPath
16
15
  from adapta.storage.models.base import DataPath
17
16
  from adapta.storage.models.filter_expression import Expression
18
17
 
19
- from adapta.storage.query_enabled_store._models import QueryEnabledStore, CONNECTION_STRING_REGEX
18
+ from adapta.storage.query_enabled_store._models import (
19
+ QueryEnabledStore,
20
+ CONNECTION_STRING_REGEX,
21
+ )
22
+ from adapta.storage.models.enum import QueryEnabledStoreOptions
23
+ from adapta.utils.metaframe import MetaFrame
20
24
 
21
25
 
22
26
  @dataclass
@@ -25,9 +29,9 @@ class AstraCredential(DataClassJsonMixin):
25
29
  Astra DB credential helper for QES.
26
30
  """
27
31
 
28
- secret_connection_bundle_bytes: Optional[str] = None
29
- client_id: Optional[str] = None
30
- client_secret: Optional[str] = None
32
+ secret_connection_bundle_bytes: str | None = None
33
+ client_id: str | None = None
34
+ client_secret: str | None = None
31
35
 
32
36
  def __post_init__(self):
33
37
  self.secret_connection_bundle_bytes = self.secret_connection_bundle_bytes or os.getenv(
@@ -36,7 +40,7 @@ class AstraCredential(DataClassJsonMixin):
36
40
  self.client_id = self.client_id or os.getenv("PROTEUS__ASTRA_CLIENT_ID")
37
41
  self.client_secret = self.client_secret or os.getenv("PROTEUS__ASTRA_CLIENT_SECRET")
38
42
 
39
- if not all([self.secret_connection_bundle_bytes, self.client_secret, self.client_secret]):
43
+ if not all([self.secret_connection_bundle_bytes, self.client_id, self.client_secret]):
40
44
  raise RuntimeError(
41
45
  "Authentication information provided is insufficient. Please verify you are supplying bundle bytes, client id and secret either via connection string or via environment variables."
42
46
  )
@@ -48,8 +52,8 @@ class AstraSettings(DataClassJsonMixin):
48
52
  Astra DB connection settings for QES.
49
53
  """
50
54
 
51
- client_name: Optional[str] = None
52
- keyspace: Optional[str] = None
55
+ client_name: str | None = None
56
+ keyspace: str | None = None
53
57
 
54
58
  def __post_init__(self):
55
59
  self.client_name = self.client_name or f"Adapta Client {__version__}"
@@ -79,8 +83,13 @@ class AstraQueryEnabledStore(QueryEnabledStore[AstraCredential, AstraSettings]):
79
83
  self._astra_client.connect()
80
84
 
81
85
  def _apply_filter(
82
- self, path: DataPath, filter_expression: Expression, columns: list[str]
83
- ) -> Union[DataFrame, Iterator[DataFrame]]:
86
+ self,
87
+ path: DataPath,
88
+ filter_expression: Expression,
89
+ columns: list[str],
90
+ options: dict[QueryEnabledStoreOptions, any] | None = None,
91
+ limit: int | None = 10000,
92
+ ) -> MetaFrame | Iterator[MetaFrame]:
84
93
  assert isinstance(path, AstraPath)
85
94
  astra_path: AstraPath = path
86
95
  if self._lazy:
@@ -92,6 +101,8 @@ class AstraQueryEnabledStore(QueryEnabledStore[AstraCredential, AstraSettings]):
92
101
  table_name=astra_path.table,
93
102
  select_columns=columns,
94
103
  num_threads=-1, # auto-infer, see method documentation
104
+ options=options,
105
+ limit=limit,
95
106
  )
96
107
 
97
108
  return self._astra_client.filter_entities(
@@ -101,9 +112,11 @@ class AstraQueryEnabledStore(QueryEnabledStore[AstraCredential, AstraSettings]):
101
112
  table_name=astra_path.table,
102
113
  select_columns=columns,
103
114
  num_threads=-1, # auto-infer, see method documentation
115
+ options=options,
116
+ limit=limit,
104
117
  )
105
118
 
106
- def _apply_query(self, query: str) -> Union[DataFrame, Iterator[DataFrame]]:
119
+ def _apply_query(self, query: str) -> MetaFrame | Iterator[MetaFrame]:
107
120
  if self._lazy:
108
121
  with self._astra_client as astra_client:
109
122
  return astra_client.get_entities_raw(query)
@@ -4,15 +4,21 @@
4
4
  import re
5
5
  from dataclasses import dataclass
6
6
  from pydoc import locate
7
- from typing import final, Union, Iterator
7
+ from typing import final
8
+ from collections.abc import Iterator
8
9
 
9
- from pandas import DataFrame
10
10
  from dataclasses_json import DataClassJsonMixin
11
11
 
12
- from adapta.storage.delta_lake import load
12
+ from adapta.security.clients import AuthenticationClient
13
+ from adapta.storage.delta_lake.v3 import load
13
14
  from adapta.storage.models.base import DataPath
14
15
  from adapta.storage.models.filter_expression import Expression
15
- from adapta.storage.query_enabled_store._models import QueryEnabledStore, CONNECTION_STRING_REGEX
16
+ from adapta.storage.query_enabled_store._models import (
17
+ QueryEnabledStore,
18
+ CONNECTION_STRING_REGEX,
19
+ )
20
+ from adapta.storage.models.enum import QueryEnabledStoreOptions
21
+ from adapta.utils.metaframe import MetaFrame
16
22
 
17
23
 
18
24
  @dataclass
@@ -22,16 +28,25 @@ class DeltaCredential(DataClassJsonMixin):
22
28
  """
23
29
 
24
30
  auth_client_class: str
31
+ auth_client_credentials_class: str | None = None
32
+
33
+ auth_client: AuthenticationClient | None = None
34
+ auth_client_credentials: type | None = None
25
35
 
26
36
  def __post_init__(self):
27
37
  if not self.auth_client_class:
28
38
  raise ValueError("Authentication plugin class name not provided but is required")
29
39
 
30
- if locate(self.auth_client_class) is None:
40
+ self.auth_client = locate(self.auth_client_class)
41
+
42
+ if self.auth_client is None:
31
43
  raise ModuleNotFoundError(
32
44
  "Authentication plugin class name cannot be loaded. Please check the spelling and make sure your application can resolve the import"
33
45
  )
34
46
 
47
+ if self.auth_client_credentials_class:
48
+ self.auth_client_credentials = locate(self.auth_client_credentials_class)
49
+
35
50
 
36
51
  @dataclass
37
52
  class DeltaSettings(DataClassJsonMixin):
@@ -57,14 +72,21 @@ class DeltaQueryEnabledStore(QueryEnabledStore[DeltaCredential, DeltaSettings]):
57
72
  return cls(credentials=DeltaCredential.from_json(credentials), settings=DeltaSettings.from_json(settings))
58
73
 
59
74
  def _apply_filter(
60
- self, path: DataPath, filter_expression: Expression, columns: list[str]
61
- ) -> Union[DataFrame, Iterator[DataFrame]]:
75
+ self,
76
+ path: DataPath,
77
+ filter_expression: Expression,
78
+ columns: list[str],
79
+ options: dict[QueryEnabledStoreOptions, any] | None = None,
80
+ limit: int | None = None,
81
+ ) -> MetaFrame | Iterator[MetaFrame]:
62
82
  return load(
63
- auth_client=locate(self.credentials.auth_client_class)(),
83
+ auth_client=self.credentials.auth_client(credentials=self.credentials.auth_client_credentials()),
64
84
  path=path,
65
85
  row_filter=filter_expression,
66
- columns=columns,
86
+ columns=columns if columns else None,
87
+ limit=limit,
88
+ timeout=options.get(QueryEnabledStoreOptions.TIMEOUT, None),
67
89
  )
68
90
 
69
- def _apply_query(self, query: str) -> Union[DataFrame, Iterator[DataFrame]]:
91
+ def _apply_query(self, query: str) -> MetaFrame | Iterator[MetaFrame]:
70
92
  raise NotImplementedError("Text queries are not supported by Delta QES")
@@ -0,0 +1,81 @@
1
+ """Local Query Enabled Store (QES) for reading local files."""
2
+ import re
3
+ from dataclasses import dataclass
4
+ from typing import final
5
+ from collections.abc import Iterator
6
+
7
+ from dataclasses_json import DataClassJsonMixin
8
+ from pyarrow.parquet import read_table
9
+
10
+ from adapta.storage.models import DataPath
11
+ from adapta.storage.models.filter_expression import Expression, compile_expression, ArrowFilterExpression
12
+ from adapta.storage.query_enabled_store._models import (
13
+ QueryEnabledStore,
14
+ CONNECTION_STRING_REGEX,
15
+ )
16
+ from adapta.storage.models.enum import QueryEnabledStoreOptions
17
+ from adapta.utils.metaframe import MetaFrame
18
+
19
+
20
+ @dataclass
21
+ class LocalCredential(DataClassJsonMixin):
22
+ """
23
+ Local credential helper for QES.
24
+ No authentication is required for local files.
25
+ """
26
+
27
+
28
+ @dataclass
29
+ class LocalSettings(DataClassJsonMixin):
30
+ """
31
+ Settings for local QES
32
+ """
33
+
34
+
35
+ @final
36
+ class LocalQueryEnabledStore(QueryEnabledStore[LocalCredential, LocalSettings]):
37
+ """
38
+ QES Client for local file reads (e.g., Parquet) using PyArrow.
39
+ """
40
+
41
+ def close(self) -> None:
42
+ pass
43
+
44
+ @classmethod
45
+ def _from_connection_string(
46
+ cls, connection_string: str, lazy_init: bool = False
47
+ ) -> "QueryEnabledStore[LocalCredential, LocalSettings]":
48
+ """
49
+ Parses a connection string for local files.
50
+ """
51
+ _, credentials, settings = re.findall(re.compile(CONNECTION_STRING_REGEX), connection_string)[0]
52
+ return cls(credentials=LocalCredential.from_json(credentials), settings=LocalSettings.from_json(settings))
53
+
54
+ def _apply_filter(
55
+ self,
56
+ path: DataPath,
57
+ filter_expression: Expression,
58
+ columns: list[str],
59
+ options: dict[QueryEnabledStoreOptions, any] | None = None,
60
+ limit: int = None,
61
+ ) -> MetaFrame | Iterator[MetaFrame]:
62
+ """
63
+ Applies a filter to a local file
64
+ """
65
+ row_filter = compile_expression(filter_expression, ArrowFilterExpression) if filter_expression else None
66
+
67
+ pyarrow_table = read_table(
68
+ path.path,
69
+ columns=columns if columns else None,
70
+ filters=row_filter,
71
+ )
72
+
73
+ return MetaFrame.from_arrow(
74
+ data=pyarrow_table,
75
+ )
76
+
77
+ def _apply_query(self, query: str) -> MetaFrame | Iterator[MetaFrame]:
78
+ """
79
+ Local QES does not natively support SQL-like queries.
80
+ """
81
+ raise NotImplementedError("Text queries are currently not supported by Local QES")
@@ -0,0 +1,133 @@
1
+ """
2
+ QES implementations for Trino.
3
+ """
4
+ import os
5
+ import re
6
+ from dataclasses import dataclass
7
+ from typing import final
8
+ from collections.abc import Iterator
9
+
10
+ from dataclasses_json import DataClassJsonMixin
11
+
12
+ from adapta.storage.database.v3.trino_sql import TrinoClient
13
+ from adapta.storage.models import TrinoPath
14
+ from adapta.storage.models.filter_expression import (
15
+ Expression,
16
+ compile_expression,
17
+ TrinoFilterExpression,
18
+ )
19
+ from adapta.storage.query_enabled_store._models import (
20
+ QueryEnabledStore,
21
+ CONNECTION_STRING_REGEX,
22
+ )
23
+ from adapta.storage.models.enum import QueryEnabledStoreOptions
24
+ from adapta.utils.metaframe import MetaFrame, concat
25
+
26
+
27
+ @dataclass
28
+ class TrinoCredential(DataClassJsonMixin):
29
+ """
30
+ Trino credential helper for QES.
31
+
32
+ Trino credentials can either be provided via the oauth2_username or via the following environment variables, which
33
+ is handled inside the TrinoClient:
34
+ - (ADAPTA__TRINO_USERNAME, ADAPTA__TRINO_PASSWORD)
35
+
36
+ Currently, we don't support the credentials_provider option of the TrinoClient.
37
+ """
38
+
39
+ oauth2_username: str | None = None
40
+
41
+ def __post_init__(self):
42
+ self.oauth2_username = self.oauth2_username or os.getenv("ADAPTA__TRINO_OAUTH2_USERNAME")
43
+
44
+
45
+ @dataclass
46
+ class TrinoSettings(DataClassJsonMixin):
47
+ """
48
+ Trino connection settings for QES.
49
+ """
50
+
51
+ host: str | None = None
52
+ port: int | None = None
53
+
54
+ def __post_init__(self):
55
+ self.host = self.host or os.getenv("PROTEUS__TRINO_HOST")
56
+ if not self.host:
57
+ raise RuntimeError(
58
+ "Trino host not provided. Please provide it via connection string or via environment variable PROTEUS__TRINO_HOST."
59
+ )
60
+ self.port = self.port or int(os.getenv("PROTEUS__TRINO_PORT", "443"))
61
+
62
+
63
+ @final
64
+ class TrinoQueryEnabledStore(QueryEnabledStore[TrinoCredential, TrinoSettings]):
65
+ """
66
+ QES Client for Trino queries.
67
+ """
68
+
69
+ def close(self) -> None:
70
+ pass
71
+
72
+ def __init__(self, credentials: TrinoCredential, settings: TrinoSettings):
73
+ super().__init__(credentials, settings)
74
+ self._trino_client = TrinoClient(
75
+ host=self.settings.host,
76
+ port=self.settings.port,
77
+ oauth2_username=self.credentials.oauth2_username,
78
+ )
79
+
80
+ def _apply_filter(
81
+ self,
82
+ path: TrinoPath,
83
+ filter_expression: Expression,
84
+ columns: list[str],
85
+ options: dict[QueryEnabledStoreOptions, any] | None = None,
86
+ limit: int | None = None,
87
+ ) -> MetaFrame | Iterator[MetaFrame]:
88
+ query = self._build_query(query=path.query, filter_expression=filter_expression, columns=columns, limit=limit)
89
+
90
+ with self._trino_client as trino_client:
91
+ if QueryEnabledStoreOptions.BATCH_SIZE in options:
92
+ data = concat(
93
+ trino_client.query(
94
+ query=query,
95
+ batch_size=options[QueryEnabledStoreOptions.BATCH_SIZE],
96
+ )
97
+ )
98
+ else:
99
+ data = concat(trino_client.query(query=query))
100
+
101
+ return data
102
+
103
+ def _apply_query(self, query: str) -> MetaFrame | Iterator[MetaFrame]:
104
+ raise NotImplementedError("Text queries are not supported by Trino QES")
105
+
106
+ @classmethod
107
+ def _from_connection_string(
108
+ cls, connection_string: str, lazy_init: bool = False
109
+ ) -> "QueryEnabledStore[TrinoCredential, TrinoSettings]":
110
+ _, credentials, settings = re.findall(re.compile(CONNECTION_STRING_REGEX), connection_string)[0]
111
+ return cls(
112
+ credentials=TrinoCredential.from_json(credentials),
113
+ settings=TrinoSettings.from_json(settings),
114
+ )
115
+
116
+ @staticmethod
117
+ def _build_query(query: str, filter_expression: Expression, columns: list[str], limit: int | None) -> str:
118
+ """
119
+ Build the final query by applying the filter expression, selected columns, and limit to the base query.
120
+ """
121
+
122
+ if filter_expression or columns or limit:
123
+ columns_to_select = ", ".join(columns) if columns else "*"
124
+ query = f"SELECT {columns_to_select} FROM ({query})"
125
+
126
+ if filter_expression:
127
+ compiled_expression = compile_expression(expression=filter_expression, target=TrinoFilterExpression)
128
+ query = f"{query} WHERE {compiled_expression}"
129
+
130
+ if limit:
131
+ query = f"{query} LIMIT {limit}"
132
+
133
+ return query
@@ -2,7 +2,7 @@
2
2
  Import index.
3
3
  """
4
4
 
5
- # Copyright (c) 2023-2024. ECCO Sneaks & Data
5
+ # Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
6
6
  #
7
7
  # Licensed under the Apache License, Version 2.0 (the "License");
8
8
  # you may not use this file except in compliance with the License.
@@ -2,7 +2,7 @@
2
2
  Abstraction for secret storage operations.
3
3
  """
4
4
 
5
- # Copyright (c) 2023-2024. ECCO Sneaks & Data
5
+ # Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
6
6
  #
7
7
  # Licensed under the Apache License, Version 2.0 (the "License");
8
8
  # you may not use this file except in compliance with the License.
@@ -18,7 +18,8 @@
18
18
  #
19
19
 
20
20
  from abc import ABC, abstractmethod
21
- from typing import Union, Dict, Iterable, Any
21
+ from typing import Any
22
+ from collections.abc import Iterable
22
23
 
23
24
  from adapta.security.clients import AuthenticationClient
24
25
 
@@ -32,7 +33,7 @@ class SecretStorageClient(ABC):
32
33
  self._base_client = base_client
33
34
 
34
35
  @abstractmethod
35
- def read_secret(self, storage_name: str, secret_name: str) -> Union[bytes, str, Dict[str, str]]:
36
+ def read_secret(self, storage_name: str, secret_name: str) -> bytes | str | dict[str, str]:
36
37
  """
37
38
  Reads a secret from the specified storage.
38
39
 
@@ -46,7 +47,7 @@ class SecretStorageClient(ABC):
46
47
  self,
47
48
  storage_name: str,
48
49
  secret_name: str,
49
- secret_value: Union[str, Dict[str, str]],
50
+ secret_value: str | dict[str, str],
50
51
  b64_encode=False,
51
52
  ) -> None:
52
53
  """
@@ -1,7 +1,7 @@
1
1
  """
2
2
  Azure Secret Storage Client (KeyVault).
3
3
  """
4
- # Copyright (c) 2023-2024. ECCO Sneaks & Data
4
+ # Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -17,7 +17,6 @@
17
17
  #
18
18
 
19
19
  import base64
20
- from typing import Union, Dict
21
20
 
22
21
  from azure.keyvault.secrets import SecretClient
23
22
 
@@ -43,14 +42,14 @@ class AzureSecretStorageClient(SecretStorageClient):
43
42
  kv_uri = f"https://{keyvault}.vault.azure.net"
44
43
  return SecretClient(kv_uri, self._base_client.get_credentials())
45
44
 
46
- def read_secret(self, storage_name: str, secret_name: str) -> Union[bytes, str, Dict[str, str]]:
45
+ def read_secret(self, storage_name: str, secret_name: str) -> bytes | str | dict[str, str]:
47
46
  return self._get_keyvault(storage_name).get_secret(secret_name).value
48
47
 
49
48
  def create_secret(
50
49
  self,
51
50
  storage_name: str,
52
51
  secret_name: str,
53
- secret_value: Union[str, Dict[str, str]],
52
+ secret_value: str | dict[str, str],
54
53
  b64_encode=False,
55
54
  ) -> None:
56
55
  if not isinstance(secret_value, str):
@@ -1,7 +1,7 @@
1
1
  """
2
2
  Hashicorp Vault Secret storage client
3
3
  """
4
- # Copyright (c) 2023-2024. ECCO Sneaks & Data
4
+ # Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -16,7 +16,7 @@
16
16
  # limitations under the License.
17
17
  #
18
18
 
19
- from typing import Union, Dict, Iterable
19
+ from collections.abc import Iterable
20
20
 
21
21
  import hvac
22
22
 
@@ -41,7 +41,7 @@ class HashicorpSecretStorageClient(SecretStorageClient):
41
41
  self.client = hvac.Client(self._base_client.vault_address, self._access_token)
42
42
  self._role = role
43
43
 
44
- def read_secret(self, storage_name: str, secret_name: str) -> Union[bytes, str, Dict[str, str]]:
44
+ def read_secret(self, storage_name: str, secret_name: str) -> bytes | str | dict[str, str]:
45
45
  secret = self.client.secrets.kv.v2.read_secret_version(path=secret_name)
46
46
  return secret["data"]["data"]
47
47
 
@@ -49,10 +49,10 @@ class HashicorpSecretStorageClient(SecretStorageClient):
49
49
  self,
50
50
  storage_name: str,
51
51
  secret_name: str,
52
- secret_value: Union[str, Dict[str, str]],
52
+ secret_value: str | dict[str, str],
53
53
  b64_encode=False,
54
54
  ) -> None:
55
- if not isinstance(secret_value, Dict):
55
+ if not isinstance(secret_value, dict):
56
56
  raise ValueError(
57
57
  f"Only Dict secret type supported in HashicorpSecretStorageClient but was: {type(secret_value)}"
58
58
  )