adapta 2.11.9__py3-none-any.whl → 3.5.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adapta/__init__.py +1 -1
- adapta/_version.py +1 -1
- adapta/connectors/__init__.py +1 -1
- adapta/connectors/service_bus/__init__.py +1 -1
- adapta/connectors/service_bus/_connector.py +2 -3
- adapta/logs/__init__.py +1 -1
- adapta/logs/_async_logger.py +38 -24
- adapta/logs/_base.py +21 -21
- adapta/logs/_internal.py +6 -7
- adapta/logs/_internal_logger.py +113 -41
- adapta/logs/_logger_interface.py +9 -10
- adapta/logs/handlers/__init__.py +1 -1
- adapta/logs/handlers/datadog_api_handler.py +7 -7
- adapta/logs/handlers/safe_stream_handler.py +4 -4
- adapta/logs/models/__init__.py +1 -1
- adapta/logs/models/_log_level.py +1 -1
- adapta/logs/models/_logs_metadata.py +4 -5
- adapta/metrics/__init__.py +1 -1
- adapta/metrics/_base.py +14 -15
- adapta/metrics/providers/__init__.py +1 -1
- adapta/metrics/providers/datadog_provider.py +21 -22
- adapta/metrics/providers/void_provider.py +34 -0
- adapta/ml/__init__.py +1 -1
- adapta/ml/_model.py +1 -1
- adapta/ml/mlflow/__init__.py +1 -1
- adapta/ml/mlflow/_client.py +101 -5
- adapta/ml/mlflow/_functions.py +44 -13
- adapta/process_communication/__init__.py +1 -1
- adapta/process_communication/_models.py +8 -6
- adapta/schema_management/README.md +0 -1
- adapta/schema_management/__init__.py +1 -1
- adapta/schema_management/schema_entity.py +3 -3
- adapta/security/__init__.py +1 -1
- adapta/security/clients/__init__.py +1 -1
- adapta/security/clients/_azure_client.py +14 -12
- adapta/security/clients/_base.py +11 -6
- adapta/security/clients/_local_client.py +6 -6
- adapta/security/clients/aws/__init__.py +1 -1
- adapta/security/clients/aws/_aws_client.py +12 -10
- adapta/security/clients/aws/_aws_credentials.py +7 -8
- adapta/security/clients/hashicorp_vault/__init__.py +1 -1
- adapta/security/clients/hashicorp_vault/hashicorp_vault_client.py +7 -6
- adapta/security/clients/hashicorp_vault/kubernetes_client.py +2 -2
- adapta/security/clients/hashicorp_vault/oidc_client.py +2 -2
- adapta/security/clients/hashicorp_vault/token_client.py +2 -2
- adapta/storage/__init__.py +1 -1
- adapta/storage/blob/README.md +14 -10
- adapta/storage/blob/__init__.py +1 -1
- adapta/storage/blob/azure_storage_client.py +76 -24
- adapta/storage/blob/base.py +15 -13
- adapta/storage/blob/local_storage_client.py +28 -16
- adapta/storage/blob/s3_storage_client.py +19 -24
- adapta/storage/cache/__init__.py +1 -1
- adapta/storage/cache/_base.py +5 -5
- adapta/storage/cache/redis_cache.py +5 -5
- adapta/storage/database/__init__.py +4 -1
- adapta/storage/database/{README.md → v2/README.md} +2 -0
- adapta/storage/database/v2/__init__.py +17 -0
- adapta/storage/database/v2/azure_sql.py +143 -0
- adapta/storage/{distributed_object_store/datastax_astra → database/v2/models}/__init__.py +5 -5
- adapta/storage/database/v2/models/_models.py +53 -0
- adapta/storage/database/{odbc.py → v2/odbc.py} +22 -13
- adapta/storage/database/{snowflake_sql.py → v2/snowflake_sql.py} +20 -12
- adapta/storage/database/{trino_sql.py → v2/trino_sql.py} +15 -6
- adapta/storage/database/v3/README.md +109 -0
- adapta/storage/database/v3/__init__.py +14 -0
- adapta/storage/database/{azure_sql.py → v3/azure_sql.py} +7 -9
- adapta/storage/database/v3/models/__init__.py +19 -0
- adapta/storage/database/{models → v3/models}/_models.py +2 -3
- adapta/storage/database/v3/odbc.py +217 -0
- adapta/storage/database/v3/snowflake_sql.py +241 -0
- adapta/storage/database/v3/trino_sql.py +154 -0
- adapta/storage/delta_lake/__init__.py +2 -3
- adapta/storage/delta_lake/{README.md → v2/README.md} +2 -0
- adapta/storage/delta_lake/v2/__init__.py +19 -0
- adapta/storage/delta_lake/{_functions.py → v2/_functions.py} +43 -27
- adapta/storage/delta_lake/v2/_models.py +72 -0
- adapta/storage/delta_lake/v3/README.md +147 -0
- adapta/storage/delta_lake/v3/__init__.py +20 -0
- adapta/storage/delta_lake/v3/_functions.py +315 -0
- adapta/storage/delta_lake/{_models.py → v3/_models.py} +4 -5
- adapta/storage/distributed_object_store/__init__.py +3 -1
- adapta/storage/distributed_object_store/v2/__init__.py +18 -0
- adapta/storage/distributed_object_store/{datastax_astra → v2/datastax_astra}/README.md +2 -0
- adapta/storage/distributed_object_store/v2/datastax_astra/__init__.py +20 -0
- adapta/storage/distributed_object_store/{datastax_astra → v2/datastax_astra}/_models.py +16 -0
- adapta/storage/distributed_object_store/{datastax_astra → v2/datastax_astra}/astra_client.py +61 -52
- adapta/storage/{database/models → distributed_object_store/v3}/__init__.py +4 -5
- adapta/storage/distributed_object_store/v3/datastax_astra/README.md +277 -0
- adapta/storage/distributed_object_store/v3/datastax_astra/__init__.py +20 -0
- adapta/storage/distributed_object_store/v3/datastax_astra/_model_mappers.py +469 -0
- adapta/storage/distributed_object_store/v3/datastax_astra/_models.py +134 -0
- adapta/storage/distributed_object_store/v3/datastax_astra/astra_client.py +569 -0
- adapta/storage/exceptions.py +1 -1
- adapta/storage/models/__init__.py +1 -1
- adapta/storage/models/_functions.py +5 -5
- adapta/storage/models/astra.py +4 -4
- adapta/storage/models/aws.py +1 -1
- adapta/storage/models/azure.py +2 -3
- adapta/storage/models/base.py +9 -1
- adapta/storage/models/enum.py +19 -0
- adapta/storage/models/filter_expression.py +124 -10
- adapta/storage/models/format.py +16 -205
- adapta/storage/models/formatters/__init__.py +36 -0
- adapta/storage/models/formatters/dict.py +43 -0
- adapta/storage/models/formatters/exceptions.py +7 -0
- adapta/storage/models/formatters/metaframe.py +48 -0
- adapta/storage/models/formatters/pandas.py +139 -0
- adapta/storage/models/formatters/pickle.py +36 -0
- adapta/storage/models/formatters/polars.py +240 -0
- adapta/storage/models/formatters/unit.py +26 -0
- adapta/storage/models/hive.py +24 -16
- adapta/storage/models/local.py +1 -1
- adapta/storage/models/trino.py +56 -0
- adapta/storage/query_enabled_store/README.md +1 -1
- adapta/storage/query_enabled_store/__init__.py +7 -1
- adapta/storage/query_enabled_store/_models.py +42 -13
- adapta/storage/query_enabled_store/_qes_astra.py +27 -14
- adapta/storage/query_enabled_store/_qes_delta.py +32 -10
- adapta/storage/query_enabled_store/_qes_local.py +81 -0
- adapta/storage/query_enabled_store/_qes_trino.py +133 -0
- adapta/storage/secrets/__init__.py +1 -1
- adapta/storage/secrets/_base.py +5 -4
- adapta/storage/secrets/azure_secret_client.py +3 -4
- adapta/storage/secrets/hashicorp_vault_secret_storage_client.py +5 -5
- adapta/utils/README.md +92 -0
- adapta/utils/__init__.py +2 -1
- adapta/utils/_common.py +50 -17
- adapta/utils/_requests.py +53 -0
- adapta/utils/concurrent_task_runner.py +10 -9
- adapta/utils/data_structures/_functions.py +6 -6
- adapta/utils/decorators/_logging.py +3 -3
- adapta/utils/decorators/_rate_limit.py +2 -2
- adapta/utils/metaframe.py +172 -0
- adapta/utils/python_typing/_functions.py +5 -10
- {adapta-2.11.9.dist-info → adapta-3.5.13.dist-info}/METADATA +18 -14
- adapta-3.5.13.dist-info/RECORD +146 -0
- {adapta-2.11.9.dist-info → adapta-3.5.13.dist-info}/WHEEL +1 -1
- adapta-2.11.9.dist-info/RECORD +0 -110
- {adapta-2.11.9.dist-info → adapta-3.5.13.dist-info/licenses}/LICENSE +0 -0
adapta/storage/database/v3/odbc.py
@@ -0,0 +1,217 @@
+"""
+Database client that uses an ODBC driver.
+"""
+
+# Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from abc import ABC
+from typing import Optional
+from collections.abc import Iterator
+
+from pandas import read_sql
+import sqlalchemy
+from sqlalchemy import text
+from sqlalchemy.connectors import pyodbc
+from sqlalchemy.engine import URL
+from sqlalchemy.exc import SQLAlchemyError, OperationalError
+
+from adapta.logs import SemanticLogger
+from adapta.storage.database.v3.models import DatabaseType, SqlAlchemyDialect
+from adapta.utils.metaframe import MetaFrame
+
+
+class OdbcClient(ABC):
+    """
+    Generic ODBC database client that relies on SqlAlchemy API.
+    """
+
+    def __init__(
+        self,
+        logger: SemanticLogger,
+        database_type: DatabaseType,
+        host_name: str | None = None,
+        user_name: str | None = None,
+        database: str | None = None,
+        password: str | None = None,
+        port: int | None = None,
+    ):
+        """
+        Creates an instance of an OdbcClient
+
+        :param logger: Logger instance for database operations.
+        :param database_type: Type of database to connect to.
+        :param host_name: Host name.
+        :param user_name: SQL user name.
+        :param database: Optional database name to connect to.
+        :param password: SQL user password.
+        :param port: Connection port.
+        """
+        self._db_type = database_type
+        self._dialect: SqlAlchemyDialect = database_type.value
+        self._host = host_name
+        self._database = database
+        self._user = user_name
+        self._password = password
+        self._port = port
+        self._logger = logger
+        self._engine = None
+        self._connection = None
+        pyodbc.pooling = False
+
+    def __enter__(self) -> Optional["OdbcClient"]:
+        connection_url: sqlalchemy.engine.URL = URL.create(
+            drivername=self._dialect.dialect,
+            host=self._host,
+            database=self._database,
+            username=self._user,
+            password=self._password,
+            port=self._port,
+            query=self._dialect.driver,
+        )
+        self._logger.info(
+            "Connecting to {host}:{port} using dialect {dialect} and driver {driver}",
+            host=self._host,
+            port=self._port,
+            dialect=self._dialect.dialect,
+            driver=self._dialect.driver,
+        )
+        try:
+            self._engine: sqlalchemy.engine.Engine = sqlalchemy.create_engine(connection_url, pool_pre_ping=True)
+            self._connection: sqlalchemy.engine.Connection = self._engine.connect()
+            return self
+        except SQLAlchemyError as ex:
+            self._logger.error(
+                "Error connecting to {host}:{port} using dialect {dialect} and driver {driver}",
+                host=self._host,
+                port=self._port,
+                dialect=self._dialect.dialect,
+                driver=self._dialect.driver,
+                exception=ex,
+            )
+
+            return None
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self._connection.close()
+        self._engine.dispose()
+
+    def fork(self) -> "OdbcClient":
+        """
+        Copies this client in order to create a new connection, while keeping the other one open (fork).
+        """
+        return OdbcClient(
+            logger=self._logger,
+            database_type=self._db_type,
+            host_name=self._host,
+            user_name=self._user,
+            password=self._password,
+            port=self._port,
+        )
+
+    def _get_connection(self) -> sqlalchemy.engine.Connection | None:
+        if self._connection is None:
+            self._logger.info("No connection is active. Please create one using with OdbcClient(..) as client: ...")
+            return None
+
+        return self._connection
+
+    def query(self, query: str, chunksize: int | None = None) -> MetaFrame | Iterator[MetaFrame] | None:
+        """
+        Read result of SQL query into a MetaFrame. The latent representation of the MetaFrame is a Pandas dataframe.
+
+        :param query: Query to execute on the connection.
+        :param chunksize: Size of an individual data chunk. If not provided, query result will be a single dataframe.
+        :return:
+        """
+        try:
+            if chunksize:
+                return (
+                    MetaFrame.from_pandas(chunk)
+                    for chunk in read_sql(query, con=self._get_connection(), chunksize=chunksize)
+                )
+
+            return MetaFrame.from_pandas(read_sql(query, con=self._get_connection()))
+        except SQLAlchemyError as ex:
+            self._logger.error("Engine error while executing query {query}", query=query, exception=ex)
+            return None
+        except BaseException as other:  # pylint: disable=W0703
+            self._logger.error(
+                "Unknown error while executing query {query}",
+                query=query,
+                exception=other,
+            )
+            return None
+
+    def materialize(
+        self,
+        data: MetaFrame,
+        schema: str,
+        name: str,
+        overwrite: bool = False,
+        chunksize: int | None = None,
+    ) -> int | None:
+        """
+        Materialize MetaFrame as a table in a database.
+        The table is converted to a Pandas dataframe before materialization.
+
+        :param data: MetaFrame to materialize as a table.
+        :param schema: Schema of a table.
+        :param name: Name of a table.
+        :param overwrite: Whether to overwrite or append the data.
+        :param chunksize: Use this to split a dataframe into chunks and append them sequentially to the target table.
+        :return:
+        """
+        try:
+            if overwrite:
+                try:
+                    if self._dialect.dialect == DatabaseType.SQLITE_ODBC.value.dialect:
+                        self._get_connection().execute(text(f"DELETE FROM {schema}.{name}"))
+                    else:
+                        self._get_connection().execute(text(f"TRUNCATE TABLE {schema}.{name}"))
+                except OperationalError as ex:
+                    # The table does not exist. Do nothing and let the Pandas API handle the creation of the table.
+                    self._logger.warning(
+                        "Error truncating {schema}.{table}, now creating table without truncating.",
+                        schema=schema,
+                        table=name,
+                        exception=ex,
+                    )
+
+            return data.to_pandas().to_sql(
+                name=name,
+                schema=schema,
+                con=self._get_connection(),
+                index=False,
+                chunksize=chunksize,
+                if_exists="append",
+            )
+        except SQLAlchemyError as ex:
+            self._logger.error(
+                "Error while materializing a dataframe into {schema}.{table}",
+                schema=schema,
+                table=name,
+                exception=ex,
+            )
+            return None
+        finally:
+            active_tran: sqlalchemy.engine.RootTransaction = self._get_connection().get_transaction()
+            if active_tran and active_tran.is_active:
+                self._logger.debug(
+                    "Found an active transaction for {schema}.{table}. Committing it.",
+                    schema=schema,
+                    table=name,
+                )
+                active_tran.commit()
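For orientation, here is a minimal usage sketch of the `OdbcClient` added above. It is not taken from the package: the connection parameters are placeholders, and in practice the concrete v3 clients elsewhere in this diff (for example `v3/azure_sql.py`) would typically be used rather than the generic base class. Only names that appear in the added module (`SemanticLogger`, `LogLevel`, `DatabaseType.SQLITE_ODBC`, `MetaFrame`) are assumed to exist.

```python
# Illustrative only: placeholder connection details, not an official adapta example.
from adapta.logs import SemanticLogger
from adapta.logs.models import LogLevel
from adapta.storage.database.v3.models import DatabaseType
from adapta.storage.database.v3.odbc import OdbcClient

logger = SemanticLogger().add_log_source(
    log_source_name="odbc-example", min_log_level=LogLevel.INFO, is_default=True
)

# __enter__ returns None when the connection cannot be established, so guard before querying.
with OdbcClient(
    logger=logger,
    database_type=DatabaseType.SQLITE_ODBC,  # enum member referenced by materialize() above
    database="example.db",
) as client:
    if client is not None:
        frame = client.query("SELECT 1 AS one")                  # single MetaFrame, or None on error
        chunks = client.query("SELECT 1 AS one", chunksize=500)  # generator of MetaFrame chunks
        if frame is not None:
            print(frame.to_pandas())
```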
adapta/storage/database/v3/snowflake_sql.py
@@ -0,0 +1,241 @@
+"""
+Snowflake Client Wrapper
+"""
+
+import os
+import re
+from types import TracebackType
+from typing import Self
+
+import snowflake.connector
+
+from snowflake.connector.errors import DatabaseError, ProgrammingError
+
+from adapta.logs.models import LogLevel
+from adapta.logs import SemanticLogger
+from adapta.storage.models import S3Path, DataPath
+
+from adapta.storage.models.azure import AdlsGen2Path
+from adapta.utils.metaframe import MetaFrame
+
+
+class SnowflakeClient:
+    """
+    A wrapper around the Snowflake Python connector that provides a context manager for handling connections
+    and transactions. It also includes a method for executing queries and returning the result as a Pandas DataFrame.
+
+    :param user: The username for the Snowflake account.
+    :param account: The account name for the Snowflake account.
+    :param warehouse: The warehouse name for the Snowflake account.
+    :param authenticator: The authentication mechanism to use for the Snowflake account.
+    :param logger: The logger to use for logging messages. Defaults to a new SemanticLogger instance.
+    :param password: Optional - The password for the Snowflake user. Should be combined with `authenticator='snowflake'` to enable password authentication
+    :param role: Optional - The role for the Snowflake user.
+    """
+
+    def __init__(
+        self,
+        user: str,
+        account: str,
+        warehouse: str,
+        authenticator: str = "externalbrowser",
+        logger: SemanticLogger = SemanticLogger().add_log_source(
+            log_source_name="adapta-snowflake-client",
+            min_log_level=LogLevel.INFO,
+            is_default=True,
+        ),
+        password: str | None = None,
+        role: str | None = None,
+    ):
+        self._user = user
+        self._account = account
+        self._warehouse = warehouse
+        self._authenticator = "snowflake" if password else authenticator
+        self._logger = logger
+        self._password = password
+        self._role = role
+        self._conn = None
+
+    def __enter__(self) -> Self | None:
+        """
+        Enters the context manager and establishes a connection to the Snowflake database.
+        :return: The SnowflakeClient instance, or None if there was an error connecting to the database.
+        """
+        try:
+            self._conn = snowflake.connector.connect(
+                user=self._user,
+                account=self._account,
+                password=self._password,
+                warehouse=self._warehouse,
+                authenticator=self._authenticator,
+                role=self._role,
+            )
+            return self
+        except DatabaseError as ex:
+            self._logger.error(
+                "Error connecting to {account} for {user}", account=self._account, user=self._user, exception=ex
+            )
+            return None
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None = None,
+        exc_val: BaseException | None = None,
+        exc_tb: TracebackType | None = None,
+    ) -> None:
+        """
+        Exits the context manager and closes the database connection.
+
+        :param exc_type: The type of the exception that was raised, if any.
+        :param exc_val: The value of the exception that was raised, if any.
+        :param exc_tb: The traceback of the exception that was raised, if any.
+        """
+        self._conn.close()
+        if exc_val is not None:
+            self._logger.error(f"An error occurred while closing the database connection: {exc_val}")
+
+    def query(self, query: str, fetch_dataframe: bool = True) -> MetaFrame | None:
+        """
+        Executes the given SQL query and returns the result as a Pandas DataFrame.
+
+        :param query: The SQL query to execute.
+        :param fetch_dataframe: Fetch dataframes in batches, otherwise only execute the query
+        :return: An iterator of Pandas DataFrames, one for each result set returned by the query, or None if there was
+        an error executing the query.
+        """
+        try:
+            with self._conn.cursor() as cursor:
+                result = cursor.execute(query)
+                if fetch_dataframe:
+                    return MetaFrame.from_arrow(result.fetch_arrow_all(force_return_table=True))
+                return None
+
+        except ProgrammingError as ex:
+            self._logger.error("Error executing query {query}", query=query, exception=ex)
+            return None
+
+    def _get_snowflake_type(self, data_type: str) -> str:
+        """Maps delta type to Snowflake type"""
+
+        type_map = {
+            "string": "TEXT",
+            "integer": "INTEGER",
+            "float": "FLOAT",
+            "double": "FLOAT",
+            "timestamp": "TIMESTAMP_NTZ",
+            "date": "DATE",
+            "struct": "VARIANT",
+            "map": "VARIANT",
+            "array": "VARIANT",
+            "boolean": "BOOLEAN",
+            "binary": "BINARY",
+        }
+
+        snowflake_type = type_map.get(data_type, None)
+        if snowflake_type:
+            return snowflake_type
+
+        if data_type.startswith("decimal"):
+            decimal_info = [int(num) for num in re.findall(r"\d+", data_type)]
+            return f"DECIMAL({decimal_info[0]},{decimal_info[1]})"
+
+        raise ValueError(f"found type:{data_type} which is currently not supported")
+
+    def publish_external_delta_table(
+        self,
+        database: str,
+        schema: str,
+        table: str,
+        refresh_metadata_only: bool = False,
+        path: DataPath | None = None,
+        table_schema: dict[str, str] | None = None,
+        partition_columns: list[str] | None = None,
+        storage_integration: str | None = None,
+    ) -> None:
+        """
+        Creates delta table as external table in Snowflake
+
+        :param database: name of the database, in Snowflake, to create the table
+        :param schema: name of the schema, in Snowflake, to create the table
+        :param table: name of the table to be created in Snowflake
+        :param refresh_metadata_only: Only refresh metadata, when table has already existed in snowflake.
+        So skip the initializing phases like creating schema, creating external table, etc.
+        :param path: path to the delta table in datalake
+        :param table_schema: A mapping from column name to column type (the type should be in the lower case and supported by delta table)
+        , like {'ColumnA': 'struct', 'ColumnB': 'decimal(10, 2)'}
+        :param partition_columns: A list of partition column names
+        :param storage_integration: name of the storage integration to use in Snowflake. Default to the name of the storage account
+        """
+
+        def _get_azure_query(resolved_path: AdlsGen2Path):
+            return (
+                f"create stage if not exists {database}.{schema}.stage_{table}"
+                + f" storage_integration = {storage_integration if storage_integration is not None else resolved_path.account}"
+                + f" url = 'azure://{resolved_path.account}.blob.core.windows.net/{resolved_path.container}/{path.path}';"
+            )
+
+        def _get_s3_query(resolved_path: S3Path):
+            return (
+                f"create stage if not exists {database}.{schema}.stage_{table}"
+                + f" endpoint = '{os.environ['S3_ENDPOINT__DNS_NAME']}'"
+                + f" url = 's3compat://{resolved_path.bucket}/{resolved_path.path}'"
+                + f" credentials = (AWS_KEY_ID = '{os.environ['S3_ENDPOINT__ACCESS_KEY_ID']}' AWS_SECRET_KEY = '{os.environ['S3_ENDPOINT__ACCESS_KEY']}');"
+            )
+
+        if not refresh_metadata_only:
+            assert path, "Path to the delta table needed! Please check!"
+            assert table_schema, "Table schema needed! Please check!"
+
+            self.query(query=f"create schema if not exists {database}.{schema}", fetch_dataframe=False)
+
+            if isinstance(path, AdlsGen2Path):
+                query = _get_azure_query(path)
+            elif isinstance(path, S3Path):
+                query = _get_s3_query(path)
+            else:
+                raise ValueError(f"Path type {type(path)} is not supported!")
+
+            self.query(
+                query=query,
+                fetch_dataframe=False,
+            )
+
+            if partition_columns is not None:
+                partition_expr = ",".join(partition_columns)
+                partition_select = [
+                    f"\"{partition_column}\" TEXT AS (split_part(split_part(metadata$filename, '=', {2 + i}), '/', 1))"
+                    for i, partition_column in enumerate(partition_columns)
+                ]
+            else:
+                partition_expr = ""
+                partition_select = []
+                partition_columns = []
+
+            snowflake_columns = [
+                (column_name, self._get_snowflake_type(column_type))
+                for column_name, column_type in table_schema.items()
+                if column_name not in partition_columns
+            ]
+
+            columns = [
+                f'"{column}" {col_type} AS ($1:"{column}"::{col_type})' for column, col_type in snowflake_columns
+            ] + partition_select
+
+            column_expr = ("," + os.linesep).join(columns)
+
+            self.query(
+                query=f"""
+                create or replace external table "{database}"."{schema}"."{table}"
+                (
+                {column_expr}
+                )
+                {f"partition by ({partition_expr})" if partition_expr else ""}
+                location=@{database}.{schema}.stage_{table}
+                auto_refresh = false
+                refresh_on_create=false
+                file_format = (type = parquet)
+                table_format = delta;""",
+                fetch_dataframe=False,
+            )
+
+        self.query(query=f'alter external table "{database}"."{schema}"."{table}" refresh;', fetch_dataframe=False)
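Similarly, a minimal usage sketch for the `SnowflakeClient` added above; the account, user and warehouse values are placeholders, and with no password supplied the client falls back to the default `externalbrowser` authenticator.

```python
# Illustrative only: placeholder Snowflake account details.
from adapta.storage.database.v3.snowflake_sql import SnowflakeClient

with SnowflakeClient(
    user="user@example.com",
    account="my_account",
    warehouse="my_warehouse",
) as client:
    if client is not None:  # __enter__ returns None if the connection attempt failed
        result = client.query("SELECT CURRENT_VERSION()")  # MetaFrame, or None on a query error
        if result is not None:
            print(result.to_pandas())
```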
adapta/storage/database/v3/trino_sql.py
@@ -0,0 +1,154 @@
+"""
+SqlAlchemy-based Trino Client Wrapper
+"""
+
+# Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+from dataclasses import dataclass
+from typing import final, Optional
+from collections.abc import Iterator
+
+import sqlalchemy
+from pandas import read_sql_query
+from sqlalchemy import create_engine
+from sqlalchemy.exc import SQLAlchemyError
+from trino.auth import OAuth2Authentication, BasicAuthentication
+
+from adapta.logs.models import LogLevel
+from adapta.logs import SemanticLogger
+from adapta.storage.secrets import SecretStorageClient
+from adapta.utils.metaframe import MetaFrame
+
+
+@final
+@dataclass
+class TrinoConnectionSecret:
+    """
+    Connection secret structure for Trino
+    """
+
+    secret_name: str
+    username_secret_key: str
+    password_secret_key: str
+
+
+class TrinoClient:
+    """
+    Trino (https://www.trino.io) connection client.
+    """
+
+    def __init__(
+        self,
+        host: str,
+        catalog: str | None = None,
+        port: int | None = 443,
+        oauth2_username: str | None = None,
+        credentials_provider: tuple[TrinoConnectionSecret, SecretStorageClient] | None = None,
+        logger: SemanticLogger = SemanticLogger().add_log_source(
+            log_source_name="adapta-trino-client",
+            min_log_level=LogLevel.INFO,
+            is_default=True,
+        ),
+    ):
+        """
+        Initializes a SqlAlchemy Engine that will facilitate connections to Trino.
+        Authentication options:
+        - via OAuth2 if oauth2_username or ADAPTA__TRINO_OAUTH2_USERNAME is provided
+        - via external secret provider (Vault, Azure KeyVault, AWS Secrets Manager, etc.) if credentials_provider is provided
+        - via plaintext username-password if ADAPTA__TRINO_USERNAME and ADAPTA__TRINO_PASSWORD are provided
+
+        :param host: Trino Coordinator hostname, without protocol.
+        :param catalog: Trino catalog.
+        :param port: Trino connection port (443 default).
+        :param oauth2_username: Optional username to use if authenticating with interactive OAuth2.
+        Can also be provided via ADAPTA__TRINO_OAUTH2_USERNAME.
+        :param credentials_provider: Optional secret provider and auth secret details to use to read Basic Auth credentials.
+        :param logger: CompositeLogger instance.
+        """
+
+        self._host = host
+        self._catalog = catalog
+        self._port = port
+        if "ADAPTA__TRINO_USERNAME" in os.environ:
+            self._engine = create_engine(
+                f"trino://{os.getenv('ADAPTA__TRINO_USERNAME')}@{self._host}:{self._port}/{self._catalog or ''}",
+                connect_args={
+                    "auth": BasicAuthentication(
+                        os.getenv("ADAPTA__TRINO_USERNAME"), os.getenv("ADAPTA__TRINO_PASSWORD")
+                    ),
+                    "http_scheme": "https",
+                },
+            )
+        elif "ADAPTA__TRINO_OAUTH2_USERNAME" in os.environ or oauth2_username:
+            self._engine = create_engine(
+                f"trino://{os.getenv('ADAPTA__TRINO_OAUTH2_USERNAME')}@{self._host}:{self._port}/{self._catalog or ''}",
+                connect_args={
+                    "auth": OAuth2Authentication(),
+                    "http_scheme": "https",
+                },
+            )
+        elif credentials_provider:
+            credentials_secret = credentials_provider[1].read_secret("", credentials_provider[0].secret_name)
+            username = credentials_secret[credentials_provider[0].username_secret_key]
+            self._engine = create_engine(
+                f"trino://{username}@{self._host}:{self._port}/{self._catalog or ''}",
+                connect_args={
+                    "auth": BasicAuthentication(
+                        username, credentials_secret[credentials_provider[0].password_secret_key]
+                    ),
+                    "http_scheme": "https",
+                },
+            )
+        else:
+            raise ConnectionError(
+                "Neither ADAPTA__TRINO_USERNAME or ADAPTA__TRINO_OAUTH2_USERNAME is specified. Cannot authenticate to the provided host."
+            )
+
+        self._logger = logger
+        self._connection: sqlalchemy.engine.Connection | None = None
+
+    def __enter__(self) -> Optional["TrinoClient"]:
+        try:
+            self._connection = self._engine.connect()
+            return self
+        except SQLAlchemyError as ex:
+            self._logger.error(
+                "Error connecting to {host}:{port}",
+                host=self._host,
+                port=self._port,
+                exception=ex,
+            )
+            return None
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self._connection.close()
+        self._engine.dispose()
+
+    def query(self, query: str, batch_size: int = 1000) -> Iterator[MetaFrame]:
+        """
+        Executes a Trino DML query and converts the result into a Pandas dataframe.
+
+        This method internally calls pandas.read_sql_query
+
+        :param query: SQL query compliant with https://trino.io/docs/current/sql.html
+        :param batch_size: Optional batch size to return rows iteratively.
+        """
+
+        return (
+            MetaFrame.from_pandas(chunk)
+            for chunk in read_sql_query(sql=query, con=self._connection, chunksize=batch_size)
+        )
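And a usage sketch for the `TrinoClient` added above, using the basic-auth path driven by the `ADAPTA__TRINO_USERNAME` / `ADAPTA__TRINO_PASSWORD` environment variables; host, catalog and credentials are placeholders, not real endpoints.

```python
# Illustrative only: placeholder host, catalog and credentials.
import os

from adapta.storage.database.v3.trino_sql import TrinoClient

os.environ["ADAPTA__TRINO_USERNAME"] = "user"
os.environ["ADAPTA__TRINO_PASSWORD"] = "***"

with TrinoClient(host="trino.example.com", catalog="hive") as client:
    if client is not None:  # __enter__ returns None when the connection attempt fails
        for chunk in client.query("SELECT 1 AS one", batch_size=1000):
            print(chunk.to_pandas())  # each chunk is a MetaFrame backed by a Pandas dataframe
```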
adapta/storage/delta_lake/__init__.py
@@ -1,7 +1,7 @@
 """
 Import index.
 """
-# Copyright (c) 2023-
+# Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,5 +16,4 @@
 # limitations under the License.
 #
 
-from adapta.storage.delta_lake.
-from adapta.storage.delta_lake._models import *
+from adapta.storage.delta_lake.v2 import *
adapta/storage/delta_lake/v2/README.md (renamed from adapta/storage/delta_lake/README.md)
@@ -1,5 +1,7 @@
 # Delta Lake Operations
 
+**This is a deprecated module. Please use the new module `adapta.storage.delta_lake.v3` instead.**
+
 Supported API:
 - read delta table as `pandas.DataFrame`
 - read delta table in batches of a provided size, each batch being `pandas.DataFrame`
adapta/storage/delta_lake/v2/__init__.py
@@ -0,0 +1,19 @@
+"""
+Import index
+"""
+# Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from adapta.storage.delta_lake.v2._models import *
+from adapta.storage.delta_lake.v2._functions import *
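The renamed README above marks `adapta.storage.delta_lake.v2` as deprecated in favour of `v3`. A minimal migration sketch follows, assuming `adapta.storage.delta_lake.v3` re-exports its public API from the package root the same way `v2` does (its `__init__.py` is listed in this diff but its contents are not shown in this section).

```python
# Existing imports keep working in 3.5.13 via the v2 compatibility package:
from adapta.storage.delta_lake.v2 import *

# Preferred going forward, assuming v3 follows the same re-export pattern as v2:
from adapta.storage.delta_lake.v3 import *
```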