clickzetta-dbutils 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clickzetta_dbutils/__init__.py +5 -0
- clickzetta_dbutils/db_utils.py +267 -0
- clickzetta_dbutils/version.py +1 -0
- clickzetta_dbutils-1.0.0.dist-info/METADATA +24 -0
- clickzetta_dbutils-1.0.0.dist-info/RECORD +7 -0
- clickzetta_dbutils-1.0.0.dist-info/WHEEL +5 -0
- clickzetta_dbutils-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,5 @@
|
|
1
|
+
from .db_utils import get_active_engine, get_lakehouse_client, DatabaseConnectionManager, ConnectionConfig, \
|
2
|
+
DatabaseConnectionError
|
3
|
+
|
4
|
+
__all__ = ["get_active_engine", "get_lakehouse_client", "DatabaseConnectionManager", "ConnectionConfig",
|
5
|
+
"DatabaseConnectionError"]
|
@@ -0,0 +1,267 @@
|
|
1
|
+
import json
|
2
|
+
import os
|
3
|
+
import urllib.parse
|
4
|
+
from dataclasses import dataclass, field
|
5
|
+
from typing import Optional, Dict
|
6
|
+
|
7
|
+
from sqlalchemy import create_engine as sa_create_engine
|
8
|
+
from sqlalchemy.engine import URL
|
9
|
+
from sqlalchemy.engine.base import Engine
|
10
|
+
|
11
|
+
|
12
|
+
class DatabaseConnectionError(Exception):
    """Raised when a data source cannot be resolved or is missing required settings."""
|
15
|
+
|
16
|
+
|
17
|
+
@dataclass
class ConnectionConfig:
    """
    Settings for one data source, as decoded from the ``connectionInfos`` JSON.

    Field names are camelCase because instances are built directly from the
    JSON objects (``ConnectionConfig(**info)``), so they must match the keys.
    """

    # --- required ---
    # Data source name; used as the lookup key for the connection cache.
    dsName: str
    # Numeric data source type; ``connect`` handles 1 (ClickZetta),
    # 5 (MySQL) and 7 (PostgreSQL).
    dsType: int
    # Default schema/database, used when no override is set on the manager.
    schema: str

    # --- optional ---
    # Host, optionally in "host:port" form (split on ':' by ``connect``).
    host: Optional[str] = None
    # Token used for ClickZetta when username/password are not both set.
    magicToken: Optional[str] = None
    username: Optional[str] = None
    password: Optional[str] = None
    # ClickZetta only: both are required for that data source type.
    instanceName: Optional[str] = None
    workspaceName: Optional[str] = None
    # Extra connection options; a fresh dict per instance.
    options: Dict[str, str] = field(default_factory=dict)
|
29
|
+
|
30
|
+
|
31
|
+
class DatabaseConnectionManager:
    """
    Build SQLAlchemy engines for data sources described in the environment.

    Data source definitions are read once from the ``connectionInfos``
    environment variable (a URL-encoded JSON list) and cached on the class.
    Per-instance overrides (workspace, driver, schema, virtual cluster and
    extra options) are set through the chainable ``use_*`` methods and are
    applied when ``connect`` is called.
    """

    def __init__(self):
        """
        Create a manager with no overrides set.
        """
        self._vcluster: Optional[str] = None
        self._workspace: Optional[str] = None
        self._driver: Optional[str] = None
        self._schema: Optional[str] = None
        self._engine: Optional[Engine] = None
        self._options: Dict[str, str] = {}

    @classmethod
    def _load_connection_configs(cls) -> Dict[str, ConnectionConfig]:
        """
        Load and cache connection configurations from environment variables.

        The ``connectionInfos`` variable is URL-decoded and parsed as a JSON
        list of objects; each object is passed as keyword arguments to
        ``ConnectionConfig``. The result is stored on
        ``DatabaseConnectionManager`` itself, so the environment is read at
        most once per process.

        Returns:
            Dict of connection configurations keyed by data source name
        """
        if not hasattr(DatabaseConnectionManager, '_connection_cache'):
            # Retrieve and decode connection info from environment variable
            conn_info_str = os.environ.get('connectionInfos', '[]')
            decoded_info = urllib.parse.unquote(conn_info_str)
            conn_list = json.loads(decoded_info)

            # Store on the same class the hasattr() check inspects, so that a
            # call made through a subclass does not leave the base class
            # without a cache.
            DatabaseConnectionManager._connection_cache = {
                info.get('dsName'): ConnectionConfig(**info)
                for info in conn_list
            }
        return cls._connection_cache

    def get_connection_info(self, ds_name: str) -> ConnectionConfig:
        """
        Find connection info by data source name.

        Args:
            ds_name (str): Data source name to look up

        Returns:
            A copy of the cached configuration whose ``options`` are the
            cached options overlaid with the options set on this manager.

        Raises:
            DatabaseConnectionError: If no data source has that name.
        """
        from dataclasses import replace

        connections = self._load_connection_configs()

        # Validate data source exists
        if ds_name not in connections:
            raise DatabaseConnectionError(f"Data source '{ds_name}' not found")

        config = connections[ds_name]
        # Merge into a new dict and a new config object: the cached entry is
        # shared by every manager, so updating it in place would leak this
        # manager's options into all later connections.
        merged_options = dict(config.options or {})
        merged_options.update(self._options)
        return replace(config, options=merged_options)

    def use_workspace(self, workspace: str) -> 'DatabaseConnectionManager':
        """
        Set workspace for the connection.

        Args:
            workspace (str): Workspace name

        Returns:
            self: For method chaining
        """
        self._workspace = workspace
        return self

    def use_driver(self, driver: str) -> 'DatabaseConnectionManager':
        """
        Set driver for the connection.

        Args:
            driver (str): Driver name

        Returns:
            self: For method chaining
        """
        self._driver = driver
        return self

    def use_schema(self, schema: str) -> 'DatabaseConnectionManager':
        """
        Set schema for the connection.

        Args:
            schema (str): Schema name

        Returns:
            self: For method chaining
        """
        self._schema = schema
        return self

    def use_vcluster(self, vcluster: str) -> 'DatabaseConnectionManager':
        """
        Set virtual cluster for the connection.

        Args:
            vcluster (str): Virtual cluster name

        Returns:
            self: For method chaining
        """
        self._vcluster = vcluster
        return self

    def use_options(self, options: Optional[Dict[str, str]]) -> 'DatabaseConnectionManager':
        """
        Set additional connection options.

        Args:
            options (dict): Additional connection options; ``None`` or an
                empty dict leaves the current options unchanged

        Returns:
            self: For method chaining
        """
        if options:
            self._options.update(options)
        return self

    def connect(self, ds_name: str, *args, **kwargs) -> Engine:
        """
        Create SQLAlchemy engine based on data source name and optional schema

        Extra positional and keyword arguments are forwarded to
        ``sqlalchemy.create_engine``. For PostgreSQL and ClickZetta the
        configured options are passed as ``connect_args['options']``; a
        caller-supplied ``connect_args`` is merged with it, and an explicit
        ``options`` entry from the caller takes precedence.

        :param ds_name: Name of the data source
        :return: SQLAlchemy Engine
        :raises DatabaseConnectionError: if the data source is unknown or
            lacks host, credentials, workspace/instance or virtual cluster
        :raises ValueError: if the data source type is unsupported, or a
            ClickZetta source has neither username/password nor a token
        """
        conn_info: ConnectionConfig = self.get_connection_info(ds_name)

        if not conn_info.host:
            raise DatabaseConnectionError(f"Missing connection host for data source '{ds_name}'")

        ds_type = conn_info.dsType
        options = conn_info.options or {}
        schema = self._schema or conn_info.schema
        # Host may be given as "host:port"
        host_parts = conn_info.host.split(':')
        port = host_parts[1] if len(host_parts) > 1 else None

        # Construct connection URL based on data source type
        if ds_type == 5:  # Mysql
            if not conn_info.username or not conn_info.password:
                raise DatabaseConnectionError("Missing username or password for MySQL data source")

            url = URL.create(
                drivername=self._driver or 'mysql+mysqlconnector',
                username=conn_info.username,
                password=conn_info.password,
                host=host_parts[0],
                port=port,
                database=schema,
                query=options
            )
            # MySQL options travel in the URL query, so no connect_args here.
            return sa_create_engine(url, *args, **kwargs)

        elif ds_type == 7:  # PostgreSQL
            url = URL.create(
                drivername=self._driver or 'postgresql+psycopg2',
                username=conn_info.username,
                password=conn_info.password,
                host=host_parts[0],
                port=port,
                database=schema
            )
        elif ds_type == 1:  # ClickZetta
            # An explicit use_workspace() override wins over the configured
            # workspace, mirroring how the schema override is resolved.
            workspace = self._workspace or conn_info.workspaceName
            if not workspace or not conn_info.instanceName:
                raise DatabaseConnectionError("Missing required parameters 'workspace_name', "
                                              "'instance_name' for ClickZetta data source")
            if not self._vcluster:
                raise DatabaseConnectionError("Missing virtual cluster for ClickZetta data source")

            query_params = []
            if conn_info.username and conn_info.password:
                # Percent-encode credentials so characters such as '@', ':'
                # or '/' cannot be mistaken for URL delimiters.
                credentials = (f"{urllib.parse.quote(conn_info.username, safe='')}:"
                               f"{urllib.parse.quote(conn_info.password, safe='')}@")
            elif conn_info.magicToken:
                credentials = ''
                query_params.append(('magic_token', conn_info.magicToken))
            else:
                raise ValueError("username and password or token must be specified")

            query_params.append(('virtualcluster', self._vcluster))
            # Add schema if provided
            if schema:
                query_params.append(('schema', schema))

            # urlencode() escapes query values ('+', '&', '=', ...) while
            # keeping the parameter order of the original URL layout.
            url = (f"clickzetta://{credentials}{conn_info.instanceName}.{conn_info.host}/"
                   f"{workspace}"
                   f"?{urllib.parse.urlencode(query_params)}")
        else:
            raise ValueError(f"Unsupported data source type: {ds_type}")

        # Merge rather than pass a second connect_args keyword, which would
        # raise TypeError when the caller supplies connect_args as well.
        connect_args = dict(kwargs.pop('connect_args', None) or {})
        connect_args.setdefault('options', self._convert_options(options))
        return sa_create_engine(url, *args, connect_args=connect_args, **kwargs)

    @staticmethod
    def _convert_options(options):
        """
        Render an options mapping as a ``-c key=value`` string.

        Returns an empty string when ``options`` is empty or ``None``;
        otherwise the entries joined by single spaces.
        """
        if not options:
            return ''
        return ' '.join(f'-c {k}={v}' for k, v in options.items())
|
230
|
+
|
231
|
+
|
232
|
+
def get_lakehouse_client(conn):
    """
    Return the ``_client`` object reached through ``conn.connection.connection``.

    NOTE(review): ``conn`` is presumably a SQLAlchemy connection whose
    underlying DBAPI connection is a ClickZetta connection; confirm against
    callers. ``_client`` is a private attribute of that object.
    """
    pooled_connection = conn.connection
    dbapi_connection = pooled_connection.connection
    return dbapi_connection._client
|
234
|
+
|
235
|
+
|
236
|
+
def get_active_engine(
        ds_name: str,
        vcluster: Optional[str] = None,
        workspace: Optional[str] = None,
        schema: Optional[str] = None,
        options: Optional[Dict[str, str]] = None,
        *args, **kwargs
) -> Engine:
    """
    Build an engine for a named data source in a single call.

    A fresh ``DatabaseConnectionManager`` is created, every override that
    was given a truthy value is applied to it, and its ``connect`` method
    is called with the remaining arguments.

    Args:
        ds_name (str): Data source name
        vcluster (str, optional): Virtual cluster name
        workspace (str, optional): Workspace name
        schema (str, optional): Schema name
        options (dict, optional): Additional connection options
        *args, **kwargs: Forwarded unchanged to ``manager.connect``

    Returns:
        SQLAlchemy Engine instance
    """
    manager = DatabaseConnectionManager()

    # Pair each override with its setter; falsy values (None, '', {}) are
    # skipped so the manager keeps its defaults for them.
    overrides = (
        (workspace, manager.use_workspace),
        (schema, manager.use_schema),
        (vcluster, manager.use_vcluster),
        (options, manager.use_options),
    )
    for value, apply_override in overrides:
        if value:
            apply_override(value)

    return manager.connect(ds_name, *args, **kwargs)
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "1.0.0"
|
@@ -0,0 +1,24 @@
|
|
1
|
+
Metadata-Version: 2.2
|
2
|
+
Name: clickzetta-dbutils
|
3
|
+
Version: 1.0.0
|
4
|
+
Summary: clickzetta dbutils
|
5
|
+
Author-email: "lin.zhang" <lin.zhang@clickzetta.com>
|
6
|
+
Project-URL: documentation, https://www.yunqi.tech/
|
7
|
+
Platform: Posix
|
8
|
+
Platform: MacOS X
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
10
|
+
Requires-Python: >=3.7
|
11
|
+
Requires-Dist: clickzetta-connector-python>=0.8.79.8
|
12
|
+
Requires-Dist: psycopg2
|
13
|
+
Requires-Dist: sqlalchemy<2.0.0,>=1.4.0
|
14
|
+
Requires-Dist: mysqlclient
|
15
|
+
Requires-Dist: mysql-connector-python
|
16
|
+
Provides-Extra: dev
|
17
|
+
Requires-Dist: pytest==8.2.1; extra == "dev"
|
18
|
+
Requires-Dist: sqlparse; extra == "dev"
|
19
|
+
Requires-Dist: grpcio; extra == "dev"
|
20
|
+
Requires-Dist: grpcio-tools; extra == "dev"
|
21
|
+
Requires-Dist: build; extra == "dev"
|
22
|
+
Requires-Dist: pytest-xdist; extra == "dev"
|
23
|
+
Requires-Dist: pytz; extra == "dev"
|
24
|
+
Requires-Dist: apache-superset==4.0.2; extra == "dev"
|
@@ -0,0 +1,7 @@
|
|
1
|
+
clickzetta_dbutils/__init__.py,sha256=OevYNnzvgLUw0-KDJKG7loCONkN_7DxtmqZAmzCHGAg,282
|
2
|
+
clickzetta_dbutils/db_utils.py,sha256=L170MOzkMH4cfEwYJcHPz7_aQcTi96e9BqkF6CCRfc4,8506
|
3
|
+
clickzetta_dbutils/version.py,sha256=Aj77VL1d5Mdku7sgCgKQmPuYavPpAHuZuJcy6bygQZE,21
|
4
|
+
clickzetta_dbutils-1.0.0.dist-info/METADATA,sha256=B3yZU_cOnTz0sCOTqm47dEwxdm66wCLlIZQt-Fbgclc,836
|
5
|
+
clickzetta_dbutils-1.0.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
6
|
+
clickzetta_dbutils-1.0.0.dist-info/top_level.txt,sha256=8o5KqMSg9pxnPNejHjMaqZV2vEDvwvsz2GdChZI0N6I,19
|
7
|
+
clickzetta_dbutils-1.0.0.dist-info/RECORD,,
|
@@ -0,0 +1 @@
|
|
1
|
+
clickzetta_dbutils
|