clickzetta-dbutils 1.0.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ from .db_utils import get_active_engine, get_lakehouse_client, DatabaseConnectionManager, ConnectionConfig, \
2
+ DatabaseConnectionError
3
+
4
# Public API re-exported by the package.
__all__ = [
    "get_active_engine",
    "get_lakehouse_client",
    "DatabaseConnectionManager",
    "ConnectionConfig",
    "DatabaseConnectionError",
]
@@ -0,0 +1,267 @@
1
+ import json
2
+ import os
3
+ import urllib.parse
4
+ from dataclasses import dataclass, field
5
+ from typing import Optional, Dict
6
+
7
+ from sqlalchemy import create_engine as sa_create_engine
8
+ from sqlalchemy.engine import URL
9
+ from sqlalchemy.engine.base import Engine
10
+
11
+
12
class DatabaseConnectionError(Exception):
    """Error type used to report database connection problems."""
15
+
16
+
17
@dataclass
class ConnectionConfig:
    """Settings describing one data source entry."""

    # Required fields: data source name, numeric type code, default schema.
    dsName: str
    dsType: int
    schema: str

    # Optional endpoint and credential fields; all default to None.
    host: Optional[str] = field(default=None)
    magicToken: Optional[str] = field(default=None)
    username: Optional[str] = field(default=None)
    password: Optional[str] = field(default=None)
    instanceName: Optional[str] = field(default=None)
    workspaceName: Optional[str] = field(default=None)

    # Extra connection options; each instance gets its own empty dict.
    options: Dict[str, str] = field(default_factory=dict)
29
+
30
+
31
class DatabaseConnectionManager:
    """
    Manages database connections with flexible configuration options.

    Connection definitions are read from the ``connectionInfos`` environment
    variable (a URL-encoded JSON list) and cached on the class. The ``use_*``
    methods record per-manager overrides and return ``self`` so they can be
    chained before calling ``connect``.
    """

    def __init__(self):
        """
        Initialize a manager with no overrides set.
        """
        self._vcluster: Optional[str] = None
        self._workspace: Optional[str] = None
        self._driver: Optional[str] = None
        self._schema: Optional[str] = None
        self._engine: Optional[Engine] = None
        self._options = {}

    @classmethod
    def _load_connection_configs(cls) -> Dict[str, 'ConnectionConfig']:
        """
        Load and cache connection configurations from environment variables.

        Returns:
            Dict of connection configurations keyed by data source name
        """
        # Check and store on the same class so the cache is actually reused.
        if not hasattr(cls, '_connection_cache'):
            # Retrieve and decode connection info from environment variable
            conn_info_str = os.environ.get('connectionInfos', '[]')
            decoded_info = urllib.parse.unquote(conn_info_str)
            conn_list = json.loads(decoded_info)

            # Create connection configs
            cls._connection_cache = {
                info.get('dsName'): ConnectionConfig(**info)
                for info in conn_list
            }
        return cls._connection_cache

    def get_connection_info(self, ds_name: str) -> 'ConnectionConfig':
        """
        Find connection info by data source name.

        The returned config is a copy whose ``options`` are the cached
        options overlaid with this manager's options; the cached entry is
        left untouched so options never leak between managers.

        Args:
            ds_name (str): Data source name

        Returns:
            ConnectionConfig for the data source

        Raises:
            DatabaseConnectionError: If the data source is not configured
        """
        from dataclasses import replace  # only needed by this method

        connections = self._load_connection_configs()

        # Validate data source exists
        if ds_name not in connections:
            raise DatabaseConnectionError(f"Data source '{ds_name}' not found")

        cached = connections[ds_name]
        merged_options = {**(cached.options or {}), **self._options}
        return replace(cached, options=merged_options)

    def use_workspace(self, workspace: str) -> 'DatabaseConnectionManager':
        """
        Set workspace for the connection.

        Args:
            workspace (str): Workspace name

        Returns:
            self: For method chaining
        """
        self._workspace = workspace
        return self

    def use_driver(self, driver: str) -> 'DatabaseConnectionManager':
        """
        Set driver for the connection.

        Args:
            driver (str): Driver name

        Returns:
            self: For method chaining
        """
        self._driver = driver
        return self

    def use_schema(self, schema: str) -> 'DatabaseConnectionManager':
        """
        Set schema for the connection.

        Args:
            schema (str): Schema name

        Returns:
            self: For method chaining
        """
        self._schema = schema
        return self

    def use_vcluster(self, vcluster: str) -> 'DatabaseConnectionManager':
        """
        Set virtual cluster for the connection.

        Args:
            vcluster (str): Virtual cluster name

        Returns:
            self: For method chaining
        """
        self._vcluster = vcluster
        return self

    def use_options(self, options) -> 'DatabaseConnectionManager':
        """
        Set additional connection options.

        Args:
            options (dict): Additional connection options; a falsy value
                leaves the current options unchanged

        Returns:
            self: For method chaining
        """
        if options:
            self._options.update(options)
        return self

    def connect(self, ds_name: str, *args, **kwargs) -> 'Engine':
        """
        Create a SQLAlchemy engine for a configured data source.

        Supported ``dsType`` codes are 5 (MySQL), 7 (PostgreSQL) and
        1 (ClickZetta). Extra positional and keyword arguments are passed
        through to SQLAlchemy's ``create_engine``.

        Args:
            ds_name (str): Name of the data source

        Returns:
            SQLAlchemy Engine

        Raises:
            DatabaseConnectionError: If the data source is unknown or a
                required connection parameter is missing
            ValueError: If the data source type is unsupported, or a
                ClickZetta source has neither username/password nor token
        """
        conn_info = self.get_connection_info(ds_name)

        if not conn_info.host:
            raise DatabaseConnectionError(
                f"Missing connection host for data source '{ds_name}'")

        ds_type = conn_info.dsType
        options = conn_info.options or {}
        schema = self._schema or conn_info.schema

        # Split host into host and port if provided
        host_parts = conn_info.host.split(':')
        host = host_parts[0]
        port = host_parts[1] if len(host_parts) > 1 else None

        # Construct connection URL based on data source type
        if ds_type == 5:  # Mysql
            if not conn_info.username or not conn_info.password:
                raise DatabaseConnectionError("Missing username or password for MySQL data source")
            url = URL.create(
                drivername=self._driver or 'mysql+mysqlconnector',
                username=conn_info.username,
                password=conn_info.password,
                host=host,
                port=port,
                database=schema,
                query=options
            )
            # MySQL options travel in the URL query, not in connect_args.
            return sa_create_engine(url, *args, **kwargs)

        if ds_type == 7:  # PostgreSQL
            url = URL.create(
                drivername=self._driver or 'postgresql+psycopg2',
                username=conn_info.username,
                password=conn_info.password,
                host=host,
                port=port,
                database=schema
            )
        elif ds_type == 1:  # ClickZetta
            url = self._build_clickzetta_url(conn_info, host, port, schema)
        else:
            raise ValueError(f"Unsupported data source type: {ds_type}")

        # Merge with caller-supplied connect_args instead of passing the
        # keyword twice; an explicit caller 'options' entry takes precedence.
        connect_args = dict(kwargs.pop('connect_args', None) or {})
        connect_args.setdefault('options', self._convert_options(options))
        return sa_create_engine(url, *args, connect_args=connect_args, **kwargs)

    def _build_clickzetta_url(self, conn_info, host, port, schema):
        """Build the ClickZetta connection URL for ``conn_info``."""
        # A workspace set via use_workspace() overrides the configured one.
        workspace = self._workspace or conn_info.workspaceName
        if not workspace or not conn_info.instanceName:
            raise DatabaseConnectionError("Missing required parameters 'workspace_name', "
                                          "'instance_name' for ClickZetta data source")
        if not self._vcluster:
            raise DatabaseConnectionError("Missing virtual cluster for ClickZetta data source")

        query = {}
        username = password = None
        if conn_info.username and conn_info.password:
            username, password = conn_info.username, conn_info.password
        elif conn_info.magicToken:
            query['magic_token'] = conn_info.magicToken
        else:
            raise ValueError("username and password or token must be specified")
        query['virtualcluster'] = self._vcluster

        # Add schema if provided
        if schema:
            query['schema'] = schema

        # URL.create keeps special characters in credentials and tokens
        # intact, which plain string formatting of the URL does not.
        return URL.create(
            drivername='clickzetta',
            username=username,
            password=password,
            host=f"{conn_info.instanceName}.{host}",
            port=port,
            database=workspace,
            query=query
        )

    @staticmethod
    def _convert_options(options):
        """Render an options mapping as a ``-c key=value`` string ('' if empty)."""
        if not options:
            return ''
        return ' '.join([f'-c {k}={v}' for k, v in options.items()])
230
+
231
+
232
def get_lakehouse_client(conn):
    """Return the ``_client`` attribute found at ``conn.connection.connection``."""
    outer = conn.connection
    inner = outer.connection
    return inner._client
234
+
235
+
236
def get_active_engine(
        ds_name: str,
        vcluster: Optional[str] = None,
        workspace: Optional[str] = None,
        schema: Optional[str] = None,
        options: Optional[Dict[str, str]] = None,
        *args, **kwargs
) -> Engine:
    """
    Build an engine for a data source in a single call.

    Each truthy override is applied to a fresh DatabaseConnectionManager
    before connecting; falsy overrides are skipped.

    Args:
        ds_name (str): Data source name
        vcluster (str, optional): Virtual cluster name
        workspace (str, optional): Workspace name
        schema (str, optional): Schema name
        options (dict, optional): Additional connection options
        *args, **kwargs: Passed through to the manager's ``connect``

    Returns:
        SQLAlchemy Engine instance
    """
    manager = DatabaseConnectionManager()

    # Pair each override with its setter; applied in the listed order.
    overrides = (
        (workspace, manager.use_workspace),
        (schema, manager.use_schema),
        (vcluster, manager.use_vcluster),
        (options, manager.use_options),
    )
    for value, apply_override in overrides:
        if value:
            apply_override(value)

    return manager.connect(ds_name, *args, **kwargs)
@@ -0,0 +1 @@
1
+ __version__ = "1.0.0"
@@ -0,0 +1,24 @@
1
+ Metadata-Version: 2.2
2
+ Name: clickzetta-dbutils
3
+ Version: 1.0.0
4
+ Summary: clickzetta dbutils
5
+ Author-email: "lin.zhang" <lin.zhang@clickzetta.com>
6
+ Project-URL: documentation, https://www.yunqi.tech/
7
+ Platform: Posix
8
+ Platform: MacOS X
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Requires-Python: >=3.7
11
+ Requires-Dist: clickzetta-connector-python>=0.8.79.8
12
+ Requires-Dist: psycopg2
13
+ Requires-Dist: sqlalchemy<2.0.0,>=1.4.0
14
+ Requires-Dist: mysqlclient
15
+ Requires-Dist: mysql-connector-python
16
+ Provides-Extra: dev
17
+ Requires-Dist: pytest==8.2.1; extra == "dev"
18
+ Requires-Dist: sqlparse; extra == "dev"
19
+ Requires-Dist: grpcio; extra == "dev"
20
+ Requires-Dist: grpcio-tools; extra == "dev"
21
+ Requires-Dist: build; extra == "dev"
22
+ Requires-Dist: pytest-xdist; extra == "dev"
23
+ Requires-Dist: pytz; extra == "dev"
24
+ Requires-Dist: apache-superset==4.0.2; extra == "dev"
@@ -0,0 +1,7 @@
1
+ clickzetta_dbutils/__init__.py,sha256=OevYNnzvgLUw0-KDJKG7loCONkN_7DxtmqZAmzCHGAg,282
2
+ clickzetta_dbutils/db_utils.py,sha256=L170MOzkMH4cfEwYJcHPz7_aQcTi96e9BqkF6CCRfc4,8506
3
+ clickzetta_dbutils/version.py,sha256=Aj77VL1d5Mdku7sgCgKQmPuYavPpAHuZuJcy6bygQZE,21
4
+ clickzetta_dbutils-1.0.0.dist-info/METADATA,sha256=B3yZU_cOnTz0sCOTqm47dEwxdm66wCLlIZQt-Fbgclc,836
5
+ clickzetta_dbutils-1.0.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
6
+ clickzetta_dbutils-1.0.0.dist-info/top_level.txt,sha256=8o5KqMSg9pxnPNejHjMaqZV2vEDvwvsz2GdChZI0N6I,19
7
+ clickzetta_dbutils-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (75.8.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ clickzetta_dbutils