odibi-2.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- odibi/__init__.py +32 -0
- odibi/__main__.py +8 -0
- odibi/catalog.py +3011 -0
- odibi/cli/__init__.py +11 -0
- odibi/cli/__main__.py +6 -0
- odibi/cli/catalog.py +553 -0
- odibi/cli/deploy.py +69 -0
- odibi/cli/doctor.py +161 -0
- odibi/cli/export.py +66 -0
- odibi/cli/graph.py +150 -0
- odibi/cli/init_pipeline.py +242 -0
- odibi/cli/lineage.py +259 -0
- odibi/cli/main.py +215 -0
- odibi/cli/run.py +98 -0
- odibi/cli/schema.py +208 -0
- odibi/cli/secrets.py +232 -0
- odibi/cli/story.py +379 -0
- odibi/cli/system.py +132 -0
- odibi/cli/test.py +286 -0
- odibi/cli/ui.py +31 -0
- odibi/cli/validate.py +39 -0
- odibi/config.py +3541 -0
- odibi/connections/__init__.py +9 -0
- odibi/connections/azure_adls.py +499 -0
- odibi/connections/azure_sql.py +709 -0
- odibi/connections/base.py +28 -0
- odibi/connections/factory.py +322 -0
- odibi/connections/http.py +78 -0
- odibi/connections/local.py +119 -0
- odibi/connections/local_dbfs.py +61 -0
- odibi/constants.py +17 -0
- odibi/context.py +528 -0
- odibi/diagnostics/__init__.py +12 -0
- odibi/diagnostics/delta.py +520 -0
- odibi/diagnostics/diff.py +169 -0
- odibi/diagnostics/manager.py +171 -0
- odibi/engine/__init__.py +20 -0
- odibi/engine/base.py +334 -0
- odibi/engine/pandas_engine.py +2178 -0
- odibi/engine/polars_engine.py +1114 -0
- odibi/engine/registry.py +54 -0
- odibi/engine/spark_engine.py +2362 -0
- odibi/enums.py +7 -0
- odibi/exceptions.py +297 -0
- odibi/graph.py +426 -0
- odibi/introspect.py +1214 -0
- odibi/lineage.py +511 -0
- odibi/node.py +3341 -0
- odibi/orchestration/__init__.py +0 -0
- odibi/orchestration/airflow.py +90 -0
- odibi/orchestration/dagster.py +77 -0
- odibi/patterns/__init__.py +24 -0
- odibi/patterns/aggregation.py +599 -0
- odibi/patterns/base.py +94 -0
- odibi/patterns/date_dimension.py +423 -0
- odibi/patterns/dimension.py +696 -0
- odibi/patterns/fact.py +748 -0
- odibi/patterns/merge.py +128 -0
- odibi/patterns/scd2.py +148 -0
- odibi/pipeline.py +2382 -0
- odibi/plugins.py +80 -0
- odibi/project.py +581 -0
- odibi/references.py +151 -0
- odibi/registry.py +246 -0
- odibi/semantics/__init__.py +71 -0
- odibi/semantics/materialize.py +392 -0
- odibi/semantics/metrics.py +361 -0
- odibi/semantics/query.py +743 -0
- odibi/semantics/runner.py +430 -0
- odibi/semantics/story.py +507 -0
- odibi/semantics/views.py +432 -0
- odibi/state/__init__.py +1203 -0
- odibi/story/__init__.py +55 -0
- odibi/story/doc_story.py +554 -0
- odibi/story/generator.py +1431 -0
- odibi/story/lineage.py +1043 -0
- odibi/story/lineage_utils.py +324 -0
- odibi/story/metadata.py +608 -0
- odibi/story/renderers.py +453 -0
- odibi/story/templates/run_story.html +2520 -0
- odibi/story/themes.py +216 -0
- odibi/testing/__init__.py +13 -0
- odibi/testing/assertions.py +75 -0
- odibi/testing/fixtures.py +85 -0
- odibi/testing/source_pool.py +277 -0
- odibi/transformers/__init__.py +122 -0
- odibi/transformers/advanced.py +1472 -0
- odibi/transformers/delete_detection.py +610 -0
- odibi/transformers/manufacturing.py +1029 -0
- odibi/transformers/merge_transformer.py +778 -0
- odibi/transformers/relational.py +675 -0
- odibi/transformers/scd.py +579 -0
- odibi/transformers/sql_core.py +1356 -0
- odibi/transformers/validation.py +165 -0
- odibi/ui/__init__.py +0 -0
- odibi/ui/app.py +195 -0
- odibi/utils/__init__.py +66 -0
- odibi/utils/alerting.py +667 -0
- odibi/utils/config_loader.py +343 -0
- odibi/utils/console.py +231 -0
- odibi/utils/content_hash.py +202 -0
- odibi/utils/duration.py +43 -0
- odibi/utils/encoding.py +102 -0
- odibi/utils/extensions.py +28 -0
- odibi/utils/hashing.py +61 -0
- odibi/utils/logging.py +203 -0
- odibi/utils/logging_context.py +740 -0
- odibi/utils/progress.py +429 -0
- odibi/utils/setup_helpers.py +302 -0
- odibi/utils/telemetry.py +140 -0
- odibi/validation/__init__.py +62 -0
- odibi/validation/engine.py +765 -0
- odibi/validation/explanation_linter.py +155 -0
- odibi/validation/fk.py +547 -0
- odibi/validation/gate.py +252 -0
- odibi/validation/quarantine.py +605 -0
- odibi/writers/__init__.py +15 -0
- odibi/writers/sql_server_writer.py +2081 -0
- odibi-2.5.0.dist-info/METADATA +255 -0
- odibi-2.5.0.dist-info/RECORD +124 -0
- odibi-2.5.0.dist-info/WHEEL +5 -0
- odibi-2.5.0.dist-info/entry_points.txt +2 -0
- odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
- odibi-2.5.0.dist-info/top_level.txt +1 -0

odibi/connections/azure_sql.py
@@ -0,0 +1,709 @@
"""
|
|
2
|
+
Azure SQL Database Connection
|
|
3
|
+
==============================
|
|
4
|
+
|
|
5
|
+
Provides connectivity to Azure SQL databases with authentication support.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Any, Dict, List, Optional
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from odibi.connections.base import BaseConnection
|
|
13
|
+
from odibi.exceptions import ConnectionError
|
|
14
|
+
from odibi.utils.logging import logger
|
|
15
|
+
from odibi.utils.logging_context import get_logging_context
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class AzureSQL(BaseConnection):
|
|
19
|
+
"""
|
|
20
|
+
Azure SQL Database connection.
|
|
21
|
+
|
|
22
|
+
Supports:
|
|
23
|
+
- SQL authentication (username/password)
|
|
24
|
+
- Azure Active Directory Managed Identity
|
|
25
|
+
- Connection pooling
|
|
26
|
+
- Read/write operations via SQLAlchemy
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
def __init__(
|
|
30
|
+
self,
|
|
31
|
+
server: str,
|
|
32
|
+
database: str,
|
|
33
|
+
driver: str = "ODBC Driver 18 for SQL Server",
|
|
34
|
+
username: Optional[str] = None,
|
|
35
|
+
password: Optional[str] = None,
|
|
36
|
+
auth_mode: str = "aad_msi", # "aad_msi", "sql", "key_vault"
|
|
37
|
+
key_vault_name: Optional[str] = None,
|
|
38
|
+
secret_name: Optional[str] = None,
|
|
39
|
+
port: int = 1433,
|
|
40
|
+
timeout: int = 30,
|
|
41
|
+
**kwargs,
|
|
42
|
+
):
|
|
43
|
+
"""
|
|
44
|
+
Initialize Azure SQL connection.
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
server: SQL server hostname (e.g., 'myserver.database.windows.net')
|
|
48
|
+
database: Database name
|
|
49
|
+
driver: ODBC driver name (default: ODBC Driver 18 for SQL Server)
|
|
50
|
+
username: SQL auth username (required if auth_mode='sql')
|
|
51
|
+
password: SQL auth password (required if auth_mode='sql')
|
|
52
|
+
auth_mode: Authentication mode ('aad_msi', 'sql', 'key_vault')
|
|
53
|
+
key_vault_name: Key Vault name (required if auth_mode='key_vault')
|
|
54
|
+
secret_name: Secret name containing password (required if auth_mode='key_vault')
|
|
55
|
+
port: SQL Server port (default: 1433)
|
|
56
|
+
timeout: Connection timeout in seconds (default: 30)
|
|
57
|
+
"""
|
|
58
|
+
ctx = get_logging_context()
|
|
59
|
+
ctx.log_connection(
|
|
60
|
+
connection_type="azure_sql",
|
|
61
|
+
connection_name=f"{server}/{database}",
|
|
62
|
+
action="init",
|
|
63
|
+
server=server,
|
|
64
|
+
database=database,
|
|
65
|
+
auth_mode=auth_mode,
|
|
66
|
+
port=port,
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
self.server = server
|
|
70
|
+
self.database = database
|
|
71
|
+
self.driver = driver
|
|
72
|
+
self.username = username
|
|
73
|
+
self.password = password
|
|
74
|
+
self.auth_mode = auth_mode
|
|
75
|
+
self.key_vault_name = key_vault_name
|
|
76
|
+
self.secret_name = secret_name
|
|
77
|
+
self.port = port
|
|
78
|
+
self.timeout = timeout
|
|
79
|
+
self._engine = None
|
|
80
|
+
self._cached_key = None # For consistency with ADLS / parallel fetch
|
|
81
|
+
|
|
82
|
+
ctx.debug(
|
|
83
|
+
"AzureSQL connection initialized",
|
|
84
|
+
server=server,
|
|
85
|
+
database=database,
|
|
86
|
+
auth_mode=auth_mode,
|
|
87
|
+
driver=driver,
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
    def get_password(self) -> Optional[str]:
        """Get password (cached)."""
        ctx = get_logging_context()

        if self.password:
            ctx.debug(
                "Using provided password",
                server=self.server,
                database=self.database,
            )
            return self.password

        if self._cached_key:
            ctx.debug(
                "Using cached password",
                server=self.server,
                database=self.database,
            )
            return self._cached_key

        if self.auth_mode == "key_vault":
            if not self.key_vault_name or not self.secret_name:
                ctx.error(
                    "Key Vault mode requires key_vault_name and secret_name",
                    server=self.server,
                    database=self.database,
                )
                raise ValueError(
                    f"key_vault mode requires 'key_vault_name' and 'secret_name' "
                    f"for connection to {self.server}/{self.database}. "
                    f"Got key_vault_name={self.key_vault_name or '(missing)'}, "
                    f"secret_name={self.secret_name or '(missing)'}."
                )

            ctx.debug(
                "Fetching password from Key Vault",
                server=self.server,
                key_vault_name=self.key_vault_name,
                secret_name=self.secret_name,
            )

            try:
                from azure.identity import DefaultAzureCredential
                from azure.keyvault.secrets import SecretClient

                credential = DefaultAzureCredential()
                kv_uri = f"https://{self.key_vault_name}.vault.azure.net"
                client = SecretClient(vault_url=kv_uri, credential=credential)
                secret = client.get_secret(self.secret_name)
                self._cached_key = secret.value
                logger.register_secret(self._cached_key)

                ctx.info(
                    "Successfully fetched password from Key Vault",
                    server=self.server,
                    key_vault_name=self.key_vault_name,
                )
                return self._cached_key
            except ImportError as e:
                ctx.error(
                    "Key Vault support requires azure libraries",
                    server=self.server,
                    error=str(e),
                )
                raise ImportError(
                    "Key Vault support requires 'azure-identity' and 'azure-keyvault-secrets'. "
                    "Install with: pip install odibi[azure]"
                )

        ctx.debug(
            "No password required for auth_mode",
            server=self.server,
            auth_mode=self.auth_mode,
        )
        return None

    def odbc_dsn(self) -> str:
        """Build ODBC connection string.

        Returns:
            ODBC DSN string

        Example:
            >>> conn = AzureSQL(server="myserver.database.windows.net", database="mydb")
            >>> conn.odbc_dsn()
            'Driver={ODBC Driver 18 for SQL Server};Server=tcp:myserver...'
        """
        ctx = get_logging_context()
        ctx.debug(
            "Building ODBC connection string",
            server=self.server,
            database=self.database,
            auth_mode=self.auth_mode,
        )

        dsn = (
            f"Driver={{{self.driver}}};"
            f"Server=tcp:{self.server},{self.port};"
            f"Database={self.database};"
            f"Encrypt=yes;"
            f"TrustServerCertificate=yes;"
            f"Connection Timeout={self.timeout};"
        )

        pwd = self.get_password()
        if self.username and pwd:
            dsn += f"UID={self.username};PWD={pwd};"
            ctx.debug(
                "Using SQL authentication",
                server=self.server,
                username=self.username,
            )
        elif self.auth_mode == "aad_msi":
            dsn += "Authentication=ActiveDirectoryMsi;"
            ctx.debug(
                "Using AAD Managed Identity authentication",
                server=self.server,
            )
        elif self.auth_mode == "aad_service_principal":
            # Not fully supported via a plain ODBC string; normally requires an access token
            ctx.debug(
                "Using AAD Service Principal authentication",
                server=self.server,
            )

        return dsn

    def get_path(self, relative_path: str) -> str:
        """Get table reference for relative path."""
        return relative_path

    def validate(self) -> None:
        """Validate Azure SQL connection configuration."""
        ctx = get_logging_context()
        ctx.debug(
            "Validating AzureSQL connection",
            server=self.server,
            database=self.database,
            auth_mode=self.auth_mode,
        )

        if not self.server:
            ctx.error("AzureSQL validation failed: missing 'server'")
            raise ValueError(
                "Azure SQL connection requires 'server'. "
                "Provide the SQL server hostname (e.g., server: 'myserver.database.windows.net')."
            )
        if not self.database:
            ctx.error(
                "AzureSQL validation failed: missing 'database'",
                server=self.server,
            )
            raise ValueError(
                f"Azure SQL connection requires 'database' for server '{self.server}'."
            )

        if self.auth_mode == "sql":
            if not self.username:
                ctx.error(
                    "AzureSQL validation failed: SQL auth requires username",
                    server=self.server,
                    database=self.database,
                )
                raise ValueError(
                    f"Azure SQL with auth_mode='sql' requires 'username' "
                    f"for connection to {self.server}/{self.database}."
                )
            if not self.password and not (self.key_vault_name and self.secret_name):
                ctx.error(
                    "AzureSQL validation failed: SQL auth requires password",
                    server=self.server,
                    database=self.database,
                )
                raise ValueError(
                    "Azure SQL with auth_mode='sql' requires password "
                    "(or key_vault_name/secret_name)"
                )

        if self.auth_mode == "key_vault":
            if not self.key_vault_name or not self.secret_name:
                ctx.error(
                    "AzureSQL validation failed: key_vault mode missing config",
                    server=self.server,
                    database=self.database,
                )
                raise ValueError(
                    "Azure SQL with auth_mode='key_vault' requires key_vault_name and secret_name"
                )
            if not self.username:
                ctx.error(
                    "AzureSQL validation failed: key_vault mode requires username",
                    server=self.server,
                    database=self.database,
                )
                raise ValueError("Azure SQL with auth_mode='key_vault' requires username")

        ctx.info(
            "AzureSQL connection validated successfully",
            server=self.server,
            database=self.database,
            auth_mode=self.auth_mode,
        )

    def get_engine(self) -> Any:
        """
        Get or create SQLAlchemy engine.

        Returns:
            SQLAlchemy engine instance

        Raises:
            ConnectionError: If connection fails or drivers missing
        """
        ctx = get_logging_context()

        if self._engine is not None:
            ctx.debug(
                "Using cached SQLAlchemy engine",
                server=self.server,
                database=self.database,
            )
            return self._engine

        ctx.debug(
            "Creating SQLAlchemy engine",
            server=self.server,
            database=self.database,
        )

        try:
            from urllib.parse import quote_plus

            from sqlalchemy import create_engine
        except ImportError as e:
            ctx.error(
                "SQLAlchemy import failed",
                server=self.server,
                database=self.database,
                error=str(e),
            )
            raise ConnectionError(
                connection_name=f"AzureSQL({self.server})",
                reason="Required packages 'sqlalchemy' or 'pyodbc' not found.",
                suggestions=[
                    "Install required packages: pip install sqlalchemy pyodbc",
                    "Or install odibi with azure extras: pip install 'odibi[azure]'",
                ],
            )

        try:
            # Build connection string
            conn_str = self.odbc_dsn()
            connection_url = f"mssql+pyodbc:///?odbc_connect={quote_plus(conn_str)}"

            ctx.debug(
                "Creating SQLAlchemy engine with connection pooling",
                server=self.server,
                database=self.database,
            )

            # Create engine with connection pooling
            self._engine = create_engine(
                connection_url,
                pool_pre_ping=True,  # Verify connections before use
                pool_recycle=3600,  # Recycle connections after 1 hour
                echo=False,
            )

            # Test connection
            with self._engine.connect():
                pass

            ctx.info(
                "SQLAlchemy engine created successfully",
                server=self.server,
                database=self.database,
            )

            return self._engine

        except Exception as e:
            suggestions = self._get_error_suggestions(str(e))
            ctx.error(
                "Failed to create SQLAlchemy engine",
                server=self.server,
                database=self.database,
                error=str(e),
                suggestions=suggestions,
            )
            raise ConnectionError(
                connection_name=f"AzureSQL({self.server})",
                reason=f"Failed to create engine: {str(e)}",
                suggestions=suggestions,
            )

    def read_sql(self, query: str, params: Optional[Dict[str, Any]] = None) -> pd.DataFrame:
        """
        Execute SQL query and return results as DataFrame.

        Args:
            query: SQL query string
            params: Optional query parameters for parameterized queries

        Returns:
            Query results as pandas DataFrame

        Raises:
            ConnectionError: If execution fails
        """
        ctx = get_logging_context()
        ctx.debug(
            "Executing SQL query",
            server=self.server,
            database=self.database,
            query_length=len(query),
        )

        try:
            engine = self.get_engine()
            result = pd.read_sql(query, engine, params=params)

            ctx.info(
                "SQL query executed successfully",
                server=self.server,
                database=self.database,
                rows_returned=len(result),
            )
            return result
        except Exception as e:
            if isinstance(e, ConnectionError):
                raise
            ctx.error(
                "SQL query execution failed",
                server=self.server,
                database=self.database,
                error=str(e),
            )
            raise ConnectionError(
                connection_name=f"AzureSQL({self.server})",
                reason=f"Query execution failed: {str(e)}",
                suggestions=self._get_error_suggestions(str(e)),
            )

    def read_table(self, table_name: str, schema: Optional[str] = "dbo") -> pd.DataFrame:
        """
        Read entire table into DataFrame.

        Args:
            table_name: Name of the table
            schema: Schema name (default: dbo)

        Returns:
            Table contents as pandas DataFrame
        """
        ctx = get_logging_context()
        ctx.info(
            "Reading table",
            server=self.server,
            database=self.database,
            table_name=table_name,
            schema=schema,
        )

        if schema:
            query = f"SELECT * FROM [{schema}].[{table_name}]"
        else:
            query = f"SELECT * FROM [{table_name}]"

        return self.read_sql(query)

    def write_table(
        self,
        df: pd.DataFrame,
        table_name: str,
        schema: Optional[str] = "dbo",
        if_exists: str = "replace",
        index: bool = False,
        chunksize: Optional[int] = 1000,
    ) -> int:
        """
        Write DataFrame to SQL table.

        Args:
            df: DataFrame to write
            table_name: Name of the table
            schema: Schema name (default: dbo)
            if_exists: How to behave if table exists ('fail', 'replace', 'append')
            index: Whether to write DataFrame index as column
            chunksize: Number of rows to write in each batch (default: 1000)

        Returns:
            Number of rows written

        Raises:
            ConnectionError: If write fails
        """
        ctx = get_logging_context()
        ctx.info(
            "Writing DataFrame to table",
            server=self.server,
            database=self.database,
            table_name=table_name,
            schema=schema,
            rows=len(df),
            if_exists=if_exists,
            chunksize=chunksize,
        )

        try:
            engine = self.get_engine()

            rows_written = df.to_sql(
                name=table_name,
                con=engine,
                schema=schema,
                if_exists=if_exists,
                index=index,
                chunksize=chunksize,
                method="multi",  # Use multi-row INSERT for better performance
            )

            result_rows = rows_written if rows_written is not None else len(df)
            ctx.info(
                "Table write completed successfully",
                server=self.server,
                database=self.database,
                table_name=table_name,
                rows_written=result_rows,
            )
            return result_rows
        except Exception as e:
            if isinstance(e, ConnectionError):
                raise
            ctx.error(
                "Table write failed",
                server=self.server,
                database=self.database,
                table_name=table_name,
                error=str(e),
            )
            raise ConnectionError(
                connection_name=f"AzureSQL({self.server})",
                reason=f"Write operation failed: {str(e)}",
                suggestions=self._get_error_suggestions(str(e)),
            )

    def execute_sql(self, sql: str, params: Optional[Dict[str, Any]] = None) -> Any:
        """
        Execute SQL statement (INSERT, UPDATE, DELETE, etc.).

        Alias for execute() - used by SqlServerMergeWriter.

        Args:
            sql: SQL statement
            params: Optional parameters for parameterized query

        Returns:
            Result from execution

        Raises:
            ConnectionError: If execution fails
        """
        return self.execute(sql, params)

    def execute(self, sql: str, params: Optional[Dict[str, Any]] = None) -> Any:
        """
        Execute SQL statement (INSERT, UPDATE, DELETE, etc.).

        Args:
            sql: SQL statement
            params: Optional parameters for parameterized query

        Returns:
            Result from execution

        Raises:
            ConnectionError: If execution fails
        """
        ctx = get_logging_context()
        ctx.debug(
            "Executing SQL statement",
            server=self.server,
            database=self.database,
            statement_length=len(sql),
        )

        try:
            engine = self.get_engine()
            from sqlalchemy import text

            with engine.connect() as conn:
                result = conn.execute(text(sql), params or {})
                # Fetch all results before commit to avoid cursor invalidation
                if result.returns_rows:
                    rows = result.fetchall()
                else:
                    rows = None
                conn.commit()

            ctx.info(
                "SQL statement executed successfully",
                server=self.server,
                database=self.database,
            )
            return rows
        except Exception as e:
            if isinstance(e, ConnectionError):
                raise
            ctx.error(
                "SQL statement execution failed",
                server=self.server,
                database=self.database,
                error=str(e),
            )
            raise ConnectionError(
                connection_name=f"AzureSQL({self.server})",
                reason=f"Statement execution failed: {str(e)}",
                suggestions=self._get_error_suggestions(str(e)),
            )

    def close(self):
        """Close database connection and dispose of engine."""
        ctx = get_logging_context()
        ctx.debug(
            "Closing AzureSQL connection",
            server=self.server,
            database=self.database,
        )

        if self._engine:
            self._engine.dispose()
            self._engine = None
            ctx.info(
                "AzureSQL connection closed",
                server=self.server,
                database=self.database,
            )

    def _get_error_suggestions(self, error_msg: str) -> List[str]:
        """Generate suggestions based on error message."""
        suggestions = []
        error_lower = error_msg.lower()

        if "login failed" in error_lower:
            suggestions.append("Check username and password")
            suggestions.append(f"Verify auth_mode is correct (current: {self.auth_mode})")
        if "identity" in error_lower:
            suggestions.append("Ensure Managed Identity has access to the database")

        if "firewall" in error_lower or "tcp provider" in error_lower:
            suggestions.append("Check Azure SQL Server firewall rules")
            suggestions.append("Ensure client IP is allowed")

        if "driver" in error_lower:
            suggestions.append(f"Verify ODBC driver '{self.driver}' is installed")
            suggestions.append("On Linux: sudo apt-get install msodbcsql18")

        return suggestions

    def get_spark_options(self) -> Dict[str, str]:
        """Get Spark JDBC options.

        Returns:
            Dictionary of Spark JDBC options (url, user, password, etc.)
        """
        ctx = get_logging_context()
        ctx.info(
            "Building Spark JDBC options",
            server=self.server,
            database=self.database,
            auth_mode=self.auth_mode,
        )

        jdbc_url = (
            f"jdbc:sqlserver://{self.server}:{self.port};"
            f"databaseName={self.database};encrypt=true;trustServerCertificate=true;"
        )

        if self.auth_mode == "aad_msi":
            jdbc_url += (
                "hostNameInCertificate=*.database.windows.net;"
                "loginTimeout=30;authentication=ActiveDirectoryMsi;"
            )
            ctx.debug(
                "Configured JDBC URL for AAD MSI",
                server=self.server,
            )
        elif self.auth_mode == "aad_service_principal":
            # Placeholder: service principal auth is not yet wired up in __init__
            ctx.debug(
                "Configured JDBC URL for AAD Service Principal",
                server=self.server,
            )

        options = {
            "url": jdbc_url,
            "driver": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
        }

        if self.auth_mode == "sql" or self.auth_mode == "key_vault":
            if self.username:
                options["user"] = self.username

            pwd = self.get_password()
            if pwd:
                options["password"] = pwd

            ctx.debug(
                "Added SQL authentication to Spark options",
                server=self.server,
                username=self.username,
            )

        ctx.info(
            "Spark JDBC options built successfully",
            server=self.server,
            database=self.database,
        )

        return options
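
For orientation, here is a minimal usage sketch of the AzureSQL class added above. The constructor arguments, validate, read_table, write_table, get_spark_options, and close calls all come from the diff itself; the server, vault, user, and table names are placeholders, and the Spark lines assume an existing SparkSession named `spark` (neither is defined by this module).

from odibi.connections.azure_sql import AzureSQL

# Placeholder names for illustration only.
conn = AzureSQL(
    server="myserver.database.windows.net",
    database="mydb",
    auth_mode="key_vault",
    username="etl_user",
    key_vault_name="my-vault",
    secret_name="sql-password",
)
conn.validate()

orders = conn.read_table("orders", schema="dbo")
conn.write_table(orders, "orders_copy", if_exists="replace")

# Spark path: hand the JDBC options to a DataFrameReader (assumes `spark` exists).
# jdbc_df = (
#     spark.read.format("jdbc")
#     .options(**conn.get_spark_options())
#     .option("dbtable", "dbo.orders")
#     .load()
# )

conn.close()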