odibi 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. odibi/__init__.py +32 -0
  2. odibi/__main__.py +8 -0
  3. odibi/catalog.py +3011 -0
  4. odibi/cli/__init__.py +11 -0
  5. odibi/cli/__main__.py +6 -0
  6. odibi/cli/catalog.py +553 -0
  7. odibi/cli/deploy.py +69 -0
  8. odibi/cli/doctor.py +161 -0
  9. odibi/cli/export.py +66 -0
  10. odibi/cli/graph.py +150 -0
  11. odibi/cli/init_pipeline.py +242 -0
  12. odibi/cli/lineage.py +259 -0
  13. odibi/cli/main.py +215 -0
  14. odibi/cli/run.py +98 -0
  15. odibi/cli/schema.py +208 -0
  16. odibi/cli/secrets.py +232 -0
  17. odibi/cli/story.py +379 -0
  18. odibi/cli/system.py +132 -0
  19. odibi/cli/test.py +286 -0
  20. odibi/cli/ui.py +31 -0
  21. odibi/cli/validate.py +39 -0
  22. odibi/config.py +3541 -0
  23. odibi/connections/__init__.py +9 -0
  24. odibi/connections/azure_adls.py +499 -0
  25. odibi/connections/azure_sql.py +709 -0
  26. odibi/connections/base.py +28 -0
  27. odibi/connections/factory.py +322 -0
  28. odibi/connections/http.py +78 -0
  29. odibi/connections/local.py +119 -0
  30. odibi/connections/local_dbfs.py +61 -0
  31. odibi/constants.py +17 -0
  32. odibi/context.py +528 -0
  33. odibi/diagnostics/__init__.py +12 -0
  34. odibi/diagnostics/delta.py +520 -0
  35. odibi/diagnostics/diff.py +169 -0
  36. odibi/diagnostics/manager.py +171 -0
  37. odibi/engine/__init__.py +20 -0
  38. odibi/engine/base.py +334 -0
  39. odibi/engine/pandas_engine.py +2178 -0
  40. odibi/engine/polars_engine.py +1114 -0
  41. odibi/engine/registry.py +54 -0
  42. odibi/engine/spark_engine.py +2362 -0
  43. odibi/enums.py +7 -0
  44. odibi/exceptions.py +297 -0
  45. odibi/graph.py +426 -0
  46. odibi/introspect.py +1214 -0
  47. odibi/lineage.py +511 -0
  48. odibi/node.py +3341 -0
  49. odibi/orchestration/__init__.py +0 -0
  50. odibi/orchestration/airflow.py +90 -0
  51. odibi/orchestration/dagster.py +77 -0
  52. odibi/patterns/__init__.py +24 -0
  53. odibi/patterns/aggregation.py +599 -0
  54. odibi/patterns/base.py +94 -0
  55. odibi/patterns/date_dimension.py +423 -0
  56. odibi/patterns/dimension.py +696 -0
  57. odibi/patterns/fact.py +748 -0
  58. odibi/patterns/merge.py +128 -0
  59. odibi/patterns/scd2.py +148 -0
  60. odibi/pipeline.py +2382 -0
  61. odibi/plugins.py +80 -0
  62. odibi/project.py +581 -0
  63. odibi/references.py +151 -0
  64. odibi/registry.py +246 -0
  65. odibi/semantics/__init__.py +71 -0
  66. odibi/semantics/materialize.py +392 -0
  67. odibi/semantics/metrics.py +361 -0
  68. odibi/semantics/query.py +743 -0
  69. odibi/semantics/runner.py +430 -0
  70. odibi/semantics/story.py +507 -0
  71. odibi/semantics/views.py +432 -0
  72. odibi/state/__init__.py +1203 -0
  73. odibi/story/__init__.py +55 -0
  74. odibi/story/doc_story.py +554 -0
  75. odibi/story/generator.py +1431 -0
  76. odibi/story/lineage.py +1043 -0
  77. odibi/story/lineage_utils.py +324 -0
  78. odibi/story/metadata.py +608 -0
  79. odibi/story/renderers.py +453 -0
  80. odibi/story/templates/run_story.html +2520 -0
  81. odibi/story/themes.py +216 -0
  82. odibi/testing/__init__.py +13 -0
  83. odibi/testing/assertions.py +75 -0
  84. odibi/testing/fixtures.py +85 -0
  85. odibi/testing/source_pool.py +277 -0
  86. odibi/transformers/__init__.py +122 -0
  87. odibi/transformers/advanced.py +1472 -0
  88. odibi/transformers/delete_detection.py +610 -0
  89. odibi/transformers/manufacturing.py +1029 -0
  90. odibi/transformers/merge_transformer.py +778 -0
  91. odibi/transformers/relational.py +675 -0
  92. odibi/transformers/scd.py +579 -0
  93. odibi/transformers/sql_core.py +1356 -0
  94. odibi/transformers/validation.py +165 -0
  95. odibi/ui/__init__.py +0 -0
  96. odibi/ui/app.py +195 -0
  97. odibi/utils/__init__.py +66 -0
  98. odibi/utils/alerting.py +667 -0
  99. odibi/utils/config_loader.py +343 -0
  100. odibi/utils/console.py +231 -0
  101. odibi/utils/content_hash.py +202 -0
  102. odibi/utils/duration.py +43 -0
  103. odibi/utils/encoding.py +102 -0
  104. odibi/utils/extensions.py +28 -0
  105. odibi/utils/hashing.py +61 -0
  106. odibi/utils/logging.py +203 -0
  107. odibi/utils/logging_context.py +740 -0
  108. odibi/utils/progress.py +429 -0
  109. odibi/utils/setup_helpers.py +302 -0
  110. odibi/utils/telemetry.py +140 -0
  111. odibi/validation/__init__.py +62 -0
  112. odibi/validation/engine.py +765 -0
  113. odibi/validation/explanation_linter.py +155 -0
  114. odibi/validation/fk.py +547 -0
  115. odibi/validation/gate.py +252 -0
  116. odibi/validation/quarantine.py +605 -0
  117. odibi/writers/__init__.py +15 -0
  118. odibi/writers/sql_server_writer.py +2081 -0
  119. odibi-2.5.0.dist-info/METADATA +255 -0
  120. odibi-2.5.0.dist-info/RECORD +124 -0
  121. odibi-2.5.0.dist-info/WHEEL +5 -0
  122. odibi-2.5.0.dist-info/entry_points.txt +2 -0
  123. odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
  124. odibi-2.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,709 @@
1
+ """
2
+ Azure SQL Database Connection
3
+ ==============================
4
+
5
+ Provides connectivity to Azure SQL databases with authentication support.
6
+ """
7
+
8
+ from typing import Any, Dict, List, Optional
9
+
10
+ import pandas as pd
11
+
12
+ from odibi.connections.base import BaseConnection
13
+ from odibi.exceptions import ConnectionError
14
+ from odibi.utils.logging import logger
15
+ from odibi.utils.logging_context import get_logging_context
16
+
17
+
18
+ class AzureSQL(BaseConnection):
19
+ """
20
+ Azure SQL Database connection.
21
+
22
+ Supports:
23
+ - SQL authentication (username/password)
24
+ - Azure Active Directory Managed Identity
25
+ - Connection pooling
26
+ - Read/write operations via SQLAlchemy
27
+ """
28
+
29
def __init__(
    self,
    server: str,
    database: str,
    driver: str = "ODBC Driver 18 for SQL Server",
    username: Optional[str] = None,
    password: Optional[str] = None,
    auth_mode: str = "aad_msi",  # "aad_msi", "sql", "key_vault"
    key_vault_name: Optional[str] = None,
    secret_name: Optional[str] = None,
    port: int = 1433,
    timeout: int = 30,
    **kwargs,
):
    """
    Record the Azure SQL connection settings.

    The SQLAlchemy engine is not built here; ``_engine`` starts as ``None``.
    Extra keyword arguments are accepted and not used by this method.

    Args:
        server: SQL server hostname (e.g., 'myserver.database.windows.net')
        database: Database name
        driver: ODBC driver name (default: ODBC Driver 18 for SQL Server)
        username: SQL auth username (required if auth_mode='sql')
        password: SQL auth password (required if auth_mode='sql')
        auth_mode: Authentication mode ('aad_msi', 'sql', 'key_vault')
        key_vault_name: Key Vault name (required if auth_mode='key_vault')
        secret_name: Secret name containing password (required if auth_mode='key_vault')
        port: SQL Server port (default: 1433)
        timeout: Connection timeout in seconds (default: 30)
    """
    log_ctx = get_logging_context()

    # Fields shared by both log records emitted below.
    target = {"server": server, "database": database, "auth_mode": auth_mode}

    log_ctx.log_connection(
        connection_type="azure_sql",
        connection_name=f"{server}/{database}",
        action="init",
        **target,
        port=port,
    )

    # Where to connect.
    self.server, self.database = server, database
    self.port, self.timeout = port, timeout
    self.driver = driver

    # How to authenticate.
    self.auth_mode = auth_mode
    self.username, self.password = username, password
    self.key_vault_name, self.secret_name = key_vault_name, secret_name

    # Lazily populated state.
    self._engine = None
    self._cached_key = None  # For consistency with ADLS / parallel fetch

    log_ctx.debug("AzureSQL connection initialized", **target, driver=driver)
89
+
90
def get_password(self) -> Optional[str]:
    """Return the password to authenticate with, fetching it if necessary.

    Resolution order:
        1. The explicitly provided ``password``.
        2. A value previously fetched from Key Vault (cached on the instance).
        3. A fresh Key Vault lookup, when ``auth_mode`` is ``'key_vault'``, or
           when ``auth_mode`` is ``'sql'`` and both ``key_vault_name`` and
           ``secret_name`` are set (the configuration ``validate()`` accepts
           in place of a password).

    Returns:
        The password, or ``None`` when the auth mode needs no password.

    Raises:
        ValueError: If ``auth_mode`` is ``'key_vault'`` but ``key_vault_name``
            or ``secret_name`` is missing.
        ImportError: If the Azure SDK packages are not installed.
    """
    ctx = get_logging_context()

    if self.password:
        ctx.debug(
            "Using provided password",
            server=self.server,
            database=self.database,
        )
        return self.password

    if self._cached_key:
        ctx.debug(
            "Using cached password",
            server=self.server,
            database=self.database,
        )
        return self._cached_key

    kv_configured = bool(self.key_vault_name and self.secret_name)
    # SQL auth may delegate its password to Key Vault; without this the
    # configuration passes validate() but never receives a password.
    use_key_vault = self.auth_mode == "key_vault" or (
        self.auth_mode == "sql" and kv_configured
    )

    if not use_key_vault:
        ctx.debug(
            "No password required for auth_mode",
            server=self.server,
            auth_mode=self.auth_mode,
        )
        return None

    if not kv_configured:
        ctx.error(
            "Key Vault mode requires key_vault_name and secret_name",
            server=self.server,
            database=self.database,
        )
        raise ValueError(
            f"key_vault mode requires 'key_vault_name' and 'secret_name' "
            f"for connection to {self.server}/{self.database}. "
            f"Got key_vault_name={self.key_vault_name or '(missing)'}, "
            f"secret_name={self.secret_name or '(missing)'}."
        )

    ctx.debug(
        "Fetching password from Key Vault",
        server=self.server,
        key_vault_name=self.key_vault_name,
        secret_name=self.secret_name,
    )

    try:
        # Imported lazily so the Azure SDK stays an optional dependency.
        from azure.identity import DefaultAzureCredential
        from azure.keyvault.secrets import SecretClient

        credential = DefaultAzureCredential()
        kv_uri = f"https://{self.key_vault_name}.vault.azure.net"
        client = SecretClient(vault_url=kv_uri, credential=credential)
        secret = client.get_secret(self.secret_name)
        self._cached_key = secret.value
        # Hand the secret to the logger right after fetching it.
        logger.register_secret(self._cached_key)

        ctx.info(
            "Successfully fetched password from Key Vault",
            server=self.server,
            key_vault_name=self.key_vault_name,
        )
        return self._cached_key
    except ImportError as e:
        ctx.error(
            "Key Vault support requires azure libraries",
            server=self.server,
            error=str(e),
        )
        raise ImportError(
            "Key Vault support requires 'azure-identity' and 'azure-keyvault-secrets'. "
            "Install with: pip install odibi[azure]"
        ) from e
165
+
166
def odbc_dsn(self) -> str:
    """Build ODBC connection string.

    The configured ``port`` and ``timeout`` are used for the ``Server`` and
    ``Connection Timeout`` entries; with the constructor defaults (1433 and
    30) the result is the same string as before.

    Returns:
        ODBC DSN string

    Example:
        >>> conn = AzureSQL(server="myserver.database.windows.net", database="mydb")
        >>> conn.odbc_dsn()
        'Driver={ODBC Driver 18 for SQL Server};Server=tcp:myserver...'
    """
    ctx = get_logging_context()
    ctx.debug(
        "Building ODBC connection string",
        server=self.server,
        database=self.database,
        auth_mode=self.auth_mode,
    )

    # NOTE(review): TrustServerCertificate=yes skips certificate validation;
    # kept as-is because changing it would alter connection behavior.
    dsn = (
        f"Driver={{{self.driver}}};"
        f"Server=tcp:{self.server},{self.port};"
        f"Database={self.database};"
        f"Encrypt=yes;"
        f"TrustServerCertificate=yes;"
        f"Connection Timeout={self.timeout};"
    )

    pwd = self.get_password()
    if self.username and pwd:
        # NOTE(review): values are interpolated verbatim; a password that
        # contains ';' or braces would need ODBC brace-quoting.
        dsn += f"UID={self.username};PWD={pwd};"
        ctx.debug(
            "Using SQL authentication",
            server=self.server,
            username=self.username,
        )
    elif self.auth_mode == "aad_msi":
        dsn += "Authentication=ActiveDirectoryMsi;"
        ctx.debug(
            "Using AAD Managed Identity authentication",
            server=self.server,
        )
    elif self.auth_mode == "aad_service_principal":
        # Not fully supported via ODBC string simply without token usually;
        # nothing is appended to the DSN for this mode.
        ctx.debug(
            "Using AAD Service Principal authentication",
            server=self.server,
        )

    return dsn
216
+
217
def get_path(self, relative_path: str) -> str:
    """Return ``relative_path`` unchanged as the table reference."""
    table_reference = relative_path
    return table_reference
220
+
221
def validate(self) -> None:
    """Check that the stored settings are sufficient for the chosen auth mode.

    Raises:
        ValueError: If ``server`` or ``database`` is missing, if SQL auth lacks
            a username or a password source, or if Key Vault auth lacks its
            vault/secret names or a username.
    """
    log_ctx = get_logging_context()
    mode = self.auth_mode
    where = {"server": self.server, "database": self.database}

    log_ctx.debug("Validating AzureSQL connection", **where, auth_mode=mode)

    # Target checks: both server and database are mandatory.
    if not self.server:
        log_ctx.error("AzureSQL validation failed: missing 'server'")
        raise ValueError(
            "Azure SQL connection requires 'server'. "
            "Provide the SQL server hostname (e.g., server: 'myserver.database.windows.net')."
        )
    if not self.database:
        log_ctx.error(
            "AzureSQL validation failed: missing 'database'",
            server=self.server,
        )
        raise ValueError(f"Azure SQL connection requires 'database' for server '{self.server}'.")

    has_vault_config = self.key_vault_name and self.secret_name

    # Credential checks for SQL authentication.
    if mode == "sql":
        if not self.username:
            log_ctx.error("AzureSQL validation failed: SQL auth requires username", **where)
            raise ValueError(
                f"Azure SQL with auth_mode='sql' requires 'username' "
                f"for connection to {self.server}/{self.database}."
            )
        if not (self.password or has_vault_config):
            log_ctx.error("AzureSQL validation failed: SQL auth requires password", **where)
            raise ValueError(
                "Azure SQL with auth_mode='sql' requires password "
                "(or key_vault_name/secret_name)"
            )

    # Credential checks for Key Vault authentication.
    if mode == "key_vault":
        if not has_vault_config:
            log_ctx.error("AzureSQL validation failed: key_vault mode missing config", **where)
            raise ValueError(
                "Azure SQL with auth_mode='key_vault' requires key_vault_name and secret_name"
            )
        if not self.username:
            log_ctx.error("AzureSQL validation failed: key_vault mode requires username", **where)
            raise ValueError("Azure SQL with auth_mode='key_vault' requires username")

    log_ctx.info("AzureSQL connection validated successfully", **where, auth_mode=mode)
292
+
293
def get_engine(self) -> Any:
    """
    Get or create SQLAlchemy engine.

    The engine is cached on the instance only after a test connection has
    succeeded. If creation or the test connection fails, the partially
    built engine is disposed and nothing is cached, so a later call retries
    from scratch.

    Returns:
        SQLAlchemy engine instance

    Raises:
        ConnectionError: If connection fails or drivers missing
    """
    ctx = get_logging_context()

    if self._engine is not None:
        ctx.debug(
            "Using cached SQLAlchemy engine",
            server=self.server,
            database=self.database,
        )
        return self._engine

    ctx.debug(
        "Creating SQLAlchemy engine",
        server=self.server,
        database=self.database,
    )

    try:
        from urllib.parse import quote_plus

        from sqlalchemy import create_engine
    except ImportError as e:
        ctx.error(
            "SQLAlchemy import failed",
            server=self.server,
            database=self.database,
            error=str(e),
        )
        raise ConnectionError(
            connection_name=f"AzureSQL({self.server})",
            reason="Required packages 'sqlalchemy' or 'pyodbc' not found.",
            suggestions=[
                "Install required packages: pip install sqlalchemy pyodbc",
                "Or install odibi with azure extras: pip install 'odibi[azure]'",
            ],
        ) from e

    # Held locally until the test connection succeeds, so a failure never
    # leaves an unverified engine cached on the instance.
    engine = None
    try:
        # Build connection string
        conn_str = self.odbc_dsn()
        connection_url = f"mssql+pyodbc:///?odbc_connect={quote_plus(conn_str)}"

        ctx.debug(
            "Creating SQLAlchemy engine with connection pooling",
            server=self.server,
            database=self.database,
        )

        # Create engine with connection pooling
        engine = create_engine(
            connection_url,
            pool_pre_ping=True,  # Verify connections before use
            pool_recycle=3600,  # Recycle connections after 1 hour
            echo=False,
        )

        # Test connection
        with engine.connect():
            pass
    except Exception as e:
        if engine is not None:
            # Release any pooled resources of the engine that failed its test.
            engine.dispose()
        suggestions = self._get_error_suggestions(str(e))
        ctx.error(
            "Failed to create SQLAlchemy engine",
            server=self.server,
            database=self.database,
            error=str(e),
            suggestions=suggestions,
        )
        raise ConnectionError(
            connection_name=f"AzureSQL({self.server})",
            reason=f"Failed to create engine: {str(e)}",
            suggestions=suggestions,
        ) from e

    self._engine = engine
    ctx.info(
        "SQLAlchemy engine created successfully",
        server=self.server,
        database=self.database,
    )
    return self._engine
384
+
385
def read_sql(self, query: str, params: Optional[Dict[str, Any]] = None) -> pd.DataFrame:
    """
    Run a query through the SQLAlchemy engine and return a DataFrame.

    Args:
        query: SQL query string
        params: Optional query parameters for parameterized queries

    Returns:
        Query results as pandas DataFrame

    Raises:
        ConnectionError: If execution fails
    """
    log_ctx = get_logging_context()
    target = {"server": self.server, "database": self.database}

    log_ctx.debug("Executing SQL query", **target, query_length=len(query))

    try:
        frame = pd.read_sql(query, self.get_engine(), params=params)
        log_ctx.info(
            "SQL query executed successfully",
            **target,
            rows_returned=len(frame),
        )
        return frame
    except ConnectionError:
        # Already in the project's error type (e.g. raised by get_engine).
        raise
    except Exception as exc:
        detail = str(exc)
        log_ctx.error("SQL query execution failed", **target, error=detail)
        raise ConnectionError(
            connection_name=f"AzureSQL({self.server})",
            reason=f"Query execution failed: {detail}",
            suggestions=self._get_error_suggestions(detail),
        )
432
+
433
def read_table(self, table_name: str, schema: Optional[str] = "dbo") -> pd.DataFrame:
    """
    Read entire table into DataFrame.

    Identifiers are bracket-quoted, and any ``]`` inside a name is doubled
    (``]]``) so the name cannot terminate the quoting early.

    Args:
        table_name: Name of the table
        schema: Schema name (default: dbo); a falsy value omits the schema

    Returns:
        Table contents as pandas DataFrame
    """
    ctx = get_logging_context()
    ctx.info(
        "Reading table",
        server=self.server,
        database=self.database,
        table_name=table_name,
        schema=schema,
    )

    def quote(identifier: Any) -> str:
        # T-SQL delimited identifier: escape the closing bracket by doubling it.
        return "[" + str(identifier).replace("]", "]]") + "]"

    if schema:
        query = f"SELECT * FROM {quote(schema)}.{quote(table_name)}"
    else:
        query = f"SELECT * FROM {quote(table_name)}"

    return self.read_sql(query)
459
+
460
def write_table(
    self,
    df: pd.DataFrame,
    table_name: str,
    schema: Optional[str] = "dbo",
    if_exists: str = "replace",
    index: bool = False,
    chunksize: Optional[int] = 1000,
) -> int:
    """
    Write DataFrame to SQL table.

    Rows are sent with multi-row INSERT statements. SQL Server rejects a
    request with 2100 or more parameters and a VALUES list longer than 1000
    rows, so the batch size actually used is capped to fit both limits for
    the frame's column count. A ``chunksize`` already within the limits is
    used unchanged.

    Args:
        df: DataFrame to write
        table_name: Name of the table
        schema: Schema name (default: dbo)
        if_exists: How to behave if table exists ('fail', 'replace', 'append')
        index: Whether to write DataFrame index as column
        chunksize: Requested rows per batch (default: 1000); ``None`` means
            "as many as the server limits allow"

    Returns:
        Number of rows written

    Raises:
        ConnectionError: If write fails
    """
    ctx = get_logging_context()
    ctx.info(
        "Writing DataFrame to table",
        server=self.server,
        database=self.database,
        table_name=table_name,
        schema=schema,
        rows=len(df),
        if_exists=if_exists,
        chunksize=chunksize,
    )

    # Each row of a multi-row INSERT binds one parameter per written column
    # (index levels included when index=True). The parameter budget is kept
    # slightly under the server's 2100 limit for headroom.
    column_count = len(df.columns) + (df.index.nlevels if index else 0)
    max_rows_per_insert = max(1, min(1000, 2090 // max(column_count, 1)))

    if chunksize is None or chunksize > max_rows_per_insert:
        effective_chunksize = max_rows_per_insert
        ctx.debug(
            "Capping chunksize for multi-row INSERT",
            server=self.server,
            table_name=table_name,
            requested_chunksize=chunksize,
            effective_chunksize=effective_chunksize,
        )
    else:
        effective_chunksize = chunksize

    try:
        engine = self.get_engine()

        rows_written = df.to_sql(
            name=table_name,
            con=engine,
            schema=schema,
            if_exists=if_exists,
            index=index,
            chunksize=effective_chunksize,
            method="multi",  # Use multi-row INSERT for better performance
        )

        # to_sql may return None; fall back to the frame length.
        result_rows = rows_written if rows_written is not None else len(df)
        ctx.info(
            "Table write completed successfully",
            server=self.server,
            database=self.database,
            table_name=table_name,
            rows_written=result_rows,
        )
        return result_rows
    except Exception as e:
        if isinstance(e, ConnectionError):
            raise
        ctx.error(
            "Table write failed",
            server=self.server,
            database=self.database,
            table_name=table_name,
            error=str(e),
        )
        raise ConnectionError(
            connection_name=f"AzureSQL({self.server})",
            reason=f"Write operation failed: {str(e)}",
            suggestions=self._get_error_suggestions(str(e)),
        )
535
+
536
def execute_sql(self, sql: str, params: Optional[Dict[str, Any]] = None) -> Any:
    """
    Run a SQL statement by delegating to ``execute()``.

    Alias for execute() - used by SqlServerMergeWriter.

    Args:
        sql: SQL statement
        params: Optional parameters for parameterized query

    Returns:
        Whatever ``execute()`` returns for the same arguments

    Raises:
        ConnectionError: If execution fails
    """
    outcome = self.execute(sql, params)
    return outcome
553
+
554
def execute(self, sql: str, params: Optional[Dict[str, Any]] = None) -> Any:
    """
    Run a SQL statement (INSERT, UPDATE, DELETE, etc.) and commit it.

    Args:
        sql: SQL statement
        params: Optional parameters for parameterized query

    Returns:
        The fetched rows when the statement returns rows, otherwise ``None``

    Raises:
        ConnectionError: If execution fails
    """
    log_ctx = get_logging_context()
    target = {"server": self.server, "database": self.database}

    log_ctx.debug("Executing SQL statement", **target, statement_length=len(sql))

    try:
        engine = self.get_engine()
        from sqlalchemy import text

        with engine.connect() as conn:
            outcome = conn.execute(text(sql), params or {})
            # Fetch all results before commit to avoid cursor invalidation
            fetched = outcome.fetchall() if outcome.returns_rows else None
            conn.commit()

        log_ctx.info("SQL statement executed successfully", **target)
        return fetched
    except ConnectionError:
        # Already in the project's error type (e.g. raised by get_engine).
        raise
    except Exception as exc:
        detail = str(exc)
        log_ctx.error("SQL statement execution failed", **target, error=detail)
        raise ConnectionError(
            connection_name=f"AzureSQL({self.server})",
            reason=f"Statement execution failed: {detail}",
            suggestions=self._get_error_suggestions(detail),
        )
609
+
610
def close(self):
    """Dispose of the cached engine, if one exists, and forget it."""
    log_ctx = get_logging_context()
    target = {"server": self.server, "database": self.database}

    log_ctx.debug("Closing AzureSQL connection", **target)

    engine = self._engine
    if not engine:
        # Nothing was ever opened (or it was already closed).
        return

    engine.dispose()
    self._engine = None
    log_ctx.info("AzureSQL connection closed", **target)
627
+
628
+ def _get_error_suggestions(self, error_msg: str) -> List[str]:
629
+ """Generate suggestions based on error message."""
630
+ suggestions = []
631
+ error_lower = error_msg.lower()
632
+
633
+ if "login failed" in error_lower:
634
+ suggestions.append("Check username and password")
635
+ suggestions.append(f"Verify auth_mode is correct (current: {self.auth_mode})")
636
+ if "identity" in error_lower:
637
+ suggestions.append("Ensure Managed Identity has access to the database")
638
+
639
+ if "firewall" in error_lower or "tcp provider" in error_lower:
640
+ suggestions.append("Check Azure SQL Server firewall rules")
641
+ suggestions.append("Ensure client IP is allowed")
642
+
643
+ if "driver" in error_lower:
644
+ suggestions.append(f"Verify ODBC driver '{self.driver}' is installed")
645
+ suggestions.append("On Linux: sudo apt-get install msodbcsql18")
646
+
647
+ return suggestions
648
+
649
def get_spark_options(self) -> Dict[str, str]:
    """Get Spark JDBC options.

    The JDBC URL carries ``loginTimeout`` taken from the configured
    ``timeout`` for every auth mode. For ``aad_msi`` with the default
    timeout of 30 the URL is the same string as before.

    Returns:
        Dictionary of Spark JDBC options: always ``url`` and ``driver``;
        ``user`` and ``password`` are added for the 'sql' and 'key_vault'
        auth modes when a username / password is available.
    """
    ctx = get_logging_context()
    ctx.info(
        "Building Spark JDBC options",
        server=self.server,
        database=self.database,
        auth_mode=self.auth_mode,
    )

    jdbc_url = (
        f"jdbc:sqlserver://{self.server}:{self.port};"
        f"databaseName={self.database};encrypt=true;trustServerCertificate=true;"
    )

    if self.auth_mode == "aad_msi":
        jdbc_url += (
            "hostNameInCertificate=*.database.windows.net;"
            f"loginTimeout={self.timeout};authentication=ActiveDirectoryMsi;"
        )
        ctx.debug(
            "Configured JDBC URL for AAD MSI",
            server=self.server,
        )
    else:
        # Honor the configured timeout for the remaining auth modes as well.
        jdbc_url += f"loginTimeout={self.timeout};"
        if self.auth_mode == "aad_service_principal":
            # Not fully implemented in init yet, but placeholder
            ctx.debug(
                "Configured JDBC URL for AAD Service Principal",
                server=self.server,
            )

    options = {
        "url": jdbc_url,
        "driver": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
    }

    if self.auth_mode in ("sql", "key_vault"):
        if self.username:
            options["user"] = self.username

        pwd = self.get_password()
        if pwd:
            options["password"] = pwd

        ctx.debug(
            "Added SQL authentication to Spark options",
            server=self.server,
            username=self.username,
        )

    ctx.info(
        "Spark JDBC options built successfully",
        server=self.server,
        database=self.database,
    )

    return options