dvt_core-0.52.2-cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (275)
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2039 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +804 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +50 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +624 -0
  74. dbt/compute/federated_executor.py +837 -0
  75. dbt/compute/filter_pushdown.cpython-310-darwin.so +0 -0
  76. dbt/compute/filter_pushdown.py +273 -0
  77. dbt/compute/jar_provisioning.cpython-310-darwin.so +0 -0
  78. dbt/compute/jar_provisioning.py +255 -0
  79. dbt/compute/java_compat.cpython-310-darwin.so +0 -0
  80. dbt/compute/java_compat.py +689 -0
  81. dbt/compute/jdbc_utils.cpython-310-darwin.so +0 -0
  82. dbt/compute/jdbc_utils.py +678 -0
  83. dbt/compute/smart_selector.cpython-310-darwin.so +0 -0
  84. dbt/compute/smart_selector.py +311 -0
  85. dbt/compute/strategies/__init__.py +54 -0
  86. dbt/compute/strategies/base.py +165 -0
  87. dbt/compute/strategies/dataproc.py +207 -0
  88. dbt/compute/strategies/emr.py +203 -0
  89. dbt/compute/strategies/local.py +364 -0
  90. dbt/compute/strategies/standalone.py +262 -0
  91. dbt/config/__init__.py +4 -0
  92. dbt/config/catalogs.py +94 -0
  93. dbt/config/compute.cpython-310-darwin.so +0 -0
  94. dbt/config/compute.py +547 -0
  95. dbt/config/dvt_profile.cpython-310-darwin.so +0 -0
  96. dbt/config/dvt_profile.py +342 -0
  97. dbt/config/profile.py +422 -0
  98. dbt/config/project.py +873 -0
  99. dbt/config/project_utils.py +28 -0
  100. dbt/config/renderer.py +231 -0
  101. dbt/config/runtime.py +553 -0
  102. dbt/config/selectors.py +208 -0
  103. dbt/config/utils.py +77 -0
  104. dbt/constants.py +28 -0
  105. dbt/context/__init__.py +0 -0
  106. dbt/context/base.py +745 -0
  107. dbt/context/configured.py +135 -0
  108. dbt/context/context_config.py +382 -0
  109. dbt/context/docs.py +82 -0
  110. dbt/context/exceptions_jinja.py +178 -0
  111. dbt/context/macro_resolver.py +195 -0
  112. dbt/context/macros.py +171 -0
  113. dbt/context/manifest.py +72 -0
  114. dbt/context/providers.py +2249 -0
  115. dbt/context/query_header.py +13 -0
  116. dbt/context/secret.py +58 -0
  117. dbt/context/target.py +74 -0
  118. dbt/contracts/__init__.py +0 -0
  119. dbt/contracts/files.py +413 -0
  120. dbt/contracts/graph/__init__.py +0 -0
  121. dbt/contracts/graph/manifest.py +1904 -0
  122. dbt/contracts/graph/metrics.py +97 -0
  123. dbt/contracts/graph/model_config.py +70 -0
  124. dbt/contracts/graph/node_args.py +42 -0
  125. dbt/contracts/graph/nodes.py +1806 -0
  126. dbt/contracts/graph/semantic_manifest.py +232 -0
  127. dbt/contracts/graph/unparsed.py +811 -0
  128. dbt/contracts/project.py +417 -0
  129. dbt/contracts/results.py +53 -0
  130. dbt/contracts/selection.py +23 -0
  131. dbt/contracts/sql.py +85 -0
  132. dbt/contracts/state.py +68 -0
  133. dbt/contracts/util.py +46 -0
  134. dbt/deprecations.py +346 -0
  135. dbt/deps/__init__.py +0 -0
  136. dbt/deps/base.py +152 -0
  137. dbt/deps/git.py +195 -0
  138. dbt/deps/local.py +79 -0
  139. dbt/deps/registry.py +130 -0
  140. dbt/deps/resolver.py +149 -0
  141. dbt/deps/tarball.py +120 -0
  142. dbt/docs/source/_ext/dbt_click.py +119 -0
  143. dbt/docs/source/conf.py +32 -0
  144. dbt/env_vars.py +64 -0
  145. dbt/event_time/event_time.py +40 -0
  146. dbt/event_time/sample_window.py +60 -0
  147. dbt/events/__init__.py +15 -0
  148. dbt/events/base_types.py +36 -0
  149. dbt/events/core_types_pb2.py +2 -0
  150. dbt/events/logging.py +108 -0
  151. dbt/events/types.py +2516 -0
  152. dbt/exceptions.py +1486 -0
  153. dbt/flags.py +89 -0
  154. dbt/graph/__init__.py +11 -0
  155. dbt/graph/cli.py +247 -0
  156. dbt/graph/graph.py +172 -0
  157. dbt/graph/queue.py +214 -0
  158. dbt/graph/selector.py +374 -0
  159. dbt/graph/selector_methods.py +975 -0
  160. dbt/graph/selector_spec.py +222 -0
  161. dbt/graph/thread_pool.py +18 -0
  162. dbt/hooks.py +21 -0
  163. dbt/include/README.md +49 -0
  164. dbt/include/__init__.py +3 -0
  165. dbt/include/starter_project/.gitignore +4 -0
  166. dbt/include/starter_project/README.md +15 -0
  167. dbt/include/starter_project/__init__.py +3 -0
  168. dbt/include/starter_project/analyses/.gitkeep +0 -0
  169. dbt/include/starter_project/dbt_project.yml +36 -0
  170. dbt/include/starter_project/macros/.gitkeep +0 -0
  171. dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  172. dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  173. dbt/include/starter_project/models/example/schema.yml +21 -0
  174. dbt/include/starter_project/seeds/.gitkeep +0 -0
  175. dbt/include/starter_project/snapshots/.gitkeep +0 -0
  176. dbt/include/starter_project/tests/.gitkeep +0 -0
  177. dbt/internal_deprecations.py +26 -0
  178. dbt/jsonschemas/__init__.py +3 -0
  179. dbt/jsonschemas/jsonschemas.py +309 -0
  180. dbt/jsonschemas/project/0.0.110.json +4717 -0
  181. dbt/jsonschemas/project/0.0.85.json +2015 -0
  182. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  183. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  184. dbt/jsonschemas/resources/latest.json +6773 -0
  185. dbt/links.py +4 -0
  186. dbt/materializations/__init__.py +0 -0
  187. dbt/materializations/incremental/__init__.py +0 -0
  188. dbt/materializations/incremental/microbatch.py +236 -0
  189. dbt/mp_context.py +8 -0
  190. dbt/node_types.py +37 -0
  191. dbt/parser/__init__.py +23 -0
  192. dbt/parser/analysis.py +21 -0
  193. dbt/parser/base.py +548 -0
  194. dbt/parser/common.py +266 -0
  195. dbt/parser/docs.py +52 -0
  196. dbt/parser/fixtures.py +51 -0
  197. dbt/parser/functions.py +30 -0
  198. dbt/parser/generic_test.py +100 -0
  199. dbt/parser/generic_test_builders.py +333 -0
  200. dbt/parser/hooks.py +118 -0
  201. dbt/parser/macros.py +137 -0
  202. dbt/parser/manifest.py +2204 -0
  203. dbt/parser/models.py +573 -0
  204. dbt/parser/partial.py +1178 -0
  205. dbt/parser/read_files.py +445 -0
  206. dbt/parser/schema_generic_tests.py +422 -0
  207. dbt/parser/schema_renderer.py +111 -0
  208. dbt/parser/schema_yaml_readers.py +935 -0
  209. dbt/parser/schemas.py +1466 -0
  210. dbt/parser/search.py +149 -0
  211. dbt/parser/seeds.py +28 -0
  212. dbt/parser/singular_test.py +20 -0
  213. dbt/parser/snapshots.py +44 -0
  214. dbt/parser/sources.py +558 -0
  215. dbt/parser/sql.py +62 -0
  216. dbt/parser/unit_tests.py +621 -0
  217. dbt/plugins/__init__.py +20 -0
  218. dbt/plugins/contracts.py +9 -0
  219. dbt/plugins/exceptions.py +2 -0
  220. dbt/plugins/manager.py +163 -0
  221. dbt/plugins/manifest.py +21 -0
  222. dbt/profiler.py +20 -0
  223. dbt/py.typed +1 -0
  224. dbt/query_analyzer.cpython-310-darwin.so +0 -0
  225. dbt/query_analyzer.py +410 -0
  226. dbt/runners/__init__.py +2 -0
  227. dbt/runners/exposure_runner.py +7 -0
  228. dbt/runners/no_op_runner.py +45 -0
  229. dbt/runners/saved_query_runner.py +7 -0
  230. dbt/selected_resources.py +8 -0
  231. dbt/task/__init__.py +0 -0
  232. dbt/task/base.py +503 -0
  233. dbt/task/build.py +197 -0
  234. dbt/task/clean.py +56 -0
  235. dbt/task/clone.py +161 -0
  236. dbt/task/compile.py +150 -0
  237. dbt/task/compute.py +454 -0
  238. dbt/task/debug.py +505 -0
  239. dbt/task/deps.py +280 -0
  240. dbt/task/docs/__init__.py +3 -0
  241. dbt/task/docs/generate.py +660 -0
  242. dbt/task/docs/index.html +250 -0
  243. dbt/task/docs/serve.py +29 -0
  244. dbt/task/freshness.py +322 -0
  245. dbt/task/function.py +121 -0
  246. dbt/task/group_lookup.py +46 -0
  247. dbt/task/init.py +553 -0
  248. dbt/task/java.py +316 -0
  249. dbt/task/list.py +236 -0
  250. dbt/task/printer.py +175 -0
  251. dbt/task/retry.py +175 -0
  252. dbt/task/run.py +1306 -0
  253. dbt/task/run_operation.py +141 -0
  254. dbt/task/runnable.py +758 -0
  255. dbt/task/seed.py +103 -0
  256. dbt/task/show.py +149 -0
  257. dbt/task/snapshot.py +56 -0
  258. dbt/task/spark.py +414 -0
  259. dbt/task/sql.py +110 -0
  260. dbt/task/target_sync.py +759 -0
  261. dbt/task/test.py +464 -0
  262. dbt/tests/fixtures/__init__.py +1 -0
  263. dbt/tests/fixtures/project.py +620 -0
  264. dbt/tests/util.py +651 -0
  265. dbt/tracking.py +529 -0
  266. dbt/utils/__init__.py +3 -0
  267. dbt/utils/artifact_upload.py +151 -0
  268. dbt/utils/utils.py +408 -0
  269. dbt/version.py +268 -0
  270. dvt_cli/__init__.py +72 -0
  271. dvt_core-0.52.2.dist-info/METADATA +286 -0
  272. dvt_core-0.52.2.dist-info/RECORD +275 -0
  273. dvt_core-0.52.2.dist-info/WHEEL +5 -0
  274. dvt_core-0.52.2.dist-info/entry_points.txt +2 -0
  275. dvt_core-0.52.2.dist-info/top_level.txt +2 -0
dbt/compute/federated_executor.py
@@ -0,0 +1,837 @@
+ """
+ Federated Query Executor
+
+ Orchestrates multi-source query execution using Spark compute engine.
+ This is the core component that enables DVT's data virtualization capabilities.
+
+ v0.3.0: Unified Spark architecture - all federation uses Spark JDBC.
+
+ Execution flow:
+ 1. Identify all source tables/models from compiled SQL
+ 2. Load sources into Spark via JDBC (parallel reads)
+ 3. Execute model SQL in Spark
+ 4. Return results as PyArrow Table
+ 5. Materialize to target via JDBC or adapter
+
+ Key principle: Adapters for I/O only, Spark for all compute.
+ """
+
+ import re
+ import sys
+ import time
+ from typing import Any, Dict, List, Optional, Set, Tuple
+ from dataclasses import dataclass
+
+ from dbt.adapters.base import BaseAdapter
+ from dbt.compute.engines.spark_engine import SparkEngine, _clean_spark_error
+ from dbt.contracts.graph.manifest import Manifest
+ from dbt.contracts.graph.nodes import ManifestNode
+ from dbt.query_analyzer import QueryAnalysisResult
+ from dbt_common.exceptions import DbtRuntimeError
+
+
+ def _log(msg: str) -> None:
+     """
+     Log a message that appears immediately in console output.
+     DVT v0.4.7: Suppressed for clean output (logs go to spark_run_history).
+     """
+     # Suppressed for clean output - all debug info goes to spark_run_history file
+     pass
+
+
+ def _get_dependent_views_pg(cursor, schema: str, table: str) -> List[Dict[str, str]]:
+     """
+     Query PostgreSQL for views that depend on a table.
+     DVT v0.5.5: Used to save views before DROP CASCADE, then restore after.
+
+     Returns list of dicts with: schema, name, definition
+     """
+     try:
+         # Query views that depend on this table using pg_depend
+         sql = """
+             SELECT DISTINCT
+                 n.nspname as view_schema,
+                 c.relname as view_name,
+                 pg_get_viewdef(c.oid, true) as view_definition
+             FROM pg_depend d
+             JOIN pg_rewrite r ON r.oid = d.objid
+             JOIN pg_class c ON c.oid = r.ev_class
+             JOIN pg_namespace n ON n.oid = c.relnamespace
+             JOIN pg_class t ON t.oid = d.refobjid
+             JOIN pg_namespace tn ON tn.oid = t.relnamespace
+             WHERE t.relname = %s
+               AND tn.nspname = %s
+               AND c.relkind = 'v'
+               AND d.classid = 'pg_rewrite'::regclass
+               AND d.deptype = 'n'
+         """
+         cursor.execute(sql, (table, schema))
+         rows = cursor.fetchall()
+         return [
+             {'schema': row[0], 'name': row[1], 'definition': row[2]}
+             for row in rows
+         ]
+     except Exception:
+         # If query fails (different DB, permissions), return empty
+         return []
+
+
+ def _recreate_views_pg(cursor, views: List[Dict[str, str]]) -> None:
+     """
+     Recreate views from their saved definitions.
+     DVT v0.5.5: Restores views after DROP CASCADE.
+     """
+     for view in views:
+         try:
+             create_sql = f'CREATE OR REPLACE VIEW "{view["schema"]}"."{view["name"]}" AS {view["definition"]}'
+             _log(f"[DVT] Recreating view: {view['schema']}.{view['name']}")
+             cursor.execute(create_sql)
+         except Exception as e:
+             _log(f"[DVT] Warning: Could not recreate view {view['name']}: {e}")
+
+
+ @dataclass
+ class SourceTableMetadata:
+     """Metadata about a source table needed for federated execution."""
+
+     source_id: str  # Unique ID from manifest
+     connection_name: str  # Which connection to read from
+     database: str  # Database name
+     schema: str  # Schema name
+     identifier: str  # Table name
+     qualified_name: str  # Fully qualified name for SQL
+
+
+ @dataclass
+ class FederatedExecutionResult:
+     """Result of federated query execution."""
+
+     spark_dataframe: Any  # Spark DataFrame with query results
+     source_tables: List[SourceTableMetadata]  # Sources used
+     compute_engine: str  # Engine used (spark)
+     execution_time_ms: float  # Execution time in milliseconds
+     rows_read: int  # Total rows read from sources
+     rows_returned: int  # Rows in result (may be None if not counted)
+     engine: Any  # SparkEngine instance (for session lifecycle management)
+
+
+ class FederatedExecutor:
+     """
+     Orchestrates federated query execution across multiple data sources.
+
+     This executor:
+     1. Extracts data from multiple sources via adapters
+     2. Loads data into a compute engine
+     3. Executes the query
+     4. Returns results as Spark DataFrame
+     """
+
+     def __init__(
+         self,
+         manifest: Manifest,
+         adapters: Dict[str, BaseAdapter],
+         default_compute_engine: str = "spark-local",
+     ):
+         """
+         Initialize federated executor.
+
+         v0.3.0: All federation uses Spark (local or cluster).
+
+         :param manifest: The dbt manifest with all nodes and sources
+         :param adapters: Dict of connection_name → adapter instances
+         :param default_compute_engine: Default compute engine ("spark-local" or "spark-cluster")
+         """
+         self.manifest = manifest
+         self.adapters = adapters
+         self.default_compute_engine = default_compute_engine
+
+     def execute(
+         self,
+         node: ManifestNode,
+         analysis_result: QueryAnalysisResult,
+         compute_engine_override: Optional[str] = None,
+         spark_config: Optional[Dict[str, str]] = None,
+         target_adapter_type: Optional[str] = None,
+         coerce_view_to_table: bool = False,
+     ) -> FederatedExecutionResult:
+         """
+         Execute a node using federated query processing.
+
+         :param node: The compiled node to execute
+         :param analysis_result: Query analysis result
+         :param compute_engine_override: Override compute engine choice
+         :param spark_config: Spark configuration (if using Spark)
+         :param target_adapter_type: Target adapter type for JDBC materialization
+         :param coerce_view_to_table: DVT v0.51.6 - If True, treat view as table (Rule 3.C.3)
+         :returns: FederatedExecutionResult with query results
+         :raises DbtRuntimeError: If execution fails
+         """
+         import time
+
+         _log(f"[DVT] Starting federated execution for node: {node.unique_id}")
+         start_time = time.time()
+
+         # Determine compute engine
+         compute_engine = (
+             compute_engine_override
+             or analysis_result.user_override
+             or self.default_compute_engine
+         )
+         _log(f"[DVT] Compute engine selected: {compute_engine}")
+
+         # DVT v0.5.0: Restrict Spark compute to table and incremental materializations only
+         # DVT v0.51.6: Allow view if coerce_view_to_table is True (Rule 3.C.3)
+         if hasattr(node, 'config') and hasattr(node.config, 'materialized'):
+             materialized = node.config.materialized
+
+             # DVT v0.51.6: Views are coerced to tables in cross-target scenarios
+             effective_materialized = 'table' if (materialized == 'view' and coerce_view_to_table) else materialized
+
+             # Only allow table and incremental
+             if effective_materialized not in ('table', 'incremental'):
+                 raise DbtRuntimeError(
+                     f"Spark compute engine only supports 'table' and 'incremental' materializations. "
+                     f"Node '{node.unique_id}' uses '{materialized}'. "
+                     f"Please change the materialization to 'table' or 'incremental', or use adapter-native execution."
+                 )
+
+             # For incremental, validate strategy is 'append' (only supported strategy)
+             if materialized == 'incremental':
+                 incremental_strategy = getattr(node.config, 'incremental_strategy', 'append')
+                 if incremental_strategy != 'append':
+                     raise DbtRuntimeError(
+                         f"Spark compute engine only supports 'append' incremental strategy. "
+                         f"Node '{node.unique_id}' uses '{incremental_strategy}'. "
+                         f"Supported strategies: append. "
+                         f"For merge/delete+insert/insert_overwrite, use adapter-native execution."
+                     )
+
+             if coerce_view_to_table and materialized == 'view':
+                 _log(f"[DVT] Materialization: view → table (coerced for cross-target)")
+             else:
+                 _log(f"[DVT] Materialization validated: {materialized}")
+
+         # Extract source table metadata
+         source_tables = self._extract_source_tables(analysis_result)
+         _log(f"[DVT] Found {len(source_tables)} source table(s)")
+
+         # v0.5.99: Look up named clusters from registry
+         from dbt.config.compute import ComputeRegistry
+         from dbt.compute.jdbc_utils import set_docker_mode
+         registry = ComputeRegistry()
+         cluster_config = None
+
+         # Check if it's a registered named cluster
+         if compute_engine not in ("spark-local", "spark", "spark-cluster"):
+             cluster = registry.get(compute_engine)
+             if cluster:
+                 cluster_config = cluster.config
+                 _log(f"[DVT] Found registered cluster '{compute_engine}' with platform: {cluster.detect_platform().value}")
+
+                 # DVT v0.51.8: Enable Docker mode for standalone clusters with localhost master
+                 # This rewrites localhost -> host.docker.internal in JDBC URLs
+                 master = cluster_config.get("master", "")
+                 if master.startswith("spark://") and ("localhost" in master or "127.0.0.1" in master):
+                     set_docker_mode(True)
+                     _log("[DVT] Docker mode enabled for JDBC URLs")
+                 else:
+                     set_docker_mode(False)
+             else:
+                 # Not in registry - check if it starts with "spark" for backwards compat
+                 if not compute_engine.startswith("spark"):
+                     raise DbtRuntimeError(
+                         f"Invalid compute engine '{compute_engine}'. "
+                         f"Not found in compute registry. "
+                         f"Available: {[c.name for c in registry.list()]}"
+                     )
+         else:
+             set_docker_mode(False)
+
+         # Create Spark engine (local or cluster based on config)
+         _log(f"[DVT] Creating Spark engine (mode: {compute_engine})")
+         if compute_engine == "spark-local" or compute_engine == "spark":
+             engine = SparkEngine(mode="embedded", spark_config=spark_config or {})
+         elif compute_engine == "spark-cluster" or compute_engine.startswith("spark:"):
+             # External cluster
+             engine = SparkEngine(mode="external", spark_config=spark_config or {})
+         elif cluster_config:
+             # Named cluster from registry - pass full config
+             engine = SparkEngine(mode="external", spark_config=cluster_config)
+         else:
+             # Fallback
+             engine = SparkEngine(mode="external", spark_config=spark_config or {})
+
+         _log("[DVT] Spark engine created, initializing Spark session...")
+         try:
+             # v0.5.99: Collect adapter types from sources + target for JDBC driver provisioning
+             all_adapter_types = set()
+             for source_table in source_tables:
+                 adapter = self.adapters.get(source_table.connection_name)
+                 if adapter:
+                     all_adapter_types.add(adapter.type())
+             # Include target adapter type for materialization
+             if target_adapter_type:
+                 all_adapter_types.add(target_adapter_type)
+             _log(f"[DVT] Adapter types (sources + target): {all_adapter_types}")
+
+             # Initialize Spark session with all adapter types (for JDBC drivers)
+             engine.connect(adapter_types=all_adapter_types)
+             _log("[DVT] Spark session initialized successfully")
+
+             # Get compiled SQL first (needed for optimization checks)
+             compiled_sql = (
+                 node.compiled_code
+                 if hasattr(node, "compiled_code")
+                 else node.raw_code
+             )
+
+             # Step 1: Load source data into Spark via JDBC (v0.3.0: Spark-only)
+             total_rows_read = self._load_sources_spark_jdbc(
+                 engine, source_tables, analysis_result, compiled_sql
+             )
+
+             # Step 2: Rewrite SQL to use table aliases
+             rewritten_sql = self._rewrite_sql_for_compute(
+                 compiled_sql, source_tables
+             )
+
+             # Step 3: Execute query in Spark
+             result_df = engine.spark.sql(rewritten_sql)
+
+             # Calculate execution time
+             execution_time_ms = (time.time() - start_time) * 1000
+
+             # Return Spark DataFrame AND engine (caller must close engine after materialization)
+             return FederatedExecutionResult(
+                 spark_dataframe=result_df,
+                 source_tables=source_tables,
+                 compute_engine=compute_engine,
+                 execution_time_ms=execution_time_ms,
+                 rows_read=total_rows_read,
+                 rows_returned=None,  # Will be counted during JDBC write
+                 engine=engine,  # Return engine for lifecycle management
+             )
+
+         except Exception as e:
+             # Clean up engine on error
+             try:
+                 engine.close()
+             except:
+                 pass
+             # DVT v0.5.2: Clean error message (no Java stack trace)
+             clean_error = _clean_spark_error(e)
+             # DVT v0.5.99: Include original exception for debugging if cleaned message is too short
+             if len(clean_error) < 20:
+                 clean_error = f"{clean_error} (original: {str(e)[:200]})"
+             raise DbtRuntimeError(
+                 f"Federated execution failed for node {node.unique_id}: {clean_error}"
+             )
+
+     def _extract_source_tables(
+         self, analysis_result: QueryAnalysisResult
+     ) -> List[SourceTableMetadata]:
+         """
+         Extract metadata for all source tables referenced in the query.
+
+         :param analysis_result: Query analysis result
+         :returns: List of SourceTableMetadata
+         """
+         source_tables = []
+
+         for source_id in analysis_result.source_refs:
+             source = self.manifest.sources.get(source_id)
+             if not source:
+                 raise DbtRuntimeError(
+                     f"Source {source_id} not found in manifest. "
+                     f"Available sources: {list(self.manifest.sources.keys())[:3]}"
+                 )
+
+             # Get connection name from source definition
+             connection_name = getattr(source, "connection", None)
+
+             if not connection_name:
+                 raise DbtRuntimeError(
+                     f"Source {source_id} does not have a connection specified. "
+                     "DVT requires all sources to specify a connection in the source YAML:\n"
+                     " - name: my_source\n"
+                     " connection: my_connection"
+                 )
+
+             # Build qualified name for SQL
+             qualified_name = f"{source.database}.{source.schema}.{source.identifier}"
+
+             metadata = SourceTableMetadata(
+                 source_id=source_id,
+                 connection_name=connection_name,
+                 database=source.database,
+                 schema=source.schema,
+                 identifier=source.identifier,
+                 qualified_name=qualified_name,
+             )
+
+             source_tables.append(metadata)
+
+         return source_tables
+
+     # NOTE: _load_sources_via_adapters method removed in v0.3.0
+     # All data loading now uses Spark JDBC via _load_sources_spark_jdbc
+
+     def _load_sources_spark_jdbc(
+         self,
+         engine: SparkEngine,
+         source_tables: List[SourceTableMetadata],
+         analysis_result: QueryAnalysisResult,
+         compiled_sql: str,
+     ) -> int:
+         """
+         Load all source tables into Spark via JDBC connectors (Phase 1: v0.2.0).
+
+         This bypasses the DVT node's memory by reading data directly from source
+         databases into Spark workers (distributed memory). Data flow:
+         Source DB → Spark Workers → Target DB (no DVT node bottleneck)
+
+         This method:
+         1. Gets adapter credentials for each source
+         2. Converts credentials to JDBC config
+         3. Auto-detects partition column for parallel reads
+         4. Reads data via Spark JDBC with partitioning
+         5. Registers as temp view in Spark
+
+         :param engine: Spark engine instance
+         :param source_tables: List of source table metadata
+         :param analysis_result: Query analysis result
+         :returns: Total number of rows loaded (estimated, as Spark is lazy)
+         :raises DbtRuntimeError: If JDBC not supported or read fails
+         """
+         from dbt.compute.jdbc_utils import build_jdbc_config
+         from dbt.compute.filter_pushdown import optimize_jdbc_table_read
+
+         total_rows = 0
+
+         for source_meta in source_tables:
+             # Get adapter for this source's connection
+             adapter = self.adapters.get(source_meta.connection_name)
+             if not adapter:
+                 raise DbtRuntimeError(
+                     f"No adapter found for connection '{source_meta.connection_name}'"
+                 )
+
+             # Check if JDBC is supported for this adapter type
+             if not engine.supports_jdbc(adapter.type()):
+                 raise DbtRuntimeError(
+                     f"JDBC not supported for adapter type '{adapter.type()}'. "
+                     f"Falling back to adapter-based loading not yet implemented. "
+                     f"Please use DuckDB engine for this source type."
+                 )
+
+             # Log connection attempt
+             _log(f"[DVT] Connecting to {adapter.type()} source: {source_meta.qualified_name} (connection: {source_meta.connection_name})")
+             connection_start = time.time()
+
+             # Get adapter credentials
+             credentials = adapter.config.credentials
+
+             # Build JDBC configuration
+             try:
+                 jdbc_url, jdbc_properties = build_jdbc_config(credentials)
+             except Exception as e:
+                 _log(f"[DVT] ERROR: Failed to build JDBC config for '{source_meta.qualified_name}': {str(e)}")
+                 raise DbtRuntimeError(
+                     f"Failed to build JDBC config for source '{source_meta.qualified_name}': {str(e)}"
+                 ) from e
+
+             # Prepare JDBC read parameters with filter pushdown optimization
+             # Instead of reading full table, push down filters (LIMIT, WHERE) to source DB
+             jdbc_table = optimize_jdbc_table_read(
+                 source_table=source_meta,
+                 compiled_sql=compiled_sql,
+                 source_tables=source_tables,
+                 adapter_type=adapter.type()
+             )
+             table_alias = self._get_table_alias(source_meta)
+             numPartitions = 16  # Default parallelism
+
+             # Automatic partition detection DISABLED
+             # Reasons:
+             # 1. Slow metadata queries (30-60s on cold Snowflake warehouses)
+             # 2. Unnecessary overhead for small datasets
+             # 3. Filter pushdown now handles optimization automatically
+             partition_column = None
+             lower_bound = None
+             upper_bound = None
+
+             # Read via Spark JDBC and register as temp view
+             _log(f"[DVT] Reading from JDBC: {jdbc_table}")
+             try:
+                 engine.register_jdbc_table(
+                     url=jdbc_url,
+                     table=jdbc_table,
+                     properties=jdbc_properties,
+                     table_alias=table_alias,
+                     numPartitions=numPartitions,
+                     partitionColumn=partition_column,
+                     lowerBound=lower_bound,
+                     upperBound=upper_bound,
+                 )
+                 connection_time = time.time() - connection_start
+                 _log(f"[DVT] ✓ Connected to {source_meta.qualified_name} in {connection_time:.1f}s")
+                 if connection_time > 30:
+                     _log(f"[DVT] WARNING: Connection took {connection_time:.1f}s (warehouse may have been suspended)")
+             except Exception as e:
+                 connection_time = time.time() - connection_start
+                 # DVT v0.5.2: Clean error message (no Java stack trace)
+                 clean_error = _clean_spark_error(e)
+                 _log(f"[DVT] ERROR: Failed to load '{source_meta.qualified_name}' after {connection_time:.1f}s: {clean_error}")
+                 raise DbtRuntimeError(
+                     f"Failed to load source '{source_meta.qualified_name}' via JDBC: {clean_error}"
+                 )
+
+             # Note: Can't easily count rows without triggering Spark action
+             # For now, return 0 (rows_read will be inaccurate for JDBC path)
+             # TODO: Consider running COUNT(*) query if row count is needed
+             total_rows += 0
+
+         return total_rows
+
+     def _get_table_alias(self, source_meta: SourceTableMetadata) -> str:
+         """
+         Generate a safe table alias for the compute engine.
+
+         Compute engines may not support dots or special characters in table names,
+         so we create a normalized alias.
+
+         :param source_meta: Source table metadata
+         :returns: Safe table alias
+         """
+         # Extract source name and table name from source_id
+         # source_id format: source.{project}.{source_name}.{table_name}
+         parts = source_meta.source_id.split(".")
+         if len(parts) >= 4:
+             source_name = parts[2]
+             table_name = parts[3]
+             return f"{source_name}_{table_name}"
+         else:
+             # Fallback: use identifier
+             return source_meta.identifier
+
+     def _rewrite_sql_for_compute(
+         self, sql: str, source_tables: List[SourceTableMetadata]
+     ) -> str:
+         """
+         Rewrite SQL to replace fully-qualified source table names with compute engine aliases.
+
+         Source tables are loaded into the compute engine with simple aliases (e.g., 'Exim_cbs_f_country'),
+         but the compiled SQL contains fully-qualified names (e.g., '"EXIM_EDWH_DEV"."ods"."cbs_f_country"').
+         This method replaces the qualified names with the aliases and removes source-specific clauses
+         like SAMPLE that have been pushed down to the source.
+
+         :param sql: Compiled SQL with fully-qualified table names
+         :param source_tables: List of source table metadata
+         :returns: Rewritten SQL with aliases and source-specific clauses removed
+         """
+         import re
+
+         rewritten_sql = sql
+
+         for source_meta in source_tables:
+             # Get the alias used in the compute engine
+             alias = self._get_table_alias(source_meta)
+
+             # Replace the fully-qualified table name with the alias
+             # Format: "database"."schema"."table" or database.schema.table
+             qualified_name = source_meta.qualified_name
+             parts = qualified_name.split(".")
+
+             # DVT v0.51.7: Use case-insensitive regex replacement for all variants
+             # because Snowflake returns uppercase but Spark/Databricks lowercases
+
+             # 1. Unquoted: EXIM_EDWH_DEV.ods.cbs_f_country (any case)
+             unquoted_pattern = re.compile(
+                 r'\b' + r'\.'.join(re.escape(p) for p in parts) + r'\b',
+                 re.IGNORECASE
+             )
+             rewritten_sql = unquoted_pattern.sub(alias, rewritten_sql)
+
+             # 2. Double-quoted (PostgreSQL style): "EXIM_EDWH_DEV"."ods"."cbs_f_country" (any case)
+             quoted_pattern = re.compile(
+                 r'"' + r'"\."\s*'.join(re.escape(p) for p in parts) + r'"',
+                 re.IGNORECASE
+             )
+             rewritten_sql = quoted_pattern.sub(alias, rewritten_sql)
+
+             # 3. Single string quoted: "EXIM_EDWH_DEV.ods.cbs_f_country" (any case)
+             single_quoted_pattern = re.compile(
+                 r'"' + r'\.'.join(re.escape(p) for p in parts) + r'"',
+                 re.IGNORECASE
+             )
+             rewritten_sql = single_quoted_pattern.sub(alias, rewritten_sql)
+
+             # 4. Backtick-quoted (Spark/Databricks style): `EXIM_EDWH_DEV`.`ods`.`cbs_f_country` (any case)
+             backtick_pattern = re.compile(
+                 r'`' + r'`\.`\s*'.join(re.escape(p) for p in parts) + r'`',
+                 re.IGNORECASE
+             )
+             rewritten_sql = backtick_pattern.sub(alias, rewritten_sql)
+
+         # DVT v0.4.5: Remove Snowflake-specific SAMPLE clauses
+         # These have been pushed down to the source via JDBC subqueries
+         # Spark SQL doesn't support SAMPLE syntax, so remove it from the query
+         # Pattern matches: SAMPLE (N), SAMPLE (N ROWS), SAMPLE SYSTEM|BERNOULLI|BLOCK (P)
+         # with optional REPEATABLE(seed) or SEED(seed)
+         rewritten_sql = re.sub(
+             r'\s*(?:TABLE)?SAMPLE\s+(?:SYSTEM|BERNOULLI|BLOCK)\s*\(\s*\d+(?:\.\d+)?\s*\)'
+             r'(?:\s+(?:REPEATABLE|SEED)\s*\(\s*\d+\s*\))?',
+             '',
+             rewritten_sql,
+             flags=re.IGNORECASE
+         )
+         rewritten_sql = re.sub(
+             r'\s*(?:TABLE)?SAMPLE\s*\(\s*\d+(?:\s+ROWS)?\s*\)'
+             r'(?:\s+(?:REPEATABLE|SEED)\s*\(\s*\d+\s*\))?',
+             '',
+             rewritten_sql,
+             flags=re.IGNORECASE
+         )
+
+         return rewritten_sql
+
+     def materialize_result(
+         self,
+         result: FederatedExecutionResult,
+         target_adapter: BaseAdapter,
+         target_table: str,
+         mode: str = "create",
+         use_jdbc: bool = True,
+         spark_result_df: Optional[Any] = None,
+     ) -> Any:
+         """
+         Materialize federated query results to target database.
+
+         v0.3.0: Uses Spark JDBC for all materialization (default).
+
+         :param result: Federated execution result
+         :param target_adapter: Adapter to use for getting target credentials
+         :param target_table: Target table name (qualified)
+         :param mode: Write mode ('create', 'append', 'replace')
+         :param use_jdbc: If True, use JDBC write path (default in v0.3.0)
+         :param spark_result_df: Spark DataFrame with results (required for JDBC path)
+         :returns: AdapterResponse from write operation
+         """
+         if use_jdbc and spark_result_df is not None:
+             # Use JDBC write path (default in v0.3.0)
+             return self._materialize_spark_jdbc(
+                 result_df=spark_result_df,
+                 target_adapter=target_adapter,
+                 target_table=target_table,
+                 mode=mode,
+             )
+         else:
+             # Fallback: use target adapter directly (for adapters without JDBC support)
+             raise DbtRuntimeError(
+                 "Non-JDBC materialization path removed in v0.3.0. "
+                 "All materialization requires Spark JDBC. "
+                 "Ensure spark_result_df is provided."
+             )
+
+     def _materialize_spark_jdbc(
+         self,
+         result_df: Any,  # Spark DataFrame
+         target_adapter: BaseAdapter,
+         target_table: str,
+         mode: str = "create",
+     ) -> Any:
+         """
+         Materialize Spark query results to target database via JDBC (Phase 1: v0.2.0).
+
+         This bypasses the DVT node's memory by writing data directly from Spark
+         workers to the target database.
+
+         :param result_df: Spark DataFrame with query results
+         :param target_adapter: Adapter to use for getting target credentials
+         :param target_table: Target table name (qualified)
+         :param mode: Write mode ('create', 'append', 'replace')
+         :returns: AdapterResponse
+         :raises DbtRuntimeError: If JDBC write fails
+         """
+         from dbt.compute.jdbc_utils import build_jdbc_config
+         from dbt.adapters.contracts.connection import AdapterResponse
+
+         # Get target credentials
+         target_credentials = target_adapter.config.credentials
+
+         # Build JDBC configuration for target
+         try:
+             jdbc_url, jdbc_properties = build_jdbc_config(target_credentials)
+         except Exception as e:
+             raise DbtRuntimeError(
+                 f"Failed to build JDBC config for target '{target_table}': {str(e)}"
+             ) from e
+
+         # Map DVT mode to Spark JDBC mode
+         spark_mode_mapping = {
+             "create": "overwrite",  # Create/recreate table (dbt behavior)
+             "append": "append",  # Add to existing table
+             "replace": "overwrite",  # Drop and recreate
+         }
+         spark_mode = spark_mode_mapping.get(mode, "overwrite")
+
+         _log(f"[DVT] Writing to target via Spark JDBC: {target_table} (mode={spark_mode})")
+
+         # Get Spark session from DataFrame
+         spark = result_df.sparkSession
+
+         # Log DataFrame schema for debugging
+         _log(f"[DVT] DataFrame schema:")
+         for field in result_df.schema.fields:
+             _log(f" - {field.name}: {field.dataType}")
+
+         # Log row count
+         row_count = result_df.count()
+         _log(f"[DVT] DataFrame has {row_count} rows")
+
+         # Sanitize URL for logging (hide password)
+         safe_url = jdbc_url.split("?")[0] if "?" in jdbc_url else jdbc_url
+         _log(f"[DVT] JDBC URL: {safe_url}")
+         _log(f"[DVT] JDBC table: {target_table}")
+
+         # Write via JDBC
+         saved_views: List[Dict[str, str]] = []
+         target_adapter_type = target_adapter.type()
+         is_postgres = target_adapter_type in ("postgres", "postgresql")
+
+         try:
+             # DVT v0.5.5: Save dependent views before DROP CASCADE, restore after
+             # Spark's JDBC overwrite mode doesn't use CASCADE, causing failures
+             # when dependent objects (views, etc.) exist
+             # DVT v0.51.6: Only applies to PostgreSQL (other DBs handle this differently)
+             if spark_mode == "overwrite" and is_postgres:
+                 try:
+                     with target_adapter.connection_named("__dvt_drop__"):
+                         conn = target_adapter.connections.get_thread_connection()
+                         cursor = conn.handle.cursor()
+
+                         # Parse schema.table from target_table
+                         parts = target_table.replace('"', '').split('.')
+                         if len(parts) >= 2:
+                             tbl_schema = parts[-2]
+                             tbl_name = parts[-1]
+                         else:
+                             tbl_schema = 'public'
+                             tbl_name = parts[-1]
+
+                         # DVT v0.5.5: Save dependent views before dropping
+                         saved_views = _get_dependent_views_pg(cursor, tbl_schema, tbl_name)
+                         if saved_views:
+                             _log(f"[DVT] Saving {len(saved_views)} dependent view(s) before DROP")
+
+                         # Use CASCADE to drop dependent objects
+                         drop_sql = f"DROP TABLE IF EXISTS {target_table} CASCADE"
+                         _log(f"[DVT] Pre-drop with CASCADE: {drop_sql}")
+                         cursor.execute(drop_sql)
+                         conn.handle.commit()
+                         cursor.close()
+                 except Exception as drop_err:
+                     _log(f"[DVT] Pre-drop warning (continuing): {drop_err}")
+
+             result_df.write.format("jdbc").options(
+                 url=jdbc_url, dbtable=target_table, batchsize="10000", **jdbc_properties
+             ).mode(spark_mode).save()
+
+             # DVT v0.5.5: Restore dependent views after successful write (PostgreSQL only)
+             if saved_views and is_postgres:
+                 try:
+                     with target_adapter.connection_named("__dvt_restore__"):
+                         conn = target_adapter.connections.get_thread_connection()
+                         cursor = conn.handle.cursor()
+                         _recreate_views_pg(cursor, saved_views)
+                         conn.handle.commit()
+                         cursor.close()
+                         _log(f"[DVT] Restored {len(saved_views)} dependent view(s)")
+                 except Exception as restore_err:
+                     _log(f"[DVT] Warning: Could not restore views: {restore_err}")
+
+             # Return mock AdapterResponse
+             # Note: Can't easily get rows_affected from Spark JDBC write
+             return AdapterResponse(
+                 _message=f"SUCCESS - Table {target_table} materialized via JDBC",
+                 rows_affected=row_count,
+             )
+
+         except Exception as e:
+             # DVT v0.5.2: Clean error message (no Java stack trace)
+             clean_error = _clean_spark_error(e)
+             raise DbtRuntimeError(
+                 f"Failed to materialize results to '{target_table}': {clean_error}"
+             )
+
+     def explain_execution(
+         self, node: ManifestNode, analysis_result: QueryAnalysisResult
+     ) -> str:
+         """
+         Generate an execution plan explanation for a federated query.
+
+         Useful for debugging and optimization.
+
+         :param node: The node to explain
+         :param analysis_result: Query analysis result
+         :returns: Human-readable execution plan
+         """
+         source_tables = self._extract_source_tables(analysis_result)
+
+         plan_parts = [
+             "=== DVT Federated Execution Plan ===",
+             f"Node: {node.unique_id}",
+             f"Compute Engine: {self.default_compute_engine}",
+             "",
+             "Data Sources:",
+         ]
+
+         for i, source_meta in enumerate(source_tables, 1):
+             plan_parts.append(
+                 f" {i}. {source_meta.qualified_name} "
+                 f"(connection: {source_meta.connection_name})"
+             )
+
+         plan_parts.extend(
+             [
+                 "",
+                 "Execution Steps (v0.3.0 - Spark-Unified):",
+                 " 1. Extract data from each source via Spark JDBC (parallel reads)",
+                 f" 2. Load {len(source_tables)} table(s) into Spark ({self.default_compute_engine})",
+                 " 3. Execute query in Spark",
+                 " 4. Materialize to target via Spark JDBC",
+                 "",
+                 f"Strategy: {analysis_result.strategy.upper()}",
+                 f"Reason: {analysis_result.reason}",
+             ]
+         )
+
+         return "\n".join(plan_parts)
+
+
+ class SourceRewriter:
+     """
+     Rewrites SQL queries to use compute engine table aliases.
+
+     When sources are loaded into compute engines, they may be registered with
+     different names (aliases). This class rewrites the SQL to use those aliases.
+     """
+
+     @staticmethod
+     def rewrite_sources(sql: str, source_mapping: Dict[str, str]) -> str:
+         """
+         Rewrite SQL to use compute engine table aliases.
+
+         :param sql: Original SQL with qualified source names
+         :param source_mapping: Dict of qualified_name → alias
+         :returns: Rewritten SQL
+         """
+         rewritten = sql
+
+         # Replace each qualified name with its alias
+         for qualified_name, alias in source_mapping.items():
+             # Match qualified name (database.schema.table)
+             pattern = re.compile(rf"\b{re.escape(qualified_name)}\b", re.IGNORECASE)
+             rewritten = pattern.sub(alias, rewritten)
+
+         return rewritten
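
Taken together, the hunk above adds a federation path of analyze, JDBC-load sources into Spark, run the model SQL in Spark, then JDBC-write to the target. The sketch below shows how a caller might drive that surface, based only on the signatures visible in this diff; the manifest, adapters, node, analysis, and target_adapter objects (and the target table name) are hypothetical stand-ins for values the dbt/DVT runtime would normally supply, so treat it as an illustration rather than the package's documented API.

# Hypothetical driver for the classes added above. Everything passed into this
# function is assumed to come from the dbt/DVT runtime; only the FederatedExecutor,
# explain_execution, execute, and materialize_result calls mirror signatures
# shown in the diff.
from dbt.compute.federated_executor import FederatedExecutor

def run_federated_model(manifest, adapters, node, analysis, target_adapter):
    executor = FederatedExecutor(
        manifest=manifest,
        adapters=adapters,  # dict of connection_name -> adapter instance
        default_compute_engine="spark-local",
    )

    # Optional: print the human-readable plan before running.
    print(executor.explain_execution(node, analysis))

    result = executor.execute(
        node=node,
        analysis_result=analysis,
        target_adapter_type=target_adapter.type(),
    )
    try:
        # execute() hands back a live Spark DataFrame plus the engine that owns the
        # session; materialization writes the DataFrame to the target over JDBC.
        return executor.materialize_result(
            result=result,
            target_adapter=target_adapter,
            target_table='"analytics"."public"."my_model"',  # placeholder name
            mode="create",
            spark_result_df=result.spark_dataframe,
        )
    finally:
        # Per the comments in execute(), the caller owns the engine lifecycle.
        result.engine.close()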