dvt_core-0.59.0a51-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2660 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +844 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +60 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +642 -0
  74. dbt/compute/federated_executor.py +1080 -0
  75. dbt/compute/filter_pushdown.py +273 -0
  76. dbt/compute/jar_provisioning.py +273 -0
  77. dbt/compute/java_compat.py +689 -0
  78. dbt/compute/jdbc_utils.py +1252 -0
  79. dbt/compute/metadata/__init__.py +63 -0
  80. dbt/compute/metadata/adapters_registry.py +370 -0
  81. dbt/compute/metadata/catalog_store.py +1036 -0
  82. dbt/compute/metadata/registry.py +674 -0
  83. dbt/compute/metadata/store.py +1020 -0
  84. dbt/compute/smart_selector.py +377 -0
  85. dbt/compute/spark_logger.py +272 -0
  86. dbt/compute/strategies/__init__.py +55 -0
  87. dbt/compute/strategies/base.py +165 -0
  88. dbt/compute/strategies/dataproc.py +207 -0
  89. dbt/compute/strategies/emr.py +203 -0
  90. dbt/compute/strategies/local.py +472 -0
  91. dbt/compute/strategies/standalone.py +262 -0
  92. dbt/config/__init__.py +4 -0
  93. dbt/config/catalogs.py +94 -0
  94. dbt/config/compute.py +513 -0
  95. dbt/config/dvt_profile.py +408 -0
  96. dbt/config/profile.py +422 -0
  97. dbt/config/project.py +888 -0
  98. dbt/config/project_utils.py +48 -0
  99. dbt/config/renderer.py +231 -0
  100. dbt/config/runtime.py +564 -0
  101. dbt/config/selectors.py +208 -0
  102. dbt/config/utils.py +77 -0
  103. dbt/constants.py +28 -0
  104. dbt/context/__init__.py +0 -0
  105. dbt/context/base.py +745 -0
  106. dbt/context/configured.py +135 -0
  107. dbt/context/context_config.py +382 -0
  108. dbt/context/docs.py +82 -0
  109. dbt/context/exceptions_jinja.py +178 -0
  110. dbt/context/macro_resolver.py +195 -0
  111. dbt/context/macros.py +171 -0
  112. dbt/context/manifest.py +72 -0
  113. dbt/context/providers.py +2249 -0
  114. dbt/context/query_header.py +13 -0
  115. dbt/context/secret.py +58 -0
  116. dbt/context/target.py +74 -0
  117. dbt/contracts/__init__.py +0 -0
  118. dbt/contracts/files.py +413 -0
  119. dbt/contracts/graph/__init__.py +0 -0
  120. dbt/contracts/graph/manifest.py +1904 -0
  121. dbt/contracts/graph/metrics.py +97 -0
  122. dbt/contracts/graph/model_config.py +70 -0
  123. dbt/contracts/graph/node_args.py +42 -0
  124. dbt/contracts/graph/nodes.py +1806 -0
  125. dbt/contracts/graph/semantic_manifest.py +232 -0
  126. dbt/contracts/graph/unparsed.py +811 -0
  127. dbt/contracts/project.py +419 -0
  128. dbt/contracts/results.py +53 -0
  129. dbt/contracts/selection.py +23 -0
  130. dbt/contracts/sql.py +85 -0
  131. dbt/contracts/state.py +68 -0
  132. dbt/contracts/util.py +46 -0
  133. dbt/deprecations.py +348 -0
  134. dbt/deps/__init__.py +0 -0
  135. dbt/deps/base.py +152 -0
  136. dbt/deps/git.py +195 -0
  137. dbt/deps/local.py +79 -0
  138. dbt/deps/registry.py +130 -0
  139. dbt/deps/resolver.py +149 -0
  140. dbt/deps/tarball.py +120 -0
  141. dbt/docs/source/_ext/dbt_click.py +119 -0
  142. dbt/docs/source/conf.py +32 -0
  143. dbt/env_vars.py +64 -0
  144. dbt/event_time/event_time.py +40 -0
  145. dbt/event_time/sample_window.py +60 -0
  146. dbt/events/__init__.py +15 -0
  147. dbt/events/base_types.py +36 -0
  148. dbt/events/core_types_pb2.py +2 -0
  149. dbt/events/logging.py +108 -0
  150. dbt/events/types.py +2516 -0
  151. dbt/exceptions.py +1486 -0
  152. dbt/flags.py +89 -0
  153. dbt/graph/__init__.py +11 -0
  154. dbt/graph/cli.py +249 -0
  155. dbt/graph/graph.py +172 -0
  156. dbt/graph/queue.py +214 -0
  157. dbt/graph/selector.py +374 -0
  158. dbt/graph/selector_methods.py +975 -0
  159. dbt/graph/selector_spec.py +222 -0
  160. dbt/graph/thread_pool.py +18 -0
  161. dbt/hooks.py +21 -0
  162. dbt/include/README.md +49 -0
  163. dbt/include/__init__.py +3 -0
  164. dbt/include/data/adapters_registry.duckdb +0 -0
  165. dbt/include/data/build_comprehensive_registry.py +1254 -0
  166. dbt/include/data/build_registry.py +242 -0
  167. dbt/include/data/csv/adapter_queries.csv +33 -0
  168. dbt/include/data/csv/syntax_rules.csv +9 -0
  169. dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
  170. dbt/include/data/csv/type_mappings_databricks.csv +30 -0
  171. dbt/include/data/csv/type_mappings_mysql.csv +40 -0
  172. dbt/include/data/csv/type_mappings_oracle.csv +30 -0
  173. dbt/include/data/csv/type_mappings_postgres.csv +56 -0
  174. dbt/include/data/csv/type_mappings_redshift.csv +33 -0
  175. dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
  176. dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
  177. dbt/include/dvt_starter_project/README.md +15 -0
  178. dbt/include/dvt_starter_project/__init__.py +3 -0
  179. dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
  180. dbt/include/dvt_starter_project/dvt_project.yml +39 -0
  181. dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
  182. dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
  183. dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
  184. dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
  185. dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
  186. dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
  187. dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
  188. dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
  189. dbt/internal_deprecations.py +26 -0
  190. dbt/jsonschemas/__init__.py +3 -0
  191. dbt/jsonschemas/jsonschemas.py +309 -0
  192. dbt/jsonschemas/project/0.0.110.json +4717 -0
  193. dbt/jsonschemas/project/0.0.85.json +2015 -0
  194. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  195. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  196. dbt/jsonschemas/resources/latest.json +6773 -0
  197. dbt/links.py +4 -0
  198. dbt/materializations/__init__.py +0 -0
  199. dbt/materializations/incremental/__init__.py +0 -0
  200. dbt/materializations/incremental/microbatch.py +236 -0
  201. dbt/mp_context.py +8 -0
  202. dbt/node_types.py +37 -0
  203. dbt/parser/__init__.py +23 -0
  204. dbt/parser/analysis.py +21 -0
  205. dbt/parser/base.py +548 -0
  206. dbt/parser/common.py +266 -0
  207. dbt/parser/docs.py +52 -0
  208. dbt/parser/fixtures.py +51 -0
  209. dbt/parser/functions.py +30 -0
  210. dbt/parser/generic_test.py +100 -0
  211. dbt/parser/generic_test_builders.py +333 -0
  212. dbt/parser/hooks.py +122 -0
  213. dbt/parser/macros.py +137 -0
  214. dbt/parser/manifest.py +2208 -0
  215. dbt/parser/models.py +573 -0
  216. dbt/parser/partial.py +1178 -0
  217. dbt/parser/read_files.py +445 -0
  218. dbt/parser/schema_generic_tests.py +422 -0
  219. dbt/parser/schema_renderer.py +111 -0
  220. dbt/parser/schema_yaml_readers.py +935 -0
  221. dbt/parser/schemas.py +1466 -0
  222. dbt/parser/search.py +149 -0
  223. dbt/parser/seeds.py +28 -0
  224. dbt/parser/singular_test.py +20 -0
  225. dbt/parser/snapshots.py +44 -0
  226. dbt/parser/sources.py +558 -0
  227. dbt/parser/sql.py +62 -0
  228. dbt/parser/unit_tests.py +621 -0
  229. dbt/plugins/__init__.py +20 -0
  230. dbt/plugins/contracts.py +9 -0
  231. dbt/plugins/exceptions.py +2 -0
  232. dbt/plugins/manager.py +163 -0
  233. dbt/plugins/manifest.py +21 -0
  234. dbt/profiler.py +20 -0
  235. dbt/py.typed +1 -0
  236. dbt/query_analyzer.py +410 -0
  237. dbt/runners/__init__.py +2 -0
  238. dbt/runners/exposure_runner.py +7 -0
  239. dbt/runners/no_op_runner.py +45 -0
  240. dbt/runners/saved_query_runner.py +7 -0
  241. dbt/selected_resources.py +8 -0
  242. dbt/task/__init__.py +0 -0
  243. dbt/task/base.py +506 -0
  244. dbt/task/build.py +197 -0
  245. dbt/task/clean.py +56 -0
  246. dbt/task/clone.py +161 -0
  247. dbt/task/compile.py +150 -0
  248. dbt/task/compute.py +458 -0
  249. dbt/task/debug.py +513 -0
  250. dbt/task/deps.py +280 -0
  251. dbt/task/docs/__init__.py +3 -0
  252. dbt/task/docs/api/__init__.py +23 -0
  253. dbt/task/docs/api/catalog.py +204 -0
  254. dbt/task/docs/api/lineage.py +234 -0
  255. dbt/task/docs/api/profile.py +204 -0
  256. dbt/task/docs/api/spark.py +186 -0
  257. dbt/task/docs/generate.py +1002 -0
  258. dbt/task/docs/index.html +250 -0
  259. dbt/task/docs/serve.py +174 -0
  260. dbt/task/dvt_output.py +509 -0
  261. dbt/task/dvt_run.py +282 -0
  262. dbt/task/dvt_seed.py +806 -0
  263. dbt/task/freshness.py +322 -0
  264. dbt/task/function.py +121 -0
  265. dbt/task/group_lookup.py +46 -0
  266. dbt/task/init.py +1022 -0
  267. dbt/task/java.py +316 -0
  268. dbt/task/list.py +236 -0
  269. dbt/task/metadata.py +804 -0
  270. dbt/task/migrate.py +714 -0
  271. dbt/task/printer.py +175 -0
  272. dbt/task/profile.py +1489 -0
  273. dbt/task/profile_serve.py +662 -0
  274. dbt/task/retract.py +441 -0
  275. dbt/task/retry.py +175 -0
  276. dbt/task/run.py +1647 -0
  277. dbt/task/run_operation.py +141 -0
  278. dbt/task/runnable.py +758 -0
  279. dbt/task/seed.py +103 -0
  280. dbt/task/show.py +149 -0
  281. dbt/task/snapshot.py +56 -0
  282. dbt/task/spark.py +414 -0
  283. dbt/task/sql.py +110 -0
  284. dbt/task/target_sync.py +814 -0
  285. dbt/task/test.py +464 -0
  286. dbt/tests/fixtures/__init__.py +1 -0
  287. dbt/tests/fixtures/project.py +620 -0
  288. dbt/tests/util.py +651 -0
  289. dbt/tracking.py +529 -0
  290. dbt/utils/__init__.py +3 -0
  291. dbt/utils/artifact_upload.py +151 -0
  292. dbt/utils/utils.py +408 -0
  293. dbt/version.py +271 -0
  294. dvt_cli/__init__.py +158 -0
  295. dvt_core-0.59.0a51.dist-info/METADATA +288 -0
  296. dvt_core-0.59.0a51.dist-info/RECORD +299 -0
  297. dvt_core-0.59.0a51.dist-info/WHEEL +5 -0
  298. dvt_core-0.59.0a51.dist-info/entry_points.txt +2 -0
  299. dvt_core-0.59.0a51.dist-info/top_level.txt +2 -0
dbt/task/run.py ADDED
@@ -0,0 +1,1647 @@
+ from __future__ import annotations
+
+ import functools
+ import threading
+ import time
+ from copy import deepcopy
+ from dataclasses import asdict
+ from datetime import datetime, timezone
+ from typing import AbstractSet, Any, Dict, Iterable, List, Optional, Set, Tuple, Type
+
+ from dbt import tracking, utils
+ from dbt.adapters.base import BaseAdapter, BaseRelation
+ from dbt.adapters.capability import Capability
+ from dbt.adapters.events.types import FinishedRunningStats
+ from dbt.adapters.exceptions import MissingMaterializationError
+ from dbt.artifacts.resources import Hook
+ from dbt.artifacts.schemas.batch_results import BatchResults, BatchType
+ from dbt.artifacts.schemas.results import (
+     NodeStatus,
+     RunningStatus,
+     RunStatus,
+     TimingInfo,
+     collect_timing_info,
+ )
+ from dbt.artifacts.schemas.run import RunResult
+ from dbt.cli.flags import Flags
+ from dbt.clients.jinja import MacroGenerator
+ from dbt.config import RuntimeConfig
+ from dbt.context.providers import generate_runtime_model_context
+ from dbt.contracts.graph.manifest import Manifest
+ from dbt.contracts.graph.nodes import BatchContext, HookNode, ModelNode, ResultNode
+ from dbt.events.types import (
+     GenericExceptionOnRun,
+     LogBatchResult,
+     LogHookEndLine,
+     LogHookStartLine,
+     LogModelResult,
+     LogStartBatch,
+     LogStartLine,
+     MicrobatchExecutionDebug,
+ )
+ from dbt.exceptions import CompilationError, DbtInternalError, DbtRuntimeError, DbtValidationError
+ from dbt.graph import ResourceTypeSelector
+ from dbt.graph.thread_pool import DbtThreadPool
+ from dbt.hooks import get_hook_dict
+ from dbt.materializations.incremental.microbatch import MicrobatchBuilder
+ from dbt.node_types import NodeType, RunHookType
+ from dbt.task import group_lookup
+ from dbt.task.base import BaseRunner
+ from dbt.task.compile import CompileRunner, CompileTask
+ # DVT: Import query analysis and federated execution components
+ from dbt.query_analyzer import QueryAnalyzer
+ from dbt.compute.federated_executor import FederatedExecutor
+ from dbt.compute.smart_selector import SmartComputeSelector
+ from dbt.task.printer import get_counts, print_run_end_messages
+ from dbt.utils.artifact_upload import add_artifact_produced
+ from dbt_common.clients.jinja import MacroProtocol
+ from dbt_common.dataclass_schema import dbtClassMixin
+ from dbt_common.events.base_types import EventLevel
+ from dbt_common.events.contextvars import log_contextvars
+ from dbt_common.events.functions import fire_event, get_invocation_id
+ from dbt_common.events.types import Formatting
+ from dbt_common.exceptions import DbtValidationError
+ from dbt_common.invocation import get_invocation_started_at
+
+
+ @functools.total_ordering
+ class BiggestName(str):
+     def __lt__(self, other):
+         return True
+
+     def __eq__(self, other):
+         return isinstance(other, self.__class__)
+
+
+ def _hook_list() -> List[HookNode]:
+     return []
+
+
+ def get_hooks_by_tags(
+     nodes: Iterable[ResultNode],
+     match_tags: Set[str],
+ ) -> List[HookNode]:
+     matched_nodes = []
+     for node in nodes:
+         if not isinstance(node, HookNode):
+             continue
+         node_tags = node.tags
+         if len(set(node_tags) & match_tags):
+             matched_nodes.append(node)
+     return matched_nodes
+
+
+ def get_hook(source, index):
+     hook_dict = get_hook_dict(source)
+     hook_dict.setdefault("index", index)
+     Hook.validate(hook_dict)
+     return Hook.from_dict(hook_dict)
+
+
+ def get_execution_status(sql: str, adapter: BaseAdapter) -> Tuple[RunStatus, str]:
+     if not sql.strip():
+         return RunStatus.Success, "OK"
+
+     try:
+         response, _ = adapter.execute(sql, auto_begin=False, fetch=False)
+         status = RunStatus.Success
+         message = response._message
+     except (KeyboardInterrupt, SystemExit):
+         raise
+     except DbtRuntimeError as exc:
+         status = RunStatus.Error
+         message = exc.msg
+     except Exception as exc:
+         status = RunStatus.Error
+         message = str(exc)
+
+     return (status, message)
+
+
+ def _get_adapter_info(adapter, run_model_result) -> Dict[str, Any]:
+     """Each adapter returns a dataclass with a flexible dictionary for
+     adapter-specific fields. Only the non-'model_adapter_details' fields
+     are guaranteed cross adapter."""
+     return asdict(adapter.get_adapter_run_info(run_model_result.node.config)) if adapter else {}
+
+
+ def track_model_run(index, num_nodes, run_model_result, adapter=None):
+     if tracking.active_user is None:
+         raise DbtInternalError("cannot track model run with no active user")
+     invocation_id = get_invocation_id()
+     node = run_model_result.node
+     has_group = True if hasattr(node, "group") and node.group else False
+     if node.resource_type == NodeType.Model:
+         access = node.access.value if node.access is not None else None
+         contract_enforced = node.contract.enforced
+         versioned = True if node.version else False
+         incremental_strategy = node.config.incremental_strategy
+     else:
+         access = None
+         contract_enforced = False
+         versioned = False
+         incremental_strategy = None
+
+     tracking.track_model_run(
+         {
+             "invocation_id": invocation_id,
+             "index": index,
+             "total": num_nodes,
+             "execution_time": run_model_result.execution_time,
+             "run_status": str(run_model_result.status).upper(),
+             "run_skipped": run_model_result.status == NodeStatus.Skipped,
+             "run_error": run_model_result.status == NodeStatus.Error,
+             "model_materialization": node.get_materialization(),
+             "model_incremental_strategy": incremental_strategy,
+             "model_id": utils.get_hash(node),
+             "hashed_contents": utils.get_hashed_contents(node),
+             "timing": [t.to_dict(omit_none=True) for t in run_model_result.timing],
+             "language": str(node.language),
+             "has_group": has_group,
+             "contract_enforced": contract_enforced,
+             "access": access,
+             "versioned": versioned,
+             "adapter_info": _get_adapter_info(adapter, run_model_result),
+         }
+     )
+
+
+ # make sure that we got an ok result back from a materialization
+ def _validate_materialization_relations_dict(inp: Dict[Any, Any], model) -> List[BaseRelation]:
+     try:
+         relations_value = inp["relations"]
+     except KeyError:
+         msg = (
+             'Invalid return value from materialization, "relations" '
+             "not found, got keys: {}".format(list(inp))
+         )
+         raise CompilationError(msg, node=model) from None
+
+     if not isinstance(relations_value, list):
+         msg = (
+             'Invalid return value from materialization, "relations" '
+             "not a list, got: {}".format(relations_value)
+         )
+         raise CompilationError(msg, node=model) from None
+
+     relations: List[BaseRelation] = []
+     for relation in relations_value:
+         if not isinstance(relation, BaseRelation):
+             msg = (
+                 "Invalid return value from materialization, "
+                 '"relations" contains non-Relation: {}'.format(relation)
+             )
+             raise CompilationError(msg, node=model)
+
+         assert isinstance(relation, BaseRelation)
+         relations.append(relation)
+     return relations
+
+
+ class ModelRunner(CompileRunner):
+     def get_node_representation(self):
+         display_quote_policy = {"database": False, "schema": False, "identifier": False}
+         relation = self.adapter.Relation.create_from(
+             self.config, self.node, quote_policy=display_quote_policy
+         )
+         # exclude the database from output if it's the default
+         if self.node.database == self.config.credentials.database:
+             relation = relation.include(database=False)
+         return str(relation)
+
+     def describe_node(self) -> str:
+         # TODO CL 'language' will be moved to node level when we change representation
+         return f"{self.node.language} {self.node.get_materialization()} model {self.get_node_representation()}"
+
+     def print_start_line(self):
+         fire_event(
+             LogStartLine(
+                 description=self.describe_node(),
+                 index=self.node_index,
+                 total=self.num_nodes,
+                 node_info=self.node.node_info,
+             )
+         )
+
+     def print_result_line(self, result):
+         description = self.describe_node()
+         group = group_lookup.get(self.node.unique_id)
+         if result.status == NodeStatus.Error:
+             status = result.status
+             level = EventLevel.ERROR
+         else:
+             status = result.message
+             level = EventLevel.INFO
+         fire_event(
+             LogModelResult(
+                 description=description,
+                 status=status,
+                 index=self.node_index,
+                 total=self.num_nodes,
+                 execution_time=result.execution_time,
+                 node_info=self.node.node_info,
+                 group=group,
+             ),
+             level=level,
+         )
+
+     def before_execute(self) -> None:
+         self.print_start_line()
+
+     def after_execute(self, result) -> None:
+         track_model_run(self.node_index, self.num_nodes, result, adapter=self.adapter)
+         self.print_result_line(result)
+
+     def _build_run_model_result(self, model, context, elapsed_time: float = 0.0):
+         result = context["load_result"]("main")
+         if not result:
+             raise DbtRuntimeError("main is not being called during running model")
+         adapter_response = {}
+         if isinstance(result.response, dbtClassMixin):
+             adapter_response = result.response.to_dict(omit_none=True)
+         return RunResult(
+             node=model,
+             status=RunStatus.Success,
+             timing=[],
+             thread_id=threading.current_thread().name,
+             execution_time=elapsed_time,
+             message=str(result.response),
+             adapter_response=adapter_response,
+             failures=result.get("failures"),
+             batch_results=None,
+         )
+
+     def _materialization_relations(self, result: Any, model) -> List[BaseRelation]:
+         if isinstance(result, str):
+             msg = (
+                 'The materialization ("{}") did not explicitly return a '
+                 "list of relations to add to the cache.".format(str(model.get_materialization()))
+             )
+             raise CompilationError(msg, node=model)
+
+         if isinstance(result, dict):
+             return _validate_materialization_relations_dict(result, model)
+
+         msg = (
+             "Invalid return value from materialization, expected a dict "
+             'with key "relations", got: {}'.format(str(result))
+         )
+         raise CompilationError(msg, node=model)
+
+     def _write_to_file_database(
+         self,
+         spark_df,
+         target_adapter,
+         target_table: str,
+         adapter_type: str,
+     ) -> None:
+         """
+         Write Spark DataFrame to file-based databases (DuckDB, SQLite).
+
+         DVT v0.59.0a30: These databases don't support concurrent JDBC writes.
+         DVT v0.59.0a31: Use the ADAPTER's connection to ensure tables are visible
+         to subsequent pushdown queries (fixes connection isolation issue).
+
+         :param spark_df: PySpark DataFrame to write
+         :param target_adapter: dbt adapter instance (used to get its connection)
+         :param target_table: Target table name (schema.table format)
+         :param adapter_type: 'duckdb' or 'sqlite'
+         """
+         # Convert Spark DataFrame to Pandas
+         pandas_df = spark_df.toPandas()
+
+         # Parse schema and table from target_table
+         # Can be: "table", "schema.table", or "database.schema.table"
+         parts = target_table.split('.')
+         if len(parts) == 3:
+             # database.schema.table - for DuckDB, ignore database (it's the file)
+             _, schema_name, table_name = parts
+         elif len(parts) == 2:
+             schema_name, table_name = parts
+         else:
+             schema_name = "main"
+             table_name = target_table
+
+         if adapter_type == 'duckdb':
+             # Use the ADAPTER's connection to ensure visibility
+             # dbt-duckdb's handle is a DuckDBConnectionWrapper with cursor() method
+             with target_adapter.connection_named('federation_write'):
+                 connection = target_adapter.connections.get_thread_connection()
+                 handle = connection.handle
+
+                 # Get the cursor (DuckDBCursorWrapper) and its underlying raw cursor
+                 cursor_wrapper = handle.cursor()
+                 raw_cursor = cursor_wrapper._cursor  # Access the raw DuckDB cursor
+
+                 # Create schema if not exists
+                 raw_cursor.execute(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}"')
+                 # Drop existing table
+                 raw_cursor.execute(f'DROP TABLE IF EXISTS "{schema_name}"."{table_name}"')
+                 # Register pandas DataFrame and create table
+                 raw_cursor.register('_dvt_temp_df', pandas_df)
+                 raw_cursor.execute(f'CREATE TABLE "{schema_name}"."{table_name}" AS SELECT * FROM _dvt_temp_df')
+                 raw_cursor.unregister('_dvt_temp_df')
+
+         elif adapter_type == 'sqlite':
+             # SQLite: Use adapter's connection similarly
+             with target_adapter.connection_named('federation_write'):
+                 connection = target_adapter.connections.get_thread_connection()
+                 handle = connection.handle
+
+                 # Parse table name (SQLite doesn't have schemas)
+                 table_name_only = parts[-1]
+
+                 # Use pandas to_sql with the adapter's connection
+                 pandas_df.to_sql(table_name_only, handle, if_exists='replace', index=False)
+
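# The DuckDB branch above boils down to: snapshot the Spark result as pandas,
# then create the table through the adapter's own connection. A minimal
# standalone sketch of that path (illustrative only, not part of run.py,
# assuming a plain duckdb connection in place of the dbt adapter handle):
#
#     import duckdb
#     import pandas as pd
#
#     df = pd.DataFrame({"id": [1, 2]})   # stands in for spark_df.toPandas()
#     con = duckdb.connect("dvt.duckdb")
#     con.execute('CREATE SCHEMA IF NOT EXISTS "main"')
#     con.execute('DROP TABLE IF EXISTS "main"."my_model"')
#     con.register("_dvt_temp_df", df)    # expose the DataFrame to SQL
#     con.execute('CREATE TABLE "main"."my_model" AS SELECT * FROM _dvt_temp_df')
#     con.unregister("_dvt_temp_df")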
+     def _execute_model(
+         self,
+         hook_ctx: Any,
+         context_config: Any,
+         model: ModelNode,
+         context: Dict[str, Any],
+         materialization_macro: MacroProtocol,
+     ) -> RunResult:
+         try:
+             result = MacroGenerator(
+                 materialization_macro, context, stack=context["context_macro_stack"]
+             )()
+         finally:
+             self.adapter.post_model_hook(context_config, hook_ctx)
+
+         for relation in self._materialization_relations(result, model):
+             self.adapter.cache_added(relation.incorporate(dbt_created=True))
+
+         return self._build_run_model_result(model, context)
+
+     def execute(self, model, manifest):
+         # DVT: Analyze query for execution strategy
+         analyzer = QueryAnalyzer(manifest)
+
+         # DVT v0.51.1: Fix --target-compute logic
+         # CLI --target-compute should ONLY override models that:
+         # 1. Already have compute= config in their model definition, OR
+         # 2. Require federated execution (multi-source)
+         #
+         # Models without compute= config should ALWAYS favor pushdown
+         # (adapter-native execution) when possible.
+         cli_compute = getattr(self.config.args, 'TARGET_COMPUTE', None)
+         model_compute = model.config.compute if hasattr(model.config, 'compute') else None
+
+         # DVT v0.51.6: Target Hierarchy (Rule 2.1):
+         # Level 1 (Lowest): profiles.yml default target
+         # Level 2: Model-specific target config
+         # Level 3 (Highest): CLI --target argument (Forces Global Target Override)
+         #
+         # Rule 2.2: If CLI --target is used, ALL models are forced to materialize in this target.
+         cli_target = getattr(self.config.args, 'TARGET', None)
+         model_target = model.config.target if hasattr(model.config, 'target') else None
+
+         # CLI --target (Level 3) overrides model config (Level 2) which overrides profile default (Level 1)
+         if cli_target:
+             target_connection = cli_target  # CLI always wins
+         elif model_target:
+             target_connection = model_target  # Model config
+         else:
+             target_connection = self.config.target_name  # Profile default
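# The three-level hierarchy above collapses to a truthiness chain; a minimal
# sketch (hypothetical helper, not part of run.py):
#
#     def resolve_target(cli_target, model_target, profile_default):
#         # Level 3 (CLI --target) > Level 2 (model config) > Level 1 (profiles.yml)
#         return cli_target or model_target or profile_default
#
#     # resolve_target(None, "warehouse_b", "dev") -> "warehouse_b"
#     # resolve_target("prod", "warehouse_b", "dev") -> "prod"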
+
+         # First, analyze WITHOUT any compute override to see if federation is required
+         # Pass target_connection to detect cross-adapter scenarios
+         natural_analysis = analyzer.analyze(
+             model,
+             user_compute_override=None,
+             target_connection=target_connection
+         )
+
+         # DVT v0.51.6: Rule 3.C.3 - View coercion in cross-target scenarios
+         # Views are SQL definitions that reference tables by name.
+         # You CANNOT create a cross-database view - it's physically impossible.
+         # If federation is required, views MUST be coerced to tables with a warning.
+         materialization = model.get_materialization()
+         convert_view_to_table = False
+         if materialization == 'view' and natural_analysis.is_federated:
+             convert_view_to_table = True
+             # Rule 3.C.3: Log warning about view coercion
+             import sys
+             print(
+                 f"[DVT Warning] Model '{model.name}' is configured as 'view' but requires federation. "
+                 f"Materializing as TABLE instead. (Cross-database views are not supported)",
+                 file=sys.stderr
+             )
+
+         # DVT v0.51.6: Rule 1.5 - Compute engine only applies to Federation path
+         # Compute Selection Hierarchy (Rule 1.3):
+         # Level 1: Default compute in computes.yml
+         # Level 2: Model-specific compute config
+         # Level 3: CLI --compute argument
+         # BUT: Compute settings are IGNORED for Pushdown-eligible models (Rule 1.5)
+
+         if natural_analysis.is_federated:
+             # Federation required - apply compute hierarchy
+             if cli_compute:
+                 user_compute = cli_compute  # Level 3 (highest)
+             elif model_compute:
+                 user_compute = model_compute  # Level 2
+             else:
+                 user_compute = None  # Will use Level 1 default from selector
+         else:
+             # Pushdown-eligible - Rule 1.5: IGNORE compute settings
+             user_compute = None
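# Sketch of the compute resolution with the Rule 1.5 gate (hypothetical
# helper, not part of run.py): compute is only resolved when federation is
# required; pushdown-eligible models ignore it entirely.
#
#     def resolve_compute(is_federated, cli_compute, model_compute):
#         if not is_federated:
#             return None  # Rule 1.5: pushdown ignores compute settings
#         # Level 3 (CLI --compute) > Level 2 (model config) > Level 1 default
#         return cli_compute or model_compute  # None -> computes.yml default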
+
+         # Use the natural analysis (compute override doesn't change pushdown/federation decision)
+         analysis = natural_analysis
+
+         # Get target adapter (for materialization)
+         # DVT v0.51.6: Use target_connection which follows Rule 2.1 hierarchy
+         target_adapter = self.config.get_adapter(target_connection)
+
+         if analysis.is_pushdown:
+             # Pushdown execution: Use source adapter directly
+             execution_adapter = self.config.get_adapter(analysis.primary_connection)
+
+             context = generate_runtime_model_context(model, self.config, manifest)
+
+             # Execute using existing path but with correct adapter
+             materialization_macro = manifest.find_materialization_macro_by_name(
+                 self.config.project_name,
+                 model.get_materialization(),
+                 execution_adapter.type()
+             )
+
+             if materialization_macro is None:
+                 raise MissingMaterializationError(
+                     materialization=model.get_materialization(), adapter_type=execution_adapter.type()
+                 )
+
+             if "config" not in context:
+                 raise DbtInternalError(
+                     "Invalid materialization context generated, missing config: {}".format(context)
+                 )
+             context_config = context["config"]
+
+             mat_has_supported_langs = hasattr(materialization_macro, "supported_languages")
+             model_lang_supported = model.language in materialization_macro.supported_languages
+             if mat_has_supported_langs and not model_lang_supported:
+                 str_langs = [str(lang) for lang in materialization_macro.supported_languages]
+                 raise DbtValidationError(
+                     f'Materialization "{materialization_macro.name}" only supports languages {str_langs}; '
+                     f'got "{model.language}"'
+                 )
+
+             # DVT v0.59.0a29: Removed pre-drop CASCADE from pushdown path
+             # dbt-adapters manages table drop/swap during materialization
+             # Pre-drop CASCADE interferes with dbt's materialization process
+             # (dbt checks for existing relation during swap, errors if not found)
+             #
+             # Note: Federation path still uses pre-drop CASCADE because Spark JDBC
+             # mode="overwrite" doesn't use CASCADE when dropping.
+
+             # Run hooks
+             hook_ctx = execution_adapter.pre_model_hook(context_config)
+             result = self._execute_model(hook_ctx, context_config, model, context, materialization_macro)
+
+             return result
+         else:
+             # Federated execution: Use compute layer
+             executor = FederatedExecutor(
+                 manifest=manifest,
+                 adapters=self.config.adapters or {},
+                 default_compute_engine='spark-local'
+             )
+
+             # Select compute engine
+             if user_compute:
+                 compute_engine = user_compute
+             else:
+                 selector = SmartComputeSelector(manifest)
+                 compute_engine = selector.select_engine(model, analysis)
+
+             # Execute federally (pass target adapter type for JDBC materialization)
+             # DVT v0.51.6: Pass view coercion flag so executor treats view as table
+             fed_result = executor.execute(
+                 node=model,
+                 analysis_result=analysis,
+                 compute_engine_override=compute_engine,
+                 target_adapter_type=target_adapter.type() if target_adapter else None,
+                 coerce_view_to_table=convert_view_to_table,
+             )
+
+             try:
+                 # Materialize to target via Spark JDBC
+                 # DVT v0.51.7: Use 3-part naming (database.schema.table) for adapters like Databricks
+                 if hasattr(model, 'database') and model.database:
+                     target_table = f"{model.database}.{model.schema}.{model.alias}"
+                 else:
+                     target_table = f"{model.schema}.{model.alias}"
+
+                 # Get Spark DataFrame from result
+                 spark_df = fed_result.spark_dataframe
+
+                 # DVT v0.58.6: Inline JDBC write to avoid segfault with FederatedExecutor method call
+                 # This is a known issue with PySpark 4.0 + Java 21 on macOS
+                 from dbt.adapters.contracts.connection import AdapterResponse
+
+                 target_credentials = target_adapter.config.credentials
+                 adapter_type = target_adapter.type()
+
+                 # DVT v0.59.0a30: Special handling for file-based databases
+                 # DuckDB and SQLite don't support concurrent writes via JDBC
+                 # Use the ADAPTER's connection to ensure visibility to subsequent queries
+                 if adapter_type in ('duckdb', 'sqlite'):
+                     self._write_to_file_database(
+                         spark_df, target_adapter, target_table, adapter_type
+                     )
+                 else:
+                     # For other databases, use Spark JDBC
+                     from dbt.compute.jdbc_utils import build_jdbc_config
+                     jdbc_url, jdbc_properties = build_jdbc_config(target_credentials)
+
+                     # DVT v0.59.0a29: Use TRUNCATE + APPEND instead of DROP CASCADE
+                     # This preserves dependent views (like dbt-adapters' atomic swap).
+                     #
+                     # Strategy:
+                     # 1. If table exists: TRUNCATE it (preserves structure and dependents)
+                     # 2. Then: mode="append" to insert new data
+                     # 3. If table doesn't exist: mode="overwrite" creates it (first run)
+                     #
+                     # This matches dbt-adapters behavior where tables with dependent
+                     # views continue to work after materialization.
+                     table_exists = False
+                     try:
+                         if adapter_type == 'postgres':
+                             import psycopg2
+                             conn = psycopg2.connect(
+                                 host=target_credentials.host,
+                                 port=target_credentials.port,
+                                 database=target_credentials.database,
+                                 user=target_credentials.user,
+                                 password=target_credentials.password,
+                             )
+                             conn.autocommit = True
+                             cursor = conn.cursor()
+                             # Check if table exists
+                             cursor.execute(f"""
+                                 SELECT EXISTS (
+                                     SELECT FROM pg_tables
+                                     WHERE schemaname || '.' || tablename = '{target_table}'
+                                        OR tablename = '{target_table.split('.')[-1]}'
+                                 )
+                             """)
+                             table_exists = cursor.fetchone()[0]
+                             if table_exists:
+                                 # TRUNCATE preserves table structure and dependent views
+                                 cursor.execute(f"TRUNCATE TABLE {target_table}")
+                             cursor.close()
+                             conn.close()
+                     except Exception:
+                         # If check fails, fall back to overwrite mode
+                         pass
+
+                     # Write to target via Spark JDBC
+                     write_mode = "append" if table_exists else "overwrite"
+                     spark_df.write.jdbc(
+                         jdbc_url,
+                         target_table,
+                         mode=write_mode,
+                         properties=jdbc_properties
+                     )
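# The probe above only runs for postgres; every other adapter leaves
# table_exists=False and takes the overwrite branch. The resulting decision,
# restated from the code above for clarity:
#
#     table exists   -> TRUNCATE TABLE <target>; then write.jdbc(mode="append")
#     table missing  -> write.jdbc(mode="overwrite")  # first run creates it
#     probe fails    -> treated as missing (overwrite)
#
# TRUNCATE keeps the table definition, grants, and dependent views intact,
# which is why it is preferred here over DROP ... CASCADE.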
+
+                 adapter_response = AdapterResponse(_message="DVT: Federated JDBC write completed")
+
+                 # Return result in expected format
+                 rows_affected = getattr(adapter_response, 'rows_affected', 0)
+                 rows_msg = f"{rows_affected} rows" if rows_affected else "completed"
+                 # DVT v0.51.5: Note when view was materialized as table
+                 exec_msg = f"Federated execution: {rows_msg}"
+                 if convert_view_to_table:
+                     exec_msg = f"Federated (view→table): {rows_msg}"
+                 return RunResult(
+                     status=RunStatus.Success,
+                     timing=[],
+                     thread_id='main',
+                     execution_time=fed_result.execution_time_ms / 1000.0,
+                     adapter_response=adapter_response._asdict() if hasattr(adapter_response, '_asdict') else {},
+                     message=exec_msg,
+                     failures=None,
+                     node=model,
+                     agate_table=None,
+                 )
+             finally:
+                 # Always close Spark session after materialization
+                 if fed_result.engine:
+                     try:
+                         fed_result.engine.close()
+                     except Exception as e:
+                         # Log but don't fail on cleanup errors
+                         import sys
+                         print(f"[DVT] Warning: Failed to close Spark session: {e}", file=sys.stderr)
+
+
+ class MicrobatchBatchRunner(ModelRunner):
+     """Handles the running of individual batches"""
+
+     def __init__(
+         self,
+         config,
+         adapter,
+         node,
+         node_index: int,
+         num_nodes: int,
+         batch_idx: int,
+         batches: Dict[int, BatchType],
+         relation_exists: bool,
+         incremental_batch: bool,
+     ):
+         super().__init__(config, adapter, node, node_index, num_nodes)
+
+         self.batch_idx = batch_idx
+         self.batches = batches
+         self.relation_exists = relation_exists
+         self.incremental_batch = incremental_batch
+
+     def describe_batch(self) -> str:
+         batch_start = self.batches[self.batch_idx][0]
+         formatted_batch_start = MicrobatchBuilder.format_batch_start(
+             batch_start, self.node.config.batch_size
+         )
+         return f"batch {formatted_batch_start} of {self.get_node_representation()}"
+
+     def print_result_line(self, result: RunResult):
+         if result.status == NodeStatus.Error:
+             status = result.status
+             level = EventLevel.ERROR
+         elif result.status == NodeStatus.Skipped:
+             status = result.status
+             level = EventLevel.INFO
+         else:
+             status = result.message
+             level = EventLevel.INFO
+
+         fire_event(
+             LogBatchResult(
+                 description=self.describe_batch(),
+                 status=status,
+                 batch_index=self.batch_idx + 1,
+                 total_batches=len(self.batches),
+                 execution_time=result.execution_time,
+                 node_info=self.node.node_info,
+                 group=group_lookup.get(self.node.unique_id),
+             ),
+             level=level,
+         )
+
+     def print_start_line(self) -> None:
+         fire_event(
+             LogStartBatch(
+                 description=self.describe_batch(),
+                 batch_index=self.batch_idx + 1,
+                 total_batches=len(self.batches),
+                 node_info=self.node.node_info,
+             )
+         )
+
+     def should_run_in_parallel(self) -> bool:
+         if not self.adapter.supports(Capability.MicrobatchConcurrency):
+             run_in_parallel = False
+         elif not self.relation_exists:
+             # If the relation doesn't exist, we can't run in parallel
+             run_in_parallel = False
+         elif self.node.config.concurrent_batches is not None:
+             # If the relation exists and the `concurrent_batches` config isn't None, use the config value
+             run_in_parallel = self.node.config.concurrent_batches
+         else:
+             # If the relation exists and the `concurrent_batches` config is None, check if the model self-references `this`.
+             # If the model self-references `this` then we assume the model batches _can't_ be run in parallel
+             run_in_parallel = not self.node.has_this
+
+         return run_in_parallel
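# Decision table for the branches above (capability = MicrobatchConcurrency,
# restated from the code for clarity):
#
#     capability  relation_exists  concurrent_batches  has_this  -> parallel?
#     no          any              any                 any          False
#     yes         no               any                 any          False
#     yes         yes              True / False        any          config value
#     yes         yes              None                yes          False
#     yes         yes              None                no           True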
+
+     def on_skip(self):
+         result = RunResult(
+             node=self.node,
+             status=RunStatus.Skipped,
+             timing=[],
+             thread_id=threading.current_thread().name,
+             execution_time=0.0,
+             message="SKIPPED",
+             adapter_response={},
+             failures=1,
+             batch_results=BatchResults(failed=[self.batches[self.batch_idx]]),
+         )
+         self.print_result_line(result=result)
+         return result
+
+     def error_result(self, node, message, start_time, timing_info):
+         """Necessary to return a result with a batch result
+
+         Called by `BaseRunner.safe_run` when an error occurs
+         """
+         return self._build_run_result(
+             node=node,
+             start_time=start_time,
+             status=RunStatus.Error,
+             timing_info=timing_info,
+             message=message,
+             batch_results=BatchResults(failed=[self.batches[self.batch_idx]]),
+         )
+
+     def compile(self, manifest: Manifest):
+         batch = self.batches[self.batch_idx]
+
+         # LEGACY: Set start/end in context prior to re-compiling (Will be removed for 1.10+)
+         # TODO: REMOVE before 1.10 GA
+         self.node.config["__dbt_internal_microbatch_event_time_start"] = batch[0]
+         self.node.config["__dbt_internal_microbatch_event_time_end"] = batch[1]
+         # Create batch context on model node prior to re-compiling
+         self.node.batch = BatchContext(
+             id=MicrobatchBuilder.batch_id(batch[0], self.node.config.batch_size),
+             event_time_start=batch[0],
+             event_time_end=batch[1],
+         )
+         # Recompile node to re-resolve refs with event time filters rendered, update context
+         self.compiler.compile_node(
+             self.node,
+             manifest,
+             {},
+             split_suffix=MicrobatchBuilder.format_batch_start(
+                 batch[0], self.node.config.batch_size
+             ),
+         )
+
+         return self.node
+
+     def _build_succesful_run_batch_result(
+         self,
+         model: ModelNode,
+         context: Dict[str, Any],
+         batch: BatchType,
+         elapsed_time: float = 0.0,
+     ) -> RunResult:
+         run_result = self._build_run_model_result(model, context, elapsed_time)
+         run_result.batch_results = BatchResults(successful=[batch])
+         return run_result
+
+     def _build_failed_run_batch_result(
+         self,
+         model: ModelNode,
+         batch: BatchType,
+         elapsed_time: float = 0.0,
+     ) -> RunResult:
+         return RunResult(
+             node=model,
+             status=RunStatus.Error,
+             timing=[],
+             thread_id=threading.current_thread().name,
+             execution_time=elapsed_time,
+             message="ERROR",
+             adapter_response={},
+             failures=1,
+             batch_results=BatchResults(failed=[batch]),
+         )
+
+     def _execute_microbatch_materialization(
+         self,
+         model: ModelNode,
+         context: Dict[str, Any],
+         materialization_macro: MacroProtocol,
+     ) -> RunResult:
+
+         batch = self.batches[self.batch_idx]
+         # call materialization_macro to get a batch-level run result
+         start_time = time.perf_counter()
+         try:
+             # Update jinja context with batch context members
+             jinja_context = MicrobatchBuilder.build_jinja_context_for_batch(
+                 model=model,
+                 incremental_batch=self.incremental_batch,
+             )
+             context.update(jinja_context)
+
+             # Materialize batch and cache any materialized relations
+             result = MacroGenerator(
+                 materialization_macro, context, stack=context["context_macro_stack"]
+             )()
+             for relation in self._materialization_relations(result, model):
+                 self.adapter.cache_added(relation.incorporate(dbt_created=True))
+
+             # Build result of executed batch
+             batch_run_result = self._build_succesful_run_batch_result(
+                 model, context, batch, time.perf_counter() - start_time
+             )
+             batch_result = batch_run_result
+
+             # At least one batch has been inserted successfully!
+             # Can proceed incrementally + in parallel
+             self.relation_exists = True
+
+         except (KeyboardInterrupt, SystemExit):
+             # reraise it for GraphRunnableTask.execute_nodes to handle
+             raise
+         except Exception as e:
+             fire_event(
+                 GenericExceptionOnRun(
+                     unique_id=self.node.unique_id,
+                     exc=f"Exception on worker thread. {str(e)}",
+                     node_info=self.node.node_info,
+                 )
+             )
+             batch_run_result = self._build_failed_run_batch_result(
+                 model, batch, time.perf_counter() - start_time
+             )
+
+             batch_result = batch_run_result
+
+         return batch_result
+
+     def _execute_model(
+         self,
+         hook_ctx: Any,
+         context_config: Any,
+         model: ModelNode,
+         context: Dict[str, Any],
+         materialization_macro: MacroProtocol,
+     ) -> RunResult:
+         try:
+             batch_result = self._execute_microbatch_materialization(
+                 model, context, materialization_macro
+             )
+         finally:
+             self.adapter.post_model_hook(context_config, hook_ctx)
+
+         return batch_result
+
+
+ class MicrobatchModelRunner(ModelRunner):
+     """Handles the orchestration of batches to run for a given microbatch model"""
+
+     def __init__(self, config, adapter, node, node_index: int, num_nodes: int):
+         super().__init__(config, adapter, node, node_index, num_nodes)
+
+         # The parent task is necessary because we need access to the `_submit_batch` and `submit` methods
+         self._parent_task: Optional[RunTask] = None
+         # The pool is necessary because we need the batches to be executed within the same thread pool
+         self._pool: Optional[DbtThreadPool] = None
+
+     def set_parent_task(self, parent_task: RunTask) -> None:
+         self._parent_task = parent_task
+
+     def set_pool(self, pool: DbtThreadPool) -> None:
+         self._pool = pool
+
+     @property
+     def parent_task(self) -> RunTask:
+         if self._parent_task is None:
+             raise DbtInternalError(
+                 msg="Tried to access `parent_task` of `MicrobatchModelRunner` before it was set"
+             )
+
+         return self._parent_task
+
+     @property
+     def pool(self) -> DbtThreadPool:
+         if self._pool is None:
+             raise DbtInternalError(
+                 msg="Tried to access `pool` of `MicrobatchModelRunner` before it was set"
+             )
+
+         return self._pool
+
+     def _has_relation(self, model: ModelNode) -> bool:
+         """Check whether the relation for the model exists in the data warehouse"""
+         relation_info = self.adapter.Relation.create_from(self.config, model)
+         relation = self.adapter.get_relation(
+             relation_info.database, relation_info.schema, relation_info.name
+         )
+         return relation is not None
+
+     def _is_incremental(self, model) -> bool:
+         """Check whether the model should be run `incrementally` or as `full refresh`"""
+         # TODO: Remove this whole function. This should be a temporary method. We're working with adapters on
+         # a strategy to ensure we can access the `is_incremental` logic without drift
+         relation_info = self.adapter.Relation.create_from(self.config, model)
+         relation = self.adapter.get_relation(
+             relation_info.database, relation_info.schema, relation_info.name
+         )
+         if (
+             relation is not None
+             and relation.type == "table"
+             and model.config.materialized == "incremental"
+         ):
+             if model.config.full_refresh is not None:
+                 return not model.config.full_refresh
+             else:
+                 return not getattr(self.config.args, "FULL_REFRESH", False)
+         else:
+             return False
+
+     def _initial_run_microbatch_model_result(self, model: ModelNode) -> RunResult:
+         return RunResult(
+             node=model,
+             status=RunStatus.Success,
+             timing=[],
+             thread_id=threading.current_thread().name,
+             # The execution_time here doesn't get propagated to logs because
+             # `safe_run_hooks` handles the elapsed time at the node level
+             execution_time=0,
+             message="",
+             adapter_response={},
+             failures=0,
+             batch_results=BatchResults(),
+         )
+
+     def describe_node(self) -> str:
+         return f"{self.node.language} microbatch model {self.get_node_representation()}"
+
+     def merge_batch_results(self, result: RunResult, batch_results: List[RunResult]):
+         """merge batch_results into result"""
+         if result.batch_results is None:
+             result.batch_results = BatchResults()
+
+         for batch_result in batch_results:
+             if batch_result.batch_results is not None:
+                 result.batch_results += batch_result.batch_results
+                 result.execution_time += batch_result.execution_time
+
+         num_successes = len(result.batch_results.successful)
+         num_failures = len(result.batch_results.failed)
+         if num_failures == 0:
+             status = RunStatus.Success
+             msg = "SUCCESS"
+         elif num_successes == 0:
+             status = RunStatus.Error
+             msg = "ERROR"
+         else:
+             status = RunStatus.PartialSuccess
+             msg = f"PARTIAL SUCCESS ({num_successes}/{num_successes + num_failures})"
+         result.status = status
+         result.message = msg
+
+         result.batch_results.successful = sorted(result.batch_results.successful)
+         result.batch_results.failed = sorted(result.batch_results.failed)
+
+         # If retrying, propagate previously successful batches into the final result, even though they were not run in this invocation
+         if self.node.previous_batch_results is not None:
+             result.batch_results.successful += self.node.previous_batch_results.successful
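# Worked example of the rollup above (hypothetical counts, not part of
# run.py): 3 successful batches and 1 failed batch give
# RunStatus.PartialSuccess with message "PARTIAL SUCCESS (3/4)";
# 0 failures -> Success / "SUCCESS"; 0 successes -> Error / "ERROR".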
+
+     def _update_result_with_unfinished_batches(
+         self, result: RunResult, batches: Dict[int, BatchType]
+     ) -> None:
+         """This method is really only to be used when the execution of a microbatch model is halted before all batches have had a chance to run"""
+         batches_finished: Set[BatchType] = set()
+
+         if result.batch_results:
+             # build list of finished batches
+             batches_finished = batches_finished.union(set(result.batch_results.successful))
+             batches_finished = batches_finished.union(set(result.batch_results.failed))
+         else:
+             # instantiate `batch_results` if it was `None`
+             result.batch_results = BatchResults()
+
+         # skipped batches are any batch that was expected but didn't finish
+         batches_expected = {batch for _, batch in batches.items()}
+         skipped_batches = batches_expected.difference(batches_finished)
+
+         result.batch_results.failed.extend(list(skipped_batches))
+
+         # We call this method, even though we are merging no new results, as it updates
+         # the result with the appropriate status (Success/Partial/Failed)
+         self.merge_batch_results(result, [])
+
+     def get_microbatch_builder(self, model: ModelNode) -> MicrobatchBuilder:
+         # Initially set the start/end to values from args
+         event_time_start = getattr(self.config.args, "EVENT_TIME_START", None)
+         event_time_end = getattr(self.config.args, "EVENT_TIME_END", None)
+
+         # If we're in sample mode, alter start/end to sample values
+         if getattr(self.config.args, "SAMPLE", None) is not None:
+             event_time_start = self.config.args.sample.start
+             event_time_end = self.config.args.sample.end
+
+         return MicrobatchBuilder(
+             model=model,
+             is_incremental=self._is_incremental(model),
+             event_time_start=event_time_start,
+             event_time_end=event_time_end,
+             default_end_time=get_invocation_started_at(),
+         )
+
+     def get_batches(self, model: ModelNode) -> Dict[int, BatchType]:
+         """Get the batches that should be run for the model"""
+
+         # Note currently (02/23/2025) model.previous_batch_results is only ever _not_ `None`
+         # IFF `dbt retry` is being run and the microbatch model had batches which
+         # failed on the run of the model (which is being retried)
+         if model.previous_batch_results is None:
+             microbatch_builder = self.get_microbatch_builder(model)
+             end = microbatch_builder.build_end_time()
+             start = microbatch_builder.build_start_time(end)
+             batches = microbatch_builder.build_batches(start, end)
+         else:
+             batches = model.previous_batch_results.failed
+
+         return {batch_idx: batches[batch_idx] for batch_idx in range(len(batches))}
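# Sketch of the two cases above (hypothetical data, not part of run.py):
#
#     # fresh run:  builder yields consecutive (start, end) windows, e.g.
#     #     {0: (jan1, jan2), 1: (jan2, jan3), 2: (jan3, jan4)}
#     # dbt retry:  only previously failed batches are replayed, e.g.
#     #     previous_batch_results.failed == [(jan2, jan3)]
#     #     -> {0: (jan2, jan3)}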
1043
+
1044
+ def compile(self, manifest: Manifest):
1045
+ """Don't do anything here because this runner doesn't need to compile anything"""
1046
+ return self.node
1047
+
1048
+ def execute(self, model: ModelNode, manifest: Manifest) -> RunResult:
1049
+ # Execution really means orchestration in this case
1050
+
1051
+ batches = self.get_batches(model=model)
1052
+ relation_exists = self._has_relation(model=model)
1053
+ result = self._initial_run_microbatch_model_result(model=model)
1054
+
1055
+ # No batches to run, so return initial result
1056
+ if len(batches) == 0:
1057
+ return result
1058
+
1059
+ batch_results: List[RunResult] = []
1060
+ batch_idx = 0
1061
+
1062
+ # Run first batch not in parallel
1063
+ relation_exists = self.parent_task._submit_batch(
1064
+ node=model,
1065
+ adapter=self.adapter,
1066
+ relation_exists=relation_exists,
1067
+ batches=batches,
1068
+ batch_idx=batch_idx,
1069
+ batch_results=batch_results,
1070
+ pool=self.pool,
1071
+ force_sequential_run=True,
1072
+ incremental_batch=self._is_incremental(model=model),
1073
+ )
1074
+ batch_idx += 1
1075
+ skip_batches = batch_results[0].status != RunStatus.Success
1076
+
1077
+ # Run all batches except first and last batch, in parallel if possible
1078
+ while batch_idx < len(batches) - 1:
1079
+ relation_exists = self.parent_task._submit_batch(
1080
+ node=model,
1081
+ adapter=self.adapter,
1082
+ relation_exists=relation_exists,
1083
+ batches=batches,
1084
+ batch_idx=batch_idx,
1085
+ batch_results=batch_results,
1086
+ pool=self.pool,
1087
+ skip=skip_batches,
1088
+ )
1089
+ batch_idx += 1
1090
+
1091
+ # Wait until all submitted batches have completed
1092
+ while len(batch_results) != batch_idx:
1093
+ # Check if the pool was closed, because if it was, then the main thread is trying to exit.
1094
+ # If the main thread is trying to exit, we need to shutdown. If we _don't_ shutdown, then
1095
+ # batches will continue to execute and we'll delay the run from stopping
1096
+ if self.pool.is_closed():
1097
+ # It's technically possible for more results to come in while we clean up
1098
+ # instead we're going to say the didn't finish, regardless of if they finished
1099
+ # or not. Thus, lets get a copy of the results as they exist right "now".
1100
+ frozen_batch_results = deepcopy(batch_results)
1101
+ self.merge_batch_results(result, frozen_batch_results)
1102
+ self._update_result_with_unfinished_batches(result, batches)
1103
+ return result
1104
+
1105
+ # breifly sleep so that this thread doesn't go brrrrr while waiting
1106
+ time.sleep(0.1)
1107
+
1108
+ # Only run "last" batch if there is more than one batch
1109
+ if len(batches) != 1:
1110
+ # Final batch runs once all others complete to ensure post_hook runs at the end
1111
+ self.parent_task._submit_batch(
1112
+ node=model,
1113
+ adapter=self.adapter,
1114
+ relation_exists=relation_exists,
1115
+ batches=batches,
1116
+ batch_idx=batch_idx,
1117
+ batch_results=batch_results,
1118
+ pool=self.pool,
1119
+ force_sequential_run=True,
1120
+ skip=skip_batches,
1121
+ )
1122
+
1123
+ # Finalize run: merge results, track model run, and print final result line
1124
+ self.merge_batch_results(result, batch_results)
1125
+
1126
+ return result
1127
+
1128
+
1129
+ class RunTask(CompileTask):
+     def __init__(
+         self,
+         args: Flags,
+         config: RuntimeConfig,
+         manifest: Manifest,
+         batch_map: Optional[Dict[str, BatchResults]] = None,
+     ) -> None:
+         super().__init__(args, config, manifest)
+         self.batch_map = batch_map
+
+     def raise_on_first_error(self) -> bool:
+         return False
+
+     def get_hook_sql(self, adapter, hook, idx, num_hooks, extra_context) -> str:
+         if self.manifest is None:
+             raise DbtInternalError("compile_node called before manifest was loaded")
+
+         compiled = self.compiler.compile_node(hook, self.manifest, extra_context)
+         statement = compiled.compiled_code
+         hook_index = hook.index or num_hooks
+         hook_obj = get_hook(statement, index=hook_index)
+         return hook_obj.sql or ""
+
+     def handle_job_queue(self, pool, callback):
+         node = self.job_queue.get()
+         self._raise_set_error()
+         runner = self.get_runner(node)
+         # We finally know what we're running! Make sure we haven't decided
+         # to skip it due to upstream failures.
+         if runner.node.unique_id in self._skipped_children:
+             cause = self._skipped_children.pop(runner.node.unique_id)
+             runner.do_skip(cause=cause)
+
+         if isinstance(runner, MicrobatchModelRunner):
+             runner.set_parent_task(self)
+             runner.set_pool(pool)
+
+         args = [runner]
+         self._submit(pool, args, callback)
+
+     def _submit_batch(
+         self,
+         node: ModelNode,
+         adapter: BaseAdapter,
+         relation_exists: bool,
+         batches: Dict[int, BatchType],
+         batch_idx: int,
+         batch_results: List[RunResult],
+         pool: DbtThreadPool,
+         force_sequential_run: bool = False,
+         skip: bool = False,
+         incremental_batch: bool = True,
+     ):
+         node_copy = deepcopy(node)
+         # Only run pre_hook(s) for the first batch
+         if batch_idx != 0:
+             node_copy.config.pre_hook = []
+
+         # Only run post_hook(s) for the last batch
+         if batch_idx != len(batches) - 1:
+             node_copy.config.post_hook = []
+
+         # TODO: We should be using self.get_runner; however, doing so currently
+         # causes the node count to increment when we don't want it to
+         batch_runner = MicrobatchBatchRunner(
+             self.config,
+             adapter,
+             node_copy,
+             self.run_count,
+             self.num_nodes,
+             batch_idx,
+             batches,
+             relation_exists,
+             incremental_batch,
+         )
+
+         if skip:
+             batch_runner.do_skip()
+
+         if not pool.is_closed():
+             if not force_sequential_run and batch_runner.should_run_in_parallel():
+                 fire_event(
+                     MicrobatchExecutionDebug(
+                         msg=f"{batch_runner.describe_batch()} is being run concurrently"
+                     )
+                 )
+                 self._submit(pool, [batch_runner], batch_results.append)
+             else:
+                 fire_event(
+                     MicrobatchExecutionDebug(
+                         msg=f"{batch_runner.describe_batch()} is being run sequentially"
+                     )
+                 )
+                 batch_results.append(self.call_runner(batch_runner))
+                 relation_exists = batch_runner.relation_exists
+         else:
+             batch_results.append(
+                 batch_runner._build_failed_run_batch_result(node_copy, batches[batch_idx])
+             )
+
+         return relation_exists
+
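The pre/post hook trimming above is the detail that makes per-batch hooks behave like a single model run. An illustrative sketch of the rule applied to a plain dict config (the 'pre'/'post' values are placeholders):

    from copy import deepcopy

    def trim_hooks(node_config: dict, batch_idx: int, num_batches: int) -> dict:
        cfg = deepcopy(node_config)
        if batch_idx != 0:
            cfg["pre_hook"] = []       # pre_hooks only on the first batch
        if batch_idx != num_batches - 1:
            cfg["post_hook"] = []      # post_hooks only on the last batch
        return cfg

    base = {"pre_hook": ["pre"], "post_hook": ["post"]}
    print(trim_hooks(base, 0, 3))  # {'pre_hook': ['pre'], 'post_hook': []}
    print(trim_hooks(base, 1, 3))  # {'pre_hook': [], 'post_hook': []}
    print(trim_hooks(base, 2, 3))  # {'pre_hook': [], 'post_hook': ['post']}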
+     def _hook_keyfunc(self, hook: HookNode) -> Tuple[str, Optional[int]]:
+         package_name = hook.package_name
+         if package_name == self.config.project_name:
+             package_name = BiggestName("")
+         return package_name, hook.index
+
+     def get_hooks_by_type(self, hook_type: RunHookType) -> List[HookNode]:
+         if self.manifest is None:
+             raise DbtInternalError("self.manifest was None in get_hooks_by_type")
+
+         nodes = self.manifest.nodes.values()
+         # Find all hooks defined in the manifest (there could be multiple projects)
+         hooks: List[HookNode] = get_hooks_by_tags(nodes, {hook_type})
+         hooks.sort(key=self._hook_keyfunc)
+         return hooks
+
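The sort key above orders hooks by (package_name, index), substituting BiggestName for the root project's name so its hooks run after every dependency's. A hedged sketch of the effect, using a str subclass that always compares greatest in place of dbt's BiggestName helper:

    class AlwaysLast(str):
        # Compares greater than any other string, mimicking BiggestName
        def __lt__(self, other) -> bool:
            return False
        def __gt__(self, other) -> bool:
            return True

    def keyfunc(hook: dict, project_name: str = "my_project"):
        package = hook["package"]
        if package == project_name:
            package = AlwaysLast("")
        return package, hook["index"]

    hooks = [
        {"package": "my_project", "index": 1},
        {"package": "some_dependency", "index": 2},
        {"package": "some_dependency", "index": 1},
    ]
    print([(h["package"], h["index"]) for h in sorted(hooks, key=keyfunc)])
    # [('some_dependency', 1), ('some_dependency', 2), ('my_project', 1)]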
+     def safe_run_hooks(
+         self, adapter: BaseAdapter, hook_type: RunHookType, extra_context: Dict[str, Any]
+     ) -> RunStatus:
+         ordered_hooks = self.get_hooks_by_type(hook_type)
+
+         if hook_type == RunHookType.End and ordered_hooks:
+             fire_event(Formatting(""))
+
+         # on-run-* hooks should run outside of a transaction. This is necessary
+         # because psycopg2 automatically begins a transaction when a connection is created.
+         adapter.clear_transaction()
+         if not ordered_hooks:
+             return RunStatus.Success
+
+         status = RunStatus.Success
+         failed = False
+         num_hooks = len(ordered_hooks)
+
+         for idx, hook in enumerate(ordered_hooks, 1):
+             with log_contextvars(node_info=hook.node_info):
+                 hook.index = idx
+                 hook_name = f"{hook.package_name}.{hook_type}.{hook.index - 1}"
+                 execution_time = 0.0
+                 timing: List[TimingInfo] = []
+                 failures = 1
+
+                 if not failed:
+                     with collect_timing_info("compile", timing.append):
+                         sql = self.get_hook_sql(
+                             adapter, hook, hook.index, num_hooks, extra_context
+                         )
+
+                     started_at = timing[0].started_at or datetime.now(timezone.utc).replace(
+                         tzinfo=None
+                     )
+                     hook.update_event_status(
+                         started_at=started_at.isoformat(), node_status=RunningStatus.Started
+                     )
+
+                     fire_event(
+                         LogHookStartLine(
+                             statement=hook_name,
+                             index=hook.index,
+                             total=num_hooks,
+                             node_info=hook.node_info,
+                         )
+                     )
+
+                     with collect_timing_info("execute", timing.append):
+                         status, message = get_execution_status(sql, adapter)
+
+                     finished_at = timing[1].completed_at or datetime.now(timezone.utc).replace(
+                         tzinfo=None
+                     )
+                     hook.update_event_status(finished_at=finished_at.isoformat())
+                     execution_time = (finished_at - started_at).total_seconds()
+                     failures = 0 if status == RunStatus.Success else 1
+
+                     if status == RunStatus.Success:
+                         message = f"{hook_name} passed"
+                     else:
+                         message = f"{hook_name} failed, error:\n {message}"
+                         failed = True
+                 else:
+                     status = RunStatus.Skipped
+                     message = f"{hook_name} skipped"
+
+                 hook.update_event_status(node_status=status)
+
+                 self.node_results.append(
+                     RunResult(
+                         status=status,
+                         thread_id="main",
+                         timing=timing,
+                         message=message,
+                         adapter_response={},
+                         execution_time=execution_time,
+                         failures=failures,
+                         node=hook,
+                     )
+                 )
+
+                 fire_event(
+                     LogHookEndLine(
+                         statement=hook_name,
+                         status=status,
+                         index=hook.index,
+                         total=num_hooks,
+                         execution_time=execution_time,
+                         node_info=hook.node_info,
+                     )
+                 )
+
+         if hook_type == RunHookType.Start and ordered_hooks:
+             fire_event(Formatting(""))
+
+         return status
+
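Condensed to its control flow, safe_run_hooks is a fail-fast loop: hooks run in order, and after the first failure the remaining hooks are recorded as skipped rather than executed. A minimal sketch (run_sql stands in for get_execution_status):

    from typing import Callable, List, Tuple

    def run_hooks(hooks: List[str], run_sql: Callable[[str], bool]) -> List[Tuple[str, str]]:
        results: List[Tuple[str, str]] = []
        failed = False
        for name in hooks:
            if failed:
                # Once one hook errors, the rest are skipped
                results.append((name, "skipped"))
                continue
            ok = run_sql(name)
            results.append((name, "success" if ok else "error"))
            failed = failed or not ok
        return results

    print(run_hooks(["h1", "h2", "h3"], run_sql=lambda name: name != "h2"))
    # [('h1', 'success'), ('h2', 'error'), ('h3', 'skipped')]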
+     def print_results_line(self, results, execution_time) -> None:
+         nodes = [r.node for r in results if hasattr(r, "node")]
+         stat_line = get_counts(nodes)
+
+         execution = ""
+
+         if execution_time is not None:
+             execution = utils.humanize_execution_time(execution_time=execution_time)
+
+         fire_event(Formatting(""))
+         fire_event(
+             FinishedRunningStats(
+                 stat_line=stat_line, execution=execution, execution_time=execution_time
+             )
+         )
+
+     def populate_microbatch_batches(self, selected_uids: AbstractSet[str]):
+         if self.batch_map is not None and self.manifest is not None:
+             for uid in selected_uids:
+                 if uid in self.batch_map:
+                     node = self.manifest.ref_lookup.perform_lookup(uid, self.manifest)
+                     if isinstance(node, ModelNode):
+                         node.previous_batch_results = self.batch_map[uid]
+
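The batch_map wiring above is what lets retries skip work: BatchResults (see dbt/artifacts/schemas/batch_results.py) records which batch windows succeeded and which failed on a previous run. A hedged illustration of the idea, with plain tuples standing in for the actual batch types and purely illustrative dates:

    from datetime import datetime

    # Outcome of a hypothetical earlier run of one microbatch model
    previous_batch_results = {
        "successful": [(datetime(2024, 1, 1), datetime(2024, 1, 2))],
        "failed": [(datetime(2024, 1, 2), datetime(2024, 1, 3))],
    }

    # On retry, only the failed windows need to be re-executed
    batches_to_rerun = previous_batch_results["failed"]
    print(batches_to_rerun)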
+     def before_run(self, adapter: BaseAdapter, selected_uids: AbstractSet[str]) -> RunStatus:
+         with adapter.connection_named("master"):
+             self.defer_to_manifest()
+             required_schemas = self.get_model_schemas(adapter, selected_uids)
+             self.create_schemas(adapter, required_schemas)
+             self.populate_adapter_cache(adapter, required_schemas)
+             self.populate_microbatch_batches(selected_uids)
+             group_lookup.init(self.manifest, selected_uids)
+
+             # DVT v0.57.0: Auto-snapshot metadata on the first run or on --full-refresh
+             self._ensure_source_metadata()
+
+             # v0.59.0a39: Validate that source tables exist before execution.
+             # Skipped when the --skip-source-validation flag is set.
+             if not getattr(self.config.args, "SKIP_SOURCE_VALIDATION", False):
+                 self._validate_source_tables(adapter, selected_uids)
+
+             run_hooks_status = self.safe_run_hooks(adapter, RunHookType.Start, {})
+             return run_hooks_status
+
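The validation gate relies on the dbt convention that CLI flags surface as uppercase attributes on the args object; the getattr default keeps older arg objects working. A tiny sketch with a stand-in args object:

    from types import SimpleNamespace

    def should_validate(args) -> bool:
        # Defaults to validating when the flag is absent
        return not getattr(args, "SKIP_SOURCE_VALIDATION", False)

    print(should_validate(SimpleNamespace()))                             # True
    print(should_validate(SimpleNamespace(SKIP_SOURCE_VALIDATION=True)))  # False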
+     def _validate_source_tables(self, adapter: BaseAdapter, selected_uids: AbstractSet[str]) -> None:
+         """
+         Validate that all source tables referenced by the selected models exist.
+
+         v0.59.0a39: Pre-execution validation to fail fast with clear error messages
+         when source tables are missing. This prevents models from failing at
+         execution time with cryptic database errors.
+
+         :param adapter: Database adapter for checking table existence
+         :param selected_uids: Set of selected node unique IDs
+         :raises DbtRuntimeError: If any source tables are missing
+         """
+         import logging
+         logger = logging.getLogger(__name__)
+
+         missing_sources = []
+
+         # Iterate through the selected models
+         for uid in selected_uids:
+             if uid not in self.manifest.nodes:
+                 continue
+
+             node = self.manifest.nodes[uid]
+             # Sources referenced by this model are stored as a list of Source
+             # objects in node.sources
+             if not hasattr(node, 'sources') or not node.sources:
+                 continue
+
+             # Check each source referenced by this model
+             for source_ref in node.sources:
+                 # source_ref is a Source object with a unique_id
+                 source_uid = source_ref.unique_id
+                 if source_uid not in self.manifest.sources:
+                     missing_sources.append({
+                         'model': node.name,
+                         'source': source_uid,
+                         'reason': 'Source definition not found in manifest'
+                     })
+                     continue
+
+                 source_def = self.manifest.sources[source_uid]
+                 # Build a relation for the source using the adapter
+                 try:
+                     relation = adapter.Relation.create_from_source(source_def, source_ref)
+                 except Exception as e:
+                     # If we can't create the relation, log and continue
+                     logger.warning(
+                         f"Could not create relation for source {source_def.source_name}.{source_def.name} "
+                         f"for model {node.name}: {e}"
+                     )
+                     continue
+
+                 # Check whether the relation exists in the database
+                 try:
+                     # Use the source's connection (which may differ from the target),
+                     # falling back to the default connection when none is named
+                     source_connection = getattr(source_def, 'source_name', None)
+                     if source_connection:
+                         with adapter.connection_named(source_connection):
+                             found = adapter.get_relation(
+                                 relation.database,
+                                 relation.schema,
+                                 relation.identifier
+                             )
+                     else:
+                         found = adapter.get_relation(
+                             relation.database,
+                             relation.schema,
+                             relation.identifier
+                         )
+                     # get_relation returns None when the relation does not exist
+                     if found is None:
+                         missing_sources.append({
+                             'model': node.name,
+                             'source': f"{source_def.source_name}.{source_def.name}",
+                             'relation': str(relation),
+                             'reason': 'Table does not exist in database'
+                         })
+                 except Exception as e:
+                     # If we can't check (e.g. a connection issue), log a warning but
+                     # continue; the actual execution will fail with a clearer error
+                     logger.warning(
+                         f"Could not validate source {source_def.source_name}.{source_def.name} "
+                         f"for model {node.name}: {e}"
+                     )
+
+         # If any sources are missing, raise an error with a clear message
+         if missing_sources:
+             error_lines = [
+                 "Source validation failed: The following source tables are missing:",
+                 ""
+             ]
+             for missing in missing_sources:
+                 error_lines.append(f"  - Model: {missing['model']}")
+                 error_lines.append(f"    Source: {missing['source']}")
+                 if 'relation' in missing:
+                     error_lines.append(f"    Relation: {missing['relation']}")
+                 error_lines.append(f"    Reason: {missing['reason']}")
+                 error_lines.append("")
+
+             error_lines.append(
+                 "Please ensure all source tables exist before running models."
+             )
+             error_lines.append(
+                 "You can skip this validation with the --skip-source-validation flag."
+             )
+
+             raise DbtRuntimeError("\n".join(error_lines))
+
+
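Assembled from the error_lines logic above, a validation failure surfaces roughly as follows (the model, source, and relation names here are illustrative):

    Source validation failed: The following source tables are missing:

      - Model: stg_orders
        Source: raw.orders
        Relation: "raw_db"."raw"."orders"
        Reason: Table does not exist in database

    Please ensure all source tables exist before running models.
    You can skip this validation with the --skip-source-validation flag.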
+     def _ensure_source_metadata(self) -> None:
+         """
+         Auto-capture source metadata if not present or on --full-refresh.
+
+         DVT v0.57.0: Ensures metadata is available for type propagation
+         across federated paths. Called automatically before every run.
+         """
+         from pathlib import Path
+
+         try:
+             from dbt.compute.metadata import ProjectMetadataStore
+         except ImportError:
+             # DuckDB not available - skip metadata capture
+             return
+
+         # Get the project root
+         project_dir = getattr(self.config, 'project_root', None)
+         if not project_dir:
+             return
+
+         project_root = Path(project_dir).resolve()
+
+         # Check whether --full-refresh is set
+         full_refresh = getattr(self.config.args, 'FULL_REFRESH', False)
+
+         try:
+             with ProjectMetadataStore(project_root) as store:
+                 store.initialize()
+                 has_metadata = store.has_source_metadata()
+
+                 # Re-capture on the first run OR on --full-refresh
+                 if full_refresh or not has_metadata:
+                     from dbt.task.metadata import MetadataTask
+
+                     # Create args for the metadata task
+                     class MetadataArgs:
+                         def __init__(self):
+                             self.subcommand = 'snapshot'
+                             self.project_dir = str(project_root)
+
+                     task = MetadataTask(MetadataArgs())
+
+                     # Capture without verbose output by temporarily
+                     # redirecting stdout (run silently)
+                     import io
+                     import sys
+                     old_stdout = sys.stdout
+                     sys.stdout = io.StringIO()
+                     try:
+                         task.run_snapshot()
+                     finally:
+                         sys.stdout = old_stdout
+         except Exception:
+             # Silently skip metadata capture if it fails
+             pass
+
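The manual sys.stdout swap above works, but the standard library provides a context manager for exactly this; an equivalent sketch:

    import contextlib
    import io

    def run_snapshot_quietly(task) -> None:
        # Discard anything the snapshot prints to stdout
        with contextlib.redirect_stdout(io.StringIO()):
            task.run_snapshot()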
+     def after_run(self, adapter, results) -> None:
+         # DVT v0.58.4: Clean up all Spark sessions BEFORE the thread pool terminates.
+         # This prevents semaphore leaks and segfaults from JVM cleanup issues.
+         try:
+             from dbt.compute.strategies.local import cleanup_all_spark_sessions
+             cleanup_all_spark_sessions()
+         except ImportError:
+             pass  # PySpark not installed, nothing to clean up
+
+         # In on-run-end hooks, provide the value 'database_schemas', a list of
+         # unique (database, schema) pairs that successfully executed models were
+         # in. For backwards compatibility, also include the old 'schemas' value,
+         # which did not include database information.
+
+         database_schema_set: Set[Tuple[Optional[str], str]] = {
+             (r.node.database, r.node.schema)
+             for r in results
+             if (hasattr(r, "node") and r.node.is_relational)
+             and r.status not in (NodeStatus.Error, NodeStatus.Fail, NodeStatus.Skipped)
+         }
+
+         extras = {
+             "schemas": list({s for _, s in database_schema_set}),
+             "results": [
+                 r for r in results if r.thread_id != "main" or r.status == RunStatus.Error
+             ],  # exclude main-thread results that didn't fail, to preserve backwards compatibility
+             "database_schemas": list(database_schema_set),
+         }
+
+         try:
+             with adapter.connection_named("master"):
+                 self.safe_run_hooks(adapter, RunHookType.End, extras)
+         except (KeyboardInterrupt, SystemExit, DbtRuntimeError):
+             run_result = self.get_result(
+                 results=self.node_results,
+                 elapsed_time=time.time() - self.started_at,
+                 generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
+             )
+
+             if self.args.write_json and hasattr(run_result, "write"):
+                 run_result.write(self.result_path())
+                 add_artifact_produced(self.result_path())
+
+             print_run_end_messages(self.node_results, keyboard_interrupt=True)
+
+             raise
+
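The backwards-compatible 'schemas' value is just the schema component of 'database_schemas', deduplicated; a toy illustration of the derivation:

    # Unique (database, schema) pairs from successfully executed models
    database_schema_set = {("prod", "analytics"), ("prod", "staging"), ("raw", "analytics")}

    # The legacy 'schemas' value drops database info and collapses duplicates
    schemas = list({schema for _, schema in database_schema_set})
    print(sorted(schemas))  # ['analytics', 'staging']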
+     def get_node_selector(self) -> ResourceTypeSelector:
+         if self.manifest is None or self.graph is None:
+             raise DbtInternalError("manifest and graph must be set to perform node selection")
+         return ResourceTypeSelector(
+             graph=self.graph,
+             manifest=self.manifest,
+             previous_state=self.previous_state,
+             resource_types=[NodeType.Model],
+         )
+
+     def get_runner_type(self, node) -> Optional[Type[BaseRunner]]:
+         if self.manifest is None:
+             raise DbtInternalError("manifest must be set prior to calling get_runner_type")
+
+         if (
+             node.config.materialized == "incremental"
+             and node.config.incremental_strategy == "microbatch"
+             and self.manifest.use_microbatch_batches(project_name=self.config.project_name)
+         ):
+             return MicrobatchModelRunner
+         else:
+             return ModelRunner
+
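A model is routed to MicrobatchModelRunner only when all three conditions hold; a minimal sketch of the dispatch with a stand-in node object:

    from types import SimpleNamespace

    def pick_runner(node, use_microbatch_batches: bool) -> str:
        if (
            node.config.materialized == "incremental"
            and node.config.incremental_strategy == "microbatch"
            and use_microbatch_batches
        ):
            return "MicrobatchModelRunner"
        return "ModelRunner"

    node = SimpleNamespace(
        config=SimpleNamespace(materialized="incremental", incremental_strategy="microbatch")
    )
    print(pick_runner(node, use_microbatch_batches=True))   # MicrobatchModelRunner
    print(pick_runner(node, use_microbatch_batches=False))  # ModelRunner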
+     def task_end_messages(self, results) -> None:
+         if results:
+             print_run_end_messages(results)