dvt_core-0.52.2-cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (275)
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2039 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +804 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +50 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +624 -0
  74. dbt/compute/federated_executor.py +837 -0
  75. dbt/compute/filter_pushdown.cpython-310-darwin.so +0 -0
  76. dbt/compute/filter_pushdown.py +273 -0
  77. dbt/compute/jar_provisioning.cpython-310-darwin.so +0 -0
  78. dbt/compute/jar_provisioning.py +255 -0
  79. dbt/compute/java_compat.cpython-310-darwin.so +0 -0
  80. dbt/compute/java_compat.py +689 -0
  81. dbt/compute/jdbc_utils.cpython-310-darwin.so +0 -0
  82. dbt/compute/jdbc_utils.py +678 -0
  83. dbt/compute/smart_selector.cpython-310-darwin.so +0 -0
  84. dbt/compute/smart_selector.py +311 -0
  85. dbt/compute/strategies/__init__.py +54 -0
  86. dbt/compute/strategies/base.py +165 -0
  87. dbt/compute/strategies/dataproc.py +207 -0
  88. dbt/compute/strategies/emr.py +203 -0
  89. dbt/compute/strategies/local.py +364 -0
  90. dbt/compute/strategies/standalone.py +262 -0
  91. dbt/config/__init__.py +4 -0
  92. dbt/config/catalogs.py +94 -0
  93. dbt/config/compute.cpython-310-darwin.so +0 -0
  94. dbt/config/compute.py +547 -0
  95. dbt/config/dvt_profile.cpython-310-darwin.so +0 -0
  96. dbt/config/dvt_profile.py +342 -0
  97. dbt/config/profile.py +422 -0
  98. dbt/config/project.py +873 -0
  99. dbt/config/project_utils.py +28 -0
  100. dbt/config/renderer.py +231 -0
  101. dbt/config/runtime.py +553 -0
  102. dbt/config/selectors.py +208 -0
  103. dbt/config/utils.py +77 -0
  104. dbt/constants.py +28 -0
  105. dbt/context/__init__.py +0 -0
  106. dbt/context/base.py +745 -0
  107. dbt/context/configured.py +135 -0
  108. dbt/context/context_config.py +382 -0
  109. dbt/context/docs.py +82 -0
  110. dbt/context/exceptions_jinja.py +178 -0
  111. dbt/context/macro_resolver.py +195 -0
  112. dbt/context/macros.py +171 -0
  113. dbt/context/manifest.py +72 -0
  114. dbt/context/providers.py +2249 -0
  115. dbt/context/query_header.py +13 -0
  116. dbt/context/secret.py +58 -0
  117. dbt/context/target.py +74 -0
  118. dbt/contracts/__init__.py +0 -0
  119. dbt/contracts/files.py +413 -0
  120. dbt/contracts/graph/__init__.py +0 -0
  121. dbt/contracts/graph/manifest.py +1904 -0
  122. dbt/contracts/graph/metrics.py +97 -0
  123. dbt/contracts/graph/model_config.py +70 -0
  124. dbt/contracts/graph/node_args.py +42 -0
  125. dbt/contracts/graph/nodes.py +1806 -0
  126. dbt/contracts/graph/semantic_manifest.py +232 -0
  127. dbt/contracts/graph/unparsed.py +811 -0
  128. dbt/contracts/project.py +417 -0
  129. dbt/contracts/results.py +53 -0
  130. dbt/contracts/selection.py +23 -0
  131. dbt/contracts/sql.py +85 -0
  132. dbt/contracts/state.py +68 -0
  133. dbt/contracts/util.py +46 -0
  134. dbt/deprecations.py +346 -0
  135. dbt/deps/__init__.py +0 -0
  136. dbt/deps/base.py +152 -0
  137. dbt/deps/git.py +195 -0
  138. dbt/deps/local.py +79 -0
  139. dbt/deps/registry.py +130 -0
  140. dbt/deps/resolver.py +149 -0
  141. dbt/deps/tarball.py +120 -0
  142. dbt/docs/source/_ext/dbt_click.py +119 -0
  143. dbt/docs/source/conf.py +32 -0
  144. dbt/env_vars.py +64 -0
  145. dbt/event_time/event_time.py +40 -0
  146. dbt/event_time/sample_window.py +60 -0
  147. dbt/events/__init__.py +15 -0
  148. dbt/events/base_types.py +36 -0
  149. dbt/events/core_types_pb2.py +2 -0
  150. dbt/events/logging.py +108 -0
  151. dbt/events/types.py +2516 -0
  152. dbt/exceptions.py +1486 -0
  153. dbt/flags.py +89 -0
  154. dbt/graph/__init__.py +11 -0
  155. dbt/graph/cli.py +247 -0
  156. dbt/graph/graph.py +172 -0
  157. dbt/graph/queue.py +214 -0
  158. dbt/graph/selector.py +374 -0
  159. dbt/graph/selector_methods.py +975 -0
  160. dbt/graph/selector_spec.py +222 -0
  161. dbt/graph/thread_pool.py +18 -0
  162. dbt/hooks.py +21 -0
  163. dbt/include/README.md +49 -0
  164. dbt/include/__init__.py +3 -0
  165. dbt/include/starter_project/.gitignore +4 -0
  166. dbt/include/starter_project/README.md +15 -0
  167. dbt/include/starter_project/__init__.py +3 -0
  168. dbt/include/starter_project/analyses/.gitkeep +0 -0
  169. dbt/include/starter_project/dbt_project.yml +36 -0
  170. dbt/include/starter_project/macros/.gitkeep +0 -0
  171. dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  172. dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  173. dbt/include/starter_project/models/example/schema.yml +21 -0
  174. dbt/include/starter_project/seeds/.gitkeep +0 -0
  175. dbt/include/starter_project/snapshots/.gitkeep +0 -0
  176. dbt/include/starter_project/tests/.gitkeep +0 -0
  177. dbt/internal_deprecations.py +26 -0
  178. dbt/jsonschemas/__init__.py +3 -0
  179. dbt/jsonschemas/jsonschemas.py +309 -0
  180. dbt/jsonschemas/project/0.0.110.json +4717 -0
  181. dbt/jsonschemas/project/0.0.85.json +2015 -0
  182. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  183. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  184. dbt/jsonschemas/resources/latest.json +6773 -0
  185. dbt/links.py +4 -0
  186. dbt/materializations/__init__.py +0 -0
  187. dbt/materializations/incremental/__init__.py +0 -0
  188. dbt/materializations/incremental/microbatch.py +236 -0
  189. dbt/mp_context.py +8 -0
  190. dbt/node_types.py +37 -0
  191. dbt/parser/__init__.py +23 -0
  192. dbt/parser/analysis.py +21 -0
  193. dbt/parser/base.py +548 -0
  194. dbt/parser/common.py +266 -0
  195. dbt/parser/docs.py +52 -0
  196. dbt/parser/fixtures.py +51 -0
  197. dbt/parser/functions.py +30 -0
  198. dbt/parser/generic_test.py +100 -0
  199. dbt/parser/generic_test_builders.py +333 -0
  200. dbt/parser/hooks.py +118 -0
  201. dbt/parser/macros.py +137 -0
  202. dbt/parser/manifest.py +2204 -0
  203. dbt/parser/models.py +573 -0
  204. dbt/parser/partial.py +1178 -0
  205. dbt/parser/read_files.py +445 -0
  206. dbt/parser/schema_generic_tests.py +422 -0
  207. dbt/parser/schema_renderer.py +111 -0
  208. dbt/parser/schema_yaml_readers.py +935 -0
  209. dbt/parser/schemas.py +1466 -0
  210. dbt/parser/search.py +149 -0
  211. dbt/parser/seeds.py +28 -0
  212. dbt/parser/singular_test.py +20 -0
  213. dbt/parser/snapshots.py +44 -0
  214. dbt/parser/sources.py +558 -0
  215. dbt/parser/sql.py +62 -0
  216. dbt/parser/unit_tests.py +621 -0
  217. dbt/plugins/__init__.py +20 -0
  218. dbt/plugins/contracts.py +9 -0
  219. dbt/plugins/exceptions.py +2 -0
  220. dbt/plugins/manager.py +163 -0
  221. dbt/plugins/manifest.py +21 -0
  222. dbt/profiler.py +20 -0
  223. dbt/py.typed +1 -0
  224. dbt/query_analyzer.cpython-310-darwin.so +0 -0
  225. dbt/query_analyzer.py +410 -0
  226. dbt/runners/__init__.py +2 -0
  227. dbt/runners/exposure_runner.py +7 -0
  228. dbt/runners/no_op_runner.py +45 -0
  229. dbt/runners/saved_query_runner.py +7 -0
  230. dbt/selected_resources.py +8 -0
  231. dbt/task/__init__.py +0 -0
  232. dbt/task/base.py +503 -0
  233. dbt/task/build.py +197 -0
  234. dbt/task/clean.py +56 -0
  235. dbt/task/clone.py +161 -0
  236. dbt/task/compile.py +150 -0
  237. dbt/task/compute.py +454 -0
  238. dbt/task/debug.py +505 -0
  239. dbt/task/deps.py +280 -0
  240. dbt/task/docs/__init__.py +3 -0
  241. dbt/task/docs/generate.py +660 -0
  242. dbt/task/docs/index.html +250 -0
  243. dbt/task/docs/serve.py +29 -0
  244. dbt/task/freshness.py +322 -0
  245. dbt/task/function.py +121 -0
  246. dbt/task/group_lookup.py +46 -0
  247. dbt/task/init.py +553 -0
  248. dbt/task/java.py +316 -0
  249. dbt/task/list.py +236 -0
  250. dbt/task/printer.py +175 -0
  251. dbt/task/retry.py +175 -0
  252. dbt/task/run.py +1306 -0
  253. dbt/task/run_operation.py +141 -0
  254. dbt/task/runnable.py +758 -0
  255. dbt/task/seed.py +103 -0
  256. dbt/task/show.py +149 -0
  257. dbt/task/snapshot.py +56 -0
  258. dbt/task/spark.py +414 -0
  259. dbt/task/sql.py +110 -0
  260. dbt/task/target_sync.py +759 -0
  261. dbt/task/test.py +464 -0
  262. dbt/tests/fixtures/__init__.py +1 -0
  263. dbt/tests/fixtures/project.py +620 -0
  264. dbt/tests/util.py +651 -0
  265. dbt/tracking.py +529 -0
  266. dbt/utils/__init__.py +3 -0
  267. dbt/utils/artifact_upload.py +151 -0
  268. dbt/utils/utils.py +408 -0
  269. dbt/version.py +268 -0
  270. dvt_cli/__init__.py +72 -0
  271. dvt_core-0.52.2.dist-info/METADATA +286 -0
  272. dvt_core-0.52.2.dist-info/RECORD +275 -0
  273. dvt_core-0.52.2.dist-info/WHEEL +5 -0
  274. dvt_core-0.52.2.dist-info/entry_points.txt +2 -0
  275. dvt_core-0.52.2.dist-info/top_level.txt +2 -0
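For orientation before the file-level diff: a minimal sketch of driving a run against this package programmatically, assuming dvt-core preserves dbt-core's dbtRunner entry point from the bundled dbt/cli/main.py (the project path is a placeholder):

    from dbt.cli.main import dbtRunner, dbtRunnerResult

    # Programmatic equivalent of `dbt run`; assumes a valid dbt project
    # and profiles.yml on disk (paths here are placeholders).
    runner = dbtRunner()
    res: dbtRunnerResult = runner.invoke(["run", "--project-dir", "/path/to/project"])

    if res.success:
        for node_result in res.result.results:
            print(node_result.node.name, node_result.status)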
dbt/task/run.py ADDED
@@ -0,0 +1,1306 @@
from __future__ import annotations

import functools
import threading
import time
from copy import deepcopy
from dataclasses import asdict
from datetime import datetime, timezone
from typing import AbstractSet, Any, Dict, Iterable, List, Optional, Set, Tuple, Type

from dbt import tracking, utils
from dbt.adapters.base import BaseAdapter, BaseRelation
from dbt.adapters.capability import Capability
from dbt.adapters.events.types import FinishedRunningStats
from dbt.adapters.exceptions import MissingMaterializationError
from dbt.artifacts.resources import Hook
from dbt.artifacts.schemas.batch_results import BatchResults, BatchType
from dbt.artifacts.schemas.results import (
    NodeStatus,
    RunningStatus,
    RunStatus,
    TimingInfo,
    collect_timing_info,
)
from dbt.artifacts.schemas.run import RunResult
from dbt.cli.flags import Flags
from dbt.clients.jinja import MacroGenerator
from dbt.config import RuntimeConfig
from dbt.context.providers import generate_runtime_model_context
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.graph.nodes import BatchContext, HookNode, ModelNode, ResultNode
from dbt.events.types import (
    GenericExceptionOnRun,
    LogBatchResult,
    LogHookEndLine,
    LogHookStartLine,
    LogModelResult,
    LogStartBatch,
    LogStartLine,
    MicrobatchExecutionDebug,
)
from dbt.exceptions import CompilationError, DbtInternalError, DbtRuntimeError, DbtValidationError
from dbt.graph import ResourceTypeSelector
from dbt.graph.thread_pool import DbtThreadPool
from dbt.hooks import get_hook_dict
from dbt.materializations.incremental.microbatch import MicrobatchBuilder
from dbt.node_types import NodeType, RunHookType
from dbt.task import group_lookup
from dbt.task.base import BaseRunner
from dbt.task.compile import CompileRunner, CompileTask
# DVT: Import query analysis and federated execution components
from dbt.query_analyzer import QueryAnalyzer
from dbt.compute.federated_executor import FederatedExecutor
from dbt.compute.smart_selector import SmartComputeSelector
from dbt.task.printer import get_counts, print_run_end_messages
from dbt.utils.artifact_upload import add_artifact_produced
from dbt_common.clients.jinja import MacroProtocol
from dbt_common.dataclass_schema import dbtClassMixin
from dbt_common.events.base_types import EventLevel
from dbt_common.events.contextvars import log_contextvars
from dbt_common.events.functions import fire_event, get_invocation_id
from dbt_common.events.types import Formatting
from dbt_common.exceptions import DbtValidationError
from dbt_common.invocation import get_invocation_started_at


@functools.total_ordering
class BiggestName(str):
    def __lt__(self, other):
        return True

    def __eq__(self, other):
        return isinstance(other, self.__class__)


def _hook_list() -> List[HookNode]:
    return []


def get_hooks_by_tags(
    nodes: Iterable[ResultNode],
    match_tags: Set[str],
) -> List[HookNode]:
    matched_nodes = []
    for node in nodes:
        if not isinstance(node, HookNode):
            continue
        node_tags = node.tags
        if len(set(node_tags) & match_tags):
            matched_nodes.append(node)
    return matched_nodes


def get_hook(source, index):
    hook_dict = get_hook_dict(source)
    hook_dict.setdefault("index", index)
    Hook.validate(hook_dict)
    return Hook.from_dict(hook_dict)


def get_execution_status(sql: str, adapter: BaseAdapter) -> Tuple[RunStatus, str]:
    if not sql.strip():
        return RunStatus.Success, "OK"

    try:
        response, _ = adapter.execute(sql, auto_begin=False, fetch=False)
        status = RunStatus.Success
        message = response._message
    except (KeyboardInterrupt, SystemExit):
        raise
    except DbtRuntimeError as exc:
        status = RunStatus.Error
        message = exc.msg
    except Exception as exc:
        status = RunStatus.Error
        message = str(exc)

    return (status, message)


def _get_adapter_info(adapter, run_model_result) -> Dict[str, Any]:
    """Each adapter returns a dataclass with a flexible dictionary for
    adapter-specific fields. Only the non-'model_adapter_details' fields
    are guaranteed cross adapter."""
    return asdict(adapter.get_adapter_run_info(run_model_result.node.config)) if adapter else {}


def track_model_run(index, num_nodes, run_model_result, adapter=None):
    if tracking.active_user is None:
        raise DbtInternalError("cannot track model run with no active user")
    invocation_id = get_invocation_id()
    node = run_model_result.node
    has_group = True if hasattr(node, "group") and node.group else False
    if node.resource_type == NodeType.Model:
        access = node.access.value if node.access is not None else None
        contract_enforced = node.contract.enforced
        versioned = True if node.version else False
        incremental_strategy = node.config.incremental_strategy
    else:
        access = None
        contract_enforced = False
        versioned = False
        incremental_strategy = None

    tracking.track_model_run(
        {
            "invocation_id": invocation_id,
            "index": index,
            "total": num_nodes,
            "execution_time": run_model_result.execution_time,
            "run_status": str(run_model_result.status).upper(),
            "run_skipped": run_model_result.status == NodeStatus.Skipped,
            "run_error": run_model_result.status == NodeStatus.Error,
            "model_materialization": node.get_materialization(),
            "model_incremental_strategy": incremental_strategy,
            "model_id": utils.get_hash(node),
            "hashed_contents": utils.get_hashed_contents(node),
            "timing": [t.to_dict(omit_none=True) for t in run_model_result.timing],
            "language": str(node.language),
            "has_group": has_group,
            "contract_enforced": contract_enforced,
            "access": access,
            "versioned": versioned,
            "adapter_info": _get_adapter_info(adapter, run_model_result),
        }
    )


# make sure that we got an ok result back from a materialization
def _validate_materialization_relations_dict(inp: Dict[Any, Any], model) -> List[BaseRelation]:
    try:
        relations_value = inp["relations"]
    except KeyError:
        msg = (
            'Invalid return value from materialization, "relations" '
            "not found, got keys: {}".format(list(inp))
        )
        raise CompilationError(msg, node=model) from None

    if not isinstance(relations_value, list):
        msg = (
            'Invalid return value from materialization, "relations" '
            "not a list, got: {}".format(relations_value)
        )
        raise CompilationError(msg, node=model) from None

    relations: List[BaseRelation] = []
    for relation in relations_value:
        if not isinstance(relation, BaseRelation):
            msg = (
                "Invalid return value from materialization, "
                '"relations" contains non-Relation: {}'.format(relation)
            )
            raise CompilationError(msg, node=model)

        assert isinstance(relation, BaseRelation)
        relations.append(relation)
    return relations


class ModelRunner(CompileRunner):
    def get_node_representation(self):
        display_quote_policy = {"database": False, "schema": False, "identifier": False}
        relation = self.adapter.Relation.create_from(
            self.config, self.node, quote_policy=display_quote_policy
        )
        # exclude the database from output if it's the default
        if self.node.database == self.config.credentials.database:
            relation = relation.include(database=False)
        return str(relation)

    def describe_node(self) -> str:
        # TODO CL 'language' will be moved to node level when we change representation
        return f"{self.node.language} {self.node.get_materialization()} model {self.get_node_representation()}"

    def print_start_line(self):
        fire_event(
            LogStartLine(
                description=self.describe_node(),
                index=self.node_index,
                total=self.num_nodes,
                node_info=self.node.node_info,
            )
        )

    def print_result_line(self, result):
        description = self.describe_node()
        group = group_lookup.get(self.node.unique_id)
        if result.status == NodeStatus.Error:
            status = result.status
            level = EventLevel.ERROR
        else:
            status = result.message
            level = EventLevel.INFO
        fire_event(
            LogModelResult(
                description=description,
                status=status,
                index=self.node_index,
                total=self.num_nodes,
                execution_time=result.execution_time,
                node_info=self.node.node_info,
                group=group,
            ),
            level=level,
        )

    def before_execute(self) -> None:
        self.print_start_line()

    def after_execute(self, result) -> None:
        track_model_run(self.node_index, self.num_nodes, result, adapter=self.adapter)
        self.print_result_line(result)

    def _build_run_model_result(self, model, context, elapsed_time: float = 0.0):
        result = context["load_result"]("main")
        if not result:
            raise DbtRuntimeError("main is not being called during running model")
        adapter_response = {}
        if isinstance(result.response, dbtClassMixin):
            adapter_response = result.response.to_dict(omit_none=True)
        return RunResult(
            node=model,
            status=RunStatus.Success,
            timing=[],
            thread_id=threading.current_thread().name,
            execution_time=elapsed_time,
            message=str(result.response),
            adapter_response=adapter_response,
            failures=result.get("failures"),
            batch_results=None,
        )

    def _materialization_relations(self, result: Any, model) -> List[BaseRelation]:
        if isinstance(result, str):
            msg = (
                'The materialization ("{}") did not explicitly return a '
                "list of relations to add to the cache.".format(str(model.get_materialization()))
            )
            raise CompilationError(msg, node=model)

        if isinstance(result, dict):
            return _validate_materialization_relations_dict(result, model)

        msg = (
            "Invalid return value from materialization, expected a dict "
            'with key "relations", got: {}'.format(str(result))
        )
        raise CompilationError(msg, node=model)

    def _execute_model(
        self,
        hook_ctx: Any,
        context_config: Any,
        model: ModelNode,
        context: Dict[str, Any],
        materialization_macro: MacroProtocol,
    ) -> RunResult:
        try:
            result = MacroGenerator(
                materialization_macro, context, stack=context["context_macro_stack"]
            )()
        finally:
            self.adapter.post_model_hook(context_config, hook_ctx)

        for relation in self._materialization_relations(result, model):
            self.adapter.cache_added(relation.incorporate(dbt_created=True))

        return self._build_run_model_result(model, context)

    def execute(self, model, manifest):
        # DVT: Analyze query for execution strategy
        analyzer = QueryAnalyzer(manifest)

        # DVT v0.51.1: Fix --target-compute logic
        # CLI --target-compute should ONLY override models that:
        # 1. Already have compute= config in their model definition, OR
        # 2. Require federated execution (multi-source)
        #
        # Models without compute= config should ALWAYS favor pushdown
        # (adapter-native execution) when possible.
        cli_compute = getattr(self.config.args, 'TARGET_COMPUTE', None)
        model_compute = model.config.compute if hasattr(model.config, 'compute') else None

        # DVT v0.51.6: Target Hierarchy (Rule 2.1):
        # Level 1 (Lowest): profiles.yml default target
        # Level 2: Model-specific target config
        # Level 3 (Highest): CLI --target argument (Forces Global Target Override)
        #
        # Rule 2.2: If CLI --target is used, ALL models are forced to materialize in this target.
        cli_target = getattr(self.config.args, 'TARGET', None)
        model_target = model.config.target if hasattr(model.config, 'target') else None

        # CLI --target (Level 3) overrides model config (Level 2) which overrides profile default (Level 1)
        if cli_target:
            target_connection = cli_target  # CLI always wins
        elif model_target:
            target_connection = model_target  # Model config
        else:
            target_connection = self.config.target_name  # Profile default
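        # For example (illustrative target names): with a profiles.yml default
        # of "dev", a model-level `target: analytics`, and `--target prod` on
        # the CLI, target_connection resolves to "prod"; without the CLI flag
        # it resolves to "analytics"; with neither override it falls back to "dev".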

        # First, analyze WITHOUT any compute override to see if federation is required
        # Pass target_connection to detect cross-adapter scenarios
        natural_analysis = analyzer.analyze(
            model,
            user_compute_override=None,
            target_connection=target_connection
        )

        # DVT v0.51.6: Rule 3.C.3 - View coercion in cross-target scenarios
        # Views are SQL definitions that reference tables by name.
        # You CANNOT create a cross-database view - it's physically impossible.
        # If federation is required, views MUST be coerced to tables with a warning.
        materialization = model.get_materialization()
        convert_view_to_table = False
        if materialization == 'view' and natural_analysis.is_federated:
            convert_view_to_table = True
            # Rule 3.C.3: Log warning about view coercion
            import sys
            print(
                f"[DVT Warning] Model '{model.name}' is configured as 'view' but requires federation. "
                f"Materializing as TABLE instead. (Cross-database views are not supported)",
                file=sys.stderr
            )

        # DVT v0.51.6: Rule 1.5 - Compute engine only applies to Federation path
        # Compute Selection Hierarchy (Rule 1.3):
        # Level 1: Default compute in computes.yml
        # Level 2: Model-specific compute config
        # Level 3: CLI --compute argument
        # BUT: Compute settings are IGNORED for Pushdown-eligible models (Rule 1.5)

        if natural_analysis.is_federated:
            # Federation required - apply compute hierarchy
            if cli_compute:
                user_compute = cli_compute  # Level 3 (highest)
            elif model_compute:
                user_compute = model_compute  # Level 2
            else:
                user_compute = None  # Will use Level 1 default from selector
        else:
            # Pushdown-eligible - Rule 1.5: IGNORE compute settings
            user_compute = None
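        # For example (illustrative engine name): a single-source model with
        # `compute: spark-local` in its config still runs as pushdown on its
        # source adapter and the setting is ignored per Rule 1.5; the same
        # setting on a multi-source model selects the spark-local engine below.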

        # Use the natural analysis (compute override doesn't change pushdown/federation decision)
        analysis = natural_analysis

        # Get target adapter (for materialization)
        # DVT v0.51.6: Use target_connection which follows Rule 2.1 hierarchy
        target_adapter = self.config.get_adapter(target_connection)

        if analysis.is_pushdown:
            # Pushdown execution: Use source adapter directly
            execution_adapter = self.config.get_adapter(analysis.primary_connection)

            context = generate_runtime_model_context(model, self.config, manifest)

            # Execute using existing path but with correct adapter
            materialization_macro = manifest.find_materialization_macro_by_name(
                self.config.project_name,
                model.get_materialization(),
                execution_adapter.type()
            )

            if materialization_macro is None:
                raise MissingMaterializationError(
                    materialization=model.get_materialization(), adapter_type=execution_adapter.type()
                )

            if "config" not in context:
                raise DbtInternalError(
                    "Invalid materialization context generated, missing config: {}".format(context)
                )
            context_config = context["config"]

            # Guard the attribute access: only consult `supported_languages` when
            # the materialization macro actually declares it
            mat_has_supported_langs = hasattr(materialization_macro, "supported_languages")
            model_lang_supported = (
                not mat_has_supported_langs
                or model.language in materialization_macro.supported_languages
            )
            if mat_has_supported_langs and not model_lang_supported:
                str_langs = [str(lang) for lang in materialization_macro.supported_languages]
                raise DbtValidationError(
                    f'Materialization "{materialization_macro.name}" only supports languages {str_langs}; '
                    f'got "{model.language}"'
                )

            # Run hooks
            hook_ctx = execution_adapter.pre_model_hook(context_config)
            result = self._execute_model(hook_ctx, context_config, model, context, materialization_macro)

            return result
        else:
            # Federated execution: Use compute layer
            executor = FederatedExecutor(
                manifest=manifest,
                adapters=self.config.adapters or {},
                default_compute_engine='spark-local'
            )

            # Select compute engine
            if user_compute:
                compute_engine = user_compute
            else:
                selector = SmartComputeSelector(manifest)
                compute_engine = selector.select_engine(model, analysis)

            # Execute federally (pass target adapter type for JDBC materialization)
            # DVT v0.51.6: Pass view coercion flag so executor treats view as table
            fed_result = executor.execute(
                node=model,
                analysis_result=analysis,
                compute_engine_override=compute_engine,
                target_adapter_type=target_adapter.type() if target_adapter else None,
                coerce_view_to_table=convert_view_to_table,
            )

            try:
                # Materialize to target via Spark JDBC
                # DVT v0.51.7: Use 3-part naming (database.schema.table) for adapters like Databricks
                if hasattr(model, 'database') and model.database:
                    target_table = f"{model.database}.{model.schema}.{model.alias}"
                else:
                    target_table = f"{model.schema}.{model.alias}"
                adapter_response = executor.materialize_result(
                    result=fed_result,
                    target_adapter=target_adapter,
                    target_table=target_table,
                    mode='replace' if model.config.full_refresh else 'create',
                    use_jdbc=True,
                    spark_result_df=fed_result.spark_dataframe
                )

                # Return result in expected format
                rows_affected = getattr(adapter_response, 'rows_affected', 0)
                rows_msg = f"{rows_affected} rows" if rows_affected else "completed"
                # DVT v0.51.5: Note when view was materialized as table
                exec_msg = f"Federated execution: {rows_msg}"
                if convert_view_to_table:
                    exec_msg = f"Federated (view→table): {rows_msg}"
                return RunResult(
                    status=RunStatus.Success,
                    timing=[],
                    thread_id='main',
                    execution_time=fed_result.execution_time_ms / 1000.0,
                    adapter_response=adapter_response._asdict() if hasattr(adapter_response, '_asdict') else {},
                    message=exec_msg,
                    failures=None,
                    node=model,
                    agate_table=None,
                )
            finally:
                # Always close the Spark session after materialization
                if fed_result.engine:
                    try:
                        fed_result.engine.close()
                    except Exception as e:
                        # Log but don't fail on cleanup errors
                        import sys
                        print(f"[DVT] Warning: Failed to close Spark session: {e}", file=sys.stderr)


class MicrobatchBatchRunner(ModelRunner):
    """Handles the running of individual batches"""

    def __init__(
        self,
        config,
        adapter,
        node,
        node_index: int,
        num_nodes: int,
        batch_idx: int,
        batches: Dict[int, BatchType],
        relation_exists: bool,
        incremental_batch: bool,
    ):
        super().__init__(config, adapter, node, node_index, num_nodes)

        self.batch_idx = batch_idx
        self.batches = batches
        self.relation_exists = relation_exists
        self.incremental_batch = incremental_batch

    def describe_batch(self) -> str:
        batch_start = self.batches[self.batch_idx][0]
        formatted_batch_start = MicrobatchBuilder.format_batch_start(
            batch_start, self.node.config.batch_size
        )
        return f"batch {formatted_batch_start} of {self.get_node_representation()}"

    def print_result_line(self, result: RunResult):
        if result.status == NodeStatus.Error:
            status = result.status
            level = EventLevel.ERROR
        elif result.status == NodeStatus.Skipped:
            status = result.status
            level = EventLevel.INFO
        else:
            status = result.message
            level = EventLevel.INFO

        fire_event(
            LogBatchResult(
                description=self.describe_batch(),
                status=status,
                batch_index=self.batch_idx + 1,
                total_batches=len(self.batches),
                execution_time=result.execution_time,
                node_info=self.node.node_info,
                group=group_lookup.get(self.node.unique_id),
            ),
            level=level,
        )

    def print_start_line(self) -> None:
        fire_event(
            LogStartBatch(
                description=self.describe_batch(),
                batch_index=self.batch_idx + 1,
                total_batches=len(self.batches),
                node_info=self.node.node_info,
            )
        )

    def should_run_in_parallel(self) -> bool:
        if not self.adapter.supports(Capability.MicrobatchConcurrency):
            run_in_parallel = False
        elif not self.relation_exists:
            # If the relation doesn't exist, we can't run in parallel
            run_in_parallel = False
        elif self.node.config.concurrent_batches is not None:
            # If the relation exists and the `concurrent_batches` config isn't None, use the config value
            run_in_parallel = self.node.config.concurrent_batches
        else:
            # If the relation exists and the `concurrent_batches` config is None, check whether the
            # model self-references `this`. If it does, we assume its batches _can't_ run in parallel.
            run_in_parallel = not self.node.has_this
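        # In short: parallel batch execution requires adapter support for
        # MicrobatchConcurrency and an existing relation; an explicit
        # `concurrent_batches` config then takes precedence, and a model that
        # references `this` falls back to sequential execution.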
        return run_in_parallel

    def on_skip(self):
        result = RunResult(
            node=self.node,
            status=RunStatus.Skipped,
            timing=[],
            thread_id=threading.current_thread().name,
            execution_time=0.0,
            message="SKIPPED",
            adapter_response={},
            failures=1,
            batch_results=BatchResults(failed=[self.batches[self.batch_idx]]),
        )
        self.print_result_line(result=result)
        return result

    def error_result(self, node, message, start_time, timing_info):
        """Necessary to return a result with a batch result

        Called by `BaseRunner.safe_run` when an error occurs
        """
        return self._build_run_result(
            node=node,
            start_time=start_time,
            status=RunStatus.Error,
            timing_info=timing_info,
            message=message,
            batch_results=BatchResults(failed=[self.batches[self.batch_idx]]),
        )

    def compile(self, manifest: Manifest):
        batch = self.batches[self.batch_idx]

        # LEGACY: Set start/end in context prior to re-compiling (Will be removed for 1.10+)
        # TODO: REMOVE before 1.10 GA
        self.node.config["__dbt_internal_microbatch_event_time_start"] = batch[0]
        self.node.config["__dbt_internal_microbatch_event_time_end"] = batch[1]
        # Create batch context on model node prior to re-compiling
        self.node.batch = BatchContext(
            id=MicrobatchBuilder.batch_id(batch[0], self.node.config.batch_size),
            event_time_start=batch[0],
            event_time_end=batch[1],
        )
        # Recompile node to re-resolve refs with event time filters rendered, update context
        self.compiler.compile_node(
            self.node,
            manifest,
            {},
            split_suffix=MicrobatchBuilder.format_batch_start(
                batch[0], self.node.config.batch_size
            ),
        )

        return self.node

    def _build_succesful_run_batch_result(
        self,
        model: ModelNode,
        context: Dict[str, Any],
        batch: BatchType,
        elapsed_time: float = 0.0,
    ) -> RunResult:
        run_result = self._build_run_model_result(model, context, elapsed_time)
        run_result.batch_results = BatchResults(successful=[batch])
        return run_result

    def _build_failed_run_batch_result(
        self,
        model: ModelNode,
        batch: BatchType,
        elapsed_time: float = 0.0,
    ) -> RunResult:
        return RunResult(
            node=model,
            status=RunStatus.Error,
            timing=[],
            thread_id=threading.current_thread().name,
            execution_time=elapsed_time,
            message="ERROR",
            adapter_response={},
            failures=1,
            batch_results=BatchResults(failed=[batch]),
        )

    def _execute_microbatch_materialization(
        self,
        model: ModelNode,
        context: Dict[str, Any],
        materialization_macro: MacroProtocol,
    ) -> RunResult:

        batch = self.batches[self.batch_idx]
        # call materialization_macro to get a batch-level run result
        start_time = time.perf_counter()
        try:
            # Update jinja context with batch context members
            jinja_context = MicrobatchBuilder.build_jinja_context_for_batch(
                model=model,
                incremental_batch=self.incremental_batch,
            )
            context.update(jinja_context)

            # Materialize batch and cache any materialized relations
            result = MacroGenerator(
                materialization_macro, context, stack=context["context_macro_stack"]
            )()
            for relation in self._materialization_relations(result, model):
                self.adapter.cache_added(relation.incorporate(dbt_created=True))

            # Build result of executed batch
            batch_run_result = self._build_succesful_run_batch_result(
                model, context, batch, time.perf_counter() - start_time
            )
            batch_result = batch_run_result

            # At least one batch has been inserted successfully!
            # Can proceed incrementally + in parallel
            self.relation_exists = True

        except (KeyboardInterrupt, SystemExit):
            # reraise it for GraphRunnableTask.execute_nodes to handle
            raise
        except Exception as e:
            fire_event(
                GenericExceptionOnRun(
                    unique_id=self.node.unique_id,
                    exc=f"Exception on worker thread. {str(e)}",
                    node_info=self.node.node_info,
                )
            )
            batch_run_result = self._build_failed_run_batch_result(
                model, batch, time.perf_counter() - start_time
            )

            batch_result = batch_run_result

        return batch_result

    def _execute_model(
        self,
        hook_ctx: Any,
        context_config: Any,
        model: ModelNode,
        context: Dict[str, Any],
        materialization_macro: MacroProtocol,
    ) -> RunResult:
        try:
            batch_result = self._execute_microbatch_materialization(
                model, context, materialization_macro
            )
        finally:
            self.adapter.post_model_hook(context_config, hook_ctx)

        return batch_result


class MicrobatchModelRunner(ModelRunner):
    """Handles the orchestration of batches to run for a given microbatch model"""

    def __init__(self, config, adapter, node, node_index: int, num_nodes: int):
        super().__init__(config, adapter, node, node_index, num_nodes)

        # The parent task is necessary because we need access to the `_submit_batch` and `submit` methods
        self._parent_task: Optional[RunTask] = None
        # The pool is necessary because we need the batches to be executed within the same thread pool
        self._pool: Optional[DbtThreadPool] = None

    def set_parent_task(self, parent_task: RunTask) -> None:
        self._parent_task = parent_task

    def set_pool(self, pool: DbtThreadPool) -> None:
        self._pool = pool

    @property
    def parent_task(self) -> RunTask:
        if self._parent_task is None:
            raise DbtInternalError(
                msg="Tried to access `parent_task` of `MicrobatchModelRunner` before it was set"
            )

        return self._parent_task

    @property
    def pool(self) -> DbtThreadPool:
        if self._pool is None:
            raise DbtInternalError(
                msg="Tried to access `pool` of `MicrobatchModelRunner` before it was set"
            )

        return self._pool

    def _has_relation(self, model: ModelNode) -> bool:
        """Check whether the relation for the model exists in the data warehouse"""
        relation_info = self.adapter.Relation.create_from(self.config, model)
        relation = self.adapter.get_relation(
            relation_info.database, relation_info.schema, relation_info.name
        )
        return relation is not None

    def _is_incremental(self, model) -> bool:
        """Check whether the model should be run `incrementally` or as `full refresh`"""
        # TODO: Remove this whole function. This should be a temporary method. We're working with adapters on
        # a strategy to ensure we can access the `is_incremental` logic without drift
        relation_info = self.adapter.Relation.create_from(self.config, model)
        relation = self.adapter.get_relation(
            relation_info.database, relation_info.schema, relation_info.name
        )
        if (
            relation is not None
            and relation.type == "table"
            and model.config.materialized == "incremental"
        ):
            if model.config.full_refresh is not None:
                return not model.config.full_refresh
            else:
                return not getattr(self.config.args, "FULL_REFRESH", False)
        else:
            return False

    def _initial_run_microbatch_model_result(self, model: ModelNode) -> RunResult:
        return RunResult(
            node=model,
            status=RunStatus.Success,
            timing=[],
            thread_id=threading.current_thread().name,
            # The execution_time here doesn't get propagated to logs because
            # `safe_run_hooks` handles the elapsed time at the node level
            execution_time=0,
            message="",
            adapter_response={},
            failures=0,
            batch_results=BatchResults(),
        )

    def describe_node(self) -> str:
        return f"{self.node.language} microbatch model {self.get_node_representation()}"

    def merge_batch_results(self, result: RunResult, batch_results: List[RunResult]):
        """Merge `batch_results` into `result`."""
        if result.batch_results is None:
            result.batch_results = BatchResults()

        for batch_result in batch_results:
            if batch_result.batch_results is not None:
                result.batch_results += batch_result.batch_results
            result.execution_time += batch_result.execution_time

        num_successes = len(result.batch_results.successful)
        num_failures = len(result.batch_results.failed)
        if num_failures == 0:
            status = RunStatus.Success
            msg = "SUCCESS"
        elif num_successes == 0:
            status = RunStatus.Error
            msg = "ERROR"
        else:
            status = RunStatus.PartialSuccess
            msg = f"PARTIAL SUCCESS ({num_successes}/{num_successes + num_failures})"
        result.status = status
        result.message = msg
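        # For example, 3 successful batches and 1 failed batch yield
        # RunStatus.PartialSuccess with the message "PARTIAL SUCCESS (3/4)".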

        result.batch_results.successful = sorted(result.batch_results.successful)
        result.batch_results.failed = sorted(result.batch_results.failed)

        # If retrying, propagate previously successful batches into the final result,
        # even though they were not run in this invocation
        if self.node.previous_batch_results is not None:
            result.batch_results.successful += self.node.previous_batch_results.successful

    def _update_result_with_unfinished_batches(
        self, result: RunResult, batches: Dict[int, BatchType]
    ) -> None:
        """This method is only meant to be used when the execution of a microbatch model is halted before all batches have had a chance to run"""
        batches_finished: Set[BatchType] = set()

        if result.batch_results:
            # build list of finished batches
            batches_finished = batches_finished.union(set(result.batch_results.successful))
            batches_finished = batches_finished.union(set(result.batch_results.failed))
        else:
            # instantiate `batch_results` if it was `None`
            result.batch_results = BatchResults()

        # skipped batches are any batch that was expected but didn't finish
        batches_expected = {batch for _, batch in batches.items()}
        skipped_batches = batches_expected.difference(batches_finished)

        result.batch_results.failed.extend(list(skipped_batches))

        # We call this method, even though we are merging no new results, as it updates
        # the result with the appropriate status (Success/Partial/Failed)
        self.merge_batch_results(result, [])

    def get_microbatch_builder(self, model: ModelNode) -> MicrobatchBuilder:
        # Initially set the start/end to values from args
        event_time_start = getattr(self.config.args, "EVENT_TIME_START", None)
        event_time_end = getattr(self.config.args, "EVENT_TIME_END", None)

        # If we're in sample mode, alter start/end to sample values
        if getattr(self.config.args, "SAMPLE", None) is not None:
            event_time_start = self.config.args.sample.start
            event_time_end = self.config.args.sample.end

        return MicrobatchBuilder(
            model=model,
            is_incremental=self._is_incremental(model),
            event_time_start=event_time_start,
            event_time_end=event_time_end,
            default_end_time=get_invocation_started_at(),
        )

    def get_batches(self, model: ModelNode) -> Dict[int, BatchType]:
        """Get the batches that should be run for the model"""

        # Note: currently (02/23/2025) model.previous_batch_results is only ever _not_ `None`
        # IFF `dbt retry` is being run and the microbatch model had batches which
        # failed on the run of the model (which is being retried)
        if model.previous_batch_results is None:
            microbatch_builder = self.get_microbatch_builder(model)
            end = microbatch_builder.build_end_time()
            start = microbatch_builder.build_start_time(end)
            batches = microbatch_builder.build_batches(start, end)
        else:
            batches = model.previous_batch_results.failed

        return {batch_idx: batches[batch_idx] for batch_idx in range(len(batches))}

    def compile(self, manifest: Manifest):
        """Don't do anything here because this runner doesn't need to compile anything"""
        return self.node

    def execute(self, model: ModelNode, manifest: Manifest) -> RunResult:
        # Execution really means orchestration in this case

        batches = self.get_batches(model=model)
        relation_exists = self._has_relation(model=model)
        result = self._initial_run_microbatch_model_result(model=model)

        # No batches to run, so return the initial result
        if len(batches) == 0:
            return result

        batch_results: List[RunResult] = []
        batch_idx = 0

        # Run the first batch sequentially
        relation_exists = self.parent_task._submit_batch(
            node=model,
            adapter=self.adapter,
            relation_exists=relation_exists,
            batches=batches,
            batch_idx=batch_idx,
            batch_results=batch_results,
            pool=self.pool,
            force_sequential_run=True,
            incremental_batch=self._is_incremental(model=model),
        )
        batch_idx += 1
        skip_batches = batch_results[0].status != RunStatus.Success

        # Run all batches except the first and last, in parallel if possible
        while batch_idx < len(batches) - 1:
            relation_exists = self.parent_task._submit_batch(
                node=model,
                adapter=self.adapter,
                relation_exists=relation_exists,
                batches=batches,
                batch_idx=batch_idx,
                batch_results=batch_results,
                pool=self.pool,
                skip=skip_batches,
            )
            batch_idx += 1

        # Wait until all submitted batches have completed
        while len(batch_results) != batch_idx:
            # Check if the pool was closed, because if it was, the main thread is trying to exit.
            # If the main thread is trying to exit, we need to shut down. If we _don't_ shut down,
            # batches will continue to execute and we'll delay the run from stopping.
            if self.pool.is_closed():
                # It's technically possible for more results to come in while we clean up.
                # Instead we're going to say they didn't finish, regardless of whether they
                # did. Thus, let's take a copy of the results as they exist right "now".
                frozen_batch_results = deepcopy(batch_results)
                self.merge_batch_results(result, frozen_batch_results)
                self._update_result_with_unfinished_batches(result, batches)
                return result

            # briefly sleep so that this thread doesn't go brrrrr while waiting
            time.sleep(0.1)

        # Only run the "last" batch if there is more than one batch
        if len(batches) != 1:
            # Final batch runs once all others complete to ensure post_hook runs at the end
            self.parent_task._submit_batch(
                node=model,
                adapter=self.adapter,
                relation_exists=relation_exists,
                batches=batches,
                batch_idx=batch_idx,
                batch_results=batch_results,
                pool=self.pool,
                force_sequential_run=True,
                skip=skip_batches,
            )

        # Finalize run: merge results, track model run, and print final result line
        self.merge_batch_results(result, batch_results)

        return result


class RunTask(CompileTask):
    def __init__(
        self,
        args: Flags,
        config: RuntimeConfig,
        manifest: Manifest,
        batch_map: Optional[Dict[str, BatchResults]] = None,
    ) -> None:
        super().__init__(args, config, manifest)
        self.batch_map = batch_map

    def raise_on_first_error(self) -> bool:
        return False

    def get_hook_sql(self, adapter, hook, idx, num_hooks, extra_context) -> str:
        if self.manifest is None:
            raise DbtInternalError("compile_node called before manifest was loaded")

        compiled = self.compiler.compile_node(hook, self.manifest, extra_context)
        statement = compiled.compiled_code
        hook_index = hook.index or num_hooks
        hook_obj = get_hook(statement, index=hook_index)
        return hook_obj.sql or ""

    def handle_job_queue(self, pool, callback):
        node = self.job_queue.get()
        self._raise_set_error()
        runner = self.get_runner(node)
        # we finally know what we're running! Make sure we haven't decided
        # to skip it due to upstream failures
        if runner.node.unique_id in self._skipped_children:
            cause = self._skipped_children.pop(runner.node.unique_id)
            runner.do_skip(cause=cause)

        if isinstance(runner, MicrobatchModelRunner):
            runner.set_parent_task(self)
            runner.set_pool(pool)

        args = [runner]
        self._submit(pool, args, callback)

    def _submit_batch(
        self,
        node: ModelNode,
        adapter: BaseAdapter,
        relation_exists: bool,
        batches: Dict[int, BatchType],
        batch_idx: int,
        batch_results: List[RunResult],
        pool: DbtThreadPool,
        force_sequential_run: bool = False,
        skip: bool = False,
        incremental_batch: bool = True,
    ):
        node_copy = deepcopy(node)
        # Only run pre_hook(s) for the first batch
        if batch_idx != 0:
            node_copy.config.pre_hook = []

        # Only run post_hook(s) for the last batch
        if batch_idx != len(batches) - 1:
            node_copy.config.post_hook = []

        # TODO: We should be doing self.get_runner, however doing so
        # currently causes the tracking of how many nodes there are to
        # increment when we don't want it to
        batch_runner = MicrobatchBatchRunner(
            self.config,
            adapter,
            node_copy,
            self.run_count,
            self.num_nodes,
            batch_idx,
            batches,
            relation_exists,
            incremental_batch,
        )

        if skip:
            batch_runner.do_skip()

        if not pool.is_closed():
            if not force_sequential_run and batch_runner.should_run_in_parallel():
                fire_event(
                    MicrobatchExecutionDebug(
                        msg=f"{batch_runner.describe_batch()} is being run concurrently"
                    )
                )
                self._submit(pool, [batch_runner], batch_results.append)
            else:
                fire_event(
                    MicrobatchExecutionDebug(
                        msg=f"{batch_runner.describe_batch()} is being run sequentially"
                    )
                )
                batch_results.append(self.call_runner(batch_runner))
                relation_exists = batch_runner.relation_exists
        else:
            batch_results.append(
                batch_runner._build_failed_run_batch_result(node_copy, batches[batch_idx])
            )

        return relation_exists

    def _hook_keyfunc(self, hook: HookNode) -> Tuple[str, Optional[int]]:
        package_name = hook.package_name
        if package_name == self.config.project_name:
            package_name = BiggestName("")
        return package_name, hook.index

    def get_hooks_by_type(self, hook_type: RunHookType) -> List[HookNode]:

        if self.manifest is None:
            raise DbtInternalError("self.manifest was None in get_hooks_by_type")

        nodes = self.manifest.nodes.values()
        # find all hooks defined in the manifest (could be multiple projects)
        hooks: List[HookNode] = get_hooks_by_tags(nodes, {hook_type})
        hooks.sort(key=self._hook_keyfunc)
        return hooks

    def safe_run_hooks(
        self, adapter: BaseAdapter, hook_type: RunHookType, extra_context: Dict[str, Any]
    ) -> RunStatus:
        ordered_hooks = self.get_hooks_by_type(hook_type)

        if hook_type == RunHookType.End and ordered_hooks:
            fire_event(Formatting(""))

        # on-run-* hooks should run outside a transaction. This matters because
        # psycopg2 automatically begins a transaction when a connection is created.
        adapter.clear_transaction()
        if not ordered_hooks:
            return RunStatus.Success

        status = RunStatus.Success
        failed = False
        num_hooks = len(ordered_hooks)

        for idx, hook in enumerate(ordered_hooks, 1):
            with log_contextvars(node_info=hook.node_info):
                hook.index = idx
                hook_name = f"{hook.package_name}.{hook_type}.{hook.index - 1}"
                execution_time = 0.0
                timing: List[TimingInfo] = []
                failures = 1

                if not failed:
                    with collect_timing_info("compile", timing.append):
                        sql = self.get_hook_sql(
                            adapter, hook, hook.index, num_hooks, extra_context
                        )

                    started_at = timing[0].started_at or datetime.now(timezone.utc).replace(
                        tzinfo=None
                    )
                    hook.update_event_status(
                        started_at=started_at.isoformat(), node_status=RunningStatus.Started
                    )

                    fire_event(
                        LogHookStartLine(
                            statement=hook_name,
                            index=hook.index,
                            total=num_hooks,
                            node_info=hook.node_info,
                        )
                    )

                    with collect_timing_info("execute", timing.append):
                        status, message = get_execution_status(sql, adapter)

                    finished_at = timing[1].completed_at or datetime.now(timezone.utc).replace(
                        tzinfo=None
                    )
                    hook.update_event_status(finished_at=finished_at.isoformat())
                    execution_time = (finished_at - started_at).total_seconds()
                    failures = 0 if status == RunStatus.Success else 1

                    if status == RunStatus.Success:
                        message = f"{hook_name} passed"
                    else:
                        message = f"{hook_name} failed, error:\n {message}"
                        failed = True
                else:
                    status = RunStatus.Skipped
                    message = f"{hook_name} skipped"

                hook.update_event_status(node_status=status)

                self.node_results.append(
                    RunResult(
                        status=status,
                        thread_id="main",
                        timing=timing,
                        message=message,
                        adapter_response={},
                        execution_time=execution_time,
                        failures=failures,
                        node=hook,
                    )
                )

                fire_event(
                    LogHookEndLine(
                        statement=hook_name,
                        status=status,
                        index=hook.index,
                        total=num_hooks,
                        execution_time=execution_time,
                        node_info=hook.node_info,
                    )
                )

        if hook_type == RunHookType.Start and ordered_hooks:
            fire_event(Formatting(""))

        return status

    def print_results_line(self, results, execution_time) -> None:
        nodes = [r.node for r in results if hasattr(r, "node")]
        stat_line = get_counts(nodes)

        execution = ""

        if execution_time is not None:
            execution = utils.humanize_execution_time(execution_time=execution_time)

        fire_event(Formatting(""))
        fire_event(
            FinishedRunningStats(
                stat_line=stat_line, execution=execution, execution_time=execution_time
            )
        )

    def populate_microbatch_batches(self, selected_uids: AbstractSet[str]):
        if self.batch_map is not None and self.manifest is not None:
            for uid in selected_uids:
                if uid in self.batch_map:
                    node = self.manifest.ref_lookup.perform_lookup(uid, self.manifest)
                    if isinstance(node, ModelNode):
                        node.previous_batch_results = self.batch_map[uid]

    def before_run(self, adapter: BaseAdapter, selected_uids: AbstractSet[str]) -> RunStatus:
        with adapter.connection_named("master"):
            self.defer_to_manifest()
            required_schemas = self.get_model_schemas(adapter, selected_uids)
            self.create_schemas(adapter, required_schemas)
            self.populate_adapter_cache(adapter, required_schemas)
            self.populate_microbatch_batches(selected_uids)
            group_lookup.init(self.manifest, selected_uids)
            run_hooks_status = self.safe_run_hooks(adapter, RunHookType.Start, {})
            return run_hooks_status

    def after_run(self, adapter, results) -> None:
        # in on-run-end hooks, provide the value 'database_schemas', which is a
        # list of unique (database, schema) pairs that successfully executed
        # models were in. For backwards compatibility, include the old
        # 'schemas', which did not include database information.

        database_schema_set: Set[Tuple[Optional[str], str]] = {
            (r.node.database, r.node.schema)
            for r in results
            if (hasattr(r, "node") and r.node.is_relational)
            and r.status not in (NodeStatus.Error, NodeStatus.Fail, NodeStatus.Skipped)
        }

        extras = {
            "schemas": list({s for _, s in database_schema_set}),
            "results": [
                r for r in results if r.thread_id != "main" or r.status == RunStatus.Error
            ],  # exclude hooks that didn't fail, to preserve backwards compatibility
            "database_schemas": list(database_schema_set),
        }

        try:
            with adapter.connection_named("master"):
                self.safe_run_hooks(adapter, RunHookType.End, extras)
        except (KeyboardInterrupt, SystemExit, DbtRuntimeError):
            run_result = self.get_result(
                results=self.node_results,
                elapsed_time=time.time() - self.started_at,
                generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
            )

            if self.args.write_json and hasattr(run_result, "write"):
                run_result.write(self.result_path())
                add_artifact_produced(self.result_path())

            print_run_end_messages(self.node_results, keyboard_interrupt=True)

            raise

    def get_node_selector(self) -> ResourceTypeSelector:
        if self.manifest is None or self.graph is None:
            raise DbtInternalError("manifest and graph must be set to perform node selection")
        return ResourceTypeSelector(
            graph=self.graph,
            manifest=self.manifest,
            previous_state=self.previous_state,
            resource_types=[NodeType.Model],
        )

    def get_runner_type(self, node) -> Optional[Type[BaseRunner]]:
        if self.manifest is None:
            raise DbtInternalError("manifest must be set prior to calling get_runner_type")

        if (
            node.config.materialized == "incremental"
            and node.config.incremental_strategy == "microbatch"
            and self.manifest.use_microbatch_batches(project_name=self.config.project_name)
        ):
            return MicrobatchModelRunner
        else:
            return ModelRunner

    def task_end_messages(self, results) -> None:
        if results:
            print_run_end_messages(results)