dvt-core 1.11.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dvt-core might be problematic. Click here for more details.

Files changed (261) hide show
  1. dvt/__init__.py +7 -0
  2. dvt/_pydantic_shim.py +26 -0
  3. dvt/adapters/__init__.py +16 -0
  4. dvt/adapters/multi_adapter_manager.py +268 -0
  5. dvt/artifacts/__init__.py +0 -0
  6. dvt/artifacts/exceptions/__init__.py +1 -0
  7. dvt/artifacts/exceptions/schemas.py +31 -0
  8. dvt/artifacts/resources/__init__.py +116 -0
  9. dvt/artifacts/resources/base.py +68 -0
  10. dvt/artifacts/resources/types.py +93 -0
  11. dvt/artifacts/resources/v1/analysis.py +10 -0
  12. dvt/artifacts/resources/v1/catalog.py +23 -0
  13. dvt/artifacts/resources/v1/components.py +275 -0
  14. dvt/artifacts/resources/v1/config.py +282 -0
  15. dvt/artifacts/resources/v1/documentation.py +11 -0
  16. dvt/artifacts/resources/v1/exposure.py +52 -0
  17. dvt/artifacts/resources/v1/function.py +53 -0
  18. dvt/artifacts/resources/v1/generic_test.py +32 -0
  19. dvt/artifacts/resources/v1/group.py +22 -0
  20. dvt/artifacts/resources/v1/hook.py +11 -0
  21. dvt/artifacts/resources/v1/macro.py +30 -0
  22. dvt/artifacts/resources/v1/metric.py +173 -0
  23. dvt/artifacts/resources/v1/model.py +146 -0
  24. dvt/artifacts/resources/v1/owner.py +10 -0
  25. dvt/artifacts/resources/v1/saved_query.py +112 -0
  26. dvt/artifacts/resources/v1/seed.py +42 -0
  27. dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  28. dvt/artifacts/resources/v1/semantic_model.py +315 -0
  29. dvt/artifacts/resources/v1/singular_test.py +14 -0
  30. dvt/artifacts/resources/v1/snapshot.py +92 -0
  31. dvt/artifacts/resources/v1/source_definition.py +85 -0
  32. dvt/artifacts/resources/v1/sql_operation.py +10 -0
  33. dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
  34. dvt/artifacts/schemas/__init__.py +0 -0
  35. dvt/artifacts/schemas/base.py +191 -0
  36. dvt/artifacts/schemas/batch_results.py +24 -0
  37. dvt/artifacts/schemas/catalog/__init__.py +12 -0
  38. dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  39. dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
  40. dvt/artifacts/schemas/freshness/__init__.py +1 -0
  41. dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  42. dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
  43. dvt/artifacts/schemas/manifest/__init__.py +2 -0
  44. dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  45. dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
  46. dvt/artifacts/schemas/results.py +148 -0
  47. dvt/artifacts/schemas/run/__init__.py +2 -0
  48. dvt/artifacts/schemas/run/v5/__init__.py +0 -0
  49. dvt/artifacts/schemas/run/v5/run.py +184 -0
  50. dvt/artifacts/schemas/upgrades/__init__.py +4 -0
  51. dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  52. dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  53. dvt/artifacts/utils/validation.py +153 -0
  54. dvt/cli/__init__.py +1 -0
  55. dvt/cli/context.py +16 -0
  56. dvt/cli/exceptions.py +56 -0
  57. dvt/cli/flags.py +558 -0
  58. dvt/cli/main.py +971 -0
  59. dvt/cli/option_types.py +121 -0
  60. dvt/cli/options.py +79 -0
  61. dvt/cli/params.py +803 -0
  62. dvt/cli/requires.py +478 -0
  63. dvt/cli/resolvers.py +32 -0
  64. dvt/cli/types.py +40 -0
  65. dvt/clients/__init__.py +0 -0
  66. dvt/clients/checked_load.py +82 -0
  67. dvt/clients/git.py +164 -0
  68. dvt/clients/jinja.py +206 -0
  69. dvt/clients/jinja_static.py +245 -0
  70. dvt/clients/registry.py +192 -0
  71. dvt/clients/yaml_helper.py +68 -0
  72. dvt/compilation.py +833 -0
  73. dvt/compute/__init__.py +26 -0
  74. dvt/compute/base.py +288 -0
  75. dvt/compute/engines/__init__.py +13 -0
  76. dvt/compute/engines/duckdb_engine.py +368 -0
  77. dvt/compute/engines/spark_engine.py +273 -0
  78. dvt/compute/query_analyzer.py +212 -0
  79. dvt/compute/router.py +483 -0
  80. dvt/config/__init__.py +4 -0
  81. dvt/config/catalogs.py +95 -0
  82. dvt/config/compute_config.py +406 -0
  83. dvt/config/profile.py +411 -0
  84. dvt/config/profiles_v2.py +464 -0
  85. dvt/config/project.py +893 -0
  86. dvt/config/renderer.py +232 -0
  87. dvt/config/runtime.py +491 -0
  88. dvt/config/selectors.py +209 -0
  89. dvt/config/utils.py +78 -0
  90. dvt/connectors/.gitignore +6 -0
  91. dvt/connectors/README.md +306 -0
  92. dvt/connectors/catalog.yml +217 -0
  93. dvt/connectors/download_connectors.py +300 -0
  94. dvt/constants.py +29 -0
  95. dvt/context/__init__.py +0 -0
  96. dvt/context/base.py +746 -0
  97. dvt/context/configured.py +136 -0
  98. dvt/context/context_config.py +350 -0
  99. dvt/context/docs.py +82 -0
  100. dvt/context/exceptions_jinja.py +179 -0
  101. dvt/context/macro_resolver.py +195 -0
  102. dvt/context/macros.py +171 -0
  103. dvt/context/manifest.py +73 -0
  104. dvt/context/providers.py +2198 -0
  105. dvt/context/query_header.py +14 -0
  106. dvt/context/secret.py +59 -0
  107. dvt/context/target.py +74 -0
  108. dvt/contracts/__init__.py +0 -0
  109. dvt/contracts/files.py +413 -0
  110. dvt/contracts/graph/__init__.py +0 -0
  111. dvt/contracts/graph/manifest.py +1904 -0
  112. dvt/contracts/graph/metrics.py +98 -0
  113. dvt/contracts/graph/model_config.py +71 -0
  114. dvt/contracts/graph/node_args.py +42 -0
  115. dvt/contracts/graph/nodes.py +1806 -0
  116. dvt/contracts/graph/semantic_manifest.py +233 -0
  117. dvt/contracts/graph/unparsed.py +812 -0
  118. dvt/contracts/project.py +417 -0
  119. dvt/contracts/results.py +53 -0
  120. dvt/contracts/selection.py +23 -0
  121. dvt/contracts/sql.py +86 -0
  122. dvt/contracts/state.py +69 -0
  123. dvt/contracts/util.py +46 -0
  124. dvt/deprecations.py +347 -0
  125. dvt/deps/__init__.py +0 -0
  126. dvt/deps/base.py +153 -0
  127. dvt/deps/git.py +196 -0
  128. dvt/deps/local.py +80 -0
  129. dvt/deps/registry.py +131 -0
  130. dvt/deps/resolver.py +149 -0
  131. dvt/deps/tarball.py +121 -0
  132. dvt/docs/source/_ext/dbt_click.py +118 -0
  133. dvt/docs/source/conf.py +32 -0
  134. dvt/env_vars.py +64 -0
  135. dvt/event_time/event_time.py +40 -0
  136. dvt/event_time/sample_window.py +60 -0
  137. dvt/events/__init__.py +16 -0
  138. dvt/events/base_types.py +37 -0
  139. dvt/events/core_types_pb2.py +2 -0
  140. dvt/events/logging.py +109 -0
  141. dvt/events/types.py +2534 -0
  142. dvt/exceptions.py +1487 -0
  143. dvt/flags.py +89 -0
  144. dvt/graph/__init__.py +11 -0
  145. dvt/graph/cli.py +248 -0
  146. dvt/graph/graph.py +172 -0
  147. dvt/graph/queue.py +213 -0
  148. dvt/graph/selector.py +375 -0
  149. dvt/graph/selector_methods.py +976 -0
  150. dvt/graph/selector_spec.py +223 -0
  151. dvt/graph/thread_pool.py +18 -0
  152. dvt/hooks.py +21 -0
  153. dvt/include/README.md +49 -0
  154. dvt/include/__init__.py +3 -0
  155. dvt/include/global_project.py +4 -0
  156. dvt/include/starter_project/.gitignore +4 -0
  157. dvt/include/starter_project/README.md +15 -0
  158. dvt/include/starter_project/__init__.py +3 -0
  159. dvt/include/starter_project/analyses/.gitkeep +0 -0
  160. dvt/include/starter_project/dvt_project.yml +36 -0
  161. dvt/include/starter_project/macros/.gitkeep +0 -0
  162. dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  163. dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  164. dvt/include/starter_project/models/example/schema.yml +21 -0
  165. dvt/include/starter_project/seeds/.gitkeep +0 -0
  166. dvt/include/starter_project/snapshots/.gitkeep +0 -0
  167. dvt/include/starter_project/tests/.gitkeep +0 -0
  168. dvt/internal_deprecations.py +27 -0
  169. dvt/jsonschemas/__init__.py +3 -0
  170. dvt/jsonschemas/jsonschemas.py +309 -0
  171. dvt/jsonschemas/project/0.0.110.json +4717 -0
  172. dvt/jsonschemas/project/0.0.85.json +2015 -0
  173. dvt/jsonschemas/resources/0.0.110.json +2636 -0
  174. dvt/jsonschemas/resources/0.0.85.json +2536 -0
  175. dvt/jsonschemas/resources/latest.json +6773 -0
  176. dvt/links.py +4 -0
  177. dvt/materializations/__init__.py +0 -0
  178. dvt/materializations/incremental/__init__.py +0 -0
  179. dvt/materializations/incremental/microbatch.py +235 -0
  180. dvt/mp_context.py +8 -0
  181. dvt/node_types.py +37 -0
  182. dvt/parser/__init__.py +23 -0
  183. dvt/parser/analysis.py +21 -0
  184. dvt/parser/base.py +549 -0
  185. dvt/parser/common.py +267 -0
  186. dvt/parser/docs.py +52 -0
  187. dvt/parser/fixtures.py +51 -0
  188. dvt/parser/functions.py +30 -0
  189. dvt/parser/generic_test.py +100 -0
  190. dvt/parser/generic_test_builders.py +334 -0
  191. dvt/parser/hooks.py +119 -0
  192. dvt/parser/macros.py +137 -0
  193. dvt/parser/manifest.py +2204 -0
  194. dvt/parser/models.py +574 -0
  195. dvt/parser/partial.py +1179 -0
  196. dvt/parser/read_files.py +445 -0
  197. dvt/parser/schema_generic_tests.py +423 -0
  198. dvt/parser/schema_renderer.py +111 -0
  199. dvt/parser/schema_yaml_readers.py +936 -0
  200. dvt/parser/schemas.py +1467 -0
  201. dvt/parser/search.py +149 -0
  202. dvt/parser/seeds.py +28 -0
  203. dvt/parser/singular_test.py +20 -0
  204. dvt/parser/snapshots.py +44 -0
  205. dvt/parser/sources.py +557 -0
  206. dvt/parser/sql.py +63 -0
  207. dvt/parser/unit_tests.py +622 -0
  208. dvt/plugins/__init__.py +20 -0
  209. dvt/plugins/contracts.py +10 -0
  210. dvt/plugins/exceptions.py +2 -0
  211. dvt/plugins/manager.py +164 -0
  212. dvt/plugins/manifest.py +21 -0
  213. dvt/profiler.py +20 -0
  214. dvt/py.typed +1 -0
  215. dvt/runners/__init__.py +2 -0
  216. dvt/runners/exposure_runner.py +7 -0
  217. dvt/runners/no_op_runner.py +46 -0
  218. dvt/runners/saved_query_runner.py +7 -0
  219. dvt/selected_resources.py +8 -0
  220. dvt/task/__init__.py +0 -0
  221. dvt/task/base.py +504 -0
  222. dvt/task/build.py +197 -0
  223. dvt/task/clean.py +57 -0
  224. dvt/task/clone.py +162 -0
  225. dvt/task/compile.py +151 -0
  226. dvt/task/compute.py +366 -0
  227. dvt/task/debug.py +650 -0
  228. dvt/task/deps.py +280 -0
  229. dvt/task/docs/__init__.py +3 -0
  230. dvt/task/docs/generate.py +408 -0
  231. dvt/task/docs/index.html +250 -0
  232. dvt/task/docs/serve.py +28 -0
  233. dvt/task/freshness.py +323 -0
  234. dvt/task/function.py +122 -0
  235. dvt/task/group_lookup.py +46 -0
  236. dvt/task/init.py +374 -0
  237. dvt/task/list.py +237 -0
  238. dvt/task/printer.py +176 -0
  239. dvt/task/profiles.py +256 -0
  240. dvt/task/retry.py +175 -0
  241. dvt/task/run.py +1146 -0
  242. dvt/task/run_operation.py +142 -0
  243. dvt/task/runnable.py +802 -0
  244. dvt/task/seed.py +104 -0
  245. dvt/task/show.py +150 -0
  246. dvt/task/snapshot.py +57 -0
  247. dvt/task/sql.py +111 -0
  248. dvt/task/test.py +464 -0
  249. dvt/tests/fixtures/__init__.py +1 -0
  250. dvt/tests/fixtures/project.py +620 -0
  251. dvt/tests/util.py +651 -0
  252. dvt/tracking.py +529 -0
  253. dvt/utils/__init__.py +3 -0
  254. dvt/utils/artifact_upload.py +151 -0
  255. dvt/utils/utils.py +408 -0
  256. dvt/version.py +249 -0
  257. dvt_core-1.11.0b4.dist-info/METADATA +252 -0
  258. dvt_core-1.11.0b4.dist-info/RECORD +261 -0
  259. dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
  260. dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
  261. dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/task/run.py ADDED
@@ -0,0 +1,1146 @@
1
+ from __future__ import annotations
2
+
3
+ import functools
4
+ import threading
5
+ import time
6
+ from copy import deepcopy
7
+ from dataclasses import asdict
8
+ from datetime import datetime, timezone
9
+ from typing import AbstractSet, Any, Dict, Iterable, List, Optional, Set, Tuple, Type
10
+
11
+ from dvt import tracking, utils
12
+ from dvt.artifacts.resources import Hook
13
+ from dvt.artifacts.schemas.batch_results import BatchResults, BatchType
14
+ from dvt.artifacts.schemas.results import (
15
+ NodeStatus,
16
+ RunningStatus,
17
+ RunStatus,
18
+ TimingInfo,
19
+ collect_timing_info,
20
+ )
21
+ from dvt.artifacts.schemas.run import RunResult
22
+ from dvt.cli.flags import Flags
23
+ from dvt.clients.jinja import MacroGenerator
24
+ from dvt.config import RuntimeConfig
25
+ from dvt.context.providers import generate_runtime_model_context
26
+ from dvt.contracts.graph.manifest import Manifest
27
+ from dvt.contracts.graph.nodes import BatchContext, HookNode, ModelNode, ResultNode
28
+ from dvt.events.types import (
29
+ GenericExceptionOnRun,
30
+ LogBatchResult,
31
+ LogHookEndLine,
32
+ LogHookStartLine,
33
+ LogModelResult,
34
+ LogStartBatch,
35
+ LogStartLine,
36
+ MicrobatchExecutionDebug,
37
+ )
38
+ from dvt.exceptions import CompilationError, DbtInternalError, DbtRuntimeError
39
+ from dvt.graph import ResourceTypeSelector
40
+ from dvt.graph.thread_pool import DbtThreadPool
41
+ from dvt.hooks import get_hook_dict
42
+ from dvt.materializations.incremental.microbatch import MicrobatchBuilder
43
+ from dvt.node_types import NodeType, RunHookType
44
+ from dvt.task import group_lookup
45
+ from dvt.task.base import BaseRunner
46
+ from dvt.task.compile import CompileRunner, CompileTask
47
+ from dvt.task.printer import get_counts, print_run_end_messages
48
+ from dvt.utils.artifact_upload import add_artifact_produced
49
+
50
+ from dbt.adapters.base import BaseAdapter, BaseRelation
51
+ from dbt.adapters.capability import Capability
52
+ from dbt.adapters.events.types import FinishedRunningStats
53
+ from dbt.adapters.exceptions import MissingMaterializationError
54
+ from dbt_common.clients.jinja import MacroProtocol
55
+ from dbt_common.dataclass_schema import dbtClassMixin
56
+ from dbt_common.events.base_types import EventLevel
57
+ from dbt_common.events.contextvars import log_contextvars
58
+ from dbt_common.events.functions import fire_event, get_invocation_id
59
+ from dbt_common.events.types import Formatting
60
+ from dbt_common.exceptions import DbtValidationError
61
+ from dbt_common.invocation import get_invocation_started_at
62
+
63
+
64
+ @functools.total_ordering
65
+ class BiggestName(str):
66
+ def __lt__(self, other):
67
+ return True
68
+
69
+ def __eq__(self, other):
70
+ return isinstance(other, self.__class__)
71
+
72
+
73
+ def _hook_list() -> List[HookNode]:
74
+ return []
75
+
76
+
77
def get_hooks_by_tags(
    nodes: Iterable[ResultNode],
    match_tags: Set[str],
) -> List[HookNode]:
    """Return the HookNodes among *nodes* whose tags intersect *match_tags*.

    Non-hook nodes are ignored; a hook matches when it carries at least one
    tag from *match_tags*.
    """
    return [
        node
        for node in nodes
        if isinstance(node, HookNode) and set(node.tags) & match_tags
    ]
89
+
90
+
91
def get_hook(source, index):
    """Parse *source* into a validated Hook, defaulting its index to *index*.

    An ``index`` already present in the parsed hook dict takes precedence.
    """
    hook_data = get_hook_dict(source)
    if "index" not in hook_data:
        hook_data["index"] = index
    Hook.validate(hook_data)
    return Hook.from_dict(hook_data)
96
+
97
+
98
def get_execution_status(sql: str, adapter: BaseAdapter) -> Tuple[RunStatus, str]:
    """Execute *sql* on *adapter* and map the outcome to (status, message).

    Blank SQL is treated as a trivial success. KeyboardInterrupt/SystemExit
    propagate so the surrounding task can abort; every other exception is
    converted into an Error status with its message.
    """
    if not sql.strip():
        return RunStatus.Success, "OK"

    try:
        response, _ = adapter.execute(sql, auto_begin=False, fetch=False)
        # NOTE: reads the adapter response's private _message attribute.
        return RunStatus.Success, response._message
    except (KeyboardInterrupt, SystemExit):
        raise
    except DbtRuntimeError as exc:
        return RunStatus.Error, exc.msg
    except Exception as exc:
        return RunStatus.Error, str(exc)
116
+
117
+
118
+ def _get_adapter_info(adapter, run_model_result) -> Dict[str, Any]:
119
+ """Each adapter returns a dataclass with a flexible dictionary for
120
+ adapter-specific fields. Only the non-'model_adapter_details' fields
121
+ are guaranteed cross adapter."""
122
+ return asdict(adapter.get_adapter_run_info(run_model_result.node.config)) if adapter else {}
123
+
124
+
125
def track_model_run(index, num_nodes, run_model_result, adapter=None):
    """Emit a usage-tracking event for one finished model run.

    Raises DbtInternalError when no active tracking user is configured.
    Model-only attributes (access, contract, version, incremental strategy)
    are reported only for Model nodes.
    """
    if tracking.active_user is None:
        raise DbtInternalError("cannot track model run with no active user")

    node = run_model_result.node
    has_group = bool(hasattr(node, "group") and node.group)
    is_model = node.resource_type == NodeType.Model

    access = node.access.value if (is_model and node.access is not None) else None
    contract_enforced = node.contract.enforced if is_model else False
    versioned = bool(node.version) if is_model else False
    incremental_strategy = node.config.incremental_strategy if is_model else None

    tracking.track_model_run(
        {
            "invocation_id": get_invocation_id(),
            "index": index,
            "total": num_nodes,
            "execution_time": run_model_result.execution_time,
            "run_status": str(run_model_result.status).upper(),
            "run_skipped": run_model_result.status == NodeStatus.Skipped,
            "run_error": run_model_result.status == NodeStatus.Error,
            "model_materialization": node.get_materialization(),
            "model_incremental_strategy": incremental_strategy,
            "model_id": utils.get_hash(node),
            "hashed_contents": utils.get_hashed_contents(node),
            "timing": [t.to_dict(omit_none=True) for t in run_model_result.timing],
            "language": str(node.language),
            "has_group": has_group,
            "contract_enforced": contract_enforced,
            "access": access,
            "versioned": versioned,
            "adapter_info": _get_adapter_info(adapter, run_model_result),
        }
    )
164
+
165
+
166
+ # make sure that we got an ok result back from a materialization
167
# make sure that we got an ok result back from a materialization
def _validate_materialization_relations_dict(inp: Dict[Any, Any], model) -> List[BaseRelation]:
    """Validate a materialization's return dict and extract its relations.

    Requires a "relations" key holding a list of BaseRelation instances;
    raises CompilationError (attributed to *model*) on any shape violation.
    """
    if "relations" not in inp:
        msg = (
            'Invalid return value from materialization, "relations" '
            "not found, got keys: {}".format(list(inp))
        )
        raise CompilationError(msg, node=model) from None

    relations_value = inp["relations"]
    if not isinstance(relations_value, list):
        msg = (
            'Invalid return value from materialization, "relations" '
            "not a list, got: {}".format(relations_value)
        )
        raise CompilationError(msg, node=model) from None

    checked: List[BaseRelation] = []
    for candidate in relations_value:
        if not isinstance(candidate, BaseRelation):
            msg = (
                "Invalid return value from materialization, "
                '"relations" contains non-Relation: {}'.format(candidate)
            )
            raise CompilationError(msg, node=model)
        checked.append(candidate)
    return checked
196
+
197
+
198
class ModelRunner(CompileRunner):
    """Executes a single model node through its materialization macro.

    Extends CompileRunner with the run-phase lifecycle: start/result event
    logging, usage tracking, adapter pre/post model hooks, and registration of
    materialized relations in the adapter cache.
    """

    def get_node_representation(self) -> str:
        """Return a human-readable, unquoted relation name for this node."""
        display_quote_policy = {"database": False, "schema": False, "identifier": False}
        relation = self.adapter.Relation.create_from(
            self.config, self.node, quote_policy=display_quote_policy
        )
        # exclude the database from output if it's the default
        if self.node.database == self.config.credentials.database:
            relation = relation.include(database=False)
        return str(relation)

    def describe_node(self) -> str:
        """One-line description used in log lines, e.g. "sql table model my_schema.my_model"."""
        # TODO CL 'language' will be moved to node level when we change representation
        return f"{self.node.language} {self.node.get_materialization()} model {self.get_node_representation()}"

    def print_start_line(self) -> None:
        """Fire the start-of-model log event."""
        fire_event(
            LogStartLine(
                description=self.describe_node(),
                index=self.node_index,
                total=self.num_nodes,
                node_info=self.node.node_info,
            )
        )

    def print_result_line(self, result) -> None:
        """Fire the end-of-model log event; errors log at ERROR level."""
        description = self.describe_node()
        group = group_lookup.get(self.node.unique_id)
        if result.status == NodeStatus.Error:
            status = result.status
            level = EventLevel.ERROR
        else:
            # On success the adapter's response message is shown as the status.
            status = result.message
            level = EventLevel.INFO
        fire_event(
            LogModelResult(
                description=description,
                status=status,
                index=self.node_index,
                total=self.num_nodes,
                execution_time=result.execution_time,
                node_info=self.node.node_info,
                group=group,
            ),
            level=level,
        )

    def before_execute(self) -> None:
        self.print_start_line()

    def after_execute(self, result) -> None:
        # Track first, then log the result line.
        track_model_run(self.node_index, self.num_nodes, result, adapter=self.adapter)
        self.print_result_line(result)

    def _build_run_model_result(self, model, context, elapsed_time: float = 0.0):
        """Build a successful RunResult from the materialization's "main" call.

        Raises DbtRuntimeError when the materialization never invoked main().
        """
        result = context["load_result"]("main")
        if not result:
            raise DbtRuntimeError("main is not being called during running model")
        adapter_response = {}
        if isinstance(result.response, dbtClassMixin):
            adapter_response = result.response.to_dict(omit_none=True)
        return RunResult(
            node=model,
            status=RunStatus.Success,
            timing=[],
            thread_id=threading.current_thread().name,
            execution_time=elapsed_time,
            message=str(result.response),
            adapter_response=adapter_response,
            failures=result.get("failures"),
            batch_results=None,
        )

    def _materialization_relations(self, result: Any, model) -> List[BaseRelation]:
        """Normalize a materialization's return value into a list of relations.

        Only a dict with a "relations" key is accepted; a bare string (a
        legacy materialization returning rendered SQL) or anything else
        raises CompilationError.
        """
        if isinstance(result, str):
            msg = (
                'The materialization ("{}") did not explicitly return a '
                "list of relations to add to the cache.".format(str(model.get_materialization()))
            )
            raise CompilationError(msg, node=model)

        if isinstance(result, dict):
            return _validate_materialization_relations_dict(result, model)

        msg = (
            "Invalid return value from materialization, expected a dict "
            'with key "relations", got: {}'.format(str(result))
        )
        raise CompilationError(msg, node=model)

    def _execute_model(
        self,
        hook_ctx: Any,
        context_config: Any,
        model: ModelNode,
        context: Dict[str, Any],
        materialization_macro: MacroProtocol,
    ) -> RunResult:
        """Invoke the materialization macro and register produced relations.

        The adapter's post_model_hook always runs (finally), even when the
        macro raises.
        """
        try:
            result = MacroGenerator(
                materialization_macro, context, stack=context["context_macro_stack"]
            )()
        finally:
            self.adapter.post_model_hook(context_config, hook_ctx)

        # Register each relation the materialization created in the adapter
        # cache so later nodes see it without a metadata query.
        for relation in self._materialization_relations(result, model):
            self.adapter.cache_added(relation.incorporate(dbt_created=True))

        return self._build_run_model_result(model, context)

    def execute(self, model, manifest):
        """Resolve the materialization macro for *model* and run it.

        Raises MissingMaterializationError when no macro matches, and
        DbtValidationError when the model's language is not supported by
        the materialization.
        """
        context = generate_runtime_model_context(model, self.config, manifest)

        materialization_macro = manifest.find_materialization_macro_by_name(
            self.config.project_name, model.get_materialization(), self.adapter.type()
        )

        if materialization_macro is None:
            raise MissingMaterializationError(
                materialization=model.get_materialization(), adapter_type=self.adapter.type()
            )

        if "config" not in context:
            raise DbtInternalError(
                "Invalid materialization context generated, missing config: {}".format(context)
            )
        context_config = context["config"]

        # NOTE(review): supported_languages is read on the next line before the
        # hasattr guard is consulted — if the attribute were ever missing this
        # would raise AttributeError rather than skip the check. Confirm
        # materialization macros always define supported_languages.
        mat_has_supported_langs = hasattr(materialization_macro, "supported_languages")
        model_lang_supported = model.language in materialization_macro.supported_languages
        if mat_has_supported_langs and not model_lang_supported:
            str_langs = [str(lang) for lang in materialization_macro.supported_languages]
            raise DbtValidationError(
                f'Materialization "{materialization_macro.name}" only supports languages {str_langs}; '
                f'got "{model.language}"'
            )

        hook_ctx = self.adapter.pre_model_hook(context_config)

        return self._execute_model(hook_ctx, context_config, model, context, materialization_macro)
338
+
339
+
340
class MicrobatchBatchRunner(ModelRunner):
    """Handles the running of individual batches"""

    def __init__(
        self,
        config,
        adapter,
        node,
        node_index: int,
        num_nodes: int,
        batch_idx: int,
        batches: Dict[int, BatchType],
        relation_exists: bool,
        incremental_batch: bool,
    ):
        """Create a runner for one batch of a microbatch model.

        batch_idx: index into ``batches`` of the batch this runner executes.
        batches: all batches (event-time start/end pairs) of the model run.
        relation_exists: whether the target relation already exists in the warehouse.
        incremental_batch: whether this batch should execute in incremental mode.
        """
        super().__init__(config, adapter, node, node_index, num_nodes)

        self.batch_idx = batch_idx
        self.batches = batches
        self.relation_exists = relation_exists
        self.incremental_batch = incremental_batch

    def describe_batch(self) -> str:
        """One-line description of this batch for log lines."""
        batch_start = self.batches[self.batch_idx][0]
        formatted_batch_start = MicrobatchBuilder.format_batch_start(
            batch_start, self.node.config.batch_size
        )
        return f"batch {formatted_batch_start} of {self.get_node_representation()}"

    def print_result_line(self, result: RunResult) -> None:
        """Fire the per-batch result event; errors log at ERROR level."""
        if result.status == NodeStatus.Error:
            status = result.status
            level = EventLevel.ERROR
        elif result.status == NodeStatus.Skipped:
            status = result.status
            level = EventLevel.INFO
        else:
            status = result.message
            level = EventLevel.INFO

        fire_event(
            LogBatchResult(
                description=self.describe_batch(),
                status=status,
                batch_index=self.batch_idx + 1,
                total_batches=len(self.batches),
                execution_time=result.execution_time,
                node_info=self.node.node_info,
                group=group_lookup.get(self.node.unique_id),
            ),
            level=level,
        )

    def print_start_line(self) -> None:
        """Fire the per-batch start event."""
        fire_event(
            LogStartBatch(
                description=self.describe_batch(),
                batch_index=self.batch_idx + 1,
                total_batches=len(self.batches),
                node_info=self.node.node_info,
            )
        )

    def should_run_in_parallel(self) -> bool:
        """Decide whether this batch may run concurrently with its siblings."""
        if not self.adapter.supports(Capability.MicrobatchConcurrency):
            run_in_parallel = False
        elif not self.relation_exists:
            # If the relation doesn't exist, we can't run in parallel
            run_in_parallel = False
        elif self.node.config.concurrent_batches is not None:
            # If the relation exists and the `concurrent_batches` config isn't None, use the config value
            run_in_parallel = self.node.config.concurrent_batches
        else:
            # If the relation exists, the `concurrent_batches` config is None, check if the model self references `this`.
            # If the model self references `this` then we assume the model batches _can't_ be run in parallel
            run_in_parallel = not self.node.has_this

        return run_in_parallel

    def on_skip(self):
        """Build and log a Skipped result, recording this batch as failed."""
        result = RunResult(
            node=self.node,
            status=RunStatus.Skipped,
            timing=[],
            thread_id=threading.current_thread().name,
            execution_time=0.0,
            message="SKIPPED",
            adapter_response={},
            failures=1,
            batch_results=BatchResults(failed=[self.batches[self.batch_idx]]),
        )
        self.print_result_line(result=result)
        return result

    def error_result(self, node, message, start_time, timing_info):
        """Necessary to return a result with a batch result

        Called by `BaseRunner.safe_run` when an error occurs
        """
        return self._build_run_result(
            node=node,
            start_time=start_time,
            status=RunStatus.Error,
            timing_info=timing_info,
            message=message,
            batch_results=BatchResults(failed=[self.batches[self.batch_idx]]),
        )

    def compile(self, manifest: Manifest):
        """Re-compile the node with this batch's event-time window applied."""
        batch = self.batches[self.batch_idx]

        # LEGACY: Set start/end in context prior to re-compiling (Will be removed for 1.10+)
        # TODO: REMOVE before 1.10 GA
        self.node.config["__dbt_internal_microbatch_event_time_start"] = batch[0]
        self.node.config["__dbt_internal_microbatch_event_time_end"] = batch[1]
        # Create batch context on model node prior to re-compiling
        self.node.batch = BatchContext(
            id=MicrobatchBuilder.batch_id(batch[0], self.node.config.batch_size),
            event_time_start=batch[0],
            event_time_end=batch[1],
        )
        # Recompile node to re-resolve refs with event time filters rendered, update context
        self.compiler.compile_node(
            self.node,
            manifest,
            {},
            split_suffix=MicrobatchBuilder.format_batch_start(
                batch[0], self.node.config.batch_size
            ),
        )

        return self.node

    # NOTE: method-name misspelling ("succesful") is kept for compatibility
    # with existing callers.
    def _build_succesful_run_batch_result(
        self,
        model: ModelNode,
        context: Dict[str, Any],
        batch: BatchType,
        elapsed_time: float = 0.0,
    ) -> RunResult:
        """Wrap a successful model-run result, tagging *batch* as successful."""
        run_result = self._build_run_model_result(model, context, elapsed_time)
        run_result.batch_results = BatchResults(successful=[batch])
        return run_result

    def _build_failed_run_batch_result(
        self,
        model: ModelNode,
        batch: BatchType,
        elapsed_time: float = 0.0,
    ) -> RunResult:
        """Build an Error result tagging *batch* as failed."""
        return RunResult(
            node=model,
            status=RunStatus.Error,
            timing=[],
            thread_id=threading.current_thread().name,
            execution_time=elapsed_time,
            message="ERROR",
            adapter_response={},
            failures=1,
            batch_results=BatchResults(failed=[batch]),
        )

    def _execute_microbatch_materialization(
        self,
        model: ModelNode,
        context: Dict[str, Any],
        materialization_macro: MacroProtocol,
    ) -> RunResult:
        """Run the materialization macro for this batch, never raising.

        Any exception other than KeyboardInterrupt/SystemExit is converted
        into a failed batch result so sibling batches keep running.
        """
        batch = self.batches[self.batch_idx]
        # call materialization_macro to get a batch-level run result
        start_time = time.perf_counter()
        try:
            # Update jinja context with batch context members
            jinja_context = MicrobatchBuilder.build_jinja_context_for_batch(
                model=model,
                incremental_batch=self.incremental_batch,
            )
            context.update(jinja_context)

            # Materialize batch and cache any materialized relations
            result = MacroGenerator(
                materialization_macro, context, stack=context["context_macro_stack"]
            )()
            for relation in self._materialization_relations(result, model):
                self.adapter.cache_added(relation.incorporate(dbt_created=True))

            # Build result of executed batch
            batch_run_result = self._build_succesful_run_batch_result(
                model, context, batch, time.perf_counter() - start_time
            )
            batch_result = batch_run_result

            # At least one batch has been inserted successfully!
            # Can proceed incrementally + in parallel
            self.relation_exists = True

        except (KeyboardInterrupt, SystemExit):
            # reraise it for GraphRunnableTask.execute_nodes to handle
            raise
        except Exception as e:
            fire_event(
                GenericExceptionOnRun(
                    unique_id=self.node.unique_id,
                    exc=f"Exception on worker thread. {str(e)}",
                    node_info=self.node.node_info,
                )
            )
            batch_run_result = self._build_failed_run_batch_result(
                model, batch, time.perf_counter() - start_time
            )

            batch_result = batch_run_result

        return batch_result

    def _execute_model(
        self,
        hook_ctx: Any,
        context_config: Any,
        model: ModelNode,
        context: Dict[str, Any],
        materialization_macro: MacroProtocol,
    ) -> RunResult:
        """Execute this batch; the adapter post_model_hook always runs (finally)."""
        try:
            batch_result = self._execute_microbatch_materialization(
                model, context, materialization_macro
            )
        finally:
            self.adapter.post_model_hook(context_config, hook_ctx)

        return batch_result
572
+
573
+
574
+ class MicrobatchModelRunner(ModelRunner):
575
+ """Handles the orchestration of batches to run for a given microbatch model"""
576
+
577
    def __init__(self, config, adapter, node, node_index: int, num_nodes: int):
        """Initialize the orchestrating runner; parent task and pool are injected later."""
        super().__init__(config, adapter, node, node_index, num_nodes)

        # The parent task is necessary because we need access to the `_submit_batch` and `submit` methods
        self._parent_task: Optional[RunTask] = None
        # The pool is necessary because we need to batches to be executed within the same thread pool
        self._pool: Optional[DbtThreadPool] = None
584
+
585
+ def set_parent_task(self, parent_task: RunTask) -> None:
586
+ self._parent_task = parent_task
587
+
588
+ def set_pool(self, pool: DbtThreadPool) -> None:
589
+ self._pool = pool
590
+
591
+ @property
592
+ def parent_task(self) -> RunTask:
593
+ if self._parent_task is None:
594
+ raise DbtInternalError(
595
+ msg="Tried to access `parent_task` of `MicrobatchModelRunner` before it was set"
596
+ )
597
+
598
+ return self._parent_task
599
+
600
+ @property
601
+ def pool(self) -> DbtThreadPool:
602
+ if self._pool is None:
603
+ raise DbtInternalError(
604
+ msg="Tried to access `pool` of `MicrobatchModelRunner` before it was set"
605
+ )
606
+
607
+ return self._pool
608
+
609
+ def _has_relation(self, model: ModelNode) -> bool:
610
+ """Check whether the relation for the model exists in the data warehouse"""
611
+ relation_info = self.adapter.Relation.create_from(self.config, model)
612
+ relation = self.adapter.get_relation(
613
+ relation_info.database, relation_info.schema, relation_info.name
614
+ )
615
+ return relation is not None
616
+
617
+ def _is_incremental(self, model) -> bool:
618
+ """Check whether the model should be run `incrementally` or as `full refresh`"""
619
+ # TODO: Remove this whole function. This should be a temporary method. We're working with adapters on
620
+ # a strategy to ensure we can access the `is_incremental` logic without drift
621
+ relation_info = self.adapter.Relation.create_from(self.config, model)
622
+ relation = self.adapter.get_relation(
623
+ relation_info.database, relation_info.schema, relation_info.name
624
+ )
625
+ if (
626
+ relation is not None
627
+ and relation.type == "table"
628
+ and model.config.materialized == "incremental"
629
+ ):
630
+ if model.config.full_refresh is not None:
631
+ return not model.config.full_refresh
632
+ else:
633
+ return not getattr(self.config.args, "FULL_REFRESH", False)
634
+ else:
635
+ return False
636
+
637
+ def _initial_run_microbatch_model_result(self, model: ModelNode) -> RunResult:
638
+ return RunResult(
639
+ node=model,
640
+ status=RunStatus.Success,
641
+ timing=[],
642
+ thread_id=threading.current_thread().name,
643
+ # The execution_time here doesn't get propagated to logs because
644
+ # `safe_run_hooks` handles the elapsed time at the node level
645
+ execution_time=0,
646
+ message="",
647
+ adapter_response={},
648
+ failures=0,
649
+ batch_results=BatchResults(),
650
+ )
651
+
652
+ def describe_node(self) -> str:
653
+ return f"{self.node.language} microbatch model {self.get_node_representation()}"
654
+
655
+ def merge_batch_results(self, result: RunResult, batch_results: List[RunResult]):
656
+ """merge batch_results into result"""
657
+ if result.batch_results is None:
658
+ result.batch_results = BatchResults()
659
+
660
+ for batch_result in batch_results:
661
+ if batch_result.batch_results is not None:
662
+ result.batch_results += batch_result.batch_results
663
+ result.execution_time += batch_result.execution_time
664
+
665
+ num_successes = len(result.batch_results.successful)
666
+ num_failures = len(result.batch_results.failed)
667
+ if num_failures == 0:
668
+ status = RunStatus.Success
669
+ msg = "SUCCESS"
670
+ elif num_successes == 0:
671
+ status = RunStatus.Error
672
+ msg = "ERROR"
673
+ else:
674
+ status = RunStatus.PartialSuccess
675
+ msg = f"PARTIAL SUCCESS ({num_successes}/{num_successes + num_failures})"
676
+ result.status = status
677
+ result.message = msg
678
+
679
+ result.batch_results.successful = sorted(result.batch_results.successful)
680
+ result.batch_results.failed = sorted(result.batch_results.failed)
681
+
682
+ # # If retrying, propagate previously successful batches into final result, even thoguh they were not run in this invocation
683
+ if self.node.previous_batch_results is not None:
684
+ result.batch_results.successful += self.node.previous_batch_results.successful
685
+
686
+ def _update_result_with_unfinished_batches(
687
+ self, result: RunResult, batches: Dict[int, BatchType]
688
+ ) -> None:
689
+ """This method is really only to be used when the execution of a microbatch model is halted before all batches have had a chance to run"""
690
+ batches_finished: Set[BatchType] = set()
691
+
692
+ if result.batch_results:
693
+ # build list of finished batches
694
+ batches_finished = batches_finished.union(set(result.batch_results.successful))
695
+ batches_finished = batches_finished.union(set(result.batch_results.failed))
696
+ else:
697
+ # instantiate `batch_results` if it was `None`
698
+ result.batch_results = BatchResults()
699
+
700
+ # skipped batches are any batch that was expected but didn't finish
701
+ batches_expected = {batch for _, batch in batches.items()}
702
+ skipped_batches = batches_expected.difference(batches_finished)
703
+
704
+ result.batch_results.failed.extend(list(skipped_batches))
705
+
706
+ # We call this method, even though we are merging no new results, as it updates
707
+ # the result witht he appropriate status (Success/Partial/Failed)
708
+ self.merge_batch_results(result, [])
709
+
710
+ def get_microbatch_builder(self, model: ModelNode) -> MicrobatchBuilder:
711
+ # Intially set the start/end to values from args
712
+ event_time_start = getattr(self.config.args, "EVENT_TIME_START", None)
713
+ event_time_end = getattr(self.config.args, "EVENT_TIME_END", None)
714
+
715
+ # If we're in sample mode, alter start/end to sample values
716
+ if getattr(self.config.args, "SAMPLE", None) is not None:
717
+ event_time_start = self.config.args.sample.start
718
+ event_time_end = self.config.args.sample.end
719
+
720
+ return MicrobatchBuilder(
721
+ model=model,
722
+ is_incremental=self._is_incremental(model),
723
+ event_time_start=event_time_start,
724
+ event_time_end=event_time_end,
725
+ default_end_time=get_invocation_started_at(),
726
+ )
727
+
728
+ def get_batches(self, model: ModelNode) -> Dict[int, BatchType]:
729
+ """Get the batches that should be run for the model"""
730
+
731
+ # Note currently (02/23/2025) model.previous_batch_results is only ever _not_ `None`
732
+ # IFF `dbt retry` is being run and the microbatch model had batches which
733
+ # failed on the run of the model (which is being retried)
734
+ if model.previous_batch_results is None:
735
+ microbatch_builder = self.get_microbatch_builder(model)
736
+ end = microbatch_builder.build_end_time()
737
+ start = microbatch_builder.build_start_time(end)
738
+ batches = microbatch_builder.build_batches(start, end)
739
+ else:
740
+ batches = model.previous_batch_results.failed
741
+
742
+ return {batch_idx: batches[batch_idx] for batch_idx in range(len(batches))}
743
+
744
+ def compile(self, manifest: Manifest):
745
+ """Don't do anything here because this runner doesn't need to compile anything"""
746
+ return self.node
747
+
748
+ def execute(self, model: ModelNode, manifest: Manifest) -> RunResult:
749
+ # Execution really means orchestration in this case
750
+
751
+ batches = self.get_batches(model=model)
752
+ relation_exists = self._has_relation(model=model)
753
+ result = self._initial_run_microbatch_model_result(model=model)
754
+
755
+ # No batches to run, so return initial result
756
+ if len(batches) == 0:
757
+ return result
758
+
759
+ batch_results: List[RunResult] = []
760
+ batch_idx = 0
761
+
762
+ # Run first batch not in parallel
763
+ relation_exists = self.parent_task._submit_batch(
764
+ node=model,
765
+ adapter=self.adapter,
766
+ relation_exists=relation_exists,
767
+ batches=batches,
768
+ batch_idx=batch_idx,
769
+ batch_results=batch_results,
770
+ pool=self.pool,
771
+ force_sequential_run=True,
772
+ incremental_batch=self._is_incremental(model=model),
773
+ )
774
+ batch_idx += 1
775
+ skip_batches = batch_results[0].status != RunStatus.Success
776
+
777
+ # Run all batches except first and last batch, in parallel if possible
778
+ while batch_idx < len(batches) - 1:
779
+ relation_exists = self.parent_task._submit_batch(
780
+ node=model,
781
+ adapter=self.adapter,
782
+ relation_exists=relation_exists,
783
+ batches=batches,
784
+ batch_idx=batch_idx,
785
+ batch_results=batch_results,
786
+ pool=self.pool,
787
+ skip=skip_batches,
788
+ )
789
+ batch_idx += 1
790
+
791
+ # Wait until all submitted batches have completed
792
+ while len(batch_results) != batch_idx:
793
+ # Check if the pool was closed, because if it was, then the main thread is trying to exit.
794
+ # If the main thread is trying to exit, we need to shutdown. If we _don't_ shutdown, then
795
+ # batches will continue to execute and we'll delay the run from stopping
796
+ if self.pool.is_closed():
797
+ # It's technically possible for more results to come in while we clean up
798
+ # instead we're going to say the didn't finish, regardless of if they finished
799
+ # or not. Thus, lets get a copy of the results as they exist right "now".
800
+ frozen_batch_results = deepcopy(batch_results)
801
+ self.merge_batch_results(result, frozen_batch_results)
802
+ self._update_result_with_unfinished_batches(result, batches)
803
+ return result
804
+
805
+ # breifly sleep so that this thread doesn't go brrrrr while waiting
806
+ time.sleep(0.1)
807
+
808
+ # Only run "last" batch if there is more than one batch
809
+ if len(batches) != 1:
810
+ # Final batch runs once all others complete to ensure post_hook runs at the end
811
+ self.parent_task._submit_batch(
812
+ node=model,
813
+ adapter=self.adapter,
814
+ relation_exists=relation_exists,
815
+ batches=batches,
816
+ batch_idx=batch_idx,
817
+ batch_results=batch_results,
818
+ pool=self.pool,
819
+ force_sequential_run=True,
820
+ skip=skip_batches,
821
+ )
822
+
823
+ # Finalize run: merge results, track model run, and print final result line
824
+ self.merge_batch_results(result, batch_results)
825
+
826
+ return result
827
+
828
+
829
+ class RunTask(CompileTask):
830
    def __init__(
        self,
        args: Flags,
        config: RuntimeConfig,
        manifest: Manifest,
        batch_map: Optional[Dict[str, BatchResults]] = None,
    ) -> None:
        super().__init__(args, config, manifest)
        # Mapping of model unique_id -> batch results from a previous run.
        # Only provided by `dbt retry` so that failed batches can be re-run.
        self.batch_map = batch_map
839
+
840
+ def raise_on_first_error(self) -> bool:
841
+ return False
842
+
843
+ def get_hook_sql(self, adapter, hook, idx, num_hooks, extra_context) -> str:
844
+ if self.manifest is None:
845
+ raise DbtInternalError("compile_node called before manifest was loaded")
846
+
847
+ compiled = self.compiler.compile_node(hook, self.manifest, extra_context)
848
+ statement = compiled.compiled_code
849
+ hook_index = hook.index or num_hooks
850
+ hook_obj = get_hook(statement, index=hook_index)
851
+ return hook_obj.sql or ""
852
+
853
    def handle_job_queue(self, pool, callback):
        """Pop the next ready node off the job queue and submit it to the pool."""
        node = self.job_queue.get()
        # Surface any error recorded by a previously completed runner before
        # starting new work.
        self._raise_set_error()
        runner = self.get_runner(node)
        # we finally know what we're running! Make sure we haven't decided
        # to skip it due to upstream failures
        if runner.node.unique_id in self._skipped_children:
            cause = self._skipped_children.pop(runner.node.unique_id)
            runner.do_skip(cause=cause)

        # Microbatch runners orchestrate their own batches, so they need a
        # handle on this task (for `_submit_batch`) and on the shared pool.
        if isinstance(runner, MicrobatchModelRunner):
            runner.set_parent_task(self)
            runner.set_pool(pool)

        args = [runner]
        self._submit(pool, args, callback)
869
+
870
    def _submit_batch(
        self,
        node: ModelNode,
        adapter: BaseAdapter,
        relation_exists: bool,
        batches: Dict[int, BatchType],
        batch_idx: int,
        batch_results: List[RunResult],
        pool: DbtThreadPool,
        force_sequential_run: bool = False,
        skip: bool = False,
        incremental_batch: bool = True,
    ):
        """Run a single batch of a microbatch model, either concurrently on
        `pool` or inline on the current thread.

        The batch's RunResult is appended to `batch_results` (possibly
        asynchronously via the pool callback). Returns the updated
        `relation_exists` flag so the caller can thread it into the next batch.
        """
        # Work on a copy so the hook edits below don't leak into other batches.
        node_copy = deepcopy(node)
        # Only run pre_hook(s) for first batch
        if batch_idx != 0:
            node_copy.config.pre_hook = []

        # Only run post_hook(s) for last batch
        if batch_idx != len(batches) - 1:
            node_copy.config.post_hook = []

        # TODO: We should be doing self.get_runner, however doing so
        # currently causes the tracking of how many nodes there are to
        # increment when we don't want it to
        batch_runner = MicrobatchBatchRunner(
            self.config,
            adapter,
            node_copy,
            self.run_count,
            self.num_nodes,
            batch_idx,
            batches,
            relation_exists,
            incremental_batch,
        )

        if skip:
            batch_runner.do_skip()

        if not pool.is_closed():
            if not force_sequential_run and batch_runner.should_run_in_parallel():
                fire_event(
                    MicrobatchExecutionDebug(
                        msg=f"{batch_runner.describe_batch()} is being run concurrently"
                    )
                )
                self._submit(pool, [batch_runner], batch_results.append)
            else:
                fire_event(
                    MicrobatchExecutionDebug(
                        msg=f"{batch_runner.describe_batch()} is being run sequentially"
                    )
                )
                batch_results.append(self.call_runner(batch_runner))
                # A sequential run may have just created the relation.
                relation_exists = batch_runner.relation_exists
        else:
            # Pool is shutting down: record the batch as failed without running it.
            batch_results.append(
                batch_runner._build_failed_run_batch_result(node_copy, batches[batch_idx])
            )

        return relation_exists
932
+
933
+ def _hook_keyfunc(self, hook: HookNode) -> Tuple[str, Optional[int]]:
934
+ package_name = hook.package_name
935
+ if package_name == self.config.project_name:
936
+ package_name = BiggestName("")
937
+ return package_name, hook.index
938
+
939
+ def get_hooks_by_type(self, hook_type: RunHookType) -> List[HookNode]:
940
+
941
+ if self.manifest is None:
942
+ raise DbtInternalError("self.manifest was None in get_hooks_by_type")
943
+
944
+ nodes = self.manifest.nodes.values()
945
+ # find all hooks defined in the manifest (could be multiple projects)
946
+ hooks: List[HookNode] = get_hooks_by_tags(nodes, {hook_type})
947
+ hooks.sort(key=self._hook_keyfunc)
948
+ return hooks
949
+
950
    def safe_run_hooks(
        self, adapter: BaseAdapter, hook_type: RunHookType, extra_context: Dict[str, Any]
    ) -> RunStatus:
        """Compile and run every hook of `hook_type`, one at a time, recording a
        RunResult per hook in `self.node_results`.

        After the first failing hook, the remaining hooks are marked Skipped.
        Returns the status of the last hook processed (Success when there are
        no hooks at all).
        """
        ordered_hooks = self.get_hooks_by_type(hook_type)

        # Blank line to visually separate on-run-end hook output.
        if hook_type == RunHookType.End and ordered_hooks:
            fire_event(Formatting(""))

        # on-run-* hooks should run outside a transaction. This happens because psycopg2 automatically begins a transaction when a connection is created.
        adapter.clear_transaction()
        if not ordered_hooks:
            return RunStatus.Success

        status = RunStatus.Success
        failed = False
        num_hooks = len(ordered_hooks)

        for idx, hook in enumerate(ordered_hooks, 1):
            with log_contextvars(node_info=hook.node_info):
                hook.index = idx
                hook_name = f"{hook.package_name}.{hook_type}.{hook.index - 1}"
                execution_time = 0.0
                timing: List[TimingInfo] = []
                # Assume failure until the hook demonstrably succeeds; note
                # that skipped hooks therefore also carry failures=1.
                failures = 1

                if not failed:
                    with collect_timing_info("compile", timing.append):
                        sql = self.get_hook_sql(
                            adapter, hook, hook.index, num_hooks, extra_context
                        )

                    # timing[0] is the "compile" step collected just above.
                    started_at = timing[0].started_at or datetime.now(timezone.utc).replace(
                        tzinfo=None
                    )
                    hook.update_event_status(
                        started_at=started_at.isoformat(), node_status=RunningStatus.Started
                    )

                    fire_event(
                        LogHookStartLine(
                            statement=hook_name,
                            index=hook.index,
                            total=num_hooks,
                            node_info=hook.node_info,
                        )
                    )

                    with collect_timing_info("execute", timing.append):
                        status, message = get_execution_status(sql, adapter)

                    # timing[1] is the "execute" step collected just above.
                    finished_at = timing[1].completed_at or datetime.now(timezone.utc).replace(
                        tzinfo=None
                    )
                    hook.update_event_status(finished_at=finished_at.isoformat())
                    execution_time = (finished_at - started_at).total_seconds()
                    failures = 0 if status == RunStatus.Success else 1

                    if status == RunStatus.Success:
                        message = f"{hook_name} passed"
                    else:
                        message = f"{hook_name} failed, error:\n {message}"
                        failed = True
                else:
                    # A previous hook failed: don't run this one at all.
                    status = RunStatus.Skipped
                    message = f"{hook_name} skipped"

                hook.update_event_status(node_status=status)

                self.node_results.append(
                    RunResult(
                        status=status,
                        thread_id="main",
                        timing=timing,
                        message=message,
                        adapter_response={},
                        execution_time=execution_time,
                        failures=failures,
                        node=hook,
                    )
                )

                fire_event(
                    LogHookEndLine(
                        statement=hook_name,
                        status=status,
                        index=hook.index,
                        total=num_hooks,
                        execution_time=execution_time,
                        node_info=hook.node_info,
                    )
                )

        # Blank line to visually separate on-run-start hook output.
        if hook_type == RunHookType.Start and ordered_hooks:
            fire_event(Formatting(""))

        return status
1046
+
1047
+ def print_results_line(self, results, execution_time) -> None:
1048
+ nodes = [r.node for r in results if hasattr(r, "node")]
1049
+ stat_line = get_counts(nodes)
1050
+
1051
+ execution = ""
1052
+
1053
+ if execution_time is not None:
1054
+ execution = utils.humanize_execution_time(execution_time=execution_time)
1055
+
1056
+ fire_event(Formatting(""))
1057
+ fire_event(
1058
+ FinishedRunningStats(
1059
+ stat_line=stat_line, execution=execution, execution_time=execution_time
1060
+ )
1061
+ )
1062
+
1063
+ def populate_microbatch_batches(self, selected_uids: AbstractSet[str]):
1064
+ if self.batch_map is not None and self.manifest is not None:
1065
+ for uid in selected_uids:
1066
+ if uid in self.batch_map:
1067
+ node = self.manifest.ref_lookup.perform_lookup(uid, self.manifest)
1068
+ if isinstance(node, ModelNode):
1069
+ node.previous_batch_results = self.batch_map[uid]
1070
+
1071
    def before_run(self, adapter: BaseAdapter, selected_uids: AbstractSet[str]) -> RunStatus:
        """One-time setup on the master connection before any node runs:
        deferral, schema creation, cache warm-up, retry batch population, and
        the on-run-start hooks. Returns the status of the start hooks.
        """
        with adapter.connection_named("master"):
            self.defer_to_manifest()
            required_schemas = self.get_model_schemas(adapter, selected_uids)
            self.create_schemas(adapter, required_schemas)
            self.populate_adapter_cache(adapter, required_schemas)
            self.populate_microbatch_batches(selected_uids)
            group_lookup.init(self.manifest, selected_uids)
            run_hooks_status = self.safe_run_hooks(adapter, RunHookType.Start, {})
            return run_hooks_status
1081
+
1082
    def after_run(self, adapter, results) -> None:
        # in on-run-end hooks, provide the value 'database_schemas', which is a
        # list of unique (database, schema) pairs that successfully executed
        # models were in. For backwards compatibility, include the old
        # 'schemas', which did not include database information.

        database_schema_set: Set[Tuple[Optional[str], str]] = {
            (r.node.database, r.node.schema)
            for r in results
            if (hasattr(r, "node") and r.node.is_relational)
            and r.status not in (NodeStatus.Error, NodeStatus.Fail, NodeStatus.Skipped)
        }

        extras = {
            "schemas": list({s for _, s in database_schema_set}),
            "results": [
                r for r in results if r.thread_id != "main" or r.status == RunStatus.Error
            ],  # exclude hook results that didn't fail, to preserve backwards compatibility
            "database_schemas": list(database_schema_set),
        }

        try:
            with adapter.connection_named("master"):
                self.safe_run_hooks(adapter, RunHookType.End, extras)
        except (KeyboardInterrupt, SystemExit, DbtRuntimeError):
            # The run was interrupted (or the end hooks errored): still write
            # out the results gathered so far before re-raising.
            run_result = self.get_result(
                results=self.node_results,
                elapsed_time=time.time() - self.started_at,
                generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
            )

            if self.args.write_json and hasattr(run_result, "write"):
                run_result.write(self.result_path())
                add_artifact_produced(self.result_path())

            print_run_end_messages(self.node_results, keyboard_interrupt=True)

            raise
1120
+
1121
+ def get_node_selector(self) -> ResourceTypeSelector:
1122
+ if self.manifest is None or self.graph is None:
1123
+ raise DbtInternalError("manifest and graph must be set to get perform node selection")
1124
+ return ResourceTypeSelector(
1125
+ graph=self.graph,
1126
+ manifest=self.manifest,
1127
+ previous_state=self.previous_state,
1128
+ resource_types=[NodeType.Model],
1129
+ )
1130
+
1131
+ def get_runner_type(self, node) -> Optional[Type[BaseRunner]]:
1132
+ if self.manifest is None:
1133
+ raise DbtInternalError("manifest must be set prior to calling get_runner_type")
1134
+
1135
+ if (
1136
+ node.config.materialized == "incremental"
1137
+ and node.config.incremental_strategy == "microbatch"
1138
+ and self.manifest.use_microbatch_batches(project_name=self.config.project_name)
1139
+ ):
1140
+ return MicrobatchModelRunner
1141
+ else:
1142
+ return ModelRunner
1143
+
1144
    def task_end_messages(self, results) -> None:
        # Print the final run summary (including any failures) once the run ends.
        if results:
            print_run_end_messages(results)