dvt-core 1.11.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dvt-core might be problematic. Click here for more details.

Files changed (261)
  1. dvt/__init__.py +7 -0
  2. dvt/_pydantic_shim.py +26 -0
  3. dvt/adapters/__init__.py +16 -0
  4. dvt/adapters/multi_adapter_manager.py +268 -0
  5. dvt/artifacts/__init__.py +0 -0
  6. dvt/artifacts/exceptions/__init__.py +1 -0
  7. dvt/artifacts/exceptions/schemas.py +31 -0
  8. dvt/artifacts/resources/__init__.py +116 -0
  9. dvt/artifacts/resources/base.py +68 -0
  10. dvt/artifacts/resources/types.py +93 -0
  11. dvt/artifacts/resources/v1/analysis.py +10 -0
  12. dvt/artifacts/resources/v1/catalog.py +23 -0
  13. dvt/artifacts/resources/v1/components.py +275 -0
  14. dvt/artifacts/resources/v1/config.py +282 -0
  15. dvt/artifacts/resources/v1/documentation.py +11 -0
  16. dvt/artifacts/resources/v1/exposure.py +52 -0
  17. dvt/artifacts/resources/v1/function.py +53 -0
  18. dvt/artifacts/resources/v1/generic_test.py +32 -0
  19. dvt/artifacts/resources/v1/group.py +22 -0
  20. dvt/artifacts/resources/v1/hook.py +11 -0
  21. dvt/artifacts/resources/v1/macro.py +30 -0
  22. dvt/artifacts/resources/v1/metric.py +173 -0
  23. dvt/artifacts/resources/v1/model.py +146 -0
  24. dvt/artifacts/resources/v1/owner.py +10 -0
  25. dvt/artifacts/resources/v1/saved_query.py +112 -0
  26. dvt/artifacts/resources/v1/seed.py +42 -0
  27. dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  28. dvt/artifacts/resources/v1/semantic_model.py +315 -0
  29. dvt/artifacts/resources/v1/singular_test.py +14 -0
  30. dvt/artifacts/resources/v1/snapshot.py +92 -0
  31. dvt/artifacts/resources/v1/source_definition.py +85 -0
  32. dvt/artifacts/resources/v1/sql_operation.py +10 -0
  33. dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
  34. dvt/artifacts/schemas/__init__.py +0 -0
  35. dvt/artifacts/schemas/base.py +191 -0
  36. dvt/artifacts/schemas/batch_results.py +24 -0
  37. dvt/artifacts/schemas/catalog/__init__.py +12 -0
  38. dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  39. dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
  40. dvt/artifacts/schemas/freshness/__init__.py +1 -0
  41. dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  42. dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
  43. dvt/artifacts/schemas/manifest/__init__.py +2 -0
  44. dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  45. dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
  46. dvt/artifacts/schemas/results.py +148 -0
  47. dvt/artifacts/schemas/run/__init__.py +2 -0
  48. dvt/artifacts/schemas/run/v5/__init__.py +0 -0
  49. dvt/artifacts/schemas/run/v5/run.py +184 -0
  50. dvt/artifacts/schemas/upgrades/__init__.py +4 -0
  51. dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  52. dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  53. dvt/artifacts/utils/validation.py +153 -0
  54. dvt/cli/__init__.py +1 -0
  55. dvt/cli/context.py +16 -0
  56. dvt/cli/exceptions.py +56 -0
  57. dvt/cli/flags.py +558 -0
  58. dvt/cli/main.py +971 -0
  59. dvt/cli/option_types.py +121 -0
  60. dvt/cli/options.py +79 -0
  61. dvt/cli/params.py +803 -0
  62. dvt/cli/requires.py +478 -0
  63. dvt/cli/resolvers.py +32 -0
  64. dvt/cli/types.py +40 -0
  65. dvt/clients/__init__.py +0 -0
  66. dvt/clients/checked_load.py +82 -0
  67. dvt/clients/git.py +164 -0
  68. dvt/clients/jinja.py +206 -0
  69. dvt/clients/jinja_static.py +245 -0
  70. dvt/clients/registry.py +192 -0
  71. dvt/clients/yaml_helper.py +68 -0
  72. dvt/compilation.py +833 -0
  73. dvt/compute/__init__.py +26 -0
  74. dvt/compute/base.py +288 -0
  75. dvt/compute/engines/__init__.py +13 -0
  76. dvt/compute/engines/duckdb_engine.py +368 -0
  77. dvt/compute/engines/spark_engine.py +273 -0
  78. dvt/compute/query_analyzer.py +212 -0
  79. dvt/compute/router.py +483 -0
  80. dvt/config/__init__.py +4 -0
  81. dvt/config/catalogs.py +95 -0
  82. dvt/config/compute_config.py +406 -0
  83. dvt/config/profile.py +411 -0
  84. dvt/config/profiles_v2.py +464 -0
  85. dvt/config/project.py +893 -0
  86. dvt/config/renderer.py +232 -0
  87. dvt/config/runtime.py +491 -0
  88. dvt/config/selectors.py +209 -0
  89. dvt/config/utils.py +78 -0
  90. dvt/connectors/.gitignore +6 -0
  91. dvt/connectors/README.md +306 -0
  92. dvt/connectors/catalog.yml +217 -0
  93. dvt/connectors/download_connectors.py +300 -0
  94. dvt/constants.py +29 -0
  95. dvt/context/__init__.py +0 -0
  96. dvt/context/base.py +746 -0
  97. dvt/context/configured.py +136 -0
  98. dvt/context/context_config.py +350 -0
  99. dvt/context/docs.py +82 -0
  100. dvt/context/exceptions_jinja.py +179 -0
  101. dvt/context/macro_resolver.py +195 -0
  102. dvt/context/macros.py +171 -0
  103. dvt/context/manifest.py +73 -0
  104. dvt/context/providers.py +2198 -0
  105. dvt/context/query_header.py +14 -0
  106. dvt/context/secret.py +59 -0
  107. dvt/context/target.py +74 -0
  108. dvt/contracts/__init__.py +0 -0
  109. dvt/contracts/files.py +413 -0
  110. dvt/contracts/graph/__init__.py +0 -0
  111. dvt/contracts/graph/manifest.py +1904 -0
  112. dvt/contracts/graph/metrics.py +98 -0
  113. dvt/contracts/graph/model_config.py +71 -0
  114. dvt/contracts/graph/node_args.py +42 -0
  115. dvt/contracts/graph/nodes.py +1806 -0
  116. dvt/contracts/graph/semantic_manifest.py +233 -0
  117. dvt/contracts/graph/unparsed.py +812 -0
  118. dvt/contracts/project.py +417 -0
  119. dvt/contracts/results.py +53 -0
  120. dvt/contracts/selection.py +23 -0
  121. dvt/contracts/sql.py +86 -0
  122. dvt/contracts/state.py +69 -0
  123. dvt/contracts/util.py +46 -0
  124. dvt/deprecations.py +347 -0
  125. dvt/deps/__init__.py +0 -0
  126. dvt/deps/base.py +153 -0
  127. dvt/deps/git.py +196 -0
  128. dvt/deps/local.py +80 -0
  129. dvt/deps/registry.py +131 -0
  130. dvt/deps/resolver.py +149 -0
  131. dvt/deps/tarball.py +121 -0
  132. dvt/docs/source/_ext/dbt_click.py +118 -0
  133. dvt/docs/source/conf.py +32 -0
  134. dvt/env_vars.py +64 -0
  135. dvt/event_time/event_time.py +40 -0
  136. dvt/event_time/sample_window.py +60 -0
  137. dvt/events/__init__.py +16 -0
  138. dvt/events/base_types.py +37 -0
  139. dvt/events/core_types_pb2.py +2 -0
  140. dvt/events/logging.py +109 -0
  141. dvt/events/types.py +2534 -0
  142. dvt/exceptions.py +1487 -0
  143. dvt/flags.py +89 -0
  144. dvt/graph/__init__.py +11 -0
  145. dvt/graph/cli.py +248 -0
  146. dvt/graph/graph.py +172 -0
  147. dvt/graph/queue.py +213 -0
  148. dvt/graph/selector.py +375 -0
  149. dvt/graph/selector_methods.py +976 -0
  150. dvt/graph/selector_spec.py +223 -0
  151. dvt/graph/thread_pool.py +18 -0
  152. dvt/hooks.py +21 -0
  153. dvt/include/README.md +49 -0
  154. dvt/include/__init__.py +3 -0
  155. dvt/include/global_project.py +4 -0
  156. dvt/include/starter_project/.gitignore +4 -0
  157. dvt/include/starter_project/README.md +15 -0
  158. dvt/include/starter_project/__init__.py +3 -0
  159. dvt/include/starter_project/analyses/.gitkeep +0 -0
  160. dvt/include/starter_project/dvt_project.yml +36 -0
  161. dvt/include/starter_project/macros/.gitkeep +0 -0
  162. dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  163. dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  164. dvt/include/starter_project/models/example/schema.yml +21 -0
  165. dvt/include/starter_project/seeds/.gitkeep +0 -0
  166. dvt/include/starter_project/snapshots/.gitkeep +0 -0
  167. dvt/include/starter_project/tests/.gitkeep +0 -0
  168. dvt/internal_deprecations.py +27 -0
  169. dvt/jsonschemas/__init__.py +3 -0
  170. dvt/jsonschemas/jsonschemas.py +309 -0
  171. dvt/jsonschemas/project/0.0.110.json +4717 -0
  172. dvt/jsonschemas/project/0.0.85.json +2015 -0
  173. dvt/jsonschemas/resources/0.0.110.json +2636 -0
  174. dvt/jsonschemas/resources/0.0.85.json +2536 -0
  175. dvt/jsonschemas/resources/latest.json +6773 -0
  176. dvt/links.py +4 -0
  177. dvt/materializations/__init__.py +0 -0
  178. dvt/materializations/incremental/__init__.py +0 -0
  179. dvt/materializations/incremental/microbatch.py +235 -0
  180. dvt/mp_context.py +8 -0
  181. dvt/node_types.py +37 -0
  182. dvt/parser/__init__.py +23 -0
  183. dvt/parser/analysis.py +21 -0
  184. dvt/parser/base.py +549 -0
  185. dvt/parser/common.py +267 -0
  186. dvt/parser/docs.py +52 -0
  187. dvt/parser/fixtures.py +51 -0
  188. dvt/parser/functions.py +30 -0
  189. dvt/parser/generic_test.py +100 -0
  190. dvt/parser/generic_test_builders.py +334 -0
  191. dvt/parser/hooks.py +119 -0
  192. dvt/parser/macros.py +137 -0
  193. dvt/parser/manifest.py +2204 -0
  194. dvt/parser/models.py +574 -0
  195. dvt/parser/partial.py +1179 -0
  196. dvt/parser/read_files.py +445 -0
  197. dvt/parser/schema_generic_tests.py +423 -0
  198. dvt/parser/schema_renderer.py +111 -0
  199. dvt/parser/schema_yaml_readers.py +936 -0
  200. dvt/parser/schemas.py +1467 -0
  201. dvt/parser/search.py +149 -0
  202. dvt/parser/seeds.py +28 -0
  203. dvt/parser/singular_test.py +20 -0
  204. dvt/parser/snapshots.py +44 -0
  205. dvt/parser/sources.py +557 -0
  206. dvt/parser/sql.py +63 -0
  207. dvt/parser/unit_tests.py +622 -0
  208. dvt/plugins/__init__.py +20 -0
  209. dvt/plugins/contracts.py +10 -0
  210. dvt/plugins/exceptions.py +2 -0
  211. dvt/plugins/manager.py +164 -0
  212. dvt/plugins/manifest.py +21 -0
  213. dvt/profiler.py +20 -0
  214. dvt/py.typed +1 -0
  215. dvt/runners/__init__.py +2 -0
  216. dvt/runners/exposure_runner.py +7 -0
  217. dvt/runners/no_op_runner.py +46 -0
  218. dvt/runners/saved_query_runner.py +7 -0
  219. dvt/selected_resources.py +8 -0
  220. dvt/task/__init__.py +0 -0
  221. dvt/task/base.py +504 -0
  222. dvt/task/build.py +197 -0
  223. dvt/task/clean.py +57 -0
  224. dvt/task/clone.py +162 -0
  225. dvt/task/compile.py +151 -0
  226. dvt/task/compute.py +366 -0
  227. dvt/task/debug.py +650 -0
  228. dvt/task/deps.py +280 -0
  229. dvt/task/docs/__init__.py +3 -0
  230. dvt/task/docs/generate.py +408 -0
  231. dvt/task/docs/index.html +250 -0
  232. dvt/task/docs/serve.py +28 -0
  233. dvt/task/freshness.py +323 -0
  234. dvt/task/function.py +122 -0
  235. dvt/task/group_lookup.py +46 -0
  236. dvt/task/init.py +374 -0
  237. dvt/task/list.py +237 -0
  238. dvt/task/printer.py +176 -0
  239. dvt/task/profiles.py +256 -0
  240. dvt/task/retry.py +175 -0
  241. dvt/task/run.py +1146 -0
  242. dvt/task/run_operation.py +142 -0
  243. dvt/task/runnable.py +802 -0
  244. dvt/task/seed.py +104 -0
  245. dvt/task/show.py +150 -0
  246. dvt/task/snapshot.py +57 -0
  247. dvt/task/sql.py +111 -0
  248. dvt/task/test.py +464 -0
  249. dvt/tests/fixtures/__init__.py +1 -0
  250. dvt/tests/fixtures/project.py +620 -0
  251. dvt/tests/util.py +651 -0
  252. dvt/tracking.py +529 -0
  253. dvt/utils/__init__.py +3 -0
  254. dvt/utils/artifact_upload.py +151 -0
  255. dvt/utils/utils.py +408 -0
  256. dvt/version.py +249 -0
  257. dvt_core-1.11.0b4.dist-info/METADATA +252 -0
  258. dvt_core-1.11.0b4.dist-info/RECORD +261 -0
  259. dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
  260. dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
  261. dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/task/runnable.py ADDED
@@ -0,0 +1,802 @@
1
+ import os
2
+ import time
3
+ from abc import abstractmethod
4
+ from concurrent.futures import as_completed
5
+ from datetime import datetime, timezone
6
+ from pathlib import Path
7
+ from typing import AbstractSet, Dict, Iterable, List, Optional, Set, Tuple, Type, Union
8
+
9
+ import dvt.exceptions
10
+ import dvt.tracking
11
+ import dvt.utils
12
+ from dvt.artifacts.schemas.results import (
13
+ BaseResult,
14
+ NodeStatus,
15
+ RunningStatus,
16
+ RunStatus,
17
+ )
18
+ from dvt.artifacts.schemas.run import RunExecutionResult, RunResult
19
+ from dvt.cli.flags import Flags
20
+ from dvt.config.runtime import RuntimeConfig
21
+ from dvt.constants import RUN_RESULTS_FILE_NAME
22
+ from dvt.contracts.graph.manifest import Manifest
23
+ from dvt.contracts.graph.nodes import Exposure, ResultNode
24
+ from dvt.contracts.state import PreviousState
25
+ from dvt.events.types import (
26
+ ArtifactWritten,
27
+ ConcurrencyLine,
28
+ DefaultSelector,
29
+ EndRunResult,
30
+ GenericExceptionOnRun,
31
+ LogCancelLine,
32
+ MarkSkippedChildren,
33
+ NodeFinished,
34
+ NodeStart,
35
+ NothingToDo,
36
+ QueryCancelationUnsupported,
37
+ SkippingDetails,
38
+ )
39
+ from dvt.exceptions import DbtInternalError, DbtRuntimeError, FailFastError
40
+ from dvt.flags import get_flags
41
+ from dvt.graph import (
42
+ GraphQueue,
43
+ NodeSelector,
44
+ SelectionSpec,
45
+ UniqueId,
46
+ parse_difference,
47
+ )
48
+ from dvt.graph.thread_pool import DbtThreadPool
49
+ from dvt.parser.manifest import write_manifest
50
+ from dvt.task import group_lookup
51
+ from dvt.task.base import BaseRunner, ConfiguredTask
52
+ from dvt.task.printer import print_run_end_messages, print_run_result_error
53
+ from dvt.utils.artifact_upload import add_artifact_produced
54
+
55
+ import dbt_common.utils.formatting
56
+ from dbt.adapters.base import BaseAdapter, BaseRelation
57
+ from dbt.adapters.factory import get_adapter
58
+ from dbt_common.context import _INVOCATION_CONTEXT_VAR, get_invocation_context
59
+ from dbt_common.dataclass_schema import StrEnum
60
+ from dbt_common.events.contextvars import log_contextvars, task_contextvars
61
+ from dbt_common.events.functions import fire_event, warn_or_error
62
+ from dbt_common.events.types import Formatting
63
+ from dbt_common.exceptions import NotImplementedError
64
+
65
+
66
class GraphRunnableMode(StrEnum):
    """How selected nodes are scheduled.

    Topological: preserve dependency edges in the graph queue (default).
    Independent: drop edges so nodes can be scheduled in any order.
    """

    Topological = "topological"
    Independent = "independent"
69
+
70
+
71
def mark_node_as_skipped(
    node: ResultNode, executed_node_ids: Set[str], message: Optional[str]
) -> Optional[RunResult]:
    """Build a Skipped RunResult for *node*, or None if it already executed.

    *message* is forwarded verbatim as the skip reason (may be None).
    """
    if node.unique_id in executed_node_ids:
        return None
    return RunResult.from_node(node, RunStatus.Skipped, message)
77
+
78
+
79
+ class GraphRunnableTask(ConfiguredTask):
80
+ MARK_DEPENDENT_ERRORS_STATUSES = [NodeStatus.Error, NodeStatus.PartialSuccess]
81
+
82
def __init__(self, args: Flags, config: RuntimeConfig, manifest: Manifest) -> None:
    """Initialize runnable-task state, the DVT execution layer, and any
    --state / --defer-state snapshots."""
    super().__init__(args, config, manifest)
    self.config = config
    # Nodes selected for execution; populated in _runtime_initialize.
    self._flattened_nodes: Optional[List[ResultNode]] = None
    # Error stashed by a worker thread, re-raised on the coordinating
    # thread at the next tick (see _raise_set_error).
    self._raise_next_tick: Optional[DbtRuntimeError] = None
    # unique_id -> optional causing RunResult, for children of failed nodes.
    self._skipped_children: Dict[str, Optional[RunResult]] = {}
    self.job_queue: Optional[GraphQueue] = None
    self.node_results: List[BaseResult] = []
    self.num_nodes: int = 0
    self.previous_state: Optional[PreviousState] = None
    self.previous_defer_state: Optional[PreviousState] = None
    self.run_count: int = 0
    self.started_at: float = 0

    # DVT: Initialize execution router and multi-adapter manager
    self._init_dvt_execution_layer()

    # --state: comparison state for state:modified selection etc.
    if self.args.state:
        self.previous_state = PreviousState(
            state_path=self.args.state,
            target_path=Path(self.config.target_path),
            project_root=Path(self.config.project_root),
        )

    # --defer-state: manifest to defer unselected references to.
    if self.args.defer_state:
        self.previous_defer_state = PreviousState(
            state_path=self.args.defer_state,
            target_path=Path(self.config.target_path),
            project_root=Path(self.config.project_root),
        )
112
+
113
def _init_dvt_execution_layer(self) -> None:
    """
    Initialize DVT execution layer components.

    This sets up:
    - UnifiedProfileConfig: Load all connection profiles
    - MultiAdapterManager: Manage multiple adapter instances
    - ComputeConfig: Load compute layer configuration
    - ExecutionRouter: Route queries to optimal execution engine

    These components enable DVT's data virtualization capabilities.
    """
    # Imports are local to avoid paying the cost (and any import cycles)
    # at module load time.
    from multiprocessing import get_context
    from pathlib import Path

    from dvt.adapters import create_multi_adapter_manager
    from dvt.compute.router import ExecutionRouter
    from dvt.config.compute_config import load_compute_config
    from dvt.config.profiles_v2 import load_unified_profiles

    # Load unified profiles (sources + targets)
    project_path = Path(self.config.project_root)
    self.unified_profiles = load_unified_profiles(project_path)

    # Create multi-adapter manager for handling different source connections.
    # NOTE(review): "spawn" is used rather than the platform default —
    # presumably for adapter-process isolation; confirm before changing.
    mp_context = get_context("spawn")
    self.multi_adapter_manager = create_multi_adapter_manager(
        self.unified_profiles, mp_context
    )

    # Load compute layer configuration (DuckDB, Spark settings)
    self.compute_config = load_compute_config(project_path)

    # Initialize execution router for query routing decisions
    self.execution_router = ExecutionRouter(
        compute_config=self.compute_config,
        multi_adapter_manager=self.multi_adapter_manager,
        manifest=self.manifest,
    )
152
+
153
def index_offset(self, value: int) -> int:
    """Identity hook: subclasses may shift printed node indices."""
    return value
155
+
156
@property
def selection_arg(self):
    # The --select criteria supplied on the command line.
    return getattr(self.args, "select")
159
+
160
@property
def exclusion_arg(self):
    # The --exclude criteria supplied on the command line.
    return getattr(self.args, "exclude")
163
+
164
def get_selection_spec(self) -> SelectionSpec:
    """Resolve the selection spec for this invocation.

    Priority: --inline (empty spec) > --selector > a selector marked
    ``default: true`` (only when no --select/--exclude given) > the
    --select/--exclude arguments themselves.
    """
    default_selector_name = self.config.get_default_selector_name()

    # Inline SQL: run with an empty selection spec.
    if getattr(self.args, "inline", None):
        return parse_difference(None, None)

    # Explicit pre-defined selector (--selector).
    if self.args.selector:
        return self.config.get_selector(self.args.selector)

    # A selector flagged `default: true`, used only when the user gave
    # no selection criteria at all.
    if default_selector_name and not (self.selection_arg or self.exclusion_arg):
        fire_event(DefaultSelector(name=default_selector_name))
        return self.config.get_selector(default_selector_name)

    # Fall back to --select / --exclude.
    return parse_difference(self.selection_arg, self.exclusion_arg)
184
+
185
@abstractmethod
def get_node_selector(self) -> NodeSelector:
    """Return the NodeSelector for this task; concrete tasks must override.

    Note: raises the ``NotImplementedError`` imported from
    ``dbt_common.exceptions`` above, which shadows the builtin.
    """
    raise NotImplementedError(f"get_node_selector not implemented for task {type(self)}")
188
+
189
def defer_to_manifest(self):
    """Merge the --defer-state manifest into the runtime manifest.

    No-op when no deferred manifest is available; raises
    DbtInternalError when there is one but no runtime manifest.
    """
    other = self._get_deferred_manifest()
    if other is None:
        return
    if self.manifest is None:
        raise DbtInternalError(
            "Expected to defer to manifest, but there is no runtime manifest to defer from!"
        )
    self.manifest.merge_from_artifact(other=other)
198
+
199
def get_graph_queue(self) -> GraphQueue:
    """Build the GraphQueue from the node selector and selection spec.

    Dependency edges are preserved unless the task runs in
    Independent mode.
    """
    node_selector = self.get_node_selector()
    # Uses self.selection_arg / self.exclusion_arg internally.
    selection_spec = self.get_selection_spec()
    keep_edges = self.get_run_mode() != GraphRunnableMode.Independent
    return node_selector.get_graph_queue(selection_spec, keep_edges)
209
+
210
def get_run_mode(self) -> GraphRunnableMode:
    """Scheduling mode; subclasses override to ignore dependency edges."""
    return GraphRunnableMode.Topological
212
+
213
def _runtime_initialize(self):
    """Compile the manifest, build the job queue, and flatten selection.

    Populates self.job_queue, self._flattened_nodes and self.num_nodes
    (ephemeral models are excluded from the count).
    """
    self.compile_manifest()
    if self.manifest is None or self.graph is None:
        raise DbtInternalError("_runtime_initialize never loaded the graph!")

    self.job_queue = self.get_graph_queue()

    # Flattened view of the selected nodes; order does not matter.
    # Each uid lives in exactly one of these manifest collections.
    lookups = (
        self.manifest.nodes,
        self.manifest.sources,
        self.manifest.saved_queries,
        self.manifest.unit_tests,
        self.manifest.exposures,
        self.manifest.functions,
    )
    self._flattened_nodes = []
    for uid in self.job_queue.get_selected_nodes():
        for collection in lookups:
            if uid in collection:
                self._flattened_nodes.append(collection[uid])
                break
        else:
            raise DbtInternalError(
                f"Node selection returned {uid}, expected an exposure, a function, a node, a saved query, a source, or a unit test"
            )

    self.num_nodes = sum(
        1 for n in self._flattened_nodes if not n.is_ephemeral_model
    )
241
+
242
def raise_on_first_error(self) -> bool:
    """Whether one node error aborts the whole run (default: no)."""
    return False
244
+
245
def get_runner_type(self, node) -> Optional[Type[BaseRunner]]:
    """Map a node to its BaseRunner subclass; concrete tasks override.

    Raises the dbt_common ``NotImplementedError`` imported above.
    """
    raise NotImplementedError("Not Implemented")
247
+
248
def result_path(self) -> str:
    """Path of the run_results artifact inside the project target dir."""
    return os.path.join(self.config.project_target_path, RUN_RESULTS_FILE_NAME)
250
+
251
def get_runner(self, node) -> BaseRunner:
    """Construct the runner for *node*.

    Ephemeral models get index/total of 0 and do not advance
    self.run_count; all other nodes increment it.
    """
    adapter = get_adapter(self.config)

    if node.is_ephemeral_model:
        index, total = 0, 0
    else:
        self.run_count += 1
        index, total = self.run_count, self.num_nodes

    runner_cls = self.get_runner_type(node)
    if runner_cls is None:
        raise DbtInternalError("Could not find runner type for node.")

    return runner_cls(self.config, adapter, node, index, total)
270
+
271
def call_runner(self, runner: BaseRunner) -> RunResult:
    """Execute one runner on a worker thread and return its RunResult.

    Fires NodeStart/NodeFinished events around execution. Any plain
    exception is converted into an Error RunResult (never propagated);
    KeyboardInterrupt/SystemExit are re-raised after being recorded.
    On failure with --fail-fast (or raise_on_first_error), the error is
    stashed in self._raise_next_tick for the coordinating thread.
    """
    with log_contextvars(node_info=runner.node.node_info):
        runner.node.update_event_status(
            # Naive UTC timestamp, serialized ISO-8601.
            started_at=datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
            node_status=RunningStatus.Started,
        )
        fire_event(
            NodeStart(
                node_info=runner.node.node_info,
            )
        )

        result = None
        thread_exception: Optional[Union[KeyboardInterrupt, SystemExit, Exception]] = None
        try:
            result = runner.run_with_hooks(self.manifest)
        except (KeyboardInterrupt, SystemExit) as exe:
            # Interrupts must propagate; record them so the finally
            # block can still emit a result event.
            result = None
            thread_exception = exe
            raise
        except Exception as e:
            # Swallow worker exceptions: they become an Error result below.
            result = None
            thread_exception = e
        finally:
            if result is not None:
                fire_event(
                    NodeFinished(
                        node_info=runner.node.node_info,
                        run_result=result.to_msg_dict(),
                    )
                )
            else:
                msg = f"Exception on worker thread. {thread_exception}"

                fire_event(
                    GenericExceptionOnRun(
                        unique_id=runner.node.unique_id,
                        exc=str(thread_exception),
                        node_info=runner.node.node_info,
                    )
                )

                # Synthesize an Error result so downstream accounting
                # (skipping children, run totals) still works.
                result = RunResult(
                    status=RunStatus.Error,  # type: ignore
                    timing=[],
                    thread_id="",
                    execution_time=0.0,
                    adapter_response={},
                    message=msg,
                    failures=None,
                    batch_results=None,
                    node=runner.node,
                )

            # `_event_status` dict is only used for logging. Make sure
            # it gets deleted when we're done with it
            runner.node.clear_event_status()

    fail_fast = get_flags().FAIL_FAST

    if (
        result.status in (NodeStatus.Error, NodeStatus.Fail, NodeStatus.PartialSuccess)
        and fail_fast
    ):
        self._raise_next_tick = FailFastError(
            msg="Failing early due to test failure or runtime error",
            result=result,
            node=getattr(result, "node", None),
        )
    elif result.status == NodeStatus.Error and self.raise_on_first_error():
        # if we raise inside a thread, it'll just get silently swallowed.
        # stash the error message we want here, and it will check the
        # next 'tick' - should be soon since our thread is about to finish!
        self._raise_next_tick = DbtRuntimeError(result.message)

    return result
347
+
348
def _submit(self, pool, args, callback):
    """Submit a call_runner invocation to *pool*, or run it inline.

    The magic 'single-threaded' flag executes the runner directly on
    this thread, which makes `call_runner` visible to python's default
    profiling tools (they ignore child threads). Results flow through
    *callback* in both modes.
    """
    if self.config.args.single_threaded:
        callback(self.call_runner(*args))
        return
    pool.apply_async(self.call_runner, args=args, callback=callback)
361
+
362
def _raise_set_error(self):
    # Re-raise, on the coordinating thread, an error that a worker
    # thread stashed in self._raise_next_tick (see call_runner).
    if self._raise_next_tick is not None:
        raise self._raise_next_tick
365
+
366
def run_queue(self, pool):
    """Given a pool, submit jobs from the queue to the pool.

    Drains self.job_queue into *pool*, then blocks until all submitted
    work completes. With --fail-fast, polls between completions so a
    stashed error can be raised as early as possible.
    """
    if self.job_queue is None:
        raise DbtInternalError("Got to run_queue with no job queue set")

    def callback(result):
        """Note: mark_done, at a minimum, must happen here or dbt will
        deadlock during ephemeral result error handling!
        """
        self._handle_result(result)

        if self.job_queue is None:
            raise DbtInternalError("Got to run_queue callback with no job queue set")
        # mark_done may unlock further nodes for the while-loop below.
        self.job_queue.mark_done(result.node.unique_id)

    while not self.job_queue.empty():
        self.handle_job_queue(pool, callback)

    # block on completion
    if get_flags().FAIL_FAST:
        # checkout for an errors after task completion in case of
        # fast failure
        while self.job_queue.wait_until_something_was_done():
            self._raise_set_error()
    else:
        # wait until every task will be complete
        self.job_queue.join()

    # if an error got set during join(), raise it.
    self._raise_set_error()

    return
398
+
399
# The build command overrides this
def handle_job_queue(self, pool, callback):
    """Pop the next ready node, build its runner, and submit it."""
    node = self.job_queue.get()
    # Surface any error a previous worker stashed before doing more work.
    self._raise_set_error()
    runner = self.get_runner(node)
    # we finally know what we're running! Make sure we haven't decided
    # to skip it due to upstream failures
    uid = runner.node.unique_id
    if uid in self._skipped_children:
        runner.do_skip(cause=self._skipped_children.pop(uid))
    self._submit(pool, [runner], callback)
411
+
412
def _handle_result(self, result: RunResult) -> None:
    """Record a completed result and skip its descendants on failure.

    Ephemeral-model results are not appended to node_results; when an
    ephemeral node fails, the failing result itself is passed along as
    the 'cause' for the children it forces to skip.
    """
    node = result.node
    is_ephemeral = node.is_ephemeral_model
    if not is_ephemeral:
        self.node_results.append(result)

    if self.manifest is None:
        raise DbtInternalError("manifest was None in _handle_result")

    # Error (plus PartialSuccess; the build task adds Fail) marks all
    # dependents for skipping.
    if result.status in self.MARK_DEPENDENT_ERRORS_STATUSES:
        cause = result if is_ephemeral else None
        self._mark_dependent_errors(node.unique_id, result, cause)
433
+
434
def _cancel_connections(self, pool):
    """Given a pool, cancel all adapter connections and wait until all
    runners gentle terminates.
    """
    pool.close()
    pool.terminate()

    adapter = get_adapter(self.config)

    if not adapter.is_cancelable():
        # Adapter cannot cancel in-flight queries; just report that.
        fire_event(QueryCancelationUnsupported(type=adapter.type()))
    else:
        with adapter.connection_named("master"):
            # Connection names correspond to node unique_ids here.
            for conn_name in adapter.cancel_open_connections():
                if self.manifest is not None:
                    node = self.manifest.nodes.get(conn_name)
                    # Ephemeral models are not reported as cancelled.
                    if node is not None and node.is_ephemeral_model:
                        continue
                # if we don't have a manifest/don't have a node, print
                # anyway.
                fire_event(LogCancelLine(conn_name=conn_name))

    pool.join()
457
+
458
def execute_nodes(self):
    """Run all queued nodes on a thread pool and return node_results.

    FailFastError: cancels connections, marks unexecuted nodes Skipped,
    and returns the collected results. KeyboardInterrupt/SystemExit:
    writes run_results (when --write-json), cancels connections, prints
    the end-of-run summary, and re-raises.
    """
    num_threads = self.config.threads

    pool = DbtThreadPool(
        num_threads, self._pool_thread_initializer, [get_invocation_context()]
    )
    try:
        self.run_queue(pool)
    except FailFastError as failure:
        self._cancel_connections(pool)

        executed_node_ids = {r.node.unique_id for r in self.node_results}
        message = "Skipping due to fail_fast"

        for node in self._flattened_nodes:
            if node.unique_id not in executed_node_ids:
                # Membership was checked above, so mark_node_as_skipped
                # cannot return None here.
                self.node_results.append(
                    mark_node_as_skipped(node, executed_node_ids, message)
                )

        print_run_result_error(failure.result)
        # ensure information about all nodes is propagated to run results when failing fast
        return self.node_results
    except (KeyboardInterrupt, SystemExit):
        run_result = self.get_result(
            results=self.node_results,
            elapsed_time=time.time() - self.started_at,
            generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
        )

        if self.args.write_json and hasattr(run_result, "write"):
            run_result.write(self.result_path())
            add_artifact_produced(self.result_path())
            fire_event(
                ArtifactWritten(
                    artifact_type=run_result.__class__.__name__,
                    artifact_path=self.result_path(),
                )
            )

        self._cancel_connections(pool)
        print_run_end_messages(self.node_results, keyboard_interrupt=True)

        raise

    pool.close()
    pool.join()

    return self.node_results
507
+
508
@staticmethod
def _pool_thread_initializer(invocation_context):
    # Propagate the invocation context into each worker thread so
    # context-dependent code behaves the same as on the main thread.
    _INVOCATION_CONTEXT_VAR.set(invocation_context)
511
+
512
def _mark_dependent_errors(
    self, node_id: str, result: RunResult, cause: Optional[RunResult]
) -> None:
    """Mark every dependent of *node_id* for skipping.

    *cause* (the failing ephemeral result, or None) is recorded per
    child and consumed later by handle_job_queue.
    """
    if self.graph is None:
        raise DbtInternalError("graph is None in _mark_dependent_errors")
    fire_event(
        MarkSkippedChildren(
            unique_id=node_id,
            status=result.status,
            run_result=result.to_msg_dict(),
        )
    )
    descendants = self.graph.get_dependent_nodes(UniqueId(node_id))
    self._skipped_children.update({child: cause for child in descendants})
526
+
527
def populate_adapter_cache(
    self, adapter, required_schemas: Optional[Set[BaseRelation]] = None
):
    """Warm the adapter's relation cache for executable nodes.

    No-op unless --populate-cache is set. When CACHE_SELECTED_ONLY is
    enabled, *required_schemas* limits the schemas cached. Timing is
    reported to tracking when a tracking user is active.
    """
    if not self.args.populate_cache:
        return

    if self.manifest is None:
        raise DbtInternalError("manifest was None in populate_adapter_cache")

    start_populate_cache = time.perf_counter()
    # the cache only cares about executable nodes
    cachable_nodes = [
        node
        for node in self.manifest.nodes.values()
        if (node.is_relational and not node.is_ephemeral_model and not node.is_external_node)
    ]

    if get_flags().CACHE_SELECTED_ONLY is True:
        adapter.set_relations_cache(cachable_nodes, required_schemas=required_schemas)
    else:
        adapter.set_relations_cache(cachable_nodes)
    cache_populate_time = time.perf_counter() - start_populate_cache
    # BUG FIX: this module imports `dvt.tracking` (not `dbt.tracking`);
    # referencing `dbt.tracking` raised AttributeError at runtime in
    # this renamed package.
    if dvt.tracking.active_user is not None:
        dvt.tracking.track_runnable_timing(
            {"adapter_cache_construction_elapsed": cache_populate_time}
        )
553
+
554
def before_run(self, adapter: BaseAdapter, selected_uids: AbstractSet[str]) -> RunStatus:
    """Pre-execution hook: apply deferral and warm the relation cache.

    Always reports Success; subclasses with real on-run-start work
    override this.
    """
    with adapter.connection_named("master"):
        self.defer_to_manifest()
        self.populate_adapter_cache(adapter)
    return RunStatus.Success
559
+
560
def after_run(self, adapter, results) -> None:
    """Post-execution hook; no-op here, overridden by subclasses."""
    pass
562
+
563
def print_results_line(self, node_results, elapsed):
    """Summary-line hook; no-op here, overridden by subclasses."""
    pass
565
+
566
def execute_with_hooks(self, selected_uids: AbstractSet[str]):
    """Run before_run, execute (or skip) all nodes, run after_run.

    When before_run fails and skip_nodes_if_on_run_start_fails is set,
    every not-yet-executed node is reported and recorded as Skipped
    instead of being run. Connections are always cleaned up, and the
    final result object is built in the finally block so an aggregate
    result exists even on error.
    """
    adapter = get_adapter(self.config)

    fire_event(Formatting(""))
    fire_event(
        ConcurrencyLine(
            num_threads=self.config.threads,
            target_name=self.config.target_name,
            node_count=self.num_nodes,
        )
    )
    fire_event(Formatting(""))

    self.started_at = time.time()
    try:
        before_run_status = self.before_run(adapter, selected_uids)
        if before_run_status == RunStatus.Success or (
            not get_flags().skip_nodes_if_on_run_start_fails
        ):
            res = self.execute_nodes()
        else:
            # on-run-start failed: skip everything that didn't run.
            executed_node_ids = {
                r.node.unique_id for r in self.node_results if hasattr(r, "node")
            }

            res = []

            for index, node in enumerate(self._flattened_nodes or []):
                group = group_lookup.get(node.unique_id)

                if node.unique_id not in executed_node_ids:
                    fire_event(
                        SkippingDetails(
                            resource_type=node.resource_type,
                            schema=node.schema,
                            node_name=node.name,
                            index=index + 1,
                            total=self.num_nodes,
                            node_info=node.node_info,
                            group=group,
                        )
                    )
                    skipped_node_result = mark_node_as_skipped(node, executed_node_ids, None)
                    if skipped_node_result:
                        self.node_results.append(skipped_node_result)

        self.after_run(adapter, res)
    finally:
        adapter.cleanup_connections()
        elapsed = time.time() - self.started_at
        self.print_results_line(self.node_results, elapsed)
        result = self.get_result(
            results=self.node_results,
            elapsed_time=elapsed,
            generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
        )

    return result
624
+
625
    def run(self):
        """
        Run dbt for the query, based on the graph.

        Initializes the runtime graph, executes the selected nodes (or warns
        when the selection is empty), fires the end-of-run event, optionally
        writes the manifest and run-results artifacts, and returns the result.
        """
        # We set up a context manager here with "task_contextvars" because we
        # need the project_root in runtime_initialize.
        with task_contextvars(project_root=self.config.project_root):
            self._runtime_initialize()

            if self._flattened_nodes is None:
                raise DbtInternalError(
                    "after _runtime_initialize, _flattened_nodes was still None"
                )

            if len(self._flattened_nodes) == 0:
                # Empty selection: warn and synthesize an empty result set.
                warn_or_error(NothingToDo())
                result = self.get_result(
                    results=[],
                    generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
                    elapsed_time=0.0,
                )
            else:
                selected_uids = frozenset(n.unique_id for n in self._flattened_nodes)
                result = self.execute_with_hooks(selected_uids)

        # We have other result types here too, including FreshnessResult
        if isinstance(result, RunExecutionResult):
            # NOTE: the comprehension variable shadows `result` but does not
            # leak outside the comprehension in Python 3.
            result_msgs = [result.to_msg_dict() for result in result.results]
            fire_event(
                EndRunResult(
                    results=result_msgs,
                    generated_at=result.generated_at.strftime("%Y-%m-%dT%H:%M:%SZ"),
                    elapsed_time=result.elapsed_time,
                    success=GraphRunnableTask.interpret_results(result.results),
                )
            )

        if self.args.write_json:
            # Persist the manifest; write the run-results artifact only for
            # result types that support it.
            write_manifest(self.manifest, self.config.project_target_path)
            if hasattr(result, "write"):
                result.write(self.result_path())
                add_artifact_produced(self.result_path())
                fire_event(
                    ArtifactWritten(
                        artifact_type=result.__class__.__name__, artifact_path=self.result_path()
                    )
                )

        self.task_end_messages(result.results)
        return result
675
+
676
+ @classmethod
677
+ def interpret_results(cls, results):
678
+ if results is None:
679
+ return False
680
+
681
+ num_runtime_errors = len([r for r in results if r.status == NodeStatus.RuntimeErr])
682
+ num_errors = len([r for r in results if r.status == NodeStatus.Error])
683
+ num_fails = len([r for r in results if r.status == NodeStatus.Fail])
684
+ num_skipped = len(
685
+ [
686
+ r
687
+ for r in results
688
+ if r.status == NodeStatus.Skipped and not isinstance(r.node, Exposure)
689
+ ]
690
+ )
691
+ num_partial_success = len([r for r in results if r.status == NodeStatus.PartialSuccess])
692
+ num_total = num_runtime_errors + num_errors + num_fails + num_skipped + num_partial_success
693
+ return num_total == 0
694
+
695
+ def get_model_schemas(self, adapter, selected_uids: Iterable[str]) -> Set[BaseRelation]:
696
+ if self.manifest is None:
697
+ raise DbtInternalError("manifest was None in get_model_schemas")
698
+ result: Set[BaseRelation] = set()
699
+
700
+ for node in self.manifest.nodes.values():
701
+ if node.unique_id not in selected_uids:
702
+ continue
703
+ if node.is_relational and not node.is_ephemeral:
704
+ relation = adapter.Relation.create_from(self.config, node)
705
+ result.add(relation.without_identifier())
706
+
707
+ return result
708
+
709
    def create_schemas(self, adapter, required_schemas: Set[BaseRelation]):
        """Create any required schemas that do not already exist, concurrently.

        Lists existing schemas per database in parallel, then submits a
        creation task for each required schema not found (comparison is
        case-insensitive on both database and schema).

        :param adapter: adapter used for listing and creating schemas.
        :param required_schemas: schema-level relations the run will write to.
        """
        # we want the string form of the information schema database
        required_databases: Set[BaseRelation] = set()
        for required in required_schemas:
            db_only = required.include(database=True, schema=False, identifier=False)
            required_databases.add(db_only)

        # Lowercased (database, schema) pairs already present in the warehouse.
        existing_schemas_lowered: Set[Tuple[Optional[str], Optional[str]]]
        existing_schemas_lowered = set()

        def list_schemas(db_only: BaseRelation) -> List[Tuple[Optional[str], str]]:
            # the database can be None on some warehouses that don't support it
            database_quoted: Optional[str]
            db_lowercase = dbt_common.utils.formatting.lowercase(db_only.database)
            if db_only.database is None:
                database_quoted = None
            else:
                database_quoted = str(db_only)

            # we should never create a null schema, so just filter them out
            return [
                (db_lowercase, s.lower())
                for s in adapter.list_schemas(database_quoted)
                if s is not None
            ]

        def create_schema(relation: BaseRelation) -> None:
            # Each creation runs on its own named connection.
            db = relation.database or ""
            schema = relation.schema
            with adapter.connection_named(f"create_{db}_{schema}"):
                adapter.create_schema(relation)

        list_futures = []
        create_futures = []

        # TODO: following has a mypy issue because profile and project config
        # defines threads as int and HasThreadingConfig defines it as Optional[int]
        with dbt_common.utils.executor(self.config) as tpe:  # type: ignore
            # Phase 1: fan out one listing task per distinct database.
            for req in required_databases:
                if req.database is None:
                    name = "list_schemas"
                else:
                    name = f"list_{req.database}"
                fut = tpe.submit_connected(adapter, name, list_schemas, req)
                list_futures.append(fut)

            # Drain listings before deciding what to create.
            for ls_future in as_completed(list_futures):
                existing_schemas_lowered.update(ls_future.result())

            # Phase 2: create each missing schema exactly once (the set is
            # updated eagerly so duplicates in required_schemas are deduped).
            for info in required_schemas:
                if info.schema is None:
                    # we are not in the business of creating null schemas, so
                    # skip this
                    continue
                db: Optional[str] = info.database
                db_lower: Optional[str] = dbt_common.utils.formatting.lowercase(db)
                schema: str = info.schema

                db_schema = (db_lower, schema.lower())
                if db_schema not in existing_schemas_lowered:
                    existing_schemas_lowered.add(db_schema)
                    fut = tpe.submit_connected(
                        adapter, f'create_{info.database or ""}_{info.schema}', create_schema, info
                    )
                    create_futures.append(fut)

            for create_future in as_completed(create_futures):
                # trigger/re-raise any exceptions while creating schemas
                create_future.result()
778
+
779
+ def get_result(self, results, elapsed_time, generated_at):
780
+ return RunExecutionResult(
781
+ results=results,
782
+ elapsed_time=elapsed_time,
783
+ generated_at=generated_at,
784
+ args=dbt.utils.args_to_dict(self.args),
785
+ )
786
+
787
    def task_end_messages(self, results) -> None:
        """Print the end-of-run summary; subclasses may override for custom output."""
        print_run_end_messages(results)
789
+
790
+ def _get_previous_state(self) -> Optional[Manifest]:
791
+ state = self.previous_defer_state or self.previous_state
792
+ if not state:
793
+ raise DbtRuntimeError(
794
+ "--state or --defer-state are required for deferral, but neither was provided"
795
+ )
796
+
797
+ if not state.manifest:
798
+ raise DbtRuntimeError(f'Could not find manifest in --state path: "{state.state_path}"')
799
+ return state.manifest
800
+
801
+ def _get_deferred_manifest(self) -> Optional[Manifest]:
802
+ return self._get_previous_state() if self.args.defer else None