dvt-core 1.11.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dvt-core might be problematic. Click here for more details.

Files changed (261) hide show
  1. dvt/__init__.py +7 -0
  2. dvt/_pydantic_shim.py +26 -0
  3. dvt/adapters/__init__.py +16 -0
  4. dvt/adapters/multi_adapter_manager.py +268 -0
  5. dvt/artifacts/__init__.py +0 -0
  6. dvt/artifacts/exceptions/__init__.py +1 -0
  7. dvt/artifacts/exceptions/schemas.py +31 -0
  8. dvt/artifacts/resources/__init__.py +116 -0
  9. dvt/artifacts/resources/base.py +68 -0
  10. dvt/artifacts/resources/types.py +93 -0
  11. dvt/artifacts/resources/v1/analysis.py +10 -0
  12. dvt/artifacts/resources/v1/catalog.py +23 -0
  13. dvt/artifacts/resources/v1/components.py +275 -0
  14. dvt/artifacts/resources/v1/config.py +282 -0
  15. dvt/artifacts/resources/v1/documentation.py +11 -0
  16. dvt/artifacts/resources/v1/exposure.py +52 -0
  17. dvt/artifacts/resources/v1/function.py +53 -0
  18. dvt/artifacts/resources/v1/generic_test.py +32 -0
  19. dvt/artifacts/resources/v1/group.py +22 -0
  20. dvt/artifacts/resources/v1/hook.py +11 -0
  21. dvt/artifacts/resources/v1/macro.py +30 -0
  22. dvt/artifacts/resources/v1/metric.py +173 -0
  23. dvt/artifacts/resources/v1/model.py +146 -0
  24. dvt/artifacts/resources/v1/owner.py +10 -0
  25. dvt/artifacts/resources/v1/saved_query.py +112 -0
  26. dvt/artifacts/resources/v1/seed.py +42 -0
  27. dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  28. dvt/artifacts/resources/v1/semantic_model.py +315 -0
  29. dvt/artifacts/resources/v1/singular_test.py +14 -0
  30. dvt/artifacts/resources/v1/snapshot.py +92 -0
  31. dvt/artifacts/resources/v1/source_definition.py +85 -0
  32. dvt/artifacts/resources/v1/sql_operation.py +10 -0
  33. dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
  34. dvt/artifacts/schemas/__init__.py +0 -0
  35. dvt/artifacts/schemas/base.py +191 -0
  36. dvt/artifacts/schemas/batch_results.py +24 -0
  37. dvt/artifacts/schemas/catalog/__init__.py +12 -0
  38. dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  39. dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
  40. dvt/artifacts/schemas/freshness/__init__.py +1 -0
  41. dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  42. dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
  43. dvt/artifacts/schemas/manifest/__init__.py +2 -0
  44. dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  45. dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
  46. dvt/artifacts/schemas/results.py +148 -0
  47. dvt/artifacts/schemas/run/__init__.py +2 -0
  48. dvt/artifacts/schemas/run/v5/__init__.py +0 -0
  49. dvt/artifacts/schemas/run/v5/run.py +184 -0
  50. dvt/artifacts/schemas/upgrades/__init__.py +4 -0
  51. dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  52. dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  53. dvt/artifacts/utils/validation.py +153 -0
  54. dvt/cli/__init__.py +1 -0
  55. dvt/cli/context.py +16 -0
  56. dvt/cli/exceptions.py +56 -0
  57. dvt/cli/flags.py +558 -0
  58. dvt/cli/main.py +971 -0
  59. dvt/cli/option_types.py +121 -0
  60. dvt/cli/options.py +79 -0
  61. dvt/cli/params.py +803 -0
  62. dvt/cli/requires.py +478 -0
  63. dvt/cli/resolvers.py +32 -0
  64. dvt/cli/types.py +40 -0
  65. dvt/clients/__init__.py +0 -0
  66. dvt/clients/checked_load.py +82 -0
  67. dvt/clients/git.py +164 -0
  68. dvt/clients/jinja.py +206 -0
  69. dvt/clients/jinja_static.py +245 -0
  70. dvt/clients/registry.py +192 -0
  71. dvt/clients/yaml_helper.py +68 -0
  72. dvt/compilation.py +833 -0
  73. dvt/compute/__init__.py +26 -0
  74. dvt/compute/base.py +288 -0
  75. dvt/compute/engines/__init__.py +13 -0
  76. dvt/compute/engines/duckdb_engine.py +368 -0
  77. dvt/compute/engines/spark_engine.py +273 -0
  78. dvt/compute/query_analyzer.py +212 -0
  79. dvt/compute/router.py +483 -0
  80. dvt/config/__init__.py +4 -0
  81. dvt/config/catalogs.py +95 -0
  82. dvt/config/compute_config.py +406 -0
  83. dvt/config/profile.py +411 -0
  84. dvt/config/profiles_v2.py +464 -0
  85. dvt/config/project.py +893 -0
  86. dvt/config/renderer.py +232 -0
  87. dvt/config/runtime.py +491 -0
  88. dvt/config/selectors.py +209 -0
  89. dvt/config/utils.py +78 -0
  90. dvt/connectors/.gitignore +6 -0
  91. dvt/connectors/README.md +306 -0
  92. dvt/connectors/catalog.yml +217 -0
  93. dvt/connectors/download_connectors.py +300 -0
  94. dvt/constants.py +29 -0
  95. dvt/context/__init__.py +0 -0
  96. dvt/context/base.py +746 -0
  97. dvt/context/configured.py +136 -0
  98. dvt/context/context_config.py +350 -0
  99. dvt/context/docs.py +82 -0
  100. dvt/context/exceptions_jinja.py +179 -0
  101. dvt/context/macro_resolver.py +195 -0
  102. dvt/context/macros.py +171 -0
  103. dvt/context/manifest.py +73 -0
  104. dvt/context/providers.py +2198 -0
  105. dvt/context/query_header.py +14 -0
  106. dvt/context/secret.py +59 -0
  107. dvt/context/target.py +74 -0
  108. dvt/contracts/__init__.py +0 -0
  109. dvt/contracts/files.py +413 -0
  110. dvt/contracts/graph/__init__.py +0 -0
  111. dvt/contracts/graph/manifest.py +1904 -0
  112. dvt/contracts/graph/metrics.py +98 -0
  113. dvt/contracts/graph/model_config.py +71 -0
  114. dvt/contracts/graph/node_args.py +42 -0
  115. dvt/contracts/graph/nodes.py +1806 -0
  116. dvt/contracts/graph/semantic_manifest.py +233 -0
  117. dvt/contracts/graph/unparsed.py +812 -0
  118. dvt/contracts/project.py +417 -0
  119. dvt/contracts/results.py +53 -0
  120. dvt/contracts/selection.py +23 -0
  121. dvt/contracts/sql.py +86 -0
  122. dvt/contracts/state.py +69 -0
  123. dvt/contracts/util.py +46 -0
  124. dvt/deprecations.py +347 -0
  125. dvt/deps/__init__.py +0 -0
  126. dvt/deps/base.py +153 -0
  127. dvt/deps/git.py +196 -0
  128. dvt/deps/local.py +80 -0
  129. dvt/deps/registry.py +131 -0
  130. dvt/deps/resolver.py +149 -0
  131. dvt/deps/tarball.py +121 -0
  132. dvt/docs/source/_ext/dbt_click.py +118 -0
  133. dvt/docs/source/conf.py +32 -0
  134. dvt/env_vars.py +64 -0
  135. dvt/event_time/event_time.py +40 -0
  136. dvt/event_time/sample_window.py +60 -0
  137. dvt/events/__init__.py +16 -0
  138. dvt/events/base_types.py +37 -0
  139. dvt/events/core_types_pb2.py +2 -0
  140. dvt/events/logging.py +109 -0
  141. dvt/events/types.py +2534 -0
  142. dvt/exceptions.py +1487 -0
  143. dvt/flags.py +89 -0
  144. dvt/graph/__init__.py +11 -0
  145. dvt/graph/cli.py +248 -0
  146. dvt/graph/graph.py +172 -0
  147. dvt/graph/queue.py +213 -0
  148. dvt/graph/selector.py +375 -0
  149. dvt/graph/selector_methods.py +976 -0
  150. dvt/graph/selector_spec.py +223 -0
  151. dvt/graph/thread_pool.py +18 -0
  152. dvt/hooks.py +21 -0
  153. dvt/include/README.md +49 -0
  154. dvt/include/__init__.py +3 -0
  155. dvt/include/global_project.py +4 -0
  156. dvt/include/starter_project/.gitignore +4 -0
  157. dvt/include/starter_project/README.md +15 -0
  158. dvt/include/starter_project/__init__.py +3 -0
  159. dvt/include/starter_project/analyses/.gitkeep +0 -0
  160. dvt/include/starter_project/dvt_project.yml +36 -0
  161. dvt/include/starter_project/macros/.gitkeep +0 -0
  162. dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  163. dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  164. dvt/include/starter_project/models/example/schema.yml +21 -0
  165. dvt/include/starter_project/seeds/.gitkeep +0 -0
  166. dvt/include/starter_project/snapshots/.gitkeep +0 -0
  167. dvt/include/starter_project/tests/.gitkeep +0 -0
  168. dvt/internal_deprecations.py +27 -0
  169. dvt/jsonschemas/__init__.py +3 -0
  170. dvt/jsonschemas/jsonschemas.py +309 -0
  171. dvt/jsonschemas/project/0.0.110.json +4717 -0
  172. dvt/jsonschemas/project/0.0.85.json +2015 -0
  173. dvt/jsonschemas/resources/0.0.110.json +2636 -0
  174. dvt/jsonschemas/resources/0.0.85.json +2536 -0
  175. dvt/jsonschemas/resources/latest.json +6773 -0
  176. dvt/links.py +4 -0
  177. dvt/materializations/__init__.py +0 -0
  178. dvt/materializations/incremental/__init__.py +0 -0
  179. dvt/materializations/incremental/microbatch.py +235 -0
  180. dvt/mp_context.py +8 -0
  181. dvt/node_types.py +37 -0
  182. dvt/parser/__init__.py +23 -0
  183. dvt/parser/analysis.py +21 -0
  184. dvt/parser/base.py +549 -0
  185. dvt/parser/common.py +267 -0
  186. dvt/parser/docs.py +52 -0
  187. dvt/parser/fixtures.py +51 -0
  188. dvt/parser/functions.py +30 -0
  189. dvt/parser/generic_test.py +100 -0
  190. dvt/parser/generic_test_builders.py +334 -0
  191. dvt/parser/hooks.py +119 -0
  192. dvt/parser/macros.py +137 -0
  193. dvt/parser/manifest.py +2204 -0
  194. dvt/parser/models.py +574 -0
  195. dvt/parser/partial.py +1179 -0
  196. dvt/parser/read_files.py +445 -0
  197. dvt/parser/schema_generic_tests.py +423 -0
  198. dvt/parser/schema_renderer.py +111 -0
  199. dvt/parser/schema_yaml_readers.py +936 -0
  200. dvt/parser/schemas.py +1467 -0
  201. dvt/parser/search.py +149 -0
  202. dvt/parser/seeds.py +28 -0
  203. dvt/parser/singular_test.py +20 -0
  204. dvt/parser/snapshots.py +44 -0
  205. dvt/parser/sources.py +557 -0
  206. dvt/parser/sql.py +63 -0
  207. dvt/parser/unit_tests.py +622 -0
  208. dvt/plugins/__init__.py +20 -0
  209. dvt/plugins/contracts.py +10 -0
  210. dvt/plugins/exceptions.py +2 -0
  211. dvt/plugins/manager.py +164 -0
  212. dvt/plugins/manifest.py +21 -0
  213. dvt/profiler.py +20 -0
  214. dvt/py.typed +1 -0
  215. dvt/runners/__init__.py +2 -0
  216. dvt/runners/exposure_runner.py +7 -0
  217. dvt/runners/no_op_runner.py +46 -0
  218. dvt/runners/saved_query_runner.py +7 -0
  219. dvt/selected_resources.py +8 -0
  220. dvt/task/__init__.py +0 -0
  221. dvt/task/base.py +504 -0
  222. dvt/task/build.py +197 -0
  223. dvt/task/clean.py +57 -0
  224. dvt/task/clone.py +162 -0
  225. dvt/task/compile.py +151 -0
  226. dvt/task/compute.py +366 -0
  227. dvt/task/debug.py +650 -0
  228. dvt/task/deps.py +280 -0
  229. dvt/task/docs/__init__.py +3 -0
  230. dvt/task/docs/generate.py +408 -0
  231. dvt/task/docs/index.html +250 -0
  232. dvt/task/docs/serve.py +28 -0
  233. dvt/task/freshness.py +323 -0
  234. dvt/task/function.py +122 -0
  235. dvt/task/group_lookup.py +46 -0
  236. dvt/task/init.py +374 -0
  237. dvt/task/list.py +237 -0
  238. dvt/task/printer.py +176 -0
  239. dvt/task/profiles.py +256 -0
  240. dvt/task/retry.py +175 -0
  241. dvt/task/run.py +1146 -0
  242. dvt/task/run_operation.py +142 -0
  243. dvt/task/runnable.py +802 -0
  244. dvt/task/seed.py +104 -0
  245. dvt/task/show.py +150 -0
  246. dvt/task/snapshot.py +57 -0
  247. dvt/task/sql.py +111 -0
  248. dvt/task/test.py +464 -0
  249. dvt/tests/fixtures/__init__.py +1 -0
  250. dvt/tests/fixtures/project.py +620 -0
  251. dvt/tests/util.py +651 -0
  252. dvt/tracking.py +529 -0
  253. dvt/utils/__init__.py +3 -0
  254. dvt/utils/artifact_upload.py +151 -0
  255. dvt/utils/utils.py +408 -0
  256. dvt/version.py +249 -0
  257. dvt_core-1.11.0b4.dist-info/METADATA +252 -0
  258. dvt_core-1.11.0b4.dist-info/RECORD +261 -0
  259. dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
  260. dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
  261. dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/task/base.py ADDED
@@ -0,0 +1,504 @@
1
+ import os
2
+ import threading
3
+ import time
4
+ import traceback
5
+ from abc import ABCMeta, abstractmethod
6
+ from contextlib import nullcontext
7
+ from datetime import datetime, timezone
8
+ from pathlib import Path
9
+ from typing import Any, Dict, List, Optional, Set
10
+
11
+ import dvt.exceptions
12
+ from dvt import tracking
13
+ from dvt.artifacts.resources.types import NodeType
14
+ from dvt.artifacts.schemas.results import (
15
+ NodeStatus,
16
+ RunningStatus,
17
+ RunStatus,
18
+ TimingInfo,
19
+ collect_timing_info,
20
+ )
21
+ from dvt.artifacts.schemas.run import RunResult
22
+ from dvt.cli.flags import Flags
23
+ from dvt.compilation import Compiler
24
+ from dvt.config import RuntimeConfig
25
+ from dvt.config.profile import read_profile
26
+ from dvt.constants import DBT_PROJECT_FILE_NAME
27
+ from dvt.contracts.graph.manifest import Manifest
28
+ from dvt.events.types import (
29
+ CatchableExceptionOnRun,
30
+ GenericExceptionOnRun,
31
+ InternalErrorOnRun,
32
+ LogDbtProfileError,
33
+ LogDbtProjectError,
34
+ LogDebugStackTrace,
35
+ LogSkipBecauseError,
36
+ NodeCompiling,
37
+ NodeConnectionReleaseError,
38
+ NodeExecuting,
39
+ SkippingDetails,
40
+ )
41
+ from dvt.flags import get_flags
42
+ from dvt.graph import Graph
43
+ from dvt.task import group_lookup
44
+ from dvt.task.printer import print_run_result_error
45
+
46
+ import dbt_common.exceptions.base
47
+ from dbt_common.events.contextvars import get_node_info
48
+ from dbt_common.events.functions import fire_event
49
+ from dbt_common.exceptions import DbtInternalError, DbtRuntimeError, NotImplementedError
50
+
51
+
52
def read_profiles(profiles_dir: Optional[str] = None) -> Dict[str, Any]:
    """Return the raw profiles mapping without its top-level "config" key.

    This is only used for some error handling.

    Args:
        profiles_dir: Directory handed to ``read_profile``. When ``None``,
            the ``PROFILES_DIR`` flag is used instead.

    Returns:
        Every entry of the loaded profiles except the one keyed "config",
        or an empty dict when ``read_profile`` returns ``None``.
    """
    directory = get_flags().PROFILES_DIR if profiles_dir is None else profiles_dir
    loaded = read_profile(directory)
    if loaded is None:
        return {}
    return {name: body for name, body in loaded.items() if name != "config"}
65
+
66
+
67
class BaseTask(metaclass=ABCMeta):
    """Abstract root of all tasks.

    Holds the parsed flags and doubles as a context manager that restores
    the working directory that was current when the ``with`` block began.
    """

    def __init__(self, args: Flags) -> None:
        self.args = args

    def __enter__(self):
        # Remember where we started so __exit__ can go back there.
        self.orig_dir = os.getcwd()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None, so an exception raised inside the block propagates.
        os.chdir(self.orig_dir)

    @abstractmethod
    def run(self):
        """Execute the task; concrete subclasses must override this."""
        raise dbt_common.exceptions.base.NotImplementedError("Not Implemented")

    def interpret_results(self, results):
        """Report whether ``results`` count as success; always True here."""
        return True
84
+
85
+
86
def get_nearest_project_dir(project_dir: Optional[str]) -> Path:
    """Return the directory that contains the project file.

    Args:
        project_dir: Explicit project directory. When truthy, only that
            directory is inspected; otherwise only the current working
            directory is. Parent directories are never searched.

    Returns:
        The inspected directory, when it holds ``DBT_PROJECT_FILE_NAME``.

    Raises:
        DbtRuntimeError: If the project file is not present there.
    """
    if project_dir:
        # An explicit directory is taken as-is.
        candidate = Path(project_dir)
        failure = (
            "fatal: Invalid --project-dir flag. Not a dbt project. "
            "Missing dbt_project.yml file"
        )
    else:
        candidate = Path.cwd()
        failure = (
            "fatal: Not a dbt project (or any of the parent directories). "
            "Missing dbt_project.yml file"
        )

    if (candidate / DBT_PROJECT_FILE_NAME).is_file():
        return candidate
    raise dbt_common.exceptions.DbtRuntimeError(failure)
109
+
110
+
111
def move_to_nearest_project_dir(project_dir: Optional[str]) -> Path:
    """Change the working directory to the resolved project directory.

    Resolution (and any resulting error) is delegated to
    ``get_nearest_project_dir``; the resolved path is returned.
    """
    target = get_nearest_project_dir(project_dir)
    os.chdir(target)
    return target
115
+
116
+
117
+ # TODO: look into deprecating this class in favor of several small functions that
118
+ # produce the same behavior. currently this class only contains manifest compilation,
119
+ # holding a manifest, and moving directories.
120
class ConfiguredTask(BaseTask):
    """A task bound to a runtime config, an optional manifest and a compiler.

    Attributes are set in ``__init__``: ``config``, ``manifest``, ``compiler``
    (a ``Compiler`` built from the config) and ``graph`` (``None`` until
    ``compile_manifest`` has run).
    """

    def __init__(
        self, args: Flags, config: RuntimeConfig, manifest: Optional[Manifest] = None
    ) -> None:
        super().__init__(args)
        self.config = config
        self.graph: Optional[Graph] = None
        self.manifest = manifest
        self.compiler = Compiler(self.config)

    def compile_manifest(self) -> None:
        """Compile ``self.manifest`` into ``self.graph`` and report the timing.

        Raises:
            DbtInternalError: If no manifest has been set on the task yet.
        """
        if self.manifest is None:
            raise DbtInternalError("compile_manifest called before manifest was loaded")

        start_compile_manifest = time.perf_counter()

        self.graph = self.compiler.compile(self.manifest)

        compile_time = time.perf_counter() - start_compile_manifest
        # This module imports `tracking` from the dvt package; the name `dbt`
        # is never bound here, so `dbt.tracking` would raise NameError.
        if tracking.active_user is not None:
            tracking.track_runnable_timing({"graph_compilation_elapsed": compile_time})

    @classmethod
    def from_args(cls, args: Flags, *pargs, **kwargs):
        """Build a task from CLI flags.

        Moves into the project directory, builds the config from ``args``
        and forwards any extra positional/keyword arguments to the
        constructor.

        Raises:
            DbtRuntimeError: "Could not run dbt", chained from the original
                project or profile error after it has been logged and
                tracked as an invalid invocation.
        """
        move_to_nearest_project_dir(args.project_dir)
        try:
            # This is usually RuntimeConfig
            config = RuntimeConfig.from_args(args)
        # The exception classes are looked up on `dvt.exceptions`, which this
        # module imports; `dbt.exceptions` is an unbound name here.
        # NOTE(review): assumes dvt.exceptions defines DbtProjectError and
        # DbtProfileError like the upstream module - confirm.
        except dvt.exceptions.DbtProjectError as exc:
            fire_event(LogDbtProjectError(exc=str(exc)))

            tracking.track_invalid_invocation(args=args, result_type=exc.result_type)
            raise dbt_common.exceptions.DbtRuntimeError("Could not run dbt") from exc
        except dvt.exceptions.DbtProfileError as exc:
            all_profile_names = list(read_profiles(get_flags().PROFILES_DIR).keys())
            fire_event(LogDbtProfileError(exc=str(exc), profiles=all_profile_names))
            tracking.track_invalid_invocation(args=args, result_type=exc.result_type)
            raise dbt_common.exceptions.DbtRuntimeError("Could not run dbt") from exc
        return cls(args, config, *pargs, **kwargs)
159
+
160
+
161
class ExecutionContext:
    """Mutable state shared between execution and error handling.

    Carries the timing entries collected so far and the newest (compiled vs
    executed) form of the node.
    """

    def __init__(self, node) -> None:
        self.node = node
        # Starts empty; entries are appended while the node is processed.
        self.timing: List[TimingInfo] = []
169
+
170
+
171
class BaseRunner(metaclass=ABCMeta):
    """Abstract runner that compiles and executes a single node.

    Subclasses must implement ``compile``. They are also expected to override
    ``before_execute``, ``execute`` and ``after_execute``, whose defaults
    raise ``NotImplementedError``.
    """

    def __init__(self, config, adapter, node, node_index: int, num_nodes: int) -> None:
        self.config = config
        self.compiler = Compiler(config)
        self.adapter = adapter
        self.node = node
        self.node_index = node_index
        self.num_nodes = num_nodes

        # Skip bookkeeping; both are set together by do_skip().
        self.skip = False
        self.skip_cause: Optional[RunResult] = None

        # When True, ephemeral models are executed and not merely compiled.
        self.run_ephemeral_models = False

    @abstractmethod
    def compile(self, manifest: Manifest) -> Any:
        """Compile this runner's node against ``manifest``."""
        pass

    def _node_build_path(self) -> Optional[str]:
        """Return the node's ``build_path``, or None if it has no such attribute."""
        if hasattr(self.node, "build_path"):
            return self.node.build_path
        return None

    def get_result_status(self, result) -> Dict[str, str]:
        """Translate ``result.status`` into a ``node_status`` dict.

        Error results additionally carry the stringified message under
        ``node_error``; any status not listed below maps to "passed".
        """
        if result.status == NodeStatus.Error:
            return {"node_status": "error", "node_error": str(result.message)}
        if result.status == NodeStatus.Skipped:
            return {"node_status": "skipped"}
        if result.status == NodeStatus.Fail:
            return {"node_status": "failed"}
        if result.status == NodeStatus.Warn:
            return {"node_status": "warn"}
        return {"node_status": "passed"}

    def run_with_hooks(self, manifest):
        """Run the node, wrapped in the before/after hooks.

        A runner flagged as skipped returns ``on_skip()`` immediately.
        """
        if self.skip:
            return self.on_skip()

        # no before/after printing for ephemeral models
        if not self.node.is_ephemeral_model:
            self.before_execute()

        outcome = self.safe_run(manifest)
        # The finish time is taken in UTC and stored without tzinfo.
        finished_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        self.node.update_event_status(
            node_status=outcome.status,
            finished_at=finished_at,
        )

        if not self.node.is_ephemeral_model:
            self.after_execute(outcome)

        return outcome

    def _build_run_result(
        self,
        node,
        start_time,
        status,
        timing_info,
        message,
        agate_table=None,
        adapter_response=None,
        failures=None,
        batch_results=None,
    ):
        """Assemble a ``RunResult`` for ``node``.

        ``execution_time`` is the wall-clock time since ``start_time``, the
        thread id is the current thread's name, and a missing
        ``adapter_response`` becomes an empty dict.
        """
        elapsed = time.time() - start_time
        current_thread_name = threading.current_thread().name
        response = {} if adapter_response is None else adapter_response
        return RunResult(
            status=status,
            thread_id=current_thread_name,
            execution_time=elapsed,
            timing=timing_info,
            message=message,
            node=node,
            agate_table=agate_table,
            adapter_response=response,
            failures=failures,
            batch_results=batch_results,
        )

    def error_result(self, node, message, start_time, timing_info):
        """Build a result with ``RunStatus.Error`` carrying ``message``."""
        return self._build_run_result(
            node=node,
            start_time=start_time,
            status=RunStatus.Error,
            timing_info=timing_info,
            message=message,
        )

    def ephemeral_result(self, node, start_time, timing_info):
        """Build a message-less ``RunStatus.Success`` result."""
        return self._build_run_result(
            node=node,
            start_time=start_time,
            status=RunStatus.Success,
            timing_info=timing_info,
            message=None,
        )

    def from_run_result(self, result, start_time, timing_info):
        """Rebuild ``result`` with this run's start time and timing info."""
        return self._build_run_result(
            node=result.node,
            start_time=start_time,
            status=result.status,
            timing_info=timing_info,
            message=result.message,
            agate_table=result.agate_table,
            adapter_response=result.adapter_response,
            failures=result.failures,
            batch_results=result.batch_results,
        )

    def compile_and_execute(self, manifest: Manifest, ctx: ExecutionContext):
        """Compile the node and, unless it is compile-only, execute it.

        ``ctx.node`` is replaced first by the compiled node and then by the
        node of the execution result. Returns the result of ``run``, or None
        when the node was only compiled.
        """
        outcome = None
        # A named adapter connection is only opened when INTROSPECT is set.
        if get_flags().INTROSPECT:
            connection = self.adapter.connection_named(self.node.unique_id, self.node)
        else:
            connection = nullcontext()

        with connection:
            ctx.node.update_event_status(node_status=RunningStatus.Compiling)
            fire_event(NodeCompiling(node_info=ctx.node.node_info))
            with collect_timing_info("compile", ctx.timing.append):
                # if we fail here, we still have a compiled node to return
                # this has the benefit of showing a build path for the errant
                # model. This calls the 'compile' method in CompileTask
                ctx.node = self.compile(manifest)

            # for ephemeral nodes, we only want to compile, not run
            if not ctx.node.is_ephemeral_model or self.run_ephemeral_models:
                ctx.node.update_event_status(node_status=RunningStatus.Executing)
                fire_event(NodeExecuting(node_info=ctx.node.node_info))
                with collect_timing_info("execute", ctx.timing.append):
                    outcome = self.run(ctx.node, manifest)
                    ctx.node = outcome.node

        return outcome

    def _handle_catchable_exception(self, e: DbtRuntimeError, ctx: ExecutionContext) -> str:
        """Attach the node if the error has none, log it, return its text."""
        if e.node is None:
            e.add_node(ctx.node)

        event = CatchableExceptionOnRun(
            exc=str(e), exc_info=traceback.format_exc(), node_info=get_node_info()
        )
        fire_event(event)
        return str(e)

    def _handle_internal_exception(self, e: DbtInternalError, ctx: ExecutionContext) -> str:
        """Log an internal error with the node's build path, return its text."""
        event = InternalErrorOnRun(
            build_path=self._node_build_path(), exc=str(e), node_info=get_node_info()
        )
        fire_event(event)
        return str(e)

    def _handle_generic_exception(self, e: Exception, ctx: ExecutionContext) -> str:
        """Log any other exception plus a debug stack trace, return its text."""
        event = GenericExceptionOnRun(
            build_path=self._node_build_path(),
            unique_id=self.node.unique_id,
            exc=str(e),
            node_info=get_node_info(),
        )
        fire_event(event)
        fire_event(LogDebugStackTrace(exc_info=traceback.format_exc()))

        return str(e)

    def handle_exception(self, e: Exception, ctx: ExecutionContext) -> str:
        """Route ``e`` to the handler for its type and return the error text."""
        if isinstance(e, DbtRuntimeError):
            return self._handle_catchable_exception(e, ctx)
        if isinstance(e, DbtInternalError):
            return self._handle_internal_exception(e, ctx)
        return self._handle_generic_exception(e, ctx)

    def safe_run(self, manifest: Manifest):
        """Compile and execute the node, turning exceptions into an error result.

        The adapter connection is released whether or not the run raised.
        """
        began = time.time()
        ctx = ExecutionContext(self.node)
        failure = None
        outcome = None

        try:
            outcome = self.compile_and_execute(manifest, ctx)
        except Exception as e:
            failure = self.handle_exception(e, ctx)
        finally:
            release_failure = self._safe_release_connection()

            # if releasing failed and the result doesn't have an error yet, set
            # an error
            if (
                release_failure is not None
                and outcome is not None
                and outcome.status != NodeStatus.Error
                and failure is None
            ):
                failure = release_failure

        if failure is not None:
            return self.error_result(ctx.node, failure, began, ctx.timing)
        if outcome is not None:
            return self.from_run_result(outcome, began, ctx.timing)
        # Neither an error nor a result: the node was compiled only.
        return self.ephemeral_result(ctx.node, began, ctx.timing)

    def _safe_release_connection(self):
        """Try to release the adapter connection.

        Returns None on success; on failure the exception is logged and its
        string form is returned.
        """
        try:
            self.adapter.release_connection()
        except Exception as exc:
            event = NodeConnectionReleaseError(
                node_name=self.node.name, exc=str(exc), exc_info=traceback.format_exc()
            )
            fire_event(event)
            return str(exc)

        return None

    def before_execute(self) -> None:
        """Hook invoked before a non-ephemeral node runs; not implemented here."""
        raise NotImplementedError("before_execute is not implemented")

    def execute(self, compiled_node, manifest):
        """Execute the compiled node; not implemented here."""
        raise NotImplementedError("execute is not implemented")

    def run(self, compiled_node, manifest):
        """Delegate to ``execute``."""
        return self.execute(compiled_node, manifest)

    def after_execute(self, result) -> None:
        """Hook invoked after a non-ephemeral node runs; not implemented here."""
        raise NotImplementedError("after_execute is not implemented")

    def _skip_caused_by_ephemeral_failure(self) -> bool:
        """Tell whether the recorded skip cause is an ephemeral model's result."""
        cause = self.skip_cause
        if cause is None or cause.node is None:
            return False
        return cause.node.is_ephemeral_model

    def on_skip(self):
        """Emit the skip event(s) and return a ``RunStatus.Skipped`` result.

        When the skip was caused by an ephemeral model's failure, the
        returned result carries an error message.
        """
        schema_name = getattr(self.node, "schema", "")
        node_name = self.node.name

        error_message = None
        if not self.node.is_ephemeral_model:
            # if this model was skipped due to an upstream ephemeral model
            # failure, print a special 'error skip' message.
            # Include skip_cause NodeStatus
            group = group_lookup.get(self.node.unique_id)

            if self._skip_caused_by_ephemeral_failure():
                skip_event = LogSkipBecauseError(
                    schema=schema_name,
                    relation=node_name,
                    index=self.node_index,
                    total=self.num_nodes,
                    status=self.skip_cause.status,
                    group=group,
                )
                fire_event(skip_event)
                # skip_cause here should be the run_result from the ephemeral model
                print_run_result_error(result=self.skip_cause, newline=False)
                if self.skip_cause is None:  # mypy appeasement
                    raise DbtInternalError(
                        "Skip cause not set but skip was somehow caused by an ephemeral failure"
                    )
                # set an error so dbt will exit with an error code
                error_message = (
                    "Compilation Error in {}, caused by compilation error "
                    "in referenced ephemeral model {}".format(
                        self.node.unique_id, self.skip_cause.node.unique_id
                    )
                )
            else:
                # 'skipped' nodes should not have a value for 'node_finished_at'
                # they do have 'node_started_at', which is set in GraphRunnableTask.call_runner
                self.node.update_event_status(node_status=RunStatus.Skipped)
                skip_event = SkippingDetails(
                    resource_type=self.node.resource_type,
                    schema=schema_name,
                    node_name=node_name,
                    index=self.node_index,
                    total=self.num_nodes,
                    node_info=self.node.node_info,
                    group=group,
                )
                fire_event(skip_event)

        return RunResult.from_node(self.node, RunStatus.Skipped, error_message)

    def do_skip(self, cause=None) -> None:
        """Flag this runner as skipped and remember what caused it."""
        self.skip = True
        self.skip_cause = cause
479
+
480
def resource_types_from_args(
    args: Flags, all_resource_values: Set[NodeType], default_resource_values: Set[NodeType]
) -> Set[NodeType]:
    """Resolve the resource types selected by the CLI flags.

    Args:
        args: Flags providing ``resource_types`` and ``exclude_resource_types``.
        all_resource_values: Types substituted for the "all" keyword.
        default_resource_values: Types used when nothing is requested, and
            substituted for the "default" keyword.

    Returns:
        The requested types (or the defaults) minus any excluded types.
    """
    if args.resource_types:
        # This is a list of strings, not NodeTypes
        requested = set(args.resource_types)

        if "all" in requested:
            requested.remove("all")
            requested.update(all_resource_values)
        if "default" in requested:
            requested.remove("default")
            requested.update(default_resource_values)
        # Convert to a set of NodeTypes now that the non-NodeType strings are gone
        resource_types = {NodeType(entry) for entry in requested}
    else:
        resource_types = default_resource_values

    if args.exclude_resource_types:
        # Convert from a list of strings to a set of NodeTypes
        excluded = {NodeType(entry) for entry in args.exclude_resource_types}
        # Plain subtraction builds a new set, so the caller's defaults are
        # never mutated.
        resource_types = resource_types - excluded

    return resource_types
dvt/task/build.py ADDED
@@ -0,0 +1,197 @@
1
+ from typing import Dict, List, Optional, Set, Type
2
+
3
+ from dvt.artifacts.schemas.results import NodeStatus
4
+ from dvt.artifacts.schemas.run import RunResult
5
+ from dvt.cli.flags import Flags
6
+ from dvt.config.runtime import RuntimeConfig
7
+ from dvt.contracts.graph.manifest import Manifest
8
+ from dvt.exceptions import DbtInternalError
9
+ from dvt.graph import Graph, GraphQueue, ResourceTypeSelector
10
+ from dvt.node_types import NodeType
11
+ from dvt.runners import ExposureRunner as exposure_runner
12
+ from dvt.runners import SavedQueryRunner as saved_query_runner
13
+ from dvt.task.base import BaseRunner, resource_types_from_args
14
+ from dvt.task.run import MicrobatchModelRunner
15
+
16
+ from .function import FunctionRunner as function_runner
17
+ from .run import ModelRunner as run_model_runner
18
+ from .run import RunTask
19
+ from .seed import SeedRunner as seed_runner
20
+ from .snapshot import SnapshotRunner as snapshot_model_runner
21
+ from .test import TestRunner as test_runner
22
+
23
+
24
class BuildTask(RunTask):
    """The Build task processes all assets of a given process and attempts to
    'build' them in an opinionated fashion. Every resource type outlined in
    RUNNER_MAP will be processed by the mapped runners class.

    I.E. a resource of type Model is handled by the ModelRunner which is
    imported as run_model_runner.

    Selected unit tests are not placed in the graph queue. They are grouped by
    the model they depend on and executed immediately before that model (see
    get_graph_queue and call_model_and_unit_tests_runner).
    """

    # In this class: a unit test finishing with one of these statuses causes
    # its model to be skipped (see call_model_and_unit_tests_runner).
    # NOTE(review): the base task presumably consults this list as well - confirm.
    MARK_DEPENDENT_ERRORS_STATUSES = [
        NodeStatus.Error,
        NodeStatus.Fail,
        NodeStatus.Skipped,
        NodeStatus.PartialSuccess,
    ]

    # Resource type -> runner class used to execute nodes of that type.
    RUNNER_MAP = {
        NodeType.Model: run_model_runner,
        NodeType.Snapshot: snapshot_model_runner,
        NodeType.Seed: seed_runner,
        NodeType.Test: test_runner,
        NodeType.Unit: test_runner,
        NodeType.SavedQuery: saved_query_runner,
        NodeType.Exposure: exposure_runner,
        NodeType.Function: function_runner,
    }
    # Every resource type this task knows how to run.
    ALL_RESOURCE_VALUES = frozenset(RUNNER_MAP)

    def __init__(self, args: Flags, config: RuntimeConfig, manifest: Manifest) -> None:
        """Initialise the task with empty unit-test bookkeeping."""
        super().__init__(args, config, manifest)
        # unique_ids of the unit tests picked by the current selection
        self.selected_unit_tests: Set = set()
        # model unique_id -> unique_ids of the selected unit tests for that model
        self.model_to_unit_test_map: Dict[str, List] = {}

    def resource_types(self, no_unit_tests: bool = False) -> List[NodeType]:
        """Return the resource types to build, as resolved from the CLI flags.

        Args:
            no_unit_tests: When True, NodeType.Unit is dropped from the result.
        """
        # Fresh set copies are passed so the class-level frozenset is never shared.
        resource_types = resource_types_from_args(
            self.args, set(self.ALL_RESOURCE_VALUES), set(self.ALL_RESOURCE_VALUES)
        )

        # First we get selected_nodes including unit tests, then without,
        # and do a set difference.
        if no_unit_tests is True and NodeType.Unit in resource_types:
            resource_types.remove(NodeType.Unit)
        return list(resource_types)

    # overrides get_graph_queue in runnable.py
    def get_graph_queue(self) -> GraphQueue:
        """Build the graph queue without unit tests and record the selected unit tests.

        Side effects: sets ``self.selected_unit_tests`` and rebuilds
        ``self.model_to_unit_test_map``.
        """
        # Following uses self.selection_arg and self.exclusion_arg
        spec = self.get_selection_spec()

        # selector including unit tests
        full_selector = self.get_node_selector(no_unit_tests=False)
        # selected node unique_ids with unit_tests
        full_selected_nodes = full_selector.get_selected(spec)

        # This selector removes the unit_tests from the selector
        selector_wo_unit_tests = self.get_node_selector(no_unit_tests=True)
        # selected node unique_ids without unit_tests
        selected_nodes_wo_unit_tests = selector_wo_unit_tests.get_selected(spec)

        # Get the difference in the sets of nodes with and without unit tests and
        # save it
        selected_unit_tests = full_selected_nodes - selected_nodes_wo_unit_tests
        self.selected_unit_tests = selected_unit_tests
        self.build_model_to_unit_test_map(selected_unit_tests)

        # get_graph_queue in the selector will remove NodeTypes not specified
        # in the node_selector (filter_selection).
        return selector_wo_unit_tests.get_graph_queue(spec)

    # overrides handle_job_queue in runnable.py
    def handle_job_queue(self, pool, callback):
        """Take the next node off the job queue and dispatch it.

        Models that have selected unit tests go through
        handle_model_with_unit_tests_node; everything else goes through
        handle_job_queue_node.
        """
        if self.run_count == 0:
            # Unit tests are not in the queue, so add them to the node total
            # once, before the first node is handled.
            self.num_nodes = self.num_nodes + len(self.selected_unit_tests)
        node = self.job_queue.get()
        if (
            node.resource_type == NodeType.Model
            and self.model_to_unit_test_map
            and node.unique_id in self.model_to_unit_test_map
        ):
            self.handle_model_with_unit_tests_node(node, pool, callback)

        else:
            self.handle_job_queue_node(node, pool, callback)

    def handle_model_with_unit_tests_node(self, node, pool, callback):
        """Run a model together with its unit tests, inline or on the pool.

        In single-threaded mode the work is executed directly and its result is
        passed to ``callback``; otherwise it is submitted via ``pool.apply_async``.
        """
        self._raise_set_error()
        args = [node, pool]
        if self.config.args.single_threaded:
            callback(self.call_model_and_unit_tests_runner(*args))
        else:
            pool.apply_async(self.call_model_and_unit_tests_runner, args=args, callback=callback)

    def call_model_and_unit_tests_runner(self, node, pool) -> RunResult:
        """Run every selected unit test of ``node``, then run ``node`` itself.

        If a unit test ends in one of MARK_DEPENDENT_ERRORS_STATUSES, the model
        is marked as skipped, which also skips its remaining unit tests.

        Returns:
            The RunResult of the model's runner.

        Raises:
            DbtInternalError: if the manifest has not been set.
        """
        # Explicit check instead of `assert`, which is stripped under -O; this
        # matches get_node_selector and compile_manifest.
        if self.manifest is None:
            raise DbtInternalError(
                "manifest must be set to run a model and its unit tests"
            )
        for unit_test_unique_id in self.model_to_unit_test_map[node.unique_id]:
            unit_test_node = self.manifest.unit_tests[unit_test_unique_id]
            unit_test_runner = self.get_runner(unit_test_node)
            # If the model is marked skip, also skip the unit tests
            if node.unique_id in self._skipped_children:
                # cause is only for ephemeral nodes
                unit_test_runner.do_skip(cause=None)
            result = self.call_runner(unit_test_runner)
            self._handle_result(result)
            if result.status in self.MARK_DEPENDENT_ERRORS_STATUSES:
                # The _skipped_children dictionary can contain a run_result for ephemeral nodes,
                # but that should never be the case here.
                self._skipped_children[node.unique_id] = None
        runner = self.get_runner(node)
        if runner.node.unique_id in self._skipped_children:
            cause = self._skipped_children.pop(runner.node.unique_id)
            runner.do_skip(cause=cause)

        if isinstance(runner, MicrobatchModelRunner):
            runner.set_parent_task(self)
            runner.set_pool(pool)

        return self.call_runner(runner)

    # handle non-model-plus-unit-tests nodes
    def handle_job_queue_node(self, node, pool, callback):
        """Create the runner for ``node`` and submit it for execution."""
        self._raise_set_error()
        runner = self.get_runner(node)
        # we finally know what we're running! Make sure we haven't decided
        # to skip it due to upstream failures
        if runner.node.unique_id in self._skipped_children:
            cause = self._skipped_children.pop(runner.node.unique_id)
            runner.do_skip(cause=cause)

        if isinstance(runner, MicrobatchModelRunner):
            runner.set_parent_task(self)
            runner.set_pool(pool)

        args = [runner]
        self._submit(pool, args, callback)

    # Make a map of model unique_ids to selected unit test unique_ids,
    # for processing before the model.
    def build_model_to_unit_test_map(self, selected_unit_tests):
        """Group the selected unit tests by model and store the result.

        The model of a unit test is taken to be the first entry of its
        ``depends_on.nodes``. The mapping is stored on
        ``self.model_to_unit_test_map``.
        """
        dct = {}
        for unit_test_unique_id in selected_unit_tests:
            unit_test = self.manifest.unit_tests[unit_test_unique_id]
            # NOTE(review): raises IndexError if depends_on.nodes is empty -
            # confirm that cannot happen for selected unit tests.
            model_unique_id = unit_test.depends_on.nodes[0]
            dct.setdefault(model_unique_id, []).append(unit_test.unique_id)
        self.model_to_unit_test_map = dct

    # We return two different kinds of selectors, one with unit tests and one without
    def get_node_selector(self, no_unit_tests=False) -> ResourceTypeSelector:
        """Return a selector restricted to this task's resource types.

        Args:
            no_unit_tests: When True, the selector excludes unit tests.

        Raises:
            DbtInternalError: if the manifest or graph has not been set.
        """
        if self.manifest is None or self.graph is None:
            raise DbtInternalError("manifest and graph must be set to get node selection")

        resource_types = self.resource_types(no_unit_tests)

        return ResourceTypeSelector(
            graph=self.graph,
            manifest=self.manifest,
            previous_state=self.previous_state,
            resource_types=resource_types,
        )

    def get_runner_type(self, node) -> Optional[Type[BaseRunner]]:
        """Return the runner class for ``node``, or None if its type is unmapped.

        A model for which the parent task chooses MicrobatchModelRunner keeps
        that runner; every other node is looked up in RUNNER_MAP.
        """
        if (
            node.resource_type == NodeType.Model
            and super().get_runner_type(node) == MicrobatchModelRunner
        ):
            return MicrobatchModelRunner

        return self.RUNNER_MAP.get(node.resource_type)

    # Special build compile_manifest method to pass add_test_edges to the compiler
    def compile_manifest(self) -> None:
        """Compile the manifest into ``self.graph`` with test edges added.

        Raises:
            DbtInternalError: if the manifest has not been loaded.
        """
        if self.manifest is None:
            raise DbtInternalError("compile_manifest called before manifest was loaded")
        self.graph: Graph = self.compiler.compile(self.manifest, add_test_edges=True)