dvt-core 0.59.0a51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2660 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +844 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +60 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +642 -0
  74. dbt/compute/federated_executor.py +1080 -0
  75. dbt/compute/filter_pushdown.py +273 -0
  76. dbt/compute/jar_provisioning.py +273 -0
  77. dbt/compute/java_compat.py +689 -0
  78. dbt/compute/jdbc_utils.py +1252 -0
  79. dbt/compute/metadata/__init__.py +63 -0
  80. dbt/compute/metadata/adapters_registry.py +370 -0
  81. dbt/compute/metadata/catalog_store.py +1036 -0
  82. dbt/compute/metadata/registry.py +674 -0
  83. dbt/compute/metadata/store.py +1020 -0
  84. dbt/compute/smart_selector.py +377 -0
  85. dbt/compute/spark_logger.py +272 -0
  86. dbt/compute/strategies/__init__.py +55 -0
  87. dbt/compute/strategies/base.py +165 -0
  88. dbt/compute/strategies/dataproc.py +207 -0
  89. dbt/compute/strategies/emr.py +203 -0
  90. dbt/compute/strategies/local.py +472 -0
  91. dbt/compute/strategies/standalone.py +262 -0
  92. dbt/config/__init__.py +4 -0
  93. dbt/config/catalogs.py +94 -0
  94. dbt/config/compute.py +513 -0
  95. dbt/config/dvt_profile.py +408 -0
  96. dbt/config/profile.py +422 -0
  97. dbt/config/project.py +888 -0
  98. dbt/config/project_utils.py +48 -0
  99. dbt/config/renderer.py +231 -0
  100. dbt/config/runtime.py +564 -0
  101. dbt/config/selectors.py +208 -0
  102. dbt/config/utils.py +77 -0
  103. dbt/constants.py +28 -0
  104. dbt/context/__init__.py +0 -0
  105. dbt/context/base.py +745 -0
  106. dbt/context/configured.py +135 -0
  107. dbt/context/context_config.py +382 -0
  108. dbt/context/docs.py +82 -0
  109. dbt/context/exceptions_jinja.py +178 -0
  110. dbt/context/macro_resolver.py +195 -0
  111. dbt/context/macros.py +171 -0
  112. dbt/context/manifest.py +72 -0
  113. dbt/context/providers.py +2249 -0
  114. dbt/context/query_header.py +13 -0
  115. dbt/context/secret.py +58 -0
  116. dbt/context/target.py +74 -0
  117. dbt/contracts/__init__.py +0 -0
  118. dbt/contracts/files.py +413 -0
  119. dbt/contracts/graph/__init__.py +0 -0
  120. dbt/contracts/graph/manifest.py +1904 -0
  121. dbt/contracts/graph/metrics.py +97 -0
  122. dbt/contracts/graph/model_config.py +70 -0
  123. dbt/contracts/graph/node_args.py +42 -0
  124. dbt/contracts/graph/nodes.py +1806 -0
  125. dbt/contracts/graph/semantic_manifest.py +232 -0
  126. dbt/contracts/graph/unparsed.py +811 -0
  127. dbt/contracts/project.py +419 -0
  128. dbt/contracts/results.py +53 -0
  129. dbt/contracts/selection.py +23 -0
  130. dbt/contracts/sql.py +85 -0
  131. dbt/contracts/state.py +68 -0
  132. dbt/contracts/util.py +46 -0
  133. dbt/deprecations.py +348 -0
  134. dbt/deps/__init__.py +0 -0
  135. dbt/deps/base.py +152 -0
  136. dbt/deps/git.py +195 -0
  137. dbt/deps/local.py +79 -0
  138. dbt/deps/registry.py +130 -0
  139. dbt/deps/resolver.py +149 -0
  140. dbt/deps/tarball.py +120 -0
  141. dbt/docs/source/_ext/dbt_click.py +119 -0
  142. dbt/docs/source/conf.py +32 -0
  143. dbt/env_vars.py +64 -0
  144. dbt/event_time/event_time.py +40 -0
  145. dbt/event_time/sample_window.py +60 -0
  146. dbt/events/__init__.py +15 -0
  147. dbt/events/base_types.py +36 -0
  148. dbt/events/core_types_pb2.py +2 -0
  149. dbt/events/logging.py +108 -0
  150. dbt/events/types.py +2516 -0
  151. dbt/exceptions.py +1486 -0
  152. dbt/flags.py +89 -0
  153. dbt/graph/__init__.py +11 -0
  154. dbt/graph/cli.py +249 -0
  155. dbt/graph/graph.py +172 -0
  156. dbt/graph/queue.py +214 -0
  157. dbt/graph/selector.py +374 -0
  158. dbt/graph/selector_methods.py +975 -0
  159. dbt/graph/selector_spec.py +222 -0
  160. dbt/graph/thread_pool.py +18 -0
  161. dbt/hooks.py +21 -0
  162. dbt/include/README.md +49 -0
  163. dbt/include/__init__.py +3 -0
  164. dbt/include/data/adapters_registry.duckdb +0 -0
  165. dbt/include/data/build_comprehensive_registry.py +1254 -0
  166. dbt/include/data/build_registry.py +242 -0
  167. dbt/include/data/csv/adapter_queries.csv +33 -0
  168. dbt/include/data/csv/syntax_rules.csv +9 -0
  169. dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
  170. dbt/include/data/csv/type_mappings_databricks.csv +30 -0
  171. dbt/include/data/csv/type_mappings_mysql.csv +40 -0
  172. dbt/include/data/csv/type_mappings_oracle.csv +30 -0
  173. dbt/include/data/csv/type_mappings_postgres.csv +56 -0
  174. dbt/include/data/csv/type_mappings_redshift.csv +33 -0
  175. dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
  176. dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
  177. dbt/include/dvt_starter_project/README.md +15 -0
  178. dbt/include/dvt_starter_project/__init__.py +3 -0
  179. dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
  180. dbt/include/dvt_starter_project/dvt_project.yml +39 -0
  181. dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
  182. dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
  183. dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
  184. dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
  185. dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
  186. dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
  187. dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
  188. dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
  189. dbt/internal_deprecations.py +26 -0
  190. dbt/jsonschemas/__init__.py +3 -0
  191. dbt/jsonschemas/jsonschemas.py +309 -0
  192. dbt/jsonschemas/project/0.0.110.json +4717 -0
  193. dbt/jsonschemas/project/0.0.85.json +2015 -0
  194. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  195. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  196. dbt/jsonschemas/resources/latest.json +6773 -0
  197. dbt/links.py +4 -0
  198. dbt/materializations/__init__.py +0 -0
  199. dbt/materializations/incremental/__init__.py +0 -0
  200. dbt/materializations/incremental/microbatch.py +236 -0
  201. dbt/mp_context.py +8 -0
  202. dbt/node_types.py +37 -0
  203. dbt/parser/__init__.py +23 -0
  204. dbt/parser/analysis.py +21 -0
  205. dbt/parser/base.py +548 -0
  206. dbt/parser/common.py +266 -0
  207. dbt/parser/docs.py +52 -0
  208. dbt/parser/fixtures.py +51 -0
  209. dbt/parser/functions.py +30 -0
  210. dbt/parser/generic_test.py +100 -0
  211. dbt/parser/generic_test_builders.py +333 -0
  212. dbt/parser/hooks.py +122 -0
  213. dbt/parser/macros.py +137 -0
  214. dbt/parser/manifest.py +2208 -0
  215. dbt/parser/models.py +573 -0
  216. dbt/parser/partial.py +1178 -0
  217. dbt/parser/read_files.py +445 -0
  218. dbt/parser/schema_generic_tests.py +422 -0
  219. dbt/parser/schema_renderer.py +111 -0
  220. dbt/parser/schema_yaml_readers.py +935 -0
  221. dbt/parser/schemas.py +1466 -0
  222. dbt/parser/search.py +149 -0
  223. dbt/parser/seeds.py +28 -0
  224. dbt/parser/singular_test.py +20 -0
  225. dbt/parser/snapshots.py +44 -0
  226. dbt/parser/sources.py +558 -0
  227. dbt/parser/sql.py +62 -0
  228. dbt/parser/unit_tests.py +621 -0
  229. dbt/plugins/__init__.py +20 -0
  230. dbt/plugins/contracts.py +9 -0
  231. dbt/plugins/exceptions.py +2 -0
  232. dbt/plugins/manager.py +163 -0
  233. dbt/plugins/manifest.py +21 -0
  234. dbt/profiler.py +20 -0
  235. dbt/py.typed +1 -0
  236. dbt/query_analyzer.py +410 -0
  237. dbt/runners/__init__.py +2 -0
  238. dbt/runners/exposure_runner.py +7 -0
  239. dbt/runners/no_op_runner.py +45 -0
  240. dbt/runners/saved_query_runner.py +7 -0
  241. dbt/selected_resources.py +8 -0
  242. dbt/task/__init__.py +0 -0
  243. dbt/task/base.py +506 -0
  244. dbt/task/build.py +197 -0
  245. dbt/task/clean.py +56 -0
  246. dbt/task/clone.py +161 -0
  247. dbt/task/compile.py +150 -0
  248. dbt/task/compute.py +458 -0
  249. dbt/task/debug.py +513 -0
  250. dbt/task/deps.py +280 -0
  251. dbt/task/docs/__init__.py +3 -0
  252. dbt/task/docs/api/__init__.py +23 -0
  253. dbt/task/docs/api/catalog.py +204 -0
  254. dbt/task/docs/api/lineage.py +234 -0
  255. dbt/task/docs/api/profile.py +204 -0
  256. dbt/task/docs/api/spark.py +186 -0
  257. dbt/task/docs/generate.py +1002 -0
  258. dbt/task/docs/index.html +250 -0
  259. dbt/task/docs/serve.py +174 -0
  260. dbt/task/dvt_output.py +509 -0
  261. dbt/task/dvt_run.py +282 -0
  262. dbt/task/dvt_seed.py +806 -0
  263. dbt/task/freshness.py +322 -0
  264. dbt/task/function.py +121 -0
  265. dbt/task/group_lookup.py +46 -0
  266. dbt/task/init.py +1022 -0
  267. dbt/task/java.py +316 -0
  268. dbt/task/list.py +236 -0
  269. dbt/task/metadata.py +804 -0
  270. dbt/task/migrate.py +714 -0
  271. dbt/task/printer.py +175 -0
  272. dbt/task/profile.py +1489 -0
  273. dbt/task/profile_serve.py +662 -0
  274. dbt/task/retract.py +441 -0
  275. dbt/task/retry.py +175 -0
  276. dbt/task/run.py +1647 -0
  277. dbt/task/run_operation.py +141 -0
  278. dbt/task/runnable.py +758 -0
  279. dbt/task/seed.py +103 -0
  280. dbt/task/show.py +149 -0
  281. dbt/task/snapshot.py +56 -0
  282. dbt/task/spark.py +414 -0
  283. dbt/task/sql.py +110 -0
  284. dbt/task/target_sync.py +814 -0
  285. dbt/task/test.py +464 -0
  286. dbt/tests/fixtures/__init__.py +1 -0
  287. dbt/tests/fixtures/project.py +620 -0
  288. dbt/tests/util.py +651 -0
  289. dbt/tracking.py +529 -0
  290. dbt/utils/__init__.py +3 -0
  291. dbt/utils/artifact_upload.py +151 -0
  292. dbt/utils/utils.py +408 -0
  293. dbt/version.py +271 -0
  294. dvt_cli/__init__.py +158 -0
  295. dvt_core-0.59.0a51.dist-info/METADATA +288 -0
  296. dvt_core-0.59.0a51.dist-info/RECORD +299 -0
  297. dvt_core-0.59.0a51.dist-info/WHEEL +5 -0
  298. dvt_core-0.59.0a51.dist-info/entry_points.txt +2 -0
  299. dvt_core-0.59.0a51.dist-info/top_level.txt +2 -0
dbt/task/__init__.py ADDED
File without changes
dbt/task/base.py ADDED
@@ -0,0 +1,506 @@
1
+ import os
2
+ import threading
3
+ import time
4
+ import traceback
5
+ from abc import ABCMeta, abstractmethod
6
+ from contextlib import nullcontext
7
+ from datetime import datetime, timezone
8
+ from pathlib import Path
9
+ from typing import Any, Dict, List, Optional, Set
10
+
11
+ import dbt.exceptions
12
+ import dbt_common.exceptions.base
13
+ from dbt import tracking
14
+ from dbt.artifacts.resources.types import NodeType
15
+ from dbt.artifacts.schemas.results import (
16
+ NodeStatus,
17
+ RunningStatus,
18
+ RunStatus,
19
+ TimingInfo,
20
+ collect_timing_info,
21
+ )
22
+ from dbt.artifacts.schemas.run import RunResult
23
+ from dbt.cli.flags import Flags
24
+ from dbt.compilation import Compiler
25
+ from dbt.config import RuntimeConfig
26
+ from dbt.config.profile import read_profile
27
+ from dbt.constants import DVT_PROJECT_FILE_NAME
28
+ from dbt.contracts.graph.manifest import Manifest
29
+ from dbt.events.types import (
30
+ CatchableExceptionOnRun,
31
+ GenericExceptionOnRun,
32
+ InternalErrorOnRun,
33
+ LogDbtProfileError,
34
+ LogDbtProjectError,
35
+ LogDebugStackTrace,
36
+ LogSkipBecauseError,
37
+ NodeCompiling,
38
+ NodeConnectionReleaseError,
39
+ NodeExecuting,
40
+ SkippingDetails,
41
+ )
42
+ from dbt.flags import get_flags
43
+ from dbt.graph import Graph
44
+ from dbt.task import group_lookup
45
+ from dbt.task.printer import print_run_result_error
46
+ from dbt_common.events.contextvars import get_node_info
47
+ from dbt_common.events.functions import fire_event
48
+ from dbt_common.exceptions import DbtInternalError, DbtRuntimeError, NotImplementedError
49
+
50
+
51
def read_profiles(profiles_dir: Optional[str] = None) -> Dict[str, Any]:
    """Return the raw profiles mapping without its top-level ``config`` key.

    Within this module the function is only called while reporting a
    profile error (to list the profile names that do exist).

    Args:
        profiles_dir: Directory to read profiles from. When ``None`` the
            ``PROFILES_DIR`` flag value is used instead.

    Returns:
        A dict of every entry returned by ``read_profile`` except the one
        keyed ``"config"``; an empty dict when ``read_profile`` returns
        ``None``.
    """
    directory = get_flags().PROFILES_DIR if profiles_dir is None else profiles_dir

    loaded = read_profile(directory)
    if loaded is None:
        return {}

    # "config" is filtered out so only the remaining entries are reported.
    return {name: body for name, body in loaded.items() if name != "config"}
64
+
65
+
66
class BaseTask(metaclass=ABCMeta):
    """Abstract base for tasks.

    Stores the parsed flags and doubles as a context manager that remembers
    the working directory on entry and changes back to it on exit.
    """

    def __init__(self, args: Flags) -> None:
        """Keep the parsed command-line flags on the instance."""
        self.args = args

    def __enter__(self):
        """Record the current working directory and return the task itself."""
        starting_dir = os.getcwd()
        self.orig_dir = starting_dir
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Change back to the directory recorded by ``__enter__``.

        Returns ``None``, so an exception raised inside the ``with`` body is
        not suppressed.
        """
        os.chdir(self.orig_dir)

    @abstractmethod
    def run(self):
        """Execute the task; concrete subclasses must override this."""
        raise dbt_common.exceptions.base.NotImplementedError("Not Implemented")

    def interpret_results(self, results):
        """Report whether ``results`` count as success; the base always says yes."""
        return True
83
+
84
+
85
def get_nearest_project_dir(project_dir: Optional[str]) -> Path:
    """Return the directory that holds the project file.

    DVT accepts either ``dvt_project.yml`` (``DVT_PROJECT_FILE_NAME``) or
    ``dbt_project.yml`` as the project marker.

    Args:
        project_dir: Explicit project directory. When truthy, only that
            directory is inspected. When falsy, the current working
            directory is inspected instead.

    Returns:
        The inspected directory as a ``Path``.

    Raises:
        dbt_common.exceptions.DbtRuntimeError: If the inspected directory
            contains neither project file.
    """
    explicit = bool(project_dir)
    candidate = Path(project_dir) if explicit else Path.cwd()

    # Either marker file is enough to treat the directory as a project root.
    markers = (candidate / DVT_PROJECT_FILE_NAME, candidate / "dbt_project.yml")
    if any(marker.is_file() for marker in markers):
        return candidate

    if explicit:
        raise dbt_common.exceptions.DbtRuntimeError(
            "fatal: Invalid --project-dir flag. Not a dbt project. "
            "Missing dvt_project.yml or dbt_project.yml file"
        )

    # NOTE: the message mentions parent directories, but only the current
    # working directory is checked here; parents are never walked.
    raise dbt_common.exceptions.DbtRuntimeError(
        "fatal: Not a dbt project (or any of the parent directories). "
        "Missing dvt_project.yml or dbt_project.yml file"
    )
111
+
112
+
113
def move_to_nearest_project_dir(project_dir: Optional[str]) -> Path:
    """Change the working directory to the resolved project directory.

    Args:
        project_dir: Passed straight through to ``get_nearest_project_dir``.

    Returns:
        The directory the process changed into.

    Raises:
        Whatever ``get_nearest_project_dir`` raises when no project file is
        found; in that case the working directory is left unchanged.
    """
    target = get_nearest_project_dir(project_dir)
    os.chdir(target)
    return target
117
+
118
+
119
+ # TODO: look into deprecating this class in favor of several small functions that
120
+ # produce the same behavior. currently this class only contains manifest compilation,
121
+ # holding a manifest, and moving directories.
122
class ConfiguredTask(BaseTask):
    """A task bound to a runtime config, optionally holding a manifest.

    It owns a ``Compiler`` built from the config and stores the graph that
    ``compile_manifest`` produces.
    """

    def __init__(
        self, args: Flags, config: RuntimeConfig, manifest: Optional[Manifest] = None
    ) -> None:
        """Store the config and manifest and build the compiler.

        ``graph`` starts as ``None`` and is filled in by ``compile_manifest``.
        """
        super().__init__(args)
        self.config = config
        self.graph: Optional[Graph] = None
        self.manifest = manifest
        self.compiler = Compiler(self.config)

    def compile_manifest(self) -> None:
        """Compile ``self.manifest`` into ``self.graph``.

        The elapsed time is reported through tracking when there is an
        active tracking user.

        Raises:
            DbtInternalError: If no manifest has been set on the task.
        """
        if self.manifest is None:
            raise DbtInternalError("compile_manifest called before manifest was loaded")

        began = time.perf_counter()
        self.graph = self.compiler.compile(self.manifest)
        elapsed = time.perf_counter() - began

        if dbt.tracking.active_user is not None:
            dbt.tracking.track_runnable_timing({"graph_compilation_elapsed": elapsed})

    @classmethod
    def from_args(cls, args: Flags, *pargs, **kwargs):
        """Build a task from parsed flags.

        Changes into the project directory, loads the runtime config, and
        forwards any extra positional/keyword arguments to the constructor.

        Raises:
            dbt_common.exceptions.DbtRuntimeError: If loading the config
                raised a project or profile error. The original error is
                logged, reported to tracking, and chained as the cause.
        """
        move_to_nearest_project_dir(args.project_dir)
        try:
            # This is usually RuntimeConfig
            config = RuntimeConfig.from_args(args)
        except (dbt.exceptions.DbtProjectError, dbt.exceptions.DbtProfileError) as exc:
            # The project-error check comes first, matching the order in
            # which the two handlers were originally tried.
            if isinstance(exc, dbt.exceptions.DbtProjectError):
                event = LogDbtProjectError(exc=str(exc))
            else:
                known_profiles = list(read_profiles(get_flags().PROFILES_DIR).keys())
                event = LogDbtProfileError(exc=str(exc), profiles=known_profiles)
            fire_event(event)

            tracking.track_invalid_invocation(args=args, result_type=exc.result_type)
            raise dbt_common.exceptions.DbtRuntimeError("Could not run dbt") from exc
        return cls(args, config, *pargs, **kwargs)
161
+
162
+
163
class ExecutionContext:
    """Mutable scratch state shared by execution and error handling.

    It carries the timing entries collected so far and the most recent form
    of the node (replaced after compiling and again after executing).
    """

    def __init__(self, node) -> None:
        """Start with the given node and an empty timing list."""
        self.node = node
        self.timing: List[TimingInfo] = []
171
+
172
+
173
class BaseRunner(metaclass=ABCMeta):
    """Abstract runner for a single node.

    A runner compiles its node and, unless the node is an ephemeral model
    (and ``run_ephemeral_models`` is off), executes it. Exceptions raised on
    the way are logged and converted into an error ``RunResult``.
    """

    def __init__(self, config, adapter, node, node_index: int, num_nodes: int) -> None:
        """Store the collaborators and reset the skip state."""
        self.config = config
        self.compiler = Compiler(config)
        self.adapter = adapter
        self.node = node
        self.node_index = node_index
        self.num_nodes = num_nodes

        # Skip bookkeeping, filled in by do_skip().
        self.skip = False
        self.skip_cause: Optional[RunResult] = None

        # When True, ephemeral models are executed as well as compiled.
        self.run_ephemeral_models = False

    @abstractmethod
    def compile(self, manifest: Manifest) -> Any:
        """Compile ``self.node`` against ``manifest``; subclasses implement this."""
        pass

    def _node_build_path(self) -> Optional[str]:
        """Return the node's ``build_path`` if it has one, else ``None``."""
        if hasattr(self.node, "build_path"):
            return self.node.build_path
        return None

    def get_result_status(self, result) -> Dict[str, str]:
        """Translate a result's status into a small ``node_status`` dict.

        Error results also carry the stringified message under
        ``node_error``. Any status other than error, skipped, fail or warn
        is reported as ``"passed"``.
        """
        status = result.status
        if status == NodeStatus.Error:
            return {"node_status": "error", "node_error": str(result.message)}
        if status == NodeStatus.Skipped:
            return {"node_status": "skipped"}
        if status == NodeStatus.Fail:
            return {"node_status": "failed"}
        if status == NodeStatus.Warn:
            return {"node_status": "warn"}
        return {"node_status": "passed"}

    def run_with_hooks(self, manifest):
        """Run the node, wrapped in the before/after hooks.

        Skipped runners short-circuit to ``on_skip``. Ephemeral models are
        run without calling ``before_execute`` / ``after_execute``.
        """
        if self.skip:
            return self.on_skip()

        # no before/after printing for ephemeral models
        if not self.node.is_ephemeral_model:
            self.before_execute()

        result = self.safe_run(manifest)

        # Naive (tz-stripped) UTC timestamp in ISO format.
        finished_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        self.node.update_event_status(node_status=result.status, finished_at=finished_at)

        if not self.node.is_ephemeral_model:
            self.after_execute(result)

        return result

    def _build_run_result(
        self,
        node,
        start_time,
        status,
        timing_info,
        message,
        agate_table=None,
        adapter_response=None,
        failures=None,
        batch_results=None,
    ):
        """Assemble a ``RunResult`` stamped with elapsed time and thread name.

        ``start_time`` is a ``time.time()`` value; a missing
        ``adapter_response`` becomes an empty dict.
        """
        elapsed = time.time() - start_time
        current_thread_name = threading.current_thread().name
        response = {} if adapter_response is None else adapter_response
        return RunResult(
            status=status,
            thread_id=current_thread_name,
            execution_time=elapsed,
            timing=timing_info,
            message=message,
            node=node,
            agate_table=agate_table,
            adapter_response=response,
            failures=failures,
            batch_results=batch_results,
        )

    def error_result(self, node, message, start_time, timing_info):
        """Build a result with ``RunStatus.Error`` and the given message."""
        return self._build_run_result(
            node=node,
            start_time=start_time,
            status=RunStatus.Error,
            timing_info=timing_info,
            message=message,
        )

    def ephemeral_result(self, node, start_time, timing_info):
        """Build a message-less ``RunStatus.Success`` result."""
        return self._build_run_result(
            node=node,
            start_time=start_time,
            status=RunStatus.Success,
            timing_info=timing_info,
            message=None,
        )

    def from_run_result(self, result, start_time, timing_info):
        """Copy ``result`` into a fresh result with new timing information."""
        return self._build_run_result(
            node=result.node,
            start_time=start_time,
            status=result.status,
            timing_info=timing_info,
            message=result.message,
            agate_table=result.agate_table,
            adapter_response=result.adapter_response,
            failures=result.failures,
            batch_results=result.batch_results,
        )

    def compile_and_execute(self, manifest: Manifest, ctx: ExecutionContext):
        """Compile the node and, where applicable, execute it.

        ``ctx.node`` is updated after each stage so a later error handler
        sees the newest form of the node.

        Returns:
            Whatever ``run`` returned, or ``None`` when only compilation
            happened (ephemeral model with ``run_ephemeral_models`` off).
        """
        result = None

        # Only open a named adapter connection when introspection is enabled.
        connection = (
            self.adapter.connection_named(self.node.unique_id, self.node)
            if get_flags().INTROSPECT
            else nullcontext()
        )
        with connection:
            ctx.node.update_event_status(node_status=RunningStatus.Compiling)
            fire_event(NodeCompiling(node_info=ctx.node.node_info))
            with collect_timing_info("compile", ctx.timing.append):
                # if we fail here, we still have a compiled node to return
                # this has the benefit of showing a build path for the errant
                # model. This calls the 'compile' method in CompileTask
                ctx.node = self.compile(manifest)

            # for ephemeral nodes, we only want to compile, not run
            should_execute = not ctx.node.is_ephemeral_model or self.run_ephemeral_models
            if should_execute:
                ctx.node.update_event_status(node_status=RunningStatus.Executing)
                fire_event(NodeExecuting(node_info=ctx.node.node_info))
                with collect_timing_info("execute", ctx.timing.append):
                    result = self.run(ctx.node, manifest)
                    ctx.node = result.node

        return result

    def _handle_catchable_exception(self, e: DbtRuntimeError, ctx: ExecutionContext) -> str:
        """Log a runtime error, attaching the current node if it has none."""
        if e.node is None:
            e.add_node(ctx.node)

        message = str(e)
        fire_event(
            CatchableExceptionOnRun(
                exc=message, exc_info=traceback.format_exc(), node_info=get_node_info()
            )
        )
        return str(e)

    def _handle_internal_exception(self, e: DbtInternalError, ctx: ExecutionContext) -> str:
        """Log an internal error together with the node's build path."""
        fire_event(
            InternalErrorOnRun(
                build_path=self._node_build_path(), exc=str(e), node_info=get_node_info()
            )
        )
        return str(e)

    def _handle_generic_exception(self, e: Exception, ctx: ExecutionContext) -> str:
        """Log any other exception, followed by a debug stack trace event."""
        fire_event(
            GenericExceptionOnRun(
                build_path=self._node_build_path(),
                unique_id=self.node.unique_id,
                exc=str(e),
                node_info=get_node_info(),
            )
        )
        fire_event(LogDebugStackTrace(exc_info=traceback.format_exc()))

        return str(e)

    def handle_exception(self, e: Exception, ctx: ExecutionContext) -> str:
        """Route ``e`` to the matching handler and return its error string."""
        if isinstance(e, DbtRuntimeError):
            return self._handle_catchable_exception(e, ctx)
        if isinstance(e, DbtInternalError):
            return self._handle_internal_exception(e, ctx)
        return self._handle_generic_exception(e, ctx)

    def safe_run(self, manifest: Manifest):
        """Run ``compile_and_execute`` and always return a ``RunResult``.

        Exceptions are turned into an error result. The adapter connection
        is released in every case. A release failure only becomes the error
        when there is a non-error result and no earlier error.
        """
        started = time.time()
        ctx = ExecutionContext(self.node)
        error = None
        result = None

        try:
            result = self.compile_and_execute(manifest, ctx)
        except Exception as exc:
            error = self.handle_exception(exc, ctx)
        finally:
            release_error = self._safe_release_connection()

            # if releasing failed and the result doesn't have an error yet, set
            # an error
            if release_error is not None and error is None:
                if result is not None and result.status != NodeStatus.Error:
                    error = release_error

        if error is not None:
            return self.error_result(ctx.node, error, started, ctx.timing)
        if result is not None:
            return self.from_run_result(result, started, ctx.timing)
        return self.ephemeral_result(ctx.node, started, ctx.timing)

    def _safe_release_connection(self):
        """Try to release the adapter connection.

        Returns:
            ``None`` on success; otherwise the stringified exception, after
            logging it.
        """
        try:
            self.adapter.release_connection()
        except Exception as exc:
            fire_event(
                NodeConnectionReleaseError(
                    node_name=self.node.name, exc=str(exc), exc_info=traceback.format_exc()
                )
            )
            return str(exc)
        else:
            return None

    def before_execute(self) -> None:
        """Hook called before a non-ephemeral node runs; not implemented here."""
        raise NotImplementedError("before_execute is not implemented")

    def execute(self, compiled_node, manifest):
        """Execute a compiled node; not implemented here."""
        raise NotImplementedError("execute is not implemented")

    def run(self, compiled_node, manifest):
        """Run a compiled node by delegating to ``execute``."""
        return self.execute(compiled_node, manifest)

    def after_execute(self, result) -> None:
        """Hook called after a non-ephemeral node runs; not implemented here."""
        raise NotImplementedError("after_execute is not implemented")

    def _skip_caused_by_ephemeral_failure(self) -> bool:
        """Tell whether the recorded skip cause is an ephemeral model's result."""
        cause = self.skip_cause
        if cause is None or cause.node is None:
            return False
        return cause.node.is_ephemeral_model

    def on_skip(self):
        """Log the skip and return a ``RunStatus.Skipped`` result for the node.

        For a non-ephemeral node skipped because an upstream ephemeral model
        failed, the result also carries an error message so the run exits
        with an error code. Ephemeral nodes are skipped without logging.
        """
        schema_name = getattr(self.node, "schema", "")
        node_name = self.node.name

        error_message = None
        if not self.node.is_ephemeral_model:
            group = group_lookup.get(self.node.unique_id)

            if not self._skip_caused_by_ephemeral_failure():
                # 'skipped' nodes should not have a value for 'node_finished_at'
                # they do have 'node_started_at', which is set in GraphRunnableTask.call_runner
                self.node.update_event_status(node_status=RunStatus.Skipped)
                fire_event(
                    SkippingDetails(
                        resource_type=self.node.resource_type,
                        schema=schema_name,
                        node_name=node_name,
                        index=self.node_index,
                        total=self.num_nodes,
                        node_info=self.node.node_info,
                        group=group,
                    )
                )
            else:
                # Skipped because an upstream ephemeral model failed: emit the
                # special 'error skip' message, including the cause's status.
                fire_event(
                    LogSkipBecauseError(
                        schema=schema_name,
                        relation=node_name,
                        index=self.node_index,
                        total=self.num_nodes,
                        status=self.skip_cause.status,
                        group=group,
                    )
                )
                # skip_cause here should be the run_result from the ephemeral model
                print_run_result_error(result=self.skip_cause, newline=False)
                if self.skip_cause is None:  # mypy appeasement
                    raise DbtInternalError(
                        "Skip cause not set but skip was somehow caused by an ephemeral failure"
                    )
                # set an error so dbt will exit with an error code
                error_message = (
                    "Compilation Error in {}, caused by compilation error "
                    "in referenced ephemeral model {}".format(
                        self.node.unique_id, self.skip_cause.node.unique_id
                    )
                )

        return RunResult.from_node(self.node, RunStatus.Skipped, error_message)

    def do_skip(self, cause=None) -> None:
        """Mark this runner as skipped and remember the causing result, if any."""
        self.skip = True
        self.skip_cause = cause
480
+
481
+
482
def resource_types_from_args(
    args: Flags, all_resource_values: Set[NodeType], default_resource_values: Set[NodeType]
) -> Set[NodeType]:
    """Work out which resource types the flags select.

    Args:
        args: Parsed flags; ``resource_types`` and ``exclude_resource_types``
            are read from it.
        all_resource_values: Types substituted for the ``"all"`` keyword.
        default_resource_values: Types used when nothing is requested, and
            substituted for the ``"default"`` keyword.

    Returns:
        The requested types (or the defaults) minus any excluded types.
    """
    if args.resource_types:
        # This is a collection of strings, not NodeTypes
        requested = set(args.resource_types)

        # Expand the two keyword placeholders into concrete values.
        if "all" in requested:
            requested.remove("all")
            requested.update(all_resource_values)
        if "default" in requested:
            requested.remove("default")
            requested.update(default_resource_values)

        # Convert to NodeTypes now that the non-NodeType strings are gone
        resource_types = {NodeType(rt) for rt in requested}
    else:
        resource_types = default_resource_values

    if args.exclude_resource_types:
        # Convert from a list of strings to a set of NodeTypes
        excluded = {NodeType(rt) for rt in args.exclude_resource_types}
        resource_types = resource_types - excluded

    return resource_types
dbt/task/build.py ADDED
@@ -0,0 +1,197 @@
1
+ from typing import Dict, List, Optional, Set, Type
2
+
3
+ from dbt.artifacts.schemas.results import NodeStatus
4
+ from dbt.artifacts.schemas.run import RunResult
5
+ from dbt.cli.flags import Flags
6
+ from dbt.config.runtime import RuntimeConfig
7
+ from dbt.contracts.graph.manifest import Manifest
8
+ from dbt.exceptions import DbtInternalError
9
+ from dbt.graph import Graph, GraphQueue, ResourceTypeSelector
10
+ from dbt.node_types import NodeType
11
+ from dbt.runners import ExposureRunner as exposure_runner
12
+ from dbt.runners import SavedQueryRunner as saved_query_runner
13
+ from dbt.task.base import BaseRunner, resource_types_from_args
14
+ from dbt.task.run import MicrobatchModelRunner
15
+
16
+ from .function import FunctionRunner as function_runner
17
+ from .run import ModelRunner as run_model_runner
18
+ from .run import RunTask
19
+ from .seed import SeedRunner as seed_runner
20
+ from .snapshot import SnapshotRunner as snapshot_model_runner
21
+ from .test import TestRunner as test_runner
22
+
23
+
24
class BuildTask(RunTask):
    """The Build task processes all assets of a given process and attempts to
    'build' them in an opinionated fashion. Every resource type outlined in
    RUNNER_MAP will be processed by the mapped runner class.

    I.E. a resource of type Model is handled by the ModelRunner which is
    imported as run_model_runner.

    Unit tests are handled specially: the graph queue is built from a selector
    that excludes them, and the selected unit tests are instead grouped by
    model and executed right before that model in the same call
    (see call_model_and_unit_tests_runner).
    """

    # A unit test result with one of these statuses marks its model as skipped.
    MARK_DEPENDENT_ERRORS_STATUSES = [
        NodeStatus.Error,
        NodeStatus.Fail,
        NodeStatus.Skipped,
        NodeStatus.PartialSuccess,
    ]

    # Resource type -> runner class used by get_runner_type.
    RUNNER_MAP = {
        NodeType.Model: run_model_runner,
        NodeType.Snapshot: snapshot_model_runner,
        NodeType.Seed: seed_runner,
        NodeType.Test: test_runner,
        NodeType.Unit: test_runner,
        NodeType.SavedQuery: saved_query_runner,
        NodeType.Exposure: exposure_runner,
        NodeType.Function: function_runner,
    }
    # Every resource type this task knows how to run (the keys of RUNNER_MAP).
    ALL_RESOURCE_VALUES = frozenset(RUNNER_MAP)

    def __init__(self, args: Flags, config: RuntimeConfig, manifest: Manifest) -> None:
        super().__init__(args, config, manifest)
        # unique_ids of the unit tests picked by the current selection
        self.selected_unit_tests: Set = set()
        # model unique_id -> unique_ids of the selected unit tests for that model
        self.model_to_unit_test_map: Dict[str, List] = {}

    def resource_types(self, no_unit_tests: bool = False) -> List[NodeType]:
        """Return the resource types selected by the CLI flags.

        Both "all" and "default" resolve to ALL_RESOURCE_VALUES for this task.
        When `no_unit_tests` is True, NodeType.Unit is dropped from the result.
        """
        resource_types = resource_types_from_args(
            self.args, set(self.ALL_RESOURCE_VALUES), set(self.ALL_RESOURCE_VALUES)
        )

        # First we get selected_nodes including unit tests, then without,
        # and do a set difference (see get_graph_queue).
        if no_unit_tests is True:
            resource_types.discard(NodeType.Unit)
        return list(resource_types)

    # overrides get_graph_queue in runnable.py
    def get_graph_queue(self) -> GraphQueue:
        """Build the job queue without unit tests and record the selected unit tests.

        Side effects: sets `selected_unit_tests` and `model_to_unit_test_map`.
        """
        # Following uses self.selection_arg and self.exclusion_arg
        spec = self.get_selection_spec()

        # selector including unit tests
        full_selector = self.get_node_selector(no_unit_tests=False)
        # selected node unique_ids with unit_tests
        full_selected_nodes = full_selector.get_selected(spec)

        # This selector removes the unit_tests from the selector
        selector_wo_unit_tests = self.get_node_selector(no_unit_tests=True)
        # selected node unique_ids without unit_tests
        selected_nodes_wo_unit_tests = selector_wo_unit_tests.get_selected(spec)

        # Get the difference in the sets of nodes with and without unit tests and
        # save it
        selected_unit_tests = full_selected_nodes - selected_nodes_wo_unit_tests
        self.selected_unit_tests = selected_unit_tests
        self.build_model_to_unit_test_map(selected_unit_tests)

        # get_graph_queue in the selector will remove NodeTypes not specified
        # in the node_selector (filter_selection).
        return selector_wo_unit_tests.get_graph_queue(spec)

    # overrides handle_job_queue in runnable.py
    def handle_job_queue(self, pool, callback):
        """Take the next node off the job queue and dispatch it.

        Models that have selected unit tests go through the
        model-plus-unit-tests path; everything else is submitted normally.
        """
        if self.run_count == 0:
            # Unit tests are not in the job queue, so add them to the node
            # total here -- presumably so progress counts include them.
            self.num_nodes += len(self.selected_unit_tests)
        node = self.job_queue.get()
        if (
            node.resource_type == NodeType.Model
            and self.model_to_unit_test_map
            and node.unique_id in self.model_to_unit_test_map
        ):
            self.handle_model_with_unit_tests_node(node, pool, callback)
        else:
            self.handle_job_queue_node(node, pool, callback)

    def handle_model_with_unit_tests_node(self, node, pool, callback):
        """Run a model together with its unit tests, inline or on the pool."""
        self._raise_set_error()
        args = [node, pool]
        if self.config.args.single_threaded:
            callback(self.call_model_and_unit_tests_runner(*args))
        else:
            pool.apply_async(self.call_model_and_unit_tests_runner, args=args, callback=callback)

    def call_model_and_unit_tests_runner(self, node, pool) -> RunResult:
        """Run every selected unit test for `node`, then run the model itself.

        Once the model is marked as skipped (before the loop, or because an
        earlier unit test ended in one of MARK_DEPENDENT_ERRORS_STATUSES), the
        remaining unit tests and the model are skipped too.

        Returns:
            The RunResult of the model; unit test results are passed to
            `_handle_result` as they complete.

        Raises:
            DbtInternalError: if the manifest has not been set.
        """
        # Raise instead of assert: assert is stripped under `python -O`, and the
        # sibling methods already signal a missing manifest with DbtInternalError.
        if self.manifest is None:
            raise DbtInternalError("manifest must be set to run unit tests")

        for unit_test_unique_id in self.model_to_unit_test_map[node.unique_id]:
            unit_test_node = self.manifest.unit_tests[unit_test_unique_id]
            unit_test_runner = self.get_runner(unit_test_node)
            # If the model is marked skip, also skip the unit tests
            if node.unique_id in self._skipped_children:
                # cause is only for ephemeral nodes
                unit_test_runner.do_skip(cause=None)
            result = self.call_runner(unit_test_runner)
            self._handle_result(result)
            if result.status in self.MARK_DEPENDENT_ERRORS_STATUSES:
                # The _skipped_children dictionary can contain a run_result for ephemeral nodes,
                # but that should never be the case here.
                self._skipped_children[node.unique_id] = None

        return self.call_runner(self._prepare_runner(node, pool))

    # handle non-model-plus-unit-tests nodes
    def handle_job_queue_node(self, node, pool, callback):
        """Prepare the runner for `node` and submit it to the pool."""
        self._raise_set_error()
        args = [self._prepare_runner(node, pool)]
        self._submit(pool, args, callback)

    def _prepare_runner(self, node, pool):
        """Return the runner for `node`, with skip state and microbatch context applied."""
        runner = self.get_runner(node)
        # we finally know what we're running! Make sure we haven't decided
        # to skip it due to upstream failures
        if runner.node.unique_id in self._skipped_children:
            cause = self._skipped_children.pop(runner.node.unique_id)
            runner.do_skip(cause=cause)

        if isinstance(runner, MicrobatchModelRunner):
            runner.set_parent_task(self)
            runner.set_pool(pool)

        return runner

    # Make a map of model unique_ids to selected unit test unique_ids,
    # for processing before the model.
    def build_model_to_unit_test_map(self, selected_unit_tests):
        """Group `selected_unit_tests` by model and store the result on the task."""
        model_to_unit_tests: Dict[str, List] = {}
        for unit_test_unique_id in selected_unit_tests:
            unit_test = self.manifest.unit_tests[unit_test_unique_id]
            # NOTE(review): assumes the first dependency of a unit test is the
            # model under test and that the list is never empty -- confirm.
            model_unique_id = unit_test.depends_on.nodes[0]
            model_to_unit_tests.setdefault(model_unique_id, []).append(unit_test.unique_id)
        self.model_to_unit_test_map = model_to_unit_tests

    # We return two different kinds of selectors, one with unit tests and one without
    def get_node_selector(self, no_unit_tests=False) -> ResourceTypeSelector:
        """Return a selector restricted to this task's resource types.

        Raises:
            DbtInternalError: if the manifest or the graph has not been set.
        """
        if self.manifest is None or self.graph is None:
            raise DbtInternalError("manifest and graph must be set to get node selection")

        resource_types = self.resource_types(no_unit_tests)

        return ResourceTypeSelector(
            graph=self.graph,
            manifest=self.manifest,
            previous_state=self.previous_state,
            resource_types=resource_types,
        )

    def get_runner_type(self, node) -> Optional[Type[BaseRunner]]:
        """Return the runner class for `node`, or None if its type is not in RUNNER_MAP.

        Models that the parent task resolves to MicrobatchModelRunner keep that
        runner instead of the plain model runner from RUNNER_MAP.
        """
        if (
            node.resource_type == NodeType.Model
            and super().get_runner_type(node) == MicrobatchModelRunner
        ):
            return MicrobatchModelRunner

        return self.RUNNER_MAP.get(node.resource_type)

    # Special build compile_manifest method to pass add_test_edges to the compiler
    def compile_manifest(self) -> None:
        """Compile the manifest into `self.graph` with test edges added.

        Raises:
            DbtInternalError: if the manifest has not been loaded.
        """
        if self.manifest is None:
            raise DbtInternalError("compile_manifest called before manifest was loaded")
        self.graph: Graph = self.compiler.compile(self.manifest, add_test_edges=True)