dvt-core 0.59.0a51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2660 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +844 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +60 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +642 -0
  74. dbt/compute/federated_executor.py +1080 -0
  75. dbt/compute/filter_pushdown.py +273 -0
  76. dbt/compute/jar_provisioning.py +273 -0
  77. dbt/compute/java_compat.py +689 -0
  78. dbt/compute/jdbc_utils.py +1252 -0
  79. dbt/compute/metadata/__init__.py +63 -0
  80. dbt/compute/metadata/adapters_registry.py +370 -0
  81. dbt/compute/metadata/catalog_store.py +1036 -0
  82. dbt/compute/metadata/registry.py +674 -0
  83. dbt/compute/metadata/store.py +1020 -0
  84. dbt/compute/smart_selector.py +377 -0
  85. dbt/compute/spark_logger.py +272 -0
  86. dbt/compute/strategies/__init__.py +55 -0
  87. dbt/compute/strategies/base.py +165 -0
  88. dbt/compute/strategies/dataproc.py +207 -0
  89. dbt/compute/strategies/emr.py +203 -0
  90. dbt/compute/strategies/local.py +472 -0
  91. dbt/compute/strategies/standalone.py +262 -0
  92. dbt/config/__init__.py +4 -0
  93. dbt/config/catalogs.py +94 -0
  94. dbt/config/compute.py +513 -0
  95. dbt/config/dvt_profile.py +408 -0
  96. dbt/config/profile.py +422 -0
  97. dbt/config/project.py +888 -0
  98. dbt/config/project_utils.py +48 -0
  99. dbt/config/renderer.py +231 -0
  100. dbt/config/runtime.py +564 -0
  101. dbt/config/selectors.py +208 -0
  102. dbt/config/utils.py +77 -0
  103. dbt/constants.py +28 -0
  104. dbt/context/__init__.py +0 -0
  105. dbt/context/base.py +745 -0
  106. dbt/context/configured.py +135 -0
  107. dbt/context/context_config.py +382 -0
  108. dbt/context/docs.py +82 -0
  109. dbt/context/exceptions_jinja.py +178 -0
  110. dbt/context/macro_resolver.py +195 -0
  111. dbt/context/macros.py +171 -0
  112. dbt/context/manifest.py +72 -0
  113. dbt/context/providers.py +2249 -0
  114. dbt/context/query_header.py +13 -0
  115. dbt/context/secret.py +58 -0
  116. dbt/context/target.py +74 -0
  117. dbt/contracts/__init__.py +0 -0
  118. dbt/contracts/files.py +413 -0
  119. dbt/contracts/graph/__init__.py +0 -0
  120. dbt/contracts/graph/manifest.py +1904 -0
  121. dbt/contracts/graph/metrics.py +97 -0
  122. dbt/contracts/graph/model_config.py +70 -0
  123. dbt/contracts/graph/node_args.py +42 -0
  124. dbt/contracts/graph/nodes.py +1806 -0
  125. dbt/contracts/graph/semantic_manifest.py +232 -0
  126. dbt/contracts/graph/unparsed.py +811 -0
  127. dbt/contracts/project.py +419 -0
  128. dbt/contracts/results.py +53 -0
  129. dbt/contracts/selection.py +23 -0
  130. dbt/contracts/sql.py +85 -0
  131. dbt/contracts/state.py +68 -0
  132. dbt/contracts/util.py +46 -0
  133. dbt/deprecations.py +348 -0
  134. dbt/deps/__init__.py +0 -0
  135. dbt/deps/base.py +152 -0
  136. dbt/deps/git.py +195 -0
  137. dbt/deps/local.py +79 -0
  138. dbt/deps/registry.py +130 -0
  139. dbt/deps/resolver.py +149 -0
  140. dbt/deps/tarball.py +120 -0
  141. dbt/docs/source/_ext/dbt_click.py +119 -0
  142. dbt/docs/source/conf.py +32 -0
  143. dbt/env_vars.py +64 -0
  144. dbt/event_time/event_time.py +40 -0
  145. dbt/event_time/sample_window.py +60 -0
  146. dbt/events/__init__.py +15 -0
  147. dbt/events/base_types.py +36 -0
  148. dbt/events/core_types_pb2.py +2 -0
  149. dbt/events/logging.py +108 -0
  150. dbt/events/types.py +2516 -0
  151. dbt/exceptions.py +1486 -0
  152. dbt/flags.py +89 -0
  153. dbt/graph/__init__.py +11 -0
  154. dbt/graph/cli.py +249 -0
  155. dbt/graph/graph.py +172 -0
  156. dbt/graph/queue.py +214 -0
  157. dbt/graph/selector.py +374 -0
  158. dbt/graph/selector_methods.py +975 -0
  159. dbt/graph/selector_spec.py +222 -0
  160. dbt/graph/thread_pool.py +18 -0
  161. dbt/hooks.py +21 -0
  162. dbt/include/README.md +49 -0
  163. dbt/include/__init__.py +3 -0
  164. dbt/include/data/adapters_registry.duckdb +0 -0
  165. dbt/include/data/build_comprehensive_registry.py +1254 -0
  166. dbt/include/data/build_registry.py +242 -0
  167. dbt/include/data/csv/adapter_queries.csv +33 -0
  168. dbt/include/data/csv/syntax_rules.csv +9 -0
  169. dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
  170. dbt/include/data/csv/type_mappings_databricks.csv +30 -0
  171. dbt/include/data/csv/type_mappings_mysql.csv +40 -0
  172. dbt/include/data/csv/type_mappings_oracle.csv +30 -0
  173. dbt/include/data/csv/type_mappings_postgres.csv +56 -0
  174. dbt/include/data/csv/type_mappings_redshift.csv +33 -0
  175. dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
  176. dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
  177. dbt/include/dvt_starter_project/README.md +15 -0
  178. dbt/include/dvt_starter_project/__init__.py +3 -0
  179. dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
  180. dbt/include/dvt_starter_project/dvt_project.yml +39 -0
  181. dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
  182. dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
  183. dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
  184. dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
  185. dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
  186. dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
  187. dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
  188. dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
  189. dbt/internal_deprecations.py +26 -0
  190. dbt/jsonschemas/__init__.py +3 -0
  191. dbt/jsonschemas/jsonschemas.py +309 -0
  192. dbt/jsonschemas/project/0.0.110.json +4717 -0
  193. dbt/jsonschemas/project/0.0.85.json +2015 -0
  194. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  195. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  196. dbt/jsonschemas/resources/latest.json +6773 -0
  197. dbt/links.py +4 -0
  198. dbt/materializations/__init__.py +0 -0
  199. dbt/materializations/incremental/__init__.py +0 -0
  200. dbt/materializations/incremental/microbatch.py +236 -0
  201. dbt/mp_context.py +8 -0
  202. dbt/node_types.py +37 -0
  203. dbt/parser/__init__.py +23 -0
  204. dbt/parser/analysis.py +21 -0
  205. dbt/parser/base.py +548 -0
  206. dbt/parser/common.py +266 -0
  207. dbt/parser/docs.py +52 -0
  208. dbt/parser/fixtures.py +51 -0
  209. dbt/parser/functions.py +30 -0
  210. dbt/parser/generic_test.py +100 -0
  211. dbt/parser/generic_test_builders.py +333 -0
  212. dbt/parser/hooks.py +122 -0
  213. dbt/parser/macros.py +137 -0
  214. dbt/parser/manifest.py +2208 -0
  215. dbt/parser/models.py +573 -0
  216. dbt/parser/partial.py +1178 -0
  217. dbt/parser/read_files.py +445 -0
  218. dbt/parser/schema_generic_tests.py +422 -0
  219. dbt/parser/schema_renderer.py +111 -0
  220. dbt/parser/schema_yaml_readers.py +935 -0
  221. dbt/parser/schemas.py +1466 -0
  222. dbt/parser/search.py +149 -0
  223. dbt/parser/seeds.py +28 -0
  224. dbt/parser/singular_test.py +20 -0
  225. dbt/parser/snapshots.py +44 -0
  226. dbt/parser/sources.py +558 -0
  227. dbt/parser/sql.py +62 -0
  228. dbt/parser/unit_tests.py +621 -0
  229. dbt/plugins/__init__.py +20 -0
  230. dbt/plugins/contracts.py +9 -0
  231. dbt/plugins/exceptions.py +2 -0
  232. dbt/plugins/manager.py +163 -0
  233. dbt/plugins/manifest.py +21 -0
  234. dbt/profiler.py +20 -0
  235. dbt/py.typed +1 -0
  236. dbt/query_analyzer.py +410 -0
  237. dbt/runners/__init__.py +2 -0
  238. dbt/runners/exposure_runner.py +7 -0
  239. dbt/runners/no_op_runner.py +45 -0
  240. dbt/runners/saved_query_runner.py +7 -0
  241. dbt/selected_resources.py +8 -0
  242. dbt/task/__init__.py +0 -0
  243. dbt/task/base.py +506 -0
  244. dbt/task/build.py +197 -0
  245. dbt/task/clean.py +56 -0
  246. dbt/task/clone.py +161 -0
  247. dbt/task/compile.py +150 -0
  248. dbt/task/compute.py +458 -0
  249. dbt/task/debug.py +513 -0
  250. dbt/task/deps.py +280 -0
  251. dbt/task/docs/__init__.py +3 -0
  252. dbt/task/docs/api/__init__.py +23 -0
  253. dbt/task/docs/api/catalog.py +204 -0
  254. dbt/task/docs/api/lineage.py +234 -0
  255. dbt/task/docs/api/profile.py +204 -0
  256. dbt/task/docs/api/spark.py +186 -0
  257. dbt/task/docs/generate.py +1002 -0
  258. dbt/task/docs/index.html +250 -0
  259. dbt/task/docs/serve.py +174 -0
  260. dbt/task/dvt_output.py +509 -0
  261. dbt/task/dvt_run.py +282 -0
  262. dbt/task/dvt_seed.py +806 -0
  263. dbt/task/freshness.py +322 -0
  264. dbt/task/function.py +121 -0
  265. dbt/task/group_lookup.py +46 -0
  266. dbt/task/init.py +1022 -0
  267. dbt/task/java.py +316 -0
  268. dbt/task/list.py +236 -0
  269. dbt/task/metadata.py +804 -0
  270. dbt/task/migrate.py +714 -0
  271. dbt/task/printer.py +175 -0
  272. dbt/task/profile.py +1489 -0
  273. dbt/task/profile_serve.py +662 -0
  274. dbt/task/retract.py +441 -0
  275. dbt/task/retry.py +175 -0
  276. dbt/task/run.py +1647 -0
  277. dbt/task/run_operation.py +141 -0
  278. dbt/task/runnable.py +758 -0
  279. dbt/task/seed.py +103 -0
  280. dbt/task/show.py +149 -0
  281. dbt/task/snapshot.py +56 -0
  282. dbt/task/spark.py +414 -0
  283. dbt/task/sql.py +110 -0
  284. dbt/task/target_sync.py +814 -0
  285. dbt/task/test.py +464 -0
  286. dbt/tests/fixtures/__init__.py +1 -0
  287. dbt/tests/fixtures/project.py +620 -0
  288. dbt/tests/util.py +651 -0
  289. dbt/tracking.py +529 -0
  290. dbt/utils/__init__.py +3 -0
  291. dbt/utils/artifact_upload.py +151 -0
  292. dbt/utils/utils.py +408 -0
  293. dbt/version.py +271 -0
  294. dvt_cli/__init__.py +158 -0
  295. dvt_core-0.59.0a51.dist-info/METADATA +288 -0
  296. dvt_core-0.59.0a51.dist-info/RECORD +299 -0
  297. dvt_core-0.59.0a51.dist-info/WHEEL +5 -0
  298. dvt_core-0.59.0a51.dist-info/entry_points.txt +2 -0
  299. dvt_core-0.59.0a51.dist-info/top_level.txt +2 -0
dbt/task/runnable.py ADDED
@@ -0,0 +1,758 @@
1
+ import os
2
+ import time
3
+ from abc import abstractmethod
4
+ from concurrent.futures import as_completed
5
+ from datetime import datetime, timezone
6
+ from pathlib import Path
7
+ from typing import AbstractSet, Dict, Iterable, List, Optional, Set, Tuple, Type, Union
8
+
9
+ import dbt.exceptions
10
+ import dbt.tracking
11
+ import dbt.utils
12
+ import dbt_common.utils.formatting
13
+ from dbt.adapters.base import BaseAdapter, BaseRelation
14
+ from dbt.adapters.factory import get_adapter
15
+ from dbt.artifacts.schemas.results import (
16
+ BaseResult,
17
+ NodeStatus,
18
+ RunningStatus,
19
+ RunStatus,
20
+ )
21
+ from dbt.artifacts.schemas.run import RunExecutionResult, RunResult
22
+ from dbt.cli.flags import Flags
23
+ from dbt.config.runtime import RuntimeConfig
24
+ from dbt.constants import RUN_RESULTS_FILE_NAME
25
+ from dbt.contracts.graph.manifest import Manifest
26
+ from dbt.contracts.graph.nodes import Exposure, ResultNode
27
+ from dbt.contracts.state import PreviousState
28
+ from dbt.events.types import (
29
+ ArtifactWritten,
30
+ ConcurrencyLine,
31
+ DefaultSelector,
32
+ EndRunResult,
33
+ GenericExceptionOnRun,
34
+ LogCancelLine,
35
+ MarkSkippedChildren,
36
+ NodeFinished,
37
+ NodeStart,
38
+ NothingToDo,
39
+ QueryCancelationUnsupported,
40
+ SkippingDetails,
41
+ )
42
+ from dbt.exceptions import DbtInternalError, DbtRuntimeError, FailFastError
43
+ from dbt.flags import get_flags
44
+ from dbt.graph import (
45
+ GraphQueue,
46
+ NodeSelector,
47
+ SelectionSpec,
48
+ UniqueId,
49
+ parse_difference,
50
+ )
51
+ from dbt.graph.thread_pool import DbtThreadPool
52
+ from dbt.parser.manifest import write_manifest
53
+ from dbt.task import group_lookup
54
+ from dbt.task.base import BaseRunner, ConfiguredTask
55
+ from dbt.task.printer import print_run_end_messages, print_run_result_error
56
+ from dbt.utils.artifact_upload import add_artifact_produced
57
+ from dbt_common.context import _INVOCATION_CONTEXT_VAR, get_invocation_context
58
+ from dbt_common.dataclass_schema import StrEnum
59
+ from dbt_common.events.contextvars import log_contextvars, task_contextvars
60
+ from dbt_common.events.functions import fire_event, warn_or_error
61
+ from dbt_common.events.types import Formatting
62
+ from dbt_common.exceptions import NotImplementedError
63
+
64
+
65
class GraphRunnableMode(StrEnum):
    # Execution ordering for the graph queue: "topological" preserves
    # dependency edges between nodes; "independent" drops them so every
    # selected node may run as soon as a worker thread is free.
    Topological = "topological"
    Independent = "independent"
68
+
69
+
70
def mark_node_as_skipped(
    node: ResultNode, executed_node_ids: Set[str], message: Optional[str]
) -> Optional[RunResult]:
    """Build a Skipped RunResult for *node*, unless it already executed.

    Returns None when the node's unique_id appears in *executed_node_ids*,
    i.e. a real result already exists for it.
    """
    if node.unique_id in executed_node_ids:
        return None
    return RunResult.from_node(node, RunStatus.Skipped, message)
76
+
77
+
78
+ class GraphRunnableTask(ConfiguredTask):
79
+ MARK_DEPENDENT_ERRORS_STATUSES = [NodeStatus.Error, NodeStatus.PartialSuccess]
80
+
81
    def __init__(self, args: Flags, config: RuntimeConfig, manifest: Manifest) -> None:
        """Initialize shared state for any task that executes the node graph."""
        super().__init__(args, config, manifest)
        self.config = config
        # Nodes selected for this invocation, flattened out of the job queue.
        self._flattened_nodes: Optional[List[ResultNode]] = None
        # Error stashed by a worker thread, re-raised on the main thread.
        self._raise_next_tick: Optional[DbtRuntimeError] = None
        # unique_id -> causing RunResult (or None) for nodes to skip because
        # an upstream node failed.
        self._skipped_children: Dict[str, Optional[RunResult]] = {}
        self.job_queue: Optional[GraphQueue] = None
        self.node_results: List[BaseResult] = []
        self.num_nodes: int = 0
        self.previous_state: Optional[PreviousState] = None
        self.previous_defer_state: Optional[PreviousState] = None
        self.run_count: int = 0
        self.started_at: float = 0

        # --state: prior artifacts used for state-based (state:modified) selection.
        if self.args.state:
            self.previous_state = PreviousState(
                state_path=self.args.state,
                target_path=Path(self.config.target_path),
                project_root=Path(self.config.project_root),
            )

        # --defer-state: prior artifacts that unselected nodes defer to.
        if self.args.defer_state:
            self.previous_defer_state = PreviousState(
                state_path=self.args.defer_state,
                target_path=Path(self.config.target_path),
                project_root=Path(self.config.project_root),
            )
108
+
109
+ def index_offset(self, value: int) -> int:
110
+ return value
111
+
112
+ @property
113
+ def selection_arg(self):
114
+ return self.args.select
115
+
116
+ @property
117
+ def exclusion_arg(self):
118
+ return self.args.exclude
119
+
120
    def get_selection_spec(self) -> SelectionSpec:
        """Resolve the selection spec for this invocation.

        Precedence: inline SQL (empty spec) > --selector > a project selector
        marked `default: true` (only when no --select/--exclude was given) >
        the --select/--exclude arguments.
        """
        default_selector_name = self.config.get_default_selector_name()
        spec: Union[SelectionSpec, bool]
        if hasattr(self.args, "inline") and self.args.inline:
            # We want an empty selection spec.
            spec = parse_difference(None, None)
        elif self.args.selector:
            # use pre-defined selector (--selector)
            spec = self.config.get_selector(self.args.selector)
        elif not (self.selection_arg or self.exclusion_arg) and default_selector_name:
            # use pre-defined selector (--selector) with default: true
            fire_event(DefaultSelector(name=default_selector_name))
            spec = self.config.get_selector(default_selector_name)
        else:
            # This is what's used with no default selector and no selection
            # use --select and --exclude args
            spec = parse_difference(self.selection_arg, self.exclusion_arg)
        # mypy complains because the return values of get_selector and parse_difference
        # are different
        return spec  # type: ignore
140
+
141
    @abstractmethod
    def get_node_selector(self) -> NodeSelector:
        # Subclasses must provide the selector appropriate for their task type.
        raise NotImplementedError(f"get_node_selector not implemented for task {type(self)}")
144
+
145
+ def defer_to_manifest(self):
146
+ deferred_manifest = self._get_deferred_manifest()
147
+ if deferred_manifest is None:
148
+ return
149
+ if self.manifest is None:
150
+ raise DbtInternalError(
151
+ "Expected to defer to manifest, but there is no runtime manifest to defer from!"
152
+ )
153
+ self.manifest.merge_from_artifact(other=deferred_manifest)
154
+
155
+ def get_graph_queue(self) -> GraphQueue:
156
+ selector = self.get_node_selector()
157
+ # Following uses self.selection_arg and self.exclusion_arg
158
+ spec = self.get_selection_spec()
159
+
160
+ preserve_edges = True
161
+ if self.get_run_mode() == GraphRunnableMode.Independent:
162
+ preserve_edges = False
163
+
164
+ return selector.get_graph_queue(spec, preserve_edges)
165
+
166
+ def get_run_mode(self) -> GraphRunnableMode:
167
+ return GraphRunnableMode.Topological
168
+
169
+ def _runtime_initialize(self):
170
+ self.compile_manifest()
171
+ if self.manifest is None or self.graph is None:
172
+ raise DbtInternalError("_runtime_initialize never loaded the graph!")
173
+
174
+ self.job_queue = self.get_graph_queue()
175
+
176
+ # we use this a couple of times. order does not matter.
177
+ self._flattened_nodes = []
178
+ for uid in self.job_queue.get_selected_nodes():
179
+ if uid in self.manifest.nodes:
180
+ self._flattened_nodes.append(self.manifest.nodes[uid])
181
+ elif uid in self.manifest.sources:
182
+ self._flattened_nodes.append(self.manifest.sources[uid])
183
+ elif uid in self.manifest.saved_queries:
184
+ self._flattened_nodes.append(self.manifest.saved_queries[uid])
185
+ elif uid in self.manifest.unit_tests:
186
+ self._flattened_nodes.append(self.manifest.unit_tests[uid])
187
+ elif uid in self.manifest.exposures:
188
+ self._flattened_nodes.append(self.manifest.exposures[uid])
189
+ elif uid in self.manifest.functions:
190
+ self._flattened_nodes.append(self.manifest.functions[uid])
191
+ else:
192
+ raise DbtInternalError(
193
+ f"Node selection returned {uid}, expected an exposure, a function, a node, a saved query, a source, or a unit test"
194
+ )
195
+
196
+ self.num_nodes = len([n for n in self._flattened_nodes if not n.is_ephemeral_model])
197
+
198
+ def raise_on_first_error(self) -> bool:
199
+ return False
200
+
201
+ def get_runner_type(self, node) -> Optional[Type[BaseRunner]]:
202
+ raise NotImplementedError("Not Implemented")
203
+
204
+ def result_path(self) -> str:
205
+ return os.path.join(self.config.project_target_path, RUN_RESULTS_FILE_NAME)
206
+
207
+ def get_runner(self, node) -> BaseRunner:
208
+ adapter = get_adapter(self.config)
209
+ run_count: int = 0
210
+ num_nodes: int = 0
211
+
212
+ if node.is_ephemeral_model:
213
+ run_count = 0
214
+ num_nodes = 0
215
+ else:
216
+ self.run_count += 1
217
+ run_count = self.run_count
218
+ num_nodes = self.num_nodes
219
+
220
+ cls = self.get_runner_type(node)
221
+
222
+ if cls is None:
223
+ raise DbtInternalError("Could not find runner type for node.")
224
+
225
+ return cls(self.config, adapter, node, run_count, num_nodes)
226
+
227
    def call_runner(self, runner: BaseRunner) -> RunResult:
        """Execute one runner on a worker thread and return its RunResult.

        Fires NodeStart/NodeFinished events around the run, converts an
        uncaught exception into a synthesized Error result, and stashes
        fail-fast / raise-on-first-error conditions in _raise_next_tick so
        the main thread can re-raise them (exceptions raised here would be
        swallowed by the pool).
        """
        with log_contextvars(node_info=runner.node.node_info):
            runner.node.update_event_status(
                started_at=datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
                node_status=RunningStatus.Started,
            )
            fire_event(
                NodeStart(
                    node_info=runner.node.node_info,
                )
            )

            result = None
            thread_exception: Optional[Union[KeyboardInterrupt, SystemExit, Exception]] = None
            try:
                result = runner.run_with_hooks(self.manifest)
            except (KeyboardInterrupt, SystemExit) as exe:
                # Interrupts must propagate; the finally block still runs
                # and synthesizes an Error result for bookkeeping.
                result = None
                thread_exception = exe
                raise
            except Exception as e:
                result = None
                thread_exception = e
            finally:
                if result is not None:
                    fire_event(
                        NodeFinished(
                            node_info=runner.node.node_info,
                            run_result=result.to_msg_dict(),
                        )
                    )
                else:
                    msg = f"Exception on worker thread. {thread_exception}"

                    fire_event(
                        GenericExceptionOnRun(
                            unique_id=runner.node.unique_id,
                            exc=str(thread_exception),
                            node_info=runner.node.node_info,
                        )
                    )

                    # Synthesize an Error result so downstream bookkeeping
                    # (skip propagation, run_results.json) still sees this node.
                    result = RunResult(
                        status=RunStatus.Error,  # type: ignore
                        timing=[],
                        thread_id="",
                        execution_time=0.0,
                        adapter_response={},
                        message=msg,
                        failures=None,
                        batch_results=None,
                        node=runner.node,
                    )

                # `_event_status` dict is only used for logging. Make sure
                # it gets deleted when we're done with it
                runner.node.clear_event_status()

            fail_fast = get_flags().FAIL_FAST

            if (
                result.status in (NodeStatus.Error, NodeStatus.Fail, NodeStatus.PartialSuccess)
                and fail_fast
            ):
                self._raise_next_tick = FailFastError(
                    msg="Failing early due to test failure or runtime error",
                    result=result,
                    node=getattr(result, "node", None),
                )
            elif result.status == NodeStatus.Error and self.raise_on_first_error():
                # if we raise inside a thread, it'll just get silently swallowed.
                # stash the error message we want here, and it will check the
                # next 'tick' - should be soon since our thread is about to finish!
                self._raise_next_tick = DbtRuntimeError(result.message)

            return result
303
+
304
+ def _submit(self, pool, args, callback):
305
+ """If the caller has passed the magic 'single-threaded' flag, call the
306
+ function directly instead of pool.apply_async. The single-threaded flag
307
+ is intended for gathering more useful performance information about
308
+ what happens beneath `call_runner`, since python's default profiling
309
+ tools ignore child threads.
310
+
311
+ This does still go through the callback path for result collection.
312
+ """
313
+ if self.config.args.single_threaded:
314
+ callback(self.call_runner(*args))
315
+ else:
316
+ pool.apply_async(self.call_runner, args=args, callback=callback)
317
+
318
+ def _raise_set_error(self):
319
+ if self._raise_next_tick is not None:
320
+ raise self._raise_next_tick
321
+
322
    def run_queue(self, pool):
        """Given a pool, submit jobs from the queue to the pool."""
        if self.job_queue is None:
            raise DbtInternalError("Got to run_queue with no job queue set")

        def callback(result):
            """Note: mark_done, at a minimum, must happen here or dbt will
            deadlock during ephemeral result error handling!
            """
            self._handle_result(result)

            if self.job_queue is None:
                raise DbtInternalError("Got to run_queue callback with no job queue set")
            self.job_queue.mark_done(result.node.unique_id)

        while not self.job_queue.empty():
            self.handle_job_queue(pool, callback)

        # block on completion
        if get_flags().FAIL_FAST:
            # check for a stashed error after each completed task so we can
            # abort promptly when failing fast
            while self.job_queue.wait_until_something_was_done():
                self._raise_set_error()
        else:
            # wait until every task is complete
            self.job_queue.join()

        # if an error got set during join(), raise it.
        self._raise_set_error()

        return
354
+
355
+ # The build command overrides this
356
+ def handle_job_queue(self, pool, callback):
357
+ node = self.job_queue.get()
358
+ self._raise_set_error()
359
+ runner = self.get_runner(node)
360
+ # we finally know what we're running! Make sure we haven't decided
361
+ # to skip it due to upstream failures
362
+ if runner.node.unique_id in self._skipped_children:
363
+ cause = self._skipped_children.pop(runner.node.unique_id)
364
+ runner.do_skip(cause=cause)
365
+ args = [runner]
366
+ self._submit(pool, args, callback)
367
+
368
+ def _handle_result(self, result: RunResult) -> None:
369
+ """Mark the result as completed, insert the `CompileResultNode` into
370
+ the manifest, and mark any descendants (potentially with a 'cause' if
371
+ the result was an ephemeral model) as skipped.
372
+ """
373
+ is_ephemeral = result.node.is_ephemeral_model
374
+ if not is_ephemeral:
375
+ self.node_results.append(result)
376
+
377
+ node = result.node
378
+
379
+ if self.manifest is None:
380
+ raise DbtInternalError("manifest was None in _handle_result")
381
+
382
+ # If result.status == NodeStatus.Error, plus Fail for build command
383
+ if result.status in self.MARK_DEPENDENT_ERRORS_STATUSES:
384
+ if is_ephemeral:
385
+ cause = result
386
+ else:
387
+ cause = None
388
+ self._mark_dependent_errors(node.unique_id, result, cause)
389
+
390
    def _cancel_connections(self, pool):
        """Given a pool, cancel all adapter connections and wait until all
        runners gently terminate.
        """
        pool.close()
        pool.terminate()

        adapter = get_adapter(self.config)

        if not adapter.is_cancelable():
            fire_event(QueryCancelationUnsupported(type=adapter.type()))
        else:
            with adapter.connection_named("master"):
                for conn_name in adapter.cancel_open_connections():
                    if self.manifest is not None:
                        node = self.manifest.nodes.get(conn_name)
                        # Ephemeral-model cancellations aren't worth logging.
                        if node is not None and node.is_ephemeral_model:
                            continue
                    # if we don't have a manifest/don't have a node, print
                    # anyway.
                    fire_event(LogCancelLine(conn_name=conn_name))

        pool.join()
413
+
414
    def execute_nodes(self):
        """Run every queued node on a thread pool and collect node results.

        Three exit paths: normal completion; fail-fast (connections are
        cancelled and every unexecuted node is recorded as skipped); and
        interrupt (partial run results are written if requested, then the
        interrupt is re-raised).
        """
        num_threads = self.config.threads

        pool = DbtThreadPool(
            num_threads, self._pool_thread_initializer, [get_invocation_context()]
        )
        try:
            self.run_queue(pool)
        except FailFastError as failure:
            self._cancel_connections(pool)

            executed_node_ids = {r.node.unique_id for r in self.node_results}
            message = "Skipping due to fail_fast"

            for node in self._flattened_nodes:
                if node.unique_id not in executed_node_ids:
                    self.node_results.append(
                        mark_node_as_skipped(node, executed_node_ids, message)
                    )

            print_run_result_error(failure.result)
            # ensure information about all nodes is propagated to run results when failing fast
            return self.node_results
        except (KeyboardInterrupt, SystemExit):
            run_result = self.get_result(
                results=self.node_results,
                elapsed_time=time.time() - self.started_at,
                generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
            )

            # Persist whatever partial results we have before re-raising.
            if self.args.write_json and hasattr(run_result, "write"):
                run_result.write(self.result_path())
                add_artifact_produced(self.result_path())
                fire_event(
                    ArtifactWritten(
                        artifact_type=run_result.__class__.__name__,
                        artifact_path=self.result_path(),
                    )
                )

            self._cancel_connections(pool)
            print_run_end_messages(self.node_results, keyboard_interrupt=True)

            raise

        pool.close()
        pool.join()

        return self.node_results
463
+
464
    @staticmethod
    def _pool_thread_initializer(invocation_context):
        # Propagate the parent's invocation context into each worker thread.
        _INVOCATION_CONTEXT_VAR.set(invocation_context)
467
+
468
+ def _mark_dependent_errors(
469
+ self, node_id: str, result: RunResult, cause: Optional[RunResult]
470
+ ) -> None:
471
+ if self.graph is None:
472
+ raise DbtInternalError("graph is None in _mark_dependent_errors")
473
+ fire_event(
474
+ MarkSkippedChildren(
475
+ unique_id=node_id,
476
+ status=result.status,
477
+ run_result=result.to_msg_dict(),
478
+ )
479
+ )
480
+ for dep_node_id in self.graph.get_dependent_nodes(UniqueId(node_id)):
481
+ self._skipped_children[dep_node_id] = cause
482
+
483
    def populate_adapter_cache(
        self, adapter, required_schemas: Optional[Set[BaseRelation]] = None
    ):
        """Warm the adapter's relation cache, unless --no-populate-cache.

        Timing is reported to tracking when a tracking user is active.
        """
        if not self.args.populate_cache:
            return

        if self.manifest is None:
            raise DbtInternalError("manifest was None in populate_adapter_cache")

        start_populate_cache = time.perf_counter()
        # the cache only cares about executable nodes
        cachable_nodes = [
            node
            for node in self.manifest.nodes.values()
            if (node.is_relational and not node.is_ephemeral_model and not node.is_external_node)
        ]

        if get_flags().CACHE_SELECTED_ONLY is True:
            # Limit cache population to the schemas this selection touches.
            adapter.set_relations_cache(cachable_nodes, required_schemas=required_schemas)
        else:
            adapter.set_relations_cache(cachable_nodes)
        cache_populate_time = time.perf_counter() - start_populate_cache
        if dbt.tracking.active_user is not None:
            dbt.tracking.track_runnable_timing(
                {"adapter_cache_construction_elapsed": cache_populate_time}
            )
509
+
510
    def before_run(self, adapter: BaseAdapter, selected_uids: AbstractSet[str]) -> RunStatus:
        """Prepare the adapter before node execution; base version always succeeds."""
        with adapter.connection_named("master"):
            self.defer_to_manifest()
            self.populate_adapter_cache(adapter)
            return RunStatus.Success
515
+
516
+ def after_run(self, adapter, results) -> None:
517
+ pass
518
+
519
+ def print_results_line(self, node_results, elapsed):
520
+ pass
521
+
522
    def execute_with_hooks(self, selected_uids: AbstractSet[str]):
        """Run before_run, then the selected nodes, then after_run.

        Returns the aggregate result object from get_result. Connections are
        always cleaned up and the result assembled in the finally block, even
        when execution raises.
        """
        adapter = get_adapter(self.config)

        fire_event(Formatting(""))
        fire_event(
            ConcurrencyLine(
                num_threads=self.config.threads,
                target_name=self.config.target_name,
                node_count=self.num_nodes,
            )
        )
        fire_event(Formatting(""))

        self.started_at = time.time()
        try:
            before_run_status = self.before_run(adapter, selected_uids)
            if before_run_status == RunStatus.Success or (
                not get_flags().skip_nodes_if_on_run_start_fails
            ):
                res = self.execute_nodes()
            else:
                # before_run failed and the flag says to skip the run: record
                # every unexecuted node as skipped instead of executing.
                executed_node_ids = {
                    r.node.unique_id for r in self.node_results if hasattr(r, "node")
                }

                res = []

                for index, node in enumerate(self._flattened_nodes or []):
                    group = group_lookup.get(node.unique_id)

                    if node.unique_id not in executed_node_ids:
                        fire_event(
                            SkippingDetails(
                                resource_type=node.resource_type,
                                schema=node.schema,
                                node_name=node.name,
                                index=index + 1,
                                total=self.num_nodes,
                                node_info=node.node_info,
                                group=group,
                            )
                        )
                        skipped_node_result = mark_node_as_skipped(node, executed_node_ids, None)
                        if skipped_node_result:
                            self.node_results.append(skipped_node_result)

            self.after_run(adapter, res)
        finally:
            adapter.cleanup_connections()
            elapsed = time.time() - self.started_at
            self.print_results_line(self.node_results, elapsed)
            result = self.get_result(
                results=self.node_results,
                elapsed_time=elapsed,
                generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
            )

        return result
580
+
581
    def run(self):
        """
        Run dbt for the query, based on the graph.

        Initializes the runtime graph, executes the selected nodes (or warns
        when nothing is selected), fires an EndRunResult event for run
        execution results, optionally writes manifest/result artifacts, and
        returns the result object.
        """
        # We set up a context manager here with "task_contextvars" because we
        # need the project_root in runtime_initialize.
        with task_contextvars(project_root=self.config.project_root):
            self._runtime_initialize()

            if self._flattened_nodes is None:
                raise DbtInternalError(
                    "after _runtime_initialize, _flattened_nodes was still None"
                )

            if len(self._flattened_nodes) == 0:
                # Nothing matched the selection criteria: warn and build an
                # empty result rather than executing.
                warn_or_error(NothingToDo())
                result = self.get_result(
                    results=[],
                    generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
                    elapsed_time=0.0,
                )
            else:
                selected_uids = frozenset(n.unique_id for n in self._flattened_nodes)
                result = self.execute_with_hooks(selected_uids)

        # We have other result types here too, including FreshnessResult
        if isinstance(result, RunExecutionResult):
            result_msgs = [result.to_msg_dict() for result in result.results]
            fire_event(
                EndRunResult(
                    results=result_msgs,
                    generated_at=result.generated_at.strftime("%Y-%m-%dT%H:%M:%SZ"),
                    elapsed_time=result.elapsed_time,
                    success=GraphRunnableTask.interpret_results(result.results),
                )
            )

        if self.args.write_json:
            write_manifest(self.manifest, self.config.project_target_path)
            # Not every result type is writable; only persist those that are.
            if hasattr(result, "write"):
                result.write(self.result_path())
                add_artifact_produced(self.result_path())
                fire_event(
                    ArtifactWritten(
                        artifact_type=result.__class__.__name__, artifact_path=self.result_path()
                    )
                )

        self.task_end_messages(result.results)
        return result
+ @classmethod
633
+ def interpret_results(cls, results):
634
+ if results is None:
635
+ return False
636
+
637
+ num_runtime_errors = len([r for r in results if r.status == NodeStatus.RuntimeErr])
638
+ num_errors = len([r for r in results if r.status == NodeStatus.Error])
639
+ num_fails = len([r for r in results if r.status == NodeStatus.Fail])
640
+ num_skipped = len(
641
+ [
642
+ r
643
+ for r in results
644
+ if r.status == NodeStatus.Skipped and not isinstance(r.node, Exposure)
645
+ ]
646
+ )
647
+ num_partial_success = len([r for r in results if r.status == NodeStatus.PartialSuccess])
648
+ num_total = num_runtime_errors + num_errors + num_fails + num_skipped + num_partial_success
649
+ return num_total == 0
650
+
651
+ def get_model_schemas(self, adapter, selected_uids: Iterable[str]) -> Set[BaseRelation]:
652
+ if self.manifest is None:
653
+ raise DbtInternalError("manifest was None in get_model_schemas")
654
+ result: Set[BaseRelation] = set()
655
+
656
+ for node in self.manifest.nodes.values():
657
+ if node.unique_id not in selected_uids:
658
+ continue
659
+ if node.is_relational and not node.is_ephemeral:
660
+ relation = adapter.Relation.create_from(self.config, node)
661
+ result.add(relation.without_identifier())
662
+
663
+ return result
664
+
665
    def create_schemas(self, adapter, required_schemas: Set[BaseRelation]):
        """Create any schemas in required_schemas that do not already exist.

        Lists existing schemas per database concurrently, then submits
        create-schema work only for (database, schema) pairs that were not
        found, de-duplicating case-insensitively. Exceptions from either
        phase are re-raised via future.result().
        """
        # we want the string form of the information schema database
        required_databases: Set[BaseRelation] = set()
        for required in required_schemas:
            db_only = required.include(database=True, schema=False, identifier=False)
            required_databases.add(db_only)

        # Lowercased (database, schema) pairs observed in the warehouse.
        existing_schemas_lowered: Set[Tuple[Optional[str], Optional[str]]]
        existing_schemas_lowered = set()

        def list_schemas(db_only: BaseRelation) -> List[Tuple[Optional[str], str]]:
            # List one database's schemas as lowercased (db, schema) pairs.
            # the database can be None on some warehouses that don't support it
            database_quoted: Optional[str]
            db_lowercase = dbt_common.utils.formatting.lowercase(db_only.database)
            if db_only.database is None:
                database_quoted = None
            else:
                database_quoted = str(db_only)

            # we should never create a null schema, so just filter them out
            return [
                (db_lowercase, s.lower())
                for s in adapter.list_schemas(database_quoted)
                if s is not None
            ]

        def create_schema(relation: BaseRelation) -> None:
            # Create one schema on its own named connection.
            db = relation.database or ""
            schema = relation.schema
            with adapter.connection_named(f"create_{db}_{schema}"):
                adapter.create_schema(relation)

        list_futures = []
        create_futures = []

        # TODO: following has a mypy issue because profile and project config
        # defines threads as int and HasThreadingConfig defines it as Optional[int]
        with dbt_common.utils.executor(self.config) as tpe:  # type: ignore
            # Phase 1: fan out one list_schemas call per database.
            for req in required_databases:
                if req.database is None:
                    name = "list_schemas"
                else:
                    name = f"list_{req.database}"
                fut = tpe.submit_connected(adapter, name, list_schemas, req)
                list_futures.append(fut)

            for ls_future in as_completed(list_futures):
                existing_schemas_lowered.update(ls_future.result())

            # Phase 2: create only the schemas that were not found above.
            for info in required_schemas:
                if info.schema is None:
                    # we are not in the business of creating null schemas, so
                    # skip this
                    continue
                db: Optional[str] = info.database
                db_lower: Optional[str] = dbt_common.utils.formatting.lowercase(db)
                schema: str = info.schema

                db_schema = (db_lower, schema.lower())
                if db_schema not in existing_schemas_lowered:
                    # Add before submitting so duplicate requests in
                    # required_schemas only create the schema once.
                    existing_schemas_lowered.add(db_schema)
                    fut = tpe.submit_connected(
                        adapter, f'create_{info.database or ""}_{info.schema}', create_schema, info
                    )
                    create_futures.append(fut)

            for create_future in as_completed(create_futures):
                # trigger/re-raise any exceptions while creating schemas
                create_future.result()
+ def get_result(self, results, elapsed_time, generated_at):
736
+ return RunExecutionResult(
737
+ results=results,
738
+ elapsed_time=elapsed_time,
739
+ generated_at=generated_at,
740
+ args=dbt.utils.args_to_dict(self.args),
741
+ )
742
+
743
    def task_end_messages(self, results) -> None:
        """Emit the end-of-run summary messages for the given results."""
        print_run_end_messages(results)
+ def _get_previous_state(self) -> Optional[Manifest]:
747
+ state = self.previous_defer_state or self.previous_state
748
+ if not state:
749
+ raise DbtRuntimeError(
750
+ "--state or --defer-state are required for deferral, but neither was provided"
751
+ )
752
+
753
+ if not state.manifest:
754
+ raise DbtRuntimeError(f'Could not find manifest in --state path: "{state.state_path}"')
755
+ return state.manifest
756
+
757
+ def _get_deferred_manifest(self) -> Optional[Manifest]:
758
+ return self._get_previous_state() if self.args.defer else None