dvt-core 0.52.2__cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (275) hide show
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2039 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +804 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +50 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +624 -0
  74. dbt/compute/federated_executor.py +837 -0
  75. dbt/compute/filter_pushdown.cpython-310-darwin.so +0 -0
  76. dbt/compute/filter_pushdown.py +273 -0
  77. dbt/compute/jar_provisioning.cpython-310-darwin.so +0 -0
  78. dbt/compute/jar_provisioning.py +255 -0
  79. dbt/compute/java_compat.cpython-310-darwin.so +0 -0
  80. dbt/compute/java_compat.py +689 -0
  81. dbt/compute/jdbc_utils.cpython-310-darwin.so +0 -0
  82. dbt/compute/jdbc_utils.py +678 -0
  83. dbt/compute/smart_selector.cpython-310-darwin.so +0 -0
  84. dbt/compute/smart_selector.py +311 -0
  85. dbt/compute/strategies/__init__.py +54 -0
  86. dbt/compute/strategies/base.py +165 -0
  87. dbt/compute/strategies/dataproc.py +207 -0
  88. dbt/compute/strategies/emr.py +203 -0
  89. dbt/compute/strategies/local.py +364 -0
  90. dbt/compute/strategies/standalone.py +262 -0
  91. dbt/config/__init__.py +4 -0
  92. dbt/config/catalogs.py +94 -0
  93. dbt/config/compute.cpython-310-darwin.so +0 -0
  94. dbt/config/compute.py +547 -0
  95. dbt/config/dvt_profile.cpython-310-darwin.so +0 -0
  96. dbt/config/dvt_profile.py +342 -0
  97. dbt/config/profile.py +422 -0
  98. dbt/config/project.py +873 -0
  99. dbt/config/project_utils.py +28 -0
  100. dbt/config/renderer.py +231 -0
  101. dbt/config/runtime.py +553 -0
  102. dbt/config/selectors.py +208 -0
  103. dbt/config/utils.py +77 -0
  104. dbt/constants.py +28 -0
  105. dbt/context/__init__.py +0 -0
  106. dbt/context/base.py +745 -0
  107. dbt/context/configured.py +135 -0
  108. dbt/context/context_config.py +382 -0
  109. dbt/context/docs.py +82 -0
  110. dbt/context/exceptions_jinja.py +178 -0
  111. dbt/context/macro_resolver.py +195 -0
  112. dbt/context/macros.py +171 -0
  113. dbt/context/manifest.py +72 -0
  114. dbt/context/providers.py +2249 -0
  115. dbt/context/query_header.py +13 -0
  116. dbt/context/secret.py +58 -0
  117. dbt/context/target.py +74 -0
  118. dbt/contracts/__init__.py +0 -0
  119. dbt/contracts/files.py +413 -0
  120. dbt/contracts/graph/__init__.py +0 -0
  121. dbt/contracts/graph/manifest.py +1904 -0
  122. dbt/contracts/graph/metrics.py +97 -0
  123. dbt/contracts/graph/model_config.py +70 -0
  124. dbt/contracts/graph/node_args.py +42 -0
  125. dbt/contracts/graph/nodes.py +1806 -0
  126. dbt/contracts/graph/semantic_manifest.py +232 -0
  127. dbt/contracts/graph/unparsed.py +811 -0
  128. dbt/contracts/project.py +417 -0
  129. dbt/contracts/results.py +53 -0
  130. dbt/contracts/selection.py +23 -0
  131. dbt/contracts/sql.py +85 -0
  132. dbt/contracts/state.py +68 -0
  133. dbt/contracts/util.py +46 -0
  134. dbt/deprecations.py +346 -0
  135. dbt/deps/__init__.py +0 -0
  136. dbt/deps/base.py +152 -0
  137. dbt/deps/git.py +195 -0
  138. dbt/deps/local.py +79 -0
  139. dbt/deps/registry.py +130 -0
  140. dbt/deps/resolver.py +149 -0
  141. dbt/deps/tarball.py +120 -0
  142. dbt/docs/source/_ext/dbt_click.py +119 -0
  143. dbt/docs/source/conf.py +32 -0
  144. dbt/env_vars.py +64 -0
  145. dbt/event_time/event_time.py +40 -0
  146. dbt/event_time/sample_window.py +60 -0
  147. dbt/events/__init__.py +15 -0
  148. dbt/events/base_types.py +36 -0
  149. dbt/events/core_types_pb2.py +2 -0
  150. dbt/events/logging.py +108 -0
  151. dbt/events/types.py +2516 -0
  152. dbt/exceptions.py +1486 -0
  153. dbt/flags.py +89 -0
  154. dbt/graph/__init__.py +11 -0
  155. dbt/graph/cli.py +247 -0
  156. dbt/graph/graph.py +172 -0
  157. dbt/graph/queue.py +214 -0
  158. dbt/graph/selector.py +374 -0
  159. dbt/graph/selector_methods.py +975 -0
  160. dbt/graph/selector_spec.py +222 -0
  161. dbt/graph/thread_pool.py +18 -0
  162. dbt/hooks.py +21 -0
  163. dbt/include/README.md +49 -0
  164. dbt/include/__init__.py +3 -0
  165. dbt/include/starter_project/.gitignore +4 -0
  166. dbt/include/starter_project/README.md +15 -0
  167. dbt/include/starter_project/__init__.py +3 -0
  168. dbt/include/starter_project/analyses/.gitkeep +0 -0
  169. dbt/include/starter_project/dbt_project.yml +36 -0
  170. dbt/include/starter_project/macros/.gitkeep +0 -0
  171. dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  172. dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  173. dbt/include/starter_project/models/example/schema.yml +21 -0
  174. dbt/include/starter_project/seeds/.gitkeep +0 -0
  175. dbt/include/starter_project/snapshots/.gitkeep +0 -0
  176. dbt/include/starter_project/tests/.gitkeep +0 -0
  177. dbt/internal_deprecations.py +26 -0
  178. dbt/jsonschemas/__init__.py +3 -0
  179. dbt/jsonschemas/jsonschemas.py +309 -0
  180. dbt/jsonschemas/project/0.0.110.json +4717 -0
  181. dbt/jsonschemas/project/0.0.85.json +2015 -0
  182. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  183. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  184. dbt/jsonschemas/resources/latest.json +6773 -0
  185. dbt/links.py +4 -0
  186. dbt/materializations/__init__.py +0 -0
  187. dbt/materializations/incremental/__init__.py +0 -0
  188. dbt/materializations/incremental/microbatch.py +236 -0
  189. dbt/mp_context.py +8 -0
  190. dbt/node_types.py +37 -0
  191. dbt/parser/__init__.py +23 -0
  192. dbt/parser/analysis.py +21 -0
  193. dbt/parser/base.py +548 -0
  194. dbt/parser/common.py +266 -0
  195. dbt/parser/docs.py +52 -0
  196. dbt/parser/fixtures.py +51 -0
  197. dbt/parser/functions.py +30 -0
  198. dbt/parser/generic_test.py +100 -0
  199. dbt/parser/generic_test_builders.py +333 -0
  200. dbt/parser/hooks.py +118 -0
  201. dbt/parser/macros.py +137 -0
  202. dbt/parser/manifest.py +2204 -0
  203. dbt/parser/models.py +573 -0
  204. dbt/parser/partial.py +1178 -0
  205. dbt/parser/read_files.py +445 -0
  206. dbt/parser/schema_generic_tests.py +422 -0
  207. dbt/parser/schema_renderer.py +111 -0
  208. dbt/parser/schema_yaml_readers.py +935 -0
  209. dbt/parser/schemas.py +1466 -0
  210. dbt/parser/search.py +149 -0
  211. dbt/parser/seeds.py +28 -0
  212. dbt/parser/singular_test.py +20 -0
  213. dbt/parser/snapshots.py +44 -0
  214. dbt/parser/sources.py +558 -0
  215. dbt/parser/sql.py +62 -0
  216. dbt/parser/unit_tests.py +621 -0
  217. dbt/plugins/__init__.py +20 -0
  218. dbt/plugins/contracts.py +9 -0
  219. dbt/plugins/exceptions.py +2 -0
  220. dbt/plugins/manager.py +163 -0
  221. dbt/plugins/manifest.py +21 -0
  222. dbt/profiler.py +20 -0
  223. dbt/py.typed +1 -0
  224. dbt/query_analyzer.cpython-310-darwin.so +0 -0
  225. dbt/query_analyzer.py +410 -0
  226. dbt/runners/__init__.py +2 -0
  227. dbt/runners/exposure_runner.py +7 -0
  228. dbt/runners/no_op_runner.py +45 -0
  229. dbt/runners/saved_query_runner.py +7 -0
  230. dbt/selected_resources.py +8 -0
  231. dbt/task/__init__.py +0 -0
  232. dbt/task/base.py +503 -0
  233. dbt/task/build.py +197 -0
  234. dbt/task/clean.py +56 -0
  235. dbt/task/clone.py +161 -0
  236. dbt/task/compile.py +150 -0
  237. dbt/task/compute.py +454 -0
  238. dbt/task/debug.py +505 -0
  239. dbt/task/deps.py +280 -0
  240. dbt/task/docs/__init__.py +3 -0
  241. dbt/task/docs/generate.py +660 -0
  242. dbt/task/docs/index.html +250 -0
  243. dbt/task/docs/serve.py +29 -0
  244. dbt/task/freshness.py +322 -0
  245. dbt/task/function.py +121 -0
  246. dbt/task/group_lookup.py +46 -0
  247. dbt/task/init.py +553 -0
  248. dbt/task/java.py +316 -0
  249. dbt/task/list.py +236 -0
  250. dbt/task/printer.py +175 -0
  251. dbt/task/retry.py +175 -0
  252. dbt/task/run.py +1306 -0
  253. dbt/task/run_operation.py +141 -0
  254. dbt/task/runnable.py +758 -0
  255. dbt/task/seed.py +103 -0
  256. dbt/task/show.py +149 -0
  257. dbt/task/snapshot.py +56 -0
  258. dbt/task/spark.py +414 -0
  259. dbt/task/sql.py +110 -0
  260. dbt/task/target_sync.py +759 -0
  261. dbt/task/test.py +464 -0
  262. dbt/tests/fixtures/__init__.py +1 -0
  263. dbt/tests/fixtures/project.py +620 -0
  264. dbt/tests/util.py +651 -0
  265. dbt/tracking.py +529 -0
  266. dbt/utils/__init__.py +3 -0
  267. dbt/utils/artifact_upload.py +151 -0
  268. dbt/utils/utils.py +408 -0
  269. dbt/version.py +268 -0
  270. dvt_cli/__init__.py +72 -0
  271. dvt_core-0.52.2.dist-info/METADATA +286 -0
  272. dvt_core-0.52.2.dist-info/RECORD +275 -0
  273. dvt_core-0.52.2.dist-info/WHEEL +5 -0
  274. dvt_core-0.52.2.dist-info/entry_points.txt +2 -0
  275. dvt_core-0.52.2.dist-info/top_level.txt +2 -0
dbt/plugins/manager.py ADDED
@@ -0,0 +1,163 @@
1
+ import functools
2
+ import importlib
3
+ import pkgutil
4
+ from types import ModuleType
5
+ from typing import Callable, Dict, List, Mapping
6
+
7
+ import dbt.tracking
8
+ from dbt.contracts.graph.manifest import Manifest
9
+ from dbt.plugins.contracts import PluginArtifacts
10
+ from dbt.plugins.manifest import PluginNodes
11
+ from dbt_common.exceptions import DbtRuntimeError
12
+ from dbt_common.tests import test_caching_enabled
13
+
14
+
15
def dbt_hook(func):
    """
    Mark ``func`` as a dbt plugin hook and normalize the errors it raises.

    The returned wrapper calls ``func`` with the arguments it was given and
    returns its result unchanged. Any ``Exception`` raised by ``func`` is
    re-raised as a ``DbtRuntimeError`` whose message is ``"<hook name>: <error>"``.
    The wrapper carries an ``is_dbt_hook`` attribute set to ``True``.

    :param func: The hook implementation to wrap
    :returns: The wrapped callable
    :raises DbtRuntimeError: When ``func`` raises any ``Exception``
    """

    # functools.wraps keeps the hook's own __name__/__doc__ on the wrapper, so
    # introspection reports the real hook instead of "inner".
    @functools.wraps(func)
    def inner(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            # Chain explicitly so the original error is recorded as the cause.
            raise DbtRuntimeError(f"{func.__name__}: {e}") from e

    setattr(inner, "is_dbt_hook", True)
    return inner
24
+
25
+
26
class dbtPlugin:
    """
    EXPERIMENTAL: dbtPlugin is the base class for creating plugins.
    Its interface is **not** stable and will likely change between dbt-core versions.
    """

    def __init__(self, project_name: str) -> None:
        """
        Store the project name and run ``initialize()``.

        Any failure in ``initialize()`` is surfaced as a ``DbtRuntimeError``.
        """
        self.project_name = project_name
        try:
            self.initialize()
        except DbtRuntimeError as e:
            # Drop the first line of the DbtRuntimeError text so the redundant
            # "Runtime Error" header is not repeated in the new error.
            _header, *remaining_lines = str(e).split("\n")
            raise DbtRuntimeError("\n".join(remaining_lines))
        except Exception as e:
            raise DbtRuntimeError(str(e))

    @property
    def name(self) -> str:
        """Name of the concrete plugin class."""
        return type(self).__name__

    def initialize(self) -> None:
        """
        Initialize the plugin. Subclasses with additional setup steps override this;
        the base implementation does nothing.
        """
        return None

    def get_nodes(self) -> PluginNodes:
        """
        Provide PluginNodes to dbt for injection into dbt's DAG.
        Currently the only node types that are accepted are model nodes.
        """
        message = f"get_nodes hook not implemented for {self.name}"
        raise NotImplementedError(message)

    def get_manifest_artifacts(self, manifest: Manifest) -> PluginArtifacts:
        """
        Given a manifest, provide PluginArtifacts derived for writing by core.
        PluginArtifacts share the same lifecycle as the manifest.json file -- they
        will either be written or not depending on whether the manifest is written.
        """
        message = f"get_manifest_artifacts hook not implemented for {self.name}"
        raise NotImplementedError(message)
67
+
68
+
69
@functools.lru_cache(maxsize=None)
def _get_dbt_modules() -> Mapping[str, ModuleType]:
    """
    Import every discoverable top-level module whose name starts with
    ``PluginManager.PLUGIN_MODULE_PREFIX`` and return them keyed by module name.

    Discovery and import are expensive, especially in tests where this is called
    repeatedly, so the result is memoized globally via ``lru_cache``.
    """
    prefixed_modules = {}
    for module_info in pkgutil.iter_modules():
        module_name = module_info.name
        if module_name.startswith(PluginManager.PLUGIN_MODULE_PREFIX):
            prefixed_modules[module_name] = importlib.import_module(module_name)
    return prefixed_modules


# Module-level slot for discovered plugin modules; starts out empty (None).
_MODULES_CACHE = None
81
+
82
+
83
class PluginManager:
    """
    Holds a list of dbtPlugin instances and dispatches to the hooks they implement.
    """

    PLUGIN_MODULE_PREFIX = "dbt_"
    PLUGIN_ATTR_NAME = "plugins"

    def __init__(self, plugins: List[dbtPlugin]) -> None:
        """
        Index the hooks exposed by ``plugins``.

        An attribute of a plugin counts as a hook when it is callable, carries the
        ``is_dbt_hook`` marker, and shares its name with a public attribute of
        ``dbtPlugin``.
        """
        self._plugins = plugins
        # Public attribute names of dbtPlugin are the only accepted hook names.
        self._valid_hook_names = {
            attr_name for attr_name in dir(dbtPlugin) if not attr_name.startswith("_")
        }

        self.hooks: Dict[str, List[Callable]] = {}
        for plugin in self._plugins:
            for attr_name in dir(plugin):
                candidate = getattr(plugin, attr_name)
                is_registered_hook = (
                    callable(candidate)
                    and hasattr(candidate, "is_dbt_hook")
                    and attr_name in self._valid_hook_names
                )
                if is_registered_hook:
                    self.hooks.setdefault(attr_name, []).append(candidate)

    @classmethod
    def from_modules(cls, project_name: str) -> "PluginManager":
        """
        Build a manager from every plugin class listed in the ``plugins`` attribute
        of the discovered prefixed modules.

        When test caching is enabled, the discovered modules are stored in the
        module-level ``_MODULES_CACHE`` and reused on later calls.
        """
        global _MODULES_CACHE

        if not test_caching_enabled():
            discovered_dbt_modules = cls.get_prefixed_modules()
        else:
            if _MODULES_CACHE is None:
                _MODULES_CACHE = cls.get_prefixed_modules()
            discovered_dbt_modules = _MODULES_CACHE

        plugins = []
        for name, module in discovered_dbt_modules.items():
            # Modules without the attribute contribute no plugins.
            for plugin_cls in getattr(module, cls.PLUGIN_ATTR_NAME, []):
                assert issubclass(
                    plugin_cls, dbtPlugin
                ), f"'plugin' in {name} must be subclass of dbtPlugin"
                plugins.append(plugin_cls(project_name=project_name))
        return cls(plugins=plugins)

    @classmethod
    def get_prefixed_modules(cls):
        """Import and return, keyed by name, every module starting with the plugin prefix."""
        prefixed_modules = {}
        for module_info in pkgutil.iter_modules():
            module_name = module_info.name
            if module_name.startswith(cls.PLUGIN_MODULE_PREFIX):
                prefixed_modules[module_name] = importlib.import_module(module_name)
        return prefixed_modules

    def get_manifest_artifacts(self, manifest: Manifest) -> PluginArtifacts:
        """Merge the artifacts returned by every ``get_manifest_artifacts`` hook."""
        all_plugin_artifacts = {}
        for hook_method in self.hooks.get("get_manifest_artifacts", []):
            all_plugin_artifacts.update(hook_method(manifest))
        return all_plugin_artifacts

    def get_nodes(self) -> PluginNodes:
        """
        Merge the nodes returned by every ``get_nodes`` hook, sending one tracking
        event per hook with its model and package counts.
        """
        all_plugin_nodes = PluginNodes()
        for hook_method in self.hooks.get("get_nodes", []):
            plugin_nodes = hook_method()
            distinct_packages = {
                model.package_name for model in plugin_nodes.models.values()
            }
            tracking_payload = {
                "plugin_name": hook_method.__self__.name,  # type: ignore
                "num_model_nodes": len(plugin_nodes.models),
                "num_model_packages": len(distinct_packages),
            }
            dbt.tracking.track_plugin_get_nodes(tracking_payload)
            all_plugin_nodes.update(plugin_nodes)
        return all_plugin_nodes
@@ -0,0 +1,21 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import Dict
3
+
4
+ from dbt.artifacts.resources import NodeVersion # noqa
5
+
6
+ # all these are just exports, they need "noqa" so flake8 will not complain.
7
+ from dbt.contracts.graph.manifest import Manifest # noqa
8
+ from dbt.contracts.graph.node_args import ModelNodeArgs
9
+ from dbt.graph.graph import UniqueId # noqa
10
+ from dbt.node_types import AccessType, NodeType # noqa
11
+
12
+
13
@dataclass
class PluginNodes:
    """Collection of plugin-provided model node arguments, keyed by ``unique_id``."""

    models: Dict[str, ModelNodeArgs] = field(default_factory=dict)

    def add_model(self, model_args: ModelNodeArgs) -> None:
        """Store ``model_args`` under its ``unique_id``, replacing any existing entry."""
        self.models.update({model_args.unique_id: model_args})

    def update(self, other: "PluginNodes") -> None:
        """Copy every model of ``other`` into this collection; ``other`` wins on clashes."""
        for unique_id, model_args in other.models.items():
            self.models[unique_id] = model_args
dbt/profiler.py ADDED
@@ -0,0 +1,20 @@
1
+ from contextlib import contextmanager
2
+ from cProfile import Profile
3
+ from pstats import Stats
4
+ from typing import Any, Generator
5
+
6
+
7
@contextmanager
def profiler(enable: bool, outfile: str) -> Generator[Any, None, None]:
    """
    Optionally profile the body of a ``with`` block using cProfile.

    :param enable: When false, the block runs without profiling and nothing is written
    :param outfile: Path the stats are dumped to when profiling is enabled
    :returns: A context manager that yields ``None``
    """
    if not enable:
        yield
        return

    # Start the profile before entering the try block: if setup itself fails,
    # there is nothing to dump and the setup error must propagate untouched
    # rather than being masked by a failure in the cleanup path.
    active_profile = Profile()
    active_profile.enable()
    try:
        yield
    finally:
        # Dump the stats even when the profiled block raised.
        active_profile.disable()
        stats = Stats(active_profile)
        stats.sort_stats("tottime")
        stats.dump_stats(str(outfile))
dbt/py.typed ADDED
@@ -0,0 +1 @@
1
+ # dummy file, our types are defined inline
Binary file
dbt/query_analyzer.py ADDED
@@ -0,0 +1,410 @@
1
+ """
2
+ Query Analyzer for Execution Routing
3
+
4
+ This module analyzes compiled SQL queries to determine the optimal execution strategy:
5
+ - Pushdown: All sources from same connection → execute on source database
6
+ - Federated: Sources from multiple connections → use compute layer
7
+
8
+ The analyzer respects user configuration overrides while providing intelligent defaults.
9
+ """
10
+
11
+ import re
12
+ from dataclasses import dataclass
13
+ from typing import Dict, List, Optional, Set
14
+ from dbt.contracts.graph.manifest import Manifest
15
+ from dbt.contracts.graph.nodes import ManifestNode, SourceDefinition
16
+ from dbt_common.exceptions import DbtRuntimeError
17
+
18
+
19
@dataclass
class QueryAnalysisResult:
    """
    Outcome of analyzing one query.

    Records which source connections the query touches and which execution
    strategy was selected for it.
    """

    # Connection names used by the query
    source_connections: Set[str]

    # unique_ids of the sources the query references
    source_refs: Set[str]

    # Selected execution strategy: "pushdown" or "federated"
    strategy: str

    # Connection to execute on for pushdown; None for federated
    primary_connection: Optional[str]

    # Explanation of why the strategy was selected
    reason: str

    # User override that was applied, if any
    user_override: Optional[str] = None

    @property
    def is_pushdown(self) -> bool:
        """True when the selected strategy is "pushdown"."""
        selected = self.strategy
        return selected == "pushdown"

    @property
    def is_federated(self) -> bool:
        """True when the selected strategy is "federated"."""
        selected = self.strategy
        return selected == "federated"

    @property
    def is_homogeneous(self) -> bool:
        """True when the query touches at most one connection."""
        return len(self.source_connections) < 2
59
+
60
+
61
class QueryAnalyzer:
    """
    Analyzes compiled SQL queries to determine execution strategy.

    The analyzer:
    1. Identifies the source() references a node declares directly
    2. Determines which connections those sources live on
    3. Recommends pushdown (homogeneous) or federated (heterogeneous) execution
    4. Records the user's compute override only where federation applies
    """

    def __init__(self, manifest: Manifest):
        """
        Initialize query analyzer.

        :param manifest: The parsed dbt manifest with all nodes and sources
        """
        self.manifest = manifest
        # Cache of source unique_id -> connection name (None when the source
        # declares no connection).
        self._source_connection_cache: Dict[str, Optional[str]] = {}

    def analyze(
        self,
        node: ManifestNode,
        user_compute_override: Optional[str] = None,
        target_connection: Optional[str] = None,
    ) -> QueryAnalysisResult:
        """
        Analyze a compiled node to determine execution strategy.

        Decision logic (DVT rules):
        1. No direct sources -> pushdown to the target connection
        2. One source connection equal to the target (or no target given) -> pushdown
        3. One source connection different from the target -> federated
        4. Several source connections -> federated
        The compute override never changes the strategy; it is only reported.

        :param node: The compiled ManifestNode to analyze
        :param user_compute_override: User's compute config (if specified)
        :param target_connection: Target connection for materialization (if different from source)
        :returns: QueryAnalysisResult with execution strategy
        """
        # Seeds have no sources, and reading node.sources on a seed raises.
        if node.resource_type == "seed":
            return QueryAnalysisResult(
                source_connections=set(),
                source_refs=set(),
                strategy="pushdown",
                primary_connection=None,
                reason="Seed node - no source analysis needed",
                user_override=None,
            )

        source_refs = self._extract_source_references(node)
        source_connections = self._map_sources_to_connections(source_refs)

        if not source_connections:
            # Only refs (or nothing): upstream models are already materialized
            # in the target, so execute there.
            strategy = "pushdown"
            primary_connection = target_connection
            depends_on = getattr(node, "depends_on", None)
            has_refs = bool(depends_on and depends_on.nodes)
            if has_refs and target_connection:
                reason = f"Model refs materialized tables - pushdown to target '{target_connection}'"
            else:
                reason = "No source dependencies - pushdown to default target"
            # The override is noted but does not change the strategy.
            user_override = user_compute_override or None

        elif len(source_connections) == 1:
            (single_source_conn,) = source_connections

            if target_connection and single_source_conn != target_connection:
                # Source lives outside the target -> federation required.
                strategy = "federated"
                reason = f"Cross-target: source '{single_source_conn}' != target '{target_connection}' - federation required"
                primary_connection = None
                user_override = user_compute_override
            else:
                # Source is in the target (or no target was given) -> pushdown.
                strategy = "pushdown"
                primary_connection = single_source_conn
                reason = f"All sources in target '{primary_connection}' - pushdown"
                # Compute override is ignored for pushdown-eligible models.
                user_override = None

        else:
            # Several source connections always require federation.
            strategy = "federated"
            reason = f"Sources span {len(source_connections)} connections: {sorted(source_connections)} - federation required"
            primary_connection = None
            user_override = user_compute_override

        return QueryAnalysisResult(
            source_connections=source_connections,
            source_refs=source_refs,
            strategy=strategy,
            primary_connection=primary_connection,
            reason=reason,
            user_override=user_override,
        )

    def _fallback_package_name(self, node: ManifestNode) -> Optional[str]:
        """Return the package name to use when building a source unique_id for ``node``."""
        package_name = getattr(node, "package_name", None)
        if package_name is not None:
            return package_name
        # NOTE(review): assumes manifest.metadata.project_name is the root project
        # name -- confirm against the Manifest definition.
        metadata = getattr(self.manifest, "metadata", None)
        return getattr(metadata, "project_name", None)

    def _extract_source_references(self, node: ManifestNode) -> Set[str]:
        """
        Extract all source unique_ids DIRECTLY referenced by this node.

        ref() dependencies are deliberately NOT traced back to their underlying
        sources: a ref() resolves to the target database where the upstream model
        is materialized, so tracing it would incorrectly force federation.

        :param node: The node to analyze
        :returns: Set of source unique_ids (direct references only)
        """
        source_refs: Set[str] = set()

        declared_sources = getattr(node, "sources", None)
        if not declared_sources or not isinstance(declared_sources, (list, tuple, set)):
            return source_refs

        for source in declared_sources:
            if isinstance(source, str):
                # Already a full unique_id: "source.package.source_name.table_name"
                source_refs.add(source)
            elif isinstance(source, (list, tuple)) and len(source) == 2:
                # Pair form ["source_name", "table_name"]: build the unique_id.
                # NOTE(review): this presumes the source is defined in the node's
                # own package -- verify for sources defined in other packages.
                source_name, table_name = source
                package_name = self._fallback_package_name(node)
                if package_name is None:
                    # No package can be determined, so no unique_id can be built.
                    continue
                source_refs.add(f"source.{package_name}.{source_name}.{table_name}")

        return source_refs

    def _trace_node_to_sources(self, node_id: str, visited: Optional[Set[str]] = None) -> Set[str]:
        """
        Recursively trace a node's dependencies to find all underlying sources.

        :param node_id: Unique ID of the node to trace
        :param visited: Set of already-visited nodes (for cycle detection)
        :returns: Set of source unique_ids
        """
        if visited is None:
            visited = set()

        if node_id in visited:
            return set()

        visited.add(node_id)
        sources: Set[str] = set()

        # A source id is its own answer.
        if node_id.startswith("source."):
            sources.add(node_id)
            return sources

        node = self.manifest.nodes.get(node_id)
        if not node:
            # Node not found (could be disabled or external)
            return sources

        # Seeds have nothing upstream, and reading node.sources on a seed raises.
        if getattr(node, "resource_type", None) == "seed":
            return sources

        # Direct source dependencies (string ids only)
        node_sources = getattr(node, "sources", None)
        if node_sources and isinstance(node_sources, (list, tuple, set)):
            sources.update(source for source in node_sources if isinstance(source, str))

        # Recurse through node dependencies
        depends_on = getattr(node, "depends_on", None)
        if depends_on:
            dep_ids = depends_on.nodes if isinstance(depends_on.nodes, (list, tuple, set)) else []
            for dep_id in dep_ids:
                if isinstance(dep_id, str):
                    sources.update(self._trace_node_to_sources(dep_id, visited))

        return sources

    def _map_sources_to_connections(self, source_refs: Set[str]) -> Set[str]:
        """
        Map source unique_ids to their connection names.

        Sources without a resolvable connection are left out.

        :param source_refs: Set of source unique_ids
        :returns: Set of connection names
        """
        resolved = (self._get_source_connection(source_id) for source_id in source_refs)
        return {connection for connection in resolved if connection}

    def _get_source_connection(self, source_id: str) -> Optional[str]:
        """
        Get the connection name for a source.

        Uses caching for performance. Locations are tried in order and the first
        one holding a non-empty value wins:
        1. source.connection (direct attribute)
        2. source.meta["connection"]
        3. source.source_meta["connection"] (legacy)

        :param source_id: Source unique_id
        :returns: Connection name or None if not specified
        """
        if source_id in self._source_connection_cache:
            return self._source_connection_cache[source_id]

        source = self.manifest.sources.get(source_id)
        if not source:
            return None

        connection = getattr(source, "connection", None) or None
        if connection is None:
            # Each fallback is tried independently so that an existing ``meta``
            # dict without a connection does not hide ``source_meta``.
            for container_name in ("meta", "source_meta"):
                container = getattr(source, container_name, None)
                if isinstance(container, dict) and container.get("connection"):
                    connection = container["connection"]
                    break

        self._source_connection_cache[source_id] = connection
        return connection

    def get_execution_summary(self, node: ManifestNode) -> str:
        """
        Get a human-readable summary of the execution strategy for a node.

        Useful for logging and debugging.

        :param node: The node to analyze
        :returns: Summary string
        """
        result = self.analyze(node)

        summary_parts = [
            f"Node: {node.unique_id}",
            f"Strategy: {result.strategy.upper()}",
            f"Reason: {result.reason}",
            f"Source Connections: {sorted(result.source_connections) if result.source_connections else 'None'}",
            f"Source Count: {len(result.source_refs)}",
        ]

        if result.primary_connection:
            summary_parts.append(f"Execution Connection: {result.primary_connection}")

        if result.user_override:
            summary_parts.append(f"User Override: {result.user_override}")

        return "\n".join(summary_parts)

    def analyze_batch(
        self,
        nodes: List[ManifestNode],
    ) -> Dict[str, QueryAnalysisResult]:
        """
        Analyze multiple nodes in batch.

        Source connection lookups are shared between nodes through the cache.

        :param nodes: List of nodes to analyze
        :returns: Dict mapping node unique_id to QueryAnalysisResult
        """
        return {node.unique_id: self.analyze(node) for node in nodes}

    def get_federated_nodes(
        self,
        nodes: List[ManifestNode],
    ) -> List[ManifestNode]:
        """
        Filter nodes that require federated execution.

        :param nodes: List of nodes to filter
        :returns: List of nodes requiring federated execution
        """
        return [node for node in nodes if self.analyze(node).is_federated]

    def get_pushdown_nodes(
        self,
        nodes: List[ManifestNode],
    ) -> List[ManifestNode]:
        """
        Filter nodes eligible for pushdown execution.

        :param nodes: List of nodes to filter
        :returns: List of nodes eligible for pushdown
        """
        return [node for node in nodes if self.analyze(node).is_pushdown]
@@ -0,0 +1,2 @@
1
+ from .exposure_runner import ExposureRunner
2
+ from .saved_query_runner import SavedQueryRunner
@@ -0,0 +1,7 @@
1
+ from dbt.runners.no_op_runner import NoOpRunner
2
+
3
+
4
class ExposureRunner(NoOpRunner):
    """No-op runner for exposure nodes."""

    @property
    def description(self) -> str:
        """Label made of the word "exposure" followed by the node's name."""
        exposure_name = self.node.name
        return f"exposure {exposure_name}"
@@ -0,0 +1,45 @@
1
+ import threading
2
+
3
+ from dbt.artifacts.schemas.results import RunStatus
4
+ from dbt.artifacts.schemas.run import RunResult
5
+ from dbt.contracts.graph.manifest import Manifest
6
+ from dbt.events.types import LogNodeNoOpResult
7
+ from dbt.task.base import BaseRunner
8
+ from dbt_common.events.functions import fire_event
9
+
10
+
11
class NoOpRunner(BaseRunner):
    """Runner for nodes with nothing to execute; ``execute`` always reports ``RunStatus.NoOp``."""

    @property
    def description(self) -> str:
        """Label for the node; subclasses must provide it."""
        raise NotImplementedError("description not implemented")

    def before_execute(self) -> None:
        """Nothing to prepare for a no-op node."""
        return None

    def compile(self, manifest: Manifest):
        """Return the runner's node as-is; ``manifest`` is not used."""
        return self.node

    def after_execute(self, result) -> None:
        """Fire a ``LogNodeNoOpResult`` event describing this node; ``result`` is not used."""
        no_op_event = LogNodeNoOpResult(
            description=self.description,
            index=self.node_index,
            total=self.num_nodes,
            node_info=self.node.node_info,
        )
        fire_event(no_op_event)

    def execute(self, compiled_node, manifest):
        """Build the fixed NO-OP ``RunResult`` for ``compiled_node`` without running anything."""
        current_thread_name = threading.current_thread().name
        return RunResult(
            node=compiled_node,
            status=RunStatus.NoOp,
            timing=[],
            thread_id=current_thread_name,
            execution_time=0,
            message="NO-OP",
            adapter_response={},
            failures=0,
            batch_results=None,
            agate_table=None,
        )
@@ -0,0 +1,7 @@
1
+ from dbt.runners.no_op_runner import NoOpRunner
2
+
3
+
4
class SavedQueryRunner(NoOpRunner):
    """No-op runner for saved query nodes."""

    @property
    def description(self) -> str:
        """Label made of the words "saved query" followed by the node's name."""
        saved_query_name = self.node.name
        return f"saved query {saved_query_name}"