dvt-core 1.11.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dvt-core might be problematic; see the registry's advisory page for details.

Files changed (261) hide show
  1. dvt/__init__.py +7 -0
  2. dvt/_pydantic_shim.py +26 -0
  3. dvt/adapters/__init__.py +16 -0
  4. dvt/adapters/multi_adapter_manager.py +268 -0
  5. dvt/artifacts/__init__.py +0 -0
  6. dvt/artifacts/exceptions/__init__.py +1 -0
  7. dvt/artifacts/exceptions/schemas.py +31 -0
  8. dvt/artifacts/resources/__init__.py +116 -0
  9. dvt/artifacts/resources/base.py +68 -0
  10. dvt/artifacts/resources/types.py +93 -0
  11. dvt/artifacts/resources/v1/analysis.py +10 -0
  12. dvt/artifacts/resources/v1/catalog.py +23 -0
  13. dvt/artifacts/resources/v1/components.py +275 -0
  14. dvt/artifacts/resources/v1/config.py +282 -0
  15. dvt/artifacts/resources/v1/documentation.py +11 -0
  16. dvt/artifacts/resources/v1/exposure.py +52 -0
  17. dvt/artifacts/resources/v1/function.py +53 -0
  18. dvt/artifacts/resources/v1/generic_test.py +32 -0
  19. dvt/artifacts/resources/v1/group.py +22 -0
  20. dvt/artifacts/resources/v1/hook.py +11 -0
  21. dvt/artifacts/resources/v1/macro.py +30 -0
  22. dvt/artifacts/resources/v1/metric.py +173 -0
  23. dvt/artifacts/resources/v1/model.py +146 -0
  24. dvt/artifacts/resources/v1/owner.py +10 -0
  25. dvt/artifacts/resources/v1/saved_query.py +112 -0
  26. dvt/artifacts/resources/v1/seed.py +42 -0
  27. dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  28. dvt/artifacts/resources/v1/semantic_model.py +315 -0
  29. dvt/artifacts/resources/v1/singular_test.py +14 -0
  30. dvt/artifacts/resources/v1/snapshot.py +92 -0
  31. dvt/artifacts/resources/v1/source_definition.py +85 -0
  32. dvt/artifacts/resources/v1/sql_operation.py +10 -0
  33. dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
  34. dvt/artifacts/schemas/__init__.py +0 -0
  35. dvt/artifacts/schemas/base.py +191 -0
  36. dvt/artifacts/schemas/batch_results.py +24 -0
  37. dvt/artifacts/schemas/catalog/__init__.py +12 -0
  38. dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  39. dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
  40. dvt/artifacts/schemas/freshness/__init__.py +1 -0
  41. dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  42. dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
  43. dvt/artifacts/schemas/manifest/__init__.py +2 -0
  44. dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  45. dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
  46. dvt/artifacts/schemas/results.py +148 -0
  47. dvt/artifacts/schemas/run/__init__.py +2 -0
  48. dvt/artifacts/schemas/run/v5/__init__.py +0 -0
  49. dvt/artifacts/schemas/run/v5/run.py +184 -0
  50. dvt/artifacts/schemas/upgrades/__init__.py +4 -0
  51. dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  52. dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  53. dvt/artifacts/utils/validation.py +153 -0
  54. dvt/cli/__init__.py +1 -0
  55. dvt/cli/context.py +16 -0
  56. dvt/cli/exceptions.py +56 -0
  57. dvt/cli/flags.py +558 -0
  58. dvt/cli/main.py +971 -0
  59. dvt/cli/option_types.py +121 -0
  60. dvt/cli/options.py +79 -0
  61. dvt/cli/params.py +803 -0
  62. dvt/cli/requires.py +478 -0
  63. dvt/cli/resolvers.py +32 -0
  64. dvt/cli/types.py +40 -0
  65. dvt/clients/__init__.py +0 -0
  66. dvt/clients/checked_load.py +82 -0
  67. dvt/clients/git.py +164 -0
  68. dvt/clients/jinja.py +206 -0
  69. dvt/clients/jinja_static.py +245 -0
  70. dvt/clients/registry.py +192 -0
  71. dvt/clients/yaml_helper.py +68 -0
  72. dvt/compilation.py +833 -0
  73. dvt/compute/__init__.py +26 -0
  74. dvt/compute/base.py +288 -0
  75. dvt/compute/engines/__init__.py +13 -0
  76. dvt/compute/engines/duckdb_engine.py +368 -0
  77. dvt/compute/engines/spark_engine.py +273 -0
  78. dvt/compute/query_analyzer.py +212 -0
  79. dvt/compute/router.py +483 -0
  80. dvt/config/__init__.py +4 -0
  81. dvt/config/catalogs.py +95 -0
  82. dvt/config/compute_config.py +406 -0
  83. dvt/config/profile.py +411 -0
  84. dvt/config/profiles_v2.py +464 -0
  85. dvt/config/project.py +893 -0
  86. dvt/config/renderer.py +232 -0
  87. dvt/config/runtime.py +491 -0
  88. dvt/config/selectors.py +209 -0
  89. dvt/config/utils.py +78 -0
  90. dvt/connectors/.gitignore +6 -0
  91. dvt/connectors/README.md +306 -0
  92. dvt/connectors/catalog.yml +217 -0
  93. dvt/connectors/download_connectors.py +300 -0
  94. dvt/constants.py +29 -0
  95. dvt/context/__init__.py +0 -0
  96. dvt/context/base.py +746 -0
  97. dvt/context/configured.py +136 -0
  98. dvt/context/context_config.py +350 -0
  99. dvt/context/docs.py +82 -0
  100. dvt/context/exceptions_jinja.py +179 -0
  101. dvt/context/macro_resolver.py +195 -0
  102. dvt/context/macros.py +171 -0
  103. dvt/context/manifest.py +73 -0
  104. dvt/context/providers.py +2198 -0
  105. dvt/context/query_header.py +14 -0
  106. dvt/context/secret.py +59 -0
  107. dvt/context/target.py +74 -0
  108. dvt/contracts/__init__.py +0 -0
  109. dvt/contracts/files.py +413 -0
  110. dvt/contracts/graph/__init__.py +0 -0
  111. dvt/contracts/graph/manifest.py +1904 -0
  112. dvt/contracts/graph/metrics.py +98 -0
  113. dvt/contracts/graph/model_config.py +71 -0
  114. dvt/contracts/graph/node_args.py +42 -0
  115. dvt/contracts/graph/nodes.py +1806 -0
  116. dvt/contracts/graph/semantic_manifest.py +233 -0
  117. dvt/contracts/graph/unparsed.py +812 -0
  118. dvt/contracts/project.py +417 -0
  119. dvt/contracts/results.py +53 -0
  120. dvt/contracts/selection.py +23 -0
  121. dvt/contracts/sql.py +86 -0
  122. dvt/contracts/state.py +69 -0
  123. dvt/contracts/util.py +46 -0
  124. dvt/deprecations.py +347 -0
  125. dvt/deps/__init__.py +0 -0
  126. dvt/deps/base.py +153 -0
  127. dvt/deps/git.py +196 -0
  128. dvt/deps/local.py +80 -0
  129. dvt/deps/registry.py +131 -0
  130. dvt/deps/resolver.py +149 -0
  131. dvt/deps/tarball.py +121 -0
  132. dvt/docs/source/_ext/dbt_click.py +118 -0
  133. dvt/docs/source/conf.py +32 -0
  134. dvt/env_vars.py +64 -0
  135. dvt/event_time/event_time.py +40 -0
  136. dvt/event_time/sample_window.py +60 -0
  137. dvt/events/__init__.py +16 -0
  138. dvt/events/base_types.py +37 -0
  139. dvt/events/core_types_pb2.py +2 -0
  140. dvt/events/logging.py +109 -0
  141. dvt/events/types.py +2534 -0
  142. dvt/exceptions.py +1487 -0
  143. dvt/flags.py +89 -0
  144. dvt/graph/__init__.py +11 -0
  145. dvt/graph/cli.py +248 -0
  146. dvt/graph/graph.py +172 -0
  147. dvt/graph/queue.py +213 -0
  148. dvt/graph/selector.py +375 -0
  149. dvt/graph/selector_methods.py +976 -0
  150. dvt/graph/selector_spec.py +223 -0
  151. dvt/graph/thread_pool.py +18 -0
  152. dvt/hooks.py +21 -0
  153. dvt/include/README.md +49 -0
  154. dvt/include/__init__.py +3 -0
  155. dvt/include/global_project.py +4 -0
  156. dvt/include/starter_project/.gitignore +4 -0
  157. dvt/include/starter_project/README.md +15 -0
  158. dvt/include/starter_project/__init__.py +3 -0
  159. dvt/include/starter_project/analyses/.gitkeep +0 -0
  160. dvt/include/starter_project/dvt_project.yml +36 -0
  161. dvt/include/starter_project/macros/.gitkeep +0 -0
  162. dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  163. dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  164. dvt/include/starter_project/models/example/schema.yml +21 -0
  165. dvt/include/starter_project/seeds/.gitkeep +0 -0
  166. dvt/include/starter_project/snapshots/.gitkeep +0 -0
  167. dvt/include/starter_project/tests/.gitkeep +0 -0
  168. dvt/internal_deprecations.py +27 -0
  169. dvt/jsonschemas/__init__.py +3 -0
  170. dvt/jsonschemas/jsonschemas.py +309 -0
  171. dvt/jsonschemas/project/0.0.110.json +4717 -0
  172. dvt/jsonschemas/project/0.0.85.json +2015 -0
  173. dvt/jsonschemas/resources/0.0.110.json +2636 -0
  174. dvt/jsonschemas/resources/0.0.85.json +2536 -0
  175. dvt/jsonschemas/resources/latest.json +6773 -0
  176. dvt/links.py +4 -0
  177. dvt/materializations/__init__.py +0 -0
  178. dvt/materializations/incremental/__init__.py +0 -0
  179. dvt/materializations/incremental/microbatch.py +235 -0
  180. dvt/mp_context.py +8 -0
  181. dvt/node_types.py +37 -0
  182. dvt/parser/__init__.py +23 -0
  183. dvt/parser/analysis.py +21 -0
  184. dvt/parser/base.py +549 -0
  185. dvt/parser/common.py +267 -0
  186. dvt/parser/docs.py +52 -0
  187. dvt/parser/fixtures.py +51 -0
  188. dvt/parser/functions.py +30 -0
  189. dvt/parser/generic_test.py +100 -0
  190. dvt/parser/generic_test_builders.py +334 -0
  191. dvt/parser/hooks.py +119 -0
  192. dvt/parser/macros.py +137 -0
  193. dvt/parser/manifest.py +2204 -0
  194. dvt/parser/models.py +574 -0
  195. dvt/parser/partial.py +1179 -0
  196. dvt/parser/read_files.py +445 -0
  197. dvt/parser/schema_generic_tests.py +423 -0
  198. dvt/parser/schema_renderer.py +111 -0
  199. dvt/parser/schema_yaml_readers.py +936 -0
  200. dvt/parser/schemas.py +1467 -0
  201. dvt/parser/search.py +149 -0
  202. dvt/parser/seeds.py +28 -0
  203. dvt/parser/singular_test.py +20 -0
  204. dvt/parser/snapshots.py +44 -0
  205. dvt/parser/sources.py +557 -0
  206. dvt/parser/sql.py +63 -0
  207. dvt/parser/unit_tests.py +622 -0
  208. dvt/plugins/__init__.py +20 -0
  209. dvt/plugins/contracts.py +10 -0
  210. dvt/plugins/exceptions.py +2 -0
  211. dvt/plugins/manager.py +164 -0
  212. dvt/plugins/manifest.py +21 -0
  213. dvt/profiler.py +20 -0
  214. dvt/py.typed +1 -0
  215. dvt/runners/__init__.py +2 -0
  216. dvt/runners/exposure_runner.py +7 -0
  217. dvt/runners/no_op_runner.py +46 -0
  218. dvt/runners/saved_query_runner.py +7 -0
  219. dvt/selected_resources.py +8 -0
  220. dvt/task/__init__.py +0 -0
  221. dvt/task/base.py +504 -0
  222. dvt/task/build.py +197 -0
  223. dvt/task/clean.py +57 -0
  224. dvt/task/clone.py +162 -0
  225. dvt/task/compile.py +151 -0
  226. dvt/task/compute.py +366 -0
  227. dvt/task/debug.py +650 -0
  228. dvt/task/deps.py +280 -0
  229. dvt/task/docs/__init__.py +3 -0
  230. dvt/task/docs/generate.py +408 -0
  231. dvt/task/docs/index.html +250 -0
  232. dvt/task/docs/serve.py +28 -0
  233. dvt/task/freshness.py +323 -0
  234. dvt/task/function.py +122 -0
  235. dvt/task/group_lookup.py +46 -0
  236. dvt/task/init.py +374 -0
  237. dvt/task/list.py +237 -0
  238. dvt/task/printer.py +176 -0
  239. dvt/task/profiles.py +256 -0
  240. dvt/task/retry.py +175 -0
  241. dvt/task/run.py +1146 -0
  242. dvt/task/run_operation.py +142 -0
  243. dvt/task/runnable.py +802 -0
  244. dvt/task/seed.py +104 -0
  245. dvt/task/show.py +150 -0
  246. dvt/task/snapshot.py +57 -0
  247. dvt/task/sql.py +111 -0
  248. dvt/task/test.py +464 -0
  249. dvt/tests/fixtures/__init__.py +1 -0
  250. dvt/tests/fixtures/project.py +620 -0
  251. dvt/tests/util.py +651 -0
  252. dvt/tracking.py +529 -0
  253. dvt/utils/__init__.py +3 -0
  254. dvt/utils/artifact_upload.py +151 -0
  255. dvt/utils/utils.py +408 -0
  256. dvt/version.py +249 -0
  257. dvt_core-1.11.0b4.dist-info/METADATA +252 -0
  258. dvt_core-1.11.0b4.dist-info/RECORD +261 -0
  259. dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
  260. dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
  261. dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/compute/router.py ADDED
@@ -0,0 +1,483 @@
1
+ """
2
+ Execution router for DVT.
3
+
4
+ This module analyzes queries and routes them to the optimal execution engine
5
+ (pushdown vs compute layer).
6
+ """
7
+
8
from typing import Any, Dict, List, Optional

from dvt.compute.base import (
    BaseComputeEngine,
    ComputeResult,
    ExecutionStrategy,
    QueryExecutionPlan,
    SourceInfo,
)
from dvt.config.compute_config import AutoSelectConfig, ComputeConfig
from dvt.events import fire_event
from dvt.events.types import Note

from dbt.adapters.exceptions import DbtRuntimeError
22
+
23
+
24
class ExecutionRouter:
    """Route queries to the best available execution engine.

    Inspects a query's execution plan and selects one of:
      - Pushdown: run the query directly on the source database
      - DuckDB: lightweight local compute layer
      - Spark: heavy-duty compute layer (local or cluster)
    """

    def __init__(
        self,
        compute_config: ComputeConfig,
        available_engines: Dict[str, BaseComputeEngine],
    ):
        """Set up the router.

        Args:
            compute_config: Compute configuration (default engine, auto-select rules)
            available_engines: Mapping of engine name -> engine instance
        """
        self.available_engines = available_engines
        self.compute_config = compute_config
        # Keep a direct handle on the auto-select rules used during routing.
        self.auto_select_config = compute_config.auto_select
49
+
50
+ def analyze_query(
51
+ self,
52
+ sql: str,
53
+ sources: List[SourceInfo],
54
+ ) -> QueryExecutionPlan:
55
+ """
56
+ Analyze query and create execution plan.
57
+
58
+ Args:
59
+ sql: SQL query
60
+ sources: List of source information
61
+
62
+ Returns:
63
+ QueryExecutionPlan
64
+ """
65
+ # Calculate metrics
66
+ unique_adapters = {s.adapter_type for s in sources}
67
+ unique_profiles = {s.profile_name for s in sources}
68
+ is_homogeneous = len(unique_adapters) == 1 and len(unique_profiles) == 1
69
+
70
+ # Estimate data size
71
+ total_size_mb = sum(s.estimated_size_mb or 0 for s in sources)
72
+ total_rows = sum(s.estimated_rows or 0 for s in sources)
73
+
74
+ # Create initial plan
75
+ plan = QueryExecutionPlan(
76
+ strategy=ExecutionStrategy.AUTO,
77
+ sources=sources,
78
+ is_homogeneous=is_homogeneous,
79
+ estimated_data_size_mb=total_size_mb,
80
+ estimated_rows=total_rows,
81
+ )
82
+
83
+ # If homogeneous, can potentially pushdown
84
+ if is_homogeneous and sources:
85
+ plan.pushdown_target = sources[0].profile_name
86
+
87
+ return plan
88
+
89
+ def select_strategy(
90
+ self,
91
+ execution_plan: QueryExecutionPlan,
92
+ requested_engine: Optional[str] = None,
93
+ model_node: Optional[Any] = None,
94
+ ) -> QueryExecutionPlan:
95
+ """
96
+ Select execution strategy for query.
97
+
98
+ Args:
99
+ execution_plan: Query execution plan
100
+ requested_engine: User-requested engine (overrides auto-selection)
101
+ model_node: Optional model node with DVT config overrides
102
+
103
+ Returns:
104
+ Updated execution plan with selected strategy
105
+ """
106
+ # Check for model-level config overrides
107
+ model_config = self._extract_model_config(model_node) if model_node else {}
108
+
109
+ # Model config takes precedence over requested_engine parameter
110
+ if model_config.get("compute_engine"):
111
+ requested_engine = model_config["compute_engine"]
112
+ elif model_config.get("pushdown_enabled") is True:
113
+ requested_engine = "pushdown"
114
+ elif model_config.get("pushdown_enabled") is False:
115
+ # Explicitly disable pushdown, force compute layer
116
+ if not requested_engine:
117
+ requested_engine = self.compute_config.default_engine
118
+
119
+ # If user requested specific engine, honor it
120
+ if requested_engine:
121
+ if requested_engine == "pushdown":
122
+ if execution_plan.is_pushdown_possible():
123
+ execution_plan.strategy = ExecutionStrategy.PUSHDOWN
124
+ execution_plan.compute_engine = None
125
+ execution_plan.reason = "User requested pushdown"
126
+ else:
127
+ raise DbtRuntimeError(
128
+ "Pushdown requested but not possible: "
129
+ f"Query references {len(execution_plan.get_unique_profiles())} profiles"
130
+ )
131
+ else:
132
+ execution_plan.strategy = ExecutionStrategy.COMPUTE_LAYER
133
+ execution_plan.compute_engine = requested_engine
134
+ execution_plan.reason = f"User requested {requested_engine}"
135
+ return execution_plan
136
+
137
+ # Use auto-selection rules
138
+ if self.auto_select_config.enabled:
139
+ return self._apply_auto_select_rules(execution_plan)
140
+ else:
141
+ # Auto-selection disabled, use default engine
142
+ return self._use_default_engine(execution_plan)
143
+
144
+ def _apply_auto_select_rules(self, execution_plan: QueryExecutionPlan) -> QueryExecutionPlan:
145
+ """
146
+ Apply auto-selection rules to choose strategy.
147
+
148
+ Args:
149
+ execution_plan: Execution plan
150
+
151
+ Returns:
152
+ Updated execution plan
153
+ """
154
+ # Rules are already sorted by priority
155
+ for rule in self.auto_select_config.rules:
156
+ if self._evaluate_rule_condition(rule.condition, execution_plan):
157
+ # Rule matches - apply action
158
+ if rule.action == "use_pushdown":
159
+ if execution_plan.is_pushdown_possible():
160
+ execution_plan.strategy = ExecutionStrategy.PUSHDOWN
161
+ execution_plan.compute_engine = None
162
+ execution_plan.reason = (
163
+ f"Auto-select rule '{rule.name}': {rule.description}"
164
+ )
165
+ fire_event(Note(msg=f"Selected PUSHDOWN via rule '{rule.name}'"))
166
+ return execution_plan
167
+
168
+ elif rule.action == "use_duckdb":
169
+ execution_plan.strategy = ExecutionStrategy.COMPUTE_LAYER
170
+ execution_plan.compute_engine = "duckdb"
171
+ execution_plan.reason = f"Auto-select rule '{rule.name}': {rule.description}"
172
+ fire_event(Note(msg=f"Selected DUCKDB via rule '{rule.name}'"))
173
+ return execution_plan
174
+
175
+ elif rule.action == "use_spark_local":
176
+ execution_plan.strategy = ExecutionStrategy.COMPUTE_LAYER
177
+ execution_plan.compute_engine = "spark_local"
178
+ execution_plan.reason = f"Auto-select rule '{rule.name}': {rule.description}"
179
+ fire_event(Note(msg=f"Selected SPARK_LOCAL via rule '{rule.name}'"))
180
+ return execution_plan
181
+
182
+ elif rule.action == "use_spark_cluster":
183
+ execution_plan.strategy = ExecutionStrategy.COMPUTE_LAYER
184
+ execution_plan.compute_engine = "spark_cluster"
185
+ execution_plan.reason = f"Auto-select rule '{rule.name}': {rule.description}"
186
+ fire_event(Note(msg=f"Selected SPARK_CLUSTER via rule '{rule.name}'"))
187
+ return execution_plan
188
+
189
+ # No rule matched - use default
190
+ return self._use_default_engine(execution_plan)
191
+
192
+ def _evaluate_rule_condition(self, condition, execution_plan: QueryExecutionPlan) -> bool:
193
+ """
194
+ Evaluate a rule condition.
195
+
196
+ Args:
197
+ condition: Condition to evaluate (string or dict)
198
+ execution_plan: Execution plan
199
+
200
+ Returns:
201
+ True if condition is met
202
+ """
203
+ # Simple string conditions
204
+ if isinstance(condition, str):
205
+ if condition == "always":
206
+ return True
207
+ elif condition == "model_has_compute_engine_config":
208
+ # TODO: Check if model has explicit compute_engine config
209
+ return False
210
+ else:
211
+ return False
212
+
213
+ # Dictionary conditions
214
+ if isinstance(condition, dict):
215
+ condition_type = condition.get("type", "and")
216
+
217
+ # Handle 'and' conditions
218
+ if condition_type == "and":
219
+ conditions = condition.get("conditions", [])
220
+ return all(self._evaluate_single_condition(c, execution_plan) for c in conditions)
221
+
222
+ # Handle 'or' conditions
223
+ elif condition_type == "or":
224
+ conditions = condition.get("conditions", [])
225
+ return any(self._evaluate_single_condition(c, execution_plan) for c in conditions)
226
+
227
+ # Single condition dict
228
+ else:
229
+ return self._evaluate_single_condition(condition, execution_plan)
230
+
231
+ return False
232
+
233
+ def _evaluate_single_condition(
234
+ self, condition: Dict, execution_plan: QueryExecutionPlan
235
+ ) -> bool:
236
+ """Evaluate a single condition."""
237
+ # Homogeneous sources
238
+ if "homogeneous_sources" in condition:
239
+ expected = condition["homogeneous_sources"]
240
+ return execution_plan.is_homogeneous == expected
241
+
242
+ # Same as target
243
+ if "same_as_target" in condition:
244
+ # TODO: Check if sources match target
245
+ # For now, assume true if homogeneous
246
+ return execution_plan.is_homogeneous
247
+
248
+ # Data size estimate
249
+ if "data_size_estimate" in condition:
250
+ size_condition = condition["data_size_estimate"]
251
+ if isinstance(size_condition, str):
252
+ # Parse conditions like "< 1GB", "> 10GB"
253
+ return self._parse_size_condition(
254
+ size_condition, execution_plan.estimated_data_size_mb
255
+ )
256
+
257
+ # Row count estimate
258
+ if "row_count_estimate" in condition:
259
+ row_condition = condition["row_count_estimate"]
260
+ if isinstance(row_condition, str):
261
+ # Parse conditions like "> 100000000"
262
+ return self._parse_row_condition(row_condition, execution_plan.estimated_rows)
263
+
264
+ # Heterogeneous sources
265
+ if "heterogeneous_sources" in condition:
266
+ expected = condition["heterogeneous_sources"]
267
+ return (not execution_plan.is_homogeneous) == expected
268
+
269
+ # Adapter count
270
+ if "adapter_count" in condition:
271
+ count_condition = condition["adapter_count"]
272
+ actual_count = len(execution_plan.get_unique_adapters())
273
+ if isinstance(count_condition, str):
274
+ return self._parse_comparison(count_condition, actual_count)
275
+
276
+ return False
277
+
278
+ def _parse_size_condition(self, condition: str, size_mb: float) -> bool:
279
+ """Parse size condition like '< 1GB' or '> 10GB'."""
280
+ condition = condition.strip()
281
+
282
+ # Extract operator and value
283
+ if condition.startswith(">="):
284
+ op = ">="
285
+ value_str = condition[2:].strip()
286
+ elif condition.startswith("<="):
287
+ op = "<="
288
+ value_str = condition[2:].strip()
289
+ elif condition.startswith(">"):
290
+ op = ">"
291
+ value_str = condition[1:].strip()
292
+ elif condition.startswith("<"):
293
+ op = "<"
294
+ value_str = condition[1:].strip()
295
+ else:
296
+ return False
297
+
298
+ # Parse value (handle GB, MB units)
299
+ value_mb = self._parse_size_value(value_str)
300
+
301
+ # Compare
302
+ if op == ">":
303
+ return size_mb > value_mb
304
+ elif op == ">=":
305
+ return size_mb >= value_mb
306
+ elif op == "<":
307
+ return size_mb < value_mb
308
+ elif op == "<=":
309
+ return size_mb <= value_mb
310
+
311
+ return False
312
+
313
+ def _parse_size_value(self, value_str: str) -> float:
314
+ """Parse size value like '1GB' or '100MB' to MB."""
315
+ value_str = value_str.strip().upper()
316
+
317
+ if value_str.endswith("GB"):
318
+ return float(value_str[:-2]) * 1024
319
+ elif value_str.endswith("MB"):
320
+ return float(value_str[:-2])
321
+ elif value_str.endswith("KB"):
322
+ return float(value_str[:-2]) / 1024
323
+ else:
324
+ # Assume MB
325
+ return float(value_str)
326
+
327
+ def _parse_row_condition(self, condition: str, row_count: int) -> bool:
328
+ """Parse row condition like '> 100000000'."""
329
+ condition = condition.strip()
330
+
331
+ # Extract operator and value
332
+ if condition.startswith(">="):
333
+ op = ">="
334
+ value = int(condition[2:].strip())
335
+ elif condition.startswith("<="):
336
+ op = "<="
337
+ value = int(condition[2:].strip())
338
+ elif condition.startswith(">"):
339
+ op = ">"
340
+ value = int(condition[1:].strip())
341
+ elif condition.startswith("<"):
342
+ op = "<"
343
+ value = int(condition[1:].strip())
344
+ else:
345
+ return False
346
+
347
+ # Compare
348
+ if op == ">":
349
+ return row_count > value
350
+ elif op == ">=":
351
+ return row_count >= value
352
+ elif op == "<":
353
+ return row_count < value
354
+ elif op == "<=":
355
+ return row_count <= value
356
+
357
+ return False
358
+
359
+ def _parse_comparison(self, condition: str, value: int) -> bool:
360
+ """Parse comparison like '> 2'."""
361
+ condition = condition.strip()
362
+
363
+ if condition.startswith(">="):
364
+ return value >= int(condition[2:].strip())
365
+ elif condition.startswith("<="):
366
+ return value <= int(condition[2:].strip())
367
+ elif condition.startswith(">"):
368
+ return value > int(condition[1:].strip())
369
+ elif condition.startswith("<"):
370
+ return value < int(condition[1:].strip())
371
+ elif condition.startswith("=="):
372
+ return value == int(condition[2:].strip())
373
+
374
+ return False
375
+
376
+ def _use_default_engine(self, execution_plan: QueryExecutionPlan) -> QueryExecutionPlan:
377
+ """Use default engine from configuration."""
378
+ default_engine = self.compute_config.default_engine
379
+
380
+ if default_engine == "auto":
381
+ # Use heuristics
382
+ if execution_plan.is_pushdown_possible():
383
+ execution_plan.strategy = ExecutionStrategy.PUSHDOWN
384
+ execution_plan.compute_engine = None
385
+ execution_plan.reason = "Default: Pushdown for homogeneous sources"
386
+ elif execution_plan.estimated_data_size_mb < 1024: # < 1GB
387
+ execution_plan.strategy = ExecutionStrategy.COMPUTE_LAYER
388
+ execution_plan.compute_engine = "duckdb"
389
+ execution_plan.reason = "Default: DuckDB for small data"
390
+ else:
391
+ execution_plan.strategy = ExecutionStrategy.COMPUTE_LAYER
392
+ execution_plan.compute_engine = "spark_local"
393
+ execution_plan.reason = "Default: Spark for large data"
394
+ else:
395
+ # Use specified default
396
+ if default_engine == "pushdown":
397
+ if execution_plan.is_pushdown_possible():
398
+ execution_plan.strategy = ExecutionStrategy.PUSHDOWN
399
+ execution_plan.compute_engine = None
400
+ else:
401
+ # Fall back to DuckDB
402
+ execution_plan.strategy = ExecutionStrategy.COMPUTE_LAYER
403
+ execution_plan.compute_engine = "duckdb"
404
+ execution_plan.reason = "Pushdown not possible, using DuckDB"
405
+ else:
406
+ execution_plan.strategy = ExecutionStrategy.COMPUTE_LAYER
407
+ execution_plan.compute_engine = default_engine
408
+
409
+ execution_plan.reason = f"Default engine: {default_engine}"
410
+
411
+ return execution_plan
412
+
413
+ def execute(
414
+ self,
415
+ sql: str,
416
+ execution_plan: QueryExecutionPlan,
417
+ ) -> ComputeResult:
418
+ """
419
+ Execute query using selected strategy.
420
+
421
+ Args:
422
+ sql: SQL query
423
+ execution_plan: Execution plan with selected strategy
424
+
425
+ Returns:
426
+ ComputeResult
427
+ """
428
+ # Get engine for execution
429
+ if execution_plan.strategy == ExecutionStrategy.PUSHDOWN:
430
+ engine_name = "pushdown"
431
+ else:
432
+ engine_name = execution_plan.compute_engine or "duckdb"
433
+
434
+ # Get engine instance
435
+ engine = self.available_engines.get(engine_name)
436
+ if not engine:
437
+ return ComputeResult(
438
+ success=False,
439
+ error=f"Compute engine '{engine_name}' not available",
440
+ )
441
+
442
+ # Check if engine can handle this plan
443
+ if not engine.can_handle(execution_plan):
444
+ return ComputeResult(
445
+ success=False,
446
+ error=f"Engine '{engine_name}' cannot handle this execution plan",
447
+ )
448
+
449
+ # Execute
450
+ fire_event(Note(msg=f"Executing via {engine_name}: {execution_plan.reason}"))
451
+ return engine.execute_query(sql, execution_plan)
452
+
453
+ def _extract_model_config(self, model_node: Any) -> Dict[str, Any]:
454
+ """
455
+ Extract DVT-specific config from model node.
456
+
457
+ Args:
458
+ model_node: Model node (ModelNode, etc.)
459
+
460
+ Returns:
461
+ Dictionary with DVT config fields
462
+ """
463
+ config_dict = {}
464
+
465
+ if hasattr(model_node, "config"):
466
+ model_config = model_node.config
467
+
468
+ # Extract compute_engine
469
+ if hasattr(model_config, "compute_engine") and model_config.compute_engine:
470
+ config_dict["compute_engine"] = model_config.compute_engine
471
+
472
+ # Extract pushdown_enabled
473
+ if (
474
+ hasattr(model_config, "pushdown_enabled")
475
+ and model_config.pushdown_enabled is not None
476
+ ):
477
+ config_dict["pushdown_enabled"] = model_config.pushdown_enabled
478
+
479
+ # Extract target_profile
480
+ if hasattr(model_config, "target_profile") and model_config.target_profile:
481
+ config_dict["target_profile"] = model_config.target_profile
482
+
483
+ return config_dict
dvt/config/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ # all these are just exports, they need "noqa" so flake8 will not complain.
2
+ from .profile import Profile # noqa
3
+ from .project import IsFQNResource, PartialProject, Project # noqa
4
+ from .runtime import RuntimeConfig # noqa
dvt/config/catalogs.py ADDED
@@ -0,0 +1,95 @@
1
+ import os
2
+ from copy import deepcopy
3
+ from typing import Any, Dict, List, Optional
4
+
5
+ from dvt.artifacts.resources import Catalog, CatalogWriteIntegrationConfig
6
+ from dvt.clients.yaml_helper import load_yaml_text
7
+ from dvt.config.renderer import SecretRenderer
8
+ from dvt.constants import CATALOGS_FILE_NAME
9
+ from dvt.exceptions import YamlLoadError
10
+
11
+ from dbt_common.clients.system import load_file_contents
12
+ from dbt_common.exceptions import CompilationError, DbtValidationError
13
+
14
+
15
def load_catalogs_yml(project_dir: str, project_name: str) -> Dict[str, Any]:
    """Load and parse the project's catalogs file.

    Args:
        project_dir: Directory containing the catalogs file.
        project_name: Project name used in error reporting.

    Returns:
        The parsed yml mapping, or {} when the file does not exist.

    Raises:
        YamlLoadError: When the file exists but is empty or invalid.
    """
    path = os.path.join(project_dir, CATALOGS_FILE_NAME)

    if not os.path.isfile(path):
        # No catalogs file in this project: nothing to load.
        return {}

    try:
        raw = load_file_contents(path, strip=False)
        parsed = load_yaml_text(raw)

        if not parsed:
            raise DbtValidationError(f"The file at {path} is empty")

        return parsed
    except DbtValidationError as e:
        raise YamlLoadError(project_name=project_name, path=CATALOGS_FILE_NAME, exc=e)
31
+
32
+
33
def load_single_catalog(raw_catalog: Dict[str, Any], renderer: SecretRenderer) -> Catalog:
    """Render, validate, and construct one Catalog from its raw yml dict.

    Args:
        raw_catalog: Unrendered catalog entry from catalogs.yml.
        renderer: Secret renderer applied to the raw entry.

    Returns:
        The constructed Catalog with a resolved active_write_integration.

    Raises:
        DbtValidationError: On render failure, duplicate integration names,
            or a missing/unknown active_write_integration.
    """
    try:
        rendered_catalog = renderer.render_data(raw_catalog)
    except CompilationError as exc:
        raise DbtValidationError(str(exc)) from exc

    Catalog.validate(rendered_catalog)

    integrations = []
    seen_names = set()
    for raw_integration in rendered_catalog.get("write_integrations", []):
        integration_name = raw_integration["name"]
        if integration_name in seen_names:
            raise DbtValidationError(
                f"Catalog '{rendered_catalog['name']}' cannot have multiple 'write_integrations' with the same name: '{integration_name}'."
            )

        # Deeper validation of each integration config is left to the adapter.
        # NOTE(review): catalog_name comes from the *raw* dict while the error
        # messages use the rendered name — confirm this mix is intentional.
        integrations.append(
            CatalogWriteIntegrationConfig(**raw_integration, catalog_name=raw_catalog["name"])
        )
        seen_names.add(integration_name)

    # Default the active integration when exactly one is defined; otherwise it
    # must be named explicitly and must refer to a defined integration.
    active_name = rendered_catalog.get("active_write_integration")
    known_names = [integration.name for integration in integrations]

    if not active_name:
        if len(known_names) == 1:
            active_name = integrations[0].name
        else:
            raise DbtValidationError(
                f"Catalog '{rendered_catalog['name']}' must specify an 'active_write_integration' when multiple 'write_integrations' are provided."
            )
    elif active_name not in known_names:
        raise DbtValidationError(
            f"Catalog '{rendered_catalog['name']}' must specify an 'active_write_integration' from its set of defined 'write_integrations': {known_names}. Got: '{active_name}'."
        )

    return Catalog(
        name=raw_catalog["name"],
        active_write_integration=active_name,
        write_integrations=integrations,
    )
78
+
79
+
80
def load_catalogs(project_dir: str, project_name: str, cli_vars: Dict[str, Any]) -> List[Catalog]:
    """Load every catalog defined in the project's catalogs file.

    Args:
        project_dir: Directory containing the catalogs file.
        project_name: Project name used in error reporting.
        cli_vars: CLI vars made available to the secret renderer.

    Returns:
        One Catalog per entry under the top-level "catalogs" key.
    """
    renderer = SecretRenderer(cli_vars)
    raw_catalogs = load_catalogs_yml(project_dir, project_name).get("catalogs", [])
    return [load_single_catalog(raw, renderer) for raw in raw_catalogs]
85
+
86
+
87
def get_active_write_integration(catalog: Catalog) -> Optional[CatalogWriteIntegrationConfig]:
    """Return a copy of the catalog's active write integration, or None.

    The returned copy is re-labelled: its catalog_name is set to the
    integration's own name and its name to the catalog's name.
    NOTE(review): that swap looks surprising — confirm it matches what the
    adapter expects before relying on it.
    """
    for integration in catalog.write_integrations:
        if integration.name != catalog.active_write_integration:
            continue
        active = deepcopy(integration)
        active.catalog_name = active.name
        active.name = catalog.name
        return active

    # No integration matches the configured active name.
    return None