dvt-core 1.11.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dvt/__init__.py +7 -0
- dvt/_pydantic_shim.py +26 -0
- dvt/adapters/__init__.py +16 -0
- dvt/adapters/multi_adapter_manager.py +268 -0
- dvt/artifacts/__init__.py +0 -0
- dvt/artifacts/exceptions/__init__.py +1 -0
- dvt/artifacts/exceptions/schemas.py +31 -0
- dvt/artifacts/resources/__init__.py +116 -0
- dvt/artifacts/resources/base.py +68 -0
- dvt/artifacts/resources/types.py +93 -0
- dvt/artifacts/resources/v1/analysis.py +10 -0
- dvt/artifacts/resources/v1/catalog.py +23 -0
- dvt/artifacts/resources/v1/components.py +275 -0
- dvt/artifacts/resources/v1/config.py +282 -0
- dvt/artifacts/resources/v1/documentation.py +11 -0
- dvt/artifacts/resources/v1/exposure.py +52 -0
- dvt/artifacts/resources/v1/function.py +53 -0
- dvt/artifacts/resources/v1/generic_test.py +32 -0
- dvt/artifacts/resources/v1/group.py +22 -0
- dvt/artifacts/resources/v1/hook.py +11 -0
- dvt/artifacts/resources/v1/macro.py +30 -0
- dvt/artifacts/resources/v1/metric.py +173 -0
- dvt/artifacts/resources/v1/model.py +146 -0
- dvt/artifacts/resources/v1/owner.py +10 -0
- dvt/artifacts/resources/v1/saved_query.py +112 -0
- dvt/artifacts/resources/v1/seed.py +42 -0
- dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dvt/artifacts/resources/v1/semantic_model.py +315 -0
- dvt/artifacts/resources/v1/singular_test.py +14 -0
- dvt/artifacts/resources/v1/snapshot.py +92 -0
- dvt/artifacts/resources/v1/source_definition.py +85 -0
- dvt/artifacts/resources/v1/sql_operation.py +10 -0
- dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
- dvt/artifacts/schemas/__init__.py +0 -0
- dvt/artifacts/schemas/base.py +191 -0
- dvt/artifacts/schemas/batch_results.py +24 -0
- dvt/artifacts/schemas/catalog/__init__.py +12 -0
- dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
- dvt/artifacts/schemas/freshness/__init__.py +1 -0
- dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
- dvt/artifacts/schemas/manifest/__init__.py +2 -0
- dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
- dvt/artifacts/schemas/results.py +148 -0
- dvt/artifacts/schemas/run/__init__.py +2 -0
- dvt/artifacts/schemas/run/v5/__init__.py +0 -0
- dvt/artifacts/schemas/run/v5/run.py +184 -0
- dvt/artifacts/schemas/upgrades/__init__.py +4 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dvt/artifacts/utils/validation.py +153 -0
- dvt/cli/__init__.py +1 -0
- dvt/cli/context.py +16 -0
- dvt/cli/exceptions.py +56 -0
- dvt/cli/flags.py +558 -0
- dvt/cli/main.py +971 -0
- dvt/cli/option_types.py +121 -0
- dvt/cli/options.py +79 -0
- dvt/cli/params.py +803 -0
- dvt/cli/requires.py +478 -0
- dvt/cli/resolvers.py +32 -0
- dvt/cli/types.py +40 -0
- dvt/clients/__init__.py +0 -0
- dvt/clients/checked_load.py +82 -0
- dvt/clients/git.py +164 -0
- dvt/clients/jinja.py +206 -0
- dvt/clients/jinja_static.py +245 -0
- dvt/clients/registry.py +192 -0
- dvt/clients/yaml_helper.py +68 -0
- dvt/compilation.py +833 -0
- dvt/compute/__init__.py +26 -0
- dvt/compute/base.py +288 -0
- dvt/compute/engines/__init__.py +13 -0
- dvt/compute/engines/duckdb_engine.py +368 -0
- dvt/compute/engines/spark_engine.py +273 -0
- dvt/compute/query_analyzer.py +212 -0
- dvt/compute/router.py +483 -0
- dvt/config/__init__.py +4 -0
- dvt/config/catalogs.py +95 -0
- dvt/config/compute_config.py +406 -0
- dvt/config/profile.py +411 -0
- dvt/config/profiles_v2.py +464 -0
- dvt/config/project.py +893 -0
- dvt/config/renderer.py +232 -0
- dvt/config/runtime.py +491 -0
- dvt/config/selectors.py +209 -0
- dvt/config/utils.py +78 -0
- dvt/connectors/.gitignore +6 -0
- dvt/connectors/README.md +306 -0
- dvt/connectors/catalog.yml +217 -0
- dvt/connectors/download_connectors.py +300 -0
- dvt/constants.py +29 -0
- dvt/context/__init__.py +0 -0
- dvt/context/base.py +746 -0
- dvt/context/configured.py +136 -0
- dvt/context/context_config.py +350 -0
- dvt/context/docs.py +82 -0
- dvt/context/exceptions_jinja.py +179 -0
- dvt/context/macro_resolver.py +195 -0
- dvt/context/macros.py +171 -0
- dvt/context/manifest.py +73 -0
- dvt/context/providers.py +2198 -0
- dvt/context/query_header.py +14 -0
- dvt/context/secret.py +59 -0
- dvt/context/target.py +74 -0
- dvt/contracts/__init__.py +0 -0
- dvt/contracts/files.py +413 -0
- dvt/contracts/graph/__init__.py +0 -0
- dvt/contracts/graph/manifest.py +1904 -0
- dvt/contracts/graph/metrics.py +98 -0
- dvt/contracts/graph/model_config.py +71 -0
- dvt/contracts/graph/node_args.py +42 -0
- dvt/contracts/graph/nodes.py +1806 -0
- dvt/contracts/graph/semantic_manifest.py +233 -0
- dvt/contracts/graph/unparsed.py +812 -0
- dvt/contracts/project.py +417 -0
- dvt/contracts/results.py +53 -0
- dvt/contracts/selection.py +23 -0
- dvt/contracts/sql.py +86 -0
- dvt/contracts/state.py +69 -0
- dvt/contracts/util.py +46 -0
- dvt/deprecations.py +347 -0
- dvt/deps/__init__.py +0 -0
- dvt/deps/base.py +153 -0
- dvt/deps/git.py +196 -0
- dvt/deps/local.py +80 -0
- dvt/deps/registry.py +131 -0
- dvt/deps/resolver.py +149 -0
- dvt/deps/tarball.py +121 -0
- dvt/docs/source/_ext/dbt_click.py +118 -0
- dvt/docs/source/conf.py +32 -0
- dvt/env_vars.py +64 -0
- dvt/event_time/event_time.py +40 -0
- dvt/event_time/sample_window.py +60 -0
- dvt/events/__init__.py +16 -0
- dvt/events/base_types.py +37 -0
- dvt/events/core_types_pb2.py +2 -0
- dvt/events/logging.py +109 -0
- dvt/events/types.py +2534 -0
- dvt/exceptions.py +1487 -0
- dvt/flags.py +89 -0
- dvt/graph/__init__.py +11 -0
- dvt/graph/cli.py +248 -0
- dvt/graph/graph.py +172 -0
- dvt/graph/queue.py +213 -0
- dvt/graph/selector.py +375 -0
- dvt/graph/selector_methods.py +976 -0
- dvt/graph/selector_spec.py +223 -0
- dvt/graph/thread_pool.py +18 -0
- dvt/hooks.py +21 -0
- dvt/include/README.md +49 -0
- dvt/include/__init__.py +3 -0
- dvt/include/global_project.py +4 -0
- dvt/include/starter_project/.gitignore +4 -0
- dvt/include/starter_project/README.md +15 -0
- dvt/include/starter_project/__init__.py +3 -0
- dvt/include/starter_project/analyses/.gitkeep +0 -0
- dvt/include/starter_project/dvt_project.yml +36 -0
- dvt/include/starter_project/macros/.gitkeep +0 -0
- dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
- dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
- dvt/include/starter_project/models/example/schema.yml +21 -0
- dvt/include/starter_project/seeds/.gitkeep +0 -0
- dvt/include/starter_project/snapshots/.gitkeep +0 -0
- dvt/include/starter_project/tests/.gitkeep +0 -0
- dvt/internal_deprecations.py +27 -0
- dvt/jsonschemas/__init__.py +3 -0
- dvt/jsonschemas/jsonschemas.py +309 -0
- dvt/jsonschemas/project/0.0.110.json +4717 -0
- dvt/jsonschemas/project/0.0.85.json +2015 -0
- dvt/jsonschemas/resources/0.0.110.json +2636 -0
- dvt/jsonschemas/resources/0.0.85.json +2536 -0
- dvt/jsonschemas/resources/latest.json +6773 -0
- dvt/links.py +4 -0
- dvt/materializations/__init__.py +0 -0
- dvt/materializations/incremental/__init__.py +0 -0
- dvt/materializations/incremental/microbatch.py +235 -0
- dvt/mp_context.py +8 -0
- dvt/node_types.py +37 -0
- dvt/parser/__init__.py +23 -0
- dvt/parser/analysis.py +21 -0
- dvt/parser/base.py +549 -0
- dvt/parser/common.py +267 -0
- dvt/parser/docs.py +52 -0
- dvt/parser/fixtures.py +51 -0
- dvt/parser/functions.py +30 -0
- dvt/parser/generic_test.py +100 -0
- dvt/parser/generic_test_builders.py +334 -0
- dvt/parser/hooks.py +119 -0
- dvt/parser/macros.py +137 -0
- dvt/parser/manifest.py +2204 -0
- dvt/parser/models.py +574 -0
- dvt/parser/partial.py +1179 -0
- dvt/parser/read_files.py +445 -0
- dvt/parser/schema_generic_tests.py +423 -0
- dvt/parser/schema_renderer.py +111 -0
- dvt/parser/schema_yaml_readers.py +936 -0
- dvt/parser/schemas.py +1467 -0
- dvt/parser/search.py +149 -0
- dvt/parser/seeds.py +28 -0
- dvt/parser/singular_test.py +20 -0
- dvt/parser/snapshots.py +44 -0
- dvt/parser/sources.py +557 -0
- dvt/parser/sql.py +63 -0
- dvt/parser/unit_tests.py +622 -0
- dvt/plugins/__init__.py +20 -0
- dvt/plugins/contracts.py +10 -0
- dvt/plugins/exceptions.py +2 -0
- dvt/plugins/manager.py +164 -0
- dvt/plugins/manifest.py +21 -0
- dvt/profiler.py +20 -0
- dvt/py.typed +1 -0
- dvt/runners/__init__.py +2 -0
- dvt/runners/exposure_runner.py +7 -0
- dvt/runners/no_op_runner.py +46 -0
- dvt/runners/saved_query_runner.py +7 -0
- dvt/selected_resources.py +8 -0
- dvt/task/__init__.py +0 -0
- dvt/task/base.py +504 -0
- dvt/task/build.py +197 -0
- dvt/task/clean.py +57 -0
- dvt/task/clone.py +162 -0
- dvt/task/compile.py +151 -0
- dvt/task/compute.py +366 -0
- dvt/task/debug.py +650 -0
- dvt/task/deps.py +280 -0
- dvt/task/docs/__init__.py +3 -0
- dvt/task/docs/generate.py +408 -0
- dvt/task/docs/index.html +250 -0
- dvt/task/docs/serve.py +28 -0
- dvt/task/freshness.py +323 -0
- dvt/task/function.py +122 -0
- dvt/task/group_lookup.py +46 -0
- dvt/task/init.py +374 -0
- dvt/task/list.py +237 -0
- dvt/task/printer.py +176 -0
- dvt/task/profiles.py +256 -0
- dvt/task/retry.py +175 -0
- dvt/task/run.py +1146 -0
- dvt/task/run_operation.py +142 -0
- dvt/task/runnable.py +802 -0
- dvt/task/seed.py +104 -0
- dvt/task/show.py +150 -0
- dvt/task/snapshot.py +57 -0
- dvt/task/sql.py +111 -0
- dvt/task/test.py +464 -0
- dvt/tests/fixtures/__init__.py +1 -0
- dvt/tests/fixtures/project.py +620 -0
- dvt/tests/util.py +651 -0
- dvt/tracking.py +529 -0
- dvt/utils/__init__.py +3 -0
- dvt/utils/artifact_upload.py +151 -0
- dvt/utils/utils.py +408 -0
- dvt/version.py +249 -0
- dvt_core-1.11.0b4.dist-info/METADATA +252 -0
- dvt_core-1.11.0b4.dist-info/RECORD +261 -0
- dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
- dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
- dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/compute/router.py
ADDED
@@ -0,0 +1,483 @@
"""
Execution router for DVT.

This module analyzes queries and routes them to the optimal execution engine
(pushdown vs compute layer).
"""

from typing import Any, Dict, List, Optional

from dvt.compute.base import (
    BaseComputeEngine,
    ComputeResult,
    ExecutionStrategy,
    QueryExecutionPlan,
    SourceInfo,
)
from dvt.config.compute_config import AutoSelectConfig, ComputeConfig
from dvt.events import fire_event
from dvt.events.types import Note

from dbt.adapters.exceptions import DbtRuntimeError


class ExecutionRouter:
    """
    Routes queries to the optimal execution engine.

    Analyzes query execution plans and selects:
    - Pushdown (execute on source database)
    - DuckDB (lightweight compute layer)
    - Spark (heavy-duty compute layer)
    """

    def __init__(
        self,
        compute_config: ComputeConfig,
        available_engines: Dict[str, BaseComputeEngine],
    ):
        """
        Initialize execution router.

        Args:
            compute_config: Compute configuration
            available_engines: Dictionary of available engines by name
        """
        self.compute_config = compute_config
        self.available_engines = available_engines
        self.auto_select_config = compute_config.auto_select

    def analyze_query(
        self,
        sql: str,
        sources: List[SourceInfo],
    ) -> QueryExecutionPlan:
        """
        Analyze query and create execution plan.

        Args:
            sql: SQL query
            sources: List of source information

        Returns:
            QueryExecutionPlan
        """
        # Calculate metrics
        unique_adapters = {s.adapter_type for s in sources}
        unique_profiles = {s.profile_name for s in sources}
        is_homogeneous = len(unique_adapters) == 1 and len(unique_profiles) == 1

        # Estimate data size
        total_size_mb = sum(s.estimated_size_mb or 0 for s in sources)
        total_rows = sum(s.estimated_rows or 0 for s in sources)

        # Create initial plan
        plan = QueryExecutionPlan(
            strategy=ExecutionStrategy.AUTO,
            sources=sources,
            is_homogeneous=is_homogeneous,
            estimated_data_size_mb=total_size_mb,
            estimated_rows=total_rows,
        )

        # If homogeneous, can potentially pushdown
        if is_homogeneous and sources:
            plan.pushdown_target = sources[0].profile_name

        return plan

    def select_strategy(
        self,
        execution_plan: QueryExecutionPlan,
        requested_engine: Optional[str] = None,
        model_node: Optional[Any] = None,
    ) -> QueryExecutionPlan:
        """
        Select execution strategy for query.

        Args:
            execution_plan: Query execution plan
            requested_engine: User-requested engine (overrides auto-selection)
            model_node: Optional model node with DVT config overrides

        Returns:
            Updated execution plan with selected strategy
        """
        # Check for model-level config overrides
        model_config = self._extract_model_config(model_node) if model_node else {}

        # Model config takes precedence over requested_engine parameter
        if model_config.get("compute_engine"):
            requested_engine = model_config["compute_engine"]
        elif model_config.get("pushdown_enabled") is True:
            requested_engine = "pushdown"
        elif model_config.get("pushdown_enabled") is False:
            # Explicitly disable pushdown, force compute layer
            if not requested_engine:
                requested_engine = self.compute_config.default_engine

        # If user requested specific engine, honor it
        if requested_engine:
            if requested_engine == "pushdown":
                if execution_plan.is_pushdown_possible():
                    execution_plan.strategy = ExecutionStrategy.PUSHDOWN
                    execution_plan.compute_engine = None
                    execution_plan.reason = "User requested pushdown"
                else:
                    raise DbtRuntimeError(
                        "Pushdown requested but not possible: "
                        f"Query references {len(execution_plan.get_unique_profiles())} profiles"
                    )
            else:
                execution_plan.strategy = ExecutionStrategy.COMPUTE_LAYER
                execution_plan.compute_engine = requested_engine
                execution_plan.reason = f"User requested {requested_engine}"
            return execution_plan

        # Use auto-selection rules
        if self.auto_select_config.enabled:
            return self._apply_auto_select_rules(execution_plan)
        else:
            # Auto-selection disabled, use default engine
            return self._use_default_engine(execution_plan)

    def _apply_auto_select_rules(self, execution_plan: QueryExecutionPlan) -> QueryExecutionPlan:
        """
        Apply auto-selection rules to choose strategy.

        Args:
            execution_plan: Execution plan

        Returns:
            Updated execution plan
        """
        # Rules are already sorted by priority
        for rule in self.auto_select_config.rules:
            if self._evaluate_rule_condition(rule.condition, execution_plan):
                # Rule matches - apply action
                if rule.action == "use_pushdown":
                    if execution_plan.is_pushdown_possible():
                        execution_plan.strategy = ExecutionStrategy.PUSHDOWN
                        execution_plan.compute_engine = None
                        execution_plan.reason = (
                            f"Auto-select rule '{rule.name}': {rule.description}"
                        )
                        fire_event(Note(msg=f"Selected PUSHDOWN via rule '{rule.name}'"))
                        return execution_plan

                elif rule.action == "use_duckdb":
                    execution_plan.strategy = ExecutionStrategy.COMPUTE_LAYER
                    execution_plan.compute_engine = "duckdb"
                    execution_plan.reason = f"Auto-select rule '{rule.name}': {rule.description}"
                    fire_event(Note(msg=f"Selected DUCKDB via rule '{rule.name}'"))
                    return execution_plan

                elif rule.action == "use_spark_local":
                    execution_plan.strategy = ExecutionStrategy.COMPUTE_LAYER
                    execution_plan.compute_engine = "spark_local"
                    execution_plan.reason = f"Auto-select rule '{rule.name}': {rule.description}"
                    fire_event(Note(msg=f"Selected SPARK_LOCAL via rule '{rule.name}'"))
                    return execution_plan

                elif rule.action == "use_spark_cluster":
                    execution_plan.strategy = ExecutionStrategy.COMPUTE_LAYER
                    execution_plan.compute_engine = "spark_cluster"
                    execution_plan.reason = f"Auto-select rule '{rule.name}': {rule.description}"
                    fire_event(Note(msg=f"Selected SPARK_CLUSTER via rule '{rule.name}'"))
                    return execution_plan

        # No rule matched - use default
        return self._use_default_engine(execution_plan)

    def _evaluate_rule_condition(self, condition, execution_plan: QueryExecutionPlan) -> bool:
        """
        Evaluate a rule condition.

        Args:
            condition: Condition to evaluate (string or dict)
            execution_plan: Execution plan

        Returns:
            True if condition is met
        """
        # Simple string conditions
        if isinstance(condition, str):
            if condition == "always":
                return True
            elif condition == "model_has_compute_engine_config":
                # TODO: Check if model has explicit compute_engine config
                return False
            else:
                return False

        # Dictionary conditions
        if isinstance(condition, dict):
            condition_type = condition.get("type", "and")

            # Handle 'and' conditions
            if condition_type == "and":
                conditions = condition.get("conditions", [])
                return all(self._evaluate_single_condition(c, execution_plan) for c in conditions)

            # Handle 'or' conditions
            elif condition_type == "or":
                conditions = condition.get("conditions", [])
                return any(self._evaluate_single_condition(c, execution_plan) for c in conditions)

            # Single condition dict
            else:
                return self._evaluate_single_condition(condition, execution_plan)

        return False

    def _evaluate_single_condition(
        self, condition: Dict, execution_plan: QueryExecutionPlan
    ) -> bool:
        """Evaluate a single condition."""
        # Homogeneous sources
        if "homogeneous_sources" in condition:
            expected = condition["homogeneous_sources"]
            return execution_plan.is_homogeneous == expected

        # Same as target
        if "same_as_target" in condition:
            # TODO: Check if sources match target
            # For now, assume true if homogeneous
            return execution_plan.is_homogeneous

        # Data size estimate
        if "data_size_estimate" in condition:
            size_condition = condition["data_size_estimate"]
            if isinstance(size_condition, str):
                # Parse conditions like "< 1GB", "> 10GB"
                return self._parse_size_condition(
                    size_condition, execution_plan.estimated_data_size_mb
                )

        # Row count estimate
        if "row_count_estimate" in condition:
            row_condition = condition["row_count_estimate"]
            if isinstance(row_condition, str):
                # Parse conditions like "> 100000000"
                return self._parse_row_condition(row_condition, execution_plan.estimated_rows)

        # Heterogeneous sources
        if "heterogeneous_sources" in condition:
            expected = condition["heterogeneous_sources"]
            return (not execution_plan.is_homogeneous) == expected

        # Adapter count
        if "adapter_count" in condition:
            count_condition = condition["adapter_count"]
            actual_count = len(execution_plan.get_unique_adapters())
            if isinstance(count_condition, str):
                return self._parse_comparison(count_condition, actual_count)

        return False

    def _parse_size_condition(self, condition: str, size_mb: float) -> bool:
        """Parse size condition like '< 1GB' or '> 10GB'."""
        condition = condition.strip()

        # Extract operator and value
        if condition.startswith(">="):
            op = ">="
            value_str = condition[2:].strip()
        elif condition.startswith("<="):
            op = "<="
            value_str = condition[2:].strip()
        elif condition.startswith(">"):
            op = ">"
            value_str = condition[1:].strip()
        elif condition.startswith("<"):
            op = "<"
            value_str = condition[1:].strip()
        else:
            return False

        # Parse value (handle GB, MB units)
        value_mb = self._parse_size_value(value_str)

        # Compare
        if op == ">":
            return size_mb > value_mb
        elif op == ">=":
            return size_mb >= value_mb
        elif op == "<":
            return size_mb < value_mb
        elif op == "<=":
            return size_mb <= value_mb

        return False

    def _parse_size_value(self, value_str: str) -> float:
        """Parse size value like '1GB' or '100MB' to MB."""
        value_str = value_str.strip().upper()

        if value_str.endswith("GB"):
            return float(value_str[:-2]) * 1024
        elif value_str.endswith("MB"):
            return float(value_str[:-2])
        elif value_str.endswith("KB"):
            return float(value_str[:-2]) / 1024
        else:
            # Assume MB
            return float(value_str)

    def _parse_row_condition(self, condition: str, row_count: int) -> bool:
        """Parse row condition like '> 100000000'."""
        condition = condition.strip()

        # Extract operator and value
        if condition.startswith(">="):
            op = ">="
            value = int(condition[2:].strip())
        elif condition.startswith("<="):
            op = "<="
            value = int(condition[2:].strip())
        elif condition.startswith(">"):
            op = ">"
            value = int(condition[1:].strip())
        elif condition.startswith("<"):
            op = "<"
            value = int(condition[1:].strip())
        else:
            return False

        # Compare
        if op == ">":
            return row_count > value
        elif op == ">=":
            return row_count >= value
        elif op == "<":
            return row_count < value
        elif op == "<=":
            return row_count <= value

        return False

    def _parse_comparison(self, condition: str, value: int) -> bool:
        """Parse comparison like '> 2'."""
        condition = condition.strip()

        if condition.startswith(">="):
            return value >= int(condition[2:].strip())
        elif condition.startswith("<="):
            return value <= int(condition[2:].strip())
        elif condition.startswith(">"):
            return value > int(condition[1:].strip())
        elif condition.startswith("<"):
            return value < int(condition[1:].strip())
        elif condition.startswith("=="):
            return value == int(condition[2:].strip())

        return False

    def _use_default_engine(self, execution_plan: QueryExecutionPlan) -> QueryExecutionPlan:
        """Use default engine from configuration."""
        default_engine = self.compute_config.default_engine

        if default_engine == "auto":
            # Use heuristics
            if execution_plan.is_pushdown_possible():
                execution_plan.strategy = ExecutionStrategy.PUSHDOWN
                execution_plan.compute_engine = None
                execution_plan.reason = "Default: Pushdown for homogeneous sources"
            elif execution_plan.estimated_data_size_mb < 1024:  # < 1GB
                execution_plan.strategy = ExecutionStrategy.COMPUTE_LAYER
                execution_plan.compute_engine = "duckdb"
                execution_plan.reason = "Default: DuckDB for small data"
            else:
                execution_plan.strategy = ExecutionStrategy.COMPUTE_LAYER
                execution_plan.compute_engine = "spark_local"
                execution_plan.reason = "Default: Spark for large data"
        else:
            # Use specified default
            if default_engine == "pushdown":
                if execution_plan.is_pushdown_possible():
                    execution_plan.strategy = ExecutionStrategy.PUSHDOWN
                    execution_plan.compute_engine = None
                else:
                    # Fall back to DuckDB
                    execution_plan.strategy = ExecutionStrategy.COMPUTE_LAYER
                    execution_plan.compute_engine = "duckdb"
                    execution_plan.reason = "Pushdown not possible, using DuckDB"
            else:
                execution_plan.strategy = ExecutionStrategy.COMPUTE_LAYER
                execution_plan.compute_engine = default_engine

            execution_plan.reason = f"Default engine: {default_engine}"

        return execution_plan

    def execute(
        self,
        sql: str,
        execution_plan: QueryExecutionPlan,
    ) -> ComputeResult:
        """
        Execute query using selected strategy.

        Args:
            sql: SQL query
            execution_plan: Execution plan with selected strategy

        Returns:
            ComputeResult
        """
        # Get engine for execution
        if execution_plan.strategy == ExecutionStrategy.PUSHDOWN:
            engine_name = "pushdown"
        else:
            engine_name = execution_plan.compute_engine or "duckdb"

        # Get engine instance
        engine = self.available_engines.get(engine_name)
        if not engine:
            return ComputeResult(
                success=False,
                error=f"Compute engine '{engine_name}' not available",
            )

        # Check if engine can handle this plan
        if not engine.can_handle(execution_plan):
            return ComputeResult(
                success=False,
                error=f"Engine '{engine_name}' cannot handle this execution plan",
            )

        # Execute
        fire_event(Note(msg=f"Executing via {engine_name}: {execution_plan.reason}"))
        return engine.execute_query(sql, execution_plan)

    def _extract_model_config(self, model_node: Any) -> Dict[str, Any]:
        """
        Extract DVT-specific config from model node.

        Args:
            model_node: Model node (ModelNode, etc.)

        Returns:
            Dictionary with DVT config fields
        """
        config_dict = {}

        if hasattr(model_node, "config"):
            model_config = model_node.config

            # Extract compute_engine
            if hasattr(model_config, "compute_engine") and model_config.compute_engine:
                config_dict["compute_engine"] = model_config.compute_engine

            # Extract pushdown_enabled
            if (
                hasattr(model_config, "pushdown_enabled")
                and model_config.pushdown_enabled is not None
            ):
                config_dict["pushdown_enabled"] = model_config.pushdown_enabled

            # Extract target_profile
            if hasattr(model_config, "target_profile") and model_config.target_profile:
                config_dict["target_profile"] = model_config.target_profile

        return config_dict
dvt/config/__init__.py
ADDED
dvt/config/catalogs.py
ADDED
@@ -0,0 +1,95 @@
import os
from copy import deepcopy
from typing import Any, Dict, List, Optional

from dvt.artifacts.resources import Catalog, CatalogWriteIntegrationConfig
from dvt.clients.yaml_helper import load_yaml_text
from dvt.config.renderer import SecretRenderer
from dvt.constants import CATALOGS_FILE_NAME
from dvt.exceptions import YamlLoadError

from dbt_common.clients.system import load_file_contents
from dbt_common.exceptions import CompilationError, DbtValidationError


def load_catalogs_yml(project_dir: str, project_name: str) -> Dict[str, Any]:
    path = os.path.join(project_dir, CATALOGS_FILE_NAME)

    if os.path.isfile(path):
        try:
            contents = load_file_contents(path, strip=False)
            yaml_content = load_yaml_text(contents)

            if not yaml_content:
                raise DbtValidationError(f"The file at {path} is empty")

            return yaml_content
        except DbtValidationError as e:
            raise YamlLoadError(project_name=project_name, path=CATALOGS_FILE_NAME, exc=e)

    return {}


def load_single_catalog(raw_catalog: Dict[str, Any], renderer: SecretRenderer) -> Catalog:
    try:
        rendered_catalog = renderer.render_data(raw_catalog)
    except CompilationError as exc:
        raise DbtValidationError(str(exc)) from exc

    Catalog.validate(rendered_catalog)

    write_integrations = []
    write_integration_names = set()

    for raw_integration in rendered_catalog.get("write_integrations", []):
        if raw_integration["name"] in write_integration_names:
            raise DbtValidationError(
                f"Catalog '{rendered_catalog['name']}' cannot have multiple 'write_integrations' with the same name: '{raw_integration['name']}'."
            )

        # We're going to let the adapter validate the integration config
        write_integrations.append(
            CatalogWriteIntegrationConfig(**raw_integration, catalog_name=raw_catalog["name"])
        )
        write_integration_names.add(raw_integration["name"])

    # Validate + set default active_write_integration if unset
    active_write_integration = rendered_catalog.get("active_write_integration")
    valid_write_integration_names = [integration.name for integration in write_integrations]

    if not active_write_integration:
        if len(valid_write_integration_names) == 1:
            active_write_integration = write_integrations[0].name
        else:
            raise DbtValidationError(
                f"Catalog '{rendered_catalog['name']}' must specify an 'active_write_integration' when multiple 'write_integrations' are provided."
            )
    else:
        if active_write_integration not in valid_write_integration_names:
            raise DbtValidationError(
                f"Catalog '{rendered_catalog['name']}' must specify an 'active_write_integration' from its set of defined 'write_integrations': {valid_write_integration_names}. Got: '{active_write_integration}'."
            )

    return Catalog(
        name=raw_catalog["name"],
        active_write_integration=active_write_integration,
        write_integrations=write_integrations,
    )


def load_catalogs(project_dir: str, project_name: str, cli_vars: Dict[str, Any]) -> List[Catalog]:
    raw_catalogs = load_catalogs_yml(project_dir, project_name).get("catalogs", [])
    catalogs_renderer = SecretRenderer(cli_vars)

    return [load_single_catalog(raw_catalog, catalogs_renderer) for raw_catalog in raw_catalogs]


def get_active_write_integration(catalog: Catalog) -> Optional[CatalogWriteIntegrationConfig]:
    for integration in catalog.write_integrations:
        if integration.name == catalog.active_write_integration:
            active_integration = deepcopy(integration)
            active_integration.catalog_name = active_integration.name
            active_integration.name = catalog.name
            return active_integration

    return None