dvt-core 1.11.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dvt-core might be problematic. Click here for more details.
- dvt/__init__.py +7 -0
- dvt/_pydantic_shim.py +26 -0
- dvt/adapters/__init__.py +16 -0
- dvt/adapters/multi_adapter_manager.py +268 -0
- dvt/artifacts/__init__.py +0 -0
- dvt/artifacts/exceptions/__init__.py +1 -0
- dvt/artifacts/exceptions/schemas.py +31 -0
- dvt/artifacts/resources/__init__.py +116 -0
- dvt/artifacts/resources/base.py +68 -0
- dvt/artifacts/resources/types.py +93 -0
- dvt/artifacts/resources/v1/analysis.py +10 -0
- dvt/artifacts/resources/v1/catalog.py +23 -0
- dvt/artifacts/resources/v1/components.py +275 -0
- dvt/artifacts/resources/v1/config.py +282 -0
- dvt/artifacts/resources/v1/documentation.py +11 -0
- dvt/artifacts/resources/v1/exposure.py +52 -0
- dvt/artifacts/resources/v1/function.py +53 -0
- dvt/artifacts/resources/v1/generic_test.py +32 -0
- dvt/artifacts/resources/v1/group.py +22 -0
- dvt/artifacts/resources/v1/hook.py +11 -0
- dvt/artifacts/resources/v1/macro.py +30 -0
- dvt/artifacts/resources/v1/metric.py +173 -0
- dvt/artifacts/resources/v1/model.py +146 -0
- dvt/artifacts/resources/v1/owner.py +10 -0
- dvt/artifacts/resources/v1/saved_query.py +112 -0
- dvt/artifacts/resources/v1/seed.py +42 -0
- dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dvt/artifacts/resources/v1/semantic_model.py +315 -0
- dvt/artifacts/resources/v1/singular_test.py +14 -0
- dvt/artifacts/resources/v1/snapshot.py +92 -0
- dvt/artifacts/resources/v1/source_definition.py +85 -0
- dvt/artifacts/resources/v1/sql_operation.py +10 -0
- dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
- dvt/artifacts/schemas/__init__.py +0 -0
- dvt/artifacts/schemas/base.py +191 -0
- dvt/artifacts/schemas/batch_results.py +24 -0
- dvt/artifacts/schemas/catalog/__init__.py +12 -0
- dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
- dvt/artifacts/schemas/freshness/__init__.py +1 -0
- dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
- dvt/artifacts/schemas/manifest/__init__.py +2 -0
- dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
- dvt/artifacts/schemas/results.py +148 -0
- dvt/artifacts/schemas/run/__init__.py +2 -0
- dvt/artifacts/schemas/run/v5/__init__.py +0 -0
- dvt/artifacts/schemas/run/v5/run.py +184 -0
- dvt/artifacts/schemas/upgrades/__init__.py +4 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dvt/artifacts/utils/validation.py +153 -0
- dvt/cli/__init__.py +1 -0
- dvt/cli/context.py +16 -0
- dvt/cli/exceptions.py +56 -0
- dvt/cli/flags.py +558 -0
- dvt/cli/main.py +971 -0
- dvt/cli/option_types.py +121 -0
- dvt/cli/options.py +79 -0
- dvt/cli/params.py +803 -0
- dvt/cli/requires.py +478 -0
- dvt/cli/resolvers.py +32 -0
- dvt/cli/types.py +40 -0
- dvt/clients/__init__.py +0 -0
- dvt/clients/checked_load.py +82 -0
- dvt/clients/git.py +164 -0
- dvt/clients/jinja.py +206 -0
- dvt/clients/jinja_static.py +245 -0
- dvt/clients/registry.py +192 -0
- dvt/clients/yaml_helper.py +68 -0
- dvt/compilation.py +833 -0
- dvt/compute/__init__.py +26 -0
- dvt/compute/base.py +288 -0
- dvt/compute/engines/__init__.py +13 -0
- dvt/compute/engines/duckdb_engine.py +368 -0
- dvt/compute/engines/spark_engine.py +273 -0
- dvt/compute/query_analyzer.py +212 -0
- dvt/compute/router.py +483 -0
- dvt/config/__init__.py +4 -0
- dvt/config/catalogs.py +95 -0
- dvt/config/compute_config.py +406 -0
- dvt/config/profile.py +411 -0
- dvt/config/profiles_v2.py +464 -0
- dvt/config/project.py +893 -0
- dvt/config/renderer.py +232 -0
- dvt/config/runtime.py +491 -0
- dvt/config/selectors.py +209 -0
- dvt/config/utils.py +78 -0
- dvt/connectors/.gitignore +6 -0
- dvt/connectors/README.md +306 -0
- dvt/connectors/catalog.yml +217 -0
- dvt/connectors/download_connectors.py +300 -0
- dvt/constants.py +29 -0
- dvt/context/__init__.py +0 -0
- dvt/context/base.py +746 -0
- dvt/context/configured.py +136 -0
- dvt/context/context_config.py +350 -0
- dvt/context/docs.py +82 -0
- dvt/context/exceptions_jinja.py +179 -0
- dvt/context/macro_resolver.py +195 -0
- dvt/context/macros.py +171 -0
- dvt/context/manifest.py +73 -0
- dvt/context/providers.py +2198 -0
- dvt/context/query_header.py +14 -0
- dvt/context/secret.py +59 -0
- dvt/context/target.py +74 -0
- dvt/contracts/__init__.py +0 -0
- dvt/contracts/files.py +413 -0
- dvt/contracts/graph/__init__.py +0 -0
- dvt/contracts/graph/manifest.py +1904 -0
- dvt/contracts/graph/metrics.py +98 -0
- dvt/contracts/graph/model_config.py +71 -0
- dvt/contracts/graph/node_args.py +42 -0
- dvt/contracts/graph/nodes.py +1806 -0
- dvt/contracts/graph/semantic_manifest.py +233 -0
- dvt/contracts/graph/unparsed.py +812 -0
- dvt/contracts/project.py +417 -0
- dvt/contracts/results.py +53 -0
- dvt/contracts/selection.py +23 -0
- dvt/contracts/sql.py +86 -0
- dvt/contracts/state.py +69 -0
- dvt/contracts/util.py +46 -0
- dvt/deprecations.py +347 -0
- dvt/deps/__init__.py +0 -0
- dvt/deps/base.py +153 -0
- dvt/deps/git.py +196 -0
- dvt/deps/local.py +80 -0
- dvt/deps/registry.py +131 -0
- dvt/deps/resolver.py +149 -0
- dvt/deps/tarball.py +121 -0
- dvt/docs/source/_ext/dbt_click.py +118 -0
- dvt/docs/source/conf.py +32 -0
- dvt/env_vars.py +64 -0
- dvt/event_time/event_time.py +40 -0
- dvt/event_time/sample_window.py +60 -0
- dvt/events/__init__.py +16 -0
- dvt/events/base_types.py +37 -0
- dvt/events/core_types_pb2.py +2 -0
- dvt/events/logging.py +109 -0
- dvt/events/types.py +2534 -0
- dvt/exceptions.py +1487 -0
- dvt/flags.py +89 -0
- dvt/graph/__init__.py +11 -0
- dvt/graph/cli.py +248 -0
- dvt/graph/graph.py +172 -0
- dvt/graph/queue.py +213 -0
- dvt/graph/selector.py +375 -0
- dvt/graph/selector_methods.py +976 -0
- dvt/graph/selector_spec.py +223 -0
- dvt/graph/thread_pool.py +18 -0
- dvt/hooks.py +21 -0
- dvt/include/README.md +49 -0
- dvt/include/__init__.py +3 -0
- dvt/include/global_project.py +4 -0
- dvt/include/starter_project/.gitignore +4 -0
- dvt/include/starter_project/README.md +15 -0
- dvt/include/starter_project/__init__.py +3 -0
- dvt/include/starter_project/analyses/.gitkeep +0 -0
- dvt/include/starter_project/dvt_project.yml +36 -0
- dvt/include/starter_project/macros/.gitkeep +0 -0
- dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
- dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
- dvt/include/starter_project/models/example/schema.yml +21 -0
- dvt/include/starter_project/seeds/.gitkeep +0 -0
- dvt/include/starter_project/snapshots/.gitkeep +0 -0
- dvt/include/starter_project/tests/.gitkeep +0 -0
- dvt/internal_deprecations.py +27 -0
- dvt/jsonschemas/__init__.py +3 -0
- dvt/jsonschemas/jsonschemas.py +309 -0
- dvt/jsonschemas/project/0.0.110.json +4717 -0
- dvt/jsonschemas/project/0.0.85.json +2015 -0
- dvt/jsonschemas/resources/0.0.110.json +2636 -0
- dvt/jsonschemas/resources/0.0.85.json +2536 -0
- dvt/jsonschemas/resources/latest.json +6773 -0
- dvt/links.py +4 -0
- dvt/materializations/__init__.py +0 -0
- dvt/materializations/incremental/__init__.py +0 -0
- dvt/materializations/incremental/microbatch.py +235 -0
- dvt/mp_context.py +8 -0
- dvt/node_types.py +37 -0
- dvt/parser/__init__.py +23 -0
- dvt/parser/analysis.py +21 -0
- dvt/parser/base.py +549 -0
- dvt/parser/common.py +267 -0
- dvt/parser/docs.py +52 -0
- dvt/parser/fixtures.py +51 -0
- dvt/parser/functions.py +30 -0
- dvt/parser/generic_test.py +100 -0
- dvt/parser/generic_test_builders.py +334 -0
- dvt/parser/hooks.py +119 -0
- dvt/parser/macros.py +137 -0
- dvt/parser/manifest.py +2204 -0
- dvt/parser/models.py +574 -0
- dvt/parser/partial.py +1179 -0
- dvt/parser/read_files.py +445 -0
- dvt/parser/schema_generic_tests.py +423 -0
- dvt/parser/schema_renderer.py +111 -0
- dvt/parser/schema_yaml_readers.py +936 -0
- dvt/parser/schemas.py +1467 -0
- dvt/parser/search.py +149 -0
- dvt/parser/seeds.py +28 -0
- dvt/parser/singular_test.py +20 -0
- dvt/parser/snapshots.py +44 -0
- dvt/parser/sources.py +557 -0
- dvt/parser/sql.py +63 -0
- dvt/parser/unit_tests.py +622 -0
- dvt/plugins/__init__.py +20 -0
- dvt/plugins/contracts.py +10 -0
- dvt/plugins/exceptions.py +2 -0
- dvt/plugins/manager.py +164 -0
- dvt/plugins/manifest.py +21 -0
- dvt/profiler.py +20 -0
- dvt/py.typed +1 -0
- dvt/runners/__init__.py +2 -0
- dvt/runners/exposure_runner.py +7 -0
- dvt/runners/no_op_runner.py +46 -0
- dvt/runners/saved_query_runner.py +7 -0
- dvt/selected_resources.py +8 -0
- dvt/task/__init__.py +0 -0
- dvt/task/base.py +504 -0
- dvt/task/build.py +197 -0
- dvt/task/clean.py +57 -0
- dvt/task/clone.py +162 -0
- dvt/task/compile.py +151 -0
- dvt/task/compute.py +366 -0
- dvt/task/debug.py +650 -0
- dvt/task/deps.py +280 -0
- dvt/task/docs/__init__.py +3 -0
- dvt/task/docs/generate.py +408 -0
- dvt/task/docs/index.html +250 -0
- dvt/task/docs/serve.py +28 -0
- dvt/task/freshness.py +323 -0
- dvt/task/function.py +122 -0
- dvt/task/group_lookup.py +46 -0
- dvt/task/init.py +374 -0
- dvt/task/list.py +237 -0
- dvt/task/printer.py +176 -0
- dvt/task/profiles.py +256 -0
- dvt/task/retry.py +175 -0
- dvt/task/run.py +1146 -0
- dvt/task/run_operation.py +142 -0
- dvt/task/runnable.py +802 -0
- dvt/task/seed.py +104 -0
- dvt/task/show.py +150 -0
- dvt/task/snapshot.py +57 -0
- dvt/task/sql.py +111 -0
- dvt/task/test.py +464 -0
- dvt/tests/fixtures/__init__.py +1 -0
- dvt/tests/fixtures/project.py +620 -0
- dvt/tests/util.py +651 -0
- dvt/tracking.py +529 -0
- dvt/utils/__init__.py +3 -0
- dvt/utils/artifact_upload.py +151 -0
- dvt/utils/utils.py +408 -0
- dvt/version.py +249 -0
- dvt_core-1.11.0b4.dist-info/METADATA +252 -0
- dvt_core-1.11.0b4.dist-info/RECORD +261 -0
- dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
- dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
- dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Compute layer configuration for DVT.
|
|
3
|
+
|
|
4
|
+
This module handles loading and parsing compute.yml configuration files,
|
|
5
|
+
which define DuckDB and Spark settings for the compute layer.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Dict, List, Optional, Union
|
|
11
|
+
|
|
12
|
+
import yaml
|
|
13
|
+
|
|
14
|
+
from dbt_common.events.functions import fire_event
|
|
15
|
+
from dbt_common.events.types import Note
|
|
16
|
+
from dbt_common.exceptions import DbtRuntimeError
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class DuckDBConfig:
    """DuckDB compute engine configuration.

    Mirrors the ``duckdb:`` section of compute.yml. Every default is
    declared exactly once on the dataclass fields; :meth:`from_dict`
    falls back to them automatically instead of repeating the literals
    (the previous version duplicated each default and reached into
    ``cls.__dataclass_fields__[...].default_factory()`` by hand).
    """

    memory_limit: str = "8GB"
    threads: int = 4
    temp_directory: str = "/tmp/duckdb"
    max_memory: str = "8GB"
    enable_optimizer: bool = True
    enable_profiling: bool = False
    enable_progress_bar: bool = True
    extensions: List[str] = field(
        default_factory=lambda: [
            "httpfs",
            "postgres_scanner",
            "mysql_scanner",
            "parquet",
            "json",
            "icu",
            "fts",
        ]
    )
    s3: Optional[Dict[str, Any]] = None
    postgres_scanner: Optional[Dict[str, Any]] = None

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "DuckDBConfig":
        """Create DuckDBConfig from a dictionary.

        Unknown keys are ignored (same as the previous per-key ``get``
        calls); missing keys take the dataclass field defaults.

        Args:
            data: Parsed ``duckdb:`` mapping from compute.yml.

        Returns:
            A populated DuckDBConfig.
        """
        known = set(cls.__dataclass_fields__)
        return cls(**{k: v for k, v in data.items() if k in known})
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass
class SparkConnector:
    """Spark connector/JAR specification.

    Identifies one connector by name/version plus its Maven coordinate;
    ``enabled`` lets a configured connector be switched off.
    """

    name: str
    version: str
    maven: str
    enabled: bool = True

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SparkConnector":
        """Build a SparkConnector from a compute.yml mapping.

        ``name``, ``version`` and ``maven`` are required keys (a missing
        one raises KeyError); ``enabled`` defaults to True when omitted.
        """
        enabled = data.get("enabled", True)
        return cls(data["name"], data["version"], data["maven"], enabled)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass
class SparkLocalConfig:
    """Spark local (single node) configuration."""

    master: str = "local[*]"
    app_name: str = "dvt-transformation"
    memory: str = "4g"
    driver_memory: str = "2g"
    executor_memory: str = "4g"
    executor_cores: int = 4
    default_parallelism: int = 8
    ui_port: int = 4040
    ui_enabled: bool = True
    log_level: str = "WARN"
    config: Dict[str, Any] = field(default_factory=dict)
    connectors: List[SparkConnector] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SparkLocalConfig":
        """Create a SparkLocalConfig from a compute.yml mapping.

        Keys absent from ``data`` keep the dataclass field defaults;
        ``connectors`` entries are parsed into SparkConnector objects.
        """
        cfg = cls()  # start from field defaults
        for key in (
            "master",
            "app_name",
            "memory",
            "driver_memory",
            "executor_memory",
            "executor_cores",
            "default_parallelism",
            "ui_port",
            "ui_enabled",
            "log_level",
            "config",
        ):
            if key in data:
                setattr(cfg, key, data[key])
        cfg.connectors = [SparkConnector.from_dict(c) for c in data.get("connectors", [])]
        return cfg
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@dataclass
class SparkClusterConfig:
    """Spark cluster (distributed) configuration."""

    master: str
    deploy_mode: str = "client"
    app_name: str = "dvt-transformation-cluster"
    executor_memory: str = "8g"
    executor_cores: int = 4
    num_executors: int = 10
    driver_memory: str = "4g"
    driver_cores: int = 2
    dynamic_allocation: Optional[Dict[str, Any]] = None
    config: Dict[str, Any] = field(default_factory=dict)
    connectors: List[SparkConnector] = field(default_factory=list)
    kerberos: Optional[Dict[str, Any]] = None

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SparkClusterConfig":
        """Create a SparkClusterConfig from a compute.yml mapping.

        ``master`` is required (missing key raises KeyError); all other
        keys fall back to the dataclass field defaults. ``connectors``
        entries are parsed into SparkConnector objects.
        """
        cfg = cls(master=data["master"])
        for key in (
            "deploy_mode",
            "app_name",
            "executor_memory",
            "executor_cores",
            "num_executors",
            "driver_memory",
            "driver_cores",
            "dynamic_allocation",
            "config",
            "kerberos",
        ):
            if key in data:
                setattr(cfg, key, data[key])
        cfg.connectors = [SparkConnector.from_dict(c) for c in data.get("connectors", [])]
        return cfg
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@dataclass
class AutoSelectRule:
    """A single auto-selection rule for choosing a compute engine.

    Rules carry a numeric ``priority``; higher values win. ``condition``
    may be a raw expression string or a structured mapping.
    """

    name: str
    priority: int
    condition: Union[str, Dict[str, Any]]
    action: str
    description: str = ""

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "AutoSelectRule":
        """Build an AutoSelectRule from a compute.yml mapping.

        ``name``, ``priority``, ``condition`` and ``action`` are required
        keys; ``description`` defaults to the empty string.
        """
        return cls(
            data["name"],
            data["priority"],
            data["condition"],
            data["action"],
            data.get("description", ""),
        )
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
@dataclass
class AutoSelectConfig:
    """Auto-selection configuration: an on/off flag plus prioritized rules."""

    enabled: bool = True
    rules: List[AutoSelectRule] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "AutoSelectConfig":
        """Create AutoSelectConfig from a compute.yml mapping.

        Rules are parsed and ordered highest-priority-first (stable sort,
        so equal priorities keep their declaration order).
        """
        ordered = sorted(
            (AutoSelectRule.from_dict(r) for r in data.get("rules", [])),
            key=lambda rule: rule.priority,
            reverse=True,
        )
        return cls(enabled=data.get("enabled", True), rules=ordered)
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
@dataclass
class ConnectorManagementConfig:
    """Connector (JAR) download and cache management configuration.

    Defaults are declared exactly once on the dataclass fields;
    :meth:`from_dict` falls back to them automatically instead of
    repeating the literals and calling
    ``cls.__dataclass_fields__[...].default_factory()`` by hand.
    """

    auto_download: bool = True
    cache_dir: str = "~/.dvt/connectors"
    maven_repos: List[str] = field(
        default_factory=lambda: [
            "https://repo1.maven.org/maven2",
            "https://packages.confluent.io/maven",
            "https://maven-central.storage.googleapis.com/maven2",
        ]
    )
    verify_checksums: bool = True
    check_updates: str = "weekly"
    # Expanded elsewhere; kept as a literal ${...} template here.
    bundled_path: str = "${DVT_INSTALL_DIR}/connectors/jars"

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ConnectorManagementConfig":
        """Create ConnectorManagementConfig from a dictionary.

        Unknown keys are ignored; missing keys take the dataclass field
        defaults.

        Args:
            data: Parsed ``connector_management:`` mapping from compute.yml.

        Returns:
            A populated ConnectorManagementConfig.
        """
        known = set(cls.__dataclass_fields__)
        return cls(**{k: v for k, v in data.items() if k in known})
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
@dataclass
class PerformanceConfig:
    """Performance monitoring configuration."""

    enable_profiling: bool = False
    log_slow_queries: bool = True
    slow_query_threshold: str = "60s"
    collect_metrics: bool = True
    metrics_output: str = "/tmp/dvt_metrics.json"
    save_execution_plans: bool = False
    execution_plan_dir: str = "~/.dvt/execution_plans"

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PerformanceConfig":
        """Create a PerformanceConfig from a compute.yml mapping.

        Keys absent from ``data`` keep the dataclass field defaults.
        """
        cfg = cls()  # start from field defaults
        for key in (
            "enable_profiling",
            "log_slow_queries",
            "slow_query_threshold",
            "collect_metrics",
            "metrics_output",
            "save_execution_plans",
            "execution_plan_dir",
        ):
            if key in data:
                setattr(cfg, key, data[key])
        return cfg
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
@dataclass
class DevelopmentConfig:
    """Development and debugging configuration."""

    verbose_errors: bool = True
    explain_queries: bool = False
    dev_mode: bool = False
    dev_limit: int = 1000
    cache_intermediate: bool = True
    cache_dir: str = "/tmp/dvt_cache"

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "DevelopmentConfig":
        """Create a DevelopmentConfig from a compute.yml mapping.

        Keys absent from ``data`` keep the dataclass field defaults.
        """
        cfg = cls()  # start from field defaults
        for key in (
            "verbose_errors",
            "explain_queries",
            "dev_mode",
            "dev_limit",
            "cache_intermediate",
            "cache_dir",
        ):
            if key in data:
                setattr(cfg, key, data[key])
        return cfg
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
@dataclass
class ComputeConfig:
    """
    Complete compute layer configuration.

    This represents the parsed compute.yml file: the default engine
    choice plus per-engine, auto-selection, connector-management,
    performance, and development sections.
    """

    default_engine: str = "auto"
    duckdb: DuckDBConfig = field(default_factory=DuckDBConfig)
    # Spark sections are optional: None means "not configured".
    spark_local: Optional[SparkLocalConfig] = None
    spark_cluster: Optional[SparkClusterConfig] = None
    auto_select: AutoSelectConfig = field(default_factory=AutoSelectConfig)
    connector_management: ConnectorManagementConfig = field(
        default_factory=ConnectorManagementConfig
    )
    performance: PerformanceConfig = field(default_factory=PerformanceConfig)
    development: DevelopmentConfig = field(default_factory=DevelopmentConfig)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ComputeConfig":
        """Create ComputeConfig from a dictionary (parsed compute.yml)."""
        return cls(
            default_engine=data.get("default_engine", "auto"),
            duckdb=DuckDBConfig.from_dict(data.get("duckdb", {})),
            spark_local=(
                SparkLocalConfig.from_dict(data["spark_local"]) if "spark_local" in data else None
            ),
            spark_cluster=(
                SparkClusterConfig.from_dict(data["spark_cluster"])
                if "spark_cluster" in data
                else None
            ),
            auto_select=AutoSelectConfig.from_dict(data.get("auto_select", {})),
            connector_management=ConnectorManagementConfig.from_dict(
                data.get("connector_management", {})
            ),
            performance=PerformanceConfig.from_dict(data.get("performance", {})),
            development=DevelopmentConfig.from_dict(data.get("development", {})),
        )

    @classmethod
    def load_from_file(cls, file_path: Path) -> "ComputeConfig":
        """
        Load compute configuration from YAML file.

        Args:
            file_path: Path to compute.yml file

        Returns:
            ComputeConfig instance (defaults if the file is missing or empty)

        Raises:
            DbtRuntimeError: If file cannot be read or parsed
        """
        try:
            if not file_path.exists():
                fire_event(Note(msg=f"Compute config not found at {file_path}, using defaults"))
                return cls()

            # Explicit encoding so parsing does not depend on the platform's
            # locale default.
            with open(file_path, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f)

            if data is None:
                fire_event(Note(msg=f"Empty compute config at {file_path}, using defaults"))
                return cls()

            return cls.from_dict(data)

        except yaml.YAMLError as e:
            # Chain the cause (`from e`) so the original parse error and its
            # traceback stay visible to the caller.
            raise DbtRuntimeError(f"Failed to parse compute config: {e}") from e
        except Exception as e:
            raise DbtRuntimeError(f"Failed to load compute config from {file_path}: {e}") from e

    def get_engine_config(
        self, engine: str
    ) -> Union[DuckDBConfig, SparkLocalConfig, SparkClusterConfig, None]:
        """
        Get configuration for specific compute engine.

        Args:
            engine: Engine name ('duckdb', 'spark_local', 'spark_cluster')

        Returns:
            Engine configuration, or None if the name is unknown or the
            engine is not configured.
        """
        if engine == "duckdb":
            return self.duckdb
        elif engine == "spark_local":
            return self.spark_local
        elif engine == "spark_cluster":
            return self.spark_cluster
        else:
            return None
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def load_compute_config(project_dir: Optional[Path] = None) -> ComputeConfig:
    """
    Load compute configuration from standard locations.

    Searches in order:
    1. <project_root>/compute.yml
    2. ~/.dbt/compute.yml
    3. Default configuration

    Args:
        project_dir: Project directory (optional)

    Returns:
        ComputeConfig instance
    """
    # Candidate paths, highest precedence first.
    candidates = []
    if project_dir:
        candidates.append(project_dir / "compute.yml")
    candidates.append(Path.home() / ".dbt" / "compute.yml")

    for candidate in candidates:
        if candidate.exists():
            fire_event(Note(msg=f"Loading compute config from {candidate}"))
            return ComputeConfig.load_from_file(candidate)

    # Nothing found anywhere: fall back to built-in defaults.
    fire_event(Note(msg="No compute.yml found, using default configuration"))
    return ComputeConfig()
|