dvt-core 1.11.0b4 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this release of dvt-core has been flagged as potentially problematic.

Files changed (261)
  1. dvt/__init__.py +7 -0
  2. dvt/_pydantic_shim.py +26 -0
  3. dvt/adapters/__init__.py +16 -0
  4. dvt/adapters/multi_adapter_manager.py +268 -0
  5. dvt/artifacts/__init__.py +0 -0
  6. dvt/artifacts/exceptions/__init__.py +1 -0
  7. dvt/artifacts/exceptions/schemas.py +31 -0
  8. dvt/artifacts/resources/__init__.py +116 -0
  9. dvt/artifacts/resources/base.py +68 -0
  10. dvt/artifacts/resources/types.py +93 -0
  11. dvt/artifacts/resources/v1/analysis.py +10 -0
  12. dvt/artifacts/resources/v1/catalog.py +23 -0
  13. dvt/artifacts/resources/v1/components.py +275 -0
  14. dvt/artifacts/resources/v1/config.py +282 -0
  15. dvt/artifacts/resources/v1/documentation.py +11 -0
  16. dvt/artifacts/resources/v1/exposure.py +52 -0
  17. dvt/artifacts/resources/v1/function.py +53 -0
  18. dvt/artifacts/resources/v1/generic_test.py +32 -0
  19. dvt/artifacts/resources/v1/group.py +22 -0
  20. dvt/artifacts/resources/v1/hook.py +11 -0
  21. dvt/artifacts/resources/v1/macro.py +30 -0
  22. dvt/artifacts/resources/v1/metric.py +173 -0
  23. dvt/artifacts/resources/v1/model.py +146 -0
  24. dvt/artifacts/resources/v1/owner.py +10 -0
  25. dvt/artifacts/resources/v1/saved_query.py +112 -0
  26. dvt/artifacts/resources/v1/seed.py +42 -0
  27. dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  28. dvt/artifacts/resources/v1/semantic_model.py +315 -0
  29. dvt/artifacts/resources/v1/singular_test.py +14 -0
  30. dvt/artifacts/resources/v1/snapshot.py +92 -0
  31. dvt/artifacts/resources/v1/source_definition.py +85 -0
  32. dvt/artifacts/resources/v1/sql_operation.py +10 -0
  33. dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
  34. dvt/artifacts/schemas/__init__.py +0 -0
  35. dvt/artifacts/schemas/base.py +191 -0
  36. dvt/artifacts/schemas/batch_results.py +24 -0
  37. dvt/artifacts/schemas/catalog/__init__.py +12 -0
  38. dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  39. dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
  40. dvt/artifacts/schemas/freshness/__init__.py +1 -0
  41. dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  42. dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
  43. dvt/artifacts/schemas/manifest/__init__.py +2 -0
  44. dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  45. dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
  46. dvt/artifacts/schemas/results.py +148 -0
  47. dvt/artifacts/schemas/run/__init__.py +2 -0
  48. dvt/artifacts/schemas/run/v5/__init__.py +0 -0
  49. dvt/artifacts/schemas/run/v5/run.py +184 -0
  50. dvt/artifacts/schemas/upgrades/__init__.py +4 -0
  51. dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  52. dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  53. dvt/artifacts/utils/validation.py +153 -0
  54. dvt/cli/__init__.py +1 -0
  55. dvt/cli/context.py +16 -0
  56. dvt/cli/exceptions.py +56 -0
  57. dvt/cli/flags.py +558 -0
  58. dvt/cli/main.py +971 -0
  59. dvt/cli/option_types.py +121 -0
  60. dvt/cli/options.py +79 -0
  61. dvt/cli/params.py +803 -0
  62. dvt/cli/requires.py +478 -0
  63. dvt/cli/resolvers.py +32 -0
  64. dvt/cli/types.py +40 -0
  65. dvt/clients/__init__.py +0 -0
  66. dvt/clients/checked_load.py +82 -0
  67. dvt/clients/git.py +164 -0
  68. dvt/clients/jinja.py +206 -0
  69. dvt/clients/jinja_static.py +245 -0
  70. dvt/clients/registry.py +192 -0
  71. dvt/clients/yaml_helper.py +68 -0
  72. dvt/compilation.py +833 -0
  73. dvt/compute/__init__.py +26 -0
  74. dvt/compute/base.py +288 -0
  75. dvt/compute/engines/__init__.py +13 -0
  76. dvt/compute/engines/duckdb_engine.py +368 -0
  77. dvt/compute/engines/spark_engine.py +273 -0
  78. dvt/compute/query_analyzer.py +212 -0
  79. dvt/compute/router.py +483 -0
  80. dvt/config/__init__.py +4 -0
  81. dvt/config/catalogs.py +95 -0
  82. dvt/config/compute_config.py +406 -0
  83. dvt/config/profile.py +411 -0
  84. dvt/config/profiles_v2.py +464 -0
  85. dvt/config/project.py +893 -0
  86. dvt/config/renderer.py +232 -0
  87. dvt/config/runtime.py +491 -0
  88. dvt/config/selectors.py +209 -0
  89. dvt/config/utils.py +78 -0
  90. dvt/connectors/.gitignore +6 -0
  91. dvt/connectors/README.md +306 -0
  92. dvt/connectors/catalog.yml +217 -0
  93. dvt/connectors/download_connectors.py +300 -0
  94. dvt/constants.py +29 -0
  95. dvt/context/__init__.py +0 -0
  96. dvt/context/base.py +746 -0
  97. dvt/context/configured.py +136 -0
  98. dvt/context/context_config.py +350 -0
  99. dvt/context/docs.py +82 -0
  100. dvt/context/exceptions_jinja.py +179 -0
  101. dvt/context/macro_resolver.py +195 -0
  102. dvt/context/macros.py +171 -0
  103. dvt/context/manifest.py +73 -0
  104. dvt/context/providers.py +2198 -0
  105. dvt/context/query_header.py +14 -0
  106. dvt/context/secret.py +59 -0
  107. dvt/context/target.py +74 -0
  108. dvt/contracts/__init__.py +0 -0
  109. dvt/contracts/files.py +413 -0
  110. dvt/contracts/graph/__init__.py +0 -0
  111. dvt/contracts/graph/manifest.py +1904 -0
  112. dvt/contracts/graph/metrics.py +98 -0
  113. dvt/contracts/graph/model_config.py +71 -0
  114. dvt/contracts/graph/node_args.py +42 -0
  115. dvt/contracts/graph/nodes.py +1806 -0
  116. dvt/contracts/graph/semantic_manifest.py +233 -0
  117. dvt/contracts/graph/unparsed.py +812 -0
  118. dvt/contracts/project.py +417 -0
  119. dvt/contracts/results.py +53 -0
  120. dvt/contracts/selection.py +23 -0
  121. dvt/contracts/sql.py +86 -0
  122. dvt/contracts/state.py +69 -0
  123. dvt/contracts/util.py +46 -0
  124. dvt/deprecations.py +347 -0
  125. dvt/deps/__init__.py +0 -0
  126. dvt/deps/base.py +153 -0
  127. dvt/deps/git.py +196 -0
  128. dvt/deps/local.py +80 -0
  129. dvt/deps/registry.py +131 -0
  130. dvt/deps/resolver.py +149 -0
  131. dvt/deps/tarball.py +121 -0
  132. dvt/docs/source/_ext/dbt_click.py +118 -0
  133. dvt/docs/source/conf.py +32 -0
  134. dvt/env_vars.py +64 -0
  135. dvt/event_time/event_time.py +40 -0
  136. dvt/event_time/sample_window.py +60 -0
  137. dvt/events/__init__.py +16 -0
  138. dvt/events/base_types.py +37 -0
  139. dvt/events/core_types_pb2.py +2 -0
  140. dvt/events/logging.py +109 -0
  141. dvt/events/types.py +2534 -0
  142. dvt/exceptions.py +1487 -0
  143. dvt/flags.py +89 -0
  144. dvt/graph/__init__.py +11 -0
  145. dvt/graph/cli.py +248 -0
  146. dvt/graph/graph.py +172 -0
  147. dvt/graph/queue.py +213 -0
  148. dvt/graph/selector.py +375 -0
  149. dvt/graph/selector_methods.py +976 -0
  150. dvt/graph/selector_spec.py +223 -0
  151. dvt/graph/thread_pool.py +18 -0
  152. dvt/hooks.py +21 -0
  153. dvt/include/README.md +49 -0
  154. dvt/include/__init__.py +3 -0
  155. dvt/include/global_project.py +4 -0
  156. dvt/include/starter_project/.gitignore +4 -0
  157. dvt/include/starter_project/README.md +15 -0
  158. dvt/include/starter_project/__init__.py +3 -0
  159. dvt/include/starter_project/analyses/.gitkeep +0 -0
  160. dvt/include/starter_project/dvt_project.yml +36 -0
  161. dvt/include/starter_project/macros/.gitkeep +0 -0
  162. dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  163. dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  164. dvt/include/starter_project/models/example/schema.yml +21 -0
  165. dvt/include/starter_project/seeds/.gitkeep +0 -0
  166. dvt/include/starter_project/snapshots/.gitkeep +0 -0
  167. dvt/include/starter_project/tests/.gitkeep +0 -0
  168. dvt/internal_deprecations.py +27 -0
  169. dvt/jsonschemas/__init__.py +3 -0
  170. dvt/jsonschemas/jsonschemas.py +309 -0
  171. dvt/jsonschemas/project/0.0.110.json +4717 -0
  172. dvt/jsonschemas/project/0.0.85.json +2015 -0
  173. dvt/jsonschemas/resources/0.0.110.json +2636 -0
  174. dvt/jsonschemas/resources/0.0.85.json +2536 -0
  175. dvt/jsonschemas/resources/latest.json +6773 -0
  176. dvt/links.py +4 -0
  177. dvt/materializations/__init__.py +0 -0
  178. dvt/materializations/incremental/__init__.py +0 -0
  179. dvt/materializations/incremental/microbatch.py +235 -0
  180. dvt/mp_context.py +8 -0
  181. dvt/node_types.py +37 -0
  182. dvt/parser/__init__.py +23 -0
  183. dvt/parser/analysis.py +21 -0
  184. dvt/parser/base.py +549 -0
  185. dvt/parser/common.py +267 -0
  186. dvt/parser/docs.py +52 -0
  187. dvt/parser/fixtures.py +51 -0
  188. dvt/parser/functions.py +30 -0
  189. dvt/parser/generic_test.py +100 -0
  190. dvt/parser/generic_test_builders.py +334 -0
  191. dvt/parser/hooks.py +119 -0
  192. dvt/parser/macros.py +137 -0
  193. dvt/parser/manifest.py +2204 -0
  194. dvt/parser/models.py +574 -0
  195. dvt/parser/partial.py +1179 -0
  196. dvt/parser/read_files.py +445 -0
  197. dvt/parser/schema_generic_tests.py +423 -0
  198. dvt/parser/schema_renderer.py +111 -0
  199. dvt/parser/schema_yaml_readers.py +936 -0
  200. dvt/parser/schemas.py +1467 -0
  201. dvt/parser/search.py +149 -0
  202. dvt/parser/seeds.py +28 -0
  203. dvt/parser/singular_test.py +20 -0
  204. dvt/parser/snapshots.py +44 -0
  205. dvt/parser/sources.py +557 -0
  206. dvt/parser/sql.py +63 -0
  207. dvt/parser/unit_tests.py +622 -0
  208. dvt/plugins/__init__.py +20 -0
  209. dvt/plugins/contracts.py +10 -0
  210. dvt/plugins/exceptions.py +2 -0
  211. dvt/plugins/manager.py +164 -0
  212. dvt/plugins/manifest.py +21 -0
  213. dvt/profiler.py +20 -0
  214. dvt/py.typed +1 -0
  215. dvt/runners/__init__.py +2 -0
  216. dvt/runners/exposure_runner.py +7 -0
  217. dvt/runners/no_op_runner.py +46 -0
  218. dvt/runners/saved_query_runner.py +7 -0
  219. dvt/selected_resources.py +8 -0
  220. dvt/task/__init__.py +0 -0
  221. dvt/task/base.py +504 -0
  222. dvt/task/build.py +197 -0
  223. dvt/task/clean.py +57 -0
  224. dvt/task/clone.py +162 -0
  225. dvt/task/compile.py +151 -0
  226. dvt/task/compute.py +366 -0
  227. dvt/task/debug.py +650 -0
  228. dvt/task/deps.py +280 -0
  229. dvt/task/docs/__init__.py +3 -0
  230. dvt/task/docs/generate.py +408 -0
  231. dvt/task/docs/index.html +250 -0
  232. dvt/task/docs/serve.py +28 -0
  233. dvt/task/freshness.py +323 -0
  234. dvt/task/function.py +122 -0
  235. dvt/task/group_lookup.py +46 -0
  236. dvt/task/init.py +374 -0
  237. dvt/task/list.py +237 -0
  238. dvt/task/printer.py +176 -0
  239. dvt/task/profiles.py +256 -0
  240. dvt/task/retry.py +175 -0
  241. dvt/task/run.py +1146 -0
  242. dvt/task/run_operation.py +142 -0
  243. dvt/task/runnable.py +802 -0
  244. dvt/task/seed.py +104 -0
  245. dvt/task/show.py +150 -0
  246. dvt/task/snapshot.py +57 -0
  247. dvt/task/sql.py +111 -0
  248. dvt/task/test.py +464 -0
  249. dvt/tests/fixtures/__init__.py +1 -0
  250. dvt/tests/fixtures/project.py +620 -0
  251. dvt/tests/util.py +651 -0
  252. dvt/tracking.py +529 -0
  253. dvt/utils/__init__.py +3 -0
  254. dvt/utils/artifact_upload.py +151 -0
  255. dvt/utils/utils.py +408 -0
  256. dvt/version.py +249 -0
  257. dvt_core-1.11.0b4.dist-info/METADATA +252 -0
  258. dvt_core-1.11.0b4.dist-info/RECORD +261 -0
  259. dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
  260. dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
  261. dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/config/compute_config.py
@@ -0,0 +1,406 @@
"""
Compute layer configuration for DVT.

This module handles loading and parsing compute.yml configuration files,
which define DuckDB and Spark settings for the compute layer.
"""

from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

import yaml

from dbt_common.events.functions import fire_event
from dbt_common.events.types import Note
from dbt_common.exceptions import DbtRuntimeError


@dataclass
class DuckDBConfig:
    """DuckDB compute engine configuration."""

    memory_limit: str = "8GB"
    threads: int = 4
    temp_directory: str = "/tmp/duckdb"
    max_memory: str = "8GB"
    enable_optimizer: bool = True
    enable_profiling: bool = False
    enable_progress_bar: bool = True
    extensions: List[str] = field(
        default_factory=lambda: [
            "httpfs",
            "postgres_scanner",
            "mysql_scanner",
            "parquet",
            "json",
            "icu",
            "fts",
        ]
    )
    s3: Optional[Dict[str, Any]] = None
    postgres_scanner: Optional[Dict[str, Any]] = None

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "DuckDBConfig":
        """Create DuckDBConfig from dictionary."""
        return cls(
            memory_limit=data.get("memory_limit", "8GB"),
            threads=data.get("threads", 4),
            temp_directory=data.get("temp_directory", "/tmp/duckdb"),
            max_memory=data.get("max_memory", "8GB"),
            enable_optimizer=data.get("enable_optimizer", True),
            enable_profiling=data.get("enable_profiling", False),
            enable_progress_bar=data.get("enable_progress_bar", True),
            extensions=data.get(
                "extensions", cls.__dataclass_fields__["extensions"].default_factory()
            ),
            s3=data.get("s3"),
            postgres_scanner=data.get("postgres_scanner"),
        )


@dataclass
class SparkConnector:
    """Spark connector/JAR specification."""

    name: str
    version: str
    maven: str
    enabled: bool = True

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SparkConnector":
        """Create SparkConnector from dictionary."""
        return cls(
            name=data["name"],
            version=data["version"],
            maven=data["maven"],
            enabled=data.get("enabled", True),
        )


@dataclass
class SparkLocalConfig:
    """Spark local (single node) configuration."""

    master: str = "local[*]"
    app_name: str = "dvt-transformation"
    memory: str = "4g"
    driver_memory: str = "2g"
    executor_memory: str = "4g"
    executor_cores: int = 4
    default_parallelism: int = 8
    ui_port: int = 4040
    ui_enabled: bool = True
    log_level: str = "WARN"
    config: Dict[str, Any] = field(default_factory=dict)
    connectors: List[SparkConnector] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SparkLocalConfig":
        """Create SparkLocalConfig from dictionary."""
        connectors = [SparkConnector.from_dict(c) for c in data.get("connectors", [])]
        return cls(
            master=data.get("master", "local[*]"),
            app_name=data.get("app_name", "dvt-transformation"),
            memory=data.get("memory", "4g"),
            driver_memory=data.get("driver_memory", "2g"),
            executor_memory=data.get("executor_memory", "4g"),
            executor_cores=data.get("executor_cores", 4),
            default_parallelism=data.get("default_parallelism", 8),
            ui_port=data.get("ui_port", 4040),
            ui_enabled=data.get("ui_enabled", True),
            log_level=data.get("log_level", "WARN"),
            config=data.get("config", {}),
            connectors=connectors,
        )


@dataclass
class SparkClusterConfig:
    """Spark cluster (distributed) configuration."""

    master: str
    deploy_mode: str = "client"
    app_name: str = "dvt-transformation-cluster"
    executor_memory: str = "8g"
    executor_cores: int = 4
    num_executors: int = 10
    driver_memory: str = "4g"
    driver_cores: int = 2
    dynamic_allocation: Optional[Dict[str, Any]] = None
    config: Dict[str, Any] = field(default_factory=dict)
    connectors: List[SparkConnector] = field(default_factory=list)
    kerberos: Optional[Dict[str, Any]] = None

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SparkClusterConfig":
        """Create SparkClusterConfig from dictionary."""
        connectors = [SparkConnector.from_dict(c) for c in data.get("connectors", [])]
        return cls(
            master=data["master"],
            deploy_mode=data.get("deploy_mode", "client"),
            app_name=data.get("app_name", "dvt-transformation-cluster"),
            executor_memory=data.get("executor_memory", "8g"),
            executor_cores=data.get("executor_cores", 4),
            num_executors=data.get("num_executors", 10),
            driver_memory=data.get("driver_memory", "4g"),
            driver_cores=data.get("driver_cores", 2),
            dynamic_allocation=data.get("dynamic_allocation"),
            config=data.get("config", {}),
            connectors=connectors,
            kerberos=data.get("kerberos"),
        )


@dataclass
class AutoSelectRule:
    """Auto-selection rule for compute engine."""

    name: str
    priority: int
    condition: Union[str, Dict[str, Any]]
    action: str
    description: str = ""

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "AutoSelectRule":
        """Create AutoSelectRule from dictionary."""
        return cls(
            name=data["name"],
            priority=data["priority"],
            condition=data["condition"],
            action=data["action"],
            description=data.get("description", ""),
        )


@dataclass
class AutoSelectConfig:
    """Auto-selection configuration."""

    enabled: bool = True
    rules: List[AutoSelectRule] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "AutoSelectConfig":
        """Create AutoSelectConfig from dictionary."""
        rules = [AutoSelectRule.from_dict(r) for r in data.get("rules", [])]
        # Sort rules by priority (highest first)
        rules.sort(key=lambda r: r.priority, reverse=True)
        return cls(
            enabled=data.get("enabled", True),
            rules=rules,
        )


@dataclass
class ConnectorManagementConfig:
    """Connector management configuration."""

    auto_download: bool = True
    cache_dir: str = "~/.dvt/connectors"
    maven_repos: List[str] = field(
        default_factory=lambda: [
            "https://repo1.maven.org/maven2",
            "https://packages.confluent.io/maven",
            "https://maven-central.storage.googleapis.com/maven2",
        ]
    )
    verify_checksums: bool = True
    check_updates: str = "weekly"
    bundled_path: str = "${DVT_INSTALL_DIR}/connectors/jars"

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ConnectorManagementConfig":
        """Create ConnectorManagementConfig from dictionary."""
        return cls(
            auto_download=data.get("auto_download", True),
            cache_dir=data.get("cache_dir", "~/.dvt/connectors"),
            maven_repos=data.get(
                "maven_repos", cls.__dataclass_fields__["maven_repos"].default_factory()
            ),
            verify_checksums=data.get("verify_checksums", True),
            check_updates=data.get("check_updates", "weekly"),
            bundled_path=data.get("bundled_path", "${DVT_INSTALL_DIR}/connectors/jars"),
        )


@dataclass
class PerformanceConfig:
    """Performance monitoring configuration."""

    enable_profiling: bool = False
    log_slow_queries: bool = True
    slow_query_threshold: str = "60s"
    collect_metrics: bool = True
    metrics_output: str = "/tmp/dvt_metrics.json"
    save_execution_plans: bool = False
    execution_plan_dir: str = "~/.dvt/execution_plans"

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PerformanceConfig":
        """Create PerformanceConfig from dictionary."""
        return cls(
            enable_profiling=data.get("enable_profiling", False),
            log_slow_queries=data.get("log_slow_queries", True),
            slow_query_threshold=data.get("slow_query_threshold", "60s"),
            collect_metrics=data.get("collect_metrics", True),
            metrics_output=data.get("metrics_output", "/tmp/dvt_metrics.json"),
            save_execution_plans=data.get("save_execution_plans", False),
            execution_plan_dir=data.get("execution_plan_dir", "~/.dvt/execution_plans"),
        )


@dataclass
class DevelopmentConfig:
    """Development and debugging configuration."""

    verbose_errors: bool = True
    explain_queries: bool = False
    dev_mode: bool = False
    dev_limit: int = 1000
    cache_intermediate: bool = True
    cache_dir: str = "/tmp/dvt_cache"

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "DevelopmentConfig":
        """Create DevelopmentConfig from dictionary."""
        return cls(
            verbose_errors=data.get("verbose_errors", True),
            explain_queries=data.get("explain_queries", False),
            dev_mode=data.get("dev_mode", False),
            dev_limit=data.get("dev_limit", 1000),
            cache_intermediate=data.get("cache_intermediate", True),
            cache_dir=data.get("cache_dir", "/tmp/dvt_cache"),
        )


@dataclass
class ComputeConfig:
    """
    Complete compute layer configuration.

    This represents the parsed compute.yml file.
    """

    default_engine: str = "auto"
    duckdb: DuckDBConfig = field(default_factory=DuckDBConfig)
    spark_local: Optional[SparkLocalConfig] = None
    spark_cluster: Optional[SparkClusterConfig] = None
    auto_select: AutoSelectConfig = field(default_factory=AutoSelectConfig)
    connector_management: ConnectorManagementConfig = field(
        default_factory=ConnectorManagementConfig
    )
    performance: PerformanceConfig = field(default_factory=PerformanceConfig)
    development: DevelopmentConfig = field(default_factory=DevelopmentConfig)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ComputeConfig":
        """Create ComputeConfig from dictionary."""
        return cls(
            default_engine=data.get("default_engine", "auto"),
            duckdb=DuckDBConfig.from_dict(data.get("duckdb", {})),
            spark_local=(
                SparkLocalConfig.from_dict(data["spark_local"]) if "spark_local" in data else None
            ),
            spark_cluster=(
                SparkClusterConfig.from_dict(data["spark_cluster"])
                if "spark_cluster" in data
                else None
            ),
            auto_select=AutoSelectConfig.from_dict(data.get("auto_select", {})),
            connector_management=ConnectorManagementConfig.from_dict(
                data.get("connector_management", {})
            ),
            performance=PerformanceConfig.from_dict(data.get("performance", {})),
            development=DevelopmentConfig.from_dict(data.get("development", {})),
        )

    @classmethod
    def load_from_file(cls, file_path: Path) -> "ComputeConfig":
        """
        Load compute configuration from YAML file.

        Args:
            file_path: Path to compute.yml file

        Returns:
            ComputeConfig instance

        Raises:
            DbtRuntimeError: If file cannot be read or parsed
        """
        try:
            if not file_path.exists():
                fire_event(Note(msg=f"Compute config not found at {file_path}, using defaults"))
                return cls()

            with open(file_path, "r") as f:
                data = yaml.safe_load(f)

            if data is None:
                fire_event(Note(msg=f"Empty compute config at {file_path}, using defaults"))
                return cls()

            return cls.from_dict(data)

        except yaml.YAMLError as e:
            raise DbtRuntimeError(f"Failed to parse compute config: {e}")
        except Exception as e:
            raise DbtRuntimeError(f"Failed to load compute config from {file_path}: {e}")

    def get_engine_config(
        self, engine: str
    ) -> Union[DuckDBConfig, SparkLocalConfig, SparkClusterConfig, None]:
        """
        Get configuration for specific compute engine.

        Args:
            engine: Engine name ('duckdb', 'spark_local', 'spark_cluster')

        Returns:
            Engine configuration or None if not configured
        """
        if engine == "duckdb":
            return self.duckdb
        elif engine == "spark_local":
            return self.spark_local
        elif engine == "spark_cluster":
            return self.spark_cluster
        else:
            return None


def load_compute_config(project_dir: Optional[Path] = None) -> ComputeConfig:
    """
    Load compute configuration from standard locations.

    Searches in order:
    1. <project_root>/compute.yml
    2. ~/.dbt/compute.yml
    3. Default configuration

    Args:
        project_dir: Project directory (optional)

    Returns:
        ComputeConfig instance
    """
    # Try project directory first
    if project_dir:
        project_compute = project_dir / "compute.yml"
        if project_compute.exists():
            fire_event(Note(msg=f"Loading compute config from {project_compute}"))
            return ComputeConfig.load_from_file(project_compute)

    # Try home directory
    home_compute = Path.home() / ".dbt" / "compute.yml"
    if home_compute.exists():
        fire_event(Note(msg=f"Loading compute config from {home_compute}"))
        return ComputeConfig.load_from_file(home_compute)

    # Use defaults
    fire_event(Note(msg="No compute.yml found, using default configuration"))
    return ComputeConfig()
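
For orientation, a minimal usage sketch follows. It is not part of the package: it assumes the wheel is installed so that the module path dvt.config.compute_config (matching the file listing above) is importable, and the YAML values are invented for illustration. The semantics of auto_select conditions and actions are not defined in this module; they are presumably interpreted elsewhere (e.g. dvt/compute/router.py in the listing).

# Illustrative only: exercises ComputeConfig.from_dict on a hand-written
# compute.yml snippet. Assumes the dvt-core wheel is installed; the values
# below are made up for the example, not documented defaults.
import yaml

from dvt.config.compute_config import ComputeConfig

EXAMPLE_COMPUTE_YML = """
default_engine: auto
duckdb:
  memory_limit: 4GB
  threads: 2
  extensions: [httpfs, parquet]
spark_local:
  master: "local[2]"
  app_name: dvt-dev
auto_select:
  enabled: true
  rules:
    - name: prefer-duckdb-for-small-models
      priority: 10
      condition: "rows < 10000000"   # hypothetical condition string
      action: duckdb
"""

config = ComputeConfig.from_dict(yaml.safe_load(EXAMPLE_COMPUTE_YML))

assert config.duckdb.threads == 2                      # overridden value
assert config.duckdb.memory_limit == "4GB"
assert config.spark_local is not None
assert config.spark_local.master == "local[2]"
assert config.spark_cluster is None                    # section absent -> None
assert config.auto_select.rules[0].action == "duckdb"

# get_engine_config() dispatches by engine name; unknown names return None.
print(config.get_engine_config("duckdb"))
print(config.get_engine_config("presto"))              # -> None

Since every section of compute.yml is optional, any block omitted from the file falls back to the dataclass defaults shown in the module above (or to None for the Spark sections).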