dvt_core-0.52.2-cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dvt-core might be problematic.
Files changed (275)
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2039 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +804 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +50 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +624 -0
  74. dbt/compute/federated_executor.py +837 -0
  75. dbt/compute/filter_pushdown.cpython-310-darwin.so +0 -0
  76. dbt/compute/filter_pushdown.py +273 -0
  77. dbt/compute/jar_provisioning.cpython-310-darwin.so +0 -0
  78. dbt/compute/jar_provisioning.py +255 -0
  79. dbt/compute/java_compat.cpython-310-darwin.so +0 -0
  80. dbt/compute/java_compat.py +689 -0
  81. dbt/compute/jdbc_utils.cpython-310-darwin.so +0 -0
  82. dbt/compute/jdbc_utils.py +678 -0
  83. dbt/compute/smart_selector.cpython-310-darwin.so +0 -0
  84. dbt/compute/smart_selector.py +311 -0
  85. dbt/compute/strategies/__init__.py +54 -0
  86. dbt/compute/strategies/base.py +165 -0
  87. dbt/compute/strategies/dataproc.py +207 -0
  88. dbt/compute/strategies/emr.py +203 -0
  89. dbt/compute/strategies/local.py +364 -0
  90. dbt/compute/strategies/standalone.py +262 -0
  91. dbt/config/__init__.py +4 -0
  92. dbt/config/catalogs.py +94 -0
  93. dbt/config/compute.cpython-310-darwin.so +0 -0
  94. dbt/config/compute.py +547 -0
  95. dbt/config/dvt_profile.cpython-310-darwin.so +0 -0
  96. dbt/config/dvt_profile.py +342 -0
  97. dbt/config/profile.py +422 -0
  98. dbt/config/project.py +873 -0
  99. dbt/config/project_utils.py +28 -0
  100. dbt/config/renderer.py +231 -0
  101. dbt/config/runtime.py +553 -0
  102. dbt/config/selectors.py +208 -0
  103. dbt/config/utils.py +77 -0
  104. dbt/constants.py +28 -0
  105. dbt/context/__init__.py +0 -0
  106. dbt/context/base.py +745 -0
  107. dbt/context/configured.py +135 -0
  108. dbt/context/context_config.py +382 -0
  109. dbt/context/docs.py +82 -0
  110. dbt/context/exceptions_jinja.py +178 -0
  111. dbt/context/macro_resolver.py +195 -0
  112. dbt/context/macros.py +171 -0
  113. dbt/context/manifest.py +72 -0
  114. dbt/context/providers.py +2249 -0
  115. dbt/context/query_header.py +13 -0
  116. dbt/context/secret.py +58 -0
  117. dbt/context/target.py +74 -0
  118. dbt/contracts/__init__.py +0 -0
  119. dbt/contracts/files.py +413 -0
  120. dbt/contracts/graph/__init__.py +0 -0
  121. dbt/contracts/graph/manifest.py +1904 -0
  122. dbt/contracts/graph/metrics.py +97 -0
  123. dbt/contracts/graph/model_config.py +70 -0
  124. dbt/contracts/graph/node_args.py +42 -0
  125. dbt/contracts/graph/nodes.py +1806 -0
  126. dbt/contracts/graph/semantic_manifest.py +232 -0
  127. dbt/contracts/graph/unparsed.py +811 -0
  128. dbt/contracts/project.py +417 -0
  129. dbt/contracts/results.py +53 -0
  130. dbt/contracts/selection.py +23 -0
  131. dbt/contracts/sql.py +85 -0
  132. dbt/contracts/state.py +68 -0
  133. dbt/contracts/util.py +46 -0
  134. dbt/deprecations.py +346 -0
  135. dbt/deps/__init__.py +0 -0
  136. dbt/deps/base.py +152 -0
  137. dbt/deps/git.py +195 -0
  138. dbt/deps/local.py +79 -0
  139. dbt/deps/registry.py +130 -0
  140. dbt/deps/resolver.py +149 -0
  141. dbt/deps/tarball.py +120 -0
  142. dbt/docs/source/_ext/dbt_click.py +119 -0
  143. dbt/docs/source/conf.py +32 -0
  144. dbt/env_vars.py +64 -0
  145. dbt/event_time/event_time.py +40 -0
  146. dbt/event_time/sample_window.py +60 -0
  147. dbt/events/__init__.py +15 -0
  148. dbt/events/base_types.py +36 -0
  149. dbt/events/core_types_pb2.py +2 -0
  150. dbt/events/logging.py +108 -0
  151. dbt/events/types.py +2516 -0
  152. dbt/exceptions.py +1486 -0
  153. dbt/flags.py +89 -0
  154. dbt/graph/__init__.py +11 -0
  155. dbt/graph/cli.py +247 -0
  156. dbt/graph/graph.py +172 -0
  157. dbt/graph/queue.py +214 -0
  158. dbt/graph/selector.py +374 -0
  159. dbt/graph/selector_methods.py +975 -0
  160. dbt/graph/selector_spec.py +222 -0
  161. dbt/graph/thread_pool.py +18 -0
  162. dbt/hooks.py +21 -0
  163. dbt/include/README.md +49 -0
  164. dbt/include/__init__.py +3 -0
  165. dbt/include/starter_project/.gitignore +4 -0
  166. dbt/include/starter_project/README.md +15 -0
  167. dbt/include/starter_project/__init__.py +3 -0
  168. dbt/include/starter_project/analyses/.gitkeep +0 -0
  169. dbt/include/starter_project/dbt_project.yml +36 -0
  170. dbt/include/starter_project/macros/.gitkeep +0 -0
  171. dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  172. dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  173. dbt/include/starter_project/models/example/schema.yml +21 -0
  174. dbt/include/starter_project/seeds/.gitkeep +0 -0
  175. dbt/include/starter_project/snapshots/.gitkeep +0 -0
  176. dbt/include/starter_project/tests/.gitkeep +0 -0
  177. dbt/internal_deprecations.py +26 -0
  178. dbt/jsonschemas/__init__.py +3 -0
  179. dbt/jsonschemas/jsonschemas.py +309 -0
  180. dbt/jsonschemas/project/0.0.110.json +4717 -0
  181. dbt/jsonschemas/project/0.0.85.json +2015 -0
  182. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  183. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  184. dbt/jsonschemas/resources/latest.json +6773 -0
  185. dbt/links.py +4 -0
  186. dbt/materializations/__init__.py +0 -0
  187. dbt/materializations/incremental/__init__.py +0 -0
  188. dbt/materializations/incremental/microbatch.py +236 -0
  189. dbt/mp_context.py +8 -0
  190. dbt/node_types.py +37 -0
  191. dbt/parser/__init__.py +23 -0
  192. dbt/parser/analysis.py +21 -0
  193. dbt/parser/base.py +548 -0
  194. dbt/parser/common.py +266 -0
  195. dbt/parser/docs.py +52 -0
  196. dbt/parser/fixtures.py +51 -0
  197. dbt/parser/functions.py +30 -0
  198. dbt/parser/generic_test.py +100 -0
  199. dbt/parser/generic_test_builders.py +333 -0
  200. dbt/parser/hooks.py +118 -0
  201. dbt/parser/macros.py +137 -0
  202. dbt/parser/manifest.py +2204 -0
  203. dbt/parser/models.py +573 -0
  204. dbt/parser/partial.py +1178 -0
  205. dbt/parser/read_files.py +445 -0
  206. dbt/parser/schema_generic_tests.py +422 -0
  207. dbt/parser/schema_renderer.py +111 -0
  208. dbt/parser/schema_yaml_readers.py +935 -0
  209. dbt/parser/schemas.py +1466 -0
  210. dbt/parser/search.py +149 -0
  211. dbt/parser/seeds.py +28 -0
  212. dbt/parser/singular_test.py +20 -0
  213. dbt/parser/snapshots.py +44 -0
  214. dbt/parser/sources.py +558 -0
  215. dbt/parser/sql.py +62 -0
  216. dbt/parser/unit_tests.py +621 -0
  217. dbt/plugins/__init__.py +20 -0
  218. dbt/plugins/contracts.py +9 -0
  219. dbt/plugins/exceptions.py +2 -0
  220. dbt/plugins/manager.py +163 -0
  221. dbt/plugins/manifest.py +21 -0
  222. dbt/profiler.py +20 -0
  223. dbt/py.typed +1 -0
  224. dbt/query_analyzer.cpython-310-darwin.so +0 -0
  225. dbt/query_analyzer.py +410 -0
  226. dbt/runners/__init__.py +2 -0
  227. dbt/runners/exposure_runner.py +7 -0
  228. dbt/runners/no_op_runner.py +45 -0
  229. dbt/runners/saved_query_runner.py +7 -0
  230. dbt/selected_resources.py +8 -0
  231. dbt/task/__init__.py +0 -0
  232. dbt/task/base.py +503 -0
  233. dbt/task/build.py +197 -0
  234. dbt/task/clean.py +56 -0
  235. dbt/task/clone.py +161 -0
  236. dbt/task/compile.py +150 -0
  237. dbt/task/compute.py +454 -0
  238. dbt/task/debug.py +505 -0
  239. dbt/task/deps.py +280 -0
  240. dbt/task/docs/__init__.py +3 -0
  241. dbt/task/docs/generate.py +660 -0
  242. dbt/task/docs/index.html +250 -0
  243. dbt/task/docs/serve.py +29 -0
  244. dbt/task/freshness.py +322 -0
  245. dbt/task/function.py +121 -0
  246. dbt/task/group_lookup.py +46 -0
  247. dbt/task/init.py +553 -0
  248. dbt/task/java.py +316 -0
  249. dbt/task/list.py +236 -0
  250. dbt/task/printer.py +175 -0
  251. dbt/task/retry.py +175 -0
  252. dbt/task/run.py +1306 -0
  253. dbt/task/run_operation.py +141 -0
  254. dbt/task/runnable.py +758 -0
  255. dbt/task/seed.py +103 -0
  256. dbt/task/show.py +149 -0
  257. dbt/task/snapshot.py +56 -0
  258. dbt/task/spark.py +414 -0
  259. dbt/task/sql.py +110 -0
  260. dbt/task/target_sync.py +759 -0
  261. dbt/task/test.py +464 -0
  262. dbt/tests/fixtures/__init__.py +1 -0
  263. dbt/tests/fixtures/project.py +620 -0
  264. dbt/tests/util.py +651 -0
  265. dbt/tracking.py +529 -0
  266. dbt/utils/__init__.py +3 -0
  267. dbt/utils/artifact_upload.py +151 -0
  268. dbt/utils/utils.py +408 -0
  269. dbt/version.py +268 -0
  270. dvt_cli/__init__.py +72 -0
  271. dvt_core-0.52.2.dist-info/METADATA +286 -0
  272. dvt_core-0.52.2.dist-info/RECORD +275 -0
  273. dvt_core-0.52.2.dist-info/WHEEL +5 -0
  274. dvt_core-0.52.2.dist-info/entry_points.txt +2 -0
  275. dvt_core-0.52.2.dist-info/top_level.txt +2 -0
dbt/task/compute.py ADDED
@@ -0,0 +1,454 @@
+"""
+Compute Task
+
+Handles DVT compute engine management commands:
+- test: List all compute engines with connection status
+- edit: Open computes.yml in user's editor
+- validate: Validate compute engine configurations
+
+v0.5.97: Simplified CLI - removed register/remove (use dvt compute edit instead)
+computes.yml with comprehensive samples replaces interactive registration.
+"""
+
+import os
+import subprocess
+import sys
+from pathlib import Path
+from typing import Optional
+
+from dbt.config.compute import ComputeRegistry, SparkPlatform, DEFAULT_COMPUTES_YAML
+from dbt_common.exceptions import DbtRuntimeError
+
+
+class ComputeTask:
+    """Task for managing DVT compute engines."""
+
+    def __init__(self, project_dir: Optional[str] = None):
+        """
+        Initialize ComputeTask.
+
+        :param project_dir: Path to project root directory (defaults to current directory)
+        """
+        self.project_dir = project_dir or str(Path.cwd())
+        self.registry = ComputeRegistry(self.project_dir)
+
+    def list_computes(self) -> bool:
+        """
+        List all compute engines with their names and basic info.
+
+        v0.51.1: Simple list command for quick reference.
+
+        :returns: True always (for CLI exit code)
+        """
+        clusters = self.registry.list()
+
+        if not clusters:
+            print("No compute engines configured.")
+            print("\nRun 'dvt compute edit' to configure compute engines.")
+            return True
+
+        print(f"\nCompute Engines ({len(clusters)} configured)")
+        print("-" * 40)
+
+        for cluster in clusters:
+            default_marker = " (default)" if cluster.name == self.registry.target_compute else ""
+            platform = cluster.detect_platform()
+            print(f" {cluster.name}{default_marker}")
+            print(f" Platform: {platform.value}")
+            if cluster.description:
+                print(f" Description: {cluster.description}")
+
+        print("")
+        print(f"Default: {self.registry.target_compute}")
+        print(f"Config: {self.registry.get_config_path()}")
+
+        return True
+
+    def test_single_compute(self, compute_name: str) -> bool:
+        """
+        Test a specific compute engine by name.
+
+        v0.5.99: Added single compute testing via `dvt compute test <name>`.
+
+        :param compute_name: Name of the compute engine to test
+        :returns: True if test passes, False otherwise
+        """
+        clusters = self.registry.list()
+        cluster = None
+
+        # Find the cluster by name
+        for c in clusters:
+            if c.name == compute_name:
+                cluster = c
+                break
+
+        if cluster is None:
+            print(f"❌ Compute engine '{compute_name}' not found.")
+            print(f"\nAvailable compute engines:")
+            for c in clusters:
+                default_marker = " (default)" if c.name == self.registry.target_compute else ""
+                print(f" - {c.name}{default_marker}")
+            print(f"\nRun 'dvt compute edit' to configure compute engines.")
+            return False
+
+        # Test the specific cluster
+        print(f"\n" + "=" * 70)
+        print(f"Testing Compute Engine: {compute_name}")
+        print("=" * 70)
+
+        default_marker = " (default)" if cluster.name == self.registry.target_compute else ""
+        platform = cluster.detect_platform()
+
+        print(f"\n{cluster.name}{default_marker}")
+        print(f" Type: {cluster.type}")
+        print(f" Platform: {platform.value}")
+        if cluster.description:
+            print(f" Description: {cluster.description}")
+
+        # Test the cluster
+        status, message = self._test_single_cluster(cluster)
+
+        if status == "ok":
+            print(f" Status: ✅ {message}")
+        elif status == "warning":
+            print(f" Status: ⚠️ {message}")
+        elif status == "error":
+            print(f" Status: ❌ {message}")
+
+        print("\n" + "-" * 70)
+
+        return status in ("ok", "warning")
+
+    def _test_single_cluster(self, cluster) -> tuple:
+        """
+        Test a single cluster and return status.
+
+        v0.5.98: Full connectivity testing with three stages:
+        1. Config validation
+        2. Session creation + SELECT 1 test
+        3. (Optional) JDBC read test
+
+        v0.51.1: Enhanced session isolation - forcefully stop ALL Spark sessions
+        before and after each test to prevent config bleed between computes.
+
+        :param cluster: ComputeCluster to test
+        :returns: Tuple of (status, message) where status is 'ok', 'warning', or 'error'
+        """
+        platform = cluster.detect_platform()
+
+        if cluster.type == "spark":
+            # Stage 1: Config validation
+            config_result = self._validate_cluster_config(cluster, platform)
+            if config_result[0] == "error":
+                return config_result
+
+            # Stage 2: Full connectivity test (session + SQL)
+            try:
+                # v0.51.1: Force stop ALL Spark sessions before testing
+                # This ensures each compute engine test gets a completely fresh JVM context
+                self._force_stop_all_spark_sessions()
+
+                strategy = self._get_strategy_for_cluster(cluster, platform)
+                if strategy is None:
+                    return config_result  # Return config validation result
+
+                success, message = strategy.test_connectivity()
+
+                # v0.51.1: Force stop ALL sessions after test to not interfere with next compute
+                self._force_stop_all_spark_sessions()
+
+                if success:
+                    return ("ok", message)
+                else:
+                    return ("error", message)
+
+            except ImportError as e:
+                # Missing dependency (PySpark, databricks-connect, etc.)
+                return ("warning", str(e))
+            except AttributeError as e:
+                # databricks-connect may have compatibility issues with pyspark
+                if "Hook" in str(e) or "SparkSession" in str(e):
+                    return ("warning", f"databricks-connect/pyspark version conflict: {str(e)[:50]}")
+                return ("error", f"Connectivity test failed: {str(e)}")
+            except Exception as e:
+                return ("error", f"Connectivity test failed: {str(e)}")
+
+        return ("ok", "Configuration valid")
+
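A note on the interface the method above depends on: all it needs from a strategy object is that it can be constructed from the cluster config (see _get_strategy_for_cluster further down) and that it exposes test_connectivity() returning a (success, message) pair. A minimal stand-in that satisfies this contract, shown purely for illustration and not part of the package:

class FakeStrategy:
    """Hypothetical stub matching the contract _test_single_cluster relies on."""

    def __init__(self, config, app_name="DVT-test"):
        self.config = config
        self.app_name = app_name

    def test_connectivity(self):
        # A real strategy would start a Spark session and run a probe query (e.g. SELECT 1).
        return True, "SELECT 1 succeeded"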
+    def _force_stop_all_spark_sessions(self) -> None:
+        """
+        Force stop ALL Spark sessions to ensure complete isolation.
+
+        v0.51.1: This is critical for compute testing because:
+        1. Different computes have different spark.jars.packages configs
+        2. Spark's getOrCreate() returns existing session without re-applying config
+        3. We need a fresh JVM context for each compute's JDBC drivers
+
+        This method:
+        1. Stops active session
+        2. Clears the local session cache used by LocalStrategy
+        3. Forces garbage collection to release JVM resources
+        """
+        try:
+            from pyspark.sql import SparkSession
+
+            # Stop active session
+            active = SparkSession.getActiveSession()
+            if active:
+                active.stop()
+
+            # Clear local strategy cache
+            try:
+                from dbt.compute.strategies.local import _SPARK_SESSION_CACHE
+                _SPARK_SESSION_CACHE.clear()
+            except (ImportError, AttributeError):
+                pass
+
+            # Give JVM time to release resources
+            import time
+            time.sleep(0.5)
+
+        except ImportError:
+            pass  # PySpark not installed
+        except Exception:
+            pass  # Best effort cleanup
+
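The isolation rationale in the docstring above reflects standard PySpark behavior: SparkSession.builder.getOrCreate() hands back the already-running session, and launch-time settings such as spark.jars.packages cannot be changed once the JVM is up. A minimal sketch of that behavior (the Maven coordinates are arbitrary examples):

from pyspark.sql import SparkSession

# First builder starts the JVM with this spark.jars.packages value.
s1 = SparkSession.builder.config(
    "spark.jars.packages", "org.postgresql:postgresql:42.7.3"
).getOrCreate()

# A second builder asking for a different driver gets the same session back;
# the launch-time packages setting is not reapplied to the running JVM.
s2 = SparkSession.builder.config(
    "spark.jars.packages", "com.mysql:mysql-connector-j:8.4.0"
).getOrCreate()

assert s1 is s2
s1.stop()  # stopping the session, as _force_stop_all_spark_sessions does, is what allows a fresh config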
+    def _validate_cluster_config(self, cluster, platform: SparkPlatform) -> tuple:
+        """
+        Validate cluster configuration (Stage 1).
+
+        :param cluster: ComputeCluster to validate
+        :param platform: Detected SparkPlatform
+        :returns: Tuple of (status, message)
+        """
+        if platform == SparkPlatform.LOCAL:
+            try:
+                import pyspark  # noqa: F401
+                # PySpark 4.0+ doesn't have __version__ attribute, use importlib
+                try:
+                    from importlib.metadata import version
+                    pyspark_version = version("pyspark")
+                except Exception:
+                    pyspark_version = "unknown"
+                return ("ok", f"PySpark {pyspark_version} available")
+            except ImportError:
+                return ("error", "PySpark not installed")
+
+        elif platform == SparkPlatform.EMR:
+            required = ["master"]
+            missing = [k for k in required if k not in cluster.config]
+            if missing:
+                return ("error", f"Missing config: {', '.join(missing)}")
+            master = cluster.config.get("master", "")
+            if master.lower() != "yarn":
+                return ("error", f"EMR requires master='yarn', got: {master}")
+            return ("ok", "EMR config valid")
+
+        elif platform == SparkPlatform.DATAPROC:
+            required = ["project", "region", "cluster"]
+            missing = [k for k in required if k not in cluster.config]
+            if missing:
+                return ("error", f"Missing config: {', '.join(missing)}")
+            return ("ok", "Dataproc config valid")
+
+        elif platform == SparkPlatform.STANDALONE:
+            master = cluster.config.get("master", "")
+            if not master.startswith("spark://"):
+                return ("error", f"Standalone requires master='spark://...', got: {master}")
+            return ("ok", f"Standalone config valid ({master})")
+
+        else:
+            # External/generic
+            if "master" in cluster.config:
+                return ("ok", f"External cluster at {cluster.config['master']}")
+            return ("ok", "Configuration valid")
+
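The branch-by-branch checks above imply fairly small per-platform config dictionaries. Illustrative values that would pass each branch (placeholders, not defaults shipped with the package):

# Placeholder configs that satisfy _validate_cluster_config for each platform.
emr_config = {"master": "yarn"}  # EMR: master must be 'yarn' (checked case-insensitively)
dataproc_config = {
    "project": "my-gcp-project",  # Dataproc: project, region and cluster are required
    "region": "us-central1",
    "cluster": "dvt-cluster",
}
standalone_config = {"master": "spark://spark-master:7077"}  # Standalone: master must start with 'spark://'
# LOCAL only checks that pyspark is importable; no required keys.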
+    def _get_strategy_for_cluster(self, cluster, platform: SparkPlatform):
+        """
+        Get the connection strategy for a cluster.
+
+        :param cluster: ComputeCluster
+        :param platform: Detected SparkPlatform
+        :returns: BaseConnectionStrategy instance or None
+        """
+        try:
+            if platform == SparkPlatform.LOCAL:
+                from dbt.compute.strategies.local import LocalStrategy
+                return LocalStrategy(cluster.config, app_name=f"DVT-{cluster.name}")
+
+            elif platform == SparkPlatform.EMR:
+                from dbt.compute.strategies import get_emr_strategy
+                EMRStrategy = get_emr_strategy()
+                return EMRStrategy(cluster.config, app_name=f"DVT-{cluster.name}")
+
+            elif platform == SparkPlatform.DATAPROC:
+                from dbt.compute.strategies import get_dataproc_strategy
+                DataprocStrategy = get_dataproc_strategy()
+                return DataprocStrategy(cluster.config, app_name=f"DVT-{cluster.name}")
+
+            elif platform == SparkPlatform.STANDALONE:
+                from dbt.compute.strategies import get_standalone_strategy
+                StandaloneStrategy = get_standalone_strategy()
+                return StandaloneStrategy(cluster.config, app_name=f"DVT-{cluster.name}")
+
+            else:
+                # External - no specific strategy, skip connectivity test
+                return None
+
+        except ImportError as e:
+            raise ImportError(f"Missing dependency for {platform.value}: {str(e)}")
+
+    def edit_config(self) -> bool:
+        """
+        Open computes.yml in user's preferred editor.
+
+        Uses EDITOR environment variable, falls back to common editors.
+
+        :returns: True if editor launched successfully
+        """
+        # Ensure config exists with full template
+        config_path = self.registry.ensure_config_exists()
+
+        # If file doesn't have full samples, write the template
+        with open(config_path, "r") as f:
+            content = f.read()
+        if "DATABRICKS" not in content:
+            # Write full template to get all the samples
+            with open(config_path, "w") as f:
+                f.write(DEFAULT_COMPUTES_YAML)
+
+        print(f"Opening: {config_path}")
+        print("")
+        print("After editing, run 'dvt compute validate' to check syntax.")
+        print("")
+
+        # Get editor from environment or use defaults
+        editor = os.environ.get("EDITOR")
+        if not editor:
+            editor = os.environ.get("VISUAL")
+        if not editor:
+            # Try common editors
+            for ed in ["code", "nano", "vim", "vi", "notepad"]:
+                try:
+                    subprocess.run(["which", ed], capture_output=True, check=True)
+                    editor = ed
+                    break
+                except (subprocess.CalledProcessError, FileNotFoundError):
+                    continue
+
+        if not editor:
+            print(f"No editor found. Please open manually: {config_path}")
+            return False
+
+        try:
+            # Handle VS Code specially (--wait flag)
+            if editor in ("code", "code-insiders"):
+                subprocess.run([editor, "--wait", str(config_path)])
+            else:
+                subprocess.run([editor, str(config_path)])
+
+            # Reload and validate after edit
+            print("\nValidating changes...")
+            return self.validate_config()
+
+        except Exception as e:
+            print(f"Error opening editor: {e}", file=sys.stderr)
+            print(f"Please open manually: {config_path}")
+            return False
+
+    def validate_config(self) -> bool:
+        """
+        Validate computes.yml syntax and configuration.
+
+        :returns: True if configuration is valid
+        """
+        config_path = self.registry.get_config_path()
+
+        if not config_path.exists():
+            print(f"✗ Config file not found: {config_path}")
+            print("\nRun 'dvt compute edit' to create one.")
+            return False
+
+        print(f"Validating: {config_path}")
+        print("")
+
+        try:
+            # Try to load the YAML
+            import yaml
+            with open(config_path, "r") as f:
+                data = yaml.safe_load(f)
+
+            if not data:
+                print("✗ Config file is empty")
+                return False
+
+            errors = []
+            warnings = []
+
+            # Check target_compute
+            target = data.get("target_compute")
+            if not target:
+                errors.append("Missing 'target_compute' field")
+
+            # Check computes section
+            computes = data.get("computes", {})
+            if not computes:
+                errors.append("No compute engines defined in 'computes' section")
+            else:
+                # Validate each compute
+                for name, config in computes.items():
+                    if config is None:
+                        continue  # Skip commented-out entries
+
+                    if not isinstance(config, dict):
+                        errors.append(f"Compute '{name}': invalid configuration (expected dict)")
+                        continue
+
+                    # Check type
+                    compute_type = config.get("type")
+                    if not compute_type:
+                        errors.append(f"Compute '{name}': missing 'type' field")
+                    elif compute_type not in ("spark",):
+                        warnings.append(f"Compute '{name}': unknown type '{compute_type}' (only 'spark' supported)")
+
+                    # Check config section
+                    if "config" not in config:
+                        warnings.append(f"Compute '{name}': no 'config' section (will use defaults)")
+
+            # Check target_compute references valid engine
+            if target and target not in computes:
+                errors.append(f"target_compute '{target}' not found in computes section")
+
+            # Print results
+            if errors:
+                print("Errors:")
+                for err in errors:
+                    print(f" ✗ {err}")
+                print("")
+
+            if warnings:
+                print("Warnings:")
+                for warn in warnings:
+                    print(f" ⚠ {warn}")
+                print("")
+
+            if not errors and not warnings:
+                print("✓ Configuration is valid")
+                print(f" Target compute: {target}")
+                print(f" Engines defined: {len([c for c in computes.values() if c])}")
+                return True
+
+            if not errors:
+                print("✓ Configuration is valid (with warnings)")
+                return True
+
+            return False
+
+        except yaml.YAMLError as e:
+            print(f"✗ YAML syntax error:")
+            print(f" {e}")
+            return False
+
+        except Exception as e:
+            print(f"✗ Validation failed: {e}")
+            return False
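Taken together, these checks describe the expected shape of computes.yml: a top-level target_compute key plus a computes mapping whose entries carry type: spark and a config section. A minimal sketch that passes the same structural checks (the engine name and Spark settings are illustrative, not the DEFAULT_COMPUTES_YAML template shipped with the package):

import yaml

sample = """
target_compute: local_spark
computes:
  local_spark:
    type: spark
    config:
      master: local[*]
"""

data = yaml.safe_load(sample)
assert data["target_compute"] in data["computes"]  # target must reference a defined engine
for name, cfg in data["computes"].items():
    assert isinstance(cfg, dict) and cfg.get("type") == "spark" and "config" in cfg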