dvt_core-0.58.6-cp311-cp311-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324)
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2403 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +844 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +50 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.cpython-311-darwin.so +0 -0
  74. dbt/compute/engines/spark_engine.py +642 -0
  75. dbt/compute/federated_executor.cpython-311-darwin.so +0 -0
  76. dbt/compute/federated_executor.py +1080 -0
  77. dbt/compute/filter_pushdown.cpython-311-darwin.so +0 -0
  78. dbt/compute/filter_pushdown.py +273 -0
  79. dbt/compute/jar_provisioning.cpython-311-darwin.so +0 -0
  80. dbt/compute/jar_provisioning.py +255 -0
  81. dbt/compute/java_compat.cpython-311-darwin.so +0 -0
  82. dbt/compute/java_compat.py +689 -0
  83. dbt/compute/jdbc_utils.cpython-311-darwin.so +0 -0
  84. dbt/compute/jdbc_utils.py +678 -0
  85. dbt/compute/metadata/__init__.py +40 -0
  86. dbt/compute/metadata/adapters_registry.cpython-311-darwin.so +0 -0
  87. dbt/compute/metadata/adapters_registry.py +370 -0
  88. dbt/compute/metadata/registry.cpython-311-darwin.so +0 -0
  89. dbt/compute/metadata/registry.py +674 -0
  90. dbt/compute/metadata/store.cpython-311-darwin.so +0 -0
  91. dbt/compute/metadata/store.py +1499 -0
  92. dbt/compute/smart_selector.cpython-311-darwin.so +0 -0
  93. dbt/compute/smart_selector.py +377 -0
  94. dbt/compute/strategies/__init__.py +55 -0
  95. dbt/compute/strategies/base.cpython-311-darwin.so +0 -0
  96. dbt/compute/strategies/base.py +165 -0
  97. dbt/compute/strategies/dataproc.cpython-311-darwin.so +0 -0
  98. dbt/compute/strategies/dataproc.py +207 -0
  99. dbt/compute/strategies/emr.cpython-311-darwin.so +0 -0
  100. dbt/compute/strategies/emr.py +203 -0
  101. dbt/compute/strategies/local.cpython-311-darwin.so +0 -0
  102. dbt/compute/strategies/local.py +443 -0
  103. dbt/compute/strategies/standalone.cpython-311-darwin.so +0 -0
  104. dbt/compute/strategies/standalone.py +262 -0
  105. dbt/config/__init__.py +4 -0
  106. dbt/config/catalogs.py +94 -0
  107. dbt/config/compute.cpython-311-darwin.so +0 -0
  108. dbt/config/compute.py +513 -0
  109. dbt/config/dvt_profile.cpython-311-darwin.so +0 -0
  110. dbt/config/dvt_profile.py +342 -0
  111. dbt/config/profile.py +422 -0
  112. dbt/config/project.py +873 -0
  113. dbt/config/project_utils.py +28 -0
  114. dbt/config/renderer.py +231 -0
  115. dbt/config/runtime.py +553 -0
  116. dbt/config/selectors.py +208 -0
  117. dbt/config/utils.py +77 -0
  118. dbt/constants.py +28 -0
  119. dbt/context/__init__.py +0 -0
  120. dbt/context/base.py +745 -0
  121. dbt/context/configured.py +135 -0
  122. dbt/context/context_config.py +382 -0
  123. dbt/context/docs.py +82 -0
  124. dbt/context/exceptions_jinja.py +178 -0
  125. dbt/context/macro_resolver.py +195 -0
  126. dbt/context/macros.py +171 -0
  127. dbt/context/manifest.py +72 -0
  128. dbt/context/providers.py +2249 -0
  129. dbt/context/query_header.py +13 -0
  130. dbt/context/secret.py +58 -0
  131. dbt/context/target.py +74 -0
  132. dbt/contracts/__init__.py +0 -0
  133. dbt/contracts/files.py +413 -0
  134. dbt/contracts/graph/__init__.py +0 -0
  135. dbt/contracts/graph/manifest.py +1904 -0
  136. dbt/contracts/graph/metrics.py +97 -0
  137. dbt/contracts/graph/model_config.py +70 -0
  138. dbt/contracts/graph/node_args.py +42 -0
  139. dbt/contracts/graph/nodes.py +1806 -0
  140. dbt/contracts/graph/semantic_manifest.py +232 -0
  141. dbt/contracts/graph/unparsed.py +811 -0
  142. dbt/contracts/project.py +417 -0
  143. dbt/contracts/results.py +53 -0
  144. dbt/contracts/selection.py +23 -0
  145. dbt/contracts/sql.py +85 -0
  146. dbt/contracts/state.py +68 -0
  147. dbt/contracts/util.py +46 -0
  148. dbt/deprecations.py +348 -0
  149. dbt/deps/__init__.py +0 -0
  150. dbt/deps/base.py +152 -0
  151. dbt/deps/git.py +195 -0
  152. dbt/deps/local.py +79 -0
  153. dbt/deps/registry.py +130 -0
  154. dbt/deps/resolver.py +149 -0
  155. dbt/deps/tarball.py +120 -0
  156. dbt/docs/source/_ext/dbt_click.py +119 -0
  157. dbt/docs/source/conf.py +32 -0
  158. dbt/env_vars.py +64 -0
  159. dbt/event_time/event_time.py +40 -0
  160. dbt/event_time/sample_window.py +60 -0
  161. dbt/events/__init__.py +15 -0
  162. dbt/events/base_types.py +36 -0
  163. dbt/events/core_types_pb2.py +2 -0
  164. dbt/events/logging.py +108 -0
  165. dbt/events/types.py +2516 -0
  166. dbt/exceptions.py +1486 -0
  167. dbt/flags.py +89 -0
  168. dbt/graph/__init__.py +11 -0
  169. dbt/graph/cli.py +249 -0
  170. dbt/graph/graph.py +172 -0
  171. dbt/graph/queue.py +214 -0
  172. dbt/graph/selector.py +374 -0
  173. dbt/graph/selector_methods.py +975 -0
  174. dbt/graph/selector_spec.py +222 -0
  175. dbt/graph/thread_pool.py +18 -0
  176. dbt/hooks.py +21 -0
  177. dbt/include/README.md +49 -0
  178. dbt/include/__init__.py +3 -0
  179. dbt/include/data/adapters_registry.duckdb +0 -0
  180. dbt/include/data/build_registry.py +242 -0
  181. dbt/include/data/csv/adapter_queries.csv +33 -0
  182. dbt/include/data/csv/syntax_rules.csv +9 -0
  183. dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
  184. dbt/include/data/csv/type_mappings_databricks.csv +30 -0
  185. dbt/include/data/csv/type_mappings_mysql.csv +40 -0
  186. dbt/include/data/csv/type_mappings_oracle.csv +30 -0
  187. dbt/include/data/csv/type_mappings_postgres.csv +56 -0
  188. dbt/include/data/csv/type_mappings_redshift.csv +33 -0
  189. dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
  190. dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
  191. dbt/include/starter_project/.gitignore +4 -0
  192. dbt/include/starter_project/README.md +15 -0
  193. dbt/include/starter_project/__init__.py +3 -0
  194. dbt/include/starter_project/analyses/.gitkeep +0 -0
  195. dbt/include/starter_project/dbt_project.yml +36 -0
  196. dbt/include/starter_project/macros/.gitkeep +0 -0
  197. dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  198. dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  199. dbt/include/starter_project/models/example/schema.yml +21 -0
  200. dbt/include/starter_project/seeds/.gitkeep +0 -0
  201. dbt/include/starter_project/snapshots/.gitkeep +0 -0
  202. dbt/include/starter_project/tests/.gitkeep +0 -0
  203. dbt/internal_deprecations.py +26 -0
  204. dbt/jsonschemas/__init__.py +3 -0
  205. dbt/jsonschemas/jsonschemas.py +309 -0
  206. dbt/jsonschemas/project/0.0.110.json +4717 -0
  207. dbt/jsonschemas/project/0.0.85.json +2015 -0
  208. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  209. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  210. dbt/jsonschemas/resources/latest.json +6773 -0
  211. dbt/links.py +4 -0
  212. dbt/materializations/__init__.py +0 -0
  213. dbt/materializations/incremental/__init__.py +0 -0
  214. dbt/materializations/incremental/microbatch.py +236 -0
  215. dbt/mp_context.py +8 -0
  216. dbt/node_types.py +37 -0
  217. dbt/parser/__init__.py +23 -0
  218. dbt/parser/analysis.py +21 -0
  219. dbt/parser/base.py +548 -0
  220. dbt/parser/common.py +266 -0
  221. dbt/parser/docs.py +52 -0
  222. dbt/parser/fixtures.py +51 -0
  223. dbt/parser/functions.py +30 -0
  224. dbt/parser/generic_test.py +100 -0
  225. dbt/parser/generic_test_builders.py +333 -0
  226. dbt/parser/hooks.py +118 -0
  227. dbt/parser/macros.py +137 -0
  228. dbt/parser/manifest.py +2204 -0
  229. dbt/parser/models.py +573 -0
  230. dbt/parser/partial.py +1178 -0
  231. dbt/parser/read_files.py +445 -0
  232. dbt/parser/schema_generic_tests.py +422 -0
  233. dbt/parser/schema_renderer.py +111 -0
  234. dbt/parser/schema_yaml_readers.py +935 -0
  235. dbt/parser/schemas.py +1466 -0
  236. dbt/parser/search.py +149 -0
  237. dbt/parser/seeds.py +28 -0
  238. dbt/parser/singular_test.py +20 -0
  239. dbt/parser/snapshots.py +44 -0
  240. dbt/parser/sources.py +558 -0
  241. dbt/parser/sql.py +62 -0
  242. dbt/parser/unit_tests.py +621 -0
  243. dbt/plugins/__init__.py +20 -0
  244. dbt/plugins/contracts.py +9 -0
  245. dbt/plugins/exceptions.py +2 -0
  246. dbt/plugins/manager.py +163 -0
  247. dbt/plugins/manifest.py +21 -0
  248. dbt/profiler.py +20 -0
  249. dbt/py.typed +1 -0
  250. dbt/query_analyzer.cpython-311-darwin.so +0 -0
  251. dbt/query_analyzer.py +410 -0
  252. dbt/runners/__init__.py +2 -0
  253. dbt/runners/exposure_runner.py +7 -0
  254. dbt/runners/no_op_runner.py +45 -0
  255. dbt/runners/saved_query_runner.py +7 -0
  256. dbt/selected_resources.py +8 -0
  257. dbt/task/__init__.py +0 -0
  258. dbt/task/base.py +503 -0
  259. dbt/task/build.py +197 -0
  260. dbt/task/clean.py +56 -0
  261. dbt/task/clone.py +161 -0
  262. dbt/task/compile.py +150 -0
  263. dbt/task/compute.cpython-311-darwin.so +0 -0
  264. dbt/task/compute.py +458 -0
  265. dbt/task/debug.py +505 -0
  266. dbt/task/deps.py +280 -0
  267. dbt/task/docs/__init__.py +3 -0
  268. dbt/task/docs/api/__init__.py +23 -0
  269. dbt/task/docs/api/catalog.cpython-311-darwin.so +0 -0
  270. dbt/task/docs/api/catalog.py +204 -0
  271. dbt/task/docs/api/lineage.cpython-311-darwin.so +0 -0
  272. dbt/task/docs/api/lineage.py +234 -0
  273. dbt/task/docs/api/profile.cpython-311-darwin.so +0 -0
  274. dbt/task/docs/api/profile.py +204 -0
  275. dbt/task/docs/api/spark.cpython-311-darwin.so +0 -0
  276. dbt/task/docs/api/spark.py +186 -0
  277. dbt/task/docs/generate.py +947 -0
  278. dbt/task/docs/index.html +250 -0
  279. dbt/task/docs/serve.cpython-311-darwin.so +0 -0
  280. dbt/task/docs/serve.py +174 -0
  281. dbt/task/dvt_output.py +362 -0
  282. dbt/task/dvt_run.py +204 -0
  283. dbt/task/freshness.py +322 -0
  284. dbt/task/function.py +121 -0
  285. dbt/task/group_lookup.py +46 -0
  286. dbt/task/init.cpython-311-darwin.so +0 -0
  287. dbt/task/init.py +604 -0
  288. dbt/task/java.cpython-311-darwin.so +0 -0
  289. dbt/task/java.py +316 -0
  290. dbt/task/list.py +236 -0
  291. dbt/task/metadata.cpython-311-darwin.so +0 -0
  292. dbt/task/metadata.py +804 -0
  293. dbt/task/printer.py +175 -0
  294. dbt/task/profile.cpython-311-darwin.so +0 -0
  295. dbt/task/profile.py +1307 -0
  296. dbt/task/profile_serve.py +615 -0
  297. dbt/task/retract.py +438 -0
  298. dbt/task/retry.py +175 -0
  299. dbt/task/run.py +1387 -0
  300. dbt/task/run_operation.py +141 -0
  301. dbt/task/runnable.py +758 -0
  302. dbt/task/seed.py +103 -0
  303. dbt/task/show.py +149 -0
  304. dbt/task/snapshot.py +56 -0
  305. dbt/task/spark.cpython-311-darwin.so +0 -0
  306. dbt/task/spark.py +414 -0
  307. dbt/task/sql.py +110 -0
  308. dbt/task/target_sync.cpython-311-darwin.so +0 -0
  309. dbt/task/target_sync.py +766 -0
  310. dbt/task/test.py +464 -0
  311. dbt/tests/fixtures/__init__.py +1 -0
  312. dbt/tests/fixtures/project.py +620 -0
  313. dbt/tests/util.py +651 -0
  314. dbt/tracking.py +529 -0
  315. dbt/utils/__init__.py +3 -0
  316. dbt/utils/artifact_upload.py +151 -0
  317. dbt/utils/utils.py +408 -0
  318. dbt/version.py +270 -0
  319. dvt_cli/__init__.py +72 -0
  320. dvt_core-0.58.6.dist-info/METADATA +288 -0
  321. dvt_core-0.58.6.dist-info/RECORD +324 -0
  322. dvt_core-0.58.6.dist-info/WHEEL +5 -0
  323. dvt_core-0.58.6.dist-info/entry_points.txt +2 -0
  324. dvt_core-0.58.6.dist-info/top_level.txt +2 -0
dbt/task/spark.py ADDED
@@ -0,0 +1,414 @@
+ """
+ Spark Task Module
+
+ Handles DVT spark management commands:
+ - check: Show PySpark version, Java compatibility, and cluster info
+ - set-version: Interactive selection to install PySpark version
+ - match-cluster: Detect cluster version and suggest compatible PySpark
+
+ v0.51.3: New module for comprehensive Spark management.
+ """
+
+ import os
+ import subprocess
+ import sys
+ from typing import Optional, Tuple
+
+ import click
+
+ from dbt.compute.java_compat import (
+     PYSPARK_JAVA_COMPATIBILITY,
+     PYSPARK_VERSIONS,
+     check_java_pyspark_compatibility,
+     detect_spark_cluster_version,
+     get_current_java,
+     get_pyspark_info,
+ )
+
+
+ class SparkTask:
+     """Task for managing Spark/PySpark installations."""
+
+     def check(self) -> bool:
+         """
+         Check PySpark installation, Java compatibility, and show status.
+
+         Returns:
+             bool: True if PySpark is installed and Java is compatible
+         """
+         click.echo()
+         click.echo(click.style("PySpark Status", fg="cyan", bold=True))
+         click.echo("-" * 40)
+
+         # Get PySpark info
+         pyspark = get_pyspark_info()
+         if pyspark:
+             click.echo(f" Version: {pyspark.version}")
+             click.echo(f" Major.Minor: {pyspark.major_minor}")
+             click.echo(f" Required Java: {', '.join(str(v) for v in pyspark.java_supported)}")
+             click.echo(f" Recommended: Java {pyspark.java_recommended}")
+         else:
+             click.echo(click.style(" ✗ PySpark not installed!", fg="red"))
+             click.echo()
+             click.echo(" Install with: pip install pyspark")
+             click.echo(" Or run 'dvt spark set-version' to select a version")
+             click.echo()
+             return False
+
+         click.echo()
+         click.echo(click.style("Java Compatibility", fg="cyan", bold=True))
+         click.echo("-" * 40)
+
+         # Get current Java
+         java = get_current_java()
+         if java:
+             click.echo(f" JAVA_HOME: {java.path}")
+             click.echo(f" Version: Java {java.version}")
+             click.echo(f" Vendor: {java.vendor}")
+
+             # Check compatibility
+             is_compat, msg = check_java_pyspark_compatibility(java.version, pyspark.major_minor)
+             click.echo()
+             if is_compat:
+                 click.echo(click.style(f" ✓ {msg}", fg="green"))
+             else:
+                 click.echo(click.style(f" ✗ {msg}", fg="red"))
+                 click.echo()
+                 click.echo(" Run 'dvt java set' to select a compatible Java version")
+         else:
+             click.echo(click.style(" ✗ Java not found!", fg="red"))
+             click.echo()
+             supported = ", ".join(str(v) for v in pyspark.java_supported)
+             click.echo(f" PySpark {pyspark.version} requires Java {supported}")
+             click.echo()
+             click.echo(" Run 'dvt java search' to find Java installations")
+             click.echo(" Run 'dvt java install' for installation guide")
+             click.echo()
+             return False
+
+         click.echo()
+         return is_compat if java else False
+
+     def set_version(self) -> bool:
+         """
+         Interactive selection to install a specific PySpark version.
+
+         Presents available PySpark versions with Java requirements,
+         then installs the selected version via pip.
+
+         Returns:
+             bool: True if installation successful
+         """
+         click.echo()
+         click.echo(click.style("Select PySpark Version to Install", fg="cyan", bold=True))
+         click.echo("=" * 50)
+         click.echo()
+
+         # Get current Java for compatibility display
+         java = get_current_java()
+         current_java_version = java.version if java else None
+
+         # Display available versions
+         for i, (version, major_minor, tag) in enumerate(PYSPARK_VERSIONS, 1):
+             compat = PYSPARK_JAVA_COMPATIBILITY.get(major_minor, {})
+             supported = compat.get("supported", [])
+             supported_str = ", ".join(str(v) for v in supported)
+
+             # Tag display
+             if tag == "latest":
+                 tag_display = click.style(" (latest)", fg="green")
+             elif tag == "stable":
+                 tag_display = click.style(" (stable)", fg="blue")
+             else:
+                 tag_display = ""
+
+             # Compatibility indicator
+             if current_java_version and supported:
+                 if current_java_version in supported:
+                     compat_marker = click.style(" ✓", fg="green")
+                 else:
+                     compat_marker = click.style(" ✗", fg="red")
+             else:
+                 compat_marker = ""
+
+             click.echo(f" [{i}] PySpark {version}{tag_display}{compat_marker}")
+             click.echo(f" Requires Java: {supported_str}")
+             click.echo()
+
+         click.echo(f" [{len(PYSPARK_VERSIONS) + 1}] Custom version")
+         click.echo()
+
+         # Show current Java info
+         if java:
+             click.echo(f" Current Java: {java.version} ({java.vendor})")
+             click.echo(" ✓ = compatible with your Java, ✗ = incompatible")
+             click.echo()
+
+         # Get user choice
+         while True:
+             try:
+                 choice = click.prompt("Your choice", type=int)
+                 if 1 <= choice <= len(PYSPARK_VERSIONS) + 1:
+                     break
+                 click.echo(click.style(f"Please enter a number between 1 and {len(PYSPARK_VERSIONS) + 1}", fg="yellow"))
+             except click.Abort:
+                 click.echo("\nAborted.")
+                 return False
+
+         # Determine version to install
+         if choice <= len(PYSPARK_VERSIONS):
+             version_to_install, major_minor, _ = PYSPARK_VERSIONS[choice - 1]
+         else:
+             # Custom version
+             version_to_install = click.prompt("Enter PySpark version (e.g., 3.4.1)")
+             parts = version_to_install.split(".")
+             major_minor = f"{parts[0]}.{parts[1]}" if len(parts) >= 2 else parts[0]
+
+         # Check Java compatibility before installing
+         compat = PYSPARK_JAVA_COMPATIBILITY.get(major_minor, {})
+         supported = compat.get("supported", [])
+
+         if java and supported and java.version not in supported:
+             click.echo()
+             click.echo(click.style(f"⚠️ Warning: PySpark {major_minor} requires Java {', '.join(str(v) for v in supported)}", fg="yellow"))
+             click.echo(f" Your current Java: {java.version}")
+             click.echo()
+             if not click.confirm("Install anyway? (You'll need to switch Java versions)"):
+                 return False
+
+         # Install PySpark
+         click.echo()
+         click.echo(f"Installing PySpark {version_to_install}...")
+         click.echo()
+
+         try:
+             # Use pip to install
+             cmd = [sys.executable, "-m", "pip", "install", f"pyspark=={version_to_install}"]
+             result = subprocess.run(cmd, capture_output=False, text=True)
+
+             if result.returncode == 0:
+                 click.echo()
+                 click.echo(click.style(f"✓ PySpark {version_to_install} installed successfully", fg="green"))
+
+                 # Post-install compatibility check
+                 if java and supported and java.version not in supported:
+                     click.echo()
+                     click.echo(click.style("⚠️ Java compatibility note:", fg="yellow"))
+                     click.echo(f" PySpark {major_minor} requires Java {', '.join(str(v) for v in supported)}")
+                     click.echo(" Run 'dvt java set' to select a compatible Java version")
+
+                 click.echo()
+                 return True
+             else:
+                 click.echo()
+                 click.echo(click.style(f"✗ Failed to install PySpark {version_to_install}", fg="red"))
+                 click.echo()
+                 return False
+
+         except Exception as e:
+             click.echo()
+             click.echo(click.style(f"✗ Installation error: {str(e)}", fg="red"))
+             click.echo()
+             return False
+
+     def match_cluster(self, compute_name: str) -> bool:
+         """
+         Detect Spark version from a cluster and suggest compatible PySpark.
+
+         Reads the compute configuration from computes.yml, connects to the
+         cluster, and compares versions with locally installed PySpark.
+
+         Args:
+             compute_name: Name of compute engine in computes.yml
+
+         Returns:
+             bool: True if versions match, False if mismatch or error
+         """
+         from dbt.config.compute import ComputeRegistry
+
+         click.echo()
+         click.echo(click.style(f"Checking cluster compatibility: {compute_name}", fg="cyan", bold=True))
+         click.echo("=" * 50)
+         click.echo()
+
+         # Load compute configuration
+         try:
+             registry = ComputeRegistry()
+             compute_cluster = registry.get(compute_name)
+             if not compute_cluster:
+                 click.echo(click.style(f"✗ Compute '{compute_name}' not found in computes.yml", fg="red"))
+                 click.echo()
+                 click.echo(" Run 'dvt compute list' to see available compute engines")
+                 click.echo()
+                 return False
+         except Exception as e:
+             click.echo(click.style(f"✗ Error loading compute config: {str(e)}", fg="red"))
+             click.echo()
+             return False
+
+         # Get cluster info (ComputeCluster has config attribute)
+         config = compute_cluster.config if hasattr(compute_cluster, 'config') else {}
+         master_url = config.get("master")
+         host = config.get("host")
+         compute_type = compute_cluster.type if hasattr(compute_cluster, 'type') else 'spark'
+
+         click.echo(f" Compute: {compute_name}")
+         click.echo(f" Type: {compute_type}")
+         if master_url:
+             click.echo(f" Master URL: {master_url}")
+         if host:
+             click.echo(f" Host: {host}")
+
+         # Detect cluster version
+         click.echo()
+         click.echo("Connecting to cluster...")
+
+         cluster_version = None
+         if master_url and master_url.startswith("spark://"):
+             # Standalone cluster
+             cluster_version = detect_spark_cluster_version(master_url)
+         elif master_url == "local[*]" or (master_url and master_url.startswith("local")):
+             # Local mode - just use PySpark version
+             pyspark = get_pyspark_info()
+             if pyspark:
+                 cluster_version = pyspark.version
+                 click.echo(click.style(" (Local mode - using PySpark version)", fg="blue"))
+         elif host and "databricks" in host.lower():
+             # Databricks - requires databricks-connect
+             click.echo(click.style(" Databricks cluster detected", fg="blue"))
+             click.echo(" Note: Version detection requires active connection")
+             # Try to get version via Databricks Connect if installed
+             try:
+                 from databricks.connect import DatabricksSession
+
+                 # We can't actually connect without full config, so just note it
+                 click.echo(f" Run 'dvt compute test {compute_name}' to verify connectivity")
+             except ImportError:
+                 click.echo(" Install databricks-connect for full support")
+
+         if cluster_version:
+             click.echo()
+             click.echo(click.style("Cluster Information", fg="cyan", bold=True))
+             click.echo("-" * 40)
+             click.echo(f" Spark Version: {cluster_version}")
+
+             # Extract major.minor
+             parts = cluster_version.split(".")
+             cluster_major_minor = f"{parts[0]}.{parts[1]}" if len(parts) >= 2 else parts[0]
+
+             # Compare with local PySpark
+             pyspark = get_pyspark_info()
+             click.echo()
+             click.echo(click.style("Version Comparison", fg="cyan", bold=True))
+             click.echo("-" * 40)
+
+             if pyspark:
+                 click.echo(f" Driver (local): PySpark {pyspark.version}")
+                 click.echo(f" Cluster: Spark {cluster_version}")
+
+                 if pyspark.major_minor == cluster_major_minor:
+                     click.echo()
+                     click.echo(click.style(" ✓ Versions match!", fg="green"))
+                     click.echo()
+                     return True
+                 else:
+                     click.echo()
+                     click.echo(click.style(" ✗ VERSION MISMATCH!", fg="red", bold=True))
+                     click.echo()
+                     click.echo(f" Driver (local): PySpark {pyspark.major_minor}")
+                     click.echo(f" Cluster: Spark {cluster_major_minor}")
+                     click.echo()
+                     click.echo(click.style("Recommendation:", fg="yellow"))
+                     click.echo(f" Run 'dvt spark set-version' and select PySpark {cluster_major_minor}.x")
+                     click.echo()
+
+                     # Check Java requirements for target version
+                     target_compat = PYSPARK_JAVA_COMPATIBILITY.get(cluster_major_minor)
+                     if target_compat:
+                         java = get_current_java()
+                         supported = target_compat["supported"]
+                         click.echo(click.style("Java Note:", fg="yellow"))
+                         click.echo(f" PySpark {cluster_major_minor} requires Java {', '.join(str(v) for v in supported)}")
+                         if java:
+                             if java.version in supported:
+                                 click.echo(f" Current Java {java.version} is compatible ✓")
+                             else:
+                                 click.echo(f" Current Java {java.version} is NOT compatible")
+                                 click.echo(" Run 'dvt java set' to select a compatible version")
+                         click.echo()
+
+                     return False
+             else:
+                 click.echo(click.style(" ✗ PySpark not installed locally", fg="red"))
+                 click.echo()
+                 click.echo(f" Run 'dvt spark set-version' and select PySpark {cluster_major_minor}.x")
+                 click.echo()
+                 return False
+         else:
+             click.echo()
+             click.echo(click.style(" ⚠️ Could not detect cluster version", fg="yellow"))
+             click.echo()
+             click.echo(" Possible reasons:")
+             click.echo(" - Cluster is not running")
+             click.echo(" - Network connectivity issues")
+             click.echo(" - Firewall blocking connection")
+             click.echo()
+             click.echo(" Try:")
+             click.echo(" - Start the cluster")
+             click.echo(f" - Run 'dvt compute test {compute_name}' to verify connectivity")
+             click.echo()
+             return False
+
+     def show_versions(self) -> None:
+         """
+         Display PySpark/Java compatibility matrix.
+
+         Shows all available PySpark versions and their Java requirements.
+         """
+         click.echo()
+         click.echo(click.style("PySpark/Java Compatibility Matrix", fg="cyan", bold=True))
+         click.echo("=" * 60)
+         click.echo()
+
+         # Get current versions
+         pyspark = get_pyspark_info()
+         java = get_current_java()
+
+         click.echo("Available PySpark Versions:")
+         click.echo()
+
+         for version, major_minor, tag in PYSPARK_VERSIONS:
+             compat = PYSPARK_JAVA_COMPATIBILITY.get(major_minor, {})
+             supported = compat.get("supported", [])
+             recommended = compat.get("recommended", supported[0] if supported else "?")
+
+             # Current marker
+             current_marker = ""
+             if pyspark and pyspark.version == version:
+                 current_marker = click.style(" * INSTALLED", fg="green")
+
+             # Tag
+             if tag == "latest":
+                 tag_display = click.style(" (latest)", fg="green")
+             elif tag == "stable":
+                 tag_display = click.style(" (stable)", fg="blue")
+             else:
+                 tag_display = ""
+
+             click.echo(f" PySpark {version}{tag_display}{current_marker}")
+             click.echo(f" Java Required: {', '.join(str(v) for v in supported)}")
+             click.echo(f" Java Recommended: {recommended}")
+             click.echo()
+
+         # Show current status
+         click.echo("-" * 60)
+         click.echo()
+         click.echo("Current Environment:")
+         if pyspark:
+             click.echo(f" PySpark: {pyspark.version}")
+         else:
+             click.echo(" PySpark: not installed")
+         if java:
+             click.echo(f" Java: {java.version} ({java.vendor})")
+         else:
+             click.echo(" Java: not found")
+         click.echo()
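
The SparkTask methods above map one-to-one onto the 'dvt spark' subcommands named in the module docstring (check, set-version, match-cluster, plus the versions matrix). As a rough sketch of how a CLI layer might dispatch to them (the 'spark' group below is illustrative, not the actual dvt_cli wiring shipped in this wheel):

import click

from dbt.task.spark import SparkTask


@click.group()
def spark():
    """Illustrative wrapper group for the SparkTask commands."""


@spark.command()
def check():
    # SparkTask.check() returns True when PySpark is installed and the
    # active Java is compatible; mirror that in the exit code.
    raise SystemExit(0 if SparkTask().check() else 1)


@spark.command("match-cluster")
@click.argument("compute_name")
def match_cluster(compute_name):
    # Compares the cluster's Spark version against the local PySpark install.
    raise SystemExit(0 if SparkTask().match_cluster(compute_name) else 1)


if __name__ == "__main__":
    spark()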
dbt/task/sql.py ADDED
@@ -0,0 +1,110 @@
+ import traceback
+ from abc import abstractmethod
+ from datetime import datetime, timezone
+ from typing import Generic, TypeVar
+
+ import dbt.exceptions
+ import dbt_common.exceptions.base
+ from dbt.contracts.graph.manifest import Manifest
+ from dbt.contracts.sql import (
+     RemoteCompileResult,
+     RemoteCompileResultMixin,
+     RemoteRunResult,
+     ResultTable,
+ )
+ from dbt.events.types import SQLRunnerException
+ from dbt.task.compile import CompileRunner
+ from dbt_common.events.functions import fire_event
+
+ SQLResult = TypeVar("SQLResult", bound=RemoteCompileResultMixin)
+
+
+ class GenericSqlRunner(CompileRunner, Generic[SQLResult]):
+     def __init__(self, config, adapter, node, node_index, num_nodes) -> None:
+         CompileRunner.__init__(self, config, adapter, node, node_index, num_nodes)
+
+ def handle_exception(self, e, ctx):
27
+ fire_event(
28
+ SQLRunnerException(
29
+ exc=str(e), exc_info=traceback.format_exc(), node_info=self.node.node_info
30
+ )
31
+ )
32
+ # REVIEW: This code is invalid and will always throw.
33
+ if isinstance(e, dbt.exceptions.Exception):
34
+ if isinstance(e, dbt_common.exceptions.DbtRuntimeError):
35
+ e.add_node(ctx.node)
36
+ return e
37
+
+     def before_execute(self) -> None:
+         pass
+
+     def after_execute(self, result) -> None:
+         pass
+
+     def compile(self, manifest: Manifest):
+         return self.compiler.compile_node(self.node, manifest, {}, write=False)
+
+     @abstractmethod
+     def execute(self, compiled_node, manifest) -> SQLResult:
+         pass
+
+     @abstractmethod
+     def from_run_result(self, result, start_time, timing_info) -> SQLResult:
+         pass
+
+     def error_result(self, node, error, start_time, timing_info):
+         raise error
+
+     def ephemeral_result(self, node, start_time, timing_info):
+         raise dbt_common.exceptions.base.NotImplementedError(
+             "cannot execute ephemeral nodes remotely!"
+         )
+
+
+ class SqlCompileRunner(GenericSqlRunner[RemoteCompileResult]):
65
+ def execute(self, compiled_node, manifest) -> RemoteCompileResult:
66
+ return RemoteCompileResult(
67
+ raw_code=compiled_node.raw_code,
68
+ compiled_code=compiled_node.compiled_code,
69
+ node=compiled_node,
70
+ timing=[], # this will get added later
71
+ generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
72
+ )
73
+
74
+ def from_run_result(self, result, start_time, timing_info) -> RemoteCompileResult:
75
+ return RemoteCompileResult(
76
+ raw_code=result.raw_code,
77
+ compiled_code=result.compiled_code,
78
+ node=result.node,
79
+ timing=timing_info,
80
+ generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
81
+ )
82
+
83
+
84
+ class SqlExecuteRunner(GenericSqlRunner[RemoteRunResult]):
85
+ def execute(self, compiled_node, manifest) -> RemoteRunResult:
86
+ _, execute_result = self.adapter.execute(compiled_node.compiled_code, fetch=True)
87
+
88
+ table = ResultTable(
89
+ column_names=list(execute_result.column_names),
90
+ rows=[list(row) for row in execute_result],
91
+ )
92
+
93
+ return RemoteRunResult(
94
+ raw_code=compiled_node.raw_code,
95
+ compiled_code=compiled_node.compiled_code,
96
+ node=compiled_node,
97
+ table=table,
98
+ timing=[],
99
+ generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
100
+ )
101
+
102
+ def from_run_result(self, result, start_time, timing_info) -> RemoteRunResult:
103
+ return RemoteRunResult(
104
+ raw_code=result.raw_code,
105
+ compiled_code=result.compiled_code,
106
+ node=result.node,
107
+ table=result.table,
108
+ timing=timing_info,
109
+ generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
110
+ )
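
Both concrete runners share GenericSqlRunner's compile step and differ only in their SQLResult type parameter: SqlCompileRunner stops at the compiled SQL, while SqlExecuteRunner also runs it and captures rows in a ResultTable. A minimal sketch of driving a runner directly, assuming config, adapter, node, and manifest objects obtained from dbt's usual task setup (the helper function itself is hypothetical):

from dbt.task.sql import SqlExecuteRunner


def run_remote_sql(config, adapter, sql_node, manifest):
    """Compile one SQL node, execute it, and return the RemoteRunResult."""
    runner = SqlExecuteRunner(config, adapter, sql_node, node_index=1, num_nodes=1)
    compiled = runner.compile(manifest)  # renders Jinja; write=False, so nothing hits disk
    return runner.execute(compiled, manifest)  # fetches rows into a ResultTable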