dvt-core 0.52.2__cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dvt-core might be problematic. Click here for more details.

Files changed (275) hide show
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2039 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +804 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +50 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +624 -0
  74. dbt/compute/federated_executor.py +837 -0
  75. dbt/compute/filter_pushdown.cpython-310-darwin.so +0 -0
  76. dbt/compute/filter_pushdown.py +273 -0
  77. dbt/compute/jar_provisioning.cpython-310-darwin.so +0 -0
  78. dbt/compute/jar_provisioning.py +255 -0
  79. dbt/compute/java_compat.cpython-310-darwin.so +0 -0
  80. dbt/compute/java_compat.py +689 -0
  81. dbt/compute/jdbc_utils.cpython-310-darwin.so +0 -0
  82. dbt/compute/jdbc_utils.py +678 -0
  83. dbt/compute/smart_selector.cpython-310-darwin.so +0 -0
  84. dbt/compute/smart_selector.py +311 -0
  85. dbt/compute/strategies/__init__.py +54 -0
  86. dbt/compute/strategies/base.py +165 -0
  87. dbt/compute/strategies/dataproc.py +207 -0
  88. dbt/compute/strategies/emr.py +203 -0
  89. dbt/compute/strategies/local.py +364 -0
  90. dbt/compute/strategies/standalone.py +262 -0
  91. dbt/config/__init__.py +4 -0
  92. dbt/config/catalogs.py +94 -0
  93. dbt/config/compute.cpython-310-darwin.so +0 -0
  94. dbt/config/compute.py +547 -0
  95. dbt/config/dvt_profile.cpython-310-darwin.so +0 -0
  96. dbt/config/dvt_profile.py +342 -0
  97. dbt/config/profile.py +422 -0
  98. dbt/config/project.py +873 -0
  99. dbt/config/project_utils.py +28 -0
  100. dbt/config/renderer.py +231 -0
  101. dbt/config/runtime.py +553 -0
  102. dbt/config/selectors.py +208 -0
  103. dbt/config/utils.py +77 -0
  104. dbt/constants.py +28 -0
  105. dbt/context/__init__.py +0 -0
  106. dbt/context/base.py +745 -0
  107. dbt/context/configured.py +135 -0
  108. dbt/context/context_config.py +382 -0
  109. dbt/context/docs.py +82 -0
  110. dbt/context/exceptions_jinja.py +178 -0
  111. dbt/context/macro_resolver.py +195 -0
  112. dbt/context/macros.py +171 -0
  113. dbt/context/manifest.py +72 -0
  114. dbt/context/providers.py +2249 -0
  115. dbt/context/query_header.py +13 -0
  116. dbt/context/secret.py +58 -0
  117. dbt/context/target.py +74 -0
  118. dbt/contracts/__init__.py +0 -0
  119. dbt/contracts/files.py +413 -0
  120. dbt/contracts/graph/__init__.py +0 -0
  121. dbt/contracts/graph/manifest.py +1904 -0
  122. dbt/contracts/graph/metrics.py +97 -0
  123. dbt/contracts/graph/model_config.py +70 -0
  124. dbt/contracts/graph/node_args.py +42 -0
  125. dbt/contracts/graph/nodes.py +1806 -0
  126. dbt/contracts/graph/semantic_manifest.py +232 -0
  127. dbt/contracts/graph/unparsed.py +811 -0
  128. dbt/contracts/project.py +417 -0
  129. dbt/contracts/results.py +53 -0
  130. dbt/contracts/selection.py +23 -0
  131. dbt/contracts/sql.py +85 -0
  132. dbt/contracts/state.py +68 -0
  133. dbt/contracts/util.py +46 -0
  134. dbt/deprecations.py +346 -0
  135. dbt/deps/__init__.py +0 -0
  136. dbt/deps/base.py +152 -0
  137. dbt/deps/git.py +195 -0
  138. dbt/deps/local.py +79 -0
  139. dbt/deps/registry.py +130 -0
  140. dbt/deps/resolver.py +149 -0
  141. dbt/deps/tarball.py +120 -0
  142. dbt/docs/source/_ext/dbt_click.py +119 -0
  143. dbt/docs/source/conf.py +32 -0
  144. dbt/env_vars.py +64 -0
  145. dbt/event_time/event_time.py +40 -0
  146. dbt/event_time/sample_window.py +60 -0
  147. dbt/events/__init__.py +15 -0
  148. dbt/events/base_types.py +36 -0
  149. dbt/events/core_types_pb2.py +2 -0
  150. dbt/events/logging.py +108 -0
  151. dbt/events/types.py +2516 -0
  152. dbt/exceptions.py +1486 -0
  153. dbt/flags.py +89 -0
  154. dbt/graph/__init__.py +11 -0
  155. dbt/graph/cli.py +247 -0
  156. dbt/graph/graph.py +172 -0
  157. dbt/graph/queue.py +214 -0
  158. dbt/graph/selector.py +374 -0
  159. dbt/graph/selector_methods.py +975 -0
  160. dbt/graph/selector_spec.py +222 -0
  161. dbt/graph/thread_pool.py +18 -0
  162. dbt/hooks.py +21 -0
  163. dbt/include/README.md +49 -0
  164. dbt/include/__init__.py +3 -0
  165. dbt/include/starter_project/.gitignore +4 -0
  166. dbt/include/starter_project/README.md +15 -0
  167. dbt/include/starter_project/__init__.py +3 -0
  168. dbt/include/starter_project/analyses/.gitkeep +0 -0
  169. dbt/include/starter_project/dbt_project.yml +36 -0
  170. dbt/include/starter_project/macros/.gitkeep +0 -0
  171. dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  172. dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  173. dbt/include/starter_project/models/example/schema.yml +21 -0
  174. dbt/include/starter_project/seeds/.gitkeep +0 -0
  175. dbt/include/starter_project/snapshots/.gitkeep +0 -0
  176. dbt/include/starter_project/tests/.gitkeep +0 -0
  177. dbt/internal_deprecations.py +26 -0
  178. dbt/jsonschemas/__init__.py +3 -0
  179. dbt/jsonschemas/jsonschemas.py +309 -0
  180. dbt/jsonschemas/project/0.0.110.json +4717 -0
  181. dbt/jsonschemas/project/0.0.85.json +2015 -0
  182. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  183. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  184. dbt/jsonschemas/resources/latest.json +6773 -0
  185. dbt/links.py +4 -0
  186. dbt/materializations/__init__.py +0 -0
  187. dbt/materializations/incremental/__init__.py +0 -0
  188. dbt/materializations/incremental/microbatch.py +236 -0
  189. dbt/mp_context.py +8 -0
  190. dbt/node_types.py +37 -0
  191. dbt/parser/__init__.py +23 -0
  192. dbt/parser/analysis.py +21 -0
  193. dbt/parser/base.py +548 -0
  194. dbt/parser/common.py +266 -0
  195. dbt/parser/docs.py +52 -0
  196. dbt/parser/fixtures.py +51 -0
  197. dbt/parser/functions.py +30 -0
  198. dbt/parser/generic_test.py +100 -0
  199. dbt/parser/generic_test_builders.py +333 -0
  200. dbt/parser/hooks.py +118 -0
  201. dbt/parser/macros.py +137 -0
  202. dbt/parser/manifest.py +2204 -0
  203. dbt/parser/models.py +573 -0
  204. dbt/parser/partial.py +1178 -0
  205. dbt/parser/read_files.py +445 -0
  206. dbt/parser/schema_generic_tests.py +422 -0
  207. dbt/parser/schema_renderer.py +111 -0
  208. dbt/parser/schema_yaml_readers.py +935 -0
  209. dbt/parser/schemas.py +1466 -0
  210. dbt/parser/search.py +149 -0
  211. dbt/parser/seeds.py +28 -0
  212. dbt/parser/singular_test.py +20 -0
  213. dbt/parser/snapshots.py +44 -0
  214. dbt/parser/sources.py +558 -0
  215. dbt/parser/sql.py +62 -0
  216. dbt/parser/unit_tests.py +621 -0
  217. dbt/plugins/__init__.py +20 -0
  218. dbt/plugins/contracts.py +9 -0
  219. dbt/plugins/exceptions.py +2 -0
  220. dbt/plugins/manager.py +163 -0
  221. dbt/plugins/manifest.py +21 -0
  222. dbt/profiler.py +20 -0
  223. dbt/py.typed +1 -0
  224. dbt/query_analyzer.cpython-310-darwin.so +0 -0
  225. dbt/query_analyzer.py +410 -0
  226. dbt/runners/__init__.py +2 -0
  227. dbt/runners/exposure_runner.py +7 -0
  228. dbt/runners/no_op_runner.py +45 -0
  229. dbt/runners/saved_query_runner.py +7 -0
  230. dbt/selected_resources.py +8 -0
  231. dbt/task/__init__.py +0 -0
  232. dbt/task/base.py +503 -0
  233. dbt/task/build.py +197 -0
  234. dbt/task/clean.py +56 -0
  235. dbt/task/clone.py +161 -0
  236. dbt/task/compile.py +150 -0
  237. dbt/task/compute.py +454 -0
  238. dbt/task/debug.py +505 -0
  239. dbt/task/deps.py +280 -0
  240. dbt/task/docs/__init__.py +3 -0
  241. dbt/task/docs/generate.py +660 -0
  242. dbt/task/docs/index.html +250 -0
  243. dbt/task/docs/serve.py +29 -0
  244. dbt/task/freshness.py +322 -0
  245. dbt/task/function.py +121 -0
  246. dbt/task/group_lookup.py +46 -0
  247. dbt/task/init.py +553 -0
  248. dbt/task/java.py +316 -0
  249. dbt/task/list.py +236 -0
  250. dbt/task/printer.py +175 -0
  251. dbt/task/retry.py +175 -0
  252. dbt/task/run.py +1306 -0
  253. dbt/task/run_operation.py +141 -0
  254. dbt/task/runnable.py +758 -0
  255. dbt/task/seed.py +103 -0
  256. dbt/task/show.py +149 -0
  257. dbt/task/snapshot.py +56 -0
  258. dbt/task/spark.py +414 -0
  259. dbt/task/sql.py +110 -0
  260. dbt/task/target_sync.py +759 -0
  261. dbt/task/test.py +464 -0
  262. dbt/tests/fixtures/__init__.py +1 -0
  263. dbt/tests/fixtures/project.py +620 -0
  264. dbt/tests/util.py +651 -0
  265. dbt/tracking.py +529 -0
  266. dbt/utils/__init__.py +3 -0
  267. dbt/utils/artifact_upload.py +151 -0
  268. dbt/utils/utils.py +408 -0
  269. dbt/version.py +268 -0
  270. dvt_cli/__init__.py +72 -0
  271. dvt_core-0.52.2.dist-info/METADATA +286 -0
  272. dvt_core-0.52.2.dist-info/RECORD +275 -0
  273. dvt_core-0.52.2.dist-info/WHEEL +5 -0
  274. dvt_core-0.52.2.dist-info/entry_points.txt +2 -0
  275. dvt_core-0.52.2.dist-info/top_level.txt +2 -0
@@ -0,0 +1,689 @@
1
+ """
2
+ Java/PySpark Compatibility Module
3
+
4
+ Centralized logic for:
5
+ - Java installation detection (cross-platform)
6
+ - PySpark version detection
7
+ - Java/PySpark compatibility checking
8
+ - Spark cluster version detection
9
+
10
+ v0.51.3: New module for comprehensive Java/Spark management.
11
+ """
12
+
13
+ import glob
14
+ import os
15
+ import platform
16
+ import re
17
+ import subprocess
18
+ from dataclasses import dataclass
19
+ from typing import Dict, List, Optional, Set, Tuple
20
+
21
+
22
+ # =============================================================================
23
+ # COMPATIBILITY MATRIX
24
+ # =============================================================================
25
+ # PySpark major.minor -> Java requirements
26
+ # Based on Apache Spark official documentation
27
+
28
# Java requirements per PySpark release line, keyed by the "major.minor"
# string. Each entry records the lowest and highest Java major version, the
# recommended one, and the explicit list of supported major versions.
PYSPARK_JAVA_COMPATIBILITY: Dict[str, Dict] = {
    "4.0": dict(min=17, max=21, recommended=17, supported=[17, 21]),
    "3.5": dict(min=8, max=17, recommended=17, supported=[8, 11, 17]),
    "3.4": dict(min=8, max=17, recommended=11, supported=[8, 11, 17]),
    "3.3": dict(min=8, max=11, recommended=11, supported=[8, 11]),
    "3.2": dict(min=8, max=11, recommended=11, supported=[8, 11]),
    "3.1": dict(min=8, max=11, recommended=11, supported=[8, 11]),
    "3.0": dict(min=8, max=11, recommended=8, supported=[8, 11]),
}

# PySpark versions offered for interactive selection, as
# (full version, major.minor, label) tuples; the label may be empty.
PYSPARK_VERSIONS = [
    ("4.0.1", "4.0", "latest"),
    ("3.5.3", "3.5", "stable"),
    ("3.4.3", "3.4", ""),
    ("3.3.4", "3.3", ""),
    ("3.2.4", "3.2", ""),
]
46
+
47
+
48
+ # =============================================================================
49
+ # DATA CLASSES
50
+ # =============================================================================
51
+
52
@dataclass
class JavaInstallation:
    """
    One Java installation discovered on the system.

    Fields:
    - ``path``: the JAVA_HOME directory of the installation
    - ``version``: Java major version (8, 11, 17, 21)
    - ``version_string``: full version line (e.g., "openjdk version 17.0.1")
    - ``vendor``: distribution name (e.g., "OpenJDK", "Oracle", "Amazon Corretto")
    - ``is_current``: True if this is the active JAVA_HOME
    """

    path: str
    version: int
    version_string: str
    vendor: str
    is_current: bool = False
60
+
61
+
62
@dataclass
class PySparkInfo:
    """
    Installed PySpark version together with its Java requirements.

    Fields:
    - ``version``: full PySpark version (e.g., "4.0.1")
    - ``major_minor``: major.minor part of the version (e.g., "4.0")
    - ``java_min`` / ``java_max``: lowest and highest Java major version
    - ``java_recommended``: recommended Java major version
    - ``java_supported``: list of supported Java major versions
    """

    version: str
    major_minor: str
    java_min: int
    java_max: int
    java_recommended: int
    java_supported: List[int]
71
+
72
+
73
+ # =============================================================================
74
+ # JAVA DETECTION FUNCTIONS
75
+ # =============================================================================
76
+
77
# Matches e.g. 'openjdk version "21.0.5"' or 'java version "1.8.0_292"'.
# Group 1 is the first numeric component, group 2 (optional) the second.
_JAVA_VERSION_PATTERN = re.compile(r'version "(\d+)(?:\.(\d+))?')


def _parse_java_version_output(
    version_output: str,
) -> Tuple[Optional[int], Optional[str], Optional[str]]:
    """
    Parse the text printed by ``java -version``.

    :param version_output: Combined stderr/stdout text of ``java -version``
    :returns: Tuple of (major_version, version_string, vendor) or (None, None, None)
    """
    version_match = _JAVA_VERSION_PATTERN.search(version_output)
    if not version_match:
        return None, None, None

    major = int(version_match.group(1))
    # Handle old Java versioning (1.8 = Java 8): the real major version is
    # the second numeric component.
    if major == 1 and version_match.group(2):
        major = int(version_match.group(2))

    # Report the line that actually carries the version. The JVM may print
    # banner lines first (e.g. "Picked up JAVA_TOOL_OPTIONS: ..."), so the
    # first output line is not reliable.
    version_string = next(
        (
            line.strip()
            for line in version_output.splitlines()
            if _JAVA_VERSION_PATTERN.search(line)
        ),
        version_output.split('\n')[0].strip(),
    )

    # Detect vendor from well-known markers anywhere in the output.
    vendor = "Unknown"
    lower_output = version_output.lower()
    if "openjdk" in lower_output:
        if "temurin" in lower_output or "adoptium" in lower_output:
            vendor = "Eclipse Adoptium"
        elif "corretto" in lower_output:
            vendor = "Amazon Corretto"
        elif "zulu" in lower_output:
            vendor = "Azul Zulu"
        elif "graalvm" in lower_output:
            vendor = "GraalVM"
        else:
            vendor = "OpenJDK"
    elif "java(tm)" in lower_output or "oracle" in lower_output:
        vendor = "Oracle"
    elif "ibm" in lower_output:
        vendor = "IBM"

    return major, version_string, vendor


def get_java_version(java_bin_path: str) -> Tuple[Optional[int], Optional[str], Optional[str]]:
    """
    Get Java major version, full version string, and vendor from java binary.

    Runs ``<java_bin_path> -version`` with a 10 second timeout and parses the
    combined stderr/stdout text. Any failure to launch the binary, a timeout,
    or unparseable output yields ``(None, None, None)``.

    :param java_bin_path: Path to java binary
    :returns: Tuple of (major_version, version_string, vendor) or (None, None, None)
    """
    try:
        result = subprocess.run(
            [java_bin_path, "-version"],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # Missing/non-executable binary, undecodable output, or timeout:
        # this is a best-effort probe, so report "unknown" instead of raising.
        return None, None, None

    # ``java -version`` writes to stderr, but accept stdout as well.
    version_output = (result.stderr or "") + (result.stdout or "")
    return _parse_java_version_output(version_output)
131
+
132
+
133
+ def get_current_java() -> Optional[JavaInstallation]:
134
+ """
135
+ Get the currently configured Java installation (from JAVA_HOME or PATH).
136
+
137
+ :returns: JavaInstallation or None if not found
138
+ """
139
+ import shutil
140
+
141
+ # Check JAVA_HOME first
142
+ java_home = os.environ.get("JAVA_HOME")
143
+ if java_home and os.path.exists(java_home):
144
+ java_bin = os.path.join(java_home, "bin", "java")
145
+ if platform.system() == "Windows":
146
+ java_bin += ".exe"
147
+ if os.path.exists(java_bin):
148
+ version, version_str, vendor = get_java_version(java_bin)
149
+ if version:
150
+ return JavaInstallation(
151
+ path=java_home,
152
+ version=version,
153
+ version_string=version_str or f"Java {version}",
154
+ vendor=vendor or "Unknown",
155
+ is_current=True
156
+ )
157
+
158
+ # Check PATH
159
+ java_bin = shutil.which("java")
160
+ if java_bin:
161
+ version, version_str, vendor = get_java_version(java_bin)
162
+ if version:
163
+ # Infer JAVA_HOME from binary location
164
+ java_home = os.path.dirname(os.path.dirname(os.path.realpath(java_bin)))
165
+ return JavaInstallation(
166
+ path=java_home,
167
+ version=version,
168
+ version_string=version_str or f"Java {version}",
169
+ vendor=vendor or "Unknown",
170
+ is_current=True
171
+ )
172
+
173
+ return None
174
+
175
+
176
+ def _check_java_path(path: str, is_current: bool = False) -> Optional[JavaInstallation]:
177
+ """
178
+ Check if a path contains a valid Java installation.
179
+
180
+ :param path: JAVA_HOME path to check
181
+ :param is_current: Whether this is the current JAVA_HOME
182
+ :returns: JavaInstallation or None
183
+ """
184
+ if not os.path.exists(path):
185
+ return None
186
+
187
+ java_bin = os.path.join(path, "bin", "java")
188
+ if platform.system() == "Windows":
189
+ java_bin += ".exe"
190
+
191
+ if not os.path.exists(java_bin):
192
+ return None
193
+
194
+ version, version_str, vendor = get_java_version(java_bin)
195
+ if version:
196
+ return JavaInstallation(
197
+ path=path,
198
+ version=version,
199
+ version_string=version_str or f"Java {version}",
200
+ vendor=vendor or "Unknown",
201
+ is_current=is_current
202
+ )
203
+ return None
204
+
205
+
206
+ def _find_java_macos() -> List[JavaInstallation]:
207
+ """Find Java installations on macOS."""
208
+ installations = []
209
+
210
+ # 1. Use java_home utility to list all
211
+ try:
212
+ result = subprocess.run(
213
+ ["/usr/libexec/java_home", "-V"],
214
+ capture_output=True,
215
+ text=True,
216
+ timeout=10
217
+ )
218
+ # Parse output - paths are at the end of each line
219
+ for line in result.stderr.split('\n'):
220
+ match = re.search(r'(/[^\s]+)$', line.strip())
221
+ if match:
222
+ path = match.group(1)
223
+ inst = _check_java_path(path)
224
+ if inst:
225
+ installations.append(inst)
226
+ except Exception:
227
+ pass
228
+
229
+ # 2. Search common paths
230
+ search_patterns = [
231
+ "/Library/Java/JavaVirtualMachines/*/Contents/Home",
232
+ "/usr/local/opt/openjdk@*/libexec/openjdk.jdk/Contents/Home",
233
+ "/usr/local/Cellar/openjdk@*/*/libexec/openjdk.jdk/Contents/Home",
234
+ "/opt/homebrew/opt/openjdk@*/libexec/openjdk.jdk/Contents/Home",
235
+ os.path.expanduser("~/.sdkman/candidates/java/*"),
236
+ os.path.expanduser("~/Library/Java/JavaVirtualMachines/*/Contents/Home"),
237
+ ]
238
+
239
+ for pattern in search_patterns:
240
+ try:
241
+ for path in glob.glob(pattern):
242
+ inst = _check_java_path(path)
243
+ if inst:
244
+ installations.append(inst)
245
+ except Exception:
246
+ continue
247
+
248
+ return installations
249
+
250
+
251
+ def _find_java_linux() -> List[JavaInstallation]:
252
+ """Find Java installations on Linux."""
253
+ installations = []
254
+
255
+ # 1. Check update-alternatives
256
+ try:
257
+ result = subprocess.run(
258
+ ["update-alternatives", "--list", "java"],
259
+ capture_output=True,
260
+ text=True,
261
+ timeout=10
262
+ )
263
+ for line in result.stdout.strip().split('\n'):
264
+ if line and os.path.exists(line):
265
+ # Path is typically /usr/lib/jvm/java-X-openjdk/bin/java
266
+ bin_path = os.path.dirname(line)
267
+ java_home = os.path.dirname(bin_path)
268
+ inst = _check_java_path(java_home)
269
+ if inst:
270
+ installations.append(inst)
271
+ except Exception:
272
+ pass
273
+
274
+ # 2. Search common paths
275
+ search_patterns = [
276
+ "/usr/lib/jvm/java-*-openjdk*",
277
+ "/usr/lib/jvm/jdk-*",
278
+ "/usr/lib/jvm/temurin-*",
279
+ "/usr/lib/jvm/adoptium-*",
280
+ "/usr/java/jdk*",
281
+ "/opt/java/*",
282
+ "/opt/jdk*",
283
+ os.path.expanduser("~/.sdkman/candidates/java/*"),
284
+ ]
285
+
286
+ for pattern in search_patterns:
287
+ try:
288
+ for path in glob.glob(pattern):
289
+ inst = _check_java_path(path)
290
+ if inst:
291
+ installations.append(inst)
292
+ except Exception:
293
+ continue
294
+
295
+ return installations
296
+
297
+
298
+ def _find_java_windows() -> List[JavaInstallation]:
299
+ """Find Java installations on Windows."""
300
+ installations = []
301
+
302
+ # 1. Check Windows Registry
303
+ try:
304
+ import winreg
305
+ reg_paths = [
306
+ (winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\JavaSoft\Java Development Kit"),
307
+ (winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\JavaSoft\JDK"),
308
+ (winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\Eclipse Adoptium\JDK"),
309
+ (winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\AdoptOpenJDK\JDK"),
310
+ ]
311
+ for hkey, subkey in reg_paths:
312
+ try:
313
+ key = winreg.OpenKey(hkey, subkey)
314
+ i = 0
315
+ while True:
316
+ try:
317
+ version = winreg.EnumKey(key, i)
318
+ version_key = winreg.OpenKey(key, version)
319
+ java_home, _ = winreg.QueryValueEx(version_key, "JavaHome")
320
+ inst = _check_java_path(java_home)
321
+ if inst:
322
+ installations.append(inst)
323
+ i += 1
324
+ except OSError:
325
+ break
326
+ except FileNotFoundError:
327
+ pass
328
+ except ImportError:
329
+ pass
330
+
331
+ # 2. Search common paths
332
+ search_patterns = [
333
+ r"C:\Program Files\Java\jdk*",
334
+ r"C:\Program Files\Eclipse Adoptium\jdk-*",
335
+ r"C:\Program Files\OpenJDK\jdk-*",
336
+ r"C:\Program Files\AdoptOpenJDK\jdk-*",
337
+ r"C:\Program Files\Amazon Corretto\jdk*",
338
+ r"C:\Program Files\Zulu\zulu-*",
339
+ os.path.expanduser(r"~\scoop\apps\openjdk*\current"),
340
+ os.path.expanduser(r"~\scoop\apps\temurin*\current"),
341
+ ]
342
+
343
+ for pattern in search_patterns:
344
+ try:
345
+ for path in glob.glob(pattern):
346
+ inst = _check_java_path(path)
347
+ if inst:
348
+ installations.append(inst)
349
+ except Exception:
350
+ continue
351
+
352
+ return installations
353
+
354
+
355
def find_all_java_installations() -> List[JavaInstallation]:
    """
    Find ALL Java installations on the system (cross-platform).

    Installations are de-duplicated by their resolved location, so the same
    JDK reached through a symlink is reported once. The entry matching the
    currently configured Java has ``is_current`` set to True. If the current
    Java was not discovered by the platform search, it is added to the result.

    :returns: List of JavaInstallation objects, sorted by version (newest first)
    """

    def _path_key(path: str) -> str:
        # Resolve symlinks and normalise case so two spellings of the same
        # directory compare equal.
        return os.path.normcase(os.path.realpath(path))

    # Get current Java first
    current = get_current_java()
    current_key = _path_key(current.path) if current else None

    # Find all installations based on OS
    os_type = platform.system()
    if os_type == "Darwin":
        installations = _find_java_macos()
    elif os_type == "Linux":
        installations = _find_java_linux()
    elif os_type == "Windows":
        installations = _find_java_windows()
    else:
        installations = []

    # Mark current and remove duplicates (first occurrence wins)
    seen_keys = set()
    unique = []
    for inst in installations:
        key = _path_key(inst.path)
        if key in seen_keys:
            continue
        seen_keys.add(key)
        inst.is_current = current_key is not None and key == current_key
        unique.append(inst)

    # Add current if not already found
    if current and current_key not in seen_keys:
        unique.append(current)

    # Sort by version (newest first)
    return sorted(unique, key=lambda x: x.version, reverse=True)
392
+
393
+
394
def _is_valid_java_home(path: str) -> bool:
    """
    Tell whether ``path`` can serve as a JAVA_HOME directory.

    The path must be non-empty, must not be one of the generic system
    prefixes (``/usr``, ``/usr/local``, ``/``), and must contain ``bin/java``
    (``bin/java.exe`` on Windows) as a regular file.

    :param path: Candidate JAVA_HOME directory
    :returns: True when the path looks like a proper JDK directory
    """
    if not path:
        return False

    # System prefixes hold a bin/java but are not proper JDK directories.
    if os.path.normpath(path) in ("/usr", "/usr/local", "/"):
        return False

    binary_name = "java.exe" if platform.system() == "Windows" else "java"
    return os.path.isfile(os.path.join(path, "bin", binary_name))
414
+
415
+
416
+ def select_best_java(installations: List[JavaInstallation], supported_versions: List[int]) -> Optional[JavaInstallation]:
417
+ """
418
+ Select the best Java installation from a list for given supported versions.
419
+
420
+ Prefers: (1) proper JDK directory, (2) highest supported version, (3) not current but valid
421
+
422
+ :param installations: List of JavaInstallation
423
+ :param supported_versions: List of supported Java major versions
424
+ :returns: Best JavaInstallation or None
425
+ """
426
+ if not installations or not supported_versions:
427
+ return None
428
+
429
+ # Filter to only compatible versions with valid JDK paths
430
+ compatible = [
431
+ inst for inst in installations
432
+ if inst.version in supported_versions and _is_valid_java_home(inst.path)
433
+ ]
434
+ if not compatible:
435
+ return None
436
+
437
+ # Prefer proper JDK directories (not /usr or similar)
438
+ # Sort by: highest version first
439
+ compatible.sort(key=lambda x: x.version, reverse=True)
440
+
441
+ # Return the highest version compatible Java with a valid path
442
+ return compatible[0]
443
+
444
+
445
+ # =============================================================================
446
+ # PYSPARK DETECTION FUNCTIONS
447
+ # =============================================================================
448
+
449
+ def get_pyspark_info() -> Optional[PySparkInfo]:
450
+ """
451
+ Get installed PySpark version and compatibility requirements.
452
+
453
+ :returns: PySparkInfo or None if PySpark not installed
454
+ """
455
+ try:
456
+ import importlib.metadata
457
+ version = importlib.metadata.version("pyspark")
458
+
459
+ # Extract major.minor
460
+ parts = version.split(".")
461
+ if len(parts) >= 2:
462
+ major_minor = f"{parts[0]}.{parts[1]}"
463
+ else:
464
+ major_minor = parts[0]
465
+
466
+ # Look up compatibility requirements
467
+ compat = PYSPARK_JAVA_COMPATIBILITY.get(major_minor)
468
+ if compat:
469
+ return PySparkInfo(
470
+ version=version,
471
+ major_minor=major_minor,
472
+ java_min=compat["min"],
473
+ java_max=compat["max"],
474
+ java_recommended=compat["recommended"],
475
+ java_supported=compat["supported"]
476
+ )
477
+ else:
478
+ # Unknown version - assume latest requirements
479
+ return PySparkInfo(
480
+ version=version,
481
+ major_minor=major_minor,
482
+ java_min=17,
483
+ java_max=21,
484
+ java_recommended=17,
485
+ java_supported=[17, 21]
486
+ )
487
+ except Exception:
488
+ return None
489
+
490
+
491
def get_pyspark_versions_for_java(java_version: int) -> List[str]:
    """
    List the PySpark release lines that support a given Java version.

    :param java_version: Java major version
    :returns: List of PySpark major.minor versions, in descending string order
    """
    matching = [
        release
        for release, requirements in PYSPARK_JAVA_COMPATIBILITY.items()
        if java_version in requirements["supported"]
    ]
    matching.sort(reverse=True)
    return matching
503
+
504
+
505
+ # =============================================================================
506
+ # COMPATIBILITY CHECKING
507
+ # =============================================================================
508
+
509
def check_java_pyspark_compatibility(java_version: int, pyspark_major_minor: str) -> Tuple[bool, str]:
    """
    Decide whether a Java version can run a PySpark release line.

    Release lines missing from the compatibility matrix are treated as
    compatible.

    :param java_version: Java major version (e.g., 17)
    :param pyspark_major_minor: PySpark major.minor (e.g., "4.0")
    :returns: Tuple of (is_compatible, message)
    """
    requirements = PYSPARK_JAVA_COMPATIBILITY.get(pyspark_major_minor)

    if not requirements:
        is_compatible = True
        message = f"Unknown PySpark version {pyspark_major_minor}, assuming compatible"
    elif java_version in requirements["supported"]:
        is_compatible = True
        message = f"Java {java_version} is compatible with PySpark {pyspark_major_minor}"
    else:
        supported_str = ", ".join(map(str, requirements["supported"]))
        is_compatible = False
        message = f"Java {java_version} is NOT compatible with PySpark {pyspark_major_minor}. Requires Java {supported_str}."

    return is_compatible, message
526
+
527
+
528
def validate_java_for_spark() -> Tuple[bool, str]:
    """
    Validate that current Java is compatible with installed PySpark.

    Fails when PySpark is not installed, when no Java can be found, or when
    the current Java major version is not supported by the installed PySpark
    release line. Each failure message includes the command that resolves it.

    :returns: Tuple of (is_valid, message)
    """
    # Check PySpark
    pyspark = get_pyspark_info()
    if not pyspark:
        return False, "PySpark is not installed. Install it with: pip install pyspark"

    # Check Java
    java = get_current_java()
    if not java:
        # Render the supported versions as "8, 11, 17" (same form as the
        # incompatibility message) rather than as a Python list repr.
        supported_str = ", ".join(str(v) for v in pyspark.java_supported)
        return False, f"Java not found. PySpark {pyspark.version} requires Java {supported_str}. Run 'dvt java search' to find installations."

    # Check compatibility
    is_compat, msg = check_java_pyspark_compatibility(java.version, pyspark.major_minor)
    if not is_compat:
        return False, f"{msg} Run 'dvt java set' to select a compatible Java version."

    return True, f"Java {java.version} is compatible with PySpark {pyspark.version}"
550
+
551
+
552
+ # =============================================================================
553
+ # CLUSTER VERSION DETECTION
554
+ # =============================================================================
555
+
556
def detect_spark_cluster_version(master_url: str, timeout: int = 30) -> Optional[str]:
    """
    Detect Spark version from a running cluster.

    Connects to the cluster and queries spark.version. The connection runs
    in a daemon thread so that an unreachable or hanging cluster cannot block
    the caller beyond ``timeout`` seconds.

    :param master_url: Spark master URL (spark://host:port)
    :param timeout: Connection timeout in seconds
    :returns: Spark version string (e.g., "3.2.4") or None if detection fails
    """
    import threading

    try:
        from pyspark.sql import SparkSession
    except Exception:
        # PySpark missing or not importable: detection is impossible.
        return None

    detected = []

    def _detect() -> None:
        spark = None
        try:
            spark = (
                SparkSession.builder
                .appName("DVT-VersionDetect")
                .master(master_url)
                .config("spark.ui.enabled", "false")
                .config("spark.ui.showConsoleProgress", "false")
                .getOrCreate()
            )
            detected.append(spark.version)
        except Exception:
            # Best-effort detection: any connection failure means "unknown".
            pass
        finally:
            if spark is not None:
                try:
                    spark.stop()
                except Exception:
                    pass

    # A daemon thread (rather than an executor used as a context manager) is
    # required here: leaving an executor's `with` block waits for the worker
    # to finish, which would defeat the timeout.
    worker = threading.Thread(target=_detect, name="dvt-spark-version-detect", daemon=True)
    worker.start()
    worker.join(timeout)

    # The version may already be known even if session shutdown is still
    # in progress when the timeout elapses.
    return detected[0] if detected else None
593
+
594
+
595
+ # =============================================================================
596
+ # SHELL CONFIG MODIFICATION
597
+ # =============================================================================
598
+
599
def get_shell_config_file() -> Tuple[Optional[str], str]:
    """
    Work out which shell startup file should hold persistent settings.

    On Windows the PowerShell profile is used when ``$PROFILE`` expands to a
    concrete path. Elsewhere the file is chosen from the ``SHELL`` environment
    variable (zsh, bash, fish), defaulting to ``~/.profile``.

    :returns: Tuple of (config_file_path, shell_name) or (None, "unknown")
    """
    if platform.system() == "Windows":
        # NOTE(review): $PROFILE is a PowerShell automatic variable and is
        # presumably not exported as an environment variable by default, in
        # which case expandvars leaves it untouched and this branch reports
        # "unknown" - confirm on a real Windows host.
        profile = os.path.expandvars(r"$PROFILE")
        if profile and "$" not in profile:
            return profile, "PowerShell"
        return None, "unknown"

    # Unix-like (macOS, Linux)
    shell = os.environ.get("SHELL", "")
    home = os.path.expanduser("~")

    if "zsh" in shell:
        return os.path.join(home, ".zshrc"), "zsh"

    if "bash" in shell:
        # Prefer an existing .bashrc (Linux); otherwise use .bash_profile (macOS).
        bashrc = os.path.join(home, ".bashrc")
        bash_config = bashrc if os.path.exists(bashrc) else os.path.join(home, ".bash_profile")
        return bash_config, "bash"

    if "fish" in shell:
        return os.path.join(home, ".config", "fish", "config.fish"), "fish"

    # Any other (or unset) shell falls back to .profile
    return os.path.join(home, ".profile"), "sh"
631
+
632
+
633
def set_java_home_persistent(java_home: str) -> Tuple[bool, str]:
    """
    Set JAVA_HOME persistently by modifying shell config file.

    Writes (or replaces) a three-line "# DVT Java Configuration" section in
    the shell config file chosen by ``get_shell_config_file`` and, on
    success, also updates JAVA_HOME and PATH of the current process.

    :param java_home: Path to JAVA_HOME
    :returns: Tuple of (success, message)
    """
    config_file, shell = get_shell_config_file()
    if not config_file:
        return False, f"Could not determine shell config file for {platform.system()}"

    # Build the section in the syntax of the target shell.
    if shell == "PowerShell":
        export_line = f'$env:JAVA_HOME = "{java_home}"'
        path_line = '$env:PATH = "$env:JAVA_HOME\\bin;$env:PATH"'
    elif shell == "fish":
        export_line = f'set -gx JAVA_HOME "{java_home}"'
        path_line = 'set -gx PATH "$JAVA_HOME/bin" $PATH'
    else:
        export_line = f'export JAVA_HOME="{java_home}"'
        path_line = 'export PATH="$JAVA_HOME/bin:$PATH"'
    section = f"# DVT Java Configuration\n{export_line}\n{path_line}\n"

    try:
        # Read existing config
        existing_content = ""
        if os.path.exists(config_file):
            with open(config_file, "r") as f:
                existing_content = f.read()

        # Replace an existing DVT section: the marker line plus the two lines
        # after it (the last one may lack a trailing newline at end of file).
        # The replacement is supplied through a function so that backslashes
        # in Windows paths are written literally instead of being interpreted
        # as regex template escapes.
        pattern = r"# DVT Java Configuration\n[^\n]+\n[^\n]+(?:\n|\Z)"
        new_content, replaced = re.subn(pattern, lambda _match: section, existing_content)
        if not replaced:
            # No well-formed section found (first run, or the section was
            # hand-edited): append a fresh one so the setting takes effect.
            new_content = existing_content + "\n" + section

        # Write back, creating the parent directory if needed
        # (e.g. ~/.config/fish).
        config_dir = os.path.dirname(config_file)
        if config_dir:
            os.makedirs(config_dir, exist_ok=True)
        with open(config_file, "w") as f:
            f.write(new_content)

        # Also set for current session
        os.environ["JAVA_HOME"] = java_home
        bin_path = os.path.join(java_home, "bin")
        os.environ["PATH"] = bin_path + os.pathsep + os.environ.get("PATH", "")

        return True, f"Updated {config_file}. Run 'source {config_file}' or restart terminal."

    except (OSError, ValueError) as e:
        # OSError: file/directory access problems; ValueError: config file
        # not decodable as text.
        return False, f"Failed to update {config_file}: {str(e)}"