dvt_core-0.58.6-cp311-cp311-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324)
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2403 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +844 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +50 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.cpython-311-darwin.so +0 -0
  74. dbt/compute/engines/spark_engine.py +642 -0
  75. dbt/compute/federated_executor.cpython-311-darwin.so +0 -0
  76. dbt/compute/federated_executor.py +1080 -0
  77. dbt/compute/filter_pushdown.cpython-311-darwin.so +0 -0
  78. dbt/compute/filter_pushdown.py +273 -0
  79. dbt/compute/jar_provisioning.cpython-311-darwin.so +0 -0
  80. dbt/compute/jar_provisioning.py +255 -0
  81. dbt/compute/java_compat.cpython-311-darwin.so +0 -0
  82. dbt/compute/java_compat.py +689 -0
  83. dbt/compute/jdbc_utils.cpython-311-darwin.so +0 -0
  84. dbt/compute/jdbc_utils.py +678 -0
  85. dbt/compute/metadata/__init__.py +40 -0
  86. dbt/compute/metadata/adapters_registry.cpython-311-darwin.so +0 -0
  87. dbt/compute/metadata/adapters_registry.py +370 -0
  88. dbt/compute/metadata/registry.cpython-311-darwin.so +0 -0
  89. dbt/compute/metadata/registry.py +674 -0
  90. dbt/compute/metadata/store.cpython-311-darwin.so +0 -0
  91. dbt/compute/metadata/store.py +1499 -0
  92. dbt/compute/smart_selector.cpython-311-darwin.so +0 -0
  93. dbt/compute/smart_selector.py +377 -0
  94. dbt/compute/strategies/__init__.py +55 -0
  95. dbt/compute/strategies/base.cpython-311-darwin.so +0 -0
  96. dbt/compute/strategies/base.py +165 -0
  97. dbt/compute/strategies/dataproc.cpython-311-darwin.so +0 -0
  98. dbt/compute/strategies/dataproc.py +207 -0
  99. dbt/compute/strategies/emr.cpython-311-darwin.so +0 -0
  100. dbt/compute/strategies/emr.py +203 -0
  101. dbt/compute/strategies/local.cpython-311-darwin.so +0 -0
  102. dbt/compute/strategies/local.py +443 -0
  103. dbt/compute/strategies/standalone.cpython-311-darwin.so +0 -0
  104. dbt/compute/strategies/standalone.py +262 -0
  105. dbt/config/__init__.py +4 -0
  106. dbt/config/catalogs.py +94 -0
  107. dbt/config/compute.cpython-311-darwin.so +0 -0
  108. dbt/config/compute.py +513 -0
  109. dbt/config/dvt_profile.cpython-311-darwin.so +0 -0
  110. dbt/config/dvt_profile.py +342 -0
  111. dbt/config/profile.py +422 -0
  112. dbt/config/project.py +873 -0
  113. dbt/config/project_utils.py +28 -0
  114. dbt/config/renderer.py +231 -0
  115. dbt/config/runtime.py +553 -0
  116. dbt/config/selectors.py +208 -0
  117. dbt/config/utils.py +77 -0
  118. dbt/constants.py +28 -0
  119. dbt/context/__init__.py +0 -0
  120. dbt/context/base.py +745 -0
  121. dbt/context/configured.py +135 -0
  122. dbt/context/context_config.py +382 -0
  123. dbt/context/docs.py +82 -0
  124. dbt/context/exceptions_jinja.py +178 -0
  125. dbt/context/macro_resolver.py +195 -0
  126. dbt/context/macros.py +171 -0
  127. dbt/context/manifest.py +72 -0
  128. dbt/context/providers.py +2249 -0
  129. dbt/context/query_header.py +13 -0
  130. dbt/context/secret.py +58 -0
  131. dbt/context/target.py +74 -0
  132. dbt/contracts/__init__.py +0 -0
  133. dbt/contracts/files.py +413 -0
  134. dbt/contracts/graph/__init__.py +0 -0
  135. dbt/contracts/graph/manifest.py +1904 -0
  136. dbt/contracts/graph/metrics.py +97 -0
  137. dbt/contracts/graph/model_config.py +70 -0
  138. dbt/contracts/graph/node_args.py +42 -0
  139. dbt/contracts/graph/nodes.py +1806 -0
  140. dbt/contracts/graph/semantic_manifest.py +232 -0
  141. dbt/contracts/graph/unparsed.py +811 -0
  142. dbt/contracts/project.py +417 -0
  143. dbt/contracts/results.py +53 -0
  144. dbt/contracts/selection.py +23 -0
  145. dbt/contracts/sql.py +85 -0
  146. dbt/contracts/state.py +68 -0
  147. dbt/contracts/util.py +46 -0
  148. dbt/deprecations.py +348 -0
  149. dbt/deps/__init__.py +0 -0
  150. dbt/deps/base.py +152 -0
  151. dbt/deps/git.py +195 -0
  152. dbt/deps/local.py +79 -0
  153. dbt/deps/registry.py +130 -0
  154. dbt/deps/resolver.py +149 -0
  155. dbt/deps/tarball.py +120 -0
  156. dbt/docs/source/_ext/dbt_click.py +119 -0
  157. dbt/docs/source/conf.py +32 -0
  158. dbt/env_vars.py +64 -0
  159. dbt/event_time/event_time.py +40 -0
  160. dbt/event_time/sample_window.py +60 -0
  161. dbt/events/__init__.py +15 -0
  162. dbt/events/base_types.py +36 -0
  163. dbt/events/core_types_pb2.py +2 -0
  164. dbt/events/logging.py +108 -0
  165. dbt/events/types.py +2516 -0
  166. dbt/exceptions.py +1486 -0
  167. dbt/flags.py +89 -0
  168. dbt/graph/__init__.py +11 -0
  169. dbt/graph/cli.py +249 -0
  170. dbt/graph/graph.py +172 -0
  171. dbt/graph/queue.py +214 -0
  172. dbt/graph/selector.py +374 -0
  173. dbt/graph/selector_methods.py +975 -0
  174. dbt/graph/selector_spec.py +222 -0
  175. dbt/graph/thread_pool.py +18 -0
  176. dbt/hooks.py +21 -0
  177. dbt/include/README.md +49 -0
  178. dbt/include/__init__.py +3 -0
  179. dbt/include/data/adapters_registry.duckdb +0 -0
  180. dbt/include/data/build_registry.py +242 -0
  181. dbt/include/data/csv/adapter_queries.csv +33 -0
  182. dbt/include/data/csv/syntax_rules.csv +9 -0
  183. dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
  184. dbt/include/data/csv/type_mappings_databricks.csv +30 -0
  185. dbt/include/data/csv/type_mappings_mysql.csv +40 -0
  186. dbt/include/data/csv/type_mappings_oracle.csv +30 -0
  187. dbt/include/data/csv/type_mappings_postgres.csv +56 -0
  188. dbt/include/data/csv/type_mappings_redshift.csv +33 -0
  189. dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
  190. dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
  191. dbt/include/starter_project/.gitignore +4 -0
  192. dbt/include/starter_project/README.md +15 -0
  193. dbt/include/starter_project/__init__.py +3 -0
  194. dbt/include/starter_project/analyses/.gitkeep +0 -0
  195. dbt/include/starter_project/dbt_project.yml +36 -0
  196. dbt/include/starter_project/macros/.gitkeep +0 -0
  197. dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  198. dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  199. dbt/include/starter_project/models/example/schema.yml +21 -0
  200. dbt/include/starter_project/seeds/.gitkeep +0 -0
  201. dbt/include/starter_project/snapshots/.gitkeep +0 -0
  202. dbt/include/starter_project/tests/.gitkeep +0 -0
  203. dbt/internal_deprecations.py +26 -0
  204. dbt/jsonschemas/__init__.py +3 -0
  205. dbt/jsonschemas/jsonschemas.py +309 -0
  206. dbt/jsonschemas/project/0.0.110.json +4717 -0
  207. dbt/jsonschemas/project/0.0.85.json +2015 -0
  208. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  209. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  210. dbt/jsonschemas/resources/latest.json +6773 -0
  211. dbt/links.py +4 -0
  212. dbt/materializations/__init__.py +0 -0
  213. dbt/materializations/incremental/__init__.py +0 -0
  214. dbt/materializations/incremental/microbatch.py +236 -0
  215. dbt/mp_context.py +8 -0
  216. dbt/node_types.py +37 -0
  217. dbt/parser/__init__.py +23 -0
  218. dbt/parser/analysis.py +21 -0
  219. dbt/parser/base.py +548 -0
  220. dbt/parser/common.py +266 -0
  221. dbt/parser/docs.py +52 -0
  222. dbt/parser/fixtures.py +51 -0
  223. dbt/parser/functions.py +30 -0
  224. dbt/parser/generic_test.py +100 -0
  225. dbt/parser/generic_test_builders.py +333 -0
  226. dbt/parser/hooks.py +118 -0
  227. dbt/parser/macros.py +137 -0
  228. dbt/parser/manifest.py +2204 -0
  229. dbt/parser/models.py +573 -0
  230. dbt/parser/partial.py +1178 -0
  231. dbt/parser/read_files.py +445 -0
  232. dbt/parser/schema_generic_tests.py +422 -0
  233. dbt/parser/schema_renderer.py +111 -0
  234. dbt/parser/schema_yaml_readers.py +935 -0
  235. dbt/parser/schemas.py +1466 -0
  236. dbt/parser/search.py +149 -0
  237. dbt/parser/seeds.py +28 -0
  238. dbt/parser/singular_test.py +20 -0
  239. dbt/parser/snapshots.py +44 -0
  240. dbt/parser/sources.py +558 -0
  241. dbt/parser/sql.py +62 -0
  242. dbt/parser/unit_tests.py +621 -0
  243. dbt/plugins/__init__.py +20 -0
  244. dbt/plugins/contracts.py +9 -0
  245. dbt/plugins/exceptions.py +2 -0
  246. dbt/plugins/manager.py +163 -0
  247. dbt/plugins/manifest.py +21 -0
  248. dbt/profiler.py +20 -0
  249. dbt/py.typed +1 -0
  250. dbt/query_analyzer.cpython-311-darwin.so +0 -0
  251. dbt/query_analyzer.py +410 -0
  252. dbt/runners/__init__.py +2 -0
  253. dbt/runners/exposure_runner.py +7 -0
  254. dbt/runners/no_op_runner.py +45 -0
  255. dbt/runners/saved_query_runner.py +7 -0
  256. dbt/selected_resources.py +8 -0
  257. dbt/task/__init__.py +0 -0
  258. dbt/task/base.py +503 -0
  259. dbt/task/build.py +197 -0
  260. dbt/task/clean.py +56 -0
  261. dbt/task/clone.py +161 -0
  262. dbt/task/compile.py +150 -0
  263. dbt/task/compute.cpython-311-darwin.so +0 -0
  264. dbt/task/compute.py +458 -0
  265. dbt/task/debug.py +505 -0
  266. dbt/task/deps.py +280 -0
  267. dbt/task/docs/__init__.py +3 -0
  268. dbt/task/docs/api/__init__.py +23 -0
  269. dbt/task/docs/api/catalog.cpython-311-darwin.so +0 -0
  270. dbt/task/docs/api/catalog.py +204 -0
  271. dbt/task/docs/api/lineage.cpython-311-darwin.so +0 -0
  272. dbt/task/docs/api/lineage.py +234 -0
  273. dbt/task/docs/api/profile.cpython-311-darwin.so +0 -0
  274. dbt/task/docs/api/profile.py +204 -0
  275. dbt/task/docs/api/spark.cpython-311-darwin.so +0 -0
  276. dbt/task/docs/api/spark.py +186 -0
  277. dbt/task/docs/generate.py +947 -0
  278. dbt/task/docs/index.html +250 -0
  279. dbt/task/docs/serve.cpython-311-darwin.so +0 -0
  280. dbt/task/docs/serve.py +174 -0
  281. dbt/task/dvt_output.py +362 -0
  282. dbt/task/dvt_run.py +204 -0
  283. dbt/task/freshness.py +322 -0
  284. dbt/task/function.py +121 -0
  285. dbt/task/group_lookup.py +46 -0
  286. dbt/task/init.cpython-311-darwin.so +0 -0
  287. dbt/task/init.py +604 -0
  288. dbt/task/java.cpython-311-darwin.so +0 -0
  289. dbt/task/java.py +316 -0
  290. dbt/task/list.py +236 -0
  291. dbt/task/metadata.cpython-311-darwin.so +0 -0
  292. dbt/task/metadata.py +804 -0
  293. dbt/task/printer.py +175 -0
  294. dbt/task/profile.cpython-311-darwin.so +0 -0
  295. dbt/task/profile.py +1307 -0
  296. dbt/task/profile_serve.py +615 -0
  297. dbt/task/retract.py +438 -0
  298. dbt/task/retry.py +175 -0
  299. dbt/task/run.py +1387 -0
  300. dbt/task/run_operation.py +141 -0
  301. dbt/task/runnable.py +758 -0
  302. dbt/task/seed.py +103 -0
  303. dbt/task/show.py +149 -0
  304. dbt/task/snapshot.py +56 -0
  305. dbt/task/spark.cpython-311-darwin.so +0 -0
  306. dbt/task/spark.py +414 -0
  307. dbt/task/sql.py +110 -0
  308. dbt/task/target_sync.cpython-311-darwin.so +0 -0
  309. dbt/task/target_sync.py +766 -0
  310. dbt/task/test.py +464 -0
  311. dbt/tests/fixtures/__init__.py +1 -0
  312. dbt/tests/fixtures/project.py +620 -0
  313. dbt/tests/util.py +651 -0
  314. dbt/tracking.py +529 -0
  315. dbt/utils/__init__.py +3 -0
  316. dbt/utils/artifact_upload.py +151 -0
  317. dbt/utils/utils.py +408 -0
  318. dbt/version.py +270 -0
  319. dvt_cli/__init__.py +72 -0
  320. dvt_core-0.58.6.dist-info/METADATA +288 -0
  321. dvt_core-0.58.6.dist-info/RECORD +324 -0
  322. dvt_core-0.58.6.dist-info/WHEEL +5 -0
  323. dvt_core-0.58.6.dist-info/entry_points.txt +2 -0
  324. dvt_core-0.58.6.dist-info/top_level.txt +2 -0
@@ -0,0 +1,443 @@
+"""
+Local Spark Connection Strategy
+
+Provides an embedded PySpark session for local development and testing.
+This is the default strategy, extracted from the original SparkEngine implementation.
+
+Includes auto-configuration of Java, with PySpark compatibility checking.
+
+v0.51.3: Refactored to use the java_compat module for centralized Java/PySpark compatibility.
+v0.5.98: Added JAR provisioning using local file paths (spark.jars).
+v0.58.5: Fixed Java 21 segfaults by NOT loading the jdk.incubator.vector module.
+"""
+
+import os
+import threading
+from typing import Dict, Optional, Set, Tuple
+
+from dbt.compute.strategies.base import BaseConnectionStrategy
+from dbt_common.exceptions import DbtRuntimeError
+
+try:
+    from pyspark.sql import SparkSession
+
+    PYSPARK_AVAILABLE = True
+except ImportError:
+    PYSPARK_AVAILABLE = False
+    SparkSession = None
+
+# Global Spark session cache for reuse across calls (within the same process)
+_SPARK_SESSION_CACHE = {}
+
+# Thread lock for safe session management
+_SPARK_SESSION_LOCK = threading.Lock()
+
+
+def cleanup_all_spark_sessions():
+    """
+    Clean up ALL cached Spark sessions.
+
+    DVT v0.58.4: Call this at the end of runs to prevent semaphore leaks
+    and segfaults when the thread pool terminates.
+
+    Thread-safe - uses the lock for cache access.
+    """
+    global _SPARK_SESSION_CACHE
+
+    with _SPARK_SESSION_LOCK:
+        for cache_key, spark in list(_SPARK_SESSION_CACHE.items()):
+            try:
+                spark.stop()
+            except Exception:
+                pass  # Best-effort cleanup
+        _SPARK_SESSION_CACHE.clear()
+
+
+def _disable_multiprocessing_resource_tracker():
+    """
+    Disable Python's multiprocessing resource tracker to prevent segfaults.
+
+    DVT v0.58.5: PySpark 4.0 + Java 21 creates semaphores that conflict with
+    Python's resource tracker during shutdown, causing segfaults on macOS.
+    Disabling the tracker prevents these conflicts.
+    """
+    try:
+        # Disable resource tracking for semaphores by replacing the
+        # tracker's state with no-ops
+        from multiprocessing import resource_tracker
+
+        resource_tracker._resource_tracker = None
+        resource_tracker._fd = None
+    except Exception:
+        pass  # Best effort - if it fails, continue anyway
+
+
+def _ensure_java_available():
+    """
+    Ensure Java is available and compatible with the installed PySpark.
+
+    Uses the centralized java_compat module for cross-platform Java detection
+    and PySpark compatibility checking.
+
+    v0.51.3: Refactored to use the java_compat module with enhanced compatibility checking.
+    Always sets JAVA_HOME to a proper JDK path (not /usr or an invalid path).
+    """
+    from dbt.compute.java_compat import (
+        get_pyspark_info,
+        find_all_java_installations,
+        select_best_java,
+    )
+
+    # Get PySpark requirements
+    pyspark = get_pyspark_info()
+    if not pyspark:
+        raise DbtRuntimeError(
+            "PySpark is not installed. Install it with: pip install pyspark\n"
+            "Or run 'dvt spark set-version' to select a specific version."
+        )
+
+    # Always search for Java installations and select the best one.
+    # This ensures JAVA_HOME is set to a proper JDK path (not /usr or invalid).
+    all_java = find_all_java_installations()
+    best_java = select_best_java(all_java, pyspark.java_supported)
+
+    if best_java:
+        # Set JAVA_HOME to the best compatible Java found. This is needed
+        # even if Java is in PATH, because PySpark's scripts rely on
+        # JAVA_HOME pointing at a proper JDK directory.
+        os.environ["JAVA_HOME"] = best_java.path
+        bin_path = os.path.join(best_java.path, "bin")
+        # Prepend to PATH to ensure this Java is used
+        os.environ["PATH"] = bin_path + os.pathsep + os.environ.get("PATH", "")
+        return
+
+    # No compatible Java found - raise an error with guidance
+    supported_str = ", ".join(str(v) for v in pyspark.java_supported)
+    raise DbtRuntimeError(
+        f"No compatible Java found for PySpark {pyspark.version}.\n"
+        f"PySpark {pyspark.major_minor} requires Java {supported_str}.\n\n"
+        f"Run 'dvt java search' to find Java installations.\n"
+        f"Run 'dvt java set' to select a compatible version.\n"
+        f"Run 'dvt java install' for an installation guide."
+    )
+
+
+class LocalStrategy(BaseConnectionStrategy):
+    """
+    Local embedded Spark strategy.
+
+    Creates an in-process PySpark session. Best for development, testing,
+    and small-to-medium workloads.
+
+    Configuration:
+        {
+            "master": "local[*]",           # optional, defaults to local[2]
+            "spark.driver.memory": "4g",    # optional
+            "spark.executor.memory": "4g",  # optional
+            # ... any other Spark configs
+        }
+    """
+
+    def validate_config(self) -> None:
+        """
+        Validate the local strategy configuration.
+
+        The local strategy is flexible - no required fields.
+        """
+        # The local strategy accepts any config; just ensure it is a dictionary.
+        if not isinstance(self.config, dict):
+            raise DbtRuntimeError(
+                f"Local Spark config must be a dictionary, got {type(self.config)}"
+            )
+
+    def get_spark_session(self, adapter_types: Optional[Set[str]] = None) -> SparkSession:
+        """
+        Create or reuse a local Spark session.
+
+        Creates an embedded PySpark session with a configuration optimized for
+        fast startup, and caches sessions so that repeated calls with the same
+        configuration reuse an existing session.
+
+        DVT v0.5.3: Uses direct JAR paths instead of spark.jars.packages to avoid
+        verbose Ivy output. JARs are downloaded once and cached in the project's
+        .dvt/jdbc_jars/ directory (see _get_jdbc_jars).
+
+        :param adapter_types: Set of adapter types that need JDBC drivers
+            (optional, for API compatibility)
+        :returns: Initialized SparkSession
+        :raises DbtRuntimeError: If session creation fails
+        """
+        import hashlib
+
+        if not PYSPARK_AVAILABLE:
+            raise DbtRuntimeError("PySpark is not available. Install it with: pip install pyspark")
+
+        # DVT v0.58.5: Disable the resource tracker BEFORE any JVM operations.
+        # This prevents segfaults caused by semaphore cleanup conflicts.
+        _disable_multiprocessing_resource_tracker()
+
+        # DVT v0.58.5: Set JVM options to disable the vector API BEFORE Java
+        # starts. This must happen before PySpark creates the JVM.
+        _java_opts = os.environ.get("_JAVA_OPTIONS", "")
+        if "-XX:-UseVectorCmov" not in _java_opts:
+            os.environ["_JAVA_OPTIONS"] = (
+                f"{_java_opts} -XX:-UseVectorCmov -XX:-UseSIMDForMemoryOps"
+                " -XX:+IgnoreUnrecognizedVMOptions"
+            ).strip()
+
+        # Auto-configure Java first
+        _ensure_java_available()
+
+        # Create a cache key from the config so sessions with the same
+        # configuration are reused
+        config_str = str(sorted(self.config.items()))
+        cache_key = hashlib.md5(config_str.encode()).hexdigest()
+
+        # Thread-safe session management
+        with _SPARK_SESSION_LOCK:
+            # Check whether we have a cached session with this config
+            if cache_key in _SPARK_SESSION_CACHE:
+                cached_spark = _SPARK_SESSION_CACHE[cache_key]
+                try:
+                    # Verify the session is still alive; this fails if it is dead
+                    cached_spark.sparkContext.getConf()
+                    return cached_spark
+                except Exception:
+                    # Session died; remove it from the cache
+                    del _SPARK_SESSION_CACHE[cache_key]
+
+            # v0.51.0: Stop any existing session with a DIFFERENT config.
+            # This ensures we get the correct spark.jars for this strategy.
+            try:
+                existing = SparkSession.getActiveSession()
+                if existing:
+                    existing.stop()
+                    # Clear the global cache too
+                    _SPARK_SESSION_CACHE.clear()
+            except Exception:
+                pass
+
+        # DVT v0.5.3: Suppress Java/Spark startup warnings completely by
+        # writing a custom log4j2 config that silences Spark startup noise
+        import tempfile
+
+        log4j_config = """
+status = error
+appender.console.type = Console
+appender.console.name = console
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %msg%n
+rootLogger.level = error
+rootLogger.appenderRef.console.ref = console
+logger.spark.name = org.apache.spark
+logger.spark.level = error
+logger.hadoop.name = org.apache.hadoop
+logger.hadoop.level = error
+"""
+        log4j_file = os.path.join(tempfile.gettempdir(), "dvt_log4j2.properties")
+        with open(log4j_file, "w") as f:
+            f.write(log4j_config)
+
+        # Ensure the persistent JAR cache directory in the DVT home exists
+        dvt_home = os.path.expanduser("~/.dvt")
+        jar_cache_dir = os.path.join(dvt_home, "jdbc_jars")
+        os.makedirs(jar_cache_dir, exist_ok=True)
+
+        # DVT v0.5.3: Get cached JDBC jars (from the project dir, not the home dir)
+        jar_paths = self._get_jdbc_jars(jar_cache_dir)
+
+        builder = SparkSession.builder.appName(self.app_name)
+
+        # Use local[2] instead of local[*] for faster startup
+        master = self.config.get("master", "local[2]")
+        builder = builder.master(master)
+
+        # Default configuration optimized for fast startup
+        fast_configs = {
+            # Memory optimization
+            "spark.driver.memory": "1g",
+            "spark.executor.memory": "1g",
+            # DVT v0.5.3: Use direct JAR paths (no Ivy output)
+            "spark.jars": ",".join(jar_paths) if jar_paths else "",
+            # DVT v0.58.5: Add JARs to the classpath for JDBC driver loading
+            "spark.driver.extraClassPath": ":".join(jar_paths) if jar_paths else "",
+            "spark.executor.extraClassPath": ":".join(jar_paths) if jar_paths else "",
+            # DVT v0.58.5: Java 21 compatibility flags for PySpark 4.0.
+            # Minimal JVM options - restricting modules causes Spark failures.
+            "spark.driver.extraJavaOptions": " ".join([
+                f"-Dlog4j2.configurationFile=file:{log4j_file}",
+                "-Djava.util.logging.level=SEVERE",
+                # Java module system compatibility for Spark
+                "--add-opens=java.base/java.lang=ALL-UNNAMED",
+                "--add-opens=java.base/java.lang.invoke=ALL-UNNAMED",
+                "--add-opens=java.base/java.lang.reflect=ALL-UNNAMED",
+                "--add-opens=java.base/java.io=ALL-UNNAMED",
+                "--add-opens=java.base/java.net=ALL-UNNAMED",
+                "--add-opens=java.base/java.nio=ALL-UNNAMED",
+                "--add-opens=java.base/java.util=ALL-UNNAMED",
+                "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED",
+                "--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED",
+                "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED",
+                "--add-opens=java.base/sun.nio.cs=ALL-UNNAMED",
+                "--add-opens=java.base/sun.security.action=ALL-UNNAMED",
+                "--add-opens=java.base/sun.util.calendar=ALL-UNNAMED",
+                "--add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED",
+                "-XX:+IgnoreUnrecognizedVMOptions",
+            ]),
+            # Suppress the Spark UI and console progress bars
+            "spark.ui.enabled": "false",
+            "spark.ui.showConsoleProgress": "false",
+            "spark.eventLog.enabled": "false",
+            # Network optimizations
+            "spark.driver.bindAddress": "127.0.0.1",
+            "spark.driver.host": "localhost",
+            # Fewer shuffle partitions for faster queries on small data
+            "spark.sql.shuffle.partitions": "8",
+            # DVT v0.58.4: Disable Arrow to avoid segfaults on macOS + Java 21;
+            # Arrow's native code can segfault during Spark session creation
+            "spark.sql.execution.arrow.pyspark.enabled": "false",
+            "spark.sql.execution.arrow.pyspark.fallback.enabled": "true",
+            "spark.sql.execution.arrow.enabled": "false",
+            # Disable adaptive query execution (slow for small data)
+            "spark.sql.adaptive.enabled": "false",
+            "spark.sql.adaptive.coalescePartitions.enabled": "false",
+        }
+
+        # Apply the fast defaults (each can be overridden by user config)
+        for key, value in fast_configs.items():
+            if key not in self.config:
+                builder = builder.config(key, value)
+
+        # Apply user-provided configs (except 'master', which is already set)
+        for key, value in self.config.items():
+            if key != "master":
+                builder = builder.config(key, value)
+
+        # Create the Spark session
+        spark = builder.getOrCreate()
+
+        # Set the log level to ERROR to suppress Spark warnings
+        spark.sparkContext.setLogLevel("ERROR")
+
+        # Cache the session for reuse (thread-safe)
+        with _SPARK_SESSION_LOCK:
+            _SPARK_SESSION_CACHE[cache_key] = spark
+
+        return spark
+
+    def _get_jdbc_jars(self, cache_dir: str) -> list:
+        """
+        Discover ALL JDBC JAR files in the project cache at runtime.
+
+        v0.5.96: Dynamic discovery - finds all *.jar files in .dvt/jdbc_jars/.
+        This keeps the project folder portable: move the folder and the JARs
+        still work. JARs are downloaded via the 'dvt target sync' command.
+
+        :param cache_dir: Directory to look for JAR files (ignored; the
+            project directory is used instead)
+        :returns: List of absolute JAR file paths
+        """
+        import glob
+
+        # Look for JARs in the project directory (current working directory)
+        project_dir = os.getcwd()
+        jar_cache_dir = os.path.join(project_dir, ".dvt", "jdbc_jars")
+
+        # Discover all *.jar files dynamically (no hardcoded list)
+        jar_pattern = os.path.join(jar_cache_dir, "*.jar")
+        jar_paths = sorted(glob.glob(jar_pattern))
+
+        # No warning when nothing is found - keep output clean. The user
+        # should run 'dvt target sync' if JARs are needed.
+        return jar_paths
+
+    def close(self, spark: Optional[SparkSession]) -> None:
+        """
+        Close the Spark session after execution.
+
+        By default the session is closed, to free resources and avoid blocking
+        other models. Set the DVT_SPARK_KEEP_ALIVE=1 environment variable to
+        keep sessions alive in the cache for faster consecutive runs within
+        the same Python process (advanced).
+
+        :param spark: SparkSession to close (or keep alive)
+        """
+        # Check whether keep-alive is enabled (opt-in, not the default)
+        keep_alive = os.environ.get("DVT_SPARK_KEEP_ALIVE", "0") == "1"
+
+        if keep_alive:
+            # DVT v0.4.8: Verbose output suppressed; the session stays alive
+            # in the cache for reuse (opt-in)
+            pass
+        elif spark:
+            try:
+                # Remove the session from the cache first (thread-safe)
+                with _SPARK_SESSION_LOCK:
+                    for key, cached_spark in list(_SPARK_SESSION_CACHE.items()):
+                        if cached_spark is spark:
+                            del _SPARK_SESSION_CACHE[key]
+                            break
+
+                # Stop the session
+                spark.stop()
+            except Exception:
+                pass  # Best-effort cleanup
+
+    def estimate_cost(self, duration_minutes: float) -> float:
+        """
+        Estimate cost for local execution.
+
+        Local execution is free (runs on the local machine).
+
+        :param duration_minutes: Estimated query duration
+        :returns: 0.0 (free)
+        """
+        return 0.0
+
+    def get_platform_name(self) -> str:
+        """Get platform name."""
+        return "local"
+
+    def get_jar_provisioning_config(self, adapter_types: Set[str]) -> Dict[str, str]:
+        """
+        Get the Spark config for JDBC JAR provisioning using local file paths.
+
+        Local Spark uses spark.jars with local file paths from .dvt/jdbc_jars/
+        for instant startup (no downloads at runtime).
+
+        :param adapter_types: Set of adapter types (ignored - all JARs found are used)
+        :returns: Dictionary with the spark.jars config
+        """
+        from dbt.compute.jar_provisioning import LocalJARProvisioning
+
+        provisioning = LocalJARProvisioning(project_dir=os.getcwd())
+        return provisioning.get_spark_config(adapter_types)
+
+    def test_connectivity(self) -> Tuple[bool, str]:
+        """
+        Test connectivity by creating a local Spark session.
+
+        :returns: Tuple of (success, message)
+        """
+        # Check PySpark at runtime (not at module import time)
+        try:
+            from pyspark.sql import SparkSession as _  # noqa: F401
+        except ImportError:
+            return (False, "PySpark not installed")
+
+        try:
+            spark = self.get_spark_session()
+            # Run a simple query to verify the session works
+            spark.sql("SELECT 1 AS test").collect()
+            return (True, "Local Spark session created and SQL test passed")
+        except Exception as e:
+            return (False, f"Local Spark failed: {e}")
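
Usage sketch (not part of the diff above): a minimal example of how LocalStrategy could be driven end to end. The constructor call is an assumption - BaseConnectionStrategy.__init__ (and the app_name attribute it presumably sets) live in dbt/compute/strategies/base.py, which is not shown in this diff.

# Hypothetical driver for LocalStrategy. The constructor signature is an
# assumption; only the methods shown in the diff are confirmed.
from dbt.compute.strategies.local import (
    LocalStrategy,
    cleanup_all_spark_sessions,
)

config = {
    "master": "local[2]",          # optional; local[2] is the default
    "spark.driver.memory": "2g",   # user configs override the 1g fast default
}

strategy = LocalStrategy(config)   # assumed: config is passed to the constructor
strategy.validate_config()         # raises DbtRuntimeError if config is not a dict

ok, message = strategy.test_connectivity()
print(ok, message)

spark = strategy.get_spark_session()
spark.sql("SELECT 1 AS smoke_test").show()

strategy.close(spark)              # no-op when DVT_SPARK_KEEP_ALIVE=1
cleanup_all_spark_sessions()       # v0.58.4: call once at the end of a run

Because get_spark_session keys its cache on an MD5 of the sorted config items, a second call with an identical config dict returns the same live session instead of starting a new JVM.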