dvt-core 0.59.0a51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2660 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +844 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +60 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +642 -0
  74. dbt/compute/federated_executor.py +1080 -0
  75. dbt/compute/filter_pushdown.py +273 -0
  76. dbt/compute/jar_provisioning.py +273 -0
  77. dbt/compute/java_compat.py +689 -0
  78. dbt/compute/jdbc_utils.py +1252 -0
  79. dbt/compute/metadata/__init__.py +63 -0
  80. dbt/compute/metadata/adapters_registry.py +370 -0
  81. dbt/compute/metadata/catalog_store.py +1036 -0
  82. dbt/compute/metadata/registry.py +674 -0
  83. dbt/compute/metadata/store.py +1020 -0
  84. dbt/compute/smart_selector.py +377 -0
  85. dbt/compute/spark_logger.py +272 -0
  86. dbt/compute/strategies/__init__.py +55 -0
  87. dbt/compute/strategies/base.py +165 -0
  88. dbt/compute/strategies/dataproc.py +207 -0
  89. dbt/compute/strategies/emr.py +203 -0
  90. dbt/compute/strategies/local.py +472 -0
  91. dbt/compute/strategies/standalone.py +262 -0
  92. dbt/config/__init__.py +4 -0
  93. dbt/config/catalogs.py +94 -0
  94. dbt/config/compute.py +513 -0
  95. dbt/config/dvt_profile.py +408 -0
  96. dbt/config/profile.py +422 -0
  97. dbt/config/project.py +888 -0
  98. dbt/config/project_utils.py +48 -0
  99. dbt/config/renderer.py +231 -0
  100. dbt/config/runtime.py +564 -0
  101. dbt/config/selectors.py +208 -0
  102. dbt/config/utils.py +77 -0
  103. dbt/constants.py +28 -0
  104. dbt/context/__init__.py +0 -0
  105. dbt/context/base.py +745 -0
  106. dbt/context/configured.py +135 -0
  107. dbt/context/context_config.py +382 -0
  108. dbt/context/docs.py +82 -0
  109. dbt/context/exceptions_jinja.py +178 -0
  110. dbt/context/macro_resolver.py +195 -0
  111. dbt/context/macros.py +171 -0
  112. dbt/context/manifest.py +72 -0
  113. dbt/context/providers.py +2249 -0
  114. dbt/context/query_header.py +13 -0
  115. dbt/context/secret.py +58 -0
  116. dbt/context/target.py +74 -0
  117. dbt/contracts/__init__.py +0 -0
  118. dbt/contracts/files.py +413 -0
  119. dbt/contracts/graph/__init__.py +0 -0
  120. dbt/contracts/graph/manifest.py +1904 -0
  121. dbt/contracts/graph/metrics.py +97 -0
  122. dbt/contracts/graph/model_config.py +70 -0
  123. dbt/contracts/graph/node_args.py +42 -0
  124. dbt/contracts/graph/nodes.py +1806 -0
  125. dbt/contracts/graph/semantic_manifest.py +232 -0
  126. dbt/contracts/graph/unparsed.py +811 -0
  127. dbt/contracts/project.py +419 -0
  128. dbt/contracts/results.py +53 -0
  129. dbt/contracts/selection.py +23 -0
  130. dbt/contracts/sql.py +85 -0
  131. dbt/contracts/state.py +68 -0
  132. dbt/contracts/util.py +46 -0
  133. dbt/deprecations.py +348 -0
  134. dbt/deps/__init__.py +0 -0
  135. dbt/deps/base.py +152 -0
  136. dbt/deps/git.py +195 -0
  137. dbt/deps/local.py +79 -0
  138. dbt/deps/registry.py +130 -0
  139. dbt/deps/resolver.py +149 -0
  140. dbt/deps/tarball.py +120 -0
  141. dbt/docs/source/_ext/dbt_click.py +119 -0
  142. dbt/docs/source/conf.py +32 -0
  143. dbt/env_vars.py +64 -0
  144. dbt/event_time/event_time.py +40 -0
  145. dbt/event_time/sample_window.py +60 -0
  146. dbt/events/__init__.py +15 -0
  147. dbt/events/base_types.py +36 -0
  148. dbt/events/core_types_pb2.py +2 -0
  149. dbt/events/logging.py +108 -0
  150. dbt/events/types.py +2516 -0
  151. dbt/exceptions.py +1486 -0
  152. dbt/flags.py +89 -0
  153. dbt/graph/__init__.py +11 -0
  154. dbt/graph/cli.py +249 -0
  155. dbt/graph/graph.py +172 -0
  156. dbt/graph/queue.py +214 -0
  157. dbt/graph/selector.py +374 -0
  158. dbt/graph/selector_methods.py +975 -0
  159. dbt/graph/selector_spec.py +222 -0
  160. dbt/graph/thread_pool.py +18 -0
  161. dbt/hooks.py +21 -0
  162. dbt/include/README.md +49 -0
  163. dbt/include/__init__.py +3 -0
  164. dbt/include/data/adapters_registry.duckdb +0 -0
  165. dbt/include/data/build_comprehensive_registry.py +1254 -0
  166. dbt/include/data/build_registry.py +242 -0
  167. dbt/include/data/csv/adapter_queries.csv +33 -0
  168. dbt/include/data/csv/syntax_rules.csv +9 -0
  169. dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
  170. dbt/include/data/csv/type_mappings_databricks.csv +30 -0
  171. dbt/include/data/csv/type_mappings_mysql.csv +40 -0
  172. dbt/include/data/csv/type_mappings_oracle.csv +30 -0
  173. dbt/include/data/csv/type_mappings_postgres.csv +56 -0
  174. dbt/include/data/csv/type_mappings_redshift.csv +33 -0
  175. dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
  176. dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
  177. dbt/include/dvt_starter_project/README.md +15 -0
  178. dbt/include/dvt_starter_project/__init__.py +3 -0
  179. dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
  180. dbt/include/dvt_starter_project/dvt_project.yml +39 -0
  181. dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
  182. dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
  183. dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
  184. dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
  185. dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
  186. dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
  187. dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
  188. dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
  189. dbt/internal_deprecations.py +26 -0
  190. dbt/jsonschemas/__init__.py +3 -0
  191. dbt/jsonschemas/jsonschemas.py +309 -0
  192. dbt/jsonschemas/project/0.0.110.json +4717 -0
  193. dbt/jsonschemas/project/0.0.85.json +2015 -0
  194. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  195. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  196. dbt/jsonschemas/resources/latest.json +6773 -0
  197. dbt/links.py +4 -0
  198. dbt/materializations/__init__.py +0 -0
  199. dbt/materializations/incremental/__init__.py +0 -0
  200. dbt/materializations/incremental/microbatch.py +236 -0
  201. dbt/mp_context.py +8 -0
  202. dbt/node_types.py +37 -0
  203. dbt/parser/__init__.py +23 -0
  204. dbt/parser/analysis.py +21 -0
  205. dbt/parser/base.py +548 -0
  206. dbt/parser/common.py +266 -0
  207. dbt/parser/docs.py +52 -0
  208. dbt/parser/fixtures.py +51 -0
  209. dbt/parser/functions.py +30 -0
  210. dbt/parser/generic_test.py +100 -0
  211. dbt/parser/generic_test_builders.py +333 -0
  212. dbt/parser/hooks.py +122 -0
  213. dbt/parser/macros.py +137 -0
  214. dbt/parser/manifest.py +2208 -0
  215. dbt/parser/models.py +573 -0
  216. dbt/parser/partial.py +1178 -0
  217. dbt/parser/read_files.py +445 -0
  218. dbt/parser/schema_generic_tests.py +422 -0
  219. dbt/parser/schema_renderer.py +111 -0
  220. dbt/parser/schema_yaml_readers.py +935 -0
  221. dbt/parser/schemas.py +1466 -0
  222. dbt/parser/search.py +149 -0
  223. dbt/parser/seeds.py +28 -0
  224. dbt/parser/singular_test.py +20 -0
  225. dbt/parser/snapshots.py +44 -0
  226. dbt/parser/sources.py +558 -0
  227. dbt/parser/sql.py +62 -0
  228. dbt/parser/unit_tests.py +621 -0
  229. dbt/plugins/__init__.py +20 -0
  230. dbt/plugins/contracts.py +9 -0
  231. dbt/plugins/exceptions.py +2 -0
  232. dbt/plugins/manager.py +163 -0
  233. dbt/plugins/manifest.py +21 -0
  234. dbt/profiler.py +20 -0
  235. dbt/py.typed +1 -0
  236. dbt/query_analyzer.py +410 -0
  237. dbt/runners/__init__.py +2 -0
  238. dbt/runners/exposure_runner.py +7 -0
  239. dbt/runners/no_op_runner.py +45 -0
  240. dbt/runners/saved_query_runner.py +7 -0
  241. dbt/selected_resources.py +8 -0
  242. dbt/task/__init__.py +0 -0
  243. dbt/task/base.py +506 -0
  244. dbt/task/build.py +197 -0
  245. dbt/task/clean.py +56 -0
  246. dbt/task/clone.py +161 -0
  247. dbt/task/compile.py +150 -0
  248. dbt/task/compute.py +458 -0
  249. dbt/task/debug.py +513 -0
  250. dbt/task/deps.py +280 -0
  251. dbt/task/docs/__init__.py +3 -0
  252. dbt/task/docs/api/__init__.py +23 -0
  253. dbt/task/docs/api/catalog.py +204 -0
  254. dbt/task/docs/api/lineage.py +234 -0
  255. dbt/task/docs/api/profile.py +204 -0
  256. dbt/task/docs/api/spark.py +186 -0
  257. dbt/task/docs/generate.py +1002 -0
  258. dbt/task/docs/index.html +250 -0
  259. dbt/task/docs/serve.py +174 -0
  260. dbt/task/dvt_output.py +509 -0
  261. dbt/task/dvt_run.py +282 -0
  262. dbt/task/dvt_seed.py +806 -0
  263. dbt/task/freshness.py +322 -0
  264. dbt/task/function.py +121 -0
  265. dbt/task/group_lookup.py +46 -0
  266. dbt/task/init.py +1022 -0
  267. dbt/task/java.py +316 -0
  268. dbt/task/list.py +236 -0
  269. dbt/task/metadata.py +804 -0
  270. dbt/task/migrate.py +714 -0
  271. dbt/task/printer.py +175 -0
  272. dbt/task/profile.py +1489 -0
  273. dbt/task/profile_serve.py +662 -0
  274. dbt/task/retract.py +441 -0
  275. dbt/task/retry.py +175 -0
  276. dbt/task/run.py +1647 -0
  277. dbt/task/run_operation.py +141 -0
  278. dbt/task/runnable.py +758 -0
  279. dbt/task/seed.py +103 -0
  280. dbt/task/show.py +149 -0
  281. dbt/task/snapshot.py +56 -0
  282. dbt/task/spark.py +414 -0
  283. dbt/task/sql.py +110 -0
  284. dbt/task/target_sync.py +814 -0
  285. dbt/task/test.py +464 -0
  286. dbt/tests/fixtures/__init__.py +1 -0
  287. dbt/tests/fixtures/project.py +620 -0
  288. dbt/tests/util.py +651 -0
  289. dbt/tracking.py +529 -0
  290. dbt/utils/__init__.py +3 -0
  291. dbt/utils/artifact_upload.py +151 -0
  292. dbt/utils/utils.py +408 -0
  293. dbt/version.py +271 -0
  294. dvt_cli/__init__.py +158 -0
  295. dvt_core-0.59.0a51.dist-info/METADATA +288 -0
  296. dvt_core-0.59.0a51.dist-info/RECORD +299 -0
  297. dvt_core-0.59.0a51.dist-info/WHEEL +5 -0
  298. dvt_core-0.59.0a51.dist-info/entry_points.txt +2 -0
  299. dvt_core-0.59.0a51.dist-info/top_level.txt +2 -0
@@ -0,0 +1,262 @@
1
+ """
2
+ Standalone Spark Cluster Connection Strategy
3
+
4
+ Provides connection to self-managed Spark clusters (on-premises or cloud VMs).
5
+
6
+ v0.5.98: New strategy for standalone Spark clusters with Maven-based JAR provisioning.
7
+ Fixes the bug where external clusters incorrectly fell back to LocalStrategy
8
+ with local JAR paths that don't exist on remote workers.
9
+
10
+ Configuration:
11
+ {
12
+ "master": "spark://master-node:7077", # Required: Spark master URL
13
+ "spark.driver.memory": "4g", # Optional: driver memory
14
+ "spark.executor.memory": "8g", # Optional: executor memory
15
+ "spark.executor.cores": "4", # Optional: cores per executor
16
+ "spark.executor.instances": "10", # Optional: number of executors
17
+ }
18
+
19
+ Requirements:
20
+ - Standalone Spark cluster must be running
21
+ - Spark master must be accessible from client machine
22
+ - Workers must have network access to Maven Central (for JAR downloads)
23
+ """
24
+
25
+ from typing import Any, Dict, Optional, Set, Tuple
26
+
27
+ from dbt.compute.strategies.base import BaseConnectionStrategy
28
+ from dbt_common.exceptions import DbtRuntimeError
29
+
30
# PySpark is an optional dependency. Record availability in a flag so
# get_spark_session() can raise a clear DbtRuntimeError instead of an
# ImportError surfacing at module import time.
try:
    from pyspark.sql import SparkSession

    PYSPARK_AVAILABLE = True
except ImportError:
    PYSPARK_AVAILABLE = False
    SparkSession = None  # placeholder so annotations referencing SparkSession still evaluate
37
+
38
+
39
class StandaloneStrategy(BaseConnectionStrategy):
    """
    Standalone Spark cluster connection strategy.

    Connects to self-managed Spark clusters using a spark:// master URL.
    Uses spark.jars.packages for JDBC JAR provisioning so workers can
    download drivers from Maven Central (local JAR paths would not exist
    on remote worker nodes).
    """

    def validate_config(self) -> None:
        """
        Validate Standalone strategy configuration.

        Required:
            - master: Must start with "spark://" for standalone clusters

        :raises DbtRuntimeError: If configuration is invalid
        """
        if not isinstance(self.config, dict):
            raise DbtRuntimeError(
                f"Standalone config must be a dictionary, got {type(self.config)}"
            )

        # Check master format
        master = self.config.get("master", "")
        if not master.startswith("spark://"):
            raise DbtRuntimeError(
                f"Standalone config requires master to start with 'spark://', got: {master}"
            )

    def get_spark_session(self, adapter_types: Optional[Set[str]] = None) -> SparkSession:
        """
        Create a Spark session connected to the standalone cluster.

        :param adapter_types: Set of adapter types that need JDBC drivers;
            auto-detected from the project when None
        :returns: Initialized SparkSession connected to the standalone cluster
        :raises DbtRuntimeError: If PySpark is missing or session creation fails
        """
        if not PYSPARK_AVAILABLE:
            raise DbtRuntimeError("PySpark is not available. Install it with: pip install pyspark")

        try:
            # v0.51.0: Ensure Java is available
            from dbt.compute.strategies.local import _ensure_java_available

            _ensure_java_available()

            # v0.51.0: Stop any existing session to ensure fresh config
            existing = SparkSession.getActiveSession()
            if existing:
                existing.stop()

            builder = SparkSession.builder.appName(self.app_name)

            # Set master URL
            master = self.config.get("master")
            builder = builder.master(master)

            # v0.5.99: Get JDBC JAR config (Maven coordinates for remote workers)
            # Merge with user-provided spark.jars.packages instead of overwriting
            if adapter_types is None:
                from dbt.compute.jar_provisioning import get_required_adapter_types

                adapter_types = get_required_adapter_types()

            auto_packages = []
            if adapter_types:
                jar_config = self.get_jar_provisioning_config(adapter_types)
                auto_packages_str = jar_config.get("spark.jars.packages", "")
                if auto_packages_str:
                    auto_packages = [p.strip() for p in auto_packages_str.split(",") if p.strip()]

            # Get user-provided packages from config
            user_packages_str = self.config.get("spark.jars.packages", "")
            user_packages = [p.strip() for p in user_packages_str.split(",") if p.strip()]

            # Merge packages (user + auto-detected). Sorted so the resulting
            # config value is deterministic across runs (raw set order is not).
            all_packages = sorted(set(user_packages + auto_packages))
            if all_packages:
                builder = builder.config("spark.jars.packages", ",".join(all_packages))

            # Apply user-provided configs (except spark.jars.packages which we merged)
            for key, value in self.config.items():
                if key != "master" and key != "spark.jars.packages":
                    builder = builder.config(key, value)

            # Default optimizations (only when the user did not override them)
            default_configs = {
                "spark.sql.execution.arrow.pyspark.enabled": "true",
                "spark.sql.execution.arrow.pyspark.fallback.enabled": "true",
            }
            for key, value in default_configs.items():
                if key not in self.config:
                    builder = builder.config(key, value)

            # DVT v0.51.5: Auto-configure driver host for Docker Spark clusters
            # When master is on localhost, workers (in Docker containers) need to reach
            # the driver running on the host machine via host.docker.internal
            if "spark.driver.host" not in self.config:
                if "localhost" in master or "127.0.0.1" in master:
                    builder = builder.config("spark.driver.host", "host.docker.internal")

            # Create session
            spark = builder.getOrCreate()
            spark.sparkContext.setLogLevel("WARN")

            return spark

        except Exception as e:
            error_msg = str(e)
            master = self.config.get("master", "unknown")
            if "Connection refused" in error_msg:
                raise DbtRuntimeError(
                    f"Cannot connect to Spark master at '{master}'. "
                    f"Ensure the cluster is running and accessible. Error: {error_msg}"
                ) from e
            raise DbtRuntimeError(f"Failed to create Standalone Spark session: {error_msg}") from e

    def close(self, spark: Optional[SparkSession]) -> None:
        """
        Clean up Spark session.

        For standalone clusters, we stop the application but the cluster continues running.

        :param spark: SparkSession to clean up
        """
        if spark:
            try:
                spark.stop()
            except Exception:
                pass  # Best effort cleanup

    def estimate_cost(self, duration_minutes: float) -> float:
        """
        Estimate cost for standalone cluster execution.

        For self-managed clusters, returns 0.0 as cost depends on infrastructure.

        :param duration_minutes: Estimated query duration in minutes
        :returns: 0.0 (infrastructure cost varies)
        """
        # Self-managed clusters have variable cost based on infrastructure
        return 0.0

    def get_platform_name(self) -> str:
        """Get platform name."""
        return "standalone"

    def get_jar_provisioning_config(self, adapter_types: Set[str]) -> Dict[str, str]:
        """
        Get Spark config for JDBC JAR provisioning using Maven coordinates.

        Standalone clusters need spark.jars.packages so workers can download
        JDBC drivers from Maven Central. Local file paths don't work because
        they're not available on remote worker nodes.

        :param adapter_types: Set of adapter types that need JDBC drivers
        :returns: Dictionary with spark.jars.packages config
        """
        from dbt.compute.jar_provisioning import RemoteJARProvisioning

        provisioning = RemoteJARProvisioning()
        return provisioning.get_spark_config(adapter_types)

    def test_connectivity(self) -> Tuple[bool, str]:
        """
        Test connectivity to standalone Spark cluster.

        v0.51.1: Added timeout to prevent hanging when workers unavailable.
        v0.51.8: Increased timeout to 90s for Docker clusters (JDBC JAR download time).

        :returns: Tuple of (success, message)
        """
        if not PYSPARK_AVAILABLE:
            return (False, "PySpark not installed")

        import concurrent.futures

        master = self.config.get("master", "unknown")

        def _run_test() -> bool:
            # Probe: create a session and run a trivial query end-to-end.
            spark = self.get_spark_session()
            try:
                spark.sql("SELECT 1 AS test").collect()
            finally:
                # Don't leak the probe session (the cluster keeps running).
                spark.stop()
            return True

        # Run the probe in a single-worker executor with a timeout so a hung
        # cluster cannot block forever.
        # NOTE: deliberately NOT using `with ThreadPoolExecutor(...)` here —
        # the context manager calls shutdown(wait=True) on exit, which would
        # block on the still-running worker thread and defeat the timeout.
        # v0.51.8: 90s (up from 30s) — Docker Spark clusters need time for
        # JDBC JAR downloads from Maven on first run.
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        try:
            future = executor.submit(_run_test)
            try:
                future.result(timeout=90)  # 90 second timeout for JAR downloads
                return (True, "Standalone cluster session created and SQL test passed")
            except concurrent.futures.TimeoutError:
                return (False,
                    f"Timeout (90s): Workers not responding at '{master}'.\n"
                    f"Check: cluster workers are running, network access from driver to workers.\n"
                    f"Note: First run may take longer due to JDBC JAR downloads."
                )
        except Exception as e:
            error_msg = str(e)
            if "Connection refused" in error_msg:
                return (False, f"Cannot connect to Spark master at '{master}'")
            if "Initial job has not accepted any resources" in error_msg:
                return (False,
                    f"Workers not accepting tasks at '{master}'.\n"
                    f"Check: spark.driver.host is set correctly for your network topology"
                )
            return (False, f"Standalone connection failed: {e}")
        finally:
            # wait=False: return immediately even if the probe thread is stuck;
            # the abandoned thread can finish (or die with the process) on its own.
            executor.shutdown(wait=False)

    def get_cluster_info(self) -> Dict[str, Any]:
        """
        Get information about the standalone cluster configuration.

        :returns: Dictionary with cluster metadata
        """
        return {
            "platform": "standalone",
            "master": self.config.get("master", "unknown"),
            "executor_instances": self.config.get("spark.executor.instances", "dynamic"),
            "executor_memory": self.config.get("spark.executor.memory", "default"),
            "executor_cores": self.config.get("spark.executor.cores", "default"),
        }
dbt/config/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ # all these are just exports, they need "noqa" so flake8 will not complain.
2
+ from .profile import Profile # noqa
3
+ from .project import IsFQNResource, PartialProject, Project # noqa
4
+ from .runtime import RuntimeConfig # noqa
dbt/config/catalogs.py ADDED
@@ -0,0 +1,94 @@
1
+ import os
2
+ from copy import deepcopy
3
+ from typing import Any, Dict, List, Optional
4
+
5
+ from dbt.artifacts.resources import Catalog, CatalogWriteIntegrationConfig
6
+ from dbt.clients.yaml_helper import load_yaml_text
7
+ from dbt.config.renderer import SecretRenderer
8
+ from dbt.constants import CATALOGS_FILE_NAME
9
+ from dbt.exceptions import YamlLoadError
10
+ from dbt_common.clients.system import load_file_contents
11
+ from dbt_common.exceptions import CompilationError, DbtValidationError
12
+
13
+
14
def load_catalogs_yml(project_dir: str, project_name: str) -> Dict[str, Any]:
    """Read and parse the project's catalogs YAML file.

    Returns an empty dict when the file does not exist in *project_dir*.

    :raises YamlLoadError: if the file is empty or fails YAML validation.
    """
    path = os.path.join(project_dir, CATALOGS_FILE_NAME)

    # Missing file is not an error: the project simply defines no catalogs.
    if not os.path.isfile(path):
        return {}

    try:
        raw_text = load_file_contents(path, strip=False)
        parsed = load_yaml_text(raw_text)
        if not parsed:
            raise DbtValidationError(f"The file at {path} is empty")
    except DbtValidationError as e:
        # Re-wrap so the error carries project and file context.
        raise YamlLoadError(project_name=project_name, path=CATALOGS_FILE_NAME, exc=e)

    return parsed
30
+
31
+
32
def load_single_catalog(raw_catalog: Dict[str, Any], renderer: SecretRenderer) -> Catalog:
    """Render, validate, and build one Catalog from its raw YAML dict.

    :raises DbtValidationError: on rendering failure, duplicate integration
        names, or a missing/unknown ``active_write_integration``.
    """
    try:
        rendered_catalog = renderer.render_data(raw_catalog)
    except CompilationError as exc:
        raise DbtValidationError(str(exc)) from exc

    Catalog.validate(rendered_catalog)

    write_integrations = []
    seen_integration_names = set()

    for raw_integration in rendered_catalog.get("write_integrations", []):
        if raw_integration["name"] in seen_integration_names:
            raise DbtValidationError(
                f"Catalog '{rendered_catalog['name']}' cannot have multiple 'write_integrations' with the same name: '{raw_integration['name']}'."
            )
        seen_integration_names.add(raw_integration["name"])

        # We're going to let the adapter validate the integration config.
        # NOTE(review): catalog_name comes from the *raw* dict while the error
        # messages above use the rendered one — presumably identical; confirm.
        write_integrations.append(
            CatalogWriteIntegrationConfig(**raw_integration, catalog_name=raw_catalog["name"])
        )

    # Validate + set default active_write_integration if unset.
    active_write_integration = rendered_catalog.get("active_write_integration")
    valid_write_integration_names = [integration.name for integration in write_integrations]

    if active_write_integration:
        if active_write_integration not in valid_write_integration_names:
            raise DbtValidationError(
                f"Catalog '{rendered_catalog['name']}' must specify an 'active_write_integration' from its set of defined 'write_integrations': {valid_write_integration_names}. Got: '{active_write_integration}'."
            )
    elif len(valid_write_integration_names) == 1:
        # Exactly one integration defined: it is the implicit default.
        active_write_integration = write_integrations[0].name
    else:
        raise DbtValidationError(
            f"Catalog '{rendered_catalog['name']}' must specify an 'active_write_integration' when multiple 'write_integrations' are provided."
        )

    return Catalog(
        name=raw_catalog["name"],
        active_write_integration=active_write_integration,
        write_integrations=write_integrations,
    )
77
+
78
+
79
+ def load_catalogs(project_dir: str, project_name: str, cli_vars: Dict[str, Any]) -> List[Catalog]:
80
+ raw_catalogs = load_catalogs_yml(project_dir, project_name).get("catalogs", [])
81
+ catalogs_renderer = SecretRenderer(cli_vars)
82
+
83
+ return [load_single_catalog(raw_catalog, catalogs_renderer) for raw_catalog in raw_catalogs]
84
+
85
+
86
+ def get_active_write_integration(catalog: Catalog) -> Optional[CatalogWriteIntegrationConfig]:
87
+ for integration in catalog.write_integrations:
88
+ if integration.name == catalog.active_write_integration:
89
+ active_integration = deepcopy(integration)
90
+ active_integration.catalog_name = active_integration.name
91
+ active_integration.name = catalog.name
92
+ return active_integration
93
+
94
+ return None