dvt_core-0.52.2-cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release of dvt-core has been flagged as potentially problematic.

Files changed (275)
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2039 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +804 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +50 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +624 -0
  74. dbt/compute/federated_executor.py +837 -0
  75. dbt/compute/filter_pushdown.cpython-310-darwin.so +0 -0
  76. dbt/compute/filter_pushdown.py +273 -0
  77. dbt/compute/jar_provisioning.cpython-310-darwin.so +0 -0
  78. dbt/compute/jar_provisioning.py +255 -0
  79. dbt/compute/java_compat.cpython-310-darwin.so +0 -0
  80. dbt/compute/java_compat.py +689 -0
  81. dbt/compute/jdbc_utils.cpython-310-darwin.so +0 -0
  82. dbt/compute/jdbc_utils.py +678 -0
  83. dbt/compute/smart_selector.cpython-310-darwin.so +0 -0
  84. dbt/compute/smart_selector.py +311 -0
  85. dbt/compute/strategies/__init__.py +54 -0
  86. dbt/compute/strategies/base.py +165 -0
  87. dbt/compute/strategies/dataproc.py +207 -0
  88. dbt/compute/strategies/emr.py +203 -0
  89. dbt/compute/strategies/local.py +364 -0
  90. dbt/compute/strategies/standalone.py +262 -0
  91. dbt/config/__init__.py +4 -0
  92. dbt/config/catalogs.py +94 -0
  93. dbt/config/compute.cpython-310-darwin.so +0 -0
  94. dbt/config/compute.py +547 -0
  95. dbt/config/dvt_profile.cpython-310-darwin.so +0 -0
  96. dbt/config/dvt_profile.py +342 -0
  97. dbt/config/profile.py +422 -0
  98. dbt/config/project.py +873 -0
  99. dbt/config/project_utils.py +28 -0
  100. dbt/config/renderer.py +231 -0
  101. dbt/config/runtime.py +553 -0
  102. dbt/config/selectors.py +208 -0
  103. dbt/config/utils.py +77 -0
  104. dbt/constants.py +28 -0
  105. dbt/context/__init__.py +0 -0
  106. dbt/context/base.py +745 -0
  107. dbt/context/configured.py +135 -0
  108. dbt/context/context_config.py +382 -0
  109. dbt/context/docs.py +82 -0
  110. dbt/context/exceptions_jinja.py +178 -0
  111. dbt/context/macro_resolver.py +195 -0
  112. dbt/context/macros.py +171 -0
  113. dbt/context/manifest.py +72 -0
  114. dbt/context/providers.py +2249 -0
  115. dbt/context/query_header.py +13 -0
  116. dbt/context/secret.py +58 -0
  117. dbt/context/target.py +74 -0
  118. dbt/contracts/__init__.py +0 -0
  119. dbt/contracts/files.py +413 -0
  120. dbt/contracts/graph/__init__.py +0 -0
  121. dbt/contracts/graph/manifest.py +1904 -0
  122. dbt/contracts/graph/metrics.py +97 -0
  123. dbt/contracts/graph/model_config.py +70 -0
  124. dbt/contracts/graph/node_args.py +42 -0
  125. dbt/contracts/graph/nodes.py +1806 -0
  126. dbt/contracts/graph/semantic_manifest.py +232 -0
  127. dbt/contracts/graph/unparsed.py +811 -0
  128. dbt/contracts/project.py +417 -0
  129. dbt/contracts/results.py +53 -0
  130. dbt/contracts/selection.py +23 -0
  131. dbt/contracts/sql.py +85 -0
  132. dbt/contracts/state.py +68 -0
  133. dbt/contracts/util.py +46 -0
  134. dbt/deprecations.py +346 -0
  135. dbt/deps/__init__.py +0 -0
  136. dbt/deps/base.py +152 -0
  137. dbt/deps/git.py +195 -0
  138. dbt/deps/local.py +79 -0
  139. dbt/deps/registry.py +130 -0
  140. dbt/deps/resolver.py +149 -0
  141. dbt/deps/tarball.py +120 -0
  142. dbt/docs/source/_ext/dbt_click.py +119 -0
  143. dbt/docs/source/conf.py +32 -0
  144. dbt/env_vars.py +64 -0
  145. dbt/event_time/event_time.py +40 -0
  146. dbt/event_time/sample_window.py +60 -0
  147. dbt/events/__init__.py +15 -0
  148. dbt/events/base_types.py +36 -0
  149. dbt/events/core_types_pb2.py +2 -0
  150. dbt/events/logging.py +108 -0
  151. dbt/events/types.py +2516 -0
  152. dbt/exceptions.py +1486 -0
  153. dbt/flags.py +89 -0
  154. dbt/graph/__init__.py +11 -0
  155. dbt/graph/cli.py +247 -0
  156. dbt/graph/graph.py +172 -0
  157. dbt/graph/queue.py +214 -0
  158. dbt/graph/selector.py +374 -0
  159. dbt/graph/selector_methods.py +975 -0
  160. dbt/graph/selector_spec.py +222 -0
  161. dbt/graph/thread_pool.py +18 -0
  162. dbt/hooks.py +21 -0
  163. dbt/include/README.md +49 -0
  164. dbt/include/__init__.py +3 -0
  165. dbt/include/starter_project/.gitignore +4 -0
  166. dbt/include/starter_project/README.md +15 -0
  167. dbt/include/starter_project/__init__.py +3 -0
  168. dbt/include/starter_project/analyses/.gitkeep +0 -0
  169. dbt/include/starter_project/dbt_project.yml +36 -0
  170. dbt/include/starter_project/macros/.gitkeep +0 -0
  171. dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  172. dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  173. dbt/include/starter_project/models/example/schema.yml +21 -0
  174. dbt/include/starter_project/seeds/.gitkeep +0 -0
  175. dbt/include/starter_project/snapshots/.gitkeep +0 -0
  176. dbt/include/starter_project/tests/.gitkeep +0 -0
  177. dbt/internal_deprecations.py +26 -0
  178. dbt/jsonschemas/__init__.py +3 -0
  179. dbt/jsonschemas/jsonschemas.py +309 -0
  180. dbt/jsonschemas/project/0.0.110.json +4717 -0
  181. dbt/jsonschemas/project/0.0.85.json +2015 -0
  182. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  183. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  184. dbt/jsonschemas/resources/latest.json +6773 -0
  185. dbt/links.py +4 -0
  186. dbt/materializations/__init__.py +0 -0
  187. dbt/materializations/incremental/__init__.py +0 -0
  188. dbt/materializations/incremental/microbatch.py +236 -0
  189. dbt/mp_context.py +8 -0
  190. dbt/node_types.py +37 -0
  191. dbt/parser/__init__.py +23 -0
  192. dbt/parser/analysis.py +21 -0
  193. dbt/parser/base.py +548 -0
  194. dbt/parser/common.py +266 -0
  195. dbt/parser/docs.py +52 -0
  196. dbt/parser/fixtures.py +51 -0
  197. dbt/parser/functions.py +30 -0
  198. dbt/parser/generic_test.py +100 -0
  199. dbt/parser/generic_test_builders.py +333 -0
  200. dbt/parser/hooks.py +118 -0
  201. dbt/parser/macros.py +137 -0
  202. dbt/parser/manifest.py +2204 -0
  203. dbt/parser/models.py +573 -0
  204. dbt/parser/partial.py +1178 -0
  205. dbt/parser/read_files.py +445 -0
  206. dbt/parser/schema_generic_tests.py +422 -0
  207. dbt/parser/schema_renderer.py +111 -0
  208. dbt/parser/schema_yaml_readers.py +935 -0
  209. dbt/parser/schemas.py +1466 -0
  210. dbt/parser/search.py +149 -0
  211. dbt/parser/seeds.py +28 -0
  212. dbt/parser/singular_test.py +20 -0
  213. dbt/parser/snapshots.py +44 -0
  214. dbt/parser/sources.py +558 -0
  215. dbt/parser/sql.py +62 -0
  216. dbt/parser/unit_tests.py +621 -0
  217. dbt/plugins/__init__.py +20 -0
  218. dbt/plugins/contracts.py +9 -0
  219. dbt/plugins/exceptions.py +2 -0
  220. dbt/plugins/manager.py +163 -0
  221. dbt/plugins/manifest.py +21 -0
  222. dbt/profiler.py +20 -0
  223. dbt/py.typed +1 -0
  224. dbt/query_analyzer.cpython-310-darwin.so +0 -0
  225. dbt/query_analyzer.py +410 -0
  226. dbt/runners/__init__.py +2 -0
  227. dbt/runners/exposure_runner.py +7 -0
  228. dbt/runners/no_op_runner.py +45 -0
  229. dbt/runners/saved_query_runner.py +7 -0
  230. dbt/selected_resources.py +8 -0
  231. dbt/task/__init__.py +0 -0
  232. dbt/task/base.py +503 -0
  233. dbt/task/build.py +197 -0
  234. dbt/task/clean.py +56 -0
  235. dbt/task/clone.py +161 -0
  236. dbt/task/compile.py +150 -0
  237. dbt/task/compute.py +454 -0
  238. dbt/task/debug.py +505 -0
  239. dbt/task/deps.py +280 -0
  240. dbt/task/docs/__init__.py +3 -0
  241. dbt/task/docs/generate.py +660 -0
  242. dbt/task/docs/index.html +250 -0
  243. dbt/task/docs/serve.py +29 -0
  244. dbt/task/freshness.py +322 -0
  245. dbt/task/function.py +121 -0
  246. dbt/task/group_lookup.py +46 -0
  247. dbt/task/init.py +553 -0
  248. dbt/task/java.py +316 -0
  249. dbt/task/list.py +236 -0
  250. dbt/task/printer.py +175 -0
  251. dbt/task/retry.py +175 -0
  252. dbt/task/run.py +1306 -0
  253. dbt/task/run_operation.py +141 -0
  254. dbt/task/runnable.py +758 -0
  255. dbt/task/seed.py +103 -0
  256. dbt/task/show.py +149 -0
  257. dbt/task/snapshot.py +56 -0
  258. dbt/task/spark.py +414 -0
  259. dbt/task/sql.py +110 -0
  260. dbt/task/target_sync.py +759 -0
  261. dbt/task/test.py +464 -0
  262. dbt/tests/fixtures/__init__.py +1 -0
  263. dbt/tests/fixtures/project.py +620 -0
  264. dbt/tests/util.py +651 -0
  265. dbt/tracking.py +529 -0
  266. dbt/utils/__init__.py +3 -0
  267. dbt/utils/artifact_upload.py +151 -0
  268. dbt/utils/utils.py +408 -0
  269. dbt/version.py +268 -0
  270. dvt_cli/__init__.py +72 -0
  271. dvt_core-0.52.2.dist-info/METADATA +286 -0
  272. dvt_core-0.52.2.dist-info/RECORD +275 -0
  273. dvt_core-0.52.2.dist-info/WHEEL +5 -0
  274. dvt_core-0.52.2.dist-info/entry_points.txt +2 -0
  275. dvt_core-0.52.2.dist-info/top_level.txt +2 -0
dbt/compute/jdbc_utils.py
@@ -0,0 +1,678 @@
"""
JDBC Utilities for Spark Engine

Provides utilities for converting dbt adapter credentials to JDBC configurations
and helpers for optimizing parallel reads via partitioning.

This module enables DVT to bypass memory bottlenecks by using Spark JDBC connectors
to read data directly from source databases into Spark workers (distributed).

Architecture:
- Maps adapter credentials → JDBC URL + properties
- Auto-detects optimal partition columns for parallel reads
- Estimates partition bounds for efficient data distribution
"""

from typing import Dict, Optional, Tuple

from dbt.adapters.base import BaseAdapter
from dbt.adapters.contracts.connection import Credentials
from dbt_common.exceptions import DbtRuntimeError


# JDBC driver class mapping for database types
# DVT v0.5.9: Complete support for all dbt adapters with JDBC connectivity
JDBC_DRIVER_MAPPING = {
    # ============================================================
    # Cloud Data Warehouses
    # ============================================================
    "postgres": "org.postgresql.Driver",
    "postgresql": "org.postgresql.Driver",
    "snowflake": "net.snowflake.client.jdbc.SnowflakeDriver",
    "bigquery": "com.simba.googlebigquery.jdbc.Driver",
    "redshift": "com.amazon.redshift.jdbc.Driver",
    "databricks": "com.databricks.client.jdbc.Driver",
    "firebolt": "com.firebolt.FireboltDriver",

    # ============================================================
    # Microsoft Ecosystem (all use same JDBC driver)
    # ============================================================
    "sqlserver": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
    "mssql": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
    "fabric": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
    "synapse": "com.microsoft.sqlserver.jdbc.SQLServerDriver",

    # ============================================================
    # Enterprise Data Warehouses
    # ============================================================
    "oracle": "oracle.jdbc.OracleDriver",
    "db2": "com.ibm.db2.jcc.DB2Driver",
    "teradata": "com.teradata.jdbc.TeraDriver",
    "exasol": "com.exasol.jdbc.EXADriver",
    "vertica": "com.vertica.jdbc.Driver",

    # ============================================================
    # SQL Engines & Query Platforms
    # ============================================================
    "spark": "org.apache.hive.jdbc.HiveDriver",
    "trino": "io.trino.jdbc.TrinoDriver",
    "presto": "io.prestosql.jdbc.PrestoDriver",
    "athena": "com.simba.athena.jdbc.Driver",
    "hive": "org.apache.hive.jdbc.HiveDriver",
    "impala": "com.cloudera.impala.jdbc.Driver",
    "dremio": "com.dremio.jdbc.Driver",
    "glue": "com.amazonaws.glue.sql.jdbc.Driver",

    # ============================================================
    # Open Source Databases
    # ============================================================
    "mysql": "com.mysql.cj.jdbc.Driver",
    "mariadb": "org.mariadb.jdbc.Driver",
    "sqlite": "org.sqlite.JDBC",
    "duckdb": "org.duckdb.DuckDBDriver",
    "cratedb": "io.crate.client.jdbc.CrateDriver",

    # ============================================================
    # OLAP & Analytics Databases
    # ============================================================
    "clickhouse": "com.clickhouse.jdbc.ClickHouseDriver",
    "singlestore": "com.singlestore.jdbc.Driver",
    "starrocks": "com.mysql.cj.jdbc.Driver",  # StarRocks uses MySQL protocol
    "doris": "com.mysql.cj.jdbc.Driver",  # Apache Doris uses MySQL protocol
    "greenplum": "org.postgresql.Driver",  # Greenplum uses PostgreSQL protocol
    "monetdb": "org.monetdb.jdbc.MonetDriver",

    # ============================================================
    # Time-Series & Streaming
    # ============================================================
    "timescaledb": "org.postgresql.Driver",  # TimescaleDB uses PostgreSQL
    "questdb": "org.postgresql.Driver",  # QuestDB supports PostgreSQL wire protocol
    "materialize": "org.postgresql.Driver",  # Materialize uses PostgreSQL wire protocol
    "rockset": "com.rockset.jdbc.RocksetDriver",

    # ============================================================
    # Graph & Multi-Model
    # ============================================================
    "neo4j": "org.neo4j.Driver",

    # ============================================================
    # Data Lake Formats (via Spark connectors)
    # ============================================================
    "delta": "org.apache.hive.jdbc.HiveDriver",  # Delta Lake via Spark
    "iceberg": "org.apache.hive.jdbc.HiveDriver",  # Apache Iceberg via Spark
    "hudi": "org.apache.hive.jdbc.HiveDriver",  # Apache Hudi via Spark

    # ============================================================
    # AlloyDB (Google - PostgreSQL compatible)
    # ============================================================
    "alloydb": "org.postgresql.Driver",  # AlloyDB is PostgreSQL-compatible
}


def _rewrite_localhost_for_docker(jdbc_url: str) -> str:
    """
    Rewrite localhost/127.0.0.1 to host.docker.internal for Docker Spark clusters.

    DVT v0.51.8: When using Docker-based Spark clusters, workers inside containers
    need host.docker.internal to reach the host machine. With host.docker.internal
    also added to the host's /etc/hosts (pointing to 127.0.0.1), the same JDBC URL
    works for both driver (on host) and workers (in containers).

    :param jdbc_url: Original JDBC URL
    :returns: JDBC URL with localhost replaced by host.docker.internal
    """
    import re
    # Replace localhost or 127.0.0.1 with host.docker.internal
    url = re.sub(r'//localhost([:/?])', r'//host.docker.internal\1', jdbc_url)
    url = re.sub(r'//127\.0\.0\.1([:/?])', r'//host.docker.internal\1', url)
    return url


# Global flag to enable Docker JDBC URL rewriting
_docker_mode_enabled = False


def set_docker_mode(enabled: bool) -> None:
    """Enable or disable Docker mode for JDBC URL rewriting."""
    global _docker_mode_enabled
    _docker_mode_enabled = enabled


def build_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
    """
    Build JDBC configuration from dbt adapter credentials.

    Converts adapter-specific credentials to JDBC URL and connection properties
    that can be used by Spark JDBC connectors.

    :param credentials: dbt adapter credentials object
    :returns: Tuple of (jdbc_url, jdbc_properties)
    :raises DbtRuntimeError: If adapter type is not supported or credentials are invalid

    Example:
        >>> from dbt.adapters.postgres import PostgresCredentials
        >>> creds = PostgresCredentials(
        ...     host="localhost",
        ...     port=5432,
        ...     user="analytics",
        ...     password="secret",
        ...     database="warehouse",
        ...     schema="public"
        ... )
        >>> url, props = build_jdbc_config(creds)
        >>> print(url)
        jdbc:postgresql://localhost:5432/warehouse
        >>> print(props)
        {'user': 'analytics', 'password': 'secret', 'driver': 'org.postgresql.Driver'}
    """
    adapter_type = credentials.type.lower()

    # Check if adapter type is supported
    if adapter_type not in JDBC_DRIVER_MAPPING:
        raise DbtRuntimeError(
            f"JDBC connectivity not supported for adapter type '{adapter_type}'. "
            f"Supported types: {', '.join(JDBC_DRIVER_MAPPING.keys())}"
        )

    # Build JDBC URL and properties based on adapter type
    if adapter_type in ("postgres", "postgresql"):
        jdbc_url, jdbc_props = _build_postgres_jdbc_config(credentials)
    elif adapter_type == "mysql":
        jdbc_url, jdbc_props = _build_mysql_jdbc_config(credentials)
    elif adapter_type == "snowflake":
        jdbc_url, jdbc_props = _build_snowflake_jdbc_config(credentials)
    elif adapter_type == "redshift":
        jdbc_url, jdbc_props = _build_redshift_jdbc_config(credentials)
    elif adapter_type == "bigquery":
        jdbc_url, jdbc_props = _build_bigquery_jdbc_config(credentials)
    elif adapter_type in ("sqlserver", "mssql"):
        jdbc_url, jdbc_props = _build_sqlserver_jdbc_config(credentials)
    elif adapter_type == "oracle":
        jdbc_url, jdbc_props = _build_oracle_jdbc_config(credentials)
    elif adapter_type == "databricks":
        jdbc_url, jdbc_props = _build_databricks_jdbc_config(credentials)
    else:
        raise DbtRuntimeError(
            f"JDBC configuration builder not implemented for adapter type '{adapter_type}'"
        )

    # DVT v0.51.8: Rewrite localhost URLs for Docker Spark clusters
    if _docker_mode_enabled:
        jdbc_url = _rewrite_localhost_for_docker(jdbc_url)

    return jdbc_url, jdbc_props


def _build_postgres_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
    """Build JDBC config for PostgreSQL."""
    creds_dict = credentials.to_dict()

    host = creds_dict.get("host", "localhost")
    port = creds_dict.get("port", 5432)
    database = creds_dict.get("database")
    user = creds_dict.get("user")
    password = creds_dict.get("password", "")

    if not database:
        raise DbtRuntimeError("PostgreSQL credentials missing required field: database")
    if not user:
        raise DbtRuntimeError("PostgreSQL credentials missing required field: user")

    jdbc_url = f"jdbc:postgresql://{host}:{port}/{database}"

    jdbc_properties = {
        "user": user,
        "password": password,
        "driver": JDBC_DRIVER_MAPPING["postgres"],
    }

    # Optional: Add SSL configuration if present
    if creds_dict.get("sslmode"):
        jdbc_properties["ssl"] = "true" if creds_dict["sslmode"] != "disable" else "false"

    return jdbc_url, jdbc_properties


def _build_mysql_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
    """Build JDBC config for MySQL."""
    creds_dict = credentials.to_dict()

    host = creds_dict.get("host", "localhost")
    port = creds_dict.get("port", 3306)
    database = creds_dict.get("database")
    user = creds_dict.get("user")
    password = creds_dict.get("password", "")

    if not database:
        raise DbtRuntimeError("MySQL credentials missing required field: database")
    if not user:
        raise DbtRuntimeError("MySQL credentials missing required field: user")

    jdbc_url = f"jdbc:mysql://{host}:{port}/{database}"

    jdbc_properties = {
        "user": user,
        "password": password,
        "driver": JDBC_DRIVER_MAPPING["mysql"],
    }

    return jdbc_url, jdbc_properties


def _build_snowflake_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
    """Build JDBC config for Snowflake."""
    creds_dict = credentials.to_dict()

    account = creds_dict.get("account")
    user = creds_dict.get("user")
    password = creds_dict.get("password", "")
    database = creds_dict.get("database")
    warehouse = creds_dict.get("warehouse")
    schema = creds_dict.get("schema", "public")

    if not account:
        raise DbtRuntimeError("Snowflake credentials missing required field: account")
    if not user:
        raise DbtRuntimeError("Snowflake credentials missing required field: user")

    # Snowflake JDBC URL format with Arrow disabled via URL parameter
    # This is more reliable than JDBC properties for Snowflake driver
    jdbc_url = f"jdbc:snowflake://{account}.snowflakecomputing.com/?JDBC_QUERY_RESULT_FORMAT=JSON"

    jdbc_properties = {
        "user": user,
        "password": password,
        "driver": JDBC_DRIVER_MAPPING["snowflake"],
        # CRITICAL FIX v0.4.4: Disable Arrow format to avoid Java 21 module access errors
        # Property must be uppercase and set in BOTH URL and properties for reliability
        "JDBC_QUERY_RESULT_FORMAT": "JSON",
        "jdbc_query_result_format": "json",  # Lowercase variant for compatibility
        # Additional Snowflake-specific optimizations
        "JDBC_USE_SESSION_TIMEZONE": "false",  # Use UTC for consistency
    }

    # Add optional properties
    if database:
        jdbc_properties["db"] = database
    if warehouse:
        jdbc_properties["warehouse"] = warehouse
    if schema:
        jdbc_properties["schema"] = schema

    return jdbc_url, jdbc_properties


def _build_redshift_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
    """Build JDBC config for Amazon Redshift."""
    creds_dict = credentials.to_dict()

    host = creds_dict.get("host")
    port = creds_dict.get("port", 5439)
    database = creds_dict.get("database")
    user = creds_dict.get("user")
    password = creds_dict.get("password", "")

    if not host:
        raise DbtRuntimeError("Redshift credentials missing required field: host")
    if not database:
        raise DbtRuntimeError("Redshift credentials missing required field: database")
    if not user:
        raise DbtRuntimeError("Redshift credentials missing required field: user")

    jdbc_url = f"jdbc:redshift://{host}:{port}/{database}"

    jdbc_properties = {
        "user": user,
        "password": password,
        "driver": JDBC_DRIVER_MAPPING["redshift"],
    }

    return jdbc_url, jdbc_properties


def _build_bigquery_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
    """Build JDBC config for Google BigQuery."""
    creds_dict = credentials.to_dict()

    project = creds_dict.get("project")
    dataset = creds_dict.get("dataset") or creds_dict.get("schema")

    if not project:
        raise DbtRuntimeError("BigQuery credentials missing required field: project")

    # BigQuery JDBC URL format
    jdbc_url = "jdbc:bigquery://https://www.googleapis.com/bigquery/v2:443"

    jdbc_properties = {
        "ProjectId": project,
        "driver": JDBC_DRIVER_MAPPING["bigquery"],
    }

    if dataset:
        jdbc_properties["DefaultDataset"] = dataset

    # Handle authentication
    # BigQuery typically uses service account JSON or OAuth
    if creds_dict.get("keyfile"):
        jdbc_properties["OAuthType"] = "0"  # Service account
        jdbc_properties["OAuthServiceAcctEmail"] = creds_dict.get("client_email", "")
        jdbc_properties["OAuthPvtKeyPath"] = creds_dict["keyfile"]

    return jdbc_url, jdbc_properties


def _build_sqlserver_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
    """Build JDBC config for Microsoft SQL Server."""
    creds_dict = credentials.to_dict()

    host = creds_dict.get("host", "localhost")
    port = creds_dict.get("port", 1433)
    database = creds_dict.get("database")
    user = creds_dict.get("user")
    password = creds_dict.get("password", "")

    if not database:
        raise DbtRuntimeError("SQL Server credentials missing required field: database")
    if not user:
        raise DbtRuntimeError("SQL Server credentials missing required field: user")

    jdbc_url = f"jdbc:sqlserver://{host}:{port};databaseName={database}"

    jdbc_properties = {
        "user": user,
        "password": password,
        "driver": JDBC_DRIVER_MAPPING["sqlserver"],
    }

    return jdbc_url, jdbc_properties


def _build_oracle_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
    """Build JDBC config for Oracle Database."""
    creds_dict = credentials.to_dict()

    host = creds_dict.get("host", "localhost")
    port = creds_dict.get("port", 1521)
    database = creds_dict.get("database") or creds_dict.get("service_name")
    user = creds_dict.get("user")
    password = creds_dict.get("password", "")

    if not database:
        raise DbtRuntimeError("Oracle credentials missing required field: database/service_name")
    if not user:
        raise DbtRuntimeError("Oracle credentials missing required field: user")

    # Oracle thin driver format
    jdbc_url = f"jdbc:oracle:thin:@{host}:{port}:{database}"

    jdbc_properties = {
        "user": user,
        "password": password,
        "driver": JDBC_DRIVER_MAPPING["oracle"],
    }

    return jdbc_url, jdbc_properties


def _build_databricks_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
    """
    Build JDBC config for Databricks SQL Warehouse or Cluster.

    DVT v0.51.5: Added support for Databricks JDBC connectivity.

    Databricks JDBC URL format:
    jdbc:databricks://<host>:443/default;transportMode=http;ssl=1;httpPath=<http_path>;AuthMech=3;

    The dbt-databricks adapter credentials include:
    - host: Databricks workspace URL (e.g., dbc-xxxxx.cloud.databricks.com)
    - http_path: SQL warehouse or cluster HTTP path
    - token: Personal access token for authentication
    - catalog: Unity Catalog name (optional)
    - schema: Default schema (optional)
    """
    creds_dict = credentials.to_dict()

    host = creds_dict.get("host")
    http_path = creds_dict.get("http_path")
    token = creds_dict.get("token")
    catalog = creds_dict.get("catalog", "hive_metastore")
    schema = creds_dict.get("schema", "default")

    if not host:
        raise DbtRuntimeError("Databricks credentials missing required field: host")
    if not http_path:
        raise DbtRuntimeError("Databricks credentials missing required field: http_path")
    if not token:
        raise DbtRuntimeError("Databricks credentials missing required field: token")

    # Build Databricks JDBC URL
    # Format: jdbc:databricks://<host>:443/<catalog>;transportMode=http;ssl=1;httpPath=<http_path>;AuthMech=3;
    jdbc_url = (
        f"jdbc:databricks://{host}:443/{catalog};"
        f"transportMode=http;ssl=1;httpPath={http_path};AuthMech=3"
    )

    jdbc_properties = {
        "UID": "token",  # Databricks uses "token" as username for PAT auth
        "PWD": token,
        "driver": JDBC_DRIVER_MAPPING["databricks"],
    }

    return jdbc_url, jdbc_properties


def auto_detect_partition_column(adapter: BaseAdapter, schema: str, table: str) -> Optional[str]:
    """
    Auto-detect the best column for partitioning parallel JDBC reads.

    Queries table metadata to find a suitable partition column. Prioritizes:
    1. Primary key columns (single column PKs only)
    2. Columns named 'id' or ending with '_id'
    3. Timestamp/date columns
    4. Integer columns

    :param adapter: dbt adapter to use for querying metadata
    :param schema: Schema/dataset name
    :param table: Table name
    :returns: Column name suitable for partitioning, or None if not found

    Example:
        >>> column = auto_detect_partition_column(adapter, "public", "users")
        >>> if column:
        ...     print(f"Using {column} for partitioning")
        ... else:
        ...     print("No suitable partition column found")
    """
    try:
        # Strategy 1: Check for primary key
        pk_column = _get_primary_key_column(adapter, schema, table)
        if pk_column:
            return pk_column

        # Strategy 2: Get all columns and look for ID-like columns
        columns = _get_table_columns(adapter, schema, table)

        # Look for ID columns (exact match or suffix)
        for col_name, col_type in columns:
            col_name_lower = col_name.lower()
            if col_name_lower == "id" or col_name_lower.endswith("_id"):
                # Check if it's an integer type
                if _is_integer_type(col_type):
                    return col_name

        # Strategy 3: Look for timestamp/date columns
        for col_name, col_type in columns:
            if _is_timestamp_type(col_type):
                return col_name

        # Strategy 4: Look for any integer column
        for col_name, col_type in columns:
            if _is_integer_type(col_type):
                return col_name

        # No suitable column found
        return None

    except Exception:
        # If metadata query fails, return None (caller can decide to read without partitioning)
        return None


def estimate_partition_bounds(
    adapter: BaseAdapter, schema: str, table: str, column: str
) -> Tuple[int, int]:
    """
    Estimate partition bounds (min/max) for a numeric partition column.

    Queries the table to get MIN and MAX values of the partition column,
    which are used by Spark JDBC to distribute reads across workers.

    :param adapter: dbt adapter to use for querying
    :param schema: Schema/dataset name
    :param table: Table name
    :param column: Partition column name
    :returns: Tuple of (lower_bound, upper_bound)
    :raises DbtRuntimeError: If query fails or column is not numeric

    Example:
        >>> lower, upper = estimate_partition_bounds(adapter, "public", "orders", "order_id")
        >>> print(f"Partition range: {lower} to {upper}")
        Partition range: 1 to 1000000
    """
    try:
        # Build qualified table name
        qualified_table = f"{schema}.{table}"

        # Query for min/max
        sql = f"SELECT MIN({column}) as min_val, MAX({column}) as max_val FROM {qualified_table}"

        # Execute via adapter
        response, result_table = adapter.execute(sql, auto_begin=False, fetch=True)

        if not result_table or len(result_table.rows) == 0:
            raise DbtRuntimeError(
                f"Failed to estimate partition bounds for {qualified_table}.{column}: "
                "Query returned no results"
            )

        row = result_table.rows[0]
        min_val = row[0]
        max_val = row[1]

        if min_val is None or max_val is None:
            raise DbtRuntimeError(
                f"Failed to estimate partition bounds for {qualified_table}.{column}: "
                "Column contains only NULL values"
            )

        # Convert to integers
        lower_bound = int(min_val)
        upper_bound = int(max_val)

        return lower_bound, upper_bound

    except Exception as e:
        raise DbtRuntimeError(
            f"Failed to estimate partition bounds for {schema}.{table}.{column}: {str(e)}"
        ) from e


# Helper functions for metadata queries


def _get_primary_key_column(adapter: BaseAdapter, schema: str, table: str) -> Optional[str]:
    """
    Get primary key column name (if single-column PK exists).

    Implementation is adapter-specific. Returns None if not implemented
    or if PK is composite.
    """
    adapter_type = adapter.type().lower()

    try:
        if adapter_type in ("postgres", "postgresql", "redshift"):
            # PostgreSQL/Redshift: Query information_schema
            sql = f"""
                SELECT a.attname
                FROM pg_index i
                JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
                WHERE i.indrelid = '{schema}.{table}'::regclass
                AND i.indisprimary
            """
            response, result = adapter.execute(sql, auto_begin=False, fetch=True)
            if result and len(result.rows) == 1:
                return result.rows[0][0]

        elif adapter_type == "mysql":
            # MySQL: Query information_schema
            sql = f"""
                SELECT COLUMN_NAME
                FROM information_schema.KEY_COLUMN_USAGE
                WHERE TABLE_SCHEMA = '{schema}'
                AND TABLE_NAME = '{table}'
                AND CONSTRAINT_NAME = 'PRIMARY'
            """
            response, result = adapter.execute(sql, auto_begin=False, fetch=True)
            if result and len(result.rows) == 1:
                return result.rows[0][0]

        # For other adapters or if query fails, return None
        return None

    except Exception:
        return None


def _get_table_columns(adapter: BaseAdapter, schema: str, table: str) -> list[Tuple[str, str]]:
    """
    Get list of (column_name, column_type) for a table.
    """
    adapter_type = adapter.type().lower()

    try:
        if adapter_type in ("postgres", "postgresql", "redshift"):
            sql = f"""
                SELECT column_name, data_type
                FROM information_schema.columns
                WHERE table_schema = '{schema}'
                AND table_name = '{table}'
                ORDER BY ordinal_position
            """
            response, result = adapter.execute(sql, auto_begin=False, fetch=True)
            return [(row[0], row[1]) for row in result.rows]

        elif adapter_type == "mysql":
            sql = f"""
                SELECT COLUMN_NAME, DATA_TYPE
                FROM information_schema.COLUMNS
                WHERE TABLE_SCHEMA = '{schema}'
                AND TABLE_NAME = '{table}'
                ORDER BY ORDINAL_POSITION
            """
            response, result = adapter.execute(sql, auto_begin=False, fetch=True)
            return [(row[0], row[1]) for row in result.rows]

        else:
            # Fallback: Use LIMIT 0 query to get columns
            sql = f"SELECT * FROM {schema}.{table} LIMIT 0"
            response, result = adapter.execute(sql, auto_begin=False, fetch=True)
            # Return column names with unknown types
            return [(col, "unknown") for col in result.column_names]

    except Exception:
        return []


def _is_integer_type(sql_type: str) -> bool:
    """Check if SQL type is an integer type."""
    sql_type_upper = sql_type.upper()
    return any(
        int_type in sql_type_upper
        for int_type in ["INT", "INTEGER", "BIGINT", "SMALLINT", "SERIAL"]
    )


def _is_timestamp_type(sql_type: str) -> bool:
    """Check if SQL type is a timestamp/date type."""
    sql_type_upper = sql_type.upper()
    return any(time_type in sql_type_upper for time_type in ["TIMESTAMP", "DATETIME", "DATE"])
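
Taken together, these utilities are shaped to feed Spark's JDBC reader: build_jdbc_config() supplies the URL and connection properties, while auto_detect_partition_column() and estimate_partition_bounds() supply the column and lower/upper bounds that let Spark split one table read across workers. The sketch below is illustrative only and is not part of the wheel; it assumes an existing SparkSession (spark), a dbt adapter instance and its credentials object, and the helper name read_source_table is hypothetical. Inside dvt-core the actual orchestration lives in modules such as dbt/compute/engines/spark_engine.py and dbt/compute/federated_executor.py, which this sketch does not reproduce.

# Hypothetical usage sketch; not shipped in the package.
from dbt.compute.jdbc_utils import (
    auto_detect_partition_column,
    build_jdbc_config,
    estimate_partition_bounds,
)

def read_source_table(spark, adapter, credentials, schema: str, table: str):
    """Read a source table into a Spark DataFrame, in parallel when possible."""
    # 1. Map dbt credentials to a JDBC URL plus connection properties (includes driver class).
    url, props = build_jdbc_config(credentials)

    # 2. Look for a column the read can be split on (PK, *_id, timestamp, or integer).
    partition_column = auto_detect_partition_column(adapter, schema, table)
    if partition_column is None:
        # No suitable column: fall back to a single-partition read.
        return spark.read.jdbc(url=url, table=f"{schema}.{table}", properties=props)

    # 3. Bound the partition column so Spark can generate balanced per-partition predicates.
    lower, upper = estimate_partition_bounds(adapter, schema, table, partition_column)

    # 4. Distributed read: Spark issues one JDBC query per partition.
    return spark.read.jdbc(
        url=url,
        table=f"{schema}.{table}",
        column=partition_column,
        lowerBound=lower,
        upperBound=upper,
        numPartitions=8,  # arbitrary example value; tune to cluster size
        properties=props,
    )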