dvt-core 0.59.0a51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2660 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +844 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +60 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +642 -0
  74. dbt/compute/federated_executor.py +1080 -0
  75. dbt/compute/filter_pushdown.py +273 -0
  76. dbt/compute/jar_provisioning.py +273 -0
  77. dbt/compute/java_compat.py +689 -0
  78. dbt/compute/jdbc_utils.py +1252 -0
  79. dbt/compute/metadata/__init__.py +63 -0
  80. dbt/compute/metadata/adapters_registry.py +370 -0
  81. dbt/compute/metadata/catalog_store.py +1036 -0
  82. dbt/compute/metadata/registry.py +674 -0
  83. dbt/compute/metadata/store.py +1020 -0
  84. dbt/compute/smart_selector.py +377 -0
  85. dbt/compute/spark_logger.py +272 -0
  86. dbt/compute/strategies/__init__.py +55 -0
  87. dbt/compute/strategies/base.py +165 -0
  88. dbt/compute/strategies/dataproc.py +207 -0
  89. dbt/compute/strategies/emr.py +203 -0
  90. dbt/compute/strategies/local.py +472 -0
  91. dbt/compute/strategies/standalone.py +262 -0
  92. dbt/config/__init__.py +4 -0
  93. dbt/config/catalogs.py +94 -0
  94. dbt/config/compute.py +513 -0
  95. dbt/config/dvt_profile.py +408 -0
  96. dbt/config/profile.py +422 -0
  97. dbt/config/project.py +888 -0
  98. dbt/config/project_utils.py +48 -0
  99. dbt/config/renderer.py +231 -0
  100. dbt/config/runtime.py +564 -0
  101. dbt/config/selectors.py +208 -0
  102. dbt/config/utils.py +77 -0
  103. dbt/constants.py +28 -0
  104. dbt/context/__init__.py +0 -0
  105. dbt/context/base.py +745 -0
  106. dbt/context/configured.py +135 -0
  107. dbt/context/context_config.py +382 -0
  108. dbt/context/docs.py +82 -0
  109. dbt/context/exceptions_jinja.py +178 -0
  110. dbt/context/macro_resolver.py +195 -0
  111. dbt/context/macros.py +171 -0
  112. dbt/context/manifest.py +72 -0
  113. dbt/context/providers.py +2249 -0
  114. dbt/context/query_header.py +13 -0
  115. dbt/context/secret.py +58 -0
  116. dbt/context/target.py +74 -0
  117. dbt/contracts/__init__.py +0 -0
  118. dbt/contracts/files.py +413 -0
  119. dbt/contracts/graph/__init__.py +0 -0
  120. dbt/contracts/graph/manifest.py +1904 -0
  121. dbt/contracts/graph/metrics.py +97 -0
  122. dbt/contracts/graph/model_config.py +70 -0
  123. dbt/contracts/graph/node_args.py +42 -0
  124. dbt/contracts/graph/nodes.py +1806 -0
  125. dbt/contracts/graph/semantic_manifest.py +232 -0
  126. dbt/contracts/graph/unparsed.py +811 -0
  127. dbt/contracts/project.py +419 -0
  128. dbt/contracts/results.py +53 -0
  129. dbt/contracts/selection.py +23 -0
  130. dbt/contracts/sql.py +85 -0
  131. dbt/contracts/state.py +68 -0
  132. dbt/contracts/util.py +46 -0
  133. dbt/deprecations.py +348 -0
  134. dbt/deps/__init__.py +0 -0
  135. dbt/deps/base.py +152 -0
  136. dbt/deps/git.py +195 -0
  137. dbt/deps/local.py +79 -0
  138. dbt/deps/registry.py +130 -0
  139. dbt/deps/resolver.py +149 -0
  140. dbt/deps/tarball.py +120 -0
  141. dbt/docs/source/_ext/dbt_click.py +119 -0
  142. dbt/docs/source/conf.py +32 -0
  143. dbt/env_vars.py +64 -0
  144. dbt/event_time/event_time.py +40 -0
  145. dbt/event_time/sample_window.py +60 -0
  146. dbt/events/__init__.py +15 -0
  147. dbt/events/base_types.py +36 -0
  148. dbt/events/core_types_pb2.py +2 -0
  149. dbt/events/logging.py +108 -0
  150. dbt/events/types.py +2516 -0
  151. dbt/exceptions.py +1486 -0
  152. dbt/flags.py +89 -0
  153. dbt/graph/__init__.py +11 -0
  154. dbt/graph/cli.py +249 -0
  155. dbt/graph/graph.py +172 -0
  156. dbt/graph/queue.py +214 -0
  157. dbt/graph/selector.py +374 -0
  158. dbt/graph/selector_methods.py +975 -0
  159. dbt/graph/selector_spec.py +222 -0
  160. dbt/graph/thread_pool.py +18 -0
  161. dbt/hooks.py +21 -0
  162. dbt/include/README.md +49 -0
  163. dbt/include/__init__.py +3 -0
  164. dbt/include/data/adapters_registry.duckdb +0 -0
  165. dbt/include/data/build_comprehensive_registry.py +1254 -0
  166. dbt/include/data/build_registry.py +242 -0
  167. dbt/include/data/csv/adapter_queries.csv +33 -0
  168. dbt/include/data/csv/syntax_rules.csv +9 -0
  169. dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
  170. dbt/include/data/csv/type_mappings_databricks.csv +30 -0
  171. dbt/include/data/csv/type_mappings_mysql.csv +40 -0
  172. dbt/include/data/csv/type_mappings_oracle.csv +30 -0
  173. dbt/include/data/csv/type_mappings_postgres.csv +56 -0
  174. dbt/include/data/csv/type_mappings_redshift.csv +33 -0
  175. dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
  176. dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
  177. dbt/include/dvt_starter_project/README.md +15 -0
  178. dbt/include/dvt_starter_project/__init__.py +3 -0
  179. dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
  180. dbt/include/dvt_starter_project/dvt_project.yml +39 -0
  181. dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
  182. dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
  183. dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
  184. dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
  185. dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
  186. dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
  187. dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
  188. dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
  189. dbt/internal_deprecations.py +26 -0
  190. dbt/jsonschemas/__init__.py +3 -0
  191. dbt/jsonschemas/jsonschemas.py +309 -0
  192. dbt/jsonschemas/project/0.0.110.json +4717 -0
  193. dbt/jsonschemas/project/0.0.85.json +2015 -0
  194. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  195. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  196. dbt/jsonschemas/resources/latest.json +6773 -0
  197. dbt/links.py +4 -0
  198. dbt/materializations/__init__.py +0 -0
  199. dbt/materializations/incremental/__init__.py +0 -0
  200. dbt/materializations/incremental/microbatch.py +236 -0
  201. dbt/mp_context.py +8 -0
  202. dbt/node_types.py +37 -0
  203. dbt/parser/__init__.py +23 -0
  204. dbt/parser/analysis.py +21 -0
  205. dbt/parser/base.py +548 -0
  206. dbt/parser/common.py +266 -0
  207. dbt/parser/docs.py +52 -0
  208. dbt/parser/fixtures.py +51 -0
  209. dbt/parser/functions.py +30 -0
  210. dbt/parser/generic_test.py +100 -0
  211. dbt/parser/generic_test_builders.py +333 -0
  212. dbt/parser/hooks.py +122 -0
  213. dbt/parser/macros.py +137 -0
  214. dbt/parser/manifest.py +2208 -0
  215. dbt/parser/models.py +573 -0
  216. dbt/parser/partial.py +1178 -0
  217. dbt/parser/read_files.py +445 -0
  218. dbt/parser/schema_generic_tests.py +422 -0
  219. dbt/parser/schema_renderer.py +111 -0
  220. dbt/parser/schema_yaml_readers.py +935 -0
  221. dbt/parser/schemas.py +1466 -0
  222. dbt/parser/search.py +149 -0
  223. dbt/parser/seeds.py +28 -0
  224. dbt/parser/singular_test.py +20 -0
  225. dbt/parser/snapshots.py +44 -0
  226. dbt/parser/sources.py +558 -0
  227. dbt/parser/sql.py +62 -0
  228. dbt/parser/unit_tests.py +621 -0
  229. dbt/plugins/__init__.py +20 -0
  230. dbt/plugins/contracts.py +9 -0
  231. dbt/plugins/exceptions.py +2 -0
  232. dbt/plugins/manager.py +163 -0
  233. dbt/plugins/manifest.py +21 -0
  234. dbt/profiler.py +20 -0
  235. dbt/py.typed +1 -0
  236. dbt/query_analyzer.py +410 -0
  237. dbt/runners/__init__.py +2 -0
  238. dbt/runners/exposure_runner.py +7 -0
  239. dbt/runners/no_op_runner.py +45 -0
  240. dbt/runners/saved_query_runner.py +7 -0
  241. dbt/selected_resources.py +8 -0
  242. dbt/task/__init__.py +0 -0
  243. dbt/task/base.py +506 -0
  244. dbt/task/build.py +197 -0
  245. dbt/task/clean.py +56 -0
  246. dbt/task/clone.py +161 -0
  247. dbt/task/compile.py +150 -0
  248. dbt/task/compute.py +458 -0
  249. dbt/task/debug.py +513 -0
  250. dbt/task/deps.py +280 -0
  251. dbt/task/docs/__init__.py +3 -0
  252. dbt/task/docs/api/__init__.py +23 -0
  253. dbt/task/docs/api/catalog.py +204 -0
  254. dbt/task/docs/api/lineage.py +234 -0
  255. dbt/task/docs/api/profile.py +204 -0
  256. dbt/task/docs/api/spark.py +186 -0
  257. dbt/task/docs/generate.py +1002 -0
  258. dbt/task/docs/index.html +250 -0
  259. dbt/task/docs/serve.py +174 -0
  260. dbt/task/dvt_output.py +509 -0
  261. dbt/task/dvt_run.py +282 -0
  262. dbt/task/dvt_seed.py +806 -0
  263. dbt/task/freshness.py +322 -0
  264. dbt/task/function.py +121 -0
  265. dbt/task/group_lookup.py +46 -0
  266. dbt/task/init.py +1022 -0
  267. dbt/task/java.py +316 -0
  268. dbt/task/list.py +236 -0
  269. dbt/task/metadata.py +804 -0
  270. dbt/task/migrate.py +714 -0
  271. dbt/task/printer.py +175 -0
  272. dbt/task/profile.py +1489 -0
  273. dbt/task/profile_serve.py +662 -0
  274. dbt/task/retract.py +441 -0
  275. dbt/task/retry.py +175 -0
  276. dbt/task/run.py +1647 -0
  277. dbt/task/run_operation.py +141 -0
  278. dbt/task/runnable.py +758 -0
  279. dbt/task/seed.py +103 -0
  280. dbt/task/show.py +149 -0
  281. dbt/task/snapshot.py +56 -0
  282. dbt/task/spark.py +414 -0
  283. dbt/task/sql.py +110 -0
  284. dbt/task/target_sync.py +814 -0
  285. dbt/task/test.py +464 -0
  286. dbt/tests/fixtures/__init__.py +1 -0
  287. dbt/tests/fixtures/project.py +620 -0
  288. dbt/tests/util.py +651 -0
  289. dbt/tracking.py +529 -0
  290. dbt/utils/__init__.py +3 -0
  291. dbt/utils/artifact_upload.py +151 -0
  292. dbt/utils/utils.py +408 -0
  293. dbt/version.py +271 -0
  294. dvt_cli/__init__.py +158 -0
  295. dvt_core-0.59.0a51.dist-info/METADATA +288 -0
  296. dvt_core-0.59.0a51.dist-info/RECORD +299 -0
  297. dvt_core-0.59.0a51.dist-info/WHEEL +5 -0
  298. dvt_core-0.59.0a51.dist-info/entry_points.txt +2 -0
  299. dvt_core-0.59.0a51.dist-info/top_level.txt +2 -0
dbt/compute/jdbc_utils.py
@@ -0,0 +1,1252 @@
1
+ """
2
+ JDBC Utilities for Spark Engine
3
+
4
+ Provides utilities for converting dbt adapter credentials to JDBC configurations
5
+ and helpers for optimizing parallel reads via partitioning.
6
+
7
+ This module enables DVT to bypass memory bottlenecks by using Spark JDBC connectors
8
+ to read data directly from source databases into Spark workers, distributed across the cluster.
9
+
10
+ Architecture:
11
+ - Maps adapter credentials → JDBC URL + properties
12
+ - Auto-detects optimal partition columns for parallel reads
13
+ - Estimates partition bounds for efficient data distribution
14
+ """
15
+
16
+ from typing import Dict, Optional, Tuple
17
+
18
+ from dbt.adapters.base import BaseAdapter
19
+ from dbt.adapters.contracts.connection import Credentials
20
+ from dbt_common.exceptions import DbtRuntimeError
21
+
22
+
23
+ # JDBC driver class mapping for database types
24
+ # DVT v0.5.9: Complete support for all dbt adapters with JDBC connectivity
25
+ JDBC_DRIVER_MAPPING = {
26
+ # ============================================================
27
+ # Cloud Data Warehouses
28
+ # ============================================================
29
+ "postgres": "org.postgresql.Driver",
30
+ "postgresql": "org.postgresql.Driver",
31
+ "snowflake": "net.snowflake.client.jdbc.SnowflakeDriver",
32
+ "bigquery": "com.simba.googlebigquery.jdbc.Driver",
33
+ "redshift": "com.amazon.redshift.jdbc.Driver",
34
+ "databricks": "com.databricks.client.jdbc.Driver",
35
+ "firebolt": "com.firebolt.FireboltDriver",
36
+
37
+ # ============================================================
38
+ # Microsoft Ecosystem (all use same JDBC driver)
39
+ # ============================================================
40
+ "sqlserver": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
41
+ "mssql": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
42
+ "fabric": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
43
+ "synapse": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
44
+
45
+ # ============================================================
46
+ # Enterprise Data Warehouses
47
+ # ============================================================
48
+ "oracle": "oracle.jdbc.OracleDriver",
49
+ "db2": "com.ibm.db2.jcc.DB2Driver",
50
+ "teradata": "com.teradata.jdbc.TeraDriver",
51
+ "exasol": "com.exasol.jdbc.EXADriver",
52
+ "vertica": "com.vertica.jdbc.Driver",
53
+
54
+ # ============================================================
55
+ # SQL Engines & Query Platforms
56
+ # ============================================================
57
+ "spark": "org.apache.hive.jdbc.HiveDriver",
58
+ "trino": "io.trino.jdbc.TrinoDriver",
59
+ "presto": "io.prestosql.jdbc.PrestoDriver",
60
+ "athena": "com.simba.athena.jdbc.Driver",
61
+ "hive": "org.apache.hive.jdbc.HiveDriver",
62
+ "impala": "com.cloudera.impala.jdbc.Driver",
63
+ "dremio": "com.dremio.jdbc.Driver",
64
+ "glue": "com.amazonaws.glue.sql.jdbc.Driver",
65
+
66
+ # ============================================================
67
+ # Open Source Databases
68
+ # ============================================================
69
+ "mysql": "com.mysql.cj.jdbc.Driver",
70
+ "mariadb": "org.mariadb.jdbc.Driver",
71
+ "sqlite": "org.sqlite.JDBC",
72
+ "duckdb": "org.duckdb.DuckDBDriver",
73
+ "cratedb": "io.crate.client.jdbc.CrateDriver",
74
+
75
+ # ============================================================
76
+ # OLAP & Analytics Databases
77
+ # ============================================================
78
+ "clickhouse": "com.clickhouse.jdbc.ClickHouseDriver",
79
+ "singlestore": "com.singlestore.jdbc.Driver",
80
+ "starrocks": "com.mysql.cj.jdbc.Driver", # StarRocks uses MySQL protocol
81
+ "doris": "com.mysql.cj.jdbc.Driver", # Apache Doris uses MySQL protocol
82
+ "greenplum": "org.postgresql.Driver", # Greenplum uses PostgreSQL protocol
83
+ "monetdb": "org.monetdb.jdbc.MonetDriver",
84
+
85
+ # ============================================================
86
+ # Time-Series & Streaming
87
+ # ============================================================
88
+ "timescaledb": "org.postgresql.Driver", # TimescaleDB uses PostgreSQL
89
+ "questdb": "org.postgresql.Driver", # QuestDB supports PostgreSQL wire protocol
90
+ "materialize": "org.postgresql.Driver", # Materialize uses PostgreSQL wire protocol
91
+ "rockset": "com.rockset.jdbc.RocksetDriver",
92
+
93
+ # ============================================================
94
+ # Graph & Multi-Model
95
+ # ============================================================
96
+ "neo4j": "org.neo4j.Driver",
97
+
98
+ # ============================================================
99
+ # Data Lake Formats (via Spark connectors)
100
+ # ============================================================
101
+ "delta": "org.apache.hive.jdbc.HiveDriver", # Delta Lake via Spark
102
+ "iceberg": "org.apache.hive.jdbc.HiveDriver", # Apache Iceberg via Spark
103
+ "hudi": "org.apache.hive.jdbc.HiveDriver", # Apache Hudi via Spark
104
+
105
+ # ============================================================
106
+ # AlloyDB (Google - PostgreSQL compatible)
107
+ # ============================================================
108
+ "alloydb": "org.postgresql.Driver", # AlloyDB is PostgreSQL-compatible
109
+ }
110
+
111
+
112
+ def _rewrite_localhost_for_docker(jdbc_url: str) -> str:
113
+ """
114
+ Rewrite localhost/127.0.0.1 to host.docker.internal for Docker Spark clusters.
115
+
116
+ DVT v0.51.8: When using Docker-based Spark clusters, workers inside containers
117
+ need host.docker.internal to reach the host machine. With host.docker.internal
118
+ also added to the host's /etc/hosts (pointing to 127.0.0.1), the same JDBC URL
119
+ works for both driver (on host) and workers (in containers).
120
+
121
+ :param jdbc_url: Original JDBC URL
122
+ :returns: JDBC URL with localhost replaced by host.docker.internal
123
+ """
124
+ import re
125
+ # Replace localhost or 127.0.0.1 with host.docker.internal
126
+ url = re.sub(r'//localhost([:/?])', r'//host.docker.internal\1', jdbc_url)
127
+ url = re.sub(r'//127\.0\.0\.1([:/?])', r'//host.docker.internal\1', url)
128
+ return url
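For illustration (an editorial example, assuming the regexes above behave as written), the rewrite maps localhost-style URLs like this:

# Hypothetical check of the localhost rewrite.
assert _rewrite_localhost_for_docker(
    "jdbc:postgresql://localhost:5432/warehouse"
) == "jdbc:postgresql://host.docker.internal:5432/warehouse"
assert _rewrite_localhost_for_docker(
    "jdbc:mysql://127.0.0.1:3306/app"
) == "jdbc:mysql://host.docker.internal:3306/app"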
129
+
130
+
131
+ # Global flag to enable Docker JDBC URL rewriting
132
+ _docker_mode_enabled = False
133
+
134
+
135
+ def set_docker_mode(enabled: bool) -> None:
136
+ """Enable or disable Docker mode for JDBC URL rewriting."""
137
+ global _docker_mode_enabled
138
+ _docker_mode_enabled = enabled
139
+
140
+
141
+ def build_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
142
+ """
143
+ Build JDBC configuration from dbt adapter credentials.
144
+
145
+ Converts adapter-specific credentials to JDBC URL and connection properties
146
+ that can be used by Spark JDBC connectors.
147
+
148
+ :param credentials: dbt adapter credentials object
149
+ :returns: Tuple of (jdbc_url, jdbc_properties)
150
+ :raises DbtRuntimeError: If adapter type is not supported or credentials are invalid
151
+
152
+ Example:
153
+ >>> from dbt.adapters.postgres import PostgresCredentials
154
+ >>> creds = PostgresCredentials(
155
+ ... host="localhost",
156
+ ... port=5432,
157
+ ... user="analytics",
158
+ ... password="secret",
159
+ ... database="warehouse",
160
+ ... schema="public"
161
+ ... )
162
+ >>> url, props = build_jdbc_config(creds)
163
+ >>> print(url)
164
+ jdbc:postgresql://localhost:5432/warehouse
165
+ >>> print(props)
166
+ {'user': 'analytics', 'password': 'secret', 'driver': 'org.postgresql.Driver'}
167
+ """
168
+ adapter_type = credentials.type.lower()
169
+
170
+ # Check if adapter type is supported
171
+ if adapter_type not in JDBC_DRIVER_MAPPING:
172
+ raise DbtRuntimeError(
173
+ f"JDBC connectivity not supported for adapter type '{adapter_type}'. "
174
+ f"Supported types: {', '.join(JDBC_DRIVER_MAPPING.keys())}"
175
+ )
176
+
177
+ # Build JDBC URL and properties based on adapter type
178
+ # DVT v0.59.0a30: Comprehensive JDBC support for ALL dbt adapters
179
+ if adapter_type in ("postgres", "postgresql"):
180
+ jdbc_url, jdbc_props = _build_postgres_jdbc_config(credentials)
181
+ elif adapter_type == "mysql":
182
+ jdbc_url, jdbc_props = _build_mysql_jdbc_config(credentials)
183
+ elif adapter_type == "snowflake":
184
+ jdbc_url, jdbc_props = _build_snowflake_jdbc_config(credentials)
185
+ elif adapter_type == "redshift":
186
+ jdbc_url, jdbc_props = _build_redshift_jdbc_config(credentials)
187
+ elif adapter_type == "bigquery":
188
+ jdbc_url, jdbc_props = _build_bigquery_jdbc_config(credentials)
189
+ elif adapter_type in ("sqlserver", "mssql", "fabric", "synapse"):
190
+ jdbc_url, jdbc_props = _build_sqlserver_jdbc_config(credentials)
191
+ elif adapter_type == "oracle":
192
+ jdbc_url, jdbc_props = _build_oracle_jdbc_config(credentials)
193
+ elif adapter_type == "databricks":
194
+ jdbc_url, jdbc_props = _build_databricks_jdbc_config(credentials)
195
+ elif adapter_type == "duckdb":
196
+ jdbc_url, jdbc_props = _build_duckdb_jdbc_config(credentials)
197
+ elif adapter_type == "mariadb":
198
+ jdbc_url, jdbc_props = _build_mariadb_jdbc_config(credentials)
199
+ elif adapter_type == "sqlite":
200
+ jdbc_url, jdbc_props = _build_sqlite_jdbc_config(credentials)
201
+ elif adapter_type == "clickhouse":
202
+ jdbc_url, jdbc_props = _build_clickhouse_jdbc_config(credentials)
203
+ elif adapter_type == "trino":
204
+ jdbc_url, jdbc_props = _build_trino_jdbc_config(credentials)
205
+ elif adapter_type == "presto":
206
+ jdbc_url, jdbc_props = _build_presto_jdbc_config(credentials)
207
+ elif adapter_type == "athena":
208
+ jdbc_url, jdbc_props = _build_athena_jdbc_config(credentials)
209
+ elif adapter_type in ("hive", "spark"):
210
+ jdbc_url, jdbc_props = _build_hive_jdbc_config(credentials)
211
+ elif adapter_type == "impala":
212
+ jdbc_url, jdbc_props = _build_impala_jdbc_config(credentials)
213
+ elif adapter_type == "teradata":
214
+ jdbc_url, jdbc_props = _build_teradata_jdbc_config(credentials)
215
+ elif adapter_type == "exasol":
216
+ jdbc_url, jdbc_props = _build_exasol_jdbc_config(credentials)
217
+ elif adapter_type == "vertica":
218
+ jdbc_url, jdbc_props = _build_vertica_jdbc_config(credentials)
219
+ elif adapter_type == "db2":
220
+ jdbc_url, jdbc_props = _build_db2_jdbc_config(credentials)
221
+ elif adapter_type == "singlestore":
222
+ jdbc_url, jdbc_props = _build_singlestore_jdbc_config(credentials)
223
+ elif adapter_type in ("starrocks", "doris"):
224
+ # StarRocks and Doris use MySQL protocol
225
+ jdbc_url, jdbc_props = _build_mysql_jdbc_config(credentials)
226
+ elif adapter_type in ("greenplum", "timescaledb", "questdb", "materialize", "alloydb"):
227
+ # These use PostgreSQL protocol
228
+ jdbc_url, jdbc_props = _build_postgres_jdbc_config(credentials)
229
+ elif adapter_type == "dremio":
230
+ jdbc_url, jdbc_props = _build_dremio_jdbc_config(credentials)
231
+ elif adapter_type == "firebolt":
232
+ jdbc_url, jdbc_props = _build_firebolt_jdbc_config(credentials)
233
+ elif adapter_type == "rockset":
234
+ jdbc_url, jdbc_props = _build_rockset_jdbc_config(credentials)
235
+ elif adapter_type == "monetdb":
236
+ jdbc_url, jdbc_props = _build_monetdb_jdbc_config(credentials)
237
+ elif adapter_type == "cratedb":
238
+ jdbc_url, jdbc_props = _build_cratedb_jdbc_config(credentials)
239
+ else:
240
+ # Fallback: Try generic builder based on credentials structure
241
+ jdbc_url, jdbc_props = _build_generic_jdbc_config(credentials, adapter_type)
242
+
243
+ # DVT v0.51.8: Rewrite localhost URLs for Docker Spark clusters
244
+ if _docker_mode_enabled:
245
+ jdbc_url = _rewrite_localhost_for_docker(jdbc_url)
246
+
247
+ return jdbc_url, jdbc_props
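A small, hypothetical sketch of the two mechanisms above working together: protocol-compatible adapters (greenplum, timescaledb, alloydb, and so on) reuse the PostgreSQL builder, and enabling Docker mode rewrites localhost in the final URL. The greenplum_creds object and its field values are assumed.

# Hypothetical: Greenplum credentials go through the PostgreSQL builder,
# and Docker mode rewrites localhost for containerized Spark workers.
set_docker_mode(True)

# Assume greenplum_creds has type "greenplum", host "localhost",
# port 5432, database "analytics", user "dbt", and no sslmode.
url, props = build_jdbc_config(greenplum_creds)
# Under those assumptions:
#   url   -> "jdbc:postgresql://host.docker.internal:5432/analytics"
#   props -> {"user": "dbt", "password": "...", "driver": "org.postgresql.Driver"}

set_docker_mode(False)  # restore the default behavior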
248
+
249
+
250
+ def _build_postgres_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
251
+ """Build JDBC config for PostgreSQL."""
252
+ creds_dict = credentials.to_dict()
253
+
254
+ host = creds_dict.get("host", "localhost")
255
+ port = creds_dict.get("port", 5432)
256
+ database = creds_dict.get("database")
257
+ user = creds_dict.get("user")
258
+ password = creds_dict.get("password", "")
259
+
260
+ if not database:
261
+ raise DbtRuntimeError("PostgreSQL credentials missing required field: database")
262
+ if not user:
263
+ raise DbtRuntimeError("PostgreSQL credentials missing required field: user")
264
+
265
+ jdbc_url = f"jdbc:postgresql://{host}:{port}/{database}"
266
+
267
+ jdbc_properties = {
268
+ "user": user,
269
+ "password": password,
270
+ "driver": JDBC_DRIVER_MAPPING["postgres"],
271
+ }
272
+
273
+ # Optional: Add SSL configuration if present
274
+ if creds_dict.get("sslmode"):
275
+ jdbc_properties["ssl"] = "true" if creds_dict["sslmode"] != "disable" else "false"
276
+
277
+ return jdbc_url, jdbc_properties
278
+
279
+
280
+ def _build_mysql_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
281
+ """Build JDBC config for MySQL."""
282
+ creds_dict = credentials.to_dict()
283
+
284
+ host = creds_dict.get("host", "localhost")
285
+ port = creds_dict.get("port", 3306)
286
+ database = creds_dict.get("database")
287
+ user = creds_dict.get("user")
288
+ password = creds_dict.get("password", "")
289
+
290
+ if not database:
291
+ raise DbtRuntimeError("MySQL credentials missing required field: database")
292
+ if not user:
293
+ raise DbtRuntimeError("MySQL credentials missing required field: user")
294
+
295
+ jdbc_url = f"jdbc:mysql://{host}:{port}/{database}"
296
+
297
+ jdbc_properties = {
298
+ "user": user,
299
+ "password": password,
300
+ "driver": JDBC_DRIVER_MAPPING["mysql"],
301
+ }
302
+
303
+ return jdbc_url, jdbc_properties
304
+
305
+
306
+ def _build_snowflake_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
307
+ """Build JDBC config for Snowflake."""
308
+ creds_dict = credentials.to_dict()
309
+
310
+ account = creds_dict.get("account")
311
+ user = creds_dict.get("user")
312
+ password = creds_dict.get("password", "")
313
+ database = creds_dict.get("database")
314
+ warehouse = creds_dict.get("warehouse")
315
+ schema = creds_dict.get("schema", "public")
316
+
317
+ if not account:
318
+ raise DbtRuntimeError("Snowflake credentials missing required field: account")
319
+ if not user:
320
+ raise DbtRuntimeError("Snowflake credentials missing required field: user")
321
+
322
+ # Snowflake JDBC URL format with Arrow disabled via URL parameter
323
+ # This is more reliable than setting it only in JDBC properties for the Snowflake driver
324
+ jdbc_url = f"jdbc:snowflake://{account}.snowflakecomputing.com/?JDBC_QUERY_RESULT_FORMAT=JSON"
325
+
326
+ jdbc_properties = {
327
+ "user": user,
328
+ "password": password,
329
+ "driver": JDBC_DRIVER_MAPPING["snowflake"],
330
+ # CRITICAL FIX v0.4.4: Disable Arrow format to avoid Java 21 module access errors
331
+ # Property must be uppercase and set in BOTH URL and properties for reliability
332
+ "JDBC_QUERY_RESULT_FORMAT": "JSON",
333
+ "jdbc_query_result_format": "json", # Lowercase variant for compatibility
334
+ # Additional Snowflake-specific optimizations
335
+ "JDBC_USE_SESSION_TIMEZONE": "false", # Use UTC for consistency
336
+ }
337
+
338
+ # Add optional properties
339
+ if database:
340
+ jdbc_properties["db"] = database
341
+ if warehouse:
342
+ jdbc_properties["warehouse"] = warehouse
343
+ if schema:
344
+ jdbc_properties["schema"] = schema
345
+
346
+ return jdbc_url, jdbc_properties
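For a concrete, hypothetical set of Snowflake credentials, the builder above would produce something like the following (placeholder values, not real credentials):

# Hypothetical Snowflake example.
url, props = _build_snowflake_jdbc_config(snowflake_creds)
# With account="xy12345", user="analytics", database="warehouse",
# warehouse="transforming", schema="public", this yields:
#   url == "jdbc:snowflake://xy12345.snowflakecomputing.com/?JDBC_QUERY_RESULT_FORMAT=JSON"
#   props contains the Snowflake driver class, JDBC_QUERY_RESULT_FORMAT="JSON"
#   (set in both the URL and the properties, per the comments above), the
#   UTC session-timezone flag, and the optional db / warehouse / schema keys.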
347
+
348
+
349
+ def _build_redshift_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
350
+ """Build JDBC config for Amazon Redshift."""
351
+ creds_dict = credentials.to_dict()
352
+
353
+ host = creds_dict.get("host")
354
+ port = creds_dict.get("port", 5439)
355
+ database = creds_dict.get("database")
356
+ user = creds_dict.get("user")
357
+ password = creds_dict.get("password", "")
358
+
359
+ if not host:
360
+ raise DbtRuntimeError("Redshift credentials missing required field: host")
361
+ if not database:
362
+ raise DbtRuntimeError("Redshift credentials missing required field: database")
363
+ if not user:
364
+ raise DbtRuntimeError("Redshift credentials missing required field: user")
365
+
366
+ jdbc_url = f"jdbc:redshift://{host}:{port}/{database}"
367
+
368
+ jdbc_properties = {
369
+ "user": user,
370
+ "password": password,
371
+ "driver": JDBC_DRIVER_MAPPING["redshift"],
372
+ }
373
+
374
+ return jdbc_url, jdbc_properties
375
+
376
+
377
+ def _build_bigquery_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
378
+ """Build JDBC config for Google BigQuery."""
379
+ creds_dict = credentials.to_dict()
380
+
381
+ project = creds_dict.get("project")
382
+ dataset = creds_dict.get("dataset") or creds_dict.get("schema")
383
+
384
+ if not project:
385
+ raise DbtRuntimeError("BigQuery credentials missing required field: project")
386
+
387
+ # BigQuery JDBC URL format
388
+ jdbc_url = "jdbc:bigquery://https://www.googleapis.com/bigquery/v2:443"
389
+
390
+ jdbc_properties = {
391
+ "ProjectId": project,
392
+ "driver": JDBC_DRIVER_MAPPING["bigquery"],
393
+ }
394
+
395
+ if dataset:
396
+ jdbc_properties["DefaultDataset"] = dataset
397
+
398
+ # Handle authentication
399
+ # BigQuery typically uses service account JSON or OAuth
400
+ if creds_dict.get("keyfile"):
401
+ jdbc_properties["OAuthType"] = "0" # Service account
402
+ jdbc_properties["OAuthServiceAcctEmail"] = creds_dict.get("client_email", "")
403
+ jdbc_properties["OAuthPvtKeyPath"] = creds_dict["keyfile"]
404
+
405
+ return jdbc_url, jdbc_properties
406
+
407
+
408
+ def _build_sqlserver_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
409
+ """Build JDBC config for Microsoft SQL Server."""
410
+ creds_dict = credentials.to_dict()
411
+
412
+ host = creds_dict.get("host", "localhost")
413
+ port = creds_dict.get("port", 1433)
414
+ database = creds_dict.get("database")
415
+ user = creds_dict.get("user")
416
+ password = creds_dict.get("password", "")
417
+
418
+ if not database:
419
+ raise DbtRuntimeError("SQL Server credentials missing required field: database")
420
+ if not user:
421
+ raise DbtRuntimeError("SQL Server credentials missing required field: user")
422
+
423
+ jdbc_url = f"jdbc:sqlserver://{host}:{port};databaseName={database}"
424
+
425
+ jdbc_properties = {
426
+ "user": user,
427
+ "password": password,
428
+ "driver": JDBC_DRIVER_MAPPING["sqlserver"],
429
+ }
430
+
431
+ return jdbc_url, jdbc_properties
432
+
433
+
434
+ def _build_oracle_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
435
+ """Build JDBC config for Oracle Database."""
436
+ creds_dict = credentials.to_dict()
437
+
438
+ host = creds_dict.get("host", "localhost")
439
+ port = creds_dict.get("port", 1521)
440
+ database = creds_dict.get("database") or creds_dict.get("service_name")
441
+ user = creds_dict.get("user")
442
+ password = creds_dict.get("password", "")
443
+
444
+ if not database:
445
+ raise DbtRuntimeError("Oracle credentials missing required field: database/service_name")
446
+ if not user:
447
+ raise DbtRuntimeError("Oracle credentials missing required field: user")
448
+
449
+ # Oracle thin driver format
450
+ jdbc_url = f"jdbc:oracle:thin:@{host}:{port}:{database}"
451
+
452
+ jdbc_properties = {
453
+ "user": user,
454
+ "password": password,
455
+ "driver": JDBC_DRIVER_MAPPING["oracle"],
456
+ }
457
+
458
+ return jdbc_url, jdbc_properties
459
+
460
+
461
+ def _build_databricks_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
462
+ """
463
+ Build JDBC config for Databricks SQL Warehouse or Cluster.
464
+
465
+ DVT v0.51.5: Added support for Databricks JDBC connectivity.
466
+
467
+ Databricks JDBC URL format:
468
+ jdbc:databricks://<host>:443/default;transportMode=http;ssl=1;httpPath=<http_path>;AuthMech=3;
469
+
470
+ The dbt-databricks adapter credentials include:
471
+ - host: Databricks workspace URL (e.g., dbc-xxxxx.cloud.databricks.com)
472
+ - http_path: SQL warehouse or cluster HTTP path
473
+ - token: Personal access token for authentication
474
+ - catalog: Unity Catalog name (optional)
475
+ - schema: Default schema (optional)
476
+ """
477
+ creds_dict = credentials.to_dict()
478
+
479
+ host = creds_dict.get("host")
480
+ http_path = creds_dict.get("http_path")
481
+ token = creds_dict.get("token")
482
+ catalog = creds_dict.get("catalog", "hive_metastore")
483
+ schema = creds_dict.get("schema", "default")
484
+
485
+ if not host:
486
+ raise DbtRuntimeError("Databricks credentials missing required field: host")
487
+ if not http_path:
488
+ raise DbtRuntimeError("Databricks credentials missing required field: http_path")
489
+ if not token:
490
+ raise DbtRuntimeError("Databricks credentials missing required field: token")
491
+
492
+ # Build Databricks JDBC URL
493
+ # Format: jdbc:databricks://<host>:443/<catalog>;transportMode=http;ssl=1;httpPath=<http_path>;AuthMech=3;
494
+ jdbc_url = (
495
+ f"jdbc:databricks://{host}:443/{catalog};"
496
+ f"transportMode=http;ssl=1;httpPath={http_path};AuthMech=3"
497
+ )
498
+
499
+ jdbc_properties = {
500
+ "UID": "token", # Databricks uses "token" as username for PAT auth
501
+ "PWD": token,
502
+ "driver": JDBC_DRIVER_MAPPING["databricks"],
503
+ }
504
+
505
+ return jdbc_url, jdbc_properties
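As a hypothetical illustration of the format described in the docstring (workspace host and HTTP path are placeholders):

# Hypothetical Databricks example.
databricks_url, databricks_props = _build_databricks_jdbc_config(databricks_creds)
# With host="dbc-xxxxx.cloud.databricks.com", catalog="main",
# http_path="/sql/1.0/warehouses/abc123", this yields:
#   databricks_url ==
#       "jdbc:databricks://dbc-xxxxx.cloud.databricks.com:443/main;"
#       "transportMode=http;ssl=1;httpPath=/sql/1.0/warehouses/abc123;AuthMech=3"
#   databricks_props == {"UID": "token", "PWD": "<personal access token>",
#                        "driver": "com.databricks.client.jdbc.Driver"}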
506
+
507
+
508
+ # ============================================================
509
+ # DVT v0.59.0a30: Additional JDBC Config Builders
510
+ # ============================================================
511
+
512
+
513
+ def _build_duckdb_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
514
+ """
515
+ Build JDBC config for DuckDB.
516
+
517
+ DuckDB JDBC URL format:
518
+ - In-memory: jdbc:duckdb:
519
+ - File-based: jdbc:duckdb:/path/to/database.duckdb
520
+
521
+ Note: For federation, DuckDB is typically a target (write destination).
522
+ The Spark JDBC write will create/update the file.
523
+ """
524
+ creds_dict = credentials.to_dict()
525
+
526
+ # Get the database path (DuckDB uses 'path' for file location)
527
+ path = creds_dict.get("path") or creds_dict.get("database", ":memory:")
528
+
529
+ # Expand ~ and resolve path
530
+ if path and path != ":memory:":
531
+ import os
532
+ path = os.path.expanduser(path)
533
+ path = os.path.abspath(path)
534
+
535
+ # Build JDBC URL
536
+ if path == ":memory:":
537
+ jdbc_url = "jdbc:duckdb:"
538
+ else:
539
+ jdbc_url = f"jdbc:duckdb:{path}"
540
+
541
+ jdbc_properties = {
542
+ "driver": JDBC_DRIVER_MAPPING["duckdb"],
543
+ }
544
+
545
+ return jdbc_url, jdbc_properties
546
+
547
+
548
+ def _build_mariadb_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
549
+ """Build JDBC config for MariaDB."""
550
+ creds_dict = credentials.to_dict()
551
+
552
+ host = creds_dict.get("host", "localhost")
553
+ port = creds_dict.get("port", 3306)
554
+ database = creds_dict.get("database")
555
+ user = creds_dict.get("user")
556
+ password = creds_dict.get("password", "")
557
+
558
+ if not database:
559
+ raise DbtRuntimeError("MariaDB credentials missing required field: database")
560
+ if not user:
561
+ raise DbtRuntimeError("MariaDB credentials missing required field: user")
562
+
563
+ jdbc_url = f"jdbc:mariadb://{host}:{port}/{database}"
564
+
565
+ jdbc_properties = {
566
+ "user": user,
567
+ "password": password,
568
+ "driver": JDBC_DRIVER_MAPPING["mariadb"],
569
+ }
570
+
571
+ return jdbc_url, jdbc_properties
572
+
573
+
574
+ def _build_sqlite_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
575
+ """Build JDBC config for SQLite."""
576
+ creds_dict = credentials.to_dict()
577
+
578
+ path = creds_dict.get("path") or creds_dict.get("database", ":memory:")
579
+
580
+ # Expand ~ and resolve path
581
+ if path and path != ":memory:":
582
+ import os
583
+ path = os.path.expanduser(path)
584
+ path = os.path.abspath(path)
585
+
586
+ jdbc_url = f"jdbc:sqlite:{path}"
587
+
588
+ jdbc_properties = {
589
+ "driver": JDBC_DRIVER_MAPPING["sqlite"],
590
+ }
591
+
592
+ return jdbc_url, jdbc_properties
593
+
594
+
595
+ def _build_clickhouse_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
596
+ """Build JDBC config for ClickHouse."""
597
+ creds_dict = credentials.to_dict()
598
+
599
+ host = creds_dict.get("host", "localhost")
600
+ port = creds_dict.get("port", 8123) # HTTP port, JDBC uses 8443 for secure
601
+ database = creds_dict.get("database", "default")
602
+ user = creds_dict.get("user", "default")
603
+ password = creds_dict.get("password", "")
604
+
605
+ # ClickHouse JDBC URL format
606
+ jdbc_url = f"jdbc:clickhouse://{host}:{port}/{database}"
607
+
608
+ jdbc_properties = {
609
+ "user": user,
610
+ "password": password,
611
+ "driver": JDBC_DRIVER_MAPPING["clickhouse"],
612
+ }
613
+
614
+ return jdbc_url, jdbc_properties
615
+
616
+
617
+ def _build_trino_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
618
+ """Build JDBC config for Trino."""
619
+ creds_dict = credentials.to_dict()
620
+
621
+ host = creds_dict.get("host", "localhost")
622
+ port = creds_dict.get("port", 8080)
623
+ catalog = creds_dict.get("catalog") or creds_dict.get("database", "hive")
624
+ schema = creds_dict.get("schema", "default")
625
+ user = creds_dict.get("user", "trino")
626
+ password = creds_dict.get("password", "")
627
+
628
+ # Trino JDBC URL format
629
+ jdbc_url = f"jdbc:trino://{host}:{port}/{catalog}/{schema}"
630
+
631
+ jdbc_properties = {
632
+ "user": user,
633
+ "driver": JDBC_DRIVER_MAPPING["trino"],
634
+ }
635
+ if password:
636
+ jdbc_properties["password"] = password
637
+
638
+ return jdbc_url, jdbc_properties
639
+
640
+
641
+ def _build_presto_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
642
+ """Build JDBC config for Presto."""
643
+ creds_dict = credentials.to_dict()
644
+
645
+ host = creds_dict.get("host", "localhost")
646
+ port = creds_dict.get("port", 8080)
647
+ catalog = creds_dict.get("catalog") or creds_dict.get("database", "hive")
648
+ schema = creds_dict.get("schema", "default")
649
+ user = creds_dict.get("user", "presto")
650
+ password = creds_dict.get("password", "")
651
+
652
+ # Presto JDBC URL format
653
+ jdbc_url = f"jdbc:presto://{host}:{port}/{catalog}/{schema}"
654
+
655
+ jdbc_properties = {
656
+ "user": user,
657
+ "driver": JDBC_DRIVER_MAPPING["presto"],
658
+ }
659
+ if password:
660
+ jdbc_properties["password"] = password
661
+
662
+ return jdbc_url, jdbc_properties
663
+
664
+
665
+ def _build_athena_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
666
+ """Build JDBC config for AWS Athena."""
667
+ creds_dict = credentials.to_dict()
668
+
669
+ region = creds_dict.get("region", "us-east-1")
670
+ s3_staging_dir = creds_dict.get("s3_staging_dir")
671
+ database = creds_dict.get("database", "default")
672
+
673
+ if not s3_staging_dir:
674
+ raise DbtRuntimeError("Athena credentials missing required field: s3_staging_dir")
675
+
676
+ # Athena JDBC URL format
677
+ jdbc_url = (
678
+ f"jdbc:awsathena://athena.{region}.amazonaws.com:443;"
679
+ f"S3OutputLocation={s3_staging_dir}"
680
+ )
681
+
682
+ jdbc_properties = {
683
+ "Schema": database,
684
+ "driver": JDBC_DRIVER_MAPPING["athena"],
685
+ }
686
+
687
+ # Handle AWS authentication
688
+ if creds_dict.get("aws_access_key_id"):
689
+ jdbc_properties["AwsCredentialsProviderClass"] = "com.simba.athena.amazonaws.auth.AWSStaticCredentialsProvider"
690
+ jdbc_properties["AwsCredentialsProviderArguments"] = (
691
+ f"{creds_dict['aws_access_key_id']},{creds_dict.get('aws_secret_access_key', '')}"
692
+ )
693
+
694
+ return jdbc_url, jdbc_properties
695
+
696
+
697
+ def _build_hive_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
698
+ """Build JDBC config for Apache Hive."""
699
+ creds_dict = credentials.to_dict()
700
+
701
+ host = creds_dict.get("host", "localhost")
702
+ port = creds_dict.get("port", 10000)
703
+ database = creds_dict.get("database", "default")
704
+ user = creds_dict.get("user", "")
705
+ password = creds_dict.get("password", "")
706
+
707
+ # Hive JDBC URL format
708
+ jdbc_url = f"jdbc:hive2://{host}:{port}/{database}"
709
+
710
+ jdbc_properties = {
711
+ "driver": JDBC_DRIVER_MAPPING["hive"],
712
+ }
713
+ if user:
714
+ jdbc_properties["user"] = user
715
+ if password:
716
+ jdbc_properties["password"] = password
717
+
718
+ return jdbc_url, jdbc_properties
719
+
720
+
721
+ def _build_impala_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
722
+ """Build JDBC config for Cloudera Impala."""
723
+ creds_dict = credentials.to_dict()
724
+
725
+ host = creds_dict.get("host", "localhost")
726
+ port = creds_dict.get("port", 21050)
727
+ database = creds_dict.get("database", "default")
728
+ user = creds_dict.get("user", "")
729
+ password = creds_dict.get("password", "")
730
+
731
+ # Impala JDBC URL format
732
+ jdbc_url = f"jdbc:impala://{host}:{port}/{database}"
733
+
734
+ jdbc_properties = {
735
+ "driver": JDBC_DRIVER_MAPPING["impala"],
736
+ }
737
+ if user:
738
+ jdbc_properties["user"] = user
739
+ if password:
740
+ jdbc_properties["password"] = password
741
+
742
+ return jdbc_url, jdbc_properties
743
+
744
+
745
+ def _build_teradata_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
746
+ """Build JDBC config for Teradata."""
747
+ creds_dict = credentials.to_dict()
748
+
749
+ host = creds_dict.get("host", "localhost")
750
+ database = creds_dict.get("database")
751
+ user = creds_dict.get("user")
752
+ password = creds_dict.get("password", "")
753
+
754
+ if not user:
755
+ raise DbtRuntimeError("Teradata credentials missing required field: user")
756
+
757
+ # Teradata JDBC URL format
758
+ jdbc_url = f"jdbc:teradata://{host}"
759
+ if database:
760
+ jdbc_url += f"/DATABASE={database}"
761
+
762
+ jdbc_properties = {
763
+ "user": user,
764
+ "password": password,
765
+ "driver": JDBC_DRIVER_MAPPING["teradata"],
766
+ }
767
+
768
+ return jdbc_url, jdbc_properties
769
+
770
+
771
+ def _build_exasol_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
772
+ """Build JDBC config for Exasol."""
773
+ creds_dict = credentials.to_dict()
774
+
775
+ host = creds_dict.get("host", "localhost")
776
+ port = creds_dict.get("port", 8563)
777
+ user = creds_dict.get("user")
778
+ password = creds_dict.get("password", "")
779
+
780
+ if not user:
781
+ raise DbtRuntimeError("Exasol credentials missing required field: user")
782
+
783
+ # Exasol JDBC URL format
784
+ jdbc_url = f"jdbc:exa:{host}:{port}"
785
+
786
+ jdbc_properties = {
787
+ "user": user,
788
+ "password": password,
789
+ "driver": JDBC_DRIVER_MAPPING["exasol"],
790
+ }
791
+
792
+ return jdbc_url, jdbc_properties
793
+
794
+
795
+ def _build_vertica_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
796
+ """Build JDBC config for Vertica."""
797
+ creds_dict = credentials.to_dict()
798
+
799
+ host = creds_dict.get("host", "localhost")
800
+ port = creds_dict.get("port", 5433)
801
+ database = creds_dict.get("database")
802
+ user = creds_dict.get("user")
803
+ password = creds_dict.get("password", "")
804
+
805
+ if not database:
806
+ raise DbtRuntimeError("Vertica credentials missing required field: database")
807
+ if not user:
808
+ raise DbtRuntimeError("Vertica credentials missing required field: user")
809
+
810
+ # Vertica JDBC URL format
811
+ jdbc_url = f"jdbc:vertica://{host}:{port}/{database}"
812
+
813
+ jdbc_properties = {
814
+ "user": user,
815
+ "password": password,
816
+ "driver": JDBC_DRIVER_MAPPING["vertica"],
817
+ }
818
+
819
+ return jdbc_url, jdbc_properties
820
+
821
+
822
+ def _build_db2_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
823
+ """Build JDBC config for IBM DB2."""
824
+ creds_dict = credentials.to_dict()
825
+
826
+ host = creds_dict.get("host", "localhost")
827
+ port = creds_dict.get("port", 50000)
828
+ database = creds_dict.get("database")
829
+ user = creds_dict.get("user")
830
+ password = creds_dict.get("password", "")
831
+
832
+ if not database:
833
+ raise DbtRuntimeError("DB2 credentials missing required field: database")
834
+ if not user:
835
+ raise DbtRuntimeError("DB2 credentials missing required field: user")
836
+
837
+ # DB2 JDBC URL format
838
+ jdbc_url = f"jdbc:db2://{host}:{port}/{database}"
839
+
840
+ jdbc_properties = {
841
+ "user": user,
842
+ "password": password,
843
+ "driver": JDBC_DRIVER_MAPPING["db2"],
844
+ }
845
+
846
+ return jdbc_url, jdbc_properties
847
+
848
+
849
+ def _build_singlestore_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
850
+ """Build JDBC config for SingleStore (formerly MemSQL)."""
851
+ creds_dict = credentials.to_dict()
852
+
853
+ host = creds_dict.get("host", "localhost")
854
+ port = creds_dict.get("port", 3306) # SingleStore uses MySQL port
855
+ database = creds_dict.get("database")
856
+ user = creds_dict.get("user")
857
+ password = creds_dict.get("password", "")
858
+
859
+ if not database:
860
+ raise DbtRuntimeError("SingleStore credentials missing required field: database")
861
+ if not user:
862
+ raise DbtRuntimeError("SingleStore credentials missing required field: user")
863
+
864
+ # SingleStore JDBC URL format
865
+ jdbc_url = f"jdbc:singlestore://{host}:{port}/{database}"
866
+
867
+ jdbc_properties = {
868
+ "user": user,
869
+ "password": password,
870
+ "driver": JDBC_DRIVER_MAPPING["singlestore"],
871
+ }
872
+
873
+ return jdbc_url, jdbc_properties
874
+
875
+
876
+ def _build_dremio_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
877
+ """Build JDBC config for Dremio."""
878
+ creds_dict = credentials.to_dict()
879
+
880
+ host = creds_dict.get("host", "localhost")
881
+ port = creds_dict.get("port", 31010)
882
+ user = creds_dict.get("user")
883
+ password = creds_dict.get("password", "")
884
+
885
+ if not user:
886
+ raise DbtRuntimeError("Dremio credentials missing required field: user")
887
+
888
+ # Dremio JDBC URL format
889
+ jdbc_url = f"jdbc:dremio:direct={host}:{port}"
890
+
891
+ jdbc_properties = {
892
+ "user": user,
893
+ "password": password,
894
+ "driver": JDBC_DRIVER_MAPPING["dremio"],
895
+ }
896
+
897
+ return jdbc_url, jdbc_properties
898
+
899
+
900
+ def _build_firebolt_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
901
+ """Build JDBC config for Firebolt."""
902
+ creds_dict = credentials.to_dict()
903
+
904
+ database = creds_dict.get("database")
905
+ engine = creds_dict.get("engine")
906
+ user = creds_dict.get("user")
907
+ password = creds_dict.get("password", "")
908
+
909
+ if not database:
910
+ raise DbtRuntimeError("Firebolt credentials missing required field: database")
911
+ if not user:
912
+ raise DbtRuntimeError("Firebolt credentials missing required field: user")
913
+
914
+ # Firebolt JDBC URL format
915
+ jdbc_url = f"jdbc:firebolt:{database}"
916
+ if engine:
917
+ jdbc_url += f"?engine={engine}"
918
+
919
+ jdbc_properties = {
920
+ "user": user,
921
+ "password": password,
922
+ "driver": JDBC_DRIVER_MAPPING["firebolt"],
923
+ }
924
+
925
+ return jdbc_url, jdbc_properties
926
+
927
+
928
+ def _build_rockset_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
929
+ """Build JDBC config for Rockset."""
930
+ creds_dict = credentials.to_dict()
931
+
932
+ api_key = creds_dict.get("api_key") or creds_dict.get("password")
933
+ api_server = creds_dict.get("api_server", "api.usw2a1.rockset.com")
934
+
935
+ if not api_key:
936
+ raise DbtRuntimeError("Rockset credentials missing required field: api_key")
937
+
938
+ # Rockset JDBC URL format
939
+ jdbc_url = f"jdbc:rockset://{api_server}"
940
+
941
+ jdbc_properties = {
942
+ "apiKey": api_key,
943
+ "driver": JDBC_DRIVER_MAPPING["rockset"],
944
+ }
945
+
946
+ return jdbc_url, jdbc_properties
947
+
948
+
949
+ def _build_monetdb_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
950
+ """Build JDBC config for MonetDB."""
951
+ creds_dict = credentials.to_dict()
952
+
953
+ host = creds_dict.get("host", "localhost")
954
+ port = creds_dict.get("port", 50000)
955
+ database = creds_dict.get("database")
956
+ user = creds_dict.get("user")
957
+ password = creds_dict.get("password", "")
958
+
959
+ if not database:
960
+ raise DbtRuntimeError("MonetDB credentials missing required field: database")
961
+ if not user:
962
+ raise DbtRuntimeError("MonetDB credentials missing required field: user")
963
+
964
+ # MonetDB JDBC URL format
965
+ jdbc_url = f"jdbc:monetdb://{host}:{port}/{database}"
966
+
967
+ jdbc_properties = {
968
+ "user": user,
969
+ "password": password,
970
+ "driver": JDBC_DRIVER_MAPPING["monetdb"],
971
+ }
972
+
973
+ return jdbc_url, jdbc_properties
974
+
975
+
976
+ def _build_cratedb_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
977
+ """Build JDBC config for CrateDB."""
978
+ creds_dict = credentials.to_dict()
979
+
980
+ host = creds_dict.get("host", "localhost")
981
+ port = creds_dict.get("port", 5432) # CrateDB uses PostgreSQL port
982
+ schema = creds_dict.get("schema", "doc")
983
+ user = creds_dict.get("user", "crate")
984
+ password = creds_dict.get("password", "")
985
+
986
+ # CrateDB JDBC URL format
987
+ jdbc_url = f"jdbc:crate://{host}:{port}/?schema={schema}"
988
+
989
+ jdbc_properties = {
990
+ "user": user,
991
+ "password": password,
992
+ "driver": JDBC_DRIVER_MAPPING["cratedb"],
993
+ }
994
+
995
+ return jdbc_url, jdbc_properties
996
+
997
+
998
+ def _build_generic_jdbc_config(credentials: Credentials, adapter_type: str) -> Tuple[str, Dict[str, str]]:
999
+ """
1000
+ Generic JDBC config builder for adapters not explicitly supported.
1001
+
1002
+ This tries to build a reasonable JDBC URL based on common credential patterns.
1003
+ Works for many databases that follow standard connection conventions.
1004
+ """
1005
+ creds_dict = credentials.to_dict()
1006
+
1007
+ host = creds_dict.get("host", "localhost")
1008
+ port = creds_dict.get("port", 5432)
1009
+ database = creds_dict.get("database") or creds_dict.get("schema", "")
1010
+ user = creds_dict.get("user", "")
1011
+ password = creds_dict.get("password", "")
1012
+
1013
+ # Get driver from mapping if available
1014
+ driver = JDBC_DRIVER_MAPPING.get(adapter_type)
1015
+ if not driver:
1016
+ raise DbtRuntimeError(
1017
+ f"No JDBC driver mapping found for adapter type '{adapter_type}'. "
1018
+ f"Please add support for this adapter in jdbc_utils.py"
1019
+ )
1020
+
1021
+ # Build generic JDBC URL
1022
+ if database:
1023
+ jdbc_url = f"jdbc:{adapter_type}://{host}:{port}/{database}"
1024
+ else:
1025
+ jdbc_url = f"jdbc:{adapter_type}://{host}:{port}"
1026
+
1027
+ jdbc_properties = {
1028
+ "driver": driver,
1029
+ }
1030
+ if user:
1031
+ jdbc_properties["user"] = user
1032
+ if password:
1033
+ jdbc_properties["password"] = password
1034
+
1035
+ return jdbc_url, jdbc_properties
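Adapters that have a driver in JDBC_DRIVER_MAPPING but no dedicated builder in build_jdbc_config (for example glue, delta, iceberg, hudi, or neo4j) fall through the else branch to this generic builder. A hypothetical illustration with assumed credential values:

# Hypothetical fallback: an adapter with a driver mapping but no dedicated
# builder gets a generic jdbc:<adapter_type>://host:port/database URL.
url, props = _build_generic_jdbc_config(neo4j_creds, "neo4j")
# With host="localhost", port=7687, database="neo4j", user="neo4j",
# this yields roughly:
#   url   -> "jdbc:neo4j://localhost:7687/neo4j"
#   props -> {"driver": JDBC_DRIVER_MAPPING["neo4j"], "user": "neo4j", "password": "..."}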
1036
+
1037
+
1038
+ def auto_detect_partition_column(adapter: BaseAdapter, schema: str, table: str) -> Optional[str]:
1039
+ """
1040
+ Auto-detect the best column for partitioning parallel JDBC reads.
1041
+
1042
+ Queries table metadata to find a suitable partition column. Prioritizes:
1043
+ 1. Primary key columns (single column PKs only)
1044
+ 2. Columns named 'id' or ending with '_id'
1045
+ 3. Timestamp/date columns
1046
+ 4. Integer columns
1047
+
1048
+ :param adapter: dbt adapter to use for querying metadata
1049
+ :param schema: Schema/dataset name
1050
+ :param table: Table name
1051
+ :returns: Column name suitable for partitioning, or None if not found
1052
+
1053
+ Example:
1054
+ >>> column = auto_detect_partition_column(adapter, "public", "users")
1055
+ >>> if column:
1056
+ ... print(f"Using {column} for partitioning")
1057
+ ... else:
1058
+ ... print("No suitable partition column found")
1059
+ """
1060
+ try:
1061
+ # Strategy 1: Check for primary key
1062
+ pk_column = _get_primary_key_column(adapter, schema, table)
1063
+ if pk_column:
1064
+ return pk_column
1065
+
1066
+ # Strategy 2: Get all columns and look for ID-like columns
1067
+ columns = _get_table_columns(adapter, schema, table)
1068
+
1069
+ # Look for ID columns (exact match or suffix)
1070
+ for col_name, col_type in columns:
1071
+ col_name_lower = col_name.lower()
1072
+ if col_name_lower == "id" or col_name_lower.endswith("_id"):
1073
+ # Check if it's an integer type
1074
+ if _is_integer_type(col_type):
1075
+ return col_name
1076
+
1077
+ # Strategy 3: Look for timestamp/date columns
1078
+ for col_name, col_type in columns:
1079
+ if _is_timestamp_type(col_type):
1080
+ return col_name
1081
+
1082
+ # Strategy 4: Look for any integer column
1083
+ for col_name, col_type in columns:
1084
+ if _is_integer_type(col_type):
1085
+ return col_name
1086
+
1087
+ # No suitable column found
1088
+ return None
1089
+
1090
+ except Exception:
1091
+ # If metadata query fails, return None (caller can decide to read without partitioning)
1092
+ return None
1093
+
1094
+
1095
+ def estimate_partition_bounds(
1096
+ adapter: BaseAdapter, schema: str, table: str, column: str
1097
+ ) -> Tuple[int, int]:
1098
+ """
1099
+ Estimate partition bounds (min/max) for a numeric partition column.
1100
+
1101
+ Queries the table to get MIN and MAX values of the partition column,
1102
+ which are used by Spark JDBC to distribute reads across workers.
1103
+
1104
+ :param adapter: dbt adapter to use for querying
1105
+ :param schema: Schema/dataset name
1106
+ :param table: Table name
1107
+ :param column: Partition column name
1108
+ :returns: Tuple of (lower_bound, upper_bound)
1109
+ :raises DbtRuntimeError: If query fails or column is not numeric
1110
+
1111
+ Example:
1112
+ >>> lower, upper = estimate_partition_bounds(adapter, "public", "orders", "order_id")
1113
+ >>> print(f"Partition range: {lower} to {upper}")
1114
+ Partition range: 1 to 1000000
1115
+ """
1116
+ try:
1117
+ # Build qualified table name
1118
+ qualified_table = f"{schema}.{table}"
1119
+
1120
+ # Query for min/max
1121
+ sql = f"SELECT MIN({column}) as min_val, MAX({column}) as max_val FROM {qualified_table}"
1122
+
1123
+ # Execute via adapter
1124
+ response, result_table = adapter.execute(sql, auto_begin=False, fetch=True)
1125
+
1126
+ if not result_table or len(result_table.rows) == 0:
1127
+ raise DbtRuntimeError(
1128
+ f"Failed to estimate partition bounds for {qualified_table}.{column}: "
1129
+ "Query returned no results"
1130
+ )
1131
+
1132
+ row = result_table.rows[0]
1133
+ min_val = row[0]
1134
+ max_val = row[1]
1135
+
1136
+ if min_val is None or max_val is None:
1137
+ raise DbtRuntimeError(
1138
+ f"Failed to estimate partition bounds for {qualified_table}.{column}: "
1139
+ "Column contains only NULL values"
1140
+ )
1141
+
1142
+ # Convert to integers
1143
+ lower_bound = int(min_val)
1144
+ upper_bound = int(max_val)
1145
+
1146
+ return lower_bound, upper_bound
1147
+
1148
+ except Exception as e:
1149
+ raise DbtRuntimeError(
1150
+ f"Failed to estimate partition bounds for {schema}.{table}.{column}: {str(e)}"
1151
+ ) from e
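A hedged sketch tying build_jdbc_config and the two helpers above to Spark's partitioned JDBC read (standard pyspark API; the adapter, SparkSession, schema, and table names are assumed):

# Hypothetical parallel read: detect a partition column, estimate its
# bounds, and let Spark split the read across workers.
url, props = build_jdbc_config(creds)  # creds: the source adapter's Credentials

partition_column = auto_detect_partition_column(adapter, "public", "orders")
if partition_column:
    lower, upper = estimate_partition_bounds(adapter, "public", "orders", partition_column)
    df = spark.read.jdbc(
        url=url,
        table="public.orders",
        column=partition_column,
        lowerBound=lower,
        upperBound=upper,
        numPartitions=8,
        properties=props,
    )
else:
    # No suitable column found: fall back to a single-partition read.
    df = spark.read.jdbc(url=url, table="public.orders", properties=props)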
1152
+
1153
+
1154
+ # Helper functions for metadata queries
1155
+
1156
+
1157
+ def _get_primary_key_column(adapter: BaseAdapter, schema: str, table: str) -> Optional[str]:
1158
+ """
1159
+ Get primary key column name (if single-column PK exists).
1160
+
1161
+ Implementation is adapter-specific. Returns None if not implemented
1162
+ or if PK is composite.
1163
+ """
1164
+ adapter_type = adapter.type().lower()
1165
+
1166
+ try:
1167
+ if adapter_type in ("postgres", "postgresql", "redshift"):
1168
+ # PostgreSQL/Redshift: Query information_schema
1169
+ sql = f"""
1170
+ SELECT a.attname
1171
+ FROM pg_index i
1172
+ JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
1173
+ WHERE i.indrelid = '{schema}.{table}'::regclass
1174
+ AND i.indisprimary
1175
+ """
1176
+ response, result = adapter.execute(sql, auto_begin=False, fetch=True)
1177
+ if result and len(result.rows) == 1:
1178
+ return result.rows[0][0]
1179
+
1180
+ elif adapter_type == "mysql":
1181
+ # MySQL: Query information_schema
1182
+ sql = f"""
1183
+ SELECT COLUMN_NAME
1184
+ FROM information_schema.KEY_COLUMN_USAGE
1185
+ WHERE TABLE_SCHEMA = '{schema}'
1186
+ AND TABLE_NAME = '{table}'
1187
+ AND CONSTRAINT_NAME = 'PRIMARY'
1188
+ """
1189
+ response, result = adapter.execute(sql, auto_begin=False, fetch=True)
1190
+ if result and len(result.rows) == 1:
1191
+ return result.rows[0][0]
1192
+
1193
+ # For other adapters or if query fails, return None
1194
+ return None
1195
+
1196
+ except Exception:
1197
+ return None
1198
+
1199
+
1200
+ def _get_table_columns(adapter: BaseAdapter, schema: str, table: str) -> list[Tuple[str, str]]:
1201
+ """
1202
+ Get list of (column_name, column_type) for a table.
1203
+ """
1204
+ adapter_type = adapter.type().lower()
1205
+
1206
+ try:
1207
+ if adapter_type in ("postgres", "postgresql", "redshift"):
1208
+ sql = f"""
1209
+ SELECT column_name, data_type
1210
+ FROM information_schema.columns
1211
+ WHERE table_schema = '{schema}'
1212
+ AND table_name = '{table}'
1213
+ ORDER BY ordinal_position
1214
+ """
1215
+ response, result = adapter.execute(sql, auto_begin=False, fetch=True)
1216
+ return [(row[0], row[1]) for row in result.rows]
1217
+
1218
+ elif adapter_type == "mysql":
1219
+ sql = f"""
1220
+ SELECT COLUMN_NAME, DATA_TYPE
1221
+ FROM information_schema.COLUMNS
1222
+ WHERE TABLE_SCHEMA = '{schema}'
1223
+ AND TABLE_NAME = '{table}'
1224
+ ORDER BY ORDINAL_POSITION
1225
+ """
1226
+ response, result = adapter.execute(sql, auto_begin=False, fetch=True)
1227
+ return [(row[0], row[1]) for row in result.rows]
1228
+
1229
+ else:
1230
+ # Fallback: Use LIMIT 0 query to get columns
1231
+ sql = f"SELECT * FROM {schema}.{table} LIMIT 0"
1232
+ response, result = adapter.execute(sql, auto_begin=False, fetch=True)
1233
+ # Return column names with unknown types
1234
+ return [(col, "unknown") for col in result.column_names]
1235
+
1236
+ except Exception:
1237
+ return []
1238
+
1239
+
1240
+ def _is_integer_type(sql_type: str) -> bool:
1241
+ """Check if SQL type is an integer type."""
1242
+ sql_type_upper = sql_type.upper()
1243
+ return any(
1244
+ int_type in sql_type_upper
1245
+ for int_type in ["INT", "INTEGER", "BIGINT", "SMALLINT", "SERIAL"]
1246
+ )
1247
+
1248
+
1249
+ def _is_timestamp_type(sql_type: str) -> bool:
1250
+ """Check if SQL type is a timestamp/date type."""
1251
+ sql_type_upper = sql_type.upper()
1252
+ return any(time_type in sql_type_upper for time_type in ["TIMESTAMP", "DATETIME", "DATE"])
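Finally, a small hypothetical sanity check for the two type helpers above (pytest-style, not shipped with the package):

def test_type_helpers():
    # Integer-like types are first-choice partition candidates.
    assert _is_integer_type("BIGINT")
    assert _is_integer_type("integer")
    assert not _is_integer_type("VARCHAR(255)")
    # Timestamp/date types are the second choice.
    assert _is_timestamp_type("timestamp without time zone")
    assert _is_timestamp_type("DATE")
    assert not _is_timestamp_type("TEXT")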