dvt-core 0.59.0a51__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt/__init__.py +7 -0
- dbt/_pydantic_shim.py +26 -0
- dbt/artifacts/__init__.py +0 -0
- dbt/artifacts/exceptions/__init__.py +1 -0
- dbt/artifacts/exceptions/schemas.py +31 -0
- dbt/artifacts/resources/__init__.py +116 -0
- dbt/artifacts/resources/base.py +67 -0
- dbt/artifacts/resources/types.py +93 -0
- dbt/artifacts/resources/v1/analysis.py +10 -0
- dbt/artifacts/resources/v1/catalog.py +23 -0
- dbt/artifacts/resources/v1/components.py +274 -0
- dbt/artifacts/resources/v1/config.py +277 -0
- dbt/artifacts/resources/v1/documentation.py +11 -0
- dbt/artifacts/resources/v1/exposure.py +51 -0
- dbt/artifacts/resources/v1/function.py +52 -0
- dbt/artifacts/resources/v1/generic_test.py +31 -0
- dbt/artifacts/resources/v1/group.py +21 -0
- dbt/artifacts/resources/v1/hook.py +11 -0
- dbt/artifacts/resources/v1/macro.py +29 -0
- dbt/artifacts/resources/v1/metric.py +172 -0
- dbt/artifacts/resources/v1/model.py +145 -0
- dbt/artifacts/resources/v1/owner.py +10 -0
- dbt/artifacts/resources/v1/saved_query.py +111 -0
- dbt/artifacts/resources/v1/seed.py +41 -0
- dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dbt/artifacts/resources/v1/semantic_model.py +314 -0
- dbt/artifacts/resources/v1/singular_test.py +14 -0
- dbt/artifacts/resources/v1/snapshot.py +91 -0
- dbt/artifacts/resources/v1/source_definition.py +84 -0
- dbt/artifacts/resources/v1/sql_operation.py +10 -0
- dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
- dbt/artifacts/schemas/__init__.py +0 -0
- dbt/artifacts/schemas/base.py +191 -0
- dbt/artifacts/schemas/batch_results.py +24 -0
- dbt/artifacts/schemas/catalog/__init__.py +11 -0
- dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
- dbt/artifacts/schemas/freshness/__init__.py +1 -0
- dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
- dbt/artifacts/schemas/manifest/__init__.py +2 -0
- dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
- dbt/artifacts/schemas/results.py +147 -0
- dbt/artifacts/schemas/run/__init__.py +2 -0
- dbt/artifacts/schemas/run/v5/__init__.py +0 -0
- dbt/artifacts/schemas/run/v5/run.py +184 -0
- dbt/artifacts/schemas/upgrades/__init__.py +4 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dbt/artifacts/utils/validation.py +153 -0
- dbt/cli/__init__.py +1 -0
- dbt/cli/context.py +17 -0
- dbt/cli/exceptions.py +57 -0
- dbt/cli/flags.py +560 -0
- dbt/cli/main.py +2660 -0
- dbt/cli/option_types.py +121 -0
- dbt/cli/options.py +80 -0
- dbt/cli/params.py +844 -0
- dbt/cli/requires.py +490 -0
- dbt/cli/resolvers.py +60 -0
- dbt/cli/types.py +40 -0
- dbt/clients/__init__.py +0 -0
- dbt/clients/checked_load.py +83 -0
- dbt/clients/git.py +164 -0
- dbt/clients/jinja.py +206 -0
- dbt/clients/jinja_static.py +245 -0
- dbt/clients/registry.py +192 -0
- dbt/clients/yaml_helper.py +68 -0
- dbt/compilation.py +876 -0
- dbt/compute/__init__.py +14 -0
- dbt/compute/engines/__init__.py +12 -0
- dbt/compute/engines/spark_engine.py +642 -0
- dbt/compute/federated_executor.py +1080 -0
- dbt/compute/filter_pushdown.py +273 -0
- dbt/compute/jar_provisioning.py +273 -0
- dbt/compute/java_compat.py +689 -0
- dbt/compute/jdbc_utils.py +1252 -0
- dbt/compute/metadata/__init__.py +63 -0
- dbt/compute/metadata/adapters_registry.py +370 -0
- dbt/compute/metadata/catalog_store.py +1036 -0
- dbt/compute/metadata/registry.py +674 -0
- dbt/compute/metadata/store.py +1020 -0
- dbt/compute/smart_selector.py +377 -0
- dbt/compute/spark_logger.py +272 -0
- dbt/compute/strategies/__init__.py +55 -0
- dbt/compute/strategies/base.py +165 -0
- dbt/compute/strategies/dataproc.py +207 -0
- dbt/compute/strategies/emr.py +203 -0
- dbt/compute/strategies/local.py +472 -0
- dbt/compute/strategies/standalone.py +262 -0
- dbt/config/__init__.py +4 -0
- dbt/config/catalogs.py +94 -0
- dbt/config/compute.py +513 -0
- dbt/config/dvt_profile.py +408 -0
- dbt/config/profile.py +422 -0
- dbt/config/project.py +888 -0
- dbt/config/project_utils.py +48 -0
- dbt/config/renderer.py +231 -0
- dbt/config/runtime.py +564 -0
- dbt/config/selectors.py +208 -0
- dbt/config/utils.py +77 -0
- dbt/constants.py +28 -0
- dbt/context/__init__.py +0 -0
- dbt/context/base.py +745 -0
- dbt/context/configured.py +135 -0
- dbt/context/context_config.py +382 -0
- dbt/context/docs.py +82 -0
- dbt/context/exceptions_jinja.py +178 -0
- dbt/context/macro_resolver.py +195 -0
- dbt/context/macros.py +171 -0
- dbt/context/manifest.py +72 -0
- dbt/context/providers.py +2249 -0
- dbt/context/query_header.py +13 -0
- dbt/context/secret.py +58 -0
- dbt/context/target.py +74 -0
- dbt/contracts/__init__.py +0 -0
- dbt/contracts/files.py +413 -0
- dbt/contracts/graph/__init__.py +0 -0
- dbt/contracts/graph/manifest.py +1904 -0
- dbt/contracts/graph/metrics.py +97 -0
- dbt/contracts/graph/model_config.py +70 -0
- dbt/contracts/graph/node_args.py +42 -0
- dbt/contracts/graph/nodes.py +1806 -0
- dbt/contracts/graph/semantic_manifest.py +232 -0
- dbt/contracts/graph/unparsed.py +811 -0
- dbt/contracts/project.py +419 -0
- dbt/contracts/results.py +53 -0
- dbt/contracts/selection.py +23 -0
- dbt/contracts/sql.py +85 -0
- dbt/contracts/state.py +68 -0
- dbt/contracts/util.py +46 -0
- dbt/deprecations.py +348 -0
- dbt/deps/__init__.py +0 -0
- dbt/deps/base.py +152 -0
- dbt/deps/git.py +195 -0
- dbt/deps/local.py +79 -0
- dbt/deps/registry.py +130 -0
- dbt/deps/resolver.py +149 -0
- dbt/deps/tarball.py +120 -0
- dbt/docs/source/_ext/dbt_click.py +119 -0
- dbt/docs/source/conf.py +32 -0
- dbt/env_vars.py +64 -0
- dbt/event_time/event_time.py +40 -0
- dbt/event_time/sample_window.py +60 -0
- dbt/events/__init__.py +15 -0
- dbt/events/base_types.py +36 -0
- dbt/events/core_types_pb2.py +2 -0
- dbt/events/logging.py +108 -0
- dbt/events/types.py +2516 -0
- dbt/exceptions.py +1486 -0
- dbt/flags.py +89 -0
- dbt/graph/__init__.py +11 -0
- dbt/graph/cli.py +249 -0
- dbt/graph/graph.py +172 -0
- dbt/graph/queue.py +214 -0
- dbt/graph/selector.py +374 -0
- dbt/graph/selector_methods.py +975 -0
- dbt/graph/selector_spec.py +222 -0
- dbt/graph/thread_pool.py +18 -0
- dbt/hooks.py +21 -0
- dbt/include/README.md +49 -0
- dbt/include/__init__.py +3 -0
- dbt/include/data/adapters_registry.duckdb +0 -0
- dbt/include/data/build_comprehensive_registry.py +1254 -0
- dbt/include/data/build_registry.py +242 -0
- dbt/include/data/csv/adapter_queries.csv +33 -0
- dbt/include/data/csv/syntax_rules.csv +9 -0
- dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
- dbt/include/data/csv/type_mappings_databricks.csv +30 -0
- dbt/include/data/csv/type_mappings_mysql.csv +40 -0
- dbt/include/data/csv/type_mappings_oracle.csv +30 -0
- dbt/include/data/csv/type_mappings_postgres.csv +56 -0
- dbt/include/data/csv/type_mappings_redshift.csv +33 -0
- dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
- dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
- dbt/include/dvt_starter_project/README.md +15 -0
- dbt/include/dvt_starter_project/__init__.py +3 -0
- dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/dvt_project.yml +39 -0
- dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
- dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
- dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
- dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
- dbt/internal_deprecations.py +26 -0
- dbt/jsonschemas/__init__.py +3 -0
- dbt/jsonschemas/jsonschemas.py +309 -0
- dbt/jsonschemas/project/0.0.110.json +4717 -0
- dbt/jsonschemas/project/0.0.85.json +2015 -0
- dbt/jsonschemas/resources/0.0.110.json +2636 -0
- dbt/jsonschemas/resources/0.0.85.json +2536 -0
- dbt/jsonschemas/resources/latest.json +6773 -0
- dbt/links.py +4 -0
- dbt/materializations/__init__.py +0 -0
- dbt/materializations/incremental/__init__.py +0 -0
- dbt/materializations/incremental/microbatch.py +236 -0
- dbt/mp_context.py +8 -0
- dbt/node_types.py +37 -0
- dbt/parser/__init__.py +23 -0
- dbt/parser/analysis.py +21 -0
- dbt/parser/base.py +548 -0
- dbt/parser/common.py +266 -0
- dbt/parser/docs.py +52 -0
- dbt/parser/fixtures.py +51 -0
- dbt/parser/functions.py +30 -0
- dbt/parser/generic_test.py +100 -0
- dbt/parser/generic_test_builders.py +333 -0
- dbt/parser/hooks.py +122 -0
- dbt/parser/macros.py +137 -0
- dbt/parser/manifest.py +2208 -0
- dbt/parser/models.py +573 -0
- dbt/parser/partial.py +1178 -0
- dbt/parser/read_files.py +445 -0
- dbt/parser/schema_generic_tests.py +422 -0
- dbt/parser/schema_renderer.py +111 -0
- dbt/parser/schema_yaml_readers.py +935 -0
- dbt/parser/schemas.py +1466 -0
- dbt/parser/search.py +149 -0
- dbt/parser/seeds.py +28 -0
- dbt/parser/singular_test.py +20 -0
- dbt/parser/snapshots.py +44 -0
- dbt/parser/sources.py +558 -0
- dbt/parser/sql.py +62 -0
- dbt/parser/unit_tests.py +621 -0
- dbt/plugins/__init__.py +20 -0
- dbt/plugins/contracts.py +9 -0
- dbt/plugins/exceptions.py +2 -0
- dbt/plugins/manager.py +163 -0
- dbt/plugins/manifest.py +21 -0
- dbt/profiler.py +20 -0
- dbt/py.typed +1 -0
- dbt/query_analyzer.py +410 -0
- dbt/runners/__init__.py +2 -0
- dbt/runners/exposure_runner.py +7 -0
- dbt/runners/no_op_runner.py +45 -0
- dbt/runners/saved_query_runner.py +7 -0
- dbt/selected_resources.py +8 -0
- dbt/task/__init__.py +0 -0
- dbt/task/base.py +506 -0
- dbt/task/build.py +197 -0
- dbt/task/clean.py +56 -0
- dbt/task/clone.py +161 -0
- dbt/task/compile.py +150 -0
- dbt/task/compute.py +458 -0
- dbt/task/debug.py +513 -0
- dbt/task/deps.py +280 -0
- dbt/task/docs/__init__.py +3 -0
- dbt/task/docs/api/__init__.py +23 -0
- dbt/task/docs/api/catalog.py +204 -0
- dbt/task/docs/api/lineage.py +234 -0
- dbt/task/docs/api/profile.py +204 -0
- dbt/task/docs/api/spark.py +186 -0
- dbt/task/docs/generate.py +1002 -0
- dbt/task/docs/index.html +250 -0
- dbt/task/docs/serve.py +174 -0
- dbt/task/dvt_output.py +509 -0
- dbt/task/dvt_run.py +282 -0
- dbt/task/dvt_seed.py +806 -0
- dbt/task/freshness.py +322 -0
- dbt/task/function.py +121 -0
- dbt/task/group_lookup.py +46 -0
- dbt/task/init.py +1022 -0
- dbt/task/java.py +316 -0
- dbt/task/list.py +236 -0
- dbt/task/metadata.py +804 -0
- dbt/task/migrate.py +714 -0
- dbt/task/printer.py +175 -0
- dbt/task/profile.py +1489 -0
- dbt/task/profile_serve.py +662 -0
- dbt/task/retract.py +441 -0
- dbt/task/retry.py +175 -0
- dbt/task/run.py +1647 -0
- dbt/task/run_operation.py +141 -0
- dbt/task/runnable.py +758 -0
- dbt/task/seed.py +103 -0
- dbt/task/show.py +149 -0
- dbt/task/snapshot.py +56 -0
- dbt/task/spark.py +414 -0
- dbt/task/sql.py +110 -0
- dbt/task/target_sync.py +814 -0
- dbt/task/test.py +464 -0
- dbt/tests/fixtures/__init__.py +1 -0
- dbt/tests/fixtures/project.py +620 -0
- dbt/tests/util.py +651 -0
- dbt/tracking.py +529 -0
- dbt/utils/__init__.py +3 -0
- dbt/utils/artifact_upload.py +151 -0
- dbt/utils/utils.py +408 -0
- dbt/version.py +271 -0
- dvt_cli/__init__.py +158 -0
- dvt_core-0.59.0a51.dist-info/METADATA +288 -0
- dvt_core-0.59.0a51.dist-info/RECORD +299 -0
- dvt_core-0.59.0a51.dist-info/WHEEL +5 -0
- dvt_core-0.59.0a51.dist-info/entry_points.txt +2 -0
- dvt_core-0.59.0a51.dist-info/top_level.txt +2 -0
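The diff below shows the new dbt/compute/jdbc_utils.py module (+1252 lines in the listing above). As a hedged illustration only — this snippet is not part of the packaged code — the (url, properties) tuple returned by its build_jdbc_config helper is the shape Spark's generic JDBC reader expects. The sketch assumes a local PySpark session; the credential values and table name are illustrative and mirror the docstring example inside the module, and the partition bounds echo the estimate_partition_bounds docstring.

    from pyspark.sql import SparkSession
    from dbt.adapters.postgres import PostgresCredentials
    from dbt.compute.jdbc_utils import build_jdbc_config

    # Illustrative credentials, mirroring the module's own docstring example.
    creds = PostgresCredentials(
        host="localhost", port=5432, user="analytics",
        password="secret", database="warehouse", schema="public",
    )
    jdbc_url, jdbc_props = build_jdbc_config(creds)

    spark = SparkSession.builder.appName("dvt-jdbc-sketch").getOrCreate()

    # Plain JDBC read: url plus the user/password/driver properties built above.
    df = (
        spark.read.format("jdbc")
        .option("url", jdbc_url)             # e.g. jdbc:postgresql://localhost:5432/warehouse
        .option("dbtable", "public.orders")  # illustrative source table
        .options(**jdbc_props)               # user / password / driver from JDBC_DRIVER_MAPPING
        .load()
    )

    # Partitioned read: the module's auto_detect_partition_column and
    # estimate_partition_bounds helpers are meant to supply these options.
    partitioned_df = (
        spark.read.format("jdbc")
        .option("url", jdbc_url)
        .option("dbtable", "public.orders")
        .option("partitionColumn", "order_id")  # e.g. from auto_detect_partition_column
        .option("lowerBound", 1)                # e.g. from estimate_partition_bounds
        .option("upperBound", 1_000_000)
        .option("numPartitions", 8)
        .options(**jdbc_props)
        .load()
    )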
|
@@ -0,0 +1,1252 @@
|
|
|
1
|
+
"""
|
|
2
|
+
JDBC Utilities for Spark Engine
|
|
3
|
+
|
|
4
|
+
Provides utilities for converting dbt adapter credentials to JDBC configurations
|
|
5
|
+
and helpers for optimizing parallel reads via partitioning.
|
|
6
|
+
|
|
7
|
+
This module enables DVT to bypass memory bottlenecks by using Spark JDBC connectors
|
|
8
|
+
to read data directly from source databases into Spark workers (distributed).
|
|
9
|
+
|
|
10
|
+
Architecture:
|
|
11
|
+
- Maps adapter credentials → JDBC URL + properties
|
|
12
|
+
- Auto-detects optimal partition columns for parallel reads
|
|
13
|
+
- Estimates partition bounds for efficient data distribution
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from typing import Dict, Optional, Tuple
|
|
17
|
+
|
|
18
|
+
from dbt.adapters.base import BaseAdapter
|
|
19
|
+
from dbt.adapters.contracts.connection import Credentials
|
|
20
|
+
from dbt_common.exceptions import DbtRuntimeError
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# JDBC driver class mapping for database types
|
|
24
|
+
# DVT v0.5.9: Complete support for all dbt adapters with JDBC connectivity
|
|
25
|
+
JDBC_DRIVER_MAPPING = {
|
|
26
|
+
# ============================================================
|
|
27
|
+
# Cloud Data Warehouses
|
|
28
|
+
# ============================================================
|
|
29
|
+
"postgres": "org.postgresql.Driver",
|
|
30
|
+
"postgresql": "org.postgresql.Driver",
|
|
31
|
+
"snowflake": "net.snowflake.client.jdbc.SnowflakeDriver",
|
|
32
|
+
"bigquery": "com.simba.googlebigquery.jdbc.Driver",
|
|
33
|
+
"redshift": "com.amazon.redshift.jdbc.Driver",
|
|
34
|
+
"databricks": "com.databricks.client.jdbc.Driver",
|
|
35
|
+
"firebolt": "com.firebolt.FireboltDriver",
|
|
36
|
+
|
|
37
|
+
# ============================================================
|
|
38
|
+
# Microsoft Ecosystem (all use same JDBC driver)
|
|
39
|
+
# ============================================================
|
|
40
|
+
"sqlserver": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
|
|
41
|
+
"mssql": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
|
|
42
|
+
"fabric": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
|
|
43
|
+
"synapse": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
|
|
44
|
+
|
|
45
|
+
# ============================================================
|
|
46
|
+
# Enterprise Data Warehouses
|
|
47
|
+
# ============================================================
|
|
48
|
+
"oracle": "oracle.jdbc.OracleDriver",
|
|
49
|
+
"db2": "com.ibm.db2.jcc.DB2Driver",
|
|
50
|
+
"teradata": "com.teradata.jdbc.TeraDriver",
|
|
51
|
+
"exasol": "com.exasol.jdbc.EXADriver",
|
|
52
|
+
"vertica": "com.vertica.jdbc.Driver",
|
|
53
|
+
|
|
54
|
+
# ============================================================
|
|
55
|
+
# SQL Engines & Query Platforms
|
|
56
|
+
# ============================================================
|
|
57
|
+
"spark": "org.apache.hive.jdbc.HiveDriver",
|
|
58
|
+
"trino": "io.trino.jdbc.TrinoDriver",
|
|
59
|
+
"presto": "io.prestosql.jdbc.PrestoDriver",
|
|
60
|
+
"athena": "com.simba.athena.jdbc.Driver",
|
|
61
|
+
"hive": "org.apache.hive.jdbc.HiveDriver",
|
|
62
|
+
"impala": "com.cloudera.impala.jdbc.Driver",
|
|
63
|
+
"dremio": "com.dremio.jdbc.Driver",
|
|
64
|
+
"glue": "com.amazonaws.glue.sql.jdbc.Driver",
|
|
65
|
+
|
|
66
|
+
# ============================================================
|
|
67
|
+
# Open Source Databases
|
|
68
|
+
# ============================================================
|
|
69
|
+
"mysql": "com.mysql.cj.jdbc.Driver",
|
|
70
|
+
"mariadb": "org.mariadb.jdbc.Driver",
|
|
71
|
+
"sqlite": "org.sqlite.JDBC",
|
|
72
|
+
"duckdb": "org.duckdb.DuckDBDriver",
|
|
73
|
+
"cratedb": "io.crate.client.jdbc.CrateDriver",
|
|
74
|
+
|
|
75
|
+
# ============================================================
|
|
76
|
+
# OLAP & Analytics Databases
|
|
77
|
+
# ============================================================
|
|
78
|
+
"clickhouse": "com.clickhouse.jdbc.ClickHouseDriver",
|
|
79
|
+
"singlestore": "com.singlestore.jdbc.Driver",
|
|
80
|
+
"starrocks": "com.mysql.cj.jdbc.Driver", # StarRocks uses MySQL protocol
|
|
81
|
+
"doris": "com.mysql.cj.jdbc.Driver", # Apache Doris uses MySQL protocol
|
|
82
|
+
"greenplum": "org.postgresql.Driver", # Greenplum uses PostgreSQL protocol
|
|
83
|
+
"monetdb": "org.monetdb.jdbc.MonetDriver",
|
|
84
|
+
|
|
85
|
+
# ============================================================
|
|
86
|
+
# Time-Series & Streaming
|
|
87
|
+
# ============================================================
|
|
88
|
+
"timescaledb": "org.postgresql.Driver", # TimescaleDB uses PostgreSQL
|
|
89
|
+
"questdb": "org.postgresql.Driver", # QuestDB supports PostgreSQL wire protocol
|
|
90
|
+
"materialize": "org.postgresql.Driver", # Materialize uses PostgreSQL wire protocol
|
|
91
|
+
"rockset": "com.rockset.jdbc.RocksetDriver",
|
|
92
|
+
|
|
93
|
+
# ============================================================
|
|
94
|
+
# Graph & Multi-Model
|
|
95
|
+
# ============================================================
|
|
96
|
+
"neo4j": "org.neo4j.Driver",
|
|
97
|
+
|
|
98
|
+
# ============================================================
|
|
99
|
+
# Data Lake Formats (via Spark connectors)
|
|
100
|
+
# ============================================================
|
|
101
|
+
"delta": "org.apache.hive.jdbc.HiveDriver", # Delta Lake via Spark
|
|
102
|
+
"iceberg": "org.apache.hive.jdbc.HiveDriver", # Apache Iceberg via Spark
|
|
103
|
+
"hudi": "org.apache.hive.jdbc.HiveDriver", # Apache Hudi via Spark
|
|
104
|
+
|
|
105
|
+
# ============================================================
|
|
106
|
+
# AlloyDB (Google - PostgreSQL compatible)
|
|
107
|
+
# ============================================================
|
|
108
|
+
"alloydb": "org.postgresql.Driver", # AlloyDB is PostgreSQL-compatible
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _rewrite_localhost_for_docker(jdbc_url: str) -> str:
|
|
113
|
+
"""
|
|
114
|
+
Rewrite localhost/127.0.0.1 to host.docker.internal for Docker Spark clusters.
|
|
115
|
+
|
|
116
|
+
DVT v0.51.8: When using Docker-based Spark clusters, workers inside containers
|
|
117
|
+
need host.docker.internal to reach the host machine. With host.docker.internal
|
|
118
|
+
also added to the host's /etc/hosts (pointing to 127.0.0.1), the same JDBC URL
|
|
119
|
+
works for both driver (on host) and workers (in containers).
|
|
120
|
+
|
|
121
|
+
:param jdbc_url: Original JDBC URL
|
|
122
|
+
:returns: JDBC URL with localhost replaced by host.docker.internal
|
|
123
|
+
"""
|
|
124
|
+
import re
|
|
125
|
+
# Replace localhost or 127.0.0.1 with host.docker.internal
|
|
126
|
+
url = re.sub(r'//localhost([:/?])', r'//host.docker.internal\1', jdbc_url)
|
|
127
|
+
url = re.sub(r'//127\.0\.0\.1([:/?])', r'//host.docker.internal\1', url)
|
|
128
|
+
return url
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# Global flag to enable Docker JDBC URL rewriting
|
|
132
|
+
_docker_mode_enabled = False
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def set_docker_mode(enabled: bool) -> None:
|
|
136
|
+
"""Enable or disable Docker mode for JDBC URL rewriting."""
|
|
137
|
+
global _docker_mode_enabled
|
|
138
|
+
_docker_mode_enabled = enabled
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def build_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
142
|
+
"""
|
|
143
|
+
Build JDBC configuration from dbt adapter credentials.
|
|
144
|
+
|
|
145
|
+
Converts adapter-specific credentials to JDBC URL and connection properties
|
|
146
|
+
that can be used by Spark JDBC connectors.
|
|
147
|
+
|
|
148
|
+
:param credentials: dbt adapter credentials object
|
|
149
|
+
:returns: Tuple of (jdbc_url, jdbc_properties)
|
|
150
|
+
:raises DbtRuntimeError: If adapter type is not supported or credentials are invalid
|
|
151
|
+
|
|
152
|
+
Example:
|
|
153
|
+
>>> from dbt.adapters.postgres import PostgresCredentials
|
|
154
|
+
>>> creds = PostgresCredentials(
|
|
155
|
+
... host="localhost",
|
|
156
|
+
... port=5432,
|
|
157
|
+
... user="analytics",
|
|
158
|
+
... password="secret",
|
|
159
|
+
... database="warehouse",
|
|
160
|
+
... schema="public"
|
|
161
|
+
... )
|
|
162
|
+
>>> url, props = build_jdbc_config(creds)
|
|
163
|
+
>>> print(url)
|
|
164
|
+
jdbc:postgresql://localhost:5432/warehouse
|
|
165
|
+
>>> print(props)
|
|
166
|
+
{'user': 'analytics', 'password': 'secret', 'driver': 'org.postgresql.Driver'}
|
|
167
|
+
"""
|
|
168
|
+
adapter_type = credentials.type.lower()
|
|
169
|
+
|
|
170
|
+
# Check if adapter type is supported
|
|
171
|
+
if adapter_type not in JDBC_DRIVER_MAPPING:
|
|
172
|
+
raise DbtRuntimeError(
|
|
173
|
+
f"JDBC connectivity not supported for adapter type '{adapter_type}'. "
|
|
174
|
+
f"Supported types: {', '.join(JDBC_DRIVER_MAPPING.keys())}"
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
# Build JDBC URL and properties based on adapter type
|
|
178
|
+
# DVT v0.59.0a30: Comprehensive JDBC support for ALL dbt adapters
|
|
179
|
+
if adapter_type in ("postgres", "postgresql"):
|
|
180
|
+
jdbc_url, jdbc_props = _build_postgres_jdbc_config(credentials)
|
|
181
|
+
elif adapter_type == "mysql":
|
|
182
|
+
jdbc_url, jdbc_props = _build_mysql_jdbc_config(credentials)
|
|
183
|
+
elif adapter_type == "snowflake":
|
|
184
|
+
jdbc_url, jdbc_props = _build_snowflake_jdbc_config(credentials)
|
|
185
|
+
elif adapter_type == "redshift":
|
|
186
|
+
jdbc_url, jdbc_props = _build_redshift_jdbc_config(credentials)
|
|
187
|
+
elif adapter_type == "bigquery":
|
|
188
|
+
jdbc_url, jdbc_props = _build_bigquery_jdbc_config(credentials)
|
|
189
|
+
elif adapter_type in ("sqlserver", "mssql", "fabric", "synapse"):
|
|
190
|
+
jdbc_url, jdbc_props = _build_sqlserver_jdbc_config(credentials)
|
|
191
|
+
elif adapter_type == "oracle":
|
|
192
|
+
jdbc_url, jdbc_props = _build_oracle_jdbc_config(credentials)
|
|
193
|
+
elif adapter_type == "databricks":
|
|
194
|
+
jdbc_url, jdbc_props = _build_databricks_jdbc_config(credentials)
|
|
195
|
+
elif adapter_type == "duckdb":
|
|
196
|
+
jdbc_url, jdbc_props = _build_duckdb_jdbc_config(credentials)
|
|
197
|
+
elif adapter_type == "mariadb":
|
|
198
|
+
jdbc_url, jdbc_props = _build_mariadb_jdbc_config(credentials)
|
|
199
|
+
elif adapter_type == "sqlite":
|
|
200
|
+
jdbc_url, jdbc_props = _build_sqlite_jdbc_config(credentials)
|
|
201
|
+
elif adapter_type == "clickhouse":
|
|
202
|
+
jdbc_url, jdbc_props = _build_clickhouse_jdbc_config(credentials)
|
|
203
|
+
elif adapter_type == "trino":
|
|
204
|
+
jdbc_url, jdbc_props = _build_trino_jdbc_config(credentials)
|
|
205
|
+
elif adapter_type == "presto":
|
|
206
|
+
jdbc_url, jdbc_props = _build_presto_jdbc_config(credentials)
|
|
207
|
+
elif adapter_type == "athena":
|
|
208
|
+
jdbc_url, jdbc_props = _build_athena_jdbc_config(credentials)
|
|
209
|
+
elif adapter_type in ("hive", "spark"):
|
|
210
|
+
jdbc_url, jdbc_props = _build_hive_jdbc_config(credentials)
|
|
211
|
+
elif adapter_type == "impala":
|
|
212
|
+
jdbc_url, jdbc_props = _build_impala_jdbc_config(credentials)
|
|
213
|
+
elif adapter_type == "teradata":
|
|
214
|
+
jdbc_url, jdbc_props = _build_teradata_jdbc_config(credentials)
|
|
215
|
+
elif adapter_type == "exasol":
|
|
216
|
+
jdbc_url, jdbc_props = _build_exasol_jdbc_config(credentials)
|
|
217
|
+
elif adapter_type == "vertica":
|
|
218
|
+
jdbc_url, jdbc_props = _build_vertica_jdbc_config(credentials)
|
|
219
|
+
elif adapter_type == "db2":
|
|
220
|
+
jdbc_url, jdbc_props = _build_db2_jdbc_config(credentials)
|
|
221
|
+
elif adapter_type == "singlestore":
|
|
222
|
+
jdbc_url, jdbc_props = _build_singlestore_jdbc_config(credentials)
|
|
223
|
+
elif adapter_type in ("starrocks", "doris"):
|
|
224
|
+
# StarRocks and Doris use MySQL protocol
|
|
225
|
+
jdbc_url, jdbc_props = _build_mysql_jdbc_config(credentials)
|
|
226
|
+
elif adapter_type in ("greenplum", "timescaledb", "questdb", "materialize", "alloydb"):
|
|
227
|
+
# These use PostgreSQL protocol
|
|
228
|
+
jdbc_url, jdbc_props = _build_postgres_jdbc_config(credentials)
|
|
229
|
+
elif adapter_type == "dremio":
|
|
230
|
+
jdbc_url, jdbc_props = _build_dremio_jdbc_config(credentials)
|
|
231
|
+
elif adapter_type == "firebolt":
|
|
232
|
+
jdbc_url, jdbc_props = _build_firebolt_jdbc_config(credentials)
|
|
233
|
+
elif adapter_type == "rockset":
|
|
234
|
+
jdbc_url, jdbc_props = _build_rockset_jdbc_config(credentials)
|
|
235
|
+
elif adapter_type == "monetdb":
|
|
236
|
+
jdbc_url, jdbc_props = _build_monetdb_jdbc_config(credentials)
|
|
237
|
+
elif adapter_type == "cratedb":
|
|
238
|
+
jdbc_url, jdbc_props = _build_cratedb_jdbc_config(credentials)
|
|
239
|
+
else:
|
|
240
|
+
# Fallback: Try generic builder based on credentials structure
|
|
241
|
+
jdbc_url, jdbc_props = _build_generic_jdbc_config(credentials, adapter_type)
|
|
242
|
+
|
|
243
|
+
# DVT v0.51.8: Rewrite localhost URLs for Docker Spark clusters
|
|
244
|
+
if _docker_mode_enabled:
|
|
245
|
+
jdbc_url = _rewrite_localhost_for_docker(jdbc_url)
|
|
246
|
+
|
|
247
|
+
return jdbc_url, jdbc_props
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def _build_postgres_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
251
|
+
"""Build JDBC config for PostgreSQL."""
|
|
252
|
+
creds_dict = credentials.to_dict()
|
|
253
|
+
|
|
254
|
+
host = creds_dict.get("host", "localhost")
|
|
255
|
+
port = creds_dict.get("port", 5432)
|
|
256
|
+
database = creds_dict.get("database")
|
|
257
|
+
user = creds_dict.get("user")
|
|
258
|
+
password = creds_dict.get("password", "")
|
|
259
|
+
|
|
260
|
+
if not database:
|
|
261
|
+
raise DbtRuntimeError("PostgreSQL credentials missing required field: database")
|
|
262
|
+
if not user:
|
|
263
|
+
raise DbtRuntimeError("PostgreSQL credentials missing required field: user")
|
|
264
|
+
|
|
265
|
+
jdbc_url = f"jdbc:postgresql://{host}:{port}/{database}"
|
|
266
|
+
|
|
267
|
+
jdbc_properties = {
|
|
268
|
+
"user": user,
|
|
269
|
+
"password": password,
|
|
270
|
+
"driver": JDBC_DRIVER_MAPPING["postgres"],
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
# Optional: Add SSL configuration if present
|
|
274
|
+
if creds_dict.get("sslmode"):
|
|
275
|
+
jdbc_properties["ssl"] = "true" if creds_dict["sslmode"] != "disable" else "false"
|
|
276
|
+
|
|
277
|
+
return jdbc_url, jdbc_properties
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def _build_mysql_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
281
|
+
"""Build JDBC config for MySQL."""
|
|
282
|
+
creds_dict = credentials.to_dict()
|
|
283
|
+
|
|
284
|
+
host = creds_dict.get("host", "localhost")
|
|
285
|
+
port = creds_dict.get("port", 3306)
|
|
286
|
+
database = creds_dict.get("database")
|
|
287
|
+
user = creds_dict.get("user")
|
|
288
|
+
password = creds_dict.get("password", "")
|
|
289
|
+
|
|
290
|
+
if not database:
|
|
291
|
+
raise DbtRuntimeError("MySQL credentials missing required field: database")
|
|
292
|
+
if not user:
|
|
293
|
+
raise DbtRuntimeError("MySQL credentials missing required field: user")
|
|
294
|
+
|
|
295
|
+
jdbc_url = f"jdbc:mysql://{host}:{port}/{database}"
|
|
296
|
+
|
|
297
|
+
jdbc_properties = {
|
|
298
|
+
"user": user,
|
|
299
|
+
"password": password,
|
|
300
|
+
"driver": JDBC_DRIVER_MAPPING["mysql"],
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
return jdbc_url, jdbc_properties
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def _build_snowflake_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
307
|
+
"""Build JDBC config for Snowflake."""
|
|
308
|
+
creds_dict = credentials.to_dict()
|
|
309
|
+
|
|
310
|
+
account = creds_dict.get("account")
|
|
311
|
+
user = creds_dict.get("user")
|
|
312
|
+
password = creds_dict.get("password", "")
|
|
313
|
+
database = creds_dict.get("database")
|
|
314
|
+
warehouse = creds_dict.get("warehouse")
|
|
315
|
+
schema = creds_dict.get("schema", "public")
|
|
316
|
+
|
|
317
|
+
if not account:
|
|
318
|
+
raise DbtRuntimeError("Snowflake credentials missing required field: account")
|
|
319
|
+
if not user:
|
|
320
|
+
raise DbtRuntimeError("Snowflake credentials missing required field: user")
|
|
321
|
+
|
|
322
|
+
# Snowflake JDBC URL format with Arrow disabled via URL parameter
|
|
323
|
+
# This is more reliable than JDBC properties for Snowflake driver
|
|
324
|
+
jdbc_url = f"jdbc:snowflake://{account}.snowflakecomputing.com/?JDBC_QUERY_RESULT_FORMAT=JSON"
|
|
325
|
+
|
|
326
|
+
jdbc_properties = {
|
|
327
|
+
"user": user,
|
|
328
|
+
"password": password,
|
|
329
|
+
"driver": JDBC_DRIVER_MAPPING["snowflake"],
|
|
330
|
+
# CRITICAL FIX v0.4.4: Disable Arrow format to avoid Java 21 module access errors
|
|
331
|
+
# Property must be uppercase and set in BOTH URL and properties for reliability
|
|
332
|
+
"JDBC_QUERY_RESULT_FORMAT": "JSON",
|
|
333
|
+
"jdbc_query_result_format": "json", # Lowercase variant for compatibility
|
|
334
|
+
# Additional Snowflake-specific optimizations
|
|
335
|
+
"JDBC_USE_SESSION_TIMEZONE": "false", # Use UTC for consistency
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
# Add optional properties
|
|
339
|
+
if database:
|
|
340
|
+
jdbc_properties["db"] = database
|
|
341
|
+
if warehouse:
|
|
342
|
+
jdbc_properties["warehouse"] = warehouse
|
|
343
|
+
if schema:
|
|
344
|
+
jdbc_properties["schema"] = schema
|
|
345
|
+
|
|
346
|
+
return jdbc_url, jdbc_properties
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def _build_redshift_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
350
|
+
"""Build JDBC config for Amazon Redshift."""
|
|
351
|
+
creds_dict = credentials.to_dict()
|
|
352
|
+
|
|
353
|
+
host = creds_dict.get("host")
|
|
354
|
+
port = creds_dict.get("port", 5439)
|
|
355
|
+
database = creds_dict.get("database")
|
|
356
|
+
user = creds_dict.get("user")
|
|
357
|
+
password = creds_dict.get("password", "")
|
|
358
|
+
|
|
359
|
+
if not host:
|
|
360
|
+
raise DbtRuntimeError("Redshift credentials missing required field: host")
|
|
361
|
+
if not database:
|
|
362
|
+
raise DbtRuntimeError("Redshift credentials missing required field: database")
|
|
363
|
+
if not user:
|
|
364
|
+
raise DbtRuntimeError("Redshift credentials missing required field: user")
|
|
365
|
+
|
|
366
|
+
jdbc_url = f"jdbc:redshift://{host}:{port}/{database}"
|
|
367
|
+
|
|
368
|
+
jdbc_properties = {
|
|
369
|
+
"user": user,
|
|
370
|
+
"password": password,
|
|
371
|
+
"driver": JDBC_DRIVER_MAPPING["redshift"],
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
return jdbc_url, jdbc_properties
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def _build_bigquery_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
378
|
+
"""Build JDBC config for Google BigQuery."""
|
|
379
|
+
creds_dict = credentials.to_dict()
|
|
380
|
+
|
|
381
|
+
project = creds_dict.get("project")
|
|
382
|
+
dataset = creds_dict.get("dataset") or creds_dict.get("schema")
|
|
383
|
+
|
|
384
|
+
if not project:
|
|
385
|
+
raise DbtRuntimeError("BigQuery credentials missing required field: project")
|
|
386
|
+
|
|
387
|
+
# BigQuery JDBC URL format
|
|
388
|
+
jdbc_url = "jdbc:bigquery://https://www.googleapis.com/bigquery/v2:443"
|
|
389
|
+
|
|
390
|
+
jdbc_properties = {
|
|
391
|
+
"ProjectId": project,
|
|
392
|
+
"driver": JDBC_DRIVER_MAPPING["bigquery"],
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
if dataset:
|
|
396
|
+
jdbc_properties["DefaultDataset"] = dataset
|
|
397
|
+
|
|
398
|
+
# Handle authentication
|
|
399
|
+
# BigQuery typically uses service account JSON or OAuth
|
|
400
|
+
if creds_dict.get("keyfile"):
|
|
401
|
+
jdbc_properties["OAuthType"] = "0" # Service account
|
|
402
|
+
jdbc_properties["OAuthServiceAcctEmail"] = creds_dict.get("client_email", "")
|
|
403
|
+
jdbc_properties["OAuthPvtKeyPath"] = creds_dict["keyfile"]
|
|
404
|
+
|
|
405
|
+
return jdbc_url, jdbc_properties
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
def _build_sqlserver_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
409
|
+
"""Build JDBC config for Microsoft SQL Server."""
|
|
410
|
+
creds_dict = credentials.to_dict()
|
|
411
|
+
|
|
412
|
+
host = creds_dict.get("host", "localhost")
|
|
413
|
+
port = creds_dict.get("port", 1433)
|
|
414
|
+
database = creds_dict.get("database")
|
|
415
|
+
user = creds_dict.get("user")
|
|
416
|
+
password = creds_dict.get("password", "")
|
|
417
|
+
|
|
418
|
+
if not database:
|
|
419
|
+
raise DbtRuntimeError("SQL Server credentials missing required field: database")
|
|
420
|
+
if not user:
|
|
421
|
+
raise DbtRuntimeError("SQL Server credentials missing required field: user")
|
|
422
|
+
|
|
423
|
+
jdbc_url = f"jdbc:sqlserver://{host}:{port};databaseName={database}"
|
|
424
|
+
|
|
425
|
+
jdbc_properties = {
|
|
426
|
+
"user": user,
|
|
427
|
+
"password": password,
|
|
428
|
+
"driver": JDBC_DRIVER_MAPPING["sqlserver"],
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
return jdbc_url, jdbc_properties
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def _build_oracle_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
435
|
+
"""Build JDBC config for Oracle Database."""
|
|
436
|
+
creds_dict = credentials.to_dict()
|
|
437
|
+
|
|
438
|
+
host = creds_dict.get("host", "localhost")
|
|
439
|
+
port = creds_dict.get("port", 1521)
|
|
440
|
+
database = creds_dict.get("database") or creds_dict.get("service_name")
|
|
441
|
+
user = creds_dict.get("user")
|
|
442
|
+
password = creds_dict.get("password", "")
|
|
443
|
+
|
|
444
|
+
if not database:
|
|
445
|
+
raise DbtRuntimeError("Oracle credentials missing required field: database/service_name")
|
|
446
|
+
if not user:
|
|
447
|
+
raise DbtRuntimeError("Oracle credentials missing required field: user")
|
|
448
|
+
|
|
449
|
+
# Oracle thin driver format
|
|
450
|
+
jdbc_url = f"jdbc:oracle:thin:@{host}:{port}:{database}"
|
|
451
|
+
|
|
452
|
+
jdbc_properties = {
|
|
453
|
+
"user": user,
|
|
454
|
+
"password": password,
|
|
455
|
+
"driver": JDBC_DRIVER_MAPPING["oracle"],
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
return jdbc_url, jdbc_properties
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def _build_databricks_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
462
|
+
"""
|
|
463
|
+
Build JDBC config for Databricks SQL Warehouse or Cluster.
|
|
464
|
+
|
|
465
|
+
DVT v0.51.5: Added support for Databricks JDBC connectivity.
|
|
466
|
+
|
|
467
|
+
Databricks JDBC URL format:
|
|
468
|
+
jdbc:databricks://<host>:443/default;transportMode=http;ssl=1;httpPath=<http_path>;AuthMech=3;
|
|
469
|
+
|
|
470
|
+
The dbt-databricks adapter credentials include:
|
|
471
|
+
- host: Databricks workspace URL (e.g., dbc-xxxxx.cloud.databricks.com)
|
|
472
|
+
- http_path: SQL warehouse or cluster HTTP path
|
|
473
|
+
- token: Personal access token for authentication
|
|
474
|
+
- catalog: Unity Catalog name (optional)
|
|
475
|
+
- schema: Default schema (optional)
|
|
476
|
+
"""
|
|
477
|
+
creds_dict = credentials.to_dict()
|
|
478
|
+
|
|
479
|
+
host = creds_dict.get("host")
|
|
480
|
+
http_path = creds_dict.get("http_path")
|
|
481
|
+
token = creds_dict.get("token")
|
|
482
|
+
catalog = creds_dict.get("catalog", "hive_metastore")
|
|
483
|
+
schema = creds_dict.get("schema", "default")
|
|
484
|
+
|
|
485
|
+
if not host:
|
|
486
|
+
raise DbtRuntimeError("Databricks credentials missing required field: host")
|
|
487
|
+
if not http_path:
|
|
488
|
+
raise DbtRuntimeError("Databricks credentials missing required field: http_path")
|
|
489
|
+
if not token:
|
|
490
|
+
raise DbtRuntimeError("Databricks credentials missing required field: token")
|
|
491
|
+
|
|
492
|
+
# Build Databricks JDBC URL
|
|
493
|
+
# Format: jdbc:databricks://<host>:443/<catalog>;transportMode=http;ssl=1;httpPath=<http_path>;AuthMech=3;
|
|
494
|
+
jdbc_url = (
|
|
495
|
+
f"jdbc:databricks://{host}:443/{catalog};"
|
|
496
|
+
f"transportMode=http;ssl=1;httpPath={http_path};AuthMech=3"
|
|
497
|
+
)
|
|
498
|
+
|
|
499
|
+
jdbc_properties = {
|
|
500
|
+
"UID": "token", # Databricks uses "token" as username for PAT auth
|
|
501
|
+
"PWD": token,
|
|
502
|
+
"driver": JDBC_DRIVER_MAPPING["databricks"],
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
return jdbc_url, jdbc_properties
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
# ============================================================
|
|
509
|
+
# DVT v0.59.0a30: Additional JDBC Config Builders
|
|
510
|
+
# ============================================================
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
def _build_duckdb_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
514
|
+
"""
|
|
515
|
+
Build JDBC config for DuckDB.
|
|
516
|
+
|
|
517
|
+
DuckDB JDBC URL format:
|
|
518
|
+
- In-memory: jdbc:duckdb:
|
|
519
|
+
- File-based: jdbc:duckdb:/path/to/database.duckdb
|
|
520
|
+
|
|
521
|
+
Note: For federation, DuckDB is typically a target (write destination).
|
|
522
|
+
The Spark JDBC write will create/update the file.
|
|
523
|
+
"""
|
|
524
|
+
creds_dict = credentials.to_dict()
|
|
525
|
+
|
|
526
|
+
# Get the database path (DuckDB uses 'path' for file location)
|
|
527
|
+
path = creds_dict.get("path") or creds_dict.get("database", ":memory:")
|
|
528
|
+
|
|
529
|
+
# Expand ~ and resolve path
|
|
530
|
+
if path and path != ":memory:":
|
|
531
|
+
import os
|
|
532
|
+
path = os.path.expanduser(path)
|
|
533
|
+
path = os.path.abspath(path)
|
|
534
|
+
|
|
535
|
+
# Build JDBC URL
|
|
536
|
+
if path == ":memory:":
|
|
537
|
+
jdbc_url = "jdbc:duckdb:"
|
|
538
|
+
else:
|
|
539
|
+
jdbc_url = f"jdbc:duckdb:{path}"
|
|
540
|
+
|
|
541
|
+
jdbc_properties = {
|
|
542
|
+
"driver": JDBC_DRIVER_MAPPING["duckdb"],
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
return jdbc_url, jdbc_properties
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
def _build_mariadb_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
549
|
+
"""Build JDBC config for MariaDB."""
|
|
550
|
+
creds_dict = credentials.to_dict()
|
|
551
|
+
|
|
552
|
+
host = creds_dict.get("host", "localhost")
|
|
553
|
+
port = creds_dict.get("port", 3306)
|
|
554
|
+
database = creds_dict.get("database")
|
|
555
|
+
user = creds_dict.get("user")
|
|
556
|
+
password = creds_dict.get("password", "")
|
|
557
|
+
|
|
558
|
+
if not database:
|
|
559
|
+
raise DbtRuntimeError("MariaDB credentials missing required field: database")
|
|
560
|
+
if not user:
|
|
561
|
+
raise DbtRuntimeError("MariaDB credentials missing required field: user")
|
|
562
|
+
|
|
563
|
+
jdbc_url = f"jdbc:mariadb://{host}:{port}/{database}"
|
|
564
|
+
|
|
565
|
+
jdbc_properties = {
|
|
566
|
+
"user": user,
|
|
567
|
+
"password": password,
|
|
568
|
+
"driver": JDBC_DRIVER_MAPPING["mariadb"],
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
return jdbc_url, jdbc_properties
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
def _build_sqlite_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
575
|
+
"""Build JDBC config for SQLite."""
|
|
576
|
+
creds_dict = credentials.to_dict()
|
|
577
|
+
|
|
578
|
+
path = creds_dict.get("path") or creds_dict.get("database", ":memory:")
|
|
579
|
+
|
|
580
|
+
# Expand ~ and resolve path
|
|
581
|
+
if path and path != ":memory:":
|
|
582
|
+
import os
|
|
583
|
+
path = os.path.expanduser(path)
|
|
584
|
+
path = os.path.abspath(path)
|
|
585
|
+
|
|
586
|
+
jdbc_url = f"jdbc:sqlite:{path}"
|
|
587
|
+
|
|
588
|
+
jdbc_properties = {
|
|
589
|
+
"driver": JDBC_DRIVER_MAPPING["sqlite"],
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
return jdbc_url, jdbc_properties
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
def _build_clickhouse_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
596
|
+
"""Build JDBC config for ClickHouse."""
|
|
597
|
+
creds_dict = credentials.to_dict()
|
|
598
|
+
|
|
599
|
+
host = creds_dict.get("host", "localhost")
|
|
600
|
+
port = creds_dict.get("port", 8123) # HTTP port, JDBC uses 8443 for secure
|
|
601
|
+
database = creds_dict.get("database", "default")
|
|
602
|
+
user = creds_dict.get("user", "default")
|
|
603
|
+
password = creds_dict.get("password", "")
|
|
604
|
+
|
|
605
|
+
# ClickHouse JDBC URL format
|
|
606
|
+
jdbc_url = f"jdbc:clickhouse://{host}:{port}/{database}"
|
|
607
|
+
|
|
608
|
+
jdbc_properties = {
|
|
609
|
+
"user": user,
|
|
610
|
+
"password": password,
|
|
611
|
+
"driver": JDBC_DRIVER_MAPPING["clickhouse"],
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
return jdbc_url, jdbc_properties
|
|
615
|
+
|
|
616
|
+
|
|
617
|
+
def _build_trino_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
618
|
+
"""Build JDBC config for Trino."""
|
|
619
|
+
creds_dict = credentials.to_dict()
|
|
620
|
+
|
|
621
|
+
host = creds_dict.get("host", "localhost")
|
|
622
|
+
port = creds_dict.get("port", 8080)
|
|
623
|
+
catalog = creds_dict.get("catalog") or creds_dict.get("database", "hive")
|
|
624
|
+
schema = creds_dict.get("schema", "default")
|
|
625
|
+
user = creds_dict.get("user", "trino")
|
|
626
|
+
password = creds_dict.get("password", "")
|
|
627
|
+
|
|
628
|
+
# Trino JDBC URL format
|
|
629
|
+
jdbc_url = f"jdbc:trino://{host}:{port}/{catalog}/{schema}"
|
|
630
|
+
|
|
631
|
+
jdbc_properties = {
|
|
632
|
+
"user": user,
|
|
633
|
+
"driver": JDBC_DRIVER_MAPPING["trino"],
|
|
634
|
+
}
|
|
635
|
+
if password:
|
|
636
|
+
jdbc_properties["password"] = password
|
|
637
|
+
|
|
638
|
+
return jdbc_url, jdbc_properties
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
def _build_presto_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
642
|
+
"""Build JDBC config for Presto."""
|
|
643
|
+
creds_dict = credentials.to_dict()
|
|
644
|
+
|
|
645
|
+
host = creds_dict.get("host", "localhost")
|
|
646
|
+
port = creds_dict.get("port", 8080)
|
|
647
|
+
catalog = creds_dict.get("catalog") or creds_dict.get("database", "hive")
|
|
648
|
+
schema = creds_dict.get("schema", "default")
|
|
649
|
+
user = creds_dict.get("user", "presto")
|
|
650
|
+
password = creds_dict.get("password", "")
|
|
651
|
+
|
|
652
|
+
# Presto JDBC URL format
|
|
653
|
+
jdbc_url = f"jdbc:presto://{host}:{port}/{catalog}/{schema}"
|
|
654
|
+
|
|
655
|
+
jdbc_properties = {
|
|
656
|
+
"user": user,
|
|
657
|
+
"driver": JDBC_DRIVER_MAPPING["presto"],
|
|
658
|
+
}
|
|
659
|
+
if password:
|
|
660
|
+
jdbc_properties["password"] = password
|
|
661
|
+
|
|
662
|
+
return jdbc_url, jdbc_properties
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
def _build_athena_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
666
|
+
"""Build JDBC config for AWS Athena."""
|
|
667
|
+
creds_dict = credentials.to_dict()
|
|
668
|
+
|
|
669
|
+
region = creds_dict.get("region", "us-east-1")
|
|
670
|
+
s3_staging_dir = creds_dict.get("s3_staging_dir")
|
|
671
|
+
database = creds_dict.get("database", "default")
|
|
672
|
+
|
|
673
|
+
if not s3_staging_dir:
|
|
674
|
+
raise DbtRuntimeError("Athena credentials missing required field: s3_staging_dir")
|
|
675
|
+
|
|
676
|
+
# Athena JDBC URL format
|
|
677
|
+
jdbc_url = (
|
|
678
|
+
f"jdbc:awsathena://athena.{region}.amazonaws.com:443;"
|
|
679
|
+
f"S3OutputLocation={s3_staging_dir}"
|
|
680
|
+
)
|
|
681
|
+
|
|
682
|
+
jdbc_properties = {
|
|
683
|
+
"Schema": database,
|
|
684
|
+
"driver": JDBC_DRIVER_MAPPING["athena"],
|
|
685
|
+
}
|
|
686
|
+
|
|
687
|
+
# Handle AWS authentication
|
|
688
|
+
if creds_dict.get("aws_access_key_id"):
|
|
689
|
+
jdbc_properties["AwsCredentialsProviderClass"] = "com.simba.athena.amazonaws.auth.AWSStaticCredentialsProvider"
|
|
690
|
+
jdbc_properties["AwsCredentialsProviderArguments"] = (
|
|
691
|
+
f"{creds_dict['aws_access_key_id']},{creds_dict.get('aws_secret_access_key', '')}"
|
|
692
|
+
)
|
|
693
|
+
|
|
694
|
+
return jdbc_url, jdbc_properties
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
def _build_hive_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
698
|
+
"""Build JDBC config for Apache Hive."""
|
|
699
|
+
creds_dict = credentials.to_dict()
|
|
700
|
+
|
|
701
|
+
host = creds_dict.get("host", "localhost")
|
|
702
|
+
port = creds_dict.get("port", 10000)
|
|
703
|
+
database = creds_dict.get("database", "default")
|
|
704
|
+
user = creds_dict.get("user", "")
|
|
705
|
+
password = creds_dict.get("password", "")
|
|
706
|
+
|
|
707
|
+
# Hive JDBC URL format
|
|
708
|
+
jdbc_url = f"jdbc:hive2://{host}:{port}/{database}"
|
|
709
|
+
|
|
710
|
+
jdbc_properties = {
|
|
711
|
+
"driver": JDBC_DRIVER_MAPPING["hive"],
|
|
712
|
+
}
|
|
713
|
+
if user:
|
|
714
|
+
jdbc_properties["user"] = user
|
|
715
|
+
if password:
|
|
716
|
+
jdbc_properties["password"] = password
|
|
717
|
+
|
|
718
|
+
return jdbc_url, jdbc_properties
|
|
719
|
+
|
|
720
|
+
|
|
721
|
+
def _build_impala_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
722
|
+
"""Build JDBC config for Cloudera Impala."""
|
|
723
|
+
creds_dict = credentials.to_dict()
|
|
724
|
+
|
|
725
|
+
host = creds_dict.get("host", "localhost")
|
|
726
|
+
port = creds_dict.get("port", 21050)
|
|
727
|
+
database = creds_dict.get("database", "default")
|
|
728
|
+
user = creds_dict.get("user", "")
|
|
729
|
+
password = creds_dict.get("password", "")
|
|
730
|
+
|
|
731
|
+
# Impala JDBC URL format
|
|
732
|
+
jdbc_url = f"jdbc:impala://{host}:{port}/{database}"
|
|
733
|
+
|
|
734
|
+
jdbc_properties = {
|
|
735
|
+
"driver": JDBC_DRIVER_MAPPING["impala"],
|
|
736
|
+
}
|
|
737
|
+
if user:
|
|
738
|
+
jdbc_properties["user"] = user
|
|
739
|
+
if password:
|
|
740
|
+
jdbc_properties["password"] = password
|
|
741
|
+
|
|
742
|
+
return jdbc_url, jdbc_properties
|
|
743
|
+
|
|
744
|
+
|
|
745
|
+
def _build_teradata_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
746
|
+
"""Build JDBC config for Teradata."""
|
|
747
|
+
creds_dict = credentials.to_dict()
|
|
748
|
+
|
|
749
|
+
host = creds_dict.get("host", "localhost")
|
|
750
|
+
database = creds_dict.get("database")
|
|
751
|
+
user = creds_dict.get("user")
|
|
752
|
+
password = creds_dict.get("password", "")
|
|
753
|
+
|
|
754
|
+
if not user:
|
|
755
|
+
raise DbtRuntimeError("Teradata credentials missing required field: user")
|
|
756
|
+
|
|
757
|
+
# Teradata JDBC URL format
|
|
758
|
+
jdbc_url = f"jdbc:teradata://{host}"
|
|
759
|
+
if database:
|
|
760
|
+
jdbc_url += f"/DATABASE={database}"
|
|
761
|
+
|
|
762
|
+
jdbc_properties = {
|
|
763
|
+
"user": user,
|
|
764
|
+
"password": password,
|
|
765
|
+
"driver": JDBC_DRIVER_MAPPING["teradata"],
|
|
766
|
+
}
|
|
767
|
+
|
|
768
|
+
return jdbc_url, jdbc_properties
|
|
769
|
+
|
|
770
|
+
|
|
771
|
+
def _build_exasol_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
772
|
+
"""Build JDBC config for Exasol."""
|
|
773
|
+
creds_dict = credentials.to_dict()
|
|
774
|
+
|
|
775
|
+
host = creds_dict.get("host", "localhost")
|
|
776
|
+
port = creds_dict.get("port", 8563)
|
|
777
|
+
user = creds_dict.get("user")
|
|
778
|
+
password = creds_dict.get("password", "")
|
|
779
|
+
|
|
780
|
+
if not user:
|
|
781
|
+
raise DbtRuntimeError("Exasol credentials missing required field: user")
|
|
782
|
+
|
|
783
|
+
# Exasol JDBC URL format
|
|
784
|
+
jdbc_url = f"jdbc:exa:{host}:{port}"
|
|
785
|
+
|
|
786
|
+
jdbc_properties = {
|
|
787
|
+
"user": user,
|
|
788
|
+
"password": password,
|
|
789
|
+
"driver": JDBC_DRIVER_MAPPING["exasol"],
|
|
790
|
+
}
|
|
791
|
+
|
|
792
|
+
return jdbc_url, jdbc_properties
|
|
793
|
+
|
|
794
|
+
|
|
795
|
+
def _build_vertica_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
796
|
+
"""Build JDBC config for Vertica."""
|
|
797
|
+
creds_dict = credentials.to_dict()
|
|
798
|
+
|
|
799
|
+
host = creds_dict.get("host", "localhost")
|
|
800
|
+
port = creds_dict.get("port", 5433)
|
|
801
|
+
database = creds_dict.get("database")
|
|
802
|
+
user = creds_dict.get("user")
|
|
803
|
+
password = creds_dict.get("password", "")
|
|
804
|
+
|
|
805
|
+
if not database:
|
|
806
|
+
raise DbtRuntimeError("Vertica credentials missing required field: database")
|
|
807
|
+
if not user:
|
|
808
|
+
raise DbtRuntimeError("Vertica credentials missing required field: user")
|
|
809
|
+
|
|
810
|
+
# Vertica JDBC URL format
|
|
811
|
+
jdbc_url = f"jdbc:vertica://{host}:{port}/{database}"
|
|
812
|
+
|
|
813
|
+
jdbc_properties = {
|
|
814
|
+
"user": user,
|
|
815
|
+
"password": password,
|
|
816
|
+
"driver": JDBC_DRIVER_MAPPING["vertica"],
|
|
817
|
+
}
|
|
818
|
+
|
|
819
|
+
return jdbc_url, jdbc_properties
|
|
820
|
+
|
|
821
|
+
|
|
822
|
+
def _build_db2_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
823
|
+
"""Build JDBC config for IBM DB2."""
|
|
824
|
+
creds_dict = credentials.to_dict()
|
|
825
|
+
|
|
826
|
+
host = creds_dict.get("host", "localhost")
|
|
827
|
+
port = creds_dict.get("port", 50000)
|
|
828
|
+
database = creds_dict.get("database")
|
|
829
|
+
user = creds_dict.get("user")
|
|
830
|
+
password = creds_dict.get("password", "")
|
|
831
|
+
|
|
832
|
+
if not database:
|
|
833
|
+
raise DbtRuntimeError("DB2 credentials missing required field: database")
|
|
834
|
+
if not user:
|
|
835
|
+
raise DbtRuntimeError("DB2 credentials missing required field: user")
|
|
836
|
+
|
|
837
|
+
# DB2 JDBC URL format
|
|
838
|
+
jdbc_url = f"jdbc:db2://{host}:{port}/{database}"
|
|
839
|
+
|
|
840
|
+
jdbc_properties = {
|
|
841
|
+
"user": user,
|
|
842
|
+
"password": password,
|
|
843
|
+
"driver": JDBC_DRIVER_MAPPING["db2"],
|
|
844
|
+
}
|
|
845
|
+
|
|
846
|
+
return jdbc_url, jdbc_properties
|
|
847
|
+
|
|
848
|
+
|
|
849
|
+
def _build_singlestore_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
850
|
+
"""Build JDBC config for SingleStore (formerly MemSQL)."""
|
|
851
|
+
creds_dict = credentials.to_dict()
|
|
852
|
+
|
|
853
|
+
host = creds_dict.get("host", "localhost")
|
|
854
|
+
port = creds_dict.get("port", 3306) # SingleStore uses MySQL port
|
|
855
|
+
database = creds_dict.get("database")
|
|
856
|
+
user = creds_dict.get("user")
|
|
857
|
+
password = creds_dict.get("password", "")
|
|
858
|
+
|
|
859
|
+
if not database:
|
|
860
|
+
raise DbtRuntimeError("SingleStore credentials missing required field: database")
|
|
861
|
+
if not user:
|
|
862
|
+
raise DbtRuntimeError("SingleStore credentials missing required field: user")
|
|
863
|
+
|
|
864
|
+
# SingleStore JDBC URL format
|
|
865
|
+
jdbc_url = f"jdbc:singlestore://{host}:{port}/{database}"
|
|
866
|
+
|
|
867
|
+
jdbc_properties = {
|
|
868
|
+
"user": user,
|
|
869
|
+
"password": password,
|
|
870
|
+
"driver": JDBC_DRIVER_MAPPING["singlestore"],
|
|
871
|
+
}
|
|
872
|
+
|
|
873
|
+
return jdbc_url, jdbc_properties
|
|
874
|
+
|
|
875
|
+
|
|
876
|
+
def _build_dremio_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
877
|
+
"""Build JDBC config for Dremio."""
|
|
878
|
+
creds_dict = credentials.to_dict()
|
|
879
|
+
|
|
880
|
+
host = creds_dict.get("host", "localhost")
|
|
881
|
+
port = creds_dict.get("port", 31010)
|
|
882
|
+
user = creds_dict.get("user")
|
|
883
|
+
password = creds_dict.get("password", "")
|
|
884
|
+
|
|
885
|
+
if not user:
|
|
886
|
+
raise DbtRuntimeError("Dremio credentials missing required field: user")
|
|
887
|
+
|
|
888
|
+
# Dremio JDBC URL format
|
|
889
|
+
jdbc_url = f"jdbc:dremio:direct={host}:{port}"
|
|
890
|
+
|
|
891
|
+
jdbc_properties = {
|
|
892
|
+
"user": user,
|
|
893
|
+
"password": password,
|
|
894
|
+
"driver": JDBC_DRIVER_MAPPING["dremio"],
|
|
895
|
+
}
|
|
896
|
+
|
|
897
|
+
return jdbc_url, jdbc_properties
|
|
898
|
+
|
|
899
|
+
|
|
900
|
+
def _build_firebolt_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
901
|
+
"""Build JDBC config for Firebolt."""
|
|
902
|
+
creds_dict = credentials.to_dict()
|
|
903
|
+
|
|
904
|
+
database = creds_dict.get("database")
|
|
905
|
+
engine = creds_dict.get("engine")
|
|
906
|
+
user = creds_dict.get("user")
|
|
907
|
+
password = creds_dict.get("password", "")
|
|
908
|
+
|
|
909
|
+
if not database:
|
|
910
|
+
raise DbtRuntimeError("Firebolt credentials missing required field: database")
|
|
911
|
+
if not user:
|
|
912
|
+
raise DbtRuntimeError("Firebolt credentials missing required field: user")
|
|
913
|
+
|
|
914
|
+
# Firebolt JDBC URL format
|
|
915
|
+
jdbc_url = f"jdbc:firebolt:{database}"
|
|
916
|
+
if engine:
|
|
917
|
+
jdbc_url += f"?engine={engine}"
|
|
918
|
+
|
|
919
|
+
jdbc_properties = {
|
|
920
|
+
"user": user,
|
|
921
|
+
"password": password,
|
|
922
|
+
"driver": JDBC_DRIVER_MAPPING["firebolt"],
|
|
923
|
+
}
|
|
924
|
+
|
|
925
|
+
return jdbc_url, jdbc_properties
|
|
926
|
+
|
|
927
|
+
|
|
928
|
+
def _build_rockset_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
929
|
+
"""Build JDBC config for Rockset."""
|
|
930
|
+
creds_dict = credentials.to_dict()
|
|
931
|
+
|
|
932
|
+
api_key = creds_dict.get("api_key") or creds_dict.get("password")
|
|
933
|
+
api_server = creds_dict.get("api_server", "api.usw2a1.rockset.com")
|
|
934
|
+
|
|
935
|
+
if not api_key:
|
|
936
|
+
raise DbtRuntimeError("Rockset credentials missing required field: api_key")
|
|
937
|
+
|
|
938
|
+
# Rockset JDBC URL format
|
|
939
|
+
jdbc_url = f"jdbc:rockset://{api_server}"
|
|
940
|
+
|
|
941
|
+
jdbc_properties = {
|
|
942
|
+
"apiKey": api_key,
|
|
943
|
+
"driver": JDBC_DRIVER_MAPPING["rockset"],
|
|
944
|
+
}
|
|
945
|
+
|
|
946
|
+
return jdbc_url, jdbc_properties
|
|
947
|
+
|
|
948
|
+
|
|
949
|
+
def _build_monetdb_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
950
|
+
"""Build JDBC config for MonetDB."""
|
|
951
|
+
creds_dict = credentials.to_dict()
|
|
952
|
+
|
|
953
|
+
host = creds_dict.get("host", "localhost")
|
|
954
|
+
port = creds_dict.get("port", 50000)
|
|
955
|
+
database = creds_dict.get("database")
|
|
956
|
+
user = creds_dict.get("user")
|
|
957
|
+
password = creds_dict.get("password", "")
|
|
958
|
+
|
|
959
|
+
if not database:
|
|
960
|
+
raise DbtRuntimeError("MonetDB credentials missing required field: database")
|
|
961
|
+
if not user:
|
|
962
|
+
raise DbtRuntimeError("MonetDB credentials missing required field: user")
|
|
963
|
+
|
|
964
|
+
# MonetDB JDBC URL format
|
|
965
|
+
jdbc_url = f"jdbc:monetdb://{host}:{port}/{database}"
|
|
966
|
+
|
|
967
|
+
jdbc_properties = {
|
|
968
|
+
"user": user,
|
|
969
|
+
"password": password,
|
|
970
|
+
"driver": JDBC_DRIVER_MAPPING["monetdb"],
|
|
971
|
+
}
|
|
972
|
+
|
|
973
|
+
return jdbc_url, jdbc_properties
|
|
974
|
+
|
|
975
|
+
|
|
976
|
+
def _build_cratedb_jdbc_config(credentials: Credentials) -> Tuple[str, Dict[str, str]]:
|
|
977
|
+
"""Build JDBC config for CrateDB."""
|
|
978
|
+
creds_dict = credentials.to_dict()
|
|
979
|
+
|
|
980
|
+
host = creds_dict.get("host", "localhost")
|
|
981
|
+
port = creds_dict.get("port", 5432) # CrateDB uses PostgreSQL port
|
|
982
|
+
schema = creds_dict.get("schema", "doc")
|
|
983
|
+
user = creds_dict.get("user", "crate")
|
|
984
|
+
password = creds_dict.get("password", "")
|
|
985
|
+
|
|
986
|
+
# CrateDB JDBC URL format
|
|
987
|
+
jdbc_url = f"jdbc:crate://{host}:{port}/?schema={schema}"
|
|
988
|
+
|
|
989
|
+
jdbc_properties = {
|
|
990
|
+
"user": user,
|
|
991
|
+
"password": password,
|
|
992
|
+
"driver": JDBC_DRIVER_MAPPING["cratedb"],
|
|
993
|
+
}
|
|
994
|
+
|
|
995
|
+
return jdbc_url, jdbc_properties
|
|
996
|
+
|
|
997
|
+
|
|
+def _build_generic_jdbc_config(credentials: Credentials, adapter_type: str) -> Tuple[str, Dict[str, str]]:
+    """
+    Generic JDBC config builder for adapters not explicitly supported.
+
+    This tries to build a reasonable JDBC URL based on common credential patterns.
+    Works for many databases that follow standard connection conventions.
+    """
+    creds_dict = credentials.to_dict()
+
+    host = creds_dict.get("host", "localhost")
+    port = creds_dict.get("port", 5432)
+    database = creds_dict.get("database") or creds_dict.get("schema", "")
+    user = creds_dict.get("user", "")
+    password = creds_dict.get("password", "")
+
+    # Get driver from mapping if available
+    driver = JDBC_DRIVER_MAPPING.get(adapter_type)
+    if not driver:
+        raise DbtRuntimeError(
+            f"No JDBC driver mapping found for adapter type '{adapter_type}'. "
+            f"Please add support for this adapter in jdbc_utils.py"
+        )
+
+    # Build generic JDBC URL
+    if database:
+        jdbc_url = f"jdbc:{adapter_type}://{host}:{port}/{database}"
+    else:
+        jdbc_url = f"jdbc:{adapter_type}://{host}:{port}"
+
+    jdbc_properties = {
+        "driver": driver,
+    }
+    if user:
+        jdbc_properties["user"] = user
+    if password:
+        jdbc_properties["password"] = password
+
+    return jdbc_url, jdbc_properties
+
+
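Note (illustrative, not part of the diff): a sketch of the generic URL pattern for a hypothetical adapter type that does have a JDBC_DRIVER_MAPPING entry; the adapter name and connection values below are assumptions.

    # "somedb" is a hypothetical adapter type used only to show the URL pattern.
    adapter_type, host, port, database = "somedb", "db.example.com", 6000, "sales"
    url = (
        f"jdbc:{adapter_type}://{host}:{port}/{database}"
        if database
        else f"jdbc:{adapter_type}://{host}:{port}"
    )
    assert url == "jdbc:somedb://db.example.com:6000/sales"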
+def auto_detect_partition_column(adapter: BaseAdapter, schema: str, table: str) -> Optional[str]:
+    """
+    Auto-detect the best column for partitioning parallel JDBC reads.
+
+    Queries table metadata to find a suitable partition column. Prioritizes:
+    1. Primary key columns (single column PKs only)
+    2. Columns named 'id' or ending with '_id'
+    3. Timestamp/date columns
+    4. Integer columns
+
+    :param adapter: dbt adapter to use for querying metadata
+    :param schema: Schema/dataset name
+    :param table: Table name
+    :returns: Column name suitable for partitioning, or None if not found
+
+    Example:
+        >>> column = auto_detect_partition_column(adapter, "public", "users")
+        >>> if column:
+        ...     print(f"Using {column} for partitioning")
+        ... else:
+        ...     print("No suitable partition column found")
+    """
+    try:
+        # Strategy 1: Check for primary key
+        pk_column = _get_primary_key_column(adapter, schema, table)
+        if pk_column:
+            return pk_column
+
+        # Strategy 2: Get all columns and look for ID-like columns
+        columns = _get_table_columns(adapter, schema, table)
+
+        # Look for ID columns (exact match or suffix)
+        for col_name, col_type in columns:
+            col_name_lower = col_name.lower()
+            if col_name_lower == "id" or col_name_lower.endswith("_id"):
+                # Check if it's an integer type
+                if _is_integer_type(col_type):
+                    return col_name
+
+        # Strategy 3: Look for timestamp/date columns
+        for col_name, col_type in columns:
+            if _is_timestamp_type(col_type):
+                return col_name
+
+        # Strategy 4: Look for any integer column
+        for col_name, col_type in columns:
+            if _is_integer_type(col_type):
+                return col_name
+
+        # No suitable column found
+        return None
+
+    except Exception:
+        # If metadata query fails, return None (caller can decide to read without partitioning)
+        return None
+
+
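Note (illustrative, not part of the diff): a self-contained sketch of the detection priority on an in-memory column list; it mirrors strategies 2-4 above rather than calling the function, and the column names and types are made up.

    # An integer "*_id" column (strategy 2) is preferred over the timestamp
    # column (strategy 3) and over other integer columns (strategy 4).
    columns = [("name", "varchar"), ("created_at", "timestamp"), ("order_id", "bigint")]
    def is_int(t): return any(x in t.upper() for x in ["INT", "INTEGER", "BIGINT", "SMALLINT", "SERIAL"])
    id_like = [c for c, t in columns if (c.lower() == "id" or c.lower().endswith("_id")) and is_int(t)]
    assert id_like == ["order_id"]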
+def estimate_partition_bounds(
+    adapter: BaseAdapter, schema: str, table: str, column: str
+) -> Tuple[int, int]:
+    """
+    Estimate partition bounds (min/max) for a numeric partition column.
+
+    Queries the table to get MIN and MAX values of the partition column,
+    which are used by Spark JDBC to distribute reads across workers.
+
+    :param adapter: dbt adapter to use for querying
+    :param schema: Schema/dataset name
+    :param table: Table name
+    :param column: Partition column name
+    :returns: Tuple of (lower_bound, upper_bound)
+    :raises DbtRuntimeError: If query fails or column is not numeric
+
+    Example:
+        >>> lower, upper = estimate_partition_bounds(adapter, "public", "orders", "order_id")
+        >>> print(f"Partition range: {lower} to {upper}")
+        Partition range: 1 to 1000000
+    """
+    try:
+        # Build qualified table name
+        qualified_table = f"{schema}.{table}"
+
+        # Query for min/max
+        sql = f"SELECT MIN({column}) as min_val, MAX({column}) as max_val FROM {qualified_table}"
+
+        # Execute via adapter
+        response, result_table = adapter.execute(sql, auto_begin=False, fetch=True)
+
+        if not result_table or len(result_table.rows) == 0:
+            raise DbtRuntimeError(
+                f"Failed to estimate partition bounds for {qualified_table}.{column}: "
+                "Query returned no results"
+            )
+
+        row = result_table.rows[0]
+        min_val = row[0]
+        max_val = row[1]
+
+        if min_val is None or max_val is None:
+            raise DbtRuntimeError(
+                f"Failed to estimate partition bounds for {qualified_table}.{column}: "
+                "Column contains only NULL values"
+            )
+
+        # Convert to integers
+        lower_bound = int(min_val)
+        upper_bound = int(max_val)
+
+        return lower_bound, upper_bound
+
+    except Exception as e:
+        raise DbtRuntimeError(
+            f"Failed to estimate partition bounds for {schema}.{table}.{column}: {str(e)}"
+        ) from e
+
+
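Note (illustrative, not part of the diff): a sketch of how the detected column and estimated bounds might feed Spark's standard partitioned JDBC read options; `spark`, `adapter`, `url`, and `props` are assumed to already exist in the calling context, and the table name and partition count are made up.

    # Assumes: SparkSession `spark`, dbt adapter `adapter`, and a (url, props)
    # pair from one of the builders above. Option names are Spark JDBC options.
    column = auto_detect_partition_column(adapter, "public", "orders")
    reader = (
        spark.read.format("jdbc")
        .option("url", url)
        .option("dbtable", "public.orders")
        .options(**props)
    )
    if column:
        lower, upper = estimate_partition_bounds(adapter, "public", "orders", column)
        reader = (
            reader.option("partitionColumn", column)
            .option("lowerBound", str(lower))
            .option("upperBound", str(upper))
            .option("numPartitions", "8")
        )
    df = reader.load()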
+# Helper functions for metadata queries
+
+
+def _get_primary_key_column(adapter: BaseAdapter, schema: str, table: str) -> Optional[str]:
+    """
+    Get primary key column name (if single-column PK exists).
+
+    Implementation is adapter-specific. Returns None if not implemented
+    or if PK is composite.
+    """
+    adapter_type = adapter.type().lower()
+
+    try:
+        if adapter_type in ("postgres", "postgresql", "redshift"):
+            # PostgreSQL/Redshift: query the pg_index/pg_attribute catalogs
+            sql = f"""
+                SELECT a.attname
+                FROM pg_index i
+                JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
+                WHERE i.indrelid = '{schema}.{table}'::regclass
+                AND i.indisprimary
+            """
+            response, result = adapter.execute(sql, auto_begin=False, fetch=True)
+            if result and len(result.rows) == 1:
+                return result.rows[0][0]
+
+        elif adapter_type == "mysql":
+            # MySQL: Query information_schema
+            sql = f"""
+                SELECT COLUMN_NAME
+                FROM information_schema.KEY_COLUMN_USAGE
+                WHERE TABLE_SCHEMA = '{schema}'
+                AND TABLE_NAME = '{table}'
+                AND CONSTRAINT_NAME = 'PRIMARY'
+            """
+            response, result = adapter.execute(sql, auto_begin=False, fetch=True)
+            if result and len(result.rows) == 1:
+                return result.rows[0][0]
+
+        # For other adapters or if query fails, return None
+        return None
+
+    except Exception:
+        return None
+
+
+def _get_table_columns(adapter: BaseAdapter, schema: str, table: str) -> list[Tuple[str, str]]:
+    """
+    Get list of (column_name, column_type) for a table.
+    """
+    adapter_type = adapter.type().lower()
+
+    try:
+        if adapter_type in ("postgres", "postgresql", "redshift"):
+            sql = f"""
+                SELECT column_name, data_type
+                FROM information_schema.columns
+                WHERE table_schema = '{schema}'
+                AND table_name = '{table}'
+                ORDER BY ordinal_position
+            """
+            response, result = adapter.execute(sql, auto_begin=False, fetch=True)
+            return [(row[0], row[1]) for row in result.rows]
+
+        elif adapter_type == "mysql":
+            sql = f"""
+                SELECT COLUMN_NAME, DATA_TYPE
+                FROM information_schema.COLUMNS
+                WHERE TABLE_SCHEMA = '{schema}'
+                AND TABLE_NAME = '{table}'
+                ORDER BY ORDINAL_POSITION
+            """
+            response, result = adapter.execute(sql, auto_begin=False, fetch=True)
+            return [(row[0], row[1]) for row in result.rows]
+
+        else:
+            # Fallback: Use LIMIT 0 query to get columns
+            sql = f"SELECT * FROM {schema}.{table} LIMIT 0"
+            response, result = adapter.execute(sql, auto_begin=False, fetch=True)
+            # Return column names with unknown types
+            return [(col, "unknown") for col in result.column_names]
+
+    except Exception:
+        return []
+
+
+def _is_integer_type(sql_type: str) -> bool:
+    """Check if SQL type is an integer type."""
+    sql_type_upper = sql_type.upper()
+    return any(
+        int_type in sql_type_upper
+        for int_type in ["INT", "INTEGER", "BIGINT", "SMALLINT", "SERIAL"]
+    )
+
+
+def _is_timestamp_type(sql_type: str) -> bool:
+    """Check if SQL type is a timestamp/date type."""
+    sql_type_upper = sql_type.upper()
+    return any(time_type in sql_type_upper for time_type in ["TIMESTAMP", "DATETIME", "DATE"])
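Note (illustrative, not part of the diff): a self-contained illustration of the substring checks these two helpers perform, on sample type strings.

    # Same substring logic as the helpers above, on sample type names.
    assert any(t in "BIGINT" for t in ["INT", "INTEGER", "BIGINT", "SMALLINT", "SERIAL"])
    assert any(t in "TIMESTAMP WITHOUT TIME ZONE" for t in ["TIMESTAMP", "DATETIME", "DATE"])
    assert not any(t in "VARCHAR(255)" for t in ["INT", "INTEGER", "BIGINT", "SMALLINT", "SERIAL"])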