dvt-core 0.52.2__cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (275) hide show
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2039 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +804 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +50 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +624 -0
  74. dbt/compute/federated_executor.py +837 -0
  75. dbt/compute/filter_pushdown.cpython-310-darwin.so +0 -0
  76. dbt/compute/filter_pushdown.py +273 -0
  77. dbt/compute/jar_provisioning.cpython-310-darwin.so +0 -0
  78. dbt/compute/jar_provisioning.py +255 -0
  79. dbt/compute/java_compat.cpython-310-darwin.so +0 -0
  80. dbt/compute/java_compat.py +689 -0
  81. dbt/compute/jdbc_utils.cpython-310-darwin.so +0 -0
  82. dbt/compute/jdbc_utils.py +678 -0
  83. dbt/compute/smart_selector.cpython-310-darwin.so +0 -0
  84. dbt/compute/smart_selector.py +311 -0
  85. dbt/compute/strategies/__init__.py +54 -0
  86. dbt/compute/strategies/base.py +165 -0
  87. dbt/compute/strategies/dataproc.py +207 -0
  88. dbt/compute/strategies/emr.py +203 -0
  89. dbt/compute/strategies/local.py +364 -0
  90. dbt/compute/strategies/standalone.py +262 -0
  91. dbt/config/__init__.py +4 -0
  92. dbt/config/catalogs.py +94 -0
  93. dbt/config/compute.cpython-310-darwin.so +0 -0
  94. dbt/config/compute.py +547 -0
  95. dbt/config/dvt_profile.cpython-310-darwin.so +0 -0
  96. dbt/config/dvt_profile.py +342 -0
  97. dbt/config/profile.py +422 -0
  98. dbt/config/project.py +873 -0
  99. dbt/config/project_utils.py +28 -0
  100. dbt/config/renderer.py +231 -0
  101. dbt/config/runtime.py +553 -0
  102. dbt/config/selectors.py +208 -0
  103. dbt/config/utils.py +77 -0
  104. dbt/constants.py +28 -0
  105. dbt/context/__init__.py +0 -0
  106. dbt/context/base.py +745 -0
  107. dbt/context/configured.py +135 -0
  108. dbt/context/context_config.py +382 -0
  109. dbt/context/docs.py +82 -0
  110. dbt/context/exceptions_jinja.py +178 -0
  111. dbt/context/macro_resolver.py +195 -0
  112. dbt/context/macros.py +171 -0
  113. dbt/context/manifest.py +72 -0
  114. dbt/context/providers.py +2249 -0
  115. dbt/context/query_header.py +13 -0
  116. dbt/context/secret.py +58 -0
  117. dbt/context/target.py +74 -0
  118. dbt/contracts/__init__.py +0 -0
  119. dbt/contracts/files.py +413 -0
  120. dbt/contracts/graph/__init__.py +0 -0
  121. dbt/contracts/graph/manifest.py +1904 -0
  122. dbt/contracts/graph/metrics.py +97 -0
  123. dbt/contracts/graph/model_config.py +70 -0
  124. dbt/contracts/graph/node_args.py +42 -0
  125. dbt/contracts/graph/nodes.py +1806 -0
  126. dbt/contracts/graph/semantic_manifest.py +232 -0
  127. dbt/contracts/graph/unparsed.py +811 -0
  128. dbt/contracts/project.py +417 -0
  129. dbt/contracts/results.py +53 -0
  130. dbt/contracts/selection.py +23 -0
  131. dbt/contracts/sql.py +85 -0
  132. dbt/contracts/state.py +68 -0
  133. dbt/contracts/util.py +46 -0
  134. dbt/deprecations.py +346 -0
  135. dbt/deps/__init__.py +0 -0
  136. dbt/deps/base.py +152 -0
  137. dbt/deps/git.py +195 -0
  138. dbt/deps/local.py +79 -0
  139. dbt/deps/registry.py +130 -0
  140. dbt/deps/resolver.py +149 -0
  141. dbt/deps/tarball.py +120 -0
  142. dbt/docs/source/_ext/dbt_click.py +119 -0
  143. dbt/docs/source/conf.py +32 -0
  144. dbt/env_vars.py +64 -0
  145. dbt/event_time/event_time.py +40 -0
  146. dbt/event_time/sample_window.py +60 -0
  147. dbt/events/__init__.py +15 -0
  148. dbt/events/base_types.py +36 -0
  149. dbt/events/core_types_pb2.py +2 -0
  150. dbt/events/logging.py +108 -0
  151. dbt/events/types.py +2516 -0
  152. dbt/exceptions.py +1486 -0
  153. dbt/flags.py +89 -0
  154. dbt/graph/__init__.py +11 -0
  155. dbt/graph/cli.py +247 -0
  156. dbt/graph/graph.py +172 -0
  157. dbt/graph/queue.py +214 -0
  158. dbt/graph/selector.py +374 -0
  159. dbt/graph/selector_methods.py +975 -0
  160. dbt/graph/selector_spec.py +222 -0
  161. dbt/graph/thread_pool.py +18 -0
  162. dbt/hooks.py +21 -0
  163. dbt/include/README.md +49 -0
  164. dbt/include/__init__.py +3 -0
  165. dbt/include/starter_project/.gitignore +4 -0
  166. dbt/include/starter_project/README.md +15 -0
  167. dbt/include/starter_project/__init__.py +3 -0
  168. dbt/include/starter_project/analyses/.gitkeep +0 -0
  169. dbt/include/starter_project/dbt_project.yml +36 -0
  170. dbt/include/starter_project/macros/.gitkeep +0 -0
  171. dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  172. dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  173. dbt/include/starter_project/models/example/schema.yml +21 -0
  174. dbt/include/starter_project/seeds/.gitkeep +0 -0
  175. dbt/include/starter_project/snapshots/.gitkeep +0 -0
  176. dbt/include/starter_project/tests/.gitkeep +0 -0
  177. dbt/internal_deprecations.py +26 -0
  178. dbt/jsonschemas/__init__.py +3 -0
  179. dbt/jsonschemas/jsonschemas.py +309 -0
  180. dbt/jsonschemas/project/0.0.110.json +4717 -0
  181. dbt/jsonschemas/project/0.0.85.json +2015 -0
  182. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  183. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  184. dbt/jsonschemas/resources/latest.json +6773 -0
  185. dbt/links.py +4 -0
  186. dbt/materializations/__init__.py +0 -0
  187. dbt/materializations/incremental/__init__.py +0 -0
  188. dbt/materializations/incremental/microbatch.py +236 -0
  189. dbt/mp_context.py +8 -0
  190. dbt/node_types.py +37 -0
  191. dbt/parser/__init__.py +23 -0
  192. dbt/parser/analysis.py +21 -0
  193. dbt/parser/base.py +548 -0
  194. dbt/parser/common.py +266 -0
  195. dbt/parser/docs.py +52 -0
  196. dbt/parser/fixtures.py +51 -0
  197. dbt/parser/functions.py +30 -0
  198. dbt/parser/generic_test.py +100 -0
  199. dbt/parser/generic_test_builders.py +333 -0
  200. dbt/parser/hooks.py +118 -0
  201. dbt/parser/macros.py +137 -0
  202. dbt/parser/manifest.py +2204 -0
  203. dbt/parser/models.py +573 -0
  204. dbt/parser/partial.py +1178 -0
  205. dbt/parser/read_files.py +445 -0
  206. dbt/parser/schema_generic_tests.py +422 -0
  207. dbt/parser/schema_renderer.py +111 -0
  208. dbt/parser/schema_yaml_readers.py +935 -0
  209. dbt/parser/schemas.py +1466 -0
  210. dbt/parser/search.py +149 -0
  211. dbt/parser/seeds.py +28 -0
  212. dbt/parser/singular_test.py +20 -0
  213. dbt/parser/snapshots.py +44 -0
  214. dbt/parser/sources.py +558 -0
  215. dbt/parser/sql.py +62 -0
  216. dbt/parser/unit_tests.py +621 -0
  217. dbt/plugins/__init__.py +20 -0
  218. dbt/plugins/contracts.py +9 -0
  219. dbt/plugins/exceptions.py +2 -0
  220. dbt/plugins/manager.py +163 -0
  221. dbt/plugins/manifest.py +21 -0
  222. dbt/profiler.py +20 -0
  223. dbt/py.typed +1 -0
  224. dbt/query_analyzer.cpython-310-darwin.so +0 -0
  225. dbt/query_analyzer.py +410 -0
  226. dbt/runners/__init__.py +2 -0
  227. dbt/runners/exposure_runner.py +7 -0
  228. dbt/runners/no_op_runner.py +45 -0
  229. dbt/runners/saved_query_runner.py +7 -0
  230. dbt/selected_resources.py +8 -0
  231. dbt/task/__init__.py +0 -0
  232. dbt/task/base.py +503 -0
  233. dbt/task/build.py +197 -0
  234. dbt/task/clean.py +56 -0
  235. dbt/task/clone.py +161 -0
  236. dbt/task/compile.py +150 -0
  237. dbt/task/compute.py +454 -0
  238. dbt/task/debug.py +505 -0
  239. dbt/task/deps.py +280 -0
  240. dbt/task/docs/__init__.py +3 -0
  241. dbt/task/docs/generate.py +660 -0
  242. dbt/task/docs/index.html +250 -0
  243. dbt/task/docs/serve.py +29 -0
  244. dbt/task/freshness.py +322 -0
  245. dbt/task/function.py +121 -0
  246. dbt/task/group_lookup.py +46 -0
  247. dbt/task/init.py +553 -0
  248. dbt/task/java.py +316 -0
  249. dbt/task/list.py +236 -0
  250. dbt/task/printer.py +175 -0
  251. dbt/task/retry.py +175 -0
  252. dbt/task/run.py +1306 -0
  253. dbt/task/run_operation.py +141 -0
  254. dbt/task/runnable.py +758 -0
  255. dbt/task/seed.py +103 -0
  256. dbt/task/show.py +149 -0
  257. dbt/task/snapshot.py +56 -0
  258. dbt/task/spark.py +414 -0
  259. dbt/task/sql.py +110 -0
  260. dbt/task/target_sync.py +759 -0
  261. dbt/task/test.py +464 -0
  262. dbt/tests/fixtures/__init__.py +1 -0
  263. dbt/tests/fixtures/project.py +620 -0
  264. dbt/tests/util.py +651 -0
  265. dbt/tracking.py +529 -0
  266. dbt/utils/__init__.py +3 -0
  267. dbt/utils/artifact_upload.py +151 -0
  268. dbt/utils/utils.py +408 -0
  269. dbt/version.py +268 -0
  270. dvt_cli/__init__.py +72 -0
  271. dvt_core-0.52.2.dist-info/METADATA +286 -0
  272. dvt_core-0.52.2.dist-info/RECORD +275 -0
  273. dvt_core-0.52.2.dist-info/WHEEL +5 -0
  274. dvt_core-0.52.2.dist-info/entry_points.txt +2 -0
  275. dvt_core-0.52.2.dist-info/top_level.txt +2 -0
@@ -0,0 +1,759 @@
1
+ """
2
+ Target Sync Task
3
+
4
+ Handles DVT target synchronization:
5
+ - Scans profiles.yml connections to detect required adapter types
6
+ - Reports connections found and their types
7
+ - Shows adapter install instructions (manual pip/uv install)
8
+ - Resolves JDBC JARs with transitive dependencies via Maven POM
9
+ - Downloads all JARs to project .dvt/jdbc_jars/ directory
10
+ - Configures spark.jars to use pre-downloaded JARs (no Spark download at runtime)
11
+ - Removes unused adapters and JARs (with --clean flag)
12
+
13
+ v0.5.91: Smart sync based on profiles.yml connections
14
+ v0.5.93: Actual JDBC JAR download to project directory
15
+ v0.5.94: Show install instructions instead of auto-installing adapters
16
+ v0.5.95: Hybrid JAR resolution - DVT downloads with transitive deps, spark.jars config
17
+ """
18
+
19
+ import os
20
+ import subprocess
21
+ import sys
22
+ import urllib.request
23
+ import xml.etree.ElementTree as ET
24
+ from pathlib import Path
25
+ from typing import Dict, List, Optional, Set, Tuple
26
+
27
+ from dbt.config.compute import ComputeRegistry, get_dvt_dir
28
+ from dbt_common.exceptions import DbtRuntimeError
29
+
30
+
31
# Base URL of the Maven repository. POM and JAR URLs in this module are built
# by appending "<group path>/<artifactId>/<version>/<file name>" to it.
MAVEN_REPO = "https://repo1.maven.org/maven2"
33
+
34
+
35
# Mapping of adapter type (the ``type`` value of a profiles.yml output) to the
# pip package name of the dbt adapter that provides it.
# Several adapter types may share one package (see the inline notes).
ADAPTER_PACKAGE_MAPPING = {
    "postgres": "dbt-postgres",
    "snowflake": "dbt-snowflake",
    "bigquery": "dbt-bigquery",
    "redshift": "dbt-redshift",
    "spark": "dbt-spark",
    "databricks": "dbt-databricks",
    "trino": "dbt-trino",
    "duckdb": "dbt-duckdb",
    "mysql": "dbt-mysql",
    "sqlserver": "dbt-sqlserver",
    "synapse": "dbt-synapse",
    "fabric": "dbt-fabric",
    "oracle": "dbt-oracle",
    "teradata": "dbt-teradata",
    "clickhouse": "dbt-clickhouse",
    "greenplum": "dbt-greenplum",
    "vertica": "dbt-vertica",
    "sqlite": "dbt-sqlite",
    "mariadb": "dbt-mysql",  # Uses MySQL adapter
    "exasol": "dbt-exasol",
    "db2": "dbt-db2",
    "athena": "dbt-athena-community",
    "presto": "dbt-presto",
    "hive": "dbt-hive",
    "impala": "dbt-impala",
    "singlestore": "dbt-singlestore",
    "firebolt": "dbt-firebolt",
    "starrocks": "dbt-starrocks",
    "doris": "dbt-doris",
    "materialize": "dbt-materialize",
    "rockset": "dbt-rockset",
    "questdb": "dbt-questdb",
    "neo4j": "dbt-neo4j",
    "timescaledb": "dbt-postgres",  # Uses PostgreSQL adapter
}
72
+
73
# Mapping of adapter type to JDBC Maven coordinates (ONE JAR per adapter)
# These are pure JDBC drivers that Spark uses for spark.read.jdbc()
# NOTE: All versions verified against Maven Central as of Dec 2025
#
# Each value is a "groupId:artifactId:version" string. An empty string means
# there is nothing to download for that adapter type. Adapter types that speak
# another database's wire protocol reuse that database's driver coordinate.
ADAPTER_JDBC_MAPPING = {
    # Official dbt-labs adapters - JDBC drivers only
    "postgres": "org.postgresql:postgresql:42.7.4",
    "snowflake": "net.snowflake:snowflake-jdbc:3.16.1",
    # NOTE(review): this coordinate is the GCS Hadoop connector, not a JDBC
    # driver, despite the "JDBC drivers only" heading - confirm it is intended.
    "bigquery": "com.google.cloud.bigdataoss:gcs-connector:hadoop3-2.2.22",  # GCS connector for BQ
    "redshift": "com.amazon.redshift:redshift-jdbc42:2.1.0.32",
    "spark": "",  # Native, no JDBC needed
    "databricks": "com.databricks:databricks-jdbc:2.6.36",
    "trino": "io.trino:trino-jdbc:443",
    "duckdb": "org.duckdb:duckdb_jdbc:1.1.3",
    # Community adapters - JDBC drivers only (verified on Maven)
    "mysql": "com.mysql:mysql-connector-j:9.1.0",
    "sqlserver": "com.microsoft.sqlserver:mssql-jdbc:12.8.1.jre11",
    "synapse": "com.microsoft.sqlserver:mssql-jdbc:12.8.1.jre11",
    "fabric": "com.microsoft.sqlserver:mssql-jdbc:12.8.1.jre11",
    "oracle": "com.oracle.database.jdbc:ojdbc11:23.6.0.24.10",
    "teradata": "com.teradata.jdbc:terajdbc:20.00.00.20",
    "clickhouse": "com.clickhouse:clickhouse-jdbc:0.6.5",
    "greenplum": "org.postgresql:postgresql:42.7.4",  # PostgreSQL compatible
    "vertica": "com.vertica.jdbc:vertica-jdbc:24.3.0-0",
    "sqlite": "org.xerial:sqlite-jdbc:3.47.1.0",
    "mariadb": "org.mariadb.jdbc:mariadb-java-client:3.4.1",
    "exasol": "com.exasol:exasol-jdbc:24.2.0",
    "db2": "com.ibm.db2:jcc:11.5.9.0",
    "presto": "io.prestosql:presto-jdbc:350",
    "hive": "org.apache.hive:hive-jdbc:3.1.3",
    "singlestore": "com.singlestore:singlestore-jdbc-client:1.2.9",
    "starrocks": "com.mysql:mysql-connector-j:9.1.0",  # MySQL wire protocol
    "doris": "com.mysql:mysql-connector-j:9.1.0",  # MySQL wire protocol
    "materialize": "org.postgresql:postgresql:42.7.4",  # PostgreSQL wire protocol
    "neo4j": "org.neo4j:neo4j-jdbc-driver:4.0.10",
    "timescaledb": "org.postgresql:postgresql:42.7.4",  # PostgreSQL extension
    "questdb": "org.postgresql:postgresql:42.7.4",  # PostgreSQL wire protocol
    # Adapters without Maven JDBC drivers (require manual JAR download):
    # athena, impala, firebolt, rockset - use respective vendor download pages
}
112
+
113
+
114
+ class TargetSyncTask:
115
+ """Task for synchronizing adapters and JARs based on profiles.yml connections."""
116
+
117
def __init__(
    self,
    project_dir: Optional[str] = None,
    profiles_dir: Optional[str] = None,
    profile_name: Optional[str] = None,
):
    """
    Set up the directories, profile selection and compute registry for a sync.

    :param project_dir: Project root; a falsy value falls back to the current
        working directory
    :param profiles_dir: Directory containing profiles.yml; a falsy value falls
        back to ``get_dvt_dir()``
    :param profile_name: Explicit profile to sync; ``None`` defers the choice
        to dbt_project.yml
    """
    # Resolve the fallbacks first so the attributes are always plain strings.
    if not project_dir:
        project_dir = str(Path.cwd())
    if not profiles_dir:
        profiles_dir = str(get_dvt_dir())

    self.project_dir = project_dir
    self.profiles_dir = profiles_dir
    self.profile_name = profile_name
    # The registry is bound to the resolved project directory.
    self.compute_registry = ComputeRegistry(self.project_dir)
134
+
135
+ def _get_profile_name(self) -> Optional[str]:
136
+ """Get the profile name from project or explicit parameter."""
137
+ if self.profile_name:
138
+ return self.profile_name
139
+
140
+ # Try to read from dbt_project.yml
141
+ project_file = Path(self.project_dir) / "dbt_project.yml"
142
+ if project_file.exists():
143
+ try:
144
+ from dbt.clients.yaml_helper import load_yaml_text
145
+
146
+ content = project_file.read_text()
147
+ data = load_yaml_text(content)
148
+ if data and "profile" in data:
149
+ return data["profile"]
150
+ except Exception:
151
+ pass
152
+
153
+ return None
154
+
155
def _load_profiles(self) -> Dict:
    """
    Read and parse ``<profiles_dir>/profiles.yml``.

    :returns: The parsed YAML content, or an empty dict when the file parses
        to a falsy value
    :raises DbtRuntimeError: If the file does not exist or cannot be loaded
    """
    profiles_path = Path(self.profiles_dir) / "profiles.yml"

    if profiles_path.exists():
        try:
            from dbt.clients.yaml_helper import load_yaml_text

            parsed = load_yaml_text(profiles_path.read_text())
            return parsed or {}
        except Exception as e:
            # Surface any read/parse problem as a single runtime error while
            # keeping the original exception as the cause.
            raise DbtRuntimeError(f"Failed to load profiles.yml: {e}") from e

    raise DbtRuntimeError(
        f"profiles.yml not found at {profiles_path}\n"
        f"Create it with: dvt init <project_name>"
    )
171
+
172
+ def get_connections_info(self) -> Dict[str, Dict]:
173
+ """
174
+ Scan profiles.yml and return detailed info about connections.
175
+
176
+ :returns: Dict mapping connection name to {type, profile}
177
+ """
178
+ profiles = self._load_profiles()
179
+ connections = {}
180
+
181
+ profile_name = self._get_profile_name()
182
+
183
+ if profile_name and profile_name in profiles:
184
+ # Scan only the specified profile
185
+ profile_data = profiles[profile_name]
186
+ outputs = profile_data.get("outputs", {})
187
+ for target_name, target_config in outputs.items():
188
+ adapter_type = target_config.get("type")
189
+ if adapter_type:
190
+ connections[target_name] = {
191
+ "type": adapter_type,
192
+ "profile": profile_name,
193
+ }
194
+ else:
195
+ # Scan all profiles
196
+ for prof_name, profile_data in profiles.items():
197
+ if isinstance(profile_data, dict):
198
+ outputs = profile_data.get("outputs", {})
199
+ for target_name, target_config in outputs.items():
200
+ if isinstance(target_config, dict):
201
+ adapter_type = target_config.get("type")
202
+ if adapter_type:
203
+ connections[f"{prof_name}.{target_name}"] = {
204
+ "type": adapter_type,
205
+ "profile": prof_name,
206
+ }
207
+
208
+ return connections
209
+
210
+ def get_required_adapter_types(self) -> Set[str]:
211
+ """
212
+ Scan profiles.yml and return the set of adapter types needed.
213
+
214
+ :returns: Set of adapter type names (e.g., {'postgres', 'snowflake'})
215
+ """
216
+ connections = self.get_connections_info()
217
+ return {info["type"] for info in connections.values()}
218
+
219
def get_installed_adapters(self) -> Set[str]:
    """
    Detect which dbt adapters are currently installed.

    An adapter type counts as installed when its adapter module can be located
    by the import system. A module whose parent package cannot be imported is
    treated as not installed rather than aborting the whole scan.

    :returns: Set of installed adapter type names
    """
    import importlib.util

    installed = set()

    adapter_modules = {
        "postgres": "dbt.adapters.postgres",
        "snowflake": "dbt.adapters.snowflake",
        "bigquery": "dbt.adapters.bigquery",
        "redshift": "dbt.adapters.redshift",
        "spark": "dbt.adapters.spark",
        "databricks": "dbt.adapters.databricks",
        "trino": "dbt.adapters.trino",
        "duckdb": "dbt.adapters.duckdb",
        "mysql": "dbt.adapters.mysql",
        "sqlserver": "dbt.adapters.sqlserver",
        "synapse": "dbt.adapters.synapse",
        "fabric": "dbt.adapters.fabric",
        "oracle": "dbt.adapters.oracle",
        "teradata": "dbt.adapters.teradata",
        "clickhouse": "dbt.adapters.clickhouse",
        "greenplum": "dbt.adapters.greenplum",
        "vertica": "dbt.adapters.vertica",
        "sqlite": "dbt.adapters.sqlite",
        "mariadb": "dbt.adapters.mysql",  # Uses MySQL adapter
        "exasol": "dbt.adapters.exasol",
        "athena": "dbt.adapters.athena",
        "hive": "dbt.adapters.hive",
        "impala": "dbt.adapters.impala",
        "singlestore": "dbt.adapters.singlestore",
        "firebolt": "dbt.adapters.firebolt",
        "starrocks": "dbt.adapters.starrocks",
        "doris": "dbt.adapters.doris",
        "materialize": "dbt.adapters.materialize",
        "rockset": "dbt.adapters.rockset",
    }

    for adapter_type, module_name in adapter_modules.items():
        try:
            spec = importlib.util.find_spec(module_name)
        except (ImportError, ValueError):
            # find_spec imports the parent package of a dotted name and raises
            # ModuleNotFoundError (an ImportError) when that parent is missing;
            # ValueError is raised for a module with an unset __spec__.
            # Either way the adapter is not usable.
            spec = None
        if spec is not None:
            installed.add(adapter_type)

    return installed
267
+
268
def install_adapters(
    self, adapter_types: Set[str], verbose: bool = True
) -> Tuple[List[str], List[str]]:
    """
    Run ``pip install`` for the package behind each requested adapter type.

    Adapter types with no entry in ADAPTER_PACKAGE_MAPPING are reported (when
    verbose) and skipped; they appear in neither result list.

    :param adapter_types: Set of adapter type names to install
    :param verbose: Print progress messages
    :returns: Tuple of (installed packages, failed packages)
    """
    succeeded: List[str] = []
    errored: List[str] = []

    for adapter_type in adapter_types:
        package = ADAPTER_PACKAGE_MAPPING.get(adapter_type)
        if not package:
            if verbose:
                print(f" ⚠ Unknown adapter type: {adapter_type}")
            continue

        if verbose:
            print(f" Installing {package}...")

        try:
            # Install through the running interpreter's own pip so the package
            # lands in the same environment.
            pip_command = [sys.executable, "-m", "pip", "install", package, "--quiet"]
            result = subprocess.run(pip_command, capture_output=True, text=True)
            if result.returncode != 0:
                errored.append(package)
                if verbose:
                    print(f" ✗ Failed to install {package}")
                    if result.stderr:
                        # Only the first 200 characters of pip's stderr are shown.
                        print(f" {result.stderr[:200]}")
            else:
                succeeded.append(package)
                if verbose:
                    print(f" ✓ {package} installed")
        except Exception as e:
            errored.append(package)
            if verbose:
                print(f" ✗ Error installing {package}: {e}")

    return succeeded, errored
314
+
315
def uninstall_adapters(
    self, adapter_types: Set[str], verbose: bool = True
) -> Tuple[List[str], List[str]]:
    """
    Run ``pip uninstall -y`` for the package behind each given adapter type.

    Adapter types with no entry in ADAPTER_PACKAGE_MAPPING are skipped
    silently.

    :param adapter_types: Set of adapter type names to uninstall
    :param verbose: Print progress messages
    :returns: Tuple of (uninstalled packages, failed packages)
    """
    removed: List[str] = []
    errored: List[str] = []

    # Unknown adapter types map to None and are dropped here.
    packages = (ADAPTER_PACKAGE_MAPPING.get(adapter_type) for adapter_type in adapter_types)
    for package in packages:
        if not package:
            continue

        if verbose:
            print(f" Removing {package}...")

        try:
            pip_command = [sys.executable, "-m", "pip", "uninstall", package, "-y", "--quiet"]
            result = subprocess.run(pip_command, capture_output=True, text=True)
            if result.returncode != 0:
                errored.append(package)
                if verbose:
                    print(f" ✗ Failed to remove {package}")
            else:
                removed.append(package)
                if verbose:
                    print(f" ✓ {package} removed")
        except Exception as e:
            errored.append(package)
            if verbose:
                print(f" ✗ Error removing {package}: {e}")

    return removed, errored
356
+
357
+ def _maven_coord_to_url(self, coord: str) -> Tuple[str, str]:
358
+ """
359
+ Convert Maven coordinate to download URL and JAR filename.
360
+
361
+ :param coord: Maven coordinate (e.g., 'org.postgresql:postgresql:42.7.4')
362
+ :returns: Tuple of (download_url, jar_filename)
363
+ """
364
+ parts = coord.split(":")
365
+ if len(parts) < 3:
366
+ raise ValueError(f"Invalid Maven coordinate: {coord}")
367
+
368
+ group_id = parts[0]
369
+ artifact_id = parts[1]
370
+ version = parts[2]
371
+
372
+ # Convert group.id to group/id path
373
+ group_path = group_id.replace(".", "/")
374
+
375
+ # Build URL
376
+ jar_name = f"{artifact_id}-{version}.jar"
377
+ url = f"{MAVEN_REPO}/{group_path}/{artifact_id}/{version}/{jar_name}"
378
+
379
+ return url, jar_name
380
+
381
+ def _resolve_transitive_deps(
382
+ self, coord: str, resolved: Optional[Set[str]] = None, depth: int = 0
383
+ ) -> Set[str]:
384
+ """
385
+ Resolve transitive dependencies for a Maven coordinate by parsing POM file.
386
+
387
+ :param coord: Maven coordinate (e.g., 'org.postgresql:postgresql:42.7.4')
388
+ :param resolved: Set of already resolved coordinates (to avoid cycles)
389
+ :param depth: Current recursion depth (max 3 to avoid deep trees)
390
+ :returns: Set of all coordinates (including transitive deps)
391
+ """
392
+ if resolved is None:
393
+ resolved = set()
394
+
395
+ # Parse coordinate
396
+ parts = coord.split(":")
397
+ if len(parts) < 3:
398
+ return resolved
399
+
400
+ group_id, artifact_id, version = parts[0], parts[1], parts[2]
401
+
402
+ # Avoid cycles and limit depth
403
+ if coord in resolved or depth > 3:
404
+ return resolved
405
+
406
+ resolved.add(coord)
407
+
408
+ # Skip transitive resolution for known self-contained JDBC drivers
409
+ # Most JDBC drivers bundle their dependencies or have minimal deps
410
+ self_contained_drivers = {
411
+ "postgresql", "snowflake-jdbc", "mysql-connector-j", "mssql-jdbc",
412
+ "ojdbc11", "terajdbc", "clickhouse-jdbc", "sqlite-jdbc",
413
+ "mariadb-java-client", "exasol-jdbc", "jcc", "trino-jdbc",
414
+ "presto-jdbc", "duckdb_jdbc", "databricks-jdbc", "redshift-jdbc42",
415
+ "singlestore-jdbc-client", "neo4j-jdbc-driver", "vertica-jdbc"
416
+ }
417
+ if artifact_id in self_contained_drivers:
418
+ return resolved
419
+
420
+ # Try to fetch and parse POM for transitive deps
421
+ try:
422
+ group_path = group_id.replace(".", "/")
423
+ pom_url = f"{MAVEN_REPO}/{group_path}/{artifact_id}/{version}/{artifact_id}-{version}.pom"
424
+
425
+ request = urllib.request.Request(
426
+ pom_url,
427
+ headers={"User-Agent": "DVT-Core/0.5.95"}
428
+ )
429
+
430
+ with urllib.request.urlopen(request, timeout=10) as response:
431
+ pom_content = response.read().decode("utf-8")
432
+
433
+ # Parse POM XML
434
+ root = ET.fromstring(pom_content)
435
+ ns = {"m": "http://maven.apache.org/POM/4.0.0"}
436
+
437
+ # Find dependencies
438
+ for dep in root.findall(".//m:dependency", ns):
439
+ dep_group = dep.find("m:groupId", ns)
440
+ dep_artifact = dep.find("m:artifactId", ns)
441
+ dep_version = dep.find("m:version", ns)
442
+ dep_scope = dep.find("m:scope", ns)
443
+ dep_optional = dep.find("m:optional", ns)
444
+
445
+ # Skip test, provided, and optional dependencies
446
+ if dep_scope is not None and dep_scope.text in ("test", "provided"):
447
+ continue
448
+ if dep_optional is not None and dep_optional.text == "true":
449
+ continue
450
+
451
+ if dep_group is not None and dep_artifact is not None and dep_version is not None:
452
+ dep_coord = f"{dep_group.text}:{dep_artifact.text}:{dep_version.text}"
453
+ # Recursively resolve (limited depth)
454
+ self._resolve_transitive_deps(dep_coord, resolved, depth + 1)
455
+
456
+ except Exception:
457
+ # If POM parsing fails, just return current resolved set
458
+ pass
459
+
460
+ return resolved
461
+
462
+ def _download_jar(self, url: str, dest_path: Path, verbose: bool = True) -> bool:
463
+ """
464
+ Download a JAR file from URL.
465
+
466
+ :param url: URL to download from
467
+ :param dest_path: Destination path
468
+ :param verbose: Print progress messages
469
+ :returns: True if successful
470
+ """
471
+ try:
472
+ if verbose:
473
+ print(f" Downloading {dest_path.name}...")
474
+
475
+ # Create request with user agent
476
+ request = urllib.request.Request(
477
+ url,
478
+ headers={"User-Agent": "DVT-Core/0.5.95"}
479
+ )
480
+
481
+ with urllib.request.urlopen(request, timeout=60) as response:
482
+ with open(dest_path, "wb") as f:
483
+ f.write(response.read())
484
+
485
+ if verbose:
486
+ size_mb = dest_path.stat().st_size / (1024 * 1024)
487
+ print(f" ✓ Downloaded ({size_mb:.1f} MB)")
488
+ return True
489
+
490
+ except Exception as e:
491
+ if verbose:
492
+ print(f" ✗ Failed: {e}")
493
+ return False
494
+
495
+ def download_jdbc_jars(
496
+ self, adapter_types: Set[str], verbose: bool = True
497
+ ) -> Tuple[List[str], List[str]]:
498
+ """
499
+ Download JDBC JARs to project .dvt/jdbc_jars/ directory.
500
+
501
+ v0.5.95: Hybrid approach - resolves transitive dependencies via Maven POM,
502
+ downloads all JARs to local cache, then uses spark.jars for fast startup.
503
+
504
+ :param adapter_types: Set of adapter type names
505
+ :param verbose: Print progress messages
506
+ :returns: Tuple of (downloaded jars, failed jars)
507
+ """
508
+ # Ensure jdbc_jars directory exists
509
+ jdbc_jars_dir = Path(self.project_dir) / ".dvt" / "jdbc_jars"
510
+ jdbc_jars_dir.mkdir(parents=True, exist_ok=True)
511
+
512
+ downloaded = []
513
+ failed = []
514
+
515
+ # Build list of required JAR coordinates (direct dependencies)
516
+ direct_coords = set()
517
+ for adapter_type in adapter_types:
518
+ jars = ADAPTER_JDBC_MAPPING.get(adapter_type, "")
519
+ if jars:
520
+ for jar in jars.split(","):
521
+ jar = jar.strip()
522
+ if jar:
523
+ direct_coords.add(jar)
524
+
525
+ if not direct_coords:
526
+ if verbose:
527
+ print("\n No JDBC JARs needed for these adapters")
528
+ return downloaded, failed
529
+
530
+ if verbose:
531
+ print(f"\n Resolving JDBC dependencies...")
532
+ print(f" Direct dependencies: {len(direct_coords)}")
533
+
534
+ # Resolve transitive dependencies for all direct coords
535
+ all_coords = set()
536
+ for coord in direct_coords:
537
+ resolved = self._resolve_transitive_deps(coord)
538
+ all_coords.update(resolved)
539
+
540
+ if verbose:
541
+ transitive_count = len(all_coords) - len(direct_coords)
542
+ if transitive_count > 0:
543
+ print(f" Transitive dependencies: {transitive_count}")
544
+ print(f" Total JARs to download: {len(all_coords)}")
545
+ print(f"\n Downloading to {jdbc_jars_dir}/")
546
+
547
+ for coord in sorted(all_coords):
548
+ try:
549
+ url, jar_name = self._maven_coord_to_url(coord)
550
+ dest_path = jdbc_jars_dir / jar_name
551
+
552
+ # Skip if already downloaded
553
+ if dest_path.exists():
554
+ if verbose:
555
+ print(f" {jar_name} (cached)")
556
+ downloaded.append(jar_name)
557
+ continue
558
+
559
+ # Download the JAR
560
+ if self._download_jar(url, dest_path, verbose):
561
+ downloaded.append(jar_name)
562
+ else:
563
+ failed.append(jar_name)
564
+
565
+ except ValueError as e:
566
+ if verbose:
567
+ print(f" ⚠ Skipping {coord}: {e}")
568
+ continue
569
+ except Exception as e:
570
+ if verbose:
571
+ print(f" ✗ Error with {coord}: {e}")
572
+ failed.append(coord)
573
+
574
+ return downloaded, failed
575
+
576
def update_jdbc_jars(self, adapter_types: Set[str], verbose: bool = True) -> bool:
    """
    Report which JDBC JARs are present and drop legacy JAR settings.

    Looks for ``*.jar`` files under ``<project_dir>/.dvt/jdbc_jars`` and,
    when ``verbose`` is set, prints what was found. Any ``spark.jars`` or
    ``spark.jars.packages`` entry on the ``spark-local`` compute is removed
    and the registry is saved, since (per the v0.5.96 notes) JARs are meant
    to be discovered from the project directory at runtime rather than
    stored as absolute paths in config.

    :param adapter_types: Set of adapter type names (accepted for interface
        compatibility; not consulted by this method)
    :param verbose: Print progress messages
    :returns: True if at least one JAR file was found
    """
    jar_dir = Path(self.project_dir) / ".dvt" / "jdbc_jars"

    # A missing directory simply means nothing has been downloaded yet.
    found = sorted(jar_dir.glob("*.jar")) if jar_dir.exists() else []

    if verbose and not found:
        print("\n No JDBC JARs downloaded")
    elif verbose:
        print(f"\n JDBC JARs downloaded ({len(found)}):")
        for jar_file in found:
            print(f" - {jar_file.name}")
        print(f"\n ✓ JARs stored in: {jar_dir}")
        print(" (Spark discovers JARs at runtime - portable across folder moves)")

    # Legacy configs stored JAR locations on the spark-local compute; strip
    # them and persist the registry only when something actually changed.
    local_compute = self.compute_registry.get("spark-local")
    if local_compute:
        stale_keys = [
            key
            for key in ("spark.jars", "spark.jars.packages")
            if key in local_compute.config
        ]
        for key in stale_keys:
            local_compute.config.pop(key, None)
        if stale_keys:
            self.compute_registry._save()
            if verbose:
                print("\n ✓ Cleaned up old spark.jars config (now uses runtime discovery)")

    return bool(found)
625
+
626
def sync(self, verbose: bool = True, clean: bool = False, dry_run: bool = False) -> bool:
    """
    Synchronize adapters and JARs based on profiles.yml.

    Adapters are never installed or removed automatically: the method only
    prints the pip/uv commands to run. JDBC JARs, however, are downloaded
    (via ``download_jdbc_jars``) unless ``dry_run`` is set.

    :param verbose: Print progress messages
    :param clean: If True, list installed adapters not needed by profiles.yml
        as candidates for removal
    :param dry_run: If True, only report what would be done without making changes
    :returns: False if the profile information could not be read
        (``DbtRuntimeError``) or no adapter types are required; True
        otherwise, including when the sync finished with warnings
        (missing adapters or failed JAR downloads)
    """
    rule = "-" * 40
    banner = "=" * 60

    if verbose:
        print("\nDVT Target Sync")
        print(banner)

    # Get connection info from profiles.yml
    try:
        connections = self.get_connections_info()
        required = self.get_required_adapter_types()
    except DbtRuntimeError as e:
        # Errors are reported even when verbose is off.
        print(f"✗ Error: {e}")
        return False

    # Report connections found
    if verbose:
        profile_name = self._get_profile_name()
        if profile_name:
            print(f"Profile: {profile_name}")
        print(f"\nConnections found: {len(connections)}")
        print(rule)

        # Group connection names by adapter type for display.
        by_type: Dict[str, List[str]] = {}
        for conn_name, info in connections.items():
            by_type.setdefault(info["type"], []).append(conn_name)

        for adapter_type in sorted(by_type):
            conn_names = by_type[adapter_type]
            package = ADAPTER_PACKAGE_MAPPING.get(adapter_type, "unknown")
            print(f"\n {adapter_type} ({len(conn_names)} connection(s)):")
            for conn_name in conn_names:
                print(f" - {conn_name}")
            print(f" Package: {package}")

    if not required:
        print("\n⚠ No connections found in profiles.yml")
        print(" Add connections to ~/.dvt/profiles.yml first")
        return False

    # Get currently installed adapters
    installed = self.get_installed_adapters()

    # Determine what to install and uninstall
    to_install = required - installed
    to_uninstall = installed - required if clean else set()

    # Report what will be installed
    if verbose:
        print("\n" + rule)
        print("\nAdapter Status:")

        if to_install:
            print(f"\n To install ({len(to_install)}):")
            for adapter_type in sorted(to_install):
                package = ADAPTER_PACKAGE_MAPPING.get(adapter_type, "unknown")
                print(f" - {adapter_type}: pip install {package}")
        else:
            print("\n ✓ All required adapters already installed")

        if to_uninstall:
            print(f"\n To remove ({len(to_uninstall)}):")
            for adapter_type in sorted(to_uninstall):
                package = ADAPTER_PACKAGE_MAPPING.get(adapter_type, "unknown")
                print(f" - {adapter_type}: pip uninstall {package}")

        # Report adapters installed but not used (if not cleaning)
        unused = installed - required
        if unused and not clean:
            print(f"\n Installed but not used ({len(unused)}):")
            for adapter_type in sorted(unused):
                package = ADAPTER_PACKAGE_MAPPING.get(adapter_type, "unknown")
                print(f" - {adapter_type} ({package})")
            print(" (use --clean to remove unused adapters)")

    # If dry run, stop here
    if dry_run:
        if verbose:
            print("\n" + banner)
            print("Dry run complete. No changes made.")
        return True

    # Show install instructions for missing adapters (don't auto-install)
    if to_install and verbose:
        print("\n" + rule)
        print(f"\nMissing Adapters ({len(to_install)}):")
        print(" Install manually with pip or uv:\n")
        for adapter_type in sorted(to_install):
            package = ADAPTER_PACKAGE_MAPPING.get(adapter_type, "unknown")
            print(f" pip install {package}")
            print(f" # or: uv pip install {package}\n")

    # Show uninstall instructions for unused adapters (only if clean=True)
    if to_uninstall and verbose:
        print("\n" + rule)
        print(f"\nUnused Adapters ({len(to_uninstall)}):")
        print(" Uninstall manually with pip or uv:\n")
        for adapter_type in sorted(to_uninstall):
            package = ADAPTER_PACKAGE_MAPPING.get(adapter_type, "unknown")
            print(f" pip uninstall {package}")
            print(f" # or: uv pip uninstall {package}\n")

    # Download JDBC JARs to project directory
    if verbose:
        print("\n" + rule)
        print("\nDownloading JDBC JARs...")
    _, failed_jars = self.download_jdbc_jars(required, verbose)
    if failed_jars and verbose:
        print(f"\n ⚠ {len(failed_jars)} JAR(s) failed to download")

    # Update JDBC JARs config in spark-local
    if verbose:
        print("\n" + rule)
        print("\nUpdating JDBC configuration...")
    self.update_jdbc_jars(required, verbose)

    if verbose:
        print("\n" + banner)
        if to_install or failed_jars:
            # Don't tell the user they can run while adapters are missing
            # or JARs failed to download; summarize what is outstanding.
            print("⚠ Sync complete with warnings")
            if to_install:
                print(f"\n {len(to_install)} adapter(s) still need to be installed (see above)")
            if failed_jars:
                print(f" {len(failed_jars)} JDBC JAR(s) failed to download; re-run sync to retry")
        else:
            print("✓ Sync complete")
            print("\nYou can now run: dvt run")

    return True