dvt-core 0.59.0a51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2660 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +844 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +60 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +642 -0
  74. dbt/compute/federated_executor.py +1080 -0
  75. dbt/compute/filter_pushdown.py +273 -0
  76. dbt/compute/jar_provisioning.py +273 -0
  77. dbt/compute/java_compat.py +689 -0
  78. dbt/compute/jdbc_utils.py +1252 -0
  79. dbt/compute/metadata/__init__.py +63 -0
  80. dbt/compute/metadata/adapters_registry.py +370 -0
  81. dbt/compute/metadata/catalog_store.py +1036 -0
  82. dbt/compute/metadata/registry.py +674 -0
  83. dbt/compute/metadata/store.py +1020 -0
  84. dbt/compute/smart_selector.py +377 -0
  85. dbt/compute/spark_logger.py +272 -0
  86. dbt/compute/strategies/__init__.py +55 -0
  87. dbt/compute/strategies/base.py +165 -0
  88. dbt/compute/strategies/dataproc.py +207 -0
  89. dbt/compute/strategies/emr.py +203 -0
  90. dbt/compute/strategies/local.py +472 -0
  91. dbt/compute/strategies/standalone.py +262 -0
  92. dbt/config/__init__.py +4 -0
  93. dbt/config/catalogs.py +94 -0
  94. dbt/config/compute.py +513 -0
  95. dbt/config/dvt_profile.py +408 -0
  96. dbt/config/profile.py +422 -0
  97. dbt/config/project.py +888 -0
  98. dbt/config/project_utils.py +48 -0
  99. dbt/config/renderer.py +231 -0
  100. dbt/config/runtime.py +564 -0
  101. dbt/config/selectors.py +208 -0
  102. dbt/config/utils.py +77 -0
  103. dbt/constants.py +28 -0
  104. dbt/context/__init__.py +0 -0
  105. dbt/context/base.py +745 -0
  106. dbt/context/configured.py +135 -0
  107. dbt/context/context_config.py +382 -0
  108. dbt/context/docs.py +82 -0
  109. dbt/context/exceptions_jinja.py +178 -0
  110. dbt/context/macro_resolver.py +195 -0
  111. dbt/context/macros.py +171 -0
  112. dbt/context/manifest.py +72 -0
  113. dbt/context/providers.py +2249 -0
  114. dbt/context/query_header.py +13 -0
  115. dbt/context/secret.py +58 -0
  116. dbt/context/target.py +74 -0
  117. dbt/contracts/__init__.py +0 -0
  118. dbt/contracts/files.py +413 -0
  119. dbt/contracts/graph/__init__.py +0 -0
  120. dbt/contracts/graph/manifest.py +1904 -0
  121. dbt/contracts/graph/metrics.py +97 -0
  122. dbt/contracts/graph/model_config.py +70 -0
  123. dbt/contracts/graph/node_args.py +42 -0
  124. dbt/contracts/graph/nodes.py +1806 -0
  125. dbt/contracts/graph/semantic_manifest.py +232 -0
  126. dbt/contracts/graph/unparsed.py +811 -0
  127. dbt/contracts/project.py +419 -0
  128. dbt/contracts/results.py +53 -0
  129. dbt/contracts/selection.py +23 -0
  130. dbt/contracts/sql.py +85 -0
  131. dbt/contracts/state.py +68 -0
  132. dbt/contracts/util.py +46 -0
  133. dbt/deprecations.py +348 -0
  134. dbt/deps/__init__.py +0 -0
  135. dbt/deps/base.py +152 -0
  136. dbt/deps/git.py +195 -0
  137. dbt/deps/local.py +79 -0
  138. dbt/deps/registry.py +130 -0
  139. dbt/deps/resolver.py +149 -0
  140. dbt/deps/tarball.py +120 -0
  141. dbt/docs/source/_ext/dbt_click.py +119 -0
  142. dbt/docs/source/conf.py +32 -0
  143. dbt/env_vars.py +64 -0
  144. dbt/event_time/event_time.py +40 -0
  145. dbt/event_time/sample_window.py +60 -0
  146. dbt/events/__init__.py +15 -0
  147. dbt/events/base_types.py +36 -0
  148. dbt/events/core_types_pb2.py +2 -0
  149. dbt/events/logging.py +108 -0
  150. dbt/events/types.py +2516 -0
  151. dbt/exceptions.py +1486 -0
  152. dbt/flags.py +89 -0
  153. dbt/graph/__init__.py +11 -0
  154. dbt/graph/cli.py +249 -0
  155. dbt/graph/graph.py +172 -0
  156. dbt/graph/queue.py +214 -0
  157. dbt/graph/selector.py +374 -0
  158. dbt/graph/selector_methods.py +975 -0
  159. dbt/graph/selector_spec.py +222 -0
  160. dbt/graph/thread_pool.py +18 -0
  161. dbt/hooks.py +21 -0
  162. dbt/include/README.md +49 -0
  163. dbt/include/__init__.py +3 -0
  164. dbt/include/data/adapters_registry.duckdb +0 -0
  165. dbt/include/data/build_comprehensive_registry.py +1254 -0
  166. dbt/include/data/build_registry.py +242 -0
  167. dbt/include/data/csv/adapter_queries.csv +33 -0
  168. dbt/include/data/csv/syntax_rules.csv +9 -0
  169. dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
  170. dbt/include/data/csv/type_mappings_databricks.csv +30 -0
  171. dbt/include/data/csv/type_mappings_mysql.csv +40 -0
  172. dbt/include/data/csv/type_mappings_oracle.csv +30 -0
  173. dbt/include/data/csv/type_mappings_postgres.csv +56 -0
  174. dbt/include/data/csv/type_mappings_redshift.csv +33 -0
  175. dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
  176. dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
  177. dbt/include/dvt_starter_project/README.md +15 -0
  178. dbt/include/dvt_starter_project/__init__.py +3 -0
  179. dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
  180. dbt/include/dvt_starter_project/dvt_project.yml +39 -0
  181. dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
  182. dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
  183. dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
  184. dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
  185. dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
  186. dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
  187. dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
  188. dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
  189. dbt/internal_deprecations.py +26 -0
  190. dbt/jsonschemas/__init__.py +3 -0
  191. dbt/jsonschemas/jsonschemas.py +309 -0
  192. dbt/jsonschemas/project/0.0.110.json +4717 -0
  193. dbt/jsonschemas/project/0.0.85.json +2015 -0
  194. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  195. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  196. dbt/jsonschemas/resources/latest.json +6773 -0
  197. dbt/links.py +4 -0
  198. dbt/materializations/__init__.py +0 -0
  199. dbt/materializations/incremental/__init__.py +0 -0
  200. dbt/materializations/incremental/microbatch.py +236 -0
  201. dbt/mp_context.py +8 -0
  202. dbt/node_types.py +37 -0
  203. dbt/parser/__init__.py +23 -0
  204. dbt/parser/analysis.py +21 -0
  205. dbt/parser/base.py +548 -0
  206. dbt/parser/common.py +266 -0
  207. dbt/parser/docs.py +52 -0
  208. dbt/parser/fixtures.py +51 -0
  209. dbt/parser/functions.py +30 -0
  210. dbt/parser/generic_test.py +100 -0
  211. dbt/parser/generic_test_builders.py +333 -0
  212. dbt/parser/hooks.py +122 -0
  213. dbt/parser/macros.py +137 -0
  214. dbt/parser/manifest.py +2208 -0
  215. dbt/parser/models.py +573 -0
  216. dbt/parser/partial.py +1178 -0
  217. dbt/parser/read_files.py +445 -0
  218. dbt/parser/schema_generic_tests.py +422 -0
  219. dbt/parser/schema_renderer.py +111 -0
  220. dbt/parser/schema_yaml_readers.py +935 -0
  221. dbt/parser/schemas.py +1466 -0
  222. dbt/parser/search.py +149 -0
  223. dbt/parser/seeds.py +28 -0
  224. dbt/parser/singular_test.py +20 -0
  225. dbt/parser/snapshots.py +44 -0
  226. dbt/parser/sources.py +558 -0
  227. dbt/parser/sql.py +62 -0
  228. dbt/parser/unit_tests.py +621 -0
  229. dbt/plugins/__init__.py +20 -0
  230. dbt/plugins/contracts.py +9 -0
  231. dbt/plugins/exceptions.py +2 -0
  232. dbt/plugins/manager.py +163 -0
  233. dbt/plugins/manifest.py +21 -0
  234. dbt/profiler.py +20 -0
  235. dbt/py.typed +1 -0
  236. dbt/query_analyzer.py +410 -0
  237. dbt/runners/__init__.py +2 -0
  238. dbt/runners/exposure_runner.py +7 -0
  239. dbt/runners/no_op_runner.py +45 -0
  240. dbt/runners/saved_query_runner.py +7 -0
  241. dbt/selected_resources.py +8 -0
  242. dbt/task/__init__.py +0 -0
  243. dbt/task/base.py +506 -0
  244. dbt/task/build.py +197 -0
  245. dbt/task/clean.py +56 -0
  246. dbt/task/clone.py +161 -0
  247. dbt/task/compile.py +150 -0
  248. dbt/task/compute.py +458 -0
  249. dbt/task/debug.py +513 -0
  250. dbt/task/deps.py +280 -0
  251. dbt/task/docs/__init__.py +3 -0
  252. dbt/task/docs/api/__init__.py +23 -0
  253. dbt/task/docs/api/catalog.py +204 -0
  254. dbt/task/docs/api/lineage.py +234 -0
  255. dbt/task/docs/api/profile.py +204 -0
  256. dbt/task/docs/api/spark.py +186 -0
  257. dbt/task/docs/generate.py +1002 -0
  258. dbt/task/docs/index.html +250 -0
  259. dbt/task/docs/serve.py +174 -0
  260. dbt/task/dvt_output.py +509 -0
  261. dbt/task/dvt_run.py +282 -0
  262. dbt/task/dvt_seed.py +806 -0
  263. dbt/task/freshness.py +322 -0
  264. dbt/task/function.py +121 -0
  265. dbt/task/group_lookup.py +46 -0
  266. dbt/task/init.py +1022 -0
  267. dbt/task/java.py +316 -0
  268. dbt/task/list.py +236 -0
  269. dbt/task/metadata.py +804 -0
  270. dbt/task/migrate.py +714 -0
  271. dbt/task/printer.py +175 -0
  272. dbt/task/profile.py +1489 -0
  273. dbt/task/profile_serve.py +662 -0
  274. dbt/task/retract.py +441 -0
  275. dbt/task/retry.py +175 -0
  276. dbt/task/run.py +1647 -0
  277. dbt/task/run_operation.py +141 -0
  278. dbt/task/runnable.py +758 -0
  279. dbt/task/seed.py +103 -0
  280. dbt/task/show.py +149 -0
  281. dbt/task/snapshot.py +56 -0
  282. dbt/task/spark.py +414 -0
  283. dbt/task/sql.py +110 -0
  284. dbt/task/target_sync.py +814 -0
  285. dbt/task/test.py +464 -0
  286. dbt/tests/fixtures/__init__.py +1 -0
  287. dbt/tests/fixtures/project.py +620 -0
  288. dbt/tests/util.py +651 -0
  289. dbt/tracking.py +529 -0
  290. dbt/utils/__init__.py +3 -0
  291. dbt/utils/artifact_upload.py +151 -0
  292. dbt/utils/utils.py +408 -0
  293. dbt/version.py +271 -0
  294. dvt_cli/__init__.py +158 -0
  295. dvt_core-0.59.0a51.dist-info/METADATA +288 -0
  296. dvt_core-0.59.0a51.dist-info/RECORD +299 -0
  297. dvt_core-0.59.0a51.dist-info/WHEEL +5 -0
  298. dvt_core-0.59.0a51.dist-info/entry_points.txt +2 -0
  299. dvt_core-0.59.0a51.dist-info/top_level.txt +2 -0
dvt_cli/__init__.py ADDED
@@ -0,0 +1,158 @@
1
+ """
2
+ DVT CLI Entry Point Package
3
+
4
+ This standalone package provides the entry point for the DVT command-line
5
+ interface. It's separate from the 'dbt' namespace to avoid conflicts with
6
+ dbt-core during the initial import.
7
+
8
+ Why this package exists:
9
+ -----------------------
10
+ DVT extends dbt-core with additional commands (compute, target, migrate, profile, spark, java).
11
+ However, dbt adapters (like dbt-postgres) depend on dbt-core, so both
12
+ dvt-core and dbt-core end up installed together. Both packages provide
13
+ the 'dbt' namespace, which causes import conflicts.
14
+
15
+ The key issue is that both packages install files to the same location
16
+ (site-packages/dbt/). When dbt-core is installed after dvt-core, it
17
+ OVERWRITES DVT's files. This package detects this condition at runtime
18
+ and automatically restores DVT's files by reinstalling dvt-core.
19
+ """
20
+
21
+ import subprocess
22
+ import sys
23
+ from pathlib import Path
24
+
25
+
26
def _check_dvt_cli_intact() -> bool:
    """
    Check whether DVT's CLI is intact by looking for a DVT-specific command.

    The check imports ``dbt.cli.main`` and looks for a ``compute`` entry among
    the commands registered on its top-level ``cli`` object. ``compute`` is
    treated as DVT's signature command: if it is missing, the installed
    ``dbt/cli/main.py`` is assumed to be dbt-core's copy rather than DVT's.

    Returns:
        True if ``dbt.cli.main.cli`` exposes a ``compute`` command.
        False if it does not, if ``cli`` is missing, or if the module cannot
        be imported or inspected at all.
    """
    try:
        from dbt.cli import main as cli_main

        cli = getattr(cli_main, "cli", None)
        if cli is None:
            return False

        # Fast path: click groups keep registered commands in a mapping.
        if hasattr(cli, "commands"):
            return "compute" in cli.commands

        # Fallback for command collections that only expose list_commands().
        # Duck-typing is used instead of isinstance(cli, click.MultiCommand)
        # because that class is deprecated as of click 8.2.
        list_commands = getattr(cli, "list_commands", None)
        if callable(list_commands):
            import click

            return "compute" in list_commands(click.Context(cli))

        return False
    except Exception:
        # Deliberately best-effort: any import or inspection failure is
        # reported as "not intact" so the caller can attempt a restore.
        return False
58
+
59
+
60
def _restore_dvt_files() -> bool:
    """
    Restore DVT's files by reinstalling dvt-core.

    This is called when dbt-core has overwritten DVT's files.
    dvt-core is reinstalled with --no-deps so that only DVT's own files are
    rewritten and the installed dependencies (including dbt-core) are left
    alone.

    Installers are tried in this order, stopping at the first that succeeds:

    1. ``uv pip install --python <this interpreter> --reinstall ...``
    2. ``<this interpreter> -m pip install --force-reinstall ...``
    3. a ``pip`` / ``pip3`` executable found on PATH (last resort, because it
       may belong to a different Python environment than the running one)

    Note that uv and pip spell the flag differently: uv uses ``--reinstall``
    while pip only accepts ``--force-reinstall``.

    Progress and failure messages are written to stderr.

    Returns:
        True if one of the installers exited with status 0, False otherwise.
    """
    import shutil

    print(" 🔧 DVT: Restoring DVT files (dbt-core overwrote them)...", file=sys.stderr)

    interpreter = sys.executable  # may be empty/None in embedded interpreters
    pip_args = ["install", "--force-reinstall", "--no-deps", "dvt-core", "--quiet"]
    candidates = []

    # Method 1: uv, pinned to the running interpreter's environment.
    uv_path = shutil.which("uv")
    if uv_path and interpreter:
        candidates.append(
            [uv_path, "pip", "install", "--python", interpreter,
             "--reinstall", "--no-deps", "dvt-core", "--quiet"]
        )

    # Method 2: pip as a module of the running interpreter, which is
    # guaranteed to target the environment this process is running in.
    if interpreter:
        candidates.append([interpreter, "-m", "pip", *pip_args])

    # Method 3: whatever pip executable is on PATH.
    pip_path = shutil.which("pip") or shutil.which("pip3")
    if pip_path:
        candidates.append([pip_path, *pip_args])

    for command in candidates:
        try:
            result = subprocess.run(command, capture_output=True, text=True)
        except (OSError, subprocess.SubprocessError):
            # Installer could not be launched; fall through to the next one.
            continue
        if result.returncode == 0:
            print(" ✓ DVT files restored. Please re-run your command.", file=sys.stderr)
            return True

    print(" ⚠ Failed to restore DVT files. Please run manually:", file=sys.stderr)
    print(" pip install --force-reinstall --no-deps dvt-core", file=sys.stderr)
    return False
125
+
126
+
127
def _clear_dbt_modules():
    """Evict every cached module whose name starts with 'dbt' so it is re-imported on next use."""
    # Iterate over a snapshot of the keys: sys.modules is mutated in the loop.
    for module_name in tuple(sys.modules):
        if module_name.startswith('dbt'):
            del sys.modules[module_name]
132
+
133
+
134
def dvt_cli():
    """
    DVT CLI entry point function.

    This is the main entry point for the 'dvt' command. Before dispatching to
    the real CLI it checks whether DVT's ``dbt.cli.main`` is intact and, if
    not, tries to restore DVT's files by reinstalling dvt-core.

    Outcomes:
      * CLI intact: import ``dbt.cli.main.cli`` and run it.
      * CLI overwritten, restore succeeded: clear cached ``dbt`` modules and
        exit with status 75 WITHOUT running the requested command. The status
        is non-zero so that shell chains and CI do not treat the skipped
        command as a success; the user has to re-run it.
      * CLI overwritten, restore failed: warn on stderr and run whatever CLI
        is installed anyway.

    Users who want backward compatibility with 'dbt' command can create
    a shell alias: alias dbt=dvt

    Raises:
        SystemExit: with code 75 after a successful restore.
    """
    if not _check_dvt_cli_intact():
        # DVT's files are assumed to have been overwritten by dbt-core.
        if _restore_dvt_files():
            _clear_dbt_modules()
            # 75 is EX_TEMPFAIL in sysexits.h: "temporary failure, retry".
            # The literal is used because os.EX_TEMPFAIL is not available on
            # every platform.
            sys.exit(75)
        # Restoration failed, try to run anyway.
        print(" ⚠ Could not restore DVT files. Some features may not work.", file=sys.stderr)

    # Imported lazily so the integrity check above runs before anything else
    # pulls in dbt.cli.main.
    from dbt.cli.main import cli
    cli()
@@ -0,0 +1,288 @@
1
+ Metadata-Version: 2.4
2
+ Name: dvt-core
3
+ Version: 0.59.0a51
4
+ Summary: DVT (Data Virtualization Tool) - Multi-source data federation and transformation with Spark-unified compute layer.
5
+ Author: DVT Contributors
6
+ Maintainer: DVT Contributors
7
+ License-Expression: Apache-2.0
8
+ Project-URL: Homepage, https://github.com/dvt-core/dvt-core
9
+ Project-URL: Documentation, https://github.com/dvt-core/dvt-core#readme
10
+ Project-URL: Repository, https://github.com/dvt-core/dvt-core.git
11
+ Project-URL: Issues, https://github.com/dvt-core/dvt-core/issues
12
+ Keywords: data,virtualization,federation,multi-source,dbt,analytics,transform,spark,jdbc,databricks
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Operating System :: MacOS :: MacOS X
15
+ Classifier: Operating System :: POSIX :: Linux
16
+ Classifier: Programming Language :: Python
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Programming Language :: Python :: Implementation :: CPython
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ Requires-Dist: agate<1.10,>=1.7.0
25
+ Requires-Dist: Jinja2<4,>=3.1.3
26
+ Requires-Dist: mashumaro[msgpack]<3.15,>=3.9
27
+ Requires-Dist: click<9.0,>=8.0.2
28
+ Requires-Dist: jsonschema<5.0,>=4.19.1
29
+ Requires-Dist: networkx<4.0,>=2.3
30
+ Requires-Dist: protobuf<7.0,>=6.0
31
+ Requires-Dist: requests<3.0.0
32
+ Requires-Dist: snowplow-tracker<2.0,>=1.0.2
33
+ Requires-Dist: pathspec<0.13,>=0.9
34
+ Requires-Dist: sqlparse<0.6.0,>=0.5.0
35
+ Requires-Dist: dbt-extractor<=0.6,>=0.5.0
36
+ Requires-Dist: dbt-semantic-interfaces<0.10,>=0.9.0
37
+ Requires-Dist: dbt-common<2.0,>=1.27.0
38
+ Requires-Dist: dbt-adapters<2.0,>=1.15.5
39
+ Requires-Dist: dbt-protos<2.0,>=1.0.375
40
+ Requires-Dist: pydantic<3
41
+ Requires-Dist: packaging>20.9
42
+ Requires-Dist: pytz>=2015.7
43
+ Requires-Dist: pyyaml>=6.0
44
+ Requires-Dist: daff>=1.3.46
45
+ Requires-Dist: typing-extensions>=4.4
46
+ Requires-Dist: dbt-postgres<2.0,>=1.9.0
47
+ Requires-Dist: pyspark<5.0.0,>=3.5.0
48
+ Requires-Dist: duckdb>=0.9.0
49
+ Requires-Dist: rich>=13.0.0
50
+ Provides-Extra: databricks
51
+ Requires-Dist: databricks-connect>=13.0.0; extra == "databricks"
52
+
53
+ # DVT-Core: Data Virtualization Tool
54
+
55
+ **DVT-Core** is a multi-source data federation and transformation platform built on dbt-core architecture. Query and transform data across multiple heterogeneous data sources with intelligent query pushdown and compute layer integration.
56
+
57
+ ## Features
58
+
59
+ - 🔄 **Multi-Source Queries**: Join data from PostgreSQL, Snowflake, BigQuery, MySQL, and more in a single query
60
+ - 🧠 **Intelligent Routing**: Automatically pushes down queries when possible, uses compute layer when needed
61
+ - ⚡ **JDBC Performance**: Spark JDBC-based data transfer for maximum efficiency
62
+ - 🔧 **Familiar Workflow**: Same dbt commands, same project structure, enhanced capabilities
63
+ - 🎯 **Smart Compute Selection**: Automatically chooses between Spark Local (embedded) or Spark Cluster (distributed)
64
+ - 🎛️ **Full Control**: Override everything with `target=` and `compute=` config options
65
+ - ✅ **100% Compatible**: Works with existing dbt projects and all dbt adapters
66
+
67
+ ## Quick Start
68
+
69
+ ### Installation
70
+
71
+ ```bash
72
+ pip install dvt-core
73
+ ```
74
+
75
+ Or with uv:
76
+
77
+ ```bash
78
+ uv pip install dvt-core
79
+ ```
80
+
81
+ ### Configure Multi-Connection Profile
82
+
83
+ ```yaml
84
+ # profiles.yml
85
+ my_project:
86
+ connections:
87
+ postgres_prod:
88
+ type: postgres
89
+ host: prod-db.example.com
90
+ port: 5432
91
+ user: prod_user
92
+ password: "{{ env_var('POSTGRES_PASSWORD') }}"
93
+ database: analytics
94
+ schema: public
95
+ threads: 4
96
+
97
+ snowflake_warehouse:
98
+ type: snowflake
99
+ account: abc123
100
+ user: snow_user
101
+ password: "{{ env_var('SNOWFLAKE_PASSWORD') }}"
102
+ database: warehouse
103
+ schema: public
104
+ warehouse: compute_wh
105
+ threads: 8
106
+
107
+ default_target: snowflake_warehouse
108
+ threads: 4
109
+ ```
110
+
111
+ ### Define Sources with Connections
112
+
113
+ ```yaml
114
+ # models/sources.yml
115
+ sources:
116
+ - name: postgres_data
117
+ connection: postgres_prod
118
+ tables:
119
+ - name: orders
120
+ - name: customers
121
+
122
+ - name: snowflake_data
123
+ connection: snowflake_warehouse
124
+ tables:
125
+ - name: products
126
+ ```
127
+
128
+ ### Create Multi-Source Model
129
+
130
+ ```sql
131
+ -- models/combined_sales.sql
132
+ {{ config(
133
+ materialized='table',
134
+ target='snowflake_warehouse', -- Optional: override materialization target
135
+ compute='spark-local' -- Optional: force compute engine
136
+ ) }}
137
+
138
+ SELECT
139
+ o.order_id,
140
+ o.order_date,
141
+ c.customer_name,
142
+ p.product_name,
143
+ o.quantity * p.price as total_amount
144
+ FROM {{ source('postgres_data', 'orders') }} o
145
+ JOIN {{ source('postgres_data', 'customers') }} c
146
+ ON o.customer_id = c.customer_id
147
+ JOIN {{ source('snowflake_data', 'products') }} p
148
+ ON o.product_id = p.product_id
149
+ WHERE o.order_date >= '2024-01-01'
150
+ ```
151
+
152
+ ### Run DVT
153
+
154
+ ```bash
155
+ # Standard dbt commands work
156
+ dvt run --select combined_sales
157
+
158
+ # DVT automatically:
159
+ # 1. Analyzes query (sees postgres + snowflake sources)
160
+ # 2. Determines federated execution needed
161
+ # 3. Selects compute engine (Spark Local or Cluster based on workload)
162
+ # 4. Loads data from postgres and snowflake via adapters
163
+ # 5. Executes join in compute engine
164
+ # 6. Materializes result to target (snowflake)
165
+ ```
166
+
167
+ ## Architecture
168
+
169
+ ```
170
+ ┌─────────────┐ ┌──────────┐ ┌─────────────┐ ┌──────────┐ ┌──────────────┐
171
+ │ Source DBs │────▶│ Adapters │────▶│ JDBC │────▶│ Compute │────▶│ Adapters │
172
+ │(Postgres, │ │ (Read) │ │ │ │ (Spark) │ │ (Write) │
173
+ │ MySQL, etc.)│ │ │ │ │ │ │ │ │
174
+ └─────────────┘ └──────────┘ └─────────────┘ └──────────┘ └──────────────┘
175
+
176
+
177
+ ┌──────────────┐
178
+ │ Target DB │
179
+ │ (Snowflake, │
180
+ │ BigQuery) │
181
+ └──────────────┘
182
+ ```
183
+
184
+ ## Execution Strategies
185
+
186
+ ### Pushdown (Homogeneous Sources)
187
+
188
+ When all sources come from the same connection, DVT executes the query directly on the source database:
189
+
190
+ ```sql
191
+ -- All sources from same connection → Execute on source database
192
+ SELECT * FROM {{ source('postgres', 'orders') }}
193
+ JOIN {{ source('postgres', 'customers') }} USING (customer_id)
194
+ -- Executed directly in PostgreSQL (no data movement)
195
+ ```
196
+
197
+ ### Federated (Heterogeneous Sources)
198
+
199
+ When sources come from different connections, DVT uses the compute layer:
200
+
201
+ ```sql
202
+ -- Sources from different connections → Use compute layer
203
+ SELECT * FROM {{ source('postgres', 'orders') }}
204
+ JOIN {{ source('mysql', 'products') }} USING (product_id)
205
+ -- Data loaded into Spark, join executed there
206
+ ```
207
+
208
+ ## CLI Commands
209
+
210
+ ### Standard dbt Commands
211
+
212
+ All dbt commands work unchanged:
213
+
214
+ ```bash
215
+ dvt run
216
+ dvt test
217
+ dvt build
218
+ dvt docs generate
219
+ dvt docs serve
220
+ ```
221
+
222
+ ### DVT-Specific Commands
223
+
224
+ Manage external Spark clusters:
225
+
226
+ ```bash
227
+ # Register external Spark cluster
228
+ dvt compute register prod_cluster --master spark://master:7077
229
+
230
+ # List registered clusters
231
+ dvt compute list
232
+
233
+ # Remove cluster
234
+ dvt compute remove prod_cluster
235
+ ```
236
+
237
+ ## Configuration Options
238
+
239
+ ### Model Configuration
240
+
241
+ ```sql
242
+ {{ config(
243
+ materialized='table',
244
+ target='snowflake_analytics', -- Where to write results
245
+ compute='spark-local' -- Force Spark Local for processing
246
+ ) }}
247
+ ```
248
+
249
+ ### Smart Compute Selection
250
+
251
+ DVT automatically selects the optimal compute engine:
252
+
253
+ - **Spark Local**: Small/medium workloads (< 10GB), fast in-process execution
254
+ - **Spark Cluster**: Large workloads (> 10GB), distributed processing
255
+
256
+ Override with `compute='spark-local'` or `compute='spark-cluster'` in config.
257
+
258
+ ## Key Principles
259
+
260
+ 1. **Adapters for I/O only** - Read from sources, write to targets
261
+ 2. **Compute engines for processing only** - Never materialize
262
+ 3. **JDBC as the universal transfer layer** - Efficient data movement between sources, compute, and targets
263
+ 4. **Backward compatibility** - All dbt projects work unchanged
264
+ 5. **User configuration always wins** - Override any automatic decision
265
+
266
+ ## Requirements
267
+
268
+ - Python 3.10+
269
+ - dbt-compatible adapters for your data sources
270
+ - PySpark (installed automatically)
271
+
272
+ ## License
273
+
274
+ Apache License 2.0 (same as dbt-core)
275
+
276
+ ## Acknowledgments
277
+
278
+ Built on [dbt-core](https://github.com/dbt-labs/dbt-core) architecture. DVT extends dbt's capabilities while preserving its excellent design patterns and developer experience.
279
+
280
+ ## Links
281
+
282
+ - [Documentation](https://github.com/dvt-core/dvt-core#readme)
283
+ - [Issues](https://github.com/dvt-core/dvt-core/issues)
284
+ - [Repository](https://github.com/dvt-core/dvt-core)
285
+
286
+ ---
287
+
288
+ **Transform data across any source, materialize to any target, with intelligent query optimization.**