dvt-core 0.58.6__cp311-cp311-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2403 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +844 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +50 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.cpython-311-darwin.so +0 -0
  74. dbt/compute/engines/spark_engine.py +642 -0
  75. dbt/compute/federated_executor.cpython-311-darwin.so +0 -0
  76. dbt/compute/federated_executor.py +1080 -0
  77. dbt/compute/filter_pushdown.cpython-311-darwin.so +0 -0
  78. dbt/compute/filter_pushdown.py +273 -0
  79. dbt/compute/jar_provisioning.cpython-311-darwin.so +0 -0
  80. dbt/compute/jar_provisioning.py +255 -0
  81. dbt/compute/java_compat.cpython-311-darwin.so +0 -0
  82. dbt/compute/java_compat.py +689 -0
  83. dbt/compute/jdbc_utils.cpython-311-darwin.so +0 -0
  84. dbt/compute/jdbc_utils.py +678 -0
  85. dbt/compute/metadata/__init__.py +40 -0
  86. dbt/compute/metadata/adapters_registry.cpython-311-darwin.so +0 -0
  87. dbt/compute/metadata/adapters_registry.py +370 -0
  88. dbt/compute/metadata/registry.cpython-311-darwin.so +0 -0
  89. dbt/compute/metadata/registry.py +674 -0
  90. dbt/compute/metadata/store.cpython-311-darwin.so +0 -0
  91. dbt/compute/metadata/store.py +1499 -0
  92. dbt/compute/smart_selector.cpython-311-darwin.so +0 -0
  93. dbt/compute/smart_selector.py +377 -0
  94. dbt/compute/strategies/__init__.py +55 -0
  95. dbt/compute/strategies/base.cpython-311-darwin.so +0 -0
  96. dbt/compute/strategies/base.py +165 -0
  97. dbt/compute/strategies/dataproc.cpython-311-darwin.so +0 -0
  98. dbt/compute/strategies/dataproc.py +207 -0
  99. dbt/compute/strategies/emr.cpython-311-darwin.so +0 -0
  100. dbt/compute/strategies/emr.py +203 -0
  101. dbt/compute/strategies/local.cpython-311-darwin.so +0 -0
  102. dbt/compute/strategies/local.py +443 -0
  103. dbt/compute/strategies/standalone.cpython-311-darwin.so +0 -0
  104. dbt/compute/strategies/standalone.py +262 -0
  105. dbt/config/__init__.py +4 -0
  106. dbt/config/catalogs.py +94 -0
  107. dbt/config/compute.cpython-311-darwin.so +0 -0
  108. dbt/config/compute.py +513 -0
  109. dbt/config/dvt_profile.cpython-311-darwin.so +0 -0
  110. dbt/config/dvt_profile.py +342 -0
  111. dbt/config/profile.py +422 -0
  112. dbt/config/project.py +873 -0
  113. dbt/config/project_utils.py +28 -0
  114. dbt/config/renderer.py +231 -0
  115. dbt/config/runtime.py +553 -0
  116. dbt/config/selectors.py +208 -0
  117. dbt/config/utils.py +77 -0
  118. dbt/constants.py +28 -0
  119. dbt/context/__init__.py +0 -0
  120. dbt/context/base.py +745 -0
  121. dbt/context/configured.py +135 -0
  122. dbt/context/context_config.py +382 -0
  123. dbt/context/docs.py +82 -0
  124. dbt/context/exceptions_jinja.py +178 -0
  125. dbt/context/macro_resolver.py +195 -0
  126. dbt/context/macros.py +171 -0
  127. dbt/context/manifest.py +72 -0
  128. dbt/context/providers.py +2249 -0
  129. dbt/context/query_header.py +13 -0
  130. dbt/context/secret.py +58 -0
  131. dbt/context/target.py +74 -0
  132. dbt/contracts/__init__.py +0 -0
  133. dbt/contracts/files.py +413 -0
  134. dbt/contracts/graph/__init__.py +0 -0
  135. dbt/contracts/graph/manifest.py +1904 -0
  136. dbt/contracts/graph/metrics.py +97 -0
  137. dbt/contracts/graph/model_config.py +70 -0
  138. dbt/contracts/graph/node_args.py +42 -0
  139. dbt/contracts/graph/nodes.py +1806 -0
  140. dbt/contracts/graph/semantic_manifest.py +232 -0
  141. dbt/contracts/graph/unparsed.py +811 -0
  142. dbt/contracts/project.py +417 -0
  143. dbt/contracts/results.py +53 -0
  144. dbt/contracts/selection.py +23 -0
  145. dbt/contracts/sql.py +85 -0
  146. dbt/contracts/state.py +68 -0
  147. dbt/contracts/util.py +46 -0
  148. dbt/deprecations.py +348 -0
  149. dbt/deps/__init__.py +0 -0
  150. dbt/deps/base.py +152 -0
  151. dbt/deps/git.py +195 -0
  152. dbt/deps/local.py +79 -0
  153. dbt/deps/registry.py +130 -0
  154. dbt/deps/resolver.py +149 -0
  155. dbt/deps/tarball.py +120 -0
  156. dbt/docs/source/_ext/dbt_click.py +119 -0
  157. dbt/docs/source/conf.py +32 -0
  158. dbt/env_vars.py +64 -0
  159. dbt/event_time/event_time.py +40 -0
  160. dbt/event_time/sample_window.py +60 -0
  161. dbt/events/__init__.py +15 -0
  162. dbt/events/base_types.py +36 -0
  163. dbt/events/core_types_pb2.py +2 -0
  164. dbt/events/logging.py +108 -0
  165. dbt/events/types.py +2516 -0
  166. dbt/exceptions.py +1486 -0
  167. dbt/flags.py +89 -0
  168. dbt/graph/__init__.py +11 -0
  169. dbt/graph/cli.py +249 -0
  170. dbt/graph/graph.py +172 -0
  171. dbt/graph/queue.py +214 -0
  172. dbt/graph/selector.py +374 -0
  173. dbt/graph/selector_methods.py +975 -0
  174. dbt/graph/selector_spec.py +222 -0
  175. dbt/graph/thread_pool.py +18 -0
  176. dbt/hooks.py +21 -0
  177. dbt/include/README.md +49 -0
  178. dbt/include/__init__.py +3 -0
  179. dbt/include/data/adapters_registry.duckdb +0 -0
  180. dbt/include/data/build_registry.py +242 -0
  181. dbt/include/data/csv/adapter_queries.csv +33 -0
  182. dbt/include/data/csv/syntax_rules.csv +9 -0
  183. dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
  184. dbt/include/data/csv/type_mappings_databricks.csv +30 -0
  185. dbt/include/data/csv/type_mappings_mysql.csv +40 -0
  186. dbt/include/data/csv/type_mappings_oracle.csv +30 -0
  187. dbt/include/data/csv/type_mappings_postgres.csv +56 -0
  188. dbt/include/data/csv/type_mappings_redshift.csv +33 -0
  189. dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
  190. dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
  191. dbt/include/starter_project/.gitignore +4 -0
  192. dbt/include/starter_project/README.md +15 -0
  193. dbt/include/starter_project/__init__.py +3 -0
  194. dbt/include/starter_project/analyses/.gitkeep +0 -0
  195. dbt/include/starter_project/dbt_project.yml +36 -0
  196. dbt/include/starter_project/macros/.gitkeep +0 -0
  197. dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  198. dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  199. dbt/include/starter_project/models/example/schema.yml +21 -0
  200. dbt/include/starter_project/seeds/.gitkeep +0 -0
  201. dbt/include/starter_project/snapshots/.gitkeep +0 -0
  202. dbt/include/starter_project/tests/.gitkeep +0 -0
  203. dbt/internal_deprecations.py +26 -0
  204. dbt/jsonschemas/__init__.py +3 -0
  205. dbt/jsonschemas/jsonschemas.py +309 -0
  206. dbt/jsonschemas/project/0.0.110.json +4717 -0
  207. dbt/jsonschemas/project/0.0.85.json +2015 -0
  208. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  209. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  210. dbt/jsonschemas/resources/latest.json +6773 -0
  211. dbt/links.py +4 -0
  212. dbt/materializations/__init__.py +0 -0
  213. dbt/materializations/incremental/__init__.py +0 -0
  214. dbt/materializations/incremental/microbatch.py +236 -0
  215. dbt/mp_context.py +8 -0
  216. dbt/node_types.py +37 -0
  217. dbt/parser/__init__.py +23 -0
  218. dbt/parser/analysis.py +21 -0
  219. dbt/parser/base.py +548 -0
  220. dbt/parser/common.py +266 -0
  221. dbt/parser/docs.py +52 -0
  222. dbt/parser/fixtures.py +51 -0
  223. dbt/parser/functions.py +30 -0
  224. dbt/parser/generic_test.py +100 -0
  225. dbt/parser/generic_test_builders.py +333 -0
  226. dbt/parser/hooks.py +118 -0
  227. dbt/parser/macros.py +137 -0
  228. dbt/parser/manifest.py +2204 -0
  229. dbt/parser/models.py +573 -0
  230. dbt/parser/partial.py +1178 -0
  231. dbt/parser/read_files.py +445 -0
  232. dbt/parser/schema_generic_tests.py +422 -0
  233. dbt/parser/schema_renderer.py +111 -0
  234. dbt/parser/schema_yaml_readers.py +935 -0
  235. dbt/parser/schemas.py +1466 -0
  236. dbt/parser/search.py +149 -0
  237. dbt/parser/seeds.py +28 -0
  238. dbt/parser/singular_test.py +20 -0
  239. dbt/parser/snapshots.py +44 -0
  240. dbt/parser/sources.py +558 -0
  241. dbt/parser/sql.py +62 -0
  242. dbt/parser/unit_tests.py +621 -0
  243. dbt/plugins/__init__.py +20 -0
  244. dbt/plugins/contracts.py +9 -0
  245. dbt/plugins/exceptions.py +2 -0
  246. dbt/plugins/manager.py +163 -0
  247. dbt/plugins/manifest.py +21 -0
  248. dbt/profiler.py +20 -0
  249. dbt/py.typed +1 -0
  250. dbt/query_analyzer.cpython-311-darwin.so +0 -0
  251. dbt/query_analyzer.py +410 -0
  252. dbt/runners/__init__.py +2 -0
  253. dbt/runners/exposure_runner.py +7 -0
  254. dbt/runners/no_op_runner.py +45 -0
  255. dbt/runners/saved_query_runner.py +7 -0
  256. dbt/selected_resources.py +8 -0
  257. dbt/task/__init__.py +0 -0
  258. dbt/task/base.py +503 -0
  259. dbt/task/build.py +197 -0
  260. dbt/task/clean.py +56 -0
  261. dbt/task/clone.py +161 -0
  262. dbt/task/compile.py +150 -0
  263. dbt/task/compute.cpython-311-darwin.so +0 -0
  264. dbt/task/compute.py +458 -0
  265. dbt/task/debug.py +505 -0
  266. dbt/task/deps.py +280 -0
  267. dbt/task/docs/__init__.py +3 -0
  268. dbt/task/docs/api/__init__.py +23 -0
  269. dbt/task/docs/api/catalog.cpython-311-darwin.so +0 -0
  270. dbt/task/docs/api/catalog.py +204 -0
  271. dbt/task/docs/api/lineage.cpython-311-darwin.so +0 -0
  272. dbt/task/docs/api/lineage.py +234 -0
  273. dbt/task/docs/api/profile.cpython-311-darwin.so +0 -0
  274. dbt/task/docs/api/profile.py +204 -0
  275. dbt/task/docs/api/spark.cpython-311-darwin.so +0 -0
  276. dbt/task/docs/api/spark.py +186 -0
  277. dbt/task/docs/generate.py +947 -0
  278. dbt/task/docs/index.html +250 -0
  279. dbt/task/docs/serve.cpython-311-darwin.so +0 -0
  280. dbt/task/docs/serve.py +174 -0
  281. dbt/task/dvt_output.py +362 -0
  282. dbt/task/dvt_run.py +204 -0
  283. dbt/task/freshness.py +322 -0
  284. dbt/task/function.py +121 -0
  285. dbt/task/group_lookup.py +46 -0
  286. dbt/task/init.cpython-311-darwin.so +0 -0
  287. dbt/task/init.py +604 -0
  288. dbt/task/java.cpython-311-darwin.so +0 -0
  289. dbt/task/java.py +316 -0
  290. dbt/task/list.py +236 -0
  291. dbt/task/metadata.cpython-311-darwin.so +0 -0
  292. dbt/task/metadata.py +804 -0
  293. dbt/task/printer.py +175 -0
  294. dbt/task/profile.cpython-311-darwin.so +0 -0
  295. dbt/task/profile.py +1307 -0
  296. dbt/task/profile_serve.py +615 -0
  297. dbt/task/retract.py +438 -0
  298. dbt/task/retry.py +175 -0
  299. dbt/task/run.py +1387 -0
  300. dbt/task/run_operation.py +141 -0
  301. dbt/task/runnable.py +758 -0
  302. dbt/task/seed.py +103 -0
  303. dbt/task/show.py +149 -0
  304. dbt/task/snapshot.py +56 -0
  305. dbt/task/spark.cpython-311-darwin.so +0 -0
  306. dbt/task/spark.py +414 -0
  307. dbt/task/sql.py +110 -0
  308. dbt/task/target_sync.cpython-311-darwin.so +0 -0
  309. dbt/task/target_sync.py +766 -0
  310. dbt/task/test.py +464 -0
  311. dbt/tests/fixtures/__init__.py +1 -0
  312. dbt/tests/fixtures/project.py +620 -0
  313. dbt/tests/util.py +651 -0
  314. dbt/tracking.py +529 -0
  315. dbt/utils/__init__.py +3 -0
  316. dbt/utils/artifact_upload.py +151 -0
  317. dbt/utils/utils.py +408 -0
  318. dbt/version.py +270 -0
  319. dvt_cli/__init__.py +72 -0
  320. dvt_core-0.58.6.dist-info/METADATA +288 -0
  321. dvt_core-0.58.6.dist-info/RECORD +324 -0
  322. dvt_core-0.58.6.dist-info/WHEEL +5 -0
  323. dvt_core-0.58.6.dist-info/entry_points.txt +2 -0
  324. dvt_core-0.58.6.dist-info/top_level.txt +2 -0
dvt_cli/__init__.py ADDED
@@ -0,0 +1,72 @@
1
+ """
2
+ DVT CLI Entry Point Package
3
+
4
+ This standalone package provides the entry point for the DVT command-line
5
+ interface. It's separate from the 'dbt' namespace to avoid conflicts with
6
+ dbt-core during the initial import.
7
+
8
+ Why this package exists:
9
+ -----------------------
10
+ DVT extends dbt-core with additional commands (compute, target, migrate).
11
+ However, dbt adapters (like dbt-postgres) depend on dbt-core, so both
12
+ dvt-core and dbt-core end up installed together. Both packages provide
13
+ the 'dbt' namespace, which causes import conflicts.
14
+
15
+ By using a separate 'dvt_cli' package for the entry point, we can
16
+ manipulate sys.path BEFORE any 'dbt' modules are imported, ensuring
17
+ DVT's extended dbt package takes precedence.
18
+ """
19
+
20
+ import sys
21
+ from pathlib import Path
22
+
23
+
24
def _ensure_dvt_precedence():
    """
    Ensure DVT's dbt package takes precedence over dbt-core's version.

    When both dvt-core and dbt-core are installed (dbt-core comes as a
    dependency of dbt adapters like dbt-postgres), Python may load
    dbt-core's modules instead of DVT's.

    This function moves the directory that contains DVT's 'dbt/' package
    to the front of sys.path and evicts any already-imported 'dbt'
    modules from sys.modules so the next import resolves against that
    directory first.

    Only the 'dbt' package itself and its submodules ('dbt.*') are
    evicted. Independent top-level packages whose names merely start
    with "dbt" (for example 'dbt_common' or 'dbt_extractor') are left
    alone: they are not shadowed by dbt-core, and dropping them would
    force a needless re-import that duplicates their module objects.

    Returns:
        None. The function works purely through side effects on
        sys.path and sys.modules.
    """
    # This file is at <dvt-core>/dvt_cli/__init__.py, so the directory
    # that contains 'dbt/' is two levels up from it.
    dvt_path = str(Path(__file__).resolve().parent.parent)

    # Drop an existing entry first so the insert below moves the path to
    # the front instead of duplicating it.
    if dvt_path in sys.path:
        sys.path.remove(dvt_path)
    sys.path.insert(0, dvt_path)

    # Snapshot the names before deleting: sys.modules must not change
    # size while it is being iterated.
    stale_modules = [
        name
        for name in list(sys.modules)
        if name == "dbt" or name.startswith("dbt.")
    ]
    for name in stale_modules:
        sys.modules.pop(name, None)
56
+
57
+
58
def dvt_cli():
    """
    Run the DVT command-line interface.

    This function backs the 'dvt' command. It first makes sure DVT's
    copy of the dbt package is the one that gets imported, and only
    then loads and invokes the CLI.

    For a backward-compatible 'dbt' command, users can define a shell
    alias: alias dbt=dvt
    """
    _ensure_dvt_precedence()

    # Deliberately imported here rather than at module level: the import
    # has to happen after the precedence fix above has run.
    from dbt.cli.main import cli as run_cli

    run_cli()
dvt_core-0.58.6.dist-info/METADATA ADDED
@@ -0,0 +1,288 @@
1
+ Metadata-Version: 2.4
2
+ Name: dvt-core
3
+ Version: 0.58.6
4
+ Summary: DVT (Data Virtualization Tool) - Multi-source data federation and transformation with Spark-unified compute layer.
5
+ Author: DVT Contributors
6
+ Maintainer: DVT Contributors
7
+ License-Expression: Apache-2.0
8
+ Project-URL: Homepage, https://github.com/dvt-core/dvt-core
9
+ Project-URL: Documentation, https://github.com/dvt-core/dvt-core#readme
10
+ Project-URL: Repository, https://github.com/dvt-core/dvt-core.git
11
+ Project-URL: Issues, https://github.com/dvt-core/dvt-core/issues
12
+ Keywords: data,virtualization,federation,multi-source,dbt,analytics,transform,spark,jdbc,databricks
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Operating System :: MacOS :: MacOS X
15
+ Classifier: Operating System :: POSIX :: Linux
16
+ Classifier: Programming Language :: Python
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Programming Language :: Python :: Implementation :: CPython
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ Requires-Dist: agate<1.10,>=1.7.0
25
+ Requires-Dist: Jinja2<4,>=3.1.3
26
+ Requires-Dist: mashumaro[msgpack]<3.15,>=3.9
27
+ Requires-Dist: click<9.0,>=8.0.2
28
+ Requires-Dist: jsonschema<5.0,>=4.19.1
29
+ Requires-Dist: networkx<4.0,>=2.3
30
+ Requires-Dist: protobuf<7.0,>=6.0
31
+ Requires-Dist: requests<3.0.0
32
+ Requires-Dist: snowplow-tracker<2.0,>=1.0.2
33
+ Requires-Dist: pathspec<0.13,>=0.9
34
+ Requires-Dist: sqlparse<0.6.0,>=0.5.0
35
+ Requires-Dist: dbt-extractor<=0.6,>=0.5.0
36
+ Requires-Dist: dbt-semantic-interfaces<0.10,>=0.9.0
37
+ Requires-Dist: dbt-common<2.0,>=1.27.0
38
+ Requires-Dist: dbt-adapters<2.0,>=1.15.5
39
+ Requires-Dist: dbt-protos<2.0,>=1.0.375
40
+ Requires-Dist: pydantic<3
41
+ Requires-Dist: packaging>20.9
42
+ Requires-Dist: pytz>=2015.7
43
+ Requires-Dist: pyyaml>=6.0
44
+ Requires-Dist: daff>=1.3.46
45
+ Requires-Dist: typing-extensions>=4.4
46
+ Requires-Dist: dbt-postgres<2.0,>=1.9.0
47
+ Requires-Dist: pyspark<5.0.0,>=3.5.0
48
+ Requires-Dist: duckdb>=0.9.0
49
+ Requires-Dist: rich>=13.0.0
50
+ Provides-Extra: databricks
51
+ Requires-Dist: databricks-connect>=13.0.0; extra == "databricks"
52
+
53
+ # DVT-Core: Data Virtualization Tool
54
+
55
+ **DVT-Core** is a multi-source data federation and transformation platform built on dbt-core architecture. Query and transform data across multiple heterogeneous data sources with intelligent query pushdown and compute layer integration.
56
+
57
+ ## Features
58
+
59
+ - 🔄 **Multi-Source Queries**: Join data from PostgreSQL, Snowflake, BigQuery, MySQL, and more in a single query
60
+ - 🧠 **Intelligent Routing**: Automatically pushes down queries when possible, uses compute layer when needed
61
+ - ⚡ **JDBC Performance**: Spark JDBC-based data transfer for maximum efficiency
62
+ - 🔧 **Familiar Workflow**: Same dbt commands, same project structure, enhanced capabilities
63
+ - 🎯 **Smart Compute Selection**: Automatically chooses between Spark Local (embedded) or Spark Cluster (distributed)
64
+ - 🎛️ **Full Control**: Override everything with `target=` and `compute=` config options
65
+ - ✅ **100% Compatible**: Works with existing dbt projects and all dbt adapters
66
+
67
+ ## Quick Start
68
+
69
+ ### Installation
70
+
71
+ ```bash
72
+ pip install dvt-core
73
+ ```
74
+
75
+ Or with uv:
76
+
77
+ ```bash
78
+ uv pip install dvt-core
79
+ ```
80
+
81
+ ### Configure Multi-Connection Profile
82
+
83
+ ```yaml
84
+ # profiles.yml
85
+ my_project:
86
+   connections:
87
+     postgres_prod:
88
+       type: postgres
89
+       host: prod-db.example.com
90
+       port: 5432
91
+       user: prod_user
92
+       password: "{{ env_var('POSTGRES_PASSWORD') }}"
93
+       database: analytics
94
+       schema: public
95
+       threads: 4
96
+
97
+     snowflake_warehouse:
98
+       type: snowflake
99
+       account: abc123
100
+       user: snow_user
101
+       password: "{{ env_var('SNOWFLAKE_PASSWORD') }}"
102
+       database: warehouse
103
+       schema: public
104
+       warehouse: compute_wh
105
+       threads: 8
106
+
107
+   default_target: snowflake_warehouse
108
+   threads: 4
109
+ ```
110
+
111
+ ### Define Sources with Connections
112
+
113
+ ```yaml
114
+ # models/sources.yml
115
+ sources:
116
+   - name: postgres_data
117
+     connection: postgres_prod
118
+     tables:
119
+       - name: orders
120
+       - name: customers
121
+
122
+   - name: snowflake_data
123
+     connection: snowflake_warehouse
124
+     tables:
125
+       - name: products
126
+ ```
127
+
128
+ ### Create Multi-Source Model
129
+
130
+ ```sql
131
+ -- models/combined_sales.sql
132
+ {{ config(
133
+ materialized='table',
134
+ target='snowflake_warehouse', -- Optional: override materialization target
135
+ compute='spark-local' -- Optional: force compute engine
136
+ ) }}
137
+
138
+ SELECT
139
+ o.order_id,
140
+ o.order_date,
141
+ c.customer_name,
142
+ p.product_name,
143
+ o.quantity * p.price as total_amount
144
+ FROM {{ source('postgres_data', 'orders') }} o
145
+ JOIN {{ source('postgres_data', 'customers') }} c
146
+ ON o.customer_id = c.customer_id
147
+ JOIN {{ source('snowflake_data', 'products') }} p
148
+ ON o.product_id = p.product_id
149
+ WHERE o.order_date >= '2024-01-01'
150
+ ```
151
+
152
+ ### Run DVT
153
+
154
+ ```bash
155
+ # Standard dbt commands work
156
+ dvt run --select combined_sales
157
+
158
+ # DVT automatically:
159
+ # 1. Analyzes query (sees postgres + snowflake sources)
160
+ # 2. Determines federated execution needed
161
+ # 3. Selects compute engine (Spark Local or Cluster based on workload)
162
+ # 4. Loads data from postgres and snowflake via adapters
163
+ # 5. Executes join in compute engine
164
+ # 6. Materializes result to target (snowflake)
165
+ ```
166
+
167
+ ## Architecture
168
+
169
+ ```
170
+ ┌─────────────┐     ┌──────────┐     ┌─────────────┐     ┌──────────┐     ┌──────────────┐
171
+ │ Source DBs  │────▶│ Adapters │────▶│    JDBC     │────▶│ Compute  │────▶│   Adapters   │
172
+ │(Postgres,   │     │  (Read)  │     │             │     │ (Spark)  │     │   (Write)    │
173
+ │ MySQL, etc.)│     │          │     │             │     │          │     │              │
174
+ └─────────────┘     └──────────┘     └─────────────┘     └──────────┘     └──────────────┘
175
+
176
+
177
+                                                                           ┌──────────────┐
178
+                                                                           │  Target DB   │
179
+                                                                           │ (Snowflake,  │
180
+                                                                           │  BigQuery)   │
181
+                                                                           └──────────────┘
182
+ ```
183
+
184
+ ## Execution Strategies
185
+
186
+ ### Pushdown (Homogeneous Sources)
187
+
188
+ When all sources come from the same connection, DVT executes the query directly on the source database:
189
+
190
+ ```sql
191
+ -- All sources from same connection → Execute on source database
192
+ SELECT * FROM {{ source('postgres', 'orders') }}
193
+ JOIN {{ source('postgres', 'customers') }} USING (customer_id)
194
+ -- Executed directly in PostgreSQL (no data movement)
195
+ ```
196
+
197
+ ### Federated (Heterogeneous Sources)
198
+
199
+ When sources come from different connections, DVT uses the compute layer:
200
+
201
+ ```sql
202
+ -- Sources from different connections → Use compute layer
203
+ SELECT * FROM {{ source('postgres', 'orders') }}
204
+ JOIN {{ source('mysql', 'products') }} USING (product_id)
205
+ -- Data loaded into Spark, join executed there
206
+ ```
207
+
208
+ ## CLI Commands
209
+
210
+ ### Standard dbt Commands
211
+
212
+ All dbt commands work unchanged:
213
+
214
+ ```bash
215
+ dvt run
216
+ dvt test
217
+ dvt build
218
+ dvt docs generate
219
+ dvt docs serve
220
+ ```
221
+
222
+ ### DVT-Specific Commands
223
+
224
+ Manage external Spark clusters:
225
+
226
+ ```bash
227
+ # Register external Spark cluster
228
+ dvt compute register prod_cluster --master spark://master:7077
229
+
230
+ # List registered clusters
231
+ dvt compute list
232
+
233
+ # Remove cluster
234
+ dvt compute remove prod_cluster
235
+ ```
236
+
237
+ ## Configuration Options
238
+
239
+ ### Model Configuration
240
+
241
+ ```sql
242
+ {{ config(
243
+ materialized='table',
244
+ target='snowflake_analytics', -- Where to write results
245
+ compute='spark-local' -- Force Spark Local for processing
246
+ ) }}
247
+ ```
248
+
249
+ ### Smart Compute Selection
250
+
251
+ DVT automatically selects the optimal compute engine:
252
+
253
+ - **Spark Local**: Small/medium workloads (< 10GB), fast in-process execution
254
+ - **Spark Cluster**: Large workloads (> 10GB), distributed processing
255
+
256
+ Override with `compute='spark-local'` or `compute='spark-cluster'` in config.
257
+
258
+ ## Key Principles
259
+
260
+ 1. **Adapters for I/O only** - Read from sources, write to targets
261
+ 2. **Compute engines for processing only** - Never materialize
262
+ 3. **JDBC as universal data format** - Efficient transfer
263
+ 4. **Backward compatibility** - All dbt projects work unchanged
264
+ 5. **User configuration always wins** - Override any automatic decision
265
+
266
+ ## Requirements
267
+
268
+ - Python 3.10+
269
+ - dbt-compatible adapters for your data sources
270
+ - PySpark (installed automatically)
271
+
272
+ ## License
273
+
274
+ Apache License 2.0 (same as dbt-core)
275
+
276
+ ## Acknowledgments
277
+
278
+ Built on [dbt-core](https://github.com/dbt-labs/dbt-core) architecture. DVT extends dbt's capabilities while preserving its excellent design patterns and developer experience.
279
+
280
+ ## Links
281
+
282
+ - [Documentation](https://github.com/dvt-core/dvt-core#readme)
283
+ - [Issues](https://github.com/dvt-core/dvt-core/issues)
284
+ - [Repository](https://github.com/dvt-core/dvt-core)
285
+
286
+ ---
287
+
288
+ **Transform data across any source, materialize to any target, with intelligent query optimization.**