dvt-core 0.58.6__cp311-cp311-macosx_10_9_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt/__init__.py +7 -0
- dbt/_pydantic_shim.py +26 -0
- dbt/artifacts/__init__.py +0 -0
- dbt/artifacts/exceptions/__init__.py +1 -0
- dbt/artifacts/exceptions/schemas.py +31 -0
- dbt/artifacts/resources/__init__.py +116 -0
- dbt/artifacts/resources/base.py +67 -0
- dbt/artifacts/resources/types.py +93 -0
- dbt/artifacts/resources/v1/analysis.py +10 -0
- dbt/artifacts/resources/v1/catalog.py +23 -0
- dbt/artifacts/resources/v1/components.py +274 -0
- dbt/artifacts/resources/v1/config.py +277 -0
- dbt/artifacts/resources/v1/documentation.py +11 -0
- dbt/artifacts/resources/v1/exposure.py +51 -0
- dbt/artifacts/resources/v1/function.py +52 -0
- dbt/artifacts/resources/v1/generic_test.py +31 -0
- dbt/artifacts/resources/v1/group.py +21 -0
- dbt/artifacts/resources/v1/hook.py +11 -0
- dbt/artifacts/resources/v1/macro.py +29 -0
- dbt/artifacts/resources/v1/metric.py +172 -0
- dbt/artifacts/resources/v1/model.py +145 -0
- dbt/artifacts/resources/v1/owner.py +10 -0
- dbt/artifacts/resources/v1/saved_query.py +111 -0
- dbt/artifacts/resources/v1/seed.py +41 -0
- dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dbt/artifacts/resources/v1/semantic_model.py +314 -0
- dbt/artifacts/resources/v1/singular_test.py +14 -0
- dbt/artifacts/resources/v1/snapshot.py +91 -0
- dbt/artifacts/resources/v1/source_definition.py +84 -0
- dbt/artifacts/resources/v1/sql_operation.py +10 -0
- dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
- dbt/artifacts/schemas/__init__.py +0 -0
- dbt/artifacts/schemas/base.py +191 -0
- dbt/artifacts/schemas/batch_results.py +24 -0
- dbt/artifacts/schemas/catalog/__init__.py +11 -0
- dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
- dbt/artifacts/schemas/freshness/__init__.py +1 -0
- dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
- dbt/artifacts/schemas/manifest/__init__.py +2 -0
- dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
- dbt/artifacts/schemas/results.py +147 -0
- dbt/artifacts/schemas/run/__init__.py +2 -0
- dbt/artifacts/schemas/run/v5/__init__.py +0 -0
- dbt/artifacts/schemas/run/v5/run.py +184 -0
- dbt/artifacts/schemas/upgrades/__init__.py +4 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dbt/artifacts/utils/validation.py +153 -0
- dbt/cli/__init__.py +1 -0
- dbt/cli/context.py +17 -0
- dbt/cli/exceptions.py +57 -0
- dbt/cli/flags.py +560 -0
- dbt/cli/main.py +2403 -0
- dbt/cli/option_types.py +121 -0
- dbt/cli/options.py +80 -0
- dbt/cli/params.py +844 -0
- dbt/cli/requires.py +490 -0
- dbt/cli/resolvers.py +50 -0
- dbt/cli/types.py +40 -0
- dbt/clients/__init__.py +0 -0
- dbt/clients/checked_load.py +83 -0
- dbt/clients/git.py +164 -0
- dbt/clients/jinja.py +206 -0
- dbt/clients/jinja_static.py +245 -0
- dbt/clients/registry.py +192 -0
- dbt/clients/yaml_helper.py +68 -0
- dbt/compilation.py +876 -0
- dbt/compute/__init__.py +14 -0
- dbt/compute/engines/__init__.py +12 -0
- dbt/compute/engines/spark_engine.cpython-311-darwin.so +0 -0
- dbt/compute/engines/spark_engine.py +642 -0
- dbt/compute/federated_executor.cpython-311-darwin.so +0 -0
- dbt/compute/federated_executor.py +1080 -0
- dbt/compute/filter_pushdown.cpython-311-darwin.so +0 -0
- dbt/compute/filter_pushdown.py +273 -0
- dbt/compute/jar_provisioning.cpython-311-darwin.so +0 -0
- dbt/compute/jar_provisioning.py +255 -0
- dbt/compute/java_compat.cpython-311-darwin.so +0 -0
- dbt/compute/java_compat.py +689 -0
- dbt/compute/jdbc_utils.cpython-311-darwin.so +0 -0
- dbt/compute/jdbc_utils.py +678 -0
- dbt/compute/metadata/__init__.py +40 -0
- dbt/compute/metadata/adapters_registry.cpython-311-darwin.so +0 -0
- dbt/compute/metadata/adapters_registry.py +370 -0
- dbt/compute/metadata/registry.cpython-311-darwin.so +0 -0
- dbt/compute/metadata/registry.py +674 -0
- dbt/compute/metadata/store.cpython-311-darwin.so +0 -0
- dbt/compute/metadata/store.py +1499 -0
- dbt/compute/smart_selector.cpython-311-darwin.so +0 -0
- dbt/compute/smart_selector.py +377 -0
- dbt/compute/strategies/__init__.py +55 -0
- dbt/compute/strategies/base.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/base.py +165 -0
- dbt/compute/strategies/dataproc.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/dataproc.py +207 -0
- dbt/compute/strategies/emr.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/emr.py +203 -0
- dbt/compute/strategies/local.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/local.py +443 -0
- dbt/compute/strategies/standalone.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/standalone.py +262 -0
- dbt/config/__init__.py +4 -0
- dbt/config/catalogs.py +94 -0
- dbt/config/compute.cpython-311-darwin.so +0 -0
- dbt/config/compute.py +513 -0
- dbt/config/dvt_profile.cpython-311-darwin.so +0 -0
- dbt/config/dvt_profile.py +342 -0
- dbt/config/profile.py +422 -0
- dbt/config/project.py +873 -0
- dbt/config/project_utils.py +28 -0
- dbt/config/renderer.py +231 -0
- dbt/config/runtime.py +553 -0
- dbt/config/selectors.py +208 -0
- dbt/config/utils.py +77 -0
- dbt/constants.py +28 -0
- dbt/context/__init__.py +0 -0
- dbt/context/base.py +745 -0
- dbt/context/configured.py +135 -0
- dbt/context/context_config.py +382 -0
- dbt/context/docs.py +82 -0
- dbt/context/exceptions_jinja.py +178 -0
- dbt/context/macro_resolver.py +195 -0
- dbt/context/macros.py +171 -0
- dbt/context/manifest.py +72 -0
- dbt/context/providers.py +2249 -0
- dbt/context/query_header.py +13 -0
- dbt/context/secret.py +58 -0
- dbt/context/target.py +74 -0
- dbt/contracts/__init__.py +0 -0
- dbt/contracts/files.py +413 -0
- dbt/contracts/graph/__init__.py +0 -0
- dbt/contracts/graph/manifest.py +1904 -0
- dbt/contracts/graph/metrics.py +97 -0
- dbt/contracts/graph/model_config.py +70 -0
- dbt/contracts/graph/node_args.py +42 -0
- dbt/contracts/graph/nodes.py +1806 -0
- dbt/contracts/graph/semantic_manifest.py +232 -0
- dbt/contracts/graph/unparsed.py +811 -0
- dbt/contracts/project.py +417 -0
- dbt/contracts/results.py +53 -0
- dbt/contracts/selection.py +23 -0
- dbt/contracts/sql.py +85 -0
- dbt/contracts/state.py +68 -0
- dbt/contracts/util.py +46 -0
- dbt/deprecations.py +348 -0
- dbt/deps/__init__.py +0 -0
- dbt/deps/base.py +152 -0
- dbt/deps/git.py +195 -0
- dbt/deps/local.py +79 -0
- dbt/deps/registry.py +130 -0
- dbt/deps/resolver.py +149 -0
- dbt/deps/tarball.py +120 -0
- dbt/docs/source/_ext/dbt_click.py +119 -0
- dbt/docs/source/conf.py +32 -0
- dbt/env_vars.py +64 -0
- dbt/event_time/event_time.py +40 -0
- dbt/event_time/sample_window.py +60 -0
- dbt/events/__init__.py +15 -0
- dbt/events/base_types.py +36 -0
- dbt/events/core_types_pb2.py +2 -0
- dbt/events/logging.py +108 -0
- dbt/events/types.py +2516 -0
- dbt/exceptions.py +1486 -0
- dbt/flags.py +89 -0
- dbt/graph/__init__.py +11 -0
- dbt/graph/cli.py +249 -0
- dbt/graph/graph.py +172 -0
- dbt/graph/queue.py +214 -0
- dbt/graph/selector.py +374 -0
- dbt/graph/selector_methods.py +975 -0
- dbt/graph/selector_spec.py +222 -0
- dbt/graph/thread_pool.py +18 -0
- dbt/hooks.py +21 -0
- dbt/include/README.md +49 -0
- dbt/include/__init__.py +3 -0
- dbt/include/data/adapters_registry.duckdb +0 -0
- dbt/include/data/build_registry.py +242 -0
- dbt/include/data/csv/adapter_queries.csv +33 -0
- dbt/include/data/csv/syntax_rules.csv +9 -0
- dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
- dbt/include/data/csv/type_mappings_databricks.csv +30 -0
- dbt/include/data/csv/type_mappings_mysql.csv +40 -0
- dbt/include/data/csv/type_mappings_oracle.csv +30 -0
- dbt/include/data/csv/type_mappings_postgres.csv +56 -0
- dbt/include/data/csv/type_mappings_redshift.csv +33 -0
- dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
- dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
- dbt/include/starter_project/.gitignore +4 -0
- dbt/include/starter_project/README.md +15 -0
- dbt/include/starter_project/__init__.py +3 -0
- dbt/include/starter_project/analyses/.gitkeep +0 -0
- dbt/include/starter_project/dbt_project.yml +36 -0
- dbt/include/starter_project/macros/.gitkeep +0 -0
- dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
- dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
- dbt/include/starter_project/models/example/schema.yml +21 -0
- dbt/include/starter_project/seeds/.gitkeep +0 -0
- dbt/include/starter_project/snapshots/.gitkeep +0 -0
- dbt/include/starter_project/tests/.gitkeep +0 -0
- dbt/internal_deprecations.py +26 -0
- dbt/jsonschemas/__init__.py +3 -0
- dbt/jsonschemas/jsonschemas.py +309 -0
- dbt/jsonschemas/project/0.0.110.json +4717 -0
- dbt/jsonschemas/project/0.0.85.json +2015 -0
- dbt/jsonschemas/resources/0.0.110.json +2636 -0
- dbt/jsonschemas/resources/0.0.85.json +2536 -0
- dbt/jsonschemas/resources/latest.json +6773 -0
- dbt/links.py +4 -0
- dbt/materializations/__init__.py +0 -0
- dbt/materializations/incremental/__init__.py +0 -0
- dbt/materializations/incremental/microbatch.py +236 -0
- dbt/mp_context.py +8 -0
- dbt/node_types.py +37 -0
- dbt/parser/__init__.py +23 -0
- dbt/parser/analysis.py +21 -0
- dbt/parser/base.py +548 -0
- dbt/parser/common.py +266 -0
- dbt/parser/docs.py +52 -0
- dbt/parser/fixtures.py +51 -0
- dbt/parser/functions.py +30 -0
- dbt/parser/generic_test.py +100 -0
- dbt/parser/generic_test_builders.py +333 -0
- dbt/parser/hooks.py +118 -0
- dbt/parser/macros.py +137 -0
- dbt/parser/manifest.py +2204 -0
- dbt/parser/models.py +573 -0
- dbt/parser/partial.py +1178 -0
- dbt/parser/read_files.py +445 -0
- dbt/parser/schema_generic_tests.py +422 -0
- dbt/parser/schema_renderer.py +111 -0
- dbt/parser/schema_yaml_readers.py +935 -0
- dbt/parser/schemas.py +1466 -0
- dbt/parser/search.py +149 -0
- dbt/parser/seeds.py +28 -0
- dbt/parser/singular_test.py +20 -0
- dbt/parser/snapshots.py +44 -0
- dbt/parser/sources.py +558 -0
- dbt/parser/sql.py +62 -0
- dbt/parser/unit_tests.py +621 -0
- dbt/plugins/__init__.py +20 -0
- dbt/plugins/contracts.py +9 -0
- dbt/plugins/exceptions.py +2 -0
- dbt/plugins/manager.py +163 -0
- dbt/plugins/manifest.py +21 -0
- dbt/profiler.py +20 -0
- dbt/py.typed +1 -0
- dbt/query_analyzer.cpython-311-darwin.so +0 -0
- dbt/query_analyzer.py +410 -0
- dbt/runners/__init__.py +2 -0
- dbt/runners/exposure_runner.py +7 -0
- dbt/runners/no_op_runner.py +45 -0
- dbt/runners/saved_query_runner.py +7 -0
- dbt/selected_resources.py +8 -0
- dbt/task/__init__.py +0 -0
- dbt/task/base.py +503 -0
- dbt/task/build.py +197 -0
- dbt/task/clean.py +56 -0
- dbt/task/clone.py +161 -0
- dbt/task/compile.py +150 -0
- dbt/task/compute.cpython-311-darwin.so +0 -0
- dbt/task/compute.py +458 -0
- dbt/task/debug.py +505 -0
- dbt/task/deps.py +280 -0
- dbt/task/docs/__init__.py +3 -0
- dbt/task/docs/api/__init__.py +23 -0
- dbt/task/docs/api/catalog.cpython-311-darwin.so +0 -0
- dbt/task/docs/api/catalog.py +204 -0
- dbt/task/docs/api/lineage.cpython-311-darwin.so +0 -0
- dbt/task/docs/api/lineage.py +234 -0
- dbt/task/docs/api/profile.cpython-311-darwin.so +0 -0
- dbt/task/docs/api/profile.py +204 -0
- dbt/task/docs/api/spark.cpython-311-darwin.so +0 -0
- dbt/task/docs/api/spark.py +186 -0
- dbt/task/docs/generate.py +947 -0
- dbt/task/docs/index.html +250 -0
- dbt/task/docs/serve.cpython-311-darwin.so +0 -0
- dbt/task/docs/serve.py +174 -0
- dbt/task/dvt_output.py +362 -0
- dbt/task/dvt_run.py +204 -0
- dbt/task/freshness.py +322 -0
- dbt/task/function.py +121 -0
- dbt/task/group_lookup.py +46 -0
- dbt/task/init.cpython-311-darwin.so +0 -0
- dbt/task/init.py +604 -0
- dbt/task/java.cpython-311-darwin.so +0 -0
- dbt/task/java.py +316 -0
- dbt/task/list.py +236 -0
- dbt/task/metadata.cpython-311-darwin.so +0 -0
- dbt/task/metadata.py +804 -0
- dbt/task/printer.py +175 -0
- dbt/task/profile.cpython-311-darwin.so +0 -0
- dbt/task/profile.py +1307 -0
- dbt/task/profile_serve.py +615 -0
- dbt/task/retract.py +438 -0
- dbt/task/retry.py +175 -0
- dbt/task/run.py +1387 -0
- dbt/task/run_operation.py +141 -0
- dbt/task/runnable.py +758 -0
- dbt/task/seed.py +103 -0
- dbt/task/show.py +149 -0
- dbt/task/snapshot.py +56 -0
- dbt/task/spark.cpython-311-darwin.so +0 -0
- dbt/task/spark.py +414 -0
- dbt/task/sql.py +110 -0
- dbt/task/target_sync.cpython-311-darwin.so +0 -0
- dbt/task/target_sync.py +766 -0
- dbt/task/test.py +464 -0
- dbt/tests/fixtures/__init__.py +1 -0
- dbt/tests/fixtures/project.py +620 -0
- dbt/tests/util.py +651 -0
- dbt/tracking.py +529 -0
- dbt/utils/__init__.py +3 -0
- dbt/utils/artifact_upload.py +151 -0
- dbt/utils/utils.py +408 -0
- dbt/version.py +270 -0
- dvt_cli/__init__.py +72 -0
- dvt_core-0.58.6.dist-info/METADATA +288 -0
- dvt_core-0.58.6.dist-info/RECORD +324 -0
- dvt_core-0.58.6.dist-info/WHEEL +5 -0
- dvt_core-0.58.6.dist-info/entry_points.txt +2 -0
- dvt_core-0.58.6.dist-info/top_level.txt +2 -0
dvt_cli/__init__.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DVT CLI Entry Point Package
|
|
3
|
+
|
|
4
|
+
This standalone package provides the entry point for the DVT command-line
|
|
5
|
+
interface. It's separate from the 'dbt' namespace to avoid conflicts with
|
|
6
|
+
dbt-core during the initial import.
|
|
7
|
+
|
|
8
|
+
Why this package exists:
|
|
9
|
+
-----------------------
|
|
10
|
+
DVT extends dbt-core with additional commands (compute, target, migrate).
|
|
11
|
+
However, dbt adapters (like dbt-postgres) depend on dbt-core, so both
|
|
12
|
+
dvt-core and dbt-core end up installed together. Both packages provide
|
|
13
|
+
the 'dbt' namespace, which causes import conflicts.
|
|
14
|
+
|
|
15
|
+
By using a separate 'dvt_cli' package for the entry point, we can
|
|
16
|
+
manipulate sys.path BEFORE any 'dbt' modules are imported, ensuring
|
|
17
|
+
DVT's extended dbt package takes precedence.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import sys
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _ensure_dvt_precedence():
    """
    Ensure DVT's dbt package takes precedence over dbt-core's version.

    When both dvt-core and dbt-core are installed (dbt-core comes as a
    dependency of dbt adapters like dbt-postgres), Python may load
    dbt-core's modules instead of DVT's.

    This function moves the directory that contains this package (and the
    sibling 'dbt/' package) to the front of sys.path, then evicts cached
    'dbt' and 'dbt.*' entries from sys.modules so that the next import
    resolves them again against the updated path.

    Only the 'dbt' package and its submodules are evicted. Other top-level
    packages whose names merely begin with "dbt" (e.g. 'dbt_common') are
    deliberately left in sys.modules: they are not part of the 'dbt'
    package, and dropping them would cause them to be imported a second
    time as distinct module objects.

    Returns:
        None. The function works purely through its side effects on
        sys.path and sys.modules.
    """
    # This file is at: <dvt-core>/dvt_cli/__init__.py
    # So the package root (containing 'dbt/') is two levels up.
    dvt_path = str(Path(__file__).resolve().parent.parent)

    # Remove an existing entry first so the path is moved to the front
    # rather than duplicated.
    if dvt_path in sys.path:
        sys.path.remove(dvt_path)

    # Insert at the beginning to take precedence over site-packages.
    sys.path.insert(0, dvt_path)

    # Snapshot the keys before mutating sys.modules. Match 'dbt' exactly or
    # the 'dbt.' prefix so look-alike packages (dbt_common, ...) survive.
    stale_modules = [
        name
        for name in list(sys.modules)
        if name == "dbt" or name.startswith("dbt.")
    ]
    for name in stale_modules:
        sys.modules.pop(name, None)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def dvt_cli():
    """
    Run the DVT command-line interface.

    This function is the target of the 'dvt' command. It first applies the
    sys.path / sys.modules fix-up so that DVT's version of the dbt package
    is the one that gets imported, and only then loads and invokes the CLI.

    Users who want a 'dbt' command for backward compatibility can define a
    shell alias: alias dbt=dvt
    """
    _ensure_dvt_precedence()

    # Deferred on purpose: the import must happen after the fix-up above,
    # otherwise 'dbt' could already be bound to the wrong package.
    import dbt.cli.main as dvt_main

    dvt_main.cli()
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dvt-core
|
|
3
|
+
Version: 0.58.6
|
|
4
|
+
Summary: DVT (Data Virtualization Tool) - Multi-source data federation and transformation with Spark-unified compute layer.
|
|
5
|
+
Author: DVT Contributors
|
|
6
|
+
Maintainer: DVT Contributors
|
|
7
|
+
License-Expression: Apache-2.0
|
|
8
|
+
Project-URL: Homepage, https://github.com/dvt-core/dvt-core
|
|
9
|
+
Project-URL: Documentation, https://github.com/dvt-core/dvt-core#readme
|
|
10
|
+
Project-URL: Repository, https://github.com/dvt-core/dvt-core.git
|
|
11
|
+
Project-URL: Issues, https://github.com/dvt-core/dvt-core/issues
|
|
12
|
+
Keywords: data,virtualization,federation,multi-source,dbt,analytics,transform,spark,jdbc,databricks
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
15
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
16
|
+
Classifier: Programming Language :: Python
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
Requires-Dist: agate<1.10,>=1.7.0
|
|
25
|
+
Requires-Dist: Jinja2<4,>=3.1.3
|
|
26
|
+
Requires-Dist: mashumaro[msgpack]<3.15,>=3.9
|
|
27
|
+
Requires-Dist: click<9.0,>=8.0.2
|
|
28
|
+
Requires-Dist: jsonschema<5.0,>=4.19.1
|
|
29
|
+
Requires-Dist: networkx<4.0,>=2.3
|
|
30
|
+
Requires-Dist: protobuf<7.0,>=6.0
|
|
31
|
+
Requires-Dist: requests<3.0.0
|
|
32
|
+
Requires-Dist: snowplow-tracker<2.0,>=1.0.2
|
|
33
|
+
Requires-Dist: pathspec<0.13,>=0.9
|
|
34
|
+
Requires-Dist: sqlparse<0.6.0,>=0.5.0
|
|
35
|
+
Requires-Dist: dbt-extractor<=0.6,>=0.5.0
|
|
36
|
+
Requires-Dist: dbt-semantic-interfaces<0.10,>=0.9.0
|
|
37
|
+
Requires-Dist: dbt-common<2.0,>=1.27.0
|
|
38
|
+
Requires-Dist: dbt-adapters<2.0,>=1.15.5
|
|
39
|
+
Requires-Dist: dbt-protos<2.0,>=1.0.375
|
|
40
|
+
Requires-Dist: pydantic<3
|
|
41
|
+
Requires-Dist: packaging>20.9
|
|
42
|
+
Requires-Dist: pytz>=2015.7
|
|
43
|
+
Requires-Dist: pyyaml>=6.0
|
|
44
|
+
Requires-Dist: daff>=1.3.46
|
|
45
|
+
Requires-Dist: typing-extensions>=4.4
|
|
46
|
+
Requires-Dist: dbt-postgres<2.0,>=1.9.0
|
|
47
|
+
Requires-Dist: pyspark<5.0.0,>=3.5.0
|
|
48
|
+
Requires-Dist: duckdb>=0.9.0
|
|
49
|
+
Requires-Dist: rich>=13.0.0
|
|
50
|
+
Provides-Extra: databricks
|
|
51
|
+
Requires-Dist: databricks-connect>=13.0.0; extra == "databricks"
|
|
52
|
+
|
|
53
|
+
# DVT-Core: Data Virtualization Tool
|
|
54
|
+
|
|
55
|
+
**DVT-Core** is a multi-source data federation and transformation platform built on the dbt-core architecture. Query and transform data across multiple heterogeneous data sources with intelligent query pushdown and compute layer integration.
|
|
56
|
+
|
|
57
|
+
## Features
|
|
58
|
+
|
|
59
|
+
- 🔄 **Multi-Source Queries**: Join data from PostgreSQL, Snowflake, BigQuery, MySQL, and more in a single query
|
|
60
|
+
- 🧠 **Intelligent Routing**: Automatically pushes down queries when possible, uses compute layer when needed
|
|
61
|
+
- ⚡ **JDBC Performance**: Spark JDBC-based data transfer for maximum efficiency
|
|
62
|
+
- 🔧 **Familiar Workflow**: Same dbt commands, same project structure, enhanced capabilities
|
|
63
|
+
- 🎯 **Smart Compute Selection**: Automatically chooses between Spark Local (embedded) or Spark Cluster (distributed)
|
|
64
|
+
- 🎛️ **Full Control**: Override everything with `target=` and `compute=` config options
|
|
65
|
+
- ✅ **100% Compatible**: Works with existing dbt projects and all dbt adapters
|
|
66
|
+
|
|
67
|
+
## Quick Start
|
|
68
|
+
|
|
69
|
+
### Installation
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install dvt-core
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Or with uv:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
uv pip install dvt-core
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Configure Multi-Connection Profile
|
|
82
|
+
|
|
83
|
+
```yaml
|
|
84
|
+
# profiles.yml
|
|
85
|
+
my_project:
|
|
86
|
+
connections:
|
|
87
|
+
postgres_prod:
|
|
88
|
+
type: postgres
|
|
89
|
+
host: prod-db.example.com
|
|
90
|
+
port: 5432
|
|
91
|
+
user: prod_user
|
|
92
|
+
password: "{{ env_var('POSTGRES_PASSWORD') }}"
|
|
93
|
+
database: analytics
|
|
94
|
+
schema: public
|
|
95
|
+
threads: 4
|
|
96
|
+
|
|
97
|
+
snowflake_warehouse:
|
|
98
|
+
type: snowflake
|
|
99
|
+
account: abc123
|
|
100
|
+
user: snow_user
|
|
101
|
+
password: "{{ env_var('SNOWFLAKE_PASSWORD') }}"
|
|
102
|
+
database: warehouse
|
|
103
|
+
schema: public
|
|
104
|
+
warehouse: compute_wh
|
|
105
|
+
threads: 8
|
|
106
|
+
|
|
107
|
+
default_target: snowflake_warehouse
|
|
108
|
+
threads: 4
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Define Sources with Connections
|
|
112
|
+
|
|
113
|
+
```yaml
|
|
114
|
+
# models/sources.yml
|
|
115
|
+
sources:
|
|
116
|
+
- name: postgres_data
|
|
117
|
+
connection: postgres_prod
|
|
118
|
+
tables:
|
|
119
|
+
- name: orders
|
|
120
|
+
- name: customers
|
|
121
|
+
|
|
122
|
+
- name: snowflake_data
|
|
123
|
+
connection: snowflake_warehouse
|
|
124
|
+
tables:
|
|
125
|
+
- name: products
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Create Multi-Source Model
|
|
129
|
+
|
|
130
|
+
```sql
|
|
131
|
+
-- models/combined_sales.sql
|
|
132
|
+
{{ config(
|
|
133
|
+
materialized='table',
|
|
134
|
+
target='snowflake_warehouse', -- Optional: override materialization target
|
|
135
|
+
compute='spark-local' -- Optional: force compute engine
|
|
136
|
+
) }}
|
|
137
|
+
|
|
138
|
+
SELECT
|
|
139
|
+
o.order_id,
|
|
140
|
+
o.order_date,
|
|
141
|
+
c.customer_name,
|
|
142
|
+
p.product_name,
|
|
143
|
+
o.quantity * p.price as total_amount
|
|
144
|
+
FROM {{ source('postgres_data', 'orders') }} o
|
|
145
|
+
JOIN {{ source('postgres_data', 'customers') }} c
|
|
146
|
+
ON o.customer_id = c.customer_id
|
|
147
|
+
JOIN {{ source('snowflake_data', 'products') }} p
|
|
148
|
+
ON o.product_id = p.product_id
|
|
149
|
+
WHERE o.order_date >= '2024-01-01'
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Run DVT
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
# Standard dbt commands work
|
|
156
|
+
dvt run --select combined_sales
|
|
157
|
+
|
|
158
|
+
# DVT automatically:
|
|
159
|
+
# 1. Analyzes query (sees postgres + snowflake sources)
|
|
160
|
+
# 2. Determines federated execution needed
|
|
161
|
+
# 3. Selects compute engine (Spark Local or Cluster based on workload)
|
|
162
|
+
# 4. Loads data from postgres and snowflake via adapters
|
|
163
|
+
# 5. Executes join in compute engine
|
|
164
|
+
# 6. Materializes result to target (snowflake)
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## Architecture
|
|
168
|
+
|
|
169
|
+
```
|
|
170
|
+
┌─────────────┐ ┌──────────┐ ┌─────────────┐ ┌──────────┐ ┌──────────────┐
|
|
171
|
+
│ Source DBs │────▶│ Adapters │────▶│ JDBC │────▶│ Compute │────▶│ Adapters │
|
|
172
|
+
│(Postgres, │ │ (Read) │ │ │ │ (Spark) │ │ (Write) │
|
|
173
|
+
│ MySQL, etc.)│ │ │ │ │ │ │ │ │
|
|
174
|
+
└─────────────┘ └──────────┘ └─────────────┘ └──────────┘ └──────────────┘
|
|
175
|
+
│
|
|
176
|
+
▼
|
|
177
|
+
┌──────────────┐
|
|
178
|
+
│ Target DB │
|
|
179
|
+
│ (Snowflake, │
|
|
180
|
+
│ BigQuery) │
|
|
181
|
+
└──────────────┘
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## Execution Strategies
|
|
185
|
+
|
|
186
|
+
### Pushdown (Homogeneous Sources)
|
|
187
|
+
|
|
188
|
+
When all sources come from the same connection, DVT executes the query directly on the source database:
|
|
189
|
+
|
|
190
|
+
```sql
|
|
191
|
+
-- All sources from same connection → Execute on source database
|
|
192
|
+
SELECT * FROM {{ source('postgres', 'orders') }}
|
|
193
|
+
JOIN {{ source('postgres', 'customers') }} USING (customer_id)
|
|
194
|
+
-- Executed directly in PostgreSQL (no data movement)
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### Federated (Heterogeneous Sources)
|
|
198
|
+
|
|
199
|
+
When sources come from different connections, DVT uses the compute layer:
|
|
200
|
+
|
|
201
|
+
```sql
|
|
202
|
+
-- Sources from different connections → Use compute layer
|
|
203
|
+
SELECT * FROM {{ source('postgres', 'orders') }}
|
|
204
|
+
JOIN {{ source('mysql', 'products') }} USING (product_id)
|
|
205
|
+
-- Data loaded into Spark, join executed there
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
## CLI Commands
|
|
209
|
+
|
|
210
|
+
### Standard dbt Commands
|
|
211
|
+
|
|
212
|
+
All standard dbt commands work unchanged; invoke them through the `dvt` executable:
|
|
213
|
+
|
|
214
|
+
```bash
|
|
215
|
+
dvt run
|
|
216
|
+
dvt test
|
|
217
|
+
dvt build
|
|
218
|
+
dvt docs generate
|
|
219
|
+
dvt docs serve
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
### DVT-Specific Commands
|
|
223
|
+
|
|
224
|
+
Manage external Spark clusters:
|
|
225
|
+
|
|
226
|
+
```bash
|
|
227
|
+
# Register external Spark cluster
|
|
228
|
+
dvt compute register prod_cluster --master spark://master:7077
|
|
229
|
+
|
|
230
|
+
# List registered clusters
|
|
231
|
+
dvt compute list
|
|
232
|
+
|
|
233
|
+
# Remove cluster
|
|
234
|
+
dvt compute remove prod_cluster
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## Configuration Options
|
|
238
|
+
|
|
239
|
+
### Model Configuration
|
|
240
|
+
|
|
241
|
+
```sql
|
|
242
|
+
{{ config(
|
|
243
|
+
materialized='table',
|
|
244
|
+
target='snowflake_analytics', -- Where to write results
|
|
245
|
+
compute='spark-local' -- Force Spark Local for processing
|
|
246
|
+
) }}
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
### Smart Compute Selection
|
|
250
|
+
|
|
251
|
+
DVT automatically selects the optimal compute engine:
|
|
252
|
+
|
|
253
|
+
- **Spark Local**: Small/medium workloads (< 10GB), fast in-process execution
|
|
254
|
+
- **Spark Cluster**: Large workloads (> 10GB), distributed processing
|
|
255
|
+
|
|
256
|
+
Override with `compute='spark-local'` or `compute='spark-cluster'` in config.
|
|
257
|
+
|
|
258
|
+
## Key Principles
|
|
259
|
+
|
|
260
|
+
1. **Adapters for I/O only** - Read from sources, write to targets
|
|
261
|
+
2. **Compute engines for processing only** - They never materialize results; adapters write to the target
|
|
262
|
+
3. **JDBC as universal data transfer interface** - Efficient transfer
|
|
263
|
+
4. **Backward compatibility** - All dbt projects work unchanged
|
|
264
|
+
5. **User configuration always wins** - Override any automatic decision
|
|
265
|
+
|
|
266
|
+
## Requirements
|
|
267
|
+
|
|
268
|
+
- Python 3.10+
|
|
269
|
+
- dbt-compatible adapters for your data sources
|
|
270
|
+
- PySpark (installed automatically)
|
|
271
|
+
|
|
272
|
+
## License
|
|
273
|
+
|
|
274
|
+
Apache License 2.0 (same as dbt-core)
|
|
275
|
+
|
|
276
|
+
## Acknowledgments
|
|
277
|
+
|
|
278
|
+
Built on [dbt-core](https://github.com/dbt-labs/dbt-core) architecture. DVT extends dbt's capabilities while preserving its excellent design patterns and developer experience.
|
|
279
|
+
|
|
280
|
+
## Links
|
|
281
|
+
|
|
282
|
+
- [Documentation](https://github.com/dvt-core/dvt-core#readme)
|
|
283
|
+
- [Issues](https://github.com/dvt-core/dvt-core/issues)
|
|
284
|
+
- [Repository](https://github.com/dvt-core/dvt-core)
|
|
285
|
+
|
|
286
|
+
---
|
|
287
|
+
|
|
288
|
+
**Transform data across any source, materialize to any target, with intelligent query optimization.**
|