dvt-core 0.59.0a51__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt/__init__.py +7 -0
- dbt/_pydantic_shim.py +26 -0
- dbt/artifacts/__init__.py +0 -0
- dbt/artifacts/exceptions/__init__.py +1 -0
- dbt/artifacts/exceptions/schemas.py +31 -0
- dbt/artifacts/resources/__init__.py +116 -0
- dbt/artifacts/resources/base.py +67 -0
- dbt/artifacts/resources/types.py +93 -0
- dbt/artifacts/resources/v1/analysis.py +10 -0
- dbt/artifacts/resources/v1/catalog.py +23 -0
- dbt/artifacts/resources/v1/components.py +274 -0
- dbt/artifacts/resources/v1/config.py +277 -0
- dbt/artifacts/resources/v1/documentation.py +11 -0
- dbt/artifacts/resources/v1/exposure.py +51 -0
- dbt/artifacts/resources/v1/function.py +52 -0
- dbt/artifacts/resources/v1/generic_test.py +31 -0
- dbt/artifacts/resources/v1/group.py +21 -0
- dbt/artifacts/resources/v1/hook.py +11 -0
- dbt/artifacts/resources/v1/macro.py +29 -0
- dbt/artifacts/resources/v1/metric.py +172 -0
- dbt/artifacts/resources/v1/model.py +145 -0
- dbt/artifacts/resources/v1/owner.py +10 -0
- dbt/artifacts/resources/v1/saved_query.py +111 -0
- dbt/artifacts/resources/v1/seed.py +41 -0
- dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dbt/artifacts/resources/v1/semantic_model.py +314 -0
- dbt/artifacts/resources/v1/singular_test.py +14 -0
- dbt/artifacts/resources/v1/snapshot.py +91 -0
- dbt/artifacts/resources/v1/source_definition.py +84 -0
- dbt/artifacts/resources/v1/sql_operation.py +10 -0
- dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
- dbt/artifacts/schemas/__init__.py +0 -0
- dbt/artifacts/schemas/base.py +191 -0
- dbt/artifacts/schemas/batch_results.py +24 -0
- dbt/artifacts/schemas/catalog/__init__.py +11 -0
- dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
- dbt/artifacts/schemas/freshness/__init__.py +1 -0
- dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
- dbt/artifacts/schemas/manifest/__init__.py +2 -0
- dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
- dbt/artifacts/schemas/results.py +147 -0
- dbt/artifacts/schemas/run/__init__.py +2 -0
- dbt/artifacts/schemas/run/v5/__init__.py +0 -0
- dbt/artifacts/schemas/run/v5/run.py +184 -0
- dbt/artifacts/schemas/upgrades/__init__.py +4 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dbt/artifacts/utils/validation.py +153 -0
- dbt/cli/__init__.py +1 -0
- dbt/cli/context.py +17 -0
- dbt/cli/exceptions.py +57 -0
- dbt/cli/flags.py +560 -0
- dbt/cli/main.py +2660 -0
- dbt/cli/option_types.py +121 -0
- dbt/cli/options.py +80 -0
- dbt/cli/params.py +844 -0
- dbt/cli/requires.py +490 -0
- dbt/cli/resolvers.py +60 -0
- dbt/cli/types.py +40 -0
- dbt/clients/__init__.py +0 -0
- dbt/clients/checked_load.py +83 -0
- dbt/clients/git.py +164 -0
- dbt/clients/jinja.py +206 -0
- dbt/clients/jinja_static.py +245 -0
- dbt/clients/registry.py +192 -0
- dbt/clients/yaml_helper.py +68 -0
- dbt/compilation.py +876 -0
- dbt/compute/__init__.py +14 -0
- dbt/compute/engines/__init__.py +12 -0
- dbt/compute/engines/spark_engine.py +642 -0
- dbt/compute/federated_executor.py +1080 -0
- dbt/compute/filter_pushdown.py +273 -0
- dbt/compute/jar_provisioning.py +273 -0
- dbt/compute/java_compat.py +689 -0
- dbt/compute/jdbc_utils.py +1252 -0
- dbt/compute/metadata/__init__.py +63 -0
- dbt/compute/metadata/adapters_registry.py +370 -0
- dbt/compute/metadata/catalog_store.py +1036 -0
- dbt/compute/metadata/registry.py +674 -0
- dbt/compute/metadata/store.py +1020 -0
- dbt/compute/smart_selector.py +377 -0
- dbt/compute/spark_logger.py +272 -0
- dbt/compute/strategies/__init__.py +55 -0
- dbt/compute/strategies/base.py +165 -0
- dbt/compute/strategies/dataproc.py +207 -0
- dbt/compute/strategies/emr.py +203 -0
- dbt/compute/strategies/local.py +472 -0
- dbt/compute/strategies/standalone.py +262 -0
- dbt/config/__init__.py +4 -0
- dbt/config/catalogs.py +94 -0
- dbt/config/compute.py +513 -0
- dbt/config/dvt_profile.py +408 -0
- dbt/config/profile.py +422 -0
- dbt/config/project.py +888 -0
- dbt/config/project_utils.py +48 -0
- dbt/config/renderer.py +231 -0
- dbt/config/runtime.py +564 -0
- dbt/config/selectors.py +208 -0
- dbt/config/utils.py +77 -0
- dbt/constants.py +28 -0
- dbt/context/__init__.py +0 -0
- dbt/context/base.py +745 -0
- dbt/context/configured.py +135 -0
- dbt/context/context_config.py +382 -0
- dbt/context/docs.py +82 -0
- dbt/context/exceptions_jinja.py +178 -0
- dbt/context/macro_resolver.py +195 -0
- dbt/context/macros.py +171 -0
- dbt/context/manifest.py +72 -0
- dbt/context/providers.py +2249 -0
- dbt/context/query_header.py +13 -0
- dbt/context/secret.py +58 -0
- dbt/context/target.py +74 -0
- dbt/contracts/__init__.py +0 -0
- dbt/contracts/files.py +413 -0
- dbt/contracts/graph/__init__.py +0 -0
- dbt/contracts/graph/manifest.py +1904 -0
- dbt/contracts/graph/metrics.py +97 -0
- dbt/contracts/graph/model_config.py +70 -0
- dbt/contracts/graph/node_args.py +42 -0
- dbt/contracts/graph/nodes.py +1806 -0
- dbt/contracts/graph/semantic_manifest.py +232 -0
- dbt/contracts/graph/unparsed.py +811 -0
- dbt/contracts/project.py +419 -0
- dbt/contracts/results.py +53 -0
- dbt/contracts/selection.py +23 -0
- dbt/contracts/sql.py +85 -0
- dbt/contracts/state.py +68 -0
- dbt/contracts/util.py +46 -0
- dbt/deprecations.py +348 -0
- dbt/deps/__init__.py +0 -0
- dbt/deps/base.py +152 -0
- dbt/deps/git.py +195 -0
- dbt/deps/local.py +79 -0
- dbt/deps/registry.py +130 -0
- dbt/deps/resolver.py +149 -0
- dbt/deps/tarball.py +120 -0
- dbt/docs/source/_ext/dbt_click.py +119 -0
- dbt/docs/source/conf.py +32 -0
- dbt/env_vars.py +64 -0
- dbt/event_time/event_time.py +40 -0
- dbt/event_time/sample_window.py +60 -0
- dbt/events/__init__.py +15 -0
- dbt/events/base_types.py +36 -0
- dbt/events/core_types_pb2.py +2 -0
- dbt/events/logging.py +108 -0
- dbt/events/types.py +2516 -0
- dbt/exceptions.py +1486 -0
- dbt/flags.py +89 -0
- dbt/graph/__init__.py +11 -0
- dbt/graph/cli.py +249 -0
- dbt/graph/graph.py +172 -0
- dbt/graph/queue.py +214 -0
- dbt/graph/selector.py +374 -0
- dbt/graph/selector_methods.py +975 -0
- dbt/graph/selector_spec.py +222 -0
- dbt/graph/thread_pool.py +18 -0
- dbt/hooks.py +21 -0
- dbt/include/README.md +49 -0
- dbt/include/__init__.py +3 -0
- dbt/include/data/adapters_registry.duckdb +0 -0
- dbt/include/data/build_comprehensive_registry.py +1254 -0
- dbt/include/data/build_registry.py +242 -0
- dbt/include/data/csv/adapter_queries.csv +33 -0
- dbt/include/data/csv/syntax_rules.csv +9 -0
- dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
- dbt/include/data/csv/type_mappings_databricks.csv +30 -0
- dbt/include/data/csv/type_mappings_mysql.csv +40 -0
- dbt/include/data/csv/type_mappings_oracle.csv +30 -0
- dbt/include/data/csv/type_mappings_postgres.csv +56 -0
- dbt/include/data/csv/type_mappings_redshift.csv +33 -0
- dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
- dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
- dbt/include/dvt_starter_project/README.md +15 -0
- dbt/include/dvt_starter_project/__init__.py +3 -0
- dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/dvt_project.yml +39 -0
- dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
- dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
- dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
- dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
- dbt/internal_deprecations.py +26 -0
- dbt/jsonschemas/__init__.py +3 -0
- dbt/jsonschemas/jsonschemas.py +309 -0
- dbt/jsonschemas/project/0.0.110.json +4717 -0
- dbt/jsonschemas/project/0.0.85.json +2015 -0
- dbt/jsonschemas/resources/0.0.110.json +2636 -0
- dbt/jsonschemas/resources/0.0.85.json +2536 -0
- dbt/jsonschemas/resources/latest.json +6773 -0
- dbt/links.py +4 -0
- dbt/materializations/__init__.py +0 -0
- dbt/materializations/incremental/__init__.py +0 -0
- dbt/materializations/incremental/microbatch.py +236 -0
- dbt/mp_context.py +8 -0
- dbt/node_types.py +37 -0
- dbt/parser/__init__.py +23 -0
- dbt/parser/analysis.py +21 -0
- dbt/parser/base.py +548 -0
- dbt/parser/common.py +266 -0
- dbt/parser/docs.py +52 -0
- dbt/parser/fixtures.py +51 -0
- dbt/parser/functions.py +30 -0
- dbt/parser/generic_test.py +100 -0
- dbt/parser/generic_test_builders.py +333 -0
- dbt/parser/hooks.py +122 -0
- dbt/parser/macros.py +137 -0
- dbt/parser/manifest.py +2208 -0
- dbt/parser/models.py +573 -0
- dbt/parser/partial.py +1178 -0
- dbt/parser/read_files.py +445 -0
- dbt/parser/schema_generic_tests.py +422 -0
- dbt/parser/schema_renderer.py +111 -0
- dbt/parser/schema_yaml_readers.py +935 -0
- dbt/parser/schemas.py +1466 -0
- dbt/parser/search.py +149 -0
- dbt/parser/seeds.py +28 -0
- dbt/parser/singular_test.py +20 -0
- dbt/parser/snapshots.py +44 -0
- dbt/parser/sources.py +558 -0
- dbt/parser/sql.py +62 -0
- dbt/parser/unit_tests.py +621 -0
- dbt/plugins/__init__.py +20 -0
- dbt/plugins/contracts.py +9 -0
- dbt/plugins/exceptions.py +2 -0
- dbt/plugins/manager.py +163 -0
- dbt/plugins/manifest.py +21 -0
- dbt/profiler.py +20 -0
- dbt/py.typed +1 -0
- dbt/query_analyzer.py +410 -0
- dbt/runners/__init__.py +2 -0
- dbt/runners/exposure_runner.py +7 -0
- dbt/runners/no_op_runner.py +45 -0
- dbt/runners/saved_query_runner.py +7 -0
- dbt/selected_resources.py +8 -0
- dbt/task/__init__.py +0 -0
- dbt/task/base.py +506 -0
- dbt/task/build.py +197 -0
- dbt/task/clean.py +56 -0
- dbt/task/clone.py +161 -0
- dbt/task/compile.py +150 -0
- dbt/task/compute.py +458 -0
- dbt/task/debug.py +513 -0
- dbt/task/deps.py +280 -0
- dbt/task/docs/__init__.py +3 -0
- dbt/task/docs/api/__init__.py +23 -0
- dbt/task/docs/api/catalog.py +204 -0
- dbt/task/docs/api/lineage.py +234 -0
- dbt/task/docs/api/profile.py +204 -0
- dbt/task/docs/api/spark.py +186 -0
- dbt/task/docs/generate.py +1002 -0
- dbt/task/docs/index.html +250 -0
- dbt/task/docs/serve.py +174 -0
- dbt/task/dvt_output.py +509 -0
- dbt/task/dvt_run.py +282 -0
- dbt/task/dvt_seed.py +806 -0
- dbt/task/freshness.py +322 -0
- dbt/task/function.py +121 -0
- dbt/task/group_lookup.py +46 -0
- dbt/task/init.py +1022 -0
- dbt/task/java.py +316 -0
- dbt/task/list.py +236 -0
- dbt/task/metadata.py +804 -0
- dbt/task/migrate.py +714 -0
- dbt/task/printer.py +175 -0
- dbt/task/profile.py +1489 -0
- dbt/task/profile_serve.py +662 -0
- dbt/task/retract.py +441 -0
- dbt/task/retry.py +175 -0
- dbt/task/run.py +1647 -0
- dbt/task/run_operation.py +141 -0
- dbt/task/runnable.py +758 -0
- dbt/task/seed.py +103 -0
- dbt/task/show.py +149 -0
- dbt/task/snapshot.py +56 -0
- dbt/task/spark.py +414 -0
- dbt/task/sql.py +110 -0
- dbt/task/target_sync.py +814 -0
- dbt/task/test.py +464 -0
- dbt/tests/fixtures/__init__.py +1 -0
- dbt/tests/fixtures/project.py +620 -0
- dbt/tests/util.py +651 -0
- dbt/tracking.py +529 -0
- dbt/utils/__init__.py +3 -0
- dbt/utils/artifact_upload.py +151 -0
- dbt/utils/utils.py +408 -0
- dbt/version.py +271 -0
- dvt_cli/__init__.py +158 -0
- dvt_core-0.59.0a51.dist-info/METADATA +288 -0
- dvt_core-0.59.0a51.dist-info/RECORD +299 -0
- dvt_core-0.59.0a51.dist-info/WHEEL +5 -0
- dvt_core-0.59.0a51.dist-info/entry_points.txt +2 -0
- dvt_core-0.59.0a51.dist-info/top_level.txt +2 -0
dvt_cli/__init__.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DVT CLI Entry Point Package
|
|
3
|
+
|
|
4
|
+
This standalone package provides the entry point for the DVT command-line
|
|
5
|
+
interface. It's separate from the 'dbt' namespace to avoid conflicts with
|
|
6
|
+
dbt-core during the initial import.
|
|
7
|
+
|
|
8
|
+
Why this package exists:
|
|
9
|
+
-----------------------
|
|
10
|
+
DVT extends dbt-core with additional commands (compute, target, migrate).
|
|
11
|
+
However, dbt adapters (like dbt-postgres) depend on dbt-core, so both
|
|
12
|
+
dvt-core and dbt-core end up installed together. Both packages provide
|
|
13
|
+
the 'dbt' namespace, which causes import conflicts.
|
|
14
|
+
|
|
15
|
+
The key issue is that both packages install files to the same location
|
|
16
|
+
(site-packages/dbt/). When dbt-core is installed after dvt-core, it
|
|
17
|
+
OVERWRITES DVT's files. This package detects this condition at runtime
|
|
18
|
+
and automatically restores DVT's files by reinstalling dvt-core.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import subprocess
|
|
22
|
+
import sys
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _check_dvt_cli_intact() -> bool:
    """
    Report whether the installed ``dbt.cli.main`` is DVT's own CLI module.

    The check imports ``dbt.cli.main`` and looks for a ``compute`` command on
    its ``cli`` object. This module treats ``compute`` as DVT-specific, so a
    missing ``compute`` command is taken to mean that dbt-core has overwritten
    DVT's files.

    Returns:
        True if ``cli`` exposes a ``compute`` command. False if the command is
        absent, if ``cli`` is missing or is not group-like, or if importing or
        inspecting the CLI raises any exception.
    """
    try:
        from dbt.cli import main as cli_main

        if not hasattr(cli_main, "cli"):
            return False
        cli = cli_main.cli

        # Fast path: click groups keep their sub-commands in a ``commands``
        # mapping, so a membership test is enough.
        if hasattr(cli, "commands"):
            return "compute" in cli.commands

        # Fallback for group-like objects that resolve commands lazily.
        # Duck-typing on ``list_commands`` avoids ``click.MultiCommand``, which
        # is deprecated in Click 8.2 and scheduled for removal in Click 9.0.
        list_commands = getattr(cli, "list_commands", None)
        if callable(list_commands):
            import click

            return "compute" in list_commands(click.Context(cli))

        return False
    except Exception:
        # Deliberately broad: any failure to import or inspect the CLI is
        # treated the same as "DVT's CLI is not intact".
        return False
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _dvt_reinstall_commands() -> list[list[str]]:
    """Build the candidate reinstall commands, most reliable first."""
    import shutil

    commands: list[list[str]] = []

    # Method 1: uv, pointed explicitly at the running interpreter.
    # ``--reinstall`` is uv's spelling of a forced reinstall.
    uv_path = shutil.which("uv")
    if uv_path and sys.executable:
        commands.append(
            [uv_path, "pip", "install", "--python", sys.executable,
             "--reinstall", "--no-deps", "dvt-core", "--quiet"]
        )

    # Method 2: pip as a module of the running interpreter, so the install is
    # guaranteed to target the environment this process runs in.
    # pip has no ``--reinstall`` option; its flag is ``--force-reinstall``.
    if sys.executable:
        commands.append(
            [sys.executable, "-m", "pip", "install", "--force-reinstall",
             "--no-deps", "dvt-core", "--quiet"]
        )

    # Method 3 (last resort): whatever pip is first on PATH. It may belong to
    # a different environment, which is why it is tried after ``-m pip``.
    pip_path = shutil.which("pip") or shutil.which("pip3")
    if pip_path:
        commands.append(
            [pip_path, "install", "--force-reinstall", "--no-deps",
             "dvt-core", "--quiet"]
        )

    return commands


def _restore_dvt_files() -> bool:
    """
    Restore DVT's files by reinstalling dvt-core.

    This is called when dbt-core has overwritten DVT's files. dvt-core is
    reinstalled with --no-deps so that only DVT's own files are rewritten and
    the other installed packages are left alone.

    Installers are tried in order until one exits with status 0: uv, then
    ``python -m pip`` for the running interpreter, then the pip found on PATH.
    Progress and failure messages are written to stderr.

    Returns:
        True if one of the installers succeeded, False if all of them failed
        or none could be started.
    """
    print(" 🔧 DVT: Restoring DVT files (dbt-core overwrote them)...", file=sys.stderr)

    last_error = ""
    for command in _dvt_reinstall_commands():
        try:
            result = subprocess.run(command, capture_output=True, text=True)
        except (OSError, ValueError, subprocess.SubprocessError) as exc:
            # The installer could not be launched or its output could not be
            # read; remember why and fall through to the next method.
            last_error = f"{command[0]}: {exc}"
            continue

        if result.returncode == 0:
            print(" ✓ DVT files restored. Please re-run your command.", file=sys.stderr)
            return True

        output = (result.stderr or result.stdout or "").strip()
        if output:
            last_error = output.splitlines()[-1]

    print(" ⚠ Failed to restore DVT files. Please run manually:", file=sys.stderr)
    print(" pip install --force-reinstall --no-deps dvt-core", file=sys.stderr)
    if last_error:
        print(f" Last error: {last_error}", file=sys.stderr)
    return False
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _clear_dbt_modules() -> None:
    """
    Drop the ``dbt`` package and its submodules from ``sys.modules``.

    After this call the next ``import dbt...`` statement loads the modules
    again instead of reusing the cached ones.

    Only ``dbt`` itself and names under ``dbt.`` are removed. A plain
    ``startswith('dbt')`` test would also evict unrelated top-level packages
    whose names merely begin with those letters (for example ``dbt_common``).
    """
    # Snapshot the names first: sys.modules must not change size while it is
    # being iterated.
    stale = [
        name for name in list(sys.modules)
        if name == "dbt" or name.startswith("dbt.")
    ]
    for name in stale:
        sys.modules.pop(name, None)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def dvt_cli() -> None:
    """
    DVT CLI entry point function.

    This is the main entry point for the 'dvt' command. Before handing over to
    the CLI it checks whether DVT's files are intact and, if not, tries to
    restore them by reinstalling dvt-core.

    Outcomes:
        * CLI intact: the CLI runs normally.
        * Files restored: the process exits with status 1, because the
          requested command was NOT executed and must be re-run in a fresh
          process. A zero status would make scripts and CI treat the skipped
          command as a success.
        * Restore failed: a warning is printed to stderr and the CLI is
          started anyway on a best-effort basis.

    Users who want backward compatibility with 'dbt' command can create
    a shell alias: alias dbt=dvt
    """
    if not _check_dvt_cli_intact():
        # DVT's files have been overwritten by dbt-core. This happens when
        # users install dbt adapters that depend on dbt-core.
        if _restore_dvt_files():
            # The modules already imported in this process came from the
            # overwritten files, so drop them and stop here.
            _clear_dbt_modules()
            sys.exit(1)

        # Restoration failed, try to run anyway.
        print(" ⚠ Could not restore DVT files. Some features may not work.", file=sys.stderr)

    # Imported lazily so the integrity check above runs before the CLI loads.
    from dbt.cli.main import cli

    cli()
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dvt-core
|
|
3
|
+
Version: 0.59.0a51
|
|
4
|
+
Summary: DVT (Data Virtualization Tool) - Multi-source data federation and transformation with Spark-unified compute layer.
|
|
5
|
+
Author: DVT Contributors
|
|
6
|
+
Maintainer: DVT Contributors
|
|
7
|
+
License-Expression: Apache-2.0
|
|
8
|
+
Project-URL: Homepage, https://github.com/dvt-core/dvt-core
|
|
9
|
+
Project-URL: Documentation, https://github.com/dvt-core/dvt-core#readme
|
|
10
|
+
Project-URL: Repository, https://github.com/dvt-core/dvt-core.git
|
|
11
|
+
Project-URL: Issues, https://github.com/dvt-core/dvt-core/issues
|
|
12
|
+
Keywords: data,virtualization,federation,multi-source,dbt,analytics,transform,spark,jdbc,databricks
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
15
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
16
|
+
Classifier: Programming Language :: Python
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
Requires-Dist: agate<1.10,>=1.7.0
|
|
25
|
+
Requires-Dist: Jinja2<4,>=3.1.3
|
|
26
|
+
Requires-Dist: mashumaro[msgpack]<3.15,>=3.9
|
|
27
|
+
Requires-Dist: click<9.0,>=8.0.2
|
|
28
|
+
Requires-Dist: jsonschema<5.0,>=4.19.1
|
|
29
|
+
Requires-Dist: networkx<4.0,>=2.3
|
|
30
|
+
Requires-Dist: protobuf<7.0,>=6.0
|
|
31
|
+
Requires-Dist: requests<3.0.0
|
|
32
|
+
Requires-Dist: snowplow-tracker<2.0,>=1.0.2
|
|
33
|
+
Requires-Dist: pathspec<0.13,>=0.9
|
|
34
|
+
Requires-Dist: sqlparse<0.6.0,>=0.5.0
|
|
35
|
+
Requires-Dist: dbt-extractor<=0.6,>=0.5.0
|
|
36
|
+
Requires-Dist: dbt-semantic-interfaces<0.10,>=0.9.0
|
|
37
|
+
Requires-Dist: dbt-common<2.0,>=1.27.0
|
|
38
|
+
Requires-Dist: dbt-adapters<2.0,>=1.15.5
|
|
39
|
+
Requires-Dist: dbt-protos<2.0,>=1.0.375
|
|
40
|
+
Requires-Dist: pydantic<3
|
|
41
|
+
Requires-Dist: packaging>20.9
|
|
42
|
+
Requires-Dist: pytz>=2015.7
|
|
43
|
+
Requires-Dist: pyyaml>=6.0
|
|
44
|
+
Requires-Dist: daff>=1.3.46
|
|
45
|
+
Requires-Dist: typing-extensions>=4.4
|
|
46
|
+
Requires-Dist: dbt-postgres<2.0,>=1.9.0
|
|
47
|
+
Requires-Dist: pyspark<5.0.0,>=3.5.0
|
|
48
|
+
Requires-Dist: duckdb>=0.9.0
|
|
49
|
+
Requires-Dist: rich>=13.0.0
|
|
50
|
+
Provides-Extra: databricks
|
|
51
|
+
Requires-Dist: databricks-connect>=13.0.0; extra == "databricks"
|
|
52
|
+
|
|
53
|
+
# DVT-Core: Data Virtualization Tool
|
|
54
|
+
|
|
55
|
+
**DVT-Core** is a multi-source data federation and transformation platform built on dbt-core architecture. Query and transform data across multiple heterogeneous data sources with intelligent query pushdown and compute layer integration.
|
|
56
|
+
|
|
57
|
+
## Features
|
|
58
|
+
|
|
59
|
+
- 🔄 **Multi-Source Queries**: Join data from PostgreSQL, Snowflake, BigQuery, MySQL, and more in a single query
|
|
60
|
+
- 🧠 **Intelligent Routing**: Automatically pushes down queries when possible, uses compute layer when needed
|
|
61
|
+
- ⚡ **JDBC Performance**: Spark JDBC-based data transfer for maximum efficiency
|
|
62
|
+
- 🔧 **Familiar Workflow**: Same dbt commands, same project structure, enhanced capabilities
|
|
63
|
+
- 🎯 **Smart Compute Selection**: Automatically chooses between Spark Local (embedded) or Spark Cluster (distributed)
|
|
64
|
+
- 🎛️ **Full Control**: Override everything with `target=` and `compute=` config options
|
|
65
|
+
- ✅ **100% Compatible**: Works with existing dbt projects and all dbt adapters
|
|
66
|
+
|
|
67
|
+
## Quick Start
|
|
68
|
+
|
|
69
|
+
### Installation
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install dvt-core
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Or with uv:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
uv pip install dvt-core
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Configure Multi-Connection Profile
|
|
82
|
+
|
|
83
|
+
```yaml
|
|
84
|
+
# profiles.yml
|
|
85
|
+
my_project:
|
|
86
|
+
connections:
|
|
87
|
+
postgres_prod:
|
|
88
|
+
type: postgres
|
|
89
|
+
host: prod-db.example.com
|
|
90
|
+
port: 5432
|
|
91
|
+
user: prod_user
|
|
92
|
+
password: "{{ env_var('POSTGRES_PASSWORD') }}"
|
|
93
|
+
database: analytics
|
|
94
|
+
schema: public
|
|
95
|
+
threads: 4
|
|
96
|
+
|
|
97
|
+
snowflake_warehouse:
|
|
98
|
+
type: snowflake
|
|
99
|
+
account: abc123
|
|
100
|
+
user: snow_user
|
|
101
|
+
password: "{{ env_var('SNOWFLAKE_PASSWORD') }}"
|
|
102
|
+
database: warehouse
|
|
103
|
+
schema: public
|
|
104
|
+
warehouse: compute_wh
|
|
105
|
+
threads: 8
|
|
106
|
+
|
|
107
|
+
default_target: snowflake_warehouse
|
|
108
|
+
threads: 4
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Define Sources with Connections
|
|
112
|
+
|
|
113
|
+
```yaml
|
|
114
|
+
# models/sources.yml
|
|
115
|
+
sources:
|
|
116
|
+
- name: postgres_data
|
|
117
|
+
connection: postgres_prod
|
|
118
|
+
tables:
|
|
119
|
+
- name: orders
|
|
120
|
+
- name: customers
|
|
121
|
+
|
|
122
|
+
- name: snowflake_data
|
|
123
|
+
connection: snowflake_warehouse
|
|
124
|
+
tables:
|
|
125
|
+
- name: products
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Create Multi-Source Model
|
|
129
|
+
|
|
130
|
+
```sql
|
|
131
|
+
-- models/combined_sales.sql
|
|
132
|
+
{{ config(
|
|
133
|
+
materialized='table',
|
|
134
|
+
target='snowflake_warehouse', -- Optional: override materialization target
|
|
135
|
+
compute='spark-local' -- Optional: force compute engine
|
|
136
|
+
) }}
|
|
137
|
+
|
|
138
|
+
SELECT
|
|
139
|
+
o.order_id,
|
|
140
|
+
o.order_date,
|
|
141
|
+
c.customer_name,
|
|
142
|
+
p.product_name,
|
|
143
|
+
o.quantity * p.price as total_amount
|
|
144
|
+
FROM {{ source('postgres_data', 'orders') }} o
|
|
145
|
+
JOIN {{ source('postgres_data', 'customers') }} c
|
|
146
|
+
ON o.customer_id = c.customer_id
|
|
147
|
+
JOIN {{ source('snowflake_data', 'products') }} p
|
|
148
|
+
ON o.product_id = p.product_id
|
|
149
|
+
WHERE o.order_date >= '2024-01-01'
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Run DVT
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
# Standard dbt commands work
|
|
156
|
+
dvt run --select combined_sales
|
|
157
|
+
|
|
158
|
+
# DVT automatically:
|
|
159
|
+
# 1. Analyzes query (sees postgres + snowflake sources)
|
|
160
|
+
# 2. Determines federated execution needed
|
|
161
|
+
# 3. Selects compute engine (Spark Local or Cluster based on workload)
|
|
162
|
+
# 4. Loads data from postgres and snowflake via adapters
|
|
163
|
+
# 5. Executes join in compute engine
|
|
164
|
+
# 6. Materializes result to target (snowflake)
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## Architecture
|
|
168
|
+
|
|
169
|
+
```
|
|
170
|
+
┌─────────────┐ ┌──────────┐ ┌─────────────┐ ┌──────────┐ ┌──────────────┐
|
|
171
|
+
│ Source DBs │────▶│ Adapters │────▶│ JDBC │────▶│ Compute │────▶│ Adapters │
|
|
172
|
+
│(Postgres, │ │ (Read) │ │ │ │ (Spark) │ │ (Write) │
|
|
173
|
+
│ MySQL, etc.)│ │ │ │ │ │ │ │ │
|
|
174
|
+
└─────────────┘ └──────────┘ └─────────────┘ └──────────┘ └──────────────┘
|
|
175
|
+
│
|
|
176
|
+
▼
|
|
177
|
+
┌──────────────┐
|
|
178
|
+
│ Target DB │
|
|
179
|
+
│ (Snowflake, │
|
|
180
|
+
│ BigQuery) │
|
|
181
|
+
└──────────────┘
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## Execution Strategies
|
|
185
|
+
|
|
186
|
+
### Pushdown (Homogeneous Sources)
|
|
187
|
+
|
|
188
|
+
When all sources come from the same connection, DVT executes the query directly on the source database:
|
|
189
|
+
|
|
190
|
+
```sql
|
|
191
|
+
-- All sources from same connection → Execute on source database
|
|
192
|
+
SELECT * FROM {{ source('postgres', 'orders') }}
|
|
193
|
+
JOIN {{ source('postgres', 'customers') }} USING (customer_id)
|
|
194
|
+
-- Executed directly in PostgreSQL (no data movement)
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### Federated (Heterogeneous Sources)
|
|
198
|
+
|
|
199
|
+
When sources come from different connections, DVT uses the compute layer:
|
|
200
|
+
|
|
201
|
+
```sql
|
|
202
|
+
-- Sources from different connections → Use compute layer
|
|
203
|
+
SELECT * FROM {{ source('postgres', 'orders') }}
|
|
204
|
+
JOIN {{ source('mysql', 'products') }} USING (product_id)
|
|
205
|
+
-- Data loaded into Spark, join executed there
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
## CLI Commands
|
|
209
|
+
|
|
210
|
+
### Standard dbt Commands
|
|
211
|
+
|
|
212
|
+
All dbt commands work unchanged:
|
|
213
|
+
|
|
214
|
+
```bash
|
|
215
|
+
dvt run
|
|
216
|
+
dvt test
|
|
217
|
+
dvt build
|
|
218
|
+
dvt docs generate
|
|
219
|
+
dvt docs serve
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
### DVT-Specific Commands
|
|
223
|
+
|
|
224
|
+
Manage external Spark clusters:
|
|
225
|
+
|
|
226
|
+
```bash
|
|
227
|
+
# Register external Spark cluster
|
|
228
|
+
dvt compute register prod_cluster --master spark://master:7077
|
|
229
|
+
|
|
230
|
+
# List registered clusters
|
|
231
|
+
dvt compute list
|
|
232
|
+
|
|
233
|
+
# Remove cluster
|
|
234
|
+
dvt compute remove prod_cluster
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## Configuration Options
|
|
238
|
+
|
|
239
|
+
### Model Configuration
|
|
240
|
+
|
|
241
|
+
```sql
|
|
242
|
+
{{ config(
|
|
243
|
+
materialized='table',
|
|
244
|
+
target='snowflake_analytics', -- Where to write results
|
|
245
|
+
compute='spark-local' -- Force Spark Local for processing
|
|
246
|
+
) }}
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
### Smart Compute Selection
|
|
250
|
+
|
|
251
|
+
DVT automatically selects the optimal compute engine:
|
|
252
|
+
|
|
253
|
+
- **Spark Local**: Small/medium workloads (< 10GB), fast in-process execution
|
|
254
|
+
- **Spark Cluster**: Large workloads (> 10GB), distributed processing
|
|
255
|
+
|
|
256
|
+
Override with `compute='spark-local'` or `compute='spark-cluster'` in config.
|
|
257
|
+
|
|
258
|
+
## Key Principles
|
|
259
|
+
|
|
260
|
+
1. **Adapters for I/O only** - Read from sources, write to targets
|
|
261
|
+
2. **Compute engines for processing only** - Never materialize
|
|
262
|
+
3. **JDBC as the universal transfer interface** - Efficient data movement between sources and the compute engine
|
|
263
|
+
4. **Backward compatibility** - All dbt projects work unchanged
|
|
264
|
+
5. **User configuration always wins** - Override any automatic decision
|
|
265
|
+
|
|
266
|
+
## Requirements
|
|
267
|
+
|
|
268
|
+
- Python 3.10+
|
|
269
|
+
- dbt-compatible adapters for your data sources
|
|
270
|
+
- PySpark (installed automatically)
|
|
271
|
+
|
|
272
|
+
## License
|
|
273
|
+
|
|
274
|
+
Apache License 2.0 (same as dbt-core)
|
|
275
|
+
|
|
276
|
+
## Acknowledgments
|
|
277
|
+
|
|
278
|
+
Built on [dbt-core](https://github.com/dbt-labs/dbt-core) architecture. DVT extends dbt's capabilities while preserving its excellent design patterns and developer experience.
|
|
279
|
+
|
|
280
|
+
## Links
|
|
281
|
+
|
|
282
|
+
- [Documentation](https://github.com/dvt-core/dvt-core#readme)
|
|
283
|
+
- [Issues](https://github.com/dvt-core/dvt-core/issues)
|
|
284
|
+
- [Repository](https://github.com/dvt-core/dvt-core)
|
|
285
|
+
|
|
286
|
+
---
|
|
287
|
+
|
|
288
|
+
**Transform data across any source, materialize to any target, with intelligent query optimization.**
|