dvt-core 0.59.0a38__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dvt_core-0.59.0a38/MANIFEST.in +189 -0
- dvt_core-0.59.0a38/PKG-INFO +288 -0
- dvt_core-0.59.0a38/README.md +236 -0
- dvt_core-0.59.0a38/dbt/__init__.py +7 -0
- dvt_core-0.59.0a38/dbt/_pydantic_shim.py +26 -0
- dvt_core-0.59.0a38/dbt/artifacts/__init__.py +0 -0
- dvt_core-0.59.0a38/dbt/artifacts/exceptions/__init__.py +1 -0
- dvt_core-0.59.0a38/dbt/artifacts/exceptions/schemas.py +31 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/__init__.py +116 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/base.py +67 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/types.py +93 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/analysis.py +10 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/catalog.py +23 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/components.py +274 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/config.py +277 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/documentation.py +11 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/exposure.py +51 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/function.py +52 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/generic_test.py +31 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/group.py +21 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/hook.py +11 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/macro.py +29 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/metric.py +172 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/model.py +145 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/owner.py +10 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/saved_query.py +111 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/seed.py +41 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/semantic_model.py +314 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/singular_test.py +14 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/snapshot.py +91 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/source_definition.py +84 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/sql_operation.py +10 -0
- dvt_core-0.59.0a38/dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/__init__.py +0 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/base.py +191 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/batch_results.py +24 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/catalog/__init__.py +11 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/freshness/__init__.py +1 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/manifest/__init__.py +2 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/results.py +147 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/run/__init__.py +2 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/run/v5/__init__.py +0 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/run/v5/run.py +184 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/upgrades/__init__.py +4 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dvt_core-0.59.0a38/dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dvt_core-0.59.0a38/dbt/artifacts/utils/validation.py +153 -0
- dvt_core-0.59.0a38/dbt/cli/__init__.py +1 -0
- dvt_core-0.59.0a38/dbt/cli/context.py +17 -0
- dvt_core-0.59.0a38/dbt/cli/exceptions.py +57 -0
- dvt_core-0.59.0a38/dbt/cli/flags.py +560 -0
- dvt_core-0.59.0a38/dbt/cli/main.py +2660 -0
- dvt_core-0.59.0a38/dbt/cli/option_types.py +121 -0
- dvt_core-0.59.0a38/dbt/cli/options.py +80 -0
- dvt_core-0.59.0a38/dbt/cli/params.py +844 -0
- dvt_core-0.59.0a38/dbt/cli/requires.py +490 -0
- dvt_core-0.59.0a38/dbt/cli/resolvers.py +60 -0
- dvt_core-0.59.0a38/dbt/cli/types.py +40 -0
- dvt_core-0.59.0a38/dbt/clients/__init__.py +0 -0
- dvt_core-0.59.0a38/dbt/clients/checked_load.py +83 -0
- dvt_core-0.59.0a38/dbt/clients/git.py +164 -0
- dvt_core-0.59.0a38/dbt/clients/jinja.py +206 -0
- dvt_core-0.59.0a38/dbt/clients/jinja_static.py +245 -0
- dvt_core-0.59.0a38/dbt/clients/registry.py +192 -0
- dvt_core-0.59.0a38/dbt/clients/yaml_helper.py +68 -0
- dvt_core-0.59.0a38/dbt/compilation.py +876 -0
- dvt_core-0.59.0a38/dbt/compute/__init__.py +14 -0
- dvt_core-0.59.0a38/dbt/compute/engines/__init__.py +12 -0
- dvt_core-0.59.0a38/dbt/compute/engines/spark_engine.py +642 -0
- dvt_core-0.59.0a38/dbt/compute/federated_executor.py +1080 -0
- dvt_core-0.59.0a38/dbt/compute/filter_pushdown.py +273 -0
- dvt_core-0.59.0a38/dbt/compute/jar_provisioning.py +273 -0
- dvt_core-0.59.0a38/dbt/compute/java_compat.py +689 -0
- dvt_core-0.59.0a38/dbt/compute/jdbc_utils.py +1252 -0
- dvt_core-0.59.0a38/dbt/compute/metadata/__init__.py +40 -0
- dvt_core-0.59.0a38/dbt/compute/metadata/adapters_registry.py +370 -0
- dvt_core-0.59.0a38/dbt/compute/metadata/registry.py +674 -0
- dvt_core-0.59.0a38/dbt/compute/metadata/store.py +1499 -0
- dvt_core-0.59.0a38/dbt/compute/smart_selector.py +377 -0
- dvt_core-0.59.0a38/dbt/compute/spark_logger.py +272 -0
- dvt_core-0.59.0a38/dbt/compute/strategies/__init__.py +55 -0
- dvt_core-0.59.0a38/dbt/compute/strategies/base.py +165 -0
- dvt_core-0.59.0a38/dbt/compute/strategies/dataproc.py +207 -0
- dvt_core-0.59.0a38/dbt/compute/strategies/emr.py +203 -0
- dvt_core-0.59.0a38/dbt/compute/strategies/local.py +472 -0
- dvt_core-0.59.0a38/dbt/compute/strategies/standalone.py +262 -0
- dvt_core-0.59.0a38/dbt/config/__init__.py +4 -0
- dvt_core-0.59.0a38/dbt/config/catalogs.py +94 -0
- dvt_core-0.59.0a38/dbt/config/compute.py +513 -0
- dvt_core-0.59.0a38/dbt/config/dvt_profile.py +342 -0
- dvt_core-0.59.0a38/dbt/config/profile.py +422 -0
- dvt_core-0.59.0a38/dbt/config/project.py +888 -0
- dvt_core-0.59.0a38/dbt/config/project_utils.py +48 -0
- dvt_core-0.59.0a38/dbt/config/renderer.py +231 -0
- dvt_core-0.59.0a38/dbt/config/runtime.py +553 -0
- dvt_core-0.59.0a38/dbt/config/selectors.py +208 -0
- dvt_core-0.59.0a38/dbt/config/utils.py +77 -0
- dvt_core-0.59.0a38/dbt/constants.py +28 -0
- dvt_core-0.59.0a38/dbt/context/__init__.py +0 -0
- dvt_core-0.59.0a38/dbt/context/base.py +745 -0
- dvt_core-0.59.0a38/dbt/context/configured.py +135 -0
- dvt_core-0.59.0a38/dbt/context/context_config.py +382 -0
- dvt_core-0.59.0a38/dbt/context/docs.py +82 -0
- dvt_core-0.59.0a38/dbt/context/exceptions_jinja.py +178 -0
- dvt_core-0.59.0a38/dbt/context/macro_resolver.py +195 -0
- dvt_core-0.59.0a38/dbt/context/macros.py +171 -0
- dvt_core-0.59.0a38/dbt/context/manifest.py +72 -0
- dvt_core-0.59.0a38/dbt/context/providers.py +2249 -0
- dvt_core-0.59.0a38/dbt/context/query_header.py +13 -0
- dvt_core-0.59.0a38/dbt/context/secret.py +58 -0
- dvt_core-0.59.0a38/dbt/context/target.py +74 -0
- dvt_core-0.59.0a38/dbt/contracts/__init__.py +0 -0
- dvt_core-0.59.0a38/dbt/contracts/files.py +413 -0
- dvt_core-0.59.0a38/dbt/contracts/graph/__init__.py +0 -0
- dvt_core-0.59.0a38/dbt/contracts/graph/manifest.py +1904 -0
- dvt_core-0.59.0a38/dbt/contracts/graph/metrics.py +97 -0
- dvt_core-0.59.0a38/dbt/contracts/graph/model_config.py +70 -0
- dvt_core-0.59.0a38/dbt/contracts/graph/node_args.py +42 -0
- dvt_core-0.59.0a38/dbt/contracts/graph/nodes.py +1806 -0
- dvt_core-0.59.0a38/dbt/contracts/graph/semantic_manifest.py +232 -0
- dvt_core-0.59.0a38/dbt/contracts/graph/unparsed.py +811 -0
- dvt_core-0.59.0a38/dbt/contracts/project.py +419 -0
- dvt_core-0.59.0a38/dbt/contracts/results.py +53 -0
- dvt_core-0.59.0a38/dbt/contracts/selection.py +23 -0
- dvt_core-0.59.0a38/dbt/contracts/sql.py +85 -0
- dvt_core-0.59.0a38/dbt/contracts/state.py +68 -0
- dvt_core-0.59.0a38/dbt/contracts/util.py +46 -0
- dvt_core-0.59.0a38/dbt/deprecations.py +348 -0
- dvt_core-0.59.0a38/dbt/deps/__init__.py +0 -0
- dvt_core-0.59.0a38/dbt/deps/base.py +152 -0
- dvt_core-0.59.0a38/dbt/deps/git.py +195 -0
- dvt_core-0.59.0a38/dbt/deps/local.py +79 -0
- dvt_core-0.59.0a38/dbt/deps/registry.py +130 -0
- dvt_core-0.59.0a38/dbt/deps/resolver.py +149 -0
- dvt_core-0.59.0a38/dbt/deps/tarball.py +120 -0
- dvt_core-0.59.0a38/dbt/docs/source/_ext/dbt_click.py +119 -0
- dvt_core-0.59.0a38/dbt/docs/source/conf.py +32 -0
- dvt_core-0.59.0a38/dbt/env_vars.py +64 -0
- dvt_core-0.59.0a38/dbt/event_time/event_time.py +40 -0
- dvt_core-0.59.0a38/dbt/event_time/sample_window.py +60 -0
- dvt_core-0.59.0a38/dbt/events/__init__.py +15 -0
- dvt_core-0.59.0a38/dbt/events/base_types.py +36 -0
- dvt_core-0.59.0a38/dbt/events/core_types_pb2.py +2 -0
- dvt_core-0.59.0a38/dbt/events/logging.py +108 -0
- dvt_core-0.59.0a38/dbt/events/types.py +2516 -0
- dvt_core-0.59.0a38/dbt/exceptions.py +1486 -0
- dvt_core-0.59.0a38/dbt/flags.py +89 -0
- dvt_core-0.59.0a38/dbt/graph/__init__.py +11 -0
- dvt_core-0.59.0a38/dbt/graph/cli.py +249 -0
- dvt_core-0.59.0a38/dbt/graph/graph.py +172 -0
- dvt_core-0.59.0a38/dbt/graph/queue.py +214 -0
- dvt_core-0.59.0a38/dbt/graph/selector.py +374 -0
- dvt_core-0.59.0a38/dbt/graph/selector_methods.py +975 -0
- dvt_core-0.59.0a38/dbt/graph/selector_spec.py +222 -0
- dvt_core-0.59.0a38/dbt/graph/thread_pool.py +18 -0
- dvt_core-0.59.0a38/dbt/hooks.py +21 -0
- dvt_core-0.59.0a38/dbt/include/README.md +49 -0
- dvt_core-0.59.0a38/dbt/include/__init__.py +3 -0
- dvt_core-0.59.0a38/dbt/include/data/adapters_registry.duckdb +0 -0
- dvt_core-0.59.0a38/dbt/include/data/build_comprehensive_registry.py +1254 -0
- dvt_core-0.59.0a38/dbt/include/data/build_registry.py +242 -0
- dvt_core-0.59.0a38/dbt/include/data/csv/adapter_queries.csv +33 -0
- dvt_core-0.59.0a38/dbt/include/data/csv/syntax_rules.csv +9 -0
- dvt_core-0.59.0a38/dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
- dvt_core-0.59.0a38/dbt/include/data/csv/type_mappings_databricks.csv +30 -0
- dvt_core-0.59.0a38/dbt/include/data/csv/type_mappings_mysql.csv +40 -0
- dvt_core-0.59.0a38/dbt/include/data/csv/type_mappings_oracle.csv +30 -0
- dvt_core-0.59.0a38/dbt/include/data/csv/type_mappings_postgres.csv +56 -0
- dvt_core-0.59.0a38/dbt/include/data/csv/type_mappings_redshift.csv +33 -0
- dvt_core-0.59.0a38/dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
- dvt_core-0.59.0a38/dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
- dvt_core-0.59.0a38/dbt/include/dvt_starter_project/README.md +15 -0
- dvt_core-0.59.0a38/dbt/include/dvt_starter_project/__init__.py +3 -0
- dvt_core-0.59.0a38/dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
- dvt_core-0.59.0a38/dbt/include/dvt_starter_project/dvt_project.yml +39 -0
- dvt_core-0.59.0a38/dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
- dvt_core-0.59.0a38/dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
- dvt_core-0.59.0a38/dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
- dvt_core-0.59.0a38/dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
- dvt_core-0.59.0a38/dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
- dvt_core-0.59.0a38/dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
- dvt_core-0.59.0a38/dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
- dvt_core-0.59.0a38/dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
- dvt_core-0.59.0a38/dbt/internal_deprecations.py +26 -0
- dvt_core-0.59.0a38/dbt/jsonschemas/__init__.py +3 -0
- dvt_core-0.59.0a38/dbt/jsonschemas/jsonschemas.py +309 -0
- dvt_core-0.59.0a38/dbt/jsonschemas/project/0.0.110.json +4717 -0
- dvt_core-0.59.0a38/dbt/jsonschemas/project/0.0.85.json +2015 -0
- dvt_core-0.59.0a38/dbt/jsonschemas/resources/0.0.110.json +2636 -0
- dvt_core-0.59.0a38/dbt/jsonschemas/resources/0.0.85.json +2536 -0
- dvt_core-0.59.0a38/dbt/jsonschemas/resources/latest.json +6773 -0
- dvt_core-0.59.0a38/dbt/links.py +4 -0
- dvt_core-0.59.0a38/dbt/materializations/__init__.py +0 -0
- dvt_core-0.59.0a38/dbt/materializations/incremental/__init__.py +0 -0
- dvt_core-0.59.0a38/dbt/materializations/incremental/microbatch.py +236 -0
- dvt_core-0.59.0a38/dbt/mp_context.py +8 -0
- dvt_core-0.59.0a38/dbt/node_types.py +37 -0
- dvt_core-0.59.0a38/dbt/parser/__init__.py +23 -0
- dvt_core-0.59.0a38/dbt/parser/analysis.py +21 -0
- dvt_core-0.59.0a38/dbt/parser/base.py +548 -0
- dvt_core-0.59.0a38/dbt/parser/common.py +266 -0
- dvt_core-0.59.0a38/dbt/parser/docs.py +52 -0
- dvt_core-0.59.0a38/dbt/parser/fixtures.py +51 -0
- dvt_core-0.59.0a38/dbt/parser/functions.py +30 -0
- dvt_core-0.59.0a38/dbt/parser/generic_test.py +100 -0
- dvt_core-0.59.0a38/dbt/parser/generic_test_builders.py +333 -0
- dvt_core-0.59.0a38/dbt/parser/hooks.py +122 -0
- dvt_core-0.59.0a38/dbt/parser/macros.py +137 -0
- dvt_core-0.59.0a38/dbt/parser/manifest.py +2208 -0
- dvt_core-0.59.0a38/dbt/parser/models.py +573 -0
- dvt_core-0.59.0a38/dbt/parser/partial.py +1178 -0
- dvt_core-0.59.0a38/dbt/parser/read_files.py +445 -0
- dvt_core-0.59.0a38/dbt/parser/schema_generic_tests.py +422 -0
- dvt_core-0.59.0a38/dbt/parser/schema_renderer.py +111 -0
- dvt_core-0.59.0a38/dbt/parser/schema_yaml_readers.py +935 -0
- dvt_core-0.59.0a38/dbt/parser/schemas.py +1466 -0
- dvt_core-0.59.0a38/dbt/parser/search.py +149 -0
- dvt_core-0.59.0a38/dbt/parser/seeds.py +28 -0
- dvt_core-0.59.0a38/dbt/parser/singular_test.py +20 -0
- dvt_core-0.59.0a38/dbt/parser/snapshots.py +44 -0
- dvt_core-0.59.0a38/dbt/parser/sources.py +558 -0
- dvt_core-0.59.0a38/dbt/parser/sql.py +62 -0
- dvt_core-0.59.0a38/dbt/parser/unit_tests.py +621 -0
- dvt_core-0.59.0a38/dbt/plugins/__init__.py +20 -0
- dvt_core-0.59.0a38/dbt/plugins/contracts.py +9 -0
- dvt_core-0.59.0a38/dbt/plugins/exceptions.py +2 -0
- dvt_core-0.59.0a38/dbt/plugins/manager.py +163 -0
- dvt_core-0.59.0a38/dbt/plugins/manifest.py +21 -0
- dvt_core-0.59.0a38/dbt/profiler.py +20 -0
- dvt_core-0.59.0a38/dbt/py.typed +1 -0
- dvt_core-0.59.0a38/dbt/query_analyzer.py +410 -0
- dvt_core-0.59.0a38/dbt/runners/__init__.py +2 -0
- dvt_core-0.59.0a38/dbt/runners/exposure_runner.py +7 -0
- dvt_core-0.59.0a38/dbt/runners/no_op_runner.py +45 -0
- dvt_core-0.59.0a38/dbt/runners/saved_query_runner.py +7 -0
- dvt_core-0.59.0a38/dbt/selected_resources.py +8 -0
- dvt_core-0.59.0a38/dbt/task/__init__.py +0 -0
- dvt_core-0.59.0a38/dbt/task/base.py +506 -0
- dvt_core-0.59.0a38/dbt/task/build.py +197 -0
- dvt_core-0.59.0a38/dbt/task/clean.py +56 -0
- dvt_core-0.59.0a38/dbt/task/clone.py +161 -0
- dvt_core-0.59.0a38/dbt/task/compile.py +150 -0
- dvt_core-0.59.0a38/dbt/task/compute.py +458 -0
- dvt_core-0.59.0a38/dbt/task/debug.py +513 -0
- dvt_core-0.59.0a38/dbt/task/deps.py +280 -0
- dvt_core-0.59.0a38/dbt/task/docs/__init__.py +3 -0
- dvt_core-0.59.0a38/dbt/task/docs/api/__init__.py +23 -0
- dvt_core-0.59.0a38/dbt/task/docs/api/catalog.py +204 -0
- dvt_core-0.59.0a38/dbt/task/docs/api/lineage.py +234 -0
- dvt_core-0.59.0a38/dbt/task/docs/api/profile.py +204 -0
- dvt_core-0.59.0a38/dbt/task/docs/api/spark.py +186 -0
- dvt_core-0.59.0a38/dbt/task/docs/generate.py +947 -0
- dvt_core-0.59.0a38/dbt/task/docs/index.html +250 -0
- dvt_core-0.59.0a38/dbt/task/docs/serve.py +174 -0
- dvt_core-0.59.0a38/dbt/task/dvt_output.py +509 -0
- dvt_core-0.59.0a38/dbt/task/dvt_run.py +282 -0
- dvt_core-0.59.0a38/dbt/task/dvt_seed.py +669 -0
- dvt_core-0.59.0a38/dbt/task/freshness.py +322 -0
- dvt_core-0.59.0a38/dbt/task/function.py +121 -0
- dvt_core-0.59.0a38/dbt/task/group_lookup.py +46 -0
- dvt_core-0.59.0a38/dbt/task/init.py +1000 -0
- dvt_core-0.59.0a38/dbt/task/java.py +316 -0
- dvt_core-0.59.0a38/dbt/task/list.py +236 -0
- dvt_core-0.59.0a38/dbt/task/metadata.py +804 -0
- dvt_core-0.59.0a38/dbt/task/migrate.py +714 -0
- dvt_core-0.59.0a38/dbt/task/printer.py +175 -0
- dvt_core-0.59.0a38/dbt/task/profile.py +1489 -0
- dvt_core-0.59.0a38/dbt/task/profile_serve.py +662 -0
- dvt_core-0.59.0a38/dbt/task/retract.py +441 -0
- dvt_core-0.59.0a38/dbt/task/retry.py +175 -0
- dvt_core-0.59.0a38/dbt/task/run.py +1514 -0
- dvt_core-0.59.0a38/dbt/task/run_operation.py +141 -0
- dvt_core-0.59.0a38/dbt/task/runnable.py +758 -0
- dvt_core-0.59.0a38/dbt/task/seed.py +103 -0
- dvt_core-0.59.0a38/dbt/task/show.py +149 -0
- dvt_core-0.59.0a38/dbt/task/snapshot.py +56 -0
- dvt_core-0.59.0a38/dbt/task/spark.py +414 -0
- dvt_core-0.59.0a38/dbt/task/sql.py +110 -0
- dvt_core-0.59.0a38/dbt/task/target_sync.py +767 -0
- dvt_core-0.59.0a38/dbt/task/test.py +464 -0
- dvt_core-0.59.0a38/dbt/tests/fixtures/__init__.py +1 -0
- dvt_core-0.59.0a38/dbt/tests/fixtures/project.py +620 -0
- dvt_core-0.59.0a38/dbt/tests/util.py +651 -0
- dvt_core-0.59.0a38/dbt/tracking.py +529 -0
- dvt_core-0.59.0a38/dbt/utils/__init__.py +3 -0
- dvt_core-0.59.0a38/dbt/utils/artifact_upload.py +151 -0
- dvt_core-0.59.0a38/dbt/utils/utils.py +408 -0
- dvt_core-0.59.0a38/dbt/version.py +270 -0
- dvt_core-0.59.0a38/dvt_cli/__init__.py +72 -0
- dvt_core-0.59.0a38/dvt_core.egg-info/PKG-INFO +288 -0
- dvt_core-0.59.0a38/dvt_core.egg-info/SOURCES.txt +567 -0
- dvt_core-0.59.0a38/dvt_core.egg-info/dependency_links.txt +1 -0
- dvt_core-0.59.0a38/dvt_core.egg-info/entry_points.txt +2 -0
- dvt_core-0.59.0a38/dvt_core.egg-info/not-zip-safe +1 -0
- dvt_core-0.59.0a38/dvt_core.egg-info/requires.txt +29 -0
- dvt_core-0.59.0a38/dvt_core.egg-info/top_level.txt +2 -0
- dvt_core-0.59.0a38/pyproject.toml +152 -0
- dvt_core-0.59.0a38/setup.cfg +4 -0
- dvt_core-0.59.0a38/setup.py +126 -0
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# DVT-Core MANIFEST.in - Closed Source Binary Distribution
|
|
3
|
+
# =============================================================================
|
|
4
|
+
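# This file controls which files go into the source distribution (sdist); files inside packages also reach the wheel when include_package_data is enabled.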
|
|
5
|
+
# DVT v0.52.0: Binary distribution with Cython-compiled modules.
|
|
6
|
+
# =============================================================================
|
|
7
|
+
|
|
8
|
+
# =============================================================================
|
|
9
|
+
# INCLUDE: Compiled binary extensions
|
|
10
|
+
# =============================================================================
|
|
11
|
+
# These are the Cython-compiled extension modules (.so on Linux and macOS, .pyd on Windows); *.dylib is also matched below in case native shared libraries are bundled.
|
|
12
|
+
|
|
13
|
+
# Root dbt module compiled files
|
|
14
|
+
recursive-include dbt *.so
|
|
15
|
+
recursive-include dbt *.dylib
|
|
16
|
+
recursive-include dbt *.pyd
|
|
17
|
+
|
|
18
|
+
# =============================================================================
|
|
19
|
+
# INCLUDE: Required Python files (cannot be compiled)
|
|
20
|
+
# =============================================================================
|
|
21
|
+
# These must remain as Python source due to:
|
|
22
|
+
# - Entry points (sys.path manipulation)
|
|
23
|
+
# - Click decorators (dynamic)
|
|
24
|
+
# - Namespace packages (pkgutil.extend_path)
|
|
25
|
+
# - Abstract base classes (polymorphism)
|
|
26
|
+
# - Dynamic imports (plugin patterns)
|
|
27
|
+
# - Manifest introspection (runtime reflection)
|
|
28
|
+
|
|
29
|
+
# Entry point package
|
|
30
|
+
include dvt_cli/__init__.py
|
|
31
|
+
include dvt_cli/*.py
|
|
32
|
+
|
|
33
|
+
# CLI modules (Click decorators)
|
|
34
|
+
include dbt/cli/__init__.py
|
|
35
|
+
include dbt/cli/*.py
|
|
36
|
+
|
|
37
|
+
# Namespace package roots
|
|
38
|
+
include dbt/__init__.py
|
|
39
|
+
include dbt/compute/__init__.py
|
|
40
|
+
include dbt/config/__init__.py
|
|
41
|
+
|
|
42
|
+
# Strategy files (abstract + dynamic dispatch)
|
|
43
|
+
include dbt/compute/strategies/__init__.py
|
|
44
|
+
include dbt/compute/strategies/*.py
|
|
45
|
+
|
|
46
|
+
# Engine files (multi-strategy dispatch)
|
|
47
|
+
include dbt/compute/engines/__init__.py
|
|
48
|
+
include dbt/compute/engines/*.py
|
|
49
|
+
|
|
50
|
+
# Metadata module (registry, store)
|
|
51
|
+
include dbt/compute/metadata/__init__.py
|
|
52
|
+
include dbt/compute/metadata/*.py
|
|
53
|
+
|
|
54
|
+
# Federated executor (manifest introspection)
|
|
55
|
+
include dbt/compute/federated_executor.py
|
|
56
|
+
|
|
57
|
+
# Task files (adapter factory, CLI)
|
|
58
|
+
include dbt/task/__init__.py
|
|
59
|
+
include dbt/task/*.py
|
|
60
|
+
|
|
61
|
+
# =============================================================================
|
|
62
|
+
# INCLUDE: dbt-core inherited files (cannot be compiled)
|
|
63
|
+
# =============================================================================
|
|
64
|
+
# These are part of the dbt-core base that DVT inherits
|
|
65
|
+
|
|
66
|
+
recursive-include dbt/artifacts *.py
|
|
67
|
+
recursive-include dbt/clients *.py
|
|
68
|
+
recursive-include dbt/context *.py
|
|
69
|
+
recursive-include dbt/contracts *.py
|
|
70
|
+
recursive-include dbt/deps *.py
|
|
71
|
+
recursive-include dbt/events *.py
|
|
72
|
+
recursive-include dbt/event_time *.py
|
|
73
|
+
recursive-include dbt/graph *.py
|
|
74
|
+
recursive-include dbt/materializations *.py
|
|
75
|
+
recursive-include dbt/parser *.py
|
|
76
|
+
recursive-include dbt/plugins *.py
|
|
77
|
+
recursive-include dbt/runners *.py
|
|
78
|
+
recursive-include dbt/tests *.py
|
|
79
|
+
recursive-include dbt/utils *.py
|
|
80
|
+
recursive-include dbt/docs *.py
|
|
81
|
+
|
|
82
|
+
# Top-level dbt modules
|
|
83
|
+
include dbt/*.py
|
|
84
|
+
|
|
85
|
+
# =============================================================================
|
|
86
|
+
# INCLUDE: Static assets and templates
|
|
87
|
+
# =============================================================================
|
|
88
|
+
|
|
89
|
+
# Starter project templates (Jinja SQL, YAML configs)
|
|
90
|
+
# NOTE: Using PLACEHOLDER instead of .gitkeep because setuptools doesn't include hidden files
|
|
91
|
+
recursive-include dbt/include *.py *.sql *.yml *.html *.md *.csv *.duckdb PLACEHOLDER
|
|
92
|
+
|
|
93
|
+
# Documentation server HTML
|
|
94
|
+
recursive-include dbt/task/docs *.html
|
|
95
|
+
|
|
96
|
+
# JSON schemas for validation
|
|
97
|
+
recursive-include dbt/jsonschemas *.json
|
|
98
|
+
|
|
99
|
+
# Type hints marker
|
|
100
|
+
include dbt/py.typed
|
|
101
|
+
|
|
102
|
+
# =============================================================================
|
|
103
|
+
# INCLUDE: License and documentation
|
|
104
|
+
# =============================================================================
|
|
105
|
+
|
|
106
|
+
include LICENSE
|
|
107
|
+
include LICENSE_DVT
|
|
108
|
+
include NOTICES
|
|
109
|
+
include README.md
|
|
110
|
+
|
|
111
|
+
# =============================================================================
|
|
112
|
+
# GLOBAL EXCLUDES
|
|
113
|
+
# =============================================================================
|
|
114
|
+
|
|
115
|
+
# Exclude Python cache and compiled files
|
|
116
|
+
global-exclude __pycache__
|
|
117
|
+
global-exclude *.pyc
|
|
118
|
+
global-exclude *.pyo
|
|
119
|
+
global-exclude .DS_Store
|
|
120
|
+
global-exclude *.egg-info
|
|
121
|
+
|
|
122
|
+
# Exclude development files
|
|
123
|
+
global-exclude .git*
|
|
124
|
+
global-exclude .pre-commit*
|
|
125
|
+
global-exclude tox.ini
|
|
126
|
+
global-exclude pytest.ini
|
|
127
|
+
global-exclude .coverage
|
|
128
|
+
global-exclude *.log
|
|
129
|
+
|
|
130
|
+
# =============================================================================
|
|
131
|
+
# EXCLUDE: DVT compiled modules (MUST be after includes to take precedence)
|
|
132
|
+
# =============================================================================
|
|
133
|
+
# v0.57.0: Exclude ONLY modules that are Cython-compiled
|
|
134
|
+
# IMPORTANT: Package __init__.py files are NOT compiled (they need to remain
|
|
135
|
+
# as .py files so Python recognizes directories as packages).
|
|
136
|
+
# These excludes MUST come AFTER the includes to override them
|
|
137
|
+
|
|
138
|
+
# Core Query Analysis
|
|
139
|
+
exclude dbt/query_analyzer.py
|
|
140
|
+
|
|
141
|
+
# Configuration
|
|
142
|
+
exclude dbt/config/dvt_profile.py
|
|
143
|
+
exclude dbt/config/compute.py
|
|
144
|
+
|
|
145
|
+
# Task Commands (DVT-specific CLI)
|
|
146
|
+
exclude dbt/task/metadata.py
|
|
147
|
+
exclude dbt/task/target_sync.py
|
|
148
|
+
exclude dbt/task/compute.py
|
|
149
|
+
exclude dbt/task/profile.py
|
|
150
|
+
exclude dbt/task/java.py
|
|
151
|
+
exclude dbt/task/spark.py
|
|
152
|
+
exclude dbt/task/init.py
|
|
153
|
+
exclude dbt/task/docs/serve.py
|
|
154
|
+
# NOTE: dbt/task/docs/api/__init__.py is NOT excluded (must remain as .py)
|
|
155
|
+
exclude dbt/task/docs/api/catalog.py
|
|
156
|
+
exclude dbt/task/docs/api/lineage.py
|
|
157
|
+
exclude dbt/task/docs/api/profile.py
|
|
158
|
+
exclude dbt/task/docs/api/spark.py
|
|
159
|
+
|
|
160
|
+
# Compute Layer (Base)
|
|
161
|
+
# NOTE: dbt/compute/__init__.py is NOT excluded (must remain as .py)
|
|
162
|
+
exclude dbt/compute/federated_executor.py
|
|
163
|
+
exclude dbt/compute/smart_selector.py
|
|
164
|
+
exclude dbt/compute/filter_pushdown.py
|
|
165
|
+
exclude dbt/compute/jdbc_utils.py
|
|
166
|
+
exclude dbt/compute/jar_provisioning.py
|
|
167
|
+
exclude dbt/compute/java_compat.py
|
|
168
|
+
|
|
169
|
+
# Compute Engines
|
|
170
|
+
# NOTE: dbt/compute/engines/__init__.py is NOT excluded (must remain as .py)
|
|
171
|
+
exclude dbt/compute/engines/spark_engine.py
|
|
172
|
+
|
|
173
|
+
# Compute Strategies
|
|
174
|
+
# NOTE: dbt/compute/strategies/__init__.py is NOT excluded (must remain as .py)
|
|
175
|
+
exclude dbt/compute/strategies/base.py
|
|
176
|
+
exclude dbt/compute/strategies/local.py
|
|
177
|
+
exclude dbt/compute/strategies/standalone.py
|
|
178
|
+
exclude dbt/compute/strategies/emr.py
|
|
179
|
+
exclude dbt/compute/strategies/dataproc.py
|
|
180
|
+
|
|
181
|
+
# Metadata Architecture
|
|
182
|
+
# NOTE: dbt/compute/metadata/__init__.py is NOT excluded (must remain as .py)
|
|
183
|
+
exclude dbt/compute/metadata/adapters_registry.py
|
|
184
|
+
exclude dbt/compute/metadata/registry.py
|
|
185
|
+
exclude dbt/compute/metadata/store.py
|
|
186
|
+
|
|
187
|
+
# Exclude Cython build artifacts (should not be in wheel)
|
|
188
|
+
global-exclude *.c
|
|
189
|
+
global-exclude *.h
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dvt-core
|
|
3
|
+
Version: 0.59.0a38
|
|
4
|
+
Summary: DVT (Data Virtualization Tool) - Multi-source data federation and transformation with Spark-unified compute layer.
|
|
5
|
+
Author: DVT Contributors
|
|
6
|
+
Maintainer: DVT Contributors
|
|
7
|
+
License-Expression: Apache-2.0
|
|
8
|
+
Project-URL: Homepage, https://github.com/dvt-core/dvt-core
|
|
9
|
+
Project-URL: Documentation, https://github.com/dvt-core/dvt-core#readme
|
|
10
|
+
Project-URL: Repository, https://github.com/dvt-core/dvt-core.git
|
|
11
|
+
Project-URL: Issues, https://github.com/dvt-core/dvt-core/issues
|
|
12
|
+
Keywords: data,virtualization,federation,multi-source,dbt,analytics,transform,spark,jdbc,databricks
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
15
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
16
|
+
Classifier: Programming Language :: Python
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
Requires-Dist: agate<1.10,>=1.7.0
|
|
25
|
+
Requires-Dist: Jinja2<4,>=3.1.3
|
|
26
|
+
Requires-Dist: mashumaro[msgpack]<3.15,>=3.9
|
|
27
|
+
Requires-Dist: click<9.0,>=8.0.2
|
|
28
|
+
Requires-Dist: jsonschema<5.0,>=4.19.1
|
|
29
|
+
Requires-Dist: networkx<4.0,>=2.3
|
|
30
|
+
Requires-Dist: protobuf<7.0,>=6.0
|
|
31
|
+
Requires-Dist: requests<3.0.0
|
|
32
|
+
Requires-Dist: snowplow-tracker<2.0,>=1.0.2
|
|
33
|
+
Requires-Dist: pathspec<0.13,>=0.9
|
|
34
|
+
Requires-Dist: sqlparse<0.6.0,>=0.5.0
|
|
35
|
+
Requires-Dist: dbt-extractor<=0.6,>=0.5.0
|
|
36
|
+
Requires-Dist: dbt-semantic-interfaces<0.10,>=0.9.0
|
|
37
|
+
Requires-Dist: dbt-common<2.0,>=1.27.0
|
|
38
|
+
Requires-Dist: dbt-adapters<2.0,>=1.15.5
|
|
39
|
+
Requires-Dist: dbt-protos<2.0,>=1.0.375
|
|
40
|
+
Requires-Dist: pydantic<3
|
|
41
|
+
Requires-Dist: packaging>20.9
|
|
42
|
+
Requires-Dist: pytz>=2015.7
|
|
43
|
+
Requires-Dist: pyyaml>=6.0
|
|
44
|
+
Requires-Dist: daff>=1.3.46
|
|
45
|
+
Requires-Dist: typing-extensions>=4.4
|
|
46
|
+
Requires-Dist: dbt-postgres<2.0,>=1.9.0
|
|
47
|
+
Requires-Dist: pyspark<5.0.0,>=3.5.0
|
|
48
|
+
Requires-Dist: duckdb>=0.9.0
|
|
49
|
+
Requires-Dist: rich>=13.0.0
|
|
50
|
+
Provides-Extra: databricks
|
|
51
|
+
Requires-Dist: databricks-connect>=13.0.0; extra == "databricks"
|
|
52
|
+
|
|
53
|
+
# DVT-Core: Data Virtualization Tool
|
|
54
|
+
|
|
55
|
+
**DVT-Core** is a multi-source data federation and transformation platform built on the dbt-core architecture. It lets you query and transform data across multiple heterogeneous data sources, with intelligent query pushdown and compute-layer integration.
|
|
56
|
+
|
|
57
|
+
## Features
|
|
58
|
+
|
|
59
|
+
- 🔄 **Multi-Source Queries**: Join data from PostgreSQL, Snowflake, BigQuery, MySQL, and more in a single query
|
|
60
|
+
- 🧠 **Intelligent Routing**: Automatically pushes down queries when possible, uses compute layer when needed
|
|
61
|
+
- ⚡ **JDBC Performance**: Spark JDBC-based data transfer for maximum efficiency
|
|
62
|
+
- 🔧 **Familiar Workflow**: Same dbt commands, same project structure, enhanced capabilities
|
|
63
|
+
- 🎯 **Smart Compute Selection**: Automatically chooses between Spark Local (embedded) or Spark Cluster (distributed)
|
|
64
|
+
- 🎛️ **Full Control**: Override everything with `target=` and `compute=` config options
|
|
65
|
+
- ✅ **100% Compatible**: Works with existing dbt projects and all dbt adapters
|
|
66
|
+
|
|
67
|
+
## Quick Start
|
|
68
|
+
|
|
69
|
+
### Installation
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install dvt-core
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Or with uv:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
uv pip install dvt-core
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Configure Multi-Connection Profile
|
|
82
|
+
|
|
83
|
+
```yaml
|
|
84
|
+
# profiles.yml
|
|
85
|
+
my_project:
|
|
86
|
+
connections:
|
|
87
|
+
postgres_prod:
|
|
88
|
+
type: postgres
|
|
89
|
+
host: prod-db.example.com
|
|
90
|
+
port: 5432
|
|
91
|
+
user: prod_user
|
|
92
|
+
password: "{{ env_var('POSTGRES_PASSWORD') }}"
|
|
93
|
+
database: analytics
|
|
94
|
+
schema: public
|
|
95
|
+
threads: 4
|
|
96
|
+
|
|
97
|
+
snowflake_warehouse:
|
|
98
|
+
type: snowflake
|
|
99
|
+
account: abc123
|
|
100
|
+
user: snow_user
|
|
101
|
+
password: "{{ env_var('SNOWFLAKE_PASSWORD') }}"
|
|
102
|
+
database: warehouse
|
|
103
|
+
schema: public
|
|
104
|
+
warehouse: compute_wh
|
|
105
|
+
threads: 8
|
|
106
|
+
|
|
107
|
+
default_target: snowflake_warehouse
|
|
108
|
+
threads: 4
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Define Sources with Connections
|
|
112
|
+
|
|
113
|
+
```yaml
|
|
114
|
+
# models/sources.yml
|
|
115
|
+
sources:
|
|
116
|
+
- name: postgres_data
|
|
117
|
+
connection: postgres_prod
|
|
118
|
+
tables:
|
|
119
|
+
- name: orders
|
|
120
|
+
- name: customers
|
|
121
|
+
|
|
122
|
+
- name: snowflake_data
|
|
123
|
+
connection: snowflake_warehouse
|
|
124
|
+
tables:
|
|
125
|
+
- name: products
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Create Multi-Source Model
|
|
129
|
+
|
|
130
|
+
```sql
|
|
131
|
+
-- models/combined_sales.sql
|
|
132
|
+
{{ config(
|
|
133
|
+
materialized='table',
|
|
134
|
+
target='snowflake_warehouse', -- Optional: override materialization target
|
|
135
|
+
compute='spark-local' -- Optional: force compute engine
|
|
136
|
+
) }}
|
|
137
|
+
|
|
138
|
+
SELECT
|
|
139
|
+
o.order_id,
|
|
140
|
+
o.order_date,
|
|
141
|
+
c.customer_name,
|
|
142
|
+
p.product_name,
|
|
143
|
+
o.quantity * p.price as total_amount
|
|
144
|
+
FROM {{ source('postgres_data', 'orders') }} o
|
|
145
|
+
JOIN {{ source('postgres_data', 'customers') }} c
|
|
146
|
+
ON o.customer_id = c.customer_id
|
|
147
|
+
JOIN {{ source('snowflake_data', 'products') }} p
|
|
148
|
+
ON o.product_id = p.product_id
|
|
149
|
+
WHERE o.order_date >= '2024-01-01'
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Run DVT
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
# Standard dbt commands work
|
|
156
|
+
dvt run --select combined_sales
|
|
157
|
+
|
|
158
|
+
# DVT automatically:
|
|
159
|
+
# 1. Analyzes query (sees postgres + snowflake sources)
|
|
160
|
+
# 2. Determines federated execution needed
|
|
161
|
+
# 3. Selects compute engine (Spark Local or Cluster based on workload)
|
|
162
|
+
# 4. Loads data from postgres and snowflake via adapters
|
|
163
|
+
# 5. Executes join in compute engine
|
|
164
|
+
# 6. Materializes result to target (snowflake)
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## Architecture
|
|
168
|
+
|
|
169
|
+
```
|
|
170
|
+
┌─────────────┐ ┌──────────┐ ┌─────────────┐ ┌──────────┐ ┌──────────────┐
|
|
171
|
+
│ Source DBs │────▶│ Adapters │────▶│ JDBC │────▶│ Compute │────▶│ Adapters │
|
|
172
|
+
│(Postgres, │ │ (Read) │ │ │ │ (Spark) │ │ (Write) │
|
|
173
|
+
│ MySQL, etc.)│ │ │ │ │ │ │ │ │
|
|
174
|
+
└─────────────┘ └──────────┘ └─────────────┘ └──────────┘ └──────────────┘
|
|
175
|
+
│
|
|
176
|
+
▼
|
|
177
|
+
┌──────────────┐
|
|
178
|
+
│ Target DB │
|
|
179
|
+
│ (Snowflake, │
|
|
180
|
+
│ BigQuery) │
|
|
181
|
+
└──────────────┘
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## Execution Strategies
|
|
185
|
+
|
|
186
|
+
### Pushdown (Homogeneous Sources)
|
|
187
|
+
|
|
188
|
+
When all sources come from the same connection, DVT executes the query directly on the source database:
|
|
189
|
+
|
|
190
|
+
```sql
|
|
191
|
+
-- All sources from same connection → Execute on source database
|
|
192
|
+
SELECT * FROM {{ source('postgres', 'orders') }}
|
|
193
|
+
JOIN {{ source('postgres', 'customers') }} USING (customer_id)
|
|
194
|
+
-- Executed directly in PostgreSQL (no data movement)
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### Federated (Heterogeneous Sources)
|
|
198
|
+
|
|
199
|
+
When sources come from different connections, DVT uses the compute layer:
|
|
200
|
+
|
|
201
|
+
```sql
|
|
202
|
+
-- Sources from different connections → Use compute layer
|
|
203
|
+
SELECT * FROM {{ source('postgres', 'orders') }}
|
|
204
|
+
JOIN {{ source('mysql', 'products') }} USING (product_id)
|
|
205
|
+
-- Data loaded into Spark, join executed there
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
## CLI Commands
|
|
209
|
+
|
|
210
|
+
### Standard dbt Commands
|
|
211
|
+
|
|
212
|
+
All standard dbt commands work unchanged when invoked through the `dvt` CLI:
|
|
213
|
+
|
|
214
|
+
```bash
|
|
215
|
+
dvt run
|
|
216
|
+
dvt test
|
|
217
|
+
dvt build
|
|
218
|
+
dvt docs generate
|
|
219
|
+
dvt docs serve
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
### DVT-Specific Commands
|
|
223
|
+
|
|
224
|
+
Manage external Spark clusters:
|
|
225
|
+
|
|
226
|
+
```bash
|
|
227
|
+
# Register external Spark cluster
|
|
228
|
+
dvt compute register prod_cluster --master spark://master:7077
|
|
229
|
+
|
|
230
|
+
# List registered clusters
|
|
231
|
+
dvt compute list
|
|
232
|
+
|
|
233
|
+
# Remove cluster
|
|
234
|
+
dvt compute remove prod_cluster
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## Configuration Options
|
|
238
|
+
|
|
239
|
+
### Model Configuration
|
|
240
|
+
|
|
241
|
+
```sql
|
|
242
|
+
{{ config(
|
|
243
|
+
materialized='table',
|
|
244
|
+
target='snowflake_analytics', -- Where to write results
|
|
245
|
+
compute='spark-local' -- Force Spark Local for processing
|
|
246
|
+
) }}
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
### Smart Compute Selection
|
|
250
|
+
|
|
251
|
+
DVT automatically selects the optimal compute engine:
|
|
252
|
+
|
|
253
|
+
- **Spark Local**: Small/medium workloads (< 10GB), fast in-process execution
|
|
254
|
+
- **Spark Cluster**: Large workloads (> 10GB), distributed processing
|
|
255
|
+
|
|
256
|
+
Override with `compute='spark-local'` or `compute='spark-cluster'` in config.
|
|
257
|
+
|
|
258
|
+
## Key Principles
|
|
259
|
+
|
|
260
|
+
1. **Adapters for I/O only** - Read from sources, write to targets
|
|
261
|
+
2. **Compute engines for processing only** - They never materialize results themselves; adapters write to the target
|
|
262
|
+
3. **JDBC as universal transfer layer** - Efficient data transfer between sources, compute, and targets
|
|
263
|
+
4. **Backward compatibility** - All dbt projects work unchanged
|
|
264
|
+
5. **User configuration always wins** - Override any automatic decision
|
|
265
|
+
|
|
266
|
+
## Requirements
|
|
267
|
+
|
|
268
|
+
- Python 3.10+
|
|
269
|
+
- dbt-compatible adapters for your data sources
|
|
270
|
+
- PySpark (installed automatically)
|
|
271
|
+
|
|
272
|
+
## License
|
|
273
|
+
|
|
274
|
+
Apache License 2.0 (same as dbt-core)
|
|
275
|
+
|
|
276
|
+
## Acknowledgments
|
|
277
|
+
|
|
278
|
+
Built on [dbt-core](https://github.com/dbt-labs/dbt-core) architecture. DVT extends dbt's capabilities while preserving its excellent design patterns and developer experience.
|
|
279
|
+
|
|
280
|
+
## Links
|
|
281
|
+
|
|
282
|
+
- [Documentation](https://github.com/dvt-core/dvt-core#readme)
|
|
283
|
+
- [Issues](https://github.com/dvt-core/dvt-core/issues)
|
|
284
|
+
- [Repository](https://github.com/dvt-core/dvt-core)
|
|
285
|
+
|
|
286
|
+
---
|
|
287
|
+
|
|
288
|
+
**Transform data across any source, materialize to any target, with intelligent query optimization.**
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
# DVT-Core: Data Virtualization Tool
|
|
2
|
+
|
|
3
|
+
**DVT-Core** is a multi-source data federation and transformation platform built on dbt-core architecture. Query and transform data across multiple heterogeneous data sources with intelligent query pushdown and compute layer integration.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- 🔄 **Multi-Source Queries**: Join data from PostgreSQL, Snowflake, BigQuery, MySQL, and more in a single query
|
|
8
|
+
- 🧠 **Intelligent Routing**: Automatically pushes down queries when possible and uses the compute layer when needed
|
|
9
|
+
- ⚡ **JDBC Performance**: Spark JDBC-based data transfer for maximum efficiency
|
|
10
|
+
- 🔧 **Familiar Workflow**: Same dbt commands, same project structure, enhanced capabilities
|
|
11
|
+
- 🎯 **Smart Compute Selection**: Automatically chooses between Spark Local (embedded) and Spark Cluster (distributed)
|
|
12
|
+
- 🎛️ **Full Control**: Override everything with `target=` and `compute=` config options
|
|
13
|
+
- ✅ **100% Compatible**: Works with existing dbt projects and all dbt adapters
|
|
14
|
+
|
|
15
|
+
## Quick Start
|
|
16
|
+
|
|
17
|
+
### Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install dvt-core
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Or with uv:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
uv pip install dvt-core
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### Configure Multi-Connection Profile
|
|
30
|
+
|
|
31
|
+
```yaml
|
|
32
|
+
# profiles.yml
|
|
33
|
+
my_project:
|
|
34
|
+
connections:
|
|
35
|
+
postgres_prod:
|
|
36
|
+
type: postgres
|
|
37
|
+
host: prod-db.example.com
|
|
38
|
+
port: 5432
|
|
39
|
+
user: prod_user
|
|
40
|
+
password: "{{ env_var('POSTGRES_PASSWORD') }}"
|
|
41
|
+
database: analytics
|
|
42
|
+
schema: public
|
|
43
|
+
threads: 4
|
|
44
|
+
|
|
45
|
+
snowflake_warehouse:
|
|
46
|
+
type: snowflake
|
|
47
|
+
account: abc123
|
|
48
|
+
user: snow_user
|
|
49
|
+
password: "{{ env_var('SNOWFLAKE_PASSWORD') }}"
|
|
50
|
+
database: warehouse
|
|
51
|
+
schema: public
|
|
52
|
+
warehouse: compute_wh
|
|
53
|
+
threads: 8
|
|
54
|
+
|
|
55
|
+
default_target: snowflake_warehouse
|
|
56
|
+
threads: 4
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Define Sources with Connections
|
|
60
|
+
|
|
61
|
+
```yaml
|
|
62
|
+
# models/sources.yml
|
|
63
|
+
sources:
|
|
64
|
+
- name: postgres_data
|
|
65
|
+
connection: postgres_prod
|
|
66
|
+
tables:
|
|
67
|
+
- name: orders
|
|
68
|
+
- name: customers
|
|
69
|
+
|
|
70
|
+
- name: snowflake_data
|
|
71
|
+
connection: snowflake_warehouse
|
|
72
|
+
tables:
|
|
73
|
+
- name: products
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### Create Multi-Source Model
|
|
77
|
+
|
|
78
|
+
```sql
|
|
79
|
+
-- models/combined_sales.sql
|
|
80
|
+
{{ config(
|
|
81
|
+
materialized='table',
|
|
82
|
+
target='snowflake_warehouse', -- Optional: override materialization target
|
|
83
|
+
compute='spark-local' -- Optional: force compute engine
|
|
84
|
+
) }}
|
|
85
|
+
|
|
86
|
+
SELECT
|
|
87
|
+
o.order_id,
|
|
88
|
+
o.order_date,
|
|
89
|
+
c.customer_name,
|
|
90
|
+
p.product_name,
|
|
91
|
+
o.quantity * p.price as total_amount
|
|
92
|
+
FROM {{ source('postgres_data', 'orders') }} o
|
|
93
|
+
JOIN {{ source('postgres_data', 'customers') }} c
|
|
94
|
+
ON o.customer_id = c.customer_id
|
|
95
|
+
JOIN {{ source('snowflake_data', 'products') }} p
|
|
96
|
+
ON o.product_id = p.product_id
|
|
97
|
+
WHERE o.order_date >= '2024-01-01'
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Run DVT
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
# Standard dbt commands work
|
|
104
|
+
dvt run --select combined_sales
|
|
105
|
+
|
|
106
|
+
# DVT automatically:
|
|
107
|
+
# 1. Analyzes query (sees postgres + snowflake sources)
|
|
108
|
+
# 2. Determines federated execution needed
|
|
109
|
+
# 3. Selects compute engine (Spark Local or Cluster based on workload)
|
|
110
|
+
# 4. Loads data from postgres and snowflake via adapters
|
|
111
|
+
# 5. Executes join in compute engine
|
|
112
|
+
# 6. Materializes result to target (snowflake)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Architecture
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
┌─────────────┐ ┌──────────┐ ┌─────────────┐ ┌──────────┐ ┌──────────────┐
|
|
119
|
+
│ Source DBs │────▶│ Adapters │────▶│ JDBC │────▶│ Compute │────▶│ Adapters │
|
|
120
|
+
│(Postgres, │ │ (Read) │ │ │ │ (Spark) │ │ (Write) │
|
|
121
|
+
│ MySQL, etc.)│ │ │ │ │ │ │ │ │
|
|
122
|
+
└─────────────┘ └──────────┘ └─────────────┘ └──────────┘ └──────────────┘
|
|
123
|
+
│
|
|
124
|
+
▼
|
|
125
|
+
┌──────────────┐
|
|
126
|
+
│ Target DB │
|
|
127
|
+
│ (Snowflake, │
|
|
128
|
+
│ BigQuery) │
|
|
129
|
+
└──────────────┘
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Execution Strategies
|
|
133
|
+
|
|
134
|
+
### Pushdown (Homogeneous Sources)
|
|
135
|
+
|
|
136
|
+
When all sources come from the same connection, DVT executes the query directly on the source database:
|
|
137
|
+
|
|
138
|
+
```sql
|
|
139
|
+
-- All sources from same connection → Execute on source database
|
|
140
|
+
SELECT * FROM {{ source('postgres', 'orders') }}
|
|
141
|
+
JOIN {{ source('postgres', 'customers') }} USING (customer_id)
|
|
142
|
+
-- Executed directly in PostgreSQL (no data movement)
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### Federated (Heterogeneous Sources)
|
|
146
|
+
|
|
147
|
+
When sources come from different connections, DVT uses the compute layer:
|
|
148
|
+
|
|
149
|
+
```sql
|
|
150
|
+
-- Sources from different connections → Use compute layer
|
|
151
|
+
SELECT * FROM {{ source('postgres', 'orders') }}
|
|
152
|
+
JOIN {{ source('mysql', 'products') }} USING (product_id)
|
|
153
|
+
-- Data loaded into Spark, join executed there
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## CLI Commands
|
|
157
|
+
|
|
158
|
+
### Standard dbt Commands
|
|
159
|
+
|
|
160
|
+
All standard dbt commands work unchanged when invoked through the `dvt` CLI:
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
dvt run
|
|
164
|
+
dvt test
|
|
165
|
+
dvt build
|
|
166
|
+
dvt docs generate
|
|
167
|
+
dvt docs serve
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### DVT-Specific Commands
|
|
171
|
+
|
|
172
|
+
Manage external Spark clusters:
|
|
173
|
+
|
|
174
|
+
```bash
|
|
175
|
+
# Register external Spark cluster
|
|
176
|
+
dvt compute register prod_cluster --master spark://master:7077
|
|
177
|
+
|
|
178
|
+
# List registered clusters
|
|
179
|
+
dvt compute list
|
|
180
|
+
|
|
181
|
+
# Remove cluster
|
|
182
|
+
dvt compute remove prod_cluster
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## Configuration Options
|
|
186
|
+
|
|
187
|
+
### Model Configuration
|
|
188
|
+
|
|
189
|
+
```sql
|
|
190
|
+
{{ config(
|
|
191
|
+
materialized='table',
|
|
192
|
+
target='snowflake_analytics', -- Where to write results
|
|
193
|
+
compute='spark-local' -- Force Spark Local for processing
|
|
194
|
+
) }}
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### Smart Compute Selection
|
|
198
|
+
|
|
199
|
+
DVT automatically selects the optimal compute engine:
|
|
200
|
+
|
|
201
|
+
- **Spark Local**: Small/medium workloads (< 10GB), fast in-process execution
|
|
202
|
+
- **Spark Cluster**: Large workloads (> 10GB), distributed processing
|
|
203
|
+
|
|
204
|
+
Override with `compute='spark-local'` or `compute='spark-cluster'` in config.
|
|
205
|
+
|
|
206
|
+
## Key Principles
|
|
207
|
+
|
|
208
|
+
1. **Adapters for I/O only** - Read from sources, write to targets
|
|
209
|
+
2. **Compute engines for processing only** - They never materialize results themselves; adapters write to the target
|
|
210
|
+
3. **JDBC as universal transfer layer** - Efficient data transfer between sources, compute, and targets
|
|
211
|
+
4. **Backward compatibility** - All dbt projects work unchanged
|
|
212
|
+
5. **User configuration always wins** - Override any automatic decision
|
|
213
|
+
|
|
214
|
+
## Requirements
|
|
215
|
+
|
|
216
|
+
- Python 3.10+
|
|
217
|
+
- dbt-compatible adapters for your data sources
|
|
218
|
+
- PySpark (installed automatically)
|
|
219
|
+
|
|
220
|
+
## License
|
|
221
|
+
|
|
222
|
+
Apache License 2.0 (same as dbt-core)
|
|
223
|
+
|
|
224
|
+
## Acknowledgments
|
|
225
|
+
|
|
226
|
+
Built on [dbt-core](https://github.com/dbt-labs/dbt-core) architecture. DVT extends dbt's capabilities while preserving its excellent design patterns and developer experience.
|
|
227
|
+
|
|
228
|
+
## Links
|
|
229
|
+
|
|
230
|
+
- [Documentation](https://github.com/dvt-core/dvt-core#readme)
|
|
231
|
+
- [Issues](https://github.com/dvt-core/dvt-core/issues)
|
|
232
|
+
- [Repository](https://github.com/dvt-core/dvt-core)
|
|
233
|
+
|
|
234
|
+
---
|
|
235
|
+
|
|
236
|
+
**Transform data across any source, materialize to any target, with intelligent query optimization.**
|