dvt-core 0.59.0a51__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt/__init__.py +7 -0
- dbt/_pydantic_shim.py +26 -0
- dbt/artifacts/__init__.py +0 -0
- dbt/artifacts/exceptions/__init__.py +1 -0
- dbt/artifacts/exceptions/schemas.py +31 -0
- dbt/artifacts/resources/__init__.py +116 -0
- dbt/artifacts/resources/base.py +67 -0
- dbt/artifacts/resources/types.py +93 -0
- dbt/artifacts/resources/v1/analysis.py +10 -0
- dbt/artifacts/resources/v1/catalog.py +23 -0
- dbt/artifacts/resources/v1/components.py +274 -0
- dbt/artifacts/resources/v1/config.py +277 -0
- dbt/artifacts/resources/v1/documentation.py +11 -0
- dbt/artifacts/resources/v1/exposure.py +51 -0
- dbt/artifacts/resources/v1/function.py +52 -0
- dbt/artifacts/resources/v1/generic_test.py +31 -0
- dbt/artifacts/resources/v1/group.py +21 -0
- dbt/artifacts/resources/v1/hook.py +11 -0
- dbt/artifacts/resources/v1/macro.py +29 -0
- dbt/artifacts/resources/v1/metric.py +172 -0
- dbt/artifacts/resources/v1/model.py +145 -0
- dbt/artifacts/resources/v1/owner.py +10 -0
- dbt/artifacts/resources/v1/saved_query.py +111 -0
- dbt/artifacts/resources/v1/seed.py +41 -0
- dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dbt/artifacts/resources/v1/semantic_model.py +314 -0
- dbt/artifacts/resources/v1/singular_test.py +14 -0
- dbt/artifacts/resources/v1/snapshot.py +91 -0
- dbt/artifacts/resources/v1/source_definition.py +84 -0
- dbt/artifacts/resources/v1/sql_operation.py +10 -0
- dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
- dbt/artifacts/schemas/__init__.py +0 -0
- dbt/artifacts/schemas/base.py +191 -0
- dbt/artifacts/schemas/batch_results.py +24 -0
- dbt/artifacts/schemas/catalog/__init__.py +11 -0
- dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
- dbt/artifacts/schemas/freshness/__init__.py +1 -0
- dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
- dbt/artifacts/schemas/manifest/__init__.py +2 -0
- dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
- dbt/artifacts/schemas/results.py +147 -0
- dbt/artifacts/schemas/run/__init__.py +2 -0
- dbt/artifacts/schemas/run/v5/__init__.py +0 -0
- dbt/artifacts/schemas/run/v5/run.py +184 -0
- dbt/artifacts/schemas/upgrades/__init__.py +4 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dbt/artifacts/utils/validation.py +153 -0
- dbt/cli/__init__.py +1 -0
- dbt/cli/context.py +17 -0
- dbt/cli/exceptions.py +57 -0
- dbt/cli/flags.py +560 -0
- dbt/cli/main.py +2660 -0
- dbt/cli/option_types.py +121 -0
- dbt/cli/options.py +80 -0
- dbt/cli/params.py +844 -0
- dbt/cli/requires.py +490 -0
- dbt/cli/resolvers.py +60 -0
- dbt/cli/types.py +40 -0
- dbt/clients/__init__.py +0 -0
- dbt/clients/checked_load.py +83 -0
- dbt/clients/git.py +164 -0
- dbt/clients/jinja.py +206 -0
- dbt/clients/jinja_static.py +245 -0
- dbt/clients/registry.py +192 -0
- dbt/clients/yaml_helper.py +68 -0
- dbt/compilation.py +876 -0
- dbt/compute/__init__.py +14 -0
- dbt/compute/engines/__init__.py +12 -0
- dbt/compute/engines/spark_engine.py +642 -0
- dbt/compute/federated_executor.py +1080 -0
- dbt/compute/filter_pushdown.py +273 -0
- dbt/compute/jar_provisioning.py +273 -0
- dbt/compute/java_compat.py +689 -0
- dbt/compute/jdbc_utils.py +1252 -0
- dbt/compute/metadata/__init__.py +63 -0
- dbt/compute/metadata/adapters_registry.py +370 -0
- dbt/compute/metadata/catalog_store.py +1036 -0
- dbt/compute/metadata/registry.py +674 -0
- dbt/compute/metadata/store.py +1020 -0
- dbt/compute/smart_selector.py +377 -0
- dbt/compute/spark_logger.py +272 -0
- dbt/compute/strategies/__init__.py +55 -0
- dbt/compute/strategies/base.py +165 -0
- dbt/compute/strategies/dataproc.py +207 -0
- dbt/compute/strategies/emr.py +203 -0
- dbt/compute/strategies/local.py +472 -0
- dbt/compute/strategies/standalone.py +262 -0
- dbt/config/__init__.py +4 -0
- dbt/config/catalogs.py +94 -0
- dbt/config/compute.py +513 -0
- dbt/config/dvt_profile.py +408 -0
- dbt/config/profile.py +422 -0
- dbt/config/project.py +888 -0
- dbt/config/project_utils.py +48 -0
- dbt/config/renderer.py +231 -0
- dbt/config/runtime.py +564 -0
- dbt/config/selectors.py +208 -0
- dbt/config/utils.py +77 -0
- dbt/constants.py +28 -0
- dbt/context/__init__.py +0 -0
- dbt/context/base.py +745 -0
- dbt/context/configured.py +135 -0
- dbt/context/context_config.py +382 -0
- dbt/context/docs.py +82 -0
- dbt/context/exceptions_jinja.py +178 -0
- dbt/context/macro_resolver.py +195 -0
- dbt/context/macros.py +171 -0
- dbt/context/manifest.py +72 -0
- dbt/context/providers.py +2249 -0
- dbt/context/query_header.py +13 -0
- dbt/context/secret.py +58 -0
- dbt/context/target.py +74 -0
- dbt/contracts/__init__.py +0 -0
- dbt/contracts/files.py +413 -0
- dbt/contracts/graph/__init__.py +0 -0
- dbt/contracts/graph/manifest.py +1904 -0
- dbt/contracts/graph/metrics.py +97 -0
- dbt/contracts/graph/model_config.py +70 -0
- dbt/contracts/graph/node_args.py +42 -0
- dbt/contracts/graph/nodes.py +1806 -0
- dbt/contracts/graph/semantic_manifest.py +232 -0
- dbt/contracts/graph/unparsed.py +811 -0
- dbt/contracts/project.py +419 -0
- dbt/contracts/results.py +53 -0
- dbt/contracts/selection.py +23 -0
- dbt/contracts/sql.py +85 -0
- dbt/contracts/state.py +68 -0
- dbt/contracts/util.py +46 -0
- dbt/deprecations.py +348 -0
- dbt/deps/__init__.py +0 -0
- dbt/deps/base.py +152 -0
- dbt/deps/git.py +195 -0
- dbt/deps/local.py +79 -0
- dbt/deps/registry.py +130 -0
- dbt/deps/resolver.py +149 -0
- dbt/deps/tarball.py +120 -0
- dbt/docs/source/_ext/dbt_click.py +119 -0
- dbt/docs/source/conf.py +32 -0
- dbt/env_vars.py +64 -0
- dbt/event_time/event_time.py +40 -0
- dbt/event_time/sample_window.py +60 -0
- dbt/events/__init__.py +15 -0
- dbt/events/base_types.py +36 -0
- dbt/events/core_types_pb2.py +2 -0
- dbt/events/logging.py +108 -0
- dbt/events/types.py +2516 -0
- dbt/exceptions.py +1486 -0
- dbt/flags.py +89 -0
- dbt/graph/__init__.py +11 -0
- dbt/graph/cli.py +249 -0
- dbt/graph/graph.py +172 -0
- dbt/graph/queue.py +214 -0
- dbt/graph/selector.py +374 -0
- dbt/graph/selector_methods.py +975 -0
- dbt/graph/selector_spec.py +222 -0
- dbt/graph/thread_pool.py +18 -0
- dbt/hooks.py +21 -0
- dbt/include/README.md +49 -0
- dbt/include/__init__.py +3 -0
- dbt/include/data/adapters_registry.duckdb +0 -0
- dbt/include/data/build_comprehensive_registry.py +1254 -0
- dbt/include/data/build_registry.py +242 -0
- dbt/include/data/csv/adapter_queries.csv +33 -0
- dbt/include/data/csv/syntax_rules.csv +9 -0
- dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
- dbt/include/data/csv/type_mappings_databricks.csv +30 -0
- dbt/include/data/csv/type_mappings_mysql.csv +40 -0
- dbt/include/data/csv/type_mappings_oracle.csv +30 -0
- dbt/include/data/csv/type_mappings_postgres.csv +56 -0
- dbt/include/data/csv/type_mappings_redshift.csv +33 -0
- dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
- dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
- dbt/include/dvt_starter_project/README.md +15 -0
- dbt/include/dvt_starter_project/__init__.py +3 -0
- dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/dvt_project.yml +39 -0
- dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
- dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
- dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
- dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
- dbt/internal_deprecations.py +26 -0
- dbt/jsonschemas/__init__.py +3 -0
- dbt/jsonschemas/jsonschemas.py +309 -0
- dbt/jsonschemas/project/0.0.110.json +4717 -0
- dbt/jsonschemas/project/0.0.85.json +2015 -0
- dbt/jsonschemas/resources/0.0.110.json +2636 -0
- dbt/jsonschemas/resources/0.0.85.json +2536 -0
- dbt/jsonschemas/resources/latest.json +6773 -0
- dbt/links.py +4 -0
- dbt/materializations/__init__.py +0 -0
- dbt/materializations/incremental/__init__.py +0 -0
- dbt/materializations/incremental/microbatch.py +236 -0
- dbt/mp_context.py +8 -0
- dbt/node_types.py +37 -0
- dbt/parser/__init__.py +23 -0
- dbt/parser/analysis.py +21 -0
- dbt/parser/base.py +548 -0
- dbt/parser/common.py +266 -0
- dbt/parser/docs.py +52 -0
- dbt/parser/fixtures.py +51 -0
- dbt/parser/functions.py +30 -0
- dbt/parser/generic_test.py +100 -0
- dbt/parser/generic_test_builders.py +333 -0
- dbt/parser/hooks.py +122 -0
- dbt/parser/macros.py +137 -0
- dbt/parser/manifest.py +2208 -0
- dbt/parser/models.py +573 -0
- dbt/parser/partial.py +1178 -0
- dbt/parser/read_files.py +445 -0
- dbt/parser/schema_generic_tests.py +422 -0
- dbt/parser/schema_renderer.py +111 -0
- dbt/parser/schema_yaml_readers.py +935 -0
- dbt/parser/schemas.py +1466 -0
- dbt/parser/search.py +149 -0
- dbt/parser/seeds.py +28 -0
- dbt/parser/singular_test.py +20 -0
- dbt/parser/snapshots.py +44 -0
- dbt/parser/sources.py +558 -0
- dbt/parser/sql.py +62 -0
- dbt/parser/unit_tests.py +621 -0
- dbt/plugins/__init__.py +20 -0
- dbt/plugins/contracts.py +9 -0
- dbt/plugins/exceptions.py +2 -0
- dbt/plugins/manager.py +163 -0
- dbt/plugins/manifest.py +21 -0
- dbt/profiler.py +20 -0
- dbt/py.typed +1 -0
- dbt/query_analyzer.py +410 -0
- dbt/runners/__init__.py +2 -0
- dbt/runners/exposure_runner.py +7 -0
- dbt/runners/no_op_runner.py +45 -0
- dbt/runners/saved_query_runner.py +7 -0
- dbt/selected_resources.py +8 -0
- dbt/task/__init__.py +0 -0
- dbt/task/base.py +506 -0
- dbt/task/build.py +197 -0
- dbt/task/clean.py +56 -0
- dbt/task/clone.py +161 -0
- dbt/task/compile.py +150 -0
- dbt/task/compute.py +458 -0
- dbt/task/debug.py +513 -0
- dbt/task/deps.py +280 -0
- dbt/task/docs/__init__.py +3 -0
- dbt/task/docs/api/__init__.py +23 -0
- dbt/task/docs/api/catalog.py +204 -0
- dbt/task/docs/api/lineage.py +234 -0
- dbt/task/docs/api/profile.py +204 -0
- dbt/task/docs/api/spark.py +186 -0
- dbt/task/docs/generate.py +1002 -0
- dbt/task/docs/index.html +250 -0
- dbt/task/docs/serve.py +174 -0
- dbt/task/dvt_output.py +509 -0
- dbt/task/dvt_run.py +282 -0
- dbt/task/dvt_seed.py +806 -0
- dbt/task/freshness.py +322 -0
- dbt/task/function.py +121 -0
- dbt/task/group_lookup.py +46 -0
- dbt/task/init.py +1022 -0
- dbt/task/java.py +316 -0
- dbt/task/list.py +236 -0
- dbt/task/metadata.py +804 -0
- dbt/task/migrate.py +714 -0
- dbt/task/printer.py +175 -0
- dbt/task/profile.py +1489 -0
- dbt/task/profile_serve.py +662 -0
- dbt/task/retract.py +441 -0
- dbt/task/retry.py +175 -0
- dbt/task/run.py +1647 -0
- dbt/task/run_operation.py +141 -0
- dbt/task/runnable.py +758 -0
- dbt/task/seed.py +103 -0
- dbt/task/show.py +149 -0
- dbt/task/snapshot.py +56 -0
- dbt/task/spark.py +414 -0
- dbt/task/sql.py +110 -0
- dbt/task/target_sync.py +814 -0
- dbt/task/test.py +464 -0
- dbt/tests/fixtures/__init__.py +1 -0
- dbt/tests/fixtures/project.py +620 -0
- dbt/tests/util.py +651 -0
- dbt/tracking.py +529 -0
- dbt/utils/__init__.py +3 -0
- dbt/utils/artifact_upload.py +151 -0
- dbt/utils/utils.py +408 -0
- dbt/version.py +271 -0
- dvt_cli/__init__.py +158 -0
- dvt_core-0.59.0a51.dist-info/METADATA +288 -0
- dvt_core-0.59.0a51.dist-info/RECORD +299 -0
- dvt_core-0.59.0a51.dist-info/WHEEL +5 -0
- dvt_core-0.59.0a51.dist-info/entry_points.txt +2 -0
- dvt_core-0.59.0a51.dist-info/top_level.txt +2 -0
dbt/task/metadata.py
ADDED
@@ -0,0 +1,804 @@

# =============================================================================
# DVT Metadata Task
# =============================================================================
# Manages metadata for DVT projects - sources and materialized models.
#
# Commands:
#   dvt metadata reset                # Clear all metadata from store
#   dvt metadata snapshot             # Capture metadata for sources + models
#   dvt metadata export               # Display metadata in CLI (Rich table)
#   dvt metadata export-csv <file>    # Export to CSV file
#   dvt metadata export-json <file>   # Export to JSON file
#
# DVT v0.57.0: Replaces dvt snap with enhanced metadata management
# =============================================================================

import json
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional, Any, Tuple

from dbt.task.base import BaseTask
from dbt.flags import get_flags


class MetadataTask(BaseTask):
    """
    Task to manage DVT project metadata.

    This task handles:
    1. Capturing metadata from source definitions (sources.yml)
    2. Capturing metadata from materialized models
    3. Exporting metadata to various formats
    4. Clearing/resetting the metadata store
    """

    def __init__(self, args):
        super().__init__(args)
        self._metadata_store = None

    @property
    def metadata_store(self):
        """Lazy load the metadata store."""
        if self._metadata_store is None:
            from dbt.compute.metadata import ProjectMetadataStore
            project_root = Path(get_flags().PROJECT_DIR or ".")
            self._metadata_store = ProjectMetadataStore(project_root)
        return self._metadata_store

    def run(self):
        """Execute the metadata task based on subcommand."""
        subcommand = getattr(self.args, 'subcommand', 'snapshot')

        if subcommand == 'reset':
            return self.run_reset()
        elif subcommand == 'snapshot':
            return self.run_snapshot()
        elif subcommand == 'export':
            return self.run_export()
        elif subcommand == 'export-csv':
            return self.run_export_csv()
        elif subcommand == 'export-json':
            return self.run_export_json()
        else:
            # Default to snapshot
            return self.run_snapshot()

    # =========================================================================
    # Reset Subcommand
    # =========================================================================

    def run_reset(self):
        """Clear all metadata from the store."""
        from dbt.compute.metadata import ProjectMetadataStore

        project_dir = getattr(self.args, 'project_dir', None)
        project_root = Path(project_dir) if project_dir else Path(".")

        try:
            from rich.console import Console
            console = Console()
            use_rich = True
        except ImportError:
            use_rich = False

        dvt_dir = project_root / ".dvt"
        if not dvt_dir.exists():
            msg = "No .dvt directory found. Nothing to reset."
            if use_rich:
                console.print(f"[yellow]{msg}[/yellow]")
            else:
                print(msg)
            return True, True

        with ProjectMetadataStore(project_root) as store:
            store.initialize()
            store.clear_all_metadata()

        msg = "Metadata store cleared successfully."
        if use_rich:
            console.print(f"[green]✓[/green] {msg}")
        else:
            print(f"✓ {msg}")

        return True, True

    # =========================================================================
    # Snapshot Subcommand
    # =========================================================================

    def run_snapshot(self):
        """Capture metadata for all sources and materialized models."""
        from dbt.compute.metadata import ProjectMetadataStore
        from dbt.compute.metadata.store import TableMetadata, ColumnMetadata
        from dbt.compute.metadata.registry import TypeRegistry

        project_dir = getattr(self.args, 'project_dir', None)
        project_root = Path(project_dir) if project_dir else Path(".")

        # Try to use Rich for beautiful output
        try:
            from rich.console import Console
            from rich.progress import Progress, SpinnerColumn, TextColumn
            from rich.table import Table
            from rich.panel import Panel
            console = Console()
            use_rich = True
        except ImportError:
            use_rich = False

        # Ensure .dvt directory exists
        dvt_dir = project_root / ".dvt"
        if not dvt_dir.exists():
            dvt_dir.mkdir(parents=True, exist_ok=True)
            if use_rich:
                console.print(f"[cyan]Created {dvt_dir}[/cyan]")
            else:
                print(f"Created {dvt_dir}")

        # Header
        if use_rich:
            console.print(Panel.fit(
                "[bold cyan]DVT Metadata Snapshot[/bold cyan]\n"
                "Capturing metadata for sources and models",
                border_style="cyan"
            ))
            console.print()
        else:
            print("DVT Metadata Snapshot")
            print("=" * 40)
            print()

        # Load sources and models
        sources = self._load_sources(project_root)
        models = self._load_models(project_root)

        if not sources and not models:
            msg = "No sources or models found in project."
            if use_rich:
                console.print(f"[yellow]{msg}[/yellow]")
            else:
                print(msg)
            return True, True

        total_sources = len(sources)
        total_models = len(models)
        if use_rich:
            console.print(f"Found [cyan]{total_sources}[/cyan] source(s) and [cyan]{total_models}[/cyan] model(s)")
            console.print()
        else:
            print(f"Found {total_sources} source(s) and {total_models} model(s)")
            print()

        # Process sources and models
        with ProjectMetadataStore(project_root) as store:
            store.initialize()

            source_tables = 0
            source_columns = 0
            model_tables = 0
            model_columns = 0
            errors = []

            # Snapshot sources
            if sources:
                if use_rich:
                    console.print("[bold]Snapping sources...[/bold]")
                else:
                    print("Snapping sources...")

                for source_name, source_config in sources.items():
                    try:
                        t_count, c_count = self._snap_source(store, source_name, source_config)
                        source_tables += t_count
                        source_columns += c_count
                        if use_rich:
                            console.print(f"  [green]✓[/green] {source_name}: {t_count} tables, {c_count} columns")
                        else:
                            print(f"  ✓ {source_name}: {t_count} tables, {c_count} columns")
                    except Exception as e:
                        errors.append((source_name, str(e)))
                        if use_rich:
                            console.print(f"  [red]✗[/red] {source_name}: {e}")
                        else:
                            print(f"  ✗ {source_name}: {e}")

            # Snapshot models
            if models:
                if use_rich:
                    console.print()
                    console.print("[bold]Snapping models...[/bold]")
                else:
                    print()
                    print("Snapping models...")

                for model_name, model_config in models.items():
                    try:
                        t_count, c_count = self._snap_model(store, model_name, model_config)
                        model_tables += t_count
                        model_columns += c_count
                        if t_count > 0:
                            if use_rich:
                                console.print(f"  [green]✓[/green] {model_name}: {c_count} columns")
                            else:
                                print(f"  ✓ {model_name}: {c_count} columns")
                    except Exception as e:
                        errors.append((f"model:{model_name}", str(e)))
                        if use_rich:
                            console.print(f"  [red]✗[/red] {model_name}: {e}")
                        else:
                            print(f"  ✗ {model_name}: {e}")

        # Summary
        total_tables = source_tables + model_tables
        total_columns = source_columns + model_columns

        if use_rich:
            console.print()
            if errors:
                console.print(Panel(
                    f"[yellow]Completed with {len(errors)} error(s)[/yellow]\n"
                    f"Tables: {total_tables} | Columns: {total_columns}",
                    title="Summary",
                    border_style="yellow"
                ))
            else:
                console.print(Panel(
                    f"[green]Success![/green]\n"
                    f"Sources: {source_tables} tables, {source_columns} columns\n"
                    f"Models: {model_tables} tables, {model_columns} columns\n"
                    f"[dim]Saved to .dvt/metadata_store.duckdb[/dim]",
                    title="Summary",
                    border_style="green"
                ))
        else:
            print()
            print("=" * 40)
            if errors:
                print(f"Completed with {len(errors)} error(s)")
            else:
                print(f"Success: {total_tables} tables, {total_columns} columns")
            print(f"Saved to .dvt/metadata_store.duckdb")

        return len(errors) == 0, True

    # =========================================================================
    # Export Subcommand (CLI display)
    # =========================================================================

    def run_export(self):
        """Display metadata in Rich-formatted CLI output."""
        from dbt.compute.metadata import ProjectMetadataStore

        project_dir = getattr(self.args, 'project_dir', None)
        project_root = Path(project_dir) if project_dir else Path(".")

        # Try to use Rich
        try:
            from rich.console import Console
            from rich.table import Table
            from rich.panel import Panel
            console = Console()
            use_rich = True
        except ImportError:
            use_rich = False

        dvt_dir = project_root / ".dvt"
        if not dvt_dir.exists():
            msg = "No .dvt directory found. Run 'dvt metadata snapshot' first."
            if use_rich:
                console.print(f"[yellow]{msg}[/yellow]")
            else:
                print(msg)
            return False, False

        with ProjectMetadataStore(project_root) as store:
            store.initialize()

            # Get all sources/tables
            all_tables = store.get_all_sources()

            if not all_tables:
                msg = "No metadata found. Run 'dvt metadata snapshot' first."
                if use_rich:
                    console.print(f"[yellow]{msg}[/yellow]")
                else:
                    print(msg)
                return True, True

            if use_rich:
                console.print(Panel.fit(
                    "[bold cyan]DVT Metadata Store[/bold cyan]",
                    border_style="cyan"
                ))
                console.print()

                # Create summary table
                table = Table(title="Captured Metadata")
                table.add_column("Type", style="cyan")
                table.add_column("Source/Model", style="green")
                table.add_column("Table", style="white")
                table.add_column("Columns", justify="right")
                table.add_column("Last Updated", style="dim")

                for source_name, table_name in all_tables:
                    metadata = store.get_table_metadata(source_name, table_name)
                    if metadata:
                        # Determine type (source or model)
                        item_type = "Model" if source_name.startswith("model:") else "Source"
                        display_name = source_name.replace("model:", "") if item_type == "Model" else source_name

                        table.add_row(
                            item_type,
                            display_name,
                            table_name,
                            str(len(metadata.columns)),
                            metadata.last_refreshed.strftime("%Y-%m-%d %H:%M") if metadata.last_refreshed else "-"
                        )

                console.print(table)

                # Stats
                stats = store.get_stats()
                console.print()
                console.print(f"[dim]Total: {stats['metadata_tables']} tables, {stats['metadata_columns']} columns[/dim]")

            else:
                print("DVT Metadata Store")
                print("=" * 60)
                print(f"{'Type':<10} {'Source/Model':<20} {'Table':<20} {'Columns':>8}")
                print("-" * 60)

                for source_name, table_name in all_tables:
                    metadata = store.get_table_metadata(source_name, table_name)
                    if metadata:
                        item_type = "Model" if source_name.startswith("model:") else "Source"
                        display_name = source_name.replace("model:", "") if item_type == "Model" else source_name
                        print(f"{item_type:<10} {display_name:<20} {table_name:<20} {len(metadata.columns):>8}")

                print("-" * 60)

        return True, True

    # =========================================================================
    # Export CSV Subcommand
    # =========================================================================

    def run_export_csv(self):
        """Export metadata to CSV file."""
        from dbt.compute.metadata import ProjectMetadataStore

        project_dir = getattr(self.args, 'project_dir', None)
        project_root = Path(project_dir) if project_dir else Path(".")
        filename = getattr(self.args, 'filename', 'metadata.csv')

        try:
            from rich.console import Console
            console = Console()
            use_rich = True
        except ImportError:
            use_rich = False

        dvt_dir = project_root / ".dvt"
        if not dvt_dir.exists():
            msg = "No .dvt directory found. Run 'dvt metadata snapshot' first."
            if use_rich:
                console.print(f"[yellow]{msg}[/yellow]")
            else:
                print(msg)
            return False, False

        with ProjectMetadataStore(project_root) as store:
            store.initialize()

            # Get all metadata as CSV
            all_tables = store.get_all_sources()

            if not all_tables:
                msg = "No metadata found. Run 'dvt metadata snapshot' first."
                if use_rich:
                    console.print(f"[yellow]{msg}[/yellow]")
                else:
                    print(msg)
                return True, True

            # Build CSV content
            import csv
            output_path = Path(filename)

            with open(output_path, 'w', newline='') as csvfile:
                writer = csv.writer(csvfile)
                # Header
                writer.writerow([
                    'type', 'source_name', 'table_name', 'column_name',
                    'adapter_type', 'spark_type', 'is_nullable', 'is_primary_key',
                    'ordinal_position', 'last_refreshed'
                ])

                # Data
                for source_name, table_name in all_tables:
                    metadata = store.get_table_metadata(source_name, table_name)
                    if metadata:
                        item_type = "model" if source_name.startswith("model:") else "source"
                        for col in metadata.columns:
                            writer.writerow([
                                item_type,
                                source_name,
                                table_name,
                                col.column_name,
                                col.adapter_type,
                                col.spark_type,
                                col.is_nullable,
                                col.is_primary_key,
                                col.ordinal_position,
                                metadata.last_refreshed.isoformat() if metadata.last_refreshed else ''
                            ])

        if use_rich:
            console.print(f"[green]✓[/green] Exported to [cyan]{output_path}[/cyan]")
        else:
            print(f"✓ Exported to {output_path}")

        return True, True

    # =========================================================================
    # Export JSON Subcommand
    # =========================================================================

    def run_export_json(self):
        """Export metadata to JSON file."""
        from dbt.compute.metadata import ProjectMetadataStore

        project_dir = getattr(self.args, 'project_dir', None)
        project_root = Path(project_dir) if project_dir else Path(".")
        filename = getattr(self.args, 'filename', 'metadata.json')

        try:
            from rich.console import Console
            console = Console()
            use_rich = True
        except ImportError:
            use_rich = False

        dvt_dir = project_root / ".dvt"
        if not dvt_dir.exists():
            msg = "No .dvt directory found. Run 'dvt metadata snapshot' first."
            if use_rich:
                console.print(f"[yellow]{msg}[/yellow]")
            else:
                print(msg)
            return False, False

        with ProjectMetadataStore(project_root) as store:
            store.initialize()

            all_tables = store.get_all_sources()

            if not all_tables:
                msg = "No metadata found. Run 'dvt metadata snapshot' first."
                if use_rich:
                    console.print(f"[yellow]{msg}[/yellow]")
                else:
                    print(msg)
                return True, True

            # Build JSON structure
            metadata_json = {
                "version": "1.0",
                "exported_at": datetime.now().isoformat(),
                "sources": {},
                "models": {}
            }

            for source_name, table_name in all_tables:
                metadata = store.get_table_metadata(source_name, table_name)
                if metadata:
                    is_model = source_name.startswith("model:")
                    target_dict = metadata_json["models"] if is_model else metadata_json["sources"]
                    clean_name = source_name.replace("model:", "") if is_model else source_name

                    if clean_name not in target_dict:
                        target_dict[clean_name] = {
                            "adapter": metadata.adapter_name,
                            "connection": metadata.connection_name,
                            "tables": {}
                        }

                    target_dict[clean_name]["tables"][table_name] = {
                        "schema": metadata.schema_name,
                        "last_refreshed": metadata.last_refreshed.isoformat() if metadata.last_refreshed else None,
                        "columns": [
                            {
                                "name": col.column_name,
                                "adapter_type": col.adapter_type,
                                "spark_type": col.spark_type,
                                "nullable": col.is_nullable,
                                "primary_key": col.is_primary_key,
                                "position": col.ordinal_position
                            }
                            for col in metadata.columns
                        ]
                    }

        # Write JSON
        output_path = Path(filename)
        with open(output_path, 'w') as f:
            json.dump(metadata_json, f, indent=2)

        if use_rich:
            console.print(f"[green]✓[/green] Exported to [cyan]{output_path}[/cyan]")
        else:
            print(f"✓ Exported to {output_path}")

        return True, True

    # =========================================================================
    # Helper Methods
    # =========================================================================

    def _load_sources(self, project_root: Path) -> Dict[str, Dict[str, Any]]:
        """Load source definitions from the project."""
        import yaml

        sources = {}
        models_dir = project_root / "models"
        if not models_dir.exists():
            return sources

        for yml_file in models_dir.rglob("*.yml"):
            try:
                with open(yml_file) as f:
                    content = yaml.safe_load(f)

                if content and "sources" in content:
                    for source in content["sources"]:
                        source_name = source.get("name")
                        if source_name:
                            sources[source_name] = source
            except Exception:
                pass

        return sources

    def _load_models(self, project_root: Path) -> Dict[str, Dict[str, Any]]:
        """Load model metadata from catalog.json (actual database schema).

        The catalog.json is generated by `dvt docs generate` and contains
        actual column information from the database, not just what's documented
        in YAML files.
        """
        models = {}

        # Primary source: catalog.json (actual database schema)
        catalog_path = project_root / "target" / "catalog.json"
        if catalog_path.exists():
            try:
                with open(catalog_path) as f:
                    catalog = json.load(f)

                nodes = catalog.get("nodes", {})
                for node_id, node_info in nodes.items():
                    # Only process models (not seeds, tests, etc.)
                    if node_id.startswith("model."):
                        metadata = node_info.get("metadata", {})
                        columns = node_info.get("columns", {})

                        if columns:
                            model_name = metadata.get("name")
                            if model_name:
                                models[model_name] = {
                                    "name": model_name,
                                    "unique_id": node_id,
                                    "database": metadata.get("database"),
                                    "schema": metadata.get("schema"),
                                    "type": metadata.get("type"),  # TABLE, VIEW
                                    "columns": [
                                        {
                                            "name": col_info.get("name"),
                                            "data_type": col_info.get("type"),
                                            "index": col_info.get("index", 0),
                                        }
                                        for col_name, col_info in columns.items()
                                    ],
                                    "_from_catalog": True,
                                }
            except Exception as e:
                # Fall back to YAML if catalog fails
                pass

        # Fallback: YAML definitions (for models not in catalog)
        if not models:
            import yaml
            models_dir = project_root / "models"
            if models_dir.exists():
                for yml_file in models_dir.rglob("*.yml"):
                    try:
                        with open(yml_file) as f:
                            content = yaml.safe_load(f)

                        if content and "models" in content:
                            for model in content["models"]:
                                model_name = model.get("name")
                                if model_name and model.get("columns"):
                                    model["_file_path"] = str(yml_file)
                                    model["_from_catalog"] = False
                                    models[model_name] = model
                    except Exception:
                        pass

        return models

    def _snap_source(
        self,
        store,
        source_name: str,
        source_config: Dict[str, Any]
    ) -> Tuple[int, int]:
        """Snapshot metadata from a single source."""
        from dbt.compute.metadata.store import TableMetadata, ColumnMetadata
        from dbt.compute.metadata.registry import TypeRegistry

        tables_count = 0
        columns_count = 0

        schema = source_config.get("schema", "public")
        tables = source_config.get("tables", [])
        adapter_name = source_config.get("adapter", "postgres")

        for table_config in tables:
            table_name = table_config.get("name")
            if not table_name:
                continue

            columns_config = table_config.get("columns", [])
            if not columns_config:
                continue

            columns = []
            for idx, col_config in enumerate(columns_config):
                col_name = col_config.get("name")
                if not col_name:
                    continue

                adapter_type = col_config.get("data_type", "VARCHAR")
                type_info = TypeRegistry.get_spark_type(adapter_name, adapter_type)
                spark_type = type_info["spark_native_type"] if type_info else "StringType"

                columns.append(ColumnMetadata(
                    column_name=col_name,
                    adapter_type=adapter_type,
                    spark_type=spark_type,
                    is_nullable=col_config.get("nullable", True),
                    is_primary_key=col_config.get("primary_key", False),
                    ordinal_position=idx + 1,
                ))

            if columns:
                metadata = TableMetadata(
                    source_name=source_name,
                    table_name=table_name,
                    adapter_name=adapter_name,
                    connection_name=source_name,
                    schema_name=schema,
                    columns=columns,
                    last_refreshed=datetime.now(),
                )
                store.save_table_metadata(metadata)
                tables_count += 1
                columns_count += len(columns)

        return tables_count, columns_count

    def _snap_model(
        self,
        store,
        model_name: str,
        model_config: Dict[str, Any]
    ) -> Tuple[int, int]:
        """Snapshot metadata from a model definition.

        Handles both catalog-based (actual database schema) and YAML-based
        (documented columns) sources.
        """
        from dbt.compute.metadata.store import TableMetadata, ColumnMetadata
        from dbt.compute.metadata.registry import TypeRegistry

        columns_config = model_config.get("columns", [])
        if not columns_config:
            return 0, 0

        # Determine adapter type based on database in catalog
        from_catalog = model_config.get("_from_catalog", False)
        database = model_config.get("database", "")
        schema_name = model_config.get("schema", "default")

        # Infer adapter from database name in catalog
        if from_catalog:
            # Use database name to guess adapter (postgres, snowflake, etc.)
            adapter_name = self._infer_adapter_from_database(database)
        else:
            config = model_config.get("config", {})
            adapter_name = config.get("adapter_type", "postgres")

        columns = []
        for idx, col_config in enumerate(columns_config):
            col_name = col_config.get("name")
            if not col_name:
                continue

            # Get adapter type from catalog (actual DB type) or YAML
            adapter_type = col_config.get("data_type") or col_config.get("type", "STRING")

            # Convert adapter type to Spark type
            type_info = TypeRegistry.get_spark_type(adapter_name, adapter_type)
            spark_type = type_info["spark_native_type"] if type_info else "StringType"

            # For catalog-based, use index for position
            if from_catalog:
                ordinal_position = col_config.get("index", idx + 1)
            else:
                ordinal_position = idx + 1

            # Nullable defaults to True; catalog doesn't provide this info
            is_nullable = True
            is_primary_key = False

            # Check YAML tests for not_null and unique (only for YAML-based)
            if not from_catalog:
                tests = col_config.get("tests", []) or col_config.get("data_tests", [])
                if tests:
                    for test in tests:
                        if test == "not_null" or (isinstance(test, dict) and "not_null" in test):
                            is_nullable = False
                        if test == "unique" or (isinstance(test, dict) and "unique" in test):
                            is_primary_key = True

            columns.append(ColumnMetadata(
                column_name=col_name,
                adapter_type=adapter_type,
                spark_type=spark_type,
                is_nullable=is_nullable,
                is_primary_key=is_primary_key,
                ordinal_position=ordinal_position,
            ))

        if columns:
            # Sort by ordinal position for consistent output
            columns.sort(key=lambda c: c.ordinal_position)

            metadata = TableMetadata(
                source_name=f"model:{model_name}",
                table_name=model_name,
                adapter_name=adapter_name,
                connection_name="default",
                schema_name=schema_name,
                columns=columns,
                last_refreshed=datetime.now(),
            )
            store.save_table_metadata(metadata)
            return 1, len(columns)

        return 0, 0

    def _infer_adapter_from_database(self, database: str) -> str:
        """Infer adapter type from database name."""
        db_lower = database.lower() if database else ""

        # Common database name patterns
        if "postgres" in db_lower or "pg" in db_lower:
            return "postgres"
        elif "snowflake" in db_lower or "sf" in db_lower:
            return "snowflake"
        elif "databricks" in db_lower or "spark" in db_lower:
            return "databricks"
        elif "redshift" in db_lower:
            return "redshift"
        elif "bigquery" in db_lower or "bq" in db_lower:
            return "bigquery"
        elif "mysql" in db_lower:
            return "mysql"
        elif "sqlserver" in db_lower or "mssql" in db_lower:
            return "sqlserver"
        else:
            # Default to postgres as it's most common
            return "postgres"
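
A minimal invocation sketch for the task shown above. This is not part of the package diff; it only illustrates the args contract visible in the code (subcommand, project_dir, filename) and assumes BaseTask.__init__ requires nothing beyond the args namespace:

    # Hypothetical usage sketch; the SimpleNamespace fields mirror the
    # getattr(...) lookups in MetadataTask, and run() returns a 2-tuple
    # as seen in the method bodies above.
    from types import SimpleNamespace
    from dbt.task.metadata import MetadataTask

    args = SimpleNamespace(
        subcommand="export-json",    # reset | snapshot | export | export-csv | export-json
        project_dir=".",             # the task falls back to "." when absent
        filename="metadata.json",    # only read by the export-csv / export-json paths
    )
    ok, completed = MetadataTask(args).run()
    print(f"ok={ok} completed={completed}")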