dvt_core-0.59.0a51-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt/__init__.py +7 -0
- dbt/_pydantic_shim.py +26 -0
- dbt/artifacts/__init__.py +0 -0
- dbt/artifacts/exceptions/__init__.py +1 -0
- dbt/artifacts/exceptions/schemas.py +31 -0
- dbt/artifacts/resources/__init__.py +116 -0
- dbt/artifacts/resources/base.py +67 -0
- dbt/artifacts/resources/types.py +93 -0
- dbt/artifacts/resources/v1/analysis.py +10 -0
- dbt/artifacts/resources/v1/catalog.py +23 -0
- dbt/artifacts/resources/v1/components.py +274 -0
- dbt/artifacts/resources/v1/config.py +277 -0
- dbt/artifacts/resources/v1/documentation.py +11 -0
- dbt/artifacts/resources/v1/exposure.py +51 -0
- dbt/artifacts/resources/v1/function.py +52 -0
- dbt/artifacts/resources/v1/generic_test.py +31 -0
- dbt/artifacts/resources/v1/group.py +21 -0
- dbt/artifacts/resources/v1/hook.py +11 -0
- dbt/artifacts/resources/v1/macro.py +29 -0
- dbt/artifacts/resources/v1/metric.py +172 -0
- dbt/artifacts/resources/v1/model.py +145 -0
- dbt/artifacts/resources/v1/owner.py +10 -0
- dbt/artifacts/resources/v1/saved_query.py +111 -0
- dbt/artifacts/resources/v1/seed.py +41 -0
- dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dbt/artifacts/resources/v1/semantic_model.py +314 -0
- dbt/artifacts/resources/v1/singular_test.py +14 -0
- dbt/artifacts/resources/v1/snapshot.py +91 -0
- dbt/artifacts/resources/v1/source_definition.py +84 -0
- dbt/artifacts/resources/v1/sql_operation.py +10 -0
- dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
- dbt/artifacts/schemas/__init__.py +0 -0
- dbt/artifacts/schemas/base.py +191 -0
- dbt/artifacts/schemas/batch_results.py +24 -0
- dbt/artifacts/schemas/catalog/__init__.py +11 -0
- dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
- dbt/artifacts/schemas/freshness/__init__.py +1 -0
- dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
- dbt/artifacts/schemas/manifest/__init__.py +2 -0
- dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
- dbt/artifacts/schemas/results.py +147 -0
- dbt/artifacts/schemas/run/__init__.py +2 -0
- dbt/artifacts/schemas/run/v5/__init__.py +0 -0
- dbt/artifacts/schemas/run/v5/run.py +184 -0
- dbt/artifacts/schemas/upgrades/__init__.py +4 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dbt/artifacts/utils/validation.py +153 -0
- dbt/cli/__init__.py +1 -0
- dbt/cli/context.py +17 -0
- dbt/cli/exceptions.py +57 -0
- dbt/cli/flags.py +560 -0
- dbt/cli/main.py +2660 -0
- dbt/cli/option_types.py +121 -0
- dbt/cli/options.py +80 -0
- dbt/cli/params.py +844 -0
- dbt/cli/requires.py +490 -0
- dbt/cli/resolvers.py +60 -0
- dbt/cli/types.py +40 -0
- dbt/clients/__init__.py +0 -0
- dbt/clients/checked_load.py +83 -0
- dbt/clients/git.py +164 -0
- dbt/clients/jinja.py +206 -0
- dbt/clients/jinja_static.py +245 -0
- dbt/clients/registry.py +192 -0
- dbt/clients/yaml_helper.py +68 -0
- dbt/compilation.py +876 -0
- dbt/compute/__init__.py +14 -0
- dbt/compute/engines/__init__.py +12 -0
- dbt/compute/engines/spark_engine.py +642 -0
- dbt/compute/federated_executor.py +1080 -0
- dbt/compute/filter_pushdown.py +273 -0
- dbt/compute/jar_provisioning.py +273 -0
- dbt/compute/java_compat.py +689 -0
- dbt/compute/jdbc_utils.py +1252 -0
- dbt/compute/metadata/__init__.py +63 -0
- dbt/compute/metadata/adapters_registry.py +370 -0
- dbt/compute/metadata/catalog_store.py +1036 -0
- dbt/compute/metadata/registry.py +674 -0
- dbt/compute/metadata/store.py +1020 -0
- dbt/compute/smart_selector.py +377 -0
- dbt/compute/spark_logger.py +272 -0
- dbt/compute/strategies/__init__.py +55 -0
- dbt/compute/strategies/base.py +165 -0
- dbt/compute/strategies/dataproc.py +207 -0
- dbt/compute/strategies/emr.py +203 -0
- dbt/compute/strategies/local.py +472 -0
- dbt/compute/strategies/standalone.py +262 -0
- dbt/config/__init__.py +4 -0
- dbt/config/catalogs.py +94 -0
- dbt/config/compute.py +513 -0
- dbt/config/dvt_profile.py +408 -0
- dbt/config/profile.py +422 -0
- dbt/config/project.py +888 -0
- dbt/config/project_utils.py +48 -0
- dbt/config/renderer.py +231 -0
- dbt/config/runtime.py +564 -0
- dbt/config/selectors.py +208 -0
- dbt/config/utils.py +77 -0
- dbt/constants.py +28 -0
- dbt/context/__init__.py +0 -0
- dbt/context/base.py +745 -0
- dbt/context/configured.py +135 -0
- dbt/context/context_config.py +382 -0
- dbt/context/docs.py +82 -0
- dbt/context/exceptions_jinja.py +178 -0
- dbt/context/macro_resolver.py +195 -0
- dbt/context/macros.py +171 -0
- dbt/context/manifest.py +72 -0
- dbt/context/providers.py +2249 -0
- dbt/context/query_header.py +13 -0
- dbt/context/secret.py +58 -0
- dbt/context/target.py +74 -0
- dbt/contracts/__init__.py +0 -0
- dbt/contracts/files.py +413 -0
- dbt/contracts/graph/__init__.py +0 -0
- dbt/contracts/graph/manifest.py +1904 -0
- dbt/contracts/graph/metrics.py +97 -0
- dbt/contracts/graph/model_config.py +70 -0
- dbt/contracts/graph/node_args.py +42 -0
- dbt/contracts/graph/nodes.py +1806 -0
- dbt/contracts/graph/semantic_manifest.py +232 -0
- dbt/contracts/graph/unparsed.py +811 -0
- dbt/contracts/project.py +419 -0
- dbt/contracts/results.py +53 -0
- dbt/contracts/selection.py +23 -0
- dbt/contracts/sql.py +85 -0
- dbt/contracts/state.py +68 -0
- dbt/contracts/util.py +46 -0
- dbt/deprecations.py +348 -0
- dbt/deps/__init__.py +0 -0
- dbt/deps/base.py +152 -0
- dbt/deps/git.py +195 -0
- dbt/deps/local.py +79 -0
- dbt/deps/registry.py +130 -0
- dbt/deps/resolver.py +149 -0
- dbt/deps/tarball.py +120 -0
- dbt/docs/source/_ext/dbt_click.py +119 -0
- dbt/docs/source/conf.py +32 -0
- dbt/env_vars.py +64 -0
- dbt/event_time/event_time.py +40 -0
- dbt/event_time/sample_window.py +60 -0
- dbt/events/__init__.py +15 -0
- dbt/events/base_types.py +36 -0
- dbt/events/core_types_pb2.py +2 -0
- dbt/events/logging.py +108 -0
- dbt/events/types.py +2516 -0
- dbt/exceptions.py +1486 -0
- dbt/flags.py +89 -0
- dbt/graph/__init__.py +11 -0
- dbt/graph/cli.py +249 -0
- dbt/graph/graph.py +172 -0
- dbt/graph/queue.py +214 -0
- dbt/graph/selector.py +374 -0
- dbt/graph/selector_methods.py +975 -0
- dbt/graph/selector_spec.py +222 -0
- dbt/graph/thread_pool.py +18 -0
- dbt/hooks.py +21 -0
- dbt/include/README.md +49 -0
- dbt/include/__init__.py +3 -0
- dbt/include/data/adapters_registry.duckdb +0 -0
- dbt/include/data/build_comprehensive_registry.py +1254 -0
- dbt/include/data/build_registry.py +242 -0
- dbt/include/data/csv/adapter_queries.csv +33 -0
- dbt/include/data/csv/syntax_rules.csv +9 -0
- dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
- dbt/include/data/csv/type_mappings_databricks.csv +30 -0
- dbt/include/data/csv/type_mappings_mysql.csv +40 -0
- dbt/include/data/csv/type_mappings_oracle.csv +30 -0
- dbt/include/data/csv/type_mappings_postgres.csv +56 -0
- dbt/include/data/csv/type_mappings_redshift.csv +33 -0
- dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
- dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
- dbt/include/dvt_starter_project/README.md +15 -0
- dbt/include/dvt_starter_project/__init__.py +3 -0
- dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/dvt_project.yml +39 -0
- dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
- dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
- dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
- dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
- dbt/internal_deprecations.py +26 -0
- dbt/jsonschemas/__init__.py +3 -0
- dbt/jsonschemas/jsonschemas.py +309 -0
- dbt/jsonschemas/project/0.0.110.json +4717 -0
- dbt/jsonschemas/project/0.0.85.json +2015 -0
- dbt/jsonschemas/resources/0.0.110.json +2636 -0
- dbt/jsonschemas/resources/0.0.85.json +2536 -0
- dbt/jsonschemas/resources/latest.json +6773 -0
- dbt/links.py +4 -0
- dbt/materializations/__init__.py +0 -0
- dbt/materializations/incremental/__init__.py +0 -0
- dbt/materializations/incremental/microbatch.py +236 -0
- dbt/mp_context.py +8 -0
- dbt/node_types.py +37 -0
- dbt/parser/__init__.py +23 -0
- dbt/parser/analysis.py +21 -0
- dbt/parser/base.py +548 -0
- dbt/parser/common.py +266 -0
- dbt/parser/docs.py +52 -0
- dbt/parser/fixtures.py +51 -0
- dbt/parser/functions.py +30 -0
- dbt/parser/generic_test.py +100 -0
- dbt/parser/generic_test_builders.py +333 -0
- dbt/parser/hooks.py +122 -0
- dbt/parser/macros.py +137 -0
- dbt/parser/manifest.py +2208 -0
- dbt/parser/models.py +573 -0
- dbt/parser/partial.py +1178 -0
- dbt/parser/read_files.py +445 -0
- dbt/parser/schema_generic_tests.py +422 -0
- dbt/parser/schema_renderer.py +111 -0
- dbt/parser/schema_yaml_readers.py +935 -0
- dbt/parser/schemas.py +1466 -0
- dbt/parser/search.py +149 -0
- dbt/parser/seeds.py +28 -0
- dbt/parser/singular_test.py +20 -0
- dbt/parser/snapshots.py +44 -0
- dbt/parser/sources.py +558 -0
- dbt/parser/sql.py +62 -0
- dbt/parser/unit_tests.py +621 -0
- dbt/plugins/__init__.py +20 -0
- dbt/plugins/contracts.py +9 -0
- dbt/plugins/exceptions.py +2 -0
- dbt/plugins/manager.py +163 -0
- dbt/plugins/manifest.py +21 -0
- dbt/profiler.py +20 -0
- dbt/py.typed +1 -0
- dbt/query_analyzer.py +410 -0
- dbt/runners/__init__.py +2 -0
- dbt/runners/exposure_runner.py +7 -0
- dbt/runners/no_op_runner.py +45 -0
- dbt/runners/saved_query_runner.py +7 -0
- dbt/selected_resources.py +8 -0
- dbt/task/__init__.py +0 -0
- dbt/task/base.py +506 -0
- dbt/task/build.py +197 -0
- dbt/task/clean.py +56 -0
- dbt/task/clone.py +161 -0
- dbt/task/compile.py +150 -0
- dbt/task/compute.py +458 -0
- dbt/task/debug.py +513 -0
- dbt/task/deps.py +280 -0
- dbt/task/docs/__init__.py +3 -0
- dbt/task/docs/api/__init__.py +23 -0
- dbt/task/docs/api/catalog.py +204 -0
- dbt/task/docs/api/lineage.py +234 -0
- dbt/task/docs/api/profile.py +204 -0
- dbt/task/docs/api/spark.py +186 -0
- dbt/task/docs/generate.py +1002 -0
- dbt/task/docs/index.html +250 -0
- dbt/task/docs/serve.py +174 -0
- dbt/task/dvt_output.py +509 -0
- dbt/task/dvt_run.py +282 -0
- dbt/task/dvt_seed.py +806 -0
- dbt/task/freshness.py +322 -0
- dbt/task/function.py +121 -0
- dbt/task/group_lookup.py +46 -0
- dbt/task/init.py +1022 -0
- dbt/task/java.py +316 -0
- dbt/task/list.py +236 -0
- dbt/task/metadata.py +804 -0
- dbt/task/migrate.py +714 -0
- dbt/task/printer.py +175 -0
- dbt/task/profile.py +1489 -0
- dbt/task/profile_serve.py +662 -0
- dbt/task/retract.py +441 -0
- dbt/task/retry.py +175 -0
- dbt/task/run.py +1647 -0
- dbt/task/run_operation.py +141 -0
- dbt/task/runnable.py +758 -0
- dbt/task/seed.py +103 -0
- dbt/task/show.py +149 -0
- dbt/task/snapshot.py +56 -0
- dbt/task/spark.py +414 -0
- dbt/task/sql.py +110 -0
- dbt/task/target_sync.py +814 -0
- dbt/task/test.py +464 -0
- dbt/tests/fixtures/__init__.py +1 -0
- dbt/tests/fixtures/project.py +620 -0
- dbt/tests/util.py +651 -0
- dbt/tracking.py +529 -0
- dbt/utils/__init__.py +3 -0
- dbt/utils/artifact_upload.py +151 -0
- dbt/utils/utils.py +408 -0
- dbt/version.py +271 -0
- dvt_cli/__init__.py +158 -0
- dvt_core-0.59.0a51.dist-info/METADATA +288 -0
- dvt_core-0.59.0a51.dist-info/RECORD +299 -0
- dvt_core-0.59.0a51.dist-info/WHEEL +5 -0
- dvt_core-0.59.0a51.dist-info/entry_points.txt +2 -0
- dvt_core-0.59.0a51.dist-info/top_level.txt +2 -0
dbt/task/docs/generate.py
@@ -0,0 +1,1002 @@
+import os
+import shutil
+from dataclasses import replace
+from datetime import datetime, timezone
+from itertools import chain
+from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
+
+import agate
+
+import dbt.compilation
+import dbt.exceptions
+import dbt.utils
+import dbt_common.utils.formatting
+from dbt.adapters.events.types import (
+    BuildingCatalog,
+    CannotGenerateDocs,
+    CatalogWritten,
+    WriteCatalogFailure,
+)
+from dbt.adapters.factory import get_adapter
+from dbt.artifacts.schemas.catalog import (
+    CatalogArtifact,
+    CatalogKey,
+    CatalogResults,
+    CatalogTable,
+    ColumnMetadata,
+    PrimitiveDict,
+    StatsDict,
+    StatsItem,
+    TableMetadata,
+)
+from dbt.artifacts.schemas.results import NodeStatus
+from dbt.constants import CATALOG_FILENAME, MANIFEST_FILE_NAME
+from dbt.context.providers import generate_runtime_macro_context
+from dbt.contracts.graph.manifest import Manifest
+from dbt.contracts.graph.nodes import ResultNode
+from dbt.events.types import ArtifactWritten
+from dbt.exceptions import AmbiguousCatalogMatchError
+from dbt.graph import ResourceTypeSelector
+from dbt.graph.graph import UniqueId
+from dbt.node_types import EXECUTABLE_NODE_TYPES, NodeType
+from dbt.parser.manifest import write_manifest
+from dbt.task.compile import CompileTask
+from dbt.task.docs import DOCS_INDEX_FILE_PATH
+from dbt.utils.artifact_upload import add_artifact_produced
+from dbt_common.clients.system import load_file_contents
+from dbt_common.dataclass_schema import ValidationError
+from dbt_common.events.functions import fire_event
+from dbt_common.exceptions import DbtInternalError
+
+
+def get_stripped_prefix(source: Dict[str, Any], prefix: str) -> Dict[str, Any]:
+    """Go through the source, extracting every key/value pair where the key starts
+    with the given prefix.
+    """
+    cut = len(prefix)
+    return {k[cut:]: v for k, v in source.items() if k.startswith(prefix)}
+
+
+def build_catalog_table(data, adapter_type: Optional[str] = None) -> CatalogTable:
+    # build the new table's metadata + stats
+    metadata = TableMetadata.from_dict(get_stripped_prefix(data, "table_"))
+    stats = format_stats(get_stripped_prefix(data, "stats:"))
+
+    # DVT v0.4.3: Add adapter type metadata for visualization
+    # This enables adapter logos and connection badges in dbt docs
+    if adapter_type:
+        # Add adapter type to metadata comment for catalog display
+        comment_text = metadata.comment or ""
+        if comment_text and not comment_text.endswith(' '):
+            comment_text += " "
+        metadata = replace(
+            metadata,
+            comment=f"{comment_text}[adapter:{adapter_type}]"
+        )
+
+    return CatalogTable(
+        metadata=metadata,
+        stats=stats,
+        columns={},
+    )
+
+
+# keys are database name, schema name, table name
+class Catalog(Dict[CatalogKey, CatalogTable]):
+    def __init__(self, columns: List[PrimitiveDict]) -> None:
+        super().__init__()
+        for col in columns:
+            self.add_column(col)
+
+    def get_table(self, data: PrimitiveDict, adapter_type: Optional[str] = None) -> CatalogTable:
+        database = data.get("table_database")
+        if database is None:
+            dkey: Optional[str] = None
+        else:
+            dkey = str(database)
+
+        try:
+            key = CatalogKey(
+                dkey,
+                str(data["table_schema"]),
+                str(data["table_name"]),
+            )
+        except KeyError as exc:
+            raise dbt_common.exceptions.CompilationError(
+                "Catalog information missing required key {} (got {})".format(exc, data)
+            )
+        table: CatalogTable
+        if key in self:
+            table = self[key]
+        else:
+            table = build_catalog_table(data, adapter_type)
+            self[key] = table
+        return table
+
+    def add_column(self, data: PrimitiveDict):
+        table = self.get_table(data)
+        column_data = get_stripped_prefix(data, "column_")
+        # the index should really never be that big so it's ok to end up
+        # serializing this to JSON (2^53 is the max safe value there)
+        column_data["index"] = int(column_data["index"])
+
+        column = ColumnMetadata.from_dict(column_data)
+        table.columns[column.name] = column
+
+    def make_unique_id_map(
+        self, manifest: Manifest, selected_node_ids: Optional[Set[UniqueId]] = None
+    ) -> Tuple[Dict[str, CatalogTable], Dict[str, CatalogTable]]:
+        """
+        Create mappings between CatalogKeys and CatalogTables for nodes and sources, filtered by selected_node_ids.
+
+        By default, selected_node_ids is None and all nodes and sources defined in the manifest are included in the mappings.
+        """
+        nodes: Dict[str, CatalogTable] = {}
+        sources: Dict[str, CatalogTable] = {}
+
+        node_map, source_map = get_unique_id_mapping(manifest)
+        table: CatalogTable
+        for table in self.values():
+            key = table.key()
+            if key in node_map:
+                unique_id = node_map[key]
+                if selected_node_ids is None or unique_id in selected_node_ids:
+                    # DVT v0.4.3: Add comprehensive adapter and connection metadata for nodes
+                    node = manifest.nodes.get(unique_id)
+                    connection_name = None
+                    adapter_type = None
+                    compute_engine = None
+
+                    if node:
+                        # Get target connection name
+                        if hasattr(node.config, 'target') and node.config.target:
+                            connection_name = node.config.target
+
+                        # Get compute engine if specified
+                        if hasattr(node.config, 'compute') and node.config.compute:
+                            compute_engine = node.config.compute
+
+                    # Build metadata tags for catalog display
+                    comment_text = table.metadata.comment or ""
+                    tags = []
+
+                    if connection_name:
+                        tags.append(f"target:{connection_name}")
+                    if compute_engine:
+                        tags.append(f"compute:{compute_engine}")
+
+                    if tags:
+                        if comment_text and not comment_text.endswith(' '):
+                            comment_text += " "
+                        comment_text += f"[{' | '.join(tags)}]"
+
+                    # Create updated metadata with enriched info
+                    updated_metadata = replace(
+                        table.metadata,
+                        comment=comment_text if tags else table.metadata.comment
+                    )
+                    nodes[unique_id] = replace(table, unique_id=unique_id, metadata=updated_metadata)
+
+            unique_ids = source_map.get(table.key(), set())
+            for unique_id in unique_ids:
+                if unique_id in sources:
+                    raise AmbiguousCatalogMatchError(
+                        unique_id,
+                        sources[unique_id].to_dict(omit_none=True),
+                        table.to_dict(omit_none=True),
+                    )
+                elif selected_node_ids is None or unique_id in selected_node_ids:
+                    # DVT v0.4.3: Add comprehensive adapter and connection metadata for sources
+                    source = manifest.sources.get(unique_id)
+                    connection_name = None
+                    adapter_type = None
+
+                    if source:
+                        # Get connection name for source
+                        if hasattr(source, 'connection') and source.connection:
+                            connection_name = source.connection
+
+                        # Try to determine adapter type from connection
+                        # Check if we can get adapter info from manifest's profile
+                        if connection_name:
+                            # Sources store connection name, we need to map it to adapter type
+                            # This requires access to the RuntimeConfig which has the profile info
+                            # For now, we'll add just the connection tag and let dbt docs UI handle it
+                            pass
+
+                    # Build metadata tags for catalog display
+                    comment_text = table.metadata.comment or ""
+                    tags = []
+
+                    if connection_name:
+                        tags.append(f"source:{connection_name}")
+
+                    if tags:
+                        if comment_text and not comment_text.endswith(' '):
+                            comment_text += " "
+                        comment_text += f"[{' | '.join(tags)}]"
+
+                    # Create updated metadata with enriched info
+                    updated_metadata = replace(
+                        table.metadata,
+                        comment=comment_text if tags else table.metadata.comment
+                    )
+                    sources[unique_id] = replace(table, unique_id=unique_id, metadata=updated_metadata)
+        return nodes, sources
+
+
+def format_stats(stats: PrimitiveDict) -> StatsDict:
+    """Given a dictionary following this layout:
+
+    {
+        'encoded:label': 'Encoded',
+        'encoded:value': 'Yes',
+        'encoded:description': 'Indicates if the column is encoded',
+        'encoded:include': True,
+
+        'size:label': 'Size',
+        'size:value': 128,
+        'size:description': 'Size of the table in MB',
+        'size:include': True,
+    }
+
+    format_stats will convert the dict into a StatsDict with keys of 'encoded'
+    and 'size'.
+    """
+    stats_collector: StatsDict = {}
+
+    base_keys = {k.split(":")[0] for k in stats}
+    for key in base_keys:
+        dct: PrimitiveDict = {"id": key}
+        for subkey in ("label", "value", "description", "include"):
+            dct[subkey] = stats["{}:{}".format(key, subkey)]
+
+        try:
+            stats_item = StatsItem.from_dict(dct)
+        except ValidationError:
+            continue
+        if stats_item.include:
+            stats_collector[key] = stats_item
+
+    # we always have a 'has_stats' field, it's never included
+    has_stats = StatsItem(
+        id="has_stats",
+        label="Has Stats?",
+        value=len(stats_collector) > 0,
+        description="Indicates whether there are statistics for this table",
+        include=False,
+    )
+    stats_collector["has_stats"] = has_stats
+    return stats_collector
+
+
+def mapping_key(node: ResultNode) -> CatalogKey:
+    dkey = dbt_common.utils.formatting.lowercase(node.database)
+    return CatalogKey(dkey, node.schema.lower(), node.identifier.lower())
+
+
+def get_unique_id_mapping(
+    manifest: Manifest,
+) -> Tuple[Dict[CatalogKey, str], Dict[CatalogKey, Set[str]]]:
+    # A single relation could have multiple unique IDs pointing to it if a
+    # source were also a node.
+    node_map: Dict[CatalogKey, str] = {}
+    source_map: Dict[CatalogKey, Set[str]] = {}
+    for unique_id, node in manifest.nodes.items():
+        key = mapping_key(node)
+        node_map[key] = unique_id
+
+    for unique_id, source in manifest.sources.items():
+        key = mapping_key(source)
+        if key not in source_map:
+            source_map[key] = set()
+        source_map[key].add(unique_id)
+    return node_map, source_map
+
+
+class GenerateTask(CompileTask):
+    def run(self) -> CatalogArtifact:
+        compile_results = None
+        if self.args.compile:
+            compile_results = CompileTask.run(self)
+            if any(r.status == NodeStatus.Error for r in compile_results):
+                fire_event(CannotGenerateDocs())
+                return CatalogArtifact.from_results(
+                    nodes={},
+                    sources={},
+                    generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
+                    errors=None,
+                    compile_results=compile_results,
+                )
+
+        shutil.copyfile(
+            DOCS_INDEX_FILE_PATH, os.path.join(self.config.project_target_path, "index.html")
+        )
+
+        for asset_path in self.config.asset_paths:
+            to_asset_path = os.path.join(self.config.project_target_path, asset_path)
+
+            if os.path.exists(to_asset_path):
+                shutil.rmtree(to_asset_path)
+
+            from_asset_path = os.path.join(self.config.project_root, asset_path)
+
+            if os.path.exists(from_asset_path):
+                shutil.copytree(from_asset_path, to_asset_path)
+
+        if self.manifest is None:
+            raise DbtInternalError("self.manifest was None in run!")
+
+        selected_node_ids: Optional[Set[UniqueId]] = None
+        if self.args.empty_catalog:
+            catalog_table: agate.Table = agate.Table([])
+            exceptions: List[Exception] = []
+            selected_node_ids = set()
+        else:
+            # DVT v0.4.4: Multi-adapter catalog generation
+            # Group catalogable nodes by their connection/adapter to avoid cross-db errors
+            fire_event(BuildingCatalog())
+
+            # Get selected nodes if applicable
+            relations = None
+            if self.job_queue is not None:
+                selected_node_ids = self.job_queue.get_selected_nodes()
+                selected_nodes = self._get_nodes_from_ids(self.manifest, selected_node_ids)
+
+                # Source selection is handled separately
+                selected_source_ids = self._get_selected_source_ids()
+                selected_source_nodes = self._get_nodes_from_ids(
+                    self.manifest, selected_source_ids
+                )
+                selected_node_ids.update(selected_source_ids)
+                selected_nodes.extend(selected_source_nodes)
+
+            # Group all catalogable nodes by their connection/adapter
+            catalogable_nodes = chain(
+                [
+                    node
+                    for node in self.manifest.nodes.values()
+                    if (node.is_relational and not node.is_ephemeral_model)
+                ],
+                self.manifest.sources.values(),
+            )
+
+            # Group nodes by connection name
+            from collections import defaultdict
+            from dbt.contracts.graph.nodes import SourceDefinition
+
+            # DVT v0.59.0: Build database-to-connection mapping for multi-adapter support
+            # This allows inferring the correct connection when sources don't have explicit connection field
+            database_to_connection: Dict[str, str] = {}
+            try:
+                # RuntimeConfig stores outputs directly as credentials objects in self.outputs
+                if hasattr(self.config, 'outputs') and self.config.outputs:
+                    for output_name, credentials in self.config.outputs.items():
+                        # Credentials objects have database attribute
+                        output_database = None
+                        if hasattr(credentials, 'database') and credentials.database:
+                            output_database = credentials.database
+
+                        if output_database:
+                            # Normalize to lowercase for comparison
+                            database_to_connection[output_database.lower()] = output_name
+            except Exception:
+                pass  # Fallback to default behavior if we can't build the mapping
+
+            nodes_by_connection: Dict[str, List] = defaultdict(list)
+            for node in catalogable_nodes:
+                # Determine which connection/adapter this node uses
+                if isinstance(node, SourceDefinition):
+                    # Sources use their 'connection' field or meta.connection
+                    connection_name = (
+                        node.connection or
+                        (node.meta.get('connection') if node.meta else None)
+                    )
+
+                    # DVT v0.59.0: If no explicit connection, infer from database name
+                    if not connection_name and hasattr(node, 'database') and node.database:
+                        node_db_lower = node.database.lower()
+                        connection_name = database_to_connection.get(node_db_lower)
+
+                    # Final fallback to default target
+                    if not connection_name:
+                        connection_name = self.config.target_name
+
+                elif hasattr(node, 'config') and hasattr(node.config, 'target') and node.config.target:
+                    # Models use config.target override
+                    connection_name = node.config.target
+                else:
+                    # Default to target connection
+                    connection_name = self.config.target_name
+
+                nodes_by_connection[connection_name].append(node)
+
+            # DVT v0.59.0: Debug logging for multi-adapter grouping
+            import logging
+            _logger = logging.getLogger(__name__)
+            _logger.info(f"DVT: Grouped {sum(len(v) for v in nodes_by_connection.values())} nodes into {len(nodes_by_connection)} connections:")
+            for conn_name, conn_nodes in nodes_by_connection.items():
+                _logger.info(f"  {conn_name}: {len(conn_nodes)} nodes")
+
+            # Query catalog for each connection with its appropriate adapter
+            all_catalog_tables: List[agate.Table] = []
+            exceptions: List[Exception] = []
+
+            for connection_name, nodes in nodes_by_connection.items():
+                try:
+                    # Get adapter for this connection
+                    adapter = self.config.get_adapter(connection_name)
+
+                    # DVT v0.59.0: Set up adapter for macro execution
+                    adapter.set_macro_resolver(self.manifest)
+
+                    # For non-default targets, we need to provide a macro context generator
+                    # that uses the correct adapter. Use the default generator which will
+                    # create the context with the adapter's own config.
+                    def make_connection_macro_context(connection_adapter, config_arg, manifest_arg):
+                        def connection_macro_context(macro, config, manifest_inner, package_name):
+                            # Use the adapter's config for macro context generation
+                            # This ensures the adapter in context matches the connection
+                            return generate_runtime_macro_context(
+                                macro, connection_adapter.config, manifest_inner, package_name
+                            )
+                        return connection_macro_context
+
+                    adapter.set_macro_context_generator(
+                        make_connection_macro_context(adapter, self.config, self.manifest)
+                    )
+
+                    with adapter.connection_named(f"generate_catalog_{connection_name}"):
+                        # Build relations set for this connection if we have selected nodes
+                        connection_relations = None
+                        if self.job_queue is not None and selected_node_ids:
+                            connection_relations = {
+                                adapter.Relation.create_from(adapter.config, node)
+                                for node in nodes
+                                if node.unique_id in selected_node_ids
+                            }
+
+                        # Get schemas used by this connection's nodes
+                        connection_schemas = set()
+                        for node in nodes:
+                            if hasattr(node, 'schema') and node.schema:
+                                if hasattr(node, 'database') and node.database:
+                                    connection_schemas.add((node.database, node.schema))
+
+                        # Query catalog for this connection's nodes
+                        catalog_table_part, connection_exceptions = adapter.get_filtered_catalog(
+                            nodes, connection_schemas, connection_relations
+                        )
+
+                        all_catalog_tables.append(catalog_table_part)
+
+                        # DVT v0.4.7: Filter out "not implemented" errors from Snowflake/other adapters
+                        # that don't support catalog generation
+                        filtered_exceptions = [
+                            e for e in connection_exceptions
+                            if not ("not implemented" in str(e).lower() and
+                                    isinstance(e, dbt.exceptions.CompilationError))
+                        ]
+                        exceptions.extend(filtered_exceptions)
+
+                except dbt.exceptions.CompilationError as e:
+                    # DVT v0.4.9: Universal fallback for adapters without get_catalog_relations
+                    if "not implemented" in str(e).lower():
+                        try:
+                            # Try INFORMATION_SCHEMA fallback (works for most SQL databases)
+                            catalog_table_part = self._get_catalog_via_information_schema(
+                                adapter, connection_name, connection_schemas
+                            )
+                            if catalog_table_part and len(catalog_table_part) > 0:
+                                all_catalog_tables.append(catalog_table_part)
+                                fire_event(
+                                    BuildingCatalog()  # Log success
+                                )
+                        except Exception as fallback_ex:
+                            # DVT v0.4.9: Log fallback errors for debugging
+                            import traceback
+                            fire_event(
+                                CannotGenerateDocs(
+                                    msg=f"INFORMATION_SCHEMA fallback failed for '{connection_name}': {str(fallback_ex)}\n{traceback.format_exc()}"
+                                )
+                            )
+                    else:
+                        # Other compilation errors should be reported
+                        exceptions.append(e)
+                except Exception as e:
+                    # Log error but continue with other connections
+                    exceptions.append(e)
+
+            # Merge all catalog tables into one
+            if all_catalog_tables:
+                # Merge by concatenating rows from all tables
+                if len(all_catalog_tables) == 1:
+                    catalog_table = all_catalog_tables[0]
+                else:
+                    # Combine all tables - they should have the same columns
+                    catalog_table = agate.Table.merge(all_catalog_tables)
+            else:
+                catalog_table = agate.Table([])
+
+        catalog_data: List[PrimitiveDict] = [
+            dict(zip(catalog_table.column_names, map(dbt.utils._coerce_decimal, row)))
+            for row in catalog_table
+        ]
+
+        catalog = Catalog(catalog_data)
+
+        errors: Optional[List[str]] = None
+        if exceptions:
+            errors = [str(e) for e in exceptions]
+
+        nodes, sources = catalog.make_unique_id_map(self.manifest, selected_node_ids)
+        results = self.get_catalog_results(
+            nodes=nodes,
+            sources=sources,
+            generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
+            compile_results=compile_results,
+            errors=errors,
+        )
+
+        catalog_path = os.path.join(self.config.project_target_path, CATALOG_FILENAME)
+        results.write(catalog_path)
+        add_artifact_produced(catalog_path)
+        fire_event(
+            ArtifactWritten(artifact_type=results.__class__.__name__, artifact_path=catalog_path)
+        )
+
+        if self.args.compile:
+            write_manifest(self.manifest, self.config.project_target_path)
+
+        if self.args.static:
+
+            # Read manifest.json and catalog.json
+            read_manifest_data = load_file_contents(
+                os.path.join(self.config.project_target_path, MANIFEST_FILE_NAME)
+            )
+            read_catalog_data = load_file_contents(catalog_path)
+
+            # Create new static index file contents
+            index_data = load_file_contents(DOCS_INDEX_FILE_PATH)
+            index_data = index_data.replace('"MANIFEST.JSON INLINE DATA"', read_manifest_data)
+            index_data = index_data.replace('"CATALOG.JSON INLINE DATA"', read_catalog_data)
+
+            # Write out the new index file
+            static_index_path = os.path.join(self.config.project_target_path, "static_index.html")
+            with open(static_index_path, "wb") as static_index_file:
+                static_index_file.write(bytes(index_data, "utf8"))
+
+        if exceptions:
+            fire_event(WriteCatalogFailure(num_exceptions=len(exceptions)))
+        fire_event(CatalogWritten(path=os.path.abspath(catalog_path)))
+
+        # DVT v0.56.0: Write enriched catalog to metadata_store.duckdb
+        self._write_catalog_to_duckdb(nodes, sources)
+        self._write_lineage_to_duckdb()
+
+        return results
+
+    def get_node_selector(self) -> ResourceTypeSelector:
+        if self.manifest is None or self.graph is None:
+            raise DbtInternalError("manifest and graph must be set to perform node selection")
+        return ResourceTypeSelector(
+            graph=self.graph,
+            manifest=self.manifest,
+            previous_state=self.previous_state,
+            resource_types=EXECUTABLE_NODE_TYPES,
+            include_empty_nodes=True,
+        )
+
+    def get_catalog_results(
+        self,
+        nodes: Dict[str, CatalogTable],
+        sources: Dict[str, CatalogTable],
+        generated_at: datetime,
+        compile_results: Optional[Any],
+        errors: Optional[List[str]],
+    ) -> CatalogArtifact:
+        return CatalogArtifact.from_results(
+            generated_at=generated_at,
+            nodes=nodes,
+            sources=sources,
+            compile_results=compile_results,
+            errors=errors,
+        )
+
+    @classmethod
+    def interpret_results(self, results: Optional[CatalogResults]) -> bool:
+        if results is None:
+            return False
+        if results.errors:
+            return False
+        compile_results = results._compile_results
+        if compile_results is None:
+            return True
+
+        return super().interpret_results(compile_results)
+
+    @staticmethod
+    def _get_nodes_from_ids(manifest: Manifest, node_ids: Iterable[str]) -> List[ResultNode]:
+        selected: List[ResultNode] = []
+        for unique_id in node_ids:
+            if unique_id in manifest.nodes:
+                node = manifest.nodes[unique_id]
+                if node.is_relational and not node.is_ephemeral_model:
+                    selected.append(node)
+            elif unique_id in manifest.sources:
+                source = manifest.sources[unique_id]
+                selected.append(source)
+        return selected
+
+    def _get_selected_source_ids(self) -> Set[UniqueId]:
+        if self.manifest is None or self.graph is None:
+            raise DbtInternalError("manifest and graph must be set to perform node selection")
+
+        source_selector = ResourceTypeSelector(
+            graph=self.graph,
+            manifest=self.manifest,
+            previous_state=self.previous_state,
+            resource_types=[NodeType.Source],
+        )
+
+        return source_selector.get_graph_queue(self.get_selection_spec()).get_selected_nodes()
+
+    def _get_catalog_via_information_schema(
+        self, adapter, connection_name: str, schemas: Set[Tuple[str, str]]
+    ) -> agate.Table:
+        """
+        DVT v0.4.8: Universal fallback for catalog generation using INFORMATION_SCHEMA.
+
+        Works for most SQL databases (Postgres, MySQL, Snowflake, Redshift, BigQuery, SQL Server).
+        Falls back gracefully for databases without INFORMATION_SCHEMA (Oracle, DB2).
+
+        :param adapter: Database adapter
+        :param connection_name: Connection name for logging
+        :param schemas: Set of (database, schema) tuples to query
+        :return: agate.Table with catalog data
+        """
+        if not schemas:
+            return agate.Table([])
+
+        # Build WHERE clause for schemas
+        schema_conditions = []
+        for database, schema in schemas:
+            # Most databases only need schema filter, some need database too
+            schema_conditions.append(f"table_schema = '{schema}'")
+
+        where_clause = " OR ".join(schema_conditions)
+
+        # Universal INFORMATION_SCHEMA query (works for most SQL databases)
+        query = f"""
+            SELECT
+                table_catalog as table_database,
+                table_schema,
+                table_name,
+                column_name,
+                data_type,
+                ordinal_position as column_index
+            FROM information_schema.columns
+            WHERE {where_clause}
+            ORDER BY table_schema, table_name, ordinal_position
+        """
+
+        try:
+            # Execute query using adapter's connection
+            _, result = adapter.execute(query, auto_begin=False, fetch=True)
+
+            # Convert to agate.Table format expected by catalog
+            if result and len(result) > 0:
+                # Transform result into catalog format
+                catalog_data = []
+                for row in result:
+                    catalog_data.append({
+                        'table_database': row[0],
+                        'table_schema': row[1],
+                        'table_name': row[2],
+                        'column_name': row[3],
+                        'column_type': row[4],
+                        'column_index': row[5]
+                    })
+
+                # Create agate.Table with proper column types
+                return agate.Table(catalog_data)
+            else:
+                return agate.Table([])
+
+        except Exception as e:
+            # Fallback failed - database might not support INFORMATION_SCHEMA
+            # (e.g., Oracle, DB2, or permission issues)
+            fire_event(
+                CannotGenerateDocs(
+                    msg=f"INFORMATION_SCHEMA fallback failed for '{connection_name}': {str(e)}"
+                )
+            )
+            return agate.Table([])
+
+    # =========================================================================
+    # DVT v0.59.0: DuckDB Catalog and Lineage Storage (uses CatalogStore)
+    # =========================================================================
+
+    def _write_catalog_to_duckdb(
+        self,
+        nodes: Dict[str, CatalogTable],
+        sources: Dict[str, CatalogTable],
+    ) -> None:
+        """
+        Write enriched catalog to catalog.duckdb.
+
+        DVT v0.59.0: Uses CatalogStore (separate from metastore.duckdb).
+        Stores catalog nodes with connection info, adapter type,
+        and visual enrichment (icons, colors) for enhanced docs serve.
+        """
+        try:
+            import json
+            from pathlib import Path
+            from dbt.compute.metadata import CatalogStore, CatalogNode
+
+            project_root = Path(self.config.project_root)
+            store = CatalogStore(project_root)
+            store.initialize(drop_existing=False)  # Don't drop - just ensure exists
+
+            # Clear existing catalog data
+            store.clear_catalog_nodes()
+
+            # Adapter icon mapping
+            adapter_icons = {
+                'postgres': 'postgresql',
+                'snowflake': 'snowflake',
+                'bigquery': 'bigquery',
+                'redshift': 'redshift',
+                'databricks': 'databricks',
+                'spark': 'spark',
+                'duckdb': 'duckdb',
+                'mysql': 'mysql',
+                'sqlserver': 'sqlserver',
+                'oracle': 'oracle',
+            }
+
+            # Connection color mapping (for visual distinction)
+            connection_colors = [
+                '#3498db',  # Blue
+                '#2ecc71',  # Green
+                '#e74c3c',  # Red
+                '#9b59b6',  # Purple
+                '#f39c12',  # Orange
+                '#1abc9c',  # Teal
+                '#e91e63',  # Pink
+                '#607d8b',  # Blue Grey
+            ]
+            color_index = 0
+            connection_color_map: Dict[str, str] = {}
+
+            # Process nodes (models)
+            for unique_id, table in nodes.items():
+                node = self.manifest.nodes.get(unique_id) if self.manifest else None
+
+                # Get connection and adapter info
+                connection_name = "default"
+                adapter_type = None
+                materialized = None
+                tags = []
+                meta = {}
+
+                if node:
+                    if hasattr(node.config, 'target') and node.config.target:
+                        connection_name = node.config.target
+                    if hasattr(node.config, 'materialized'):
+                        materialized = node.config.materialized
+                    if hasattr(node, 'tags'):
+                        tags = list(node.tags)
+                    if hasattr(node, 'meta'):
+                        meta = dict(node.meta) if node.meta else {}
+
+                # Assign connection color
+                if connection_name not in connection_color_map:
+                    connection_color_map[connection_name] = connection_colors[color_index % len(connection_colors)]
+                    color_index += 1
+
+                # Get adapter type from connection
+                try:
+                    adapter = self.config.get_adapter(connection_name)
+                    adapter_type = adapter.type() if hasattr(adapter, 'type') else None
+                except Exception:
+                    adapter_type = None
+
+                icon_type = adapter_icons.get(adapter_type, 'database') if adapter_type else 'database'
+
+                # Serialize columns
+                columns_data = []
+                for col_name, col_meta in table.columns.items():
+                    columns_data.append({
+                        'name': col_name,
+                        'type': col_meta.type if hasattr(col_meta, 'type') else None,
+                        'comment': col_meta.comment if hasattr(col_meta, 'comment') else None,
+                    })
+
+                # Get row count from stats
+                row_count = None
+                if hasattr(table, 'stats') and table.stats:
+                    for stat_id, stat in table.stats.items():
+                        if stat_id == 'row_count' and hasattr(stat, 'value'):
+                            try:
+                                row_count = int(stat.value)
+                            except (ValueError, TypeError):
+                                pass
+
+                catalog_node = CatalogNode(
+                    unique_id=unique_id,
+                    resource_type='model',
+                    name=node.name if node else table.metadata.name,
+                    schema_name=table.metadata.schema,
+                    database=table.metadata.database,
+                    connection_name=connection_name,
+                    adapter_type=adapter_type,
+                    description=node.description if node and hasattr(node, 'description') else None,
+                    icon_type=icon_type,
+                    color_hex=connection_color_map.get(connection_name),
+                    materialized=materialized,
+                    tags=json.dumps(tags) if tags else None,
+                    meta=json.dumps(meta) if meta else None,
+                    columns=json.dumps(columns_data) if columns_data else None,
+                    row_count=row_count,
+                )
+                store.save_catalog_node(catalog_node)
+
+            # Process sources
+            for unique_id, table in sources.items():
+                source = self.manifest.sources.get(unique_id) if self.manifest else None
+
+                # Get connection and adapter info
+                connection_name = "default"
+                adapter_type = None
+                tags = []
+                meta = {}
+
+                if source:
+                    if hasattr(source, 'connection') and source.connection:
+                        connection_name = source.connection
+                    elif hasattr(source, 'meta') and source.meta and source.meta.get('connection'):
+                        connection_name = source.meta.get('connection')
+                    if hasattr(source, 'tags'):
+                        tags = list(source.tags)
+                    if hasattr(source, 'meta'):
+                        meta = dict(source.meta) if source.meta else {}
+
+                # Assign connection color
+                if connection_name not in connection_color_map:
+                    connection_color_map[connection_name] = connection_colors[color_index % len(connection_colors)]
+                    color_index += 1
+
+                # Get adapter type from connection
+                try:
+                    adapter = self.config.get_adapter(connection_name)
+                    adapter_type = adapter.type() if hasattr(adapter, 'type') else None
+                except Exception:
+                    adapter_type = None
+
+                icon_type = adapter_icons.get(adapter_type, 'database') if adapter_type else 'database'
+
+                # Serialize columns
+                columns_data = []
+                for col_name, col_meta in table.columns.items():
+                    columns_data.append({
+                        'name': col_name,
+                        'type': col_meta.type if hasattr(col_meta, 'type') else None,
+                        'comment': col_meta.comment if hasattr(col_meta, 'comment') else None,
+                    })
+
+                catalog_node = CatalogNode(
+                    unique_id=unique_id,
+                    resource_type='source',
+                    name=source.name if source else table.metadata.name,
+                    schema_name=table.metadata.schema,
+                    database=table.metadata.database,
+                    connection_name=connection_name,
+                    adapter_type=adapter_type,
+                    description=source.description if source and hasattr(source, 'description') else None,
+                    icon_type=icon_type,
+                    color_hex=connection_color_map.get(connection_name),
+                    tags=json.dumps(tags) if tags else None,
+                    meta=json.dumps(meta) if meta else None,
+                    columns=json.dumps(columns_data) if columns_data else None,
+                )
+                store.save_catalog_node(catalog_node)
+
+            store.close()
+            fire_event(CatalogWritten(path=str(store.db_path)))
+
+        except ImportError:
+            # DuckDB not installed - skip
+            pass
+        except Exception as e:
+            # Log but don't fail catalog generation
+            fire_event(
+                CannotGenerateDocs(msg=f"Could not write catalog to DuckDB: {str(e)}")
+            )
+
+    def _write_lineage_to_duckdb(self) -> None:
+        """
+        Write lineage edges to catalog.duckdb.
+
+        DVT v0.59.0: Uses CatalogStore (separate from metastore.duckdb).
+        Stores full DAG with cross-connection indicators
+        for enhanced visualization in docs serve.
+        """
+        if self.manifest is None:
+            return
+
+        try:
+            from pathlib import Path
+            from dbt.compute.metadata import CatalogStore, LineageEdge
+
+            project_root = Path(self.config.project_root)
+            store = CatalogStore(project_root)
+            store.initialize(drop_existing=False)  # Don't drop - just ensure exists
+
+            # Clear existing lineage data
+            store.clear_lineage_edges()
+
+            # Build connection map for cross-connection detection
+            node_connections: Dict[str, str] = {}
+
+            # Map nodes to connections
+            for unique_id, node in self.manifest.nodes.items():
+                if hasattr(node.config, 'target') and node.config.target:
+                    node_connections[unique_id] = node.config.target
+                else:
+                    node_connections[unique_id] = self.config.target_name
+
+            # Map sources to connections
+            for unique_id, source in self.manifest.sources.items():
+                if hasattr(source, 'connection') and source.connection:
+                    node_connections[unique_id] = source.connection
+                elif hasattr(source, 'meta') and source.meta and source.meta.get('connection'):
+                    node_connections[unique_id] = source.meta.get('connection')
+                else:
+                    node_connections[unique_id] = self.config.target_name
+
+            # Process dependencies
+            for unique_id, node in self.manifest.nodes.items():
+                if not hasattr(node, 'depends_on') or not node.depends_on:
+                    continue
+
+                # Some depends_on objects don't have 'nodes' (e.g., MacroDependsOn)
+                if not hasattr(node.depends_on, 'nodes') or not node.depends_on.nodes:
+                    continue
+
+                target_connection = node_connections.get(unique_id, self.config.target_name)
+
+                # Process node dependencies
+                for dep_id in node.depends_on.nodes:
+                    source_connection = node_connections.get(dep_id, self.config.target_name)
+
+                    # Determine edge type
+                    if dep_id.startswith('source.'):
+                        edge_type = 'source'
+                    elif dep_id.startswith('model.'):
+                        edge_type = 'ref'
+                    else:
+                        edge_type = 'depends_on'
+
+                    is_cross = source_connection != target_connection
+
+                    edge = LineageEdge(
+                        source_node_id=dep_id,
+                        target_node_id=unique_id,
+                        edge_type=edge_type,
+                        is_cross_connection=is_cross,
+                        source_connection=source_connection,
+                        target_connection=target_connection,
+                    )
+                    store.save_lineage_edge(edge)
+
+            store.close()
+
+        except ImportError:
+            # DuckDB not installed - skip
+            pass
+        except Exception as e:
+            # Log but don't fail
+            fire_event(
+                CannotGenerateDocs(msg=f"Could not write lineage to DuckDB: {str(e)}")
+            )