dvt-core 0.59.0a51__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt/__init__.py +7 -0
- dbt/_pydantic_shim.py +26 -0
- dbt/artifacts/__init__.py +0 -0
- dbt/artifacts/exceptions/__init__.py +1 -0
- dbt/artifacts/exceptions/schemas.py +31 -0
- dbt/artifacts/resources/__init__.py +116 -0
- dbt/artifacts/resources/base.py +67 -0
- dbt/artifacts/resources/types.py +93 -0
- dbt/artifacts/resources/v1/analysis.py +10 -0
- dbt/artifacts/resources/v1/catalog.py +23 -0
- dbt/artifacts/resources/v1/components.py +274 -0
- dbt/artifacts/resources/v1/config.py +277 -0
- dbt/artifacts/resources/v1/documentation.py +11 -0
- dbt/artifacts/resources/v1/exposure.py +51 -0
- dbt/artifacts/resources/v1/function.py +52 -0
- dbt/artifacts/resources/v1/generic_test.py +31 -0
- dbt/artifacts/resources/v1/group.py +21 -0
- dbt/artifacts/resources/v1/hook.py +11 -0
- dbt/artifacts/resources/v1/macro.py +29 -0
- dbt/artifacts/resources/v1/metric.py +172 -0
- dbt/artifacts/resources/v1/model.py +145 -0
- dbt/artifacts/resources/v1/owner.py +10 -0
- dbt/artifacts/resources/v1/saved_query.py +111 -0
- dbt/artifacts/resources/v1/seed.py +41 -0
- dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dbt/artifacts/resources/v1/semantic_model.py +314 -0
- dbt/artifacts/resources/v1/singular_test.py +14 -0
- dbt/artifacts/resources/v1/snapshot.py +91 -0
- dbt/artifacts/resources/v1/source_definition.py +84 -0
- dbt/artifacts/resources/v1/sql_operation.py +10 -0
- dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
- dbt/artifacts/schemas/__init__.py +0 -0
- dbt/artifacts/schemas/base.py +191 -0
- dbt/artifacts/schemas/batch_results.py +24 -0
- dbt/artifacts/schemas/catalog/__init__.py +11 -0
- dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
- dbt/artifacts/schemas/freshness/__init__.py +1 -0
- dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
- dbt/artifacts/schemas/manifest/__init__.py +2 -0
- dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
- dbt/artifacts/schemas/results.py +147 -0
- dbt/artifacts/schemas/run/__init__.py +2 -0
- dbt/artifacts/schemas/run/v5/__init__.py +0 -0
- dbt/artifacts/schemas/run/v5/run.py +184 -0
- dbt/artifacts/schemas/upgrades/__init__.py +4 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dbt/artifacts/utils/validation.py +153 -0
- dbt/cli/__init__.py +1 -0
- dbt/cli/context.py +17 -0
- dbt/cli/exceptions.py +57 -0
- dbt/cli/flags.py +560 -0
- dbt/cli/main.py +2660 -0
- dbt/cli/option_types.py +121 -0
- dbt/cli/options.py +80 -0
- dbt/cli/params.py +844 -0
- dbt/cli/requires.py +490 -0
- dbt/cli/resolvers.py +60 -0
- dbt/cli/types.py +40 -0
- dbt/clients/__init__.py +0 -0
- dbt/clients/checked_load.py +83 -0
- dbt/clients/git.py +164 -0
- dbt/clients/jinja.py +206 -0
- dbt/clients/jinja_static.py +245 -0
- dbt/clients/registry.py +192 -0
- dbt/clients/yaml_helper.py +68 -0
- dbt/compilation.py +876 -0
- dbt/compute/__init__.py +14 -0
- dbt/compute/engines/__init__.py +12 -0
- dbt/compute/engines/spark_engine.py +642 -0
- dbt/compute/federated_executor.py +1080 -0
- dbt/compute/filter_pushdown.py +273 -0
- dbt/compute/jar_provisioning.py +273 -0
- dbt/compute/java_compat.py +689 -0
- dbt/compute/jdbc_utils.py +1252 -0
- dbt/compute/metadata/__init__.py +63 -0
- dbt/compute/metadata/adapters_registry.py +370 -0
- dbt/compute/metadata/catalog_store.py +1036 -0
- dbt/compute/metadata/registry.py +674 -0
- dbt/compute/metadata/store.py +1020 -0
- dbt/compute/smart_selector.py +377 -0
- dbt/compute/spark_logger.py +272 -0
- dbt/compute/strategies/__init__.py +55 -0
- dbt/compute/strategies/base.py +165 -0
- dbt/compute/strategies/dataproc.py +207 -0
- dbt/compute/strategies/emr.py +203 -0
- dbt/compute/strategies/local.py +472 -0
- dbt/compute/strategies/standalone.py +262 -0
- dbt/config/__init__.py +4 -0
- dbt/config/catalogs.py +94 -0
- dbt/config/compute.py +513 -0
- dbt/config/dvt_profile.py +408 -0
- dbt/config/profile.py +422 -0
- dbt/config/project.py +888 -0
- dbt/config/project_utils.py +48 -0
- dbt/config/renderer.py +231 -0
- dbt/config/runtime.py +564 -0
- dbt/config/selectors.py +208 -0
- dbt/config/utils.py +77 -0
- dbt/constants.py +28 -0
- dbt/context/__init__.py +0 -0
- dbt/context/base.py +745 -0
- dbt/context/configured.py +135 -0
- dbt/context/context_config.py +382 -0
- dbt/context/docs.py +82 -0
- dbt/context/exceptions_jinja.py +178 -0
- dbt/context/macro_resolver.py +195 -0
- dbt/context/macros.py +171 -0
- dbt/context/manifest.py +72 -0
- dbt/context/providers.py +2249 -0
- dbt/context/query_header.py +13 -0
- dbt/context/secret.py +58 -0
- dbt/context/target.py +74 -0
- dbt/contracts/__init__.py +0 -0
- dbt/contracts/files.py +413 -0
- dbt/contracts/graph/__init__.py +0 -0
- dbt/contracts/graph/manifest.py +1904 -0
- dbt/contracts/graph/metrics.py +97 -0
- dbt/contracts/graph/model_config.py +70 -0
- dbt/contracts/graph/node_args.py +42 -0
- dbt/contracts/graph/nodes.py +1806 -0
- dbt/contracts/graph/semantic_manifest.py +232 -0
- dbt/contracts/graph/unparsed.py +811 -0
- dbt/contracts/project.py +419 -0
- dbt/contracts/results.py +53 -0
- dbt/contracts/selection.py +23 -0
- dbt/contracts/sql.py +85 -0
- dbt/contracts/state.py +68 -0
- dbt/contracts/util.py +46 -0
- dbt/deprecations.py +348 -0
- dbt/deps/__init__.py +0 -0
- dbt/deps/base.py +152 -0
- dbt/deps/git.py +195 -0
- dbt/deps/local.py +79 -0
- dbt/deps/registry.py +130 -0
- dbt/deps/resolver.py +149 -0
- dbt/deps/tarball.py +120 -0
- dbt/docs/source/_ext/dbt_click.py +119 -0
- dbt/docs/source/conf.py +32 -0
- dbt/env_vars.py +64 -0
- dbt/event_time/event_time.py +40 -0
- dbt/event_time/sample_window.py +60 -0
- dbt/events/__init__.py +15 -0
- dbt/events/base_types.py +36 -0
- dbt/events/core_types_pb2.py +2 -0
- dbt/events/logging.py +108 -0
- dbt/events/types.py +2516 -0
- dbt/exceptions.py +1486 -0
- dbt/flags.py +89 -0
- dbt/graph/__init__.py +11 -0
- dbt/graph/cli.py +249 -0
- dbt/graph/graph.py +172 -0
- dbt/graph/queue.py +214 -0
- dbt/graph/selector.py +374 -0
- dbt/graph/selector_methods.py +975 -0
- dbt/graph/selector_spec.py +222 -0
- dbt/graph/thread_pool.py +18 -0
- dbt/hooks.py +21 -0
- dbt/include/README.md +49 -0
- dbt/include/__init__.py +3 -0
- dbt/include/data/adapters_registry.duckdb +0 -0
- dbt/include/data/build_comprehensive_registry.py +1254 -0
- dbt/include/data/build_registry.py +242 -0
- dbt/include/data/csv/adapter_queries.csv +33 -0
- dbt/include/data/csv/syntax_rules.csv +9 -0
- dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
- dbt/include/data/csv/type_mappings_databricks.csv +30 -0
- dbt/include/data/csv/type_mappings_mysql.csv +40 -0
- dbt/include/data/csv/type_mappings_oracle.csv +30 -0
- dbt/include/data/csv/type_mappings_postgres.csv +56 -0
- dbt/include/data/csv/type_mappings_redshift.csv +33 -0
- dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
- dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
- dbt/include/dvt_starter_project/README.md +15 -0
- dbt/include/dvt_starter_project/__init__.py +3 -0
- dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/dvt_project.yml +39 -0
- dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
- dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
- dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
- dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
- dbt/internal_deprecations.py +26 -0
- dbt/jsonschemas/__init__.py +3 -0
- dbt/jsonschemas/jsonschemas.py +309 -0
- dbt/jsonschemas/project/0.0.110.json +4717 -0
- dbt/jsonschemas/project/0.0.85.json +2015 -0
- dbt/jsonschemas/resources/0.0.110.json +2636 -0
- dbt/jsonschemas/resources/0.0.85.json +2536 -0
- dbt/jsonschemas/resources/latest.json +6773 -0
- dbt/links.py +4 -0
- dbt/materializations/__init__.py +0 -0
- dbt/materializations/incremental/__init__.py +0 -0
- dbt/materializations/incremental/microbatch.py +236 -0
- dbt/mp_context.py +8 -0
- dbt/node_types.py +37 -0
- dbt/parser/__init__.py +23 -0
- dbt/parser/analysis.py +21 -0
- dbt/parser/base.py +548 -0
- dbt/parser/common.py +266 -0
- dbt/parser/docs.py +52 -0
- dbt/parser/fixtures.py +51 -0
- dbt/parser/functions.py +30 -0
- dbt/parser/generic_test.py +100 -0
- dbt/parser/generic_test_builders.py +333 -0
- dbt/parser/hooks.py +122 -0
- dbt/parser/macros.py +137 -0
- dbt/parser/manifest.py +2208 -0
- dbt/parser/models.py +573 -0
- dbt/parser/partial.py +1178 -0
- dbt/parser/read_files.py +445 -0
- dbt/parser/schema_generic_tests.py +422 -0
- dbt/parser/schema_renderer.py +111 -0
- dbt/parser/schema_yaml_readers.py +935 -0
- dbt/parser/schemas.py +1466 -0
- dbt/parser/search.py +149 -0
- dbt/parser/seeds.py +28 -0
- dbt/parser/singular_test.py +20 -0
- dbt/parser/snapshots.py +44 -0
- dbt/parser/sources.py +558 -0
- dbt/parser/sql.py +62 -0
- dbt/parser/unit_tests.py +621 -0
- dbt/plugins/__init__.py +20 -0
- dbt/plugins/contracts.py +9 -0
- dbt/plugins/exceptions.py +2 -0
- dbt/plugins/manager.py +163 -0
- dbt/plugins/manifest.py +21 -0
- dbt/profiler.py +20 -0
- dbt/py.typed +1 -0
- dbt/query_analyzer.py +410 -0
- dbt/runners/__init__.py +2 -0
- dbt/runners/exposure_runner.py +7 -0
- dbt/runners/no_op_runner.py +45 -0
- dbt/runners/saved_query_runner.py +7 -0
- dbt/selected_resources.py +8 -0
- dbt/task/__init__.py +0 -0
- dbt/task/base.py +506 -0
- dbt/task/build.py +197 -0
- dbt/task/clean.py +56 -0
- dbt/task/clone.py +161 -0
- dbt/task/compile.py +150 -0
- dbt/task/compute.py +458 -0
- dbt/task/debug.py +513 -0
- dbt/task/deps.py +280 -0
- dbt/task/docs/__init__.py +3 -0
- dbt/task/docs/api/__init__.py +23 -0
- dbt/task/docs/api/catalog.py +204 -0
- dbt/task/docs/api/lineage.py +234 -0
- dbt/task/docs/api/profile.py +204 -0
- dbt/task/docs/api/spark.py +186 -0
- dbt/task/docs/generate.py +1002 -0
- dbt/task/docs/index.html +250 -0
- dbt/task/docs/serve.py +174 -0
- dbt/task/dvt_output.py +509 -0
- dbt/task/dvt_run.py +282 -0
- dbt/task/dvt_seed.py +806 -0
- dbt/task/freshness.py +322 -0
- dbt/task/function.py +121 -0
- dbt/task/group_lookup.py +46 -0
- dbt/task/init.py +1022 -0
- dbt/task/java.py +316 -0
- dbt/task/list.py +236 -0
- dbt/task/metadata.py +804 -0
- dbt/task/migrate.py +714 -0
- dbt/task/printer.py +175 -0
- dbt/task/profile.py +1489 -0
- dbt/task/profile_serve.py +662 -0
- dbt/task/retract.py +441 -0
- dbt/task/retry.py +175 -0
- dbt/task/run.py +1647 -0
- dbt/task/run_operation.py +141 -0
- dbt/task/runnable.py +758 -0
- dbt/task/seed.py +103 -0
- dbt/task/show.py +149 -0
- dbt/task/snapshot.py +56 -0
- dbt/task/spark.py +414 -0
- dbt/task/sql.py +110 -0
- dbt/task/target_sync.py +814 -0
- dbt/task/test.py +464 -0
- dbt/tests/fixtures/__init__.py +1 -0
- dbt/tests/fixtures/project.py +620 -0
- dbt/tests/util.py +651 -0
- dbt/tracking.py +529 -0
- dbt/utils/__init__.py +3 -0
- dbt/utils/artifact_upload.py +151 -0
- dbt/utils/utils.py +408 -0
- dbt/version.py +271 -0
- dvt_cli/__init__.py +158 -0
- dvt_core-0.59.0a51.dist-info/METADATA +288 -0
- dvt_core-0.59.0a51.dist-info/RECORD +299 -0
- dvt_core-0.59.0a51.dist-info/WHEEL +5 -0
- dvt_core-0.59.0a51.dist-info/entry_points.txt +2 -0
- dvt_core-0.59.0a51.dist-info/top_level.txt +2 -0
@@ -0,0 +1,234 @@ dbt/task/docs/api/lineage.py (new file)

```python
"""
DVT Docs Serve - Lineage API

v0.56.0: Lineage graph, node traversal, and cross-connection edges.
"""

from pathlib import Path
from typing import Any, Dict, List, Optional

try:
    from fastapi import APIRouter, HTTPException, Query
except ImportError:
    class APIRouter:
        def __init__(self, *args, **kwargs):
            pass

        def get(self, *args, **kwargs):
            def decorator(func):
                return func
            return decorator

    class HTTPException(Exception):
        def __init__(self, status_code, detail):
            self.status_code = status_code
            self.detail = detail

    def Query(*args, **kwargs):
        return None


router = APIRouter(prefix="/api/lineage", tags=["lineage"])

# Will be set by serve.py
_project_root: Optional[Path] = None
_store_initialized: bool = False


def set_project_root(project_root: Path) -> None:
    """Set project root for API access."""
    global _project_root, _store_initialized
    _project_root = project_root
    _store_initialized = True  # Assume serve.py already initialized


def _get_store():
    """Get metadata store instance (lazy initialization)."""
    global _store_initialized

    if _project_root is None:
        raise HTTPException(status_code=500, detail="Project root not configured")

    try:
        from dbt.compute.metadata import ProjectMetadataStore
        store = ProjectMetadataStore(_project_root)
        # Only initialize if not already done by serve.py
        if not _store_initialized:
            store.initialize()
            _store_initialized = True
        return store
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Could not open metadata store: {e}")


@router.get("/graph")
async def get_lineage_graph() -> Dict[str, Any]:
    """
    Get the full lineage graph.

    Returns nodes and edges suitable for visualization.
    """
    store = _get_store()

    try:
        graph = store.get_lineage_graph()
        return graph
    finally:
        store.close()


@router.get("/node/{node_id}")
async def get_node_lineage(
    node_id: str,
    depth: int = Query(1, ge=1, le=10, description="Traversal depth"),
) -> Dict[str, Any]:
    """
    Get lineage for a specific node.

    Returns upstream and downstream nodes up to specified depth using BFS traversal.
    """
    store = _get_store()

    try:
        # Get the node
        node = store.get_catalog_node(node_id)
        if not node:
            raise HTTPException(status_code=404, detail=f"Node not found: {node_id}")

        # BFS traversal for upstream nodes
        upstream_nodes = _traverse_lineage(
            store, node_id, depth, direction="upstream"
        )

        # BFS traversal for downstream nodes
        downstream_nodes = _traverse_lineage(
            store, node_id, depth, direction="downstream"
        )

        return {
            "node": {
                "unique_id": node.unique_id,
                "name": node.name,
                "resource_type": node.resource_type,
                "connection": node.connection_name,
                "icon": node.icon_type,
                "color": node.color_hex,
            },
            "upstream": upstream_nodes,
            "downstream": downstream_nodes,
            "depth": depth,
        }
    finally:
        store.close()


def _traverse_lineage(
    store, start_node_id: str, max_depth: int, direction: str
) -> List[Dict[str, Any]]:
    """
    BFS traversal of lineage graph.

    Args:
        store: ProjectMetadataStore instance
        start_node_id: Starting node ID
        max_depth: Maximum traversal depth
        direction: "upstream" or "downstream"

    Returns:
        List of nodes with depth information
    """
    from collections import deque

    visited = set()
    result = []

    # Queue: (node_id, current_depth, edge_info)
    queue = deque([(start_node_id, 0, None)])
    visited.add(start_node_id)

    while queue:
        current_id, current_depth, edge_info = queue.popleft()

        # Skip the start node itself
        if current_depth > 0:
            node = store.get_catalog_node(current_id)
            if node:
                result.append({
                    "unique_id": node.unique_id,
                    "name": node.name,
                    "resource_type": node.resource_type,
                    "connection": node.connection_name,
                    "depth": current_depth,
                    "edge_type": edge_info.edge_type if edge_info else None,
                    "is_cross_connection": edge_info.is_cross_connection if edge_info else False,
                })

        # Stop if we've reached max depth
        if current_depth >= max_depth:
            continue

        # Get edges based on direction
        if direction == "upstream":
            edges = store.get_upstream_edges(current_id)
            next_nodes = [(e.source_node_id, e) for e in edges]
        else:
            edges = store.get_downstream_edges(current_id)
            next_nodes = [(e.target_node_id, e) for e in edges]

        # Add unvisited neighbors to queue
        for next_id, edge in next_nodes:
            if next_id not in visited:
                visited.add(next_id)
                queue.append((next_id, current_depth + 1, edge))

    return result


@router.get("/cross-connection")
async def get_cross_connection_edges() -> List[Dict[str, Any]]:
    """
    Get all cross-connection edges.

    Returns edges that span different database connections.
    """
    store = _get_store()

    try:
        edges = store.get_cross_connection_edges()
        return [
            {
                "id": e.id,
                "source_node_id": e.source_node_id,
                "target_node_id": e.target_node_id,
                "edge_type": e.edge_type,
                "source_connection": e.source_connection,
                "target_connection": e.target_connection,
            }
            for e in edges
        ]
    finally:
        store.close()


@router.get("/stats")
async def get_lineage_stats() -> Dict[str, Any]:
    """
    Get lineage statistics.

    Returns edge counts, cross-connection counts, etc.
    """
    store = _get_store()

    try:
        all_edges = store.get_all_lineage_edges()
        cross_edges = store.get_cross_connection_edges()

        # Count by edge type
        type_counts: Dict[str, int] = {}
        for edge in all_edges:
            type_counts[edge.edge_type] = type_counts.get(edge.edge_type, 0) + 1

        return {
            "total_edges": len(all_edges),
            "cross_connection_edges": len(cross_edges),
            "by_type": type_counts,
        }
    finally:
        store.close()
```
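A minimal sketch of mounting and exercising this router, assuming fastapi and httpx are installed and a metadata store already exists under the project root. The app wiring and project path below are illustrative; the real mounting lives in dbt/task/docs/serve.py, which is not shown in this diff:

```python
# Illustrative wiring only; not code from this package.
from pathlib import Path

from fastapi import FastAPI
from fastapi.testclient import TestClient  # TestClient needs the httpx package

from dbt.task.docs.api import lineage

app = FastAPI()
app.include_router(lineage.router)

# set_project_root assumes serve.py already initialized the store.
lineage.set_project_root(Path("/path/to/dvt/project"))  # hypothetical path

client = TestClient(app)

# Full graph: nodes and edges suitable for visualization (per the docstring).
graph = client.get("/api/lineage/graph").json()

# Two-hop BFS neighborhood around one node; depth is clamped to 1..10 by
# Query(1, ge=1, le=10), and unknown node ids return 404.
resp = client.get(
    "/api/lineage/node/model.my_project.my_model",  # hypothetical node id
    params={"depth": 2},
)
print(resp.status_code, resp.json())
```

Each handler opens and closes its own store, so requests are independent; and because `_traverse_lineage` tracks visited nodes, cycles cannot inflate the response: depth, not graph shape, bounds its size.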
@@ -0,0 +1,204 @@ dbt/task/docs/api/profile.py (new file)

```python
"""
DVT Docs Serve - Profile API

v0.56.0: Profile results, alerts, and table profiles.
"""

from pathlib import Path
from typing import Any, Dict, List, Optional

try:
    from fastapi import APIRouter, HTTPException, Query
except ImportError:
    class APIRouter:
        def __init__(self, *args, **kwargs):
            pass

        def get(self, *args, **kwargs):
            def decorator(func):
                return func
            return decorator

    class HTTPException(Exception):
        def __init__(self, status_code, detail):
            self.status_code = status_code
            self.detail = detail

    def Query(*args, **kwargs):
        return None


router = APIRouter(prefix="/api/profile", tags=["profile"])

# Will be set by serve.py
_project_root: Optional[Path] = None
_store_initialized: bool = False


def set_project_root(project_root: Path) -> None:
    """Set project root for API access."""
    global _project_root, _store_initialized
    _project_root = project_root
    _store_initialized = True  # Assume serve.py already initialized


def _get_store():
    """Get metadata store instance (lazy initialization)."""
    global _store_initialized

    if _project_root is None:
        raise HTTPException(status_code=500, detail="Project root not configured")

    try:
        from dbt.compute.metadata import ProjectMetadataStore
        store = ProjectMetadataStore(_project_root)
        # Only initialize if not already done by serve.py
        if not _store_initialized:
            store.initialize()
            _store_initialized = True
        return store
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Could not open metadata store: {e}")


@router.get("/tables")
async def list_profiled_tables() -> List[Dict[str, Any]]:
    """
    List all profiled tables.

    Returns table names with last profile timestamp and mode.
    """
    store = _get_store()

    try:
        tables = store.get_all_profiled_tables()
        return [
            {
                "source_name": t[0],
                "table_name": t[1],
                "profile_mode": t[2],
                "last_profiled": t[3].isoformat() if t[3] else None,
            }
            for t in tables
        ]
    finally:
        store.close()


@router.get("/tables/{source_name}/{table_name}")
async def get_table_profile(
    source_name: str,
    table_name: str,
    mode: Optional[str] = Query(None, description="Profile mode filter"),
) -> Dict[str, Any]:
    """
    Get profile results for a specific table.

    Returns all column profiles with metrics.
    """
    import json

    store = _get_store()

    try:
        profiles = store.get_table_profile(source_name, table_name, mode)
        if not profiles:
            raise HTTPException(
                status_code=404,
                detail=f"No profile found for {source_name}.{table_name}"
            )

        # Get first profile for table-level stats
        first = profiles[0]

        return {
            "source_name": source_name,
            "table_name": table_name,
            "profile_mode": first.profile_mode,
            "row_count": first.row_count,
            "column_count": len(profiles),
            "profiled_at": first.profiled_at.isoformat() if first.profiled_at else None,
            "columns": [
                {
                    "column_name": p.column_name,
                    "null_count": p.null_count,
                    "null_percent": p.null_percent,
                    "distinct_count": p.distinct_count,
                    "distinct_percent": p.distinct_percent,
                    "min_value": p.min_value,
                    "max_value": p.max_value,
                    "mean_value": p.mean_value,
                    "median_value": p.median_value,
                    "stddev_value": p.stddev_value,
                    "p25": p.p25,
                    "p50": p.p50,
                    "p75": p.p75,
                    "min_length": p.min_length,
                    "max_length": p.max_length,
                    "avg_length": p.avg_length,
                    "histogram": json.loads(p.histogram) if p.histogram else None,
                    "top_values": json.loads(p.top_values) if p.top_values else None,
                    "alerts": json.loads(p.alerts) if p.alerts else [],
                    "duration_ms": p.duration_ms,
                }
                for p in profiles
            ],
        }
    finally:
        store.close()


@router.get("/alerts")
async def get_profile_alerts(
    source_name: Optional[str] = Query(None, description="Filter by source"),
    severity: Optional[str] = Query(None, description="Filter by severity: info, warning, error"),
) -> List[Dict[str, Any]]:
    """
    Get all profile alerts.

    Returns alerts from profile results, optionally filtered.
    """
    store = _get_store()

    try:
        alerts = store.get_profile_alerts(source_name)

        # Filter by severity if provided
        if severity:
            alerts = [a for a in alerts if a.get("severity") == severity]

        return alerts
    finally:
        store.close()


@router.get("/stats")
async def get_profile_stats() -> Dict[str, Any]:
    """
    Get profiling statistics.

    Returns counts of profiled tables, columns, alerts.
    """
    store = _get_store()

    try:
        tables = store.get_all_profiled_tables()
        alerts = store.get_profile_alerts()

        # Count alerts by type
        alert_types: Dict[str, int] = {}
        for alert in alerts:
            alert_type = alert.get("type", "unknown")
            alert_types[alert_type] = alert_types.get(alert_type, 0) + 1

        # Count alerts by severity
        alert_severities: Dict[str, int] = {}
        for alert in alerts:
            severity = alert.get("severity", "info")
            alert_severities[severity] = alert_severities.get(severity, 0) + 1

        return {
            "total_tables": len(tables),
            "total_alerts": len(alerts),
            "alerts_by_type": alert_types,
            "alerts_by_severity": alert_severities,
        }
    finally:
        store.close()
```
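Under the same assumptions as the lineage sketch, a sketch of the profile endpoints; the source, table, and mode values below are placeholders, while the severity values come from the docstring above:

```python
# Illustrative wiring only, as in the lineage sketch above.
from pathlib import Path

from fastapi import FastAPI
from fastapi.testclient import TestClient

from dbt.task.docs.api import profile

app = FastAPI()
app.include_router(profile.router)
profile.set_project_root(Path("/path/to/dvt/project"))  # hypothetical path

client = TestClient(app)

# All profiled tables, each with last-profiled timestamp and profile mode.
tables = client.get("/api/profile/tables").json()

# Column-level metrics for one table; 404 if it was never profiled.
# "my_source", "my_table", and the mode value are placeholders.
detail = client.get(
    "/api/profile/tables/my_source/my_table", params={"mode": "full"}
)

# Warning-level alerts only; severity is one of info, warning, error.
warnings = client.get("/api/profile/alerts", params={"severity": "warning"}).json()
```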
@@ -0,0 +1,186 @@ dbt/task/docs/api/spark.py (new file)

```python
"""
DVT Docs Serve - Spark Status API

v0.56.0: Basic Spark status for local Spark only.

Note: For external clusters (EMR, Dataproc, Databricks),
use the platform's native monitoring tools.
"""

from pathlib import Path
from typing import Any, Dict, Optional

try:
    from fastapi import APIRouter, HTTPException
except ImportError:
    class APIRouter:
        def __init__(self, *args, **kwargs):
            pass

        def get(self, *args, **kwargs):
            def decorator(func):
                return func
            return decorator

    class HTTPException(Exception):
        def __init__(self, status_code, detail):
            self.status_code = status_code
            self.detail = detail


router = APIRouter(prefix="/api/spark", tags=["spark"])

# Will be set by serve.py
_project_root: Optional[Path] = None
_store_initialized: bool = False


def set_project_root(project_root: Path) -> None:
    """Set project root for API access."""
    global _project_root, _store_initialized
    _project_root = project_root
    _store_initialized = True  # Assume serve.py already initialized


@router.get("/status")
async def get_spark_status() -> Dict[str, Any]:
    """
    Get current Spark status.

    Returns basic status info for local Spark.
    """
    try:
        # Try to detect if Spark is running
        from dbt.config.compute import ComputeRegistry

        registry = ComputeRegistry(_project_root) if _project_root else None
        if not registry:
            return {
                "engine": "unknown",
                "status": "not_configured",
                "message": "Compute registry not available",
            }

        # Get default compute
        default_compute = registry.target_compute
        compute_cluster = registry.get(default_compute)

        if not compute_cluster:
            return {
                "engine": "none",
                "status": "not_configured",
                "message": "No compute engine configured",
            }

        # Check if it's local Spark
        platform = compute_cluster.detect_platform()
        is_local = platform == "local"

        # Try to get Spark session status
        spark_ui_url = None
        spark_running = False

        if is_local:
            try:
                from pyspark.sql import SparkSession

                # Check for existing session (don't create new one)
                existing = SparkSession.getActiveSession()
                if existing:
                    spark_running = True
                    spark_ui_url = existing.sparkContext.uiWebUrl
            except Exception:
                pass

        return {
            "engine": compute_cluster.name,
            "platform": platform,
            "status": "running" if spark_running else "ready",
            "is_local": is_local,
            "spark_ui_url": spark_ui_url,
            "message": "Local Spark ready" if is_local else f"External platform: {platform}",
            "config": {
                "master": compute_cluster.config.get("master", "local[*]") if is_local else None,
                "driver_memory": compute_cluster.config.get("spark.driver.memory"),
                "executor_memory": compute_cluster.config.get("spark.executor.memory"),
            },
        }

    except Exception as e:
        return {
            "engine": "unknown",
            "status": "error",
            "message": f"Could not get Spark status: {str(e)}",
        }


@router.get("/ui-url")
async def get_spark_ui_url() -> Dict[str, Any]:
    """
    Get Spark UI URL if available.

    Only works for local Spark when a session is active.
    """
    try:
        from pyspark.sql import SparkSession

        existing = SparkSession.getActiveSession()
        if existing:
            return {
                "available": True,
                "url": existing.sparkContext.uiWebUrl,
            }
        else:
            return {
                "available": False,
                "url": None,
                "message": "No active Spark session",
            }
    except ImportError:
        return {
            "available": False,
            "url": None,
            "message": "PySpark not installed",
        }
    except Exception as e:
        return {
            "available": False,
            "url": None,
            "message": str(e),
        }


@router.get("/config")
async def get_spark_config() -> Dict[str, Any]:
    """
    Get Spark configuration from computes.yml.

    Returns the configured Spark settings without starting a session.
    """
    try:
        from dbt.config.compute import ComputeRegistry

        registry = ComputeRegistry(_project_root) if _project_root else None
        if not registry:
            raise HTTPException(status_code=500, detail="Compute registry not available")

        # Get all Spark computes
        all_computes = registry.list()
        spark_computes = [c for c in all_computes if c.type == "spark"]

        return {
            "default_compute": registry.target_compute,
            "spark_computes": [
                {
                    "name": c.name,
                    "platform": c.detect_platform(),
                    "config": {
                        "master": c.config.get("master"),
                        "driver_memory": c.config.get("spark.driver.memory"),
                        "executor_memory": c.config.get("spark.executor.memory"),
                        "jars_packages": c.config.get("spark.jars.packages"),
                    },
                }
                for c in spark_computes
            ],
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
```
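Finally, a sketch of the Spark endpoints under the same assumptions; the status values tested below ("running", "ready", "not_configured", "error") are exactly those returned by get_spark_status:

```python
# Illustrative wiring only, as in the sketches above.
from pathlib import Path

from fastapi import FastAPI
from fastapi.testclient import TestClient

from dbt.task.docs.api import spark

app = FastAPI()
app.include_router(spark.router)
spark.set_project_root(Path("/path/to/dvt/project"))  # hypothetical path

client = TestClient(app)

status = client.get("/api/spark/status").json()
if status["status"] == "running":
    print("Spark UI:", status["spark_ui_url"])
elif status["status"] == "ready":
    print("Local Spark configured, no active session yet")
else:  # "not_configured" or "error"
    print(status.get("message"))

# Configured Spark computes from computes.yml, without starting a session.
config = client.get("/api/spark/config").json()
print(config["default_compute"], [c["name"] for c in config["spark_computes"]])
```

Note that /status never raises: failures come back as a normal payload with status "error", so the docs UI stays responsive even when no compute is configured.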