iflow-mcp_niclasolofsson-dbt-core-mcp 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt_core_mcp/__init__.py +18 -0
- dbt_core_mcp/__main__.py +436 -0
- dbt_core_mcp/context.py +459 -0
- dbt_core_mcp/cte_generator.py +601 -0
- dbt_core_mcp/dbt/__init__.py +1 -0
- dbt_core_mcp/dbt/bridge_runner.py +1361 -0
- dbt_core_mcp/dbt/manifest.py +781 -0
- dbt_core_mcp/dbt/runner.py +67 -0
- dbt_core_mcp/dependencies.py +50 -0
- dbt_core_mcp/server.py +381 -0
- dbt_core_mcp/tools/__init__.py +77 -0
- dbt_core_mcp/tools/analyze_impact.py +78 -0
- dbt_core_mcp/tools/build_models.py +190 -0
- dbt_core_mcp/tools/demo/__init__.py +1 -0
- dbt_core_mcp/tools/demo/hello.html +267 -0
- dbt_core_mcp/tools/demo/ui_demo.py +41 -0
- dbt_core_mcp/tools/get_column_lineage.py +1988 -0
- dbt_core_mcp/tools/get_lineage.py +89 -0
- dbt_core_mcp/tools/get_project_info.py +96 -0
- dbt_core_mcp/tools/get_resource_info.py +134 -0
- dbt_core_mcp/tools/install_deps.py +102 -0
- dbt_core_mcp/tools/list_resources.py +84 -0
- dbt_core_mcp/tools/load_seeds.py +179 -0
- dbt_core_mcp/tools/query_database.py +459 -0
- dbt_core_mcp/tools/run_models.py +234 -0
- dbt_core_mcp/tools/snapshot_models.py +120 -0
- dbt_core_mcp/tools/test_models.py +238 -0
- dbt_core_mcp/utils/__init__.py +1 -0
- dbt_core_mcp/utils/env_detector.py +186 -0
- dbt_core_mcp/utils/process_check.py +130 -0
- dbt_core_mcp/utils/tool_utils.py +411 -0
- dbt_core_mcp/utils/warehouse_adapter.py +82 -0
- dbt_core_mcp/utils/warehouse_databricks.py +297 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/METADATA +784 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/RECORD +38 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/WHEEL +4 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/entry_points.txt +2 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,781 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DBT Manifest Loader.
|
|
3
|
+
|
|
4
|
+
Reads and parses DBT's manifest.json file to provide structured access
|
|
5
|
+
to models, sources, tests, and other DBT entities.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import logging
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class DbtModel:
    """Typed record for a single dbt model entry from the manifest.

    Every field is required; positional construction follows the
    declaration order below.

    NOTE(review): nothing in this module instantiates the class, so the
    field meanings are presumed to mirror the like-named manifest keys --
    confirm against the callers that build it.
    """

    # Identification
    name: str
    unique_id: str
    resource_type: str

    # Relation naming
    schema: str
    database: str
    alias: str

    # Descriptive metadata
    description: str
    materialization: str
    tags: list[str]

    # Graph / project placement
    depends_on: list[str]
    package_name: str
    original_file_path: str
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class DbtSource:
    """Typed record for a single dbt source table entry from the manifest.

    Every field is required; positional construction follows the
    declaration order below.

    NOTE(review): nothing in this module instantiates the class, so the
    field meanings are presumed to mirror the like-named manifest keys --
    confirm against the callers that build it.
    """

    # Identification
    name: str
    unique_id: str
    source_name: str

    # Relation naming
    schema: str
    database: str
    identifier: str

    # Descriptive metadata
    description: str
    tags: list[str]
    package_name: str
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class ManifestLoader:
|
|
51
|
+
"""
|
|
52
|
+
Load and parse DBT manifest.json.
|
|
53
|
+
|
|
54
|
+
Provides structured access to models, sources, and other DBT entities.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
def __init__(self, manifest_path: Path):
|
|
58
|
+
"""
|
|
59
|
+
Initialize the manifest loader.
|
|
60
|
+
|
|
61
|
+
Args:
|
|
62
|
+
manifest_path: Path to manifest.json file
|
|
63
|
+
"""
|
|
64
|
+
self.manifest_path = manifest_path
|
|
65
|
+
self._manifest: dict[str, Any] | None = None
|
|
66
|
+
self._manifest_mtime: float | None = None # Track last modification time
|
|
67
|
+
|
|
68
|
+
async def load(self, force: bool = False) -> None:
    """
    Load the manifest from disk, skipping the read when nothing changed.

    The file is always decoded as UTF-8 (JSON's standard encoding) rather
    than with the platform's locale encoding, so non-ASCII descriptions
    load identically on every OS.

    Args:
        force: If True, reload even if already loaded. If False, only
            reload if the file's modification time differs from the one
            recorded at the previous load.

    Raises:
        FileNotFoundError: If the manifest file does not exist.
        ValueError: If the file is not valid JSON (json.JSONDecodeError is
            a ValueError) or its top-level value is not a JSON object.
    """
    # A single stat() both proves existence and yields the mtime, so there
    # is no window between an exists() check and the stat() call.
    try:
        current_mtime = self.manifest_path.stat().st_mtime
    except (FileNotFoundError, NotADirectoryError):
        raise FileNotFoundError(f"Manifest not found: {self.manifest_path}") from None

    if not force and self._manifest is not None and self._manifest_mtime == current_mtime:
        logger.debug("Manifest already loaded and unchanged, skipping reload")
        return

    logger.debug("Loading manifest from %s", self.manifest_path)

    with open(self.manifest_path, "r", encoding="utf-8") as f:
        manifest = json.load(f)

    # Every accessor calls .get() on the manifest; reject other JSON roots
    # here instead of caching them and failing later with AttributeError.
    if not isinstance(manifest, dict):
        raise ValueError(f"Manifest root must be a JSON object, got {type(manifest).__name__}: {self.manifest_path}")

    self._manifest = manifest
    # The mtime was sampled before reading: if the file is rewritten during
    # the read, the next load() sees a newer mtime and reloads.
    self._manifest_mtime = current_mtime
    logger.info("Manifest loaded successfully")
|
|
92
|
+
|
|
93
|
+
def is_loaded(self) -> bool:
    """Tell whether manifest data is currently held in memory.

    Returns:
        False while the cached manifest is still None, True otherwise
    """
    if self._manifest is None:
        return False
    return True
|
|
100
|
+
|
|
101
|
+
def get_resources(self, resource_type: str | None = None) -> list[dict[str, Any]]:
|
|
102
|
+
"""
|
|
103
|
+
Get all resources from the manifest, optionally filtered by type.
|
|
104
|
+
|
|
105
|
+
Returns simplified resource information across all types (models, sources, seeds, etc.).
|
|
106
|
+
Designed for LLM consumption with consistent structure across resource types.
|
|
107
|
+
|
|
108
|
+
Args:
|
|
109
|
+
resource_type: Optional filter (model, source, seed, snapshot, test, analysis).
|
|
110
|
+
If None, returns all resources.
|
|
111
|
+
|
|
112
|
+
Returns:
|
|
113
|
+
List of resource dictionaries with consistent structure:
|
|
114
|
+
{
|
|
115
|
+
"name": str,
|
|
116
|
+
"unique_id": str,
|
|
117
|
+
"resource_type": str,
|
|
118
|
+
"schema": str (if applicable),
|
|
119
|
+
"database": str (if applicable),
|
|
120
|
+
"description": str,
|
|
121
|
+
"tags": list[str],
|
|
122
|
+
"package_name": str,
|
|
123
|
+
...additional type-specific fields
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
Raises:
|
|
127
|
+
RuntimeError: If manifest not loaded
|
|
128
|
+
ValueError: If invalid resource_type provided
|
|
129
|
+
"""
|
|
130
|
+
if not self._manifest:
|
|
131
|
+
raise RuntimeError("Manifest not loaded. Call load() first.")
|
|
132
|
+
|
|
133
|
+
# Validate resource_type if provided
|
|
134
|
+
valid_types = {"model", "source", "seed", "snapshot", "test", "analysis"}
|
|
135
|
+
if resource_type is not None and resource_type not in valid_types:
|
|
136
|
+
raise ValueError(f"Invalid resource_type '{resource_type}'. Must be one of: {', '.join(sorted(valid_types))}")
|
|
137
|
+
|
|
138
|
+
resources: list[dict[str, Any]] = []
|
|
139
|
+
|
|
140
|
+
# Collect from nodes (models, tests, seeds, snapshots, analyses)
|
|
141
|
+
nodes = self._manifest.get("nodes", {})
|
|
142
|
+
for unique_id, node in nodes.items():
|
|
143
|
+
if not isinstance(node, dict):
|
|
144
|
+
continue
|
|
145
|
+
|
|
146
|
+
node_type = node.get("resource_type")
|
|
147
|
+
|
|
148
|
+
# Filter by type if specified
|
|
149
|
+
if resource_type is not None and node_type != resource_type:
|
|
150
|
+
continue
|
|
151
|
+
|
|
152
|
+
# Build consistent resource dict
|
|
153
|
+
resource: dict[str, Any] = {
|
|
154
|
+
"name": node.get("name", ""),
|
|
155
|
+
"unique_id": unique_id,
|
|
156
|
+
"resource_type": node_type,
|
|
157
|
+
"package_name": node.get("package_name", ""),
|
|
158
|
+
"description": node.get("description", ""),
|
|
159
|
+
"tags": node.get("tags", []),
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
# Add common fields for materialized resources
|
|
163
|
+
if node_type in ("model", "seed", "snapshot"):
|
|
164
|
+
resource["schema"] = node.get("schema", "")
|
|
165
|
+
resource["database"] = node.get("database", "")
|
|
166
|
+
resource["alias"] = node.get("alias", "")
|
|
167
|
+
|
|
168
|
+
# Add type-specific fields
|
|
169
|
+
if node_type == "model":
|
|
170
|
+
resource["materialization"] = node.get("config", {}).get("materialized", "")
|
|
171
|
+
resource["file_path"] = node.get("original_file_path", "")
|
|
172
|
+
elif node_type == "seed":
|
|
173
|
+
resource["file_path"] = node.get("original_file_path", "")
|
|
174
|
+
elif node_type == "snapshot":
|
|
175
|
+
resource["file_path"] = node.get("original_file_path", "")
|
|
176
|
+
elif node_type == "test":
|
|
177
|
+
resource["test_metadata"] = node.get("test_metadata", {})
|
|
178
|
+
resource["column_name"] = node.get("column_name")
|
|
179
|
+
|
|
180
|
+
resources.append(resource)
|
|
181
|
+
|
|
182
|
+
# Collect from sources (if not filtered out)
|
|
183
|
+
if resource_type is None or resource_type == "source":
|
|
184
|
+
sources = self._manifest.get("sources", {})
|
|
185
|
+
for unique_id, source in sources.items():
|
|
186
|
+
if not isinstance(source, dict):
|
|
187
|
+
continue
|
|
188
|
+
|
|
189
|
+
resource = {
|
|
190
|
+
"name": source.get("name", ""),
|
|
191
|
+
"unique_id": unique_id,
|
|
192
|
+
"resource_type": "source",
|
|
193
|
+
"source_name": source.get("source_name", ""),
|
|
194
|
+
"schema": source.get("schema", ""),
|
|
195
|
+
"database": source.get("database", ""),
|
|
196
|
+
"identifier": source.get("identifier", ""),
|
|
197
|
+
"package_name": source.get("package_name", ""),
|
|
198
|
+
"description": source.get("description", ""),
|
|
199
|
+
"tags": source.get("tags", []),
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
resources.append(resource)
|
|
203
|
+
|
|
204
|
+
logger.debug(f"Found {len(resources)} resources" + (f" of type '{resource_type}'" if resource_type else ""))
|
|
205
|
+
return resources
|
|
206
|
+
|
|
207
|
+
def get_compiled_code(self, name: str) -> str | None:
|
|
208
|
+
"""
|
|
209
|
+
Get the compiled SQL code for a model.
|
|
210
|
+
|
|
211
|
+
Args:
|
|
212
|
+
name: Model name
|
|
213
|
+
|
|
214
|
+
Returns:
|
|
215
|
+
Compiled SQL string if available, None if not compiled yet
|
|
216
|
+
|
|
217
|
+
Raises:
|
|
218
|
+
RuntimeError: If manifest not loaded
|
|
219
|
+
ValueError: If model not found
|
|
220
|
+
"""
|
|
221
|
+
node = self.get_resource_node(name, "model") # Will raise ValueError if not found
|
|
222
|
+
return node.get("compiled_code")
|
|
223
|
+
|
|
224
|
+
def get_resource_node(self, name: str, resource_type: str | None = None) -> dict[str, Any]:
|
|
225
|
+
"""
|
|
226
|
+
Get a resource node by name with auto-detection across all resource types.
|
|
227
|
+
|
|
228
|
+
This method searches for resources across models, sources, seeds, snapshots, tests, etc.
|
|
229
|
+
Designed for LLM consumption - returns all matches when ambiguous rather than raising errors.
|
|
230
|
+
|
|
231
|
+
Args:
|
|
232
|
+
name: Resource name. For sources, can be "source_name.table_name" or just "table_name"
|
|
233
|
+
resource_type: Optional filter (model, source, seed, snapshot, test, analysis).
|
|
234
|
+
If None, searches all types.
|
|
235
|
+
|
|
236
|
+
Returns:
|
|
237
|
+
Single resource dict if exactly one match found, or dict with multiple_matches=True
|
|
238
|
+
containing all matching resources for LLM to process.
|
|
239
|
+
|
|
240
|
+
Raises:
|
|
241
|
+
RuntimeError: If manifest not loaded
|
|
242
|
+
ValueError: If resource not found (only case that raises)
|
|
243
|
+
|
|
244
|
+
Examples:
|
|
245
|
+
get_resource_node("customers") -> single model dict
|
|
246
|
+
get_resource_node("customers", "source") -> single source dict
|
|
247
|
+
get_resource_node("customers") with multiple matches -> {"multiple_matches": True, ...}
|
|
248
|
+
"""
|
|
249
|
+
if not self._manifest:
|
|
250
|
+
raise RuntimeError("Manifest not loaded. Call load() first.")
|
|
251
|
+
|
|
252
|
+
# Validate resource_type if provided
|
|
253
|
+
valid_types = {"model", "source", "seed", "snapshot", "test", "analysis"}
|
|
254
|
+
if resource_type is not None and resource_type not in valid_types:
|
|
255
|
+
raise ValueError(f"Invalid resource_type '{resource_type}'. Must be one of: {', '.join(sorted(valid_types))}")
|
|
256
|
+
|
|
257
|
+
matches: list[dict[str, Any]] = []
|
|
258
|
+
|
|
259
|
+
# For sources, try "source_name.table_name" format first
|
|
260
|
+
if "." in name and (resource_type is None or resource_type == "source"):
|
|
261
|
+
parts = name.split(".", 1)
|
|
262
|
+
if len(parts) == 2:
|
|
263
|
+
# Search sources dict directly
|
|
264
|
+
sources_dict = self._manifest.get("sources", {})
|
|
265
|
+
for _, source in sources_dict.items():
|
|
266
|
+
if isinstance(source, dict) and source.get("source_name") == parts[0] and source.get("name") == parts[1]:
|
|
267
|
+
matches.append(dict(source))
|
|
268
|
+
break
|
|
269
|
+
|
|
270
|
+
# Search nodes (models, tests, snapshots, seeds, analyses, etc.)
|
|
271
|
+
nodes = self._manifest.get("nodes", {})
|
|
272
|
+
for unique_id, node in nodes.items():
|
|
273
|
+
if not isinstance(node, dict):
|
|
274
|
+
continue
|
|
275
|
+
|
|
276
|
+
node_type = node.get("resource_type")
|
|
277
|
+
node_name = node.get("name")
|
|
278
|
+
|
|
279
|
+
# Type filter if specified
|
|
280
|
+
if resource_type is not None and node_type != resource_type:
|
|
281
|
+
continue
|
|
282
|
+
|
|
283
|
+
if node_name == name:
|
|
284
|
+
matches.append(dict(node))
|
|
285
|
+
|
|
286
|
+
# Search sources by table name only (fallback when no dot in name)
|
|
287
|
+
if resource_type is None or resource_type == "source":
|
|
288
|
+
sources = self._manifest.get("sources", {})
|
|
289
|
+
for unique_id, source in sources.items():
|
|
290
|
+
if not isinstance(source, dict):
|
|
291
|
+
continue
|
|
292
|
+
|
|
293
|
+
if source.get("name") == name:
|
|
294
|
+
# Avoid duplicates if already matched via source_name.table_name
|
|
295
|
+
if not any(m.get("unique_id") == unique_id for m in matches):
|
|
296
|
+
matches.append(dict(source))
|
|
297
|
+
|
|
298
|
+
# Handle results based on match count
|
|
299
|
+
if len(matches) == 0:
|
|
300
|
+
type_label = resource_type.title() if resource_type else "Resource"
|
|
301
|
+
list_hint = f"Use list_resources(type='{resource_type}') to see all available {resource_type}s." if resource_type else "Use list_resources() to see all available resources."
|
|
302
|
+
raise ValueError(f"{type_label} '{name}' not found.\n{list_hint}")
|
|
303
|
+
elif len(matches) == 1:
|
|
304
|
+
# Single match - return the resource directly
|
|
305
|
+
return matches[0]
|
|
306
|
+
else:
|
|
307
|
+
# Multiple matches - return all with metadata for LLM to process
|
|
308
|
+
return {
|
|
309
|
+
"multiple_matches": True,
|
|
310
|
+
"name": name,
|
|
311
|
+
"match_count": len(matches),
|
|
312
|
+
"matches": matches,
|
|
313
|
+
"message": f"Found {len(matches)} resources named '{name}'. Returning all matches for context.",
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
def get_resource_info(
|
|
317
|
+
self,
|
|
318
|
+
name: str,
|
|
319
|
+
resource_type: str | None = None,
|
|
320
|
+
include_database_schema: bool = True,
|
|
321
|
+
include_compiled_sql: bool = True,
|
|
322
|
+
) -> dict[str, Any]:
|
|
323
|
+
"""Get detailed resource information with optional enrichments.
|
|
324
|
+
|
|
325
|
+
This method extends get_resource_node() with optional enrichments:
|
|
326
|
+
- include_database_schema: Query actual database schema
|
|
327
|
+
- include_compiled_sql: Include compiled SQL (models only, requires compilation)
|
|
328
|
+
|
|
329
|
+
Note: This method does NOT trigger compilation. If compiled SQL is requested but
|
|
330
|
+
not available in the manifest, the 'compiled_sql' field will be None. The caller
|
|
331
|
+
(e.g., server tool) is responsible for triggering compilation if needed.
|
|
332
|
+
|
|
333
|
+
Args:
|
|
334
|
+
name: Resource name
|
|
335
|
+
resource_type: Optional resource type filter
|
|
336
|
+
include_database_schema: Include database schema information (default: True)
|
|
337
|
+
include_compiled_sql: Include compiled SQL for models (default: True)
|
|
338
|
+
|
|
339
|
+
Returns:
|
|
340
|
+
Resource dictionary with optional enrichments
|
|
341
|
+
"""
|
|
342
|
+
result = self.get_resource_node(name, resource_type)
|
|
343
|
+
|
|
344
|
+
# Handle multiple matches case - return as-is
|
|
345
|
+
if result.get("multiple_matches"):
|
|
346
|
+
return result
|
|
347
|
+
|
|
348
|
+
# Single match - enrich with additional data if requested
|
|
349
|
+
node_type = result.get("resource_type")
|
|
350
|
+
|
|
351
|
+
# Create a copy without heavy fields
|
|
352
|
+
result_copy = dict(result)
|
|
353
|
+
result_copy.pop("raw_code", None)
|
|
354
|
+
result_copy.pop("compiled_code", None)
|
|
355
|
+
|
|
356
|
+
# Include compiled SQL for models if requested and available
|
|
357
|
+
if include_compiled_sql and node_type == "model":
|
|
358
|
+
compiled_code = result.get("compiled_code")
|
|
359
|
+
|
|
360
|
+
if compiled_code:
|
|
361
|
+
result_copy["compiled_sql"] = compiled_code
|
|
362
|
+
result_copy["compiled_sql_cached"] = True
|
|
363
|
+
else:
|
|
364
|
+
# Not compiled yet - set to None to indicate it's not available
|
|
365
|
+
result_copy["compiled_sql"] = None
|
|
366
|
+
result_copy["compiled_sql_cached"] = False
|
|
367
|
+
|
|
368
|
+
return result_copy
|
|
369
|
+
|
|
370
|
+
def get_project_info(self) -> dict[str, Any]:
    """
    Summarise the loaded manifest: metadata fields plus resource counts.

    Returns:
        Dictionary with "project_name", "dbt_version", "adapter_type" and
        "generated_at" copied from the manifest's "metadata" section (empty
        string when a field is missing), "model_count" (dictionary entries
        under "nodes" whose resource_type is "model") and "source_count"
        (number of entries under "sources").

    Raises:
        RuntimeError: If manifest not loaded
    """
    if not self._manifest:
        raise RuntimeError("Manifest not loaded. Call load() first.")

    manifest = self._manifest
    metadata: dict[str, Any] = manifest.get("metadata", {})  # type: ignore[assignment]

    model_nodes = [
        entry
        for entry in manifest.get("nodes", {}).values()
        if isinstance(entry, dict) and entry.get("resource_type") == "model"
    ]
    source_entries = manifest.get("sources", {})

    summary: dict[str, Any] = {
        field: metadata.get(field, "")
        for field in ("project_name", "dbt_version", "adapter_type", "generated_at")
    }
    summary["model_count"] = len(model_nodes)
    summary["source_count"] = len(source_entries)
    return summary
|
|
395
|
+
|
|
396
|
+
def get_manifest_dict(self) -> dict[str, Any]:
    """Hand out the cached manifest dictionary itself (not a copy).

    Returns:
        Raw manifest dictionary

    Raises:
        RuntimeError: If manifest not loaded
    """
    manifest = self._manifest
    if manifest:
        return manifest
    raise RuntimeError("Manifest not loaded. Call load() first.")
|
|
408
|
+
|
|
409
|
+
def get_node_by_unique_id(self, unique_id: str) -> dict[str, Any] | None:
|
|
410
|
+
"""Get a node (model, test, etc.) by its unique_id.
|
|
411
|
+
|
|
412
|
+
Args:
|
|
413
|
+
unique_id: The unique identifier (e.g., 'model.package.model_name')
|
|
414
|
+
|
|
415
|
+
Returns:
|
|
416
|
+
Node dictionary or None if not found
|
|
417
|
+
"""
|
|
418
|
+
if not self._manifest:
|
|
419
|
+
raise RuntimeError("Manifest not loaded. Call load() first.")
|
|
420
|
+
|
|
421
|
+
# Check nodes first (models, tests, snapshots, etc.)
|
|
422
|
+
nodes = self._manifest.get("nodes", {})
|
|
423
|
+
if unique_id in nodes:
|
|
424
|
+
return dict(nodes[unique_id])
|
|
425
|
+
|
|
426
|
+
# Check sources
|
|
427
|
+
sources = self._manifest.get("sources", {})
|
|
428
|
+
if unique_id in sources:
|
|
429
|
+
return dict(sources[unique_id])
|
|
430
|
+
|
|
431
|
+
return None
|
|
432
|
+
|
|
433
|
+
def get_upstream_nodes(self, unique_id: str, max_depth: int | None = None, current_depth: int = 0) -> list[dict[str, Any]]:
|
|
434
|
+
"""Get all upstream dependencies of a node recursively.
|
|
435
|
+
|
|
436
|
+
Args:
|
|
437
|
+
unique_id: The unique identifier of the node
|
|
438
|
+
max_depth: Maximum depth to traverse (None for unlimited)
|
|
439
|
+
current_depth: Current recursion depth (internal use)
|
|
440
|
+
|
|
441
|
+
Returns:
|
|
442
|
+
List of dictionaries with upstream node info:
|
|
443
|
+
{"unique_id": str, "name": str, "type": str, "distance": int}
|
|
444
|
+
"""
|
|
445
|
+
if not self._manifest:
|
|
446
|
+
raise RuntimeError("Manifest not loaded. Call load() first.")
|
|
447
|
+
|
|
448
|
+
if max_depth is not None and current_depth >= max_depth:
|
|
449
|
+
return []
|
|
450
|
+
|
|
451
|
+
parent_map = self._manifest.get("parent_map", {})
|
|
452
|
+
parents = parent_map.get(unique_id, [])
|
|
453
|
+
|
|
454
|
+
upstream: list[dict[str, Any]] = []
|
|
455
|
+
seen: set[str] = set()
|
|
456
|
+
|
|
457
|
+
for parent_id in parents:
|
|
458
|
+
if parent_id in seen:
|
|
459
|
+
continue
|
|
460
|
+
seen.add(parent_id)
|
|
461
|
+
|
|
462
|
+
node = self.get_node_by_unique_id(parent_id)
|
|
463
|
+
if node:
|
|
464
|
+
resource_type = node.get("resource_type", "unknown")
|
|
465
|
+
upstream.append(
|
|
466
|
+
{
|
|
467
|
+
"unique_id": parent_id,
|
|
468
|
+
"name": node.get("name", ""),
|
|
469
|
+
"type": resource_type,
|
|
470
|
+
"distance": current_depth + 1,
|
|
471
|
+
}
|
|
472
|
+
)
|
|
473
|
+
|
|
474
|
+
# Recurse
|
|
475
|
+
if max_depth is None or current_depth + 1 < max_depth:
|
|
476
|
+
grandparents = self.get_upstream_nodes(parent_id, max_depth, current_depth + 1)
|
|
477
|
+
for gp in grandparents:
|
|
478
|
+
if gp["unique_id"] not in seen:
|
|
479
|
+
seen.add(str(gp["unique_id"]))
|
|
480
|
+
upstream.append(gp)
|
|
481
|
+
|
|
482
|
+
return upstream
|
|
483
|
+
|
|
484
|
+
def get_lineage(
|
|
485
|
+
self,
|
|
486
|
+
name: str,
|
|
487
|
+
resource_type: str | None = None,
|
|
488
|
+
direction: str = "both",
|
|
489
|
+
depth: int | None = None,
|
|
490
|
+
) -> dict[str, Any]:
|
|
491
|
+
"""
|
|
492
|
+
Get lineage (dependency tree) for any resource type with auto-detection.
|
|
493
|
+
|
|
494
|
+
This unified method works across all resource types (models, sources, seeds, etc.)
|
|
495
|
+
and provides upstream, downstream, or bidirectional dependency traversal.
|
|
496
|
+
|
|
497
|
+
Args:
|
|
498
|
+
name: Resource name. For sources, use "source_name.table_name" or just "table_name"
|
|
499
|
+
resource_type: Optional filter (model, source, seed, snapshot, test, analysis).
|
|
500
|
+
If None, auto-detects resource type.
|
|
501
|
+
direction: Lineage direction:
|
|
502
|
+
- "upstream": Show where data comes from (parents)
|
|
503
|
+
- "downstream": Show what depends on this resource (children)
|
|
504
|
+
- "both": Show full lineage (default)
|
|
505
|
+
depth: Maximum levels to traverse (None for unlimited)
|
|
506
|
+
- depth=1: Immediate dependencies only
|
|
507
|
+
- depth=2: Dependencies + their dependencies
|
|
508
|
+
- None: Full dependency tree
|
|
509
|
+
|
|
510
|
+
Returns:
|
|
511
|
+
Dictionary with lineage information:
|
|
512
|
+
{
|
|
513
|
+
"resource": {...}, # The target resource info
|
|
514
|
+
"upstream": [...], # List of upstream dependencies (if direction in ["upstream", "both"])
|
|
515
|
+
"downstream": [...], # List of downstream dependents (if direction in ["downstream", "both"])
|
|
516
|
+
"stats": {
|
|
517
|
+
"upstream_count": int,
|
|
518
|
+
"downstream_count": int,
|
|
519
|
+
"total_dependencies": int
|
|
520
|
+
}
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
If multiple matches found, returns:
|
|
524
|
+
{"multiple_matches": True, "matches": [...], "message": "..."}
|
|
525
|
+
|
|
526
|
+
Raises:
|
|
527
|
+
RuntimeError: If manifest not loaded
|
|
528
|
+
ValueError: If resource not found or invalid direction
|
|
529
|
+
|
|
530
|
+
Examples:
|
|
531
|
+
get_lineage("customers") -> auto-detect and show full lineage
|
|
532
|
+
get_lineage("customers", "model", "upstream") -> show where customers model gets data
|
|
533
|
+
get_lineage("customers", direction="downstream", depth=2) -> 2 levels of dependents
|
|
534
|
+
"""
|
|
535
|
+
if not self._manifest:
|
|
536
|
+
raise RuntimeError("Manifest not loaded. Call load() first.")
|
|
537
|
+
|
|
538
|
+
# Validate direction
|
|
539
|
+
valid_directions = {"upstream", "downstream", "both"}
|
|
540
|
+
if direction not in valid_directions:
|
|
541
|
+
raise ValueError(f"Invalid direction '{direction}'. Must be one of: {', '.join(sorted(valid_directions))}")
|
|
542
|
+
|
|
543
|
+
# Get the resource (auto-detect if resource_type not specified)
|
|
544
|
+
resource = self.get_resource_node(name, resource_type)
|
|
545
|
+
|
|
546
|
+
# Handle multiple matches - return for LLM to process
|
|
547
|
+
if resource.get("multiple_matches"):
|
|
548
|
+
return resource
|
|
549
|
+
|
|
550
|
+
# Extract unique_id for lineage traversal
|
|
551
|
+
unique_id = resource.get("unique_id")
|
|
552
|
+
if not unique_id:
|
|
553
|
+
raise ValueError(f"Resource '{name}' does not have a unique_id")
|
|
554
|
+
|
|
555
|
+
# Build lineage based on direction
|
|
556
|
+
result: dict[str, Any] = {
|
|
557
|
+
"resource": {
|
|
558
|
+
"name": resource.get("name"),
|
|
559
|
+
"unique_id": unique_id,
|
|
560
|
+
"resource_type": resource.get("resource_type"),
|
|
561
|
+
"package_name": resource.get("package_name"),
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
upstream: list[dict[str, Any]] = []
|
|
566
|
+
downstream: list[dict[str, Any]] = []
|
|
567
|
+
|
|
568
|
+
if direction in ("upstream", "both"):
|
|
569
|
+
upstream = self.get_upstream_nodes(unique_id, max_depth=depth)
|
|
570
|
+
result["upstream"] = upstream
|
|
571
|
+
|
|
572
|
+
if direction in ("downstream", "both"):
|
|
573
|
+
downstream = self.get_downstream_nodes(unique_id, max_depth=depth)
|
|
574
|
+
result["downstream"] = downstream
|
|
575
|
+
|
|
576
|
+
# Add statistics
|
|
577
|
+
result["stats"] = {
|
|
578
|
+
"upstream_count": len(upstream),
|
|
579
|
+
"downstream_count": len(downstream),
|
|
580
|
+
"total_dependencies": len(upstream) + len(downstream),
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
return result
|
|
584
|
+
|
|
585
|
+
def analyze_impact(
|
|
586
|
+
self,
|
|
587
|
+
name: str,
|
|
588
|
+
resource_type: str | None = None,
|
|
589
|
+
) -> dict[str, Any]:
|
|
590
|
+
"""
|
|
591
|
+
Analyze the impact of changing a resource across all resource types.
|
|
592
|
+
|
|
593
|
+
Shows all downstream dependencies that would be affected by changes,
|
|
594
|
+
including models, tests, and other resources. Provides actionable
|
|
595
|
+
recommendations for running affected resources.
|
|
596
|
+
|
|
597
|
+
Args:
|
|
598
|
+
name: Resource name. For sources, use "source_name.table_name" or just "table_name"
|
|
599
|
+
resource_type: Optional filter (model, source, seed, snapshot, test, analysis).
|
|
600
|
+
If None, auto-detects resource type.
|
|
601
|
+
|
|
602
|
+
Returns:
|
|
603
|
+
Dictionary with impact analysis:
|
|
604
|
+
{
|
|
605
|
+
"resource": {...}, # The target resource info
|
|
606
|
+
"impact": {
|
|
607
|
+
"models_affected": [...], # Downstream models by distance
|
|
608
|
+
"models_affected_count": int,
|
|
609
|
+
"tests_affected_count": int,
|
|
610
|
+
"other_affected_count": int,
|
|
611
|
+
"total_affected": int
|
|
612
|
+
},
|
|
613
|
+
"affected_by_distance": {
|
|
614
|
+
"1": [...], # Immediate dependents
|
|
615
|
+
"2": [...], # Second-level dependents
|
|
616
|
+
...
|
|
617
|
+
},
|
|
618
|
+
"recommendation": str, # Suggested dbt command
|
|
619
|
+
"message": str # Human-readable impact assessment
|
|
620
|
+
}
|
|
621
|
+
|
|
622
|
+
If multiple matches found, returns:
|
|
623
|
+
{"multiple_matches": True, "matches": [...], "message": "..."}
|
|
624
|
+
|
|
625
|
+
Raises:
|
|
626
|
+
RuntimeError: If manifest not loaded
|
|
627
|
+
ValueError: If resource not found
|
|
628
|
+
|
|
629
|
+
Examples:
|
|
630
|
+
analyze_impact("stg_customers") -> impact of changing staging model
|
|
631
|
+
analyze_impact("jaffle_shop.orders", "source") -> impact of source change
|
|
632
|
+
analyze_impact("raw_customers", "seed") -> impact of seed change
|
|
633
|
+
"""
|
|
634
|
+
if not self._manifest:
|
|
635
|
+
raise RuntimeError("Manifest not loaded. Call load() first.")
|
|
636
|
+
|
|
637
|
+
# Get the resource (auto-detect if resource_type not specified)
|
|
638
|
+
resource = self.get_resource_node(name, resource_type)
|
|
639
|
+
|
|
640
|
+
# Handle multiple matches - return for LLM to process
|
|
641
|
+
if resource.get("multiple_matches"):
|
|
642
|
+
return resource
|
|
643
|
+
|
|
644
|
+
# Extract unique_id for impact traversal
|
|
645
|
+
unique_id = resource.get("unique_id")
|
|
646
|
+
if not unique_id:
|
|
647
|
+
raise ValueError(f"Resource '{name}' does not have a unique_id")
|
|
648
|
+
|
|
649
|
+
# Get all downstream dependencies (no depth limit for impact)
|
|
650
|
+
downstream = self.get_downstream_nodes(unique_id, max_depth=None)
|
|
651
|
+
|
|
652
|
+
# Categorize by resource type
|
|
653
|
+
models_affected: list[dict[str, Any]] = []
|
|
654
|
+
tests_affected: list[dict[str, Any]] = []
|
|
655
|
+
other_affected: list[dict[str, Any]] = []
|
|
656
|
+
affected_by_distance: dict[str, list[dict[str, Any]]] = {}
|
|
657
|
+
|
|
658
|
+
for dep in downstream:
|
|
659
|
+
dep_type = str(dep["type"])
|
|
660
|
+
distance = str(dep["distance"])
|
|
661
|
+
|
|
662
|
+
# Group by distance
|
|
663
|
+
if distance not in affected_by_distance:
|
|
664
|
+
affected_by_distance[distance] = []
|
|
665
|
+
affected_by_distance[distance].append(dep)
|
|
666
|
+
|
|
667
|
+
# Categorize by type
|
|
668
|
+
if dep_type == "model":
|
|
669
|
+
models_affected.append(dep)
|
|
670
|
+
elif dep_type == "test":
|
|
671
|
+
tests_affected.append(dep)
|
|
672
|
+
else:
|
|
673
|
+
other_affected.append(dep)
|
|
674
|
+
|
|
675
|
+
# Sort models by distance for better readability
|
|
676
|
+
models_affected_sorted = sorted(models_affected, key=lambda x: (int(x["distance"]), str(x["name"])))
|
|
677
|
+
|
|
678
|
+
# Build recommendation based on resource type
|
|
679
|
+
resource_name = resource.get("name", name)
|
|
680
|
+
current_resource_type = resource.get("resource_type")
|
|
681
|
+
|
|
682
|
+
if current_resource_type == "source":
|
|
683
|
+
# For sources, recommend running downstream models
|
|
684
|
+
if len(models_affected) == 0:
|
|
685
|
+
recommendation = f"dbt test -s source:{resource.get('source_name')}.{resource_name}"
|
|
686
|
+
else:
|
|
687
|
+
recommendation = f"dbt run -s {resource_name}+"
|
|
688
|
+
elif current_resource_type == "seed":
|
|
689
|
+
# For seeds, recommend seeding + downstream
|
|
690
|
+
if len(models_affected) == 0:
|
|
691
|
+
recommendation = f"dbt seed -s {resource_name} && dbt test -s {resource_name}"
|
|
692
|
+
else:
|
|
693
|
+
recommendation = f"dbt seed -s {resource_name} && dbt run -s {resource_name}+"
|
|
694
|
+
else:
|
|
695
|
+
# For models, snapshots, etc.
|
|
696
|
+
if len(models_affected) == 0:
|
|
697
|
+
recommendation = f"dbt run -s {resource_name}"
|
|
698
|
+
else:
|
|
699
|
+
recommendation = f"dbt run -s {resource_name}+"
|
|
700
|
+
|
|
701
|
+
# Build result
|
|
702
|
+
result: dict[str, Any] = {
|
|
703
|
+
"resource": {
|
|
704
|
+
"name": resource_name,
|
|
705
|
+
"unique_id": unique_id,
|
|
706
|
+
"resource_type": current_resource_type,
|
|
707
|
+
"package_name": resource.get("package_name"),
|
|
708
|
+
},
|
|
709
|
+
"impact": {
|
|
710
|
+
"models_affected": models_affected_sorted,
|
|
711
|
+
"models_affected_count": len(models_affected),
|
|
712
|
+
"tests_affected_count": len(tests_affected),
|
|
713
|
+
"other_affected_count": len(other_affected),
|
|
714
|
+
"total_affected": len(downstream),
|
|
715
|
+
},
|
|
716
|
+
"affected_by_distance": affected_by_distance,
|
|
717
|
+
"recommendation": recommendation,
|
|
718
|
+
}
|
|
719
|
+
|
|
720
|
+
# Add helpful message based on impact size
|
|
721
|
+
if len(models_affected) == 0:
|
|
722
|
+
result["message"] = "No downstream models affected. Only this resource needs to be run/tested."
|
|
723
|
+
elif len(models_affected) <= 3:
|
|
724
|
+
result["message"] = f"Low impact: {len(models_affected)} downstream model(s) affected."
|
|
725
|
+
elif len(models_affected) <= 10:
|
|
726
|
+
result["message"] = f"Medium impact: {len(models_affected)} downstream models affected."
|
|
727
|
+
else:
|
|
728
|
+
result["message"] = f"High impact: {len(models_affected)} downstream models affected. Consider incremental changes."
|
|
729
|
+
|
|
730
|
+
return result
|
|
731
|
+
|
|
732
|
+
def get_downstream_nodes(self, unique_id: str, max_depth: int | None = None, current_depth: int = 0) -> list[dict[str, Any]]:
|
|
733
|
+
"""Get all downstream dependents of a node recursively.
|
|
734
|
+
|
|
735
|
+
Args:
|
|
736
|
+
unique_id: The unique identifier of the node
|
|
737
|
+
max_depth: Maximum depth to traverse (None for unlimited)
|
|
738
|
+
current_depth: Current recursion depth (internal use)
|
|
739
|
+
|
|
740
|
+
Returns:
|
|
741
|
+
List of dictionaries with downstream node info:
|
|
742
|
+
{"unique_id": str, "name": str, "type": str, "distance": int}
|
|
743
|
+
"""
|
|
744
|
+
if not self._manifest:
|
|
745
|
+
raise RuntimeError("Manifest not loaded. Call load() first.")
|
|
746
|
+
|
|
747
|
+
if max_depth is not None and current_depth >= max_depth:
|
|
748
|
+
return []
|
|
749
|
+
|
|
750
|
+
child_map = self._manifest.get("child_map", {})
|
|
751
|
+
children = child_map.get(unique_id, [])
|
|
752
|
+
|
|
753
|
+
downstream: list[dict[str, Any]] = []
|
|
754
|
+
seen: set[str] = set()
|
|
755
|
+
|
|
756
|
+
for child_id in children:
|
|
757
|
+
if child_id in seen:
|
|
758
|
+
continue
|
|
759
|
+
seen.add(child_id)
|
|
760
|
+
|
|
761
|
+
node = self.get_node_by_unique_id(child_id)
|
|
762
|
+
if node:
|
|
763
|
+
resource_type = node.get("resource_type", "unknown")
|
|
764
|
+
downstream.append(
|
|
765
|
+
{
|
|
766
|
+
"unique_id": child_id,
|
|
767
|
+
"name": node.get("name", ""),
|
|
768
|
+
"type": resource_type,
|
|
769
|
+
"distance": current_depth + 1,
|
|
770
|
+
}
|
|
771
|
+
)
|
|
772
|
+
|
|
773
|
+
# Recurse
|
|
774
|
+
if max_depth is None or current_depth + 1 < max_depth:
|
|
775
|
+
grandchildren = self.get_downstream_nodes(child_id, max_depth, current_depth + 1)
|
|
776
|
+
for gc in grandchildren:
|
|
777
|
+
if gc["unique_id"] not in seen:
|
|
778
|
+
seen.add(str(gc["unique_id"]))
|
|
779
|
+
downstream.append(gc)
|
|
780
|
+
|
|
781
|
+
return downstream
|