iflow-mcp_niclasolofsson-dbt-core-mcp 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. dbt_core_mcp/__init__.py +18 -0
  2. dbt_core_mcp/__main__.py +436 -0
  3. dbt_core_mcp/context.py +459 -0
  4. dbt_core_mcp/cte_generator.py +601 -0
  5. dbt_core_mcp/dbt/__init__.py +1 -0
  6. dbt_core_mcp/dbt/bridge_runner.py +1361 -0
  7. dbt_core_mcp/dbt/manifest.py +781 -0
  8. dbt_core_mcp/dbt/runner.py +67 -0
  9. dbt_core_mcp/dependencies.py +50 -0
  10. dbt_core_mcp/server.py +381 -0
  11. dbt_core_mcp/tools/__init__.py +77 -0
  12. dbt_core_mcp/tools/analyze_impact.py +78 -0
  13. dbt_core_mcp/tools/build_models.py +190 -0
  14. dbt_core_mcp/tools/demo/__init__.py +1 -0
  15. dbt_core_mcp/tools/demo/hello.html +267 -0
  16. dbt_core_mcp/tools/demo/ui_demo.py +41 -0
  17. dbt_core_mcp/tools/get_column_lineage.py +1988 -0
  18. dbt_core_mcp/tools/get_lineage.py +89 -0
  19. dbt_core_mcp/tools/get_project_info.py +96 -0
  20. dbt_core_mcp/tools/get_resource_info.py +134 -0
  21. dbt_core_mcp/tools/install_deps.py +102 -0
  22. dbt_core_mcp/tools/list_resources.py +84 -0
  23. dbt_core_mcp/tools/load_seeds.py +179 -0
  24. dbt_core_mcp/tools/query_database.py +459 -0
  25. dbt_core_mcp/tools/run_models.py +234 -0
  26. dbt_core_mcp/tools/snapshot_models.py +120 -0
  27. dbt_core_mcp/tools/test_models.py +238 -0
  28. dbt_core_mcp/utils/__init__.py +1 -0
  29. dbt_core_mcp/utils/env_detector.py +186 -0
  30. dbt_core_mcp/utils/process_check.py +130 -0
  31. dbt_core_mcp/utils/tool_utils.py +411 -0
  32. dbt_core_mcp/utils/warehouse_adapter.py +82 -0
  33. dbt_core_mcp/utils/warehouse_databricks.py +297 -0
  34. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/METADATA +784 -0
  35. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/RECORD +38 -0
  36. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/WHEEL +4 -0
  37. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/entry_points.txt +2 -0
  38. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/licenses/LICENSE +21 -0
dbt_core_mcp/dbt/manifest.py
@@ -0,0 +1,781 @@
+ """
+ DBT Manifest Loader.
+
+ Reads and parses DBT's manifest.json file to provide structured access
+ to models, sources, tests, and other DBT entities.
+ """
+
+ import json
+ import logging
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class DbtModel:
+     """Represents a dbt model from the manifest."""
+
+     name: str
+     unique_id: str
+     resource_type: str
+     schema: str
+     database: str
+     alias: str
+     description: str
+     materialization: str
+     tags: list[str]
+     depends_on: list[str]
+     package_name: str
+     original_file_path: str
+
+
+ @dataclass
+ class DbtSource:
+     """Represents a dbt source from the manifest."""
+
+     name: str
+     unique_id: str
+     source_name: str
+     schema: str
+     database: str
+     identifier: str
+     description: str
+     tags: list[str]
+     package_name: str
+
+
+ class ManifestLoader:
+     """
+     Load and parse DBT manifest.json.
+
+     Provides structured access to models, sources, and other DBT entities.
+     """
+
+     def __init__(self, manifest_path: Path):
+         """
+         Initialize the manifest loader.
+
+         Args:
+             manifest_path: Path to manifest.json file
+         """
+         self.manifest_path = manifest_path
+         self._manifest: dict[str, Any] | None = None
+         self._manifest_mtime: float | None = None  # Track last modification time
+     async def load(self, force: bool = False) -> None:
+         """
+         Load the manifest from disk.
+
+         Args:
+             force: If True, reload even if already loaded. If False, only reload if file changed.
+         """
+         if not self.manifest_path.exists():
+             raise FileNotFoundError(f"Manifest not found: {self.manifest_path}")
+
+         # Check if reload is needed
+         current_mtime = self.manifest_path.stat().st_mtime
+
+         if not force and self._manifest is not None and self._manifest_mtime == current_mtime:
+             logger.debug("Manifest already loaded and unchanged, skipping reload")
+             return
+
+         logger.debug(f"Loading manifest from {self.manifest_path}")
+
+         with open(self.manifest_path, "r") as f:
+             self._manifest = json.load(f)
+
+         self._manifest_mtime = current_mtime
+         logger.info("Manifest loaded successfully")
+
+     def is_loaded(self) -> bool:
+         """Check if the manifest data has been loaded.
+
+         Returns:
+             True if manifest data is loaded in memory, False otherwise
+         """
+         return self._manifest is not None
+
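Editor's note: the mtime check above makes repeated load() calls cheap when manifest.json has not changed. A minimal usage sketch, not part of the package; the target path is an assumption:

    import asyncio
    from pathlib import Path

    async def main() -> None:
        loader = ManifestLoader(Path("target/manifest.json"))  # hypothetical path
        await loader.load()            # parses the file
        await loader.load()            # skipped: same mtime, manifest stays cached
        await loader.load(force=True)  # always re-reads from disk
        print(loader.is_loaded())      # True

    asyncio.run(main())
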
+     def get_resources(self, resource_type: str | None = None) -> list[dict[str, Any]]:
+         """
+         Get all resources from the manifest, optionally filtered by type.
+
+         Returns simplified resource information across all types (models, sources, seeds, etc.).
+         Designed for LLM consumption with consistent structure across resource types.
+
+         Args:
+             resource_type: Optional filter (model, source, seed, snapshot, test, analysis).
+                 If None, returns all resources.
+
+         Returns:
+             List of resource dictionaries with consistent structure:
+             {
+                 "name": str,
+                 "unique_id": str,
+                 "resource_type": str,
+                 "schema": str (if applicable),
+                 "database": str (if applicable),
+                 "description": str,
+                 "tags": list[str],
+                 "package_name": str,
+                 ...additional type-specific fields
+             }
+
+         Raises:
+             RuntimeError: If manifest not loaded
+             ValueError: If invalid resource_type provided
+         """
+         if not self._manifest:
+             raise RuntimeError("Manifest not loaded. Call load() first.")
+
+         # Validate resource_type if provided
+         valid_types = {"model", "source", "seed", "snapshot", "test", "analysis"}
+         if resource_type is not None and resource_type not in valid_types:
+             raise ValueError(f"Invalid resource_type '{resource_type}'. Must be one of: {', '.join(sorted(valid_types))}")
+
+         resources: list[dict[str, Any]] = []
+
+         # Collect from nodes (models, tests, seeds, snapshots, analyses)
+         nodes = self._manifest.get("nodes", {})
+         for unique_id, node in nodes.items():
+             if not isinstance(node, dict):
+                 continue
+
+             node_type = node.get("resource_type")
+
+             # Filter by type if specified
+             if resource_type is not None and node_type != resource_type:
+                 continue
+
+             # Build consistent resource dict
+             resource: dict[str, Any] = {
+                 "name": node.get("name", ""),
+                 "unique_id": unique_id,
+                 "resource_type": node_type,
+                 "package_name": node.get("package_name", ""),
+                 "description": node.get("description", ""),
+                 "tags": node.get("tags", []),
+             }
+
+             # Add common fields for materialized resources
+             if node_type in ("model", "seed", "snapshot"):
+                 resource["schema"] = node.get("schema", "")
+                 resource["database"] = node.get("database", "")
+                 resource["alias"] = node.get("alias", "")
+
+             # Add type-specific fields
+             if node_type == "model":
+                 resource["materialization"] = node.get("config", {}).get("materialized", "")
+                 resource["file_path"] = node.get("original_file_path", "")
+             elif node_type == "seed":
+                 resource["file_path"] = node.get("original_file_path", "")
+             elif node_type == "snapshot":
+                 resource["file_path"] = node.get("original_file_path", "")
+             elif node_type == "test":
+                 resource["test_metadata"] = node.get("test_metadata", {})
+                 resource["column_name"] = node.get("column_name")
+
+             resources.append(resource)
+
+         # Collect from sources (if not filtered out)
+         if resource_type is None or resource_type == "source":
+             sources = self._manifest.get("sources", {})
+             for unique_id, source in sources.items():
+                 if not isinstance(source, dict):
+                     continue
+
+                 resource = {
+                     "name": source.get("name", ""),
+                     "unique_id": unique_id,
+                     "resource_type": "source",
+                     "source_name": source.get("source_name", ""),
+                     "schema": source.get("schema", ""),
+                     "database": source.get("database", ""),
+                     "identifier": source.get("identifier", ""),
+                     "package_name": source.get("package_name", ""),
+                     "description": source.get("description", ""),
+                     "tags": source.get("tags", []),
+                 }
+
+                 resources.append(resource)
+
+         logger.debug(f"Found {len(resources)} resources" + (f" of type '{resource_type}'" if resource_type else ""))
+         return resources
+
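Editor's note: a sketch of how a caller might use get_resources() to enumerate models, assuming a loaded ManifestLoader instance named loader:

    models = loader.get_resources(resource_type="model")
    for m in models:
        print(m["unique_id"], m["materialization"], m["tags"])

    everything = loader.get_resources()  # all resource types in one flat list
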
+     def get_compiled_code(self, name: str) -> str | None:
+         """
+         Get the compiled SQL code for a model.
+
+         Args:
+             name: Model name
+
+         Returns:
+             Compiled SQL string if available, None if not compiled yet
+
+         Raises:
+             RuntimeError: If manifest not loaded
+             ValueError: If model not found
+         """
+         node = self.get_resource_node(name, "model")  # Will raise ValueError if not found
+         return node.get("compiled_code")
+
+     def get_resource_node(self, name: str, resource_type: str | None = None) -> dict[str, Any]:
+         """
+         Get a resource node by name with auto-detection across all resource types.
+
+         This method searches for resources across models, sources, seeds, snapshots, tests, etc.
+         Designed for LLM consumption - returns all matches when ambiguous rather than raising errors.
+
+         Args:
+             name: Resource name. For sources, can be "source_name.table_name" or just "table_name"
+             resource_type: Optional filter (model, source, seed, snapshot, test, analysis).
+                 If None, searches all types.
+
+         Returns:
+             Single resource dict if exactly one match found, or dict with multiple_matches=True
+             containing all matching resources for LLM to process.
+
+         Raises:
+             RuntimeError: If manifest not loaded
+             ValueError: If resource not found (only case that raises)
+
+         Examples:
+             get_resource_node("customers") -> single model dict
+             get_resource_node("customers", "source") -> single source dict
+             get_resource_node("customers") with multiple matches -> {"multiple_matches": True, ...}
+         """
+         if not self._manifest:
+             raise RuntimeError("Manifest not loaded. Call load() first.")
+
+         # Validate resource_type if provided
+         valid_types = {"model", "source", "seed", "snapshot", "test", "analysis"}
+         if resource_type is not None and resource_type not in valid_types:
+             raise ValueError(f"Invalid resource_type '{resource_type}'. Must be one of: {', '.join(sorted(valid_types))}")
+
+         matches: list[dict[str, Any]] = []
+
+         # For sources, try "source_name.table_name" format first
+         if "." in name and (resource_type is None or resource_type == "source"):
+             parts = name.split(".", 1)
+             if len(parts) == 2:
+                 # Search sources dict directly
+                 sources_dict = self._manifest.get("sources", {})
+                 for _, source in sources_dict.items():
+                     if isinstance(source, dict) and source.get("source_name") == parts[0] and source.get("name") == parts[1]:
+                         matches.append(dict(source))
+                         break
+
+         # Search nodes (models, tests, snapshots, seeds, analyses, etc.)
+         nodes = self._manifest.get("nodes", {})
+         for unique_id, node in nodes.items():
+             if not isinstance(node, dict):
+                 continue
+
+             node_type = node.get("resource_type")
+             node_name = node.get("name")
+
+             # Type filter if specified
+             if resource_type is not None and node_type != resource_type:
+                 continue
+
+             if node_name == name:
+                 matches.append(dict(node))
+
+         # Search sources by table name only (fallback when no dot in name)
+         if resource_type is None or resource_type == "source":
+             sources = self._manifest.get("sources", {})
+             for unique_id, source in sources.items():
+                 if not isinstance(source, dict):
+                     continue
+
+                 if source.get("name") == name:
+                     # Avoid duplicates if already matched via source_name.table_name
+                     if not any(m.get("unique_id") == unique_id for m in matches):
+                         matches.append(dict(source))
+
+         # Handle results based on match count
+         if len(matches) == 0:
+             type_label = resource_type.title() if resource_type else "Resource"
+             list_hint = f"Use list_resources(type='{resource_type}') to see all available {resource_type}s." if resource_type else "Use list_resources() to see all available resources."
+             raise ValueError(f"{type_label} '{name}' not found.\n{list_hint}")
+         elif len(matches) == 1:
+             # Single match - return the resource directly
+             return matches[0]
+         else:
+             # Multiple matches - return all with metadata for LLM to process
+             return {
+                 "multiple_matches": True,
+                 "name": name,
+                 "match_count": len(matches),
+                 "matches": matches,
+                 "message": f"Found {len(matches)} resources named '{name}'. Returning all matches for context.",
+             }
+
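Editor's note: because get_resource_node() returns a sentinel dict instead of raising when a name is ambiguous, callers have to branch on it. A hedged sketch, assuming loader is a loaded ManifestLoader:

    node = loader.get_resource_node("customers")
    if node.get("multiple_matches"):
        # several resources share the name; disambiguate by package or unique_id
        for candidate in node["matches"]:
            print(candidate["unique_id"])
    else:
        print(node["unique_id"], node.get("resource_type"))
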
+     def get_resource_info(
+         self,
+         name: str,
+         resource_type: str | None = None,
+         include_database_schema: bool = True,
+         include_compiled_sql: bool = True,
+     ) -> dict[str, Any]:
+         """Get detailed resource information with optional enrichments.
+
+         This method extends get_resource_node() with optional enrichments:
+         - include_database_schema: Query actual database schema
+         - include_compiled_sql: Include compiled SQL (models only, requires compilation)
+
+         Note: This method does NOT trigger compilation. If compiled SQL is requested but
+         not available in the manifest, the 'compiled_sql' field will be None. The caller
+         (e.g., server tool) is responsible for triggering compilation if needed.
+
+         Args:
+             name: Resource name
+             resource_type: Optional resource type filter
+             include_database_schema: Include database schema information (default: True)
+             include_compiled_sql: Include compiled SQL for models (default: True)
+
+         Returns:
+             Resource dictionary with optional enrichments
+         """
+         result = self.get_resource_node(name, resource_type)
+
+         # Handle multiple matches case - return as-is
+         if result.get("multiple_matches"):
+             return result
+
+         # Single match - enrich with additional data if requested
+         node_type = result.get("resource_type")
+
+         # Create a copy without heavy fields
+         result_copy = dict(result)
+         result_copy.pop("raw_code", None)
+         result_copy.pop("compiled_code", None)
+
+         # Include compiled SQL for models if requested and available
+         if include_compiled_sql and node_type == "model":
+             compiled_code = result.get("compiled_code")
+
+             if compiled_code:
+                 result_copy["compiled_sql"] = compiled_code
+                 result_copy["compiled_sql_cached"] = True
+             else:
+                 # Not compiled yet - set to None to indicate it's not available
+                 result_copy["compiled_sql"] = None
+                 result_copy["compiled_sql_cached"] = False
+
+         return result_copy
+
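Editor's note: since get_resource_info() never triggers compilation itself, the caller is expected to notice a missing compiled_sql and compile on demand. A sketch of that caller-side pattern; compile_model is a hypothetical helper (e.g. shelling out to dbt compile), not part of this file:

    async def describe(name: str) -> dict:
        info = loader.get_resource_info(name, include_compiled_sql=True)
        if not info.get("multiple_matches") and info.get("compiled_sql") is None:
            compile_model(name)            # hypothetical: run `dbt compile -s <name>`
            await loader.load(force=True)  # re-read the manifest dbt just rewrote
            info = loader.get_resource_info(name)
        return info
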
+     def get_project_info(self) -> dict[str, Any]:
+         """
+         Get high-level project information from the manifest.
+
+         Returns:
+             Dictionary with project metadata
+         """
+         if not self._manifest:
+             raise RuntimeError("Manifest not loaded. Call load() first.")
+
+         metadata: dict[str, Any] = self._manifest.get("metadata", {})  # type: ignore[assignment]
+
+         # Count resources directly from manifest
+         nodes = self._manifest.get("nodes", {})
+         model_count = sum(1 for node in nodes.values() if isinstance(node, dict) and node.get("resource_type") == "model")
+         source_count = len(self._manifest.get("sources", {}))
+
+         return {
+             "project_name": metadata.get("project_name", ""),
+             "dbt_version": metadata.get("dbt_version", ""),
+             "adapter_type": metadata.get("adapter_type", ""),
+             "generated_at": metadata.get("generated_at", ""),
+             "model_count": model_count,
+             "source_count": source_count,
+         }
+
+     def get_manifest_dict(self) -> dict[str, Any]:
+         """Get the raw manifest dictionary.
+
+         Returns:
+             Raw manifest dictionary
+
+         Raises:
+             RuntimeError: If manifest not loaded
+         """
+         if not self._manifest:
+             raise RuntimeError("Manifest not loaded. Call load() first.")
+         return self._manifest
+
+     def get_node_by_unique_id(self, unique_id: str) -> dict[str, Any] | None:
+         """Get a node (model, test, etc.) by its unique_id.
+
+         Args:
+             unique_id: The unique identifier (e.g., 'model.package.model_name')
+
+         Returns:
+             Node dictionary or None if not found
+         """
+         if not self._manifest:
+             raise RuntimeError("Manifest not loaded. Call load() first.")
+
+         # Check nodes first (models, tests, snapshots, etc.)
+         nodes = self._manifest.get("nodes", {})
+         if unique_id in nodes:
+             return dict(nodes[unique_id])
+
+         # Check sources
+         sources = self._manifest.get("sources", {})
+         if unique_id in sources:
+             return dict(sources[unique_id])
+
+         return None
+
+     def get_upstream_nodes(self, unique_id: str, max_depth: int | None = None, current_depth: int = 0) -> list[dict[str, Any]]:
+         """Get all upstream dependencies of a node recursively.
+
+         Args:
+             unique_id: The unique identifier of the node
+             max_depth: Maximum depth to traverse (None for unlimited)
+             current_depth: Current recursion depth (internal use)
+
+         Returns:
+             List of dictionaries with upstream node info:
+             {"unique_id": str, "name": str, "type": str, "distance": int}
+         """
+         if not self._manifest:
+             raise RuntimeError("Manifest not loaded. Call load() first.")
+
+         if max_depth is not None and current_depth >= max_depth:
+             return []
+
+         parent_map = self._manifest.get("parent_map", {})
+         parents = parent_map.get(unique_id, [])
+
+         upstream: list[dict[str, Any]] = []
+         seen: set[str] = set()
+
+         for parent_id in parents:
+             if parent_id in seen:
+                 continue
+             seen.add(parent_id)
+
+             node = self.get_node_by_unique_id(parent_id)
+             if node:
+                 resource_type = node.get("resource_type", "unknown")
+                 upstream.append(
+                     {
+                         "unique_id": parent_id,
+                         "name": node.get("name", ""),
+                         "type": resource_type,
+                         "distance": current_depth + 1,
+                     }
+                 )
+
+             # Recurse
+             if max_depth is None or current_depth + 1 < max_depth:
+                 grandparents = self.get_upstream_nodes(parent_id, max_depth, current_depth + 1)
+                 for gp in grandparents:
+                     if gp["unique_id"] not in seen:
+                         seen.add(str(gp["unique_id"]))
+                         upstream.append(gp)
+
+         return upstream
+
+     def get_lineage(
+         self,
+         name: str,
+         resource_type: str | None = None,
+         direction: str = "both",
+         depth: int | None = None,
+     ) -> dict[str, Any]:
+         """
+         Get lineage (dependency tree) for any resource type with auto-detection.
+
+         This unified method works across all resource types (models, sources, seeds, etc.)
+         and provides upstream, downstream, or bidirectional dependency traversal.
+
+         Args:
+             name: Resource name. For sources, use "source_name.table_name" or just "table_name"
+             resource_type: Optional filter (model, source, seed, snapshot, test, analysis).
+                 If None, auto-detects resource type.
+             direction: Lineage direction:
+                 - "upstream": Show where data comes from (parents)
+                 - "downstream": Show what depends on this resource (children)
+                 - "both": Show full lineage (default)
+             depth: Maximum levels to traverse (None for unlimited)
+                 - depth=1: Immediate dependencies only
+                 - depth=2: Dependencies + their dependencies
+                 - None: Full dependency tree
+
+         Returns:
+             Dictionary with lineage information:
+             {
+                 "resource": {...},   # The target resource info
+                 "upstream": [...],   # List of upstream dependencies (if direction in ["upstream", "both"])
+                 "downstream": [...], # List of downstream dependents (if direction in ["downstream", "both"])
+                 "stats": {
+                     "upstream_count": int,
+                     "downstream_count": int,
+                     "total_dependencies": int
+                 }
+             }
+
+             If multiple matches found, returns:
+             {"multiple_matches": True, "matches": [...], "message": "..."}
+
+         Raises:
+             RuntimeError: If manifest not loaded
+             ValueError: If resource not found or invalid direction
+
+         Examples:
+             get_lineage("customers") -> auto-detect and show full lineage
+             get_lineage("customers", "model", "upstream") -> show where customers model gets data
+             get_lineage("customers", direction="downstream", depth=2) -> 2 levels of dependents
+         """
+         if not self._manifest:
+             raise RuntimeError("Manifest not loaded. Call load() first.")
+
+         # Validate direction
+         valid_directions = {"upstream", "downstream", "both"}
+         if direction not in valid_directions:
+             raise ValueError(f"Invalid direction '{direction}'. Must be one of: {', '.join(sorted(valid_directions))}")
+
+         # Get the resource (auto-detect if resource_type not specified)
+         resource = self.get_resource_node(name, resource_type)
+
+         # Handle multiple matches - return for LLM to process
+         if resource.get("multiple_matches"):
+             return resource
+
+         # Extract unique_id for lineage traversal
+         unique_id = resource.get("unique_id")
+         if not unique_id:
+             raise ValueError(f"Resource '{name}' does not have a unique_id")
+
+         # Build lineage based on direction
+         result: dict[str, Any] = {
+             "resource": {
+                 "name": resource.get("name"),
+                 "unique_id": unique_id,
+                 "resource_type": resource.get("resource_type"),
+                 "package_name": resource.get("package_name"),
+             }
+         }
+
+         upstream: list[dict[str, Any]] = []
+         downstream: list[dict[str, Any]] = []
+
+         if direction in ("upstream", "both"):
+             upstream = self.get_upstream_nodes(unique_id, max_depth=depth)
+             result["upstream"] = upstream
+
+         if direction in ("downstream", "both"):
+             downstream = self.get_downstream_nodes(unique_id, max_depth=depth)
+             result["downstream"] = downstream
+
+         # Add statistics
+         result["stats"] = {
+             "upstream_count": len(upstream),
+             "downstream_count": len(downstream),
+             "total_dependencies": len(upstream) + len(downstream),
+         }
+
+         return result
+
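Editor's note: a sketch of typical lineage queries against a loaded ManifestLoader; the resource names reuse the examples from the docstrings above:

    full = loader.get_lineage("customers")  # upstream + downstream, unlimited depth
    parents = loader.get_lineage("customers", direction="upstream", depth=1)
    children = loader.get_lineage("jaffle_shop.orders", "source", direction="downstream")
    print(full["stats"])  # {"upstream_count": ..., "downstream_count": ..., "total_dependencies": ...}
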
+     def analyze_impact(
+         self,
+         name: str,
+         resource_type: str | None = None,
+     ) -> dict[str, Any]:
+         """
+         Analyze the impact of changing a resource across all resource types.
+
+         Shows all downstream dependencies that would be affected by changes,
+         including models, tests, and other resources. Provides actionable
+         recommendations for running affected resources.
+
+         Args:
+             name: Resource name. For sources, use "source_name.table_name" or just "table_name"
+             resource_type: Optional filter (model, source, seed, snapshot, test, analysis).
+                 If None, auto-detects resource type.
+
+         Returns:
+             Dictionary with impact analysis:
+             {
+                 "resource": {...},  # The target resource info
+                 "impact": {
+                     "models_affected": [...],  # Downstream models by distance
+                     "models_affected_count": int,
+                     "tests_affected_count": int,
+                     "other_affected_count": int,
+                     "total_affected": int
+                 },
+                 "affected_by_distance": {
+                     "1": [...],  # Immediate dependents
+                     "2": [...],  # Second-level dependents
+                     ...
+                 },
+                 "recommendation": str,  # Suggested dbt command
+                 "message": str  # Human-readable impact assessment
+             }
+
+             If multiple matches found, returns:
+             {"multiple_matches": True, "matches": [...], "message": "..."}
+
+         Raises:
+             RuntimeError: If manifest not loaded
+             ValueError: If resource not found
+
+         Examples:
+             analyze_impact("stg_customers") -> impact of changing staging model
+             analyze_impact("jaffle_shop.orders", "source") -> impact of source change
+             analyze_impact("raw_customers", "seed") -> impact of seed change
+         """
+         if not self._manifest:
+             raise RuntimeError("Manifest not loaded. Call load() first.")
+
+         # Get the resource (auto-detect if resource_type not specified)
+         resource = self.get_resource_node(name, resource_type)
+
+         # Handle multiple matches - return for LLM to process
+         if resource.get("multiple_matches"):
+             return resource
+
+         # Extract unique_id for impact traversal
+         unique_id = resource.get("unique_id")
+         if not unique_id:
+             raise ValueError(f"Resource '{name}' does not have a unique_id")
+
+         # Get all downstream dependencies (no depth limit for impact)
+         downstream = self.get_downstream_nodes(unique_id, max_depth=None)
+
+         # Categorize by resource type
+         models_affected: list[dict[str, Any]] = []
+         tests_affected: list[dict[str, Any]] = []
+         other_affected: list[dict[str, Any]] = []
+         affected_by_distance: dict[str, list[dict[str, Any]]] = {}
+
+         for dep in downstream:
+             dep_type = str(dep["type"])
+             distance = str(dep["distance"])
+
+             # Group by distance
+             if distance not in affected_by_distance:
+                 affected_by_distance[distance] = []
+             affected_by_distance[distance].append(dep)
+
+             # Categorize by type
+             if dep_type == "model":
+                 models_affected.append(dep)
+             elif dep_type == "test":
+                 tests_affected.append(dep)
+             else:
+                 other_affected.append(dep)
+
+         # Sort models by distance for better readability
+         models_affected_sorted = sorted(models_affected, key=lambda x: (int(x["distance"]), str(x["name"])))
+
+         # Build recommendation based on resource type
+         resource_name = resource.get("name", name)
+         current_resource_type = resource.get("resource_type")
+
+         if current_resource_type == "source":
+             # For sources, recommend running downstream models
+             if len(models_affected) == 0:
+                 recommendation = f"dbt test -s source:{resource.get('source_name')}.{resource_name}"
+             else:
+                 recommendation = f"dbt run -s {resource_name}+"
+         elif current_resource_type == "seed":
+             # For seeds, recommend seeding + downstream
+             if len(models_affected) == 0:
+                 recommendation = f"dbt seed -s {resource_name} && dbt test -s {resource_name}"
+             else:
+                 recommendation = f"dbt seed -s {resource_name} && dbt run -s {resource_name}+"
+         else:
+             # For models, snapshots, etc.
+             if len(models_affected) == 0:
+                 recommendation = f"dbt run -s {resource_name}"
+             else:
+                 recommendation = f"dbt run -s {resource_name}+"
+
+         # Build result
+         result: dict[str, Any] = {
+             "resource": {
+                 "name": resource_name,
+                 "unique_id": unique_id,
+                 "resource_type": current_resource_type,
+                 "package_name": resource.get("package_name"),
+             },
+             "impact": {
+                 "models_affected": models_affected_sorted,
+                 "models_affected_count": len(models_affected),
+                 "tests_affected_count": len(tests_affected),
+                 "other_affected_count": len(other_affected),
+                 "total_affected": len(downstream),
+             },
+             "affected_by_distance": affected_by_distance,
+             "recommendation": recommendation,
+         }
+
+         # Add helpful message based on impact size
+         if len(models_affected) == 0:
+             result["message"] = "No downstream models affected. Only this resource needs to be run/tested."
+         elif len(models_affected) <= 3:
+             result["message"] = f"Low impact: {len(models_affected)} downstream model(s) affected."
+         elif len(models_affected) <= 10:
+             result["message"] = f"Medium impact: {len(models_affected)} downstream models affected."
+         else:
+             result["message"] = f"High impact: {len(models_affected)} downstream models affected. Consider incremental changes."
+
+         return result
+
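Editor's note: a sketch of reading the impact report returned above, assuming loader is a loaded ManifestLoader; field names follow the return structure documented in the method:

    report = loader.analyze_impact("stg_customers")
    if not report.get("multiple_matches"):
        print(report["message"])         # e.g. "Low impact: 2 downstream model(s) affected."
        print(report["recommendation"])  # e.g. "dbt run -s stg_customers+"
        for distance, deps in report["affected_by_distance"].items():
            print(distance, [d["name"] for d in deps])
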
+     def get_downstream_nodes(self, unique_id: str, max_depth: int | None = None, current_depth: int = 0) -> list[dict[str, Any]]:
+         """Get all downstream dependents of a node recursively.
+
+         Args:
+             unique_id: The unique identifier of the node
+             max_depth: Maximum depth to traverse (None for unlimited)
+             current_depth: Current recursion depth (internal use)
+
+         Returns:
+             List of dictionaries with downstream node info:
+             {"unique_id": str, "name": str, "type": str, "distance": int}
+         """
+         if not self._manifest:
+             raise RuntimeError("Manifest not loaded. Call load() first.")
+
+         if max_depth is not None and current_depth >= max_depth:
+             return []
+
+         child_map = self._manifest.get("child_map", {})
+         children = child_map.get(unique_id, [])
+
+         downstream: list[dict[str, Any]] = []
+         seen: set[str] = set()
+
+         for child_id in children:
+             if child_id in seen:
+                 continue
+             seen.add(child_id)
+
+             node = self.get_node_by_unique_id(child_id)
+             if node:
+                 resource_type = node.get("resource_type", "unknown")
+                 downstream.append(
+                     {
+                         "unique_id": child_id,
+                         "name": node.get("name", ""),
+                         "type": resource_type,
+                         "distance": current_depth + 1,
+                     }
+                 )
+
+             # Recurse
+             if max_depth is None or current_depth + 1 < max_depth:
+                 grandchildren = self.get_downstream_nodes(child_id, max_depth, current_depth + 1)
+                 for gc in grandchildren:
+                     if gc["unique_id"] not in seen:
+                         seen.add(str(gc["unique_id"]))
+                         downstream.append(gc)
+
+         return downstream