iflow-mcp_niclasolofsson-dbt-core-mcp 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. dbt_core_mcp/__init__.py +18 -0
  2. dbt_core_mcp/__main__.py +436 -0
  3. dbt_core_mcp/context.py +459 -0
  4. dbt_core_mcp/cte_generator.py +601 -0
  5. dbt_core_mcp/dbt/__init__.py +1 -0
  6. dbt_core_mcp/dbt/bridge_runner.py +1361 -0
  7. dbt_core_mcp/dbt/manifest.py +781 -0
  8. dbt_core_mcp/dbt/runner.py +67 -0
  9. dbt_core_mcp/dependencies.py +50 -0
  10. dbt_core_mcp/server.py +381 -0
  11. dbt_core_mcp/tools/__init__.py +77 -0
  12. dbt_core_mcp/tools/analyze_impact.py +78 -0
  13. dbt_core_mcp/tools/build_models.py +190 -0
  14. dbt_core_mcp/tools/demo/__init__.py +1 -0
  15. dbt_core_mcp/tools/demo/hello.html +267 -0
  16. dbt_core_mcp/tools/demo/ui_demo.py +41 -0
  17. dbt_core_mcp/tools/get_column_lineage.py +1988 -0
  18. dbt_core_mcp/tools/get_lineage.py +89 -0
  19. dbt_core_mcp/tools/get_project_info.py +96 -0
  20. dbt_core_mcp/tools/get_resource_info.py +134 -0
  21. dbt_core_mcp/tools/install_deps.py +102 -0
  22. dbt_core_mcp/tools/list_resources.py +84 -0
  23. dbt_core_mcp/tools/load_seeds.py +179 -0
  24. dbt_core_mcp/tools/query_database.py +459 -0
  25. dbt_core_mcp/tools/run_models.py +234 -0
  26. dbt_core_mcp/tools/snapshot_models.py +120 -0
  27. dbt_core_mcp/tools/test_models.py +238 -0
  28. dbt_core_mcp/utils/__init__.py +1 -0
  29. dbt_core_mcp/utils/env_detector.py +186 -0
  30. dbt_core_mcp/utils/process_check.py +130 -0
  31. dbt_core_mcp/utils/tool_utils.py +411 -0
  32. dbt_core_mcp/utils/warehouse_adapter.py +82 -0
  33. dbt_core_mcp/utils/warehouse_databricks.py +297 -0
  34. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/METADATA +784 -0
  35. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/RECORD +38 -0
  36. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/WHEEL +4 -0
  37. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/entry_points.txt +2 -0
  38. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,89 @@
1
+ """Get lineage (dependency tree) for any dbt resource.
2
+
3
+ This module implements the get_lineage tool for dbt Core MCP.
4
+ """
5
+
6
+ import logging
7
+ from typing import Any
8
+
9
+ from fastmcp.dependencies import Depends # type: ignore[reportAttributeAccessIssue]
10
+ from fastmcp.server.context import Context
11
+
12
+ from ..context import DbtCoreServerContext
13
+ from ..dependencies import get_state
14
+ from . import dbtTool
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
async def _implementation(
    ctx: Context | None,
    name: str,
    resource_type: str | None,
    direction: str,
    depth: int | None,
    state: DbtCoreServerContext,
    force_parse: bool = True,
) -> dict[str, Any]:
    """Implementation function for get_lineage tool.

    Separated for testing purposes - tests call this directly with explicit state.
    The @tool() decorated get_lineage() function calls this with injected dependencies.

    Args:
        ctx: MCP context, or None when called directly from tests.
        name: Resource name to resolve lineage for.
        resource_type: Optional resource-type filter (e.g. "model", "source"); None auto-detects.
        direction: "upstream", "downstream", or "both".
        depth: Maximum traversal depth, or None for unlimited.
        state: Shared server context holding the parsed manifest.
        force_parse: Re-parse the project before answering (metadata tools default to True).

    Returns:
        Lineage dictionary produced by the manifest helper.

    Raises:
        ValueError: If the resource is not found or the direction is invalid.
    """
    # Initialize state if needed (metadata tool uses force_parse=True)
    await state.ensure_initialized(ctx, force_parse)

    # Delegate to manifest helper for lineage traversal
    try:
        return state.manifest.get_lineage(name, resource_type, direction, depth)  # type: ignore
    except ValueError as e:
        # Chain the original exception so the root cause stays visible in
        # tracebacks (PEP 3134; flake8-bugbear B904).
        raise ValueError(f"Lineage error: {e}") from e
41
+
42
+
43
# NOTE(review): the docstring below is surfaced to MCP clients as the tool
# description, so it is intentionally left unchanged here.
@dbtTool()
async def get_lineage(
    ctx: Context,
    name: str,
    resource_type: str | None = None,
    direction: str = "both",
    depth: int | None = None,
    state: DbtCoreServerContext = Depends(get_state),
) -> dict[str, Any]:
    """Get lineage (dependency tree) for any dbt resource with auto-detection.

    This unified tool works across all resource types (models, sources, seeds, snapshots, etc.)
    showing upstream and/or downstream dependencies with configurable depth.

    Args:
        name: Resource name. For sources, use "source_name.table_name" or just "table_name"
            Examples: "customers", "jaffle_shop.orders", "raw_customers"
        resource_type: Optional filter to narrow search:
            - "model": Data transformation models
            - "source": External data sources
            - "seed": CSV reference data files
            - "snapshot": SCD Type 2 historical tables
            - "test": Data quality tests
            - "analysis": Ad-hoc analysis queries
            - None: Auto-detect (searches all types)
        direction: Lineage direction:
            - "upstream": Show where data comes from (parents)
            - "downstream": Show what depends on this resource (children)
            - "both": Show full lineage (default)
        depth: Maximum levels to traverse (None for unlimited)
            - depth=1: Immediate dependencies only
            - depth=2: Dependencies + their dependencies
            - None: Full dependency tree

    Returns:
        Lineage information with upstream/downstream nodes and statistics.
        If multiple matches found, returns all matches for LLM to process.

    Raises:
        ValueError: If resource not found or invalid direction

    Examples:
        get_lineage("customers") -> auto-detect and show full lineage
        get_lineage("customers", "model", "upstream") -> where customers model gets data
        get_lineage("jaffle_shop.orders", "source", "downstream", 2) -> 2 levels of dependents
    """
    # Thin wrapper: delegate to the shared implementation with the
    # FastMCP-injected state (tests call _implementation directly instead).
    return await _implementation(ctx, name, resource_type, direction, depth, state)
@@ -0,0 +1,96 @@
1
+ """Get information about the dbt project with optional diagnostics.
2
+
3
+ This module implements the get_project_info tool for dbt Core MCP.
4
+ """
5
+
6
+ import logging
7
+ from typing import Any
8
+
9
+ from fastmcp.dependencies import Depends # type: ignore[reportAttributeAccessIssue]
10
+ from fastmcp.server.context import Context
11
+
12
+ from ..context import DbtCoreServerContext
13
+ from ..dependencies import get_state
14
+ from . import dbtTool
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
async def _implementation(
    ctx: Context | None,
    run_debug: bool,
    state: DbtCoreServerContext,
    force_parse: bool = True,
) -> dict[str, Any]:
    """Implementation function for get_project_info tool.

    Separated for testing purposes - tests call this directly with explicit state.
    The @tool() decorated get_project_info() function calls this with injected dependencies.

    Args:
        ctx: MCP context, or None when called directly from tests.
        run_debug: If True, also run ``dbt debug`` and attach diagnostics.
        state: Shared server context holding manifest and runner.
        force_parse: Re-parse the project before answering (metadata tools default to True).

    Returns:
        Project info dictionary with paths, status, server version, and
        (optionally) a "diagnostics" sub-dictionary from ``dbt debug``.

    Raises:
        ValueError: If collecting project info or running diagnostics fails.
    """
    # Initialize state if needed (metadata tool uses force_parse=True)
    await state.ensure_initialized(ctx, force_parse)

    try:
        # Collect manifest metadata for quick status check
        info = state.manifest.get_project_info()  # type: ignore
        info["project_dir"] = str(state.project_dir)
        info["profiles_dir"] = state.profiles_dir
        info["status"] = "ready"

        # Add dbt-core-mcp version (imported lazily here, presumably to
        # avoid an import cycle at module load time — TODO confirm)
        from .. import __version__

        info["dbt_core_mcp_version"] = __version__

        # Optionally run full dbt debug for connectivity diagnostics
        if run_debug:
            runner = await state.get_runner()
            debug_result_obj = await runner.invoke(["debug"])  # type: ignore

            # Convert DbtRunnerResult to dictionary
            debug_result = {
                "success": debug_result_obj.success,
                "output": debug_result_obj.stdout if debug_result_obj.stdout else "",
            }

            # Normalize debug output into structured diagnostics
            diagnostics: dict[str, Any] = {
                "command_run": "dbt debug",
                "success": debug_result.get("success", False),
                "output": debug_result.get("output", ""),
            }

            # Extract connection status from output (matches both the
            # bracketed and plain dbt debug output formats)
            output = str(debug_result.get("output", ""))
            if "Connection test: [OK connection ok]" in output or "Connection test: OK" in output:
                diagnostics["connection_status"] = "ok"
            elif "Connection test: [ERROR" in output or "Connection test: FAIL" in output:
                diagnostics["connection_status"] = "failed"
            else:
                diagnostics["connection_status"] = "unknown"

            info["diagnostics"] = diagnostics

        return info

    except Exception as e:
        # Re-wrap as ValueError for the tool boundary, chaining the cause so
        # the original traceback is preserved (PEP 3134; flake8-bugbear B904).
        raise ValueError(f"Failed to get project info: {e}") from e
78
+
79
+
80
# NOTE(review): the docstring below is surfaced to MCP clients as the tool
# description, so it is intentionally left unchanged here.
@dbtTool()
async def get_project_info(
    ctx: Context,
    run_debug: bool = True,
    state: DbtCoreServerContext = Depends(get_state),
) -> dict[str, Any]:
    """Get information about the dbt project with optional diagnostics.

    Args:
        ctx: MCP context (provided by FastMCP)
        run_debug: Run `dbt debug` to validate environment and test connection (default: True)
        state: Shared state object injected by FastMCP

    Returns:
        Dictionary with project information and diagnostic results
    """
    # Thin wrapper: delegate to the shared implementation with the
    # FastMCP-injected state (tests call _implementation directly instead).
    return await _implementation(ctx, run_debug, state)
@@ -0,0 +1,134 @@
1
+ """Get detailed information about any dbt resource.
2
+
3
+ This module implements the get_resource_info tool for dbt Core MCP.
4
+ """
5
+
6
+ import logging
7
+ from typing import Any
8
+
9
+ from fastmcp.dependencies import Depends # type: ignore[reportAttributeAccessIssue]
10
+ from fastmcp.server.context import Context
11
+
12
+ from ..context import DbtCoreServerContext
13
+ from ..dependencies import get_state
14
+ from . import dbtTool
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
async def _implementation(
    ctx: Context | None,
    name: str,
    resource_type: str | None,
    include_database_schema: bool,
    include_compiled_sql: bool,
    state: DbtCoreServerContext,
    force_parse: bool = True,
) -> dict[str, Any]:
    """Implementation function for get_resource_info tool.

    Separated for testing purposes - tests call this directly with explicit state.
    The @tool() decorated get_resource_info() function calls this with injected dependencies.

    Args:
        ctx: MCP context, or None when called directly from tests.
        name: Resource name (for sources, "source_name.table_name" or just "table_name").
        resource_type: Optional resource-type filter; None auto-detects.
        include_database_schema: Enrich models/seeds/snapshots/sources with live
            database columns queried via the warehouse connection.
        include_compiled_sql: Include compiled SQL for models, compiling on demand.
        state: Shared server context holding manifest, runner, and DB access.
        force_parse: Re-parse the project before answering (metadata tools default to True).

    Returns:
        Resource info dictionary; for ambiguous names, a
        {"multiple_matches": True, "matches": [...]} payload.

    Raises:
        ValueError: If the resource is not found.
    """
    # Initialize state if needed (metadata tool uses force_parse=True)
    await state.ensure_initialized(ctx, force_parse)

    try:
        # Get resource info with manifest method (handles basic enrichment)
        result = state.manifest.get_resource_info(  # type: ignore
            name,
            resource_type,
            include_database_schema=False,  # We'll handle this below for database schema
            include_compiled_sql=include_compiled_sql,
        )

        # Handle multiple matches case
        if result.get("multiple_matches"):
            # Enrich each match with database schema if requested
            if include_database_schema:
                matches = result.get("matches", [])
                for match in matches:
                    node_type = match.get("resource_type")
                    if node_type in ("model", "seed", "snapshot", "source"):
                        resource_name = match.get("name")
                        source_name = match.get("source_name") if node_type == "source" else None
                        schema = await state.get_table_schema_from_db(resource_name, source_name)
                        if schema:
                            match["database_columns"] = schema
            return result

        # Single match - check if we need to trigger compilation
        node_type = result.get("resource_type")

        if include_compiled_sql and node_type == "model":
            # If compiled SQL requested but not available, trigger compilation
            if result.get("compiled_sql") is None and not result.get("compiled_sql_cached"):
                logger.info(f"Compiling model: {name}")
                runner = await state.get_runner()
                compile_result = await runner.invoke_compile(name, force=False)  # type: ignore

                if compile_result.success:
                    # Reload manifest to get compiled code
                    await state.manifest.load()  # type: ignore
                    # Re-fetch the resource to get updated compiled_code
                    result = state.manifest.get_resource_info(  # type: ignore
                        name,
                        resource_type,
                        include_database_schema=False,
                        include_compiled_sql=True,
                    )

        # Query database schema for applicable resource types (ref/source aware)
        if include_database_schema and node_type in ("model", "seed", "snapshot", "source"):
            resource_name = result.get("name", name)
            # For sources, pass source_name to use source() instead of ref()
            source_name = result.get("source_name") if node_type == "source" else None
            schema = await state.get_table_schema_from_db(resource_name, source_name)
            if schema:
                result["database_columns"] = schema

        return result

    except ValueError as e:
        # Chain the original exception so the root cause is preserved in
        # tracebacks (PEP 3134; flake8-bugbear B904).
        raise ValueError(f"Resource not found: {e}") from e
94
+
95
+
96
# NOTE(review): the docstring below is surfaced to MCP clients as the tool
# description, so it is intentionally left unchanged here.
@dbtTool()
async def get_resource_info(
    ctx: Context,
    name: str,
    resource_type: str | None = None,
    include_database_schema: bool = True,
    include_compiled_sql: bool = True,
    state: DbtCoreServerContext = Depends(get_state),
) -> dict[str, Any]:
    """Get detailed information about any dbt resource (model, source, seed, snapshot, test, etc.).

    This unified tool works across all resource types, auto-detecting the resource or filtering by type.
    Designed for LLM consumption - returns complete data even when multiple matches exist.

    Args:
        name: Resource name. For sources, use "source_name.table_name" or just "table_name"
        resource_type: Optional filter to narrow search:
            - "model": Data transformation models
            - "source": External data sources
            - "seed": CSV reference data files
            - "snapshot": SCD Type 2 historical tables
            - "test": Data quality tests
            - "analysis": Ad-hoc analysis queries
            - None: Auto-detect (searches all types)
        include_database_schema: If True (default), query actual database table schema
            for models/seeds/snapshots/sources and add as 'database_columns' field
        include_compiled_sql: If True (default), include compiled SQL with Jinja resolved
            ({{ ref() }}, {{ source() }} → actual table names). Only applicable to models.
            Will trigger dbt compile if not already compiled. Set to False to skip compilation.
        state: Shared state object injected by FastMCP

    Returns:
        Resource information dictionary. If multiple matches found, returns:
        {"multiple_matches": True, "matches": [...], "message": "..."}

    Raises:
        ValueError: If resource not found
    """
    # Thin wrapper: delegate to the shared implementation with the
    # FastMCP-injected state (tests call _implementation directly instead).
    return await _implementation(ctx, name, resource_type, include_database_schema, include_compiled_sql, state)
@@ -0,0 +1,102 @@
1
+ """Install dbt packages defined in packages.yml.
2
+
3
+ This module implements the install_deps tool for dbt Core MCP.
4
+ """
5
+
6
+ import logging
7
+ from typing import Any
8
+
9
+ from fastmcp.dependencies import Depends # type: ignore[reportAttributeAccessIssue]
10
+ from fastmcp.server.context import Context
11
+
12
+ from ..context import DbtCoreServerContext
13
+ from ..dependencies import get_state
14
+ from . import dbtTool
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
async def _implementation(
    ctx: Context | None,
    state: DbtCoreServerContext,
) -> dict[str, Any]:
    """Implementation function for install_deps tool.

    Separated for testing purposes - tests call this directly with explicit state.
    The @tool() decorated install_deps() function calls this with injected dependencies.

    Args:
        ctx: MCP context, or None when called directly from tests.
        state: Shared server context providing the dbt runner and manifest.

    Returns:
        Dictionary with status, the command run, the sorted list of installed
        package names parsed from the manifest's macros, and a summary message.

    Raises:
        RuntimeError: If ``dbt deps`` fails.
    """
    # Ensure dbt components are initialized
    # (this comment was previously appended to the docstring's closing line)
    await state.ensure_initialized(ctx, force_parse=False)

    # Execute dbt deps
    logger.info("Running dbt deps to install packages")

    runner = await state.get_runner()
    result = await runner.invoke(["deps"])

    if not result.success:
        # Bubble up installer failure with context
        raise RuntimeError(f"dbt deps failed: {result.exception}")

    # Parse installed packages from manifest: macro unique_ids follow the
    # "macro.<package_name>.<macro_name>" format, so each distinct package
    # name (besides dbt itself and this project) is an installed dependency.
    installed_packages: set[str] = set()

    assert state.manifest is not None
    manifest_dict = state.manifest.get_manifest_dict()
    macros = manifest_dict.get("macros", {})
    project_name = manifest_dict.get("metadata", {}).get("project_name", "")

    for unique_id in macros:
        # macro.package_name.macro_name format
        if unique_id.startswith("macro."):
            parts = unique_id.split(".")
            if len(parts) >= 2:
                package_name = parts[1]
                # Exclude built-in dbt package and the project's own package
                if package_name not in ("dbt", project_name):
                    installed_packages.add(package_name)

    return {
        "status": "success",
        "command": "dbt deps",
        "installed_packages": sorted(installed_packages),
        "message": f"Successfully installed {len(installed_packages)} package(s)",
    }
63
+
64
+
65
# NOTE(review): the docstring below is surfaced to MCP clients as the tool
# description, so it is intentionally left unchanged here.
@dbtTool()
async def install_deps(
    ctx: Context,
    state: DbtCoreServerContext = Depends(get_state),
) -> dict[str, Any]:
    """Install dbt packages defined in packages.yml.

    This tool enables interactive workflow where an LLM can:
    1. Suggest using a dbt package (e.g., dbt_utils)
    2. Edit packages.yml to add the package
    3. Run install_deps() to install it
    4. Write code that uses the package's macros

    This completes the recommendation workflow without breaking conversation flow.

    **When to use**:
    - After adding/modifying packages.yml
    - Before using macros from external packages
    - When setting up a new dbt project

    **Package Discovery**:
    After installation, use list_resources(resource_type="macro") to verify
    installed packages and discover available macros.

    Returns:
        Installation results with status and installed packages

    Example workflow:
        User: "Create a date dimension table"
        LLM: 1. Checks: list_resources(type="macro") -> no dbt_utils
             2. Edits: packages.yml (adds dbt_utils package)
             3. Runs: install_deps() (installs package)
             4. Creates: models/date_dim.sql (uses dbt_utils.date_spine)

    Note: This is an interactive development tool, not infrastructure automation.
    It enables the LLM to act on its own recommendations mid-conversation.
    """
    # Thin wrapper: delegate to the shared implementation with the
    # FastMCP-injected state (tests call _implementation directly instead).
    return await _implementation(ctx, state)
@@ -0,0 +1,84 @@
1
+ """List all resources in the dbt project.
2
+
3
+ This module implements the list_resources tool for dbt Core MCP.
4
+ """
5
+
6
+ import logging
7
+ from typing import Any
8
+
9
+ from fastmcp.dependencies import Depends # type: ignore[reportAttributeAccessIssue]
10
+ from fastmcp.server.context import Context
11
+
12
+ from ..context import DbtCoreServerContext
13
+ from ..dependencies import get_state
14
+ from . import dbtTool
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
async def _implementation(
    ctx: Context | None,
    resource_type: str | None,
    state: DbtCoreServerContext,
    force_parse: bool = True,
) -> list[dict[str, Any]]:
    """Resolve the resource listing for the list_resources tool.

    Kept separate from the decorated tool so tests can call it with an
    explicit state object instead of relying on FastMCP dependency injection.
    """
    # Make sure dbt components and the manifest are ready before reading;
    # metadata tools request a fresh parse by default (force_parse=True).
    await state.ensure_initialized(ctx, force_parse)

    # Hand back the simplified, LLM-friendly resource list from the manifest.
    resources: list[dict[str, Any]] = state.manifest.get_resources(resource_type)  # type: ignore
    return resources
35
+
36
+
37
# NOTE(review): the docstring below is surfaced to MCP clients as the tool
# description, so it is intentionally left unchanged here.
@dbtTool()
async def list_resources(
    ctx: Context,
    resource_type: str | None = None,
    state: DbtCoreServerContext = Depends(get_state),
) -> list[dict[str, Any]]:
    """List all resources in the dbt project with optional filtering by type.

    This unified tool provides a consistent view across all dbt resource types.
    Returns simplified resource information optimized for LLM consumption.

    Args:
        resource_type: Optional filter to narrow results:
            - "model": Data transformation models
            - "source": External data sources
            - "seed": CSV reference data files
            - "snapshot": SCD Type 2 historical tables
            - "test": Data quality tests
            - "analysis": Ad-hoc analysis queries
            - "macro": Jinja macros (includes macros from installed packages)
            - None: Return all resources (default)

    Returns:
        List of resource dictionaries with consistent structure across types.
        Each resource includes: name, unique_id, resource_type, description, tags, etc.

    Package Discovery:
        Use resource_type="macro" to discover installed dbt packages.
        Macros follow the naming pattern: macro.{package_name}.{macro_name}

    Example - Check if dbt_utils is installed:
        macros = list_resources("macro")
        has_dbt_utils = any(m["unique_id"].startswith("macro.dbt_utils.") for m in macros)

    Example - List all installed packages:
        macros = list_resources("macro")
        packages = {m["unique_id"].split(".")[1] for m in macros
                    if m["unique_id"].startswith("macro.") and
                    m["unique_id"].split(".")[1] != "dbt"}

    Examples:
        list_resources() -> all resources
        list_resources("model") -> only models
        list_resources("source") -> only sources
        list_resources("test") -> only tests
        list_resources("macro") -> all macros (discover installed packages)
    """
    # Thin wrapper: delegate to the shared implementation with the
    # FastMCP-injected state (tests call _implementation directly instead).
    return await _implementation(ctx, resource_type, state)
@@ -0,0 +1,179 @@
1
+ """Load seed data (CSV files) from seeds/ directory into database tables.
2
+
3
+ This module implements the load_seeds tool for dbt Core MCP.
4
+ """
5
+
6
+ import logging
7
+ from typing import Any
8
+
9
+ from fastmcp.dependencies import Depends # type: ignore[reportAttributeAccessIssue]
10
+ from fastmcp.server.context import Context
11
+
12
+ from ..context import DbtCoreServerContext
13
+ from ..dependencies import get_state
14
+ from . import dbtTool
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
async def _implementation(
    ctx: Context | None,
    select: str | None,
    exclude: str | None,
    select_state_modified: bool,
    select_state_modified_plus_downstream: bool,
    full_refresh: bool,
    show: bool,
    state: DbtCoreServerContext,
) -> dict[str, Any]:
    """Implementation function for load_seeds tool.

    Separated for testing purposes - tests call this directly with explicit state.
    The @tool() decorated load_seeds() function calls this with injected dependencies.
    """
    # Ensure dbt components are initialized
    await state.ensure_initialized(ctx, force_parse=False)

    # Build selector (state-based when available)
    selector = await state.prepare_state_based_selection(select_state_modified, select_state_modified_plus_downstream, select)

    # Guard: state-based selection needs a previously saved baseline to diff against.
    if select_state_modified and not selector:
        raise RuntimeError("No previous state found - cannot determine modifications. Run 'dbt seed' first to create baseline state.")

    # Construct dbt CLI args for seed
    args = ["seed"]

    if selector:
        # State-based selection must point dbt at the saved state directory.
        target_path = state.get_project_paths().get("target-path", "target")
        args.extend(["-s", selector, "--state", f"{target_path}/state_last_run"])
    elif select:
        args.extend(["-s", select])

    if exclude:
        args.extend(["--exclude", exclude])

    if full_refresh:
        args.append("--full-refresh")

    if show:
        args.append("--show")

    logger.info(f"Running DBT seed with args: {args}")

    # Stream progress back to MCP client (if provided)
    async def progress_callback(current: int, total: int, message: str) -> None:
        if ctx:
            await ctx.report_progress(progress=current, total=total, message=message)

    # Clear stale run_results so we parse only fresh output
    state.clear_stale_run_results()

    runner = await state.get_runner()
    result = await runner.invoke(args, progress_callback=progress_callback if ctx else None)  # type: ignore

    # NOTE(review): this first parse is immediately shadowed by the
    # parse_run_results() call below — presumably validate_and_parse_results()
    # is kept for its validation side effect (raising on invalid output);
    # confirm against DbtCoreServerContext before simplifying.
    run_results = state.validate_and_parse_results(result, "seed")

    # Persist execution state only after a successful run so the next
    # state:modified selection diffs against this run.
    if result.success:
        await state.save_execution_state()

    run_results = state.parse_run_results()

    # Report a final summary to the MCP client, if one is attached.
    if ctx:
        if run_results.get("results"):
            results_list = run_results["results"]
            total = len(results_list)
            passed_count = sum(1 for r in results_list if r.get("status") == "success")
            failed_count = sum(1 for r in results_list if r.get("status") in ("error", "fail"))

            parts = []
            if passed_count > 0:
                parts.append(f"✅ {passed_count} passed" if failed_count > 0 else "✅ All passed")
            if failed_count > 0:
                parts.append(f"❌ {failed_count} failed")

            summary = f"Seed: {total}/{total} seeds completed ({', '.join(parts)})"
            await ctx.report_progress(progress=total, total=total, message=summary)
        else:
            await ctx.report_progress(progress=0, total=0, message="0 seeds matched selector")

    # An empty result set means the selector matched nothing — surface as error.
    if not run_results.get("results"):
        raise RuntimeError(f"No seeds matched selector: {select or selector or 'all'}")

    return {
        "status": "success",
        "command": " ".join(args),
        "results": run_results.get("results", []),
        "elapsed_time": run_results.get("elapsed_time"),
    }
108
+
109
+
110
# NOTE(review): the docstring below is surfaced to MCP clients as the tool
# description, so it is intentionally left unchanged here.
@dbtTool()
async def load_seeds(
    ctx: Context,
    select: str | None = None,
    exclude: str | None = None,
    select_state_modified: bool = False,
    select_state_modified_plus_downstream: bool = False,
    full_refresh: bool = False,
    show: bool = False,
    state: DbtCoreServerContext = Depends(get_state),
) -> dict[str, Any]:
    """Load seed data (CSV files) from seeds/ directory into database tables.

    **When to use**: Run this before building models or tests that depend on reference data.
    Seeds must be loaded before models that reference them can execute.

    **What are seeds**: CSV files containing static reference data (country codes,
    product categories, lookup tables, etc.). Unlike models (which are .sql files),
    seeds are CSV files that are loaded directly into database tables.

    State-based selection modes (detects changed CSV files):
    - select_state_modified: Load only seeds modified since last successful run (state:modified)
    - select_state_modified_plus_downstream: Load modified + downstream dependencies (state:modified+)
      Note: Requires select_state_modified=True

    Manual selection (alternative to state-based):
    - select: dbt selector syntax (e.g., "raw_customers", "tag:lookup")
    - exclude: Exclude specific seeds

    Important: Change detection for seeds works via file hash comparison:
    - Seeds < 1 MiB: Content hash is compared (recommended)
    - Seeds >= 1 MiB: Only file path changes are detected (content changes ignored)
    For large seeds, use manual selection or run all seeds.

    Args:
        select: Manual selector for seeds
        exclude: Exclude selector
        select_state_modified: Use state:modified selector (changed seeds only)
        select_state_modified_plus_downstream: Extend to state:modified+ (changed + downstream)
        full_refresh: Truncate and reload seed tables (default behavior)
        show: Show preview of loaded data
        state: Shared state object injected by FastMCP

    Returns:
        Seed results with status and loaded seed info

    See also:
        - run_models(): Execute .sql model files (not CSV seeds)
        - build_models(): Runs both seeds and models together in DAG order
        - test_models(): Run tests (requires seeds to be loaded first if tests reference them)

    Examples:
        # Before running tests that depend on reference data
        load_seeds()
        test_models(select="test_customer_country_code")

        # After adding a new CSV lookup table
        load_seeds(select="new_product_categories")

        # Fix "relation does not exist" errors from models referencing seeds
        load_seeds()  # Load missing seed tables first
        run_models(select="stg_orders")

        # Incremental workflow: only reload what changed
        load_seeds(select_state_modified=True)

        # Full refresh of a specific seed
        load_seeds(select="country_codes", full_refresh=True)
    """
    # Thin wrapper: delegate to the shared implementation with the
    # FastMCP-injected state (tests call _implementation directly instead).
    return await _implementation(ctx, select, exclude, select_state_modified, select_state_modified_plus_downstream, full_refresh, show, state)