iflow-mcp_niclasolofsson-dbt-core-mcp 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt_core_mcp/__init__.py +18 -0
- dbt_core_mcp/__main__.py +436 -0
- dbt_core_mcp/context.py +459 -0
- dbt_core_mcp/cte_generator.py +601 -0
- dbt_core_mcp/dbt/__init__.py +1 -0
- dbt_core_mcp/dbt/bridge_runner.py +1361 -0
- dbt_core_mcp/dbt/manifest.py +781 -0
- dbt_core_mcp/dbt/runner.py +67 -0
- dbt_core_mcp/dependencies.py +50 -0
- dbt_core_mcp/server.py +381 -0
- dbt_core_mcp/tools/__init__.py +77 -0
- dbt_core_mcp/tools/analyze_impact.py +78 -0
- dbt_core_mcp/tools/build_models.py +190 -0
- dbt_core_mcp/tools/demo/__init__.py +1 -0
- dbt_core_mcp/tools/demo/hello.html +267 -0
- dbt_core_mcp/tools/demo/ui_demo.py +41 -0
- dbt_core_mcp/tools/get_column_lineage.py +1988 -0
- dbt_core_mcp/tools/get_lineage.py +89 -0
- dbt_core_mcp/tools/get_project_info.py +96 -0
- dbt_core_mcp/tools/get_resource_info.py +134 -0
- dbt_core_mcp/tools/install_deps.py +102 -0
- dbt_core_mcp/tools/list_resources.py +84 -0
- dbt_core_mcp/tools/load_seeds.py +179 -0
- dbt_core_mcp/tools/query_database.py +459 -0
- dbt_core_mcp/tools/run_models.py +234 -0
- dbt_core_mcp/tools/snapshot_models.py +120 -0
- dbt_core_mcp/tools/test_models.py +238 -0
- dbt_core_mcp/utils/__init__.py +1 -0
- dbt_core_mcp/utils/env_detector.py +186 -0
- dbt_core_mcp/utils/process_check.py +130 -0
- dbt_core_mcp/utils/tool_utils.py +411 -0
- dbt_core_mcp/utils/warehouse_adapter.py +82 -0
- dbt_core_mcp/utils/warehouse_databricks.py +297 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/METADATA +784 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/RECORD +38 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/WHEEL +4 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/entry_points.txt +2 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,89 @@ dbt_core_mcp/tools/get_lineage.py
"""Get lineage (dependency tree) for any dbt resource.

This module implements the get_lineage tool for dbt Core MCP.
"""

import logging
from typing import Any

from fastmcp.dependencies import Depends  # type: ignore[reportAttributeAccessIssue]
from fastmcp.server.context import Context

from ..context import DbtCoreServerContext
from ..dependencies import get_state
from . import dbtTool

logger = logging.getLogger(__name__)


async def _implementation(
    ctx: Context | None,
    name: str,
    resource_type: str | None,
    direction: str,
    depth: int | None,
    state: DbtCoreServerContext,
    force_parse: bool = True,
) -> dict[str, Any]:
    """Implementation function for the get_lineage tool.

    Separated for testing purposes - tests call this directly with explicit state.
    The @dbtTool()-decorated get_lineage() function calls this with injected dependencies.
    """
    # Initialize state if needed (metadata tool uses force_parse=True)
    await state.ensure_initialized(ctx, force_parse)

    # Delegate to manifest helper for lineage traversal
    try:
        return state.manifest.get_lineage(name, resource_type, direction, depth)  # type: ignore
    except ValueError as e:
        raise ValueError(f"Lineage error: {e}")


@dbtTool()
async def get_lineage(
    ctx: Context,
    name: str,
    resource_type: str | None = None,
    direction: str = "both",
    depth: int | None = None,
    state: DbtCoreServerContext = Depends(get_state),
) -> dict[str, Any]:
    """Get lineage (dependency tree) for any dbt resource with auto-detection.

    This unified tool works across all resource types (models, sources, seeds, snapshots, etc.),
    showing upstream and/or downstream dependencies with configurable depth.

    Args:
        name: Resource name. For sources, use "source_name.table_name" or just "table_name".
            Examples: "customers", "jaffle_shop.orders", "raw_customers"
        resource_type: Optional filter to narrow the search:
            - "model": Data transformation models
            - "source": External data sources
            - "seed": CSV reference data files
            - "snapshot": SCD Type 2 historical tables
            - "test": Data quality tests
            - "analysis": Ad-hoc analysis queries
            - None: Auto-detect (searches all types)
        direction: Lineage direction:
            - "upstream": Show where data comes from (parents)
            - "downstream": Show what depends on this resource (children)
            - "both": Show full lineage (default)
        depth: Maximum levels to traverse (None for unlimited)
            - depth=1: Immediate dependencies only
            - depth=2: Dependencies + their dependencies
            - None: Full dependency tree

    Returns:
        Lineage information with upstream/downstream nodes and statistics.
        If multiple matches are found, returns all matches for the LLM to process.

    Raises:
        ValueError: If the resource is not found or the direction is invalid

    Examples:
        get_lineage("customers") -> auto-detect and show full lineage
        get_lineage("customers", "model", "upstream") -> where the customers model gets its data
        get_lineage("jaffle_shop.orders", "source", "downstream", 2) -> 2 levels of dependents
    """
    return await _implementation(ctx, name, resource_type, direction, depth, state)
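Since the docstring notes that tests call _implementation() directly with an explicit state, a minimal sketch of such a call may be useful; the stubbed DbtCoreServerContext behaviour and the returned lineage shape below are assumptions for illustration, not part of the package.

# Hypothetical test sketch (not part of this package): call _implementation()
# directly with a stubbed state, as the docstring describes tests doing.
import asyncio
from unittest.mock import AsyncMock, MagicMock

from dbt_core_mcp.tools.get_lineage import _implementation

async def demo() -> None:
    state = MagicMock()
    state.ensure_initialized = AsyncMock()  # assumed to be awaited as a no-op
    state.manifest.get_lineage.return_value = {  # assumed return shape
        "name": "customers",
        "upstream": ["stg_customers"],
        "downstream": ["customer_orders"],
    }
    result = await _implementation(None, "customers", None, "both", None, state)
    assert result["upstream"] == ["stg_customers"]

asyncio.run(demo())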
@@ -0,0 +1,96 @@ dbt_core_mcp/tools/get_project_info.py
"""Get information about the dbt project with optional diagnostics.

This module implements the get_project_info tool for dbt Core MCP.
"""

import logging
from typing import Any

from fastmcp.dependencies import Depends  # type: ignore[reportAttributeAccessIssue]
from fastmcp.server.context import Context

from ..context import DbtCoreServerContext
from ..dependencies import get_state
from . import dbtTool

logger = logging.getLogger(__name__)


async def _implementation(
    ctx: Context | None,
    run_debug: bool,
    state: DbtCoreServerContext,
    force_parse: bool = True,
) -> dict[str, Any]:
    """Implementation function for the get_project_info tool.

    Separated for testing purposes - tests call this directly with explicit state.
    The @dbtTool()-decorated get_project_info() function calls this with injected dependencies.
    """
    # Initialize state if needed (metadata tool uses force_parse=True)
    await state.ensure_initialized(ctx, force_parse)

    try:
        # Collect manifest metadata for a quick status check
        info = state.manifest.get_project_info()  # type: ignore
        info["project_dir"] = str(state.project_dir)
        info["profiles_dir"] = state.profiles_dir
        info["status"] = "ready"

        # Add dbt-core-mcp version
        from .. import __version__

        info["dbt_core_mcp_version"] = __version__

        # Optionally run full dbt debug for connectivity diagnostics
        if run_debug:
            runner = await state.get_runner()
            debug_result_obj = await runner.invoke(["debug"])  # type: ignore

            # Convert DbtRunnerResult to a dictionary
            debug_result = {
                "success": debug_result_obj.success,
                "output": debug_result_obj.stdout if debug_result_obj.stdout else "",
            }

            # Normalize debug output into structured diagnostics
            diagnostics: dict[str, Any] = {
                "command_run": "dbt debug",
                "success": debug_result.get("success", False),
                "output": debug_result.get("output", ""),
            }

            # Extract connection status from the output
            output = str(debug_result.get("output", ""))
            if "Connection test: [OK connection ok]" in output or "Connection test: OK" in output:
                diagnostics["connection_status"] = "ok"
            elif "Connection test: [ERROR" in output or "Connection test: FAIL" in output:
                diagnostics["connection_status"] = "failed"
            else:
                diagnostics["connection_status"] = "unknown"

            info["diagnostics"] = diagnostics

        return info

    except Exception as e:
        raise ValueError(f"Failed to get project info: {e}")


@dbtTool()
async def get_project_info(
    ctx: Context,
    run_debug: bool = True,
    state: DbtCoreServerContext = Depends(get_state),
) -> dict[str, Any]:
    """Get information about the dbt project with optional diagnostics.

    Args:
        ctx: MCP context (provided by FastMCP)
        run_debug: Run `dbt debug` to validate the environment and test the connection (default: True)
        state: Shared state object injected by FastMCP

    Returns:
        Dictionary with project information and diagnostic results
    """
    return await _implementation(ctx, run_debug, state)
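For orientation, the dictionary assembled above might look roughly like this when run_debug=True; all values are illustrative, and keys not explicitly set in the code (such as "project_name" and "dbt_version") are assumed to come from manifest.get_project_info().

# Illustrative only - made-up values; only the keys assigned explicitly above
# are guaranteed, the rest are assumed to come from manifest.get_project_info().
example_info = {
    "project_name": "jaffle_shop",
    "dbt_version": "1.8.0",
    "project_dir": "/workspaces/jaffle_shop",
    "profiles_dir": "/home/user/.dbt",
    "status": "ready",
    "dbt_core_mcp_version": "1.7.0",
    "diagnostics": {
        "command_run": "dbt debug",
        "success": True,
        "output": "... Connection test: [OK connection ok] ...",
        "connection_status": "ok",
    },
}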
@@ -0,0 +1,134 @@ dbt_core_mcp/tools/get_resource_info.py
"""Get detailed information about any dbt resource.

This module implements the get_resource_info tool for dbt Core MCP.
"""

import logging
from typing import Any

from fastmcp.dependencies import Depends  # type: ignore[reportAttributeAccessIssue]
from fastmcp.server.context import Context

from ..context import DbtCoreServerContext
from ..dependencies import get_state
from . import dbtTool

logger = logging.getLogger(__name__)


async def _implementation(
    ctx: Context | None,
    name: str,
    resource_type: str | None,
    include_database_schema: bool,
    include_compiled_sql: bool,
    state: DbtCoreServerContext,
    force_parse: bool = True,
) -> dict[str, Any]:
    """Implementation function for the get_resource_info tool.

    Separated for testing purposes - tests call this directly with explicit state.
    The @dbtTool()-decorated get_resource_info() function calls this with injected dependencies.
    """
    # Initialize state if needed (metadata tool uses force_parse=True)
    await state.ensure_initialized(ctx, force_parse)

    try:
        # Get resource info with the manifest method (handles basic enrichment)
        result = state.manifest.get_resource_info(  # type: ignore
            name,
            resource_type,
            include_database_schema=False,  # Database schema is handled below
            include_compiled_sql=include_compiled_sql,
        )

        # Handle the multiple-matches case
        if result.get("multiple_matches"):
            # Enrich each match with database schema if requested
            if include_database_schema:
                matches = result.get("matches", [])
                for match in matches:
                    node_type = match.get("resource_type")
                    if node_type in ("model", "seed", "snapshot", "source"):
                        resource_name = match.get("name")
                        source_name = match.get("source_name") if node_type == "source" else None
                        schema = await state.get_table_schema_from_db(resource_name, source_name)
                        if schema:
                            match["database_columns"] = schema
            return result

        # Single match - check if we need to trigger compilation
        node_type = result.get("resource_type")

        if include_compiled_sql and node_type == "model":
            # If compiled SQL was requested but is not available, trigger compilation
            if result.get("compiled_sql") is None and not result.get("compiled_sql_cached"):
                logger.info(f"Compiling model: {name}")
                runner = await state.get_runner()
                compile_result = await runner.invoke_compile(name, force=False)  # type: ignore

                if compile_result.success:
                    # Reload manifest to get compiled code
                    await state.manifest.load()  # type: ignore
                    # Re-fetch the resource to get the updated compiled_code
                    result = state.manifest.get_resource_info(  # type: ignore
                        name,
                        resource_type,
                        include_database_schema=False,
                        include_compiled_sql=True,
                    )

        # Query database schema for applicable resource types (ref/source aware)
        if include_database_schema and node_type in ("model", "seed", "snapshot", "source"):
            resource_name = result.get("name", name)
            # For sources, pass source_name to use source() instead of ref()
            source_name = result.get("source_name") if node_type == "source" else None
            schema = await state.get_table_schema_from_db(resource_name, source_name)
            if schema:
                result["database_columns"] = schema

        return result

    except ValueError as e:
        raise ValueError(f"Resource not found: {e}")


@dbtTool()
async def get_resource_info(
    ctx: Context,
    name: str,
    resource_type: str | None = None,
    include_database_schema: bool = True,
    include_compiled_sql: bool = True,
    state: DbtCoreServerContext = Depends(get_state),
) -> dict[str, Any]:
    """Get detailed information about any dbt resource (model, source, seed, snapshot, test, etc.).

    This unified tool works across all resource types, auto-detecting the resource or filtering by type.
    Designed for LLM consumption - returns complete data even when multiple matches exist.

    Args:
        name: Resource name. For sources, use "source_name.table_name" or just "table_name"
        resource_type: Optional filter to narrow the search:
            - "model": Data transformation models
            - "source": External data sources
            - "seed": CSV reference data files
            - "snapshot": SCD Type 2 historical tables
            - "test": Data quality tests
            - "analysis": Ad-hoc analysis queries
            - None: Auto-detect (searches all types)
        include_database_schema: If True (default), query the actual database table schema
            for models/seeds/snapshots/sources and add it as the 'database_columns' field
        include_compiled_sql: If True (default), include compiled SQL with Jinja resolved
            ({{ ref() }}, {{ source() }} → actual table names). Only applicable to models.
            Will trigger dbt compile if not already compiled. Set to False to skip compilation.
        state: Shared state object injected by FastMCP

    Returns:
        Resource information dictionary. If multiple matches are found, returns:
        {"multiple_matches": True, "matches": [...], "message": "..."}

    Raises:
        ValueError: If the resource is not found
    """
    return await _implementation(ctx, name, resource_type, include_database_schema, include_compiled_sql, state)
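A caller is expected to branch on the two documented return shapes (a single resource dict versus the {"multiple_matches": True, ...} envelope). A small hypothetical caller-side sketch of that handling, using only the fields documented above:

# Hypothetical caller-side handling of the two documented return shapes.
def summarize_resource(result: dict) -> str:
    if result.get("multiple_matches"):
        names = [m.get("name", "?") for m in result.get("matches", [])]
        return f"Ambiguous name, {len(names)} matches: {', '.join(names)}"
    cols = result.get("database_columns") or []
    return f"{result.get('resource_type')} {result.get('name')} ({len(cols)} database columns)"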
@@ -0,0 +1,102 @@ dbt_core_mcp/tools/install_deps.py
"""Install dbt packages defined in packages.yml.

This module implements the install_deps tool for dbt Core MCP.
"""

import logging
from typing import Any

from fastmcp.dependencies import Depends  # type: ignore[reportAttributeAccessIssue]
from fastmcp.server.context import Context

from ..context import DbtCoreServerContext
from ..dependencies import get_state
from . import dbtTool

logger = logging.getLogger(__name__)


async def _implementation(
    ctx: Context | None,
    state: DbtCoreServerContext,
) -> dict[str, Any]:
    """Implementation function for the install_deps tool.

    Separated for testing purposes - tests call this directly with explicit state.
    The @dbtTool()-decorated install_deps() function calls this with injected dependencies.
    """
    # Ensure dbt components are initialized
    await state.ensure_initialized(ctx, force_parse=False)

    # Execute dbt deps
    logger.info("Running dbt deps to install packages")

    runner = await state.get_runner()
    result = await runner.invoke(["deps"])

    if not result.success:
        # Bubble up installer failure with context
        raise RuntimeError(f"dbt deps failed: {result.exception}")

    # Parse installed packages from the manifest
    installed_packages: set[str] = set()

    assert state.manifest is not None
    manifest_dict = state.manifest.get_manifest_dict()
    macros = manifest_dict.get("macros", {})
    project_name = manifest_dict.get("metadata", {}).get("project_name", "")

    for unique_id in macros:
        # macro.package_name.macro_name format
        if unique_id.startswith("macro."):
            parts = unique_id.split(".")
            if len(parts) >= 2:
                package_name = parts[1]
                # Exclude the built-in dbt package and the project's own package
                if package_name != "dbt" and package_name != project_name:
                    installed_packages.add(package_name)

    return {
        "status": "success",
        "command": "dbt deps",
        "installed_packages": sorted(installed_packages),
        "message": f"Successfully installed {len(installed_packages)} package(s)",
    }


@dbtTool()
async def install_deps(
    ctx: Context,
    state: DbtCoreServerContext = Depends(get_state),
) -> dict[str, Any]:
    """Install dbt packages defined in packages.yml.

    This tool enables an interactive workflow where an LLM can:
    1. Suggest using a dbt package (e.g., dbt_utils)
    2. Edit packages.yml to add the package
    3. Run install_deps() to install it
    4. Write code that uses the package's macros

    This completes the recommendation workflow without breaking conversation flow.

    **When to use**:
    - After adding/modifying packages.yml
    - Before using macros from external packages
    - When setting up a new dbt project

    **Package Discovery**:
    After installation, use list_resources(resource_type="macro") to verify
    installed packages and discover available macros.

    Returns:
        Installation results with status and installed packages

    Example workflow:
        User: "Create a date dimension table"
        LLM:  1. Checks: list_resources(type="macro") -> no dbt_utils
              2. Edits: packages.yml (adds the dbt_utils package)
              3. Runs: install_deps() (installs the package)
              4. Creates: models/date_dim.sql (uses dbt_utils.date_spine)

    Note: This is an interactive development tool, not infrastructure automation.
    It enables the LLM to act on its own recommendations mid-conversation.
    """
    return await _implementation(ctx, state)
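The installed-package list above is derived purely from macro unique IDs in the manifest. A standalone sketch of that same derivation on a made-up manifest fragment (the macro names are invented for the example):

# Standalone illustration of the package derivation used above;
# the manifest fragment is invented for the example.
manifest_dict = {
    "metadata": {"project_name": "jaffle_shop"},
    "macros": {
        "macro.dbt.run_query": {},
        "macro.dbt_utils.date_spine": {},
        "macro.jaffle_shop.cents_to_dollars": {},
    },
}
project_name = manifest_dict["metadata"]["project_name"]
installed = {
    uid.split(".")[1]
    for uid in manifest_dict["macros"]
    if uid.startswith("macro.") and uid.split(".")[1] not in ("dbt", project_name)
}
print(sorted(installed))  # ['dbt_utils']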
@@ -0,0 +1,84 @@ dbt_core_mcp/tools/list_resources.py
"""List all resources in the dbt project.

This module implements the list_resources tool for dbt Core MCP.
"""

import logging
from typing import Any

from fastmcp.dependencies import Depends  # type: ignore[reportAttributeAccessIssue]
from fastmcp.server.context import Context

from ..context import DbtCoreServerContext
from ..dependencies import get_state
from . import dbtTool

logger = logging.getLogger(__name__)


async def _implementation(
    ctx: Context | None,
    resource_type: str | None,
    state: DbtCoreServerContext,
    force_parse: bool = True,
) -> list[dict[str, Any]]:
    """Implementation function for the list_resources tool.

    Separated for testing purposes - tests call this directly with explicit state.
    The @dbtTool()-decorated list_resources() function calls this with injected dependencies.
    """
    # Initialize state if needed (metadata tool uses force_parse=True)
    await state.ensure_initialized(ctx, force_parse)

    # Return simplified manifest resources (LLM-friendly structure)
    return state.manifest.get_resources(resource_type)  # type: ignore


@dbtTool()
async def list_resources(
    ctx: Context,
    resource_type: str | None = None,
    state: DbtCoreServerContext = Depends(get_state),
) -> list[dict[str, Any]]:
    """List all resources in the dbt project with optional filtering by type.

    This unified tool provides a consistent view across all dbt resource types.
    Returns simplified resource information optimized for LLM consumption.

    Args:
        resource_type: Optional filter to narrow results:
            - "model": Data transformation models
            - "source": External data sources
            - "seed": CSV reference data files
            - "snapshot": SCD Type 2 historical tables
            - "test": Data quality tests
            - "analysis": Ad-hoc analysis queries
            - "macro": Jinja macros (includes macros from installed packages)
            - None: Return all resources (default)

    Returns:
        List of resource dictionaries with a consistent structure across types.
        Each resource includes: name, unique_id, resource_type, description, tags, etc.

    Package Discovery:
        Use resource_type="macro" to discover installed dbt packages.
        Macros follow the naming pattern: macro.{package_name}.{macro_name}

        Example - Check if dbt_utils is installed:
            macros = list_resources("macro")
            has_dbt_utils = any(m["unique_id"].startswith("macro.dbt_utils.") for m in macros)

        Example - List all installed packages:
            macros = list_resources("macro")
            packages = {m["unique_id"].split(".")[1] for m in macros
                        if m["unique_id"].startswith("macro.") and
                        m["unique_id"].split(".")[1] != "dbt"}

    Examples:
        list_resources() -> all resources
        list_resources("model") -> only models
        list_resources("source") -> only sources
        list_resources("test") -> only tests
        list_resources("macro") -> all macros (discover installed packages)
    """
    return await _implementation(ctx, resource_type, state)
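Because each returned resource is documented to carry name, unique_id, resource_type, description and tags, client-side filtering over the result can stay simple. A small sketch (the "finance" tag is an invented example value, not something the package defines):

# Sketch of client-side filtering over the documented resource fields;
# the "finance" tag is an invented example value.
def models_with_tag(resources: list[dict], tag: str) -> list[str]:
    return [
        r["name"]
        for r in resources
        if r.get("resource_type") == "model" and tag in (r.get("tags") or [])
    ]

# e.g. models_with_tag(resources, "finance"), where `resources` is the list
# returned by the list_resources tool.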
@@ -0,0 +1,179 @@ dbt_core_mcp/tools/load_seeds.py
"""Load seed data (CSV files) from the seeds/ directory into database tables.

This module implements the load_seeds tool for dbt Core MCP.
"""

import logging
from typing import Any

from fastmcp.dependencies import Depends  # type: ignore[reportAttributeAccessIssue]
from fastmcp.server.context import Context

from ..context import DbtCoreServerContext
from ..dependencies import get_state
from . import dbtTool

logger = logging.getLogger(__name__)


async def _implementation(
    ctx: Context | None,
    select: str | None,
    exclude: str | None,
    select_state_modified: bool,
    select_state_modified_plus_downstream: bool,
    full_refresh: bool,
    show: bool,
    state: DbtCoreServerContext,
) -> dict[str, Any]:
    """Implementation function for the load_seeds tool.

    Separated for testing purposes - tests call this directly with explicit state.
    The @dbtTool()-decorated load_seeds() function calls this with injected dependencies.
    """
    # Ensure dbt components are initialized
    await state.ensure_initialized(ctx, force_parse=False)

    # Build selector (state-based when available)
    selector = await state.prepare_state_based_selection(select_state_modified, select_state_modified_plus_downstream, select)

    if select_state_modified and not selector:
        raise RuntimeError("No previous state found - cannot determine modifications. Run 'dbt seed' first to create baseline state.")

    # Construct dbt CLI args for seed
    args = ["seed"]

    if selector:
        target_path = state.get_project_paths().get("target-path", "target")
        args.extend(["-s", selector, "--state", f"{target_path}/state_last_run"])
    elif select:
        args.extend(["-s", select])

    if exclude:
        args.extend(["--exclude", exclude])

    if full_refresh:
        args.append("--full-refresh")

    if show:
        args.append("--show")

    logger.info(f"Running dbt seed with args: {args}")

    # Stream progress back to the MCP client (if provided)
    async def progress_callback(current: int, total: int, message: str) -> None:
        if ctx:
            await ctx.report_progress(progress=current, total=total, message=message)

    # Clear stale run_results so we parse only fresh output
    state.clear_stale_run_results()

    runner = await state.get_runner()
    result = await runner.invoke(args, progress_callback=progress_callback if ctx else None)  # type: ignore

    run_results = state.validate_and_parse_results(result, "seed")

    if result.success:
        await state.save_execution_state()

    run_results = state.parse_run_results()

    if ctx:
        if run_results.get("results"):
            results_list = run_results["results"]
            total = len(results_list)
            passed_count = sum(1 for r in results_list if r.get("status") == "success")
            failed_count = sum(1 for r in results_list if r.get("status") in ("error", "fail"))

            parts = []
            if passed_count > 0:
                parts.append(f"✅ {passed_count} passed" if failed_count > 0 else "✅ All passed")
            if failed_count > 0:
                parts.append(f"❌ {failed_count} failed")

            summary = f"Seed: {total}/{total} seeds completed ({', '.join(parts)})"
            await ctx.report_progress(progress=total, total=total, message=summary)
        else:
            await ctx.report_progress(progress=0, total=0, message="0 seeds matched selector")

    if not run_results.get("results"):
        raise RuntimeError(f"No seeds matched selector: {select or selector or 'all'}")

    return {
        "status": "success",
        "command": " ".join(args),
        "results": run_results.get("results", []),
        "elapsed_time": run_results.get("elapsed_time"),
    }


@dbtTool()
async def load_seeds(
    ctx: Context,
    select: str | None = None,
    exclude: str | None = None,
    select_state_modified: bool = False,
    select_state_modified_plus_downstream: bool = False,
    full_refresh: bool = False,
    show: bool = False,
    state: DbtCoreServerContext = Depends(get_state),
) -> dict[str, Any]:
    """Load seed data (CSV files) from the seeds/ directory into database tables.

    **When to use**: Run this before building models or tests that depend on reference data.
    Seeds must be loaded before models that reference them can execute.

    **What are seeds**: CSV files containing static reference data (country codes,
    product categories, lookup tables, etc.). Unlike models (which are .sql files),
    seeds are CSV files that are loaded directly into database tables.

    State-based selection modes (detect changed CSV files):
    - select_state_modified: Load only seeds modified since the last successful run (state:modified)
    - select_state_modified_plus_downstream: Load modified seeds + downstream dependencies (state:modified+)
      Note: Requires select_state_modified=True

    Manual selection (alternative to state-based):
    - select: dbt selector syntax (e.g., "raw_customers", "tag:lookup")
    - exclude: Exclude specific seeds

    Important: Change detection for seeds works via file hash comparison:
    - Seeds < 1 MiB: Content hash is compared (recommended)
    - Seeds >= 1 MiB: Only file path changes are detected (content changes are ignored)
    For large seeds, use manual selection or run all seeds.

    Args:
        select: Manual selector for seeds
        exclude: Exclude selector
        select_state_modified: Use the state:modified selector (changed seeds only)
        select_state_modified_plus_downstream: Extend to state:modified+ (changed + downstream)
        full_refresh: Drop and recreate seed tables (--full-refresh) instead of the default truncate-and-reload
        show: Show a preview of the loaded data
        state: Shared state object injected by FastMCP

    Returns:
        Seed results with status and loaded seed info

    See also:
        - run_models(): Execute .sql model files (not CSV seeds)
        - build_models(): Runs both seeds and models together in DAG order
        - test_models(): Run tests (requires seeds to be loaded first if tests reference them)

    Examples:
        # Before running tests that depend on reference data
        load_seeds()
        test_models(select="test_customer_country_code")

        # After adding a new CSV lookup table
        load_seeds(select="new_product_categories")

        # Fix "relation does not exist" errors from models referencing seeds
        load_seeds()  # Load missing seed tables first
        run_models(select="stg_orders")

        # Incremental workflow: only reload what changed
        load_seeds(select_state_modified=True)

        # Full refresh of a specific seed
        load_seeds(select="country_codes", full_refresh=True)
    """
    return await _implementation(ctx, select, exclude, select_state_modified, select_state_modified_plus_downstream, full_refresh, show, state)
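As a quick sanity check of the CLI-argument construction in _implementation(), a standalone sketch that mirrors the same branching is shown below. The selector string ("state:modified") that prepare_state_based_selection() would return and the target path are assumptions for the example.

# Standalone sketch mirroring the dbt CLI args built above; the selector value
# returned by prepare_state_based_selection() is assumed for illustration.
def build_seed_args(select=None, exclude=None, selector=None,
                    target_path="target", full_refresh=False, show=False):
    args = ["seed"]
    if selector:
        args += ["-s", selector, "--state", f"{target_path}/state_last_run"]
    elif select:
        args += ["-s", select]
    if exclude:
        args += ["--exclude", exclude]
    if full_refresh:
        args.append("--full-refresh")
    if show:
        args.append("--show")
    return args

print(build_seed_args(select="country_codes", full_refresh=True))
# ['seed', '-s', 'country_codes', '--full-refresh']
print(build_seed_args(selector="state:modified"))
# ['seed', '-s', 'state:modified', '--state', 'target/state_last_run']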