iflow-mcp_niclasolofsson-dbt-core-mcp 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt_core_mcp/__init__.py +18 -0
- dbt_core_mcp/__main__.py +436 -0
- dbt_core_mcp/context.py +459 -0
- dbt_core_mcp/cte_generator.py +601 -0
- dbt_core_mcp/dbt/__init__.py +1 -0
- dbt_core_mcp/dbt/bridge_runner.py +1361 -0
- dbt_core_mcp/dbt/manifest.py +781 -0
- dbt_core_mcp/dbt/runner.py +67 -0
- dbt_core_mcp/dependencies.py +50 -0
- dbt_core_mcp/server.py +381 -0
- dbt_core_mcp/tools/__init__.py +77 -0
- dbt_core_mcp/tools/analyze_impact.py +78 -0
- dbt_core_mcp/tools/build_models.py +190 -0
- dbt_core_mcp/tools/demo/__init__.py +1 -0
- dbt_core_mcp/tools/demo/hello.html +267 -0
- dbt_core_mcp/tools/demo/ui_demo.py +41 -0
- dbt_core_mcp/tools/get_column_lineage.py +1988 -0
- dbt_core_mcp/tools/get_lineage.py +89 -0
- dbt_core_mcp/tools/get_project_info.py +96 -0
- dbt_core_mcp/tools/get_resource_info.py +134 -0
- dbt_core_mcp/tools/install_deps.py +102 -0
- dbt_core_mcp/tools/list_resources.py +84 -0
- dbt_core_mcp/tools/load_seeds.py +179 -0
- dbt_core_mcp/tools/query_database.py +459 -0
- dbt_core_mcp/tools/run_models.py +234 -0
- dbt_core_mcp/tools/snapshot_models.py +120 -0
- dbt_core_mcp/tools/test_models.py +238 -0
- dbt_core_mcp/utils/__init__.py +1 -0
- dbt_core_mcp/utils/env_detector.py +186 -0
- dbt_core_mcp/utils/process_check.py +130 -0
- dbt_core_mcp/utils/tool_utils.py +411 -0
- dbt_core_mcp/utils/warehouse_adapter.py +82 -0
- dbt_core_mcp/utils/warehouse_databricks.py +297 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/METADATA +784 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/RECORD +38 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/WHEEL +4 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/entry_points.txt +2 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DBT Runner Protocol.
|
|
3
|
+
|
|
4
|
+
Defines the interface for running dbt commands, supporting both in-process
|
|
5
|
+
and subprocess execution.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Protocol
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DbtRunnerResult:
|
|
13
|
+
"""Result from a dbt command execution."""
|
|
14
|
+
|
|
15
|
+
def __init__(self, success: bool, exception: Exception | None = None, stdout: str = "", stderr: str = "", elapsed_time: float | None = None):
|
|
16
|
+
"""
|
|
17
|
+
Initialize a dbt runner result.
|
|
18
|
+
|
|
19
|
+
Args:
|
|
20
|
+
success: Whether the command succeeded
|
|
21
|
+
exception: Exception if the command failed
|
|
22
|
+
stdout: Standard output from the command
|
|
23
|
+
stderr: Standard error from the command
|
|
24
|
+
elapsed_time: Optional elapsed time in seconds for the command execution
|
|
25
|
+
"""
|
|
26
|
+
self.success = success
|
|
27
|
+
self.exception = exception
|
|
28
|
+
self.stdout = stdout
|
|
29
|
+
self.stderr = stderr
|
|
30
|
+
self.elapsed_time = elapsed_time
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class DbtRunner(Protocol):
    """Structural interface that every dbt command runner is expected to satisfy.

    NOTE(review): server.py awaits ``runner.invoke(...)`` on its BridgeRunner, so
    concrete runners may expose coroutines rather than the plain methods declared
    here - confirm whether this protocol should be declared async.
    """

    def invoke(self, args: list[str]) -> DbtRunnerResult:
        """
        Run one dbt command.

        Args:
            args: Command-line arguments for dbt, e.g. ['parse'] or ['run', '--select', 'model']

        Returns:
            The outcome of the command
        """
        ...

    def get_manifest_path(self) -> Path:
        """
        Locate the project's manifest.json.

        Returns:
            Path to target/manifest.json
        """
        ...

    def invoke_query(self, sql: str) -> DbtRunnerResult:
        """
        Run a SQL query.

        Args:
            sql: The query text; put any LIMIT directly in the SQL

        Returns:
            The outcome of the query, carrying its output
        """
        ...
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Dependency injection providers for dbt Core MCP tools.
|
|
2
|
+
|
|
3
|
+
This module provides dependency functions that can be used with FastMCP's Depends()
|
|
4
|
+
to inject shared state and context into tool functions.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from .context import DbtCoreServerContext
|
|
11
|
+
|
|
12
|
+
# Module-level handle to the shared server context; the server assigns it during initialization
_server_state: "DbtCoreServerContext | None" = None


def set_server_state(state: "DbtCoreServerContext") -> None:
    """Publish the shared server context for dependency injection.

    DbtCoreMcpServer calls this while it is being initialized; afterwards
    get_state() hands the same object to tools.

    Args:
        state: The DbtCoreServerContext instance that tools should receive
    """
    # Rebind the module-level reference (not a local) so later lookups see it.
    global _server_state
    _server_state = state
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def get_state() -> "DbtCoreServerContext":
    """Return the shared server context (dependency provider).

    Intended for FastMCP's Depends() so tools receive the DbtCoreServerContext
    without importing the server.

    Returns:
        The DbtCoreServerContext instance registered via set_server_state()

    Raises:
        RuntimeError: If no server state has been registered yet

    Example:
        @tool
        async def my_tool(
            ctx: Context,
            state: DbtCoreServerContext = Depends(get_state),
        ) -> dict:
            # state is automatically injected
            pass
    """
    current = _server_state
    if current is not None:
        return current
    raise RuntimeError("Server state not initialized")
|
dbt_core_mcp/server.py
ADDED
|
@@ -0,0 +1,381 @@
|
|
|
1
|
+
"""
|
|
2
|
+
dbt Core MCP Server Implementation.
|
|
3
|
+
|
|
4
|
+
This server provides tools for interacting with dbt projects via the Model Context Protocol.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
from contextlib import asynccontextmanager
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, AsyncIterator
|
|
13
|
+
from urllib.parse import unquote
|
|
14
|
+
from urllib.request import url2pathname
|
|
15
|
+
|
|
16
|
+
from fastmcp import FastMCP
|
|
17
|
+
from fastmcp.server.context import Context
|
|
18
|
+
from fastmcp.server.middleware.error_handling import ErrorHandlingMiddleware
|
|
19
|
+
from fastmcp.server.middleware.rate_limiting import RateLimitingMiddleware
|
|
20
|
+
|
|
21
|
+
from .context import DbtCoreServerContext
|
|
22
|
+
from .dbt.bridge_runner import BridgeRunner
|
|
23
|
+
from .dbt.manifest import ManifestLoader
|
|
24
|
+
from .dependencies import set_server_state
|
|
25
|
+
|
|
26
|
+
# Import tools for static registration
|
|
27
|
+
from .utils.env_detector import detect_python_command
|
|
28
|
+
|
|
29
|
+
# Re-export DbtCoreServerContext for tools
|
|
30
|
+
__all__ = ["DbtCoreServerContext", "DbtCoreMcpServer", "create_server"]
|
|
31
|
+
|
|
32
|
+
logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class DbtCoreMcpServer:
|
|
36
|
+
"""
|
|
37
|
+
dbt Core MCP Server.
|
|
38
|
+
|
|
39
|
+
Provides tools for interacting with dbt projects.
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
def __init__(self, project_dir: str | None = None, timeout: float | None = None) -> None:
    """Initialize the server.

    Builds the FastMCP application, creates the shared DbtCoreServerContext,
    installs middleware and registers tools and resources. The runner, manifest
    and resolved project directory are left unset here; they are filled in
    lazily by the initialization helpers.

    Args:
        project_dir: Optional path to dbt project directory. If not provided,
            automatically detects from MCP workspace roots or falls back to cwd.
        timeout: Optional timeout in seconds for dbt commands (default: None for no timeout).
    """
    # FastMCP initialization with recommended arguments
    from . import __version__

    @asynccontextmanager
    async def lifespan(app: FastMCP) -> AsyncIterator[None]:
        """Lifespan context manager for server startup and shutdown."""
        # Startup
        logger.info("dbt Core MCP Server starting up...")
        try:
            yield
        finally:
            # Shutdown. Runs in a finally block so the persistent dbt process is
            # stopped even when the server body exits with an exception, which
            # @asynccontextmanager re-raises at the yield point.
            logger.info("dbt Core MCP Server shutting down...")
            if self.runner:
                logger.info("Stopping persistent dbt process...")
                await self.runner.shutdown()
                logger.info("Persistent dbt process stopped successfully")
            logger.info("dbt Core MCP Server shutdown complete")

    self.app = FastMCP(
        version=__version__,
        name="dbt Core MCP",
        lifespan=lifespan,
        instructions="""dbt Core MCP Server for interacting with dbt projects.

This server provides tools to:
- Query dbt project metadata
- Run dbt commands
- Inspect models, sources, and tests
- View compiled SQL
- Access dbt documentation

Usage:
- Use the tools to interact with your dbt project
- Query model lineage and dependencies
- Run and test dbt models

## Subagent Delegation Protocol

ALWAYS use #runSubagent for dbt data exploration and analysis. Your context window fills quickly with query results - avoid this by delegating complex workflows to subagents.

When to delegate to #runSubagent:
- Data exploration across multiple models/sources
- Multi-query analysis workflows
- Large result sets (>100 rows expected)
- Impact analysis requiring sample queries
- Any workflow involving: schema inspection → queries → analysis

The subagent handles all the heavy lifting and returns only a concise summary, keeping your main context clean.

Single simple queries with known schema: Execute directly using the tools.
""",
    )  # type: ignore[arg-type]

    # Store the explicit project_dir if provided, otherwise will detect from workspace roots
    _explicit_project_dir = Path(project_dir) if project_dir else None
    project_dir_resolved: Path | None = None
    profiles_dir = os.path.expanduser("~/.dbt")

    # Parse experimental features flag from environment (only the value "true", case-insensitive, enables it)
    experimental_features = os.getenv("EXPERIMENTAL_FEATURES", "false").lower() == "true"

    # Create shared state with all dbt components
    self.state = DbtCoreServerContext(
        app=self.app,
        project_dir=project_dir_resolved,
        profiles_dir=profiles_dir,
        timeout=timeout,
        runner=None,
        manifest=None,
        adapter_type=None,
        force_fresh_runner=False,  # Set to False to reuse runners for performance
        experimental_features=experimental_features,
        _init_lock=asyncio.Lock(),
        _explicit_project_dir=_explicit_project_dir,
    )

    # Set back-reference for delegation
    self.state.server = self
    set_server_state(self.state)

    # Keep references for backward compatibility with existing helper methods
    self.project_dir = project_dir_resolved
    self.profiles_dir = profiles_dir
    self.timeout = timeout
    self._explicit_project_dir = _explicit_project_dir
    self.runner = None
    self.manifest = None
    self.adapter_type = None
    self.force_fresh_runner = False
    # A second lock, distinct from the one handed to the shared state above
    self._init_lock = asyncio.Lock()

    # Add built-in FastMCP middleware (2.11.0)
    self.app.add_middleware(ErrorHandlingMiddleware())  # Handle errors first
    self.app.add_middleware(RateLimitingMiddleware(max_requests_per_second=50))
    # TimingMiddleware and LoggingMiddleware removed - they use structlog with column alignment
    # which causes formatting issues in VS Code's output panel

    # Register tools dynamically
    self._register_tools()
    # Register resources
    self._register_resources()

    logger.info("dbt Core MCP Server initialized")
    logger.info(f"Profiles directory: {self.profiles_dir}")
|
|
153
|
+
|
|
154
|
+
# Public wrappers for DbtCoreServerContext to avoid private-member access warnings
|
|
155
|
+
async def ensure_initialized_with_context(self, ctx: Context | None, force_parse: bool = False) -> None:
    """Public wrapper that forwards to _ensure_initialized_with_context.

    Args:
        ctx: FastMCP Context, passed through unchanged (may be None)
        force_parse: Passed through unchanged as the force_parse keyword
    """
    delegate = self._ensure_initialized_with_context
    await delegate(ctx, force_parse=force_parse)
|
|
157
|
+
|
|
158
|
+
async def get_runner(self) -> BridgeRunner:
    """Public wrapper that forwards to _get_runner and returns its result."""
    runner = await self._get_runner()
    return runner
|
|
160
|
+
|
|
161
|
+
def _register_tools(self) -> None:
    """Discover the tool functions in dbt_core_mcp.tools and register each with FastMCP."""
    from .tools import discover_tools_in_package, get_tool_metadata

    # Only these metadata entries are forwarded to FastMCP's tool() decorator
    forwarded_keys = frozenset(("name", "description", "tags", "enabled", "icons", "annotations", "meta"))

    for tool_func in discover_tools_in_package("dbt_core_mcp.tools"):
        metadata = get_tool_metadata(tool_func, default=None)
        if not metadata:
            # Missing or empty metadata: register with FastMCP's defaults
            self.app.tool()(tool_func)
            continue

        tool_kwargs = {key: value for key, value in metadata.items() if key in forwarded_keys}
        self.app.tool(**tool_kwargs)(tool_func)
        logger.info("Registered tool metadata for %s: %s", tool_func.__name__, metadata)
|
|
183
|
+
|
|
184
|
+
def _register_resources(self) -> None:
|
|
185
|
+
demo_html_path = Path(__file__).resolve().parent / "tools" / "demo" / "hello.html"
|
|
186
|
+
|
|
187
|
+
# Register MCP UI resource for demo_ui tool.
|
|
188
|
+
# It is CRITICAL for vscode that it is resource:// and not ui://
|
|
189
|
+
@self.app.resource(
|
|
190
|
+
uri="resource://demo/hello",
|
|
191
|
+
name="Demo UI",
|
|
192
|
+
mime_type="text/html;profile=mcp-app",
|
|
193
|
+
)
|
|
194
|
+
def demo_ui() -> str:
|
|
195
|
+
logger.info("Serving MCP UI resource: resource://demo/hello")
|
|
196
|
+
return demo_html_path.read_text(encoding="utf-8")
|
|
197
|
+
|
|
198
|
+
# Also register with ui:// for compatibility (VS Code may request either)
|
|
199
|
+
@self.app.resource(
|
|
200
|
+
uri="ui://demo/hello",
|
|
201
|
+
name="Demo UI (Legacy)",
|
|
202
|
+
mime_type="text/html;profile=mcp-app",
|
|
203
|
+
)
|
|
204
|
+
def demo_ui_legacy() -> str:
|
|
205
|
+
logger.info("Serving MCP UI resource: ui://demo/hello")
|
|
206
|
+
return demo_html_path.read_text(encoding="utf-8")
|
|
207
|
+
|
|
208
|
+
# (Removed) Legacy toolImpl_* wrappers: tools are now auto-discovered via discover_tools_in_package() (see _register_tools)
|
|
209
|
+
|
|
210
|
+
def _detect_project_dir(self) -> Path:
|
|
211
|
+
"""Detect the dbt project directory.
|
|
212
|
+
|
|
213
|
+
Resolution order:
|
|
214
|
+
1. Use explicit project_dir if provided during initialization
|
|
215
|
+
2. Fall back to current working directory
|
|
216
|
+
|
|
217
|
+
Note: Workspace roots detection happens in _detect_workspace_roots()
|
|
218
|
+
which is called asynchronously from tool contexts.
|
|
219
|
+
|
|
220
|
+
Returns:
|
|
221
|
+
Path to the dbt project directory
|
|
222
|
+
"""
|
|
223
|
+
# Use explicit project_dir if provided
|
|
224
|
+
if self._explicit_project_dir:
|
|
225
|
+
logger.debug(f"Using explicit project directory: {self._explicit_project_dir}")
|
|
226
|
+
return self._explicit_project_dir
|
|
227
|
+
|
|
228
|
+
# Fall back to current working directory
|
|
229
|
+
cwd = Path.cwd()
|
|
230
|
+
logger.info(f"Using current working directory: {cwd}")
|
|
231
|
+
return cwd
|
|
232
|
+
|
|
233
|
+
async def _detect_workspace_roots(self, ctx: Any) -> Path | None:
|
|
234
|
+
"""Attempt to detect workspace roots from MCP context.
|
|
235
|
+
|
|
236
|
+
Args:
|
|
237
|
+
ctx: FastMCP Context object
|
|
238
|
+
|
|
239
|
+
Returns:
|
|
240
|
+
Path to first workspace root, or None if unavailable
|
|
241
|
+
"""
|
|
242
|
+
try:
|
|
243
|
+
if isinstance(ctx, Context):
|
|
244
|
+
roots = await ctx.list_roots()
|
|
245
|
+
if roots:
|
|
246
|
+
# Convert file:// URL to platform-appropriate path
|
|
247
|
+
# First unquote to decode %XX sequences, then url2pathname for platform conversion
|
|
248
|
+
uri_path = roots[0].uri.path if hasattr(roots[0].uri, "path") else str(roots[0].uri)
|
|
249
|
+
if uri_path:
|
|
250
|
+
workspace_root = Path(url2pathname(unquote(uri_path)))
|
|
251
|
+
logger.info(f"Detected workspace root from MCP client: {workspace_root}")
|
|
252
|
+
return workspace_root
|
|
253
|
+
except Exception as e:
|
|
254
|
+
logger.debug(f"Could not access workspace roots: {e}")
|
|
255
|
+
|
|
256
|
+
return None
|
|
257
|
+
|
|
258
|
+
async def _get_runner(self) -> BridgeRunner:
|
|
259
|
+
"""Get BridgeRunner instance with explicit control over creation.
|
|
260
|
+
|
|
261
|
+
Uses self.force_fresh_runner to determine behavior:
|
|
262
|
+
- If True, always create a fresh BridgeRunner instance
|
|
263
|
+
- If False, reuse existing runner if available
|
|
264
|
+
|
|
265
|
+
Returns:
|
|
266
|
+
BridgeRunner instance
|
|
267
|
+
"""
|
|
268
|
+
if self.force_fresh_runner or not self.runner:
|
|
269
|
+
if not self.project_dir:
|
|
270
|
+
raise RuntimeError("Project directory not set")
|
|
271
|
+
|
|
272
|
+
# Detect Python command for user's environment
|
|
273
|
+
python_cmd = detect_python_command(self.project_dir)
|
|
274
|
+
logger.info(f"Creating {'fresh' if self.force_fresh_runner else 'new'} BridgeRunner with command: {python_cmd}")
|
|
275
|
+
|
|
276
|
+
# Create bridge runner with persistent process for better performance
|
|
277
|
+
self.runner = BridgeRunner(self.project_dir, python_cmd, timeout=self.timeout, use_persistent_process=True)
|
|
278
|
+
|
|
279
|
+
return self.runner
|
|
280
|
+
|
|
281
|
+
async def _initialize_dbt_components(self, needs_parse: bool = True, force_parse: bool = False) -> None:
|
|
282
|
+
"""Initialize dbt runner and manifest loader.
|
|
283
|
+
|
|
284
|
+
Args:
|
|
285
|
+
needs_parse: Whether to run dbt parse. If False, assumes manifest already exists and is fresh.
|
|
286
|
+
force_parse: If True, force parsing even if manifest exists (for tools needing fresh data).
|
|
287
|
+
"""
|
|
288
|
+
|
|
289
|
+
if not self.project_dir:
|
|
290
|
+
raise RuntimeError("Project directory not set")
|
|
291
|
+
|
|
292
|
+
# Get runner (fresh or reused based on self.force_fresh_runner)
|
|
293
|
+
runner = await self._get_runner()
|
|
294
|
+
|
|
295
|
+
# Run parse if needed and not skipped via force_parse=False
|
|
296
|
+
if needs_parse or force_parse:
|
|
297
|
+
logger.info("Running dbt parse...")
|
|
298
|
+
parse_args = ["parse"] # Use partial parse for efficiency
|
|
299
|
+
result = await runner.invoke(parse_args)
|
|
300
|
+
if not result.success:
|
|
301
|
+
error_msg = str(result.exception) if result.exception else "Unknown error"
|
|
302
|
+
raise RuntimeError(f"Failed to parse dbt project: {error_msg}")
|
|
303
|
+
|
|
304
|
+
# Initialize or reload manifest loader
|
|
305
|
+
manifest_path = runner.get_manifest_path()
|
|
306
|
+
if not self.manifest:
|
|
307
|
+
self.manifest = ManifestLoader(manifest_path)
|
|
308
|
+
await self.manifest.load()
|
|
309
|
+
|
|
310
|
+
# Keep shared state in sync with server-owned components
|
|
311
|
+
self._sync_shared_state()
|
|
312
|
+
|
|
313
|
+
logger.info("dbt components initialized successfully")
|
|
314
|
+
|
|
315
|
+
def _sync_shared_state(self) -> None:
|
|
316
|
+
"""Keep DbtCoreServerContext references aligned with server fields."""
|
|
317
|
+
self.state.project_dir = self.project_dir
|
|
318
|
+
self.state.runner = self.runner
|
|
319
|
+
self.state.manifest = self.manifest
|
|
320
|
+
self.state.adapter_type = self.adapter_type
|
|
321
|
+
self.state.force_fresh_runner = self.force_fresh_runner
|
|
322
|
+
|
|
323
|
+
async def _ensure_initialized_with_context(self, ctx: Any, force_parse: bool = False) -> None:
|
|
324
|
+
"""Ensure dbt components are initialized, with optional workspace root detection.
|
|
325
|
+
|
|
326
|
+
Uses async lock to prevent concurrent initialization races when multiple tools
|
|
327
|
+
are called simultaneously.
|
|
328
|
+
|
|
329
|
+
Args:
|
|
330
|
+
ctx: FastMCP Context for accessing workspace roots
|
|
331
|
+
force_parse: If True, force parsing even if manifest exists (for tools needing fresh data)
|
|
332
|
+
"""
|
|
333
|
+
async with self._init_lock:
|
|
334
|
+
# Always check for workspace changes, even if previously initialized
|
|
335
|
+
detected_workspace: Path | None = None
|
|
336
|
+
|
|
337
|
+
if not self._explicit_project_dir:
|
|
338
|
+
detected_workspace = await self._detect_workspace_roots(ctx)
|
|
339
|
+
|
|
340
|
+
# If workspace changed, reinitialize everything
|
|
341
|
+
if detected_workspace and detected_workspace != self.project_dir:
|
|
342
|
+
logger.info(f"Workspace changed from {self.project_dir} to {detected_workspace}, reinitializing...")
|
|
343
|
+
self.project_dir = detected_workspace
|
|
344
|
+
self.runner = None
|
|
345
|
+
self.manifest = None
|
|
346
|
+
|
|
347
|
+
# Ensure project directory is set (first time or after workspace change)
|
|
348
|
+
if not self.project_dir:
|
|
349
|
+
if detected_workspace:
|
|
350
|
+
self.project_dir = detected_workspace
|
|
351
|
+
else:
|
|
352
|
+
self.project_dir = self._detect_project_dir()
|
|
353
|
+
logger.info(f"dbt project directory: {self.project_dir}")
|
|
354
|
+
|
|
355
|
+
if not self.project_dir:
|
|
356
|
+
raise RuntimeError("dbt project directory not set. The MCP server requires a workspace with a dbt_project.yml file.")
|
|
357
|
+
|
|
358
|
+
await self._initialize_dbt_components(needs_parse=not self.state.manifest_exists(), force_parse=force_parse)
|
|
359
|
+
|
|
360
|
+
def run(self, stateless: bool = False) -> None:
    """Start the FastMCP application with the startup banner disabled.

    Args:
        stateless: Enable stateless mode for seamless restarts (auto-enabled with --reload).
    """
    run_options = {"show_banner": False, "stateless": stateless}
    self.app.run(**run_options)
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def create_server(project_dir: str | None = None, timeout: float | None = None) -> DbtCoreMcpServer:
    """Build a DbtCoreMcpServer from the given settings.

    Args:
        project_dir: Optional path to dbt project directory.
            If not provided, automatically detects from MCP workspace roots
            or falls back to current working directory.
        timeout: Optional timeout in seconds for dbt commands (default: None for no timeout).

    Returns:
        DbtCoreMcpServer instance
    """
    server = DbtCoreMcpServer(project_dir=project_dir, timeout=timeout)
    return server
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""dbt-core-mcp tools package.
|
|
2
|
+
|
|
3
|
+
Each tool module implements a single MCP tool with:
|
|
4
|
+
- a @dbtTool()-decorated entry point: discovered and registered with FastMCP by server.py
|
|
5
|
+
- _implementation(...): Pure logic function (testable)
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import importlib
|
|
9
|
+
import inspect
|
|
10
|
+
import pkgutil
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, Callable, TypeVar
|
|
13
|
+
|
|
14
|
+
F = TypeVar("F", bound=Callable[..., Any])


def dbtTool(**metadata: Any) -> Callable[[F], F]:
    """Mark a function as a dbt MCP tool.

    The keyword arguments are stored unchanged on the function's ``metadata``
    attribute; the function itself is returned unwrapped. Registration with
    FastMCP happens later, in server.py.
    """

    def attach(func: F) -> F:
        setattr(func, "metadata", metadata)
        return func

    return attach
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def get_tool_metadata(func: Callable[..., Any], default: Any = None) -> Any:
    """Return the ``metadata`` attribute of func, or default when it has none."""
    try:
        return func.metadata  # type: ignore[attr-defined]
    except AttributeError:
        return default
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def has_tool_metadata(func: Callable[..., Any]) -> bool:
    """Tell whether func carries a ``metadata`` attribute (even an empty one)."""
    try:
        func.metadata  # type: ignore[attr-defined]
    except AttributeError:
        return False
    return True
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def set_tool_metadata(func: Callable[..., Any], metadata: Any) -> None:
    """Store metadata on func as its ``metadata`` attribute, replacing any previous value."""
    setattr(func, "metadata", metadata)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def discover_tools(module: Any) -> list[Callable[..., Any]]:
    """Collect the functions in module that carry tool metadata.

    Members are visited in the order inspect.getmembers() reports them; plain
    functions and coroutine functions both qualify.
    """
    return [
        member
        for _, member in inspect.getmembers(module)
        if (inspect.isfunction(member) or inspect.iscoroutinefunction(member)) and has_tool_metadata(member)
    ]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def discover_tools_in_path(path: str | Path, package_prefix: str) -> list[Callable[..., Any]]:
|
|
52
|
+
tools: list[Callable[..., Any]] = []
|
|
53
|
+
base_path = Path(path)
|
|
54
|
+
for module_info in pkgutil.walk_packages([str(base_path)], prefix=f"{package_prefix}."):
|
|
55
|
+
module = importlib.import_module(module_info.name)
|
|
56
|
+
tools.extend(discover_tools(module))
|
|
57
|
+
return tools
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def discover_tools_in_package(package_name: str) -> list[Callable[..., Any]]:
    """Import package_name and collect the tools found in its directory.

    Raises:
        RuntimeError: If the imported package has no ``__file__`` to derive a directory from
    """
    package_file = getattr(importlib.import_module(package_name), "__file__", None)
    if package_file:
        # Scan the directory that contains the package's own file
        return discover_tools_in_path(Path(package_file).parent, package_name)
    raise RuntimeError(f"Package {package_name} has no __file__ path to scan")
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
__all__ = [
|
|
70
|
+
"dbtTool",
|
|
71
|
+
"get_tool_metadata",
|
|
72
|
+
"has_tool_metadata",
|
|
73
|
+
"set_tool_metadata",
|
|
74
|
+
"discover_tools",
|
|
75
|
+
"discover_tools_in_path",
|
|
76
|
+
"discover_tools_in_package",
|
|
77
|
+
]
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Analyze the impact of changing any dbt resource.
|
|
2
|
+
|
|
3
|
+
This module implements the analyze_impact tool for dbt Core MCP.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import logging
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from fastmcp.dependencies import Depends # type: ignore[reportAttributeAccessIssue]
|
|
10
|
+
from fastmcp.server.context import Context
|
|
11
|
+
|
|
12
|
+
from ..context import DbtCoreServerContext
|
|
13
|
+
from ..dependencies import get_state
|
|
14
|
+
from . import dbtTool
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
async def _implementation(ctx: Context | None, name: str, resource_type: str | None, state: DbtCoreServerContext, force_parse: bool = True) -> dict[str, Any]:
|
|
20
|
+
"""Implementation function for analyze_impact tool.
|
|
21
|
+
|
|
22
|
+
Separated for testing purposes - tests call this directly with explicit state.
|
|
23
|
+
The @tool() decorated analyze_impact() function calls this with injected dependencies.
|
|
24
|
+
"""
|
|
25
|
+
# Initialize state if needed (metadata tool uses force_parse=True)
|
|
26
|
+
await state.ensure_initialized(ctx, force_parse)
|
|
27
|
+
|
|
28
|
+
# Delegate to manifest helper for downstream impact calculation
|
|
29
|
+
try:
|
|
30
|
+
return state.manifest.analyze_impact(name, resource_type) # type: ignore
|
|
31
|
+
except ValueError as e:
|
|
32
|
+
raise ValueError(f"Impact analysis error: {e}")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# NOTE(review): the docstring below is presumably what FastMCP exposes to clients
# as the tool description - treat its wording as runtime text; confirm before editing.
@dbtTool()
async def analyze_impact(
    ctx: Context,
    name: str,
    resource_type: str | None = None,
    state: DbtCoreServerContext = Depends(get_state),
) -> dict[str, Any]:
    """Analyze the impact of changing any dbt resource with auto-detection.

    This unified tool works across all resource types (models, sources, seeds, snapshots, etc.)
    showing all downstream dependencies that would be affected by changes. Provides actionable
    recommendations for running affected resources.

    Args:
        name: Resource name. For sources, use "source_name.table_name" or just "table_name"
            Examples: "stg_customers", "jaffle_shop.orders", "raw_customers"
        resource_type: Optional filter to narrow search:
            - "model": Data transformation models
            - "source": External data sources
            - "seed": CSV reference data files
            - "snapshot": SCD Type 2 historical tables
            - "test": Data quality tests
            - "analysis": Ad-hoc analysis queries
            - None: Auto-detect (searches all types)

    Returns:
        Impact analysis with:
        - List of affected models by distance
        - Count of affected tests and other resources
        - Total impact statistics
        - Resources grouped by distance from changed resource
        - Recommended dbt command to run affected resources
        - Human-readable impact assessment message
        If multiple matches found, returns all matches for LLM to process.

    Raises:
        ValueError: If resource not found

    Examples:
        analyze_impact("stg_customers") -> auto-detect and show impact
        analyze_impact("jaffle_shop.orders", "source") -> impact of source change
        analyze_impact("raw_customers", "seed") -> impact of seed data change
    """
    # Thin entry point: all logic lives in _implementation, which is called with
    # its default force_parse=True because no value is passed here.
    return await _implementation(ctx, name, resource_type, state)
|