iflow-mcp_niclasolofsson-dbt-core-mcp 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt_core_mcp/__init__.py +18 -0
- dbt_core_mcp/__main__.py +436 -0
- dbt_core_mcp/context.py +459 -0
- dbt_core_mcp/cte_generator.py +601 -0
- dbt_core_mcp/dbt/__init__.py +1 -0
- dbt_core_mcp/dbt/bridge_runner.py +1361 -0
- dbt_core_mcp/dbt/manifest.py +781 -0
- dbt_core_mcp/dbt/runner.py +67 -0
- dbt_core_mcp/dependencies.py +50 -0
- dbt_core_mcp/server.py +381 -0
- dbt_core_mcp/tools/__init__.py +77 -0
- dbt_core_mcp/tools/analyze_impact.py +78 -0
- dbt_core_mcp/tools/build_models.py +190 -0
- dbt_core_mcp/tools/demo/__init__.py +1 -0
- dbt_core_mcp/tools/demo/hello.html +267 -0
- dbt_core_mcp/tools/demo/ui_demo.py +41 -0
- dbt_core_mcp/tools/get_column_lineage.py +1988 -0
- dbt_core_mcp/tools/get_lineage.py +89 -0
- dbt_core_mcp/tools/get_project_info.py +96 -0
- dbt_core_mcp/tools/get_resource_info.py +134 -0
- dbt_core_mcp/tools/install_deps.py +102 -0
- dbt_core_mcp/tools/list_resources.py +84 -0
- dbt_core_mcp/tools/load_seeds.py +179 -0
- dbt_core_mcp/tools/query_database.py +459 -0
- dbt_core_mcp/tools/run_models.py +234 -0
- dbt_core_mcp/tools/snapshot_models.py +120 -0
- dbt_core_mcp/tools/test_models.py +238 -0
- dbt_core_mcp/utils/__init__.py +1 -0
- dbt_core_mcp/utils/env_detector.py +186 -0
- dbt_core_mcp/utils/process_check.py +130 -0
- dbt_core_mcp/utils/tool_utils.py +411 -0
- dbt_core_mcp/utils/warehouse_adapter.py +82 -0
- dbt_core_mcp/utils/warehouse_databricks.py +297 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/METADATA +784 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/RECORD +38 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/WHEEL +4 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/entry_points.txt +2 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/licenses/LICENSE +21 -0

@@ -0,0 +1,120 @@
"""Snapshot models (capture historical changes).

This module implements the snapshot_models tool for dbt Core MCP.
"""

import logging
from typing import Any

from fastmcp.dependencies import Depends  # type: ignore[reportAttributeAccessIssue]
from fastmcp.server.context import Context

from ..context import DbtCoreServerContext
from ..dependencies import get_state
from . import dbtTool

logger = logging.getLogger(__name__)


async def _implementation(
    ctx: Context | None,
    select: str | None,
    exclude: str | None,
    state: DbtCoreServerContext,
) -> dict[str, Any]:
    """Implementation function for snapshot_models tool.

    Separated for testing purposes - tests call this directly with explicit state.
    The @tool() decorated snapshot_models() function calls this with injected dependencies.
    """
    # Ensure dbt components are initialized
    await state.ensure_initialized(ctx, force_parse=False)

    # Construct dbt CLI args for snapshot
    args = ["snapshot"]

    if select:
        args.extend(["-s", select])

    if exclude:
        args.extend(["--exclude", exclude])

    logger.info(f"Running DBT snapshot with args: {args}")

    # Clear stale run_results so we parse only fresh output
    state.clear_stale_run_results()

    runner = await state.get_runner()
    result = await runner.invoke(args)  # type: ignore

    run_results = state.validate_and_parse_results(result, "snapshot")

    if ctx:
        if run_results.get("results"):
            results_list = run_results["results"]
            total = len(results_list)
            passed_count = sum(1 for r in results_list if r.get("status") == "success")
            failed_count = sum(1 for r in results_list if r.get("status") in ("error", "fail"))

            # Summarize final progress back to caller
            parts = []
            if passed_count > 0:
                parts.append(f"✅ {passed_count} passed" if failed_count > 0 else "✅ All passed")
            if failed_count > 0:
                parts.append(f"❌ {failed_count} failed")

            summary = f"Snapshot: {total}/{total} snapshots completed ({', '.join(parts)})"
            await ctx.report_progress(progress=total, total=total, message=summary)
        else:
            await ctx.report_progress(progress=0, total=0, message="0 snapshots matched selector")

    if not run_results.get("results"):
        raise RuntimeError(f"No snapshots matched selector: {select or 'all'}")

    return {
        "status": "success",
        "command": " ".join(args),
        "results": run_results.get("results", []),
        "elapsed_time": run_results.get("elapsed_time"),
    }


@dbtTool()
async def snapshot_models(
    ctx: Context,
    select: str | None = None,
    exclude: str | None = None,
    state: DbtCoreServerContext = Depends(get_state),
) -> dict[str, Any]:
    """Snapshot models (capture historical changes - SCD Type 2).

    Snapshots capture historical changes in data, enabling you to track slowly changing
    dimensions over time. This is particularly useful for maintaining accurate historical
    records in data warehouses.

    **When to use**: To track changes in slowly changing dimensions (SCD Type 2).
    For example, tracking customer address changes over time while preserving history.

    **How it works**: dbt compares current source data with existing snapshot table,
    identifies changes, and inserts new rows with validity timestamps (dbt_valid_from,
    dbt_valid_to, dbt_updated_at). Original rows are closed by setting dbt_valid_to.

    Args:
        select: dbt selector syntax (e.g., "snapshot_name", "tag:daily")
        exclude: Exclude specific snapshots
        state: Shared state object injected by FastMCP

    Returns:
        Snapshot results with status and timing info

    Examples:
        # Run all snapshots
        snapshot_models()

        # Run specific snapshot
        snapshot_models(select="customers_snapshot")

        # Run tagged snapshots
        snapshot_models(select="tag:daily")
    """
    return await _implementation(ctx, select, exclude, state)
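
For reference, a minimal sketch (not part of the packaged code above) of the testing pattern the docstring describes: calling _implementation directly with an explicit state object. FakeState and FakeRunner are hypothetical test doubles that only provide the methods the snapshot implementation actually calls; the real DbtCoreServerContext lives in dbt_core_mcp.context.

    import asyncio

    from dbt_core_mcp.tools.snapshot_models import _implementation


    class FakeRunner:
        async def invoke(self, args):
            # Stands in for a real dbt invocation result object.
            return object()


    class FakeState:
        async def ensure_initialized(self, ctx, force_parse=False):
            pass

        def clear_stale_run_results(self):
            pass

        async def get_runner(self):
            return FakeRunner()

        def validate_and_parse_results(self, result, command):
            # Pretend one snapshot ran successfully.
            return {"results": [{"status": "success"}], "elapsed_time": 1.2}


    async def main():
        out = await _implementation(None, "customers_snapshot", None, FakeState())
        print(out["status"], "-", out["command"])  # success - snapshot -s customers_snapshot


    asyncio.run(main())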

@@ -0,0 +1,238 @@
"""Run dbt tests on models and sources.

This module implements the test_models tool for dbt Core MCP.
"""

import json
import logging
from typing import Any

from fastmcp.dependencies import Depends  # type: ignore[reportAttributeAccessIssue]
from fastmcp.exceptions import McpError  # type: ignore[attr-defined]
from fastmcp.server.context import Context
from mcp.types import ErrorData

from ..context import DbtCoreServerContext
from ..cte_generator import cleanup_cte_tests, generate_cte_tests
from ..dependencies import get_state
from . import dbtTool

logger = logging.getLogger(__name__)


async def _implementation(
    ctx: Context | None,
    select: str | None,
    exclude: str | None,
    select_state_modified: bool,
    select_state_modified_plus_downstream: bool,
    fail_fast: bool,
    keep_cte_tests: bool,
    state: DbtCoreServerContext,
) -> dict[str, Any]:
    """Implementation function for test_models tool.

    Separated for testing purposes - tests call this directly with explicit state.
    The @tool() decorated test_models() function calls this with injected dependencies.
    """
    # Ensure dbt components are initialized
    await state.ensure_initialized(ctx, force_parse=False)

    # Track if CTE tests were generated for cleanup
    cte_tests_generated = False

    try:
        # Generate CTE tests if experimental features enabled
        if state.experimental_features and state.project_dir:
            logger.info("Experimental features enabled - generating CTE tests")
            try:
                cte_count = generate_cte_tests(state.project_dir)
                if cte_count > 0:
                    logger.info(f"Generated {cte_count} CTE tests")
                    cte_tests_generated = True
            except Exception as e:
                logger.warning(f"CTE test generation failed: {e}")
                # Don't fail the entire test run if CTE generation fails

        # Build state-based selector if requested (avoids redundant parsing when possible)
        selector = await state.prepare_state_based_selection(select_state_modified, select_state_modified_plus_downstream, select)

        # If user asked for state:modified but no baseline exists, fail fast with clear guidance
        if select_state_modified and not selector:
            raise RuntimeError("No previous state found - cannot determine modifications. Run 'dbt run' or 'dbt build' first to create baseline state.")

        args = ["test"]

        # Add selector if we have one (state-based or manual)
        if selector:
            target_path = state.get_project_paths().get("target-path", "target")
            args.extend(["-s", selector, "--state", f"{target_path}/state_last_run"])
        elif select:
            args.extend(["-s", select])

        if exclude:
            args.extend(["--exclude", exclude])

        if fail_fast:
            args.append("--fail-fast")

        # Execute with progress reporting
        logger.info(f"Running dbt tests with args: {args}")

        # Define progress callback if context available
        async def progress_callback(current: int, total: int, message: str) -> None:
            if ctx:
                await ctx.report_progress(progress=current, total=total, message=message)

        # Delete stale run_results.json to ensure we only read fresh results
        state.clear_stale_run_results()

        runner = await state.get_runner()
        result = await runner.invoke(args, progress_callback=progress_callback if ctx else None)  # type: ignore

        # Parse run_results.json to discriminate system errors from business outcomes
        run_results = state.validate_and_parse_results(result, "test")

        # Business outcome - dbt executed tests (some may have failed)
        # Continue with existing run_results parsing
        results_list = run_results.get("results", [])

        # Remove heavy fields from results to reduce token usage
        for result in results_list:
            result.pop("compiled_code", None)
            result.pop("raw_code", None)

        # Send final progress update with test summary
        await state.report_final_progress(ctx, results_list, "Test", "tests")

        # Empty results means selector matched nothing - this is an error
        if not results_list:
            raise RuntimeError(f"No tests matched selector: {select or selector or 'all'}")

        # Check if any tests failed - if so, return RPC error with structured data
        failed_tests = [r for r in results_list if r.get("status") == "fail"]
        if failed_tests:
            # Check if any failed tests are unit tests (they have diff output)
            has_unit_test_failures = any("unit_test" in r.get("unique_id", "") for r in failed_tests)

            # Add diff format legend for unit test failures
            diff_legend = ""
            if has_unit_test_failures:
                diff_legend = (
                    "\n\nUnit test diff format (daff tabular diff):\n"
                    " @@ Header row with column names\n"
                    " +++ Row present in actual output but missing from expected\n"
                    " --- Row present in expected but missing from actual output\n"
                    " → Row with at least one modified cell (→, -->, etc.)\n"
                    " ... Rows omitted for brevity\n"
                    " old_value→new_value Shows the change in a specific cell\n"
                    "\nFull specification: https://paulfitz.github.io/daff-doc/spec.html"
                )

            error_data = {
                "error": "test_failure",
                "message": f"{len(failed_tests)} test(s) failed{diff_legend}",
                "command": " ".join(args),
                "results": results_list,  # Include ALL results (both passing and failing)
                "elapsed_time": run_results.get("elapsed_time"),
                "summary": {
                    "total": len(results_list),
                    "passed": len([r for r in results_list if r.get("status") == "pass"]),
                    "failed": len(failed_tests),
                },
            }
            # Use McpError with custom code for business failures (not system errors)
            # Code -32000 to -32099 are reserved for implementation-defined server errors
            raise McpError(
                ErrorData(
                    code=-32000,  # Server error (business failure, not system error)
                    message=json.dumps(error_data, indent=2),
                )
            )

        # All tests passed - return success
        return {
            "status": "success",
            "command": " ".join(args),
            "results": results_list,
            "elapsed_time": run_results.get("elapsed_time"),
        }
    finally:
        # Clean up CTE tests if they were generated and not debugging
        if cte_tests_generated and not keep_cte_tests and state.project_dir:
            try:
                cleanup_cte_tests(state.project_dir)
            except Exception as e:
                logger.warning(f"Failed to cleanup CTE tests: {e}")


@dbtTool()
async def test_models(
    ctx: Context,
    select: str | None = None,
    exclude: str | None = None,
    select_state_modified: bool = False,
    select_state_modified_plus_downstream: bool = False,
    fail_fast: bool = False,
    keep_cte_tests: bool = False,
    state: DbtCoreServerContext = Depends(get_state),
) -> dict[str, Any]:
    """Run dbt tests on models and sources.

    **When to use**: After running models to validate data quality. Tests check constraints
    like uniqueness, not-null, relationships, and custom data quality rules.

    **Important**: Ensure seeds and models are built before running tests that depend on them.

    State-based selection modes (uses dbt state:modified selector):
    - select_state_modified: Test only models modified since last successful run (state:modified)
    - select_state_modified_plus_downstream: Test modified + downstream dependencies (state:modified+)
      Note: Requires select_state_modified=True

    Manual selection (alternative to state-based):
    - select: dbt selector syntax (e.g., "customers", "tag:mart", "test_type:generic")
    - exclude: Exclude specific tests

    Args:
        select: Manual selector for tests/models to test
        exclude: Exclude selector
        select_state_modified: Use state:modified selector (changed models only)
        select_state_modified_plus_downstream: Extend to state:modified+ (changed + downstream)
        fail_fast: Stop execution on first failure
        keep_cte_tests: Keep generated CTE test files for debugging (default: False)
        state: Shared state object injected by FastMCP

    Returns:
        Test results with status and failures

    See also:
        - run_models(): Execute models before testing them
        - build_models(): Run models + tests together automatically
        - load_seeds(): Load seeds if tests reference seed data

    Examples:
        # After building a model, test it
        run_models(select="customers")
        test_models(select="customers")

        # Test only generic tests (not singular)
        test_models(select="test_type:generic")

        # Test everything that changed
        test_models(select_state_modified=True)

        # Stop on first failure for quick feedback
        test_models(fail_fast=True)

        # Keep CTE test files for debugging
        test_models(keep_cte_tests=True)

    Note: Unit test failures show diffs in the "daff" tabular format:
        @@  = column headers
        +++ = row in actual, not in expected (extra row)
        --- = row in expected, not in actual (missing row)
        →   = row with modified cell(s), shown as old_value→new_value
        ... = omitted matching rows
        Full format spec: https://paulfitz.github.io/daff-doc/spec.html
    """
    return await _implementation(ctx, select, exclude, select_state_modified, select_state_modified_plus_downstream, fail_fast, keep_cte_tests, state)
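
For reference, a minimal sketch (not part of the packaged code above) of how a caller might consume test_models results: success returns a plain dict, while data-test failures surface as the McpError built above, whose message is a JSON document. The `state` argument is assumed to be an initialized DbtCoreServerContext (or a test double providing the methods used by _implementation), and `err.error.message` assumes the mcp SDK attaches its ErrorData to McpError as `.error`.

    import json

    from fastmcp.exceptions import McpError  # same import used by test_models

    from dbt_core_mcp.tools.test_models import _implementation


    async def run_customer_tests(state) -> dict:
        try:
            # No ctx, no state-based selection, no fail-fast, no kept CTE tests.
            return await _implementation(None, "customers", None, False, False, False, False, state)
        except McpError as err:
            # Business failure: decode the structured payload built by test_models.
            payload = json.loads(err.error.message)
            summary = payload["summary"]
            print(f"{summary['failed']} of {summary['total']} tests failed")
            return payload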

@@ -0,0 +1 @@
"""Utility modules for dbt-core-mcp."""

@@ -0,0 +1,186 @@
"""
Environment detection for dbt projects.

Detects the Python environment setup and returns the appropriate command
and environment variables to run Python in that environment.
"""

import logging
import os
import sys
from pathlib import Path
from typing import Optional

logger = logging.getLogger(__name__)


def detect_python_command(project_dir: Path) -> list[str]:
    """
    Detect how to run Python in the project's environment.

    Detects common Python environment setups and returns the command prefix
    needed to run Python in that environment.

    Args:
        project_dir: Path to the dbt project directory

    Returns:
        Command prefix to run Python (e.g., ['uv', 'run', 'python'])

    Examples:
        >>> detect_python_command(Path("/project/with/uv"))
        ['uv', 'run', 'python']
        >>> detect_python_command(Path("/project/with/poetry"))
        ['poetry', 'run', 'python']
    """
    # Convert to absolute path
    project_dir = project_dir.resolve()

    # Check for standard venv (.venv or venv) - prefer this for uv projects to avoid VIRTUAL_ENV conflicts
    venv_path = _find_venv(project_dir)
    if venv_path:
        logger.info(f"Detected venv at {venv_path}")
        python_exe = _get_venv_python(venv_path)
        return [str(python_exe)]

    # Check for uv (uv.lock) - only if no venv found
    if (project_dir / "uv.lock").exists():
        logger.info(f"Detected uv environment in {project_dir}")
        return ["uv", "run", "--directory", str(project_dir), "python"]

    # Check for poetry (poetry.lock)
    if (project_dir / "poetry.lock").exists():
        logger.info(f"Detected poetry environment in {project_dir}")
        return ["poetry", "run", "--directory", str(project_dir), "python"]

    # Check for pipenv (Pipfile.lock)
    if (project_dir / "Pipfile.lock").exists():
        logger.info(f"Detected pipenv environment in {project_dir}")
        # pipenv doesn't have --directory, need to cd first
        return ["pipenv", "run", "python"]

    # Check for conda environment
    conda_env = os.environ.get("CONDA_DEFAULT_ENV")
    if conda_env:
        logger.info(f"Detected conda environment: {conda_env}")
        return ["conda", "run", "-n", conda_env, "python"]

    # Fall back to system Python
    logger.warning(f"No virtual environment detected in {project_dir}, using system Python")
    return [sys.executable]


def get_env_vars(python_command: list[str]) -> dict[str, str] | None:
    """
    Get environment variables needed for the given Python command.

    This centralizes environment-specific configuration that's needed
    to properly run commands in different virtual environment managers.

    Args:
        python_command: The Python command prefix (e.g., ['pipenv', 'run', 'python'])

    Returns:
        Dictionary of environment variables to set, or None if no special env needed

    Examples:
        >>> get_env_vars(['pipenv', 'run', 'python'])
        {'PIPENV_IGNORE_VIRTUALENVS': '1', 'PIPENV_VERBOSITY': '-1'}
        >>> get_env_vars(['python'])
        None
    """
    if not python_command:
        return None

    env_tool = python_command[0]

    if env_tool == "pipenv":
        # Pipenv needs to ignore outer virtualenvs when running inside another env (e.g., uv run)
        # This prevents pipenv from using the wrong environment
        return {
            "PIPENV_IGNORE_VIRTUALENVS": "1",
            "PIPENV_VERBOSITY": "-1",
        }

    # Add more env-specific settings here as needed
    # elif env_tool == "poetry":
    #     return {"POETRY_VIRTUALENVS_IN_PROJECT": "true"}

    return None


def _find_venv(project_dir: Path) -> Optional[Path]:
    """Find a virtual environment directory."""
    for venv_name in [".venv", "venv", "env"]:
        venv_path = project_dir / venv_name
        if venv_path.is_dir() and _is_venv(venv_path):
            return venv_path
    return None


def _is_venv(path: Path) -> bool:
    """Check if a directory is a valid virtual environment."""
    # Check for pyvenv.cfg (created by venv module)
    if (path / "pyvenv.cfg").exists():
        return True

    # Check for Scripts/python.exe (Windows) or bin/python (Unix)
    if sys.platform == "win32":
        return (path / "Scripts" / "python.exe").exists()
    else:
        return (path / "bin" / "python").exists()


def _get_venv_python(venv_path: Path) -> Path:
    """Get the Python executable path from a venv."""
    if sys.platform == "win32":
        return venv_path / "Scripts" / "python.exe"
    else:
        return venv_path / "bin" / "python"


def detect_dbt_adapter(project_dir: Path) -> str:
    """
    Detect the dbt adapter type from profiles.yml.

    Args:
        project_dir: Path to the dbt project directory

    Returns:
        Adapter type (e.g., 'duckdb', 'postgres', 'snowflake')

    Raises:
        FileNotFoundError: If dbt_project.yml or profiles.yml not found
        KeyError: If required keys not found in YAML files
    """
    import yaml

    # Read dbt_project.yml to get profile name
    project_yml_path = project_dir / "dbt_project.yml"
    if not project_yml_path.exists():
        raise FileNotFoundError(f"dbt_project.yml not found in {project_dir}")

    project_yml = yaml.safe_load(project_yml_path.read_text())
    profile_name = project_yml["profile"]

    # Find profiles.yml (project dir or ~/.dbt/)
    profiles_path = project_dir / "profiles.yml"
    if not profiles_path.exists():
        profiles_path = Path.home() / ".dbt" / "profiles.yml"

    if not profiles_path.exists():
        raise FileNotFoundError(f"profiles.yml not found in {project_dir} or ~/.dbt/")

    # Read profiles.yml and get adapter type
    profiles = yaml.safe_load(profiles_path.read_text())
    profile = profiles[profile_name]

    # Get target (default or first output)
    target_name = profile.get("target")
    if target_name is None:
        target_name = list(profile["outputs"].keys())[0]

    adapter_type = profile["outputs"][target_name]["type"]

    logger.info(f"Detected dbt adapter: {adapter_type}")
    return str(adapter_type)
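
For reference, a minimal usage sketch (not part of the packaged code above): resolve the project's Python command and any extra environment variables, then run a one-liner in that environment. The project path is a placeholder.

    import os
    import subprocess
    from pathlib import Path

    from dbt_core_mcp.utils.env_detector import detect_python_command, get_env_vars

    project_dir = Path("~/my-dbt-project").expanduser()  # placeholder path
    cmd = detect_python_command(project_dir)              # e.g. ['uv', 'run', '--directory', '...', 'python']
    extra_env = get_env_vars(cmd) or {}

    # Merge detected env vars over the current environment and run Python there.
    result = subprocess.run(
        [*cmd, "-c", "import sys; print(sys.executable)"],
        cwd=project_dir,
        env={**os.environ, **extra_env},
        capture_output=True,
        text=True,
    )
    print(result.stdout.strip())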

@@ -0,0 +1,130 @@
"""
Process checking utilities for detecting running dbt processes.

This module provides utilities to check if dbt is currently running
in the same project directory, helping prevent concurrent execution issues.
"""

import logging
from pathlib import Path

import psutil

logger = logging.getLogger(__name__)


def is_dbt_running(project_dir: Path, exclude_pid: int | None = None) -> bool:
    """
    Check if dbt is currently running in the specified project directory.

    Args:
        project_dir: Path to the dbt project directory to check
        exclude_pid: Optional PID to exclude from detection (e.g., our own daemon process)

    Returns:
        True if a dbt process is detected running in the project directory
    """
    project_dir = project_dir.resolve()  # Normalize path
    logger.debug(f"Checking for dbt processes in: {project_dir}")

    try:
        for proc in psutil.process_iter(["pid", "name", "cmdline", "cwd"]):
            try:
                # Skip if this is the excluded PID (our own daemon)
                if exclude_pid and proc.info.get("pid") == exclude_pid:
                    continue

                # Check if this is a dbt-related process
                cmdline = proc.info.get("cmdline") or []
                if not cmdline:
                    continue

                # Look for 'dbt' command in the command line
                # We want actual 'dbt' CLI commands, not just processes that import dbt
                cmdline_str = " ".join(cmdline).lower()

                # Skip if this is our own MCP server or Python imports
                if "dbt-core-mcp" in cmdline_str or "dbt_core_mcp" in cmdline_str:
                    continue

                # Skip MCP persistent dbt processes (identifiable by their loop script markers)
                # These processes run idle waiting for stdin and don't interfere with external dbt commands
                if '"type": "ready"' in cmdline_str or 'type": "ready' in cmdline_str:
                    logger.debug(f"Skipping MCP persistent process (PID {proc.info['pid']})")
                    continue

                # Look for actual dbt CLI usage: 'dbt run', 'dbt parse', 'python -m dbt.cli.main', etc.
                is_dbt_command = False
                for arg in cmdline:
                    arg_lower = arg.lower()
                    # Check for 'dbt' as a standalone command or module
                    if arg_lower == "dbt" or arg_lower.endswith("dbt.exe") or arg_lower.endswith("dbt"):
                        is_dbt_command = True
                        break
                    # Check for 'python -m dbt.cli.main'
                    if "dbt.cli.main" in arg_lower or "dbt/cli/main" in arg_lower:
                        is_dbt_command = True
                        break

                if not is_dbt_command:
                    continue

                # Check if it's running in the same project directory
                # Compare working directory
                proc_cwd = proc.info.get("cwd")
                if proc_cwd:
                    proc_path = Path(proc_cwd).resolve()
                    # Only match if:
                    # 1. Exact match - same directory
                    # 2. Process is running in a subdirectory of our project
                    if proc_path == project_dir or proc_path.is_relative_to(project_dir):
                        logger.info(f"Found running dbt process (PID {proc.info['pid']}): {cmdline}")
                        return True

                # Also check if project directory is mentioned in command line
                project_str = str(project_dir)
                if project_str in cmdline_str or str(project_dir).replace("\\", "/") in cmdline_str:
                    logger.info(f"Found dbt process with project path (PID {proc.info['pid']}): {cmdline}")
                    return True

            except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                # Process disappeared or we don't have permission - skip it
                continue

        logger.debug("No running dbt processes detected")
        return False

    except Exception as e:
        # If we can't check, assume it's safe to proceed
        logger.warning(f"Error checking for dbt processes: {e}")
        return False


def wait_for_dbt_completion(project_dir: Path, timeout: float = 10.0, poll_interval: float = 0.2) -> bool:
    """
    Wait for any running dbt processes to complete.

    Args:
        project_dir: Path to the dbt project directory
        timeout: Maximum time to wait in seconds (default: 10)
        poll_interval: How often to check in seconds (default: 0.2)

    Returns:
        True if dbt finished or was not running, False if timeout occurred
    """
    import time

    logger.debug(f"Waiting for dbt completion (timeout: {timeout}s)")

    elapsed = 0.0
    while elapsed < timeout:
        if not is_dbt_running(project_dir):
            logger.debug("dbt process check clear")
            return True

        logger.debug(f"DBT still running, waiting... ({elapsed:.1f}/{timeout}s)")
        time.sleep(poll_interval)
        elapsed += poll_interval

    logger.warning(f"Timeout waiting for dbt to complete after {timeout}s")
    return False
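
For reference, a minimal usage sketch (not part of the packaged code above): guard an external dbt invocation so it does not collide with a dbt run already active in the project directory. The project path and the follow-up command are placeholders.

    import subprocess
    from pathlib import Path

    from dbt_core_mcp.utils.process_check import is_dbt_running, wait_for_dbt_completion

    project_dir = Path("~/my-dbt-project").expanduser()  # placeholder path

    if is_dbt_running(project_dir):
        # Give the other invocation up to 30 seconds to finish before proceeding.
        if not wait_for_dbt_completion(project_dir, timeout=30.0):
            raise RuntimeError("Another dbt process is still running; try again later")

    subprocess.run(["dbt", "parse"], cwd=project_dir, check=True)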