iflow-mcp_niclasolofsson-dbt-core-mcp 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. dbt_core_mcp/__init__.py +18 -0
  2. dbt_core_mcp/__main__.py +436 -0
  3. dbt_core_mcp/context.py +459 -0
  4. dbt_core_mcp/cte_generator.py +601 -0
  5. dbt_core_mcp/dbt/__init__.py +1 -0
  6. dbt_core_mcp/dbt/bridge_runner.py +1361 -0
  7. dbt_core_mcp/dbt/manifest.py +781 -0
  8. dbt_core_mcp/dbt/runner.py +67 -0
  9. dbt_core_mcp/dependencies.py +50 -0
  10. dbt_core_mcp/server.py +381 -0
  11. dbt_core_mcp/tools/__init__.py +77 -0
  12. dbt_core_mcp/tools/analyze_impact.py +78 -0
  13. dbt_core_mcp/tools/build_models.py +190 -0
  14. dbt_core_mcp/tools/demo/__init__.py +1 -0
  15. dbt_core_mcp/tools/demo/hello.html +267 -0
  16. dbt_core_mcp/tools/demo/ui_demo.py +41 -0
  17. dbt_core_mcp/tools/get_column_lineage.py +1988 -0
  18. dbt_core_mcp/tools/get_lineage.py +89 -0
  19. dbt_core_mcp/tools/get_project_info.py +96 -0
  20. dbt_core_mcp/tools/get_resource_info.py +134 -0
  21. dbt_core_mcp/tools/install_deps.py +102 -0
  22. dbt_core_mcp/tools/list_resources.py +84 -0
  23. dbt_core_mcp/tools/load_seeds.py +179 -0
  24. dbt_core_mcp/tools/query_database.py +459 -0
  25. dbt_core_mcp/tools/run_models.py +234 -0
  26. dbt_core_mcp/tools/snapshot_models.py +120 -0
  27. dbt_core_mcp/tools/test_models.py +238 -0
  28. dbt_core_mcp/utils/__init__.py +1 -0
  29. dbt_core_mcp/utils/env_detector.py +186 -0
  30. dbt_core_mcp/utils/process_check.py +130 -0
  31. dbt_core_mcp/utils/tool_utils.py +411 -0
  32. dbt_core_mcp/utils/warehouse_adapter.py +82 -0
  33. dbt_core_mcp/utils/warehouse_databricks.py +297 -0
  34. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/METADATA +784 -0
  35. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/RECORD +38 -0
  36. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/WHEEL +4 -0
  37. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/entry_points.txt +2 -0
  38. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,459 @@
+ """Execute SQL queries against the dbt project's database.
+
+ This module implements the query_database tool for dbt Core MCP.
+ """
+
+ import csv
+ import io
+ import json
+ import logging
+ import re
+ import tempfile
+ import time
+ from pathlib import Path
+ from typing import Any
+
+ from fastmcp.dependencies import Depends  # type: ignore[reportAttributeAccessIssue]
+ from fastmcp.server.context import Context
+
+ from ..context import DbtCoreServerContext
+ from ..cte_generator import generate_cte_model
+ from ..dependencies import get_state
+ from . import dbtTool
+
+ logger = logging.getLogger(__name__)
+
+
+ def extract_cte_sql(
+     project_dir: Path,
+     cte_name: str,
+     model_name: str,
+     additional_sql: str = "",
+     model_paths: list[str] | None = None,
+ ) -> str:
+     """Extract CTE SQL from a model file and optionally replace the final SELECT.
+
+     This function extracts a specific CTE from a dbt model file, resolves all its
+     upstream dependencies, and optionally replaces the final SELECT with a full query.
+
+     Args:
+         project_dir: Path to the dbt project directory
+         cte_name: Name of the CTE to extract
+         model_name: Name of the model file containing the CTE (without .sql extension)
+         additional_sql: Optional full SELECT/WITH query to replace the final SELECT
+             (use __cte__ or {{ cte }}; replaced with the CTE name at execution)
+         model_paths: List of model directory paths (defaults to ["models"])
+
+     Returns:
+         Complete SQL ready to execute (either the extracted CTE or the replacement query)
+
+     Raises:
+         ValueError: If the model file is not found, multiple files are found, or CTE extraction fails
+
+     Examples:
+         # Extract a CTE and filter it with a full SELECT
+         sql = extract_cte_sql(
+             project_dir,
+             "customer_agg",
+             "customers",
+             "SELECT * FROM __cte__ WHERE order_count > 5 LIMIT 10"
+         )
+
+         # Extract a CTE and aggregate over it
+         sql = extract_cte_sql(
+             project_dir,
+             "customer_agg",
+             "customers",
+             "SELECT customer_id, COUNT(*) AS cnt FROM __cte__ GROUP BY customer_id"
+         )
+     """
+     # Use default model_paths if not provided
+     if model_paths is None:
+         model_paths = ["models"]
+
+     # Find the model file - search all configured model paths
+     model_files = []
+     for model_path in model_paths:
+         models_dir = project_dir / model_path
+         if models_dir.exists():
+             model_files.extend(list(models_dir.rglob(f"{model_name}.sql")))
+
+     if not model_files:
+         paths_searched = ", ".join(model_paths)
+         raise ValueError(f"Model file '{model_name}.sql' not found in any model paths: {paths_searched}")
+
+     if len(model_files) > 1:
+         raise ValueError(f"Multiple model files found for '{model_name}': {[str(f) for f in model_files]}")
+
+     model_file = model_files[0]
+     logger.info(f"Extracting CTE '{cte_name}' from model '{model_name}' at {model_file}")
+
+     # Create a temporary file for the extracted CTE model.
+     # Use the system temp directory so dbt doesn't pick it up as a model.
+     with tempfile.NamedTemporaryFile(mode="w", suffix=".sql", delete=False) as tmp_file:
+         tmp_path = Path(tmp_file.name)
+
+     try:
+         # Generate the CTE model using the generator logic.
+         # generate_cte_model expects an empty test_given list when we're just extracting.
+         success = generate_cte_model(
+             base_model_path=model_file,
+             cte_name=cte_name,
+             test_given=[],  # No CTE mocking when querying
+             output_path=tmp_path,
+         )
+
+         if not success:
+             example = f'Example: query_database(cte_name="{cte_name}", model_name="{model_name}", sql="SELECT * FROM __cte__ WHERE <filters>")'
+             raise ValueError(
+                 f"Failed to extract CTE '{cte_name}' from model '{model_name}'. "
+                 f"Ensure the CTE name matches the model definition and include __cte__ in your SQL. {example}"
+             )
+
+         # Read the generated CTE SQL
+         cte_sql = tmp_path.read_text()
+
+         # Remove the sqlfluff disable comment if present
+         cte_sql = re.sub(r"^-- sqlfluff:disable\s*\n", "", cte_sql, flags=re.MULTILINE)
+
+         # If the user provided additional SQL, require a full SELECT/WITH query.
+         # The CTE extraction already appends "select * from {cte_name}" at the end.
+         additional_sql_stripped = additional_sql.strip() if additional_sql else ""
+         if additional_sql_stripped:
+             if not re.match(r"^(select|with)\b", additional_sql_stripped, flags=re.IGNORECASE):
+                 # Construct the correct query from the partial SQL provided
+                 suggested_query = f"SELECT * FROM __cte__ {additional_sql_stripped}"
+                 raise ValueError(f"additional_sql must be a full SELECT/WITH query when querying a CTE. Did you mean: {suggested_query}")
+
+             # Ensure the query references the CTE via __cte__ or {{ cte }}
+             if not re.search(r"__cte__|{{\s*cte\s*}}", additional_sql_stripped, flags=re.IGNORECASE):
+                 raise ValueError("additional_sql must reference the CTE using __cte__ or {{ cte }}. Example: SELECT * FROM __cte__ WHERE <condition>")
+
+             pattern = rf"select \* from {re.escape(cte_name)}$"
+             query_sql = re.sub(r"\{\{\s*cte\s*\}\}", cte_name, additional_sql_stripped, flags=re.IGNORECASE)
+             query_sql = query_sql.replace("__cte__", cte_name)
+             final_sql = re.sub(pattern, query_sql, cte_sql, flags=re.IGNORECASE | re.MULTILINE)
+         else:
+             # Use the CTE SQL as-is (it already ends with "select * from {cte_name}")
+             final_sql = cte_sql
+
+         logger.debug(f"Final SQL to execute:\n{final_sql[:500]}...")
+         return final_sql
+
+     finally:
+         # Clean up the temporary file. A short delay gives other processes
+         # a chance to release the file handle first.
+         time.sleep(0.1)
+         try:
+             if tmp_path.exists():
+                 tmp_path.unlink()
+         except PermissionError:
+             # File is still in use (Windows); leave it for the OS to clean up
+             logger.debug(f"Temporary CTE file {tmp_path} still in use, will be cleaned up by OS")
+         except Exception as e:
+             logger.warning(f"Failed to delete temporary CTE file {tmp_path}: {e}")
+
+
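Note on the final-SELECT replacement above: it is plain string/regex substitution over the generated CTE file, which always ends in "select * from <cte_name>". A minimal sketch of the mechanics, with illustrative values (the real inputs come from the generated file and the caller's additional_sql):

    import re

    cte_sql = 'with customer_agg as (select 1 as id)\nselect * from customer_agg'
    user_sql = "SELECT * FROM __cte__ WHERE id = 1"

    # Swap the placeholder for the real CTE name, then replace the
    # trailing "select * from customer_agg" line with the user's query.
    query_sql = user_sql.replace("__cte__", "customer_agg")
    final_sql = re.sub(r"select \* from customer_agg$", query_sql, cte_sql,
                       flags=re.IGNORECASE | re.MULTILINE)
    # -> with customer_agg as (select 1 as id)
    #    SELECT * FROM customer_agg WHERE id = 1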
+ async def _implementation(
+     ctx: Context | None,
+     sql: str,
+     output_file: str | None,
+     output_format: str,
+     cte_name: str | None,
+     model_name: str | None,
+     state: DbtCoreServerContext,
+ ) -> dict[str, Any]:
+     """Implementation function for the query_database tool.
+
+     Separated for testing purposes - tests call this directly with explicit state.
+     The @dbtTool() decorated query_database() function calls this with injected dependencies.
+     """
+     # Ensure dbt components are initialized
+     await state.ensure_initialized(ctx, force_parse=False)
+
+     async def progress_callback(current: int, total: int, message: str) -> None:
+         if ctx:
+             await ctx.report_progress(progress=current, total=total, message=message)
+
+     # Handle a CTE query if cte_name is provided
+     if cte_name:
+         if not model_name:
+             raise ValueError("model_name is required when querying a CTE (cte_name is specified)")
+
+         if not state.project_dir:
+             raise ValueError("Project directory not initialized")
+
+         # Get the configured model paths
+         model_paths = state.get_project_paths()["model-paths"]
+
+         # Extract the CTE SQL using the dedicated function
+         sql = extract_cte_sql(
+             project_dir=state.project_dir,
+             cte_name=cte_name,
+             model_name=model_name,
+             additional_sql=sql,
+             model_paths=model_paths,
+         )
+
+     # Execute the query using dbt show with --no-populate-cache for optimal performance
+     runner = await state.get_runner()
+     result = await runner.invoke_query(sql, progress_callback=progress_callback if ctx else None)  # type: ignore
+
+     if not result.success:
+         error_msg = str(result.exception) if result.exception else "Unknown error"
+         # Include dbt output in the error message for context
+         full_error = error_msg
+
+         # Try to extract the error from stdout or stderr
+         if result.stdout and "Error" in result.stdout:
+             full_error = result.stdout
+         elif hasattr(result, "stderr") and result.stderr:
+             full_error = result.stderr
+
+         logger.error(f"Query execution failed. Error: {error_msg}, stdout: {result.stdout if hasattr(result, 'stdout') else 'N/A'}")
+         raise RuntimeError(f"Query execution failed: {full_error}")
+
+     # Parse the JSON output from dbt show (extract the "show" payload)
+     output = result.stdout if hasattr(result, "stdout") else ""
+
+     try:
+         # dbt show --output json returns: {"show": [...rows...]}
+         # Find the JSON object (look for the {"show": pattern)
+         json_match = re.search(r'\{\s*"show"\s*:\s*\[', output)
+         if not json_match:
+             return {
+                 "status": "failed",
+                 "error": "No JSON output found in dbt show response",
+             }
+
+         # Use JSONDecoder to parse just the first complete JSON object.
+         # This tolerates extra data after the JSON (such as log lines).
+         decoder = json.JSONDecoder()
+         data, _ = decoder.raw_decode(output, json_match.start())
+
+         if "show" in data:
+             rows = data["show"]
+             row_count = len(rows)
+
+             # Get the elapsed time from the result if available
+             elapsed_time = result.elapsed_time if hasattr(result, "elapsed_time") and result.elapsed_time is not None else None
+
+             # Handle the different output formats
+             if output_format in ("csv", "tsv"):
+                 # Convert to CSV/TSV format
+                 delimiter = "\t" if output_format == "tsv" else ","
+                 csv_buffer = io.StringIO()
+
+                 if rows:
+                     writer = csv.DictWriter(csv_buffer, fieldnames=rows[0].keys(), delimiter=delimiter)
+                     writer.writeheader()
+                     writer.writerows(rows)
+                     csv_string = csv_buffer.getvalue()
+                 else:
+                     csv_string = ""
+
+                 if output_file:
+                     # Save to file
+                     output_path = Path(output_file)
+                     output_path.parent.mkdir(parents=True, exist_ok=True)
+
+                     with open(output_path, "w", encoding="utf-8", newline="") as f:
+                         f.write(csv_string)
+
+                     # Get the file size
+                     file_size_bytes = output_path.stat().st_size
+                     file_size_kb = file_size_bytes / 1024
+
+                     response: dict[str, Any] = {
+                         "status": "success",
+                         "row_count": row_count,
+                         "format": output_format,
+                         "saved_to": str(output_path),
+                         "file_size_kb": round(file_size_kb, 2),
+                     }
+                     if elapsed_time is not None:
+                         response["elapsed_time"] = round(elapsed_time, 2)
+                     return response
+                 else:
+                     # Return CSV/TSV inline
+                     response = {
+                         "status": "success",
+                         "row_count": row_count,
+                         "format": output_format,
+                         output_format: csv_string,
+                     }
+                     if elapsed_time is not None:
+                         response["elapsed_time"] = round(elapsed_time, 2)
+                     return response
+             else:
+                 # JSON format (default)
+                 if output_file:
+                     # Ensure the directory exists
+                     output_path = Path(output_file)
+                     output_path.parent.mkdir(parents=True, exist_ok=True)
+
+                     # Write the rows to the file
+                     with open(output_path, "w", encoding="utf-8") as f:
+                         json.dump(rows, f, indent=2)
+
+                     # Get the file size
+                     file_size_bytes = output_path.stat().st_size
+                     file_size_kb = file_size_bytes / 1024
+
+                     # Return metadata with a preview
+                     response = {
+                         "status": "success",
+                         "row_count": row_count,
+                         "saved_to": str(output_path),
+                         "file_size_kb": round(file_size_kb, 2),
+                         "columns": list(rows[0].keys()) if rows else [],
+                         "preview": rows[:3],  # First 3 rows as a preview
+                     }
+                     if elapsed_time is not None:
+                         response["elapsed_time"] = round(elapsed_time, 2)
+                     return response
+                 else:
+                     # Return all rows inline
+                     response = {
+                         "status": "success",
+                         "row_count": row_count,
+                         "rows": rows,
+                     }
+                     if elapsed_time is not None:
+                         response["elapsed_time"] = round(elapsed_time, 2)
+                     return response
+         else:
+             return {
+                 "status": "failed",
+                 "error": "Unexpected JSON format from dbt show",
+                 "data": data,
+             }
+
+     except json.JSONDecodeError as e:
+         return {
+             "status": "error",
+             "message": f"Failed to parse query results: {e}",
+             "raw_output": output[:500],
+         }
+
+
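Note: the parsing above leans on json.JSONDecoder.raw_decode, which decodes exactly one complete JSON value and returns the end offset instead of rejecting trailing data. A self-contained sketch (the noisy string is a made-up stand-in for dbt stdout):

    import json
    import re

    noisy = 'Running with dbt=1.7\n{"show": [{"id": 1}]}\n12:00:01 Done.'

    # Locate the start of the payload, then decode just that object.
    match = re.search(r'\{\s*"show"\s*:\s*\[', noisy)
    data, end = json.JSONDecoder().raw_decode(noisy, match.start())
    assert data["show"] == [{"id": 1}]  # trailing log lines are ignored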
+ @dbtTool()
+ async def query_database(
+     ctx: Context,
+     sql: str,
+     output_file: str | None = None,
+     output_format: str = "json",
+     cte_name: str | None = None,
+     model_name: str | None = None,
+     state: DbtCoreServerContext = Depends(get_state),
+ ) -> dict[str, Any]:
+     """Execute a SQL query against the dbt project's database.
+
+     This tool compiles and runs SQL with Jinja templating support, allowing you to use
+     {{ ref('model') }} and {{ source('src', 'table') }} in your queries.
+
+     **SQL Templating**:
+     - Use {{ ref('model_name') }} to reference dbt models
+     - Use {{ source('source_name', 'table_name') }} to reference source tables
+     - dbt compiles these to actual table names before execution
+
+     **CTE Querying (LLM quick reference)**:
+     - Always pass `cte_name` + `model_name` as parameters (not in the SQL)
+     - Always write a normal `SELECT ... FROM __cte__ ...`
+     - `__cte__` or `{{ cte }}` is replaced with the CTE name
+     - What happens under the hood:
+         - Extracts the target CTE plus its upstream CTEs from the model
+         - Runs your query against that extracted CTE
+     - Templating:
+         - dbt resolves all `{{ ref() }}` / `{{ source() }}` automatically; no manual table names
+     - Invalid syntax to avoid:
+         - `{{ ref('model', cte='name') }}` does **not** exist; always use `cte_name` + `model_name`
+
+     **Output Management**:
+     - For large result sets (>100 rows), use output_file to save results
+     - If output_file is omitted, all data is returned inline (may consume a lot of context)
+     - output_file and its parent directories are created automatically
+
+     **Output Formats**:
+     - json (default): Returns data as a JSON array of objects
+     - csv: Returns comma-separated values with a header row
+     - tsv: Returns tab-separated values with a header row
+     - CSV/TSV output uses proper quoting (only when necessary) and is Excel-compatible
+
+     Args:
+         sql: SQL query with Jinja templating: {{ ref('model') }}, {{ source('src', 'table') }}.
+             For exploratory queries, include LIMIT. For aggregations/counts, omit it.
+             When using cte_name/model_name, provide a full `SELECT`/`WITH` query that
+             selects from `__cte__` (or `{{ cte }}`), which is replaced with the CTE name.
+         output_file: Optional file path to save results. Recommended for large result sets (>100 rows).
+             If provided, only metadata is returned (no preview for CSV/TSV).
+             If omitted, all data is returned inline (may consume a lot of context).
+         output_format: Output format - "json" (default), "csv", or "tsv"
+         cte_name: Optional CTE name to query from a model (requires model_name)
+         model_name: Optional model name containing the CTE (required when cte_name is specified)
+         state: Shared state object injected by FastMCP
+
+     Returns:
+         JSON inline: {"status": "success", "row_count": N, "rows": [...], "elapsed_time": X.XX}
+         JSON file: {"status": "success", "row_count": N, "saved_to": "path", "preview": [...], "elapsed_time": X.XX}
+         CSV/TSV inline: {"status": "success", "row_count": N, "format": "csv", "csv": "...", "elapsed_time": X.XX}
+         CSV/TSV file: {"status": "success", "row_count": N, "format": "csv", "saved_to": "path", "elapsed_time": X.XX}
+
+         Note: elapsed_time is in seconds and covers the total query execution time, including compilation.
+
+     Raises:
+         RuntimeError: If query execution fails
+         ValueError: If invalid CTE/model parameters are provided
+
+     Examples:
+         # Simple query with ref()
+         query_database(sql="SELECT * FROM {{ ref('customers') }} LIMIT 10")
+
+         # Query with source()
+         query_database(sql="SELECT * FROM {{ source('jaffle_shop', 'orders') }} LIMIT 5")
+
+         # Aggregation (no LIMIT needed)
+         query_database(sql="SELECT COUNT(*) AS total FROM {{ ref('customers') }}")
+
+         # Query a specific CTE from a model
+         query_database(
+             cte_name="customer_agg",
+             model_name="customers",
+             sql="SELECT * FROM __cte__ LIMIT 10"
+         )
+
+         # Query a CTE with filtering
+         query_database(
+             cte_name="customer_agg",
+             model_name="customers",
+             sql="SELECT * FROM __cte__ WHERE order_count > 5 LIMIT 20"
+         )
+
+         # Query a CTE with aggregation (full SELECT)
+         query_database(
+             cte_name="customer_agg",
+             model_name="customers",
+             sql="SELECT customer_id, COUNT(*) AS cnt FROM __cte__ GROUP BY customer_id"
+         )
+
+         # WRONG - do NOT use ref() with a cte parameter (it does not exist):
+         # query_database(sql="SELECT * FROM {{ ref('model', cte='cte_name') }}")
+         #
+         # CORRECT - use the cte_name and model_name parameters instead:
+         # query_database(cte_name="cte_name", model_name="model", sql="SELECT * FROM __cte__ LIMIT 10")
+
+         # Save large results to a file
+         query_database(
+             sql="SELECT * FROM {{ ref('orders') }}",
+             output_file="temp_auto/orders_export.json"
+         )
+
+         # Export as CSV
+         query_database(
+             sql="SELECT * FROM {{ ref('customers') }}",
+             output_file="temp_auto/customers.csv",
+             output_format="csv"
+         )
+     """
+     return await _implementation(ctx, sql, output_file, output_format, cte_name, model_name, state)
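Note on the "proper quoting (only when necessary)" claim in the docstring: csv.DictWriter defaults to QUOTE_MINIMAL, so a field is quoted only when it contains the delimiter, a quote character, or a newline. A quick illustration with made-up rows:

    import csv
    import io

    rows = [{"id": 1, "note": "plain"}, {"id": 2, "note": 'has "quotes", and a comma'}]
    buf = io.StringIO()
    writer = csv.DictWriter(buf, fieldnames=rows[0].keys(), delimiter=",")
    writer.writeheader()
    writer.writerows(rows)
    print(buf.getvalue())
    # id,note
    # 1,plain
    # 2,"has ""quotes"", and a comma"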
@@ -0,0 +1,234 @@
+ """Run dbt models (compile SQL and execute against the database).
+
+ This module implements the run_models tool for dbt Core MCP.
+ """
+
+ import logging
+ from typing import Any
+
+ from fastmcp.dependencies import Depends  # type: ignore[reportAttributeAccessIssue]
+ from fastmcp.server.context import Context
+
+ from ..context import DbtCoreServerContext
+ from ..dependencies import get_state
+ from . import dbtTool
+
+ logger = logging.getLogger(__name__)
+
+
+ async def _implementation(
+     ctx: Context | None,
+     select: str | None,
+     exclude: str | None,
+     select_state_modified: bool,
+     select_state_modified_plus_downstream: bool,
+     full_refresh: bool,
+     fail_fast: bool,
+     check_schema_changes: bool,
+     cache_selected_only: bool,
+     state: DbtCoreServerContext,
+ ) -> dict[str, Any]:
+     """Implementation function for the run_models tool.
+
+     Separated for testing purposes - tests call this directly with explicit state.
+     The @dbtTool() decorated run_models() function calls this with injected dependencies.
+     """
+     # Ensure dbt components are initialized
+     await state.ensure_initialized(ctx, force_parse=False)
+
+     # Build the selector (state-based if requested, otherwise manual)
+     selector = await state.prepare_state_based_selection(select_state_modified, select_state_modified_plus_downstream, select)
+
+     if select_state_modified and not selector:
+         raise RuntimeError("No previous state found - cannot determine modifications. Run 'dbt run' or 'dbt build' first to create a baseline state.")
+
+     # Construct the dbt CLI args for run
+     args = ["run"]
+
+     if cache_selected_only and (select or selector or select_state_modified):
+         args.append("--cache-selected-only")
+
+     if selector:
+         target_path = state.get_project_paths().get("target-path", "target")
+         args.extend(["-s", selector, "--state", f"{target_path}/state_last_run"])
+     elif select:
+         args.extend(["-s", select])
+
+     if exclude:
+         args.extend(["--exclude", exclude])
+
+     if full_refresh:
+         args.append("--full-refresh")
+
+     if fail_fast:
+         args.append("--fail-fast")
+
+     pre_run_columns: dict[str, list[str]] = {}
+     expected_total: int | None = None
+
+     # Always list the selected models up front: the count provides an accurate
+     # progress total, and when check_schema_changes is set we also snapshot
+     # each model's pre-run columns for change detection.
+     list_args = ["list", "--resource-type", "model", "--output", "name"]
+
+     if select_state_modified:
+         selector = "state:modified+" if select_state_modified_plus_downstream else "state:modified"
+         target_path = state.get_project_paths().get("target-path", "target")
+         list_args.extend(["-s", selector, "--state", f"{target_path}/state_last_run"])
+     elif select:
+         list_args.extend(["-s", select])
+
+     if exclude:
+         list_args.extend(["--exclude", exclude])
+
+     logger.info(f"Getting model list: {list_args}")
+     runner = await state.get_runner()
+     list_result = await runner.invoke(list_args)  # type: ignore
+
+     if list_result.success and list_result.stdout:
+         model_count = 0
+         for line in list_result.stdout.strip().split("\n"):
+             line = line.strip()
+             # Skip blanks, JSON lines, timestamped log lines, and dbt banner output
+             if not line or line.startswith("{") or ":" in line[:10] or "Running with dbt=" in line or "Registered adapter:" in line:
+                 continue
+             model_count += 1
+
+             if check_schema_changes:
+                 model_name = line
+                 logger.info(f"Querying pre-run columns for {model_name}")
+                 cols = await state.get_table_columns_from_db(model_name)
+                 pre_run_columns[model_name] = cols or []
+
+         if model_count > 0:
+             expected_total = model_count
+             logger.info(f"Expected total models to run: {expected_total}")
+
+     logger.info(f"Running dbt models with args: {args}")
+     logger.info(f"Expected total for progress: {expected_total}")
+
+     # Stream progress back to the MCP client (if provided)
+     async def progress_callback(current: int, total: int, message: str) -> None:
+         if ctx:
+             await ctx.report_progress(progress=current, total=total, message=message)
+
+     # Clear stale run_results so we parse only fresh output
+     state.clear_stale_run_results()
+
+     runner = await state.get_runner()
+     result = await runner.invoke(args, progress_callback=progress_callback if ctx else None, expected_total=expected_total)  # type: ignore
+
+     run_results = state.validate_and_parse_results(result, "run")
+
+     schema_changes: dict[str, dict[str, list[str]]] = {}
+     if check_schema_changes and pre_run_columns:
+         logger.info("Detecting schema changes by comparing pre/post-run database columns")
+
+         for model_name, old_columns in pre_run_columns.items():
+             new_columns = await state.get_table_columns_from_db(model_name)
+
+             if not new_columns:
+                 continue
+
+             added = [c for c in new_columns if c not in old_columns]
+             removed = [c for c in old_columns if c not in new_columns] if old_columns else []
+
+             if added or removed:
+                 schema_changes[model_name] = {}
+                 if added:
+                     schema_changes[model_name]["added"] = added
+                 if removed:
+                     schema_changes[model_name]["removed"] = removed
+
+     if result.success:
+         await state.save_execution_state()
+
+     # Summarize the final progress back to the caller
+     results_list = run_results.get("results", [])
+     await state.report_final_progress(ctx, results_list, "Run", "models")
+
+     if not results_list:
+         raise RuntimeError(f"No models matched selector: {select or selector or 'all'}")
+
+     response: dict[str, Any] = {
+         "status": "success",
+         "command": " ".join(args),
+         "results": results_list,
+         "elapsed_time": run_results.get("elapsed_time"),
+     }
+
+     if schema_changes:
+         response["schema_changes"] = schema_changes
+         response["recommendation"] = "Schema changes detected. Consider re-running with select_state_modified=True and select_state_modified_plus_downstream=True to propagate changes to downstream models."
+
+     return response
+
+
+ @dbtTool()
169
+ async def run_models(
170
+ ctx: Context,
171
+ select: str | None = None,
172
+ exclude: str | None = None,
173
+ select_state_modified: bool = False,
174
+ select_state_modified_plus_downstream: bool = False,
175
+ full_refresh: bool = False,
176
+ fail_fast: bool = False,
177
+ check_schema_changes: bool = False,
178
+ cache_selected_only: bool = True,
179
+ state: DbtCoreServerContext = Depends(get_state),
180
+ ) -> dict[str, Any]:
181
+ """Run dbt models (compile SQL and execute against database).
182
+
183
+ **What are models**: SQL files (.sql) containing SELECT statements that define data transformations.
184
+ Models are compiled and executed to create/update tables and views in your database.
185
+
186
+ **Important**: This tool runs models only (SQL files). For CSV seed files, use load_seeds().
187
+ For running everything together (seeds + models + tests), use build_models().
188
+
189
+ State-based selection modes (uses dbt state:modified selector):
190
+ - select_state_modified: Run only models modified since last successful run (state:modified)
191
+ - select_state_modified_plus_downstream: Run modified + downstream dependencies (state:modified+)
192
+ Note: Requires select_state_modified=True
193
+
194
+ Manual selection (alternative to state-based):
195
+ - select: dbt selector syntax (e.g., "customers", "tag:mart", "stg_*")
196
+ - exclude: Exclude specific models
197
+
198
+ Args:
199
+ select: Manual selector (e.g., "customers", "tag:mart", "path:marts/*")
200
+ exclude: Exclude selector (e.g., "tag:deprecated")
201
+ select_state_modified: Use state:modified selector (changed models only)
202
+ select_state_modified_plus_downstream: Extend to state:modified+ (changed + downstream)
203
+ full_refresh: Force full refresh of incremental models
204
+ fail_fast: Stop execution on first failure
205
+ check_schema_changes: Detect schema changes and recommend downstream runs
206
+ cache_selected_only: Only cache schemas for selected models (default True for performance)
207
+ state: Shared state object injected by FastMCP
208
+
209
+ Returns:
210
+ Execution results with status, models run, timing info, and optional schema_changes
211
+
212
+ See also:
213
+ - seed_data(): Load CSV files (must run before models that reference them)
214
+ - build_models(): Run models + tests together in DAG order
215
+ - test_models(): Run tests after models complete
216
+
217
+ Examples:
218
+ # Run a specific model
219
+ run_models(select="customers")
220
+
221
+ # After loading seeds, run dependent models
222
+ seed_data()
223
+ run_models(select="stg_orders")
224
+
225
+ # Incremental: run only what changed
226
+ run_models(select_state_modified=True)
227
+
228
+ # Run changed models + everything downstream
229
+ run_models(select_state_modified=True, select_state_modified_plus_downstream=True)
230
+
231
+ # Full refresh marts (rebuild from scratch)
232
+ run_models(select="tag:mart", full_refresh=True)
233
+ """
234
+ return await _implementation(ctx, select, exclude, select_state_modified, select_state_modified_plus_downstream, full_refresh, fail_fast, check_schema_changes, cache_selected_only, state)
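Note: the schema-change detection in _implementation is a plain diff of column lists captured before and after the run. A minimal sketch with made-up columns:

    pre_run = {"customers": ["id", "name", "created_at"]}
    post_run = {"customers": ["id", "name", "email"]}

    schema_changes = {}
    for model, old in pre_run.items():
        new = post_run.get(model, [])
        if not new:
            continue  # model missing post-run; skip, as the code above does
        added = [c for c in new if c not in old]
        removed = [c for c in old if c not in new]
        if added or removed:
            schema_changes[model] = {"added": added, "removed": removed}

    # {'customers': {'added': ['email'], 'removed': ['created_at']}}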