iflow-mcp_niclasolofsson-dbt-core-mcp 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. dbt_core_mcp/__init__.py +18 -0
  2. dbt_core_mcp/__main__.py +436 -0
  3. dbt_core_mcp/context.py +459 -0
  4. dbt_core_mcp/cte_generator.py +601 -0
  5. dbt_core_mcp/dbt/__init__.py +1 -0
  6. dbt_core_mcp/dbt/bridge_runner.py +1361 -0
  7. dbt_core_mcp/dbt/manifest.py +781 -0
  8. dbt_core_mcp/dbt/runner.py +67 -0
  9. dbt_core_mcp/dependencies.py +50 -0
  10. dbt_core_mcp/server.py +381 -0
  11. dbt_core_mcp/tools/__init__.py +77 -0
  12. dbt_core_mcp/tools/analyze_impact.py +78 -0
  13. dbt_core_mcp/tools/build_models.py +190 -0
  14. dbt_core_mcp/tools/demo/__init__.py +1 -0
  15. dbt_core_mcp/tools/demo/hello.html +267 -0
  16. dbt_core_mcp/tools/demo/ui_demo.py +41 -0
  17. dbt_core_mcp/tools/get_column_lineage.py +1988 -0
  18. dbt_core_mcp/tools/get_lineage.py +89 -0
  19. dbt_core_mcp/tools/get_project_info.py +96 -0
  20. dbt_core_mcp/tools/get_resource_info.py +134 -0
  21. dbt_core_mcp/tools/install_deps.py +102 -0
  22. dbt_core_mcp/tools/list_resources.py +84 -0
  23. dbt_core_mcp/tools/load_seeds.py +179 -0
  24. dbt_core_mcp/tools/query_database.py +459 -0
  25. dbt_core_mcp/tools/run_models.py +234 -0
  26. dbt_core_mcp/tools/snapshot_models.py +120 -0
  27. dbt_core_mcp/tools/test_models.py +238 -0
  28. dbt_core_mcp/utils/__init__.py +1 -0
  29. dbt_core_mcp/utils/env_detector.py +186 -0
  30. dbt_core_mcp/utils/process_check.py +130 -0
  31. dbt_core_mcp/utils/tool_utils.py +411 -0
  32. dbt_core_mcp/utils/warehouse_adapter.py +82 -0
  33. dbt_core_mcp/utils/warehouse_databricks.py +297 -0
  34. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/METADATA +784 -0
  35. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/RECORD +38 -0
  36. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/WHEEL +4 -0
  37. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/entry_points.txt +2 -0
  38. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/licenses/LICENSE +21 -0
dbt_core_mcp/context.py
@@ -0,0 +1,459 @@
+ """
+ dbt Core MCP Server Context.
+
+ Application-scoped context initialized once at server startup and shared with all tools.
+ """
+
+ import asyncio
+ import json
+ import logging
+ import re
+ import shutil
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Any, cast
+
+ import yaml
+ from fastmcp import FastMCP
+ from fastmcp.server.context import Context
+ from typing_extensions import TypedDict
+
+ if TYPE_CHECKING:
+     from .dbt.bridge_runner import BridgeRunner
+     from .dbt.manifest import ManifestLoader
+
+ logger = logging.getLogger(__name__)
+
+
+ # TypedDict with hyphenated keys requires functional syntax
+ ProjectPaths = TypedDict(
+     "ProjectPaths",
+     {
+         "model-paths": list[str],
+         "seed-paths": list[str],
+         "snapshot-paths": list[str],
+         "analysis-paths": list[str],
+         "macro-paths": list[str],
+         "test-paths": list[str],
+         "target-path": str,
+     },
+ )
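
A note on the functional syntax above: keys such as "model-paths" contain hyphens and are not valid Python identifiers, so the class-based TypedDict form cannot declare them. A minimal sketch of a conforming value, using dbt's default paths (the same defaults get_project_paths() below falls back to):

    default_paths: ProjectPaths = {
        "model-paths": ["models"],
        "seed-paths": ["seeds"],
        "snapshot-paths": ["snapshots"],
        "analysis-paths": ["analyses"],
        "macro-paths": ["macros"],
        "test-paths": ["tests"],
        "target-path": "target",
    }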
+
+
+ @dataclass
+ class DbtCoreServerContext:
+     """Application-scoped context accessible to all tools.
+
+     This context is created once at server startup and injected into every tool
+     via their setup() function. It provides access to:
+     - Project configuration (directory, profiles)
+     - dbt runner (for executing commands)
+     - Manifest (for querying metadata)
+     - Helper methods (parsing results, managing state, querying DB)
+     """
+
+     app: FastMCP
+     project_dir: Path | None
+     profiles_dir: str
+     timeout: float | None
+     runner: "BridgeRunner | None"
+     manifest: "ManifestLoader | None"
+     adapter_type: str | None
+     force_fresh_runner: bool
+     experimental_features: bool
+     _init_lock: asyncio.Lock
+     _explicit_project_dir: Path | None
+     server: Any = None  # Type is DbtCoreMcpServer but use Any to avoid circular import
+
+     async def ensure_initialized(self, ctx: Any, force_parse: bool = False) -> None:
+         """Ensure server is initialized (delegates to server instance)."""
+         if self.server:
+             await self.server.ensure_initialized_with_context(ctx, force_parse=force_parse)
+
+     async def get_runner(self) -> "BridgeRunner":
+         """Get BridgeRunner instance (delegates to server instance)."""
+         if self.server:
+             return await self.server.get_runner()
+         raise RuntimeError("Server not initialized")
+
+     def parse_run_results(self) -> dict[str, Any]:
+         """Parse target/run_results.json after dbt run/test/build.
+
+         Returns:
+             Dictionary with results array and metadata
+         """
+         if not self.project_dir:
+             return {"results": [], "elapsed_time": 0}
+
+         run_results_path = self.project_dir / "target" / "run_results.json"
+         if not run_results_path.exists():
+             return {"results": [], "elapsed_time": 0}
+
+         try:
+             with open(run_results_path, encoding="utf-8") as f:
+                 data = json.load(f)
+
+             # Simplify results for output
+             simplified_results = []
+             for result in data.get("results", []):
+                 simplified_result = {
+                     "unique_id": result.get("unique_id"),
+                     "status": result.get("status"),
+                     "message": result.get("message"),
+                     "execution_time": result.get("execution_time"),
+                     "failures": result.get("failures"),
+                 }
+
+                 # Include additional diagnostic fields for failed tests
+                 if result.get("status") in ("fail", "error"):
+                     simplified_result["compiled_code"] = result.get("compiled_code")
+                     simplified_result["adapter_response"] = result.get("adapter_response")
+
+                 simplified_results.append(simplified_result)
+
+             return {
+                 "results": simplified_results,
+                 "elapsed_time": data.get("elapsed_time", 0),
+             }
+         except Exception as e:
+             logger.warning(f"Failed to parse run_results.json: {e}")
+             return {"results": [], "elapsed_time": 0}
+
+     def validate_and_parse_results(self, result: Any, command_name: str) -> dict[str, Any]:
+         """Parse run_results.json and validate execution succeeded.
+
+         Args:
+             result: The execution result from dbt runner
+             command_name: Name of dbt command (e.g., "run", "test", "build", "seed")
+
+         Returns:
+             Parsed run_results dictionary
+
+         Raises:
+             RuntimeError: If dbt failed before execution (parse error, connection failure, etc.)
+         """
+         run_results = self.parse_run_results()
+
+         if not run_results.get("results"):
+             # No results means dbt failed before execution
+             if result and not result.success:
+                 error_msg = str(result.exception) if result.exception else f"dbt {command_name} execution failed"
+                 # Extract specific error from stdout if available
+                 if result.stdout and "Error" in result.stdout:
+                     lines = result.stdout.split("\n")
+                     for i, line in enumerate(lines):
+                         if "Error" in line or "error" in line:
+                             error_msg = "\n".join(lines[i : min(i + 5, len(lines))]).strip()
+                             break
+                 else:
+                     # Include full stdout/stderr for debugging when no specific error found
+                     stdout_preview = (result.stdout[:500] + "...") if result.stdout and len(result.stdout) > 500 else (result.stdout or "(no stdout)")
+                     stderr_preview = (result.stderr[:500] + "...") if result.stderr and len(result.stderr) > 500 else (result.stderr or "(no stderr)")
+                     error_msg = f"{error_msg}\nstdout: {stdout_preview}\nstderr: {stderr_preview}"
+                 raise RuntimeError(f"dbt {command_name} failed to execute: {error_msg}")
+
+         return run_results
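
A minimal usage sketch for the two helpers above (illustrative only: `context` stands for the injected DbtCoreServerContext, and `result` for whatever the bridge runner returns; the invocation itself is elided because its API is not shown in this file):

    context.clear_stale_run_results()  # defined below; avoids reading a previous run's cache
    result = ...  # execute e.g. `dbt run --select my_model` via the bridge runner
    try:
        run_results = context.validate_and_parse_results(result, "run")
    except RuntimeError:
        ...  # dbt failed before executing any node (parse error, bad connection, ...)
    else:
        for r in run_results["results"]:
            print(r["unique_id"], r["status"], r["execution_time"])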
+
+     async def report_final_progress(
+         self,
+         ctx: Context | None,
+         results_list: list[dict[str, Any]],
+         command_name: str,
+         resource_type: str,
+     ) -> None:
+         """Report final progress with status breakdown.
+
+         Args:
+             ctx: MCP context for progress reporting
+             results_list: List of result dictionaries from dbt execution
+             command_name: Command prefix for message (e.g., "Run", "Test", "Build")
+             resource_type: Resource type for message (e.g., "models", "tests", "resources")
+         """
+         if not ctx:
+             return
+
+         if not results_list:
+             await ctx.report_progress(progress=0, total=0, message=f"0 {resource_type} matched selector")
+             return
+
+         # Count statuses - different commands use different status values
+         total = len(results_list)
+         passed_count = sum(1 for r in results_list if r.get("status") in ("success", "pass"))
+         failed_count = sum(1 for r in results_list if r.get("status") in ("error", "fail"))
+         skip_count = sum(1 for r in results_list if r.get("status") in ("skipped", "skip"))
+         warn_count = sum(1 for r in results_list if r.get("status") == "warn")
+
+         # Build status parts
+         parts = []
+         if passed_count > 0:
+             # Use "All passed" only if no other statuses present
+             has_other_statuses = failed_count > 0 or warn_count > 0 or skip_count > 0
+             parts.append(f"✅ {passed_count} passed" if has_other_statuses else "✅ All passed")
+         if failed_count > 0:
+             parts.append(f"❌ {failed_count} failed")
+         if warn_count > 0:
+             parts.append(f"⚠️ {warn_count} warned")
+         if skip_count > 0:
+             parts.append(f"⏭️ {skip_count} skipped")
+
+         summary = f"{command_name}: {total}/{total} {resource_type} completed ({', '.join(parts)})"
+         await ctx.report_progress(progress=total, total=total, message=summary)
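
For illustration, the messages this logic produces (derived directly from the counting and formatting above):

    # status counts                        -> reported message
    # 5 x "success"                        -> "Run: 5/5 models completed (✅ All passed)"
    # 7 "pass", 2 "fail", 1 "skipped"      -> "Test: 10/10 tests completed (✅ 7 passed, ❌ 2 failed, ⏭️ 1 skipped)"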
+
+     async def get_table_schema_from_db(self, model_name: str, source_name: str | None = None) -> list[dict[str, Any]]:
+         """Get full table schema from database using DESCRIBE.
+
+         Args:
+             model_name: Name of the model/table
+             source_name: If provided, treat as source and use source() instead of ref()
+
+         Returns:
+             List of column dictionaries with details (column_name, column_type, null, etc.)
+             Empty list if query fails or table doesn't exist
+         """
+         try:
+             if source_name:
+                 sql = f"DESCRIBE {{{{ source('{source_name}', '{model_name}') }}}}"
+             else:
+                 sql = f"DESCRIBE {{{{ ref('{model_name}') }}}}"
+
+             runner = await self.get_runner()
+             result = await runner.invoke_query(sql)  # type: ignore
+
+             if not result.success or not result.stdout:
+                 return []
+
+             # Parse JSON output using robust regex + JSONDecoder
+             json_match = re.search(r'\{\s*"show"\s*:\s*\[', result.stdout)
+             if not json_match:
+                 return []
+
+             decoder = json.JSONDecoder()
+             data, _ = decoder.raw_decode(result.stdout, json_match.start())
+
+             if "show" in data:
+                 return data["show"]  # type: ignore[no-any-return]
+
+             return []
+         except Exception as e:
+             logger.warning(f"Failed to query table schema for {model_name}: {e}")
+             return []
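
The "robust regex + JSONDecoder" step is worth a sketch: dbt's stdout typically wraps the JSON payload in log text, so json.loads() on the whole string would fail; raw_decode() parses exactly one JSON value starting at the matched offset and ignores trailing noise. A self-contained illustration with made-up stdout (re and json are already imported in this module):

    stdout = 'Running with dbt... {"show": [{"column_name": "id", "column_type": "bigint"}]} Done.'
    match = re.search(r'\{\s*"show"\s*:\s*\[', stdout)
    payload, _end = json.JSONDecoder().raw_decode(stdout, match.start())
    assert payload["show"][0]["column_name"] == "id"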
+
+     async def get_table_columns_from_db(self, model_name: str) -> list[str]:
+         """Get actual column names from database table.
+
+         Args:
+             model_name: Name of the model
+
+         Returns:
+             List of column names from the actual table
+         """
+         schema = await self.get_table_schema_from_db(model_name)
+         if not schema:
+             return []
+
+         # Extract column names from schema
+         columns: list[str] = []
+         for row in schema:
+             # Try common column name fields
+             col_name = row.get("column_name") or row.get("Field") or row.get("name") or row.get("COLUMN_NAME")
+             if col_name and isinstance(col_name, str):
+                 columns.append(col_name)
+
+         logger.info(f"Extracted {len(columns)} columns for {model_name}: {columns}")
+         return sorted(columns)
+
+     def clear_stale_run_results(self) -> None:
+         """Delete stale run_results.json before command execution.
+
+         This prevents reading cached results from previous runs.
+         """
+         if not self.project_dir:
+             return
+
+         run_results_path = self.project_dir / "target" / "run_results.json"
+         if run_results_path.exists():
+             try:
+                 run_results_path.unlink()
+                 logger.debug("Deleted stale run_results.json before execution")
+             except OSError as e:
+                 logger.warning(f"Could not delete stale run_results.json: {e}")
+
+     async def save_execution_state(self) -> None:
+         """Save current manifest as state for future state-based runs.
+
+         After successful execution, saves manifest.json to target/state_last_run/
+         so future runs can use --state to detect modifications.
+         """
+         if not self.project_dir or not self.runner:
+             return
+
+         target_path = self.get_project_paths()["target-path"]
+         state_dir = self.project_dir / target_path / "state_last_run"
+         state_dir.mkdir(parents=True, exist_ok=True)
+
+         manifest_path = self.runner.get_manifest_path()  # type: ignore
+
+         try:
+             shutil.copy(manifest_path, state_dir / "manifest.json")
+             logger.debug(f"Saved execution state to {state_dir}")
+         except OSError as e:
+             logger.warning(f"Failed to save execution state: {e}")
+
+     def get_project_paths(self) -> ProjectPaths:
+         """Read configured paths from dbt_project.yml.
+
+         Returns:
+             Dictionary with path types as keys and path values (lists for most, string for target-path)
+         """
+         if not self.project_dir:
+             return cast(ProjectPaths, {})
+
+         project_file = self.project_dir / "dbt_project.yml"
+         if not project_file.exists():
+             return cast(ProjectPaths, {})
+
+         try:
+             with open(project_file, encoding="utf-8") as f:
+                 config = yaml.safe_load(f)
+
+             return {
+                 "model-paths": config.get("model-paths", ["models"]),
+                 "seed-paths": config.get("seed-paths", ["seeds"]),
+                 "snapshot-paths": config.get("snapshot-paths", ["snapshots"]),
+                 "analysis-paths": config.get("analysis-paths", ["analyses"]),
+                 "macro-paths": config.get("macro-paths", ["macros"]),
+                 "test-paths": config.get("test-paths", ["tests"]),
+                 "target-path": config.get("target-path", "target"),
+             }
+         except Exception as e:
+             logger.warning(f"Failed to parse dbt_project.yml: {e}")
+             return cast(ProjectPaths, {})
+
+     def compare_model_schemas(
+         self,
+         model_unique_ids: list[str],
+         state_manifest_path: Path,
+     ) -> dict[str, Any]:
+         """Compare schemas of models before and after run.
+
+         Args:
+             model_unique_ids: List of model unique IDs that were run
+             state_manifest_path: Path to the saved state manifest.json
+
+         Returns:
+             Dictionary with schema changes per model
+         """
+         if not state_manifest_path.exists() or not self.manifest:
+             return {}
+
+         try:
+             # Load state (before) manifest
+             with open(state_manifest_path, encoding="utf-8") as f:
+                 state_manifest = json.load(f)
+
+             current_manifest_data = self.manifest.get_manifest_dict()
+             schema_changes: dict[str, dict[str, Any]] = {}
+
+             for unique_id in model_unique_ids:
+                 # Skip non-model nodes (like tests)
+                 if not unique_id.startswith("model."):
+                     continue
+
+                 # Get before and after column definitions
+                 before_node = state_manifest.get("nodes", {}).get(unique_id, {})
+                 after_node = current_manifest_data.get("nodes", {}).get(unique_id, {})
+
+                 before_columns = before_node.get("columns", {})
+                 after_columns = after_node.get("columns", {})
+
+                 # Skip if no column definitions exist (not in schema.yml)
+                 if not before_columns and not after_columns:
+                     continue
+
+                 # Compare columns
+                 before_names = set(before_columns.keys())
+                 after_names = set(after_columns.keys())
+
+                 added = sorted(after_names - before_names)
+                 removed = sorted(before_names - after_names)
+
+                 # Check for type changes in common columns
+                 changed_types = {}
+                 for col in before_names & after_names:
+                     before_type = before_columns[col].get("data_type")
+                     after_type = after_columns[col].get("data_type")
+                     if before_type != after_type and before_type is not None and after_type is not None:
+                         changed_types[col] = {"from": before_type, "to": after_type}
+
+                 # Only record if there are actual changes
+                 if added or removed or changed_types:
+                     model_name = after_node.get("name", unique_id.split(".")[-1])
+                     schema_changes[model_name] = {
+                         "changed": True,
+                         "added_columns": added,
+                         "removed_columns": removed,
+                         "changed_types": changed_types,
+                     }
+
+             return schema_changes
+
+         except Exception as e:
+             logger.warning(f"Failed to compare schemas: {e}")
+             return {}
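
For illustration, with a hypothetical model named customers that gained an email column and widened id, the return value would be shaped like:

    {
        "customers": {
            "changed": True,
            "added_columns": ["email"],
            "removed_columns": [],
            "changed_types": {"id": {"from": "int", "to": "bigint"}},
        }
    }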
+
+     def manifest_exists(self) -> bool:
+         """Check if manifest.json exists.
+
+         Simple check - tools will handle their own parsing as needed.
+         """
+         if self.project_dir is None:
+             return False
+         target_path = self.get_project_paths()["target-path"]
+         manifest_path = self.project_dir / target_path / "manifest.json"
+         return manifest_path.exists()
+
+     async def prepare_state_based_selection(
+         self,
+         select_state_modified: bool,
+         select_state_modified_plus_downstream: bool,
+         select: str | None,
+     ) -> str | None:
+         """Validate and prepare state-based selection.
+
+         Args:
+             select_state_modified: Use state:modified selector
+             select_state_modified_plus_downstream: Extend to state:modified+
+             select: Manual selector (conflicts with state-based)
+
+         Returns:
+             The dbt selector string to use ("state:modified" or "state:modified+"), or None if:
+             - Not using state-based selection
+             - No previous state exists (cannot determine modifications)
+
+         Raises:
+             ValueError: If validation fails
+         """
+         # Validate: hierarchical requirement
+         if select_state_modified_plus_downstream and not select_state_modified:
+             raise ValueError("select_state_modified_plus_downstream requires select_state_modified=True")
+
+         # Validate: can't use both state-based and manual selection
+         if select_state_modified and select:
+             raise ValueError("Cannot use both select_state_modified* flags and select parameter")
+
+         # If not using state-based selection, return None
+         if not select_state_modified:
+             return None
+
+         # Check if state exists
+         if not self.project_dir:
+             return None
+
+         target_path = self.get_project_paths()["target-path"]
+         state_dir = self.project_dir / target_path / "state_last_run"
+         if not state_dir.exists():
+             # No state - cannot determine modifications
+             return None
+
+         # Return selector (state exists)
+         return "state:modified+" if select_state_modified_plus_downstream else "state:modified"
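
A closing sketch of how these pieces compose in a tool (illustrative; the dbt invocation is a hypothetical placeholder since the runner's command API is not shown in this file, and the state directory assumes the default "target" path):

    selector = await context.prepare_state_based_selection(
        select_state_modified=True,
        select_state_modified_plus_downstream=True,
        select=None,
    )  # "state:modified+" once target/state_last_run exists, otherwise None

    context.clear_stale_run_results()
    args = ["run"]
    if selector:
        args += ["--select", selector, "--state", "target/state_last_run"]
    result = ...  # invoke dbt with args via the bridge runner (hypothetical)
    run_results = context.validate_and_parse_results(result, "run")
    await context.save_execution_state()  # snapshot manifest for the next state:modified run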