iflow-mcp_niclasolofsson-dbt-core-mcp 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. dbt_core_mcp/__init__.py +18 -0
  2. dbt_core_mcp/__main__.py +436 -0
  3. dbt_core_mcp/context.py +459 -0
  4. dbt_core_mcp/cte_generator.py +601 -0
  5. dbt_core_mcp/dbt/__init__.py +1 -0
  6. dbt_core_mcp/dbt/bridge_runner.py +1361 -0
  7. dbt_core_mcp/dbt/manifest.py +781 -0
  8. dbt_core_mcp/dbt/runner.py +67 -0
  9. dbt_core_mcp/dependencies.py +50 -0
  10. dbt_core_mcp/server.py +381 -0
  11. dbt_core_mcp/tools/__init__.py +77 -0
  12. dbt_core_mcp/tools/analyze_impact.py +78 -0
  13. dbt_core_mcp/tools/build_models.py +190 -0
  14. dbt_core_mcp/tools/demo/__init__.py +1 -0
  15. dbt_core_mcp/tools/demo/hello.html +267 -0
  16. dbt_core_mcp/tools/demo/ui_demo.py +41 -0
  17. dbt_core_mcp/tools/get_column_lineage.py +1988 -0
  18. dbt_core_mcp/tools/get_lineage.py +89 -0
  19. dbt_core_mcp/tools/get_project_info.py +96 -0
  20. dbt_core_mcp/tools/get_resource_info.py +134 -0
  21. dbt_core_mcp/tools/install_deps.py +102 -0
  22. dbt_core_mcp/tools/list_resources.py +84 -0
  23. dbt_core_mcp/tools/load_seeds.py +179 -0
  24. dbt_core_mcp/tools/query_database.py +459 -0
  25. dbt_core_mcp/tools/run_models.py +234 -0
  26. dbt_core_mcp/tools/snapshot_models.py +120 -0
  27. dbt_core_mcp/tools/test_models.py +238 -0
  28. dbt_core_mcp/utils/__init__.py +1 -0
  29. dbt_core_mcp/utils/env_detector.py +186 -0
  30. dbt_core_mcp/utils/process_check.py +130 -0
  31. dbt_core_mcp/utils/tool_utils.py +411 -0
  32. dbt_core_mcp/utils/warehouse_adapter.py +82 -0
  33. dbt_core_mcp/utils/warehouse_databricks.py +297 -0
  34. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/METADATA +784 -0
  35. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/RECORD +38 -0
  36. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/WHEEL +4 -0
  37. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/entry_points.txt +2 -0
  38. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1361 @@
1
+ """
2
+ Bridge Runner for dbt.
3
+
4
+ Executes dbt commands in the user's Python environment via subprocess,
5
+ using an inline Python script to invoke dbtRunner.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ import logging
11
+ import platform
12
+ import re
13
+ import time
14
+ from pathlib import Path
15
+ from typing import Any, Callable
16
+
17
+ import psutil
18
+
19
+ from ..utils.env_detector import detect_dbt_adapter, get_env_vars
20
+ from ..utils.process_check import is_dbt_running, wait_for_dbt_completion
21
+ from ..utils.warehouse_adapter import WarehouseAdapter, create_warehouse_adapter
22
+ from .runner import DbtRunnerResult
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
+ def _get_action_verb(command: str) -> str:
28
+ """Map dbt command to user-friendly action verb for progress messages."""
29
+ command_map = {
30
+ "show": "Querying database",
31
+ "compile": "Compiling",
32
+ "parse": "Parsing",
33
+ "list": "Listing resources",
34
+ "ls": "Listing resources",
35
+ "debug": "Running diagnostics",
36
+ "deps": "Installing dependencies",
37
+ "build": "Building",
38
+ "test": "Testing",
39
+ "run": "Running",
40
+ "seed": "Seeding",
41
+ "snapshot": "Snapshotting",
42
+ }
43
+ # For commands not in the map, capitalize and use as-is
44
+ return command_map.get(command, command.capitalize() if command else "Executing")
45
+
46
+
47
+ class BridgeRunner:
48
+ """
49
+ Execute dbt commands in user's environment via subprocess bridge.
50
+
51
+ This runner executes DBT using the dbtRunner API within the user's
52
+ Python environment, avoiding version conflicts while still benefiting
53
+ from dbtRunner's structured results.
54
+ """
55
+
56
+ def __init__(self, project_dir: Path, python_command: list[str], timeout: float | None = None, use_persistent_process: bool = True):
57
+ """
58
+ Initialize the bridge runner.
59
+
60
+ Args:
61
+ project_dir: Path to the dbt project directory
62
+ python_command: Command to run Python in the user's environment
63
+ (e.g., ['uv', 'run', 'python'] or ['/path/to/venv/bin/python'])
64
+ timeout: Timeout in seconds for dbt commands (default: None for no timeout)
65
+ use_persistent_process: If True, reuse a persistent dbt process for better performance
66
+ """
67
+ self.project_dir = project_dir.resolve() # Ensure absolute path
68
+ self.python_command = python_command
69
+ self.timeout = timeout
70
+ self.use_persistent_process = use_persistent_process
71
+ self._project_config: dict[str, Any] | None = None # Lazy-loaded project configuration
72
+ self._project_config_mtime: float | None = None # Track last modification time
73
+
74
+ # Get target-path from config (lazy-load happens in _get_project_config)
75
+ config = self._get_project_config()
76
+ target_path = config.get("target-path", "target")
77
+ self._target_dir = self.project_dir / target_path
78
+
79
+ # Detect profiles directory (project dir or ~/.dbt)
80
+ self.profiles_dir = self.project_dir if (self.project_dir / "profiles.yml").exists() else Path.home() / ".dbt"
81
+ logger.info(f"Using profiles directory: {self.profiles_dir}")
82
+
83
+ # Initialize warehouse adapter for pre-warming
84
+ self._warehouse_adapter: WarehouseAdapter | None = None
85
+ self._init_warehouse_adapter()
86
+
87
+ # Persistent dbt process for performance
88
+ self._dbt_process: asyncio.subprocess.Process | None = None
89
+ self._process_lock = asyncio.Lock() # Ensure sequential access
90
+ self._request_counter = 0
91
+
92
+ def _get_project_config(self) -> dict[str, Any]:
93
+ """
94
+ Lazy-load and cache dbt_project.yml configuration.
95
+ Reloads if file has been modified since last read.
96
+
97
+ Returns:
98
+ Dictionary with project configuration
99
+ """
100
+ import yaml
101
+
102
+ project_file = self.project_dir / "dbt_project.yml"
103
+
104
+ # Check if file exists and get modification time
105
+ if project_file.exists():
106
+ current_mtime = project_file.stat().st_mtime
107
+
108
+ # Reload if never loaded or file has changed
109
+ if self._project_config is None or self._project_config_mtime != current_mtime:
110
+ try:
111
+ with open(project_file) as f:
112
+ loaded_config = yaml.safe_load(f)
113
+ self._project_config = loaded_config if isinstance(loaded_config, dict) else {}
114
+ self._project_config_mtime = current_mtime
115
+ except Exception as e:
116
+ logger.warning(f"Failed to parse dbt_project.yml: {e}")
117
+ self._project_config = {}
118
+ self._project_config_mtime = None
119
+ else:
120
+ self._project_config = {}
121
+ self._project_config_mtime = None
122
+
123
+ return self._project_config if self._project_config is not None else {}
124
+
125
+ def _init_warehouse_adapter(self) -> None:
126
+ """
127
+ Initialize the warehouse adapter based on dbt profile configuration.
128
+
129
+ Detects the database type from profiles.yml and creates the appropriate
130
+ adapter (Databricks, Snowflake, or no-op default).
131
+ """
132
+ try:
133
+ adapter_type = detect_dbt_adapter(self.project_dir)
134
+ self._warehouse_adapter = create_warehouse_adapter(self.project_dir, adapter_type)
135
+ logger.info(f"Initialized warehouse adapter for {adapter_type}")
136
+ except Exception as e:
137
+ logger.warning(f"Failed to initialize warehouse adapter: {e}, using no-op adapter")
138
+ from ..utils.warehouse_adapter import NoOpWarehouseAdapter
139
+
140
+ self._warehouse_adapter = NoOpWarehouseAdapter()
141
+
142
+ async def _start_persistent_process(self) -> None:
143
+ """Start the persistent dbt process if not already running."""
144
+ if self._dbt_process is not None and self._dbt_process.returncode is None:
145
+ # Process already running
146
+ return
147
+
148
+ logger.info("Starting persistent dbt process...")
149
+
150
+ # Build unified script in loop mode
151
+ loop_script = self._build_unified_script([], loop_mode=True)
152
+
153
+ # Build command to run loop script
154
+ cmd = [*self.python_command, "-c", loop_script]
155
+
156
+ # Get environment variables
157
+ env_vars = get_env_vars(self.python_command)
158
+ env = None
159
+ if env_vars:
160
+ import os
161
+ import tempfile
162
+
163
+ env = os.environ.copy()
164
+ # Force UTF-8 encoding for subprocess to handle Unicode characters in dbt output
165
+ env["PYTHONIOENCODING"] = "utf-8"
166
+ # Use unique temp directory per project for dbt logs to avoid Windows file locking
167
+ # Hash the project path to create a unique but consistent subdirectory
168
+ import hashlib
169
+
170
+ project_hash = hashlib.md5(str(self.project_dir).encode()).hexdigest()[:8]
171
+ dbt_log_dir = Path(tempfile.gettempdir()) / f"dbt_mcp_logs_{project_hash}"
172
+ dbt_log_dir.mkdir(parents=True, exist_ok=True)
173
+ env["DBT_LOG_PATH"] = str(dbt_log_dir)
174
+ # Disable log file rotation to prevent Windows file locking issues
175
+ env["DBT_MAX_LOG_FILE_SIZE"] = "0" # Disable rotation by size
176
+ env.update(env_vars)
177
+ else:
178
+ import os
179
+ import tempfile
180
+
181
+ env = os.environ.copy()
182
+ # Force UTF-8 encoding for subprocess to handle Unicode characters in dbt output
183
+ env["PYTHONIOENCODING"] = "utf-8"
184
+ # Use unique temp directory per project for dbt logs to avoid Windows file locking
185
+ # Hash the project path to create a unique but consistent subdirectory
186
+ import hashlib
187
+
188
+ project_hash = hashlib.md5(str(self.project_dir).encode()).hexdigest()[:8]
189
+ dbt_log_dir = Path(tempfile.gettempdir()) / f"dbt_mcp_logs_{project_hash}"
190
+ dbt_log_dir.mkdir(parents=True, exist_ok=True)
191
+ env["DBT_LOG_PATH"] = str(dbt_log_dir)
192
+ # Disable log file rotation to prevent Windows file locking issues
193
+ env["DBT_MAX_LOG_FILE_SIZE"] = "0" # Disable rotation by size
194
+
195
+ # Start process
196
+ self._dbt_process = await asyncio.create_subprocess_exec(
197
+ *cmd,
198
+ stdin=asyncio.subprocess.PIPE,
199
+ stdout=asyncio.subprocess.PIPE,
200
+ stderr=asyncio.subprocess.PIPE,
201
+ cwd=self.project_dir,
202
+ env=env,
203
+ )
204
+ assert self._dbt_process is not None
205
+ assert self._dbt_process.stdout is not None
206
+ assert self._dbt_process.stderr is not None
207
+ assert self._dbt_process.stdin is not None
208
+
209
+ # Wait for ready signal
210
+ try:
211
+ ready_line = await asyncio.wait_for(self._dbt_process.stdout.readline(), timeout=30.0)
212
+ ready_str = ready_line.decode().strip()
213
+
214
+ if not ready_str:
215
+ # No output - check stderr for errors
216
+ stderr_data = await asyncio.wait_for(self._dbt_process.stderr.read(), timeout=1.0)
217
+ stderr_str = stderr_data.decode() if stderr_data else "(no stderr)"
218
+ raise RuntimeError(f"Persistent process started but sent no ready message. stderr: {stderr_str[:500]}")
219
+
220
+ try:
221
+ ready_msg = json.loads(ready_str)
222
+ except json.JSONDecodeError:
223
+ # Invalid JSON - check stderr
224
+ stderr_data = await asyncio.wait_for(self._dbt_process.stderr.read(), timeout=1.0)
225
+ stderr_str = stderr_data.decode() if stderr_data else "(no stderr)"
226
+ raise RuntimeError(f"Invalid ready message: {ready_str[:200]}. stderr: {stderr_str[:500]}")
227
+
228
+ if ready_msg.get("type") == "ready":
229
+ logger.info(f"Persistent dbt process started (PID {self._dbt_process.pid})")
230
+ else:
231
+ raise RuntimeError(f"Unexpected ready message: {ready_msg}")
232
+ except asyncio.TimeoutError:
233
+ logger.error("Timeout waiting for dbt process to become ready")
234
+ await self._stop_persistent_process()
235
+ raise RuntimeError("Failed to start persistent dbt process")
236
+ except Exception as e:
237
+ logger.error(f"Error starting persistent dbt process: {e}")
238
+ await self._stop_persistent_process()
239
+ raise
240
+
241
+ async def _stop_persistent_process(self) -> None:
242
+ """Stop the persistent dbt process gracefully."""
243
+ if self._dbt_process is None:
244
+ return
245
+ assert self._dbt_process is not None
246
+ assert self._dbt_process.stdin is not None
247
+
248
+ try:
249
+ if self._dbt_process.returncode is None:
250
+ # Send shutdown command
251
+ logger.info("Shutting down persistent dbt process...")
252
+ shutdown_msg = json.dumps({"shutdown": True}) + "\n"
253
+ self._dbt_process.stdin.write(shutdown_msg.encode())
254
+ await self._dbt_process.stdin.drain()
255
+
256
+ # Wait for graceful shutdown
257
+ try:
258
+ await asyncio.wait_for(self._dbt_process.wait(), timeout=5.0)
259
+ logger.info("Persistent dbt process shut down gracefully")
260
+ except asyncio.TimeoutError:
261
+ logger.warning("Timeout waiting for process shutdown, killing...")
262
+ self._dbt_process.kill()
263
+ await self._dbt_process.wait()
264
+ except Exception as e:
265
+ logger.warning(f"Error during shutdown: {e}, killing process...")
266
+ if self._dbt_process.returncode is None:
267
+ self._dbt_process.kill()
268
+ await self._dbt_process.wait()
269
+ finally:
270
+ # Close pipes to prevent resource warnings
271
+ if self._dbt_process:
272
+ if self._dbt_process.stdin:
273
+ self._dbt_process.stdin.close()
274
+ # stdout and stderr are StreamReaders - no close() method needed
275
+ self._dbt_process = None
276
+
277
    async def _invoke_persistent(self, args: list[str], progress_callback: Callable[[int, int, str], Any] | None = None, expected_total: int | None = None) -> DbtRunnerResult:
        """
        Execute a command using the persistent dbt process.

        Writes a JSON request line to the loop script's stdin and reads its
        stdout until the '{"success": ...}' completion marker appears.

        Args:
            args: dbt command arguments (e.g., ['run', '--select', 'model'])
            progress_callback: Optional (current, total, message) callback;
                may return a coroutine, which is awaited.
            expected_total: Optional expected resource count for progress.

        Returns:
            DbtRunnerResult with success parsed from the completion marker
            (assumed False if no valid marker is found).
        """
        # Ensure process is started
        await self._start_persistent_process()
        assert self._dbt_process is not None
        assert self._dbt_process.stdin is not None
        assert self._dbt_process.stdout is not None

        # Build request
        self._request_counter += 1
        request = {
            "command": args,
        }

        # Send request (one JSON object per line — the loop script reads line-wise)
        request_line = json.dumps(request) + "\n"
        self._dbt_process.stdin.write(request_line.encode())
        await self._dbt_process.stdin.drain()

        # Read output with progress parsing (same as one-off subprocess!)
        try:
            if progress_callback:
                logger.info("Progress callback provided, enabling streaming output")
                command_name = args[0] if args else None
                stdout, stderr = await self._stream_with_progress(self._dbt_process, progress_callback, expected_total, command_name)
            else:
                logger.info("No progress callback, using buffered output")
                # Read until we get the completion JSON
                stdout_lines = []

                while True:
                    # Apply the per-command timeout to each readline when configured.
                    if self.timeout:
                        line_bytes = await asyncio.wait_for(
                            self._dbt_process.stdout.readline(),
                            timeout=self.timeout,
                        )
                    else:
                        line_bytes = await self._dbt_process.stdout.readline()

                    if not line_bytes:
                        break
                    line = line_bytes.decode("utf-8", errors="replace").rstrip()
                    # Check if this is the completion marker
                    if line.startswith('{"success":'):
                        stdout_lines.append(line)  # Include completion marker
                        break
                    stdout_lines.append(line)

                stdout = "\n".join(stdout_lines)
                stderr = ""

            # Parse success from last line (completion marker)
            last_line = stdout.strip().split("\n")[-1] if stdout else ""
            try:
                completion = json.loads(last_line)
                success = completion.get("success", False)
            except json.JSONDecodeError:
                # If no valid completion marker, assume failure
                logger.warning("No valid completion marker found in output")
                success = False

            return DbtRunnerResult(success=success, stdout=stdout, stderr=stderr)

        except asyncio.CancelledError:
            # User aborted - force kill the persistent process immediately
            logger.info("Cancellation detected, force killing persistent process")
            if self._dbt_process and self._dbt_process.returncode is None:
                pid = self._dbt_process.pid
                self._dbt_process.kill()
                logger.info(f"Kill signal sent to PID {pid}, waiting for process to terminate...")

                # Poll process status and log updates while waiting
                # Use shield to prevent cancellation from interrupting cleanup
                start_time = asyncio.get_event_loop().time()
                poll_interval = 1.0  # Check every second
                timeout = 30.0  # Give up after 30 seconds

                logger.info(f"Entering wait loop for PID {pid}")

                async def wait_for_termination() -> None:
                    # Repeatedly wait in short slices so we can log progress
                    # while the killed process shuts down.
                    while True:
                        try:
                            logger.info(f"Attempting to wait for process {pid} (timeout={poll_interval}s)...")
                            # Check if process has terminated
                            if self._dbt_process is not None:
                                await asyncio.wait_for(self._dbt_process.wait(), timeout=poll_interval)
                                logger.info(f"wait_for completed successfully for PID {pid}")
                                logger.info(f"Persistent process terminated (PID {pid}, exit code: {self._dbt_process.returncode})")
                                break
                        except asyncio.TimeoutError:
                            # Still waiting - log status update
                            elapsed = asyncio.get_event_loop().time() - start_time
                            if elapsed > timeout:
                                logger.warning(f"Process {pid} did not terminate after {timeout}s, giving up wait")
                                break
                            logger.info(f"Still waiting for PID {pid} to terminate... ({elapsed:.1f}s elapsed)")

                # shield() keeps the cleanup running even though we are
                # already inside a cancellation.
                await asyncio.shield(wait_for_termination())
            self._dbt_process = None
            raise
        except asyncio.TimeoutError:
            logger.error("Timeout waiting for response from persistent process")
            # Kill and restart process on timeout
            await self._stop_persistent_process()
            return DbtRunnerResult(
                success=False,
                exception=RuntimeError(f"Command timed out after {self.timeout} seconds"),
            )
        except Exception as e:
            logger.error(f"Error communicating with persistent process: {e}")
            # Kill and restart process on error
            await self._stop_persistent_process()
            return DbtRunnerResult(success=False, exception=e)
390
+
391
    async def invoke(self, args: list[str], progress_callback: Callable[[int, int, str], Any] | None = None, expected_total: int | None = None) -> DbtRunnerResult:
        """
        Execute a dbt command via subprocess bridge.

        Performs setup (warehouse pre-warm, external-process check), then
        dispatches either to the persistent loop process (default) or a
        one-off subprocess.

        Args:
            args: dbt command arguments (e.g., ['parse'], ['run', '--select', 'model'])
            progress_callback: Optional async callback for progress updates.
                Called with (current, total, message) for each model processed.
            expected_total: Optional expected total count from pre-execution `dbt list`.
                If provided, progress will start with correct total immediately.

        Returns:
            Result of the command execution
        """
        invoke_total_start = time.time()

        # Debug: Check if progress_callback exists
        logger.info(f"invoke() called with progress_callback: {progress_callback is not None}")

        # Calculate setup steps for progress reporting
        setup_steps = []
        if self._needs_database_access(args) and self._warehouse_adapter:
            setup_steps.append("warehouse")
        setup_steps.append("concurrency")
        if self.use_persistent_process:
            setup_steps.append("lock")
        total_setup_steps = len(setup_steps)
        current_setup_step = 0

        # Helper to report setup progress
        async def report_setup_progress(message: str) -> None:
            nonlocal current_setup_step
            current_setup_step += 1  # Increment FIRST so we show progress immediately
            logger.info(f"Setup progress: step {current_setup_step}/{total_setup_steps}: {message}")
            if progress_callback:
                try:
                    result = progress_callback(current_setup_step, total_setup_steps, message)
                    if asyncio.iscoroutine(result):
                        await result
                    logger.info("Setup progress callback invoked successfully")
                except Exception as e:
                    logger.warning(f"Setup progress callback error: {e}")
            else:
                logger.warning(f"No progress_callback available for setup step: {message}")

        # Pre-warm warehouse if needed (for commands that require database access)
        if self._needs_database_access(args):
            try:
                if self._warehouse_adapter:
                    await report_setup_progress("Pre-warming warehouse...")
                    prewarm_start = time.time()
                    await self._warehouse_adapter.prewarm(None)  # Don't pass callback - we're handling progress
                    prewarm_end = time.time()
                    logger.info(f"Warehouse pre-warming took {prewarm_end - prewarm_start:.2f}s")
            except Exception as e:
                # Pre-warming is best-effort: a failure must not block the command.
                logger.warning(f"Warehouse pre-warming failed (continuing anyway): {e}")

        # Check for external dbt processes (excluding our persistent process)
        await report_setup_progress("Checking for running processes...")
        concurrency_start = time.time()
        exclude_pid = self._dbt_process.pid if self._dbt_process else None
        if is_dbt_running(self.project_dir, exclude_pid=exclude_pid):
            logger.info("External dbt process detected, waiting for completion...")

            # Report waiting state
            if progress_callback:
                try:
                    result = progress_callback(0, 1, "Waiting for another dbt process to finish...")
                    if asyncio.iscoroutine(result):
                        await result
                except Exception as e:
                    logger.warning(f"Progress callback error: {e}")

            if not wait_for_dbt_completion(self.project_dir, timeout=10.0, poll_interval=0.2):
                logger.error("Timeout waiting for external dbt process to complete")
                return DbtRunnerResult(
                    success=False,
                    exception=RuntimeError("dbt is already running in this project. Please wait for it to complete."),
                )
        concurrency_end = time.time()
        logger.info(f"Concurrency check took {concurrency_end - concurrency_start:.2f}s")

        # Use persistent process if enabled
        if self.use_persistent_process:
            # Determine what we're waiting for
            if self._process_lock.locked():
                # Lock is held by another command
                await report_setup_progress("Waiting for available process...")
            elif self._dbt_process is None:
                # Process doesn't exist yet - will need to start it
                await report_setup_progress("Starting dbt process...")
            else:
                # Process exists, just acquiring lock
                await report_setup_progress("Acquiring process lock...")

            async with self._process_lock:
                logger.info("Using persistent dbt process")

                # Reset progress bar for dbt execution phase
                # Setup is complete (3/3), now starting dbt execution (1/1000 = 0.1% minimal bar)
                # Note: 0/N doesn't trigger visual reset, but 1/1000 gives tiny visible progress
                logger.info(f"Resetting progress bar, progress_callback exists: {progress_callback is not None}")
                if progress_callback:
                    command = args[0] if args else ""
                    action = _get_action_verb(command)
                    reset_message = f"{action}..."

                    try:
                        logger.info(f"Invoking reset callback: 1/1000 - {reset_message}")
                        result = progress_callback(1, 1000, reset_message)
                        if asyncio.iscoroutine(result):
                            await result
                        logger.info("Reset callback completed successfully")
                    except Exception as e:
                        logger.warning(f"Progress callback error: {e}")

                result = await self._invoke_persistent(args, progress_callback, expected_total)
                logger.info(f"Total invoke() time: {time.time() - invoke_total_start:.2f}s")
                return result

        # Fall back to one-off subprocess
        logger.info("Using one-off subprocess (persistent mode disabled)")

        # Build unified Python script in one-off mode
        script = self._build_unified_script(args, loop_mode=False)

        # Execute in user's environment
        full_command = [*self.python_command, "-c", script]

        logger.info(f"Executing dbt command: {args}")
        logger.info(f"Using Python: {self.python_command}")
        logger.info(f"Working directory: {self.project_dir}")

        # Get environment-specific variables (e.g., PIPENV_IGNORE_VIRTUALENVS for pipenv)
        env_vars = get_env_vars(self.python_command)
        import os
        import tempfile

        env = os.environ.copy()

        # Force UTF-8 encoding for subprocess to handle Unicode characters in dbt output
        env["PYTHONIOENCODING"] = "utf-8"

        # Use unique temp directory per project for dbt logs to avoid Windows file locking
        # Hash the project path to create a unique but consistent subdirectory
        import hashlib

        project_hash = hashlib.md5(str(self.project_dir).encode()).hexdigest()[:8]
        dbt_log_dir = Path(tempfile.gettempdir()) / f"dbt_mcp_logs_{project_hash}"
        dbt_log_dir.mkdir(parents=True, exist_ok=True)
        env["DBT_LOG_PATH"] = str(dbt_log_dir)
        # Disable log file rotation to prevent Windows file locking issues
        env["DBT_MAX_LOG_FILE_SIZE"] = "0"  # Disable rotation by size

        if env_vars:
            env.update(env_vars)
            logger.info(f"Adding environment variables: {list(env_vars.keys())}")

        proc = None
        try:
            logger.info("Starting subprocess...")
            subprocess_start = time.time()
            # Use create_subprocess_exec for proper async process handling
            proc = await asyncio.create_subprocess_exec(
                *full_command,
                cwd=self.project_dir,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                stdin=asyncio.subprocess.DEVNULL,
                env=env,
            )
            subprocess_created = time.time()
            logger.info(f"Subprocess creation took {subprocess_created - subprocess_start:.2f}s")

            # Report initial progress immediately
            if progress_callback:
                try:
                    result = progress_callback(0, 1, "Starting dbt...")
                    if asyncio.iscoroutine(result):
                        await result
                except Exception as e:
                    logger.warning(f"Progress callback error: {e}")

            # Stream output and capture progress if callback provided
            dbt_execution_start = time.time()
            if progress_callback:
                logger.info("Progress callback provided, enabling streaming output")
                command_name = args[0] if args else None
                stdout, stderr = await self._stream_with_progress(proc, progress_callback, expected_total, command_name)
            else:
                logger.info("No progress callback, using buffered output")
                # Wait for completion with timeout (original behavior)
                try:
                    stdout_bytes, stderr_bytes = await asyncio.wait_for(
                        proc.communicate(),
                        timeout=self.timeout,
                    )
                    stdout = stdout_bytes.decode("utf-8") if stdout_bytes else ""
                    stderr = stderr_bytes.decode("utf-8") if stderr_bytes else ""
                except asyncio.TimeoutError:
                    # Kill process on timeout
                    logger.error(f"dbt command timed out after {self.timeout} seconds, killing process")
                    proc.kill()
                    await proc.wait()
                    return DbtRunnerResult(
                        success=False,
                        exception=RuntimeError(f"dbt command timed out after {self.timeout} seconds"),
                    )

            dbt_execution_end = time.time()
            logger.info(f"dbt execution (from start to completion) took {dbt_execution_end - dbt_execution_start:.2f}s")

            returncode = proc.returncode
            logger.info(f"Subprocess completed with return code: {returncode}")
            logger.info(f"Total invoke() time: {time.time() - invoke_total_start:.2f}s")

            # Parse result from stdout
            if returncode == 0:
                # Extract JSON from last line (DBT output may contain logs)
                try:
                    last_line = stdout.strip().split("\n")[-1]
                    output = json.loads(last_line)
                    success = output.get("success", False)
                    logger.info(f"dbt command {'succeeded' if success else 'failed'}: {args}")
                    return DbtRunnerResult(success=success, stdout=stdout, stderr=stderr)
                except (json.JSONDecodeError, IndexError) as e:
                    # If no JSON output, check return code
                    logger.warning(f"No JSON output from dbt command: {e}. stdout: {stdout[:200]}")
                    return DbtRunnerResult(success=True, stdout=stdout, stderr=stderr)
            else:
                # Non-zero return code indicates failure
                error_msg = stderr.strip() if stderr else stdout.strip()
                logger.error(f"dbt command failed with code {returncode}")
                logger.error(f"stdout: {stdout[:500]}")
                logger.error(f"stderr: {stderr[:500]}")

                # Try to extract meaningful error from stderr or stdout
                if not error_msg and stdout:
                    error_msg = stdout.strip()

                return DbtRunnerResult(success=False, exception=RuntimeError(error_msg or f"dbt command failed with code {returncode}"), stdout=stdout, stderr=stderr)
        except asyncio.CancelledError:
            # Kill the subprocess when cancelled
            if proc and proc.returncode is None:
                logger.info(f"Cancellation detected, killing subprocess PID {proc.pid}")
                await asyncio.shield(self._kill_process_tree(proc))
            raise
        except Exception as e:
            logger.exception(f"Error executing dbt command: {e}")
            # Clean up process on unexpected errors
            if proc and proc.returncode is None:
                proc.kill()
                await proc.wait()
            return DbtRunnerResult(success=False, exception=e, stdout="", stderr="")
645
+
646
+ async def _stream_with_progress(self, proc: asyncio.subprocess.Process, progress_callback: Callable[[int, int, str], Any], expected_total: int | None = None, command_name: str | None = None) -> tuple[str, str]:
647
+ """
648
+ Stream stdout/stderr and report progress in real-time.
649
+
650
+ Parses dbt output for progress indicators like:
651
+ - "1 of 5 START sql table model public.customers"
652
+ - "1 of 5 OK created sql table model public.customers"
653
+
654
+ Args:
655
+ proc: The running subprocess
656
+ progress_callback: Async callback(current, total, message)
657
+ expected_total: Expected total number of resources
658
+ command_name: Optional dbt command name (e.g., "build", "test") for progress messages
659
+
660
+ Returns:
661
+ Tuple of (stdout, stderr) as strings
662
+ """
663
+ logger.info("Starting stdout/stderr streaming with progress parsing")
664
+
665
+ # Pattern to match dbt progress lines with timestamp prefix: "12:04:38 1 of 5 START/OK/PASS/ERROR ..."
666
+ # Models use: START, OK, ERROR, FAIL, SKIP, WARN
667
+ # Tests use: START, PASS, FAIL, ERROR, SKIP, WARN
668
+ # Seeds use: START, INSERT, ERROR, SKIP
669
+ progress_pattern = re.compile(r"^\d{2}:\d{2}:\d{2}\s+(\d+) of (\d+) (START|OK|PASS|INSERT|ERROR|FAIL|SKIP|WARN)\s+(.+)$")
670
+
671
+ stdout_lines = []
672
+ stderr_lines = []
673
+ line_count = 0
674
+
675
+ # Track overall progress across all stages
676
+ overall_progress = 0
677
+ total_resources = expected_total if expected_total is not None else 0
678
+ seen_resources = set() # Track unique resources to avoid double-counting
679
+ running_models = [] # Track models currently running (FIFO order)
680
+ running_start_times = {} # Track start timestamps for elapsed time
681
+ ok_count = 0
682
+ error_count = 0
683
+ skip_count = 0
684
+ warn_count = 0
685
+
686
+ # Report initial progress if we have expected_total
687
+ if expected_total is not None and progress_callback:
688
+ try:
689
+ result = progress_callback(0, expected_total, "0/{} completed • Preparing...".format(expected_total))
690
+ if asyncio.iscoroutine(result):
691
+ await result
692
+ except Exception as e:
693
+ logger.warning(f"Initial progress callback error: {e}")
694
+
695
+ async def read_stdout() -> None:
696
+ """Read and parse stdout line by line."""
697
+ nonlocal line_count
698
+ assert proc.stdout is not None
699
+ logger.info("Starting stdout reader")
700
+ try:
701
+ while True:
702
+ line_bytes = await proc.stdout.readline()
703
+ if not line_bytes:
704
+ logger.info(f"Stdout EOF reached after {line_count} lines")
705
+ break
706
+
707
+ line = line_bytes.decode("utf-8", errors="replace").rstrip()
708
+ stdout_lines.append(line)
709
+ line_count += 1
710
+
711
+ # Log ALL lines to see the actual output format
712
+ logger.info(f"stdout[{line_count}]: {line}")
713
+
714
+ # Check for completion marker from persistent process
715
+ if line.startswith('{"success":'):
716
+ logger.info(f"Completion marker detected, stopping read: {line}")
717
+ break
718
+
719
+ # Detect when parsing completes and execution begins
720
+ # Line pattern: "HH:MM:SS Concurrency: N threads (target='...')"
721
+ if "Concurrency:" in line and "threads" in line and progress_callback:
722
+ try:
723
+ exec_msg = _get_action_verb(command_name) if command_name else "Executing..."
724
+ result = progress_callback(1, 1000, exec_msg)
725
+ if asyncio.iscoroutine(result):
726
+ await result
727
+ logger.info(f"Updated progress to '{exec_msg}'")
728
+ except Exception as e:
729
+ logger.warning(f"Progress callback error on concurrency line: {e}")
730
+
731
+ # Check for progress indicators
732
+ match = progress_pattern.match(line)
733
+ if match:
734
+ logger.info(f"Progress match found: {line}")
735
+ total = int(match.group(2))
736
+ status = match.group(3)
737
+ model_info = match.group(4).strip()
738
+
739
+ # Declare nonlocal variables for modification
740
+ nonlocal total_resources, overall_progress, ok_count, error_count, skip_count, warn_count
741
+
742
+ # Update total from progress lines (this is the actual count being executed)
743
+ if total > total_resources:
744
+ total_resources = total
745
+
746
+ # Extract model/test/seed name from info string
747
+ # Models: "sql table model schema.model_name ..."
748
+ # Tests: "test not_null_customers_customer_id ...... [RUN]"
749
+ # Seeds START: "seed file main.raw_customers ...... [RUN]"
750
+ # Seeds OK: "loaded seed file main.raw_customers ...... [INSERT 3 in 0.12s]"
751
+ model_name = model_info
752
+
753
+ # For models, extract after " model "
754
+ if " model " in model_info:
755
+ parts = model_info.split(" model ")
756
+ if len(parts) > 1:
757
+ # Get "schema.model_name" or just "model_name"
758
+ model_name = parts[1].split()[0] if parts[1] else model_info
759
+ # For seeds, extract after "seed file " or "loaded seed file "
760
+ elif "seed file " in model_info:
761
+ # Find "seed file " and extract what comes after
762
+ idx = model_info.find("seed file ")
763
+ if idx != -1:
764
+ # Extract from after "seed file " (10 chars)
765
+ rest = model_info[idx + 10 :]
766
+ model_name = rest.split()[0] if rest.split() else model_info
767
+ # For tests, handle "test " and "unit_test " prefixes
768
+ elif model_info.startswith("test "):
769
+ # Remove "test " prefix and get the name
770
+ model_name = model_info[5:].split()[0] if len(model_info) > 5 else model_info
771
+ elif model_info.startswith("unit_test "):
772
+ # For unit tests, extract the full test path after "unit_test "
773
+ # Format: "unit_test model_name::test_name"
774
+ rest = model_info[10:] # Skip "unit_test "
775
+ # Extract up to any trailing markers like [RUN]
776
+ model_name = rest.split(" [")[0].strip() if " [" in rest else rest.strip()
777
+ else:
778
+ # For other cases, just take the first word
779
+ first_word = model_info.split()[0] if model_info.split() else model_info
780
+ model_name = first_word
781
+
782
+ # Clean up markers like [RUN] or [PASS] or [INSERT 3] and dots
783
+ import re
784
+
785
+ model_name = re.sub(r"\s*\.+\s*\[(RUN|PASS|FAIL|ERROR|SKIP|WARN|INSERT)\].*$", "", model_name)
786
+ model_name = re.sub(r"\s+\[.*$", "", model_name) # Remove any bracketed content
787
+ model_name = model_name.strip()
788
+
789
+ # Handle START events - add to running queue
790
+ if status == "START":
791
+ if model_name not in running_models:
792
+ running_models.append(model_name)
793
+ running_start_times[model_name] = time.time()
794
+ logger.info(f"Model started: {model_name}")
795
+
796
+ # Handle completion events - remove from running queue
797
+ elif status in ("OK", "PASS", "INSERT", "ERROR", "FAIL", "SKIP", "WARN"):
798
+ # Create unique resource key to avoid double-counting
799
+ resource_key = f"{status}:{model_name}"
800
+
801
+ # Only increment overall progress for new resources
802
+ if resource_key not in seen_resources:
803
+ seen_resources.add(resource_key)
804
+ overall_progress += 1
805
+
806
+ # Track success/error/skip/warn counts
807
+ if status in ("OK", "PASS", "INSERT"):
808
+ ok_count += 1
809
+ elif status in ("ERROR", "FAIL"):
810
+ error_count += 1
811
+ elif status == "SKIP":
812
+ skip_count += 1
813
+ elif status == "WARN":
814
+ warn_count += 1
815
+
816
+ logger.info(f"New resource: {resource_key}, overall progress: {overall_progress}/{total_resources}")
817
+
818
+ # ALWAYS remove from running queue on completion (regardless of whether it's new)
819
+ if model_name in running_models:
820
+ running_models.remove(model_name)
821
+ running_start_times.pop(model_name, None)
822
+ logger.info(f"Model completed: {model_name}, status: {status}")
823
+
824
+ # Build progress message: "5/20 completed (✅ 3, ❌ 1, ⚠️ 1) • Running (2): customers (5s)"
825
+ # Show statuses conditionally (only when > 0)
826
+ status_parts = []
827
+ if ok_count > 0:
828
+ status_parts.append(f"✅ {ok_count}")
829
+ if error_count > 0:
830
+ status_parts.append(f"❌ {error_count}")
831
+ if warn_count > 0:
832
+ status_parts.append(f"⚠️ {warn_count}")
833
+ if skip_count > 0:
834
+ status_parts.append(f"⏭️ {skip_count}")
835
+
836
+ # Format: "5/14 completed (✅ 3, ❌ 2)" or just "5/14 completed" if no statuses yet
837
+ if status_parts:
838
+ summary_stats = f"{overall_progress}/{total_resources} completed ({', '.join(status_parts)})"
839
+ else:
840
+ summary_stats = f"{overall_progress}/{total_resources} completed"
841
+
842
+ # Clear running models if all work is complete
843
+ if overall_progress == total_resources and total_resources > 0:
844
+ running_models.clear()
845
+ running_start_times.clear()
846
+
847
+ # Format running list with elapsed times
848
+ max_display = 2
849
+ if len(running_models) > 0:
850
+ current_time = time.time()
851
+ running_with_times = []
852
+ for model in running_models[:max_display]:
853
+ elapsed = int(current_time - running_start_times.get(model, current_time))
854
+ running_with_times.append(f"{model} ({elapsed}s)")
855
+
856
+ if len(running_models) > max_display:
857
+ displayed = ", ".join(running_with_times)
858
+ running_str = f"Running ({len(running_models)}): {displayed} +{len(running_models) - max_display} more"
859
+ else:
860
+ running_str = f"Running ({len(running_models)}): {', '.join(running_with_times)}"
861
+
862
+ accumulated_message = f"{summary_stats} • {running_str}"
863
+ else:
864
+ accumulated_message = summary_stats if overall_progress > 0 else ""
865
+
866
+ # Call progress callback with overall progress and accumulated message (non-blocking)
867
+ if accumulated_message: # Only call if we have a message
868
+ try:
869
+ logger.info(f"PROGRESS CALLBACK: ({overall_progress}/{total_resources}) {accumulated_message}")
870
+ result = progress_callback(overall_progress, total_resources, accumulated_message)
871
+ if asyncio.iscoroutine(result):
872
+ await result
873
+ except Exception as e:
874
+ logger.warning(f"Progress callback error: {e}")
875
+ except asyncio.CancelledError:
876
+ logger.info("stdout reader cancelled")
877
+ raise
878
+ except Exception as e:
879
+ logger.warning(f"stdout reader error: {e}")
880
+
881
+ async def read_stderr() -> None:
882
+ """Read stderr line by line."""
883
+ assert proc.stderr is not None
884
+ try:
885
+ while True:
886
+ line_bytes = await proc.stderr.readline()
887
+ if not line_bytes:
888
+ break
889
+ line = line_bytes.decode("utf-8", errors="replace").rstrip()
890
+ stderr_lines.append(line)
891
+ # Log stderr in real-time to see bridge script diagnostics
892
+ if line:
893
+ logger.info(f"stderr: {line}")
894
+ except asyncio.CancelledError:
895
+ logger.info("stderr reader cancelled")
896
+ raise
897
+ except Exception as e:
898
+ logger.warning(f"stderr reader error: {e}")
899
+
900
+ # Run both readers concurrently with timeout
901
+ stdout_task = None
902
+ stderr_task = None
903
+ try:
904
+ # Create tasks for both readers
905
+ stdout_task = asyncio.create_task(read_stdout())
906
+ stderr_task = asyncio.create_task(read_stderr())
907
+
908
+ # Wait for stdout to complete (it will break on completion marker)
909
+ if self.timeout:
910
+ await asyncio.wait_for(stdout_task, timeout=self.timeout)
911
+ else:
912
+ await stdout_task
913
+
914
+ # Once stdout is done, cancel stderr (which is likely still blocking)
915
+ if stderr_task and not stderr_task.done():
916
+ stderr_task.cancel()
917
+ try:
918
+ await stderr_task
919
+ except asyncio.CancelledError:
920
+ pass
921
+
922
+ except asyncio.TimeoutError:
923
+ logger.error(f"dbt command timed out after {self.timeout} seconds, killing process")
924
+ # Cancel both reader tasks
925
+ if stdout_task and not stdout_task.done():
926
+ stdout_task.cancel()
927
+ if stderr_task and not stderr_task.done():
928
+ stderr_task.cancel()
929
+ try:
930
+ tasks = [t for t in [stdout_task, stderr_task] if t is not None]
931
+ if tasks:
932
+ await asyncio.gather(*tasks, return_exceptions=True)
933
+ except Exception:
934
+ pass
935
+ # Kill the process
936
+ proc.kill()
937
+ await proc.wait()
938
+ raise RuntimeError(f"dbt command timed out after {self.timeout} seconds")
939
+ except asyncio.CancelledError:
940
+ logger.info("Stream readers cancelled")
941
+ # Cancel both reader tasks
942
+ if stdout_task and not stdout_task.done():
943
+ stdout_task.cancel()
944
+ if stderr_task and not stderr_task.done():
945
+ stderr_task.cancel()
946
+ try:
947
+ tasks = [t for t in [stdout_task, stderr_task] if t is not None]
948
+ if tasks:
949
+ await asyncio.gather(*tasks, return_exceptions=True)
950
+ except Exception:
951
+ pass
952
+ raise
953
+ finally:
954
+ # Send final progress update if we have completed resources
955
+ if progress_callback and overall_progress > 0:
956
+ try:
957
+ # Build final status message
958
+ status_parts = []
959
+ if ok_count > 0:
960
+ status_parts.append(f"✅ {ok_count}")
961
+ if error_count > 0:
962
+ status_parts.append(f"❌ {error_count}")
963
+ if warn_count > 0:
964
+ status_parts.append(f"⚠️ {warn_count}")
965
+ if skip_count > 0:
966
+ status_parts.append(f"⏭️ {skip_count}")
967
+
968
+ if status_parts:
969
+ final_message = f"{overall_progress}/{total_resources} completed ({', '.join(status_parts)})"
970
+ else:
971
+ final_message = f"{overall_progress}/{total_resources} completed"
972
+
973
+ logger.info(f"FINAL PROGRESS: ({overall_progress}/{total_resources}) {final_message}")
974
+ result = progress_callback(overall_progress, total_resources, final_message)
975
+ if asyncio.iscoroutine(result):
976
+ await result
977
+ except Exception as e:
978
+ logger.warning(f"Final progress callback error: {e}")
979
+
980
+ # For one-off subprocesses, ensure process completes
981
+ # For persistent processes, DON'T wait (process stays alive)
982
+ # We can detect persistent by checking if we have _dbt_process
983
+ is_persistent = hasattr(self, "_dbt_process") and self._dbt_process is not None and proc.pid == self._dbt_process.pid
984
+ if not is_persistent and proc.returncode is None:
985
+ await proc.wait()
986
+
987
+ return "\n".join(stdout_lines), "\n".join(stderr_lines)
988
+
989
async def _kill_process_tree(self, proc: asyncio.subprocess.Process) -> None:
    """Kill a process and all of its children.

    On Windows this runs ``taskkill /T`` for a graceful tree termination
    first, polls for up to 10 seconds, then escalates to ``taskkill /F /T``
    and finally to ``proc.kill()`` as a last resort. On Unix it sends
    SIGTERM and escalates to SIGKILL after 2 seconds.

    Args:
        proc: The asyncio subprocess whose tree should be terminated.
    """
    pid = proc.pid
    if pid is None:
        logger.warning("Cannot kill process: PID is None")
        return

    # Log child processes before killing (diagnostics only; failures ignored).
    try:
        parent = psutil.Process(pid)
        children = parent.children(recursive=True)
        if children:
            logger.info(f"Process {pid} has {len(children)} child process(es): {[p.pid for p in children]}")
    except (psutil.NoSuchProcess, psutil.AccessDenied):
        pass

    if platform.system() == "Windows":
        # On Windows, try graceful termination first, then force kill.
        try:
            # Step 1: Try graceful termination (without /F flag).
            logger.info(f"Attempting graceful termination of process tree for PID {pid}")
            terminate_proc = await asyncio.create_subprocess_exec(
                "taskkill",
                "/T",  # Kill tree, but no /F (force) flag
                "/PID",
                str(pid),
                stdout=asyncio.subprocess.DEVNULL,
                stderr=asyncio.subprocess.DEVNULL,
            )

            # Wait for the taskkill command itself (it returns immediately).
            await terminate_proc.wait()

            # Now poll for the actual process to terminate, with a timeout.
            # Use time.monotonic() rather than asyncio.get_event_loop().time():
            # get_event_loop() is deprecated inside coroutines (Python 3.10+)
            # and a monotonic clock is the correct tool for elapsed-time checks.
            start_time = time.monotonic()
            timeout = 10.0
            poll_interval = 0.5

            while (time.monotonic() - start_time) < timeout:
                if not self._is_process_running(pid):
                    logger.info(f"Process {pid} terminated gracefully")
                    return
                await asyncio.sleep(poll_interval)

            # If we get here, the process didn't terminate gracefully.
            logger.info(f"Process {pid} still running after {timeout}s, forcing kill...")

            # Step 2: Force kill if graceful didn't work.
            logger.info(f"Force killing process tree for PID {pid}")
            kill_proc = await asyncio.create_subprocess_exec(
                "taskkill",
                "/F",  # Force
                "/T",  # Kill tree
                "/PID",
                str(pid),
                stdout=asyncio.subprocess.DEVNULL,
                stderr=asyncio.subprocess.DEVNULL,
            )

            await asyncio.wait_for(kill_proc.wait(), timeout=5.0)

            # Verify the process is actually dead before declaring success.
            await asyncio.sleep(0.3)
            try:
                if psutil.Process(pid).is_running():
                    logger.warning(f"Process {pid} still running after force kill")
                else:
                    logger.info(f"Successfully killed process tree for PID {pid}")
            except psutil.NoSuchProcess:
                logger.info(f"Process {pid} terminated successfully")

        except asyncio.TimeoutError:
            logger.warning(f"Force kill timed out for PID {pid}")
        except Exception as e:
            logger.warning(f"Failed to kill process tree: {e}")
            # Last resort fallback: kill just the root process directly.
            try:
                proc.kill()
                await proc.wait()
            except Exception:
                pass
    else:
        # On Unix, terminate (SIGTERM) then kill (SIGKILL) if needed.
        try:
            proc.terminate()
            await asyncio.wait_for(proc.wait(), timeout=2.0)
        except asyncio.TimeoutError:
            proc.kill()
            await proc.wait()
1078
+
1079
def _is_process_running(self, pid: int) -> bool:
    """Return True when a process with the given PID is currently alive."""
    try:
        return psutil.Process(pid).is_running()
    except (psutil.NoSuchProcess, psutil.AccessDenied):
        # Unknown or inaccessible PID counts as "not running" for our purposes.
        return False
1086
+
1087
def get_manifest_path(self) -> Path:
    """Return the location of dbt's generated ``manifest.json`` artifact."""
    target_dir: Path = self._target_dir
    return target_dir / "manifest.json"
1090
+
1091
async def shutdown(self) -> None:
    """Release runner resources by stopping the persistent dbt process."""
    await self._stop_persistent_process()
1094
+
1095
def __del__(self) -> None:
    """Best-effort cleanup when the runner is garbage collected."""
    # Only act when a persistent dbt process is still alive.
    if not (hasattr(self, "_dbt_process") and self._dbt_process and self._dbt_process.returncode is None):
        return

    logger.warning("BridgeRunner deleted with active process, forcing cleanup")
    try:
        proc = self._dbt_process
        # Close stdin (a StreamWriter) first to avoid resource warnings;
        # stdout/stderr are StreamReaders and need no explicit close.
        if proc.stdin:
            proc.stdin.close()
        # Then kill the process itself.
        proc.kill()
    except Exception as e:
        logger.warning(f"Error killing process during cleanup: {e}")
1109
+
1110
async def invoke_query(self, sql: str, progress_callback: Callable[[int, int, str], Any] | None = None) -> DbtRunnerResult:
    """
    Execute a SQL query using dbt show.

    Jinja templating is supported, including {{ ref() }} and {{ source() }}.
    No automatic LIMIT is applied — include one in the SQL if needed.

    Args:
        sql: SQL query to execute (supports Jinja: {{ ref('model') }},
            {{ source('src', 'table') }}). Include LIMIT in the SQL if you
            want to limit results.
        progress_callback: Optional callback for progress updates
            (current, total, message); may be sync or async.

    Returns:
        Result with query output in JSON format and ``elapsed_time`` set.
    """

    async def _notify(current: int, total: int, message: str) -> None:
        # Tolerate missing or misbehaving callbacks; support sync and async.
        if progress_callback is None:
            return
        try:
            outcome = progress_callback(current, total, message)
            if asyncio.iscoroutine(outcome):
                await outcome
        except Exception as e:
            logger.warning(f"Progress callback error: {e}")

    # --inline enables Jinja (ref/source); --no-populate-cache skips the
    # expensive information_schema warm-up queries.
    args = [
        "show",
        "--inline",
        sql,
        "--limit",
        "-1",
        "--output",
        "json",
        "--no-populate-cache",
    ]

    # Report that query execution is starting.
    await _notify(0, 1, "Executing query...")

    # Execute the command with the progress callback, timing the call.
    started = time.time()
    result = await self.invoke(args, progress_callback=progress_callback)
    elapsed = time.time() - started
    logger.info(f"invoke() took {elapsed:.2f}s total")

    # Store elapsed time in the result so the calling tool can report it.
    result.elapsed_time = elapsed

    # Report query completion only on success.
    if result.success:
        await _notify(1, 1, "Query complete")

    return result
1167
+
1168
async def invoke_compile(self, model_name: str, force: bool = False) -> DbtRunnerResult:
    """
    Compile a specific model, optionally forcing recompilation.

    Args:
        model_name: Name of the model to compile (e.g., 'customers')
        force: If True, always compile. If False, only compile if not already
            compiled (i.e. the manifest carries compiled_code for the model).

    Returns:
        Result of the compilation (a synthetic success result when skipped).
    """
    # If not forcing, check whether the model is already compiled.
    if not force:
        manifest_path = self.get_manifest_path()
        if manifest_path.exists():
            try:
                # manifest.json is always UTF-8; be explicit so Windows'
                # locale default encoding cannot break the read.
                with open(manifest_path, encoding="utf-8") as f:
                    manifest = json.load(f)

                # Look up this model's node and check for compiled output.
                nodes = manifest.get("nodes", {})
                for node in nodes.values():
                    if node.get("resource_type") == "model" and node.get("name") == model_name:
                        if node.get("compiled_code"):
                            logger.info(f"Model '{model_name}' already compiled, skipping compilation")
                            return DbtRunnerResult(success=True, stdout="Already compiled", stderr="")
                        break
            except Exception as e:
                # Any failure reading/parsing the manifest falls through to
                # an actual compile rather than aborting.
                logger.warning(f"Failed to check compilation status: {e}, forcing compilation")

    # Run compile for the specific model.
    logger.info(f"Compiling model: {model_name}")
    args = ["compile", "-s", model_name]
    result = await self.invoke(args)

    return result
1204
+
1205
def _needs_database_access(self, args: list[str]) -> bool:
    """
    Report whether a dbt command requires database access.

    Metadata-only commands ('parse', 'deps', 'clean', 'debug', 'list'/'ls',
    'compile') run without a warehouse connection; everything else
    ('run', 'test', 'build', 'seed', 'snapshot', 'show', ...) needs one.

    Args:
        args: dbt command arguments; the first element is the command name.

    Returns:
        True if the command needs database access, False otherwise.
    """
    if not args:
        return False

    # Commands that never execute SQL against the warehouse:
    # - debug checks connection config but doesn't require the warehouse up
    # - compile only renders SQL, it does not run it
    offline_commands = frozenset({"parse", "deps", "clean", "debug", "list", "ls", "compile"})
    return args[0].lower() not in offline_commands
1235
+
1236
def _build_unified_script(self, args: list[str], loop_mode: bool = False) -> str:
    """
    Build unified Python script that can run in one-off or persistent loop mode.

    The returned source is fed to a child Python interpreter. In loop mode it
    reads JSON command requests from stdin and emits a JSON completion marker
    per command; in one-off mode it runs a single dbt invocation and exits.

    Args:
        args: dbt command arguments (ignored in loop mode)
        loop_mode: If True, run persistent loop. If False, execute once and exit.

    Returns:
        Python script as string
    """
    # Add --profiles-dir to args if not already present (for one-off mode)
    if not loop_mode and "--profiles-dir" not in args:
        args = [*args, "--profiles-dir", str(self.profiles_dir)]

    # Add --log-format text to get human-readable output for progress parsing
    if not loop_mode and "--log-format" not in args:
        args = [*args, "--log-format", "text"]

    # Convert args to JSON-safe format for one-off mode
    args_json = json.dumps(args) if not loop_mode else "[]"

    # NOTE: everything inside this f-string is the child process's source
    # code; {{ }} render as literal braces there. self.profiles_dir and the
    # mode flag are baked in at build time.
    script = f"""
import json
import sys
import os

# Disable buffering for immediate I/O
sys.stdin.reconfigure(line_buffering=True)
sys.stdout.reconfigure(line_buffering=True)
sys.stderr.reconfigure(line_buffering=True)

# Set environment for text output
os.environ['DBT_USE_COLORS'] = '0'
os.environ['DBT_PRINTER_WIDTH'] = '80'

# Import dbtRunner
try:
    from dbt.cli.main import dbtRunner
except ImportError as e:
    error_msg = {{"success": False, "error": f"Failed to import dbtRunner: {{e}}"}}
    print(json.dumps(error_msg), flush=True)
    sys.exit(1)

# Initialize dbtRunner once
dbt = dbtRunner()

# Check mode: loop vs one-off
loop_mode = {str(loop_mode)}

if loop_mode:
    # === PERSISTENT LOOP MODE ===

    # Signal ready
    ready_msg = {{"type": "ready"}}
    print(json.dumps(ready_msg), flush=True)

    # Process commands in a loop
    while True:
        try:
            # Read command from stdin (blocking)
            line = sys.stdin.readline()
            if not line:
                # EOF - client disconnected
                break

            request = json.loads(line.strip())

            # Check for shutdown command
            if request.get("shutdown"):
                break

            # Extract command details
            command_args = request.get("command", [])

            # Add profiles_dir if not already present
            if "--profiles-dir" not in command_args:
                command_args = [*command_args, "--profiles-dir", {repr(str(self.profiles_dir))}]

            # Add text log format for consistent output
            if "--log-format" not in command_args:
                command_args = [*command_args, "--log-format", "text"]

            # Execute command - output goes to stdout naturally
            try:
                print(f"[DBT-BRIDGE] Running command: {{command_args[0] if command_args else 'unknown'}}", file=sys.stderr, flush=True)
                result = dbt.invoke(command_args)
                success = result.success
            except Exception as e:
                success = False
                print(f"Error executing dbt command: {{e}}", file=sys.stderr, flush=True)

            # Ensure all dbt output is flushed before sending completion marker
            sys.stdout.flush()
            sys.stderr.flush()

            # Send completion marker as JSON on last line
            completion = {{"success": success}}
            print(json.dumps(completion), flush=True)

        except json.JSONDecodeError as e:
            error_response = {{"type": "error", "error": f"Invalid JSON: {{e}}"}}
            print(json.dumps(error_response), flush=True)
        except Exception as e:
            error_response = {{"type": "error", "error": f"Unexpected error: {{e}}"}}
            print(json.dumps(error_response), flush=True)

else:
    # === ONE-OFF EXECUTION MODE ===

    try:
        # Execute dbtRunner with arguments
        result = dbt.invoke({args_json})

        # Return success status on last line (JSON)
        output = {{"success": result.success}}
        print(json.dumps(output))
        sys.exit(0 if result.success else 1)

    except Exception as e:
        # Ensure we always exit, even on error
        error_output = {{"success": False, "error": str(e)}}
        print(json.dumps(error_output))
        sys.exit(1)
"""
    return script