iflow-mcp_niclasolofsson-dbt-core-mcp 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt_core_mcp/__init__.py +18 -0
- dbt_core_mcp/__main__.py +436 -0
- dbt_core_mcp/context.py +459 -0
- dbt_core_mcp/cte_generator.py +601 -0
- dbt_core_mcp/dbt/__init__.py +1 -0
- dbt_core_mcp/dbt/bridge_runner.py +1361 -0
- dbt_core_mcp/dbt/manifest.py +781 -0
- dbt_core_mcp/dbt/runner.py +67 -0
- dbt_core_mcp/dependencies.py +50 -0
- dbt_core_mcp/server.py +381 -0
- dbt_core_mcp/tools/__init__.py +77 -0
- dbt_core_mcp/tools/analyze_impact.py +78 -0
- dbt_core_mcp/tools/build_models.py +190 -0
- dbt_core_mcp/tools/demo/__init__.py +1 -0
- dbt_core_mcp/tools/demo/hello.html +267 -0
- dbt_core_mcp/tools/demo/ui_demo.py +41 -0
- dbt_core_mcp/tools/get_column_lineage.py +1988 -0
- dbt_core_mcp/tools/get_lineage.py +89 -0
- dbt_core_mcp/tools/get_project_info.py +96 -0
- dbt_core_mcp/tools/get_resource_info.py +134 -0
- dbt_core_mcp/tools/install_deps.py +102 -0
- dbt_core_mcp/tools/list_resources.py +84 -0
- dbt_core_mcp/tools/load_seeds.py +179 -0
- dbt_core_mcp/tools/query_database.py +459 -0
- dbt_core_mcp/tools/run_models.py +234 -0
- dbt_core_mcp/tools/snapshot_models.py +120 -0
- dbt_core_mcp/tools/test_models.py +238 -0
- dbt_core_mcp/utils/__init__.py +1 -0
- dbt_core_mcp/utils/env_detector.py +186 -0
- dbt_core_mcp/utils/process_check.py +130 -0
- dbt_core_mcp/utils/tool_utils.py +411 -0
- dbt_core_mcp/utils/warehouse_adapter.py +82 -0
- dbt_core_mcp/utils/warehouse_databricks.py +297 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/METADATA +784 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/RECORD +38 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/WHEEL +4 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/entry_points.txt +2 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/licenses/LICENSE +21 -0
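For orientation, the public surface visible in this diff (BridgeRunner, invoke, invoke_query, shutdown) can be exercised roughly as follows. This is a minimal sketch based only on the signatures shown below; the asyncio wrapper and the sample project path are illustrative, not part of the package:

    import asyncio
    from pathlib import Path

    from dbt_core_mcp.dbt.bridge_runner import BridgeRunner

    async def main() -> None:
        # Hypothetical project location and environment command.
        runner = BridgeRunner(
            project_dir=Path("/path/to/dbt_project"),
            python_command=["uv", "run", "python"],
        )
        try:
            result = await runner.invoke(["run", "--select", "customers"])
            print(result.success)
            shown = await runner.invoke_query("select * from {{ ref('customers') }} limit 5")
            print(shown.success)
        finally:
            await runner.shutdown()  # stops the persistent dbt process

    asyncio.run(main())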
dbt_core_mcp/dbt/bridge_runner.py (new file)

@@ -0,0 +1,1361 @@
"""
Bridge Runner for dbt.

Executes dbt commands in the user's Python environment via subprocess,
using an inline Python script to invoke dbtRunner.
"""

import asyncio
import json
import logging
import platform
import re
import time
from pathlib import Path
from typing import Any, Callable

import psutil

from ..utils.env_detector import detect_dbt_adapter, get_env_vars
from ..utils.process_check import is_dbt_running, wait_for_dbt_completion
from ..utils.warehouse_adapter import WarehouseAdapter, create_warehouse_adapter
from .runner import DbtRunnerResult

logger = logging.getLogger(__name__)


def _get_action_verb(command: str) -> str:
    """Map dbt command to user-friendly action verb for progress messages."""
    command_map = {
        "show": "Querying database",
        "compile": "Compiling",
        "parse": "Parsing",
        "list": "Listing resources",
        "ls": "Listing resources",
        "debug": "Running diagnostics",
        "deps": "Installing dependencies",
        "build": "Building",
        "test": "Testing",
        "run": "Running",
        "seed": "Seeding",
        "snapshot": "Snapshotting",
    }
    # For commands not in the map, capitalize and use as-is
    return command_map.get(command, command.capitalize() if command else "Executing")

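# Illustrative behavior (derived from the map above): _get_action_verb("build")
# returns "Building", an unmapped command like "docs" falls back to "Docs" via
# capitalize(), and an empty command yields "Executing".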

class BridgeRunner:
    """
    Execute dbt commands in the user's environment via a subprocess bridge.

    This runner executes dbt using the dbtRunner API within the user's
    Python environment, avoiding version conflicts while still benefiting
    from dbtRunner's structured results.
    """

    def __init__(self, project_dir: Path, python_command: list[str], timeout: float | None = None, use_persistent_process: bool = True):
        """
        Initialize the bridge runner.

        Args:
            project_dir: Path to the dbt project directory
            python_command: Command to run Python in the user's environment
                (e.g., ['uv', 'run', 'python'] or ['/path/to/venv/bin/python'])
            timeout: Timeout in seconds for dbt commands (default: None for no timeout)
            use_persistent_process: If True, reuse a persistent dbt process for better performance
        """
        self.project_dir = project_dir.resolve()  # Ensure absolute path
        self.python_command = python_command
        self.timeout = timeout
        self.use_persistent_process = use_persistent_process
        self._project_config: dict[str, Any] | None = None  # Lazy-loaded project configuration
        self._project_config_mtime: float | None = None  # Track last modification time

        # Get target-path from config (lazy-load happens in _get_project_config)
        config = self._get_project_config()
        target_path = config.get("target-path", "target")
        self._target_dir = self.project_dir / target_path

        # Detect profiles directory (project dir or ~/.dbt)
        self.profiles_dir = self.project_dir if (self.project_dir / "profiles.yml").exists() else Path.home() / ".dbt"
        logger.info(f"Using profiles directory: {self.profiles_dir}")

        # Initialize warehouse adapter for pre-warming
        self._warehouse_adapter: WarehouseAdapter | None = None
        self._init_warehouse_adapter()

        # Persistent dbt process for performance
        self._dbt_process: asyncio.subprocess.Process | None = None
        self._process_lock = asyncio.Lock()  # Ensure sequential access
        self._request_counter = 0

    def _get_project_config(self) -> dict[str, Any]:
        """
        Lazy-load and cache the dbt_project.yml configuration.
        Reloads if the file has been modified since the last read.

        Returns:
            Dictionary with project configuration
        """
        import yaml

        project_file = self.project_dir / "dbt_project.yml"

        # Check if file exists and get modification time
        if project_file.exists():
            current_mtime = project_file.stat().st_mtime

            # Reload if never loaded or file has changed
            if self._project_config is None or self._project_config_mtime != current_mtime:
                try:
                    with open(project_file) as f:
                        loaded_config = yaml.safe_load(f)
                    self._project_config = loaded_config if isinstance(loaded_config, dict) else {}
                    self._project_config_mtime = current_mtime
                except Exception as e:
                    logger.warning(f"Failed to parse dbt_project.yml: {e}")
                    self._project_config = {}
                    self._project_config_mtime = None
        else:
            self._project_config = {}
            self._project_config_mtime = None

        return self._project_config if self._project_config is not None else {}
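
    # Illustrative example (hypothetical project file): with a dbt_project.yml
    # containing "target-path: build", _get_project_config() returns a dict with
    # {"target-path": "build", ...}, so __init__ resolves self._target_dir to
    # <project_dir>/build; a later edit to the file is picked up via st_mtime.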

    def _init_warehouse_adapter(self) -> None:
        """
        Initialize the warehouse adapter based on dbt profile configuration.

        Detects the database type from profiles.yml and creates the appropriate
        adapter (Databricks, Snowflake, or no-op default).
        """
        try:
            adapter_type = detect_dbt_adapter(self.project_dir)
            self._warehouse_adapter = create_warehouse_adapter(self.project_dir, adapter_type)
            logger.info(f"Initialized warehouse adapter for {adapter_type}")
        except Exception as e:
            logger.warning(f"Failed to initialize warehouse adapter: {e}, using no-op adapter")
            from ..utils.warehouse_adapter import NoOpWarehouseAdapter

            self._warehouse_adapter = NoOpWarehouseAdapter()

    async def _start_persistent_process(self) -> None:
        """Start the persistent dbt process if not already running."""
        if self._dbt_process is not None and self._dbt_process.returncode is None:
            # Process already running
            return

        logger.info("Starting persistent dbt process...")

        # Build unified script in loop mode
        loop_script = self._build_unified_script([], loop_mode=True)

        # Build command to run loop script
        cmd = [*self.python_command, "-c", loop_script]

        # Build the subprocess environment (common setup, then env-specific overrides)
        import hashlib
        import os
        import tempfile

        env = os.environ.copy()
        # Force UTF-8 encoding for subprocess to handle Unicode characters in dbt output
        env["PYTHONIOENCODING"] = "utf-8"
        # Use a unique temp directory per project for dbt logs to avoid Windows file
        # locking; hash the project path to create a unique but consistent subdirectory
        project_hash = hashlib.md5(str(self.project_dir).encode()).hexdigest()[:8]
        dbt_log_dir = Path(tempfile.gettempdir()) / f"dbt_mcp_logs_{project_hash}"
        dbt_log_dir.mkdir(parents=True, exist_ok=True)
        env["DBT_LOG_PATH"] = str(dbt_log_dir)
        # Disable log file rotation to prevent Windows file locking issues
        env["DBT_MAX_LOG_FILE_SIZE"] = "0"  # Disable rotation by size
        # Add environment-specific variables (e.g., PIPENV_IGNORE_VIRTUALENVS for pipenv)
        env_vars = get_env_vars(self.python_command)
        if env_vars:
            env.update(env_vars)

        # Start process
        self._dbt_process = await asyncio.create_subprocess_exec(
            *cmd,
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=self.project_dir,
            env=env,
        )
        assert self._dbt_process is not None
        assert self._dbt_process.stdout is not None
        assert self._dbt_process.stderr is not None
        assert self._dbt_process.stdin is not None

        # Wait for ready signal
        try:
            ready_line = await asyncio.wait_for(self._dbt_process.stdout.readline(), timeout=30.0)
            ready_str = ready_line.decode().strip()

            if not ready_str:
                # No output - check stderr for errors
                stderr_data = await asyncio.wait_for(self._dbt_process.stderr.read(), timeout=1.0)
                stderr_str = stderr_data.decode() if stderr_data else "(no stderr)"
                raise RuntimeError(f"Persistent process started but sent no ready message. stderr: {stderr_str[:500]}")

            try:
                ready_msg = json.loads(ready_str)
            except json.JSONDecodeError:
                # Invalid JSON - check stderr
                stderr_data = await asyncio.wait_for(self._dbt_process.stderr.read(), timeout=1.0)
                stderr_str = stderr_data.decode() if stderr_data else "(no stderr)"
                raise RuntimeError(f"Invalid ready message: {ready_str[:200]}. stderr: {stderr_str[:500]}")

            if ready_msg.get("type") == "ready":
                logger.info(f"Persistent dbt process started (PID {self._dbt_process.pid})")
            else:
                raise RuntimeError(f"Unexpected ready message: {ready_msg}")
        except asyncio.TimeoutError:
            logger.error("Timeout waiting for dbt process to become ready")
            await self._stop_persistent_process()
            raise RuntimeError("Failed to start persistent dbt process")
        except Exception as e:
            logger.error(f"Error starting persistent dbt process: {e}")
            await self._stop_persistent_process()
            raise
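
    # Illustrative handshake (inferred from the checks above): the loop-mode
    # bridge script is expected to emit one JSON line such as {"type": "ready"}
    # on stdout before the runner starts sending requests.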

    async def _stop_persistent_process(self) -> None:
        """Stop the persistent dbt process gracefully."""
        if self._dbt_process is None:
            return
        assert self._dbt_process is not None
        assert self._dbt_process.stdin is not None

        try:
            if self._dbt_process.returncode is None:
                # Send shutdown command
                logger.info("Shutting down persistent dbt process...")
                shutdown_msg = json.dumps({"shutdown": True}) + "\n"
                self._dbt_process.stdin.write(shutdown_msg.encode())
                await self._dbt_process.stdin.drain()

                # Wait for graceful shutdown
                try:
                    await asyncio.wait_for(self._dbt_process.wait(), timeout=5.0)
                    logger.info("Persistent dbt process shut down gracefully")
                except asyncio.TimeoutError:
                    logger.warning("Timeout waiting for process shutdown, killing...")
                    self._dbt_process.kill()
                    await self._dbt_process.wait()
        except Exception as e:
            logger.warning(f"Error during shutdown: {e}, killing process...")
            if self._dbt_process.returncode is None:
                self._dbt_process.kill()
                await self._dbt_process.wait()
        finally:
            # Close pipes to prevent resource warnings
            if self._dbt_process and self._dbt_process.stdin:
                self._dbt_process.stdin.close()
            # stdout and stderr are StreamReaders - no close() method needed
            self._dbt_process = None

    async def _invoke_persistent(self, args: list[str], progress_callback: Callable[[int, int, str], Any] | None = None, expected_total: int | None = None) -> DbtRunnerResult:
        """Execute a command using the persistent dbt process."""
        # Ensure process is started
        await self._start_persistent_process()
        assert self._dbt_process is not None
        assert self._dbt_process.stdin is not None
        assert self._dbt_process.stdout is not None

        # Build request
        self._request_counter += 1
        request = {
            "command": args,
        }

        # Send request
        request_line = json.dumps(request) + "\n"
        self._dbt_process.stdin.write(request_line.encode())
        await self._dbt_process.stdin.drain()

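        # Illustrative request (matching the dict built above): a dbt run is sent
        # as {"command": ["run", "--select", "customers"]} plus a newline; the
        # bridge then streams dbt log lines and ends with a {"success": ...} marker.
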
        # Read output with progress parsing (same as one-off subprocess!)
        try:
            if progress_callback:
                logger.info("Progress callback provided, enabling streaming output")
                command_name = args[0] if args else None
                stdout, stderr = await self._stream_with_progress(self._dbt_process, progress_callback, expected_total, command_name)
            else:
                logger.info("No progress callback, using buffered output")
                # Read until we get the completion JSON
                stdout_lines = []

                while True:
                    if self.timeout:
                        line_bytes = await asyncio.wait_for(
                            self._dbt_process.stdout.readline(),
                            timeout=self.timeout,
                        )
                    else:
                        line_bytes = await self._dbt_process.stdout.readline()

                    if not line_bytes:
                        break
                    line = line_bytes.decode("utf-8", errors="replace").rstrip()
                    # Check if this is the completion marker
                    if line.startswith('{"success":'):
                        stdout_lines.append(line)  # Include completion marker
                        break
                    stdout_lines.append(line)

                stdout = "\n".join(stdout_lines)
                stderr = ""

            # Parse success from last line (completion marker)
            last_line = stdout.strip().split("\n")[-1] if stdout else ""
            try:
                completion = json.loads(last_line)
                success = completion.get("success", False)
            except json.JSONDecodeError:
                # If no valid completion marker, assume failure
                logger.warning("No valid completion marker found in output")
                success = False

            return DbtRunnerResult(success=success, stdout=stdout, stderr=stderr)

        except asyncio.CancelledError:
            # User aborted - force kill the persistent process immediately
            logger.info("Cancellation detected, force killing persistent process")
            if self._dbt_process and self._dbt_process.returncode is None:
                pid = self._dbt_process.pid
                self._dbt_process.kill()
                logger.info(f"Kill signal sent to PID {pid}, waiting for process to terminate...")

                # Poll process status and log updates while waiting.
                # Use shield to prevent cancellation from interrupting cleanup.
                start_time = asyncio.get_event_loop().time()
                poll_interval = 1.0  # Check every second
                timeout = 30.0  # Give up after 30 seconds

                logger.info(f"Entering wait loop for PID {pid}")

                async def wait_for_termination() -> None:
                    while True:
                        try:
                            logger.info(f"Attempting to wait for process {pid} (timeout={poll_interval}s)...")
                            # Check if process has terminated
                            if self._dbt_process is not None:
                                await asyncio.wait_for(self._dbt_process.wait(), timeout=poll_interval)
                                logger.info(f"wait_for completed successfully for PID {pid}")
                                logger.info(f"Persistent process terminated (PID {pid}, exit code: {self._dbt_process.returncode})")
                            break
                        except asyncio.TimeoutError:
                            # Still waiting - log status update
                            elapsed = asyncio.get_event_loop().time() - start_time
                            if elapsed > timeout:
                                logger.warning(f"Process {pid} did not terminate after {timeout}s, giving up wait")
                                break
                            logger.info(f"Still waiting for PID {pid} to terminate... ({elapsed:.1f}s elapsed)")

                await asyncio.shield(wait_for_termination())
                self._dbt_process = None
            raise
        except asyncio.TimeoutError:
            logger.error("Timeout waiting for response from persistent process")
            # Kill and restart process on timeout
            await self._stop_persistent_process()
            return DbtRunnerResult(
                success=False,
                exception=RuntimeError(f"Command timed out after {self.timeout} seconds"),
            )
        except Exception as e:
            logger.error(f"Error communicating with persistent process: {e}")
            # Kill and restart process on error
            await self._stop_persistent_process()
            return DbtRunnerResult(success=False, exception=e)

    async def invoke(self, args: list[str], progress_callback: Callable[[int, int, str], Any] | None = None, expected_total: int | None = None) -> DbtRunnerResult:
        """
        Execute a dbt command via subprocess bridge.

        Args:
            args: dbt command arguments (e.g., ['parse'], ['run', '--select', 'model'])
            progress_callback: Optional async callback for progress updates.
                Called with (current, total, message) for each model processed.
            expected_total: Optional expected total count from a pre-execution `dbt list`.
                If provided, progress will start with the correct total immediately.

        Returns:
            Result of the command execution
        """
        invoke_total_start = time.time()

        # Debug: check whether a progress_callback was supplied
        logger.info(f"invoke() called with progress_callback: {progress_callback is not None}")

        # Calculate setup steps for progress reporting
        setup_steps = []
        if self._needs_database_access(args) and self._warehouse_adapter:
            setup_steps.append("warehouse")
        setup_steps.append("concurrency")
        if self.use_persistent_process:
            setup_steps.append("lock")
        total_setup_steps = len(setup_steps)
        current_setup_step = 0

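        # Illustrative: for ["run"] with a warehouse adapter and the persistent
        # process enabled, setup_steps == ["warehouse", "concurrency", "lock"],
        # so setup progress is reported as 1/3, 2/3, 3/3.
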
        # Helper to report setup progress
        async def report_setup_progress(message: str) -> None:
            nonlocal current_setup_step
            current_setup_step += 1  # Increment FIRST so we show progress immediately
            logger.info(f"Setup progress: step {current_setup_step}/{total_setup_steps}: {message}")
            if progress_callback:
                try:
                    result = progress_callback(current_setup_step, total_setup_steps, message)
                    if asyncio.iscoroutine(result):
                        await result
                    logger.info("Setup progress callback invoked successfully")
                except Exception as e:
                    logger.warning(f"Setup progress callback error: {e}")
            else:
                logger.warning(f"No progress_callback available for setup step: {message}")

        # Pre-warm warehouse if needed (for commands that require database access)
        if self._needs_database_access(args):
            try:
                if self._warehouse_adapter:
                    await report_setup_progress("Pre-warming warehouse...")
                    prewarm_start = time.time()
                    await self._warehouse_adapter.prewarm(None)  # Don't pass callback - we're handling progress
                    prewarm_end = time.time()
                    logger.info(f"Warehouse pre-warming took {prewarm_end - prewarm_start:.2f}s")
            except Exception as e:
                logger.warning(f"Warehouse pre-warming failed (continuing anyway): {e}")

        # Check for external dbt processes (excluding our persistent process)
        await report_setup_progress("Checking for running processes...")
        concurrency_start = time.time()
        exclude_pid = self._dbt_process.pid if self._dbt_process else None
        if is_dbt_running(self.project_dir, exclude_pid=exclude_pid):
            logger.info("External dbt process detected, waiting for completion...")

            # Report waiting state
            if progress_callback:
                try:
                    result = progress_callback(0, 1, "Waiting for another dbt process to finish...")
                    if asyncio.iscoroutine(result):
                        await result
                except Exception as e:
                    logger.warning(f"Progress callback error: {e}")

            if not wait_for_dbt_completion(self.project_dir, timeout=10.0, poll_interval=0.2):
                logger.error("Timeout waiting for external dbt process to complete")
                return DbtRunnerResult(
                    success=False,
                    exception=RuntimeError("dbt is already running in this project. Please wait for it to complete."),
                )
        concurrency_end = time.time()
        logger.info(f"Concurrency check took {concurrency_end - concurrency_start:.2f}s")

        # Use persistent process if enabled
        if self.use_persistent_process:
            # Determine what we're waiting for
            if self._process_lock.locked():
                # Lock is held by another command
                await report_setup_progress("Waiting for available process...")
            elif self._dbt_process is None:
                # Process doesn't exist yet - will need to start it
                await report_setup_progress("Starting dbt process...")
            else:
                # Process exists, just acquiring lock
                await report_setup_progress("Acquiring process lock...")

            async with self._process_lock:
                logger.info("Using persistent dbt process")

                # Reset progress bar for the dbt execution phase.
                # Setup is complete (3/3); now starting dbt execution (1/1000 = 0.1% minimal bar).
                # Note: 0/N doesn't trigger a visual reset, but 1/1000 gives a tiny visible progress.
                logger.info(f"Resetting progress bar, progress_callback exists: {progress_callback is not None}")
                if progress_callback:
                    command = args[0] if args else ""
                    action = _get_action_verb(command)
                    reset_message = f"{action}..."

                    try:
                        logger.info(f"Invoking reset callback: 1/1000 - {reset_message}")
                        result = progress_callback(1, 1000, reset_message)
                        if asyncio.iscoroutine(result):
                            await result
                        logger.info("Reset callback completed successfully")
                    except Exception as e:
                        logger.warning(f"Progress callback error: {e}")

                result = await self._invoke_persistent(args, progress_callback, expected_total)
                logger.info(f"Total invoke() time: {time.time() - invoke_total_start:.2f}s")
                return result

        # Fall back to one-off subprocess
        logger.info("Using one-off subprocess (persistent mode disabled)")

        # Build unified Python script in one-off mode
        script = self._build_unified_script(args, loop_mode=False)

        # Execute in user's environment
        full_command = [*self.python_command, "-c", script]

        logger.info(f"Executing dbt command: {args}")
        logger.info(f"Using Python: {self.python_command}")
        logger.info(f"Working directory: {self.project_dir}")

        # Get environment-specific variables (e.g., PIPENV_IGNORE_VIRTUALENVS for pipenv)
        env_vars = get_env_vars(self.python_command)
        import hashlib
        import os
        import tempfile

        env = os.environ.copy()

        # Force UTF-8 encoding for subprocess to handle Unicode characters in dbt output
        env["PYTHONIOENCODING"] = "utf-8"

        # Use a unique temp directory per project for dbt logs to avoid Windows file
        # locking; hash the project path to create a unique but consistent subdirectory
        project_hash = hashlib.md5(str(self.project_dir).encode()).hexdigest()[:8]
        dbt_log_dir = Path(tempfile.gettempdir()) / f"dbt_mcp_logs_{project_hash}"
        dbt_log_dir.mkdir(parents=True, exist_ok=True)
        env["DBT_LOG_PATH"] = str(dbt_log_dir)
        # Disable log file rotation to prevent Windows file locking issues
        env["DBT_MAX_LOG_FILE_SIZE"] = "0"  # Disable rotation by size

        if env_vars:
            env.update(env_vars)
            logger.info(f"Adding environment variables: {list(env_vars.keys())}")

        proc = None
        try:
            logger.info("Starting subprocess...")
            subprocess_start = time.time()
            # Use create_subprocess_exec for proper async process handling
            proc = await asyncio.create_subprocess_exec(
                *full_command,
                cwd=self.project_dir,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                stdin=asyncio.subprocess.DEVNULL,
                env=env,
            )
            subprocess_created = time.time()
            logger.info(f"Subprocess creation took {subprocess_created - subprocess_start:.2f}s")

            # Report initial progress immediately
            if progress_callback:
                try:
                    result = progress_callback(0, 1, "Starting dbt...")
                    if asyncio.iscoroutine(result):
                        await result
                except Exception as e:
                    logger.warning(f"Progress callback error: {e}")

            # Stream output and capture progress if callback provided
            dbt_execution_start = time.time()
            if progress_callback:
                logger.info("Progress callback provided, enabling streaming output")
                command_name = args[0] if args else None
                stdout, stderr = await self._stream_with_progress(proc, progress_callback, expected_total, command_name)
            else:
                logger.info("No progress callback, using buffered output")
                # Wait for completion with timeout (original behavior)
                try:
                    stdout_bytes, stderr_bytes = await asyncio.wait_for(
                        proc.communicate(),
                        timeout=self.timeout,
                    )
                    stdout = stdout_bytes.decode("utf-8") if stdout_bytes else ""
                    stderr = stderr_bytes.decode("utf-8") if stderr_bytes else ""
                except asyncio.TimeoutError:
                    # Kill process on timeout
                    logger.error(f"dbt command timed out after {self.timeout} seconds, killing process")
                    proc.kill()
                    await proc.wait()
                    return DbtRunnerResult(
                        success=False,
                        exception=RuntimeError(f"dbt command timed out after {self.timeout} seconds"),
                    )

            dbt_execution_end = time.time()
            logger.info(f"dbt execution (from start to completion) took {dbt_execution_end - dbt_execution_start:.2f}s")

            returncode = proc.returncode
            logger.info(f"Subprocess completed with return code: {returncode}")
            logger.info(f"Total invoke() time: {time.time() - invoke_total_start:.2f}s")

            # Parse result from stdout
            if returncode == 0:
                # Extract JSON from last line (dbt output may contain logs)
                try:
                    last_line = stdout.strip().split("\n")[-1]
                    output = json.loads(last_line)
                    success = output.get("success", False)
                    logger.info(f"dbt command {'succeeded' if success else 'failed'}: {args}")
                    return DbtRunnerResult(success=success, stdout=stdout, stderr=stderr)
                except (json.JSONDecodeError, IndexError) as e:
                    # If there is no JSON output, fall back to the zero return code
                    logger.warning(f"No JSON output from dbt command: {e}. stdout: {stdout[:200]}")
                    return DbtRunnerResult(success=True, stdout=stdout, stderr=stderr)
            else:
                # Non-zero return code indicates failure
                error_msg = stderr.strip() if stderr else stdout.strip()
                logger.error(f"dbt command failed with code {returncode}")
                logger.error(f"stdout: {stdout[:500]}")
                logger.error(f"stderr: {stderr[:500]}")

                # Try to extract meaningful error from stderr or stdout
                if not error_msg and stdout:
                    error_msg = stdout.strip()

                return DbtRunnerResult(success=False, exception=RuntimeError(error_msg or f"dbt command failed with code {returncode}"), stdout=stdout, stderr=stderr)
        except asyncio.CancelledError:
            # Kill the subprocess when cancelled
            if proc and proc.returncode is None:
                logger.info(f"Cancellation detected, killing subprocess PID {proc.pid}")
                await asyncio.shield(self._kill_process_tree(proc))
            raise
        except Exception as e:
            logger.exception(f"Error executing dbt command: {e}")
            # Clean up process on unexpected errors
            if proc and proc.returncode is None:
                proc.kill()
                await proc.wait()
            return DbtRunnerResult(success=False, exception=e, stdout="", stderr="")

    async def _stream_with_progress(self, proc: asyncio.subprocess.Process, progress_callback: Callable[[int, int, str], Any], expected_total: int | None = None, command_name: str | None = None) -> tuple[str, str]:
        """
        Stream stdout/stderr and report progress in real-time.

        Parses dbt output for progress indicators like:
        - "1 of 5 START sql table model public.customers"
        - "1 of 5 OK created sql table model public.customers"

        Args:
            proc: The running subprocess
            progress_callback: Async callback(current, total, message)
            expected_total: Expected total number of resources
            command_name: Optional dbt command name (e.g., "build", "test") for progress messages

        Returns:
            Tuple of (stdout, stderr) as strings
        """
        logger.info("Starting stdout/stderr streaming with progress parsing")

        # Pattern to match dbt progress lines with timestamp prefix: "12:04:38 1 of 5 START/OK/PASS/ERROR ..."
        # Models use: START, OK, ERROR, FAIL, SKIP, WARN
        # Tests use: START, PASS, FAIL, ERROR, SKIP, WARN
        # Seeds use: START, INSERT, ERROR, SKIP
        progress_pattern = re.compile(r"^\d{2}:\d{2}:\d{2}\s+(\d+) of (\d+) (START|OK|PASS|INSERT|ERROR|FAIL|SKIP|WARN)\s+(.+)$")

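        # Illustrative match: "12:04:38 1 of 5 START sql table model public.customers ... [RUN]"
        # captures groups ("1", "5", "START", "sql table model public.customers ... [RUN]").
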
        stdout_lines = []
        stderr_lines = []
        line_count = 0

        # Track overall progress across all stages
        overall_progress = 0
        total_resources = expected_total if expected_total is not None else 0
        seen_resources = set()  # Track unique resources to avoid double-counting
        running_models = []  # Track models currently running (FIFO order)
        running_start_times = {}  # Track start timestamps for elapsed time
        ok_count = 0
        error_count = 0
        skip_count = 0
        warn_count = 0

        # Report initial progress if we have expected_total
        if expected_total is not None and progress_callback:
            try:
                result = progress_callback(0, expected_total, "0/{} completed • Preparing...".format(expected_total))
                if asyncio.iscoroutine(result):
                    await result
            except Exception as e:
                logger.warning(f"Initial progress callback error: {e}")

        async def read_stdout() -> None:
            """Read and parse stdout line by line."""
            nonlocal line_count
            assert proc.stdout is not None
            logger.info("Starting stdout reader")
            try:
                while True:
                    line_bytes = await proc.stdout.readline()
                    if not line_bytes:
                        logger.info(f"Stdout EOF reached after {line_count} lines")
                        break

                    line = line_bytes.decode("utf-8", errors="replace").rstrip()
                    stdout_lines.append(line)
                    line_count += 1

                    # Log ALL lines to see the actual output format
                    logger.info(f"stdout[{line_count}]: {line}")

                    # Check for completion marker from persistent process
                    if line.startswith('{"success":'):
                        logger.info(f"Completion marker detected, stopping read: {line}")
                        break

                    # Detect when parsing completes and execution begins
                    # Line pattern: "HH:MM:SS Concurrency: N threads (target='...')"
                    if "Concurrency:" in line and "threads" in line and progress_callback:
                        try:
                            exec_msg = _get_action_verb(command_name) if command_name else "Executing..."
                            result = progress_callback(1, 1000, exec_msg)
                            if asyncio.iscoroutine(result):
                                await result
                            logger.info(f"Updated progress to '{exec_msg}'")
                        except Exception as e:
                            logger.warning(f"Progress callback error on concurrency line: {e}")

                    # Check for progress indicators
                    match = progress_pattern.match(line)
                    if match:
                        logger.info(f"Progress match found: {line}")
                        total = int(match.group(2))
                        status = match.group(3)
                        model_info = match.group(4).strip()

                        # Declare nonlocal variables for modification
                        nonlocal total_resources, overall_progress, ok_count, error_count, skip_count, warn_count

                        # Update total from progress lines (this is the actual count being executed)
                        if total > total_resources:
                            total_resources = total

                        # Extract model/test/seed name from info string
                        # Models: "sql table model schema.model_name ..."
                        # Tests: "test not_null_customers_customer_id ...... [RUN]"
                        # Seeds START: "seed file main.raw_customers ...... [RUN]"
                        # Seeds OK: "loaded seed file main.raw_customers ...... [INSERT 3 in 0.12s]"
                        model_name = model_info

                        # For models, extract after " model "
                        if " model " in model_info:
                            parts = model_info.split(" model ")
                            if len(parts) > 1:
                                # Get "schema.model_name" or just "model_name"
                                model_name = parts[1].split()[0] if parts[1] else model_info
                        # For seeds, extract after "seed file " or "loaded seed file "
                        elif "seed file " in model_info:
                            # Find "seed file " and extract what comes after
                            idx = model_info.find("seed file ")
                            if idx != -1:
                                # Extract from after "seed file " (10 chars)
                                rest = model_info[idx + 10 :]
                                model_name = rest.split()[0] if rest.split() else model_info
                        # For tests, handle "test " and "unit_test " prefixes
                        elif model_info.startswith("test "):
                            # Remove "test " prefix and get the name
                            model_name = model_info[5:].split()[0] if len(model_info) > 5 else model_info
                        elif model_info.startswith("unit_test "):
                            # For unit tests, extract the full test path after "unit_test "
                            # Format: "unit_test model_name::test_name"
                            rest = model_info[10:]  # Skip "unit_test "
                            # Extract up to any trailing markers like [RUN]
                            model_name = rest.split(" [")[0].strip() if " [" in rest else rest.strip()
                        else:
                            # For other cases, just take the first word
                            first_word = model_info.split()[0] if model_info.split() else model_info
                            model_name = first_word

                        # Clean up markers like [RUN] or [PASS] or [INSERT 3] and dots
                        model_name = re.sub(r"\s*\.+\s*\[(RUN|PASS|FAIL|ERROR|SKIP|WARN|INSERT)\].*$", "", model_name)
                        model_name = re.sub(r"\s+\[.*$", "", model_name)  # Remove any bracketed content
                        model_name = model_name.strip()

                        # Handle START events - add to running queue
                        if status == "START":
                            if model_name not in running_models:
                                running_models.append(model_name)
                                running_start_times[model_name] = time.time()
                                logger.info(f"Model started: {model_name}")

                        # Handle completion events - remove from running queue
                        elif status in ("OK", "PASS", "INSERT", "ERROR", "FAIL", "SKIP", "WARN"):
                            # Create unique resource key to avoid double-counting
                            resource_key = f"{status}:{model_name}"

                            # Only increment overall progress for new resources
                            if resource_key not in seen_resources:
                                seen_resources.add(resource_key)
                                overall_progress += 1

                                # Track success/error/skip/warn counts
                                if status in ("OK", "PASS", "INSERT"):
                                    ok_count += 1
                                elif status in ("ERROR", "FAIL"):
                                    error_count += 1
                                elif status == "SKIP":
                                    skip_count += 1
                                elif status == "WARN":
                                    warn_count += 1

                                logger.info(f"New resource: {resource_key}, overall progress: {overall_progress}/{total_resources}")

                            # ALWAYS remove from running queue on completion (regardless of whether it's new)
                            if model_name in running_models:
                                running_models.remove(model_name)
                                running_start_times.pop(model_name, None)
                                logger.info(f"Model completed: {model_name}, status: {status}")

                        # Build progress message: "5/20 completed (✅ 3, ❌ 1, ⚠️ 1) • Running (2): customers (5s)"
                        # Show statuses conditionally (only when > 0)
                        status_parts = []
                        if ok_count > 0:
                            status_parts.append(f"✅ {ok_count}")
                        if error_count > 0:
                            status_parts.append(f"❌ {error_count}")
                        if warn_count > 0:
                            status_parts.append(f"⚠️ {warn_count}")
                        if skip_count > 0:
                            status_parts.append(f"⏭️ {skip_count}")

                        # Format: "5/14 completed (✅ 3, ❌ 2)" or just "5/14 completed" if no statuses yet
                        if status_parts:
                            summary_stats = f"{overall_progress}/{total_resources} completed ({', '.join(status_parts)})"
                        else:
                            summary_stats = f"{overall_progress}/{total_resources} completed"

                        # Clear running models if all work is complete
                        if overall_progress == total_resources and total_resources > 0:
                            running_models.clear()
                            running_start_times.clear()

                        # Format running list with elapsed times
                        max_display = 2
                        if len(running_models) > 0:
                            current_time = time.time()
                            running_with_times = []
                            for model in running_models[:max_display]:
                                elapsed = int(current_time - running_start_times.get(model, current_time))
                                running_with_times.append(f"{model} ({elapsed}s)")

                            if len(running_models) > max_display:
                                displayed = ", ".join(running_with_times)
                                running_str = f"Running ({len(running_models)}): {displayed} +{len(running_models) - max_display} more"
                            else:
                                running_str = f"Running ({len(running_models)}): {', '.join(running_with_times)}"

                            accumulated_message = f"{summary_stats} • {running_str}"
                        else:
                            accumulated_message = summary_stats if overall_progress > 0 else ""

                        # Call progress callback with overall progress and accumulated message (non-blocking)
                        if accumulated_message:  # Only call if we have a message
                            try:
                                logger.info(f"PROGRESS CALLBACK: ({overall_progress}/{total_resources}) {accumulated_message}")
                                result = progress_callback(overall_progress, total_resources, accumulated_message)
                                if asyncio.iscoroutine(result):
                                    await result
                            except Exception as e:
                                logger.warning(f"Progress callback error: {e}")
            except asyncio.CancelledError:
                logger.info("stdout reader cancelled")
                raise
            except Exception as e:
                logger.warning(f"stdout reader error: {e}")

        async def read_stderr() -> None:
            """Read stderr line by line."""
            assert proc.stderr is not None
            try:
                while True:
                    line_bytes = await proc.stderr.readline()
                    if not line_bytes:
                        break
                    line = line_bytes.decode("utf-8", errors="replace").rstrip()
                    stderr_lines.append(line)
                    # Log stderr in real-time to see bridge script diagnostics
                    if line:
                        logger.info(f"stderr: {line}")
            except asyncio.CancelledError:
                logger.info("stderr reader cancelled")
                raise
            except Exception as e:
                logger.warning(f"stderr reader error: {e}")

        # Run both readers concurrently with timeout
        stdout_task = None
        stderr_task = None
        try:
            # Create tasks for both readers
            stdout_task = asyncio.create_task(read_stdout())
            stderr_task = asyncio.create_task(read_stderr())

            # Wait for stdout to complete (it will break on completion marker)
            if self.timeout:
                await asyncio.wait_for(stdout_task, timeout=self.timeout)
            else:
                await stdout_task

            # Once stdout is done, cancel stderr (which is likely still blocking)
            if stderr_task and not stderr_task.done():
                stderr_task.cancel()
                try:
                    await stderr_task
                except asyncio.CancelledError:
                    pass

        except asyncio.TimeoutError:
            logger.error(f"dbt command timed out after {self.timeout} seconds, killing process")
            # Cancel both reader tasks
            if stdout_task and not stdout_task.done():
                stdout_task.cancel()
            if stderr_task and not stderr_task.done():
                stderr_task.cancel()
            try:
                tasks = [t for t in [stdout_task, stderr_task] if t is not None]
                if tasks:
                    await asyncio.gather(*tasks, return_exceptions=True)
            except Exception:
                pass
            # Kill the process
            proc.kill()
            await proc.wait()
            raise RuntimeError(f"dbt command timed out after {self.timeout} seconds")
        except asyncio.CancelledError:
            logger.info("Stream readers cancelled")
            # Cancel both reader tasks
            if stdout_task and not stdout_task.done():
                stdout_task.cancel()
            if stderr_task and not stderr_task.done():
                stderr_task.cancel()
            try:
                tasks = [t for t in [stdout_task, stderr_task] if t is not None]
                if tasks:
                    await asyncio.gather(*tasks, return_exceptions=True)
            except Exception:
                pass
            raise
        finally:
            # Send final progress update if we have completed resources
            if progress_callback and overall_progress > 0:
                try:
                    # Build final status message
                    status_parts = []
                    if ok_count > 0:
                        status_parts.append(f"✅ {ok_count}")
                    if error_count > 0:
                        status_parts.append(f"❌ {error_count}")
                    if warn_count > 0:
                        status_parts.append(f"⚠️ {warn_count}")
                    if skip_count > 0:
                        status_parts.append(f"⏭️ {skip_count}")

                    if status_parts:
                        final_message = f"{overall_progress}/{total_resources} completed ({', '.join(status_parts)})"
                    else:
                        final_message = f"{overall_progress}/{total_resources} completed"

                    logger.info(f"FINAL PROGRESS: ({overall_progress}/{total_resources}) {final_message}")
                    result = progress_callback(overall_progress, total_resources, final_message)
                    if asyncio.iscoroutine(result):
                        await result
                except Exception as e:
                    logger.warning(f"Final progress callback error: {e}")

            # For one-off subprocesses, ensure the process completes.
            # For persistent processes, DON'T wait (the process stays alive).
            # We can detect persistent mode by checking _dbt_process.
            is_persistent = hasattr(self, "_dbt_process") and self._dbt_process is not None and proc.pid == self._dbt_process.pid
            if not is_persistent and proc.returncode is None:
                await proc.wait()

        return "\n".join(stdout_lines), "\n".join(stderr_lines)

    async def _kill_process_tree(self, proc: asyncio.subprocess.Process) -> None:
        """Kill a process and all its children."""
        pid = proc.pid
        if pid is None:
            logger.warning("Cannot kill process: PID is None")
            return

        # Log child processes before killing
        try:
            parent = psutil.Process(pid)
            children = parent.children(recursive=True)
            if children:
                logger.info(f"Process {pid} has {len(children)} child process(es): {[p.pid for p in children]}")
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            pass

        if platform.system() == "Windows":
            # On Windows, try graceful termination first, then force kill
            try:
                # Step 1: Try graceful termination (without /F flag)
                logger.info(f"Attempting graceful termination of process tree for PID {pid}")
                terminate_proc = await asyncio.create_subprocess_exec(
                    "taskkill",
                    "/T",  # Kill tree, but no /F (force) flag
                    "/PID",
                    str(pid),
                    stdout=asyncio.subprocess.DEVNULL,
                    stderr=asyncio.subprocess.DEVNULL,
                )

                # Wait for taskkill command to complete (it returns immediately)
                await terminate_proc.wait()

                # Now wait for the actual process to terminate (poll with timeout)
                start_time = asyncio.get_event_loop().time()
                timeout = 10.0
                poll_interval = 0.5

                while (asyncio.get_event_loop().time() - start_time) < timeout:
                    if not self._is_process_running(pid):
                        logger.info(f"Process {pid} terminated gracefully")
                        return
                    await asyncio.sleep(poll_interval)

                # If we get here, the process didn't terminate gracefully
                logger.info(f"Process {pid} still running after {timeout}s, forcing kill...")

                # Step 2: Force kill if graceful didn't work
                logger.info(f"Force killing process tree for PID {pid}")
                kill_proc = await asyncio.create_subprocess_exec(
                    "taskkill",
                    "/F",  # Force
                    "/T",  # Kill tree
                    "/PID",
                    str(pid),
                    stdout=asyncio.subprocess.DEVNULL,
                    stderr=asyncio.subprocess.DEVNULL,
                )

                await asyncio.wait_for(kill_proc.wait(), timeout=5.0)

                # Verify process is dead
                await asyncio.sleep(0.3)
                try:
                    if psutil.Process(pid).is_running():
                        logger.warning(f"Process {pid} still running after force kill")
                    else:
                        logger.info(f"Successfully killed process tree for PID {pid}")
                except psutil.NoSuchProcess:
                    logger.info(f"Process {pid} terminated successfully")

            except asyncio.TimeoutError:
                logger.warning(f"Force kill timed out for PID {pid}")
            except Exception as e:
                logger.warning(f"Failed to kill process tree: {e}")
                # Last resort fallback
                try:
                    proc.kill()
                    await proc.wait()
                except Exception:
                    pass
        else:
            # On Unix, terminate then kill if needed
            try:
                proc.terminate()
                await asyncio.wait_for(proc.wait(), timeout=2.0)
            except asyncio.TimeoutError:
                proc.kill()
                await proc.wait()

    def _is_process_running(self, pid: int) -> bool:
        """Check if a process is still running."""
        try:
            process = psutil.Process(pid)
            return process.is_running()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            return False

    def get_manifest_path(self) -> Path:
        """Get the path to the manifest.json file."""
        return self._target_dir / "manifest.json"

    async def shutdown(self) -> None:
        """Shutdown the bridge runner and clean up resources."""
        await self._stop_persistent_process()

    def __del__(self) -> None:
        """Cleanup on garbage collection."""
        # Try to stop persistent process on cleanup
        if hasattr(self, "_dbt_process") and self._dbt_process and self._dbt_process.returncode is None:
            logger.warning("BridgeRunner deleted with active process, forcing cleanup")
            try:
                # Close stdin (StreamWriter) to prevent resource warnings
                if self._dbt_process.stdin:
                    self._dbt_process.stdin.close()
                # stdout and stderr are StreamReaders - no close() method needed
                # Then kill the process
                self._dbt_process.kill()
            except Exception as e:
                logger.warning(f"Error killing process during cleanup: {e}")

    async def invoke_query(self, sql: str, progress_callback: Callable[[int, int, str], Any] | None = None) -> DbtRunnerResult:
        """
        Execute a SQL query using dbt show.

        This method supports Jinja templating, including {{ ref() }} and {{ source() }}.
        The SQL should include a LIMIT clause if needed - no automatic limiting is applied.

        Args:
            sql: SQL query to execute (supports Jinja: {{ ref('model') }}, {{ source('src', 'table') }}).
                Include LIMIT in the SQL if you want to limit results.
            progress_callback: Optional callback for progress updates (current, total, message)

        Returns:
            Result with query output in JSON format
        """
        # Use --inline for Jinja support with ref() and source()
        # Use --no-populate-cache to skip expensive information_schema queries
        args = [
            "show",
            "--inline",
            sql,
            "--limit",
            "-1",
            "--output",
            "json",
            "--no-populate-cache",
        ]

        # Report query execution starting
        if progress_callback:
            try:
                result = progress_callback(0, 1, "Executing query...")
                if asyncio.iscoroutine(result):
                    await result
            except Exception as e:
                logger.warning(f"Progress callback error: {e}")

        # Execute the command with progress callback
        invoke_start = time.time()
        result = await self.invoke(args, progress_callback=progress_callback)
        invoke_end = time.time()
        elapsed = invoke_end - invoke_start
        logger.info(f"invoke() took {elapsed:.2f}s total")

        # Store elapsed time in result for tool to report
        result.elapsed_time = elapsed

        # Report query completion
        if progress_callback and result.success:
            try:
                completion_result = progress_callback(1, 1, "Query complete")
                if asyncio.iscoroutine(completion_result):
                    await completion_result
            except Exception as e:
                logger.warning(f"Progress callback error: {e}")

        return result
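
    # Illustrative call: invoke_query("select * from {{ ref('customers') }} limit 10")
    # effectively runs: dbt show --inline "<sql>" --limit -1 --output json --no-populate-cache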

    async def invoke_compile(self, model_name: str, force: bool = False) -> DbtRunnerResult:
        """
        Compile a specific model, optionally forcing recompilation.

        Args:
            model_name: Name of the model to compile (e.g., 'customers')
            force: If True, always compile. If False, only compile if not already compiled.

        Returns:
            Result of the compilation
        """
        # If not forcing, check if already compiled
        if not force:
            manifest_path = self.get_manifest_path()
            if manifest_path.exists():
                try:
                    with open(manifest_path) as f:
                        manifest = json.load(f)

                    # Check if model has compiled_code
                    nodes = manifest.get("nodes", {})
                    for node in nodes.values():
                        if node.get("resource_type") == "model" and node.get("name") == model_name:
                            if node.get("compiled_code"):
                                logger.info(f"Model '{model_name}' already compiled, skipping compilation")
                                return DbtRunnerResult(success=True, stdout="Already compiled", stderr="")
                            break
                except Exception as e:
                    logger.warning(f"Failed to check compilation status: {e}, forcing compilation")

        # Run compile for specific model
        logger.info(f"Compiling model: {model_name}")
        args = ["compile", "-s", model_name]
        result = await self.invoke(args)

        return result
|
1205
|
+
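The skip-if-compiled check walks `nodes` in target/manifest.json looking for a populated `compiled_code` field. A self-contained sketch of just the fragment it inspects (field values abbreviated; real manifests carry many more fields per node):

# Hedged sketch of the manifest fragment the check walks.
manifest = {
    "nodes": {
        "model.my_project.customers": {
            "resource_type": "model",
            "name": "customers",
            "compiled_code": "select ...",  # present only after compilation
        }
    }
}

def is_compiled(manifest: dict, model_name: str) -> bool:
    # Mirrors the loop above: find the named model, report whether
    # compiled_code is non-empty.
    for node in manifest.get("nodes", {}).values():
        if node.get("resource_type") == "model" and node.get("name") == model_name:
            return bool(node.get("compiled_code"))
    return False

assert is_compiled(manifest, "customers")
assert not is_compiled(manifest, "orders")  # unknown model -> not compiled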
    def _needs_database_access(self, args: list[str]) -> bool:
        """
        Determine if a dbt command requires database access.

        Commands like 'parse', 'deps', 'clean', 'list' don't need database access.
        Commands like 'run', 'test', 'build', 'seed', 'snapshot', 'show' do.

        Args:
            args: dbt command arguments

        Returns:
            True if the command needs database access, False otherwise
        """
        if not args:
            return False

        command = args[0].lower()

        # Commands that DON'T need database access
        no_db_commands = {
            "parse",
            "deps",
            "clean",
            "debug",  # debug checks connection but doesn't require warehouse to be running
            "list",
            "ls",
            "compile",  # compile doesn't execute SQL, just generates it
        }

        return command not in no_db_commands
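A few hedged examples of how this classification behaves, assuming a constructed `runner` (construction is not shown in this hunk):

# Warehouse-touching commands are anything outside the deny-list:
assert runner._needs_database_access(["run", "-s", "customers"])
assert runner._needs_database_access(["show", "--inline", "select 1"])
# Connection-free commands skip the database check:
assert not runner._needs_database_access(["parse"])
assert not runner._needs_database_access(["COMPILE"])  # args[0] is lowercased first
assert not runner._needs_database_access([])           # empty args default to False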
    def _build_unified_script(self, args: list[str], loop_mode: bool = False) -> str:
        """
        Build a unified Python script that can run in one-off or persistent loop mode.

        Args:
            args: dbt command arguments (ignored in loop mode)
            loop_mode: If True, run a persistent loop. If False, execute once and exit.

        Returns:
            Python script as a string
        """
        # Add --profiles-dir to args if not already present (for one-off mode)
        if not loop_mode and "--profiles-dir" not in args:
            args = [*args, "--profiles-dir", str(self.profiles_dir)]

        # Add --log-format text to get human-readable output for progress parsing
        if not loop_mode and "--log-format" not in args:
            args = [*args, "--log-format", "text"]

        # Convert args to JSON-safe format for one-off mode
        args_json = json.dumps(args) if not loop_mode else "[]"

        script = f"""
import json
import sys
import os

# Disable buffering for immediate I/O
sys.stdin.reconfigure(line_buffering=True)
sys.stdout.reconfigure(line_buffering=True)
sys.stderr.reconfigure(line_buffering=True)

# Set environment for text output
os.environ['DBT_USE_COLORS'] = '0'
os.environ['DBT_PRINTER_WIDTH'] = '80'

# Import dbtRunner
try:
    from dbt.cli.main import dbtRunner
except ImportError as e:
    error_msg = {{"success": False, "error": f"Failed to import dbtRunner: {{e}}"}}
    print(json.dumps(error_msg), flush=True)
    sys.exit(1)

# Initialize dbtRunner once
dbt = dbtRunner()

# Check mode: loop vs one-off
loop_mode = {str(loop_mode)}

if loop_mode:
    # === PERSISTENT LOOP MODE ===

    # Signal ready
    ready_msg = {{"type": "ready"}}
    print(json.dumps(ready_msg), flush=True)

    # Process commands in a loop
    while True:
        try:
            # Read command from stdin (blocking)
            line = sys.stdin.readline()
            if not line:
                # EOF - client disconnected
                break

            request = json.loads(line.strip())

            # Check for shutdown command
            if request.get("shutdown"):
                break

            # Extract command details
            command_args = request.get("command", [])

            # Add profiles_dir if not already present
            if "--profiles-dir" not in command_args:
                command_args = [*command_args, "--profiles-dir", {repr(str(self.profiles_dir))}]

            # Add text log format for consistent output
            if "--log-format" not in command_args:
                command_args = [*command_args, "--log-format", "text"]

            # Execute command - output goes to stdout naturally
            try:
                print(f"[DBT-BRIDGE] Running command: {{command_args[0] if command_args else 'unknown'}}", file=sys.stderr, flush=True)
                result = dbt.invoke(command_args)
                success = result.success
            except Exception as e:
                success = False
                print(f"Error executing dbt command: {{e}}", file=sys.stderr, flush=True)

            # Ensure all dbt output is flushed before sending the completion marker
            sys.stdout.flush()
            sys.stderr.flush()

            # Send completion marker as JSON on the last line
            completion = {{"success": success}}
            print(json.dumps(completion), flush=True)

        except json.JSONDecodeError as e:
            error_response = {{"type": "error", "error": f"Invalid JSON: {{e}}"}}
            print(json.dumps(error_response), flush=True)
        except Exception as e:
            error_response = {{"type": "error", "error": f"Unexpected error: {{e}}"}}
            print(json.dumps(error_response), flush=True)

else:
    # === ONE-OFF EXECUTION MODE ===

    try:
        # Execute dbtRunner with arguments
        result = dbt.invoke({args_json})

        # Return success status on the last line (JSON)
        output = {{"success": result.success}}
        print(json.dumps(output))
        sys.exit(0 if result.success else 1)

    except Exception as e:
        # Ensure we always exit, even on error
        error_output = {{"success": False, "error": str(e)}}
        print(json.dumps(error_output))
        sys.exit(1)
"""
        return script
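In loop mode the generated script speaks line-delimited JSON over stdio: one {"type": "ready"} line at startup, then for each {"command": [...]} request it streams raw dbt output followed by a {"success": ...} marker on its own line; a {"shutdown": true} request ends the loop. A hedged parent-side sketch, using synchronous subprocess for brevity (the real runner drives this over asyncio streams, and `script` here is assumed to come from _build_unified_script([], loop_mode=True)):

import json
import subprocess

# Assumption: `script` holds the loop-mode script built above.
proc = subprocess.Popen(
    ["python", "-c", script],
    stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True,
)

# Wait for the child to signal readiness.
assert json.loads(proc.stdout.readline())["type"] == "ready"

# Send one command request as a single JSON line.
proc.stdin.write(json.dumps({"command": ["parse"]}) + "\n")
proc.stdin.flush()

# Read raw dbt log lines until the JSON completion marker appears.
for line in proc.stdout:
    try:
        marker = json.loads(line)
        if "success" in marker:
            print("done:", marker["success"])
            break
    except json.JSONDecodeError:
        print("dbt:", line.rstrip())  # plain-text dbt output

# Ask the loop to exit cleanly.
proc.stdin.write(json.dumps({"shutdown": True}) + "\n")
proc.stdin.flush()
proc.wait()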