mcp-stata 1.7.3__py3-none-any.whl → 1.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-stata might be problematic. Click here for more details.

mcp_stata/stata_client.py CHANGED
@@ -1,22 +1,26 @@
1
- import base64
1
+ import asyncio
2
+ import inspect
2
3
  import json
3
4
  import logging
4
5
  import os
6
+ import platform
5
7
  import re
6
8
  import subprocess
7
9
  import sys
8
- import threading
9
- from importlib.metadata import PackageNotFoundError, version
10
10
  import tempfile
11
+ import threading
11
12
  import time
13
+ import uuid
12
14
  from contextlib import contextmanager
15
+ from importlib.metadata import PackageNotFoundError, version
13
16
  from io import StringIO
14
- from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple
17
+ from typing import Any, Awaitable, Callable, Dict, Generator, List, Optional, Tuple
15
18
 
16
19
  import anyio
17
20
  from anyio import get_cancelled_exc_class
18
21
 
19
- from .discovery import find_stata_path
22
+ from .discovery import find_stata_candidates
23
+ from .config import MAX_LIMIT
20
24
  from .models import (
21
25
  CommandResponse,
22
26
  ErrorEnvelope,
@@ -33,6 +37,29 @@ from .graph_detector import StreamingGraphCache
33
37
 
34
38
  logger = logging.getLogger("mcp_stata")
35
39
 
40
+ _POLARS_AVAILABLE: Optional[bool] = None
41
+
42
+ def _check_polars_available() -> bool:
43
+ """
44
+ Check if Polars can be safely imported.
45
+ Must detect problematic platforms BEFORE attempting import,
46
+ since the crash is a fatal signal, not a catchable exception.
47
+ """
48
+ if sys.platform == "win32" and platform.machine().lower() in ("arm64", "aarch64"):
49
+ return False
50
+
51
+ try:
52
+ import polars # noqa: F401
53
+ return True
54
+ except ImportError:
55
+ return False
56
+
57
+
58
+ def _get_polars_available() -> bool:
59
+ global _POLARS_AVAILABLE
60
+ if _POLARS_AVAILABLE is None:
61
+ _POLARS_AVAILABLE = _check_polars_available()
62
+ return _POLARS_AVAILABLE
36
63
 
37
64
  # ============================================================================
38
65
  # MODULE-LEVEL DISCOVERY CACHE
@@ -40,26 +67,30 @@ logger = logging.getLogger("mcp_stata")
40
67
  # This cache ensures Stata discovery runs exactly once per process lifetime
41
68
  _discovery_lock = threading.Lock()
42
69
  _discovery_result: Optional[Tuple[str, str]] = None # (path, edition)
70
+ _discovery_candidates: Optional[List[Tuple[str, str]]] = None
43
71
  _discovery_attempted = False
44
72
  _discovery_error: Optional[Exception] = None
45
73
 
46
74
 
47
- def _get_discovered_stata() -> Tuple[str, str]:
75
+ def _get_discovery_candidates() -> List[Tuple[str, str]]:
48
76
  """
49
- Get the discovered Stata path and edition, running discovery only once.
77
+ Get ordered discovery candidates, running discovery only once.
50
78
 
51
79
  Returns:
52
- Tuple of (stata_executable_path, edition)
80
+ List of (stata_executable_path, edition) ordered by preference.
53
81
 
54
82
  Raises:
55
83
  RuntimeError: If Stata discovery fails
56
84
  """
57
- global _discovery_result, _discovery_attempted, _discovery_error
85
+ global _discovery_result, _discovery_candidates, _discovery_attempted, _discovery_error
58
86
 
59
87
  with _discovery_lock:
60
88
  # If we've already successfully discovered Stata, return cached result
61
89
  if _discovery_result is not None:
62
- return _discovery_result
90
+ return _discovery_candidates or [_discovery_result]
91
+
92
+ if _discovery_candidates is not None:
93
+ return _discovery_candidates
63
94
 
64
95
  # If we've already attempted and failed, re-raise the cached error
65
96
  if _discovery_attempted and _discovery_error is not None:
@@ -83,13 +114,17 @@ def _get_discovered_stata() -> Tuple[str, str]:
83
114
  logger.info("mcp-stata version: %s", pkg_version)
84
115
 
85
116
  # Run discovery
86
- stata_exec_path, edition = find_stata_path()
117
+ candidates = find_stata_candidates()
87
118
 
88
119
  # Cache the successful result
89
- _discovery_result = (stata_exec_path, edition)
90
- logger.info("Discovery found Stata at: %s (%s)", stata_exec_path, edition)
120
+ _discovery_candidates = candidates
121
+ if candidates:
122
+ _discovery_result = candidates[0]
123
+ logger.info("Discovery found Stata at: %s (%s)", _discovery_result[0], _discovery_result[1])
124
+ else:
125
+ raise FileNotFoundError("No Stata candidates discovered")
91
126
 
92
- return _discovery_result
127
+ return candidates
93
128
 
94
129
  except FileNotFoundError as e:
95
130
  _discovery_error = e
@@ -102,12 +137,22 @@ def _get_discovered_stata() -> Tuple[str, str]:
102
137
  ) from e
103
138
 
104
139
 
140
+ def _get_discovered_stata() -> Tuple[str, str]:
141
+ """
142
+ Preserve existing API: return the highest-priority discovered Stata candidate.
143
+ """
144
+ candidates = _get_discovery_candidates()
145
+ if not candidates:
146
+ raise RuntimeError("Stata binary not found: no candidates discovered")
147
+ return candidates[0]
148
+
149
+
105
150
  class StataClient:
106
151
  _initialized = False
107
152
  _exec_lock: threading.Lock
108
153
  _cache_init_lock = threading.Lock() # Class-level lock for cache initialization
109
154
  _is_executing = False # Flag to prevent recursive Stata calls
110
- MAX_DATA_ROWS = 500
155
+ MAX_DATA_ROWS = MAX_LIMIT
111
156
  MAX_GRAPH_BYTES = 50 * 1024 * 1024 # Maximum graph exports (~50MB)
112
157
  MAX_CACHE_SIZE = 100 # Maximum number of graphs to cache
113
158
  MAX_CACHE_BYTES = 500 * 1024 * 1024 # Maximum cache size in bytes (~500MB)
@@ -115,21 +160,21 @@ class StataClient:
115
160
 
116
161
  def __new__(cls):
117
162
  inst = super(StataClient, cls).__new__(cls)
118
- inst._exec_lock = threading.Lock()
163
+ inst._exec_lock = threading.RLock()
119
164
  inst._is_executing = False
120
165
  return inst
121
166
 
122
167
  @contextmanager
123
- def _redirect_io(self):
168
+ def _redirect_io(self, out_buf, err_buf):
124
169
  """Safely redirect stdout/stderr for the duration of a Stata call."""
125
- out_buf, err_buf = StringIO(), StringIO()
126
170
  backup_stdout, backup_stderr = sys.stdout, sys.stderr
127
171
  sys.stdout, sys.stderr = out_buf, err_buf
128
172
  try:
129
- yield out_buf, err_buf
173
+ yield
130
174
  finally:
131
175
  sys.stdout, sys.stderr = backup_stdout, backup_stderr
132
176
 
177
+
133
178
  @staticmethod
134
179
  def _stata_quote(value: str) -> str:
135
180
  """Return a Stata double-quoted string literal for value."""
@@ -150,253 +195,985 @@ class StataClient:
150
195
  sys.stdout, sys.stderr = backup_stdout, backup_stderr
151
196
 
152
197
  @staticmethod
153
- def _create_graph_cache_callback(on_graph_cached, notify_log):
154
- """Create a standardized graph cache callback with proper error handling."""
155
- async def graph_cache_callback(graph_name: str, success: bool) -> None:
156
- try:
157
- if on_graph_cached:
158
- await on_graph_cached(graph_name, success)
159
- except Exception as e:
160
- logger.error(f"Graph cache callback failed: {e}")
161
-
162
- try:
163
- # Also notify via log channel
164
- await notify_log(json.dumps({
165
- "event": "graph_cached",
166
- "graph": graph_name,
167
- "success": success
168
- }))
169
- except Exception as e:
170
- logger.error(f"Failed to notify about graph cache: {e}")
171
-
172
- return graph_cache_callback
173
- def _request_break_in(self) -> None:
174
- """
175
- Attempt to interrupt a running Stata command when cancellation is requested.
176
-
177
- Uses the Stata sfi.breakIn hook when available; errors are swallowed because
178
- cancellation should never crash the host process.
179
- """
180
- try:
181
- import sfi # type: ignore[import-not-found]
182
-
183
- break_fn = getattr(sfi, "breakIn", None) or getattr(sfi, "break_in", None)
184
- if callable(break_fn):
185
- try:
186
- break_fn()
187
- logger.info("Sent breakIn() to Stata for cancellation")
188
- except Exception as e: # pragma: no cover - best-effort
189
- logger.warning(f"Failed to send breakIn() to Stata: {e}")
190
- else: # pragma: no cover - environment without Stata runtime
191
- logger.debug("sfi.breakIn not available; cannot interrupt Stata")
192
- except Exception as e: # pragma: no cover - import failure or other
193
- logger.debug(f"Unable to import sfi for cancellation: {e}")
194
-
195
- async def _wait_for_stata_stop(self, timeout: float = 2.0) -> bool:
196
- """
197
- After requesting a break, poll the Stata interface so it can surface BreakError
198
- and return control. This is best-effort and time-bounded.
199
- """
200
- deadline = time.monotonic() + timeout
198
+ def _safe_unlink(path: str) -> None:
199
+ if not path:
200
+ return
201
201
  try:
202
- import sfi # type: ignore[import-not-found]
202
+ if os.path.exists(path):
203
+ os.unlink(path)
204
+ except Exception:
205
+ pass
203
206
 
204
- toolkit = getattr(sfi, "SFIToolkit", None)
205
- poll = getattr(toolkit, "pollnow", None) or getattr(toolkit, "pollstd", None)
206
- BreakError = getattr(sfi, "BreakError", None)
207
- except Exception: # pragma: no cover
208
- return False
207
+ def _create_smcl_log_path(self, *, prefix: str = "mcp_smcl_", max_hex: Optional[int] = None) -> str:
208
+ hex_id = uuid.uuid4().hex if max_hex is None else uuid.uuid4().hex[:max_hex]
209
+ smcl_path = os.path.join(tempfile.gettempdir(), f"{prefix}{hex_id}.smcl")
210
+ self._safe_unlink(smcl_path)
211
+ return smcl_path
209
212
 
210
- if not callable(poll):
211
- return False
213
+ @staticmethod
214
+ def _make_smcl_log_name() -> str:
215
+ return f"_mcp_smcl_{uuid.uuid4().hex[:8]}"
212
216
 
213
- last_exc: Optional[Exception] = None
214
- while time.monotonic() < deadline:
217
+ def _open_smcl_log(self, smcl_path: str, log_name: str, *, quiet: bool = False) -> bool:
218
+ cmd = f"{'quietly ' if quiet else ''}log using \"{smcl_path}\", replace smcl name({log_name})"
219
+ for attempt in range(4):
215
220
  try:
216
- poll()
217
- except Exception as e: # pragma: no cover - depends on Stata runtime
218
- last_exc = e
219
- if BreakError is not None and isinstance(e, BreakError):
220
- logger.info("Stata BreakError detected; cancellation acknowledged by Stata")
221
- return True
222
- # If Stata already stopped, break on any other exception.
223
- break
224
- await anyio.sleep(0.05)
225
-
226
- if last_exc:
227
- logger.debug(f"Cancellation poll exited with {last_exc}")
221
+ self.stata.run(cmd, echo=False)
222
+ return True
223
+ except Exception:
224
+ if attempt < 3:
225
+ time.sleep(0.1)
228
226
  return False
229
227
 
230
- @contextmanager
231
- def _temp_cwd(self, cwd: Optional[str]):
232
- if cwd is None:
233
- yield
234
- return
235
- prev = os.getcwd()
236
- os.chdir(cwd)
228
+ def _close_smcl_log(self, log_name: str) -> None:
237
229
  try:
238
- yield
239
- finally:
240
- os.chdir(prev)
230
+ self.stata.run(f"capture log close {log_name}", echo=False)
231
+ except Exception:
232
+ pass
241
233
 
242
- def init(self):
243
- """Initializes usage of pystata using cached discovery results."""
244
- if self._initialized:
234
+ def _restore_results_from_hold(self, hold_attr: str) -> None:
235
+ if not hasattr(self, hold_attr):
245
236
  return
246
-
237
+ hold_name = getattr(self, hold_attr)
247
238
  try:
248
- import stata_setup
249
-
250
- # Get discovered Stata path (cached from first call)
251
- stata_exec_path, edition = _get_discovered_stata()
252
-
253
- candidates = []
254
-
255
- # Prefer the binary directory first (documented input for stata_setup)
256
- bin_dir = os.path.dirname(stata_exec_path)
257
- if bin_dir:
258
- candidates.append(bin_dir)
259
-
260
- # 2. App Bundle: .../StataMP.app (macOS only)
261
- curr = bin_dir
262
- app_bundle = None
263
- while len(curr) > 1:
264
- if curr.endswith(".app"):
265
- app_bundle = curr
266
- break
267
- parent = os.path.dirname(curr)
268
- if parent == curr: # Reached root directory, prevent infinite loop on Windows
269
- break
270
- curr = parent
271
-
272
- if app_bundle:
273
- candidates.insert(0, os.path.dirname(app_bundle))
274
- candidates.insert(1, app_bundle)
275
-
276
- # Deduplicate preserving order
277
- seen = set()
278
- deduped = []
279
- for c in candidates:
280
- if c in seen:
281
- continue
282
- seen.add(c)
283
- deduped.append(c)
284
- candidates = deduped
239
+ self.stata.run(f"capture _return restore {hold_name}", echo=False)
240
+ self._last_results = self.get_stored_results(force_fresh=True)
241
+ except Exception:
242
+ pass
243
+ finally:
244
+ try:
245
+ delattr(self, hold_attr)
246
+ except Exception:
247
+ pass
285
248
 
286
- success = False
287
- for path in candidates:
288
- try:
289
- stata_setup.config(path, edition)
290
- success = True
291
- logger.debug("stata_setup.config succeeded with path: %s", path)
292
- break
293
- except Exception:
294
- continue
249
+ def _create_streaming_log(self, *, trace: bool) -> tuple[tempfile.NamedTemporaryFile, str, TailBuffer, FileTeeIO]:
250
+ log_file = tempfile.NamedTemporaryFile(
251
+ prefix="mcp_stata_",
252
+ suffix=".log",
253
+ delete=False,
254
+ mode="w",
255
+ encoding="utf-8",
256
+ errors="replace",
257
+ buffering=1,
258
+ )
259
+ log_path = log_file.name
260
+ tail = TailBuffer(max_chars=200000 if trace else 20000)
261
+ tee = FileTeeIO(log_file, tail)
262
+ return log_file, log_path, tail, tee
295
263
 
296
- if not success:
297
- raise RuntimeError(
298
- f"stata_setup.config failed. Tried: {candidates}. "
299
- f"Derived from binary: {stata_exec_path}"
300
- )
264
+ def _init_streaming_graph_cache(
265
+ self,
266
+ auto_cache_graphs: bool,
267
+ on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]],
268
+ notify_log: Callable[[str], Awaitable[None]],
269
+ ) -> Optional[StreamingGraphCache]:
270
+ if not auto_cache_graphs:
271
+ return None
272
+ graph_cache = StreamingGraphCache(self, auto_cache=True)
273
+ graph_cache_callback = self._create_graph_cache_callback(on_graph_cached, notify_log)
274
+ graph_cache.add_cache_callback(graph_cache_callback)
275
+ return graph_cache
276
+
277
+ def _capture_graph_state(
278
+ self,
279
+ graph_cache: Optional[StreamingGraphCache],
280
+ emit_graph_ready: bool,
281
+ ) -> Optional[dict[str, str]]:
282
+ # Capture initial graph state BEFORE execution starts
283
+ if graph_cache:
284
+ try:
285
+ graph_cache._initial_graphs = set(self.list_graphs(force_refresh=True))
286
+ logger.debug(f"Initial graph state captured: {graph_cache._initial_graphs}")
287
+ except Exception as e:
288
+ logger.debug(f"Failed to capture initial graph state: {e}")
289
+ graph_cache._initial_graphs = set()
301
290
 
302
- # Cache the binary path for later use (e.g., PNG export on Windows)
303
- self._stata_exec_path = os.path.abspath(stata_exec_path)
291
+ graph_ready_initial = None
292
+ if emit_graph_ready:
293
+ try:
294
+ graph_ready_initial = {}
295
+ for graph_name in self.list_graphs(force_refresh=True):
296
+ graph_ready_initial[graph_name] = self._get_graph_signature(graph_name)
297
+ logger.debug("Graph-ready initial state captured: %s", set(graph_ready_initial))
298
+ except Exception as e:
299
+ logger.debug("Failed to capture graph-ready state: %s", e)
300
+ graph_ready_initial = {}
301
+ return graph_ready_initial
304
302
 
305
- from pystata import stata # type: ignore[import-not-found]
306
- self.stata = stata
307
- self._initialized = True
308
-
309
- # Initialize list_graphs TTL cache
310
- self._list_graphs_cache = None
311
- self._list_graphs_cache_time = 0
312
- self._list_graphs_cache_lock = threading.Lock()
303
+ async def _cache_new_graphs(
304
+ self,
305
+ graph_cache: Optional[StreamingGraphCache],
306
+ *,
307
+ notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]],
308
+ total_lines: int,
309
+ completed_label: str,
310
+ ) -> None:
311
+ if not graph_cache or not graph_cache.auto_cache:
312
+ return
313
+ try:
314
+ cached_graphs = []
315
+ initial_graphs = getattr(graph_cache, "_initial_graphs", set())
316
+ current_graphs = set(self.list_graphs(force_refresh=True))
317
+ new_graphs = current_graphs - initial_graphs - graph_cache._cached_graphs
313
318
 
314
- # Map user-facing graph names (may include spaces/punctuation) to valid
315
- # internal Stata graph names.
316
- self._graph_name_aliases: Dict[str, str] = {}
317
- self._graph_name_reverse: Dict[str, str] = {}
318
-
319
- logger.info("StataClient initialized successfully with %s (%s)", stata_exec_path, edition)
319
+ if new_graphs:
320
+ logger.info(f"Detected {len(new_graphs)} new graph(s): {sorted(new_graphs)}")
320
321
 
321
- except ImportError as e:
322
- raise RuntimeError(
323
- f"Failed to import stata_setup or pystata: {e}. "
324
- "Ensure they are installed (pip install pystata stata-setup)."
325
- ) from e
322
+ for graph_name in new_graphs:
323
+ try:
324
+ cache_result = await anyio.to_thread.run_sync(
325
+ self.cache_graph_on_creation,
326
+ graph_name,
327
+ )
328
+ if cache_result:
329
+ cached_graphs.append(graph_name)
330
+ graph_cache._cached_graphs.add(graph_name)
326
331
 
327
- def _make_valid_stata_name(self, name: str) -> str:
328
- """Create a valid Stata name (<=32 chars, [A-Za-z_][A-Za-z0-9_]*)."""
329
- base = re.sub(r"[^A-Za-z0-9_]", "_", name or "")
330
- if not base:
331
- base = "Graph"
332
- if not re.match(r"^[A-Za-z_]", base):
333
- base = f"G_{base}"
334
- base = base[:32]
332
+ for callback in graph_cache._cache_callbacks:
333
+ try:
334
+ result = callback(graph_name, cache_result)
335
+ if inspect.isawaitable(result):
336
+ await result
337
+ except Exception:
338
+ pass
339
+ except Exception as e:
340
+ logger.error(f"Error caching graph {graph_name}: {e}")
335
341
 
336
- # Avoid collisions.
337
- candidate = base
338
- i = 1
339
- while candidate in getattr(self, "_graph_name_reverse", {}):
340
- suffix = f"_{i}"
341
- candidate = (base[: max(0, 32 - len(suffix))] + suffix)[:32]
342
- i += 1
343
- return candidate
342
+ if cached_graphs and notify_progress:
343
+ await notify_progress(
344
+ float(total_lines) if total_lines > 0 else 1,
345
+ float(total_lines) if total_lines > 0 else 1,
346
+ f"{completed_label} completed. Cached {len(cached_graphs)} graph(s): {', '.join(cached_graphs)}",
347
+ )
348
+ except Exception as e:
349
+ logger.error(f"Post-execution graph detection failed: {e}")
344
350
 
345
- def _resolve_graph_name_for_stata(self, name: str) -> str:
346
- """Return internal Stata graph name for a user-facing name."""
347
- if not name:
348
- return name
349
- aliases = getattr(self, "_graph_name_aliases", None)
350
- if aliases and name in aliases:
351
- return aliases[name]
352
- return name
351
+ def _emit_graph_ready_task(
352
+ self,
353
+ *,
354
+ emit_graph_ready: bool,
355
+ graph_ready_initial: Optional[dict[str, str]],
356
+ notify_log: Callable[[str], Awaitable[None]],
357
+ graph_ready_task_id: Optional[str],
358
+ graph_ready_format: str,
359
+ ) -> None:
360
+ if emit_graph_ready and graph_ready_initial is not None:
361
+ try:
362
+ asyncio.create_task(
363
+ self._emit_graph_ready_events(
364
+ graph_ready_initial,
365
+ notify_log,
366
+ graph_ready_task_id,
367
+ graph_ready_format,
368
+ )
369
+ )
370
+ except Exception as e:
371
+ logger.warning("graph_ready emission failed to start: %s", e)
353
372
 
354
- def _maybe_rewrite_graph_name_in_command(self, code: str) -> str:
355
- """Rewrite name("...") to a valid Stata name and store alias mapping."""
356
- if not code:
357
- return code
358
- if not hasattr(self, "_graph_name_aliases"):
359
- self._graph_name_aliases = {}
360
- self._graph_name_reverse = {}
373
+ async def _stream_smcl_log(
374
+ self,
375
+ *,
376
+ smcl_path: str,
377
+ notify_log: Callable[[str], Awaitable[None]],
378
+ done: anyio.Event,
379
+ on_chunk: Optional[Callable[[str], Awaitable[None]]] = None,
380
+ ) -> None:
381
+ last_pos = 0
382
+ # Wait for Stata to create the SMCL file (placeholder removed to avoid locks)
383
+ while not done.is_set() and not os.path.exists(smcl_path):
384
+ await anyio.sleep(0.05)
361
385
 
362
- # Handle common patterns: name("..." ...) or name(`"..."' ...)
363
- pat = re.compile(r"name\(\s*(?:`\"(?P<cq>[^\"]*)\"'|\"(?P<dq>[^\"]*)\")\s*(?P<rest>[^)]*)\)")
386
+ try:
387
+ def _read_content() -> str:
388
+ try:
389
+ with open(smcl_path, "r", encoding="utf-8", errors="replace") as f:
390
+ f.seek(last_pos)
391
+ return f.read()
392
+ except PermissionError:
393
+ if os.name == "nt":
394
+ try:
395
+ res = subprocess.run(f'type "{smcl_path}"', shell=True, capture_output=True)
396
+ full_content = res.stdout.decode("utf-8", errors="replace")
397
+ if len(full_content) > last_pos:
398
+ return full_content[last_pos:]
399
+ return ""
400
+ except Exception:
401
+ return ""
402
+ raise
403
+ except FileNotFoundError:
404
+ return ""
364
405
 
365
- def repl(m: re.Match) -> str:
366
- original = m.group("cq") if m.group("cq") is not None else m.group("dq")
367
- original = original or ""
368
- internal = self._graph_name_aliases.get(original)
369
- if not internal:
370
- internal = self._make_valid_stata_name(original)
371
- self._graph_name_aliases[original] = internal
372
- self._graph_name_reverse[internal] = original
373
- rest = m.group("rest") or ""
374
- return f"name({internal}{rest})"
406
+ while not done.is_set():
407
+ chunk = await anyio.to_thread.run_sync(_read_content)
408
+ if chunk:
409
+ last_pos += len(chunk)
410
+ await notify_log(chunk)
411
+ if on_chunk is not None:
412
+ await on_chunk(chunk)
413
+ await anyio.sleep(0.05)
414
+
415
+ chunk = await anyio.to_thread.run_sync(_read_content)
416
+ if chunk:
417
+ last_pos += len(chunk)
418
+ await notify_log(chunk)
419
+ if on_chunk is not None:
420
+ await on_chunk(chunk)
375
421
 
376
- return pat.sub(repl, code)
422
+ except Exception as e:
423
+ logger.warning(f"Log streaming failed: {e}")
377
424
 
378
- def _read_return_code(self) -> int:
379
- """Read the last Stata return code without mutating rc."""
380
- try:
381
- from sfi import Macro # type: ignore[import-not-found]
382
- rc_val = Macro.getCValue("rc") # type: ignore[attr-defined]
383
- return int(float(rc_val))
384
- except Exception:
385
- try:
386
- self.stata.run("global MCP_RC = c(rc)")
387
- from sfi import Macro as Macro2 # type: ignore[import-not-found]
388
- rc_val = Macro2.getGlobal("MCP_RC")
389
- return int(float(rc_val))
425
+ def _run_streaming_blocking(
426
+ self,
427
+ *,
428
+ command: str,
429
+ tee: FileTeeIO,
430
+ cwd: Optional[str],
431
+ trace: bool,
432
+ echo: bool,
433
+ smcl_path: str,
434
+ smcl_log_name: str,
435
+ hold_attr: str,
436
+ require_smcl_log: bool = False,
437
+ ) -> tuple[int, Optional[Exception]]:
438
+ rc = -1
439
+ exc: Optional[Exception] = None
440
+ with self._exec_lock:
441
+ self._is_executing = True
442
+ try:
443
+ from sfi import Scalar, SFIToolkit # Import SFI tools
444
+ with self._temp_cwd(cwd):
445
+ log_opened = self._open_smcl_log(smcl_path, smcl_log_name)
446
+ if require_smcl_log and not log_opened:
447
+ exc = RuntimeError("Failed to open SMCL log")
448
+ rc = 1
449
+ if exc is None:
450
+ try:
451
+ with self._redirect_io_streaming(tee, tee):
452
+ try:
453
+ if trace:
454
+ self.stata.run("set trace on")
455
+ ret = self.stata.run(command, echo=echo)
456
+
457
+ setattr(self, hold_attr, f"mcp_hold_{uuid.uuid4().hex[:8]}")
458
+ self.stata.run(
459
+ f"capture _return hold {getattr(self, hold_attr)}",
460
+ echo=False,
461
+ )
462
+
463
+ if isinstance(ret, str) and ret:
464
+ try:
465
+ tee.write(ret)
466
+ except Exception:
467
+ pass
468
+ try:
469
+ rc = self._get_rc_from_scalar(Scalar)
470
+ except Exception:
471
+ pass
472
+ except Exception as e:
473
+ exc = e
474
+ if rc in (-1, 0):
475
+ rc = 1
476
+ finally:
477
+ if trace:
478
+ try:
479
+ self.stata.run("set trace off")
480
+ except Exception:
481
+ pass
482
+ finally:
483
+ self._close_smcl_log(smcl_log_name)
484
+ self._restore_results_from_hold(hold_attr)
485
+ return rc, exc
486
+ # If we get here, SMCL log failed and we're required to stop.
487
+ return rc, exc
488
+ finally:
489
+ self._is_executing = False
490
+ return rc, exc
491
+
492
+ def _resolve_do_file_path(
493
+ self,
494
+ path: str,
495
+ cwd: Optional[str],
496
+ ) -> tuple[Optional[str], Optional[str], Optional[CommandResponse]]:
497
+ if cwd is not None and not os.path.isdir(cwd):
498
+ return None, None, CommandResponse(
499
+ command=f'do "{path}"',
500
+ rc=601,
501
+ stdout="",
502
+ stderr=None,
503
+ success=False,
504
+ error=ErrorEnvelope(
505
+ message=f"cwd not found: {cwd}",
506
+ rc=601,
507
+ command=path,
508
+ ),
509
+ )
510
+
511
+ effective_path = path
512
+ if cwd is not None and not os.path.isabs(path):
513
+ effective_path = os.path.abspath(os.path.join(cwd, path))
514
+
515
+ if not os.path.exists(effective_path):
516
+ return None, None, CommandResponse(
517
+ command=f'do "{effective_path}"',
518
+ rc=601,
519
+ stdout="",
520
+ stderr=None,
521
+ success=False,
522
+ error=ErrorEnvelope(
523
+ message=f"Do-file not found: {effective_path}",
524
+ rc=601,
525
+ command=effective_path,
526
+ ),
527
+ )
528
+
529
+ path_for_stata = effective_path.replace("\\", "/")
530
+ command = f'do "{path_for_stata}"'
531
+ return effective_path, command, None
532
+
533
+ @contextmanager
534
+ def _smcl_log_capture(self) -> "Generator[Tuple[str, str], None, None]":
535
+ """
536
+ Context manager that wraps command execution in a named SMCL log.
537
+
538
+ This runs alongside any user logs (named logs can coexist).
539
+ Yields (log_name, log_path) tuple for use within the context.
540
+ The SMCL file is NOT deleted automatically - caller should clean up.
541
+
542
+ Usage:
543
+ with self._smcl_log_capture() as (log_name, smcl_path):
544
+ self.stata.run(cmd)
545
+ # After context, read smcl_path for raw SMCL output
546
+ """
547
+ # Use a unique name but DO NOT join start with mkstemp to avoid existing file locks.
548
+ # Stata will create the file.
549
+ smcl_path = self._create_smcl_log_path()
550
+ # Unique log name to avoid collisions with user logs
551
+ log_name = self._make_smcl_log_name()
552
+
553
+ try:
554
+ # Open named SMCL log (quietly to avoid polluting output)
555
+ log_opened = self._open_smcl_log(smcl_path, log_name, quiet=True)
556
+ if not log_opened:
557
+ # Still yield, consumer might see empty file or handle error,
558
+ # but we can't do much if Stata refuses to log.
559
+ pass
560
+
561
+ yield log_name, smcl_path
562
+ finally:
563
+ # Always close our named log
564
+ self._close_smcl_log(log_name)
565
+
566
+ def _read_smcl_file(self, path: str) -> str:
567
+ """Read SMCL file contents, handling encoding issues and Windows file locks."""
568
+ try:
569
+ with open(path, 'r', encoding='utf-8', errors='replace') as f:
570
+ return f.read()
571
+ except PermissionError:
572
+ if os.name == "nt":
573
+ # Windows Fallback: Try to use 'type' command to bypass exclusive lock
574
+ try:
575
+ res = subprocess.run(f'type "{path}"', shell=True, capture_output=True)
576
+ if res.returncode == 0:
577
+ return res.stdout.decode('utf-8', errors='replace')
578
+ except Exception as e:
579
+ logger.debug(f"Combined fallback read failed: {e}")
580
+ logger.warning(f"Failed to read SMCL file {path} due to lock")
581
+ return ""
582
+ except Exception as e:
583
+ logger.warning(f"Failed to read SMCL file {path}: {e}")
584
+ return ""
585
+
586
+ def _extract_error_from_smcl(self, smcl_content: str, rc: int) -> Tuple[str, str]:
587
+ """
588
+ Extract error message and context from raw SMCL output.
589
+
590
+ Uses {err} tags as the authoritative source for error detection.
591
+
592
+ Returns:
593
+ Tuple of (error_message, context_string)
594
+ """
595
+ if not smcl_content:
596
+ return f"Stata error r({rc})", ""
597
+
598
+ lines = smcl_content.splitlines()
599
+
600
+ # Search backwards for {err} tags - they indicate error lines
601
+ error_lines = []
602
+ error_start_idx = -1
603
+
604
+ for i in range(len(lines) - 1, -1, -1):
605
+ line = lines[i]
606
+ if '{err}' in line:
607
+ if error_start_idx == -1:
608
+ error_start_idx = i
609
+ # Walk backwards to find consecutive {err} lines
610
+ j = i
611
+ while j >= 0 and '{err}' in lines[j]:
612
+ error_lines.insert(0, lines[j])
613
+ j -= 1
614
+ break
615
+
616
+ if error_lines:
617
+ # Clean SMCL tags from error message
618
+ clean_lines = []
619
+ for line in error_lines:
620
+ # Remove SMCL tags but keep the text content
621
+ cleaned = re.sub(r'\{[^}]*\}', '', line).strip()
622
+ if cleaned:
623
+ clean_lines.append(cleaned)
624
+
625
+ error_msg = " ".join(clean_lines) or f"Stata error r({rc})"
626
+
627
+ # Context is everything from error start to end
628
+ context_start = max(0, error_start_idx - 5) # Include 5 lines before error
629
+ context = "\n".join(lines[context_start:])
630
+
631
+ return error_msg, context
632
+
633
+ # Fallback: no {err} found, return last 30 lines as context
634
+ context_start = max(0, len(lines) - 30)
635
+ context = "\n".join(lines[context_start:])
636
+
637
+ return f"Stata error r({rc})", context
638
+
639
+ def _parse_rc_from_smcl(self, smcl_content: str) -> Optional[int]:
640
+ """Parse return code from SMCL content using specific structural patterns."""
641
+ if not smcl_content:
642
+ return None
643
+
644
+ # 1. Primary check: SMCL search tag {search r(N), ...}
645
+ # This is the most authoritative interactive indicator
646
+ matches = list(re.finditer(r'\{search r\((\d+)\)', smcl_content))
647
+ if matches:
648
+ try:
649
+ return int(matches[-1].group(1))
650
+ except Exception:
651
+ pass
652
+
653
+ # 2. Secondary check: Standalone r(N); pattern
654
+ # This appears at the end of command blocks
655
+ matches = list(re.finditer(r'(?<!\w)r\((\d+)\);?', smcl_content))
656
+ if matches:
657
+ try:
658
+ return int(matches[-1].group(1))
390
659
  except Exception:
660
+ pass
661
+
662
+ return None
663
+
664
+ @staticmethod
665
+ def _create_graph_cache_callback(on_graph_cached, notify_log):
666
+ """Create a standardized graph cache callback with proper error handling."""
667
+ async def graph_cache_callback(graph_name: str, success: bool) -> None:
668
+ try:
669
+ if on_graph_cached:
670
+ await on_graph_cached(graph_name, success)
671
+ except Exception as e:
672
+ logger.error(f"Graph cache callback failed: {e}")
673
+
674
+ try:
675
+ # Also notify via log channel
676
+ await notify_log(json.dumps({
677
+ "event": "graph_cached",
678
+ "graph": graph_name,
679
+ "success": success
680
+ }))
681
+ except Exception as e:
682
+ logger.error(f"Failed to notify about graph cache: {e}")
683
+
684
+ return graph_cache_callback
685
+
686
+ def _get_cached_graph_path(self, graph_name: str) -> Optional[str]:
687
+ if not hasattr(self, "_cache_lock") or not hasattr(self, "_preemptive_cache"):
688
+ return None
689
+ try:
690
+ with self._cache_lock:
691
+ return self._preemptive_cache.get(graph_name)
692
+ except Exception:
693
+ return None
694
+
695
async def _emit_graph_ready_for_graphs(
    self,
    graph_names: List[str],
    *,
    notify_log: Callable[[str], Awaitable[None]],
    task_id: Optional[str],
    export_format: str,
    graph_ready_initial: Optional[dict[str, str]],
) -> None:
    """Emit a ``graph_ready`` log event for each graph whose signature changed.

    Graphs whose signature matches the baseline in *graph_ready_initial* are
    skipped; successfully announced graphs update that baseline in place.
    """
    if not graph_names:
        return
    fmt = (export_format or "svg").strip().lower()
    for name in graph_names:
        signature = self._get_graph_signature(name)
        if graph_ready_initial is not None:
            baseline = graph_ready_initial.get(name)
            if baseline is not None and baseline == signature:
                continue
        try:
            exported = None
            # SVG exports may already be cached by the streaming detector.
            if fmt == "svg":
                exported = self._get_cached_graph_path(name)
            if not exported:
                exported = await anyio.to_thread.run_sync(
                    lambda: self.export_graph(name, format=fmt)
                )
            await notify_log(json.dumps({
                "event": "graph_ready",
                "task_id": task_id,
                "graph": {
                    "name": name,
                    "path": exported,
                    "label": name,
                },
            }))
            if graph_ready_initial is not None:
                graph_ready_initial[name] = signature
        except Exception as e:
            logger.warning("graph_ready export failed for %s: %s", name, e)
735
+
736
async def _maybe_cache_graphs_on_chunk(
    self,
    *,
    graph_cache: Optional[StreamingGraphCache],
    emit_graph_ready: bool,
    notify_log: Callable[[str], Awaitable[None]],
    graph_ready_task_id: Optional[str],
    graph_ready_format: str,
    graph_ready_initial: Optional[dict[str, str]],
    last_check: List[float],
) -> None:
    """Opportunistically cache newly detected graphs between output chunks."""
    if not graph_cache or not graph_cache.auto_cache:
        return
    # Never touch the Stata engine while another command is executing.
    if self._is_executing:
        return
    now = time.monotonic()
    if last_check:
        # Throttle polling to at most once every 250ms.
        if now - last_check[0] < 0.25:
            return
        last_check[0] = now
    try:
        cached_names = await graph_cache.cache_detected_graphs_with_pystata()
    except Exception as e:
        logger.debug("graph_ready polling failed: %s", e)
        return
    if emit_graph_ready and cached_names:
        await self._emit_graph_ready_for_graphs(
            cached_names,
            notify_log=notify_log,
            task_id=graph_ready_task_id,
            export_format=graph_ready_format,
            graph_ready_initial=graph_ready_initial,
        )
769
+
770
async def _emit_graph_ready_events(
    self,
    initial_graphs: dict[str, str],
    notify_log: Callable[[str], Awaitable[None]],
    task_id: Optional[str],
    export_format: str,
) -> None:
    """Compare the current graph list to *initial_graphs* and announce changes."""
    try:
        names = list(self.list_graphs(force_refresh=True))
    except Exception as e:
        logger.warning("graph_ready: list_graphs failed: %s", e)
        return

    if not names:
        return

    for name in names:
        signature = self._get_graph_signature(name)
        baseline = initial_graphs.get(name)
        if baseline is not None and baseline == signature:
            continue
        try:
            exported = await anyio.to_thread.run_sync(
                lambda: self.export_graph(name, format=export_format)
            )
            await notify_log(json.dumps({
                "event": "graph_ready",
                "task_id": task_id,
                "graph": {
                    "name": name,
                    "path": exported,
                    "label": name,
                },
            }))
            # Record the new signature so the same state is not re-announced.
            initial_graphs[name] = signature
        except Exception as e:
            logger.warning("graph_ready export failed for %s: %s", name, e)
808
+
809
+ def _get_graph_signature(self, graph_name: str) -> str:
810
+ if not graph_name:
811
+ return ""
812
+ try:
813
+ response = self.exec_lightweight(f"graph describe {graph_name}")
814
+ if response.success and response.stdout:
815
+ return response.stdout
816
+ if response.stderr:
817
+ return response.stderr
818
+ except Exception:
819
+ return ""
820
+ return ""
821
+
822
def _request_break_in(self) -> None:
    """
    Attempt to interrupt a running Stata command when cancellation is requested.

    Uses the Stata sfi.breakIn hook when available; errors are swallowed because
    cancellation should never crash the host process.
    """
    try:
        import sfi  # type: ignore[import-not-found]

        # Both spellings have been observed across pystata builds.
        interrupt = getattr(sfi, "breakIn", None) or getattr(sfi, "break_in", None)
        if not callable(interrupt):  # pragma: no cover - environment without Stata runtime
            logger.debug("sfi.breakIn not available; cannot interrupt Stata")
            return
        try:
            interrupt()
            logger.info("Sent breakIn() to Stata for cancellation")
        except Exception as e:  # pragma: no cover - best-effort
            logger.warning(f"Failed to send breakIn() to Stata: {e}")
    except Exception as e:  # pragma: no cover - import failure or other
        logger.debug(f"Unable to import sfi for cancellation: {e}")
843
+
844
+ async def _wait_for_stata_stop(self, timeout: float = 2.0) -> bool:
845
+ """
846
+ After requesting a break, poll the Stata interface so it can surface BreakError
847
+ and return control. This is best-effort and time-bounded.
848
+ """
849
+ deadline = time.monotonic() + timeout
850
+ try:
851
+ import sfi # type: ignore[import-not-found]
852
+
853
+ toolkit = getattr(sfi, "SFIToolkit", None)
854
+ poll = getattr(toolkit, "pollnow", None) or getattr(toolkit, "pollstd", None)
855
+ BreakError = getattr(sfi, "BreakError", None)
856
+ except Exception: # pragma: no cover
857
+ return False
858
+
859
+ if not callable(poll):
860
+ return False
861
+
862
+ last_exc: Optional[Exception] = None
863
+ while time.monotonic() < deadline:
864
+ try:
865
+ poll()
866
+ except Exception as e: # pragma: no cover - depends on Stata runtime
867
+ last_exc = e
868
+ if BreakError is not None and isinstance(e, BreakError):
869
+ logger.info("Stata BreakError detected; cancellation acknowledged by Stata")
870
+ return True
871
+ # If Stata already stopped, break on any other exception.
872
+ break
873
+ await anyio.sleep(0.05)
874
+
875
+ if last_exc:
876
+ logger.debug(f"Cancellation poll exited with {last_exc}")
877
+ return False
878
+
879
+ @contextmanager
880
+ def _temp_cwd(self, cwd: Optional[str]):
881
+ if cwd is None:
882
+ yield
883
+ return
884
+ prev = os.getcwd()
885
+ os.chdir(cwd)
886
+ try:
887
+ yield
888
+ finally:
889
+ os.chdir(prev)
890
+
891
+ @contextmanager
892
+ def _safe_redirect_fds(self):
893
+ """Redirects fd 1 (stdout) to fd 2 (stderr) at the OS level."""
894
+ # Save original stdout fd
895
+ try:
896
+ stdout_fd = os.dup(1)
897
+ except Exception:
898
+ # Fallback if we can't dup (e.g. strange environment)
899
+ yield
900
+ return
901
+
902
+ try:
903
+ # Redirect OS-level stdout to stderr
904
+ os.dup2(2, 1)
905
+ yield
906
+ finally:
907
+ # Restore stdout
908
+ try:
909
+ os.dup2(stdout_fd, 1)
910
+ os.close(stdout_fd)
911
+ except Exception:
912
+ pass
913
+
914
+ def init(self):
915
+ """Initializes usage of pystata using cached discovery results."""
916
+ if self._initialized:
917
+ return
918
+
919
+ # Suppress any non-UTF8 banner output from PyStata on stdout, which breaks MCP stdio transport
920
+ from contextlib import redirect_stdout, redirect_stderr
921
+
922
+ try:
923
+ import stata_setup
924
+
925
+ # Get discovered Stata paths (cached from first call)
926
+ discovery_candidates = _get_discovery_candidates()
927
+
928
+ # Diagnostic: force faulthandler to output to stderr for C crashes
929
+ import faulthandler
930
+ faulthandler.enable(file=sys.stderr)
931
+ import subprocess
932
+
933
+ success = False
934
+ last_error = None
935
+ chosen_exec: Optional[Tuple[str, str]] = None
936
+
937
+ for stata_exec_path, edition in discovery_candidates:
938
+ candidates = []
939
+ # Prefer the binary directory first (documented input for stata_setup)
940
+ bin_dir = os.path.dirname(stata_exec_path)
941
+
942
+ # 2. App Bundle: .../StataMP.app (macOS only)
943
+ curr = bin_dir
944
+ app_bundle = None
945
+ while len(curr) > 1:
946
+ if curr.endswith(".app"):
947
+ app_bundle = curr
948
+ break
949
+ parent = os.path.dirname(curr)
950
+ if parent == curr:
951
+ break
952
+ curr = parent
953
+
954
+ ordered_candidates = []
955
+ if bin_dir:
956
+ ordered_candidates.append(bin_dir)
957
+ if app_bundle:
958
+ ordered_candidates.append(app_bundle)
959
+ parent_dir = os.path.dirname(app_bundle)
960
+ if parent_dir not in ordered_candidates:
961
+ ordered_candidates.append(parent_dir)
962
+
963
+ # Deduplicate preserving order
964
+ seen = set()
965
+ candidates = []
966
+ for c in ordered_candidates:
967
+ if c not in seen:
968
+ seen.add(c)
969
+ candidates.append(c)
970
+
971
+ for path in candidates:
972
+ try:
973
+ # 1. Pre-flight check in a subprocess to capture hard exits/crashes
974
+ sys.stderr.write(f"[mcp_stata] DEBUG: Pre-flight check for path '{path}'\n")
975
+ sys.stderr.flush()
976
+
977
+ preflight_code = f"""
978
+ import sys
979
+ import stata_setup
980
+ from contextlib import redirect_stdout, redirect_stderr
981
+ with redirect_stdout(sys.stderr), redirect_stderr(sys.stderr):
982
+ try:
983
+ stata_setup.config({repr(path)}, {repr(edition)})
984
+ from pystata import stata
985
+ stata.run('about', echo=True)
986
+ print('PREFLIGHT_OK')
987
+ except Exception as e:
988
+ print(f'PREFLIGHT_FAIL: {{e}}', file=sys.stderr)
989
+ sys.exit(1)
990
+ """
991
+
992
+ try:
993
+ res = subprocess.run(
994
+ [sys.executable, "-c", preflight_code],
995
+ capture_output=True, text=True, timeout=30
996
+ )
997
+ if res.returncode != 0:
998
+ sys.stderr.write(f"[mcp_stata] Pre-flight failed (rc={res.returncode}) for '{path}'\n")
999
+ if res.stdout.strip():
1000
+ sys.stderr.write(f"--- Pre-flight stdout ---\n{res.stdout.strip()}\n")
1001
+ if res.stderr.strip():
1002
+ sys.stderr.write(f"--- Pre-flight stderr ---\n{res.stderr.strip()}\n")
1003
+ sys.stderr.flush()
1004
+ last_error = f"Pre-flight failed: {res.stdout.strip()} {res.stderr.strip()}"
1005
+ continue
1006
+ else:
1007
+ sys.stderr.write(f"[mcp_stata] Pre-flight succeeded for '{path}'. Proceeding to in-process init.\n")
1008
+ sys.stderr.flush()
1009
+ except Exception as pre_e:
1010
+ sys.stderr.write(f"[mcp_stata] Pre-flight execution error for '{path}': {repr(pre_e)}\n")
1011
+ sys.stderr.flush()
1012
+ last_error = pre_e
1013
+ continue
1014
+
1015
+ msg = f"[mcp_stata] DEBUG: In-process stata_setup.config('{path}', '{edition}')\n"
1016
+ sys.stderr.write(msg)
1017
+ sys.stderr.flush()
1018
+ # Redirect both sys.stdout/err AND the raw fds to our stderr pipe.
1019
+ with redirect_stdout(sys.stderr), redirect_stderr(sys.stderr), self._safe_redirect_fds():
1020
+ stata_setup.config(path, edition)
1021
+
1022
+ sys.stderr.write(f"[mcp_stata] DEBUG: stata_setup.config succeeded for path: {path}\n")
1023
+ sys.stderr.flush()
1024
+ success = True
1025
+ chosen_exec = (stata_exec_path, edition)
1026
+ logger.info("stata_setup.config succeeded with path: %s", path)
1027
+ break
1028
+ except BaseException as e:
1029
+ last_error = e
1030
+ sys.stderr.write(f"[mcp_stata] WARNING: In-process stata_setup.config caught: {repr(e)}\n")
1031
+ sys.stderr.flush()
1032
+ logger.warning("stata_setup.config failed for path '%s': %s", path, e)
1033
+ if isinstance(e, SystemExit):
1034
+ break
1035
+ continue
1036
+
1037
+ if success:
1038
+ # Cache winning candidate for subsequent lookups
1039
+ global _discovery_result
1040
+ if chosen_exec:
1041
+ _discovery_result = chosen_exec
1042
+ break
1043
+
1044
+ if not success:
1045
+ error_msg = (
1046
+ f"stata_setup.config failed to initialize Stata. "
1047
+ f"Tried candidates: {discovery_candidates}. "
1048
+ f"Last error: {repr(last_error)}"
1049
+ )
1050
+ sys.stderr.write(f"[mcp_stata] ERROR: {error_msg}\n")
1051
+ sys.stderr.flush()
1052
+ logger.error(error_msg)
1053
+ raise RuntimeError(error_msg)
1054
+
1055
+ # Cache the binary path for later use (e.g., PNG export on Windows)
1056
+ self._stata_exec_path = os.path.abspath(stata_exec_path)
1057
+
1058
+ try:
1059
+ sys.stderr.write("[mcp_stata] DEBUG: Importing pystata and warming up...\n")
1060
+ sys.stderr.flush()
1061
+ with redirect_stdout(sys.stderr), redirect_stderr(sys.stderr), self._safe_redirect_fds():
1062
+ from pystata import stata # type: ignore[import-not-found]
1063
+ # Warm up the engine and swallow any late splash screen output
1064
+ stata.run("display 1", echo=False)
1065
+ self.stata = stata
1066
+ self._initialized = True
1067
+ sys.stderr.write("[mcp_stata] DEBUG: pystata warmed up successfully\n")
1068
+ sys.stderr.flush()
1069
+ except BaseException as e:
1070
+ sys.stderr.write(f"[mcp_stata] ERROR: Failed to load pystata or run initial command: {repr(e)}\n")
1071
+ sys.stderr.flush()
1072
+ logger.error("Failed to load pystata or run initial command: %s", e)
1073
+ raise
1074
+
1075
+ # Initialize list_graphs TTL cache
1076
+ self._list_graphs_cache = None
1077
+ self._list_graphs_cache_time = 0
1078
+ self._list_graphs_cache_lock = threading.Lock()
1079
+
1080
+ # Map user-facing graph names (may include spaces/punctuation) to valid
1081
+ # internal Stata graph names.
1082
+ self._graph_name_aliases: Dict[str, str] = {}
1083
+ self._graph_name_reverse: Dict[str, str] = {}
1084
+
1085
+ logger.info("StataClient initialized successfully with %s (%s)", stata_exec_path, edition)
1086
+
1087
+ except ImportError as e:
1088
+ raise RuntimeError(
1089
+ f"Failed to import stata_setup or pystata: {e}. "
1090
+ "Ensure they are installed (pip install pystata stata-setup)."
1091
+ ) from e
1092
+
1093
+ def _make_valid_stata_name(self, name: str) -> str:
1094
+ """Create a valid Stata name (<=32 chars, [A-Za-z_][A-Za-z0-9_]*)."""
1095
+ base = re.sub(r"[^A-Za-z0-9_]", "_", name or "")
1096
+ if not base:
1097
+ base = "Graph"
1098
+ if not re.match(r"^[A-Za-z_]", base):
1099
+ base = f"G_{base}"
1100
+ base = base[:32]
1101
+
1102
+ # Avoid collisions.
1103
+ candidate = base
1104
+ i = 1
1105
+ while candidate in getattr(self, "_graph_name_reverse", {}):
1106
+ suffix = f"_{i}"
1107
+ candidate = (base[: max(0, 32 - len(suffix))] + suffix)[:32]
1108
+ i += 1
1109
+ return candidate
1110
+
1111
+ def _resolve_graph_name_for_stata(self, name: str) -> str:
1112
+ """Return internal Stata graph name for a user-facing name."""
1113
+ if not name:
1114
+ return name
1115
+ aliases = getattr(self, "_graph_name_aliases", None)
1116
+ if aliases and name in aliases:
1117
+ return aliases[name]
1118
+ return name
1119
+
1120
+ def _maybe_rewrite_graph_name_in_command(self, code: str) -> str:
1121
+ """Rewrite name("...") to a valid Stata name and store alias mapping."""
1122
+ if not code:
1123
+ return code
1124
+ if not hasattr(self, "_graph_name_aliases"):
1125
+ self._graph_name_aliases = {}
1126
+ self._graph_name_reverse = {}
1127
+
1128
+ # Handle common patterns: name("..." ...) or name(`"..."' ...)
1129
+ pat = re.compile(r"name\(\s*(?:`\"(?P<cq>[^\"]*)\"'|\"(?P<dq>[^\"]*)\")\s*(?P<rest>[^)]*)\)")
1130
+
1131
+ def repl(m: re.Match) -> str:
1132
+ original = m.group("cq") if m.group("cq") is not None else m.group("dq")
1133
+ original = original or ""
1134
+ internal = self._graph_name_aliases.get(original)
1135
+ if not internal:
1136
+ internal = self._make_valid_stata_name(original)
1137
+ self._graph_name_aliases[original] = internal
1138
+ self._graph_name_reverse[internal] = original
1139
+ rest = m.group("rest") or ""
1140
+ return f"name({internal}{rest})"
1141
+
1142
+ return pat.sub(repl, code)
1143
+
1144
+ def _get_rc_from_scalar(self, Scalar) -> int:
1145
+ """Safely get return code, handling None values."""
1146
+ try:
1147
+ from sfi import Macro
1148
+ rc_val = Macro.getGlobal("_rc")
1149
+ if rc_val is None:
391
1150
  return -1
1151
+ return int(float(rc_val))
1152
+ except Exception:
1153
+ return -1
392
1154
 
393
1155
  def _parse_rc_from_text(self, text: str) -> Optional[int]:
394
- match = re.search(r"r\((\d+)\)", text)
395
- if match:
1156
+ """Parse return code from plain text using structural patterns."""
1157
+ if not text:
1158
+ return None
1159
+
1160
+ # 1. Primary check: 'search r(N)' pattern (SMCL tag potentially stripped)
1161
+ matches = list(re.finditer(r'search r\((\d+)\)', text))
1162
+ if matches:
396
1163
  try:
397
- return int(match.group(1))
1164
+ return int(matches[-1].group(1))
398
1165
  except Exception:
399
- return None
1166
+ pass
1167
+
1168
+ # 2. Secondary check: Standalone r(N); pattern
1169
+ # This appears at the end of command blocks
1170
+ matches = list(re.finditer(r'(?<!\w)r\((\d+)\);?', text))
1171
+ if matches:
1172
+ try:
1173
+ return int(matches[-1].group(1))
1174
+ except Exception:
1175
+ pass
1176
+
400
1177
  return None
401
1178
 
402
1179
  def _parse_line_from_text(self, text: str) -> Optional[int]:
@@ -405,75 +1182,207 @@ class StataClient:
405
1182
  try:
406
1183
  return int(match.group(1))
407
1184
  except Exception:
408
- return None
409
- return None
1185
+ return None
1186
+ return None
1187
+
1188
+ def _read_log_backwards_until_error(self, path: str, max_bytes: int = 5_000_000) -> str:
1189
+ """
1190
+ Read log file backwards in chunks, stopping when we find {err} tags or reach the start.
1191
+
1192
+ This is more efficient and robust than reading huge fixed tails, as we only read
1193
+ what we need to find the error.
1194
+
1195
+ Args:
1196
+ path: Path to the log file
1197
+ max_bytes: Maximum total bytes to read (safety limit, default 5MB)
1198
+
1199
+ Returns:
1200
+ The relevant portion of the log containing the error and context
1201
+ """
1202
+ try:
1203
+ chunk_size = 50_000 # Read 50KB chunks at a time
1204
+ total_read = 0
1205
+ chunks = []
1206
+
1207
+ with open(path, 'rb') as f:
1208
+ # Get file size
1209
+ f.seek(0, os.SEEK_END)
1210
+ file_size = f.tell()
1211
+
1212
+ if file_size == 0:
1213
+ return ""
1214
+
1215
+ # Start from the end
1216
+ position = file_size
1217
+
1218
+ while position > 0 and total_read < max_bytes:
1219
+ # Calculate how much to read in this chunk
1220
+ read_size = min(chunk_size, position, max_bytes - total_read)
1221
+ position -= read_size
1222
+
1223
+ # Seek and read
1224
+ f.seek(position)
1225
+ chunk = f.read(read_size)
1226
+ chunks.insert(0, chunk)
1227
+ total_read += read_size
1228
+
1229
+ # Decode and check for error tags
1230
+ try:
1231
+ accumulated = b''.join(chunks).decode('utf-8', errors='replace')
1232
+
1233
+ # Check if we've found an error tag
1234
+ if '{err}' in accumulated:
1235
+ # Found it! Read one more chunk for context before the error
1236
+ if position > 0 and total_read < max_bytes:
1237
+ extra_read = min(chunk_size, position, max_bytes - total_read)
1238
+ position -= extra_read
1239
+ f.seek(position)
1240
+ extra_chunk = f.read(extra_read)
1241
+ chunks.insert(0, extra_chunk)
1242
+
1243
+ return b''.join(chunks).decode('utf-8', errors='replace')
1244
+
1245
+ except UnicodeDecodeError:
1246
+ # Continue reading if we hit a decode error (might be mid-character)
1247
+ continue
1248
+
1249
+ # Read everything we've accumulated
1250
+ return b''.join(chunks).decode('utf-8', errors='replace')
1251
+
1252
+ except Exception as e:
1253
+ logger.warning(f"Error reading log backwards: {e}")
1254
+ # Fallback to regular tail read
1255
+ return self._read_log_tail(path, 200_000)
1256
+
1257
+ def _read_log_tail_smart(self, path: str, rc: int, trace: bool = False) -> str:
1258
+ """
1259
+ Smart log tail reader that adapts based on whether an error occurred.
1260
+
1261
+ - If rc == 0: Read normal tail (20KB without trace, 200KB with trace)
1262
+ - If rc != 0: Search backwards dynamically to find the error
1263
+
1264
+ Args:
1265
+ path: Path to the log file
1266
+ rc: Return code from Stata
1267
+ trace: Whether trace mode was enabled
1268
+
1269
+ Returns:
1270
+ Relevant log content
1271
+ """
1272
+ if rc != 0:
1273
+ # Error occurred - search backwards for {err} tags
1274
+ return self._read_log_backwards_until_error(path)
1275
+ else:
1276
+ # Success - just read normal tail
1277
+ tail_size = 200_000 if trace else 20_000
1278
+ return self._read_log_tail(path, tail_size)
1279
+
1280
+ def _read_log_tail(self, path: str, max_chars: int) -> str:
1281
+ try:
1282
+ with open(path, "rb") as f:
1283
+ f.seek(0, os.SEEK_END)
1284
+ size = f.tell()
1285
+
1286
+ if size <= 0:
1287
+ return ""
1288
+ read_size = min(size, max_chars)
1289
+ f.seek(-read_size, os.SEEK_END)
1290
+ data = f.read(read_size)
1291
+ return data.decode("utf-8", errors="replace")
1292
+ except Exception:
1293
+ return ""
1294
+
1295
+ def _build_combined_log(
1296
+ self,
1297
+ tail: TailBuffer,
1298
+ path: str,
1299
+ rc: int,
1300
+ trace: bool,
1301
+ exc: Optional[Exception],
1302
+ ) -> str:
1303
+ tail_text = tail.get_value()
1304
+ log_tail = self._read_log_tail_smart(path, rc, trace)
1305
+ if log_tail and len(log_tail) > len(tail_text):
1306
+ tail_text = log_tail
1307
+ return (tail_text or "") + (f"\n{exc}" if exc else "")
1308
+
1309
+ def _truncate_command_output(
1310
+ self,
1311
+ result: CommandResponse,
1312
+ max_output_lines: Optional[int],
1313
+ ) -> CommandResponse:
1314
+ if max_output_lines is None or not result.stdout:
1315
+ return result
1316
+ lines = result.stdout.splitlines()
1317
+ if len(lines) <= max_output_lines:
1318
+ return result
1319
+ truncated_lines = lines[:max_output_lines]
1320
+ truncated_lines.append(
1321
+ f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)"
1322
+ )
1323
+ truncated_stdout = "\n".join(truncated_lines)
1324
+ if hasattr(result, "model_copy"):
1325
+ return result.model_copy(update={"stdout": truncated_stdout})
1326
+ return result.copy(update={"stdout": truncated_stdout})
1327
+
1328
def _run_plain_capture(self, code: str) -> str:
    """
    Run a Stata command while capturing output using a named SMCL log.
    This is the most reliable way to capture output (like return list)
    without interfering with user logs or being affected by stdout redirection issues.
    """
    if not self._initialized:
        self.init()

    with self._exec_lock:
        hold_name = f"mcp_hold_{uuid.uuid4().hex[:8]}"
        # Preserve r()/e() results BEFORE the capture log is opened, so the
        # log bookkeeping commands cannot clobber them.
        self.stata.run(f"capture _return hold {hold_name}", echo=False)

        try:
            with self._smcl_log_capture() as (log_name, smcl_path):
                # Restore inside the capture log so `return list` sees them.
                self.stata.run(f"capture _return restore {hold_name}", echo=False)
                try:
                    self.stata.run(code, echo=True)
                except Exception:
                    pass
        except Exception:
            # Log capture never opened: drop the hold and give up on output.
            self.stata.run(f"capture _return drop {hold_name}", echo=False)
            content = ""
        else:
            # Read SMCL content, then remove the temp file.
            content = self._read_smcl_file(smcl_path)
            self._safe_unlink(smcl_path)

        return self._smcl_to_text(content)
410
1362
 
411
- def _read_log_tail(self, path: str, max_chars: int) -> str:
1363
+ def _count_do_file_lines(self, path: str) -> int:
1364
+ """
1365
+ Count the number of executable lines in a .do file for progress inference.
1366
+
1367
+ Blank lines and comment-only lines (starting with * or //) are ignored.
1368
+ """
412
1369
  try:
413
- with open(path, "rb") as f:
414
- f.seek(0, os.SEEK_END)
415
- size = f.tell()
416
- if size <= 0:
417
- return ""
418
- read_size = min(size, max_chars)
419
- f.seek(-read_size, os.SEEK_END)
420
- data = f.read(read_size)
421
- return data.decode("utf-8", errors="replace")
1370
+ with open(path, "r", encoding="utf-8", errors="replace") as f:
1371
+ lines = f.read().splitlines()
422
1372
  except Exception:
423
- return ""
1373
+ return 0
424
1374
 
425
- def _select_stata_error_message(self, text: str, fallback: str) -> str:
426
- if not text:
427
- return fallback
428
- ignore_patterns = (
429
- r"^r\(\d+\);?$",
430
- r"^end of do-file$",
431
- r"^execution terminated$",
432
- r"^[-=*]{3,}.*$",
433
- )
434
- rc_pattern = r"^r\(\d+\);?$"
435
- error_patterns = (
436
- r"\btype mismatch\b",
437
- r"\bnot found\b",
438
- r"\bnot allowed\b",
439
- r"\bno observations\b",
440
- r"\bconformability error\b",
441
- r"\binvalid\b",
442
- r"\bsyntax error\b",
443
- r"\berror\b",
444
- )
445
- lines = text.splitlines()
446
- for raw in reversed(lines):
447
- line = raw.strip()
448
- if not line:
449
- continue
450
- if any(re.search(pat, line, re.IGNORECASE) for pat in error_patterns):
451
- return line
452
- for i in range(len(lines) - 1, -1, -1):
453
- line = lines[i].strip()
454
- if not line:
455
- continue
456
- if re.match(rc_pattern, line, re.IGNORECASE):
457
- for j in range(i - 1, -1, -1):
458
- prev_line = lines[j].strip()
459
- if not prev_line:
460
- continue
461
- if prev_line.startswith((".", ">", "-", "=")):
462
- continue
463
- if any(re.match(pat, prev_line, re.IGNORECASE) for pat in ignore_patterns):
464
- continue
465
- return prev_line
466
- return line
467
- for raw in reversed(lines):
468
- line = raw.strip()
469
- if not line:
1375
+ total = 0
1376
+ for line in lines:
1377
+ s = line.strip()
1378
+ if not s:
470
1379
  continue
471
- if line.startswith((".", ">", "-", "=")):
1380
+ if s.startswith("*"):
472
1381
  continue
473
- if any(re.match(pat, line, re.IGNORECASE) for pat in ignore_patterns):
1382
+ if s.startswith("//"):
474
1383
  continue
475
- return line
476
- return fallback
1384
+ total += 1
1385
+ return total
477
1386
 
478
1387
  def _smcl_to_text(self, smcl: str) -> str:
479
1388
  """Convert simple SMCL markup into plain text for LLM-friendly help."""
@@ -486,153 +1395,181 @@ class StataClient:
486
1395
  lines = [line.rstrip() for line in cleaned.splitlines()]
487
1396
  return "\n".join(lines).strip()
488
1397
 
489
- def _build_error_envelope(
490
- self,
491
- command: str,
492
- rc: int,
493
- stdout: str,
494
- stderr: str,
495
- exc: Optional[Exception],
496
- trace: bool,
497
- ) -> ErrorEnvelope:
498
- combined = "\n".join(filter(None, [stdout, stderr, str(exc) if exc else ""])).strip()
499
- rc_hint = self._parse_rc_from_text(combined) if combined else None
500
- rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
501
- line_no = self._parse_line_from_text(combined) if combined else None
502
- snippet = combined[-800:] if combined else None
503
- fallback = (stderr or (str(exc) if exc else "") or stdout or "Stata error").strip()
504
- if fallback == "Stata error" and rc_final is not None:
505
- fallback = f"Stata error r({rc_final})"
506
- message = self._select_stata_error_message(combined, fallback)
507
- return ErrorEnvelope(
508
- message=message,
509
- rc=rc_final,
510
- line=line_no,
511
- command=command,
512
- stdout=stdout or None,
513
- stderr=stderr or None,
514
- snippet=snippet,
515
- trace=trace or None,
516
- )
1398
+ def _extract_error_and_context(self, log_content: str, rc: int) -> Tuple[str, str]:
1399
+ """
1400
+ Extracts the error message and trace context using {err} SMCL tags.
1401
+ """
1402
+ if not log_content:
1403
+ return f"Stata error r({rc})", ""
1404
+
1405
+ lines = log_content.splitlines()
1406
+
1407
+ # Search backwards for the {err} tag
1408
+ for i in range(len(lines) - 1, -1, -1):
1409
+ line = lines[i]
1410
+ if '{err}' in line:
1411
+ # Found the (last) error line.
1412
+ # Walk backwards to find the start of the error block (consecutive {err} lines)
1413
+ start_idx = i
1414
+ while start_idx > 0 and '{err}' in lines[start_idx-1]:
1415
+ start_idx -= 1
1416
+
1417
+ # The full error message is the concatenation of all {err} lines in this block
1418
+ error_lines = []
1419
+ for j in range(start_idx, i + 1):
1420
+ error_lines.append(lines[j].strip())
1421
+
1422
+ clean_msg = " ".join(filter(None, error_lines)) or f"Stata error r({rc})"
1423
+
1424
+ # Capture everything from the start of the error block to the end
1425
+ context_str = "\n".join(lines[start_idx:])
1426
+ return clean_msg, context_str
1427
+
1428
+ # Fallback: grab the last 30 lines
1429
+ context_start = max(0, len(lines) - 30)
1430
+ context_str = "\n".join(lines[context_start:])
1431
+
1432
+ return f"Stata error r({rc})", context_str
517
1433
 
518
1434
  def _exec_with_capture(self, code: str, echo: bool = True, trace: bool = False, cwd: Optional[str] = None) -> CommandResponse:
519
- """Execute Stata code with stdout/stderr capture and rc detection."""
520
1435
  if not self._initialized:
521
1436
  self.init()
522
1437
 
1438
+ # Rewrite graph names with special characters to internal aliases
523
1439
  code = self._maybe_rewrite_graph_name_in_command(code)
524
1440
 
525
- if cwd is not None and not os.path.isdir(cwd):
526
- return CommandResponse(
527
- command=code,
528
- rc=601,
529
- stdout="",
530
- stderr=None,
531
- success=False,
532
- error=ErrorEnvelope(
533
- message=f"cwd not found: {cwd}",
534
- rc=601,
535
- command=code,
536
- ),
537
- )
1441
+ output_buffer = StringIO()
1442
+ error_buffer = StringIO()
1443
+ rc = 0
1444
+ sys_error = None
1445
+ error_envelope = None
1446
+ smcl_content = ""
1447
+ smcl_path = None
538
1448
 
539
- start_time = time.time()
540
- exc: Optional[Exception] = None
541
- ret_text: Optional[str] = None
542
1449
  with self._exec_lock:
543
- # Set execution flag to prevent recursive Stata calls
544
- self._is_executing = True
545
1450
  try:
1451
+ from sfi import Scalar, SFIToolkit
546
1452
  with self._temp_cwd(cwd):
547
- with self._redirect_io() as (out_buf, err_buf):
548
- try:
549
- if trace:
550
- self.stata.run("set trace on")
551
- ret = self.stata.run(code, echo=echo)
552
- if isinstance(ret, str) and ret:
553
- ret_text = ret
554
- except Exception as e:
555
- exc = e
556
- finally:
557
- rc = self._read_return_code()
558
- if trace:
559
- try:
560
- self.stata.run("set trace off")
561
- except Exception:
562
- pass
563
- finally:
564
- # Clear execution flag
565
- self._is_executing = False
1453
+ # Create SMCL log for authoritative output capture
1454
+ # Use shorter unique path to avoid Windows path issues
1455
+ smcl_path = self._create_smcl_log_path(prefix="mcp_", max_hex=16)
1456
+ log_name = self._make_smcl_log_name()
1457
+ self._open_smcl_log(smcl_path, log_name)
1458
+
1459
+ try:
1460
+ with self._redirect_io(output_buffer, error_buffer):
1461
+ try:
1462
+ if trace:
1463
+ self.stata.run("set trace on")
566
1464
 
567
- stdout = out_buf.getvalue()
568
- # Some PyStata builds return output as a string rather than printing.
569
- if (not stdout or not stdout.strip()) and ret_text:
570
- stdout = ret_text
571
- stderr = err_buf.getvalue()
572
- combined = "\n".join(filter(None, [stdout, stderr, str(exc) if exc else ""])).strip()
573
- rc_hint = self._parse_rc_from_text(combined) if combined else None
574
- if exc is None and rc_hint is not None and rc_hint != 0:
575
- # Prefer r(#) parsed from the current command output when present.
576
- rc = rc_hint
577
- # If no exception and stderr is empty and no r(#) is present, treat rc anomalies as success
578
- # (e.g., stale/spurious c(rc) reads).
579
- if exc is None and (not stderr or not stderr.strip()) and rc_hint is None:
580
- rc = 0 if rc is None or rc != 0 else rc
581
- success = rc == 0 and exc is None
582
- error = None
583
- if not success:
584
- error = self._build_error_envelope(code, rc, stdout, stderr, exc, trace)
585
- duration = time.time() - start_time
586
- code_preview = code.replace("\n", "\\n")
587
- logger.info(
588
- "stata.run rc=%s success=%s trace=%s duration_ms=%.2f code_preview=%s",
589
- rc,
590
- success,
591
- trace,
592
- duration * 1000,
593
- code_preview[:120],
594
- )
595
- # Mutually exclusive - when error, output is in ErrorEnvelope only
596
- return CommandResponse(
1465
+ # Run the user code
1466
+ self.stata.run(code, echo=echo)
1467
+
1468
+ # Hold results IMMEDIATELY to prevent clobbering by cleanup
1469
+ self._hold_name = f"mcp_hold_{uuid.uuid4().hex[:8]}"
1470
+ self.stata.run(f"capture _return hold {self._hold_name}", echo=False)
1471
+
1472
+ finally:
1473
+ if trace:
1474
+ try:
1475
+ self.stata.run("set trace off")
1476
+ except Exception:
1477
+ pass
1478
+ finally:
1479
+ # Close SMCL log AFTER output redirection
1480
+ self._close_smcl_log(log_name)
1481
+ # Restore and capture results while still inside the lock
1482
+ self._restore_results_from_hold("_hold_name")
1483
+
1484
+ except Exception as e:
1485
+ sys_error = str(e)
1486
+ # Try to parse RC from exception message
1487
+ parsed_rc = self._parse_rc_from_text(sys_error)
1488
+ rc = parsed_rc if parsed_rc is not None else 1
1489
+
1490
+ # Read SMCL content as the authoritative source
1491
+ if smcl_path:
1492
+ smcl_content = self._read_smcl_file(smcl_path)
1493
+ # Clean up SMCL file
1494
+ self._safe_unlink(smcl_path)
1495
+
1496
+ stdout_content = output_buffer.getvalue()
1497
+ stderr_content = error_buffer.getvalue()
1498
+
1499
+ # If RC wasn't captured or is generic, try to parse from SMCL
1500
+ if rc in (0, 1, -1) and smcl_content:
1501
+ parsed_rc = self._parse_rc_from_smcl(smcl_content)
1502
+ if parsed_rc is not None and parsed_rc != 0:
1503
+ rc = parsed_rc
1504
+ elif rc == -1:
1505
+ rc = 0
1506
+
1507
+ # If stdout is empty but SMCL has content AND command succeeded, use SMCL as stdout
1508
+ # This handles cases where Stata writes to log but not to redirected stdout
1509
+ # For errors, we keep stdout empty and error info goes to ErrorEnvelope
1510
+ if rc == 0 and not stdout_content and smcl_content:
1511
+ # Convert SMCL to plain text for stdout
1512
+ stdout_content = self._smcl_to_text(smcl_content)
1513
+
1514
+ if rc != 0:
1515
+ if sys_error:
1516
+ msg = sys_error
1517
+ context = sys_error
1518
+ else:
1519
+ # Extract error from SMCL (authoritative source)
1520
+ msg, context = self._extract_error_from_smcl(smcl_content, rc)
1521
+
1522
+ error_envelope = ErrorEnvelope(
1523
+ message=msg,
1524
+ rc=rc,
1525
+ context=context,
1526
+ snippet=smcl_content[-800:] if smcl_content else (stdout_content + stderr_content)[-800:],
1527
+ smcl_output=smcl_content # Include raw SMCL for debugging
1528
+ )
1529
+ stderr_content = context
1530
+
1531
+ resp = CommandResponse(
597
1532
  command=code,
598
1533
  rc=rc,
599
- stdout="" if not success else stdout,
600
- stderr=None,
601
- success=success,
602
- error=error,
1534
+ stdout=stdout_content,
1535
+ stderr=stderr_content,
1536
+ success=(rc == 0),
1537
+ error=error_envelope,
1538
+ log_path=smcl_path if smcl_path else None,
1539
+ smcl_output=smcl_content,
603
1540
  )
604
1541
 
605
- def _exec_no_capture(self, code: str, echo: bool = False, trace: bool = False) -> CommandResponse:
606
- """Execute Stata code while leaving stdout/stderr alone.
1542
+ # Capture results immediately after execution, INSIDE the lock
1543
+ try:
1544
+ self._last_results = self.get_stored_results(force_fresh=True)
1545
+ except Exception:
1546
+ self._last_results = None
607
1547
 
608
- PyStata's output bridge uses its own thread and can misbehave on Windows
609
- when we redirect stdio (e.g., graph export). This path keeps the normal
610
- handlers and just reads rc afterward.
611
- """
1548
+ return resp
1549
+
1550
+ def _exec_no_capture(self, code: str, echo: bool = False, trace: bool = False) -> CommandResponse:
1551
+ """Execute Stata code while leaving stdout/stderr alone."""
612
1552
  if not self._initialized:
613
1553
  self.init()
614
1554
 
615
1555
  exc: Optional[Exception] = None
616
1556
  ret_text: Optional[str] = None
1557
+ rc = 0
1558
+
617
1559
  with self._exec_lock:
618
1560
  try:
1561
+ from sfi import Scalar # Import SFI tools
619
1562
  if trace:
620
1563
  self.stata.run("set trace on")
621
1564
  ret = self.stata.run(code, echo=echo)
622
1565
  if isinstance(ret, str) and ret:
623
1566
  ret_text = ret
1567
+
1568
+
624
1569
  except Exception as e:
625
1570
  exc = e
1571
+ rc = 1
626
1572
  finally:
627
- rc = self._read_return_code()
628
- # If Stata returned an r(#) in text, prefer it.
629
- combined = "\n".join(filter(None, [ret_text or "", str(exc) if exc else ""])).strip()
630
- rc_hint = self._parse_rc_from_text(combined) if combined else None
631
- if exc is None and rc_hint is not None and rc_hint != 0:
632
- rc = rc_hint
633
- if exc is None and (rc is None or rc == -1) and rc_hint is None:
634
- # Normalize spurious rc reads only when missing/invalid
635
- rc = 0
636
1573
  if trace:
637
1574
  try:
638
1575
  self.stata.run("set trace off")
@@ -644,8 +1581,13 @@ class StataClient:
644
1581
  success = rc == 0 and exc is None
645
1582
  error = None
646
1583
  if not success:
647
- # Pass ret_text as stdout for snippet parsing.
648
- error = self._build_error_envelope(code, rc, ret_text or "", stderr, exc, trace)
1584
+ msg = str(exc) if exc else f"Stata error r({rc})"
1585
+ error = ErrorEnvelope(
1586
+ message=msg,
1587
+ rc=rc,
1588
+ command=code,
1589
+ stdout=ret_text,
1590
+ )
649
1591
 
650
1592
  return CommandResponse(
651
1593
  command=code,
@@ -656,23 +1598,64 @@ class StataClient:
656
1598
  error=error,
657
1599
  )
658
1600
 
1601
+ def exec_lightweight(self, code: str) -> CommandResponse:
1602
+ """
1603
+ Executes a command using simple stdout redirection (no SMCL logs).
1604
+ Much faster on Windows as it avoids FS operations.
1605
+ LIMITED: Does not support error envelopes or complex return code parsing.
1606
+ """
1607
+ if not self._initialized:
1608
+ self.init()
1609
+
1610
+ code = self._maybe_rewrite_graph_name_in_command(code)
1611
+
1612
+ output_buffer = StringIO()
1613
+ error_buffer = StringIO()
1614
+ rc = 0
1615
+ exc = None
1616
+
1617
+ with self._exec_lock:
1618
+ with self._redirect_io(output_buffer, error_buffer):
1619
+ try:
1620
+ self.stata.run(code, echo=False)
1621
+ except Exception as e:
1622
+ exc = e
1623
+ rc = 1
1624
+
1625
+ stdout = output_buffer.getvalue()
1626
+ stderr = error_buffer.getvalue()
1627
+
1628
+ return CommandResponse(
1629
+ command=code,
1630
+ rc=rc,
1631
+ stdout=stdout,
1632
+ stderr=stderr if not exc else str(exc),
1633
+ success=(rc == 0),
1634
+ error=None
1635
+ )
1636
+
659
1637
  async def run_command_streaming(
660
- self,
661
- code: str,
662
- *,
663
- notify_log: Callable[[str], Awaitable[None]],
664
- notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
665
- echo: bool = True,
666
- trace: bool = False,
667
- max_output_lines: Optional[int] = None,
668
- cwd: Optional[str] = None,
669
- auto_cache_graphs: bool = False,
670
- on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
671
- ) -> CommandResponse:
1638
+ self,
1639
+ code: str,
1640
+ *,
1641
+ notify_log: Callable[[str], Awaitable[None]],
1642
+ notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
1643
+ echo: bool = True,
1644
+ trace: bool = False,
1645
+ max_output_lines: Optional[int] = None,
1646
+ cwd: Optional[str] = None,
1647
+ auto_cache_graphs: bool = False,
1648
+ on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
1649
+ emit_graph_ready: bool = False,
1650
+ graph_ready_task_id: Optional[str] = None,
1651
+ graph_ready_format: str = "svg",
1652
+ ) -> CommandResponse:
672
1653
  if not self._initialized:
673
1654
  self.init()
674
1655
 
675
1656
  code = self._maybe_rewrite_graph_name_in_command(code)
1657
+ auto_cache_graphs = auto_cache_graphs or emit_graph_ready
1658
+ total_lines = 0 # Commands (not do-files) do not have line-based progress
676
1659
 
677
1660
  if cwd is not None and not os.path.isdir(cwd):
678
1661
  return CommandResponse(
@@ -690,211 +1673,183 @@ class StataClient:
690
1673
 
691
1674
  start_time = time.time()
692
1675
  exc: Optional[Exception] = None
1676
+ smcl_content = ""
1677
+ smcl_path = None
693
1678
 
694
1679
  # Setup streaming graph cache if enabled
695
- graph_cache = None
696
- if auto_cache_graphs:
697
- graph_cache = StreamingGraphCache(self, auto_cache=True)
698
-
699
- graph_cache_callback = self._create_graph_cache_callback(on_graph_cached, notify_log)
700
-
701
- graph_cache.add_cache_callback(graph_cache_callback)
1680
+ graph_cache = self._init_streaming_graph_cache(auto_cache_graphs, on_graph_cached, notify_log)
702
1681
 
703
- log_file = tempfile.NamedTemporaryFile(
704
- prefix="mcp_stata_",
705
- suffix=".log",
706
- delete=False,
707
- mode="w",
708
- encoding="utf-8",
709
- errors="replace",
710
- buffering=1,
711
- )
712
- log_path = log_file.name
713
- tail = TailBuffer(max_chars=200000 if trace else 20000)
714
- tee = FileTeeIO(log_file, tail)
1682
+ _log_file, log_path, tail, tee = self._create_streaming_log(trace=trace)
1683
+
1684
+ # Create SMCL log path for authoritative output capture
1685
+ smcl_path = self._create_smcl_log_path()
1686
+ smcl_log_name = self._make_smcl_log_name()
715
1687
 
716
1688
  # Inform the MCP client immediately where to read/tail the output.
717
- await notify_log(json.dumps({"event": "log_path", "path": log_path}))
1689
+ await notify_log(json.dumps({"event": "log_path", "path": smcl_path}))
718
1690
 
719
1691
  rc = -1
1692
+ path_for_stata = code.replace("\\", "/")
1693
+ command = f'{path_for_stata}'
1694
+
1695
+ graph_ready_initial = self._capture_graph_state(graph_cache, emit_graph_ready)
1696
+ graph_poll_state = [0.0]
1697
+
1698
+ async def on_chunk_for_graphs(_chunk: str) -> None:
1699
+ await self._maybe_cache_graphs_on_chunk(
1700
+ graph_cache=graph_cache,
1701
+ emit_graph_ready=emit_graph_ready,
1702
+ notify_log=notify_log,
1703
+ graph_ready_task_id=graph_ready_task_id,
1704
+ graph_ready_format=graph_ready_format,
1705
+ graph_ready_initial=graph_ready_initial,
1706
+ last_check=graph_poll_state,
1707
+ )
720
1708
 
721
- def _run_blocking() -> None:
722
- nonlocal rc, exc
723
- with self._exec_lock:
724
- self._is_executing = True
725
- try:
726
- with self._temp_cwd(cwd):
727
- with self._redirect_io_streaming(tee, tee):
728
- try:
729
- if trace:
730
- self.stata.run("set trace on")
731
- ret = self.stata.run(code, echo=echo)
732
- # Some PyStata builds return output as a string rather than printing.
733
- if isinstance(ret, str) and ret:
734
- try:
735
- tee.write(ret)
736
- except Exception:
737
- pass
738
- except Exception as e:
739
- exc = e
740
- finally:
741
- rc = self._read_return_code()
742
- if trace:
743
- try:
744
- self.stata.run("set trace off")
745
- except Exception:
746
- pass
747
- finally:
748
- self._is_executing = False
1709
+ done = anyio.Event()
1710
+
1711
+ async with anyio.create_task_group() as tg:
1712
+ async def stream_smcl() -> None:
1713
+ await self._stream_smcl_log(
1714
+ smcl_path=smcl_path,
1715
+ notify_log=notify_log,
1716
+ done=done,
1717
+ on_chunk=on_chunk_for_graphs if graph_cache else None,
1718
+ )
1719
+
1720
+ tg.start_soon(stream_smcl)
749
1721
 
750
- try:
751
1722
  if notify_progress is not None:
752
- await notify_progress(0, None, "Running Stata command")
753
-
754
- await anyio.to_thread.run_sync(_run_blocking, abandon_on_cancel=True)
755
- except get_cancelled_exc_class():
756
- # Best-effort cancellation: signal Stata to break, wait briefly, then propagate.
757
- self._request_break_in()
758
- await self._wait_for_stata_stop()
759
- raise
760
- finally:
761
- tee.close()
1723
+ if total_lines > 0:
1724
+ await notify_progress(0, float(total_lines), f"Executing command: 0/{total_lines}")
1725
+ else:
1726
+ await notify_progress(0, None, "Running command")
762
1727
 
763
- # Cache detected graphs after command completes
764
- if graph_cache:
765
1728
  try:
766
- # Use the enhanced pystata-integrated caching method
767
- if hasattr(graph_cache, 'cache_detected_graphs_with_pystata'):
768
- cached_graphs = await graph_cache.cache_detected_graphs_with_pystata()
769
- else:
770
- cached_graphs = await graph_cache.cache_detected_graphs()
771
-
772
- if cached_graphs and notify_progress:
773
- await notify_progress(1, 1, f"Command completed. Cached {len(cached_graphs)} graphs: {', '.join(cached_graphs)}")
774
- except Exception as e:
775
- logger.warning(f"Failed to cache detected graphs: {e}")
1729
+ run_blocking = lambda: self._run_streaming_blocking(
1730
+ command=command,
1731
+ tee=tee,
1732
+ cwd=cwd,
1733
+ trace=trace,
1734
+ echo=echo,
1735
+ smcl_path=smcl_path,
1736
+ smcl_log_name=smcl_log_name,
1737
+ hold_attr="_hold_name_stream",
1738
+ )
1739
+ try:
1740
+ rc, exc = await anyio.to_thread.run_sync(
1741
+ run_blocking,
1742
+ abandon_on_cancel=True,
1743
+ )
1744
+ except TypeError:
1745
+ rc, exc = await anyio.to_thread.run_sync(run_blocking)
1746
+ except get_cancelled_exc_class():
1747
+ self._request_break_in()
1748
+ await self._wait_for_stata_stop()
1749
+ raise
1750
+ finally:
1751
+ done.set()
1752
+ tee.close()
776
1753
 
777
- tail_text = tail.get_value()
778
- log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
779
- if log_tail and len(log_tail) > len(tail_text):
780
- tail_text = log_tail
781
- combined = (tail_text or "") + (f"\n{exc}" if exc else "")
782
- rc_hint = self._parse_rc_from_text(combined) if combined else None
783
- if exc is None and rc_hint is not None and rc_hint != 0:
784
- rc = rc_hint
785
- if exc is None and rc_hint is None:
786
- rc = 0 if rc is None or rc != 0 else rc
787
- success = rc == 0 and exc is None
1754
+ # Read SMCL content as the authoritative source
1755
+ smcl_content = self._read_smcl_file(smcl_path)
1756
+
1757
+ await self._cache_new_graphs(
1758
+ graph_cache,
1759
+ notify_progress=notify_progress,
1760
+ total_lines=total_lines,
1761
+ completed_label="Command",
1762
+ )
1763
+ self._emit_graph_ready_task(
1764
+ emit_graph_ready=emit_graph_ready,
1765
+ graph_ready_initial=graph_ready_initial,
1766
+ notify_log=notify_log,
1767
+ graph_ready_task_id=graph_ready_task_id,
1768
+ graph_ready_format=graph_ready_format,
1769
+ )
1770
+
1771
+ combined = self._build_combined_log(tail, smcl_path, rc, trace, exc)
1772
+
1773
+ # Use SMCL content as primary source for RC detection
1774
+ if not exc or rc in (1, -1):
1775
+ parsed_rc = self._parse_rc_from_smcl(smcl_content)
1776
+ if parsed_rc is not None and parsed_rc != 0:
1777
+ rc = parsed_rc
1778
+ elif rc in (-1, 0, 1): # Also check text if rc is generic 1 or unset
1779
+ parsed_rc_text = self._parse_rc_from_text(combined)
1780
+ if parsed_rc_text is not None:
1781
+ rc = parsed_rc_text
1782
+ elif rc == -1:
1783
+ rc = 0 # Default to success if no error trace found
1784
+
1785
+ success = (rc == 0 and exc is None)
1786
+ stderr_final = None
788
1787
  error = None
1788
+
789
1789
  if not success:
790
- snippet = (tail_text[-800:] if tail_text else None) or (str(exc) if exc else None)
791
- rc_hint = self._parse_rc_from_text(combined) if combined else None
792
- rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
793
- line_no = self._parse_line_from_text(combined) if combined else None
794
- fallback = (str(exc).strip() if exc is not None else "") or "Stata error"
795
- if fallback == "Stata error" and rc_final is not None:
796
- fallback = f"Stata error r({rc_final})"
797
- message = self._select_stata_error_message(combined, fallback)
1790
+ # Use SMCL as authoritative source for error extraction
1791
+ if smcl_content:
1792
+ msg, context = self._extract_error_from_smcl(smcl_content, rc)
1793
+ else:
1794
+ # Fallback to combined log
1795
+ msg, context = self._extract_error_and_context(combined, rc)
798
1796
 
799
1797
  error = ErrorEnvelope(
800
- message=message,
801
- rc=rc_final,
802
- line=line_no,
803
- command=code,
1798
+ message=msg,
1799
+ context=context,
1800
+ rc=rc,
1801
+ command=command,
804
1802
  log_path=log_path,
805
- snippet=snippet,
806
- trace=trace or None,
1803
+ snippet=smcl_content[-800:] if smcl_content else combined[-800:],
1804
+ smcl_output=smcl_content,
807
1805
  )
1806
+ stderr_final = context
808
1807
 
809
1808
  duration = time.time() - start_time
810
- code_preview = code.replace("\n", "\\n")
811
1809
  logger.info(
812
1810
  "stata.run(stream) rc=%s success=%s trace=%s duration_ms=%.2f code_preview=%s",
813
1811
  rc,
814
1812
  success,
815
1813
  trace,
816
1814
  duration * 1000,
817
- code_preview[:120],
1815
+ code.replace("\n", "\\n")[:120],
818
1816
  )
819
1817
 
820
1818
  result = CommandResponse(
821
1819
  command=code,
822
1820
  rc=rc,
823
1821
  stdout="",
824
- stderr=None,
1822
+ stderr=stderr_final,
825
1823
  log_path=log_path,
826
1824
  success=success,
827
1825
  error=error,
1826
+ smcl_output=smcl_content,
828
1827
  )
829
1828
 
830
1829
  if notify_progress is not None:
831
1830
  await notify_progress(1, 1, "Finished")
832
1831
 
833
- return result
834
-
835
- def _count_do_file_lines(self, path: str) -> int:
836
- try:
837
- with open(path, "r", encoding="utf-8", errors="replace") as f:
838
- lines = f.read().splitlines()
839
- except Exception:
840
- return 0
841
-
842
- total = 0
843
- for line in lines:
844
- s = line.strip()
845
- if not s:
846
- continue
847
- if s.startswith("*"):
848
- continue
849
- if s.startswith("//"):
850
- continue
851
- total += 1
852
- return total
853
-
854
- async def run_do_file_streaming(
855
- self,
856
- path: str,
857
- *,
858
- notify_log: Callable[[str], Awaitable[None]],
859
- notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
860
- echo: bool = True,
861
- trace: bool = False,
862
- max_output_lines: Optional[int] = None,
863
- cwd: Optional[str] = None,
864
- auto_cache_graphs: bool = False,
865
- on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
866
- ) -> CommandResponse:
867
- if cwd is not None and not os.path.isdir(cwd):
868
- return CommandResponse(
869
- command=f'do "{path}"',
870
- rc=601,
871
- stdout="",
872
- stderr=None,
873
- success=False,
874
- error=ErrorEnvelope(
875
- message=f"cwd not found: {cwd}",
876
- rc=601,
877
- command=path,
878
- ),
879
- )
880
-
881
- effective_path = path
882
- if cwd is not None and not os.path.isabs(path):
883
- effective_path = os.path.abspath(os.path.join(cwd, path))
1832
+ return result
884
1833
 
885
- if not os.path.exists(effective_path):
886
- return CommandResponse(
887
- command=f'do "{effective_path}"',
888
- rc=601,
889
- stdout="",
890
- stderr=None,
891
- success=False,
892
- error=ErrorEnvelope(
893
- message=f"Do-file not found: {effective_path}",
894
- rc=601,
895
- command=effective_path,
896
- ),
897
- )
1834
+ async def run_do_file_streaming(
1835
+ self,
1836
+ path: str,
1837
+ *,
1838
+ notify_log: Callable[[str], Awaitable[None]],
1839
+ notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
1840
+ echo: bool = True,
1841
+ trace: bool = False,
1842
+ max_output_lines: Optional[int] = None,
1843
+ cwd: Optional[str] = None,
1844
+ auto_cache_graphs: bool = False,
1845
+ on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
1846
+ emit_graph_ready: bool = False,
1847
+ graph_ready_task_id: Optional[str] = None,
1848
+ graph_ready_format: str = "svg",
1849
+ ) -> CommandResponse:
1850
+ effective_path, command, error_response = self._resolve_do_file_path(path, cwd)
1851
+ if error_response is not None:
1852
+ return error_response
898
1853
 
899
1854
  total_lines = self._count_do_file_lines(effective_path)
900
1855
  executed_lines = 0
@@ -923,104 +1878,55 @@ class StataClient:
923
1878
  if not self._initialized:
924
1879
  self.init()
925
1880
 
1881
+ auto_cache_graphs = auto_cache_graphs or emit_graph_ready
1882
+
926
1883
  start_time = time.time()
927
1884
  exc: Optional[Exception] = None
1885
+ smcl_content = ""
1886
+ smcl_path = None
928
1887
 
929
- # Setup streaming graph cache if enabled
930
- graph_cache = None
931
- if auto_cache_graphs:
932
- graph_cache = StreamingGraphCache(self, auto_cache=True)
933
-
934
- graph_cache_callback = self._create_graph_cache_callback(on_graph_cached, notify_log)
935
-
936
- graph_cache.add_cache_callback(graph_cache_callback)
1888
+ graph_cache = self._init_streaming_graph_cache(auto_cache_graphs, on_graph_cached, notify_log)
1889
+ _log_file, log_path, tail, tee = self._create_streaming_log(trace=trace)
937
1890
 
938
- log_file = tempfile.NamedTemporaryFile(
939
- prefix="mcp_stata_",
940
- suffix=".log",
941
- delete=False,
942
- mode="w",
943
- encoding="utf-8",
944
- errors="replace",
945
- buffering=1,
946
- )
947
- log_path = log_file.name
948
- tail = TailBuffer(max_chars=200000 if trace else 20000)
949
- tee = FileTeeIO(log_file, tail)
1891
+ smcl_path = self._create_smcl_log_path()
1892
+ smcl_log_name = self._make_smcl_log_name()
950
1893
 
951
1894
  # Inform the MCP client immediately where to read/tail the output.
952
- await notify_log(json.dumps({"event": "log_path", "path": log_path}))
1895
+ await notify_log(json.dumps({"event": "log_path", "path": smcl_path}))
953
1896
 
954
1897
  rc = -1
955
- path_for_stata = effective_path.replace("\\", "/")
956
- command = f'do "{path_for_stata}"'
1898
+ graph_ready_initial = self._capture_graph_state(graph_cache, emit_graph_ready)
1899
+ graph_poll_state = [0.0]
1900
+
1901
+ async def on_chunk_for_graphs(_chunk: str) -> None:
1902
+ await self._maybe_cache_graphs_on_chunk(
1903
+ graph_cache=graph_cache,
1904
+ emit_graph_ready=emit_graph_ready,
1905
+ notify_log=notify_log,
1906
+ graph_ready_task_id=graph_ready_task_id,
1907
+ graph_ready_format=graph_ready_format,
1908
+ graph_ready_initial=graph_ready_initial,
1909
+ last_check=graph_poll_state,
1910
+ )
957
1911
 
958
- # Capture initial graph state BEFORE execution starts
959
- # This allows post-execution detection to identify new graphs
1912
+ on_chunk_callback = on_chunk_for_progress
960
1913
  if graph_cache:
961
- try:
962
- graph_cache._initial_graphs = set(self.list_graphs())
963
- logger.debug(f"Initial graph state captured: {graph_cache._initial_graphs}")
964
- except Exception as e:
965
- logger.debug(f"Failed to capture initial graph state: {e}")
966
- graph_cache._initial_graphs = set()
967
-
968
- def _run_blocking() -> None:
969
- nonlocal rc, exc
970
- with self._exec_lock:
971
- # Set execution flag to prevent recursive Stata calls
972
- self._is_executing = True
973
- try:
974
- with self._temp_cwd(cwd):
975
- with self._redirect_io_streaming(tee, tee):
976
- try:
977
- if trace:
978
- self.stata.run("set trace on")
979
- ret = self.stata.run(command, echo=echo)
980
- # Some PyStata builds return output as a string rather than printing.
981
- if isinstance(ret, str) and ret:
982
- try:
983
- tee.write(ret)
984
- except Exception:
985
- pass
986
- except Exception as e:
987
- exc = e
988
- finally:
989
- rc = self._read_return_code()
990
- if trace:
991
- try:
992
- self.stata.run("set trace off")
993
- except Exception:
994
- pass
995
- finally:
996
- # Clear execution flag
997
- self._is_executing = False
1914
+ async def on_chunk_callback(chunk: str) -> None:
1915
+ await on_chunk_for_progress(chunk)
1916
+ await on_chunk_for_graphs(chunk)
998
1917
 
999
1918
  done = anyio.Event()
1000
1919
 
1001
- async def _monitor_progress_from_log() -> None:
1002
- if notify_progress is None or total_lines <= 0:
1003
- return
1004
- last_pos = 0
1005
- try:
1006
- with open(log_path, "r", encoding="utf-8", errors="replace") as f:
1007
- while not done.is_set():
1008
- f.seek(last_pos)
1009
- chunk = f.read()
1010
- if chunk:
1011
- last_pos = f.tell()
1012
- await on_chunk_for_progress(chunk)
1013
- await anyio.sleep(0.05)
1014
-
1015
- f.seek(last_pos)
1016
- chunk = f.read()
1017
- if chunk:
1018
- await on_chunk_for_progress(chunk)
1019
- except Exception:
1020
- return
1021
-
1022
1920
  async with anyio.create_task_group() as tg:
1023
- tg.start_soon(_monitor_progress_from_log)
1921
+ async def stream_smcl() -> None:
1922
+ await self._stream_smcl_log(
1923
+ smcl_path=smcl_path,
1924
+ notify_log=notify_log,
1925
+ done=done,
1926
+ on_chunk=on_chunk_callback,
1927
+ )
1928
+
1929
+ tg.start_soon(stream_smcl)
1024
1930
 
1025
1931
  if notify_progress is not None:
1026
1932
  if total_lines > 0:
@@ -1029,7 +1935,23 @@ class StataClient:
1029
1935
  await notify_progress(0, None, "Running do-file")
1030
1936
 
1031
1937
  try:
1032
- await anyio.to_thread.run_sync(_run_blocking, abandon_on_cancel=True)
1938
+ run_blocking = lambda: self._run_streaming_blocking(
1939
+ command=command,
1940
+ tee=tee,
1941
+ cwd=cwd,
1942
+ trace=trace,
1943
+ echo=echo,
1944
+ smcl_path=smcl_path,
1945
+ smcl_log_name=smcl_log_name,
1946
+ hold_attr="_hold_name_do",
1947
+ )
1948
+ try:
1949
+ rc, exc = await anyio.to_thread.run_sync(
1950
+ run_blocking,
1951
+ abandon_on_cancel=True,
1952
+ )
1953
+ except TypeError:
1954
+ rc, exc = await anyio.to_thread.run_sync(run_blocking)
1033
1955
  except get_cancelled_exc_class():
1034
1956
  self._request_break_in()
1035
1957
  await self._wait_for_stata_stop()
@@ -1038,109 +1960,59 @@ class StataClient:
1038
1960
  done.set()
1039
1961
  tee.close()
1040
1962
 
1041
- # Robust post-execution graph detection and caching
1042
- # This is the ONLY place where graphs are detected and cached
1043
- # Runs after execution completes, when it's safe to call list_graphs()
1044
- if graph_cache and graph_cache.auto_cache:
1045
- cached_graphs = []
1046
- try:
1047
- # Get initial state (before execution)
1048
- initial_graphs = getattr(graph_cache, '_initial_graphs', set())
1049
-
1050
- # Get current state (after execution)
1051
- logger.debug("Post-execution: Querying graph state via list_graphs()")
1052
- current_graphs = set(self.list_graphs())
1053
-
1054
- # Detect new graphs (created during execution)
1055
- new_graphs = current_graphs - initial_graphs - graph_cache._cached_graphs
1056
-
1057
- if new_graphs:
1058
- logger.info(f"Detected {len(new_graphs)} new graph(s): {sorted(new_graphs)}")
1059
-
1060
- # Cache each detected graph
1061
- for graph_name in new_graphs:
1062
- try:
1063
- logger.debug(f"Caching graph: {graph_name}")
1064
- cache_result = await anyio.to_thread.run_sync(
1065
- self.cache_graph_on_creation,
1066
- graph_name
1067
- )
1068
-
1069
- if cache_result:
1070
- cached_graphs.append(graph_name)
1071
- graph_cache._cached_graphs.add(graph_name)
1072
- logger.debug(f"Successfully cached graph: {graph_name}")
1073
- else:
1074
- logger.warning(f"Failed to cache graph: {graph_name}")
1075
-
1076
- # Trigger callbacks
1077
- for callback in graph_cache._cache_callbacks:
1078
- try:
1079
- await anyio.to_thread.run_sync(callback, graph_name, cache_result)
1080
- except Exception as e:
1081
- logger.debug(f"Callback failed for {graph_name}: {e}")
1082
-
1083
- except Exception as e:
1084
- logger.error(f"Error caching graph {graph_name}: {e}")
1085
- # Trigger callbacks with failure
1086
- for callback in graph_cache._cache_callbacks:
1087
- try:
1088
- await anyio.to_thread.run_sync(callback, graph_name, False)
1089
- except Exception:
1090
- pass
1091
-
1092
- # Check for dropped graphs (for completeness)
1093
- dropped_graphs = initial_graphs - current_graphs
1094
- if dropped_graphs:
1095
- logger.debug(f"Graphs dropped during execution: {sorted(dropped_graphs)}")
1096
- for graph_name in dropped_graphs:
1097
- try:
1098
- self.invalidate_graph_cache(graph_name)
1099
- except Exception:
1100
- pass
1101
-
1102
- # Notify progress if graphs were cached
1103
- if cached_graphs and notify_progress:
1104
- await notify_progress(
1105
- float(total_lines) if total_lines > 0 else 1,
1106
- float(total_lines) if total_lines > 0 else 1,
1107
- f"Do-file completed. Cached {len(cached_graphs)} graph(s): {', '.join(cached_graphs)}"
1108
- )
1963
+ # Read SMCL content as the authoritative source
1964
+ smcl_content = self._read_smcl_file(smcl_path)
1109
1965
 
1110
- except Exception as e:
1111
- logger.error(f"Post-execution graph detection failed: {e}")
1966
+ await self._cache_new_graphs(
1967
+ graph_cache,
1968
+ notify_progress=notify_progress,
1969
+ total_lines=total_lines,
1970
+ completed_label="Do-file",
1971
+ )
1972
+ self._emit_graph_ready_task(
1973
+ emit_graph_ready=emit_graph_ready,
1974
+ graph_ready_initial=graph_ready_initial,
1975
+ notify_log=notify_log,
1976
+ graph_ready_task_id=graph_ready_task_id,
1977
+ graph_ready_format=graph_ready_format,
1978
+ )
1112
1979
 
1113
- tail_text = tail.get_value()
1114
- log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
1115
- if log_tail and len(log_tail) > len(tail_text):
1116
- tail_text = log_tail
1117
- combined = (tail_text or "") + (f"\n{exc}" if exc else "")
1118
- rc_hint = self._parse_rc_from_text(combined) if combined else None
1119
- if exc is None and rc_hint is not None and rc_hint != 0:
1120
- rc = rc_hint
1121
- if exc is None and rc_hint is None:
1122
- rc = 0 if rc is None or rc != 0 else rc
1123
- success = rc == 0 and exc is None
1980
+ combined = self._build_combined_log(tail, log_path, rc, trace, exc)
1981
+
1982
+ # Use SMCL content as primary source for RC detection
1983
+ if not exc or rc in (1, -1):
1984
+ parsed_rc = self._parse_rc_from_smcl(smcl_content)
1985
+ if parsed_rc is not None and parsed_rc != 0:
1986
+ rc = parsed_rc
1987
+ elif rc in (-1, 0, 1):
1988
+ parsed_rc_text = self._parse_rc_from_text(combined)
1989
+ if parsed_rc_text is not None:
1990
+ rc = parsed_rc_text
1991
+ elif rc == -1:
1992
+ rc = 0 # Default to success if no error found
1993
+
1994
+ success = (rc == 0 and exc is None)
1995
+ stderr_final = None
1124
1996
  error = None
1997
+
1125
1998
  if not success:
1126
- snippet = (tail_text[-800:] if tail_text else None) or (str(exc) if exc else None)
1127
- rc_hint = self._parse_rc_from_text(combined) if combined else None
1128
- rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
1129
- line_no = self._parse_line_from_text(combined) if combined else None
1130
- fallback = (str(exc).strip() if exc is not None else "") or "Stata error"
1131
- if fallback == "Stata error" and rc_final is not None:
1132
- fallback = f"Stata error r({rc_final})"
1133
- message = self._select_stata_error_message(combined, fallback)
1999
+ # Use SMCL as authoritative source for error extraction
2000
+ if smcl_content:
2001
+ msg, context = self._extract_error_from_smcl(smcl_content, rc)
2002
+ else:
2003
+ # Fallback to combined log
2004
+ msg, context = self._extract_error_and_context(combined, rc)
1134
2005
 
1135
2006
  error = ErrorEnvelope(
1136
- message=message,
1137
- rc=rc_final,
1138
- line=line_no,
2007
+ message=msg,
2008
+ context=context,
2009
+ rc=rc,
1139
2010
  command=command,
1140
2011
  log_path=log_path,
1141
- snippet=snippet,
1142
- trace=trace or None,
2012
+ snippet=smcl_content[-800:] if smcl_content else combined[-800:],
2013
+ smcl_output=smcl_content,
1143
2014
  )
2015
+ stderr_final = context
1144
2016
 
1145
2017
  duration = time.time() - start_time
1146
2018
  logger.info(
@@ -1156,10 +2028,11 @@ class StataClient:
1156
2028
  command=command,
1157
2029
  rc=rc,
1158
2030
  stdout="",
1159
- stderr=None,
2031
+ stderr=stderr_final,
1160
2032
  log_path=log_path,
1161
2033
  success=success,
1162
2034
  error=error,
2035
+ smcl_output=smcl_content,
1163
2036
  )
1164
2037
 
1165
2038
  if notify_progress is not None:
@@ -1181,22 +2054,7 @@ class StataClient:
1181
2054
  """
1182
2055
  result = self._exec_with_capture(code, echo=echo, trace=trace, cwd=cwd)
1183
2056
 
1184
- # Truncate stdout if requested
1185
- if max_output_lines is not None and result.stdout:
1186
- lines = result.stdout.splitlines()
1187
- if len(lines) > max_output_lines:
1188
- truncated_lines = lines[:max_output_lines]
1189
- truncated_lines.append(f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)")
1190
- result = CommandResponse(
1191
- command=result.command,
1192
- rc=result.rc,
1193
- stdout="\n".join(truncated_lines),
1194
- stderr=result.stderr,
1195
- success=result.success,
1196
- error=result.error,
1197
- )
1198
-
1199
- return result
2057
+ return self._truncate_command_output(result, max_output_lines)
1200
2058
 
1201
2059
  def get_data(self, start: int = 0, count: int = 50) -> List[Dict[str, Any]]:
1202
2060
  """Returns valid JSON-serializable data."""
@@ -1253,16 +2111,19 @@ class StataClient:
1253
2111
  sortlist = ""
1254
2112
  changed = False
1255
2113
  try:
1256
- frame = str(Macro.getCValue("frame") or "default")
2114
+ frame = str(Macro.getGlobal("frame") or "default")
1257
2115
  except Exception:
2116
+ logger.debug("Failed to get 'frame' macro", exc_info=True)
1258
2117
  frame = "default"
1259
2118
  try:
1260
- sortlist = str(Macro.getCValue("sortlist") or "")
2119
+ sortlist = str(Macro.getGlobal("sortlist") or "")
1261
2120
  except Exception:
2121
+ logger.debug("Failed to get 'sortlist' macro", exc_info=True)
1262
2122
  sortlist = ""
1263
2123
  try:
1264
- changed = bool(int(float(Macro.getCValue("changed") or "0")))
2124
+ changed = bool(int(float(Macro.getGlobal("changed") or "0")))
1265
2125
  except Exception:
2126
+ logger.debug("Failed to get 'changed' macro", exc_info=True)
1266
2127
  changed = False
1267
2128
 
1268
2129
  return {"frame": frame, "n": n, "k": k, "sortlist": sortlist, "changed": changed}
@@ -1411,6 +2272,96 @@ class StataClient:
1411
2272
  "truncated_cells": truncated_cells,
1412
2273
  }
1413
2274
 
2275
+ def get_arrow_stream(
2276
+ self,
2277
+ *,
2278
+ offset: int,
2279
+ limit: int,
2280
+ vars: List[str],
2281
+ include_obs_no: bool,
2282
+ obs_indices: Optional[List[int]] = None,
2283
+ ) -> bytes:
2284
+ """
2285
+ Returns an Apache Arrow IPC stream (as bytes) for the requested data page.
2286
+ Uses Polars if available (faster), falls back to Pandas.
2287
+ """
2288
+ if not self._initialized:
2289
+ self.init()
2290
+
2291
+ import pyarrow as pa
2292
+ from sfi import Data # type: ignore[import-not-found]
2293
+
2294
+ use_polars = _get_polars_available()
2295
+ if use_polars:
2296
+ import polars as pl
2297
+ else:
2298
+ import pandas as pd
2299
+
2300
+ state = self.get_dataset_state()
2301
+ n = int(state.get("n", 0) or 0)
2302
+ k = int(state.get("k", 0) or 0)
2303
+ if k == 0 and n == 0:
2304
+ raise RuntimeError("No data in memory")
2305
+
2306
+ var_map = self._get_var_index_map()
2307
+ for v in vars:
2308
+ if v not in var_map:
2309
+ raise ValueError(f"Invalid variable: {v}")
2310
+
2311
+ # Determine observations to fetch
2312
+ if obs_indices is None:
2313
+ start = offset
2314
+ end = min(offset + limit, n)
2315
+ obs_list = list(range(start, end)) if start < n else []
2316
+ else:
2317
+ start = offset
2318
+ end = min(offset + limit, len(obs_indices))
2319
+ obs_list = obs_indices[start:end]
2320
+
2321
+ try:
2322
+ if not obs_list:
2323
+ # Empty schema-only table
2324
+ if use_polars:
2325
+ schema_cols = {}
2326
+ if include_obs_no:
2327
+ schema_cols["_n"] = pl.Int64
2328
+ for v in vars:
2329
+ schema_cols[v] = pl.Utf8
2330
+ table = pl.DataFrame(schema=schema_cols).to_arrow()
2331
+ else:
2332
+ columns = {}
2333
+ if include_obs_no:
2334
+ columns["_n"] = pa.array([], type=pa.int64())
2335
+ for v in vars:
2336
+ columns[v] = pa.array([], type=pa.string())
2337
+ table = pa.table(columns)
2338
+ else:
2339
+ # Fetch all data in one C-call
2340
+ raw_data = Data.get(var=vars, obs=obs_list, valuelabel=False)
2341
+
2342
+ if use_polars:
2343
+ df = pl.DataFrame(raw_data, schema=vars, orient="row")
2344
+ if include_obs_no:
2345
+ obs_nums = [i + 1 for i in obs_list]
2346
+ df = df.with_columns(pl.Series("_n", obs_nums, dtype=pl.Int64))
2347
+ df = df.select(["_n"] + vars)
2348
+ table = df.to_arrow()
2349
+ else:
2350
+ df = pd.DataFrame(raw_data, columns=vars)
2351
+ if include_obs_no:
2352
+ df.insert(0, "_n", [i + 1 for i in obs_list])
2353
+ table = pa.Table.from_pandas(df, preserve_index=False)
2354
+
2355
+ # Serialize to IPC Stream
2356
+ sink = pa.BufferOutputStream()
2357
+ with pa.RecordBatchStreamWriter(sink, table.schema) as writer:
2358
+ writer.write_table(table)
2359
+
2360
+ return sink.getvalue().to_pybytes()
2361
+
2362
+ except Exception as e:
2363
+ raise RuntimeError(f"Failed to generate Arrow stream: {e}")
2364
+
1414
2365
  _FILTER_IDENT = re.compile(r"\b[A-Za-z_][A-Za-z0-9_]*\b")
1415
2366
 
1416
2367
  def _extract_filter_vars(self, filter_expr: str) -> List[str]:
@@ -1599,15 +2550,21 @@ class StataClient:
1599
2550
 
1600
2551
  # Cache miss or expired, fetch fresh data
1601
2552
  try:
1602
- # 'graph dir' returns list in r(list)
1603
- # We need to ensure we run it quietly so we don't spam.
1604
- self.stata.run("quietly graph dir, memory")
1605
-
1606
- # Accessing r-class results in Python can be tricky via pystata's run command.
1607
- # We stash the result in a global macro that python sfi can easily read.
1608
- from sfi import Macro # type: ignore[import-not-found]
1609
- self.stata.run("global mcp_graph_list `r(list)'")
1610
- graph_list_str = Macro.getGlobal("mcp_graph_list")
2553
+ # Preservation of r() results is critical because this can be called
2554
+ # automatically after every user command (e.g., during streaming).
2555
+ import time
2556
+ hold_name = f"_mcp_ghold_{int(time.time() * 1000 % 1000000)}"
2557
+ self.stata.run(f"capture _return hold {hold_name}", echo=False)
2558
+
2559
+ try:
2560
+ self.stata.run("macro define mcp_graph_list \"\"", echo=False)
2561
+ self.stata.run("quietly graph dir, memory", echo=False)
2562
+ from sfi import Macro # type: ignore[import-not-found]
2563
+ self.stata.run("macro define mcp_graph_list `r(list)'", echo=False)
2564
+ graph_list_str = Macro.getGlobal("mcp_graph_list")
2565
+ finally:
2566
+ self.stata.run(f"capture _return restore {hold_name}", echo=False)
2567
+
1611
2568
  raw_list = graph_list_str.split() if graph_list_str else []
1612
2569
 
1613
2570
  # Map internal Stata names back to user-facing names when we have an alias.
@@ -1619,7 +2576,7 @@ class StataClient:
1619
2576
  # Update cache
1620
2577
  with self._list_graphs_cache_lock:
1621
2578
  self._list_graphs_cache = result
1622
- self._list_graphs_cache_time = current_time
2579
+ self._list_graphs_cache_time = time.time()
1623
2580
 
1624
2581
  return result
1625
2582
 
@@ -1654,8 +2611,8 @@ class StataClient:
1654
2611
  import tempfile
1655
2612
 
1656
2613
  fmt = (format or "pdf").strip().lower()
1657
- if fmt not in {"pdf", "png"}:
1658
- raise ValueError(f"Unsupported graph export format: {format}. Allowed: pdf, png.")
2614
+ if fmt not in {"pdf", "png", "svg"}:
2615
+ raise ValueError(f"Unsupported graph export format: {format}. Allowed: pdf, png, svg.")
1659
2616
 
1660
2617
  if not filename:
1661
2618
  suffix = f".{fmt}"
@@ -1808,73 +2765,77 @@ class StataClient:
1808
2765
  logger.warning("SMCL to Markdown failed, falling back to plain text: %s", parse_err)
1809
2766
  return self._smcl_to_text(smcl)
1810
2767
  except Exception as e:
1811
- return f"Error reading help file at {fn}: {e}"
2768
+ logger.warning("Help file read failed for %s: %s", topic, e)
2769
+
2770
+ # If no help file found, return a fallback message
2771
+ return f"Help file for '{topic}' not found."
1812
2772
 
1813
- # Fallback to URL if file not found
1814
- return f"Help file for '{topic}' not found. Please consult: https://www.stata.com/help.cgi?{topic}"
2773
+ def get_stored_results(self, force_fresh: bool = False) -> Dict[str, Any]:
2774
+ """Returns e() and r() results using SFI for maximum reliability."""
2775
+ if not force_fresh and self._last_results is not None:
2776
+ return self._last_results
1815
2777
 
1816
- def get_stored_results(self) -> Dict[str, Any]:
1817
- """Returns e() and r() results."""
1818
2778
  if not self._initialized:
1819
2779
  self.init()
1820
2780
 
1821
- results = {"r": {}, "e": {}}
1822
-
1823
- # We parse 'return list' output as there is no direct bulk export of stored results
1824
- raw_r_resp = self.run_command_structured("return list", echo=True)
1825
- raw_e_resp = self.run_command_structured("ereturn list", echo=True)
1826
- raw_r = raw_r_resp.stdout if raw_r_resp.success else (raw_r_resp.error.snippet if raw_r_resp.error else "")
1827
- raw_e = raw_e_resp.stdout if raw_e_resp.success else (raw_e_resp.error.snippet if raw_e_resp.error else "")
1828
-
1829
- # Simple parser
1830
- def parse_list(text):
1831
- data = {}
1832
- # We don't strictly need to track sections if we check patterns
1833
- for line in text.splitlines():
1834
- line = line.strip()
1835
- if not line:
1836
- continue
1837
-
1838
- # scalars: r(name) = value
1839
- if "=" in line and ("r(" in line or "e(" in line):
1840
- try:
1841
- name_part, val_part = line.split("=", 1)
1842
- name_part = name_part.strip() # "r(mean)"
1843
- val_part = val_part.strip() # "6165.2..."
1844
-
1845
- # Extract just the name inside r(...) if desired,
1846
- # or keep full key "r(mean)".
1847
- # User likely wants "mean" inside "r" dict.
1848
-
1849
- if "(" in name_part and name_part.endswith(")"):
1850
- # r(mean) -> mean
1851
- start = name_part.find("(") + 1
1852
- end = name_part.find(")")
1853
- key = name_part[start:end]
1854
- data[key] = val_part
1855
- except Exception:
1856
- pass
1857
-
1858
- # macros: r(name) : "value"
1859
- elif ":" in line and ("r(" in line or "e(" in line):
1860
- try:
1861
- name_part, val_part = line.split(":", 1)
1862
- name_part = name_part.strip()
1863
- val_part = val_part.strip().strip('"')
1864
-
1865
- if "(" in name_part and name_part.endswith(")"):
1866
- start = name_part.find("(") + 1
1867
- end = name_part.find(")")
1868
- key = name_part[start:end]
1869
- data[key] = val_part
1870
- except Exception:
1871
- pass
1872
- return data
1873
-
1874
- results["r"] = parse_list(raw_r)
1875
- results["e"] = parse_list(raw_e)
1876
-
1877
- return results
2781
+ with self._exec_lock:
2782
+ # We must be extremely careful not to clobber r()/e() while fetching their names.
2783
+ # We use a hold to peek at the results.
2784
+ hold_name = f"mcp_peek_{uuid.uuid4().hex[:8]}"
2785
+ self.stata.run(f"capture _return hold {hold_name}", echo=False)
2786
+
2787
+ try:
2788
+ from sfi import Scalar, Macro
2789
+ results = {"r": {}, "e": {}}
2790
+
2791
+ for rclass in ["r", "e"]:
2792
+ # Restore with 'hold' to peek at results without losing them from the hold
2793
+ # Note: Stata 18+ supports 'restore ..., hold' which is ideal.
2794
+ self.stata.run(f"capture _return restore {hold_name}, hold", echo=False)
2795
+
2796
+ # Fetch names using backtick expansion (which we verified works better than colon)
2797
+ # and avoid leading underscores which were causing syntax errors with 'global'
2798
+ self.stata.run(f"macro define mcp_scnames `: {rclass}(scalars)'", echo=False)
2799
+ self.stata.run(f"macro define mcp_macnames `: {rclass}(macros)'", echo=False)
2800
+
2801
+ # 1. Capture Scalars
2802
+ names_str = Macro.getGlobal("mcp_scnames")
2803
+ if names_str:
2804
+ for name in names_str.split():
2805
+ try:
2806
+ val = Scalar.getValue(f"{rclass}({name})")
2807
+ results[rclass][name] = val
2808
+ except Exception:
2809
+ pass
2810
+
2811
+ # 2. Capture Macros (strings)
2812
+ macros_str = Macro.getGlobal("mcp_macnames")
2813
+ if macros_str:
2814
+ for name in macros_str.split():
2815
+ try:
2816
+ # Restore/Hold again to be safe before fetching each macro
2817
+ self.stata.run(f"capture _return restore {hold_name}, hold", echo=False)
2818
+ # Capture the string value into a macro
2819
+ self.stata.run(f"macro define mcp_mval `{rclass}({name})'", echo=False)
2820
+ val = Macro.getGlobal("mcp_mval")
2821
+ results[rclass][name] = val
2822
+ except Exception:
2823
+ pass
2824
+
2825
+ # Cleanup
2826
+ self.stata.run("macro drop mcp_scnames mcp_macnames mcp_mval", echo=False)
2827
+ self.stata.run(f"capture _return restore {hold_name}", echo=False) # Restore one last time to leave Stata in correct state
2828
+
2829
+ self._last_results = results
2830
+ return results
2831
+ except Exception as e:
2832
+ logger.error(f"SFI-based get_stored_results failed: {e}")
2833
+ # Try to clean up hold if we failed
2834
+ try:
2835
+ self.stata.run(f"capture _return drop {hold_name}", echo=False)
2836
+ except Exception:
2837
+ pass
2838
+ return {"r": {}, "e": {}}
1878
2839
 
1879
2840
  def invalidate_graph_cache(self, graph_name: str = None) -> None:
1880
2841
  """Invalidate cache for specific graph or all graphs.
@@ -2324,117 +3285,79 @@ class StataClient:
2324
3285
  return False
2325
3286
 
2326
3287
  def run_do_file(self, path: str, echo: bool = True, trace: bool = False, max_output_lines: Optional[int] = None, cwd: Optional[str] = None) -> CommandResponse:
2327
- if cwd is not None and not os.path.isdir(cwd):
2328
- return CommandResponse(
2329
- command=f'do "{path}"',
2330
- rc=601,
2331
- stdout="",
2332
- stderr=None,
2333
- success=False,
2334
- error=ErrorEnvelope(
2335
- message=f"cwd not found: {cwd}",
2336
- rc=601,
2337
- command=path,
2338
- ),
2339
- )
2340
-
2341
- effective_path = path
2342
- if cwd is not None and not os.path.isabs(path):
2343
- effective_path = os.path.abspath(os.path.join(cwd, path))
2344
-
2345
- if not os.path.exists(effective_path):
2346
- return CommandResponse(
2347
- command=f'do "{effective_path}"',
2348
- rc=601,
2349
- stdout="",
2350
- stderr=None,
2351
- success=False,
2352
- error=ErrorEnvelope(
2353
- message=f"Do-file not found: {effective_path}",
2354
- rc=601,
2355
- command=effective_path,
2356
- ),
2357
- )
3288
+ effective_path, command, error_response = self._resolve_do_file_path(path, cwd)
3289
+ if error_response is not None:
3290
+ return error_response
2358
3291
 
2359
3292
  if not self._initialized:
2360
3293
  self.init()
2361
3294
 
2362
3295
  start_time = time.time()
2363
3296
  exc: Optional[Exception] = None
2364
- path_for_stata = effective_path.replace("\\", "/")
2365
- command = f'do "{path_for_stata}"'
3297
+ smcl_content = ""
3298
+ smcl_path = None
2366
3299
 
2367
- log_file = tempfile.NamedTemporaryFile(
2368
- prefix="mcp_stata_",
2369
- suffix=".log",
2370
- delete=False,
2371
- mode="w",
2372
- encoding="utf-8",
2373
- errors="replace",
2374
- buffering=1,
2375
- )
2376
- log_path = log_file.name
2377
- tail = TailBuffer(max_chars=200000 if trace else 20000)
2378
- tee = FileTeeIO(log_file, tail)
3300
+ _log_file, log_path, tail, tee = self._create_streaming_log(trace=trace)
3301
+ smcl_path = self._create_smcl_log_path()
3302
+ smcl_log_name = self._make_smcl_log_name()
2379
3303
 
2380
3304
  rc = -1
3305
+ try:
3306
+ rc, exc = self._run_streaming_blocking(
3307
+ command=command,
3308
+ tee=tee,
3309
+ cwd=cwd,
3310
+ trace=trace,
3311
+ echo=echo,
3312
+ smcl_path=smcl_path,
3313
+ smcl_log_name=smcl_log_name,
3314
+ hold_attr="_hold_name_do_sync",
3315
+ require_smcl_log=True,
3316
+ )
3317
+ except Exception as e:
3318
+ exc = e
3319
+ rc = 1
3320
+ finally:
3321
+ tee.close()
2381
3322
 
2382
- with self._exec_lock:
2383
- with self._temp_cwd(cwd):
2384
- with self._redirect_io_streaming(tee, tee):
2385
- try:
2386
- if trace:
2387
- self.stata.run("set trace on")
2388
- ret = self.stata.run(command, echo=echo)
2389
- # Some PyStata builds return output as a string rather than printing.
2390
- if isinstance(ret, str) and ret:
2391
- try:
2392
- tee.write(ret)
2393
- except Exception:
2394
- pass
2395
- except Exception as e:
2396
- exc = e
2397
- finally:
2398
- rc = self._read_return_code()
2399
- if trace:
2400
- try:
2401
- self.stata.run("set trace off")
2402
- except Exception:
2403
- pass
2404
-
2405
- tee.close()
3323
+ # Read SMCL content as the authoritative source
3324
+ smcl_content = self._read_smcl_file(smcl_path)
2406
3325
 
2407
- tail_text = tail.get_value()
2408
- log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
2409
- if log_tail and len(log_tail) > len(tail_text):
2410
- tail_text = log_tail
2411
- combined = (tail_text or "") + (f"\n{exc}" if exc else "")
2412
- rc_hint = self._parse_rc_from_text(combined) if combined else None
2413
- if exc is None and rc_hint is not None and rc_hint != 0:
2414
- rc = rc_hint
2415
- if exc is None and rc_hint is None:
2416
- rc = 0 if rc is None or rc != 0 else rc
2417
- success = rc == 0 and exc is None
3326
+ combined = self._build_combined_log(tail, log_path, rc, trace, exc)
2418
3327
 
3328
+ # Use SMCL content as primary source for RC detection if not already captured
3329
+ if rc == -1 and not exc:
3330
+ parsed_rc = self._parse_rc_from_smcl(smcl_content)
3331
+ if parsed_rc is not None:
3332
+ rc = parsed_rc
3333
+ else:
3334
+ # Fallback to text parsing
3335
+ parsed_rc = self._parse_rc_from_text(combined)
3336
+ rc = parsed_rc if parsed_rc is not None else 0
3337
+ elif exc and rc == 1:
3338
+ # Try to parse more specific RC from exception message
3339
+ parsed_rc = self._parse_rc_from_text(str(exc))
3340
+ if parsed_rc is not None:
3341
+ rc = parsed_rc
3342
+
3343
+ success = (rc == 0 and exc is None)
2419
3344
  error = None
3345
+
2420
3346
  if not success:
2421
- snippet = (tail_text[-800:] if tail_text else None) or (str(exc) if exc else None)
2422
- rc_hint = self._parse_rc_from_text(combined) if combined else None
2423
- rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
2424
- line_no = self._parse_line_from_text(combined) if combined else None
2425
- fallback = (str(exc).strip() if exc is not None else "") or "Stata error"
2426
- if fallback == "Stata error" and rc_final is not None:
2427
- fallback = f"Stata error r({rc_final})"
2428
- message = self._select_stata_error_message(combined, fallback)
3347
+ # Use SMCL as authoritative source for error extraction
3348
+ if smcl_content:
3349
+ msg, context = self._extract_error_from_smcl(smcl_content, rc)
3350
+ else:
3351
+ # Fallback to combined log
3352
+ msg, context = self._extract_error_and_context(combined, rc)
2429
3353
 
2430
3354
  error = ErrorEnvelope(
2431
- message=message,
2432
- rc=rc_final,
2433
- line=line_no,
3355
+ message=msg,
3356
+ rc=rc,
3357
+ snippet=context,
2434
3358
  command=command,
2435
3359
  log_path=log_path,
2436
- snippet=snippet,
2437
- trace=trace or None,
3360
+ smcl_output=smcl_content,
2438
3361
  )
2439
3362
 
2440
3363
  duration = time.time() - start_time
@@ -2455,6 +3378,7 @@ class StataClient:
2455
3378
  log_path=log_path,
2456
3379
  success=success,
2457
3380
  error=error,
3381
+ smcl_output=smcl_content,
2458
3382
  )
2459
3383
 
2460
3384
  def load_data(self, source: str, clear: bool = True, max_output_lines: Optional[int] = None) -> CommandResponse:
@@ -2473,40 +3397,8 @@ class StataClient:
2473
3397
  cmd = f"sysuse {src}{clear_suffix}"
2474
3398
 
2475
3399
  result = self._exec_with_capture(cmd, echo=True, trace=False)
2476
-
2477
- # Truncate stdout if requested
2478
- if max_output_lines is not None and result.stdout:
2479
- lines = result.stdout.splitlines()
2480
- if len(lines) > max_output_lines:
2481
- truncated_lines = lines[:max_output_lines]
2482
- truncated_lines.append(f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)")
2483
- result = CommandResponse(
2484
- command=result.command,
2485
- rc=result.rc,
2486
- stdout="\n".join(truncated_lines),
2487
- stderr=result.stderr,
2488
- success=result.success,
2489
- error=result.error,
2490
- )
2491
-
2492
- return result
3400
+ return self._truncate_command_output(result, max_output_lines)
2493
3401
 
2494
3402
  def codebook(self, varname: str, trace: bool = False, max_output_lines: Optional[int] = None) -> CommandResponse:
2495
3403
  result = self._exec_with_capture(f"codebook {varname}", trace=trace)
2496
-
2497
- # Truncate stdout if requested
2498
- if max_output_lines is not None and result.stdout:
2499
- lines = result.stdout.splitlines()
2500
- if len(lines) > max_output_lines:
2501
- truncated_lines = lines[:max_output_lines]
2502
- truncated_lines.append(f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)")
2503
- result = CommandResponse(
2504
- command=result.command,
2505
- rc=result.rc,
2506
- stdout="\n".join(truncated_lines),
2507
- stderr=result.stderr,
2508
- success=result.success,
2509
- error=result.error,
2510
- )
2511
-
2512
- return result
3404
+ return self._truncate_command_output(result, max_output_lines)