mcp-stata 1.7.6__py3-none-any.whl → 1.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-stata might be problematic. Click here for more details.

mcp_stata/stata_client.py CHANGED
@@ -1,22 +1,26 @@
1
- import base64
1
+ import asyncio
2
+ import inspect
2
3
  import json
3
4
  import logging
4
5
  import os
6
+ import platform
5
7
  import re
6
8
  import subprocess
7
9
  import sys
8
- import threading
9
- from importlib.metadata import PackageNotFoundError, version
10
10
  import tempfile
11
+ import threading
11
12
  import time
13
+ import uuid
12
14
  from contextlib import contextmanager
15
+ from importlib.metadata import PackageNotFoundError, version
13
16
  from io import StringIO
14
- from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple
17
+ from typing import Any, Awaitable, Callable, Dict, Generator, List, Optional, Tuple
15
18
 
16
19
  import anyio
17
20
  from anyio import get_cancelled_exc_class
18
21
 
19
- from .discovery import find_stata_path
22
+ from .discovery import find_stata_candidates
23
+ from .config import MAX_LIMIT
20
24
  from .models import (
21
25
  CommandResponse,
22
26
  ErrorEnvelope,
@@ -33,6 +37,29 @@ from .graph_detector import StreamingGraphCache
33
37
 
34
38
  logger = logging.getLogger("mcp_stata")
35
39
 
40
+ _POLARS_AVAILABLE: Optional[bool] = None
41
+
42
+ def _check_polars_available() -> bool:
43
+ """
44
+ Check if Polars can be safely imported.
45
+ Must detect problematic platforms BEFORE attempting import,
46
+ since the crash is a fatal signal, not a catchable exception.
47
+ """
48
+ if sys.platform == "win32" and platform.machine().lower() in ("arm64", "aarch64"):
49
+ return False
50
+
51
+ try:
52
+ import polars # noqa: F401
53
+ return True
54
+ except ImportError:
55
+ return False
56
+
57
+
58
+ def _get_polars_available() -> bool:
59
+ global _POLARS_AVAILABLE
60
+ if _POLARS_AVAILABLE is None:
61
+ _POLARS_AVAILABLE = _check_polars_available()
62
+ return _POLARS_AVAILABLE
36
63
 
37
64
  # ============================================================================
38
65
  # MODULE-LEVEL DISCOVERY CACHE
@@ -40,26 +67,30 @@ logger = logging.getLogger("mcp_stata")
40
67
  # This cache ensures Stata discovery runs exactly once per process lifetime
41
68
  _discovery_lock = threading.Lock()
42
69
  _discovery_result: Optional[Tuple[str, str]] = None # (path, edition)
70
+ _discovery_candidates: Optional[List[Tuple[str, str]]] = None
43
71
  _discovery_attempted = False
44
72
  _discovery_error: Optional[Exception] = None
45
73
 
46
74
 
47
- def _get_discovered_stata() -> Tuple[str, str]:
75
+ def _get_discovery_candidates() -> List[Tuple[str, str]]:
48
76
  """
49
- Get the discovered Stata path and edition, running discovery only once.
77
+ Get ordered discovery candidates, running discovery only once.
50
78
 
51
79
  Returns:
52
- Tuple of (stata_executable_path, edition)
80
+ List of (stata_executable_path, edition) ordered by preference.
53
81
 
54
82
  Raises:
55
83
  RuntimeError: If Stata discovery fails
56
84
  """
57
- global _discovery_result, _discovery_attempted, _discovery_error
85
+ global _discovery_result, _discovery_candidates, _discovery_attempted, _discovery_error
58
86
 
59
87
  with _discovery_lock:
60
88
  # If we've already successfully discovered Stata, return cached result
61
89
  if _discovery_result is not None:
62
- return _discovery_result
90
+ return _discovery_candidates or [_discovery_result]
91
+
92
+ if _discovery_candidates is not None:
93
+ return _discovery_candidates
63
94
 
64
95
  # If we've already attempted and failed, re-raise the cached error
65
96
  if _discovery_attempted and _discovery_error is not None:
@@ -83,13 +114,17 @@ def _get_discovered_stata() -> Tuple[str, str]:
83
114
  logger.info("mcp-stata version: %s", pkg_version)
84
115
 
85
116
  # Run discovery
86
- stata_exec_path, edition = find_stata_path()
117
+ candidates = find_stata_candidates()
87
118
 
88
119
  # Cache the successful result
89
- _discovery_result = (stata_exec_path, edition)
90
- logger.info("Discovery found Stata at: %s (%s)", stata_exec_path, edition)
120
+ _discovery_candidates = candidates
121
+ if candidates:
122
+ _discovery_result = candidates[0]
123
+ logger.info("Discovery found Stata at: %s (%s)", _discovery_result[0], _discovery_result[1])
124
+ else:
125
+ raise FileNotFoundError("No Stata candidates discovered")
91
126
 
92
- return _discovery_result
127
+ return candidates
93
128
 
94
129
  except FileNotFoundError as e:
95
130
  _discovery_error = e
@@ -102,12 +137,22 @@ def _get_discovered_stata() -> Tuple[str, str]:
102
137
  ) from e
103
138
 
104
139
 
140
+ def _get_discovered_stata() -> Tuple[str, str]:
141
+ """
142
+ Preserve existing API: return the highest-priority discovered Stata candidate.
143
+ """
144
+ candidates = _get_discovery_candidates()
145
+ if not candidates:
146
+ raise RuntimeError("Stata binary not found: no candidates discovered")
147
+ return candidates[0]
148
+
149
+
105
150
  class StataClient:
106
151
  _initialized = False
107
152
  _exec_lock: threading.Lock
108
153
  _cache_init_lock = threading.Lock() # Class-level lock for cache initialization
109
154
  _is_executing = False # Flag to prevent recursive Stata calls
110
- MAX_DATA_ROWS = 500
155
+ MAX_DATA_ROWS = MAX_LIMIT
111
156
  MAX_GRAPH_BYTES = 50 * 1024 * 1024 # Maximum graph exports (~50MB)
112
157
  MAX_CACHE_SIZE = 100 # Maximum number of graphs to cache
113
158
  MAX_CACHE_BYTES = 500 * 1024 * 1024 # Maximum cache size in bytes (~500MB)
@@ -115,7 +160,7 @@ class StataClient:
115
160
 
116
161
  def __new__(cls):
117
162
  inst = super(StataClient, cls).__new__(cls)
118
- inst._exec_lock = threading.Lock()
163
+ inst._exec_lock = threading.RLock()
119
164
  inst._is_executing = False
120
165
  return inst
121
166
 
@@ -129,38 +174,6 @@ class StataClient:
129
174
  finally:
130
175
  sys.stdout, sys.stderr = backup_stdout, backup_stderr
131
176
 
132
- def _select_stata_error_message(self, text: str, fallback: str) -> str:
133
- """
134
- Helper for tests and legacy callers to extract the clean error message.
135
- """
136
- if not text:
137
- return fallback
138
-
139
- lines = text.splitlines()
140
- trace_pattern = re.compile(r'^\s*[-=.]')
141
- noise_pattern = re.compile(r'^(?:\}|\{txt\}|\{com\}|end of do-file)')
142
-
143
- for line in reversed(lines):
144
- stripped = line.strip()
145
- if not stripped:
146
- continue
147
- if trace_pattern.match(line):
148
- continue
149
- if noise_pattern.match(stripped):
150
- continue
151
- if stripped.startswith("r(") and stripped.endswith(");"):
152
- # If we hit r(123); we might want the line ABOVE it if it's not noise
153
- continue
154
-
155
- # Preserve SMCL tags
156
- return stripped
157
-
158
- # If we couldn't find a better message, try to find r(N);
159
- match = re.search(r"r\(\d+\);", text)
160
- if match:
161
- return match.group(0)
162
-
163
- return fallback
164
177
 
165
178
  @staticmethod
166
179
  def _stata_quote(value: str) -> str:
@@ -181,6 +194,473 @@ class StataClient:
181
194
  finally:
182
195
  sys.stdout, sys.stderr = backup_stdout, backup_stderr
183
196
 
197
+ @staticmethod
198
+ def _safe_unlink(path: str) -> None:
199
+ if not path:
200
+ return
201
+ try:
202
+ if os.path.exists(path):
203
+ os.unlink(path)
204
+ except Exception:
205
+ pass
206
+
207
+ def _create_smcl_log_path(self, *, prefix: str = "mcp_smcl_", max_hex: Optional[int] = None) -> str:
208
+ hex_id = uuid.uuid4().hex if max_hex is None else uuid.uuid4().hex[:max_hex]
209
+ smcl_path = os.path.join(tempfile.gettempdir(), f"{prefix}{hex_id}.smcl")
210
+ self._safe_unlink(smcl_path)
211
+ return smcl_path
212
+
213
+ @staticmethod
214
+ def _make_smcl_log_name() -> str:
215
+ return f"_mcp_smcl_{uuid.uuid4().hex[:8]}"
216
+
217
+ def _open_smcl_log(self, smcl_path: str, log_name: str, *, quiet: bool = False) -> bool:
218
+ cmd = f"{'quietly ' if quiet else ''}log using \"{smcl_path}\", replace smcl name({log_name})"
219
+ for attempt in range(4):
220
+ try:
221
+ self.stata.run(cmd, echo=False)
222
+ return True
223
+ except Exception:
224
+ if attempt < 3:
225
+ time.sleep(0.1)
226
+ return False
227
+
228
+ def _close_smcl_log(self, log_name: str) -> None:
229
+ try:
230
+ self.stata.run(f"capture log close {log_name}", echo=False)
231
+ except Exception:
232
+ pass
233
+
234
+ def _restore_results_from_hold(self, hold_attr: str) -> None:
235
+ if not hasattr(self, hold_attr):
236
+ return
237
+ hold_name = getattr(self, hold_attr)
238
+ try:
239
+ self.stata.run(f"capture _return restore {hold_name}", echo=False)
240
+ self._last_results = self.get_stored_results(force_fresh=True)
241
+ except Exception:
242
+ pass
243
+ finally:
244
+ try:
245
+ delattr(self, hold_attr)
246
+ except Exception:
247
+ pass
248
+
249
+ def _create_streaming_log(self, *, trace: bool) -> tuple[tempfile.NamedTemporaryFile, str, TailBuffer, FileTeeIO]:
250
+ log_file = tempfile.NamedTemporaryFile(
251
+ prefix="mcp_stata_",
252
+ suffix=".log",
253
+ delete=False,
254
+ mode="w",
255
+ encoding="utf-8",
256
+ errors="replace",
257
+ buffering=1,
258
+ )
259
+ log_path = log_file.name
260
+ tail = TailBuffer(max_chars=200000 if trace else 20000)
261
+ tee = FileTeeIO(log_file, tail)
262
+ return log_file, log_path, tail, tee
263
+
264
+ def _init_streaming_graph_cache(
265
+ self,
266
+ auto_cache_graphs: bool,
267
+ on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]],
268
+ notify_log: Callable[[str], Awaitable[None]],
269
+ ) -> Optional[StreamingGraphCache]:
270
+ if not auto_cache_graphs:
271
+ return None
272
+ graph_cache = StreamingGraphCache(self, auto_cache=True)
273
+ graph_cache_callback = self._create_graph_cache_callback(on_graph_cached, notify_log)
274
+ graph_cache.add_cache_callback(graph_cache_callback)
275
+ return graph_cache
276
+
277
+ def _capture_graph_state(
278
+ self,
279
+ graph_cache: Optional[StreamingGraphCache],
280
+ emit_graph_ready: bool,
281
+ ) -> Optional[dict[str, str]]:
282
+ # Capture initial graph state BEFORE execution starts
283
+ if graph_cache:
284
+ try:
285
+ graph_cache._initial_graphs = set(self.list_graphs(force_refresh=True))
286
+ logger.debug(f"Initial graph state captured: {graph_cache._initial_graphs}")
287
+ except Exception as e:
288
+ logger.debug(f"Failed to capture initial graph state: {e}")
289
+ graph_cache._initial_graphs = set()
290
+
291
+ graph_ready_initial = None
292
+ if emit_graph_ready:
293
+ try:
294
+ graph_ready_initial = {}
295
+ for graph_name in self.list_graphs(force_refresh=True):
296
+ graph_ready_initial[graph_name] = self._get_graph_signature(graph_name)
297
+ logger.debug("Graph-ready initial state captured: %s", set(graph_ready_initial))
298
+ except Exception as e:
299
+ logger.debug("Failed to capture graph-ready state: %s", e)
300
+ graph_ready_initial = {}
301
+ return graph_ready_initial
302
+
303
+ async def _cache_new_graphs(
304
+ self,
305
+ graph_cache: Optional[StreamingGraphCache],
306
+ *,
307
+ notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]],
308
+ total_lines: int,
309
+ completed_label: str,
310
+ ) -> None:
311
+ if not graph_cache or not graph_cache.auto_cache:
312
+ return
313
+ try:
314
+ cached_graphs = []
315
+ initial_graphs = getattr(graph_cache, "_initial_graphs", set())
316
+ current_graphs = set(self.list_graphs(force_refresh=True))
317
+ new_graphs = current_graphs - initial_graphs - graph_cache._cached_graphs
318
+
319
+ if new_graphs:
320
+ logger.info(f"Detected {len(new_graphs)} new graph(s): {sorted(new_graphs)}")
321
+
322
+ for graph_name in new_graphs:
323
+ try:
324
+ cache_result = await anyio.to_thread.run_sync(
325
+ self.cache_graph_on_creation,
326
+ graph_name,
327
+ )
328
+ if cache_result:
329
+ cached_graphs.append(graph_name)
330
+ graph_cache._cached_graphs.add(graph_name)
331
+
332
+ for callback in graph_cache._cache_callbacks:
333
+ try:
334
+ result = callback(graph_name, cache_result)
335
+ if inspect.isawaitable(result):
336
+ await result
337
+ except Exception:
338
+ pass
339
+ except Exception as e:
340
+ logger.error(f"Error caching graph {graph_name}: {e}")
341
+
342
+ if cached_graphs and notify_progress:
343
+ await notify_progress(
344
+ float(total_lines) if total_lines > 0 else 1,
345
+ float(total_lines) if total_lines > 0 else 1,
346
+ f"{completed_label} completed. Cached {len(cached_graphs)} graph(s): {', '.join(cached_graphs)}",
347
+ )
348
+ except Exception as e:
349
+ logger.error(f"Post-execution graph detection failed: {e}")
350
+
351
+ def _emit_graph_ready_task(
352
+ self,
353
+ *,
354
+ emit_graph_ready: bool,
355
+ graph_ready_initial: Optional[dict[str, str]],
356
+ notify_log: Callable[[str], Awaitable[None]],
357
+ graph_ready_task_id: Optional[str],
358
+ graph_ready_format: str,
359
+ ) -> None:
360
+ if emit_graph_ready and graph_ready_initial is not None:
361
+ try:
362
+ asyncio.create_task(
363
+ self._emit_graph_ready_events(
364
+ graph_ready_initial,
365
+ notify_log,
366
+ graph_ready_task_id,
367
+ graph_ready_format,
368
+ )
369
+ )
370
+ except Exception as e:
371
+ logger.warning("graph_ready emission failed to start: %s", e)
372
+
373
+ async def _stream_smcl_log(
374
+ self,
375
+ *,
376
+ smcl_path: str,
377
+ notify_log: Callable[[str], Awaitable[None]],
378
+ done: anyio.Event,
379
+ on_chunk: Optional[Callable[[str], Awaitable[None]]] = None,
380
+ ) -> None:
381
+ last_pos = 0
382
+ # Wait for Stata to create the SMCL file (placeholder removed to avoid locks)
383
+ while not done.is_set() and not os.path.exists(smcl_path):
384
+ await anyio.sleep(0.05)
385
+
386
+ try:
387
+ def _read_content() -> str:
388
+ try:
389
+ with open(smcl_path, "r", encoding="utf-8", errors="replace") as f:
390
+ f.seek(last_pos)
391
+ return f.read()
392
+ except PermissionError:
393
+ if os.name == "nt":
394
+ try:
395
+ res = subprocess.run(f'type "{smcl_path}"', shell=True, capture_output=True)
396
+ full_content = res.stdout.decode("utf-8", errors="replace")
397
+ if len(full_content) > last_pos:
398
+ return full_content[last_pos:]
399
+ return ""
400
+ except Exception:
401
+ return ""
402
+ raise
403
+ except FileNotFoundError:
404
+ return ""
405
+
406
+ while not done.is_set():
407
+ chunk = await anyio.to_thread.run_sync(_read_content)
408
+ if chunk:
409
+ last_pos += len(chunk)
410
+ await notify_log(chunk)
411
+ if on_chunk is not None:
412
+ await on_chunk(chunk)
413
+ await anyio.sleep(0.05)
414
+
415
+ chunk = await anyio.to_thread.run_sync(_read_content)
416
+ if chunk:
417
+ last_pos += len(chunk)
418
+ await notify_log(chunk)
419
+ if on_chunk is not None:
420
+ await on_chunk(chunk)
421
+
422
+ except Exception as e:
423
+ logger.warning(f"Log streaming failed: {e}")
424
+
425
+ def _run_streaming_blocking(
426
+ self,
427
+ *,
428
+ command: str,
429
+ tee: FileTeeIO,
430
+ cwd: Optional[str],
431
+ trace: bool,
432
+ echo: bool,
433
+ smcl_path: str,
434
+ smcl_log_name: str,
435
+ hold_attr: str,
436
+ require_smcl_log: bool = False,
437
+ ) -> tuple[int, Optional[Exception]]:
438
+ rc = -1
439
+ exc: Optional[Exception] = None
440
+ with self._exec_lock:
441
+ self._is_executing = True
442
+ try:
443
+ from sfi import Scalar, SFIToolkit # Import SFI tools
444
+ with self._temp_cwd(cwd):
445
+ log_opened = self._open_smcl_log(smcl_path, smcl_log_name)
446
+ if require_smcl_log and not log_opened:
447
+ exc = RuntimeError("Failed to open SMCL log")
448
+ rc = 1
449
+ if exc is None:
450
+ try:
451
+ with self._redirect_io_streaming(tee, tee):
452
+ try:
453
+ if trace:
454
+ self.stata.run("set trace on")
455
+ ret = self.stata.run(command, echo=echo)
456
+
457
+ setattr(self, hold_attr, f"mcp_hold_{uuid.uuid4().hex[:8]}")
458
+ self.stata.run(
459
+ f"capture _return hold {getattr(self, hold_attr)}",
460
+ echo=False,
461
+ )
462
+
463
+ if isinstance(ret, str) and ret:
464
+ try:
465
+ tee.write(ret)
466
+ except Exception:
467
+ pass
468
+ try:
469
+ rc = self._get_rc_from_scalar(Scalar)
470
+ except Exception:
471
+ pass
472
+ except Exception as e:
473
+ exc = e
474
+ if rc in (-1, 0):
475
+ rc = 1
476
+ finally:
477
+ if trace:
478
+ try:
479
+ self.stata.run("set trace off")
480
+ except Exception:
481
+ pass
482
+ finally:
483
+ self._close_smcl_log(smcl_log_name)
484
+ self._restore_results_from_hold(hold_attr)
485
+ return rc, exc
486
+ # If we get here, SMCL log failed and we're required to stop.
487
+ return rc, exc
488
+ finally:
489
+ self._is_executing = False
490
+ return rc, exc
491
+
492
+ def _resolve_do_file_path(
493
+ self,
494
+ path: str,
495
+ cwd: Optional[str],
496
+ ) -> tuple[Optional[str], Optional[str], Optional[CommandResponse]]:
497
+ if cwd is not None and not os.path.isdir(cwd):
498
+ return None, None, CommandResponse(
499
+ command=f'do "{path}"',
500
+ rc=601,
501
+ stdout="",
502
+ stderr=None,
503
+ success=False,
504
+ error=ErrorEnvelope(
505
+ message=f"cwd not found: {cwd}",
506
+ rc=601,
507
+ command=path,
508
+ ),
509
+ )
510
+
511
+ effective_path = path
512
+ if cwd is not None and not os.path.isabs(path):
513
+ effective_path = os.path.abspath(os.path.join(cwd, path))
514
+
515
+ if not os.path.exists(effective_path):
516
+ return None, None, CommandResponse(
517
+ command=f'do "{effective_path}"',
518
+ rc=601,
519
+ stdout="",
520
+ stderr=None,
521
+ success=False,
522
+ error=ErrorEnvelope(
523
+ message=f"Do-file not found: {effective_path}",
524
+ rc=601,
525
+ command=effective_path,
526
+ ),
527
+ )
528
+
529
+ path_for_stata = effective_path.replace("\\", "/")
530
+ command = f'do "{path_for_stata}"'
531
+ return effective_path, command, None
532
+
533
+ @contextmanager
534
+ def _smcl_log_capture(self) -> "Generator[Tuple[str, str], None, None]":
535
+ """
536
+ Context manager that wraps command execution in a named SMCL log.
537
+
538
+ This runs alongside any user logs (named logs can coexist).
539
+ Yields (log_name, log_path) tuple for use within the context.
540
+ The SMCL file is NOT deleted automatically - caller should clean up.
541
+
542
+ Usage:
543
+ with self._smcl_log_capture() as (log_name, smcl_path):
544
+ self.stata.run(cmd)
545
+ # After context, read smcl_path for raw SMCL output
546
+ """
547
+ # Use a unique name but DO NOT join start with mkstemp to avoid existing file locks.
548
+ # Stata will create the file.
549
+ smcl_path = self._create_smcl_log_path()
550
+ # Unique log name to avoid collisions with user logs
551
+ log_name = self._make_smcl_log_name()
552
+
553
+ try:
554
+ # Open named SMCL log (quietly to avoid polluting output)
555
+ log_opened = self._open_smcl_log(smcl_path, log_name, quiet=True)
556
+ if not log_opened:
557
+ # Still yield, consumer might see empty file or handle error,
558
+ # but we can't do much if Stata refuses to log.
559
+ pass
560
+
561
+ yield log_name, smcl_path
562
+ finally:
563
+ # Always close our named log
564
+ self._close_smcl_log(log_name)
565
+
566
+ def _read_smcl_file(self, path: str) -> str:
567
+ """Read SMCL file contents, handling encoding issues and Windows file locks."""
568
+ try:
569
+ with open(path, 'r', encoding='utf-8', errors='replace') as f:
570
+ return f.read()
571
+ except PermissionError:
572
+ if os.name == "nt":
573
+ # Windows Fallback: Try to use 'type' command to bypass exclusive lock
574
+ try:
575
+ res = subprocess.run(f'type "{path}"', shell=True, capture_output=True)
576
+ if res.returncode == 0:
577
+ return res.stdout.decode('utf-8', errors='replace')
578
+ except Exception as e:
579
+ logger.debug(f"Combined fallback read failed: {e}")
580
+ logger.warning(f"Failed to read SMCL file {path} due to lock")
581
+ return ""
582
+ except Exception as e:
583
+ logger.warning(f"Failed to read SMCL file {path}: {e}")
584
+ return ""
585
+
586
+ def _extract_error_from_smcl(self, smcl_content: str, rc: int) -> Tuple[str, str]:
587
+ """
588
+ Extract error message and context from raw SMCL output.
589
+
590
+ Uses {err} tags as the authoritative source for error detection.
591
+
592
+ Returns:
593
+ Tuple of (error_message, context_string)
594
+ """
595
+ if not smcl_content:
596
+ return f"Stata error r({rc})", ""
597
+
598
+ lines = smcl_content.splitlines()
599
+
600
+ # Search backwards for {err} tags - they indicate error lines
601
+ error_lines = []
602
+ error_start_idx = -1
603
+
604
+ for i in range(len(lines) - 1, -1, -1):
605
+ line = lines[i]
606
+ if '{err}' in line:
607
+ if error_start_idx == -1:
608
+ error_start_idx = i
609
+ # Walk backwards to find consecutive {err} lines
610
+ j = i
611
+ while j >= 0 and '{err}' in lines[j]:
612
+ error_lines.insert(0, lines[j])
613
+ j -= 1
614
+ break
615
+
616
+ if error_lines:
617
+ # Clean SMCL tags from error message
618
+ clean_lines = []
619
+ for line in error_lines:
620
+ # Remove SMCL tags but keep the text content
621
+ cleaned = re.sub(r'\{[^}]*\}', '', line).strip()
622
+ if cleaned:
623
+ clean_lines.append(cleaned)
624
+
625
+ error_msg = " ".join(clean_lines) or f"Stata error r({rc})"
626
+
627
+ # Context is everything from error start to end
628
+ context_start = max(0, error_start_idx - 5) # Include 5 lines before error
629
+ context = "\n".join(lines[context_start:])
630
+
631
+ return error_msg, context
632
+
633
+ # Fallback: no {err} found, return last 30 lines as context
634
+ context_start = max(0, len(lines) - 30)
635
+ context = "\n".join(lines[context_start:])
636
+
637
+ return f"Stata error r({rc})", context
638
+
639
+ def _parse_rc_from_smcl(self, smcl_content: str) -> Optional[int]:
640
+ """Parse return code from SMCL content using specific structural patterns."""
641
+ if not smcl_content:
642
+ return None
643
+
644
+ # 1. Primary check: SMCL search tag {search r(N), ...}
645
+ # This is the most authoritative interactive indicator
646
+ matches = list(re.finditer(r'\{search r\((\d+)\)', smcl_content))
647
+ if matches:
648
+ try:
649
+ return int(matches[-1].group(1))
650
+ except Exception:
651
+ pass
652
+
653
+ # 2. Secondary check: Standalone r(N); pattern
654
+ # This appears at the end of command blocks
655
+ matches = list(re.finditer(r'(?<!\w)r\((\d+)\);?', smcl_content))
656
+ if matches:
657
+ try:
658
+ return int(matches[-1].group(1))
659
+ except Exception:
660
+ pass
661
+
662
+ return None
663
+
184
664
  @staticmethod
185
665
  def _create_graph_cache_callback(on_graph_cached, notify_log):
186
666
  """Create a standardized graph cache callback with proper error handling."""
@@ -203,6 +683,142 @@ class StataClient:
203
683
 
204
684
  return graph_cache_callback
205
685
 
686
+ def _get_cached_graph_path(self, graph_name: str) -> Optional[str]:
687
+ if not hasattr(self, "_cache_lock") or not hasattr(self, "_preemptive_cache"):
688
+ return None
689
+ try:
690
+ with self._cache_lock:
691
+ return self._preemptive_cache.get(graph_name)
692
+ except Exception:
693
+ return None
694
+
695
+ async def _emit_graph_ready_for_graphs(
696
+ self,
697
+ graph_names: List[str],
698
+ *,
699
+ notify_log: Callable[[str], Awaitable[None]],
700
+ task_id: Optional[str],
701
+ export_format: str,
702
+ graph_ready_initial: Optional[dict[str, str]],
703
+ ) -> None:
704
+ if not graph_names:
705
+ return
706
+ fmt = (export_format or "svg").strip().lower()
707
+ for graph_name in graph_names:
708
+ signature = self._get_graph_signature(graph_name)
709
+ if graph_ready_initial is not None:
710
+ previous = graph_ready_initial.get(graph_name)
711
+ if previous is not None and previous == signature:
712
+ continue
713
+ try:
714
+ export_path = None
715
+ if fmt == "svg":
716
+ export_path = self._get_cached_graph_path(graph_name)
717
+ if not export_path:
718
+ export_path = await anyio.to_thread.run_sync(
719
+ lambda: self.export_graph(graph_name, format=fmt)
720
+ )
721
+ payload = {
722
+ "event": "graph_ready",
723
+ "task_id": task_id,
724
+ "graph": {
725
+ "name": graph_name,
726
+ "path": export_path,
727
+ "label": graph_name,
728
+ },
729
+ }
730
+ await notify_log(json.dumps(payload))
731
+ if graph_ready_initial is not None:
732
+ graph_ready_initial[graph_name] = signature
733
+ except Exception as e:
734
+ logger.warning("graph_ready export failed for %s: %s", graph_name, e)
735
+
736
+ async def _maybe_cache_graphs_on_chunk(
737
+ self,
738
+ *,
739
+ graph_cache: Optional[StreamingGraphCache],
740
+ emit_graph_ready: bool,
741
+ notify_log: Callable[[str], Awaitable[None]],
742
+ graph_ready_task_id: Optional[str],
743
+ graph_ready_format: str,
744
+ graph_ready_initial: Optional[dict[str, str]],
745
+ last_check: List[float],
746
+ ) -> None:
747
+ if not graph_cache or not graph_cache.auto_cache:
748
+ return
749
+ if self._is_executing:
750
+ return
751
+ now = time.monotonic()
752
+ if last_check and now - last_check[0] < 0.25:
753
+ return
754
+ if last_check:
755
+ last_check[0] = now
756
+ try:
757
+ cached_names = await graph_cache.cache_detected_graphs_with_pystata()
758
+ except Exception as e:
759
+ logger.debug("graph_ready polling failed: %s", e)
760
+ return
761
+ if emit_graph_ready and cached_names:
762
+ await self._emit_graph_ready_for_graphs(
763
+ cached_names,
764
+ notify_log=notify_log,
765
+ task_id=graph_ready_task_id,
766
+ export_format=graph_ready_format,
767
+ graph_ready_initial=graph_ready_initial,
768
+ )
769
+
770
+ async def _emit_graph_ready_events(
771
+ self,
772
+ initial_graphs: dict[str, str],
773
+ notify_log: Callable[[str], Awaitable[None]],
774
+ task_id: Optional[str],
775
+ export_format: str,
776
+ ) -> None:
777
+ try:
778
+ current_graphs = list(self.list_graphs(force_refresh=True))
779
+ except Exception as e:
780
+ logger.warning("graph_ready: list_graphs failed: %s", e)
781
+ return
782
+
783
+ if not current_graphs:
784
+ return
785
+
786
+ for graph_name in current_graphs:
787
+ signature = self._get_graph_signature(graph_name)
788
+ previous = initial_graphs.get(graph_name)
789
+ if previous is not None and previous == signature:
790
+ continue
791
+ try:
792
+ export_path = await anyio.to_thread.run_sync(
793
+ lambda: self.export_graph(graph_name, format=export_format)
794
+ )
795
+ payload = {
796
+ "event": "graph_ready",
797
+ "task_id": task_id,
798
+ "graph": {
799
+ "name": graph_name,
800
+ "path": export_path,
801
+ "label": graph_name,
802
+ },
803
+ }
804
+ await notify_log(json.dumps(payload))
805
+ initial_graphs[graph_name] = signature
806
+ except Exception as e:
807
+ logger.warning("graph_ready export failed for %s: %s", graph_name, e)
808
+
809
+ def _get_graph_signature(self, graph_name: str) -> str:
810
+ if not graph_name:
811
+ return ""
812
+ try:
813
+ response = self.exec_lightweight(f"graph describe {graph_name}")
814
+ if response.success and response.stdout:
815
+ return response.stdout
816
+ if response.stderr:
817
+ return response.stderr
818
+ except Exception:
819
+ return ""
820
+ return ""
821
+
206
822
  def _request_break_in(self) -> None:
207
823
  """
208
824
  Attempt to interrupt a running Stata command when cancellation is requested.
@@ -272,72 +888,189 @@ class StataClient:
272
888
  finally:
273
889
  os.chdir(prev)
274
890
 
891
+ @contextmanager
892
+ def _safe_redirect_fds(self):
893
+ """Redirects fd 1 (stdout) to fd 2 (stderr) at the OS level."""
894
+ # Save original stdout fd
895
+ try:
896
+ stdout_fd = os.dup(1)
897
+ except Exception:
898
+ # Fallback if we can't dup (e.g. strange environment)
899
+ yield
900
+ return
901
+
902
+ try:
903
+ # Redirect OS-level stdout to stderr
904
+ os.dup2(2, 1)
905
+ yield
906
+ finally:
907
+ # Restore stdout
908
+ try:
909
+ os.dup2(stdout_fd, 1)
910
+ os.close(stdout_fd)
911
+ except Exception:
912
+ pass
913
+
275
914
  def init(self):
276
915
  """Initializes usage of pystata using cached discovery results."""
277
916
  if self._initialized:
278
917
  return
279
918
 
919
+ # Suppress any non-UTF8 banner output from PyStata on stdout, which breaks MCP stdio transport
920
+ from contextlib import redirect_stdout, redirect_stderr
921
+
280
922
  try:
281
923
  import stata_setup
282
924
 
283
- # Get discovered Stata path (cached from first call)
284
- stata_exec_path, edition = _get_discovered_stata()
285
-
286
- candidates = []
287
-
288
- # Prefer the binary directory first (documented input for stata_setup)
289
- bin_dir = os.path.dirname(stata_exec_path)
290
- if bin_dir:
291
- candidates.append(bin_dir)
292
-
293
- # 2. App Bundle: .../StataMP.app (macOS only)
294
- curr = bin_dir
295
- app_bundle = None
296
- while len(curr) > 1:
297
- if curr.endswith(".app"):
298
- app_bundle = curr
299
- break
300
- parent = os.path.dirname(curr)
301
- if parent == curr: # Reached root directory, prevent infinite loop on Windows
302
- break
303
- curr = parent
304
-
305
- if app_bundle:
306
- candidates.insert(0, os.path.dirname(app_bundle))
307
- candidates.insert(1, app_bundle)
925
+ # Get discovered Stata paths (cached from first call)
926
+ discovery_candidates = _get_discovery_candidates()
308
927
 
309
- # Deduplicate preserving order
310
- seen = set()
311
- deduped = []
312
- for c in candidates:
313
- if c in seen:
314
- continue
315
- seen.add(c)
316
- deduped.append(c)
317
- candidates = deduped
928
+ # Diagnostic: force faulthandler to output to stderr for C crashes
929
+ import faulthandler
930
+ faulthandler.enable(file=sys.stderr)
931
+ import subprocess
318
932
 
319
933
  success = False
320
- for path in candidates:
321
- try:
322
- stata_setup.config(path, edition)
323
- success = True
324
- logger.debug("stata_setup.config succeeded with path: %s", path)
934
+ last_error = None
935
+ chosen_exec: Optional[Tuple[str, str]] = None
936
+
937
+ for stata_exec_path, edition in discovery_candidates:
938
+ candidates = []
939
+ # Prefer the binary directory first (documented input for stata_setup)
940
+ bin_dir = os.path.dirname(stata_exec_path)
941
+
942
+ # 2. App Bundle: .../StataMP.app (macOS only)
943
+ curr = bin_dir
944
+ app_bundle = None
945
+ while len(curr) > 1:
946
+ if curr.endswith(".app"):
947
+ app_bundle = curr
948
+ break
949
+ parent = os.path.dirname(curr)
950
+ if parent == curr:
951
+ break
952
+ curr = parent
953
+
954
+ ordered_candidates = []
955
+ if bin_dir:
956
+ ordered_candidates.append(bin_dir)
957
+ if app_bundle:
958
+ ordered_candidates.append(app_bundle)
959
+ parent_dir = os.path.dirname(app_bundle)
960
+ if parent_dir not in ordered_candidates:
961
+ ordered_candidates.append(parent_dir)
962
+
963
+ # Deduplicate preserving order
964
+ seen = set()
965
+ candidates = []
966
+ for c in ordered_candidates:
967
+ if c not in seen:
968
+ seen.add(c)
969
+ candidates.append(c)
970
+
971
+ for path in candidates:
972
+ try:
973
+ # 1. Pre-flight check in a subprocess to capture hard exits/crashes
974
+ sys.stderr.write(f"[mcp_stata] DEBUG: Pre-flight check for path '{path}'\n")
975
+ sys.stderr.flush()
976
+
977
+ preflight_code = f"""
978
+ import sys
979
+ import stata_setup
980
+ from contextlib import redirect_stdout, redirect_stderr
981
+ with redirect_stdout(sys.stderr), redirect_stderr(sys.stderr):
982
+ try:
983
+ stata_setup.config({repr(path)}, {repr(edition)})
984
+ from pystata import stata
985
+ stata.run('about', echo=True)
986
+ print('PREFLIGHT_OK')
987
+ except Exception as e:
988
+ print(f'PREFLIGHT_FAIL: {{e}}', file=sys.stderr)
989
+ sys.exit(1)
990
+ """
991
+
992
+ try:
993
+ res = subprocess.run(
994
+ [sys.executable, "-c", preflight_code],
995
+ capture_output=True, text=True, timeout=30
996
+ )
997
+ if res.returncode != 0:
998
+ sys.stderr.write(f"[mcp_stata] Pre-flight failed (rc={res.returncode}) for '{path}'\n")
999
+ if res.stdout.strip():
1000
+ sys.stderr.write(f"--- Pre-flight stdout ---\n{res.stdout.strip()}\n")
1001
+ if res.stderr.strip():
1002
+ sys.stderr.write(f"--- Pre-flight stderr ---\n{res.stderr.strip()}\n")
1003
+ sys.stderr.flush()
1004
+ last_error = f"Pre-flight failed: {res.stdout.strip()} {res.stderr.strip()}"
1005
+ continue
1006
+ else:
1007
+ sys.stderr.write(f"[mcp_stata] Pre-flight succeeded for '{path}'. Proceeding to in-process init.\n")
1008
+ sys.stderr.flush()
1009
+ except Exception as pre_e:
1010
+ sys.stderr.write(f"[mcp_stata] Pre-flight execution error for '{path}': {repr(pre_e)}\n")
1011
+ sys.stderr.flush()
1012
+ last_error = pre_e
1013
+ continue
1014
+
1015
+ msg = f"[mcp_stata] DEBUG: In-process stata_setup.config('{path}', '{edition}')\n"
1016
+ sys.stderr.write(msg)
1017
+ sys.stderr.flush()
1018
+ # Redirect both sys.stdout/err AND the raw fds to our stderr pipe.
1019
+ with redirect_stdout(sys.stderr), redirect_stderr(sys.stderr), self._safe_redirect_fds():
1020
+ stata_setup.config(path, edition)
1021
+
1022
+ sys.stderr.write(f"[mcp_stata] DEBUG: stata_setup.config succeeded for path: {path}\n")
1023
+ sys.stderr.flush()
1024
+ success = True
1025
+ chosen_exec = (stata_exec_path, edition)
1026
+ logger.info("stata_setup.config succeeded with path: %s", path)
1027
+ break
1028
+ except BaseException as e:
1029
+ last_error = e
1030
+ sys.stderr.write(f"[mcp_stata] WARNING: In-process stata_setup.config caught: {repr(e)}\n")
1031
+ sys.stderr.flush()
1032
+ logger.warning("stata_setup.config failed for path '%s': %s", path, e)
1033
+ if isinstance(e, SystemExit):
1034
+ break
1035
+ continue
1036
+
1037
+ if success:
1038
+ # Cache winning candidate for subsequent lookups
1039
+ global _discovery_result
1040
+ if chosen_exec:
1041
+ _discovery_result = chosen_exec
325
1042
  break
326
- except Exception:
327
- continue
328
1043
 
329
1044
  if not success:
330
- raise RuntimeError(
331
- f"stata_setup.config failed. Tried: {candidates}. "
332
- f"Derived from binary: {stata_exec_path}"
1045
+ error_msg = (
1046
+ f"stata_setup.config failed to initialize Stata. "
1047
+ f"Tried candidates: {discovery_candidates}. "
1048
+ f"Last error: {repr(last_error)}"
333
1049
  )
1050
+ sys.stderr.write(f"[mcp_stata] ERROR: {error_msg}\n")
1051
+ sys.stderr.flush()
1052
+ logger.error(error_msg)
1053
+ raise RuntimeError(error_msg)
334
1054
 
335
1055
  # Cache the binary path for later use (e.g., PNG export on Windows)
336
1056
  self._stata_exec_path = os.path.abspath(stata_exec_path)
337
1057
 
338
- from pystata import stata # type: ignore[import-not-found]
339
- self.stata = stata
340
- self._initialized = True
1058
+ try:
1059
+ sys.stderr.write("[mcp_stata] DEBUG: Importing pystata and warming up...\n")
1060
+ sys.stderr.flush()
1061
+ with redirect_stdout(sys.stderr), redirect_stderr(sys.stderr), self._safe_redirect_fds():
1062
+ from pystata import stata # type: ignore[import-not-found]
1063
+ # Warm up the engine and swallow any late splash screen output
1064
+ stata.run("display 1", echo=False)
1065
+ self.stata = stata
1066
+ self._initialized = True
1067
+ sys.stderr.write("[mcp_stata] DEBUG: pystata warmed up successfully\n")
1068
+ sys.stderr.flush()
1069
+ except BaseException as e:
1070
+ sys.stderr.write(f"[mcp_stata] ERROR: Failed to load pystata or run initial command: {repr(e)}\n")
1071
+ sys.stderr.flush()
1072
+ logger.error("Failed to load pystata or run initial command: %s", e)
1073
+ raise
341
1074
 
342
1075
  # Initialize list_graphs TTL cache
343
1076
  self._list_graphs_cache = None
@@ -408,61 +1141,148 @@ class StataClient:
408
1141
 
409
1142
  return pat.sub(repl, code)
410
1143
 
411
- def _read_return_code(self) -> int:
412
- """Read the last Stata return code without mutating rc."""
413
- try:
414
- from sfi import Macro # type: ignore[import-not-found]
415
- rc_val = Macro.getCValue("rc") # type: ignore[attr-defined]
416
- if rc_val is not None:
417
- return int(float(rc_val))
418
- # If getCValue returns None, fall through to the alternative approach
419
- except Exception:
420
- pass
421
-
422
- # Alternative approach: use a global macro
423
- # CRITICAL: This must be done carefully to avoid mutating c(rc)
424
- try:
425
- self.stata.run("global MCP_RC = c(rc)")
426
- from sfi import Macro as Macro2 # type: ignore[import-not-found]
427
- rc_val = Macro2.getGlobal("MCP_RC")
428
- return int(float(rc_val))
429
- except Exception:
430
- return -1
431
-
432
1144
  def _get_rc_from_scalar(self, Scalar) -> int:
433
1145
  """Safely get return code, handling None values."""
434
1146
  try:
435
1147
  from sfi import Macro
436
- rc_val = Macro.getCValue("rc")
1148
+ rc_val = Macro.getGlobal("_rc")
437
1149
  if rc_val is None:
438
1150
  return -1
439
1151
  return int(float(rc_val))
440
1152
  except Exception:
441
1153
  return -1
442
1154
 
443
- def _parse_rc_from_text(self, text: str) -> Optional[int]:
444
- match = re.search(r"r\((\d+)\)", text)
445
- if match:
446
- try:
447
- return int(match.group(1))
448
- except Exception:
449
- return None
450
- return None
1155
+ def _parse_rc_from_text(self, text: str) -> Optional[int]:
1156
+ """Parse return code from plain text using structural patterns."""
1157
+ if not text:
1158
+ return None
1159
+
1160
+ # 1. Primary check: 'search r(N)' pattern (SMCL tag potentially stripped)
1161
+ matches = list(re.finditer(r'search r\((\d+)\)', text))
1162
+ if matches:
1163
+ try:
1164
+ return int(matches[-1].group(1))
1165
+ except Exception:
1166
+ pass
1167
+
1168
+ # 2. Secondary check: Standalone r(N); pattern
1169
+ # This appears at the end of command blocks
1170
+ matches = list(re.finditer(r'(?<!\w)r\((\d+)\);?', text))
1171
+ if matches:
1172
+ try:
1173
+ return int(matches[-1].group(1))
1174
+ except Exception:
1175
+ pass
1176
+
1177
+ return None
1178
+
1179
+ def _parse_line_from_text(self, text: str) -> Optional[int]:
1180
+ match = re.search(r"line\s+(\d+)", text, re.IGNORECASE)
1181
+ if match:
1182
+ try:
1183
+ return int(match.group(1))
1184
+ except Exception:
1185
+ return None
1186
+ return None
1187
+
1188
+ def _read_log_backwards_until_error(self, path: str, max_bytes: int = 5_000_000) -> str:
1189
+ """
1190
+ Read log file backwards in chunks, stopping when we find {err} tags or reach the start.
1191
+
1192
+ This is more efficient and robust than reading huge fixed tails, as we only read
1193
+ what we need to find the error.
1194
+
1195
+ Args:
1196
+ path: Path to the log file
1197
+ max_bytes: Maximum total bytes to read (safety limit, default 5MB)
1198
+
1199
+ Returns:
1200
+ The relevant portion of the log containing the error and context
1201
+ """
1202
+ try:
1203
+ chunk_size = 50_000 # Read 50KB chunks at a time
1204
+ total_read = 0
1205
+ chunks = []
1206
+
1207
+ with open(path, 'rb') as f:
1208
+ # Get file size
1209
+ f.seek(0, os.SEEK_END)
1210
+ file_size = f.tell()
1211
+
1212
+ if file_size == 0:
1213
+ return ""
1214
+
1215
+ # Start from the end
1216
+ position = file_size
1217
+
1218
+ while position > 0 and total_read < max_bytes:
1219
+ # Calculate how much to read in this chunk
1220
+ read_size = min(chunk_size, position, max_bytes - total_read)
1221
+ position -= read_size
1222
+
1223
+ # Seek and read
1224
+ f.seek(position)
1225
+ chunk = f.read(read_size)
1226
+ chunks.insert(0, chunk)
1227
+ total_read += read_size
1228
+
1229
+ # Decode and check for error tags
1230
+ try:
1231
+ accumulated = b''.join(chunks).decode('utf-8', errors='replace')
1232
+
1233
+ # Check if we've found an error tag
1234
+ if '{err}' in accumulated:
1235
+ # Found it! Read one more chunk for context before the error
1236
+ if position > 0 and total_read < max_bytes:
1237
+ extra_read = min(chunk_size, position, max_bytes - total_read)
1238
+ position -= extra_read
1239
+ f.seek(position)
1240
+ extra_chunk = f.read(extra_read)
1241
+ chunks.insert(0, extra_chunk)
1242
+
1243
+ return b''.join(chunks).decode('utf-8', errors='replace')
1244
+
1245
+ except UnicodeDecodeError:
1246
+ # Continue reading if we hit a decode error (might be mid-character)
1247
+ continue
1248
+
1249
+ # Read everything we've accumulated
1250
+ return b''.join(chunks).decode('utf-8', errors='replace')
451
1251
 
452
- def _parse_line_from_text(self, text: str) -> Optional[int]:
453
- match = re.search(r"line\s+(\d+)", text, re.IGNORECASE)
454
- if match:
455
- try:
456
- return int(match.group(1))
457
- except Exception:
458
- return None
459
- return None
1252
+ except Exception as e:
1253
+ logger.warning(f"Error reading log backwards: {e}")
1254
+ # Fallback to regular tail read
1255
+ return self._read_log_tail(path, 200_000)
1256
+
1257
+ def _read_log_tail_smart(self, path: str, rc: int, trace: bool = False) -> str:
1258
+ """
1259
+ Smart log tail reader that adapts based on whether an error occurred.
1260
+
1261
+ - If rc == 0: Read normal tail (20KB without trace, 200KB with trace)
1262
+ - If rc != 0: Search backwards dynamically to find the error
1263
+
1264
+ Args:
1265
+ path: Path to the log file
1266
+ rc: Return code from Stata
1267
+ trace: Whether trace mode was enabled
1268
+
1269
+ Returns:
1270
+ Relevant log content
1271
+ """
1272
+ if rc != 0:
1273
+ # Error occurred - search backwards for {err} tags
1274
+ return self._read_log_backwards_until_error(path)
1275
+ else:
1276
+ # Success - just read normal tail
1277
+ tail_size = 200_000 if trace else 20_000
1278
+ return self._read_log_tail(path, tail_size)
460
1279
 
461
1280
  def _read_log_tail(self, path: str, max_chars: int) -> str:
462
1281
  try:
463
1282
  with open(path, "rb") as f:
464
1283
  f.seek(0, os.SEEK_END)
465
1284
  size = f.tell()
1285
+
466
1286
  if size <= 0:
467
1287
  return ""
468
1288
  read_size = min(size, max_chars)
@@ -472,6 +1292,98 @@ class StataClient:
472
1292
  except Exception:
473
1293
  return ""
474
1294
 
1295
+ def _build_combined_log(
1296
+ self,
1297
+ tail: TailBuffer,
1298
+ path: str,
1299
+ rc: int,
1300
+ trace: bool,
1301
+ exc: Optional[Exception],
1302
+ ) -> str:
1303
+ tail_text = tail.get_value()
1304
+ log_tail = self._read_log_tail_smart(path, rc, trace)
1305
+ if log_tail and len(log_tail) > len(tail_text):
1306
+ tail_text = log_tail
1307
+ return (tail_text or "") + (f"\n{exc}" if exc else "")
1308
+
1309
+ def _truncate_command_output(
1310
+ self,
1311
+ result: CommandResponse,
1312
+ max_output_lines: Optional[int],
1313
+ ) -> CommandResponse:
1314
+ if max_output_lines is None or not result.stdout:
1315
+ return result
1316
+ lines = result.stdout.splitlines()
1317
+ if len(lines) <= max_output_lines:
1318
+ return result
1319
+ truncated_lines = lines[:max_output_lines]
1320
+ truncated_lines.append(
1321
+ f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)"
1322
+ )
1323
+ truncated_stdout = "\n".join(truncated_lines)
1324
+ if hasattr(result, "model_copy"):
1325
+ return result.model_copy(update={"stdout": truncated_stdout})
1326
+ return result.copy(update={"stdout": truncated_stdout})
1327
+
1328
    def _run_plain_capture(self, code: str) -> str:
        """
        Run a Stata command while capturing output using a named SMCL log.
        This is the most reliable way to capture output (like return list)
        without interfering with user logs or being affected by stdout redirection issues.

        Args:
            code: Stata command(s) to execute.

        Returns:
            The captured output converted from SMCL to plain text ("" when
            the capture log could not be opened).
        """
        if not self._initialized:
            self.init()

        with self._exec_lock:
            # Unique per-call hold name so concurrent/nested holds cannot collide.
            hold_name = f"mcp_hold_{uuid.uuid4().hex[:8]}"
            # Hold results BEFORE opening the capture log
            self.stata.run(f"capture _return hold {hold_name}", echo=False)

            try:
                with self._smcl_log_capture() as (log_name, smcl_path):
                    # Restore results INSIDE the capture log so return list can see them
                    self.stata.run(f"capture _return restore {hold_name}", echo=False)
                    try:
                        self.stata.run(code, echo=True)
                    except Exception:
                        # Errors are deliberately swallowed: the caller only
                        # wants whatever output made it into the SMCL log.
                        pass
            except Exception:
                # Cleanup hold if log capture failed to open
                self.stata.run(f"capture _return drop {hold_name}", echo=False)
                content = ""
                smcl_path = None
            else:
                # Read SMCL content and convert to text
                content = self._read_smcl_file(smcl_path)
                # Remove the temp file
                self._safe_unlink(smcl_path)

        return self._smcl_to_text(content)
1362
+
1363
+ def _count_do_file_lines(self, path: str) -> int:
1364
+ """
1365
+ Count the number of executable lines in a .do file for progress inference.
1366
+
1367
+ Blank lines and comment-only lines (starting with * or //) are ignored.
1368
+ """
1369
+ try:
1370
+ with open(path, "r", encoding="utf-8", errors="replace") as f:
1371
+ lines = f.read().splitlines()
1372
+ except Exception:
1373
+ return 0
1374
+
1375
+ total = 0
1376
+ for line in lines:
1377
+ s = line.strip()
1378
+ if not s:
1379
+ continue
1380
+ if s.startswith("*"):
1381
+ continue
1382
+ if s.startswith("//"):
1383
+ continue
1384
+ total += 1
1385
+ return total
1386
+
475
1387
  def _smcl_to_text(self, smcl: str) -> str:
476
1388
  """Convert simple SMCL markup into plain text for LLM-friendly help."""
477
1389
  # First, keep inline directive content if present (e.g., {bf:word} -> word)
@@ -530,17 +1442,44 @@ class StataClient:
530
1442
  error_buffer = StringIO()
531
1443
  rc = 0
532
1444
  sys_error = None
1445
+ error_envelope = None
1446
+ smcl_content = ""
1447
+ smcl_path = None
533
1448
 
534
1449
  with self._exec_lock:
535
1450
  try:
536
- from sfi import Scalar, SFIToolkit # Import SFI tools inside execution block
1451
+ from sfi import Scalar, SFIToolkit
537
1452
  with self._temp_cwd(cwd):
538
- with self._redirect_io(output_buffer, error_buffer):
539
- if trace:
540
- self.stata.run("set trace on")
541
-
542
- # 1. Run the user code
543
- self.stata.run(code, echo=echo)
1453
+ # Create SMCL log for authoritative output capture
1454
+ # Use shorter unique path to avoid Windows path issues
1455
+ smcl_path = self._create_smcl_log_path(prefix="mcp_", max_hex=16)
1456
+ log_name = self._make_smcl_log_name()
1457
+ self._open_smcl_log(smcl_path, log_name)
1458
+
1459
+ try:
1460
+ with self._redirect_io(output_buffer, error_buffer):
1461
+ try:
1462
+ if trace:
1463
+ self.stata.run("set trace on")
1464
+
1465
+ # Run the user code
1466
+ self.stata.run(code, echo=echo)
1467
+
1468
+ # Hold results IMMEDIATELY to prevent clobbering by cleanup
1469
+ self._hold_name = f"mcp_hold_{uuid.uuid4().hex[:8]}"
1470
+ self.stata.run(f"capture _return hold {self._hold_name}", echo=False)
1471
+
1472
+ finally:
1473
+ if trace:
1474
+ try:
1475
+ self.stata.run("set trace off")
1476
+ except Exception:
1477
+ pass
1478
+ finally:
1479
+ # Close SMCL log AFTER output redirection
1480
+ self._close_smcl_log(log_name)
1481
+ # Restore and capture results while still inside the lock
1482
+ self._restore_results_from_hold("_hold_name")
544
1483
 
545
1484
  except Exception as e:
546
1485
  sys_error = str(e)
@@ -548,36 +1487,66 @@ class StataClient:
548
1487
  parsed_rc = self._parse_rc_from_text(sys_error)
549
1488
  rc = parsed_rc if parsed_rc is not None else 1
550
1489
 
1490
+ # Read SMCL content as the authoritative source
1491
+ if smcl_path:
1492
+ smcl_content = self._read_smcl_file(smcl_path)
1493
+ # Clean up SMCL file
1494
+ self._safe_unlink(smcl_path)
1495
+
551
1496
  stdout_content = output_buffer.getvalue()
552
1497
  stderr_content = error_buffer.getvalue()
553
- full_log = stdout_content + "\n" + stderr_content
554
1498
 
555
- # 2. Extract RC from log tail (primary error detection method)
556
- if rc == 1 and not sys_error: # No exception but might have error in log
557
- parsed_rc = self._parse_rc_from_text(full_log)
558
- if parsed_rc is not None:
1499
+ # If RC wasn't captured or is generic, try to parse from SMCL
1500
+ if rc in (0, 1, -1) and smcl_content:
1501
+ parsed_rc = self._parse_rc_from_smcl(smcl_content)
1502
+ if parsed_rc is not None and parsed_rc != 0:
559
1503
  rc = parsed_rc
1504
+ elif rc == -1:
1505
+ rc = 0
1506
+
1507
+ # If stdout is empty but SMCL has content AND command succeeded, use SMCL as stdout
1508
+ # This handles cases where Stata writes to log but not to redirected stdout
1509
+ # For errors, we keep stdout empty and error info goes to ErrorEnvelope
1510
+ if rc == 0 and not stdout_content and smcl_content:
1511
+ # Convert SMCL to plain text for stdout
1512
+ stdout_content = self._smcl_to_text(smcl_content)
560
1513
 
561
- error_envelope = None
562
1514
  if rc != 0:
563
1515
  if sys_error:
564
1516
  msg = sys_error
565
- snippet = sys_error # Include the exception message as snippet
1517
+ context = sys_error
566
1518
  else:
567
- # Extract error message from log tail
568
- msg, context = self._extract_error_and_context(full_log, rc)
569
-
570
- error_envelope = ErrorEnvelope(message=msg, rc=rc, context=context, snippet=full_log[-800:])
1519
+ # Extract error from SMCL (authoritative source)
1520
+ msg, context = self._extract_error_from_smcl(smcl_content, rc)
1521
+
1522
+ error_envelope = ErrorEnvelope(
1523
+ message=msg,
1524
+ rc=rc,
1525
+ context=context,
1526
+ snippet=smcl_content[-800:] if smcl_content else (stdout_content + stderr_content)[-800:],
1527
+ smcl_output=smcl_content # Include raw SMCL for debugging
1528
+ )
1529
+ stderr_content = context
571
1530
 
572
- return CommandResponse(
1531
+ resp = CommandResponse(
573
1532
  command=code,
574
1533
  rc=rc,
575
1534
  stdout=stdout_content,
576
1535
  stderr=stderr_content,
577
1536
  success=(rc == 0),
578
1537
  error=error_envelope,
1538
+ log_path=smcl_path if smcl_path else None,
1539
+ smcl_output=smcl_content,
579
1540
  )
580
1541
 
1542
+ # Capture results immediately after execution, INSIDE the lock
1543
+ try:
1544
+ self._last_results = self.get_stored_results(force_fresh=True)
1545
+ except Exception:
1546
+ self._last_results = None
1547
+
1548
+ return resp
1549
+
581
1550
  def _exec_no_capture(self, code: str, echo: bool = False, trace: bool = False) -> CommandResponse:
582
1551
  """Execute Stata code while leaving stdout/stderr alone."""
583
1552
  if not self._initialized:
@@ -595,9 +1564,7 @@ class StataClient:
595
1564
  ret = self.stata.run(code, echo=echo)
596
1565
  if isinstance(ret, str) and ret:
597
1566
  ret_text = ret
598
-
599
- # Robust RC check even for no-capture
600
- rc = self._read_return_code()
1567
+
601
1568
 
602
1569
  except Exception as e:
603
1570
  exc = e
@@ -631,23 +1598,64 @@ class StataClient:
631
1598
  error=error,
632
1599
  )
633
1600
 
1601
+ def exec_lightweight(self, code: str) -> CommandResponse:
1602
+ """
1603
+ Executes a command using simple stdout redirection (no SMCL logs).
1604
+ Much faster on Windows as it avoids FS operations.
1605
+ LIMITED: Does not support error envelopes or complex return code parsing.
1606
+ """
1607
+ if not self._initialized:
1608
+ self.init()
1609
+
1610
+ code = self._maybe_rewrite_graph_name_in_command(code)
1611
+
1612
+ output_buffer = StringIO()
1613
+ error_buffer = StringIO()
1614
+ rc = 0
1615
+ exc = None
1616
+
1617
+ with self._exec_lock:
1618
+ with self._redirect_io(output_buffer, error_buffer):
1619
+ try:
1620
+ self.stata.run(code, echo=False)
1621
+ except Exception as e:
1622
+ exc = e
1623
+ rc = 1
1624
+
1625
+ stdout = output_buffer.getvalue()
1626
+ stderr = error_buffer.getvalue()
1627
+
1628
+ return CommandResponse(
1629
+ command=code,
1630
+ rc=rc,
1631
+ stdout=stdout,
1632
+ stderr=stderr if not exc else str(exc),
1633
+ success=(rc == 0),
1634
+ error=None
1635
+ )
1636
+
634
1637
  async def run_command_streaming(
635
- self,
636
- code: str,
637
- *,
638
- notify_log: Callable[[str], Awaitable[None]],
639
- notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
640
- echo: bool = True,
641
- trace: bool = False,
642
- max_output_lines: Optional[int] = None,
643
- cwd: Optional[str] = None,
644
- auto_cache_graphs: bool = False,
645
- on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
646
- ) -> CommandResponse:
1638
+ self,
1639
+ code: str,
1640
+ *,
1641
+ notify_log: Callable[[str], Awaitable[None]],
1642
+ notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
1643
+ echo: bool = True,
1644
+ trace: bool = False,
1645
+ max_output_lines: Optional[int] = None,
1646
+ cwd: Optional[str] = None,
1647
+ auto_cache_graphs: bool = False,
1648
+ on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
1649
+ emit_graph_ready: bool = False,
1650
+ graph_ready_task_id: Optional[str] = None,
1651
+ graph_ready_format: str = "svg",
1652
+ ) -> CommandResponse:
647
1653
  if not self._initialized:
648
1654
  self.init()
649
1655
 
650
1656
  code = self._maybe_rewrite_graph_name_in_command(code)
1657
+ auto_cache_graphs = auto_cache_graphs or emit_graph_ready
1658
+ total_lines = 0 # Commands (not do-files) do not have line-based progress
651
1659
 
652
1660
  if cwd is not None and not os.path.isdir(cwd):
653
1661
  return CommandResponse(
@@ -665,206 +1673,183 @@ class StataClient:
665
1673
 
666
1674
  start_time = time.time()
667
1675
  exc: Optional[Exception] = None
1676
+ smcl_content = ""
1677
+ smcl_path = None
668
1678
 
669
1679
  # Setup streaming graph cache if enabled
670
- graph_cache = None
671
- if auto_cache_graphs:
672
- graph_cache = StreamingGraphCache(self, auto_cache=True)
673
-
674
- graph_cache_callback = self._create_graph_cache_callback(on_graph_cached, notify_log)
675
-
676
- graph_cache.add_cache_callback(graph_cache_callback)
1680
+ graph_cache = self._init_streaming_graph_cache(auto_cache_graphs, on_graph_cached, notify_log)
677
1681
 
678
- log_file = tempfile.NamedTemporaryFile(
679
- prefix="mcp_stata_",
680
- suffix=".log",
681
- delete=False,
682
- mode="w",
683
- encoding="utf-8",
684
- errors="replace",
685
- buffering=1,
686
- )
687
- log_path = log_file.name
688
- tail = TailBuffer(max_chars=200000 if trace else 20000)
689
- tee = FileTeeIO(log_file, tail)
1682
+ _log_file, log_path, tail, tee = self._create_streaming_log(trace=trace)
1683
+
1684
+ # Create SMCL log path for authoritative output capture
1685
+ smcl_path = self._create_smcl_log_path()
1686
+ smcl_log_name = self._make_smcl_log_name()
690
1687
 
691
1688
  # Inform the MCP client immediately where to read/tail the output.
692
- await notify_log(json.dumps({"event": "log_path", "path": log_path}))
1689
+ await notify_log(json.dumps({"event": "log_path", "path": smcl_path}))
693
1690
 
694
1691
  rc = -1
1692
+ path_for_stata = code.replace("\\", "/")
1693
+ command = f'{path_for_stata}'
1694
+
1695
+ graph_ready_initial = self._capture_graph_state(graph_cache, emit_graph_ready)
1696
+ graph_poll_state = [0.0]
1697
+
1698
+ async def on_chunk_for_graphs(_chunk: str) -> None:
1699
+ await self._maybe_cache_graphs_on_chunk(
1700
+ graph_cache=graph_cache,
1701
+ emit_graph_ready=emit_graph_ready,
1702
+ notify_log=notify_log,
1703
+ graph_ready_task_id=graph_ready_task_id,
1704
+ graph_ready_format=graph_ready_format,
1705
+ graph_ready_initial=graph_ready_initial,
1706
+ last_check=graph_poll_state,
1707
+ )
695
1708
 
696
- def _run_blocking() -> None:
697
- nonlocal rc, exc
698
- with self._exec_lock:
699
- self._is_executing = True
700
- try:
701
- from sfi import Scalar, SFIToolkit # Import SFI tools
702
- with self._temp_cwd(cwd):
703
- with self._redirect_io_streaming(tee, tee):
704
- try:
705
- if trace:
706
- self.stata.run("set trace on")
707
- ret = self.stata.run(code, echo=echo)
708
- # Some PyStata builds return output as a string rather than printing.
709
- if isinstance(ret, str) and ret:
710
- try:
711
- tee.write(ret)
712
- except Exception:
713
- pass
1709
+ done = anyio.Event()
714
1710
 
715
- # ROBUST DETECTION & OUTPUT
716
- rc = self._read_return_code()
1711
+ async with anyio.create_task_group() as tg:
1712
+ async def stream_smcl() -> None:
1713
+ await self._stream_smcl_log(
1714
+ smcl_path=smcl_path,
1715
+ notify_log=notify_log,
1716
+ done=done,
1717
+ on_chunk=on_chunk_for_graphs if graph_cache else None,
1718
+ )
717
1719
 
718
- except Exception as e:
719
- exc = e
720
- if rc == 0: rc = 1
721
- finally:
722
- if trace:
723
- try:
724
- self.stata.run("set trace off")
725
- except Exception:
726
- pass
727
- finally:
728
- self._is_executing = False
1720
+ tg.start_soon(stream_smcl)
729
1721
 
730
- try:
731
1722
  if notify_progress is not None:
732
- await notify_progress(0, None, "Running Stata command")
733
-
734
- await anyio.to_thread.run_sync(_run_blocking, abandon_on_cancel=True)
735
- except get_cancelled_exc_class():
736
- # Best-effort cancellation: signal Stata to break, wait briefly, then propagate.
737
- self._request_break_in()
738
- await self._wait_for_stata_stop()
739
- raise
740
- finally:
741
- tee.close()
1723
+ if total_lines > 0:
1724
+ await notify_progress(0, float(total_lines), f"Executing command: 0/{total_lines}")
1725
+ else:
1726
+ await notify_progress(0, None, "Running command")
742
1727
 
743
- # Cache detected graphs after command completes
744
- if graph_cache:
745
1728
  try:
746
- # Use the enhanced pystata-integrated caching method
747
- if hasattr(graph_cache, 'cache_detected_graphs_with_pystata'):
748
- cached_graphs = await graph_cache.cache_detected_graphs_with_pystata()
749
- else:
750
- cached_graphs = await graph_cache.cache_detected_graphs()
751
-
752
- if cached_graphs and notify_progress:
753
- await notify_progress(1, 1, f"Command completed. Cached {len(cached_graphs)} graphs: {', '.join(cached_graphs)}")
754
- except Exception as e:
755
- logger.warning(f"Failed to cache detected graphs: {e}")
1729
+ run_blocking = lambda: self._run_streaming_blocking(
1730
+ command=command,
1731
+ tee=tee,
1732
+ cwd=cwd,
1733
+ trace=trace,
1734
+ echo=echo,
1735
+ smcl_path=smcl_path,
1736
+ smcl_log_name=smcl_log_name,
1737
+ hold_attr="_hold_name_stream",
1738
+ )
1739
+ try:
1740
+ rc, exc = await anyio.to_thread.run_sync(
1741
+ run_blocking,
1742
+ abandon_on_cancel=True,
1743
+ )
1744
+ except TypeError:
1745
+ rc, exc = await anyio.to_thread.run_sync(run_blocking)
1746
+ except get_cancelled_exc_class():
1747
+ self._request_break_in()
1748
+ await self._wait_for_stata_stop()
1749
+ raise
1750
+ finally:
1751
+ done.set()
1752
+ tee.close()
756
1753
 
757
- tail_text = tail.get_value()
758
- log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
759
- if log_tail and len(log_tail) > len(tail_text):
760
- tail_text = log_tail
761
- combined = (tail_text or "") + (f"\n{exc}" if exc else "")
1754
+ # Read SMCL content as the authoritative source
1755
+ smcl_content = self._read_smcl_file(smcl_path)
1756
+
1757
+ await self._cache_new_graphs(
1758
+ graph_cache,
1759
+ notify_progress=notify_progress,
1760
+ total_lines=total_lines,
1761
+ completed_label="Command",
1762
+ )
1763
+ self._emit_graph_ready_task(
1764
+ emit_graph_ready=emit_graph_ready,
1765
+ graph_ready_initial=graph_ready_initial,
1766
+ notify_log=notify_log,
1767
+ graph_ready_task_id=graph_ready_task_id,
1768
+ graph_ready_format=graph_ready_format,
1769
+ )
1770
+
1771
+ combined = self._build_combined_log(tail, smcl_path, rc, trace, exc)
762
1772
 
1773
+ # Use SMCL content as primary source for RC detection
1774
+ if not exc or rc in (1, -1):
1775
+ parsed_rc = self._parse_rc_from_smcl(smcl_content)
1776
+ if parsed_rc is not None and parsed_rc != 0:
1777
+ rc = parsed_rc
1778
+ elif rc in (-1, 0, 1): # Also check text if rc is generic 1 or unset
1779
+ parsed_rc_text = self._parse_rc_from_text(combined)
1780
+ if parsed_rc_text is not None:
1781
+ rc = parsed_rc_text
1782
+ elif rc == -1:
1783
+ rc = 0 # Default to success if no error trace found
1784
+
763
1785
  success = (rc == 0 and exc is None)
1786
+ stderr_final = None
764
1787
  error = None
765
1788
 
766
1789
  if not success:
767
- # Use robust extractor
768
- msg, context = self._extract_error_and_context(combined, rc)
769
-
1790
+ # Use SMCL as authoritative source for error extraction
1791
+ if smcl_content:
1792
+ msg, context = self._extract_error_from_smcl(smcl_content, rc)
1793
+ else:
1794
+ # Fallback to combined log
1795
+ msg, context = self._extract_error_and_context(combined, rc)
1796
+
770
1797
  error = ErrorEnvelope(
771
1798
  message=msg,
772
1799
  context=context,
773
1800
  rc=rc,
774
- command=code,
1801
+ command=command,
775
1802
  log_path=log_path,
776
- snippet=combined[-800:] # Keep snippet for backward compat
1803
+ snippet=smcl_content[-800:] if smcl_content else combined[-800:],
1804
+ smcl_output=smcl_content,
777
1805
  )
1806
+ stderr_final = context
778
1807
 
779
1808
  duration = time.time() - start_time
780
- code_preview = code.replace("\n", "\\n")
781
1809
  logger.info(
782
1810
  "stata.run(stream) rc=%s success=%s trace=%s duration_ms=%.2f code_preview=%s",
783
- rc,
784
- success,
785
- trace,
786
- duration * 1000,
787
- code_preview[:120],
788
- )
789
-
790
- result = CommandResponse(
791
- command=code,
792
- rc=rc,
793
- stdout="",
794
- stderr=None,
795
- log_path=log_path,
796
- success=success,
797
- error=error,
798
- )
799
-
800
- if notify_progress is not None:
801
- await notify_progress(1, 1, "Finished")
802
-
803
- return result
804
-
805
- def _count_do_file_lines(self, path: str) -> int:
806
- try:
807
- with open(path, "r", encoding="utf-8", errors="replace") as f:
808
- lines = f.read().splitlines()
809
- except Exception:
810
- return 0
811
-
812
- total = 0
813
- for line in lines:
814
- s = line.strip()
815
- if not s:
816
- continue
817
- if s.startswith("*"):
818
- continue
819
- if s.startswith("//"):
820
- continue
821
- total += 1
822
- return total
823
-
824
- async def run_do_file_streaming(
825
- self,
826
- path: str,
827
- *,
828
- notify_log: Callable[[str], Awaitable[None]],
829
- notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
830
- echo: bool = True,
831
- trace: bool = False,
832
- max_output_lines: Optional[int] = None,
833
- cwd: Optional[str] = None,
834
- auto_cache_graphs: bool = False,
835
- on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
836
- ) -> CommandResponse:
837
- if cwd is not None and not os.path.isdir(cwd):
838
- return CommandResponse(
839
- command=f'do "{path}"',
840
- rc=601,
841
- stdout="",
842
- stderr=None,
843
- success=False,
844
- error=ErrorEnvelope(
845
- message=f"cwd not found: {cwd}",
846
- rc=601,
847
- command=path,
848
- ),
849
- )
1811
+ rc,
1812
+ success,
1813
+ trace,
1814
+ duration * 1000,
1815
+ code.replace("\n", "\\n")[:120],
1816
+ )
850
1817
 
851
- effective_path = path
852
- if cwd is not None and not os.path.isabs(path):
853
- effective_path = os.path.abspath(os.path.join(cwd, path))
1818
+ result = CommandResponse(
1819
+ command=code,
1820
+ rc=rc,
1821
+ stdout="",
1822
+ stderr=stderr_final,
1823
+ log_path=log_path,
1824
+ success=success,
1825
+ error=error,
1826
+ smcl_output=smcl_content,
1827
+ )
854
1828
 
855
- if not os.path.exists(effective_path):
856
- return CommandResponse(
857
- command=f'do "{effective_path}"',
858
- rc=601,
859
- stdout="",
860
- stderr=None,
861
- success=False,
862
- error=ErrorEnvelope(
863
- message=f"Do-file not found: {effective_path}",
864
- rc=601,
865
- command=effective_path,
866
- ),
867
- )
1829
+ if notify_progress is not None:
1830
+ await notify_progress(1, 1, "Finished")
1831
+
1832
+ return result
1833
+
1834
+ async def run_do_file_streaming(
1835
+ self,
1836
+ path: str,
1837
+ *,
1838
+ notify_log: Callable[[str], Awaitable[None]],
1839
+ notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
1840
+ echo: bool = True,
1841
+ trace: bool = False,
1842
+ max_output_lines: Optional[int] = None,
1843
+ cwd: Optional[str] = None,
1844
+ auto_cache_graphs: bool = False,
1845
+ on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
1846
+ emit_graph_ready: bool = False,
1847
+ graph_ready_task_id: Optional[str] = None,
1848
+ graph_ready_format: str = "svg",
1849
+ ) -> CommandResponse:
1850
+ effective_path, command, error_response = self._resolve_do_file_path(path, cwd)
1851
+ if error_response is not None:
1852
+ return error_response
868
1853
 
869
1854
  total_lines = self._count_do_file_lines(effective_path)
870
1855
  executed_lines = 0
@@ -893,106 +1878,55 @@ class StataClient:
893
1878
  if not self._initialized:
894
1879
  self.init()
895
1880
 
1881
+ auto_cache_graphs = auto_cache_graphs or emit_graph_ready
1882
+
896
1883
  start_time = time.time()
897
1884
  exc: Optional[Exception] = None
1885
+ smcl_content = ""
1886
+ smcl_path = None
898
1887
 
899
- # Setup streaming graph cache if enabled
900
- graph_cache = None
901
- if auto_cache_graphs:
902
- graph_cache = StreamingGraphCache(self, auto_cache=True)
903
-
904
- graph_cache_callback = self._create_graph_cache_callback(on_graph_cached, notify_log)
905
-
906
- graph_cache.add_cache_callback(graph_cache_callback)
1888
+ graph_cache = self._init_streaming_graph_cache(auto_cache_graphs, on_graph_cached, notify_log)
1889
+ _log_file, log_path, tail, tee = self._create_streaming_log(trace=trace)
907
1890
 
908
- log_file = tempfile.NamedTemporaryFile(
909
- prefix="mcp_stata_",
910
- suffix=".log",
911
- delete=False,
912
- mode="w",
913
- encoding="utf-8",
914
- errors="replace",
915
- buffering=1,
916
- )
917
- log_path = log_file.name
918
- tail = TailBuffer(max_chars=200000 if trace else 20000)
919
- tee = FileTeeIO(log_file, tail)
1891
+ smcl_path = self._create_smcl_log_path()
1892
+ smcl_log_name = self._make_smcl_log_name()
920
1893
 
921
1894
  # Inform the MCP client immediately where to read/tail the output.
922
- await notify_log(json.dumps({"event": "log_path", "path": log_path}))
1895
+ await notify_log(json.dumps({"event": "log_path", "path": smcl_path}))
923
1896
 
924
1897
  rc = -1
925
- path_for_stata = effective_path.replace("\\", "/")
926
- command = f'do "{path_for_stata}"'
1898
+ graph_ready_initial = self._capture_graph_state(graph_cache, emit_graph_ready)
1899
+ graph_poll_state = [0.0]
1900
+
1901
+ async def on_chunk_for_graphs(_chunk: str) -> None:
1902
+ await self._maybe_cache_graphs_on_chunk(
1903
+ graph_cache=graph_cache,
1904
+ emit_graph_ready=emit_graph_ready,
1905
+ notify_log=notify_log,
1906
+ graph_ready_task_id=graph_ready_task_id,
1907
+ graph_ready_format=graph_ready_format,
1908
+ graph_ready_initial=graph_ready_initial,
1909
+ last_check=graph_poll_state,
1910
+ )
927
1911
 
928
- # Capture initial graph state BEFORE execution starts
1912
+ on_chunk_callback = on_chunk_for_progress
929
1913
  if graph_cache:
930
- try:
931
- graph_cache._initial_graphs = set(self.list_graphs())
932
- logger.debug(f"Initial graph state captured: {graph_cache._initial_graphs}")
933
- except Exception as e:
934
- logger.debug(f"Failed to capture initial graph state: {e}")
935
- graph_cache._initial_graphs = set()
936
-
937
- def _run_blocking() -> None:
938
- nonlocal rc, exc
939
- with self._exec_lock:
940
- # Set execution flag to prevent recursive Stata calls
941
- self._is_executing = True
942
- try:
943
- from sfi import Scalar, SFIToolkit # Import SFI tools
944
- with self._temp_cwd(cwd):
945
- with self._redirect_io_streaming(tee, tee):
946
- try:
947
- if trace:
948
- self.stata.run("set trace on")
949
- ret = self.stata.run(command, echo=echo)
950
- # Some PyStata builds return output as a string rather than printing.
951
- if isinstance(ret, str) and ret:
952
- try:
953
- tee.write(ret)
954
- except Exception:
955
- pass
956
-
957
- # ROBUST DETECTION & OUTPUT
958
- rc = self._read_return_code()
959
-
960
- except Exception as e:
961
- exc = e
962
- if rc == 0: rc = 1
963
- finally:
964
- if trace:
965
- try: self.stata.run("set trace off")
966
- except: pass
967
- finally:
968
- # Clear execution flag
969
- self._is_executing = False
1914
+ async def on_chunk_callback(chunk: str) -> None:
1915
+ await on_chunk_for_progress(chunk)
1916
+ await on_chunk_for_graphs(chunk)
970
1917
 
971
1918
  done = anyio.Event()
972
1919
 
973
- async def _monitor_progress_from_log() -> None:
974
- if notify_progress is None or total_lines <= 0:
975
- return
976
- last_pos = 0
977
- try:
978
- with open(log_path, "r", encoding="utf-8", errors="replace") as f:
979
- while not done.is_set():
980
- f.seek(last_pos)
981
- chunk = f.read()
982
- if chunk:
983
- last_pos = f.tell()
984
- await on_chunk_for_progress(chunk)
985
- await anyio.sleep(0.05)
986
-
987
- f.seek(last_pos)
988
- chunk = f.read()
989
- if chunk:
990
- await on_chunk_for_progress(chunk)
991
- except Exception:
992
- return
993
-
994
1920
  async with anyio.create_task_group() as tg:
995
- tg.start_soon(_monitor_progress_from_log)
1921
+ async def stream_smcl() -> None:
1922
+ await self._stream_smcl_log(
1923
+ smcl_path=smcl_path,
1924
+ notify_log=notify_log,
1925
+ done=done,
1926
+ on_chunk=on_chunk_callback,
1927
+ )
1928
+
1929
+ tg.start_soon(stream_smcl)
996
1930
 
997
1931
  if notify_progress is not None:
998
1932
  if total_lines > 0:
@@ -1001,7 +1935,23 @@ class StataClient:
1001
1935
  await notify_progress(0, None, "Running do-file")
1002
1936
 
1003
1937
  try:
1004
- await anyio.to_thread.run_sync(_run_blocking, abandon_on_cancel=True)
1938
+ run_blocking = lambda: self._run_streaming_blocking(
1939
+ command=command,
1940
+ tee=tee,
1941
+ cwd=cwd,
1942
+ trace=trace,
1943
+ echo=echo,
1944
+ smcl_path=smcl_path,
1945
+ smcl_log_name=smcl_log_name,
1946
+ hold_attr="_hold_name_do",
1947
+ )
1948
+ try:
1949
+ rc, exc = await anyio.to_thread.run_sync(
1950
+ run_blocking,
1951
+ abandon_on_cancel=True,
1952
+ )
1953
+ except TypeError:
1954
+ rc, exc = await anyio.to_thread.run_sync(run_blocking)
1005
1955
  except get_cancelled_exc_class():
1006
1956
  self._request_break_in()
1007
1957
  await self._wait_for_stata_stop()
@@ -1010,57 +1960,48 @@ class StataClient:
1010
1960
  done.set()
1011
1961
  tee.close()
1012
1962
 
1013
- # Robust post-execution graph detection and caching
1014
- if graph_cache and graph_cache.auto_cache:
1015
- try:
1016
- # [Existing graph cache logic kept identical]
1017
- cached_graphs = []
1018
- initial_graphs = getattr(graph_cache, '_initial_graphs', set())
1019
- current_graphs = set(self.list_graphs())
1020
- new_graphs = current_graphs - initial_graphs - graph_cache._cached_graphs
1021
-
1022
- if new_graphs:
1023
- logger.info(f"Detected {len(new_graphs)} new graph(s): {sorted(new_graphs)}")
1963
+ # Read SMCL content as the authoritative source
1964
+ smcl_content = self._read_smcl_file(smcl_path)
1024
1965
 
1025
- for graph_name in new_graphs:
1026
- try:
1027
- cache_result = await anyio.to_thread.run_sync(
1028
- self.cache_graph_on_creation,
1029
- graph_name
1030
- )
1031
- if cache_result:
1032
- cached_graphs.append(graph_name)
1033
- graph_cache._cached_graphs.add(graph_name)
1034
-
1035
- for callback in graph_cache._cache_callbacks:
1036
- try:
1037
- await anyio.to_thread.run_sync(callback, graph_name, cache_result)
1038
- except Exception: pass
1039
- except Exception as e:
1040
- logger.error(f"Error caching graph {graph_name}: {e}")
1041
-
1042
- # Notify progress if graphs were cached
1043
- if cached_graphs and notify_progress:
1044
- await notify_progress(
1045
- float(total_lines) if total_lines > 0 else 1,
1046
- float(total_lines) if total_lines > 0 else 1,
1047
- f"Do-file completed. Cached {len(cached_graphs)} graph(s): {', '.join(cached_graphs)}"
1048
- )
1049
- except Exception as e:
1050
- logger.error(f"Post-execution graph detection failed: {e}")
1966
+ await self._cache_new_graphs(
1967
+ graph_cache,
1968
+ notify_progress=notify_progress,
1969
+ total_lines=total_lines,
1970
+ completed_label="Do-file",
1971
+ )
1972
+ self._emit_graph_ready_task(
1973
+ emit_graph_ready=emit_graph_ready,
1974
+ graph_ready_initial=graph_ready_initial,
1975
+ notify_log=notify_log,
1976
+ graph_ready_task_id=graph_ready_task_id,
1977
+ graph_ready_format=graph_ready_format,
1978
+ )
1051
1979
 
1052
- tail_text = tail.get_value()
1053
- log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
1054
- if log_tail and len(log_tail) > len(tail_text):
1055
- tail_text = log_tail
1056
- combined = (tail_text or "") + (f"\n{exc}" if exc else "")
1980
+ combined = self._build_combined_log(tail, log_path, rc, trace, exc)
1057
1981
 
1982
+ # Use SMCL content as primary source for RC detection
1983
+ if not exc or rc in (1, -1):
1984
+ parsed_rc = self._parse_rc_from_smcl(smcl_content)
1985
+ if parsed_rc is not None and parsed_rc != 0:
1986
+ rc = parsed_rc
1987
+ elif rc in (-1, 0, 1):
1988
+ parsed_rc_text = self._parse_rc_from_text(combined)
1989
+ if parsed_rc_text is not None:
1990
+ rc = parsed_rc_text
1991
+ elif rc == -1:
1992
+ rc = 0 # Default to success if no error found
1993
+
1058
1994
  success = (rc == 0 and exc is None)
1995
+ stderr_final = None
1059
1996
  error = None
1060
1997
 
1061
1998
  if not success:
1062
- # Robust extraction
1063
- msg, context = self._extract_error_and_context(combined, rc)
1999
+ # Use SMCL as authoritative source for error extraction
2000
+ if smcl_content:
2001
+ msg, context = self._extract_error_from_smcl(smcl_content, rc)
2002
+ else:
2003
+ # Fallback to combined log
2004
+ msg, context = self._extract_error_and_context(combined, rc)
1064
2005
 
1065
2006
  error = ErrorEnvelope(
1066
2007
  message=msg,
@@ -1068,8 +2009,10 @@ class StataClient:
1068
2009
  rc=rc,
1069
2010
  command=command,
1070
2011
  log_path=log_path,
1071
- snippet=combined[-800:]
2012
+ snippet=smcl_content[-800:] if smcl_content else combined[-800:],
2013
+ smcl_output=smcl_content,
1072
2014
  )
2015
+ stderr_final = context
1073
2016
 
1074
2017
  duration = time.time() - start_time
1075
2018
  logger.info(
@@ -1085,10 +2028,11 @@ class StataClient:
1085
2028
  command=command,
1086
2029
  rc=rc,
1087
2030
  stdout="",
1088
- stderr=None,
2031
+ stderr=stderr_final,
1089
2032
  log_path=log_path,
1090
2033
  success=success,
1091
2034
  error=error,
2035
+ smcl_output=smcl_content,
1092
2036
  )
1093
2037
 
1094
2038
  if notify_progress is not None:
@@ -1110,22 +2054,7 @@ class StataClient:
1110
2054
  """
1111
2055
  result = self._exec_with_capture(code, echo=echo, trace=trace, cwd=cwd)
1112
2056
 
1113
- # Truncate stdout if requested
1114
- if max_output_lines is not None and result.stdout:
1115
- lines = result.stdout.splitlines()
1116
- if len(lines) > max_output_lines:
1117
- truncated_lines = lines[:max_output_lines]
1118
- truncated_lines.append(f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)")
1119
- result = CommandResponse(
1120
- command=result.command,
1121
- rc=result.rc,
1122
- stdout="\n".join(truncated_lines),
1123
- stderr=result.stderr,
1124
- success=result.success,
1125
- error=result.error,
1126
- )
1127
-
1128
- return result
2057
+ return self._truncate_command_output(result, max_output_lines)
1129
2058
 
1130
2059
  def get_data(self, start: int = 0, count: int = 50) -> List[Dict[str, Any]]:
1131
2060
  """Returns valid JSON-serializable data."""
@@ -1182,16 +2111,19 @@ class StataClient:
1182
2111
  sortlist = ""
1183
2112
  changed = False
1184
2113
  try:
1185
- frame = str(Macro.getCValue("frame") or "default")
2114
+ frame = str(Macro.getGlobal("frame") or "default")
1186
2115
  except Exception:
2116
+ logger.debug("Failed to get 'frame' macro", exc_info=True)
1187
2117
  frame = "default"
1188
2118
  try:
1189
- sortlist = str(Macro.getCValue("sortlist") or "")
2119
+ sortlist = str(Macro.getGlobal("sortlist") or "")
1190
2120
  except Exception:
2121
+ logger.debug("Failed to get 'sortlist' macro", exc_info=True)
1191
2122
  sortlist = ""
1192
2123
  try:
1193
- changed = bool(int(float(Macro.getCValue("changed") or "0")))
2124
+ changed = bool(int(float(Macro.getGlobal("changed") or "0")))
1194
2125
  except Exception:
2126
+ logger.debug("Failed to get 'changed' macro", exc_info=True)
1195
2127
  changed = False
1196
2128
 
1197
2129
  return {"frame": frame, "n": n, "k": k, "sortlist": sortlist, "changed": changed}
@@ -1340,6 +2272,96 @@ class StataClient:
1340
2272
  "truncated_cells": truncated_cells,
1341
2273
  }
1342
2274
 
2275
+ def get_arrow_stream(
2276
+ self,
2277
+ *,
2278
+ offset: int,
2279
+ limit: int,
2280
+ vars: List[str],
2281
+ include_obs_no: bool,
2282
+ obs_indices: Optional[List[int]] = None,
2283
+ ) -> bytes:
2284
+ """
2285
+ Returns an Apache Arrow IPC stream (as bytes) for the requested data page.
2286
+ Uses Polars if available (faster), falls back to Pandas.
2287
+ """
2288
+ if not self._initialized:
2289
+ self.init()
2290
+
2291
+ import pyarrow as pa
2292
+ from sfi import Data # type: ignore[import-not-found]
2293
+
2294
+ use_polars = _get_polars_available()
2295
+ if use_polars:
2296
+ import polars as pl
2297
+ else:
2298
+ import pandas as pd
2299
+
2300
+ state = self.get_dataset_state()
2301
+ n = int(state.get("n", 0) or 0)
2302
+ k = int(state.get("k", 0) or 0)
2303
+ if k == 0 and n == 0:
2304
+ raise RuntimeError("No data in memory")
2305
+
2306
+ var_map = self._get_var_index_map()
2307
+ for v in vars:
2308
+ if v not in var_map:
2309
+ raise ValueError(f"Invalid variable: {v}")
2310
+
2311
+ # Determine observations to fetch
2312
+ if obs_indices is None:
2313
+ start = offset
2314
+ end = min(offset + limit, n)
2315
+ obs_list = list(range(start, end)) if start < n else []
2316
+ else:
2317
+ start = offset
2318
+ end = min(offset + limit, len(obs_indices))
2319
+ obs_list = obs_indices[start:end]
2320
+
2321
+ try:
2322
+ if not obs_list:
2323
+ # Empty schema-only table
2324
+ if use_polars:
2325
+ schema_cols = {}
2326
+ if include_obs_no:
2327
+ schema_cols["_n"] = pl.Int64
2328
+ for v in vars:
2329
+ schema_cols[v] = pl.Utf8
2330
+ table = pl.DataFrame(schema=schema_cols).to_arrow()
2331
+ else:
2332
+ columns = {}
2333
+ if include_obs_no:
2334
+ columns["_n"] = pa.array([], type=pa.int64())
2335
+ for v in vars:
2336
+ columns[v] = pa.array([], type=pa.string())
2337
+ table = pa.table(columns)
2338
+ else:
2339
+ # Fetch all data in one C-call
2340
+ raw_data = Data.get(var=vars, obs=obs_list, valuelabel=False)
2341
+
2342
+ if use_polars:
2343
+ df = pl.DataFrame(raw_data, schema=vars, orient="row")
2344
+ if include_obs_no:
2345
+ obs_nums = [i + 1 for i in obs_list]
2346
+ df = df.with_columns(pl.Series("_n", obs_nums, dtype=pl.Int64))
2347
+ df = df.select(["_n"] + vars)
2348
+ table = df.to_arrow()
2349
+ else:
2350
+ df = pd.DataFrame(raw_data, columns=vars)
2351
+ if include_obs_no:
2352
+ df.insert(0, "_n", [i + 1 for i in obs_list])
2353
+ table = pa.Table.from_pandas(df, preserve_index=False)
2354
+
2355
+ # Serialize to IPC Stream
2356
+ sink = pa.BufferOutputStream()
2357
+ with pa.RecordBatchStreamWriter(sink, table.schema) as writer:
2358
+ writer.write_table(table)
2359
+
2360
+ return sink.getvalue().to_pybytes()
2361
+
2362
+ except Exception as e:
2363
+ raise RuntimeError(f"Failed to generate Arrow stream: {e}")
2364
+
1343
2365
  _FILTER_IDENT = re.compile(r"\b[A-Za-z_][A-Za-z0-9_]*\b")
1344
2366
 
1345
2367
  def _extract_filter_vars(self, filter_expr: str) -> List[str]:
@@ -1528,15 +2550,21 @@ class StataClient:
1528
2550
 
1529
2551
  # Cache miss or expired, fetch fresh data
1530
2552
  try:
1531
- # 'graph dir' returns list in r(list)
1532
- # We need to ensure we run it quietly so we don't spam.
1533
- self.stata.run("quietly graph dir, memory")
1534
-
1535
- # Accessing r-class results in Python can be tricky via pystata's run command.
1536
- # We stash the result in a global macro that python sfi can easily read.
1537
- from sfi import Macro # type: ignore[import-not-found]
1538
- self.stata.run("global mcp_graph_list `r(list)'")
1539
- graph_list_str = Macro.getGlobal("mcp_graph_list")
2553
+ # Preservation of r() results is critical because this can be called
2554
+ # automatically after every user command (e.g., during streaming).
2555
+ import time
2556
+ hold_name = f"_mcp_ghold_{int(time.time() * 1000 % 1000000)}"
2557
+ self.stata.run(f"capture _return hold {hold_name}", echo=False)
2558
+
2559
+ try:
2560
+ self.stata.run("macro define mcp_graph_list \"\"", echo=False)
2561
+ self.stata.run("quietly graph dir, memory", echo=False)
2562
+ from sfi import Macro # type: ignore[import-not-found]
2563
+ self.stata.run("macro define mcp_graph_list `r(list)'", echo=False)
2564
+ graph_list_str = Macro.getGlobal("mcp_graph_list")
2565
+ finally:
2566
+ self.stata.run(f"capture _return restore {hold_name}", echo=False)
2567
+
1540
2568
  raw_list = graph_list_str.split() if graph_list_str else []
1541
2569
 
1542
2570
  # Map internal Stata names back to user-facing names when we have an alias.
@@ -1548,7 +2576,7 @@ class StataClient:
1548
2576
  # Update cache
1549
2577
  with self._list_graphs_cache_lock:
1550
2578
  self._list_graphs_cache = result
1551
- self._list_graphs_cache_time = current_time
2579
+ self._list_graphs_cache_time = time.time()
1552
2580
 
1553
2581
  return result
1554
2582
 
@@ -1583,8 +2611,8 @@ class StataClient:
1583
2611
  import tempfile
1584
2612
 
1585
2613
  fmt = (format or "pdf").strip().lower()
1586
- if fmt not in {"pdf", "png"}:
1587
- raise ValueError(f"Unsupported graph export format: {format}. Allowed: pdf, png.")
2614
+ if fmt not in {"pdf", "png", "svg"}:
2615
+ raise ValueError(f"Unsupported graph export format: {format}. Allowed: pdf, png, svg.")
1588
2616
 
1589
2617
  if not filename:
1590
2618
  suffix = f".{fmt}"
@@ -1737,73 +2765,77 @@ class StataClient:
1737
2765
  logger.warning("SMCL to Markdown failed, falling back to plain text: %s", parse_err)
1738
2766
  return self._smcl_to_text(smcl)
1739
2767
  except Exception as e:
1740
- return f"Error reading help file at {fn}: {e}"
2768
+ logger.warning("Help file read failed for %s: %s", topic, e)
1741
2769
 
1742
- # Fallback to URL if file not found
1743
- return f"Help file for '{topic}' not found. Please consult: https://www.stata.com/help.cgi?{topic}"
2770
+ # If no help file found, return a fallback message
2771
+ return f"Help file for '{topic}' not found."
2772
+
2773
+ def get_stored_results(self, force_fresh: bool = False) -> Dict[str, Any]:
2774
+ """Returns e() and r() results using SFI for maximum reliability."""
2775
+ if not force_fresh and self._last_results is not None:
2776
+ return self._last_results
1744
2777
 
1745
- def get_stored_results(self) -> Dict[str, Any]:
1746
- """Returns e() and r() results."""
1747
2778
  if not self._initialized:
1748
2779
  self.init()
1749
2780
 
1750
- results = {"r": {}, "e": {}}
1751
-
1752
- # We parse 'return list' output as there is no direct bulk export of stored results
1753
- raw_r_resp = self.run_command_structured("return list", echo=True)
1754
- raw_e_resp = self.run_command_structured("ereturn list", echo=True)
1755
- raw_r = raw_r_resp.stdout if raw_r_resp.success else (raw_r_resp.error.snippet if raw_r_resp.error else "")
1756
- raw_e = raw_e_resp.stdout if raw_e_resp.success else (raw_e_resp.error.snippet if raw_e_resp.error else "")
1757
-
1758
- # Simple parser
1759
- def parse_list(text):
1760
- data = {}
1761
- # We don't strictly need to track sections if we check patterns
1762
- for line in text.splitlines():
1763
- line = line.strip()
1764
- if not line:
1765
- continue
1766
-
1767
- # scalars: r(name) = value
1768
- if "=" in line and ("r(" in line or "e(" in line):
1769
- try:
1770
- name_part, val_part = line.split("=", 1)
1771
- name_part = name_part.strip() # "r(mean)"
1772
- val_part = val_part.strip() # "6165.2..."
1773
-
1774
- # Extract just the name inside r(...) if desired,
1775
- # or keep full key "r(mean)".
1776
- # User likely wants "mean" inside "r" dict.
1777
-
1778
- if "(" in name_part and name_part.endswith(")"):
1779
- # r(mean) -> mean
1780
- start = name_part.find("(") + 1
1781
- end = name_part.find(")")
1782
- key = name_part[start:end]
1783
- data[key] = val_part
1784
- except Exception:
1785
- pass
1786
-
1787
- # macros: r(name) : "value"
1788
- elif ":" in line and ("r(" in line or "e(" in line):
1789
- try:
1790
- name_part, val_part = line.split(":", 1)
1791
- name_part = name_part.strip()
1792
- val_part = val_part.strip().strip('"')
1793
-
1794
- if "(" in name_part and name_part.endswith(")"):
1795
- start = name_part.find("(") + 1
1796
- end = name_part.find(")")
1797
- key = name_part[start:end]
1798
- data[key] = val_part
1799
- except Exception:
1800
- pass
1801
- return data
1802
-
1803
- results["r"] = parse_list(raw_r)
1804
- results["e"] = parse_list(raw_e)
1805
-
1806
- return results
2781
+ with self._exec_lock:
2782
+ # We must be extremely careful not to clobber r()/e() while fetching their names.
2783
+ # We use a hold to peek at the results.
2784
+ hold_name = f"mcp_peek_{uuid.uuid4().hex[:8]}"
2785
+ self.stata.run(f"capture _return hold {hold_name}", echo=False)
2786
+
2787
+ try:
2788
+ from sfi import Scalar, Macro
2789
+ results = {"r": {}, "e": {}}
2790
+
2791
+ for rclass in ["r", "e"]:
2792
+ # Restore with 'hold' to peek at results without losing them from the hold
2793
+ # Note: Stata 18+ supports 'restore ..., hold' which is ideal.
2794
+ self.stata.run(f"capture _return restore {hold_name}, hold", echo=False)
2795
+
2796
+ # Fetch names using backtick expansion (which we verified works better than colon)
2797
+ # and avoid leading underscores which were causing syntax errors with 'global'
2798
+ self.stata.run(f"macro define mcp_scnames `: {rclass}(scalars)'", echo=False)
2799
+ self.stata.run(f"macro define mcp_macnames `: {rclass}(macros)'", echo=False)
2800
+
2801
+ # 1. Capture Scalars
2802
+ names_str = Macro.getGlobal("mcp_scnames")
2803
+ if names_str:
2804
+ for name in names_str.split():
2805
+ try:
2806
+ val = Scalar.getValue(f"{rclass}({name})")
2807
+ results[rclass][name] = val
2808
+ except Exception:
2809
+ pass
2810
+
2811
+ # 2. Capture Macros (strings)
2812
+ macros_str = Macro.getGlobal("mcp_macnames")
2813
+ if macros_str:
2814
+ for name in macros_str.split():
2815
+ try:
2816
+ # Restore/Hold again to be safe before fetching each macro
2817
+ self.stata.run(f"capture _return restore {hold_name}, hold", echo=False)
2818
+ # Capture the string value into a macro
2819
+ self.stata.run(f"macro define mcp_mval `{rclass}({name})'", echo=False)
2820
+ val = Macro.getGlobal("mcp_mval")
2821
+ results[rclass][name] = val
2822
+ except Exception:
2823
+ pass
2824
+
2825
+ # Cleanup
2826
+ self.stata.run("macro drop mcp_scnames mcp_macnames mcp_mval", echo=False)
2827
+ self.stata.run(f"capture _return restore {hold_name}", echo=False) # Restore one last time to leave Stata in correct state
2828
+
2829
+ self._last_results = results
2830
+ return results
2831
+ except Exception as e:
2832
+ logger.error(f"SFI-based get_stored_results failed: {e}")
2833
+ # Try to clean up hold if we failed
2834
+ try:
2835
+ self.stata.run(f"capture _return drop {hold_name}", echo=False)
2836
+ except Exception:
2837
+ pass
2838
+ return {"r": {}, "e": {}}
1807
2839
 
1808
2840
  def invalidate_graph_cache(self, graph_name: str = None) -> None:
1809
2841
  """Invalidate cache for specific graph or all graphs.
@@ -2253,105 +3285,57 @@ class StataClient:
2253
3285
  return False
2254
3286
 
2255
3287
  def run_do_file(self, path: str, echo: bool = True, trace: bool = False, max_output_lines: Optional[int] = None, cwd: Optional[str] = None) -> CommandResponse:
2256
- if cwd is not None and not os.path.isdir(cwd):
2257
- return CommandResponse(
2258
- command=f'do "{path}"',
2259
- rc=601,
2260
- stdout="",
2261
- stderr=None,
2262
- success=False,
2263
- error=ErrorEnvelope(
2264
- message=f"cwd not found: {cwd}",
2265
- rc=601,
2266
- command=path,
2267
- ),
2268
- )
2269
-
2270
- effective_path = path
2271
- if cwd is not None and not os.path.isabs(path):
2272
- effective_path = os.path.abspath(os.path.join(cwd, path))
2273
-
2274
- if not os.path.exists(effective_path):
2275
- return CommandResponse(
2276
- command=f'do "{effective_path}"',
2277
- rc=601,
2278
- stdout="",
2279
- stderr=None,
2280
- success=False,
2281
- error=ErrorEnvelope(
2282
- message=f"Do-file not found: {effective_path}",
2283
- rc=601,
2284
- command=effective_path,
2285
- ),
2286
- )
3288
+ effective_path, command, error_response = self._resolve_do_file_path(path, cwd)
3289
+ if error_response is not None:
3290
+ return error_response
2287
3291
 
2288
3292
  if not self._initialized:
2289
3293
  self.init()
2290
3294
 
2291
3295
  start_time = time.time()
2292
3296
  exc: Optional[Exception] = None
2293
- path_for_stata = effective_path.replace("\\", "/")
2294
- command = f'do "{path_for_stata}"'
3297
+ smcl_content = ""
3298
+ smcl_path = None
2295
3299
 
2296
- log_file = tempfile.NamedTemporaryFile(
2297
- prefix="mcp_stata_",
2298
- suffix=".log",
2299
- delete=False,
2300
- mode="w",
2301
- encoding="utf-8",
2302
- errors="replace",
2303
- buffering=1,
2304
- )
2305
- log_path = log_file.name
2306
- tail = TailBuffer(max_chars=200000 if trace else 20000)
2307
- tee = FileTeeIO(log_file, tail)
3300
+ _log_file, log_path, tail, tee = self._create_streaming_log(trace=trace)
3301
+ smcl_path = self._create_smcl_log_path()
3302
+ smcl_log_name = self._make_smcl_log_name()
2308
3303
 
2309
3304
  rc = -1
3305
+ try:
3306
+ rc, exc = self._run_streaming_blocking(
3307
+ command=command,
3308
+ tee=tee,
3309
+ cwd=cwd,
3310
+ trace=trace,
3311
+ echo=echo,
3312
+ smcl_path=smcl_path,
3313
+ smcl_log_name=smcl_log_name,
3314
+ hold_attr="_hold_name_do_sync",
3315
+ require_smcl_log=True,
3316
+ )
3317
+ except Exception as e:
3318
+ exc = e
3319
+ rc = 1
3320
+ finally:
3321
+ tee.close()
2310
3322
 
2311
- with self._exec_lock:
2312
- try:
2313
- from sfi import Scalar, SFIToolkit # Import SFI tools
2314
- with self._temp_cwd(cwd):
2315
- with self._redirect_io_streaming(tee, tee):
2316
- try:
2317
- if trace:
2318
- self.stata.run("set trace on")
2319
- ret = self.stata.run(command, echo=echo)
2320
- # Some PyStata builds return output as a string rather than printing.
2321
- if isinstance(ret, str) and ret:
2322
- try:
2323
- tee.write(ret)
2324
- except Exception:
2325
- pass
2326
-
2327
- except Exception as e:
2328
- exc = e
2329
- rc = 1
2330
- finally:
2331
- if trace:
2332
- try:
2333
- self.stata.run("set trace off")
2334
- except Exception:
2335
- pass
2336
- except Exception as e:
2337
- # Outer catch in case imports or locks fail
2338
- exc = e
2339
- rc = 1
2340
-
2341
- tee.close()
3323
+ # Read SMCL content as the authoritative source
3324
+ smcl_content = self._read_smcl_file(smcl_path)
2342
3325
 
2343
- tail_text = tail.get_value()
2344
- log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
2345
- if log_tail and len(log_tail) > len(tail_text):
2346
- tail_text = log_tail
2347
- combined = (tail_text or "") + (f"\n{exc}" if exc else "")
3326
+ combined = self._build_combined_log(tail, log_path, rc, trace, exc)
2348
3327
 
2349
- # Parse RC from log tail if no exception occurred
3328
+ # Use SMCL content as primary source for RC detection if not already captured
2350
3329
  if rc == -1 and not exc:
2351
- parsed_rc = self._parse_rc_from_text(combined)
2352
- rc = parsed_rc if parsed_rc is not None else 0
2353
- elif exc:
2354
- # Try to parse RC from exception message
3330
+ parsed_rc = self._parse_rc_from_smcl(smcl_content)
3331
+ if parsed_rc is not None:
3332
+ rc = parsed_rc
3333
+ else:
3334
+ # Fallback to text parsing
3335
+ parsed_rc = self._parse_rc_from_text(combined)
3336
+ rc = parsed_rc if parsed_rc is not None else 0
3337
+ elif exc and rc == 1:
3338
+ # Try to parse more specific RC from exception message
2355
3339
  parsed_rc = self._parse_rc_from_text(str(exc))
2356
3340
  if parsed_rc is not None:
2357
3341
  rc = parsed_rc
@@ -2360,15 +3344,20 @@ class StataClient:
2360
3344
  error = None
2361
3345
 
2362
3346
  if not success:
2363
- # Robust extraction
2364
- msg, context = self._extract_error_and_context(combined, rc)
3347
+ # Use SMCL as authoritative source for error extraction
3348
+ if smcl_content:
3349
+ msg, context = self._extract_error_from_smcl(smcl_content, rc)
3350
+ else:
3351
+ # Fallback to combined log
3352
+ msg, context = self._extract_error_and_context(combined, rc)
2365
3353
 
2366
3354
  error = ErrorEnvelope(
2367
3355
  message=msg,
2368
3356
  rc=rc,
2369
3357
  snippet=context,
2370
3358
  command=command,
2371
- log_path=log_path
3359
+ log_path=log_path,
3360
+ smcl_output=smcl_content,
2372
3361
  )
2373
3362
 
2374
3363
  duration = time.time() - start_time
@@ -2389,6 +3378,7 @@ class StataClient:
2389
3378
  log_path=log_path,
2390
3379
  success=success,
2391
3380
  error=error,
3381
+ smcl_output=smcl_content,
2392
3382
  )
2393
3383
 
2394
3384
  def load_data(self, source: str, clear: bool = True, max_output_lines: Optional[int] = None) -> CommandResponse:
@@ -2407,40 +3397,8 @@ class StataClient:
2407
3397
  cmd = f"sysuse {src}{clear_suffix}"
2408
3398
 
2409
3399
  result = self._exec_with_capture(cmd, echo=True, trace=False)
2410
-
2411
- # Truncate stdout if requested
2412
- if max_output_lines is not None and result.stdout:
2413
- lines = result.stdout.splitlines()
2414
- if len(lines) > max_output_lines:
2415
- truncated_lines = lines[:max_output_lines]
2416
- truncated_lines.append(f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)")
2417
- result = CommandResponse(
2418
- command=result.command,
2419
- rc=result.rc,
2420
- stdout="\n".join(truncated_lines),
2421
- stderr=result.stderr,
2422
- success=result.success,
2423
- error=result.error,
2424
- )
2425
-
2426
- return result
3400
+ return self._truncate_command_output(result, max_output_lines)
2427
3401
 
2428
3402
  def codebook(self, varname: str, trace: bool = False, max_output_lines: Optional[int] = None) -> CommandResponse:
2429
3403
  result = self._exec_with_capture(f"codebook {varname}", trace=trace)
2430
-
2431
- # Truncate stdout if requested
2432
- if max_output_lines is not None and result.stdout:
2433
- lines = result.stdout.splitlines()
2434
- if len(lines) > max_output_lines:
2435
- truncated_lines = lines[:max_output_lines]
2436
- truncated_lines.append(f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)")
2437
- result = CommandResponse(
2438
- command=result.command,
2439
- rc=result.rc,
2440
- stdout="\n".join(truncated_lines),
2441
- stderr=result.stderr,
2442
- success=result.success,
2443
- error=result.error,
2444
- )
2445
-
2446
- return result
3404
+ return self._truncate_command_output(result, max_output_lines)