PyPI - mcp-stata - Versions diffs - 1.6.8__py3-none-any.whl → 1.7.6__py3-none-any.whl - Mend - Supply Chain Defender

mcp-stata 1.6.8py3-none-any.whl → 1.7.6py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mcp-stata might be problematic. Click here for more details.

Files changed (10) hide show

mcp_stata/discovery.py +151 -31
mcp_stata/models.py +1 -0
mcp_stata/stata_client.py +323 -264
mcp_stata/ui_http.py +37 -1
{mcp_stata-1.6.8.dist-info → mcp_stata-1.7.6.dist-info}/METADATA +60 -2
mcp_stata-1.7.6.dist-info/RECORD +14 -0
mcp_stata-1.6.8.dist-info/RECORD +0 -14
{mcp_stata-1.6.8.dist-info → mcp_stata-1.7.6.dist-info}/WHEEL +0 -0
{mcp_stata-1.6.8.dist-info → mcp_stata-1.7.6.dist-info}/entry_points.txt +0 -0
{mcp_stata-1.6.8.dist-info → mcp_stata-1.7.6.dist-info}/licenses/LICENSE +0 -0

mcp_stata/stata_client.py CHANGED Viewed

@@ -120,16 +120,48 @@ class StataClient:
         return inst
     @contextmanager
-    def _redirect_io(self):
+    def _redirect_io(self, out_buf, err_buf):
         """Safely redirect stdout/stderr for the duration of a Stata call."""
-        out_buf, err_buf = StringIO(), StringIO()
         backup_stdout, backup_stderr = sys.stdout, sys.stderr
         sys.stdout, sys.stderr = out_buf, err_buf
         try:
-            yield out_buf, err_buf
+            yield
         finally:
             sys.stdout, sys.stderr = backup_stdout, backup_stderr
+    def _select_stata_error_message(self, text: str, fallback: str) -> str:
+        """
+        Helper for tests and legacy callers to extract the clean error message.
+        """
+        if not text:
+            return fallback
+        lines = text.splitlines()
+        trace_pattern = re.compile(r'^\s*[-=.]')
+        noise_pattern = re.compile(r'^(?:\}|\{txt\}|\{com\}|end of do-file)')
+        for line in reversed(lines):
+            stripped = line.strip()
+            if not stripped:
+                continue
+            if trace_pattern.match(line):
+                continue
+            if noise_pattern.match(stripped):
+                continue
+            if stripped.startswith("r(") and stripped.endswith(");"):
+                # If we hit r(123); we might want the line ABOVE it if it's not noise
+                continue
+            # Preserve SMCL tags
+            return stripped
+        # If we couldn't find a better message, try to find r(N);
+        match = re.search(r"r\(\d+\);", text)
+        if match:
+            return match.group(0)
+        return fallback
     @staticmethod
     def _stata_quote(value: str) -> str:
         """Return a Stata double-quoted string literal for value."""
@@ -170,6 +202,7 @@ class StataClient:
                 logger.error(f"Failed to notify about graph cache: {e}")
         return graph_cache_callback
     def _request_break_in(self) -> None:
         """
         Attempt to interrupt a running Stata command when cancellation is requested.
@@ -380,15 +413,32 @@ class StataClient:
         try:
             from sfi import Macro  # type: ignore[import-not-found]
             rc_val = Macro.getCValue("rc")  # type: ignore[attr-defined]
+            if rc_val is not None:
+                return int(float(rc_val))
+            # If getCValue returns None, fall through to the alternative approach
+        except Exception:
+            pass
+        # Alternative approach: use a global macro
+        # CRITICAL: This must be done carefully to avoid mutating c(rc)
+        try:
+            self.stata.run("global MCP_RC = c(rc)")
+            from sfi import Macro as Macro2  # type: ignore[import-not-found]
+            rc_val = Macro2.getGlobal("MCP_RC")
             return int(float(rc_val))
         except Exception:
-            try:
-                self.stata.run("global MCP_RC = c(rc)")
-                from sfi import Macro as Macro2  # type: ignore[import-not-found]
-                rc_val = Macro2.getGlobal("MCP_RC")
-                return int(float(rc_val))
-            except Exception:
+            return -1
+    def _get_rc_from_scalar(self, Scalar) -> int:
+        """Safely get return code, handling None values."""
+        try:
+            from sfi import Macro
+            rc_val = Macro.getCValue("rc")
+            if rc_val is None:
                 return -1
+            return int(float(rc_val))
+        except Exception:
+            return -1
     def _parse_rc_from_text(self, text: str) -> Optional[int]:
         match = re.search(r"r\((\d+)\)", text)
@@ -408,6 +458,20 @@ class StataClient:
                 return None
         return None
+    def _read_log_tail(self, path: str, max_chars: int) -> str:
+        try:
+            with open(path, "rb") as f:
+                f.seek(0, os.SEEK_END)
+                size = f.tell()
+                if size <= 0:
+                    return ""
+                read_size = min(size, max_chars)
+                f.seek(-read_size, os.SEEK_END)
+                data = f.read(read_size)
+            return data.decode("utf-8", errors="replace")
+        except Exception:
+            return ""
     def _smcl_to_text(self, smcl: str) -> str:
         """Convert simple SMCL markup into plain text for LLM-friendly help."""
         # First, keep inline directive content if present (e.g., {bf:word} -> word)
@@ -419,150 +483,126 @@ class StataClient:
         lines = [line.rstrip() for line in cleaned.splitlines()]
         return "\n".join(lines).strip()
-    def _build_error_envelope(
-        self,
-        command: str,
-        rc: int,
-        stdout: str,
-        stderr: str,
-        exc: Optional[Exception],
-        trace: bool,
-    ) -> ErrorEnvelope:
-        combined = "\n".join(filter(None, [stdout, stderr, str(exc) if exc else ""])).strip()
-        rc_hint = self._parse_rc_from_text(combined) if combined else None
-        rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
-        line_no = self._parse_line_from_text(combined) if combined else None
-        snippet = combined[-800:] if combined else None
-        message = (stderr or (str(exc) if exc else "") or stdout or "Stata error").strip()
-        return ErrorEnvelope(
-            message=message,
-            rc=rc_final,
-            line=line_no,
-            command=command,
-            stdout=stdout or None,
-            stderr=stderr or None,
-            snippet=snippet,
-            trace=trace or None,
-        )
+    def _extract_error_and_context(self, log_content: str, rc: int) -> Tuple[str, str]:
+        """
+        Extracts the error message and trace context using {err} SMCL tags.
+        """
+        if not log_content:
+            return f"Stata error r({rc})", ""
+        lines = log_content.splitlines()
+        # Search backwards for the {err} tag
+        for i in range(len(lines) - 1, -1, -1):
+            line = lines[i]
+            if '{err}' in line:
+                # Found the (last) error line.
+                # Walk backwards to find the start of the error block (consecutive {err} lines)
+                start_idx = i
+                while start_idx > 0 and '{err}' in lines[start_idx-1]:
+                    start_idx -= 1
+                # The full error message is the concatenation of all {err} lines in this block
+                error_lines = []
+                for j in range(start_idx, i + 1):
+                    error_lines.append(lines[j].strip())
+                clean_msg = " ".join(filter(None, error_lines)) or f"Stata error r({rc})"
+                # Capture everything from the start of the error block to the end
+                context_str = "\n".join(lines[start_idx:])
+                return clean_msg, context_str
+        # Fallback: grab the last 30 lines
+        context_start = max(0, len(lines) - 30)
+        context_str = "\n".join(lines[context_start:])
+        return f"Stata error r({rc})", context_str
     def _exec_with_capture(self, code: str, echo: bool = True, trace: bool = False, cwd: Optional[str] = None) -> CommandResponse:
-        """Execute Stata code with stdout/stderr capture and rc detection."""
         if not self._initialized:
             self.init()
+        # Rewrite graph names with special characters to internal aliases
         code = self._maybe_rewrite_graph_name_in_command(code)
-        if cwd is not None and not os.path.isdir(cwd):
-            return CommandResponse(
-                command=code,
-                rc=601,
-                stdout="",
-                stderr=None,
-                success=False,
-                error=ErrorEnvelope(
-                    message=f"cwd not found: {cwd}",
-                    rc=601,
-                    command=code,
-                ),
-            )
+        output_buffer = StringIO()
+        error_buffer = StringIO()
+        rc = 0
+        sys_error = None
-        start_time = time.time()
-        exc: Optional[Exception] = None
-        ret_text: Optional[str] = None
         with self._exec_lock:
-            # Set execution flag to prevent recursive Stata calls
-            self._is_executing = True
             try:
+                from sfi import Scalar, SFIToolkit  # Import SFI tools inside execution block
                 with self._temp_cwd(cwd):
-                    with self._redirect_io() as (out_buf, err_buf):
-                        try:
-                            if trace:
-                                self.stata.run("set trace on")
-                            ret = self.stata.run(code, echo=echo)
-                            if isinstance(ret, str) and ret:
-                                ret_text = ret
-                        except Exception as e:
-                            exc = e
-                        finally:
-                            rc = self._read_return_code()
-                            if trace:
-                                try:
-                                    self.stata.run("set trace off")
-                                except Exception:
-                                    pass
-            finally:
-                # Clear execution flag
-                self._is_executing = False
-        stdout = out_buf.getvalue()
-        # Some PyStata builds return output as a string rather than printing.
-        if (not stdout or not stdout.strip()) and ret_text:
-            stdout = ret_text
-        stderr = err_buf.getvalue()
-        combined = "\n".join(filter(None, [stdout, stderr, str(exc) if exc else ""])).strip()
-        rc_hint = self._parse_rc_from_text(combined) if combined else None
-        if exc is None and rc_hint is not None and rc_hint != 0:
-            # Prefer r(#) parsed from the current command output when present.
-            rc = rc_hint
-        # If no exception and stderr is empty and no r(#) is present, treat rc anomalies as success
-        # (e.g., stale/spurious c(rc) reads).
-        if exc is None and (not stderr or not stderr.strip()) and rc_hint is None:
-            rc = 0 if rc is None or rc != 0 else rc
-        success = rc == 0 and exc is None
-        error = None
-        if not success:
-            error = self._build_error_envelope(code, rc, stdout, stderr, exc, trace)
-        duration = time.time() - start_time
-        code_preview = code.replace("\n", "\\n")
-        logger.info(
-            "stata.run rc=%s success=%s trace=%s duration_ms=%.2f code_preview=%s",
-            rc,
-            success,
-            trace,
-            duration * 1000,
-            code_preview[:120],
-        )
-        # Mutually exclusive - when error, output is in ErrorEnvelope only
+                    with self._redirect_io(output_buffer, error_buffer):
+                        if trace:
+                            self.stata.run("set trace on")
+                        # 1. Run the user code
+                        self.stata.run(code, echo=echo)
+            except Exception as e:
+                sys_error = str(e)
+                # Try to parse RC from exception message
+                parsed_rc = self._parse_rc_from_text(sys_error)
+                rc = parsed_rc if parsed_rc is not None else 1
+        stdout_content = output_buffer.getvalue()
+        stderr_content = error_buffer.getvalue()
+        full_log = stdout_content + "\n" + stderr_content
+        # 2. Extract RC from log tail (primary error detection method)
+        if rc == 1 and not sys_error:  # No exception but might have error in log
+            parsed_rc = self._parse_rc_from_text(full_log)
+            if parsed_rc is not None:
+                rc = parsed_rc
+        error_envelope = None
+        if rc != 0:
+            if sys_error:
+                msg = sys_error
+                snippet = sys_error  # Include the exception message as snippet
+            else:
+                # Extract error message from log tail
+                msg, context = self._extract_error_and_context(full_log, rc)
+            error_envelope = ErrorEnvelope(message=msg, rc=rc, context=context, snippet=full_log[-800:])
         return CommandResponse(
             command=code,
             rc=rc,
-            stdout="" if not success else stdout,
-            stderr=None,
-            success=success,
-            error=error,
+            stdout=stdout_content,
+            stderr=stderr_content,
+            success=(rc == 0),
+            error=error_envelope,
         )
     def _exec_no_capture(self, code: str, echo: bool = False, trace: bool = False) -> CommandResponse:
-        """Execute Stata code while leaving stdout/stderr alone.
-        PyStata's output bridge uses its own thread and can misbehave on Windows
-        when we redirect stdio (e.g., graph export). This path keeps the normal
-        handlers and just reads rc afterward.
-        """
+        """Execute Stata code while leaving stdout/stderr alone."""
         if not self._initialized:
             self.init()
         exc: Optional[Exception] = None
         ret_text: Optional[str] = None
+        rc = 0
         with self._exec_lock:
             try:
+                from sfi import Scalar # Import SFI tools
                 if trace:
                     self.stata.run("set trace on")
                 ret = self.stata.run(code, echo=echo)
                 if isinstance(ret, str) and ret:
                     ret_text = ret
+                # Robust RC check even for no-capture
+                rc = self._read_return_code()
             except Exception as e:
                 exc = e
+                rc = 1
             finally:
-                rc = self._read_return_code()
-                # If Stata returned an r(#) in text, prefer it.
-                combined = "\n".join(filter(None, [ret_text or "", str(exc) if exc else ""])).strip()
-                rc_hint = self._parse_rc_from_text(combined) if combined else None
-                if exc is None and rc_hint is not None and rc_hint != 0:
-                    rc = rc_hint
-                if exc is None and (rc is None or rc == -1) and rc_hint is None:
-                    # Normalize spurious rc reads only when missing/invalid
-                    rc = 0
                 if trace:
                     try:
                         self.stata.run("set trace off")
@@ -574,8 +614,13 @@ class StataClient:
         success = rc == 0 and exc is None
         error = None
         if not success:
-            # Pass ret_text as stdout for snippet parsing.
-            error = self._build_error_envelope(code, rc, ret_text or "", stderr, exc, trace)
+            msg = str(exc) if exc else f"Stata error r({rc})"
+            error = ErrorEnvelope(
+                message=msg,
+                rc=rc,
+                command=code,
+                stdout=ret_text,
+            )
         return CommandResponse(
             command=code,
@@ -640,7 +685,7 @@ class StataClient:
             buffering=1,
         )
         log_path = log_file.name
-        tail = TailBuffer(max_chars=8000)
+        tail = TailBuffer(max_chars=200000 if trace else 20000)
         tee = FileTeeIO(log_file, tail)
         # Inform the MCP client immediately where to read/tail the output.
@@ -653,6 +698,7 @@ class StataClient:
             with self._exec_lock:
                 self._is_executing = True
                 try:
+                    from sfi import Scalar, SFIToolkit # Import SFI tools
                     with self._temp_cwd(cwd):
                         with self._redirect_io_streaming(tee, tee):
                             try:
@@ -665,10 +711,14 @@ class StataClient:
                                         tee.write(ret)
                                     except Exception:
                                         pass
+                                # ROBUST DETECTION & OUTPUT
+                                rc = self._read_return_code()
                             except Exception as e:
                                 exc = e
+                                if rc == 0: rc = 1
                             finally:
-                                rc = self._read_return_code()
                                 if trace:
                                     try:
                                         self.stata.run("set trace off")
@@ -705,36 +755,25 @@ class StataClient:
                 logger.warning(f"Failed to cache detected graphs: {e}")
         tail_text = tail.get_value()
+        log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
+        if log_tail and len(log_tail) > len(tail_text):
+            tail_text = log_tail
         combined = (tail_text or "") + (f"\n{exc}" if exc else "")
-        rc_hint = self._parse_rc_from_text(combined) if combined else None
-        if exc is None and rc_hint is not None and rc_hint != 0:
-            rc = rc_hint
-        if exc is None and rc_hint is None:
-            rc = 0 if rc is None or rc != 0 else rc
-        success = rc == 0 and exc is None
+        success = (rc == 0 and exc is None)
         error = None
         if not success:
-            snippet = (tail_text[-800:] if tail_text else None) or (str(exc) if exc else None)
-            rc_hint = self._parse_rc_from_text(combined) if combined else None
-            rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
-            line_no = self._parse_line_from_text(combined) if combined else None
-            message = "Stata error"
-            if tail_text and tail_text.strip():
-                for line in reversed(tail_text.splitlines()):
-                    if line.strip():
-                        message = line.strip()
-                        break
-            elif exc is not None:
-                message = str(exc).strip() or message
+            # Use robust extractor
+            msg, context = self._extract_error_and_context(combined, rc)
             error = ErrorEnvelope(
-                message=message,
-                rc=rc_final,
-                line=line_no,
+                message=msg,
+                context=context,
+                rc=rc,
                 command=code,
                 log_path=log_path,
-                snippet=snippet,
-                trace=trace or None,
+                snippet=combined[-800:] # Keep snippet for backward compat
             )
         duration = time.time() - start_time
@@ -876,7 +915,7 @@ class StataClient:
             buffering=1,
         )
         log_path = log_file.name
-        tail = TailBuffer(max_chars=8000)
+        tail = TailBuffer(max_chars=200000 if trace else 20000)
         tee = FileTeeIO(log_file, tail)
         # Inform the MCP client immediately where to read/tail the output.
@@ -887,7 +926,6 @@ class StataClient:
         command = f'do "{path_for_stata}"'
         # Capture initial graph state BEFORE execution starts
-        # This allows post-execution detection to identify new graphs
         if graph_cache:
             try:
                 graph_cache._initial_graphs = set(self.list_graphs())
@@ -902,6 +940,7 @@ class StataClient:
                 # Set execution flag to prevent recursive Stata calls
                 self._is_executing = True
                 try:
+                    from sfi import Scalar, SFIToolkit # Import SFI tools
                     with self._temp_cwd(cwd):
                         with self._redirect_io_streaming(tee, tee):
                             try:
@@ -914,15 +953,17 @@ class StataClient:
                                         tee.write(ret)
                                     except Exception:
                                         pass
+                                # ROBUST DETECTION & OUTPUT
+                                rc = self._read_return_code()
                             except Exception as e:
                                 exc = e
+                                if rc == 0: rc = 1
                             finally:
-                                rc = self._read_return_code()
                                 if trace:
-                                    try:
-                                        self.stata.run("set trace off")
-                                    except Exception:
-                                        pass
+                                    try: self.stata.run("set trace off")
+                                    except: pass
                 finally:
                     # Clear execution flag
                     self._is_executing = False
@@ -970,65 +1011,33 @@ class StataClient:
                 tee.close()
         # Robust post-execution graph detection and caching
-        # This is the ONLY place where graphs are detected and cached
-        # Runs after execution completes, when it's safe to call list_graphs()
         if graph_cache and graph_cache.auto_cache:
-            cached_graphs = []
             try:
-                # Get initial state (before execution)
+                # [Existing graph cache logic kept identical]
+                cached_graphs = []
                 initial_graphs = getattr(graph_cache, '_initial_graphs', set())
-                # Get current state (after execution)
-                logger.debug("Post-execution: Querying graph state via list_graphs()")
                 current_graphs = set(self.list_graphs())
-                # Detect new graphs (created during execution)
                 new_graphs = current_graphs - initial_graphs - graph_cache._cached_graphs
                 if new_graphs:
                     logger.info(f"Detected {len(new_graphs)} new graph(s): {sorted(new_graphs)}")
-                # Cache each detected graph
                 for graph_name in new_graphs:
                     try:
-                        logger.debug(f"Caching graph: {graph_name}")
                         cache_result = await anyio.to_thread.run_sync(
                             self.cache_graph_on_creation,
                             graph_name
                         )
                         if cache_result:
                             cached_graphs.append(graph_name)
                             graph_cache._cached_graphs.add(graph_name)
-                            logger.debug(f"Successfully cached graph: {graph_name}")
-                        else:
-                            logger.warning(f"Failed to cache graph: {graph_name}")
-                        # Trigger callbacks
                         for callback in graph_cache._cache_callbacks:
                             try:
                                 await anyio.to_thread.run_sync(callback, graph_name, cache_result)
-                            except Exception as e:
-                                logger.debug(f"Callback failed for {graph_name}: {e}")
+                            except Exception: pass
                     except Exception as e:
                         logger.error(f"Error caching graph {graph_name}: {e}")
-                        # Trigger callbacks with failure
-                        for callback in graph_cache._cache_callbacks:
-                            try:
-                                await anyio.to_thread.run_sync(callback, graph_name, False)
-                            except Exception:
-                                pass
-                # Check for dropped graphs (for completeness)
-                dropped_graphs = initial_graphs - current_graphs
-                if dropped_graphs:
-                    logger.debug(f"Graphs dropped during execution: {sorted(dropped_graphs)}")
-                    for graph_name in dropped_graphs:
-                        try:
-                            self.invalidate_graph_cache(graph_name)
-                        except Exception:
-                            pass
                 # Notify progress if graphs were cached
                 if cached_graphs and notify_progress:
@@ -1037,41 +1046,29 @@ class StataClient:
                         float(total_lines) if total_lines > 0 else 1,
                         f"Do-file completed. Cached {len(cached_graphs)} graph(s): {', '.join(cached_graphs)}"
                     )
             except Exception as e:
                 logger.error(f"Post-execution graph detection failed: {e}")
         tail_text = tail.get_value()
+        log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
+        if log_tail and len(log_tail) > len(tail_text):
+            tail_text = log_tail
         combined = (tail_text or "") + (f"\n{exc}" if exc else "")
-        rc_hint = self._parse_rc_from_text(combined) if combined else None
-        if exc is None and rc_hint is not None and rc_hint != 0:
-            rc = rc_hint
-        if exc is None and rc_hint is None:
-            rc = 0 if rc is None or rc != 0 else rc
-        success = rc == 0 and exc is None
+        success = (rc == 0 and exc is None)
         error = None
         if not success:
-            snippet = (tail_text[-800:] if tail_text else None) or (str(exc) if exc else None)
-            rc_hint = self._parse_rc_from_text(combined) if combined else None
-            rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
-            line_no = self._parse_line_from_text(combined) if combined else None
-            message = "Stata error"
-            if tail_text and tail_text.strip():
-                for line in reversed(tail_text.splitlines()):
-                    if line.strip():
-                        message = line.strip()
-                        break
-            elif exc is not None:
-                message = str(exc).strip() or message
+            # Robust extraction
+            msg, context = self._extract_error_and_context(combined, rc)
             error = ErrorEnvelope(
-                message=message,
-                rc=rc_final,
-                line=line_no,
+                message=msg,
+                context=context,
+                rc=rc,
                 command=command,
                 log_path=log_path,
-                snippet=snippet,
-                trace=trace or None,
+                snippet=combined[-800:]
             )
         duration = time.time() - start_time
@@ -1425,6 +1422,65 @@ class StataClient:
         return indices
+    def apply_sort(self, sort_spec: List[str]) -> None:
+        """
+        Apply sorting to the dataset using gsort.
+        Args:
+            sort_spec: List of variables to sort by, with optional +/- prefix.
+                      e.g., ["-price", "+mpg"] sorts by price descending, then mpg ascending.
+                      No prefix is treated as ascending (+).
+        Raises:
+            ValueError: If sort_spec is invalid or contains invalid variables
+            RuntimeError: If no data in memory or sort command fails
+        """
+        if not self._initialized:
+            self.init()
+        state = self.get_dataset_state()
+        if int(state.get("k", 0) or 0) == 0 and int(state.get("n", 0) or 0) == 0:
+            raise RuntimeError("No data in memory")
+        if not sort_spec or not isinstance(sort_spec, list):
+            raise ValueError("sort_spec must be a non-empty list")
+        # Validate all variables exist
+        var_map = self._get_var_index_map()
+        for spec in sort_spec:
+            if not isinstance(spec, str) or not spec:
+                raise ValueError(f"Invalid sort specification: {spec!r}")
+            # Extract variable name (remove +/- prefix if present)
+            varname = spec.lstrip("+-")
+            if not varname:
+                raise ValueError(f"Invalid sort specification: {spec!r}")
+            if varname not in var_map:
+                raise ValueError(f"Variable not found: {varname}")
+        # Build gsort command
+        # gsort uses - for descending, + or nothing for ascending
+        gsort_args = []
+        for spec in sort_spec:
+            if spec.startswith("-") or spec.startswith("+"):
+                gsort_args.append(spec)
+            else:
+                # No prefix means ascending, add + explicitly for clarity
+                gsort_args.append(f"+{spec}")
+        cmd = f"gsort {' '.join(gsort_args)}"
+        try:
+            result = self.run_command_structured(cmd, echo=False)
+            if not result.success:
+                error_msg = result.error.message if result.error else "Sort failed"
+                raise RuntimeError(f"Failed to sort dataset: {error_msg}")
+        except Exception as e:
+            if isinstance(e, RuntimeError):
+                raise
+            raise RuntimeError(f"Failed to sort dataset: {e}")
     def get_variable_details(self, varname: str) -> str:
         """Returns codebook/summary for a specific variable."""
         resp = self.run_command_structured(f"codebook {varname}", echo=True)
@@ -2247,68 +2303,72 @@ class StataClient:
             buffering=1,
         )
         log_path = log_file.name
-        tail = TailBuffer(max_chars=8000)
+        tail = TailBuffer(max_chars=200000 if trace else 20000)
         tee = FileTeeIO(log_file, tail)
         rc = -1
         with self._exec_lock:
-            with self._temp_cwd(cwd):
-                with self._redirect_io_streaming(tee, tee):
-                    try:
-                        if trace:
-                            self.stata.run("set trace on")
-                        ret = self.stata.run(command, echo=echo)
-                        # Some PyStata builds return output as a string rather than printing.
-                        if isinstance(ret, str) and ret:
-                            try:
-                                tee.write(ret)
-                            except Exception:
-                                pass
-                    except Exception as e:
-                        exc = e
-                    finally:
-                        rc = self._read_return_code()
-                        if trace:
-                            try:
-                                self.stata.run("set trace off")
-                            except Exception:
-                                pass
+            try:
+                from sfi import Scalar, SFIToolkit # Import SFI tools
+                with self._temp_cwd(cwd):
+                    with self._redirect_io_streaming(tee, tee):
+                        try:
+                            if trace:
+                                self.stata.run("set trace on")
+                            ret = self.stata.run(command, echo=echo)
+                            # Some PyStata builds return output as a string rather than printing.
+                            if isinstance(ret, str) and ret:
+                                try:
+                                    tee.write(ret)
+                                except Exception:
+                                    pass
+                        except Exception as e:
+                            exc = e
+                            rc = 1
+                        finally:
+                            if trace:
+                                try:
+                                    self.stata.run("set trace off")
+                                except Exception:
+                                    pass
+            except Exception as e:
+                # Outer catch in case imports or locks fail
+                exc = e
+                rc = 1
         tee.close()
         tail_text = tail.get_value()
+        log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
+        if log_tail and len(log_tail) > len(tail_text):
+            tail_text = log_tail
         combined = (tail_text or "") + (f"\n{exc}" if exc else "")
-        rc_hint = self._parse_rc_from_text(combined) if combined else None
-        if exc is None and rc_hint is not None and rc_hint != 0:
-            rc = rc_hint
-        if exc is None and rc_hint is None:
-            rc = 0 if rc is None or rc != 0 else rc
-        success = rc == 0 and exc is None
+        # Parse RC from log tail if no exception occurred
+        if rc == -1 and not exc:
+            parsed_rc = self._parse_rc_from_text(combined)
+            rc = parsed_rc if parsed_rc is not None else 0
+        elif exc:
+            # Try to parse RC from exception message
+            parsed_rc = self._parse_rc_from_text(str(exc))
+            if parsed_rc is not None:
+                rc = parsed_rc
+        success = (rc == 0 and exc is None)
         error = None
         if not success:
-            snippet = (tail_text[-800:] if tail_text else None) or (str(exc) if exc else None)
-            rc_hint = self._parse_rc_from_text(combined) if combined else None
-            rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
-            line_no = self._parse_line_from_text(combined) if combined else None
-            message = "Stata error"
-            if tail_text and tail_text.strip():
-                for line in reversed(tail_text.splitlines()):
-                    if line.strip():
-                        message = line.strip()
-                        break
-            elif exc is not None:
-                message = str(exc).strip() or message
+            # Robust extraction
+            msg, context = self._extract_error_and_context(combined, rc)
             error = ErrorEnvelope(
-                message=message,
-                rc=rc_final,
-                line=line_no,
+                message=msg,
+                rc=rc,
+                snippet=context,
                 command=command,
-                log_path=log_path,
-                snippet=snippet,
-                trace=trace or None,
+                log_path=log_path
             )
         duration = time.time() - start_time
@@ -2383,5 +2443,4 @@ class StataClient:
                     error=result.error,
                 )
-        return result
+        return result