mcp-stata 1.7.6__py3-none-any.whl → 1.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-stata might be problematic. Click here for more details.

mcp_stata/stata_client.py CHANGED
@@ -1,22 +1,26 @@
1
- import base64
1
+ import asyncio
2
+ import inspect
2
3
  import json
3
4
  import logging
4
5
  import os
6
+ import platform
5
7
  import re
6
8
  import subprocess
7
9
  import sys
8
- import threading
9
- from importlib.metadata import PackageNotFoundError, version
10
10
  import tempfile
11
+ import threading
11
12
  import time
13
+ import uuid
12
14
  from contextlib import contextmanager
15
+ from importlib.metadata import PackageNotFoundError, version
13
16
  from io import StringIO
14
- from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple
17
+ from typing import Any, Awaitable, Callable, Dict, Generator, List, Optional, Tuple
15
18
 
16
19
  import anyio
17
20
  from anyio import get_cancelled_exc_class
18
21
 
19
- from .discovery import find_stata_path
22
+ from .discovery import find_stata_candidates
23
+ from .config import MAX_LIMIT
20
24
  from .models import (
21
25
  CommandResponse,
22
26
  ErrorEnvelope,
@@ -33,6 +37,29 @@ from .graph_detector import StreamingGraphCache
33
37
 
34
38
  logger = logging.getLogger("mcp_stata")
35
39
 
40
+ _POLARS_AVAILABLE: Optional[bool] = None
41
+
42
+ def _check_polars_available() -> bool:
43
+ """
44
+ Check if Polars can be safely imported.
45
+ Must detect problematic platforms BEFORE attempting import,
46
+ since the crash is a fatal signal, not a catchable exception.
47
+ """
48
+ if sys.platform == "win32" and platform.machine().lower() in ("arm64", "aarch64"):
49
+ return False
50
+
51
+ try:
52
+ import polars # noqa: F401
53
+ return True
54
+ except ImportError:
55
+ return False
56
+
57
+
58
+ def _get_polars_available() -> bool:
59
+ global _POLARS_AVAILABLE
60
+ if _POLARS_AVAILABLE is None:
61
+ _POLARS_AVAILABLE = _check_polars_available()
62
+ return _POLARS_AVAILABLE
36
63
 
37
64
  # ============================================================================
38
65
  # MODULE-LEVEL DISCOVERY CACHE
@@ -40,26 +67,30 @@ logger = logging.getLogger("mcp_stata")
40
67
  # This cache ensures Stata discovery runs exactly once per process lifetime
41
68
  _discovery_lock = threading.Lock()
42
69
  _discovery_result: Optional[Tuple[str, str]] = None # (path, edition)
70
+ _discovery_candidates: Optional[List[Tuple[str, str]]] = None
43
71
  _discovery_attempted = False
44
72
  _discovery_error: Optional[Exception] = None
45
73
 
46
74
 
47
- def _get_discovered_stata() -> Tuple[str, str]:
75
+ def _get_discovery_candidates() -> List[Tuple[str, str]]:
48
76
  """
49
- Get the discovered Stata path and edition, running discovery only once.
77
+ Get ordered discovery candidates, running discovery only once.
50
78
 
51
79
  Returns:
52
- Tuple of (stata_executable_path, edition)
80
+ List of (stata_executable_path, edition) ordered by preference.
53
81
 
54
82
  Raises:
55
83
  RuntimeError: If Stata discovery fails
56
84
  """
57
- global _discovery_result, _discovery_attempted, _discovery_error
85
+ global _discovery_result, _discovery_candidates, _discovery_attempted, _discovery_error
58
86
 
59
87
  with _discovery_lock:
60
88
  # If we've already successfully discovered Stata, return cached result
61
89
  if _discovery_result is not None:
62
- return _discovery_result
90
+ return _discovery_candidates or [_discovery_result]
91
+
92
+ if _discovery_candidates is not None:
93
+ return _discovery_candidates
63
94
 
64
95
  # If we've already attempted and failed, re-raise the cached error
65
96
  if _discovery_attempted and _discovery_error is not None:
@@ -83,13 +114,17 @@ def _get_discovered_stata() -> Tuple[str, str]:
83
114
  logger.info("mcp-stata version: %s", pkg_version)
84
115
 
85
116
  # Run discovery
86
- stata_exec_path, edition = find_stata_path()
117
+ candidates = find_stata_candidates()
87
118
 
88
119
  # Cache the successful result
89
- _discovery_result = (stata_exec_path, edition)
90
- logger.info("Discovery found Stata at: %s (%s)", stata_exec_path, edition)
120
+ _discovery_candidates = candidates
121
+ if candidates:
122
+ _discovery_result = candidates[0]
123
+ logger.info("Discovery found Stata at: %s (%s)", _discovery_result[0], _discovery_result[1])
124
+ else:
125
+ raise FileNotFoundError("No Stata candidates discovered")
91
126
 
92
- return _discovery_result
127
+ return candidates
93
128
 
94
129
  except FileNotFoundError as e:
95
130
  _discovery_error = e
@@ -102,12 +137,22 @@ def _get_discovered_stata() -> Tuple[str, str]:
102
137
  ) from e
103
138
 
104
139
 
140
+ def _get_discovered_stata() -> Tuple[str, str]:
141
+ """
142
+ Preserve existing API: return the highest-priority discovered Stata candidate.
143
+ """
144
+ candidates = _get_discovery_candidates()
145
+ if not candidates:
146
+ raise RuntimeError("Stata binary not found: no candidates discovered")
147
+ return candidates[0]
148
+
149
+
105
150
  class StataClient:
106
151
  _initialized = False
107
152
  _exec_lock: threading.Lock
108
153
  _cache_init_lock = threading.Lock() # Class-level lock for cache initialization
109
154
  _is_executing = False # Flag to prevent recursive Stata calls
110
- MAX_DATA_ROWS = 500
155
+ MAX_DATA_ROWS = MAX_LIMIT
111
156
  MAX_GRAPH_BYTES = 50 * 1024 * 1024 # Maximum graph exports (~50MB)
112
157
  MAX_CACHE_SIZE = 100 # Maximum number of graphs to cache
113
158
  MAX_CACHE_BYTES = 500 * 1024 * 1024 # Maximum cache size in bytes (~500MB)
@@ -115,7 +160,7 @@ class StataClient:
115
160
 
116
161
  def __new__(cls):
117
162
  inst = super(StataClient, cls).__new__(cls)
118
- inst._exec_lock = threading.Lock()
163
+ inst._exec_lock = threading.RLock()
119
164
  inst._is_executing = False
120
165
  return inst
121
166
 
@@ -129,38 +174,6 @@ class StataClient:
129
174
  finally:
130
175
  sys.stdout, sys.stderr = backup_stdout, backup_stderr
131
176
 
132
- def _select_stata_error_message(self, text: str, fallback: str) -> str:
133
- """
134
- Helper for tests and legacy callers to extract the clean error message.
135
- """
136
- if not text:
137
- return fallback
138
-
139
- lines = text.splitlines()
140
- trace_pattern = re.compile(r'^\s*[-=.]')
141
- noise_pattern = re.compile(r'^(?:\}|\{txt\}|\{com\}|end of do-file)')
142
-
143
- for line in reversed(lines):
144
- stripped = line.strip()
145
- if not stripped:
146
- continue
147
- if trace_pattern.match(line):
148
- continue
149
- if noise_pattern.match(stripped):
150
- continue
151
- if stripped.startswith("r(") and stripped.endswith(");"):
152
- # If we hit r(123); we might want the line ABOVE it if it's not noise
153
- continue
154
-
155
- # Preserve SMCL tags
156
- return stripped
157
-
158
- # If we couldn't find a better message, try to find r(N);
159
- match = re.search(r"r\(\d+\);", text)
160
- if match:
161
- return match.group(0)
162
-
163
- return fallback
164
177
 
165
178
  @staticmethod
166
179
  def _stata_quote(value: str) -> str:
@@ -181,6 +194,473 @@ class StataClient:
181
194
  finally:
182
195
  sys.stdout, sys.stderr = backup_stdout, backup_stderr
183
196
 
197
+ @staticmethod
198
+ def _safe_unlink(path: str) -> None:
199
+ if not path:
200
+ return
201
+ try:
202
+ if os.path.exists(path):
203
+ os.unlink(path)
204
+ except Exception:
205
+ pass
206
+
207
+ def _create_smcl_log_path(self, *, prefix: str = "mcp_smcl_", max_hex: Optional[int] = None) -> str:
208
+ hex_id = uuid.uuid4().hex if max_hex is None else uuid.uuid4().hex[:max_hex]
209
+ smcl_path = os.path.join(tempfile.gettempdir(), f"{prefix}{hex_id}.smcl")
210
+ self._safe_unlink(smcl_path)
211
+ return smcl_path
212
+
213
+ @staticmethod
214
+ def _make_smcl_log_name() -> str:
215
+ return f"_mcp_smcl_{uuid.uuid4().hex[:8]}"
216
+
217
+ def _open_smcl_log(self, smcl_path: str, log_name: str, *, quiet: bool = False) -> bool:
218
+ cmd = f"{'quietly ' if quiet else ''}log using \"{smcl_path}\", replace smcl name({log_name})"
219
+ for attempt in range(4):
220
+ try:
221
+ self.stata.run(cmd, echo=False)
222
+ return True
223
+ except Exception:
224
+ if attempt < 3:
225
+ time.sleep(0.1)
226
+ return False
227
+
228
+ def _close_smcl_log(self, log_name: str) -> None:
229
+ try:
230
+ self.stata.run(f"capture log close {log_name}", echo=False)
231
+ except Exception:
232
+ pass
233
+
234
+ def _restore_results_from_hold(self, hold_attr: str) -> None:
235
+ if not hasattr(self, hold_attr):
236
+ return
237
+ hold_name = getattr(self, hold_attr)
238
+ try:
239
+ self.stata.run(f"capture _return restore {hold_name}", echo=False)
240
+ self._last_results = self.get_stored_results(force_fresh=True)
241
+ except Exception:
242
+ pass
243
+ finally:
244
+ try:
245
+ delattr(self, hold_attr)
246
+ except Exception:
247
+ pass
248
+
249
+ def _create_streaming_log(self, *, trace: bool) -> tuple[tempfile.NamedTemporaryFile, str, TailBuffer, FileTeeIO]:
250
+ log_file = tempfile.NamedTemporaryFile(
251
+ prefix="mcp_stata_",
252
+ suffix=".log",
253
+ delete=False,
254
+ mode="w",
255
+ encoding="utf-8",
256
+ errors="replace",
257
+ buffering=1,
258
+ )
259
+ log_path = log_file.name
260
+ tail = TailBuffer(max_chars=200000 if trace else 20000)
261
+ tee = FileTeeIO(log_file, tail)
262
+ return log_file, log_path, tail, tee
263
+
264
+ def _init_streaming_graph_cache(
265
+ self,
266
+ auto_cache_graphs: bool,
267
+ on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]],
268
+ notify_log: Callable[[str], Awaitable[None]],
269
+ ) -> Optional[StreamingGraphCache]:
270
+ if not auto_cache_graphs:
271
+ return None
272
+ graph_cache = StreamingGraphCache(self, auto_cache=True)
273
+ graph_cache_callback = self._create_graph_cache_callback(on_graph_cached, notify_log)
274
+ graph_cache.add_cache_callback(graph_cache_callback)
275
+ return graph_cache
276
+
277
+ def _capture_graph_state(
278
+ self,
279
+ graph_cache: Optional[StreamingGraphCache],
280
+ emit_graph_ready: bool,
281
+ ) -> Optional[dict[str, str]]:
282
+ # Capture initial graph state BEFORE execution starts
283
+ if graph_cache:
284
+ try:
285
+ graph_cache._initial_graphs = set(self.list_graphs(force_refresh=True))
286
+ logger.debug(f"Initial graph state captured: {graph_cache._initial_graphs}")
287
+ except Exception as e:
288
+ logger.debug(f"Failed to capture initial graph state: {e}")
289
+ graph_cache._initial_graphs = set()
290
+
291
+ graph_ready_initial = None
292
+ if emit_graph_ready:
293
+ try:
294
+ graph_ready_initial = {}
295
+ for graph_name in self.list_graphs(force_refresh=True):
296
+ graph_ready_initial[graph_name] = self._get_graph_signature(graph_name)
297
+ logger.debug("Graph-ready initial state captured: %s", set(graph_ready_initial))
298
+ except Exception as e:
299
+ logger.debug("Failed to capture graph-ready state: %s", e)
300
+ graph_ready_initial = {}
301
+ return graph_ready_initial
302
+
303
+ async def _cache_new_graphs(
304
+ self,
305
+ graph_cache: Optional[StreamingGraphCache],
306
+ *,
307
+ notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]],
308
+ total_lines: int,
309
+ completed_label: str,
310
+ ) -> None:
311
+ if not graph_cache or not graph_cache.auto_cache:
312
+ return
313
+ try:
314
+ cached_graphs = []
315
+ initial_graphs = getattr(graph_cache, "_initial_graphs", set())
316
+ current_graphs = set(self.list_graphs(force_refresh=True))
317
+ new_graphs = current_graphs - initial_graphs - graph_cache._cached_graphs
318
+
319
+ if new_graphs:
320
+ logger.info(f"Detected {len(new_graphs)} new graph(s): {sorted(new_graphs)}")
321
+
322
+ for graph_name in new_graphs:
323
+ try:
324
+ cache_result = await anyio.to_thread.run_sync(
325
+ self.cache_graph_on_creation,
326
+ graph_name,
327
+ )
328
+ if cache_result:
329
+ cached_graphs.append(graph_name)
330
+ graph_cache._cached_graphs.add(graph_name)
331
+
332
+ for callback in graph_cache._cache_callbacks:
333
+ try:
334
+ result = callback(graph_name, cache_result)
335
+ if inspect.isawaitable(result):
336
+ await result
337
+ except Exception:
338
+ pass
339
+ except Exception as e:
340
+ logger.error(f"Error caching graph {graph_name}: {e}")
341
+
342
+ if cached_graphs and notify_progress:
343
+ await notify_progress(
344
+ float(total_lines) if total_lines > 0 else 1,
345
+ float(total_lines) if total_lines > 0 else 1,
346
+ f"{completed_label} completed. Cached {len(cached_graphs)} graph(s): {', '.join(cached_graphs)}",
347
+ )
348
+ except Exception as e:
349
+ logger.error(f"Post-execution graph detection failed: {e}")
350
+
351
+ def _emit_graph_ready_task(
352
+ self,
353
+ *,
354
+ emit_graph_ready: bool,
355
+ graph_ready_initial: Optional[dict[str, str]],
356
+ notify_log: Callable[[str], Awaitable[None]],
357
+ graph_ready_task_id: Optional[str],
358
+ graph_ready_format: str,
359
+ ) -> None:
360
+ if emit_graph_ready and graph_ready_initial is not None:
361
+ try:
362
+ asyncio.create_task(
363
+ self._emit_graph_ready_events(
364
+ graph_ready_initial,
365
+ notify_log,
366
+ graph_ready_task_id,
367
+ graph_ready_format,
368
+ )
369
+ )
370
+ except Exception as e:
371
+ logger.warning("graph_ready emission failed to start: %s", e)
372
+
373
+ async def _stream_smcl_log(
374
+ self,
375
+ *,
376
+ smcl_path: str,
377
+ notify_log: Callable[[str], Awaitable[None]],
378
+ done: anyio.Event,
379
+ on_chunk: Optional[Callable[[str], Awaitable[None]]] = None,
380
+ ) -> None:
381
+ last_pos = 0
382
+ # Wait for Stata to create the SMCL file (placeholder removed to avoid locks)
383
+ while not done.is_set() and not os.path.exists(smcl_path):
384
+ await anyio.sleep(0.05)
385
+
386
+ try:
387
+ def _read_content() -> str:
388
+ try:
389
+ with open(smcl_path, "r", encoding="utf-8", errors="replace") as f:
390
+ f.seek(last_pos)
391
+ return f.read()
392
+ except PermissionError:
393
+ if os.name == "nt":
394
+ try:
395
+ res = subprocess.run(f'type "{smcl_path}"', shell=True, capture_output=True)
396
+ full_content = res.stdout.decode("utf-8", errors="replace")
397
+ if len(full_content) > last_pos:
398
+ return full_content[last_pos:]
399
+ return ""
400
+ except Exception:
401
+ return ""
402
+ raise
403
+ except FileNotFoundError:
404
+ return ""
405
+
406
+ while not done.is_set():
407
+ chunk = await anyio.to_thread.run_sync(_read_content)
408
+ if chunk:
409
+ last_pos += len(chunk)
410
+ await notify_log(chunk)
411
+ if on_chunk is not None:
412
+ await on_chunk(chunk)
413
+ await anyio.sleep(0.05)
414
+
415
+ chunk = await anyio.to_thread.run_sync(_read_content)
416
+ if chunk:
417
+ last_pos += len(chunk)
418
+ await notify_log(chunk)
419
+ if on_chunk is not None:
420
+ await on_chunk(chunk)
421
+
422
+ except Exception as e:
423
+ logger.warning(f"Log streaming failed: {e}")
424
+
425
+ def _run_streaming_blocking(
426
+ self,
427
+ *,
428
+ command: str,
429
+ tee: FileTeeIO,
430
+ cwd: Optional[str],
431
+ trace: bool,
432
+ echo: bool,
433
+ smcl_path: str,
434
+ smcl_log_name: str,
435
+ hold_attr: str,
436
+ require_smcl_log: bool = False,
437
+ ) -> tuple[int, Optional[Exception]]:
438
+ rc = -1
439
+ exc: Optional[Exception] = None
440
+ with self._exec_lock:
441
+ self._is_executing = True
442
+ try:
443
+ from sfi import Scalar, SFIToolkit # Import SFI tools
444
+ with self._temp_cwd(cwd):
445
+ log_opened = self._open_smcl_log(smcl_path, smcl_log_name)
446
+ if require_smcl_log and not log_opened:
447
+ exc = RuntimeError("Failed to open SMCL log")
448
+ rc = 1
449
+ if exc is None:
450
+ try:
451
+ with self._redirect_io_streaming(tee, tee):
452
+ try:
453
+ if trace:
454
+ self.stata.run("set trace on")
455
+ ret = self.stata.run(command, echo=echo)
456
+
457
+ setattr(self, hold_attr, f"mcp_hold_{uuid.uuid4().hex[:8]}")
458
+ self.stata.run(
459
+ f"capture _return hold {getattr(self, hold_attr)}",
460
+ echo=False,
461
+ )
462
+
463
+ if isinstance(ret, str) and ret:
464
+ try:
465
+ tee.write(ret)
466
+ except Exception:
467
+ pass
468
+ try:
469
+ rc = self._get_rc_from_scalar(Scalar)
470
+ except Exception:
471
+ pass
472
+ except Exception as e:
473
+ exc = e
474
+ if rc in (-1, 0):
475
+ rc = 1
476
+ finally:
477
+ if trace:
478
+ try:
479
+ self.stata.run("set trace off")
480
+ except Exception:
481
+ pass
482
+ finally:
483
+ self._close_smcl_log(smcl_log_name)
484
+ self._restore_results_from_hold(hold_attr)
485
+ return rc, exc
486
+ # If we get here, SMCL log failed and we're required to stop.
487
+ return rc, exc
488
+ finally:
489
+ self._is_executing = False
490
+ return rc, exc
491
+
492
+ def _resolve_do_file_path(
493
+ self,
494
+ path: str,
495
+ cwd: Optional[str],
496
+ ) -> tuple[Optional[str], Optional[str], Optional[CommandResponse]]:
497
+ if cwd is not None and not os.path.isdir(cwd):
498
+ return None, None, CommandResponse(
499
+ command=f'do "{path}"',
500
+ rc=601,
501
+ stdout="",
502
+ stderr=None,
503
+ success=False,
504
+ error=ErrorEnvelope(
505
+ message=f"cwd not found: {cwd}",
506
+ rc=601,
507
+ command=path,
508
+ ),
509
+ )
510
+
511
+ effective_path = path
512
+ if cwd is not None and not os.path.isabs(path):
513
+ effective_path = os.path.abspath(os.path.join(cwd, path))
514
+
515
+ if not os.path.exists(effective_path):
516
+ return None, None, CommandResponse(
517
+ command=f'do "{effective_path}"',
518
+ rc=601,
519
+ stdout="",
520
+ stderr=None,
521
+ success=False,
522
+ error=ErrorEnvelope(
523
+ message=f"Do-file not found: {effective_path}",
524
+ rc=601,
525
+ command=effective_path,
526
+ ),
527
+ )
528
+
529
+ path_for_stata = effective_path.replace("\\", "/")
530
+ command = f'do "{path_for_stata}"'
531
+ return effective_path, command, None
532
+
533
+ @contextmanager
534
+ def _smcl_log_capture(self) -> "Generator[Tuple[str, str], None, None]":
535
+ """
536
+ Context manager that wraps command execution in a named SMCL log.
537
+
538
+ This runs alongside any user logs (named logs can coexist).
539
+ Yields (log_name, log_path) tuple for use within the context.
540
+ The SMCL file is NOT deleted automatically - caller should clean up.
541
+
542
+ Usage:
543
+ with self._smcl_log_capture() as (log_name, smcl_path):
544
+ self.stata.run(cmd)
545
+ # After context, read smcl_path for raw SMCL output
546
+ """
547
+ # Use a unique name but DO NOT join start with mkstemp to avoid existing file locks.
548
+ # Stata will create the file.
549
+ smcl_path = self._create_smcl_log_path()
550
+ # Unique log name to avoid collisions with user logs
551
+ log_name = self._make_smcl_log_name()
552
+
553
+ try:
554
+ # Open named SMCL log (quietly to avoid polluting output)
555
+ log_opened = self._open_smcl_log(smcl_path, log_name, quiet=True)
556
+ if not log_opened:
557
+ # Still yield, consumer might see empty file or handle error,
558
+ # but we can't do much if Stata refuses to log.
559
+ pass
560
+
561
+ yield log_name, smcl_path
562
+ finally:
563
+ # Always close our named log
564
+ self._close_smcl_log(log_name)
565
+
566
+ def _read_smcl_file(self, path: str) -> str:
567
+ """Read SMCL file contents, handling encoding issues and Windows file locks."""
568
+ try:
569
+ with open(path, 'r', encoding='utf-8', errors='replace') as f:
570
+ return f.read()
571
+ except PermissionError:
572
+ if os.name == "nt":
573
+ # Windows Fallback: Try to use 'type' command to bypass exclusive lock
574
+ try:
575
+ res = subprocess.run(f'type "{path}"', shell=True, capture_output=True)
576
+ if res.returncode == 0:
577
+ return res.stdout.decode('utf-8', errors='replace')
578
+ except Exception as e:
579
+ logger.debug(f"Combined fallback read failed: {e}")
580
+ logger.warning(f"Failed to read SMCL file {path} due to lock")
581
+ return ""
582
+ except Exception as e:
583
+ logger.warning(f"Failed to read SMCL file {path}: {e}")
584
+ return ""
585
+
586
+ def _extract_error_from_smcl(self, smcl_content: str, rc: int) -> Tuple[str, str]:
587
+ """
588
+ Extract error message and context from raw SMCL output.
589
+
590
+ Uses {err} tags as the authoritative source for error detection.
591
+
592
+ Returns:
593
+ Tuple of (error_message, context_string)
594
+ """
595
+ if not smcl_content:
596
+ return f"Stata error r({rc})", ""
597
+
598
+ lines = smcl_content.splitlines()
599
+
600
+ # Search backwards for {err} tags - they indicate error lines
601
+ error_lines = []
602
+ error_start_idx = -1
603
+
604
+ for i in range(len(lines) - 1, -1, -1):
605
+ line = lines[i]
606
+ if '{err}' in line:
607
+ if error_start_idx == -1:
608
+ error_start_idx = i
609
+ # Walk backwards to find consecutive {err} lines
610
+ j = i
611
+ while j >= 0 and '{err}' in lines[j]:
612
+ error_lines.insert(0, lines[j])
613
+ j -= 1
614
+ break
615
+
616
+ if error_lines:
617
+ # Clean SMCL tags from error message
618
+ clean_lines = []
619
+ for line in error_lines:
620
+ # Remove SMCL tags but keep the text content
621
+ cleaned = re.sub(r'\{[^}]*\}', '', line).strip()
622
+ if cleaned:
623
+ clean_lines.append(cleaned)
624
+
625
+ error_msg = " ".join(clean_lines) or f"Stata error r({rc})"
626
+
627
+ # Context is everything from error start to end
628
+ context_start = max(0, error_start_idx - 5) # Include 5 lines before error
629
+ context = "\n".join(lines[context_start:])
630
+
631
+ return error_msg, context
632
+
633
+ # Fallback: no {err} found, return last 30 lines as context
634
+ context_start = max(0, len(lines) - 30)
635
+ context = "\n".join(lines[context_start:])
636
+
637
+ return f"Stata error r({rc})", context
638
+
639
+ def _parse_rc_from_smcl(self, smcl_content: str) -> Optional[int]:
640
+ """Parse return code from SMCL content using specific structural patterns."""
641
+ if not smcl_content:
642
+ return None
643
+
644
+ # 1. Primary check: SMCL search tag {search r(N), ...}
645
+ # This is the most authoritative interactive indicator
646
+ matches = list(re.finditer(r'\{search r\((\d+)\)', smcl_content))
647
+ if matches:
648
+ try:
649
+ return int(matches[-1].group(1))
650
+ except Exception:
651
+ pass
652
+
653
+ # 2. Secondary check: Standalone r(N); pattern
654
+ # This appears at the end of command blocks
655
+ matches = list(re.finditer(r'(?<!\w)r\((\d+)\);?', smcl_content))
656
+ if matches:
657
+ try:
658
+ return int(matches[-1].group(1))
659
+ except Exception:
660
+ pass
661
+
662
+ return None
663
+
184
664
  @staticmethod
185
665
  def _create_graph_cache_callback(on_graph_cached, notify_log):
186
666
  """Create a standardized graph cache callback with proper error handling."""
@@ -203,6 +683,142 @@ class StataClient:
203
683
 
204
684
  return graph_cache_callback
205
685
 
686
+ def _get_cached_graph_path(self, graph_name: str) -> Optional[str]:
687
+ if not hasattr(self, "_cache_lock") or not hasattr(self, "_preemptive_cache"):
688
+ return None
689
+ try:
690
+ with self._cache_lock:
691
+ return self._preemptive_cache.get(graph_name)
692
+ except Exception:
693
+ return None
694
+
695
+ async def _emit_graph_ready_for_graphs(
696
+ self,
697
+ graph_names: List[str],
698
+ *,
699
+ notify_log: Callable[[str], Awaitable[None]],
700
+ task_id: Optional[str],
701
+ export_format: str,
702
+ graph_ready_initial: Optional[dict[str, str]],
703
+ ) -> None:
704
+ if not graph_names:
705
+ return
706
+ fmt = (export_format or "svg").strip().lower()
707
+ for graph_name in graph_names:
708
+ signature = self._get_graph_signature(graph_name)
709
+ if graph_ready_initial is not None:
710
+ previous = graph_ready_initial.get(graph_name)
711
+ if previous is not None and previous == signature:
712
+ continue
713
+ try:
714
+ export_path = None
715
+ if fmt == "svg":
716
+ export_path = self._get_cached_graph_path(graph_name)
717
+ if not export_path:
718
+ export_path = await anyio.to_thread.run_sync(
719
+ lambda: self.export_graph(graph_name, format=fmt)
720
+ )
721
+ payload = {
722
+ "event": "graph_ready",
723
+ "task_id": task_id,
724
+ "graph": {
725
+ "name": graph_name,
726
+ "path": export_path,
727
+ "label": graph_name,
728
+ },
729
+ }
730
+ await notify_log(json.dumps(payload))
731
+ if graph_ready_initial is not None:
732
+ graph_ready_initial[graph_name] = signature
733
+ except Exception as e:
734
+ logger.warning("graph_ready export failed for %s: %s", graph_name, e)
735
+
736
+ async def _maybe_cache_graphs_on_chunk(
737
+ self,
738
+ *,
739
+ graph_cache: Optional[StreamingGraphCache],
740
+ emit_graph_ready: bool,
741
+ notify_log: Callable[[str], Awaitable[None]],
742
+ graph_ready_task_id: Optional[str],
743
+ graph_ready_format: str,
744
+ graph_ready_initial: Optional[dict[str, str]],
745
+ last_check: List[float],
746
+ ) -> None:
747
+ if not graph_cache or not graph_cache.auto_cache:
748
+ return
749
+ if self._is_executing:
750
+ return
751
+ now = time.monotonic()
752
+ if last_check and now - last_check[0] < 0.25:
753
+ return
754
+ if last_check:
755
+ last_check[0] = now
756
+ try:
757
+ cached_names = await graph_cache.cache_detected_graphs_with_pystata()
758
+ except Exception as e:
759
+ logger.debug("graph_ready polling failed: %s", e)
760
+ return
761
+ if emit_graph_ready and cached_names:
762
+ await self._emit_graph_ready_for_graphs(
763
+ cached_names,
764
+ notify_log=notify_log,
765
+ task_id=graph_ready_task_id,
766
+ export_format=graph_ready_format,
767
+ graph_ready_initial=graph_ready_initial,
768
+ )
769
+
770
+ async def _emit_graph_ready_events(
771
+ self,
772
+ initial_graphs: dict[str, str],
773
+ notify_log: Callable[[str], Awaitable[None]],
774
+ task_id: Optional[str],
775
+ export_format: str,
776
+ ) -> None:
777
+ try:
778
+ current_graphs = list(self.list_graphs(force_refresh=True))
779
+ except Exception as e:
780
+ logger.warning("graph_ready: list_graphs failed: %s", e)
781
+ return
782
+
783
+ if not current_graphs:
784
+ return
785
+
786
+ for graph_name in current_graphs:
787
+ signature = self._get_graph_signature(graph_name)
788
+ previous = initial_graphs.get(graph_name)
789
+ if previous is not None and previous == signature:
790
+ continue
791
+ try:
792
+ export_path = await anyio.to_thread.run_sync(
793
+ lambda: self.export_graph(graph_name, format=export_format)
794
+ )
795
+ payload = {
796
+ "event": "graph_ready",
797
+ "task_id": task_id,
798
+ "graph": {
799
+ "name": graph_name,
800
+ "path": export_path,
801
+ "label": graph_name,
802
+ },
803
+ }
804
+ await notify_log(json.dumps(payload))
805
+ initial_graphs[graph_name] = signature
806
+ except Exception as e:
807
+ logger.warning("graph_ready export failed for %s: %s", graph_name, e)
808
+
809
+ def _get_graph_signature(self, graph_name: str) -> str:
810
+ if not graph_name:
811
+ return ""
812
+ try:
813
+ response = self.exec_lightweight(f"graph describe {graph_name}")
814
+ if response.success and response.stdout:
815
+ return response.stdout
816
+ if response.stderr:
817
+ return response.stderr
818
+ except Exception:
819
+ return ""
820
+ return ""
821
+
206
822
  def _request_break_in(self) -> None:
207
823
  """
208
824
  Attempt to interrupt a running Stata command when cancellation is requested.
@@ -272,72 +888,189 @@ class StataClient:
272
888
  finally:
273
889
  os.chdir(prev)
274
890
 
891
+ @contextmanager
892
+ def _safe_redirect_fds(self):
893
+ """Redirects fd 1 (stdout) to fd 2 (stderr) at the OS level."""
894
+ # Save original stdout fd
895
+ try:
896
+ stdout_fd = os.dup(1)
897
+ except Exception:
898
+ # Fallback if we can't dup (e.g. strange environment)
899
+ yield
900
+ return
901
+
902
+ try:
903
+ # Redirect OS-level stdout to stderr
904
+ os.dup2(2, 1)
905
+ yield
906
+ finally:
907
+ # Restore stdout
908
+ try:
909
+ os.dup2(stdout_fd, 1)
910
+ os.close(stdout_fd)
911
+ except Exception:
912
+ pass
913
+
275
914
  def init(self):
276
915
  """Initializes usage of pystata using cached discovery results."""
277
916
  if self._initialized:
278
917
  return
279
918
 
919
+ # Suppress any non-UTF8 banner output from PyStata on stdout, which breaks MCP stdio transport
920
+ from contextlib import redirect_stdout, redirect_stderr
921
+
280
922
  try:
281
923
  import stata_setup
282
924
 
283
- # Get discovered Stata path (cached from first call)
284
- stata_exec_path, edition = _get_discovered_stata()
285
-
286
- candidates = []
287
-
288
- # Prefer the binary directory first (documented input for stata_setup)
289
- bin_dir = os.path.dirname(stata_exec_path)
290
- if bin_dir:
291
- candidates.append(bin_dir)
292
-
293
- # 2. App Bundle: .../StataMP.app (macOS only)
294
- curr = bin_dir
295
- app_bundle = None
296
- while len(curr) > 1:
297
- if curr.endswith(".app"):
298
- app_bundle = curr
299
- break
300
- parent = os.path.dirname(curr)
301
- if parent == curr: # Reached root directory, prevent infinite loop on Windows
302
- break
303
- curr = parent
304
-
305
- if app_bundle:
306
- candidates.insert(0, os.path.dirname(app_bundle))
307
- candidates.insert(1, app_bundle)
925
+ # Get discovered Stata paths (cached from first call)
926
+ discovery_candidates = _get_discovery_candidates()
308
927
 
309
- # Deduplicate preserving order
310
- seen = set()
311
- deduped = []
312
- for c in candidates:
313
- if c in seen:
314
- continue
315
- seen.add(c)
316
- deduped.append(c)
317
- candidates = deduped
928
+ # Diagnostic: force faulthandler to output to stderr for C crashes
929
+ import faulthandler
930
+ faulthandler.enable(file=sys.stderr)
931
+ import subprocess
318
932
 
319
933
  success = False
320
- for path in candidates:
321
- try:
322
- stata_setup.config(path, edition)
323
- success = True
324
- logger.debug("stata_setup.config succeeded with path: %s", path)
934
+ last_error = None
935
+ chosen_exec: Optional[Tuple[str, str]] = None
936
+
937
+ for stata_exec_path, edition in discovery_candidates:
938
+ candidates = []
939
+ # Prefer the binary directory first (documented input for stata_setup)
940
+ bin_dir = os.path.dirname(stata_exec_path)
941
+
942
+ # 2. App Bundle: .../StataMP.app (macOS only)
943
+ curr = bin_dir
944
+ app_bundle = None
945
+ while len(curr) > 1:
946
+ if curr.endswith(".app"):
947
+ app_bundle = curr
948
+ break
949
+ parent = os.path.dirname(curr)
950
+ if parent == curr:
951
+ break
952
+ curr = parent
953
+
954
+ ordered_candidates = []
955
+ if bin_dir:
956
+ ordered_candidates.append(bin_dir)
957
+ if app_bundle:
958
+ ordered_candidates.append(app_bundle)
959
+ parent_dir = os.path.dirname(app_bundle)
960
+ if parent_dir not in ordered_candidates:
961
+ ordered_candidates.append(parent_dir)
962
+
963
+ # Deduplicate preserving order
964
+ seen = set()
965
+ candidates = []
966
+ for c in ordered_candidates:
967
+ if c not in seen:
968
+ seen.add(c)
969
+ candidates.append(c)
970
+
971
+ for path in candidates:
972
+ try:
973
+ # 1. Pre-flight check in a subprocess to capture hard exits/crashes
974
+ sys.stderr.write(f"[mcp_stata] DEBUG: Pre-flight check for path '{path}'\n")
975
+ sys.stderr.flush()
976
+
977
+ preflight_code = f"""
978
+ import sys
979
+ import stata_setup
980
+ from contextlib import redirect_stdout, redirect_stderr
981
+ with redirect_stdout(sys.stderr), redirect_stderr(sys.stderr):
982
+ try:
983
+ stata_setup.config({repr(path)}, {repr(edition)})
984
+ from pystata import stata
985
+ stata.run('about', echo=True)
986
+ print('PREFLIGHT_OK')
987
+ except Exception as e:
988
+ print(f'PREFLIGHT_FAIL: {{e}}', file=sys.stderr)
989
+ sys.exit(1)
990
+ """
991
+
992
+ try:
993
+ res = subprocess.run(
994
+ [sys.executable, "-c", preflight_code],
995
+ capture_output=True, text=True, timeout=30
996
+ )
997
+ if res.returncode != 0:
998
+ sys.stderr.write(f"[mcp_stata] Pre-flight failed (rc={res.returncode}) for '{path}'\n")
999
+ if res.stdout.strip():
1000
+ sys.stderr.write(f"--- Pre-flight stdout ---\n{res.stdout.strip()}\n")
1001
+ if res.stderr.strip():
1002
+ sys.stderr.write(f"--- Pre-flight stderr ---\n{res.stderr.strip()}\n")
1003
+ sys.stderr.flush()
1004
+ last_error = f"Pre-flight failed: {res.stdout.strip()} {res.stderr.strip()}"
1005
+ continue
1006
+ else:
1007
+ sys.stderr.write(f"[mcp_stata] Pre-flight succeeded for '{path}'. Proceeding to in-process init.\n")
1008
+ sys.stderr.flush()
1009
+ except Exception as pre_e:
1010
+ sys.stderr.write(f"[mcp_stata] Pre-flight execution error for '{path}': {repr(pre_e)}\n")
1011
+ sys.stderr.flush()
1012
+ last_error = pre_e
1013
+ continue
1014
+
1015
+ msg = f"[mcp_stata] DEBUG: In-process stata_setup.config('{path}', '{edition}')\n"
1016
+ sys.stderr.write(msg)
1017
+ sys.stderr.flush()
1018
+ # Redirect both sys.stdout/err AND the raw fds to our stderr pipe.
1019
+ with redirect_stdout(sys.stderr), redirect_stderr(sys.stderr), self._safe_redirect_fds():
1020
+ stata_setup.config(path, edition)
1021
+
1022
+ sys.stderr.write(f"[mcp_stata] DEBUG: stata_setup.config succeeded for path: {path}\n")
1023
+ sys.stderr.flush()
1024
+ success = True
1025
+ chosen_exec = (stata_exec_path, edition)
1026
+ logger.info("stata_setup.config succeeded with path: %s", path)
1027
+ break
1028
+ except BaseException as e:
1029
+ last_error = e
1030
+ sys.stderr.write(f"[mcp_stata] WARNING: In-process stata_setup.config caught: {repr(e)}\n")
1031
+ sys.stderr.flush()
1032
+ logger.warning("stata_setup.config failed for path '%s': %s", path, e)
1033
+ if isinstance(e, SystemExit):
1034
+ break
1035
+ continue
1036
+
1037
+ if success:
1038
+ # Cache winning candidate for subsequent lookups
1039
+ global _discovery_result
1040
+ if chosen_exec:
1041
+ _discovery_result = chosen_exec
325
1042
  break
326
- except Exception:
327
- continue
328
1043
 
329
1044
  if not success:
330
- raise RuntimeError(
331
- f"stata_setup.config failed. Tried: {candidates}. "
332
- f"Derived from binary: {stata_exec_path}"
1045
+ error_msg = (
1046
+ f"stata_setup.config failed to initialize Stata. "
1047
+ f"Tried candidates: {discovery_candidates}. "
1048
+ f"Last error: {repr(last_error)}"
333
1049
  )
1050
+ sys.stderr.write(f"[mcp_stata] ERROR: {error_msg}\n")
1051
+ sys.stderr.flush()
1052
+ logger.error(error_msg)
1053
+ raise RuntimeError(error_msg)
334
1054
 
335
1055
  # Cache the binary path for later use (e.g., PNG export on Windows)
336
1056
  self._stata_exec_path = os.path.abspath(stata_exec_path)
337
1057
 
338
- from pystata import stata # type: ignore[import-not-found]
339
- self.stata = stata
340
- self._initialized = True
1058
+ try:
1059
+ sys.stderr.write("[mcp_stata] DEBUG: Importing pystata and warming up...\n")
1060
+ sys.stderr.flush()
1061
+ with redirect_stdout(sys.stderr), redirect_stderr(sys.stderr), self._safe_redirect_fds():
1062
+ from pystata import stata # type: ignore[import-not-found]
1063
+ # Warm up the engine and swallow any late splash screen output
1064
+ stata.run("display 1", echo=False)
1065
+ self.stata = stata
1066
+ self._initialized = True
1067
+ sys.stderr.write("[mcp_stata] DEBUG: pystata warmed up successfully\n")
1068
+ sys.stderr.flush()
1069
+ except BaseException as e:
1070
+ sys.stderr.write(f"[mcp_stata] ERROR: Failed to load pystata or run initial command: {repr(e)}\n")
1071
+ sys.stderr.flush()
1072
+ logger.error("Failed to load pystata or run initial command: %s", e)
1073
+ raise
341
1074
 
342
1075
  # Initialize list_graphs TTL cache
343
1076
  self._list_graphs_cache = None
@@ -408,61 +1141,148 @@ class StataClient:
408
1141
 
409
1142
  return pat.sub(repl, code)
410
1143
 
411
- def _read_return_code(self) -> int:
412
- """Read the last Stata return code without mutating rc."""
413
- try:
414
- from sfi import Macro # type: ignore[import-not-found]
415
- rc_val = Macro.getCValue("rc") # type: ignore[attr-defined]
416
- if rc_val is not None:
417
- return int(float(rc_val))
418
- # If getCValue returns None, fall through to the alternative approach
419
- except Exception:
420
- pass
421
-
422
- # Alternative approach: use a global macro
423
- # CRITICAL: This must be done carefully to avoid mutating c(rc)
424
- try:
425
- self.stata.run("global MCP_RC = c(rc)")
426
- from sfi import Macro as Macro2 # type: ignore[import-not-found]
427
- rc_val = Macro2.getGlobal("MCP_RC")
428
- return int(float(rc_val))
429
- except Exception:
430
- return -1
431
-
432
1144
  def _get_rc_from_scalar(self, Scalar) -> int:
433
1145
  """Safely get return code, handling None values."""
434
1146
  try:
435
1147
  from sfi import Macro
436
- rc_val = Macro.getCValue("rc")
1148
+ rc_val = Macro.getGlobal("_rc")
437
1149
  if rc_val is None:
438
1150
  return -1
439
1151
  return int(float(rc_val))
440
1152
  except Exception:
441
1153
  return -1
442
1154
 
443
- def _parse_rc_from_text(self, text: str) -> Optional[int]:
444
- match = re.search(r"r\((\d+)\)", text)
445
- if match:
446
- try:
447
- return int(match.group(1))
448
- except Exception:
449
- return None
450
- return None
1155
+ def _parse_rc_from_text(self, text: str) -> Optional[int]:
1156
+ """Parse return code from plain text using structural patterns."""
1157
+ if not text:
1158
+ return None
1159
+
1160
+ # 1. Primary check: 'search r(N)' pattern (SMCL tag potentially stripped)
1161
+ matches = list(re.finditer(r'search r\((\d+)\)', text))
1162
+ if matches:
1163
+ try:
1164
+ return int(matches[-1].group(1))
1165
+ except Exception:
1166
+ pass
1167
+
1168
+ # 2. Secondary check: Standalone r(N); pattern
1169
+ # This appears at the end of command blocks
1170
+ matches = list(re.finditer(r'(?<!\w)r\((\d+)\);?', text))
1171
+ if matches:
1172
+ try:
1173
+ return int(matches[-1].group(1))
1174
+ except Exception:
1175
+ pass
1176
+
1177
+ return None
1178
+
1179
+ def _parse_line_from_text(self, text: str) -> Optional[int]:
1180
+ match = re.search(r"line\s+(\d+)", text, re.IGNORECASE)
1181
+ if match:
1182
+ try:
1183
+ return int(match.group(1))
1184
+ except Exception:
1185
+ return None
1186
+ return None
1187
+
1188
+ def _read_log_backwards_until_error(self, path: str, max_bytes: int = 5_000_000) -> str:
1189
+ """
1190
+ Read log file backwards in chunks, stopping when we find {err} tags or reach the start.
1191
+
1192
+ This is more efficient and robust than reading huge fixed tails, as we only read
1193
+ what we need to find the error.
1194
+
1195
+ Args:
1196
+ path: Path to the log file
1197
+ max_bytes: Maximum total bytes to read (safety limit, default 5MB)
1198
+
1199
+ Returns:
1200
+ The relevant portion of the log containing the error and context
1201
+ """
1202
+ try:
1203
+ chunk_size = 50_000 # Read 50KB chunks at a time
1204
+ total_read = 0
1205
+ chunks = []
1206
+
1207
+ with open(path, 'rb') as f:
1208
+ # Get file size
1209
+ f.seek(0, os.SEEK_END)
1210
+ file_size = f.tell()
1211
+
1212
+ if file_size == 0:
1213
+ return ""
1214
+
1215
+ # Start from the end
1216
+ position = file_size
1217
+
1218
+ while position > 0 and total_read < max_bytes:
1219
+ # Calculate how much to read in this chunk
1220
+ read_size = min(chunk_size, position, max_bytes - total_read)
1221
+ position -= read_size
1222
+
1223
+ # Seek and read
1224
+ f.seek(position)
1225
+ chunk = f.read(read_size)
1226
+ chunks.insert(0, chunk)
1227
+ total_read += read_size
1228
+
1229
+ # Decode and check for error tags
1230
+ try:
1231
+ accumulated = b''.join(chunks).decode('utf-8', errors='replace')
1232
+
1233
+ # Check if we've found an error tag
1234
+ if '{err}' in accumulated:
1235
+ # Found it! Read one more chunk for context before the error
1236
+ if position > 0 and total_read < max_bytes:
1237
+ extra_read = min(chunk_size, position, max_bytes - total_read)
1238
+ position -= extra_read
1239
+ f.seek(position)
1240
+ extra_chunk = f.read(extra_read)
1241
+ chunks.insert(0, extra_chunk)
1242
+
1243
+ return b''.join(chunks).decode('utf-8', errors='replace')
1244
+
1245
+ except UnicodeDecodeError:
1246
+ # Continue reading if we hit a decode error (might be mid-character)
1247
+ continue
1248
+
1249
+ # Read everything we've accumulated
1250
+ return b''.join(chunks).decode('utf-8', errors='replace')
451
1251
 
452
- def _parse_line_from_text(self, text: str) -> Optional[int]:
453
- match = re.search(r"line\s+(\d+)", text, re.IGNORECASE)
454
- if match:
455
- try:
456
- return int(match.group(1))
457
- except Exception:
458
- return None
459
- return None
1252
+ except Exception as e:
1253
+ logger.warning(f"Error reading log backwards: {e}")
1254
+ # Fallback to regular tail read
1255
+ return self._read_log_tail(path, 200_000)
1256
+
1257
+ def _read_log_tail_smart(self, path: str, rc: int, trace: bool = False) -> str:
1258
+ """
1259
+ Smart log tail reader that adapts based on whether an error occurred.
1260
+
1261
+ - If rc == 0: Read normal tail (20KB without trace, 200KB with trace)
1262
+ - If rc != 0: Search backwards dynamically to find the error
1263
+
1264
+ Args:
1265
+ path: Path to the log file
1266
+ rc: Return code from Stata
1267
+ trace: Whether trace mode was enabled
1268
+
1269
+ Returns:
1270
+ Relevant log content
1271
+ """
1272
+ if rc != 0:
1273
+ # Error occurred - search backwards for {err} tags
1274
+ return self._read_log_backwards_until_error(path)
1275
+ else:
1276
+ # Success - just read normal tail
1277
+ tail_size = 200_000 if trace else 20_000
1278
+ return self._read_log_tail(path, tail_size)
460
1279
 
461
1280
  def _read_log_tail(self, path: str, max_chars: int) -> str:
462
1281
  try:
463
1282
  with open(path, "rb") as f:
464
1283
  f.seek(0, os.SEEK_END)
465
1284
  size = f.tell()
1285
+
466
1286
  if size <= 0:
467
1287
  return ""
468
1288
  read_size = min(size, max_chars)
@@ -472,6 +1292,98 @@ class StataClient:
472
1292
  except Exception:
473
1293
  return ""
474
1294
 
1295
+ def _build_combined_log(
1296
+ self,
1297
+ tail: TailBuffer,
1298
+ path: str,
1299
+ rc: int,
1300
+ trace: bool,
1301
+ exc: Optional[Exception],
1302
+ ) -> str:
1303
+ tail_text = tail.get_value()
1304
+ log_tail = self._read_log_tail_smart(path, rc, trace)
1305
+ if log_tail and len(log_tail) > len(tail_text):
1306
+ tail_text = log_tail
1307
+ return (tail_text or "") + (f"\n{exc}" if exc else "")
1308
+
1309
+ def _truncate_command_output(
1310
+ self,
1311
+ result: CommandResponse,
1312
+ max_output_lines: Optional[int],
1313
+ ) -> CommandResponse:
1314
+ if max_output_lines is None or not result.stdout:
1315
+ return result
1316
+ lines = result.stdout.splitlines()
1317
+ if len(lines) <= max_output_lines:
1318
+ return result
1319
+ truncated_lines = lines[:max_output_lines]
1320
+ truncated_lines.append(
1321
+ f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)"
1322
+ )
1323
+ truncated_stdout = "\n".join(truncated_lines)
1324
+ if hasattr(result, "model_copy"):
1325
+ return result.model_copy(update={"stdout": truncated_stdout})
1326
+ return result.copy(update={"stdout": truncated_stdout})
1327
+
1328
    def _run_plain_capture(self, code: str) -> str:
        """
        Run a Stata command while capturing output using a named SMCL log.
        This is the most reliable way to capture output (like return list)
        without interfering with user logs or being affected by stdout redirection issues.

        Args:
            code: Stata command(s) to execute.

        Returns:
            The captured output converted from SMCL to plain text ("" when
            the capture log could not be opened).
        """
        if not self._initialized:
            self.init()

        with self._exec_lock:
            # Unique per-call hold name so concurrent/nested holds cannot collide.
            hold_name = f"mcp_hold_{uuid.uuid4().hex[:8]}"
            # Hold results BEFORE opening the capture log
            self.stata.run(f"capture _return hold {hold_name}", echo=False)

            try:
                with self._smcl_log_capture() as (log_name, smcl_path):
                    # Restore results INSIDE the capture log so return list can see them
                    self.stata.run(f"capture _return restore {hold_name}", echo=False)
                    try:
                        self.stata.run(code, echo=True)
                    except Exception:
                        # Errors are deliberately swallowed: the caller only
                        # wants whatever output made it into the SMCL log.
                        pass
            except Exception:
                # Cleanup hold if log capture failed to open
                self.stata.run(f"capture _return drop {hold_name}", echo=False)
                content = ""
                smcl_path = None
            else:
                # Read SMCL content and convert to text
                content = self._read_smcl_file(smcl_path)
                # Remove the temp file
                self._safe_unlink(smcl_path)

        return self._smcl_to_text(content)
1362
+
1363
+ def _count_do_file_lines(self, path: str) -> int:
1364
+ """
1365
+ Count the number of executable lines in a .do file for progress inference.
1366
+
1367
+ Blank lines and comment-only lines (starting with * or //) are ignored.
1368
+ """
1369
+ try:
1370
+ with open(path, "r", encoding="utf-8", errors="replace") as f:
1371
+ lines = f.read().splitlines()
1372
+ except Exception:
1373
+ return 0
1374
+
1375
+ total = 0
1376
+ for line in lines:
1377
+ s = line.strip()
1378
+ if not s:
1379
+ continue
1380
+ if s.startswith("*"):
1381
+ continue
1382
+ if s.startswith("//"):
1383
+ continue
1384
+ total += 1
1385
+ return total
1386
+
475
1387
  def _smcl_to_text(self, smcl: str) -> str:
476
1388
  """Convert simple SMCL markup into plain text for LLM-friendly help."""
477
1389
  # First, keep inline directive content if present (e.g., {bf:word} -> word)
@@ -530,17 +1442,44 @@ class StataClient:
530
1442
  error_buffer = StringIO()
531
1443
  rc = 0
532
1444
  sys_error = None
1445
+ error_envelope = None
1446
+ smcl_content = ""
1447
+ smcl_path = None
533
1448
 
534
1449
  with self._exec_lock:
535
1450
  try:
536
- from sfi import Scalar, SFIToolkit # Import SFI tools inside execution block
1451
+ from sfi import Scalar, SFIToolkit
537
1452
  with self._temp_cwd(cwd):
538
- with self._redirect_io(output_buffer, error_buffer):
539
- if trace:
540
- self.stata.run("set trace on")
541
-
542
- # 1. Run the user code
543
- self.stata.run(code, echo=echo)
1453
+ # Create SMCL log for authoritative output capture
1454
+ # Use shorter unique path to avoid Windows path issues
1455
+ smcl_path = self._create_smcl_log_path(prefix="mcp_", max_hex=16)
1456
+ log_name = self._make_smcl_log_name()
1457
+ self._open_smcl_log(smcl_path, log_name)
1458
+
1459
+ try:
1460
+ with self._redirect_io(output_buffer, error_buffer):
1461
+ try:
1462
+ if trace:
1463
+ self.stata.run("set trace on")
1464
+
1465
+ # Run the user code
1466
+ self.stata.run(code, echo=echo)
1467
+
1468
+ # Hold results IMMEDIATELY to prevent clobbering by cleanup
1469
+ self._hold_name = f"mcp_hold_{uuid.uuid4().hex[:8]}"
1470
+ self.stata.run(f"capture _return hold {self._hold_name}", echo=False)
1471
+
1472
+ finally:
1473
+ if trace:
1474
+ try:
1475
+ self.stata.run("set trace off")
1476
+ except Exception:
1477
+ pass
1478
+ finally:
1479
+ # Close SMCL log AFTER output redirection
1480
+ self._close_smcl_log(log_name)
1481
+ # Restore and capture results while still inside the lock
1482
+ self._restore_results_from_hold("_hold_name")
544
1483
 
545
1484
  except Exception as e:
546
1485
  sys_error = str(e)
@@ -548,36 +1487,66 @@ class StataClient:
548
1487
  parsed_rc = self._parse_rc_from_text(sys_error)
549
1488
  rc = parsed_rc if parsed_rc is not None else 1
550
1489
 
1490
+ # Read SMCL content as the authoritative source
1491
+ if smcl_path:
1492
+ smcl_content = self._read_smcl_file(smcl_path)
1493
+ # Clean up SMCL file
1494
+ self._safe_unlink(smcl_path)
1495
+
551
1496
  stdout_content = output_buffer.getvalue()
552
1497
  stderr_content = error_buffer.getvalue()
553
- full_log = stdout_content + "\n" + stderr_content
554
1498
 
555
- # 2. Extract RC from log tail (primary error detection method)
556
- if rc == 1 and not sys_error: # No exception but might have error in log
557
- parsed_rc = self._parse_rc_from_text(full_log)
558
- if parsed_rc is not None:
1499
+ # If RC wasn't captured or is generic, try to parse from SMCL
1500
+ if rc in (0, 1, -1) and smcl_content:
1501
+ parsed_rc = self._parse_rc_from_smcl(smcl_content)
1502
+ if parsed_rc is not None and parsed_rc != 0:
559
1503
  rc = parsed_rc
1504
+ elif rc == -1:
1505
+ rc = 0
1506
+
1507
+ # If stdout is empty but SMCL has content AND command succeeded, use SMCL as stdout
1508
+ # This handles cases where Stata writes to log but not to redirected stdout
1509
+ # For errors, we keep stdout empty and error info goes to ErrorEnvelope
1510
+ if rc == 0 and not stdout_content and smcl_content:
1511
+ # Convert SMCL to plain text for stdout
1512
+ stdout_content = self._smcl_to_text(smcl_content)
560
1513
 
561
- error_envelope = None
562
1514
  if rc != 0:
563
1515
  if sys_error:
564
1516
  msg = sys_error
565
- snippet = sys_error # Include the exception message as snippet
1517
+ context = sys_error
566
1518
  else:
567
- # Extract error message from log tail
568
- msg, context = self._extract_error_and_context(full_log, rc)
569
-
570
- error_envelope = ErrorEnvelope(message=msg, rc=rc, context=context, snippet=full_log[-800:])
1519
+ # Extract error from SMCL (authoritative source)
1520
+ msg, context = self._extract_error_from_smcl(smcl_content, rc)
1521
+
1522
+ error_envelope = ErrorEnvelope(
1523
+ message=msg,
1524
+ rc=rc,
1525
+ context=context,
1526
+ snippet=smcl_content[-800:] if smcl_content else (stdout_content + stderr_content)[-800:],
1527
+ smcl_output=smcl_content # Include raw SMCL for debugging
1528
+ )
1529
+ stderr_content = context
571
1530
 
572
- return CommandResponse(
1531
+ resp = CommandResponse(
573
1532
  command=code,
574
1533
  rc=rc,
575
1534
  stdout=stdout_content,
576
1535
  stderr=stderr_content,
577
1536
  success=(rc == 0),
578
1537
  error=error_envelope,
1538
+ log_path=smcl_path if smcl_path else None,
1539
+ smcl_output=smcl_content,
579
1540
  )
580
1541
 
1542
+ # Capture results immediately after execution, INSIDE the lock
1543
+ try:
1544
+ self._last_results = self.get_stored_results(force_fresh=True)
1545
+ except Exception:
1546
+ self._last_results = None
1547
+
1548
+ return resp
1549
+
581
1550
  def _exec_no_capture(self, code: str, echo: bool = False, trace: bool = False) -> CommandResponse:
582
1551
  """Execute Stata code while leaving stdout/stderr alone."""
583
1552
  if not self._initialized:
@@ -595,9 +1564,7 @@ class StataClient:
595
1564
  ret = self.stata.run(code, echo=echo)
596
1565
  if isinstance(ret, str) and ret:
597
1566
  ret_text = ret
598
-
599
- # Robust RC check even for no-capture
600
- rc = self._read_return_code()
1567
+
601
1568
 
602
1569
  except Exception as e:
603
1570
  exc = e
@@ -631,23 +1598,64 @@ class StataClient:
631
1598
  error=error,
632
1599
  )
633
1600
 
1601
+ def exec_lightweight(self, code: str) -> CommandResponse:
1602
+ """
1603
+ Executes a command using simple stdout redirection (no SMCL logs).
1604
+ Much faster on Windows as it avoids FS operations.
1605
+ LIMITED: Does not support error envelopes or complex return code parsing.
1606
+ """
1607
+ if not self._initialized:
1608
+ self.init()
1609
+
1610
+ code = self._maybe_rewrite_graph_name_in_command(code)
1611
+
1612
+ output_buffer = StringIO()
1613
+ error_buffer = StringIO()
1614
+ rc = 0
1615
+ exc = None
1616
+
1617
+ with self._exec_lock:
1618
+ with self._redirect_io(output_buffer, error_buffer):
1619
+ try:
1620
+ self.stata.run(code, echo=False)
1621
+ except Exception as e:
1622
+ exc = e
1623
+ rc = 1
1624
+
1625
+ stdout = output_buffer.getvalue()
1626
+ stderr = error_buffer.getvalue()
1627
+
1628
+ return CommandResponse(
1629
+ command=code,
1630
+ rc=rc,
1631
+ stdout=stdout,
1632
+ stderr=stderr if not exc else str(exc),
1633
+ success=(rc == 0),
1634
+ error=None
1635
+ )
1636
+
634
1637
  async def run_command_streaming(
635
- self,
636
- code: str,
637
- *,
638
- notify_log: Callable[[str], Awaitable[None]],
639
- notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
640
- echo: bool = True,
641
- trace: bool = False,
642
- max_output_lines: Optional[int] = None,
643
- cwd: Optional[str] = None,
644
- auto_cache_graphs: bool = False,
645
- on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
646
- ) -> CommandResponse:
1638
+ self,
1639
+ code: str,
1640
+ *,
1641
+ notify_log: Callable[[str], Awaitable[None]],
1642
+ notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
1643
+ echo: bool = True,
1644
+ trace: bool = False,
1645
+ max_output_lines: Optional[int] = None,
1646
+ cwd: Optional[str] = None,
1647
+ auto_cache_graphs: bool = False,
1648
+ on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
1649
+ emit_graph_ready: bool = False,
1650
+ graph_ready_task_id: Optional[str] = None,
1651
+ graph_ready_format: str = "svg",
1652
+ ) -> CommandResponse:
647
1653
  if not self._initialized:
648
1654
  self.init()
649
1655
 
650
1656
  code = self._maybe_rewrite_graph_name_in_command(code)
1657
+ auto_cache_graphs = auto_cache_graphs or emit_graph_ready
1658
+ total_lines = 0 # Commands (not do-files) do not have line-based progress
651
1659
 
652
1660
  if cwd is not None and not os.path.isdir(cwd):
653
1661
  return CommandResponse(
@@ -665,206 +1673,183 @@ class StataClient:
665
1673
 
666
1674
  start_time = time.time()
667
1675
  exc: Optional[Exception] = None
1676
+ smcl_content = ""
1677
+ smcl_path = None
668
1678
 
669
1679
  # Setup streaming graph cache if enabled
670
- graph_cache = None
671
- if auto_cache_graphs:
672
- graph_cache = StreamingGraphCache(self, auto_cache=True)
673
-
674
- graph_cache_callback = self._create_graph_cache_callback(on_graph_cached, notify_log)
675
-
676
- graph_cache.add_cache_callback(graph_cache_callback)
1680
+ graph_cache = self._init_streaming_graph_cache(auto_cache_graphs, on_graph_cached, notify_log)
677
1681
 
678
- log_file = tempfile.NamedTemporaryFile(
679
- prefix="mcp_stata_",
680
- suffix=".log",
681
- delete=False,
682
- mode="w",
683
- encoding="utf-8",
684
- errors="replace",
685
- buffering=1,
686
- )
687
- log_path = log_file.name
688
- tail = TailBuffer(max_chars=200000 if trace else 20000)
689
- tee = FileTeeIO(log_file, tail)
1682
+ _log_file, log_path, tail, tee = self._create_streaming_log(trace=trace)
1683
+
1684
+ # Create SMCL log path for authoritative output capture
1685
+ smcl_path = self._create_smcl_log_path()
1686
+ smcl_log_name = self._make_smcl_log_name()
690
1687
 
691
1688
  # Inform the MCP client immediately where to read/tail the output.
692
- await notify_log(json.dumps({"event": "log_path", "path": log_path}))
1689
+ await notify_log(json.dumps({"event": "log_path", "path": smcl_path}))
693
1690
 
694
1691
  rc = -1
1692
+ path_for_stata = code.replace("\\", "/")
1693
+ command = f'{path_for_stata}'
1694
+
1695
+ graph_ready_initial = self._capture_graph_state(graph_cache, emit_graph_ready)
1696
+ graph_poll_state = [0.0]
1697
+
1698
+ async def on_chunk_for_graphs(_chunk: str) -> None:
1699
+ await self._maybe_cache_graphs_on_chunk(
1700
+ graph_cache=graph_cache,
1701
+ emit_graph_ready=emit_graph_ready,
1702
+ notify_log=notify_log,
1703
+ graph_ready_task_id=graph_ready_task_id,
1704
+ graph_ready_format=graph_ready_format,
1705
+ graph_ready_initial=graph_ready_initial,
1706
+ last_check=graph_poll_state,
1707
+ )
695
1708
 
696
- def _run_blocking() -> None:
697
- nonlocal rc, exc
698
- with self._exec_lock:
699
- self._is_executing = True
700
- try:
701
- from sfi import Scalar, SFIToolkit # Import SFI tools
702
- with self._temp_cwd(cwd):
703
- with self._redirect_io_streaming(tee, tee):
704
- try:
705
- if trace:
706
- self.stata.run("set trace on")
707
- ret = self.stata.run(code, echo=echo)
708
- # Some PyStata builds return output as a string rather than printing.
709
- if isinstance(ret, str) and ret:
710
- try:
711
- tee.write(ret)
712
- except Exception:
713
- pass
1709
+ done = anyio.Event()
714
1710
 
715
- # ROBUST DETECTION & OUTPUT
716
- rc = self._read_return_code()
1711
+ async with anyio.create_task_group() as tg:
1712
+ async def stream_smcl() -> None:
1713
+ await self._stream_smcl_log(
1714
+ smcl_path=smcl_path,
1715
+ notify_log=notify_log,
1716
+ done=done,
1717
+ on_chunk=on_chunk_for_graphs if graph_cache else None,
1718
+ )
717
1719
 
718
- except Exception as e:
719
- exc = e
720
- if rc == 0: rc = 1
721
- finally:
722
- if trace:
723
- try:
724
- self.stata.run("set trace off")
725
- except Exception:
726
- pass
727
- finally:
728
- self._is_executing = False
1720
+ tg.start_soon(stream_smcl)
729
1721
 
730
- try:
731
1722
  if notify_progress is not None:
732
- await notify_progress(0, None, "Running Stata command")
733
-
734
- await anyio.to_thread.run_sync(_run_blocking, abandon_on_cancel=True)
735
- except get_cancelled_exc_class():
736
- # Best-effort cancellation: signal Stata to break, wait briefly, then propagate.
737
- self._request_break_in()
738
- await self._wait_for_stata_stop()
739
- raise
740
- finally:
741
- tee.close()
1723
+ if total_lines > 0:
1724
+ await notify_progress(0, float(total_lines), f"Executing command: 0/{total_lines}")
1725
+ else:
1726
+ await notify_progress(0, None, "Running command")
742
1727
 
743
- # Cache detected graphs after command completes
744
- if graph_cache:
745
1728
  try:
746
- # Use the enhanced pystata-integrated caching method
747
- if hasattr(graph_cache, 'cache_detected_graphs_with_pystata'):
748
- cached_graphs = await graph_cache.cache_detected_graphs_with_pystata()
749
- else:
750
- cached_graphs = await graph_cache.cache_detected_graphs()
751
-
752
- if cached_graphs and notify_progress:
753
- await notify_progress(1, 1, f"Command completed. Cached {len(cached_graphs)} graphs: {', '.join(cached_graphs)}")
754
- except Exception as e:
755
- logger.warning(f"Failed to cache detected graphs: {e}")
1729
+ run_blocking = lambda: self._run_streaming_blocking(
1730
+ command=command,
1731
+ tee=tee,
1732
+ cwd=cwd,
1733
+ trace=trace,
1734
+ echo=echo,
1735
+ smcl_path=smcl_path,
1736
+ smcl_log_name=smcl_log_name,
1737
+ hold_attr="_hold_name_stream",
1738
+ )
1739
+ try:
1740
+ rc, exc = await anyio.to_thread.run_sync(
1741
+ run_blocking,
1742
+ abandon_on_cancel=True,
1743
+ )
1744
+ except TypeError:
1745
+ rc, exc = await anyio.to_thread.run_sync(run_blocking)
1746
+ except get_cancelled_exc_class():
1747
+ self._request_break_in()
1748
+ await self._wait_for_stata_stop()
1749
+ raise
1750
+ finally:
1751
+ done.set()
1752
+ tee.close()
756
1753
 
757
- tail_text = tail.get_value()
758
- log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
759
- if log_tail and len(log_tail) > len(tail_text):
760
- tail_text = log_tail
761
- combined = (tail_text or "") + (f"\n{exc}" if exc else "")
1754
+ # Read SMCL content as the authoritative source
1755
+ smcl_content = self._read_smcl_file(smcl_path)
1756
+
1757
+ await self._cache_new_graphs(
1758
+ graph_cache,
1759
+ notify_progress=notify_progress,
1760
+ total_lines=total_lines,
1761
+ completed_label="Command",
1762
+ )
1763
+ self._emit_graph_ready_task(
1764
+ emit_graph_ready=emit_graph_ready,
1765
+ graph_ready_initial=graph_ready_initial,
1766
+ notify_log=notify_log,
1767
+ graph_ready_task_id=graph_ready_task_id,
1768
+ graph_ready_format=graph_ready_format,
1769
+ )
1770
+
1771
+ combined = self._build_combined_log(tail, smcl_path, rc, trace, exc)
762
1772
 
1773
+ # Use SMCL content as primary source for RC detection
1774
+ if not exc or rc in (1, -1):
1775
+ parsed_rc = self._parse_rc_from_smcl(smcl_content)
1776
+ if parsed_rc is not None and parsed_rc != 0:
1777
+ rc = parsed_rc
1778
+ elif rc in (-1, 0, 1): # Also check text if rc is generic 1 or unset
1779
+ parsed_rc_text = self._parse_rc_from_text(combined)
1780
+ if parsed_rc_text is not None:
1781
+ rc = parsed_rc_text
1782
+ elif rc == -1:
1783
+ rc = 0 # Default to success if no error trace found
1784
+
763
1785
  success = (rc == 0 and exc is None)
1786
+ stderr_final = None
764
1787
  error = None
765
1788
 
766
1789
  if not success:
767
- # Use robust extractor
768
- msg, context = self._extract_error_and_context(combined, rc)
769
-
1790
+ # Use SMCL as authoritative source for error extraction
1791
+ if smcl_content:
1792
+ msg, context = self._extract_error_from_smcl(smcl_content, rc)
1793
+ else:
1794
+ # Fallback to combined log
1795
+ msg, context = self._extract_error_and_context(combined, rc)
1796
+
770
1797
  error = ErrorEnvelope(
771
1798
  message=msg,
772
1799
  context=context,
773
1800
  rc=rc,
774
- command=code,
1801
+ command=command,
775
1802
  log_path=log_path,
776
- snippet=combined[-800:] # Keep snippet for backward compat
1803
+ snippet=smcl_content[-800:] if smcl_content else combined[-800:],
1804
+ smcl_output=smcl_content,
777
1805
  )
1806
+ stderr_final = context
778
1807
 
779
1808
  duration = time.time() - start_time
780
- code_preview = code.replace("\n", "\\n")
781
1809
  logger.info(
782
1810
  "stata.run(stream) rc=%s success=%s trace=%s duration_ms=%.2f code_preview=%s",
783
- rc,
784
- success,
785
- trace,
786
- duration * 1000,
787
- code_preview[:120],
788
- )
789
-
790
- result = CommandResponse(
791
- command=code,
792
- rc=rc,
793
- stdout="",
794
- stderr=None,
795
- log_path=log_path,
796
- success=success,
797
- error=error,
798
- )
799
-
800
- if notify_progress is not None:
801
- await notify_progress(1, 1, "Finished")
802
-
803
- return result
804
-
805
- def _count_do_file_lines(self, path: str) -> int:
806
- try:
807
- with open(path, "r", encoding="utf-8", errors="replace") as f:
808
- lines = f.read().splitlines()
809
- except Exception:
810
- return 0
811
-
812
- total = 0
813
- for line in lines:
814
- s = line.strip()
815
- if not s:
816
- continue
817
- if s.startswith("*"):
818
- continue
819
- if s.startswith("//"):
820
- continue
821
- total += 1
822
- return total
823
-
824
- async def run_do_file_streaming(
825
- self,
826
- path: str,
827
- *,
828
- notify_log: Callable[[str], Awaitable[None]],
829
- notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
830
- echo: bool = True,
831
- trace: bool = False,
832
- max_output_lines: Optional[int] = None,
833
- cwd: Optional[str] = None,
834
- auto_cache_graphs: bool = False,
835
- on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
836
- ) -> CommandResponse:
837
- if cwd is not None and not os.path.isdir(cwd):
838
- return CommandResponse(
839
- command=f'do "{path}"',
840
- rc=601,
841
- stdout="",
842
- stderr=None,
843
- success=False,
844
- error=ErrorEnvelope(
845
- message=f"cwd not found: {cwd}",
846
- rc=601,
847
- command=path,
848
- ),
849
- )
1811
+ rc,
1812
+ success,
1813
+ trace,
1814
+ duration * 1000,
1815
+ code.replace("\n", "\\n")[:120],
1816
+ )
850
1817
 
851
- effective_path = path
852
- if cwd is not None and not os.path.isabs(path):
853
- effective_path = os.path.abspath(os.path.join(cwd, path))
1818
+ result = CommandResponse(
1819
+ command=code,
1820
+ rc=rc,
1821
+ stdout="",
1822
+ stderr=stderr_final,
1823
+ log_path=log_path,
1824
+ success=success,
1825
+ error=error,
1826
+ smcl_output=smcl_content,
1827
+ )
854
1828
 
855
- if not os.path.exists(effective_path):
856
- return CommandResponse(
857
- command=f'do "{effective_path}"',
858
- rc=601,
859
- stdout="",
860
- stderr=None,
861
- success=False,
862
- error=ErrorEnvelope(
863
- message=f"Do-file not found: {effective_path}",
864
- rc=601,
865
- command=effective_path,
866
- ),
867
- )
1829
+ if notify_progress is not None:
1830
+ await notify_progress(1, 1, "Finished")
1831
+
1832
+ return result
1833
+
1834
+ async def run_do_file_streaming(
1835
+ self,
1836
+ path: str,
1837
+ *,
1838
+ notify_log: Callable[[str], Awaitable[None]],
1839
+ notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
1840
+ echo: bool = True,
1841
+ trace: bool = False,
1842
+ max_output_lines: Optional[int] = None,
1843
+ cwd: Optional[str] = None,
1844
+ auto_cache_graphs: bool = False,
1845
+ on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
1846
+ emit_graph_ready: bool = False,
1847
+ graph_ready_task_id: Optional[str] = None,
1848
+ graph_ready_format: str = "svg",
1849
+ ) -> CommandResponse:
1850
+ effective_path, command, error_response = self._resolve_do_file_path(path, cwd)
1851
+ if error_response is not None:
1852
+ return error_response
868
1853
 
869
1854
  total_lines = self._count_do_file_lines(effective_path)
870
1855
  executed_lines = 0
@@ -893,106 +1878,55 @@ class StataClient:
893
1878
  if not self._initialized:
894
1879
  self.init()
895
1880
 
1881
+ auto_cache_graphs = auto_cache_graphs or emit_graph_ready
1882
+
896
1883
  start_time = time.time()
897
1884
  exc: Optional[Exception] = None
1885
+ smcl_content = ""
1886
+ smcl_path = None
898
1887
 
899
- # Setup streaming graph cache if enabled
900
- graph_cache = None
901
- if auto_cache_graphs:
902
- graph_cache = StreamingGraphCache(self, auto_cache=True)
903
-
904
- graph_cache_callback = self._create_graph_cache_callback(on_graph_cached, notify_log)
905
-
906
- graph_cache.add_cache_callback(graph_cache_callback)
1888
+ graph_cache = self._init_streaming_graph_cache(auto_cache_graphs, on_graph_cached, notify_log)
1889
+ _log_file, log_path, tail, tee = self._create_streaming_log(trace=trace)
907
1890
 
908
- log_file = tempfile.NamedTemporaryFile(
909
- prefix="mcp_stata_",
910
- suffix=".log",
911
- delete=False,
912
- mode="w",
913
- encoding="utf-8",
914
- errors="replace",
915
- buffering=1,
916
- )
917
- log_path = log_file.name
918
- tail = TailBuffer(max_chars=200000 if trace else 20000)
919
- tee = FileTeeIO(log_file, tail)
1891
+ smcl_path = self._create_smcl_log_path()
1892
+ smcl_log_name = self._make_smcl_log_name()
920
1893
 
921
1894
  # Inform the MCP client immediately where to read/tail the output.
922
- await notify_log(json.dumps({"event": "log_path", "path": log_path}))
1895
+ await notify_log(json.dumps({"event": "log_path", "path": smcl_path}))
923
1896
 
924
1897
  rc = -1
925
- path_for_stata = effective_path.replace("\\", "/")
926
- command = f'do "{path_for_stata}"'
1898
+ graph_ready_initial = self._capture_graph_state(graph_cache, emit_graph_ready)
1899
+ graph_poll_state = [0.0]
1900
+
1901
+ async def on_chunk_for_graphs(_chunk: str) -> None:
1902
+ await self._maybe_cache_graphs_on_chunk(
1903
+ graph_cache=graph_cache,
1904
+ emit_graph_ready=emit_graph_ready,
1905
+ notify_log=notify_log,
1906
+ graph_ready_task_id=graph_ready_task_id,
1907
+ graph_ready_format=graph_ready_format,
1908
+ graph_ready_initial=graph_ready_initial,
1909
+ last_check=graph_poll_state,
1910
+ )
927
1911
 
928
- # Capture initial graph state BEFORE execution starts
1912
+ on_chunk_callback = on_chunk_for_progress
929
1913
  if graph_cache:
930
- try:
931
- graph_cache._initial_graphs = set(self.list_graphs())
932
- logger.debug(f"Initial graph state captured: {graph_cache._initial_graphs}")
933
- except Exception as e:
934
- logger.debug(f"Failed to capture initial graph state: {e}")
935
- graph_cache._initial_graphs = set()
936
-
937
- def _run_blocking() -> None:
938
- nonlocal rc, exc
939
- with self._exec_lock:
940
- # Set execution flag to prevent recursive Stata calls
941
- self._is_executing = True
942
- try:
943
- from sfi import Scalar, SFIToolkit # Import SFI tools
944
- with self._temp_cwd(cwd):
945
- with self._redirect_io_streaming(tee, tee):
946
- try:
947
- if trace:
948
- self.stata.run("set trace on")
949
- ret = self.stata.run(command, echo=echo)
950
- # Some PyStata builds return output as a string rather than printing.
951
- if isinstance(ret, str) and ret:
952
- try:
953
- tee.write(ret)
954
- except Exception:
955
- pass
956
-
957
- # ROBUST DETECTION & OUTPUT
958
- rc = self._read_return_code()
959
-
960
- except Exception as e:
961
- exc = e
962
- if rc == 0: rc = 1
963
- finally:
964
- if trace:
965
- try: self.stata.run("set trace off")
966
- except: pass
967
- finally:
968
- # Clear execution flag
969
- self._is_executing = False
1914
+ async def on_chunk_callback(chunk: str) -> None:
1915
+ await on_chunk_for_progress(chunk)
1916
+ await on_chunk_for_graphs(chunk)
970
1917
 
971
1918
  done = anyio.Event()
972
1919
 
973
- async def _monitor_progress_from_log() -> None:
974
- if notify_progress is None or total_lines <= 0:
975
- return
976
- last_pos = 0
977
- try:
978
- with open(log_path, "r", encoding="utf-8", errors="replace") as f:
979
- while not done.is_set():
980
- f.seek(last_pos)
981
- chunk = f.read()
982
- if chunk:
983
- last_pos = f.tell()
984
- await on_chunk_for_progress(chunk)
985
- await anyio.sleep(0.05)
986
-
987
- f.seek(last_pos)
988
- chunk = f.read()
989
- if chunk:
990
- await on_chunk_for_progress(chunk)
991
- except Exception:
992
- return
993
-
994
1920
  async with anyio.create_task_group() as tg:
995
- tg.start_soon(_monitor_progress_from_log)
1921
+ async def stream_smcl() -> None:
1922
+ await self._stream_smcl_log(
1923
+ smcl_path=smcl_path,
1924
+ notify_log=notify_log,
1925
+ done=done,
1926
+ on_chunk=on_chunk_callback,
1927
+ )
1928
+
1929
+ tg.start_soon(stream_smcl)
996
1930
 
997
1931
  if notify_progress is not None:
998
1932
  if total_lines > 0:
@@ -1001,7 +1935,23 @@ class StataClient:
1001
1935
  await notify_progress(0, None, "Running do-file")
1002
1936
 
1003
1937
  try:
1004
- await anyio.to_thread.run_sync(_run_blocking, abandon_on_cancel=True)
1938
+ run_blocking = lambda: self._run_streaming_blocking(
1939
+ command=command,
1940
+ tee=tee,
1941
+ cwd=cwd,
1942
+ trace=trace,
1943
+ echo=echo,
1944
+ smcl_path=smcl_path,
1945
+ smcl_log_name=smcl_log_name,
1946
+ hold_attr="_hold_name_do",
1947
+ )
1948
+ try:
1949
+ rc, exc = await anyio.to_thread.run_sync(
1950
+ run_blocking,
1951
+ abandon_on_cancel=True,
1952
+ )
1953
+ except TypeError:
1954
+ rc, exc = await anyio.to_thread.run_sync(run_blocking)
1005
1955
  except get_cancelled_exc_class():
1006
1956
  self._request_break_in()
1007
1957
  await self._wait_for_stata_stop()
@@ -1010,57 +1960,48 @@ class StataClient:
1010
1960
  done.set()
1011
1961
  tee.close()
1012
1962
 
1013
- # Robust post-execution graph detection and caching
1014
- if graph_cache and graph_cache.auto_cache:
1015
- try:
1016
- # [Existing graph cache logic kept identical]
1017
- cached_graphs = []
1018
- initial_graphs = getattr(graph_cache, '_initial_graphs', set())
1019
- current_graphs = set(self.list_graphs())
1020
- new_graphs = current_graphs - initial_graphs - graph_cache._cached_graphs
1021
-
1022
- if new_graphs:
1023
- logger.info(f"Detected {len(new_graphs)} new graph(s): {sorted(new_graphs)}")
1963
+ # Read SMCL content as the authoritative source
1964
+ smcl_content = self._read_smcl_file(smcl_path)
1024
1965
 
1025
- for graph_name in new_graphs:
1026
- try:
1027
- cache_result = await anyio.to_thread.run_sync(
1028
- self.cache_graph_on_creation,
1029
- graph_name
1030
- )
1031
- if cache_result:
1032
- cached_graphs.append(graph_name)
1033
- graph_cache._cached_graphs.add(graph_name)
1034
-
1035
- for callback in graph_cache._cache_callbacks:
1036
- try:
1037
- await anyio.to_thread.run_sync(callback, graph_name, cache_result)
1038
- except Exception: pass
1039
- except Exception as e:
1040
- logger.error(f"Error caching graph {graph_name}: {e}")
1041
-
1042
- # Notify progress if graphs were cached
1043
- if cached_graphs and notify_progress:
1044
- await notify_progress(
1045
- float(total_lines) if total_lines > 0 else 1,
1046
- float(total_lines) if total_lines > 0 else 1,
1047
- f"Do-file completed. Cached {len(cached_graphs)} graph(s): {', '.join(cached_graphs)}"
1048
- )
1049
- except Exception as e:
1050
- logger.error(f"Post-execution graph detection failed: {e}")
1966
+ await self._cache_new_graphs(
1967
+ graph_cache,
1968
+ notify_progress=notify_progress,
1969
+ total_lines=total_lines,
1970
+ completed_label="Do-file",
1971
+ )
1972
+ self._emit_graph_ready_task(
1973
+ emit_graph_ready=emit_graph_ready,
1974
+ graph_ready_initial=graph_ready_initial,
1975
+ notify_log=notify_log,
1976
+ graph_ready_task_id=graph_ready_task_id,
1977
+ graph_ready_format=graph_ready_format,
1978
+ )
1051
1979
 
1052
- tail_text = tail.get_value()
1053
- log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
1054
- if log_tail and len(log_tail) > len(tail_text):
1055
- tail_text = log_tail
1056
- combined = (tail_text or "") + (f"\n{exc}" if exc else "")
1980
+ combined = self._build_combined_log(tail, log_path, rc, trace, exc)
1057
1981
 
1982
+ # Use SMCL content as primary source for RC detection
1983
+ if not exc or rc in (1, -1):
1984
+ parsed_rc = self._parse_rc_from_smcl(smcl_content)
1985
+ if parsed_rc is not None and parsed_rc != 0:
1986
+ rc = parsed_rc
1987
+ elif rc in (-1, 0, 1):
1988
+ parsed_rc_text = self._parse_rc_from_text(combined)
1989
+ if parsed_rc_text is not None:
1990
+ rc = parsed_rc_text
1991
+ elif rc == -1:
1992
+ rc = 0 # Default to success if no error found
1993
+
1058
1994
  success = (rc == 0 and exc is None)
1995
+ stderr_final = None
1059
1996
  error = None
1060
1997
 
1061
1998
  if not success:
1062
- # Robust extraction
1063
- msg, context = self._extract_error_and_context(combined, rc)
1999
+ # Use SMCL as authoritative source for error extraction
2000
+ if smcl_content:
2001
+ msg, context = self._extract_error_from_smcl(smcl_content, rc)
2002
+ else:
2003
+ # Fallback to combined log
2004
+ msg, context = self._extract_error_and_context(combined, rc)
1064
2005
 
1065
2006
  error = ErrorEnvelope(
1066
2007
  message=msg,
@@ -1068,8 +2009,10 @@ class StataClient:
1068
2009
  rc=rc,
1069
2010
  command=command,
1070
2011
  log_path=log_path,
1071
- snippet=combined[-800:]
2012
+ snippet=smcl_content[-800:] if smcl_content else combined[-800:],
2013
+ smcl_output=smcl_content,
1072
2014
  )
2015
+ stderr_final = context
1073
2016
 
1074
2017
  duration = time.time() - start_time
1075
2018
  logger.info(
@@ -1085,10 +2028,11 @@ class StataClient:
1085
2028
  command=command,
1086
2029
  rc=rc,
1087
2030
  stdout="",
1088
- stderr=None,
2031
+ stderr=stderr_final,
1089
2032
  log_path=log_path,
1090
2033
  success=success,
1091
2034
  error=error,
2035
+ smcl_output=smcl_content,
1092
2036
  )
1093
2037
 
1094
2038
  if notify_progress is not None:
@@ -1110,22 +2054,7 @@ class StataClient:
1110
2054
  """
1111
2055
  result = self._exec_with_capture(code, echo=echo, trace=trace, cwd=cwd)
1112
2056
 
1113
- # Truncate stdout if requested
1114
- if max_output_lines is not None and result.stdout:
1115
- lines = result.stdout.splitlines()
1116
- if len(lines) > max_output_lines:
1117
- truncated_lines = lines[:max_output_lines]
1118
- truncated_lines.append(f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)")
1119
- result = CommandResponse(
1120
- command=result.command,
1121
- rc=result.rc,
1122
- stdout="\n".join(truncated_lines),
1123
- stderr=result.stderr,
1124
- success=result.success,
1125
- error=result.error,
1126
- )
1127
-
1128
- return result
2057
+ return self._truncate_command_output(result, max_output_lines)
1129
2058
 
1130
2059
  def get_data(self, start: int = 0, count: int = 50) -> List[Dict[str, Any]]:
1131
2060
  """Returns valid JSON-serializable data."""
@@ -1182,16 +2111,19 @@ class StataClient:
1182
2111
  sortlist = ""
1183
2112
  changed = False
1184
2113
  try:
1185
- frame = str(Macro.getCValue("frame") or "default")
2114
+ frame = str(Macro.getGlobal("frame") or "default")
1186
2115
  except Exception:
2116
+ logger.debug("Failed to get 'frame' macro", exc_info=True)
1187
2117
  frame = "default"
1188
2118
  try:
1189
- sortlist = str(Macro.getCValue("sortlist") or "")
2119
+ sortlist = str(Macro.getGlobal("sortlist") or "")
1190
2120
  except Exception:
2121
+ logger.debug("Failed to get 'sortlist' macro", exc_info=True)
1191
2122
  sortlist = ""
1192
2123
  try:
1193
- changed = bool(int(float(Macro.getCValue("changed") or "0")))
2124
+ changed = bool(int(float(Macro.getGlobal("changed") or "0")))
1194
2125
  except Exception:
2126
+ logger.debug("Failed to get 'changed' macro", exc_info=True)
1195
2127
  changed = False
1196
2128
 
1197
2129
  return {"frame": frame, "n": n, "k": k, "sortlist": sortlist, "changed": changed}
@@ -1340,6 +2272,96 @@ class StataClient:
1340
2272
  "truncated_cells": truncated_cells,
1341
2273
  }
1342
2274
 
2275
+ def get_arrow_stream(
2276
+ self,
2277
+ *,
2278
+ offset: int,
2279
+ limit: int,
2280
+ vars: List[str],
2281
+ include_obs_no: bool,
2282
+ obs_indices: Optional[List[int]] = None,
2283
+ ) -> bytes:
2284
+ """
2285
+ Returns an Apache Arrow IPC stream (as bytes) for the requested data page.
2286
+ Uses Polars if available (faster), falls back to Pandas.
2287
+ """
2288
+ if not self._initialized:
2289
+ self.init()
2290
+
2291
+ import pyarrow as pa
2292
+ from sfi import Data # type: ignore[import-not-found]
2293
+
2294
+ use_polars = _get_polars_available()
2295
+ if use_polars:
2296
+ import polars as pl
2297
+ else:
2298
+ import pandas as pd
2299
+
2300
+ state = self.get_dataset_state()
2301
+ n = int(state.get("n", 0) or 0)
2302
+ k = int(state.get("k", 0) or 0)
2303
+ if k == 0 and n == 0:
2304
+ raise RuntimeError("No data in memory")
2305
+
2306
+ var_map = self._get_var_index_map()
2307
+ for v in vars:
2308
+ if v not in var_map:
2309
+ raise ValueError(f"Invalid variable: {v}")
2310
+
2311
+ # Determine observations to fetch
2312
+ if obs_indices is None:
2313
+ start = offset
2314
+ end = min(offset + limit, n)
2315
+ obs_list = list(range(start, end)) if start < n else []
2316
+ else:
2317
+ start = offset
2318
+ end = min(offset + limit, len(obs_indices))
2319
+ obs_list = obs_indices[start:end]
2320
+
2321
+ try:
2322
+ if not obs_list:
2323
+ # Empty schema-only table
2324
+ if use_polars:
2325
+ schema_cols = {}
2326
+ if include_obs_no:
2327
+ schema_cols["_n"] = pl.Int64
2328
+ for v in vars:
2329
+ schema_cols[v] = pl.Utf8
2330
+ table = pl.DataFrame(schema=schema_cols).to_arrow()
2331
+ else:
2332
+ columns = {}
2333
+ if include_obs_no:
2334
+ columns["_n"] = pa.array([], type=pa.int64())
2335
+ for v in vars:
2336
+ columns[v] = pa.array([], type=pa.string())
2337
+ table = pa.table(columns)
2338
+ else:
2339
+ # Fetch all data in one C-call
2340
+ raw_data = Data.get(var=vars, obs=obs_list, valuelabel=False)
2341
+
2342
+ if use_polars:
2343
+ df = pl.DataFrame(raw_data, schema=vars, orient="row")
2344
+ if include_obs_no:
2345
+ obs_nums = [i + 1 for i in obs_list]
2346
+ df = df.with_columns(pl.Series("_n", obs_nums, dtype=pl.Int64))
2347
+ df = df.select(["_n"] + vars)
2348
+ table = df.to_arrow()
2349
+ else:
2350
+ df = pd.DataFrame(raw_data, columns=vars)
2351
+ if include_obs_no:
2352
+ df.insert(0, "_n", [i + 1 for i in obs_list])
2353
+ table = pa.Table.from_pandas(df, preserve_index=False)
2354
+
2355
+ # Serialize to IPC Stream
2356
+ sink = pa.BufferOutputStream()
2357
+ with pa.RecordBatchStreamWriter(sink, table.schema) as writer:
2358
+ writer.write_table(table)
2359
+
2360
+ return sink.getvalue().to_pybytes()
2361
+
2362
+ except Exception as e:
2363
+ raise RuntimeError(f"Failed to generate Arrow stream: {e}")
2364
+
1343
2365
  _FILTER_IDENT = re.compile(r"\b[A-Za-z_][A-Za-z0-9_]*\b")
1344
2366
 
1345
2367
  def _extract_filter_vars(self, filter_expr: str) -> List[str]:
@@ -1528,15 +2550,21 @@ class StataClient:
1528
2550
 
1529
2551
  # Cache miss or expired, fetch fresh data
1530
2552
  try:
1531
- # 'graph dir' returns list in r(list)
1532
- # We need to ensure we run it quietly so we don't spam.
1533
- self.stata.run("quietly graph dir, memory")
1534
-
1535
- # Accessing r-class results in Python can be tricky via pystata's run command.
1536
- # We stash the result in a global macro that python sfi can easily read.
1537
- from sfi import Macro # type: ignore[import-not-found]
1538
- self.stata.run("global mcp_graph_list `r(list)'")
1539
- graph_list_str = Macro.getGlobal("mcp_graph_list")
2553
+ # Preservation of r() results is critical because this can be called
2554
+ # automatically after every user command (e.g., during streaming).
2555
+ import time
2556
+ hold_name = f"_mcp_ghold_{int(time.time() * 1000 % 1000000)}"
2557
+ self.stata.run(f"capture _return hold {hold_name}", echo=False)
2558
+
2559
+ try:
2560
+ self.stata.run("macro define mcp_graph_list \"\"", echo=False)
2561
+ self.stata.run("quietly graph dir, memory", echo=False)
2562
+ from sfi import Macro # type: ignore[import-not-found]
2563
+ self.stata.run("macro define mcp_graph_list `r(list)'", echo=False)
2564
+ graph_list_str = Macro.getGlobal("mcp_graph_list")
2565
+ finally:
2566
+ self.stata.run(f"capture _return restore {hold_name}", echo=False)
2567
+
1540
2568
  raw_list = graph_list_str.split() if graph_list_str else []
1541
2569
 
1542
2570
  # Map internal Stata names back to user-facing names when we have an alias.
@@ -1548,7 +2576,7 @@ class StataClient:
1548
2576
  # Update cache
1549
2577
  with self._list_graphs_cache_lock:
1550
2578
  self._list_graphs_cache = result
1551
- self._list_graphs_cache_time = current_time
2579
+ self._list_graphs_cache_time = time.time()
1552
2580
 
1553
2581
  return result
1554
2582
 
@@ -1583,8 +2611,8 @@ class StataClient:
1583
2611
  import tempfile
1584
2612
 
1585
2613
  fmt = (format or "pdf").strip().lower()
1586
- if fmt not in {"pdf", "png"}:
1587
- raise ValueError(f"Unsupported graph export format: {format}. Allowed: pdf, png.")
2614
+ if fmt not in {"pdf", "png", "svg"}:
2615
+ raise ValueError(f"Unsupported graph export format: {format}. Allowed: pdf, png, svg.")
1588
2616
 
1589
2617
  if not filename:
1590
2618
  suffix = f".{fmt}"
@@ -1737,73 +2765,77 @@ class StataClient:
1737
2765
  logger.warning("SMCL to Markdown failed, falling back to plain text: %s", parse_err)
1738
2766
  return self._smcl_to_text(smcl)
1739
2767
  except Exception as e:
1740
- return f"Error reading help file at {fn}: {e}"
2768
+ logger.warning("Help file read failed for %s: %s", topic, e)
1741
2769
 
1742
- # Fallback to URL if file not found
1743
- return f"Help file for '{topic}' not found. Please consult: https://www.stata.com/help.cgi?{topic}"
2770
+ # If no help file found, return a fallback message
2771
+ return f"Help file for '{topic}' not found."
2772
+
2773
+ def get_stored_results(self, force_fresh: bool = False) -> Dict[str, Any]:
2774
+ """Returns e() and r() results using SFI for maximum reliability."""
2775
+ if not force_fresh and self._last_results is not None:
2776
+ return self._last_results
1744
2777
 
1745
- def get_stored_results(self) -> Dict[str, Any]:
1746
- """Returns e() and r() results."""
1747
2778
  if not self._initialized:
1748
2779
  self.init()
1749
2780
 
1750
- results = {"r": {}, "e": {}}
1751
-
1752
- # We parse 'return list' output as there is no direct bulk export of stored results
1753
- raw_r_resp = self.run_command_structured("return list", echo=True)
1754
- raw_e_resp = self.run_command_structured("ereturn list", echo=True)
1755
- raw_r = raw_r_resp.stdout if raw_r_resp.success else (raw_r_resp.error.snippet if raw_r_resp.error else "")
1756
- raw_e = raw_e_resp.stdout if raw_e_resp.success else (raw_e_resp.error.snippet if raw_e_resp.error else "")
1757
-
1758
- # Simple parser
1759
- def parse_list(text):
1760
- data = {}
1761
- # We don't strictly need to track sections if we check patterns
1762
- for line in text.splitlines():
1763
- line = line.strip()
1764
- if not line:
1765
- continue
1766
-
1767
- # scalars: r(name) = value
1768
- if "=" in line and ("r(" in line or "e(" in line):
1769
- try:
1770
- name_part, val_part = line.split("=", 1)
1771
- name_part = name_part.strip() # "r(mean)"
1772
- val_part = val_part.strip() # "6165.2..."
1773
-
1774
- # Extract just the name inside r(...) if desired,
1775
- # or keep full key "r(mean)".
1776
- # User likely wants "mean" inside "r" dict.
1777
-
1778
- if "(" in name_part and name_part.endswith(")"):
1779
- # r(mean) -> mean
1780
- start = name_part.find("(") + 1
1781
- end = name_part.find(")")
1782
- key = name_part[start:end]
1783
- data[key] = val_part
1784
- except Exception:
1785
- pass
1786
-
1787
- # macros: r(name) : "value"
1788
- elif ":" in line and ("r(" in line or "e(" in line):
1789
- try:
1790
- name_part, val_part = line.split(":", 1)
1791
- name_part = name_part.strip()
1792
- val_part = val_part.strip().strip('"')
1793
-
1794
- if "(" in name_part and name_part.endswith(")"):
1795
- start = name_part.find("(") + 1
1796
- end = name_part.find(")")
1797
- key = name_part[start:end]
1798
- data[key] = val_part
1799
- except Exception:
1800
- pass
1801
- return data
1802
-
1803
- results["r"] = parse_list(raw_r)
1804
- results["e"] = parse_list(raw_e)
1805
-
1806
- return results
2781
+ with self._exec_lock:
2782
+ # We must be extremely careful not to clobber r()/e() while fetching their names.
2783
+ # We use a hold to peek at the results.
2784
+ hold_name = f"mcp_peek_{uuid.uuid4().hex[:8]}"
2785
+ self.stata.run(f"capture _return hold {hold_name}", echo=False)
2786
+
2787
+ try:
2788
+ from sfi import Scalar, Macro
2789
+ results = {"r": {}, "e": {}}
2790
+
2791
+ for rclass in ["r", "e"]:
2792
+ # Restore with 'hold' to peek at results without losing them from the hold
2793
+ # Note: Stata 18+ supports 'restore ..., hold' which is ideal.
2794
+ self.stata.run(f"capture _return restore {hold_name}, hold", echo=False)
2795
+
2796
+ # Fetch names using backtick expansion (which we verified works better than colon)
2797
+ # and avoid leading underscores which were causing syntax errors with 'global'
2798
+ self.stata.run(f"macro define mcp_scnames `: {rclass}(scalars)'", echo=False)
2799
+ self.stata.run(f"macro define mcp_macnames `: {rclass}(macros)'", echo=False)
2800
+
2801
+ # 1. Capture Scalars
2802
+ names_str = Macro.getGlobal("mcp_scnames")
2803
+ if names_str:
2804
+ for name in names_str.split():
2805
+ try:
2806
+ val = Scalar.getValue(f"{rclass}({name})")
2807
+ results[rclass][name] = val
2808
+ except Exception:
2809
+ pass
2810
+
2811
+ # 2. Capture Macros (strings)
2812
+ macros_str = Macro.getGlobal("mcp_macnames")
2813
+ if macros_str:
2814
+ for name in macros_str.split():
2815
+ try:
2816
+ # Restore/Hold again to be safe before fetching each macro
2817
+ self.stata.run(f"capture _return restore {hold_name}, hold", echo=False)
2818
+ # Capture the string value into a macro
2819
+ self.stata.run(f"macro define mcp_mval `{rclass}({name})'", echo=False)
2820
+ val = Macro.getGlobal("mcp_mval")
2821
+ results[rclass][name] = val
2822
+ except Exception:
2823
+ pass
2824
+
2825
+ # Cleanup
2826
+ self.stata.run("macro drop mcp_scnames mcp_macnames mcp_mval", echo=False)
2827
+ self.stata.run(f"capture _return restore {hold_name}", echo=False) # Restore one last time to leave Stata in correct state
2828
+
2829
+ self._last_results = results
2830
+ return results
2831
+ except Exception as e:
2832
+ logger.error(f"SFI-based get_stored_results failed: {e}")
2833
+ # Try to clean up hold if we failed
2834
+ try:
2835
+ self.stata.run(f"capture _return drop {hold_name}", echo=False)
2836
+ except Exception:
2837
+ pass
2838
+ return {"r": {}, "e": {}}
1807
2839
 
1808
2840
  def invalidate_graph_cache(self, graph_name: str = None) -> None:
1809
2841
  """Invalidate cache for specific graph or all graphs.
@@ -2253,105 +3285,57 @@ class StataClient:
2253
3285
  return False
2254
3286
 
2255
3287
  def run_do_file(self, path: str, echo: bool = True, trace: bool = False, max_output_lines: Optional[int] = None, cwd: Optional[str] = None) -> CommandResponse:
2256
- if cwd is not None and not os.path.isdir(cwd):
2257
- return CommandResponse(
2258
- command=f'do "{path}"',
2259
- rc=601,
2260
- stdout="",
2261
- stderr=None,
2262
- success=False,
2263
- error=ErrorEnvelope(
2264
- message=f"cwd not found: {cwd}",
2265
- rc=601,
2266
- command=path,
2267
- ),
2268
- )
2269
-
2270
- effective_path = path
2271
- if cwd is not None and not os.path.isabs(path):
2272
- effective_path = os.path.abspath(os.path.join(cwd, path))
2273
-
2274
- if not os.path.exists(effective_path):
2275
- return CommandResponse(
2276
- command=f'do "{effective_path}"',
2277
- rc=601,
2278
- stdout="",
2279
- stderr=None,
2280
- success=False,
2281
- error=ErrorEnvelope(
2282
- message=f"Do-file not found: {effective_path}",
2283
- rc=601,
2284
- command=effective_path,
2285
- ),
2286
- )
3288
+ effective_path, command, error_response = self._resolve_do_file_path(path, cwd)
3289
+ if error_response is not None:
3290
+ return error_response
2287
3291
 
2288
3292
  if not self._initialized:
2289
3293
  self.init()
2290
3294
 
2291
3295
  start_time = time.time()
2292
3296
  exc: Optional[Exception] = None
2293
- path_for_stata = effective_path.replace("\\", "/")
2294
- command = f'do "{path_for_stata}"'
3297
+ smcl_content = ""
3298
+ smcl_path = None
2295
3299
 
2296
- log_file = tempfile.NamedTemporaryFile(
2297
- prefix="mcp_stata_",
2298
- suffix=".log",
2299
- delete=False,
2300
- mode="w",
2301
- encoding="utf-8",
2302
- errors="replace",
2303
- buffering=1,
2304
- )
2305
- log_path = log_file.name
2306
- tail = TailBuffer(max_chars=200000 if trace else 20000)
2307
- tee = FileTeeIO(log_file, tail)
3300
+ _log_file, log_path, tail, tee = self._create_streaming_log(trace=trace)
3301
+ smcl_path = self._create_smcl_log_path()
3302
+ smcl_log_name = self._make_smcl_log_name()
2308
3303
 
2309
3304
  rc = -1
3305
+ try:
3306
+ rc, exc = self._run_streaming_blocking(
3307
+ command=command,
3308
+ tee=tee,
3309
+ cwd=cwd,
3310
+ trace=trace,
3311
+ echo=echo,
3312
+ smcl_path=smcl_path,
3313
+ smcl_log_name=smcl_log_name,
3314
+ hold_attr="_hold_name_do_sync",
3315
+ require_smcl_log=True,
3316
+ )
3317
+ except Exception as e:
3318
+ exc = e
3319
+ rc = 1
3320
+ finally:
3321
+ tee.close()
2310
3322
 
2311
- with self._exec_lock:
2312
- try:
2313
- from sfi import Scalar, SFIToolkit # Import SFI tools
2314
- with self._temp_cwd(cwd):
2315
- with self._redirect_io_streaming(tee, tee):
2316
- try:
2317
- if trace:
2318
- self.stata.run("set trace on")
2319
- ret = self.stata.run(command, echo=echo)
2320
- # Some PyStata builds return output as a string rather than printing.
2321
- if isinstance(ret, str) and ret:
2322
- try:
2323
- tee.write(ret)
2324
- except Exception:
2325
- pass
2326
-
2327
- except Exception as e:
2328
- exc = e
2329
- rc = 1
2330
- finally:
2331
- if trace:
2332
- try:
2333
- self.stata.run("set trace off")
2334
- except Exception:
2335
- pass
2336
- except Exception as e:
2337
- # Outer catch in case imports or locks fail
2338
- exc = e
2339
- rc = 1
2340
-
2341
- tee.close()
3323
+ # Read SMCL content as the authoritative source
3324
+ smcl_content = self._read_smcl_file(smcl_path)
2342
3325
 
2343
- tail_text = tail.get_value()
2344
- log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
2345
- if log_tail and len(log_tail) > len(tail_text):
2346
- tail_text = log_tail
2347
- combined = (tail_text or "") + (f"\n{exc}" if exc else "")
3326
+ combined = self._build_combined_log(tail, log_path, rc, trace, exc)
2348
3327
 
2349
- # Parse RC from log tail if no exception occurred
3328
+ # Use SMCL content as primary source for RC detection if not already captured
2350
3329
  if rc == -1 and not exc:
2351
- parsed_rc = self._parse_rc_from_text(combined)
2352
- rc = parsed_rc if parsed_rc is not None else 0
2353
- elif exc:
2354
- # Try to parse RC from exception message
3330
+ parsed_rc = self._parse_rc_from_smcl(smcl_content)
3331
+ if parsed_rc is not None:
3332
+ rc = parsed_rc
3333
+ else:
3334
+ # Fallback to text parsing
3335
+ parsed_rc = self._parse_rc_from_text(combined)
3336
+ rc = parsed_rc if parsed_rc is not None else 0
3337
+ elif exc and rc == 1:
3338
+ # Try to parse more specific RC from exception message
2355
3339
  parsed_rc = self._parse_rc_from_text(str(exc))
2356
3340
  if parsed_rc is not None:
2357
3341
  rc = parsed_rc
@@ -2360,15 +3344,20 @@ class StataClient:
2360
3344
  error = None
2361
3345
 
2362
3346
  if not success:
2363
- # Robust extraction
2364
- msg, context = self._extract_error_and_context(combined, rc)
3347
+ # Use SMCL as authoritative source for error extraction
3348
+ if smcl_content:
3349
+ msg, context = self._extract_error_from_smcl(smcl_content, rc)
3350
+ else:
3351
+ # Fallback to combined log
3352
+ msg, context = self._extract_error_and_context(combined, rc)
2365
3353
 
2366
3354
  error = ErrorEnvelope(
2367
3355
  message=msg,
2368
3356
  rc=rc,
2369
3357
  snippet=context,
2370
3358
  command=command,
2371
- log_path=log_path
3359
+ log_path=log_path,
3360
+ smcl_output=smcl_content,
2372
3361
  )
2373
3362
 
2374
3363
  duration = time.time() - start_time
@@ -2389,6 +3378,7 @@ class StataClient:
2389
3378
  log_path=log_path,
2390
3379
  success=success,
2391
3380
  error=error,
3381
+ smcl_output=smcl_content,
2392
3382
  )
2393
3383
 
2394
3384
  def load_data(self, source: str, clear: bool = True, max_output_lines: Optional[int] = None) -> CommandResponse:
@@ -2407,40 +3397,8 @@ class StataClient:
2407
3397
  cmd = f"sysuse {src}{clear_suffix}"
2408
3398
 
2409
3399
  result = self._exec_with_capture(cmd, echo=True, trace=False)
2410
-
2411
- # Truncate stdout if requested
2412
- if max_output_lines is not None and result.stdout:
2413
- lines = result.stdout.splitlines()
2414
- if len(lines) > max_output_lines:
2415
- truncated_lines = lines[:max_output_lines]
2416
- truncated_lines.append(f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)")
2417
- result = CommandResponse(
2418
- command=result.command,
2419
- rc=result.rc,
2420
- stdout="\n".join(truncated_lines),
2421
- stderr=result.stderr,
2422
- success=result.success,
2423
- error=result.error,
2424
- )
2425
-
2426
- return result
3400
+ return self._truncate_command_output(result, max_output_lines)
2427
3401
 
2428
3402
  def codebook(self, varname: str, trace: bool = False, max_output_lines: Optional[int] = None) -> CommandResponse:
2429
3403
  result = self._exec_with_capture(f"codebook {varname}", trace=trace)
2430
-
2431
- # Truncate stdout if requested
2432
- if max_output_lines is not None and result.stdout:
2433
- lines = result.stdout.splitlines()
2434
- if len(lines) > max_output_lines:
2435
- truncated_lines = lines[:max_output_lines]
2436
- truncated_lines.append(f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)")
2437
- result = CommandResponse(
2438
- command=result.command,
2439
- rc=result.rc,
2440
- stdout="\n".join(truncated_lines),
2441
- stderr=result.stderr,
2442
- success=result.success,
2443
- error=result.error,
2444
- )
2445
-
2446
- return result
3404
+ return self._truncate_command_output(result, max_output_lines)