mcp_stata-1.7.3-py3-none-any.whl → mcp_stata-1.13.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mcp-stata might be problematic.
- mcp_stata/config.py +20 -0
- mcp_stata/discovery.py +134 -59
- mcp_stata/graph_detector.py +29 -26
- mcp_stata/models.py +3 -0
- mcp_stata/server.py +647 -19
- mcp_stata/stata_client.py +1881 -989
- mcp_stata/streaming_io.py +3 -1
- mcp_stata/test_stata.py +54 -0
- mcp_stata/ui_http.py +178 -19
- {mcp_stata-1.7.3.dist-info → mcp_stata-1.13.0.dist-info}/METADATA +15 -3
- mcp_stata-1.13.0.dist-info/RECORD +16 -0
- mcp_stata-1.7.3.dist-info/RECORD +0 -14
- {mcp_stata-1.7.3.dist-info → mcp_stata-1.13.0.dist-info}/WHEEL +0 -0
- {mcp_stata-1.7.3.dist-info → mcp_stata-1.13.0.dist-info}/entry_points.txt +0 -0
- {mcp_stata-1.7.3.dist-info → mcp_stata-1.13.0.dist-info}/licenses/LICENSE +0 -0
mcp_stata/stata_client.py
CHANGED
@@ -1,22 +1,26 @@
-import
+import asyncio
+import inspect
 import json
 import logging
 import os
+import platform
 import re
 import subprocess
 import sys
-import threading
-from importlib.metadata import PackageNotFoundError, version
 import tempfile
+import threading
 import time
+import uuid
 from contextlib import contextmanager
+from importlib.metadata import PackageNotFoundError, version
 from io import StringIO
-from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple
+from typing import Any, Awaitable, Callable, Dict, Generator, List, Optional, Tuple
 
 import anyio
 from anyio import get_cancelled_exc_class
 
-from .discovery import
+from .discovery import find_stata_candidates
+from .config import MAX_LIMIT
 from .models import (
     CommandResponse,
     ErrorEnvelope,
@@ -33,6 +37,29 @@ from .graph_detector import StreamingGraphCache
 
 logger = logging.getLogger("mcp_stata")
 
+_POLARS_AVAILABLE: Optional[bool] = None
+
+def _check_polars_available() -> bool:
+    """
+    Check if Polars can be safely imported.
+    Must detect problematic platforms BEFORE attempting import,
+    since the crash is a fatal signal, not a catchable exception.
+    """
+    if sys.platform == "win32" and platform.machine().lower() in ("arm64", "aarch64"):
+        return False
+
+    try:
+        import polars  # noqa: F401
+        return True
+    except ImportError:
+        return False
+
+
+def _get_polars_available() -> bool:
+    global _POLARS_AVAILABLE
+    if _POLARS_AVAILABLE is None:
+        _POLARS_AVAILABLE = _check_polars_available()
+    return _POLARS_AVAILABLE
 
 # ============================================================================
 # MODULE-LEVEL DISCOVERY CACHE
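The guard above exists because importing Polars on Windows-on-ARM can abort the process with a fatal signal rather than raise an exception, so the platform check must run before the import is even attempted. A minimal sketch of how a caller might branch on the cached gate; the Polars fast path and the csv fallback here are illustrative assumptions, not part of the diff:

```python
# Sketch only: branch a data-export path on the cached availability flag.
def frame_to_csv(rows: list[dict], path: str) -> None:
    if _get_polars_available():
        import polars as pl  # safe: the guard already vetoed crash-prone platforms
        pl.DataFrame(rows).write_csv(path)
    else:
        import csv
        with open(path, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=list(rows[0]) if rows else [])
            writer.writeheader()
            writer.writerows(rows)
```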
@@ -40,26 +67,30 @@ logger = logging.getLogger("mcp_stata")
 # This cache ensures Stata discovery runs exactly once per process lifetime
 _discovery_lock = threading.Lock()
 _discovery_result: Optional[Tuple[str, str]] = None  # (path, edition)
+_discovery_candidates: Optional[List[Tuple[str, str]]] = None
 _discovery_attempted = False
 _discovery_error: Optional[Exception] = None
 
 
-def _get_discovered_stata() -> Tuple[str, str]:
+def _get_discovery_candidates() -> List[Tuple[str, str]]:
     """
-    Get
+    Get ordered discovery candidates, running discovery only once.
 
     Returns:
-
+        List of (stata_executable_path, edition) ordered by preference.
 
     Raises:
         RuntimeError: If Stata discovery fails
     """
-    global _discovery_result, _discovery_attempted, _discovery_error
+    global _discovery_result, _discovery_candidates, _discovery_attempted, _discovery_error
 
     with _discovery_lock:
         # If we've already successfully discovered Stata, return cached result
         if _discovery_result is not None:
-            return _discovery_result
+            return _discovery_candidates or [_discovery_result]
+
+        if _discovery_candidates is not None:
+            return _discovery_candidates
 
         # If we've already attempted and failed, re-raise the cached error
         if _discovery_attempted and _discovery_error is not None:
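The module-level cache above is a lock-guarded memoization: the first caller pays for discovery, later callers get the cached candidate list, and a cached failure is replayed instead of retried. A stripped-down sketch of the same pattern; the generic `discover()` callable is hypothetical:

```python
import threading
from typing import List, Optional

_lock = threading.Lock()
_cached: Optional[List[str]] = None
_error: Optional[Exception] = None

def get_candidates(discover) -> List[str]:
    """Run `discover()` once per process; replay its result or its failure."""
    global _cached, _error
    with _lock:
        if _cached is not None:
            return _cached
        if _error is not None:
            raise RuntimeError("discovery previously failed") from _error
        try:
            _cached = list(discover())
            return _cached
        except Exception as e:
            _error = e
            raise
```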
@@ -83,13 +114,17 @@ def _get_discovered_stata() -> Tuple[str, str]:
         logger.info("mcp-stata version: %s", pkg_version)
 
         # Run discovery
-
+        candidates = find_stata_candidates()
 
         # Cache the successful result
-
-
+        _discovery_candidates = candidates
+        if candidates:
+            _discovery_result = candidates[0]
+            logger.info("Discovery found Stata at: %s (%s)", _discovery_result[0], _discovery_result[1])
+        else:
+            raise FileNotFoundError("No Stata candidates discovered")
 
-        return
+        return candidates
 
     except FileNotFoundError as e:
         _discovery_error = e
@@ -102,12 +137,22 @@ def _get_discovered_stata() -> Tuple[str, str]:
         ) from e
 
 
+def _get_discovered_stata() -> Tuple[str, str]:
+    """
+    Preserve existing API: return the highest-priority discovered Stata candidate.
+    """
+    candidates = _get_discovery_candidates()
+    if not candidates:
+        raise RuntimeError("Stata binary not found: no candidates discovered")
+    return candidates[0]
+
+
 class StataClient:
     _initialized = False
     _exec_lock: threading.Lock
     _cache_init_lock = threading.Lock()  # Class-level lock for cache initialization
     _is_executing = False  # Flag to prevent recursive Stata calls
-    MAX_DATA_ROWS =
+    MAX_DATA_ROWS = MAX_LIMIT
     MAX_GRAPH_BYTES = 50 * 1024 * 1024  # Maximum graph exports (~50MB)
     MAX_CACHE_SIZE = 100  # Maximum number of graphs to cache
     MAX_CACHE_BYTES = 500 * 1024 * 1024  # Maximum cache size in bytes (~500MB)
@@ -115,21 +160,21 @@ class StataClient:
 
     def __new__(cls):
         inst = super(StataClient, cls).__new__(cls)
-        inst._exec_lock = threading.Lock()
+        inst._exec_lock = threading.RLock()
         inst._is_executing = False
         return inst
 
     @contextmanager
-    def _redirect_io(self):
+    def _redirect_io(self, out_buf, err_buf):
         """Safely redirect stdout/stderr for the duration of a Stata call."""
-        out_buf, err_buf = StringIO(), StringIO()
         backup_stdout, backup_stderr = sys.stdout, sys.stderr
         sys.stdout, sys.stderr = out_buf, err_buf
         try:
-            yield
+            yield
         finally:
             sys.stdout, sys.stderr = backup_stdout, backup_stderr
 
+
     @staticmethod
     def _stata_quote(value: str) -> str:
         """Return a Stata double-quoted string literal for value."""
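With the new signature, `_redirect_io` no longer allocates its own `StringIO` buffers; the caller owns them and can keep reading after the context exits. A sketch of the calling convention, using a placeholder `client` instance for illustration:

```python
from io import StringIO

out_buf, err_buf = StringIO(), StringIO()
with client._redirect_io(out_buf, err_buf):
    client.stata.run("summarize price", echo=True)  # output lands in the buffers
stdout_text = out_buf.getvalue()
stderr_text = err_buf.getvalue()
```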
@@ -150,253 +195,985 @@ class StataClient:
             sys.stdout, sys.stderr = backup_stdout, backup_stderr
 
     @staticmethod
-    def
-
-
-        try:
-            if on_graph_cached:
-                await on_graph_cached(graph_name, success)
-        except Exception as e:
-            logger.error(f"Graph cache callback failed: {e}")
-
-        try:
-            # Also notify via log channel
-            await notify_log(json.dumps({
-                "event": "graph_cached",
-                "graph": graph_name,
-                "success": success
-            }))
-        except Exception as e:
-            logger.error(f"Failed to notify about graph cache: {e}")
-
-        return graph_cache_callback
-    def _request_break_in(self) -> None:
-        """
-        Attempt to interrupt a running Stata command when cancellation is requested.
-
-        Uses the Stata sfi.breakIn hook when available; errors are swallowed because
-        cancellation should never crash the host process.
-        """
-        try:
-            import sfi  # type: ignore[import-not-found]
-
-            break_fn = getattr(sfi, "breakIn", None) or getattr(sfi, "break_in", None)
-            if callable(break_fn):
-                try:
-                    break_fn()
-                    logger.info("Sent breakIn() to Stata for cancellation")
-                except Exception as e:  # pragma: no cover - best-effort
-                    logger.warning(f"Failed to send breakIn() to Stata: {e}")
-            else:  # pragma: no cover - environment without Stata runtime
-                logger.debug("sfi.breakIn not available; cannot interrupt Stata")
-        except Exception as e:  # pragma: no cover - import failure or other
-            logger.debug(f"Unable to import sfi for cancellation: {e}")
-
-    async def _wait_for_stata_stop(self, timeout: float = 2.0) -> bool:
-        """
-        After requesting a break, poll the Stata interface so it can surface BreakError
-        and return control. This is best-effort and time-bounded.
-        """
-        deadline = time.monotonic() + timeout
+    def _safe_unlink(path: str) -> None:
+        if not path:
+            return
         try:
-
+            if os.path.exists(path):
+                os.unlink(path)
+        except Exception:
+            pass
 
-
-
-
-
-
+    def _create_smcl_log_path(self, *, prefix: str = "mcp_smcl_", max_hex: Optional[int] = None) -> str:
+        hex_id = uuid.uuid4().hex if max_hex is None else uuid.uuid4().hex[:max_hex]
+        smcl_path = os.path.join(tempfile.gettempdir(), f"{prefix}{hex_id}.smcl")
+        self._safe_unlink(smcl_path)
+        return smcl_path
 
-
-
+    @staticmethod
+    def _make_smcl_log_name() -> str:
+        return f"_mcp_smcl_{uuid.uuid4().hex[:8]}"
 
-
-
+    def _open_smcl_log(self, smcl_path: str, log_name: str, *, quiet: bool = False) -> bool:
+        cmd = f"{'quietly ' if quiet else ''}log using \"{smcl_path}\", replace smcl name({log_name})"
+        for attempt in range(4):
             try:
-
-
-
-    if
-
-                return True
-        # If Stata already stopped, break on any other exception.
-        break
-        await anyio.sleep(0.05)
-
-        if last_exc:
-            logger.debug(f"Cancellation poll exited with {last_exc}")
+                self.stata.run(cmd, echo=False)
+                return True
+            except Exception:
+                if attempt < 3:
+                    time.sleep(0.1)
         return False
 
-
-    def _temp_cwd(self, cwd: Optional[str]):
-        if cwd is None:
-            yield
-            return
-        prev = os.getcwd()
-        os.chdir(cwd)
+    def _close_smcl_log(self, log_name: str) -> None:
         try:
-
-
-
+            self.stata.run(f"capture log close {log_name}", echo=False)
+        except Exception:
+            pass
 
-    def
-
-        if self._initialized:
+    def _restore_results_from_hold(self, hold_attr: str) -> None:
+        if not hasattr(self, hold_attr):
             return
-
+        hold_name = getattr(self, hold_attr)
         try:
-
-
-
-
-
-
-
-
-
-        if bin_dir:
-            candidates.append(bin_dir)
-
-        # 2. App Bundle: .../StataMP.app (macOS only)
-        curr = bin_dir
-        app_bundle = None
-        while len(curr) > 1:
-            if curr.endswith(".app"):
-                app_bundle = curr
-                break
-            parent = os.path.dirname(curr)
-            if parent == curr:  # Reached root directory, prevent infinite loop on Windows
-                break
-            curr = parent
-
-        if app_bundle:
-            candidates.insert(0, os.path.dirname(app_bundle))
-            candidates.insert(1, app_bundle)
-
-        # Deduplicate preserving order
-        seen = set()
-        deduped = []
-        for c in candidates:
-            if c in seen:
-                continue
-            seen.add(c)
-            deduped.append(c)
-        candidates = deduped
+            self.stata.run(f"capture _return restore {hold_name}", echo=False)
+            self._last_results = self.get_stored_results(force_fresh=True)
+        except Exception:
+            pass
+        finally:
+            try:
+                delattr(self, hold_attr)
+            except Exception:
+                pass
 
-
-
-
-
-
-
-
-
-
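`_restore_results_from_hold()` pairs with Stata's `_return hold` / `_return restore`: stored results are parked under a unique name before auxiliary commands run, then restored so `r()`/`e()` values survive the detour. A condensed sketch of that round trip, assuming a `stata.run`-style runner:

```python
import uuid

def with_results_preserved(run, body_cmd: str) -> None:
    """Park stored results, run a side command, then restore them."""
    hold = f"mcp_hold_{uuid.uuid4().hex[:8]}"
    run(f"capture _return hold {hold}", echo=False)
    try:
        run(body_cmd, echo=False)          # may clobber r()/e()
    finally:
        run(f"capture _return restore {hold}", echo=False)
```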
+    def _create_streaming_log(self, *, trace: bool) -> tuple[tempfile.NamedTemporaryFile, str, TailBuffer, FileTeeIO]:
+        log_file = tempfile.NamedTemporaryFile(
+            prefix="mcp_stata_",
+            suffix=".log",
+            delete=False,
+            mode="w",
+            encoding="utf-8",
+            errors="replace",
+            buffering=1,
+        )
+        log_path = log_file.name
+        tail = TailBuffer(max_chars=200000 if trace else 20000)
+        tee = FileTeeIO(log_file, tail)
+        return log_file, log_path, tail, tee
 
-
-
-
-
-
+    def _init_streaming_graph_cache(
+        self,
+        auto_cache_graphs: bool,
+        on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]],
+        notify_log: Callable[[str], Awaitable[None]],
+    ) -> Optional[StreamingGraphCache]:
+        if not auto_cache_graphs:
+            return None
+        graph_cache = StreamingGraphCache(self, auto_cache=True)
+        graph_cache_callback = self._create_graph_cache_callback(on_graph_cached, notify_log)
+        graph_cache.add_cache_callback(graph_cache_callback)
+        return graph_cache
+
+    def _capture_graph_state(
+        self,
+        graph_cache: Optional[StreamingGraphCache],
+        emit_graph_ready: bool,
+    ) -> Optional[dict[str, str]]:
+        # Capture initial graph state BEFORE execution starts
+        if graph_cache:
+            try:
+                graph_cache._initial_graphs = set(self.list_graphs(force_refresh=True))
+                logger.debug(f"Initial graph state captured: {graph_cache._initial_graphs}")
+            except Exception as e:
+                logger.debug(f"Failed to capture initial graph state: {e}")
+                graph_cache._initial_graphs = set()
 
-
-
+        graph_ready_initial = None
+        if emit_graph_ready:
+            try:
+                graph_ready_initial = {}
+                for graph_name in self.list_graphs(force_refresh=True):
+                    graph_ready_initial[graph_name] = self._get_graph_signature(graph_name)
+                logger.debug("Graph-ready initial state captured: %s", set(graph_ready_initial))
+            except Exception as e:
+                logger.debug("Failed to capture graph-ready state: %s", e)
+                graph_ready_initial = {}
+        return graph_ready_initial
 
-
-
-
-
-
-
-
-
+    async def _cache_new_graphs(
+        self,
+        graph_cache: Optional[StreamingGraphCache],
+        *,
+        notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]],
+        total_lines: int,
+        completed_label: str,
+    ) -> None:
+        if not graph_cache or not graph_cache.auto_cache:
+            return
+        try:
+            cached_graphs = []
+            initial_graphs = getattr(graph_cache, "_initial_graphs", set())
+            current_graphs = set(self.list_graphs(force_refresh=True))
+            new_graphs = current_graphs - initial_graphs - graph_cache._cached_graphs
 
-
-
-        self._graph_name_aliases: Dict[str, str] = {}
-        self._graph_name_reverse: Dict[str, str] = {}
-
-        logger.info("StataClient initialized successfully with %s (%s)", stata_exec_path, edition)
+            if new_graphs:
+                logger.info(f"Detected {len(new_graphs)} new graph(s): {sorted(new_graphs)}")
 
-
-
-
-
-
+                for graph_name in new_graphs:
+                    try:
+                        cache_result = await anyio.to_thread.run_sync(
+                            self.cache_graph_on_creation,
+                            graph_name,
+                        )
+                        if cache_result:
+                            cached_graphs.append(graph_name)
+                            graph_cache._cached_graphs.add(graph_name)
 
-
-
-
-
-
-
-
-
+                        for callback in graph_cache._cache_callbacks:
+                            try:
+                                result = callback(graph_name, cache_result)
+                                if inspect.isawaitable(result):
+                                    await result
+                            except Exception:
+                                pass
+                    except Exception as e:
+                        logger.error(f"Error caching graph {graph_name}: {e}")
 
-
-
-
-
-
-
-
-
+            if cached_graphs and notify_progress:
+                await notify_progress(
+                    float(total_lines) if total_lines > 0 else 1,
+                    float(total_lines) if total_lines > 0 else 1,
+                    f"{completed_label} completed. Cached {len(cached_graphs)} graph(s): {', '.join(cached_graphs)}",
+                )
+        except Exception as e:
+            logger.error(f"Post-execution graph detection failed: {e}")
 
-    def
-
-
-
-
-
-
-
+    def _emit_graph_ready_task(
+        self,
+        *,
+        emit_graph_ready: bool,
+        graph_ready_initial: Optional[dict[str, str]],
+        notify_log: Callable[[str], Awaitable[None]],
+        graph_ready_task_id: Optional[str],
+        graph_ready_format: str,
+    ) -> None:
+        if emit_graph_ready and graph_ready_initial is not None:
+            try:
+                asyncio.create_task(
+                    self._emit_graph_ready_events(
+                        graph_ready_initial,
+                        notify_log,
+                        graph_ready_task_id,
+                        graph_ready_format,
+                    )
+                )
+            except Exception as e:
+                logger.warning("graph_ready emission failed to start: %s", e)
 
-    def
-
-
-
-
-
-
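`_create_streaming_log()` wires Stata's output through a tee: every write goes to a durable log file and to an in-memory tail capped at 20 KB (200 KB when tracing). `TailBuffer` and `FileTeeIO` live in `streaming_io`; the sketch below is a minimal stand-in for the idea, not the real classes:

```python
class MiniTail:
    """Keep only the last max_chars characters written."""
    def __init__(self, max_chars: int) -> None:
        self.max_chars, self._buf = max_chars, ""
    def write(self, s: str) -> None:
        self._buf = (self._buf + s)[-self.max_chars:]
    def get_value(self) -> str:
        return self._buf

class MiniTee:
    """Fan writes out to a file object and a tail buffer."""
    def __init__(self, fileobj, tail: MiniTail) -> None:
        self._f, self._tail = fileobj, tail
    def write(self, s: str) -> int:
        self._f.write(s)
        self._tail.write(s)
        return len(s)
    def flush(self) -> None:
        self._f.flush()
```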
+    async def _stream_smcl_log(
+        self,
+        *,
+        smcl_path: str,
+        notify_log: Callable[[str], Awaitable[None]],
+        done: anyio.Event,
+        on_chunk: Optional[Callable[[str], Awaitable[None]]] = None,
+    ) -> None:
+        last_pos = 0
+        # Wait for Stata to create the SMCL file (placeholder removed to avoid locks)
+        while not done.is_set() and not os.path.exists(smcl_path):
+            await anyio.sleep(0.05)
 
-
-
+        try:
+            def _read_content() -> str:
+                try:
+                    with open(smcl_path, "r", encoding="utf-8", errors="replace") as f:
+                        f.seek(last_pos)
+                        return f.read()
+                except PermissionError:
+                    if os.name == "nt":
+                        try:
+                            res = subprocess.run(f'type "{smcl_path}"', shell=True, capture_output=True)
+                            full_content = res.stdout.decode("utf-8", errors="replace")
+                            if len(full_content) > last_pos:
+                                return full_content[last_pos:]
+                            return ""
+                        except Exception:
+                            return ""
+                    raise
+                except FileNotFoundError:
+                    return ""
 
-
-
-
-
-
-
-
-
-
-
+            while not done.is_set():
+                chunk = await anyio.to_thread.run_sync(_read_content)
+                if chunk:
+                    last_pos += len(chunk)
+                    await notify_log(chunk)
+                    if on_chunk is not None:
+                        await on_chunk(chunk)
+                await anyio.sleep(0.05)
+
+            chunk = await anyio.to_thread.run_sync(_read_content)
+            if chunk:
+                last_pos += len(chunk)
+                await notify_log(chunk)
+                if on_chunk is not None:
+                    await on_chunk(chunk)
 
-
+        except Exception as e:
+            logger.warning(f"Log streaming failed: {e}")
 
-    def
-
-
-
-
-
-
-
-
-
-
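`_stream_smcl_log()` is a poll-based `tail -f`: remember the last read offset, read anything new, sleep 50 ms, and do one final drain after the command finishes. The core loop, reduced to synchronous form for clarity; the path, `done` flag, and `emit` callback are placeholders:

```python
import time

def follow(path: str, done, emit, poll_s: float = 0.05) -> None:
    """Poll a growing text file and emit newly appended chunks."""
    last_pos = 0
    while not done():
        try:
            with open(path, "r", encoding="utf-8", errors="replace") as f:
                f.seek(last_pos)
                chunk = f.read()
        except FileNotFoundError:
            chunk = ""
        if chunk:
            last_pos += len(chunk)
            emit(chunk)
        time.sleep(poll_s)
```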
+    def _run_streaming_blocking(
+        self,
+        *,
+        command: str,
+        tee: FileTeeIO,
+        cwd: Optional[str],
+        trace: bool,
+        echo: bool,
+        smcl_path: str,
+        smcl_log_name: str,
+        hold_attr: str,
+        require_smcl_log: bool = False,
+    ) -> tuple[int, Optional[Exception]]:
+        rc = -1
+        exc: Optional[Exception] = None
+        with self._exec_lock:
+            self._is_executing = True
+            try:
+                from sfi import Scalar, SFIToolkit  # Import SFI tools
+                with self._temp_cwd(cwd):
+                    log_opened = self._open_smcl_log(smcl_path, smcl_log_name)
+                    if require_smcl_log and not log_opened:
+                        exc = RuntimeError("Failed to open SMCL log")
+                        rc = 1
+                    if exc is None:
+                        try:
+                            with self._redirect_io_streaming(tee, tee):
+                                try:
+                                    if trace:
+                                        self.stata.run("set trace on")
+                                    ret = self.stata.run(command, echo=echo)
+
+                                    setattr(self, hold_attr, f"mcp_hold_{uuid.uuid4().hex[:8]}")
+                                    self.stata.run(
+                                        f"capture _return hold {getattr(self, hold_attr)}",
+                                        echo=False,
+                                    )
+
+                                    if isinstance(ret, str) and ret:
+                                        try:
+                                            tee.write(ret)
+                                        except Exception:
+                                            pass
+                                    try:
+                                        rc = self._get_rc_from_scalar(Scalar)
+                                    except Exception:
+                                        pass
+                                except Exception as e:
+                                    exc = e
+                                    if rc in (-1, 0):
+                                        rc = 1
+                                finally:
+                                    if trace:
+                                        try:
+                                            self.stata.run("set trace off")
+                                        except Exception:
+                                            pass
+                        finally:
+                            self._close_smcl_log(smcl_log_name)
+                            self._restore_results_from_hold(hold_attr)
+                        return rc, exc
+                    # If we get here, SMCL log failed and we're required to stop.
+                    return rc, exc
+            finally:
+                self._is_executing = False
+        return rc, exc
+
+    def _resolve_do_file_path(
+        self,
+        path: str,
+        cwd: Optional[str],
+    ) -> tuple[Optional[str], Optional[str], Optional[CommandResponse]]:
+        if cwd is not None and not os.path.isdir(cwd):
+            return None, None, CommandResponse(
+                command=f'do "{path}"',
+                rc=601,
+                stdout="",
+                stderr=None,
+                success=False,
+                error=ErrorEnvelope(
+                    message=f"cwd not found: {cwd}",
+                    rc=601,
+                    command=path,
+                ),
+            )
+
+        effective_path = path
+        if cwd is not None and not os.path.isabs(path):
+            effective_path = os.path.abspath(os.path.join(cwd, path))
+
+        if not os.path.exists(effective_path):
+            return None, None, CommandResponse(
+                command=f'do "{effective_path}"',
+                rc=601,
+                stdout="",
+                stderr=None,
+                success=False,
+                error=ErrorEnvelope(
+                    message=f"Do-file not found: {effective_path}",
+                    rc=601,
+                    command=effective_path,
+                ),
+            )
+
+        path_for_stata = effective_path.replace("\\", "/")
+        command = f'do "{path_for_stata}"'
+        return effective_path, command, None
+
+    @contextmanager
+    def _smcl_log_capture(self) -> "Generator[Tuple[str, str], None, None]":
+        """
+        Context manager that wraps command execution in a named SMCL log.
+
+        This runs alongside any user logs (named logs can coexist).
+        Yields (log_name, log_path) tuple for use within the context.
+        The SMCL file is NOT deleted automatically - caller should clean up.
+
+        Usage:
+            with self._smcl_log_capture() as (log_name, smcl_path):
+                self.stata.run(cmd)
+            # After context, read smcl_path for raw SMCL output
+        """
+        # Use a unique name but DO NOT pre-create it with mkstemp, to avoid existing file locks.
+        # Stata will create the file.
+        smcl_path = self._create_smcl_log_path()
+        # Unique log name to avoid collisions with user logs
+        log_name = self._make_smcl_log_name()
+
+        try:
+            # Open named SMCL log (quietly to avoid polluting output)
+            log_opened = self._open_smcl_log(smcl_path, log_name, quiet=True)
+            if not log_opened:
+                # Still yield, consumer might see empty file or handle error,
+                # but we can't do much if Stata refuses to log.
+                pass
+
+            yield log_name, smcl_path
+        finally:
+            # Always close our named log
+            self._close_smcl_log(log_name)
+
+    def _read_smcl_file(self, path: str) -> str:
+        """Read SMCL file contents, handling encoding issues and Windows file locks."""
+        try:
+            with open(path, 'r', encoding='utf-8', errors='replace') as f:
+                return f.read()
+        except PermissionError:
+            if os.name == "nt":
+                # Windows Fallback: Try to use 'type' command to bypass exclusive lock
+                try:
+                    res = subprocess.run(f'type "{path}"', shell=True, capture_output=True)
+                    if res.returncode == 0:
+                        return res.stdout.decode('utf-8', errors='replace')
+                except Exception as e:
+                    logger.debug(f"Combined fallback read failed: {e}")
+            logger.warning(f"Failed to read SMCL file {path} due to lock")
+            return ""
+        except Exception as e:
+            logger.warning(f"Failed to read SMCL file {path}: {e}")
+            return ""
+
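The contract of `_smcl_log_capture`: it opens a uniquely named SMCL log that coexists with any user logs, yields `(log_name, smcl_path)`, always closes the named log on exit, and deliberately leaves the file on disk for the caller. Typical consumption per its docstring; the `client` instance is a placeholder:

```python
with client._smcl_log_capture() as (log_name, smcl_path):
    client.stata.run("regress price mpg", echo=True)

raw_smcl = client._read_smcl_file(smcl_path)  # lock-tolerant read
client._safe_unlink(smcl_path)                # caller owns cleanup
```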
+    def _extract_error_from_smcl(self, smcl_content: str, rc: int) -> Tuple[str, str]:
+        """
+        Extract error message and context from raw SMCL output.
+
+        Uses {err} tags as the authoritative source for error detection.
+
+        Returns:
+            Tuple of (error_message, context_string)
+        """
+        if not smcl_content:
+            return f"Stata error r({rc})", ""
+
+        lines = smcl_content.splitlines()
+
+        # Search backwards for {err} tags - they indicate error lines
+        error_lines = []
+        error_start_idx = -1
+
+        for i in range(len(lines) - 1, -1, -1):
+            line = lines[i]
+            if '{err}' in line:
+                if error_start_idx == -1:
+                    error_start_idx = i
+                # Walk backwards to find consecutive {err} lines
+                j = i
+                while j >= 0 and '{err}' in lines[j]:
+                    error_lines.insert(0, lines[j])
+                    j -= 1
+                break
+
+        if error_lines:
+            # Clean SMCL tags from error message
+            clean_lines = []
+            for line in error_lines:
+                # Remove SMCL tags but keep the text content
+                cleaned = re.sub(r'\{[^}]*\}', '', line).strip()
+                if cleaned:
+                    clean_lines.append(cleaned)
+
+            error_msg = " ".join(clean_lines) or f"Stata error r({rc})"
+
+            # Context is everything from error start to end
+            context_start = max(0, error_start_idx - 5)  # Include 5 lines before error
+            context = "\n".join(lines[context_start:])
+
+            return error_msg, context
+
+        # Fallback: no {err} found, return last 30 lines as context
+        context_start = max(0, len(lines) - 30)
+        context = "\n".join(lines[context_start:])
+
+        return f"Stata error r({rc})", context
+
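The extraction keys off SMCL's `{err}` markup, strips all `{...}` directives to recover plain text, and (in `_parse_rc_from_smcl` just below) pulls the return code out of the `{search r(N), ...}` footer Stata appends on failure. A tiny worked example of both steps:

```python
import re

smcl = "{err}variable price not found\n{search r(111), local:r(111);}"

plain = re.sub(r"\{[^}]*\}", "", smcl.splitlines()[0]).strip()
print(plain)  # -> variable price not found

rc_matches = list(re.finditer(r"\{search r\((\d+)\)", smcl))
print(int(rc_matches[-1].group(1)))  # -> 111
```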
+    def _parse_rc_from_smcl(self, smcl_content: str) -> Optional[int]:
+        """Parse return code from SMCL content using specific structural patterns."""
+        if not smcl_content:
+            return None
+
+        # 1. Primary check: SMCL search tag {search r(N), ...}
+        # This is the most authoritative interactive indicator
+        matches = list(re.finditer(r'\{search r\((\d+)\)', smcl_content))
+        if matches:
+            try:
+                return int(matches[-1].group(1))
+            except Exception:
+                pass
+
+        # 2. Secondary check: Standalone r(N); pattern
+        # This appears at the end of command blocks
+        matches = list(re.finditer(r'(?<!\w)r\((\d+)\);?', smcl_content))
+        if matches:
+            try:
+                return int(matches[-1].group(1))
             except Exception:
+                pass
+
+        return None
+
+    @staticmethod
+    def _create_graph_cache_callback(on_graph_cached, notify_log):
+        """Create a standardized graph cache callback with proper error handling."""
+        async def graph_cache_callback(graph_name: str, success: bool) -> None:
+            try:
+                if on_graph_cached:
+                    await on_graph_cached(graph_name, success)
+            except Exception as e:
+                logger.error(f"Graph cache callback failed: {e}")
+
+            try:
+                # Also notify via log channel
+                await notify_log(json.dumps({
+                    "event": "graph_cached",
+                    "graph": graph_name,
+                    "success": success
+                }))
+            except Exception as e:
+                logger.error(f"Failed to notify about graph cache: {e}")
+
+        return graph_cache_callback
+
+    def _get_cached_graph_path(self, graph_name: str) -> Optional[str]:
+        if not hasattr(self, "_cache_lock") or not hasattr(self, "_preemptive_cache"):
+            return None
+        try:
+            with self._cache_lock:
+                return self._preemptive_cache.get(graph_name)
+        except Exception:
+            return None
+
+    async def _emit_graph_ready_for_graphs(
+        self,
+        graph_names: List[str],
+        *,
+        notify_log: Callable[[str], Awaitable[None]],
+        task_id: Optional[str],
+        export_format: str,
+        graph_ready_initial: Optional[dict[str, str]],
+    ) -> None:
+        if not graph_names:
+            return
+        fmt = (export_format or "svg").strip().lower()
+        for graph_name in graph_names:
+            signature = self._get_graph_signature(graph_name)
+            if graph_ready_initial is not None:
+                previous = graph_ready_initial.get(graph_name)
+                if previous is not None and previous == signature:
+                    continue
+            try:
+                export_path = None
+                if fmt == "svg":
+                    export_path = self._get_cached_graph_path(graph_name)
+                if not export_path:
+                    export_path = await anyio.to_thread.run_sync(
+                        lambda: self.export_graph(graph_name, format=fmt)
+                    )
+                payload = {
+                    "event": "graph_ready",
+                    "task_id": task_id,
+                    "graph": {
+                        "name": graph_name,
+                        "path": export_path,
+                        "label": graph_name,
+                    },
+                }
+                await notify_log(json.dumps(payload))
+                if graph_ready_initial is not None:
+                    graph_ready_initial[graph_name] = signature
+            except Exception as e:
+                logger.warning("graph_ready export failed for %s: %s", graph_name, e)
+
+    async def _maybe_cache_graphs_on_chunk(
+        self,
+        *,
+        graph_cache: Optional[StreamingGraphCache],
+        emit_graph_ready: bool,
+        notify_log: Callable[[str], Awaitable[None]],
+        graph_ready_task_id: Optional[str],
+        graph_ready_format: str,
+        graph_ready_initial: Optional[dict[str, str]],
+        last_check: List[float],
+    ) -> None:
+        if not graph_cache or not graph_cache.auto_cache:
+            return
+        if self._is_executing:
+            return
+        now = time.monotonic()
+        if last_check and now - last_check[0] < 0.25:
+            return
+        if last_check:
+            last_check[0] = now
+        try:
+            cached_names = await graph_cache.cache_detected_graphs_with_pystata()
+        except Exception as e:
+            logger.debug("graph_ready polling failed: %s", e)
+            return
+        if emit_graph_ready and cached_names:
+            await self._emit_graph_ready_for_graphs(
+                cached_names,
+                notify_log=notify_log,
+                task_id=graph_ready_task_id,
+                export_format=graph_ready_format,
+                graph_ready_initial=graph_ready_initial,
+            )
+
+    async def _emit_graph_ready_events(
+        self,
+        initial_graphs: dict[str, str],
+        notify_log: Callable[[str], Awaitable[None]],
+        task_id: Optional[str],
+        export_format: str,
+    ) -> None:
+        try:
+            current_graphs = list(self.list_graphs(force_refresh=True))
+        except Exception as e:
+            logger.warning("graph_ready: list_graphs failed: %s", e)
+            return
+
+        if not current_graphs:
+            return
+
+        for graph_name in current_graphs:
+            signature = self._get_graph_signature(graph_name)
+            previous = initial_graphs.get(graph_name)
+            if previous is not None and previous == signature:
+                continue
+            try:
+                export_path = await anyio.to_thread.run_sync(
+                    lambda: self.export_graph(graph_name, format=export_format)
+                )
+                payload = {
+                    "event": "graph_ready",
+                    "task_id": task_id,
+                    "graph": {
+                        "name": graph_name,
+                        "path": export_path,
+                        "label": graph_name,
+                    },
+                }
+                await notify_log(json.dumps(payload))
+                initial_graphs[graph_name] = signature
+            except Exception as e:
+                logger.warning("graph_ready export failed for %s: %s", graph_name, e)
+
+    def _get_graph_signature(self, graph_name: str) -> str:
+        if not graph_name:
+            return ""
+        try:
+            response = self.exec_lightweight(f"graph describe {graph_name}")
+            if response.success and response.stdout:
+                return response.stdout
+            if response.stderr:
+                return response.stderr
+        except Exception:
+            return ""
+        return ""
+
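`_maybe_cache_graphs_on_chunk()` debounces its polling with a one-element `last_check` list used as a mutable timestamp cell, skipping work when called more often than every 250 ms. The pattern in isolation:

```python
import time

def debounced(last_check: list[float], interval: float = 0.25) -> bool:
    """Return True when enough time has passed; update the shared cell."""
    now = time.monotonic()
    if last_check and now - last_check[0] < interval:
        return False
    if last_check:
        last_check[0] = now
    return True

cell = [0.0]
if debounced(cell):
    pass  # do the expensive poll here
```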
+    def _request_break_in(self) -> None:
+        """
+        Attempt to interrupt a running Stata command when cancellation is requested.
+
+        Uses the Stata sfi.breakIn hook when available; errors are swallowed because
+        cancellation should never crash the host process.
+        """
+        try:
+            import sfi  # type: ignore[import-not-found]
+
+            break_fn = getattr(sfi, "breakIn", None) or getattr(sfi, "break_in", None)
+            if callable(break_fn):
+                try:
+                    break_fn()
+                    logger.info("Sent breakIn() to Stata for cancellation")
+                except Exception as e:  # pragma: no cover - best-effort
+                    logger.warning(f"Failed to send breakIn() to Stata: {e}")
+            else:  # pragma: no cover - environment without Stata runtime
+                logger.debug("sfi.breakIn not available; cannot interrupt Stata")
+        except Exception as e:  # pragma: no cover - import failure or other
+            logger.debug(f"Unable to import sfi for cancellation: {e}")
+
+    async def _wait_for_stata_stop(self, timeout: float = 2.0) -> bool:
+        """
+        After requesting a break, poll the Stata interface so it can surface BreakError
+        and return control. This is best-effort and time-bounded.
+        """
+        deadline = time.monotonic() + timeout
+        try:
+            import sfi  # type: ignore[import-not-found]
+
+            toolkit = getattr(sfi, "SFIToolkit", None)
+            poll = getattr(toolkit, "pollnow", None) or getattr(toolkit, "pollstd", None)
+            BreakError = getattr(sfi, "BreakError", None)
+        except Exception:  # pragma: no cover
+            return False
+
+        if not callable(poll):
+            return False
+
+        last_exc: Optional[Exception] = None
+        while time.monotonic() < deadline:
+            try:
+                poll()
+            except Exception as e:  # pragma: no cover - depends on Stata runtime
+                last_exc = e
+                if BreakError is not None and isinstance(e, BreakError):
+                    logger.info("Stata BreakError detected; cancellation acknowledged by Stata")
+                    return True
+                # If Stata already stopped, break on any other exception.
+                break
+            await anyio.sleep(0.05)
+
+        if last_exc:
+            logger.debug(f"Cancellation poll exited with {last_exc}")
+        return False
+
+    @contextmanager
+    def _temp_cwd(self, cwd: Optional[str]):
+        if cwd is None:
+            yield
+            return
+        prev = os.getcwd()
+        os.chdir(cwd)
+        try:
+            yield
+        finally:
+            os.chdir(prev)
+
+    @contextmanager
+    def _safe_redirect_fds(self):
+        """Redirects fd 1 (stdout) to fd 2 (stderr) at the OS level."""
+        # Save original stdout fd
+        try:
+            stdout_fd = os.dup(1)
+        except Exception:
+            # Fallback if we can't dup (e.g. strange environment)
+            yield
+            return
+
+        try:
+            # Redirect OS-level stdout to stderr
+            os.dup2(2, 1)
+            yield
+        finally:
+            # Restore stdout
+            try:
+                os.dup2(stdout_fd, 1)
+                os.close(stdout_fd)
+            except Exception:
+                pass
+
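`_safe_redirect_fds()` works below Python's `sys.stdout`, duplicating file descriptor 1 and pointing it at fd 2 so even C-level writes from the Stata engine cannot corrupt the MCP stdio channel. The primitive, stripped of the class:

```python
import contextlib
import os

@contextlib.contextmanager
def stdout_to_stderr():
    saved = os.dup(1)          # keep a handle to the real stdout
    try:
        os.dup2(2, 1)          # fd 1 now writes wherever stderr goes
        yield
    finally:
        os.dup2(saved, 1)      # put stdout back
        os.close(saved)
```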
+    def init(self):
+        """Initializes usage of pystata using cached discovery results."""
+        if self._initialized:
+            return
+
+        # Suppress any non-UTF8 banner output from PyStata on stdout, which breaks MCP stdio transport
+        from contextlib import redirect_stdout, redirect_stderr
+
+        try:
+            import stata_setup
+
+            # Get discovered Stata paths (cached from first call)
+            discovery_candidates = _get_discovery_candidates()
+
+            # Diagnostic: force faulthandler to output to stderr for C crashes
+            import faulthandler
+            faulthandler.enable(file=sys.stderr)
+            import subprocess
+
+            success = False
+            last_error = None
+            chosen_exec: Optional[Tuple[str, str]] = None
+
+            for stata_exec_path, edition in discovery_candidates:
+                candidates = []
+                # Prefer the binary directory first (documented input for stata_setup)
+                bin_dir = os.path.dirname(stata_exec_path)
+
+                # 2. App Bundle: .../StataMP.app (macOS only)
+                curr = bin_dir
+                app_bundle = None
+                while len(curr) > 1:
+                    if curr.endswith(".app"):
+                        app_bundle = curr
+                        break
+                    parent = os.path.dirname(curr)
+                    if parent == curr:
+                        break
+                    curr = parent
+
+                ordered_candidates = []
+                if bin_dir:
+                    ordered_candidates.append(bin_dir)
+                if app_bundle:
+                    ordered_candidates.append(app_bundle)
+                    parent_dir = os.path.dirname(app_bundle)
+                    if parent_dir not in ordered_candidates:
+                        ordered_candidates.append(parent_dir)
+
+                # Deduplicate preserving order
+                seen = set()
+                candidates = []
+                for c in ordered_candidates:
+                    if c not in seen:
+                        seen.add(c)
+                        candidates.append(c)
+
+                for path in candidates:
+                    try:
+                        # 1. Pre-flight check in a subprocess to capture hard exits/crashes
+                        sys.stderr.write(f"[mcp_stata] DEBUG: Pre-flight check for path '{path}'\n")
+                        sys.stderr.flush()
+
+                        preflight_code = f"""
+import sys
+import stata_setup
+from contextlib import redirect_stdout, redirect_stderr
+with redirect_stdout(sys.stderr), redirect_stderr(sys.stderr):
+    try:
+        stata_setup.config({repr(path)}, {repr(edition)})
+        from pystata import stata
+        stata.run('about', echo=True)
+        print('PREFLIGHT_OK')
+    except Exception as e:
+        print(f'PREFLIGHT_FAIL: {{e}}', file=sys.stderr)
+        sys.exit(1)
+"""
+
+                        try:
+                            res = subprocess.run(
+                                [sys.executable, "-c", preflight_code],
+                                capture_output=True, text=True, timeout=30
+                            )
+                            if res.returncode != 0:
+                                sys.stderr.write(f"[mcp_stata] Pre-flight failed (rc={res.returncode}) for '{path}'\n")
+                                if res.stdout.strip():
+                                    sys.stderr.write(f"--- Pre-flight stdout ---\n{res.stdout.strip()}\n")
+                                if res.stderr.strip():
+                                    sys.stderr.write(f"--- Pre-flight stderr ---\n{res.stderr.strip()}\n")
+                                sys.stderr.flush()
+                                last_error = f"Pre-flight failed: {res.stdout.strip()} {res.stderr.strip()}"
+                                continue
+                            else:
+                                sys.stderr.write(f"[mcp_stata] Pre-flight succeeded for '{path}'. Proceeding to in-process init.\n")
+                                sys.stderr.flush()
+                        except Exception as pre_e:
+                            sys.stderr.write(f"[mcp_stata] Pre-flight execution error for '{path}': {repr(pre_e)}\n")
+                            sys.stderr.flush()
+                            last_error = pre_e
+                            continue
+
+                        msg = f"[mcp_stata] DEBUG: In-process stata_setup.config('{path}', '{edition}')\n"
+                        sys.stderr.write(msg)
+                        sys.stderr.flush()
+                        # Redirect both sys.stdout/err AND the raw fds to our stderr pipe.
+                        with redirect_stdout(sys.stderr), redirect_stderr(sys.stderr), self._safe_redirect_fds():
+                            stata_setup.config(path, edition)
+
+                        sys.stderr.write(f"[mcp_stata] DEBUG: stata_setup.config succeeded for path: {path}\n")
+                        sys.stderr.flush()
+                        success = True
+                        chosen_exec = (stata_exec_path, edition)
+                        logger.info("stata_setup.config succeeded with path: %s", path)
+                        break
+                    except BaseException as e:
+                        last_error = e
+                        sys.stderr.write(f"[mcp_stata] WARNING: In-process stata_setup.config caught: {repr(e)}\n")
+                        sys.stderr.flush()
+                        logger.warning("stata_setup.config failed for path '%s': %s", path, e)
+                        if isinstance(e, SystemExit):
+                            break
+                        continue
+
+                if success:
+                    # Cache winning candidate for subsequent lookups
+                    global _discovery_result
+                    if chosen_exec:
+                        _discovery_result = chosen_exec
+                    break
+
+            if not success:
+                error_msg = (
+                    f"stata_setup.config failed to initialize Stata. "
+                    f"Tried candidates: {discovery_candidates}. "
+                    f"Last error: {repr(last_error)}"
+                )
+                sys.stderr.write(f"[mcp_stata] ERROR: {error_msg}\n")
+                sys.stderr.flush()
+                logger.error(error_msg)
+                raise RuntimeError(error_msg)
+
+            # Cache the binary path for later use (e.g., PNG export on Windows)
+            self._stata_exec_path = os.path.abspath(stata_exec_path)
+
+            try:
+                sys.stderr.write("[mcp_stata] DEBUG: Importing pystata and warming up...\n")
+                sys.stderr.flush()
+                with redirect_stdout(sys.stderr), redirect_stderr(sys.stderr), self._safe_redirect_fds():
+                    from pystata import stata  # type: ignore[import-not-found]
+                    # Warm up the engine and swallow any late splash screen output
+                    stata.run("display 1", echo=False)
+                self.stata = stata
+                self._initialized = True
+                sys.stderr.write("[mcp_stata] DEBUG: pystata warmed up successfully\n")
+                sys.stderr.flush()
+            except BaseException as e:
+                sys.stderr.write(f"[mcp_stata] ERROR: Failed to load pystata or run initial command: {repr(e)}\n")
+                sys.stderr.flush()
+                logger.error("Failed to load pystata or run initial command: %s", e)
+                raise
+
+            # Initialize list_graphs TTL cache
+            self._list_graphs_cache = None
+            self._list_graphs_cache_time = 0
+            self._list_graphs_cache_lock = threading.Lock()
+
+            # Map user-facing graph names (may include spaces/punctuation) to valid
+            # internal Stata graph names.
+            self._graph_name_aliases: Dict[str, str] = {}
+            self._graph_name_reverse: Dict[str, str] = {}
+
+            logger.info("StataClient initialized successfully with %s (%s)", stata_exec_path, edition)
+
+        except ImportError as e:
+            raise RuntimeError(
+                f"Failed to import stata_setup or pystata: {e}. "
+                "Ensure they are installed (pip install pystata stata-setup)."
+            ) from e
+
+    def _make_valid_stata_name(self, name: str) -> str:
+        """Create a valid Stata name (<=32 chars, [A-Za-z_][A-Za-z0-9_]*)."""
+        base = re.sub(r"[^A-Za-z0-9_]", "_", name or "")
+        if not base:
+            base = "Graph"
+        if not re.match(r"^[A-Za-z_]", base):
+            base = f"G_{base}"
+        base = base[:32]
+
+        # Avoid collisions.
+        candidate = base
+        i = 1
+        while candidate in getattr(self, "_graph_name_reverse", {}):
+            suffix = f"_{i}"
+            candidate = (base[: max(0, 32 - len(suffix))] + suffix)[:32]
+            i += 1
+        return candidate
+
+    def _resolve_graph_name_for_stata(self, name: str) -> str:
+        """Return internal Stata graph name for a user-facing name."""
+        if not name:
+            return name
+        aliases = getattr(self, "_graph_name_aliases", None)
+        if aliases and name in aliases:
+            return aliases[name]
+        return name
+
+    def _maybe_rewrite_graph_name_in_command(self, code: str) -> str:
+        """Rewrite name("...") to a valid Stata name and store alias mapping."""
+        if not code:
+            return code
+        if not hasattr(self, "_graph_name_aliases"):
+            self._graph_name_aliases = {}
+            self._graph_name_reverse = {}
+
+        # Handle common patterns: name("..." ...) or name(`"..."' ...)
+        pat = re.compile(r"name\(\s*(?:`\"(?P<cq>[^\"]*)\"'|\"(?P<dq>[^\"]*)\")\s*(?P<rest>[^)]*)\)")
+
+        def repl(m: re.Match) -> str:
+            original = m.group("cq") if m.group("cq") is not None else m.group("dq")
+            original = original or ""
+            internal = self._graph_name_aliases.get(original)
+            if not internal:
+                internal = self._make_valid_stata_name(original)
+                self._graph_name_aliases[original] = internal
+                self._graph_name_reverse[internal] = original
+            rest = m.group("rest") or ""
+            return f"name({internal}{rest})"
+
+        return pat.sub(repl, code)
+
+    def _get_rc_from_scalar(self, Scalar) -> int:
+        """Safely get return code, handling None values."""
+        try:
+            from sfi import Macro
+            rc_val = Macro.getGlobal("_rc")
+            if rc_val is None:
                 return -1
+            return int(float(rc_val))
+        except Exception:
+            return -1
 
     def _parse_rc_from_text(self, text: str) -> Optional[int]:
-
-        if
+        """Parse return code from plain text using structural patterns."""
+        if not text:
+            return None
+
+        # 1. Primary check: 'search r(N)' pattern (SMCL tag potentially stripped)
+        matches = list(re.finditer(r'search r\((\d+)\)', text))
+        if matches:
             try:
-                return int(
+                return int(matches[-1].group(1))
             except Exception:
-
+                pass
+
+        # 2. Secondary check: Standalone r(N); pattern
+        # This appears at the end of command blocks
+        matches = list(re.finditer(r'(?<!\w)r\((\d+)\);?', text))
+        if matches:
+            try:
+                return int(matches[-1].group(1))
+            except Exception:
+                pass
+
         return None
 
     def _parse_line_from_text(self, text: str) -> Optional[int]:
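The alias layer above converts user-facing graph titles into legal Stata names (at most 32 characters of `[A-Za-z0-9_]`, not starting with a digit) and keeps a two-way map so exports can translate back. Roughly what the core sanitizer does, as a standalone sketch with collision handling omitted; the sample inputs are illustrative:

```python
import re

def make_valid_stata_name(name: str) -> str:
    """Mirror of the sanitizer's core rules (collision suffixes omitted)."""
    base = re.sub(r"[^A-Za-z0-9_]", "_", name or "") or "Graph"
    if not re.match(r"^[A-Za-z_]", base):
        base = f"G_{base}"
    return base[:32]

print(make_valid_stata_name("My Price Graph"))  # My_Price_Graph
print(make_valid_stata_name("2024 trends!"))    # G_2024_trends_
```

Applied to a command, `name("My Price Graph", replace)` becomes `name(My_Price_Graph, replace)`, with the alias recorded in both `_graph_name_aliases` and `_graph_name_reverse`.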
@@ -405,75 +1182,207 @@ class StataClient:
|
|
|
405
1182
|
try:
|
|
406
1183
|
return int(match.group(1))
|
|
407
1184
|
except Exception:
|
|
408
|
-
return None
|
|
409
|
-
return None
|
|
1185
|
+
return None
|
|
1186
|
+
return None
|
|
1187
|
+
|
|
1188
|
+
def _read_log_backwards_until_error(self, path: str, max_bytes: int = 5_000_000) -> str:
|
|
1189
|
+
"""
|
|
1190
|
+
Read log file backwards in chunks, stopping when we find {err} tags or reach the start.
|
|
1191
|
+
|
|
1192
|
+
This is more efficient and robust than reading huge fixed tails, as we only read
|
|
1193
|
+
what we need to find the error.
|
|
1194
|
+
|
|
1195
|
+
Args:
|
|
1196
|
+
path: Path to the log file
|
|
1197
|
+
max_bytes: Maximum total bytes to read (safety limit, default 5MB)
|
|
1198
|
+
|
|
1199
|
+
Returns:
|
|
1200
|
+
The relevant portion of the log containing the error and context
|
|
1201
|
+
"""
|
|
1202
|
+
try:
|
|
1203
|
+
chunk_size = 50_000 # Read 50KB chunks at a time
|
|
1204
|
+
total_read = 0
|
|
1205
|
+
chunks = []
|
|
1206
|
+
|
|
1207
|
+
with open(path, 'rb') as f:
|
|
1208
|
+
# Get file size
|
|
1209
|
+
f.seek(0, os.SEEK_END)
|
|
1210
|
+
file_size = f.tell()
|
|
1211
|
+
|
|
1212
|
+
if file_size == 0:
|
|
1213
|
+
return ""
|
|
1214
|
+
|
|
1215
|
+
# Start from the end
|
|
1216
|
+
position = file_size
|
|
1217
|
+
|
|
1218
|
+
while position > 0 and total_read < max_bytes:
|
|
1219
|
+
# Calculate how much to read in this chunk
|
|
1220
|
+
read_size = min(chunk_size, position, max_bytes - total_read)
|
|
1221
|
+
position -= read_size
|
|
1222
|
+
|
|
1223
|
+
# Seek and read
|
|
1224
|
+
f.seek(position)
|
|
1225
|
+
chunk = f.read(read_size)
|
|
1226
|
+
chunks.insert(0, chunk)
|
|
1227
|
+
total_read += read_size
|
|
1228
|
+
|
|
1229
|
+
# Decode and check for error tags
|
|
1230
|
+
try:
|
|
1231
|
+
accumulated = b''.join(chunks).decode('utf-8', errors='replace')
|
|
1232
|
+
|
|
1233
|
+
# Check if we've found an error tag
|
|
1234
|
+
if '{err}' in accumulated:
|
|
1235
|
+
# Found it! Read one more chunk for context before the error
|
|
1236
|
+
if position > 0 and total_read < max_bytes:
|
|
1237
|
+
extra_read = min(chunk_size, position, max_bytes - total_read)
|
|
1238
|
+
position -= extra_read
|
|
1239
|
+
f.seek(position)
|
|
1240
|
+
extra_chunk = f.read(extra_read)
|
|
1241
|
+
chunks.insert(0, extra_chunk)
|
|
1242
|
+
|
|
1243
|
+
return b''.join(chunks).decode('utf-8', errors='replace')
|
|
1244
|
+
|
|
1245
|
+
except UnicodeDecodeError:
|
|
1246
|
+
# Continue reading if we hit a decode error (might be mid-character)
|
|
1247
|
+
continue
|
|
1248
|
+
|
|
1249
|
+
# Read everything we've accumulated
|
|
1250
|
+
return b''.join(chunks).decode('utf-8', errors='replace')
|
|
1251
|
+
|
|
1252
|
+
except Exception as e:
|
|
1253
|
+
logger.warning(f"Error reading log backwards: {e}")
|
|
1254
|
+
# Fallback to regular tail read
|
|
1255
|
+
return self._read_log_tail(path, 200_000)
|
|
1256
|
+
|
|
+    def _read_log_tail_smart(self, path: str, rc: int, trace: bool = False) -> str:
+        """
+        Smart log tail reader that adapts based on whether an error occurred.
+
+        - If rc == 0: Read normal tail (20KB without trace, 200KB with trace)
+        - If rc != 0: Search backwards dynamically to find the error
+
+        Args:
+            path: Path to the log file
+            rc: Return code from Stata
+            trace: Whether trace mode was enabled
+
+        Returns:
+            Relevant log content
+        """
+        if rc != 0:
+            # Error occurred - search backwards for {err} tags
+            return self._read_log_backwards_until_error(path)
+        else:
+            # Success - just read normal tail
+            tail_size = 200_000 if trace else 20_000
+            return self._read_log_tail(path, tail_size)
+
+    def _read_log_tail(self, path: str, max_chars: int) -> str:
+        try:
+            with open(path, "rb") as f:
+                f.seek(0, os.SEEK_END)
+                size = f.tell()
+
+                if size <= 0:
+                    return ""
+                read_size = min(size, max_chars)
+                f.seek(-read_size, os.SEEK_END)
+                data = f.read(read_size)
+                return data.decode("utf-8", errors="replace")
+        except Exception:
+            return ""
+
+    def _build_combined_log(
+        self,
+        tail: TailBuffer,
+        path: str,
+        rc: int,
+        trace: bool,
+        exc: Optional[Exception],
+    ) -> str:
+        tail_text = tail.get_value()
+        log_tail = self._read_log_tail_smart(path, rc, trace)
+        if log_tail and len(log_tail) > len(tail_text):
+            tail_text = log_tail
+        return (tail_text or "") + (f"\n{exc}" if exc else "")
+
+    def _truncate_command_output(
+        self,
+        result: CommandResponse,
+        max_output_lines: Optional[int],
+    ) -> CommandResponse:
+        if max_output_lines is None or not result.stdout:
+            return result
+        lines = result.stdout.splitlines()
+        if len(lines) <= max_output_lines:
+            return result
+        truncated_lines = lines[:max_output_lines]
+        truncated_lines.append(
+            f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)"
+        )
+        truncated_stdout = "\n".join(truncated_lines)
+        if hasattr(result, "model_copy"):
+            return result.model_copy(update={"stdout": truncated_stdout})
+        return result.copy(update={"stdout": truncated_stdout})
+
+    def _run_plain_capture(self, code: str) -> str:
+        """
+        Run a Stata command while capturing output using a named SMCL log.
+        This is the most reliable way to capture output (like return list)
+        without interfering with user logs or being affected by stdout redirection issues.
+        """
+        if not self._initialized:
+            self.init()
+
+        with self._exec_lock:
+            hold_name = f"mcp_hold_{uuid.uuid4().hex[:8]}"
+            # Hold results BEFORE opening the capture log
+            self.stata.run(f"capture _return hold {hold_name}", echo=False)
+
+            try:
+                with self._smcl_log_capture() as (log_name, smcl_path):
+                    # Restore results INSIDE the capture log so return list can see them
+                    self.stata.run(f"capture _return restore {hold_name}", echo=False)
+                    try:
+                        self.stata.run(code, echo=True)
+                    except Exception:
+                        pass
+            except Exception:
+                # Cleanup hold if log capture failed to open
+                self.stata.run(f"capture _return drop {hold_name}", echo=False)
+                content = ""
+                smcl_path = None
+            else:
+                # Read SMCL content and convert to text
+                content = self._read_smcl_file(smcl_path)
+                # Remove the temp file
+                self._safe_unlink(smcl_path)
+
+            return self._smcl_to_text(content)
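The hold/restore bracketing above is the core trick: Stata's `_return hold` stashes r()/e() before the capture log opens, and `_return restore` makes them visible again inside it, so commands like `return list` still see the user's results. A minimal sketch under the assumption of a running pystata session exposed as `stata` (log and hold names are illustrative):

import uuid

hold = f"hold_{uuid.uuid4().hex[:8]}"
stata.run(f"capture _return hold {hold}", echo=False)         # stash r()/e()
try:
    stata.run("log using capture.smcl, smcl name(cap)", echo=False)
    stata.run(f"capture _return restore {hold}", echo=False)  # make r() visible again
    stata.run("return list", echo=True)                       # lands in the SMCL log
finally:
    stata.run("capture log close cap", echo=False)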

-    def _read_log_tail(self, path: str, max_chars: int) -> str:
+    def _count_do_file_lines(self, path: str) -> int:
+        """
+        Count the number of executable lines in a .do file for progress inference.
+
+        Blank lines and comment-only lines (starting with * or //) are ignored.
+        """
         try:
-            with open(path, "rb") as f:
-                f.seek(0, os.SEEK_END)
-                size = f.tell()
-                if size <= 0:
-                    return ""
-                read_size = min(size, max_chars)
-                f.seek(-read_size, os.SEEK_END)
-                data = f.read(read_size)
-                return data.decode("utf-8", errors="replace")
+            with open(path, "r", encoding="utf-8", errors="replace") as f:
+                lines = f.read().splitlines()
         except Exception:
-            return ""
+            return 0

-
-
-
-
-                r"^r\(\d+\);?$",
-                r"^end of do-file$",
-                r"^execution terminated$",
-                r"^[-=*]{3,}.*$",
-            )
-            rc_pattern = r"^r\(\d+\);?$"
-            error_patterns = (
-                r"\btype mismatch\b",
-                r"\bnot found\b",
-                r"\bnot allowed\b",
-                r"\bno observations\b",
-                r"\bconformability error\b",
-                r"\binvalid\b",
-                r"\bsyntax error\b",
-                r"\berror\b",
-            )
-            lines = text.splitlines()
-            for raw in reversed(lines):
-                line = raw.strip()
-                if not line:
-                    continue
-                if any(re.search(pat, line, re.IGNORECASE) for pat in error_patterns):
-                    return line
-            for i in range(len(lines) - 1, -1, -1):
-                line = lines[i].strip()
-                if not line:
-                    continue
-                if re.match(rc_pattern, line, re.IGNORECASE):
-                    for j in range(i - 1, -1, -1):
-                        prev_line = lines[j].strip()
-                        if not prev_line:
-                            continue
-                        if prev_line.startswith((".", ">", "-", "=")):
-                            continue
-                        if any(re.match(pat, prev_line, re.IGNORECASE) for pat in ignore_patterns):
-                            continue
-                        return prev_line
-                    return line
-            for raw in reversed(lines):
-                line = raw.strip()
-                if not line:
+        total = 0
+        for line in lines:
+            s = line.strip()
+            if not s:
                 continue
-            if
+            if s.startswith("*"):
                 continue
-            if
+            if s.startswith("//"):
                 continue
-
-            return
+            total += 1
+        return total

     def _smcl_to_text(self, smcl: str) -> str:
         """Convert simple SMCL markup into plain text for LLM-friendly help."""
@@ -486,153 +1395,181 @@ class StataClient:
         lines = [line.rstrip() for line in cleaned.splitlines()]
         return "\n".join(lines).strip()

-    def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def _extract_error_and_context(self, log_content: str, rc: int) -> Tuple[str, str]:
+        """
+        Extracts the error message and trace context using {err} SMCL tags.
+        """
+        if not log_content:
+            return f"Stata error r({rc})", ""
+
+        lines = log_content.splitlines()
+
+        # Search backwards for the {err} tag
+        for i in range(len(lines) - 1, -1, -1):
+            line = lines[i]
+            if '{err}' in line:
+                # Found the (last) error line.
+                # Walk backwards to find the start of the error block (consecutive {err} lines)
+                start_idx = i
+                while start_idx > 0 and '{err}' in lines[start_idx - 1]:
+                    start_idx -= 1
+
+                # The full error message is the concatenation of all {err} lines in this block
+                error_lines = []
+                for j in range(start_idx, i + 1):
+                    error_lines.append(lines[j].strip())
+
+                clean_msg = " ".join(filter(None, error_lines)) or f"Stata error r({rc})"
+
+                # Capture everything from the start of the error block to the end
+                context_str = "\n".join(lines[start_idx:])
+                return clean_msg, context_str
+
+        # Fallback: grab the last 30 lines
+        context_start = max(0, len(lines) - 30)
+        context_str = "\n".join(lines[context_start:])
+
+        return f"Stata error r({rc})", context_str
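An illustrative input/output pair for the extractor above (the log text is made up; real SMCL logs carry more markup):

log = "\n".join([
    ". regress y x",
    "{err}variable x not found",
    "r(111);",
])
# _extract_error_and_context(log, 111) returns:
#   message -> "{err}variable x not found"
#   context -> "{err}variable x not found\nr(111);"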

     def _exec_with_capture(self, code: str, echo: bool = True, trace: bool = False, cwd: Optional[str] = None) -> CommandResponse:
-        """Execute Stata code with stdout/stderr capture and rc detection."""
         if not self._initialized:
             self.init()

+        # Rewrite graph names with special characters to internal aliases
         code = self._maybe_rewrite_graph_name_in_command(code)

-
-
-
-
-
-
-
-                error=ErrorEnvelope(
-                    message=f"cwd not found: {cwd}",
-                    rc=601,
-                    command=code,
-                ),
-            )
+        output_buffer = StringIO()
+        error_buffer = StringIO()
+        rc = 0
+        sys_error = None
+        error_envelope = None
+        smcl_content = ""
+        smcl_path = None

-        start_time = time.time()
-        exc: Optional[Exception] = None
-        ret_text: Optional[str] = None
         with self._exec_lock:
-            # Set execution flag to prevent recursive Stata calls
-            self._is_executing = True
             try:
+                from sfi import Scalar, SFIToolkit
                 with self._temp_cwd(cwd):
-
-
-
-
-
-
-
-
-
-
-
-                    if trace:
-                        try:
-                            self.stata.run("set trace off")
-                        except Exception:
-                            pass
-            finally:
-                # Clear execution flag
-                self._is_executing = False
+                    # Create SMCL log for authoritative output capture
+                    # Use shorter unique path to avoid Windows path issues
+                    smcl_path = self._create_smcl_log_path(prefix="mcp_", max_hex=16)
+                    log_name = self._make_smcl_log_name()
+                    self._open_smcl_log(smcl_path, log_name)
+
+                    try:
+                        with self._redirect_io(output_buffer, error_buffer):
+                            try:
+                                if trace:
+                                    self.stata.run("set trace on")

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                                # Run the user code
+                                self.stata.run(code, echo=echo)
+
+                                # Hold results IMMEDIATELY to prevent clobbering by cleanup
+                                self._hold_name = f"mcp_hold_{uuid.uuid4().hex[:8]}"
+                                self.stata.run(f"capture _return hold {self._hold_name}", echo=False)
+
+                            finally:
+                                if trace:
+                                    try:
+                                        self.stata.run("set trace off")
+                                    except Exception:
+                                        pass
+                    finally:
+                        # Close SMCL log AFTER output redirection
+                        self._close_smcl_log(log_name)
+                        # Restore and capture results while still inside the lock
+                        self._restore_results_from_hold("_hold_name")
+
+            except Exception as e:
+                sys_error = str(e)
+                # Try to parse RC from exception message
+                parsed_rc = self._parse_rc_from_text(sys_error)
+                rc = parsed_rc if parsed_rc is not None else 1
+
+            # Read SMCL content as the authoritative source
+            if smcl_path:
+                smcl_content = self._read_smcl_file(smcl_path)
+                # Clean up SMCL file
+                self._safe_unlink(smcl_path)
+
+            stdout_content = output_buffer.getvalue()
+            stderr_content = error_buffer.getvalue()
+
+            # If RC wasn't captured or is generic, try to parse from SMCL
+            if rc in (0, 1, -1) and smcl_content:
+                parsed_rc = self._parse_rc_from_smcl(smcl_content)
+                if parsed_rc is not None and parsed_rc != 0:
+                    rc = parsed_rc
+                elif rc == -1:
+                    rc = 0
+
+            # If stdout is empty but SMCL has content AND command succeeded, use SMCL as stdout
+            # This handles cases where Stata writes to log but not to redirected stdout
+            # For errors, we keep stdout empty and error info goes to ErrorEnvelope
+            if rc == 0 and not stdout_content and smcl_content:
+                # Convert SMCL to plain text for stdout
+                stdout_content = self._smcl_to_text(smcl_content)
+
+            if rc != 0:
+                if sys_error:
+                    msg = sys_error
+                    context = sys_error
+                else:
+                    # Extract error from SMCL (authoritative source)
+                    msg, context = self._extract_error_from_smcl(smcl_content, rc)
+
+                error_envelope = ErrorEnvelope(
+                    message=msg,
+                    rc=rc,
+                    context=context,
+                    snippet=smcl_content[-800:] if smcl_content else (stdout_content + stderr_content)[-800:],
+                    smcl_output=smcl_content  # Include raw SMCL for debugging
+                )
+                stderr_content = context
+
+            resp = CommandResponse(
                 command=code,
                 rc=rc,
-                stdout=
-                stderr=
-                success=
-                error=
+                stdout=stdout_content,
+                stderr=stderr_content,
+                success=(rc == 0),
+                error=error_envelope,
+                log_path=smcl_path if smcl_path else None,
+                smcl_output=smcl_content,
             )

-
-
+            # Capture results immediately after execution, INSIDE the lock
+            try:
+                self._last_results = self.get_stored_results(force_fresh=True)
+            except Exception:
+                self._last_results = None

-
-
-
-        """
+            return resp
+
+    def _exec_no_capture(self, code: str, echo: bool = False, trace: bool = False) -> CommandResponse:
+        """Execute Stata code while leaving stdout/stderr alone."""
         if not self._initialized:
             self.init()

         exc: Optional[Exception] = None
         ret_text: Optional[str] = None
+        rc = 0
+
         with self._exec_lock:
             try:
+                from sfi import Scalar  # Import SFI tools
                 if trace:
                     self.stata.run("set trace on")
                 ret = self.stata.run(code, echo=echo)
                 if isinstance(ret, str) and ret:
                     ret_text = ret
+
+
             except Exception as e:
                 exc = e
+                rc = 1
             finally:
-                rc = self._read_return_code()
-                # If Stata returned an r(#) in text, prefer it.
-                combined = "\n".join(filter(None, [ret_text or "", str(exc) if exc else ""])).strip()
-                rc_hint = self._parse_rc_from_text(combined) if combined else None
-                if exc is None and rc_hint is not None and rc_hint != 0:
-                    rc = rc_hint
-                if exc is None and (rc is None or rc == -1) and rc_hint is None:
-                    # Normalize spurious rc reads only when missing/invalid
-                    rc = 0
                 if trace:
                     try:
                         self.stata.run("set trace off")
@@ -644,8 +1581,13 @@ class StataClient:
         success = rc == 0 and exc is None
         error = None
         if not success:
-
-            error =
+            msg = str(exc) if exc else f"Stata error r({rc})"
+            error = ErrorEnvelope(
+                message=msg,
+                rc=rc,
+                command=code,
+                stdout=ret_text,
+            )

         return CommandResponse(
             command=code,
@@ -656,23 +1598,64 @@ class StataClient:
             error=error,
         )

+    def exec_lightweight(self, code: str) -> CommandResponse:
+        """
+        Executes a command using simple stdout redirection (no SMCL logs).
+        Much faster on Windows as it avoids FS operations.
+        LIMITED: Does not support error envelopes or complex return code parsing.
+        """
+        if not self._initialized:
+            self.init()
+
+        code = self._maybe_rewrite_graph_name_in_command(code)
+
+        output_buffer = StringIO()
+        error_buffer = StringIO()
+        rc = 0
+        exc = None
+
+        with self._exec_lock:
+            with self._redirect_io(output_buffer, error_buffer):
+                try:
+                    self.stata.run(code, echo=False)
+                except Exception as e:
+                    exc = e
+                    rc = 1
+
+        stdout = output_buffer.getvalue()
+        stderr = error_buffer.getvalue()
+
+        return CommandResponse(
+            command=code,
+            rc=rc,
+            stdout=stdout,
+            stderr=stderr if not exc else str(exc),
+            success=(rc == 0),
+            error=None
+        )
+
1637
|
async def run_command_streaming(
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
1638
|
+
self,
|
|
1639
|
+
code: str,
|
|
1640
|
+
*,
|
|
1641
|
+
notify_log: Callable[[str], Awaitable[None]],
|
|
1642
|
+
notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
|
|
1643
|
+
echo: bool = True,
|
|
1644
|
+
trace: bool = False,
|
|
1645
|
+
max_output_lines: Optional[int] = None,
|
|
1646
|
+
cwd: Optional[str] = None,
|
|
1647
|
+
auto_cache_graphs: bool = False,
|
|
1648
|
+
on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
|
|
1649
|
+
emit_graph_ready: bool = False,
|
|
1650
|
+
graph_ready_task_id: Optional[str] = None,
|
|
1651
|
+
graph_ready_format: str = "svg",
|
|
1652
|
+
) -> CommandResponse:
|
|
672
1653
|
if not self._initialized:
|
|
673
1654
|
self.init()
|
|
674
1655
|
|
|
675
1656
|
code = self._maybe_rewrite_graph_name_in_command(code)
|
|
1657
|
+
auto_cache_graphs = auto_cache_graphs or emit_graph_ready
|
|
1658
|
+
total_lines = 0 # Commands (not do-files) do not have line-based progress
|
|
676
1659
|
|
|
677
1660
|
if cwd is not None and not os.path.isdir(cwd):
|
|
678
1661
|
return CommandResponse(
|
|
@@ -690,211 +1673,183 @@ class StataClient:
|
|
|
690
1673
|
|
|
691
1674
|
start_time = time.time()
|
|
692
1675
|
exc: Optional[Exception] = None
|
|
1676
|
+
smcl_content = ""
|
|
1677
|
+
smcl_path = None
|
|
693
1678
|
|
|
694
1679
|
# Setup streaming graph cache if enabled
|
|
695
|
-
graph_cache =
|
|
696
|
-
if auto_cache_graphs:
|
|
697
|
-
graph_cache = StreamingGraphCache(self, auto_cache=True)
|
|
698
|
-
|
|
699
|
-
graph_cache_callback = self._create_graph_cache_callback(on_graph_cached, notify_log)
|
|
700
|
-
|
|
701
|
-
graph_cache.add_cache_callback(graph_cache_callback)
|
|
1680
|
+
graph_cache = self._init_streaming_graph_cache(auto_cache_graphs, on_graph_cached, notify_log)
|
|
702
1681
|
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
encoding="utf-8",
|
|
709
|
-
errors="replace",
|
|
710
|
-
buffering=1,
|
|
711
|
-
)
|
|
712
|
-
log_path = log_file.name
|
|
713
|
-
tail = TailBuffer(max_chars=200000 if trace else 20000)
|
|
714
|
-
tee = FileTeeIO(log_file, tail)
|
|
1682
|
+
_log_file, log_path, tail, tee = self._create_streaming_log(trace=trace)
|
|
1683
|
+
|
|
1684
|
+
# Create SMCL log path for authoritative output capture
|
|
1685
|
+
smcl_path = self._create_smcl_log_path()
|
|
1686
|
+
smcl_log_name = self._make_smcl_log_name()
|
|
715
1687
|
|
|
716
1688
|
# Inform the MCP client immediately where to read/tail the output.
|
|
717
|
-
await notify_log(json.dumps({"event": "log_path", "path":
|
|
1689
|
+
await notify_log(json.dumps({"event": "log_path", "path": smcl_path}))
|
|
718
1690
|
|
|
719
1691
|
rc = -1
|
|
1692
|
+
path_for_stata = code.replace("\\", "/")
|
|
1693
|
+
command = f'{path_for_stata}'
|
|
1694
|
+
|
|
1695
|
+
graph_ready_initial = self._capture_graph_state(graph_cache, emit_graph_ready)
|
|
1696
|
+
graph_poll_state = [0.0]
|
|
1697
|
+
|
|
1698
|
+
async def on_chunk_for_graphs(_chunk: str) -> None:
|
|
1699
|
+
await self._maybe_cache_graphs_on_chunk(
|
|
1700
|
+
graph_cache=graph_cache,
|
|
1701
|
+
emit_graph_ready=emit_graph_ready,
|
|
1702
|
+
notify_log=notify_log,
|
|
1703
|
+
graph_ready_task_id=graph_ready_task_id,
|
|
1704
|
+
graph_ready_format=graph_ready_format,
|
|
1705
|
+
graph_ready_initial=graph_ready_initial,
|
|
1706
|
+
last_check=graph_poll_state,
|
|
1707
|
+
)
|
|
720
1708
|
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
if isinstance(ret, str) and ret:
|
|
734
|
-
try:
|
|
735
|
-
tee.write(ret)
|
|
736
|
-
except Exception:
|
|
737
|
-
pass
|
|
738
|
-
except Exception as e:
|
|
739
|
-
exc = e
|
|
740
|
-
finally:
|
|
741
|
-
rc = self._read_return_code()
|
|
742
|
-
if trace:
|
|
743
|
-
try:
|
|
744
|
-
self.stata.run("set trace off")
|
|
745
|
-
except Exception:
|
|
746
|
-
pass
|
|
747
|
-
finally:
|
|
748
|
-
self._is_executing = False
|
|
1709
|
+
done = anyio.Event()
|
|
1710
|
+
|
|
1711
|
+
async with anyio.create_task_group() as tg:
|
|
1712
|
+
async def stream_smcl() -> None:
|
|
1713
|
+
await self._stream_smcl_log(
|
|
1714
|
+
smcl_path=smcl_path,
|
|
1715
|
+
notify_log=notify_log,
|
|
1716
|
+
done=done,
|
|
1717
|
+
on_chunk=on_chunk_for_graphs if graph_cache else None,
|
|
1718
|
+
)
|
|
1719
|
+
|
|
1720
|
+
tg.start_soon(stream_smcl)
|
|
749
1721
|
|
|
750
|
-
try:
|
|
751
1722
|
if notify_progress is not None:
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
# Best-effort cancellation: signal Stata to break, wait briefly, then propagate.
|
|
757
|
-
self._request_break_in()
|
|
758
|
-
await self._wait_for_stata_stop()
|
|
759
|
-
raise
|
|
760
|
-
finally:
|
|
761
|
-
tee.close()
|
|
1723
|
+
if total_lines > 0:
|
|
1724
|
+
await notify_progress(0, float(total_lines), f"Executing command: 0/{total_lines}")
|
|
1725
|
+
else:
|
|
1726
|
+
await notify_progress(0, None, "Running command")
|
|
762
1727
|
|
|
763
|
-
# Cache detected graphs after command completes
|
|
764
|
-
if graph_cache:
|
|
765
1728
|
try:
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
1729
|
+
run_blocking = lambda: self._run_streaming_blocking(
|
|
1730
|
+
command=command,
|
|
1731
|
+
tee=tee,
|
|
1732
|
+
cwd=cwd,
|
|
1733
|
+
trace=trace,
|
|
1734
|
+
echo=echo,
|
|
1735
|
+
smcl_path=smcl_path,
|
|
1736
|
+
smcl_log_name=smcl_log_name,
|
|
1737
|
+
hold_attr="_hold_name_stream",
|
|
1738
|
+
)
|
|
1739
|
+
try:
|
|
1740
|
+
rc, exc = await anyio.to_thread.run_sync(
|
|
1741
|
+
run_blocking,
|
|
1742
|
+
abandon_on_cancel=True,
|
|
1743
|
+
)
|
|
1744
|
+
except TypeError:
|
|
1745
|
+
rc, exc = await anyio.to_thread.run_sync(run_blocking)
|
|
1746
|
+
except get_cancelled_exc_class():
|
|
1747
|
+
self._request_break_in()
|
|
1748
|
+
await self._wait_for_stata_stop()
|
|
1749
|
+
raise
|
|
1750
|
+
finally:
|
|
1751
|
+
done.set()
|
|
1752
|
+
tee.close()
|
|
776
1753
|
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
1754
|
+
# Read SMCL content as the authoritative source
|
|
1755
|
+
smcl_content = self._read_smcl_file(smcl_path)
|
|
1756
|
+
|
|
1757
|
+
await self._cache_new_graphs(
|
|
1758
|
+
graph_cache,
|
|
1759
|
+
notify_progress=notify_progress,
|
|
1760
|
+
total_lines=total_lines,
|
|
1761
|
+
completed_label="Command",
|
|
1762
|
+
)
|
|
1763
|
+
self._emit_graph_ready_task(
|
|
1764
|
+
emit_graph_ready=emit_graph_ready,
|
|
1765
|
+
graph_ready_initial=graph_ready_initial,
|
|
1766
|
+
notify_log=notify_log,
|
|
1767
|
+
graph_ready_task_id=graph_ready_task_id,
|
|
1768
|
+
graph_ready_format=graph_ready_format,
|
|
1769
|
+
)
|
|
1770
|
+
|
|
1771
|
+
combined = self._build_combined_log(tail, smcl_path, rc, trace, exc)
|
|
1772
|
+
|
|
1773
|
+
# Use SMCL content as primary source for RC detection
|
|
1774
|
+
if not exc or rc in (1, -1):
|
|
1775
|
+
parsed_rc = self._parse_rc_from_smcl(smcl_content)
|
|
1776
|
+
if parsed_rc is not None and parsed_rc != 0:
|
|
1777
|
+
rc = parsed_rc
|
|
1778
|
+
elif rc in (-1, 0, 1): # Also check text if rc is generic 1 or unset
|
|
1779
|
+
parsed_rc_text = self._parse_rc_from_text(combined)
|
|
1780
|
+
if parsed_rc_text is not None:
|
|
1781
|
+
rc = parsed_rc_text
|
|
1782
|
+
elif rc == -1:
|
|
1783
|
+
rc = 0 # Default to success if no error trace found
|
|
1784
|
+
|
|
1785
|
+
success = (rc == 0 and exc is None)
|
|
1786
|
+
stderr_final = None
|
|
788
1787
|
error = None
|
|
1788
|
+
|
|
789
1789
|
if not success:
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
fallback = f"Stata error r({rc_final})"
|
|
797
|
-
message = self._select_stata_error_message(combined, fallback)
|
|
1790
|
+
# Use SMCL as authoritative source for error extraction
|
|
1791
|
+
if smcl_content:
|
|
1792
|
+
msg, context = self._extract_error_from_smcl(smcl_content, rc)
|
|
1793
|
+
else:
|
|
1794
|
+
# Fallback to combined log
|
|
1795
|
+
msg, context = self._extract_error_and_context(combined, rc)
|
|
798
1796
|
|
|
799
1797
|
error = ErrorEnvelope(
|
|
800
|
-
message=
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
command=
|
|
1798
|
+
message=msg,
|
|
1799
|
+
context=context,
|
|
1800
|
+
rc=rc,
|
|
1801
|
+
command=command,
|
|
804
1802
|
log_path=log_path,
|
|
805
|
-
snippet=
|
|
806
|
-
|
|
1803
|
+
snippet=smcl_content[-800:] if smcl_content else combined[-800:],
|
|
1804
|
+
smcl_output=smcl_content,
|
|
807
1805
|
)
|
|
1806
|
+
stderr_final = context
|
|
808
1807
|
|
|
809
1808
|
duration = time.time() - start_time
|
|
810
|
-
code_preview = code.replace("\n", "\\n")
|
|
811
1809
|
logger.info(
|
|
812
1810
|
"stata.run(stream) rc=%s success=%s trace=%s duration_ms=%.2f code_preview=%s",
|
|
813
1811
|
rc,
|
|
814
1812
|
success,
|
|
815
1813
|
trace,
|
|
816
1814
|
duration * 1000,
|
|
817
|
-
|
|
1815
|
+
code.replace("\n", "\\n")[:120],
|
|
818
1816
|
)
|
|
819
1817
|
|
|
820
1818
|
result = CommandResponse(
|
|
821
1819
|
command=code,
|
|
822
1820
|
rc=rc,
|
|
823
1821
|
stdout="",
|
|
824
|
-
stderr=
|
|
1822
|
+
stderr=stderr_final,
|
|
825
1823
|
log_path=log_path,
|
|
826
1824
|
success=success,
|
|
827
1825
|
error=error,
|
|
1826
|
+
smcl_output=smcl_content,
|
|
828
1827
|
)
|
|
829
1828
|
|
|
830
1829
|
if notify_progress is not None:
|
|
831
1830
|
await notify_progress(1, 1, "Finished")
|
|
832
1831
|
|
|
833
|
-
return result
|
|
834
|
-
|
|
835
|
-
def _count_do_file_lines(self, path: str) -> int:
|
|
836
|
-
try:
|
|
837
|
-
with open(path, "r", encoding="utf-8", errors="replace") as f:
|
|
838
|
-
lines = f.read().splitlines()
|
|
839
|
-
except Exception:
|
|
840
|
-
return 0
|
|
841
|
-
|
|
842
|
-
total = 0
|
|
843
|
-
for line in lines:
|
|
844
|
-
s = line.strip()
|
|
845
|
-
if not s:
|
|
846
|
-
continue
|
|
847
|
-
if s.startswith("*"):
|
|
848
|
-
continue
|
|
849
|
-
if s.startswith("//"):
|
|
850
|
-
continue
|
|
851
|
-
total += 1
|
|
852
|
-
return total
|
|
853
|
-
|
|
854
|
-
async def run_do_file_streaming(
|
|
855
|
-
self,
|
|
856
|
-
path: str,
|
|
857
|
-
*,
|
|
858
|
-
notify_log: Callable[[str], Awaitable[None]],
|
|
859
|
-
notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
|
|
860
|
-
echo: bool = True,
|
|
861
|
-
trace: bool = False,
|
|
862
|
-
max_output_lines: Optional[int] = None,
|
|
863
|
-
cwd: Optional[str] = None,
|
|
864
|
-
auto_cache_graphs: bool = False,
|
|
865
|
-
on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
|
|
866
|
-
) -> CommandResponse:
|
|
867
|
-
if cwd is not None and not os.path.isdir(cwd):
|
|
868
|
-
return CommandResponse(
|
|
869
|
-
command=f'do "{path}"',
|
|
870
|
-
rc=601,
|
|
871
|
-
stdout="",
|
|
872
|
-
stderr=None,
|
|
873
|
-
success=False,
|
|
874
|
-
error=ErrorEnvelope(
|
|
875
|
-
message=f"cwd not found: {cwd}",
|
|
876
|
-
rc=601,
|
|
877
|
-
command=path,
|
|
878
|
-
),
|
|
879
|
-
)
|
|
880
|
-
|
|
881
|
-
effective_path = path
|
|
882
|
-
if cwd is not None and not os.path.isabs(path):
|
|
883
|
-
effective_path = os.path.abspath(os.path.join(cwd, path))
|
|
1832
|
+
return result
|
|
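A minimal sketch of the cancellation pattern above, assuming anyio >= 4 (older releases spell the flag `cancellable=True`, which is what the `TypeError` fallback covers):

import anyio

async def run_with_break(blocking_fn, request_break):
    try:
        # The worker thread is abandoned on cancellation instead of awaited.
        return await anyio.to_thread.run_sync(blocking_fn, abandon_on_cancel=True)
    except anyio.get_cancelled_exc_class():
        request_break()  # e.g. ask Stata to honor a Break, then re-raise
        raise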

-
-
-
-
-
-
-
-
-
-
-
-
-
+    async def run_do_file_streaming(
+        self,
+        path: str,
+        *,
+        notify_log: Callable[[str], Awaitable[None]],
+        notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
+        echo: bool = True,
+        trace: bool = False,
+        max_output_lines: Optional[int] = None,
+        cwd: Optional[str] = None,
+        auto_cache_graphs: bool = False,
+        on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
+        emit_graph_ready: bool = False,
+        graph_ready_task_id: Optional[str] = None,
+        graph_ready_format: str = "svg",
+    ) -> CommandResponse:
+        effective_path, command, error_response = self._resolve_do_file_path(path, cwd)
+        if error_response is not None:
+            return error_response

         total_lines = self._count_do_file_lines(effective_path)
         executed_lines = 0
@@ -923,104 +1878,55 @@ class StataClient:
         if not self._initialized:
             self.init()

+        auto_cache_graphs = auto_cache_graphs or emit_graph_ready
+
         start_time = time.time()
         exc: Optional[Exception] = None
+        smcl_content = ""
+        smcl_path = None

-
-
-        if auto_cache_graphs:
-            graph_cache = StreamingGraphCache(self, auto_cache=True)
-
-            graph_cache_callback = self._create_graph_cache_callback(on_graph_cached, notify_log)
-
-            graph_cache.add_cache_callback(graph_cache_callback)
+        graph_cache = self._init_streaming_graph_cache(auto_cache_graphs, on_graph_cached, notify_log)
+        _log_file, log_path, tail, tee = self._create_streaming_log(trace=trace)

-
-
-            suffix=".log",
-            delete=False,
-            mode="w",
-            encoding="utf-8",
-            errors="replace",
-            buffering=1,
-        )
-        log_path = log_file.name
-        tail = TailBuffer(max_chars=200000 if trace else 20000)
-        tee = FileTeeIO(log_file, tail)
+        smcl_path = self._create_smcl_log_path()
+        smcl_log_name = self._make_smcl_log_name()

         # Inform the MCP client immediately where to read/tail the output.
-        await notify_log(json.dumps({"event": "log_path", "path":
+        await notify_log(json.dumps({"event": "log_path", "path": smcl_path}))

         rc = -1
-
-
+        graph_ready_initial = self._capture_graph_state(graph_cache, emit_graph_ready)
+        graph_poll_state = [0.0]
+
+        async def on_chunk_for_graphs(_chunk: str) -> None:
+            await self._maybe_cache_graphs_on_chunk(
+                graph_cache=graph_cache,
+                emit_graph_ready=emit_graph_ready,
+                notify_log=notify_log,
+                graph_ready_task_id=graph_ready_task_id,
+                graph_ready_format=graph_ready_format,
+                graph_ready_initial=graph_ready_initial,
+                last_check=graph_poll_state,
+            )

-
-        # This allows post-execution detection to identify new graphs
+        on_chunk_callback = on_chunk_for_progress
         if graph_cache:
-
-
-
-            except Exception as e:
-                logger.debug(f"Failed to capture initial graph state: {e}")
-                graph_cache._initial_graphs = set()
-
-        def _run_blocking() -> None:
-            nonlocal rc, exc
-            with self._exec_lock:
-                # Set execution flag to prevent recursive Stata calls
-                self._is_executing = True
-                try:
-                    with self._temp_cwd(cwd):
-                        with self._redirect_io_streaming(tee, tee):
-                            try:
-                                if trace:
-                                    self.stata.run("set trace on")
-                                ret = self.stata.run(command, echo=echo)
-                                # Some PyStata builds return output as a string rather than printing.
-                                if isinstance(ret, str) and ret:
-                                    try:
-                                        tee.write(ret)
-                                    except Exception:
-                                        pass
-                            except Exception as e:
-                                exc = e
-                            finally:
-                                rc = self._read_return_code()
-                                if trace:
-                                    try:
-                                        self.stata.run("set trace off")
-                                    except Exception:
-                                        pass
-                finally:
-                    # Clear execution flag
-                    self._is_executing = False
+            async def on_chunk_callback(chunk: str) -> None:
+                await on_chunk_for_progress(chunk)
+                await on_chunk_for_graphs(chunk)

         done = anyio.Event()

-        async def _monitor_progress_from_log() -> None:
-            if notify_progress is None or total_lines <= 0:
-                return
-            last_pos = 0
-            try:
-                with open(log_path, "r", encoding="utf-8", errors="replace") as f:
-                    while not done.is_set():
-                        f.seek(last_pos)
-                        chunk = f.read()
-                        if chunk:
-                            last_pos = f.tell()
-                            await on_chunk_for_progress(chunk)
-                        await anyio.sleep(0.05)
-
-                    f.seek(last_pos)
-                    chunk = f.read()
-                    if chunk:
-                        await on_chunk_for_progress(chunk)
-            except Exception:
-                return
-
         async with anyio.create_task_group() as tg:
-
+            async def stream_smcl() -> None:
+                await self._stream_smcl_log(
+                    smcl_path=smcl_path,
+                    notify_log=notify_log,
+                    done=done,
+                    on_chunk=on_chunk_callback,
+                )
+
+            tg.start_soon(stream_smcl)

             if notify_progress is not None:
                 if total_lines > 0:
@@ -1029,7 +1935,23 @@ class StataClient:
                     await notify_progress(0, None, "Running do-file")

             try:
-
+                run_blocking = lambda: self._run_streaming_blocking(
+                    command=command,
+                    tee=tee,
+                    cwd=cwd,
+                    trace=trace,
+                    echo=echo,
+                    smcl_path=smcl_path,
+                    smcl_log_name=smcl_log_name,
+                    hold_attr="_hold_name_do",
+                )
+                try:
+                    rc, exc = await anyio.to_thread.run_sync(
+                        run_blocking,
+                        abandon_on_cancel=True,
+                    )
+                except TypeError:
+                    rc, exc = await anyio.to_thread.run_sync(run_blocking)
             except get_cancelled_exc_class():
                 self._request_break_in()
                 await self._wait_for_stata_stop()
@@ -1038,109 +1960,59 @@ class StataClient:
                 done.set()
                 tee.close()

-        #
-
-        # Runs after execution completes, when it's safe to call list_graphs()
-        if graph_cache and graph_cache.auto_cache:
-            cached_graphs = []
-            try:
-                # Get initial state (before execution)
-                initial_graphs = getattr(graph_cache, '_initial_graphs', set())
-
-                # Get current state (after execution)
-                logger.debug("Post-execution: Querying graph state via list_graphs()")
-                current_graphs = set(self.list_graphs())
-
-                # Detect new graphs (created during execution)
-                new_graphs = current_graphs - initial_graphs - graph_cache._cached_graphs
-
-                if new_graphs:
-                    logger.info(f"Detected {len(new_graphs)} new graph(s): {sorted(new_graphs)}")
-
-                    # Cache each detected graph
-                    for graph_name in new_graphs:
-                        try:
-                            logger.debug(f"Caching graph: {graph_name}")
-                            cache_result = await anyio.to_thread.run_sync(
-                                self.cache_graph_on_creation,
-                                graph_name
-                            )
-
-                            if cache_result:
-                                cached_graphs.append(graph_name)
-                                graph_cache._cached_graphs.add(graph_name)
-                                logger.debug(f"Successfully cached graph: {graph_name}")
-                            else:
-                                logger.warning(f"Failed to cache graph: {graph_name}")
-
-                            # Trigger callbacks
-                            for callback in graph_cache._cache_callbacks:
-                                try:
-                                    await anyio.to_thread.run_sync(callback, graph_name, cache_result)
-                                except Exception as e:
-                                    logger.debug(f"Callback failed for {graph_name}: {e}")
-
-                        except Exception as e:
-                            logger.error(f"Error caching graph {graph_name}: {e}")
-                            # Trigger callbacks with failure
-                            for callback in graph_cache._cache_callbacks:
-                                try:
-                                    await anyio.to_thread.run_sync(callback, graph_name, False)
-                                except Exception:
-                                    pass
-
-            # Check for dropped graphs (for completeness)
-            dropped_graphs = initial_graphs - current_graphs
-            if dropped_graphs:
-                logger.debug(f"Graphs dropped during execution: {sorted(dropped_graphs)}")
-                for graph_name in dropped_graphs:
-                    try:
-                        self.invalidate_graph_cache(graph_name)
-                    except Exception:
-                        pass
-
-            # Notify progress if graphs were cached
-            if cached_graphs and notify_progress:
-                await notify_progress(
-                    float(total_lines) if total_lines > 0 else 1,
-                    float(total_lines) if total_lines > 0 else 1,
-                    f"Do-file completed. Cached {len(cached_graphs)} graph(s): {', '.join(cached_graphs)}"
-                )
+        # Read SMCL content as the authoritative source
+        smcl_content = self._read_smcl_file(smcl_path)

-
-
+        await self._cache_new_graphs(
+            graph_cache,
+            notify_progress=notify_progress,
+            total_lines=total_lines,
+            completed_label="Do-file",
+        )
+        self._emit_graph_ready_task(
+            emit_graph_ready=emit_graph_ready,
+            graph_ready_initial=graph_ready_initial,
+            notify_log=notify_log,
+            graph_ready_task_id=graph_ready_task_id,
+            graph_ready_format=graph_ready_format,
+        )

-
-
-
-
-
-
-
-        rc
-
-
+        combined = self._build_combined_log(tail, log_path, rc, trace, exc)
+
+        # Use SMCL content as primary source for RC detection
+        if not exc or rc in (1, -1):
+            parsed_rc = self._parse_rc_from_smcl(smcl_content)
+            if parsed_rc is not None and parsed_rc != 0:
+                rc = parsed_rc
+            elif rc in (-1, 0, 1):
+                parsed_rc_text = self._parse_rc_from_text(combined)
+                if parsed_rc_text is not None:
+                    rc = parsed_rc_text
+                elif rc == -1:
+                    rc = 0  # Default to success if no error found
+
+        success = (rc == 0 and exc is None)
+        stderr_final = None
        error = None
+
        if not success:
-
-
-
-
-
-
-            fallback = f"Stata error r({rc_final})"
-            message = self._select_stata_error_message(combined, fallback)
+            # Use SMCL as authoritative source for error extraction
+            if smcl_content:
+                msg, context = self._extract_error_from_smcl(smcl_content, rc)
+            else:
+                # Fallback to combined log
+                msg, context = self._extract_error_and_context(combined, rc)

             error = ErrorEnvelope(
-                message=
-
-
+                message=msg,
+                context=context,
+                rc=rc,
                 command=command,
                 log_path=log_path,
-                snippet=
-
+                snippet=smcl_content[-800:] if smcl_content else combined[-800:],
+                smcl_output=smcl_content,
             )
+            stderr_final = context

         duration = time.time() - start_time
         logger.info(
@@ -1156,10 +2028,11 @@ class StataClient:
             command=command,
             rc=rc,
             stdout="",
-            stderr=
+            stderr=stderr_final,
             log_path=log_path,
             success=success,
             error=error,
+            smcl_output=smcl_content,
         )

         if notify_progress is not None:
@@ -1181,22 +2054,7 @@ class StataClient:
         """
         result = self._exec_with_capture(code, echo=echo, trace=trace, cwd=cwd)

-
-        if max_output_lines is not None and result.stdout:
-            lines = result.stdout.splitlines()
-            if len(lines) > max_output_lines:
-                truncated_lines = lines[:max_output_lines]
-                truncated_lines.append(f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)")
-                result = CommandResponse(
-                    command=result.command,
-                    rc=result.rc,
-                    stdout="\n".join(truncated_lines),
-                    stderr=result.stderr,
-                    success=result.success,
-                    error=result.error,
-                )
-
-        return result
+        return self._truncate_command_output(result, max_output_lines)

     def get_data(self, start: int = 0, count: int = 50) -> List[Dict[str, Any]]:
         """Returns valid JSON-serializable data."""
@@ -1253,16 +2111,19 @@ class StataClient:
         sortlist = ""
         changed = False
         try:
-            frame = str(Macro.
+            frame = str(Macro.getGlobal("frame") or "default")
         except Exception:
+            logger.debug("Failed to get 'frame' macro", exc_info=True)
             frame = "default"
         try:
-            sortlist = str(Macro.
+            sortlist = str(Macro.getGlobal("sortlist") or "")
         except Exception:
+            logger.debug("Failed to get 'sortlist' macro", exc_info=True)
             sortlist = ""
         try:
-            changed = bool(int(float(Macro.
+            changed = bool(int(float(Macro.getGlobal("changed") or "0")))
         except Exception:
+            logger.debug("Failed to get 'changed' macro", exc_info=True)
             changed = False

         return {"frame": frame, "n": n, "k": k, "sortlist": sortlist, "changed": changed}
@@ -1411,6 +2272,96 @@ class StataClient:
             "truncated_cells": truncated_cells,
         }

+    def get_arrow_stream(
+        self,
+        *,
+        offset: int,
+        limit: int,
+        vars: List[str],
+        include_obs_no: bool,
+        obs_indices: Optional[List[int]] = None,
+    ) -> bytes:
+        """
+        Returns an Apache Arrow IPC stream (as bytes) for the requested data page.
+        Uses Polars if available (faster), falls back to Pandas.
+        """
+        if not self._initialized:
+            self.init()
+
+        import pyarrow as pa
+        from sfi import Data  # type: ignore[import-not-found]
+
+        use_polars = _get_polars_available()
+        if use_polars:
+            import polars as pl
+        else:
+            import pandas as pd
+
+        state = self.get_dataset_state()
+        n = int(state.get("n", 0) or 0)
+        k = int(state.get("k", 0) or 0)
+        if k == 0 and n == 0:
+            raise RuntimeError("No data in memory")
+
+        var_map = self._get_var_index_map()
+        for v in vars:
+            if v not in var_map:
+                raise ValueError(f"Invalid variable: {v}")
+
+        # Determine observations to fetch
+        if obs_indices is None:
+            start = offset
+            end = min(offset + limit, n)
+            obs_list = list(range(start, end)) if start < n else []
+        else:
+            start = offset
+            end = min(offset + limit, len(obs_indices))
+            obs_list = obs_indices[start:end]
+
+        try:
+            if not obs_list:
+                # Empty schema-only table
+                if use_polars:
+                    schema_cols = {}
+                    if include_obs_no:
+                        schema_cols["_n"] = pl.Int64
+                    for v in vars:
+                        schema_cols[v] = pl.Utf8
+                    table = pl.DataFrame(schema=schema_cols).to_arrow()
+                else:
+                    columns = {}
+                    if include_obs_no:
+                        columns["_n"] = pa.array([], type=pa.int64())
+                    for v in vars:
+                        columns[v] = pa.array([], type=pa.string())
+                    table = pa.table(columns)
+            else:
+                # Fetch all data in one C-call
+                raw_data = Data.get(var=vars, obs=obs_list, valuelabel=False)
+
+                if use_polars:
+                    df = pl.DataFrame(raw_data, schema=vars, orient="row")
+                    if include_obs_no:
+                        obs_nums = [i + 1 for i in obs_list]
+                        df = df.with_columns(pl.Series("_n", obs_nums, dtype=pl.Int64))
+                        df = df.select(["_n"] + vars)
+                    table = df.to_arrow()
+                else:
+                    df = pd.DataFrame(raw_data, columns=vars)
+                    if include_obs_no:
+                        df.insert(0, "_n", [i + 1 for i in obs_list])
+                    table = pa.Table.from_pandas(df, preserve_index=False)
+
+            # Serialize to IPC Stream
+            sink = pa.BufferOutputStream()
+            with pa.RecordBatchStreamWriter(sink, table.schema) as writer:
+                writer.write_table(table)
+
+            return sink.getvalue().to_pybytes()
+
+        except Exception as e:
+            raise RuntimeError(f"Failed to generate Arrow stream: {e}")
+
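On the consuming side the payload is a standard Arrow IPC stream, so any Arrow reader can decode it; a pyarrow-only sketch:

import pyarrow as pa

def read_arrow_page(payload: bytes) -> pa.Table:
    # The server wrote the table with RecordBatchStreamWriter, so
    # open_stream() can reconstruct it directly from the bytes.
    reader = pa.ipc.open_stream(payload)
    return reader.read_all()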
     _FILTER_IDENT = re.compile(r"\b[A-Za-z_][A-Za-z0-9_]*\b")

     def _extract_filter_vars(self, filter_expr: str) -> List[str]:
@@ -1599,15 +2550,21 @@ class StataClient:

         # Cache miss or expired, fetch fresh data
         try:
-            #
-            #
-
-
-
-
-
-
-
+            # Preservation of r() results is critical because this can be called
+            # automatically after every user command (e.g., during streaming).
+            import time
+            hold_name = f"_mcp_ghold_{int(time.time() * 1000 % 1000000)}"
+            self.stata.run(f"capture _return hold {hold_name}", echo=False)
+
+            try:
+                self.stata.run("macro define mcp_graph_list \"\"", echo=False)
+                self.stata.run("quietly graph dir, memory", echo=False)
+                from sfi import Macro  # type: ignore[import-not-found]
+                self.stata.run("macro define mcp_graph_list `r(list)'", echo=False)
+                graph_list_str = Macro.getGlobal("mcp_graph_list")
+            finally:
+                self.stata.run(f"capture _return restore {hold_name}", echo=False)
+
             raw_list = graph_list_str.split() if graph_list_str else []

             # Map internal Stata names back to user-facing names when we have an alias.
@@ -1619,7 +2576,7 @@ class StataClient:
             # Update cache
             with self._list_graphs_cache_lock:
                 self._list_graphs_cache = result
-                self._list_graphs_cache_time =
+                self._list_graphs_cache_time = time.time()

             return result

@@ -1654,8 +2611,8 @@ class StataClient:
         import tempfile

         fmt = (format or "pdf").strip().lower()
-        if fmt not in {"pdf", "png"}:
-            raise ValueError(f"Unsupported graph export format: {format}. Allowed: pdf, png.")
+        if fmt not in {"pdf", "png", "svg"}:
+            raise ValueError(f"Unsupported graph export format: {format}. Allowed: pdf, png, svg.")

         if not filename:
             suffix = f".{fmt}"
@@ -1808,73 +2765,77 @@ class StataClient:
             logger.warning("SMCL to Markdown failed, falling back to plain text: %s", parse_err)
             return self._smcl_to_text(smcl)
         except Exception as e:
-
+            logger.warning("Help file read failed for %s: %s", topic, e)
+
+            # If no help file found, return a fallback message
+            return f"Help file for '{topic}' not found."

-
-
+    def get_stored_results(self, force_fresh: bool = False) -> Dict[str, Any]:
+        """Returns e() and r() results using SFI for maximum reliability."""
+        if not force_fresh and self._last_results is not None:
+            return self._last_results

-    def get_stored_results(self) -> Dict[str, Any]:
-        """Returns e() and r() results."""
         if not self._initialized:
             self.init()

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        with self._exec_lock:
+            # We must be extremely careful not to clobber r()/e() while fetching their names.
+            # We use a hold to peek at the results.
+            hold_name = f"mcp_peek_{uuid.uuid4().hex[:8]}"
+            self.stata.run(f"capture _return hold {hold_name}", echo=False)
+
+            try:
+                from sfi import Scalar, Macro
+                results = {"r": {}, "e": {}}
+
+                for rclass in ["r", "e"]:
+                    # Restore with 'hold' to peek at results without losing them from the hold
+                    # Note: Stata 18+ supports 'restore ..., hold' which is ideal.
+                    self.stata.run(f"capture _return restore {hold_name}, hold", echo=False)
+
+                    # Fetch names using backtick expansion (which we verified works better than colon)
+                    # and avoid leading underscores which were causing syntax errors with 'global'
+                    self.stata.run(f"macro define mcp_scnames `: {rclass}(scalars)'", echo=False)
+                    self.stata.run(f"macro define mcp_macnames `: {rclass}(macros)'", echo=False)
+
+                    # 1. Capture Scalars
+                    names_str = Macro.getGlobal("mcp_scnames")
+                    if names_str:
+                        for name in names_str.split():
+                            try:
+                                val = Scalar.getValue(f"{rclass}({name})")
+                                results[rclass][name] = val
+                            except Exception:
+                                pass
+
+                    # 2. Capture Macros (strings)
+                    macros_str = Macro.getGlobal("mcp_macnames")
+                    if macros_str:
+                        for name in macros_str.split():
+                            try:
+                                # Restore/Hold again to be safe before fetching each macro
+                                self.stata.run(f"capture _return restore {hold_name}, hold", echo=False)
+                                # Capture the string value into a macro
+                                self.stata.run(f"macro define mcp_mval `{rclass}({name})'", echo=False)
+                                val = Macro.getGlobal("mcp_mval")
+                                results[rclass][name] = val
+                            except Exception:
+                                pass
+
+                # Cleanup
+                self.stata.run("macro drop mcp_scnames mcp_macnames mcp_mval", echo=False)
+                self.stata.run(f"capture _return restore {hold_name}", echo=False)  # Restore one last time to leave Stata in correct state
+
+                self._last_results = results
+                return results
+            except Exception as e:
+                logger.error(f"SFI-based get_stored_results failed: {e}")
+                # Try to clean up hold if we failed
+                try:
+                    self.stata.run(f"capture _return drop {hold_name}", echo=False)
+                except Exception:
+                    pass
+                return {"r": {}, "e": {}}
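A hypothetical usage sketch: fetch r() results without disturbing them (assumes a dataset is loaded and that `run_command` has executed something that sets r()):

client = StataClient()
client.run_command("summarize price")
stored = client.get_stored_results(force_fresh=True)
print(stored["r"].get("mean"), stored["r"].get("N"))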
1878
2839
|
|
|
1879
2840
|
def invalidate_graph_cache(self, graph_name: str = None) -> None:
|
|
1880
2841
|
"""Invalidate cache for specific graph or all graphs.
|
|
@@ -2324,117 +3285,79 @@ class StataClient:
         return False

     def run_do_file(self, path: str, echo: bool = True, trace: bool = False, max_output_lines: Optional[int] = None, cwd: Optional[str] = None) -> CommandResponse:
-
-
-
-                rc=601,
-                stdout="",
-                stderr=None,
-                success=False,
-                error=ErrorEnvelope(
-                    message=f"cwd not found: {cwd}",
-                    rc=601,
-                    command=path,
-                ),
-            )
-
-        effective_path = path
-        if cwd is not None and not os.path.isabs(path):
-            effective_path = os.path.abspath(os.path.join(cwd, path))
-
-        if not os.path.exists(effective_path):
-            return CommandResponse(
-                command=f'do "{effective_path}"',
-                rc=601,
-                stdout="",
-                stderr=None,
-                success=False,
-                error=ErrorEnvelope(
-                    message=f"Do-file not found: {effective_path}",
-                    rc=601,
-                    command=effective_path,
-                ),
-            )
+        effective_path, command, error_response = self._resolve_do_file_path(path, cwd)
+        if error_response is not None:
+            return error_response

         if not self._initialized:
             self.init()

         start_time = time.time()
         exc: Optional[Exception] = None
-
-
+        smcl_content = ""
+        smcl_path = None

-
-
-
-            delete=False,
-            mode="w",
-            encoding="utf-8",
-            errors="replace",
-            buffering=1,
-        )
-        log_path = log_file.name
-        tail = TailBuffer(max_chars=200000 if trace else 20000)
-        tee = FileTeeIO(log_file, tail)
+        _log_file, log_path, tail, tee = self._create_streaming_log(trace=trace)
+        smcl_path = self._create_smcl_log_path()
+        smcl_log_name = self._make_smcl_log_name()

         rc = -1
+        try:
+            rc, exc = self._run_streaming_blocking(
+                command=command,
+                tee=tee,
+                cwd=cwd,
+                trace=trace,
+                echo=echo,
+                smcl_path=smcl_path,
+                smcl_log_name=smcl_log_name,
+                hold_attr="_hold_name_do_sync",
+                require_smcl_log=True,
+            )
+        except Exception as e:
+            exc = e
+            rc = 1
+        finally:
+            tee.close()

-
-
-        with self._redirect_io_streaming(tee, tee):
-            try:
-                if trace:
-                    self.stata.run("set trace on")
-                ret = self.stata.run(command, echo=echo)
-                # Some PyStata builds return output as a string rather than printing.
-                if isinstance(ret, str) and ret:
-                    try:
-                        tee.write(ret)
-                    except Exception:
-                        pass
-            except Exception as e:
-                exc = e
-            finally:
-                rc = self._read_return_code()
-                if trace:
-                    try:
-                        self.stata.run("set trace off")
-                    except Exception:
-                        pass
-
-        tee.close()
+        # Read SMCL content as the authoritative source
+        smcl_content = self._read_smcl_file(smcl_path)

-
-        log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
-        if log_tail and len(log_tail) > len(tail_text):
-            tail_text = log_tail
-        combined = (tail_text or "") + (f"\n{exc}" if exc else "")
-        rc_hint = self._parse_rc_from_text(combined) if combined else None
-        if exc is None and rc_hint is not None and rc_hint != 0:
-            rc = rc_hint
-        if exc is None and rc_hint is None:
-            rc = 0 if rc is None or rc != 0 else rc
-        success = rc == 0 and exc is None
+        combined = self._build_combined_log(tail, log_path, rc, trace, exc)

+        # Use SMCL content as primary source for RC detection if not already captured
+        if rc == -1 and not exc:
+            parsed_rc = self._parse_rc_from_smcl(smcl_content)
+            if parsed_rc is not None:
+                rc = parsed_rc
+            else:
+                # Fallback to text parsing
+                parsed_rc = self._parse_rc_from_text(combined)
+                rc = parsed_rc if parsed_rc is not None else 0
+        elif exc and rc == 1:
+            # Try to parse more specific RC from exception message
+            parsed_rc = self._parse_rc_from_text(str(exc))
+            if parsed_rc is not None:
+                rc = parsed_rc
+
+        success = (rc == 0 and exc is None)
         error = None
+
         if not success:
-
-
-
-
-
-
-            fallback = f"Stata error r({rc_final})"
-            message = self._select_stata_error_message(combined, fallback)
+            # Use SMCL as authoritative source for error extraction
+            if smcl_content:
+                msg, context = self._extract_error_from_smcl(smcl_content, rc)
+            else:
+                # Fallback to combined log
+                msg, context = self._extract_error_and_context(combined, rc)

             error = ErrorEnvelope(
-                message=
-                rc=
-
+                message=msg,
+                rc=rc,
+                snippet=context,
                 command=command,
                 log_path=log_path,
-
-                trace=trace or None,
+                smcl_output=smcl_content,
             )

         duration = time.time() - start_time
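
The rewritten run_do_file treats the SMCL log as the authoritative record and only falls back to plain-text parsing. The body of _parse_rc_from_smcl sits outside this hunk; a rough sketch of what such a helper could look like, assuming Stata renders return codes in SMCL as a clickable directive like {search r(111), local:r(111);} with r(111); as a plain-text fallback:

    import re
    from typing import Optional

    _SMCL_RC = re.compile(r"\{search r\((\d+)\)")
    _TEXT_RC = re.compile(r"^r\((\d+)\);?\s*$", re.MULTILINE)

    def parse_rc_from_smcl(smcl: str) -> Optional[int]:
        """Return the last r(NNN) code found in SMCL output, or None."""
        matches = _SMCL_RC.findall(smcl) or _TEXT_RC.findall(smcl)
        return int(matches[-1]) if matches else None
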
@@ -2455,6 +3378,7 @@ class StataClient:
             log_path=log_path,
             success=success,
             error=error,
+            smcl_output=smcl_content,
         )

     def load_data(self, source: str, clear: bool = True, max_output_lines: Optional[int] = None) -> CommandResponse:
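
With this hunk, the final CommandResponse carries the raw SMCL log in smcl_output alongside the plain-text fields. A client-side sketch for flattening it to display text; smcl_to_text is a hypothetical helper, and stripping {...} directives with a regex is lossy compared to real SMCL rendering:

    import re

    def smcl_to_text(smcl: str) -> str:
        # Drop SMCL directives such as {txt}, {res}, {err}, keeping the text between them.
        text = re.sub(r"\{[^{}]*\}", "", smcl)
        return "\n".join(line.rstrip() for line in text.splitlines())

    # Hypothetical usage against the new field:
    #   resp = client.run_do_file("analysis.do")
    #   print(smcl_to_text(resp.smcl_output))
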
@@ -2473,40 +3397,8 @@ class StataClient:
         cmd = f"sysuse {src}{clear_suffix}"

         result = self._exec_with_capture(cmd, echo=True, trace=False)
-
-        # Truncate stdout if requested
-        if max_output_lines is not None and result.stdout:
-            lines = result.stdout.splitlines()
-            if len(lines) > max_output_lines:
-                truncated_lines = lines[:max_output_lines]
-                truncated_lines.append(f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)")
-                result = CommandResponse(
-                    command=result.command,
-                    rc=result.rc,
-                    stdout="\n".join(truncated_lines),
-                    stderr=result.stderr,
-                    success=result.success,
-                    error=result.error,
-                )
-
-        return result
+        return self._truncate_command_output(result, max_output_lines)

     def codebook(self, varname: str, trace: bool = False, max_output_lines: Optional[int] = None) -> CommandResponse:
         result = self._exec_with_capture(f"codebook {varname}", trace=trace)
-
-        # Truncate stdout if requested
-        if max_output_lines is not None and result.stdout:
-            lines = result.stdout.splitlines()
-            if len(lines) > max_output_lines:
-                truncated_lines = lines[:max_output_lines]
-                truncated_lines.append(f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)")
-                result = CommandResponse(
-                    command=result.command,
-                    rc=result.rc,
-                    stdout="\n".join(truncated_lines),
-                    stderr=result.stderr,
-                    success=result.success,
-                    error=result.error,
-                )
-
-        return result
+        return self._truncate_command_output(result, max_output_lines)
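
load_data and codebook now delegate their previously duplicated truncation blocks to a single _truncate_command_output helper. Its definition is outside this hunk; reconstructed from the removed inline code, the behavior is plausibly this (written here as a free function rather than a method):

    from typing import Optional

    from mcp_stata.models import CommandResponse

    def truncate_command_output(result: CommandResponse,
                                max_output_lines: Optional[int]) -> CommandResponse:
        """Cap stdout at max_output_lines, mirroring the removed inline blocks."""
        if max_output_lines is None or not result.stdout:
            return result
        lines = result.stdout.splitlines()
        if len(lines) <= max_output_lines:
            return result
        truncated = lines[:max_output_lines]
        truncated.append(f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)")
        return CommandResponse(
            command=result.command,
            rc=result.rc,
            stdout="\n".join(truncated),
            stderr=result.stderr,
            success=result.success,
            error=result.error,
        )
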