mcp-stata 1.7.6__py3-none-any.whl → 1.16.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-stata might be problematic.
- mcp_stata/config.py +20 -0
- mcp_stata/discovery.py +140 -61
- mcp_stata/graph_detector.py +60 -44
- mcp_stata/models.py +2 -1
- mcp_stata/server.py +729 -28
- mcp_stata/stata_client.py +2023 -817
- mcp_stata/streaming_io.py +3 -1
- mcp_stata/test_stata.py +54 -0
- mcp_stata/ui_http.py +178 -19
- {mcp_stata-1.7.6.dist-info → mcp_stata-1.16.6.dist-info}/METADATA +28 -6
- mcp_stata-1.16.6.dist-info/RECORD +16 -0
- mcp_stata-1.7.6.dist-info/RECORD +0 -14
- {mcp_stata-1.7.6.dist-info → mcp_stata-1.16.6.dist-info}/WHEEL +0 -0
- {mcp_stata-1.7.6.dist-info → mcp_stata-1.16.6.dist-info}/entry_points.txt +0 -0
- {mcp_stata-1.7.6.dist-info → mcp_stata-1.16.6.dist-info}/licenses/LICENSE +0 -0
mcp_stata/stata_client.py
CHANGED
@@ -1,22 +1,27 @@
-import
+import asyncio
+import io
+import inspect
 import json
 import logging
 import os
+import platform
 import re
 import subprocess
 import sys
-import threading
-from importlib.metadata import PackageNotFoundError, version
 import tempfile
+import threading
 import time
-
+import uuid
+from contextlib import contextmanager, redirect_stdout, redirect_stderr
+from importlib.metadata import PackageNotFoundError, version
 from io import StringIO
-from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple
+from typing import Any, Awaitable, Callable, Dict, Generator, List, Optional, Tuple
 
 import anyio
 from anyio import get_cancelled_exc_class
 
-from .discovery import
+from .discovery import find_stata_candidates
+from .config import MAX_LIMIT
 from .models import (
     CommandResponse,
     ErrorEnvelope,
@@ -33,6 +38,29 @@ from .graph_detector import StreamingGraphCache
 
 logger = logging.getLogger("mcp_stata")
 
+_POLARS_AVAILABLE: Optional[bool] = None
+
+def _check_polars_available() -> bool:
+    """
+    Check if Polars can be safely imported.
+    Must detect problematic platforms BEFORE attempting import,
+    since the crash is a fatal signal, not a catchable exception.
+    """
+    if sys.platform == "win32" and platform.machine().lower() in ("arm64", "aarch64"):
+        return False
+
+    try:
+        import polars  # noqa: F401
+        return True
+    except ImportError:
+        return False
+
+
+def _get_polars_available() -> bool:
+    global _POLARS_AVAILABLE
+    if _POLARS_AVAILABLE is None:
+        _POLARS_AVAILABLE = _check_polars_available()
+    return _POLARS_AVAILABLE
 
 # ============================================================================
 # MODULE-LEVEL DISCOVERY CACHE
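Note: the Windows-on-ARM guard runs before `import polars` because a crashing native wheel aborts the whole process; no `except` clause ever fires. A minimal standalone sketch of the same probe-once pattern (module names below are stand-ins, not part of the package):

    import sys
    import platform
    from typing import Optional

    _PROBE: Optional[bool] = None

    def optional_dep_available() -> bool:
        """Probe an optional native dependency once and cache the verdict."""
        global _PROBE
        if _PROBE is None:
            # Screen the platform first: importing at all could be fatal there.
            if sys.platform == "win32" and platform.machine().lower() in ("arm64", "aarch64"):
                _PROBE = False
            else:
                try:
                    import sqlite3  # noqa: F401 -- stand-in for the optional dependency
                    _PROBE = True
                except ImportError:
                    _PROBE = False
        return _PROBE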
@@ -40,26 +68,30 @@ logger = logging.getLogger("mcp_stata")
 # This cache ensures Stata discovery runs exactly once per process lifetime
 _discovery_lock = threading.Lock()
 _discovery_result: Optional[Tuple[str, str]] = None  # (path, edition)
+_discovery_candidates: Optional[List[Tuple[str, str]]] = None
 _discovery_attempted = False
 _discovery_error: Optional[Exception] = None
 
 
-def
+def _get_discovery_candidates() -> List[Tuple[str, str]]:
     """
-    Get
+    Get ordered discovery candidates, running discovery only once.
 
     Returns:
-
+        List of (stata_executable_path, edition) ordered by preference.
 
     Raises:
         RuntimeError: If Stata discovery fails
     """
-    global _discovery_result, _discovery_attempted, _discovery_error
+    global _discovery_result, _discovery_candidates, _discovery_attempted, _discovery_error
 
     with _discovery_lock:
         # If we've already successfully discovered Stata, return cached result
         if _discovery_result is not None:
-            return _discovery_result
+            return _discovery_candidates or [_discovery_result]
+
+        if _discovery_candidates is not None:
+            return _discovery_candidates
 
         # If we've already attempted and failed, re-raise the cached error
         if _discovery_attempted and _discovery_error is not None:
@@ -83,13 +115,17 @@ def _get_discovered_stata() -> Tuple[str, str]:
             logger.info("mcp-stata version: %s", pkg_version)
 
             # Run discovery
-
+            candidates = find_stata_candidates()
 
             # Cache the successful result
-
-
+            _discovery_candidates = candidates
+            if candidates:
+                _discovery_result = candidates[0]
+                logger.info("Discovery found Stata at: %s (%s)", _discovery_result[0], _discovery_result[1])
+            else:
+                raise FileNotFoundError("No Stata candidates discovered")
 
-            return
+            return candidates
 
         except FileNotFoundError as e:
             _discovery_error = e
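Note: discovery is memoized per process: the first caller pays the cost, later callers get the cached candidate list, and a cached failure is re-raised rather than re-run. A rough standalone sketch of that shape (names and the placeholder path are illustrative):

    import threading
    from typing import List, Optional, Tuple

    _lock = threading.Lock()
    _cached: Optional[List[Tuple[str, str]]] = None
    _failure: Optional[Exception] = None

    def discover_once() -> List[Tuple[str, str]]:
        """Run expensive discovery at most once; replay a cached failure afterwards."""
        global _cached, _failure
        with _lock:
            if _cached is not None:
                return _cached
            if _failure is not None:
                raise RuntimeError("discovery previously failed") from _failure
            try:
                _cached = [("/usr/local/stata/stata-mp", "mp")]  # placeholder result
                return _cached
            except Exception as e:
                _failure = e
                raise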
@@ -102,23 +138,49 @@ def _get_discovered_stata() -> Tuple[str, str]:
         ) from e
 
 
+def _get_discovered_stata() -> Tuple[str, str]:
+    """
+    Preserve existing API: return the highest-priority discovered Stata candidate.
+    """
+    candidates = _get_discovery_candidates()
+    if not candidates:
+        raise RuntimeError("Stata binary not found: no candidates discovered")
+    return candidates[0]
+
+
 class StataClient:
     _initialized = False
     _exec_lock: threading.Lock
     _cache_init_lock = threading.Lock()  # Class-level lock for cache initialization
     _is_executing = False  # Flag to prevent recursive Stata calls
-    MAX_DATA_ROWS =
+    MAX_DATA_ROWS = MAX_LIMIT
     MAX_GRAPH_BYTES = 50 * 1024 * 1024  # Maximum graph exports (~50MB)
     MAX_CACHE_SIZE = 100  # Maximum number of graphs to cache
     MAX_CACHE_BYTES = 500 * 1024 * 1024  # Maximum cache size in bytes (~500MB)
     LIST_GRAPHS_TTL = 0.075  # TTL for list_graphs cache (75ms)
 
+    def __init__(self):
+        self._exec_lock = threading.RLock()
+        self._is_executing = False
+        self._command_idx = 0  # Counter for user-initiated commands
+        self._initialized = False
+        from .graph_detector import GraphCreationDetector
+        self._graph_detector = GraphCreationDetector(self)
+
     def __new__(cls):
         inst = super(StataClient, cls).__new__(cls)
-        inst._exec_lock = threading.
+        inst._exec_lock = threading.RLock()
         inst._is_executing = False
+        inst._command_idx = 0
+        from .graph_detector import GraphCreationDetector
+        inst._graph_detector = GraphCreationDetector(inst)
         return inst
 
+    def _increment_command_idx(self) -> int:
+        """Increment and return the command counter."""
+        self._command_idx += 1
+        return self._command_idx
+
     @contextmanager
     def _redirect_io(self, out_buf, err_buf):
         """Safely redirect stdout/stderr for the duration of a Stata call."""
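Note: `_exec_lock` becomes an `RLock` here, which lets a method that already holds the execution lock call another locked helper on the same thread. A toy demonstration of why a plain `Lock` would not do:

    import threading

    lock = threading.RLock()  # with threading.Lock() the call below would deadlock

    def outer():
        with lock:
            inner()  # re-acquires the same lock on the same thread

    def inner():
        with lock:
            print("re-entered safely")

    outer()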
@@ -129,38 +191,6 @@ class StataClient:
         finally:
             sys.stdout, sys.stderr = backup_stdout, backup_stderr
 
-    def _select_stata_error_message(self, text: str, fallback: str) -> str:
-        """
-        Helper for tests and legacy callers to extract the clean error message.
-        """
-        if not text:
-            return fallback
-
-        lines = text.splitlines()
-        trace_pattern = re.compile(r'^\s*[-=.]')
-        noise_pattern = re.compile(r'^(?:\}|\{txt\}|\{com\}|end of do-file)')
-
-        for line in reversed(lines):
-            stripped = line.strip()
-            if not stripped:
-                continue
-            if trace_pattern.match(line):
-                continue
-            if noise_pattern.match(stripped):
-                continue
-            if stripped.startswith("r(") and stripped.endswith(");"):
-                # If we hit r(123); we might want the line ABOVE it if it's not noise
-                continue
-
-            # Preserve SMCL tags
-            return stripped
-
-        # If we couldn't find a better message, try to find r(N);
-        match = re.search(r"r\(\d+\);", text)
-        if match:
-            return match.group(0)
-
-        return fallback
 
     @staticmethod
     def _stata_quote(value: str) -> str:
@@ -181,6 +211,613 @@ class StataClient:
         finally:
             sys.stdout, sys.stderr = backup_stdout, backup_stderr
 
+    @staticmethod
+    def _safe_unlink(path: str) -> None:
+        if not path:
+            return
+        try:
+            if os.path.exists(path):
+                os.unlink(path)
+        except Exception:
+            pass
+
+    def _create_smcl_log_path(
+        self,
+        *,
+        prefix: str = "mcp_smcl_",
+        max_hex: Optional[int] = None,
+        base_dir: Optional[str] = None,
+    ) -> str:
+        hex_id = uuid.uuid4().hex if max_hex is None else uuid.uuid4().hex[:max_hex]
+        base = os.path.realpath(tempfile.gettempdir())
+        smcl_path = os.path.join(base, f"{prefix}{hex_id}.smcl")
+        self._safe_unlink(smcl_path)
+        return smcl_path
+
+    @staticmethod
+    def _make_smcl_log_name() -> str:
+        return f"_mcp_smcl_{uuid.uuid4().hex[:8]}"
+
+    def _open_smcl_log(self, smcl_path: str, log_name: str, *, quiet: bool = False) -> bool:
+        path_for_stata = smcl_path.replace("\\", "/")
+        base_cmd = f"log using \"{path_for_stata}\", replace smcl name({log_name})"
+        unnamed_cmd = f"log using \"{path_for_stata}\", replace smcl"
+        for attempt in range(4):
+            try:
+                logger.debug(
+                    "_open_smcl_log attempt=%s log_name=%s path=%s",
+                    attempt + 1,
+                    log_name,
+                    smcl_path,
+                )
+                logger.warning(
+                    "SMCL open attempt %s cwd=%s path=%s",
+                    attempt + 1,
+                    os.getcwd(),
+                    smcl_path,
+                )
+                logger.debug(
+                    "SMCL open attempt=%s cwd=%s path=%s cmd=%s",
+                    attempt + 1,
+                    os.getcwd(),
+                    smcl_path,
+                    base_cmd,
+                )
+                try:
+                    close_ret = self.stata.run("capture log close _all", echo=False)
+                    if close_ret:
+                        logger.warning("SMCL close_all output: %s", close_ret)
+                except Exception:
+                    pass
+                cmd = f"{'quietly ' if quiet else ''}{base_cmd}"
+                try:
+                    output_buf = StringIO()
+                    with redirect_stdout(output_buf), redirect_stderr(output_buf):
+                        self.stata.run(cmd, echo=False)
+                    ret = output_buf.getvalue().strip()
+                    if ret:
+                        logger.warning("SMCL log open output: %s", ret)
+                except Exception as e:
+                    logger.warning("SMCL log open failed (attempt %s): %s", attempt + 1, e)
+                    logger.warning("SMCL log open failed: %r", e)
+                    try:
+                        retry_buf = StringIO()
+                        with redirect_stdout(retry_buf), redirect_stderr(retry_buf):
+                            self.stata.run(base_cmd, echo=False)
+                        ret = retry_buf.getvalue().strip()
+                        if ret:
+                            logger.warning("SMCL log open output (no quiet): %s", ret)
+                    except Exception as inner:
+                        logger.warning("SMCL log open retry failed: %s", inner)
+                query_buf = StringIO()
+                try:
+                    with redirect_stdout(query_buf), redirect_stderr(query_buf):
+                        self.stata.run("log query", echo=False)
+                except Exception as query_err:
+                    query_buf.write(f"log query failed: {query_err!r}")
+                query_ret = query_buf.getvalue().strip()
+                logger.warning("SMCL log query output: %s", query_ret)
+
+                if query_ret:
+                    query_lower = query_ret.lower()
+                    log_confirmed = "log:" in query_lower and "smcl" in query_lower and " on" in query_lower
+                    if log_confirmed:
+                        self._last_smcl_log_named = True
+                        logger.info("SMCL log confirmed: %s", path_for_stata)
+                        return True
+                logger.warning("SMCL log not confirmed after open; query_ret=%s", query_ret)
+                try:
+                    unnamed_output = StringIO()
+                    with redirect_stdout(unnamed_output), redirect_stderr(unnamed_output):
+                        self.stata.run(unnamed_cmd, echo=False)
+                    unnamed_ret = unnamed_output.getvalue().strip()
+                    if unnamed_ret:
+                        logger.warning("SMCL log open output (unnamed): %s", unnamed_ret)
+                except Exception as e:
+                    logger.warning("SMCL log open failed (unnamed, attempt %s): %s", attempt + 1, e)
+                unnamed_query_buf = StringIO()
+                try:
+                    with redirect_stdout(unnamed_query_buf), redirect_stderr(unnamed_query_buf):
+                        self.stata.run("log query", echo=False)
+                except Exception as query_err:
+                    unnamed_query_buf.write(f"log query failed: {query_err!r}")
+                unnamed_query = unnamed_query_buf.getvalue().strip()
+                if unnamed_query:
+                    unnamed_lower = unnamed_query.lower()
+                    unnamed_confirmed = "log:" in unnamed_lower and "smcl" in unnamed_lower and " on" in unnamed_lower
+                    if unnamed_confirmed:
+                        self._last_smcl_log_named = False
+                        logger.info("SMCL log confirmed (unnamed): %s", path_for_stata)
+                        return True
+            except Exception as e:
+                logger.warning("Failed to open SMCL log (attempt %s): %s", attempt + 1, e)
+            if attempt < 3:
+                time.sleep(0.1)
+        logger.warning("Failed to open SMCL log with cmd: %s", cmd)
+        return False
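Note: the method above never trusts `log using` on its own; each attempt is verified with `log query` and retried with a short backoff. The generic shape of that loop, as a standalone sketch (`probe` is a hypothetical callable that opens and then confirms the resource):

    import time

    def open_with_retries(probe, attempts: int = 4, delay: float = 0.1) -> bool:
        """Try an open-then-verify step a few times before giving up."""
        for attempt in range(attempts):
            try:
                if probe():  # e.g. open the log, then confirm it is really on
                    return True
            except Exception:
                pass  # swallow and retry
            if attempt < attempts - 1:
                time.sleep(delay)
        return False

    print(open_with_retries(lambda: True))  # -> True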
+
+    def _close_smcl_log(self, log_name: str) -> None:
+        try:
+            use_named = getattr(self, "_last_smcl_log_named", None)
+            if use_named is False:
+                self.stata.run("capture log close", echo=False)
+            else:
+                self.stata.run(f"capture log close {log_name}", echo=False)
+        except Exception:
+            pass
+
+    def _restore_results_from_hold(self, hold_attr: str) -> None:
+        if not hasattr(self, hold_attr):
+            return
+        hold_name = getattr(self, hold_attr)
+        try:
+            self.stata.run(f"capture _return restore {hold_name}", echo=False)
+            self._last_results = self.get_stored_results(force_fresh=True)
+        except Exception:
+            pass
+        finally:
+            try:
+                delattr(self, hold_attr)
+            except Exception:
+                pass
+
+    def _create_streaming_log(self, *, trace: bool) -> tuple[tempfile.NamedTemporaryFile, str, TailBuffer, FileTeeIO]:
+        log_file = tempfile.NamedTemporaryFile(
+            prefix="mcp_stata_",
+            suffix=".log",
+            delete=False,
+            mode="w",
+            encoding="utf-8",
+            errors="replace",
+            buffering=1,
+        )
+        log_path = log_file.name
+        tail = TailBuffer(max_chars=200000 if trace else 20000)
+        tee = FileTeeIO(log_file, tail)
+        return log_file, log_path, tail, tee
+
+    def _init_streaming_graph_cache(
+        self,
+        auto_cache_graphs: bool,
+        on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]],
+        notify_log: Callable[[str], Awaitable[None]],
+    ) -> Optional[StreamingGraphCache]:
+        if not auto_cache_graphs:
+            return None
+        graph_cache = StreamingGraphCache(self, auto_cache=True)
+        graph_cache_callback = self._create_graph_cache_callback(on_graph_cached, notify_log)
+        graph_cache.add_cache_callback(graph_cache_callback)
+        return graph_cache
+
+    def _capture_graph_state(
+        self,
+        graph_cache: Optional[StreamingGraphCache],
+        emit_graph_ready: bool,
+    ) -> Optional[dict[str, str]]:
+        # Capture initial graph state BEFORE execution starts
+        if graph_cache:
+            # Clear detection state for the new command (detected/removed sets)
+            # but preserve _last_graph_state signatures for modification detection.
+            graph_cache.detector.clear_detection_state()
+            try:
+                graph_cache._initial_graphs = set(self.list_graphs(force_refresh=True))
+                logger.debug(f"Initial graph state captured: {graph_cache._initial_graphs}")
+            except Exception as e:
+                logger.debug(f"Failed to capture initial graph state: {e}")
+                graph_cache._initial_graphs = set()
+
+        graph_ready_initial = None
+        if emit_graph_ready:
+            try:
+                graph_ready_initial = {}
+                for graph_name in self.list_graphs(force_refresh=True):
+                    graph_ready_initial[graph_name] = self._get_graph_signature(graph_name)
+                logger.debug("Graph-ready initial state captured: %s", set(graph_ready_initial))
+            except Exception as e:
+                logger.debug("Failed to capture graph-ready state: %s", e)
+                graph_ready_initial = {}
+        return graph_ready_initial
+
+    async def _cache_new_graphs(
+        self,
+        graph_cache: Optional[StreamingGraphCache],
+        *,
+        notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]],
+        total_lines: int,
+        completed_label: str,
+    ) -> None:
+        if not graph_cache or not graph_cache.auto_cache:
+            return
+        try:
+            cached_graphs = []
+            # Use detector to find new OR modified graphs
+            pystata_detected = await anyio.to_thread.run_sync(graph_cache.detector._detect_graphs_via_pystata)
+
+            # Combine with any pending graphs in queue
+            with graph_cache._lock:
+                to_process = set(pystata_detected) | set(graph_cache._graphs_to_cache)
+                graph_cache._graphs_to_cache.clear()
+
+            if to_process:
+                logger.info(f"Detected {len(to_process)} new or modified graph(s): {sorted(to_process)}")
+
+            for graph_name in to_process:
+                if graph_name in graph_cache._cached_graphs:
+                    continue
+
+                try:
+                    cache_result = await anyio.to_thread.run_sync(
+                        self.cache_graph_on_creation,
+                        graph_name,
+                    )
+                    if cache_result:
+                        cached_graphs.append(graph_name)
+                        graph_cache._cached_graphs.add(graph_name)
+
+                        for callback in graph_cache._cache_callbacks:
+                            try:
+                                result = callback(graph_name, cache_result)
+                                if inspect.isawaitable(result):
+                                    await result
+                            except Exception:
+                                pass
+                except Exception as e:
+                    logger.error(f"Error caching graph {graph_name}: {e}")
+
+            if cached_graphs and notify_progress:
+                await notify_progress(
+                    float(total_lines) if total_lines > 0 else 1,
+                    float(total_lines) if total_lines > 0 else 1,
+                    f"{completed_label} completed. Cached {len(cached_graphs)} graph(s): {', '.join(cached_graphs)}",
+                )
+        except Exception as e:
+            logger.error(f"Post-execution graph detection failed: {e}")
+
+    def _emit_graph_ready_task(
+        self,
+        *,
+        emit_graph_ready: bool,
+        graph_ready_initial: Optional[dict[str, str]],
+        notify_log: Callable[[str], Awaitable[None]],
+        graph_ready_task_id: Optional[str],
+        graph_ready_format: str,
+    ) -> None:
+        if emit_graph_ready and graph_ready_initial is not None:
+            try:
+                asyncio.create_task(
+                    self._emit_graph_ready_events(
+                        graph_ready_initial,
+                        notify_log,
+                        graph_ready_task_id,
+                        graph_ready_format,
+                    )
+                )
+            except Exception as e:
+                logger.warning("graph_ready emission failed to start: %s", e)
+
+    async def _stream_smcl_log(
+        self,
+        *,
+        smcl_path: str,
+        notify_log: Callable[[str], Awaitable[None]],
+        done: anyio.Event,
+        on_chunk: Optional[Callable[[str], Awaitable[None]]] = None,
+    ) -> None:
+        last_pos = 0
+        emitted_debug_chunks = 0
+        # Wait for Stata to create the SMCL file
+        while not done.is_set() and not os.path.exists(smcl_path):
+            await anyio.sleep(0.05)
+
+        try:
+            def _read_content() -> str:
+                try:
+                    with open(smcl_path, "r", encoding="utf-8", errors="replace") as f:
+                        f.seek(last_pos)
+                        return f.read()
+                except PermissionError:
+                    if os.name == "nt":
+                        try:
+                            res = subprocess.run(f'type "{smcl_path}"', shell=True, capture_output=True)
+                            full_content = res.stdout.decode("utf-8", errors="replace")
+                            if len(full_content) > last_pos:
+                                return full_content[last_pos:]
+                            return ""
+                        except Exception:
+                            return ""
+                    return ""
+                except FileNotFoundError:
+                    return ""
+
+            while not done.is_set():
+                chunk = await anyio.to_thread.run_sync(_read_content)
+                if chunk:
+                    last_pos += len(chunk)
+                    try:
+                        await notify_log(chunk)
+                    except Exception as exc:
+                        logger.debug("notify_log failed: %s", exc)
+                    if on_chunk is not None:
+                        try:
+                            await on_chunk(chunk)
+                        except Exception as exc:
+                            logger.debug("on_chunk callback failed: %s", exc)
+                await anyio.sleep(0.05)
+
+            chunk = await anyio.to_thread.run_sync(_read_content)
+            if on_chunk is not None:
+                # Final check even if last chunk is empty, to ensure
+                # graphs created at the very end are detected.
+                try:
+                    await on_chunk(chunk or "")
+                except Exception as exc:
+                    logger.debug("final on_chunk check failed: %s", exc)
+
+            if chunk:
+                last_pos += len(chunk)
+                try:
+                    await notify_log(chunk)
+                except Exception as exc:
+                    logger.debug("notify_log failed: %s", exc)
+
+        except Exception as e:
+            logger.warning(f"Log streaming failed: {e}")
+
+    def _run_streaming_blocking(
+        self,
+        *,
+        command: str,
+        tee: FileTeeIO,
+        cwd: Optional[str],
+        trace: bool,
+        echo: bool,
+        smcl_path: str,
+        smcl_log_name: str,
+        hold_attr: str,
+        require_smcl_log: bool = False,
+    ) -> tuple[int, Optional[Exception]]:
+        rc = -1
+        exc: Optional[Exception] = None
+        with self._exec_lock:
+            self._is_executing = True
+            try:
+                from sfi import Scalar, SFIToolkit  # Import SFI tools
+                with self._temp_cwd(cwd):
+                    logger.debug(
+                        "opening SMCL log name=%s path=%s cwd=%s",
+                        smcl_log_name,
+                        smcl_path,
+                        os.getcwd(),
+                    )
+                    try:
+                        log_opened = self._open_smcl_log(smcl_path, smcl_log_name, quiet=True)
+                    except Exception as e:
+                        log_opened = False
+                        logger.warning("_open_smcl_log raised: %r", e)
+                    logger.info("SMCL log_opened=%s path=%s", log_opened, smcl_path)
+                    if require_smcl_log and not log_opened:
+                        exc = RuntimeError("Failed to open SMCL log")
+                        logger.error("SMCL log open failed for %s", smcl_path)
+                        rc = 1
+                    if exc is None:
+                        try:
+                            with self._redirect_io_streaming(tee, tee):
+                                try:
+                                    if trace:
+                                        self.stata.run("set trace on")
+                                    logger.debug("running Stata command echo=%s: %s", echo, command)
+                                    ret = self.stata.run(command, echo=echo)
+                                    if ret:
+                                        logger.debug("stata.run output: %s", ret)
+
+                                    setattr(self, hold_attr, f"mcp_hold_{uuid.uuid4().hex[:8]}")
+                                    self.stata.run(
+                                        f"capture _return hold {getattr(self, hold_attr)}",
+                                        echo=False,
+                                    )
+
+                                    if isinstance(ret, str) and ret:
+                                        try:
+                                            tee.write(ret)
+                                        except Exception:
+                                            pass
+                                    try:
+                                        rc = self._get_rc_from_scalar(Scalar)
+                                    except Exception:
+                                        pass
+                                except Exception as e:
+                                    exc = e
+                                    logger.error("stata.run failed: %r", e)
+                                    if rc in (-1, 0):
+                                        rc = 1
+                                finally:
+                                    if trace:
+                                        try:
+                                            self.stata.run("set trace off")
+                                        except Exception:
+                                            pass
+                        finally:
+                            self._close_smcl_log(smcl_log_name)
+                            self._restore_results_from_hold(hold_attr)
+                        return rc, exc
+                    # If we get here, SMCL log failed and we're required to stop.
+                    return rc, exc
+            finally:
+                self._is_executing = False
+        return rc, exc
+
+    def _resolve_do_file_path(
+        self,
+        path: str,
+        cwd: Optional[str],
+    ) -> tuple[Optional[str], Optional[str], Optional[CommandResponse]]:
+        if cwd is not None and not os.path.isdir(cwd):
+            return None, None, CommandResponse(
+                command=f'do "{path}"',
+                rc=601,
+                stdout="",
+                stderr=None,
+                success=False,
+                error=ErrorEnvelope(
+                    message=f"cwd not found: {cwd}",
+                    rc=601,
+                    command=path,
+                ),
+            )
+
+        effective_path = path
+        if cwd is not None and not os.path.isabs(path):
+            effective_path = os.path.abspath(os.path.join(cwd, path))
+
+        if not os.path.exists(effective_path):
+            return None, None, CommandResponse(
+                command=f'do "{effective_path}"',
+                rc=601,
+                stdout="",
+                stderr=None,
+                success=False,
+                error=ErrorEnvelope(
+                    message=f"Do-file not found: {effective_path}",
+                    rc=601,
+                    command=effective_path,
+                ),
+            )
+
+        path_for_stata = effective_path.replace("\\", "/")
+        command = f'do "{path_for_stata}"'
+        return effective_path, command, None
+
+    @contextmanager
+    def _smcl_log_capture(self) -> "Generator[Tuple[str, str], None, None]":
+        """
+        Context manager that wraps command execution in a named SMCL log.
+
+        This runs alongside any user logs (named logs can coexist).
+        Yields (log_name, log_path) tuple for use within the context.
+        The SMCL file is NOT deleted automatically - caller should clean up.
+
+        Usage:
+            with self._smcl_log_capture() as (log_name, smcl_path):
+                self.stata.run(cmd)
+            # After context, read smcl_path for raw SMCL output
+        """
+        # Use a unique name but do NOT pre-create it with mkstemp, to avoid existing file locks.
+        # Stata will create the file.
+        smcl_path = self._create_smcl_log_path()
+        # Unique log name to avoid collisions with user logs
+        log_name = self._make_smcl_log_name()
+
+        try:
+            # Open named SMCL log (quietly to avoid polluting output)
+            log_opened = self._open_smcl_log(smcl_path, log_name, quiet=True)
+            if not log_opened:
+                # Still yield, consumer might see empty file or handle error,
+                # but we can't do much if Stata refuses to log.
+                pass
+
+            yield log_name, smcl_path
+        finally:
+            # Always close our named log
+            self._close_smcl_log(log_name)
+
+    def _read_smcl_file(self, path: str) -> str:
+        """Read SMCL file contents, handling encoding issues and Windows file locks."""
+        try:
+            with open(path, 'r', encoding='utf-8', errors='replace') as f:
+                return f.read()
+        except PermissionError:
+            if os.name == "nt":
+                # Windows Fallback: Try to use 'type' command to bypass exclusive lock
+                try:
+                    res = subprocess.run(f'type "{path}"', shell=True, capture_output=True)
+                    if res.returncode == 0:
+                        return res.stdout.decode('utf-8', errors='replace')
+                except Exception as e:
+                    logger.debug(f"Combined fallback read failed: {e}")
+            logger.warning(f"Failed to read SMCL file {path} due to lock")
+            return ""
+        except Exception as e:
+            logger.warning(f"Failed to read SMCL file {path}: {e}")
+            return ""
+
+    def _extract_error_from_smcl(self, smcl_content: str, rc: int) -> Tuple[str, str]:
+        """
+        Extract error message and context from raw SMCL output.
+
+        Uses {err} tags as the authoritative source for error detection.
+
+        Returns:
+            Tuple of (error_message, context_string)
+        """
+        if not smcl_content:
+            return f"Stata error r({rc})", ""
+
+        lines = smcl_content.splitlines()
+
+        # Search backwards for {err} tags - they indicate error lines
+        error_lines = []
+        error_start_idx = -1
+
+        for i in range(len(lines) - 1, -1, -1):
+            line = lines[i]
+            if '{err}' in line:
+                if error_start_idx == -1:
+                    error_start_idx = i
+                # Walk backwards to find consecutive {err} lines
+                j = i
+                while j >= 0 and '{err}' in lines[j]:
+                    error_lines.insert(0, lines[j])
+                    j -= 1
+                break
+
+        if error_lines:
+            # Clean SMCL tags from error message
+            clean_lines = []
+            for line in error_lines:
+                # Remove SMCL tags but keep the text content
+                cleaned = re.sub(r'\{[^}]*\}', '', line).strip()
+                if cleaned:
+                    clean_lines.append(cleaned)
+
+            error_msg = " ".join(clean_lines) or f"Stata error r({rc})"
+
+            # Context is everything from error start to end
+            context_start = max(0, error_start_idx - 5)  # Include 5 lines before error
+            context = "\n".join(lines[context_start:])
+
+            return error_msg, context
+
+        # Fallback: no {err} found, return last 30 lines as context
+        context_start = max(0, len(lines) - 30)
+        context = "\n".join(lines[context_start:])
+
+        return f"Stata error r({rc})", context
+
+    def _parse_rc_from_smcl(self, smcl_content: str) -> Optional[int]:
+        """Parse return code from SMCL content using specific structural patterns."""
+        if not smcl_content:
+            return None
+
+        # 1. Primary check: SMCL search tag {search r(N), ...}
+        # This is the most authoritative interactive indicator
+        matches = list(re.finditer(r'\{search r\((\d+)\)', smcl_content))
+        if matches:
+            try:
+                return int(matches[-1].group(1))
+            except Exception:
+                pass
+
+        # 2. Secondary check: Standalone r(N); pattern
+        # This appears at the end of command blocks
+        matches = list(re.finditer(r'(?<!\w)r\((\d+)\);?', smcl_content))
+        if matches:
+            try:
+                return int(matches[-1].group(1))
+            except Exception:
+                pass
+
+        return None
+
     @staticmethod
     def _create_graph_cache_callback(on_graph_cached, notify_log):
         """Create a standardized graph cache callback with proper error handling."""
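Note: `{err}` tags drive error detection, and the visible message is recovered by stripping all SMCL directives with the same regex used above. A two-line illustration (the sample line is invented):

    import re

    line = "{err}variable foo not found"
    print(re.sub(r"\{[^}]*\}", "", line).strip())  # -> variable foo not found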
@@ -203,6 +840,159 @@ class StataClient:
 
         return graph_cache_callback
 
+    def _get_cached_graph_path(self, graph_name: str) -> Optional[str]:
+        if not hasattr(self, "_cache_lock") or not hasattr(self, "_preemptive_cache"):
+            return None
+        try:
+            with self._cache_lock:
+                cache_path = self._preemptive_cache.get(graph_name)
+                if not cache_path:
+                    return None
+
+                # Double-check validity (e.g. signature match for current command)
+                if not self._is_cache_valid(graph_name, cache_path):
+                    return None
+
+                return cache_path
+        except Exception:
+            return None
+
+    async def _emit_graph_ready_for_graphs(
+        self,
+        graph_names: List[str],
+        *,
+        notify_log: Callable[[str], Awaitable[None]],
+        task_id: Optional[str],
+        export_format: str,
+        graph_ready_initial: Optional[dict[str, str]],
+    ) -> None:
+        if not graph_names:
+            return
+        fmt = (export_format or "svg").strip().lower()
+        for graph_name in graph_names:
+            signature = self._get_graph_signature(graph_name)
+            if graph_ready_initial is not None:
+                previous = graph_ready_initial.get(graph_name)
+                if previous is not None and previous == signature:
+                    continue
+            try:
+                export_path = None
+                if fmt == "svg":
+                    export_path = self._get_cached_graph_path(graph_name)
+                if not export_path:
+                    export_path = await anyio.to_thread.run_sync(
+                        lambda: self.export_graph(graph_name, format=fmt)
+                    )
+                payload = {
+                    "event": "graph_ready",
+                    "task_id": task_id,
+                    "graph": {
+                        "name": graph_name,
+                        "path": export_path,
+                        "label": graph_name,
+                    },
+                }
+                await notify_log(json.dumps(payload))
+                if graph_ready_initial is not None:
+                    graph_ready_initial[graph_name] = signature
+            except Exception as e:
+                logger.warning("graph_ready export failed for %s: %s", graph_name, e)
+
+    async def _maybe_cache_graphs_on_chunk(
+        self,
+        *,
+        graph_cache: Optional[StreamingGraphCache],
+        emit_graph_ready: bool,
+        notify_log: Callable[[str], Awaitable[None]],
+        graph_ready_task_id: Optional[str],
+        graph_ready_format: str,
+        graph_ready_initial: Optional[dict[str, str]],
+        last_check: List[float],
+        force: bool = False,
+    ) -> None:
+        if not graph_cache or not graph_cache.auto_cache:
+            return
+        if self._is_executing and not force:
+            # Skip polling if Stata is busy; it will block on _exec_lock anyway.
+            # During final check (force=True), we know it's safe because _run_streaming_blocking has finished.
+            return
+        now = time.monotonic()
+        if not force and last_check and now - last_check[0] < 0.25:
+            return
+        if last_check:
+            last_check[0] = now
+        try:
+            cached_names = await graph_cache.cache_detected_graphs_with_pystata()
+        except Exception as e:
+            logger.debug("graph_ready polling failed: %s", e)
+            return
+        if emit_graph_ready and cached_names:
+            await self._emit_graph_ready_for_graphs(
+                cached_names,
+                notify_log=notify_log,
+                task_id=graph_ready_task_id,
+                export_format=graph_ready_format,
+                graph_ready_initial=graph_ready_initial,
+            )
+
+    async def _emit_graph_ready_events(
+        self,
+        initial_graphs: dict[str, str],
+        notify_log: Callable[[str], Awaitable[None]],
+        task_id: Optional[str],
+        export_format: str,
+    ) -> None:
+        try:
+            current_graphs = list(self.list_graphs(force_refresh=True))
+        except Exception as e:
+            logger.warning("graph_ready: list_graphs failed: %s", e)
+            return
+
+        if not current_graphs:
+            return
+
+        for graph_name in current_graphs:
+            signature = self._get_graph_signature(graph_name)
+            previous = initial_graphs.get(graph_name)
+            if previous is not None and previous == signature:
+                continue
+            try:
+                export_path = None
+                if export_format == "svg":
+                    export_path = self._get_cached_graph_path(graph_name)
+
+                if not export_path:
+                    export_path = await anyio.to_thread.run_sync(
+                        lambda: self.export_graph(graph_name, format=export_format)
+                    )
+                payload = {
+                    "event": "graph_ready",
+                    "task_id": task_id,
+                    "graph": {
+                        "name": graph_name,
+                        "path": export_path,
+                        "label": graph_name,
+                    },
+                }
+                await notify_log(json.dumps(payload))
+                initial_graphs[graph_name] = signature
+            except Exception as e:
+                logger.warning("graph_ready export failed for %s: %s", graph_name, e)
+
+    def _get_graph_signature(self, graph_name: str) -> str:
+        """
+        Get a stable signature for a graph without calling Stata.
+        Consistent with GraphCreationDetector implementation.
+        """
+        if not graph_name:
+            return ""
+        cmd_idx = getattr(self, "_command_idx", 0)
+        # Only include command index for default 'Graph' to detect modifications.
+        # For named graphs, we only want to detect them when they are new or renamed.
+        if graph_name.lower() == "graph":
+            return f"{graph_name}_{cmd_idx}"
+        return graph_name
+
     def _request_break_in(self) -> None:
         """
         Attempt to interrupt a running Stata command when cancellation is requested.
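Note: `graph_ready` notifications ride the log channel as one-line JSON objects. The payload shape used above, as a standalone sample (all values are placeholders):

    import json

    payload = {
        "event": "graph_ready",
        "task_id": "task-123",  # placeholder id
        "graph": {"name": "Graph", "path": "/tmp/Graph.svg", "label": "Graph"},
    }
    print(json.dumps(payload))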
@@ -272,72 +1062,199 @@ class StataClient:
         finally:
             os.chdir(prev)
 
+    @contextmanager
+    def _safe_redirect_fds(self):
+        """Redirects fd 1 (stdout) to fd 2 (stderr) at the OS level."""
+        # Save original stdout fd
+        try:
+            stdout_fd = os.dup(1)
+        except Exception:
+            # Fallback if we can't dup (e.g. strange environment)
+            yield
+            return
+
+        try:
+            # Redirect OS-level stdout to stderr
+            os.dup2(2, 1)
+            yield
+        finally:
+            # Restore stdout
+            try:
+                os.dup2(stdout_fd, 1)
+                os.close(stdout_fd)
+            except Exception:
+                pass
+
     def init(self):
         """Initializes usage of pystata using cached discovery results."""
         if self._initialized:
             return
 
+        # Suppress any non-UTF8 banner output from PyStata on stdout, which breaks MCP stdio transport
+        from contextlib import redirect_stdout, redirect_stderr
+
         try:
             import stata_setup
 
-            # Get discovered Stata
-
-
-
-
-
-            bin_dir = os.path.dirname(stata_exec_path)
-            if bin_dir:
-                candidates.append(bin_dir)
-
-            # 2. App Bundle: .../StataMP.app (macOS only)
-            curr = bin_dir
-            app_bundle = None
-            while len(curr) > 1:
-                if curr.endswith(".app"):
-                    app_bundle = curr
-                    break
-                parent = os.path.dirname(curr)
-                if parent == curr:  # Reached root directory, prevent infinite loop on Windows
-                    break
-                curr = parent
-
-            if app_bundle:
-                candidates.insert(0, os.path.dirname(app_bundle))
-                candidates.insert(1, app_bundle)
+            # Get discovered Stata paths (cached from first call)
+            discovery_candidates = _get_discovery_candidates()
+            if not discovery_candidates:
+                raise RuntimeError("No Stata candidates found during discovery")
+
+            logger.info("Initializing Stata engine (attempting up to %d candidate binaries)...", len(discovery_candidates))
 
-            #
-
-
-
-                if c in seen:
-                    continue
-                seen.add(c)
-                deduped.append(c)
-            candidates = deduped
+            # Diagnostic: force faulthandler to output to stderr for C crashes
+            import faulthandler
+            faulthandler.enable(file=sys.stderr)
+            import subprocess
 
             success = False
-
-
-
-
-
+            last_error = None
+            chosen_exec: Optional[Tuple[str, str]] = None
+
+            for stata_exec_path, edition in discovery_candidates:
+                candidates = []
+                # Prefer the binary directory first (documented input for stata_setup)
+                bin_dir = os.path.dirname(stata_exec_path)
+
+                # 2. App Bundle: .../StataMP.app (macOS only)
+                curr = bin_dir
+                app_bundle = None
+                while len(curr) > 1:
+                    if curr.endswith(".app"):
+                        app_bundle = curr
+                        break
+                    parent = os.path.dirname(curr)
+                    if parent == curr:
+                        break
+                    curr = parent
+
+                ordered_candidates = []
+                if app_bundle:
+                    # On macOS, the parent of the .app is often the correct install path
+                    # (e.g., /Applications/StataNow containing StataMP.app)
+                    parent_dir = os.path.dirname(app_bundle)
+                    if parent_dir and parent_dir != "/":
+                        ordered_candidates.append(parent_dir)
+                    ordered_candidates.append(app_bundle)
+
+                if bin_dir:
+                    ordered_candidates.append(bin_dir)
+
+                # Deduplicate preserving order
+                seen = set()
+                candidates = []
+                for c in ordered_candidates:
+                    if c not in seen:
+                        seen.add(c)
+                        candidates.append(c)
+
+                for path in candidates:
+                    try:
+                        # 1. Pre-flight check in a subprocess to capture hard exits/crashes
+                        sys.stderr.write(f"[mcp_stata] DEBUG: Pre-flight check for path '{path}'\n")
+                        sys.stderr.flush()
+
+                        preflight_code = f"""
+import sys
+import stata_setup
+from contextlib import redirect_stdout, redirect_stderr
+with redirect_stdout(sys.stderr), redirect_stderr(sys.stderr):
+    try:
+        stata_setup.config({repr(path)}, {repr(edition)})
+        from pystata import stata
+        # Minimal verification of engine health
+        stata.run('display 1', echo=False)
+        print('PREFLIGHT_OK')
+    except Exception as e:
+        print(f'PREFLIGHT_FAIL: {{e}}', file=sys.stderr)
+        sys.exit(1)
+"""
+
+                        try:
+                            # Use shorter timeout for pre-flight if feasible,
+                            # but keep it safe for slow environments. 15s is usually enough for a ping.
+                            res = subprocess.run(
+                                [sys.executable, "-c", preflight_code],
+                                capture_output=True, text=True, timeout=20
+                            )
+                            if res.returncode != 0:
+                                sys.stderr.write(f"[mcp_stata] Pre-flight failed (rc={res.returncode}) for '{path}'\n")
+                                if res.stdout.strip():
+                                    sys.stderr.write(f"--- Pre-flight stdout ---\n{res.stdout.strip()}\n")
+                                if res.stderr.strip():
+                                    sys.stderr.write(f"--- Pre-flight stderr ---\n{res.stderr.strip()}\n")
+                                sys.stderr.flush()
+                                last_error = f"Pre-flight failed: {res.stdout.strip()} {res.stderr.strip()}"
+                                continue
+                            else:
+                                sys.stderr.write(f"[mcp_stata] Pre-flight succeeded for '{path}'. Proceeding to in-process init.\n")
+                                sys.stderr.flush()
+                        except Exception as pre_e:
+                            sys.stderr.write(f"[mcp_stata] Pre-flight execution error for '{path}': {repr(pre_e)}\n")
+                            sys.stderr.flush()
+                            last_error = pre_e
+                            continue
+
+                        msg = f"[mcp_stata] DEBUG: In-process stata_setup.config('{path}', '{edition}')\n"
+                        sys.stderr.write(msg)
+                        sys.stderr.flush()
+                        # Redirect both sys.stdout/err AND the raw fds to our stderr pipe.
+                        with redirect_stdout(sys.stderr), redirect_stderr(sys.stderr), self._safe_redirect_fds():
+                            stata_setup.config(path, edition)
+
+                        sys.stderr.write(f"[mcp_stata] DEBUG: stata_setup.config succeeded for path: {path}\n")
+                        sys.stderr.flush()
+                        success = True
+                        chosen_exec = (stata_exec_path, edition)
+                        logger.info("stata_setup.config succeeded with path: %s", path)
+                        break
+                    except BaseException as e:
+                        last_error = e
+                        sys.stderr.write(f"[mcp_stata] WARNING: In-process stata_setup.config caught: {repr(e)}\n")
+                        sys.stderr.flush()
+                        logger.warning("stata_setup.config failed for path '%s': %s", path, e)
+                        if isinstance(e, SystemExit):
+                            break
+                        continue
+
+                if success:
+                    # Cache winning candidate for subsequent lookups
+                    global _discovery_result
+                    if chosen_exec:
+                        _discovery_result = chosen_exec
                     break
-            except Exception:
-                continue
 
             if not success:
-
-                f"stata_setup.config failed
-                f"
+                error_msg = (
+                    f"stata_setup.config failed to initialize Stata. "
+                    f"Tried candidates: {discovery_candidates}. "
+                    f"Last error: {repr(last_error)}"
                 )
+                sys.stderr.write(f"[mcp_stata] ERROR: {error_msg}\n")
+                sys.stderr.flush()
+                logger.error(error_msg)
+                raise RuntimeError(error_msg)
 
             # Cache the binary path for later use (e.g., PNG export on Windows)
             self._stata_exec_path = os.path.abspath(stata_exec_path)
 
-
-
-
+            try:
+                sys.stderr.write("[mcp_stata] DEBUG: Importing pystata and warming up...\n")
+                sys.stderr.flush()
+                with redirect_stdout(sys.stderr), redirect_stderr(sys.stderr), self._safe_redirect_fds():
+                    from pystata import stata  # type: ignore[import-not-found]
+                    # Warm up the engine and swallow any late splash screen output
+                    stata.run("display 1", echo=False)
+                self.stata = stata
+                self._initialized = True
+                sys.stderr.write("[mcp_stata] DEBUG: pystata warmed up successfully\n")
+                sys.stderr.flush()
+            except BaseException as e:
+                sys.stderr.write(f"[mcp_stata] ERROR: Failed to load pystata or run initial command: {repr(e)}\n")
+                sys.stderr.flush()
+                logger.error("Failed to load pystata or run initial command: %s", e)
+                raise
 
             # Initialize list_graphs TTL cache
             self._list_graphs_cache = None
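Note: the pre-flight runs `stata_setup.config` in a child interpreter first, so a hard crash (segfault, abort, process exit) cannot take down the server. The bare pattern, with a stand-in probe instead of the real Stata import:

    import subprocess
    import sys

    probe = "import sqlite3; print('PREFLIGHT_OK')"  # stand-in for the fragile import
    res = subprocess.run([sys.executable, "-c", probe],
                         capture_output=True, text=True, timeout=20)
    if res.returncode == 0 and "PREFLIGHT_OK" in res.stdout:
        pass  # safe to repeat the import in-process
    else:
        print("pre-flight failed:", res.stderr.strip(), file=sys.stderr)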
@@ -408,32 +1325,11 @@ class StataClient:
 
         return pat.sub(repl, code)
 
-    def _read_return_code(self) -> int:
-        """Read the last Stata return code without mutating rc."""
-        try:
-            from sfi import Macro  # type: ignore[import-not-found]
-            rc_val = Macro.getCValue("rc")  # type: ignore[attr-defined]
-            if rc_val is not None:
-                return int(float(rc_val))
-            # If getCValue returns None, fall through to the alternative approach
-        except Exception:
-            pass
-
-        # Alternative approach: use a global macro
-        # CRITICAL: This must be done carefully to avoid mutating c(rc)
-        try:
-            self.stata.run("global MCP_RC = c(rc)")
-            from sfi import Macro as Macro2  # type: ignore[import-not-found]
-            rc_val = Macro2.getGlobal("MCP_RC")
-            return int(float(rc_val))
-        except Exception:
-            return -1
-
     def _get_rc_from_scalar(self, Scalar) -> int:
         """Safely get return code, handling None values."""
         try:
             from sfi import Macro
-            rc_val = Macro.
+            rc_val = Macro.getGlobal("_rc")
             if rc_val is None:
                 return -1
             return int(float(rc_val))
@@ -441,12 +1337,27 @@ class StataClient:
             return -1
 
     def _parse_rc_from_text(self, text: str) -> Optional[int]:
-
-        if
+        """Parse return code from plain text using structural patterns."""
+        if not text:
+            return None
+
+        # 1. Primary check: 'search r(N)' pattern (SMCL tag potentially stripped)
+        matches = list(re.finditer(r'search r\((\d+)\)', text))
+        if matches:
             try:
-                return int(
+                return int(matches[-1].group(1))
             except Exception:
-
+                pass
+
+        # 2. Secondary check: Standalone r(N); pattern
+        # This appears at the end of command blocks
+        matches = list(re.finditer(r'(?<!\w)r\((\d+)\);?', text))
+        if matches:
+            try:
+                return int(matches[-1].group(1))
+            except Exception:
+                pass
+
         return None
 
     def _parse_line_from_text(self, text: str) -> Optional[int]:
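Note: both patterns take the last match so that earlier commands in the same log cannot mask the final return code. Quick demo on an invented log tail:

    import re

    tail = "variable foo not found\nsearch r(111)\nend of do-file\nr(111);"
    m = list(re.finditer(r"search r\((\d+)\)", tail))
    print(int(m[-1].group(1)) if m else None)  # -> 111
    m = list(re.finditer(r"(?<!\w)r\((\d+)\);?", tail))
    print(int(m[-1].group(1)) if m else None)  # -> 111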
@@ -458,11 +1369,104 @@ class StataClient:
                 return None
         return None
 
+    def _read_log_backwards_until_error(self, path: str, max_bytes: int = 5_000_000) -> str:
+        """
+        Read log file backwards in chunks, stopping when we find {err} tags or reach the start.
+
+        This is more efficient and robust than reading huge fixed tails, as we only read
+        what we need to find the error.
+
+        Args:
+            path: Path to the log file
+            max_bytes: Maximum total bytes to read (safety limit, default 5MB)
+
+        Returns:
+            The relevant portion of the log containing the error and context
+        """
+        try:
+            chunk_size = 50_000  # Read 50KB chunks at a time
+            total_read = 0
+            chunks = []
+
+            with open(path, 'rb') as f:
+                # Get file size
+                f.seek(0, os.SEEK_END)
+                file_size = f.tell()
+
+                if file_size == 0:
+                    return ""
+
+                # Start from the end
+                position = file_size
+
+                while position > 0 and total_read < max_bytes:
+                    # Calculate how much to read in this chunk
+                    read_size = min(chunk_size, position, max_bytes - total_read)
+                    position -= read_size
+
+                    # Seek and read
+                    f.seek(position)
+                    chunk = f.read(read_size)
+                    chunks.insert(0, chunk)
+                    total_read += read_size
+
+                    # Decode and check for error tags
+                    try:
+                        accumulated = b''.join(chunks).decode('utf-8', errors='replace')
+
+                        # Check if we've found an error tag
+                        if '{err}' in accumulated:
+                            # Found it! Read one more chunk for context before the error
+                            if position > 0 and total_read < max_bytes:
+                                extra_read = min(chunk_size, position, max_bytes - total_read)
+                                position -= extra_read
+                                f.seek(position)
+                                extra_chunk = f.read(extra_read)
+                                chunks.insert(0, extra_chunk)
+
+                            return b''.join(chunks).decode('utf-8', errors='replace')
+
+                    except UnicodeDecodeError:
+                        # Continue reading if we hit a decode error (might be mid-character)
+                        continue
+
+            # Read everything we've accumulated
+            return b''.join(chunks).decode('utf-8', errors='replace')
+
+        except Exception as e:
+            logger.warning(f"Error reading log backwards: {e}")
+            # Fallback to regular tail read
+            return self._read_log_tail(path, 200_000)
+
+    def _read_log_tail_smart(self, path: str, rc: int, trace: bool = False) -> str:
+        """
+        Smart log tail reader that adapts based on whether an error occurred.
+
+        - If rc == 0: Read normal tail (20KB without trace, 200KB with trace)
+        - If rc != 0: Search backwards dynamically to find the error
+
+        Args:
+            path: Path to the log file
+            rc: Return code from Stata
+            trace: Whether trace mode was enabled
+
+        Returns:
+            Relevant log content
+        """
+        if rc != 0:
+            # Error occurred - search backwards for {err} tags
+            return self._read_log_backwards_until_error(path)
+        else:
+            # Success - just read normal tail
+            tail_size = 200_000 if trace else 20_000
+            return self._read_log_tail(path, tail_size)
+
     def _read_log_tail(self, path: str, max_chars: int) -> str:
         try:
             with open(path, "rb") as f:
                 f.seek(0, os.SEEK_END)
                 size = f.tell()
+
             if size <= 0:
                 return ""
             read_size = min(size, max_chars)
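Note: the backwards scan reads fixed-size chunks from the file end and stops as soon as the accumulated text contains `{err}`, so short errors in huge logs stay cheap. A condensed standalone version of the same idea:

    def tail_until_marker(path: str, marker: bytes = b"{err}",
                          chunk: int = 50_000, cap: int = 5_000_000) -> str:
        """Read a file backwards in chunks until marker is seen or cap is hit."""
        parts, read = [], 0
        with open(path, "rb") as f:
            f.seek(0, 2)  # seek to end
            pos = f.tell()
            while pos > 0 and read < cap:
                step = min(chunk, pos, cap - read)
                pos -= step
                f.seek(pos)
                parts.insert(0, f.read(step))
                read += step
                if marker in b"".join(parts):
                    break
        return b"".join(parts).decode("utf-8", errors="replace")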
@@ -472,6 +1476,98 @@ class StataClient:
         except Exception:
             return ""
 
+    def _build_combined_log(
+        self,
+        tail: TailBuffer,
+        path: str,
+        rc: int,
+        trace: bool,
+        exc: Optional[Exception],
+    ) -> str:
+        tail_text = tail.get_value()
+        log_tail = self._read_log_tail_smart(path, rc, trace)
+        if log_tail and len(log_tail) > len(tail_text):
+            tail_text = log_tail
+        return (tail_text or "") + (f"\n{exc}" if exc else "")
+
+    def _truncate_command_output(
+        self,
+        result: CommandResponse,
+        max_output_lines: Optional[int],
+    ) -> CommandResponse:
+        if max_output_lines is None or not result.stdout:
+            return result
+        lines = result.stdout.splitlines()
+        if len(lines) <= max_output_lines:
+            return result
+        truncated_lines = lines[:max_output_lines]
+        truncated_lines.append(
+            f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)"
+        )
+        truncated_stdout = "\n".join(truncated_lines)
+        if hasattr(result, "model_copy"):
+            return result.model_copy(update={"stdout": truncated_stdout})
+        return result.copy(update={"stdout": truncated_stdout})
|
|
1511
|
+
|
|
1512
|
+
def _run_plain_capture(self, code: str) -> str:
|
|
1513
|
+
"""
|
|
1514
|
+
Run a Stata command while capturing output using a named SMCL log.
|
|
1515
|
+
This is the most reliable way to capture output (like return list)
|
|
1516
|
+
without interfering with user logs or being affected by stdout redirection issues.
|
|
1517
|
+
"""
|
|
1518
|
+
if not self._initialized:
|
|
1519
|
+
self.init()
|
|
1520
|
+
|
|
1521
|
+
with self._exec_lock:
|
|
1522
|
+
hold_name = f"mcp_hold_{uuid.uuid4().hex[:8]}"
|
|
1523
|
+
# Hold results BEFORE opening the capture log
|
|
1524
|
+
self.stata.run(f"capture _return hold {hold_name}", echo=False)
|
|
1525
|
+
|
|
1526
|
+
try:
|
|
1527
|
+
with self._smcl_log_capture() as (log_name, smcl_path):
|
|
1528
|
+
# Restore results INSIDE the capture log so return list can see them
|
|
1529
|
+
self.stata.run(f"capture _return restore {hold_name}", echo=False)
|
|
1530
|
+
try:
|
|
1531
|
+
self.stata.run(code, echo=True)
|
|
1532
|
+
except Exception:
|
|
1533
|
+
pass
|
|
1534
|
+
except Exception:
|
|
1535
|
+
# Cleanup hold if log capture failed to open
|
|
1536
|
+
self.stata.run(f"capture _return drop {hold_name}", echo=False)
|
|
1537
|
+
content = ""
|
|
1538
|
+
smcl_path = None
|
|
1539
|
+
else:
|
|
1540
|
+
# Read SMCL content and convert to text
|
|
1541
|
+
content = self._read_smcl_file(smcl_path)
|
|
1542
|
+
# Remove the temp file
|
|
1543
|
+
self._safe_unlink(smcl_path)
|
|
1544
|
+
|
|
1545
|
+
return self._smcl_to_text(content)
|
|
1546
|
+
|
|
1547
|
+
def _count_do_file_lines(self, path: str) -> int:
|
|
1548
|
+
"""
|
|
1549
|
+
Count the number of executable lines in a .do file for progress inference.
|
|
1550
|
+
|
|
1551
|
+
Blank lines and comment-only lines (starting with * or //) are ignored.
|
|
1552
|
+
"""
|
|
1553
|
+
try:
|
|
1554
|
+
with open(path, "r", encoding="utf-8", errors="replace") as f:
|
|
1555
|
+
lines = f.read().splitlines()
|
|
1556
|
+
except Exception:
|
|
1557
|
+
return 0
|
|
1558
|
+
|
|
1559
|
+
total = 0
|
|
1560
|
+
for line in lines:
|
|
1561
|
+
s = line.strip()
|
|
1562
|
+
if not s:
|
|
1563
|
+
continue
|
|
1564
|
+
if s.startswith("*"):
|
|
1565
|
+
continue
|
|
1566
|
+
if s.startswith("//"):
|
|
1567
|
+
continue
|
|
1568
|
+
total += 1
|
|
1569
|
+
return total
|
|
1570
|
+
|
|
475
1571
|
def _smcl_to_text(self, smcl: str) -> str:
|
|
476
1572
|
"""Convert simple SMCL markup into plain text for LLM-friendly help."""
|
|
477
1573
|
# First, keep inline directive content if present (e.g., {bf:word} -> word)
|
|
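Note: the executable-line rule that `_count_do_file_lines` introduces can be checked in isolation. A minimal sketch, assuming a hypothetical do-file text that is not taken from the package:

    src = "* setup\n\n// load data\nsummarize price\n"
    # Mirrors the filter above: blanks and *- or //-prefixed lines are skipped,
    # so only `summarize price` counts as executable.
    executable = [s for s in map(str.strip, src.splitlines())
                  if s and not s.startswith("*") and not s.startswith("//")]
    assert len(executable) == 1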
@@ -523,6 +1619,7 @@ class StataClient:
         if not self._initialized:
             self.init()

+        self._increment_command_idx()
         # Rewrite graph names with special characters to internal aliases
         code = self._maybe_rewrite_graph_name_in_command(code)

@@ -530,17 +1627,44 @@ class StataClient:
         error_buffer = StringIO()
         rc = 0
         sys_error = None
+        error_envelope = None
+        smcl_content = ""
+        smcl_path = None

         with self._exec_lock:
             try:
-                from sfi import Scalar, SFIToolkit
+                from sfi import Scalar, SFIToolkit
                 with self._temp_cwd(cwd):
-
-
-
-
-
-
+                    # Create SMCL log for authoritative output capture
+                    # Use shorter unique path to avoid Windows path issues
+                    smcl_path = self._create_smcl_log_path(prefix="mcp_", max_hex=16, base_dir=cwd)
+                    log_name = self._make_smcl_log_name()
+                    self._open_smcl_log(smcl_path, log_name)
+
+                    try:
+                        with self._redirect_io(output_buffer, error_buffer):
+                            try:
+                                if trace:
+                                    self.stata.run("set trace on")
+
+                                # Run the user code
+                                self.stata.run(code, echo=echo)
+
+                                # Hold results IMMEDIATELY to prevent clobbering by cleanup
+                                self._hold_name = f"mcp_hold_{uuid.uuid4().hex[:8]}"
+                                self.stata.run(f"capture _return hold {self._hold_name}", echo=False)
+
+                            finally:
+                                if trace:
+                                    try:
+                                        self.stata.run("set trace off")
+                                    except Exception:
+                                        pass
+                    finally:
+                        # Close SMCL log AFTER output redirection
+                        self._close_smcl_log(log_name)
+                        # Restore and capture results while still inside the lock
+                        self._restore_results_from_hold("_hold_name")

             except Exception as e:
                 sys_error = str(e)
@@ -548,36 +1672,66 @@ class StataClient:
                 parsed_rc = self._parse_rc_from_text(sys_error)
                 rc = parsed_rc if parsed_rc is not None else 1

+        # Read SMCL content as the authoritative source
+        if smcl_path:
+            smcl_content = self._read_smcl_file(smcl_path)
+            # Clean up SMCL file
+            self._safe_unlink(smcl_path)
+
         stdout_content = output_buffer.getvalue()
         stderr_content = error_buffer.getvalue()
-        full_log = stdout_content + "\n" + stderr_content

-        #
-        if rc
-        parsed_rc = self.
-        if parsed_rc is not None:
+        # If RC wasn't captured or is generic, try to parse from SMCL
+        if rc in (0, 1, -1) and smcl_content:
+            parsed_rc = self._parse_rc_from_smcl(smcl_content)
+            if parsed_rc is not None and parsed_rc != 0:
                 rc = parsed_rc
+            elif rc == -1:
+                rc = 0
+
+        # If stdout is empty but SMCL has content AND command succeeded, use SMCL as stdout
+        # This handles cases where Stata writes to log but not to redirected stdout
+        # For errors, we keep stdout empty and error info goes to ErrorEnvelope
+        if rc == 0 and not stdout_content and smcl_content:
+            # Convert SMCL to plain text for stdout
+            stdout_content = self._smcl_to_text(smcl_content)

-        error_envelope = None
         if rc != 0:
             if sys_error:
                 msg = sys_error
-
+                context = sys_error
             else:
-                # Extract error
-                msg, context = self.
-
-                error_envelope = ErrorEnvelope(
+                # Extract error from SMCL (authoritative source)
+                msg, context = self._extract_error_from_smcl(smcl_content, rc)
+
+            error_envelope = ErrorEnvelope(
+                message=msg,
+                rc=rc,
+                context=context,
+                snippet=smcl_content[-800:] if smcl_content else (stdout_content + stderr_content)[-800:],
+                smcl_output=smcl_content  # Include raw SMCL for debugging
+            )
+            stderr_content = context

-
+        resp = CommandResponse(
             command=code,
             rc=rc,
             stdout=stdout_content,
             stderr=stderr_content,
             success=(rc == 0),
             error=error_envelope,
+            log_path=smcl_path if smcl_path else None,
+            smcl_output=smcl_content,
         )

+        # Capture results immediately after execution, INSIDE the lock
+        try:
+            self._last_results = self.get_stored_results(force_fresh=True)
+        except Exception:
+            self._last_results = None
+
+        return resp
+
     def _exec_no_capture(self, code: str, echo: bool = False, trace: bool = False) -> CommandResponse:
         """Execute Stata code while leaving stdout/stderr alone."""
         if not self._initialized:
@@ -595,10 +1749,8 @@ class StataClient:
                 ret = self.stata.run(code, echo=echo)
                 if isinstance(ret, str) and ret:
                     ret_text = ret
+

-                # Robust RC check even for no-capture
-                rc = self._read_return_code()
-
             except Exception as e:
                 exc = e
                 rc = 1
@@ -631,23 +1783,115 @@ class StataClient:
             error=error,
         )

+    def _exec_no_capture_silent(self, code: str, echo: bool = False, trace: bool = False) -> CommandResponse:
+        """Execute Stata code while suppressing stdout/stderr output."""
+        if not self._initialized:
+            self.init()
+
+        exc: Optional[Exception] = None
+        ret_text: Optional[str] = None
+        rc = 0
+
+        with self._exec_lock:
+            try:
+                from sfi import Scalar  # Import SFI tools
+                if trace:
+                    self.stata.run("set trace on")
+                output_buf = StringIO()
+                with redirect_stdout(output_buf), redirect_stderr(output_buf):
+                    ret = self.stata.run(code, echo=echo)
+                    if isinstance(ret, str) and ret:
+                        ret_text = ret
+            except Exception as e:
+                exc = e
+                rc = 1
+            finally:
+                if trace:
+                    try:
+                        self.stata.run("set trace off")
+                    except Exception as e:
+                        logger.warning("Failed to turn off Stata trace mode: %s", e)
+
+        stdout = ""
+        stderr = ""
+        success = rc == 0 and exc is None
+        error = None
+        if not success:
+            msg = str(exc) if exc else f"Stata error r({rc})"
+            error = ErrorEnvelope(
+                message=msg,
+                rc=rc,
+                command=code,
+                stdout=ret_text,
+            )
+
+        return CommandResponse(
+            command=code,
+            rc=rc,
+            stdout=stdout,
+            stderr=None,
+            success=success,
+            error=error,
+        )
+
+    def exec_lightweight(self, code: str) -> CommandResponse:
+        """
+        Executes a command using simple stdout redirection (no SMCL logs).
+        Much faster on Windows as it avoids FS operations.
+        LIMITED: Does not support error envelopes or complex return code parsing.
+        """
+        if not self._initialized:
+            self.init()
+
+        code = self._maybe_rewrite_graph_name_in_command(code)
+
+        output_buffer = StringIO()
+        error_buffer = StringIO()
+        rc = 0
+        exc = None
+
+        with self._exec_lock:
+            with self._redirect_io(output_buffer, error_buffer):
+                try:
+                    self.stata.run(code, echo=False)
+                except Exception as e:
+                    exc = e
+                    rc = 1
+
+        stdout = output_buffer.getvalue()
+        stderr = error_buffer.getvalue()
+
+        return CommandResponse(
+            command=code,
+            rc=rc,
+            stdout=stdout,
+            stderr=stderr if not exc else str(exc),
+            success=(rc == 0),
+            error=None
+        )
+
     async def run_command_streaming(
-
-
-
-
-
-
-
-
-
-
-
-
+        self,
+        code: str,
+        *,
+        notify_log: Callable[[str], Awaitable[None]],
+        notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
+        echo: bool = True,
+        trace: bool = False,
+        max_output_lines: Optional[int] = None,
+        cwd: Optional[str] = None,
+        auto_cache_graphs: bool = False,
+        on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
+        emit_graph_ready: bool = False,
+        graph_ready_task_id: Optional[str] = None,
+        graph_ready_format: str = "svg",
+    ) -> CommandResponse:
         if not self._initialized:
             self.init()

         code = self._maybe_rewrite_graph_name_in_command(code)
+        auto_cache_graphs = auto_cache_graphs or emit_graph_ready
+        total_lines = 0  # Commands (not do-files) do not have line-based progress

         if cwd is not None and not os.path.isdir(cwd):
             return CommandResponse(
@@ -665,136 +1909,171 @@ class StataClient:

         start_time = time.time()
         exc: Optional[Exception] = None
+        smcl_content = ""
+        smcl_path = None

         # Setup streaming graph cache if enabled
-        graph_cache =
-        if auto_cache_graphs:
-            graph_cache = StreamingGraphCache(self, auto_cache=True)
-
-            graph_cache_callback = self._create_graph_cache_callback(on_graph_cached, notify_log)
-
-            graph_cache.add_cache_callback(graph_cache_callback)
+        graph_cache = self._init_streaming_graph_cache(auto_cache_graphs, on_graph_cached, notify_log)

-
-
-
-
-
-            encoding="utf-8",
-            errors="replace",
-            buffering=1,
-        )
-        log_path = log_file.name
-        tail = TailBuffer(max_chars=200000 if trace else 20000)
-        tee = FileTeeIO(log_file, tail)
+        _log_file, log_path, tail, tee = self._create_streaming_log(trace=trace)
+
+        # Create SMCL log path for authoritative output capture
+        smcl_path = self._create_smcl_log_path(base_dir=cwd)
+        smcl_log_name = self._make_smcl_log_name()

         # Inform the MCP client immediately where to read/tail the output.
-        await notify_log(json.dumps({"event": "log_path", "path":
+        await notify_log(json.dumps({"event": "log_path", "path": smcl_path}))

         rc = -1
+        path_for_stata = code.replace("\\", "/")
+        command = f'{path_for_stata}'

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        graph_ready_initial = self._capture_graph_state(graph_cache, emit_graph_ready)
+
+        # Increment AFTER capture so detected modifications are based on state BEFORE this command
+        self._increment_command_idx()
+
+        graph_poll_state = [0.0]
+
+        async def on_chunk_for_graphs(_chunk: str) -> None:
+            # Background the graph check so we don't block SMCL streaming or task completion
+            asyncio.create_task(
+                self._maybe_cache_graphs_on_chunk(
+                    graph_cache=graph_cache,
+                    emit_graph_ready=emit_graph_ready,
+                    notify_log=notify_log,
+                    graph_ready_task_id=graph_ready_task_id,
+                    graph_ready_format=graph_ready_format,
+                    graph_ready_initial=graph_ready_initial,
+                    last_check=graph_poll_state,
+                )
+            )

-
-        rc = self._read_return_code()
+        done = anyio.Event()

-
-
-
-
-
-
-
-
-
+        try:
+            async with anyio.create_task_group() as tg:
+                async def stream_smcl() -> None:
+                    try:
+                        await self._stream_smcl_log(
+                            smcl_path=smcl_path,
+                            notify_log=notify_log,
+                            done=done,
+                            on_chunk=on_chunk_for_graphs if graph_cache else None,
+                        )
+                    except Exception as exc:
+                        logger.debug("SMCL streaming failed: %s", exc)
+
+                tg.start_soon(stream_smcl)
+
+                if notify_progress is not None:
+                    if total_lines > 0:
+                        await notify_progress(0, float(total_lines), f"Executing command: 0/{total_lines}")
+                    else:
+                        await notify_progress(0, None, "Running command")
+
+                try:
+                    run_blocking = lambda: self._run_streaming_blocking(
+                        command=command,
+                        tee=tee,
+                        cwd=cwd,
+                        trace=trace,
+                        echo=echo,
+                        smcl_path=smcl_path,
+                        smcl_log_name=smcl_log_name,
+                        hold_attr="_hold_name_stream",
+                        require_smcl_log=True,
+                    )
+                    try:
+                        rc, exc = await anyio.to_thread.run_sync(
+                            run_blocking,
+                            abandon_on_cancel=True,
+                        )
+                    except TypeError:
+                        rc, exc = await anyio.to_thread.run_sync(run_blocking)
+                except Exception as e:
+                    exc = e
+                    if rc in (-1, 0):
+                        rc = 1
+                except get_cancelled_exc_class():
+                    self._request_break_in()
+                    await self._wait_for_stata_stop()
+                    raise
                 finally:
-
+                    done.set()
+                    tee.close()
+        except* Exception as exc_group:
+            logger.debug("SMCL streaming task group failed: %s", exc_group)

-
-
-            await notify_progress(0, None, "Running Stata command")
-
-            await anyio.to_thread.run_sync(_run_blocking, abandon_on_cancel=True)
-        except get_cancelled_exc_class():
-            # Best-effort cancellation: signal Stata to break, wait briefly, then propagate.
-            self._request_break_in()
-            await self._wait_for_stata_stop()
-            raise
-        finally:
-            tee.close()
+        # Read SMCL content as the authoritative source
+        smcl_content = self._read_smcl_file(smcl_path)

-        # Cache detected graphs after command completes
         if graph_cache:
-
-
-
-
-
-
-
-
-            await notify_progress(1, 1, f"Command completed. Cached {len(cached_graphs)} graphs: {', '.join(cached_graphs)}")
-        except Exception as e:
-            logger.warning(f"Failed to cache detected graphs: {e}")
+            asyncio.create_task(
+                self._cache_new_graphs(
+                    graph_cache,
+                    notify_progress=notify_progress,
+                    total_lines=total_lines,
+                    completed_label="Command",
+                )
+            )

-
-        log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
-        if log_tail and len(log_tail) > len(tail_text):
-            tail_text = log_tail
-        combined = (tail_text or "") + (f"\n{exc}" if exc else "")
+        combined = self._build_combined_log(tail, smcl_path, rc, trace, exc)

+        # Use SMCL content as primary source for RC detection
+        if not exc or rc in (1, -1):
+            parsed_rc = self._parse_rc_from_smcl(smcl_content)
+            if parsed_rc is not None and parsed_rc != 0:
+                rc = parsed_rc
+            elif rc in (-1, 0, 1):  # Also check text if rc is generic 1 or unset
+                parsed_rc_text = self._parse_rc_from_text(combined)
+                if parsed_rc_text is not None:
+                    rc = parsed_rc_text
+                elif rc == -1:
+                    rc = 0  # Default to success if no error trace found
+
         success = (rc == 0 and exc is None)
+        stderr_final = None
         error = None

         if not success:
-            # Use
-
-
+            # Use SMCL as authoritative source for error extraction
+            if smcl_content:
+                msg, context = self._extract_error_from_smcl(smcl_content, rc)
+            else:
+                # Fallback to combined log
+                msg, context = self._extract_error_and_context(combined, rc)
+
             error = ErrorEnvelope(
                 message=msg,
                 context=context,
                 rc=rc,
-                command=
+                command=command,
                 log_path=log_path,
-                snippet=
+                snippet=smcl_content[-800:] if smcl_content else combined[-800:],
+                smcl_output=smcl_content,
             )
+            stderr_final = context

         duration = time.time() - start_time
-        code_preview = code.replace("\n", "\\n")
         logger.info(
             "stata.run(stream) rc=%s success=%s trace=%s duration_ms=%.2f code_preview=%s",
             rc,
             success,
             trace,
             duration * 1000,
-
+            code.replace("\n", "\\n")[:120],
         )

         result = CommandResponse(
             command=code,
             rc=rc,
             stdout="",
-            stderr=
+            stderr=stderr_final,
             log_path=log_path,
             success=success,
             error=error,
+            smcl_output=smcl_content,
         )

         if notify_progress is not None:
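Note: the return-code resolution added above follows a fixed precedence: a nonzero code parsed from the SMCL log wins; otherwise, for generic codes (-1, 0, 1), the combined text log is consulted; an untouched -1 finally collapses to success. A minimal sketch of that precedence as a pure function (the names are illustrative, not part of the package):

    def resolve_rc(rc, smcl_rc, text_rc):
        # A nonzero code parsed from the SMCL log is authoritative.
        if smcl_rc is not None and smcl_rc != 0:
            return smcl_rc
        # Generic or unset codes fall back to the plain-text log.
        if rc in (-1, 0, 1):
            if text_rc is not None:
                return text_rc
            if rc == -1:
                return 0  # nothing indicated an error
        return rc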
@@ -802,69 +2081,25 @@ class StataClient:

         return result

-    def _count_do_file_lines(self, path: str) -> int:
-        try:
-            with open(path, "r", encoding="utf-8", errors="replace") as f:
-                lines = f.read().splitlines()
-        except Exception:
-            return 0
-
-        total = 0
-        for line in lines:
-            s = line.strip()
-            if not s:
-                continue
-            if s.startswith("*"):
-                continue
-            if s.startswith("//"):
-                continue
-            total += 1
-        return total
-
     async def run_do_file_streaming(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            success=False,
-            error=ErrorEnvelope(
-                message=f"cwd not found: {cwd}",
-                rc=601,
-                command=path,
-            ),
-        )
-
-        effective_path = path
-        if cwd is not None and not os.path.isabs(path):
-            effective_path = os.path.abspath(os.path.join(cwd, path))
-
-        if not os.path.exists(effective_path):
-            return CommandResponse(
-                command=f'do "{effective_path}"',
-                rc=601,
-                stdout="",
-                stderr=None,
-                success=False,
-                error=ErrorEnvelope(
-                    message=f"Do-file not found: {effective_path}",
-                    rc=601,
-                    command=effective_path,
-                ),
-            )
+        self,
+        path: str,
+        *,
+        notify_log: Callable[[str], Awaitable[None]],
+        notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
+        echo: bool = True,
+        trace: bool = False,
+        max_output_lines: Optional[int] = None,
+        cwd: Optional[str] = None,
+        auto_cache_graphs: bool = False,
+        on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
+        emit_graph_ready: bool = False,
+        graph_ready_task_id: Optional[str] = None,
+        graph_ready_format: str = "svg",
+    ) -> CommandResponse:
+        effective_path, command, error_response = self._resolve_do_file_path(path, cwd)
+        if error_response is not None:
+            return error_response

         total_lines = self._count_do_file_lines(effective_path)
         executed_lines = 0
@@ -893,174 +2128,145 @@ class StataClient:
         if not self._initialized:
             self.init()

+        auto_cache_graphs = auto_cache_graphs or emit_graph_ready
+
         start_time = time.time()
         exc: Optional[Exception] = None
+        smcl_content = ""
+        smcl_path = None

-
-
-        if auto_cache_graphs:
-            graph_cache = StreamingGraphCache(self, auto_cache=True)
-
-            graph_cache_callback = self._create_graph_cache_callback(on_graph_cached, notify_log)
-
-            graph_cache.add_cache_callback(graph_cache_callback)
+        graph_cache = self._init_streaming_graph_cache(auto_cache_graphs, on_graph_cached, notify_log)
+        _log_file, log_path, tail, tee = self._create_streaming_log(trace=trace)

-
-
-
-            delete=False,
-            mode="w",
-            encoding="utf-8",
-            errors="replace",
-            buffering=1,
-        )
-        log_path = log_file.name
-        tail = TailBuffer(max_chars=200000 if trace else 20000)
-        tee = FileTeeIO(log_file, tail)
+        base_dir = cwd or os.path.dirname(effective_path)
+        smcl_path = self._create_smcl_log_path(base_dir=base_dir)
+        smcl_log_name = self._make_smcl_log_name()

         # Inform the MCP client immediately where to read/tail the output.
-        await notify_log(json.dumps({"event": "log_path", "path":
+        await notify_log(json.dumps({"event": "log_path", "path": smcl_path}))

         rc = -1
-
-
+        graph_ready_initial = self._capture_graph_state(graph_cache, emit_graph_ready)
+
+        # Increment AFTER capture
+        self._increment_command_idx()
+
+        graph_poll_state = [0.0]
+
+        async def on_chunk_for_graphs(_chunk: str) -> None:
+            # Background the graph check so we don't block SMCL streaming or task completion
+            asyncio.create_task(
+                self._maybe_cache_graphs_on_chunk(
+                    graph_cache=graph_cache,
+                    emit_graph_ready=emit_graph_ready,
+                    notify_log=notify_log,
+                    graph_ready_task_id=graph_ready_task_id,
+                    graph_ready_format=graph_ready_format,
+                    graph_ready_initial=graph_ready_initial,
+                    last_check=graph_poll_state,
+                )
+            )

-
+        on_chunk_callback = on_chunk_for_progress
         if graph_cache:
-
-
-
-            except Exception as e:
-                logger.debug(f"Failed to capture initial graph state: {e}")
-                graph_cache._initial_graphs = set()
-
-        def _run_blocking() -> None:
-            nonlocal rc, exc
-            with self._exec_lock:
-                # Set execution flag to prevent recursive Stata calls
-                self._is_executing = True
-                try:
-                    from sfi import Scalar, SFIToolkit  # Import SFI tools
-                    with self._temp_cwd(cwd):
-                        with self._redirect_io_streaming(tee, tee):
-                            try:
-                                if trace:
-                                    self.stata.run("set trace on")
-                                ret = self.stata.run(command, echo=echo)
-                                # Some PyStata builds return output as a string rather than printing.
-                                if isinstance(ret, str) and ret:
-                                    try:
-                                        tee.write(ret)
-                                    except Exception:
-                                        pass
-
-                                # ROBUST DETECTION & OUTPUT
-                                rc = self._read_return_code()
-
-                            except Exception as e:
-                                exc = e
-                                if rc == 0: rc = 1
-                            finally:
-                                if trace:
-                                    try: self.stata.run("set trace off")
-                                    except: pass
-                finally:
-                    # Clear execution flag
-                    self._is_executing = False
+            async def on_chunk_callback(chunk: str) -> None:
+                await on_chunk_for_progress(chunk)
+                await on_chunk_for_graphs(chunk)

         done = anyio.Event()

-
-
-
-
-
-
-
-
-
-
-
-
-                    await anyio.sleep(0.05)
-
-                    f.seek(last_pos)
-                    chunk = f.read()
-                    if chunk:
-                        await on_chunk_for_progress(chunk)
-            except Exception:
-                return
-
-        async with anyio.create_task_group() as tg:
-            tg.start_soon(_monitor_progress_from_log)
-
-            if notify_progress is not None:
-                if total_lines > 0:
-                    await notify_progress(0, float(total_lines), f"Executing do-file: 0/{total_lines}")
-                else:
-                    await notify_progress(0, None, "Running do-file")
-
-            try:
-                await anyio.to_thread.run_sync(_run_blocking, abandon_on_cancel=True)
-            except get_cancelled_exc_class():
-                self._request_break_in()
-                await self._wait_for_stata_stop()
-                raise
-            finally:
-                done.set()
-                tee.close()
+        try:
+            async with anyio.create_task_group() as tg:
+                async def stream_smcl() -> None:
+                    try:
+                        await self._stream_smcl_log(
+                            smcl_path=smcl_path,
+                            notify_log=notify_log,
+                            done=done,
+                            on_chunk=on_chunk_callback,
+                        )
+                    except Exception as exc:
+                        logger.debug("SMCL streaming failed: %s", exc)

-
-        if graph_cache and graph_cache.auto_cache:
-            try:
-                # [Existing graph cache logic kept identical]
-                cached_graphs = []
-                initial_graphs = getattr(graph_cache, '_initial_graphs', set())
-                current_graphs = set(self.list_graphs())
-                new_graphs = current_graphs - initial_graphs - graph_cache._cached_graphs
+                tg.start_soon(stream_smcl)

-                if
-
+                if notify_progress is not None:
+                    if total_lines > 0:
+                        await notify_progress(0, float(total_lines), f"Executing do-file: 0/{total_lines}")
+                    else:
+                        await notify_progress(0, None, "Running do-file")

-
+                try:
+                    run_blocking = lambda: self._run_streaming_blocking(
+                        command=command,
+                        tee=tee,
+                        cwd=cwd,
+                        trace=trace,
+                        echo=echo,
+                        smcl_path=smcl_path,
+                        smcl_log_name=smcl_log_name,
+                        hold_attr="_hold_name_do",
+                        require_smcl_log=True,
+                    )
                     try:
-
-
-
+                        rc, exc = await anyio.to_thread.run_sync(
+                            run_blocking,
+                            abandon_on_cancel=True,
                         )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    float(total_lines) if total_lines > 0 else 1,
-                    f"Do-file completed. Cached {len(cached_graphs)} graph(s): {', '.join(cached_graphs)}"
-                )
-            except Exception as e:
-                logger.error(f"Post-execution graph detection failed: {e}")
+                    except TypeError:
+                        rc, exc = await anyio.to_thread.run_sync(run_blocking)
+                except Exception as e:
+                    exc = e
+                    if rc in (-1, 0):
+                        rc = 1
+                except get_cancelled_exc_class():
+                    self._request_break_in()
+                    await self._wait_for_stata_stop()
+                    raise
+                finally:
+                    done.set()
+                    tee.close()
+        except* Exception as exc_group:
+            logger.debug("SMCL streaming task group failed: %s", exc_group)

-
-
-
-
-
+        # Read SMCL content as the authoritative source
+        smcl_content = self._read_smcl_file(smcl_path)
+
+        if graph_cache:
+            asyncio.create_task(
+                self._cache_new_graphs(
+                    graph_cache,
+                    notify_progress=notify_progress,
+                    total_lines=total_lines,
+                    completed_label="Do-file",
+                )
+            )
+
+        combined = self._build_combined_log(tail, log_path, rc, trace, exc)

+        # Use SMCL content as primary source for RC detection
+        if not exc or rc in (1, -1):
+            parsed_rc = self._parse_rc_from_smcl(smcl_content)
+            if parsed_rc is not None and parsed_rc != 0:
+                rc = parsed_rc
+            elif rc in (-1, 0, 1):
+                parsed_rc_text = self._parse_rc_from_text(combined)
+                if parsed_rc_text is not None:
+                    rc = parsed_rc_text
+                elif rc == -1:
+                    rc = 0  # Default to success if no error found
+
         success = (rc == 0 and exc is None)
+        stderr_final = None
         error = None

         if not success:
-            #
-
+            # Use SMCL as authoritative source for error extraction
+            if smcl_content:
+                msg, context = self._extract_error_from_smcl(smcl_content, rc)
+            else:
+                # Fallback to combined log
+                msg, context = self._extract_error_and_context(combined, rc)

             error = ErrorEnvelope(
                 message=msg,
@@ -1068,8 +2274,10 @@ class StataClient:
                 rc=rc,
                 command=command,
                 log_path=log_path,
-                snippet=combined[-800:]
+                snippet=smcl_content[-800:] if smcl_content else combined[-800:],
+                smcl_output=smcl_content,
             )
+            stderr_final = context

         duration = time.time() - start_time
         logger.info(
@@ -1085,10 +2293,11 @@ class StataClient:
             command=command,
             rc=rc,
             stdout="",
-            stderr=
+            stderr=stderr_final,
             log_path=log_path,
             success=success,
             error=error,
+            smcl_output=smcl_content,
         )

         if notify_progress is not None:
@@ -1110,22 +2319,7 @@ class StataClient:
         """
         result = self._exec_with_capture(code, echo=echo, trace=trace, cwd=cwd)

-
-        if max_output_lines is not None and result.stdout:
-            lines = result.stdout.splitlines()
-            if len(lines) > max_output_lines:
-                truncated_lines = lines[:max_output_lines]
-                truncated_lines.append(f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)")
-                result = CommandResponse(
-                    command=result.command,
-                    rc=result.rc,
-                    stdout="\n".join(truncated_lines),
-                    stderr=result.stderr,
-                    success=result.success,
-                    error=result.error,
-                )
-
-        return result
+        return self._truncate_command_output(result, max_output_lines)

     def get_data(self, start: int = 0, count: int = 50) -> List[Dict[str, Any]]:
         """Returns valid JSON-serializable data."""
@@ -1135,17 +2329,18 @@ class StataClient:
         if count > self.MAX_DATA_ROWS:
             count = self.MAX_DATA_ROWS

-
-
-
+        with self._exec_lock:
+            try:
+                # Use pystata integration to retrieve data
+                df = self.stata.pdataframe_from_data()

-
-
+                # Slice
+                sliced = df.iloc[start : start + count]

-
-
-
-
+                # Convert to dict
+                return sliced.to_dict(orient="records")
+            except Exception as e:
+                return [{"error": f"Failed to retrieve data: {e}"}]

     def list_variables(self) -> List[Dict[str, str]]:
         """Returns list of variables with labels."""
@@ -1155,17 +2350,18 @@ class StataClient:
         # We can use sfi to be efficient
         from sfi import Data  # type: ignore[import-not-found]
         vars_info = []
-
-
-
-
-
-
-
-
-
-
-
+        with self._exec_lock:
+            for i in range(Data.getVarCount()):
+                var_index = i  # 0-based
+                name = Data.getVarName(var_index)
+                label = Data.getVarLabel(var_index)
+                type_str = Data.getVarType(var_index)  # Returns int
+
+                vars_info.append({
+                    "name": name,
+                    "label": label,
+                    "type": str(type_str),
+                })
         return vars_info

     def get_dataset_state(self) -> Dict[str, Any]:
@@ -1175,24 +2371,28 @@ class StataClient:

         from sfi import Data, Macro  # type: ignore[import-not-found]

-
-
+        with self._exec_lock:
+            n = int(Data.getObsTotal())
+            k = int(Data.getVarCount())

-        frame = "default"
-        sortlist = ""
-        changed = False
-        try:
-            frame = str(Macro.getCValue("frame") or "default")
-        except Exception:
             frame = "default"
-        try:
-            sortlist = str(Macro.getCValue("sortlist") or "")
-        except Exception:
             sortlist = ""
-        try:
-            changed = bool(int(float(Macro.getCValue("changed") or "0")))
-        except Exception:
             changed = False
+            try:
+                frame = str(Macro.getGlobal("frame") or "default")
+            except Exception:
+                logger.debug("Failed to get 'frame' macro", exc_info=True)
+                frame = "default"
+            try:
+                sortlist = str(Macro.getGlobal("sortlist") or "")
+            except Exception:
+                logger.debug("Failed to get 'sortlist' macro", exc_info=True)
+                sortlist = ""
+            try:
+                changed = bool(int(float(Macro.getGlobal("changed") or "0")))
+            except Exception:
+                logger.debug("Failed to get 'changed' macro", exc_info=True)
+                changed = False

         return {"frame": frame, "n": n, "k": k, "sortlist": sortlist, "changed": changed}

@@ -1206,11 +2406,12 @@ class StataClient:
         from sfi import Data  # type: ignore[import-not-found]

         out: Dict[str, int] = {}
-
-
-
-
-
+        with self._exec_lock:
+            for i in range(int(Data.getVarCount())):
+                try:
+                    out[str(Data.getVarName(i))] = i
+                except Exception:
+                    continue
         return out

     def list_variables_rich(self) -> List[Dict[str, Any]]:
@@ -1340,6 +2541,96 @@ class StataClient:
             "truncated_cells": truncated_cells,
         }

+    def get_arrow_stream(
+        self,
+        *,
+        offset: int,
+        limit: int,
+        vars: List[str],
+        include_obs_no: bool,
+        obs_indices: Optional[List[int]] = None,
+    ) -> bytes:
+        """
+        Returns an Apache Arrow IPC stream (as bytes) for the requested data page.
+        Uses Polars if available (faster), falls back to Pandas.
+        """
+        if not self._initialized:
+            self.init()
+
+        import pyarrow as pa
+        from sfi import Data  # type: ignore[import-not-found]
+
+        use_polars = _get_polars_available()
+        if use_polars:
+            import polars as pl
+        else:
+            import pandas as pd
+
+        state = self.get_dataset_state()
+        n = int(state.get("n", 0) or 0)
+        k = int(state.get("k", 0) or 0)
+        if k == 0 and n == 0:
+            raise RuntimeError("No data in memory")
+
+        var_map = self._get_var_index_map()
+        for v in vars:
+            if v not in var_map:
+                raise ValueError(f"Invalid variable: {v}")
+
+        # Determine observations to fetch
+        if obs_indices is None:
+            start = offset
+            end = min(offset + limit, n)
+            obs_list = list(range(start, end)) if start < n else []
+        else:
+            start = offset
+            end = min(offset + limit, len(obs_indices))
+            obs_list = obs_indices[start:end]
+
+        try:
+            if not obs_list:
+                # Empty schema-only table
+                if use_polars:
+                    schema_cols = {}
+                    if include_obs_no:
+                        schema_cols["_n"] = pl.Int64
+                    for v in vars:
+                        schema_cols[v] = pl.Utf8
+                    table = pl.DataFrame(schema=schema_cols).to_arrow()
+                else:
+                    columns = {}
+                    if include_obs_no:
+                        columns["_n"] = pa.array([], type=pa.int64())
+                    for v in vars:
+                        columns[v] = pa.array([], type=pa.string())
+                    table = pa.table(columns)
+            else:
+                # Fetch all data in one C-call
+                raw_data = Data.get(var=vars, obs=obs_list, valuelabel=False)
+
+                if use_polars:
+                    df = pl.DataFrame(raw_data, schema=vars, orient="row")
+                    if include_obs_no:
+                        obs_nums = [i + 1 for i in obs_list]
+                        df = df.with_columns(pl.Series("_n", obs_nums, dtype=pl.Int64))
+                        df = df.select(["_n"] + vars)
+                    table = df.to_arrow()
+                else:
+                    df = pd.DataFrame(raw_data, columns=vars)
+                    if include_obs_no:
+                        df.insert(0, "_n", [i + 1 for i in obs_list])
+                    table = pa.Table.from_pandas(df, preserve_index=False)
+
+            # Serialize to IPC Stream
+            sink = pa.BufferOutputStream()
+            with pa.RecordBatchStreamWriter(sink, table.schema) as writer:
+                writer.write_table(table)
+
+            return sink.getvalue().to_pybytes()
+
+        except Exception as e:
+            raise RuntimeError(f"Failed to generate Arrow stream: {e}")
+
     _FILTER_IDENT = re.compile(r"\b[A-Za-z_][A-Za-z0-9_]*\b")

     def _extract_filter_vars(self, filter_expr: str) -> List[str]:
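Note: because `get_arrow_stream` returns raw Arrow IPC bytes, any Arrow-aware client can decode the page without extra helpers. A minimal consumer-side sketch, assuming `payload` holds the returned bytes (the variable name is hypothetical):

    import pyarrow as pa

    reader = pa.ipc.open_stream(payload)  # parse the IPC stream header
    table = reader.read_all()             # materialize every record batch
    print(table.num_rows, table.column_names)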
@@ -1527,39 +2818,46 @@ class StataClient:
             return self._list_graphs_cache

         # Cache miss or expired, fetch fresh data
-
-
-
-
+        with self._exec_lock:
+            try:
+                # Preservation of r() results is critical because this can be called
+                # automatically after every user command (e.g., during streaming).
+                import time
+                hold_name = f"_mcp_ghold_{int(time.time() * 1000 % 1000000)}"
+                self.stata.run(f"capture _return hold {hold_name}", echo=False)
+
+                try:
+                    self.stata.run("macro define mcp_graph_list \"\"", echo=False)
+                    self.stata.run("quietly graph dir, memory", echo=False)
+                    from sfi import Macro  # type: ignore[import-not-found]
+                    self.stata.run("macro define mcp_graph_list `r(list)'", echo=False)
+                    graph_list_str = Macro.getGlobal("mcp_graph_list")
+                finally:
+                    self.stata.run(f"capture _return restore {hold_name}", echo=False)

-
-        # We stash the result in a global macro that python sfi can easily read.
-        from sfi import Macro  # type: ignore[import-not-found]
-        self.stata.run("global mcp_graph_list `r(list)'")
-        graph_list_str = Macro.getGlobal("mcp_graph_list")
-        raw_list = graph_list_str.split() if graph_list_str else []
+                raw_list = graph_list_str.split() if graph_list_str else []

-
-
-
+                # Map internal Stata names back to user-facing names when we have an alias.
+                reverse = getattr(self, "_graph_name_reverse", {})
+                graph_list = [reverse.get(n, n) for n in raw_list]

-
+                result = graph_list

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                # Update cache
+                with self._list_graphs_cache_lock:
+                    self._list_graphs_cache = result
+                    self._list_graphs_cache_time = time.time()
+
+                return result
+
+            except Exception as e:
+                # On error, return cached result if available, otherwise empty list
+                with self._list_graphs_cache_lock:
+                    if self._list_graphs_cache is not None:
+                        logger.warning(f"list_graphs failed, returning cached result: {e}")
+                        return self._list_graphs_cache
+                    logger.warning(f"list_graphs failed, no cache available: {e}")
+                    return []

     def list_graphs_structured(self) -> GraphListResponse:
         names = self.list_graphs()
@@ -1583,8 +2881,9 @@ class StataClient:
         import tempfile

         fmt = (format or "pdf").strip().lower()
-        if fmt not in {"pdf", "png"}:
-            raise ValueError(f"Unsupported graph export format: {format}. Allowed: pdf, png.")
+        if fmt not in {"pdf", "png", "svg"}:
+            raise ValueError(f"Unsupported graph export format: {format}. Allowed: pdf, png, svg.")
+

         if not filename:
             suffix = f".{fmt}"
@@ -1608,9 +2907,9 @@ class StataClient:
         gph_path_for_stata = gph_path.replace("\\", "/")
         # Make the target graph current, then save without name() (which isn't accepted there)
         if graph_name:
-            self.
-            save_cmd = f'graph save "{gph_path_for_stata}", replace'
-            save_resp = self.
+            self._exec_no_capture_silent(f'quietly graph display "{graph_name}"', echo=False)
+        save_cmd = f'quietly graph save "{gph_path_for_stata}", replace'
+        save_resp = self._exec_no_capture_silent(save_cmd, echo=False)
         if not save_resp.success:
             msg = save_resp.error.message if save_resp.error else f"graph save failed (rc={save_resp.rc})"
             raise RuntimeError(msg)
@@ -1618,8 +2917,8 @@ class StataClient:
         # 2) Prepare a do-file to export PNG externally
         user_filename_fwd = user_filename.replace("\\", "/")
         do_lines = [
-            f'graph use "{gph_path_for_stata}"',
-            f'graph export "{user_filename_fwd}", replace as(png)',
+            f'quietly graph use "{gph_path_for_stata}"',
+            f'quietly graph export "{user_filename_fwd}", replace as(png)',
             "exit",
         ]
         with tempfile.NamedTemporaryFile(prefix="mcp_stata_export_", suffix=".do", delete=False, mode="w", encoding="ascii") as do_tmp:
@@ -1670,20 +2969,21 @@ class StataClient:
         # Stata prefers forward slashes in its command parser on Windows
         filename_for_stata = user_filename.replace("\\", "/")

-        cmd = "graph export"
         if graph_name:
             resolved = self._resolve_graph_name_for_stata(graph_name)
-
-
-
+            # Use display + export without name() for maximum compatibility.
+            # name(NAME) often fails in PyStata for non-active graphs (r(693)).
+            self._exec_no_capture_silent(f'quietly graph display "{resolved}"', echo=False)
+
+        cmd = f'quietly graph export "{filename_for_stata}", replace as({fmt})'

         # Avoid stdout/stderr redirection for graph export because PyStata's
         # output thread can crash on Windows when we swap stdio handles.
-        resp = self.
+        resp = self._exec_no_capture_silent(cmd, echo=False)
         if not resp.success:
             # Retry once after a short pause in case Stata had a transient file handle issue
             time.sleep(0.2)
-            resp_retry = self.
+            resp_retry = self._exec_no_capture_silent(cmd, echo=False)
             if not resp_retry.success:
                 msg = resp_retry.error.message if resp_retry.error else f"graph export failed (rc={resp_retry.rc})"
                 raise RuntimeError(msg)
@@ -1716,14 +3016,15 @@ class StataClient:
         if not self._initialized:
             self.init()

-
-
-
+        with self._exec_lock:
+            # Try to locate the .sthlp help file
+            # We use 'capture' to avoid crashing if not found
+            self.stata.run(f"capture findfile {topic}.sthlp")

-
-
-
-
+            # Retrieve the found path from r(fn)
+            from sfi import Macro  # type: ignore[import-not-found]
+            self.stata.run("global mcp_help_file `r(fn)'")
+            fn = Macro.getGlobal("mcp_help_file")

         if fn and os.path.exists(fn):
             try:
@@ -1737,73 +3038,77 @@ class StataClient:
|
|
|
1737
3038
|
logger.warning("SMCL to Markdown failed, falling back to plain text: %s", parse_err)
|
|
1738
3039
|
return self._smcl_to_text(smcl)
|
|
1739
3040
|
except Exception as e:
|
|
1740
|
-
|
|
3041
|
+
logger.warning("Help file read failed for %s: %s", topic, e)
|
|
1741
3042
|
|
|
1742
|
-
#
|
|
1743
|
-
return f"Help file for '{topic}' not found.
|
|
3043
|
+
# If no help file found, return a fallback message
|
|
3044
|
+
return f"Help file for '{topic}' not found."
|
|
3045
|
+
|
|
3046
|
+
def get_stored_results(self, force_fresh: bool = False) -> Dict[str, Any]:
|
|
3047
|
+
"""Returns e() and r() results using SFI for maximum reliability."""
|
|
3048
|
+
if not force_fresh and self._last_results is not None:
|
|
3049
|
+
return self._last_results
|
|
1744
3050
|
|
|
1745
|
-
def get_stored_results(self) -> Dict[str, Any]:
|
|
1746
|
-        """Returns e() and r() results."""
         if not self._initialized:
             self.init()
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        with self._exec_lock:
+            # We must be extremely careful not to clobber r()/e() while fetching their names.
+            # We use a hold to peek at the results.
+            hold_name = f"mcp_peek_{uuid.uuid4().hex[:8]}"
+            self.stata.run(f"capture _return hold {hold_name}", echo=False)
+
+            try:
+                from sfi import Scalar, Macro
+                results = {"r": {}, "e": {}}
+
+                for rclass in ["r", "e"]:
+                    # Restore with 'hold' to peek at results without losing them from the hold
+                    # Note: Stata 18+ supports 'restore ..., hold' which is ideal.
+                    self.stata.run(f"capture _return restore {hold_name}, hold", echo=False)
+
+                    # Fetch names using backtick expansion (which we verified works better than colon)
+                    # and avoid leading underscores which were causing syntax errors with 'global'
+                    self.stata.run(f"macro define mcp_scnames `: {rclass}(scalars)'", echo=False)
+                    self.stata.run(f"macro define mcp_macnames `: {rclass}(macros)'", echo=False)
+
+                    # 1. Capture Scalars
+                    names_str = Macro.getGlobal("mcp_scnames")
+                    if names_str:
+                        for name in names_str.split():
+                            try:
+                                val = Scalar.getValue(f"{rclass}({name})")
+                                results[rclass][name] = val
+                            except Exception:
+                                pass
+
+                    # 2. Capture Macros (strings)
+                    macros_str = Macro.getGlobal("mcp_macnames")
+                    if macros_str:
+                        for name in macros_str.split():
+                            try:
+                                # Restore/Hold again to be safe before fetching each macro
+                                self.stata.run(f"capture _return restore {hold_name}, hold", echo=False)
+                                # Capture the string value into a macro
+                                self.stata.run(f"macro define mcp_mval `{rclass}({name})'", echo=False)
+                                val = Macro.getGlobal("mcp_mval")
+                                results[rclass][name] = val
+                            except Exception:
+                                pass
+
+                # Cleanup
+                self.stata.run("macro drop mcp_scnames mcp_macnames mcp_mval", echo=False)
+                self.stata.run(f"capture _return restore {hold_name}", echo=False)  # Restore one last time to leave Stata in correct state
+
+                self._last_results = results
+                return results
+            except Exception as e:
+                logger.error(f"SFI-based get_stored_results failed: {e}")
+                # Try to clean up hold if we failed
+                try:
+                    self.stata.run(f"capture _return drop {hold_name}", echo=False)
+                except Exception:
+                    pass
+                return {"r": {}, "e": {}}
 
     def invalidate_graph_cache(self, graph_name: str = None) -> None:
         """Invalidate cache for specific graph or all graphs.
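Note: the rewritten get_stored_results leans on Stata's _return hold / _return restore ..., hold pair so that reading r()/e() never destroys them. A minimal sketch of the same peek pattern, reduced to r() scalars (peek_r_scalars is a hypothetical helper, not part of the package; it assumes an initialized pystata session and follows the diff's use of 'macro define'):

import uuid

def peek_r_scalars(stata):
    """Read r() scalars without clobbering them, via _return hold/restore."""
    from sfi import Macro, Scalar

    hold = f"peek_{uuid.uuid4().hex[:8]}"
    stata.run(f"capture _return hold {hold}", echo=False)
    try:
        # ', hold' restores the results for reading while keeping the held copy
        stata.run(f"capture _return restore {hold}, hold", echo=False)
        stata.run("macro define peek_names `: r(scalars)'", echo=False)
        names = Macro.getGlobal("peek_names") or ""
        return {n: Scalar.getValue(f"r({n})") for n in names.split()}
    finally:
        stata.run("macro drop peek_names", echo=False)
        stata.run(f"capture _return restore {hold}", echo=False)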
@@ -1953,47 +3258,32 @@ class StataClient:
 
             # Additional validation by attempting to display the graph
             resolved = self._resolve_graph_name_for_stata(graph_name)
-            cmd = f'graph display {resolved}'
-            resp = self.
+            cmd = f'quietly graph display {resolved}'
+            resp = self._exec_no_capture_silent(cmd, echo=False)
             return resp.success
         except Exception:
             return False
 
     def _is_cache_valid(self, graph_name: str, cache_path: str) -> bool:
-        """Check if cached content is still valid."""
+        """Check if cached content is still valid using internal signatures."""
         try:
-
-
-            import os
-
-            temp_dir = tempfile.gettempdir()
-            temp_file = os.path.join(temp_dir, f"temp_{graph_name}_{os.getpid()}.svg")
-
-            resolved = self._resolve_graph_name_for_stata(graph_name)
-            export_cmd = f'graph export "{temp_file.replace("\\\\", "/")}", name({resolved}) replace as(svg)'
-            resp = self._exec_no_capture(export_cmd, echo=False)
-
-            if resp.success and os.path.exists(temp_file):
-                with open(temp_file, 'rb') as f:
-                    current_data = f.read()
-                os.remove(temp_file)
+            if not os.path.exists(cache_path) or os.path.getsize(cache_path) == 0:
+                return False
 
-
-
+            current_sig = self._get_graph_signature(graph_name)
+            cached_sig = self._preemptive_cache.get(f"{graph_name}_sig")
+
+            # If we have a signature match, it's valid for the current command session
+            if cached_sig and cached_sig == current_sig:
+                return True
 
-
+            # Otherwise it's invalid (needs refresh for new command)
+            return False
         except Exception:
-
-
-            return False  # Assume invalid if we can't verify
-
-    def export_graphs_all(self, use_base64: bool = False) -> GraphExportResponse:
-        """Exports all graphs to file paths (default) or base64-encoded strings.
+            return False
 
-
-
-        returns file paths to exported SVG files.
-        """
+    def export_graphs_all(self) -> GraphExportResponse:
+        """Exports all graphs to file paths."""
         exports: List[GraphExport] = []
         graph_names = self.list_graphs(force_refresh=True)
 
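Note: _is_cache_valid used to re-export the graph and compare bytes on every check; it now reduces to a pure lookup. The core of the new check as a standalone sketch (hypothetical names, not the package's API):

import os
from typing import Optional

def is_cache_valid(cache_path: str, cached_sig: Optional[str], current_sig: str) -> bool:
    # A cached export is reusable only if the file still exists, is non-empty,
    # and the signature recorded at export time matches the graph's current one.
    if not os.path.exists(cache_path) or os.path.getsize(cache_path) == 0:
        return False
    return bool(cached_sig) and cached_sig == current_sig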
@@ -2003,7 +3293,6 @@ class StataClient:
         import tempfile
         import os
         import threading
-        import base64
         import uuid
         import time
         import logging
@@ -2027,15 +3316,15 @@ class StataClient:
             svg_path_for_stata = svg_path.replace("\\", "/")
 
             try:
-                export_cmd = f'graph export "{svg_path_for_stata}", name({resolved}) replace as(svg)'
-                export_resp = self.
+                export_cmd = f'quietly graph export "{svg_path_for_stata}", name({resolved}) replace as(svg)'
+                export_resp = self._exec_no_capture_silent(export_cmd, echo=False)
 
                 if not export_resp.success:
-                    display_cmd = f'graph display {resolved}'
-                    display_resp = self.
+                    display_cmd = f'quietly graph display {resolved}'
+                    display_resp = self._exec_no_capture_silent(display_cmd, echo=False)
                     if display_resp.success:
-                        export_cmd2 = f'graph export "{svg_path_for_stata}", replace as(svg)'
-                        export_resp = self.
+                        export_cmd2 = f'quietly graph export "{svg_path_for_stata}", replace as(svg)'
+                        export_resp = self._exec_no_capture_silent(export_cmd2, echo=False)
                     else:
                         export_resp = display_resp
 
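Note: every Stata call in the export path now goes through quietly plus a silent executor, with a display-then-export fallback when name() is rejected. The control flow, schematically (run_silent stands in for the client's _exec_no_capture_silent and is an assumption, not the package's signature):

def export_svg(run_silent, resolved: str, svg_path: str):
    resp = run_silent(f'quietly graph export "{svg_path}", name({resolved}) replace as(svg)')
    if not resp.success:
        # Fallback: make the graph the current one, then export without name()
        display = run_silent(f'quietly graph display {resolved}')
        resp = run_silent(f'quietly graph export "{svg_path}", replace as(svg)') if display.success else display
    return resp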
@@ -2077,12 +3366,7 @@ class StataClient:
 
         for name, cached_path in cached_graphs.items():
             try:
-
-                    with open(cached_path, "rb") as f:
-                        svg_b64 = base64.b64encode(f.read()).decode("ascii")
-                    exports.append(GraphExport(name=name, image_base64=svg_b64))
-                else:
-                    exports.append(GraphExport(name=name, file_path=cached_path))
+                exports.append(GraphExport(name=name, file_path=cached_path))
             except Exception as e:
                 cache_errors.append(f"Failed to read cached graph {name}: {e}")
                 # Fall back to uncached processing
@@ -2125,24 +3409,16 @@ class StataClient:
                         self._cache_sizes[name] = item_size
                         self._total_cache_size += item_size
 
-
-                        svg_b64 = base64.b64encode(result).decode("ascii")
-                        exports.append(GraphExport(name=name, image_base64=svg_b64))
-                    else:
-                        exports.append(GraphExport(name=name, file_path=cache_path))
+                    exports.append(GraphExport(name=name, file_path=cache_path))
                 except Exception as e:
                     cache_errors.append(f"Failed to cache graph {name}: {e}")
                     # Still return the result even if caching fails
-
-
-
-
-
-
-                    temp_path = os.path.join(tempfile.gettempdir(), f"{safe_name}_{uuid.uuid4().hex[:8]}.svg")
-                    with open(temp_path, 'wb') as f:
-                        f.write(result)
-                    exports.append(GraphExport(name=name, file_path=temp_path))
+                    # Create temp file for immediate use
+                    safe_name = self._sanitize_filename(name)
+                    temp_path = os.path.join(tempfile.gettempdir(), f"{safe_name}_{uuid.uuid4().hex[:8]}.svg")
+                    with open(temp_path, 'wb') as f:
+                        f.write(result)
+                    exports.append(GraphExport(name=name, file_path=temp_path))
 
         # Log errors if any occurred
         if cache_errors:
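Note: with base64 export gone, a graph that cannot be cached is still written to a uniquely named temp file so the caller always receives a usable path. That step in isolation (a sketch; write_temp_svg is illustrative, not the package's helper):

import os
import tempfile
import uuid

def write_temp_svg(safe_name: str, data: bytes) -> str:
    # The uuid suffix avoids collisions when the same graph is exported repeatedly
    path = os.path.join(tempfile.gettempdir(), f"{safe_name}_{uuid.uuid4().hex[:8]}.svg")
    with open(path, "wb") as f:
        f.write(data)
    return path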
@@ -2197,29 +3473,21 @@ class StataClient:
                 del self._preemptive_cache[hash_key]
 
         try:
-            #
-
+            # Include signature in filename to force client-side refresh
+            sig = self._get_graph_signature(graph_name)
+            safe_name = self._sanitize_filename(sig)
             cache_path = os.path.join(self._preemptive_cache_dir, f"{safe_name}.svg")
             cache_path_for_stata = cache_path.replace("\\", "/")
 
             resolved_graph_name = self._resolve_graph_name_for_stata(graph_name)
-
+            # Use display + export without name() for maximum compatibility.
+            # name(NAME) often fails in PyStata for non-active graphs (r(693)).
+            # Quoting the name helps with spaces/special characters.
+            display_cmd = f'quietly graph display "{resolved_graph_name}"'
+            self._exec_no_capture_silent(display_cmd, echo=False)
 
-            export_cmd = f'graph export "{cache_path_for_stata}",
-            resp = self.
-
-            # Fallback: some graph names (spaces, slashes, backslashes) can confuse
-            # Stata's parser in name() even when the graph exists. In that case,
-            # make the graph current, then export without name().
-            if not resp.success:
-                try:
-                    display_cmd = f'graph display {graph_name_q}'
-                    display_resp = self._exec_no_capture(display_cmd, echo=False)
-                    if display_resp.success:
-                        export_cmd2 = f'graph export "{cache_path_for_stata}", replace as(svg)'
-                        resp = self._exec_no_capture(export_cmd2, echo=False)
-                except Exception:
-                    pass
+            export_cmd = f'quietly graph export "{cache_path_for_stata}", replace as(svg)'
+            resp = self._exec_no_capture_silent(export_cmd, echo=False)
 
             if resp.success and os.path.exists(cache_path) and os.path.getsize(cache_path) > 0:
                 # Read the data to compute hash
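Note: keying the cache file on the graph's signature rather than its display name means a regenerated graph lands at a new path, so any client that caches by path misses automatically. One way to express that idea (hashing is an assumption for the sketch; the package sanitizes the signature into a filename instead):

import hashlib
import os

def signature_keyed_path(cache_dir: str, signature: str) -> str:
    # New signature -> new filename -> stale client-side caches are bypassed
    digest = hashlib.sha1(signature.encode("utf-8")).hexdigest()[:16]
    return os.path.join(cache_dir, f"{digest}.svg")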
@@ -2232,9 +3500,20 @@ class StataClient:
                 self._evict_cache_if_needed(item_size)
 
                 with self._cache_lock:
+                    # Clear any old versions of this graph from the path cache
+                    # (Optional but keeps it clean)
+                    old_path = self._preemptive_cache.get(graph_name)
+                    if old_path and old_path != cache_path:
+                        try:
+                            os.remove(old_path)
+                        except Exception:
+                            pass
+
                     self._preemptive_cache[graph_name] = cache_path
                     # Store content hash for validation
                     self._preemptive_cache[f"{graph_name}_hash"] = self._get_content_hash(data)
+                    # Store signature for fast validation
+                    self._preemptive_cache[f"{graph_name}_sig"] = self._get_graph_signature(graph_name)
                     # Update tracking
                     self._cache_access_times[graph_name] = time.time()
                     self._cache_sizes[graph_name] = item_size
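Note: bookkeeping after a successful export now happens entirely under the cache lock: the file belonging to the previous signature is unlinked best-effort, then path, content hash, and signature are recorded together. Condensed into a standalone sketch (hypothetical names; state plays the role of _preemptive_cache):

import os
import threading
import time

def record_cache_entry(state: dict, lock: threading.Lock, name: str,
                       cache_path: str, content_hash: str, sig: str) -> None:
    with lock:
        old_path = state.get(name)
        if old_path and old_path != cache_path:
            try:
                os.remove(old_path)  # best-effort cleanup of the stale export
            except OSError:
                pass
        state[name] = cache_path
        state[f"{name}_hash"] = content_hash
        state[f"{name}_sig"] = sig
        state[f"{name}_atime"] = time.time()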
@@ -2253,105 +3532,58 @@ class StataClient:
             return False
 
     def run_do_file(self, path: str, echo: bool = True, trace: bool = False, max_output_lines: Optional[int] = None, cwd: Optional[str] = None) -> CommandResponse:
-
-
-
-                rc=601,
-                stdout="",
-                stderr=None,
-                success=False,
-                error=ErrorEnvelope(
-                    message=f"cwd not found: {cwd}",
-                    rc=601,
-                    command=path,
-                ),
-            )
-
-        effective_path = path
-        if cwd is not None and not os.path.isabs(path):
-            effective_path = os.path.abspath(os.path.join(cwd, path))
-
-        if not os.path.exists(effective_path):
-            return CommandResponse(
-                command=f'do "{effective_path}"',
-                rc=601,
-                stdout="",
-                stderr=None,
-                success=False,
-                error=ErrorEnvelope(
-                    message=f"Do-file not found: {effective_path}",
-                    rc=601,
-                    command=effective_path,
-                ),
-            )
+        effective_path, command, error_response = self._resolve_do_file_path(path, cwd)
+        if error_response is not None:
+            return error_response
 
         if not self._initialized:
            self.init()
 
         start_time = time.time()
         exc: Optional[Exception] = None
-
-
+        smcl_content = ""
+        smcl_path = None
 
-
-
-
-
-            mode="w",
-            encoding="utf-8",
-            errors="replace",
-            buffering=1,
-        )
-        log_path = log_file.name
-        tail = TailBuffer(max_chars=200000 if trace else 20000)
-        tee = FileTeeIO(log_file, tail)
+        _log_file, log_path, tail, tee = self._create_streaming_log(trace=trace)
+        base_dir = cwd or os.path.dirname(effective_path)
+        smcl_path = self._create_smcl_log_path(base_dir=base_dir)
+        smcl_log_name = self._make_smcl_log_name()
 
         rc = -1
+        try:
+            rc, exc = self._run_streaming_blocking(
+                command=command,
+                tee=tee,
+                cwd=cwd,
+                trace=trace,
+                echo=echo,
+                smcl_path=smcl_path,
+                smcl_log_name=smcl_log_name,
+                hold_attr="_hold_name_do_sync",
+                require_smcl_log=True,
+            )
+        except Exception as e:
+            exc = e
+            rc = 1
+        finally:
+            tee.close()
 
-
-
-            from sfi import Scalar, SFIToolkit  # Import SFI tools
-            with self._temp_cwd(cwd):
-                with self._redirect_io_streaming(tee, tee):
-                    try:
-                        if trace:
-                            self.stata.run("set trace on")
-                        ret = self.stata.run(command, echo=echo)
-                        # Some PyStata builds return output as a string rather than printing.
-                        if isinstance(ret, str) and ret:
-                            try:
-                                tee.write(ret)
-                            except Exception:
-                                pass
-
-                    except Exception as e:
-                        exc = e
-                        rc = 1
-                    finally:
-                        if trace:
-                            try:
-                                self.stata.run("set trace off")
-                            except Exception:
-                                pass
-        except Exception as e:
-            # Outer catch in case imports or locks fail
-            exc = e
-            rc = 1
-
-        tee.close()
+        # Read SMCL content as the authoritative source
+        smcl_content = self._read_smcl_file(smcl_path)
 
-
-        log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
-        if log_tail and len(log_tail) > len(tail_text):
-            tail_text = log_tail
-        combined = (tail_text or "") + (f"\n{exc}" if exc else "")
+        combined = self._build_combined_log(tail, log_path, rc, trace, exc)
 
-        #
+        # Use SMCL content as primary source for RC detection if not already captured
         if rc == -1 and not exc:
-            parsed_rc = self.
-
-
-
+            parsed_rc = self._parse_rc_from_smcl(smcl_content)
+            if parsed_rc is not None:
+                rc = parsed_rc
+            else:
+                # Fallback to text parsing
+                parsed_rc = self._parse_rc_from_text(combined)
+                rc = parsed_rc if parsed_rc is not None else 0
+        elif exc and rc == 1:
+            # Try to parse more specific RC from exception message
             parsed_rc = self._parse_rc_from_text(str(exc))
             if parsed_rc is not None:
                 rc = parsed_rc
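Note: run_do_file now treats the SMCL log as the authoritative record and only falls back to plain-text parsing when SMCL yields nothing. The return-code decision, stripped of client plumbing (the regexes are assumptions about Stata's log formats; the package's real parsers are _parse_rc_from_smcl and _parse_rc_from_text):

import re
from typing import Optional

def parse_rc_from_smcl(smcl: str) -> Optional[int]:
    # SMCL typically renders return codes as directives like {search r(601), ...}
    m = re.search(r"\{search r\((\d+)\)", smcl or "")
    return int(m.group(1)) if m else None

def parse_rc_from_text(text: str) -> Optional[int]:
    # Plain output ends error lines with r(#); pick up the number
    m = re.search(r"\br\((\d+)\);?", text or "")
    return int(m.group(1)) if m else None

def resolve_rc(rc: int, exc: Optional[Exception], smcl: str, combined: str) -> int:
    if rc == -1 and exc is None:
        parsed = parse_rc_from_smcl(smcl)
        if parsed is None:
            parsed = parse_rc_from_text(combined)  # fallback to plain text
        return parsed if parsed is not None else 0
    if exc is not None and rc == 1:
        parsed = parse_rc_from_text(str(exc))  # exception text may carry r(#)
        if parsed is not None:
            return parsed
    return rc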
@@ -2360,15 +3592,20 @@ class StataClient:
         error = None
 
         if not success:
-            #
-
+            # Use SMCL as authoritative source for error extraction
+            if smcl_content:
+                msg, context = self._extract_error_from_smcl(smcl_content, rc)
+            else:
+                # Fallback to combined log
+                msg, context = self._extract_error_and_context(combined, rc)
 
             error = ErrorEnvelope(
                 message=msg,
                 rc=rc,
                 snippet=context,
                 command=command,
-                log_path=log_path
+                log_path=log_path,
+                smcl_output=smcl_content,
             )
 
         duration = time.time() - start_time
@@ -2389,6 +3626,7 @@ class StataClient:
             log_path=log_path,
             success=success,
             error=error,
+            smcl_output=smcl_content,
         )
 
     def load_data(self, source: str, clear: bool = True, max_output_lines: Optional[int] = None) -> CommandResponse:
@@ -2407,40 +3645,8 @@ class StataClient:
         cmd = f"sysuse {src}{clear_suffix}"
 
         result = self._exec_with_capture(cmd, echo=True, trace=False)
-
-        # Truncate stdout if requested
-        if max_output_lines is not None and result.stdout:
-            lines = result.stdout.splitlines()
-            if len(lines) > max_output_lines:
-                truncated_lines = lines[:max_output_lines]
-                truncated_lines.append(f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)")
-                result = CommandResponse(
-                    command=result.command,
-                    rc=result.rc,
-                    stdout="\n".join(truncated_lines),
-                    stderr=result.stderr,
-                    success=result.success,
-                    error=result.error,
-                )
-
-        return result
+        return self._truncate_command_output(result, max_output_lines)
 
     def codebook(self, varname: str, trace: bool = False, max_output_lines: Optional[int] = None) -> CommandResponse:
         result = self._exec_with_capture(f"codebook {varname}", trace=trace)
-
-        # Truncate stdout if requested
-        if max_output_lines is not None and result.stdout:
-            lines = result.stdout.splitlines()
-            if len(lines) > max_output_lines:
-                truncated_lines = lines[:max_output_lines]
-                truncated_lines.append(f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)")
-                result = CommandResponse(
-                    command=result.command,
-                    rc=result.rc,
-                    stdout="\n".join(truncated_lines),
-                    stderr=result.stderr,
-                    success=result.success,
-                    error=result.error,
-                )
-
-        return result
+        return self._truncate_command_output(result, max_output_lines)