mcp-stata 1.2.2__py3-none-any.whl → 1.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-stata might be problematic. Click here for more details.

mcp_stata/stata_client.py CHANGED
@@ -1,17 +1,20 @@
1
- import sys
2
- import os
3
- import json
4
- import re
5
1
  import base64
2
+ import json
6
3
  import logging
4
+ import os
5
+ import re
6
+ import subprocess
7
+ import sys
7
8
  import threading
9
+ import tempfile
8
10
  import time
9
- from io import StringIO
10
11
  from contextlib import contextmanager
11
- from typing import Any, List, Optional, Dict
12
- import pandas as pd
12
+ from io import StringIO
13
+ from typing import Any, Awaitable, Callable, Dict, List, Optional
14
+
15
+ import anyio
16
+
13
17
  from .discovery import find_stata_path
14
- from .smcl.smcl2html import smcl_to_markdown
15
18
  from .models import (
16
19
  CommandResponse,
17
20
  ErrorEnvelope,
@@ -22,21 +25,29 @@ from .models import (
22
25
  VariableInfo,
23
26
  VariablesResponse,
24
27
  )
28
+ from .smcl.smcl2html import smcl_to_markdown
29
+ from .streaming_io import FileTeeIO, TailBuffer
30
+ from .graph_detector import StreamingGraphCache
25
31
 
26
32
  logger = logging.getLogger("mcp_stata")
27
33
 
34
+
28
35
  class StataClient:
29
- _instance = None
30
36
  _initialized = False
31
37
  _exec_lock: threading.Lock
38
+ _cache_init_lock = threading.Lock() # Class-level lock for cache initialization
39
+ _is_executing = False # Flag to prevent recursive Stata calls
32
40
  MAX_DATA_ROWS = 500
33
- MAX_GRAPH_BYTES = 50 * 1024 * 1024 # Allow large graph exports (~50MB)
41
+ MAX_GRAPH_BYTES = 50 * 1024 * 1024 # Maximum graph exports (~50MB)
42
+ MAX_CACHE_SIZE = 100 # Maximum number of graphs to cache
43
+ MAX_CACHE_BYTES = 500 * 1024 * 1024 # Maximum cache size in bytes (~500MB)
44
+ LIST_GRAPHS_TTL = 0.075 # TTL for list_graphs cache (75ms)
34
45
 
35
46
  def __new__(cls):
36
- if cls._instance is None:
37
- cls._instance = super(StataClient, cls).__new__(cls)
38
- cls._instance._exec_lock = threading.Lock()
39
- return cls._instance
47
+ inst = super(StataClient, cls).__new__(cls)
48
+ inst._exec_lock = threading.Lock()
49
+ inst._is_executing = False
50
+ return inst
40
51
 
41
52
  @contextmanager
42
53
  def _redirect_io(self):
@@ -49,15 +60,67 @@ class StataClient:
49
60
  finally:
50
61
  sys.stdout, sys.stderr = backup_stdout, backup_stderr
51
62
 
63
+ @staticmethod
64
+ def _stata_quote(value: str) -> str:
65
+ """Return a Stata double-quoted string literal for value."""
66
+ # Stata uses doubled quotes to represent a quote character inside a string.
67
+ v = (value or "")
68
+ v = v.replace('"', '""')
69
+ # Use compound double quotes to avoid tokenization issues with spaces and
70
+ # punctuation in contexts like graph names.
71
+ return f'`"{v}"\''
72
+
73
+ @contextmanager
74
+ def _redirect_io_streaming(self, out_stream, err_stream):
75
+ backup_stdout, backup_stderr = sys.stdout, sys.stderr
76
+ sys.stdout, sys.stderr = out_stream, err_stream
77
+ try:
78
+ yield
79
+ finally:
80
+ sys.stdout, sys.stderr = backup_stdout, backup_stderr
81
+
82
+ @staticmethod
83
+ def _create_graph_cache_callback(on_graph_cached, notify_log):
84
+ """Create a standardized graph cache callback with proper error handling."""
85
+ async def graph_cache_callback(graph_name: str, success: bool) -> None:
86
+ try:
87
+ if on_graph_cached:
88
+ await on_graph_cached(graph_name, success)
89
+ except Exception as e:
90
+ logger.error(f"Graph cache callback failed: {e}")
91
+
92
+ try:
93
+ # Also notify via log channel
94
+ await notify_log(json.dumps({
95
+ "event": "graph_cached",
96
+ "graph": graph_name,
97
+ "success": success
98
+ }))
99
+ except Exception as e:
100
+ logger.error(f"Failed to notify about graph cache: {e}")
101
+
102
+ return graph_cache_callback
103
+
104
+ @contextmanager
105
+ def _temp_cwd(self, cwd: Optional[str]):
106
+ if cwd is None:
107
+ yield
108
+ return
109
+ prev = os.getcwd()
110
+ os.chdir(cwd)
111
+ try:
112
+ yield
113
+ finally:
114
+ os.chdir(prev)
115
+
52
116
  def init(self):
53
117
  """Initializes usage of pystata."""
54
118
  if self._initialized:
55
119
  return
56
120
 
57
121
  try:
58
- # 1. Setup config
59
- # 1. Setup config
60
122
  import stata_setup
123
+
61
124
  try:
62
125
  stata_exec_path, edition = find_stata_path()
63
126
  except FileNotFoundError as e:
@@ -67,77 +130,148 @@ class StataClient:
67
130
  f"Stata binary is not executable: {e}. "
68
131
  "Point STATA_PATH directly to the Stata binary (e.g., .../Contents/MacOS/stata-mp)."
69
132
  ) from e
70
- logger.info(f"Discovery found Stata at: {stata_exec_path} ({edition})")
71
133
 
72
- # Helper to try init
73
- def tries_init(path_to_try):
74
- try:
75
- logger.info(f"Attempting stata_setup.config with: {path_to_try}")
76
- stata_setup.config(path_to_try, edition)
77
- return True
78
- except Exception as e:
79
- logger.warning(f"Init failed with {path_to_try}: {e}")
80
- return False
134
+ logger.info(f"Discovery found Stata at: {stata_exec_path} ({edition})")
81
135
 
82
- success = False
83
136
  candidates = []
84
-
85
- # 1. Binary Dir: .../Contents/MacOS
137
+
138
+ # Prefer the binary directory first (documented input for stata_setup)
86
139
  bin_dir = os.path.dirname(stata_exec_path)
87
-
88
- # 2. App Bundle: .../StataMP.app
89
- # Walk up to find .app
140
+ if bin_dir:
141
+ candidates.append(bin_dir)
142
+
143
+ # 2. App Bundle: .../StataMP.app (macOS only)
90
144
  curr = bin_dir
91
145
  app_bundle = None
92
146
  while len(curr) > 1:
93
147
  if curr.endswith(".app"):
94
148
  app_bundle = curr
95
149
  break
96
- curr = os.path.dirname(curr)
97
-
150
+ parent = os.path.dirname(curr)
151
+ if parent == curr: # Reached root directory, prevent infinite loop on Windows
152
+ break
153
+ curr = parent
154
+
98
155
  if app_bundle:
99
- # Priority 1: The installation root (parent of .app)
100
- candidates.append(os.path.dirname(app_bundle))
101
-
102
- # Priority 2: The .app bundle itself
103
- candidates.append(app_bundle)
104
-
105
- # Priority 3: The binary directory
106
- candidates.append(bin_dir)
107
-
156
+ candidates.insert(0, os.path.dirname(app_bundle))
157
+ candidates.insert(1, app_bundle)
158
+
159
+ # Deduplicate preserving order
160
+ seen = set()
161
+ deduped = []
162
+ for c in candidates:
163
+ if c in seen:
164
+ continue
165
+ seen.add(c)
166
+ deduped.append(c)
167
+ candidates = deduped
168
+
169
+ success = False
108
170
  for path in candidates:
109
- if tries_init(path):
171
+ try:
172
+ stata_setup.config(path, edition)
110
173
  success = True
111
174
  break
112
-
175
+ except Exception:
176
+ continue
177
+
113
178
  if not success:
114
179
  raise RuntimeError(
115
180
  f"stata_setup.config failed. Tried: {candidates}. "
116
181
  f"Derived from binary: {stata_exec_path}"
117
182
  )
118
-
119
- # 2. Import pystata
120
- from pystata import stata
183
+
184
+ # Cache the binary path for later use (e.g., PNG export on Windows)
185
+ self._stata_exec_path = os.path.abspath(stata_exec_path)
186
+
187
+ from pystata import stata # type: ignore[import-not-found]
121
188
  self.stata = stata
122
189
  self._initialized = True
190
+
191
+ # Ensure a clean graph state for a fresh client. PyStata's backend is
192
+ # effectively global, so graph memory can otherwise leak across tests
193
+ # and separate StataClient instances.
194
+ try:
195
+ self.stata.run("capture graph drop _all", quietly=True)
196
+ except Exception:
197
+ pass
123
198
 
199
+ # Initialize list_graphs TTL cache
200
+ self._list_graphs_cache = None
201
+ self._list_graphs_cache_time = 0
202
+ self._list_graphs_cache_lock = threading.Lock()
203
+
204
+ # Map user-facing graph names (may include spaces/punctuation) to valid
205
+ # internal Stata graph names.
206
+ self._graph_name_aliases: Dict[str, str] = {}
207
+ self._graph_name_reverse: Dict[str, str] = {}
208
+
124
209
  except ImportError:
125
210
  # Fallback for when stata_setup isn't in PYTHONPATH yet?
126
211
  # Usually users must have it installed. We rely on discovery logic.
127
212
  raise RuntimeError("Could not import `stata_setup`. Ensure pystata is installed.")
128
- except Exception as e:
129
- raise RuntimeError(f"Failed to initialize Stata: {e}")
213
+
214
+ def _make_valid_stata_name(self, name: str) -> str:
215
+ """Create a valid Stata name (<=32 chars, [A-Za-z_][A-Za-z0-9_]*)."""
216
+ base = re.sub(r"[^A-Za-z0-9_]", "_", name or "")
217
+ if not base:
218
+ base = "Graph"
219
+ if not re.match(r"^[A-Za-z_]", base):
220
+ base = f"G_{base}"
221
+ base = base[:32]
222
+
223
+ # Avoid collisions.
224
+ candidate = base
225
+ i = 1
226
+ while candidate in getattr(self, "_graph_name_reverse", {}):
227
+ suffix = f"_{i}"
228
+ candidate = (base[: max(0, 32 - len(suffix))] + suffix)[:32]
229
+ i += 1
230
+ return candidate
231
+
232
+ def _resolve_graph_name_for_stata(self, name: str) -> str:
233
+ """Return internal Stata graph name for a user-facing name."""
234
+ if not name:
235
+ return name
236
+ aliases = getattr(self, "_graph_name_aliases", None)
237
+ if aliases and name in aliases:
238
+ return aliases[name]
239
+ return name
240
+
241
+ def _maybe_rewrite_graph_name_in_command(self, code: str) -> str:
242
+ """Rewrite name("...") to a valid Stata name and store alias mapping."""
243
+ if not code:
244
+ return code
245
+ if not hasattr(self, "_graph_name_aliases"):
246
+ self._graph_name_aliases = {}
247
+ self._graph_name_reverse = {}
248
+
249
+ # Handle common patterns: name("..." ...) or name(`"..."' ...)
250
+ pat = re.compile(r"name\(\s*(?:`\"(?P<cq>[^\"]*)\"'|\"(?P<dq>[^\"]*)\")\s*(?P<rest>[^)]*)\)")
251
+
252
+ def repl(m: re.Match) -> str:
253
+ original = m.group("cq") if m.group("cq") is not None else m.group("dq")
254
+ original = original or ""
255
+ internal = self._graph_name_aliases.get(original)
256
+ if not internal:
257
+ internal = self._make_valid_stata_name(original)
258
+ self._graph_name_aliases[original] = internal
259
+ self._graph_name_reverse[internal] = original
260
+ rest = m.group("rest") or ""
261
+ return f"name({internal}{rest})"
262
+
263
+ return pat.sub(repl, code)
130
264
 
131
265
  def _read_return_code(self) -> int:
132
266
  """Read the last Stata return code without mutating rc."""
133
267
  try:
134
- from sfi import Macro
268
+ from sfi import Macro # type: ignore[import-not-found]
135
269
  rc_val = Macro.getCValue("rc") # type: ignore[attr-defined]
136
270
  return int(float(rc_val))
137
271
  except Exception:
138
272
  try:
139
273
  self.stata.run("global MCP_RC = c(rc)")
140
- from sfi import Macro as Macro2
274
+ from sfi import Macro as Macro2 # type: ignore[import-not-found]
141
275
  rc_val = Macro2.getGlobal("MCP_RC")
142
276
  return int(float(rc_val))
143
277
  except Exception:
@@ -183,7 +317,7 @@ class StataClient:
183
317
  ) -> ErrorEnvelope:
184
318
  combined = "\n".join(filter(None, [stdout, stderr, str(exc) if exc else ""])).strip()
185
319
  rc_hint = self._parse_rc_from_text(combined) if combined else None
186
- rc_final = rc if rc not in (-1, None) else rc_hint
320
+ rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
187
321
  line_no = self._parse_line_from_text(combined) if combined else None
188
322
  snippet = combined[-800:] if combined else None
189
323
  message = (stderr or (str(exc) if exc else "") or stdout or "Stata error").strip()
@@ -198,33 +332,68 @@ class StataClient:
198
332
  trace=trace or None,
199
333
  )
200
334
 
201
- def _exec_with_capture(self, code: str, echo: bool = True, trace: bool = False) -> CommandResponse:
335
+ def _exec_with_capture(self, code: str, echo: bool = True, trace: bool = False, cwd: Optional[str] = None) -> CommandResponse:
202
336
  """Execute Stata code with stdout/stderr capture and rc detection."""
203
337
  if not self._initialized:
204
338
  self.init()
205
339
 
340
+ code = self._maybe_rewrite_graph_name_in_command(code)
341
+
342
+ if cwd is not None and not os.path.isdir(cwd):
343
+ return CommandResponse(
344
+ command=code,
345
+ rc=601,
346
+ stdout="",
347
+ stderr=None,
348
+ success=False,
349
+ error=ErrorEnvelope(
350
+ message=f"cwd not found: {cwd}",
351
+ rc=601,
352
+ command=code,
353
+ ),
354
+ )
355
+
206
356
  start_time = time.time()
207
357
  exc: Optional[Exception] = None
358
+ ret_text: Optional[str] = None
208
359
  with self._exec_lock:
209
- with self._redirect_io() as (out_buf, err_buf):
210
- try:
211
- if trace:
212
- self.stata.run("set trace on")
213
- self.stata.run(code, echo=echo)
214
- except Exception as e:
215
- exc = e
216
- finally:
217
- rc = self._read_return_code()
218
- if trace:
360
+ # Set execution flag to prevent recursive Stata calls
361
+ self._is_executing = True
362
+ try:
363
+ with self._temp_cwd(cwd):
364
+ with self._redirect_io() as (out_buf, err_buf):
219
365
  try:
220
- self.stata.run("set trace off")
221
- except Exception:
222
- pass
366
+ if trace:
367
+ self.stata.run("set trace on")
368
+ ret = self.stata.run(code, echo=echo)
369
+ if isinstance(ret, str) and ret:
370
+ ret_text = ret
371
+ except Exception as e:
372
+ exc = e
373
+ finally:
374
+ rc = self._read_return_code()
375
+ if trace:
376
+ try:
377
+ self.stata.run("set trace off")
378
+ except Exception:
379
+ pass
380
+ finally:
381
+ # Clear execution flag
382
+ self._is_executing = False
223
383
 
224
384
  stdout = out_buf.getvalue()
385
+ # Some PyStata builds return output as a string rather than printing.
386
+ if (not stdout or not stdout.strip()) and ret_text:
387
+ stdout = ret_text
225
388
  stderr = err_buf.getvalue()
226
- # If no exception and stderr is empty, treat rc anomalies as success (e.g., spurious rc reads)
227
- if exc is None and (not stderr or not stderr.strip()):
389
+ combined = "\n".join(filter(None, [stdout, stderr, str(exc) if exc else ""])).strip()
390
+ rc_hint = self._parse_rc_from_text(combined) if combined else None
391
+ if exc is None and rc_hint is not None and rc_hint != 0:
392
+ # Prefer r(#) parsed from the current command output when present.
393
+ rc = rc_hint
394
+ # If no exception and stderr is empty and no r(#) is present, treat rc anomalies as success
395
+ # (e.g., stale/spurious c(rc) reads).
396
+ if exc is None and (not stderr or not stderr.strip()) and rc_hint is None:
228
397
  rc = 0 if rc is None or rc != 0 else rc
229
398
  success = rc == 0 and exc is None
230
399
  error = None
@@ -240,266 +409,1669 @@ class StataClient:
240
409
  duration * 1000,
241
410
  code_preview[:120],
242
411
  )
412
+ # Mutually exclusive - when error, output is in ErrorEnvelope only
243
413
  return CommandResponse(
244
414
  command=code,
245
415
  rc=rc,
246
- stdout=stdout,
247
- stderr=stderr or None,
416
+ stdout="" if not success else stdout,
417
+ stderr=None,
248
418
  success=success,
249
419
  error=error,
250
420
  )
251
421
 
252
- def run_command(self, code: str, echo: bool = True) -> str:
253
- """Runs a Stata command and returns raw output (legacy)."""
254
- result = self._exec_with_capture(code, echo=echo)
255
- if result.success:
256
- return result.stdout
257
- if result.error:
258
- return f"Error executing Stata code (r({result.error.rc})):\n{result.error.message}"
259
- return result.stdout or "Unknown Stata error"
422
+ def _exec_no_capture(self, code: str, echo: bool = False, trace: bool = False) -> CommandResponse:
423
+ """Execute Stata code while leaving stdout/stderr alone.
260
424
 
261
- def run_command_structured(self, code: str, echo: bool = True, trace: bool = False) -> CommandResponse:
262
- """Runs a Stata command and returns a structured envelope."""
263
- return self._exec_with_capture(code, echo=echo, trace=trace)
264
-
265
- def get_data(self, start: int = 0, count: int = 50) -> List[Dict[str, Any]]:
266
- """Returns valid JSON-serializable data."""
425
+ PyStata's output bridge uses its own thread and can misbehave on Windows
426
+ when we redirect stdio (e.g., graph export). This path keeps the normal
427
+ handlers and just reads rc afterward.
428
+ """
267
429
  if not self._initialized:
268
430
  self.init()
269
431
 
270
- if count > self.MAX_DATA_ROWS:
271
- count = self.MAX_DATA_ROWS
272
-
273
- try:
274
- # Use pystata integration to retrieve data
275
- df = self.stata.pdataframe_from_data()
432
+ exc: Optional[Exception] = None
433
+ ret_text: Optional[str] = None
434
+ with self._exec_lock:
435
+ try:
436
+ if trace:
437
+ self.stata.run("set trace on")
438
+ ret = self.stata.run(code, echo=echo)
439
+ if isinstance(ret, str) and ret:
440
+ ret_text = ret
441
+ except Exception as e:
442
+ exc = e
443
+ finally:
444
+ rc = self._read_return_code()
445
+ # If Stata returned an r(#) in text, prefer it.
446
+ combined = "\n".join(filter(None, [ret_text or "", str(exc) if exc else ""])).strip()
447
+ rc_hint = self._parse_rc_from_text(combined) if combined else None
448
+ if exc is None and rc_hint is not None and rc_hint != 0:
449
+ rc = rc_hint
450
+ if exc is None and (rc is None or rc == -1) and rc_hint is None:
451
+ # Normalize spurious rc reads only when missing/invalid
452
+ rc = 0
453
+ if trace:
454
+ try:
455
+ self.stata.run("set trace off")
456
+ except Exception as e:
457
+ logger.warning("Failed to turn off Stata trace mode: %s", e)
276
458
 
277
- # Slice
278
- sliced = df.iloc[start : start + count]
459
+ stdout = ""
460
+ stderr = ""
461
+ success = rc == 0 and exc is None
462
+ error = None
463
+ if not success:
464
+ # Pass ret_text as stdout for snippet parsing.
465
+ error = self._build_error_envelope(code, rc, ret_text or "", stderr, exc, trace)
279
466
 
280
- # Convert to dict
281
- return sliced.to_dict(orient="records")
282
- except Exception as e:
283
- return [{"error": f"Failed to retrieve data: {e}"}]
467
+ return CommandResponse(
468
+ command=code,
469
+ rc=rc,
470
+ stdout=stdout,
471
+ stderr=None,
472
+ success=success,
473
+ error=error,
474
+ )
284
475
 
285
- def list_variables(self) -> List[Dict[str, str]]:
286
- """Returns list of variables with labels."""
476
+ async def run_command_streaming(
477
+ self,
478
+ code: str,
479
+ *,
480
+ notify_log: Callable[[str], Awaitable[None]],
481
+ notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
482
+ echo: bool = True,
483
+ trace: bool = False,
484
+ max_output_lines: Optional[int] = None,
485
+ cwd: Optional[str] = None,
486
+ auto_cache_graphs: bool = False,
487
+ on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
488
+ ) -> CommandResponse:
287
489
  if not self._initialized:
288
490
  self.init()
289
-
290
- # We can use sfi to be efficient
291
- from sfi import Data
292
- vars_info = []
293
- for i in range(Data.getVarCount()):
294
- var_index = i # 0-based
295
- name = Data.getVarName(var_index)
296
- label = Data.getVarLabel(var_index)
297
- type_str = Data.getVarType(var_index) # Returns int
298
-
299
- vars_info.append({
300
- "name": name,
301
- "label": label,
302
- "type": str(type_str),
303
- })
304
- return vars_info
305
491
 
306
- def get_variable_details(self, varname: str) -> str:
307
- """Returns codebook/summary for a specific variable."""
308
- return self.run_command(f"codebook {varname}")
492
+ code = self._maybe_rewrite_graph_name_in_command(code)
309
493
 
310
- def list_variables_structured(self) -> VariablesResponse:
311
- vars_info: List[VariableInfo] = []
312
- for item in self.list_variables():
313
- vars_info.append(
314
- VariableInfo(
315
- name=item.get("name", ""),
316
- label=item.get("label"),
317
- type=item.get("type"),
318
- )
494
+ if cwd is not None and not os.path.isdir(cwd):
495
+ return CommandResponse(
496
+ command=code,
497
+ rc=601,
498
+ stdout="",
499
+ stderr=None,
500
+ success=False,
501
+ error=ErrorEnvelope(
502
+ message=f"cwd not found: {cwd}",
503
+ rc=601,
504
+ command=code,
505
+ ),
319
506
  )
320
- return VariablesResponse(variables=vars_info)
321
507
 
322
- def list_graphs(self) -> List[str]:
323
- """Returns list of graphs in memory."""
324
- if not self._initialized:
325
- self.init()
326
-
327
- # 'graph dir' returns list in r(list)
328
- # We need to ensure we run it quietly so we don't spam.
329
- self.stata.run("quietly graph dir, memory")
330
-
331
- # Accessing r-class results in Python can be tricky via pystata's run command.
332
- # We stash the result in a global macro that python sfi can easily read.
333
- from sfi import Macro
334
- self.stata.run("global mcp_graph_list `r(list)'")
335
- graph_list_str = Macro.getGlobal("mcp_graph_list")
336
- if not graph_list_str:
337
- return []
338
-
339
- return graph_list_str.split()
508
+ start_time = time.time()
509
+ exc: Optional[Exception] = None
340
510
 
341
- def list_graphs_structured(self) -> GraphListResponse:
342
- names = self.list_graphs()
343
- active_name = names[-1] if names else None
344
- graphs = [GraphInfo(name=n, active=(n == active_name)) for n in names]
345
- return GraphListResponse(graphs=graphs)
511
+ # Setup streaming graph cache if enabled
512
+ graph_cache = None
513
+ if auto_cache_graphs:
514
+ graph_cache = StreamingGraphCache(self, auto_cache=True)
515
+
516
+ graph_cache_callback = self._create_graph_cache_callback(on_graph_cached, notify_log)
517
+
518
+ graph_cache.add_cache_callback(graph_cache_callback)
346
519
 
347
- def export_graph(self, graph_name: str = None, filename: str = None, format: str = "pdf") -> str:
348
- """Exports graph to a temp file (pdf or png) and returns the path."""
349
- import tempfile
520
+ log_file = tempfile.NamedTemporaryFile(
521
+ prefix="mcp_stata_",
522
+ suffix=".log",
523
+ delete=False,
524
+ mode="w",
525
+ encoding="utf-8",
526
+ errors="replace",
527
+ buffering=1,
528
+ )
529
+ log_path = log_file.name
530
+ tail = TailBuffer(max_chars=8000)
531
+ tee = FileTeeIO(log_file, tail)
350
532
 
351
- fmt = (format or "pdf").strip().lower()
352
- if fmt not in {"pdf", "png"}:
353
- raise ValueError(f"Unsupported graph export format: {format}. Allowed: pdf, png.")
533
+ # Inform the MCP client immediately where to read/tail the output.
534
+ await notify_log(json.dumps({"event": "log_path", "path": log_path}))
354
535
 
355
- if not filename:
356
- suffix = f".{fmt}"
357
- with tempfile.NamedTemporaryFile(prefix="mcp_stata_", suffix=suffix, delete=False) as tmp:
358
- filename = tmp.name
359
- else:
360
- # Ensure fresh start
361
- if os.path.exists(filename):
362
- try:
363
- os.remove(filename)
364
- except Exception:
365
- pass
366
-
367
- cmd = "graph export"
368
- if graph_name:
369
- cmd += f' "{filename}", name("{graph_name}") replace as({fmt})'
370
- else:
371
- cmd += f' "{filename}", replace as({fmt})'
372
-
373
- output = self.run_command(cmd)
374
-
375
- if os.path.exists(filename):
376
- try:
377
- size = os.path.getsize(filename)
378
- if size == 0:
379
- raise RuntimeError(f"Graph export failed: produced empty file {filename}")
380
- if size > self.MAX_GRAPH_BYTES:
381
- raise RuntimeError(
382
- f"Graph export failed: file too large (> {self.MAX_GRAPH_BYTES} bytes): {filename}"
383
- )
384
- except Exception as size_err:
385
- # Clean up oversized or unreadable files
386
- try:
387
- os.remove(filename)
388
- except Exception:
389
- pass
390
- raise size_err
391
- return filename
392
-
393
- # If file missing, it failed. Check output for details.
394
- raise RuntimeError(f"Graph export failed: {output}")
536
+ rc = -1
395
537
 
396
- def get_help(self, topic: str, plain_text: bool = False) -> str:
397
- """Returns help text as Markdown (default) or plain text."""
398
- if not self._initialized:
399
- self.init()
400
-
401
- # Try to locate the .sthlp help file
402
- # We use 'capture' to avoid crashing if not found
403
- self.stata.run(f"capture findfile {topic}.sthlp")
404
-
405
- # Retrieve the found path from r(fn)
406
- from sfi import Macro
407
- self.stata.run("global mcp_help_file `r(fn)'")
408
- fn = Macro.getGlobal("mcp_help_file")
409
-
410
- if fn and os.path.exists(fn):
538
+ def _run_blocking() -> None:
539
+ nonlocal rc, exc
540
+ with self._exec_lock:
541
+ with self._temp_cwd(cwd):
542
+ with self._redirect_io_streaming(tee, tee):
543
+ try:
544
+ if trace:
545
+ self.stata.run("set trace on")
546
+ ret = self.stata.run(code, echo=echo)
547
+ # Some PyStata builds return output as a string rather than printing.
548
+ if isinstance(ret, str) and ret:
549
+ try:
550
+ tee.write(ret)
551
+ except Exception:
552
+ pass
553
+ except Exception as e:
554
+ exc = e
555
+ finally:
556
+ rc = self._read_return_code()
557
+ if trace:
558
+ try:
559
+ self.stata.run("set trace off")
560
+ except Exception:
561
+ pass
562
+
563
+ try:
564
+ if notify_progress is not None:
565
+ await notify_progress(0, None, "Running Stata command")
566
+
567
+ await anyio.to_thread.run_sync(_run_blocking)
568
+ finally:
569
+ tee.close()
570
+
571
+ # Cache detected graphs after command completes
572
+ if graph_cache:
411
573
  try:
412
- with open(fn, 'r', encoding='utf-8', errors='replace') as f:
413
- smcl = f.read()
414
- if plain_text:
415
- return self._smcl_to_text(smcl)
416
- try:
417
- return smcl_to_markdown(smcl, adopath=os.path.dirname(fn), current_file=os.path.splitext(os.path.basename(fn))[0])
418
- except Exception as parse_err:
419
- logger.warning("SMCL to Markdown failed, falling back to plain text: %s", parse_err)
420
- return self._smcl_to_text(smcl)
574
+ # Use the enhanced pystata-integrated caching method
575
+ if hasattr(graph_cache, 'cache_detected_graphs_with_pystata'):
576
+ cached_graphs = await graph_cache.cache_detected_graphs_with_pystata()
577
+ else:
578
+ cached_graphs = await graph_cache.cache_detected_graphs()
579
+
580
+ if cached_graphs and notify_progress:
581
+ await notify_progress(1, 1, f"Command completed. Cached {len(cached_graphs)} graphs: {', '.join(cached_graphs)}")
421
582
  except Exception as e:
422
- return f"Error reading help file at {fn}: {e}"
583
+ logger.warning(f"Failed to cache detected graphs: {e}")
423
584
 
424
- # Fallback to URL if file not found
425
- return f"Help file for '{topic}' not found. Please consult: https://www.stata.com/help.cgi?{topic}"
585
+ tail_text = tail.get_value()
586
+ combined = (tail_text or "") + (f"\n{exc}" if exc else "")
587
+ rc_hint = self._parse_rc_from_text(combined) if combined else None
588
+ if exc is None and rc_hint is not None and rc_hint != 0:
589
+ rc = rc_hint
590
+ if exc is None and rc_hint is None:
591
+ rc = 0 if rc is None or rc != 0 else rc
592
+ success = rc == 0 and exc is None
593
+ error = None
594
+ if not success:
595
+ snippet = (tail_text[-800:] if tail_text else None) or (str(exc) if exc else None)
596
+ rc_hint = self._parse_rc_from_text(combined) if combined else None
597
+ rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
598
+ line_no = self._parse_line_from_text(combined) if combined else None
599
+ message = "Stata error"
600
+ if tail_text and tail_text.strip():
601
+ for line in reversed(tail_text.splitlines()):
602
+ if line.strip():
603
+ message = line.strip()
604
+ break
605
+ elif exc is not None:
606
+ message = str(exc).strip() or message
607
+
608
+ error = ErrorEnvelope(
609
+ message=message,
610
+ rc=rc_final,
611
+ line=line_no,
612
+ command=code,
613
+ log_path=log_path,
614
+ snippet=snippet,
615
+ trace=trace or None,
616
+ )
617
+
618
+ duration = time.time() - start_time
619
+ code_preview = code.replace("\n", "\\n")
620
+ logger.info(
621
+ "stata.run(stream) rc=%s success=%s trace=%s duration_ms=%.2f code_preview=%s",
622
+ rc,
623
+ success,
624
+ trace,
625
+ duration * 1000,
626
+ code_preview[:120],
627
+ )
628
+
629
+ result = CommandResponse(
630
+ command=code,
631
+ rc=rc,
632
+ stdout="",
633
+ stderr=None,
634
+ log_path=log_path,
635
+ success=success,
636
+ error=error,
637
+ )
638
+
639
+ if notify_progress is not None:
640
+ await notify_progress(1, 1, "Finished")
641
+
642
+ return result
643
+
644
+ def _count_do_file_lines(self, path: str) -> int:
645
+ try:
646
+ with open(path, "r", encoding="utf-8", errors="replace") as f:
647
+ lines = f.read().splitlines()
648
+ except Exception:
649
+ return 0
650
+
651
+ total = 0
652
+ for line in lines:
653
+ s = line.strip()
654
+ if not s:
655
+ continue
656
+ if s.startswith("*"):
657
+ continue
658
+ if s.startswith("//"):
659
+ continue
660
+ total += 1
661
+ return total
662
+
663
+ async def run_do_file_streaming(
664
+ self,
665
+ path: str,
666
+ *,
667
+ notify_log: Callable[[str], Awaitable[None]],
668
+ notify_progress: Optional[Callable[[float, Optional[float], Optional[str]], Awaitable[None]]] = None,
669
+ echo: bool = True,
670
+ trace: bool = False,
671
+ max_output_lines: Optional[int] = None,
672
+ cwd: Optional[str] = None,
673
+ auto_cache_graphs: bool = False,
674
+ on_graph_cached: Optional[Callable[[str, bool], Awaitable[None]]] = None,
675
+ ) -> CommandResponse:
676
+ if cwd is not None and not os.path.isdir(cwd):
677
+ return CommandResponse(
678
+ command=f'do "{path}"',
679
+ rc=601,
680
+ stdout="",
681
+ stderr=None,
682
+ success=False,
683
+ error=ErrorEnvelope(
684
+ message=f"cwd not found: {cwd}",
685
+ rc=601,
686
+ command=path,
687
+ ),
688
+ )
689
+
690
+ effective_path = path
691
+ if cwd is not None and not os.path.isabs(path):
692
+ effective_path = os.path.abspath(os.path.join(cwd, path))
693
+
694
+ if not os.path.exists(effective_path):
695
+ return CommandResponse(
696
+ command=f'do "{effective_path}"',
697
+ rc=601,
698
+ stdout="",
699
+ stderr=None,
700
+ success=False,
701
+ error=ErrorEnvelope(
702
+ message=f"Do-file not found: {effective_path}",
703
+ rc=601,
704
+ command=effective_path,
705
+ ),
706
+ )
707
+
708
+ total_lines = self._count_do_file_lines(effective_path)
709
+ executed_lines = 0
710
+ last_progress_time = 0.0
711
+ dot_prompt = re.compile(r"^\.\s+\S")
712
+
713
+ async def on_chunk_for_progress(chunk: str) -> None:
714
+ nonlocal executed_lines, last_progress_time
715
+ if total_lines <= 0 or notify_progress is None:
716
+ return
717
+ for line in chunk.splitlines():
718
+ if dot_prompt.match(line):
719
+ executed_lines += 1
720
+ if executed_lines > total_lines:
721
+ executed_lines = total_lines
722
+
723
+ now = time.monotonic()
724
+ if executed_lines > 0 and (now - last_progress_time) >= 0.25:
725
+ last_progress_time = now
726
+ await notify_progress(
727
+ float(executed_lines),
728
+ float(total_lines),
729
+ f"Executing do-file: {executed_lines}/{total_lines}",
730
+ )
731
+
732
+ if not self._initialized:
733
+ self.init()
734
+
735
+ start_time = time.time()
736
+ exc: Optional[Exception] = None
737
+
738
+ # Setup streaming graph cache if enabled
739
+ graph_cache = None
740
+ if auto_cache_graphs:
741
+ graph_cache = StreamingGraphCache(self, auto_cache=True)
742
+
743
+ graph_cache_callback = self._create_graph_cache_callback(on_graph_cached, notify_log)
744
+
745
+ graph_cache.add_cache_callback(graph_cache_callback)
746
+
747
+ log_file = tempfile.NamedTemporaryFile(
748
+ prefix="mcp_stata_",
749
+ suffix=".log",
750
+ delete=False,
751
+ mode="w",
752
+ encoding="utf-8",
753
+ errors="replace",
754
+ buffering=1,
755
+ )
756
+ log_path = log_file.name
757
+ tail = TailBuffer(max_chars=8000)
758
+ tee = FileTeeIO(log_file, tail)
759
+
760
+ # Inform the MCP client immediately where to read/tail the output.
761
+ await notify_log(json.dumps({"event": "log_path", "path": log_path}))
762
+
763
+ rc = -1
764
+ path_for_stata = effective_path.replace("\\", "/")
765
+ command = f'do "{path_for_stata}"'
766
+
767
+ # Capture initial graph state BEFORE execution starts
768
+ # This allows post-execution detection to identify new graphs
769
+ if graph_cache:
770
+ try:
771
+ graph_cache._initial_graphs = set(self.list_graphs())
772
+ logger.debug(f"Initial graph state captured: {graph_cache._initial_graphs}")
773
+ except Exception as e:
774
+ logger.debug(f"Failed to capture initial graph state: {e}")
775
+ graph_cache._initial_graphs = set()
776
+
777
+ def _run_blocking() -> None:
778
+ nonlocal rc, exc
779
+ with self._exec_lock:
780
+ # Set execution flag to prevent recursive Stata calls
781
+ self._is_executing = True
782
+ try:
783
+ with self._temp_cwd(cwd):
784
+ with self._redirect_io_streaming(tee, tee):
785
+ try:
786
+ if trace:
787
+ self.stata.run("set trace on")
788
+ ret = self.stata.run(command, echo=echo)
789
+ # Some PyStata builds return output as a string rather than printing.
790
+ if isinstance(ret, str) and ret:
791
+ try:
792
+ tee.write(ret)
793
+ except Exception:
794
+ pass
795
+ except Exception as e:
796
+ exc = e
797
+ finally:
798
+ rc = self._read_return_code()
799
+ if trace:
800
+ try:
801
+ self.stata.run("set trace off")
802
+ except Exception:
803
+ pass
804
+ finally:
805
+ # Clear execution flag
806
+ self._is_executing = False
807
+
808
+ done = anyio.Event()
809
+
810
+ async def _monitor_progress_from_log() -> None:
811
+ if notify_progress is None or total_lines <= 0:
812
+ return
813
+ last_pos = 0
814
+ try:
815
+ with open(log_path, "r", encoding="utf-8", errors="replace") as f:
816
+ while not done.is_set():
817
+ f.seek(last_pos)
818
+ chunk = f.read()
819
+ if chunk:
820
+ last_pos = f.tell()
821
+ await on_chunk_for_progress(chunk)
822
+ await anyio.sleep(0.05)
823
+
824
+ f.seek(last_pos)
825
+ chunk = f.read()
826
+ if chunk:
827
+ await on_chunk_for_progress(chunk)
828
+ except Exception:
829
+ return
830
+
831
+ async with anyio.create_task_group() as tg:
832
+ tg.start_soon(_monitor_progress_from_log)
833
+
834
+ if notify_progress is not None:
835
+ if total_lines > 0:
836
+ await notify_progress(0, float(total_lines), f"Executing do-file: 0/{total_lines}")
837
+ else:
838
+ await notify_progress(0, None, "Running do-file")
839
+
840
+ try:
841
+ await anyio.to_thread.run_sync(_run_blocking)
842
+ finally:
843
+ done.set()
844
+ tee.close()
845
+
846
+ # Robust post-execution graph detection and caching
847
+ # This is the ONLY place where graphs are detected and cached
848
+ # Runs after execution completes, when it's safe to call list_graphs()
849
+ if graph_cache and graph_cache.auto_cache:
850
+ cached_graphs = []
851
+ try:
852
+ # Get initial state (before execution)
853
+ initial_graphs = getattr(graph_cache, '_initial_graphs', set())
854
+
855
+ # Get current state (after execution)
856
+ logger.debug("Post-execution: Querying graph state via list_graphs()")
857
+ current_graphs = set(self.list_graphs())
858
+
859
+ # Detect new graphs (created during execution)
860
+ new_graphs = current_graphs - initial_graphs - graph_cache._cached_graphs
861
+
862
+ if new_graphs:
863
+ logger.info(f"Detected {len(new_graphs)} new graph(s): {sorted(new_graphs)}")
864
+
865
+ # Cache each detected graph
866
+ for graph_name in new_graphs:
867
+ try:
868
+ logger.debug(f"Caching graph: {graph_name}")
869
+ cache_result = await anyio.to_thread.run_sync(
870
+ self.cache_graph_on_creation,
871
+ graph_name
872
+ )
873
+
874
+ if cache_result:
875
+ cached_graphs.append(graph_name)
876
+ graph_cache._cached_graphs.add(graph_name)
877
+ logger.debug(f"Successfully cached graph: {graph_name}")
878
+ else:
879
+ logger.warning(f"Failed to cache graph: {graph_name}")
880
+
881
+ # Trigger callbacks
882
+ for callback in graph_cache._cache_callbacks:
883
+ try:
884
+ await anyio.to_thread.run_sync(callback, graph_name, cache_result)
885
+ except Exception as e:
886
+ logger.debug(f"Callback failed for {graph_name}: {e}")
887
+
888
+ except Exception as e:
889
+ logger.error(f"Error caching graph {graph_name}: {e}")
890
+ # Trigger callbacks with failure
891
+ for callback in graph_cache._cache_callbacks:
892
+ try:
893
+ await anyio.to_thread.run_sync(callback, graph_name, False)
894
+ except Exception:
895
+ pass
896
+
897
+ # Check for dropped graphs (for completeness)
898
+ dropped_graphs = initial_graphs - current_graphs
899
+ if dropped_graphs:
900
+ logger.debug(f"Graphs dropped during execution: {sorted(dropped_graphs)}")
901
+ for graph_name in dropped_graphs:
902
+ try:
903
+ self.invalidate_graph_cache(graph_name)
904
+ except Exception:
905
+ pass
906
+
907
+ # Notify progress if graphs were cached
908
+ if cached_graphs and notify_progress:
909
+ await notify_progress(
910
+ float(total_lines) if total_lines > 0 else 1,
911
+ float(total_lines) if total_lines > 0 else 1,
912
+ f"Do-file completed. Cached {len(cached_graphs)} graph(s): {', '.join(cached_graphs)}"
913
+ )
914
+
915
+ except Exception as e:
916
+ logger.error(f"Post-execution graph detection failed: {e}")
917
+
918
+ tail_text = tail.get_value()
919
+ combined = (tail_text or "") + (f"\n{exc}" if exc else "")
920
+ rc_hint = self._parse_rc_from_text(combined) if combined else None
921
+ if exc is None and rc_hint is not None and rc_hint != 0:
922
+ rc = rc_hint
923
+ if exc is None and rc_hint is None:
924
+ rc = 0 if rc is None or rc != 0 else rc
925
+ success = rc == 0 and exc is None
926
+ error = None
927
+ if not success:
928
+ snippet = (tail_text[-800:] if tail_text else None) or (str(exc) if exc else None)
929
+ rc_hint = self._parse_rc_from_text(combined) if combined else None
930
+ rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
931
+ line_no = self._parse_line_from_text(combined) if combined else None
932
+ message = "Stata error"
933
+ if tail_text and tail_text.strip():
934
+ for line in reversed(tail_text.splitlines()):
935
+ if line.strip():
936
+ message = line.strip()
937
+ break
938
+ elif exc is not None:
939
+ message = str(exc).strip() or message
940
+
941
+ error = ErrorEnvelope(
942
+ message=message,
943
+ rc=rc_final,
944
+ line=line_no,
945
+ command=command,
946
+ log_path=log_path,
947
+ snippet=snippet,
948
+ trace=trace or None,
949
+ )
950
+
951
+ duration = time.time() - start_time
952
+ logger.info(
953
+ "stata.run(do stream) rc=%s success=%s trace=%s duration_ms=%.2f path=%s",
954
+ rc,
955
+ success,
956
+ trace,
957
+ duration * 1000,
958
+ effective_path,
959
+ )
960
+
961
+ result = CommandResponse(
962
+ command=command,
963
+ rc=rc,
964
+ stdout="",
965
+ stderr=None,
966
+ log_path=log_path,
967
+ success=success,
968
+ error=error,
969
+ )
970
+
971
+ if notify_progress is not None:
972
+ if total_lines > 0:
973
+ await notify_progress(float(total_lines), float(total_lines), f"Executing do-file: {total_lines}/{total_lines}")
974
+ else:
975
+ await notify_progress(1, 1, "Finished")
976
+
977
+ return result
978
+
979
def run_command_structured(self, code: str, echo: bool = True, trace: bool = False, max_output_lines: Optional[int] = None, cwd: Optional[str] = None) -> CommandResponse:
    """Run a Stata command and return a structured envelope.

    Args:
        code: The Stata command to execute.
        echo: If True, the command itself is included in the output.
        trace: If True, enables trace mode for debugging.
        max_output_lines: If set, truncates stdout to this many lines (token
            efficiency). Negative values are treated as 0.
        cwd: Forwarded unchanged to ``_exec_with_capture``.

    Returns:
        The response produced by ``_exec_with_capture``; when truncation
        applies, a new ``CommandResponse`` carrying the shortened stdout and
        every other field of the original response.
    """
    result = self._exec_with_capture(code, echo=echo, trace=trace, cwd=cwd)

    if max_output_lines is None or not result.stdout:
        return result

    # A negative limit would slice from the end of the output; clamp it.
    limit = max(0, max_output_lines)
    lines = result.stdout.splitlines()
    if len(lines) <= limit:
        return result

    kept = lines[:limit]
    kept.append(f"\n... (output truncated: showing {limit} of {len(lines)} lines)")
    return CommandResponse(
        command=result.command,
        rc=result.rc,
        stdout="\n".join(kept),
        stderr=result.stderr,
        # Keep the log location so truncation does not hide the full output.
        log_path=getattr(result, "log_path", None),
        success=result.success,
        error=result.error,
    )
1006
+
1007
def get_data(self, start: int = 0, count: int = 50) -> List[Dict[str, Any]]:
    """Return a slice of the dataset as a list of row dictionaries.

    Args:
        start: Position of the first row to return. Negative values are
            treated as 0 so the slice never wraps around to the end.
        count: Maximum number of rows; clamped to ``[0, MAX_DATA_ROWS]``.

    Returns:
        One dict per row (``DataFrame.to_dict(orient="records")``). On any
        failure a single-element list ``[{"error": "..."}]`` is returned
        instead of raising.
    """
    if not self._initialized:
        self.init()

    start = max(0, start)
    count = max(0, min(count, self.MAX_DATA_ROWS))

    try:
        # Use pystata integration to retrieve data
        df = self.stata.pdataframe_from_data()
        return df.iloc[start : start + count].to_dict(orient="records")
    except Exception as e:
        return [{"error": f"Failed to retrieve data: {e}"}]
1026
+
1027
def list_variables(self) -> List[Dict[str, str]]:
    """Return name, label and type for every variable in the dataset.

    Each entry is a dict with keys ``"name"``, ``"label"`` and ``"type"``;
    the type is whatever ``Data.getVarType`` returns, passed through ``str``.
    """
    if not self._initialized:
        self.init()

    # sfi reads the metadata directly, without running a Stata command.
    from sfi import Data  # type: ignore[import-not-found]

    def describe(position: int) -> Dict[str, str]:
        return {
            "name": Data.getVarName(position),
            "label": Data.getVarLabel(position),
            "type": str(Data.getVarType(position)),
        }

    return [describe(position) for position in range(Data.getVarCount())]
1047
+
1048
def get_dataset_state(self) -> Dict[str, Any]:
    """Return basic dataset state without mutating the dataset.

    Returns:
        A dict with ``frame`` (c(frame), default ``"default"``), ``n``
        (``Data.getObsTotal``), ``k`` (``Data.getVarCount``), ``sortlist``
        (c(sortlist), default ``""``) and ``changed`` (c(changed) as bool,
        default ``False``). Each c() lookup falls back to its default if
        reading or converting the value raises.
    """
    if not self._initialized:
        self.init()

    from sfi import Data, Macro  # type: ignore[import-not-found]

    obs_total = int(Data.getObsTotal())
    var_total = int(Data.getVarCount())

    def read_c(name, convert, fallback):
        # Any failure while reading or converting yields the fallback.
        try:
            return convert(Macro.getCValue(name))
        except Exception:
            return fallback

    frame_name = read_c("frame", lambda raw: str(raw or "default"), "default")
    sort_vars = read_c("sortlist", lambda raw: str(raw or ""), "")
    is_changed = read_c("changed", lambda raw: bool(int(float(raw or "0"))), False)

    return {
        "frame": frame_name,
        "n": obs_total,
        "k": var_total,
        "sortlist": sort_vars,
        "changed": is_changed,
    }
1075
+
1076
+ def _require_data_in_memory(self) -> None:
1077
+ state = self.get_dataset_state()
1078
+ if int(state.get("k", 0) or 0) == 0 and int(state.get("n", 0) or 0) == 0:
1079
+ # Stata empty dataset could still have k>0 n==0; treat that as ok.
1080
+ raise RuntimeError("No data in memory")
1081
+
1082
def _get_var_index_map(self) -> Dict[str, int]:
    """Map each variable name to the index it was read from.

    Indices run from 0 to ``Data.getVarCount() - 1``; an index whose name
    lookup raises is left out of the map.
    """
    from sfi import Data  # type: ignore[import-not-found]

    positions: Dict[str, int] = {}
    for position in range(int(Data.getVarCount())):
        try:
            var_name = str(Data.getVarName(position))
        except Exception:
            continue
        positions[var_name] = position
    return positions
1092
+
1093
def list_variables_rich(self) -> List[Dict[str, Any]]:
    """Return variable metadata (name/type/label/format/valueLabel) without modifying the dataset.

    Each lookup is best effort: if an sfi call raises, that field is
    reported as ``None``. Empty labels, formats and value-label names are
    also normalised to ``None``.
    """
    if not self._initialized:
        self.init()

    from sfi import Data  # type: ignore[import-not-found]

    try:
        from sfi import ValueLabel  # type: ignore[import-not-found]
    except Exception:
        # Without ValueLabel the field stays None, as it always did before.
        ValueLabel = None

    def _lookup(owner: Any, method: str, index: int) -> Any:
        # Attribute access and call both happen inside the guard.
        try:
            return getattr(owner, method)(index)
        except Exception:
            return None

    vars_info: List[Dict[str, Any]] = []
    for i in range(int(Data.getVarCount())):
        name = str(Data.getVarName(i))
        label = _lookup(Data, "getVarLabel", i)
        fmt = _lookup(Data, "getVarFormat", i)
        vtype = _lookup(Data, "getVarType", i)
        # Previously this field was declared but never filled in.
        value_label = _lookup(ValueLabel, "getVarValueLabel", i) if ValueLabel is not None else None

        vars_info.append(
            {
                "name": name,
                "type": str(vtype) if vtype is not None else None,
                "label": label if label else None,
                "format": fmt if fmt else None,
                "valueLabel": value_label if value_label else None,
            }
        )
    return vars_info
1130
+
1131
+ @staticmethod
1132
+ def _is_stata_missing(value: Any) -> bool:
1133
+ if value is None:
1134
+ return True
1135
+ if isinstance(value, float):
1136
+ # Stata missing values typically show up as very large floats via sfi.Data.get
1137
+ return value > 8.0e307
1138
+ return False
1139
+
1140
+ def _normalize_cell(self, value: Any, *, max_chars: int) -> tuple[Any, bool]:
1141
+ if self._is_stata_missing(value):
1142
+ return ".", False
1143
+ if isinstance(value, str):
1144
+ if len(value) > max_chars:
1145
+ return value[:max_chars], True
1146
+ return value, False
1147
+ return value, False
1148
+
1149
def get_page(
    self,
    *,
    offset: int,
    limit: int,
    vars: List[str],
    include_obs_no: bool,
    max_chars: int,
    obs_indices: Optional[List[int]] = None,
) -> Dict[str, Any]:
    """Return one page of cell values for the requested variables.

    Args:
        offset: Position of the first row of the page. Without
            ``obs_indices`` this is an observation index; with it, a
            position inside ``obs_indices``. Negative values are treated as 0.
        limit: Maximum number of rows. Negative values are treated as 0.
        vars: Variable names to read; each must exist in the dataset.
        include_obs_no: If True, prepend an ``"_n"`` column holding the
            observation index plus one.
        max_chars: Strings longer than this are cut and counted in
            ``truncated_cells``.
        obs_indices: Optional explicit list of observation indices to page
            through instead of the whole dataset.

    Returns:
        Dict with ``vars``, ``rows``, ``returned`` and ``truncated_cells``.

    Raises:
        RuntimeError: If the dataset has neither variables nor observations.
        ValueError: If a requested variable does not exist.
    """
    if not self._initialized:
        self.init()

    from sfi import Data  # type: ignore[import-not-found]

    state = self.get_dataset_state()
    n = int(state.get("n", 0) or 0)
    k = int(state.get("k", 0) or 0)
    if k == 0 and n == 0:
        raise RuntimeError("No data in memory")

    var_map = self._get_var_index_map()
    for v in vars:
        if v not in var_map:
            raise ValueError(f"Invalid variable: {v}")

    # Clamp so a negative offset/limit can never index from the end.
    start = max(0, offset)
    size = max(0, limit)
    if obs_indices is None:
        obs_list: List[int] = list(range(start, min(start + size, n)))
    else:
        obs_list = list(obs_indices[start : start + size])

    # Never hand Data.get an empty observation list, in either mode.
    rows = Data.get(var=vars, obs=obs_list) if obs_list else []
    returned = len(rows)

    out_vars = list(vars)
    if include_obs_no:
        out_vars = ["_n"] + out_vars

    out_rows: List[List[Any]] = []
    truncated_cells = 0
    for idx, raw in enumerate(rows):
        norm_row: List[Any] = []
        if include_obs_no:
            norm_row.append(int(obs_list[idx]) + 1)
        for cell in raw:
            norm, truncated = self._normalize_cell(cell, max_chars=max_chars)
            if truncated:
                truncated_cells += 1
            norm_row.append(norm)
        out_rows.append(norm_row)

    return {
        "vars": out_vars,
        "rows": out_rows,
        "returned": returned,
        "truncated_cells": truncated_cells,
    }
1219
+
1220
+ _FILTER_IDENT = re.compile(r"\b[A-Za-z_][A-Za-z0-9_]*\b")
1221
+
1222
+ def _extract_filter_vars(self, filter_expr: str) -> List[str]:
1223
+ tokens = set(self._FILTER_IDENT.findall(filter_expr or ""))
1224
+ # Exclude python keywords we might inject.
1225
+ exclude = {"and", "or", "not", "True", "False", "None"}
1226
+ var_map = self._get_var_index_map()
1227
+ vars_used = [t for t in tokens if t not in exclude and t in var_map]
1228
+ return sorted(vars_used)
1229
+
1230
+ def _compile_filter_expr(self, filter_expr: str) -> Any:
1231
+ expr = (filter_expr or "").strip()
1232
+ if not expr:
1233
+ raise ValueError("Empty filter")
1234
+
1235
+ # Stata boolean operators.
1236
+ expr = expr.replace("&", " and ").replace("|", " or ")
1237
+
1238
+ # Replace missing literal '.' (but not numeric decimals like 0.5).
1239
+ expr = re.sub(r"(?<![0-9])\.(?![0-9A-Za-z_])", "None", expr)
1240
+
1241
+ try:
1242
+ return compile(expr, "<filterExpr>", "eval")
1243
+ except Exception as e:
1244
+ raise ValueError(f"Invalid filter expression: {e}")
1245
+
1246
def validate_filter_expr(self, filter_expr: str) -> None:
    """Check that a filter can be compiled against the current dataset.

    Only syntax is checked: constant expressions such as ``"1"`` or
    ``"True"`` are accepted, and names are not resolved here.

    Raises:
        RuntimeError: If the dataset has neither variables nor observations.
        ValueError: If the filter is empty or does not compile.
    """
    if not self._initialized:
        self.init()
    self._require_data_in_memory()
    self._compile_filter_expr(filter_expr)
1259
+
1260
def compute_view_indices(self, filter_expr: str, *, chunk_size: int = 5000) -> List[int]:
    """Return the observation indices for which ``filter_expr`` is truthy.

    The dataset is read in chunks of ``chunk_size`` observations; for each
    observation the variables used by the filter are bound by name (missing
    values as ``None``) and the compiled filter is evaluated.

    Args:
        filter_expr: Stata-style filter text.
        chunk_size: Observations fetched per ``Data.get`` call; must be > 0.

    Raises:
        RuntimeError: If the dataset has neither variables nor observations.
        ValueError: If ``chunk_size`` is not positive, the filter does not
            compile, or evaluating it on any observation fails.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    if not self._initialized:
        self.init()

    from sfi import Data  # type: ignore[import-not-found]

    state = self.get_dataset_state()
    n = int(state.get("n", 0) or 0)
    k = int(state.get("k", 0) or 0)
    if k == 0 and n == 0:
        raise RuntimeError("No data in memory")

    vars_used = self._extract_filter_vars(filter_expr)
    code = self._compile_filter_expr(filter_expr)

    # NOTE(security): the filter is caller-supplied text run through eval().
    # Builtins are removed, but that is not a sandbox; do not expose this to
    # untrusted callers without further restriction.
    eval_globals: Dict[str, Any] = {"__builtins__": {}}

    indices: List[int] = []
    for start in range(0, n, chunk_size):
        obs_list = list(range(start, min(start + chunk_size, n)))
        raw_rows = Data.get(var=vars_used, obs=obs_list) if vars_used else None

        for row_i, obs in enumerate(obs_list):
            env: Dict[str, Any] = {}
            if raw_rows is not None:
                row = raw_rows[row_i]
                for j, v in enumerate(vars_used):
                    val = row[j]
                    env[v] = None if self._is_stata_missing(val) else val

            # NOTE(review): ordering comparisons against a missing value
            # (None) raise TypeError and therefore reject the whole filter.
            try:
                ok = bool(eval(code, eval_globals, env))
            except Exception as e:
                raise ValueError(f"Invalid filter: {e}") from e

            if ok:
                indices.append(int(obs))

    return indices
1301
+
1302
def get_variable_details(self, varname: str) -> str:
    """Run ``codebook`` on one variable and return its text output.

    Returns the command's stdout on success, the error message when the
    command fails with an error envelope, and an empty string otherwise.
    """
    outcome = self.run_command_structured(f"codebook {varname}", echo=True)
    if not outcome.success:
        return outcome.error.message if outcome.error else ""
    return outcome.stdout
1310
+
1311
def list_variables_structured(self) -> VariablesResponse:
    """Wrap the output of ``list_variables`` in response models.

    A missing ``name`` becomes an empty string; missing ``label`` / ``type``
    become ``None``.
    """
    return VariablesResponse(
        variables=[
            VariableInfo(
                name=entry.get("name", ""),
                label=entry.get("label"),
                type=entry.get("type"),
            )
            for entry in self.list_variables()
        ]
    )
1322
+
1323
def list_graphs(self, *, force_refresh: bool = False) -> List[str]:
    """Return the names of graphs in memory, with TTL caching.

    Args:
        force_refresh: If True, skip the TTL check and query Stata again.

    Returns:
        A new list of graph names on every call, so callers may mutate the
        result without corrupting the cache. While a Stata command is
        executing (``_is_executing``), or if the query fails, the last
        cached value is returned, or ``[]`` when there is none.
    """
    if not self._initialized:
        self.init()

    # Prevent recursive Stata calls - if we're already executing, return cached or empty
    if self._is_executing:
        with self._list_graphs_cache_lock:
            if self._list_graphs_cache is not None:
                logger.debug("Recursive list_graphs call prevented, returning cached value")
                return list(self._list_graphs_cache)
        logger.debug("Recursive list_graphs call prevented, returning empty list")
        return []

    # Check if cache is valid
    current_time = time.time()
    with self._list_graphs_cache_lock:
        if (not force_refresh and self._list_graphs_cache is not None and
                current_time - self._list_graphs_cache_time < self.LIST_GRAPHS_TTL):
            return list(self._list_graphs_cache)

    # Cache miss or expired, fetch fresh data
    try:
        # 'graph dir' returns list in r(list); run it quietly to avoid output.
        self.stata.run("quietly graph dir, memory")

        # Stash r(list) in a global macro that sfi can read back.
        from sfi import Macro  # type: ignore[import-not-found]
        self.stata.run("global mcp_graph_list `r(list)'")
        graph_list_str = Macro.getGlobal("mcp_graph_list")
        raw_list = graph_list_str.split() if graph_list_str else []

        # Map internal Stata names back to user-facing names when we have an alias.
        reverse = getattr(self, "_graph_name_reverse", {})
        result = [reverse.get(n, n) for n in raw_list]

        # Update cache
        with self._list_graphs_cache_lock:
            self._list_graphs_cache = result
            self._list_graphs_cache_time = current_time

        return list(result)

    except Exception as e:
        # On error, return cached result if available, otherwise empty list
        with self._list_graphs_cache_lock:
            if self._list_graphs_cache is not None:
                logger.warning(f"list_graphs failed, returning cached result: {e}")
                return list(self._list_graphs_cache)
        logger.warning(f"list_graphs failed, no cache available: {e}")
        return []
1381
+
1382
def list_graphs_structured(self) -> GraphListResponse:
    """Return ``list_graphs`` as response models.

    The last listed name is treated as the active graph; every entry whose
    name equals it is flagged ``active=True``.
    """
    names = self.list_graphs()
    last_name = names[-1] if names else None
    entries = []
    for graph in names:
        entries.append(GraphInfo(name=graph, active=(graph == last_name)))
    return GraphListResponse(graphs=entries)
1387
+
1388
def invalidate_list_graphs_cache(self) -> None:
    """Drop the cached graph list so the next ``list_graphs`` call refetches it."""
    guard = self._list_graphs_cache_lock
    with guard:
        # Reset both the timestamp and the value while holding the lock.
        self._list_graphs_cache_time = 0
        self._list_graphs_cache = None
1393
+
1394
def export_graph(self, graph_name: str = None, filename: str = None, format: str = "pdf") -> str:
    """Export a graph to a pdf or png file and return the absolute path.

    On Windows, PyStata can crash when exporting PNGs directly. For PNG on
    Windows, the graph is saved to .gph and the Stata executable is invoked
    in batch mode to export the PNG out-of-process.

    Args:
        graph_name: Graph to export; when falsy no graph name is passed to Stata.
        filename: Target path. When falsy a temporary file is created; an
            existing file at this path is removed first (best effort).
        format: ``"pdf"`` or ``"png"`` (case-insensitive, whitespace ignored).

    Returns:
        Absolute path of the exported file.

    Raises:
        ValueError: If ``format`` is not pdf or png.
        RuntimeError: If saving or exporting fails, the external Stata run
            times out or exits non-zero, or the output file is missing,
            empty, or larger than ``MAX_GRAPH_BYTES``.
    """
    fmt = (format or "pdf").strip().lower()
    if fmt not in {"pdf", "png"}:
        raise ValueError(f"Unsupported graph export format: {format}. Allowed: pdf, png.")

    def _discard(path: Optional[str], label: str) -> None:
        # Best-effort removal of a temporary artefact; failures are only logged.
        if not path or not os.path.exists(path):
            return
        try:
            os.remove(path)
        except Exception:
            logger.warning("Failed to remove temporary %s: %s", label, path, exc_info=True)

    if not filename:
        with tempfile.NamedTemporaryFile(prefix="mcp_stata_", suffix=f".{fmt}", delete=False) as tmp:
            filename = tmp.name
    elif os.path.exists(filename):
        # Ensure fresh start
        try:
            os.remove(filename)
        except Exception:
            pass

    # Keep the user-facing path as a normal absolute Windows path
    user_filename = os.path.abspath(filename)
    missing_message = "graph export failed: file missing"

    if fmt == "png" and os.name == "nt":
        # Fail before creating any temp files if the executable is unknown.
        stata_exe = getattr(self, "_stata_exec_path", None)
        if not stata_exe or not os.path.exists(stata_exe):
            raise RuntimeError("Stata executable path unavailable for PNG export")

        with tempfile.NamedTemporaryFile(prefix="mcp_stata_graph_", suffix=".gph", delete=False) as gph_tmp:
            gph_path = gph_tmp.name
        do_path = None
        log_path = None
        try:
            # 1) Save graph to a .gph file from the embedded session
            gph_path_for_stata = gph_path.replace("\\", "/")
            # Make the target graph current, then save without name() (which isn't accepted there)
            if graph_name:
                # Use the same name resolution as the in-process export below.
                resolved = self._resolve_graph_name_for_stata(graph_name)
                self._exec_no_capture(f'graph display "{resolved}"', echo=False)
            save_resp = self._exec_no_capture(f'graph save "{gph_path_for_stata}", replace', echo=False)
            if not save_resp.success:
                msg = save_resp.error.message if save_resp.error else f"graph save failed (rc={save_resp.rc})"
                raise RuntimeError(msg)

            # 2) Prepare a do-file to export PNG externally
            user_filename_fwd = user_filename.replace("\\", "/")
            do_lines = [
                f'graph use "{gph_path_for_stata}"',
                f'graph export "{user_filename_fwd}", replace as(png)',
                "exit",
            ]
            # UTF-8 so that non-ASCII characters in paths do not abort the write.
            with tempfile.NamedTemporaryFile(prefix="mcp_stata_export_", suffix=".do", delete=False, mode="w", encoding="utf-8") as do_tmp:
                do_path = do_tmp.name
                do_tmp.write("\n".join(do_lines))

            workdir = os.path.dirname(do_path) or None
            log_path = os.path.splitext(do_path)[0] + ".log"

            cmd = [stata_exe, "/e", "do", do_path]
            try:
                completed = subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    timeout=30,
                    cwd=workdir,
                )
            except subprocess.TimeoutExpired as timeout_err:
                raise RuntimeError("External Stata export timed out") from timeout_err
        finally:
            # Runs on success, failure and timeout alike, so nothing leaks.
            _discard(do_path, "do-file")
            _discard(gph_path, "graph file")
            _discard(log_path, "log file")

        if completed.returncode != 0:
            err = completed.stderr.strip() or completed.stdout.strip() or str(completed.returncode)
            raise RuntimeError(f"External Stata export failed: {err}")

    else:
        # Stata prefers forward slashes in its command parser on Windows
        filename_for_stata = user_filename.replace("\\", "/")

        cmd = "graph export"
        if graph_name:
            resolved = self._resolve_graph_name_for_stata(graph_name)
            cmd += f' "{filename_for_stata}", name("{resolved}") replace as({fmt})'
        else:
            cmd += f' "{filename_for_stata}", replace as({fmt})'

        # Avoid stdout/stderr redirection for graph export because PyStata's
        # output thread can crash on Windows when we swap stdio handles.
        resp = self._exec_no_capture(cmd, echo=False)
        if not resp.success:
            # Retry once after a short pause in case Stata had a transient file handle issue
            time.sleep(0.2)
            resp_retry = self._exec_no_capture(cmd, echo=False)
            if not resp_retry.success:
                msg = resp_retry.error.message if resp_retry.error else f"graph export failed (rc={resp_retry.rc})"
                raise RuntimeError(msg)
            resp = resp_retry
        if resp.error:
            missing_message = resp.error.message

    # If file missing, it failed.
    if not os.path.exists(user_filename):
        raise RuntimeError(missing_message)

    try:
        size = os.path.getsize(user_filename)
        if size == 0:
            raise RuntimeError(f"Graph export failed: produced empty file {user_filename}")
        if size > self.MAX_GRAPH_BYTES:
            raise RuntimeError(
                f"Graph export failed: file too large (> {self.MAX_GRAPH_BYTES} bytes): {user_filename}"
            )
    except Exception:
        # Clean up oversized or unreadable files
        try:
            os.remove(user_filename)
        except Exception:
            pass
        raise
    return user_filename
1531
+
1532
def get_help(self, topic: str, plain_text: bool = False) -> str:
    """Return the help text for ``topic`` as Markdown (default) or plain text.

    The ``.sthlp`` file is located with ``findfile`` and read from disk. If
    Markdown conversion fails the plain-text rendering is returned instead.
    When no file is found, a message pointing at the online help is returned;
    read errors are reported as a message string rather than raised.
    """
    if not self._initialized:
        self.init()

    # 'capture' keeps a missing help file from raising.
    self.stata.run(f"capture findfile {topic}.sthlp")

    # r(fn) is copied into a global macro so it can be read through sfi.
    from sfi import Macro  # type: ignore[import-not-found]
    self.stata.run("global mcp_help_file `r(fn)'")
    fn = Macro.getGlobal("mcp_help_file")

    if not fn or not os.path.exists(fn):
        # Fallback to URL if file not found
        return f"Help file for '{topic}' not found. Please consult: https://www.stata.com/help.cgi?{topic}"

    try:
        with open(fn, 'r', encoding='utf-8', errors='replace') as handle:
            raw_smcl = handle.read()
        if plain_text:
            return self._smcl_to_text(raw_smcl)
        try:
            stem = os.path.splitext(os.path.basename(fn))[0]
            return smcl_to_markdown(raw_smcl, adopath=os.path.dirname(fn), current_file=stem)
        except Exception as parse_err:
            logger.warning("SMCL to Markdown failed, falling back to plain text: %s", parse_err)
            return self._smcl_to_text(raw_smcl)
    except Exception as e:
        return f"Error reading help file at {fn}: {e}"
1562
+
1563
def get_stored_results(self) -> Dict[str, Any]:
    """Return the current r() and e() results as ``{"r": {...}, "e": {...}}``.

    The values are parsed from the text output of ``return list`` and
    ``ereturn list``, so every value is a string. Keys are the names inside
    the parentheses (``r(mean)`` -> ``"mean"``). If a command fails, its
    error snippet (if any) is parsed instead of stdout.
    """
    if not self._initialized:
        self.init()

    def _captured_text(resp: Any) -> str:
        # stdout / snippet may be None; always hand the parser a string.
        if resp.success:
            return resp.stdout or ""
        snippet = resp.error.snippet if resp.error else ""
        return snippet or ""

    # "r(name) = value" (scalars) or "r(name) : value" (macros and others).
    # Anchoring on the first separator after the name keeps '=' or ':'
    # inside a macro value from being mistaken for the separator.
    entry = re.compile(r"^[re]\(([^()]+)\)\s*([=:])\s*(.*)$")

    def parse_list(text: str) -> Dict[str, str]:
        data: Dict[str, str] = {}
        for line in text.splitlines():
            match = entry.match(line.strip())
            if match is None:
                continue
            key, separator, value = match.groups()
            value = value.strip()
            if separator == ":":
                value = value.strip('"')
            data[key] = value
        return data

    # We parse 'return list' output as there is no direct bulk export of stored results
    raw_r_resp = self.run_command_structured("return list", echo=True)
    raw_e_resp = self.run_command_structured("ereturn list", echo=True)

    return {
        "r": parse_list(_captured_text(raw_r_resp)),
        "e": parse_list(_captured_text(raw_e_resp)),
    }
486
1625
 
487
- def export_graphs_all(self) -> GraphExportResponse:
488
- """Exports all graphs to base64-encoded strings."""
1626
def invalidate_graph_cache(self, graph_name: str = None) -> None:
    """Invalidate cache for specific graph or all graphs.

    Besides the cached entries themselves, the access-time and size
    bookkeeping created by ``_initialize_cache`` is updated, so the total
    cache size does not keep counting entries that were dropped.

    Args:
        graph_name: Specific graph name to invalidate. If None, clears all cache.
    """
    self._initialize_cache()

    with self._cache_lock:
        if graph_name is None:
            # Clear all cache
            self._preemptive_cache.clear()
            self._cache_access_times.clear()
            self._cache_sizes.clear()
            self._total_cache_size = 0
            return

        # Clear specific graph cache, and its hash entry if present
        self._preemptive_cache.pop(graph_name, None)
        self._preemptive_cache.pop(f"{graph_name}_hash", None)
        self._cache_access_times.pop(graph_name, None)
        # NOTE(review): assumes sizes are keyed by graph name; if they are
        # keyed differently this pop is a no-op and the total is untouched.
        freed = self._cache_sizes.pop(graph_name, None)
        if freed:
            self._total_cache_size = max(0, self._total_cache_size - freed)
1646
+
1647
def _initialize_cache(self) -> None:
    """Create the graph cache state on first use, under the class-level lock.

    On later calls only the cache directory is checked, and recreated if it
    is unset or no longer exists on disk.
    """
    import atexit
    import os
    import tempfile
    import threading
    import uuid

    def _fresh_cache_dir() -> str:
        # Random token plus pid keeps directory names from colliding.
        prefix = f"preemptive_cache_{uuid.uuid4().hex[:8]}_{os.getpid()}"
        return tempfile.mkdtemp(prefix=prefix)

    with StataClient._cache_init_lock:  # Use class-level lock
        if hasattr(self, '_cache_initialized'):
            # Cache already initialized, but directory might have been removed.
            existing_dir = getattr(self, '_preemptive_cache_dir', None)
            if not existing_dir or not os.path.isdir(existing_dir):
                self._preemptive_cache_dir = _fresh_cache_dir()
            return

        self._preemptive_cache = {}
        self._cache_access_times = {}  # Track access times for LRU
        self._cache_sizes = {}  # Track individual cache item sizes
        self._total_cache_size = 0  # Track total cache size in bytes
        self._preemptive_cache_dir = _fresh_cache_dir()
        self._cache_lock = threading.Lock()
        self._cache_initialized = True

        # Remove the cache directory when the interpreter exits.
        atexit.register(self._cleanup_cache)
1676
+
1677
+ def _cleanup_cache(self) -> None:
1678
+ """Clean up cache directory and files."""
1679
+ import os
1680
+ import shutil
1681
+
1682
+ if hasattr(self, '_preemptive_cache_dir') and self._preemptive_cache_dir:
1683
+ try:
1684
+ shutil.rmtree(self._preemptive_cache_dir, ignore_errors=True)
1685
+ except Exception:
1686
+ pass # Best effort cleanup
1687
+
1688
+ if hasattr(self, '_preemptive_cache'):
1689
+ self._preemptive_cache.clear()
1690
+
1691
+ def _evict_cache_if_needed(self, new_item_size: int = 0) -> None:
1692
+ """
1693
+ Evict least recently used cache items if cache size limits are exceeded.
1694
+
1695
+ NOTE: The caller is responsible for holding ``self._cache_lock`` while
1696
+ invoking this method, so that eviction and subsequent cache insertion
1697
+ (if any) occur within a single critical section.
1698
+ """
1699
+ import time
1700
+
1701
+ # Check if we need to evict based on count or size
1702
+ needs_eviction = (
1703
+ len(self._preemptive_cache) > StataClient.MAX_CACHE_SIZE or
1704
+ self._total_cache_size + new_item_size > StataClient.MAX_CACHE_BYTES
1705
+ )
1706
+
1707
+ if not needs_eviction:
1708
+ return
1709
+
1710
+ # Sort by access time (oldest first)
1711
+ items_by_access = sorted(
1712
+ self._cache_access_times.items(),
1713
+ key=lambda x: x[1]
1714
+ )
1715
+
1716
+ evicted_count = 0
1717
+ for graph_name, access_time in items_by_access:
1718
+ if (len(self._preemptive_cache) < StataClient.MAX_CACHE_SIZE and
1719
+ self._total_cache_size + new_item_size <= StataClient.MAX_CACHE_BYTES):
1720
+ break
1721
+
1722
+ # Remove from cache
1723
+ if graph_name in self._preemptive_cache:
1724
+ cache_path = self._preemptive_cache[graph_name]
1725
+
1726
+ # Remove file
1727
+ try:
1728
+ if os.path.exists(cache_path):
1729
+ os.remove(cache_path)
1730
+ except Exception:
1731
+ pass
1732
+
1733
+ # Update tracking
1734
+ item_size = self._cache_sizes.get(graph_name, 0)
1735
+ del self._preemptive_cache[graph_name]
1736
+ del self._cache_access_times[graph_name]
1737
+ if graph_name in self._cache_sizes:
1738
+ del self._cache_sizes[graph_name]
1739
+ self._total_cache_size -= item_size
1740
+ evicted_count += 1
1741
+
1742
+ # Remove hash entry if exists
1743
+ hash_key = f"{graph_name}_hash"
1744
+ if hash_key in self._preemptive_cache:
1745
+ del self._preemptive_cache[hash_key]
1746
+
1747
+ if evicted_count > 0:
1748
+ logger.debug(f"Evicted {evicted_count} items from graph cache due to size limits")
1749
+
1750
+ def _get_content_hash(self, data: bytes) -> str:
1751
+ """Generate content hash for cache validation."""
1752
+ import hashlib
1753
+ return hashlib.md5(data).hexdigest()
1754
+
1755
+ def _sanitize_filename(self, name: str) -> str:
1756
+ """Sanitize graph name for safe file system usage."""
1757
+ import re
1758
+ # Remove or replace problematic characters
1759
+ safe_name = re.sub(r'[<>:"/\\|?*]', '_', name)
1760
+ safe_name = re.sub(r'[^\w\-_.]', '_', safe_name)
1761
+ # Limit length
1762
+ return safe_name[:100] if len(safe_name) > 100 else safe_name
1763
+
1764
+ def _validate_graph_exists(self, graph_name: str) -> bool:
1765
+ """Validate that graph still exists in Stata."""
1766
+ try:
1767
+ # First try to get graph list to verify existence
1768
+ graph_list = self.list_graphs(force_refresh=True)
1769
+ if graph_name not in graph_list:
1770
+ return False
1771
+
1772
+ # Additional validation by attempting to display the graph
1773
+ resolved = self._resolve_graph_name_for_stata(graph_name)
1774
+ cmd = f'graph display {resolved}'
1775
+ resp = self._exec_no_capture(cmd, echo=False)
1776
+ return resp.success
1777
+ except Exception:
1778
+ return False
1779
+
1780
+ def _is_cache_valid(self, graph_name: str, cache_path: str) -> bool:
1781
+ """Check if cached content is still valid."""
1782
+ try:
1783
+ # Get current graph content hash
1784
+ import tempfile
1785
+ import os
1786
+
1787
+ temp_dir = tempfile.gettempdir()
1788
+ temp_file = os.path.join(temp_dir, f"temp_{graph_name}_{os.getpid()}.svg")
1789
+
1790
+ resolved = self._resolve_graph_name_for_stata(graph_name)
1791
+ export_cmd = f'graph export "{temp_file.replace("\\\\", "/")}", name({resolved}) replace as(svg)'
1792
+ resp = self._exec_no_capture(export_cmd, echo=False)
1793
+
1794
+ if resp.success and os.path.exists(temp_file):
1795
+ with open(temp_file, 'rb') as f:
1796
+ current_data = f.read()
1797
+ os.remove(temp_file)
1798
+
1799
+ current_hash = self._get_content_hash(current_data)
1800
+ cached_hash = self._preemptive_cache.get(f"{graph_name}_hash")
1801
+
1802
+ return cached_hash == current_hash
1803
+ except Exception:
1804
+ pass
1805
+
1806
+ return False # Assume invalid if we can't verify
1807
+
1808
def export_graphs_all(self, use_base64: bool = False) -> GraphExportResponse:
    """Exports all graphs to file paths (default) or base64-encoded strings.

    Graphs with a valid entry in the on-disk cache are served from it; the
    rest are exported from Stata as SVG and then cached.  Graphs whose
    export fails are omitted from the response, and every failure is logged
    as a warning.

    Args:
        use_base64: If True, returns base64-encoded images. If False (default),
            returns file paths to exported SVG files.

    Returns:
        A ``GraphExportResponse`` listing cached graphs first, followed by
        freshly exported ones.
    """
    import hashlib
    import uuid

    exports: List[GraphExport] = []
    graph_names = self.list_graphs(force_refresh=True)

    if not graph_names:
        return GraphExportResponse(graphs=exports)

    # Deliberately not named ``logger``: binding that name anywhere in this
    # function would make it local and break lookups from nested helpers.
    log = logging.getLogger(__name__)

    # Initialize cache in thread-safe manner
    self._initialize_cache()

    def _cache_keyed_svg_path(name: str) -> str:
        # The hash suffix keeps names that sanitize identically apart.
        safe_name = self._sanitize_filename(name)
        suffix = hashlib.md5((name or "").encode("utf-8")).hexdigest()[:8]
        return os.path.join(self._preemptive_cache_dir, f"{safe_name}_{suffix}.svg")

    def _export_svg_bytes(name: str) -> bytes:
        # Export one graph to a scratch SVG and return its bytes.
        resolved = self._resolve_graph_name_for_stata(name)

        safe_temp_name = self._sanitize_filename(name)
        unique_filename = f"{safe_temp_name}_{uuid.uuid4().hex[:8]}_{os.getpid()}_{int(time.time())}.svg"
        svg_path = os.path.join(tempfile.gettempdir(), unique_filename)
        svg_path_for_stata = svg_path.replace("\\", "/")

        try:
            export_cmd = f'graph export "{svg_path_for_stata}", name({resolved}) replace as(svg)'
            export_resp = self._exec_no_capture(export_cmd, echo=False)

            if not export_resp.success:
                # Fallback: make the graph current, then export without name().
                display_resp = self._exec_no_capture(f'graph display {resolved}', echo=False)
                if display_resp.success:
                    export_cmd2 = f'graph export "{svg_path_for_stata}", replace as(svg)'
                    export_resp = self._exec_no_capture(export_cmd2, echo=False)
                else:
                    export_resp = display_resp

            if export_resp.success and os.path.exists(svg_path) and os.path.getsize(svg_path) > 0:
                with open(svg_path, "rb") as f:
                    return f.read()
            error_msg = getattr(export_resp, 'error', 'Unknown error')
            raise RuntimeError(f"Failed to export graph {name}: {error_msg}")
        finally:
            if os.path.exists(svg_path):
                try:
                    os.remove(svg_path)
                except OSError as e:
                    log.warning(f"Failed to cleanup temp file {svg_path}: {e}")

    def _forget(name: str) -> None:
        # Caller holds ``_cache_lock``.  Drops the entry, its hash and its
        # size/access tracking so a re-insert is not counted twice.
        self._preemptive_cache.pop(name, None)
        self._preemptive_cache.pop(f"{name}_hash", None)
        self._cache_access_times.pop(name, None)
        self._total_cache_size -= self._cache_sizes.pop(name, 0)

    def _make_export(name: str, path: str, data: Optional[bytes] = None) -> GraphExport:
        # Build the response item in the representation the caller asked for.
        if not use_base64:
            return GraphExport(name=name, file_path=path)
        if data is None:
            with open(path, "rb") as f:
                data = f.read()
        return GraphExport(name=name, image_base64=base64.b64encode(data).decode("ascii"))

    cached_graphs = {}
    uncached_graphs = []
    cache_errors = []

    with self._cache_lock:
        for name in graph_names:
            cached_path = self._preemptive_cache.get(name)
            if cached_path is None:
                uncached_graphs.append(name)
                continue

            # The file must exist, be non-empty and still match Stata's graph.
            usable = (
                os.path.exists(cached_path)
                and os.path.getsize(cached_path) > 0
                and self._is_cache_valid(name, cached_path)
            )
            if usable:
                cached_graphs[name] = cached_path
                self._cache_access_times[name] = time.time()  # LRU touch
            else:
                _forget(name)
                uncached_graphs.append(name)

    for name, cached_path in cached_graphs.items():
        try:
            exports.append(_make_export(name, cached_path))
        except Exception as e:
            cache_errors.append(f"Failed to read cached graph {name}: {e}")
            # Fall back to uncached processing
            uncached_graphs.append(name)

    for name in uncached_graphs:
        try:
            svg_data = _export_svg_bytes(name)
        except Exception as e:
            cache_errors.append(f"Failed to cache graph {name}: {e}")
            continue

        export_path = _cache_keyed_svg_path(name)
        try:
            item_size = len(svg_data)
            # Evict, write and register in one critical section, as
            # ``_evict_cache_if_needed`` requires of its caller.
            with self._cache_lock:
                _forget(name)
                self._evict_cache_if_needed(item_size)
                with open(export_path, 'wb') as f:
                    f.write(svg_data)
                self._preemptive_cache[name] = export_path
                # Store content hash for validation
                self._preemptive_cache[f"{name}_hash"] = self._get_content_hash(svg_data)
                self._cache_access_times[name] = time.time()
                self._cache_sizes[name] = item_size
                self._total_cache_size += item_size
        except Exception as e:
            cache_errors.append(f"Failed to cache graph {name}: {e}")
            # Still return the result even if caching fails
            if not use_base64:
                safe_name = self._sanitize_filename(name)
                export_path = os.path.join(tempfile.gettempdir(), f"{safe_name}_{uuid.uuid4().hex[:8]}.svg")
                with open(export_path, 'wb') as f:
                    f.write(svg_data)

        exports.append(_make_export(name, export_path, svg_data))

    # Log errors if any occurred
    for error in cache_errors:
        log.warning(error)

    return GraphExportResponse(graphs=exports)
500
1972
 
501
- def run_do_file(self, path: str, echo: bool = True, trace: bool = False) -> CommandResponse:
502
- if not os.path.exists(path):
1973
def cache_graph_on_creation(self, graph_name: str) -> bool:
    """Export a graph to the on-disk SVG cache right after it is created.

    Call this method right after creating a graph to pre-emptively cache
    it, so a later export can be served from the cache.

    The cache file name carries a short hash of the graph name, matching
    ``export_graphs_all``, so names that sanitize to the same string do not
    share a file.  Any previous entry for the graph is released before the
    new one is counted, which keeps ``_total_cache_size`` accurate.

    Args:
        graph_name: Name of the graph to cache

    Returns:
        True if caching succeeded, False otherwise
    """
    import hashlib

    log = logging.getLogger(__name__)

    # Initialize cache in thread-safe manner
    self._initialize_cache()

    # Invalidate list_graphs cache since a new graph was created
    self.invalidate_list_graphs_cache()

    def _forget() -> None:
        # Caller holds ``_cache_lock``.  Drops the entry, its hash and its
        # size/access tracking.
        self._preemptive_cache.pop(graph_name, None)
        self._preemptive_cache.pop(f"{graph_name}_hash", None)
        self._cache_access_times.pop(graph_name, None)
        self._total_cache_size -= self._cache_sizes.pop(graph_name, 0)

    # Check if already cached and valid
    with self._cache_lock:
        existing_path = self._preemptive_cache.get(graph_name)
        if existing_path is not None:
            still_good = (
                os.path.exists(existing_path)
                and os.path.getsize(existing_path) > 0
                and self._is_cache_valid(graph_name, existing_path)
            )
            if still_good:
                # Update access time for LRU
                self._cache_access_times[graph_name] = time.time()
                return True
            # Stale, empty or missing file: release the old entry.
            _forget()

    try:
        # Sanitize graph name for file system; suffix disambiguates names.
        safe_name = self._sanitize_filename(graph_name)
        suffix = hashlib.md5((graph_name or "").encode("utf-8")).hexdigest()[:8]
        cache_path = os.path.join(self._preemptive_cache_dir, f"{safe_name}_{suffix}.svg")
        cache_path_for_stata = cache_path.replace("\\", "/")

        resolved_graph_name = self._resolve_graph_name_for_stata(graph_name)
        graph_name_q = self._stata_quote(resolved_graph_name)

        export_cmd = f'graph export "{cache_path_for_stata}", name({graph_name_q}) replace as(svg)'
        resp = self._exec_no_capture(export_cmd, echo=False)

        # Fallback: some graph names (spaces, slashes, backslashes) can confuse
        # Stata's parser in name() even when the graph exists. In that case,
        # make the graph current, then export without name().
        if not resp.success:
            try:
                display_resp = self._exec_no_capture(f'graph display {graph_name_q}', echo=False)
                if display_resp.success:
                    export_cmd2 = f'graph export "{cache_path_for_stata}", replace as(svg)'
                    resp = self._exec_no_capture(export_cmd2, echo=False)
            except Exception:
                pass  # keep the original failed response for the warning below

        if resp.success and os.path.exists(cache_path) and os.path.getsize(cache_path) > 0:
            # Read the data to compute hash
            with open(cache_path, 'rb') as f:
                data = f.read()
            item_size = len(data)

            # Evict and register in one critical section, as
            # ``_evict_cache_if_needed`` requires of its caller.
            with self._cache_lock:
                _forget()
                self._evict_cache_if_needed(item_size)
                self._preemptive_cache[graph_name] = cache_path
                # Store content hash for validation
                self._preemptive_cache[f"{graph_name}_hash"] = self._get_content_hash(data)
                self._cache_access_times[graph_name] = time.time()
                self._cache_sizes[graph_name] = item_size
                self._total_cache_size += item_size

            return True

        error_msg = getattr(resp, 'error', 'Unknown error')
        log.warning("Failed to cache graph %s: %s", graph_name, error_msg)

    except Exception as e:
        log.warning("Exception caching graph %s: %s", graph_name, e)

    return False
2072
+
2073
+ def run_do_file(self, path: str, echo: bool = True, trace: bool = False, max_output_lines: Optional[int] = None, cwd: Optional[str] = None) -> CommandResponse:
2074
+ if cwd is not None and not os.path.isdir(cwd):
503
2075
  return CommandResponse(
504
2076
  command=f'do "{path}"',
505
2077
  rc=601,
@@ -507,14 +2079,133 @@ class StataClient:
507
2079
  stderr=None,
508
2080
  success=False,
509
2081
  error=ErrorEnvelope(
510
- message=f"Do-file not found: {path}",
2082
+ message=f"cwd not found: {cwd}",
511
2083
  rc=601,
512
2084
  command=path,
513
2085
  ),
514
2086
  )
515
- return self._exec_with_capture(f'do "{path}"', echo=echo, trace=trace)
516
2087
 
517
- def load_data(self, source: str, clear: bool = True) -> CommandResponse:
2088
+ effective_path = path
2089
+ if cwd is not None and not os.path.isabs(path):
2090
+ effective_path = os.path.abspath(os.path.join(cwd, path))
2091
+
2092
+ if not os.path.exists(effective_path):
2093
+ return CommandResponse(
2094
+ command=f'do "{effective_path}"',
2095
+ rc=601,
2096
+ stdout="",
2097
+ stderr=None,
2098
+ success=False,
2099
+ error=ErrorEnvelope(
2100
+ message=f"Do-file not found: {effective_path}",
2101
+ rc=601,
2102
+ command=effective_path,
2103
+ ),
2104
+ )
2105
+
2106
+ if not self._initialized:
2107
+ self.init()
2108
+
2109
+ start_time = time.time()
2110
+ exc: Optional[Exception] = None
2111
+ path_for_stata = effective_path.replace("\\", "/")
2112
+ command = f'do "{path_for_stata}"'
2113
+
2114
+ log_file = tempfile.NamedTemporaryFile(
2115
+ prefix="mcp_stata_",
2116
+ suffix=".log",
2117
+ delete=False,
2118
+ mode="w",
2119
+ encoding="utf-8",
2120
+ errors="replace",
2121
+ buffering=1,
2122
+ )
2123
+ log_path = log_file.name
2124
+ tail = TailBuffer(max_chars=8000)
2125
+ tee = FileTeeIO(log_file, tail)
2126
+
2127
+ rc = -1
2128
+
2129
+ with self._exec_lock:
2130
+ with self._temp_cwd(cwd):
2131
+ with self._redirect_io_streaming(tee, tee):
2132
+ try:
2133
+ if trace:
2134
+ self.stata.run("set trace on")
2135
+ ret = self.stata.run(command, echo=echo)
2136
+ # Some PyStata builds return output as a string rather than printing.
2137
+ if isinstance(ret, str) and ret:
2138
+ try:
2139
+ tee.write(ret)
2140
+ except Exception:
2141
+ pass
2142
+ except Exception as e:
2143
+ exc = e
2144
+ finally:
2145
+ rc = self._read_return_code()
2146
+ if trace:
2147
+ try:
2148
+ self.stata.run("set trace off")
2149
+ except Exception:
2150
+ pass
2151
+
2152
+ tee.close()
2153
+
2154
+ tail_text = tail.get_value()
2155
+ combined = (tail_text or "") + (f"\n{exc}" if exc else "")
2156
+ rc_hint = self._parse_rc_from_text(combined) if combined else None
2157
+ if exc is None and rc_hint is not None and rc_hint != 0:
2158
+ rc = rc_hint
2159
+ if exc is None and rc_hint is None:
2160
+ rc = 0 if rc is None or rc != 0 else rc
2161
+ success = rc == 0 and exc is None
2162
+
2163
+ error = None
2164
+ if not success:
2165
+ snippet = (tail_text[-800:] if tail_text else None) or (str(exc) if exc else None)
2166
+ rc_hint = self._parse_rc_from_text(combined) if combined else None
2167
+ rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
2168
+ line_no = self._parse_line_from_text(combined) if combined else None
2169
+ message = "Stata error"
2170
+ if tail_text and tail_text.strip():
2171
+ for line in reversed(tail_text.splitlines()):
2172
+ if line.strip():
2173
+ message = line.strip()
2174
+ break
2175
+ elif exc is not None:
2176
+ message = str(exc).strip() or message
2177
+
2178
+ error = ErrorEnvelope(
2179
+ message=message,
2180
+ rc=rc_final,
2181
+ line=line_no,
2182
+ command=command,
2183
+ log_path=log_path,
2184
+ snippet=snippet,
2185
+ trace=trace or None,
2186
+ )
2187
+
2188
+ duration = time.time() - start_time
2189
+ logger.info(
2190
+ "stata.run(do) rc=%s success=%s trace=%s duration_ms=%.2f path=%s",
2191
+ rc,
2192
+ success,
2193
+ trace,
2194
+ duration * 1000,
2195
+ effective_path,
2196
+ )
2197
+
2198
+ return CommandResponse(
2199
+ command=command,
2200
+ rc=rc,
2201
+ stdout="",
2202
+ stderr=None,
2203
+ log_path=log_path,
2204
+ success=success,
2205
+ error=error,
2206
+ )
2207
+
2208
+ def load_data(self, source: str, clear: bool = True, max_output_lines: Optional[int] = None) -> CommandResponse:
518
2209
  src = source.strip()
519
2210
  clear_suffix = ", clear" if clear else ""
520
2211
 
@@ -529,8 +2220,42 @@ class StataClient:
529
2220
  else:
530
2221
  cmd = f"sysuse {src}{clear_suffix}"
531
2222
 
532
- return self._exec_with_capture(cmd, echo=True, trace=False)
2223
+ result = self._exec_with_capture(cmd, echo=True, trace=False)
2224
+
2225
+ # Truncate stdout if requested
2226
+ if max_output_lines is not None and result.stdout:
2227
+ lines = result.stdout.splitlines()
2228
+ if len(lines) > max_output_lines:
2229
+ truncated_lines = lines[:max_output_lines]
2230
+ truncated_lines.append(f"\n... (output truncated: showing {max_output_lines} of {len(lines)} lines)")
2231
+ result = CommandResponse(
2232
+ command=result.command,
2233
+ rc=result.rc,
2234
+ stdout="\n".join(truncated_lines),
2235
+ stderr=result.stderr,
2236
+ success=result.success,
2237
+ error=result.error,
2238
+ )
2239
+
2240
+ return result
2241
+
2242
def codebook(self, varname: str, trace: bool = False, max_output_lines: Optional[int] = None) -> CommandResponse:
    """Run Stata's ``codebook`` for ``varname`` and optionally cap the output.

    Args:
        varname: Text appended to the ``codebook`` command.
        trace: Forwarded to ``_exec_with_capture``.
        max_output_lines: When set and stdout has more lines than this, only
            the first ``max_output_lines`` lines are kept and a truncation
            notice is appended.

    Returns:
        The captured response, or a new ``CommandResponse`` carrying the
        truncated stdout and the original command, rc, stderr, success and
        error values.
    """
    result = self._exec_with_capture(f"codebook {varname}", trace=trace)

    # Nothing to trim: no limit requested, or no output captured.
    if max_output_lines is None or not result.stdout:
        return result

    lines = result.stdout.splitlines()
    total = len(lines)
    if total <= max_output_lines:
        return result

    notice = f"\n... (output truncated: showing {max_output_lines} of {total} lines)"
    kept = lines[:max_output_lines] + [notice]
    return CommandResponse(
        command=result.command,
        rc=result.rc,
        stdout="\n".join(kept),
        stderr=result.stderr,
        success=result.success,
        error=result.error,
    )
536
2261