mcp-stata 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-stata might be problematic. Click here for more details.

@@ -0,0 +1,524 @@
1
+ import sys
2
+ import os
3
+ import json
4
+ import re
5
+ import base64
6
+ import logging
7
+ import threading
8
+ import time
9
+ from io import StringIO
10
+ from contextlib import contextmanager
11
+ from typing import Any, List, Optional, Dict
12
+ import pandas as pd
13
+ from .discovery import find_stata_path
14
+ from .smcl.smcl2html import smcl_to_markdown
15
+ from .models import (
16
+ CommandResponse,
17
+ ErrorEnvelope,
18
+ GraphExport,
19
+ GraphExportResponse,
20
+ GraphInfo,
21
+ GraphListResponse,
22
+ VariableInfo,
23
+ VariablesResponse,
24
+ )
25
+
26
+ logger = logging.getLogger("mcp_stata")
27
+
28
+ class StataClient:
29
+ _instance = None
30
+ _initialized = False
31
+ _exec_lock: threading.Lock
32
+ MAX_DATA_ROWS = 500
33
+ MAX_GRAPH_BYTES = 50 * 1024 * 1024 # Allow large graph exports (~50MB)
34
+
35
+ def __new__(cls):
36
+ if cls._instance is None:
37
+ cls._instance = super(StataClient, cls).__new__(cls)
38
+ cls._instance._exec_lock = threading.Lock()
39
+ return cls._instance
40
+
41
+ @contextmanager
42
+ def _redirect_io(self):
43
+ """Safely redirect stdout/stderr for the duration of a Stata call."""
44
+ out_buf, err_buf = StringIO(), StringIO()
45
+ backup_stdout, backup_stderr = sys.stdout, sys.stderr
46
+ sys.stdout, sys.stderr = out_buf, err_buf
47
+ try:
48
+ yield out_buf, err_buf
49
+ finally:
50
+ sys.stdout, sys.stderr = backup_stdout, backup_stderr
51
+
52
def init(self):
    """Locate Stata, configure ``stata_setup`` and load ``pystata`` once.

    Does nothing when the client is already initialized. Otherwise the
    binary path reported by ``find_stata_path()`` is used to derive
    candidate installation directories, which are handed to
    ``stata_setup.config`` in priority order until one is accepted. On
    success ``pystata.stata`` is stored on ``self.stata`` and the client
    is flagged as initialized.

    Raises:
        RuntimeError: if the binary is missing or not executable, if no
            candidate directory is accepted by ``stata_setup.config``, or
            if ``stata_setup``/``pystata`` cannot be imported. The
            triggering exception is attached as ``__cause__``.
    """
    if self._initialized:
        return

    try:
        # 1. Setup config
        import stata_setup

        try:
            stata_exec_path, edition = find_stata_path()
        except FileNotFoundError as e:
            raise RuntimeError(f"Stata binary not found: {e}") from e
        except PermissionError as e:
            raise RuntimeError(
                f"Stata binary is not executable: {e}. "
                "Point STATA_PATH directly to the Stata binary (e.g., .../Contents/MacOS/stata-mp)."
            ) from e
        logger.info("Discovery found Stata at: %s (%s)", stata_exec_path, edition)

        def tries_init(path_to_try):
            """Return True when stata_setup.config accepts *path_to_try*."""
            try:
                logger.info("Attempting stata_setup.config with: %s", path_to_try)
                stata_setup.config(path_to_try, edition)
                return True
            except Exception as e:
                logger.warning("Init failed with %s: %s", path_to_try, e)
                return False

        # Directory that holds the binary (e.g. .../Contents/MacOS).
        bin_dir = os.path.dirname(stata_exec_path)

        # Walk up from the binary directory looking for an enclosing
        # ``.app`` bundle. Stop as soon as dirname() stops making progress:
        # at a filesystem root it returns its argument unchanged (on
        # Windows that is e.g. "C:\\", which is longer than one character),
        # so a length check alone would loop forever there.
        app_bundle = None
        curr = bin_dir
        while curr:
            if curr.endswith(".app"):
                app_bundle = curr
                break
            parent = os.path.dirname(curr)
            if parent == curr:
                break
            curr = parent

        candidates = []
        if app_bundle:
            # Priority 1: the installation root (parent of the .app bundle)
            candidates.append(os.path.dirname(app_bundle))
            # Priority 2: the .app bundle itself
            candidates.append(app_bundle)
        # Priority 3: the binary directory
        candidates.append(bin_dir)

        # any() short-circuits, so no further candidate is tried once one
        # has been accepted.
        if not any(tries_init(path) for path in candidates):
            raise RuntimeError(
                f"stata_setup.config failed. Tried: {candidates}. "
                f"Derived from binary: {stata_exec_path}"
            )

        # 2. Import pystata (only importable after stata_setup.config)
        from pystata import stata
        self.stata = stata
        self._initialized = True

    except ImportError as e:
        # Raised when stata_setup or pystata is not importable.
        raise RuntimeError("Could not import `stata_setup`. Ensure pystata is installed.") from e
    except Exception as e:
        # Also re-wraps the RuntimeErrors raised above, prefixing them.
        raise RuntimeError(f"Failed to initialize Stata: {e}") from e
130
+
131
+ def _read_return_code(self) -> int:
132
+ """Read the last Stata return code without mutating rc."""
133
+ try:
134
+ from sfi import Macro
135
+ rc_val = Macro.getCValue("rc") # type: ignore[attr-defined]
136
+ return int(float(rc_val))
137
+ except Exception:
138
+ try:
139
+ self.stata.run("global MCP_RC = c(rc)")
140
+ from sfi import Macro as Macro2
141
+ rc_val = Macro2.getGlobal("MCP_RC")
142
+ return int(float(rc_val))
143
+ except Exception:
144
+ return -1
145
+
146
+ def _parse_rc_from_text(self, text: str) -> Optional[int]:
147
+ match = re.search(r"r\((\d+)\)", text)
148
+ if match:
149
+ try:
150
+ return int(match.group(1))
151
+ except Exception:
152
+ return None
153
+ return None
154
+
155
+ def _parse_line_from_text(self, text: str) -> Optional[int]:
156
+ match = re.search(r"line\s+(\d+)", text, re.IGNORECASE)
157
+ if match:
158
+ try:
159
+ return int(match.group(1))
160
+ except Exception:
161
+ return None
162
+ return None
163
+
164
+ def _smcl_to_text(self, smcl: str) -> str:
165
+ """Convert simple SMCL markup into plain text for LLM-friendly help."""
166
+ # First, keep inline directive content if present (e.g., {bf:word} -> word)
167
+ cleaned = re.sub(r"\{[^}:]+:([^}]*)\}", r"\1", smcl)
168
+ # Remove remaining SMCL brace commands like {smcl}, {vieweralsosee ...}, {txt}, {p}
169
+ cleaned = re.sub(r"\{[^}]*\}", "", cleaned)
170
+ # Normalize whitespace
171
+ cleaned = cleaned.replace("\r", "")
172
+ lines = [line.rstrip() for line in cleaned.splitlines()]
173
+ return "\n".join(lines).strip()
174
+
175
def _build_error_envelope(
    self,
    command: str,
    rc: int,
    stdout: str,
    stderr: str,
    exc: Optional[Exception],
    trace: bool,
) -> ErrorEnvelope:
    """Assemble an ErrorEnvelope describing a failed Stata command.

    The non-empty pieces of stdout, stderr and the exception text are
    joined into one blob. That blob supplies a fallback return code (used
    only when *rc* is -1 or None), a line number, and a snippet made of
    its last 800 characters. The message is the first non-empty value
    among stderr, the exception text, stdout and ``"Stata error"``.
    """
    exc_text = str(exc) if exc else ""
    pieces = [piece for piece in (stdout, stderr, exc_text) if piece]
    combined = "\n".join(pieces).strip()

    if combined:
        rc_hint = self._parse_rc_from_text(combined)
        line_no = self._parse_line_from_text(combined)
        snippet = combined[-800:]
    else:
        rc_hint = None
        line_no = None
        snippet = None

    # Prefer the rc handed in; fall back to the parsed hint only when the
    # caller could not determine one.
    rc_final = rc_hint if rc in (-1, None) else rc
    message = (stderr or exc_text or stdout or "Stata error").strip()

    return ErrorEnvelope(
        message=message,
        rc=rc_final,
        line=line_no,
        command=command,
        stdout=stdout or None,
        stderr=stderr or None,
        snippet=snippet,
        trace=trace or None,
    )
200
+
201
def _exec_with_capture(self, code: str, echo: bool = True, trace: bool = False) -> CommandResponse:
    """Run *code* in Stata while capturing stdout/stderr and the return code.

    Initializes the client on first use. Execution happens under
    ``self._exec_lock`` with the standard streams redirected. When
    *trace* is true, ``set trace on`` is issued before the code and
    ``set trace off`` afterwards (failures of the latter are ignored).

    Returns:
        A CommandResponse. ``success`` is true only when no exception was
        raised and the return code is 0; otherwise ``error`` carries an
        ErrorEnvelope built from the captured output.
    """
    if not self._initialized:
        self.init()

    started = time.time()
    caught: Optional[Exception] = None
    with self._exec_lock, self._redirect_io() as (captured_out, captured_err):
        try:
            if trace:
                self.stata.run("set trace on")
            self.stata.run(code, echo=echo)
        except Exception as err:
            caught = err
        finally:
            # Read rc before switching trace off.
            rc = self._read_return_code()
            if trace:
                try:
                    self.stata.run("set trace off")
                except Exception:
                    pass

    stdout = captured_out.getvalue()
    stderr = captured_err.getvalue()

    # No exception and nothing meaningful on stderr: whatever rc was read
    # is treated as spurious and the run counts as successful.
    if caught is None and not stderr.strip():
        rc = 0

    success = caught is None and rc == 0
    error = None if success else self._build_error_envelope(code, rc, stdout, stderr, caught, trace)

    elapsed = time.time() - started
    preview = code.replace("\n", "\\n")
    logger.info(
        "stata.run rc=%s success=%s trace=%s duration_ms=%.2f code_preview=%s",
        rc,
        success,
        trace,
        elapsed * 1000,
        preview[:120],
    )
    return CommandResponse(
        command=code,
        rc=rc,
        stdout=stdout,
        stderr=stderr or None,
        success=success,
        error=error,
    )
251
+
252
def run_command(self, code: str, echo: bool = True) -> str:
    """Run *code* and return its output as a plain string (legacy API).

    On success the captured stdout is returned. On failure the result is
    an ``Error executing Stata code`` message built from the error
    envelope when one exists, otherwise the captured stdout, otherwise
    ``"Unknown Stata error"``.
    """
    outcome = self._exec_with_capture(code, echo=echo)
    if not outcome.success:
        failure = outcome.error
        if failure:
            return f"Error executing Stata code (r({failure.rc})):\n{failure.message}"
        return outcome.stdout or "Unknown Stata error"
    return outcome.stdout
260
+
261
def run_command_structured(self, code: str, echo: bool = True, trace: bool = False) -> CommandResponse:
    """Run *code* and return the full CommandResponse envelope.

    Thin public wrapper that forwards *echo* and *trace* unchanged to
    ``_exec_with_capture``.
    """
    response = self._exec_with_capture(code, echo=echo, trace=trace)
    return response
264
+
265
def get_data(self, start: int = 0, count: int = 50) -> List[Dict[str, Any]]:
    """Return a window of the in-memory dataset as a list of row dicts.

    Args:
        start: Zero-based index of the first row. Negative values are
            treated as 0.
        count: Maximum number of rows. Negative values are treated as 0
            and values above ``MAX_DATA_ROWS`` are capped to it.

    Returns:
        One ``{column: value}`` dict per row. If retrieval fails, a
        single-element list ``[{"error": "..."}]`` is returned instead of
        raising.
    """
    if not self._initialized:
        self.init()

    # Clamp the window. Without this a negative start or count would be
    # read by ``iloc`` as an offset from the end of the frame and silently
    # return the wrong rows.
    start = max(start, 0)
    count = min(max(count, 0), self.MAX_DATA_ROWS)

    try:
        # Use pystata integration to retrieve the data as a DataFrame.
        frame = self.stata.pdataframe_from_data()
        return frame.iloc[start : start + count].to_dict(orient="records")
    except Exception as e:
        return [{"error": f"Failed to retrieve data: {e}"}]
284
+
285
def list_variables(self) -> List[Dict[str, str]]:
    """Describe every variable of the dataset currently in memory.

    Initializes the client on first use, then queries ``sfi.Data`` for
    each variable index from 0 up to ``Data.getVarCount()``.

    Returns:
        One dict per variable with the keys ``"name"``, ``"label"`` and
        ``"type"``; the type is whatever ``Data.getVarType`` returns,
        passed through ``str()``.
    """
    if not self._initialized:
        self.init()

    # sfi is imported lazily, after init() has run.
    from sfi import Data

    return [
        {
            "name": Data.getVarName(idx),
            "label": Data.getVarLabel(idx),
            "type": str(Data.getVarType(idx)),
        }
        for idx in range(Data.getVarCount())
    ]
305
+
306
def get_variable_details(self, varname: str) -> str:
    """Run ``codebook`` on *varname* and return the raw text output."""
    codebook_cmd = f"codebook {varname}"
    return self.run_command(codebook_cmd)
309
+
310
def list_variables_structured(self) -> VariablesResponse:
    """Return the ``list_variables()`` entries wrapped in a VariablesResponse.

    Each dict becomes a VariableInfo; a missing ``"name"`` defaults to an
    empty string, while missing ``"label"``/``"type"`` become None.
    """
    described = [
        VariableInfo(
            name=entry.get("name", ""),
            label=entry.get("label"),
            type=entry.get("type"),
        )
        for entry in self.list_variables()
    ]
    return VariablesResponse(variables=described)
321
+
322
def list_graphs(self) -> List[str]:
    """Return the names of the graphs currently held in memory.

    Runs ``quietly graph dir, memory``, copies ``r(list)`` into the global
    macro ``mcp_graph_list`` and reads that macro back through
    ``sfi.Macro``. The macro text is split on whitespace.

    Returns:
        The graph names, or an empty list when the macro is empty.
    """
    # Initialize lazily like the other public entry points; without this
    # ``self.stata`` does not exist yet when this is the first call made.
    if not self._initialized:
        self.init()

    # Run quietly so the directory listing does not spam the output.
    self.stata.run("quietly graph dir, memory")

    # r-class results are awkward to reach through pystata's run(), so the
    # list is stashed in a global macro that sfi can read directly.
    from sfi import Macro
    self.stata.run("global mcp_graph_list `r(list)'")
    graph_list_str = Macro.getGlobal("mcp_graph_list")
    if not graph_list_str:
        return []

    return graph_list_str.split()
337
+
338
def list_graphs_structured(self) -> GraphListResponse:
    """Return the in-memory graphs as a GraphListResponse.

    A graph is flagged ``active`` when its name equals the last name
    returned by ``list_graphs()``.
    NOTE(review): this assumes the last listed graph is the active one -
    confirm against Stata's ``graph dir`` ordering.
    """
    graph_names = self.list_graphs()
    last_name = graph_names[-1] if graph_names else None
    entries = []
    for graph_name in graph_names:
        entries.append(GraphInfo(name=graph_name, active=(graph_name == last_name)))
    return GraphListResponse(graphs=entries)
343
+
344
def export_graph(self, graph_name: Optional[str] = None, filename: Optional[str] = None) -> str:
    """Export a graph to an image file and return the file's path.

    Args:
        graph_name: Name of the graph to export. When omitted, no
            ``name()`` option is passed to ``graph export``.
        filename: Destination path. When omitted, a ``mcp_stata_*.png``
            temporary file is created and used. An existing file at an
            explicit destination is removed first (best effort).

    Returns:
        The path of the exported file.

    Raises:
        RuntimeError: if no file exists after the export, if the file is
            empty, or if it is larger than ``MAX_GRAPH_BYTES``. Empty or
            oversized files are deleted before raising.
    """
    import tempfile

    if not filename:
        # Reserve a unique path. The zero-byte placeholder stays on disk
        # and is overwritten by ``graph export ... replace``.
        with tempfile.NamedTemporaryFile(prefix="mcp_stata_", suffix=".png", delete=False) as tmp:
            filename = tmp.name
    elif os.path.exists(filename):
        # Ensure a fresh start. A failed removal is tolerated because the
        # export itself is issued with ``replace``.
        try:
            os.remove(filename)
        except OSError:
            pass

    if graph_name:
        cmd = f'graph export "{filename}", name("{graph_name}") replace'
    else:
        cmd = f'graph export "{filename}", replace'

    output = self.run_command(cmd)

    if not os.path.exists(filename):
        # If the file is missing the export failed; the output says why.
        raise RuntimeError(f"Graph export failed: {output}")

    try:
        size = os.path.getsize(filename)
        if size == 0:
            # With a temporary destination the placeholder is still empty
            # after a failed export, so this is the branch a failure takes.
            # Include the command output so the cause is not lost.
            detail = output.strip() if output else ""
            suffix = f": {detail}" if detail else ""
            raise RuntimeError(f"Graph export failed: produced empty file {filename}{suffix}")
        if size > self.MAX_GRAPH_BYTES:
            raise RuntimeError(
                f"Graph export failed: file too large (> {self.MAX_GRAPH_BYTES} bytes): {filename}"
            )
    except Exception:
        # Clean up empty, oversized or unreadable files, then re-raise the
        # original error with its traceback intact.
        try:
            os.remove(filename)
        except OSError:
            pass
        raise
    return filename
386
+
387
def get_help(self, topic: str, plain_text: bool = False) -> str:
    """Return the help text for *topic* as Markdown or plain text.

    The ``<topic>.sthlp`` file is located with Stata's ``findfile``, read
    from disk and converted. Markdown conversion failures fall back to
    the plain-text conversion.

    Args:
        topic: Help topic; ``.sthlp`` is appended to form the file name.
        plain_text: When true, return SMCL-stripped plain text instead of
            Markdown.

    Returns:
        The converted help text, an ``Error reading help file`` message
        if reading or converting raised, or a pointer to the online help
        when no file was found.
    """
    # Initialize lazily like the other public entry points; without this
    # ``self.stata`` does not exist yet when this is the first call made.
    if not self._initialized:
        self.init()

    # 'capture' keeps a missing help file from raising inside Stata.
    self.stata.run(f"capture findfile {topic}.sthlp")

    # Copy r(fn) into a global macro so it can be read through sfi.
    from sfi import Macro
    self.stata.run("global mcp_help_file `r(fn)'")
    fn = Macro.getGlobal("mcp_help_file")

    if fn and os.path.exists(fn):
        try:
            with open(fn, 'r', encoding='utf-8', errors='replace') as f:
                smcl = f.read()
            if plain_text:
                return self._smcl_to_text(smcl)
            try:
                return smcl_to_markdown(
                    smcl,
                    adopath=os.path.dirname(fn),
                    current_file=os.path.splitext(os.path.basename(fn))[0],
                )
            except Exception as parse_err:
                logger.warning("SMCL to Markdown failed, falling back to plain text: %s", parse_err)
                return self._smcl_to_text(smcl)
        except Exception as e:
            return f"Error reading help file at {fn}: {e}"

    # Fallback to URL if file not found
    return f"Help file for '{topic}' not found. Please consult: https://www.stata.com/help.cgi?{topic}"
414
+
415
def get_stored_results(self) -> Dict[str, Any]:
    """Return the current ``r()`` and ``e()`` stored results.

    The values are scraped from the text printed by ``return list`` and
    ``ereturn list``.

    Returns:
        ``{"r": {...}, "e": {...}}`` mapping each result name (without
        the ``r(...)``/``e(...)`` wrapper) to its value as a string.
        Entries printed with ``=`` keep the text as is; entries printed
        with ``:`` have surrounding double quotes removed.
    """
    if not self._initialized:
        self.init()

    # There is no direct bulk export of stored results, so parse the
    # listings instead.
    raw_r = self.run_command("return list")
    raw_e = self.run_command("ereturn list")

    # A result line starts with r(name) or e(name), followed by the
    # separator that decides its kind. Anchoring on the FIRST separator
    # after the name keeps entries whose value itself contains '=' or ':'
    # (e.g. e(cmdline) : "regress y x if z==1"), and ignores unrelated
    # text such as an error message that merely mentions r(111).
    entry_pattern = re.compile(r"^[re]\(([^()]+)\)\s*([=:])\s*(.*)$")

    def parse_list(text):
        """Collect ``name -> value`` pairs from one listing."""
        data = {}
        for line in text.splitlines():
            found = entry_pattern.match(line.strip())
            if found is None:
                continue
            key, separator, value = found.groups()
            if separator == ":":
                # r(name) : "value" -> drop the surrounding quotes
                value = value.strip('"')
            data[key] = value
        return data

    return {"r": parse_list(raw_r), "e": parse_list(raw_e)}
474
+
475
def export_graphs_all(self) -> GraphExportResponse:
    """Export every in-memory graph and return the images base64-encoded.

    Each graph named by ``list_graphs()`` is exported to a temporary file
    with ``export_graph``, read back and encoded. A graph that fails to
    export or read is logged as a warning and skipped.

    Returns:
        A GraphExportResponse holding one GraphExport per graph that was
        exported successfully.
    """
    exports: List[GraphExport] = []
    for name in self.list_graphs():
        path = None
        try:
            path = self.export_graph(name)
            with open(path, "rb") as f:
                b64 = base64.b64encode(f.read()).decode("ascii")
            exports.append(GraphExport(name=name, image_base64=b64))
        except Exception as e:
            logger.warning("Failed to export graph '%s': %s", name, e)
        finally:
            # The file was created only for this call and its content now
            # lives in the response, so remove it instead of leaving one
            # temp file per graph behind on every invocation.
            if path is not None:
                try:
                    os.remove(path)
                except OSError:
                    pass
    return GraphExportResponse(graphs=exports)
488
+
489
def run_do_file(self, path: str, echo: bool = True, trace: bool = False) -> CommandResponse:
    """Execute the do-file at *path* and return the structured result.

    When *path* does not exist, Stata is not invoked: a failed
    CommandResponse with return code 601 and a ``Do-file not found``
    error envelope is returned instead.
    """
    do_command = f'do "{path}"'
    if os.path.exists(path):
        return self._exec_with_capture(do_command, echo=echo, trace=trace)

    missing = ErrorEnvelope(
        message=f"Do-file not found: {path}",
        rc=601,
        command=path,
    )
    return CommandResponse(
        command=do_command,
        rc=601,
        stdout="",
        stderr=None,
        success=False,
        error=missing,
    )
504
+
505
def load_data(self, source: str, clear: bool = True) -> CommandResponse:
    """Load a dataset described by *source* and return the command result.

    *source* is stripped and then interpreted as follows:

    * text starting with ``sysuse ``, ``webuse `` or ``use `` is run as
      given;
    * text containing ``://``, ending in ``.dta`` or containing the OS
      path separator is loaded with ``use "<source>"``;
    * anything else is treated as a dataset name for ``sysuse``.

    When *clear* is true, ``, clear`` is appended to the command.
    """
    src = source.strip()
    suffix = ", clear" if clear else ""

    if src.startswith(("sysuse ", "webuse ", "use ")):
        # Already a complete load command; only the option is appended.
        cmd = src + suffix
    elif "://" in src or src.endswith(".dta") or os.path.sep in src:
        cmd = f'use "{src}"{suffix}'
    else:
        cmd = f"sysuse {src}{suffix}"

    return self._exec_with_capture(cmd, echo=True, trace=False)
521
+
522
def codebook(self, varname: str, trace: bool = False) -> CommandResponse:
    """Run ``codebook`` on *varname* and return the structured result."""
    codebook_cmd = f"codebook {varname}"
    return self._exec_with_capture(codebook_cmd, trace=trace)
524
+