mcp-stata 1.6.8__py3-none-any.whl → 1.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-stata might be problematic. Click here for more details.
- mcp_stata/discovery.py +151 -31
- mcp_stata/stata_client.py +154 -29
- mcp_stata/ui_http.py +37 -1
- {mcp_stata-1.6.8.dist-info → mcp_stata-1.7.3.dist-info}/METADATA +60 -2
- {mcp_stata-1.6.8.dist-info → mcp_stata-1.7.3.dist-info}/RECORD +8 -8
- {mcp_stata-1.6.8.dist-info → mcp_stata-1.7.3.dist-info}/WHEEL +0 -0
- {mcp_stata-1.6.8.dist-info → mcp_stata-1.7.3.dist-info}/entry_points.txt +0 -0
- {mcp_stata-1.6.8.dist-info → mcp_stata-1.7.3.dist-info}/licenses/LICENSE +0 -0
mcp_stata/discovery.py
CHANGED
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Improved discovery.py with better error handling for intermittent failures.
|
|
3
|
+
Key improvements:
|
|
4
|
+
1. Retry logic for file existence checks
|
|
5
|
+
2. Better diagnostic logging
|
|
6
|
+
3. Fuzzy path matching for common typos
|
|
7
|
+
4. Case-insensitive path resolution on Windows
|
|
8
|
+
"""
|
|
9
|
+
|
|
1
10
|
import os
|
|
2
11
|
import sys
|
|
3
12
|
import platform
|
|
@@ -5,12 +14,106 @@ import glob
|
|
|
5
14
|
import logging
|
|
6
15
|
import shutil
|
|
7
16
|
import ntpath
|
|
8
|
-
|
|
17
|
+
import time
|
|
9
18
|
from typing import Tuple, List, Optional
|
|
10
19
|
|
|
11
20
|
logger = logging.getLogger("mcp_stata.discovery")
|
|
12
21
|
|
|
13
22
|
|
|
23
|
+
def _exists_with_retry(path: str, max_attempts: int = 3, delay: float = 0.1) -> bool:
    """Return True if *path* exists, re-checking a few times before giving up.

    ``os.path.exists`` is polled, sleeping ``delay`` seconds between
    consecutive attempts, so that a transient failure (antivirus scan, file
    lock, and similar temporary conditions) does not make an existing file
    look missing.

    Args:
        path: Filesystem path to check.
        max_attempts: Maximum number of existence checks. Values below 1 are
            treated as 1 so the path is always checked at least once.
        delay: Seconds to wait between attempts. Non-positive values disable
            the wait.

    Returns:
        True as soon as one check succeeds, False if every check fails.
    """
    # Always perform at least one check; range(0) would otherwise report an
    # existing file as missing without ever looking at the disk.
    attempts = max(1, max_attempts)
    for attempt in range(1, attempts + 1):
        if os.path.exists(path):
            return True
        if attempt < attempts:
            logger.debug(
                "File existence check attempt %d failed for: %s", attempt, path
            )
            # time.sleep() rejects negative values, so only sleep for a real delay.
            if delay > 0:
                time.sleep(delay)
    return False
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _find_similar_stata_dirs(target_path: str) -> List[str]:
    """List sibling directories whose name contains "stata" (case-insensitive).

    Used to suggest alternatives when a configured path has a typo, e.g.
    'Stata19Now' instead of 'StataNow19'.

    The directory searched is the parent of *target_path*; if that parent does
    not exist (the typo is in the directory name itself), the grandparent is
    searched instead.

    Args:
        target_path: The path that could not be found.

    Returns:
        Full paths of matching directories, sorted case-insensitively by name
        so callers that truncate the list get a stable result. Empty when the
        search directory does not exist or cannot be listed.
    """
    search_dir = os.path.dirname(target_path)

    # If parent doesn't exist, try grandparent (for directory name typos)
    if not os.path.exists(search_dir):
        search_dir = os.path.dirname(search_dir)

    if not os.path.exists(search_dir):
        return []

    try:
        names = os.listdir(search_dir)
    except OSError as e:  # PermissionError is a subclass of OSError
        logger.debug("Could not list directory %s: %s", search_dir, e)
        return []

    # os.listdir() order is arbitrary; sort for deterministic suggestions.
    matches = sorted(
        (
            name
            for name in names
            if "stata" in name.lower()
            and os.path.isdir(os.path.join(search_dir, name))
        ),
        key=lambda name: (name.lower(), name),
    )
    return [os.path.join(search_dir, name) for name in matches]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _validate_path_with_diagnostics(path: str, system: str) -> Tuple[bool, str]:
    """Check that *path* exists and explain what was found if it does not.

    Args:
        path: Path expected to point at the Stata binary.
        system: Platform name; when it equals "Windows" only ``.exe`` files
            are reported as candidate Stata files.

    Returns:
        ``(True, "")`` when the path exists, otherwise ``(False, message)``
        where ``message`` is a newline-joined report: whether the parent
        directory exists, which Stata-looking files it contains, and any
        similarly named directories found by ``_find_similar_stata_dirs``.
    """
    if _exists_with_retry(path):
        return True, ""

    # Build diagnostic message
    diagnostics = [f"File not found: '{path}'"]
    parent_dir = os.path.dirname(path)

    if _exists_with_retry(parent_dir):
        diagnostics.append(f"✓ Parent directory exists: '{parent_dir}'")
        try:
            # Sorted so the truncated listings below are deterministic.
            files_in_parent = sorted(os.listdir(parent_dir))
        except OSError as e:  # PermissionError is a subclass of OSError
            diagnostics.append(f"✗ Could not list parent directory: {e}")
        else:
            # An empty suffix matches every name, so the extension filter
            # only takes effect on Windows.
            required_suffix = ".exe" if system == "Windows" else ""
            similar_files = [
                f
                for f in files_in_parent
                if "stata" in f.lower() and f.lower().endswith(required_suffix)
            ]
            if similar_files:
                diagnostics.append(f"Found {len(similar_files)} Stata file(s) in parent:")
                # Show max 5
                diagnostics.extend(f"  - {f}" for f in similar_files[:5])
            else:
                diagnostics.append("No Stata executables found in parent directory")
                diagnostics.append(f"Files present: {', '.join(files_in_parent[:10])}")
    else:
        diagnostics.append(f"✗ Parent directory does not exist: '{parent_dir}'")

    # Check for similar directories (typo detection)
    similar_dirs = _find_similar_stata_dirs(path)
    if similar_dirs:
        diagnostics.append("\nDid you mean one of these directories?")
        diagnostics.extend(f"  - {dir_path}" for dir_path in similar_dirs[:5])

    return False, "\n".join(diagnostics)
|
|
115
|
+
|
|
116
|
+
|
|
14
117
|
def _normalize_env_path(raw: str, system: str) -> str:
|
|
15
118
|
"""Strip quotes/whitespace, expand variables, and normalize slashes for STATA_PATH."""
|
|
16
119
|
cleaned = raw.strip()
|
|
@@ -30,7 +133,7 @@ def _normalize_env_path(raw: str, system: str) -> str:
|
|
|
30
133
|
|
|
31
134
|
|
|
32
135
|
def _is_executable(path: str, system: str) -> bool:
|
|
33
|
-
if not
|
|
136
|
+
if not _exists_with_retry(path): # Use retry logic
|
|
34
137
|
return False
|
|
35
138
|
if system == "Windows":
|
|
36
139
|
# On Windows, check if it's a file and has .exe extension
|
|
@@ -71,11 +174,11 @@ def _resolve_windows_host_path(path: str, system: str) -> str:
|
|
|
71
174
|
"""
|
|
72
175
|
if system != "Windows":
|
|
73
176
|
return path
|
|
74
|
-
if
|
|
177
|
+
if _exists_with_retry(path): # Use retry logic
|
|
75
178
|
return path
|
|
76
179
|
if os.sep != "\\" and "\\" in path:
|
|
77
180
|
alt_path = path.replace("\\", os.sep)
|
|
78
|
-
if
|
|
181
|
+
if _exists_with_retry(alt_path): # Use retry logic
|
|
79
182
|
return alt_path
|
|
80
183
|
return path
|
|
81
184
|
|
|
@@ -97,11 +200,12 @@ def find_stata_path() -> Tuple[str, str]:
|
|
|
97
200
|
|
|
98
201
|
Behavior:
|
|
99
202
|
- If STATA_PATH is set and valid, use it.
|
|
100
|
-
- If STATA_PATH is set but invalid, fall back
|
|
101
|
-
- If auto-discovery fails, raise an error
|
|
203
|
+
- If STATA_PATH is set but invalid, provide detailed diagnostics and fall back.
|
|
204
|
+
- If auto-discovery fails, raise an error with helpful suggestions.
|
|
102
205
|
"""
|
|
103
206
|
system = _detect_system()
|
|
104
207
|
stata_path_error: Optional[Exception] = None
|
|
208
|
+
stata_path_diagnostics: Optional[str] = None
|
|
105
209
|
|
|
106
210
|
windows_binaries = [
|
|
107
211
|
("StataMP-64.exe", "mp"),
|
|
@@ -158,11 +262,15 @@ def find_stata_path() -> Tuple[str, str]:
|
|
|
158
262
|
)
|
|
159
263
|
return candidate, edition
|
|
160
264
|
|
|
161
|
-
|
|
162
|
-
|
|
265
|
+
# Enhanced error with diagnostics
|
|
266
|
+
exists, diagnostics = _validate_path_with_diagnostics(path, system)
|
|
267
|
+
error_msg = (
|
|
268
|
+
f"STATA_PATH points to directory '{path}', but no Stata executable was found within.\n"
|
|
269
|
+
f"{diagnostics}\n\n"
|
|
163
270
|
"Point STATA_PATH directly to the Stata binary "
|
|
164
|
-
"(e.g., C:\\Program Files\\
|
|
271
|
+
"(e.g., C:\\Program Files\\StataNow19\\StataMP-64.exe)."
|
|
165
272
|
)
|
|
273
|
+
raise FileNotFoundError(error_msg)
|
|
166
274
|
|
|
167
275
|
edition = "be"
|
|
168
276
|
lower_path = path.lower()
|
|
@@ -173,13 +281,18 @@ def find_stata_path() -> Tuple[str, str]:
|
|
|
173
281
|
elif "be" in lower_path:
|
|
174
282
|
edition = "be"
|
|
175
283
|
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
284
|
+
# Use enhanced validation with diagnostics
|
|
285
|
+
exists, diagnostics = _validate_path_with_diagnostics(path, system)
|
|
286
|
+
if not exists:
|
|
287
|
+
error_msg = (
|
|
288
|
+
f"STATA_PATH points to '{path}', but that file does not exist.\n"
|
|
289
|
+
f"{diagnostics}\n\n"
|
|
179
290
|
"Update STATA_PATH to your Stata binary (e.g., "
|
|
180
291
|
"/Applications/StataNow/StataMP.app/Contents/MacOS/stata-mp, "
|
|
181
|
-
"/usr/local/stata19/stata-mp or C:\\Program Files\\
|
|
292
|
+
"/usr/local/stata19/stata-mp or C:\\Program Files\\StataNow19\\StataMP-64.exe)."
|
|
182
293
|
)
|
|
294
|
+
raise FileNotFoundError(error_msg)
|
|
295
|
+
|
|
183
296
|
if not _is_executable(path, system):
|
|
184
297
|
raise PermissionError(
|
|
185
298
|
f"STATA_PATH points to '{path}', but it is not executable. "
|
|
@@ -191,6 +304,7 @@ def find_stata_path() -> Tuple[str, str]:
|
|
|
191
304
|
|
|
192
305
|
except Exception as exc:
|
|
193
306
|
stata_path_error = exc
|
|
307
|
+
stata_path_diagnostics = str(exc)
|
|
194
308
|
logger.warning(
|
|
195
309
|
"STATA_PATH override failed (%s). Falling back to auto-discovery.",
|
|
196
310
|
exc,
|
|
@@ -213,11 +327,11 @@ def find_stata_path() -> Tuple[str, str]:
|
|
|
213
327
|
for pattern in app_globs:
|
|
214
328
|
for app_dir in glob.glob(pattern):
|
|
215
329
|
binary_dir = os.path.join(app_dir, "Contents", "MacOS")
|
|
216
|
-
if not
|
|
330
|
+
if not _exists_with_retry(binary_dir): # Use retry logic
|
|
217
331
|
continue
|
|
218
332
|
for binary, edition in [("stata-mp", "mp"), ("stata-se", "se"), ("stata", "be")]:
|
|
219
333
|
full_path = os.path.join(binary_dir, binary)
|
|
220
|
-
if
|
|
334
|
+
if _exists_with_retry(full_path): # Use retry logic
|
|
221
335
|
candidates.append((full_path, edition))
|
|
222
336
|
|
|
223
337
|
elif system == "Windows":
|
|
@@ -265,7 +379,7 @@ def find_stata_path() -> Tuple[str, str]:
|
|
|
265
379
|
continue
|
|
266
380
|
for exe, edition in windows_binaries:
|
|
267
381
|
full_path = os.path.join(stata_dir, exe)
|
|
268
|
-
if
|
|
382
|
+
if _exists_with_retry(full_path): # Use retry logic
|
|
269
383
|
candidates.append((full_path, edition))
|
|
270
384
|
|
|
271
385
|
elif system == "Linux":
|
|
@@ -303,13 +417,13 @@ def find_stata_path() -> Tuple[str, str]:
|
|
|
303
417
|
continue
|
|
304
418
|
for binary, edition in linux_binaries:
|
|
305
419
|
full_path = os.path.join(base_dir, binary)
|
|
306
|
-
if
|
|
420
|
+
if _exists_with_retry(full_path): # Use retry logic
|
|
307
421
|
candidates.append((full_path, edition))
|
|
308
422
|
|
|
309
423
|
candidates = _dedupe_preserve(candidates)
|
|
310
424
|
|
|
311
425
|
for path, edition in candidates:
|
|
312
|
-
if not
|
|
426
|
+
if not _exists_with_retry(path): # Use retry logic
|
|
313
427
|
logger.warning("Discovered candidate missing on disk: %s", path)
|
|
314
428
|
continue
|
|
315
429
|
if not _is_executable(path, system):
|
|
@@ -318,21 +432,27 @@ def find_stata_path() -> Tuple[str, str]:
|
|
|
318
432
|
logger.info("Auto-discovered Stata at %s (%s)", path, edition)
|
|
319
433
|
return path, edition
|
|
320
434
|
|
|
435
|
+
# Build comprehensive error message
|
|
436
|
+
error_parts = ["Could not automatically locate Stata."]
|
|
437
|
+
|
|
321
438
|
if stata_path_error is not None:
|
|
322
|
-
|
|
323
|
-
"
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
"
|
|
332
|
-
"
|
|
333
|
-
"/Applications/StataNow/StataMP.app/Contents/MacOS/stata-mp, /usr/local/stata18/stata-mp, "
|
|
334
|
-
"or C:\\Program Files\\Stata18\\StataMP-64.exe)."
|
|
439
|
+
error_parts.append(
|
|
440
|
+
f"\nSTATA_PATH was set but failed:\n{stata_path_diagnostics}"
|
|
441
|
+
)
|
|
442
|
+
|
|
443
|
+
error_parts.append(
|
|
444
|
+
"\nTo fix this issue:\n"
|
|
445
|
+
"1. Set STATA_PATH to point to your Stata executable, for example:\n"
|
|
446
|
+
" - Windows: C:\\Program Files\\StataNow19\\StataMP-64.exe\n"
|
|
447
|
+
" - macOS: /Applications/StataNow/StataMP.app/Contents/MacOS/stata-mp\n"
|
|
448
|
+
" - Linux: /usr/local/stata19/stata-mp\n"
|
|
449
|
+
"\n2. Or install Stata in a standard location where it can be auto-discovered."
|
|
335
450
|
)
|
|
451
|
+
|
|
452
|
+
if stata_path_error is not None:
|
|
453
|
+
raise FileNotFoundError("\n".join(error_parts)) from stata_path_error
|
|
454
|
+
else:
|
|
455
|
+
raise FileNotFoundError("\n".join(error_parts))
|
|
336
456
|
|
|
337
457
|
|
|
338
458
|
def main() -> int:
|
mcp_stata/stata_client.py
CHANGED
|
@@ -408,6 +408,73 @@ class StataClient:
|
|
|
408
408
|
return None
|
|
409
409
|
return None
|
|
410
410
|
|
|
411
|
+
def _read_log_tail(self, path: str, max_chars: int) -> str:
    """Return up to the last ``max_chars`` characters of the file at *path*.

    The file is read in binary mode and decoded as UTF-8 with undecodable
    bytes replaced. Reading is best-effort: any failure to open or read the
    file yields an empty string rather than an exception.

    Args:
        path: Path of the log file to read.
        max_chars: Maximum number of characters to return. Non-positive
            values yield an empty string.

    Returns:
        The decoded tail of the file, or "" if the file is empty or unreadable.
    """
    try:
        if max_chars <= 0:
            return ""
        with open(path, "rb") as f:
            f.seek(0, os.SEEK_END)
            size = f.tell()
            if size <= 0:
                return ""
            # A UTF-8 character occupies at most 4 bytes, so this many bytes
            # always contain at least max_chars characters (when available).
            read_size = min(size, max_chars * 4)
            f.seek(size - read_size)
            data = f.read(read_size)
    except (OSError, TypeError, ValueError):
        # Best-effort helper: callers fall back to their in-memory tail.
        return ""

    if read_size < size:
        # The read started mid-file and may have landed inside a multi-byte
        # character; drop its leading continuation bytes (0b10xxxxxx) so the
        # result does not begin with a spurious replacement character.
        start = 0
        while start < min(3, len(data)) and (data[start] & 0xC0) == 0x80:
            start += 1
        data = data[start:]

    return data.decode("utf-8", errors="replace")[-max_chars:]
|
|
424
|
+
|
|
425
|
+
def _select_stata_error_message(self, text: str, fallback: str) -> str:
    """Pick the single most informative error line out of Stata output.

    Lines are examined from the bottom of *text* upwards, in three passes:

    1. the last line containing a known error phrase ("not found",
       "invalid", "syntax error", ...);
    2. otherwise, the last meaningful line above the final ``r(N);`` return
       code (or the return-code line itself if nothing precedes it);
    3. otherwise, the last meaningful line anywhere in the text.

    A "meaningful" line is non-empty, does not start with ``.``, ``>``,
    ``-`` or ``=``, and is not boilerplate (a return code,
    "end of do-file", "execution terminated", or a separator rule).

    Args:
        text: Combined output to search.
        fallback: Message returned when *text* is empty or no line qualifies.

    Returns:
        The selected line, stripped of surrounding whitespace, or *fallback*.
    """
    if not text:
        return fallback

    rc_re = re.compile(r"^r\(\d+\);?$", re.IGNORECASE)
    boilerplate_re = re.compile(
        r"^(?:r\(\d+\);?|end of do-file|execution terminated|[-=*]{3,}.*)$",
        re.IGNORECASE,
    )
    error_re = re.compile(
        r"\b(?:type mismatch|not found|not allowed|no observations"
        r"|conformability error|invalid|syntax error|error)\b",
        re.IGNORECASE,
    )
    skipped_prefixes = (".", ">", "-", "=")

    def is_meaningful(line: str) -> bool:
        """True for lines that may carry the actual error message."""
        return (
            bool(line)
            and not line.startswith(skipped_prefixes)
            and not boilerplate_re.match(line)
        )

    lines = [raw.strip() for raw in text.splitlines()]

    # Pass 1: last line mentioning a known error phrase. Lines with a skipped
    # prefix are ignored here as in the other passes; otherwise an echoed
    # command that merely contains the word "error" would shadow the message.
    for line in reversed(lines):
        if line and not line.startswith(skipped_prefixes) and error_re.search(line):
            return line

    # Pass 2: the message is normally printed just above the return code.
    for i in range(len(lines) - 1, -1, -1):
        if rc_re.match(lines[i]):
            for prev_line in reversed(lines[:i]):
                if is_meaningful(prev_line):
                    return prev_line
            return lines[i]

    # Pass 3: no return code at all; take the last meaningful line.
    for line in reversed(lines):
        if is_meaningful(line):
            return line
    return fallback
|
|
477
|
+
|
|
411
478
|
def _smcl_to_text(self, smcl: str) -> str:
|
|
412
479
|
"""Convert simple SMCL markup into plain text for LLM-friendly help."""
|
|
413
480
|
# First, keep inline directive content if present (e.g., {bf:word} -> word)
|
|
@@ -433,7 +500,10 @@ class StataClient:
|
|
|
433
500
|
rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
|
|
434
501
|
line_no = self._parse_line_from_text(combined) if combined else None
|
|
435
502
|
snippet = combined[-800:] if combined else None
|
|
436
|
-
|
|
503
|
+
fallback = (stderr or (str(exc) if exc else "") or stdout or "Stata error").strip()
|
|
504
|
+
if fallback == "Stata error" and rc_final is not None:
|
|
505
|
+
fallback = f"Stata error r({rc_final})"
|
|
506
|
+
message = self._select_stata_error_message(combined, fallback)
|
|
437
507
|
return ErrorEnvelope(
|
|
438
508
|
message=message,
|
|
439
509
|
rc=rc_final,
|
|
@@ -640,7 +710,7 @@ class StataClient:
|
|
|
640
710
|
buffering=1,
|
|
641
711
|
)
|
|
642
712
|
log_path = log_file.name
|
|
643
|
-
tail = TailBuffer(max_chars=
|
|
713
|
+
tail = TailBuffer(max_chars=200000 if trace else 20000)
|
|
644
714
|
tee = FileTeeIO(log_file, tail)
|
|
645
715
|
|
|
646
716
|
# Inform the MCP client immediately where to read/tail the output.
|
|
@@ -705,6 +775,9 @@ class StataClient:
|
|
|
705
775
|
logger.warning(f"Failed to cache detected graphs: {e}")
|
|
706
776
|
|
|
707
777
|
tail_text = tail.get_value()
|
|
778
|
+
log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
|
|
779
|
+
if log_tail and len(log_tail) > len(tail_text):
|
|
780
|
+
tail_text = log_tail
|
|
708
781
|
combined = (tail_text or "") + (f"\n{exc}" if exc else "")
|
|
709
782
|
rc_hint = self._parse_rc_from_text(combined) if combined else None
|
|
710
783
|
if exc is None and rc_hint is not None and rc_hint != 0:
|
|
@@ -718,14 +791,10 @@ class StataClient:
|
|
|
718
791
|
rc_hint = self._parse_rc_from_text(combined) if combined else None
|
|
719
792
|
rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
|
|
720
793
|
line_no = self._parse_line_from_text(combined) if combined else None
|
|
721
|
-
|
|
722
|
-
if
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
message = line.strip()
|
|
726
|
-
break
|
|
727
|
-
elif exc is not None:
|
|
728
|
-
message = str(exc).strip() or message
|
|
794
|
+
fallback = (str(exc).strip() if exc is not None else "") or "Stata error"
|
|
795
|
+
if fallback == "Stata error" and rc_final is not None:
|
|
796
|
+
fallback = f"Stata error r({rc_final})"
|
|
797
|
+
message = self._select_stata_error_message(combined, fallback)
|
|
729
798
|
|
|
730
799
|
error = ErrorEnvelope(
|
|
731
800
|
message=message,
|
|
@@ -876,7 +945,7 @@ class StataClient:
|
|
|
876
945
|
buffering=1,
|
|
877
946
|
)
|
|
878
947
|
log_path = log_file.name
|
|
879
|
-
tail = TailBuffer(max_chars=
|
|
948
|
+
tail = TailBuffer(max_chars=200000 if trace else 20000)
|
|
880
949
|
tee = FileTeeIO(log_file, tail)
|
|
881
950
|
|
|
882
951
|
# Inform the MCP client immediately where to read/tail the output.
|
|
@@ -1042,6 +1111,9 @@ class StataClient:
|
|
|
1042
1111
|
logger.error(f"Post-execution graph detection failed: {e}")
|
|
1043
1112
|
|
|
1044
1113
|
tail_text = tail.get_value()
|
|
1114
|
+
log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
|
|
1115
|
+
if log_tail and len(log_tail) > len(tail_text):
|
|
1116
|
+
tail_text = log_tail
|
|
1045
1117
|
combined = (tail_text or "") + (f"\n{exc}" if exc else "")
|
|
1046
1118
|
rc_hint = self._parse_rc_from_text(combined) if combined else None
|
|
1047
1119
|
if exc is None and rc_hint is not None and rc_hint != 0:
|
|
@@ -1055,14 +1127,10 @@ class StataClient:
|
|
|
1055
1127
|
rc_hint = self._parse_rc_from_text(combined) if combined else None
|
|
1056
1128
|
rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
|
|
1057
1129
|
line_no = self._parse_line_from_text(combined) if combined else None
|
|
1058
|
-
|
|
1059
|
-
if
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
message = line.strip()
|
|
1063
|
-
break
|
|
1064
|
-
elif exc is not None:
|
|
1065
|
-
message = str(exc).strip() or message
|
|
1130
|
+
fallback = (str(exc).strip() if exc is not None else "") or "Stata error"
|
|
1131
|
+
if fallback == "Stata error" and rc_final is not None:
|
|
1132
|
+
fallback = f"Stata error r({rc_final})"
|
|
1133
|
+
message = self._select_stata_error_message(combined, fallback)
|
|
1066
1134
|
|
|
1067
1135
|
error = ErrorEnvelope(
|
|
1068
1136
|
message=message,
|
|
@@ -1425,6 +1493,65 @@ class StataClient:
|
|
|
1425
1493
|
|
|
1426
1494
|
return indices
|
|
1427
1495
|
|
|
1496
|
+
def apply_sort(self, sort_spec: List[str]) -> None:
    """
    Apply sorting to the dataset using gsort.

    Args:
        sort_spec: List of variables to sort by, each with at most one
            optional +/- prefix.
            e.g., ["-price", "+mpg"] sorts by price descending, then mpg ascending.
            No prefix is treated as ascending (+).

    Raises:
        ValueError: If sort_spec is not a non-empty list, an entry is not a
            non-empty string, an entry carries more than one sign prefix, or
            a variable is not present in the dataset.
        RuntimeError: If no data in memory or sort command fails
    """
    if not self._initialized:
        self.init()

    state = self.get_dataset_state()
    if int(state.get("k", 0) or 0) == 0 and int(state.get("n", 0) or 0) == 0:
        raise RuntimeError("No data in memory")

    if not sort_spec or not isinstance(sort_spec, list):
        raise ValueError("sort_spec must be a non-empty list")

    # Validate every entry and build the gsort arguments in one pass.
    # Only names present in var_map are ever placed in the command string.
    var_map = self._get_var_index_map()
    gsort_args = []
    for spec in sort_spec:
        if not isinstance(spec, str) or not spec:
            raise ValueError(f"Invalid sort specification: {spec!r}")

        # Split off a single leading sign; no prefix means ascending.
        if spec[0] in "+-":
            sign, varname = spec[0], spec[1:]
        else:
            sign, varname = "+", spec

        # Reject bare signs and stacked signs such as "+-price": stripping
        # all of them would validate "price" while sending gsort a token it
        # cannot parse.
        if not varname or varname[0] in "+-":
            raise ValueError(f"Invalid sort specification: {spec!r}")

        if varname not in var_map:
            raise ValueError(f"Variable not found: {varname}")

        # gsort uses - for descending, + for ascending
        gsort_args.append(f"{sign}{varname}")

    cmd = f"gsort {' '.join(gsort_args)}"

    try:
        result = self.run_command_structured(cmd, echo=False)
    except RuntimeError:
        raise
    except Exception as e:
        raise RuntimeError(f"Failed to sort dataset: {e}") from e

    if not result.success:
        error_msg = result.error.message if result.error else "Sort failed"
        raise RuntimeError(f"Failed to sort dataset: {error_msg}")
|
|
1554
|
+
|
|
1428
1555
|
def get_variable_details(self, varname: str) -> str:
|
|
1429
1556
|
"""Returns codebook/summary for a specific variable."""
|
|
1430
1557
|
resp = self.run_command_structured(f"codebook {varname}", echo=True)
|
|
@@ -2247,7 +2374,7 @@ class StataClient:
|
|
|
2247
2374
|
buffering=1,
|
|
2248
2375
|
)
|
|
2249
2376
|
log_path = log_file.name
|
|
2250
|
-
tail = TailBuffer(max_chars=
|
|
2377
|
+
tail = TailBuffer(max_chars=200000 if trace else 20000)
|
|
2251
2378
|
tee = FileTeeIO(log_file, tail)
|
|
2252
2379
|
|
|
2253
2380
|
rc = -1
|
|
@@ -2278,6 +2405,9 @@ class StataClient:
|
|
|
2278
2405
|
tee.close()
|
|
2279
2406
|
|
|
2280
2407
|
tail_text = tail.get_value()
|
|
2408
|
+
log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
|
|
2409
|
+
if log_tail and len(log_tail) > len(tail_text):
|
|
2410
|
+
tail_text = log_tail
|
|
2281
2411
|
combined = (tail_text or "") + (f"\n{exc}" if exc else "")
|
|
2282
2412
|
rc_hint = self._parse_rc_from_text(combined) if combined else None
|
|
2283
2413
|
if exc is None and rc_hint is not None and rc_hint != 0:
|
|
@@ -2292,14 +2422,10 @@ class StataClient:
|
|
|
2292
2422
|
rc_hint = self._parse_rc_from_text(combined) if combined else None
|
|
2293
2423
|
rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
|
|
2294
2424
|
line_no = self._parse_line_from_text(combined) if combined else None
|
|
2295
|
-
|
|
2296
|
-
if
|
|
2297
|
-
|
|
2298
|
-
|
|
2299
|
-
message = line.strip()
|
|
2300
|
-
break
|
|
2301
|
-
elif exc is not None:
|
|
2302
|
-
message = str(exc).strip() or message
|
|
2425
|
+
fallback = (str(exc).strip() if exc is not None else "") or "Stata error"
|
|
2426
|
+
if fallback == "Stata error" and rc_final is not None:
|
|
2427
|
+
fallback = f"Stata error r({rc_final})"
|
|
2428
|
+
message = self._select_stata_error_message(combined, fallback)
|
|
2303
2429
|
|
|
2304
2430
|
error = ErrorEnvelope(
|
|
2305
2431
|
message=message,
|
|
@@ -2384,4 +2510,3 @@ class StataClient:
|
|
|
2384
2510
|
)
|
|
2385
2511
|
|
|
2386
2512
|
return result
|
|
2387
|
-
|
mcp_stata/ui_http.py
CHANGED
|
@@ -27,6 +27,7 @@ class ViewHandle:
|
|
|
27
27
|
view_id: str
|
|
28
28
|
dataset_id: str
|
|
29
29
|
frame: str
|
|
30
|
+
filter_expr: str
|
|
30
31
|
obs_indices: list[int]
|
|
31
32
|
filtered_n: int
|
|
32
33
|
created_at: float
|
|
@@ -86,7 +87,7 @@ class UIChannelManager:
|
|
|
86
87
|
return UIChannelInfo(base_url=base_url, token=self._token or "", expires_at=self._expires_at)
|
|
87
88
|
|
|
88
89
|
def capabilities(self) -> dict[str, bool]:
|
|
89
|
-
return {"dataBrowser": True, "filtering": True}
|
|
90
|
+
return {"dataBrowser": True, "filtering": True, "sorting": True}
|
|
90
91
|
|
|
91
92
|
def current_dataset_id(self) -> str:
|
|
92
93
|
with self._lock:
|
|
@@ -138,6 +139,7 @@ class UIChannelManager:
|
|
|
138
139
|
view_id=view_id,
|
|
139
140
|
dataset_id=current_id,
|
|
140
141
|
frame=frame,
|
|
142
|
+
filter_expr=filter_expr,
|
|
141
143
|
obs_indices=obs_indices,
|
|
142
144
|
filtered_n=len(obs_indices),
|
|
143
145
|
created_at=now,
|
|
@@ -290,11 +292,16 @@ class UIChannelManager:
|
|
|
290
292
|
body = self._read_json()
|
|
291
293
|
if body is None:
|
|
292
294
|
return
|
|
295
|
+
# Debug logging to diagnose limit parameter issues
|
|
296
|
+
import sys
|
|
297
|
+
print(f"[DEBUG] /v1/page request body: {body}", file=sys.stderr, flush=True)
|
|
298
|
+
print(f"[DEBUG] limit value: {body.get('limit')!r} (type: {type(body.get('limit')).__name__})", file=sys.stderr, flush=True)
|
|
293
299
|
try:
|
|
294
300
|
resp = handle_page_request(manager, body, view_id=None)
|
|
295
301
|
self._send_json(200, resp)
|
|
296
302
|
return
|
|
297
303
|
except HTTPError as e:
|
|
304
|
+
print(f"[DEBUG] HTTPError: {e.code} - {e.message}", file=sys.stderr, flush=True)
|
|
298
305
|
self._error(e.status, e.code, e.message, stata_rc=e.stata_rc)
|
|
299
306
|
return
|
|
300
307
|
except Exception as e:
|
|
@@ -347,11 +354,16 @@ class UIChannelManager:
|
|
|
347
354
|
body = self._read_json()
|
|
348
355
|
if body is None:
|
|
349
356
|
return
|
|
357
|
+
# Debug logging to diagnose limit parameter issues
|
|
358
|
+
import sys
|
|
359
|
+
print(f"[DEBUG] /v1/views/{view_id}/page request body: {body}", file=sys.stderr, flush=True)
|
|
360
|
+
print(f"[DEBUG] limit value: {body.get('limit')!r} (type: {type(body.get('limit')).__name__})", file=sys.stderr, flush=True)
|
|
350
361
|
try:
|
|
351
362
|
resp = handle_page_request(manager, body, view_id=view_id)
|
|
352
363
|
self._send_json(200, resp)
|
|
353
364
|
return
|
|
354
365
|
except HTTPError as e:
|
|
366
|
+
print(f"[DEBUG] HTTPError: {e.code} - {e.message}", file=sys.stderr, flush=True)
|
|
355
367
|
self._error(e.status, e.code, e.message, stata_rc=e.stata_rc)
|
|
356
368
|
return
|
|
357
369
|
except Exception as e:
|
|
@@ -473,6 +485,13 @@ def handle_page_request(manager: UIChannelManager, body: dict[str, Any], *, view
|
|
|
473
485
|
vars_req = body.get("vars", [])
|
|
474
486
|
include_obs_no = bool(body.get("includeObsNo", False))
|
|
475
487
|
|
|
488
|
+
# Parse sortBy parameter
|
|
489
|
+
sort_by = body.get("sortBy", [])
|
|
490
|
+
if sort_by is not None and not isinstance(sort_by, list):
|
|
491
|
+
raise HTTPError(400, "invalid_request", f"sortBy must be an array, got: {type(sort_by).__name__}")
|
|
492
|
+
if sort_by and not all(isinstance(s, str) for s in sort_by):
|
|
493
|
+
raise HTTPError(400, "invalid_request", "sortBy must be an array of strings")
|
|
494
|
+
|
|
476
495
|
# Parse maxChars
|
|
477
496
|
max_chars_raw = body.get("maxChars", max_chars)
|
|
478
497
|
try:
|
|
@@ -509,6 +528,20 @@ def handle_page_request(manager: UIChannelManager, body: dict[str, Any], *, view
|
|
|
509
528
|
filtered_n = view.filtered_n
|
|
510
529
|
|
|
511
530
|
try:
|
|
531
|
+
# Apply sorting if requested
|
|
532
|
+
if sort_by:
|
|
533
|
+
try:
|
|
534
|
+
manager._client.apply_sort(sort_by)
|
|
535
|
+
# If sorting with a filtered view, re-compute indices after sort
|
|
536
|
+
if view_id is not None:
|
|
537
|
+
assert view is not None
|
|
538
|
+
obs_indices = manager._client.compute_view_indices(view.filter_expr)
|
|
539
|
+
filtered_n = len(obs_indices)
|
|
540
|
+
except ValueError as e:
|
|
541
|
+
raise HTTPError(400, "invalid_request", f"Invalid sort specification: {e}")
|
|
542
|
+
except RuntimeError as e:
|
|
543
|
+
raise HTTPError(500, "internal_error", f"Failed to apply sort: {e}")
|
|
544
|
+
|
|
512
545
|
dataset_state = manager._client.get_dataset_state()
|
|
513
546
|
page = manager._client.get_page(
|
|
514
547
|
offset=offset,
|
|
@@ -518,6 +551,9 @@ def handle_page_request(manager: UIChannelManager, body: dict[str, Any], *, view
|
|
|
518
551
|
max_chars=max_chars_req,
|
|
519
552
|
obs_indices=obs_indices,
|
|
520
553
|
)
|
|
554
|
+
except HTTPError:
|
|
555
|
+
# Re-raise HTTPError exceptions as-is
|
|
556
|
+
raise
|
|
521
557
|
except RuntimeError as e:
|
|
522
558
|
# StataClient uses RuntimeError("No data in memory") for empty dataset.
|
|
523
559
|
msg = str(e) or "No data in memory"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mcp-stata
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.7.3
|
|
4
4
|
Summary: A lightweight Model Context Protocol (MCP) server for Stata. Execute commands, inspect data, retrieve stored results (`r()`/`e()`), and view graphs in your chat interface. Built for economists who want to integrate LLM assistance into their Stata workflow.
|
|
5
5
|
Project-URL: Homepage, https://github.com/tmonk/mcp-stata
|
|
6
6
|
Project-URL: Repository, https://github.com/tmonk/mcp-stata
|
|
@@ -300,7 +300,8 @@ Call the MCP tool `get_ui_channel()` and parse the JSON:
|
|
|
300
300
|
"expiresAt": 1730000000,
|
|
301
301
|
"capabilities": {
|
|
302
302
|
"dataBrowser": true,
|
|
303
|
-
"filtering": true
|
|
303
|
+
"filtering": true,
|
|
304
|
+
"sorting": true
|
|
304
305
|
}
|
|
305
306
|
}
|
|
306
307
|
```
|
|
@@ -341,10 +342,67 @@ curl -sS \
|
|
|
341
342
|
"$BASE_URL/v1/page"
|
|
342
343
|
```
|
|
343
344
|
|
|
345
|
+
#### Sorting
|
|
346
|
+
|
|
347
|
+
The `/v1/page` and `/v1/views/:viewId/page` endpoints support sorting via the optional `sortBy` parameter:
|
|
348
|
+
|
|
349
|
+
```bash
|
|
350
|
+
# Sort by price ascending
|
|
351
|
+
curl -sS \
|
|
352
|
+
-H "Authorization: Bearer $TOKEN" \
|
|
353
|
+
-H "Content-Type: application/json" \
|
|
354
|
+
-d '{"datasetId":"...","offset":0,"limit":50,"vars":["price","mpg"],"sortBy":["price"]}' \
|
|
355
|
+
"$BASE_URL/v1/page"
|
|
356
|
+
|
|
357
|
+
# Sort by price descending
|
|
358
|
+
curl -sS \
|
|
359
|
+
-H "Authorization: Bearer $TOKEN" \
|
|
360
|
+
-H "Content-Type: application/json" \
|
|
361
|
+
-d '{"datasetId":"...","offset":0,"limit":50,"vars":["price","mpg"],"sortBy":["-price"]}' \
|
|
362
|
+
"$BASE_URL/v1/page"
|
|
363
|
+
|
|
364
|
+
# Multi-variable sort: foreign ascending, then price descending
|
|
365
|
+
curl -sS \
|
|
366
|
+
-H "Authorization: Bearer $TOKEN" \
|
|
367
|
+
-H "Content-Type: application/json" \
|
|
368
|
+
-d '{"datasetId":"...","offset":0,"limit":50,"vars":["foreign","price","mpg"],"sortBy":["foreign","-price"]}' \
|
|
369
|
+
"$BASE_URL/v1/page"
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
**Sort specification format:**
|
|
373
|
+
- `sortBy` is an array of strings (variable names with optional prefix)
|
|
374
|
+
- No prefix or `+` prefix = ascending order (e.g., `"price"` or `"+price"`)
|
|
375
|
+
- `-` prefix = descending order (e.g., `"-price"`)
|
|
376
|
+
- Multiple variables are supported for multi-level sorting
|
|
377
|
+
- Uses Stata's `gsort` command internally
|
|
378
|
+
|
|
379
|
+
**Sorting with filtered views:**
|
|
380
|
+
- Sorting is fully supported with filtered views
|
|
381
|
+
- The sort is applied to the entire dataset, then filtered indices are re-computed
|
|
382
|
+
- Example: Filter for `price < 5000`, then sort descending by price
|
|
383
|
+
|
|
384
|
+
```bash
|
|
385
|
+
# Create a filtered view
|
|
386
|
+
curl -sS \
|
|
387
|
+
-H "Authorization: Bearer $TOKEN" \
|
|
388
|
+
-H "Content-Type: application/json" \
|
|
389
|
+
-d '{"datasetId":"...","frame":"default","filterExpr":"price < 5000"}' \
|
|
390
|
+
"$BASE_URL/v1/views"
|
|
391
|
+
# Returns: {"view": {"id": "view_abc123", "filteredN": 37}}
|
|
392
|
+
|
|
393
|
+
# Get sorted page from filtered view
|
|
394
|
+
curl -sS \
|
|
395
|
+
-H "Authorization: Bearer $TOKEN" \
|
|
396
|
+
-H "Content-Type: application/json" \
|
|
397
|
+
-d '{"offset":0,"limit":50,"vars":["price","mpg"],"sortBy":["-price"]}' \
|
|
398
|
+
"$BASE_URL/v1/views/view_abc123/page"
|
|
399
|
+
```
|
|
400
|
+
|
|
344
401
|
Notes:
|
|
345
402
|
|
|
346
403
|
- `datasetId` is used for cache invalidation. If the dataset changes due to running Stata commands, the server will report a new dataset id and view handles become invalid.
|
|
347
404
|
- Filter expressions are evaluated in Python using values read from Stata via `sfi.Data.get`. Use boolean operators like `==`, `!=`, `<`, `>`, and `and`/`or` (Stata-style `&`/`|` are also accepted).
|
|
405
|
+
- Sorting modifies the dataset order in memory using `gsort`. When combined with views, the filtered indices are automatically re-computed after sorting.
|
|
348
406
|
|
|
349
407
|
## License
|
|
350
408
|
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
mcp_stata/__init__.py,sha256=kJKKRn7lGuVCuS2-GaN5VoVcvnxtNlfuswW_VOlYqwg,98
|
|
2
|
-
mcp_stata/discovery.py,sha256=
|
|
2
|
+
mcp_stata/discovery.py,sha256=jQN9uvBNHF_hCCU9k6BDtSdDxiUVpvXcOJwpWYwo55c,17430
|
|
3
3
|
mcp_stata/graph_detector.py,sha256=-dJIU1Dq_c1eQSk4eegUi0gU2N-tFqjFGM0tE1E32KM,16066
|
|
4
4
|
mcp_stata/models.py,sha256=QETpYKO3yILy_L6mhouVEanvUIvu4ww_CAAFuiP2YdM,1201
|
|
5
5
|
mcp_stata/server.py,sha256=PV8ragGMeHT72zgVx5DJp3vt8CPqT8iwdvJ8GXSctds,15989
|
|
6
|
-
mcp_stata/stata_client.py,sha256=
|
|
6
|
+
mcp_stata/stata_client.py,sha256=06cA5K4vwXc_kNCwIifUL8eSSYsIYtM5zArhJcLcUlo,101267
|
|
7
7
|
mcp_stata/streaming_io.py,sha256=GVaXgTtxx8YLY6RWqdTcO2M3QSqxLsefqkmnlNO1nTI,6974
|
|
8
|
-
mcp_stata/ui_http.py,sha256=
|
|
8
|
+
mcp_stata/ui_http.py,sha256=w1tYxNuwuhkjyfWHxUnpd1DcVBaakjPkEnWr-Fo1lWo,24193
|
|
9
9
|
mcp_stata/smcl/smcl2html.py,sha256=wi91mOMeV9MCmHtNr0toihNbaiDCNZ_NP6a6xEAzWLM,2624
|
|
10
|
-
mcp_stata-1.
|
|
11
|
-
mcp_stata-1.
|
|
12
|
-
mcp_stata-1.
|
|
13
|
-
mcp_stata-1.
|
|
14
|
-
mcp_stata-1.
|
|
10
|
+
mcp_stata-1.7.3.dist-info/METADATA,sha256=cOSWlFgl296f5UhvozBLCPpe7tWS7kcVWGBNlnqO2Hs,15951
|
|
11
|
+
mcp_stata-1.7.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
12
|
+
mcp_stata-1.7.3.dist-info/entry_points.txt,sha256=TcOgrtiTL4LGFEDb1pCrQWA-fUZvIujDOvQ-bWFh5Z8,52
|
|
13
|
+
mcp_stata-1.7.3.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
|
|
14
|
+
mcp_stata-1.7.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|