mcp-stata 1.6.8__py3-none-any.whl → 1.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-stata might be problematic. Click here for more details.

mcp_stata/discovery.py CHANGED
@@ -1,3 +1,12 @@
1
+ """
2
+ Improved discovery.py with better error handling for intermittent failures.
3
+ Key improvements:
4
+ 1. Retry logic for file existence checks
5
+ 2. Better diagnostic logging
6
+ 3. Fuzzy path matching for common typos
7
+ 4. Case-insensitive path resolution on Windows
8
+ """
9
+
1
10
  import os
2
11
  import sys
3
12
  import platform
@@ -5,12 +14,106 @@ import glob
5
14
  import logging
6
15
  import shutil
7
16
  import ntpath
8
-
17
+ import time
9
18
  from typing import Tuple, List, Optional
10
19
 
11
20
  logger = logging.getLogger("mcp_stata.discovery")
12
21
 
13
22
 
23
+ def _exists_with_retry(path: str, max_attempts: int = 3, delay: float = 0.1) -> bool:
24
+ """
25
+ Check if file exists with retry logic to handle transient failures.
26
+ This helps with antivirus scans, file locks, and other temporary issues.
27
+ """
28
+ for attempt in range(max_attempts):
29
+ if os.path.exists(path):
30
+ return True
31
+ if attempt < max_attempts - 1:
32
+ logger.debug(
33
+ f"File existence check attempt {attempt + 1} failed for: {path}"
34
+ )
35
+ time.sleep(delay)
36
+ return False
37
+
38
+
39
+ def _find_similar_stata_dirs(target_path: str) -> List[str]:
40
+ """
41
+ Find similar Stata directories to help diagnose path typos.
42
+ Useful when user has 'Stata19Now' instead of 'StataNow19'.
43
+ """
44
+ parent = os.path.dirname(target_path)
45
+
46
+ # If parent doesn't exist, try grandparent (for directory name typos)
47
+ search_dir = parent
48
+ if not os.path.exists(parent):
49
+ search_dir = os.path.dirname(parent)
50
+
51
+ if not os.path.exists(search_dir):
52
+ return []
53
+
54
+ try:
55
+ subdirs = [
56
+ d for d in os.listdir(search_dir)
57
+ if os.path.isdir(os.path.join(search_dir, d))
58
+ ]
59
+ # Filter to Stata-related directories (case-insensitive)
60
+ stata_dirs = [
61
+ os.path.join(search_dir, d)
62
+ for d in subdirs
63
+ if 'stata' in d.lower()
64
+ ]
65
+ return stata_dirs
66
+ except (OSError, PermissionError) as e:
67
+ logger.debug(f"Could not list directory {search_dir}: {e}")
68
+ return []
69
+
70
+
71
+ def _validate_path_with_diagnostics(path: str, system: str) -> Tuple[bool, str]:
72
+ """
73
+ Validate path exists and provide detailed diagnostics if not.
74
+ Returns (exists, diagnostic_message)
75
+ """
76
+ if _exists_with_retry(path):
77
+ return True, ""
78
+
79
+ # Build diagnostic message
80
+ diagnostics = []
81
+ diagnostics.append(f"File not found: '{path}'")
82
+
83
+ parent_dir = os.path.dirname(path)
84
+ filename = os.path.basename(path)
85
+
86
+ if _exists_with_retry(parent_dir):
87
+ diagnostics.append(f"✓ Parent directory exists: '{parent_dir}'")
88
+ try:
89
+ files_in_parent = os.listdir(parent_dir)
90
+ # Look for similar filenames
91
+ similar_files = [
92
+ f for f in files_in_parent
93
+ if 'stata' in f.lower() and f.lower().endswith('.exe' if system == 'Windows' else '')
94
+ ]
95
+ if similar_files:
96
+ diagnostics.append(f"Found {len(similar_files)} Stata file(s) in parent:")
97
+ for f in similar_files[:5]: # Show max 5
98
+ diagnostics.append(f" - {f}")
99
+ else:
100
+ diagnostics.append(f"No Stata executables found in parent directory")
101
+ diagnostics.append(f"Files present: {', '.join(files_in_parent[:10])}")
102
+ except (OSError, PermissionError) as e:
103
+ diagnostics.append(f"✗ Could not list parent directory: {e}")
104
+ else:
105
+ diagnostics.append(f"✗ Parent directory does not exist: '{parent_dir}'")
106
+
107
+ # Check for similar directories (typo detection)
108
+ similar_dirs = _find_similar_stata_dirs(path)
109
+ if similar_dirs:
110
+ diagnostics.append("\nDid you mean one of these directories?")
111
+ for dir_path in similar_dirs[:5]:
112
+ diagnostics.append(f" - {dir_path}")
113
+
114
+ return False, "\n".join(diagnostics)
115
+
116
+
14
117
  def _normalize_env_path(raw: str, system: str) -> str:
15
118
  """Strip quotes/whitespace, expand variables, and normalize slashes for STATA_PATH."""
16
119
  cleaned = raw.strip()
@@ -30,7 +133,7 @@ def _normalize_env_path(raw: str, system: str) -> str:
30
133
 
31
134
 
32
135
  def _is_executable(path: str, system: str) -> bool:
33
- if not os.path.exists(path):
136
+ if not _exists_with_retry(path): # Use retry logic
34
137
  return False
35
138
  if system == "Windows":
36
139
  # On Windows, check if it's a file and has .exe extension
@@ -71,11 +174,11 @@ def _resolve_windows_host_path(path: str, system: str) -> str:
71
174
  """
72
175
  if system != "Windows":
73
176
  return path
74
- if os.path.exists(path):
177
+ if _exists_with_retry(path): # Use retry logic
75
178
  return path
76
179
  if os.sep != "\\" and "\\" in path:
77
180
  alt_path = path.replace("\\", os.sep)
78
- if os.path.exists(alt_path):
181
+ if _exists_with_retry(alt_path): # Use retry logic
79
182
  return alt_path
80
183
  return path
81
184
 
@@ -97,11 +200,12 @@ def find_stata_path() -> Tuple[str, str]:
97
200
 
98
201
  Behavior:
99
202
  - If STATA_PATH is set and valid, use it.
100
- - If STATA_PATH is set but invalid, fall back to auto-discovery.
101
- - If auto-discovery fails, raise an error (including STATA_PATH failure context, if any).
203
+ - If STATA_PATH is set but invalid, provide detailed diagnostics and fall back.
204
+ - If auto-discovery fails, raise an error with helpful suggestions.
102
205
  """
103
206
  system = _detect_system()
104
207
  stata_path_error: Optional[Exception] = None
208
+ stata_path_diagnostics: Optional[str] = None
105
209
 
106
210
  windows_binaries = [
107
211
  ("StataMP-64.exe", "mp"),
@@ -158,11 +262,15 @@ def find_stata_path() -> Tuple[str, str]:
158
262
  )
159
263
  return candidate, edition
160
264
 
161
- raise FileNotFoundError(
162
- f"STATA_PATH points to directory '{path}', but no Stata executable was found within. "
265
+ # Enhanced error with diagnostics
266
+ exists, diagnostics = _validate_path_with_diagnostics(path, system)
267
+ error_msg = (
268
+ f"STATA_PATH points to directory '{path}', but no Stata executable was found within.\n"
269
+ f"{diagnostics}\n\n"
163
270
  "Point STATA_PATH directly to the Stata binary "
164
- "(e.g., C:\\Program Files\\Stata19\\StataMP-64.exe)."
271
+ "(e.g., C:\\Program Files\\StataNow19\\StataMP-64.exe)."
165
272
  )
273
+ raise FileNotFoundError(error_msg)
166
274
 
167
275
  edition = "be"
168
276
  lower_path = path.lower()
@@ -173,13 +281,18 @@ def find_stata_path() -> Tuple[str, str]:
173
281
  elif "be" in lower_path:
174
282
  edition = "be"
175
283
 
176
- if not os.path.exists(path):
177
- raise FileNotFoundError(
178
- f"STATA_PATH points to '{path}', but that file does not exist. "
284
+ # Use enhanced validation with diagnostics
285
+ exists, diagnostics = _validate_path_with_diagnostics(path, system)
286
+ if not exists:
287
+ error_msg = (
288
+ f"STATA_PATH points to '{path}', but that file does not exist.\n"
289
+ f"{diagnostics}\n\n"
179
290
  "Update STATA_PATH to your Stata binary (e.g., "
180
291
  "/Applications/StataNow/StataMP.app/Contents/MacOS/stata-mp, "
181
- "/usr/local/stata19/stata-mp or C:\\Program Files\\Stata19Now\\StataSE-64.exe)."
292
+ "/usr/local/stata19/stata-mp or C:\\Program Files\\StataNow19\\StataMP-64.exe)."
182
293
  )
294
+ raise FileNotFoundError(error_msg)
295
+
183
296
  if not _is_executable(path, system):
184
297
  raise PermissionError(
185
298
  f"STATA_PATH points to '{path}', but it is not executable. "
@@ -191,6 +304,7 @@ def find_stata_path() -> Tuple[str, str]:
191
304
 
192
305
  except Exception as exc:
193
306
  stata_path_error = exc
307
+ stata_path_diagnostics = str(exc)
194
308
  logger.warning(
195
309
  "STATA_PATH override failed (%s). Falling back to auto-discovery.",
196
310
  exc,
@@ -213,11 +327,11 @@ def find_stata_path() -> Tuple[str, str]:
213
327
  for pattern in app_globs:
214
328
  for app_dir in glob.glob(pattern):
215
329
  binary_dir = os.path.join(app_dir, "Contents", "MacOS")
216
- if not os.path.exists(binary_dir):
330
+ if not _exists_with_retry(binary_dir): # Use retry logic
217
331
  continue
218
332
  for binary, edition in [("stata-mp", "mp"), ("stata-se", "se"), ("stata", "be")]:
219
333
  full_path = os.path.join(binary_dir, binary)
220
- if os.path.exists(full_path):
334
+ if _exists_with_retry(full_path): # Use retry logic
221
335
  candidates.append((full_path, edition))
222
336
 
223
337
  elif system == "Windows":
@@ -265,7 +379,7 @@ def find_stata_path() -> Tuple[str, str]:
265
379
  continue
266
380
  for exe, edition in windows_binaries:
267
381
  full_path = os.path.join(stata_dir, exe)
268
- if os.path.exists(full_path):
382
+ if _exists_with_retry(full_path): # Use retry logic
269
383
  candidates.append((full_path, edition))
270
384
 
271
385
  elif system == "Linux":
@@ -303,13 +417,13 @@ def find_stata_path() -> Tuple[str, str]:
303
417
  continue
304
418
  for binary, edition in linux_binaries:
305
419
  full_path = os.path.join(base_dir, binary)
306
- if os.path.exists(full_path):
420
+ if _exists_with_retry(full_path): # Use retry logic
307
421
  candidates.append((full_path, edition))
308
422
 
309
423
  candidates = _dedupe_preserve(candidates)
310
424
 
311
425
  for path, edition in candidates:
312
- if not os.path.exists(path):
426
+ if not _exists_with_retry(path): # Use retry logic
313
427
  logger.warning("Discovered candidate missing on disk: %s", path)
314
428
  continue
315
429
  if not _is_executable(path, system):
@@ -318,21 +432,27 @@ def find_stata_path() -> Tuple[str, str]:
318
432
  logger.info("Auto-discovered Stata at %s (%s)", path, edition)
319
433
  return path, edition
320
434
 
435
+ # Build comprehensive error message
436
+ error_parts = ["Could not automatically locate Stata."]
437
+
321
438
  if stata_path_error is not None:
322
- raise FileNotFoundError(
323
- "Could not automatically locate Stata after STATA_PATH failed. "
324
- f"STATA_PATH error was: {stata_path_error}. "
325
- "Fix STATA_PATH to point to the Stata executable, or install Stata in a standard location "
326
- "(e.g., /Applications/StataNow/StataMP.app/Contents/MacOS/stata-mp, /usr/local/stata18/stata-mp, "
327
- "or C:\\Program Files\\Stata18\\StataMP-64.exe)."
328
- ) from stata_path_error
329
-
330
- raise FileNotFoundError(
331
- "Could not automatically locate Stata. "
332
- "Set STATA_PATH to your Stata executable (e.g., "
333
- "/Applications/StataNow/StataMP.app/Contents/MacOS/stata-mp, /usr/local/stata18/stata-mp, "
334
- "or C:\\Program Files\\Stata18\\StataMP-64.exe)."
439
+ error_parts.append(
440
+ f"\nSTATA_PATH was set but failed:\n{stata_path_diagnostics}"
441
+ )
442
+
443
+ error_parts.append(
444
+ "\nTo fix this issue:\n"
445
+ "1. Set STATA_PATH to point to your Stata executable, for example:\n"
446
+ " - Windows: C:\\Program Files\\StataNow19\\StataMP-64.exe\n"
447
+ " - macOS: /Applications/StataNow/StataMP.app/Contents/MacOS/stata-mp\n"
448
+ " - Linux: /usr/local/stata19/stata-mp\n"
449
+ "\n2. Or install Stata in a standard location where it can be auto-discovered."
335
450
  )
451
+
452
+ if stata_path_error is not None:
453
+ raise FileNotFoundError("\n".join(error_parts)) from stata_path_error
454
+ else:
455
+ raise FileNotFoundError("\n".join(error_parts))
336
456
 
337
457
 
338
458
  def main() -> int:
mcp_stata/stata_client.py CHANGED
@@ -408,6 +408,73 @@ class StataClient:
408
408
  return None
409
409
  return None
410
410
 
411
+ def _read_log_tail(self, path: str, max_chars: int) -> str:
412
+ try:
413
+ with open(path, "rb") as f:
414
+ f.seek(0, os.SEEK_END)
415
+ size = f.tell()
416
+ if size <= 0:
417
+ return ""
418
+ read_size = min(size, max_chars)
419
+ f.seek(-read_size, os.SEEK_END)
420
+ data = f.read(read_size)
421
+ return data.decode("utf-8", errors="replace")
422
+ except Exception:
423
+ return ""
424
+
425
+ def _select_stata_error_message(self, text: str, fallback: str) -> str:
426
+ if not text:
427
+ return fallback
428
+ ignore_patterns = (
429
+ r"^r\(\d+\);?$",
430
+ r"^end of do-file$",
431
+ r"^execution terminated$",
432
+ r"^[-=*]{3,}.*$",
433
+ )
434
+ rc_pattern = r"^r\(\d+\);?$"
435
+ error_patterns = (
436
+ r"\btype mismatch\b",
437
+ r"\bnot found\b",
438
+ r"\bnot allowed\b",
439
+ r"\bno observations\b",
440
+ r"\bconformability error\b",
441
+ r"\binvalid\b",
442
+ r"\bsyntax error\b",
443
+ r"\berror\b",
444
+ )
445
+ lines = text.splitlines()
446
+ for raw in reversed(lines):
447
+ line = raw.strip()
448
+ if not line:
449
+ continue
450
+ if any(re.search(pat, line, re.IGNORECASE) for pat in error_patterns):
451
+ return line
452
+ for i in range(len(lines) - 1, -1, -1):
453
+ line = lines[i].strip()
454
+ if not line:
455
+ continue
456
+ if re.match(rc_pattern, line, re.IGNORECASE):
457
+ for j in range(i - 1, -1, -1):
458
+ prev_line = lines[j].strip()
459
+ if not prev_line:
460
+ continue
461
+ if prev_line.startswith((".", ">", "-", "=")):
462
+ continue
463
+ if any(re.match(pat, prev_line, re.IGNORECASE) for pat in ignore_patterns):
464
+ continue
465
+ return prev_line
466
+ return line
467
+ for raw in reversed(lines):
468
+ line = raw.strip()
469
+ if not line:
470
+ continue
471
+ if line.startswith((".", ">", "-", "=")):
472
+ continue
473
+ if any(re.match(pat, line, re.IGNORECASE) for pat in ignore_patterns):
474
+ continue
475
+ return line
476
+ return fallback
477
+
411
478
  def _smcl_to_text(self, smcl: str) -> str:
412
479
  """Convert simple SMCL markup into plain text for LLM-friendly help."""
413
480
  # First, keep inline directive content if present (e.g., {bf:word} -> word)
@@ -433,7 +500,10 @@ class StataClient:
433
500
  rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
434
501
  line_no = self._parse_line_from_text(combined) if combined else None
435
502
  snippet = combined[-800:] if combined else None
436
- message = (stderr or (str(exc) if exc else "") or stdout or "Stata error").strip()
503
+ fallback = (stderr or (str(exc) if exc else "") or stdout or "Stata error").strip()
504
+ if fallback == "Stata error" and rc_final is not None:
505
+ fallback = f"Stata error r({rc_final})"
506
+ message = self._select_stata_error_message(combined, fallback)
437
507
  return ErrorEnvelope(
438
508
  message=message,
439
509
  rc=rc_final,
@@ -640,7 +710,7 @@ class StataClient:
640
710
  buffering=1,
641
711
  )
642
712
  log_path = log_file.name
643
- tail = TailBuffer(max_chars=8000)
713
+ tail = TailBuffer(max_chars=200000 if trace else 20000)
644
714
  tee = FileTeeIO(log_file, tail)
645
715
 
646
716
  # Inform the MCP client immediately where to read/tail the output.
@@ -705,6 +775,9 @@ class StataClient:
705
775
  logger.warning(f"Failed to cache detected graphs: {e}")
706
776
 
707
777
  tail_text = tail.get_value()
778
+ log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
779
+ if log_tail and len(log_tail) > len(tail_text):
780
+ tail_text = log_tail
708
781
  combined = (tail_text or "") + (f"\n{exc}" if exc else "")
709
782
  rc_hint = self._parse_rc_from_text(combined) if combined else None
710
783
  if exc is None and rc_hint is not None and rc_hint != 0:
@@ -718,14 +791,10 @@ class StataClient:
718
791
  rc_hint = self._parse_rc_from_text(combined) if combined else None
719
792
  rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
720
793
  line_no = self._parse_line_from_text(combined) if combined else None
721
- message = "Stata error"
722
- if tail_text and tail_text.strip():
723
- for line in reversed(tail_text.splitlines()):
724
- if line.strip():
725
- message = line.strip()
726
- break
727
- elif exc is not None:
728
- message = str(exc).strip() or message
794
+ fallback = (str(exc).strip() if exc is not None else "") or "Stata error"
795
+ if fallback == "Stata error" and rc_final is not None:
796
+ fallback = f"Stata error r({rc_final})"
797
+ message = self._select_stata_error_message(combined, fallback)
729
798
 
730
799
  error = ErrorEnvelope(
731
800
  message=message,
@@ -876,7 +945,7 @@ class StataClient:
876
945
  buffering=1,
877
946
  )
878
947
  log_path = log_file.name
879
- tail = TailBuffer(max_chars=8000)
948
+ tail = TailBuffer(max_chars=200000 if trace else 20000)
880
949
  tee = FileTeeIO(log_file, tail)
881
950
 
882
951
  # Inform the MCP client immediately where to read/tail the output.
@@ -1042,6 +1111,9 @@ class StataClient:
1042
1111
  logger.error(f"Post-execution graph detection failed: {e}")
1043
1112
 
1044
1113
  tail_text = tail.get_value()
1114
+ log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
1115
+ if log_tail and len(log_tail) > len(tail_text):
1116
+ tail_text = log_tail
1045
1117
  combined = (tail_text or "") + (f"\n{exc}" if exc else "")
1046
1118
  rc_hint = self._parse_rc_from_text(combined) if combined else None
1047
1119
  if exc is None and rc_hint is not None and rc_hint != 0:
@@ -1055,14 +1127,10 @@ class StataClient:
1055
1127
  rc_hint = self._parse_rc_from_text(combined) if combined else None
1056
1128
  rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
1057
1129
  line_no = self._parse_line_from_text(combined) if combined else None
1058
- message = "Stata error"
1059
- if tail_text and tail_text.strip():
1060
- for line in reversed(tail_text.splitlines()):
1061
- if line.strip():
1062
- message = line.strip()
1063
- break
1064
- elif exc is not None:
1065
- message = str(exc).strip() or message
1130
+ fallback = (str(exc).strip() if exc is not None else "") or "Stata error"
1131
+ if fallback == "Stata error" and rc_final is not None:
1132
+ fallback = f"Stata error r({rc_final})"
1133
+ message = self._select_stata_error_message(combined, fallback)
1066
1134
 
1067
1135
  error = ErrorEnvelope(
1068
1136
  message=message,
@@ -1425,6 +1493,65 @@ class StataClient:
1425
1493
 
1426
1494
  return indices
1427
1495
 
1496
+ def apply_sort(self, sort_spec: List[str]) -> None:
1497
+ """
1498
+ Apply sorting to the dataset using gsort.
1499
+
1500
+ Args:
1501
+ sort_spec: List of variables to sort by, with optional +/- prefix.
1502
+ e.g., ["-price", "+mpg"] sorts by price descending, then mpg ascending.
1503
+ No prefix is treated as ascending (+).
1504
+
1505
+ Raises:
1506
+ ValueError: If sort_spec is invalid or contains invalid variables
1507
+ RuntimeError: If no data in memory or sort command fails
1508
+ """
1509
+ if not self._initialized:
1510
+ self.init()
1511
+
1512
+ state = self.get_dataset_state()
1513
+ if int(state.get("k", 0) or 0) == 0 and int(state.get("n", 0) or 0) == 0:
1514
+ raise RuntimeError("No data in memory")
1515
+
1516
+ if not sort_spec or not isinstance(sort_spec, list):
1517
+ raise ValueError("sort_spec must be a non-empty list")
1518
+
1519
+ # Validate all variables exist
1520
+ var_map = self._get_var_index_map()
1521
+ for spec in sort_spec:
1522
+ if not isinstance(spec, str) or not spec:
1523
+ raise ValueError(f"Invalid sort specification: {spec!r}")
1524
+
1525
+ # Extract variable name (remove +/- prefix if present)
1526
+ varname = spec.lstrip("+-")
1527
+ if not varname:
1528
+ raise ValueError(f"Invalid sort specification: {spec!r}")
1529
+
1530
+ if varname not in var_map:
1531
+ raise ValueError(f"Variable not found: {varname}")
1532
+
1533
+ # Build gsort command
1534
+ # gsort uses - for descending, + or nothing for ascending
1535
+ gsort_args = []
1536
+ for spec in sort_spec:
1537
+ if spec.startswith("-") or spec.startswith("+"):
1538
+ gsort_args.append(spec)
1539
+ else:
1540
+ # No prefix means ascending, add + explicitly for clarity
1541
+ gsort_args.append(f"+{spec}")
1542
+
1543
+ cmd = f"gsort {' '.join(gsort_args)}"
1544
+
1545
+ try:
1546
+ result = self.run_command_structured(cmd, echo=False)
1547
+ if not result.success:
1548
+ error_msg = result.error.message if result.error else "Sort failed"
1549
+ raise RuntimeError(f"Failed to sort dataset: {error_msg}")
1550
+ except Exception as e:
1551
+ if isinstance(e, RuntimeError):
1552
+ raise
1553
+ raise RuntimeError(f"Failed to sort dataset: {e}")
1554
+
1428
1555
  def get_variable_details(self, varname: str) -> str:
1429
1556
  """Returns codebook/summary for a specific variable."""
1430
1557
  resp = self.run_command_structured(f"codebook {varname}", echo=True)
@@ -2247,7 +2374,7 @@ class StataClient:
2247
2374
  buffering=1,
2248
2375
  )
2249
2376
  log_path = log_file.name
2250
- tail = TailBuffer(max_chars=8000)
2377
+ tail = TailBuffer(max_chars=200000 if trace else 20000)
2251
2378
  tee = FileTeeIO(log_file, tail)
2252
2379
 
2253
2380
  rc = -1
@@ -2278,6 +2405,9 @@ class StataClient:
2278
2405
  tee.close()
2279
2406
 
2280
2407
  tail_text = tail.get_value()
2408
+ log_tail = self._read_log_tail(log_path, 200000 if trace else 20000)
2409
+ if log_tail and len(log_tail) > len(tail_text):
2410
+ tail_text = log_tail
2281
2411
  combined = (tail_text or "") + (f"\n{exc}" if exc else "")
2282
2412
  rc_hint = self._parse_rc_from_text(combined) if combined else None
2283
2413
  if exc is None and rc_hint is not None and rc_hint != 0:
@@ -2292,14 +2422,10 @@ class StataClient:
2292
2422
  rc_hint = self._parse_rc_from_text(combined) if combined else None
2293
2423
  rc_final = rc_hint if (rc_hint is not None and rc_hint != 0) else (rc if rc not in (-1, None) else rc_hint)
2294
2424
  line_no = self._parse_line_from_text(combined) if combined else None
2295
- message = "Stata error"
2296
- if tail_text and tail_text.strip():
2297
- for line in reversed(tail_text.splitlines()):
2298
- if line.strip():
2299
- message = line.strip()
2300
- break
2301
- elif exc is not None:
2302
- message = str(exc).strip() or message
2425
+ fallback = (str(exc).strip() if exc is not None else "") or "Stata error"
2426
+ if fallback == "Stata error" and rc_final is not None:
2427
+ fallback = f"Stata error r({rc_final})"
2428
+ message = self._select_stata_error_message(combined, fallback)
2303
2429
 
2304
2430
  error = ErrorEnvelope(
2305
2431
  message=message,
@@ -2384,4 +2510,3 @@ class StataClient:
2384
2510
  )
2385
2511
 
2386
2512
  return result
2387
-
mcp_stata/ui_http.py CHANGED
@@ -27,6 +27,7 @@ class ViewHandle:
27
27
  view_id: str
28
28
  dataset_id: str
29
29
  frame: str
30
+ filter_expr: str
30
31
  obs_indices: list[int]
31
32
  filtered_n: int
32
33
  created_at: float
@@ -86,7 +87,7 @@ class UIChannelManager:
86
87
  return UIChannelInfo(base_url=base_url, token=self._token or "", expires_at=self._expires_at)
87
88
 
88
89
  def capabilities(self) -> dict[str, bool]:
89
- return {"dataBrowser": True, "filtering": True}
90
+ return {"dataBrowser": True, "filtering": True, "sorting": True}
90
91
 
91
92
  def current_dataset_id(self) -> str:
92
93
  with self._lock:
@@ -138,6 +139,7 @@ class UIChannelManager:
138
139
  view_id=view_id,
139
140
  dataset_id=current_id,
140
141
  frame=frame,
142
+ filter_expr=filter_expr,
141
143
  obs_indices=obs_indices,
142
144
  filtered_n=len(obs_indices),
143
145
  created_at=now,
@@ -290,11 +292,16 @@ class UIChannelManager:
290
292
  body = self._read_json()
291
293
  if body is None:
292
294
  return
295
+ # Debug logging to diagnose limit parameter issues
296
+ import sys
297
+ print(f"[DEBUG] /v1/page request body: {body}", file=sys.stderr, flush=True)
298
+ print(f"[DEBUG] limit value: {body.get('limit')!r} (type: {type(body.get('limit')).__name__})", file=sys.stderr, flush=True)
293
299
  try:
294
300
  resp = handle_page_request(manager, body, view_id=None)
295
301
  self._send_json(200, resp)
296
302
  return
297
303
  except HTTPError as e:
304
+ print(f"[DEBUG] HTTPError: {e.code} - {e.message}", file=sys.stderr, flush=True)
298
305
  self._error(e.status, e.code, e.message, stata_rc=e.stata_rc)
299
306
  return
300
307
  except Exception as e:
@@ -347,11 +354,16 @@ class UIChannelManager:
347
354
  body = self._read_json()
348
355
  if body is None:
349
356
  return
357
+ # Debug logging to diagnose limit parameter issues
358
+ import sys
359
+ print(f"[DEBUG] /v1/views/{view_id}/page request body: {body}", file=sys.stderr, flush=True)
360
+ print(f"[DEBUG] limit value: {body.get('limit')!r} (type: {type(body.get('limit')).__name__})", file=sys.stderr, flush=True)
350
361
  try:
351
362
  resp = handle_page_request(manager, body, view_id=view_id)
352
363
  self._send_json(200, resp)
353
364
  return
354
365
  except HTTPError as e:
366
+ print(f"[DEBUG] HTTPError: {e.code} - {e.message}", file=sys.stderr, flush=True)
355
367
  self._error(e.status, e.code, e.message, stata_rc=e.stata_rc)
356
368
  return
357
369
  except Exception as e:
@@ -473,6 +485,13 @@ def handle_page_request(manager: UIChannelManager, body: dict[str, Any], *, view
473
485
  vars_req = body.get("vars", [])
474
486
  include_obs_no = bool(body.get("includeObsNo", False))
475
487
 
488
+ # Parse sortBy parameter
489
+ sort_by = body.get("sortBy", [])
490
+ if sort_by is not None and not isinstance(sort_by, list):
491
+ raise HTTPError(400, "invalid_request", f"sortBy must be an array, got: {type(sort_by).__name__}")
492
+ if sort_by and not all(isinstance(s, str) for s in sort_by):
493
+ raise HTTPError(400, "invalid_request", "sortBy must be an array of strings")
494
+
476
495
  # Parse maxChars
477
496
  max_chars_raw = body.get("maxChars", max_chars)
478
497
  try:
@@ -509,6 +528,20 @@ def handle_page_request(manager: UIChannelManager, body: dict[str, Any], *, view
509
528
  filtered_n = view.filtered_n
510
529
 
511
530
  try:
531
+ # Apply sorting if requested
532
+ if sort_by:
533
+ try:
534
+ manager._client.apply_sort(sort_by)
535
+ # If sorting with a filtered view, re-compute indices after sort
536
+ if view_id is not None:
537
+ assert view is not None
538
+ obs_indices = manager._client.compute_view_indices(view.filter_expr)
539
+ filtered_n = len(obs_indices)
540
+ except ValueError as e:
541
+ raise HTTPError(400, "invalid_request", f"Invalid sort specification: {e}")
542
+ except RuntimeError as e:
543
+ raise HTTPError(500, "internal_error", f"Failed to apply sort: {e}")
544
+
512
545
  dataset_state = manager._client.get_dataset_state()
513
546
  page = manager._client.get_page(
514
547
  offset=offset,
@@ -518,6 +551,9 @@ def handle_page_request(manager: UIChannelManager, body: dict[str, Any], *, view
518
551
  max_chars=max_chars_req,
519
552
  obs_indices=obs_indices,
520
553
  )
554
+ except HTTPError:
555
+ # Re-raise HTTPError exceptions as-is
556
+ raise
521
557
  except RuntimeError as e:
522
558
  # StataClient uses RuntimeError("No data in memory") for empty dataset.
523
559
  msg = str(e) or "No data in memory"
mcp_stata-1.6.8.dist-info/METADATA → mcp_stata-1.7.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mcp-stata
3
- Version: 1.6.8
3
+ Version: 1.7.3
4
4
  Summary: A lightweight Model Context Protocol (MCP) server for Stata. Execute commands, inspect data, retrieve stored results (`r()`/`e()`), and view graphs in your chat interface. Built for economists who want to integrate LLM assistance into their Stata workflow.
5
5
  Project-URL: Homepage, https://github.com/tmonk/mcp-stata
6
6
  Project-URL: Repository, https://github.com/tmonk/mcp-stata
@@ -300,7 +300,8 @@ Call the MCP tool `get_ui_channel()` and parse the JSON:
300
300
  "expiresAt": 1730000000,
301
301
  "capabilities": {
302
302
  "dataBrowser": true,
303
- "filtering": true
303
+ "filtering": true,
304
+ "sorting": true
304
305
  }
305
306
  }
306
307
  ```
@@ -341,10 +342,67 @@ curl -sS \
341
342
  "$BASE_URL/v1/page"
342
343
  ```
343
344
 
345
+ #### Sorting
346
+
347
+ The `/v1/page` and `/v1/views/:viewId/page` endpoints support sorting via the optional `sortBy` parameter:
348
+
349
+ ```bash
350
+ # Sort by price ascending
351
+ curl -sS \
352
+ -H "Authorization: Bearer $TOKEN" \
353
+ -H "Content-Type: application/json" \
354
+ -d '{"datasetId":"...","offset":0,"limit":50,"vars":["price","mpg"],"sortBy":["price"]}' \
355
+ "$BASE_URL/v1/page"
356
+
357
+ # Sort by price descending
358
+ curl -sS \
359
+ -H "Authorization: Bearer $TOKEN" \
360
+ -H "Content-Type: application/json" \
361
+ -d '{"datasetId":"...","offset":0,"limit":50,"vars":["price","mpg"],"sortBy":["-price"]}' \
362
+ "$BASE_URL/v1/page"
363
+
364
+ # Multi-variable sort: foreign ascending, then price descending
365
+ curl -sS \
366
+ -H "Authorization: Bearer $TOKEN" \
367
+ -H "Content-Type: application/json" \
368
+ -d '{"datasetId":"...","offset":0,"limit":50,"vars":["foreign","price","mpg"],"sortBy":["foreign","-price"]}' \
369
+ "$BASE_URL/v1/page"
370
+ ```
371
+
372
+ **Sort specification format:**
373
+ - `sortBy` is an array of strings (variable names with optional prefix)
374
+ - No prefix or `+` prefix = ascending order (e.g., `"price"` or `"+price"`)
375
+ - `-` prefix = descending order (e.g., `"-price"`)
376
+ - Multiple variables are supported for multi-level sorting
377
+ - Uses Stata's `gsort` command internally
378
+
379
+ **Sorting with filtered views:**
380
+ - Sorting is fully supported with filtered views
381
+ - The sort is applied to the entire dataset, then filtered indices are re-computed
382
+ - Example: Filter for `price < 5000`, then sort descending by price
383
+
384
+ ```bash
385
+ # Create a filtered view
386
+ curl -sS \
387
+ -H "Authorization: Bearer $TOKEN" \
388
+ -H "Content-Type: application/json" \
389
+ -d '{"datasetId":"...","frame":"default","filterExpr":"price < 5000"}' \
390
+ "$BASE_URL/v1/views"
391
+ # Returns: {"view": {"id": "view_abc123", "filteredN": 37}}
392
+
393
+ # Get sorted page from filtered view
394
+ curl -sS \
395
+ -H "Authorization: Bearer $TOKEN" \
396
+ -H "Content-Type: application/json" \
397
+ -d '{"offset":0,"limit":50,"vars":["price","mpg"],"sortBy":["-price"]}' \
398
+ "$BASE_URL/v1/views/view_abc123/page"
399
+ ```
400
+
344
401
  Notes:
345
402
 
346
403
  - `datasetId` is used for cache invalidation. If the dataset changes due to running Stata commands, the server will report a new dataset id and view handles become invalid.
347
404
  - Filter expressions are evaluated in Python using values read from Stata via `sfi.Data.get`. Use boolean operators like `==`, `!=`, `<`, `>`, and `and`/`or` (Stata-style `&`/`|` are also accepted).
405
+ - Sorting modifies the dataset order in memory using `gsort`. When combined with views, the filtered indices are automatically re-computed after sorting.
348
406
 
349
407
  ## License
350
408
 
mcp_stata-1.6.8.dist-info/RECORD → mcp_stata-1.7.3.dist-info/RECORD CHANGED
@@ -1,14 +1,14 @@
1
1
  mcp_stata/__init__.py,sha256=kJKKRn7lGuVCuS2-GaN5VoVcvnxtNlfuswW_VOlYqwg,98
2
- mcp_stata/discovery.py,sha256=J_XU1_AXRpqWg_ULV8xf4lT6RRN8MxOdpr1ioTi5TjQ,12951
2
+ mcp_stata/discovery.py,sha256=jQN9uvBNHF_hCCU9k6BDtSdDxiUVpvXcOJwpWYwo55c,17430
3
3
  mcp_stata/graph_detector.py,sha256=-dJIU1Dq_c1eQSk4eegUi0gU2N-tFqjFGM0tE1E32KM,16066
4
4
  mcp_stata/models.py,sha256=QETpYKO3yILy_L6mhouVEanvUIvu4ww_CAAFuiP2YdM,1201
5
5
  mcp_stata/server.py,sha256=PV8ragGMeHT72zgVx5DJp3vt8CPqT8iwdvJ8GXSctds,15989
6
- mcp_stata/stata_client.py,sha256=TNJnlkZ0IoNoVXhKUw0_IYLiRNOwyL2wVmb1gWdiRUY,95981
6
+ mcp_stata/stata_client.py,sha256=06cA5K4vwXc_kNCwIifUL8eSSYsIYtM5zArhJcLcUlo,101267
7
7
  mcp_stata/streaming_io.py,sha256=GVaXgTtxx8YLY6RWqdTcO2M3QSqxLsefqkmnlNO1nTI,6974
8
- mcp_stata/ui_http.py,sha256=kkPYpqp-lQDXs_9qcs7hb16FtvNcag3rKSH7wvQX7Qo,22013
8
+ mcp_stata/ui_http.py,sha256=w1tYxNuwuhkjyfWHxUnpd1DcVBaakjPkEnWr-Fo1lWo,24193
9
9
  mcp_stata/smcl/smcl2html.py,sha256=wi91mOMeV9MCmHtNr0toihNbaiDCNZ_NP6a6xEAzWLM,2624
10
- mcp_stata-1.6.8.dist-info/METADATA,sha256=V5mN_9vRL5f1aja0zrhMatBKb-_ZC6Ok3uOXfRBfYw4,13794
11
- mcp_stata-1.6.8.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
12
- mcp_stata-1.6.8.dist-info/entry_points.txt,sha256=TcOgrtiTL4LGFEDb1pCrQWA-fUZvIujDOvQ-bWFh5Z8,52
13
- mcp_stata-1.6.8.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
14
- mcp_stata-1.6.8.dist-info/RECORD,,
10
+ mcp_stata-1.7.3.dist-info/METADATA,sha256=cOSWlFgl296f5UhvozBLCPpe7tWS7kcVWGBNlnqO2Hs,15951
11
+ mcp_stata-1.7.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
12
+ mcp_stata-1.7.3.dist-info/entry_points.txt,sha256=TcOgrtiTL4LGFEDb1pCrQWA-fUZvIujDOvQ-bWFh5Z8,52
13
+ mcp_stata-1.7.3.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
14
+ mcp_stata-1.7.3.dist-info/RECORD,,