mcp-stata 1.18.0__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcp_stata/__init__.py ADDED
@@ -0,0 +1,4 @@
1
# Re-export the package's public entry points so they can be imported
# directly from ``mcp_stata``.
from .server import main
from .stata_client import StataClient

# Names exported by ``from mcp_stata import *``.
__all__ = ["main", "StataClient"]
mcp_stata/config.py ADDED
@@ -0,0 +1,20 @@
1
+
2
+ """
3
+ Central configuration for mcp-stata server and UI channel.
4
+ """
5
from typing import Final

# Every value below is annotated ``Final`` so type checkers flag reassignment.

# Server Limits
MAX_LIMIT: Final[int] = 500  # Default row limit for JSON endpoints
MAX_VARS: Final[int] = 32_767  # Max variables in Stata
MAX_CHARS: Final[int] = 500  # Max chars per string cell to return
MAX_REQUEST_BYTES: Final[int] = 1_000_000  # Max size of HTTP request body
MAX_ARROW_LIMIT: Final[int] = 1_000_000  # Default row limit for Arrow IPC streams

# Timeouts (seconds)
TOKEN_TTL_S: Final[int] = 20 * 60  # Bearer token validity (20 minutes)
VIEW_TTL_S: Final[int] = 30 * 60  # Filtered view handle validity (30 minutes)

# Network
DEFAULT_HOST: Final[str] = "127.0.0.1"
DEFAULT_PORT: Final[int] = 0  # 0 = random ephemeral port
mcp_stata/discovery.py ADDED
@@ -0,0 +1,550 @@
1
+ """
2
+ Optimized discovery.py with fast auto-discovery and targeted retry logic.
3
+ Key improvements:
4
+ 1. Fast path checking during discovery (no retries)
5
+ 2. Retry logic only for validation of user-provided paths
6
+ 3. Better diagnostic logging
7
+ 4. Fuzzy path matching for common typos
8
+ 5. Case-insensitive path resolution on Windows
9
+ """
10
+
11
+ import os
12
+ import sys
13
+ import platform
14
+ import glob
15
+ import logging
16
+ import shutil
17
+ import ntpath
18
+ import time
19
+ import re
20
+ from typing import Tuple, List, Optional
21
+
22
+ logger = logging.getLogger("mcp_stata.discovery")
23
+
24
+
25
def _exists_with_retry(path: str, max_attempts: int = 1, delay: float = 0.01) -> bool:
    """
    Probe ``path`` for existence up to ``max_attempts`` times.

    Between failed probes the function logs at debug level and sleeps for
    ``delay`` seconds, which gives transient conditions (antivirus scans,
    file locks, ...) a chance to clear. With the default ``max_attempts=1``
    this is a single probe with no sleep. Intended for validating
    user-provided paths, not for bulk auto-discovery.

    Returns True as soon as one probe succeeds, False otherwise (including
    when ``max_attempts`` is zero or negative, in which case nothing is probed).
    """
    last_index = max_attempts - 1
    for attempt in range(max_attempts):
        if os.path.exists(path):
            return True
        if attempt >= last_index:
            # The final probe failed; there is nothing left to wait for.
            break
        logger.debug(
            f"File existence check attempt {attempt + 1} failed for: {path}"
        )
        time.sleep(delay)
    return False
40
+
41
+
42
def _exists_fast(path: str) -> bool:
    """Single-shot existence probe (no retries) used by auto-discovery."""
    present = os.path.exists(path)
    return present
45
+
46
+
47
def _find_similar_stata_dirs(target_path: str) -> List[str]:
    """
    List sibling directories whose name mentions 'stata' (case-insensitive).

    The search happens in the parent directory of ``target_path``; when that
    parent does not exist (a typo in the directory name itself, e.g.
    'Stata19Now' instead of 'StataNow19') the grandparent is searched instead.

    Returns the full paths of the matching directories, or an empty list when
    the search directory is missing or cannot be listed.
    """
    containing_dir = os.path.dirname(target_path)
    if os.path.exists(containing_dir):
        search_dir = containing_dir
    else:
        # Fall back one level up to catch typos in the directory name.
        search_dir = os.path.dirname(containing_dir)

    if not os.path.exists(search_dir):
        return []

    matches: List[str] = []
    try:
        for entry in os.listdir(search_dir):
            full_entry = os.path.join(search_dir, entry)
            if not os.path.isdir(full_entry):
                continue
            if 'stata' in entry.lower():
                matches.append(full_entry)
    except (OSError, PermissionError) as e:
        logger.debug(f"Could not list directory {search_dir}: {e}")
        return []
    return matches
77
+
78
+
79
def _validate_path_with_diagnostics(path: str, system: str) -> Tuple[bool, str]:
    """
    Check that ``path`` exists and explain what was found when it does not.

    Existence checks go through ``_exists_with_retry`` because this is meant
    for user-provided paths.

    Returns:
        ``(True, "")`` when the path exists, otherwise ``(False, message)``
        where ``message`` is a newline-joined report covering the parent
        directory, Stata-looking files inside it, and similarly named
        directories that may be the intended target.
    """
    if _exists_with_retry(path):
        return True, ""

    # On Windows only '.exe' files count; elsewhere the empty suffix matches
    # every file name.
    wanted_suffix = '.exe' if system == 'Windows' else ''
    report = [f"File not found: '{path}'"]
    parent_dir = os.path.dirname(path)

    if not _exists_with_retry(parent_dir):
        report.append(f"✗ Parent directory does not exist: '{parent_dir}'")
    else:
        report.append(f"✓ Parent directory exists: '{parent_dir}'")
        try:
            files_in_parent = os.listdir(parent_dir)
            similar_files = []
            for f in files_in_parent:
                lowered = f.lower()
                if 'stata' in lowered and lowered.endswith(wanted_suffix):
                    similar_files.append(f)
            if not similar_files:
                report.append("No Stata executables found in parent directory")
                report.append(f"Files present: {', '.join(files_in_parent[:10])}")
            else:
                report.append(f"Found {len(similar_files)} Stata file(s) in parent:")
                # Cap the listing at five entries to keep the message short.
                report.extend(f" - {f}" for f in similar_files[:5])
        except (OSError, PermissionError) as e:
            report.append(f"✗ Could not list parent directory: {e}")

    # Typo detection: suggest neighbouring directories that mention 'stata'.
    similar_dirs = _find_similar_stata_dirs(path)
    if similar_dirs:
        report.append("\nDid you mean one of these directories?")
        report.extend(f" - {dir_path}" for dir_path in similar_dirs[:5])

    return False, "\n".join(report)
124
+
125
+
126
def _normalize_env_path(raw: str, system: str) -> str:
    """
    Clean up a STATA_PATH value.

    Steps: trim surrounding whitespace, remove one pair of matching quotes
    (double or single) and trim again, expand ``~`` and environment
    variables, then normalize separators for the target ``system``.
    """
    value = raw.strip()
    for quote in ('"', "'"):
        if value.startswith(quote) and value.endswith(quote):
            value = value[1:-1].strip()
            break  # strip at most one layer of quoting

    value = os.path.expandvars(os.path.expanduser(value))

    # Normalize for the *intended* platform rather than the host: Windows
    # discovery may run on a non-Windows host, where os.path would leave
    # backslashes untouched.
    normalize = ntpath.normpath if system == "Windows" else os.path.normpath
    return normalize(value)
142
+
143
+
144
def _is_executable(path: str, system: str, use_retry: bool = True) -> bool:
    """
    Report whether ``path`` is a runnable Stata binary for ``system``.

    Args:
        path: Candidate executable path.
        system: Target platform name ("Windows", "Darwin", "Linux", ...).
        use_retry: Use the retrying existence check (for user-provided
            paths) instead of the fast single-shot check (for discovery).

    Returns:
        True when ``path`` exists, is a regular file, and either ends with
        ``.exe`` (Windows) or has execute permission (other platforms).
    """
    exists_check = _exists_with_retry if use_retry else _exists_fast

    if not exists_check(path):
        return False
    # os.access(..., X_OK) is also true for searchable directories, so a
    # directory named like a binary (e.g. '/opt/stata/stata/') must be
    # rejected explicitly on every platform.
    if not os.path.isfile(path):
        return False
    if system == "Windows":
        return path.lower().endswith(".exe")
    return os.access(path, os.X_OK)
157
+
158
+
159
def _dedupe_preserve(items: List[tuple]) -> List[tuple]:
    """
    Drop ``(path, edition)`` pairs whose path was already seen.

    The first occurrence of each path wins and the input order is kept.
    """
    first_by_path = {}
    for path, edition in items:
        # setdefault keeps the edition recorded for the first occurrence;
        # dicts preserve insertion order.
        first_by_path.setdefault(path, edition)
    return list(first_by_path.items())
168
+
169
+
170
def _dedupe_str_preserve(items: List[str]) -> List[str]:
    """
    Return ``items`` without falsy entries or repeats, keeping the
    first-seen order.
    """
    # dict.fromkeys keeps insertion order and ignores repeated keys.
    return list(dict.fromkeys(s for s in items if s))
181
+
182
+
183
def _extract_version_number(path: str) -> int:
    """
    Extract the highest Stata version number found in path components that
    mention 'stata'. Returns 0 if no version is found.

    The path is split on both '/' and '\\' regardless of the host platform.
    The '-64' bitness marker of Windows executables ('StataMP-64.exe') is
    ignored so that it is not mistaken for version 64, which would make
    every 64-bit binary tie on version and break newest-first ordering.
    """
    bitness_suffix = "-64.exe"
    version = 0
    for part in re.split(r"[\\/]+", path.lower()):
        if "stata" not in part:
            continue
        if part.endswith(bitness_suffix):
            # Drop the suffix so its digits are not read as a version.
            part = part[: -len(bitness_suffix)]
        for digits in re.findall(r"\d{1,3}", part):
            version = max(version, int(digits))
    return version
199
+
200
+
201
def _sort_candidates(candidates: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """
    Order ``(path, edition)`` candidates by preference.

    Ordering: highest version number in the path first, then edition
    (mp > se > be, unknown editions last), then path as a stable tie-breaker.
    Returns a new list; the input is not modified.
    """
    edition_rank = {"mp": 3, "se": 2, "be": 1}

    def preference(path: str, edition: str):
        rank = edition_rank.get((edition or "").lower(), 0)
        # Negate so that an ascending sort yields newest / best edition first.
        return (-_extract_version_number(path), -rank, path)

    return sorted(candidates, key=lambda item: preference(*item))
212
+
213
+
214
def _resolve_windows_host_path(path: str, system: str) -> str:
    """
    Map a Windows-style path onto the host filesystem when emulating Windows.

    On a non-Windows host running Windows discovery, a backslash-separated
    path will not match the real filesystem layout. If ``path`` does not
    exist as given, the backslashes are swapped for the host separator and
    that variant is returned when it exists. In every other case ``path`` is
    returned unchanged.
    """
    emulating_windows = system == "Windows"
    if not emulating_windows or _exists_fast(path):
        return path

    host_uses_backslash = os.sep == "\\"
    if not host_uses_backslash and "\\" in path:
        alt_path = path.replace("\\", os.sep)
        if _exists_fast(alt_path):
            return alt_path
    return path
230
+
231
+
232
def _detect_system() -> str:
    """
    Return the platform name used to pick a discovery strategy.

    Windows is detected through os.name / sys.platform rather than
    platform.system() because some environments (e.g., Cygwin/MSYS) do not
    report "Windows" there. Everything else defers to platform.system().
    """
    windows_like = os.name == "nt" or sys.platform.startswith(("cygwin", "msys"))
    return "Windows" if windows_like else platform.system()
240
+
241
+
242
def _edition_from_binary_name(path: str, system: str) -> str:
    """Infer the Stata edition ('mp', 'se' or 'be') from the executable's file name."""
    # Only the final path component is inspected: directory names such as
    # '/tmp', '/Users' or '/home/user' contain 'mp' / 'se' and would
    # otherwise be mistaken for an edition marker.
    if system == "Windows":
        # ntpath splits on both '\\' and '/', which also covers Windows
        # discovery emulated on a non-Windows host.
        basename = ntpath.basename(path)
    else:
        basename = os.path.basename(path)
    lowered = basename.lower()
    if "mp" in lowered:
        return "mp"
    if "se" in lowered:
        return "se"
    return "be"


def find_stata_candidates() -> List[Tuple[str, str]]:
    """
    Locate all viable Stata installations ordered by preference.

    Returns:
        List of (path_to_executable, edition_string) sorted by:
        - Newest version number found in path (desc)
        - Edition preference: mp > se > be
        - Path name (stable tie-breaker)

    Raises:
        FileNotFoundError: No usable Stata executable was found. When
            STATA_PATH was set but unusable, the original failure is chained
            as the cause and its message is included.

    Behavior:
        - If STATA_PATH is set and valid, use it (may yield multiple binaries in dir).
        - If STATA_PATH is set but invalid, provide detailed diagnostics and fall back.
        - If auto-discovery fails, raise an error with helpful suggestions.
    """
    system = _detect_system()
    stata_path_error: Optional[Exception] = None
    stata_path_diagnostics: Optional[str] = None

    windows_binaries = [
        ("StataMP-64.exe", "mp"),
        ("StataMP.exe", "mp"),
        ("StataSE-64.exe", "se"),
        ("StataSE.exe", "se"),
        ("Stata-64.exe", "be"),
        ("Stata.exe", "be"),
    ]
    linux_binaries = [
        ("stata-mp", "mp"),
        ("stata-se", "se"),
        ("stata", "be"),
        ("xstata-mp", "mp"),
        ("xstata-se", "se"),
        ("xstata", "be"),
    ]

    # 1. Check STATA_PATH override with enhanced diagnostics
    raw_stata_path = os.environ.get("STATA_PATH")
    if raw_stata_path:
        try:
            path = _normalize_env_path(raw_stata_path, system)
            path = _resolve_windows_host_path(path, system)

            if os.path.isdir(path):
                candidates_in_dir = []
                if system == "Windows":
                    for exe, edition in windows_binaries:
                        candidate = os.path.join(path, exe)
                        if _is_executable(candidate, system, use_retry=True):
                            candidates_in_dir.append((candidate, edition))
                elif system == "Darwin" or (system != "Windows" and path.endswith(".app")):
                    # macOS app bundle logic
                    sub_path = os.path.join(path, "Contents", "MacOS")
                    if os.path.isdir(sub_path):
                        for binary, edition in [("stata-mp", "mp"), ("stata-se", "se"), ("stata", "be")]:
                            candidate = os.path.join(sub_path, binary)
                            if _is_executable(candidate, system, use_retry=True):
                                candidates_in_dir.append((candidate, edition))

                    # Also try direct if not in a bundle
                    if not candidates_in_dir:
                        for binary, edition in linux_binaries:
                            candidate = os.path.join(path, binary)
                            if _is_executable(candidate, system, use_retry=True):
                                candidates_in_dir.append((candidate, edition))
                else:
                    for binary, edition in linux_binaries:
                        candidate = os.path.join(path, binary)
                        if _is_executable(candidate, system, use_retry=True):
                            candidates_in_dir.append((candidate, edition))

                if candidates_in_dir:
                    resolved = []
                    for candidate, edition in _sort_candidates(candidates_in_dir):
                        # Re-check right before returning in case a binary
                        # disappeared since the scan above.
                        if _is_executable(candidate, system, use_retry=True):
                            logger.info(
                                "Found Stata via STATA_PATH directory: %s (%s)",
                                candidate,
                                edition,
                            )
                            resolved.append((candidate, edition))
                    if resolved:
                        return resolved

                # Enhanced error with diagnostics
                _, diagnostics = _validate_path_with_diagnostics(path, system)
                error_msg = (
                    f"STATA_PATH points to directory '{path}', but no Stata executable was found within.\n"
                    f"{diagnostics}\n\n"
                    "Point STATA_PATH directly to the Stata binary "
                    "(e.g., C:\\Program Files\\StataNow19\\StataMP-64.exe)."
                )
                raise FileNotFoundError(error_msg)

            # STATA_PATH names a file: derive the edition from the file name
            # only, never from the directories leading to it.
            edition = _edition_from_binary_name(path, system)

            # Use enhanced validation with diagnostics (with retry for user path)
            exists, diagnostics = _validate_path_with_diagnostics(path, system)
            if not exists:
                error_msg = (
                    f"STATA_PATH points to '{path}', but that file does not exist.\n"
                    f"{diagnostics}\n\n"
                    "Update STATA_PATH to your Stata binary (e.g., "
                    "/Applications/StataNow/StataMP.app/Contents/MacOS/stata-mp, "
                    "/usr/local/stata19/stata-mp or C:\\Program Files\\StataNow19\\StataMP-64.exe)."
                )
                raise FileNotFoundError(error_msg)

            if not _is_executable(path, system, use_retry=True):
                raise PermissionError(
                    f"STATA_PATH points to '{path}', but it is not executable. "
                    "Ensure this is the Stata binary, not the .app directory."
                )

            logger.info("Using STATA_PATH override: %s (%s)", path, edition)
            return [(path, edition)]

        except Exception as exc:
            # Deliberate best-effort: any STATA_PATH problem is remembered
            # for the final error message and auto-discovery is tried next.
            stata_path_error = exc
            stata_path_diagnostics = str(exc)
            logger.warning(
                "STATA_PATH override failed (%s). Falling back to auto-discovery.",
                exc,
            )

    # 2. Platform-specific search (using fast checks, no retries)
    candidates: List[Tuple[str, str]] = []  # List of (path, edition)

    if system == "Darwin":  # macOS
        # Search targets specific to macOS installation patterns
        patterns = [
            "/Applications/StataNow/StataMP.app",
            "/Applications/StataNow/StataSE.app",
            "/Applications/StataNow/Stata.app",
            "/Applications/Stata/StataMP.app",
            "/Applications/Stata/StataSE.app",
            "/Applications/Stata/Stata.app",
            "/Applications/Stata*/Stata*.app",
            "/Applications/Stata*.app",
        ]

        for pattern in patterns:
            for app_dir in glob.glob(pattern):
                binary_dir = os.path.join(app_dir, "Contents", "MacOS")
                if not _exists_fast(binary_dir):
                    continue
                for binary, edition in [("stata-mp", "mp"), ("stata-se", "se"), ("stata", "be")]:
                    full_path = os.path.join(binary_dir, binary)
                    if _exists_fast(full_path):
                        candidates.append((full_path, edition))
        candidates = _dedupe_preserve(candidates)

    elif system == "Windows":
        # Include ProgramW6432 (real 64-bit Program Files) and hardcode fallbacks.
        base_dirs = _dedupe_str_preserve(
            [
                os.environ.get("ProgramW6432", r"C:\Program Files"),
                os.environ.get("ProgramFiles", r"C:\Program Files"),
                os.environ.get("ProgramFiles(Arm)", r"C:\Program Files (Arm)"),
                os.environ.get("ProgramFiles(x86)", r"C:\Program Files (x86)"),
                r"C:\Program Files",
                r"C:\Program Files (Arm)",
                r"C:\Program Files (x86)",
            ]
        )

        # Resolve for non-Windows hosts running Windows discovery tests.
        base_dirs = [
            _resolve_windows_host_path(ntpath.normpath(bd), system) for bd in base_dirs
        ]
        base_dirs = _dedupe_str_preserve(base_dirs)

        # Look in a few plausible layouts:
        #   base\Stata*\...
        #   base\*\Stata*\...      (e.g., base\StataCorp\Stata19Now)
        #   base\Stata*\Stata*\... (e.g., base\Stata\Stata19Now)
        dir_globs: List[str] = []
        for base_dir in base_dirs:
            dir_globs.extend(
                [
                    os.path.join(base_dir, "Stata*"),
                    os.path.join(base_dir, "*", "Stata*"),
                    os.path.join(base_dir, "Stata*", "Stata*"),
                ]
            )
        dir_globs = _dedupe_str_preserve(dir_globs)

        stata_dirs: List[str] = []
        for pattern in dir_globs:
            stata_dirs.extend(glob.glob(pattern))
        stata_dirs = _dedupe_str_preserve(stata_dirs)

        for stata_dir in stata_dirs:
            if not os.path.isdir(stata_dir):
                continue
            for exe, edition in windows_binaries:
                full_path = os.path.join(stata_dir, exe)
                if _exists_fast(full_path):
                    candidates.append((full_path, edition))
        candidates = _dedupe_preserve(candidates)

    elif system == "Linux":
        home_base = os.environ.get("HOME") or os.path.expanduser("~")

        # 2a. Try binaries available on PATH first
        for binary, edition in linux_binaries:
            found = shutil.which(binary)
            if found:
                candidates.append((found, edition))

        # 2b. Search common install prefixes used by Stata's Linux installer
        linux_roots = [
            "/usr/local",
            "/opt",
            os.path.join(home_base, "stata"),
            os.path.join(home_base, "Stata"),
        ]

        for root in linux_roots:
            patterns: List[str] = []
            if root.endswith(("stata", "Stata")):
                # The root is itself the install directory; no globbing needed.
                patterns.append(root)
            else:
                patterns.extend(
                    [
                        os.path.join(root, "stata*"),
                        os.path.join(root, "Stata*"),
                    ]
                )

            for pattern in patterns:
                for base_dir in glob.glob(pattern):
                    if not os.path.isdir(base_dir):
                        continue
                    for binary, edition in linux_binaries:
                        full_path = os.path.join(base_dir, binary)
                        if _exists_fast(full_path):
                            candidates.append((full_path, edition))

        candidates = _dedupe_preserve(candidates)

    # Final validation of candidates (still using fast checks)
    validated: List[Tuple[str, str]] = []
    unique_candidates = _dedupe_preserve(candidates)
    for path, edition in _sort_candidates(unique_candidates):
        if not _exists_fast(path):
            logger.warning("Discovered candidate missing on disk: %s", path)
            continue
        if not _is_executable(path, system, use_retry=False):
            logger.warning("Discovered candidate is not executable: %s", path)
            continue
        logger.info("Auto-discovered Stata at %s (%s)", path, edition)
        validated.append((path, edition))

    if validated:
        return validated

    # Build comprehensive error message
    error_parts = ["Could not automatically locate Stata."]

    if stata_path_error is not None:
        error_parts.append(
            f"\nSTATA_PATH was set but failed:\n{stata_path_diagnostics}"
        )

    error_parts.append(
        "\nTo fix this issue:\n"
        "1. Set STATA_PATH to point to your Stata executable, for example:\n"
        " - Windows: C:\\Program Files\\StataNow19\\StataMP-64.exe\n"
        " - macOS: /Applications/StataNow/StataMP.app/Contents/MacOS/stata-mp\n"
        " - Linux: /usr/local/stata19/stata-mp\n"
        "\n2. Or install Stata in a standard location where it can be auto-discovered."
    )

    if stata_path_error is not None:
        raise FileNotFoundError("\n".join(error_parts)) from stata_path_error
    else:
        raise FileNotFoundError("\n".join(error_parts))
527
+
528
+
529
def find_stata_path() -> Tuple[str, str]:
    """
    Return the single best ``(path, edition)`` pair.

    Backward-compatible wrapper: it takes the first (top-ranked) entry of
    ``find_stata_candidates()`` and lets that function's errors propagate.
    """
    return find_stata_candidates()[0]
535
+
536
+
537
def main() -> int:
    """
    CLI helper: print the discovered Stata binary and its edition.

    Returns 0 on success. Any exception is reported on stdout as
    "Discovery failed: ..." and turned into exit status 1.
    """
    exit_code = 0
    try:
        path, edition = find_stata_path()
        # Print so CLI users and tests see the output on stdout.
        print(f"Stata executable: {path}\nEdition: {edition}")
    except Exception as exc:  # pragma: no cover - exercised via tests with env
        print(f"Discovery failed: {exc}")
        exit_code = 1
    return exit_code


if __name__ == "__main__":  # pragma: no cover - manual utility
    sys.exit(main())