mcp-stata 1.18.0__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_stata/__init__.py +4 -0
- mcp_stata/_native_ops.cpython-313-x86_64-linux-gnu.so +0 -0
- mcp_stata/config.py +20 -0
- mcp_stata/discovery.py +550 -0
- mcp_stata/graph_detector.py +401 -0
- mcp_stata/models.py +62 -0
- mcp_stata/native_ops.py +87 -0
- mcp_stata/server.py +1130 -0
- mcp_stata/smcl/smcl2html.py +88 -0
- mcp_stata/stata_client.py +3692 -0
- mcp_stata/streaming_io.py +263 -0
- mcp_stata/test_stata.py +54 -0
- mcp_stata/ui_http.py +998 -0
- mcp_stata-1.18.0.dist-info/METADATA +471 -0
- mcp_stata-1.18.0.dist-info/RECORD +18 -0
- mcp_stata-1.18.0.dist-info/WHEEL +5 -0
- mcp_stata-1.18.0.dist-info/entry_points.txt +2 -0
- mcp_stata-1.18.0.dist-info/licenses/LICENSE +661 -0
mcp_stata/__init__.py
ADDED
|
Binary file
|
mcp_stata/config.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
|
|
2
|
+
"""
|
|
3
|
+
Central configuration for mcp-stata server and UI channel.
|
|
4
|
+
"""
|
|
5
|
+
from typing import Final
|
|
6
|
+
|
|
7
|
+
# --- Server limits ---------------------------------------------------------

# Default row limit for JSON endpoints.
MAX_LIMIT: Final[int] = 500
# Max variables in Stata.
MAX_VARS: Final[int] = 32_767
# Max chars per string cell to return.
MAX_CHARS: Final[int] = 500
# Max size of HTTP request body.
MAX_REQUEST_BYTES: Final[int] = 1_000_000
# Default row limit for Arrow IPC streams.
MAX_ARROW_LIMIT: Final[int] = 1_000_000

# --- Timeouts (seconds) ----------------------------------------------------

# Bearer token validity (20 minutes).
TOKEN_TTL_S: Final[int] = 60 * 20
# Filtered view handle validity (30 minutes).
VIEW_TTL_S: Final[int] = 60 * 30

# --- Network ---------------------------------------------------------------

DEFAULT_HOST: Final[str] = "127.0.0.1"
# 0 = random ephemeral port.
DEFAULT_PORT: Final[int] = 0
|
mcp_stata/discovery.py
ADDED
|
@@ -0,0 +1,550 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Optimized discovery.py with fast auto-discovery and targeted retry logic.
|
|
3
|
+
Key improvements:
|
|
4
|
+
1. Fast path checking during discovery (no retries)
|
|
5
|
+
2. Retry logic only for validation of user-provided paths
|
|
6
|
+
3. Better diagnostic logging
|
|
7
|
+
4. Fuzzy path matching for common typos
|
|
8
|
+
5. Case-insensitive path resolution on Windows
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
import sys
|
|
13
|
+
import platform
|
|
14
|
+
import glob
|
|
15
|
+
import logging
|
|
16
|
+
import shutil
|
|
17
|
+
import ntpath
|
|
18
|
+
import time
|
|
19
|
+
import re
|
|
20
|
+
from typing import Tuple, List, Optional
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger("mcp_stata.discovery")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _exists_with_retry(path: str, max_attempts: int = 1, delay: float = 0.01) -> bool:
|
|
26
|
+
"""
|
|
27
|
+
Check if file exists with retry logic to handle transient failures.
|
|
28
|
+
This helps with antivirus scans, file locks, and other temporary issues.
|
|
29
|
+
Only use this for validating user-provided paths, not during discovery.
|
|
30
|
+
"""
|
|
31
|
+
for attempt in range(max_attempts):
|
|
32
|
+
if os.path.exists(path):
|
|
33
|
+
return True
|
|
34
|
+
if attempt < max_attempts - 1:
|
|
35
|
+
logger.debug(
|
|
36
|
+
f"File existence check attempt {attempt + 1} failed for: {path}"
|
|
37
|
+
)
|
|
38
|
+
time.sleep(delay)
|
|
39
|
+
return False
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _exists_fast(path: str) -> bool:
|
|
43
|
+
"""Fast existence check without retries for auto-discovery."""
|
|
44
|
+
return os.path.exists(path)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _find_similar_stata_dirs(target_path: str) -> List[str]:
|
|
48
|
+
"""
|
|
49
|
+
Find similar Stata directories to help diagnose path typos.
|
|
50
|
+
Useful when user has 'Stata19Now' instead of 'StataNow19'.
|
|
51
|
+
"""
|
|
52
|
+
parent = os.path.dirname(target_path)
|
|
53
|
+
|
|
54
|
+
# If parent doesn't exist, try grandparent (for directory name typos)
|
|
55
|
+
search_dir = parent
|
|
56
|
+
if not os.path.exists(parent):
|
|
57
|
+
search_dir = os.path.dirname(parent)
|
|
58
|
+
|
|
59
|
+
if not os.path.exists(search_dir):
|
|
60
|
+
return []
|
|
61
|
+
|
|
62
|
+
try:
|
|
63
|
+
subdirs = [
|
|
64
|
+
d for d in os.listdir(search_dir)
|
|
65
|
+
if os.path.isdir(os.path.join(search_dir, d))
|
|
66
|
+
]
|
|
67
|
+
# Filter to Stata-related directories (case-insensitive)
|
|
68
|
+
stata_dirs = [
|
|
69
|
+
os.path.join(search_dir, d)
|
|
70
|
+
for d in subdirs
|
|
71
|
+
if 'stata' in d.lower()
|
|
72
|
+
]
|
|
73
|
+
return stata_dirs
|
|
74
|
+
except (OSError, PermissionError) as e:
|
|
75
|
+
logger.debug(f"Could not list directory {search_dir}: {e}")
|
|
76
|
+
return []
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _validate_path_with_diagnostics(path: str, system: str) -> Tuple[bool, str]:
    """
    Check that ``path`` exists and, if not, explain what was found instead.

    Args:
        path: Path to validate (meant for user-provided values).
        system: Platform name; ``'Windows'`` restricts the "similar files"
            listing to names ending in ``.exe``.

    Returns:
        ``(True, "")`` when the path exists, otherwise ``(False, message)``
        where ``message`` is a newline-joined report covering the parent
        directory, Stata-looking files inside it, and similarly named
        directories nearby.
    """
    if _exists_with_retry(path):
        return True, ""

    report = [f"File not found: '{path}'"]
    parent_dir = os.path.dirname(path)

    if not _exists_with_retry(parent_dir):
        report.append(f"✗ Parent directory does not exist: '{parent_dir}'")
    else:
        report.append(f"✓ Parent directory exists: '{parent_dir}'")
        try:
            files_in_parent = os.listdir(parent_dir)
            # On non-Windows systems the suffix is empty, so every name passes
            # the endswith() test and only the 'stata' substring filters.
            required_suffix = '.exe' if system == 'Windows' else ''
            similar_files = []
            for f in files_in_parent:
                lowered = f.lower()
                if 'stata' in lowered and lowered.endswith(required_suffix):
                    similar_files.append(f)

            if not similar_files:
                report.append("No Stata executables found in parent directory")
                report.append(f"Files present: {', '.join(files_in_parent[:10])}")
            else:
                report.append(f"Found {len(similar_files)} Stata file(s) in parent:")
                # Show at most five entries to keep the message short.
                report.extend(f" - {f}" for f in similar_files[:5])
        except OSError as e:
            report.append(f"✗ Could not list parent directory: {e}")

    # Typo detection: suggest neighbouring directories that mention Stata.
    suggestions = _find_similar_stata_dirs(path)
    if suggestions:
        report.append("\nDid you mean one of these directories?")
        report.extend(f" - {dir_path}" for dir_path in suggestions[:5])

    return False, "\n".join(report)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _normalize_env_path(raw: str, system: str) -> str:
|
|
127
|
+
"""Strip quotes/whitespace, expand variables, and normalize slashes for STATA_PATH."""
|
|
128
|
+
cleaned = raw.strip()
|
|
129
|
+
if (cleaned.startswith('"') and cleaned.endswith('"')) or (
|
|
130
|
+
cleaned.startswith("'") and cleaned.endswith("'")
|
|
131
|
+
):
|
|
132
|
+
cleaned = cleaned[1:-1].strip()
|
|
133
|
+
|
|
134
|
+
expanded = os.path.expandvars(os.path.expanduser(cleaned))
|
|
135
|
+
|
|
136
|
+
# Always normalize path separators for the intended platform. This is especially
|
|
137
|
+
# important when running Windows discovery tests on non-Windows hosts where
|
|
138
|
+
# os.path (PosixPath) would otherwise leave backslashes untouched.
|
|
139
|
+
if system == "Windows":
|
|
140
|
+
return ntpath.normpath(expanded)
|
|
141
|
+
return os.path.normpath(expanded)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _is_executable(path: str, system: str, use_retry: bool = True) -> bool:
    """
    Check whether ``path`` is an executable file for the given platform.

    Args:
        path: Candidate executable path.
        system: Platform name. For ``"Windows"`` the file must have a ``.exe``
            extension; otherwise ``os.access(path, os.X_OK)`` must succeed.
        use_retry: When True the existence probe goes through
            ``_exists_with_retry`` (for user-provided paths); when False it
            uses ``_exists_fast`` (for auto-discovery).

    Returns:
        True only for an existing regular file that passes the platform test.
    """
    exists_check = _exists_with_retry if use_retry else _exists_fast

    if not exists_check(path):
        return False
    if not os.path.isfile(path):
        # Directories normally carry the execute (search) bit, so os.access
        # alone would accept e.g. a folder named "stata" as a binary.
        return False
    if system == "Windows":
        # Windows has no execute bit; rely on the .exe extension.
        return path.lower().endswith(".exe")
    return os.access(path, os.X_OK)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _dedupe_preserve(items: List[tuple]) -> List[tuple]:
|
|
160
|
+
seen = set()
|
|
161
|
+
unique = []
|
|
162
|
+
for path, edition in items:
|
|
163
|
+
if path in seen:
|
|
164
|
+
continue
|
|
165
|
+
seen.add(path)
|
|
166
|
+
unique.append((path, edition))
|
|
167
|
+
return unique
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _dedupe_str_preserve(items: List[str]) -> List[str]:
|
|
171
|
+
seen = set()
|
|
172
|
+
out: List[str] = []
|
|
173
|
+
for s in items:
|
|
174
|
+
if not s:
|
|
175
|
+
continue
|
|
176
|
+
if s in seen:
|
|
177
|
+
continue
|
|
178
|
+
seen.add(s)
|
|
179
|
+
out.append(s)
|
|
180
|
+
return out
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _extract_version_number(path: str) -> int:
|
|
184
|
+
"""
|
|
185
|
+
Extract the highest Stata version number found in path components that
|
|
186
|
+
mention 'stata'. Returns 0 if no version is found.
|
|
187
|
+
"""
|
|
188
|
+
version = 0
|
|
189
|
+
normalized = path.lower().replace("\\", os.sep)
|
|
190
|
+
for part in normalized.split(os.sep):
|
|
191
|
+
if "stata" not in part:
|
|
192
|
+
continue
|
|
193
|
+
for match in re.findall(r"(\d{1,3})", part):
|
|
194
|
+
try:
|
|
195
|
+
version = max(version, int(match))
|
|
196
|
+
except ValueError:
|
|
197
|
+
continue
|
|
198
|
+
return version
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _sort_candidates(candidates: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
|
202
|
+
"""Sort candidates by version desc, edition (mp>se>be), then path for stability."""
|
|
203
|
+
edition_rank = {"mp": 3, "se": 2, "be": 1}
|
|
204
|
+
|
|
205
|
+
def sort_key(item: Tuple[str, str]):
|
|
206
|
+
path, edition = item
|
|
207
|
+
version = _extract_version_number(path)
|
|
208
|
+
rank = edition_rank.get((edition or "").lower(), 0)
|
|
209
|
+
return (-version, -rank, path)
|
|
210
|
+
|
|
211
|
+
return sorted(candidates, key=sort_key)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _resolve_windows_host_path(path: str, system: str) -> str:
|
|
215
|
+
"""
|
|
216
|
+
On non-Windows hosts running Windows-discovery code, a Windows-style path
|
|
217
|
+
(with backslashes) won't match the real filesystem layout. If the normalized
|
|
218
|
+
path does not exist and we're emulating Windows, try swapping backslashes for
|
|
219
|
+
the host separator so tests can interact with the temp filesystem.
|
|
220
|
+
"""
|
|
221
|
+
if system != "Windows":
|
|
222
|
+
return path
|
|
223
|
+
if _exists_fast(path):
|
|
224
|
+
return path
|
|
225
|
+
if os.sep != "\\" and "\\" in path:
|
|
226
|
+
alt_path = path.replace("\\", os.sep)
|
|
227
|
+
if _exists_fast(alt_path):
|
|
228
|
+
return alt_path
|
|
229
|
+
return path
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def _detect_system() -> str:
|
|
233
|
+
"""
|
|
234
|
+
Prefer Windows detection via os.name / sys.platform instead of platform.system()
|
|
235
|
+
because some environments (e.g., Cygwin/MSYS) do not return "Windows".
|
|
236
|
+
"""
|
|
237
|
+
if os.name == "nt" or sys.platform.startswith(("cygwin", "msys")):
|
|
238
|
+
return "Windows"
|
|
239
|
+
return platform.system()
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def find_stata_candidates() -> List[Tuple[str, str]]:
    """
    Locate all viable Stata installations ordered by preference.

    Returns:
        List of (path_to_executable, edition_string) sorted by:
        - Newest version number found in path (desc)
        - Edition preference: mp > se > be
        - Path name (stable tie-breaker)

    Raises:
        FileNotFoundError: No usable Stata executable was found. When a
            STATA_PATH override was set but failed, that failure is chained as
            the cause and its message is included.

    Behavior:
        - If STATA_PATH is set and valid, use it (may yield multiple binaries in dir).
        - If STATA_PATH is set but invalid, log a warning with diagnostics and
          fall back to auto-discovery.
        - If auto-discovery fails, raise an error with helpful suggestions.
    """
    system = _detect_system()
    stata_path_error: Optional[Exception] = None
    stata_path_diagnostics: Optional[str] = None

    windows_binaries = [
        ("StataMP-64.exe", "mp"),
        ("StataMP.exe", "mp"),
        ("StataSE-64.exe", "se"),
        ("StataSE.exe", "se"),
        ("Stata-64.exe", "be"),
        ("Stata.exe", "be"),
    ]
    linux_binaries = [
        ("stata-mp", "mp"),
        ("stata-se", "se"),
        ("stata", "be"),
        ("xstata-mp", "mp"),
        ("xstata-se", "se"),
        ("xstata", "be"),
    ]
    # Console binaries looked up inside a macOS bundle's Contents/MacOS folder.
    macos_binaries = [("stata-mp", "mp"), ("stata-se", "se"), ("stata", "be")]

    # 1. Check STATA_PATH override with enhanced diagnostics
    raw_stata_path = os.environ.get("STATA_PATH")
    if raw_stata_path:
        try:
            path = _normalize_env_path(raw_stata_path, system)
            path = _resolve_windows_host_path(path, system)

            if os.path.isdir(path):
                candidates_in_dir = []
                if system == "Windows":
                    for exe, edition in windows_binaries:
                        candidate = os.path.join(path, exe)
                        if _is_executable(candidate, system, use_retry=True):
                            candidates_in_dir.append((candidate, edition))
                elif system == "Darwin" or (system != "Windows" and path.endswith(".app")):
                    # macOS app bundle logic
                    sub_path = os.path.join(path, "Contents", "MacOS")
                    if os.path.isdir(sub_path):
                        for binary, edition in macos_binaries:
                            candidate = os.path.join(sub_path, binary)
                            if _is_executable(candidate, system, use_retry=True):
                                candidates_in_dir.append((candidate, edition))

                    # Also try direct if not in a bundle
                    if not candidates_in_dir:
                        for binary, edition in linux_binaries:
                            candidate = os.path.join(path, binary)
                            if _is_executable(candidate, system, use_retry=True):
                                candidates_in_dir.append((candidate, edition))
                else:
                    for binary, edition in linux_binaries:
                        candidate = os.path.join(path, binary)
                        if _is_executable(candidate, system, use_retry=True):
                            candidates_in_dir.append((candidate, edition))

                if candidates_in_dir:
                    resolved = []
                    for candidate, edition in _sort_candidates(candidates_in_dir):
                        if _is_executable(candidate, system, use_retry=True):
                            logger.info(
                                "Found Stata via STATA_PATH directory: %s (%s)",
                                candidate,
                                edition,
                            )
                            resolved.append((candidate, edition))
                    if resolved:
                        return resolved

                # The directory itself exists, so a plain existence check has
                # nothing to report; list what is actually inside instead.
                try:
                    entries = sorted(os.listdir(path))
                    if entries:
                        diagnostics = f"Files present: {', '.join(entries[:10])}"
                    else:
                        diagnostics = "Directory is empty"
                except OSError as e:
                    diagnostics = f"✗ Could not list directory: {e}"
                error_msg = (
                    f"STATA_PATH points to directory '{path}', but no Stata executable was found within.\n"
                    f"{diagnostics}\n\n"
                    "Point STATA_PATH directly to the Stata binary "
                    "(e.g., C:\\Program Files\\StataNow19\\StataMP-64.exe)."
                )
                raise FileNotFoundError(error_msg)

            # Infer the edition from the executable's file name only. Matching
            # against the whole path mislabels binaries living under folders
            # such as "C:\Users" ("se") or "/home/sample" ("mp").
            if system == "Windows":
                binary_name = ntpath.basename(path).lower()
            else:
                binary_name = os.path.basename(path).lower()
            if "mp" in binary_name:
                edition = "mp"
            elif "se" in binary_name:
                edition = "se"
            else:
                edition = "be"

            # Use enhanced validation with diagnostics (with retry for user path)
            exists, diagnostics = _validate_path_with_diagnostics(path, system)
            if not exists:
                error_msg = (
                    f"STATA_PATH points to '{path}', but that file does not exist.\n"
                    f"{diagnostics}\n\n"
                    "Update STATA_PATH to your Stata binary (e.g., "
                    "/Applications/StataNow/StataMP.app/Contents/MacOS/stata-mp, "
                    "/usr/local/stata19/stata-mp or C:\\Program Files\\StataNow19\\StataMP-64.exe)."
                )
                raise FileNotFoundError(error_msg)

            if not _is_executable(path, system, use_retry=True):
                raise PermissionError(
                    f"STATA_PATH points to '{path}', but it is not executable. "
                    "Ensure this is the Stata binary, not the .app directory."
                )

            logger.info("Using STATA_PATH override: %s (%s)", path, edition)
            return [(path, edition)]

        except Exception as exc:
            # Deliberate best-effort: any failure with the override is kept for
            # the final error message and auto-discovery is attempted next.
            stata_path_error = exc
            stata_path_diagnostics = str(exc)
            logger.warning(
                "STATA_PATH override failed (%s). Falling back to auto-discovery.",
                exc,
            )

    # 2. Platform-specific search (using fast checks, no retries)
    candidates: List[Tuple[str, str]] = []  # List of (path, edition)

    if system == "Darwin":  # macOS
        # Search targets specific to macOS installation patterns
        patterns = [
            "/Applications/StataNow/StataMP.app",
            "/Applications/StataNow/StataSE.app",
            "/Applications/StataNow/Stata.app",
            "/Applications/Stata/StataMP.app",
            "/Applications/Stata/StataSE.app",
            "/Applications/Stata/Stata.app",
            "/Applications/Stata*/Stata*.app",
            "/Applications/Stata*.app",
        ]

        for pattern in patterns:
            for app_dir in glob.glob(pattern):
                binary_dir = os.path.join(app_dir, "Contents", "MacOS")
                if not _exists_fast(binary_dir):
                    continue
                for binary, edition in macos_binaries:
                    full_path = os.path.join(binary_dir, binary)
                    if _exists_fast(full_path):
                        candidates.append((full_path, edition))

    elif system == "Windows":
        # Include ProgramW6432 (real 64-bit Program Files) and hardcode fallbacks.
        base_dirs = _dedupe_str_preserve(
            [
                os.environ.get("ProgramW6432", r"C:\Program Files"),
                os.environ.get("ProgramFiles", r"C:\Program Files"),
                os.environ.get("ProgramFiles(Arm)", r"C:\Program Files (Arm)"),
                os.environ.get("ProgramFiles(x86)", r"C:\Program Files (x86)"),
                r"C:\Program Files",
                r"C:\Program Files (Arm)",
                r"C:\Program Files (x86)",
            ]
        )

        # Resolve for non-Windows hosts running Windows discovery tests.
        base_dirs = [
            _resolve_windows_host_path(ntpath.normpath(bd), system) for bd in base_dirs
        ]
        base_dirs = _dedupe_str_preserve(base_dirs)

        # Look in a few plausible layouts:
        #   base\Stata*\...
        #   base\*\Stata*\...      (e.g., base\StataCorp\Stata19Now)
        #   base\Stata*\Stata*\... (e.g., base\Stata\Stata19Now)
        dir_globs: List[str] = []
        for base_dir in base_dirs:
            dir_globs.extend(
                [
                    os.path.join(base_dir, "Stata*"),
                    os.path.join(base_dir, "*", "Stata*"),
                    os.path.join(base_dir, "Stata*", "Stata*"),
                ]
            )
        dir_globs = _dedupe_str_preserve(dir_globs)

        stata_dirs: List[str] = []
        for pattern in dir_globs:
            stata_dirs.extend(glob.glob(pattern))
        stata_dirs = _dedupe_str_preserve(stata_dirs)

        for stata_dir in stata_dirs:
            if not os.path.isdir(stata_dir):
                continue
            for exe, edition in windows_binaries:
                full_path = os.path.join(stata_dir, exe)
                if _exists_fast(full_path):
                    candidates.append((full_path, edition))

    elif system == "Linux":
        home_base = os.environ.get("HOME") or os.path.expanduser("~")

        # 2a. Try binaries available on PATH first
        for binary, edition in linux_binaries:
            found = shutil.which(binary)
            if found:
                candidates.append((found, edition))

        # 2b. Search common install prefixes used by Stata's Linux installer
        linux_roots = [
            "/usr/local",
            "/opt",
            os.path.join(home_base, "stata"),
            os.path.join(home_base, "Stata"),
        ]

        for root in linux_roots:
            patterns: List[str] = []
            if root.endswith(("stata", "Stata")):
                # The home-directory roots are install directories themselves.
                patterns.append(root)
            else:
                patterns.extend(
                    [
                        os.path.join(root, "stata*"),
                        os.path.join(root, "Stata*"),
                    ]
                )

            for pattern in patterns:
                for base_dir in glob.glob(pattern):
                    if not os.path.isdir(base_dir):
                        continue
                    for binary, edition in linux_binaries:
                        full_path = os.path.join(base_dir, binary)
                        if _exists_fast(full_path):
                            candidates.append((full_path, edition))

    # Final validation of candidates (still using fast checks). Overlapping
    # glob patterns can report the same binary twice, so dedupe once here.
    validated: List[Tuple[str, str]] = []
    unique_candidates = _dedupe_preserve(candidates)
    for path, edition in _sort_candidates(unique_candidates):
        if not _exists_fast(path):
            logger.warning("Discovered candidate missing on disk: %s", path)
            continue
        if not _is_executable(path, system, use_retry=False):
            logger.warning("Discovered candidate is not executable: %s", path)
            continue
        logger.info("Auto-discovered Stata at %s (%s)", path, edition)
        validated.append((path, edition))

    if validated:
        return validated

    # Build comprehensive error message
    error_parts = ["Could not automatically locate Stata."]

    if stata_path_error is not None:
        error_parts.append(
            f"\nSTATA_PATH was set but failed:\n{stata_path_diagnostics}"
        )

    error_parts.append(
        "\nTo fix this issue:\n"
        "1. Set STATA_PATH to point to your Stata executable, for example:\n"
        " - Windows: C:\\Program Files\\StataNow19\\StataMP-64.exe\n"
        " - macOS: /Applications/StataNow/StataMP.app/Contents/MacOS/stata-mp\n"
        " - Linux: /usr/local/stata19/stata-mp\n"
        "\n2. Or install Stata in a standard location where it can be auto-discovered."
    )

    # Chaining from None is harmless when no STATA_PATH failure was recorded.
    raise FileNotFoundError("\n".join(error_parts)) from stata_path_error
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
def find_stata_path() -> Tuple[str, str]:
    """
    Return the single best ``(path, edition)`` pair.

    Backward-compatible wrapper: takes the first entry of
    ``find_stata_candidates()`` and propagates any exception it raises.
    """
    return find_stata_candidates()[0]
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
def main() -> int:
    """
    CLI helper: print the discovered Stata binary and its edition.

    Returns:
        0 when discovery succeeded, 1 when it failed (the failure message is
        printed to stdout rather than raised).
    """
    status = 1
    try:
        path, edition = find_stata_path()
        # Print so CLI users and tests see the output on stdout.
        print(f"Stata executable: {path}\nEdition: {edition}")
        status = 0
    except Exception as exc:  # pragma: no cover - exercised via tests with env
        print(f"Discovery failed: {exc}")
    return status
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
# Script entry point: exit the process with main()'s status code.
if __name__ == "__main__":  # pragma: no cover - manual utility
    sys.exit(main())
|