mcp-stata 1.21.0__cp311-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcp_stata/utils.py ADDED
@@ -0,0 +1,159 @@
1
+ from __future__ import annotations
2
+ import os
3
+ import tempfile
4
+ import pathlib
5
+ import uuid
6
+ import logging
7
+ import threading
8
+ import shutil
9
+ import atexit
10
+ import signal
11
+ import sys
12
+ from typing import Optional, List
13
+
14
logger = logging.getLogger("mcp_stata")

# Cached result of get_writable_temp_dir(); None until a writable directory
# has been validated.
_temp_dir_cache: Optional[str] = None
# Guards the cache above and the cleanup registries below.
# This is re-entrant on purpose: _cleanup_temp_resources() is also called from
# the SIGTERM/SIGINT handler, which Python runs on the main thread. If the
# signal arrives while the main thread already holds this lock, a plain Lock
# would block forever; an RLock lets the same thread acquire it again.
_temp_dir_lock = threading.RLock()
# Absolute paths of files to delete on process exit.
_files_to_cleanup: set[pathlib.Path] = set()
# Absolute paths of directories to delete recursively on process exit.
_dirs_to_cleanup: set[pathlib.Path] = set()
20
+
21
def register_temp_file(path: str | pathlib.Path) -> None:
    """
    Schedule a file for deletion when the process exits.

    This is used in place of NamedTemporaryFile(delete=True) because on
    Windows delete=True prevents Stata from opening the file simultaneously.

    The path is stored in its absolute form.
    """
    with _temp_dir_lock:
        _files_to_cleanup.add(pathlib.Path(path).absolute())
30
+
31
def register_temp_dir(path: str | pathlib.Path) -> None:
    """Schedule a directory for recursive deletion when the process exits.

    The path is stored in its absolute form.
    """
    with _temp_dir_lock:
        _dirs_to_cleanup.add(pathlib.Path(path).absolute())
36
+
37
def is_windows() -> bool:
    """Report whether the current operating system is Windows (``os.name == "nt"``)."""
    platform_name = os.name
    return platform_name == "nt"
40
+
41
def _cleanup_temp_resources():
    """Best-effort removal of every registered temporary file and directory.

    Entries that are removed are dropped from their registry. Entries whose
    removal raises stay registered, and the error is ignored.
    """
    with _temp_dir_lock:
        # sorted() returns a new list, so the registries can be mutated
        # safely while we loop over the snapshot.
        for file_path in sorted(_files_to_cleanup, reverse=True):
            try:
                # missing_ok=True is Python 3.8+
                file_path.unlink(missing_ok=True)
            except Exception:
                continue
            _files_to_cleanup.discard(file_path)

        for dir_path in sorted(_dirs_to_cleanup, reverse=True):
            try:
                if not (dir_path.exists() and dir_path.is_dir()):
                    # Nothing to remove; the entry is left in the registry.
                    continue
                shutil.rmtree(dir_path, ignore_errors=True)
            except Exception:
                continue
            _dirs_to_cleanup.discard(dir_path)


atexit.register(_cleanup_temp_resources)
64
+
65
def _signal_handler(signum, frame):
    """Clean up registered temporary resources, then exit with status 0.

    Both arguments come from the ``signal`` handler calling convention and
    are not used.
    """
    _cleanup_temp_resources()
    raise SystemExit(0)
69
+
70
# Install handlers so termination signals trigger temp-resource cleanup.
try:
    # Leave signals alone under pytest and whenever we are off the main thread.
    is_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
    on_main_thread = threading.current_thread() is threading.main_thread()
    if on_main_thread and not is_pytest:
        for _handled_signal in (signal.SIGTERM, signal.SIGINT):
            signal.signal(_handled_signal, _signal_handler)
except (ValueError, RuntimeError):
    # Not in main thread or other signal handling restriction
    pass
80
+
81
def get_writable_temp_dir() -> str:
    """
    Find a writable temporary directory by trying multiple fallback locations.

    Priority:
    1. MCP_STATA_TEMP environment variable
    2. System Temp (tempfile.gettempdir())
    3. User Home subdirectory (~/.mcp-stata/temp)
    4. Current Working Directory subdirectory (.tmp)

    Each candidate is resolved lazily inside the error handling. A location
    that cannot even be determined (for example ``tempfile.gettempdir()``
    finds no usable directory, or the working directory has been deleted) is
    recorded as a failure and skipped instead of aborting the whole search.

    The first directory that passes the write test is cached for later calls,
    and ``tempfile.tempdir`` is set to it so that other users of ``tempfile``
    in this process default to the same location.

    Returns:
        The absolute path of the validated directory.

    Raises:
        RuntimeError: If no candidate is writable. The message lists the
            error recorded for each candidate.
    """
    global _temp_dir_cache

    with _temp_dir_lock:
        if _temp_dir_cache is not None:
            return _temp_dir_cache

        # Each entry is (resolver, description, is_fallback). Using one of
        # the "fallback" entries is logged as a warning; the env override and
        # the system temp directory are the preferred locations.
        sources = []

        # 1. Environment variable
        env_temp = os.getenv("MCP_STATA_TEMP")
        if env_temp:
            sources.append(
                (lambda: pathlib.Path(env_temp), "MCP_STATA_TEMP environment variable", False)
            )

        # 2. System Temp
        sources.append(
            (lambda: pathlib.Path(tempfile.gettempdir()), "System temp directory", False)
        )

        # 3. User Home
        sources.append(
            (lambda: pathlib.Path.home() / ".mcp-stata" / "temp", "User home directory", True)
        )

        # 4. Current working directory subdirectory (.tmp)
        sources.append(
            (lambda: pathlib.Path.cwd() / ".tmp", "Working directory (.tmp)", True)
        )

        tested_paths = []
        for resolve, description, is_fallback in sources:
            try:
                path = resolve()
            except Exception as e:
                # Any failure to determine the location just means this
                # candidate is unusable; keep going with the next one.
                tested_paths.append(f"{description}: {e}")
                continue

            try:
                # Ensure directory exists
                path.mkdir(parents=True, exist_ok=True)

                # Test writability using standard tempfile logic
                fd, probe_path = tempfile.mkstemp(
                    prefix=".mcp_write_test_",
                    suffix=".tmp",
                    dir=str(path),
                )
                os.close(fd)
                os.unlink(probe_path)

                validated_path = str(path.absolute())
            except OSError as e:
                # PermissionError is a subclass of OSError, so this covers it.
                tested_paths.append(f"{path} ({description}): {e}")
                continue

            if is_fallback:
                logger.warning(
                    "Falling back to temporary directory: %s (%s)", validated_path, description
                )
            else:
                logger.debug("Using temporary directory: %s (%s)", validated_path, description)

            _temp_dir_cache = validated_path
            # Globally set tempfile.tempdir so other parts of the app and libraries
            # use our validated writable path by default.
            tempfile.tempdir = validated_path
            return validated_path

        error_msg = "Failed to find any writable temporary directory. Errors:\n" + "\n".join(tested_paths)
        logger.error(error_msg)
        raise RuntimeError(error_msg)
@@ -0,0 +1,486 @@
1
+ Metadata-Version: 2.4
2
+ Name: mcp-stata
3
+ Version: 1.21.0
4
+ Classifier: Development Status :: 4 - Beta
5
+ Classifier: Intended Audience :: Science/Research
6
+ Classifier: Intended Audience :: Developers
7
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
8
+ Classifier: License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
9
+ Classifier: Programming Language :: Python :: 3.11
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Classifier: Programming Language :: Python :: 3.13
12
+ Classifier: Programming Language :: Python :: 3.14
13
+ Requires-Dist: mcp[cli]>=1.0.0
14
+ Requires-Dist: pandas>=2.0.0
15
+ Requires-Dist: numpy>=1.26.0
16
+ Requires-Dist: pydantic>=2.0.0
17
+ Requires-Dist: pystata>=0.0.1
18
+ Requires-Dist: stata-setup>=0.1.0
19
+ Requires-Dist: httpx>=0.27.0,<0.28.0
20
+ Requires-Dist: pytest-asyncio>=1.3.0
21
+ Requires-Dist: pyarrow>=14.0.0
22
+ Requires-Dist: polars>=1.36.1
23
+ Requires-Dist: build>=1.3.0 ; extra == 'dev'
24
+ Requires-Dist: hatch>=1.16.2 ; extra == 'dev'
25
+ Requires-Dist: twine>=6.2.0 ; extra == 'dev'
26
+ Requires-Dist: ruff>=0.4.0 ; extra == 'dev'
27
+ Requires-Dist: pytest>=7.0.0 ; extra == 'dev'
28
+ Requires-Dist: pytest-cov>=4.0.0 ; extra == 'dev'
29
+ Requires-Dist: pytest-xdist>=3.5.0 ; extra == 'dev'
30
+ Requires-Dist: python-semantic-release>=9.8.0 ; extra == 'dev'
31
+ Requires-Dist: maturin>=1.11.5 ; extra == 'dev'
32
+ Provides-Extra: dev
33
+ License-File: LICENSE
34
+ Summary: A lightweight Model Context Protocol (MCP) server for Stata. Execute commands, inspect data, retrieve stored results (`r()`/`e()`), and view graphs in your chat interface. Built for economists who want to integrate LLM assistance into their Stata workflow.
35
+ Keywords: mcp,stata,statistics,econometrics,ai,llm
36
+ Author-email: Thomas Monk <t.d.monk@lse.ac.uk>
37
+ License-Expression: AGPL-3.0-or-later
38
+ Requires-Python: >=3.11
39
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
40
+ Project-URL: Homepage, https://github.com/tmonk/mcp-stata
41
+ Project-URL: Issues, https://github.com/tmonk/mcp-stata/issues
42
+ Project-URL: Repository, https://github.com/tmonk/mcp-stata
43
+
44
+ # Stata MCP Server
45
+
46
+ <a href="https://cursor.com/en-US/install-mcp?name=mcp-stata&config=eyJjb21tYW5kIjogInV2eCAtLXJlZnJlc2ggLS1yZWZyZXNoLXBhY2thZ2UgbWNwLXN0YXRhIC0tZnJvbSBtY3Atc3RhdGFAbGF0ZXN0IG1jcC1zdGF0YSJ9"><img src="https://cursor.com/deeplink/mcp-install-dark.svg" alt="Install MCP Server" height="20"></a>&nbsp;
47
+ <a href="https://pypi.org/project/mcp-stata/"><img src="https://img.shields.io/pypi/v/mcp-stata?style=flat&color=black" alt="PyPI - Version" height="20"></a>
48
+
49
+ A [Model Context Protocol](https://github.com/modelcontextprotocol) (MCP) server that connects AI agents to a local Stata installation.
50
+
51
+ > If you'd like a fully integrated VS Code extension to run Stata code without leaving your IDE, and also allow AI agent interaction, check out my other project: [<img src="https://raw.githubusercontent.com/tmonk/stata-workbench/refs/heads/main/img/icon.png" height="12px"> Stata Workbench](https://github.com/tmonk/stata-workbench/).
52
+
53
+ Built by <a href="https://tdmonk.com">Thomas Monk</a>, London School of Economics.
54
+ <!-- mcp-name: io.github.tmonk/mcp-stata -->
55
+
56
+ This server enables LLMs to:
57
+ - **Execute Stata code**: run any Stata command (e.g. `sysuse auto`, `regress price mpg`).
58
+ - **Inspect data**: retrieve dataset summaries and variable codebooks.
59
+ - **Export graphics**: generate and view Stata graphs (histograms, scatterplots).
60
+ - **Streaming graph caching**: automatically cache graphs during command execution for instant exports.
61
+ - **Verify results**: programmatically check stored results (`r()`, `e()`) for accurate validation.
62
+
63
+ ## Prerequisites
64
+
65
+ - **Stata 17+** (required for `pystata` integration)
66
+ - **Python 3.11+** (required)
67
+ - **uv** (recommended for install/run)
68
+
69
+ ## Installation
70
+
71
+ ### Run as a published tool with `uvx`
72
+
73
+ ```bash
74
+ uvx --refresh --refresh-package mcp-stata --from mcp-stata@latest mcp-stata
75
+ ```
76
+
77
+ `uvx` is an alias for `uv tool run` and runs the tool in an isolated, cached environment.
78
+
79
+ ## Configuration
80
+
81
+ This server attempts to automatically discover your Stata installation (supporting standard paths and StataNow).
82
+
83
+ If auto-discovery fails, set the `STATA_PATH` environment variable to your Stata executable:
84
+
85
+ ```bash
86
+ # macOS example
87
+ export STATA_PATH="/Applications/StataNow/StataMP.app/Contents/MacOS/stata-mp"
88
+
89
+ # Windows example (cmd.exe)
90
+ set "STATA_PATH=C:\Program Files\Stata18\StataMP-64.exe"
91
+ ```
92
+
93
+ If you encounter write permission issues with temporary files (common on Windows), you can override the temporary directory location by setting `MCP_STATA_TEMP`:
94
+
95
+ ```bash
96
+ # Example
97
+ export MCP_STATA_TEMP="/path/to/writable/temp"
98
+ ```
99
+
100
+ The server will automatically try the following locations in order of preference:
101
+ 1. `MCP_STATA_TEMP` environment variable
102
+ 2. System temporary directory
103
+ 3. `~/.mcp-stata/temp`
104
+ 4. Current working directory subdirectory (`.tmp/`)
105
+
106
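+ If you prefer, add these variables to your MCP config's `env` for any IDE shown below. Both are optional: `STATA_PATH` is only needed when auto-discovery cannot find Stata, and `MCP_STATA_TEMP` only when none of the default temporary locations is writable.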
107
+
108
+ Optional `env` example (add inside your MCP server entry):
109
+
110
+ ```json
111
+ "env": {
112
+ "STATA_PATH": "/Applications/StataNow/StataMP.app/Contents/MacOS/stata-mp"
113
+ }
114
+ ```
115
+
116
+ ## IDE Setup (MCP)
117
+
118
+ This MCP server uses the **stdio** transport (the IDE launches the process and communicates over stdin/stdout).
119
+
120
+ ---
121
+
122
+ ### Claude Desktop
123
+
124
+ Open Claude Desktop → **Settings** → **Developer** → **Edit Config**.
125
+ Config file locations include:
126
+
127
+ * macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
128
+ * Windows: `%APPDATA%\Claude\claude_desktop_config.json`
129
+
130
+ #### Published tool (uvx)
131
+
132
+ ```json
133
+ {
134
+ "mcpServers": {
135
+ "mcp-stata": {
136
+ "command": "uvx",
137
+ "args": [
138
+ "--refresh",
139
+ "--refresh-package",
140
+ "mcp-stata",
141
+ "--from",
142
+ "mcp-stata@latest",
143
+ "mcp-stata"
144
+ ]
145
+ }
146
+ }
147
+ }
148
+ ```
149
+
150
+ After editing, fully quit and restart Claude Desktop to reload MCP servers.
151
+
152
+ ---
153
+
154
+ ### Cursor
155
+
156
+ Cursor supports MCP config at:
157
+
158
+ * Global: `~/.cursor/mcp.json`
159
+ * Project: `.cursor/mcp.json`
160
+
161
+ #### Published tool (uvx)
162
+
163
+ ```json
164
+ {
165
+ "mcpServers": {
166
+ "mcp-stata": {
167
+ "command": "uvx",
168
+ "args": [
169
+ "--refresh",
170
+ "--refresh-package",
171
+ "mcp-stata",
172
+ "--from",
173
+ "mcp-stata@latest",
174
+ "mcp-stata"
175
+ ]
176
+ }
177
+ }
178
+ }
179
+ ```
180
+
181
+ ---
182
+
183
+ ### Windsurf
184
+
185
+ Windsurf supports MCP plugins and also allows manual editing of `mcp_config.json`. After adding/editing a server, use the UI’s refresh so it re-reads the config.
186
+
187
+ A common location is `~/.codeium/windsurf/mcp_config.json`.
188
+ #### Published tool (uvx)
189
+
190
+ ```json
191
+ {
192
+ "mcpServers": {
193
+ "mcp-stata": {
194
+ "command": "uvx",
195
+ "args": [
196
+ "--refresh",
197
+ "--refresh-package",
198
+ "mcp-stata",
199
+ "--from",
200
+ "mcp-stata@latest",
201
+ "mcp-stata"
202
+ ]
203
+ }
204
+ }
205
+ }
206
+ ```
207
+
208
+ ---
209
+
210
+ ### Google Antigravity
211
+
212
+ In Antigravity, MCP servers are managed from the MCP store/menu; you can open **Manage MCP Servers** and then **View raw config** to edit `mcp_config.json`.
213
+
214
+ #### Published tool (uvx)
215
+
216
+ ```json
217
+ {
218
+ "mcpServers": {
219
+ "mcp-stata": {
220
+ "command": "uvx",
221
+ "args": [
222
+ "--refresh",
223
+ "--refresh-package",
224
+ "mcp-stata",
225
+ "--from",
226
+ "mcp-stata@latest",
227
+ "mcp-stata"
228
+ ]
229
+ }
230
+ }
231
+ }
232
+ ```
233
+
234
+ ---
235
+
236
+ ### Visual Studio Code
237
+
238
+ VS Code supports MCP servers via a `.vscode/mcp.json` file. The top-level key is **`servers`** (not `mcpServers`).
239
+
240
+ Create `.vscode/mcp.json`:
241
+
242
+ #### Published tool (uvx)
243
+
244
+ ```json
245
+ {
246
+ "servers": {
247
+ "mcp-stata": {
248
+ "type": "stdio",
249
+ "command": "uvx",
250
+ "args": [
251
+ "--refresh",
252
+ "--refresh-package",
253
+ "mcp-stata",
254
+ "--from",
255
+ "mcp-stata@latest",
256
+ "mcp-stata"
257
+ ]
258
+ }
259
+ }
260
+ }
261
+ ```
262
+
263
+ VS Code documents `.vscode/mcp.json` and the `servers` schema, including `type` and `command`/`args`.
264
+
265
+ ---
266
+
267
+ ## Skills
268
+
269
+ - Skill file (for Claude/Codex): [skill/SKILL.md](skill/SKILL.md)
270
+
271
+ ## Tools Available (from server.py)
272
+
273
+ * `run_command(code, echo=True, as_json=True, trace=False, raw=False, max_output_lines=None)`: Execute Stata syntax.
274
+ - Always writes output to a temporary log file and emits a single `notifications/logMessage` containing `{"event":"log_path","path":"..."}` so the client can tail it locally.
275
+ - May emit `notifications/progress` when the client provides a progress token/callback.
276
+ * `read_log(path, offset=0, max_bytes=65536)`: Read a slice of a previously-provided log file (JSON: `path`, `offset`, `next_offset`, `data`).
277
+ * `find_in_log(path, query, start_offset=0, max_bytes=5_000_000, before=2, after=2, case_sensitive=False, regex=False, max_matches=50)`: Search a log file for text and return context windows.
278
+ - Returns JSON with `matches` (context lines, line indices), `next_offset`, and `truncated` if `max_matches` is hit.
279
+ - Supports literal or regex search with bounded read window for large logs.
280
+ * `load_data(source, clear=True, as_json=True, raw=False, max_output_lines=None)`: Heuristic loader (sysuse/webuse/use/path/URL) with JSON envelope unless `raw=True`. Supports output truncation.
281
+ * `get_data(start=0, count=50)`: View dataset rows (JSON response, capped to 500 rows).
282
+ * `get_ui_channel()`: Return a short-lived localhost HTTP endpoint + bearer token for the UI-only data browser.
283
+ * `describe()`: View dataset structure via Stata `describe`.
284
+ * `list_graphs()`: See available graphs in memory (JSON list with an `active` flag).
285
+ * `export_graph(graph_name=None, format="pdf")`: Export a graph to a file path (default PDF; use `format="png"` for PNG).
286
+ * `export_graphs_all()`: Export all in-memory graphs. Returns file paths.
287
+ * `get_help(topic, plain_text=False)`: Markdown-rendered Stata help by default; `plain_text=True` strips formatting.
288
+ * `codebook(variable, as_json=True, trace=False, raw=False, max_output_lines=None)`: Variable-level metadata (JSON envelope by default; supports `trace=True` and output truncation).
289
+ * `run_do_file(path, echo=True, as_json=True, trace=False, raw=False, max_output_lines=None)`: Execute a .do file.
290
+ - Always writes output to a temporary log file and emits a single `notifications/logMessage` containing `{"event":"log_path","path":"..."}` so the client can tail it locally.
291
+ - Emits incremental `notifications/progress` when the client provides a progress token/callback.
292
+ * `get_stored_results()`: Get `r()` and `e()` scalars/macros as JSON.
293
+ * `get_variable_list()`: JSON list of variables and labels.
294
+
295
+ ### Cancellation
296
+
297
+ - Clients may cancel an in-flight request by sending the MCP notification `notifications/cancelled` with `params.requestId` set to the original tool call ID.
298
+ - Client guidance:
299
+ 1. Pass a `_meta.progressToken` when invoking the tool if you want progress updates (optional).
300
+ 2. If you need to cancel, send `notifications/cancelled` with the same requestId. You may also stop tailing the log file path once you receive cancellation confirmation (the tool call will return an error indicating cancellation).
301
+ 3. Be prepared for partial output in the log file; cancellation is best-effort and depends on Stata surfacing `BreakError`.
302
+
303
+ Resources exposed for MCP clients:
304
+
305
+ * `stata://data/summary` → `summarize`
306
+ * `stata://data/metadata` → `describe`
307
+ * `stata://graphs/list` → graph list (resource handler delegates to `list_graphs` tool)
308
+ * `stata://variables/list` → variable list (resource wrapper)
309
+ * `stata://results/stored` → stored r()/e() results
310
+
311
+ ## UI-only Data Browser (Local HTTP API)
312
+
313
+ This server also hosts a **localhost-only HTTP API** intended for a VS Code extension UI to browse data at high volume (paging, filtering) without sending large payloads over MCP.
314
+
315
+ Important properties:
316
+
317
+ - **Loopback only**: binds to `127.0.0.1`.
318
+ - **Bearer auth**: every request requires an `Authorization: Bearer <token>` header.
319
+ - **Short-lived tokens**: clients should call `get_ui_channel()` to obtain a fresh token as needed.
320
+ - **No Stata dataset mutation** for browsing/filtering:
321
+ - No generated variables.
322
+ - Paging uses `sfi.Data.get`.
323
+ - Filtering is evaluated in Python over chunked reads.
324
+
325
+ ### Discovery via MCP (`get_ui_channel`)
326
+
327
+ Call the MCP tool `get_ui_channel()` and parse the JSON:
328
+
329
+ ```json
330
+ {
331
+ "baseUrl": "http://127.0.0.1:53741",
332
+ "token": "...",
333
+ "expiresAt": 1730000000,
334
+ "capabilities": {
335
+ "dataBrowser": true,
336
+ "filtering": true,
337
+ "sorting": true,
338
+ "arrowStream": true
339
+ }
340
+ }
341
+ ```
342
+
343
+ Server-enforced limits (current defaults):
344
+
345
+ - **maxLimit**: 500
346
+ - **maxVars**: 32,767
347
+ - **maxChars**: 500
348
+ - **maxRequestBytes**: 1,000,000
349
+ - **maxArrowLimit**: 1,000,000 (specific to `/v1/arrow`)
350
+
351
+ ### Endpoints
352
+
353
+ All endpoints are under `baseUrl` and require the bearer token.
354
+
355
+ - `GET /v1/dataset`
356
+ - Returns dataset identity and basic state (`id`, `frame`, `n`, `k`).
357
+ - `GET /v1/vars`
358
+ - Returns variable metadata (`name`, `type`, `label`, `format`).
359
+ - `POST /v1/page`
360
+ - Returns a page of data for selected variables.
361
+ - `POST /v1/arrow`
362
+ - Returns a binary Arrow IPC stream (same input as `/v1/page`).
363
+ - `POST /v1/views`
364
+ - Creates a server-side filtered view (handle-based filtering).
365
+ - `POST /v1/views/:viewId/page`
366
+ - Pages within a filtered view.
367
+ - `POST /v1/views/:viewId/arrow`
368
+ - Returns a binary Arrow IPC stream from a filtered view.
369
+ - `DELETE /v1/views/:viewId`
370
+ - Deletes a view handle.
371
+ - `POST /v1/filters/validate`
372
+ - Validates a filter expression.
373
+
374
+ ### Paging request example
375
+
376
+ ```bash
377
+ curl -sS \
378
+ -H "Authorization: Bearer $TOKEN" \
379
+ -H "Content-Type: application/json" \
380
+ -d '{"datasetId":"...","frame":"default","offset":0,"limit":50,"vars":["price","mpg"],"includeObsNo":true,"maxChars":200}' \
381
+ "$BASE_URL/v1/page"
382
+ ```
383
+
384
+ #### Sorting
385
+
386
+ The `/v1/page` and `/v1/views/:viewId/page` endpoints support sorting via the optional `sortBy` parameter:
387
+
388
+ ```bash
389
+ # Sort by price ascending
390
+ curl -sS \
391
+ -H "Authorization: Bearer $TOKEN" \
392
+ -H "Content-Type: application/json" \
393
+ -d '{"datasetId":"...","offset":0,"limit":50,"vars":["price","mpg"],"sortBy":["price"]}' \
394
+ "$BASE_URL/v1/page"
395
+
396
+ # Sort by price descending
397
+ curl -sS \
398
+ -H "Authorization: Bearer $TOKEN" \
399
+ -H "Content-Type: application/json" \
400
+ -d '{"datasetId":"...","offset":0,"limit":50,"vars":["price","mpg"],"sortBy":["-price"]}' \
401
+ "$BASE_URL/v1/page"
402
+
403
+ # Multi-variable sort: foreign ascending, then price descending
404
+ curl -sS \
405
+ -H "Authorization: Bearer $TOKEN" \
406
+ -H "Content-Type: application/json" \
407
+ -d '{"datasetId":"...","offset":0,"limit":50,"vars":["foreign","price","mpg"],"sortBy":["foreign","-price"]}' \
408
+ "$BASE_URL/v1/page"
409
+ ```
410
+
411
+ **Sort specification format:**
412
+ - `sortBy` is an array of strings (variable names with optional prefix)
413
+ - No prefix or `+` prefix = ascending order (e.g., `"price"` or `"+price"`)
414
+ - `-` prefix = descending order (e.g., `"-price"`)
415
+ - Multiple variables are supported for multi-level sorting
416
+ - Uses the native Rust sorter when available, with a Polars fallback
417
+
418
+ **Sorting with filtered views:**
419
+ - Sorting is fully supported with filtered views
420
+ - The sort is computed in-memory over the sort columns, then filtered indices are re-applied
421
+ - Example: Filter for `price < 5000`, then sort descending by price
422
+
423
+ ```bash
424
+ # Create a filtered view
425
+ curl -sS \
426
+ -H "Authorization: Bearer $TOKEN" \
427
+ -H "Content-Type: application/json" \
428
+ -d '{"datasetId":"...","frame":"default","filterExpr":"price < 5000"}' \
429
+ "$BASE_URL/v1/views"
430
+ # Returns: {"view": {"id": "view_abc123", "filteredN": 37}}
431
+
432
+ # Get sorted page from filtered view
433
+ curl -sS \
434
+ -H "Authorization: Bearer $TOKEN" \
435
+ -H "Content-Type: application/json" \
436
+ -d '{"offset":0,"limit":50,"vars":["price","mpg"],"sortBy":["-price"]}' \
437
+ "$BASE_URL/v1/views/view_abc123/page"
438
+ ```
439
+
440
+ Notes:
441
+
442
+ - `datasetId` is used for cache invalidation. If the dataset changes due to running Stata commands, the server will report a new dataset id and view handles become invalid.
443
+ - Filter expressions are evaluated in Python using values read from Stata via `sfi.Data.get`. Use boolean operators like `==`, `!=`, `<`, `>`, and `and`/`or` (Stata-style `&`/`|` are also accepted).
444
+ - Sorting does **not** mutate the dataset order in Stata; it computes sorted indices for the response and caches them for subsequent requests.
445
+ - The Rust sorter is the primary implementation; Polars is used only as a fallback when the native extension is unavailable.
446
+
447
+ ## License
448
+
449
+ This project is licensed under the GNU Affero General Public License v3.0 or later.
450
+ See the LICENSE file for the full text.
451
+
452
+ ## Error reporting
453
+
454
+ - All tools that execute Stata commands support JSON envelopes (`as_json=true`) carrying:
455
+ - `rc` (from r()/c(rc)), `stdout`, `stderr`, `message`, optional `line` (when Stata reports it), `command`, optional `log_path` (for log-file streaming), and a `snippet` excerpt of error output.
456
+ - Stata-specific cues are preserved:
457
+ - `r(XXX)` codes are parsed when present in output.
458
+ - “Red text” is captured via stderr where available.
459
+ - `trace=true` adds `set trace on` around the command/do-file to surface program-defined errors; the trace is turned off afterward.
460
+
461
+ ## Logging
462
+
463
+ Set `MCP_STATA_LOGLEVEL` (e.g., `DEBUG`, `INFO`) to control server logging. Logs include discovery details (edition/path) and command-init traces for easier troubleshooting.
464
+
465
+ ## Development & Contributing
466
+
467
+ For detailed information on building, testing, and contributing to this project, see [CONTRIBUTING.md](CONTRIBUTING.md).
468
+
469
+ Quick setup:
470
+
471
+ ```bash
472
+ # Install dependencies
473
+ uv sync --extra dev --no-install-project
474
+
475
+ # Run tests (requires Stata)
476
+ pytest
477
+
478
+ # Run tests without Stata
479
+ pytest -v -m "not requires_stata"
480
+
481
+ # Build the package
482
+ python -m build
483
+ ```
484
+
485
+ [![MCP Badge](https://lobehub.com/badge/mcp/tmonk-mcp-stata)](https://lobehub.com/mcp/tmonk-mcp-stata)
486
+ [![Tests](https://github.com/tmonk/mcp-stata/actions/workflows/build-test.yml/badge.svg)](https://github.com/tmonk/mcp-stata/actions/workflows/build-test.yml)
@@ -0,0 +1,20 @@
1
+ mcp_stata/__init__.py,sha256=0Tn_oUmwc1sIuYg-QlNSxnkabCtuRE5fEimkxHAB2gU,65
2
+ mcp_stata/__main__.py,sha256=oNIGZ_NlJ2fPnARXuF5lAbVGCjNTJWkebcaBYnFSIcY,73
3
+ mcp_stata/_native_ops.abi3.so,sha256=LHqkNILBrbIdyaw0gUp-ZYa8Uo_k1NorBe1taEoGK1o,2456080
4
+ mcp_stata/config.py,sha256=SfXltpwO_gROABca1sm0xXDhaeRmlRfQmXcnBiG4GYk,714
5
+ mcp_stata/discovery.py,sha256=E-k7tSM6Jqm_Y6QehY1RKnB-9qGhaW2UE9EU7B8HH9w,20624
6
+ mcp_stata/graph_detector.py,sha256=CsDJtMquLtMpzox8hsgS2BiOG_y6hIFxgtqrm9vGOP0,26602
7
+ mcp_stata/models.py,sha256=bq2MlB_4q92JfgM4BaXK2MNeh1Qnlx_eD-0vlwsfktw,1341
8
+ mcp_stata/native_ops.py,sha256=m9wOB4qVbfWKM629XuJI6KmP0cQw_EfbuQAUrVxW1Dk,2221
9
+ mcp_stata/server.py,sha256=WmDeERVhzGkgt8dsdjZInTyo-M46UaFsxz2rHHHEZio,44560
10
+ mcp_stata/smcl/smcl2html.py,sha256=nrZIzmSSWD9AuDQHXJ80A0v1f8fkR7gbl4nCy_BHRIg,3005
11
+ mcp_stata/stata_client.py,sha256=MhPd67LGKQhJvJSSMgN_0Y5Wyf8RkAklySdUetBjCKk,195370
12
+ mcp_stata/streaming_io.py,sha256=jWxEJ5v47Nzu0R-QUB69VvJWijj6X89J6uxf32Jt_mg,7122
13
+ mcp_stata/test_stata.py,sha256=V2nMuRGeP-EH2Oi5A9J9kNxpJbjcJFEJrM7a_vrjL-M,1674
14
+ mcp_stata/ui_http.py,sha256=3pY9ztlAKJaxPe6F2bzq9siqfOafislSd1-Jo7LajUA,39866
15
+ mcp_stata/utils.py,sha256=QzzAlOm7eOWyAssmUmXlMe4iiRQLGA0zNuURoooKOJc,6010
16
+ mcp_stata-1.21.0.dist-info/METADATA,sha256=bWDJPF1w61oenwetpqP74G9WWIUch4l5YiJ9pk82tqU,17884
17
+ mcp_stata-1.21.0.dist-info/WHEEL,sha256=tHqGY8yOainggDSQ-cig9jfWg3aQ960NLhjXdBFVTTQ,145
18
+ mcp_stata-1.21.0.dist-info/entry_points.txt,sha256=veUG0YD5rR8Kghyf39JasKMu3-XKptiXIwrQg1Xd2Es,50
19
+ mcp_stata-1.21.0.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
20
+ mcp_stata-1.21.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: maturin (1.11.5)
3
+ Root-Is-Purelib: false
4
+ Tag: cp311-abi3-manylinux_2_17_x86_64
5
+ Tag: cp311-abi3-manylinux2014_x86_64
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ mcp-stata=mcp_stata.server:main