mcp-stata 1.2.2__py3-none-any.whl → 1.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-stata might be problematic. Click here for more details.
- mcp_stata/discovery.py +96 -25
- mcp_stata/graph_detector.py +385 -0
- mcp_stata/models.py +4 -1
- mcp_stata/server.py +258 -44
- mcp_stata/stata_client.py +1990 -265
- mcp_stata/streaming_io.py +261 -0
- mcp_stata/ui_http.py +540 -0
- mcp_stata-1.6.2.dist-info/METADATA +380 -0
- mcp_stata-1.6.2.dist-info/RECORD +14 -0
- mcp_stata-1.2.2.dist-info/METADATA +0 -240
- mcp_stata-1.2.2.dist-info/RECORD +0 -11
- {mcp_stata-1.2.2.dist-info → mcp_stata-1.6.2.dist-info}/WHEEL +0 -0
- {mcp_stata-1.2.2.dist-info → mcp_stata-1.6.2.dist-info}/entry_points.txt +0 -0
- {mcp_stata-1.2.2.dist-info → mcp_stata-1.6.2.dist-info}/licenses/LICENSE +0 -0
mcp_stata/server.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
|
|
1
|
+
import anyio
|
|
2
|
+
from mcp.server.fastmcp import Context, FastMCP
|
|
2
3
|
import mcp.types as types
|
|
3
4
|
from .stata_client import StataClient
|
|
4
5
|
from .models import (
|
|
@@ -11,40 +12,128 @@ import logging
|
|
|
11
12
|
import json
|
|
12
13
|
import os
|
|
13
14
|
|
|
14
|
-
|
|
15
|
+
from .ui_http import UIChannelManager
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Resolve the log level from the environment. logging.basicConfig raises
# ValueError for an unknown level name, which would abort the server at import
# time, so an unrecognised value falls back to INFO instead.
LOG_LEVEL = os.getenv("MCP_STATA_LOGLEVEL", "INFO").strip().upper()
_rejected_log_level = None
# logging.getLevelName maps a known level *name* to its integer value and
# returns a "Level ..." string for anything it does not recognise.
if not isinstance(logging.getLevelName(LOG_LEVEL), int):
    _rejected_log_level, LOG_LEVEL = LOG_LEVEL, "INFO"
logging.basicConfig(level=LOG_LEVEL, format="%(asctime)s %(levelname)s %(name)s - %(message)s")
if _rejected_log_level is not None:
    logging.getLogger(__name__).warning(
        "Ignoring invalid MCP_STATA_LOGLEVEL %r; using INFO", _rejected_log_level
    )

# Initialize FastMCP
mcp = FastMCP("mcp_stata")
# Single shared Stata client used by every tool/resource in this module.
client = StataClient()
# UI channel manager built on top of the shared client.
ui_channel = UIChannelManager(client)
|
|
20
25
|
|
|
21
26
|
@mcp.tool()
|
|
22
|
-
def run_command(
|
|
27
|
+
async def run_command(
    code: str,
    ctx: Context | None = None,
    echo: bool = True,
    as_json: bool = True,
    trace: bool = False,
    raw: bool = False,
    max_output_lines: int | None = None,
    cwd: str | None = None,
) -> str:
    """
    Executes Stata code.

    This is the primary tool for interacting with Stata.

    Stata output is written to a temporary log file on disk.
    The server emits a single `notifications/logMessage` event containing the log file path
    (JSON payload: {"event":"log_path","path":"..."}) so the client can tail it locally.
    If the client supplies a progress callback/token, progress updates may also be emitted
    via `notifications/progress`.

    Args:
        code: The Stata command(s) to execute (e.g., "sysuse auto", "regress price mpg", "summarize").
        ctx: FastMCP-injected request context (used to send MCP notifications). Optional for direct Python calls.
        echo: If True, the command itself is included in the output. Default is True.
        as_json: Accepted for backward compatibility. Whenever `raw` is False the JSON
            envelope (the serialized result model) is returned, whatever this flag says.
        trace: If True, enables `set trace on` for deeper error diagnostics (automatically disabled after).
        raw: If True, return plain text instead of the JSON envelope: the log file path
            (or "" when none is set) on success, or the error message followed by
            `rc=<code>` when the run failed with a return code.
        max_output_lines: If set, truncates stdout to this many lines for token efficiency.
            Useful for verbose commands (regress, codebook, etc.).
        cwd: Forwarded unchanged to `client.run_command_streaming`; presumably the working
            directory for the run (confirm against StataClient).

    Returns:
        A string: the JSON envelope, or the raw text described under `raw`.

    Note: This tool always uses log-file streaming semantics; there is no non-streaming mode.
    """
    # Notifications need an MCP session; direct Python calls pass no ctx and get none.
    session = ctx.request_context.session if ctx is not None else None

    async def notify_log(text: str) -> None:
        if session is None:
            return
        await session.send_log_message(level="info", data=text, related_request_id=ctx.request_id)

    # Progress updates are only possible when the request carried a progress token.
    progress_token = None
    if ctx is not None and ctx.request_context.meta is not None:
        progress_token = ctx.request_context.meta.progressToken

    async def notify_progress(progress: float, total: float | None, message: str | None) -> None:
        if session is None or progress_token is None:
            return
        await session.send_progress_notification(
            progress_token=progress_token,
            progress=progress,
            total=total,
            message=message,
            related_request_id=ctx.request_id,
        )

    async def _noop_log(_text: str) -> None:
        return

    result = await client.run_command_streaming(
        code,
        notify_log=notify_log if session is not None else _noop_log,
        notify_progress=notify_progress if progress_token is not None else None,
        echo=echo,
        trace=trace,
        max_output_lines=max_output_lines,
        cwd=cwd,
    )

    # Conservative invalidation: arbitrary Stata commands may change data.
    ui_channel.notify_potential_dataset_change()

    if raw:
        if result.success or not result.error:
            return result.log_path or ""
        msg = result.error.message
        if result.error.rc is not None:
            msg = f"{msg}\nrc={result.error.rc}"
        return msg

    # Previously `as_json=False` with `raw=False` fell off the end and returned None,
    # breaking the declared `-> str` contract. Always return the envelope, matching
    # the other tools in this module.
    return result.model_dump_json()
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@mcp.tool()
|
|
110
|
+
def read_log(path: str, offset: int = 0, max_bytes: int = 65536) -> str:
    """Read a slice of a log file.

    Intended for clients that want to display a terminal-like view without pushing MBs of
    output through MCP log notifications.

    The slice never ends in the middle of a multi-byte UTF-8 character: an unfinished
    trailing sequence is left for the next call, so `next_offset` can be slightly less
    than `offset + max_bytes`. Feeding `next_offset` back as `offset` therefore yields
    cleanly decoded text across chunk boundaries.

    Args:
        path: Absolute path to the log file previously provided by the server.
        offset: Byte offset to start reading from. Negative values are treated as 0.
        max_bytes: Maximum bytes to read. Negative values are treated as 0.

    Returns a compact JSON string: {"path":..., "offset":..., "next_offset":..., "data":...}
    A missing file yields empty `data`; any other failure yields `data` of the form
    "ERROR: <message>". In both cases `next_offset` equals `offset`.
    """
    # NOTE(security): `path` comes from the MCP client and is opened as-is, so this tool
    # can read any file the server process can read. Consider restricting it to log paths
    # the server itself handed out.
    try:
        if offset < 0:
            offset = 0
        # A negative size would make f.read() return the whole file, defeating the cap.
        if max_bytes < 0:
            max_bytes = 0
        with open(path, "rb") as f:
            f.seek(offset)
            chunk = f.read(max_bytes)
        keep = _utf8_complete_prefix_len(chunk)
        if keep == 0 and chunk and len(chunk) == max_bytes:
            # max_bytes is smaller than one character; emit the partial bytes anyway so
            # a caller looping on next_offset always makes progress.
            keep = len(chunk)
        chunk = chunk[:keep]
        text = chunk.decode("utf-8", errors="replace")
        return json.dumps({"path": path, "offset": offset, "next_offset": offset + len(chunk), "data": text})
    except FileNotFoundError:
        return json.dumps({"path": path, "offset": offset, "next_offset": offset, "data": ""})
    except Exception as e:
        # Tool boundary: report the failure in-band rather than raising to the client.
        return json.dumps({"path": path, "offset": offset, "next_offset": offset, "data": f"ERROR: {e}"})


def _utf8_complete_prefix_len(chunk: bytes) -> int:
    """Return len(chunk) minus any unfinished UTF-8 sequence at its end."""
    size = len(chunk)
    # A UTF-8 character is at most 4 bytes, so an unfinished one starts within the last 3.
    for back in range(1, min(3, size) + 1):
        byte = chunk[size - back]
        if byte & 0xC0 == 0x80:
            continue  # continuation byte: keep scanning back for its lead byte
        if byte < 0x80 or byte >= 0xF8:
            return size  # ASCII, or an invalid lead byte the decoder will replace
        needed = 4 if byte >= 0xF0 else 3 if byte >= 0xE0 else 2
        return size if back >= needed else size - back
    return size
|
|
136
|
+
|
|
48
137
|
|
|
49
138
|
@mcp.tool()
|
|
50
139
|
def get_data(start: int = 0, count: int = 50) -> str:
|
|
@@ -59,7 +148,20 @@ def get_data(start: int = 0, count: int = 50) -> str:
|
|
|
59
148
|
"""
|
|
60
149
|
data = client.get_data(start, count)
|
|
61
150
|
resp = DataResponse(start=start, count=count, data=data)
|
|
62
|
-
return resp.model_dump_json(
|
|
151
|
+
return resp.model_dump_json()
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@mcp.tool()
|
|
155
|
+
def get_ui_channel() -> str:
    """Return localhost HTTP endpoint + bearer token for the extension UI data plane.

    Returns:
        A JSON object string with the keys `baseUrl`, `token`, `expiresAt` (taken from
        the channel info returned by `ui_channel.get_channel()`) and `capabilities`
        (from `ui_channel.capabilities()`).
    """
    channel = ui_channel.get_channel()
    return json.dumps(
        {
            "baseUrl": channel.base_url,
            "token": channel.token,
            "expiresAt": channel.expires_at,
            "capabilities": ui_channel.capabilities(),
        }
    )
|
|
63
165
|
|
|
64
166
|
@mcp.tool()
|
|
65
167
|
def describe() -> str:
|
|
@@ -68,7 +170,12 @@ def describe() -> str:
|
|
|
68
170
|
|
|
69
171
|
Use this to understand the structure of the dataset, variable names, and their formats before running analysis.
|
|
70
172
|
"""
|
|
71
|
-
|
|
173
|
+
result = client.run_command_structured("describe", echo=True)
|
|
174
|
+
if result.success:
|
|
175
|
+
return result.stdout
|
|
176
|
+
if result.error:
|
|
177
|
+
return result.error.message
|
|
178
|
+
return ""
|
|
72
179
|
|
|
73
180
|
@mcp.tool()
|
|
74
181
|
def list_graphs() -> str:
|
|
@@ -80,7 +187,7 @@ def list_graphs() -> str:
|
|
|
80
187
|
default.
|
|
81
188
|
"""
|
|
82
189
|
graphs = client.list_graphs_structured()
|
|
83
|
-
return graphs.model_dump_json(
|
|
190
|
+
return graphs.model_dump_json()
|
|
84
191
|
|
|
85
192
|
@mcp.tool()
|
|
86
193
|
def export_graph(graph_name: str = None, format: str = "pdf") -> str:
|
|
@@ -115,42 +222,122 @@ def get_stored_results() -> str:
|
|
|
115
222
|
"""
|
|
116
223
|
Returns the current stored results (r-class and e-class scalars/macros) as a JSON-formatted string.
|
|
117
224
|
|
|
118
|
-
Use this after running a command (like `summarize` or `regress`) to programmatically retrieve
|
|
225
|
+
Use this after running a command (like `summarize` or `regress`) to programmatically retrieve
|
|
119
226
|
specific values (e.g., means, coefficients, sample sizes) for validation or further calculation.
|
|
120
227
|
"""
|
|
121
228
|
import json
|
|
122
|
-
return json.dumps(client.get_stored_results()
|
|
229
|
+
return json.dumps(client.get_stored_results())
|
|
123
230
|
|
|
124
231
|
@mcp.tool()
|
|
125
|
-
def load_data(source: str, clear: bool = True, as_json: bool = True, raw: bool = False) -> str:
|
|
232
|
+
def load_data(
    source: str,
    clear: bool = True,
    as_json: bool = True,
    raw: bool = False,
    max_output_lines: int | None = None,
) -> str:
    """
    Loads data using sysuse/webuse/use heuristics based on the source string.
    Automatically appends , clear unless clear=False.

    Args:
        source: Dataset source (e.g., "auto", "auto.dta", "/path/to/file.dta").
        clear: If True, clears data in memory before loading.
        as_json: Accepted for backward compatibility; not consulted. The JSON envelope is
            returned whenever `raw` is False.
        raw: If True, returns raw output only: stdout on success, otherwise the error
            message (or stdout when no error object is present).
        max_output_lines: If set, truncates stdout to this many lines for token efficiency.

    Returns:
        The serialized result model as JSON, or the raw text described under `raw`.
    """
    result = client.load_data(source, clear=clear, max_output_lines=max_output_lines)
    # Signal the UI channel that the dataset in memory may have changed.
    ui_channel.notify_potential_dataset_change()
    if not raw:
        return result.model_dump_json()
    if result.success or not result.error:
        return result.stdout
    return result.error.message
|
|
134
249
|
|
|
135
250
|
@mcp.tool()
|
|
136
|
-
def codebook(variable: str, as_json: bool = True, trace: bool = False, raw: bool = False) -> str:
|
|
251
|
+
def codebook(
    variable: str,
    as_json: bool = True,
    trace: bool = False,
    raw: bool = False,
    max_output_lines: int | None = None,
) -> str:
    """
    Returns codebook/summary for a specific variable.

    Args:
        variable: The variable name to analyze.
        as_json: Accepted for backward compatibility; not consulted. The JSON envelope is
            returned whenever `raw` is False.
        trace: If True, enables trace mode.
        raw: If True, returns raw output only: stdout on success, otherwise the error
            message (or stdout when no error object is present).
        max_output_lines: If set, truncates stdout to this many lines for token efficiency.

    Returns:
        The serialized result model as JSON, or the raw text described under `raw`.
    """
    result = client.codebook(variable, trace=trace, max_output_lines=max_output_lines)
    if not raw:
        return result.model_dump_json()
    if result.success or not result.error:
        return result.stdout
    return result.error.message
|
|
144
266
|
|
|
145
267
|
@mcp.tool()
|
|
146
|
-
def run_do_file(
|
|
268
|
+
async def run_do_file(
    path: str,
    ctx: Context | None = None,
    echo: bool = True,
    as_json: bool = True,
    trace: bool = False,
    raw: bool = False,
    max_output_lines: int | None = None,
    cwd: str | None = None,
) -> str:
    """
    Executes a .do file.

    Stata output is written to a temporary log file on disk.
    The server emits a single `notifications/logMessage` event containing the log file path
    (JSON payload: {"event":"log_path","path":"..."}) so the client can tail it locally.
    If the client supplies a progress callback/token, progress updates are emitted via
    `notifications/progress`.

    Args:
        path: Path to the .do file to execute.
        ctx: FastMCP-injected request context (used to send MCP notifications). Optional for direct Python calls.
        echo: If True, includes command in output.
        as_json: Accepted for backward compatibility; not consulted. The JSON envelope is
            returned whenever `raw` is False.
        trace: If True, enables trace mode.
        raw: If True, return plain text instead of the JSON envelope: the log file path
            (or "" when none is set) on success, or the error message followed by
            `rc=<code>` when the run failed with a return code.
        max_output_lines: If set, truncates stdout to this many lines for token efficiency.
        cwd: Forwarded unchanged to `client.run_do_file_streaming`; presumably the working
            directory for the run (confirm against StataClient).

    Returns:
        A string: the JSON envelope, or the raw text described under `raw`.

    Note: This tool always uses log-file streaming semantics; there is no non-streaming mode.
    """
    # Notifications need an MCP session; direct Python calls pass no ctx and get none.
    session = ctx.request_context.session if ctx is not None else None

    async def notify_log(text: str) -> None:
        if session is None:
            return
        await session.send_log_message(level="info", data=text, related_request_id=ctx.request_id)

    # Progress updates are only possible when the request carried a progress token.
    progress_token = None
    if ctx is not None and ctx.request_context.meta is not None:
        progress_token = ctx.request_context.meta.progressToken

    async def notify_progress(progress: float, total: float | None, message: str | None) -> None:
        if session is None or progress_token is None:
            return
        await session.send_progress_notification(
            progress_token=progress_token,
            progress=progress,
            total=total,
            message=message,
            related_request_id=ctx.request_id,
        )

    async def _noop_log(_text: str) -> None:
        return

    result = await client.run_do_file_streaming(
        path,
        notify_log=notify_log if session is not None else _noop_log,
        notify_progress=notify_progress if progress_token is not None else None,
        echo=echo,
        trace=trace,
        max_output_lines=max_output_lines,
        cwd=cwd,
    )

    # A do-file can run arbitrary commands, so the dataset may have changed.
    ui_channel.notify_potential_dataset_change()

    if raw:
        if result.success or not result.error:
            return result.log_path or ""
        # Include the return code, as run_command's raw error output does.
        msg = result.error.message
        if result.error.rc is not None:
            msg = f"{msg}\nrc={result.error.rc}"
        return msg
    return result.model_dump_json()
|
|
154
341
|
|
|
155
342
|
@mcp.resource("stata://data/summary")
|
|
156
343
|
def get_summary() -> str:
|
|
@@ -158,7 +345,12 @@ def get_summary() -> str:
|
|
|
158
345
|
Returns the output of the `summarize` command for the dataset in memory.
|
|
159
346
|
Provides descriptive statistics (obs, mean, std. dev, min, max) for all variables.
|
|
160
347
|
"""
|
|
161
|
-
|
|
348
|
+
result = client.run_command_structured("summarize", echo=True)
|
|
349
|
+
if result.success:
|
|
350
|
+
return result.stdout
|
|
351
|
+
if result.error:
|
|
352
|
+
return result.error.message
|
|
353
|
+
return ""
|
|
162
354
|
|
|
163
355
|
@mcp.resource("stata://data/metadata")
|
|
164
356
|
def get_metadata() -> str:
|
|
@@ -166,18 +358,23 @@ def get_metadata() -> str:
|
|
|
166
358
|
Returns the output of the `describe` command.
|
|
167
359
|
Provides metadata about the dataset, including variable names, storage types, display formats, and labels.
|
|
168
360
|
"""
|
|
169
|
-
|
|
361
|
+
result = client.run_command_structured("describe", echo=True)
|
|
362
|
+
if result.success:
|
|
363
|
+
return result.stdout
|
|
364
|
+
if result.error:
|
|
365
|
+
return result.error.message
|
|
366
|
+
return ""
|
|
170
367
|
|
|
171
368
|
@mcp.resource("stata://graphs/list")
|
|
172
|
-
def
|
|
173
|
-
"""
|
|
174
|
-
return
|
|
369
|
+
def list_graphs_resource() -> str:
    """Expose the graph list as a resource by delegating to the `list_graphs` tool.

    Returns:
        Whatever `list_graphs()` returns, unchanged.
    """
    graphs_json = list_graphs()
    return graphs_json
|
|
175
372
|
|
|
176
373
|
@mcp.tool()
|
|
177
374
|
def get_variable_list() -> str:
    """Return the variable list as JSON.

    Returns:
        The JSON serialization of the model produced by
        `client.list_variables_structured()`.
    """
    return client.list_variables_structured().model_dump_json()
|
|
181
378
|
|
|
182
379
|
@mcp.resource("stata://variables/list")
|
|
183
380
|
def get_variable_list_resource() -> str:
|
|
@@ -188,20 +385,37 @@ def get_variable_list_resource() -> str:
|
|
|
188
385
|
def get_stored_results_resource() -> str:
    """Returns stored r() and e() results.

    Returns:
        `client.get_stored_results()` serialized with `json.dumps`.
    """
    # `json` is already imported at module level; no function-scope import needed.
    return json.dumps(client.get_stored_results())
|
|
192
389
|
|
|
193
390
|
@mcp.tool()
|
|
194
|
-
def export_graphs_all() -> str:
|
|
391
|
+
def export_graphs_all(use_base64: bool = False) -> str:
    """
    Export every graph in memory, as file paths (default) or base64-encoded SVGs.

    Args:
        use_base64: When True, the response carries base64-encoded images, which is
            token-intensive. When False (default), it carries file paths to SVG files,
            which is token-efficient. Prefer file paths unless the images must be
            embedded directly.

    Returns:
        A JSON envelope listing graph names and either file paths or base64 images.
        The agent can open SVG files directly to verify visuals (titles/labels/colors/legends).
    """
    # exclude_none=False keeps None-valued fields in the serialized envelope.
    return client.export_graphs_all(use_base64=use_base64).model_dump_json(exclude_none=False)
|
|
203
405
|
|
|
204
406
|
def main():
    """Entry point: initialize Stata eagerly on Windows, then run the MCP server."""
    if os.name == "nt":
        _init_stata_on_main_thread()
    mcp.run()


def _init_stata_on_main_thread():
    """Initialize the shared Stata client from the calling (main) thread.

    On Windows, Stata automation relies on COM, which is sensitive to threading models.
    The FastMCP server executes tool calls in a thread pool; if Stata were initialized
    lazily inside a worker thread it may fail or hang due to COM/UI limitations.
    Initializing here, before the server starts, ensures the COM server is properly
    registered and accessible.
    """
    try:
        client.init()
    except Exception as e:
        # Log error but let the server start; specific tools will fail gracefully later
        logging.error(f"Stata initialization failed: {e}")
|
|
206
420
|
|
|
207
421
|
if __name__ == "__main__":
|