mcp-stata: mcp_stata-1.2.2-py3-none-any.whl → mcp_stata-1.6.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-stata might be problematic. See the registry's advisory page for more details.

mcp_stata/server.py CHANGED
@@ -1,4 +1,5 @@
1
- from mcp.server.fastmcp import FastMCP
1
+ import anyio
2
+ from mcp.server.fastmcp import Context, FastMCP
2
3
  import mcp.types as types
3
4
  from .stata_client import StataClient
4
5
  from .models import (
@@ -11,40 +12,128 @@ import logging
11
12
  import json
12
13
  import os
13
14
 
14
- LOG_LEVEL = os.getenv("STATA_MCP_LOGLEVEL", "INFO").upper()
15
+ from .ui_http import UIChannelManager
16
+
17
+
18
+ LOG_LEVEL = os.getenv("MCP_STATA_LOGLEVEL", "INFO").upper()
15
19
  logging.basicConfig(level=LOG_LEVEL, format="%(asctime)s %(levelname)s %(name)s - %(message)s")
16
20
 
17
21
  # Initialize FastMCP
18
- mcp = FastMCP("stata")
22
+ mcp = FastMCP("mcp_stata")
19
23
  client = StataClient()
24
+ ui_channel = UIChannelManager(client)
20
25
 
21
26
  @mcp.tool()
22
- def run_command(code: str, echo: bool = True, as_json: bool = True, trace: bool = False, raw: bool = False) -> str:
27
+ async def run_command(
28
+ code: str,
29
+ ctx: Context | None = None,
30
+ echo: bool = True,
31
+ as_json: bool = True,
32
+ trace: bool = False,
33
+ raw: bool = False,
34
+ max_output_lines: int = None,
35
+ cwd: str | None = None,
36
+ ) -> str:
23
37
  """
24
- Executes a specific Stata command.
38
+ Executes Stata code.
39
+
40
+ This is the primary tool for interacting with Stata.
41
+
42
+ Stata output is written to a temporary log file on disk.
43
+ The server emits a single `notifications/logMessage` event containing the log file path
44
+ (JSON payload: {"event":"log_path","path":"..."}) so the client can tail it locally.
45
+ If the client supplies a progress callback/token, progress updates may also be emitted
46
+ via `notifications/progress`.
25
47
 
26
- This is the primary tool for interacting with Stata. You can run any valid Stata syntax.
27
-
28
48
  Args:
29
- code: The detailed Stata command(s) to execute (e.g., "sysuse auto", "regress price mpg", "summarize").
49
+ code: The Stata command(s) to execute (e.g., "sysuse auto", "regress price mpg", "summarize").
50
+ ctx: FastMCP-injected request context (used to send MCP notifications). Optional for direct Python calls.
30
51
  echo: If True, the command itself is included in the output. Default is True.
31
52
  as_json: If True, returns a JSON envelope with rc/stdout/stderr/error.
32
53
  trace: If True, enables `set trace on` for deeper error diagnostics (automatically disabled after).
54
+ raw: If True, return raw output/error message rather than a JSON envelope.
55
+ max_output_lines: If set, truncates stdout to this many lines for token efficiency.
56
+ Useful for verbose commands (regress, codebook, etc.).
57
+ Note: This tool always uses log-file streaming semantics; there is no non-streaming mode.
33
58
  """
34
- result = client.run_command_structured(code, echo=echo, trace=trace)
59
+ session = ctx.request_context.session if ctx is not None else None
60
+
61
+ async def notify_log(text: str) -> None:
62
+ if session is None:
63
+ return
64
+ await session.send_log_message(level="info", data=text, related_request_id=ctx.request_id)
65
+
66
+ progress_token = None
67
+ if ctx is not None and ctx.request_context.meta is not None:
68
+ progress_token = ctx.request_context.meta.progressToken
69
+
70
+ async def notify_progress(progress: float, total: float | None, message: str | None) -> None:
71
+ if session is None or progress_token is None:
72
+ return
73
+ await session.send_progress_notification(
74
+ progress_token=progress_token,
75
+ progress=progress,
76
+ total=total,
77
+ message=message,
78
+ related_request_id=ctx.request_id,
79
+ )
80
+
81
+ async def _noop_log(_text: str) -> None:
82
+ return
83
+
84
+ result = await client.run_command_streaming(
85
+ code,
86
+ notify_log=notify_log if session is not None else _noop_log,
87
+ notify_progress=notify_progress if progress_token is not None else None,
88
+ echo=echo,
89
+ trace=trace,
90
+ max_output_lines=max_output_lines,
91
+ cwd=cwd,
92
+ )
93
+
94
+ # Conservative invalidation: arbitrary Stata commands may change data.
95
+ ui_channel.notify_potential_dataset_change()
35
96
  if raw:
36
97
  if result.success:
37
- return result.stdout
98
+ return result.log_path or ""
38
99
  if result.error:
39
100
  msg = result.error.message
40
101
  if result.error.rc is not None:
41
102
  msg = f"{msg}\nrc={result.error.rc}"
42
103
  return msg
43
- return result.stdout
104
+ return result.log_path or ""
44
105
  if as_json:
45
- return result.model_dump_json(indent=2)
46
- # Default structured string for compatibility when as_json is False but raw is also False
47
- return result.model_dump_json(indent=2)
106
+ return result.model_dump_json()
107
+
108
+
109
+ @mcp.tool()
110
+ def read_log(path: str, offset: int = 0, max_bytes: int = 65536) -> str:
111
+ """Read a slice of a log file.
112
+
113
+ Intended for clients that want to display a terminal-like view without pushing MBs of
114
+ output through MCP log notifications.
115
+
116
+ Args:
117
+ path: Absolute path to the log file previously provided by the server.
118
+ offset: Byte offset to start reading from.
119
+ max_bytes: Maximum bytes to read.
120
+
121
+ Returns a compact JSON string: {"path":..., "offset":..., "next_offset":..., "data":...}
122
+ """
123
+ try:
124
+ if offset < 0:
125
+ offset = 0
126
+ with open(path, "rb") as f:
127
+ f.seek(offset)
128
+ data = f.read(max_bytes)
129
+ next_offset = f.tell()
130
+ text = data.decode("utf-8", errors="replace")
131
+ return json.dumps({"path": path, "offset": offset, "next_offset": next_offset, "data": text})
132
+ except FileNotFoundError:
133
+ return json.dumps({"path": path, "offset": offset, "next_offset": offset, "data": ""})
134
+ except Exception as e:
135
+ return json.dumps({"path": path, "offset": offset, "next_offset": offset, "data": f"ERROR: {e}"})
136
+
48
137
 
49
138
  @mcp.tool()
50
139
  def get_data(start: int = 0, count: int = 50) -> str:
@@ -59,7 +148,20 @@ def get_data(start: int = 0, count: int = 50) -> str:
59
148
  """
60
149
  data = client.get_data(start, count)
61
150
  resp = DataResponse(start=start, count=count, data=data)
62
- return resp.model_dump_json(indent=2)
151
+ return resp.model_dump_json()
152
+
153
+
154
+ @mcp.tool()
155
+ def get_ui_channel() -> str:
156
+ """Return localhost HTTP endpoint + bearer token for the extension UI data plane."""
157
+ info = ui_channel.get_channel()
158
+ payload = {
159
+ "baseUrl": info.base_url,
160
+ "token": info.token,
161
+ "expiresAt": info.expires_at,
162
+ "capabilities": ui_channel.capabilities(),
163
+ }
164
+ return json.dumps(payload)
63
165
 
64
166
  @mcp.tool()
65
167
  def describe() -> str:
@@ -68,7 +170,12 @@ def describe() -> str:
68
170
 
69
171
  Use this to understand the structure of the dataset, variable names, and their formats before running analysis.
70
172
  """
71
- return client.run_command("describe")
173
+ result = client.run_command_structured("describe", echo=True)
174
+ if result.success:
175
+ return result.stdout
176
+ if result.error:
177
+ return result.error.message
178
+ return ""
72
179
 
73
180
  @mcp.tool()
74
181
  def list_graphs() -> str:
@@ -80,7 +187,7 @@ def list_graphs() -> str:
80
187
  default.
81
188
  """
82
189
  graphs = client.list_graphs_structured()
83
- return graphs.model_dump_json(indent=2)
190
+ return graphs.model_dump_json()
84
191
 
85
192
  @mcp.tool()
86
193
  def export_graph(graph_name: str = None, format: str = "pdf") -> str:
@@ -115,42 +222,122 @@ def get_stored_results() -> str:
115
222
  """
116
223
  Returns the current stored results (r-class and e-class scalars/macros) as a JSON-formatted string.
117
224
 
118
- Use this after running a command (like `summarize` or `regress`) to programmatically retrieve
225
+ Use this after running a command (like `summarize` or `regress`) to programmatically retrieve
119
226
  specific values (e.g., means, coefficients, sample sizes) for validation or further calculation.
120
227
  """
121
228
  import json
122
- return json.dumps(client.get_stored_results(), indent=2)
229
+ return json.dumps(client.get_stored_results())
123
230
 
124
231
  @mcp.tool()
125
- def load_data(source: str, clear: bool = True, as_json: bool = True, raw: bool = False) -> str:
232
+ def load_data(source: str, clear: bool = True, as_json: bool = True, raw: bool = False, max_output_lines: int = None) -> str:
126
233
  """
127
234
  Loads data using sysuse/webuse/use heuristics based on the source string.
128
235
  Automatically appends , clear unless clear=False.
236
+
237
+ Args:
238
+ source: Dataset source (e.g., "auto", "auto.dta", "/path/to/file.dta").
239
+ clear: If True, clears data in memory before loading.
240
+ as_json: If True, returns JSON envelope.
241
+ raw: If True, returns raw output only.
242
+ max_output_lines: If set, truncates stdout to this many lines for token efficiency.
129
243
  """
130
- result = client.load_data(source, clear=clear)
244
+ result = client.load_data(source, clear=clear, max_output_lines=max_output_lines)
245
+ ui_channel.notify_potential_dataset_change()
131
246
  if raw:
132
247
  return result.stdout if result.success else (result.error.message if result.error else result.stdout)
133
- return result.model_dump_json(indent=2) if as_json else result.model_dump_json(indent=2)
248
+ return result.model_dump_json()
134
249
 
135
250
  @mcp.tool()
136
- def codebook(variable: str, as_json: bool = True, trace: bool = False, raw: bool = False) -> str:
251
+ def codebook(variable: str, as_json: bool = True, trace: bool = False, raw: bool = False, max_output_lines: int = None) -> str:
137
252
  """
138
253
  Returns codebook/summary for a specific variable.
254
+
255
+ Args:
256
+ variable: The variable name to analyze.
257
+ as_json: If True, returns JSON envelope.
258
+ trace: If True, enables trace mode.
259
+ raw: If True, returns raw output only.
260
+ max_output_lines: If set, truncates stdout to this many lines for token efficiency.
139
261
  """
140
- result = client.codebook(variable, trace=trace)
262
+ result = client.codebook(variable, trace=trace, max_output_lines=max_output_lines)
141
263
  if raw:
142
264
  return result.stdout if result.success else (result.error.message if result.error else result.stdout)
143
- return result.model_dump_json(indent=2) if as_json else result.model_dump_json(indent=2)
265
+ return result.model_dump_json()
144
266
 
145
267
  @mcp.tool()
146
- def run_do_file(path: str, echo: bool = True, as_json: bool = True, trace: bool = False, raw: bool = False) -> str:
268
+ async def run_do_file(
269
+ path: str,
270
+ ctx: Context | None = None,
271
+ echo: bool = True,
272
+ as_json: bool = True,
273
+ trace: bool = False,
274
+ raw: bool = False,
275
+ max_output_lines: int = None,
276
+ cwd: str | None = None,
277
+ ) -> str:
147
278
  """
148
- Executes a .do file with optional trace output and JSON envelope.
279
+ Executes a .do file.
280
+
281
+ Stata output is written to a temporary log file on disk.
282
+ The server emits a single `notifications/logMessage` event containing the log file path
283
+ (JSON payload: {"event":"log_path","path":"..."}) so the client can tail it locally.
284
+ If the client supplies a progress callback/token, progress updates are emitted via
285
+ `notifications/progress`.
286
+
287
+ Args:
288
+ path: Path to the .do file to execute.
289
+ ctx: FastMCP-injected request context (used to send MCP notifications). Optional for direct Python calls.
290
+ echo: If True, includes command in output.
291
+ as_json: If True, returns JSON envelope.
292
+ trace: If True, enables trace mode.
293
+ raw: If True, returns raw output only.
294
+ max_output_lines: If set, truncates stdout to this many lines for token efficiency.
295
+ Note: This tool always uses log-file streaming semantics; there is no non-streaming mode.
149
296
  """
150
- result = client.run_do_file(path, echo=echo, trace=trace)
297
+ session = ctx.request_context.session if ctx is not None else None
298
+
299
+ async def notify_log(text: str) -> None:
300
+ if session is None:
301
+ return
302
+ await session.send_log_message(level="info", data=text, related_request_id=ctx.request_id)
303
+
304
+ progress_token = None
305
+ if ctx is not None and ctx.request_context.meta is not None:
306
+ progress_token = ctx.request_context.meta.progressToken
307
+
308
+ async def notify_progress(progress: float, total: float | None, message: str | None) -> None:
309
+ if session is None or progress_token is None:
310
+ return
311
+ await session.send_progress_notification(
312
+ progress_token=progress_token,
313
+ progress=progress,
314
+ total=total,
315
+ message=message,
316
+ related_request_id=ctx.request_id,
317
+ )
318
+
319
+ async def _noop_log(_text: str) -> None:
320
+ return
321
+
322
+ result = await client.run_do_file_streaming(
323
+ path,
324
+ notify_log=notify_log if session is not None else _noop_log,
325
+ notify_progress=notify_progress if progress_token is not None else None,
326
+ echo=echo,
327
+ trace=trace,
328
+ max_output_lines=max_output_lines,
329
+ cwd=cwd,
330
+ )
331
+
332
+ ui_channel.notify_potential_dataset_change()
333
+
151
334
  if raw:
152
- return result.stdout if result.success else (result.error.message if result.error else result.stdout)
153
- return result.model_dump_json(indent=2) if as_json else result.model_dump_json(indent=2)
335
+ if result.success:
336
+ return result.log_path or ""
337
+ if result.error:
338
+ return result.error.message
339
+ return result.log_path or ""
340
+ return result.model_dump_json()
154
341
 
155
342
  @mcp.resource("stata://data/summary")
156
343
  def get_summary() -> str:
@@ -158,7 +345,12 @@ def get_summary() -> str:
158
345
  Returns the output of the `summarize` command for the dataset in memory.
159
346
  Provides descriptive statistics (obs, mean, std. dev, min, max) for all variables.
160
347
  """
161
- return client.run_command("summarize")
348
+ result = client.run_command_structured("summarize", echo=True)
349
+ if result.success:
350
+ return result.stdout
351
+ if result.error:
352
+ return result.error.message
353
+ return ""
162
354
 
163
355
  @mcp.resource("stata://data/metadata")
164
356
  def get_metadata() -> str:
@@ -166,18 +358,23 @@ def get_metadata() -> str:
166
358
  Returns the output of the `describe` command.
167
359
  Provides metadata about the dataset, including variable names, storage types, display formats, and labels.
168
360
  """
169
- return client.run_command("describe")
361
+ result = client.run_command_structured("describe", echo=True)
362
+ if result.success:
363
+ return result.stdout
364
+ if result.error:
365
+ return result.error.message
366
+ return ""
170
367
 
171
368
  @mcp.resource("stata://graphs/list")
172
- def get_graph_list() -> str:
173
- """Returns list of active graphs."""
174
- return client.list_graphs_structured().model_dump_json(indent=2)
369
+ def list_graphs_resource() -> str:
370
+ """Resource wrapper for the graph list (uses tool list_graphs)."""
371
+ return list_graphs()
175
372
 
176
373
  @mcp.tool()
177
374
  def get_variable_list() -> str:
178
375
  """Returns JSON list of all variables."""
179
376
  variables = client.list_variables_structured()
180
- return variables.model_dump_json(indent=2)
377
+ return variables.model_dump_json()
181
378
 
182
379
  @mcp.resource("stata://variables/list")
183
380
  def get_variable_list_resource() -> str:
@@ -188,20 +385,37 @@ def get_variable_list_resource() -> str:
188
385
  def get_stored_results_resource() -> str:
189
386
  """Returns stored r() and e() results."""
190
387
  import json
191
- return json.dumps(client.get_stored_results(), indent=2)
388
+ return json.dumps(client.get_stored_results())
192
389
 
193
390
  @mcp.tool()
194
- def export_graphs_all() -> str:
391
+ def export_graphs_all(use_base64: bool = False) -> str:
195
392
  """
196
- Exports all graphs in memory to base64-encoded PNGs.
197
- Returns a JSON envelope listing graph names and images so the agent can open
198
- them directly and verify visuals (titles/labels/colors/legends) against the
199
- user's requested spec without an extra fetch.
393
+ Exports all graphs in memory to file paths (default) or base64-encoded SVGs.
394
+
395
+ Args:
396
+ use_base64: If True, returns base64-encoded images (token-intensive).
397
+ If False (default), returns file paths to SVG files (token-efficient).
398
+ Use file paths unless you need to embed images directly.
399
+
400
+ Returns a JSON envelope listing graph names and either file paths or base64 images.
401
+ The agent can open SVG files directly to verify visuals (titles/labels/colors/legends).
200
402
  """
201
- exports = client.export_graphs_all()
202
- return exports.model_dump_json(indent=2)
403
+ exports = client.export_graphs_all(use_base64=use_base64)
404
+ return exports.model_dump_json(exclude_none=False)
203
405
 
204
406
  def main():
407
+ # On Windows, Stata automation relies on COM, which is sensitive to threading models.
408
+ # The FastMCP server executes tool calls in a thread pool. If Stata is initialized
409
+ # lazily inside a worker thread, it may fail or hang due to COM/UI limitations.
410
+ # We explicitly initialize Stata here on the main thread to ensure the COM server
411
+ # is properly registered and accessible.
412
+ if os.name == "nt":
413
+ try:
414
+ client.init()
415
+ except Exception as e:
416
+ # Log error but let the server start; specific tools will fail gracefully later
417
+ logging.error(f"Stata initialization failed: {e}")
418
+
205
419
  mcp.run()
206
420
 
207
421
  if __name__ == "__main__":