mcp-stata 1.0.1__py3-none-any.whl → 1.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-stata might be problematic. (The original page links to more details here.)

mcp_stata/server.py CHANGED
@@ -1,5 +1,5 @@
1
- from mcp.server.fastmcp import FastMCP
2
- from mcp.server.fastmcp import Image
1
+ import anyio
2
+ from mcp.server.fastmcp import Context, FastMCP
3
3
  import mcp.types as types
4
4
  from .stata_client import StataClient
5
5
  from .models import (
@@ -12,40 +12,128 @@ import logging
12
12
  import json
13
13
  import os
14
14
 
15
- LOG_LEVEL = os.getenv("STATA_MCP_LOGLEVEL", "INFO").upper()
15
+ from .ui_http import UIChannelManager
16
+
17
+
18
+ LOG_LEVEL = os.getenv("MCP_STATA_LOGLEVEL", "INFO").upper()
16
19
  logging.basicConfig(level=LOG_LEVEL, format="%(asctime)s %(levelname)s %(name)s - %(message)s")
17
20
 
18
21
  # Initialize FastMCP
19
- mcp = FastMCP("stata")
22
+ mcp = FastMCP("mcp_stata")
20
23
  client = StataClient()
24
+ ui_channel = UIChannelManager(client)
21
25
 
22
26
  @mcp.tool()
23
- def run_command(code: str, echo: bool = True, as_json: bool = True, trace: bool = False, raw: bool = False) -> str:
27
+ async def run_command(
28
+ code: str,
29
+ ctx: Context | None = None,
30
+ echo: bool = True,
31
+ as_json: bool = True,
32
+ trace: bool = False,
33
+ raw: bool = False,
34
+ max_output_lines: int = None,
35
+ cwd: str | None = None,
36
+ ) -> str:
24
37
  """
25
- Executes a specific Stata command.
38
+ Executes Stata code.
39
+
40
+ This is the primary tool for interacting with Stata.
41
+
42
+ Stata output is written to a temporary log file on disk.
43
+ The server emits a single `notifications/logMessage` event containing the log file path
44
+ (JSON payload: {"event":"log_path","path":"..."}) so the client can tail it locally.
45
+ If the client supplies a progress callback/token, progress updates may also be emitted
46
+ via `notifications/progress`.
26
47
 
27
- This is the primary tool for interacting with Stata. You can run any valid Stata syntax.
28
-
29
48
  Args:
30
- code: The detailed Stata command(s) to execute (e.g., "sysuse auto", "regress price mpg", "summarize").
49
+ code: The Stata command(s) to execute (e.g., "sysuse auto", "regress price mpg", "summarize").
50
+ ctx: FastMCP-injected request context (used to send MCP notifications). Optional for direct Python calls.
31
51
  echo: If True, the command itself is included in the output. Default is True.
32
52
  as_json: If True, returns a JSON envelope with rc/stdout/stderr/error.
33
53
  trace: If True, enables `set trace on` for deeper error diagnostics (automatically disabled after).
54
+ raw: If True, return raw output/error message rather than a JSON envelope.
55
+ max_output_lines: If set, truncates stdout to this many lines for token efficiency.
56
+ Useful for verbose commands (regress, codebook, etc.).
57
+ Note: This tool always uses log-file streaming semantics; there is no non-streaming mode.
34
58
  """
35
- result = client.run_command_structured(code, echo=echo, trace=trace)
59
+ session = ctx.request_context.session if ctx is not None else None
60
+
61
+ async def notify_log(text: str) -> None:
62
+ if session is None:
63
+ return
64
+ await session.send_log_message(level="info", data=text, related_request_id=ctx.request_id)
65
+
66
+ progress_token = None
67
+ if ctx is not None and ctx.request_context.meta is not None:
68
+ progress_token = ctx.request_context.meta.progressToken
69
+
70
+ async def notify_progress(progress: float, total: float | None, message: str | None) -> None:
71
+ if session is None or progress_token is None:
72
+ return
73
+ await session.send_progress_notification(
74
+ progress_token=progress_token,
75
+ progress=progress,
76
+ total=total,
77
+ message=message,
78
+ related_request_id=ctx.request_id,
79
+ )
80
+
81
+ async def _noop_log(_text: str) -> None:
82
+ return
83
+
84
+ result = await client.run_command_streaming(
85
+ code,
86
+ notify_log=notify_log if session is not None else _noop_log,
87
+ notify_progress=notify_progress if progress_token is not None else None,
88
+ echo=echo,
89
+ trace=trace,
90
+ max_output_lines=max_output_lines,
91
+ cwd=cwd,
92
+ )
93
+
94
+ # Conservative invalidation: arbitrary Stata commands may change data.
95
+ ui_channel.notify_potential_dataset_change()
36
96
  if raw:
37
97
  if result.success:
38
- return result.stdout
98
+ return result.log_path or ""
39
99
  if result.error:
40
100
  msg = result.error.message
41
101
  if result.error.rc is not None:
42
102
  msg = f"{msg}\nrc={result.error.rc}"
43
103
  return msg
44
- return result.stdout
104
+ return result.log_path or ""
45
105
  if as_json:
46
- return result.model_dump_json(indent=2)
47
- # Default structured string for compatibility when as_json is False but raw is also False
48
- return result.model_dump_json(indent=2)
106
+ return result.model_dump_json()
107
+
108
+
109
+ @mcp.tool()
110
+ def read_log(path: str, offset: int = 0, max_bytes: int = 65536) -> str:
111
+ """Read a slice of a log file.
112
+
113
+ Intended for clients that want to display a terminal-like view without pushing MBs of
114
+ output through MCP log notifications.
115
+
116
+ Args:
117
+ path: Absolute path to the log file previously provided by the server.
118
+ offset: Byte offset to start reading from.
119
+ max_bytes: Maximum bytes to read.
120
+
121
+ Returns a compact JSON string: {"path":..., "offset":..., "next_offset":..., "data":...}
122
+ """
123
+ try:
124
+ if offset < 0:
125
+ offset = 0
126
+ with open(path, "rb") as f:
127
+ f.seek(offset)
128
+ data = f.read(max_bytes)
129
+ next_offset = f.tell()
130
+ text = data.decode("utf-8", errors="replace")
131
+ return json.dumps({"path": path, "offset": offset, "next_offset": next_offset, "data": text})
132
+ except FileNotFoundError:
133
+ return json.dumps({"path": path, "offset": offset, "next_offset": offset, "data": ""})
134
+ except Exception as e:
135
+ return json.dumps({"path": path, "offset": offset, "next_offset": offset, "data": f"ERROR: {e}"})
136
+
49
137
 
50
138
  @mcp.tool()
51
139
  def get_data(start: int = 0, count: int = 50) -> str:
@@ -60,7 +148,20 @@ def get_data(start: int = 0, count: int = 50) -> str:
60
148
  """
61
149
  data = client.get_data(start, count)
62
150
  resp = DataResponse(start=start, count=count, data=data)
63
- return resp.model_dump_json(indent=2)
151
+ return resp.model_dump_json()
152
+
153
+
154
+ @mcp.tool()
155
+ def get_ui_channel() -> str:
156
+ """Return localhost HTTP endpoint + bearer token for the extension UI data plane."""
157
+ info = ui_channel.get_channel()
158
+ payload = {
159
+ "baseUrl": info.base_url,
160
+ "token": info.token,
161
+ "expiresAt": info.expires_at,
162
+ "capabilities": ui_channel.capabilities(),
163
+ }
164
+ return json.dumps(payload)
64
165
 
65
166
  @mcp.tool()
66
167
  def describe() -> str:
@@ -69,35 +170,40 @@ def describe() -> str:
69
170
 
70
171
  Use this to understand the structure of the dataset, variable names, and their formats before running analysis.
71
172
  """
72
- return client.run_command("describe")
173
+ result = client.run_command_structured("describe", echo=True)
174
+ if result.success:
175
+ return result.stdout
176
+ if result.error:
177
+ return result.error.message
178
+ return ""
73
179
 
74
180
  @mcp.tool()
75
181
  def list_graphs() -> str:
76
182
  """
77
183
  Lists the names of all graphs currently stored in Stata's memory.
78
184
 
79
- Use this to see which graphs are available for export via `export_graph`.
185
+ Use this to see which graphs are available for export via `export_graph`. The
186
+ response marks the active graph so the agent knows which one will export by
187
+ default.
80
188
  """
81
189
  graphs = client.list_graphs_structured()
82
- return graphs.model_dump_json(indent=2)
190
+ return graphs.model_dump_json()
83
191
 
84
192
  @mcp.tool()
85
- def export_graph(graph_name: str = None) -> Image:
193
+ def export_graph(graph_name: str = None, format: str = "pdf") -> str:
86
194
  """
87
- Exports a stored Stata graph to an image format (PNG) and returns it.
88
-
195
+ Exports a stored Stata graph to a file and returns its path.
196
+
89
197
  Args:
90
198
  graph_name: The name of the graph to export (as seen in `list_graphs`).
91
199
  If None, exports the currently active graph.
200
+ format: Output format, defaults to "pdf". Supported: "pdf", "png". Use
201
+ "png" to view the plot directly so the agent can visually check
202
+ titles, labels, legends, colors, and other user requirements.
92
203
  """
93
204
  try:
94
- path = client.export_graph(graph_name)
95
- with open(path, "rb") as f:
96
- data = f.read()
97
- return Image(data=data, format="png")
205
+ return client.export_graph(graph_name, format=format)
98
206
  except Exception as e:
99
- # Return error as text if image fails?
100
- # FastMCP expects Image or error.
101
207
  raise RuntimeError(f"Failed to export graph: {e}")
102
208
 
103
209
  @mcp.tool()
@@ -116,42 +222,122 @@ def get_stored_results() -> str:
116
222
  """
117
223
  Returns the current stored results (r-class and e-class scalars/macros) as a JSON-formatted string.
118
224
 
119
- Use this after running a command (like `summarize` or `regress`) to programmatically retrieve
225
+ Use this after running a command (like `summarize` or `regress`) to programmatically retrieve
120
226
  specific values (e.g., means, coefficients, sample sizes) for validation or further calculation.
121
227
  """
122
228
  import json
123
- return json.dumps(client.get_stored_results(), indent=2)
229
+ return json.dumps(client.get_stored_results())
124
230
 
125
231
  @mcp.tool()
126
- def load_data(source: str, clear: bool = True, as_json: bool = True, raw: bool = False) -> str:
232
+ def load_data(source: str, clear: bool = True, as_json: bool = True, raw: bool = False, max_output_lines: int = None) -> str:
127
233
  """
128
234
  Loads data using sysuse/webuse/use heuristics based on the source string.
129
235
  Automatically appends , clear unless clear=False.
236
+
237
+ Args:
238
+ source: Dataset source (e.g., "auto", "auto.dta", "/path/to/file.dta").
239
+ clear: If True, clears data in memory before loading.
240
+ as_json: If True, returns JSON envelope.
241
+ raw: If True, returns raw output only.
242
+ max_output_lines: If set, truncates stdout to this many lines for token efficiency.
130
243
  """
131
- result = client.load_data(source, clear=clear)
244
+ result = client.load_data(source, clear=clear, max_output_lines=max_output_lines)
245
+ ui_channel.notify_potential_dataset_change()
132
246
  if raw:
133
247
  return result.stdout if result.success else (result.error.message if result.error else result.stdout)
134
- return result.model_dump_json(indent=2) if as_json else result.model_dump_json(indent=2)
248
+ return result.model_dump_json()
135
249
 
136
250
  @mcp.tool()
137
- def codebook(variable: str, as_json: bool = True, trace: bool = False, raw: bool = False) -> str:
251
+ def codebook(variable: str, as_json: bool = True, trace: bool = False, raw: bool = False, max_output_lines: int = None) -> str:
138
252
  """
139
253
  Returns codebook/summary for a specific variable.
254
+
255
+ Args:
256
+ variable: The variable name to analyze.
257
+ as_json: If True, returns JSON envelope.
258
+ trace: If True, enables trace mode.
259
+ raw: If True, returns raw output only.
260
+ max_output_lines: If set, truncates stdout to this many lines for token efficiency.
140
261
  """
141
- result = client.codebook(variable, trace=trace)
262
+ result = client.codebook(variable, trace=trace, max_output_lines=max_output_lines)
142
263
  if raw:
143
264
  return result.stdout if result.success else (result.error.message if result.error else result.stdout)
144
- return result.model_dump_json(indent=2) if as_json else result.model_dump_json(indent=2)
265
+ return result.model_dump_json()
145
266
 
146
267
  @mcp.tool()
147
- def run_do_file(path: str, echo: bool = True, as_json: bool = True, trace: bool = False, raw: bool = False) -> str:
268
+ async def run_do_file(
269
+ path: str,
270
+ ctx: Context | None = None,
271
+ echo: bool = True,
272
+ as_json: bool = True,
273
+ trace: bool = False,
274
+ raw: bool = False,
275
+ max_output_lines: int = None,
276
+ cwd: str | None = None,
277
+ ) -> str:
148
278
  """
149
- Executes a .do file with optional trace output and JSON envelope.
279
+ Executes a .do file.
280
+
281
+ Stata output is written to a temporary log file on disk.
282
+ The server emits a single `notifications/logMessage` event containing the log file path
283
+ (JSON payload: {"event":"log_path","path":"..."}) so the client can tail it locally.
284
+ If the client supplies a progress callback/token, progress updates are emitted via
285
+ `notifications/progress`.
286
+
287
+ Args:
288
+ path: Path to the .do file to execute.
289
+ ctx: FastMCP-injected request context (used to send MCP notifications). Optional for direct Python calls.
290
+ echo: If True, includes command in output.
291
+ as_json: If True, returns JSON envelope.
292
+ trace: If True, enables trace mode.
293
+ raw: If True, returns raw output only.
294
+ max_output_lines: If set, truncates stdout to this many lines for token efficiency.
295
+ Note: This tool always uses log-file streaming semantics; there is no non-streaming mode.
150
296
  """
151
- result = client.run_do_file(path, echo=echo, trace=trace)
297
+ session = ctx.request_context.session if ctx is not None else None
298
+
299
+ async def notify_log(text: str) -> None:
300
+ if session is None:
301
+ return
302
+ await session.send_log_message(level="info", data=text, related_request_id=ctx.request_id)
303
+
304
+ progress_token = None
305
+ if ctx is not None and ctx.request_context.meta is not None:
306
+ progress_token = ctx.request_context.meta.progressToken
307
+
308
+ async def notify_progress(progress: float, total: float | None, message: str | None) -> None:
309
+ if session is None or progress_token is None:
310
+ return
311
+ await session.send_progress_notification(
312
+ progress_token=progress_token,
313
+ progress=progress,
314
+ total=total,
315
+ message=message,
316
+ related_request_id=ctx.request_id,
317
+ )
318
+
319
+ async def _noop_log(_text: str) -> None:
320
+ return
321
+
322
+ result = await client.run_do_file_streaming(
323
+ path,
324
+ notify_log=notify_log if session is not None else _noop_log,
325
+ notify_progress=notify_progress if progress_token is not None else None,
326
+ echo=echo,
327
+ trace=trace,
328
+ max_output_lines=max_output_lines,
329
+ cwd=cwd,
330
+ )
331
+
332
+ ui_channel.notify_potential_dataset_change()
333
+
152
334
  if raw:
153
- return result.stdout if result.success else (result.error.message if result.error else result.stdout)
154
- return result.model_dump_json(indent=2) if as_json else result.model_dump_json(indent=2)
335
+ if result.success:
336
+ return result.log_path or ""
337
+ if result.error:
338
+ return result.error.message
339
+ return result.log_path or ""
340
+ return result.model_dump_json()
155
341
 
156
342
  @mcp.resource("stata://data/summary")
157
343
  def get_summary() -> str:
@@ -159,7 +345,12 @@ def get_summary() -> str:
159
345
  Returns the output of the `summarize` command for the dataset in memory.
160
346
  Provides descriptive statistics (obs, mean, std. dev, min, max) for all variables.
161
347
  """
162
- return client.run_command("summarize")
348
+ result = client.run_command_structured("summarize", echo=True)
349
+ if result.success:
350
+ return result.stdout
351
+ if result.error:
352
+ return result.error.message
353
+ return ""
163
354
 
164
355
  @mcp.resource("stata://data/metadata")
165
356
  def get_metadata() -> str:
@@ -167,35 +358,64 @@ def get_metadata() -> str:
167
358
  Returns the output of the `describe` command.
168
359
  Provides metadata about the dataset, including variable names, storage types, display formats, and labels.
169
360
  """
170
- return client.run_command("describe")
361
+ result = client.run_command_structured("describe", echo=True)
362
+ if result.success:
363
+ return result.stdout
364
+ if result.error:
365
+ return result.error.message
366
+ return ""
171
367
 
172
368
  @mcp.resource("stata://graphs/list")
173
- def get_graph_list() -> str:
174
- """Returns list of active graphs."""
175
- return client.list_graphs_structured().model_dump_json(indent=2)
369
+ def list_graphs_resource() -> str:
370
+ """Resource wrapper for the graph list (uses tool list_graphs)."""
371
+ return list_graphs()
176
372
 
177
- @mcp.resource("stata://variables/list")
373
+ @mcp.tool()
178
374
  def get_variable_list() -> str:
179
375
  """Returns JSON list of all variables."""
180
376
  variables = client.list_variables_structured()
181
- return variables.model_dump_json(indent=2)
377
+ return variables.model_dump_json()
378
+
379
+ @mcp.resource("stata://variables/list")
380
+ def get_variable_list_resource() -> str:
381
+ """Resource wrapper for the variable list."""
382
+ return get_variable_list()
182
383
 
183
384
  @mcp.resource("stata://results/stored")
184
385
  def get_stored_results_resource() -> str:
185
386
  """Returns stored r() and e() results."""
186
387
  import json
187
- return json.dumps(client.get_stored_results(), indent=2)
388
+ return json.dumps(client.get_stored_results())
188
389
 
189
390
  @mcp.tool()
190
- def export_graphs_all() -> str:
391
+ def export_graphs_all(use_base64: bool = False) -> str:
191
392
  """
192
- Exports all graphs in memory to base64-encoded PNGs.
193
- Returns a JSON envelope listing graph names and images.
393
+ Exports all graphs in memory to file paths (default) or base64-encoded SVGs.
394
+
395
+ Args:
396
+ use_base64: If True, returns base64-encoded images (token-intensive).
397
+ If False (default), returns file paths to SVG files (token-efficient).
398
+ Use file paths unless you need to embed images directly.
399
+
400
+ Returns a JSON envelope listing graph names and either file paths or base64 images.
401
+ The agent can open SVG files directly to verify visuals (titles/labels/colors/legends).
194
402
  """
195
- exports = client.export_graphs_all()
196
- return exports.model_dump_json(indent=2)
403
+ exports = client.export_graphs_all(use_base64=use_base64)
404
+ return exports.model_dump_json(exclude_none=False)
197
405
 
198
406
  def main():
407
+ # On Windows, Stata automation relies on COM, which is sensitive to threading models.
408
+ # The FastMCP server executes tool calls in a thread pool. If Stata is initialized
409
+ # lazily inside a worker thread, it may fail or hang due to COM/UI limitations.
410
+ # We explicitly initialize Stata here on the main thread to ensure the COM server
411
+ # is properly registered and accessible.
412
+ if os.name == "nt":
413
+ try:
414
+ client.init()
415
+ except Exception as e:
416
+ # Log error but let the server start; specific tools will fail gracefully later
417
+ logging.error(f"Stata initialization failed: {e}")
418
+
199
419
  mcp.run()
200
420
 
201
421
  if __name__ == "__main__":