mcp-stata 1.2.2__py3-none-any.whl → 1.6.8__py3-none-any.whl

This diff shows the changes between publicly available package versions as released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-stata might be problematic. See the advisory details below for more information.

mcp_stata/server.py CHANGED
@@ -1,4 +1,6 @@
1
- from mcp.server.fastmcp import FastMCP
1
+ import anyio
2
+ from importlib.metadata import PackageNotFoundError, version
3
+ from mcp.server.fastmcp import Context, FastMCP
2
4
  import mcp.types as types
3
5
  from .stata_client import StataClient
4
6
  from .models import (
@@ -11,40 +13,134 @@ import logging
11
13
  import json
12
14
  import os
13
15
 
14
- LOG_LEVEL = os.getenv("STATA_MCP_LOGLEVEL", "INFO").upper()
16
+ from .ui_http import UIChannelManager
17
+
18
+
19
+ LOG_LEVEL = os.getenv("MCP_STATA_LOGLEVEL", "INFO").upper()
15
20
  logging.basicConfig(level=LOG_LEVEL, format="%(asctime)s %(levelname)s %(name)s - %(message)s")
21
+ try:
22
+ _mcp_stata_version = version("mcp-stata")
23
+ except PackageNotFoundError:
24
+ _mcp_stata_version = "unknown"
25
+ logging.info("mcp-stata version: %s", _mcp_stata_version)
26
+ logging.info("STATA_PATH env at startup: %s", os.getenv("STATA_PATH", "<not set>"))
16
27
 
17
28
  # Initialize FastMCP
18
- mcp = FastMCP("stata")
29
+ mcp = FastMCP("mcp_stata")
19
30
  client = StataClient()
31
+ ui_channel = UIChannelManager(client)
20
32
 
21
33
  @mcp.tool()
22
- def run_command(code: str, echo: bool = True, as_json: bool = True, trace: bool = False, raw: bool = False) -> str:
34
+ async def run_command(
35
+ code: str,
36
+ ctx: Context | None = None,
37
+ echo: bool = True,
38
+ as_json: bool = True,
39
+ trace: bool = False,
40
+ raw: bool = False,
41
+ max_output_lines: int = None,
42
+ cwd: str | None = None,
43
+ ) -> str:
23
44
  """
24
- Executes a specific Stata command.
45
+ Executes Stata code.
46
+
47
+ This is the primary tool for interacting with Stata.
48
+
49
+ Stata output is written to a temporary log file on disk.
50
+ The server emits a single `notifications/logMessage` event containing the log file path
51
+ (JSON payload: {"event":"log_path","path":"..."}) so the client can tail it locally.
52
+ If the client supplies a progress callback/token, progress updates may also be emitted
53
+ via `notifications/progress`.
25
54
 
26
- This is the primary tool for interacting with Stata. You can run any valid Stata syntax.
27
-
28
55
  Args:
29
- code: The detailed Stata command(s) to execute (e.g., "sysuse auto", "regress price mpg", "summarize").
56
+ code: The Stata command(s) to execute (e.g., "sysuse auto", "regress price mpg", "summarize").
57
+ ctx: FastMCP-injected request context (used to send MCP notifications). Optional for direct Python calls.
30
58
  echo: If True, the command itself is included in the output. Default is True.
31
59
  as_json: If True, returns a JSON envelope with rc/stdout/stderr/error.
32
60
  trace: If True, enables `set trace on` for deeper error diagnostics (automatically disabled after).
61
+ raw: If True, return raw output/error message rather than a JSON envelope.
62
+ max_output_lines: If set, truncates stdout to this many lines for token efficiency.
63
+ Useful for verbose commands (regress, codebook, etc.).
64
+ Note: This tool always uses log-file streaming semantics; there is no non-streaming mode.
33
65
  """
34
- result = client.run_command_structured(code, echo=echo, trace=trace)
66
+ session = ctx.request_context.session if ctx is not None else None
67
+
68
+ async def notify_log(text: str) -> None:
69
+ if session is None:
70
+ return
71
+ await session.send_log_message(level="info", data=text, related_request_id=ctx.request_id)
72
+
73
+ progress_token = None
74
+ if ctx is not None and ctx.request_context.meta is not None:
75
+ progress_token = ctx.request_context.meta.progressToken
76
+
77
+ async def notify_progress(progress: float, total: float | None, message: str | None) -> None:
78
+ if session is None or progress_token is None:
79
+ return
80
+ await session.send_progress_notification(
81
+ progress_token=progress_token,
82
+ progress=progress,
83
+ total=total,
84
+ message=message,
85
+ related_request_id=ctx.request_id,
86
+ )
87
+
88
+ async def _noop_log(_text: str) -> None:
89
+ return
90
+
91
+ result = await client.run_command_streaming(
92
+ code,
93
+ notify_log=notify_log if session is not None else _noop_log,
94
+ notify_progress=notify_progress if progress_token is not None else None,
95
+ echo=echo,
96
+ trace=trace,
97
+ max_output_lines=max_output_lines,
98
+ cwd=cwd,
99
+ )
100
+
101
+ # Conservative invalidation: arbitrary Stata commands may change data.
102
+ ui_channel.notify_potential_dataset_change()
35
103
  if raw:
36
104
  if result.success:
37
- return result.stdout
105
+ return result.log_path or ""
38
106
  if result.error:
39
107
  msg = result.error.message
40
108
  if result.error.rc is not None:
41
109
  msg = f"{msg}\nrc={result.error.rc}"
42
110
  return msg
43
- return result.stdout
111
+ return result.log_path or ""
44
112
  if as_json:
45
- return result.model_dump_json(indent=2)
46
- # Default structured string for compatibility when as_json is False but raw is also False
47
- return result.model_dump_json(indent=2)
113
+ return result.model_dump_json()
114
+
115
+
116
+ @mcp.tool()
117
+ def read_log(path: str, offset: int = 0, max_bytes: int = 65536) -> str:
118
+ """Read a slice of a log file.
119
+
120
+ Intended for clients that want to display a terminal-like view without pushing MBs of
121
+ output through MCP log notifications.
122
+
123
+ Args:
124
+ path: Absolute path to the log file previously provided by the server.
125
+ offset: Byte offset to start reading from.
126
+ max_bytes: Maximum bytes to read.
127
+
128
+ Returns a compact JSON string: {"path":..., "offset":..., "next_offset":..., "data":...}
129
+ """
130
+ try:
131
+ if offset < 0:
132
+ offset = 0
133
+ with open(path, "rb") as f:
134
+ f.seek(offset)
135
+ data = f.read(max_bytes)
136
+ next_offset = f.tell()
137
+ text = data.decode("utf-8", errors="replace")
138
+ return json.dumps({"path": path, "offset": offset, "next_offset": next_offset, "data": text})
139
+ except FileNotFoundError:
140
+ return json.dumps({"path": path, "offset": offset, "next_offset": offset, "data": ""})
141
+ except Exception as e:
142
+ return json.dumps({"path": path, "offset": offset, "next_offset": offset, "data": f"ERROR: {e}"})
143
+
48
144
 
49
145
  @mcp.tool()
50
146
  def get_data(start: int = 0, count: int = 50) -> str:
@@ -59,7 +155,20 @@ def get_data(start: int = 0, count: int = 50) -> str:
59
155
  """
60
156
  data = client.get_data(start, count)
61
157
  resp = DataResponse(start=start, count=count, data=data)
62
- return resp.model_dump_json(indent=2)
158
+ return resp.model_dump_json()
159
+
160
+
161
+ @mcp.tool()
162
+ def get_ui_channel() -> str:
163
+ """Return localhost HTTP endpoint + bearer token for the extension UI data plane."""
164
+ info = ui_channel.get_channel()
165
+ payload = {
166
+ "baseUrl": info.base_url,
167
+ "token": info.token,
168
+ "expiresAt": info.expires_at,
169
+ "capabilities": ui_channel.capabilities(),
170
+ }
171
+ return json.dumps(payload)
63
172
 
64
173
  @mcp.tool()
65
174
  def describe() -> str:
@@ -68,7 +177,12 @@ def describe() -> str:
68
177
 
69
178
  Use this to understand the structure of the dataset, variable names, and their formats before running analysis.
70
179
  """
71
- return client.run_command("describe")
180
+ result = client.run_command_structured("describe", echo=True)
181
+ if result.success:
182
+ return result.stdout
183
+ if result.error:
184
+ return result.error.message
185
+ return ""
72
186
 
73
187
  @mcp.tool()
74
188
  def list_graphs() -> str:
@@ -80,7 +194,7 @@ def list_graphs() -> str:
80
194
  default.
81
195
  """
82
196
  graphs = client.list_graphs_structured()
83
- return graphs.model_dump_json(indent=2)
197
+ return graphs.model_dump_json()
84
198
 
85
199
  @mcp.tool()
86
200
  def export_graph(graph_name: str = None, format: str = "pdf") -> str:
@@ -115,42 +229,122 @@ def get_stored_results() -> str:
115
229
  """
116
230
  Returns the current stored results (r-class and e-class scalars/macros) as a JSON-formatted string.
117
231
 
118
- Use this after running a command (like `summarize` or `regress`) to programmatically retrieve
232
+ Use this after running a command (like `summarize` or `regress`) to programmatically retrieve
119
233
  specific values (e.g., means, coefficients, sample sizes) for validation or further calculation.
120
234
  """
121
235
  import json
122
- return json.dumps(client.get_stored_results(), indent=2)
236
+ return json.dumps(client.get_stored_results())
123
237
 
124
238
  @mcp.tool()
125
- def load_data(source: str, clear: bool = True, as_json: bool = True, raw: bool = False) -> str:
239
+ def load_data(source: str, clear: bool = True, as_json: bool = True, raw: bool = False, max_output_lines: int = None) -> str:
126
240
  """
127
241
  Loads data using sysuse/webuse/use heuristics based on the source string.
128
242
  Automatically appends , clear unless clear=False.
243
+
244
+ Args:
245
+ source: Dataset source (e.g., "auto", "auto.dta", "/path/to/file.dta").
246
+ clear: If True, clears data in memory before loading.
247
+ as_json: If True, returns JSON envelope.
248
+ raw: If True, returns raw output only.
249
+ max_output_lines: If set, truncates stdout to this many lines for token efficiency.
129
250
  """
130
- result = client.load_data(source, clear=clear)
251
+ result = client.load_data(source, clear=clear, max_output_lines=max_output_lines)
252
+ ui_channel.notify_potential_dataset_change()
131
253
  if raw:
132
254
  return result.stdout if result.success else (result.error.message if result.error else result.stdout)
133
- return result.model_dump_json(indent=2) if as_json else result.model_dump_json(indent=2)
255
+ return result.model_dump_json()
134
256
 
135
257
  @mcp.tool()
136
- def codebook(variable: str, as_json: bool = True, trace: bool = False, raw: bool = False) -> str:
258
+ def codebook(variable: str, as_json: bool = True, trace: bool = False, raw: bool = False, max_output_lines: int = None) -> str:
137
259
  """
138
260
  Returns codebook/summary for a specific variable.
261
+
262
+ Args:
263
+ variable: The variable name to analyze.
264
+ as_json: If True, returns JSON envelope.
265
+ trace: If True, enables trace mode.
266
+ raw: If True, returns raw output only.
267
+ max_output_lines: If set, truncates stdout to this many lines for token efficiency.
139
268
  """
140
- result = client.codebook(variable, trace=trace)
269
+ result = client.codebook(variable, trace=trace, max_output_lines=max_output_lines)
141
270
  if raw:
142
271
  return result.stdout if result.success else (result.error.message if result.error else result.stdout)
143
- return result.model_dump_json(indent=2) if as_json else result.model_dump_json(indent=2)
272
+ return result.model_dump_json()
144
273
 
145
274
  @mcp.tool()
146
- def run_do_file(path: str, echo: bool = True, as_json: bool = True, trace: bool = False, raw: bool = False) -> str:
275
+ async def run_do_file(
276
+ path: str,
277
+ ctx: Context | None = None,
278
+ echo: bool = True,
279
+ as_json: bool = True,
280
+ trace: bool = False,
281
+ raw: bool = False,
282
+ max_output_lines: int = None,
283
+ cwd: str | None = None,
284
+ ) -> str:
147
285
  """
148
- Executes a .do file with optional trace output and JSON envelope.
286
+ Executes a .do file.
287
+
288
+ Stata output is written to a temporary log file on disk.
289
+ The server emits a single `notifications/logMessage` event containing the log file path
290
+ (JSON payload: {"event":"log_path","path":"..."}) so the client can tail it locally.
291
+ If the client supplies a progress callback/token, progress updates are emitted via
292
+ `notifications/progress`.
293
+
294
+ Args:
295
+ path: Path to the .do file to execute.
296
+ ctx: FastMCP-injected request context (used to send MCP notifications). Optional for direct Python calls.
297
+ echo: If True, includes command in output.
298
+ as_json: If True, returns JSON envelope.
299
+ trace: If True, enables trace mode.
300
+ raw: If True, returns raw output only.
301
+ max_output_lines: If set, truncates stdout to this many lines for token efficiency.
302
+ Note: This tool always uses log-file streaming semantics; there is no non-streaming mode.
149
303
  """
150
- result = client.run_do_file(path, echo=echo, trace=trace)
304
+ session = ctx.request_context.session if ctx is not None else None
305
+
306
+ async def notify_log(text: str) -> None:
307
+ if session is None:
308
+ return
309
+ await session.send_log_message(level="info", data=text, related_request_id=ctx.request_id)
310
+
311
+ progress_token = None
312
+ if ctx is not None and ctx.request_context.meta is not None:
313
+ progress_token = ctx.request_context.meta.progressToken
314
+
315
+ async def notify_progress(progress: float, total: float | None, message: str | None) -> None:
316
+ if session is None or progress_token is None:
317
+ return
318
+ await session.send_progress_notification(
319
+ progress_token=progress_token,
320
+ progress=progress,
321
+ total=total,
322
+ message=message,
323
+ related_request_id=ctx.request_id,
324
+ )
325
+
326
+ async def _noop_log(_text: str) -> None:
327
+ return
328
+
329
+ result = await client.run_do_file_streaming(
330
+ path,
331
+ notify_log=notify_log if session is not None else _noop_log,
332
+ notify_progress=notify_progress if progress_token is not None else None,
333
+ echo=echo,
334
+ trace=trace,
335
+ max_output_lines=max_output_lines,
336
+ cwd=cwd,
337
+ )
338
+
339
+ ui_channel.notify_potential_dataset_change()
340
+
151
341
  if raw:
152
- return result.stdout if result.success else (result.error.message if result.error else result.stdout)
153
- return result.model_dump_json(indent=2) if as_json else result.model_dump_json(indent=2)
342
+ if result.success:
343
+ return result.log_path or ""
344
+ if result.error:
345
+ return result.error.message
346
+ return result.log_path or ""
347
+ return result.model_dump_json()
154
348
 
155
349
  @mcp.resource("stata://data/summary")
156
350
  def get_summary() -> str:
@@ -158,7 +352,12 @@ def get_summary() -> str:
158
352
  Returns the output of the `summarize` command for the dataset in memory.
159
353
  Provides descriptive statistics (obs, mean, std. dev, min, max) for all variables.
160
354
  """
161
- return client.run_command("summarize")
355
+ result = client.run_command_structured("summarize", echo=True)
356
+ if result.success:
357
+ return result.stdout
358
+ if result.error:
359
+ return result.error.message
360
+ return ""
162
361
 
163
362
  @mcp.resource("stata://data/metadata")
164
363
  def get_metadata() -> str:
@@ -166,18 +365,23 @@ def get_metadata() -> str:
166
365
  Returns the output of the `describe` command.
167
366
  Provides metadata about the dataset, including variable names, storage types, display formats, and labels.
168
367
  """
169
- return client.run_command("describe")
368
+ result = client.run_command_structured("describe", echo=True)
369
+ if result.success:
370
+ return result.stdout
371
+ if result.error:
372
+ return result.error.message
373
+ return ""
170
374
 
171
375
  @mcp.resource("stata://graphs/list")
172
- def get_graph_list() -> str:
173
- """Returns list of active graphs."""
174
- return client.list_graphs_structured().model_dump_json(indent=2)
376
+ def list_graphs_resource() -> str:
377
+ """Resource wrapper for the graph list (uses tool list_graphs)."""
378
+ return list_graphs()
175
379
 
176
380
  @mcp.tool()
177
381
  def get_variable_list() -> str:
178
382
  """Returns JSON list of all variables."""
179
383
  variables = client.list_variables_structured()
180
- return variables.model_dump_json(indent=2)
384
+ return variables.model_dump_json()
181
385
 
182
386
  @mcp.resource("stata://variables/list")
183
387
  def get_variable_list_resource() -> str:
@@ -188,20 +392,37 @@ def get_variable_list_resource() -> str:
188
392
  def get_stored_results_resource() -> str:
189
393
  """Returns stored r() and e() results."""
190
394
  import json
191
- return json.dumps(client.get_stored_results(), indent=2)
395
+ return json.dumps(client.get_stored_results())
192
396
 
193
397
  @mcp.tool()
194
- def export_graphs_all() -> str:
398
+ def export_graphs_all(use_base64: bool = False) -> str:
195
399
  """
196
- Exports all graphs in memory to base64-encoded PNGs.
197
- Returns a JSON envelope listing graph names and images so the agent can open
198
- them directly and verify visuals (titles/labels/colors/legends) against the
199
- user's requested spec without an extra fetch.
400
+ Exports all graphs in memory to file paths (default) or base64-encoded SVGs.
401
+
402
+ Args:
403
+ use_base64: If True, returns base64-encoded images (token-intensive).
404
+ If False (default), returns file paths to SVG files (token-efficient).
405
+ Use file paths unless you need to embed images directly.
406
+
407
+ Returns a JSON envelope listing graph names and either file paths or base64 images.
408
+ The agent can open SVG files directly to verify visuals (titles/labels/colors/legends).
200
409
  """
201
- exports = client.export_graphs_all()
202
- return exports.model_dump_json(indent=2)
410
+ exports = client.export_graphs_all(use_base64=use_base64)
411
+ return exports.model_dump_json(exclude_none=False)
203
412
 
204
413
  def main():
414
+ # On Windows, Stata automation relies on COM, which is sensitive to threading models.
415
+ # The FastMCP server executes tool calls in a thread pool. If Stata is initialized
416
+ # lazily inside a worker thread, it may fail or hang due to COM/UI limitations.
417
+ # We explicitly initialize Stata here on the main thread to ensure the COM server
418
+ # is properly registered and accessible.
419
+ if os.name == "nt":
420
+ try:
421
+ client.init()
422
+ except Exception as e:
423
+ # Log error but let the server start; specific tools will fail gracefully later
424
+ logging.error(f"Stata initialization failed: {e}")
425
+
205
426
  mcp.run()
206
427
 
207
428
  if __name__ == "__main__":