PyPI - mcp-stata - Versions diffs - 1.0.1__py3-none-any.whl → 1.6.2__py3-none-any.whl - Mend

mcp-stata 1.0.1__py3-none-any.whl → 1.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mcp-stata might be problematic. Click here for more details.

Files changed (14) hide show

mcp_stata/discovery.py +132 -23
mcp_stata/graph_detector.py +385 -0
mcp_stata/models.py +4 -1
mcp_stata/server.py +274 -54
mcp_stata/stata_client.py +2000 -263
mcp_stata/streaming_io.py +261 -0
mcp_stata/ui_http.py +540 -0
mcp_stata-1.6.2.dist-info/METADATA +380 -0
mcp_stata-1.6.2.dist-info/RECORD +14 -0
mcp_stata-1.0.1.dist-info/METADATA +0 -240
mcp_stata-1.0.1.dist-info/RECORD +0 -11
{mcp_stata-1.0.1.dist-info → mcp_stata-1.6.2.dist-info}/WHEEL +0 -0
{mcp_stata-1.0.1.dist-info → mcp_stata-1.6.2.dist-info}/entry_points.txt +0 -0
{mcp_stata-1.0.1.dist-info → mcp_stata-1.6.2.dist-info}/licenses/LICENSE +0 -0

mcp_stata/server.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from mcp.server.fastmcp import FastMCP
-from mcp.server.fastmcp import Image
+import anyio
+from mcp.server.fastmcp import Context, FastMCP
 import mcp.types as types
 from .stata_client import StataClient
 from .models import (
@@ -12,40 +12,128 @@ import logging
 import json
 import os
-LOG_LEVEL = os.getenv("STATA_MCP_LOGLEVEL", "INFO").upper()
+from .ui_http import UIChannelManager
+LOG_LEVEL = os.getenv("MCP_STATA_LOGLEVEL", "INFO").upper()
 logging.basicConfig(level=LOG_LEVEL, format="%(asctime)s %(levelname)s %(name)s - %(message)s")
 # Initialize FastMCP
-mcp = FastMCP("stata")
+mcp = FastMCP("mcp_stata")
 client = StataClient()
+ui_channel = UIChannelManager(client)
 @mcp.tool()
-def run_command(code: str, echo: bool = True, as_json: bool = True, trace: bool = False, raw: bool = False) -> str:
+async def run_command(
+    code: str,
+    ctx: Context | None = None,
+    echo: bool = True,
+    as_json: bool = True,
+    trace: bool = False,
+    raw: bool = False,
+    max_output_lines: int = None,
+    cwd: str | None = None,
+) -> str:
     """
-    Executes a specific Stata command.
+    Executes Stata code.
+    This is the primary tool for interacting with Stata.
+    Stata output is written to a temporary log file on disk.
+    The server emits a single `notifications/logMessage` event containing the log file path
+    (JSON payload: {"event":"log_path","path":"..."}) so the client can tail it locally.
+    If the client supplies a progress callback/token, progress updates may also be emitted
+    via `notifications/progress`.
-    This is the primary tool for interacting with Stata. You can run any valid Stata syntax.
     Args:
-        code: The detailed Stata command(s) to execute (e.g., "sysuse auto", "regress price mpg", "summarize").
+        code: The Stata command(s) to execute (e.g., "sysuse auto", "regress price mpg", "summarize").
+        ctx: FastMCP-injected request context (used to send MCP notifications). Optional for direct Python calls.
         echo: If True, the command itself is included in the output. Default is True.
         as_json: If True, returns a JSON envelope with rc/stdout/stderr/error.
         trace: If True, enables `set trace on` for deeper error diagnostics (automatically disabled after).
+        raw: If True, return raw output/error message rather than a JSON envelope.
+        max_output_lines: If set, truncates stdout to this many lines for token efficiency.
+                         Useful for verbose commands (regress, codebook, etc.).
+        Note: This tool always uses log-file streaming semantics; there is no non-streaming mode.
     """
-    result = client.run_command_structured(code, echo=echo, trace=trace)
+    session = ctx.request_context.session if ctx is not None else None
+    async def notify_log(text: str) -> None:
+        if session is None:
+            return
+        await session.send_log_message(level="info", data=text, related_request_id=ctx.request_id)
+    progress_token = None
+    if ctx is not None and ctx.request_context.meta is not None:
+        progress_token = ctx.request_context.meta.progressToken
+    async def notify_progress(progress: float, total: float | None, message: str | None) -> None:
+        if session is None or progress_token is None:
+            return
+        await session.send_progress_notification(
+            progress_token=progress_token,
+            progress=progress,
+            total=total,
+            message=message,
+            related_request_id=ctx.request_id,
+        )
+    async def _noop_log(_text: str) -> None:
+        return
+    result = await client.run_command_streaming(
+        code,
+        notify_log=notify_log if session is not None else _noop_log,
+        notify_progress=notify_progress if progress_token is not None else None,
+        echo=echo,
+        trace=trace,
+        max_output_lines=max_output_lines,
+        cwd=cwd,
+    )
+    # Conservative invalidation: arbitrary Stata commands may change data.
+    ui_channel.notify_potential_dataset_change()
     if raw:
         if result.success:
-            return result.stdout
+            return result.log_path or ""
         if result.error:
             msg = result.error.message
             if result.error.rc is not None:
                 msg = f"{msg}\nrc={result.error.rc}"
             return msg
-        return result.stdout
+        return result.log_path or ""
     if as_json:
-        return result.model_dump_json(indent=2)
-    # Default structured string for compatibility when as_json is False but raw is also False
-    return result.model_dump_json(indent=2)
+        return result.model_dump_json()
+@mcp.tool()
+def read_log(path: str, offset: int = 0, max_bytes: int = 65536) -> str:
+    """Read a slice of a log file.
+    Intended for clients that want to display a terminal-like view without pushing MBs of
+    output through MCP log notifications.
+    Args:
+        path: Absolute path to the log file previously provided by the server.
+        offset: Byte offset to start reading from.
+        max_bytes: Maximum bytes to read.
+    Returns a compact JSON string: {"path":..., "offset":..., "next_offset":..., "data":...}
+    """
+    try:
+        if offset < 0:
+            offset = 0
+        with open(path, "rb") as f:
+            f.seek(offset)
+            data = f.read(max_bytes)
+            next_offset = f.tell()
+        text = data.decode("utf-8", errors="replace")
+        return json.dumps({"path": path, "offset": offset, "next_offset": next_offset, "data": text})
+    except FileNotFoundError:
+        return json.dumps({"path": path, "offset": offset, "next_offset": offset, "data": ""})
+    except Exception as e:
+        return json.dumps({"path": path, "offset": offset, "next_offset": offset, "data": f"ERROR: {e}"})
 @mcp.tool()
 def get_data(start: int = 0, count: int = 50) -> str:
@@ -60,7 +148,20 @@ def get_data(start: int = 0, count: int = 50) -> str:
     """
     data = client.get_data(start, count)
     resp = DataResponse(start=start, count=count, data=data)
-    return resp.model_dump_json(indent=2)
+    return resp.model_dump_json()
+@mcp.tool()
+def get_ui_channel() -> str:
+    """Return localhost HTTP endpoint + bearer token for the extension UI data plane."""
+    info = ui_channel.get_channel()
+    payload = {
+        "baseUrl": info.base_url,
+        "token": info.token,
+        "expiresAt": info.expires_at,
+        "capabilities": ui_channel.capabilities(),
+    }
+    return json.dumps(payload)
 @mcp.tool()
 def describe() -> str:
@@ -69,35 +170,40 @@ def describe() -> str:
     Use this to understand the structure of the dataset, variable names, and their formats before running analysis.
     """
-    return client.run_command("describe")
+    result = client.run_command_structured("describe", echo=True)
+    if result.success:
+        return result.stdout
+    if result.error:
+        return result.error.message
+    return ""
 @mcp.tool()
 def list_graphs() -> str:
     """
     Lists the names of all graphs currently stored in Stata's memory.
-    Use this to see which graphs are available for export via `export_graph`.
+    Use this to see which graphs are available for export via `export_graph`. The
+    response marks the active graph so the agent knows which one will export by
+    default.
     """
     graphs = client.list_graphs_structured()
-    return graphs.model_dump_json(indent=2)
+    return graphs.model_dump_json()
 @mcp.tool()
-def export_graph(graph_name: str = None) -> Image:
+def export_graph(graph_name: str = None, format: str = "pdf") -> str:
     """
-    Exports a stored Stata graph to an image format (PNG) and returns it.
+    Exports a stored Stata graph to a file and returns its path.
     Args:
         graph_name: The name of the graph to export (as seen in `list_graphs`).
                    If None, exports the currently active graph.
+        format: Output format, defaults to "pdf". Supported: "pdf", "png". Use
+                "png" to view the plot directly so the agent can visually check
+                titles, labels, legends, colors, and other user requirements.
     """
     try:
-        path = client.export_graph(graph_name)
-        with open(path, "rb") as f:
-            data = f.read()
-        return Image(data=data, format="png")
+        return client.export_graph(graph_name, format=format)
     except Exception as e:
-        # Return error as text if image fails?
-        # FastMCP expects Image or error.
         raise RuntimeError(f"Failed to export graph: {e}")
 @mcp.tool()
@@ -116,42 +222,122 @@ def get_stored_results() -> str:
     """
     Returns the current stored results (r-class and e-class scalars/macros) as a JSON-formatted string.
-    Use this after running a command (like `summarize` or `regress`) to programmatically retrieve
+    Use this after running a command (like `summarize` or `regress`) to programmatically retrieve
     specific values (e.g., means, coefficients, sample sizes) for validation or further calculation.
     """
     import json
-    return json.dumps(client.get_stored_results(), indent=2)
+    return json.dumps(client.get_stored_results())
 @mcp.tool()
-def load_data(source: str, clear: bool = True, as_json: bool = True, raw: bool = False) -> str:
+def load_data(source: str, clear: bool = True, as_json: bool = True, raw: bool = False, max_output_lines: int = None) -> str:
     """
     Loads data using sysuse/webuse/use heuristics based on the source string.
     Automatically appends , clear unless clear=False.
+    Args:
+        source: Dataset source (e.g., "auto", "auto.dta", "/path/to/file.dta").
+        clear: If True, clears data in memory before loading.
+        as_json: If True, returns JSON envelope.
+        raw: If True, returns raw output only.
+        max_output_lines: If set, truncates stdout to this many lines for token efficiency.
     """
-    result = client.load_data(source, clear=clear)
+    result = client.load_data(source, clear=clear, max_output_lines=max_output_lines)
+    ui_channel.notify_potential_dataset_change()
     if raw:
         return result.stdout if result.success else (result.error.message if result.error else result.stdout)
-    return result.model_dump_json(indent=2) if as_json else result.model_dump_json(indent=2)
+    return result.model_dump_json()
 @mcp.tool()
-def codebook(variable: str, as_json: bool = True, trace: bool = False, raw: bool = False) -> str:
+def codebook(variable: str, as_json: bool = True, trace: bool = False, raw: bool = False, max_output_lines: int = None) -> str:
     """
     Returns codebook/summary for a specific variable.
+    Args:
+        variable: The variable name to analyze.
+        as_json: If True, returns JSON envelope.
+        trace: If True, enables trace mode.
+        raw: If True, returns raw output only.
+        max_output_lines: If set, truncates stdout to this many lines for token efficiency.
     """
-    result = client.codebook(variable, trace=trace)
+    result = client.codebook(variable, trace=trace, max_output_lines=max_output_lines)
     if raw:
         return result.stdout if result.success else (result.error.message if result.error else result.stdout)
-    return result.model_dump_json(indent=2) if as_json else result.model_dump_json(indent=2)
+    return result.model_dump_json()
 @mcp.tool()
-def run_do_file(path: str, echo: bool = True, as_json: bool = True, trace: bool = False, raw: bool = False) -> str:
+async def run_do_file(
+    path: str,
+    ctx: Context | None = None,
+    echo: bool = True,
+    as_json: bool = True,
+    trace: bool = False,
+    raw: bool = False,
+    max_output_lines: int = None,
+    cwd: str | None = None,
+) -> str:
     """
-    Executes a .do file with optional trace output and JSON envelope.
+    Executes a .do file.
+    Stata output is written to a temporary log file on disk.
+    The server emits a single `notifications/logMessage` event containing the log file path
+    (JSON payload: {"event":"log_path","path":"..."}) so the client can tail it locally.
+    If the client supplies a progress callback/token, progress updates are emitted via
+    `notifications/progress`.
+    Args:
+        path: Path to the .do file to execute.
+        ctx: FastMCP-injected request context (used to send MCP notifications). Optional for direct Python calls.
+        echo: If True, includes command in output.
+        as_json: If True, returns JSON envelope.
+        trace: If True, enables trace mode.
+        raw: If True, returns raw output only.
+        max_output_lines: If set, truncates stdout to this many lines for token efficiency.
+        Note: This tool always uses log-file streaming semantics; there is no non-streaming mode.
     """
-    result = client.run_do_file(path, echo=echo, trace=trace)
+    session = ctx.request_context.session if ctx is not None else None
+    async def notify_log(text: str) -> None:
+        if session is None:
+            return
+        await session.send_log_message(level="info", data=text, related_request_id=ctx.request_id)
+    progress_token = None
+    if ctx is not None and ctx.request_context.meta is not None:
+        progress_token = ctx.request_context.meta.progressToken
+    async def notify_progress(progress: float, total: float | None, message: str | None) -> None:
+        if session is None or progress_token is None:
+            return
+        await session.send_progress_notification(
+            progress_token=progress_token,
+            progress=progress,
+            total=total,
+            message=message,
+            related_request_id=ctx.request_id,
+        )
+    async def _noop_log(_text: str) -> None:
+        return
+    result = await client.run_do_file_streaming(
+        path,
+        notify_log=notify_log if session is not None else _noop_log,
+        notify_progress=notify_progress if progress_token is not None else None,
+        echo=echo,
+        trace=trace,
+        max_output_lines=max_output_lines,
+        cwd=cwd,
+    )
+    ui_channel.notify_potential_dataset_change()
     if raw:
-        return result.stdout if result.success else (result.error.message if result.error else result.stdout)
-    return result.model_dump_json(indent=2) if as_json else result.model_dump_json(indent=2)
+        if result.success:
+            return result.log_path or ""
+        if result.error:
+            return result.error.message
+        return result.log_path or ""
+    return result.model_dump_json()
 @mcp.resource("stata://data/summary")
 def get_summary() -> str:
@@ -159,7 +345,12 @@ def get_summary() -> str:
     Returns the output of the `summarize` command for the dataset in memory.
     Provides descriptive statistics (obs, mean, std. dev, min, max) for all variables.
     """
-    return client.run_command("summarize")
+    result = client.run_command_structured("summarize", echo=True)
+    if result.success:
+        return result.stdout
+    if result.error:
+        return result.error.message
+    return ""
 @mcp.resource("stata://data/metadata")
 def get_metadata() -> str:
@@ -167,35 +358,64 @@ def get_metadata() -> str:
     Returns the output of the `describe` command.
     Provides metadata about the dataset, including variable names, storage types, display formats, and labels.
     """
-    return client.run_command("describe")
+    result = client.run_command_structured("describe", echo=True)
+    if result.success:
+        return result.stdout
+    if result.error:
+        return result.error.message
+    return ""
 @mcp.resource("stata://graphs/list")
-def get_graph_list() -> str:
-    """Returns list of active graphs."""
-    return client.list_graphs_structured().model_dump_json(indent=2)
+def list_graphs_resource() -> str:
+    """Resource wrapper for the graph list (uses tool list_graphs)."""
+    return list_graphs()
-@mcp.resource("stata://variables/list")
+@mcp.tool()
 def get_variable_list() -> str:
     """Returns JSON list of all variables."""
     variables = client.list_variables_structured()
-    return variables.model_dump_json(indent=2)
+    return variables.model_dump_json()
+@mcp.resource("stata://variables/list")
+def get_variable_list_resource() -> str:
+    """Resource wrapper for the variable list."""
+    return get_variable_list()
 @mcp.resource("stata://results/stored")
 def get_stored_results_resource() -> str:
     """Returns stored r() and e() results."""
     import json
-    return json.dumps(client.get_stored_results(), indent=2)
+    return json.dumps(client.get_stored_results())
 @mcp.tool()
-def export_graphs_all() -> str:
+def export_graphs_all(use_base64: bool = False) -> str:
     """
-    Exports all graphs in memory to base64-encoded PNGs.
-    Returns a JSON envelope listing graph names and images.
+    Exports all graphs in memory to file paths (default) or base64-encoded SVGs.
+    Args:
+        use_base64: If True, returns base64-encoded images (token-intensive).
+                   If False (default), returns file paths to SVG files (token-efficient).
+                   Use file paths unless you need to embed images directly.
+    Returns a JSON envelope listing graph names and either file paths or base64 images.
+    The agent can open SVG files directly to verify visuals (titles/labels/colors/legends).
     """
-    exports = client.export_graphs_all()
-    return exports.model_dump_json(indent=2)
+    exports = client.export_graphs_all(use_base64=use_base64)
+    return exports.model_dump_json(exclude_none=False)
 def main():
+    # On Windows, Stata automation relies on COM, which is sensitive to threading models.
+    # The FastMCP server executes tool calls in a thread pool. If Stata is initialized
+    # lazily inside a worker thread, it may fail or hang due to COM/UI limitations.
+    # We explicitly initialize Stata here on the main thread to ensure the COM server
+    # is properly registered and accessible.
+    if os.name == "nt":
+        try:
+            client.init()
+        except Exception as e:
+            # Log error but let the server start; specific tools will fail gracefully later
+            logging.error(f"Stata initialization failed: {e}")
     mcp.run()
 if __name__ == "__main__":

mcp-stata 1.0.1__py3-none-any.whl → 1.6.2__py3-none-any.whl

Potentially problematic release.

mcp-stata 1.0.1__py3-none-any.whl → 1.6.2__py3-none-any.whl