PyPI - minitap-mcp - Versions diffs - 0.9.0__py3-none-any.whl - Mend

minitap-mcp 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

minitap/mcp/__init__.py +0 -0
minitap/mcp/core/agents/compare_screenshots/agent.py +75 -0
minitap/mcp/core/agents/compare_screenshots/eval/prompts/prompt_1.md +62 -0
minitap/mcp/core/agents/compare_screenshots/eval/scenario_1_add_cartoon_img_and_move_button/actual.png +0 -0
minitap/mcp/core/agents/compare_screenshots/eval/scenario_1_add_cartoon_img_and_move_button/figma.png +0 -0
minitap/mcp/core/agents/compare_screenshots/eval/scenario_1_add_cartoon_img_and_move_button/human_feedback.txt +18 -0
minitap/mcp/core/agents/compare_screenshots/eval/scenario_1_add_cartoon_img_and_move_button/prompt_1/model_params.json +3 -0
minitap/mcp/core/agents/compare_screenshots/eval/scenario_1_add_cartoon_img_and_move_button/prompt_1/output.md +46 -0
minitap/mcp/core/agents/compare_screenshots/prompt.md +62 -0
minitap/mcp/core/cloud_apk.py +117 -0
minitap/mcp/core/config.py +111 -0
minitap/mcp/core/decorators.py +107 -0
minitap/mcp/core/device.py +249 -0
minitap/mcp/core/llm.py +39 -0
minitap/mcp/core/logging_config.py +59 -0
minitap/mcp/core/models.py +59 -0
minitap/mcp/core/sdk_agent.py +35 -0
minitap/mcp/core/storage.py +407 -0
minitap/mcp/core/task_runs.py +100 -0
minitap/mcp/core/utils/figma.py +69 -0
minitap/mcp/core/utils/images.py +55 -0
minitap/mcp/main.py +328 -0
minitap/mcp/server/cloud_mobile.py +492 -0
minitap/mcp/server/middleware.py +21 -0
minitap/mcp/server/poller.py +78 -0
minitap/mcp/server/remote_proxy.py +96 -0
minitap/mcp/tools/execute_mobile_command.py +182 -0
minitap/mcp/tools/read_swift_logs.py +297 -0
minitap/mcp/tools/screen_analyzer.md +17 -0
minitap/mcp/tools/take_screenshot.py +53 -0
minitap/mcp/tools/upload_screenshot.py +80 -0
minitap_mcp-0.9.0.dist-info/METADATA +352 -0
minitap_mcp-0.9.0.dist-info/RECORD +35 -0
minitap_mcp-0.9.0.dist-info/WHEEL +4 -0
minitap_mcp-0.9.0.dist-info/entry_points.txt +3 -0

minitap/mcp/tools/execute_mobile_command.py ADDED Viewed

@@ -0,0 +1,182 @@
+"""Tool for running manual tasks on a connected mobile device."""
+from collections.abc import Mapping
+from pathlib import Path
+from typing import Any
+from fastmcp.exceptions import ToolError
+from fastmcp.tools.tool import ToolResult
+from mcp.types import TextContent
+from minitap.mobile_use.sdk.types import ManualTaskConfig
+from minitap.mobile_use.sdk.types.task import PlatformTaskRequest
+from pydantic import Field
+from minitap.mcp.core.cloud_apk import install_apk_on_cloud_mobile, upload_apk_to_cloud_mobile
+from minitap.mcp.core.config import settings
+from minitap.mcp.core.decorators import handle_tool_errors
+from minitap.mcp.core.logging_config import get_logger
+from minitap.mcp.core.sdk_agent import get_mobile_use_agent
+from minitap.mcp.core.storage import StorageDownloadError, download_trajectory_gif
+from minitap.mcp.core.task_runs import TaskRunsError, get_latest_task_run_id
+from minitap.mcp.main import mcp
+from minitap.mcp.server.cloud_mobile import check_cloud_mobile_status
+logger = get_logger(__name__)
+def _serialize_result(result: Any) -> Any:
+    """Convert SDK responses to serializable data for MCP."""
+    if hasattr(result, "model_dump"):
+        return result.model_dump()
+    if hasattr(result, "dict"):
+        return result.dict()
+    if isinstance(result, Mapping):
+        return dict(result)
+    return result
+@mcp.tool(
+    name="execute_mobile_command",
+    description="""
+    Execute a natural language command on a mobile device using the Minitap SDK.
+    This tool allows you to control your Android or iOS device using natural language.
+    Examples:
+    - "Open the settings app and tell me the battery level"
+    - "Find the first 3 unread emails in Gmail"
+    - "Take a screenshot and save it"
+    APK Deployment (Cloud Mobile Only):
+    When CLOUD_MOBILE_NAME is set, you can deploy and test APKs on cloud mobiles:
+    - Set apk_path to the path of your locally built APK
+    - The APK will be uploaded to cloud storage and installed on the device
+    - Requires MINITAP_API_KEY environment variable
+    - Must provide locked_app_package when using apk_path
+    Example with APK deployment:
+    execute_mobile_command(
+        apk_path="/path/to/app-debug.apk",
+        locked_app_package="com.example.myapp",
+        goal="Test the login flow with valid credentials"
+    )
+    Note: If apk path is set and no cloud mobile name -> it will raise a tool error
+    """,
+)
+@handle_tool_errors
+async def execute_mobile_command(
+    goal: str = Field(description="High-level goal describing the action to perform."),
+    output_description: str | None = Field(
+        default=None,
+        description="Optional description of the expected output format. "
+        "For example: 'A JSON array with sender and subject for each email' "
+        "or 'The battery percentage as a number'.",
+    ),
+    locked_app_package: str | None = Field(
+        default=None,
+        description="Optional package name of the app to lock the device to. "
+        "Will launch the app if not already running, and keep it in foreground "
+        "until the task is completed. REQUIRED when using apk_path.",
+    ),
+    apk_path: str | None = Field(
+        default=None,
+        description="Path to local APK file to deploy to cloud mobile. "
+        "Only works when CLOUD_MOBILE_NAME is set. "
+        "The APK will be uploaded to cloud storage and installed before task execution. "
+        "Requires MINITAP_API_KEY to be configured. ",
+    ),
+) -> str | dict[str, Any] | ToolResult:
+    """Run a manual task on a mobile device via the Minitap platform."""
+    try:
+        if settings.CLOUD_MOBILE_NAME:
+            await check_cloud_mobile_status(settings.CLOUD_MOBILE_NAME)
+        if apk_path:
+            if not settings.CLOUD_MOBILE_NAME:
+                raise ToolError(
+                    "apk_path parameter requires CLOUD_MOBILE_NAME to be set. "
+                    "APK deployment is only supported in cloud mobile mode."
+                )
+            # Step 1: Upload APK via Platform storage API
+            filename = await upload_apk_to_cloud_mobile(apk_path=apk_path)
+            # Step 2: Install APK on cloud mobile
+            await install_apk_on_cloud_mobile(filename=filename)
+        request = PlatformTaskRequest(
+            task=ManualTaskConfig(
+                goal=goal,
+                output_description=output_description,
+            ),
+            execution_origin="mcp",
+        )
+        agent = get_mobile_use_agent()
+        if not agent._initialized:
+            await agent.init()
+        result = await agent.run_task(
+            request=request,
+            locked_app_package=locked_app_package,
+        )
+        trajectory_gif_path: Path | None = None
+        if settings.TRAJECTORY_GIF_DOWNLOAD_FOLDER:
+            trajectory_gif_path = await _download_trajectory_gif_if_available()
+        serialized_result = _serialize_result(result)
+        # If trajectory was saved, return a ToolResult with multiple content items
+        if trajectory_gif_path:
+            import json
+            result_text = (
+                json.dumps(serialized_result, indent=2)
+                if isinstance(serialized_result, dict)
+                else str(serialized_result)
+            )
+            return ToolResult(
+                content=[
+                    TextContent(type="text", text=result_text),
+                    TextContent(type="text", text=f"Trajectory saved to {trajectory_gif_path}"),
+                ],
+            )
+        return serialized_result
+    except Exception as e:
+        raise ToolError(str(e))
+async def _download_trajectory_gif_if_available() -> Path | None:
+    """Download the trajectory GIF if available and folder is configured.
+    Fetches the latest task run ID from the API and downloads the GIF.
+    Returns:
+        The path to the downloaded GIF file, or None if download failed or not configured.
+    """
+    download_folder = settings.TRAJECTORY_GIF_DOWNLOAD_FOLDER
+    if not download_folder:
+        logger.warning("TRAJECTORY_GIF_DOWNLOAD_FOLDER not configured, skipping GIF download")
+        return None
+    task_run_id = None
+    try:
+        task_run_id = await get_latest_task_run_id()
+        gif_path = await download_trajectory_gif(
+            task_run_id=task_run_id,
+            download_path=download_folder,
+        )
+        logger.info(
+            "Trajectory GIF downloaded",
+            task_run_id=task_run_id,
+            path=str(gif_path),
+        )
+        return gif_path
+    except (StorageDownloadError, TaskRunsError) as e:
+        logger.warning(
+            "Failed to download trajectory GIF",
+            task_run_id=task_run_id,
+            error=str(e),
+        )
+        return None

minitap/mcp/tools/read_swift_logs.py ADDED Viewed

@@ -0,0 +1,297 @@
+"""Tool for reading Swift/iOS logs for debugging during development."""
+import asyncio
+import json
+import sys
+from datetime import datetime
+from pydantic import BaseModel, Field
+from minitap.mcp.core.decorators import handle_tool_errors
+from minitap.mcp.core.logging_config import get_logger
+from minitap.mcp.main import mcp
+logger = get_logger(__name__)
+class BacktraceFrame(BaseModel):
+    """One frame of a log entry's backtrace.
+    Every field is optional and defaults to None; `_parse_backtrace` fills each
+    one from the same-named key of a raw frame dict.
+    """
+    # Camel-case names mirror the keys read from the raw frame dict.
+    imageOffset: int | None = None
+    imageUUID: str | None = None
+    imagePath: str | None = None
+    symbol: str | None = None
+class Backtrace(BaseModel):
+    """Container for the frames of a single log entry's backtrace."""
+    # NOTE(review): mutable list default - presumably pydantic copies field
+    # defaults per instance, so this is not shared state; confirm.
+    frames: list[BacktraceFrame] = []
+class SimplifiedLog(BaseModel):
+    """Trimmed-down view of one log entry returned to the tool caller.
+    Built by `_map_to_simplified_logs` from a raw log entry dict.
+    """
+    # Raw "timestamp" after passing through `_convert_to_iso8601`.
+    timestamp: str
+    # Raw "messageType" value.
+    level: str
+    # Raw "category" value.
+    category: str
+    # Raw "eventMessage" value; entries without one are not mapped at all.
+    message: str
+    # Raw "processID" value.
+    process_id: int
+    # Parsed from the raw "backtrace" dict; None when absent or without frames.
+    backtrace: Backtrace | None = None
+    # Raw "senderImagePath", "processImagePath" and "senderImageUUID" values.
+    sender_image_path: str | None = None
+    process_image_path: str | None = None
+    sender_image_uuid: str | None = None
+class LogsOutput(BaseModel):
+    """Result payload of the `read_swift_logs` tool."""
+    # Bundle identifier the caller asked about.
+    bundle_id: str
+    # Size of the queried time window, in minutes.
+    last_minutes: int
+    # Number of items in `logs`.
+    log_count: int
+    logs: list[SimplifiedLog]
+    # Set to a "No logs found ..." note when the query returned nothing.
+    message: str | None = None
+def _convert_to_iso8601(timestamp: str) -> str:
+    """Convert macOS log show timestamp to ISO8601 format.
+    Input format:  "YYYY-MM-DD HH:MM:SS.NNNNNN±TTTT"
+    Output format: "YYYY-MM-DDTHH:MM:SS.NNNNNN±TT:TT"
+    """
+    if not timestamp:
+        return timestamp
+    try:
+        dt = datetime.fromisoformat(timestamp.replace(" ", "T"))
+        return dt.isoformat()
+    except ValueError:
+        return timestamp
+def _parse_backtrace(raw: dict | None) -> Backtrace | None:
+    """Parse raw backtrace dict into Backtrace model."""
+    if not raw or not isinstance(raw, dict):
+        return None
+    frames_raw = raw.get("frames", [])
+    if not frames_raw:
+        return None
+    frames = [
+        BacktraceFrame(
+            imageOffset=f.get("imageOffset"),
+            imageUUID=f.get("imageUUID"),
+            imagePath=f.get("imagePath"),
+            symbol=f.get("symbol"),
+        )
+        for f in frames_raw
+        if isinstance(f, dict)
+    ]
+    return Backtrace(frames=frames) if frames else None
+async def _run_log_show(
+    predicate: str | None,
+    last_minutes: int,
+    include_debug: bool,
+    *,
+    simulator: bool = False,
+) -> tuple[list, str | None]:
+    """Run log show command and return parsed logs and optional error message."""
+    if simulator:
+        cmd = ["xcrun", "simctl", "spawn", "booted", "log", "show"]
+    else:
+        cmd = ["log", "show"]
+    cmd.extend(["--style", "json", "--last", f"{last_minutes}m"])
+    if predicate:
+        cmd.extend(["--predicate", predicate])
+    if include_debug:
+        cmd.extend(["--debug", "--info"])
+    process = await asyncio.create_subprocess_exec(
+        *cmd,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    stdout, stderr = await process.communicate()
+    error_output = stderr.decode("utf-8", errors="replace")
+    if process.returncode != 0:
+        if simulator and "No devices are booted" in error_output:
+            return [], "Error: No iOS Simulator is running. Please boot a simulator first."
+        return [], None
+    output = stdout.decode("utf-8", errors="replace").strip()
+    lines = output.split("\n")
+    if lines and lines[0].startswith("Filtering the log data"):
+        lines = lines[1:]
+    if lines and lines[0].startswith("Skipping info and debug"):
+        lines = lines[1:]
+    json_output = "\n".join(lines).strip()
+    if not json_output or json_output == "[]":
+        return [], None
+    try:
+        return json.loads(json_output), None
+    except json.JSONDecodeError:
+        return [], None
+@mcp.tool(
+    name="read_swift_logs",
+    description="""
+    Read Swift/iOS logs for debugging during app development. Please note that this tool expect the
+    bundle identifier of the app to be passed as an argument.
+    This tool can read logs from:
+    1. iOS Simulator runtime logs (source="simulator") - filters by process name
+    2. All unified logging sources (source="all") - queries by subsystem and process name
+    Use cases:
+    - Debug runtime issues by reading simulator logs
+    - Find crash logs and error messages
+    - Read print() statements and os.Logger output from your Swift app
+    Examples:
+    - read_swift_logs(source="simulator", bundle_id="com.example.myapp")
+    - read_swift_logs(source="simulator", bundle_id="com.example.myapp", last_minutes=10)
+    - read_swift_logs(source="all", bundle_id="com.example.myapp", last_minutes=5)
+    """,
+)
+@handle_tool_errors
+async def read_swift_logs(
+    bundle_id: str = Field(
+        description="The bundle identifier of the iOS app (e.g., 'com.example.myapp'). "
+        "This is used to filter logs by subsystem.",
+    ),
+    source: str = Field(
+        default="all",
+        description="Log source: 'simulator' for iOS Simulator runtime logs, "
+        "'all' to read from all sources that generate runtime logs related with the bundle.",
+    ),
+    last_minutes: int = Field(
+        default=5,
+        description="Number of minutes of logs to retrieve. Default is 5 minutes.",
+    ),
+) -> LogsOutput | str:
+    """Read Swift/iOS logs from simulator or file."""
+    if sys.platform != "darwin":
+        return "Error: This tool only works on macOS with Xcode installed."
+    process_name = bundle_id.split(".")[-1]
+    if source == "simulator":
+        return await _read_simulator_logs(bundle_id, last_minutes, process_name)
+    elif source == "all":
+        return await _read_file_logs(bundle_id, process_name, last_minutes)
+    else:
+        return f"Error: Unknown source '{source}'. Use 'simulator' or 'all'."
+def _map_to_simplified_logs(log_entries: list[dict]) -> list[SimplifiedLog]:
+    return [
+        SimplifiedLog(
+            timestamp=_convert_to_iso8601(entry.get("timestamp", "")),
+            level=entry.get("messageType", ""),
+            category=entry.get("category", ""),
+            message=entry.get("eventMessage", ""),
+            process_id=entry.get("processID", 0),
+            backtrace=_parse_backtrace(entry.get("backtrace")),
+            sender_image_path=entry.get("senderImagePath"),
+            process_image_path=entry.get("processImagePath"),
+            sender_image_uuid=entry.get("senderImageUUID"),
+        )
+        for entry in log_entries
+        if entry.get("eventMessage")
+    ]
+async def _read_simulator_logs(
+    bundle_id: str,
+    last_minutes: int,
+    process_name: str | None,
+) -> LogsOutput | str:
+    """Read historical logs from the booted iOS Simulator."""
+    predicate = f'processImagePath CONTAINS "{process_name}"' if process_name else None
+    logger.info(f"Reading simulator logs for last {last_minutes}m")
+    log_entries, error = await _run_log_show(
+        predicate, last_minutes, include_debug=True, simulator=True
+    )
+    if error:
+        return error
+    if not log_entries:
+        return LogsOutput(
+            bundle_id=bundle_id,
+            last_minutes=last_minutes,
+            log_count=0,
+            logs=[],
+            message=f"No logs found for '{process_name}' in the last {last_minutes} min.",
+        )
+    simplified_logs = _map_to_simplified_logs(log_entries)
+    return LogsOutput(
+        bundle_id=bundle_id,
+        last_minutes=last_minutes,
+        log_count=len(simplified_logs),
+        logs=simplified_logs,
+    )
+async def _read_file_logs(bundle_id: str, process_name: str, last_minutes: int) -> LogsOutput:
+    # Query 1: Logs by subsystem (os.Logger logs)
+    subsystem_predicate = f'subsystem == "{bundle_id}"'
+    # Query 2: Logs by process name (catches crashes and system logs)
+    # Include fatal errors, crashes, and error-level logs
+    process_predicate = (
+        f'process == "{process_name}" AND '
+        f'(messageType == "Fault" OR messageType == "Error" OR '
+        f'eventMessage CONTAINS "fatal" OR eventMessage CONTAINS "crash")'
+    )
+    logger.info(
+        "fetching_ios_logs",
+        bundle_id=bundle_id,
+        last_minutes=last_minutes,
+    )
+    # Run both queries in parallel
+    (subsystem_logs, _), (process_logs, _) = await asyncio.gather(
+        _run_log_show(subsystem_predicate, last_minutes, include_debug=True),
+        _run_log_show(process_predicate, last_minutes, include_debug=False),
+    )
+    # Merge and deduplicate logs by timestamp + message
+    all_logs = subsystem_logs + process_logs
+    seen = set()
+    unique_logs = []
+    for log_entry in all_logs:
+        key = (log_entry.get("timestamp"), log_entry.get("eventMessage"))
+        if key not in seen:
+            seen.add(key)
+            unique_logs.append(log_entry)
+    # Sort by timestamp
+    unique_logs.sort(key=lambda x: x.get("timestamp", ""))
+    if not unique_logs:
+        return LogsOutput(
+            bundle_id=bundle_id,
+            last_minutes=last_minutes,
+            log_count=0,
+            logs=[],
+            message=f"No logs found for '{bundle_id}' in the last {last_minutes} min.",
+        )
+    simplified_logs = _map_to_simplified_logs(unique_logs)
+    logger.info("logs_retrieved", bundle_id=bundle_id, log_count=len(simplified_logs))
+    return LogsOutput(
+        bundle_id=bundle_id,
+        last_minutes=last_minutes,
+        log_count=len(simplified_logs),
+        logs=simplified_logs,
+    )

minitap/mcp/tools/screen_analyzer.md ADDED Viewed

@@ -0,0 +1,17 @@
+You are given:
+1. A screenshot of a mobile device.
+2. A prompt describing what information to extract.
+Your task:
+- Look at the screenshot and **answer the prompt directly and completely**.
+- Provide a **detailed, structured description** of the relevant content (text, layout, icons, menus, timestamps, notifications, etc.).
+- If the prompt asks for specific data, extract it exactly as shown.
+- If the screenshot contains structured information (e.g., receipt, chat, settings), present it clearly using lists or tables.
+- Do not guess — if something is unclear or missing, state that explicitly.
+**Output format:**
+1. **Direct answer** to the prompt.
+2. **Detailed breakdown** of the screenshot content supporting the answer.

minitap/mcp/tools/take_screenshot.py ADDED Viewed

@@ -0,0 +1,53 @@
+"""Simple screenshot capture tool - returns raw base64 image without LLM analysis."""
+import base64
+from mcp.types import ImageContent
+from pydantic import Field
+from minitap.mcp.core.decorators import handle_tool_errors
+from minitap.mcp.core.device import capture_screenshot, find_mobile_device
+from minitap.mcp.main import mcp
+from minitap.mcp.server.cloud_mobile import (
+    check_cloud_mobile_status,
+    get_cloud_mobile_id,
+    get_cloud_screenshot,
+)
+@mcp.tool(
+    name="take_screenshot",
+    description="""
+    Capture a screenshot from the connected mobile device.
+    Returns the raw base64-encoded PNG image directly without any LLM analysis.
+    Use this when you need the screenshot image for display or further processing.
+    """,
+)
+@handle_tool_errors
+async def take_screenshot(
+    device_id: str | None = Field(
+        default=None,
+        description="ID of the device to capture screenshot from. "
+        "If not provided, the first available device is used.",
+    ),
+) -> list[ImageContent]:
+    """Capture screenshot and return as base64 image content."""
+    cloud_mobile_id = get_cloud_mobile_id()
+    if cloud_mobile_id:
+        # Cloud mode: use cloud screenshot API
+        await check_cloud_mobile_status(cloud_mobile_id)
+        screenshot_bytes = await get_cloud_screenshot(cloud_mobile_id)
+        screenshot_base64 = base64.b64encode(screenshot_bytes).decode("utf-8")
+    else:
+        # Local mode: capture from local device
+        device = find_mobile_device(device_id=device_id)
+        screenshot_base64 = capture_screenshot(device)
+    return [
+        ImageContent(
+            type="image",
+            data=screenshot_base64,
+            mimeType="image/png",
+        )
+    ]

minitap/mcp/tools/upload_screenshot.py ADDED Viewed

@@ -0,0 +1,80 @@
+"""Tool for uploading device screenshots to remote storage.
+This tool captures a screenshot from the connected device and uploads it
+to remote storage, returning a filename that can be used with other tools
+like figma_compare_screenshot.
+"""
+import base64
+from fastmcp.exceptions import ToolError
+from fastmcp.tools.tool import ToolResult
+from minitap.mcp.core.decorators import handle_tool_errors
+from minitap.mcp.core.device import capture_screenshot, find_mobile_device
+from minitap.mcp.core.logging_config import get_logger
+from minitap.mcp.core.storage import StorageUploadError, upload_screenshot_to_storage
+from minitap.mcp.main import mcp
+from minitap.mcp.server.cloud_mobile import get_cloud_mobile_id, get_cloud_screenshot
+logger = get_logger(__name__)
+@mcp.tool(
+    name="upload_screenshot",
+    description="""
+    Capture a screenshot from the connected device and upload it to storage.
+    This tool:
+    1. Captures a screenshot from the connected device (local or cloud)
+    2. Uploads the screenshot to remote storage
+    3. Returns a filename that can be used with other tools
+    Use this to get a screenshot filename for tools like figma_compare_screenshot
+    that require a current_screenshot_filename parameter.
+    Example workflow:
+    1. Call upload_screenshot to get a filename
+    2. Use the returned filename with figma_compare_screenshot
+    """,
+)
+@handle_tool_errors
+async def upload_screenshot() -> ToolResult:
+    """Capture and upload a device screenshot, return the filename."""
+    logger.info("Capturing and uploading device screenshot")
+    # Step 1: Capture screenshot from device
+    cloud_mobile_id = get_cloud_mobile_id()
+    if cloud_mobile_id:
+        logger.debug("Capturing screenshot from cloud device", device_id=cloud_mobile_id)
+        try:
+            screenshot_bytes = await get_cloud_screenshot(cloud_mobile_id)
+            screenshot_base64 = base64.b64encode(screenshot_bytes).decode("utf-8")
+        except Exception as e:
+            raise ToolError(f"Failed to capture cloud device screenshot: {e}") from e
+    else:
+        logger.debug("Capturing screenshot from local device")
+        try:
+            device = find_mobile_device()
+            screenshot_base64 = capture_screenshot(device)
+        except Exception as e:
+            raise ToolError(f"Failed to capture local device screenshot: {e}") from e
+    logger.info("Screenshot captured from device")
+    # Step 2: Upload screenshot to storage
+    try:
+        filename = await upload_screenshot_to_storage(screenshot_base64)
+        logger.info("Screenshot uploaded to storage", filename=filename)
+    except StorageUploadError as e:
+        raise ToolError(f"Failed to upload screenshot: {e}") from e
+    return ToolResult(
+        content=[
+            {
+                "type": "text",
+                "text": f"Screenshot uploaded successfully.\n\n**Filename:** {filename}",
+            }
+        ]
+    )