minitap-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,19 @@
1
+ import os
2
+
3
+ from minitap.mobile_use.sdk import Agent
4
+ from minitap.mobile_use.sdk.builders import Builders
5
+
6
+
7
def get_mobile_use_agent():
    """
    Build the mobile-use ``Agent`` used by the MCP server.

    When the ``ADB_SERVER_SOCKET`` environment variable is set, it must consist
    of exactly three colon-separated fields; the first field is ignored and the
    second and third are used as the ADB server host and port.

    Raises:
        ValueError: If ``ADB_SERVER_SOCKET`` does not have three fields, or if
            its port field is not an integer.
    """
    builder = Builders.AgentConfig
    socket_spec = os.getenv("ADB_SERVER_SOCKET")
    if socket_spec:
        fields = socket_spec.split(":")
        if len(fields) != 3:
            raise ValueError(f"Invalid ADB server socket: {socket_spec}")
        adb_host, adb_port = fields[1], fields[2]
        builder = builder.with_adb_server(host=adb_host, port=int(adb_port))
    return Agent(config=builder.build())


# Module-level agent instance, created once at import time.
agent = get_mobile_use_agent()
@@ -0,0 +1,27 @@
1
+ """Configuration for the MCP server."""
2
+
3
+ from dotenv import load_dotenv
4
+ from pydantic import Field, SecretStr
5
+ from pydantic_settings import BaseSettings, SettingsConfigDict
6
+
7
+ # Load environment variables from .env file
8
+ load_dotenv(verbose=True)
9
+
10
+
11
class MCPSettings(BaseSettings):
    """Configuration class for MCP server."""

    # Values are also read from a local .env file; unknown keys are ignored.
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    # Minitap platform access. The API key has no default and must be provided.
    MINITAP_API_KEY: SecretStr
    MINITAP_API_BASE_URL: str = Field(default="https://platform.minitap.ai/api/v1")

    # Model identifier used for screenshot analysis.
    VISION_MODEL: str = Field(default="qwen/qwen-2.5-vl-7b-instruct")

    # Bind address and port, used only when the server runs in network mode.
    MCP_SERVER_HOST: str = Field(default="0.0.0.0")
    MCP_SERVER_PORT: int = Field(default=8000)


# Settings instance created at import time.
settings = MCPSettings()  # type: ignore
@@ -0,0 +1,42 @@
1
+ """Decorators for MCP tools."""
2
+
3
+ import inspect
4
+ from collections.abc import Callable
5
+ from functools import wraps
6
+ from typing import Any, TypeVar
7
+
8
+ from minitap.mcp.core.device import DeviceNotFoundError
9
+
10
+ F = TypeVar("F", bound=Callable[..., Any])
11
+
12
+
13
+ def handle_tool_errors[T: Callable[..., Any]](func: T) -> T:
14
+ """
15
+ Decorator that catches all exceptions in MCP tools and returns error messages.
16
+
17
+ This prevents unhandled exceptions from causing infinite loops in the MCP server.
18
+ """
19
+
20
+ @wraps(func)
21
+ async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
22
+ try:
23
+ return await func(*args, **kwargs)
24
+ except DeviceNotFoundError as e:
25
+ return f"Error: {str(e)}"
26
+ except Exception as e:
27
+ return f"Error in {func.__name__}: {type(e).__name__}: {str(e)}"
28
+
29
+ @wraps(func)
30
+ def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
31
+ try:
32
+ return func(*args, **kwargs)
33
+ except DeviceNotFoundError as e:
34
+ return f"Error: {str(e)}"
35
+ except Exception as e:
36
+ return f"Error in {func.__name__}: {type(e).__name__}: {str(e)}"
37
+
38
+ # Check if the function is async
39
+ if inspect.iscoroutinefunction(func):
40
+ return async_wrapper # type: ignore
41
+ else:
42
+ return sync_wrapper # type: ignore
@@ -0,0 +1,242 @@
1
+ """Device detection and screenshot utilities for Android and iOS devices."""
2
+
3
+ import base64
4
+ import json
5
+ import os
6
+ import subprocess
7
+ import tempfile
8
+ from pathlib import Path
9
+ from typing import Literal
10
+
11
+ from adbutils import AdbClient, AdbDevice
12
+ from pydantic import BaseModel, ConfigDict
13
+
14
+
15
+ DevicePlatform = Literal["android", "ios"]
16
+
17
+
18
class MobileDevice(BaseModel):
    """Represents a mobile device with its platform and connection details."""

    # AdbDevice is not a pydantic-native type, so arbitrary types are allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Identifier used to address the device.
    device_id: str
    # Either "android" or "ios".
    platform: DevicePlatform
    # ADB handle; stays None for iOS devices.
    adb_device: AdbDevice | None = None
26
+
27
+
28
class DeviceInfo(BaseModel):
    """Serializable device information."""

    # Identifier used to address the device.
    device_id: str
    # Either "android" or "ios".
    platform: DevicePlatform
    # Optional human-readable name.
    name: str | None = None
    # Optional state string, e.g. "connected" or "Booted".
    state: str | None = None
35
+
36
+
37
class DeviceNotFoundError(Exception):
    """Raised when no device can be found."""
41
+
42
+
43
def get_adb_client() -> AdbClient:
    """
    Create an ADB client, honouring ``ADB_SERVER_SOCKET`` when it is set.

    The variable must consist of exactly three colon-separated fields; the
    first is ignored, the second is the host and the third is the port.
    Without the variable, a client with default settings is returned.

    Raises:
        ValueError: If ``ADB_SERVER_SOCKET`` does not have three fields, or if
            its port field is not an integer.
    """
    socket_spec = os.getenv("ADB_SERVER_SOCKET")
    if socket_spec:
        fields = socket_spec.split(":")
        if len(fields) != 3:
            raise ValueError(f"Invalid ADB server socket: {socket_spec}")
        return AdbClient(host=fields[1], port=int(fields[2]))
    return AdbClient()
53
+
54
+
55
def list_available_devices() -> list[DeviceInfo]:
    """
    Collect the Android devices known to ADB and the iOS simulators known to ``simctl``.

    Discovery is best-effort on both platforms: a failure while listing one
    platform is ignored, so the result may be empty.

    Returns:
        list[DeviceInfo]: Android entries first, then iOS entries.
    """
    found: list[DeviceInfo] = []

    # Android: every ADB device with a serial is reported as "connected".
    try:
        for adb_dev in get_adb_client().device_list():
            if not adb_dev.serial:
                continue
            found.append(
                DeviceInfo(
                    device_id=adb_dev.serial,
                    platform="android",
                    name=adb_dev.serial,
                    state="connected",
                )
            )
    except Exception:
        # ADB not available or error listing devices
        pass

    # iOS: parse the JSON listing and keep entries from runtimes whose key contains "iOS".
    try:
        simctl = subprocess.run(
            ["xcrun", "simctl", "list", "devices", "-j"],
            capture_output=True,
            text=True,
            check=True,
        )
        listing = json.loads(simctl.stdout)

        for runtime_name, simulators in listing.get("devices", {}).items():
            if "iOS" not in runtime_name:
                continue
            for sim in simulators:
                udid = sim.get("udid")
                sim_name = sim.get("name")
                sim_state = sim.get("state")
                if not udid:
                    continue
                found.append(
                    DeviceInfo(
                        device_id=udid,
                        platform="ios",
                        name=sim_name,
                        state=sim_state,
                    )
                )
    except (subprocess.CalledProcessError, FileNotFoundError, json.JSONDecodeError):
        # xcrun not available or error listing devices
        pass

    return found
112
+
113
+
114
def find_mobile_device(device_id: str | None = None) -> MobileDevice:
    """
    Find a mobile device (Android via ADB or iOS via xcrun).

    Args:
        device_id: Optional device ID to target a specific device.
            If None (or empty), the first device whose state is
            ``"connected"`` or ``"Booted"`` is chosen, falling back to the
            first listed device.

    Returns:
        MobileDevice: A reference to the device with its platform information.
            Android results carry an ``AdbDevice`` handle.

    Raises:
        DeviceNotFoundError: If no device is found, the specified device_id is
            not found, or obtaining the Android ADB handle fails.
    """
    candidates = list_available_devices()

    if not candidates:
        raise DeviceNotFoundError(
            "No mobile device found. "
            "Make sure you have an Android device connected via ADB "
            "or an iOS simulator running."
        )

    if device_id:
        # Exact match on the requested identifier.
        chosen = next((dev for dev in candidates if dev.device_id == device_id), None)
        if chosen is None:
            raise DeviceNotFoundError(
                f"Device with ID '{device_id}' not found. "
                "Make sure the device is connected and accessible via adb or xcrun."
            )
    else:
        # Prefer connected/booted devices; otherwise take whatever is listed first.
        chosen = next(
            (dev for dev in candidates if dev.state in ("connected", "Booted")),
            candidates[0],
        )

    if chosen.platform != "android":
        # For iOS, the identifier alone is enough.
        return MobileDevice(device_id=chosen.device_id, platform="ios")

    try:
        adb_device = get_adb_client().device(serial=chosen.device_id)
        return MobileDevice(
            device_id=chosen.device_id,
            platform="android",
            adb_device=adb_device,
        )
    except Exception as e:
        # Chain explicitly so the underlying ADB failure stays attached as the cause.
        raise DeviceNotFoundError(f"Failed to connect to Android device: {e}") from e
177
+
178
+
179
def capture_screenshot(device: MobileDevice) -> str:
    """
    Take a screenshot of ``device`` using the platform-specific capture routine.

    Args:
        device: MobileDevice instance returned by find_mobile_device()

    Returns:
        str: Base64-encoded screenshot image (PNG format)

    Raises:
        RuntimeError: If screenshot capture fails
    """
    # Anything that is not Android is handled by the iOS routine.
    grab = _capture_android_screenshot if device.platform == "android" else _capture_ios_screenshot
    return grab(device)
196
+
197
+
198
def _capture_android_screenshot(device: MobileDevice) -> str:
    """
    Capture screenshot from Android device using ADB.

    If ``device`` has no ADB handle yet, one is obtained and stored on
    ``device.adb_device`` before capturing.

    Returns:
        str: Base64-encoded output of ``screencap -p``.

    Raises:
        RuntimeError: If the device handle cannot be obtained, the shell call
            fails, or the shell call does not return bytes.
    """
    if not device.adb_device:
        # Reconnect to device if not available
        adb_device = get_adb_client().device(serial=device.device_id)
        if not adb_device:
            raise RuntimeError(f"Android device {device.device_id} not found")
        device.adb_device = adb_device

    try:
        # encoding=None asks for raw bytes instead of decoded text.
        screenshot_bytes = device.adb_device.shell("screencap -p", encoding=None)
        if isinstance(screenshot_bytes, bytes):
            return base64.b64encode(screenshot_bytes).decode("utf-8")
        raise RuntimeError("Unexpected screenshot data type from ADB")
    except Exception as e:
        # Chain explicitly so the original failure stays attached as the cause.
        raise RuntimeError(f"Failed to capture Android screenshot: {e}") from e
217
+
218
+
219
def _capture_ios_screenshot(device: MobileDevice) -> str:
    """
    Capture screenshot from iOS simulator using xcrun.

    ``xcrun simctl io <device_id> screenshot`` writes to a temporary ``.png``
    file, which is read back and removed afterwards.

    Returns:
        str: Base64-encoded contents of the screenshot file.

    Raises:
        RuntimeError: If the ``xcrun`` command fails or any other step errors.
    """
    try:
        # Only a path is needed; the file is closed again before xcrun writes to it.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_file:
            tmp_path = Path(tmp_file.name)

        try:
            cmd = ["xcrun", "simctl", "io", device.device_id, "screenshot", str(tmp_path)]
            subprocess.run(cmd, capture_output=True, text=True, check=True)

            return base64.b64encode(tmp_path.read_bytes()).decode("utf-8")
        finally:
            # delete=False above means cleanup is this function's responsibility.
            if tmp_path.exists():
                tmp_path.unlink()

    except subprocess.CalledProcessError as e:
        # Report the tool's stderr and keep the original exception as the cause.
        raise RuntimeError(f"Failed to capture iOS screenshot: {e.stderr}") from e
    except Exception as e:
        raise RuntimeError(f"Failed to capture iOS screenshot: {e}") from e
@@ -0,0 +1,28 @@
1
+ from langchain_openai import ChatOpenAI
2
+
3
+ from minitap.mcp.core.config import settings
4
+
5
+
6
def get_minitap_llm(
    trace_id: str,
    remote_tracing: bool = False,
    model: str = "google/gemini-2.5-pro",
    temperature: float | None = None,
    max_retries: int | None = None,
) -> ChatOpenAI:
    """
    Create a ``ChatOpenAI`` client that talks to the configured Minitap API.

    Args:
        trace_id: Sent as the ``sessionId`` query parameter on requests.
        remote_tracing: Sent as the ``traceOnlyUsage`` query parameter.
        model: Model identifier to request.
        temperature: Passed through to the client unchanged.
        max_retries: Passed through to the client; when None and the model
            name starts with ``"google/"``, 2 is used instead.

    Returns:
        ChatOpenAI: The configured client.
    """
    assert settings.MINITAP_API_KEY is not None
    assert settings.MINITAP_API_BASE_URL is not None

    retries = max_retries
    if retries is None and model.startswith("google/"):
        retries = 2

    tracing_query = {
        "sessionId": trace_id,
        "traceOnlyUsage": remote_tracing,
    }
    return ChatOpenAI(
        model=model,
        temperature=temperature,
        max_retries=retries,
        api_key=settings.MINITAP_API_KEY,
        base_url=settings.MINITAP_API_BASE_URL,
        default_query=tracing_query,
    )
@@ -0,0 +1,55 @@
1
+ import base64
2
+ from PIL import Image
3
+ from io import BytesIO
4
+
5
+ from langchain_core.messages import HumanMessage
6
+
7
+
8
def compress_base64_jpeg(base64_str: str, quality: int = 50) -> str:
    """
    Re-encode a base64 image as a base64 JPEG.

    A leading ``data:image...,`` prefix is stripped first. Images with an
    alpha band (RGBA, LA, PA) are flattened onto a white background; other
    non-RGB modes are converted to RGB.

    Args:
        base64_str: Base64-encoded image string
        quality: JPEG quality (0-100, default 50)

    Returns:
        Base64-encoded JPEG image
    """
    payload = base64_str.split(",")[1] if base64_str.startswith("data:image") else base64_str
    picture = Image.open(BytesIO(base64.b64decode(payload)))

    if picture.mode in ("RGBA", "LA", "PA"):
        # JPEG has no transparency: composite onto white using the alpha band
        # (band 3 for RGBA, band 1 for the two-band modes).
        alpha_band = 3 if picture.mode == "RGBA" else 1
        canvas = Image.new("RGB", picture.size, (255, 255, 255))
        canvas.paste(picture, mask=picture.split()[alpha_band])
        picture = canvas
    elif picture.mode != "RGB":
        picture = picture.convert("RGB")

    buffer = BytesIO()
    picture.save(buffer, format="JPEG", quality=quality, optimize=True)
    return base64.b64encode(buffer.getvalue()).decode("utf-8")
44
+
45
+
46
def get_screenshot_message_for_llm(screenshot_base64: str):
    """
    Wrap a screenshot in a ``HumanMessage`` containing a single ``image_url`` part.

    Input that already starts with ``data:image`` is used as the URL as-is;
    otherwise it is prefixed with ``data:image/jpeg;base64,``.
    """
    if screenshot_base64.startswith("data:image"):
        image_url = f"{screenshot_base64}"
    else:
        image_url = f"data:image/jpeg;base64,{screenshot_base64}"

    image_part = {
        "type": "image_url",
        "image_url": {"url": image_url},
    }
    return HumanMessage(content=[image_part])
minitap/mcp/main.py ADDED
@@ -0,0 +1,109 @@
1
+ """MCP server for mobile-use with screen analysis capabilities."""
2
+
3
+ import argparse
4
+ import logging
5
+ import os
6
+ import sys
7
+ import threading
8
+
9
+ # Fix Windows console encoding for Unicode characters (emojis in logs)
10
+ if sys.platform == "win32":
11
+ if hasattr(sys.stdout, "reconfigure"):
12
+ sys.stdout.reconfigure(encoding="utf-8") # type: ignore[attr-defined]
13
+ if hasattr(sys.stderr, "reconfigure"):
14
+ sys.stderr.reconfigure(encoding="utf-8") # type: ignore[attr-defined]
15
+ os.environ["PYTHONIOENCODING"] = "utf-8"
16
+
17
+ try:
18
+ import colorama
19
+
20
+ colorama.init(strip=False, convert=True, wrap=True)
21
+ except ImportError:
22
+ pass
23
+
24
+
25
+ from fastmcp import FastMCP # noqa: E402
26
+
27
+ from minitap.mcp.core.agents import agent
28
+ from minitap.mcp.core.config import settings # noqa: E402
29
+ from minitap.mcp.core.device import (
30
+ DeviceInfo, # noqa: E402
31
+ list_available_devices, # noqa: E402; noqa: E402
32
+ )
33
+ from minitap.mcp.server.middleware import MaestroCheckerMiddleware
34
+ from minitap.mcp.server.poller import device_health_poller
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
+ mcp = FastMCP(
40
+ name="mobile-use-mcp",
41
+ instructions="""
42
+ This server provides analysis tools for connected
43
+ mobile devices (iOS or Android).
44
+ Call get_available_devices() to list them.
45
+ """,
46
+ )
47
+
48
+ from minitap.mcp.tools import ( # noqa: E402, F401
49
+ analyze_screen,
50
+ execute_mobile_command,
51
+ go_back,
52
+ )
53
+
54
+
55
@mcp.resource("data://devices")
def get_available_devices() -> list[DeviceInfo]:
    """Provides a list of connected mobile devices (iOS or Android)."""
    # Thin resource wrapper around the shared discovery helper.
    discovered = list_available_devices()
    return discovered
59
+
60
+
61
def mcp_lifespan(**mcp_run_kwargs):
    """
    Run the MCP server together with the background device health poller.

    Registers the Maestro health-check middleware, starts the poller thread,
    then blocks in ``mcp.run``. The poller is always signalled to stop and
    joined when ``mcp.run`` returns or raises, so the non-daemon thread cannot
    keep the process alive after a server failure.

    Args:
        **mcp_run_kwargs: Forwarded unchanged to ``mcp.run``.
    """
    mcp.add_middleware(MaestroCheckerMiddleware(agent))

    # Start device health poller in background
    stop_event = threading.Event()
    poller_thread = threading.Thread(
        target=device_health_poller,
        args=(
            stop_event,
            agent,
        ),
    )
    poller_thread.start()
    logger.info("Device health poller started")

    try:
        mcp.run(**mcp_run_kwargs)
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop the server; shut down quietly.
        pass
    finally:
        # Runs for every exit path; other exceptions still propagate afterwards.
        stop_event.set()
        logger.info("Device health poller stopping...")
        poller_thread.join()
        logger.info("Device health poller stopped")
86
+
87
+
88
def main() -> None:
    """
    Main entry point for the MCP server.

    Without flags the server runs in local mode. With ``--server`` it is
    started over the ``http`` transport on the host and port from settings.
    """
    cli = argparse.ArgumentParser(description="Mobile Use MCP Server")
    cli.add_argument(
        "--server",
        action="store_true",
        help="Run as network server (uses MCP_SERVER_HOST and MCP_SERVER_PORT from env)",
    )
    options = cli.parse_args()

    if not options.server:
        logger.info("Starting MCP server in local mode")
        mcp_lifespan()
        return

    # Network mode: bind to the configured host/port for remote access.
    logger.info(f"Starting MCP server on {settings.MCP_SERVER_HOST}:{settings.MCP_SERVER_PORT}")
    mcp_lifespan(
        transport="http",
        host=settings.MCP_SERVER_HOST,
        port=settings.MCP_SERVER_PORT,
    )
@@ -0,0 +1,23 @@
1
+ from fastmcp.exceptions import ToolError
2
+ from fastmcp.server.middleware import Middleware, MiddlewareContext
3
+
4
+ from minitap.mobile_use.sdk import Agent
5
+
6
+
7
class MaestroCheckerMiddleware(Middleware):
    """Middleware that rejects calls to ``requires-maestro`` tools while the agent is unhealthy."""

    def __init__(self, agent: Agent):
        # Agent whose is_healthy() result gates tagged tools.
        self.agent = agent

    async def on_call_tool(self, context: MiddlewareContext, call_next):
        """
        Check agent health before a tool tagged ``requires-maestro`` runs.

        Raises:
            ToolError: If the tool is tagged ``requires-maestro`` and the agent
                reports that it is not healthy.
        """
        if context.fastmcp_context:
            try:
                tool = await context.fastmcp_context.fastmcp.get_tool(context.message.name)
                if "requires-maestro" in tool.tags:
                    if not self.agent.is_healthy():
                        raise ToolError(
                            "Maestro not healthy.\n"
                            "Make sure a mobile device is connected and try again."
                        )
            except ToolError:
                # The health verdict must reach the caller; previously the
                # broad handler below swallowed it and the gate never fired.
                raise
            except Exception:
                # Best-effort: failures of the lookup or health probe itself
                # do not block the call.
                pass
        return await call_next(context)
@@ -0,0 +1,38 @@
1
+ """Device health monitoring poller for the MCP server."""
2
+
3
+ import logging
4
+ import time
5
+ import threading
6
+
7
+ from minitap.mcp.core.device import list_available_devices
8
+ from minitap.mobile_use.sdk import Agent
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
def device_health_poller(stop_event: threading.Event, agent: Agent) -> None:
    """
    Background poller that monitors device availability and agent health.
    Runs every 5 seconds to ensure a device is connected and the agent is healthy.

    Args:
        stop_event: Set by the owner to end the loop. The wait between polls
            is interrupted as soon as the event is set.
        agent: The Agent instance to monitor and reinitialize if needed.
    """
    # Event.wait doubles as the sleep: it returns True (ending the loop) as
    # soon as stop_event is set, and False after the 5 second timeout.
    while not stop_event.wait(5):
        try:
            devices = list_available_devices()

            if not devices:
                logger.info("No mobile device found, retrying in 5 seconds...")
                continue

            if not agent.is_healthy():
                logger.warning("Agent is not healthy. Reinitializing...")
                agent.clean(force=True)
                agent.init()
                logger.info("Agent reinitialized successfully")

        except Exception as e:
            logger.error(f"Error in device health poller: {e}")
            try:
                agent.clean(force=True)
            except Exception as cleanup_error:
                # A failing cleanup must not terminate the poller thread.
                logger.error(f"Error cleaning agent in device health poller: {cleanup_error}")
@@ -0,0 +1,58 @@
1
+ from pathlib import Path
2
+ from jinja2 import Template
3
+ from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
4
+ from uuid import uuid4
5
+
6
+ from pydantic import Field
7
+
8
+ from minitap.mcp.core.config import settings
9
+ from minitap.mcp.core.decorators import handle_tool_errors
10
+ from minitap.mcp.core.device import capture_screenshot, find_mobile_device
11
+ from minitap.mcp.core.llm import get_minitap_llm
12
+ from minitap.mcp.core.utils import compress_base64_jpeg, get_screenshot_message_for_llm
13
+ from minitap.mcp.main import mcp
14
+
15
+
16
@mcp.tool(
    name="analyze_screen",
    description="""
    Analyze what is shown on the mobile device screen.
    This tool captures a screenshot from the target device and uses a
    vision-capable LLM to analyze and describe what's on the screen. Useful
    for understanding UI elements, extracting text, or identifying specific features.
    """,
)
@handle_tool_errors
async def analyze_screen(
    prompt: str = Field(
        description="Prompt for the analysis.",
    ),
    device_id: str | None = Field(
        default=None,
        description="ID of the device screen to analyze. "
        "If not provided, the first available device is taken.",
    ),
) -> str | list | dict:
    # Flow: load the system prompt shipped next to this module, capture and
    # JPEG-compress a screenshot of the target device, then send system
    # prompt + image + user prompt to the configured vision model and return
    # the content of its reply.
    system_message = Template(
        Path(__file__).parent.joinpath("screen_analyzer.md").read_text(encoding="utf-8")
    ).render()

    # Find the device and capture screenshot
    device = find_mobile_device(device_id=device_id)
    screenshot_base64 = capture_screenshot(device)
    compressed_image_base64 = compress_base64_jpeg(screenshot_base64)

    messages: list[BaseMessage] = [
        SystemMessage(content=system_message),
        get_screenshot_message_for_llm(compressed_image_base64),
        HumanMessage(content=prompt),
    ]

    # A fresh trace id is generated for every analysis call.
    llm = get_minitap_llm(
        trace_id=str(uuid4()),
        remote_tracing=True,
        model=settings.VISION_MODEL,
        temperature=1,
    )
    response = await llm.ainvoke(messages)
    return response.content
@@ -0,0 +1,64 @@
1
+ """Tool for running manual tasks on a connected mobile device."""
2
+
3
+ from collections.abc import Mapping
4
+ from typing import Any
5
+
6
+ from minitap.mobile_use.sdk.types import ManualTaskConfig
7
+ from minitap.mobile_use.sdk.types.task import PlatformTaskRequest
8
+ from pydantic import Field
9
+
10
+ from minitap.mcp.core.agents import agent
11
+ from minitap.mcp.core.decorators import handle_tool_errors
12
+ from minitap.mcp.main import mcp
13
+
14
+
15
+ def _serialize_result(result: Any) -> Any:
16
+ """Convert SDK responses to serializable data for MCP."""
17
+ if hasattr(result, "model_dump"):
18
+ return result.model_dump()
19
+ if hasattr(result, "dict"):
20
+ return result.dict()
21
+ if isinstance(result, Mapping):
22
+ return dict(result)
23
+ return result
24
+
25
+
26
@mcp.tool(
    name="execute_mobile_command",
    tags={"requires-maestro"},
    description="""
    Execute a natural language command on a mobile device using the Minitap SDK.
    This tool allows you to control your Android or iOS device using natural language.
    Examples:
    - "Open the settings app and tell me the battery level"
    - "Find the first 3 unread emails in Gmail"
    - "Take a screenshot and save it"

    The tool uses the Minitap platform with API key authentication.
    Set MINITAP_API_KEY and MINITAP_API_BASE_URL environment variables.
    Visit https://platform.minitap.ai to get your API key.
    """,
)
@handle_tool_errors
async def execute_mobile_command(
    goal: str = Field(description="High-level goal describing the action to perform."),
    output_description: str | None = Field(
        default=None,
        description="Optional description of the expected output format. "
        "For example: 'A JSON array with sender and subject for each email' "
        "or 'The battery percentage as a number'.",
    ),
    profile: str = Field(
        default="default",
        description="Name of the profile to use for this task. Defaults to 'default'.",
    ),
) -> str | dict[str, Any]:
    """Run a manual task on a mobile device via the Minitap platform."""
    # NOTE(review): `profile` is accepted but never forwarded to the request;
    # confirm whether PlatformTaskRequest should receive it.
    try:
        manual_task = ManualTaskConfig(goal=goal, output_description=output_description)
        task_request = PlatformTaskRequest(task=manual_task)
        outcome = await agent.run_task(request=task_request)
        return _serialize_result(outcome)
    finally:
        # The agent is cleaned after every attempt, whether it succeeded or not.
        agent.clean()
@@ -0,0 +1,42 @@
1
+ import requests
2
+
3
+ from minitap.mcp.core.decorators import handle_tool_errors
4
+ from minitap.mcp.main import mcp
5
+
6
+
7
@mcp.tool(
    name="go_back",
    tags={"requires-maestro"},
    description="""
    Sends a 'back' command to the mobile device automation server.
    """,
)
@handle_tool_errors
async def go_back() -> str:
    """
    Send a back command to the mobile device.

    Posts a one-line ``back`` YAML command to the automation server on
    ``localhost:9999`` and reports the outcome as a string. Request failures
    are returned as messages rather than raised.
    """
    # Imported locally so this block does not depend on the module's import list.
    import asyncio

    try:
        # requests is blocking; run it in a worker thread so a slow or hung
        # automation server (up to the 30 s timeout) does not stall the event loop.
        response = await asyncio.to_thread(
            requests.post,
            "http://localhost:9999/api/run-command",
            headers={
                "User-Agent": "python-requests/2.32.4",
                "Accept-Encoding": "gzip, deflate, zstd",
                "Accept": "*/*",
                "Connection": "keep-alive",
                "Content-Type": "application/json",
            },
            json={"yaml": "back\n"},
            timeout=30,
        )

        if response.status_code == 200:
            return f"Successfully sent back command. Response: {response.text}"
        return (
            f"Failed to send back command. "
            f"Status code: {response.status_code}, Response: {response.text}"
        )

    except requests.exceptions.RequestException as e:
        return f"Error sending back command: {str(e)}"
    except Exception as e:
        return f"Unexpected error: {str(e)}"
@@ -0,0 +1,17 @@
1
+ You are given:
2
+
3
+ 1. A screenshot of a mobile device.
4
+ 2. A prompt describing what information to extract.
5
+
6
+ Your task:
7
+
8
+ - Look at the screenshot and **answer the prompt directly and completely**.
9
+ - Provide a **detailed, structured description** of the relevant content (text, layout, icons, menus, timestamps, notifications, etc.).
10
+ - If the prompt asks for specific data, extract it exactly as shown.
11
+ - If the screenshot contains structured information (e.g., receipt, chat, settings), present it clearly using lists or tables.
12
+ - Do not guess — if something is unclear or missing, state that explicitly.
13
+
14
+ **Output format:**
15
+
16
+ 1. **Direct answer** to the prompt.
17
+ 2. **Detailed breakdown** of the screenshot content supporting the answer.
@@ -0,0 +1,348 @@
1
+ Metadata-Version: 2.3
2
+ Name: minitap-mcp
3
+ Version: 0.1.0
4
+ Summary: MCP server for mobile-use
5
+ Author: Pierre-Louis Favreau, Jean-Pierre Lo, Clément Guiguet
6
+ Requires-Dist: fastmcp>=2.12.4
7
+ Requires-Dist: python-dotenv>=1.1.1
8
+ Requires-Dist: pydantic>=2.12.0
9
+ Requires-Dist: pydantic-settings>=2.10.1
10
+ Requires-Dist: minitap-mobile-use>=2.5.3
11
+ Requires-Dist: jinja2>=3.1.6
12
+ Requires-Dist: langchain-core>=0.3.75
13
+ Requires-Dist: ruff==0.5.3 ; extra == 'dev'
14
+ Requires-Dist: pytest==8.4.1 ; extra == 'dev'
15
+ Requires-Dist: pytest-cov==5.0.0 ; extra == 'dev'
16
+ Requires-Python: >=3.12
17
+ Project-URL: Homepage, https://minitap.ai/
18
+ Project-URL: Source, https://github.com/minitap-ai/mobile-use
19
+ Provides-Extra: dev
20
+ Description-Content-Type: text/markdown
21
+
22
+ # Mobile-Use MCP Server
23
+
24
+ A Model Context Protocol (MCP) server that provides AI-powered mobile device screen analysis. Automatically detects connected Android devices (via ADB) and iOS simulators (via xcrun), captures screenshots, and analyzes them using vision language models.
25
+
26
+ ## Features
27
+
28
+ - **🔍 Device Discovery**: Automatically finds connected Android devices (ADB) and iOS simulators (xcrun)
29
+ - **📱 Screen Analysis**: Capture and analyze device screenshots using vision-capable LLMs
30
+ - **🤖 Natural Language Control**: Execute commands on your device using natural language via the mobile-use SDK
31
+ - **🚀 Easy Integration**: Built with FastMCP for seamless MCP protocol implementation
32
+ - **⚙️ Flexible Configuration**: Uses Minitap API with support for various vision models
33
+
34
+ ## Installation
35
+
36
+ ### Prerequisites
37
+
38
+ - **Python 3.12+**
39
+ - **uv** (recommended) or pip
40
+ - **For Android**: ADB installed and accessible
41
+ - **For iOS**: Xcode Command Line Tools (macOS only)
42
+ - **Minitap API Key** - Get one at [platform.minitap.ai](https://platform.minitap.ai)
43
+
44
+ ### Setup
45
+
46
+ 1. **Clone and navigate to the project:**
47
+
48
+ ```bash
49
+ cd minitap-mcp
50
+ ```
51
+
52
+ 2. **Install dependencies:**
53
+
54
+ ```bash
55
+ # Create a virtual environment
56
+ uv venv
57
+ source .venv/bin/activate
58
+
59
+ # Install dependencies
60
+ uv sync
61
+ ```
62
+
63
+ 3. **Configure for MCP usage:**
64
+
65
+ The MCP server is configured via environment variables passed from your MCP client (e.g., Windsurf).
66
+
67
+ Required environment variable:
68
+ - `MINITAP_API_KEY`: Your Minitap API key
69
+
70
+ Optional environment variables:
71
+ - `MINITAP_API_BASE_URL`: API base URL (default: `https://platform.minitap.ai/api/v1`)
72
+ - `VISION_MODEL`: Vision model to use (default: `qwen/qwen-2.5-vl-7b-instruct`)
73
+ - `ADB_SERVER_SOCKET`: Custom ADB server socket (format: `tcp:host:port`)
74
+
75
+ ## Available Resources & Tools
76
+
77
+ ### Resource: `data://devices`
78
+
79
+ Lists all connected mobile devices (Android and iOS).
80
+
81
+ **Returns:** Array of device information objects with:
82
+ - `device_id`: Device serial (Android) or UDID (iOS)
83
+ - `platform`: `"android"` or `"ios"`
84
+ - `name`: Device name
85
+ - `state`: Device state (`"connected"` for Android devices; for iOS simulators, the state reported by `simctl`, e.g. `"Booted"` or `"Shutdown"`)
86
+
87
+ ### Tool: `analyze_screen`
88
+
89
+ Captures a screenshot from a mobile device and analyzes it using a vision language model.
90
+
91
+ **Parameters:**
92
+ - `prompt` (required): Analysis prompt describing what information to extract
93
+ - `device_id` (optional): Specific device ID to target. If not provided, uses the first available device.
94
+
95
+ **Returns:** AI-generated analysis of the screenshot based on the prompt.
96
+
97
+ **Example:**
98
+ ```
99
+ Prompt: "What app is currently open? List all visible UI elements."
100
+ ```
101
+
102
+ The tool will:
103
+ 1. Find the specified device (or first available)
104
+ 2. Capture a screenshot
105
+ 3. Analyze it with the vision model
106
+ 4. Return the analysis
107
+
108
+ ### Tool: `execute_mobile_command`
109
+
110
+ Execute natural language commands on your mobile device using the mobile-use SDK. This tool allows you to control your Android or iOS device with simple instructions.
111
+
112
+ **Parameters:**
113
+ - `goal` (required): Natural language command to execute on the device
114
+ - `output_description` (optional): Description of the expected output format (e.g., "A JSON list of objects with sender and subject keys")
115
+ - `profile` (optional): Name of the profile to use for this task. Defaults to 'default'
116
+
117
+ **Returns:** Execution result with status, output, and any extracted data.
118
+
119
+ **Examples:**
120
+ ```python
121
+ # Simple command
122
+ goal: "Go to settings and tell me my current battery level"
123
+
124
+ # Data extraction with structured output
125
+ goal: "Open Gmail, find first 3 unread emails, and list their sender and subject line"
126
+ output_description: "A JSON list of objects, each with 'sender' and 'subject' keys"
127
+
128
+ # App navigation
129
+ goal: "Open Twitter and scroll to the latest tweet"
130
+ ```
131
+
132
+ The tool will:
133
+ 1. Find the specified device (or first available)
134
+ 2. Execute the command using the mobile-use AI agent
135
+ 3. Return the result or extracted data
136
+
137
+ ## Usage
138
+
139
+ ### Running the MCP Server
140
+
141
+ #### Local Mode (Default)
142
+
143
+ The MCP server is typically started by your MCP client (e.g., Windsurf). For manual testing:
144
+
145
+ ```bash
146
+ minitap-mcp
147
+ ```
148
+
149
+ #### Network Server Mode
150
+
151
+ You can run the MCP server as a network server for remote access:
152
+
153
+ ```bash
154
+ # Run as network server (uses MCP_SERVER_HOST and MCP_SERVER_PORT from env)
155
+ minitap-mcp --server
156
+ ```
157
+
158
+ The server will bind to the host and port specified in your environment variables:
159
+ - `MCP_SERVER_HOST` (default: `0.0.0.0`)
160
+ - `MCP_SERVER_PORT` (default: `8000`)
161
+
162
+ Configure these in your `.env` file or via environment variables to customize the binding address.
163
+
164
+ Inside Windsurf, you can configure the MCP server by adding the following to your `~/.codeium/windsurf/mcp_config.json` file:
165
+
166
+ ```json
167
+ {
168
+ "mcpServers": {
169
+ "minitap-mcp": {
170
+ "serverUrl": "http://localhost:8000/mcp"
171
+ }
172
+ }
173
+ }
174
+ ```
175
+
176
+ N.B. You may need to change the port based on what you've configured in your `.env` file.
177
+
178
+ ## Development
179
+
180
+ ### Quick Testing
181
+
182
+ Test device detection and screenshot capture (no API key required):
183
+
184
+ ```bash
185
+ python tests/test_devices.py
186
+ ```
187
+
188
+ Test the complete MCP flow with LLM analysis (requires API key):
189
+
190
+ ```bash
191
+ cp .env.example .env
192
+ # Edit .env and add your MINITAP_API_KEY
193
+ python tests/test_mcp.py
194
+ ```
195
+
196
+ ### Code Quality
197
+
198
+ **Format code:**
199
+ ```bash
200
+ ruff format .
201
+ ```
202
+
203
+ **Lint:**
204
+ ```bash
205
+ ruff check --fix
206
+ ```
207
+
208
+ ## Project Structure
209
+
210
+ ```
211
+ minitap/mcp/
212
+ ├── __init__.py
213
+ ├── main.py # FastMCP server entry point
214
+ ├── core/
215
+ │ ├── __init__.py
216
+ │ ├── config.py # Pydantic settings configuration
217
+ │ ├── decorators.py # Error handling decorators
218
+ │ ├── device.py # Device discovery & screenshot capture
219
+ │ ├── llm.py # LLM client initialization
220
+ │ └── utils.py # Utility functions (image compression, etc.)
221
+ └── tools/
222
+ ├── __init__.py
223
+ ├── analyze_screen.py # Screen analysis tool
224
+ ├── execute_mobile_command.py # Mobile-use SDK integration tool
225
+ └── screen_analyzer.md # System prompt for analysis
226
+
227
+ tests/
228
+ ├── test_devices.py # Device detection tests
229
+ └── test_mcp.py # Full MCP integration tests
230
+ ```
231
+
232
+ ## Creating New Tools
233
+
234
+ When adding new MCP tools, use the `@handle_tool_errors` decorator to prevent unhandled exceptions from causing infinite loops:
235
+
236
+ ```python
237
+ from minitap.mcp.core.decorators import handle_tool_errors
238
+ from minitap.mcp.main import mcp
239
+
240
+ @mcp.tool(name="my_tool", description="...")
241
+ @handle_tool_errors # Add this decorator
242
+ async def my_tool(param: str) -> str:
243
+ # Your tool logic here
244
+ # Any exception will be caught and returned as an error message
245
+ return "result"
246
+ ```
247
+
248
+ The decorator automatically:
249
+ - Catches all exceptions (including `DeviceNotFoundError`)
250
+ - Returns user-friendly error messages
251
+ - Prevents the MCP server from hanging or looping infinitely
252
+ - Works with both sync and async functions
253
+
254
+ ## Integration with Windsurf
255
+
256
+ To use this MCP server in Windsurf, add it to your MCP settings:
257
+
258
+ **Location:** `~/.codeium/windsurf/mcp_config.json`
259
+
260
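+ **Configuration** (`VISION_MODEL` is optional; strict JSON does not allow comments, so remove the `// optional` annotation if your client rejects it):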
261
+
262
+ ```json
263
+ {
264
+ "mcpServers": {
265
+ "minitap-mcp": {
266
+ "command": "bash",
267
+ "args": ["-c", "cd /path/to/minitap-mcp && source .venv/bin/activate && uv sync && minitap-mcp"],
268
+ "env": {
269
+ "MINITAP_API_KEY": "your_minitap_api_key_here",
270
+ "MINITAP_API_BASE_URL": "https://platform.minitap.ai/api/v1",
271
+ "VISION_MODEL": "baidu/ernie-4.5-vl-28b-a3b" // optional
272
+ }
273
+ }
274
+ }
275
+ }
276
+ ```
277
+
278
+ **After configuration:**
279
+ 1. Restart Windsurf
280
+ 2. The `analyze_screen` and `execute_mobile_command` tools will be available in Cascade
281
+ 3. The `data://devices` resource will list connected devices
282
+
283
+ ### Available Vision Models
284
+
285
+ The Minitap API supports various vision models:
286
+ - `qwen/qwen-2.5-vl-7b-instruct` (default)
287
+ - `baidu/ernie-4.5-vl-28b-a3b`
288
+ - `openai/gpt-4o`
289
+ - And more - check the Minitap platform for the full list
290
+
291
+ ## Device Requirements
292
+
293
+ ### Android Devices
294
+
295
+ **Requirements:**
296
+ - ADB installed and in PATH
297
+ - USB debugging enabled on the device
298
+ - Device connected via USB or network ADB
299
+
300
+ **Verify connection:**
301
+ ```bash
302
+ adb devices
303
+ ```
304
+
305
+ **Custom ADB Server:**
306
+ If using a custom ADB server (e.g., Docker, WSL), set the socket:
307
+ ```bash
308
+ export ADB_SERVER_SOCKET="tcp:localhost:5037"
309
+ ```
310
+
311
+ N.B. You may need to restart your IDE so that it picks up the new environment variable.
312
+
313
+ ### iOS Devices
314
+
315
+ **Requirements:**
316
+ - macOS with Xcode Command Line Tools
317
+ - iOS Simulator running
318
+
319
+ **Verify simulators:**
320
+ ```bash
321
+ xcrun simctl list devices booted
322
+ ```
323
+
324
+ **Start a simulator:**
325
+ ```bash
326
+ open -a Simulator
327
+ ```
328
+
329
+ ## Troubleshooting
330
+
331
+ ### No devices found
332
+
333
+ 1. **Android:** Run `adb devices` to verify device connection
334
+ 2. **iOS:** Run `xcrun simctl list devices booted` to check running simulators
335
+ 3. Ensure USB debugging is enabled (Android)
336
+ 4. Try restarting ADB: `adb kill-server && adb start-server`
337
+
338
+ ### Screenshot capture fails
339
+
340
+ 1. Ensure device screen is unlocked
341
+ 2. For Android, verify screencap permission
342
+ 3. For iOS, ensure simulator is fully booted
343
+
344
+ ### Tool not detected in Windsurf
345
+
346
+ 1. Verify that `main.py` imports the tools module
347
+ 2. Check that `tools/__init__.py` exists
348
+ 3. Restart Windsurf after configuration changes
@@ -0,0 +1,18 @@
1
+ minitap/mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ minitap/mcp/core/agents.py,sha256=aRqHA1r8ejWOGlTbku_TXv6WV8Vucp5LP4puhEit7tc,573
3
+ minitap/mcp/core/config.py,sha256=kaIlb_4AQ3DNwtcLDZF528cgrtj5LGcCFNdHZfEwrUo,820
4
+ minitap/mcp/core/decorators.py,sha256=iekv181o_rkv0upacFWkmPqxsZRTzuLFyOZ0sIDtQnQ,1317
5
+ minitap/mcp/core/device.py,sha256=sEO3Z-8F325hDOObdH1YBhZE60f17FmIclt5UlhY_nU,7875
6
+ minitap/mcp/core/llm.py,sha256=z_pYZkZcAchsiWPh4W79frQPANsfYyFPUe8DJo8lZO0,822
7
+ minitap/mcp/core/utils.py,sha256=3uExpRoh7affIieZx3TLlZTmZCcoxWfx1YpPbwhjiJY,1791
8
+ minitap/mcp/main.py,sha256=4ytnVMd7yzk7I9MnFsPB8U9b9bMyhVf6Yq623pYccj0,3060
9
+ minitap/mcp/server/middleware.py,sha256=fbry_IiHmwUxVjsWgOU2goybcS1kLRXFZZ89KPH1d8E,880
10
+ minitap/mcp/server/poller.py,sha256=C2h5Ir3nY5gZ6qTDOHBw_Tb8PfAY54A-we2HrwjNLvg,1222
11
+ minitap/mcp/tools/analyze_screen.py,sha256=fjcjf3tTZDlxzmiQFHFNgw38bxPz4eisw57zuxshN2A,1984
12
+ minitap/mcp/tools/execute_mobile_command.py,sha256=fpmr0LnV7DDEiHwDDrDVZ-SaoVUmReZWH6sRBahPWes,2320
13
+ minitap/mcp/tools/go_back.py,sha256=lEmADkDkXu8JGm-sY7zL7M6GlBy-lD7Iffv4yzwoQfo,1301
14
+ minitap/mcp/tools/screen_analyzer.md,sha256=TTO80JQWusbA9cKAZn-9cqhgVHm6F_qJh5w152hG3YM,734
15
+ minitap_mcp-0.1.0.dist-info/WHEEL,sha256=X16MKk8bp2DRsAuyteHJ-9qOjzmnY0x1aj0P1ftqqWA,78
16
+ minitap_mcp-0.1.0.dist-info/entry_points.txt,sha256=rYVoXm7tSQCqQTtHx4Lovgn1YsjwtEEHfddKrfEVHuY,55
17
+ minitap_mcp-0.1.0.dist-info/METADATA,sha256=DB6OoQCvyqHD05Xks_splNrQzhKj32MO8mvf2KaWnmw,9920
18
+ minitap_mcp-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.9.2
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ minitap-mcp = minitap.mcp.main:main
3
+