minitap-mcp 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minitap/mcp/__init__.py +0 -0
- minitap/mcp/core/agents.py +19 -0
- minitap/mcp/core/config.py +27 -0
- minitap/mcp/core/decorators.py +42 -0
- minitap/mcp/core/device.py +242 -0
- minitap/mcp/core/llm.py +28 -0
- minitap/mcp/core/utils.py +55 -0
- minitap/mcp/main.py +109 -0
- minitap/mcp/server/middleware.py +23 -0
- minitap/mcp/server/poller.py +38 -0
- minitap/mcp/tools/analyze_screen.py +58 -0
- minitap/mcp/tools/execute_mobile_command.py +64 -0
- minitap/mcp/tools/go_back.py +42 -0
- minitap/mcp/tools/screen_analyzer.md +17 -0
- minitap_mcp-0.1.0.dist-info/METADATA +348 -0
- minitap_mcp-0.1.0.dist-info/RECORD +18 -0
- minitap_mcp-0.1.0.dist-info/WHEEL +4 -0
- minitap_mcp-0.1.0.dist-info/entry_points.txt +3 -0
minitap/mcp/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from minitap.mobile_use.sdk import Agent
|
|
4
|
+
from minitap.mobile_use.sdk.builders import Builders
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def get_mobile_use_agent():
    """
    Build the mobile-use SDK agent used by the MCP server.

    When the ADB_SERVER_SOCKET environment variable is set, it must consist of
    exactly three colon-separated fields; the second and third are handed to
    the config builder as the ADB server host and port.

    Returns:
        An Agent created from the built configuration.

    Raises:
        ValueError: If ADB_SERVER_SOCKET does not have exactly three
            colon-separated fields, or its last field is not an integer.
    """
    builder = Builders.AgentConfig
    socket_spec = os.getenv("ADB_SERVER_SOCKET")
    if not socket_spec:
        # No override requested: use the builder's defaults.
        return Agent(config=builder.build())

    fields = socket_spec.split(":")
    if len(fields) != 3:
        raise ValueError(f"Invalid ADB server socket: {socket_spec}")

    # The first field is ignored (presumably the transport, e.g. "tcp" -- confirm).
    host, port = fields[1], fields[2]
    builder = builder.with_adb_server(host=host, port=int(port))
    return Agent(config=builder.build())
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
agent = get_mobile_use_agent()
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Configuration for the MCP server."""
|
|
2
|
+
|
|
3
|
+
from dotenv import load_dotenv
|
|
4
|
+
from pydantic import Field, SecretStr
|
|
5
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
6
|
+
|
|
7
|
+
# Load environment variables from .env file
|
|
8
|
+
load_dotenv(verbose=True)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MCPSettings(BaseSettings):
    """Settings for the MCP server, loaded by pydantic-settings."""

    # Values are also read from a local ".env" file; unknown keys are ignored.
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    # --- Minitap API ---
    # Required: this field has no default.
    MINITAP_API_KEY: SecretStr
    MINITAP_API_BASE_URL: str = Field(default="https://platform.minitap.ai/api/v1")

    # Model identifier used for screen analysis.
    VISION_MODEL: str = Field(default="qwen/qwen-2.5-vl-7b-instruct")

    # --- MCP server (optional, for remote access) ---
    MCP_SERVER_HOST: str = Field(default="0.0.0.0")
    MCP_SERVER_PORT: int = Field(default=8000)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
settings = MCPSettings() # type: ignore
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Decorators for MCP tools."""
|
|
2
|
+
|
|
3
|
+
import inspect
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from functools import wraps
|
|
6
|
+
from typing import Any, TypeVar
|
|
7
|
+
|
|
8
|
+
from minitap.mcp.core.device import DeviceNotFoundError
|
|
9
|
+
|
|
10
|
+
F = TypeVar("F", bound=Callable[..., Any])
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def handle_tool_errors[T: Callable[..., Any]](func: T) -> T:
|
|
14
|
+
"""
|
|
15
|
+
Decorator that catches all exceptions in MCP tools and returns error messages.
|
|
16
|
+
|
|
17
|
+
This prevents unhandled exceptions from causing infinite loops in the MCP server.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
@wraps(func)
|
|
21
|
+
async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
22
|
+
try:
|
|
23
|
+
return await func(*args, **kwargs)
|
|
24
|
+
except DeviceNotFoundError as e:
|
|
25
|
+
return f"Error: {str(e)}"
|
|
26
|
+
except Exception as e:
|
|
27
|
+
return f"Error in {func.__name__}: {type(e).__name__}: {str(e)}"
|
|
28
|
+
|
|
29
|
+
@wraps(func)
|
|
30
|
+
def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
31
|
+
try:
|
|
32
|
+
return func(*args, **kwargs)
|
|
33
|
+
except DeviceNotFoundError as e:
|
|
34
|
+
return f"Error: {str(e)}"
|
|
35
|
+
except Exception as e:
|
|
36
|
+
return f"Error in {func.__name__}: {type(e).__name__}: {str(e)}"
|
|
37
|
+
|
|
38
|
+
# Check if the function is async
|
|
39
|
+
if inspect.iscoroutinefunction(func):
|
|
40
|
+
return async_wrapper # type: ignore
|
|
41
|
+
else:
|
|
42
|
+
return sync_wrapper # type: ignore
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
"""Device detection and screenshot utilities for Android and iOS devices."""
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import subprocess
|
|
7
|
+
import tempfile
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Literal
|
|
10
|
+
|
|
11
|
+
from adbutils import AdbClient, AdbDevice
|
|
12
|
+
from pydantic import BaseModel, ConfigDict
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
DevicePlatform = Literal["android", "ios"]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class MobileDevice(BaseModel):
    """A handle on one mobile device: its ID, its platform and an optional ADB connection."""

    # AdbDevice is not a pydantic-native type, so arbitrary types must be allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    device_id: str
    platform: DevicePlatform
    # Only meaningful for Android devices; stays None otherwise.
    adb_device: AdbDevice | None = None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DeviceInfo(BaseModel):
    """Plain, serializable description of a device (no live connection objects)."""

    device_id: str
    platform: DevicePlatform
    # Both optional: may be absent when the source does not report them.
    name: str | None = None
    state: str | None = None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class DeviceNotFoundError(Exception):
    """Signals that no (matching) mobile device could be found."""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_adb_client() -> AdbClient:
    """
    Get an ADB client instance, honouring the ADB_SERVER_SOCKET override.

    When ADB_SERVER_SOCKET is unset or empty, a default client is created.
    Otherwise the value must consist of exactly three colon-separated fields;
    the second and third are used as the ADB server host and port.

    Returns:
        AdbClient: A client for the default or the configured ADB server.

    Raises:
        ValueError: If ADB_SERVER_SOCKET does not have exactly three
            colon-separated fields or its port field is not an integer.
    """
    custom_adb_socket = os.getenv("ADB_SERVER_SOCKET")
    if not custom_adb_socket:
        return AdbClient()

    parts = custom_adb_socket.split(":")
    if len(parts) != 3:
        raise ValueError(f"Invalid ADB server socket: {custom_adb_socket}")

    # The first field is ignored (presumably the transport, e.g. "tcp" -- confirm).
    _, host, port = parts
    try:
        port_number = int(port)
    except ValueError as exc:
        # Report the offending setting instead of a bare int() parsing error.
        raise ValueError(f"Invalid ADB server socket: {custom_adb_socket}") from exc
    return AdbClient(host=host, port=port_number)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def list_available_devices() -> list[DeviceInfo]:
    """
    Collect the mobile devices visible to ADB and to ``xcrun simctl``.

    Android entries come from the ADB client's device list and are reported
    with state "connected". iOS entries are every simulator listed under a
    runtime whose name contains "iOS", with the name and state simctl reports
    (no filtering on state is done here).

    Both lookups are best effort: when a tool is missing or fails, that
    platform simply contributes no entries.

    Returns:
        list[DeviceInfo]: Android devices first, then iOS simulators.
    """
    found: list[DeviceInfo] = []

    # --- Android (ADB) ---
    try:
        for adb_dev in get_adb_client().device_list():
            serial = adb_dev.serial
            if not serial:
                continue
            found.append(
                DeviceInfo(
                    device_id=serial,
                    platform="android",
                    name=serial,
                    state="connected",
                )
            )
    except Exception:
        # ADB not available or error listing devices
        pass

    # --- iOS simulators (xcrun simctl) ---
    try:
        completed = subprocess.run(
            ["xcrun", "simctl", "list", "devices", "-j"],
            capture_output=True,
            text=True,
            check=True,
        )
        simctl_data = json.loads(completed.stdout)

        for runtime_name, simulators in simctl_data.get("devices", {}).items():
            if "iOS" not in runtime_name:
                continue

            for sim in simulators:
                udid = sim.get("udid")
                if not udid:
                    continue
                found.append(
                    DeviceInfo(
                        device_id=udid,
                        platform="ios",
                        name=sim.get("name"),
                        state=sim.get("state"),
                    )
                )
    except (subprocess.CalledProcessError, FileNotFoundError, json.JSONDecodeError):
        # xcrun not available or error listing devices
        pass

    return found
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def find_mobile_device(device_id: str | None = None) -> MobileDevice:
    """
    Find a mobile device (Android via ADB or iOS via xcrun).

    Args:
        device_id: Optional device ID to target a specific device.
            If None (or empty), the first device whose state is "connected"
            or "Booted" is chosen, falling back to the first listed device.

    Returns:
        MobileDevice: A reference to the device with its platform information.
            Android devices also carry their AdbDevice handle.

    Raises:
        DeviceNotFoundError: If no device is found, the specified device_id is
            not found, or the ADB handle for an Android device cannot be
            obtained (the underlying error is attached as ``__cause__``).
    """
    available_devices = list_available_devices()

    if not available_devices:
        raise DeviceNotFoundError(
            "No mobile device found. "
            "Make sure you have an Android device connected via ADB "
            "or an iOS simulator running."
        )

    if device_id:
        # Look for the specific device requested by the caller.
        target_device = next(
            (dev for dev in available_devices if dev.device_id == device_id),
            None,
        )
        if target_device is None:
            raise DeviceNotFoundError(
                f"Device with ID '{device_id}' not found. "
                "Make sure the device is connected and accessible via adb or xcrun."
            )
    else:
        # Prefer connected/booted devices; otherwise fall back to any device.
        target_device = next(
            (dev for dev in available_devices if dev.state in ("connected", "Booted")),
            available_devices[0],
        )

    if target_device.platform != "android":
        # For iOS, the device ID is all that is needed.
        return MobileDevice(device_id=target_device.device_id, platform="ios")

    # For Android, attach the AdbDevice reference.
    try:
        adb_device = get_adb_client().device(serial=target_device.device_id)
        return MobileDevice(
            device_id=target_device.device_id,
            platform="android",
            adb_device=adb_device,
        )
    except Exception as e:
        # Chain the original error so the root cause stays in the traceback.
        raise DeviceNotFoundError(f"Failed to connect to Android device: {e}") from e
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def capture_screenshot(device: MobileDevice) -> str:
    """
    Take a screenshot of ``device`` using the backend for its platform.

    Args:
        device: MobileDevice instance returned by find_mobile_device()

    Returns:
        str: Base64-encoded screenshot image (PNG format)

    Raises:
        RuntimeError: If screenshot capture fails
    """
    # Anything that is not "android" is handled by the iOS backend.
    if device.platform == "android":
        grab = _capture_android_screenshot
    else:
        grab = _capture_ios_screenshot
    return grab(device)
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _capture_android_screenshot(device: MobileDevice) -> str:
|
|
199
|
+
"""Capture screenshot from Android device using ADB."""
|
|
200
|
+
if not device.adb_device:
|
|
201
|
+
# Reconnect to device if not available
|
|
202
|
+
adb_client = get_adb_client()
|
|
203
|
+
adb_device = adb_client.device(serial=device.device_id)
|
|
204
|
+
if not adb_device:
|
|
205
|
+
raise RuntimeError(f"Android device {device.device_id} not found")
|
|
206
|
+
device.adb_device = adb_device
|
|
207
|
+
|
|
208
|
+
try:
|
|
209
|
+
# Use ADB screencap to get PNG screenshot
|
|
210
|
+
screenshot_bytes = device.adb_device.shell("screencap -p", encoding=None)
|
|
211
|
+
if isinstance(screenshot_bytes, bytes):
|
|
212
|
+
return base64.b64encode(screenshot_bytes).decode("utf-8")
|
|
213
|
+
else:
|
|
214
|
+
raise RuntimeError("Unexpected screenshot data type from ADB")
|
|
215
|
+
except Exception as e:
|
|
216
|
+
raise RuntimeError(f"Failed to capture Android screenshot: {e}")
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def _capture_ios_screenshot(device: MobileDevice) -> str:
|
|
220
|
+
"""Capture screenshot from iOS simulator using xcrun."""
|
|
221
|
+
try:
|
|
222
|
+
# Create temporary file for screenshot
|
|
223
|
+
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_file:
|
|
224
|
+
tmp_path = Path(tmp_file.name)
|
|
225
|
+
|
|
226
|
+
try:
|
|
227
|
+
# Capture screenshot using xcrun simctl
|
|
228
|
+
cmd = ["xcrun", "simctl", "io", device.device_id, "screenshot", str(tmp_path)]
|
|
229
|
+
subprocess.run(cmd, capture_output=True, text=True, check=True)
|
|
230
|
+
|
|
231
|
+
# Read and encode the screenshot
|
|
232
|
+
screenshot_bytes = tmp_path.read_bytes()
|
|
233
|
+
return base64.b64encode(screenshot_bytes).decode("utf-8")
|
|
234
|
+
finally:
|
|
235
|
+
# Clean up temporary file
|
|
236
|
+
if tmp_path.exists():
|
|
237
|
+
tmp_path.unlink()
|
|
238
|
+
|
|
239
|
+
except subprocess.CalledProcessError as e:
|
|
240
|
+
raise RuntimeError(f"Failed to capture iOS screenshot: {e.stderr}")
|
|
241
|
+
except Exception as e:
|
|
242
|
+
raise RuntimeError(f"Failed to capture iOS screenshot: {e}")
|
minitap/mcp/core/llm.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from langchain_openai import ChatOpenAI
|
|
2
|
+
|
|
3
|
+
from minitap.mcp.core.config import settings
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def get_minitap_llm(
    trace_id: str,
    remote_tracing: bool = False,
    model: str = "google/gemini-2.5-pro",
    temperature: float | None = None,
    max_retries: int | None = None,
) -> ChatOpenAI:
    """
    Create a ChatOpenAI client configured from the MCP settings.

    Args:
        trace_id: Sent with every request as the "sessionId" query parameter.
        remote_tracing: Sent with every request as the "traceOnlyUsage"
            query parameter.
        model: Model identifier to request.
        temperature: Sampling temperature, passed through unchanged.
        max_retries: Retry count; when None and the model name starts with
            "google/", 2 is used instead.

    Returns:
        ChatOpenAI: The configured client.
    """
    assert settings.MINITAP_API_KEY is not None
    assert settings.MINITAP_API_BASE_URL is not None

    effective_retries = max_retries
    if effective_retries is None and model.startswith("google/"):
        effective_retries = 2

    tracing_query = {
        "sessionId": trace_id,
        "traceOnlyUsage": remote_tracing,
    }
    return ChatOpenAI(
        model=model,
        temperature=temperature,
        max_retries=effective_retries,
        api_key=settings.MINITAP_API_KEY,
        base_url=settings.MINITAP_API_BASE_URL,
        default_query=tracing_query,
    )
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
from PIL import Image
|
|
3
|
+
from io import BytesIO
|
|
4
|
+
|
|
5
|
+
from langchain_core.messages import HumanMessage
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def compress_base64_jpeg(base64_str: str, quality: int = 50) -> str:
    """
    Re-encode a base64 image as a JPEG and return it as base64.

    A leading data-URL header ("data:image...,") is stripped first. Images
    with an alpha band (RGBA, LA, PA) are flattened onto a white background;
    any other non-RGB mode is converted to RGB.

    Args:
        base64_str: Base64-encoded image string
        quality: JPEG quality (0-100, default 50)

    Returns:
        Base64-encoded JPEG image
    """
    if base64_str.startswith("data:image"):
        base64_str = base64_str.split(",")[1]

    picture = Image.open(BytesIO(base64.b64decode(base64_str)))
    mode = picture.mode

    if mode in ("RGBA", "LA", "PA"):
        # JPEG doesn't support transparency: composite onto white, using the
        # alpha band as the paste mask (band 3 for RGBA, band 1 for LA/PA).
        alpha_band = 3 if mode == "RGBA" else 1
        flattened = Image.new("RGB", picture.size, (255, 255, 255))
        flattened.paste(picture, mask=picture.split()[alpha_band])
        picture = flattened
    elif mode != "RGB":
        picture = picture.convert("RGB")

    jpeg_buffer = BytesIO()
    picture.save(jpeg_buffer, format="JPEG", quality=quality, optimize=True)
    return base64.b64encode(jpeg_buffer.getvalue()).decode("utf-8")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_screenshot_message_for_llm(screenshot_base64: str):
    """
    Wrap a screenshot in a HumanMessage holding a single image_url part.

    Args:
        screenshot_base64: Either a complete "data:image..." URL or bare
            base64 data; bare data is prefixed with a JPEG data-URL header.

    Returns:
        HumanMessage: A message whose content is one image_url entry.
    """
    if screenshot_base64.startswith("data:image"):
        image_url = screenshot_base64
    else:
        image_url = f"data:image/jpeg;base64,{screenshot_base64}"

    image_part = {
        "type": "image_url",
        "image_url": {"url": image_url},
    }
    return HumanMessage(content=[image_part])
|
minitap/mcp/main.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""MCP server for mobile-use with screen analysis capabilities."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
import threading
|
|
8
|
+
|
|
9
|
+
# Fix Windows console encoding for Unicode characters (emojis in logs)
|
|
10
|
+
if sys.platform == "win32":
|
|
11
|
+
if hasattr(sys.stdout, "reconfigure"):
|
|
12
|
+
sys.stdout.reconfigure(encoding="utf-8") # type: ignore[attr-defined]
|
|
13
|
+
if hasattr(sys.stderr, "reconfigure"):
|
|
14
|
+
sys.stderr.reconfigure(encoding="utf-8") # type: ignore[attr-defined]
|
|
15
|
+
os.environ["PYTHONIOENCODING"] = "utf-8"
|
|
16
|
+
|
|
17
|
+
try:
|
|
18
|
+
import colorama
|
|
19
|
+
|
|
20
|
+
colorama.init(strip=False, convert=True, wrap=True)
|
|
21
|
+
except ImportError:
|
|
22
|
+
pass
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
from fastmcp import FastMCP # noqa: E402
|
|
26
|
+
|
|
27
|
+
from minitap.mcp.core.agents import agent
|
|
28
|
+
from minitap.mcp.core.config import settings # noqa: E402
|
|
29
|
+
from minitap.mcp.core.device import (
|
|
30
|
+
DeviceInfo, # noqa: E402
|
|
31
|
+
list_available_devices, # noqa: E402; noqa: E402
|
|
32
|
+
)
|
|
33
|
+
from minitap.mcp.server.middleware import MaestroCheckerMiddleware
|
|
34
|
+
from minitap.mcp.server.poller import device_health_poller
|
|
35
|
+
|
|
36
|
+
logger = logging.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
mcp = FastMCP(
|
|
40
|
+
name="mobile-use-mcp",
|
|
41
|
+
instructions="""
|
|
42
|
+
This server provides analysis tools for connected
|
|
43
|
+
mobile devices (iOS or Android).
|
|
44
|
+
Call get_available_devices() to list them.
|
|
45
|
+
""",
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
from minitap.mcp.tools import ( # noqa: E402, F401
|
|
49
|
+
analyze_screen,
|
|
50
|
+
execute_mobile_command,
|
|
51
|
+
go_back,
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@mcp.resource("data://devices")
def get_available_devices() -> list[DeviceInfo]:
    """Provides a list of connected mobile devices (iOS or Android)."""
    # Thin resource wrapper: device discovery lives in list_available_devices().
    devices = list_available_devices()
    return devices
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def mcp_lifespan(**mcp_run_kwargs):
    """
    Run the MCP server together with the background device health poller.

    Registers the Maestro health-check middleware, starts the poller thread,
    runs the server until it returns, and then stops and joins the poller.
    The poller is shut down on every exit path so that its non-daemon thread
    cannot keep the process alive after the server has failed.

    Args:
        **mcp_run_kwargs: Forwarded unchanged to ``mcp.run``.
    """
    mcp.add_middleware(MaestroCheckerMiddleware(agent))

    # Start device health poller in background
    stop_event = threading.Event()
    poller_thread = threading.Thread(
        target=device_health_poller,
        args=(
            stop_event,
            agent,
        ),
    )
    poller_thread.start()
    logger.info("Device health poller started")

    try:
        mcp.run(**mcp_run_kwargs)
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop the server; not an error.
        pass
    finally:
        # Stop device health poller, even when mcp.run raised something else.
        stop_event.set()
        logger.info("Device health poller stopping...")
        poller_thread.join()
        logger.info("Device health poller stopped")
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def main() -> None:
    """
    Command-line entry point for the MCP server.

    Without arguments the server runs with ``mcp.run``'s defaults. With
    ``--server`` it runs over the "http" transport on the host and port taken
    from the settings.
    """
    cli = argparse.ArgumentParser(description="Mobile Use MCP Server")
    cli.add_argument(
        "--server",
        action="store_true",
        help="Run as network server (uses MCP_SERVER_HOST and MCP_SERVER_PORT from env)",
    )
    options = cli.parse_args()

    if not options.server:
        logger.info("Starting MCP server in local mode")
        mcp_lifespan()
        return

    # Remote access: serve over HTTP on the configured host/port.
    logger.info(f"Starting MCP server on {settings.MCP_SERVER_HOST}:{settings.MCP_SERVER_PORT}")
    mcp_lifespan(
        transport="http",
        host=settings.MCP_SERVER_HOST,
        port=settings.MCP_SERVER_PORT,
    )
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from fastmcp.exceptions import ToolError
|
|
2
|
+
from fastmcp.server.middleware import Middleware, MiddlewareContext
|
|
3
|
+
|
|
4
|
+
from minitap.mobile_use.sdk import Agent
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class MaestroCheckerMiddleware(Middleware):
    """
    Middleware that refuses "requires-maestro" tools while the agent is unhealthy.

    Args:
        agent: The Agent whose ``is_healthy()`` result gates tagged tools.
    """

    def __init__(self, agent: Agent):
        self.agent = agent

    async def on_call_tool(self, context: MiddlewareContext, call_next):
        """
        Check agent health before a tagged tool runs, then continue the chain.

        Raises:
            ToolError: If the called tool is tagged "requires-maestro" and the
                agent reports that it is not healthy.
        """
        if context.fastmcp_context:
            try:
                tool = await context.fastmcp_context.fastmcp.get_tool(context.message.name)
                blocked = "requires-maestro" in tool.tags and not self.agent.is_healthy()
            except Exception:
                # Best effort: if the tool lookup or the health probe itself
                # fails, do not block the call.
                blocked = False

            # Raised outside the try block on purpose: inside it, the broad
            # except above would swallow the error and the gate would never fire.
            if blocked:
                raise ToolError(
                    "Maestro not healthy.\n"
                    "Make sure a mobile device is connected and try again."
                )
        return await call_next(context)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Device health monitoring poller for the MCP server."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import time
|
|
5
|
+
import threading
|
|
6
|
+
|
|
7
|
+
from minitap.mcp.core.device import list_available_devices
|
|
8
|
+
from minitap.mobile_use.sdk import Agent
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def device_health_poller(stop_event: threading.Event, agent: Agent) -> None:
    """
    Background poller that monitors device availability and agent health.

    Every 5 seconds, while ``stop_event`` is not set, the available devices
    are listed; if there is at least one and the agent is unhealthy, the agent
    is force-cleaned and initialized again. Setting ``stop_event`` wakes the
    poller immediately and ends the loop without another poll.

    Args:
        stop_event: Event used to ask the poller to stop.
        agent: The Agent instance to monitor and reinitialize if needed.
    """
    # wait() doubles as an interruptible sleep: it returns True as soon as the
    # event is set, and False after the 5 second timeout.
    while not stop_event.wait(5):
        try:
            devices = list_available_devices()

            if devices:
                if not agent.is_healthy():
                    logger.warning("Agent is not healthy. Reinitializing...")
                    agent.clean(force=True)
                    agent.init()
                    logger.info("Agent reinitialized successfully")
            else:
                logger.info("No mobile device found, retrying in 5 seconds...")

        except Exception as e:
            logger.error(f"Error in device health poller: {e}")
            agent.clean(force=True)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from jinja2 import Template
|
|
3
|
+
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
|
|
4
|
+
from uuid import uuid4
|
|
5
|
+
|
|
6
|
+
from pydantic import Field
|
|
7
|
+
|
|
8
|
+
from minitap.mcp.core.config import settings
|
|
9
|
+
from minitap.mcp.core.decorators import handle_tool_errors
|
|
10
|
+
from minitap.mcp.core.device import capture_screenshot, find_mobile_device
|
|
11
|
+
from minitap.mcp.core.llm import get_minitap_llm
|
|
12
|
+
from minitap.mcp.core.utils import compress_base64_jpeg, get_screenshot_message_for_llm
|
|
13
|
+
from minitap.mcp.main import mcp
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@mcp.tool(
    name="analyze_screen",
    description="""
    Analyze what is shown on the mobile device screen.
    This tool captures a screenshot from the target device and uses a
    vision-capable LLM to analyze and describe what's on the screen. Useful
    for understanding UI elements, extracting text, or identifying specific features.
    """,
)
@handle_tool_errors
async def analyze_screen(
    prompt: str = Field(
        description="Prompt for the analysis.",
    ),
    device_id: str | None = Field(
        default=None,
        description="ID of the device screen to analyze. "
        "If not provided, the first available device is taken.",
    ),
) -> str | list | dict:
    # Pipeline: resolve the device, capture its screen, compress the image to
    # JPEG, then send system prompt + image + user prompt to the vision model.
    # The result is the content of the model's response.
    system_message = Template(
        Path(__file__).parent.joinpath("screen_analyzer.md").read_text(encoding="utf-8")
    ).render()

    # Find the device and capture screenshot
    device = find_mobile_device(device_id=device_id)
    screenshot_base64 = capture_screenshot(device)
    compressed_image_base64 = compress_base64_jpeg(screenshot_base64)

    messages: list[BaseMessage] = [
        SystemMessage(content=system_message),
        get_screenshot_message_for_llm(compressed_image_base64),
        HumanMessage(content=prompt),
    ]

    llm = get_minitap_llm(
        # A fresh trace ID per call.
        trace_id=str(uuid4()),
        remote_tracing=True,
        model=settings.VISION_MODEL,
        temperature=1,
    )
    response = await llm.ainvoke(messages)
    return response.content
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Tool for running manual tasks on a connected mobile device."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Mapping
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from minitap.mobile_use.sdk.types import ManualTaskConfig
|
|
7
|
+
from minitap.mobile_use.sdk.types.task import PlatformTaskRequest
|
|
8
|
+
from pydantic import Field
|
|
9
|
+
|
|
10
|
+
from minitap.mcp.core.agents import agent
|
|
11
|
+
from minitap.mcp.core.decorators import handle_tool_errors
|
|
12
|
+
from minitap.mcp.main import mcp
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _serialize_result(result: Any) -> Any:
|
|
16
|
+
"""Convert SDK responses to serializable data for MCP."""
|
|
17
|
+
if hasattr(result, "model_dump"):
|
|
18
|
+
return result.model_dump()
|
|
19
|
+
if hasattr(result, "dict"):
|
|
20
|
+
return result.dict()
|
|
21
|
+
if isinstance(result, Mapping):
|
|
22
|
+
return dict(result)
|
|
23
|
+
return result
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@mcp.tool(
    name="execute_mobile_command",
    tags={"requires-maestro"},
    description="""
    Execute a natural language command on a mobile device using the Minitap SDK.
    This tool allows you to control your Android or iOS device using natural language.
    Examples:
    - "Open the settings app and tell me the battery level"
    - "Find the first 3 unread emails in Gmail"
    - "Take a screenshot and save it"

    The tool uses the Minitap platform with API key authentication.
    Set MINITAP_API_KEY and MINITAP_API_BASE_URL environment variables.
    Visit https://platform.minitap.ai to get your API key.
    """,
)
@handle_tool_errors
async def execute_mobile_command(
    goal: str = Field(description="High-level goal describing the action to perform."),
    output_description: str | None = Field(
        default=None,
        description="Optional description of the expected output format. "
        "For example: 'A JSON array with sender and subject for each email' "
        "or 'The battery percentage as a number'.",
    ),
    profile: str = Field(
        default="default",
        description="Name of the profile to use for this task. Defaults to 'default'.",
    ),
) -> str | dict[str, Any]:
    """Run a manual task on a mobile device via the Minitap platform."""
    # NOTE(review): `profile` is accepted but never referenced below -- confirm
    # whether it should be forwarded with the task request.
    try:
        manual_task = ManualTaskConfig(goal=goal, output_description=output_description)
        outcome = await agent.run_task(request=PlatformTaskRequest(task=manual_task))
        return _serialize_result(outcome)
    finally:
        # The agent is cleaned after every run, whether it succeeded or not.
        agent.clean()
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
|
|
3
|
+
from minitap.mcp.core.decorators import handle_tool_errors
|
|
4
|
+
from minitap.mcp.main import mcp
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@mcp.tool(
    name="go_back",
    tags={"requires-maestro"},
    description="""
    Sends a 'back' command to the mobile device automation server.
    """,
)
@handle_tool_errors
async def go_back() -> str:
    """Send a back command to the mobile device."""
    # Local import so this block brings its own dependency into scope.
    import asyncio

    try:
        # requests is blocking: run the POST in a worker thread so the event
        # loop stays responsive for up to the 30 second timeout.
        response = await asyncio.to_thread(
            requests.post,
            "http://localhost:9999/api/run-command",
            headers={
                "User-Agent": "python-requests/2.32.4",
                "Accept-Encoding": "gzip, deflate, zstd",
                "Accept": "*/*",
                "Connection": "keep-alive",
                "Content-Type": "application/json",
            },
            json={"yaml": "back\n"},
            timeout=30,
        )

        if response.status_code == 200:
            return f"Successfully sent back command. Response: {response.text}"
        return (
            f"Failed to send back command. "
            f"Status code: {response.status_code}, Response: {response.text}"
        )

    except requests.exceptions.RequestException as e:
        return f"Error sending back command: {str(e)}"
    except Exception as e:
        return f"Unexpected error: {str(e)}"
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
You are given:
|
|
2
|
+
|
|
3
|
+
1. A screenshot of a mobile device.
|
|
4
|
+
2. A prompt describing what information to extract.
|
|
5
|
+
|
|
6
|
+
Your task:
|
|
7
|
+
|
|
8
|
+
- Look at the screenshot and **answer the prompt directly and completely**.
|
|
9
|
+
- Provide a **detailed, structured description** of the relevant content (text, layout, icons, menus, timestamps, notifications, etc.).
|
|
10
|
+
- If the prompt asks for specific data, extract it exactly as shown.
|
|
11
|
+
- If the screenshot contains structured information (e.g., receipt, chat, settings), present it clearly using lists or tables.
|
|
12
|
+
- Do not guess — if something is unclear or missing, state that explicitly.
|
|
13
|
+
|
|
14
|
+
**Output format:**
|
|
15
|
+
|
|
16
|
+
1. **Direct answer** to the prompt.
|
|
17
|
+
2. **Detailed breakdown** of the screenshot content supporting the answer.
|
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: minitap-mcp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP server for mobile-use
|
|
5
|
+
Author: Pierre-Louis Favreau, Jean-Pierre Lo, Clément Guiguet
|
|
6
|
+
Requires-Dist: fastmcp>=2.12.4
|
|
7
|
+
Requires-Dist: python-dotenv>=1.1.1
|
|
8
|
+
Requires-Dist: pydantic>=2.12.0
|
|
9
|
+
Requires-Dist: pydantic-settings>=2.10.1
|
|
10
|
+
Requires-Dist: minitap-mobile-use>=2.5.3
|
|
11
|
+
Requires-Dist: jinja2>=3.1.6
|
|
12
|
+
Requires-Dist: langchain-core>=0.3.75
|
|
13
|
+
Requires-Dist: ruff==0.5.3 ; extra == 'dev'
|
|
14
|
+
Requires-Dist: pytest==8.4.1 ; extra == 'dev'
|
|
15
|
+
Requires-Dist: pytest-cov==5.0.0 ; extra == 'dev'
|
|
16
|
+
Requires-Python: >=3.12
|
|
17
|
+
Project-URL: Homepage, https://minitap.ai/
|
|
18
|
+
Project-URL: Source, https://github.com/minitap-ai/mobile-use
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
22
|
+
# Mobile-Use MCP Server
|
|
23
|
+
|
|
24
|
+
A Model Context Protocol (MCP) server that provides AI-powered mobile device screen analysis. It automatically detects connected Android devices (via ADB) and iOS simulators (via xcrun), captures screenshots, and analyzes them using vision language models.
|
|
25
|
+
|
|
26
|
+
## Features
|
|
27
|
+
|
|
28
|
+
- **🔍 Device Discovery**: Automatically finds connected Android devices (ADB) and iOS simulators (xcrun)
|
|
29
|
+
- **📱 Screen Analysis**: Capture and analyze device screenshots using vision-capable LLMs
|
|
30
|
+
- **🤖 Natural Language Control**: Execute commands on your device using natural language via the mobile-use SDK
|
|
31
|
+
- **🚀 Easy Integration**: Built with FastMCP for seamless MCP protocol implementation
|
|
32
|
+
- **⚙️ Flexible Configuration**: Uses Minitap API with support for various vision models
|
|
33
|
+
|
|
34
|
+
## Installation
|
|
35
|
+
|
|
36
|
+
### Prerequisites
|
|
37
|
+
|
|
38
|
+
- **Python 3.12+**
|
|
39
|
+
- **uv** (recommended) or pip
|
|
40
|
+
- **For Android**: ADB installed and accessible
|
|
41
|
+
- **For iOS**: Xcode Command Line Tools (macOS only)
|
|
42
|
+
- **Minitap API Key** - Get one at [platform.minitap.ai](https://platform.minitap.ai)
|
|
43
|
+
|
|
44
|
+
### Setup
|
|
45
|
+
|
|
46
|
+
1. **Clone and navigate to the project:**
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
cd minitap-mcp
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
2. **Install dependencies:**
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
# Create a virtual environment
|
|
56
|
+
uv venv
|
|
57
|
+
source .venv/bin/activate
|
|
58
|
+
|
|
59
|
+
# Install dependencies
|
|
60
|
+
uv sync
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
3. **Configure for MCP usage:**
|
|
64
|
+
|
|
65
|
+
The MCP server is configured via environment variables passed from your MCP client (e.g., Windsurf).
|
|
66
|
+
|
|
67
|
+
Required environment variable:
|
|
68
|
+
- `MINITAP_API_KEY`: Your Minitap API key
|
|
69
|
+
|
|
70
|
+
Optional environment variables:
|
|
71
|
+
- `MINITAP_API_BASE_URL`: API base URL (default: `https://platform.minitap.ai/api/v1`)
|
|
72
|
+
- `VISION_MODEL`: Vision model to use (default: `baidu/ernie-4.5-vl-28b-a3b`)
|
|
73
|
+
- `ADB_SERVER_SOCKET`: Custom ADB server socket (format: `tcp:host:port`)
|
|
74
|
+
|
|
75
|
+
## Available Resources & Tools
|
|
76
|
+
|
|
77
|
+
### Resource: `data://devices`
|
|
78
|
+
|
|
79
|
+
Lists all connected mobile devices (Android and iOS).
|
|
80
|
+
|
|
81
|
+
**Returns:** Array of device information objects with:
|
|
82
|
+
- `device_id`: Device serial (Android) or UDID (iOS)
|
|
83
|
+
- `platform`: `"android"` or `"ios"`
|
|
84
|
+
- `name`: Device name
|
|
85
|
+
- `state`: Device state (`"connected"` or `"Booted"`)
|
|
86
|
+
|
|
87
|
+
### Tool: `analyze_screen`
|
|
88
|
+
|
|
89
|
+
Captures a screenshot from a mobile device and analyzes it using a vision language model.
|
|
90
|
+
|
|
91
|
+
**Parameters:**
|
|
92
|
+
- `prompt` (required): Analysis prompt describing what information to extract
|
|
93
|
+
- `device_id` (optional): Specific device ID to target. If not provided, uses the first available device.
|
|
94
|
+
|
|
95
|
+
**Returns:** AI-generated analysis of the screenshot based on the prompt.
|
|
96
|
+
|
|
97
|
+
**Example:**
|
|
98
|
+
```
|
|
99
|
+
Prompt: "What app is currently open? List all visible UI elements."
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
The tool will:
|
|
103
|
+
1. Find the specified device (or first available)
|
|
104
|
+
2. Capture a screenshot
|
|
105
|
+
3. Analyze it with the vision model
|
|
106
|
+
4. Return the analysis
|
|
107
|
+
|
|
108
|
+
### Tool: `execute_mobile_command`
|
|
109
|
+
|
|
110
|
+
Execute natural language commands on your mobile device using the mobile-use SDK. This tool allows you to control your Android or iOS device with simple instructions.
|
|
111
|
+
|
|
112
|
+
**Parameters:**
|
|
113
|
+
- `goal` (required): Natural language command to execute on the device
|
|
114
|
+
- `output_description` (optional): Description of the expected output format (e.g., "A JSON list of objects with sender and subject keys")
|
|
115
|
+
- `profile` (optional): Name of the profile to use for this task. Defaults to 'default'
|
|
116
|
+
|
|
117
|
+
**Returns:** Execution result with status, output, and any extracted data.
|
|
118
|
+
|
|
119
|
+
**Examples:**
|
|
120
|
+
```yaml
|
|
121
|
+
# Simple command
|
|
122
|
+
goal: "Go to settings and tell me my current battery level"
|
|
123
|
+
|
|
124
|
+
# Data extraction with structured output
|
|
125
|
+
goal: "Open Gmail, find first 3 unread emails, and list their sender and subject line"
|
|
126
|
+
output_description: "A JSON list of objects, each with 'sender' and 'subject' keys"
|
|
127
|
+
|
|
128
|
+
# App navigation
|
|
129
|
+
goal: "Open Twitter and scroll to the latest tweet"
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
The tool will:
|
|
133
|
+
1. Find the specified device (or first available)
|
|
134
|
+
2. Execute the command using the mobile-use AI agent
|
|
135
|
+
3. Return the result or extracted data
|
|
136
|
+
|
|
137
|
+
## Usage
|
|
138
|
+
|
|
139
|
+
### Running the MCP Server
|
|
140
|
+
|
|
141
|
+
#### Local Mode (Default)
|
|
142
|
+
|
|
143
|
+
The MCP server is typically started by your MCP client (e.g., Windsurf). For manual testing:
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
minitap-mcp
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
#### Network Server Mode
|
|
150
|
+
|
|
151
|
+
You can run the MCP server as a network server for remote access:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
# Run as network server (uses MCP_SERVER_HOST and MCP_SERVER_PORT from env)
|
|
155
|
+
minitap-mcp --server
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
The server will bind to the host and port specified in your environment variables:
|
|
159
|
+
- `MCP_SERVER_HOST` (default: `0.0.0.0`)
|
|
160
|
+
- `MCP_SERVER_PORT` (default: `8000`)
|
|
161
|
+
|
|
162
|
+
Configure these in your `.env` file or via environment variables to customize the binding address.
|
|
163
|
+
|
|
164
|
+
Inside Windsurf, you can connect to the running network server by adding the following to your `~/.codeium/windsurf/mcp_config.json` file:
|
|
165
|
+
|
|
166
|
+
```json
|
|
167
|
+
{
|
|
168
|
+
"mcpServers": {
|
|
169
|
+
"minitap-mcp": {
|
|
170
|
+
"serverUrl": "http://localhost:8000/mcp"
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
N.B. You may need to change the port based on what you've configured in your `.env` file.
|
|
177
|
+
|
|
178
|
+
## Development
|
|
179
|
+
|
|
180
|
+
### Quick Testing
|
|
181
|
+
|
|
182
|
+
Test device detection and screenshot capture (no API key required):
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
python tests/test_devices.py
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
Test the complete MCP flow with LLM analysis (requires API key):
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
cp .env.example .env
|
|
192
|
+
# Edit .env and add your MINITAP_API_KEY
|
|
193
|
+
python tests/test_mcp.py
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Code Quality
|
|
197
|
+
|
|
198
|
+
**Format code:**
|
|
199
|
+
```bash
|
|
200
|
+
ruff format .
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
**Lint:**
|
|
204
|
+
```bash
|
|
205
|
+
ruff check --fix
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
## Project Structure
|
|
209
|
+
|
|
210
|
+
```
|
|
211
|
+
minitap/mcp/
|
|
212
|
+
├── __init__.py
|
|
213
|
+
├── main.py # FastMCP server entry point
|
|
214
|
+
├── core/
|
|
215
|
+
│ ├── __init__.py
|
|
216
|
+
│ ├── config.py # Pydantic settings configuration
|
|
217
|
+
│ ├── decorators.py # Error handling decorators
|
|
218
|
+
│ ├── device.py # Device discovery & screenshot capture
|
|
219
|
+
│ ├── llm.py # LLM client initialization
|
|
220
|
+
│ └── utils.py # Utility functions (image compression, etc.)
|
|
221
|
+
└── tools/
|
|
222
|
+
├── __init__.py
|
|
223
|
+
├── analyze_screen.py # Screen analysis tool
|
|
224
|
+
├── execute_mobile_command.py # Mobile-use SDK integration tool
|
|
225
|
+
└── screen_analyzer.md # System prompt for analysis
|
|
226
|
+
|
|
227
|
+
tests/
|
|
228
|
+
├── test_devices.py # Device detection tests
|
|
229
|
+
└── test_mcp.py # Full MCP integration tests
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
## Creating New Tools
|
|
233
|
+
|
|
234
|
+
When adding new MCP tools, use the `@handle_tool_errors` decorator to prevent unhandled exceptions from causing infinite loops:
|
|
235
|
+
|
|
236
|
+
```python
|
|
237
|
+
from minitap.mcp.core.decorators import handle_tool_errors
|
|
238
|
+
from minitap.mcp.main import mcp
|
|
239
|
+
|
|
240
|
+
@mcp.tool(name="my_tool", description="...")
|
|
241
|
+
@handle_tool_errors # Add this decorator
|
|
242
|
+
async def my_tool(param: str) -> str:
|
|
243
|
+
# Your tool logic here
|
|
244
|
+
# Any exception will be caught and returned as an error message
|
|
245
|
+
return "result"
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
The decorator automatically:
|
|
249
|
+
- Catches all exceptions (including `DeviceNotFoundError`)
|
|
250
|
+
- Returns user-friendly error messages
|
|
251
|
+
- Prevents the MCP server from hanging or looping infinitely
|
|
252
|
+
- Works with both sync and async functions
|
|
253
|
+
|
|
254
|
+
## Integration with Windsurf
|
|
255
|
+
|
|
256
|
+
To use this MCP server in Windsurf, add it to your MCP settings:
|
|
257
|
+
|
|
258
|
+
**Location:** `~/.codeium/windsurf/mcp_config.json`
|
|
259
|
+
|
|
260
|
+
**Configuration:**
|
|
261
|
+
|
|
262
|
+
```json
|
|
263
|
+
{
|
|
264
|
+
"mcpServers": {
|
|
265
|
+
"minitap-mcp": {
|
|
266
|
+
"command": "bash",
|
|
267
|
+
"args": ["-c", "cd /path/to/minitap-mcp && source .venv/bin/activate && uv sync && minitap-mcp"],
|
|
268
|
+
"env": {
|
|
269
|
+
"MINITAP_API_KEY": "your_minitap_api_key_here",
|
|
270
|
+
"MINITAP_API_BASE_URL": "https://platform.minitap.ai/api/v1",
|
|
271
|
+
"VISION_MODEL": "baidu/ernie-4.5-vl-28b-a3b" // optional
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
**After configuration:**
|
|
279
|
+
1. Restart Windsurf
|
|
280
|
+
2. The `analyze_screen` and `execute_mobile_command` tools will be available in Cascade
|
|
281
|
+
3. The `data://devices` resource will list connected devices
|
|
282
|
+
|
|
283
|
+
### Available Vision Models
|
|
284
|
+
|
|
285
|
+
The Minitap API supports various vision models:
|
|
286
|
+
- `qwen/qwen-2.5-vl-7b-instruct` (default)
|
|
287
|
+
- `baidu/ernie-4.5-vl-28b-a3b`
|
|
288
|
+
- `openai/gpt-4o`
|
|
289
|
+
- And more - check the Minitap platform for the full list
|
|
290
|
+
|
|
291
|
+
## Device Requirements
|
|
292
|
+
|
|
293
|
+
### Android Devices
|
|
294
|
+
|
|
295
|
+
**Requirements:**
|
|
296
|
+
- ADB installed and in PATH
|
|
297
|
+
- USB debugging enabled on the device
|
|
298
|
+
- Device connected via USB or network ADB
|
|
299
|
+
|
|
300
|
+
**Verify connection:**
|
|
301
|
+
```bash
|
|
302
|
+
adb devices
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
**Custom ADB Server:**
|
|
306
|
+
If using a custom ADB server (e.g., Docker, WSL), set the socket:
|
|
307
|
+
```bash
|
|
308
|
+
export ADB_SERVER_SOCKET="tcp:localhost:5037"
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
N.B. You may need to restart your IDE for this change to take effect.
|
|
312
|
+
|
|
313
|
+
### iOS Devices
|
|
314
|
+
|
|
315
|
+
**Requirements:**
|
|
316
|
+
- macOS with Xcode Command Line Tools
|
|
317
|
+
- iOS Simulator running
|
|
318
|
+
|
|
319
|
+
**Verify simulators:**
|
|
320
|
+
```bash
|
|
321
|
+
xcrun simctl list devices booted
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
**Start a simulator:**
|
|
325
|
+
```bash
|
|
326
|
+
open -a Simulator
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
## Troubleshooting
|
|
330
|
+
|
|
331
|
+
### No devices found
|
|
332
|
+
|
|
333
|
+
1. **Android:** Run `adb devices` to verify device connection
|
|
334
|
+
2. **iOS:** Run `xcrun simctl list devices booted` to check running simulators
|
|
335
|
+
3. Ensure USB debugging is enabled (Android)
|
|
336
|
+
4. Try restarting ADB: `adb kill-server && adb start-server`
|
|
337
|
+
|
|
338
|
+
### Screenshot capture fails
|
|
339
|
+
|
|
340
|
+
1. Ensure device screen is unlocked
|
|
341
|
+
2. For Android, verify screencap permission
|
|
342
|
+
3. For iOS, ensure simulator is fully booted
|
|
343
|
+
|
|
344
|
+
### Tool not detected in Windsurf
|
|
345
|
+
|
|
346
|
+
1. Verify the import in `main.py` includes the tools module
|
|
347
|
+
2. Check that `tools/__init__.py` exists
|
|
348
|
+
3. Restart Windsurf after configuration changes
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
minitap/mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
minitap/mcp/core/agents.py,sha256=aRqHA1r8ejWOGlTbku_TXv6WV8Vucp5LP4puhEit7tc,573
|
|
3
|
+
minitap/mcp/core/config.py,sha256=kaIlb_4AQ3DNwtcLDZF528cgrtj5LGcCFNdHZfEwrUo,820
|
|
4
|
+
minitap/mcp/core/decorators.py,sha256=iekv181o_rkv0upacFWkmPqxsZRTzuLFyOZ0sIDtQnQ,1317
|
|
5
|
+
minitap/mcp/core/device.py,sha256=sEO3Z-8F325hDOObdH1YBhZE60f17FmIclt5UlhY_nU,7875
|
|
6
|
+
minitap/mcp/core/llm.py,sha256=z_pYZkZcAchsiWPh4W79frQPANsfYyFPUe8DJo8lZO0,822
|
|
7
|
+
minitap/mcp/core/utils.py,sha256=3uExpRoh7affIieZx3TLlZTmZCcoxWfx1YpPbwhjiJY,1791
|
|
8
|
+
minitap/mcp/main.py,sha256=4ytnVMd7yzk7I9MnFsPB8U9b9bMyhVf6Yq623pYccj0,3060
|
|
9
|
+
minitap/mcp/server/middleware.py,sha256=fbry_IiHmwUxVjsWgOU2goybcS1kLRXFZZ89KPH1d8E,880
|
|
10
|
+
minitap/mcp/server/poller.py,sha256=C2h5Ir3nY5gZ6qTDOHBw_Tb8PfAY54A-we2HrwjNLvg,1222
|
|
11
|
+
minitap/mcp/tools/analyze_screen.py,sha256=fjcjf3tTZDlxzmiQFHFNgw38bxPz4eisw57zuxshN2A,1984
|
|
12
|
+
minitap/mcp/tools/execute_mobile_command.py,sha256=fpmr0LnV7DDEiHwDDrDVZ-SaoVUmReZWH6sRBahPWes,2320
|
|
13
|
+
minitap/mcp/tools/go_back.py,sha256=lEmADkDkXu8JGm-sY7zL7M6GlBy-lD7Iffv4yzwoQfo,1301
|
|
14
|
+
minitap/mcp/tools/screen_analyzer.md,sha256=TTO80JQWusbA9cKAZn-9cqhgVHm6F_qJh5w152hG3YM,734
|
|
15
|
+
minitap_mcp-0.1.0.dist-info/WHEEL,sha256=X16MKk8bp2DRsAuyteHJ-9qOjzmnY0x1aj0P1ftqqWA,78
|
|
16
|
+
minitap_mcp-0.1.0.dist-info/entry_points.txt,sha256=rYVoXm7tSQCqQTtHx4Lovgn1YsjwtEEHfddKrfEVHuY,55
|
|
17
|
+
minitap_mcp-0.1.0.dist-info/METADATA,sha256=DB6OoQCvyqHD05Xks_splNrQzhKj32MO8mvf2KaWnmw,9920
|
|
18
|
+
minitap_mcp-0.1.0.dist-info/RECORD,,
|