minitap-mcp 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minitap_mcp-0.1.0/PKG-INFO +348 -0
- minitap_mcp-0.1.0/README.md +327 -0
- minitap_mcp-0.1.0/minitap/mcp/__init__.py +0 -0
- minitap_mcp-0.1.0/minitap/mcp/core/agents.py +19 -0
- minitap_mcp-0.1.0/minitap/mcp/core/config.py +27 -0
- minitap_mcp-0.1.0/minitap/mcp/core/decorators.py +42 -0
- minitap_mcp-0.1.0/minitap/mcp/core/device.py +242 -0
- minitap_mcp-0.1.0/minitap/mcp/core/llm.py +28 -0
- minitap_mcp-0.1.0/minitap/mcp/core/utils.py +55 -0
- minitap_mcp-0.1.0/minitap/mcp/main.py +109 -0
- minitap_mcp-0.1.0/minitap/mcp/server/middleware.py +23 -0
- minitap_mcp-0.1.0/minitap/mcp/server/poller.py +38 -0
- minitap_mcp-0.1.0/minitap/mcp/tools/analyze_screen.py +58 -0
- minitap_mcp-0.1.0/minitap/mcp/tools/execute_mobile_command.py +64 -0
- minitap_mcp-0.1.0/minitap/mcp/tools/go_back.py +42 -0
- minitap_mcp-0.1.0/minitap/mcp/tools/screen_analyzer.md +17 -0
- minitap_mcp-0.1.0/pyproject.toml +145 -0
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: minitap-mcp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP server for mobile-use
|
|
5
|
+
Author: Pierre-Louis Favreau, Jean-Pierre Lo, Clément Guiguet
|
|
6
|
+
Requires-Dist: fastmcp>=2.12.4
|
|
7
|
+
Requires-Dist: python-dotenv>=1.1.1
|
|
8
|
+
Requires-Dist: pydantic>=2.12.0
|
|
9
|
+
Requires-Dist: pydantic-settings>=2.10.1
|
|
10
|
+
Requires-Dist: minitap-mobile-use>=2.5.3
|
|
11
|
+
Requires-Dist: jinja2>=3.1.6
|
|
12
|
+
Requires-Dist: langchain-core>=0.3.75
|
|
13
|
+
Requires-Dist: ruff==0.5.3 ; extra == 'dev'
|
|
14
|
+
Requires-Dist: pytest==8.4.1 ; extra == 'dev'
|
|
15
|
+
Requires-Dist: pytest-cov==5.0.0 ; extra == 'dev'
|
|
16
|
+
Requires-Python: >=3.12
|
|
17
|
+
Project-URL: Homepage, https://minitap.ai/
|
|
18
|
+
Project-URL: Source, https://github.com/minitap-ai/mobile-use
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
22
|
+
# Mobile-Use MCP Server
|
|
23
|
+
|
|
24
|
+
A Model Context Protocol (MCP) server that provides AI-powered mobile device screen analysis. It automatically detects connected Android devices (via ADB) and iOS simulators (via xcrun), captures screenshots, and analyzes them using vision language models.
|
|
25
|
+
|
|
26
|
+
## Features
|
|
27
|
+
|
|
28
|
+
- **🔍 Device Discovery**: Automatically finds connected Android devices (ADB) and iOS simulators (xcrun)
|
|
29
|
+
- **📱 Screen Analysis**: Capture and analyze device screenshots using vision-capable LLMs
|
|
30
|
+
- **🤖 Natural Language Control**: Execute commands on your device using natural language via the mobile-use SDK
|
|
31
|
+
- **🚀 Easy Integration**: Built with FastMCP for seamless MCP protocol implementation
|
|
32
|
+
- **⚙️ Flexible Configuration**: Uses Minitap API with support for various vision models
|
|
33
|
+
|
|
34
|
+
## Installation
|
|
35
|
+
|
|
36
|
+
### Prerequisites
|
|
37
|
+
|
|
38
|
+
- **Python 3.12+**
|
|
39
|
+
- **uv** (recommended) or pip
|
|
40
|
+
- **For Android**: ADB installed and accessible
|
|
41
|
+
- **For iOS**: Xcode Command Line Tools (macOS only)
|
|
42
|
+
- **Minitap API Key** - Get one at [platform.minitap.ai](https://platform.minitap.ai)
|
|
43
|
+
|
|
44
|
+
### Setup
|
|
45
|
+
|
|
46
|
+
1. **Clone and navigate to the project:**
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
cd minitap-mcp
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
2. **Install dependencies:**
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
# Create a virtual environment
|
|
56
|
+
uv venv
|
|
57
|
+
source .venv/bin/activate
|
|
58
|
+
|
|
59
|
+
# Install dependencies
|
|
60
|
+
uv sync
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
3. **Configure for MCP usage:**
|
|
64
|
+
|
|
65
|
+
The MCP server is configured via environment variables passed from your MCP client (e.g., Windsurf).
|
|
66
|
+
|
|
67
|
+
Required environment variable:
|
|
68
|
+
- `MINITAP_API_KEY`: Your Minitap API key
|
|
69
|
+
|
|
70
|
+
Optional environment variables:
|
|
71
|
+
- `MINITAP_API_BASE_URL`: API base URL (default: `https://platform.minitap.ai/api/v1`)
|
|
72
|
+
- `VISION_MODEL`: Vision model to use (default: `baidu/ernie-4.5-vl-28b-a3b`)
|
|
73
|
+
- `ADB_SERVER_SOCKET`: Custom ADB server socket (format: `tcp:host:port`)
|
|
74
|
+
|
|
75
|
+
## Available Resources & Tools
|
|
76
|
+
|
|
77
|
+
### Resource: `data://devices`
|
|
78
|
+
|
|
79
|
+
Lists all connected mobile devices (Android and iOS).
|
|
80
|
+
|
|
81
|
+
**Returns:** Array of device information objects with:
|
|
82
|
+
- `device_id`: Device serial (Android) or UDID (iOS)
|
|
83
|
+
- `platform`: `"android"` or `"ios"`
|
|
84
|
+
- `name`: Device name
|
|
85
|
+
- `state`: Device state (`"connected"` or `"Booted"`)
|
|
86
|
+
|
|
87
|
+
### Tool: `analyze_screen`
|
|
88
|
+
|
|
89
|
+
Captures a screenshot from a mobile device and analyzes it using a vision language model.
|
|
90
|
+
|
|
91
|
+
**Parameters:**
|
|
92
|
+
- `prompt` (required): Analysis prompt describing what information to extract
|
|
93
|
+
- `device_id` (optional): Specific device ID to target. If not provided, uses the first available device.
|
|
94
|
+
|
|
95
|
+
**Returns:** AI-generated analysis of the screenshot based on the prompt.
|
|
96
|
+
|
|
97
|
+
**Example:**
|
|
98
|
+
```
|
|
99
|
+
Prompt: "What app is currently open? List all visible UI elements."
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
The tool will:
|
|
103
|
+
1. Find the specified device (or first available)
|
|
104
|
+
2. Capture a screenshot
|
|
105
|
+
3. Analyze it with the vision model
|
|
106
|
+
4. Return the analysis
|
|
107
|
+
|
|
108
|
+
### Tool: `execute_mobile_command`
|
|
109
|
+
|
|
110
|
+
Execute natural language commands on your mobile device using the mobile-use SDK. This tool allows you to control your Android or iOS device with simple instructions.
|
|
111
|
+
|
|
112
|
+
**Parameters:**
|
|
113
|
+
- `goal` (required): Natural language command to execute on the device
|
|
114
|
+
- `output_description` (optional): Description of the expected output format (e.g., "A JSON list of objects with sender and subject keys")
|
|
115
|
+
- `profile` (optional): Name of the profile to use for this task. Defaults to 'default'
|
|
116
|
+
|
|
117
|
+
**Returns:** Execution result with status, output, and any extracted data.
|
|
118
|
+
|
|
119
|
+
**Examples:**
|
|
120
|
+
```python
|
|
121
|
+
# Simple command
|
|
122
|
+
goal: "Go to settings and tell me my current battery level"
|
|
123
|
+
|
|
124
|
+
# Data extraction with structured output
|
|
125
|
+
goal: "Open Gmail, find first 3 unread emails, and list their sender and subject line"
|
|
126
|
+
output_description: "A JSON list of objects, each with 'sender' and 'subject' keys"
|
|
127
|
+
|
|
128
|
+
# App navigation
|
|
129
|
+
goal: "Open Twitter and scroll to the latest tweet"
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
The tool will:
|
|
133
|
+
1. Find the specified device (or first available)
|
|
134
|
+
2. Execute the command using the mobile-use AI agent
|
|
135
|
+
3. Return the result or extracted data
|
|
136
|
+
|
|
137
|
+
## Usage
|
|
138
|
+
|
|
139
|
+
### Running the MCP Server
|
|
140
|
+
|
|
141
|
+
#### Local Mode (Default)
|
|
142
|
+
|
|
143
|
+
The MCP server is typically started by your MCP client (e.g., Windsurf). For manual testing:
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
minitap-mcp
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
#### Network Server Mode
|
|
150
|
+
|
|
151
|
+
You can run the MCP server as a network server for remote access:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
# Run as network server (uses MCP_SERVER_HOST and MCP_SERVER_PORT from env)
|
|
155
|
+
minitap-mcp --server
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
The server will bind to the host and port specified in your environment variables:
|
|
159
|
+
- `MCP_SERVER_HOST` (default: `0.0.0.0`)
|
|
160
|
+
- `MCP_SERVER_PORT` (default: `8000`)
|
|
161
|
+
|
|
162
|
+
Configure these in your `.env` file or via environment variables to customize the binding address.
|
|
163
|
+
|
|
164
|
+
Inside Windsurf, you can configure the MCP server by adding the following to your `~/.codeium/windsurf/mcp_config.json` file:
|
|
165
|
+
|
|
166
|
+
```json
|
|
167
|
+
{
|
|
168
|
+
"mcpServers": {
|
|
169
|
+
"minitap-mcp": {
|
|
170
|
+
"serverUrl": "http://localhost:8000/mcp"
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
N.B. You may need to change the port based on what you've configured in your `.env` file.
|
|
177
|
+
|
|
178
|
+
## Development
|
|
179
|
+
|
|
180
|
+
### Quick Testing
|
|
181
|
+
|
|
182
|
+
Test device detection and screenshot capture (no API key required):
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
python tests/test_devices.py
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
Test the complete MCP flow with LLM analysis (requires API key):
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
cp .env.example .env
|
|
192
|
+
# Edit .env and add your MINITAP_API_KEY
|
|
193
|
+
python tests/test_mcp.py
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Code Quality
|
|
197
|
+
|
|
198
|
+
**Format code:**
|
|
199
|
+
```bash
|
|
200
|
+
ruff format .
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
**Lint:**
|
|
204
|
+
```bash
|
|
205
|
+
ruff check --fix
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
## Project Structure
|
|
209
|
+
|
|
210
|
+
```
|
|
211
|
+
minitap/mcp/
|
|
212
|
+
├── __init__.py
|
|
213
|
+
├── main.py  # FastMCP server entry point
|
|
214
|
+
├── core/
|
|
215
|
+
│   ├── __init__.py
|
|
216
|
+
│   ├── config.py  # Pydantic settings configuration
|
|
217
|
+
│   ├── decorators.py  # Error handling decorators
|
|
218
|
+
│   ├── device.py  # Device discovery & screenshot capture
|
|
219
|
+
│   ├── llm.py  # LLM client initialization
|
|
220
|
+
│   └── utils.py  # Utility functions (image compression, etc.)
|
|
221
|
+
└── tools/
|
|
222
|
+
    ├── __init__.py
|
|
223
|
+
    ├── analyze_screen.py  # Screen analysis tool
|
|
224
|
+
    ├── execute_mobile_command.py  # Mobile-use SDK integration tool
|
|
225
|
+
    └── screen_analyzer.md  # System prompt for analysis
|
|
226
|
+
|
|
227
|
+
tests/
|
|
228
|
+
├── test_devices.py  # Device detection tests
|
|
229
|
+
└── test_mcp.py  # Full MCP integration tests
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
## Creating New Tools
|
|
233
|
+
|
|
234
|
+
When adding new MCP tools, use the `@handle_tool_errors` decorator to prevent unhandled exceptions from causing infinite loops:
|
|
235
|
+
|
|
236
|
+
```python
|
|
237
|
+
from minitap.mcp.core.decorators import handle_tool_errors
|
|
238
|
+
from minitap.mcp.main import mcp
|
|
239
|
+
|
|
240
|
+
@mcp.tool(name="my_tool", description="...")
|
|
241
|
+
@handle_tool_errors # Add this decorator
|
|
242
|
+
async def my_tool(param: str) -> str:
|
|
243
|
+
# Your tool logic here
|
|
244
|
+
# Any exception will be caught and returned as an error message
|
|
245
|
+
return "result"
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
The decorator automatically:
|
|
249
|
+
- Catches all exceptions (including `DeviceNotFoundError`)
|
|
250
|
+
- Returns user-friendly error messages
|
|
251
|
+
- Prevents the MCP server from hanging or looping infinitely
|
|
252
|
+
- Works with both sync and async functions
|
|
253
|
+
|
|
254
|
+
## Integration with Windsurf
|
|
255
|
+
|
|
256
|
+
To use this MCP server in Windsurf, add it to your MCP settings:
|
|
257
|
+
|
|
258
|
+
**Location:** `~/.codeium/windsurf/mcp_config.json`
|
|
259
|
+
|
|
260
|
+
**Configuration:**
|
|
261
|
+
|
|
262
|
+
```json
|
|
263
|
+
{
|
|
264
|
+
"mcpServers": {
|
|
265
|
+
"minitap-mcp": {
|
|
266
|
+
"command": "bash",
|
|
267
|
+
"args": ["-c", "cd /path/to/minitap-mcp && source .venv/bin/activate && uv sync && minitap-mcp"],
|
|
268
|
+
"env": {
|
|
269
|
+
"MINITAP_API_KEY": "your_minitap_api_key_here",
|
|
270
|
+
"MINITAP_API_BASE_URL": "https://platform.minitap.ai/api/v1",
|
|
271
|
+
"VISION_MODEL": "baidu/ernie-4.5-vl-28b-a3b"
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
**After configuration:**
|
|
279
|
+
1. Restart Windsurf
|
|
280
|
+
2. The `analyze_screen` and `execute_mobile_command` tools will be available in Cascade
|
|
281
|
+
3. The `data://devices` resource will list connected devices
|
|
282
|
+
|
|
283
|
+
### Available Vision Models
|
|
284
|
+
|
|
285
|
+
The Minitap API supports various vision models:
|
|
286
|
+
- `qwen/qwen-2.5-vl-7b-instruct` (default)
|
|
287
|
+
- `baidu/ernie-4.5-vl-28b-a3b`
|
|
288
|
+
- `openai/gpt-4o`
|
|
289
|
+
- And more - check the Minitap platform for the full list
|
|
290
|
+
|
|
291
|
+
## Device Requirements
|
|
292
|
+
|
|
293
|
+
### Android Devices
|
|
294
|
+
|
|
295
|
+
**Requirements:**
|
|
296
|
+
- ADB installed and in PATH
|
|
297
|
+
- USB debugging enabled on the device
|
|
298
|
+
- Device connected via USB or network ADB
|
|
299
|
+
|
|
300
|
+
**Verify connection:**
|
|
301
|
+
```bash
|
|
302
|
+
adb devices
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
**Custom ADB Server:**
|
|
306
|
+
If using a custom ADB server (e.g., Docker, WSL), set the socket:
|
|
307
|
+
```bash
|
|
308
|
+
export ADB_SERVER_SOCKET="tcp:localhost:5037"
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
N.B. You may need to restart your IDE for the new environment variable to take effect.
|
|
312
|
+
|
|
313
|
+
### iOS Devices
|
|
314
|
+
|
|
315
|
+
**Requirements:**
|
|
316
|
+
- macOS with Xcode Command Line Tools
|
|
317
|
+
- iOS Simulator running
|
|
318
|
+
|
|
319
|
+
**Verify simulators:**
|
|
320
|
+
```bash
|
|
321
|
+
xcrun simctl list devices booted
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
**Start a simulator:**
|
|
325
|
+
```bash
|
|
326
|
+
open -a Simulator
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
## Troubleshooting
|
|
330
|
+
|
|
331
|
+
### No devices found
|
|
332
|
+
|
|
333
|
+
1. **Android:** Run `adb devices` to verify device connection
|
|
334
|
+
2. **iOS:** Run `xcrun simctl list devices booted` to check running simulators
|
|
335
|
+
3. Ensure USB debugging is enabled (Android)
|
|
336
|
+
4. Try restarting ADB: `adb kill-server && adb start-server`
|
|
337
|
+
|
|
338
|
+
### Screenshot capture fails
|
|
339
|
+
|
|
340
|
+
1. Ensure device screen is unlocked
|
|
341
|
+
2. For Android, verify screencap permission
|
|
342
|
+
3. For iOS, ensure simulator is fully booted
|
|
343
|
+
|
|
344
|
+
### Tool not detected in Windsurf
|
|
345
|
+
|
|
346
|
+
1. Verify the import in `main.py` includes the tools module
|
|
347
|
+
2. Check that `tools/__init__.py` exists
|
|
348
|
+
3. Restart Windsurf after configuration changes
|
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
# Mobile-Use MCP Server
|
|
2
|
+
|
|
3
|
+
A Model Context Protocol (MCP) server that provides AI-powered mobile device screen analysis. It automatically detects connected Android devices (via ADB) and iOS simulators (via xcrun), captures screenshots, and analyzes them using vision language models.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **🔍 Device Discovery**: Automatically finds connected Android devices (ADB) and iOS simulators (xcrun)
|
|
8
|
+
- **📱 Screen Analysis**: Capture and analyze device screenshots using vision-capable LLMs
|
|
9
|
+
- **🤖 Natural Language Control**: Execute commands on your device using natural language via the mobile-use SDK
|
|
10
|
+
- **🚀 Easy Integration**: Built with FastMCP for seamless MCP protocol implementation
|
|
11
|
+
- **⚙️ Flexible Configuration**: Uses Minitap API with support for various vision models
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
### Prerequisites
|
|
16
|
+
|
|
17
|
+
- **Python 3.12+**
|
|
18
|
+
- **uv** (recommended) or pip
|
|
19
|
+
- **For Android**: ADB installed and accessible
|
|
20
|
+
- **For iOS**: Xcode Command Line Tools (macOS only)
|
|
21
|
+
- **Minitap API Key** - Get one at [platform.minitap.ai](https://platform.minitap.ai)
|
|
22
|
+
|
|
23
|
+
### Setup
|
|
24
|
+
|
|
25
|
+
1. **Clone and navigate to the project:**
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
cd minitap-mcp
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
2. **Install dependencies:**
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
# Create a virtual environment
|
|
35
|
+
uv venv
|
|
36
|
+
source .venv/bin/activate
|
|
37
|
+
|
|
38
|
+
# Install dependencies
|
|
39
|
+
uv sync
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
3. **Configure for MCP usage:**
|
|
43
|
+
|
|
44
|
+
The MCP server is configured via environment variables passed from your MCP client (e.g., Windsurf).
|
|
45
|
+
|
|
46
|
+
Required environment variable:
|
|
47
|
+
- `MINITAP_API_KEY`: Your Minitap API key
|
|
48
|
+
|
|
49
|
+
Optional environment variables:
|
|
50
|
+
- `MINITAP_API_BASE_URL`: API base URL (default: `https://platform.minitap.ai/api/v1`)
|
|
51
|
+
- `VISION_MODEL`: Vision model to use (default: `baidu/ernie-4.5-vl-28b-a3b`)
|
|
52
|
+
- `ADB_SERVER_SOCKET`: Custom ADB server socket (format: `tcp:host:port`)
|
|
53
|
+
|
|
54
|
+
## Available Resources & Tools
|
|
55
|
+
|
|
56
|
+
### Resource: `data://devices`
|
|
57
|
+
|
|
58
|
+
Lists all connected mobile devices (Android and iOS).
|
|
59
|
+
|
|
60
|
+
**Returns:** Array of device information objects with:
|
|
61
|
+
- `device_id`: Device serial (Android) or UDID (iOS)
|
|
62
|
+
- `platform`: `"android"` or `"ios"`
|
|
63
|
+
- `name`: Device name
|
|
64
|
+
- `state`: Device state (`"connected"` or `"Booted"`)
|
|
65
|
+
|
|
66
|
+
### Tool: `analyze_screen`
|
|
67
|
+
|
|
68
|
+
Captures a screenshot from a mobile device and analyzes it using a vision language model.
|
|
69
|
+
|
|
70
|
+
**Parameters:**
|
|
71
|
+
- `prompt` (required): Analysis prompt describing what information to extract
|
|
72
|
+
- `device_id` (optional): Specific device ID to target. If not provided, uses the first available device.
|
|
73
|
+
|
|
74
|
+
**Returns:** AI-generated analysis of the screenshot based on the prompt.
|
|
75
|
+
|
|
76
|
+
**Example:**
|
|
77
|
+
```
|
|
78
|
+
Prompt: "What app is currently open? List all visible UI elements."
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
The tool will:
|
|
82
|
+
1. Find the specified device (or first available)
|
|
83
|
+
2. Capture a screenshot
|
|
84
|
+
3. Analyze it with the vision model
|
|
85
|
+
4. Return the analysis
|
|
86
|
+
|
|
87
|
+
### Tool: `execute_mobile_command`
|
|
88
|
+
|
|
89
|
+
Execute natural language commands on your mobile device using the mobile-use SDK. This tool allows you to control your Android or iOS device with simple instructions.
|
|
90
|
+
|
|
91
|
+
**Parameters:**
|
|
92
|
+
- `goal` (required): Natural language command to execute on the device
|
|
93
|
+
- `output_description` (optional): Description of the expected output format (e.g., "A JSON list of objects with sender and subject keys")
|
|
94
|
+
- `profile` (optional): Name of the profile to use for this task. Defaults to 'default'
|
|
95
|
+
|
|
96
|
+
**Returns:** Execution result with status, output, and any extracted data.
|
|
97
|
+
|
|
98
|
+
**Examples:**
|
|
99
|
+
```python
|
|
100
|
+
# Simple command
|
|
101
|
+
goal: "Go to settings and tell me my current battery level"
|
|
102
|
+
|
|
103
|
+
# Data extraction with structured output
|
|
104
|
+
goal: "Open Gmail, find first 3 unread emails, and list their sender and subject line"
|
|
105
|
+
output_description: "A JSON list of objects, each with 'sender' and 'subject' keys"
|
|
106
|
+
|
|
107
|
+
# App navigation
|
|
108
|
+
goal: "Open Twitter and scroll to the latest tweet"
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
The tool will:
|
|
112
|
+
1. Find the specified device (or first available)
|
|
113
|
+
2. Execute the command using the mobile-use AI agent
|
|
114
|
+
3. Return the result or extracted data
|
|
115
|
+
|
|
116
|
+
## Usage
|
|
117
|
+
|
|
118
|
+
### Running the MCP Server
|
|
119
|
+
|
|
120
|
+
#### Local Mode (Default)
|
|
121
|
+
|
|
122
|
+
The MCP server is typically started by your MCP client (e.g., Windsurf). For manual testing:
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
minitap-mcp
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
#### Network Server Mode
|
|
129
|
+
|
|
130
|
+
You can run the MCP server as a network server for remote access:
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
# Run as network server (uses MCP_SERVER_HOST and MCP_SERVER_PORT from env)
|
|
134
|
+
minitap-mcp --server
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
The server will bind to the host and port specified in your environment variables:
|
|
138
|
+
- `MCP_SERVER_HOST` (default: `0.0.0.0`)
|
|
139
|
+
- `MCP_SERVER_PORT` (default: `8000`)
|
|
140
|
+
|
|
141
|
+
Configure these in your `.env` file or via environment variables to customize the binding address.
|
|
142
|
+
|
|
143
|
+
Inside Windsurf, you can configure the MCP server by adding the following to your `~/.codeium/windsurf/mcp_config.json` file:
|
|
144
|
+
|
|
145
|
+
```json
|
|
146
|
+
{
|
|
147
|
+
"mcpServers": {
|
|
148
|
+
"minitap-mcp": {
|
|
149
|
+
"serverUrl": "http://localhost:8000/mcp"
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
N.B. You may need to change the port based on what you've configured in your `.env` file.
|
|
156
|
+
|
|
157
|
+
## Development
|
|
158
|
+
|
|
159
|
+
### Quick Testing
|
|
160
|
+
|
|
161
|
+
Test device detection and screenshot capture (no API key required):
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
python tests/test_devices.py
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
Test the complete MCP flow with LLM analysis (requires API key):
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
cp .env.example .env
|
|
171
|
+
# Edit .env and add your MINITAP_API_KEY
|
|
172
|
+
python tests/test_mcp.py
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Code Quality
|
|
176
|
+
|
|
177
|
+
**Format code:**
|
|
178
|
+
```bash
|
|
179
|
+
ruff format .
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
**Lint:**
|
|
183
|
+
```bash
|
|
184
|
+
ruff check --fix
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
## Project Structure
|
|
188
|
+
|
|
189
|
+
```
|
|
190
|
+
minitap/mcp/
|
|
191
|
+
├── __init__.py
|
|
192
|
+
├── main.py  # FastMCP server entry point
|
|
193
|
+
├── core/
|
|
194
|
+
│   ├── __init__.py
|
|
195
|
+
│   ├── config.py  # Pydantic settings configuration
|
|
196
|
+
│   ├── decorators.py  # Error handling decorators
|
|
197
|
+
│   ├── device.py  # Device discovery & screenshot capture
|
|
198
|
+
│   ├── llm.py  # LLM client initialization
|
|
199
|
+
│   └── utils.py  # Utility functions (image compression, etc.)
|
|
200
|
+
└── tools/
|
|
201
|
+
    ├── __init__.py
|
|
202
|
+
    ├── analyze_screen.py  # Screen analysis tool
|
|
203
|
+
    ├── execute_mobile_command.py  # Mobile-use SDK integration tool
|
|
204
|
+
    └── screen_analyzer.md  # System prompt for analysis
|
|
205
|
+
|
|
206
|
+
tests/
|
|
207
|
+
├── test_devices.py  # Device detection tests
|
|
208
|
+
└── test_mcp.py  # Full MCP integration tests
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
## Creating New Tools
|
|
212
|
+
|
|
213
|
+
When adding new MCP tools, use the `@handle_tool_errors` decorator to prevent unhandled exceptions from causing infinite loops:
|
|
214
|
+
|
|
215
|
+
```python
|
|
216
|
+
from minitap.mcp.core.decorators import handle_tool_errors
|
|
217
|
+
from minitap.mcp.main import mcp
|
|
218
|
+
|
|
219
|
+
@mcp.tool(name="my_tool", description="...")
|
|
220
|
+
@handle_tool_errors # Add this decorator
|
|
221
|
+
async def my_tool(param: str) -> str:
|
|
222
|
+
# Your tool logic here
|
|
223
|
+
# Any exception will be caught and returned as an error message
|
|
224
|
+
return "result"
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
The decorator automatically:
|
|
228
|
+
- Catches all exceptions (including `DeviceNotFoundError`)
|
|
229
|
+
- Returns user-friendly error messages
|
|
230
|
+
- Prevents the MCP server from hanging or looping infinitely
|
|
231
|
+
- Works with both sync and async functions
|
|
232
|
+
|
|
233
|
+
## Integration with Windsurf
|
|
234
|
+
|
|
235
|
+
To use this MCP server in Windsurf, add it to your MCP settings:
|
|
236
|
+
|
|
237
|
+
**Location:** `~/.codeium/windsurf/mcp_config.json`
|
|
238
|
+
|
|
239
|
+
**Configuration:**
|
|
240
|
+
|
|
241
|
+
```json
|
|
242
|
+
{
|
|
243
|
+
"mcpServers": {
|
|
244
|
+
"minitap-mcp": {
|
|
245
|
+
"command": "bash",
|
|
246
|
+
"args": ["-c", "cd /path/to/minitap-mcp && source .venv/bin/activate && uv sync && minitap-mcp"],
|
|
247
|
+
"env": {
|
|
248
|
+
"MINITAP_API_KEY": "your_minitap_api_key_here",
|
|
249
|
+
"MINITAP_API_BASE_URL": "https://platform.minitap.ai/api/v1",
|
|
250
|
+
"VISION_MODEL": "baidu/ernie-4.5-vl-28b-a3b"
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
**After configuration:**
|
|
258
|
+
1. Restart Windsurf
|
|
259
|
+
2. The `analyze_screen` and `execute_mobile_command` tools will be available in Cascade
|
|
260
|
+
3. The `data://devices` resource will list connected devices
|
|
261
|
+
|
|
262
|
+
### Available Vision Models
|
|
263
|
+
|
|
264
|
+
The Minitap API supports various vision models:
|
|
265
|
+
- `qwen/qwen-2.5-vl-7b-instruct` (default)
|
|
266
|
+
- `baidu/ernie-4.5-vl-28b-a3b`
|
|
267
|
+
- `openai/gpt-4o`
|
|
268
|
+
- And more - check the Minitap platform for the full list
|
|
269
|
+
|
|
270
|
+
## Device Requirements
|
|
271
|
+
|
|
272
|
+
### Android Devices
|
|
273
|
+
|
|
274
|
+
**Requirements:**
|
|
275
|
+
- ADB installed and in PATH
|
|
276
|
+
- USB debugging enabled on the device
|
|
277
|
+
- Device connected via USB or network ADB
|
|
278
|
+
|
|
279
|
+
**Verify connection:**
|
|
280
|
+
```bash
|
|
281
|
+
adb devices
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
**Custom ADB Server:**
|
|
285
|
+
If using a custom ADB server (e.g., Docker, WSL), set the socket:
|
|
286
|
+
```bash
|
|
287
|
+
export ADB_SERVER_SOCKET="tcp:localhost:5037"
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
N.B. You may need to restart your IDE for the new environment variable to take effect.
|
|
291
|
+
|
|
292
|
+
### iOS Devices
|
|
293
|
+
|
|
294
|
+
**Requirements:**
|
|
295
|
+
- macOS with Xcode Command Line Tools
|
|
296
|
+
- iOS Simulator running
|
|
297
|
+
|
|
298
|
+
**Verify simulators:**
|
|
299
|
+
```bash
|
|
300
|
+
xcrun simctl list devices booted
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
**Start a simulator:**
|
|
304
|
+
```bash
|
|
305
|
+
open -a Simulator
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
## Troubleshooting
|
|
309
|
+
|
|
310
|
+
### No devices found
|
|
311
|
+
|
|
312
|
+
1. **Android:** Run `adb devices` to verify device connection
|
|
313
|
+
2. **iOS:** Run `xcrun simctl list devices booted` to check running simulators
|
|
314
|
+
3. Ensure USB debugging is enabled (Android)
|
|
315
|
+
4. Try restarting ADB: `adb kill-server && adb start-server`
|
|
316
|
+
|
|
317
|
+
### Screenshot capture fails
|
|
318
|
+
|
|
319
|
+
1. Ensure device screen is unlocked
|
|
320
|
+
2. For Android, verify screencap permission
|
|
321
|
+
3. For iOS, ensure simulator is fully booted
|
|
322
|
+
|
|
323
|
+
### Tool not detected in Windsurf
|
|
324
|
+
|
|
325
|
+
1. Verify the import in `main.py` includes the tools module
|
|
326
|
+
2. Check that `tools/__init__.py` exists
|
|
327
|
+
3. Restart Windsurf after configuration changes
|
|
File without changes
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from minitap.mobile_use.sdk import Agent
|
|
4
|
+
from minitap.mobile_use.sdk.builders import Builders
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def get_mobile_use_agent() -> "Agent":
    """Build a mobile-use ``Agent``, honouring a custom ADB server if configured.

    When the ``ADB_SERVER_SOCKET`` environment variable is set to a non-empty
    value, it must consist of three colon-separated parts (``tcp:host:port``).
    The host and port are passed to the agent configuration via
    ``with_adb_server``. When the variable is unset or empty, the
    configuration is built without a custom ADB server.

    Returns:
        The ``Agent`` created from the built configuration.

    Raises:
        ValueError: If ``ADB_SERVER_SOCKET`` does not have exactly three
            colon-separated parts, or if its port part is not an integer.
    """
    # Validate the environment value first so a bad setting fails fast, before
    # any SDK configuration object is touched.
    adb_server = None
    custom_adb_socket = os.getenv("ADB_SERVER_SOCKET")
    if custom_adb_socket:
        parts = custom_adb_socket.split(":")
        if len(parts) != 3:
            raise ValueError(f"Invalid ADB server socket: {custom_adb_socket}")
        # The leading transport component (e.g. "tcp") is not used.
        _, host, port = parts
        try:
            adb_server = (host, int(port))
        except ValueError as err:
            # Re-raise with the offending setting in the message; a bare
            # int() error would not mention ADB_SERVER_SOCKET at all.
            raise ValueError(
                f"Invalid ADB server socket: {custom_adb_socket} "
                "(port must be an integer)"
            ) from err

    config = Builders.AgentConfig
    if adb_server is not None:
        adb_host, adb_port = adb_server
        config = config.with_adb_server(host=adb_host, port=adb_port)
    return Agent(config=config.build())
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Module-level agent instance, created when this module is imported; a
# malformed ADB_SERVER_SOCKET value therefore raises ValueError at import time.
agent = get_mobile_use_agent()
|