minitap-mcp 0.4.2__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/PKG-INFO +112 -24
  2. minitap_mcp-0.5.0/PYPI_README.md +269 -0
  3. minitap_mcp-0.5.0/minitap/mcp/core/agents/extract_figma_assets.py +69 -0
  4. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/tools/save_figma_assets.py +4 -6
  5. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/pyproject.toml +1 -1
  6. minitap_mcp-0.4.2/PYPI_README.md +0 -181
  7. minitap_mcp-0.4.2/minitap/mcp/core/agents/extract_figma_assets.md +0 -64
  8. minitap_mcp-0.4.2/minitap/mcp/core/agents/extract_figma_assets.py +0 -96
  9. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/__init__.py +0 -0
  10. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/core/agents/compare_screenshots.md +0 -0
  11. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/core/agents/compare_screenshots.py +0 -0
  12. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/core/config.py +0 -0
  13. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/core/decorators.py +0 -0
  14. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/core/device.py +0 -0
  15. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/core/llm.py +0 -0
  16. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/core/logging_config.py +0 -0
  17. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/core/models.py +0 -0
  18. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/core/sdk_agent.py +0 -0
  19. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/core/utils.py +0 -0
  20. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/main.py +0 -0
  21. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/server/middleware.py +0 -0
  22. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/server/poller.py +0 -0
  23. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/tools/analyze_screen.py +0 -0
  24. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/tools/compare_screenshot_with_figma.py +0 -0
  25. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/tools/execute_mobile_command.py +0 -0
  26. {minitap_mcp-0.4.2 → minitap_mcp-0.5.0}/minitap/mcp/tools/screen_analyzer.md +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: minitap-mcp
3
- Version: 0.4.2
3
+ Version: 0.5.0
4
4
  Summary: Model Context Protocol server for controlling Android & iOS devices with natural language
5
5
  Author: Pierre-Louis Favreau, Jean-Pierre Lo, Clément Guiguet
6
6
  Requires-Dist: fastmcp>=2.12.4
@@ -84,11 +84,18 @@ minitap-mcp --server
84
84
 
85
85
  CLI flags always override environment variables when both are present.
86
86
 
87
- By default, the server will bind to `0.0.0.0:8000`. Configure via environment variables:
87
+ By default, the server will bind to `0.0.0.0:8000`. You can customize the port:
88
88
 
89
89
  ```bash
90
+ # Using CLI argument
91
+ minitap-mcp --server --port 9000
92
+
93
+ # Or using environment variable
94
+ export MCP_SERVER_PORT="9000"
95
+ minitap-mcp --server
96
+
97
+ # You can also customize the host
90
98
  export MCP_SERVER_HOST="0.0.0.0"
91
- export MCP_SERVER_PORT="8000"
92
99
  ```
93
100
 
94
101
  ## IDE Integration
@@ -120,41 +127,122 @@ export MCP_SERVER_PORT="8000"
120
127
  ```
121
128
 
122
129
 
123
- ## Available Tools
130
+ ## Available Resources & Tools
131
+
132
+ Once connected, your AI assistant can use these resources and tools:
133
+
134
+ ### Resource: `data://devices`
135
+
136
+ Lists all connected mobile devices (Android and iOS).
124
137
 
125
- Once connected, your AI assistant can use these tools:
138
+ **Returns:** Array of device information objects with:
126
139
 
127
- ### `execute_mobile_command`
128
- Execute natural language commands on your mobile device using the Minitap SDK. This tool allows you to control your Android or iOS device using natural language.
140
+ - `device_id`: Device serial (Android) or UDID (iOS)
141
+ - `platform`: `"android"` or `"ios"`
142
+ - `name`: Device name
143
+ - `state`: Device state (`"connected"` or `"Booted"`)
144
+
145
+ ### Tool: `analyze_screen`
146
+
147
+ Captures a screenshot from a mobile device and analyzes it using a vision language model.
129
148
 
130
149
  **Parameters:**
131
- - `goal` (required): High-level goal describing the action to perform
132
- - `output_description` (optional): Natural language description of the desired output format. Results are returned as structured JSON (e.g., "An array with sender and subject for each email")
133
- - `profile` (optional): Profile name to use (defaults to "default")
134
150
 
135
- **Examples:**
151
+ - `prompt` (required): Analysis prompt describing what information to extract
152
+ - `device_id` (optional): Specific device ID to target. If not provided, uses the first available device.
153
+
154
+ **Returns:** AI-generated analysis of the screenshot based on the prompt.
155
+
156
+ **Example:**
157
+
136
158
  ```
137
- "Open the settings app and tell me the battery level"
138
- "Find the first 3 unread emails in Gmail"
139
- "Open Google Maps and search for the nearest coffee shop"
140
- "Take a screenshot and save it"
159
+ Prompt: "What app is currently open? List all visible UI elements."
141
160
  ```
142
161
 
143
- ### `analyze_screen`
144
- Capture and analyze what's currently shown on the mobile device screen using a vision-capable LLM. Useful for understanding UI elements, extracting text, or identifying specific features.
162
+ The tool will:
163
+
164
+ 1. Find the specified device (or first available)
165
+ 2. Capture a screenshot
166
+ 3. Analyze it with the vision model
167
+ 4. Return the analysis
168
+
169
+ ### Tool: `execute_mobile_command`
170
+
171
+ Execute natural language commands on your mobile device using the mobile-use SDK. This tool allows you to control your Android or iOS device with simple instructions.
145
172
 
146
173
  **Parameters:**
147
- - `prompt` (required): Analysis prompt describing what information to extract
148
- - `device_id` (optional): Specific device ID to target
174
+
175
+ - `goal` (required): Natural language command to execute on the device
176
+ - `output_description` (optional): Description of the expected output format (e.g., "A JSON list of objects with sender and subject keys")
177
+ - `profile` (optional): Name of the profile to use for this task. Defaults to 'default'
178
+
179
+ **Returns:** Execution result with status, output, and any extracted data.
149
180
 
150
181
  **Examples:**
151
- ```js
152
- "What app is currently open?"
153
- "Read the text messages visible on screen"
154
- "List all buttons and their labels on the current screen"
155
- "Extract the phone number displayed"
182
+
183
+ ```python
184
+ # Simple command
185
+ goal: "Go to settings and tell me my current battery level"
186
+
187
+ # Data extraction with structured output
188
+ goal: "Open Gmail, find first 3 unread emails, and list their sender and subject line"
189
+ output_description: "A JSON list of objects, each with 'sender' and 'subject' keys"
190
+
191
+ # App navigation
192
+ goal: "Open Twitter and scroll to the latest tweet"
156
193
  ```
157
194
 
195
+ The tool will:
196
+
197
+ 1. Find the specified device (or first available)
198
+ 2. Execute the command using the mobile-use AI agent
199
+ 3. Return the result or extracted data
200
+
201
+ ### Tool: `save_figma_assets`
202
+
203
+ Fetch Figma design assets and React implementation code, then save them locally in the workspace.
204
+
205
+ **Parameters:**
206
+
207
+ - `node_id` (required): The node ID of the Figma design in the format "1:2" (colon-separated). Extract it from URLs like `https://figma.com/design/:fileKey/:fileName?node-id=1-2`, converting the dash to a colon.
208
+ - `file_key` (required): The file key from the Figma URL (e.g., "abc123" from `https://figma.com/design/abc123/MyFile`)
209
+ - `workspace_path` (optional): The workspace path where assets should be saved. Defaults to current directory.
210
+
211
+ **Returns:** Download summary with list of successfully downloaded assets and any failures.
212
+
213
+ **Example:**
214
+
215
+ ```python
216
+ node_id: "1:2"
217
+ file_key: "abc123xyz"
218
+ workspace_path: "."
219
+ ```
220
+
221
+ The tool will:
222
+
223
+ 1. Call `get_design_context` from Figma MCP to get React/TypeScript code
224
+ 2. Extract all asset URLs from the code implementation
225
+ 3. Download each asset to `.mobile-use/figma_assets/<node-id>/` folder
226
+ 4. Save the code implementation to `.mobile-use/figma_assets/<node-id>/code_implementation.ts`
227
+ 5. Return a list of downloaded files with success/failure status
228
+
229
+ ### Tool: `compare_screenshot_with_figma`
230
+
231
+ Compare a screenshot of the current mobile device state with a Figma design to identify visual differences.
232
+
233
+ **Parameters:**
234
+
235
+ - `node_id` (required): The node ID of the Figma design in the format "1:2" (colon-separated). Extract it from URLs like `https://figma.com/design/:fileKey/:fileName?node-id=1-2`, converting the dash to a colon.
236
+
237
+ **Returns:** Detailed comparison report with both the Figma design and current device screenshots for visual context.
238
+
239
+ The tool will:
240
+
241
+ 1. Capture a screenshot of the current device state
242
+ 2. Fetch the Figma design screenshot
243
+ 3. Compare both screenshots using vision AI
244
+ 4. Return a detailed analysis highlighting differences
245
+
158
246
  ## Advanced Configuration
159
247
 
160
248
  ### Custom ADB Server
@@ -0,0 +1,269 @@
1
+ # Minitap MCP Server
2
+
3
+ A Model Context Protocol (MCP) server that enables AI assistants to control and interact with real mobile devices (Android & iOS) through natural language commands.
4
+
5
+ ## Quick Start
6
+
7
+ ### Installation
8
+
9
+ ```bash
10
+ pip install minitap-mcp
11
+ ```
12
+
13
+ ### Prerequisites
14
+
15
+ Before running the MCP server, ensure you have the required mobile automation tools installed:
16
+
17
+ - **For Android devices:**
18
+ - [ADB (Android Debug Bridge)](https://developer.android.com/tools/adb) - For device communication
19
+ - [Maestro](https://maestro.mobile.dev/) - For mobile automation
20
+
21
+ - **For iOS devices (macOS only):**
22
+ - Xcode Command Line Tools with `xcrun`
23
+ - [Maestro](https://maestro.mobile.dev/) - For mobile automation
24
+
25
+ For detailed setup instructions, see the [mobile-use repository](https://github.com/minitap-ai/mobile-use).
26
+
27
+ ### Running the Server
28
+
29
+ The simplest way to start:
30
+
31
+ ```bash
32
+ minitap-mcp --server --api-key your_minitap_api_key
33
+ ```
34
+
35
+ This starts the server on `localhost:8000` with your API key. Get your free API key at [platform.minitap.ai/api-keys](https://platform.minitap.ai/api-keys).
36
+
37
+ **Available CLI options:**
38
+
39
+ ```bash
40
+ minitap-mcp --server --api-key YOUR_KEY --llm-profile PROFILE_NAME
41
+ ```
42
+
43
+ - `--api-key`: Your Minitap API key (overrides `MINITAP_API_KEY` env var). Get yours at [platform.minitap.ai/api-keys](https://platform.minitap.ai/api-keys).
44
+ - `--llm-profile`: LLM profile name to use (overrides `MINITAP_LLM_PROFILE_NAME` env var). If unset, uses the default profile. Configure profiles at [platform.minitap.ai/llm-profiles](https://platform.minitap.ai/llm-profiles).
45
+
46
+ ### Configuration (Optional)
47
+
48
+ Alternatively, you can set environment variables instead of using CLI flags:
49
+
50
+ ```bash
51
+ export MINITAP_API_KEY="your_minitap_api_key"
52
+ export MINITAP_API_BASE_URL="https://platform.minitap.ai/api/v1"
53
+ export MINITAP_LLM_PROFILE_NAME="default"
54
+ ```
55
+
56
+ You can set these in your `.bashrc` or equivalent, then simply run:
57
+
58
+ ```bash
59
+ minitap-mcp --server
60
+ ```
61
+
62
+ CLI flags always override environment variables when both are present.
63
+
64
+ By default, the server will bind to `0.0.0.0:8000`. You can customize the port:
65
+
66
+ ```bash
67
+ # Using CLI argument
68
+ minitap-mcp --server --port 9000
69
+
70
+ # Or using environment variable
71
+ export MCP_SERVER_PORT="9000"
72
+ minitap-mcp --server
73
+
74
+ # You can also customize the host
75
+ export MCP_SERVER_HOST="0.0.0.0"
76
+ ```
77
+
78
+ ## IDE Integration
79
+
80
+ 1. Start the server: `minitap-mcp --server --api-key your_minitap_api_key`
81
+ 2. Add to your IDE MCP settings file:
82
+
83
+ ```jsonc
84
+ // For Windsurf
85
+ {
86
+ "mcpServers": {
87
+ "minitap-mcp": {
88
+ "serverUrl": "http://localhost:8000/mcp"
89
+ }
90
+ }
91
+ }
92
+ ```
93
+
94
+ ```jsonc
95
+ // For Cursor
96
+ {
97
+ "mcpServers": {
98
+ "minitap-mcp": {
99
+ "transport": "http",
100
+ "url": "http://localhost:8000/mcp"
101
+ }
102
+ }
103
+ }
104
+ ```
105
+
106
+
107
+ ## Available Resources & Tools
108
+
109
+ Once connected, your AI assistant can use these resources and tools:
110
+
111
+ ### Resource: `data://devices`
112
+
113
+ Lists all connected mobile devices (Android and iOS).
114
+
115
+ **Returns:** Array of device information objects with:
116
+
117
+ - `device_id`: Device serial (Android) or UDID (iOS)
118
+ - `platform`: `"android"` or `"ios"`
119
+ - `name`: Device name
120
+ - `state`: Device state (`"connected"` or `"Booted"`)
121
+
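For illustration, a response from this resource might look like the following; the device IDs and names below are made up, but the fields match the list above:

```python
# Hypothetical data://devices payload (illustrative values only)
devices = [
    {"device_id": "emulator-5554", "platform": "android", "name": "Pixel 7", "state": "connected"},
    {"device_id": "3F2504E0-4F89-41D3-9A0C-0305E82C3301", "platform": "ios", "name": "iPhone 15", "state": "Booted"},
]
```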
122
+ ### Tool: `analyze_screen`
123
+
124
+ Captures a screenshot from a mobile device and analyzes it using a vision language model.
125
+
126
+ **Parameters:**
127
+
128
+ - `prompt` (required): Analysis prompt describing what information to extract
129
+ - `device_id` (optional): Specific device ID to target. If not provided, uses the first available device.
130
+
131
+ **Returns:** AI-generated analysis of the screenshot based on the prompt.
132
+
133
+ **Example:**
134
+
135
+ ```
136
+ Prompt: "What app is currently open? List all visible UI elements."
137
+ ```
138
+
139
+ The tool will:
140
+
141
+ 1. Find the specified device (or first available)
142
+ 2. Capture a screenshot
143
+ 3. Analyze it with the vision model
144
+ 4. Return the analysis
145
+
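As a rough sketch of how an MCP client could invoke this tool programmatically (this assumes the `fastmcp` Python client and the default server URL from the setup above; it is not part of the package):

```python
# Minimal sketch, assuming the fastmcp client API and a server on localhost:8000.
import asyncio

from fastmcp import Client


async def main() -> None:
    async with Client("http://localhost:8000/mcp") as client:
        result = await client.call_tool(
            "analyze_screen",
            {"prompt": "What app is currently open? List all visible UI elements."},
        )
        print(result)


asyncio.run(main())
```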
146
+ ### Tool: `execute_mobile_command`
147
+
148
+ Execute natural language commands on your mobile device using the mobile-use SDK. This tool allows you to control your Android or iOS device with simple instructions.
149
+
150
+ **Parameters:**
151
+
152
+ - `goal` (required): Natural language command to execute on the device
153
+ - `output_description` (optional): Description of the expected output format (e.g., "A JSON list of objects with sender and subject keys")
154
+ - `profile` (optional): Name of the profile to use for this task. Defaults to 'default'
155
+
156
+ **Returns:** Execution result with status, output, and any extracted data.
157
+
158
+ **Examples:**
159
+
160
+ ```python
161
+ # Simple command
162
+ goal: "Go to settings and tell me my current battery level"
163
+
164
+ # Data extraction with structured output
165
+ goal: "Open Gmail, find first 3 unread emails, and list their sender and subject line"
166
+ output_description: "A JSON list of objects, each with 'sender' and 'subject' keys"
167
+
168
+ # App navigation
169
+ goal: "Open Twitter and scroll to the latest tweet"
170
+ ```
171
+
172
+ The tool will:
173
+
174
+ 1. Find the specified device (or first available)
175
+ 2. Execute the command using the mobile-use AI agent
176
+ 3. Return the result or extracted data
177
+
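Continuing the same hypothetical client session as in the `analyze_screen` sketch above, a structured-output call might look like this:

```python
# Sketch only: runs inside the `async with Client(...)` block shown earlier.
result = await client.call_tool(
    "execute_mobile_command",
    {
        "goal": "Open Gmail, find first 3 unread emails, and list their sender and subject line",
        "output_description": "A JSON list of objects, each with 'sender' and 'subject' keys",
    },
)
print(result)
```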
178
+ ### Tool: `save_figma_assets`
179
+
180
+ Fetch Figma design assets and React implementation code, then save them locally in the workspace.
181
+
182
+ **Parameters:**
183
+
184
+ - `node_id` (required): The node ID of the Figma design in the format "1:2" (colon-separated). Extract it from URLs like `https://figma.com/design/:fileKey/:fileName?node-id=1-2`, converting the dash to a colon.
185
+ - `file_key` (required): The file key from the Figma URL (e.g., "abc123" from `https://figma.com/design/abc123/MyFile`)
186
+ - `workspace_path` (optional): The workspace path where assets should be saved. Defaults to current directory.
187
+
188
+ **Returns:** Download summary with list of successfully downloaded assets and any failures.
189
+
190
+ **Example:**
191
+
192
+ ```python
193
+ node_id: "1:2"
194
+ file_key: "abc123xyz"
195
+ workspace_path: "."
196
+ ```
197
+
198
+ The tool will:
199
+
200
+ 1. Call `get_design_context` from Figma MCP to get React/TypeScript code
201
+ 2. Extract all asset URLs from the code implementation
202
+ 3. Download each asset to `.mobile-use/figma_assets/<node-id>/` folder
203
+ 4. Save the code implementation to `.mobile-use/figma_assets/<node-id>/code_implementation.ts`
204
+ 5. Return a list of downloaded files with success/failure status
205
+
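A hedged sketch of calling this tool from the same hypothetical client session, reusing the placeholder identifiers from the example above:

```python
# Sketch only: node_id and file_key are the placeholder values from the docs above.
summary = await client.call_tool(
    "save_figma_assets",
    {"node_id": "1:2", "file_key": "abc123xyz", "workspace_path": "."},
)
print(summary)  # download summary listing saved assets and any failures
```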
206
+ ### Tool: `compare_screenshot_with_figma`
207
+
208
+ Compare a screenshot of the current mobile device state with a Figma design to identify visual differences.
209
+
210
+ **Parameters:**
211
+
212
+ - `node_id` (required): The node ID of the Figma design in the format "1:2" (colon-separated). Extract it from URLs like `https://figma.com/design/:fileKey/:fileName?node-id=1-2`, converting the dash to a colon.
213
+
214
+ **Returns:** Detailed comparison report with both the Figma design and current device screenshots for visual context.
215
+
216
+ The tool will:
217
+
218
+ 1. Capture a screenshot of the current device state
219
+ 2. Fetch the Figma design screenshot
220
+ 3. Compare both screenshots using vision AI
221
+ 4. Return a detailed analysis highlighting differences
222
+
223
+ ## Advanced Configuration
224
+
225
+ ### Custom ADB Server
226
+
227
+ If using a remote or custom ADB server (like on WSL):
228
+
229
+ ```bash
230
+ export ADB_SERVER_SOCKET="tcp:192.168.1.100:5037"
231
+ ```
232
+
233
+ ### Vision Model
234
+
235
+ Customize the vision model used for screen analysis:
236
+
237
+ ```bash
238
+ export VISION_MODEL="qwen/qwen-2.5-vl-7b-instruct"
239
+ ```
240
+
241
+ ## Device Setup
242
+
243
+ ### Android
244
+ 1. Enable USB debugging on your device
245
+ 2. Connect via USB or network ADB
246
+ 3. Verify connection: `adb devices`
247
+
248
+ ### iOS (macOS only)
249
+ 1. Install Xcode Command Line Tools
250
+ 2. Start a simulator or connect a physical device
251
+ 3. Verify: `xcrun simctl list devices booted`
252
+
253
+ ## Troubleshooting
254
+
255
+ **No devices found:**
256
+ - Verify ADB/xcrun connection
257
+ - Check USB debugging is enabled (Android)
258
+ - Ensure device is unlocked
259
+
260
+ **Connection refused errors:**
261
+ - Check ADB/xcrun connection
262
+
263
+ **API authentication errors:**
264
+ - Verify `MINITAP_API_KEY` is set correctly
265
+
266
+ ## Links
267
+
268
+ - **Mobile-Use SDK:** [github.com/minitap-ai/mobile-use](https://github.com/minitap-ai/mobile-use)
269
+ - **Mobile-Use Documentation:** [docs.minitap.ai](https://docs.minitap.ai)
@@ -0,0 +1,69 @@
1
+ """Agent to extract Figma asset URLs from design context code using regex."""
2
+
3
+ import re
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+
8
+ class FigmaAsset(BaseModel):
9
+ """Represents a single Figma asset."""
10
+
11
+ variable_name: str = Field(description="The variable name from the code (e.g., imgSignal)")
12
+ url: str = Field(description="The full URL to the asset")
13
+ extension: str = Field(description="The file extension (e.g., svg, png, jpg)")
14
+
15
+
16
+ class ExtractedAssets(BaseModel):
17
+ """Container for all extracted Figma assets."""
18
+
19
+ assets: list[FigmaAsset] = Field(
20
+ default_factory=list,
21
+ description="List of all extracted assets from the Figma design context",
22
+ )
23
+ code_implementation: str = Field(
24
+ description="The React/TypeScript code with imports instead of const declarations"
25
+ )
26
+
27
+
28
+ def extract_figma_assets(design_context_code: str) -> ExtractedAssets:
29
+ """Extract asset URLs from Figma design context code using regex.
30
+
31
+ Args:
32
+ design_context_code: The React/TypeScript code from get_design_context
33
+
34
+ Returns:
35
+ ExtractedAssets with list of assets and transformed code
36
+ """
37
+ # Regex captures: (1) variable name, (2) full URL, (4) extension
38
+ # Supports http/https, any domain, query strings, optional semicolon
39
+ pattern = r'const\s+(\w+)\s*=\s*["\']((https?://[^"\']+?)\.(\w+)(?:\?[^"\']*)?)["\'];?'
40
+ matches = re.finditer(pattern, design_context_code)
41
+
42
+ assets = []
43
+ asset_lines = []
44
+
45
+ for match in matches:
46
+ var_name = match.group(1)
47
+ url = match.group(2)
48
+ extension = match.group(4)
49
+
50
+ assets.append(FigmaAsset(variable_name=var_name, url=url, extension=extension))
51
+ asset_lines.append(match.group(0))
52
+
53
+ import_statements = []
54
+ for asset in assets:
55
+ import_statements.append(
56
+ f"import {asset.variable_name} from './{asset.variable_name}.{asset.extension}';"
57
+ )
58
+
59
+ transformed_code = design_context_code
60
+ for line in asset_lines:
61
+ transformed_code = transformed_code.replace(line, "")
62
+
63
+ lines = transformed_code.split("\n")
64
+ while lines and not lines[0].strip():
65
+ lines.pop(0)
66
+
67
+ final_code = "\n".join(import_statements) + "\n\n" + "\n".join(lines)
68
+
69
+ return ExtractedAssets(assets=assets, code_implementation=final_code)
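For reference, a small usage sketch of the regex-based extractor added above; the `const` declaration and asset URL below are made up, but follow the pattern the regex targets:

```python
# Illustrative only: sample input mirrors Figma-style const asset declarations.
from minitap.mcp.core.agents.extract_figma_assets import extract_figma_assets

sample = (
    'const imgSignal = "http://localhost:3845/assets/685c5ac5.svg";\n'
    "export function Frame() { return <img src={imgSignal} />; }"
)

extracted = extract_figma_assets(sample)
print(extracted.assets[0].variable_name)  # imgSignal
print(extracted.assets[0].extension)      # svg
print(extracted.code_implementation.splitlines()[0])
# -> import imgSignal from './imgSignal.svg';
```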
@@ -41,9 +41,9 @@ logger = get_logger(__name__)
41
41
 
42
42
  This tool:
43
43
  1. Calls get_design_context from Figma MCP to get the React/TypeScript code
44
- 2. Extracts all asset URLs and code implementation from the code
44
+ 2. Extracts asset URLs and transforms const declarations to import statements
45
45
  3. Downloads each asset to .mobile-use/figma_assets/<node-id>/ folder
46
- 4. Saves the code implementation to .mobile-use/figma_assets/<node-id>/code_implementation.ts
46
+ 4. Saves the transformed code to .mobile-use/figma_assets/<node-id>/code_implementation.ts
47
47
  5. Returns a list of downloaded files
48
48
  """,
49
49
  )
@@ -75,10 +75,8 @@ async def save_figma_assets(
75
75
  # Step 1: Get design context from Figma MCP
76
76
  design_context = await get_design_context(node_id, file_key)
77
77
 
78
- # Step 2: Extract asset URLs using LLM agent
79
- extracted_context: ExtractedAssets = await extract_figma_assets(
80
- design_context.code_implementation
81
- )
78
+ # Step 2: Extract asset URLs and transform code
79
+ extracted_context: ExtractedAssets = extract_figma_assets(design_context.code_implementation)
82
80
  if not extracted_context.assets:
83
81
  raise ToolError("No assets found in the Figma design context.")
84
82
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "minitap-mcp"
3
- version = "0.4.2"
3
+ version = "0.5.0"
4
4
  description = "Model Context Protocol server for controlling Android & iOS devices with natural language"
5
5
  readme = "PYPI_README.md"
6
6
 
@@ -1,181 +0,0 @@
1
- # Minitap MCP Server
2
-
3
- A Model Context Protocol (MCP) server that enables AI assistants to control and interact with real mobile devices (Android & iOS) through natural language commands.
4
-
5
- ## Quick Start
6
-
7
- ### Installation
8
-
9
- ```bash
10
- pip install minitap-mcp
11
- ```
12
-
13
- ### Prerequisites
14
-
15
- Before running the MCP server, ensure you have the required mobile automation tools installed:
16
-
17
- - **For Android devices:**
18
- - [ADB (Android Debug Bridge)](https://developer.android.com/tools/adb) - For device communication
19
- - [Maestro](https://maestro.mobile.dev/) - For mobile automation
20
-
21
- - **For iOS devices (macOS only):**
22
- - Xcode Command Line Tools with `xcrun`
23
- - [Maestro](https://maestro.mobile.dev/) - For mobile automation
24
-
25
- For detailed setup instructions, see the [mobile-use repository](https://github.com/minitap-ai/mobile-use).
26
-
27
- ### Running the Server
28
-
29
- The simplest way to start:
30
-
31
- ```bash
32
- minitap-mcp --server --api-key your_minitap_api_key
33
- ```
34
-
35
- This starts the server on `localhost:8000` with your API key. Get your free API key at [platform.minitap.ai/api-keys](https://platform.minitap.ai/api-keys).
36
-
37
- **Available CLI options:**
38
-
39
- ```bash
40
- minitap-mcp --server --api-key YOUR_KEY --llm-profile PROFILE_NAME
41
- ```
42
-
43
- - `--api-key`: Your Minitap API key (overrides `MINITAP_API_KEY` env var). Get yours at [platform.minitap.ai/api-keys](https://platform.minitap.ai/api-keys).
44
- - `--llm-profile`: LLM profile name to use (overrides `MINITAP_LLM_PROFILE_NAME` env var). If unset, uses the default profile. Configure profiles at [platform.minitap.ai/llm-profiles](https://platform.minitap.ai/llm-profiles).
45
-
46
- ### Configuration (Optional)
47
-
48
- Alternatively, you can set environment variables instead of using CLI flags:
49
-
50
- ```bash
51
- export MINITAP_API_KEY="your_minitap_api_key"
52
- export MINITAP_API_BASE_URL="https://platform.minitap.ai/api/v1"
53
- export MINITAP_LLM_PROFILE_NAME="default"
54
- ```
55
-
56
- You can set these in your `.bashrc` or equivalent, then simply run:
57
-
58
- ```bash
59
- minitap-mcp --server
60
- ```
61
-
62
- CLI flags always override environment variables when both are present.
63
-
64
- By default, the server will bind to `0.0.0.0:8000`. Configure via environment variables:
65
-
66
- ```bash
67
- export MCP_SERVER_HOST="0.0.0.0"
68
- export MCP_SERVER_PORT="8000"
69
- ```
70
-
71
- ## IDE Integration
72
-
73
- 1. Start the server: `minitap-mcp --server --api-key your_minitap_api_key`
74
- 2. Add to your IDE MCP settings file:
75
-
76
- ```jsonc
77
- # For Windsurf
78
- {
79
- "mcpServers": {
80
- "minitap-mcp": {
81
- "serverUrl": "http://localhost:8000/mcp"
82
- }
83
- }
84
- }
85
- ```
86
-
87
- ```jsonc
88
- # For Cursor
89
- {
90
- "mcpServers": {
91
- "minitap-mcp": {
92
- "transport": "http",
93
- "url": "http://localhost:8000/mcp"
94
- }
95
- }
96
- }
97
- ```
98
-
99
-
100
- ## Available Tools
101
-
102
- Once connected, your AI assistant can use these tools:
103
-
104
- ### `execute_mobile_command`
105
- Execute natural language commands on your mobile device using the Minitap SDK. This tool allows you to control your Android or iOS device using natural language.
106
-
107
- **Parameters:**
108
- - `goal` (required): High-level goal describing the action to perform
109
- - `output_description` (optional): Natural language description of the desired output format. Results are returned as structured JSON (e.g., "An array with sender and subject for each email")
110
- - `profile` (optional): Profile name to use (defaults to "default")
111
-
112
- **Examples:**
113
- ```
114
- "Open the settings app and tell me the battery level"
115
- "Find the first 3 unread emails in Gmail"
116
- "Open Google Maps and search for the nearest coffee shop"
117
- "Take a screenshot and save it"
118
- ```
119
-
120
- ### `analyze_screen`
121
- Capture and analyze what's currently shown on the mobile device screen using a vision-capable LLM. Useful for understanding UI elements, extracting text, or identifying specific features.
122
-
123
- **Parameters:**
124
- - `prompt` (required): Analysis prompt describing what information to extract
125
- - `device_id` (optional): Specific device ID to target
126
-
127
- **Examples:**
128
- ```js
129
- "What app is currently open?"
130
- "Read the text messages visible on screen"
131
- "List all buttons and their labels on the current screen"
132
- "Extract the phone number displayed"
133
- ```
134
-
135
- ## Advanced Configuration
136
-
137
- ### Custom ADB Server
138
-
139
- If using a remote or custom ADB server (like on WSL):
140
-
141
- ```bash
142
- export ADB_SERVER_SOCKET="tcp:192.168.1.100:5037"
143
- ```
144
-
145
- ### Vision Model
146
-
147
- Customize the vision model used for screen analysis:
148
-
149
- ```bash
150
- export VISION_MODEL="qwen/qwen-2.5-vl-7b-instruct"
151
- ```
152
-
153
- ## Device Setup
154
-
155
- ### Android
156
- 1. Enable USB debugging on your device
157
- 2. Connect via USB or network ADB
158
- 3. Verify connection: `adb devices`
159
-
160
- ### iOS (macOS only)
161
- 1. Install Xcode Command Line Tools
162
- 2. Start a simulator or connect a physical device
163
- 3. Verify: `xcrun simctl list devices booted`
164
-
165
- ## Troubleshooting
166
-
167
- **No devices found:**
168
- - Verify ADB/xcrun connection
169
- - Check USB debugging is enabled (Android)
170
- - Ensure device is unlocked
171
-
172
- **Connection refused errors:**
173
- - Check ADB/xcrun connection
174
-
175
- **API authentication errors:**
176
- - Verify `MINITAP_API_KEY` is set correctly
177
-
178
- ## Links
179
-
180
- - **Mobile-Use SDK:** [github.com/minitap-ai/mobile-use](https://github.com/minitap-ai/mobile-use)
181
- - **Mobile-Use Documentation:** [docs.minitap.ai](https://docs.minitap.ai)
@@ -1,64 +0,0 @@
1
- You are an expert at parsing React/TypeScript code to extract asset URLs and generate clean, documented code implementations.
2
-
3
- Your task is to:
4
-
5
- 1. Extract all asset URLs from the provided code snippet
6
- 2. Generate a clean `code_implementation` output that includes the React code with embedded comments referencing implementation and node guidelines
7
-
8
- **Instructions:**
9
-
10
- ## Part 1: Extract Asset URLs
11
-
12
- 1. Look for all constant declarations that contain URLs pointing to assets (images, SVGs, etc.)
13
- 2. These constants typically follow patterns like:
14
-
15
- - `const imgVariableName = "http://localhost:3845/assets/[hash].[extension]";`
16
- - The variable names usually start with `img` followed by a descriptive name in camelCase
17
-
18
- 3. For each asset URL found, extract:
19
- - The **variable name** (e.g., `imgSignal`, `imgBatteryThreeQuarters`)
20
- - The **full URL** (e.g., `http://localhost:3845/assets/685c5ac58caa29556e29737cf8f8c9605d9c8571.svg`)
21
- - The **file extension** from the URL (e.g., `svg`, `png`, `jpg`)
22
-
23
- ## Part 2: Generate Code Implementation
24
-
25
- The `code_implementation` field should contain:
26
-
27
- 1. The React/TypeScript code with **LOCAL asset imports** instead of HTTP URLs:
28
-
29
- - Convert `const imgSignal = "http://localhost:3845/assets/[hash].svg";`
30
- - To `import imgSignal from './assets/imgSignal.svg';` (or appropriate relative path)
31
- - Use the **exact same variable names** as in the original const declarations
32
- - **CRITICAL**: Preserve the variable naming convention
33
-
34
- 2. Preserve all `data-node-id` attributes and other metadata in the code
35
-
36
- ## Part 3: Return Format
37
-
38
- Return a JSON object with two fields:
39
-
40
- - `assets`: Array of extracted asset objects
41
- - `code_implementation`: String containing the React code with embedded guideline comments
42
-
43
- ```json
44
- {
45
- "assets": [
46
- {
47
- "variable_name": "imgSignal",
48
- "url": "http://localhost:3845/assets/685c5ac58caa29556e29737cf8f8c9605d9c8571.svg",
49
- "extension": "svg"
50
- },
51
- ...
52
- ],
53
- "code_implementation": "import ... function ..."
54
- }
55
- ```
56
-
57
- **Important:**
58
-
59
- - Only extract asset URLs
60
- - Preserve the exact variable names as they appear in the code
61
- - DO NOT MISS any assets
62
- - If no assets are found, return an empty array for `assets`
63
- - Return ONLY the JSON object with both `assets` and `code_implementation` fields
64
- - Do NOT include the const declarations of the assets in the code_implementation output - convert them to imports.
@@ -1,96 +0,0 @@
1
- """Agent to extract Figma asset URLs from design context code."""
2
-
3
- import re
4
- import uuid
5
- from pathlib import Path
6
-
7
- from jinja2 import Template
8
- from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
9
- from pydantic import BaseModel, Field
10
-
11
- from minitap.mcp.core.llm import get_minitap_llm
12
-
13
-
14
- class FigmaAsset(BaseModel):
15
- """Represents a single Figma asset."""
16
-
17
- variable_name: str = Field(description="The variable name from the code (e.g., imgSignal)")
18
- url: str = Field(description="The full URL to the asset")
19
- extension: str = Field(description="The file extension (e.g., svg, png, jpg)")
20
-
21
-
22
- class ExtractedAssets(BaseModel):
23
- """Container for all extracted Figma assets."""
24
-
25
- assets: list[FigmaAsset] = Field(
26
- default_factory=list,
27
- description="List of all extracted assets from the Figma design context",
28
- )
29
- code_implementation: str = Field(
30
- description=(
31
- "The React/TypeScript code\n"
32
- "with the local url declarations turned into const declarations"
33
- )
34
- )
35
-
36
-
37
- def sanitize_unicode_for_llm(text: str) -> str:
38
- """Remove or replace problematic Unicode characters that increase token consumption.
39
-
40
- Characters outside the Basic Multilingual Plane (BMP) like emoji and special symbols
41
- get escaped as \\U sequences when sent to LLMs, dramatically increasing token count
42
- and processing time.
43
-
44
- Args:
45
- text: The text to sanitize
46
-
47
- Returns:
48
- Text with problematic Unicode characters replaced with placeholders
49
- """
50
-
51
- # Replace characters outside BMP (U+10000 and above) with a placeholder
52
- # These are typically emoji, special symbols, or rare characters
53
- def replace_high_unicode(match):
54
- char = match.group(0)
55
- codepoint = ord(char)
56
- # Return a descriptive placeholder
57
- return f"[U+{codepoint:X}]"
58
-
59
- # Pattern matches characters with codepoints >= U+10000
60
- pattern = re.compile(r"[\U00010000-\U0010FFFF]")
61
- sanitized = pattern.sub(replace_high_unicode, text)
62
-
63
- return sanitized
64
-
65
-
66
- async def extract_figma_assets(design_context_code: str) -> ExtractedAssets:
67
- """Extract asset URLs from Figma design context code.
68
-
69
- Args:
70
- design_context_code: The React/TypeScript code from get_design_context
71
-
72
- Returns:
73
- List of dictionaries containing variable_name, url, and extension
74
- """
75
- system_message = Template(
76
- Path(__file__).parent.joinpath("extract_figma_assets.md").read_text(encoding="utf-8")
77
- ).render()
78
-
79
- sanitized_code = sanitize_unicode_for_llm(design_context_code)
80
-
81
- messages: list[BaseMessage] = [
82
- SystemMessage(content=system_message),
83
- HumanMessage(
84
- content=f"Here is the code to analyze:\n\n```typescript\n{sanitized_code}\n```"
85
- ),
86
- ]
87
-
88
- llm = get_minitap_llm(
89
- model="openai/gpt-5",
90
- temperature=0,
91
- trace_id=str(uuid.uuid4()),
92
- remote_tracing=True,
93
- ).with_structured_output(ExtractedAssets)
94
- result: ExtractedAssets = await llm.ainvoke(messages) # type: ignore
95
-
96
- return result