minitap-mcp 0.1.1__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. minitap_mcp-0.4.0/PKG-INFO +203 -0
  2. minitap_mcp-0.4.0/PYPI_README.md +181 -0
  3. minitap_mcp-0.4.0/minitap/mcp/core/agents/compare_screenshots.md +62 -0
  4. minitap_mcp-0.4.0/minitap/mcp/core/agents/compare_screenshots.py +65 -0
  5. minitap_mcp-0.4.0/minitap/mcp/core/agents/extract_figma_assets.md +64 -0
  6. minitap_mcp-0.4.0/minitap/mcp/core/agents/extract_figma_assets.py +65 -0
  7. {minitap_mcp-0.1.1 → minitap_mcp-0.4.0}/minitap/mcp/core/config.py +4 -1
  8. minitap_mcp-0.4.0/minitap/mcp/core/models.py +59 -0
  9. minitap_mcp-0.4.0/minitap/mcp/core/sdk_agent.py +27 -0
  10. {minitap_mcp-0.1.1 → minitap_mcp-0.4.0}/minitap/mcp/main.py +67 -43
  11. minitap_mcp-0.4.0/minitap/mcp/server/poller.py +78 -0
  12. minitap_mcp-0.4.0/minitap/mcp/tools/compare_screenshot_with_figma.py +132 -0
  13. {minitap_mcp-0.1.1 → minitap_mcp-0.4.0}/minitap/mcp/tools/execute_mobile_command.py +5 -3
  14. minitap_mcp-0.4.0/minitap/mcp/tools/save_figma_assets.py +258 -0
  15. {minitap_mcp-0.1.1 → minitap_mcp-0.4.0}/pyproject.toml +5 -4
  16. minitap_mcp-0.1.1/PKG-INFO +0 -348
  17. minitap_mcp-0.1.1/README.md +0 -327
  18. minitap_mcp-0.1.1/minitap/mcp/core/agents.py +0 -19
  19. minitap_mcp-0.1.1/minitap/mcp/server/poller.py +0 -38
  20. {minitap_mcp-0.1.1 → minitap_mcp-0.4.0}/minitap/mcp/__init__.py +0 -0
  21. {minitap_mcp-0.1.1 → minitap_mcp-0.4.0}/minitap/mcp/core/decorators.py +0 -0
  22. {minitap_mcp-0.1.1 → minitap_mcp-0.4.0}/minitap/mcp/core/device.py +0 -0
  23. {minitap_mcp-0.1.1 → minitap_mcp-0.4.0}/minitap/mcp/core/llm.py +0 -0
  24. {minitap_mcp-0.1.1 → minitap_mcp-0.4.0}/minitap/mcp/core/utils.py +0 -0
  25. {minitap_mcp-0.1.1 → minitap_mcp-0.4.0}/minitap/mcp/server/middleware.py +0 -0
  26. {minitap_mcp-0.1.1 → minitap_mcp-0.4.0}/minitap/mcp/tools/analyze_screen.py +0 -0
  27. {minitap_mcp-0.1.1 → minitap_mcp-0.4.0}/minitap/mcp/tools/go_back.py +0 -0
  28. {minitap_mcp-0.1.1 → minitap_mcp-0.4.0}/minitap/mcp/tools/screen_analyzer.md +0 -0
@@ -0,0 +1,203 @@
1
+ Metadata-Version: 2.3
2
+ Name: minitap-mcp
3
+ Version: 0.4.0
4
+ Summary: Model Context Protocol server for controlling Android & iOS devices with natural language
5
+ Author: Pierre-Louis Favreau, Jean-Pierre Lo, Clément Guiguet
6
+ Requires-Dist: fastmcp>=2.12.4
7
+ Requires-Dist: python-dotenv>=1.1.1
8
+ Requires-Dist: pydantic>=2.12.0
9
+ Requires-Dist: pydantic-settings>=2.10.1
10
+ Requires-Dist: minitap-mobile-use>=2.8.1
11
+ Requires-Dist: jinja2>=3.1.6
12
+ Requires-Dist: langchain-core>=0.3.75
13
+ Requires-Dist: pillow>=11.1.0
14
+ Requires-Dist: ruff==0.5.3 ; extra == 'dev'
15
+ Requires-Dist: pytest==8.4.1 ; extra == 'dev'
16
+ Requires-Dist: pytest-cov==5.0.0 ; extra == 'dev'
17
+ Requires-Python: >=3.12
18
+ Project-URL: Homepage, https://minitap.ai/
19
+ Project-URL: Source, https://github.com/minitap-ai/mobile-use
20
+ Provides-Extra: dev
21
+ Description-Content-Type: text/markdown
22
+
23
+ # Minitap MCP Server
24
+
25
+ A Model Context Protocol (MCP) server that enables AI assistants to control and interact with real mobile devices (Android & iOS) through natural language commands.
26
+
27
+ ## Quick Start
28
+
29
+ ### Installation
30
+
31
+ ```bash
32
+ pip install minitap-mcp
33
+ ```
34
+
35
+ ### Prerequisites
36
+
37
+ Before running the MCP server, ensure you have the required mobile automation tools installed:
38
+
39
+ - **For Android devices:**
40
+ - [ADB (Android Debug Bridge)](https://developer.android.com/tools/adb) - For device communication
41
+ - [Maestro](https://maestro.mobile.dev/) - For mobile automation
42
+
43
+ - **For iOS devices (macOS only):**
44
+ - Xcode Command Line Tools with `xcrun`
45
+ - [Maestro](https://maestro.mobile.dev/) - For mobile automation
46
+
47
+ For detailed setup instructions, see the [mobile-use repository](https://github.com/minitap-ai/mobile-use).
48
+
49
+ ### Running the Server
50
+
51
+ The simplest way to start:
52
+
53
+ ```bash
54
+ minitap-mcp --server --api-key your_minitap_api_key
55
+ ```
56
+
57
+ This starts the server on `localhost:8000` with your API key. Get your free API key at [platform.minitap.ai/api-keys](https://platform.minitap.ai/api-keys).
58
+
59
+ **Available CLI options:**
60
+
61
+ ```bash
62
+ minitap-mcp --server --api-key YOUR_KEY --llm-profile PROFILE_NAME
63
+ ```
64
+
65
+ - `--api-key`: Your Minitap API key (overrides `MINITAP_API_KEY` env var). Get yours at [platform.minitap.ai/api-keys](https://platform.minitap.ai/api-keys).
66
+ - `--llm-profile`: LLM profile name to use (overrides `MINITAP_LLM_PROFILE_NAME` env var). If unset, uses the default profile. Configure profiles at [platform.minitap.ai/llm-profiles](https://platform.minitap.ai/llm-profiles).
67
+
68
+ ### Configuration (Optional)
69
+
70
+ Alternatively, you can set environment variables instead of using CLI flags:
71
+
72
+ ```bash
73
+ export MINITAP_API_KEY="your_minitap_api_key"
74
+ export MINITAP_API_BASE_URL="https://platform.minitap.ai/api/v1"
75
+ export MINITAP_LLM_PROFILE_NAME="default"
76
+ ```
77
+
78
+ You can set these in your `.bashrc` or equivalent, then simply run:
79
+
80
+ ```bash
81
+ minitap-mcp --server
82
+ ```
83
+
84
+ CLI flags always override environment variables when both are present.
85
+
86
+ By default, the server will bind to `0.0.0.0:8000`. Configure via environment variables:
87
+
88
+ ```bash
89
+ export MCP_SERVER_HOST="0.0.0.0"
90
+ export MCP_SERVER_PORT="8000"
91
+ ```
92
+
93
+ ## IDE Integration
94
+
95
+ 1. Start the server: `minitap-mcp --server --api-key your_minitap_api_key`
96
+ 2. Add to your IDE MCP settings file:
97
+
98
+ ```jsonc
99
+ # For Windsurf
100
+ {
101
+ "mcpServers": {
102
+ "minitap-mcp": {
103
+ "serverUrl": "http://localhost:8000/mcp"
104
+ }
105
+ }
106
+ }
107
+ ```
108
+
109
+ ```jsonc
110
+ # For Cursor
111
+ {
112
+ "mcpServers": {
113
+ "minitap-mcp": {
114
+ "transport": "http",
115
+ "url": "http://localhost:8000/mcp"
116
+ }
117
+ }
118
+ }
119
+ ```
120
+
121
+
122
+ ## Available Tools
123
+
124
+ Once connected, your AI assistant can use these tools:
125
+
126
+ ### `execute_mobile_command`
127
+ Execute natural language commands on your mobile device using the Minitap SDK. This tool allows you to control your Android or iOS device using natural language.
128
+
129
+ **Parameters:**
130
+ - `goal` (required): High-level goal describing the action to perform
131
+ - `output_description` (optional): Natural language description of the desired output format. Results are returned as structured JSON (e.g., "An array with sender and subject for each email")
132
+ - `profile` (optional): Profile name to use (defaults to "default")
133
+
134
+ **Examples:**
135
+ ```
136
+ "Open the settings app and tell me the battery level"
137
+ "Find the first 3 unread emails in Gmail"
138
+ "Open Google Maps and search for the nearest coffee shop"
139
+ "Take a screenshot and save it"
140
+ ```
141
+
142
+ ### `analyze_screen`
143
+ Capture and analyze what's currently shown on the mobile device screen using a vision-capable LLM. Useful for understanding UI elements, extracting text, or identifying specific features.
144
+
145
+ **Parameters:**
146
+ - `prompt` (required): Analysis prompt describing what information to extract
147
+ - `device_id` (optional): Specific device ID to target
148
+
149
+ **Examples:**
150
+ ```
151
+ "What app is currently open?"
152
+ "Read the text messages visible on screen"
153
+ "List all buttons and their labels on the current screen"
154
+ "Extract the phone number displayed"
155
+ ```
156
+
157
+ ## Advanced Configuration
158
+
159
+ ### Custom ADB Server
160
+
161
+ If using a remote or custom ADB server (like on WSL):
162
+
163
+ ```bash
164
+ export ADB_SERVER_SOCKET="tcp:192.168.1.100:5037"
165
+ ```
166
+
167
+ ### Vision Model
168
+
169
+ Customize the vision model used for screen analysis:
170
+
171
+ ```bash
172
+ export VISION_MODEL="qwen/qwen-2.5-vl-7b-instruct"
173
+ ```
174
+
175
+ ## Device Setup
176
+
177
+ ### Android
178
+ 1. Enable USB debugging on your device
179
+ 2. Connect via USB or network ADB
180
+ 3. Verify connection: `adb devices`
181
+
182
+ ### iOS (macOS only)
183
+ 1. Install Xcode Command Line Tools
184
+ 2. Start a simulator or connect a physical device
185
+ 3. Verify: `xcrun simctl list devices booted`
186
+
187
+ ## Troubleshooting
188
+
189
+ **No devices found:**
190
+ - Verify ADB/xcrun connection
191
+ - Check USB debugging is enabled (Android)
192
+ - Ensure device is unlocked
193
+
194
+ **Connection refused errors:**
195
+ - Check ADB/xcrun connection
196
+
197
+ **API authentication errors:**
198
+ - Verify `MINITAP_API_KEY` is set correctly
199
+
200
+ ## Links
201
+
202
+ - **Mobile-Use SDK:** [github.com/minitap-ai/mobile-use](https://github.com/minitap-ai/mobile-use)
203
+ - **Mobile-Use Documentation:** [docs.minitap.ai](https://docs.minitap.ai)
@@ -0,0 +1,181 @@
1
+ # Minitap MCP Server
2
+
3
+ A Model Context Protocol (MCP) server that enables AI assistants to control and interact with real mobile devices (Android & iOS) through natural language commands.
4
+
5
+ ## Quick Start
6
+
7
+ ### Installation
8
+
9
+ ```bash
10
+ pip install minitap-mcp
11
+ ```
12
+
13
+ ### Prerequisites
14
+
15
+ Before running the MCP server, ensure you have the required mobile automation tools installed:
16
+
17
+ - **For Android devices:**
18
+ - [ADB (Android Debug Bridge)](https://developer.android.com/tools/adb) - For device communication
19
+ - [Maestro](https://maestro.mobile.dev/) - For mobile automation
20
+
21
+ - **For iOS devices (macOS only):**
22
+ - Xcode Command Line Tools with `xcrun`
23
+ - [Maestro](https://maestro.mobile.dev/) - For mobile automation
24
+
25
+ For detailed setup instructions, see the [mobile-use repository](https://github.com/minitap-ai/mobile-use).
26
+
27
+ ### Running the Server
28
+
29
+ The simplest way to start:
30
+
31
+ ```bash
32
+ minitap-mcp --server --api-key your_minitap_api_key
33
+ ```
34
+
35
+ This starts the server on `localhost:8000` with your API key. Get your free API key at [platform.minitap.ai/api-keys](https://platform.minitap.ai/api-keys).
36
+
37
+ **Available CLI options:**
38
+
39
+ ```bash
40
+ minitap-mcp --server --api-key YOUR_KEY --llm-profile PROFILE_NAME
41
+ ```
42
+
43
+ - `--api-key`: Your Minitap API key (overrides `MINITAP_API_KEY` env var). Get yours at [platform.minitap.ai/api-keys](https://platform.minitap.ai/api-keys).
44
+ - `--llm-profile`: LLM profile name to use (overrides `MINITAP_LLM_PROFILE_NAME` env var). If unset, uses the default profile. Configure profiles at [platform.minitap.ai/llm-profiles](https://platform.minitap.ai/llm-profiles).
45
+
46
+ ### Configuration (Optional)
47
+
48
+ Alternatively, you can set environment variables instead of using CLI flags:
49
+
50
+ ```bash
51
+ export MINITAP_API_KEY="your_minitap_api_key"
52
+ export MINITAP_API_BASE_URL="https://platform.minitap.ai/api/v1"
53
+ export MINITAP_LLM_PROFILE_NAME="default"
54
+ ```
55
+
56
+ You can set these in your `.bashrc` or equivalent, then simply run:
57
+
58
+ ```bash
59
+ minitap-mcp --server
60
+ ```
61
+
62
+ CLI flags always override environment variables when both are present.
63
+
64
+ By default, the server will bind to `0.0.0.0:8000`. Configure via environment variables:
65
+
66
+ ```bash
67
+ export MCP_SERVER_HOST="0.0.0.0"
68
+ export MCP_SERVER_PORT="8000"
69
+ ```
70
+
71
+ ## IDE Integration
72
+
73
+ 1. Start the server: `minitap-mcp --server --api-key your_minitap_api_key`
74
+ 2. Add to your IDE MCP settings file:
75
+
76
+ ```jsonc
77
+ # For Windsurf
78
+ {
79
+ "mcpServers": {
80
+ "minitap-mcp": {
81
+ "serverUrl": "http://localhost:8000/mcp"
82
+ }
83
+ }
84
+ }
85
+ ```
86
+
87
+ ```jsonc
88
+ # For Cursor
89
+ {
90
+ "mcpServers": {
91
+ "minitap-mcp": {
92
+ "transport": "http",
93
+ "url": "http://localhost:8000/mcp"
94
+ }
95
+ }
96
+ }
97
+ ```
98
+
99
+
100
+ ## Available Tools
101
+
102
+ Once connected, your AI assistant can use these tools:
103
+
104
+ ### `execute_mobile_command`
105
+ Execute natural language commands on your mobile device using the Minitap SDK. This tool allows you to control your Android or iOS device using natural language.
106
+
107
+ **Parameters:**
108
+ - `goal` (required): High-level goal describing the action to perform
109
+ - `output_description` (optional): Natural language description of the desired output format. Results are returned as structured JSON (e.g., "An array with sender and subject for each email")
110
+ - `profile` (optional): Profile name to use (defaults to "default")
111
+
112
+ **Examples:**
113
+ ```
114
+ "Open the settings app and tell me the battery level"
115
+ "Find the first 3 unread emails in Gmail"
116
+ "Open Google Maps and search for the nearest coffee shop"
117
+ "Take a screenshot and save it"
118
+ ```
119
+
120
+ ### `analyze_screen`
121
+ Capture and analyze what's currently shown on the mobile device screen using a vision-capable LLM. Useful for understanding UI elements, extracting text, or identifying specific features.
122
+
123
+ **Parameters:**
124
+ - `prompt` (required): Analysis prompt describing what information to extract
125
+ - `device_id` (optional): Specific device ID to target
126
+
127
+ **Examples:**
128
+ ```js
129
+ "What app is currently open?"
130
+ "Read the text messages visible on screen"
131
+ "List all buttons and their labels on the current screen"
132
+ "Extract the phone number displayed"
133
+ ```
134
+
135
+ ## Advanced Configuration
136
+
137
+ ### Custom ADB Server
138
+
139
+ If using a remote or custom ADB server (like on WSL):
140
+
141
+ ```bash
142
+ export ADB_SERVER_SOCKET="tcp:192.168.1.100:5037"
143
+ ```
144
+
145
+ ### Vision Model
146
+
147
+ Customize the vision model used for screen analysis:
148
+
149
+ ```bash
150
+ export VISION_MODEL="qwen/qwen-2.5-vl-7b-instruct"
151
+ ```
152
+
153
+ ## Device Setup
154
+
155
+ ### Android
156
+ 1. Enable USB debugging on your device
157
+ 2. Connect via USB or network ADB
158
+ 3. Verify connection: `adb devices`
159
+
160
+ ### iOS (macOS only)
161
+ 1. Install Xcode Command Line Tools
162
+ 2. Start a simulator or connect a physical device
163
+ 3. Verify: `xcrun simctl list devices booted`
164
+
165
+ ## Troubleshooting
166
+
167
+ **No devices found:**
168
+ - Verify ADB/xcrun connection
169
+ - Check USB debugging is enabled (Android)
170
+ - Ensure device is unlocked
171
+
172
+ **Connection refused errors:**
173
+ - Check ADB/xcrun connection
174
+
175
+ **API authentication errors:**
176
+ - Verify `MINITAP_API_KEY` is set correctly
177
+
178
+ ## Links
179
+
180
+ - **Mobile-Use SDK:** [github.com/minitap-ai/mobile-use](https://github.com/minitap-ai/mobile-use)
181
+ - **Mobile-Use Documentation:** [docs.minitap.ai](https://docs.minitap.ai)
@@ -0,0 +1,62 @@
1
+ You will be given _two screenshots_.
2
+
3
+ 1. "Expected screenshot" — this is the design from Figma.
4
+ 2. "Implemented screenshot" — this is the actual phone screen that has been built.
5
+
6
+ Your task is to **compare the two screenshots** in detail, and generate a structured report that includes:
7
+
8
+ - A comprehensive list of **all visible differences** between the expected design and the implemented screen.
9
+ - For each difference, provide:
10
+ - A clear **description** of what changed (for example: "The 'Submit' button label changed from 'Submit' to 'Send'", "The icon moved 8px to the right", "The background colour of header changed from #FFFFFF to #F6F6F6", etc.).
11
+ - The **type of change** (e.g., text change, color change, position/movement, size change, added element, removed element, style change).
12
+ - The **location** of the change (for example: "bottom-centre of screen", "top header area", "to the right of search bar"). If possible, approximate coordinates or bounding box (e.g., "approx. 240×180 px at screen width 1080").
13
+ - The **impact on implementation** (i.e., reasoning about what this means: "The implemented version uses a different text label – so behaviour may differ", "The icon moved and may overlap another element", etc.).
14
+ - A **recommendation** if relevant (e.g., "Should revert to #FFFFFF to match design", "Check alignment of icon relative to search bar", etc.).
15
+
16
+ **Important**:
17
+
18
+ - Assume the screenshots are aligned (same resolution and scale); if not aligned mention that as a difference.
19
+ - Focus on _visible UI differences_ (layout, text, style, iconography) – you do _not_ need to inspect source code, only what is visually rendered.
20
+ - Do _not_ produce generic comments like "looks like a difference" – aim for _precise, actionable descriptions_.
21
+ - **IGNORE dynamic/personal content** that naturally differs between mockups and real implementations:
22
+ - User profile information (names, usernames, email addresses, profile pictures)
23
+ - Time-based information (current time, dates, timestamps, "2 hours ago", etc.)
24
+ - Dynamic data (notification counts, unread badges, live statistics)
25
+ - Sample/placeholder content that varies (e.g., "John Doe" vs "Jane Smith")
26
+ - System status information (battery level, signal strength, network indicators)
27
+ - Only flag these as differences if the _structure, layout, or styling_ of these elements differs, not the content itself.
28
+ - Output in a structured format, for example:
29
+
30
+ ```
31
+
32
+ 1. Location: [top header – full width]
33
+ Change: Background colour changed from #FFFFFF → #F6F6F6
34
+ Type: Colour change
35
+ Impact: The header will appear darker than design; text contrast may be lower.
36
+ Recommendation: Update header background to #FFFFFF as in design.
37
+
38
+ ```
39
+
40
+ - At the end produce a summary with ONLY:
41
+ - Total number of differences found
42
+ - Overall "match score" out of 100 (your estimation of how closely the implementation matches the design)
43
+ - Do NOT include any recap, overview, or macro-level summary of changes - all details are already captured in the differences list above.
44
+
45
+ ### Input:
46
+
47
+ - Screenshot A: Expected (Figma)
48
+ - Screenshot B: Implemented (Phone)
49
+ Provide both screenshots and then the prompt.
50
+
51
+ ### Output:
52
+
53
+ Structured list of differences + summary.
54
+
55
+ Please use the following to start the analysis.
56
+ **Input:**
57
+ First screen is the Figma screenshot (what is expected)
58
+ Second screen is the implemented version (taken from the phone, after the implementation)
59
+
60
+ You will have this data in the next messages sent by the user.
61
+
62
+ Go ahead and generate your report.
@@ -0,0 +1,65 @@
1
+ import asyncio
2
+ from pathlib import Path
3
+ from uuid import uuid4
4
+
5
+ from jinja2 import Template
6
+ from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
7
+ from pydantic import BaseModel
8
+
9
+ from minitap.mcp.core.device import capture_screenshot, find_mobile_device
10
+ from minitap.mcp.core.llm import get_minitap_llm
11
+ from minitap.mcp.core.utils import get_screenshot_message_for_llm
12
+
13
+
14
+ class CompareScreenshotsOutput(BaseModel):
15
+ comparison_text: str
16
+ expected_screenshot_base64: str
17
+ current_screenshot_base64: str
18
+
19
+
20
+ async def compare_screenshots(
21
+ expected_screenshot_base64: str,
22
+ ) -> CompareScreenshotsOutput:
23
+ """
24
+ Compare screenshots and return the comparison text along with both screenshots.
25
+
26
+ Returns:
27
+ CompareScreenshotsOutput
28
+ """
29
+ system_message = Template(
30
+ Path(__file__).parent.joinpath("compare_screenshots.md").read_text(encoding="utf-8")
31
+ ).render()
32
+
33
+ device = find_mobile_device()
34
+ current_screenshot = capture_screenshot(device)
35
+
36
+ messages: list[BaseMessage] = [
37
+ SystemMessage(content=system_message),
38
+ HumanMessage(content="Here is the Figma screenshot (what needs to be matched):"),
39
+ get_screenshot_message_for_llm(expected_screenshot_base64),
40
+ HumanMessage(content="Here is the screenshot of the mobile device:"),
41
+ get_screenshot_message_for_llm(current_screenshot),
42
+ ]
43
+
44
+ llm = get_minitap_llm(
45
+ trace_id=str(uuid4()),
46
+ remote_tracing=True,
47
+ model="google/gemini-2.5-pro",
48
+ temperature=1,
49
+ )
50
+ response = await llm.ainvoke(messages)
51
+ return CompareScreenshotsOutput(
52
+ comparison_text=str(response.content),
53
+ expected_screenshot_base64=expected_screenshot_base64,
54
+ current_screenshot_base64=current_screenshot,
55
+ )
56
+
57
+
58
+ async def main():
59
+ expected_screenshot_base64 = "Base64 encoded screenshot to compare with."
60
+ result = await compare_screenshots(expected_screenshot_base64)
61
+ print(result.model_dump_json(indent=2))
62
+
63
+
64
+ if __name__ == "__main__":
65
+ asyncio.run(main())
@@ -0,0 +1,64 @@
1
+ You are an expert at parsing React/TypeScript code to extract asset URLs and generate clean, documented code implementations.
2
+
3
+ Your task is to:
4
+
5
+ 1. Extract all asset URLs from the provided code snippet
6
+ 2. Generate a clean `code_implementation` output that includes the React code with embedded comments referencing implementation and node guidelines
7
+
8
+ **Instructions:**
9
+
10
+ ## Part 1: Extract Asset URLs
11
+
12
+ 1. Look for all constant declarations that contain URLs pointing to assets (images, SVGs, etc.)
13
+ 2. These constants typically follow patterns like:
14
+
15
+ - `const imgVariableName = "http://localhost:3845/assets/[hash].[extension]";`
16
+ - The variable names usually start with `img` followed by a descriptive name in camelCase
17
+
18
+ 3. For each asset URL found, extract:
19
+ - The **variable name** (e.g., `imgSignal`, `imgBatteryThreeQuarters`)
20
+ - The **full URL** (e.g., `http://localhost:3845/assets/685c5ac58caa29556e29737cf8f8c9605d9c8571.svg`)
21
+ - The **file extension** from the URL (e.g., `svg`, `png`, `jpg`)
22
+
23
+ ## Part 2: Generate Code Implementation
24
+
25
+ The `code_implementation` field should contain:
26
+
27
+ 1. The React/TypeScript code with **LOCAL asset imports** instead of HTTP URLs:
28
+
29
+ - Convert `const imgSignal = "http://localhost:3845/assets/[hash].svg";`
30
+ - To `import imgSignal from './assets/imgSignal.svg';` (or appropriate relative path)
31
+ - Use the **exact same variable names** as in the original const declarations
32
+ - **CRITICAL**: Preserve the variable naming convention
33
+
34
+ 2. Preserve all `data-node-id` attributes and other metadata in the code
35
+
36
+ ## Part 3: Return Format
37
+
38
+ Return a JSON object with two fields:
39
+
40
+ - `assets`: Array of extracted asset objects
41
+ - `code_implementation`: String containing the React code with embedded guideline comments
42
+
43
+ ```json
44
+ {
45
+ "assets": [
46
+ {
47
+ "variable_name": "imgSignal",
48
+ "url": "http://localhost:3845/assets/685c5ac58caa29556e29737cf8f8c9605d9c8571.svg",
49
+ "extension": "svg"
50
+ },
51
+ ...
52
+ ],
53
+ "code_implementation": "import ... function ..."
54
+ }
55
+ ```
56
+
57
+ **Important:**
58
+
59
+ - Only extract asset URLs
60
+ - Preserve the exact variable names as they appear in the code
61
+ - DO NOT MISS any assets
62
+ - If no assets are found, return an empty array for `assets`
63
+ - Return ONLY the JSON object with both `assets` and `code_implementation` fields
64
+ - Do NOT include the const declarations of the assets in the code_implementation output - convert them to imports.
@@ -0,0 +1,65 @@
1
+ """Agent to extract Figma asset URLs from design context code."""
2
+
3
+ from pathlib import Path
4
+ from uuid import uuid4
5
+
6
+ from jinja2 import Template
7
+ from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
8
+ from pydantic import BaseModel, Field
9
+
10
+ from minitap.mcp.core.llm import get_minitap_llm
11
+
12
+
13
+ class FigmaAsset(BaseModel):
14
+ """Represents a single Figma asset."""
15
+
16
+ variable_name: str = Field(description="The variable name from the code (e.g., imgSignal)")
17
+ url: str = Field(description="The full URL to the asset")
18
+ extension: str = Field(description="The file extension (e.g., svg, png, jpg)")
19
+
20
+
21
+ class ExtractedAssets(BaseModel):
22
+ """Container for all extracted Figma assets."""
23
+
24
+ assets: list[FigmaAsset] = Field(
25
+ default_factory=list,
26
+ description="List of all extracted assets from the Figma design context",
27
+ )
28
+ code_implementation: str = Field(
29
+ description=(
30
+ "The React/TypeScript code\n"
31
+ "with the local url declarations turned into const declarations"
32
+ )
33
+ )
34
+
35
+
36
+ async def extract_figma_assets(design_context_code: str) -> ExtractedAssets:
37
+ """Extract asset URLs from Figma design context code.
38
+
39
+ Args:
40
+ design_context_code: The React/TypeScript code from get_design_context
41
+
42
+ Returns:
43
+ ExtractedAssets containing the list of extracted assets and the rewritten code implementation
44
+ """
45
+ system_message = Template(
46
+ Path(__file__).parent.joinpath("extract_figma_assets.md").read_text(encoding="utf-8")
47
+ ).render()
48
+
49
+ messages: list[BaseMessage] = [
50
+ SystemMessage(content=system_message),
51
+ HumanMessage(
52
+ content=f"Here is the code to analyze:\n\n```typescript\n{design_context_code}\n```"
53
+ ),
54
+ ]
55
+
56
+ llm = get_minitap_llm(
57
+ trace_id=str(uuid4()),
58
+ remote_tracing=True,
59
+ model="google/gemini-2.5-pro",
60
+ temperature=0,
61
+ ).with_structured_output(ExtractedAssets)
62
+
63
+ result: ExtractedAssets = await llm.ainvoke(messages) # type: ignore
64
+
65
+ return result
@@ -14,11 +14,14 @@ class MCPSettings(BaseSettings):
14
14
  model_config = SettingsConfigDict(env_file=".env", extra="ignore")
15
15
 
16
16
  # Minitap API configuration
17
- MINITAP_API_KEY: SecretStr
17
+ MINITAP_API_KEY: SecretStr | None = Field(default=None)
18
18
  MINITAP_API_BASE_URL: str = Field(default="https://platform.minitap.ai/api/v1")
19
19
 
20
20
  VISION_MODEL: str = Field(default="qwen/qwen-2.5-vl-7b-instruct")
21
21
 
22
+ # Figma MCP server configuration
23
+ FIGMA_MCP_SERVER_URL: str = Field(default="http://127.0.0.1:3845/mcp")
24
+
22
25
  # MCP server configuration (optional, for remote access)
23
26
  MCP_SERVER_HOST: str = Field(default="0.0.0.0")
24
27
  MCP_SERVER_PORT: int = Field(default=8000)