minitap-mcp 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. minitap/mcp/__init__.py +0 -0
  2. minitap/mcp/core/agents/compare_screenshots/agent.py +75 -0
  3. minitap/mcp/core/agents/compare_screenshots/eval/prompts/prompt_1.md +62 -0
  4. minitap/mcp/core/agents/compare_screenshots/eval/scenario_1_add_cartoon_img_and_move_button/actual.png +0 -0
  5. minitap/mcp/core/agents/compare_screenshots/eval/scenario_1_add_cartoon_img_and_move_button/figma.png +0 -0
  6. minitap/mcp/core/agents/compare_screenshots/eval/scenario_1_add_cartoon_img_and_move_button/human_feedback.txt +18 -0
  7. minitap/mcp/core/agents/compare_screenshots/eval/scenario_1_add_cartoon_img_and_move_button/prompt_1/model_params.json +3 -0
  8. minitap/mcp/core/agents/compare_screenshots/eval/scenario_1_add_cartoon_img_and_move_button/prompt_1/output.md +46 -0
  9. minitap/mcp/core/agents/compare_screenshots/prompt.md +62 -0
  10. minitap/mcp/core/cloud_apk.py +117 -0
  11. minitap/mcp/core/config.py +111 -0
  12. minitap/mcp/core/decorators.py +107 -0
  13. minitap/mcp/core/device.py +249 -0
  14. minitap/mcp/core/llm.py +39 -0
  15. minitap/mcp/core/logging_config.py +59 -0
  16. minitap/mcp/core/models.py +59 -0
  17. minitap/mcp/core/sdk_agent.py +35 -0
  18. minitap/mcp/core/storage.py +407 -0
  19. minitap/mcp/core/task_runs.py +100 -0
  20. minitap/mcp/core/utils/figma.py +69 -0
  21. minitap/mcp/core/utils/images.py +55 -0
  22. minitap/mcp/main.py +328 -0
  23. minitap/mcp/server/cloud_mobile.py +492 -0
  24. minitap/mcp/server/middleware.py +21 -0
  25. minitap/mcp/server/poller.py +78 -0
  26. minitap/mcp/server/remote_proxy.py +96 -0
  27. minitap/mcp/tools/execute_mobile_command.py +182 -0
  28. minitap/mcp/tools/read_swift_logs.py +297 -0
  29. minitap/mcp/tools/screen_analyzer.md +17 -0
  30. minitap/mcp/tools/take_screenshot.py +53 -0
  31. minitap/mcp/tools/upload_screenshot.py +80 -0
  32. minitap_mcp-0.9.0.dist-info/METADATA +352 -0
  33. minitap_mcp-0.9.0.dist-info/RECORD +35 -0
  34. minitap_mcp-0.9.0.dist-info/WHEEL +4 -0
  35. minitap_mcp-0.9.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,182 @@
1
+ """Tool for running manual tasks on a connected mobile device."""
2
+
3
+ from collections.abc import Mapping
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from fastmcp.exceptions import ToolError
8
+ from fastmcp.tools.tool import ToolResult
9
+ from mcp.types import TextContent
10
+ from minitap.mobile_use.sdk.types import ManualTaskConfig
11
+ from minitap.mobile_use.sdk.types.task import PlatformTaskRequest
12
+ from pydantic import Field
13
+
14
+ from minitap.mcp.core.cloud_apk import install_apk_on_cloud_mobile, upload_apk_to_cloud_mobile
15
+ from minitap.mcp.core.config import settings
16
+ from minitap.mcp.core.decorators import handle_tool_errors
17
+ from minitap.mcp.core.logging_config import get_logger
18
+ from minitap.mcp.core.sdk_agent import get_mobile_use_agent
19
+ from minitap.mcp.core.storage import StorageDownloadError, download_trajectory_gif
20
+ from minitap.mcp.core.task_runs import TaskRunsError, get_latest_task_run_id
21
+ from minitap.mcp.main import mcp
22
+ from minitap.mcp.server.cloud_mobile import check_cloud_mobile_status
23
+
24
+ logger = get_logger(__name__)
25
+
26
+
27
+ def _serialize_result(result: Any) -> Any:
28
+ """Convert SDK responses to serializable data for MCP."""
29
+ if hasattr(result, "model_dump"):
30
+ return result.model_dump()
31
+ if hasattr(result, "dict"):
32
+ return result.dict()
33
+ if isinstance(result, Mapping):
34
+ return dict(result)
35
+ return result
36
+
37
+
38
@mcp.tool(
    name="execute_mobile_command",
    description="""
    Execute a natural language command on a mobile device using the Minitap SDK.
    This tool allows you to control your Android or iOS device using natural language.

    Examples:
    - "Open the settings app and tell me the battery level"
    - "Find the first 3 unread emails in Gmail"
    - "Take a screenshot and save it"

    APK Deployment (Cloud Mobile Only):
    When CLOUD_MOBILE_NAME is set, you can deploy and test APKs on cloud mobiles:
    - Set apk_path to the path of your locally built APK
    - The APK will be uploaded to cloud storage and installed on the device
    - Requires MINITAP_API_KEY environment variable
    - Must provide locked_app_package when using apk_path

    Example with APK deployment:
    execute_mobile_command(
    apk_path="/path/to/app-debug.apk",
    locked_app_package="com.example.myapp",
    goal="Test the login flow with valid credentials"
    )

    Note: If apk path is set and no cloud mobile name -> it will raise a tool error
    """,
)
@handle_tool_errors
async def execute_mobile_command(
    goal: str = Field(description="High-level goal describing the action to perform."),
    output_description: str | None = Field(
        default=None,
        description="Optional description of the expected output format. "
        "For example: 'A JSON array with sender and subject for each email' "
        "or 'The battery percentage as a number'.",
    ),
    locked_app_package: str | None = Field(
        default=None,
        description="Optional package name of the app to lock the device to. "
        "Will launch the app if not already running, and keep it in foreground "
        "until the task is completed. REQUIRED when using apk_path.",
    ),
    apk_path: str | None = Field(
        default=None,
        description="Path to local APK file to deploy to cloud mobile. "
        "Only works when CLOUD_MOBILE_NAME is set. "
        "The APK will be uploaded to cloud storage and installed before task execution. "
        "Requires MINITAP_API_KEY to be configured. ",
    ),
) -> str | dict[str, Any] | ToolResult:
    """Run a manual task on a mobile device via the Minitap platform.

    Flow:
        1. When ``settings.CLOUD_MOBILE_NAME`` is set, check the cloud mobile status.
        2. When ``apk_path`` is given, upload the APK and install it on the cloud
           mobile (only allowed in cloud mobile mode).
        3. Build a ``PlatformTaskRequest`` from ``goal`` / ``output_description`` and
           run it through the shared agent, initializing the agent on first use.
        4. When ``settings.TRAJECTORY_GIF_DOWNLOAD_FOLDER`` is set, try to download
           the trajectory GIF of the run.

    Returns:
        The serialized task result. If a trajectory GIF was saved, a ``ToolResult``
        with two text items instead: the result and the path of the saved GIF.

    Raises:
        ToolError: If ``apk_path`` is used without ``CLOUD_MOBILE_NAME``, or if any
            step fails. Unexpected exceptions are wrapped, keeping the original
            exception as ``__cause__``.
    """
    try:
        if settings.CLOUD_MOBILE_NAME:
            await check_cloud_mobile_status(settings.CLOUD_MOBILE_NAME)

        if apk_path:
            if not settings.CLOUD_MOBILE_NAME:
                raise ToolError(
                    "apk_path parameter requires CLOUD_MOBILE_NAME to be set. "
                    "APK deployment is only supported in cloud mobile mode."
                )

            # Step 1: Upload APK via Platform storage API
            filename = await upload_apk_to_cloud_mobile(apk_path=apk_path)

            # Step 2: Install APK on cloud mobile
            await install_apk_on_cloud_mobile(filename=filename)

        request = PlatformTaskRequest(
            task=ManualTaskConfig(
                goal=goal,
                output_description=output_description,
            ),
            execution_origin="mcp",
        )
        agent = get_mobile_use_agent()
        # NOTE(review): relies on the agent's private `_initialized` flag to avoid
        # initializing the shared agent more than once.
        if not agent._initialized:
            await agent.init()
        result = await agent.run_task(
            request=request,
            locked_app_package=locked_app_package,
        )

        trajectory_gif_path: Path | None = None
        if settings.TRAJECTORY_GIF_DOWNLOAD_FOLDER:
            trajectory_gif_path = await _download_trajectory_gif_if_available()

        serialized_result = _serialize_result(result)

        # If trajectory was saved, return a ToolResult with multiple content items
        if trajectory_gif_path:
            import json

            result_text = (
                json.dumps(serialized_result, indent=2)
                if isinstance(serialized_result, dict)
                else str(serialized_result)
            )
            return ToolResult(
                content=[
                    TextContent(type="text", text=result_text),
                    TextContent(type="text", text=f"Trajectory saved to {trajectory_gif_path}"),
                ],
            )

        return serialized_result
    except ToolError:
        # Already the right type and message: do not re-wrap it.
        raise
    except Exception as e:
        # Keep the original exception chained so the root cause stays debuggable.
        raise ToolError(str(e)) from e
147
+
148
+
149
async def _download_trajectory_gif_if_available() -> Path | None:
    """Fetch the trajectory GIF of the latest task run, best effort.

    The latest task run ID is looked up first, then the GIF is downloaded into
    ``settings.TRAJECTORY_GIF_DOWNLOAD_FOLDER``.

    Returns:
        Path of the downloaded GIF, or None when no folder is configured or when
        the lookup/download fails with a storage or task-run error (a warning is
        logged in both cases).
    """
    target_folder = settings.TRAJECTORY_GIF_DOWNLOAD_FOLDER
    if not target_folder:
        logger.warning("TRAJECTORY_GIF_DOWNLOAD_FOLDER not configured, skipping GIF download")
        return None

    # Stays None in the failure log when the run ID lookup itself is what failed.
    run_id = None
    try:
        run_id = await get_latest_task_run_id()
        saved_gif = await download_trajectory_gif(
            task_run_id=run_id,
            download_path=target_folder,
        )
    except (StorageDownloadError, TaskRunsError) as exc:
        logger.warning(
            "Failed to download trajectory GIF",
            task_run_id=run_id,
            error=str(exc),
        )
        return None
    else:
        logger.info(
            "Trajectory GIF downloaded",
            task_run_id=run_id,
            path=str(saved_gif),
        )
        return saved_gif
@@ -0,0 +1,297 @@
1
+ """Tool for reading Swift/iOS logs for debugging during development."""
2
+
3
+ import asyncio
4
+ import json
5
+ import sys
6
+ from datetime import datetime
7
+
8
+ from pydantic import BaseModel, Field
9
+
10
+ from minitap.mcp.core.decorators import handle_tool_errors
11
+ from minitap.mcp.core.logging_config import get_logger
12
+ from minitap.mcp.main import mcp
13
+
14
+ logger = get_logger(__name__)
15
+
16
+
17
class BacktraceFrame(BaseModel):
    """A single frame of a log entry's backtrace.

    Field names match the raw frame keys read by ``_parse_backtrace``.
    Every field is optional and defaults to None.
    """

    # Value of the raw frame's "imageOffset" key.
    imageOffset: int | None = None
    # Value of the raw frame's "imageUUID" key.
    imageUUID: str | None = None
    # Value of the raw frame's "imagePath" key.
    imagePath: str | None = None
    # Value of the raw frame's "symbol" key.
    symbol: str | None = None
22
+
23
+
24
class Backtrace(BaseModel):
    """Backtrace of a log entry, as a list of frames."""

    # Frames of the backtrace; empty when none are supplied.
    frames: list[BacktraceFrame] = []
26
+
27
+
28
class SimplifiedLog(BaseModel):
    """Reduced view of one raw log entry, built by ``_map_to_simplified_logs``."""

    # Raw "timestamp" value after passing through _convert_to_iso8601.
    timestamp: str
    # Raw "messageType" value.
    level: str
    # Raw "category" value.
    category: str
    # Raw "eventMessage" value.
    message: str
    # Raw "processID" value.
    process_id: int
    # Parsed "backtrace" of the entry, when it has usable frames.
    backtrace: Backtrace | None = None
    # Raw "senderImagePath" value.
    sender_image_path: str | None = None
    # Raw "processImagePath" value.
    process_image_path: str | None = None
    # Raw "senderImageUUID" value.
    sender_image_uuid: str | None = None
38
+
39
+
40
class LogsOutput(BaseModel):
    """Result payload returned by the log-reading helpers of this module."""

    # Bundle identifier the logs were requested for.
    bundle_id: str
    # Size of the requested time window, in minutes.
    last_minutes: int
    # Number of entries in `logs`.
    log_count: int
    # Simplified log entries.
    logs: list[SimplifiedLog]
    # Optional human-readable note (set by the readers when no logs were found).
    message: str | None = None
46
+
47
+
48
+ def _convert_to_iso8601(timestamp: str) -> str:
49
+ """Convert macOS log show timestamp to ISO8601 format.
50
+
51
+ Input format: "YYYY-MM-DD HH:MM:SS.NNNNNN±TTTT"
52
+ Output format: "YYYY-MM-DDTHH:MM:SS.NNNNNN±TT:TT"
53
+ """
54
+ if not timestamp:
55
+ return timestamp
56
+
57
+ try:
58
+ dt = datetime.fromisoformat(timestamp.replace(" ", "T"))
59
+ return dt.isoformat()
60
+ except ValueError:
61
+ return timestamp
62
+
63
+
64
def _parse_backtrace(raw: dict | None) -> Backtrace | None:
    """Build a ``Backtrace`` model from a raw backtrace dict.

    Returns None when ``raw`` is empty or not a dict, when it has no "frames",
    or when none of the frames is a dict. Non-dict frames are skipped.
    """
    if not (raw and isinstance(raw, dict)):
        return None

    raw_frames = raw.get("frames", [])
    if not raw_frames:
        return None

    parsed_frames = []
    for frame in raw_frames:
        if not isinstance(frame, dict):
            continue
        parsed_frames.append(
            BacktraceFrame(
                imageOffset=frame.get("imageOffset"),
                imageUUID=frame.get("imageUUID"),
                imagePath=frame.get("imagePath"),
                symbol=frame.get("symbol"),
            )
        )

    if not parsed_frames:
        return None
    return Backtrace(frames=parsed_frames)
82
+
83
+
84
async def _run_log_show(
    predicate: str | None,
    last_minutes: int,
    include_debug: bool,
    *,
    simulator: bool = False,
) -> tuple[list, str | None]:
    """Run log show command and return parsed logs and optional error message.

    Args:
        predicate: Optional predicate forwarded through ``--predicate``.
        last_minutes: Time window, passed as ``--last <N>m``.
        include_debug: When true, adds ``--debug --info`` to the command.
        simulator: When true, runs the command inside the booted simulator via
            ``xcrun simctl spawn booted``; otherwise runs ``log show`` directly.

    Returns:
        ``(entries, error)``. ``error`` is only set when the simulator command
        fails because no simulator is booted. Every other failure (non-zero exit,
        empty output, invalid or unexpected JSON) yields ``([], None)`` and is
        logged as a warning.
    """
    if simulator:
        cmd = ["xcrun", "simctl", "spawn", "booted", "log", "show"]
    else:
        cmd = ["log", "show"]

    cmd.extend(["--style", "json", "--last", f"{last_minutes}m"])

    if predicate:
        cmd.extend(["--predicate", predicate])

    if include_debug:
        cmd.extend(["--debug", "--info"])

    process = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )

    stdout, stderr = await process.communicate()
    error_output = stderr.decode("utf-8", errors="replace")

    if process.returncode != 0:
        if simulator and "No devices are booted" in error_output:
            return [], "Error: No iOS Simulator is running. Please boot a simulator first."
        # Still best effort, but leave a trace instead of failing silently.
        logger.warning(
            "log_show_failed",
            returncode=process.returncode,
            simulator=simulator,
            stderr=error_output.strip(),
        )
        return [], None

    output = stdout.decode("utf-8", errors="replace").strip()
    lines = output.split("\n")
    # Drop the informational header lines that may precede the JSON payload.
    if lines and lines[0].startswith("Filtering the log data"):
        lines = lines[1:]
    if lines and lines[0].startswith("Skipping info and debug"):
        lines = lines[1:]

    json_output = "\n".join(lines).strip()

    if not json_output or json_output == "[]":
        return [], None

    try:
        parsed = json.loads(json_output)
    except json.JSONDecodeError as exc:
        logger.warning("log_show_invalid_json", simulator=simulator, error=str(exc))
        return [], None

    # Callers concatenate the result and call .get() on every entry, so only a
    # list of dicts is usable.
    if not isinstance(parsed, list):
        logger.warning(
            "log_show_unexpected_payload",
            simulator=simulator,
            payload_type=type(parsed).__name__,
        )
        return [], None
    return [entry for entry in parsed if isinstance(entry, dict)], None
135
+
136
+
137
@mcp.tool(
    name="read_swift_logs",
    description="""
    Read Swift/iOS logs for debugging during app development. Please note that this tool expect the
    bundle identifier of the app to be passed as an argument.

    This tool can read logs from:
    1. iOS Simulator runtime logs (source="simulator") - filters by process name
    2. All unified logging sources (source="all") - queries by subsystem and process name

    Use cases:
    - Debug runtime issues by reading simulator logs
    - Find crash logs and error messages
    - Read print() statements and os.Logger output from your Swift app

    Examples:
    - read_swift_logs(source="simulator", bundle_id="com.example.myapp")
    - read_swift_logs(source="simulator", bundle_id="com.example.myapp", last_minutes=10)
    - read_swift_logs(source="all", bundle_id="com.example.myapp", last_minutes=5)
    """,
)
@handle_tool_errors
async def read_swift_logs(
    bundle_id: str = Field(
        description="The bundle identifier of the iOS app (e.g., 'com.example.myapp'). "
        "This is used to filter logs by subsystem.",
    ),
    source: str = Field(
        default="all",
        description="Log source: 'simulator' for iOS Simulator runtime logs, "
        "'all' to read from all sources that generate runtime logs related with the bundle.",
    ),
    last_minutes: int = Field(
        default=5,
        description="Number of minutes of logs to retrieve. Default is 5 minutes.",
    ),
) -> LogsOutput | str:
    """Read Swift/iOS logs from the booted simulator or from the unified log.

    The process name used for filtering is the last dot-separated component of
    ``bundle_id``.

    Returns:
        A ``LogsOutput`` on success, or an ``"Error: ..."`` string when the host
        is not macOS, ``last_minutes`` is not positive, ``source`` is unknown, or
        no simulator is booted.
    """
    if sys.platform != "darwin":
        return "Error: This tool only works on macOS with Xcode installed."

    # A zero or negative window makes `log show --last` fail, which would
    # otherwise surface as a misleading "No logs found" result.
    if last_minutes < 1:
        return "Error: last_minutes must be a positive number of minutes."

    process_name = bundle_id.split(".")[-1]

    if source == "simulator":
        return await _read_simulator_logs(bundle_id, last_minutes, process_name)
    elif source == "all":
        return await _read_file_logs(bundle_id, process_name, last_minutes)
    else:
        return f"Error: Unknown source '{source}'. Use 'simulator' or 'all'."
186
+
187
+
188
def _map_to_simplified_logs(log_entries: list[dict]) -> list[SimplifiedLog]:
    """Convert raw log entries into ``SimplifiedLog`` models.

    Entries without a truthy "eventMessage" are dropped. Missing string fields
    default to "" and a missing "processID" defaults to 0.
    """
    simplified: list[SimplifiedLog] = []
    for raw_entry in log_entries:
        if not raw_entry.get("eventMessage"):
            continue
        simplified.append(
            SimplifiedLog(
                timestamp=_convert_to_iso8601(raw_entry.get("timestamp", "")),
                level=raw_entry.get("messageType", ""),
                category=raw_entry.get("category", ""),
                message=raw_entry.get("eventMessage", ""),
                process_id=raw_entry.get("processID", 0),
                backtrace=_parse_backtrace(raw_entry.get("backtrace")),
                sender_image_path=raw_entry.get("senderImagePath"),
                process_image_path=raw_entry.get("processImagePath"),
                sender_image_uuid=raw_entry.get("senderImageUUID"),
            )
        )
    return simplified
204
+
205
+
206
async def _read_simulator_logs(
    bundle_id: str,
    last_minutes: int,
    process_name: str | None,
) -> LogsOutput | str:
    """Read recent logs from the booted iOS Simulator.

    Entries are filtered on ``processImagePath`` containing ``process_name``
    (no filter when it is empty). Returns the error string reported by
    ``_run_log_show`` when there is one, otherwise a ``LogsOutput``.
    """
    predicate = None
    if process_name:
        predicate = f'processImagePath CONTAINS "{process_name}"'

    logger.info(f"Reading simulator logs for last {last_minutes}m")

    raw_entries, failure = await _run_log_show(
        predicate,
        last_minutes,
        include_debug=True,
        simulator=True,
    )
    if failure:
        return failure

    # Fields shared by the "nothing found" and the regular result.
    shared = {"bundle_id": bundle_id, "last_minutes": last_minutes}

    if not raw_entries:
        return LogsOutput(
            **shared,
            log_count=0,
            logs=[],
            message=f"No logs found for '{process_name}' in the last {last_minutes} min.",
        )

    entries = _map_to_simplified_logs(raw_entries)
    return LogsOutput(**shared, log_count=len(entries), logs=entries)
240
+
241
+
242
async def _read_file_logs(bundle_id: str, process_name: str, last_minutes: int) -> LogsOutput:
    """Read recent logs for an app through two ``log show`` queries.

    One query selects entries whose subsystem equals ``bundle_id`` (debug and
    info included). The other selects fault/error entries, or messages
    containing "fatal" or "crash", emitted by ``process_name``. Results are
    merged, de-duplicated on (timestamp, message) and sorted by timestamp.
    """
    by_subsystem = f'subsystem == "{bundle_id}"'
    by_process = (
        f'process == "{process_name}" AND '
        f'(messageType == "Fault" OR messageType == "Error" OR '
        f'eventMessage CONTAINS "fatal" OR eventMessage CONTAINS "crash")'
    )

    logger.info(
        "fetching_ios_logs",
        bundle_id=bundle_id,
        last_minutes=last_minutes,
    )

    # Both queries run concurrently; their error slots are ignored here.
    subsystem_result, process_result = await asyncio.gather(
        _run_log_show(by_subsystem, last_minutes, include_debug=True),
        _run_log_show(by_process, last_minutes, include_debug=False),
    )

    # setdefault keeps the first entry seen for each (timestamp, message) pair.
    first_seen: dict = {}
    for raw_entry in subsystem_result[0] + process_result[0]:
        dedup_key = (raw_entry.get("timestamp"), raw_entry.get("eventMessage"))
        first_seen.setdefault(dedup_key, raw_entry)

    ordered_entries = sorted(first_seen.values(), key=lambda item: item.get("timestamp", ""))

    if not ordered_entries:
        return LogsOutput(
            bundle_id=bundle_id,
            last_minutes=last_minutes,
            log_count=0,
            logs=[],
            message=f"No logs found for '{bundle_id}' in the last {last_minutes} min.",
        )

    entries = _map_to_simplified_logs(ordered_entries)

    logger.info("logs_retrieved", bundle_id=bundle_id, log_count=len(entries))

    return LogsOutput(
        bundle_id=bundle_id,
        last_minutes=last_minutes,
        log_count=len(entries),
        logs=entries,
    )
@@ -0,0 +1,17 @@
1
+ You are given:
2
+
3
+ 1. A screenshot of a mobile device.
4
+ 2. A prompt describing what information to extract.
5
+
6
+ Your task:
7
+
8
+ - Look at the screenshot and **answer the prompt directly and completely**.
9
+ - Provide a **detailed, structured description** of the relevant content (text, layout, icons, menus, timestamps, notifications, etc.).
10
+ - If the prompt asks for specific data, extract it exactly as shown.
11
+ - If the screenshot contains structured information (e.g., receipt, chat, settings), present it clearly using lists or tables.
12
+ - Do not guess — if something is unclear or missing, state that explicitly.
13
+
14
+ **Output format:**
15
+
16
+ 1. **Direct answer** to the prompt.
17
+ 2. **Detailed breakdown** of the screenshot content supporting the answer.
@@ -0,0 +1,53 @@
1
+ """Simple screenshot capture tool - returns raw base64 image without LLM analysis."""
2
+
3
+ import base64
4
+
5
+ from mcp.types import ImageContent
6
+ from pydantic import Field
7
+
8
+ from minitap.mcp.core.decorators import handle_tool_errors
9
+ from minitap.mcp.core.device import capture_screenshot, find_mobile_device
10
+ from minitap.mcp.main import mcp
11
+ from minitap.mcp.server.cloud_mobile import (
12
+ check_cloud_mobile_status,
13
+ get_cloud_mobile_id,
14
+ get_cloud_screenshot,
15
+ )
16
+
17
+
18
@mcp.tool(
    name="take_screenshot",
    description="""
    Capture a screenshot from the connected mobile device.
    Returns the raw base64-encoded PNG image directly without any LLM analysis.
    Use this when you need the screenshot image for display or further processing.
    """,
)
@handle_tool_errors
async def take_screenshot(
    device_id: str | None = Field(
        default=None,
        description="ID of the device to capture screenshot from. "
        "If not provided, the first available device is used.",
    ),
) -> list[ImageContent]:
    """Grab a screenshot and return it as a single base64 PNG image item.

    When a cloud mobile ID is available, the cloud mobile status is checked and
    the screenshot comes from the cloud screenshot API. Otherwise it is captured
    from the local device selected by ``device_id``.
    """
    remote_id = get_cloud_mobile_id()

    if not remote_id:
        # Local mode: capture from the local device
        local_device = find_mobile_device(device_id=device_id)
        encoded_png = capture_screenshot(local_device)
    else:
        # Cloud mode: the API returns raw bytes, so encode them here
        await check_cloud_mobile_status(remote_id)
        raw_png = await get_cloud_screenshot(remote_id)
        encoded_png = base64.b64encode(raw_png).decode("utf-8")

    image = ImageContent(type="image", data=encoded_png, mimeType="image/png")
    return [image]
@@ -0,0 +1,80 @@
1
+ """Tool for uploading device screenshots to remote storage.
2
+
3
+ This tool captures a screenshot from the connected device and uploads it
4
+ to remote storage, returning a filename that can be used with other tools
5
+ like figma_compare_screenshot.
6
+ """
7
+
8
+ import base64
9
+
10
+ from fastmcp.exceptions import ToolError
11
+ from fastmcp.tools.tool import ToolResult
12
+
13
+ from minitap.mcp.core.decorators import handle_tool_errors
14
+ from minitap.mcp.core.device import capture_screenshot, find_mobile_device
15
+ from minitap.mcp.core.logging_config import get_logger
16
+ from minitap.mcp.core.storage import StorageUploadError, upload_screenshot_to_storage
17
+ from minitap.mcp.main import mcp
18
+ from minitap.mcp.server.cloud_mobile import get_cloud_mobile_id, get_cloud_screenshot
19
+
20
+ logger = get_logger(__name__)
21
+
22
+
23
@mcp.tool(
    name="upload_screenshot",
    description="""
    Capture a screenshot from the connected device and upload it to storage.

    This tool:
    1. Captures a screenshot from the connected device (local or cloud)
    2. Uploads the screenshot to remote storage
    3. Returns a filename that can be used with other tools

    Use this to get a screenshot filename for tools like figma_compare_screenshot
    that require a current_screenshot_filename parameter.

    Example workflow:
    1. Call upload_screenshot to get a filename
    2. Use the returned filename with figma_compare_screenshot
    """,
)
@handle_tool_errors
async def upload_screenshot() -> ToolResult:
    """Take a device screenshot, push it to storage and report its filename.

    Raises:
        ToolError: If the screenshot cannot be captured (cloud or local device)
            or if the upload fails with a ``StorageUploadError``.
    """
    logger.info("Capturing and uploading device screenshot")

    # Step 1: capture, from the cloud device when one is configured
    remote_id = get_cloud_mobile_id()

    if not remote_id:
        logger.debug("Capturing screenshot from local device")
        try:
            local_device = find_mobile_device()
            encoded_png = capture_screenshot(local_device)
        except Exception as e:
            raise ToolError(f"Failed to capture local device screenshot: {e}") from e
    else:
        logger.debug("Capturing screenshot from cloud device", device_id=remote_id)
        try:
            raw_png = await get_cloud_screenshot(remote_id)
            encoded_png = base64.b64encode(raw_png).decode("utf-8")
        except Exception as e:
            raise ToolError(f"Failed to capture cloud device screenshot: {e}") from e

    logger.info("Screenshot captured from device")

    # Step 2: upload the base64 payload to storage
    try:
        filename = await upload_screenshot_to_storage(encoded_png)
        logger.info("Screenshot uploaded to storage", filename=filename)
    except StorageUploadError as e:
        raise ToolError(f"Failed to upload screenshot: {e}") from e

    success_item = {
        "type": "text",
        "text": f"Screenshot uploaded successfully.\n\n**Filename:** {filename}",
    }
    return ToolResult(content=[success_item])