minitap-mcp 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. minitap/mcp/__init__.py +0 -0
  2. minitap/mcp/core/agents/compare_screenshots/agent.py +75 -0
  3. minitap/mcp/core/agents/compare_screenshots/eval/prompts/prompt_1.md +62 -0
  4. minitap/mcp/core/agents/compare_screenshots/eval/scenario_1_add_cartoon_img_and_move_button/actual.png +0 -0
  5. minitap/mcp/core/agents/compare_screenshots/eval/scenario_1_add_cartoon_img_and_move_button/figma.png +0 -0
  6. minitap/mcp/core/agents/compare_screenshots/eval/scenario_1_add_cartoon_img_and_move_button/human_feedback.txt +18 -0
  7. minitap/mcp/core/agents/compare_screenshots/eval/scenario_1_add_cartoon_img_and_move_button/prompt_1/model_params.json +3 -0
  8. minitap/mcp/core/agents/compare_screenshots/eval/scenario_1_add_cartoon_img_and_move_button/prompt_1/output.md +46 -0
  9. minitap/mcp/core/agents/compare_screenshots/prompt.md +62 -0
  10. minitap/mcp/core/cloud_apk.py +117 -0
  11. minitap/mcp/core/config.py +111 -0
  12. minitap/mcp/core/decorators.py +107 -0
  13. minitap/mcp/core/device.py +249 -0
  14. minitap/mcp/core/llm.py +39 -0
  15. minitap/mcp/core/logging_config.py +59 -0
  16. minitap/mcp/core/models.py +59 -0
  17. minitap/mcp/core/sdk_agent.py +35 -0
  18. minitap/mcp/core/storage.py +407 -0
  19. minitap/mcp/core/task_runs.py +100 -0
  20. minitap/mcp/core/utils/figma.py +69 -0
  21. minitap/mcp/core/utils/images.py +55 -0
  22. minitap/mcp/main.py +328 -0
  23. minitap/mcp/server/cloud_mobile.py +492 -0
  24. minitap/mcp/server/middleware.py +21 -0
  25. minitap/mcp/server/poller.py +78 -0
  26. minitap/mcp/server/remote_proxy.py +96 -0
  27. minitap/mcp/tools/execute_mobile_command.py +182 -0
  28. minitap/mcp/tools/read_swift_logs.py +297 -0
  29. minitap/mcp/tools/screen_analyzer.md +17 -0
  30. minitap/mcp/tools/take_screenshot.py +53 -0
  31. minitap/mcp/tools/upload_screenshot.py +80 -0
  32. minitap_mcp-0.9.0.dist-info/METADATA +352 -0
  33. minitap_mcp-0.9.0.dist-info/RECORD +35 -0
  34. minitap_mcp-0.9.0.dist-info/WHEEL +4 -0
  35. minitap_mcp-0.9.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,407 @@
1
+ """Storage utilities for uploading local files to remote storage.
2
+
3
+ This module provides functionality to upload local files (like screenshots)
4
+ to the MaaS API storage backend and get presigned URLs that can be passed
5
+ to remote MCP tools.
6
+ """
7
+
8
+ import base64
9
+ import uuid
10
+ from pathlib import Path
11
+
12
+ import httpx
13
+
14
+ from minitap.mcp.core.config import settings
15
+ from minitap.mcp.core.logging_config import get_logger
16
+
17
+ logger = get_logger(__name__)
18
+
19
+
20
class StorageUploadError(Exception):
    """Signals that uploading a file to remote storage did not succeed."""
24
+
25
+
26
def _get_api_key() -> str:
    """Read the Minitap API key out of the application settings.

    Returns:
        The plain-text API key.

    Raises:
        StorageUploadError: If the key is unset or empty.
    """
    secret = settings.MINITAP_API_KEY
    if secret:
        key_value = secret.get_secret_value()
        if key_value:
            return key_value
    raise StorageUploadError("MINITAP_API_KEY is required for file uploads")
39
+
40
+
41
def _generate_filename(content_type: str) -> str:
    """Build a random, collision-resistant filename for the given MIME type.

    Args:
        content_type: MIME type of the file.

    Returns:
        A name of the form "<uuid4>.<extension>", where the extension is
        derived from ``content_type``.
    """
    extension = _get_extension_from_mime_type(content_type)
    unique_id = uuid.uuid4()
    return f"{unique_id}.{extension}"
52
+
53
+
54
async def _get_signed_upload_url(
    client: httpx.AsyncClient,
    filename: str,
    api_key: str,
) -> str:
    """Ask the MaaS API for a signed URL that ``filename`` can be uploaded to.

    Args:
        client: HTTP client used to issue the request.
        filename: Name of the file that is going to be uploaded.
        api_key: API key sent as a Bearer token.

    Returns:
        The signed upload URL the API returned for ``filename``.

    Raises:
        StorageUploadError: If the request fails, the API answers with a
            non-200 status, or the response has no URL for ``filename``.
    """
    endpoint = f"{settings.MINITAP_API_BASE_URL}/storage/signed-upload"

    try:
        logger.debug("Requesting signed upload URL", filename=filename)
        response = await client.get(
            endpoint,
            params={"filenames": filename},
            headers={"Authorization": f"Bearer {api_key}"},
        )

        status = response.status_code
        if status != 200:
            logger.error(
                "Failed to get signed upload URL",
                status_code=status,
                response=response.text,
            )
            raise StorageUploadError(f"Failed to get signed upload URL: HTTP {status}")

        # The response maps each requested filename to its signed URL.
        url_by_name = response.json().get("signed_urls", {})
        if filename not in url_by_name:
            raise StorageUploadError(f"No signed URL returned for {filename}")

        logger.debug("Got signed upload URL", filename=filename)
        return url_by_name[filename]

    except StorageUploadError:
        # Raised above on purpose; let it through without re-wrapping.
        raise
    except httpx.TimeoutException as exc:
        logger.error("Signed URL request timed out", error=str(exc))
        raise StorageUploadError("Signed URL request timed out") from exc
    except httpx.RequestError as exc:
        logger.error("Signed URL request failed", error=str(exc))
        raise StorageUploadError(f"Signed URL request failed: {str(exc)}") from exc
    except Exception as exc:
        logger.error("Unexpected error getting signed URL", error=str(exc))
        raise StorageUploadError(f"Unexpected error: {str(exc)}") from exc
111
+
112
+
113
async def _upload_to_signed_url(
    client: httpx.AsyncClient,
    url: str,
    content: bytes,
    content_type: str,
    filename: str,
) -> None:
    """PUT raw bytes to a signed upload URL.

    Args:
        client: HTTP client used to issue the request.
        url: Signed upload URL.
        content: File content as bytes.
        content_type: MIME type sent in the Content-Type header.
        filename: Name of the file; used only in log records.

    Raises:
        StorageUploadError: If the request fails or the response status is
            neither 200 nor 201.
    """
    try:
        logger.debug("Uploading file to storage", filename=filename, size=len(content))
        response = await client.put(
            url,
            content=content,
            headers={"Content-Type": content_type},
        )

        status = response.status_code
        accepted = status in (200, 201)
        if not accepted:
            logger.error(
                "Failed to upload file",
                status_code=status,
                response=response.text,
            )
            raise StorageUploadError(f"Failed to upload file: HTTP {status}")

        logger.info("File uploaded successfully", filename=filename)

    except StorageUploadError:
        # Raised above on purpose; let it through without re-wrapping.
        raise
    except httpx.TimeoutException as exc:
        logger.error("Upload request timed out", error=str(exc))
        raise StorageUploadError("Upload request timed out") from exc
    except httpx.RequestError as exc:
        logger.error("Upload request failed", error=str(exc))
        raise StorageUploadError(f"Upload request failed: {str(exc)}") from exc
    except Exception as exc:
        logger.error("Unexpected error during upload", error=str(exc))
        raise StorageUploadError(f"Unexpected error: {str(exc)}") from exc
161
+
162
+
163
async def upload_file_to_storage(
    file_content: bytes,
    filename: str | None = None,
    content_type: str = "image/png",
) -> str:
    """Push file bytes to remote storage and report the name they were stored under.

    Steps:
        1. Obtain a signed upload URL from the MaaS API.
        2. Upload ``file_content`` to that URL.
        3. Return the filename for use with remote MCP tools.

    Args:
        file_content: The file content as bytes.
        filename: Name to store the file under; a UUID-based name is
            generated from ``content_type`` when omitted or empty.
        content_type: MIME type of the file (default: image/png).

    Returns:
        The filename of the uploaded file.

    Raises:
        StorageUploadError: If any step of the upload fails.
    """
    api_key = _get_api_key()
    target_name = filename if filename else _generate_filename(content_type)

    # One client (30 s timeout) is shared by both requests.
    async with httpx.AsyncClient(timeout=30.0) as http:
        upload_url = await _get_signed_upload_url(http, target_name, api_key)
        await _upload_to_signed_url(http, upload_url, file_content, content_type, target_name)

    return target_name
194
+
195
+
196
async def upload_screenshot_to_storage(screenshot_base64: str) -> str:
    """Upload a base64-encoded screenshot to storage.

    Convenience wrapper for screenshots captured from devices. The input may
    be either a bare base64 payload or a data URL
    ("data:image/...;base64,<payload>"); in the latter case only the payload
    after the first comma is decoded.

    Args:
        screenshot_base64: Base64-encoded screenshot data, optionally wrapped
            in a ``data:image`` URL.

    Returns:
        Filename of the uploaded screenshot.

    Raises:
        StorageUploadError: If the data cannot be decoded or the upload fails.
    """
    try:
        payload = screenshot_base64
        # b64decode() silently drops characters outside the base64 alphabet,
        # so feeding it a whole data URL would decode the header text as if it
        # were image data. Strip the header explicitly instead.
        if payload.startswith("data:image"):
            _, separator, payload = payload.partition(",")
            if not separator:
                raise ValueError("malformed data URL: missing ',' separator")
        screenshot_bytes = base64.b64decode(payload)
    except Exception as e:
        raise StorageUploadError(f"Invalid base64 data: {str(e)}") from e

    return await upload_file_to_storage(
        file_content=screenshot_bytes,
        content_type="image/png",
    )
220
+
221
+
222
async def upload_local_file_to_storage(file_path: str | Path) -> str:
    """Upload a local file to storage.

    The file is stored under a freshly generated UUID name that keeps the
    original file's suffix; the MIME type is guessed from that suffix.

    Args:
        file_path: Path to the local file.

    Returns:
        Filename the file was uploaded under (as returned by
        ``upload_file_to_storage``), not a download URL.

    Raises:
        StorageUploadError: If the file doesn't exist, cannot be read
            (e.g. it is a directory or permission is denied), or the
            upload fails.
    """
    path = Path(file_path)

    if not path.exists():
        raise StorageUploadError(f"File not found: {file_path}")

    try:
        file_content = path.read_bytes()
    except OSError as e:
        # Keep the documented contract: callers only need to handle
        # StorageUploadError, even when the path exists but is unreadable.
        raise StorageUploadError(f"Failed to read file {file_path}: {e}") from e

    return await upload_file_to_storage(
        file_content=file_content,
        filename=f"{uuid.uuid4()}{path.suffix}",
        content_type=_guess_mime_type(path.suffix),
    )
247
+
248
+
249
+ def _get_extension_from_mime_type(mime_type: str) -> str:
250
+ """Get file extension from MIME type."""
251
+ mime_to_ext = {
252
+ "image/png": "png",
253
+ "image/jpeg": "jpg",
254
+ "image/gif": "gif",
255
+ "image/webp": "webp",
256
+ "application/json": "json",
257
+ "text/plain": "txt",
258
+ }
259
+ return mime_to_ext.get(mime_type, "bin")
260
+
261
+
262
+ def _guess_mime_type(extension: str) -> str:
263
+ """Guess MIME type from file extension."""
264
+ ext = extension.lower().lstrip(".")
265
+ ext_to_mime = {
266
+ "png": "image/png",
267
+ "jpg": "image/jpeg",
268
+ "jpeg": "image/jpeg",
269
+ "gif": "image/gif",
270
+ "webp": "image/webp",
271
+ "json": "application/json",
272
+ "txt": "text/plain",
273
+ }
274
+ return ext_to_mime.get(ext, "application/octet-stream")
275
+
276
+
277
class StorageDownloadError(Exception):
    """Signals that downloading a file from remote storage did not succeed."""
281
+
282
+
283
async def get_trajectory_gif_download_url(task_run_id: str) -> str:
    """Fetch a signed download URL for a task run's trajectory GIF.

    Calls the MaaS API endpoint
    ``/storage/trajectory-gif-download/{task_run_id}`` and returns the
    ``signed_url`` field of its JSON response.

    Args:
        task_run_id: The ID of the task run to get the GIF for.

    Returns:
        The signed download URL for the GIF.

    Raises:
        StorageDownloadError: If the API key is missing, the request fails,
            the API answers 404 or any other non-200 status, or the response
            carries no URL.
    """
    try:
        api_key = _get_api_key()
    except StorageUploadError as exc:
        # The shared key helper raises the upload error type; translate it so
        # callers of this download API only see StorageDownloadError.
        raise StorageDownloadError(str(exc)) from exc

    endpoint = f"{settings.MINITAP_API_BASE_URL}/storage/trajectory-gif-download/{task_run_id}"

    async with httpx.AsyncClient(timeout=30.0) as http:
        try:
            logger.debug("Requesting trajectory GIF download URL", task_run_id=task_run_id)
            response = await http.get(
                endpoint,
                headers={"Authorization": f"Bearer {api_key}"},
            )

            status = response.status_code
            if status == 404:
                raise StorageDownloadError(f"Trajectory GIF not found for task run: {task_run_id}")

            if status != 200:
                logger.error(
                    "Failed to get trajectory GIF download URL",
                    status_code=status,
                    response=response.text,
                )
                raise StorageDownloadError(
                    f"Failed to get trajectory GIF download URL: HTTP {status}"
                )

            signed_url = response.json().get("signed_url")
            if not signed_url:
                raise StorageDownloadError("No download URL returned in response")

            logger.debug("Got trajectory GIF download URL", task_run_id=task_run_id)
            return signed_url

        except StorageDownloadError:
            # Raised above on purpose; let it through without re-wrapping.
            raise
        except httpx.TimeoutException as exc:
            logger.error("Trajectory GIF download URL request timed out", error=str(exc))
            raise StorageDownloadError("Request timed out") from exc
        except httpx.RequestError as exc:
            logger.error("Trajectory GIF download URL request failed", error=str(exc))
            raise StorageDownloadError(f"Request failed: {str(exc)}") from exc
        except Exception as exc:
            logger.error("Unexpected error getting trajectory GIF download URL", error=str(exc))
            raise StorageDownloadError(f"Unexpected error: {str(exc)}") from exc
345
+
346
+
347
async def download_trajectory_gif(task_run_id: str, download_path: str | Path) -> Path:
    """Download a trajectory GIF to a local path.

    This function:
    1. Validates that ``task_run_id`` is usable as a single directory name
    2. Creates ``{download_path}/{task_run_id}``
    3. Gets a signed download URL from the MaaS API
    4. Downloads the GIF from that URL and saves it as ``trajectory.gif``

    Args:
        task_run_id: The ID of the task run to download the GIF for. Must be
            one plain path component (no separators, not "." or "..").
        download_path: Directory path where the GIF will be saved.
            The file will be saved as {task_run_id}/trajectory.gif

    Returns:
        The full path to the downloaded GIF file

    Raises:
        StorageDownloadError: If the ID is not a valid directory name or the
            download fails at any step
    """
    # task_run_id becomes a directory name below. Joining an absolute value or
    # one containing ".." / separators would place the file outside
    # download_path, so only accept a single plain path component.
    if task_run_id in ("", ".", "..") or Path(task_run_id).name != task_run_id:
        raise StorageDownloadError(f"Invalid task run ID: {task_run_id!r}")

    download_dir = Path(download_path) / task_run_id

    try:
        download_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        raise StorageDownloadError(f"Failed to create download directory: {e}") from e

    download_url = await get_trajectory_gif_download_url(task_run_id)

    output_file = download_dir / "trajectory.gif"

    # The GIF transfer gets a longer timeout (60 s) than the URL lookup.
    async with httpx.AsyncClient(timeout=60.0) as client:
        try:
            logger.info(
                "Downloading trajectory GIF", task_run_id=task_run_id, path=str(output_file)
            )
            response = await client.get(download_url)

            if response.status_code != 200:
                raise StorageDownloadError(f"Failed to download GIF: HTTP {response.status_code}")

            output_file.write_bytes(response.content)
            logger.info(
                "Trajectory GIF downloaded successfully",
                task_run_id=task_run_id,
                path=str(output_file),
                size=len(response.content),
            )

            return output_file

        except httpx.TimeoutException as e:
            logger.error("GIF download timed out", error=str(e))
            raise StorageDownloadError("Download timed out") from e
        except httpx.RequestError as e:
            logger.error("GIF download request failed", error=str(e))
            raise StorageDownloadError(f"Download failed: {str(e)}") from e
        except StorageDownloadError:
            raise
        except OSError as e:
            # Disk-side failure (e.g. permissions, full disk) while saving.
            logger.error("Failed to save GIF", error=str(e), path=str(output_file))
            raise StorageDownloadError(f"Failed to save GIF to {output_file}: {e}") from e
        except Exception as e:
            logger.error("Unexpected error downloading GIF", error=str(e))
            raise StorageDownloadError(f"Unexpected error: {str(e)}") from e
@@ -0,0 +1,100 @@
1
+ """Task runs API utilities.
2
+
3
+ This module provides functionality to interact with the task runs API
4
+ for fetching information about executed tasks.
5
+ """
6
+
7
+ import httpx
8
+
9
+ from minitap.mcp.core.config import settings
10
+ from minitap.mcp.core.logging_config import get_logger
11
+
12
+ logger = get_logger(__name__)
13
+
14
+
15
class TaskRunsError(Exception):
    """Signals that an operation against the task runs API did not succeed."""
19
+
20
+
21
def _get_api_key() -> str:
    """Read the Minitap API key out of the application settings.

    Returns:
        The plain-text API key.

    Raises:
        TaskRunsError: If the key is unset or empty.
    """
    secret = settings.MINITAP_API_KEY
    if secret:
        key_value = secret.get_secret_value()
        if key_value:
            return key_value
    raise TaskRunsError("MINITAP_API_KEY is required for task runs API")
34
+
35
+
36
async def get_latest_task_run_id() -> str:
    """Look up the ID of the most recently finished task run.

    Requests a single-item page from the MaaS API ``/task-runs`` endpoint,
    sorted by ``finished_at`` in descending order, and returns the ``id`` of
    the first entry in the response's ``runs`` list.

    Returns:
        The ID of the latest task run.

    Raises:
        TaskRunsError: If the API key is missing, the request fails, the API
            answers with a non-200 status, or no task run / ID is returned.
    """
    api_key = _get_api_key()
    endpoint = f"{settings.MINITAP_API_BASE_URL}/task-runs"

    # Page size 1 + descending sort: the only returned item is the latest run.
    query = {
        "page": 1,
        "pageSize": 1,
        "orphans": "include",
        "virtualMobile": "include",
        "sortBy": "finished_at",
        "sortOrder": "desc",
    }

    async with httpx.AsyncClient(timeout=30.0) as http:
        try:
            logger.debug("Fetching latest task run ID")
            response = await http.get(
                endpoint,
                params=query,
                headers={"Authorization": f"Bearer {api_key}"},
            )

            status = response.status_code
            if status != 200:
                logger.error(
                    "Failed to get latest task run",
                    status_code=status,
                    response=response.text,
                )
                raise TaskRunsError(f"Failed to get latest task run: HTTP {status}")

            runs = response.json().get("runs", [])
            if not runs:
                raise TaskRunsError("No task runs found")

            latest_id = runs[0].get("id")
            if not latest_id:
                raise TaskRunsError("Task run ID not found in response")

            logger.debug("Got latest task run ID", task_run_id=latest_id)
            return latest_id

        except TaskRunsError:
            # Raised above on purpose; let it through without re-wrapping.
            raise
        except httpx.TimeoutException as exc:
            logger.error("Latest task run request timed out", error=str(exc))
            raise TaskRunsError("Request timed out") from exc
        except httpx.RequestError as exc:
            logger.error("Latest task run request failed", error=str(exc))
            raise TaskRunsError(f"Request failed: {str(exc)}") from exc
        except Exception as exc:
            logger.error("Unexpected error getting latest task run", error=str(exc))
            raise TaskRunsError(f"Unexpected error: {str(exc)}") from exc
@@ -0,0 +1,69 @@
1
+ """Agent to extract Figma asset URLs from design context code using regex."""
2
+
3
+ import re
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+
8
class FigmaAsset(BaseModel):
    """Represents a single Figma asset."""

    # Identifier the asset was assigned to in the design-context code.
    variable_name: str = Field(
        description="The variable name from the code (e.g., imgSignal)",
    )
    # Complete asset URL, including any query string.
    url: str = Field(
        description="The full URL to the asset",
    )
    # Extension without the leading dot.
    extension: str = Field(
        description="The file extension (e.g., svg, png, jpg)",
    )
14
+
15
+
16
class ExtractedAssets(BaseModel):
    """Container for all extracted Figma assets."""

    # Every asset declaration found in the design context; empty when none.
    assets: list[FigmaAsset] = Field(
        description="List of all extracted assets from the Figma design context",
        default_factory=list,
    )
    # Source code after asset constants were swapped for import statements.
    code_implementation: str = Field(
        description="The React/TypeScript code with imports instead of const declarations",
    )
26
+
27
+
28
def extract_figma_assets(design_context_code: str) -> ExtractedAssets:
    """Pull asset URL constants out of Figma design-context code.

    Every ``const <name> = "<http(s) url>.<ext>[?query]"`` declaration is
    recorded as a ``FigmaAsset`` and removed from the code. An
    ``import <name> from './<name>.<ext>';`` line per asset is placed at the
    top of the returned code instead.

    Args:
        design_context_code: The React/TypeScript code from get_design_context

    Returns:
        ExtractedAssets holding the assets found and the transformed code
    """
    # Groups: (1) variable name, (2) full URL incl. query, (4) extension.
    # Accepts http/https, any host, an optional query string and an optional
    # trailing semicolon.
    pattern = r'const\s+(\w+)\s*=\s*["\']((https?://[^"\']+?)\.(\w+)(?:\?[^"\']*)?)["\'];?'
    declarations = list(re.finditer(pattern, design_context_code))

    assets = [
        FigmaAsset(variable_name=found.group(1), url=found.group(2), extension=found.group(4))
        for found in declarations
    ]

    imports_block = "\n".join(
        f"import {asset.variable_name} from './{asset.variable_name}.{asset.extension}';"
        for asset in assets
    )

    # Erase the text of each matched declaration from the code.
    remaining = design_context_code
    for found in declarations:
        remaining = remaining.replace(found.group(0), "")

    # Skip the blank lines left at the top once declarations are gone.
    body_lines = remaining.split("\n")
    first_content = next(
        (index for index, text in enumerate(body_lines) if text.strip()),
        len(body_lines),
    )
    body = "\n".join(body_lines[first_content:])

    return ExtractedAssets(assets=assets, code_implementation=imports_block + "\n\n" + body)
@@ -0,0 +1,55 @@
1
+ import base64
2
+ from PIL import Image
3
+ from io import BytesIO
4
+
5
+ from langchain_core.messages import HumanMessage
6
+
7
+
8
def compress_base64_jpeg(base64_str: str, quality: int = 50) -> str:
    """
    Re-encode a base64 image as a base64 JPEG.

    Args:
        base64_str: Base64-encoded image string; a leading "data:image..."
            URL header is dropped before decoding
        quality: JPEG quality passed to the encoder (default 50)

    Returns:
        Base64-encoded JPEG image
    """
    payload = base64_str.split(",")[1] if base64_str.startswith("data:image") else base64_str
    picture = Image.open(BytesIO(base64.b64decode(payload)))

    mode = picture.mode
    if mode in ("RGBA", "LA", "PA"):
        # JPEG has no transparency: flatten onto a white canvas, using the
        # alpha band (the last band in all three modes) as the paste mask.
        canvas = Image.new("RGB", picture.size, (255, 255, 255))
        canvas.paste(picture, mask=picture.split()[-1])
        picture = canvas
    elif mode != "RGB":
        # Every other non-RGB mode is converted directly.
        picture = picture.convert("RGB")

    buffer = BytesIO()
    picture.save(buffer, format="JPEG", quality=quality, optimize=True)

    return base64.b64encode(buffer.getvalue()).decode("utf-8")
44
+
45
+
46
def get_screenshot_message_for_llm(screenshot_base64: str):
    """Wrap a base64 screenshot in a HumanMessage carrying one image_url part.

    Input that already starts with "data:image" is used as the URL as-is;
    anything else is prefixed with a JPEG data-URL header.
    """
    if screenshot_base64.startswith("data:image"):
        image_url = f"{screenshot_base64}"
    else:
        image_url = f"data:image/jpeg;base64,{screenshot_base64}"

    image_part = {
        "type": "image_url",
        "image_url": {"url": image_url},
    }
    return HumanMessage(content=[image_part])