minitap-mobile-use 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. minitap/mobile_use/__init__.py +0 -0
  2. minitap/mobile_use/agents/contextor/contextor.md +55 -0
  3. minitap/mobile_use/agents/contextor/contextor.py +175 -0
  4. minitap/mobile_use/agents/contextor/types.py +36 -0
  5. minitap/mobile_use/agents/cortex/cortex.md +135 -0
  6. minitap/mobile_use/agents/cortex/cortex.py +152 -0
  7. minitap/mobile_use/agents/cortex/types.py +15 -0
  8. minitap/mobile_use/agents/executor/executor.md +42 -0
  9. minitap/mobile_use/agents/executor/executor.py +87 -0
  10. minitap/mobile_use/agents/executor/tool_node.py +152 -0
  11. minitap/mobile_use/agents/hopper/hopper.md +15 -0
  12. minitap/mobile_use/agents/hopper/hopper.py +44 -0
  13. minitap/mobile_use/agents/orchestrator/human.md +12 -0
  14. minitap/mobile_use/agents/orchestrator/orchestrator.md +21 -0
  15. minitap/mobile_use/agents/orchestrator/orchestrator.py +134 -0
  16. minitap/mobile_use/agents/orchestrator/types.py +11 -0
  17. minitap/mobile_use/agents/outputter/human.md +25 -0
  18. minitap/mobile_use/agents/outputter/outputter.py +85 -0
  19. minitap/mobile_use/agents/outputter/test_outputter.py +167 -0
  20. minitap/mobile_use/agents/planner/human.md +14 -0
  21. minitap/mobile_use/agents/planner/planner.md +126 -0
  22. minitap/mobile_use/agents/planner/planner.py +101 -0
  23. minitap/mobile_use/agents/planner/types.py +51 -0
  24. minitap/mobile_use/agents/planner/utils.py +70 -0
  25. minitap/mobile_use/agents/summarizer/summarizer.py +35 -0
  26. minitap/mobile_use/agents/video_analyzer/__init__.py +5 -0
  27. minitap/mobile_use/agents/video_analyzer/human.md +5 -0
  28. minitap/mobile_use/agents/video_analyzer/video_analyzer.md +37 -0
  29. minitap/mobile_use/agents/video_analyzer/video_analyzer.py +111 -0
  30. minitap/mobile_use/clients/browserstack_client.py +477 -0
  31. minitap/mobile_use/clients/idb_client.py +429 -0
  32. minitap/mobile_use/clients/ios_client.py +332 -0
  33. minitap/mobile_use/clients/ios_client_config.py +141 -0
  34. minitap/mobile_use/clients/ui_automator_client.py +330 -0
  35. minitap/mobile_use/clients/wda_client.py +526 -0
  36. minitap/mobile_use/clients/wda_lifecycle.py +367 -0
  37. minitap/mobile_use/config.py +413 -0
  38. minitap/mobile_use/constants.py +3 -0
  39. minitap/mobile_use/context.py +106 -0
  40. minitap/mobile_use/controllers/__init__.py +0 -0
  41. minitap/mobile_use/controllers/android_controller.py +524 -0
  42. minitap/mobile_use/controllers/controller_factory.py +46 -0
  43. minitap/mobile_use/controllers/device_controller.py +182 -0
  44. minitap/mobile_use/controllers/ios_controller.py +436 -0
  45. minitap/mobile_use/controllers/platform_specific_commands_controller.py +199 -0
  46. minitap/mobile_use/controllers/types.py +106 -0
  47. minitap/mobile_use/controllers/unified_controller.py +193 -0
  48. minitap/mobile_use/graph/graph.py +160 -0
  49. minitap/mobile_use/graph/state.py +115 -0
  50. minitap/mobile_use/main.py +309 -0
  51. minitap/mobile_use/sdk/__init__.py +12 -0
  52. minitap/mobile_use/sdk/agent.py +1294 -0
  53. minitap/mobile_use/sdk/builders/__init__.py +10 -0
  54. minitap/mobile_use/sdk/builders/agent_config_builder.py +307 -0
  55. minitap/mobile_use/sdk/builders/index.py +15 -0
  56. minitap/mobile_use/sdk/builders/task_request_builder.py +236 -0
  57. minitap/mobile_use/sdk/constants.py +1 -0
  58. minitap/mobile_use/sdk/examples/README.md +83 -0
  59. minitap/mobile_use/sdk/examples/__init__.py +1 -0
  60. minitap/mobile_use/sdk/examples/app_lock_messaging.py +54 -0
  61. minitap/mobile_use/sdk/examples/platform_manual_task_example.py +67 -0
  62. minitap/mobile_use/sdk/examples/platform_minimal_example.py +48 -0
  63. minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
  64. minitap/mobile_use/sdk/examples/smart_notification_assistant.py +225 -0
  65. minitap/mobile_use/sdk/examples/video_transcription_example.py +117 -0
  66. minitap/mobile_use/sdk/services/cloud_mobile.py +656 -0
  67. minitap/mobile_use/sdk/services/platform.py +434 -0
  68. minitap/mobile_use/sdk/types/__init__.py +51 -0
  69. minitap/mobile_use/sdk/types/agent.py +84 -0
  70. minitap/mobile_use/sdk/types/exceptions.py +138 -0
  71. minitap/mobile_use/sdk/types/platform.py +183 -0
  72. minitap/mobile_use/sdk/types/task.py +269 -0
  73. minitap/mobile_use/sdk/utils.py +29 -0
  74. minitap/mobile_use/services/accessibility.py +100 -0
  75. minitap/mobile_use/services/llm.py +247 -0
  76. minitap/mobile_use/services/telemetry.py +421 -0
  77. minitap/mobile_use/tools/index.py +67 -0
  78. minitap/mobile_use/tools/mobile/back.py +52 -0
  79. minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
  80. minitap/mobile_use/tools/mobile/focus_and_clear_text.py +317 -0
  81. minitap/mobile_use/tools/mobile/focus_and_input_text.py +153 -0
  82. minitap/mobile_use/tools/mobile/launch_app.py +86 -0
  83. minitap/mobile_use/tools/mobile/long_press_on.py +169 -0
  84. minitap/mobile_use/tools/mobile/open_link.py +62 -0
  85. minitap/mobile_use/tools/mobile/press_key.py +83 -0
  86. minitap/mobile_use/tools/mobile/stop_app.py +62 -0
  87. minitap/mobile_use/tools/mobile/swipe.py +156 -0
  88. minitap/mobile_use/tools/mobile/tap.py +154 -0
  89. minitap/mobile_use/tools/mobile/video_recording.py +177 -0
  90. minitap/mobile_use/tools/mobile/wait_for_delay.py +81 -0
  91. minitap/mobile_use/tools/scratchpad.py +147 -0
  92. minitap/mobile_use/tools/test_utils.py +413 -0
  93. minitap/mobile_use/tools/tool_wrapper.py +16 -0
  94. minitap/mobile_use/tools/types.py +35 -0
  95. minitap/mobile_use/tools/utils.py +336 -0
  96. minitap/mobile_use/utils/app_launch_utils.py +173 -0
  97. minitap/mobile_use/utils/cli_helpers.py +37 -0
  98. minitap/mobile_use/utils/cli_selection.py +143 -0
  99. minitap/mobile_use/utils/conversations.py +31 -0
  100. minitap/mobile_use/utils/decorators.py +124 -0
  101. minitap/mobile_use/utils/errors.py +6 -0
  102. minitap/mobile_use/utils/file.py +13 -0
  103. minitap/mobile_use/utils/logger.py +183 -0
  104. minitap/mobile_use/utils/media.py +186 -0
  105. minitap/mobile_use/utils/recorder.py +52 -0
  106. minitap/mobile_use/utils/requests_utils.py +37 -0
  107. minitap/mobile_use/utils/shell_utils.py +20 -0
  108. minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
  109. minitap/mobile_use/utils/time.py +6 -0
  110. minitap/mobile_use/utils/ui_hierarchy.py +132 -0
  111. minitap/mobile_use/utils/video.py +281 -0
  112. minitap_mobile_use-3.3.0.dist-info/METADATA +329 -0
  113. minitap_mobile_use-3.3.0.dist-info/RECORD +115 -0
  114. minitap_mobile_use-3.3.0.dist-info/WHEEL +4 -0
  115. minitap_mobile_use-3.3.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,182 @@
1
+ from abc import abstractmethod
2
+ from typing import Protocol
3
+
4
+ from pydantic import BaseModel
5
+
6
+ from minitap.mobile_use.controllers.types import Bounds, CoordinatesSelectorRequest, TapOutput
7
+ from minitap.mobile_use.utils.video import VideoRecordingResult
8
+
9
+
10
class ScreenDataResponse(BaseModel):
    """Snapshot of a device screen as returned by ``get_screen_data``."""

    # Screenshot encoded as a base64 string.
    base64: str
    # UI hierarchy elements; the iOS controller fills this with a list of dicts.
    elements: list
    # Screen dimensions as reported by the controller.
    width: int
    height: int
    # Platform identifier; the iOS controller sets this to "ios".
    platform: str
16
+
17
+
18
class MobileDeviceController(Protocol):
    """Interface shared by the platform-specific device controllers.

    Every member here only raises ``NotImplementedError``; concrete
    controllers supply the real behaviour.
    """

    @abstractmethod
    async def tap(
        self,
        coords: CoordinatesSelectorRequest,
        long_press: bool = False,
        long_press_duration: int = 1000,
    ) -> TapOutput:
        """
        Tap at the given coordinates, optionally as a long press.
        long_press_duration is presumably in milliseconds - TODO confirm.
        Returns a TapOutput describing the outcome.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def swipe(
        self,
        start: CoordinatesSelectorRequest,
        end: CoordinatesSelectorRequest,
        duration: int = 400,
    ) -> str | None:
        """
        Swipe from start to end coordinates.
        Returns error message on failure, None on success.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def screenshot(self) -> str:
        """
        Take a screenshot and return it as a string.
        NOTE(review): the iOS controller returns base64-encoded image data;
        confirm other implementations follow the same encoding.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def input_text(self, text: str) -> bool:
        """
        Input text at the currently focused field.
        Returns True on success, False on failure.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def launch_app(self, package_or_bundle_id: str) -> bool:
        """
        Launch an application by package name (Android) or bundle ID (iOS).
        Returns True on success, False on failure.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def terminate_app(self, package_or_bundle_id: str | None) -> bool:
        """
        Terminate an application identified by package name or bundle ID.
        The identifier may be None; handling of None is up to the implementation.
        Returns True on success, False on failure.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def open_url(self, url: str) -> bool:
        """
        Open a URL.
        Returns True on success, False on failure.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def press_back(self) -> bool:
        """
        Press the back button (Android) or equivalent gesture (iOS).
        Returns True on success, False on failure.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def press_home(self) -> bool:
        """
        Press the home button.
        Returns True on success, False on failure.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def press_enter(self) -> bool:
        """
        Press the enter/return key.
        Returns True on success, False on failure.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def get_ui_hierarchy(self) -> list[dict]:
        """
        Get the UI element hierarchy.
        Returns a list of UI elements with their properties.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    def find_element(
        self,
        ui_hierarchy: list[dict],
        resource_id: str | None = None,
        text: str | None = None,
        index: int = 0,
    ) -> tuple[dict | None, Bounds | None, str | None]:
        """
        Find a UI element in the hierarchy.

        Args:
            ui_hierarchy: Elements to search, as returned by get_ui_hierarchy.
            resource_id: Identifier to match, if any.
            text: Text to match, if any.
            index: Which match to return when several elements qualify.

        Returns:
            Tuple of (element_dict, bounds, error_message)
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def cleanup(self) -> None:
        """Clean up resources (e.g., stop companion processes)."""
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def erase_text(self, nb_chars: int | None = None) -> bool:
        """
        Erase the last nb_chars characters.
        When nb_chars is None the implementation picks how many to erase.
        Returns True on success, False on failure.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def get_screen_data(self) -> "ScreenDataResponse":
        """
        Get screen data including screenshot (base64), UI hierarchy elements,
        screen dimensions, and platform.

        Returns:
            ScreenDataResponse with base64 screenshot, elements, width, height, platform
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    def get_compressed_b64_screenshot(self, image_base64: str, quality: int = 50) -> str:
        """
        Compress a base64 image.
        Returns the compressed base64 image.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def start_video_recording(
        self,
        max_duration_seconds: int = 900,
    ) -> VideoRecordingResult:
        """
        Start screen recording on the device.

        Args:
            max_duration_seconds: Maximum recording duration in seconds.

        Returns:
            VideoRecordingResult with success status and message.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def stop_video_recording(self) -> VideoRecordingResult:
        """
        Stop screen recording and retrieve the video file.

        Returns:
            VideoRecordingResult with success status, message, and video_path if successful.
        """
        raise NotImplementedError("Subclasses must implement this method")
@@ -0,0 +1,436 @@
1
+ """iOS-specific device controller implementation using IDB or WDA."""
2
+
3
+ import asyncio
4
+ import base64
5
+ import re
6
+ import tempfile
7
+ import time
8
+ from io import BytesIO
9
+ from pathlib import Path
10
+
11
+ from idb.common.types import HIDButtonType
12
+ from PIL import Image
13
+
14
+ from minitap.mobile_use.clients.idb_client import IdbClientWrapper
15
+ from minitap.mobile_use.clients.ios_client import IosClientWrapper
16
+ from minitap.mobile_use.controllers.device_controller import (
17
+ MobileDeviceController,
18
+ ScreenDataResponse,
19
+ )
20
+ from minitap.mobile_use.controllers.types import Bounds, CoordinatesSelectorRequest, TapOutput
21
+ from minitap.mobile_use.utils.logger import get_logger
22
+ from minitap.mobile_use.utils.video import (
23
+ DEFAULT_MAX_DURATION_SECONDS,
24
+ VIDEO_READY_DELAY_SECONDS,
25
+ RecordingSession,
26
+ VideoRecordingResult,
27
+ get_active_session,
28
+ has_active_session,
29
+ remove_active_session,
30
+ set_active_session,
31
+ )
32
+
33
+ logger = get_logger(__name__)
34
+
35
+
36
+ class iOSDeviceController(MobileDeviceController):
37
+ """iOS device controller using IDB (simulators) or WDA (physical devices)."""
38
+
39
def __init__(
    self,
    ios_client: IosClientWrapper,
    device_id: str,
    device_width: int,
    device_height: int,
):
    """Store the client and device geometry used by every controller call.

    Args:
        ios_client: Client wrapper all device operations are delegated to.
        device_id: Identifier of the target device.
        device_width: Screen width reported for the device.
        device_height: Screen height reported for the device.
    """
    self.ios_client = ios_client
    self.device_id = device_id
    self.device_width, self.device_height = device_width, device_height
    # Remembered once so press_home can pick the matching client call.
    self._is_idb = isinstance(ios_client, IdbClientWrapper)
51
+
52
async def tap(
    self,
    coords: CoordinatesSelectorRequest,
    long_press: bool = False,
    long_press_duration: int = 1000,
) -> TapOutput:
    """Tap, or long-press, the screen at ``coords`` through the iOS client.

    Args:
        coords: Target point; its ``x`` and ``y`` are forwarded to the client.
        long_press: When true, a hold duration is passed to the client.
        long_press_duration: Hold time, divided by 1000 before being passed
            on (presumably milliseconds to seconds - TODO confirm). Ignored
            unless ``long_press`` is true.

    Returns:
        ``TapOutput`` with ``error=None`` on success, otherwise with a
        message describing the failure.
    """
    try:
        # A plain tap sends no duration at all.
        hold = None
        if long_press:
            hold = long_press_duration / 1000.0
        await self.ios_client.tap(x=coords.x, y=coords.y, duration=hold)  # type: ignore[call-arg]
        return TapOutput(error=None)
    except Exception as exc:
        return TapOutput(error=f"IDB tap failed: {str(exc)}")
65
+
66
async def swipe(
    self,
    start: CoordinatesSelectorRequest,
    end: CoordinatesSelectorRequest,
    duration: int = 400,
) -> str | None:
    """Swipe between two points through the iOS client.

    Args:
        start: Point where the gesture begins.
        end: Point where the gesture ends.
        duration: Gesture length; divided by 1000 before being passed to the
            client (presumably milliseconds to seconds - TODO confirm).

    Returns:
        ``None`` on success, otherwise an error message.
    """
    try:
        scaled_duration = duration / 1000.0
        gesture = {
            "x_start": start.x,
            "y_start": start.y,
            "x_end": end.x,
            "y_end": end.y,
            "duration": scaled_duration,
        }
        await self.ios_client.swipe(**gesture)  # type: ignore[call-arg]
    except Exception as exc:
        return f"IDB swipe failed: {str(exc)}"
    return None
86
+
87
async def get_screen_data(self) -> ScreenDataResponse:
    """Capture the screenshot and the accessibility tree concurrently.

    Returns:
        ``ScreenDataResponse`` holding the base64 screenshot, the processed
        elements (empty when no accessibility data came back), the stored
        device dimensions and the platform string ``"ios"``.

    Raises:
        RuntimeError: If the client returned no screenshot.
        Exception: Any client failure is logged and re-raised.
    """
    try:
        # Both requests are awaited together rather than one after the other.
        shot, ax_nodes = await asyncio.gather(
            self.ios_client.screenshot(),  # type: ignore[call-arg]
            self.ios_client.describe_all(),
        )

        if shot is None:
            raise RuntimeError("Screenshot returned None")

        if ax_nodes:
            parsed = self._process_flat_ios_hierarchy(ax_nodes)
        else:
            parsed = []

        encoded = base64.b64encode(shot).decode("utf-8")
        return ScreenDataResponse(
            base64=encoded,
            elements=parsed,
            width=self.device_width,
            height=self.device_height,
            platform="ios",
        )
    except Exception as exc:
        logger.error(f"Failed to get screen data: {exc}")
        raise
115
+
116
async def screenshot(self) -> str:
    """Capture the screen and return it as base64 text.

    Raises:
        RuntimeError: If the client returned no screenshot.
        Exception: Any client failure is logged and re-raised.
    """
    try:
        raw = await self.ios_client.screenshot()  # type: ignore[call-arg]
        if raw is None:
            raise RuntimeError("Screenshot returned None")
        encoded = base64.b64encode(raw)
        return encoded.decode("utf-8")
    except Exception as exc:
        logger.error(f"Failed to take screenshot: {exc}")
        raise
126
+
127
async def input_text(self, text: str) -> bool:
    """Send ``text`` to the device through the iOS client.

    Returns:
        Whatever the client's ``text`` call returns, or ``False`` when the
        call raises (the error is logged).
    """
    try:
        outcome = await self.ios_client.text(text)  # type: ignore[call-arg]
    except Exception as exc:
        logger.error(f"Failed to input text: {exc}")
        return False
    return outcome
134
+
135
async def launch_app(self, package_or_bundle_id: str) -> bool:
    """Launch the app with the given bundle ID.

    Returns:
        Whatever the client's ``launch`` call returns, or ``False`` when the
        call raises (the error is logged).
    """
    try:
        launched = await self.ios_client.launch(bundle_id=package_or_bundle_id)  # type: ignore[call-arg]
    except Exception as exc:
        logger.error(f"Failed to launch app {package_or_bundle_id}: {exc}")
        return False
    return launched
142
+
143
+ async def terminate_app(self, package_or_bundle_id: str | None) -> bool:
144
+ """Terminate an iOS app using IDB."""
145
+ if package_or_bundle_id is None:
146
+ logger.warning("Cannot terminate app: bundle_id is None")
147
+ return False
148
+ try:
149
+ return await self.ios_client.terminate(bundle_id=package_or_bundle_id) # type: ignore[call-arg]
150
+ except Exception as e:
151
+ logger.error(f"Failed to terminate app {package_or_bundle_id}: {e}")
152
+ return False
153
+
154
async def open_url(self, url: str) -> bool:
    """Open ``url`` on the device through the iOS client.

    Returns:
        Whatever the client's ``open_url`` call returns, or ``False`` when
        the call raises (the error is logged).
    """
    try:
        opened = await self.ios_client.open_url(url)  # type: ignore[call-arg]
    except Exception as exc:
        logger.error(f"Failed to open URL {url}: {exc}")
        return False
    return opened
161
+
162
async def press_back(self) -> bool:
    """Emulate "back" with a left-edge swipe, since iOS has no back button.

    The swipe runs horizontally from x=10 to x=300 at a quarter of the
    stored device height, over a duration of 300.

    Returns:
        ``True`` when the swipe reported no error, ``False`` otherwise.
    """
    try:
        gesture_y = self.device_height // 4
        swipe_error = await self.swipe(
            CoordinatesSelectorRequest(x=10, y=gesture_y),
            CoordinatesSelectorRequest(x=300, y=gesture_y),
            duration=300,
        )
        return swipe_error is None
    except Exception as exc:
        logger.error(f"Failed to press back: {exc}")
        return False
173
+
174
async def press_home(self) -> bool:
    """Press the home button using whichever call the client supports.

    Returns:
        The client's result, or ``False`` when the call raises (logged).
    """
    try:
        if not self._is_idb:
            # Non-IDB clients expose a dedicated home() call.
            return await self.ios_client.home()  # type: ignore[union-attr]
        return await self.ios_client.button(button_type=HIDButtonType.HOME)  # type: ignore[call-arg, union-attr]
    except Exception as exc:
        logger.error(f"Failed to press home: {exc}")
        return False
184
+
185
async def press_enter(self) -> bool:
    """Send key code 40, which this controller uses for enter/return.

    Returns:
        ``True`` once the key call completes, ``False`` when it raises
        (the error is logged).
    """
    try:
        await self.ios_client.key(40)  # type: ignore[call-arg]
    except Exception as exc:
        logger.error(f"Failed to press enter: {exc}")
        return False
    return True
193
+
194
async def get_ui_hierarchy(self) -> list[dict]:
    """Fetch the accessibility tree and convert it to the standard format.

    The client call is bounded by a 40 second timeout.

    Returns:
        The processed element list, or ``[]`` when the client returned
        nothing, timed out, or raised. Every failure is logged.
    """
    try:
        accessibility_info = await asyncio.wait_for(
            self.ios_client.describe_all(), timeout=40.0
        )
        if accessibility_info is None:
            logger.warning("Accessibility info returned None")
            return []

        return self._process_flat_ios_hierarchy(accessibility_info)
    # asyncio.TimeoutError only became an alias of the builtin TimeoutError in
    # Python 3.11; naming the asyncio one keeps this branch live on 3.10 too.
    except asyncio.TimeoutError:
        logger.error("Timeout waiting for UI hierarchy (40s)")
        return []
    except Exception as e:
        logger.error(f"Failed to get UI hierarchy: {e}")
        return []
212
+
213
+ def _process_flat_ios_hierarchy(self, accessibility_data: list[dict]) -> list[dict]:
214
+ """
215
+ Process flat iOS accessibility info into our standard format.
216
+
217
+ IDB with nested=False returns a flat list of all elements.
218
+ """
219
+ elements = []
220
+
221
+ for node in accessibility_data:
222
+ if not isinstance(node, dict):
223
+ continue
224
+
225
+ # Extract element info
226
+ element = {
227
+ "type": node.get("type", ""),
228
+ "value": node.get("AXValue", ""),
229
+ "label": node.get("AXLabel", node.get("label", "")),
230
+ "frame": node.get("frame", {}),
231
+ "enabled": node.get("enabled", False),
232
+ "visible": True, # Elements in the list are generally visible
233
+ }
234
+
235
+ # Add bounds if frame is available
236
+ if "frame" in node and isinstance(node["frame"], dict):
237
+ frame = node["frame"]
238
+ if all(k in frame for k in ["x", "y", "width", "height"]):
239
+ element["bounds"] = (
240
+ f"[{int(frame['x'])},{int(frame['y'])}]"
241
+ f"[{int(frame['x'] + frame['width'])},{int(frame['y'] + frame['height'])}]"
242
+ )
243
+
244
+ elements.append(element)
245
+
246
+ return elements
247
+
248
def find_element(
    self,
    ui_hierarchy: list[dict],
    resource_id: str | None = None,
    text: str | None = None,
    index: int = 0,
) -> tuple[dict | None, Bounds | None, str | None]:
    """Find a UI element in the iOS hierarchy.

    iOS elements carry no resource-id, so ``resource_id`` is compared with
    the element ``type``; ``text`` is compared with ``value`` and ``label``.
    An element matching either criterion counts as a match.

    Args:
        ui_hierarchy: Processed elements to search.
        resource_id: Element type to match, if any.
        text: Exact value or label to match, if any.
        index: Which match to return. Negative values count from the end.

    Returns:
        ``(element, bounds, None)`` on success, or ``(None, None, message)``
        when no criterion was given, nothing matched, or ``index`` is out
        of range in either direction.
    """
    if not resource_id and not text:
        return None, None, "No resource_id or text provided"

    matches = [
        element
        for element in ui_hierarchy
        if (resource_id and element.get("type") == resource_id)
        or (text and (element.get("value") == text or element.get("label") == text))
    ]

    criteria = f"type='{resource_id}'" if resource_id else f"text='{text}'"

    if not matches:
        return None, None, f"No element found with {criteria}"

    # Also reject indexes below -len(matches): they would otherwise raise an
    # uncaught IndexError instead of producing the documented error tuple.
    if index >= len(matches) or index < -len(matches):
        return (
            None,
            None,
            f"Index {index} out of range for {criteria} (found {len(matches)} matches)",
        )

    element = matches[index]
    return element, self._extract_bounds(element), None
284
+
285
+ def _extract_bounds(self, element: dict) -> Bounds | None:
286
+ """Extract bounds from an iOS UI element."""
287
+ bounds_str = element.get("bounds")
288
+ if not bounds_str or not isinstance(bounds_str, str):
289
+ return None
290
+
291
+ try:
292
+ # Parse bounds string like "[x1,y1][x2,y2]"
293
+ match = re.match(r"\[(\d+),(\d+)\]\[(\d+),(\d+)\]", bounds_str)
294
+ if match:
295
+ return Bounds(
296
+ x1=int(match.group(1)),
297
+ y1=int(match.group(2)),
298
+ x2=int(match.group(3)),
299
+ y2=int(match.group(4)),
300
+ )
301
+ except (ValueError, IndexError):
302
+ return None
303
+
304
+ return None
305
+
306
+ async def erase_text(self, nb_chars: int | None = None) -> bool:
307
+ """Erase text by sending delete key presses."""
308
+ try:
309
+ if nb_chars is None:
310
+ nb_chars = 50 # Default to erasing 50 characters
311
+ # iOS delete key code is 42 (HID keyboard delete)
312
+ for _ in range(nb_chars):
313
+ await self.ios_client.key(42) # type: ignore[call-arg]
314
+ return True
315
+ except Exception as e:
316
+ logger.error(f"Failed to erase text: {e}")
317
+ return False
318
+
319
async def cleanup(self) -> None:
    """Release controller resources by delegating to the client's cleanup."""
    client = self.ios_client
    logger.debug("iOS controller cleanup")
    await client.cleanup()
323
+
324
def get_compressed_b64_screenshot(self, image_base64: str, quality: int = 50) -> str:
    """Re-encode a base64 image as an optimized JPEG and return it as base64.

    Args:
        image_base64: Base64 image data, optionally prefixed with a
            ``data:image...,`` URI header.
        quality: JPEG quality passed to the encoder.

    Returns:
        Base64 text of the compressed JPEG.
    """
    if image_base64.startswith("data:image"):
        image_base64 = image_base64.split(",", 1)[1]

    image = Image.open(BytesIO(base64.b64decode(image_base64)))

    # JPEG has no alpha channel, so flatten transparency onto white first.
    # Going through RGBA gives "LA" and palette ("P") images a real alpha
    # band to use as the paste mask; previously only "RGBA" was masked and
    # the transparency of the other two modes was ignored.
    if image.mode in ("RGBA", "LA", "P"):
        rgba_image = image.convert("RGBA")
        flattened = Image.new("RGB", rgba_image.size, (255, 255, 255))
        flattened.paste(rgba_image, mask=rgba_image.split()[-1])
        image = flattened

    compressed_io = BytesIO()
    image.save(compressed_io, format="JPEG", quality=quality, optimize=True)

    return base64.b64encode(compressed_io.getvalue()).decode("utf-8")
342
+
343
async def start_video_recording(
    self,
    max_duration_seconds: int = DEFAULT_MAX_DURATION_SECONDS,
) -> VideoRecordingResult:
    """Start screen recording on iOS device/simulator using idb record-video.

    The video is written to ``recording.mp4`` in a fresh temporary directory
    and the running process is registered as the device's active session.

    Args:
        max_duration_seconds: Maximum recording duration in seconds.
            NOTE(review): this value is only echoed in the returned message;
            nothing in this method stops the recording after that time.

    Returns:
        VideoRecordingResult with success status and message. Fails when a
        recording is already active for the device or when setup raises.
    """
    import shutil

    device_id = self.device_id

    if has_active_session(device_id):
        return VideoRecordingResult(
            success=False,
            message=f"Recording already in progress for device {device_id}",
        )

    temp_dir = None
    process = None
    try:
        temp_dir = tempfile.mkdtemp(prefix="mobile_use_video_")
        video_path = Path(temp_dir) / "recording.mp4"

        cmd = ["idb", "record-video", "--udid", device_id, str(video_path)]

        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )

        session = RecordingSession(
            device_id=device_id,
            start_time=time.time(),
            process=process,
            local_video_path=video_path,
        )
        set_active_session(device_id, session)

        logger.info(f"Started iOS screen recording on {device_id}")
        return VideoRecordingResult(
            success=True,
            message=f"Recording started (max {max_duration_seconds}s).",
        )

    except Exception as e:
        # Setup failed part-way: no session will ever reference the process or
        # the directory, so release both here instead of leaking them.
        if process is not None and process.returncode is None:
            try:
                process.kill()
            except ProcessLookupError:
                pass  # the process exited on its own in the meantime
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)

        logger.error(f"Failed to start iOS recording: {e}")
        return VideoRecordingResult(
            success=False,
            message=f"Failed to start recording: {e}",
        )
388
+
389
async def stop_video_recording(self) -> VideoRecordingResult:
    """Stop iOS recording and retrieve the video file.

    The recording process is asked to terminate and given 10 seconds to
    exit before it is killed. The active session is removed whether or not
    stopping succeeds.

    Returns:
        VideoRecordingResult with success status and message, plus
        ``video_path`` when the recorded file exists.
    """
    device_id = self.device_id
    session = get_active_session(device_id)

    if not session:
        return VideoRecordingResult(
            success=False,
            message=f"No active recording for device {device_id}",
        )

    try:
        process = session.process
        # returncode is None only while the child is still running; calling
        # terminate() on a finished process raises ProcessLookupError.
        if process is not None and process.returncode is None:
            try:
                process.terminate()
                # The process was started with stdout/stderr pipes that are
                # never read; communicate() drains them while waiting, which
                # avoids the documented wait()-with-PIPE deadlock.
                await asyncio.wait_for(process.communicate(), timeout=10.0)
            # asyncio.TimeoutError is the builtin TimeoutError only from
            # Python 3.11 on; naming it keeps the kill path live on 3.10.
            except asyncio.TimeoutError:
                process.kill()
                await process.wait()
            except ProcessLookupError:
                pass  # exited between the returncode check and terminate()

        await asyncio.sleep(VIDEO_READY_DELAY_SECONDS)

        local_path = session.local_video_path
        remove_active_session(device_id)

        duration = time.time() - session.start_time
        logger.info(f"Stopped iOS recording after {duration:.1f}s, saved to {local_path}")

        if local_path and local_path.exists():
            return VideoRecordingResult(
                success=True,
                message=f"Recording stopped after {duration:.1f}s",
                video_path=local_path,
            )
        else:
            return VideoRecordingResult(
                success=False,
                message="Recording stopped but video file not found",
            )

    except Exception as e:
        logger.error(f"Failed to stop iOS recording: {e}")
        remove_active_session(device_id)
        return VideoRecordingResult(
            success=False,
            message=f"Failed to stop recording: {e}",
        )