minitap-mobile-use 3.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minitap/mobile_use/__init__.py +0 -0
- minitap/mobile_use/agents/contextor/contextor.md +55 -0
- minitap/mobile_use/agents/contextor/contextor.py +175 -0
- minitap/mobile_use/agents/contextor/types.py +36 -0
- minitap/mobile_use/agents/cortex/cortex.md +135 -0
- minitap/mobile_use/agents/cortex/cortex.py +152 -0
- minitap/mobile_use/agents/cortex/types.py +15 -0
- minitap/mobile_use/agents/executor/executor.md +42 -0
- minitap/mobile_use/agents/executor/executor.py +87 -0
- minitap/mobile_use/agents/executor/tool_node.py +152 -0
- minitap/mobile_use/agents/hopper/hopper.md +15 -0
- minitap/mobile_use/agents/hopper/hopper.py +44 -0
- minitap/mobile_use/agents/orchestrator/human.md +12 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.md +21 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.py +134 -0
- minitap/mobile_use/agents/orchestrator/types.py +11 -0
- minitap/mobile_use/agents/outputter/human.md +25 -0
- minitap/mobile_use/agents/outputter/outputter.py +85 -0
- minitap/mobile_use/agents/outputter/test_outputter.py +167 -0
- minitap/mobile_use/agents/planner/human.md +14 -0
- minitap/mobile_use/agents/planner/planner.md +126 -0
- minitap/mobile_use/agents/planner/planner.py +101 -0
- minitap/mobile_use/agents/planner/types.py +51 -0
- minitap/mobile_use/agents/planner/utils.py +70 -0
- minitap/mobile_use/agents/summarizer/summarizer.py +35 -0
- minitap/mobile_use/agents/video_analyzer/__init__.py +5 -0
- minitap/mobile_use/agents/video_analyzer/human.md +5 -0
- minitap/mobile_use/agents/video_analyzer/video_analyzer.md +37 -0
- minitap/mobile_use/agents/video_analyzer/video_analyzer.py +111 -0
- minitap/mobile_use/clients/browserstack_client.py +477 -0
- minitap/mobile_use/clients/idb_client.py +429 -0
- minitap/mobile_use/clients/ios_client.py +332 -0
- minitap/mobile_use/clients/ios_client_config.py +141 -0
- minitap/mobile_use/clients/ui_automator_client.py +330 -0
- minitap/mobile_use/clients/wda_client.py +526 -0
- minitap/mobile_use/clients/wda_lifecycle.py +367 -0
- minitap/mobile_use/config.py +413 -0
- minitap/mobile_use/constants.py +3 -0
- minitap/mobile_use/context.py +106 -0
- minitap/mobile_use/controllers/__init__.py +0 -0
- minitap/mobile_use/controllers/android_controller.py +524 -0
- minitap/mobile_use/controllers/controller_factory.py +46 -0
- minitap/mobile_use/controllers/device_controller.py +182 -0
- minitap/mobile_use/controllers/ios_controller.py +436 -0
- minitap/mobile_use/controllers/platform_specific_commands_controller.py +199 -0
- minitap/mobile_use/controllers/types.py +106 -0
- minitap/mobile_use/controllers/unified_controller.py +193 -0
- minitap/mobile_use/graph/graph.py +160 -0
- minitap/mobile_use/graph/state.py +115 -0
- minitap/mobile_use/main.py +309 -0
- minitap/mobile_use/sdk/__init__.py +12 -0
- minitap/mobile_use/sdk/agent.py +1294 -0
- minitap/mobile_use/sdk/builders/__init__.py +10 -0
- minitap/mobile_use/sdk/builders/agent_config_builder.py +307 -0
- minitap/mobile_use/sdk/builders/index.py +15 -0
- minitap/mobile_use/sdk/builders/task_request_builder.py +236 -0
- minitap/mobile_use/sdk/constants.py +1 -0
- minitap/mobile_use/sdk/examples/README.md +83 -0
- minitap/mobile_use/sdk/examples/__init__.py +1 -0
- minitap/mobile_use/sdk/examples/app_lock_messaging.py +54 -0
- minitap/mobile_use/sdk/examples/platform_manual_task_example.py +67 -0
- minitap/mobile_use/sdk/examples/platform_minimal_example.py +48 -0
- minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
- minitap/mobile_use/sdk/examples/smart_notification_assistant.py +225 -0
- minitap/mobile_use/sdk/examples/video_transcription_example.py +117 -0
- minitap/mobile_use/sdk/services/cloud_mobile.py +656 -0
- minitap/mobile_use/sdk/services/platform.py +434 -0
- minitap/mobile_use/sdk/types/__init__.py +51 -0
- minitap/mobile_use/sdk/types/agent.py +84 -0
- minitap/mobile_use/sdk/types/exceptions.py +138 -0
- minitap/mobile_use/sdk/types/platform.py +183 -0
- minitap/mobile_use/sdk/types/task.py +269 -0
- minitap/mobile_use/sdk/utils.py +29 -0
- minitap/mobile_use/services/accessibility.py +100 -0
- minitap/mobile_use/services/llm.py +247 -0
- minitap/mobile_use/services/telemetry.py +421 -0
- minitap/mobile_use/tools/index.py +67 -0
- minitap/mobile_use/tools/mobile/back.py +52 -0
- minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
- minitap/mobile_use/tools/mobile/focus_and_clear_text.py +317 -0
- minitap/mobile_use/tools/mobile/focus_and_input_text.py +153 -0
- minitap/mobile_use/tools/mobile/launch_app.py +86 -0
- minitap/mobile_use/tools/mobile/long_press_on.py +169 -0
- minitap/mobile_use/tools/mobile/open_link.py +62 -0
- minitap/mobile_use/tools/mobile/press_key.py +83 -0
- minitap/mobile_use/tools/mobile/stop_app.py +62 -0
- minitap/mobile_use/tools/mobile/swipe.py +156 -0
- minitap/mobile_use/tools/mobile/tap.py +154 -0
- minitap/mobile_use/tools/mobile/video_recording.py +177 -0
- minitap/mobile_use/tools/mobile/wait_for_delay.py +81 -0
- minitap/mobile_use/tools/scratchpad.py +147 -0
- minitap/mobile_use/tools/test_utils.py +413 -0
- minitap/mobile_use/tools/tool_wrapper.py +16 -0
- minitap/mobile_use/tools/types.py +35 -0
- minitap/mobile_use/tools/utils.py +336 -0
- minitap/mobile_use/utils/app_launch_utils.py +173 -0
- minitap/mobile_use/utils/cli_helpers.py +37 -0
- minitap/mobile_use/utils/cli_selection.py +143 -0
- minitap/mobile_use/utils/conversations.py +31 -0
- minitap/mobile_use/utils/decorators.py +124 -0
- minitap/mobile_use/utils/errors.py +6 -0
- minitap/mobile_use/utils/file.py +13 -0
- minitap/mobile_use/utils/logger.py +183 -0
- minitap/mobile_use/utils/media.py +186 -0
- minitap/mobile_use/utils/recorder.py +52 -0
- minitap/mobile_use/utils/requests_utils.py +37 -0
- minitap/mobile_use/utils/shell_utils.py +20 -0
- minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
- minitap/mobile_use/utils/time.py +6 -0
- minitap/mobile_use/utils/ui_hierarchy.py +132 -0
- minitap/mobile_use/utils/video.py +281 -0
- minitap_mobile_use-3.3.0.dist-info/METADATA +329 -0
- minitap_mobile_use-3.3.0.dist-info/RECORD +115 -0
- minitap_mobile_use-3.3.0.dist-info/WHEEL +4 -0
- minitap_mobile_use-3.3.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
from abc import abstractmethod
|
|
2
|
+
from typing import Protocol
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
from minitap.mobile_use.controllers.types import Bounds, CoordinatesSelectorRequest, TapOutput
|
|
7
|
+
from minitap.mobile_use.utils.video import VideoRecordingResult
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ScreenDataResponse(BaseModel):
    """Snapshot of the device screen as returned by ``get_screen_data``.

    Bundles the screenshot together with the UI elements, the screen
    dimensions and the platform name.
    """

    # Screenshot encoded as a base64 string.
    base64: str
    # UI elements taken from the hierarchy. The element type is not
    # constrained here; the iOS controller supplies a list of dicts.
    elements: list
    # Screen dimensions.
    # NOTE(review): presumably the same coordinate space used for taps - confirm.
    width: int
    height: int
    # Platform identifier; the iOS controller sets this to "ios".
    platform: str
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class MobileDeviceController(Protocol):
    """Interface shared by the platform-specific device controllers.

    Every method is declared abstract and raises ``NotImplementedError`` in
    this class; concrete controllers provide the real behaviour. All methods
    are coroutines except ``find_element`` and
    ``get_compressed_b64_screenshot``.
    """

    @abstractmethod
    async def tap(
        self,
        coords: CoordinatesSelectorRequest,
        long_press: bool = False,
        long_press_duration: int = 1000,
    ) -> TapOutput:
        """
        Tap at the given coordinates, optionally as a long press.

        Args:
            coords: Point to tap.
            long_press: When True, hold instead of performing a plain tap.
            long_press_duration: Hold time for a long press. Presumably
                milliseconds (the iOS controller divides it by 1000 before
                forwarding it) - TODO confirm.

        Returns:
            TapOutput; the iOS controller sets ``error`` to None on success
            and to a message on failure.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def swipe(
        self,
        start: CoordinatesSelectorRequest,
        end: CoordinatesSelectorRequest,
        duration: int = 400,
    ) -> str | None:
        """
        Swipe from start to end coordinates.

        ``duration`` is presumably in milliseconds (the iOS controller divides
        it by 1000 before forwarding it) - TODO confirm.

        Returns error message on failure, None on success.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def screenshot(self) -> str:
        """
        Take a screenshot and return it as a string.

        The iOS controller returns the image bytes encoded as base64 text.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def input_text(self, text: str) -> bool:
        """
        Input text at the currently focused field.

        Returns True on success, False on failure.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def launch_app(self, package_or_bundle_id: str) -> bool:
        """
        Launch an application by package name (Android) or bundle ID (iOS).

        Returns True on success, False on failure.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def terminate_app(self, package_or_bundle_id: str | None) -> bool:
        """
        Terminate an application.

        ``package_or_bundle_id`` may be None; the iOS controller logs a
        warning and returns False in that case.

        Returns True on success, False on failure.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def open_url(self, url: str) -> bool:
        """
        Open a URL.

        Returns True on success, False on failure.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def press_back(self) -> bool:
        """
        Press the back button (Android) or equivalent gesture (iOS).

        Returns True on success, False on failure.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def press_home(self) -> bool:
        """
        Press the home button.

        Returns True on success, False on failure.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def press_enter(self) -> bool:
        """
        Press the enter/return key.

        Returns True on success, False on failure.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def get_ui_hierarchy(self) -> list[dict]:
        """
        Get the UI element hierarchy.

        Returns a list of UI elements with their properties. The iOS
        controller returns an empty list when the hierarchy cannot be read.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    def find_element(
        self,
        ui_hierarchy: list[dict],
        resource_id: str | None = None,
        text: str | None = None,
        index: int = 0,
    ) -> tuple[dict | None, Bounds | None, str | None]:
        """
        Find a UI element in the hierarchy.

        Args:
            ui_hierarchy: Elements to search, as returned by
                ``get_ui_hierarchy``.
            resource_id: Identifier to match. The iOS controller compares it
                with the element ``type`` because iOS has no resource id.
            text: Text to match.
            index: Which match to return when several elements qualify.

        Returns:
            Tuple of (element_dict, bounds, error_message). In the iOS
            controller ``error_message`` is None on success, and the element
            and bounds are None on failure.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def cleanup(self) -> None:
        """Clean up resources (e.g., stop companion processes)."""
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def erase_text(self, nb_chars: int | None = None) -> bool:
        """
        Erase the last nb_chars characters.

        When ``nb_chars`` is None the implementation picks its own default
        (the iOS controller erases 50 characters).

        Returns True on success, False on failure.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def get_screen_data(self) -> "ScreenDataResponse":
        """
        Get screen data including screenshot (base64), UI hierarchy elements,
        screen dimensions, and platform.

        Returns:
            ScreenDataResponse with base64 screenshot, elements, width, height, platform
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    def get_compressed_b64_screenshot(self, image_base64: str, quality: int = 50) -> str:
        """
        Compress a base64 image.

        Args:
            image_base64: Base64-encoded image.
            quality: Compression quality; the iOS controller passes it to the
                JPEG encoder.

        Returns the compressed base64 image.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def start_video_recording(
        self,
        max_duration_seconds: int = 900,
    ) -> VideoRecordingResult:
        """
        Start screen recording on the device.

        Args:
            max_duration_seconds: Maximum recording duration in seconds.

        Returns:
            VideoRecordingResult with success status and message.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    async def stop_video_recording(self) -> VideoRecordingResult:
        """
        Stop screen recording and retrieve the video file.

        Returns:
            VideoRecordingResult with success status, message, and video_path if successful.
        """
        raise NotImplementedError("Subclasses must implement this method")
|
|
@@ -0,0 +1,436 @@
|
|
|
1
|
+
"""iOS-specific device controller implementation using IDB or WDA."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import base64
|
|
5
|
+
import re
|
|
6
|
+
import tempfile
|
|
7
|
+
import time
|
|
8
|
+
from io import BytesIO
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from idb.common.types import HIDButtonType
|
|
12
|
+
from PIL import Image
|
|
13
|
+
|
|
14
|
+
from minitap.mobile_use.clients.idb_client import IdbClientWrapper
|
|
15
|
+
from minitap.mobile_use.clients.ios_client import IosClientWrapper
|
|
16
|
+
from minitap.mobile_use.controllers.device_controller import (
|
|
17
|
+
MobileDeviceController,
|
|
18
|
+
ScreenDataResponse,
|
|
19
|
+
)
|
|
20
|
+
from minitap.mobile_use.controllers.types import Bounds, CoordinatesSelectorRequest, TapOutput
|
|
21
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
22
|
+
from minitap.mobile_use.utils.video import (
|
|
23
|
+
DEFAULT_MAX_DURATION_SECONDS,
|
|
24
|
+
VIDEO_READY_DELAY_SECONDS,
|
|
25
|
+
RecordingSession,
|
|
26
|
+
VideoRecordingResult,
|
|
27
|
+
get_active_session,
|
|
28
|
+
has_active_session,
|
|
29
|
+
remove_active_session,
|
|
30
|
+
set_active_session,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
logger = get_logger(__name__)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class iOSDeviceController(MobileDeviceController):
|
|
37
|
+
"""iOS device controller using IDB (simulators) or WDA (physical devices)."""
|
|
38
|
+
|
|
39
|
+
def __init__(
    self,
    ios_client: IosClientWrapper,
    device_id: str,
    device_width: int,
    device_height: int,
):
    """Bind the controller to one device and the client that drives it.

    Args:
        ios_client: Client wrapper used for every device interaction.
        device_id: Identifier of the target device.
        device_width: Screen width reported in screen data.
        device_height: Screen height reported in screen data.
    """
    # Remember which backend is in use; press_home chooses its call from this flag.
    self._is_idb = isinstance(ios_client, IdbClientWrapper)
    self.ios_client = ios_client
    self.device_id = device_id
    self.device_width, self.device_height = device_width, device_height
|
|
51
|
+
|
|
52
|
+
async def tap(
    self,
    coords: CoordinatesSelectorRequest,
    long_press: bool = False,
    long_press_duration: int = 1000,
) -> TapOutput:
    """Tap (or long-press) at ``coords`` through the iOS client.

    Returns:
        TapOutput with ``error=None`` on success, or with an error message
        when anything raises.
    """
    try:
        # Only a long press carries a hold time; the value is divided by 1000
        # before being forwarded (presumably milliseconds to seconds).
        hold_time = None
        if long_press:
            hold_time = long_press_duration / 1000.0
        await self.ios_client.tap(x=coords.x, y=coords.y, duration=hold_time)  # type: ignore[call-arg]
        return TapOutput(error=None)
    except Exception as exc:
        return TapOutput(error=f"IDB tap failed: {str(exc)}")
|
|
65
|
+
|
|
66
|
+
async def swipe(
    self,
    start: CoordinatesSelectorRequest,
    end: CoordinatesSelectorRequest,
    duration: int = 400,
) -> str | None:
    """Swipe from ``start`` to ``end`` through the iOS client.

    Returns:
        None on success, or an error message when anything raises.
    """
    try:
        # The client receives duration / 1000 (presumably milliseconds to seconds).
        scaled_duration = duration / 1000.0
        swipe_kwargs = {
            "x_start": start.x,
            "y_start": start.y,
            "x_end": end.x,
            "y_end": end.y,
            "duration": scaled_duration,
        }
        await self.ios_client.swipe(**swipe_kwargs)  # type: ignore[call-arg]
        return None
    except Exception as exc:
        return f"IDB swipe failed: {str(exc)}"
|
|
86
|
+
|
|
87
|
+
async def get_screen_data(self) -> ScreenDataResponse:
    """Fetch the screenshot and the accessibility data concurrently.

    Returns:
        ScreenDataResponse holding the base64 screenshot, the processed
        elements, the stored device dimensions and the platform "ios".

    Raises:
        RuntimeError: When the client returns no screenshot. Any other
            exception is logged and re-raised unchanged.
    """
    try:
        client = self.ios_client
        # Both requests are awaited together rather than one after the other.
        image_bytes, raw_nodes = await asyncio.gather(
            client.screenshot(),  # type: ignore[call-arg]
            client.describe_all(),
        )
        if image_bytes is None:
            raise RuntimeError("Screenshot returned None")

        # Missing or empty accessibility data yields an empty element list.
        if raw_nodes:
            ui_elements = self._process_flat_ios_hierarchy(raw_nodes)
        else:
            ui_elements = []

        encoded = base64.b64encode(image_bytes).decode("utf-8")
        return ScreenDataResponse(
            base64=encoded,
            elements=ui_elements,
            width=self.device_width,
            height=self.device_height,
            platform="ios",
        )
    except Exception as exc:
        logger.error(f"Failed to get screen data: {exc}")
        raise
|
|
115
|
+
|
|
116
|
+
async def screenshot(self) -> str:
    """Capture the screen and return the image bytes as base64 text.

    Raises:
        RuntimeError: When the client returns no screenshot. Any other
            exception is logged and re-raised unchanged.
    """
    try:
        raw = await self.ios_client.screenshot()  # type: ignore[call-arg]
        if raw is not None:
            return base64.b64encode(raw).decode("utf-8")
        raise RuntimeError("Screenshot returned None")
    except Exception as exc:
        logger.error(f"Failed to take screenshot: {exc}")
        raise
|
|
126
|
+
|
|
127
|
+
async def input_text(self, text: str) -> bool:
    """Send ``text`` to the device through the iOS client.

    Returns:
        Whatever the client's ``text`` call returns, or False when it raises.
    """
    try:
        typed = await self.ios_client.text(text)  # type: ignore[call-arg]
    except Exception as exc:
        logger.error(f"Failed to input text: {exc}")
        return False
    return typed
|
|
134
|
+
|
|
135
|
+
async def launch_app(self, package_or_bundle_id: str) -> bool:
    """Launch the app identified by ``package_or_bundle_id`` (a bundle id on iOS).

    Returns:
        Whatever the client's ``launch`` call returns, or False when it raises.
    """
    try:
        launched = await self.ios_client.launch(bundle_id=package_or_bundle_id)  # type: ignore[call-arg]
    except Exception as exc:
        logger.error(f"Failed to launch app {package_or_bundle_id}: {exc}")
        return False
    return launched
|
|
142
|
+
|
|
143
|
+
async def terminate_app(self, package_or_bundle_id: str | None) -> bool:
|
|
144
|
+
"""Terminate an iOS app using IDB."""
|
|
145
|
+
if package_or_bundle_id is None:
|
|
146
|
+
logger.warning("Cannot terminate app: bundle_id is None")
|
|
147
|
+
return False
|
|
148
|
+
try:
|
|
149
|
+
return await self.ios_client.terminate(bundle_id=package_or_bundle_id) # type: ignore[call-arg]
|
|
150
|
+
except Exception as e:
|
|
151
|
+
logger.error(f"Failed to terminate app {package_or_bundle_id}: {e}")
|
|
152
|
+
return False
|
|
153
|
+
|
|
154
|
+
async def open_url(self, url: str) -> bool:
    """Open ``url`` on the device through the iOS client.

    Returns:
        Whatever the client's ``open_url`` call returns, or False when it raises.
    """
    try:
        opened = await self.ios_client.open_url(url)  # type: ignore[call-arg]
    except Exception as exc:
        logger.error(f"Failed to open URL {url}: {exc}")
        return False
    return opened
|
|
161
|
+
|
|
162
|
+
async def press_back(self) -> bool:
    """Emulate "back" with a left-to-right swipe, since iOS has no back button.

    The swipe runs from x=10 to x=300 at a quarter of the device height,
    with a duration of 300.

    Returns:
        True when the swipe reported no error, False otherwise.
    """
    try:
        gesture_y = self.device_height // 4
        swipe_error = await self.swipe(
            CoordinatesSelectorRequest(x=10, y=gesture_y),
            CoordinatesSelectorRequest(x=300, y=gesture_y),
            duration=300,
        )
        # swipe() reports success by returning None.
        return swipe_error is None
    except Exception as exc:
        logger.error(f"Failed to press back: {exc}")
        return False
|
|
173
|
+
|
|
174
|
+
async def press_home(self) -> bool:
    """Press the home button using the call that matches the active client.

    Returns:
        Whatever the client call returns, or False when it raises.
    """
    try:
        if not self._is_idb:
            return await self.ios_client.home()  # type: ignore[union-attr]
        # The IDB client exposes hardware buttons through its button call.
        return await self.ios_client.button(button_type=HIDButtonType.HOME)  # type: ignore[call-arg, union-attr]
    except Exception as exc:
        logger.error(f"Failed to press home: {exc}")
        return False
|
|
184
|
+
|
|
185
|
+
async def press_enter(self) -> bool:
    """Press the enter/return key by sending key code 40 to the client.

    Returns:
        True when the key call completes, False when it raises.
    """
    try:
        # 40 is presumably the HID usage code for Return - confirm against the client.
        await self.ios_client.key(40)  # type: ignore[call-arg]
    except Exception as exc:
        logger.error(f"Failed to press enter: {exc}")
        return False
    return True
|
|
193
|
+
|
|
194
|
+
async def get_ui_hierarchy(self) -> list[dict]:
    """Read the accessibility data from the client and normalise it.

    The client call is limited to 40 seconds.

    Returns:
        The processed element list, or an empty list when the client
        returns None, the call times out, or anything else raises.
    """
    try:
        accessibility_info = await asyncio.wait_for(
            self.ios_client.describe_all(), timeout=40.0
        )
        if accessibility_info is None:
            logger.warning("Accessibility info returned None")
            return []

        return self._process_flat_ios_hierarchy(accessibility_info)
    # asyncio.TimeoutError only became an alias of the builtin TimeoutError in
    # Python 3.11; naming both keeps the timeout branch reachable on 3.10.
    except (asyncio.TimeoutError, TimeoutError):
        logger.error("Timeout waiting for UI hierarchy (40s)")
        return []
    except Exception as e:
        logger.error(f"Failed to get UI hierarchy: {e}")
        return []
|
|
212
|
+
|
|
213
|
+
def _process_flat_ios_hierarchy(self, accessibility_data: list[dict]) -> list[dict]:
    """
    Process flat iOS accessibility info into our standard format.

    IDB with nested=False returns a flat list of all elements.

    Each dict node becomes an element with the keys ``type``, ``value``,
    ``label``, ``frame``, ``enabled`` and ``visible``. A ``bounds`` string of
    the form "[x1,y1][x2,y2]" is added when the node's frame is a dict with
    usable ``x``, ``y``, ``width`` and ``height`` values. Entries that are
    not dicts are skipped.
    """
    elements = []

    for node in accessibility_data:
        if not isinstance(node, dict):
            continue

        frame = node.get("frame", {})
        element = {
            "type": node.get("type", ""),
            "value": node.get("AXValue", ""),
            "label": node.get("AXLabel", node.get("label", "")),
            "frame": frame,
            "enabled": node.get("enabled", False),
            "visible": True,  # Elements in the list are generally visible
        }

        if isinstance(frame, dict):
            try:
                x1 = int(frame["x"])
                y1 = int(frame["y"])
                x2 = int(frame["x"] + frame["width"])
                y2 = int(frame["y"] + frame["height"])
            except (KeyError, TypeError, ValueError, OverflowError):
                # An incomplete or non-numeric frame (e.g. a None coordinate)
                # only costs this element its bounds; it must not abort the
                # processing of the whole hierarchy.
                pass
            else:
                element["bounds"] = f"[{x1},{y1}][{x2},{y2}]"

        elements.append(element)

    return elements
|
|
247
|
+
|
|
248
|
+
def find_element(
    self,
    ui_hierarchy: list[dict],
    resource_id: str | None = None,
    text: str | None = None,
    index: int = 0,
) -> tuple[dict | None, Bounds | None, str | None]:
    """Find a UI element in the iOS hierarchy.

    iOS has no resource id, so ``resource_id`` is compared with the element
    ``type``; ``text`` is compared with the element ``value`` and ``label``.
    An element qualifies when either criterion matches.

    Args:
        ui_hierarchy: Elements to search.
        resource_id: Element type to match.
        text: Exact value or label to match.
        index: Position within the matches. Negative values count from the
            end, as with list indexing.

    Returns:
        Tuple of (element, bounds, error_message). On success the error is
        None; on failure the element and bounds are None.
    """
    if not resource_id and not text:
        return None, None, "No resource_id or text provided"

    def _qualifies(element: dict) -> bool:
        if resource_id and element.get("type") == resource_id:
            return True
        return bool(text) and (element.get("value") == text or element.get("label") == text)

    matches = [element for element in ui_hierarchy if _qualifies(element)]
    criteria = f"type='{resource_id}'" if resource_id else f"text='{text}'"

    if not matches:
        return None, None, f"No element found with {criteria}"

    # Reject indexes outside the list in either direction, so a negative
    # index past the start yields an error message rather than an IndexError.
    if not -len(matches) <= index < len(matches):
        return (
            None,
            None,
            f"Index {index} out of range for {criteria} (found {len(matches)} matches)",
        )

    element = matches[index]
    return element, self._extract_bounds(element), None
|
|
284
|
+
|
|
285
|
+
def _extract_bounds(self, element: dict) -> Bounds | None:
    """Extract bounds from an iOS UI element.

    Parses the element's ``bounds`` string, formatted as "[x1,y1][x2,y2]".
    Coordinates may be negative: the hierarchy processing writes the frame
    origin verbatim, so an origin left of or above the screen produces a
    leading minus sign.

    Returns:
        A Bounds built from the four integers, or None when the element has
        no usable bounds string or Bounds construction raises ValueError.
    """
    bounds_str = element.get("bounds")
    if not bounds_str or not isinstance(bounds_str, str):
        return None

    try:
        # Each coordinate is an optionally negative integer.
        match = re.match(r"\[(-?\d+),(-?\d+)\]\[(-?\d+),(-?\d+)\]", bounds_str)
        if match:
            x1, y1, x2, y2 = (int(group) for group in match.groups())
            return Bounds(x1=x1, y1=y1, x2=x2, y2=y2)
    except (ValueError, IndexError):
        return None

    return None
|
|
305
|
+
|
|
306
|
+
async def erase_text(self, nb_chars: int | None = None) -> bool:
|
|
307
|
+
"""Erase text by sending delete key presses."""
|
|
308
|
+
try:
|
|
309
|
+
if nb_chars is None:
|
|
310
|
+
nb_chars = 50 # Default to erasing 50 characters
|
|
311
|
+
# iOS delete key code is 42 (HID keyboard delete)
|
|
312
|
+
for _ in range(nb_chars):
|
|
313
|
+
await self.ios_client.key(42) # type: ignore[call-arg]
|
|
314
|
+
return True
|
|
315
|
+
except Exception as e:
|
|
316
|
+
logger.error(f"Failed to erase text: {e}")
|
|
317
|
+
return False
|
|
318
|
+
|
|
319
|
+
async def cleanup(self) -> None:
    """Release controller resources by delegating to the client's cleanup."""
    logger.debug("iOS controller cleanup")
    client = self.ios_client
    await client.cleanup()
|
|
323
|
+
|
|
324
|
+
def get_compressed_b64_screenshot(self, image_base64: str, quality: int = 50) -> str:
    """Re-encode a base64 image as JPEG and return it as base64 text.

    Args:
        image_base64: Base64-encoded image, optionally prefixed with a
            ``data:image...,`` data-URL header, which is stripped.
        quality: JPEG quality passed to the encoder.

    Returns:
        The base64-encoded JPEG bytes.
    """
    if image_base64.startswith("data:image"):
        image_base64 = image_base64.split(",")[1]

    image_data = base64.b64decode(image_base64)
    image = Image.open(BytesIO(image_data))

    # JPEG has no alpha channel, so images that may carry transparency are
    # flattened onto a white background. Converting to RGBA first gives every
    # such mode (RGBA, LA, palette) a real alpha band to use as the paste
    # mask, instead of honouring transparency for RGBA only.
    if image.mode in ("RGBA", "LA", "P"):
        rgba_image = image.convert("RGBA")
        rgb_image = Image.new("RGB", rgba_image.size, (255, 255, 255))
        rgb_image.paste(rgba_image, mask=rgba_image.split()[-1])
        image = rgb_image

    compressed_io = BytesIO()
    image.save(compressed_io, format="JPEG", quality=quality, optimize=True)

    return base64.b64encode(compressed_io.getvalue()).decode("utf-8")
|
|
342
|
+
|
|
343
|
+
async def start_video_recording(
    self,
    max_duration_seconds: int = DEFAULT_MAX_DURATION_SECONDS,
) -> VideoRecordingResult:
    """Start screen recording on iOS device/simulator using idb record-video.

    Spawns ``idb record-video`` writing to ``recording.mp4`` in a fresh
    temporary directory and registers a RecordingSession for the device.

    Args:
        max_duration_seconds: Reported in the success message.
            NOTE(review): nothing in this method enforces the limit - confirm
            whether it is enforced elsewhere.

    Returns:
        VideoRecordingResult with ``success=False`` when a recording is
        already active for the device or when starting fails, otherwise
        ``success=True``.
    """
    import shutil  # needed only to clean up after a failed start

    device_id = self.device_id

    if has_active_session(device_id):
        return VideoRecordingResult(
            success=False,
            message=f"Recording already in progress for device {device_id}",
        )

    temp_dir = None
    process = None
    session_registered = False
    try:
        temp_dir = tempfile.mkdtemp(prefix="mobile_use_video_")
        video_path = Path(temp_dir) / "recording.mp4"

        cmd = ["idb", "record-video", "--udid", device_id, str(video_path)]

        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )

        session = RecordingSession(
            device_id=device_id,
            start_time=time.time(),
            process=process,
            local_video_path=video_path,
        )
        set_active_session(device_id, session)
        session_registered = True

        logger.info(f"Started iOS screen recording on {device_id}")
        return VideoRecordingResult(
            success=True,
            message=f"Recording started (max {max_duration_seconds}s).",
        )

    except Exception as e:
        logger.error(f"Failed to start iOS recording: {e}")
        # Until the session is registered nothing else tracks the recorder
        # process or its directory, so release both here instead of leaking.
        if not session_registered:
            if process is not None and process.returncode is None:
                try:
                    process.kill()
                except ProcessLookupError:
                    pass
            if temp_dir is not None:
                shutil.rmtree(temp_dir, ignore_errors=True)
        return VideoRecordingResult(
            success=False,
            message=f"Failed to start recording: {e}",
        )
|
|
388
|
+
|
|
389
|
+
async def stop_video_recording(self) -> VideoRecordingResult:
    """Stop iOS recording and retrieve the video file.

    Terminates the recorder process (killing it if it has not exited after
    10 seconds), waits VIDEO_READY_DELAY_SECONDS, removes the active session
    and checks that the recorded file exists.

    Returns:
        VideoRecordingResult with ``success=True`` and ``video_path`` when
        the file exists; ``success=False`` when there is no active session,
        the file is missing, or stopping raises.
    """
    device_id = self.device_id
    session = get_active_session(device_id)

    if not session:
        return VideoRecordingResult(
            success=False,
            message=f"No active recording for device {device_id}",
        )

    try:
        process = session.process
        # Only signal a recorder that is still running: terminate() on a
        # process that already exited raises ProcessLookupError, which would
        # report a failure even when a video file was written.
        if process is not None and getattr(process, "returncode", None) is None:
            try:
                try:
                    process.terminate()
                    await asyncio.wait_for(process.wait(), timeout=10.0)
                # asyncio.TimeoutError is a distinct class before Python 3.11.
                except (asyncio.TimeoutError, TimeoutError):
                    process.kill()
                    await process.wait()
            except ProcessLookupError:
                # The recorder exited between the check and the signal.
                pass

        await asyncio.sleep(VIDEO_READY_DELAY_SECONDS)

        local_path = session.local_video_path
        remove_active_session(device_id)

        duration = time.time() - session.start_time
        logger.info(f"Stopped iOS recording after {duration:.1f}s, saved to {local_path}")

        if local_path and local_path.exists():
            return VideoRecordingResult(
                success=True,
                message=f"Recording stopped after {duration:.1f}s",
                video_path=local_path,
            )
        else:
            return VideoRecordingResult(
                success=False,
                message="Recording stopped but video file not found",
            )

    except Exception as e:
        logger.error(f"Failed to stop iOS recording: {e}")
        remove_active_session(device_id)
        return VideoRecordingResult(
            success=False,
            message=f"Failed to stop recording: {e}",
        )
|