minitap-mobile-use 3.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minitap/mobile_use/__init__.py +0 -0
- minitap/mobile_use/agents/contextor/contextor.md +55 -0
- minitap/mobile_use/agents/contextor/contextor.py +175 -0
- minitap/mobile_use/agents/contextor/types.py +36 -0
- minitap/mobile_use/agents/cortex/cortex.md +135 -0
- minitap/mobile_use/agents/cortex/cortex.py +152 -0
- minitap/mobile_use/agents/cortex/types.py +15 -0
- minitap/mobile_use/agents/executor/executor.md +42 -0
- minitap/mobile_use/agents/executor/executor.py +87 -0
- minitap/mobile_use/agents/executor/tool_node.py +152 -0
- minitap/mobile_use/agents/hopper/hopper.md +15 -0
- minitap/mobile_use/agents/hopper/hopper.py +44 -0
- minitap/mobile_use/agents/orchestrator/human.md +12 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.md +21 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.py +134 -0
- minitap/mobile_use/agents/orchestrator/types.py +11 -0
- minitap/mobile_use/agents/outputter/human.md +25 -0
- minitap/mobile_use/agents/outputter/outputter.py +85 -0
- minitap/mobile_use/agents/outputter/test_outputter.py +167 -0
- minitap/mobile_use/agents/planner/human.md +14 -0
- minitap/mobile_use/agents/planner/planner.md +126 -0
- minitap/mobile_use/agents/planner/planner.py +101 -0
- minitap/mobile_use/agents/planner/types.py +51 -0
- minitap/mobile_use/agents/planner/utils.py +70 -0
- minitap/mobile_use/agents/summarizer/summarizer.py +35 -0
- minitap/mobile_use/agents/video_analyzer/__init__.py +5 -0
- minitap/mobile_use/agents/video_analyzer/human.md +5 -0
- minitap/mobile_use/agents/video_analyzer/video_analyzer.md +37 -0
- minitap/mobile_use/agents/video_analyzer/video_analyzer.py +111 -0
- minitap/mobile_use/clients/browserstack_client.py +477 -0
- minitap/mobile_use/clients/idb_client.py +429 -0
- minitap/mobile_use/clients/ios_client.py +332 -0
- minitap/mobile_use/clients/ios_client_config.py +141 -0
- minitap/mobile_use/clients/ui_automator_client.py +330 -0
- minitap/mobile_use/clients/wda_client.py +526 -0
- minitap/mobile_use/clients/wda_lifecycle.py +367 -0
- minitap/mobile_use/config.py +413 -0
- minitap/mobile_use/constants.py +3 -0
- minitap/mobile_use/context.py +106 -0
- minitap/mobile_use/controllers/__init__.py +0 -0
- minitap/mobile_use/controllers/android_controller.py +524 -0
- minitap/mobile_use/controllers/controller_factory.py +46 -0
- minitap/mobile_use/controllers/device_controller.py +182 -0
- minitap/mobile_use/controllers/ios_controller.py +436 -0
- minitap/mobile_use/controllers/platform_specific_commands_controller.py +199 -0
- minitap/mobile_use/controllers/types.py +106 -0
- minitap/mobile_use/controllers/unified_controller.py +193 -0
- minitap/mobile_use/graph/graph.py +160 -0
- minitap/mobile_use/graph/state.py +115 -0
- minitap/mobile_use/main.py +309 -0
- minitap/mobile_use/sdk/__init__.py +12 -0
- minitap/mobile_use/sdk/agent.py +1294 -0
- minitap/mobile_use/sdk/builders/__init__.py +10 -0
- minitap/mobile_use/sdk/builders/agent_config_builder.py +307 -0
- minitap/mobile_use/sdk/builders/index.py +15 -0
- minitap/mobile_use/sdk/builders/task_request_builder.py +236 -0
- minitap/mobile_use/sdk/constants.py +1 -0
- minitap/mobile_use/sdk/examples/README.md +83 -0
- minitap/mobile_use/sdk/examples/__init__.py +1 -0
- minitap/mobile_use/sdk/examples/app_lock_messaging.py +54 -0
- minitap/mobile_use/sdk/examples/platform_manual_task_example.py +67 -0
- minitap/mobile_use/sdk/examples/platform_minimal_example.py +48 -0
- minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
- minitap/mobile_use/sdk/examples/smart_notification_assistant.py +225 -0
- minitap/mobile_use/sdk/examples/video_transcription_example.py +117 -0
- minitap/mobile_use/sdk/services/cloud_mobile.py +656 -0
- minitap/mobile_use/sdk/services/platform.py +434 -0
- minitap/mobile_use/sdk/types/__init__.py +51 -0
- minitap/mobile_use/sdk/types/agent.py +84 -0
- minitap/mobile_use/sdk/types/exceptions.py +138 -0
- minitap/mobile_use/sdk/types/platform.py +183 -0
- minitap/mobile_use/sdk/types/task.py +269 -0
- minitap/mobile_use/sdk/utils.py +29 -0
- minitap/mobile_use/services/accessibility.py +100 -0
- minitap/mobile_use/services/llm.py +247 -0
- minitap/mobile_use/services/telemetry.py +421 -0
- minitap/mobile_use/tools/index.py +67 -0
- minitap/mobile_use/tools/mobile/back.py +52 -0
- minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
- minitap/mobile_use/tools/mobile/focus_and_clear_text.py +317 -0
- minitap/mobile_use/tools/mobile/focus_and_input_text.py +153 -0
- minitap/mobile_use/tools/mobile/launch_app.py +86 -0
- minitap/mobile_use/tools/mobile/long_press_on.py +169 -0
- minitap/mobile_use/tools/mobile/open_link.py +62 -0
- minitap/mobile_use/tools/mobile/press_key.py +83 -0
- minitap/mobile_use/tools/mobile/stop_app.py +62 -0
- minitap/mobile_use/tools/mobile/swipe.py +156 -0
- minitap/mobile_use/tools/mobile/tap.py +154 -0
- minitap/mobile_use/tools/mobile/video_recording.py +177 -0
- minitap/mobile_use/tools/mobile/wait_for_delay.py +81 -0
- minitap/mobile_use/tools/scratchpad.py +147 -0
- minitap/mobile_use/tools/test_utils.py +413 -0
- minitap/mobile_use/tools/tool_wrapper.py +16 -0
- minitap/mobile_use/tools/types.py +35 -0
- minitap/mobile_use/tools/utils.py +336 -0
- minitap/mobile_use/utils/app_launch_utils.py +173 -0
- minitap/mobile_use/utils/cli_helpers.py +37 -0
- minitap/mobile_use/utils/cli_selection.py +143 -0
- minitap/mobile_use/utils/conversations.py +31 -0
- minitap/mobile_use/utils/decorators.py +124 -0
- minitap/mobile_use/utils/errors.py +6 -0
- minitap/mobile_use/utils/file.py +13 -0
- minitap/mobile_use/utils/logger.py +183 -0
- minitap/mobile_use/utils/media.py +186 -0
- minitap/mobile_use/utils/recorder.py +52 -0
- minitap/mobile_use/utils/requests_utils.py +37 -0
- minitap/mobile_use/utils/shell_utils.py +20 -0
- minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
- minitap/mobile_use/utils/time.py +6 -0
- minitap/mobile_use/utils/ui_hierarchy.py +132 -0
- minitap/mobile_use/utils/video.py +281 -0
- minitap_mobile_use-3.3.0.dist-info/METADATA +329 -0
- minitap_mobile_use-3.3.0.dist-info/RECORD +115 -0
- minitap_mobile_use-3.3.0.dist-info/WHEEL +4 -0
- minitap_mobile_use-3.3.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from concurrent.futures.thread import ThreadPoolExecutor
|
|
3
|
+
from datetime import date
|
|
4
|
+
from shutil import which
|
|
5
|
+
|
|
6
|
+
from adbutils import AdbDevice
|
|
7
|
+
|
|
8
|
+
from minitap.mobile_use.clients.ios_client import (
|
|
9
|
+
DeviceType,
|
|
10
|
+
get_all_ios_devices_detailed,
|
|
11
|
+
get_device_type,
|
|
12
|
+
)
|
|
13
|
+
from minitap.mobile_use.context import DevicePlatform, MobileUseContext
|
|
14
|
+
from minitap.mobile_use.utils.logger import MobileUseLogger, get_logger
|
|
15
|
+
from minitap.mobile_use.utils.shell_utils import run_shell_command_on_host
|
|
16
|
+
|
|
17
|
+
logger = get_logger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_adb_device(ctx: MobileUseContext) -> AdbDevice:
    """Return the ADB device handle for the Android device referenced by ``ctx``.

    Args:
        ctx: Execution context; its ``device`` supplies the platform and serial.

    Returns:
        The device object obtained from the context's ADB client.

    Raises:
        ValueError: If the context's device platform is not Android.
        ConnectionError: If the ADB client returns a falsy device for the serial.
    """
    target = ctx.device
    if target.mobile_platform != DevicePlatform.ANDROID:
        raise ValueError("Device is not an Android device")

    handle = ctx.get_adb_client().device(serial=target.device_id)
    if handle:
        return handle
    raise ConnectionError(f"Device {target.device_id} not found.")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def get_first_device(
    logger: MobileUseLogger | None = None,
    prefer_physical: bool = True,
) -> tuple[str | None, DevicePlatform | None, DeviceType | None]:
    """Gets the first available device.

    Android is probed first (only when an ``adb`` executable is on PATH), then iOS.

    Args:
        logger: Optional logger for error messages
        prefer_physical: If True, prefer physical iOS devices over simulators

    Returns:
        Tuple of (device_id, platform, device_type) or (None, None, None) if no device found.
        device_type is only set for iOS devices (SIMULATOR or PHYSICAL).
    """
    # Check for Android devices first
    if which("adb"):
        try:
            android_output = run_shell_command_on_host("adb devices")
        except RuntimeError as e:
            # Best effort: a failing adb must not prevent the iOS lookup below.
            if logger:
                logger.error(f"ADB command failed: {e}")
        else:
            for line in android_output.splitlines():
                fields = line.split()
                # A usable row is "<serial> <state>" with state exactly "device".
                # Matching on the state column (instead of a substring search)
                # skips the "List of devices attached" header as well as any row
                # that merely contains the word "device" somewhere in its text.
                if len(fields) >= 2 and fields[1] == "device":
                    return fields[0], DevicePlatform.ANDROID, None

    # Check for iOS devices (both simulators and physical)
    ios_devices = get_all_ios_devices_detailed()
    if ios_devices:
        if prefer_physical:
            # False sorts before True, so physical devices move to the front
            # while the relative order inside each group is kept (stable sort).
            ios_devices.sort(key=lambda d: d["type"] != DeviceType.PHYSICAL)

        device = ios_devices[0]
        if logger:
            logger.info(
                f"Selected iOS device: {device['name']} ({device['type'].value}) - {device['udid']}"
            )
        return device["udid"], DevicePlatform.IOS, device["type"]

    return None, None, None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def get_device_date(ctx: MobileUseContext) -> str:
    """Return the current date/time as a string for the device in ``ctx``.

    For iOS the host's local clock is formatted as
    ``"%a %b %d %H:%M:%S %Z %Y"``; for Android the raw output of the
    ``date`` shell command run on the device is returned.

    Args:
        ctx: Execution context identifying the target device.

    Returns:
        The formatted date string.
    """
    if ctx.device.mobile_platform == DevicePlatform.IOS:
        # Local import keeps this block self-contained; the module only imports `date`.
        from datetime import datetime

        # A plain `date` has no time or timezone, so %H:%M:%S would always render
        # as 00:00:00 and %Z as an empty string. Use an aware datetime instead.
        return datetime.now().astimezone().strftime("%a %b %d %H:%M:%S %Z %Y")
    device = get_adb_device(ctx)
    return str(device.shell("date"))
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def list_packages(ctx: MobileUseContext) -> str:
    """List installed app identifiers on the device referenced by ``ctx``.

    * Android: runs ``pm list packages -f`` and returns the sorted package
      names, one per line.
    * iOS simulator: returns the output of ``xcrun simctl listapps`` filtered
      by ``grep CFBundleIdentifier`` as-is.
    * iOS physical device: tries ``ios-deploy`` and then ``ideviceinstaller``
      (each only if found on PATH) and returns sorted bundle ids, one per line.
      If neither succeeds, a warning is logged and ``""`` is returned.

    Args:
        ctx: Execution context identifying the target device.

    Returns:
        Newline-separated identifiers (or raw simulator output / empty string).
    """
    if ctx.device.mobile_platform != DevicePlatform.IOS:
        adb_device = get_adb_device(ctx)
        # "-f" also prints the apk path: "package:/path/to/app.apk=com.example.app"
        listing = str(adb_device.shell("pm list packages -f"))

        # Keep only what follows the last "=" on each row, i.e. the package name.
        names = [row.split("=")[-1].strip() for row in listing.strip().split("\n") if "=" in row]
        return "\n".join(sorted(names))

    udid = ctx.device.device_id

    if get_device_type(udid) == DeviceType.SIMULATOR:
        simctl_args = ["xcrun", "simctl", "listapps", udid, "|", "grep", "CFBundleIdentifier"]
        return run_shell_command_on_host(" ".join(simctl_args))

    # Physical device: try ios-deploy first (common with React Native/Cordova)
    if which("ios-deploy"):
        deploy_args = ["ios-deploy", "--id", udid, "--list_bundle_id"]
        try:
            raw = run_shell_command_on_host(" ".join(deploy_args))
            trimmed_rows = (row.strip() for row in raw.strip().split("\n"))
            return "\n".join(sorted(row for row in trimmed_rows if row))
        except Exception as e:
            # Best effort: fall through to the next tool.
            logger.debug(f"ios-deploy failed: {e}")

    # Fallback: ideviceinstaller (libimobiledevice)
    if which("ideviceinstaller"):
        installer_args = ["ideviceinstaller", "-l", "-u", udid]
        try:
            raw = run_shell_command_on_host(" ".join(installer_args))
            # Rows look like "CFBundleIdentifier, CFBundleVersion, CFBundleDisplayName"
            bundle_ids = []
            for row in raw.strip().split("\n"):
                if ", " not in row:
                    continue
                candidate = row.split(", ")[0].strip()
                # The header row starts with "CFBundle" and is not a real bundle id.
                if candidate and not candidate.startswith("CFBundle"):
                    bundle_ids.append(candidate)
            return "\n".join(sorted(bundle_ids))
        except Exception as e:
            logger.debug(f"ideviceinstaller failed: {e}")

    logger.warning(
        "Cannot list apps on physical iOS device. Install ios-deploy "
        "(npm install -g ios-deploy) or ideviceinstaller (brew install ideviceinstaller)"
    )
    return ""
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def get_current_foreground_package(ctx: MobileUseContext) -> str | None:
    """
    Resolve the identifier of the app that currently has focus.

    iOS is delegated to ``_get_ios_foreground_package``. On Android the
    ``mCurrentFocus`` line of ``dumpsys window`` is parsed and only the bare
    package name (e.g., 'com.whatsapp') is returned, without the activity
    or window metadata.

    Returns:
        The package/bundle name, or None if it cannot be determined
        (any exception is logged at debug level and mapped to None).
    """
    try:
        if ctx.device.mobile_platform == DevicePlatform.IOS:
            return _get_ios_foreground_package(ctx)

        adb_device = get_adb_device(ctx)
        dump = str(adb_device.shell("dumpsys window | grep mCurrentFocus"))

        # Keep whatever follows the last "mCurrentFocus=" marker.
        _, marker, focus_text = dump.rpartition("mCurrentFocus=")
        if not marker:
            return None

        # A "package/activity" pair is expected; without "/" there is nothing to parse.
        if "/" not in focus_text:
            return None

        for token in focus_text.split():
            if "." not in token or token.startswith("Window"):
                continue
            # Drop the "/activity" suffix and a trailing "}" if present.
            candidate = token.split("/")[0].rstrip("}")
            if candidate and "." in candidate:
                return candidate

        return None

    except Exception as e:
        logger.debug(f"Failed to get current foreground package: {e}")
        return None
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _get_ios_foreground_package(ctx: MobileUseContext) -> str | None:
    """Get foreground package for iOS devices (simulator or physical).

    Awaits ``ctx.ios_client.app_current()`` from synchronous code. When the
    caller is already inside a running event loop the coroutine is executed
    with ``asyncio.run`` on a worker thread (bounded by a 10 second wait);
    otherwise ``asyncio.run`` is called directly.

    Args:
        ctx: Execution context; ``ctx.ios_client`` may be falsy.

    Returns:
        The bundle id of the current app, or None if there is no client,
        no bundle id, or any error occurs (errors are logged at debug level).
    """
    ios_client = ctx.ios_client

    if not ios_client:
        return None

    try:
        # Probe for a running loop on its own, so that a RuntimeError raised by
        # the actual call is not mistaken for "no running loop".
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            inside_running_loop = False
        else:
            inside_running_loop = True

        if inside_running_loop:
            # Already in async context - run in separate thread.
            # The pool is not used as a context manager: leaving a `with` block
            # waits for the worker to finish, which would defeat the timeout.
            pool = ThreadPoolExecutor(max_workers=1)
            try:
                app_info = pool.submit(asyncio.run, ios_client.app_current()).result(timeout=10)
            finally:
                pool.shutdown(wait=False)
        else:
            # No running loop - use asyncio.run()
            app_info = asyncio.run(ios_client.app_current())

        if app_info and app_info.bundle_id:
            return app_info.bundle_id
    except Exception as e:
        logger.debug(f"Failed to get foreground app: {e}")
    return None
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class TapOutput(BaseModel):
    """Output from tap operations."""

    # Optional failure message; defaults to None.
    error: str | None = Field(
        description="Error message if tap failed",
        default=None,
    )
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Bounds(BaseModel):
    """Represents the bounds of a UI element."""

    # Two corner points of the rectangle: (x1, y1) and (x2, y2).
    x1: int
    y1: int
    x2: int
    y2: int

    def get_center(self) -> "CoordinatesSelectorRequest":
        """Return the midpoint of the bounds, floor-dividing on each axis."""
        mid_x = (self.x1 + self.x2) // 2
        mid_y = (self.y1 + self.y2) // 2
        return CoordinatesSelectorRequest(x=mid_x, y=mid_y)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class CoordinatesSelectorRequest(BaseModel):
    # A point given as integer x/y values; unknown extra fields are rejected.
    model_config = ConfigDict(extra="forbid")
    x: int
    y: int

    def to_str(self):
        """Render the point as ``x, y``."""
        return "{}, {}".format(self.x, self.y)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class PercentagesSelectorRequest(BaseModel):
    # A point expressed as percentages of the screen size:
    #   0%,0%      -> top-left corner
    #   100%,100%  -> bottom-right corner
    #   50%,50%    -> center
    # Unknown extra fields are rejected.
    model_config = ConfigDict(extra="forbid")

    x_percent: int = Field(ge=0, le=100, description="X percentage (0-100)")
    y_percent: int = Field(ge=0, le=100, description="Y percentage (0-100)")

    def to_str(self):
        """Render the point as ``x%, y%``."""
        return "{}%, {}%".format(self.x_percent, self.y_percent)

    def to_coords(self, width: int, height: int) -> CoordinatesSelectorRequest:
        """Convert the percentages to pixel coordinates for a width x height area.

        Each axis is scaled, truncated to an int and clamped to
        ``[0, max(0, extent - 1)]`` so that 100% maps to the last valid pixel.
        """

        def scale(extent: int, percent: int) -> int:
            pixel = int(extent * percent / 100)
            last_pixel = max(0, extent - 1)
            return min(max(pixel, 0), last_pixel)

        return CoordinatesSelectorRequest(
            x=scale(width, self.x_percent),
            y=scale(height, self.y_percent),
        )
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class SwipeStartEndCoordinatesRequest(BaseModel):
    # Swipe endpoints given as coordinate points; unknown extra fields are rejected.
    model_config = ConfigDict(extra="forbid")
    start: CoordinatesSelectorRequest
    end: CoordinatesSelectorRequest

    def to_dict(self):
        """Return both endpoints rendered through their ``to_str()``."""
        start_text = self.start.to_str()
        end_text = self.end.to_str()
        return {"start": start_text, "end": end_text}
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class SwipeStartEndPercentagesRequest(BaseModel):
    # Swipe endpoints given as percentage points; unknown extra fields are rejected.
    model_config = ConfigDict(extra="forbid")
    start: PercentagesSelectorRequest
    end: PercentagesSelectorRequest

    def to_dict(self):
        """Return both endpoints rendered through their ``to_str()``."""
        start_text = self.start.to_str()
        end_text = self.end.to_str()
        return {"start": start_text, "end": end_text}

    def to_coords(self, width: int, height: int) -> SwipeStartEndCoordinatesRequest:
        """Convert both endpoints to coordinates for a width x height area."""
        start_point = self.start.to_coords(width, height)
        end_point = self.end.to_coords(width, height)
        return SwipeStartEndCoordinatesRequest(start=start_point, end=end_point)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class SwipeRequest(BaseModel):
    """
    Swipe from start to end position using coordinates or percentages.
    """

    model_config = ConfigDict(extra="forbid")

    # Either a coordinate-based or a percentage-based start/end pair.
    swipe_mode: SwipeStartEndCoordinatesRequest | SwipeStartEndPercentagesRequest = Field(
        description="Start and end positions. Use EITHER (x, y) OR (x_percent, y_percent)."
    )
    # Optional duration, validated to lie within 1..10000.
    duration: int | None = Field(
        default=None,
        description="Swipe duration in ms. If not provided, tool functions default to 400ms.",
        ge=1,
        le=10000,
    )

    def to_dict(self):
        """Flatten the request into a dict: start/end strings plus duration when truthy."""
        payload = {}
        known_modes = (SwipeStartEndCoordinatesRequest, SwipeStartEndPercentagesRequest)
        if isinstance(self.swipe_mode, known_modes):
            payload.update(self.swipe_mode.to_dict())
        if self.duration:
            payload["duration"] = self.duration
        return payload
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
2
|
+
from minitap.mobile_use.controllers.controller_factory import get_controller
|
|
3
|
+
from minitap.mobile_use.controllers.device_controller import MobileDeviceController
|
|
4
|
+
from minitap.mobile_use.controllers.types import (
|
|
5
|
+
CoordinatesSelectorRequest,
|
|
6
|
+
PercentagesSelectorRequest,
|
|
7
|
+
SwipeRequest,
|
|
8
|
+
SwipeStartEndCoordinatesRequest,
|
|
9
|
+
SwipeStartEndPercentagesRequest,
|
|
10
|
+
TapOutput,
|
|
11
|
+
)
|
|
12
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
13
|
+
|
|
14
|
+
logger = get_logger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class UnifiedMobileController:
    """Facade over the ``MobileDeviceController`` that ``get_controller(ctx)`` returns.

    Most methods forward directly to the wrapped controller; the
    percentage-based variants first convert percentages to coordinates using
    the device width/height stored in the context.
    """

    def __init__(self, ctx: MobileUseContext):
        self.ctx = ctx
        self._controller: MobileDeviceController = get_controller(ctx)

    @property
    def controller(self) -> MobileDeviceController:
        """The wrapped device controller."""
        return self._controller

    def _percent_to_coords(self, x_percent: int, y_percent: int) -> CoordinatesSelectorRequest:
        """Validate a percentage pair and convert it using the context's device size."""
        percentages = PercentagesSelectorRequest(x_percent=x_percent, y_percent=y_percent)
        return percentages.to_coords(
            width=self.ctx.device.device_width,
            height=self.ctx.device.device_height,
        )

    async def tap_at(
        self,
        x: int,
        y: int,
        long_press: bool = False,
        long_press_duration: int = 1000,
    ) -> TapOutput:
        """Tap (or long-press) at the given coordinates."""
        point = CoordinatesSelectorRequest(x=x, y=y)
        return await self._controller.tap(point, long_press, long_press_duration)

    async def tap_percentage(
        self,
        x_percent: int,
        y_percent: int,
        long_press: bool = False,
        long_press_duration: int = 1000,
    ) -> TapOutput:
        """Tap at percentage-based coordinates (0 to 100)."""
        point = self._percent_to_coords(x_percent, y_percent)
        return await self._controller.tap(point, long_press, long_press_duration)

    async def tap_element(
        self,
        resource_id: str | None = None,
        text: str | None = None,
        index: int = 0,
        long_press: bool = False,
        long_press_duration: int = 1000,
    ) -> TapOutput:
        """
        Locate a UI element in the current hierarchy and tap its center.

        Args:
            resource_id: Android resource ID or iOS element type
            text: Element text/label/value to match
            index: Which match to tap if multiple elements match
            long_press: Whether to perform long press
            long_press_duration: Duration of long press in milliseconds

        Returns:
            TapOutput with error field set on failure
        """
        hierarchy = await self._controller.get_ui_hierarchy()

        _element, bounds, error = self._controller.find_element(
            ui_hierarchy=hierarchy,
            resource_id=resource_id,
            text=text,
            index=index,
        )

        # Either the lookup reported an error or no bounds were found.
        if error or not bounds:
            return TapOutput(error=error or "Could not extract bounds for element")

        return await self._controller.tap(bounds.get_center(), long_press, long_press_duration)

    async def swipe_coords(
        self,
        start_x: int,
        start_y: int,
        end_x: int,
        end_y: int,
        duration: int = 400,
    ) -> str | None:
        """Swipe between two coordinate points."""
        origin = CoordinatesSelectorRequest(x=start_x, y=start_y)
        destination = CoordinatesSelectorRequest(x=end_x, y=end_y)
        return await self._controller.swipe(origin, destination, duration)

    async def swipe_percentage(
        self,
        start_x_percent: int,
        start_y_percent: int,
        end_x_percent: int,
        end_y_percent: int,
        duration: int = 400,
    ) -> str | None:
        """Swipe using percentage-based coordinates (0 to 100)."""
        origin = self._percent_to_coords(start_x_percent, start_y_percent)
        destination = self._percent_to_coords(end_x_percent, end_y_percent)
        return await self._controller.swipe(origin, destination, duration)

    async def swipe_request(self, request: SwipeRequest) -> str | None:
        """Execute a ``SwipeRequest``, using 400 when its duration is unset.

        Returns the wrapped controller's swipe result, or the string
        "Unsupported swipe mode" when the mode is of neither known type.
        """
        mode = request.swipe_mode

        if isinstance(mode, SwipeStartEndCoordinatesRequest):
            origin, destination = mode.start, mode.end
        elif isinstance(mode, SwipeStartEndPercentagesRequest):
            converted = mode.to_coords(
                width=self.ctx.device.device_width,
                height=self.ctx.device.device_height,
            )
            origin, destination = converted.start, converted.end
        else:
            return "Unsupported swipe mode"

        return await self._controller.swipe(
            start=origin,
            end=destination,
            duration=request.duration or 400,
        )

    async def type_text(self, text: str) -> bool:
        """Forward to the controller's ``input_text``."""
        return await self._controller.input_text(text)

    async def take_screenshot(self) -> str:
        """Forward to the controller's ``screenshot``."""
        return await self._controller.screenshot()

    async def launch_app(self, package_or_bundle_id: str) -> bool:
        """Forward to the controller's ``launch_app``."""
        return await self._controller.launch_app(package_or_bundle_id)

    async def terminate_app(self, package_or_bundle_id: str | None) -> bool:
        """Forward to the controller's ``terminate_app``."""
        return await self._controller.terminate_app(package_or_bundle_id)

    async def open_url(self, url: str) -> bool:
        """Forward to the controller's ``open_url``."""
        return await self._controller.open_url(url)

    async def go_back(self) -> bool:
        """Forward to the controller's ``press_back``."""
        return await self._controller.press_back()

    async def go_home(self) -> bool:
        """Forward to the controller's ``press_home``."""
        return await self._controller.press_home()

    async def press_enter(self) -> bool:
        """Forward to the controller's ``press_enter``."""
        return await self._controller.press_enter()

    async def erase_text(self, nb_chars: int | None = None) -> bool:
        """Forward to the controller's ``erase_text``."""
        return await self._controller.erase_text(nb_chars)

    async def get_ui_elements(self) -> list[dict]:
        """Return the controller's current UI hierarchy."""
        return await self._controller.get_ui_hierarchy()

    async def find_element(
        self,
        resource_id: str | None = None,
        text: str | None = None,
        index: int = 0,
    ) -> tuple[dict | None, str | None]:
        """Look up an element in a fresh UI hierarchy.

        Returns:
            ``(element, error)`` as reported by the controller; the bounds
            that the controller also returns are dropped.
        """
        hierarchy = await self._controller.get_ui_hierarchy()
        element, _bounds, error = self._controller.find_element(
            ui_hierarchy=hierarchy,
            resource_id=resource_id,
            text=text,
            index=index,
        )
        return element, error

    async def cleanup(self) -> None:
        """Forward to the controller's ``cleanup``."""
        await self._controller.cleanup()
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
from collections.abc import Sequence
|
|
2
|
+
from typing import Literal
|
|
3
|
+
|
|
4
|
+
from langchain_core.messages import AIMessage
|
|
5
|
+
from langgraph.constants import END, START
|
|
6
|
+
from langgraph.graph import StateGraph
|
|
7
|
+
from langgraph.graph.state import CompiledStateGraph
|
|
8
|
+
|
|
9
|
+
from minitap.mobile_use.agents.contextor.contextor import ContextorNode
|
|
10
|
+
from minitap.mobile_use.agents.cortex.cortex import CortexNode
|
|
11
|
+
from minitap.mobile_use.agents.executor.executor import ExecutorNode
|
|
12
|
+
from minitap.mobile_use.agents.executor.tool_node import ExecutorToolNode
|
|
13
|
+
from minitap.mobile_use.agents.orchestrator.orchestrator import OrchestratorNode
|
|
14
|
+
from minitap.mobile_use.agents.planner.planner import PlannerNode
|
|
15
|
+
from minitap.mobile_use.agents.planner.utils import (
|
|
16
|
+
all_completed,
|
|
17
|
+
get_current_subgoal,
|
|
18
|
+
one_of_them_is_failure,
|
|
19
|
+
)
|
|
20
|
+
from minitap.mobile_use.agents.summarizer.summarizer import SummarizerNode
|
|
21
|
+
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
22
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
23
|
+
from minitap.mobile_use.graph.state import State
|
|
24
|
+
from minitap.mobile_use.tools.index import (
|
|
25
|
+
EXECUTOR_WRAPPERS_TOOLS,
|
|
26
|
+
VIDEO_RECORDING_WRAPPERS,
|
|
27
|
+
get_tools_from_wrappers,
|
|
28
|
+
)
|
|
29
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
30
|
+
|
|
31
|
+
logger = get_logger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def convergence_node(state: State):
    """Join point for parallel execution paths; returns an empty state update."""
    no_updates: dict = {}
    return no_updates
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def convergence_gate(
    state: State,
) -> Literal["continue", "replan", "end"]:
    """Pick the route to take after the convergence node.

    Checks run in order: any failed subgoal -> "replan"; all subgoals
    completed -> "end"; no current subgoal -> "end"; otherwise "continue".
    """
    logger.info("Starting convergence_gate")
    plan = state.subgoal_plan

    if one_of_them_is_failure(plan):
        logger.info("One of the subgoals is in failure state, asking to replan")
        return "replan"

    if all_completed(plan):
        logger.info("All subgoals are completed, ending the goal")
        return "end"

    if get_current_subgoal(plan):
        return "continue"

    logger.info("No subgoal running, ending the goal")
    return "end"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def post_cortex_gate(
    state: State,
) -> Sequence[str]:
    """Return the list of routes to follow after the cortex node.

    "review_subgoals" is included when subgoals are waiting to be marked
    complete, or when there are no structured decisions (so the graph does not
    get stuck if Cortex does nothing). "execute_decisions" is included when
    there are structured decisions. Both may be returned together.
    """
    logger.info("Starting post_cortex_gate")

    needs_review = len(state.complete_subgoals_by_ids) > 0 or not state.structured_decisions
    has_decisions = bool(state.structured_decisions)

    candidates = (
        ("review_subgoals", needs_review),
        ("execute_decisions", has_decisions),
    )
    return [route for route, wanted in candidates if wanted]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def post_executor_gate(
    state: State,
) -> Literal["invoke_tools", "skip"]:
    """Decide whether the executor's last message requires running tools.

    Returns:
        "invoke_tools" when the last executor message is an ``AIMessage`` with
        at least one tool call (each call is logged); "skip" in every other
        case: no messages, a last message of another type, or no tool calls.
    """
    logger.info("Starting post_executor_gate")
    messages = state.executor_messages
    if not messages:
        return "skip"
    last_message = messages[-1]

    if not isinstance(last_message, AIMessage):
        # Without this branch the function would fall off the end and return
        # None, which is not one of the declared routes.
        logger.info("[executor] Last message is not an AIMessage, skipping tools")
        return "skip"

    tool_calls = getattr(last_message, "tool_calls", None)
    if not tool_calls:
        logger.info("[executor] ❌ No tool calls found")
        return "skip"

    logger.info("[executor] Executing " + str(len(tool_calls)) + " tool calls:")
    for tool_call in tool_calls:
        logger.info("-------------")
        logger.info("[executor] - " + str(tool_call) + "\n")
    logger.info("-------------")
    return "invoke_tools"
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
    """Build and compile the agent state graph for ``ctx``.

    Flow as wired below:
    START -> planner -> orchestrator -> convergence;
    convergence routes to contextor ("continue"), planner ("replan") or END;
    contextor -> cortex, which fans out to orchestrator and/or executor;
    executor -> executor_tools or straight to summarizer; summarizer -> convergence.

    Args:
        ctx: Execution context handed to every node; also decides whether the
            video recording tools are added to the executor tool set.

    Returns:
        The compiled graph.
    """
    builder = StateGraph(State)

    ## Define nodes
    builder.add_node("planner", PlannerNode(ctx))
    builder.add_node("orchestrator", OrchestratorNode(ctx))
    builder.add_node("contextor", ContextorNode(ctx))
    builder.add_node("cortex", CortexNode(ctx))
    builder.add_node("executor", ExecutorNode(ctx))

    # Copy the shared wrapper collection so it is never mutated.
    wrappers = list(EXECUTOR_WRAPPERS_TOOLS)
    if ctx.video_recording_enabled:
        wrappers.extend(VIDEO_RECORDING_WRAPPERS)

    tool_node = ExecutorToolNode(
        tools=get_tools_from_wrappers(ctx=ctx, wrappers=wrappers),
        messages_key=EXECUTOR_MESSAGES_KEY,
        trace_id=ctx.trace_id,
    )
    builder.add_node("executor_tools", tool_node)

    builder.add_node("summarizer", SummarizerNode(ctx))

    # Registered with defer=True; it is the join point of the parallel paths.
    builder.add_node(node="convergence", action=convergence_node, defer=True)

    ## Linking nodes
    builder.add_edge(START, "planner")
    builder.add_edge("planner", "orchestrator")
    builder.add_edge("orchestrator", "convergence")
    builder.add_edge("contextor", "cortex")

    cortex_routes = {
        "review_subgoals": "orchestrator",
        "execute_decisions": "executor",
    }
    builder.add_conditional_edges("cortex", post_cortex_gate, cortex_routes)

    executor_routes = {"invoke_tools": "executor_tools", "skip": "summarizer"}
    builder.add_conditional_edges("executor", post_executor_gate, executor_routes)
    builder.add_edge("executor_tools", "summarizer")

    builder.add_edge("summarizer", "convergence")

    convergence_routes = {
        "continue": "contextor",
        "replan": "planner",
        "end": END,
    }
    builder.add_conditional_edges(
        source="convergence",
        path=convergence_gate,
        path_map=convergence_routes,
    )

    return builder.compile()
|