minitap-mobile-use 0.0.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of minitap-mobile-use might be problematic. Click here for more details.
- minitap/mobile_use/__init__.py +0 -0
- minitap/mobile_use/agents/contextor/contextor.py +42 -0
- minitap/mobile_use/agents/cortex/cortex.md +93 -0
- minitap/mobile_use/agents/cortex/cortex.py +107 -0
- minitap/mobile_use/agents/cortex/types.py +11 -0
- minitap/mobile_use/agents/executor/executor.md +73 -0
- minitap/mobile_use/agents/executor/executor.py +84 -0
- minitap/mobile_use/agents/executor/executor_context_cleaner.py +27 -0
- minitap/mobile_use/agents/executor/utils.py +11 -0
- minitap/mobile_use/agents/hopper/hopper.md +13 -0
- minitap/mobile_use/agents/hopper/hopper.py +45 -0
- minitap/mobile_use/agents/orchestrator/human.md +13 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.md +18 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.py +114 -0
- minitap/mobile_use/agents/orchestrator/types.py +14 -0
- minitap/mobile_use/agents/outputter/human.md +25 -0
- minitap/mobile_use/agents/outputter/outputter.py +75 -0
- minitap/mobile_use/agents/outputter/test_outputter.py +107 -0
- minitap/mobile_use/agents/planner/human.md +12 -0
- minitap/mobile_use/agents/planner/planner.md +64 -0
- minitap/mobile_use/agents/planner/planner.py +64 -0
- minitap/mobile_use/agents/planner/types.py +44 -0
- minitap/mobile_use/agents/planner/utils.py +45 -0
- minitap/mobile_use/agents/summarizer/summarizer.py +34 -0
- minitap/mobile_use/clients/device_hardware_client.py +23 -0
- minitap/mobile_use/clients/ios_client.py +44 -0
- minitap/mobile_use/clients/screen_api_client.py +53 -0
- minitap/mobile_use/config.py +285 -0
- minitap/mobile_use/constants.py +2 -0
- minitap/mobile_use/context.py +65 -0
- minitap/mobile_use/controllers/__init__.py +0 -0
- minitap/mobile_use/controllers/mobile_command_controller.py +379 -0
- minitap/mobile_use/controllers/platform_specific_commands_controller.py +74 -0
- minitap/mobile_use/graph/graph.py +149 -0
- minitap/mobile_use/graph/state.py +73 -0
- minitap/mobile_use/main.py +122 -0
- minitap/mobile_use/sdk/__init__.py +12 -0
- minitap/mobile_use/sdk/agent.py +524 -0
- minitap/mobile_use/sdk/builders/__init__.py +10 -0
- minitap/mobile_use/sdk/builders/agent_config_builder.py +213 -0
- minitap/mobile_use/sdk/builders/index.py +15 -0
- minitap/mobile_use/sdk/builders/task_request_builder.py +218 -0
- minitap/mobile_use/sdk/constants.py +14 -0
- minitap/mobile_use/sdk/examples/README.md +45 -0
- minitap/mobile_use/sdk/examples/__init__.py +1 -0
- minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
- minitap/mobile_use/sdk/examples/smart_notification_assistant.py +177 -0
- minitap/mobile_use/sdk/types/__init__.py +49 -0
- minitap/mobile_use/sdk/types/agent.py +73 -0
- minitap/mobile_use/sdk/types/exceptions.py +74 -0
- minitap/mobile_use/sdk/types/task.py +191 -0
- minitap/mobile_use/sdk/utils.py +28 -0
- minitap/mobile_use/servers/config.py +19 -0
- minitap/mobile_use/servers/device_hardware_bridge.py +212 -0
- minitap/mobile_use/servers/device_screen_api.py +143 -0
- minitap/mobile_use/servers/start_servers.py +151 -0
- minitap/mobile_use/servers/stop_servers.py +215 -0
- minitap/mobile_use/servers/utils.py +11 -0
- minitap/mobile_use/services/accessibility.py +100 -0
- minitap/mobile_use/services/llm.py +143 -0
- minitap/mobile_use/tools/index.py +54 -0
- minitap/mobile_use/tools/mobile/back.py +52 -0
- minitap/mobile_use/tools/mobile/copy_text_from.py +77 -0
- minitap/mobile_use/tools/mobile/erase_text.py +124 -0
- minitap/mobile_use/tools/mobile/input_text.py +74 -0
- minitap/mobile_use/tools/mobile/launch_app.py +59 -0
- minitap/mobile_use/tools/mobile/list_packages.py +78 -0
- minitap/mobile_use/tools/mobile/long_press_on.py +62 -0
- minitap/mobile_use/tools/mobile/open_link.py +59 -0
- minitap/mobile_use/tools/mobile/paste_text.py +66 -0
- minitap/mobile_use/tools/mobile/press_key.py +58 -0
- minitap/mobile_use/tools/mobile/run_flow.py +57 -0
- minitap/mobile_use/tools/mobile/stop_app.py +58 -0
- minitap/mobile_use/tools/mobile/swipe.py +56 -0
- minitap/mobile_use/tools/mobile/take_screenshot.py +70 -0
- minitap/mobile_use/tools/mobile/tap.py +66 -0
- minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +68 -0
- minitap/mobile_use/tools/tool_wrapper.py +33 -0
- minitap/mobile_use/utils/cli_helpers.py +40 -0
- minitap/mobile_use/utils/cli_selection.py +144 -0
- minitap/mobile_use/utils/conversations.py +31 -0
- minitap/mobile_use/utils/decorators.py +123 -0
- minitap/mobile_use/utils/errors.py +6 -0
- minitap/mobile_use/utils/file.py +13 -0
- minitap/mobile_use/utils/logger.py +184 -0
- minitap/mobile_use/utils/media.py +73 -0
- minitap/mobile_use/utils/recorder.py +55 -0
- minitap/mobile_use/utils/requests_utils.py +37 -0
- minitap/mobile_use/utils/shell_utils.py +20 -0
- minitap/mobile_use/utils/time.py +6 -0
- minitap/mobile_use/utils/ui_hierarchy.py +30 -0
- minitap_mobile_use-0.0.1.dev0.dist-info/METADATA +274 -0
- minitap_mobile_use-0.0.1.dev0.dist-info/RECORD +95 -0
- minitap_mobile_use-0.0.1.dev0.dist-info/WHEEL +4 -0
- minitap_mobile_use-0.0.1.dev0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import time
|
|
3
|
+
from typing import List
|
|
4
|
+
|
|
5
|
+
import psutil
|
|
6
|
+
import requests
|
|
7
|
+
from minitap.mobile_use.servers.config import server_settings
|
|
8
|
+
from minitap.mobile_use.servers.device_hardware_bridge import DEVICE_HARDWARE_BRIDGE_PORT
|
|
9
|
+
from minitap.mobile_use.utils.logger import get_server_logger
|
|
10
|
+
|
|
11
|
+
logger = get_server_logger()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def find_processes_by_name(name: str) -> List[psutil.Process]:
    """Find all processes whose name or command line mentions ``name``.

    The process name is matched case-insensitively (substring match). Each
    command-line argument is matched case-sensitively (substring match).

    Args:
        name: Substring to look for.

    Returns:
        The matching ``psutil.Process`` objects, in iteration order.
    """
    needle = name.lower()
    processes = []
    for proc in psutil.process_iter(["pid", "name", "cmdline"]):
        try:
            # psutil stores None in ``info`` for attributes it could not read
            # (e.g. access denied), so normalise before using string/list
            # operations; otherwise one unreadable process aborts the scan
            # with an AttributeError.
            proc_name = proc.info["name"] or ""
            cmdline = proc.info["cmdline"] or []
            if needle in proc_name.lower() or any(name in arg for arg in cmdline):
                processes.append(proc)
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
    return processes
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def find_processes_by_port(port: int) -> List[psutil.Process]:
    """Return every process that owns a socket listening on ``port``.

    Processes that vanish, are zombies, or cannot be inspected while being
    examined are skipped.

    Args:
        port: Local port number to look for.

    Returns:
        The processes with at least one connection in LISTEN state whose
        local address uses ``port``.
    """
    listeners = []
    for candidate in psutil.process_iter(["pid", "name"]):
        try:
            is_listening = any(
                sock.laddr.port == port and sock.status == psutil.CONN_LISTEN
                for sock in candidate.net_connections()
            )
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            continue
        if is_listening:
            listeners.append(candidate)
    return listeners
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def stop_process_gracefully(process: psutil.Process, timeout: int = 5) -> bool:
    """Terminate ``process``, escalating to a kill if it does not exit in time.

    Args:
        process: The process to stop.
        timeout: Seconds to wait after ``terminate()`` before force killing.

    Returns:
        True when the process is gone (already dead, terminated, or killed);
        False when it could not be stopped (access denied, zombie, or any
        other unexpected error, including a kill that does not complete
        within two seconds).
    """
    pid = process.pid
    try:
        if not process.is_running():
            logger.success(f"Process {pid} ({process.name()}) already terminated")
            return True

        logger.info(f"Stopping process {pid} ({process.name()})")
        process.terminate()

        try:
            process.wait(timeout=timeout)
        except psutil.TimeoutExpired:
            # Polite termination timed out: escalate to a hard kill.
            logger.warning(f"Process {pid} didn't terminate gracefully, force killing...")
            try:
                process.kill()
                process.wait(timeout=2)
            except psutil.NoSuchProcess:
                logger.success(f"Process {pid} already terminated during force kill")
            else:
                logger.success(f"Process {pid} force killed")
            return True
        else:
            logger.success(f"Process {pid} terminated gracefully")
            return True

    except psutil.NoSuchProcess:
        logger.success(f"Process {pid} no longer exists (already terminated)")
        return True
    except (psutil.AccessDenied, psutil.ZombieProcess) as denied:
        logger.warning(f"Cannot stop process {pid}: {denied}")
        return False
    except Exception as unexpected:
        logger.error(f"Unexpected error stopping process {pid}: {unexpected}")
        return False
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def check_service_health(port: int, service_name: str) -> bool:
    """Report whether a known service still answers HTTP on ``port``.

    Only the Device Screen API port and the Device Hardware Bridge port are
    recognised; any other port yields False without a request being made.

    Args:
        port: Local port to probe.
        service_name: Human-readable name used in the warning log.

    Returns:
        True if the probe endpoint answered with HTTP 200, False otherwise
        (unknown port, request error, or a non-200 status).
    """
    if port == server_settings.DEVICE_SCREEN_API_PORT:
        probe_path = "/health"
    elif port == DEVICE_HARDWARE_BRIDGE_PORT:
        probe_path = "/api/banner-message"
    else:
        return False

    try:
        response = requests.get(f"http://localhost:{port}{probe_path}", timeout=2)
    except requests.exceptions.RequestException:
        # Connection refused / timeout etc. means nothing is serving there.
        return False

    if response.status_code != 200:
        return False

    logger.warning(f"{service_name} is still responding on port {port}")
    return True
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def stop_device_screen_api() -> bool:
    """Stop the Device Screen API server and verify it no longer responds.

    Returns:
        True if the service was not running or stopped responding; False if
        it still answers its health check afterwards.
    """
    logger.info("Stopping Device Screen API...")
    port = server_settings.DEVICE_SCREEN_API_PORT

    if not check_service_health(port, "Device Screen API"):
        logger.success("Device Screen API is not running")
        return True

    # Candidates: whoever listens on the port, plus anything matching the
    # server's process name or script name (de-duplicated).
    # NOTE(review): the "uvicorn" name match is a plain substring search, so
    # unrelated uvicorn processes on the machine are candidates too.
    candidates = {
        *find_processes_by_port(port),
        *find_processes_by_name("uvicorn"),
        *find_processes_by_name("device_screen_api.py"),
    }

    if candidates:
        for proc in candidates:
            stop_process_gracefully(proc)

        # Give the OS a moment to release the port before re-probing.
        time.sleep(1)
        if check_service_health(port, "Device Screen API"):
            logger.error("Device Screen API is still running after stop attempt")
            return False

        logger.success("Device Screen API stopped successfully")
        return True

    logger.warning("No Device Screen API processes found, but service is still responding")
    # Nothing to kill; re-probe once in case the service was already going down.
    time.sleep(1)
    still_up = check_service_health(port, "Device Screen API")
    if not still_up:
        logger.success("Device Screen API stopped successfully (was orphaned)")
    return not still_up
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def stop_device_hardware_bridge() -> bool:
    """Stop the Device Hardware Bridge and verify it no longer responds.

    Returns:
        True if the bridge was not running or stopped responding; False if it
        still answers its probe endpoint afterwards.
    """
    logger.info("Stopping Device Hardware Bridge...")

    if not check_service_health(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
        logger.success("Device Hardware Bridge is not running")
        return True

    # Candidates: whoever listens on the bridge port, plus anything whose
    # name or command line mentions "maestro" (de-duplicated).
    candidates = {
        *find_processes_by_port(DEVICE_HARDWARE_BRIDGE_PORT),
        *find_processes_by_name("maestro"),
    }

    if candidates:
        for proc in candidates:
            stop_process_gracefully(proc)

        # Give the OS a moment to release the port before re-probing.
        time.sleep(1)
        if check_service_health(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
            logger.error("Device Hardware Bridge is still running after stop attempt")
            return False

        logger.success("Device Hardware Bridge stopped successfully")
        return True

    logger.warning("No Device Hardware Bridge processes found, but service is still responding")
    # Nothing to kill; re-probe once in case the service was already going down.
    time.sleep(1)
    still_up = check_service_health(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio")
    if not still_up:
        logger.success("Device Hardware Bridge stopped successfully (was orphaned)")
    return not still_up
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def stop_servers(
    device_screen_api: bool = False, device_hardware_bridge: bool = False
) -> tuple[bool, bool]:
    """Stop the selected servers and report per-server success.

    A server that was not selected is reported as successfully stopped.

    Args:
        device_screen_api: If True, stop the Device Screen API.
        device_hardware_bridge: If True, stop the Device Hardware Bridge.

    Returns:
        Tuple of (api_stopped, bridge_stopped) booleans.
    """
    api_success = True
    if device_screen_api:
        api_success = stop_device_screen_api()

    bridge_success = True
    if device_hardware_bridge:
        bridge_success = stop_device_hardware_bridge()

    # Summarise the combined outcome in a single log line.
    if api_success:
        if bridge_success:
            logger.success("All servers stopped successfully")
        else:
            logger.warning("Device Screen API stopped, but Device Hardware Bridge had issues")
    elif bridge_success:
        logger.warning("Device Hardware Bridge stopped, but Device Screen API had issues")
    else:
        logger.error("Failed to stop both servers")

    return api_success, bridge_success
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def main():
    """Stop both servers and return a process exit code.

    Returns:
        0 if both servers stopped, 1 if exactly one failed, 2 if both failed.
    """
    outcomes = stop_servers(device_screen_api=True, device_hardware_bridge=True)
    # The exit code is simply the number of servers that failed to stop.
    return sum(1 for stopped in outcomes if not stopped)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
if __name__ == "__main__":
    # Script entry point: translate the outcome of main() into an exit code.
    try:
        exit_code = main()
    except KeyboardInterrupt:
        logger.warning("\nStop operation interrupted by user")
        exit_code = 130
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        exit_code = 1
    sys.exit(exit_code)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import psutil
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def is_port_in_use(port: int):
    """Return True if any system-wide connection is listening on ``port``.

    The local address is accepted either as an object exposing ``.port`` or
    as a plain tuple whose second item is the port.
    """
    for conn in psutil.net_connections():
        local = conn.laddr
        if conn.status != psutil.CONN_LISTEN or not local:
            continue
        if (hasattr(local, "port") and local.port == port) or (
            isinstance(local, tuple) and len(local) >= 2 and local[1] == port
        ):
            return True
    return False
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
# Set up basic logging
|
|
6
|
+
logging.basicConfig(level=logging.INFO)
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
async def run_subprocess(command: str) -> tuple[str, str]:
    """
    Run ``command`` through the shell and capture its output.

    Args:
        command: The shell command line to execute.

    Returns:
        A ``(stdout, stderr)`` tuple of decoded text; bytes that cannot be
        decoded are dropped.
    """
    pipe = asyncio.subprocess.PIPE
    proc = await asyncio.create_subprocess_shell(command, stdout=pipe, stderr=pipe)
    raw_out, raw_err = await proc.communicate()
    decoded_out = raw_out.decode(errors="ignore")
    decoded_err = raw_err.decode(errors="ignore")
    return decoded_out, decoded_err
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
async def get_accessibility_tree(device_id: str | None = None) -> str:
    """
    Retrieves the UI accessibility tree from an Android device as an XML string.

    This function uses `uiautomator` (through `adb shell`) to dump the current
    UI hierarchy.

    Args:
        device_id: The optional ID of the target device. If not provided,
                   `adb` is invoked without `-s` and picks the device itself.

    Returns:
        The UI hierarchy as an XML string.
        Returns an empty string if the command fails or no XML is found.
    """
    # Local import keeps this function self-contained.
    import shlex

    adb_command = "adb"
    if device_id:
        # The command line is executed by a shell, so quote the device id to
        # keep shell metacharacters in it from being interpreted. Ordinary
        # ids (e.g. "emulator-5554") are left unchanged by shlex.quote.
        adb_command = f"adb -s {shlex.quote(device_id)}"

    # The '/dev/tty' trick is used to get the raw XML output directly.
    # On some devices, '/dev/null' or a temporary file might be needed.
    command = f"{adb_command} shell uiautomator dump /dev/tty"

    logger.info(f"Executing command: {command}")

    try:
        stdout, stderr = await run_subprocess(command)

        if "UI hierchary dumped to" in stderr:  # Misspelling is in the original tool
            # The XML is often in stdout, but sometimes mixed with stderr.
            # Strip the status message (and everything after it) from stderr
            # and fall back to stdout when what remains is not the XML.
            xml_output = re.sub(r"UI hierchary dumped to.*", "", stderr, flags=re.DOTALL).strip()
            if not xml_output.startswith("<?xml"):
                xml_output = stdout

            # Clean up potential non-XML text at the beginning
            xml_start_index = xml_output.find("<?xml")
            if xml_start_index != -1:
                return xml_output[xml_start_index:].strip()
            else:
                logger.error("Could not find XML content in the output.")
                return ""

        elif "ERROR" in stderr:
            logger.error(f"Failed to get accessibility tree: {stderr.strip()}")
            return ""

        return stdout.strip()

    except Exception as e:
        # Best effort: any failure is logged and reported as "no tree".
        logger.error(f"An exception occurred while getting the accessibility tree: {e}")
        return ""
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# Example of how to run this function
|
|
83
|
+
async def main():
    """Fetch the accessibility tree of the connected device and print it."""
    print("Attempting to retrieve accessibility tree from the connected device...")
    # Pass a device_id such as "emulator-5554" when several devices are attached.
    tree = await get_accessibility_tree()

    if not tree:
        print("\nFailed to retrieve the accessibility tree.")
        return

    print("\n--- Accessibility Tree XML ---")
    print(tree)
    print("\n----------------------------")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
if __name__ == "__main__":
|
|
97
|
+
# To run this example, save it as a Python file (e.g., `get_tree.py`)
|
|
98
|
+
# and run `python get_tree.py` in your terminal.
|
|
99
|
+
# Make sure you have an Android device connected with ADB enabled.
|
|
100
|
+
asyncio.run(main())
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Awaitable, Callable, Literal, TypeVar
|
|
3
|
+
from typing_extensions import overload
|
|
4
|
+
|
|
5
|
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
|
6
|
+
from langchain_openai import ChatOpenAI
|
|
7
|
+
from minitap.mobile_use.config import (
|
|
8
|
+
AgentNode,
|
|
9
|
+
AgentNodeWithFallback,
|
|
10
|
+
LLMUtilsNode,
|
|
11
|
+
LLMWithFallback,
|
|
12
|
+
settings,
|
|
13
|
+
)
|
|
14
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_google_llm(
    model_name: str = "gemini-2.5-pro",
    temperature: float = 0.7,
) -> ChatGoogleGenerativeAI:
    """Create a Google Generative AI chat client.

    Args:
        model_name: Model identifier passed to the client.
        temperature: Sampling temperature passed to the client.

    Returns:
        A ``ChatGoogleGenerativeAI`` configured with the Google API key from
        settings, no explicit token limit and up to 2 retries.

    Raises:
        AssertionError: If ``settings.GOOGLE_API_KEY`` is None.
    """
    google_api_key = settings.GOOGLE_API_KEY
    assert google_api_key is not None
    return ChatGoogleGenerativeAI(
        model=model_name,
        max_tokens=None,
        temperature=temperature,
        api_key=google_api_key,
        max_retries=2,
    )
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_openai_llm(
    model_name: str = "o3",
    temperature: float = 1,
) -> ChatOpenAI:
    """Create an OpenAI chat client.

    Args:
        model_name: Model identifier passed to the client.
        temperature: Sampling temperature passed to the client.

    Returns:
        A ``ChatOpenAI`` configured with the OpenAI API key from settings.

    Raises:
        AssertionError: If ``settings.OPENAI_API_KEY`` is None.
    """
    openai_api_key = settings.OPENAI_API_KEY
    assert openai_api_key is not None
    return ChatOpenAI(
        model=model_name,
        api_key=openai_api_key,
        temperature=temperature,
    )
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def get_openrouter_llm(model_name: str, temperature: float = 1):
    """Create a chat client that talks to OpenRouter's OpenAI-compatible endpoint.

    Args:
        model_name: Model identifier passed to the client.
        temperature: Sampling temperature passed to the client.

    Returns:
        A ``ChatOpenAI`` pointed at ``https://openrouter.ai/api/v1`` and
        configured with the OpenRouter API key from settings.

    Raises:
        AssertionError: If ``settings.OPEN_ROUTER_API_KEY`` is None.
    """
    open_router_api_key = settings.OPEN_ROUTER_API_KEY
    assert open_router_api_key is not None
    return ChatOpenAI(
        model=model_name,
        temperature=temperature,
        api_key=open_router_api_key,
        base_url="https://openrouter.ai/api/v1",
    )
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def get_grok_llm(model_name: str, temperature: float = 1) -> ChatOpenAI:
    """Create a chat client that talks to xAI's OpenAI-compatible endpoint.

    Args:
        model_name: Model identifier passed to the client.
        temperature: Sampling temperature passed to the client.

    Returns:
        A ``ChatOpenAI`` pointed at ``https://api.x.ai/v1`` and configured
        with the xAI API key from settings.

    Raises:
        AssertionError: If ``settings.XAI_API_KEY`` is None.
    """
    xai_api_key = settings.XAI_API_KEY
    assert xai_api_key is not None
    return ChatOpenAI(
        model=model_name,
        api_key=xai_api_key,
        temperature=temperature,
        base_url="https://api.x.ai/v1",
    )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# Overload: agent node that has a fallback model configured.
@overload
def get_llm(
    ctx: MobileUseContext,
    name: AgentNodeWithFallback,
    *,
    use_fallback: bool = False,
    temperature: float = 1,
): ...


# Overload: plain agent node (no fallback selection).
@overload
def get_llm(
    ctx: MobileUseContext,
    name: AgentNode,
    *,
    temperature: float = 1,
): ...


# Overload: utility node, selected with ``is_utils=True``.
@overload
def get_llm(
    ctx: MobileUseContext,
    name: LLMUtilsNode,
    *,
    is_utils: Literal[True],
    temperature: float = 1,
): ...


def get_llm(
    ctx: MobileUseContext,
    name: AgentNode | LLMUtilsNode | AgentNodeWithFallback,
    is_utils: bool = False,
    use_fallback: bool = False,
    temperature: float = 1,
):
    """Build the chat client configured for the node ``name``.

    Args:
        ctx: Context whose ``llm_config`` holds the per-node LLM settings.
        name: Node whose LLM configuration should be used.
        is_utils: Look the node up among utility nodes instead of agents.
        use_fallback: Use the node's fallback LLM instead of its main one.
        temperature: Sampling temperature forwarded to the client factory.

    Returns:
        The client created by the factory matching the configured provider.

    Raises:
        ValueError: If ``use_fallback`` is set but the node's configuration
            is not an ``LLMWithFallback``, or if the provider is not one of
            "openai", "google", "openrouter" or "xai".
    """
    if is_utils:
        llm = ctx.llm_config.get_utils(name)  # type: ignore
    else:
        llm = ctx.llm_config.get_agent(name)  # type: ignore

    if use_fallback:
        if not isinstance(llm, LLMWithFallback):
            raise ValueError("LLM has no fallback!")
        llm = llm.fallback

    # Provider name -> client factory, checked in order.
    factories = (
        ("openai", get_openai_llm),
        ("google", get_google_llm),
        ("openrouter", get_openrouter_llm),
        ("xai", get_grok_llm),
    )
    for provider_name, factory in factories:
        if llm.provider == provider_name:
            return factory(llm.model, temperature)

    raise ValueError(f"Unsupported provider: {llm.provider}")
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
T = TypeVar("T")
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
async def with_fallback(
    main_call: Callable[[], Awaitable[T]],
    fallback_call: Callable[[], Awaitable[T]],
    none_should_fallback: bool = True,
) -> T:
    """Await ``main_call`` and use ``fallback_call`` when it fails.

    The fallback is used when the main call raises an ``Exception`` or, if
    ``none_should_fallback`` is True, when it returns None. The fallback is
    awaited at most once; an exception raised by the fallback propagates to
    the caller.

    Args:
        main_call: Zero-argument callable returning the primary awaitable.
        fallback_call: Zero-argument callable returning the fallback awaitable.
        none_should_fallback: Treat a None result from the main call as a
            failure.

    Returns:
        The main result, or the fallback result when the fallback was used.
    """
    # Only the main call is guarded: keeping the fallback outside the ``try``
    # prevents a failing fallback from being reported as a main-call failure
    # and then invoked a second time.
    try:
        result = await main_call()
    except Exception as e:
        logger.warning(f"❗ Main LLM inference failed: {e}. Falling back...")
        return await fallback_call()

    if result is None and none_should_fallback:
        logger.warning("Main LLM inference returned None. Falling back...")
        return await fallback_call()
    return result
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from langchain_core.tools import BaseTool
|
|
2
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
3
|
+
from minitap.mobile_use.tools.mobile.back import back_wrapper
|
|
4
|
+
from minitap.mobile_use.tools.mobile.copy_text_from import copy_text_from_wrapper
|
|
5
|
+
from minitap.mobile_use.tools.mobile.erase_text import erase_text_wrapper
|
|
6
|
+
from minitap.mobile_use.tools.mobile.input_text import input_text_wrapper
|
|
7
|
+
from minitap.mobile_use.tools.mobile.launch_app import launch_app_wrapper
|
|
8
|
+
from minitap.mobile_use.tools.mobile.list_packages import list_packages_wrapper
|
|
9
|
+
from minitap.mobile_use.tools.mobile.long_press_on import long_press_on_wrapper
|
|
10
|
+
from minitap.mobile_use.tools.mobile.open_link import open_link_wrapper
|
|
11
|
+
from minitap.mobile_use.tools.mobile.paste_text import paste_text_wrapper
|
|
12
|
+
from minitap.mobile_use.tools.mobile.press_key import press_key_wrapper
|
|
13
|
+
|
|
14
|
+
# from minitap.mobile_use.tools.mobile.run_flow import run_flow_wrapper
|
|
15
|
+
from minitap.mobile_use.tools.mobile.stop_app import stop_app_wrapper
|
|
16
|
+
from minitap.mobile_use.tools.mobile.swipe import swipe_wrapper
|
|
17
|
+
from minitap.mobile_use.tools.mobile.take_screenshot import take_screenshot_wrapper
|
|
18
|
+
from minitap.mobile_use.tools.mobile.tap import tap_wrapper
|
|
19
|
+
from minitap.mobile_use.tools.mobile.wait_for_animation_to_end import (
|
|
20
|
+
wait_for_animation_to_end_wrapper,
|
|
21
|
+
)
|
|
22
|
+
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
23
|
+
|
|
24
|
+
EXECUTOR_WRAPPERS_TOOLS = [
|
|
25
|
+
back_wrapper,
|
|
26
|
+
open_link_wrapper,
|
|
27
|
+
tap_wrapper,
|
|
28
|
+
long_press_on_wrapper,
|
|
29
|
+
swipe_wrapper,
|
|
30
|
+
take_screenshot_wrapper,
|
|
31
|
+
# run_flow_wrapper,  # Uncomment once subflow is implemented
|
|
32
|
+
copy_text_from_wrapper,
|
|
33
|
+
input_text_wrapper,
|
|
34
|
+
list_packages_wrapper,
|
|
35
|
+
launch_app_wrapper,
|
|
36
|
+
stop_app_wrapper,
|
|
37
|
+
paste_text_wrapper,
|
|
38
|
+
erase_text_wrapper,
|
|
39
|
+
press_key_wrapper,
|
|
40
|
+
wait_for_animation_to_end_wrapper,
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def get_tools_from_wrappers(ctx: MobileUseContext, wrappers: list[ToolWrapper]) -> list[BaseTool]:
    """Build one tool per wrapper by calling its ``tool_fn_getter`` with ``ctx``.

    The returned list follows the order of ``wrappers``.
    """
    tools = []
    for wrapper in wrappers:
        build_tool = wrapper.tool_fn_getter
        tools.append(build_tool(ctx))
    return tools
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def get_tool_wrapper_from_name(name: str) -> ToolWrapper | None:
    """Look up the executor tool wrapper for the tool called ``name``.

    A wrapper matches when its ``tool_fn_getter`` function is named
    ``get_<name>_tool``. Returns None when no wrapper matches.
    """
    expected_getter = f"get_{name}_tool"
    return next(
        (
            candidate
            for candidate in EXECUTOR_WRAPPERS_TOOLS
            if candidate.tool_fn_getter.__name__ == expected_getter
        ),
        None,
    )
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from langchain_core.messages import ToolMessage
|
|
4
|
+
from langchain_core.tools import tool
|
|
5
|
+
from langchain_core.tools.base import InjectedToolCallId
|
|
6
|
+
from langgraph.types import Command
|
|
7
|
+
from minitap.mobile_use.controllers.mobile_command_controller import back as back_controller
|
|
8
|
+
from minitap.mobile_use.tools.tool_wrapper import ExecutorMetadata, ToolWrapper
|
|
9
|
+
from typing_extensions import Annotated
|
|
10
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
11
|
+
from minitap.mobile_use.graph.state import State
|
|
12
|
+
from langgraph.prebuilt import InjectedState
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_back_tool(ctx: MobileUseContext):
    """Create the ``back`` tool bound to ``ctx``.

    The returned tool calls the back controller and wraps the outcome in a
    ``Command`` whose update is produced by
    ``back_wrapper.handle_executor_state_fields``.
    """

    @tool
    def back(
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[State, InjectedState],
        agent_thought: str,
        executor_metadata: Optional[ExecutorMetadata],
    ):
        """Navigates to the previous screen. (Only works on Android for the moment)"""
        # A non-None controller result is treated as the error payload.
        error = back_controller(ctx=ctx)
        failed = error is not None

        if failed:
            content = back_wrapper.on_failure_fn()
            extra_kwargs = {"error": error}
        else:
            content = back_wrapper.on_success_fn()
            extra_kwargs = {}

        tool_message = ToolMessage(
            tool_call_id=tool_call_id,
            content=content,
            additional_kwargs=extra_kwargs,
        )
        state_update = back_wrapper.handle_executor_state_fields(
            ctx=ctx,
            state=state,
            executor_metadata=executor_metadata,
            tool_message=tool_message,
            is_failure=failed,
            updates={
                "agents_thoughts": [agent_thought],
                "messages": [tool_message],
            },
        )
        return Command(update=state_update)

    return back


# Wrapper pairing the tool factory with its success / failure messages.
back_wrapper = ToolWrapper(
    tool_fn_getter=get_back_tool,
    on_success_fn=lambda: "Navigated to the previous screen.",
    on_failure_fn=lambda: "Failed to navigate to the previous screen.",
)
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from langchain_core.messages import ToolMessage
|
|
4
|
+
from langchain_core.tools import tool
|
|
5
|
+
from langchain_core.tools.base import InjectedToolCallId
|
|
6
|
+
from langgraph.types import Command
|
|
7
|
+
from minitap.mobile_use.controllers.mobile_command_controller import SelectorRequest
|
|
8
|
+
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
9
|
+
copy_text_from as copy_text_from_controller,
|
|
10
|
+
)
|
|
11
|
+
from minitap.mobile_use.tools.tool_wrapper import ExecutorMetadata, ToolWrapper
|
|
12
|
+
from pydantic import Field
|
|
13
|
+
from typing_extensions import Annotated
|
|
14
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
15
|
+
from minitap.mobile_use.graph.state import State
|
|
16
|
+
from langgraph.prebuilt import InjectedState
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_copy_text_from_tool(ctx: MobileUseContext):
    """Create the ``copy_text_from`` tool bound to ``ctx``.

    The returned tool calls the copy-text controller with the given selector
    and wraps the outcome in a ``Command`` whose update is produced by
    ``copy_text_from_wrapper.handle_executor_state_fields``.
    """

    @tool
    def copy_text_from(
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[State, InjectedState],
        agent_thought: str,
        executor_metadata: Optional[ExecutorMetadata],
        selector_request: SelectorRequest = Field(
            ..., description="The selector to copy text from"
        ),
    ):
        """
        Copies text from a UI element identified by the given selector and stores it in memory.

        The copied text can be:
        - Pasted later using `pasteText`
        - Accessed in JavaScript via `maestro.copiedText`

        Example Usage:
        - launchApp
        - copyTextFrom: { id: "someId" }
        - tapOn: { id: "searchFieldId" }
        - pasteText

        See the Selectors documentation for supported selector types.
        """
        # A non-None controller result is treated as the error payload.
        error = copy_text_from_controller(ctx=ctx, selector_request=selector_request)
        failed = error is not None

        if failed:
            content = copy_text_from_wrapper.on_failure_fn(selector_request)
            extra_kwargs = {"error": error}
        else:
            content = copy_text_from_wrapper.on_success_fn(selector_request)
            extra_kwargs = {}

        tool_message = ToolMessage(
            tool_call_id=tool_call_id,
            content=content,
            additional_kwargs=extra_kwargs,
        )
        state_update = copy_text_from_wrapper.handle_executor_state_fields(
            ctx=ctx,
            state=state,
            executor_metadata=executor_metadata,
            tool_message=tool_message,
            is_failure=failed,
            updates={
                "agents_thoughts": [agent_thought],
                "messages": [tool_message],
            },
        )
        return Command(update=state_update)

    return copy_text_from


# Wrapper pairing the tool factory with its success / failure messages.
copy_text_from_wrapper = ToolWrapper(
    tool_fn_getter=get_copy_text_from_tool,
    on_success_fn=lambda selector_request: (
        f'Text copied successfully from selector "{selector_request}".'
    ),
    on_failure_fn=lambda selector_request: f"Failed to copy text from selector {selector_request}.",
)
|