minitap-mobile-use 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of minitap-mobile-use might be problematic. Click here for more details.
- minitap/mobile_use/agents/cortex/cortex.md +19 -10
- minitap/mobile_use/agents/cortex/cortex.py +15 -2
- minitap/mobile_use/agents/cortex/types.py +2 -4
- minitap/mobile_use/agents/executor/executor.md +20 -15
- minitap/mobile_use/agents/executor/executor.py +6 -18
- minitap/mobile_use/agents/executor/tool_node.py +105 -0
- minitap/mobile_use/agents/hopper/hopper.md +2 -10
- minitap/mobile_use/agents/hopper/hopper.py +4 -9
- minitap/mobile_use/agents/orchestrator/human.md +3 -4
- minitap/mobile_use/agents/orchestrator/orchestrator.md +25 -7
- minitap/mobile_use/agents/orchestrator/orchestrator.py +56 -56
- minitap/mobile_use/agents/orchestrator/types.py +5 -8
- minitap/mobile_use/agents/outputter/outputter.py +1 -2
- minitap/mobile_use/agents/planner/planner.md +25 -15
- minitap/mobile_use/agents/planner/planner.py +7 -1
- minitap/mobile_use/agents/planner/types.py +10 -5
- minitap/mobile_use/agents/planner/utils.py +11 -0
- minitap/mobile_use/agents/summarizer/summarizer.py +2 -1
- minitap/mobile_use/clients/device_hardware_client.py +3 -0
- minitap/mobile_use/config.py +16 -14
- minitap/mobile_use/constants.py +1 -0
- minitap/mobile_use/context.py +3 -4
- minitap/mobile_use/controllers/mobile_command_controller.py +37 -26
- minitap/mobile_use/controllers/platform_specific_commands_controller.py +3 -4
- minitap/mobile_use/graph/graph.py +10 -31
- minitap/mobile_use/graph/state.py +34 -14
- minitap/mobile_use/main.py +11 -8
- minitap/mobile_use/sdk/agent.py +78 -63
- minitap/mobile_use/sdk/builders/agent_config_builder.py +23 -11
- minitap/mobile_use/sdk/builders/task_request_builder.py +9 -9
- minitap/mobile_use/sdk/examples/smart_notification_assistant.py +1 -2
- minitap/mobile_use/sdk/types/agent.py +10 -5
- minitap/mobile_use/sdk/types/task.py +19 -18
- minitap/mobile_use/sdk/utils.py +1 -1
- minitap/mobile_use/servers/config.py +1 -2
- minitap/mobile_use/servers/device_hardware_bridge.py +3 -4
- minitap/mobile_use/servers/start_servers.py +4 -4
- minitap/mobile_use/servers/stop_servers.py +12 -18
- minitap/mobile_use/services/llm.py +4 -2
- minitap/mobile_use/tools/index.py +11 -7
- minitap/mobile_use/tools/mobile/back.py +8 -12
- minitap/mobile_use/tools/mobile/clear_text.py +277 -0
- minitap/mobile_use/tools/mobile/copy_text_from.py +8 -12
- minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
- minitap/mobile_use/tools/mobile/find_packages.py +69 -0
- minitap/mobile_use/tools/mobile/input_text.py +55 -32
- minitap/mobile_use/tools/mobile/launch_app.py +8 -12
- minitap/mobile_use/tools/mobile/long_press_on.py +9 -13
- minitap/mobile_use/tools/mobile/open_link.py +8 -12
- minitap/mobile_use/tools/mobile/paste_text.py +8 -12
- minitap/mobile_use/tools/mobile/press_key.py +8 -12
- minitap/mobile_use/tools/mobile/stop_app.py +9 -13
- minitap/mobile_use/tools/mobile/swipe.py +8 -12
- minitap/mobile_use/tools/mobile/take_screenshot.py +8 -12
- minitap/mobile_use/tools/mobile/tap.py +9 -13
- minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +9 -13
- minitap/mobile_use/tools/tool_wrapper.py +1 -23
- minitap/mobile_use/tools/utils.py +86 -0
- minitap/mobile_use/utils/cli_helpers.py +1 -2
- minitap/mobile_use/utils/cli_selection.py +5 -6
- minitap/mobile_use/utils/decorators.py +21 -20
- minitap/mobile_use/utils/logger.py +3 -4
- minitap/mobile_use/utils/media.py +1 -1
- minitap/mobile_use/utils/recorder.py +11 -10
- minitap/mobile_use/utils/ui_hierarchy.py +98 -3
- {minitap_mobile_use-2.0.0.dist-info → minitap_mobile_use-2.1.0.dist-info}/METADATA +12 -2
- minitap_mobile_use-2.1.0.dist-info/RECORD +96 -0
- minitap/mobile_use/agents/executor/executor_context_cleaner.py +0 -27
- minitap/mobile_use/tools/mobile/erase_text.py +0 -124
- minitap/mobile_use/tools/mobile/list_packages.py +0 -78
- minitap/mobile_use/tools/mobile/run_flow.py +0 -57
- minitap_mobile_use-2.0.0.dist-info/RECORD +0 -95
- {minitap_mobile_use-2.0.0.dist-info → minitap_mobile_use-2.1.0.dist-info}/WHEEL +0 -0
- {minitap_mobile_use-2.0.0.dist-info → minitap_mobile_use-2.1.0.dist-info}/entry_points.txt +0 -0
|
@@ -5,7 +5,8 @@ Task-related type definitions for the Mobile-use SDK.
|
|
|
5
5
|
from datetime import datetime
|
|
6
6
|
from enum import Enum
|
|
7
7
|
from pathlib import Path
|
|
8
|
-
from typing import Any,
|
|
8
|
+
from typing import Any, TypeVar, overload
|
|
9
|
+
|
|
9
10
|
from pydantic import BaseModel, Field
|
|
10
11
|
|
|
11
12
|
from minitap.mobile_use.config import LLMConfig, get_default_llm_config
|
|
@@ -36,8 +37,8 @@ class AgentProfile(BaseModel):
|
|
|
36
37
|
self,
|
|
37
38
|
*,
|
|
38
39
|
name: str,
|
|
39
|
-
llm_config:
|
|
40
|
-
from_file:
|
|
40
|
+
llm_config: LLMConfig | None = None,
|
|
41
|
+
from_file: str | None = None,
|
|
41
42
|
**kwargs,
|
|
42
43
|
):
|
|
43
44
|
kwargs["name"] = name
|
|
@@ -64,7 +65,7 @@ class TaskStatus(str, Enum):
|
|
|
64
65
|
|
|
65
66
|
|
|
66
67
|
T = TypeVar("T", bound=BaseModel)
|
|
67
|
-
TOutput = TypeVar("TOutput", bound=
|
|
68
|
+
TOutput = TypeVar("TOutput", bound=BaseModel | None)
|
|
68
69
|
|
|
69
70
|
|
|
70
71
|
class TaskRequestCommon(BaseModel):
|
|
@@ -75,11 +76,11 @@ class TaskRequestCommon(BaseModel):
|
|
|
75
76
|
max_steps: int = RECURSION_LIMIT
|
|
76
77
|
record_trace: bool = False
|
|
77
78
|
trace_path: Path = Path("mobile-use-traces")
|
|
78
|
-
llm_output_path:
|
|
79
|
-
thoughts_output_path:
|
|
79
|
+
llm_output_path: Path | None = None
|
|
80
|
+
thoughts_output_path: Path | None = None
|
|
80
81
|
|
|
81
82
|
|
|
82
|
-
class TaskRequest
|
|
83
|
+
class TaskRequest[TOutput](TaskRequestCommon):
|
|
83
84
|
"""
|
|
84
85
|
Defines the format of a mobile automation task request.
|
|
85
86
|
|
|
@@ -98,10 +99,10 @@ class TaskRequest(TaskRequestCommon, Generic[TOutput]):
|
|
|
98
99
|
"""
|
|
99
100
|
|
|
100
101
|
goal: str
|
|
101
|
-
profile:
|
|
102
|
-
task_name:
|
|
103
|
-
output_description:
|
|
104
|
-
output_format:
|
|
102
|
+
profile: str | None = None
|
|
103
|
+
task_name: str | None = None
|
|
104
|
+
output_description: str | None = None
|
|
105
|
+
output_format: type[TOutput] | None = None
|
|
105
106
|
|
|
106
107
|
|
|
107
108
|
class TaskResult(BaseModel):
|
|
@@ -116,11 +117,11 @@ class TaskResult(BaseModel):
|
|
|
116
117
|
"""
|
|
117
118
|
|
|
118
119
|
content: Any = None
|
|
119
|
-
error:
|
|
120
|
+
error: str | None = None
|
|
120
121
|
execution_time_seconds: float
|
|
121
122
|
steps_taken: int
|
|
122
123
|
|
|
123
|
-
def get_as_model(self, model_class:
|
|
124
|
+
def get_as_model(self, model_class: type[T]) -> T:
|
|
124
125
|
"""
|
|
125
126
|
Parse the content into a Pydantic model instance.
|
|
126
127
|
|
|
@@ -158,14 +159,14 @@ class Task(BaseModel):
|
|
|
158
159
|
status: TaskStatus
|
|
159
160
|
request: TaskRequest
|
|
160
161
|
created_at: datetime
|
|
161
|
-
ended_at:
|
|
162
|
-
result:
|
|
162
|
+
ended_at: datetime | None = None
|
|
163
|
+
result: TaskResult | None = None
|
|
163
164
|
|
|
164
165
|
def finalize(
|
|
165
166
|
self,
|
|
166
|
-
content:
|
|
167
|
-
state:
|
|
168
|
-
error:
|
|
167
|
+
content: Any | None = None,
|
|
168
|
+
state: dict | None = None,
|
|
169
|
+
error: str | None = None,
|
|
169
170
|
cancelled: bool = False,
|
|
170
171
|
):
|
|
171
172
|
self.status = TaskStatus.COMPLETED if error is None else TaskStatus.FAILED
|
minitap/mobile_use/sdk/utils.py
CHANGED
|
@@ -16,7 +16,7 @@ def load_llm_config_override(path: Path) -> LLMConfig:
|
|
|
16
16
|
override_config_dict = {}
|
|
17
17
|
if os.path.exists(path):
|
|
18
18
|
logger.info(f"Loading custom LLM config from {path.resolve()}...")
|
|
19
|
-
with open(path
|
|
19
|
+
with open(path) as f:
|
|
20
20
|
override_config_dict = load_jsonc(f)
|
|
21
21
|
else:
|
|
22
22
|
logger.warning("Custom LLM config not found - using the default config")
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
1
|
from dotenv import load_dotenv
|
|
3
2
|
from minitap.mobile_use.servers.device_hardware_bridge import DEVICE_HARDWARE_BRIDGE_PORT
|
|
4
3
|
from minitap.mobile_use.utils.logger import get_logger
|
|
@@ -11,7 +10,7 @@ logger = get_logger(__name__)
|
|
|
11
10
|
class ServerSettings(BaseSettings):
|
|
12
11
|
DEVICE_HARDWARE_BRIDGE_BASE_URL: str = f"http://localhost:{DEVICE_HARDWARE_BRIDGE_PORT}"
|
|
13
12
|
DEVICE_SCREEN_API_PORT: int = 9998
|
|
14
|
-
ADB_HOST:
|
|
13
|
+
ADB_HOST: str | None = None
|
|
15
14
|
|
|
16
15
|
model_config = {"env_file": ".env", "extra": "ignore"}
|
|
17
16
|
|
|
@@ -4,7 +4,6 @@ import subprocess
|
|
|
4
4
|
import threading
|
|
5
5
|
import time
|
|
6
6
|
from enum import Enum
|
|
7
|
-
from typing import Optional
|
|
8
7
|
|
|
9
8
|
import requests
|
|
10
9
|
from minitap.mobile_use.context import DevicePlatform
|
|
@@ -24,7 +23,7 @@ class BridgeStatus(Enum):
|
|
|
24
23
|
|
|
25
24
|
|
|
26
25
|
class DeviceHardwareBridge:
|
|
27
|
-
def __init__(self, device_id: str, platform: DevicePlatform, adb_host:
|
|
26
|
+
def __init__(self, device_id: str, platform: DevicePlatform, adb_host: str | None = None):
|
|
28
27
|
self.process = None
|
|
29
28
|
self.status = BridgeStatus.STOPPED
|
|
30
29
|
self.thread = None
|
|
@@ -32,7 +31,7 @@ class DeviceHardwareBridge:
|
|
|
32
31
|
self.lock = threading.Lock()
|
|
33
32
|
self.device_id: str = device_id
|
|
34
33
|
self.platform: DevicePlatform = platform
|
|
35
|
-
self.adb_host:
|
|
34
|
+
self.adb_host: str | None = adb_host
|
|
36
35
|
|
|
37
36
|
def _run_maestro_studio(self):
|
|
38
37
|
try:
|
|
@@ -207,6 +206,6 @@ class DeviceHardwareBridge:
|
|
|
207
206
|
with self.lock:
|
|
208
207
|
return {"status": self.status.value, "output": self.output[-10:]}
|
|
209
208
|
|
|
210
|
-
def get_device_id(self) ->
|
|
209
|
+
def get_device_id(self) -> str | None:
|
|
211
210
|
with self.lock:
|
|
212
211
|
return self.device_id
|
|
@@ -4,7 +4,7 @@ import signal
|
|
|
4
4
|
import sys
|
|
5
5
|
import time
|
|
6
6
|
from enum import Enum
|
|
7
|
-
from typing import Annotated
|
|
7
|
+
from typing import Annotated
|
|
8
8
|
|
|
9
9
|
import requests
|
|
10
10
|
import typer
|
|
@@ -22,7 +22,7 @@ bridge_instance = None
|
|
|
22
22
|
shutdown_requested = False
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
def check_device_screen_api_health(base_url:
|
|
25
|
+
def check_device_screen_api_health(base_url: str | None = None, max_retries=30, delay=1):
|
|
26
26
|
base_url = base_url or f"http://localhost:{server_settings.DEVICE_SCREEN_API_PORT}"
|
|
27
27
|
health_url = f"{base_url}/health"
|
|
28
28
|
|
|
@@ -49,7 +49,7 @@ def check_device_screen_api_health(base_url: Optional[str] = None, max_retries=3
|
|
|
49
49
|
return False
|
|
50
50
|
|
|
51
51
|
|
|
52
|
-
def _start_device_screen_api_process() ->
|
|
52
|
+
def _start_device_screen_api_process() -> multiprocessing.Process | None:
|
|
53
53
|
try:
|
|
54
54
|
process = multiprocessing.Process(target=start_device_screen_api, daemon=True)
|
|
55
55
|
process.start()
|
|
@@ -61,7 +61,7 @@ def _start_device_screen_api_process() -> Optional[multiprocessing.Process]:
|
|
|
61
61
|
|
|
62
62
|
def start_device_hardware_bridge(
|
|
63
63
|
device_id: str, platform: DevicePlatform
|
|
64
|
-
) ->
|
|
64
|
+
) -> DeviceHardwareBridge | None:
|
|
65
65
|
logger.info("Starting Device Hardware Bridge...")
|
|
66
66
|
|
|
67
67
|
try:
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import sys
|
|
2
2
|
import time
|
|
3
|
-
from typing import List
|
|
4
3
|
|
|
5
4
|
import psutil
|
|
6
5
|
import requests
|
|
6
|
+
|
|
7
7
|
from minitap.mobile_use.servers.config import server_settings
|
|
8
8
|
from minitap.mobile_use.servers.device_hardware_bridge import DEVICE_HARDWARE_BRIDGE_PORT
|
|
9
9
|
from minitap.mobile_use.utils.logger import get_server_logger
|
|
@@ -11,7 +11,7 @@ from minitap.mobile_use.utils.logger import get_server_logger
|
|
|
11
11
|
logger = get_server_logger()
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def find_processes_by_name(name: str) ->
|
|
14
|
+
def find_processes_by_name(name: str) -> list[psutil.Process]:
|
|
15
15
|
"""Find all processes with the given name."""
|
|
16
16
|
processes = []
|
|
17
17
|
for proc in psutil.process_iter(["pid", "name", "cmdline"]):
|
|
@@ -25,7 +25,7 @@ def find_processes_by_name(name: str) -> List[psutil.Process]:
|
|
|
25
25
|
return processes
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
def find_processes_by_port(port: int) ->
|
|
28
|
+
def find_processes_by_port(port: int) -> list[psutil.Process]:
|
|
29
29
|
processes = []
|
|
30
30
|
for proc in psutil.process_iter(["pid", "name"]):
|
|
31
31
|
try:
|
|
@@ -44,27 +44,23 @@ def stop_process_gracefully(process: psutil.Process, timeout: int = 5) -> bool:
|
|
|
44
44
|
logger.success(f"Process {process.pid} ({process.name()}) already terminated")
|
|
45
45
|
return True
|
|
46
46
|
|
|
47
|
-
logger.
|
|
47
|
+
logger.debug(f"Stopping process {process.pid} ({process.name()})")
|
|
48
48
|
|
|
49
49
|
process.terminate()
|
|
50
50
|
|
|
51
51
|
try:
|
|
52
52
|
process.wait(timeout=timeout)
|
|
53
|
-
logger.success(f"Process {process.pid} terminated gracefully")
|
|
54
53
|
return True
|
|
55
54
|
except psutil.TimeoutExpired:
|
|
56
55
|
logger.warning(f"Process {process.pid} didn't terminate gracefully, force killing...")
|
|
57
56
|
try:
|
|
58
57
|
process.kill()
|
|
59
58
|
process.wait(timeout=2)
|
|
60
|
-
logger.success(f"Process {process.pid} force killed")
|
|
61
59
|
return True
|
|
62
60
|
except psutil.NoSuchProcess:
|
|
63
|
-
logger.success(f"Process {process.pid} already terminated during force kill")
|
|
64
61
|
return True
|
|
65
62
|
|
|
66
63
|
except psutil.NoSuchProcess:
|
|
67
|
-
logger.success(f"Process {process.pid} no longer exists (already terminated)")
|
|
68
64
|
return True
|
|
69
65
|
except (psutil.AccessDenied, psutil.ZombieProcess) as e:
|
|
70
66
|
logger.warning(f"Cannot stop process {process.pid}: {e}")
|
|
@@ -84,7 +80,7 @@ def check_service_health(port: int, service_name: str) -> bool:
|
|
|
84
80
|
return False
|
|
85
81
|
|
|
86
82
|
if response.status_code == 200:
|
|
87
|
-
logger.
|
|
83
|
+
logger.debug(f"{service_name} is still responding on port {port}")
|
|
88
84
|
return True
|
|
89
85
|
except requests.exceptions.RequestException:
|
|
90
86
|
pass
|
|
@@ -167,19 +163,15 @@ def stop_device_hardware_bridge() -> bool:
|
|
|
167
163
|
|
|
168
164
|
|
|
169
165
|
def stop_servers(
|
|
170
|
-
|
|
166
|
+
should_stop_screen_api: bool = False, should_stop_hw_bridge: bool = False
|
|
171
167
|
) -> tuple[bool, bool]:
|
|
172
|
-
"""Stop
|
|
173
|
-
|
|
174
|
-
Args:
|
|
175
|
-
device_screen_api: If True, stop the Device Screen API
|
|
176
|
-
device_hardware_bridge: If True, stop the Device Hardware Bridge
|
|
168
|
+
"""Stop the servers and return whether they stopped successfully (api_success, bridge_success).
|
|
177
169
|
|
|
178
170
|
Returns:
|
|
179
171
|
Tuple of (api_stopped, bridge_stopped) booleans
|
|
180
172
|
"""
|
|
181
|
-
api_success = stop_device_screen_api() if
|
|
182
|
-
bridge_success = stop_device_hardware_bridge() if
|
|
173
|
+
api_success = stop_device_screen_api() if should_stop_screen_api else True
|
|
174
|
+
bridge_success = stop_device_hardware_bridge() if should_stop_hw_bridge else True
|
|
183
175
|
|
|
184
176
|
if api_success and bridge_success:
|
|
185
177
|
logger.success("All servers stopped successfully")
|
|
@@ -195,7 +187,9 @@ def stop_servers(
|
|
|
195
187
|
|
|
196
188
|
def main():
|
|
197
189
|
"""Main function to stop all servers."""
|
|
198
|
-
api_success, bridge_success = stop_servers(
|
|
190
|
+
api_success, bridge_success = stop_servers(
|
|
191
|
+
should_stop_screen_api=True, should_stop_hw_bridge=True
|
|
192
|
+
)
|
|
199
193
|
if api_success and bridge_success:
|
|
200
194
|
return 0
|
|
201
195
|
elif api_success or bridge_success:
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import
|
|
3
|
-
from
|
|
2
|
+
from typing import Literal, TypeVar
|
|
3
|
+
from collections.abc import Awaitable, Callable
|
|
4
|
+
from typing import overload
|
|
4
5
|
|
|
5
6
|
from langchain_google_genai import ChatGoogleGenerativeAI
|
|
6
7
|
from langchain_openai import ChatOpenAI
|
|
@@ -39,6 +40,7 @@ def get_openai_llm(
|
|
|
39
40
|
client = ChatOpenAI(
|
|
40
41
|
model=model_name,
|
|
41
42
|
api_key=settings.OPENAI_API_KEY,
|
|
43
|
+
base_url=settings.OPENAI_BASE_URL,
|
|
42
44
|
temperature=temperature,
|
|
43
45
|
)
|
|
44
46
|
return client
|
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
from langchain_core.tools import BaseTool
|
|
2
|
+
|
|
2
3
|
from minitap.mobile_use.context import MobileUseContext
|
|
3
4
|
from minitap.mobile_use.tools.mobile.back import back_wrapper
|
|
5
|
+
from minitap.mobile_use.tools.mobile.clear_text import clear_text_wrapper
|
|
4
6
|
from minitap.mobile_use.tools.mobile.copy_text_from import copy_text_from_wrapper
|
|
5
|
-
from minitap.mobile_use.tools.mobile.
|
|
7
|
+
from minitap.mobile_use.tools.mobile.erase_one_char import erase_one_char_wrapper
|
|
8
|
+
from minitap.mobile_use.tools.mobile.find_packages import find_packages_wrapper
|
|
6
9
|
from minitap.mobile_use.tools.mobile.input_text import input_text_wrapper
|
|
7
10
|
from minitap.mobile_use.tools.mobile.launch_app import launch_app_wrapper
|
|
8
|
-
from minitap.mobile_use.tools.mobile.list_packages import list_packages_wrapper
|
|
9
11
|
from minitap.mobile_use.tools.mobile.long_press_on import long_press_on_wrapper
|
|
10
12
|
from minitap.mobile_use.tools.mobile.open_link import open_link_wrapper
|
|
11
13
|
from minitap.mobile_use.tools.mobile.paste_text import paste_text_wrapper
|
|
12
14
|
from minitap.mobile_use.tools.mobile.press_key import press_key_wrapper
|
|
13
|
-
|
|
14
|
-
# from minitap.mobile_use.tools.mobile.run_flow import run_flow_wrapper
|
|
15
15
|
from minitap.mobile_use.tools.mobile.stop_app import stop_app_wrapper
|
|
16
16
|
from minitap.mobile_use.tools.mobile.swipe import swipe_wrapper
|
|
17
17
|
from minitap.mobile_use.tools.mobile.take_screenshot import take_screenshot_wrapper
|
|
@@ -28,14 +28,14 @@ EXECUTOR_WRAPPERS_TOOLS = [
|
|
|
28
28
|
long_press_on_wrapper,
|
|
29
29
|
swipe_wrapper,
|
|
30
30
|
take_screenshot_wrapper,
|
|
31
|
-
# run_flow_wrapper, # To decomment when subflow is implemented
|
|
32
31
|
copy_text_from_wrapper,
|
|
33
32
|
input_text_wrapper,
|
|
34
|
-
|
|
33
|
+
erase_one_char_wrapper,
|
|
34
|
+
find_packages_wrapper,
|
|
35
35
|
launch_app_wrapper,
|
|
36
36
|
stop_app_wrapper,
|
|
37
37
|
paste_text_wrapper,
|
|
38
|
-
|
|
38
|
+
clear_text_wrapper,
|
|
39
39
|
press_key_wrapper,
|
|
40
40
|
wait_for_animation_to_end_wrapper,
|
|
41
41
|
]
|
|
@@ -46,6 +46,10 @@ def get_tools_from_wrappers(ctx: MobileUseContext, wrappers: list[ToolWrapper])
|
|
|
46
46
|
return [wrapper.tool_fn_getter(ctx) for wrapper in wrappers]
|
|
47
47
|
|
|
48
48
|
|
|
49
|
+
def format_tools_list(ctx: MobileUseContext, wrappers: list[ToolWrapper]) -> str:
    """Return the names of the tools built from ``wrappers``, one name per line."""
    tool_names = [built_tool.name for built_tool in get_tools_from_wrappers(ctx, wrappers)]
    return "\n".join(tool_names)
|
|
51
|
+
|
|
52
|
+
|
|
49
53
|
def get_tool_wrapper_from_name(name: str) -> ToolWrapper | None:
|
|
50
54
|
"""Get the tool wrapper from the name."""
|
|
51
55
|
for wrapper in EXECUTOR_WRAPPERS_TOOLS:
|
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
|
-
|
|
3
1
|
from langchain_core.messages import ToolMessage
|
|
4
2
|
from langchain_core.tools import tool
|
|
5
3
|
from langchain_core.tools.base import InjectedToolCallId
|
|
6
4
|
from langgraph.types import Command
|
|
5
|
+
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
7
6
|
from minitap.mobile_use.controllers.mobile_command_controller import back as back_controller
|
|
8
|
-
from minitap.mobile_use.tools.tool_wrapper import
|
|
9
|
-
from
|
|
7
|
+
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
8
|
+
from typing import Annotated
|
|
10
9
|
from minitap.mobile_use.context import MobileUseContext
|
|
11
10
|
from minitap.mobile_use.graph.state import State
|
|
12
11
|
from langgraph.prebuilt import InjectedState
|
|
@@ -18,7 +17,6 @@ def get_back_tool(ctx: MobileUseContext):
|
|
|
18
17
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
19
18
|
state: Annotated[State, InjectedState],
|
|
20
19
|
agent_thought: str,
|
|
21
|
-
executor_metadata: Optional[ExecutorMetadata],
|
|
22
20
|
):
|
|
23
21
|
"""Navigates to the previous screen. (Only works on Android for the moment)"""
|
|
24
22
|
output = back_controller(ctx=ctx)
|
|
@@ -27,18 +25,16 @@ def get_back_tool(ctx: MobileUseContext):
|
|
|
27
25
|
tool_call_id=tool_call_id,
|
|
28
26
|
content=back_wrapper.on_failure_fn() if has_failed else back_wrapper.on_success_fn(),
|
|
29
27
|
additional_kwargs={"error": output} if has_failed else {},
|
|
28
|
+
status="error" if has_failed else "success",
|
|
30
29
|
)
|
|
31
30
|
return Command(
|
|
32
|
-
update=
|
|
31
|
+
update=state.sanitize_update(
|
|
33
32
|
ctx=ctx,
|
|
34
|
-
|
|
35
|
-
executor_metadata=executor_metadata,
|
|
36
|
-
tool_message=tool_message,
|
|
37
|
-
is_failure=has_failed,
|
|
38
|
-
updates={
|
|
33
|
+
update={
|
|
39
34
|
"agents_thoughts": [agent_thought],
|
|
40
|
-
|
|
35
|
+
EXECUTOR_MESSAGES_KEY: [tool_message],
|
|
41
36
|
},
|
|
37
|
+
agent="executor",
|
|
42
38
|
),
|
|
43
39
|
)
|
|
44
40
|
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
3
|
+
from langchain_core.messages import ToolMessage
|
|
4
|
+
from langchain_core.tools import tool
|
|
5
|
+
from langchain_core.tools.base import InjectedToolCallId
|
|
6
|
+
from langgraph.prebuilt import InjectedState
|
|
7
|
+
from langgraph.types import Command
|
|
8
|
+
from pydantic import BaseModel
|
|
9
|
+
|
|
10
|
+
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
11
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
12
|
+
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
13
|
+
erase_text as erase_text_controller,
|
|
14
|
+
)
|
|
15
|
+
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
16
|
+
get_screen_data,
|
|
17
|
+
)
|
|
18
|
+
from minitap.mobile_use.graph.state import State
|
|
19
|
+
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
20
|
+
from minitap.mobile_use.tools.utils import (
|
|
21
|
+
focus_element_if_needed,
|
|
22
|
+
move_cursor_to_end_if_bounds,
|
|
23
|
+
)
|
|
24
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
25
|
+
from minitap.mobile_use.utils.ui_hierarchy import (
|
|
26
|
+
find_element_by_resource_id,
|
|
27
|
+
get_element_text,
|
|
28
|
+
text_input_is_empty,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
logger = get_logger(__name__)

# Upper bound on the erase-then-verify rounds run by TextClearer._clear_with_retries.
MAX_CLEAR_TRIES = 5
# Length assumed for a round when the text read from the input is empty or None
# (TextClearer._erase_text_attempt adds one extra character on top of it).
DEFAULT_CHARS_TO_ERASE = 50
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ClearTextResult(BaseModel):
    """Outcome of a text-clearing operation, as produced by ``TextClearer``."""

    # Whether the operation is considered successful.
    success: bool
    # Description of the failure, or None when there is none.
    error_message: str | None
    # Count of characters requested for erasure; -1 is used when no clearing was needed.
    chars_erased: int
    # Text read from the input afterwards (possibly annotated as hint text), or None.
    final_text: str | None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class TextClearer:
    """Clear the content of a text input identified by its resource id.

    The public entry point is :meth:`clear_text_by_resource_id`. It looks the
    element up in the latest UI hierarchy, focuses it, erases its text and
    re-reads the hierarchy to check the result, repeating the erase up to
    ``MAX_CLEAR_TRIES`` times.
    """

    def __init__(self, ctx: MobileUseContext, state: State):
        """Keep the context and graph state used by every operation."""
        self.ctx = ctx
        self.state = state

    def _refresh_ui_hierarchy(self) -> None:
        """Fetch fresh screen data and store its elements on the state."""
        screen_data = get_screen_data(screen_api_client=self.ctx.screen_api_client)
        self.state.latest_ui_hierarchy = screen_data.elements

    def _get_element_info(self, resource_id: str) -> tuple[object | None, str | None, str | None]:
        """Look up an element and read its text and hint text.

        Returns:
            ``(element, current_text, hint_text)``, or ``(None, None, None)``
            when no hierarchy is available or the element is not found.
        """
        # Only hit the screen API when the state holds no hierarchy yet.
        if not self.state.latest_ui_hierarchy:
            self._refresh_ui_hierarchy()

        if not self.state.latest_ui_hierarchy:
            return None, None, None

        element = find_element_by_resource_id(
            ui_hierarchy=self.state.latest_ui_hierarchy, resource_id=resource_id
        )

        if not element:
            return None, None, None

        current_text = get_element_text(element)
        hint_text = get_element_text(element, hint_text=True)

        return element, current_text, hint_text

    def _format_text_with_hint_info(self, text: str | None, hint_text: str | None) -> str | None:
        """Return ``text``, annotated when it is identical to a non-empty hint text."""
        if text is None:
            return None

        is_hint_text = hint_text is not None and hint_text != "" and hint_text == text

        if is_hint_text:
            return f"{text} (which is the hint text, the input is very likely empty)"

        return text

    def _should_clear_text(self, current_text: str | None, hint_text: str | None) -> bool:
        """Tell whether there is text to erase: non-empty and different from the hint."""
        return current_text is not None and current_text != "" and current_text != hint_text

    def _prepare_element_for_clearing(self, resource_id: str) -> bool:
        """Focus the element and move the cursor; return False when focusing fails."""
        if not focus_element_if_needed(ctx=self.ctx, resource_id=resource_id):
            return False

        move_cursor_to_end_if_bounds(ctx=self.ctx, state=self.state, resource_id=resource_id)
        return True

    def _erase_text_attempt(self, text_length: int) -> str | None:
        """Erase ``text_length + 1`` characters.

        Returns:
            The controller error as a string, or None when the erase succeeded.
        """
        chars_to_erase = text_length + 1
        logger.info(f"Erasing {chars_to_erase} characters from the input")

        error = erase_text_controller(ctx=self.ctx, nb_chars=chars_to_erase)
        if error:
            logger.error(f"Failed to erase text: {error}")
            return str(error)

        return None

    def _clear_with_retries(
        self, resource_id: str, initial_text: str, hint_text: str | None
    ) -> tuple[bool, str | None, int]:
        """Erase the input repeatedly until it reads as empty.

        Returns:
            ``(success, last_text_read, chars_erased)``. ``success`` is False
            when the controller reports an error, or when the element was
            still holding text the last time it was read. ``chars_erased`` is
            the sum of the lengths requested across attempts (0 on a
            controller error).
        """
        current_text = initial_text
        erased_chars = 0
        # True only when the most recent read of the element showed leftover text.
        text_remains = False

        for attempt in range(1, MAX_CLEAR_TRIES + 1):
            logger.info(f"Clear attempt {attempt}/{MAX_CLEAR_TRIES}")

            chars_to_erase = len(current_text) if current_text else DEFAULT_CHARS_TO_ERASE
            error = self._erase_text_attempt(text_length=chars_to_erase)

            if error:
                return False, current_text, 0
            erased_chars += chars_to_erase

            self._refresh_ui_hierarchy()
            elt = find_element_by_resource_id(
                ui_hierarchy=self.state.latest_ui_hierarchy or [],
                resource_id=resource_id,
            )
            # When the element cannot be found the outcome is unknown; stay
            # lenient, as _handle_element_not_found does.
            text_remains = False
            if elt:
                current_text = get_element_text(elt)
                logger.info(f"Current text: {current_text}")
                if text_input_is_empty(text=current_text, hint_text=hint_text):
                    break
                text_remains = True

            move_cursor_to_end_if_bounds(
                ctx=self.ctx, state=self.state, resource_id=resource_id, elt=elt
            )

        # Exhausting the retries with text still present is a failure, so the
        # caller does not report an uncleared field as cleared.
        return not text_remains, current_text, erased_chars

    def _create_result(
        self,
        success: bool,
        error_message: str | None,
        chars_erased: int,
        final_text: str | None,
        hint_text: str | None,
    ) -> ClearTextResult:
        """Build a ``ClearTextResult``, annotating ``final_text`` when it equals the hint."""
        formatted_final_text = self._format_text_with_hint_info(final_text, hint_text)

        return ClearTextResult(
            success=success,
            error_message=error_message,
            chars_erased=chars_erased,
            final_text=formatted_final_text,
        )

    def _handle_no_clearing_needed(
        self, current_text: str | None, hint_text: str | None
    ) -> ClearTextResult:
        """Build the success result for an input that has nothing to erase."""
        return self._create_result(
            success=True,
            error_message=None,
            chars_erased=-1,  # Sentinel: no erase was performed
            final_text=current_text,
            hint_text=hint_text,
        )

    def _handle_element_not_found(self, resource_id: str, hint_text: str | None) -> ClearTextResult:
        """Fallback when the element is not found: erase without a count, then re-read."""
        error = erase_text_controller(ctx=self.ctx)
        self._refresh_ui_hierarchy()

        _, final_text, _ = self._get_element_info(resource_id)

        return self._create_result(
            success=error is None,
            error_message=str(error) if error is not None else None,
            chars_erased=0,  # Unknown since we don't have initial text
            final_text=final_text,
            hint_text=hint_text,
        )

    def clear_text_by_resource_id(self, resource_id: str) -> ClearTextResult:
        """Clear the text input identified by ``resource_id``.

        Returns:
            A ``ClearTextResult`` with the outcome, the number of characters
            requested for erasure and the text read from the input afterwards.
        """
        element, current_text, hint_text = self._get_element_info(resource_id)

        if not element:
            return self._handle_element_not_found(resource_id, hint_text)

        if not self._should_clear_text(current_text, hint_text):
            return self._handle_no_clearing_needed(current_text, hint_text)

        if not self._prepare_element_for_clearing(resource_id):
            return self._create_result(
                success=False,
                error_message="Failed to focus element",
                chars_erased=0,
                final_text=current_text,
                hint_text=hint_text,
            )

        success, final_text, chars_erased = self._clear_with_retries(
            resource_id=resource_id,
            initial_text=current_text or "",
            hint_text=hint_text,
        )

        error_message = None if success else "Failed to clear text after retries"

        return self._create_result(
            success=success,
            error_message=error_message,
            chars_erased=chars_erased,
            final_text=final_text,
            hint_text=hint_text,
        )
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def get_clear_text_tool(ctx: MobileUseContext):
    """Build the ``clear_text`` tool bound to ``ctx``."""

    @tool
    def clear_text(
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[State, InjectedState],
        agent_thought: str,
        text_input_resource_id: str,
    ):
        """
        Clears all the text from the text field, by focusing it if needed.
        """
        outcome = TextClearer(ctx, state).clear_text_by_resource_id(text_input_resource_id)

        # Derive message content, error payload and status from the outcome in one place.
        if not outcome.success:
            content = clear_text_wrapper.on_failure_fn(outcome.error_message)
            extra_kwargs = {"error": outcome.error_message}
            status = "error"
        else:
            content = clear_text_wrapper.on_success_fn(
                nb_char_erased=outcome.chars_erased, new_text_value=outcome.final_text
            )
            extra_kwargs = {}
            status = "success"

        tool_message = ToolMessage(
            tool_call_id=tool_call_id,
            content=content,
            additional_kwargs=extra_kwargs,
            status=status,
        )

        state_update = state.sanitize_update(
            ctx=ctx,
            update={
                "agents_thoughts": [agent_thought],
                EXECUTOR_MESSAGES_KEY: [tool_message],
            },
            agent="executor",
        )
        return Command(update=state_update)

    return clear_text
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def _format_success_message(nb_char_erased: int, new_text_value: str | None) -> str:
|
|
258
|
+
if nb_char_erased == -1:
|
|
259
|
+
msg = "No text clearing was needed (the input was already empty)."
|
|
260
|
+
else:
|
|
261
|
+
msg = f"Text erased successfully. {nb_char_erased} characters were erased."
|
|
262
|
+
|
|
263
|
+
if new_text_value is not None:
|
|
264
|
+
msg += f" New text in the input is '{new_text_value}'."
|
|
265
|
+
|
|
266
|
+
return msg
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _format_failure_message(output: str | None) -> str:
|
|
270
|
+
return "Failed to erase text. " + (str(output) if output else "")
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
# Bundles the clear_text tool factory with the functions that format its
# success and failure messages.
clear_text_wrapper = ToolWrapper(
    tool_fn_getter=get_clear_text_tool,
    on_success_fn=_format_success_message,
    on_failure_fn=_format_failure_message,
)
|