minitap-mobile-use 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (74):
  1. minitap/mobile_use/agents/cortex/cortex.md +19 -10
  2. minitap/mobile_use/agents/cortex/cortex.py +15 -2
  3. minitap/mobile_use/agents/cortex/types.py +2 -4
  4. minitap/mobile_use/agents/executor/executor.md +20 -15
  5. minitap/mobile_use/agents/executor/executor.py +6 -18
  6. minitap/mobile_use/agents/executor/tool_node.py +105 -0
  7. minitap/mobile_use/agents/hopper/hopper.md +2 -10
  8. minitap/mobile_use/agents/hopper/hopper.py +4 -9
  9. minitap/mobile_use/agents/orchestrator/human.md +3 -4
  10. minitap/mobile_use/agents/orchestrator/orchestrator.md +25 -7
  11. minitap/mobile_use/agents/orchestrator/orchestrator.py +56 -56
  12. minitap/mobile_use/agents/orchestrator/types.py +5 -8
  13. minitap/mobile_use/agents/outputter/outputter.py +1 -2
  14. minitap/mobile_use/agents/planner/planner.md +25 -15
  15. minitap/mobile_use/agents/planner/planner.py +7 -1
  16. minitap/mobile_use/agents/planner/types.py +10 -5
  17. minitap/mobile_use/agents/planner/utils.py +11 -0
  18. minitap/mobile_use/agents/summarizer/summarizer.py +2 -1
  19. minitap/mobile_use/clients/device_hardware_client.py +3 -0
  20. minitap/mobile_use/config.py +16 -14
  21. minitap/mobile_use/constants.py +1 -0
  22. minitap/mobile_use/context.py +3 -4
  23. minitap/mobile_use/controllers/mobile_command_controller.py +37 -26
  24. minitap/mobile_use/controllers/platform_specific_commands_controller.py +3 -4
  25. minitap/mobile_use/graph/graph.py +10 -31
  26. minitap/mobile_use/graph/state.py +34 -14
  27. minitap/mobile_use/main.py +11 -8
  28. minitap/mobile_use/sdk/agent.py +78 -63
  29. minitap/mobile_use/sdk/builders/agent_config_builder.py +23 -11
  30. minitap/mobile_use/sdk/builders/task_request_builder.py +9 -9
  31. minitap/mobile_use/sdk/examples/smart_notification_assistant.py +1 -2
  32. minitap/mobile_use/sdk/types/agent.py +10 -5
  33. minitap/mobile_use/sdk/types/task.py +19 -18
  34. minitap/mobile_use/sdk/utils.py +1 -1
  35. minitap/mobile_use/servers/config.py +1 -2
  36. minitap/mobile_use/servers/device_hardware_bridge.py +3 -4
  37. minitap/mobile_use/servers/start_servers.py +4 -4
  38. minitap/mobile_use/servers/stop_servers.py +12 -18
  39. minitap/mobile_use/services/llm.py +4 -2
  40. minitap/mobile_use/tools/index.py +11 -7
  41. minitap/mobile_use/tools/mobile/back.py +8 -12
  42. minitap/mobile_use/tools/mobile/clear_text.py +277 -0
  43. minitap/mobile_use/tools/mobile/copy_text_from.py +8 -12
  44. minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
  45. minitap/mobile_use/tools/mobile/find_packages.py +69 -0
  46. minitap/mobile_use/tools/mobile/input_text.py +55 -32
  47. minitap/mobile_use/tools/mobile/launch_app.py +8 -12
  48. minitap/mobile_use/tools/mobile/long_press_on.py +9 -13
  49. minitap/mobile_use/tools/mobile/open_link.py +8 -12
  50. minitap/mobile_use/tools/mobile/paste_text.py +8 -12
  51. minitap/mobile_use/tools/mobile/press_key.py +8 -12
  52. minitap/mobile_use/tools/mobile/stop_app.py +9 -13
  53. minitap/mobile_use/tools/mobile/swipe.py +8 -12
  54. minitap/mobile_use/tools/mobile/take_screenshot.py +8 -12
  55. minitap/mobile_use/tools/mobile/tap.py +9 -13
  56. minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +9 -13
  57. minitap/mobile_use/tools/tool_wrapper.py +1 -23
  58. minitap/mobile_use/tools/utils.py +86 -0
  59. minitap/mobile_use/utils/cli_helpers.py +1 -2
  60. minitap/mobile_use/utils/cli_selection.py +5 -6
  61. minitap/mobile_use/utils/decorators.py +21 -20
  62. minitap/mobile_use/utils/logger.py +3 -4
  63. minitap/mobile_use/utils/media.py +1 -1
  64. minitap/mobile_use/utils/recorder.py +11 -10
  65. minitap/mobile_use/utils/ui_hierarchy.py +98 -3
  66. {minitap_mobile_use-2.0.0.dist-info → minitap_mobile_use-2.1.0.dist-info}/METADATA +12 -2
  67. minitap_mobile_use-2.1.0.dist-info/RECORD +96 -0
  68. minitap/mobile_use/agents/executor/executor_context_cleaner.py +0 -27
  69. minitap/mobile_use/tools/mobile/erase_text.py +0 -124
  70. minitap/mobile_use/tools/mobile/list_packages.py +0 -78
  71. minitap/mobile_use/tools/mobile/run_flow.py +0 -57
  72. minitap_mobile_use-2.0.0.dist-info/RECORD +0 -95
  73. {minitap_mobile_use-2.0.0.dist-info → minitap_mobile_use-2.1.0.dist-info}/WHEEL +0 -0
  74. {minitap_mobile_use-2.0.0.dist-info → minitap_mobile_use-2.1.0.dist-info}/entry_points.txt +0 -0
@@ -5,7 +5,8 @@ Task-related type definitions for the Mobile-use SDK.
5
5
  from datetime import datetime
6
6
  from enum import Enum
7
7
  from pathlib import Path
8
- from typing import Any, Generic, Optional, Type, TypeVar, overload
8
+ from typing import Any, TypeVar, overload
9
+
9
10
  from pydantic import BaseModel, Field
10
11
 
11
12
  from minitap.mobile_use.config import LLMConfig, get_default_llm_config
@@ -36,8 +37,8 @@ class AgentProfile(BaseModel):
36
37
  self,
37
38
  *,
38
39
  name: str,
39
- llm_config: Optional[LLMConfig] = None,
40
- from_file: Optional[str] = None,
40
+ llm_config: LLMConfig | None = None,
41
+ from_file: str | None = None,
41
42
  **kwargs,
42
43
  ):
43
44
  kwargs["name"] = name
@@ -64,7 +65,7 @@ class TaskStatus(str, Enum):
64
65
 
65
66
 
66
67
  T = TypeVar("T", bound=BaseModel)
67
- TOutput = TypeVar("TOutput", bound=Optional[BaseModel])
68
+ TOutput = TypeVar("TOutput", bound=BaseModel | None)
68
69
 
69
70
 
70
71
  class TaskRequestCommon(BaseModel):
@@ -75,11 +76,11 @@ class TaskRequestCommon(BaseModel):
75
76
  max_steps: int = RECURSION_LIMIT
76
77
  record_trace: bool = False
77
78
  trace_path: Path = Path("mobile-use-traces")
78
- llm_output_path: Optional[Path] = None
79
- thoughts_output_path: Optional[Path] = None
79
+ llm_output_path: Path | None = None
80
+ thoughts_output_path: Path | None = None
80
81
 
81
82
 
82
- class TaskRequest(TaskRequestCommon, Generic[TOutput]):
83
+ class TaskRequest[TOutput](TaskRequestCommon):
83
84
  """
84
85
  Defines the format of a mobile automation task request.
85
86
 
@@ -98,10 +99,10 @@ class TaskRequest(TaskRequestCommon, Generic[TOutput]):
98
99
  """
99
100
 
100
101
  goal: str
101
- profile: Optional[str] = None
102
- task_name: Optional[str] = None
103
- output_description: Optional[str] = None
104
- output_format: Optional[type[TOutput]] = None
102
+ profile: str | None = None
103
+ task_name: str | None = None
104
+ output_description: str | None = None
105
+ output_format: type[TOutput] | None = None
105
106
 
106
107
 
107
108
  class TaskResult(BaseModel):
@@ -116,11 +117,11 @@ class TaskResult(BaseModel):
116
117
  """
117
118
 
118
119
  content: Any = None
119
- error: Optional[str] = None
120
+ error: str | None = None
120
121
  execution_time_seconds: float
121
122
  steps_taken: int
122
123
 
123
- def get_as_model(self, model_class: Type[T]) -> T:
124
+ def get_as_model(self, model_class: type[T]) -> T:
124
125
  """
125
126
  Parse the content into a Pydantic model instance.
126
127
 
@@ -158,14 +159,14 @@ class Task(BaseModel):
158
159
  status: TaskStatus
159
160
  request: TaskRequest
160
161
  created_at: datetime
161
- ended_at: Optional[datetime] = None
162
- result: Optional[TaskResult] = None
162
+ ended_at: datetime | None = None
163
+ result: TaskResult | None = None
163
164
 
164
165
  def finalize(
165
166
  self,
166
- content: Optional[Any] = None,
167
- state: Optional[dict] = None,
168
- error: Optional[str] = None,
167
+ content: Any | None = None,
168
+ state: dict | None = None,
169
+ error: str | None = None,
169
170
  cancelled: bool = False,
170
171
  ):
171
172
  self.status = TaskStatus.COMPLETED if error is None else TaskStatus.FAILED
@@ -16,7 +16,7 @@ def load_llm_config_override(path: Path) -> LLMConfig:
16
16
  override_config_dict = {}
17
17
  if os.path.exists(path):
18
18
  logger.info(f"Loading custom LLM config from {path.resolve()}...")
19
- with open(path, "r") as f:
19
+ with open(path) as f:
20
20
  override_config_dict = load_jsonc(f)
21
21
  else:
22
22
  logger.warning("Custom LLM config not found - using the default config")
@@ -1,4 +1,3 @@
1
- from typing import Optional
2
1
  from dotenv import load_dotenv
3
2
  from minitap.mobile_use.servers.device_hardware_bridge import DEVICE_HARDWARE_BRIDGE_PORT
4
3
  from minitap.mobile_use.utils.logger import get_logger
@@ -11,7 +10,7 @@ logger = get_logger(__name__)
11
10
  class ServerSettings(BaseSettings):
12
11
  DEVICE_HARDWARE_BRIDGE_BASE_URL: str = f"http://localhost:{DEVICE_HARDWARE_BRIDGE_PORT}"
13
12
  DEVICE_SCREEN_API_PORT: int = 9998
14
- ADB_HOST: Optional[str] = None
13
+ ADB_HOST: str | None = None
15
14
 
16
15
  model_config = {"env_file": ".env", "extra": "ignore"}
17
16
 
@@ -4,7 +4,6 @@ import subprocess
4
4
  import threading
5
5
  import time
6
6
  from enum import Enum
7
- from typing import Optional
8
7
 
9
8
  import requests
10
9
  from minitap.mobile_use.context import DevicePlatform
@@ -24,7 +23,7 @@ class BridgeStatus(Enum):
24
23
 
25
24
 
26
25
  class DeviceHardwareBridge:
27
- def __init__(self, device_id: str, platform: DevicePlatform, adb_host: Optional[str] = None):
26
+ def __init__(self, device_id: str, platform: DevicePlatform, adb_host: str | None = None):
28
27
  self.process = None
29
28
  self.status = BridgeStatus.STOPPED
30
29
  self.thread = None
@@ -32,7 +31,7 @@ class DeviceHardwareBridge:
32
31
  self.lock = threading.Lock()
33
32
  self.device_id: str = device_id
34
33
  self.platform: DevicePlatform = platform
35
- self.adb_host: Optional[str] = adb_host
34
+ self.adb_host: str | None = adb_host
36
35
 
37
36
  def _run_maestro_studio(self):
38
37
  try:
@@ -207,6 +206,6 @@ class DeviceHardwareBridge:
207
206
  with self.lock:
208
207
  return {"status": self.status.value, "output": self.output[-10:]}
209
208
 
210
- def get_device_id(self) -> Optional[str]:
209
+ def get_device_id(self) -> str | None:
211
210
  with self.lock:
212
211
  return self.device_id
@@ -4,7 +4,7 @@ import signal
4
4
  import sys
5
5
  import time
6
6
  from enum import Enum
7
- from typing import Annotated, Optional
7
+ from typing import Annotated
8
8
 
9
9
  import requests
10
10
  import typer
@@ -22,7 +22,7 @@ bridge_instance = None
22
22
  shutdown_requested = False
23
23
 
24
24
 
25
- def check_device_screen_api_health(base_url: Optional[str] = None, max_retries=30, delay=1):
25
+ def check_device_screen_api_health(base_url: str | None = None, max_retries=30, delay=1):
26
26
  base_url = base_url or f"http://localhost:{server_settings.DEVICE_SCREEN_API_PORT}"
27
27
  health_url = f"{base_url}/health"
28
28
 
@@ -49,7 +49,7 @@ def check_device_screen_api_health(base_url: Optional[str] = None, max_retries=3
49
49
  return False
50
50
 
51
51
 
52
- def _start_device_screen_api_process() -> Optional[multiprocessing.Process]:
52
+ def _start_device_screen_api_process() -> multiprocessing.Process | None:
53
53
  try:
54
54
  process = multiprocessing.Process(target=start_device_screen_api, daemon=True)
55
55
  process.start()
@@ -61,7 +61,7 @@ def _start_device_screen_api_process() -> Optional[multiprocessing.Process]:
61
61
 
62
62
  def start_device_hardware_bridge(
63
63
  device_id: str, platform: DevicePlatform
64
- ) -> Optional[DeviceHardwareBridge]:
64
+ ) -> DeviceHardwareBridge | None:
65
65
  logger.info("Starting Device Hardware Bridge...")
66
66
 
67
67
  try:
@@ -1,9 +1,9 @@
1
1
  import sys
2
2
  import time
3
- from typing import List
4
3
 
5
4
  import psutil
6
5
  import requests
6
+
7
7
  from minitap.mobile_use.servers.config import server_settings
8
8
  from minitap.mobile_use.servers.device_hardware_bridge import DEVICE_HARDWARE_BRIDGE_PORT
9
9
  from minitap.mobile_use.utils.logger import get_server_logger
@@ -11,7 +11,7 @@ from minitap.mobile_use.utils.logger import get_server_logger
11
11
  logger = get_server_logger()
12
12
 
13
13
 
14
- def find_processes_by_name(name: str) -> List[psutil.Process]:
14
+ def find_processes_by_name(name: str) -> list[psutil.Process]:
15
15
  """Find all processes with the given name."""
16
16
  processes = []
17
17
  for proc in psutil.process_iter(["pid", "name", "cmdline"]):
@@ -25,7 +25,7 @@ def find_processes_by_name(name: str) -> List[psutil.Process]:
25
25
  return processes
26
26
 
27
27
 
28
- def find_processes_by_port(port: int) -> List[psutil.Process]:
28
+ def find_processes_by_port(port: int) -> list[psutil.Process]:
29
29
  processes = []
30
30
  for proc in psutil.process_iter(["pid", "name"]):
31
31
  try:
@@ -44,27 +44,23 @@ def stop_process_gracefully(process: psutil.Process, timeout: int = 5) -> bool:
44
44
  logger.success(f"Process {process.pid} ({process.name()}) already terminated")
45
45
  return True
46
46
 
47
- logger.info(f"Stopping process {process.pid} ({process.name()})")
47
+ logger.debug(f"Stopping process {process.pid} ({process.name()})")
48
48
 
49
49
  process.terminate()
50
50
 
51
51
  try:
52
52
  process.wait(timeout=timeout)
53
- logger.success(f"Process {process.pid} terminated gracefully")
54
53
  return True
55
54
  except psutil.TimeoutExpired:
56
55
  logger.warning(f"Process {process.pid} didn't terminate gracefully, force killing...")
57
56
  try:
58
57
  process.kill()
59
58
  process.wait(timeout=2)
60
- logger.success(f"Process {process.pid} force killed")
61
59
  return True
62
60
  except psutil.NoSuchProcess:
63
- logger.success(f"Process {process.pid} already terminated during force kill")
64
61
  return True
65
62
 
66
63
  except psutil.NoSuchProcess:
67
- logger.success(f"Process {process.pid} no longer exists (already terminated)")
68
64
  return True
69
65
  except (psutil.AccessDenied, psutil.ZombieProcess) as e:
70
66
  logger.warning(f"Cannot stop process {process.pid}: {e}")
@@ -84,7 +80,7 @@ def check_service_health(port: int, service_name: str) -> bool:
84
80
  return False
85
81
 
86
82
  if response.status_code == 200:
87
- logger.warning(f"{service_name} is still responding on port {port}")
83
+ logger.debug(f"{service_name} is still responding on port {port}")
88
84
  return True
89
85
  except requests.exceptions.RequestException:
90
86
  pass
@@ -167,19 +163,15 @@ def stop_device_hardware_bridge() -> bool:
167
163
 
168
164
 
169
165
  def stop_servers(
170
- device_screen_api: bool = False, device_hardware_bridge: bool = False
166
+ should_stop_screen_api: bool = False, should_stop_hw_bridge: bool = False
171
167
  ) -> tuple[bool, bool]:
172
- """Stop all servers and return (api_success, bridge_success).
173
-
174
- Args:
175
- device_screen_api: If True, stop the Device Screen API
176
- device_hardware_bridge: If True, stop the Device Hardware Bridge
168
+ """Stop the servers and return whether they stopped successfully (api_success, bridge_success).
177
169
 
178
170
  Returns:
179
171
  Tuple of (api_stopped, bridge_stopped) booleans
180
172
  """
181
- api_success = stop_device_screen_api() if device_screen_api else True
182
- bridge_success = stop_device_hardware_bridge() if device_hardware_bridge else True
173
+ api_success = stop_device_screen_api() if should_stop_screen_api else True
174
+ bridge_success = stop_device_hardware_bridge() if should_stop_hw_bridge else True
183
175
 
184
176
  if api_success and bridge_success:
185
177
  logger.success("All servers stopped successfully")
@@ -195,7 +187,9 @@ def stop_servers(
195
187
 
196
188
  def main():
197
189
  """Main function to stop all servers."""
198
- api_success, bridge_success = stop_servers(device_screen_api=True, device_hardware_bridge=True)
190
+ api_success, bridge_success = stop_servers(
191
+ should_stop_screen_api=True, should_stop_hw_bridge=True
192
+ )
199
193
  if api_success and bridge_success:
200
194
  return 0
201
195
  elif api_success or bridge_success:
@@ -1,6 +1,7 @@
1
1
  import logging
2
- from typing import Awaitable, Callable, Literal, TypeVar
3
- from typing_extensions import overload
2
+ from typing import Literal, TypeVar
3
+ from collections.abc import Awaitable, Callable
4
+ from typing import overload
4
5
 
5
6
  from langchain_google_genai import ChatGoogleGenerativeAI
6
7
  from langchain_openai import ChatOpenAI
@@ -39,6 +40,7 @@ def get_openai_llm(
39
40
  client = ChatOpenAI(
40
41
  model=model_name,
41
42
  api_key=settings.OPENAI_API_KEY,
43
+ base_url=settings.OPENAI_BASE_URL,
42
44
  temperature=temperature,
43
45
  )
44
46
  return client
@@ -1,17 +1,17 @@
1
1
  from langchain_core.tools import BaseTool
2
+
2
3
  from minitap.mobile_use.context import MobileUseContext
3
4
  from minitap.mobile_use.tools.mobile.back import back_wrapper
5
+ from minitap.mobile_use.tools.mobile.clear_text import clear_text_wrapper
4
6
  from minitap.mobile_use.tools.mobile.copy_text_from import copy_text_from_wrapper
5
- from minitap.mobile_use.tools.mobile.erase_text import erase_text_wrapper
7
+ from minitap.mobile_use.tools.mobile.erase_one_char import erase_one_char_wrapper
8
+ from minitap.mobile_use.tools.mobile.find_packages import find_packages_wrapper
6
9
  from minitap.mobile_use.tools.mobile.input_text import input_text_wrapper
7
10
  from minitap.mobile_use.tools.mobile.launch_app import launch_app_wrapper
8
- from minitap.mobile_use.tools.mobile.list_packages import list_packages_wrapper
9
11
  from minitap.mobile_use.tools.mobile.long_press_on import long_press_on_wrapper
10
12
  from minitap.mobile_use.tools.mobile.open_link import open_link_wrapper
11
13
  from minitap.mobile_use.tools.mobile.paste_text import paste_text_wrapper
12
14
  from minitap.mobile_use.tools.mobile.press_key import press_key_wrapper
13
-
14
- # from minitap.mobile_use.tools.mobile.run_flow import run_flow_wrapper
15
15
  from minitap.mobile_use.tools.mobile.stop_app import stop_app_wrapper
16
16
  from minitap.mobile_use.tools.mobile.swipe import swipe_wrapper
17
17
  from minitap.mobile_use.tools.mobile.take_screenshot import take_screenshot_wrapper
@@ -28,14 +28,14 @@ EXECUTOR_WRAPPERS_TOOLS = [
28
28
  long_press_on_wrapper,
29
29
  swipe_wrapper,
30
30
  take_screenshot_wrapper,
31
- # run_flow_wrapper, # To decomment when subflow is implemented
32
31
  copy_text_from_wrapper,
33
32
  input_text_wrapper,
34
- list_packages_wrapper,
33
+ erase_one_char_wrapper,
34
+ find_packages_wrapper,
35
35
  launch_app_wrapper,
36
36
  stop_app_wrapper,
37
37
  paste_text_wrapper,
38
- erase_text_wrapper,
38
+ clear_text_wrapper,
39
39
  press_key_wrapper,
40
40
  wait_for_animation_to_end_wrapper,
41
41
  ]
@@ -46,6 +46,10 @@ def get_tools_from_wrappers(ctx: MobileUseContext, wrappers: list[ToolWrapper])
46
46
  return [wrapper.tool_fn_getter(ctx) for wrapper in wrappers]
47
47
 
48
48
 
49
+ def format_tools_list(ctx: MobileUseContext, wrappers: list[ToolWrapper]) -> str:
50
+ return "\n".join([tool.name for tool in get_tools_from_wrappers(ctx, wrappers)])
51
+
52
+
49
53
  def get_tool_wrapper_from_name(name: str) -> ToolWrapper | None:
50
54
  """Get the tool wrapper from the name."""
51
55
  for wrapper in EXECUTOR_WRAPPERS_TOOLS:
@@ -1,12 +1,11 @@
1
- from typing import Optional
2
-
3
1
  from langchain_core.messages import ToolMessage
4
2
  from langchain_core.tools import tool
5
3
  from langchain_core.tools.base import InjectedToolCallId
6
4
  from langgraph.types import Command
5
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
7
6
  from minitap.mobile_use.controllers.mobile_command_controller import back as back_controller
8
- from minitap.mobile_use.tools.tool_wrapper import ExecutorMetadata, ToolWrapper
9
- from typing_extensions import Annotated
7
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
8
+ from typing import Annotated
10
9
  from minitap.mobile_use.context import MobileUseContext
11
10
  from minitap.mobile_use.graph.state import State
12
11
  from langgraph.prebuilt import InjectedState
@@ -18,7 +17,6 @@ def get_back_tool(ctx: MobileUseContext):
18
17
  tool_call_id: Annotated[str, InjectedToolCallId],
19
18
  state: Annotated[State, InjectedState],
20
19
  agent_thought: str,
21
- executor_metadata: Optional[ExecutorMetadata],
22
20
  ):
23
21
  """Navigates to the previous screen. (Only works on Android for the moment)"""
24
22
  output = back_controller(ctx=ctx)
@@ -27,18 +25,16 @@ def get_back_tool(ctx: MobileUseContext):
27
25
  tool_call_id=tool_call_id,
28
26
  content=back_wrapper.on_failure_fn() if has_failed else back_wrapper.on_success_fn(),
29
27
  additional_kwargs={"error": output} if has_failed else {},
28
+ status="error" if has_failed else "success",
30
29
  )
31
30
  return Command(
32
- update=back_wrapper.handle_executor_state_fields(
31
+ update=state.sanitize_update(
33
32
  ctx=ctx,
34
- state=state,
35
- executor_metadata=executor_metadata,
36
- tool_message=tool_message,
37
- is_failure=has_failed,
38
- updates={
33
+ update={
39
34
  "agents_thoughts": [agent_thought],
40
- "messages": [tool_message],
35
+ EXECUTOR_MESSAGES_KEY: [tool_message],
41
36
  },
37
+ agent="executor",
42
38
  ),
43
39
  )
44
40
 
@@ -0,0 +1,277 @@
1
+ from typing import Annotated
2
+
3
+ from langchain_core.messages import ToolMessage
4
+ from langchain_core.tools import tool
5
+ from langchain_core.tools.base import InjectedToolCallId
6
+ from langgraph.prebuilt import InjectedState
7
+ from langgraph.types import Command
8
+ from pydantic import BaseModel
9
+
10
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
11
+ from minitap.mobile_use.context import MobileUseContext
12
+ from minitap.mobile_use.controllers.mobile_command_controller import (
13
+ erase_text as erase_text_controller,
14
+ )
15
+ from minitap.mobile_use.controllers.mobile_command_controller import (
16
+ get_screen_data,
17
+ )
18
+ from minitap.mobile_use.graph.state import State
19
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
20
+ from minitap.mobile_use.tools.utils import (
21
+ focus_element_if_needed,
22
+ move_cursor_to_end_if_bounds,
23
+ )
24
+ from minitap.mobile_use.utils.logger import get_logger
25
+ from minitap.mobile_use.utils.ui_hierarchy import (
26
+ find_element_by_resource_id,
27
+ get_element_text,
28
+ text_input_is_empty,
29
+ )
30
+
31
+ logger = get_logger(__name__)
32
+
33
+ MAX_CLEAR_TRIES = 5
34
+ DEFAULT_CHARS_TO_ERASE = 50
35
+
36
+
37
+ class ClearTextResult(BaseModel):
38
+ success: bool
39
+ error_message: str | None
40
+ chars_erased: int
41
+ final_text: str | None
42
+
43
+
44
+ class TextClearer:
45
+ def __init__(self, ctx: MobileUseContext, state: State):
46
+ self.ctx = ctx
47
+ self.state = state
48
+
49
+ def _refresh_ui_hierarchy(self) -> None:
50
+ screen_data = get_screen_data(screen_api_client=self.ctx.screen_api_client)
51
+ self.state.latest_ui_hierarchy = screen_data.elements
52
+
53
+ def _get_element_info(self, resource_id: str) -> tuple[object | None, str | None, str | None]:
54
+ if not self.state.latest_ui_hierarchy:
55
+ self._refresh_ui_hierarchy()
56
+
57
+ if not self.state.latest_ui_hierarchy:
58
+ return None, None, None
59
+
60
+ element = find_element_by_resource_id(
61
+ ui_hierarchy=self.state.latest_ui_hierarchy, resource_id=resource_id
62
+ )
63
+
64
+ if not element:
65
+ return None, None, None
66
+
67
+ current_text = get_element_text(element)
68
+ hint_text = get_element_text(element, hint_text=True)
69
+
70
+ return element, current_text, hint_text
71
+
72
+ def _format_text_with_hint_info(self, text: str | None, hint_text: str | None) -> str | None:
73
+ if text is None:
74
+ return None
75
+
76
+ is_hint_text = hint_text is not None and hint_text != "" and hint_text == text
77
+
78
+ if is_hint_text:
79
+ return f"{text} (which is the hint text, the input is very likely empty)"
80
+
81
+ return text
82
+
83
+ def _should_clear_text(self, current_text: str | None, hint_text: str | None) -> bool:
84
+ return current_text is not None and current_text != "" and current_text != hint_text
85
+
86
+ def _prepare_element_for_clearing(self, resource_id: str) -> bool:
87
+ if not focus_element_if_needed(ctx=self.ctx, resource_id=resource_id):
88
+ return False
89
+
90
+ move_cursor_to_end_if_bounds(ctx=self.ctx, state=self.state, resource_id=resource_id)
91
+ return True
92
+
93
+ def _erase_text_attempt(self, text_length: int) -> str | None:
94
+ chars_to_erase = text_length + 1
95
+ logger.info(f"Erasing {chars_to_erase} characters from the input")
96
+
97
+ error = erase_text_controller(ctx=self.ctx, nb_chars=chars_to_erase)
98
+ if error:
99
+ logger.error(f"Failed to erase text: {error}")
100
+ return str(error)
101
+
102
+ return None
103
+
104
+ def _clear_with_retries(
105
+ self, resource_id: str, initial_text: str, hint_text: str | None
106
+ ) -> tuple[bool, str | None, int]:
107
+ current_text = initial_text
108
+ erased_chars = 0
109
+
110
+ for attempt in range(1, MAX_CLEAR_TRIES + 1):
111
+ logger.info(f"Clear attempt {attempt}/{MAX_CLEAR_TRIES}")
112
+
113
+ chars_to_erase = len(current_text) if current_text else DEFAULT_CHARS_TO_ERASE
114
+ error = self._erase_text_attempt(text_length=chars_to_erase)
115
+
116
+ if error:
117
+ return False, current_text, 0
118
+ erased_chars += chars_to_erase
119
+
120
+ self._refresh_ui_hierarchy()
121
+ elt = find_element_by_resource_id(
122
+ ui_hierarchy=self.state.latest_ui_hierarchy or [],
123
+ resource_id=resource_id,
124
+ )
125
+ if elt:
126
+ current_text = get_element_text(elt)
127
+ logger.info(f"Current text: {current_text}")
128
+ if text_input_is_empty(text=current_text, hint_text=hint_text):
129
+ break
130
+
131
+ move_cursor_to_end_if_bounds(
132
+ ctx=self.ctx, state=self.state, resource_id=resource_id, elt=elt
133
+ )
134
+
135
+ return True, current_text, erased_chars
136
+
137
+ def _create_result(
138
+ self,
139
+ success: bool,
140
+ error_message: str | None,
141
+ chars_erased: int,
142
+ final_text: str | None,
143
+ hint_text: str | None,
144
+ ) -> ClearTextResult:
145
+ formatted_final_text = self._format_text_with_hint_info(final_text, hint_text)
146
+
147
+ return ClearTextResult(
148
+ success=success,
149
+ error_message=error_message,
150
+ chars_erased=chars_erased,
151
+ final_text=formatted_final_text,
152
+ )
153
+
154
+ def _handle_no_clearing_needed(
155
+ self, current_text: str | None, hint_text: str | None
156
+ ) -> ClearTextResult:
157
+ return self._create_result(
158
+ success=True,
159
+ error_message=None,
160
+ chars_erased=-1,
161
+ final_text=current_text,
162
+ hint_text=hint_text,
163
+ )
164
+
165
+ def _handle_element_not_found(self, resource_id: str, hint_text: str | None) -> ClearTextResult:
166
+ error = erase_text_controller(ctx=self.ctx)
167
+ self._refresh_ui_hierarchy()
168
+
169
+ _, final_text, _ = self._get_element_info(resource_id)
170
+
171
+ return self._create_result(
172
+ success=error is None,
173
+ error_message=str(error) if error is not None else None,
174
+ chars_erased=0, # Unknown since we don't have initial text
175
+ final_text=final_text,
176
+ hint_text=hint_text,
177
+ )
178
+
179
+ def clear_text_by_resource_id(self, resource_id: str) -> ClearTextResult:
180
+ element, current_text, hint_text = self._get_element_info(resource_id)
181
+
182
+ if not element:
183
+ return self._handle_element_not_found(resource_id, hint_text)
184
+
185
+ if not self._should_clear_text(current_text, hint_text):
186
+ return self._handle_no_clearing_needed(current_text, hint_text)
187
+
188
+ if not self._prepare_element_for_clearing(resource_id):
189
+ return self._create_result(
190
+ success=False,
191
+ error_message="Failed to focus element",
192
+ chars_erased=0,
193
+ final_text=current_text,
194
+ hint_text=hint_text,
195
+ )
196
+
197
+ success, final_text, chars_erased = self._clear_with_retries(
198
+ resource_id=resource_id,
199
+ initial_text=current_text or "",
200
+ hint_text=hint_text,
201
+ )
202
+
203
+ error_message = None if success else "Failed to clear text after retries"
204
+
205
+ return self._create_result(
206
+ success=success,
207
+ error_message=error_message,
208
+ chars_erased=chars_erased,
209
+ final_text=final_text,
210
+ hint_text=hint_text,
211
+ )
212
+
213
+
214
+ def get_clear_text_tool(ctx: MobileUseContext):
215
+ @tool
216
+ def clear_text(
217
+ tool_call_id: Annotated[str, InjectedToolCallId],
218
+ state: Annotated[State, InjectedState],
219
+ agent_thought: str,
220
+ text_input_resource_id: str,
221
+ ):
222
+ """
223
+ Clears all the text from the text field, by focusing it if needed.
224
+ """
225
+ clearer = TextClearer(ctx, state)
226
+ result = clearer.clear_text_by_resource_id(text_input_resource_id)
227
+
228
+ content = (
229
+ clear_text_wrapper.on_failure_fn(result.error_message)
230
+ if not result.success
231
+ else clear_text_wrapper.on_success_fn(
232
+ nb_char_erased=result.chars_erased, new_text_value=result.final_text
233
+ )
234
+ )
235
+
236
+ tool_message = ToolMessage(
237
+ tool_call_id=tool_call_id,
238
+ content=content,
239
+ additional_kwargs={"error": result.error_message} if not result.success else {},
240
+ status="error" if not result.success else "success",
241
+ )
242
+
243
+ return Command(
244
+ update=state.sanitize_update(
245
+ ctx=ctx,
246
+ update={
247
+ "agents_thoughts": [agent_thought],
248
+ EXECUTOR_MESSAGES_KEY: [tool_message],
249
+ },
250
+ agent="executor",
251
+ ),
252
+ )
253
+
254
+ return clear_text
255
+
256
+
257
+ def _format_success_message(nb_char_erased: int, new_text_value: str | None) -> str:
258
+ if nb_char_erased == -1:
259
+ msg = "No text clearing was needed (the input was already empty)."
260
+ else:
261
+ msg = f"Text erased successfully. {nb_char_erased} characters were erased."
262
+
263
+ if new_text_value is not None:
264
+ msg += f" New text in the input is '{new_text_value}'."
265
+
266
+ return msg
267
+
268
+
269
+ def _format_failure_message(output: str | None) -> str:
270
+ return "Failed to erase text. " + (str(output) if output else "")
271
+
272
+
273
+ clear_text_wrapper = ToolWrapper(
274
+ tool_fn_getter=get_clear_text_tool,
275
+ on_success_fn=_format_success_message,
276
+ on_failure_fn=_format_failure_message,
277
+ )