minitap-mobile-use 0.0.1.dev0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (55)
  1. minitap/mobile_use/agents/cortex/cortex.md +17 -10
  2. minitap/mobile_use/agents/cortex/cortex.py +12 -2
  3. minitap/mobile_use/agents/cortex/types.py +2 -2
  4. minitap/mobile_use/agents/executor/executor.md +16 -10
  5. minitap/mobile_use/agents/executor/executor.py +6 -18
  6. minitap/mobile_use/agents/executor/tool_node.py +105 -0
  7. minitap/mobile_use/agents/hopper/hopper.md +2 -10
  8. minitap/mobile_use/agents/hopper/hopper.py +4 -9
  9. minitap/mobile_use/agents/orchestrator/human.md +3 -4
  10. minitap/mobile_use/agents/orchestrator/orchestrator.md +25 -7
  11. minitap/mobile_use/agents/orchestrator/orchestrator.py +56 -56
  12. minitap/mobile_use/agents/orchestrator/types.py +5 -8
  13. minitap/mobile_use/agents/planner/planner.md +14 -13
  14. minitap/mobile_use/agents/planner/planner.py +4 -1
  15. minitap/mobile_use/agents/planner/types.py +8 -2
  16. minitap/mobile_use/agents/planner/utils.py +11 -0
  17. minitap/mobile_use/clients/device_hardware_client.py +3 -0
  18. minitap/mobile_use/config.py +2 -0
  19. minitap/mobile_use/constants.py +1 -0
  20. minitap/mobile_use/controllers/mobile_command_controller.py +10 -11
  21. minitap/mobile_use/graph/graph.py +9 -31
  22. minitap/mobile_use/graph/state.py +26 -6
  23. minitap/mobile_use/main.py +6 -2
  24. minitap/mobile_use/sdk/agent.py +54 -39
  25. minitap/mobile_use/sdk/builders/agent_config_builder.py +17 -4
  26. minitap/mobile_use/sdk/types/agent.py +5 -0
  27. minitap/mobile_use/servers/stop_servers.py +10 -15
  28. minitap/mobile_use/services/llm.py +1 -0
  29. minitap/mobile_use/tools/index.py +2 -4
  30. minitap/mobile_use/tools/mobile/back.py +7 -11
  31. minitap/mobile_use/tools/mobile/copy_text_from.py +7 -11
  32. minitap/mobile_use/tools/mobile/erase_text.py +7 -9
  33. minitap/mobile_use/tools/mobile/find_packages.py +69 -0
  34. minitap/mobile_use/tools/mobile/input_text.py +131 -32
  35. minitap/mobile_use/tools/mobile/launch_app.py +7 -11
  36. minitap/mobile_use/tools/mobile/long_press_on.py +7 -9
  37. minitap/mobile_use/tools/mobile/open_link.py +7 -11
  38. minitap/mobile_use/tools/mobile/paste_text.py +7 -11
  39. minitap/mobile_use/tools/mobile/press_key.py +7 -11
  40. minitap/mobile_use/tools/mobile/stop_app.py +7 -9
  41. minitap/mobile_use/tools/mobile/swipe.py +7 -11
  42. minitap/mobile_use/tools/mobile/take_screenshot.py +7 -11
  43. minitap/mobile_use/tools/mobile/tap.py +7 -9
  44. minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +7 -9
  45. minitap/mobile_use/tools/tool_wrapper.py +1 -23
  46. minitap/mobile_use/utils/recorder.py +11 -10
  47. minitap/mobile_use/utils/ui_hierarchy.py +88 -1
  48. {minitap_mobile_use-0.0.1.dev0.dist-info → minitap_mobile_use-2.0.1.dist-info}/METADATA +2 -2
  49. minitap_mobile_use-2.0.1.dist-info/RECORD +94 -0
  50. minitap/mobile_use/agents/executor/executor_context_cleaner.py +0 -27
  51. minitap/mobile_use/tools/mobile/list_packages.py +0 -78
  52. minitap/mobile_use/tools/mobile/run_flow.py +0 -57
  53. minitap_mobile_use-0.0.1.dev0.dist-info/RECORD +0 -95
  54. {minitap_mobile_use-0.0.1.dev0.dist-info → minitap_mobile_use-2.0.1.dist-info}/WHEEL +0 -0
  55. {minitap_mobile_use-0.0.1.dev0.dist-info → minitap_mobile_use-2.0.1.dist-info}/entry_points.txt +0 -0
@@ -4,6 +4,7 @@ from typing import List
4
4
 
5
5
  import psutil
6
6
  import requests
7
+
7
8
  from minitap.mobile_use.servers.config import server_settings
8
9
  from minitap.mobile_use.servers.device_hardware_bridge import DEVICE_HARDWARE_BRIDGE_PORT
9
10
  from minitap.mobile_use.utils.logger import get_server_logger
@@ -44,27 +45,23 @@ def stop_process_gracefully(process: psutil.Process, timeout: int = 5) -> bool:
44
45
  logger.success(f"Process {process.pid} ({process.name()}) already terminated")
45
46
  return True
46
47
 
47
- logger.info(f"Stopping process {process.pid} ({process.name()})")
48
+ logger.debug(f"Stopping process {process.pid} ({process.name()})")
48
49
 
49
50
  process.terminate()
50
51
 
51
52
  try:
52
53
  process.wait(timeout=timeout)
53
- logger.success(f"Process {process.pid} terminated gracefully")
54
54
  return True
55
55
  except psutil.TimeoutExpired:
56
56
  logger.warning(f"Process {process.pid} didn't terminate gracefully, force killing...")
57
57
  try:
58
58
  process.kill()
59
59
  process.wait(timeout=2)
60
- logger.success(f"Process {process.pid} force killed")
61
60
  return True
62
61
  except psutil.NoSuchProcess:
63
- logger.success(f"Process {process.pid} already terminated during force kill")
64
62
  return True
65
63
 
66
64
  except psutil.NoSuchProcess:
67
- logger.success(f"Process {process.pid} no longer exists (already terminated)")
68
65
  return True
69
66
  except (psutil.AccessDenied, psutil.ZombieProcess) as e:
70
67
  logger.warning(f"Cannot stop process {process.pid}: {e}")
@@ -84,7 +81,7 @@ def check_service_health(port: int, service_name: str) -> bool:
84
81
  return False
85
82
 
86
83
  if response.status_code == 200:
87
- logger.warning(f"{service_name} is still responding on port {port}")
84
+ logger.debug(f"{service_name} is still responding on port {port}")
88
85
  return True
89
86
  except requests.exceptions.RequestException:
90
87
  pass
@@ -167,19 +164,15 @@ def stop_device_hardware_bridge() -> bool:
167
164
 
168
165
 
169
166
  def stop_servers(
170
- device_screen_api: bool = False, device_hardware_bridge: bool = False
167
+ should_stop_screen_api: bool = False, should_stop_hw_bridge: bool = False
171
168
  ) -> tuple[bool, bool]:
172
- """Stop all servers and return (api_success, bridge_success).
173
-
174
- Args:
175
- device_screen_api: If True, stop the Device Screen API
176
- device_hardware_bridge: If True, stop the Device Hardware Bridge
169
+ """Stop the servers and return whether they stopped successfully (api_success, bridge_success).
177
170
 
178
171
  Returns:
179
172
  Tuple of (api_stopped, bridge_stopped) booleans
180
173
  """
181
- api_success = stop_device_screen_api() if device_screen_api else True
182
- bridge_success = stop_device_hardware_bridge() if device_hardware_bridge else True
174
+ api_success = stop_device_screen_api() if should_stop_screen_api else True
175
+ bridge_success = stop_device_hardware_bridge() if should_stop_hw_bridge else True
183
176
 
184
177
  if api_success and bridge_success:
185
178
  logger.success("All servers stopped successfully")
@@ -195,7 +188,9 @@ def stop_servers(
195
188
 
196
189
  def main():
197
190
  """Main function to stop all servers."""
198
- api_success, bridge_success = stop_servers(device_screen_api=True, device_hardware_bridge=True)
191
+ api_success, bridge_success = stop_servers(
192
+ should_stop_screen_api=True, should_stop_hw_bridge=True
193
+ )
199
194
  if api_success and bridge_success:
200
195
  return 0
201
196
  elif api_success or bridge_success:
@@ -39,6 +39,7 @@ def get_openai_llm(
39
39
  client = ChatOpenAI(
40
40
  model=model_name,
41
41
  api_key=settings.OPENAI_API_KEY,
42
+ base_url=settings.OPENAI_BASE_URL,
42
43
  temperature=temperature,
43
44
  )
44
45
  return client
@@ -5,13 +5,12 @@ from minitap.mobile_use.tools.mobile.copy_text_from import copy_text_from_wrappe
5
5
  from minitap.mobile_use.tools.mobile.erase_text import erase_text_wrapper
6
6
  from minitap.mobile_use.tools.mobile.input_text import input_text_wrapper
7
7
  from minitap.mobile_use.tools.mobile.launch_app import launch_app_wrapper
8
- from minitap.mobile_use.tools.mobile.list_packages import list_packages_wrapper
8
+ from minitap.mobile_use.tools.mobile.find_packages import find_packages_wrapper
9
9
  from minitap.mobile_use.tools.mobile.long_press_on import long_press_on_wrapper
10
10
  from minitap.mobile_use.tools.mobile.open_link import open_link_wrapper
11
11
  from minitap.mobile_use.tools.mobile.paste_text import paste_text_wrapper
12
12
  from minitap.mobile_use.tools.mobile.press_key import press_key_wrapper
13
13
 
14
- # from minitap.mobile_use.tools.mobile.run_flow import run_flow_wrapper
15
14
  from minitap.mobile_use.tools.mobile.stop_app import stop_app_wrapper
16
15
  from minitap.mobile_use.tools.mobile.swipe import swipe_wrapper
17
16
  from minitap.mobile_use.tools.mobile.take_screenshot import take_screenshot_wrapper
@@ -28,10 +27,9 @@ EXECUTOR_WRAPPERS_TOOLS = [
28
27
  long_press_on_wrapper,
29
28
  swipe_wrapper,
30
29
  take_screenshot_wrapper,
31
- # run_flow_wrapper, # To decomment when subflow is implemented
32
30
  copy_text_from_wrapper,
33
31
  input_text_wrapper,
34
- list_packages_wrapper,
32
+ find_packages_wrapper,
35
33
  launch_app_wrapper,
36
34
  stop_app_wrapper,
37
35
  paste_text_wrapper,
@@ -1,11 +1,10 @@
1
- from typing import Optional
2
-
3
1
  from langchain_core.messages import ToolMessage
4
2
  from langchain_core.tools import tool
5
3
  from langchain_core.tools.base import InjectedToolCallId
6
4
  from langgraph.types import Command
5
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
7
6
  from minitap.mobile_use.controllers.mobile_command_controller import back as back_controller
8
- from minitap.mobile_use.tools.tool_wrapper import ExecutorMetadata, ToolWrapper
7
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
9
8
  from typing_extensions import Annotated
10
9
  from minitap.mobile_use.context import MobileUseContext
11
10
  from minitap.mobile_use.graph.state import State
@@ -18,7 +17,6 @@ def get_back_tool(ctx: MobileUseContext):
18
17
  tool_call_id: Annotated[str, InjectedToolCallId],
19
18
  state: Annotated[State, InjectedState],
20
19
  agent_thought: str,
21
- executor_metadata: Optional[ExecutorMetadata],
22
20
  ):
23
21
  """Navigates to the previous screen. (Only works on Android for the moment)"""
24
22
  output = back_controller(ctx=ctx)
@@ -27,18 +25,16 @@ def get_back_tool(ctx: MobileUseContext):
27
25
  tool_call_id=tool_call_id,
28
26
  content=back_wrapper.on_failure_fn() if has_failed else back_wrapper.on_success_fn(),
29
27
  additional_kwargs={"error": output} if has_failed else {},
28
+ status="error" if has_failed else "success",
30
29
  )
31
30
  return Command(
32
- update=back_wrapper.handle_executor_state_fields(
31
+ update=state.sanitize_update(
33
32
  ctx=ctx,
34
- state=state,
35
- executor_metadata=executor_metadata,
36
- tool_message=tool_message,
37
- is_failure=has_failed,
38
- updates={
33
+ update={
39
34
  "agents_thoughts": [agent_thought],
40
- "messages": [tool_message],
35
+ EXECUTOR_MESSAGES_KEY: [tool_message],
41
36
  },
37
+ agent="executor",
42
38
  ),
43
39
  )
44
40
 
@@ -1,14 +1,13 @@
1
- from typing import Optional
2
-
3
1
  from langchain_core.messages import ToolMessage
4
2
  from langchain_core.tools import tool
5
3
  from langchain_core.tools.base import InjectedToolCallId
6
4
  from langgraph.types import Command
5
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
7
6
  from minitap.mobile_use.controllers.mobile_command_controller import SelectorRequest
8
7
  from minitap.mobile_use.controllers.mobile_command_controller import (
9
8
  copy_text_from as copy_text_from_controller,
10
9
  )
11
- from minitap.mobile_use.tools.tool_wrapper import ExecutorMetadata, ToolWrapper
10
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
12
11
  from pydantic import Field
13
12
  from typing_extensions import Annotated
14
13
  from minitap.mobile_use.context import MobileUseContext
@@ -22,7 +21,6 @@ def get_copy_text_from_tool(ctx: MobileUseContext):
22
21
  tool_call_id: Annotated[str, InjectedToolCallId],
23
22
  state: Annotated[State, InjectedState],
24
23
  agent_thought: str,
25
- executor_metadata: Optional[ExecutorMetadata],
26
24
  selector_request: SelectorRequest = Field(
27
25
  ..., description="The selector to copy text from"
28
26
  ),
@@ -50,18 +48,16 @@ def get_copy_text_from_tool(ctx: MobileUseContext):
50
48
  if has_failed
51
49
  else copy_text_from_wrapper.on_success_fn(selector_request),
52
50
  additional_kwargs={"error": output} if has_failed else {},
51
+ status="error" if has_failed else "success",
53
52
  )
54
53
  return Command(
55
- update=copy_text_from_wrapper.handle_executor_state_fields(
54
+ update=state.sanitize_update(
56
55
  ctx=ctx,
57
- state=state,
58
- executor_metadata=executor_metadata,
59
- tool_message=tool_message,
60
- is_failure=has_failed,
61
- updates={
56
+ update={
62
57
  "agents_thoughts": [agent_thought],
63
- "messages": [tool_message],
58
+ EXECUTOR_MESSAGES_KEY: [tool_message],
64
59
  },
60
+ agent="executor",
65
61
  ),
66
62
  )
67
63
 
@@ -7,6 +7,7 @@ from langgraph.prebuilt import InjectedState
7
7
  from langgraph.types import Command
8
8
  from typing_extensions import Annotated
9
9
 
10
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
10
11
  from minitap.mobile_use.controllers.mobile_command_controller import (
11
12
  ScreenDataResponse,
12
13
  WaitTimeout,
@@ -17,7 +18,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
17
18
  erase_text as erase_text_controller,
18
19
  )
19
20
  from minitap.mobile_use.graph.state import State
20
- from minitap.mobile_use.tools.tool_wrapper import ExecutorMetadata, ToolWrapper
21
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
21
22
  from minitap.mobile_use.utils.ui_hierarchy import find_element_by_resource_id
22
23
  from minitap.mobile_use.context import MobileUseContext
23
24
 
@@ -29,7 +30,6 @@ def get_erase_text_tool(ctx: MobileUseContext):
29
30
  state: Annotated[State, InjectedState],
30
31
  agent_thought: str,
31
32
  input_text_resource_id: str,
32
- executor_metadata: Optional[ExecutorMetadata],
33
33
  nb_chars: Optional[int] = None,
34
34
  ):
35
35
  """
@@ -87,19 +87,17 @@ def get_erase_text_tool(ctx: MobileUseContext):
87
87
  nb_char_erased=nb_char_erased, new_text_value=new_text_value
88
88
  ),
89
89
  additional_kwargs={"error": output} if has_failed else {},
90
+ status="error" if has_failed else "success",
90
91
  )
91
92
 
92
93
  return Command(
93
- update=erase_text_wrapper.handle_executor_state_fields(
94
+ update=state.sanitize_update(
94
95
  ctx=ctx,
95
- state=state,
96
- executor_metadata=executor_metadata,
97
- tool_message=tool_message,
98
- is_failure=has_failed,
99
- updates={
96
+ update={
100
97
  "agents_thoughts": [agent_thought],
101
- "messages": [tool_message],
98
+ EXECUTOR_MESSAGES_KEY: [tool_message],
102
99
  },
100
+ agent="executor",
103
101
  ),
104
102
  )
105
103
 
@@ -0,0 +1,69 @@
1
+ from langchain_core.messages import ToolMessage
2
+ from langchain_core.tools import tool
3
+ from langchain_core.tools.base import InjectedToolCallId
4
+ from langgraph.prebuilt import InjectedState
5
+ from langgraph.types import Command
6
+ from minitap.mobile_use.agents.hopper.hopper import HopperOutput, hopper
7
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
8
+ from minitap.mobile_use.context import MobileUseContext
9
+ from minitap.mobile_use.controllers.platform_specific_commands_controller import list_packages
10
+ from minitap.mobile_use.graph.state import State
11
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
12
+ from typing_extensions import Annotated
13
+
14
+
15
+ def get_find_packages_tool(ctx: MobileUseContext):
16
+ @tool
17
+ async def find_packages(
18
+ tool_call_id: Annotated[str, InjectedToolCallId],
19
+ state: Annotated[State, InjectedState],
20
+ appNames: list[str],
21
+ agent_thought: str,
22
+ ) -> Command:
23
+ """
24
+ Finds relevant applications.
25
+ Outputs the full package names list (android) or bundle ids list (IOS).
26
+ """
27
+ output: str = list_packages(ctx=ctx)
28
+
29
+ try:
30
+ hopper_output: HopperOutput = await hopper(
31
+ ctx=ctx,
32
+ request=f"I'm looking for the package names of the following apps: {appNames}",
33
+ data=output,
34
+ )
35
+ tool_message = ToolMessage(
36
+ tool_call_id=tool_call_id,
37
+ content=find_packages_wrapper.on_success_fn(
38
+ hopper_output.step, hopper_output.output
39
+ ),
40
+ status="success",
41
+ )
42
+ except Exception as e:
43
+ print("Failed to extract insights from data: " + str(e))
44
+ tool_message = ToolMessage(
45
+ tool_call_id=tool_call_id,
46
+ content=find_packages_wrapper.on_failure_fn(),
47
+ additional_kwargs={"output": output},
48
+ status="error",
49
+ )
50
+
51
+ return Command(
52
+ update=state.sanitize_update(
53
+ ctx=ctx,
54
+ update={
55
+ "agents_thoughts": [agent_thought, tool_message.content],
56
+ EXECUTOR_MESSAGES_KEY: [tool_message],
57
+ },
58
+ agent="executor",
59
+ ),
60
+ )
61
+
62
+ return find_packages
63
+
64
+
65
+ find_packages_wrapper = ToolWrapper(
66
+ tool_fn_getter=get_find_packages_tool,
67
+ on_success_fn=lambda thought, output: f"Packages found successfully ({thought}): {output}",
68
+ on_failure_fn=lambda: "Failed to find packages.",
69
+ )
@@ -1,17 +1,119 @@
1
- from typing import Optional
1
+ from __future__ import annotations
2
+
3
+ from typing import Literal
2
4
 
3
5
  from langchain_core.messages import ToolMessage
4
6
  from langchain_core.tools import tool
5
7
  from langchain_core.tools.base import InjectedToolCallId
8
+ from langgraph.prebuilt import InjectedState
6
9
  from langgraph.types import Command
10
+ from pydantic import BaseModel
11
+ from typing_extensions import Annotated
12
+
13
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
14
+ from minitap.mobile_use.context import MobileUseContext
15
+ from minitap.mobile_use.controllers.mobile_command_controller import (
16
+ CoordinatesSelectorRequest,
17
+ IdSelectorRequest,
18
+ SelectorRequestWithCoordinates,
19
+ tap,
20
+ )
7
21
  from minitap.mobile_use.controllers.mobile_command_controller import (
8
22
  input_text as input_text_controller,
9
23
  )
10
- from minitap.mobile_use.tools.tool_wrapper import ExecutorMetadata, ToolWrapper
11
- from typing_extensions import Annotated
12
24
  from minitap.mobile_use.graph.state import State
13
- from langgraph.prebuilt import InjectedState
14
- from minitap.mobile_use.context import MobileUseContext
25
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
26
+ from minitap.mobile_use.utils.logger import get_logger
27
+ from minitap.mobile_use.utils.ui_hierarchy import (
28
+ Point,
29
+ find_element_by_resource_id,
30
+ get_bounds_for_element,
31
+ is_element_focused,
32
+ )
33
+
34
+ logger = get_logger(__name__)
35
+
36
+
37
+ class InputResult(BaseModel):
38
+ """Result of an input operation from the controller layer."""
39
+
40
+ ok: bool
41
+ error: str | None = None
42
+
43
+
44
+ def _focus_element_if_needed(
45
+ ctx: MobileUseContext,
46
+ state: State,
47
+ resource_id: str,
48
+ ) -> bool:
49
+ """
50
+ Ensures the element identified by `resource_id` is focused.
51
+ """
52
+ rich_hierarchy: list[dict] = ctx.hw_bridge_client.get_rich_hierarchy()
53
+ rich_elt = find_element_by_resource_id(
54
+ ui_hierarchy=rich_hierarchy,
55
+ resource_id=resource_id,
56
+ is_rich_hierarchy=True,
57
+ )
58
+ if rich_elt and not is_element_focused(rich_elt):
59
+ tap(ctx=ctx, selector_request=IdSelectorRequest(id=resource_id))
60
+ logger.debug(f"Focused (tap) on resource_id={resource_id}")
61
+ rich_hierarchy = ctx.hw_bridge_client.get_rich_hierarchy()
62
+ rich_elt = find_element_by_resource_id(
63
+ ui_hierarchy=rich_hierarchy,
64
+ resource_id=resource_id,
65
+ is_rich_hierarchy=True,
66
+ )
67
+ if rich_elt and is_element_focused(rich_elt):
68
+ logger.debug(f"Text input is focused: {resource_id}")
69
+ return True
70
+
71
+ logger.warning(f"Failed to focus resource_id={resource_id}")
72
+ return False
73
+
74
+
75
+ def _move_cursor_to_end_if_bounds(
76
+ ctx: MobileUseContext,
77
+ state: State,
78
+ resource_id: str,
79
+ ) -> None:
80
+ """
81
+ Best-effort move of the text cursor near the end of the input by tapping the
82
+ bottom-right area of the focused element (if bounds are available).
83
+ """
84
+ elt = find_element_by_resource_id(
85
+ ui_hierarchy=state.latest_ui_hierarchy or [],
86
+ resource_id=resource_id,
87
+ )
88
+ if not elt:
89
+ return
90
+
91
+ bounds = get_bounds_for_element(elt)
92
+ if not bounds:
93
+ return
94
+
95
+ logger.debug("Tapping near the end of the input to move the cursor")
96
+ bottom_right: Point = bounds.get_relative_point(x_percent=0.99, y_percent=0.99)
97
+ tap(
98
+ ctx=ctx,
99
+ selector_request=SelectorRequestWithCoordinates(
100
+ coordinates=CoordinatesSelectorRequest(
101
+ x=bottom_right.x,
102
+ y=bottom_right.y,
103
+ ),
104
+ ),
105
+ )
106
+ logger.debug(f"Tapped end of input {resource_id} at ({bottom_right.x}, {bottom_right.y})")
107
+
108
+
109
+ def _controller_input_text(ctx: MobileUseContext, text: str) -> InputResult:
110
+ """
111
+ Thin wrapper to normalize the controller result.
112
+ """
113
+ controller_out = input_text_controller(ctx=ctx, text=text)
114
+ if controller_out is None:
115
+ return InputResult(ok=True)
116
+ return InputResult(ok=False, error=str(controller_out))
15
117
 
16
118
 
17
119
  def get_input_text_tool(ctx: MobileUseContext):
@@ -20,47 +122,44 @@ def get_input_text_tool(ctx: MobileUseContext):
20
122
  tool_call_id: Annotated[str, InjectedToolCallId],
21
123
  state: Annotated[State, InjectedState],
22
124
  agent_thought: str,
23
- executor_metadata: Optional[ExecutorMetadata],
24
125
  text: str,
126
+ text_input_resource_id: str,
25
127
  ):
26
128
  """
27
- Inputs the specified text into the UI (works even if no field is focused).
129
+ Focus a text field and type text into it.
28
130
 
29
- Example:
30
- - inputText: "Hello World"
131
+ - Ensure the corresponding element is focused (tap if necessary).
132
+ - If bounds are available, tap near the end to place the cursor at the end.
133
+ - Type the provided `text` using the controller.
134
+ """
135
+ focused = _focus_element_if_needed(ctx=ctx, state=state, resource_id=text_input_resource_id)
136
+ if focused:
137
+ _move_cursor_to_end_if_bounds(ctx=ctx, state=state, resource_id=text_input_resource_id)
31
138
 
32
- Notes:
33
- - Unicode not supported on Android.
139
+ result = _controller_input_text(ctx=ctx, text=text)
34
140
 
35
- Random Input Options:
36
- - inputRandomEmail
37
- - inputRandomPersonName
38
- - inputRandomNumber (with optional 'length', default 8)
39
- - inputRandomText (with optional 'length', default 8)
141
+ status: Literal["success", "error"] = "success" if result.ok else "error"
142
+ content_msg = (
143
+ input_text_wrapper.on_success_fn(text)
144
+ if result.ok
145
+ else input_text_wrapper.on_failure_fn(text)
146
+ )
40
147
 
41
- Tip:
42
- Use `copyTextFrom` to reuse generated inputs in later steps.
43
- """
44
- output = input_text_controller(ctx=ctx, text=text)
45
- has_failed = output is not None
46
148
  tool_message = ToolMessage(
47
149
  tool_call_id=tool_call_id,
48
- content=input_text_wrapper.on_failure_fn(text)
49
- if has_failed
50
- else input_text_wrapper.on_success_fn(text),
51
- additional_kwargs={"error": output} if has_failed else {},
150
+ content=content_msg,
151
+ additional_kwargs={"error": result.error} if not result.ok else {},
152
+ status=status,
52
153
  )
154
+
53
155
  return Command(
54
- update=input_text_wrapper.handle_executor_state_fields(
156
+ update=state.sanitize_update(
55
157
  ctx=ctx,
56
- state=state,
57
- executor_metadata=executor_metadata,
58
- tool_message=tool_message,
59
- is_failure=has_failed,
60
- updates={
158
+ update={
61
159
  "agents_thoughts": [agent_thought],
62
- "messages": [tool_message],
160
+ EXECUTOR_MESSAGES_KEY: [tool_message],
63
161
  },
162
+ agent="executor",
64
163
  ),
65
164
  )
66
165
 
@@ -1,13 +1,12 @@
1
- from typing import Optional
2
-
3
1
  from langchain_core.messages import ToolMessage
4
2
  from langchain_core.tools import tool
5
3
  from langchain_core.tools.base import InjectedToolCallId
6
4
  from langgraph.types import Command
5
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
7
6
  from minitap.mobile_use.controllers.mobile_command_controller import (
8
7
  launch_app as launch_app_controller,
9
8
  )
10
- from minitap.mobile_use.tools.tool_wrapper import ExecutorMetadata, ToolWrapper
9
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
11
10
  from typing_extensions import Annotated
12
11
  from minitap.mobile_use.context import MobileUseContext
13
12
  from minitap.mobile_use.graph.state import State
@@ -20,7 +19,6 @@ def get_launch_app_tool(ctx: MobileUseContext):
20
19
  tool_call_id: Annotated[str, InjectedToolCallId],
21
20
  state: Annotated[State, InjectedState],
22
21
  agent_thought: str,
23
- executor_metadata: Optional[ExecutorMetadata],
24
22
  package_name: str,
25
23
  ):
26
24
  """
@@ -34,18 +32,16 @@ def get_launch_app_tool(ctx: MobileUseContext):
34
32
  if has_failed
35
33
  else launch_app_wrapper.on_success_fn(package_name),
36
34
  additional_kwargs={"error": output} if has_failed else {},
35
+ status="error" if has_failed else "success",
37
36
  )
38
37
  return Command(
39
- update=launch_app_wrapper.handle_executor_state_fields(
38
+ update=state.sanitize_update(
40
39
  ctx=ctx,
41
- state=state,
42
- executor_metadata=executor_metadata,
43
- tool_message=tool_message,
44
- is_failure=has_failed,
45
- updates={
40
+ update={
46
41
  "agents_thoughts": [agent_thought],
47
- "messages": [tool_message],
42
+ EXECUTOR_MESSAGES_KEY: [tool_message],
48
43
  },
44
+ agent="executor",
49
45
  ),
50
46
  )
51
47
 
@@ -5,13 +5,14 @@ from langchain_core.tools import tool
5
5
  from langchain_core.tools.base import InjectedToolCallId
6
6
  from langgraph.prebuilt import InjectedState
7
7
  from langgraph.types import Command
8
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
8
9
  from minitap.mobile_use.context import MobileUseContext
9
10
  from minitap.mobile_use.controllers.mobile_command_controller import SelectorRequest
10
11
  from minitap.mobile_use.controllers.mobile_command_controller import (
11
12
  long_press_on as long_press_on_controller,
12
13
  )
13
14
  from minitap.mobile_use.graph.state import State
14
- from minitap.mobile_use.tools.tool_wrapper import ExecutorMetadata, ToolWrapper
15
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
15
16
  from typing_extensions import Annotated
16
17
 
17
18
 
@@ -21,7 +22,6 @@ def get_long_press_on_tool(ctx: MobileUseContext):
21
22
  tool_call_id: Annotated[str, InjectedToolCallId],
22
23
  state: Annotated[State, InjectedState],
23
24
  agent_thought: str,
24
- executor_metadata: Optional[ExecutorMetadata],
25
25
  selector_request: SelectorRequest,
26
26
  index: Optional[int] = None,
27
27
  ):
@@ -37,18 +37,16 @@ def get_long_press_on_tool(ctx: MobileUseContext):
37
37
  if has_failed
38
38
  else long_press_on_wrapper.on_success_fn(),
39
39
  additional_kwargs={"error": output} if has_failed else {},
40
+ status="error" if has_failed else "success",
40
41
  )
41
42
  return Command(
42
- update=long_press_on_wrapper.handle_executor_state_fields(
43
+ update=state.sanitize_update(
43
44
  ctx=ctx,
44
- state=state,
45
- executor_metadata=executor_metadata,
46
- tool_message=tool_message,
47
- is_failure=has_failed,
48
- updates={
45
+ update={
49
46
  "agents_thoughts": [agent_thought],
50
- "messages": [tool_message],
47
+ EXECUTOR_MESSAGES_KEY: [tool_message],
51
48
  },
49
+ agent="executor",
52
50
  ),
53
51
  )
54
52
 
@@ -1,16 +1,15 @@
1
- from typing import Optional
2
-
3
1
  from langchain_core.messages import ToolMessage
4
2
  from langchain_core.tools import tool
5
3
  from langchain_core.tools.base import InjectedToolCallId
6
4
  from langgraph.prebuilt import InjectedState
7
5
  from langgraph.types import Command
6
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
8
7
  from minitap.mobile_use.context import MobileUseContext
9
8
  from minitap.mobile_use.controllers.mobile_command_controller import (
10
9
  open_link as open_link_controller,
11
10
  )
12
11
  from minitap.mobile_use.graph.state import State
13
- from minitap.mobile_use.tools.tool_wrapper import ExecutorMetadata, ToolWrapper
12
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
14
13
  from typing_extensions import Annotated
15
14
 
16
15
 
@@ -20,7 +19,6 @@ def get_open_link_tool(ctx: MobileUseContext):
20
19
  tool_call_id: Annotated[str, InjectedToolCallId],
21
20
  state: Annotated[State, InjectedState],
22
21
  agent_thought: str,
23
- executor_metadata: Optional[ExecutorMetadata],
24
22
  url: str,
25
23
  ):
26
24
  """
@@ -34,18 +32,16 @@ def get_open_link_tool(ctx: MobileUseContext):
34
32
  if has_failed
35
33
  else open_link_wrapper.on_success_fn(url),
36
34
  additional_kwargs={"error": output} if has_failed else {},
35
+ status="error" if has_failed else "success",
37
36
  )
38
37
  return Command(
39
- update=open_link_wrapper.handle_executor_state_fields(
38
+ update=state.sanitize_update(
40
39
  ctx=ctx,
41
- state=state,
42
- executor_metadata=executor_metadata,
43
- tool_message=tool_message,
44
- is_failure=has_failed,
45
- updates={
40
+ update={
46
41
  "agents_thoughts": [agent_thought],
47
- "messages": [tool_message],
42
+ EXECUTOR_MESSAGES_KEY: [tool_message],
48
43
  },
44
+ agent="executor",
49
45
  ),
50
46
  )
51
47