minitap-mobile-use 2.3.0__py3-none-any.whl → 2.5.0__py3-none-any.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (56)
  1. minitap/mobile_use/agents/contextor/contextor.py +2 -2
  2. minitap/mobile_use/agents/cortex/cortex.md +49 -8
  3. minitap/mobile_use/agents/cortex/cortex.py +8 -4
  4. minitap/mobile_use/agents/executor/executor.md +14 -11
  5. minitap/mobile_use/agents/executor/executor.py +6 -5
  6. minitap/mobile_use/agents/hopper/hopper.py +6 -3
  7. minitap/mobile_use/agents/orchestrator/orchestrator.py +26 -11
  8. minitap/mobile_use/agents/outputter/outputter.py +6 -3
  9. minitap/mobile_use/agents/planner/planner.md +20 -22
  10. minitap/mobile_use/agents/planner/planner.py +10 -7
  11. minitap/mobile_use/agents/planner/types.py +4 -2
  12. minitap/mobile_use/agents/planner/utils.py +14 -0
  13. minitap/mobile_use/agents/summarizer/summarizer.py +2 -2
  14. minitap/mobile_use/config.py +6 -1
  15. minitap/mobile_use/context.py +13 -3
  16. minitap/mobile_use/controllers/mobile_command_controller.py +1 -14
  17. minitap/mobile_use/graph/state.py +7 -3
  18. minitap/mobile_use/sdk/agent.py +188 -23
  19. minitap/mobile_use/sdk/examples/README.md +19 -1
  20. minitap/mobile_use/sdk/examples/platform_manual_task_example.py +65 -0
  21. minitap/mobile_use/sdk/examples/platform_minimal_example.py +46 -0
  22. minitap/mobile_use/sdk/services/platform.py +307 -0
  23. minitap/mobile_use/sdk/types/__init__.py +16 -14
  24. minitap/mobile_use/sdk/types/exceptions.py +27 -0
  25. minitap/mobile_use/sdk/types/platform.py +127 -0
  26. minitap/mobile_use/sdk/types/task.py +78 -17
  27. minitap/mobile_use/servers/device_hardware_bridge.py +1 -1
  28. minitap/mobile_use/servers/stop_servers.py +11 -12
  29. minitap/mobile_use/services/llm.py +89 -5
  30. minitap/mobile_use/tools/index.py +0 -6
  31. minitap/mobile_use/tools/mobile/back.py +3 -3
  32. minitap/mobile_use/tools/mobile/clear_text.py +24 -43
  33. minitap/mobile_use/tools/mobile/erase_one_char.py +5 -4
  34. minitap/mobile_use/tools/mobile/glimpse_screen.py +11 -7
  35. minitap/mobile_use/tools/mobile/input_text.py +21 -51
  36. minitap/mobile_use/tools/mobile/launch_app.py +54 -22
  37. minitap/mobile_use/tools/mobile/long_press_on.py +15 -8
  38. minitap/mobile_use/tools/mobile/open_link.py +15 -8
  39. minitap/mobile_use/tools/mobile/press_key.py +15 -8
  40. minitap/mobile_use/tools/mobile/stop_app.py +14 -8
  41. minitap/mobile_use/tools/mobile/swipe.py +11 -5
  42. minitap/mobile_use/tools/mobile/tap.py +103 -21
  43. minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +3 -3
  44. minitap/mobile_use/tools/test_utils.py +104 -78
  45. minitap/mobile_use/tools/types.py +35 -0
  46. minitap/mobile_use/tools/utils.py +51 -48
  47. minitap/mobile_use/utils/recorder.py +1 -1
  48. minitap/mobile_use/utils/ui_hierarchy.py +9 -2
  49. {minitap_mobile_use-2.3.0.dist-info → minitap_mobile_use-2.5.0.dist-info}/METADATA +3 -1
  50. minitap_mobile_use-2.5.0.dist-info/RECORD +100 -0
  51. minitap/mobile_use/tools/mobile/copy_text_from.py +0 -75
  52. minitap/mobile_use/tools/mobile/find_packages.py +0 -69
  53. minitap/mobile_use/tools/mobile/paste_text.py +0 -88
  54. minitap_mobile_use-2.3.0.dist-info/RECORD +0 -98
  55. {minitap_mobile_use-2.3.0.dist-info → minitap_mobile_use-2.5.0.dist-info}/WHEEL +0 -0
  56. {minitap_mobile_use-2.3.0.dist-info → minitap_mobile_use-2.5.0.dist-info}/entry_points.txt +0 -0
@@ -1,44 +1,76 @@
1
+ from typing import Annotated
2
+
1
3
  from langchain_core.messages import ToolMessage
2
4
  from langchain_core.tools import tool
3
5
  from langchain_core.tools.base import InjectedToolCallId
6
+ from langgraph.prebuilt import InjectedState
4
7
  from langgraph.types import Command
8
+
9
+ from minitap.mobile_use.agents.hopper.hopper import HopperOutput, hopper
5
10
  from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
11
+ from minitap.mobile_use.context import MobileUseContext
6
12
  from minitap.mobile_use.controllers.mobile_command_controller import (
7
13
  launch_app as launch_app_controller,
8
14
  )
9
- from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
10
- from typing import Annotated
11
- from minitap.mobile_use.context import MobileUseContext
15
+ from minitap.mobile_use.controllers.platform_specific_commands_controller import list_packages
12
16
  from minitap.mobile_use.graph.state import State
13
- from langgraph.prebuilt import InjectedState
17
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
18
+
19
+
20
+ async def find_package(ctx: MobileUseContext, app_name: str) -> str | None:
21
+ """
22
+ Finds the package name for a given application name.
23
+ """
24
+ all_packages = list_packages(ctx=ctx)
25
+ try:
26
+ hopper_output: HopperOutput = await hopper(
27
+ ctx=ctx,
28
+ request=f"I'm looking for the package name of the following app: '{app_name}'",
29
+ data=all_packages,
30
+ )
31
+ # Assuming hopper_output.output directly contains the package name
32
+ return hopper_output.output
33
+ except Exception as e:
34
+ print(f"Failed to find package for '{app_name}': {e}")
35
+ return None
14
36
 
15
37
 
16
38
  def get_launch_app_tool(ctx: MobileUseContext):
17
39
  @tool
18
- def launch_app(
40
+ async def launch_app(
19
41
  tool_call_id: Annotated[str, InjectedToolCallId],
20
42
  state: Annotated[State, InjectedState],
43
+ app_name: str,
21
44
  agent_thought: str,
22
- package_name: str,
23
- ):
45
+ ) -> Command:
24
46
  """
25
- Launch an application on the device using the package name on Android, bundle id on iOS.
47
+ Finds and launches an application on the device using its natural language name.
26
48
  """
27
- output = launch_app_controller(ctx=ctx, package_name=package_name)
28
- has_failed = output is not None
29
- tool_message = ToolMessage(
30
- tool_call_id=tool_call_id,
31
- content=launch_app_wrapper.on_failure_fn(package_name)
32
- if has_failed
33
- else launch_app_wrapper.on_success_fn(package_name),
34
- additional_kwargs={"error": output} if has_failed else {},
35
- status="error" if has_failed else "success",
36
- )
49
+ package_name = await find_package(ctx=ctx, app_name=app_name)
50
+
51
+ if not package_name:
52
+ tool_message = ToolMessage(
53
+ tool_call_id=tool_call_id,
54
+ content=launch_app_wrapper.on_failure_fn(app_name, "Package not found."),
55
+ status="error",
56
+ )
57
+ else:
58
+ output = launch_app_controller(ctx=ctx, package_name=package_name)
59
+ has_failed = output is not None
60
+ tool_message = ToolMessage(
61
+ tool_call_id=tool_call_id,
62
+ content=launch_app_wrapper.on_failure_fn(app_name, output)
63
+ if has_failed
64
+ else launch_app_wrapper.on_success_fn(app_name),
65
+ additional_kwargs={"error": output} if has_failed else {},
66
+ status="error" if has_failed else "success",
67
+ )
68
+
37
69
  return Command(
38
- update=state.sanitize_update(
70
+ update=await state.asanitize_update(
39
71
  ctx=ctx,
40
72
  update={
41
- "agents_thoughts": [agent_thought],
73
+ "agents_thoughts": [agent_thought, tool_message.content],
42
74
  EXECUTOR_MESSAGES_KEY: [tool_message],
43
75
  },
44
76
  agent="executor",
@@ -50,6 +82,6 @@ def get_launch_app_tool(ctx: MobileUseContext):
50
82
 
51
83
  launch_app_wrapper = ToolWrapper(
52
84
  tool_fn_getter=get_launch_app_tool,
53
- on_success_fn=lambda package_name: f"App {package_name} launched successfully.",
54
- on_failure_fn=lambda package_name: f"Failed to launch app {package_name}.",
85
+ on_success_fn=lambda app_name: f"App '{app_name}' launched successfully.",
86
+ on_failure_fn=lambda app_name, error: f"Failed to launch app '{app_name}': {error}",
55
87
  )
@@ -1,8 +1,11 @@
1
+ from typing import Annotated
2
+
1
3
  from langchain_core.messages import ToolMessage
2
4
  from langchain_core.tools import tool
3
5
  from langchain_core.tools.base import InjectedToolCallId
4
6
  from langgraph.prebuilt import InjectedState
5
7
  from langgraph.types import Command
8
+
6
9
  from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
7
10
  from minitap.mobile_use.context import MobileUseContext
8
11
  from minitap.mobile_use.controllers.mobile_command_controller import SelectorRequest
@@ -11,37 +14,41 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
11
14
  )
12
15
  from minitap.mobile_use.graph.state import State
13
16
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
14
- from typing import Annotated
15
17
 
16
18
 
17
19
  def get_long_press_on_tool(ctx: MobileUseContext):
18
20
  @tool
19
- def long_press_on(
21
+ async def long_press_on(
20
22
  tool_call_id: Annotated[str, InjectedToolCallId],
21
23
  state: Annotated[State, InjectedState],
22
24
  agent_thought: str,
23
25
  selector_request: SelectorRequest,
24
26
  index: int | None = None,
25
- ):
27
+ ) -> Command:
26
28
  """
27
29
  Long press on a UI element identified by the given selector.
28
30
  An index can be specified to select a specific element if multiple are found.
29
31
  """
30
32
  output = long_press_on_controller(ctx=ctx, selector_request=selector_request, index=index)
31
33
  has_failed = output is not None
34
+
35
+ agent_outcome = (
36
+ long_press_on_wrapper.on_failure_fn()
37
+ if has_failed
38
+ else long_press_on_wrapper.on_success_fn()
39
+ )
40
+
32
41
  tool_message = ToolMessage(
33
42
  tool_call_id=tool_call_id,
34
- content=long_press_on_wrapper.on_failure_fn()
35
- if has_failed
36
- else long_press_on_wrapper.on_success_fn(),
43
+ content=agent_outcome,
37
44
  additional_kwargs={"error": output} if has_failed else {},
38
45
  status="error" if has_failed else "success",
39
46
  )
40
47
  return Command(
41
- update=state.sanitize_update(
48
+ update=await state.asanitize_update(
42
49
  ctx=ctx,
43
50
  update={
44
- "agents_thoughts": [agent_thought],
51
+ "agents_thoughts": [agent_thought, agent_outcome],
45
52
  EXECUTOR_MESSAGES_KEY: [tool_message],
46
53
  },
47
54
  agent="executor",
@@ -1,8 +1,11 @@
1
+ from typing import Annotated
2
+
1
3
  from langchain_core.messages import ToolMessage
2
4
  from langchain_core.tools import tool
3
5
  from langchain_core.tools.base import InjectedToolCallId
4
6
  from langgraph.prebuilt import InjectedState
5
7
  from langgraph.types import Command
8
+
6
9
  from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
7
10
  from minitap.mobile_use.context import MobileUseContext
8
11
  from minitap.mobile_use.controllers.mobile_command_controller import (
@@ -10,35 +13,39 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
10
13
  )
11
14
  from minitap.mobile_use.graph.state import State
12
15
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
13
- from typing import Annotated
14
16
 
15
17
 
16
18
  def get_open_link_tool(ctx: MobileUseContext):
17
19
  @tool
18
- def open_link(
20
+ async def open_link(
19
21
  tool_call_id: Annotated[str, InjectedToolCallId],
20
22
  state: Annotated[State, InjectedState],
21
23
  agent_thought: str,
22
24
  url: str,
23
- ):
25
+ ) -> Command:
24
26
  """
25
27
  Open a link on a device (i.e. a deep link).
26
28
  """
27
29
  output = open_link_controller(ctx=ctx, url=url)
28
30
  has_failed = output is not None
31
+
32
+ agent_outcome = (
33
+ open_link_wrapper.on_failure_fn()
34
+ if has_failed
35
+ else open_link_wrapper.on_success_fn(url)
36
+ )
37
+
29
38
  tool_message = ToolMessage(
30
39
  tool_call_id=tool_call_id,
31
- content=open_link_wrapper.on_failure_fn()
32
- if has_failed
33
- else open_link_wrapper.on_success_fn(url),
40
+ content=agent_outcome,
34
41
  additional_kwargs={"error": output} if has_failed else {},
35
42
  status="error" if has_failed else "success",
36
43
  )
37
44
  return Command(
38
- update=state.sanitize_update(
45
+ update=await state.asanitize_update(
39
46
  ctx=ctx,
40
47
  update={
41
- "agents_thoughts": [agent_thought],
48
+ "agents_thoughts": [agent_thought, agent_outcome],
42
49
  EXECUTOR_MESSAGES_KEY: [tool_message],
43
50
  },
44
51
  agent="executor",
@@ -1,8 +1,11 @@
1
+ from typing import Annotated
2
+
1
3
  from langchain_core.messages import ToolMessage
2
4
  from langchain_core.tools import tool
3
5
  from langchain_core.tools.base import InjectedToolCallId
4
6
  from langgraph.prebuilt import InjectedState
5
7
  from langgraph.types import Command
8
+
6
9
  from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
7
10
  from minitap.mobile_use.context import MobileUseContext
8
11
  from minitap.mobile_use.controllers.mobile_command_controller import Key
@@ -11,33 +14,37 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
11
14
  )
12
15
  from minitap.mobile_use.graph.state import State
13
16
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
14
- from typing import Annotated
15
17
 
16
18
 
17
19
  def get_press_key_tool(ctx: MobileUseContext):
18
20
  @tool
19
- def press_key(
21
+ async def press_key(
20
22
  tool_call_id: Annotated[str, InjectedToolCallId],
21
23
  state: Annotated[State, InjectedState],
22
24
  agent_thought: str,
23
25
  key: Key,
24
- ):
26
+ ) -> Command:
25
27
  """Press a key on the device."""
26
28
  output = press_key_controller(ctx=ctx, key=key)
27
29
  has_failed = output is not None
30
+
31
+ agent_outcome = (
32
+ press_key_wrapper.on_failure_fn(key)
33
+ if has_failed
34
+ else press_key_wrapper.on_success_fn(key)
35
+ )
36
+
28
37
  tool_message = ToolMessage(
29
38
  tool_call_id=tool_call_id,
30
- content=press_key_wrapper.on_failure_fn(key)
31
- if has_failed
32
- else press_key_wrapper.on_success_fn(key),
39
+ content=agent_outcome,
33
40
  additional_kwargs={"error": output} if has_failed else {},
34
41
  status="error" if has_failed else "success",
35
42
  )
36
43
  return Command(
37
- update=state.sanitize_update(
44
+ update=await state.asanitize_update(
38
45
  ctx=ctx,
39
46
  update={
40
- "agents_thoughts": [agent_thought],
47
+ "agents_thoughts": [agent_thought, agent_outcome],
41
48
  EXECUTOR_MESSAGES_KEY: [tool_message],
42
49
  },
43
50
  agent="executor",
@@ -1,43 +1,49 @@
1
+ from typing import Annotated
2
+
1
3
  from langchain_core.messages import ToolMessage
2
4
  from langchain_core.tools import tool
3
5
  from langchain_core.tools.base import InjectedToolCallId
4
6
  from langgraph.prebuilt import InjectedState
5
7
  from langgraph.types import Command
8
+
6
9
  from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
7
10
  from minitap.mobile_use.context import MobileUseContext
8
11
  from minitap.mobile_use.controllers.mobile_command_controller import stop_app as stop_app_controller
9
12
  from minitap.mobile_use.graph.state import State
10
13
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
11
- from typing import Annotated
12
14
 
13
15
 
14
16
  def get_stop_app_tool(ctx: MobileUseContext):
15
17
  @tool
16
- def stop_app(
18
+ async def stop_app(
17
19
  tool_call_id: Annotated[str, InjectedToolCallId],
18
20
  state: Annotated[State, InjectedState],
19
21
  agent_thought: str,
20
22
  package_name: str | None = None,
21
- ):
23
+ ) -> Command:
22
24
  """
23
25
  Stops current application if it is running.
24
26
  You can also specify the package name of the app to be stopped.
25
27
  """
26
28
  output = stop_app_controller(ctx=ctx, package_name=package_name)
27
29
  has_failed = output is not None
30
+
31
+ agent_outcome = (
32
+ stop_app_wrapper.on_failure_fn(package_name)
33
+ if has_failed
34
+ else stop_app_wrapper.on_success_fn(package_name)
35
+ )
28
36
  tool_message = ToolMessage(
29
37
  tool_call_id=tool_call_id,
30
- content=stop_app_wrapper.on_failure_fn(package_name)
31
- if has_failed
32
- else stop_app_wrapper.on_success_fn(package_name),
38
+ content=agent_outcome,
33
39
  additional_kwargs={"error": output} if has_failed else {},
34
40
  status="error" if has_failed else "success",
35
41
  )
36
42
  return Command(
37
- update=state.sanitize_update(
43
+ update=await state.asanitize_update(
38
44
  ctx=ctx,
39
45
  update={
40
- "agents_thoughts": [agent_thought],
46
+ "agents_thoughts": [agent_thought, agent_outcome],
41
47
  EXECUTOR_MESSAGES_KEY: [tool_message],
42
48
  },
43
49
  agent="executor",
@@ -24,26 +24,32 @@ from minitap.mobile_use.tools.tool_wrapper import CompositeToolWrapper
24
24
 
25
25
  def get_swipe_tool(ctx: MobileUseContext) -> BaseTool:
26
26
  @tool
27
- def swipe(
27
+ async def swipe(
28
28
  tool_call_id: Annotated[str, InjectedToolCallId],
29
29
  state: Annotated[State, InjectedState],
30
30
  agent_thought: str,
31
31
  swipe_request: SwipeRequest,
32
- ):
32
+ ) -> Command:
33
33
  """Swipes on the screen."""
34
34
  output = swipe_controller(ctx=ctx, swipe_request=swipe_request)
35
35
  has_failed = output is not None
36
+
37
+ agent_outcome = (
38
+ swipe_wrapper.on_success_fn() if not has_failed else swipe_wrapper.on_failure_fn()
39
+ )
40
+
36
41
  tool_message = ToolMessage(
37
42
  tool_call_id=tool_call_id,
38
- content=swipe_wrapper.on_failure_fn() if has_failed else swipe_wrapper.on_success_fn(),
43
+ content=agent_outcome,
39
44
  additional_kwargs={"error": output} if has_failed else {},
40
45
  status="error" if has_failed else "success",
41
46
  )
47
+
42
48
  return Command(
43
- update=state.sanitize_update(
49
+ update=await state.asanitize_update(
44
50
  ctx=ctx,
45
51
  update={
46
- "agents_thoughts": [agent_thought],
52
+ "agents_thoughts": [agent_thought, agent_outcome],
47
53
  EXECUTOR_MESSAGES_KEY: [tool_message],
48
54
  },
49
55
  agent="executor",
@@ -1,45 +1,132 @@
1
+ from typing import Annotated
2
+
1
3
  from langchain_core.messages import ToolMessage
2
4
  from langchain_core.tools import tool
3
5
  from langchain_core.tools.base import InjectedToolCallId
4
6
  from langgraph.prebuilt import InjectedState
5
7
  from langgraph.types import Command
8
+
6
9
  from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
7
10
  from minitap.mobile_use.context import MobileUseContext
8
- from minitap.mobile_use.controllers.mobile_command_controller import SelectorRequest
11
+ from minitap.mobile_use.controllers.mobile_command_controller import (
12
+ CoordinatesSelectorRequest,
13
+ IdSelectorRequest,
14
+ SelectorRequestWithCoordinates,
15
+ TextSelectorRequest,
16
+ )
9
17
  from minitap.mobile_use.controllers.mobile_command_controller import tap as tap_controller
10
18
  from minitap.mobile_use.graph.state import State
11
19
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
12
- from typing import Annotated
20
+ from minitap.mobile_use.tools.types import Target
21
+ from minitap.mobile_use.utils.logger import get_logger
22
+
23
+ logger = get_logger(__name__)
13
24
 
14
25
 
15
26
  def get_tap_tool(ctx: MobileUseContext):
16
27
  @tool
17
- def tap(
28
+ async def tap(
18
29
  tool_call_id: Annotated[str, InjectedToolCallId],
19
30
  state: Annotated[State, InjectedState],
20
31
  agent_thought: str,
21
- selector_request: SelectorRequest,
22
- index: int | None = None,
32
+ target: Target,
23
33
  ):
24
34
  """
25
- Taps on a selector.
26
- Index is optional and is used when you have multiple views matching the same selector.
35
+ Taps on a UI element identified by the 'target' object.
36
+
37
+ The 'target' object allows specifying an element by its resource_id
38
+ (with an optional index), its coordinates, or its text content (with an optional index).
39
+ The tool uses a fallback strategy, trying the locators in that order.
27
40
  """
28
- output = tap_controller(ctx=ctx, selector_request=selector_request, index=index)
41
+ output = {
42
+ "error": "No valid selector provided or all selectors failed."
43
+ } # Default to failure
44
+ final_selector_info = "N/A"
45
+
46
+ # 1. Try with resource_id
47
+ if target.resource_id:
48
+ try:
49
+ selector = IdSelectorRequest(id=target.resource_id)
50
+ logger.info(
51
+ f"Attempting to tap using resource_id: '{target.resource_id}' "
52
+ f"at index {target.resource_id_index}"
53
+ )
54
+ result = tap_controller(
55
+ ctx=ctx, selector_request=selector, index=target.resource_id_index
56
+ )
57
+ if result is None: # Success
58
+ output = None
59
+ final_selector_info = (
60
+ f"resource_id='{target.resource_id}' (index={target.resource_id_index})"
61
+ )
62
+ else:
63
+ logger.warning(
64
+ f"Tap with resource_id '{target.resource_id}' failed. Error: {result}"
65
+ )
66
+ output = result
67
+ except Exception as e:
68
+ logger.warning(f"Exception during tap with resource_id '{target.resource_id}': {e}")
69
+ output = {"error": str(e)}
70
+
71
+ # 2. If resource_id failed or wasn't provided, try with coordinates
72
+ if output is not None and target.coordinates:
73
+ try:
74
+ center_point = target.coordinates.get_center()
75
+ selector = SelectorRequestWithCoordinates(
76
+ coordinates=CoordinatesSelectorRequest(x=center_point.x, y=center_point.y)
77
+ )
78
+ logger.info(
79
+ f"Attempting to tap using coordinates: {center_point.x},{center_point.y}"
80
+ )
81
+ result = tap_controller(ctx=ctx, selector_request=selector)
82
+ if result is None: # Success
83
+ output = None
84
+ final_selector_info = f"coordinates='{target.coordinates}'"
85
+ else:
86
+ logger.warning(
87
+ f"Tap with coordinates '{target.coordinates}' failed. Error: {result}"
88
+ )
89
+ output = result
90
+ except Exception as e:
91
+ logger.warning(f"Exception during tap with coordinates '{target.coordinates}': {e}")
92
+ output = {"error": str(e)}
93
+
94
+ # 3. If coordinates failed or weren't provided, try with text
95
+ if output is not None and target.text:
96
+ try:
97
+ selector = TextSelectorRequest(text=target.text)
98
+ logger.info(
99
+ f"Attempting to tap using text: '{target.text}' at index {target.text_index}"
100
+ )
101
+ result = tap_controller(ctx=ctx, selector_request=selector, index=target.text_index)
102
+ if result is None: # Success
103
+ output = None
104
+ final_selector_info = f"text='{target.text}' (index={target.text_index})"
105
+ else:
106
+ logger.warning(f"Tap with text '{target.text}' failed. Error: {result}")
107
+ output = result
108
+ except Exception as e:
109
+ logger.warning(f"Exception during tap with text '{target.text}': {e}")
110
+ output = {"error": str(e)}
111
+
29
112
  has_failed = output is not None
113
+ agent_outcome = (
114
+ tap_wrapper.on_failure_fn(final_selector_info)
115
+ if has_failed
116
+ else tap_wrapper.on_success_fn(final_selector_info)
117
+ )
118
+
30
119
  tool_message = ToolMessage(
31
120
  tool_call_id=tool_call_id,
32
- content=tap_wrapper.on_failure_fn(selector_request, index)
33
- if has_failed
34
- else tap_wrapper.on_success_fn(selector_request, index),
121
+ content=agent_outcome,
35
122
  additional_kwargs={"error": output} if has_failed else {},
36
123
  status="error" if has_failed else "success",
37
124
  )
38
125
  return Command(
39
- update=state.sanitize_update(
126
+ update=await state.asanitize_update(
40
127
  ctx=ctx,
41
128
  update={
42
- "agents_thoughts": [agent_thought],
129
+ "agents_thoughts": [agent_thought, agent_outcome],
43
130
  EXECUTOR_MESSAGES_KEY: [tool_message],
44
131
  },
45
132
  agent="executor",
@@ -51,12 +138,7 @@ def get_tap_tool(ctx: MobileUseContext):
51
138
 
52
139
  tap_wrapper = ToolWrapper(
53
140
  tool_fn_getter=get_tap_tool,
54
- on_success_fn=(
55
- lambda selector_request,
56
- index: f"Tap on {selector_request} {'at index {index}' if index else ''} is successful."
57
- ),
58
- on_failure_fn=(
59
- lambda selector_request,
60
- index: f"Failed to tap on {selector_request} {'at index {index}' if index else ''}."
61
- ),
141
+ on_success_fn=lambda selector_info: f"Tap on element with {selector_info} was successful.",
142
+ on_failure_fn=lambda selector_info: "Failed to tap on element. "
143
+ + f"Last attempt was with {selector_info}.",
62
144
  )
@@ -16,12 +16,12 @@ from typing import Annotated
16
16
 
17
17
  def get_wait_for_animation_to_end_tool(ctx: MobileUseContext):
18
18
  @tool
19
- def wait_for_animation_to_end(
19
+ async def wait_for_animation_to_end(
20
20
  tool_call_id: Annotated[str, InjectedToolCallId],
21
21
  state: Annotated[State, InjectedState],
22
22
  agent_thought: str,
23
23
  timeout: WaitTimeout | None,
24
- ):
24
+ ) -> Command:
25
25
  """
26
26
  Waits for ongoing animations or videos to finish before continuing.
27
27
 
@@ -44,7 +44,7 @@ def get_wait_for_animation_to_end_tool(ctx: MobileUseContext):
44
44
  status="error" if has_failed else "success",
45
45
  )
46
46
  return Command(
47
- update=state.sanitize_update(
47
+ update=await state.asanitize_update(
48
48
  ctx=ctx,
49
49
  update={
50
50
  "agents_thoughts": [agent_thought],