minitap-mobile-use 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. minitap/mobile_use/__init__.py +0 -0
  2. minitap/mobile_use/agents/contextor/contextor.md +55 -0
  3. minitap/mobile_use/agents/contextor/contextor.py +175 -0
  4. minitap/mobile_use/agents/contextor/types.py +36 -0
  5. minitap/mobile_use/agents/cortex/cortex.md +135 -0
  6. minitap/mobile_use/agents/cortex/cortex.py +152 -0
  7. minitap/mobile_use/agents/cortex/types.py +15 -0
  8. minitap/mobile_use/agents/executor/executor.md +42 -0
  9. minitap/mobile_use/agents/executor/executor.py +87 -0
  10. minitap/mobile_use/agents/executor/tool_node.py +152 -0
  11. minitap/mobile_use/agents/hopper/hopper.md +15 -0
  12. minitap/mobile_use/agents/hopper/hopper.py +44 -0
  13. minitap/mobile_use/agents/orchestrator/human.md +12 -0
  14. minitap/mobile_use/agents/orchestrator/orchestrator.md +21 -0
  15. minitap/mobile_use/agents/orchestrator/orchestrator.py +134 -0
  16. minitap/mobile_use/agents/orchestrator/types.py +11 -0
  17. minitap/mobile_use/agents/outputter/human.md +25 -0
  18. minitap/mobile_use/agents/outputter/outputter.py +85 -0
  19. minitap/mobile_use/agents/outputter/test_outputter.py +167 -0
  20. minitap/mobile_use/agents/planner/human.md +14 -0
  21. minitap/mobile_use/agents/planner/planner.md +126 -0
  22. minitap/mobile_use/agents/planner/planner.py +101 -0
  23. minitap/mobile_use/agents/planner/types.py +51 -0
  24. minitap/mobile_use/agents/planner/utils.py +70 -0
  25. minitap/mobile_use/agents/summarizer/summarizer.py +35 -0
  26. minitap/mobile_use/agents/video_analyzer/__init__.py +5 -0
  27. minitap/mobile_use/agents/video_analyzer/human.md +5 -0
  28. minitap/mobile_use/agents/video_analyzer/video_analyzer.md +37 -0
  29. minitap/mobile_use/agents/video_analyzer/video_analyzer.py +111 -0
  30. minitap/mobile_use/clients/browserstack_client.py +477 -0
  31. minitap/mobile_use/clients/idb_client.py +429 -0
  32. minitap/mobile_use/clients/ios_client.py +332 -0
  33. minitap/mobile_use/clients/ios_client_config.py +141 -0
  34. minitap/mobile_use/clients/ui_automator_client.py +330 -0
  35. minitap/mobile_use/clients/wda_client.py +526 -0
  36. minitap/mobile_use/clients/wda_lifecycle.py +367 -0
  37. minitap/mobile_use/config.py +413 -0
  38. minitap/mobile_use/constants.py +3 -0
  39. minitap/mobile_use/context.py +106 -0
  40. minitap/mobile_use/controllers/__init__.py +0 -0
  41. minitap/mobile_use/controllers/android_controller.py +524 -0
  42. minitap/mobile_use/controllers/controller_factory.py +46 -0
  43. minitap/mobile_use/controllers/device_controller.py +182 -0
  44. minitap/mobile_use/controllers/ios_controller.py +436 -0
  45. minitap/mobile_use/controllers/platform_specific_commands_controller.py +199 -0
  46. minitap/mobile_use/controllers/types.py +106 -0
  47. minitap/mobile_use/controllers/unified_controller.py +193 -0
  48. minitap/mobile_use/graph/graph.py +160 -0
  49. minitap/mobile_use/graph/state.py +115 -0
  50. minitap/mobile_use/main.py +309 -0
  51. minitap/mobile_use/sdk/__init__.py +12 -0
  52. minitap/mobile_use/sdk/agent.py +1294 -0
  53. minitap/mobile_use/sdk/builders/__init__.py +10 -0
  54. minitap/mobile_use/sdk/builders/agent_config_builder.py +307 -0
  55. minitap/mobile_use/sdk/builders/index.py +15 -0
  56. minitap/mobile_use/sdk/builders/task_request_builder.py +236 -0
  57. minitap/mobile_use/sdk/constants.py +1 -0
  58. minitap/mobile_use/sdk/examples/README.md +83 -0
  59. minitap/mobile_use/sdk/examples/__init__.py +1 -0
  60. minitap/mobile_use/sdk/examples/app_lock_messaging.py +54 -0
  61. minitap/mobile_use/sdk/examples/platform_manual_task_example.py +67 -0
  62. minitap/mobile_use/sdk/examples/platform_minimal_example.py +48 -0
  63. minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
  64. minitap/mobile_use/sdk/examples/smart_notification_assistant.py +225 -0
  65. minitap/mobile_use/sdk/examples/video_transcription_example.py +117 -0
  66. minitap/mobile_use/sdk/services/cloud_mobile.py +656 -0
  67. minitap/mobile_use/sdk/services/platform.py +434 -0
  68. minitap/mobile_use/sdk/types/__init__.py +51 -0
  69. minitap/mobile_use/sdk/types/agent.py +84 -0
  70. minitap/mobile_use/sdk/types/exceptions.py +138 -0
  71. minitap/mobile_use/sdk/types/platform.py +183 -0
  72. minitap/mobile_use/sdk/types/task.py +269 -0
  73. minitap/mobile_use/sdk/utils.py +29 -0
  74. minitap/mobile_use/services/accessibility.py +100 -0
  75. minitap/mobile_use/services/llm.py +247 -0
  76. minitap/mobile_use/services/telemetry.py +421 -0
  77. minitap/mobile_use/tools/index.py +67 -0
  78. minitap/mobile_use/tools/mobile/back.py +52 -0
  79. minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
  80. minitap/mobile_use/tools/mobile/focus_and_clear_text.py +317 -0
  81. minitap/mobile_use/tools/mobile/focus_and_input_text.py +153 -0
  82. minitap/mobile_use/tools/mobile/launch_app.py +86 -0
  83. minitap/mobile_use/tools/mobile/long_press_on.py +169 -0
  84. minitap/mobile_use/tools/mobile/open_link.py +62 -0
  85. minitap/mobile_use/tools/mobile/press_key.py +83 -0
  86. minitap/mobile_use/tools/mobile/stop_app.py +62 -0
  87. minitap/mobile_use/tools/mobile/swipe.py +156 -0
  88. minitap/mobile_use/tools/mobile/tap.py +154 -0
  89. minitap/mobile_use/tools/mobile/video_recording.py +177 -0
  90. minitap/mobile_use/tools/mobile/wait_for_delay.py +81 -0
  91. minitap/mobile_use/tools/scratchpad.py +147 -0
  92. minitap/mobile_use/tools/test_utils.py +413 -0
  93. minitap/mobile_use/tools/tool_wrapper.py +16 -0
  94. minitap/mobile_use/tools/types.py +35 -0
  95. minitap/mobile_use/tools/utils.py +336 -0
  96. minitap/mobile_use/utils/app_launch_utils.py +173 -0
  97. minitap/mobile_use/utils/cli_helpers.py +37 -0
  98. minitap/mobile_use/utils/cli_selection.py +143 -0
  99. minitap/mobile_use/utils/conversations.py +31 -0
  100. minitap/mobile_use/utils/decorators.py +124 -0
  101. minitap/mobile_use/utils/errors.py +6 -0
  102. minitap/mobile_use/utils/file.py +13 -0
  103. minitap/mobile_use/utils/logger.py +183 -0
  104. minitap/mobile_use/utils/media.py +186 -0
  105. minitap/mobile_use/utils/recorder.py +52 -0
  106. minitap/mobile_use/utils/requests_utils.py +37 -0
  107. minitap/mobile_use/utils/shell_utils.py +20 -0
  108. minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
  109. minitap/mobile_use/utils/time.py +6 -0
  110. minitap/mobile_use/utils/ui_hierarchy.py +132 -0
  111. minitap/mobile_use/utils/video.py +281 -0
  112. minitap_mobile_use-3.3.0.dist-info/METADATA +329 -0
  113. minitap_mobile_use-3.3.0.dist-info/RECORD +115 -0
  114. minitap_mobile_use-3.3.0.dist-info/WHEEL +4 -0
  115. minitap_mobile_use-3.3.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,169 @@
1
+ from typing import Annotated
2
+
3
+ from langchain_core.messages import ToolMessage
4
+ from langchain_core.tools import tool
5
+ from langchain_core.tools.base import BaseTool, InjectedToolCallId
6
+ from langgraph.prebuilt import InjectedState
7
+ from langgraph.types import Command
8
+
9
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
10
+ from minitap.mobile_use.context import MobileUseContext
11
+ from minitap.mobile_use.controllers.unified_controller import UnifiedMobileController
12
+ from minitap.mobile_use.graph.state import State
13
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
14
+ from minitap.mobile_use.tools.types import Target
15
+ from minitap.mobile_use.utils.logger import get_logger
16
+
17
+ logger = get_logger(__name__)
18
+
19
+
20
def get_long_press_on_tool(ctx: MobileUseContext) -> BaseTool:
    """Build the `long_press_on` tool bound to the given mobile-use context.

    The returned tool tries every locator present on `target` in turn
    (bounds, then resource_id, then text) and stops at the first attempt
    that the controller reports without an error.
    """

    @tool
    async def long_press_on(
        agent_thought: str,
        target: Target,
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[State, InjectedState],
        duration_ms: int = 1000,
    ):
        """
        Long presses on a UI element identified by the 'target' object.

        The 'target' object allows specifying an element by its bounds,
        its resource_id (with an optional index), or its text content (with an optional index).
        The tool uses a fallback strategy, trying the locators in that order.

        Args:
            target: The UI element to long press on (bounds, resource_id, or text).
            duration_ms: Duration of the long press in milliseconds. Choose based on interaction:
                - 500-800ms: Quick long press (e.g., selecting text, haptic feedback)
                - 1000ms (default): Standard long press (most common use case)
                - 1500-2000ms: Extended long press (e.g., context menus, special actions)
                - 2500ms+: Very long press (e.g., accessibility, advanced gestures)
        """
        # Start from a failure state; any successful attempt resets this to None.
        error_obj: dict | None = {
            "error": "No valid selector provided or all selectors failed."
        }
        # Description of the most recent locator that was actually attempted.
        latest_selector_info: str | None = None

        controller = UnifiedMobileController(ctx)

        # 1. Try with COORDINATES FIRST (visual approach)
        if target.bounds:
            try:
                center_point = target.bounds.get_center()
                logger.info(
                    f"Attempting to long press using coordinates: {center_point.x},{center_point.y}"
                )
                latest_selector_info = f"coordinates='{target.bounds}'"
                result = await controller.tap_at(
                    x=center_point.x,
                    y=center_point.y,
                    long_press=True,
                    long_press_duration=duration_ms,
                )
                if result.error is None:  # Success
                    error_obj = None
                else:
                    logger.warning(
                        f"Long press with coordinates '{target.bounds}' failed. "
                        f"Error: {result.error}"
                    )
                    error_obj = {"error": result.error}
            except Exception as e:
                # Best effort: record the failure and fall through to the next locator.
                logger.warning(
                    f"Exception during long press with coordinates '{target.bounds}': {e}"
                )
                error_obj = {"error": str(e)}

        # 2. If coordinates failed or weren't provided, try with resource_id
        if error_obj is not None and target.resource_id:
            try:
                logger.info(
                    f"Attempting to long press using resource_id: '{target.resource_id}' "
                    f"at index {target.resource_id_index}"
                )
                latest_selector_info = (
                    f"resource_id='{target.resource_id}' (index={target.resource_id_index})"
                )
                result = await controller.tap_element(
                    resource_id=target.resource_id,
                    index=target.resource_id_index or 0,
                    long_press=True,
                    long_press_duration=duration_ms,
                )
                if result.error is None:  # Success
                    error_obj = None
                else:
                    logger.warning(
                        f"Long press with resource_id '{target.resource_id}' failed. "
                        f"Error: {result.error}"
                    )
                    error_obj = {"error": result.error}
            except Exception as e:
                logger.warning(
                    f"Exception during long press with resource_id '{target.resource_id}': {e}"
                )
                error_obj = {"error": str(e)}

        # 3. If resource_id failed or wasn't provided, try with text (last resort)
        if error_obj is not None and target.text:
            try:
                logger.info(
                    f"Attempting to long press using text: '{target.text}' "
                    f"at index {target.text_index}"
                )
                latest_selector_info = f"text='{target.text}' (index={target.text_index})"
                result = await controller.tap_element(
                    text=target.text,
                    index=target.text_index or 0,
                    long_press=True,
                    long_press_duration=duration_ms,
                )
                if result.error is None:  # Success
                    error_obj = None
                else:
                    logger.warning(
                        f"Long press with text '{target.text}' failed. Error: {result.error}"
                    )
                    error_obj = {"error": result.error}
            except Exception as e:
                logger.warning(f"Exception during long press with text '{target.text}': {e}")
                error_obj = {"error": str(e)}

        has_failed = error_obj is not None
        final_selector_info = latest_selector_info if latest_selector_info else "N/A"
        agent_outcome = (
            long_press_on_wrapper.on_failure_fn(final_selector_info)
            if has_failed
            else long_press_on_wrapper.on_success_fn(final_selector_info)
        )

        tool_message = ToolMessage(
            tool_call_id=tool_call_id,
            content=agent_outcome,
            additional_kwargs=error_obj if has_failed else {},
            status="error" if has_failed else "success",
        )
        # Publish both the agent's thought and the outcome through the sanitized state update.
        return Command(
            update=await state.asanitize_update(
                ctx=ctx,
                update={
                    "agents_thoughts": [agent_thought, agent_outcome],
                    EXECUTOR_MESSAGES_KEY: [tool_message],
                },
                agent="executor",
            ),
        )

    return long_press_on
160
+
161
+
162
def _long_press_succeeded(selector_info):
    # Outcome text reported when a long press attempt went through.
    return f"Long press on element with {selector_info} was successful."


def _long_press_failed(selector_info):
    # Outcome text reported when every attempted locator failed.
    return f"Failed to long press on element. Last attempt was with {selector_info}."


# Pairs the tool factory with the outcome messages the tool reports back.
long_press_on_wrapper = ToolWrapper(
    tool_fn_getter=get_long_press_on_tool,
    on_success_fn=_long_press_succeeded,
    on_failure_fn=_long_press_failed,
)
@@ -0,0 +1,62 @@
1
+ from typing import Annotated
2
+
3
+ from langchain_core.messages import ToolMessage
4
+ from langchain_core.tools import tool
5
+ from langchain_core.tools.base import InjectedToolCallId
6
+ from langgraph.prebuilt import InjectedState
7
+ from langgraph.types import Command
8
+
9
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
10
+ from minitap.mobile_use.context import MobileUseContext
11
+ from minitap.mobile_use.controllers.unified_controller import UnifiedMobileController
12
+ from minitap.mobile_use.graph.state import State
13
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
14
+
15
+
16
def get_open_link_tool(ctx: MobileUseContext):
    # Factory: returns the `open_link` tool closed over the given context.
    @tool
    async def open_link(
        agent_thought: str,
        url: str,
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[State, InjectedState],
    ) -> Command:
        """
        Open a link on a device (i.e. a deep link).
        """
        opened = await UnifiedMobileController(ctx).open_url(url)

        # Derive the outcome text, message status and error payload in one place.
        if opened:
            agent_outcome = open_link_wrapper.on_success_fn(url)
            message_status = "success"
            extra_kwargs = {}
        else:
            agent_outcome = open_link_wrapper.on_failure_fn()
            message_status = "error"
            extra_kwargs = {"error": "Failed to open URL"}

        tool_message = ToolMessage(
            tool_call_id=tool_call_id,
            content=agent_outcome,
            additional_kwargs=extra_kwargs,
            status=message_status,
        )
        state_update = await state.asanitize_update(
            ctx=ctx,
            update={
                "agents_thoughts": [agent_thought, agent_outcome],
                EXECUTOR_MESSAGES_KEY: [tool_message],
            },
            agent="executor",
        )
        return Command(update=state_update)

    return open_link
56
+
57
+
58
def _open_link_succeeded(url):
    # Outcome text reported when the link was opened.
    return f"Link {url} opened successfully."


def _open_link_failed():
    # Outcome text reported when the link could not be opened.
    return "Failed to open link."


# Pairs the tool factory with the outcome messages the tool reports back.
open_link_wrapper = ToolWrapper(
    tool_fn_getter=get_open_link_tool,
    on_success_fn=_open_link_succeeded,
    on_failure_fn=_open_link_failed,
)
@@ -0,0 +1,83 @@
1
+ from enum import Enum
2
+ from typing import Annotated
3
+
4
+ from langchain_core.messages import ToolMessage
5
+ from langchain_core.tools import tool
6
+ from langchain_core.tools.base import InjectedToolCallId
7
+ from langgraph.prebuilt import InjectedState
8
+ from langgraph.types import Command
9
+ from pydantic import BeforeValidator
10
+
11
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
12
+ from minitap.mobile_use.context import MobileUseContext
13
+ from minitap.mobile_use.controllers.unified_controller import UnifiedMobileController
14
+ from minitap.mobile_use.graph.state import State
15
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
16
+
17
+
18
+ class Key(Enum):
19
+ ENTER = "Enter"
20
+ HOME = "Home"
21
+ BACK = "Back"
22
+
23
+
24
+ def normalize_key(value: str | Key) -> str:
25
+ """Convert key input to Title Case for case-insensitive matching."""
26
+ if isinstance(value, Key):
27
+ return value.value
28
+ return value.title()
29
+
30
+
31
# `Key` annotated so that `normalize_key` runs on the raw input before validation.
CaseInsensitiveKey = Annotated[Key, BeforeValidator(normalize_key)]
32
+
33
+
34
+ def get_press_key_tool(ctx: MobileUseContext):
35
+ @tool
36
+ async def press_key(
37
+ agent_thought: str,
38
+ key: CaseInsensitiveKey,
39
+ tool_call_id: Annotated[str, InjectedToolCallId],
40
+ state: Annotated[State, InjectedState],
41
+ ) -> Command:
42
+ """Press a key on the device."""
43
+ controller = UnifiedMobileController(ctx)
44
+ match key:
45
+ case Key.HOME:
46
+ output = await controller.go_home()
47
+ case Key.BACK:
48
+ output = await controller.go_back()
49
+ case Key.ENTER:
50
+ output = await controller.press_enter()
51
+ has_failed = not output
52
+
53
+ agent_outcome = (
54
+ press_key_wrapper.on_failure_fn(key)
55
+ if has_failed
56
+ else press_key_wrapper.on_success_fn(key)
57
+ )
58
+
59
+ tool_message = ToolMessage(
60
+ tool_call_id=tool_call_id,
61
+ content=agent_outcome,
62
+ additional_kwargs={"error": output} if has_failed else {},
63
+ status="error" if has_failed else "success",
64
+ )
65
+ return Command(
66
+ update=await state.asanitize_update(
67
+ ctx=ctx,
68
+ update={
69
+ "agents_thoughts": [agent_thought, agent_outcome],
70
+ EXECUTOR_MESSAGES_KEY: [tool_message],
71
+ },
72
+ agent="executor",
73
+ ),
74
+ )
75
+
76
+ return press_key
77
+
78
+
79
def _press_key_succeeded(key):
    # Outcome text reported when the key press went through.
    return f"Key {key.value} pressed successfully."


def _press_key_failed(key):
    # Outcome text reported when the key press failed.
    return f"Failed to press key {key.value}."


# Pairs the tool factory with the outcome messages the tool reports back.
press_key_wrapper = ToolWrapper(
    tool_fn_getter=get_press_key_tool,
    on_success_fn=_press_key_succeeded,
    on_failure_fn=_press_key_failed,
)
@@ -0,0 +1,62 @@
1
+ from typing import Annotated
2
+
3
+ from langchain_core.messages import ToolMessage
4
+ from langchain_core.tools import tool
5
+ from langchain_core.tools.base import InjectedToolCallId
6
+ from langgraph.prebuilt import InjectedState
7
+ from langgraph.types import Command
8
+
9
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
10
+ from minitap.mobile_use.context import MobileUseContext
11
+ from minitap.mobile_use.controllers.unified_controller import UnifiedMobileController
12
+ from minitap.mobile_use.graph.state import State
13
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
14
+
15
+
16
def get_stop_app_tool(ctx: MobileUseContext):
    # Factory: returns the `stop_app` tool closed over the given context.
    @tool
    async def stop_app(
        agent_thought: str,
        package_name: str | None = None,
        tool_call_id: Annotated[str, InjectedToolCallId] = None,  # type: ignore
        state: Annotated[State, InjectedState] = None,  # type: ignore
    ) -> Command:
        """
        Stops current application if it is running.
        You can also specify the package name of the app to be stopped.
        """
        terminated = await UnifiedMobileController(ctx).terminate_app(package_name)

        # Derive the outcome text, message status and error payload in one place.
        if terminated:
            agent_outcome = stop_app_wrapper.on_success_fn(package_name)
            message_status = "success"
            extra_kwargs = {}
        else:
            agent_outcome = stop_app_wrapper.on_failure_fn(package_name)
            message_status = "error"
            extra_kwargs = {"error": "Failed to terminate app"}

        tool_message = ToolMessage(
            tool_call_id=tool_call_id,
            content=agent_outcome,
            additional_kwargs=extra_kwargs,
            status=message_status,
        )
        state_update = await state.asanitize_update(
            ctx=ctx,
            update={
                "agents_thoughts": [agent_thought, agent_outcome],
                EXECUTOR_MESSAGES_KEY: [tool_message],
            },
            agent="executor",
        )
        return Command(update=state_update)

    return stop_app
56
+
57
+
58
def _stop_app_succeeded(package_name):
    # Outcome text on success; falls back to 'current' when no package was named.
    return f"App {package_name or 'current'} stopped successfully."


def _stop_app_failed(package_name):
    # Outcome text on failure; falls back to 'current' when no package was named.
    return f"Failed to stop app {package_name or 'current'}."


# Pairs the tool factory with the outcome messages the tool reports back.
stop_app_wrapper = ToolWrapper(
    tool_fn_getter=get_stop_app_tool,
    on_success_fn=_stop_app_succeeded,
    on_failure_fn=_stop_app_failed,
)
@@ -0,0 +1,156 @@
1
+ from typing import Annotated
2
+
3
+ from langchain_core.messages import ToolMessage
4
+ from langchain_core.tools import tool
5
+ from langchain_core.tools.base import BaseTool, InjectedToolCallId
6
+ from langgraph.prebuilt import InjectedState
7
+ from langgraph.types import Command
8
+ from pydantic import Field
9
+
10
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
11
+ from minitap.mobile_use.context import MobileUseContext
12
+ from minitap.mobile_use.controllers.types import (
13
+ CoordinatesSelectorRequest,
14
+ PercentagesSelectorRequest,
15
+ SwipeRequest,
16
+ SwipeStartEndCoordinatesRequest,
17
+ SwipeStartEndPercentagesRequest,
18
+ )
19
+ from minitap.mobile_use.controllers.unified_controller import UnifiedMobileController
20
+ from minitap.mobile_use.graph.state import State
21
+ from minitap.mobile_use.tools.tool_wrapper import CompositeToolWrapper
22
+
23
+
24
def get_swipe_tool(ctx: MobileUseContext) -> BaseTool:
    # Factory: returns the `swipe` tool closed over the given context.
    @tool
    async def swipe(
        agent_thought: str,
        swipe_request: SwipeRequest,
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[State, InjectedState],
    ) -> Command:
        """Swipe from start to end position on screen.

        Supports percentage-based or coordinate-based positioning.
        """
        # The controller hands back None on success and an error value otherwise.
        swipe_error = await UnifiedMobileController(ctx).swipe_request(swipe_request)

        if swipe_error is None:
            agent_outcome = swipe_wrapper.on_success_fn()
            message_status = "success"
            extra_kwargs = {}
        else:
            agent_outcome = swipe_wrapper.on_failure_fn()
            message_status = "error"
            extra_kwargs = {"error": swipe_error}

        tool_message = ToolMessage(
            tool_call_id=tool_call_id,
            content=agent_outcome,
            additional_kwargs=extra_kwargs,
            status=message_status,
        )
        state_update = await state.asanitize_update(
            ctx=ctx,
            update={
                "agents_thoughts": [agent_thought, agent_outcome],
                EXECUTOR_MESSAGES_KEY: [tool_message],
            },
            agent="executor",
        )
        return Command(update=state_update)

    return swipe
63
+
64
+
65
def get_composite_swipe_tools(ctx: MobileUseContext) -> list[BaseTool]:
    """
    Build the flattened-argument swipe tools.

    One tool is produced per swipe mode (pixel coordinates, percentages) so
    that neither has to expose a complex Union-typed argument.
    """

    async def _execute_swipe(
        tool_call_id: str,
        state: State,
        agent_thought: str,
        swipe_request: SwipeRequest,
    ) -> Command:
        """Run the swipe and wrap its outcome in a state-updating Command."""
        # The controller hands back None on success and an error value otherwise.
        swipe_error = await UnifiedMobileController(ctx).swipe_request(swipe_request)

        if swipe_error is None:
            agent_outcome = swipe_wrapper.on_success_fn()
            message_status = "success"
            extra_kwargs = {}
        else:
            agent_outcome = swipe_wrapper.on_failure_fn()
            message_status = "error"
            extra_kwargs = {"error": swipe_error}

        tool_message = ToolMessage(
            tool_call_id=tool_call_id,
            content=agent_outcome,
            additional_kwargs=extra_kwargs,
            status=message_status,
        )
        state_update = await state.asanitize_update(
            ctx=ctx,
            update={
                "agents_thoughts": [agent_thought, agent_outcome],
                EXECUTOR_MESSAGES_KEY: [tool_message],
            },
            agent="executor",
        )
        return Command(update=state_update)

    @tool
    async def swipe_coordinates(
        agent_thought: str,
        start_x: int = Field(description="Start X coordinate in pixels"),
        start_y: int = Field(description="Start Y coordinate in pixels"),
        end_x: int = Field(description="End X coordinate in pixels"),
        end_y: int = Field(description="End Y coordinate in pixels"),
        duration: int = Field(description="Duration in ms", ge=1, le=10000, default=400),
        tool_call_id: Annotated[str, InjectedToolCallId] = None,  # type: ignore
        state: Annotated[State, InjectedState] = None,  # type: ignore
    ) -> Command:
        """Swipe using pixel coordinates from start position to end position."""
        pixel_mode = SwipeStartEndCoordinatesRequest(
            start=CoordinatesSelectorRequest(x=start_x, y=start_y),
            end=CoordinatesSelectorRequest(x=end_x, y=end_y),
        )
        request = SwipeRequest(swipe_mode=pixel_mode, duration=duration)
        return await _execute_swipe(tool_call_id, state, agent_thought, request)

    @tool
    async def swipe_percentages(
        agent_thought: str,
        start_x_percent: int = Field(description="Start X percent (0-100)", ge=0, le=100),
        start_y_percent: int = Field(description="Start Y percent (0-100)", ge=0, le=100),
        end_x_percent: int = Field(description="End X percent (0-100)", ge=0, le=100),
        end_y_percent: int = Field(description="End Y percent (0-100)", ge=0, le=100),
        duration: int = Field(description="Duration in ms", ge=1, le=10000, default=400),
        tool_call_id: Annotated[str, InjectedToolCallId] = None,  # type: ignore
        state: Annotated[State, InjectedState] = None,  # type: ignore
    ) -> Command:
        """Swipe using percentage coordinates from start position to end position."""
        percent_mode = SwipeStartEndPercentagesRequest(
            start=PercentagesSelectorRequest(
                x_percent=start_x_percent, y_percent=start_y_percent
            ),
            end=PercentagesSelectorRequest(x_percent=end_x_percent, y_percent=end_y_percent),
        )
        request = SwipeRequest(swipe_mode=percent_mode, duration=duration)
        return await _execute_swipe(tool_call_id, state, agent_thought, request)

    return [swipe_coordinates, swipe_percentages]
149
+
150
+
151
def _swipe_succeeded():
    # Outcome text reported when the swipe went through.
    return "Swipe is successful."


def _swipe_failed():
    # Outcome text reported when the swipe failed.
    return "Failed to swipe."


# Pairs the single-tool and composite-tool factories with the outcome messages.
swipe_wrapper = CompositeToolWrapper(
    tool_fn_getter=get_swipe_tool,
    composite_tools_fn_getter=get_composite_swipe_tools,
    on_success_fn=_swipe_succeeded,
    on_failure_fn=_swipe_failed,
)
@@ -0,0 +1,154 @@
1
+ from typing import Annotated
2
+
3
+ from langchain_core.messages import ToolMessage
4
+ from langchain_core.tools import tool
5
+ from langchain_core.tools.base import BaseTool, InjectedToolCallId
6
+ from langgraph.prebuilt import InjectedState
7
+ from langgraph.types import Command
8
+
9
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
10
+ from minitap.mobile_use.context import MobileUseContext
11
+ from minitap.mobile_use.controllers.unified_controller import UnifiedMobileController
12
+ from minitap.mobile_use.graph.state import State
13
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
14
+ from minitap.mobile_use.tools.types import Target
15
+ from minitap.mobile_use.tools.utils import has_valid_selectors, validate_coordinates_bounds
16
+ from minitap.mobile_use.utils.logger import get_logger
17
+
18
+ logger = get_logger(__name__)
19
+
20
+
21
def get_tap_tool(ctx: MobileUseContext) -> BaseTool:
    """Build the `tap` tool bound to the given mobile-use context.

    The returned tool tries every locator present on `target` in turn
    (bounds, then resource_id, then text), stops at the first attempt the
    controller reports without an error, and records each failed attempt.
    """

    @tool
    async def tap(
        agent_thought: str,
        target: Target,
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[State, InjectedState],
    ):
        """
        Taps on a UI element identified by the 'target' object.

        The 'target' object allows specifying an element by its bounds,
        its resource_id (with an optional index), or its text content (with an optional index).
        The tool uses a fallback strategy, trying the locators in that order.
        """
        # Track all attempts for better error reporting
        attempts: list[dict] = []
        success = False
        successful_selector: str | None = None

        # Validate target has at least one selector
        if not has_valid_selectors(target):
            attempts.append(
                {
                    "selector": "none",
                    "error": "No valid selector provided (need bounds, resource_id, or text)",
                }
            )

        controller = UnifiedMobileController(ctx)

        # 1. Try with COORDINATES FIRST (visual approach)
        if not success and target.bounds:
            center = target.bounds.get_center()
            selector_info = f"coordinates ({center.x}, {center.y})"

            # Validate bounds before attempting
            bounds_error = validate_coordinates_bounds(
                target, ctx.device.device_width, ctx.device.device_height
            )
            if bounds_error:
                logger.warning(f"Coordinates out of bounds: {bounds_error}")
                attempts.append(
                    {"selector": selector_info, "error": f"Out of bounds: {bounds_error}"}
                )
            else:
                try:
                    logger.info(f"Attempting tap with {selector_info}")
                    # Reuse the center computed above instead of recomputing it.
                    result = await controller.tap_at(x=center.x, y=center.y)
                    if result.error is None:
                        success = True
                        successful_selector = selector_info
                    else:
                        error_msg = result.error
                        logger.warning(f"Tap with {selector_info} failed: {error_msg}")
                        attempts.append({"selector": selector_info, "error": error_msg})
                except Exception as e:
                    # Best effort: record the failure and fall through to the next locator.
                    logger.warning(f"Exception during tap with {selector_info}: {e}")
                    attempts.append({"selector": selector_info, "error": str(e)})

        # 2. If coordinates failed or weren't provided, try with resource_id
        if not success and target.resource_id:
            selector_info = f"resource_id='{target.resource_id}' (index={target.resource_id_index})"
            try:
                logger.info(f"Attempting tap with {selector_info}")
                result = await controller.tap_element(
                    resource_id=target.resource_id,
                    index=target.resource_id_index or 0,
                )
                if result.error is None:
                    success = True
                    successful_selector = selector_info
                else:
                    error_msg = result.error
                    logger.warning(f"Tap with {selector_info} failed: {error_msg}")
                    attempts.append({"selector": selector_info, "error": error_msg})
            except Exception as e:
                logger.warning(f"Exception during tap with {selector_info}: {e}")
                attempts.append({"selector": selector_info, "error": str(e)})

        # 3. If resource_id failed or wasn't provided, try with text (last resort)
        if not success and target.text:
            selector_info = f"text='{target.text}' (index={target.text_index})"
            try:
                logger.info(f"Attempting tap with {selector_info}")
                result = await controller.tap_element(
                    text=target.text,
                    index=target.text_index or 0,
                )
                if result.error is None:
                    success = True
                    successful_selector = selector_info
                else:
                    error_msg = result.error
                    logger.warning(f"Tap with {selector_info} failed: {error_msg}")
                    attempts.append({"selector": selector_info, "error": error_msg})
            except Exception as e:
                logger.warning(f"Exception during tap with {selector_info}: {e}")
                attempts.append({"selector": selector_info, "error": str(e)})

        # Build result message
        if success:
            agent_outcome = tap_wrapper.on_success_fn(successful_selector)
        else:
            # Build detailed failure message with all attempts
            failure_details = "; ".join(f"{a['selector']}: {a['error']}" for a in attempts)
            agent_outcome = tap_wrapper.on_failure_fn(failure_details)

        tool_message = ToolMessage(
            tool_call_id=tool_call_id,
            content=agent_outcome,
            additional_kwargs={"attempts": attempts} if not success else {},
            status="success" if success else "error",
        )
        # Publish both the agent's thought and the outcome through the sanitized state update.
        return Command(
            update=await state.asanitize_update(
                ctx=ctx,
                update={
                    "agents_thoughts": [agent_thought, agent_outcome],
                    EXECUTOR_MESSAGES_KEY: [tool_message],
                },
                agent="executor",
            ),
        )

    return tap
148
+
149
+
150
def _tap_succeeded(selector_info):
    # Outcome text reported when a tap attempt went through.
    return f"Tap on element with {selector_info} was successful."


def _tap_failed(failure_details):
    # Outcome text reported when every attempted locator failed.
    return f"Failed to tap on element. Attempts: {failure_details}"


# Pairs the tool factory with the outcome messages the tool reports back.
tap_wrapper = ToolWrapper(
    tool_fn_getter=get_tap_tool,
    on_success_fn=_tap_succeeded,
    on_failure_fn=_tap_failed,
)