minitap-mobile-use 2.0.1__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (62) hide show
  1. minitap/mobile_use/agents/cortex/cortex.md +7 -5
  2. minitap/mobile_use/agents/cortex/cortex.py +4 -1
  3. minitap/mobile_use/agents/cortex/types.py +1 -3
  4. minitap/mobile_use/agents/executor/executor.md +4 -5
  5. minitap/mobile_use/agents/executor/executor.py +3 -1
  6. minitap/mobile_use/agents/executor/tool_node.py +6 -6
  7. minitap/mobile_use/agents/outputter/outputter.py +1 -2
  8. minitap/mobile_use/agents/planner/planner.md +11 -2
  9. minitap/mobile_use/agents/planner/planner.py +7 -2
  10. minitap/mobile_use/agents/planner/types.py +3 -4
  11. minitap/mobile_use/agents/summarizer/summarizer.py +2 -1
  12. minitap/mobile_use/config.py +31 -16
  13. minitap/mobile_use/context.py +3 -4
  14. minitap/mobile_use/controllers/mobile_command_controller.py +36 -24
  15. minitap/mobile_use/controllers/platform_specific_commands_controller.py +3 -4
  16. minitap/mobile_use/graph/graph.py +1 -0
  17. minitap/mobile_use/graph/state.py +9 -9
  18. minitap/mobile_use/main.py +7 -8
  19. minitap/mobile_use/sdk/agent.py +25 -26
  20. minitap/mobile_use/sdk/builders/agent_config_builder.py +9 -10
  21. minitap/mobile_use/sdk/builders/task_request_builder.py +9 -9
  22. minitap/mobile_use/sdk/examples/smart_notification_assistant.py +1 -2
  23. minitap/mobile_use/sdk/types/agent.py +5 -5
  24. minitap/mobile_use/sdk/types/task.py +19 -18
  25. minitap/mobile_use/sdk/utils.py +4 -3
  26. minitap/mobile_use/servers/config.py +1 -2
  27. minitap/mobile_use/servers/device_hardware_bridge.py +3 -4
  28. minitap/mobile_use/servers/start_servers.py +4 -4
  29. minitap/mobile_use/servers/stop_servers.py +2 -3
  30. minitap/mobile_use/services/llm.py +24 -6
  31. minitap/mobile_use/tools/index.py +26 -14
  32. minitap/mobile_use/tools/mobile/back.py +1 -1
  33. minitap/mobile_use/tools/mobile/clear_text.py +277 -0
  34. minitap/mobile_use/tools/mobile/copy_text_from.py +1 -1
  35. minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
  36. minitap/mobile_use/tools/mobile/find_packages.py +1 -1
  37. minitap/mobile_use/tools/mobile/input_text.py +4 -80
  38. minitap/mobile_use/tools/mobile/launch_app.py +1 -1
  39. minitap/mobile_use/tools/mobile/long_press_on.py +2 -4
  40. minitap/mobile_use/tools/mobile/open_link.py +1 -1
  41. minitap/mobile_use/tools/mobile/paste_text.py +1 -1
  42. minitap/mobile_use/tools/mobile/press_key.py +1 -1
  43. minitap/mobile_use/tools/mobile/stop_app.py +2 -4
  44. minitap/mobile_use/tools/mobile/swipe.py +107 -9
  45. minitap/mobile_use/tools/mobile/take_screenshot.py +1 -1
  46. minitap/mobile_use/tools/mobile/tap.py +2 -4
  47. minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +2 -4
  48. minitap/mobile_use/tools/tool_wrapper.py +6 -1
  49. minitap/mobile_use/tools/utils.py +86 -0
  50. minitap/mobile_use/utils/cli_helpers.py +1 -2
  51. minitap/mobile_use/utils/cli_selection.py +5 -6
  52. minitap/mobile_use/utils/decorators.py +21 -20
  53. minitap/mobile_use/utils/logger.py +3 -4
  54. minitap/mobile_use/utils/media.py +1 -1
  55. minitap/mobile_use/utils/recorder.py +2 -9
  56. minitap/mobile_use/utils/ui_hierarchy.py +13 -5
  57. {minitap_mobile_use-2.0.1.dist-info → minitap_mobile_use-2.2.0.dist-info}/METADATA +35 -5
  58. minitap_mobile_use-2.2.0.dist-info/RECORD +96 -0
  59. minitap/mobile_use/tools/mobile/erase_text.py +0 -122
  60. minitap_mobile_use-2.0.1.dist-info/RECORD +0 -94
  61. {minitap_mobile_use-2.0.1.dist-info → minitap_mobile_use-2.2.0.dist-info}/WHEEL +0 -0
  62. {minitap_mobile_use-2.0.1.dist-info → minitap_mobile_use-2.2.0.dist-info}/entry_points.txt +0 -0
@@ -1,16 +1,17 @@
1
1
  from langchain_core.tools import BaseTool
2
+
2
3
  from minitap.mobile_use.context import MobileUseContext
3
4
  from minitap.mobile_use.tools.mobile.back import back_wrapper
5
+ from minitap.mobile_use.tools.mobile.clear_text import clear_text_wrapper
4
6
  from minitap.mobile_use.tools.mobile.copy_text_from import copy_text_from_wrapper
5
- from minitap.mobile_use.tools.mobile.erase_text import erase_text_wrapper
7
+ from minitap.mobile_use.tools.mobile.erase_one_char import erase_one_char_wrapper
8
+ from minitap.mobile_use.tools.mobile.find_packages import find_packages_wrapper
6
9
  from minitap.mobile_use.tools.mobile.input_text import input_text_wrapper
7
10
  from minitap.mobile_use.tools.mobile.launch_app import launch_app_wrapper
8
- from minitap.mobile_use.tools.mobile.find_packages import find_packages_wrapper
9
11
  from minitap.mobile_use.tools.mobile.long_press_on import long_press_on_wrapper
10
12
  from minitap.mobile_use.tools.mobile.open_link import open_link_wrapper
11
13
  from minitap.mobile_use.tools.mobile.paste_text import paste_text_wrapper
12
14
  from minitap.mobile_use.tools.mobile.press_key import press_key_wrapper
13
-
14
15
  from minitap.mobile_use.tools.mobile.stop_app import stop_app_wrapper
15
16
  from minitap.mobile_use.tools.mobile.swipe import swipe_wrapper
16
17
  from minitap.mobile_use.tools.mobile.take_screenshot import take_screenshot_wrapper
@@ -18,7 +19,7 @@ from minitap.mobile_use.tools.mobile.tap import tap_wrapper
18
19
  from minitap.mobile_use.tools.mobile.wait_for_animation_to_end import (
19
20
  wait_for_animation_to_end_wrapper,
20
21
  )
21
- from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
22
+ from minitap.mobile_use.tools.tool_wrapper import CompositeToolWrapper, ToolWrapper
22
23
 
23
24
  EXECUTOR_WRAPPERS_TOOLS = [
24
25
  back_wrapper,
@@ -29,24 +30,35 @@ EXECUTOR_WRAPPERS_TOOLS = [
29
30
  take_screenshot_wrapper,
30
31
  copy_text_from_wrapper,
31
32
  input_text_wrapper,
33
+ erase_one_char_wrapper,
32
34
  find_packages_wrapper,
33
35
  launch_app_wrapper,
34
36
  stop_app_wrapper,
35
37
  paste_text_wrapper,
36
- erase_text_wrapper,
38
+ clear_text_wrapper,
37
39
  press_key_wrapper,
38
40
  wait_for_animation_to_end_wrapper,
39
41
  ]
40
42
 
41
43
 
42
- def get_tools_from_wrappers(ctx: MobileUseContext, wrappers: list[ToolWrapper]) -> list[BaseTool]:
43
- """Get the tools from the wrappers."""
44
- return [wrapper.tool_fn_getter(ctx) for wrapper in wrappers]
44
+ def get_tools_from_wrappers(
45
+ ctx: "MobileUseContext",
46
+ wrappers: list[ToolWrapper],
47
+ ) -> list[BaseTool]:
48
+ tools: list[BaseTool] = []
49
+ for wrapper in wrappers:
50
+ if ctx.llm_config.get_agent("executor").provider == "vertexai":
51
+ # The main swipe tool argument structure is not supported by vertexai, we need to split
52
+ # this tool into multiple tools
53
+ if wrapper.tool_fn_getter == swipe_wrapper.tool_fn_getter and isinstance(
54
+ wrapper, CompositeToolWrapper
55
+ ):
56
+ tools.extend(wrapper.composite_tools_fn_getter(ctx))
57
+ continue
58
+
59
+ tools.append(wrapper.tool_fn_getter(ctx))
60
+ return tools
45
61
 
46
62
 
47
- def get_tool_wrapper_from_name(name: str) -> ToolWrapper | None:
48
- """Get the tool wrapper from the name."""
49
- for wrapper in EXECUTOR_WRAPPERS_TOOLS:
50
- if wrapper.tool_fn_getter.__name__ == f"get_{name}_tool":
51
- return wrapper
52
- return None
63
+ def format_tools_list(ctx: MobileUseContext, wrappers: list[ToolWrapper]) -> str:
64
+ return ", ".join([tool.name for tool in get_tools_from_wrappers(ctx, wrappers)])
@@ -5,7 +5,7 @@ from langgraph.types import Command
5
5
  from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
6
6
  from minitap.mobile_use.controllers.mobile_command_controller import back as back_controller
7
7
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
8
- from typing_extensions import Annotated
8
+ from typing import Annotated
9
9
  from minitap.mobile_use.context import MobileUseContext
10
10
  from minitap.mobile_use.graph.state import State
11
11
  from langgraph.prebuilt import InjectedState
@@ -0,0 +1,277 @@
1
+ from typing import Annotated
2
+
3
+ from langchain_core.messages import ToolMessage
4
+ from langchain_core.tools import tool
5
+ from langchain_core.tools.base import InjectedToolCallId
6
+ from langgraph.prebuilt import InjectedState
7
+ from langgraph.types import Command
8
+ from pydantic import BaseModel
9
+
10
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
11
+ from minitap.mobile_use.context import MobileUseContext
12
+ from minitap.mobile_use.controllers.mobile_command_controller import (
13
+ erase_text as erase_text_controller,
14
+ )
15
+ from minitap.mobile_use.controllers.mobile_command_controller import (
16
+ get_screen_data,
17
+ )
18
+ from minitap.mobile_use.graph.state import State
19
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
20
+ from minitap.mobile_use.tools.utils import (
21
+ focus_element_if_needed,
22
+ move_cursor_to_end_if_bounds,
23
+ )
24
+ from minitap.mobile_use.utils.logger import get_logger
25
+ from minitap.mobile_use.utils.ui_hierarchy import (
26
+ find_element_by_resource_id,
27
+ get_element_text,
28
+ text_input_is_empty,
29
+ )
30
+
31
+ logger = get_logger(__name__)
32
+
33
+ MAX_CLEAR_TRIES = 5
34
+ DEFAULT_CHARS_TO_ERASE = 50
35
+
36
+
37
+ class ClearTextResult(BaseModel):
38
+ success: bool
39
+ error_message: str | None
40
+ chars_erased: int
41
+ final_text: str | None
42
+
43
+
44
+ class TextClearer:
45
+ def __init__(self, ctx: MobileUseContext, state: State):
46
+ self.ctx = ctx
47
+ self.state = state
48
+
49
+ def _refresh_ui_hierarchy(self) -> None:
50
+ screen_data = get_screen_data(screen_api_client=self.ctx.screen_api_client)
51
+ self.state.latest_ui_hierarchy = screen_data.elements
52
+
53
+ def _get_element_info(self, resource_id: str) -> tuple[object | None, str | None, str | None]:
54
+ if not self.state.latest_ui_hierarchy:
55
+ self._refresh_ui_hierarchy()
56
+
57
+ if not self.state.latest_ui_hierarchy:
58
+ return None, None, None
59
+
60
+ element = find_element_by_resource_id(
61
+ ui_hierarchy=self.state.latest_ui_hierarchy, resource_id=resource_id
62
+ )
63
+
64
+ if not element:
65
+ return None, None, None
66
+
67
+ current_text = get_element_text(element)
68
+ hint_text = get_element_text(element, hint_text=True)
69
+
70
+ return element, current_text, hint_text
71
+
72
+ def _format_text_with_hint_info(self, text: str | None, hint_text: str | None) -> str | None:
73
+ if text is None:
74
+ return None
75
+
76
+ is_hint_text = hint_text is not None and hint_text != "" and hint_text == text
77
+
78
+ if is_hint_text:
79
+ return f"{text} (which is the hint text, the input is very likely empty)"
80
+
81
+ return text
82
+
83
+ def _should_clear_text(self, current_text: str | None, hint_text: str | None) -> bool:
84
+ return current_text is not None and current_text != "" and current_text != hint_text
85
+
86
+ def _prepare_element_for_clearing(self, resource_id: str) -> bool:
87
+ if not focus_element_if_needed(ctx=self.ctx, resource_id=resource_id):
88
+ return False
89
+
90
+ move_cursor_to_end_if_bounds(ctx=self.ctx, state=self.state, resource_id=resource_id)
91
+ return True
92
+
93
+ def _erase_text_attempt(self, text_length: int) -> str | None:
94
+ chars_to_erase = text_length + 1
95
+ logger.info(f"Erasing {chars_to_erase} characters from the input")
96
+
97
+ error = erase_text_controller(ctx=self.ctx, nb_chars=chars_to_erase)
98
+ if error:
99
+ logger.error(f"Failed to erase text: {error}")
100
+ return str(error)
101
+
102
+ return None
103
+
104
+ def _clear_with_retries(
105
+ self, resource_id: str, initial_text: str, hint_text: str | None
106
+ ) -> tuple[bool, str | None, int]:
107
+ current_text = initial_text
108
+ erased_chars = 0
109
+
110
+ for attempt in range(1, MAX_CLEAR_TRIES + 1):
111
+ logger.info(f"Clear attempt {attempt}/{MAX_CLEAR_TRIES}")
112
+
113
+ chars_to_erase = len(current_text) if current_text else DEFAULT_CHARS_TO_ERASE
114
+ error = self._erase_text_attempt(text_length=chars_to_erase)
115
+
116
+ if error:
117
+ return False, current_text, 0
118
+ erased_chars += chars_to_erase
119
+
120
+ self._refresh_ui_hierarchy()
121
+ elt = find_element_by_resource_id(
122
+ ui_hierarchy=self.state.latest_ui_hierarchy or [],
123
+ resource_id=resource_id,
124
+ )
125
+ if elt:
126
+ current_text = get_element_text(elt)
127
+ logger.info(f"Current text: {current_text}")
128
+ if text_input_is_empty(text=current_text, hint_text=hint_text):
129
+ break
130
+
131
+ move_cursor_to_end_if_bounds(
132
+ ctx=self.ctx, state=self.state, resource_id=resource_id, elt=elt
133
+ )
134
+
135
+ return True, current_text, erased_chars
136
+
137
+ def _create_result(
138
+ self,
139
+ success: bool,
140
+ error_message: str | None,
141
+ chars_erased: int,
142
+ final_text: str | None,
143
+ hint_text: str | None,
144
+ ) -> ClearTextResult:
145
+ formatted_final_text = self._format_text_with_hint_info(final_text, hint_text)
146
+
147
+ return ClearTextResult(
148
+ success=success,
149
+ error_message=error_message,
150
+ chars_erased=chars_erased,
151
+ final_text=formatted_final_text,
152
+ )
153
+
154
+ def _handle_no_clearing_needed(
155
+ self, current_text: str | None, hint_text: str | None
156
+ ) -> ClearTextResult:
157
+ return self._create_result(
158
+ success=True,
159
+ error_message=None,
160
+ chars_erased=-1,
161
+ final_text=current_text,
162
+ hint_text=hint_text,
163
+ )
164
+
165
+ def _handle_element_not_found(self, resource_id: str, hint_text: str | None) -> ClearTextResult:
166
+ error = erase_text_controller(ctx=self.ctx)
167
+ self._refresh_ui_hierarchy()
168
+
169
+ _, final_text, _ = self._get_element_info(resource_id)
170
+
171
+ return self._create_result(
172
+ success=error is None,
173
+ error_message=str(error) if error is not None else None,
174
+ chars_erased=0, # Unknown since we don't have initial text
175
+ final_text=final_text,
176
+ hint_text=hint_text,
177
+ )
178
+
179
+ def clear_text_by_resource_id(self, resource_id: str) -> ClearTextResult:
180
+ element, current_text, hint_text = self._get_element_info(resource_id)
181
+
182
+ if not element:
183
+ return self._handle_element_not_found(resource_id, hint_text)
184
+
185
+ if not self._should_clear_text(current_text, hint_text):
186
+ return self._handle_no_clearing_needed(current_text, hint_text)
187
+
188
+ if not self._prepare_element_for_clearing(resource_id):
189
+ return self._create_result(
190
+ success=False,
191
+ error_message="Failed to focus element",
192
+ chars_erased=0,
193
+ final_text=current_text,
194
+ hint_text=hint_text,
195
+ )
196
+
197
+ success, final_text, chars_erased = self._clear_with_retries(
198
+ resource_id=resource_id,
199
+ initial_text=current_text or "",
200
+ hint_text=hint_text,
201
+ )
202
+
203
+ error_message = None if success else "Failed to clear text after retries"
204
+
205
+ return self._create_result(
206
+ success=success,
207
+ error_message=error_message,
208
+ chars_erased=chars_erased,
209
+ final_text=final_text,
210
+ hint_text=hint_text,
211
+ )
212
+
213
+
214
+ def get_clear_text_tool(ctx: MobileUseContext):
215
+ @tool
216
+ def clear_text(
217
+ tool_call_id: Annotated[str, InjectedToolCallId],
218
+ state: Annotated[State, InjectedState],
219
+ agent_thought: str,
220
+ text_input_resource_id: str,
221
+ ):
222
+ """
223
+ Clears all the text from the text field, by focusing it if needed.
224
+ """
225
+ clearer = TextClearer(ctx, state)
226
+ result = clearer.clear_text_by_resource_id(text_input_resource_id)
227
+
228
+ content = (
229
+ clear_text_wrapper.on_failure_fn(result.error_message)
230
+ if not result.success
231
+ else clear_text_wrapper.on_success_fn(
232
+ nb_char_erased=result.chars_erased, new_text_value=result.final_text
233
+ )
234
+ )
235
+
236
+ tool_message = ToolMessage(
237
+ tool_call_id=tool_call_id,
238
+ content=content,
239
+ additional_kwargs={"error": result.error_message} if not result.success else {},
240
+ status="error" if not result.success else "success",
241
+ )
242
+
243
+ return Command(
244
+ update=state.sanitize_update(
245
+ ctx=ctx,
246
+ update={
247
+ "agents_thoughts": [agent_thought],
248
+ EXECUTOR_MESSAGES_KEY: [tool_message],
249
+ },
250
+ agent="executor",
251
+ ),
252
+ )
253
+
254
+ return clear_text
255
+
256
+
257
+ def _format_success_message(nb_char_erased: int, new_text_value: str | None) -> str:
258
+ if nb_char_erased == -1:
259
+ msg = "No text clearing was needed (the input was already empty)."
260
+ else:
261
+ msg = f"Text erased successfully. {nb_char_erased} characters were erased."
262
+
263
+ if new_text_value is not None:
264
+ msg += f" New text in the input is '{new_text_value}'."
265
+
266
+ return msg
267
+
268
+
269
+ def _format_failure_message(output: str | None) -> str:
270
+ return "Failed to erase text. " + (str(output) if output else "")
271
+
272
+
273
+ clear_text_wrapper = ToolWrapper(
274
+ tool_fn_getter=get_clear_text_tool,
275
+ on_success_fn=_format_success_message,
276
+ on_failure_fn=_format_failure_message,
277
+ )
@@ -9,7 +9,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
9
9
  )
10
10
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
11
11
  from pydantic import Field
12
- from typing_extensions import Annotated
12
+ from typing import Annotated
13
13
  from minitap.mobile_use.context import MobileUseContext
14
14
  from minitap.mobile_use.graph.state import State
15
15
  from langgraph.prebuilt import InjectedState
@@ -0,0 +1,56 @@
1
+ from langchain_core.messages import ToolMessage
2
+ from langchain_core.tools import tool
3
+ from langchain_core.tools.base import InjectedToolCallId
4
+ from langgraph.prebuilt import InjectedState
5
+ from langgraph.types import Command
6
+ from typing import Annotated
7
+
8
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
9
+ from minitap.mobile_use.context import MobileUseContext
10
+ from minitap.mobile_use.controllers.mobile_command_controller import (
11
+ erase_text as erase_text_controller,
12
+ )
13
+ from minitap.mobile_use.graph.state import State
14
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
15
+
16
+
17
+ def get_erase_one_char_tool(ctx: MobileUseContext):
18
+ @tool
19
+ def erase_one_char(
20
+ tool_call_id: Annotated[str, InjectedToolCallId],
21
+ state: Annotated[State, InjectedState],
22
+ agent_thought: str,
23
+ ):
24
+ """
25
+ Erase one character from a text area.
26
+ It acts the same as pressing backspace a single time.
27
+ """
28
+ output = erase_text_controller(ctx=ctx, nb_chars=1)
29
+ has_failed = output is not None
30
+ tool_message = ToolMessage(
31
+ tool_call_id=tool_call_id,
32
+ content=erase_one_char_wrapper.on_failure_fn()
33
+ if has_failed
34
+ else erase_one_char_wrapper.on_success_fn(),
35
+ additional_kwargs={"error": output} if has_failed else {},
36
+ status="error" if has_failed else "success",
37
+ )
38
+ return Command(
39
+ update=state.sanitize_update(
40
+ ctx=ctx,
41
+ update={
42
+ "agents_thoughts": [agent_thought],
43
+ EXECUTOR_MESSAGES_KEY: [tool_message],
44
+ },
45
+ agent="executor",
46
+ ),
47
+ )
48
+
49
+ return erase_one_char
50
+
51
+
52
+ erase_one_char_wrapper = ToolWrapper(
53
+ tool_fn_getter=get_erase_one_char_tool,
54
+ on_success_fn=lambda: "Erased one character successfully.",
55
+ on_failure_fn=lambda: "Failed to erase one character.",
56
+ )
@@ -9,7 +9,7 @@ from minitap.mobile_use.context import MobileUseContext
9
9
  from minitap.mobile_use.controllers.platform_specific_commands_controller import list_packages
10
10
  from minitap.mobile_use.graph.state import State
11
11
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
12
- from typing_extensions import Annotated
12
+ from typing import Annotated
13
13
 
14
14
 
15
15
  def get_find_packages_tool(ctx: MobileUseContext):
@@ -8,28 +8,17 @@ from langchain_core.tools.base import InjectedToolCallId
8
8
  from langgraph.prebuilt import InjectedState
9
9
  from langgraph.types import Command
10
10
  from pydantic import BaseModel
11
- from typing_extensions import Annotated
11
+ from typing import Annotated
12
12
 
13
13
  from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
14
14
  from minitap.mobile_use.context import MobileUseContext
15
- from minitap.mobile_use.controllers.mobile_command_controller import (
16
- CoordinatesSelectorRequest,
17
- IdSelectorRequest,
18
- SelectorRequestWithCoordinates,
19
- tap,
20
- )
21
15
  from minitap.mobile_use.controllers.mobile_command_controller import (
22
16
  input_text as input_text_controller,
23
17
  )
24
18
  from minitap.mobile_use.graph.state import State
25
19
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
20
+ from minitap.mobile_use.tools.utils import focus_element_if_needed, move_cursor_to_end_if_bounds
26
21
  from minitap.mobile_use.utils.logger import get_logger
27
- from minitap.mobile_use.utils.ui_hierarchy import (
28
- Point,
29
- find_element_by_resource_id,
30
- get_bounds_for_element,
31
- is_element_focused,
32
- )
33
22
 
34
23
  logger = get_logger(__name__)
35
24
 
@@ -41,71 +30,6 @@ class InputResult(BaseModel):
41
30
  error: str | None = None
42
31
 
43
32
 
44
- def _focus_element_if_needed(
45
- ctx: MobileUseContext,
46
- state: State,
47
- resource_id: str,
48
- ) -> bool:
49
- """
50
- Ensures the element identified by `resource_id` is focused.
51
- """
52
- rich_hierarchy: list[dict] = ctx.hw_bridge_client.get_rich_hierarchy()
53
- rich_elt = find_element_by_resource_id(
54
- ui_hierarchy=rich_hierarchy,
55
- resource_id=resource_id,
56
- is_rich_hierarchy=True,
57
- )
58
- if rich_elt and not is_element_focused(rich_elt):
59
- tap(ctx=ctx, selector_request=IdSelectorRequest(id=resource_id))
60
- logger.debug(f"Focused (tap) on resource_id={resource_id}")
61
- rich_hierarchy = ctx.hw_bridge_client.get_rich_hierarchy()
62
- rich_elt = find_element_by_resource_id(
63
- ui_hierarchy=rich_hierarchy,
64
- resource_id=resource_id,
65
- is_rich_hierarchy=True,
66
- )
67
- if rich_elt and is_element_focused(rich_elt):
68
- logger.debug(f"Text input is focused: {resource_id}")
69
- return True
70
-
71
- logger.warning(f"Failed to focus resource_id={resource_id}")
72
- return False
73
-
74
-
75
- def _move_cursor_to_end_if_bounds(
76
- ctx: MobileUseContext,
77
- state: State,
78
- resource_id: str,
79
- ) -> None:
80
- """
81
- Best-effort move of the text cursor near the end of the input by tapping the
82
- bottom-right area of the focused element (if bounds are available).
83
- """
84
- elt = find_element_by_resource_id(
85
- ui_hierarchy=state.latest_ui_hierarchy or [],
86
- resource_id=resource_id,
87
- )
88
- if not elt:
89
- return
90
-
91
- bounds = get_bounds_for_element(elt)
92
- if not bounds:
93
- return
94
-
95
- logger.debug("Tapping near the end of the input to move the cursor")
96
- bottom_right: Point = bounds.get_relative_point(x_percent=0.99, y_percent=0.99)
97
- tap(
98
- ctx=ctx,
99
- selector_request=SelectorRequestWithCoordinates(
100
- coordinates=CoordinatesSelectorRequest(
101
- x=bottom_right.x,
102
- y=bottom_right.y,
103
- ),
104
- ),
105
- )
106
- logger.debug(f"Tapped end of input {resource_id} at ({bottom_right.x}, {bottom_right.y})")
107
-
108
-
109
33
  def _controller_input_text(ctx: MobileUseContext, text: str) -> InputResult:
110
34
  """
111
35
  Thin wrapper to normalize the controller result.
@@ -132,9 +56,9 @@ def get_input_text_tool(ctx: MobileUseContext):
132
56
  - If bounds are available, tap near the end to place the cursor at the end.
133
57
  - Type the provided `text` using the controller.
134
58
  """
135
- focused = _focus_element_if_needed(ctx=ctx, state=state, resource_id=text_input_resource_id)
59
+ focused = focus_element_if_needed(ctx=ctx, resource_id=text_input_resource_id)
136
60
  if focused:
137
- _move_cursor_to_end_if_bounds(ctx=ctx, state=state, resource_id=text_input_resource_id)
61
+ move_cursor_to_end_if_bounds(ctx=ctx, state=state, resource_id=text_input_resource_id)
138
62
 
139
63
  result = _controller_input_text(ctx=ctx, text=text)
140
64
 
@@ -7,7 +7,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
7
7
  launch_app as launch_app_controller,
8
8
  )
9
9
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
10
- from typing_extensions import Annotated
10
+ from typing import Annotated
11
11
  from minitap.mobile_use.context import MobileUseContext
12
12
  from minitap.mobile_use.graph.state import State
13
13
  from langgraph.prebuilt import InjectedState
@@ -1,5 +1,3 @@
1
- from typing import Optional
2
-
3
1
  from langchain_core.messages import ToolMessage
4
2
  from langchain_core.tools import tool
5
3
  from langchain_core.tools.base import InjectedToolCallId
@@ -13,7 +11,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
13
11
  )
14
12
  from minitap.mobile_use.graph.state import State
15
13
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
16
- from typing_extensions import Annotated
14
+ from typing import Annotated
17
15
 
18
16
 
19
17
  def get_long_press_on_tool(ctx: MobileUseContext):
@@ -23,7 +21,7 @@ def get_long_press_on_tool(ctx: MobileUseContext):
23
21
  state: Annotated[State, InjectedState],
24
22
  agent_thought: str,
25
23
  selector_request: SelectorRequest,
26
- index: Optional[int] = None,
24
+ index: int | None = None,
27
25
  ):
28
26
  """
29
27
  Long press on a UI element identified by the given selector.
@@ -10,7 +10,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
10
10
  )
11
11
  from minitap.mobile_use.graph.state import State
12
12
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
13
- from typing_extensions import Annotated
13
+ from typing import Annotated
14
14
 
15
15
 
16
16
  def get_open_link_tool(ctx: MobileUseContext):
@@ -10,7 +10,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
10
10
  from minitap.mobile_use.graph.state import State
11
11
  from langgraph.prebuilt import InjectedState
12
12
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
13
- from typing_extensions import Annotated
13
+ from typing import Annotated
14
14
 
15
15
 
16
16
  def get_paste_text_tool(ctx: MobileUseContext):
@@ -11,7 +11,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
11
11
  )
12
12
  from minitap.mobile_use.graph.state import State
13
13
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
14
- from typing_extensions import Annotated
14
+ from typing import Annotated
15
15
 
16
16
 
17
17
  def get_press_key_tool(ctx: MobileUseContext):
@@ -1,5 +1,3 @@
1
- from typing import Optional
2
-
3
1
  from langchain_core.messages import ToolMessage
4
2
  from langchain_core.tools import tool
5
3
  from langchain_core.tools.base import InjectedToolCallId
@@ -10,7 +8,7 @@ from minitap.mobile_use.context import MobileUseContext
10
8
  from minitap.mobile_use.controllers.mobile_command_controller import stop_app as stop_app_controller
11
9
  from minitap.mobile_use.graph.state import State
12
10
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
13
- from typing_extensions import Annotated
11
+ from typing import Annotated
14
12
 
15
13
 
16
14
  def get_stop_app_tool(ctx: MobileUseContext):
@@ -19,7 +17,7 @@ def get_stop_app_tool(ctx: MobileUseContext):
19
17
  tool_call_id: Annotated[str, InjectedToolCallId],
20
18
  state: Annotated[State, InjectedState],
21
19
  agent_thought: str,
22
- package_name: Optional[str] = None,
20
+ package_name: str | None = None,
23
21
  ):
24
22
  """
25
23
  Stops current application if it is running.