minitap-mobile-use 2.0.1__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of minitap-mobile-use might be problematic. Click here for more details.
- minitap/mobile_use/agents/cortex/cortex.md +7 -5
- minitap/mobile_use/agents/cortex/cortex.py +4 -1
- minitap/mobile_use/agents/cortex/types.py +1 -3
- minitap/mobile_use/agents/executor/executor.md +4 -5
- minitap/mobile_use/agents/executor/tool_node.py +6 -6
- minitap/mobile_use/agents/outputter/outputter.py +1 -2
- minitap/mobile_use/agents/planner/planner.md +11 -2
- minitap/mobile_use/agents/planner/planner.py +4 -1
- minitap/mobile_use/agents/planner/types.py +3 -4
- minitap/mobile_use/agents/summarizer/summarizer.py +2 -1
- minitap/mobile_use/config.py +15 -15
- minitap/mobile_use/context.py +3 -4
- minitap/mobile_use/controllers/mobile_command_controller.py +32 -20
- minitap/mobile_use/controllers/platform_specific_commands_controller.py +3 -4
- minitap/mobile_use/graph/graph.py +1 -0
- minitap/mobile_use/graph/state.py +9 -9
- minitap/mobile_use/main.py +5 -6
- minitap/mobile_use/sdk/agent.py +24 -24
- minitap/mobile_use/sdk/builders/agent_config_builder.py +7 -8
- minitap/mobile_use/sdk/builders/task_request_builder.py +9 -9
- minitap/mobile_use/sdk/examples/smart_notification_assistant.py +1 -2
- minitap/mobile_use/sdk/types/agent.py +5 -5
- minitap/mobile_use/sdk/types/task.py +19 -18
- minitap/mobile_use/sdk/utils.py +1 -1
- minitap/mobile_use/servers/config.py +1 -2
- minitap/mobile_use/servers/device_hardware_bridge.py +3 -4
- minitap/mobile_use/servers/start_servers.py +4 -4
- minitap/mobile_use/servers/stop_servers.py +2 -3
- minitap/mobile_use/services/llm.py +3 -2
- minitap/mobile_use/tools/index.py +10 -4
- minitap/mobile_use/tools/mobile/back.py +1 -1
- minitap/mobile_use/tools/mobile/clear_text.py +277 -0
- minitap/mobile_use/tools/mobile/copy_text_from.py +1 -1
- minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
- minitap/mobile_use/tools/mobile/find_packages.py +1 -1
- minitap/mobile_use/tools/mobile/input_text.py +4 -80
- minitap/mobile_use/tools/mobile/launch_app.py +1 -1
- minitap/mobile_use/tools/mobile/long_press_on.py +2 -4
- minitap/mobile_use/tools/mobile/open_link.py +1 -1
- minitap/mobile_use/tools/mobile/paste_text.py +1 -1
- minitap/mobile_use/tools/mobile/press_key.py +1 -1
- minitap/mobile_use/tools/mobile/stop_app.py +2 -4
- minitap/mobile_use/tools/mobile/swipe.py +1 -1
- minitap/mobile_use/tools/mobile/take_screenshot.py +1 -1
- minitap/mobile_use/tools/mobile/tap.py +2 -4
- minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +2 -4
- minitap/mobile_use/tools/tool_wrapper.py +1 -1
- minitap/mobile_use/tools/utils.py +86 -0
- minitap/mobile_use/utils/cli_helpers.py +1 -2
- minitap/mobile_use/utils/cli_selection.py +5 -6
- minitap/mobile_use/utils/decorators.py +21 -20
- minitap/mobile_use/utils/logger.py +3 -4
- minitap/mobile_use/utils/media.py +1 -1
- minitap/mobile_use/utils/ui_hierarchy.py +13 -5
- {minitap_mobile_use-2.0.1.dist-info → minitap_mobile_use-2.1.0.dist-info}/METADATA +11 -1
- minitap_mobile_use-2.1.0.dist-info/RECORD +96 -0
- minitap/mobile_use/tools/mobile/erase_text.py +0 -122
- minitap_mobile_use-2.0.1.dist-info/RECORD +0 -94
- {minitap_mobile_use-2.0.1.dist-info → minitap_mobile_use-2.1.0.dist-info}/WHEEL +0 -0
- {minitap_mobile_use-2.0.1.dist-info → minitap_mobile_use-2.1.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
3
|
+
from langchain_core.messages import ToolMessage
|
|
4
|
+
from langchain_core.tools import tool
|
|
5
|
+
from langchain_core.tools.base import InjectedToolCallId
|
|
6
|
+
from langgraph.prebuilt import InjectedState
|
|
7
|
+
from langgraph.types import Command
|
|
8
|
+
from pydantic import BaseModel
|
|
9
|
+
|
|
10
|
+
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
11
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
12
|
+
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
13
|
+
erase_text as erase_text_controller,
|
|
14
|
+
)
|
|
15
|
+
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
16
|
+
get_screen_data,
|
|
17
|
+
)
|
|
18
|
+
from minitap.mobile_use.graph.state import State
|
|
19
|
+
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
20
|
+
from minitap.mobile_use.tools.utils import (
|
|
21
|
+
focus_element_if_needed,
|
|
22
|
+
move_cursor_to_end_if_bounds,
|
|
23
|
+
)
|
|
24
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
25
|
+
from minitap.mobile_use.utils.ui_hierarchy import (
|
|
26
|
+
find_element_by_resource_id,
|
|
27
|
+
get_element_text,
|
|
28
|
+
text_input_is_empty,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Module-scoped logger shared by the clearing helpers in this file.
logger = get_logger(__name__)

# Upper bound on erase-then-verify rounds for a single clear request.
MAX_CLEAR_TRIES = 5
# Text length assumed for an erase attempt when the current text is empty or unknown.
DEFAULT_CHARS_TO_ERASE = 50
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ClearTextResult(BaseModel):
    """Outcome of a clear-text operation, as handed back to the tool layer."""

    # True when the operation is reported as successful.
    success: bool
    # Description of the failure, or None when there is nothing to report.
    error_message: str | None
    # Count of characters the clearer asked to erase; -1 is used when no clearing was needed.
    chars_erased: int
    # Text read from the input afterwards (may carry a hint-text note), if it could be read.
    final_text: str | None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class TextClearer:
    """Clear the text of an input field located by its resource id.

    Works against the UI hierarchy cached on ``state.latest_ui_hierarchy`` and
    refreshes it from the screen API between erase attempts, so the outcome can
    be checked against what is actually displayed.
    """

    def __init__(self, ctx: MobileUseContext, state: State):
        """Keep the execution context and graph state used by all helpers."""
        self.ctx = ctx
        self.state = state

    def _refresh_ui_hierarchy(self) -> None:
        """Re-read the screen data and cache its elements on the state."""
        screen_data = get_screen_data(screen_api_client=self.ctx.screen_api_client)
        self.state.latest_ui_hierarchy = screen_data.elements

    def _get_element_info(self, resource_id: str) -> tuple[object | None, str | None, str | None]:
        """Return ``(element, current_text, hint_text)`` for ``resource_id``.

        The hierarchy is fetched first when none is cached. All three values are
        ``None`` when no hierarchy is available or no element matches.
        """
        if not self.state.latest_ui_hierarchy:
            self._refresh_ui_hierarchy()

        if not self.state.latest_ui_hierarchy:
            return None, None, None

        element = find_element_by_resource_id(
            ui_hierarchy=self.state.latest_ui_hierarchy, resource_id=resource_id
        )

        if not element:
            return None, None, None

        current_text = get_element_text(element)
        hint_text = get_element_text(element, hint_text=True)

        return element, current_text, hint_text

    def _format_text_with_hint_info(self, text: str | None, hint_text: str | None) -> str | None:
        """Return ``text``, annotated when it is just the field's hint text.

        ``None`` is passed through unchanged.
        """
        if text is None:
            return None

        is_hint_text = hint_text is not None and hint_text != "" and hint_text == text

        if is_hint_text:
            return f"{text} (which is the hint text, the input is very likely empty)"

        return text

    def _should_clear_text(self, current_text: str | None, hint_text: str | None) -> bool:
        """Tell whether the field holds real text (non-empty and not the hint)."""
        return current_text is not None and current_text != "" and current_text != hint_text

    def _prepare_element_for_clearing(self, resource_id: str) -> bool:
        """Focus the element and move the cursor to its end.

        Returns ``False`` when focusing fails; the cursor move is best-effort
        and its outcome is not checked.
        """
        if not focus_element_if_needed(ctx=self.ctx, resource_id=resource_id):
            return False

        move_cursor_to_end_if_bounds(ctx=self.ctx, state=self.state, resource_id=resource_id)
        return True

    def _erase_text_attempt(self, text_length: int) -> str | None:
        """Ask the controller to erase ``text_length + 1`` characters.

        Returns the controller error as a string, or ``None`` on success.
        """
        # One more character than measured is requested — presumably a safety margin.
        chars_to_erase = text_length + 1
        logger.info(f"Erasing {chars_to_erase} characters from the input")

        error = erase_text_controller(ctx=self.ctx, nb_chars=chars_to_erase)
        if error:
            logger.error(f"Failed to erase text: {error}")
            return str(error)

        return None

    def _clear_with_retries(
        self, resource_id: str, initial_text: str, hint_text: str | None
    ) -> tuple[bool, str | None, int]:
        """Erase and re-check the field up to ``MAX_CLEAR_TRIES`` times.

        Returns ``(success, last_observed_text, erased_chars)``. ``success`` is
        ``True`` only when a refreshed hierarchy showed the input as empty; a
        controller error or running out of attempts yields ``False``.
        """
        current_text: str | None = initial_text
        erased_chars = 0
        cleared = False

        for attempt in range(1, MAX_CLEAR_TRIES + 1):
            logger.info(f"Clear attempt {attempt}/{MAX_CLEAR_TRIES}")

            chars_to_erase = len(current_text) if current_text else DEFAULT_CHARS_TO_ERASE
            error = self._erase_text_attempt(text_length=chars_to_erase)

            if error:
                # Keep the count from earlier successful attempts instead of resetting it.
                return False, current_text, erased_chars
            erased_chars += chars_to_erase

            self._refresh_ui_hierarchy()
            elt = find_element_by_resource_id(
                ui_hierarchy=self.state.latest_ui_hierarchy or [],
                resource_id=resource_id,
            )
            if elt:
                current_text = get_element_text(elt)
                logger.info(f"Current text: {current_text}")
                if text_input_is_empty(text=current_text, hint_text=hint_text):
                    cleared = True
                    break

            # Text is still present (or could not be read): reposition the cursor and retry.
            move_cursor_to_end_if_bounds(
                ctx=self.ctx, state=self.state, resource_id=resource_id, elt=elt
            )

        # Exhausting every attempt without seeing an empty input is a failure, not a success.
        return cleared, current_text, erased_chars

    def _create_result(
        self,
        success: bool,
        error_message: str | None,
        chars_erased: int,
        final_text: str | None,
        hint_text: str | None,
    ) -> ClearTextResult:
        """Build a ``ClearTextResult``, annotating ``final_text`` when it is the hint."""
        formatted_final_text = self._format_text_with_hint_info(final_text, hint_text)

        return ClearTextResult(
            success=success,
            error_message=error_message,
            chars_erased=chars_erased,
            final_text=formatted_final_text,
        )

    def _handle_no_clearing_needed(
        self, current_text: str | None, hint_text: str | None
    ) -> ClearTextResult:
        """Report success without erasing; ``chars_erased`` is set to ``-1``."""
        return self._create_result(
            success=True,
            error_message=None,
            chars_erased=-1,
            final_text=current_text,
            hint_text=hint_text,
        )

    def _handle_element_not_found(self, resource_id: str, hint_text: str | None) -> ClearTextResult:
        """Erase blindly when the element is not in the hierarchy, then re-read it.

        The controller is called with its default character count and success
        mirrors whether it returned an error.
        """
        error = erase_text_controller(ctx=self.ctx)
        self._refresh_ui_hierarchy()

        _, final_text, _ = self._get_element_info(resource_id)

        return self._create_result(
            success=error is None,
            error_message=str(error) if error is not None else None,
            chars_erased=0,  # Unknown since we don't have initial text
            final_text=final_text,
            hint_text=hint_text,
        )

    def clear_text_by_resource_id(self, resource_id: str) -> ClearTextResult:
        """Clear the input identified by ``resource_id`` and report the outcome.

        Falls back to a blind erase when the element cannot be found, skips the
        work when the field holds no real text, and otherwise focuses the field
        and erases with verification.
        """
        element, current_text, hint_text = self._get_element_info(resource_id)

        if not element:
            return self._handle_element_not_found(resource_id, hint_text)

        if not self._should_clear_text(current_text, hint_text):
            return self._handle_no_clearing_needed(current_text, hint_text)

        if not self._prepare_element_for_clearing(resource_id):
            return self._create_result(
                success=False,
                error_message="Failed to focus element",
                chars_erased=0,
                final_text=current_text,
                hint_text=hint_text,
            )

        success, final_text, chars_erased = self._clear_with_retries(
            resource_id=resource_id,
            initial_text=current_text or "",
            hint_text=hint_text,
        )

        error_message = None if success else "Failed to clear text after retries"

        return self._create_result(
            success=success,
            error_message=error_message,
            chars_erased=chars_erased,
            final_text=final_text,
            hint_text=hint_text,
        )
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def get_clear_text_tool(ctx: MobileUseContext):
    """Build the ``clear_text`` tool bound to ``ctx``."""

    @tool
    def clear_text(
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[State, InjectedState],
        agent_thought: str,
        text_input_resource_id: str,
    ):
        """
        Clears all the text from the text field, by focusing it if needed.
        """
        result = TextClearer(ctx, state).clear_text_by_resource_id(text_input_resource_id)

        # Derive message content, extra kwargs and status from the same outcome.
        if result.success:
            content = clear_text_wrapper.on_success_fn(
                nb_char_erased=result.chars_erased, new_text_value=result.final_text
            )
            extra_kwargs = {}
            status = "success"
        else:
            content = clear_text_wrapper.on_failure_fn(result.error_message)
            extra_kwargs = {"error": result.error_message}
            status = "error"

        tool_message = ToolMessage(
            tool_call_id=tool_call_id,
            content=content,
            additional_kwargs=extra_kwargs,
            status=status,
        )

        # Record the agent's thought and the tool message on the executor channel.
        sanitized_update = state.sanitize_update(
            ctx=ctx,
            update={
                "agents_thoughts": [agent_thought],
                EXECUTOR_MESSAGES_KEY: [tool_message],
            },
            agent="executor",
        )
        return Command(update=sanitized_update)

    return clear_text
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def _format_success_message(nb_char_erased: int, new_text_value: str | None) -> str:
|
|
258
|
+
if nb_char_erased == -1:
|
|
259
|
+
msg = "No text clearing was needed (the input was already empty)."
|
|
260
|
+
else:
|
|
261
|
+
msg = f"Text erased successfully. {nb_char_erased} characters were erased."
|
|
262
|
+
|
|
263
|
+
if new_text_value is not None:
|
|
264
|
+
msg += f" New text in the input is '{new_text_value}'."
|
|
265
|
+
|
|
266
|
+
return msg
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _format_failure_message(output: str | None) -> str:
|
|
270
|
+
return "Failed to erase text. " + (str(output) if output else "")
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
# Binds the clear_text tool factory to the formatters used for its result messages.
clear_text_wrapper = ToolWrapper(
    tool_fn_getter=get_clear_text_tool,
    on_success_fn=_format_success_message,
    on_failure_fn=_format_failure_message,
)
|
|
@@ -9,7 +9,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
|
9
9
|
)
|
|
10
10
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
11
11
|
from pydantic import Field
|
|
12
|
-
from
|
|
12
|
+
from typing import Annotated
|
|
13
13
|
from minitap.mobile_use.context import MobileUseContext
|
|
14
14
|
from minitap.mobile_use.graph.state import State
|
|
15
15
|
from langgraph.prebuilt import InjectedState
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from langchain_core.messages import ToolMessage
|
|
2
|
+
from langchain_core.tools import tool
|
|
3
|
+
from langchain_core.tools.base import InjectedToolCallId
|
|
4
|
+
from langgraph.prebuilt import InjectedState
|
|
5
|
+
from langgraph.types import Command
|
|
6
|
+
from typing import Annotated
|
|
7
|
+
|
|
8
|
+
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
9
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
10
|
+
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
11
|
+
erase_text as erase_text_controller,
|
|
12
|
+
)
|
|
13
|
+
from minitap.mobile_use.graph.state import State
|
|
14
|
+
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def get_erase_one_char_tool(ctx: MobileUseContext):
    """Build the ``erase_one_char`` tool bound to ``ctx``."""

    @tool
    def erase_one_char(
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[State, InjectedState],
        agent_thought: str,
    ):
        """
        Erase one character from a text area.
        It acts the same as pressing backspace a single time.
        """
        # The controller returns None on success; anything else is treated as the error.
        output = erase_text_controller(ctx=ctx, nb_chars=1)

        if output is not None:
            content = erase_one_char_wrapper.on_failure_fn()
            extra_kwargs = {"error": output}
            status = "error"
        else:
            content = erase_one_char_wrapper.on_success_fn()
            extra_kwargs = {}
            status = "success"

        tool_message = ToolMessage(
            tool_call_id=tool_call_id,
            content=content,
            additional_kwargs=extra_kwargs,
            status=status,
        )

        # Record the agent's thought and the tool message on the executor channel.
        sanitized_update = state.sanitize_update(
            ctx=ctx,
            update={
                "agents_thoughts": [agent_thought],
                EXECUTOR_MESSAGES_KEY: [tool_message],
            },
            agent="executor",
        )
        return Command(update=sanitized_update)

    return erase_one_char
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# Binds the erase_one_char tool factory to its fixed success and failure messages.
erase_one_char_wrapper = ToolWrapper(
    tool_fn_getter=get_erase_one_char_tool,
    on_success_fn=lambda: "Erased one character successfully.",
    on_failure_fn=lambda: "Failed to erase one character.",
)
|
|
@@ -9,7 +9,7 @@ from minitap.mobile_use.context import MobileUseContext
|
|
|
9
9
|
from minitap.mobile_use.controllers.platform_specific_commands_controller import list_packages
|
|
10
10
|
from minitap.mobile_use.graph.state import State
|
|
11
11
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
12
|
-
from
|
|
12
|
+
from typing import Annotated
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
def get_find_packages_tool(ctx: MobileUseContext):
|
|
@@ -8,28 +8,17 @@ from langchain_core.tools.base import InjectedToolCallId
|
|
|
8
8
|
from langgraph.prebuilt import InjectedState
|
|
9
9
|
from langgraph.types import Command
|
|
10
10
|
from pydantic import BaseModel
|
|
11
|
-
from
|
|
11
|
+
from typing import Annotated
|
|
12
12
|
|
|
13
13
|
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
14
14
|
from minitap.mobile_use.context import MobileUseContext
|
|
15
|
-
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
16
|
-
CoordinatesSelectorRequest,
|
|
17
|
-
IdSelectorRequest,
|
|
18
|
-
SelectorRequestWithCoordinates,
|
|
19
|
-
tap,
|
|
20
|
-
)
|
|
21
15
|
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
22
16
|
input_text as input_text_controller,
|
|
23
17
|
)
|
|
24
18
|
from minitap.mobile_use.graph.state import State
|
|
25
19
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
20
|
+
from minitap.mobile_use.tools.utils import focus_element_if_needed, move_cursor_to_end_if_bounds
|
|
26
21
|
from minitap.mobile_use.utils.logger import get_logger
|
|
27
|
-
from minitap.mobile_use.utils.ui_hierarchy import (
|
|
28
|
-
Point,
|
|
29
|
-
find_element_by_resource_id,
|
|
30
|
-
get_bounds_for_element,
|
|
31
|
-
is_element_focused,
|
|
32
|
-
)
|
|
33
22
|
|
|
34
23
|
logger = get_logger(__name__)
|
|
35
24
|
|
|
@@ -41,71 +30,6 @@ class InputResult(BaseModel):
|
|
|
41
30
|
error: str | None = None
|
|
42
31
|
|
|
43
32
|
|
|
44
|
-
def _focus_element_if_needed(
|
|
45
|
-
ctx: MobileUseContext,
|
|
46
|
-
state: State,
|
|
47
|
-
resource_id: str,
|
|
48
|
-
) -> bool:
|
|
49
|
-
"""
|
|
50
|
-
Ensures the element identified by `resource_id` is focused.
|
|
51
|
-
"""
|
|
52
|
-
rich_hierarchy: list[dict] = ctx.hw_bridge_client.get_rich_hierarchy()
|
|
53
|
-
rich_elt = find_element_by_resource_id(
|
|
54
|
-
ui_hierarchy=rich_hierarchy,
|
|
55
|
-
resource_id=resource_id,
|
|
56
|
-
is_rich_hierarchy=True,
|
|
57
|
-
)
|
|
58
|
-
if rich_elt and not is_element_focused(rich_elt):
|
|
59
|
-
tap(ctx=ctx, selector_request=IdSelectorRequest(id=resource_id))
|
|
60
|
-
logger.debug(f"Focused (tap) on resource_id={resource_id}")
|
|
61
|
-
rich_hierarchy = ctx.hw_bridge_client.get_rich_hierarchy()
|
|
62
|
-
rich_elt = find_element_by_resource_id(
|
|
63
|
-
ui_hierarchy=rich_hierarchy,
|
|
64
|
-
resource_id=resource_id,
|
|
65
|
-
is_rich_hierarchy=True,
|
|
66
|
-
)
|
|
67
|
-
if rich_elt and is_element_focused(rich_elt):
|
|
68
|
-
logger.debug(f"Text input is focused: {resource_id}")
|
|
69
|
-
return True
|
|
70
|
-
|
|
71
|
-
logger.warning(f"Failed to focus resource_id={resource_id}")
|
|
72
|
-
return False
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
def _move_cursor_to_end_if_bounds(
|
|
76
|
-
ctx: MobileUseContext,
|
|
77
|
-
state: State,
|
|
78
|
-
resource_id: str,
|
|
79
|
-
) -> None:
|
|
80
|
-
"""
|
|
81
|
-
Best-effort move of the text cursor near the end of the input by tapping the
|
|
82
|
-
bottom-right area of the focused element (if bounds are available).
|
|
83
|
-
"""
|
|
84
|
-
elt = find_element_by_resource_id(
|
|
85
|
-
ui_hierarchy=state.latest_ui_hierarchy or [],
|
|
86
|
-
resource_id=resource_id,
|
|
87
|
-
)
|
|
88
|
-
if not elt:
|
|
89
|
-
return
|
|
90
|
-
|
|
91
|
-
bounds = get_bounds_for_element(elt)
|
|
92
|
-
if not bounds:
|
|
93
|
-
return
|
|
94
|
-
|
|
95
|
-
logger.debug("Tapping near the end of the input to move the cursor")
|
|
96
|
-
bottom_right: Point = bounds.get_relative_point(x_percent=0.99, y_percent=0.99)
|
|
97
|
-
tap(
|
|
98
|
-
ctx=ctx,
|
|
99
|
-
selector_request=SelectorRequestWithCoordinates(
|
|
100
|
-
coordinates=CoordinatesSelectorRequest(
|
|
101
|
-
x=bottom_right.x,
|
|
102
|
-
y=bottom_right.y,
|
|
103
|
-
),
|
|
104
|
-
),
|
|
105
|
-
)
|
|
106
|
-
logger.debug(f"Tapped end of input {resource_id} at ({bottom_right.x}, {bottom_right.y})")
|
|
107
|
-
|
|
108
|
-
|
|
109
33
|
def _controller_input_text(ctx: MobileUseContext, text: str) -> InputResult:
|
|
110
34
|
"""
|
|
111
35
|
Thin wrapper to normalize the controller result.
|
|
@@ -132,9 +56,9 @@ def get_input_text_tool(ctx: MobileUseContext):
|
|
|
132
56
|
- If bounds are available, tap near the end to place the cursor at the end.
|
|
133
57
|
- Type the provided `text` using the controller.
|
|
134
58
|
"""
|
|
135
|
-
focused =
|
|
59
|
+
focused = focus_element_if_needed(ctx=ctx, resource_id=text_input_resource_id)
|
|
136
60
|
if focused:
|
|
137
|
-
|
|
61
|
+
move_cursor_to_end_if_bounds(ctx=ctx, state=state, resource_id=text_input_resource_id)
|
|
138
62
|
|
|
139
63
|
result = _controller_input_text(ctx=ctx, text=text)
|
|
140
64
|
|
|
@@ -7,7 +7,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
|
7
7
|
launch_app as launch_app_controller,
|
|
8
8
|
)
|
|
9
9
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
10
|
-
from
|
|
10
|
+
from typing import Annotated
|
|
11
11
|
from minitap.mobile_use.context import MobileUseContext
|
|
12
12
|
from minitap.mobile_use.graph.state import State
|
|
13
13
|
from langgraph.prebuilt import InjectedState
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
|
-
|
|
3
1
|
from langchain_core.messages import ToolMessage
|
|
4
2
|
from langchain_core.tools import tool
|
|
5
3
|
from langchain_core.tools.base import InjectedToolCallId
|
|
@@ -13,7 +11,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
|
13
11
|
)
|
|
14
12
|
from minitap.mobile_use.graph.state import State
|
|
15
13
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
16
|
-
from
|
|
14
|
+
from typing import Annotated
|
|
17
15
|
|
|
18
16
|
|
|
19
17
|
def get_long_press_on_tool(ctx: MobileUseContext):
|
|
@@ -23,7 +21,7 @@ def get_long_press_on_tool(ctx: MobileUseContext):
|
|
|
23
21
|
state: Annotated[State, InjectedState],
|
|
24
22
|
agent_thought: str,
|
|
25
23
|
selector_request: SelectorRequest,
|
|
26
|
-
index:
|
|
24
|
+
index: int | None = None,
|
|
27
25
|
):
|
|
28
26
|
"""
|
|
29
27
|
Long press on a UI element identified by the given selector.
|
|
@@ -10,7 +10,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
|
10
10
|
)
|
|
11
11
|
from minitap.mobile_use.graph.state import State
|
|
12
12
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
13
|
-
from
|
|
13
|
+
from typing import Annotated
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def get_open_link_tool(ctx: MobileUseContext):
|
|
@@ -10,7 +10,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
|
10
10
|
from minitap.mobile_use.graph.state import State
|
|
11
11
|
from langgraph.prebuilt import InjectedState
|
|
12
12
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
13
|
-
from
|
|
13
|
+
from typing import Annotated
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def get_paste_text_tool(ctx: MobileUseContext):
|
|
@@ -11,7 +11,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
|
11
11
|
)
|
|
12
12
|
from minitap.mobile_use.graph.state import State
|
|
13
13
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
14
|
-
from
|
|
14
|
+
from typing import Annotated
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def get_press_key_tool(ctx: MobileUseContext):
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
|
-
|
|
3
1
|
from langchain_core.messages import ToolMessage
|
|
4
2
|
from langchain_core.tools import tool
|
|
5
3
|
from langchain_core.tools.base import InjectedToolCallId
|
|
@@ -10,7 +8,7 @@ from minitap.mobile_use.context import MobileUseContext
|
|
|
10
8
|
from minitap.mobile_use.controllers.mobile_command_controller import stop_app as stop_app_controller
|
|
11
9
|
from minitap.mobile_use.graph.state import State
|
|
12
10
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
13
|
-
from
|
|
11
|
+
from typing import Annotated
|
|
14
12
|
|
|
15
13
|
|
|
16
14
|
def get_stop_app_tool(ctx: MobileUseContext):
|
|
@@ -19,7 +17,7 @@ def get_stop_app_tool(ctx: MobileUseContext):
|
|
|
19
17
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
20
18
|
state: Annotated[State, InjectedState],
|
|
21
19
|
agent_thought: str,
|
|
22
|
-
package_name:
|
|
20
|
+
package_name: str | None = None,
|
|
23
21
|
):
|
|
24
22
|
"""
|
|
25
23
|
Stops current application if it is running.
|
|
@@ -9,7 +9,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import SwipeReques
|
|
|
9
9
|
from minitap.mobile_use.controllers.mobile_command_controller import swipe as swipe_controller
|
|
10
10
|
from minitap.mobile_use.graph.state import State
|
|
11
11
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
12
|
-
from
|
|
12
|
+
from typing import Annotated
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
def get_swipe_tool(ctx: MobileUseContext):
|
|
@@ -11,7 +11,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
|
11
11
|
from minitap.mobile_use.graph.state import State
|
|
12
12
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
13
13
|
from minitap.mobile_use.utils.media import compress_base64_jpeg
|
|
14
|
-
from
|
|
14
|
+
from typing import Annotated
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def get_take_screenshot_tool(ctx: MobileUseContext):
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
|
-
|
|
3
1
|
from langchain_core.messages import ToolMessage
|
|
4
2
|
from langchain_core.tools import tool
|
|
5
3
|
from langchain_core.tools.base import InjectedToolCallId
|
|
@@ -11,7 +9,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import SelectorReq
|
|
|
11
9
|
from minitap.mobile_use.controllers.mobile_command_controller import tap as tap_controller
|
|
12
10
|
from minitap.mobile_use.graph.state import State
|
|
13
11
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
14
|
-
from
|
|
12
|
+
from typing import Annotated
|
|
15
13
|
|
|
16
14
|
|
|
17
15
|
def get_tap_tool(ctx: MobileUseContext):
|
|
@@ -21,7 +19,7 @@ def get_tap_tool(ctx: MobileUseContext):
|
|
|
21
19
|
state: Annotated[State, InjectedState],
|
|
22
20
|
agent_thought: str,
|
|
23
21
|
selector_request: SelectorRequest,
|
|
24
|
-
index:
|
|
22
|
+
index: int | None = None,
|
|
25
23
|
):
|
|
26
24
|
"""
|
|
27
25
|
Taps on a selector.
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
|
-
|
|
3
1
|
from langchain_core.messages import ToolMessage
|
|
4
2
|
from langchain_core.tools import tool
|
|
5
3
|
from langchain_core.tools.base import InjectedToolCallId
|
|
@@ -13,7 +11,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
|
13
11
|
)
|
|
14
12
|
from minitap.mobile_use.graph.state import State
|
|
15
13
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
16
|
-
from
|
|
14
|
+
from typing import Annotated
|
|
17
15
|
|
|
18
16
|
|
|
19
17
|
def get_wait_for_animation_to_end_tool(ctx: MobileUseContext):
|
|
@@ -22,7 +20,7 @@ def get_wait_for_animation_to_end_tool(ctx: MobileUseContext):
|
|
|
22
20
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
23
21
|
state: Annotated[State, InjectedState],
|
|
24
22
|
agent_thought: str,
|
|
25
|
-
timeout:
|
|
23
|
+
timeout: WaitTimeout | None,
|
|
26
24
|
):
|
|
27
25
|
"""
|
|
28
26
|
Waits for ongoing animations or videos to finish before continuing.
|