minitap-mobile-use 3.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minitap/mobile_use/__init__.py +0 -0
- minitap/mobile_use/agents/contextor/contextor.md +55 -0
- minitap/mobile_use/agents/contextor/contextor.py +175 -0
- minitap/mobile_use/agents/contextor/types.py +36 -0
- minitap/mobile_use/agents/cortex/cortex.md +135 -0
- minitap/mobile_use/agents/cortex/cortex.py +152 -0
- minitap/mobile_use/agents/cortex/types.py +15 -0
- minitap/mobile_use/agents/executor/executor.md +42 -0
- minitap/mobile_use/agents/executor/executor.py +87 -0
- minitap/mobile_use/agents/executor/tool_node.py +152 -0
- minitap/mobile_use/agents/hopper/hopper.md +15 -0
- minitap/mobile_use/agents/hopper/hopper.py +44 -0
- minitap/mobile_use/agents/orchestrator/human.md +12 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.md +21 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.py +134 -0
- minitap/mobile_use/agents/orchestrator/types.py +11 -0
- minitap/mobile_use/agents/outputter/human.md +25 -0
- minitap/mobile_use/agents/outputter/outputter.py +85 -0
- minitap/mobile_use/agents/outputter/test_outputter.py +167 -0
- minitap/mobile_use/agents/planner/human.md +14 -0
- minitap/mobile_use/agents/planner/planner.md +126 -0
- minitap/mobile_use/agents/planner/planner.py +101 -0
- minitap/mobile_use/agents/planner/types.py +51 -0
- minitap/mobile_use/agents/planner/utils.py +70 -0
- minitap/mobile_use/agents/summarizer/summarizer.py +35 -0
- minitap/mobile_use/agents/video_analyzer/__init__.py +5 -0
- minitap/mobile_use/agents/video_analyzer/human.md +5 -0
- minitap/mobile_use/agents/video_analyzer/video_analyzer.md +37 -0
- minitap/mobile_use/agents/video_analyzer/video_analyzer.py +111 -0
- minitap/mobile_use/clients/browserstack_client.py +477 -0
- minitap/mobile_use/clients/idb_client.py +429 -0
- minitap/mobile_use/clients/ios_client.py +332 -0
- minitap/mobile_use/clients/ios_client_config.py +141 -0
- minitap/mobile_use/clients/ui_automator_client.py +330 -0
- minitap/mobile_use/clients/wda_client.py +526 -0
- minitap/mobile_use/clients/wda_lifecycle.py +367 -0
- minitap/mobile_use/config.py +413 -0
- minitap/mobile_use/constants.py +3 -0
- minitap/mobile_use/context.py +106 -0
- minitap/mobile_use/controllers/__init__.py +0 -0
- minitap/mobile_use/controllers/android_controller.py +524 -0
- minitap/mobile_use/controllers/controller_factory.py +46 -0
- minitap/mobile_use/controllers/device_controller.py +182 -0
- minitap/mobile_use/controllers/ios_controller.py +436 -0
- minitap/mobile_use/controllers/platform_specific_commands_controller.py +199 -0
- minitap/mobile_use/controllers/types.py +106 -0
- minitap/mobile_use/controllers/unified_controller.py +193 -0
- minitap/mobile_use/graph/graph.py +160 -0
- minitap/mobile_use/graph/state.py +115 -0
- minitap/mobile_use/main.py +309 -0
- minitap/mobile_use/sdk/__init__.py +12 -0
- minitap/mobile_use/sdk/agent.py +1294 -0
- minitap/mobile_use/sdk/builders/__init__.py +10 -0
- minitap/mobile_use/sdk/builders/agent_config_builder.py +307 -0
- minitap/mobile_use/sdk/builders/index.py +15 -0
- minitap/mobile_use/sdk/builders/task_request_builder.py +236 -0
- minitap/mobile_use/sdk/constants.py +1 -0
- minitap/mobile_use/sdk/examples/README.md +83 -0
- minitap/mobile_use/sdk/examples/__init__.py +1 -0
- minitap/mobile_use/sdk/examples/app_lock_messaging.py +54 -0
- minitap/mobile_use/sdk/examples/platform_manual_task_example.py +67 -0
- minitap/mobile_use/sdk/examples/platform_minimal_example.py +48 -0
- minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
- minitap/mobile_use/sdk/examples/smart_notification_assistant.py +225 -0
- minitap/mobile_use/sdk/examples/video_transcription_example.py +117 -0
- minitap/mobile_use/sdk/services/cloud_mobile.py +656 -0
- minitap/mobile_use/sdk/services/platform.py +434 -0
- minitap/mobile_use/sdk/types/__init__.py +51 -0
- minitap/mobile_use/sdk/types/agent.py +84 -0
- minitap/mobile_use/sdk/types/exceptions.py +138 -0
- minitap/mobile_use/sdk/types/platform.py +183 -0
- minitap/mobile_use/sdk/types/task.py +269 -0
- minitap/mobile_use/sdk/utils.py +29 -0
- minitap/mobile_use/services/accessibility.py +100 -0
- minitap/mobile_use/services/llm.py +247 -0
- minitap/mobile_use/services/telemetry.py +421 -0
- minitap/mobile_use/tools/index.py +67 -0
- minitap/mobile_use/tools/mobile/back.py +52 -0
- minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
- minitap/mobile_use/tools/mobile/focus_and_clear_text.py +317 -0
- minitap/mobile_use/tools/mobile/focus_and_input_text.py +153 -0
- minitap/mobile_use/tools/mobile/launch_app.py +86 -0
- minitap/mobile_use/tools/mobile/long_press_on.py +169 -0
- minitap/mobile_use/tools/mobile/open_link.py +62 -0
- minitap/mobile_use/tools/mobile/press_key.py +83 -0
- minitap/mobile_use/tools/mobile/stop_app.py +62 -0
- minitap/mobile_use/tools/mobile/swipe.py +156 -0
- minitap/mobile_use/tools/mobile/tap.py +154 -0
- minitap/mobile_use/tools/mobile/video_recording.py +177 -0
- minitap/mobile_use/tools/mobile/wait_for_delay.py +81 -0
- minitap/mobile_use/tools/scratchpad.py +147 -0
- minitap/mobile_use/tools/test_utils.py +413 -0
- minitap/mobile_use/tools/tool_wrapper.py +16 -0
- minitap/mobile_use/tools/types.py +35 -0
- minitap/mobile_use/tools/utils.py +336 -0
- minitap/mobile_use/utils/app_launch_utils.py +173 -0
- minitap/mobile_use/utils/cli_helpers.py +37 -0
- minitap/mobile_use/utils/cli_selection.py +143 -0
- minitap/mobile_use/utils/conversations.py +31 -0
- minitap/mobile_use/utils/decorators.py +124 -0
- minitap/mobile_use/utils/errors.py +6 -0
- minitap/mobile_use/utils/file.py +13 -0
- minitap/mobile_use/utils/logger.py +183 -0
- minitap/mobile_use/utils/media.py +186 -0
- minitap/mobile_use/utils/recorder.py +52 -0
- minitap/mobile_use/utils/requests_utils.py +37 -0
- minitap/mobile_use/utils/shell_utils.py +20 -0
- minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
- minitap/mobile_use/utils/time.py +6 -0
- minitap/mobile_use/utils/ui_hierarchy.py +132 -0
- minitap/mobile_use/utils/video.py +281 -0
- minitap_mobile_use-3.3.0.dist-info/METADATA +329 -0
- minitap_mobile_use-3.3.0.dist-info/RECORD +115 -0
- minitap_mobile_use-3.3.0.dist-info/WHEEL +4 -0
- minitap_mobile_use-3.3.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
3
|
+
from langchain_core.messages import ToolMessage
|
|
4
|
+
from langchain_core.tools import BaseTool, tool
|
|
5
|
+
from langchain_core.tools.base import InjectedToolCallId
|
|
6
|
+
from langgraph.prebuilt import InjectedState
|
|
7
|
+
from langgraph.types import Command
|
|
8
|
+
from pydantic import BaseModel
|
|
9
|
+
|
|
10
|
+
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
11
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
12
|
+
from minitap.mobile_use.controllers.controller_factory import create_device_controller
|
|
13
|
+
from minitap.mobile_use.controllers.unified_controller import UnifiedMobileController
|
|
14
|
+
from minitap.mobile_use.graph.state import State
|
|
15
|
+
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
16
|
+
from minitap.mobile_use.tools.types import Target
|
|
17
|
+
from minitap.mobile_use.tools.utils import focus_element_if_needed, move_cursor_to_end_if_bounds
|
|
18
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
19
|
+
from minitap.mobile_use.utils.ui_hierarchy import (
|
|
20
|
+
find_element_by_resource_id,
|
|
21
|
+
get_element_text,
|
|
22
|
+
text_input_is_empty,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
logger = get_logger(__name__)
|
|
26
|
+
|
|
27
|
+
MAX_CLEAR_TRIES = 5
|
|
28
|
+
DEFAULT_CHARS_TO_ERASE = 50
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ClearTextResult(BaseModel):
    """Outcome of a text-clearing operation, as assembled by ``TextClearer._create_result``."""

    # Whether the clearing operation was reported as successful.
    success: bool
    # Failure description; None when no error was recorded.
    error_message: str | None
    # Number of characters the clearer requested to erase; -1 is the sentinel used
    # when no clearing was needed, 0 is used when the count is unknown or on failure.
    chars_erased: int
    # Text read back from the field after the operation (may carry a hint-text
    # annotation), or None when it could not be read.
    final_text: str | None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class TextClearer:
    """Clears the content of a text input on the device, retrying a bounded number of times.

    The clearer looks the target element up in ``state.latest_ui_hierarchy``
    (refreshing it from the device when it is empty), focuses the element,
    asks the controller to erase text and re-reads the hierarchy to report
    the resulting content.
    """

    def __init__(self, ctx: MobileUseContext, state: State):
        self.ctx = ctx
        self.state = state

    async def _refresh_ui_hierarchy(self) -> None:
        """Fetch fresh screen data from the device and store its elements on the state."""
        device_controller = create_device_controller(self.ctx)
        screen_data = await device_controller.get_screen_data()
        self.state.latest_ui_hierarchy = screen_data.elements

    async def _get_element_info(
        self, resource_id: str | None, resource_id_index: int | None = None
    ) -> tuple[object | None, str | None, str | None]:
        """Look up the target element and read its text and hint text.

        Args:
            resource_id: Resource id of the element to find; no lookup is done when falsy.
            resource_id_index: Forwarded as ``index`` to ``find_element_by_resource_id``
                so that the same occurrence is inspected as the one the target
                designates when several elements share the resource id.
                Defaults to None for backward compatibility.

        Returns:
            ``(element, current_text, hint_text)``, or ``(None, None, None)`` when the
            hierarchy is unavailable or the element cannot be found.
        """
        if not self.state.latest_ui_hierarchy:
            await self._refresh_ui_hierarchy()

        if not self.state.latest_ui_hierarchy:
            return None, None, None

        element = None
        if resource_id:
            element = find_element_by_resource_id(
                ui_hierarchy=self.state.latest_ui_hierarchy,
                resource_id=resource_id,
                index=resource_id_index,
            )

        if not element:
            return None, None, None

        current_text = get_element_text(element)
        hint_text = get_element_text(element, hint_text=True)

        return element, current_text, hint_text

    def _format_text_with_hint_info(self, text: str | None, hint_text: str | None) -> str | None:
        """Return ``text``, annotated when it is identical to a non-empty hint text."""
        if text is None:
            return None

        is_hint_text = hint_text is not None and hint_text != "" and hint_text == text

        if is_hint_text:
            return f"{text} (which is the hint text, the input is very likely empty)"

        return text

    def _should_clear_text(self, current_text: str | None, hint_text: str | None) -> bool:
        """Tell whether the field holds real content (non-empty and different from the hint)."""
        return current_text is not None and current_text != "" and current_text != hint_text

    async def _prepare_element_for_clearing(
        self,
        target: Target,
    ) -> bool:
        """Focus the target and move the cursor to its end; return False if focusing failed."""
        if not await focus_element_if_needed(
            ctx=self.ctx,
            target=target,
        ):
            return False

        await move_cursor_to_end_if_bounds(
            ctx=self.ctx,
            state=self.state,
            target=target,
        )
        return True

    async def _erase_text_attempt(self, text_length: int) -> str | None:
        """Run one erase operation; return an error message on failure, None on success."""
        chars_to_erase = text_length + 1
        logger.info(f"Erasing {chars_to_erase} characters from the input")

        controller = UnifiedMobileController(self.ctx)
        # NOTE(review): chars_to_erase is only logged; erase_text() is called without
        # it, so the number of characters actually erased is whatever the controller
        # defaults to. Confirm against UnifiedMobileController.erase_text.
        result = await controller.erase_text()

        if not result:
            logger.error("Failed to erase text")
            return "Failed to erase text"

        return None

    async def _clear_with_retries(
        self,
        target: Target,
        initial_text: str,
        hint_text: str | None,
    ) -> tuple[bool, str | None, int]:
        """Erase repeatedly until the field reads as empty or the attempts run out.

        Returns:
            ``(success, last_read_text, requested_erase_count)``. ``success`` is False
            only when an erase operation itself failed (the count is then reported as 0).
        """
        current_text = initial_text
        erased_chars = 0

        for attempt in range(1, MAX_CLEAR_TRIES + 1):
            logger.info(f"Clear attempt {attempt}/{MAX_CLEAR_TRIES}")

            chars_to_erase = len(current_text) if current_text else DEFAULT_CHARS_TO_ERASE
            error = await self._erase_text_attempt(text_length=chars_to_erase)

            if error:
                return False, current_text, 0
            erased_chars += chars_to_erase

            # Re-read the screen so the emptiness check uses the post-erase content.
            await self._refresh_ui_hierarchy()
            elt = None
            if target.resource_id:
                elt = find_element_by_resource_id(
                    ui_hierarchy=self.state.latest_ui_hierarchy or [],
                    resource_id=target.resource_id,
                    # Inspect the same occurrence that the target designates.
                    index=target.resource_id_index,
                )
                if elt:
                    current_text = get_element_text(elt)
                    logger.info(f"Current text: {current_text}")
                    if text_input_is_empty(text=current_text, hint_text=hint_text):
                        break

            await move_cursor_to_end_if_bounds(
                ctx=self.ctx,
                state=self.state,
                target=target,
                elt=elt,
            )

        # NOTE(review): success is also reported when every attempt was used without
        # the field reading as empty; callers only learn it from the returned text.
        return True, current_text, erased_chars

    def _create_result(
        self,
        success: bool,
        error_message: str | None,
        chars_erased: int,
        final_text: str | None,
        hint_text: str | None,
    ) -> ClearTextResult:
        """Build a ClearTextResult, annotating ``final_text`` when it equals the hint text."""
        formatted_final_text = self._format_text_with_hint_info(final_text, hint_text)

        return ClearTextResult(
            success=success,
            error_message=error_message,
            chars_erased=chars_erased,
            final_text=formatted_final_text,
        )

    def _handle_no_clearing_needed(
        self, current_text: str | None, hint_text: str | None
    ) -> ClearTextResult:
        """Build the success result for a field that holds nothing to clear."""
        return self._create_result(
            success=True,
            error_message=None,
            chars_erased=-1,  # sentinel: no erase was attempted
            final_text=current_text,
            hint_text=hint_text,
        )

    async def _handle_element_not_found(
        self, target: Target, hint_text: str | None
    ) -> ClearTextResult:
        """Best-effort clear when the element could not be located in the hierarchy.

        Focuses the target, runs a single erase, then tries to read the field back.
        """
        if not await self._prepare_element_for_clearing(target=target):
            return self._create_result(
                success=False,
                error_message="Failed to focus element",
                chars_erased=0,
                final_text=None,
                hint_text=None,
            )

        controller = UnifiedMobileController(self.ctx)
        output = await controller.erase_text()
        await self._refresh_ui_hierarchy()

        _, final_text, _ = await self._get_element_info(
            target.resource_id, target.resource_id_index
        )

        return self._create_result(
            success=output,
            error_message="Erase text failed" if not output else None,
            chars_erased=0,  # Unknown since we don't have initial text
            final_text=final_text,
            hint_text=hint_text,
        )

    async def clear_input_text(
        self,
        target: Target,
    ) -> ClearTextResult:
        """Clear the text of ``target`` and describe the outcome.

        Args:
            target: The text field to clear.

        Returns:
            A ClearTextResult carrying the success flag, an optional error message,
            the requested erase count and the text read back from the field.
        """
        element, current_text, hint_text = await self._get_element_info(
            resource_id=target.resource_id,
            resource_id_index=target.resource_id_index,
        )

        if not element:
            return await self._handle_element_not_found(target=target, hint_text=hint_text)

        if not self._should_clear_text(current_text, hint_text):
            return self._handle_no_clearing_needed(current_text, hint_text)

        if not await self._prepare_element_for_clearing(target=target):
            return self._create_result(
                success=False,
                error_message="Failed to focus element",
                chars_erased=0,
                final_text=current_text,
                hint_text=hint_text,
            )

        success, final_text, chars_erased = await self._clear_with_retries(
            target=target,
            initial_text=current_text or "",
            hint_text=hint_text,
        )

        error_message = None if success else "Failed to clear text after retries"

        return self._create_result(
            success=success,
            error_message=error_message,
            chars_erased=chars_erased,
            final_text=final_text,
            hint_text=hint_text,
        )
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def get_focus_and_clear_text_tool(ctx: MobileUseContext) -> BaseTool:
    """Create the ``focus_and_clear_text`` tool bound to ``ctx``."""

    @tool
    async def focus_and_clear_text(
        agent_thought: str,
        target: Target,
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[State, InjectedState],
    ):
        """
        Clears all the text from the text field, by focusing it if needed.

        Args:
            agent_thought: The thought of the agent.
            target: The target text field to clear.
        """
        result = await TextClearer(ctx, state).clear_input_text(target=target)

        # Derive the agent-facing message, extra kwargs and status from one branch.
        if result.success:
            agent_outcome = focus_and_clear_text_wrapper.on_success_fn(
                nb_char_erased=result.chars_erased, new_text_value=result.final_text
            )
            extra_kwargs = {}
            message_status = "success"
        else:
            agent_outcome = focus_and_clear_text_wrapper.on_failure_fn(result.error_message)
            extra_kwargs = {"error": result.error_message}
            message_status = "error"

        tool_message = ToolMessage(
            tool_call_id=tool_call_id,
            content=agent_outcome,
            additional_kwargs=extra_kwargs,
            status=message_status,
        )

        sanitized_update = await state.asanitize_update(
            ctx=ctx,
            update={
                "agents_thoughts": [agent_thought, agent_outcome],
                EXECUTOR_MESSAGES_KEY: [tool_message],
            },
            agent="executor",
        )
        return Command(update=sanitized_update)

    return focus_and_clear_text
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def _format_success_message(nb_char_erased: int, new_text_value: str | None) -> str:
|
|
298
|
+
if nb_char_erased == -1:
|
|
299
|
+
msg = "No text clearing was needed (the input was already empty)."
|
|
300
|
+
else:
|
|
301
|
+
msg = f"Text erased successfully. {nb_char_erased} characters were erased."
|
|
302
|
+
|
|
303
|
+
if new_text_value is not None:
|
|
304
|
+
msg += f" New text in the input is '{new_text_value}'."
|
|
305
|
+
|
|
306
|
+
return msg
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def _format_failure_message(output: str | None) -> str:
|
|
310
|
+
return "Failed to erase text. " + (str(output) if output else "")
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
# Bundles the tool factory with the formatters that produce the agent-facing
# success and failure messages for the focus_and_clear_text tool.
focus_and_clear_text_wrapper = ToolWrapper(
    tool_fn_getter=get_focus_and_clear_text_tool,
    on_success_fn=_format_success_message,
    on_failure_fn=_format_failure_message,
)
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Annotated, Literal
|
|
4
|
+
|
|
5
|
+
from langchain_core.messages import ToolMessage
|
|
6
|
+
from langchain_core.tools import BaseTool, tool
|
|
7
|
+
from langchain_core.tools.base import InjectedToolCallId
|
|
8
|
+
from langgraph.prebuilt import InjectedState
|
|
9
|
+
from langgraph.types import Command
|
|
10
|
+
from pydantic import BaseModel
|
|
11
|
+
|
|
12
|
+
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
13
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
14
|
+
from minitap.mobile_use.controllers.controller_factory import create_device_controller
|
|
15
|
+
from minitap.mobile_use.graph.state import State
|
|
16
|
+
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
17
|
+
from minitap.mobile_use.tools.types import Target
|
|
18
|
+
from minitap.mobile_use.tools.utils import focus_element_if_needed, move_cursor_to_end_if_bounds
|
|
19
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
20
|
+
from minitap.mobile_use.utils.ui_hierarchy import find_element_by_resource_id, get_element_text
|
|
21
|
+
|
|
22
|
+
logger = get_logger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class InputResult(BaseModel):
    """Result of an input operation from the controller layer."""

    # True when the controller reported that the text was typed.
    ok: bool
    # Failure description; stays None when the operation succeeded.
    error: str | None = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
async def _controller_input_text(ctx: MobileUseContext, text: str) -> InputResult:
    """
    Type ``text`` through the device controller and normalize the outcome.

    Returns an ``InputResult`` with ``ok=True`` when the controller reports success,
    otherwise ``ok=False`` with a generic error message.
    """
    typed = await create_device_controller(ctx).input_text(text)
    if not typed:
        return InputResult(ok=False, error="Failed to type text")
    return InputResult(ok=True)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_focus_and_input_text_tool(ctx: MobileUseContext) -> BaseTool:
    """Create the ``focus_and_input_text`` tool bound to ``ctx``."""

    @tool
    async def focus_and_input_text(
        agent_thought: str,
        text: str,
        target: Target,
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[State, InjectedState],
    ):
        """
        Focus a text field and type text into it.

        - Ensure the corresponding element is focused (tap if necessary).
        - If bounds are available, tap near the end to place the cursor at the end.
        - Type the provided `text` using the controller.

        Args:
            agent_thought: The thought of the agent.
            text: The text to type.
            target: The target of the text input (if available).
        """
        focus_method = await focus_element_if_needed(ctx=ctx, target=target)
        if not focus_method:
            # Focusing failed: report the error without attempting to type.
            error_message = "Failed to focus the text input element before typing."
            tool_message = ToolMessage(
                tool_call_id=tool_call_id,
                content=focus_and_input_text_wrapper.on_failure_fn(text, error_message),
                additional_kwargs={"error": error_message},
                status="error",
            )
            return Command(
                update=await state.asanitize_update(
                    ctx=ctx,
                    update={
                        "agents_thoughts": [agent_thought, error_message],
                        EXECUTOR_MESSAGES_KEY: [tool_message],
                    },
                    agent="executor",
                ),
            )

        await move_cursor_to_end_if_bounds(ctx=ctx, state=state, target=target)

        result = await _controller_input_text(ctx=ctx, text=text)
        status: Literal["success", "error"] = "success" if result.ok else "error"

        # After a successful input, read the field back (only possible when the
        # target carries a resource id) so the agent sees its whole content.
        text_input_content = ""
        if status == "success" and target.resource_id:
            controller = create_device_controller(ctx)
            screen_data = await controller.get_screen_data()
            state.latest_ui_hierarchy = screen_data.elements
            element = find_element_by_resource_id(
                ui_hierarchy=state.latest_ui_hierarchy,
                resource_id=target.resource_id,
                index=target.resource_id_index,
            )
            if element:
                text_input_content = get_element_text(element)

        if result.ok:
            agent_outcome = focus_and_input_text_wrapper.on_success_fn(
                text_to_type=text,
                text_from_resource_id=text_input_content,
                target_resource_id=target.resource_id,
                focus_method=focus_method,
            )
        else:
            # The failure formatter is declared as ``lambda text, error``; it must be
            # called positionally, since a ``text_to_type=`` keyword raises TypeError.
            agent_outcome = focus_and_input_text_wrapper.on_failure_fn(text, result.error)

        tool_message = ToolMessage(
            tool_call_id=tool_call_id,
            content=agent_outcome,
            additional_kwargs={"error": result.error} if not result.ok else {},
            status=status,
        )

        return Command(
            update=await state.asanitize_update(
                ctx=ctx,
                update={
                    "agents_thoughts": [agent_thought, agent_outcome],
                    EXECUTOR_MESSAGES_KEY: [tool_message],
                },
                agent="executor",
            ),
        )

    return focus_and_input_text
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _on_input_success(text_to_type, text_from_resource_id, target_resource_id, focus_method):
|
|
135
|
+
"""Success message handler for input text operations."""
|
|
136
|
+
if focus_method == "resource_id":
|
|
137
|
+
return (
|
|
138
|
+
f"Typed {repr(text_to_type)}\n"
|
|
139
|
+
f"Here is the whole content of input with id {repr(target_resource_id)}: "
|
|
140
|
+
f"{repr(text_from_resource_id)}"
|
|
141
|
+
)
|
|
142
|
+
else:
|
|
143
|
+
return (
|
|
144
|
+
f"Typed {repr(text_to_type)} using {focus_method}."
|
|
145
|
+
+ " Should now verify before moving forward."
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# Bundles the tool factory with its message formatters.
# on_failure_fn takes (text, error): callers must pass these positionally or
# under exactly those names.
focus_and_input_text_wrapper = ToolWrapper(
    tool_fn_getter=get_focus_and_input_text_tool,
    on_success_fn=_on_input_success,
    on_failure_fn=lambda text, error: f"Failed to input text {repr(text)}. Reason: {error}",
)
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
3
|
+
from langchain_core.messages import ToolMessage
|
|
4
|
+
from langchain_core.tools import tool
|
|
5
|
+
from langchain_core.tools.base import InjectedToolCallId
|
|
6
|
+
from langgraph.prebuilt import InjectedState
|
|
7
|
+
from langgraph.types import Command
|
|
8
|
+
|
|
9
|
+
from minitap.mobile_use.agents.hopper.hopper import HopperOutput, hopper
|
|
10
|
+
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
11
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
12
|
+
from minitap.mobile_use.controllers.platform_specific_commands_controller import list_packages
|
|
13
|
+
from minitap.mobile_use.graph.state import State
|
|
14
|
+
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
15
|
+
from minitap.mobile_use.utils.app_launch_utils import launch_app_with_retries
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
async def find_package(ctx: MobileUseContext, app_name: str) -> str | None:
    """
    Finds the package name for a given application name.
    Returns None if package not found or on error.

    Args:
        ctx: The mobile-use context, passed to the package listing and to hopper.
        app_name: Natural-language name of the application to look for.
    """
    try:
        # Listing packages sits inside the try so that a listing failure also
        # yields None, as documented, instead of propagating to the caller.
        all_packages = list_packages(ctx=ctx)
        hopper_output: HopperOutput = await hopper(
            ctx=ctx,
            request=f"I'm looking for the package name of the following app: '{app_name}'",
            data=all_packages,
        )
        return hopper_output.output if hopper_output.found else None
    except Exception as e:
        print(f"Failed to find package for '{app_name}': {e}")
        return None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def get_launch_app_tool(ctx: MobileUseContext):
    """Create the ``launch_app`` tool bound to ``ctx``."""

    @tool
    async def launch_app(
        app_name: str,
        agent_thought: str,
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[State, InjectedState],
    ) -> Command:
        """
        Finds and launches an application on the device using its natural language name.
        """
        package_name = await find_package(ctx=ctx, app_name=app_name)

        if not package_name:
            error_msg = "Package not found."
            tool_message = ToolMessage(
                tool_call_id=tool_call_id,
                content=launch_app_wrapper.on_failure_fn(app_name, error_msg),
                # Expose the error in additional_kwargs like the launch-failure
                # branch below does, so every error message has the same shape.
                additional_kwargs={"error": error_msg},
                status="error",
            )
        else:
            success, error_msg = await launch_app_with_retries(ctx=ctx, app_package=package_name)
            tool_message = ToolMessage(
                tool_call_id=tool_call_id,
                content=launch_app_wrapper.on_success_fn(app_name)
                if success
                else launch_app_wrapper.on_failure_fn(app_name, error_msg),
                additional_kwargs={} if success else {"error": error_msg},
                status="success" if success else "error",
            )

        return Command(
            update=await state.asanitize_update(
                ctx=ctx,
                update={
                    "agents_thoughts": [agent_thought, tool_message.content],
                    EXECUTOR_MESSAGES_KEY: [tool_message],
                },
                agent="executor",
            ),
        )

    return launch_app
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# Bundles the tool factory with the formatters that produce the agent-facing
# messages: on_success_fn takes (app_name), on_failure_fn takes (app_name, error).
launch_app_wrapper = ToolWrapper(
    tool_fn_getter=get_launch_app_tool,
    on_success_fn=lambda app_name: f"App '{app_name}' launched successfully.",
    on_failure_fn=lambda app_name, error: f"Failed to launch app '{app_name}': {error}",
)
|