minitap-mobile-use 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. minitap/mobile_use/__init__.py +0 -0
  2. minitap/mobile_use/agents/contextor/contextor.md +55 -0
  3. minitap/mobile_use/agents/contextor/contextor.py +175 -0
  4. minitap/mobile_use/agents/contextor/types.py +36 -0
  5. minitap/mobile_use/agents/cortex/cortex.md +135 -0
  6. minitap/mobile_use/agents/cortex/cortex.py +152 -0
  7. minitap/mobile_use/agents/cortex/types.py +15 -0
  8. minitap/mobile_use/agents/executor/executor.md +42 -0
  9. minitap/mobile_use/agents/executor/executor.py +87 -0
  10. minitap/mobile_use/agents/executor/tool_node.py +152 -0
  11. minitap/mobile_use/agents/hopper/hopper.md +15 -0
  12. minitap/mobile_use/agents/hopper/hopper.py +44 -0
  13. minitap/mobile_use/agents/orchestrator/human.md +12 -0
  14. minitap/mobile_use/agents/orchestrator/orchestrator.md +21 -0
  15. minitap/mobile_use/agents/orchestrator/orchestrator.py +134 -0
  16. minitap/mobile_use/agents/orchestrator/types.py +11 -0
  17. minitap/mobile_use/agents/outputter/human.md +25 -0
  18. minitap/mobile_use/agents/outputter/outputter.py +85 -0
  19. minitap/mobile_use/agents/outputter/test_outputter.py +167 -0
  20. minitap/mobile_use/agents/planner/human.md +14 -0
  21. minitap/mobile_use/agents/planner/planner.md +126 -0
  22. minitap/mobile_use/agents/planner/planner.py +101 -0
  23. minitap/mobile_use/agents/planner/types.py +51 -0
  24. minitap/mobile_use/agents/planner/utils.py +70 -0
  25. minitap/mobile_use/agents/summarizer/summarizer.py +35 -0
  26. minitap/mobile_use/agents/video_analyzer/__init__.py +5 -0
  27. minitap/mobile_use/agents/video_analyzer/human.md +5 -0
  28. minitap/mobile_use/agents/video_analyzer/video_analyzer.md +37 -0
  29. minitap/mobile_use/agents/video_analyzer/video_analyzer.py +111 -0
  30. minitap/mobile_use/clients/browserstack_client.py +477 -0
  31. minitap/mobile_use/clients/idb_client.py +429 -0
  32. minitap/mobile_use/clients/ios_client.py +332 -0
  33. minitap/mobile_use/clients/ios_client_config.py +141 -0
  34. minitap/mobile_use/clients/ui_automator_client.py +330 -0
  35. minitap/mobile_use/clients/wda_client.py +526 -0
  36. minitap/mobile_use/clients/wda_lifecycle.py +367 -0
  37. minitap/mobile_use/config.py +413 -0
  38. minitap/mobile_use/constants.py +3 -0
  39. minitap/mobile_use/context.py +106 -0
  40. minitap/mobile_use/controllers/__init__.py +0 -0
  41. minitap/mobile_use/controllers/android_controller.py +524 -0
  42. minitap/mobile_use/controllers/controller_factory.py +46 -0
  43. minitap/mobile_use/controllers/device_controller.py +182 -0
  44. minitap/mobile_use/controllers/ios_controller.py +436 -0
  45. minitap/mobile_use/controllers/platform_specific_commands_controller.py +199 -0
  46. minitap/mobile_use/controllers/types.py +106 -0
  47. minitap/mobile_use/controllers/unified_controller.py +193 -0
  48. minitap/mobile_use/graph/graph.py +160 -0
  49. minitap/mobile_use/graph/state.py +115 -0
  50. minitap/mobile_use/main.py +309 -0
  51. minitap/mobile_use/sdk/__init__.py +12 -0
  52. minitap/mobile_use/sdk/agent.py +1294 -0
  53. minitap/mobile_use/sdk/builders/__init__.py +10 -0
  54. minitap/mobile_use/sdk/builders/agent_config_builder.py +307 -0
  55. minitap/mobile_use/sdk/builders/index.py +15 -0
  56. minitap/mobile_use/sdk/builders/task_request_builder.py +236 -0
  57. minitap/mobile_use/sdk/constants.py +1 -0
  58. minitap/mobile_use/sdk/examples/README.md +83 -0
  59. minitap/mobile_use/sdk/examples/__init__.py +1 -0
  60. minitap/mobile_use/sdk/examples/app_lock_messaging.py +54 -0
  61. minitap/mobile_use/sdk/examples/platform_manual_task_example.py +67 -0
  62. minitap/mobile_use/sdk/examples/platform_minimal_example.py +48 -0
  63. minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
  64. minitap/mobile_use/sdk/examples/smart_notification_assistant.py +225 -0
  65. minitap/mobile_use/sdk/examples/video_transcription_example.py +117 -0
  66. minitap/mobile_use/sdk/services/cloud_mobile.py +656 -0
  67. minitap/mobile_use/sdk/services/platform.py +434 -0
  68. minitap/mobile_use/sdk/types/__init__.py +51 -0
  69. minitap/mobile_use/sdk/types/agent.py +84 -0
  70. minitap/mobile_use/sdk/types/exceptions.py +138 -0
  71. minitap/mobile_use/sdk/types/platform.py +183 -0
  72. minitap/mobile_use/sdk/types/task.py +269 -0
  73. minitap/mobile_use/sdk/utils.py +29 -0
  74. minitap/mobile_use/services/accessibility.py +100 -0
  75. minitap/mobile_use/services/llm.py +247 -0
  76. minitap/mobile_use/services/telemetry.py +421 -0
  77. minitap/mobile_use/tools/index.py +67 -0
  78. minitap/mobile_use/tools/mobile/back.py +52 -0
  79. minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
  80. minitap/mobile_use/tools/mobile/focus_and_clear_text.py +317 -0
  81. minitap/mobile_use/tools/mobile/focus_and_input_text.py +153 -0
  82. minitap/mobile_use/tools/mobile/launch_app.py +86 -0
  83. minitap/mobile_use/tools/mobile/long_press_on.py +169 -0
  84. minitap/mobile_use/tools/mobile/open_link.py +62 -0
  85. minitap/mobile_use/tools/mobile/press_key.py +83 -0
  86. minitap/mobile_use/tools/mobile/stop_app.py +62 -0
  87. minitap/mobile_use/tools/mobile/swipe.py +156 -0
  88. minitap/mobile_use/tools/mobile/tap.py +154 -0
  89. minitap/mobile_use/tools/mobile/video_recording.py +177 -0
  90. minitap/mobile_use/tools/mobile/wait_for_delay.py +81 -0
  91. minitap/mobile_use/tools/scratchpad.py +147 -0
  92. minitap/mobile_use/tools/test_utils.py +413 -0
  93. minitap/mobile_use/tools/tool_wrapper.py +16 -0
  94. minitap/mobile_use/tools/types.py +35 -0
  95. minitap/mobile_use/tools/utils.py +336 -0
  96. minitap/mobile_use/utils/app_launch_utils.py +173 -0
  97. minitap/mobile_use/utils/cli_helpers.py +37 -0
  98. minitap/mobile_use/utils/cli_selection.py +143 -0
  99. minitap/mobile_use/utils/conversations.py +31 -0
  100. minitap/mobile_use/utils/decorators.py +124 -0
  101. minitap/mobile_use/utils/errors.py +6 -0
  102. minitap/mobile_use/utils/file.py +13 -0
  103. minitap/mobile_use/utils/logger.py +183 -0
  104. minitap/mobile_use/utils/media.py +186 -0
  105. minitap/mobile_use/utils/recorder.py +52 -0
  106. minitap/mobile_use/utils/requests_utils.py +37 -0
  107. minitap/mobile_use/utils/shell_utils.py +20 -0
  108. minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
  109. minitap/mobile_use/utils/time.py +6 -0
  110. minitap/mobile_use/utils/ui_hierarchy.py +132 -0
  111. minitap/mobile_use/utils/video.py +281 -0
  112. minitap_mobile_use-3.3.0.dist-info/METADATA +329 -0
  113. minitap_mobile_use-3.3.0.dist-info/RECORD +115 -0
  114. minitap_mobile_use-3.3.0.dist-info/WHEEL +4 -0
  115. minitap_mobile_use-3.3.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,317 @@
1
+ from typing import Annotated
2
+
3
+ from langchain_core.messages import ToolMessage
4
+ from langchain_core.tools import BaseTool, tool
5
+ from langchain_core.tools.base import InjectedToolCallId
6
+ from langgraph.prebuilt import InjectedState
7
+ from langgraph.types import Command
8
+ from pydantic import BaseModel
9
+
10
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
11
+ from minitap.mobile_use.context import MobileUseContext
12
+ from minitap.mobile_use.controllers.controller_factory import create_device_controller
13
+ from minitap.mobile_use.controllers.unified_controller import UnifiedMobileController
14
+ from minitap.mobile_use.graph.state import State
15
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
16
+ from minitap.mobile_use.tools.types import Target
17
+ from minitap.mobile_use.tools.utils import focus_element_if_needed, move_cursor_to_end_if_bounds
18
+ from minitap.mobile_use.utils.logger import get_logger
19
+ from minitap.mobile_use.utils.ui_hierarchy import (
20
+ find_element_by_resource_id,
21
+ get_element_text,
22
+ text_input_is_empty,
23
+ )
24
+
25
# Module-level logger named after this module.
logger = get_logger(__name__)

# Upper bound on the erase-then-recheck iterations run by TextClearer._clear_with_retries.
MAX_CLEAR_TRIES = 5
# Fallback character count used when the field's current text is empty or unknown.
DEFAULT_CHARS_TO_ERASE = 50
29
+
30
+
31
class ClearTextResult(BaseModel):
    """Outcome of a text-clearing operation performed by ``TextClearer``."""

    # Whether the clearing operation is reported as successful.
    success: bool
    # Failure description; None when there is nothing to report.
    error_message: str | None
    # Number of characters counted as erased; -1 is used when no clearing was needed.
    chars_erased: int
    # Text read back from the field afterwards (possibly annotated as hint text), if any.
    final_text: str | None
36
+
37
+
38
class TextClearer:
    """Clears the text of an input field described by a ``Target``.

    The field is looked up by resource id (and resource id index, when the
    target provides one) in the latest UI hierarchy stored on ``state``. The
    hierarchy is refreshed from the device whenever it is missing or after an
    erase attempt.
    """

    def __init__(self, ctx: MobileUseContext, state: State):
        self.ctx = ctx
        self.state = state

    async def _refresh_ui_hierarchy(self) -> None:
        """Fetch fresh screen data and store its elements on the state."""
        device_controller = create_device_controller(self.ctx)
        screen_data = await device_controller.get_screen_data()
        self.state.latest_ui_hierarchy = screen_data.elements

    def _find_element(self, resource_id: str | None, index: int | None = None) -> object | None:
        """Look up an element in the current hierarchy by resource id and optional index.

        ``index`` is only forwarded when it is set, so a lookup without an index
        behaves exactly like a plain resource-id lookup. Returns None when no
        resource id is given or no element matches.
        """
        if not resource_id:
            return None
        lookup_kwargs = {} if index is None else {"index": index}
        return find_element_by_resource_id(
            ui_hierarchy=self.state.latest_ui_hierarchy or [],
            resource_id=resource_id,
            **lookup_kwargs,
        )

    async def _get_element_info(
        self, resource_id: str | None, index: int | None = None
    ) -> tuple[object | None, str | None, str | None]:
        """Return ``(element, current_text, hint_text)`` for the requested element.

        Args:
            resource_id: Resource id of the element to inspect.
            index: Optional position among elements sharing ``resource_id``.

        Returns:
            A triple of Nones when the hierarchy is unavailable or the element
            cannot be found; otherwise the element with its text and hint text.
        """
        if not self.state.latest_ui_hierarchy:
            await self._refresh_ui_hierarchy()

        if not self.state.latest_ui_hierarchy:
            return None, None, None

        element = self._find_element(resource_id, index)
        if not element:
            return None, None, None

        current_text = get_element_text(element)
        hint_text = get_element_text(element, hint_text=True)

        return element, current_text, hint_text

    def _format_text_with_hint_info(self, text: str | None, hint_text: str | None) -> str | None:
        """Annotate ``text`` when it is identical to a non-empty hint text."""
        if text is None:
            return None

        is_hint_text = hint_text is not None and hint_text != "" and hint_text == text

        if is_hint_text:
            return f"{text} (which is the hint text, the input is very likely empty)"

        return text

    def _should_clear_text(self, current_text: str | None, hint_text: str | None) -> bool:
        """Tell whether the field holds real text, i.e. non-empty and not the hint."""
        return current_text is not None and current_text != "" and current_text != hint_text

    async def _prepare_element_for_clearing(
        self,
        target: Target,
    ) -> bool:
        """Focus the target and move the cursor to its end; False if focusing fails."""
        if not await focus_element_if_needed(
            ctx=self.ctx,
            target=target,
        ):
            return False

        await move_cursor_to_end_if_bounds(
            ctx=self.ctx,
            state=self.state,
            target=target,
        )
        return True

    async def _erase_text_attempt(self, text_length: int) -> str | None:
        """Run one erase call; return an error message on failure, None on success.

        ``text_length`` is only used for the log line here: the controller's
        ``erase_text`` is invoked without a character count.
        """
        chars_to_erase = text_length + 1
        logger.info(f"Erasing {chars_to_erase} characters from the input")

        controller = UnifiedMobileController(self.ctx)
        result = await controller.erase_text()

        if not result:
            logger.error("Failed to erase text")
            return "Failed to erase text"

        return None

    async def _clear_with_retries(
        self,
        target: Target,
        initial_text: str,
        hint_text: str | None,
    ) -> tuple[bool, str | None, int]:
        """Erase repeatedly until the field reads as empty or tries run out.

        Returns:
            ``(success, last_known_text, erased_chars)``. ``success`` is False
            only when an erase call fails (with ``erased_chars`` reported as 0).
            Running out of ``MAX_CLEAR_TRIES`` without emptying the field still
            returns True along with whatever text remains. ``erased_chars`` is
            the sum of the text lengths targeted on each attempt.
        """
        current_text = initial_text
        erased_chars = 0

        for attempt in range(1, MAX_CLEAR_TRIES + 1):
            logger.info(f"Clear attempt {attempt}/{MAX_CLEAR_TRIES}")

            chars_to_erase = len(current_text) if current_text else DEFAULT_CHARS_TO_ERASE
            error = await self._erase_text_attempt(text_length=chars_to_erase)

            if error:
                return False, current_text, 0
            erased_chars += chars_to_erase

            # Re-read the field from a fresh hierarchy to see what is left.
            await self._refresh_ui_hierarchy()
            elt = self._find_element(target.resource_id, target.resource_id_index)
            if elt:
                current_text = get_element_text(elt)
                logger.info(f"Current text: {current_text}")
                if text_input_is_empty(text=current_text, hint_text=hint_text):
                    break

            await move_cursor_to_end_if_bounds(
                ctx=self.ctx,
                state=self.state,
                target=target,
                elt=elt,
            )

        return True, current_text, erased_chars

    def _create_result(
        self,
        success: bool,
        error_message: str | None,
        chars_erased: int,
        final_text: str | None,
        hint_text: str | None,
    ) -> ClearTextResult:
        """Build a ``ClearTextResult``, annotating the final text if it is the hint."""
        formatted_final_text = self._format_text_with_hint_info(final_text, hint_text)

        return ClearTextResult(
            success=success,
            error_message=error_message,
            chars_erased=chars_erased,
            final_text=formatted_final_text,
        )

    def _handle_no_clearing_needed(
        self, current_text: str | None, hint_text: str | None
    ) -> ClearTextResult:
        """Report success without erasing; ``chars_erased=-1`` marks this case."""
        return self._create_result(
            success=True,
            error_message=None,
            chars_erased=-1,
            final_text=current_text,
            hint_text=hint_text,
        )

    async def _handle_element_not_found(
        self, target: Target, hint_text: str | None
    ) -> ClearTextResult:
        """Clear blindly when the element could not be located in the hierarchy.

        Focuses the target, issues a single erase call, then re-reads the field
        (if it can now be found) to report its text.
        """
        if not await self._prepare_element_for_clearing(target=target):
            return self._create_result(
                success=False,
                error_message="Failed to focus element",
                chars_erased=0,
                final_text=None,
                hint_text=None,
            )

        controller = UnifiedMobileController(self.ctx)
        output = await controller.erase_text()
        await self._refresh_ui_hierarchy()

        _, final_text, _ = await self._get_element_info(
            target.resource_id, target.resource_id_index
        )

        return self._create_result(
            success=output,
            error_message="Erase text failed" if not output else None,
            chars_erased=0,  # Unknown since we don't have initial text
            final_text=final_text,
            hint_text=hint_text,
        )

    async def clear_input_text(
        self,
        target: Target,
    ) -> ClearTextResult:
        """Clear the text of ``target`` and describe the outcome.

        Flow: inspect the element; fall back to a blind erase when it is not
        found; skip erasing when the field is already empty or only shows its
        hint; otherwise focus the field and erase with retries.
        """
        element, current_text, hint_text = await self._get_element_info(
            resource_id=target.resource_id,
            index=target.resource_id_index,
        )

        if not element:
            return await self._handle_element_not_found(target=target, hint_text=hint_text)

        if not self._should_clear_text(current_text, hint_text):
            return self._handle_no_clearing_needed(current_text, hint_text)

        if not await self._prepare_element_for_clearing(target=target):
            return self._create_result(
                success=False,
                error_message="Failed to focus element",
                chars_erased=0,
                final_text=current_text,
                hint_text=hint_text,
            )

        success, final_text, chars_erased = await self._clear_with_retries(
            target=target,
            initial_text=current_text or "",
            hint_text=hint_text,
        )

        error_message = None if success else "Failed to clear text after retries"

        return self._create_result(
            success=success,
            error_message=error_message,
            chars_erased=chars_erased,
            final_text=final_text,
            hint_text=hint_text,
        )
248
+
249
+
250
def get_focus_and_clear_text_tool(ctx: MobileUseContext) -> BaseTool:
    """Build the ``focus_and_clear_text`` tool bound to the given context."""

    @tool
    async def focus_and_clear_text(
        agent_thought: str,
        target: Target,
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[State, InjectedState],
    ):
        """
        Clears all the text from the text field, by focusing it if needed.

        Args:
            agent_thought: The thought of the agent.
            target: The target text field to clear.
        """
        result = await TextClearer(ctx, state).clear_input_text(target=target)

        # Pick the outcome text, extra kwargs and status for the tool message in one place.
        if result.success:
            agent_outcome = focus_and_clear_text_wrapper.on_success_fn(
                nb_char_erased=result.chars_erased, new_text_value=result.final_text
            )
            extra_kwargs = {}
            message_status = "success"
        else:
            agent_outcome = focus_and_clear_text_wrapper.on_failure_fn(result.error_message)
            extra_kwargs = {"error": result.error_message}
            message_status = "error"

        tool_message = ToolMessage(
            tool_call_id=tool_call_id,
            content=agent_outcome,
            additional_kwargs=extra_kwargs,
            status=message_status,
        )

        sanitized_update = await state.asanitize_update(
            ctx=ctx,
            update={
                "agents_thoughts": [agent_thought, agent_outcome],
                EXECUTOR_MESSAGES_KEY: [tool_message],
            },
            agent="executor",
        )
        return Command(update=sanitized_update)

    return focus_and_clear_text
295
+
296
+
297
+ def _format_success_message(nb_char_erased: int, new_text_value: str | None) -> str:
298
+ if nb_char_erased == -1:
299
+ msg = "No text clearing was needed (the input was already empty)."
300
+ else:
301
+ msg = f"Text erased successfully. {nb_char_erased} characters were erased."
302
+
303
+ if new_text_value is not None:
304
+ msg += f" New text in the input is '{new_text_value}'."
305
+
306
+ return msg
307
+
308
+
309
+ def _format_failure_message(output: str | None) -> str:
310
+ return "Failed to erase text. " + (str(output) if output else "")
311
+
312
+
313
# Bundles the tool factory with the formatters that produce its success and failure messages.
focus_and_clear_text_wrapper = ToolWrapper(
    tool_fn_getter=get_focus_and_clear_text_tool,
    on_success_fn=_format_success_message,
    on_failure_fn=_format_failure_message,
)
@@ -0,0 +1,153 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Annotated, Literal
4
+
5
+ from langchain_core.messages import ToolMessage
6
+ from langchain_core.tools import BaseTool, tool
7
+ from langchain_core.tools.base import InjectedToolCallId
8
+ from langgraph.prebuilt import InjectedState
9
+ from langgraph.types import Command
10
+ from pydantic import BaseModel
11
+
12
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
13
+ from minitap.mobile_use.context import MobileUseContext
14
+ from minitap.mobile_use.controllers.controller_factory import create_device_controller
15
+ from minitap.mobile_use.graph.state import State
16
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
17
+ from minitap.mobile_use.tools.types import Target
18
+ from minitap.mobile_use.tools.utils import focus_element_if_needed, move_cursor_to_end_if_bounds
19
+ from minitap.mobile_use.utils.logger import get_logger
20
+ from minitap.mobile_use.utils.ui_hierarchy import find_element_by_resource_id, get_element_text
21
+
22
+ logger = get_logger(__name__)
23
+
24
+
25
class InputResult(BaseModel):
    """Result of an input operation from the controller layer."""

    # True when the controller reported that the text was typed.
    ok: bool
    # Failure description; left as None on success.
    error: str | None = None
30
+
31
+
32
async def _controller_input_text(ctx: MobileUseContext, text: str) -> InputResult:
    """
    Type ``text`` through the device controller and wrap the outcome in an ``InputResult``.
    """
    typed = await create_device_controller(ctx).input_text(text)
    if not typed:
        return InputResult(ok=False, error="Failed to type text")
    return InputResult(ok=True)
41
+
42
+
43
def get_focus_and_input_text_tool(ctx: MobileUseContext) -> BaseTool:
    """Build the ``focus_and_input_text`` tool bound to the given context."""

    @tool
    async def focus_and_input_text(
        agent_thought: str,
        text: str,
        target: Target,
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[State, InjectedState],
    ):
        """
        Focus a text field and type text into it.

        - Ensure the corresponding element is focused (tap if necessary).
        - If bounds are available, tap near the end to place the cursor at the end.
        - Type the provided `text` using the controller.

        Args:
            agent_thought: The thought of the agent.
            text: The text to type.
            target: The target of the text input (if available).
        """
        focus_method = await focus_element_if_needed(ctx=ctx, target=target)
        if not focus_method:
            # Nothing was typed: report the focus failure and stop here.
            error_message = "Failed to focus the text input element before typing."
            tool_message = ToolMessage(
                tool_call_id=tool_call_id,
                content=focus_and_input_text_wrapper.on_failure_fn(text, error_message),
                additional_kwargs={"error": error_message},
                status="error",
            )
            return Command(
                update=await state.asanitize_update(
                    ctx=ctx,
                    update={
                        "agents_thoughts": [agent_thought, error_message],
                        EXECUTOR_MESSAGES_KEY: [tool_message],
                    },
                    agent="executor",
                ),
            )

        await move_cursor_to_end_if_bounds(ctx=ctx, state=state, target=target)

        result = await _controller_input_text(ctx=ctx, text=text)
        status: Literal["success", "error"] = "success" if result.ok else "error"

        # After a successful input, re-read the field so the agent sees its full content.
        text_input_content = ""
        if status == "success" and target.resource_id:
            controller = create_device_controller(ctx)
            screen_data = await controller.get_screen_data()
            state.latest_ui_hierarchy = screen_data.elements
            element = find_element_by_resource_id(
                ui_hierarchy=state.latest_ui_hierarchy,
                resource_id=target.resource_id,
                index=target.resource_id_index,
            )
            if element:
                text_input_content = get_element_text(element)

        if result.ok:
            agent_outcome = focus_and_input_text_wrapper.on_success_fn(
                text_to_type=text,
                text_from_resource_id=text_input_content,
                target_resource_id=target.resource_id,
                focus_method=focus_method,
            )
        else:
            # The failure formatter's parameters are named (text, error), so it must be
            # called positionally; a `text_to_type=` keyword would raise TypeError.
            agent_outcome = focus_and_input_text_wrapper.on_failure_fn(text, result.error)

        tool_message = ToolMessage(
            tool_call_id=tool_call_id,
            content=agent_outcome,
            additional_kwargs={"error": result.error} if not result.ok else {},
            status=status,
        )

        return Command(
            update=await state.asanitize_update(
                ctx=ctx,
                update={
                    "agents_thoughts": [agent_thought, agent_outcome],
                    EXECUTOR_MESSAGES_KEY: [tool_message],
                },
                agent="executor",
            ),
        )

    return focus_and_input_text
132
+
133
+
134
+ def _on_input_success(text_to_type, text_from_resource_id, target_resource_id, focus_method):
135
+ """Success message handler for input text operations."""
136
+ if focus_method == "resource_id":
137
+ return (
138
+ f"Typed {repr(text_to_type)}\n"
139
+ f"Here is the whole content of input with id {repr(target_resource_id)}: "
140
+ f"{repr(text_from_resource_id)}"
141
+ )
142
+ else:
143
+ return (
144
+ f"Typed {repr(text_to_type)} using {focus_method}."
145
+ + " Should now verify before moving forward."
146
+ )
147
+
148
+
149
# Bundles the tool factory with its message formatters.
# Note: the failure formatter's parameters are named `text` and `error`.
focus_and_input_text_wrapper = ToolWrapper(
    tool_fn_getter=get_focus_and_input_text_tool,
    on_success_fn=_on_input_success,
    on_failure_fn=lambda text, error: f"Failed to input text {repr(text)}. Reason: {error}",
)
@@ -0,0 +1,86 @@
1
+ from typing import Annotated
2
+
3
+ from langchain_core.messages import ToolMessage
4
+ from langchain_core.tools import tool
5
+ from langchain_core.tools.base import InjectedToolCallId
6
+ from langgraph.prebuilt import InjectedState
7
+ from langgraph.types import Command
8
+
9
+ from minitap.mobile_use.agents.hopper.hopper import HopperOutput, hopper
10
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
11
+ from minitap.mobile_use.context import MobileUseContext
12
+ from minitap.mobile_use.controllers.platform_specific_commands_controller import list_packages
13
+ from minitap.mobile_use.graph.state import State
14
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
15
+ from minitap.mobile_use.utils.app_launch_utils import launch_app_with_retries
16
+
17
+
18
async def find_package(ctx: MobileUseContext, app_name: str) -> str | None:
    """
    Finds the package name for a given application name.
    Returns None if package not found or on error.

    Args:
        ctx: Context forwarded to the package listing and to the hopper agent.
        app_name: Natural-language name of the application to look up.
    """
    try:
        # Listing packages is inside the try so a failure here also yields None,
        # as the contract above promises.
        all_packages = list_packages(ctx=ctx)
        hopper_output: HopperOutput = await hopper(
            ctx=ctx,
            request=f"I'm looking for the package name of the following app: '{app_name}'",
            data=all_packages,
        )
        if not hopper_output.found:
            return None
        return hopper_output.output
    except Exception as e:
        print(f"Failed to find package for '{app_name}': {e}")
        return None
36
+
37
+
38
def get_launch_app_tool(ctx: MobileUseContext):
    """Build the ``launch_app`` tool bound to the given context."""

    @tool
    async def launch_app(
        app_name: str,
        agent_thought: str,
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[State, InjectedState],
    ) -> Command:
        """
        Finds and launches an application on the device using its natural language name.
        """
        package_name = await find_package(ctx=ctx, app_name=app_name)

        if not package_name:
            # Carry the error in additional_kwargs too, like the launch-failure branch below.
            error_msg = "Package not found."
            tool_message = ToolMessage(
                tool_call_id=tool_call_id,
                content=launch_app_wrapper.on_failure_fn(app_name, error_msg),
                additional_kwargs={"error": error_msg},
                status="error",
            )
        else:
            success, error_msg = await launch_app_with_retries(ctx=ctx, app_package=package_name)
            tool_message = ToolMessage(
                tool_call_id=tool_call_id,
                content=launch_app_wrapper.on_success_fn(app_name)
                if success
                else launch_app_wrapper.on_failure_fn(app_name, error_msg),
                additional_kwargs={} if success else {"error": error_msg},
                status="success" if success else "error",
            )

        return Command(
            update=await state.asanitize_update(
                ctx=ctx,
                update={
                    "agents_thoughts": [agent_thought, tool_message.content],
                    EXECUTOR_MESSAGES_KEY: [tool_message],
                },
                agent="executor",
            ),
        )

    return launch_app
80
+
81
+
82
# Bundles the tool factory with its message formatters:
# success takes (app_name); failure takes (app_name, error).
launch_app_wrapper = ToolWrapper(
    tool_fn_getter=get_launch_app_tool,
    on_success_fn=lambda app_name: f"App '{app_name}' launched successfully.",
    on_failure_fn=lambda app_name, error: f"Failed to launch app '{app_name}': {error}",
)