minitap-mobile-use: minitap_mobile_use-2.5.3-py3-none-any.whl → minitap_mobile_use-2.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (43)
  1. minitap/mobile_use/agents/contextor/contextor.py +0 -8
  2. minitap/mobile_use/agents/cortex/cortex.md +122 -36
  3. minitap/mobile_use/agents/cortex/cortex.py +32 -17
  4. minitap/mobile_use/agents/cortex/types.py +18 -4
  5. minitap/mobile_use/agents/executor/executor.md +3 -3
  6. minitap/mobile_use/agents/executor/executor.py +10 -3
  7. minitap/mobile_use/agents/hopper/hopper.md +30 -2
  8. minitap/mobile_use/agents/hopper/hopper.py +19 -15
  9. minitap/mobile_use/agents/orchestrator/orchestrator.py +14 -5
  10. minitap/mobile_use/agents/outputter/outputter.py +13 -3
  11. minitap/mobile_use/agents/planner/planner.md +20 -9
  12. minitap/mobile_use/agents/planner/planner.py +12 -5
  13. minitap/mobile_use/agents/screen_analyzer/human.md +16 -0
  14. minitap/mobile_use/agents/screen_analyzer/screen_analyzer.py +111 -0
  15. minitap/mobile_use/clients/ios_client.py +7 -3
  16. minitap/mobile_use/config.py +87 -24
  17. minitap/mobile_use/controllers/mobile_command_controller.py +354 -88
  18. minitap/mobile_use/controllers/platform_specific_commands_controller.py +41 -27
  19. minitap/mobile_use/controllers/types.py +95 -0
  20. minitap/mobile_use/graph/graph.py +55 -11
  21. minitap/mobile_use/graph/state.py +10 -3
  22. minitap/mobile_use/main.py +12 -4
  23. minitap/mobile_use/sdk/agent.py +113 -72
  24. minitap/mobile_use/sdk/examples/smart_notification_assistant.py +59 -10
  25. minitap/mobile_use/sdk/services/platform.py +15 -1
  26. minitap/mobile_use/sdk/types/platform.py +1 -0
  27. minitap/mobile_use/sdk/types/task.py +10 -1
  28. minitap/mobile_use/servers/device_hardware_bridge.py +13 -6
  29. minitap/mobile_use/services/llm.py +5 -2
  30. minitap/mobile_use/tools/index.py +7 -9
  31. minitap/mobile_use/tools/mobile/{clear_text.py → focus_and_clear_text.py} +7 -7
  32. minitap/mobile_use/tools/mobile/{input_text.py → focus_and_input_text.py} +8 -8
  33. minitap/mobile_use/tools/mobile/long_press_on.py +130 -15
  34. minitap/mobile_use/tools/mobile/swipe.py +3 -26
  35. minitap/mobile_use/tools/mobile/tap.py +41 -28
  36. minitap/mobile_use/tools/mobile/wait_for_delay.py +84 -0
  37. minitap/mobile_use/utils/cli_helpers.py +10 -6
  38. {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.7.0.dist-info}/METADATA +1 -1
  39. {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.7.0.dist-info}/RECORD +41 -39
  40. minitap/mobile_use/tools/mobile/glimpse_screen.py +0 -74
  41. minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +0 -64
  42. {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.7.0.dist-info}/WHEEL +0 -0
  43. {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.7.0.dist-info}/entry_points.txt +0 -0
@@ -23,6 +23,7 @@ from minitap.mobile_use.sdk.types.platform import (
23
23
  )
24
24
  from minitap.mobile_use.sdk.types.task import (
25
25
  AgentProfile,
26
+ CloudDevicePlatformTaskRequest,
26
27
  ManualTaskConfig,
27
28
  PlatformTaskInfo,
28
29
  PlatformTaskRequest,
@@ -60,6 +61,10 @@ class PlatformService:
60
61
 
61
62
  async def create_task_run(self, request: PlatformTaskRequest) -> PlatformTaskInfo:
62
63
  try:
64
+ virtual_mobile_id = None
65
+ if isinstance(request, CloudDevicePlatformTaskRequest):
66
+ virtual_mobile_id = request.virtual_mobile_id
67
+
63
68
  # Check if task is a string (fetch from platform) or ManualTaskConfig (create manually)
64
69
  if isinstance(request.task, str):
65
70
  # Fetch task from platform
@@ -87,7 +92,11 @@ class PlatformService:
87
92
  thoughts_output_path=request.thoughts_output_path,
88
93
  )
89
94
 
90
- task_run = await self._create_task_run(task=task, profile=profile)
95
+ task_run = await self._create_task_run(
96
+ task=task,
97
+ profile=profile,
98
+ virtual_mobile_id=virtual_mobile_id,
99
+ )
91
100
  else:
92
101
  # Create task manually from ManualTaskConfig
93
102
  logger.info(f"Creating manual task with goal: {request.task.goal}")
@@ -113,6 +122,7 @@ class PlatformService:
113
122
  task_run = await self._create_manual_task_run(
114
123
  manual_config=request.task,
115
124
  profile=profile,
125
+ virtual_mobile_id=virtual_mobile_id,
116
126
  )
117
127
 
118
128
  return PlatformTaskInfo(
@@ -244,12 +254,14 @@ class PlatformService:
244
254
  self,
245
255
  task: TaskResponse,
246
256
  profile: LLMProfileResponse,
257
+ virtual_mobile_id: str | None = None,
247
258
  ) -> TaskRunResponse:
248
259
  try:
249
260
  logger.info(f"Creating task run for task: {task.name}")
250
261
  task_run = CreateTaskRunRequest(
251
262
  task_id=task.id,
252
263
  llm_profile_id=profile.id,
264
+ virtual_mobile_id=virtual_mobile_id,
253
265
  )
254
266
  response = await self._client.post(url="v1/task-runs", json=task_run.model_dump())
255
267
  response.raise_for_status()
@@ -264,6 +276,7 @@ class PlatformService:
264
276
  self,
265
277
  manual_config: ManualTaskConfig,
266
278
  profile: LLMProfileResponse,
279
+ virtual_mobile_id: str | None = None,
267
280
  ) -> TaskRunResponse:
268
281
  """
269
282
  Create an orphan task run from a manual task configuration.
@@ -277,6 +290,7 @@ class PlatformService:
277
290
  "inputPrompt": manual_config.goal,
278
291
  "outputDescription": manual_config.output_description,
279
292
  "llmProfileId": profile.id,
293
+ "virtualMobileId": virtual_mobile_id,
280
294
  }
281
295
 
282
296
  response = await self._client.post(url="v1/task-runs/orphan", json=orphan_payload)
@@ -63,6 +63,7 @@ class CreateTaskRunRequest(BaseApiModel):
63
63
 
64
64
  task_id: str = Field(..., description="ID of the task to run")
65
65
  llm_profile_id: str = Field(..., description="LLM profile ID to use")
66
+ virtual_mobile_id: str | None = Field(None, description="Virtual mobile ID to use")
66
67
 
67
68
 
68
69
  class UpdateTaskRunStatusRequest(BaseApiModel):
@@ -2,12 +2,13 @@
2
2
  Task-related type definitions for the Mobile-use SDK.
3
3
  """
4
4
 
5
+ from asyncio import Event
5
6
  from collections.abc import Callable, Coroutine
6
7
  from datetime import datetime
7
8
  from pathlib import Path
8
9
  from typing import Any, TypeVar, overload
9
10
 
10
- from pydantic import BaseModel, Field
11
+ from pydantic import BaseModel, ConfigDict, Field
11
12
 
12
13
  from minitap.mobile_use.config import LLMConfig, get_default_llm_config
13
14
  from minitap.mobile_use.constants import RECURSION_LIMIT
@@ -135,6 +136,14 @@ class PlatformTaskRequest[TOutput](TaskRequestBase):
135
136
  api_key: str | None = None
136
137
 
137
138
 
139
+ class CloudDevicePlatformTaskRequest[TOutput](PlatformTaskRequest[TOutput]):
140
+ model_config = ConfigDict(arbitrary_types_allowed=True)
141
+
142
+ task_run_id_available_event: Event = Event()
143
+ task_run_id: str | None = None
144
+ virtual_mobile_id: str | None = None
145
+
146
+
138
147
  class TaskResult(BaseModel):
139
148
  """
140
149
  Result of a mobile automation task.
@@ -135,23 +135,30 @@ class DeviceHardwareBridge:
135
135
  print(f"[Maestro Studio ERROR]: {line}")
136
136
  self.output.append(line)
137
137
 
138
- if "device offline" in line.lower():
138
+ lower_line = line.lower()
139
+
140
+ # Ignore known benign warnings (common on macOS/JDK 21+)
141
+ if line.startswith("WARNING:") or (
142
+ "restricted method" in lower_line
143
+ or "jansi" in lower_line
144
+ or "enable-native-access" in lower_line
145
+ or "java.lang.system::load" in lower_line
146
+ ):
147
+ continue
148
+
149
+ if "device offline" in lower_line:
139
150
  with self.lock:
140
151
  self.status = BridgeStatus.FAILED
141
152
  if self.process:
142
153
  self.process.kill()
143
154
  break
144
155
 
145
- if "address already in use" in line.lower():
156
+ if "address already in use" in lower_line:
146
157
  with self.lock:
147
158
  self.status = BridgeStatus.PORT_IN_USE
148
159
  if self.process:
149
160
  self.process.kill()
150
161
  break
151
- else:
152
- with self.lock:
153
- if self.status == BridgeStatus.STARTING:
154
- self.status = BridgeStatus.FAILED
155
162
 
156
163
  def _wait_for_health_check(self, retries=5, delay=2):
157
164
  health_url = f"http://localhost:{DEVICE_HARDWARE_BRIDGE_PORT}/api/banner-message"
@@ -13,6 +13,7 @@ from minitap.mobile_use.config import (
13
13
  AgentNode,
14
14
  AgentNodeWithFallback,
15
15
  LLMUtilsNode,
16
+ LLMUtilsNodeWithFallback,
16
17
  LLMWithFallback,
17
18
  settings,
18
19
  )
@@ -169,8 +170,9 @@ def get_llm(
169
170
  @overload
170
171
  def get_llm(
171
172
  ctx: MobileUseContext,
172
- name: AgentNode,
173
+ name: LLMUtilsNode,
173
174
  *,
175
+ is_utils: Literal[True],
174
176
  temperature: float = 1,
175
177
  ) -> BaseChatModel: ...
176
178
 
@@ -178,9 +180,10 @@ def get_llm(
178
180
  @overload
179
181
  def get_llm(
180
182
  ctx: MobileUseContext,
181
- name: LLMUtilsNode,
183
+ name: LLMUtilsNodeWithFallback,
182
184
  *,
183
185
  is_utils: Literal[True],
186
+ use_fallback: bool = False,
184
187
  temperature: float = 1,
185
188
  ) -> BaseChatModel: ...
186
189
 
@@ -2,10 +2,9 @@ from langchain_core.tools import BaseTool
2
2
 
3
3
  from minitap.mobile_use.context import MobileUseContext
4
4
  from minitap.mobile_use.tools.mobile.back import back_wrapper
5
- from minitap.mobile_use.tools.mobile.clear_text import clear_text_wrapper
6
5
  from minitap.mobile_use.tools.mobile.erase_one_char import erase_one_char_wrapper
7
- from minitap.mobile_use.tools.mobile.glimpse_screen import glimpse_screen_wrapper
8
- from minitap.mobile_use.tools.mobile.input_text import input_text_wrapper
6
+ from minitap.mobile_use.tools.mobile.focus_and_clear_text import focus_and_clear_text_wrapper
7
+ from minitap.mobile_use.tools.mobile.focus_and_input_text import focus_and_input_text_wrapper
9
8
  from minitap.mobile_use.tools.mobile.launch_app import launch_app_wrapper
10
9
  from minitap.mobile_use.tools.mobile.long_press_on import long_press_on_wrapper
11
10
  from minitap.mobile_use.tools.mobile.open_link import open_link_wrapper
@@ -13,8 +12,8 @@ from minitap.mobile_use.tools.mobile.press_key import press_key_wrapper
13
12
  from minitap.mobile_use.tools.mobile.stop_app import stop_app_wrapper
14
13
  from minitap.mobile_use.tools.mobile.swipe import swipe_wrapper
15
14
  from minitap.mobile_use.tools.mobile.tap import tap_wrapper
16
- from minitap.mobile_use.tools.mobile.wait_for_animation_to_end import (
17
- wait_for_animation_to_end_wrapper,
15
+ from minitap.mobile_use.tools.mobile.wait_for_delay import (
16
+ wait_for_delay_wrapper,
18
17
  )
19
18
  from minitap.mobile_use.tools.tool_wrapper import CompositeToolWrapper, ToolWrapper
20
19
 
@@ -24,14 +23,13 @@ EXECUTOR_WRAPPERS_TOOLS = [
24
23
  tap_wrapper,
25
24
  long_press_on_wrapper,
26
25
  swipe_wrapper,
27
- glimpse_screen_wrapper,
28
- input_text_wrapper,
26
+ focus_and_input_text_wrapper,
29
27
  erase_one_char_wrapper,
30
28
  launch_app_wrapper,
31
29
  stop_app_wrapper,
32
- clear_text_wrapper,
30
+ focus_and_clear_text_wrapper,
33
31
  press_key_wrapper,
34
- wait_for_animation_to_end_wrapper,
32
+ wait_for_delay_wrapper,
35
33
  ]
36
34
 
37
35
 
@@ -238,9 +238,9 @@ class TextClearer:
238
238
  )
239
239
 
240
240
 
241
- def get_clear_text_tool(ctx: MobileUseContext):
241
+ def get_focus_and_clear_text_tool(ctx: MobileUseContext):
242
242
  @tool
243
- async def clear_text(
243
+ async def focus_and_clear_text(
244
244
  tool_call_id: Annotated[str, InjectedToolCallId],
245
245
  state: Annotated[State, InjectedState],
246
246
  agent_thought: str,
@@ -255,9 +255,9 @@ def get_clear_text_tool(ctx: MobileUseContext):
255
255
  )
256
256
 
257
257
  agent_outcome = (
258
- clear_text_wrapper.on_failure_fn(result.error_message)
258
+ focus_and_clear_text_wrapper.on_failure_fn(result.error_message)
259
259
  if not result.success
260
- else clear_text_wrapper.on_success_fn(
260
+ else focus_and_clear_text_wrapper.on_success_fn(
261
261
  nb_char_erased=result.chars_erased, new_text_value=result.final_text
262
262
  )
263
263
  )
@@ -280,7 +280,7 @@ def get_clear_text_tool(ctx: MobileUseContext):
280
280
  ),
281
281
  )
282
282
 
283
- return clear_text
283
+ return focus_and_clear_text
284
284
 
285
285
 
286
286
  def _format_success_message(nb_char_erased: int, new_text_value: str | None) -> str:
@@ -299,8 +299,8 @@ def _format_failure_message(output: str | None) -> str:
299
299
  return "Failed to erase text. " + (str(output) if output else "")
300
300
 
301
301
 
302
- clear_text_wrapper = ToolWrapper(
303
- tool_fn_getter=get_clear_text_tool,
302
+ focus_and_clear_text_wrapper = ToolWrapper(
303
+ tool_fn_getter=get_focus_and_clear_text_tool,
304
304
  on_success_fn=_format_success_message,
305
305
  on_failure_fn=_format_failure_message,
306
306
  )
@@ -42,9 +42,9 @@ def _controller_input_text(ctx: MobileUseContext, text: str) -> InputResult:
42
42
  return InputResult(ok=False, error=str(controller_out))
43
43
 
44
44
 
45
- def get_input_text_tool(ctx: MobileUseContext):
45
+ def get_focus_and_input_text_tool(ctx: MobileUseContext):
46
46
  @tool
47
- async def input_text(
47
+ async def focus_and_input_text(
48
48
  tool_call_id: Annotated[str, InjectedToolCallId],
49
49
  state: Annotated[State, InjectedState],
50
50
  agent_thought: str,
@@ -70,7 +70,7 @@ def get_input_text_tool(ctx: MobileUseContext):
70
70
  error_message = "Failed to focus the text input element before typing."
71
71
  tool_message = ToolMessage(
72
72
  tool_call_id=tool_call_id,
73
- content=input_text_wrapper.on_failure_fn(text, error_message),
73
+ content=focus_and_input_text_wrapper.on_failure_fn(text, error_message),
74
74
  additional_kwargs={"error": error_message},
75
75
  status="error",
76
76
  )
@@ -103,9 +103,9 @@ def get_input_text_tool(ctx: MobileUseContext):
103
103
  text_input_content = get_element_text(element)
104
104
 
105
105
  agent_outcome = (
106
- input_text_wrapper.on_success_fn(text, text_input_content, target.resource_id)
106
+ focus_and_input_text_wrapper.on_success_fn(text, text_input_content, target.resource_id)
107
107
  if result.ok
108
- else input_text_wrapper.on_failure_fn(text, result.error)
108
+ else focus_and_input_text_wrapper.on_failure_fn(text, result.error)
109
109
  )
110
110
 
111
111
  tool_message = ToolMessage(
@@ -126,7 +126,7 @@ def get_input_text_tool(ctx: MobileUseContext):
126
126
  ),
127
127
  )
128
128
 
129
- return input_text
129
+ return focus_and_input_text
130
130
 
131
131
 
132
132
  def _on_input_success(text, text_input_content, text_input_resource_id):
@@ -141,8 +141,8 @@ def _on_input_success(text, text_input_content, text_input_resource_id):
141
141
  return "Typed text, should now verify before moving forward"
142
142
 
143
143
 
144
- input_text_wrapper = ToolWrapper(
145
- tool_fn_getter=get_input_text_tool,
144
+ focus_and_input_text_wrapper = ToolWrapper(
145
+ tool_fn_getter=get_focus_and_input_text_tool,
146
146
  on_success_fn=_on_input_success,
147
147
  on_failure_fn=lambda text, error: f"Failed to input text {repr(text)}. Reason: {error}",
148
148
  )
@@ -2,46 +2,158 @@ from typing import Annotated
2
2
 
3
3
  from langchain_core.messages import ToolMessage
4
4
  from langchain_core.tools import tool
5
- from langchain_core.tools.base import InjectedToolCallId
5
+ from langchain_core.tools.base import BaseTool, InjectedToolCallId
6
6
  from langgraph.prebuilt import InjectedState
7
7
  from langgraph.types import Command
8
8
 
9
9
  from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
10
10
  from minitap.mobile_use.context import MobileUseContext
11
- from minitap.mobile_use.controllers.mobile_command_controller import SelectorRequest
11
+ from minitap.mobile_use.controllers.mobile_command_controller import (
12
+ CoordinatesSelectorRequest,
13
+ IdSelectorRequest,
14
+ SelectorRequestWithCoordinates,
15
+ TextSelectorRequest,
16
+ )
12
17
  from minitap.mobile_use.controllers.mobile_command_controller import (
13
18
  long_press_on as long_press_on_controller,
14
19
  )
15
20
  from minitap.mobile_use.graph.state import State
16
21
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
22
+ from minitap.mobile_use.tools.types import Target
23
+ from minitap.mobile_use.utils.logger import get_logger
17
24
 
25
+ logger = get_logger(__name__)
18
26
 
19
- def get_long_press_on_tool(ctx: MobileUseContext):
27
+
28
+ def get_long_press_on_tool(ctx: MobileUseContext) -> BaseTool:
20
29
  @tool
21
30
  async def long_press_on(
22
31
  tool_call_id: Annotated[str, InjectedToolCallId],
23
32
  state: Annotated[State, InjectedState],
24
33
  agent_thought: str,
25
- selector_request: SelectorRequest,
26
- index: int | None = None,
27
- ) -> Command:
34
+ target: Target,
35
+ duration_ms: int = 1000,
36
+ ):
28
37
  """
29
- Long press on a UI element identified by the given selector.
30
- An index can be specified to select a specific element if multiple are found.
38
+ Long presses on a UI element identified by the 'target' object.
39
+
40
+ The 'target' object allows specifying an element by its resource_id
41
+ (with an optional index), its coordinates, or its text content (with an optional index).
42
+ The tool uses a fallback strategy, trying the locators in that order.
43
+
44
+ Args:
45
+ target: The UI element to long press on (coordinates, resource_id, or text).
46
+ duration_ms: Duration of the long press in milliseconds. Choose based on interaction:
47
+ - 500-800ms: Quick long press (e.g., selecting text, haptic feedback)
48
+ - 1000ms (default): Standard long press (most common use case)
49
+ - 1500-2000ms: Extended long press (e.g., context menus, special actions)
50
+ - 2500ms+: Very long press (e.g., accessibility, advanced gestures)
31
51
  """
32
- output = long_press_on_controller(ctx=ctx, selector_request=selector_request, index=index)
33
- has_failed = output is not None
52
+ error_obj: dict | None = {
53
+ "error": "No valid selector provided or all selectors failed."
54
+ } # Default to failure
55
+ latest_selector_info: str | None = None
56
+
57
+ # 1. Try with COORDINATES FIRST (visual approach)
58
+ if target.coordinates:
59
+ try:
60
+ center_point = target.coordinates.get_center()
61
+ selector = SelectorRequestWithCoordinates(
62
+ coordinates=CoordinatesSelectorRequest(x=center_point.x, y=center_point.y)
63
+ )
64
+ logger.info(
65
+ f"Attempting to long press using coordinates: {center_point.x},{center_point.y}"
66
+ )
67
+ latest_selector_info = f"coordinates='{target.coordinates}'"
68
+ result = long_press_on_controller(
69
+ ctx=ctx,
70
+ selector_request=selector,
71
+ ui_hierarchy=state.latest_ui_hierarchy,
72
+ long_press_duration=duration_ms,
73
+ )
74
+ if result is None: # Success
75
+ error_obj = None
76
+ else:
77
+ logger.warning(
78
+ f"Long press with coordinates '{target.coordinates}' failed. "
79
+ f"Error: {result}"
80
+ )
81
+ error_obj = {"error": result} if isinstance(result, str) else result
82
+ except Exception as e:
83
+ logger.warning(
84
+ f"Exception during long press with coordinates '{target.coordinates}': {e}"
85
+ )
86
+ error_obj = {"error": str(e)}
87
+
88
+ # 2. If coordinates failed or weren't provided, try with resource_id
89
+ if error_obj is not None and target.resource_id:
90
+ try:
91
+ selector = IdSelectorRequest(id=target.resource_id)
92
+ logger.info(
93
+ f"Attempting to long press using resource_id: '{target.resource_id}' "
94
+ f"at index {target.resource_id_index}"
95
+ )
96
+ latest_selector_info = (
97
+ f"resource_id='{target.resource_id}' (index={target.resource_id_index})"
98
+ )
99
+ result = long_press_on_controller(
100
+ ctx=ctx,
101
+ selector_request=selector,
102
+ index=target.resource_id_index,
103
+ ui_hierarchy=state.latest_ui_hierarchy,
104
+ long_press_duration=duration_ms,
105
+ )
106
+ if result is None: # Success
107
+ error_obj = None
108
+ else:
109
+ logger.warning(
110
+ f"Long press with resource_id '{target.resource_id}' failed. "
111
+ f"Error: {result}"
112
+ )
113
+ error_obj = {"error": result} if isinstance(result, str) else result
114
+ except Exception as e:
115
+ logger.warning(
116
+ f"Exception during long press with resource_id '{target.resource_id}': {e}"
117
+ )
118
+ error_obj = {"error": str(e)}
119
+
120
+ # 3. If resource_id failed or wasn't provided, try with text (last resort)
121
+ if error_obj is not None and target.text:
122
+ try:
123
+ selector = TextSelectorRequest(text=target.text)
124
+ logger.info(
125
+ f"Attempting to long press using text: '{target.text}' "
126
+ f"at index {target.text_index}"
127
+ )
128
+ latest_selector_info = f"text='{target.text}' (index={target.text_index})"
129
+ result = long_press_on_controller(
130
+ ctx=ctx,
131
+ selector_request=selector,
132
+ index=target.text_index,
133
+ ui_hierarchy=state.latest_ui_hierarchy,
134
+ long_press_duration=duration_ms,
135
+ )
136
+ if result is None: # Success
137
+ error_obj = None
138
+ else:
139
+ logger.warning(f"Long press with text '{target.text}' failed. Error: {result}")
140
+ error_obj = {"error": result} if isinstance(result, str) else result
141
+ except Exception as e:
142
+ logger.warning(f"Exception during long press with text '{target.text}': {e}")
143
+ error_obj = {"error": str(e)}
34
144
 
145
+ has_failed = error_obj is not None
146
+ final_selector_info = latest_selector_info if latest_selector_info else "N/A"
35
147
  agent_outcome = (
36
- long_press_on_wrapper.on_failure_fn()
148
+ long_press_on_wrapper.on_failure_fn(final_selector_info)
37
149
  if has_failed
38
- else long_press_on_wrapper.on_success_fn()
150
+ else long_press_on_wrapper.on_success_fn(final_selector_info)
39
151
  )
40
152
 
41
153
  tool_message = ToolMessage(
42
154
  tool_call_id=tool_call_id,
43
155
  content=agent_outcome,
44
- additional_kwargs={"error": output} if has_failed else {},
156
+ additional_kwargs=error_obj if has_failed else {},
45
157
  status="error" if has_failed else "success",
46
158
  )
47
159
  return Command(
@@ -60,6 +172,9 @@ def get_long_press_on_tool(ctx: MobileUseContext):
60
172
 
61
173
  long_press_on_wrapper = ToolWrapper(
62
174
  tool_fn_getter=get_long_press_on_tool,
63
- on_success_fn=lambda: "Long press on is successful.",
64
- on_failure_fn=lambda: "Failed to long press on.",
175
+ on_success_fn=lambda selector_info: (
176
+ f"Long press on element with {selector_info} was successful."
177
+ ),
178
+ on_failure_fn=lambda selector_info: "Failed to long press on element. "
179
+ + f"Last attempt was with {selector_info}.",
65
180
  )
@@ -9,15 +9,14 @@ from pydantic import Field
9
9
 
10
10
  from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
11
11
  from minitap.mobile_use.context import MobileUseContext
12
- from minitap.mobile_use.controllers.mobile_command_controller import (
12
+ from minitap.mobile_use.controllers.mobile_command_controller import swipe as swipe_controller
13
+ from minitap.mobile_use.controllers.types import (
13
14
  CoordinatesSelectorRequest,
14
15
  PercentagesSelectorRequest,
15
- SwipeDirection,
16
16
  SwipeRequest,
17
17
  SwipeStartEndCoordinatesRequest,
18
18
  SwipeStartEndPercentagesRequest,
19
19
  )
20
- from minitap.mobile_use.controllers.mobile_command_controller import swipe as swipe_controller
21
20
  from minitap.mobile_use.graph.state import State
22
21
  from minitap.mobile_use.tools.tool_wrapper import CompositeToolWrapper
23
22
 
@@ -123,29 +122,7 @@ def get_composite_swipe_tools(ctx: MobileUseContext) -> list[BaseTool]:
123
122
  }
124
123
  )
125
124
 
126
- @tool
127
- def swipe_direction(
128
- agent_thought: str,
129
- tool_call_id: Annotated[str, InjectedToolCallId],
130
- state: Annotated[State, InjectedState],
131
- direction: SwipeDirection,
132
- duration: int = Field(description="Duration in ms", ge=1, le=10000, default=400),
133
- ):
134
- """Swipe in a specific direction across the screen."""
135
- swipe_request = SwipeRequest(
136
- swipe_mode=direction,
137
- duration=duration,
138
- )
139
- return get_swipe_tool(ctx=ctx).invoke(
140
- input={
141
- "tool_call_id": tool_call_id,
142
- "state": state,
143
- "agent_thought": agent_thought,
144
- "swipe_request": swipe_request,
145
- }
146
- )
147
-
148
- return [swipe_coordinates, swipe_percentages, swipe_direction]
125
+ return [swipe_coordinates, swipe_percentages]
149
126
 
150
127
 
151
128
  swipe_wrapper = CompositeToolWrapper(