minitap-mobile-use 2.5.3__py3-none-any.whl → 2.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of minitap-mobile-use might be problematic. Click here for more details.
- minitap/mobile_use/agents/contextor/contextor.py +0 -8
- minitap/mobile_use/agents/cortex/cortex.md +122 -36
- minitap/mobile_use/agents/cortex/cortex.py +32 -17
- minitap/mobile_use/agents/cortex/types.py +18 -4
- minitap/mobile_use/agents/executor/executor.md +3 -3
- minitap/mobile_use/agents/executor/executor.py +10 -3
- minitap/mobile_use/agents/hopper/hopper.md +30 -2
- minitap/mobile_use/agents/hopper/hopper.py +19 -15
- minitap/mobile_use/agents/orchestrator/orchestrator.py +14 -5
- minitap/mobile_use/agents/outputter/outputter.py +13 -3
- minitap/mobile_use/agents/planner/planner.md +20 -9
- minitap/mobile_use/agents/planner/planner.py +12 -5
- minitap/mobile_use/agents/screen_analyzer/human.md +16 -0
- minitap/mobile_use/agents/screen_analyzer/screen_analyzer.py +111 -0
- minitap/mobile_use/clients/ios_client.py +7 -3
- minitap/mobile_use/config.py +87 -24
- minitap/mobile_use/controllers/mobile_command_controller.py +354 -88
- minitap/mobile_use/controllers/platform_specific_commands_controller.py +41 -27
- minitap/mobile_use/controllers/types.py +95 -0
- minitap/mobile_use/graph/graph.py +55 -11
- minitap/mobile_use/graph/state.py +10 -3
- minitap/mobile_use/main.py +12 -4
- minitap/mobile_use/sdk/agent.py +113 -72
- minitap/mobile_use/sdk/examples/smart_notification_assistant.py +59 -10
- minitap/mobile_use/sdk/services/platform.py +15 -1
- minitap/mobile_use/sdk/types/platform.py +1 -0
- minitap/mobile_use/sdk/types/task.py +10 -1
- minitap/mobile_use/servers/device_hardware_bridge.py +13 -6
- minitap/mobile_use/services/llm.py +5 -2
- minitap/mobile_use/tools/index.py +7 -9
- minitap/mobile_use/tools/mobile/{clear_text.py → focus_and_clear_text.py} +7 -7
- minitap/mobile_use/tools/mobile/{input_text.py → focus_and_input_text.py} +8 -8
- minitap/mobile_use/tools/mobile/long_press_on.py +130 -15
- minitap/mobile_use/tools/mobile/swipe.py +3 -26
- minitap/mobile_use/tools/mobile/tap.py +41 -28
- minitap/mobile_use/tools/mobile/wait_for_delay.py +84 -0
- minitap/mobile_use/utils/cli_helpers.py +10 -6
- {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.7.0.dist-info}/METADATA +1 -1
- {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.7.0.dist-info}/RECORD +41 -39
- minitap/mobile_use/tools/mobile/glimpse_screen.py +0 -74
- minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +0 -64
- {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.7.0.dist-info}/WHEEL +0 -0
- {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.7.0.dist-info}/entry_points.txt +0 -0
|
@@ -23,6 +23,7 @@ from minitap.mobile_use.sdk.types.platform import (
|
|
|
23
23
|
)
|
|
24
24
|
from minitap.mobile_use.sdk.types.task import (
|
|
25
25
|
AgentProfile,
|
|
26
|
+
CloudDevicePlatformTaskRequest,
|
|
26
27
|
ManualTaskConfig,
|
|
27
28
|
PlatformTaskInfo,
|
|
28
29
|
PlatformTaskRequest,
|
|
@@ -60,6 +61,10 @@ class PlatformService:
|
|
|
60
61
|
|
|
61
62
|
async def create_task_run(self, request: PlatformTaskRequest) -> PlatformTaskInfo:
|
|
62
63
|
try:
|
|
64
|
+
virtual_mobile_id = None
|
|
65
|
+
if isinstance(request, CloudDevicePlatformTaskRequest):
|
|
66
|
+
virtual_mobile_id = request.virtual_mobile_id
|
|
67
|
+
|
|
63
68
|
# Check if task is a string (fetch from platform) or ManualTaskConfig (create manually)
|
|
64
69
|
if isinstance(request.task, str):
|
|
65
70
|
# Fetch task from platform
|
|
@@ -87,7 +92,11 @@ class PlatformService:
|
|
|
87
92
|
thoughts_output_path=request.thoughts_output_path,
|
|
88
93
|
)
|
|
89
94
|
|
|
90
|
-
task_run = await self._create_task_run(
|
|
95
|
+
task_run = await self._create_task_run(
|
|
96
|
+
task=task,
|
|
97
|
+
profile=profile,
|
|
98
|
+
virtual_mobile_id=virtual_mobile_id,
|
|
99
|
+
)
|
|
91
100
|
else:
|
|
92
101
|
# Create task manually from ManualTaskConfig
|
|
93
102
|
logger.info(f"Creating manual task with goal: {request.task.goal}")
|
|
@@ -113,6 +122,7 @@ class PlatformService:
|
|
|
113
122
|
task_run = await self._create_manual_task_run(
|
|
114
123
|
manual_config=request.task,
|
|
115
124
|
profile=profile,
|
|
125
|
+
virtual_mobile_id=virtual_mobile_id,
|
|
116
126
|
)
|
|
117
127
|
|
|
118
128
|
return PlatformTaskInfo(
|
|
@@ -244,12 +254,14 @@ class PlatformService:
|
|
|
244
254
|
self,
|
|
245
255
|
task: TaskResponse,
|
|
246
256
|
profile: LLMProfileResponse,
|
|
257
|
+
virtual_mobile_id: str | None = None,
|
|
247
258
|
) -> TaskRunResponse:
|
|
248
259
|
try:
|
|
249
260
|
logger.info(f"Creating task run for task: {task.name}")
|
|
250
261
|
task_run = CreateTaskRunRequest(
|
|
251
262
|
task_id=task.id,
|
|
252
263
|
llm_profile_id=profile.id,
|
|
264
|
+
virtual_mobile_id=virtual_mobile_id,
|
|
253
265
|
)
|
|
254
266
|
response = await self._client.post(url="v1/task-runs", json=task_run.model_dump())
|
|
255
267
|
response.raise_for_status()
|
|
@@ -264,6 +276,7 @@ class PlatformService:
|
|
|
264
276
|
self,
|
|
265
277
|
manual_config: ManualTaskConfig,
|
|
266
278
|
profile: LLMProfileResponse,
|
|
279
|
+
virtual_mobile_id: str | None = None,
|
|
267
280
|
) -> TaskRunResponse:
|
|
268
281
|
"""
|
|
269
282
|
Create an orphan task run from a manual task configuration.
|
|
@@ -277,6 +290,7 @@ class PlatformService:
|
|
|
277
290
|
"inputPrompt": manual_config.goal,
|
|
278
291
|
"outputDescription": manual_config.output_description,
|
|
279
292
|
"llmProfileId": profile.id,
|
|
293
|
+
"virtualMobileId": virtual_mobile_id,
|
|
280
294
|
}
|
|
281
295
|
|
|
282
296
|
response = await self._client.post(url="v1/task-runs/orphan", json=orphan_payload)
|
|
@@ -63,6 +63,7 @@ class CreateTaskRunRequest(BaseApiModel):
|
|
|
63
63
|
|
|
64
64
|
task_id: str = Field(..., description="ID of the task to run")
|
|
65
65
|
llm_profile_id: str = Field(..., description="LLM profile ID to use")
|
|
66
|
+
virtual_mobile_id: str | None = Field(None, description="Virtual mobile ID to use")
|
|
66
67
|
|
|
67
68
|
|
|
68
69
|
class UpdateTaskRunStatusRequest(BaseApiModel):
|
|
@@ -2,12 +2,13 @@
|
|
|
2
2
|
Task-related type definitions for the Mobile-use SDK.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
+
from asyncio import Event
|
|
5
6
|
from collections.abc import Callable, Coroutine
|
|
6
7
|
from datetime import datetime
|
|
7
8
|
from pathlib import Path
|
|
8
9
|
from typing import Any, TypeVar, overload
|
|
9
10
|
|
|
10
|
-
from pydantic import BaseModel, Field
|
|
11
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
11
12
|
|
|
12
13
|
from minitap.mobile_use.config import LLMConfig, get_default_llm_config
|
|
13
14
|
from minitap.mobile_use.constants import RECURSION_LIMIT
|
|
@@ -135,6 +136,14 @@ class PlatformTaskRequest[TOutput](TaskRequestBase):
|
|
|
135
136
|
api_key: str | None = None
|
|
136
137
|
|
|
137
138
|
|
|
139
|
+
class CloudDevicePlatformTaskRequest[TOutput](PlatformTaskRequest[TOutput]):
|
|
140
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
141
|
+
|
|
142
|
+
task_run_id_available_event: Event = Event()
|
|
143
|
+
task_run_id: str | None = None
|
|
144
|
+
virtual_mobile_id: str | None = None
|
|
145
|
+
|
|
146
|
+
|
|
138
147
|
class TaskResult(BaseModel):
|
|
139
148
|
"""
|
|
140
149
|
Result of a mobile automation task.
|
|
@@ -135,23 +135,30 @@ class DeviceHardwareBridge:
|
|
|
135
135
|
print(f"[Maestro Studio ERROR]: {line}")
|
|
136
136
|
self.output.append(line)
|
|
137
137
|
|
|
138
|
-
|
|
138
|
+
lower_line = line.lower()
|
|
139
|
+
|
|
140
|
+
# Ignore known benign warnings (common on macOS/JDK 21+)
|
|
141
|
+
if line.startswith("WARNING:") or (
|
|
142
|
+
"restricted method" in lower_line
|
|
143
|
+
or "jansi" in lower_line
|
|
144
|
+
or "enable-native-access" in lower_line
|
|
145
|
+
or "java.lang.system::load" in lower_line
|
|
146
|
+
):
|
|
147
|
+
continue
|
|
148
|
+
|
|
149
|
+
if "device offline" in lower_line:
|
|
139
150
|
with self.lock:
|
|
140
151
|
self.status = BridgeStatus.FAILED
|
|
141
152
|
if self.process:
|
|
142
153
|
self.process.kill()
|
|
143
154
|
break
|
|
144
155
|
|
|
145
|
-
if "address already in use" in
|
|
156
|
+
if "address already in use" in lower_line:
|
|
146
157
|
with self.lock:
|
|
147
158
|
self.status = BridgeStatus.PORT_IN_USE
|
|
148
159
|
if self.process:
|
|
149
160
|
self.process.kill()
|
|
150
161
|
break
|
|
151
|
-
else:
|
|
152
|
-
with self.lock:
|
|
153
|
-
if self.status == BridgeStatus.STARTING:
|
|
154
|
-
self.status = BridgeStatus.FAILED
|
|
155
162
|
|
|
156
163
|
def _wait_for_health_check(self, retries=5, delay=2):
|
|
157
164
|
health_url = f"http://localhost:{DEVICE_HARDWARE_BRIDGE_PORT}/api/banner-message"
|
|
@@ -13,6 +13,7 @@ from minitap.mobile_use.config import (
|
|
|
13
13
|
AgentNode,
|
|
14
14
|
AgentNodeWithFallback,
|
|
15
15
|
LLMUtilsNode,
|
|
16
|
+
LLMUtilsNodeWithFallback,
|
|
16
17
|
LLMWithFallback,
|
|
17
18
|
settings,
|
|
18
19
|
)
|
|
@@ -169,8 +170,9 @@ def get_llm(
|
|
|
169
170
|
@overload
|
|
170
171
|
def get_llm(
|
|
171
172
|
ctx: MobileUseContext,
|
|
172
|
-
name:
|
|
173
|
+
name: LLMUtilsNode,
|
|
173
174
|
*,
|
|
175
|
+
is_utils: Literal[True],
|
|
174
176
|
temperature: float = 1,
|
|
175
177
|
) -> BaseChatModel: ...
|
|
176
178
|
|
|
@@ -178,9 +180,10 @@ def get_llm(
|
|
|
178
180
|
@overload
|
|
179
181
|
def get_llm(
|
|
180
182
|
ctx: MobileUseContext,
|
|
181
|
-
name:
|
|
183
|
+
name: LLMUtilsNodeWithFallback,
|
|
182
184
|
*,
|
|
183
185
|
is_utils: Literal[True],
|
|
186
|
+
use_fallback: bool = False,
|
|
184
187
|
temperature: float = 1,
|
|
185
188
|
) -> BaseChatModel: ...
|
|
186
189
|
|
|
@@ -2,10 +2,9 @@ from langchain_core.tools import BaseTool
|
|
|
2
2
|
|
|
3
3
|
from minitap.mobile_use.context import MobileUseContext
|
|
4
4
|
from minitap.mobile_use.tools.mobile.back import back_wrapper
|
|
5
|
-
from minitap.mobile_use.tools.mobile.clear_text import clear_text_wrapper
|
|
6
5
|
from minitap.mobile_use.tools.mobile.erase_one_char import erase_one_char_wrapper
|
|
7
|
-
from minitap.mobile_use.tools.mobile.
|
|
8
|
-
from minitap.mobile_use.tools.mobile.
|
|
6
|
+
from minitap.mobile_use.tools.mobile.focus_and_clear_text import focus_and_clear_text_wrapper
|
|
7
|
+
from minitap.mobile_use.tools.mobile.focus_and_input_text import focus_and_input_text_wrapper
|
|
9
8
|
from minitap.mobile_use.tools.mobile.launch_app import launch_app_wrapper
|
|
10
9
|
from minitap.mobile_use.tools.mobile.long_press_on import long_press_on_wrapper
|
|
11
10
|
from minitap.mobile_use.tools.mobile.open_link import open_link_wrapper
|
|
@@ -13,8 +12,8 @@ from minitap.mobile_use.tools.mobile.press_key import press_key_wrapper
|
|
|
13
12
|
from minitap.mobile_use.tools.mobile.stop_app import stop_app_wrapper
|
|
14
13
|
from minitap.mobile_use.tools.mobile.swipe import swipe_wrapper
|
|
15
14
|
from minitap.mobile_use.tools.mobile.tap import tap_wrapper
|
|
16
|
-
from minitap.mobile_use.tools.mobile.
|
|
17
|
-
|
|
15
|
+
from minitap.mobile_use.tools.mobile.wait_for_delay import (
|
|
16
|
+
wait_for_delay_wrapper,
|
|
18
17
|
)
|
|
19
18
|
from minitap.mobile_use.tools.tool_wrapper import CompositeToolWrapper, ToolWrapper
|
|
20
19
|
|
|
@@ -24,14 +23,13 @@ EXECUTOR_WRAPPERS_TOOLS = [
|
|
|
24
23
|
tap_wrapper,
|
|
25
24
|
long_press_on_wrapper,
|
|
26
25
|
swipe_wrapper,
|
|
27
|
-
|
|
28
|
-
input_text_wrapper,
|
|
26
|
+
focus_and_input_text_wrapper,
|
|
29
27
|
erase_one_char_wrapper,
|
|
30
28
|
launch_app_wrapper,
|
|
31
29
|
stop_app_wrapper,
|
|
32
|
-
|
|
30
|
+
focus_and_clear_text_wrapper,
|
|
33
31
|
press_key_wrapper,
|
|
34
|
-
|
|
32
|
+
wait_for_delay_wrapper,
|
|
35
33
|
]
|
|
36
34
|
|
|
37
35
|
|
|
@@ -238,9 +238,9 @@ class TextClearer:
|
|
|
238
238
|
)
|
|
239
239
|
|
|
240
240
|
|
|
241
|
-
def
|
|
241
|
+
def get_focus_and_clear_text_tool(ctx: MobileUseContext):
|
|
242
242
|
@tool
|
|
243
|
-
async def
|
|
243
|
+
async def focus_and_clear_text(
|
|
244
244
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
245
245
|
state: Annotated[State, InjectedState],
|
|
246
246
|
agent_thought: str,
|
|
@@ -255,9 +255,9 @@ def get_clear_text_tool(ctx: MobileUseContext):
|
|
|
255
255
|
)
|
|
256
256
|
|
|
257
257
|
agent_outcome = (
|
|
258
|
-
|
|
258
|
+
focus_and_clear_text_wrapper.on_failure_fn(result.error_message)
|
|
259
259
|
if not result.success
|
|
260
|
-
else
|
|
260
|
+
else focus_and_clear_text_wrapper.on_success_fn(
|
|
261
261
|
nb_char_erased=result.chars_erased, new_text_value=result.final_text
|
|
262
262
|
)
|
|
263
263
|
)
|
|
@@ -280,7 +280,7 @@ def get_clear_text_tool(ctx: MobileUseContext):
|
|
|
280
280
|
),
|
|
281
281
|
)
|
|
282
282
|
|
|
283
|
-
return
|
|
283
|
+
return focus_and_clear_text
|
|
284
284
|
|
|
285
285
|
|
|
286
286
|
def _format_success_message(nb_char_erased: int, new_text_value: str | None) -> str:
|
|
@@ -299,8 +299,8 @@ def _format_failure_message(output: str | None) -> str:
|
|
|
299
299
|
return "Failed to erase text. " + (str(output) if output else "")
|
|
300
300
|
|
|
301
301
|
|
|
302
|
-
|
|
303
|
-
tool_fn_getter=
|
|
302
|
+
focus_and_clear_text_wrapper = ToolWrapper(
|
|
303
|
+
tool_fn_getter=get_focus_and_clear_text_tool,
|
|
304
304
|
on_success_fn=_format_success_message,
|
|
305
305
|
on_failure_fn=_format_failure_message,
|
|
306
306
|
)
|
|
@@ -42,9 +42,9 @@ def _controller_input_text(ctx: MobileUseContext, text: str) -> InputResult:
|
|
|
42
42
|
return InputResult(ok=False, error=str(controller_out))
|
|
43
43
|
|
|
44
44
|
|
|
45
|
-
def
|
|
45
|
+
def get_focus_and_input_text_tool(ctx: MobileUseContext):
|
|
46
46
|
@tool
|
|
47
|
-
async def
|
|
47
|
+
async def focus_and_input_text(
|
|
48
48
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
49
49
|
state: Annotated[State, InjectedState],
|
|
50
50
|
agent_thought: str,
|
|
@@ -70,7 +70,7 @@ def get_input_text_tool(ctx: MobileUseContext):
|
|
|
70
70
|
error_message = "Failed to focus the text input element before typing."
|
|
71
71
|
tool_message = ToolMessage(
|
|
72
72
|
tool_call_id=tool_call_id,
|
|
73
|
-
content=
|
|
73
|
+
content=focus_and_input_text_wrapper.on_failure_fn(text, error_message),
|
|
74
74
|
additional_kwargs={"error": error_message},
|
|
75
75
|
status="error",
|
|
76
76
|
)
|
|
@@ -103,9 +103,9 @@ def get_input_text_tool(ctx: MobileUseContext):
|
|
|
103
103
|
text_input_content = get_element_text(element)
|
|
104
104
|
|
|
105
105
|
agent_outcome = (
|
|
106
|
-
|
|
106
|
+
focus_and_input_text_wrapper.on_success_fn(text, text_input_content, target.resource_id)
|
|
107
107
|
if result.ok
|
|
108
|
-
else
|
|
108
|
+
else focus_and_input_text_wrapper.on_failure_fn(text, result.error)
|
|
109
109
|
)
|
|
110
110
|
|
|
111
111
|
tool_message = ToolMessage(
|
|
@@ -126,7 +126,7 @@ def get_input_text_tool(ctx: MobileUseContext):
|
|
|
126
126
|
),
|
|
127
127
|
)
|
|
128
128
|
|
|
129
|
-
return
|
|
129
|
+
return focus_and_input_text
|
|
130
130
|
|
|
131
131
|
|
|
132
132
|
def _on_input_success(text, text_input_content, text_input_resource_id):
|
|
@@ -141,8 +141,8 @@ def _on_input_success(text, text_input_content, text_input_resource_id):
|
|
|
141
141
|
return "Typed text, should now verify before moving forward"
|
|
142
142
|
|
|
143
143
|
|
|
144
|
-
|
|
145
|
-
tool_fn_getter=
|
|
144
|
+
focus_and_input_text_wrapper = ToolWrapper(
|
|
145
|
+
tool_fn_getter=get_focus_and_input_text_tool,
|
|
146
146
|
on_success_fn=_on_input_success,
|
|
147
147
|
on_failure_fn=lambda text, error: f"Failed to input text {repr(text)}. Reason: {error}",
|
|
148
148
|
)
|
|
@@ -2,46 +2,158 @@ from typing import Annotated
|
|
|
2
2
|
|
|
3
3
|
from langchain_core.messages import ToolMessage
|
|
4
4
|
from langchain_core.tools import tool
|
|
5
|
-
from langchain_core.tools.base import InjectedToolCallId
|
|
5
|
+
from langchain_core.tools.base import BaseTool, InjectedToolCallId
|
|
6
6
|
from langgraph.prebuilt import InjectedState
|
|
7
7
|
from langgraph.types import Command
|
|
8
8
|
|
|
9
9
|
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
10
10
|
from minitap.mobile_use.context import MobileUseContext
|
|
11
|
-
from minitap.mobile_use.controllers.mobile_command_controller import
|
|
11
|
+
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
12
|
+
CoordinatesSelectorRequest,
|
|
13
|
+
IdSelectorRequest,
|
|
14
|
+
SelectorRequestWithCoordinates,
|
|
15
|
+
TextSelectorRequest,
|
|
16
|
+
)
|
|
12
17
|
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
13
18
|
long_press_on as long_press_on_controller,
|
|
14
19
|
)
|
|
15
20
|
from minitap.mobile_use.graph.state import State
|
|
16
21
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
22
|
+
from minitap.mobile_use.tools.types import Target
|
|
23
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
17
24
|
|
|
25
|
+
logger = get_logger(__name__)
|
|
18
26
|
|
|
19
|
-
|
|
27
|
+
|
|
28
|
+
def get_long_press_on_tool(ctx: MobileUseContext) -> BaseTool:
|
|
20
29
|
@tool
|
|
21
30
|
async def long_press_on(
|
|
22
31
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
23
32
|
state: Annotated[State, InjectedState],
|
|
24
33
|
agent_thought: str,
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
)
|
|
34
|
+
target: Target,
|
|
35
|
+
duration_ms: int = 1000,
|
|
36
|
+
):
|
|
28
37
|
"""
|
|
29
|
-
Long
|
|
30
|
-
|
|
38
|
+
Long presses on a UI element identified by the 'target' object.
|
|
39
|
+
|
|
40
|
+
The 'target' object allows specifying an element by its resource_id
|
|
41
|
+
(with an optional index), its coordinates, or its text content (with an optional index).
|
|
42
|
+
The tool uses a fallback strategy, trying the locators in that order.
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
target: The UI element to long press on (coordinates, resource_id, or text).
|
|
46
|
+
duration_ms: Duration of the long press in milliseconds. Choose based on interaction:
|
|
47
|
+
- 500-800ms: Quick long press (e.g., selecting text, haptic feedback)
|
|
48
|
+
- 1000ms (default): Standard long press (most common use case)
|
|
49
|
+
- 1500-2000ms: Extended long press (e.g., context menus, special actions)
|
|
50
|
+
- 2500ms+: Very long press (e.g., accessibility, advanced gestures)
|
|
31
51
|
"""
|
|
32
|
-
|
|
33
|
-
|
|
52
|
+
error_obj: dict | None = {
|
|
53
|
+
"error": "No valid selector provided or all selectors failed."
|
|
54
|
+
} # Default to failure
|
|
55
|
+
latest_selector_info: str | None = None
|
|
56
|
+
|
|
57
|
+
# 1. Try with COORDINATES FIRST (visual approach)
|
|
58
|
+
if target.coordinates:
|
|
59
|
+
try:
|
|
60
|
+
center_point = target.coordinates.get_center()
|
|
61
|
+
selector = SelectorRequestWithCoordinates(
|
|
62
|
+
coordinates=CoordinatesSelectorRequest(x=center_point.x, y=center_point.y)
|
|
63
|
+
)
|
|
64
|
+
logger.info(
|
|
65
|
+
f"Attempting to long press using coordinates: {center_point.x},{center_point.y}"
|
|
66
|
+
)
|
|
67
|
+
latest_selector_info = f"coordinates='{target.coordinates}'"
|
|
68
|
+
result = long_press_on_controller(
|
|
69
|
+
ctx=ctx,
|
|
70
|
+
selector_request=selector,
|
|
71
|
+
ui_hierarchy=state.latest_ui_hierarchy,
|
|
72
|
+
long_press_duration=duration_ms,
|
|
73
|
+
)
|
|
74
|
+
if result is None: # Success
|
|
75
|
+
error_obj = None
|
|
76
|
+
else:
|
|
77
|
+
logger.warning(
|
|
78
|
+
f"Long press with coordinates '{target.coordinates}' failed. "
|
|
79
|
+
f"Error: {result}"
|
|
80
|
+
)
|
|
81
|
+
error_obj = {"error": result} if isinstance(result, str) else result
|
|
82
|
+
except Exception as e:
|
|
83
|
+
logger.warning(
|
|
84
|
+
f"Exception during long press with coordinates '{target.coordinates}': {e}"
|
|
85
|
+
)
|
|
86
|
+
error_obj = {"error": str(e)}
|
|
87
|
+
|
|
88
|
+
# 2. If coordinates failed or weren't provided, try with resource_id
|
|
89
|
+
if error_obj is not None and target.resource_id:
|
|
90
|
+
try:
|
|
91
|
+
selector = IdSelectorRequest(id=target.resource_id)
|
|
92
|
+
logger.info(
|
|
93
|
+
f"Attempting to long press using resource_id: '{target.resource_id}' "
|
|
94
|
+
f"at index {target.resource_id_index}"
|
|
95
|
+
)
|
|
96
|
+
latest_selector_info = (
|
|
97
|
+
f"resource_id='{target.resource_id}' (index={target.resource_id_index})"
|
|
98
|
+
)
|
|
99
|
+
result = long_press_on_controller(
|
|
100
|
+
ctx=ctx,
|
|
101
|
+
selector_request=selector,
|
|
102
|
+
index=target.resource_id_index,
|
|
103
|
+
ui_hierarchy=state.latest_ui_hierarchy,
|
|
104
|
+
long_press_duration=duration_ms,
|
|
105
|
+
)
|
|
106
|
+
if result is None: # Success
|
|
107
|
+
error_obj = None
|
|
108
|
+
else:
|
|
109
|
+
logger.warning(
|
|
110
|
+
f"Long press with resource_id '{target.resource_id}' failed. "
|
|
111
|
+
f"Error: {result}"
|
|
112
|
+
)
|
|
113
|
+
error_obj = {"error": result} if isinstance(result, str) else result
|
|
114
|
+
except Exception as e:
|
|
115
|
+
logger.warning(
|
|
116
|
+
f"Exception during long press with resource_id '{target.resource_id}': {e}"
|
|
117
|
+
)
|
|
118
|
+
error_obj = {"error": str(e)}
|
|
119
|
+
|
|
120
|
+
# 3. If resource_id failed or wasn't provided, try with text (last resort)
|
|
121
|
+
if error_obj is not None and target.text:
|
|
122
|
+
try:
|
|
123
|
+
selector = TextSelectorRequest(text=target.text)
|
|
124
|
+
logger.info(
|
|
125
|
+
f"Attempting to long press using text: '{target.text}' "
|
|
126
|
+
f"at index {target.text_index}"
|
|
127
|
+
)
|
|
128
|
+
latest_selector_info = f"text='{target.text}' (index={target.text_index})"
|
|
129
|
+
result = long_press_on_controller(
|
|
130
|
+
ctx=ctx,
|
|
131
|
+
selector_request=selector,
|
|
132
|
+
index=target.text_index,
|
|
133
|
+
ui_hierarchy=state.latest_ui_hierarchy,
|
|
134
|
+
long_press_duration=duration_ms,
|
|
135
|
+
)
|
|
136
|
+
if result is None: # Success
|
|
137
|
+
error_obj = None
|
|
138
|
+
else:
|
|
139
|
+
logger.warning(f"Long press with text '{target.text}' failed. Error: {result}")
|
|
140
|
+
error_obj = {"error": result} if isinstance(result, str) else result
|
|
141
|
+
except Exception as e:
|
|
142
|
+
logger.warning(f"Exception during long press with text '{target.text}': {e}")
|
|
143
|
+
error_obj = {"error": str(e)}
|
|
34
144
|
|
|
145
|
+
has_failed = error_obj is not None
|
|
146
|
+
final_selector_info = latest_selector_info if latest_selector_info else "N/A"
|
|
35
147
|
agent_outcome = (
|
|
36
|
-
long_press_on_wrapper.on_failure_fn()
|
|
148
|
+
long_press_on_wrapper.on_failure_fn(final_selector_info)
|
|
37
149
|
if has_failed
|
|
38
|
-
else long_press_on_wrapper.on_success_fn()
|
|
150
|
+
else long_press_on_wrapper.on_success_fn(final_selector_info)
|
|
39
151
|
)
|
|
40
152
|
|
|
41
153
|
tool_message = ToolMessage(
|
|
42
154
|
tool_call_id=tool_call_id,
|
|
43
155
|
content=agent_outcome,
|
|
44
|
-
additional_kwargs=
|
|
156
|
+
additional_kwargs=error_obj if has_failed else {},
|
|
45
157
|
status="error" if has_failed else "success",
|
|
46
158
|
)
|
|
47
159
|
return Command(
|
|
@@ -60,6 +172,9 @@ def get_long_press_on_tool(ctx: MobileUseContext):
|
|
|
60
172
|
|
|
61
173
|
long_press_on_wrapper = ToolWrapper(
|
|
62
174
|
tool_fn_getter=get_long_press_on_tool,
|
|
63
|
-
on_success_fn=lambda:
|
|
64
|
-
|
|
175
|
+
on_success_fn=lambda selector_info: (
|
|
176
|
+
f"Long press on element with {selector_info} was successful."
|
|
177
|
+
),
|
|
178
|
+
on_failure_fn=lambda selector_info: "Failed to long press on element. "
|
|
179
|
+
+ f"Last attempt was with {selector_info}.",
|
|
65
180
|
)
|
|
@@ -9,15 +9,14 @@ from pydantic import Field
|
|
|
9
9
|
|
|
10
10
|
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
11
11
|
from minitap.mobile_use.context import MobileUseContext
|
|
12
|
-
from minitap.mobile_use.controllers.mobile_command_controller import
|
|
12
|
+
from minitap.mobile_use.controllers.mobile_command_controller import swipe as swipe_controller
|
|
13
|
+
from minitap.mobile_use.controllers.types import (
|
|
13
14
|
CoordinatesSelectorRequest,
|
|
14
15
|
PercentagesSelectorRequest,
|
|
15
|
-
SwipeDirection,
|
|
16
16
|
SwipeRequest,
|
|
17
17
|
SwipeStartEndCoordinatesRequest,
|
|
18
18
|
SwipeStartEndPercentagesRequest,
|
|
19
19
|
)
|
|
20
|
-
from minitap.mobile_use.controllers.mobile_command_controller import swipe as swipe_controller
|
|
21
20
|
from minitap.mobile_use.graph.state import State
|
|
22
21
|
from minitap.mobile_use.tools.tool_wrapper import CompositeToolWrapper
|
|
23
22
|
|
|
@@ -123,29 +122,7 @@ def get_composite_swipe_tools(ctx: MobileUseContext) -> list[BaseTool]:
|
|
|
123
122
|
}
|
|
124
123
|
)
|
|
125
124
|
|
|
126
|
-
|
|
127
|
-
def swipe_direction(
|
|
128
|
-
agent_thought: str,
|
|
129
|
-
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
130
|
-
state: Annotated[State, InjectedState],
|
|
131
|
-
direction: SwipeDirection,
|
|
132
|
-
duration: int = Field(description="Duration in ms", ge=1, le=10000, default=400),
|
|
133
|
-
):
|
|
134
|
-
"""Swipe in a specific direction across the screen."""
|
|
135
|
-
swipe_request = SwipeRequest(
|
|
136
|
-
swipe_mode=direction,
|
|
137
|
-
duration=duration,
|
|
138
|
-
)
|
|
139
|
-
return get_swipe_tool(ctx=ctx).invoke(
|
|
140
|
-
input={
|
|
141
|
-
"tool_call_id": tool_call_id,
|
|
142
|
-
"state": state,
|
|
143
|
-
"agent_thought": agent_thought,
|
|
144
|
-
"swipe_request": swipe_request,
|
|
145
|
-
}
|
|
146
|
-
)
|
|
147
|
-
|
|
148
|
-
return [swipe_coordinates, swipe_percentages, swipe_direction]
|
|
125
|
+
return [swipe_coordinates, swipe_percentages]
|
|
149
126
|
|
|
150
127
|
|
|
151
128
|
swipe_wrapper = CompositeToolWrapper(
|