minitap-mobile-use 2.1.0__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of minitap-mobile-use might be problematic. Click here for more details.
- minitap/mobile_use/agents/contextor/contextor.py +4 -2
- minitap/mobile_use/agents/cortex/cortex.md +72 -26
- minitap/mobile_use/agents/cortex/cortex.py +1 -2
- minitap/mobile_use/agents/executor/executor.md +6 -4
- minitap/mobile_use/agents/executor/executor.py +3 -1
- minitap/mobile_use/agents/executor/utils.py +2 -1
- minitap/mobile_use/agents/outputter/test_outputter.py +104 -42
- minitap/mobile_use/agents/planner/planner.md +1 -1
- minitap/mobile_use/agents/planner/planner.py +4 -2
- minitap/mobile_use/config.py +16 -1
- minitap/mobile_use/controllers/mobile_command_controller.py +4 -4
- minitap/mobile_use/main.py +2 -2
- minitap/mobile_use/sdk/agent.py +17 -8
- minitap/mobile_use/sdk/builders/agent_config_builder.py +2 -2
- minitap/mobile_use/sdk/types/exceptions.py +30 -0
- minitap/mobile_use/sdk/utils.py +3 -2
- minitap/mobile_use/servers/device_hardware_bridge.py +2 -1
- minitap/mobile_use/servers/utils.py +6 -9
- minitap/mobile_use/services/llm.py +23 -6
- minitap/mobile_use/tools/index.py +21 -15
- minitap/mobile_use/tools/mobile/clear_text.py +73 -25
- minitap/mobile_use/tools/mobile/copy_text_from.py +7 -5
- minitap/mobile_use/tools/mobile/{take_screenshot.py → glimpse_screen.py} +15 -11
- minitap/mobile_use/tools/mobile/input_text.py +94 -13
- minitap/mobile_use/tools/mobile/paste_text.py +34 -8
- minitap/mobile_use/tools/mobile/swipe.py +107 -9
- minitap/mobile_use/tools/test_utils.py +351 -0
- minitap/mobile_use/tools/tool_wrapper.py +5 -0
- minitap/mobile_use/tools/utils.py +147 -40
- minitap/mobile_use/utils/recorder.py +2 -9
- minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
- minitap/mobile_use/utils/ui_hierarchy.py +2 -2
- {minitap_mobile_use-2.1.0.dist-info → minitap_mobile_use-2.3.0.dist-info}/METADATA +28 -8
- {minitap_mobile_use-2.1.0.dist-info → minitap_mobile_use-2.3.0.dist-info}/RECORD +36 -34
- {minitap_mobile_use-2.1.0.dist-info → minitap_mobile_use-2.3.0.dist-info}/WHEEL +0 -0
- {minitap_mobile_use-2.1.0.dist-info → minitap_mobile_use-2.3.0.dist-info}/entry_points.txt +0 -0
|
@@ -4,6 +4,8 @@ Exceptions for the Mobile-use SDK.
|
|
|
4
4
|
This module defines the exception hierarchy used throughout the Mobile-use SDK.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
from typing import Literal
|
|
8
|
+
|
|
7
9
|
|
|
8
10
|
class MobileUseError(Exception):
|
|
9
11
|
"""Base exception class for all Mobile-use SDK exceptions."""
|
|
@@ -72,3 +74,31 @@ class AgentProfileNotFoundError(AgentTaskRequestError):
|
|
|
72
74
|
|
|
73
75
|
def __init__(self, profile_name: str):
|
|
74
76
|
super().__init__(f"Agent profile {profile_name} not found")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
EXECUTABLES = Literal["adb", "maestro", "xcrun", "cli_tools"]
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class ExecutableNotFoundError(MobileUseError):
    """Exception raised when a required executable is not found.

    The message names the missing executable and, when one is known, appends
    an installation hint. The special name ``"cli_tools"`` produces a combined
    message stating that neither ADB nor the Xcode Command Line Tools were
    found in PATH.
    """

    def __init__(self, executable_name: EXECUTABLES):
        """Build the error message for ``executable_name``.

        Args:
            executable_name: One of the ``EXECUTABLES`` literals identifying
                the executable that could not be found.
        """
        install_instructions: dict[EXECUTABLES, str] = {
            "adb": "https://developer.android.com/tools/adb",
            "maestro": "https://docs.maestro.dev/getting-started/installing-maestro",
            "xcrun": "Install with: xcode-select --install",
        }
        if executable_name == "cli_tools":
            # Adjacent literals are concatenated verbatim, so every sentence
            # must carry its own trailing space.
            message = (
                "ADB or Xcode Command Line Tools not found in PATH. "
                "At least one of them is required to run mobile-use "
                "depending on the device platform you wish to run (Android: adb, iOS: xcrun). "
                "Refer to the following links for installation instructions:"
                f"\n- ADB: {install_instructions['adb']}"
                f"\n- Xcode Command Line Tools: {install_instructions['xcrun']}"
            )
        else:
            message = f"Required executable '{executable_name}' not found in PATH."
            hint = install_instructions.get(executable_name)
            if hint is not None:
                # Only URLs are introduced with "please visit"; a hint that is
                # already a full instruction (xcrun) is appended as-is.
                if hint.startswith("https://"):
                    message += f"\nTo install it, please visit: {hint}"
                else:
                    message += f"\n{hint}"
        super().__init__(message)
|
minitap/mobile_use/sdk/utils.py
CHANGED
|
@@ -2,11 +2,11 @@ import os
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
4
|
from pydantic import ValidationError
|
|
5
|
+
|
|
5
6
|
from minitap.mobile_use.config import LLMConfig, deep_merge_llm_config, get_default_llm_config
|
|
6
7
|
from minitap.mobile_use.utils.file import load_jsonc
|
|
7
8
|
from minitap.mobile_use.utils.logger import get_logger
|
|
8
9
|
|
|
9
|
-
|
|
10
10
|
logger = get_logger(__name__)
|
|
11
11
|
|
|
12
12
|
|
|
@@ -24,5 +24,6 @@ def load_llm_config_override(path: Path) -> LLMConfig:
|
|
|
24
24
|
try:
|
|
25
25
|
return deep_merge_llm_config(default_config, override_config_dict)
|
|
26
26
|
except ValidationError as e:
|
|
27
|
-
logger.error(f"Invalid LLM config: {e}
|
|
27
|
+
logger.error(f"Invalid LLM config: {e}")
|
|
28
|
+
logger.info("Falling back to default config")
|
|
28
29
|
return default_config
|
|
@@ -6,6 +6,7 @@ import time
|
|
|
6
6
|
from enum import Enum
|
|
7
7
|
|
|
8
8
|
import requests
|
|
9
|
+
|
|
9
10
|
from minitap.mobile_use.context import DevicePlatform
|
|
10
11
|
from minitap.mobile_use.servers.utils import is_port_in_use
|
|
11
12
|
|
|
@@ -175,7 +176,7 @@ class DeviceHardwareBridge:
|
|
|
175
176
|
]
|
|
176
177
|
|
|
177
178
|
def start(self):
|
|
178
|
-
if is_port_in_use(DEVICE_HARDWARE_BRIDGE_PORT):
|
|
179
|
+
if is_port_in_use(port=DEVICE_HARDWARE_BRIDGE_PORT):
|
|
179
180
|
print("Maestro port already in use - assuming Maestro is running.")
|
|
180
181
|
self.status = BridgeStatus.RUNNING
|
|
181
182
|
return True
|
|
@@ -1,11 +1,8 @@
|
|
|
1
|
-
import
|
|
1
|
+
import contextlib
|
|
2
|
+
import socket
|
|
2
3
|
|
|
3
4
|
|
|
4
|
-
def is_port_in_use(port: int):
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
return True
|
|
9
|
-
elif isinstance(conn.laddr, tuple) and len(conn.laddr) >= 2 and conn.laddr[1] == port:
|
|
10
|
-
return True
|
|
11
|
-
return False
|
|
5
|
+
def is_port_in_use(port: int, host: str = "127.0.0.1", timeout: float = 1.0) -> bool:
    """Return True when a TCP connection to ``host:port`` succeeds.

    Args:
        port: TCP port to probe.
        host: IPv4 address or host name to connect to.
        timeout: Maximum number of seconds to wait for the connection
            attempt, so the probe cannot hang on an unresponsive host.

    Returns:
        True if ``connect_ex`` reports success (0), False for any other
        error indicator (connection refused, timed out, ...).
    """
    # SO_REUSEADDR only affects bind(), so it is not set on this
    # connect-only probe socket.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.settimeout(timeout)
        return probe.connect_ex((host, port)) == 0
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Literal, TypeVar
|
|
3
2
|
from collections.abc import Awaitable, Callable
|
|
4
|
-
from typing import overload
|
|
3
|
+
from typing import Literal, TypeVar, overload
|
|
5
4
|
|
|
5
|
+
from langchain_core.language_models.chat_models import BaseChatModel
|
|
6
6
|
from langchain_google_genai import ChatGoogleGenerativeAI
|
|
7
|
+
from langchain_google_vertexai import ChatVertexAI
|
|
7
8
|
from langchain_openai import ChatOpenAI
|
|
9
|
+
|
|
8
10
|
from minitap.mobile_use.config import (
|
|
9
11
|
AgentNode,
|
|
10
12
|
AgentNodeWithFallback,
|
|
@@ -32,6 +34,19 @@ def get_google_llm(
|
|
|
32
34
|
return client
|
|
33
35
|
|
|
34
36
|
|
|
37
|
+
def get_vertex_llm(
    model_name: str = "gemini-2.5-pro",
    temperature: float = 0.7,
) -> ChatVertexAI:
    """Create a ``ChatVertexAI`` chat model.

    Args:
        model_name: Name of the model passed to ``ChatVertexAI``.
        temperature: Sampling temperature passed to ``ChatVertexAI``.

    Returns:
        A ``ChatVertexAI`` instance configured with ``max_retries=2`` and
        ``max_tokens`` left as ``None``.
    """
    return ChatVertexAI(
        model_name=model_name,
        max_tokens=None,
        temperature=temperature,
        max_retries=2,
    )
|
|
48
|
+
|
|
49
|
+
|
|
35
50
|
def get_openai_llm(
|
|
36
51
|
model_name: str = "o3",
|
|
37
52
|
temperature: float = 1,
|
|
@@ -75,7 +90,7 @@ def get_llm(
|
|
|
75
90
|
*,
|
|
76
91
|
use_fallback: bool = False,
|
|
77
92
|
temperature: float = 1,
|
|
78
|
-
): ...
|
|
93
|
+
) -> BaseChatModel: ...
|
|
79
94
|
|
|
80
95
|
|
|
81
96
|
@overload
|
|
@@ -84,7 +99,7 @@ def get_llm(
|
|
|
84
99
|
name: AgentNode,
|
|
85
100
|
*,
|
|
86
101
|
temperature: float = 1,
|
|
87
|
-
): ...
|
|
102
|
+
) -> BaseChatModel: ...
|
|
88
103
|
|
|
89
104
|
|
|
90
105
|
@overload
|
|
@@ -94,7 +109,7 @@ def get_llm(
|
|
|
94
109
|
*,
|
|
95
110
|
is_utils: Literal[True],
|
|
96
111
|
temperature: float = 1,
|
|
97
|
-
): ...
|
|
112
|
+
) -> BaseChatModel: ...
|
|
98
113
|
|
|
99
114
|
|
|
100
115
|
def get_llm(
|
|
@@ -103,7 +118,7 @@ def get_llm(
|
|
|
103
118
|
is_utils: bool = False,
|
|
104
119
|
use_fallback: bool = False,
|
|
105
120
|
temperature: float = 1,
|
|
106
|
-
):
|
|
121
|
+
) -> BaseChatModel:
|
|
107
122
|
llm = (
|
|
108
123
|
ctx.llm_config.get_utils(name) # type: ignore
|
|
109
124
|
if is_utils
|
|
@@ -118,6 +133,8 @@ def get_llm(
|
|
|
118
133
|
return get_openai_llm(llm.model, temperature)
|
|
119
134
|
elif llm.provider == "google":
|
|
120
135
|
return get_google_llm(llm.model, temperature)
|
|
136
|
+
elif llm.provider == "vertexai":
|
|
137
|
+
return get_vertex_llm(llm.model, temperature)
|
|
121
138
|
elif llm.provider == "openrouter":
|
|
122
139
|
return get_openrouter_llm(llm.model, temperature)
|
|
123
140
|
elif llm.provider == "xai":
|
|
@@ -6,6 +6,7 @@ from minitap.mobile_use.tools.mobile.clear_text import clear_text_wrapper
|
|
|
6
6
|
from minitap.mobile_use.tools.mobile.copy_text_from import copy_text_from_wrapper
|
|
7
7
|
from minitap.mobile_use.tools.mobile.erase_one_char import erase_one_char_wrapper
|
|
8
8
|
from minitap.mobile_use.tools.mobile.find_packages import find_packages_wrapper
|
|
9
|
+
from minitap.mobile_use.tools.mobile.glimpse_screen import glimpse_screen_wrapper
|
|
9
10
|
from minitap.mobile_use.tools.mobile.input_text import input_text_wrapper
|
|
10
11
|
from minitap.mobile_use.tools.mobile.launch_app import launch_app_wrapper
|
|
11
12
|
from minitap.mobile_use.tools.mobile.long_press_on import long_press_on_wrapper
|
|
@@ -14,12 +15,11 @@ from minitap.mobile_use.tools.mobile.paste_text import paste_text_wrapper
|
|
|
14
15
|
from minitap.mobile_use.tools.mobile.press_key import press_key_wrapper
|
|
15
16
|
from minitap.mobile_use.tools.mobile.stop_app import stop_app_wrapper
|
|
16
17
|
from minitap.mobile_use.tools.mobile.swipe import swipe_wrapper
|
|
17
|
-
from minitap.mobile_use.tools.mobile.take_screenshot import take_screenshot_wrapper
|
|
18
18
|
from minitap.mobile_use.tools.mobile.tap import tap_wrapper
|
|
19
19
|
from minitap.mobile_use.tools.mobile.wait_for_animation_to_end import (
|
|
20
20
|
wait_for_animation_to_end_wrapper,
|
|
21
21
|
)
|
|
22
|
-
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
22
|
+
from minitap.mobile_use.tools.tool_wrapper import CompositeToolWrapper, ToolWrapper
|
|
23
23
|
|
|
24
24
|
EXECUTOR_WRAPPERS_TOOLS = [
|
|
25
25
|
back_wrapper,
|
|
@@ -27,7 +27,7 @@ EXECUTOR_WRAPPERS_TOOLS = [
|
|
|
27
27
|
tap_wrapper,
|
|
28
28
|
long_press_on_wrapper,
|
|
29
29
|
swipe_wrapper,
|
|
30
|
-
|
|
30
|
+
glimpse_screen_wrapper,
|
|
31
31
|
copy_text_from_wrapper,
|
|
32
32
|
input_text_wrapper,
|
|
33
33
|
erase_one_char_wrapper,
|
|
@@ -41,18 +41,24 @@ EXECUTOR_WRAPPERS_TOOLS = [
|
|
|
41
41
|
]
|
|
42
42
|
|
|
43
43
|
|
|
44
|
-
def get_tools_from_wrappers(
|
|
45
|
-
|
|
46
|
-
|
|
44
|
+
def get_tools_from_wrappers(
|
|
45
|
+
ctx: "MobileUseContext",
|
|
46
|
+
wrappers: list[ToolWrapper],
|
|
47
|
+
) -> list[BaseTool]:
|
|
48
|
+
tools: list[BaseTool] = []
|
|
49
|
+
for wrapper in wrappers:
|
|
50
|
+
if ctx.llm_config.get_agent("executor").provider == "vertexai":
|
|
51
|
+
# The main swipe tool argument structure is not supported by vertexai, we need to split
|
|
52
|
+
# this tool into multiple tools
|
|
53
|
+
if wrapper.tool_fn_getter == swipe_wrapper.tool_fn_getter and isinstance(
|
|
54
|
+
wrapper, CompositeToolWrapper
|
|
55
|
+
):
|
|
56
|
+
tools.extend(wrapper.composite_tools_fn_getter(ctx))
|
|
57
|
+
continue
|
|
47
58
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
return "\n".join([tool.name for tool in get_tools_from_wrappers(ctx, wrappers)])
|
|
59
|
+
tools.append(wrapper.tool_fn_getter(ctx))
|
|
60
|
+
return tools
|
|
51
61
|
|
|
52
62
|
|
|
53
|
-
def
|
|
54
|
-
""
|
|
55
|
-
for wrapper in EXECUTOR_WRAPPERS_TOOLS:
|
|
56
|
-
if wrapper.tool_fn_getter.__name__ == f"get_{name}_tool":
|
|
57
|
-
return wrapper
|
|
58
|
-
return None
|
|
63
|
+
def format_tools_list(ctx: MobileUseContext, wrappers: list[ToolWrapper]) -> str:
    """Return the names of the tools built from ``wrappers``, joined by ", "."""
    tool_names = [built_tool.name for built_tool in get_tools_from_wrappers(ctx, wrappers)]
    return ", ".join(tool_names)
|
|
@@ -23,6 +23,7 @@ from minitap.mobile_use.tools.utils import (
|
|
|
23
23
|
)
|
|
24
24
|
from minitap.mobile_use.utils.logger import get_logger
|
|
25
25
|
from minitap.mobile_use.utils.ui_hierarchy import (
|
|
26
|
+
ElementBounds,
|
|
26
27
|
find_element_by_resource_id,
|
|
27
28
|
get_element_text,
|
|
28
29
|
text_input_is_empty,
|
|
@@ -50,16 +51,20 @@ class TextClearer:
|
|
|
50
51
|
screen_data = get_screen_data(screen_api_client=self.ctx.screen_api_client)
|
|
51
52
|
self.state.latest_ui_hierarchy = screen_data.elements
|
|
52
53
|
|
|
53
|
-
def _get_element_info(
|
|
54
|
+
def _get_element_info(
|
|
55
|
+
self, resource_id: str | None
|
|
56
|
+
) -> tuple[object | None, str | None, str | None]:
|
|
54
57
|
if not self.state.latest_ui_hierarchy:
|
|
55
58
|
self._refresh_ui_hierarchy()
|
|
56
59
|
|
|
57
60
|
if not self.state.latest_ui_hierarchy:
|
|
58
61
|
return None, None, None
|
|
59
62
|
|
|
60
|
-
element =
|
|
61
|
-
|
|
62
|
-
|
|
63
|
+
element = None
|
|
64
|
+
if resource_id:
|
|
65
|
+
element = find_element_by_resource_id(
|
|
66
|
+
ui_hierarchy=self.state.latest_ui_hierarchy, resource_id=resource_id
|
|
67
|
+
)
|
|
63
68
|
|
|
64
69
|
if not element:
|
|
65
70
|
return None, None, None
|
|
@@ -83,11 +88,27 @@ class TextClearer:
|
|
|
83
88
|
def _should_clear_text(self, current_text: str | None, hint_text: str | None) -> bool:
|
|
84
89
|
return current_text is not None and current_text != "" and current_text != hint_text
|
|
85
90
|
|
|
86
|
-
def _prepare_element_for_clearing(
|
|
87
|
-
|
|
91
|
+
def _prepare_element_for_clearing(
|
|
92
|
+
self,
|
|
93
|
+
text_input_resource_id: str | None,
|
|
94
|
+
text_input_coordinates: ElementBounds | None,
|
|
95
|
+
text_input_text: str | None,
|
|
96
|
+
) -> bool:
|
|
97
|
+
if not focus_element_if_needed(
|
|
98
|
+
ctx=self.ctx,
|
|
99
|
+
input_resource_id=text_input_resource_id,
|
|
100
|
+
input_coordinates=text_input_coordinates,
|
|
101
|
+
input_text=text_input_text,
|
|
102
|
+
):
|
|
88
103
|
return False
|
|
89
104
|
|
|
90
|
-
move_cursor_to_end_if_bounds(
|
|
105
|
+
move_cursor_to_end_if_bounds(
|
|
106
|
+
ctx=self.ctx,
|
|
107
|
+
state=self.state,
|
|
108
|
+
text_input_resource_id=text_input_resource_id,
|
|
109
|
+
text_input_coordinates=text_input_coordinates,
|
|
110
|
+
text_input_text=text_input_text,
|
|
111
|
+
)
|
|
91
112
|
return True
|
|
92
113
|
|
|
93
114
|
def _erase_text_attempt(self, text_length: int) -> str | None:
|
|
@@ -102,7 +123,12 @@ class TextClearer:
|
|
|
102
123
|
return None
|
|
103
124
|
|
|
104
125
|
def _clear_with_retries(
|
|
105
|
-
self,
|
|
126
|
+
self,
|
|
127
|
+
text_input_resource_id: str | None,
|
|
128
|
+
text_input_coordinates: ElementBounds | None,
|
|
129
|
+
text_input_text: str | None,
|
|
130
|
+
initial_text: str,
|
|
131
|
+
hint_text: str | None,
|
|
106
132
|
) -> tuple[bool, str | None, int]:
|
|
107
133
|
current_text = initial_text
|
|
108
134
|
erased_chars = 0
|
|
@@ -118,18 +144,25 @@ class TextClearer:
|
|
|
118
144
|
erased_chars += chars_to_erase
|
|
119
145
|
|
|
120
146
|
self._refresh_ui_hierarchy()
|
|
121
|
-
elt =
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
147
|
+
elt = None
|
|
148
|
+
if text_input_resource_id:
|
|
149
|
+
elt = find_element_by_resource_id(
|
|
150
|
+
ui_hierarchy=self.state.latest_ui_hierarchy or [],
|
|
151
|
+
resource_id=text_input_resource_id,
|
|
152
|
+
)
|
|
153
|
+
if elt:
|
|
154
|
+
current_text = get_element_text(elt)
|
|
155
|
+
logger.info(f"Current text: {current_text}")
|
|
156
|
+
if text_input_is_empty(text=current_text, hint_text=hint_text):
|
|
157
|
+
break
|
|
130
158
|
|
|
131
159
|
move_cursor_to_end_if_bounds(
|
|
132
|
-
ctx=self.ctx,
|
|
160
|
+
ctx=self.ctx,
|
|
161
|
+
state=self.state,
|
|
162
|
+
text_input_resource_id=text_input_resource_id,
|
|
163
|
+
text_input_coordinates=text_input_coordinates,
|
|
164
|
+
text_input_text=text_input_text,
|
|
165
|
+
elt=elt,
|
|
133
166
|
)
|
|
134
167
|
|
|
135
168
|
return True, current_text, erased_chars
|
|
@@ -162,7 +195,9 @@ class TextClearer:
|
|
|
162
195
|
hint_text=hint_text,
|
|
163
196
|
)
|
|
164
197
|
|
|
165
|
-
def _handle_element_not_found(
|
|
198
|
+
def _handle_element_not_found(
|
|
199
|
+
self, resource_id: str | None, hint_text: str | None
|
|
200
|
+
) -> ClearTextResult:
|
|
166
201
|
error = erase_text_controller(ctx=self.ctx)
|
|
167
202
|
self._refresh_ui_hierarchy()
|
|
168
203
|
|
|
@@ -176,16 +211,23 @@ class TextClearer:
|
|
|
176
211
|
hint_text=hint_text,
|
|
177
212
|
)
|
|
178
213
|
|
|
179
|
-
def
|
|
180
|
-
|
|
214
|
+
def clear_input_text(
|
|
215
|
+
self,
|
|
216
|
+
text_input_resource_id: str | None,
|
|
217
|
+
text_input_coordinates: ElementBounds | None,
|
|
218
|
+
text_input_text: str | None,
|
|
219
|
+
) -> ClearTextResult:
|
|
220
|
+
element, current_text, hint_text = self._get_element_info(text_input_resource_id)
|
|
181
221
|
|
|
182
222
|
if not element:
|
|
183
|
-
return self._handle_element_not_found(
|
|
223
|
+
return self._handle_element_not_found(text_input_resource_id, hint_text)
|
|
184
224
|
|
|
185
225
|
if not self._should_clear_text(current_text, hint_text):
|
|
186
226
|
return self._handle_no_clearing_needed(current_text, hint_text)
|
|
187
227
|
|
|
188
|
-
if not self._prepare_element_for_clearing(
|
|
228
|
+
if not self._prepare_element_for_clearing(
|
|
229
|
+
text_input_resource_id, text_input_coordinates, text_input_text
|
|
230
|
+
):
|
|
189
231
|
return self._create_result(
|
|
190
232
|
success=False,
|
|
191
233
|
error_message="Failed to focus element",
|
|
@@ -195,7 +237,9 @@ class TextClearer:
|
|
|
195
237
|
)
|
|
196
238
|
|
|
197
239
|
success, final_text, chars_erased = self._clear_with_retries(
|
|
198
|
-
|
|
240
|
+
text_input_resource_id=text_input_resource_id,
|
|
241
|
+
text_input_coordinates=text_input_coordinates,
|
|
242
|
+
text_input_text=text_input_text,
|
|
199
243
|
initial_text=current_text or "",
|
|
200
244
|
hint_text=hint_text,
|
|
201
245
|
)
|
|
@@ -218,12 +262,16 @@ def get_clear_text_tool(ctx: MobileUseContext):
|
|
|
218
262
|
state: Annotated[State, InjectedState],
|
|
219
263
|
agent_thought: str,
|
|
220
264
|
text_input_resource_id: str,
|
|
265
|
+
text_input_coordinates: ElementBounds | None,
|
|
266
|
+
text_input_text: str | None,
|
|
221
267
|
):
|
|
222
268
|
"""
|
|
223
269
|
Clears all the text from the text field, by focusing it if needed.
|
|
224
270
|
"""
|
|
225
271
|
clearer = TextClearer(ctx, state)
|
|
226
|
-
result = clearer.
|
|
272
|
+
result = clearer.clear_input_text(
|
|
273
|
+
text_input_resource_id, text_input_coordinates, text_input_text
|
|
274
|
+
)
|
|
227
275
|
|
|
228
276
|
content = (
|
|
229
277
|
clear_text_wrapper.on_failure_fn(result.error_message)
|
|
@@ -1,18 +1,20 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
1
3
|
from langchain_core.messages import ToolMessage
|
|
2
4
|
from langchain_core.tools import tool
|
|
3
5
|
from langchain_core.tools.base import InjectedToolCallId
|
|
6
|
+
from langgraph.prebuilt import InjectedState
|
|
4
7
|
from langgraph.types import Command
|
|
8
|
+
from pydantic import Field
|
|
9
|
+
|
|
5
10
|
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
11
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
6
12
|
from minitap.mobile_use.controllers.mobile_command_controller import SelectorRequest
|
|
7
13
|
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
8
14
|
copy_text_from as copy_text_from_controller,
|
|
9
15
|
)
|
|
10
|
-
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
11
|
-
from pydantic import Field
|
|
12
|
-
from typing import Annotated
|
|
13
|
-
from minitap.mobile_use.context import MobileUseContext
|
|
14
16
|
from minitap.mobile_use.graph.state import State
|
|
15
|
-
from
|
|
17
|
+
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
16
18
|
|
|
17
19
|
|
|
18
20
|
def get_copy_text_from_tool(ctx: MobileUseContext):
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
1
3
|
from langchain_core.messages import ToolMessage
|
|
2
4
|
from langchain_core.tools import tool
|
|
3
5
|
from langchain_core.tools.base import InjectedToolCallId
|
|
4
6
|
from langgraph.prebuilt import InjectedState
|
|
5
7
|
from langgraph.types import Command
|
|
8
|
+
|
|
6
9
|
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
7
10
|
from minitap.mobile_use.context import MobileUseContext
|
|
8
11
|
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
@@ -11,18 +14,18 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
|
11
14
|
from minitap.mobile_use.graph.state import State
|
|
12
15
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
13
16
|
from minitap.mobile_use.utils.media import compress_base64_jpeg
|
|
14
|
-
from typing import Annotated
|
|
15
17
|
|
|
16
18
|
|
|
17
|
-
def
|
|
19
|
+
def get_glimpse_screen_tool(ctx: MobileUseContext):
|
|
18
20
|
@tool
|
|
19
|
-
def
|
|
21
|
+
def glimpse_screen(
|
|
20
22
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
21
23
|
state: Annotated[State, InjectedState],
|
|
22
24
|
agent_thought: str,
|
|
23
25
|
):
|
|
24
26
|
"""
|
|
25
|
-
|
|
27
|
+
Captures the current screen as an image.
|
|
28
|
+
The resulting screenshot is added to the context for the next reasoning step.
|
|
26
29
|
"""
|
|
27
30
|
compressed_image_base64 = None
|
|
28
31
|
has_failed = False
|
|
@@ -36,9 +39,9 @@ def get_take_screenshot_tool(ctx: MobileUseContext):
|
|
|
36
39
|
|
|
37
40
|
tool_message = ToolMessage(
|
|
38
41
|
tool_call_id=tool_call_id,
|
|
39
|
-
content=
|
|
42
|
+
content=glimpse_screen_wrapper.on_failure_fn()
|
|
40
43
|
if has_failed
|
|
41
|
-
else
|
|
44
|
+
else glimpse_screen_wrapper.on_success_fn(),
|
|
42
45
|
additional_kwargs={"error": output} if has_failed else {},
|
|
43
46
|
status="error" if has_failed else "success",
|
|
44
47
|
)
|
|
@@ -56,11 +59,12 @@ def get_take_screenshot_tool(ctx: MobileUseContext):
|
|
|
56
59
|
),
|
|
57
60
|
)
|
|
58
61
|
|
|
59
|
-
return
|
|
62
|
+
return glimpse_screen
|
|
60
63
|
|
|
61
64
|
|
|
62
|
-
|
|
63
|
-
tool_fn_getter=
|
|
64
|
-
on_success_fn=lambda: "
|
|
65
|
-
|
|
65
|
+
# Wrapper pairing the glimpse_screen tool getter with the success/failure
# messages used as the content of the resulting ToolMessage.
glimpse_screen_wrapper = ToolWrapper(
    tool_fn_getter=get_glimpse_screen_tool,
    # Single literal so the two sentences are separated by a space.
    on_success_fn=lambda: (
        "Visual context captured successfully. It is now available for immediate analysis."
    ),
    on_failure_fn=lambda: "Failed to capture visual context.",
)
|