minitap-mobile-use: 2.2.0 (py3-none-any wheel) → 2.4.0 (py3-none-any wheel)
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of minitap-mobile-use might be problematic. Click here for more details.
- minitap/mobile_use/agents/contextor/contextor.py +6 -4
- minitap/mobile_use/agents/cortex/cortex.md +114 -27
- minitap/mobile_use/agents/cortex/cortex.py +8 -5
- minitap/mobile_use/agents/executor/executor.md +15 -10
- minitap/mobile_use/agents/executor/executor.py +6 -5
- minitap/mobile_use/agents/executor/utils.py +2 -1
- minitap/mobile_use/agents/hopper/hopper.py +6 -3
- minitap/mobile_use/agents/orchestrator/orchestrator.py +26 -11
- minitap/mobile_use/agents/outputter/outputter.py +6 -3
- minitap/mobile_use/agents/outputter/test_outputter.py +104 -42
- minitap/mobile_use/agents/planner/planner.md +20 -22
- minitap/mobile_use/agents/planner/planner.py +10 -7
- minitap/mobile_use/agents/planner/types.py +4 -2
- minitap/mobile_use/agents/planner/utils.py +14 -0
- minitap/mobile_use/agents/summarizer/summarizer.py +2 -2
- minitap/mobile_use/config.py +6 -1
- minitap/mobile_use/context.py +13 -3
- minitap/mobile_use/controllers/mobile_command_controller.py +1 -14
- minitap/mobile_use/graph/state.py +7 -3
- minitap/mobile_use/sdk/agent.py +204 -29
- minitap/mobile_use/sdk/examples/README.md +19 -1
- minitap/mobile_use/sdk/examples/platform_minimal_example.py +46 -0
- minitap/mobile_use/sdk/services/platform.py +244 -0
- minitap/mobile_use/sdk/types/__init__.py +14 -14
- minitap/mobile_use/sdk/types/exceptions.py +57 -0
- minitap/mobile_use/sdk/types/platform.py +125 -0
- minitap/mobile_use/sdk/types/task.py +60 -17
- minitap/mobile_use/servers/device_hardware_bridge.py +3 -2
- minitap/mobile_use/servers/stop_servers.py +11 -12
- minitap/mobile_use/servers/utils.py +6 -9
- minitap/mobile_use/services/llm.py +89 -5
- minitap/mobile_use/tools/index.py +2 -8
- minitap/mobile_use/tools/mobile/back.py +3 -3
- minitap/mobile_use/tools/mobile/clear_text.py +67 -38
- minitap/mobile_use/tools/mobile/erase_one_char.py +5 -4
- minitap/mobile_use/tools/mobile/{take_screenshot.py → glimpse_screen.py} +23 -15
- minitap/mobile_use/tools/mobile/input_text.py +67 -16
- minitap/mobile_use/tools/mobile/launch_app.py +54 -22
- minitap/mobile_use/tools/mobile/long_press_on.py +15 -8
- minitap/mobile_use/tools/mobile/open_link.py +15 -8
- minitap/mobile_use/tools/mobile/press_key.py +15 -8
- minitap/mobile_use/tools/mobile/stop_app.py +14 -8
- minitap/mobile_use/tools/mobile/swipe.py +11 -5
- minitap/mobile_use/tools/mobile/tap.py +103 -21
- minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +3 -3
- minitap/mobile_use/tools/test_utils.py +377 -0
- minitap/mobile_use/tools/types.py +35 -0
- minitap/mobile_use/tools/utils.py +149 -39
- minitap/mobile_use/utils/recorder.py +1 -1
- minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
- minitap/mobile_use/utils/ui_hierarchy.py +11 -4
- {minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/METADATA +6 -4
- minitap_mobile_use-2.4.0.dist-info/RECORD +99 -0
- minitap/mobile_use/tools/mobile/copy_text_from.py +0 -73
- minitap/mobile_use/tools/mobile/find_packages.py +0 -69
- minitap/mobile_use/tools/mobile/paste_text.py +0 -62
- minitap_mobile_use-2.2.0.dist-info/RECORD +0 -96
- {minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/WHEEL +0 -0
- {minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/entry_points.txt +0 -0
|
@@ -70,18 +70,17 @@ def stop_process_gracefully(process: psutil.Process, timeout: int = 5) -> bool:
|
|
|
70
70
|
return False
|
|
71
71
|
|
|
72
72
|
|
|
73
|
-
def
|
|
73
|
+
def check_service_running(port: int, service_name: str) -> bool:
|
|
74
74
|
try:
|
|
75
75
|
if port == server_settings.DEVICE_SCREEN_API_PORT:
|
|
76
|
-
|
|
76
|
+
requests.get(f"http://localhost:{port}/health", timeout=2)
|
|
77
77
|
elif port == DEVICE_HARDWARE_BRIDGE_PORT:
|
|
78
|
-
|
|
78
|
+
requests.get(f"http://localhost:{port}/api/banner-message", timeout=2)
|
|
79
79
|
else:
|
|
80
80
|
return False
|
|
81
81
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
return True
|
|
82
|
+
logger.debug(f"{service_name} is still responding on port {port}")
|
|
83
|
+
return True
|
|
85
84
|
except requests.exceptions.RequestException:
|
|
86
85
|
pass
|
|
87
86
|
|
|
@@ -92,7 +91,7 @@ def stop_device_screen_api() -> bool:
|
|
|
92
91
|
logger.info("Stopping Device Screen API...")
|
|
93
92
|
api_port = server_settings.DEVICE_SCREEN_API_PORT
|
|
94
93
|
|
|
95
|
-
if not
|
|
94
|
+
if not check_service_running(api_port, "Device Screen API"):
|
|
96
95
|
logger.success("Device Screen API is not running")
|
|
97
96
|
return True
|
|
98
97
|
|
|
@@ -109,7 +108,7 @@ def stop_device_screen_api() -> bool:
|
|
|
109
108
|
logger.warning("No Device Screen API processes found, but service is still responding")
|
|
110
109
|
# Still try to verify if service actually stops
|
|
111
110
|
time.sleep(1)
|
|
112
|
-
if not
|
|
111
|
+
if not check_service_running(api_port, "Device Screen API"):
|
|
113
112
|
logger.success("Device Screen API stopped successfully (was orphaned)")
|
|
114
113
|
return True
|
|
115
114
|
return False
|
|
@@ -120,7 +119,7 @@ def stop_device_screen_api() -> bool:
|
|
|
120
119
|
|
|
121
120
|
# Verify service is stopped
|
|
122
121
|
time.sleep(1)
|
|
123
|
-
if
|
|
122
|
+
if check_service_running(api_port, "Device Screen API"):
|
|
124
123
|
logger.error("Device Screen API is still running after stop attempt")
|
|
125
124
|
return False
|
|
126
125
|
|
|
@@ -131,7 +130,7 @@ def stop_device_screen_api() -> bool:
|
|
|
131
130
|
def stop_device_hardware_bridge() -> bool:
|
|
132
131
|
logger.info("Stopping Device Hardware Bridge...")
|
|
133
132
|
|
|
134
|
-
if not
|
|
133
|
+
if not check_service_running(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
|
|
135
134
|
logger.success("Device Hardware Bridge is not running")
|
|
136
135
|
return True
|
|
137
136
|
|
|
@@ -145,7 +144,7 @@ def stop_device_hardware_bridge() -> bool:
|
|
|
145
144
|
logger.warning("No Device Hardware Bridge processes found, but service is still responding")
|
|
146
145
|
# Still try to verify if service actually stops
|
|
147
146
|
time.sleep(1)
|
|
148
|
-
if not
|
|
147
|
+
if not check_service_running(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
|
|
149
148
|
logger.success("Device Hardware Bridge stopped successfully (was orphaned)")
|
|
150
149
|
return True
|
|
151
150
|
return False
|
|
@@ -154,7 +153,7 @@ def stop_device_hardware_bridge() -> bool:
|
|
|
154
153
|
stop_process_gracefully(proc)
|
|
155
154
|
|
|
156
155
|
time.sleep(1)
|
|
157
|
-
if
|
|
156
|
+
if check_service_running(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
|
|
158
157
|
logger.error("Device Hardware Bridge is still running after stop attempt")
|
|
159
158
|
return False
|
|
160
159
|
|
|
@@ -1,11 +1,8 @@
|
|
|
1
|
-
import
|
|
1
|
+
import contextlib
|
|
2
|
+
import socket
|
|
2
3
|
|
|
3
4
|
|
|
4
|
-
def is_port_in_use(port: int):
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
return True
|
|
9
|
-
elif isinstance(conn.laddr, tuple) and len(conn.laddr) >= 2 and conn.laddr[1] == port:
|
|
10
|
-
return True
|
|
11
|
-
return False
|
|
5
|
+
def is_port_in_use(port: int, host: str = "127.0.0.1") -> bool:
|
|
6
|
+
with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
|
|
7
|
+
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
|
8
|
+
return s.connect_ex((host, port)) == 0
|
|
@@ -1,11 +1,13 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import logging
|
|
2
|
-
from collections.abc import Awaitable, Callable
|
|
3
|
-
from typing import Literal, TypeVar, overload
|
|
3
|
+
from collections.abc import Awaitable, Callable, Coroutine
|
|
4
|
+
from typing import Any, Literal, TypeVar, overload
|
|
4
5
|
|
|
5
6
|
from langchain_core.language_models.chat_models import BaseChatModel
|
|
6
7
|
from langchain_google_genai import ChatGoogleGenerativeAI
|
|
7
8
|
from langchain_google_vertexai import ChatVertexAI
|
|
8
9
|
from langchain_openai import ChatOpenAI
|
|
10
|
+
from pydantic import SecretStr
|
|
9
11
|
|
|
10
12
|
from minitap.mobile_use.config import (
|
|
11
13
|
AgentNode,
|
|
@@ -15,8 +17,79 @@ from minitap.mobile_use.config import (
|
|
|
15
17
|
settings,
|
|
16
18
|
)
|
|
17
19
|
from minitap.mobile_use.context import MobileUseContext
|
|
20
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
18
21
|
|
|
19
|
-
|
|
22
|
+
# Logger for internal messages (ex: fallback)
|
|
23
|
+
llm_logger = logging.getLogger(__name__)
|
|
24
|
+
# Logger for user messages
|
|
25
|
+
user_messages_logger = get_logger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
async def invoke_llm_with_timeout_message[T](
|
|
29
|
+
llm_call: Coroutine[Any, Any, T],
|
|
30
|
+
agent_name: str,
|
|
31
|
+
timeout_seconds: int = 10,
|
|
32
|
+
) -> T:
|
|
33
|
+
"""
|
|
34
|
+
Send a LLM call and display a timeout message if it takes too long.
|
|
35
|
+
|
|
36
|
+
Args:
|
|
37
|
+
llm_call: The coroutine of the LLM call to execute.
|
|
38
|
+
agent_name: The name of the agent making the call (for the message).
|
|
39
|
+
timeout_seconds: The delay in seconds before displaying the message.
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
The result of the LLM call.
|
|
43
|
+
"""
|
|
44
|
+
llm_task = asyncio.create_task(llm_call)
|
|
45
|
+
waiter_task = asyncio.create_task(asyncio.sleep(timeout_seconds))
|
|
46
|
+
|
|
47
|
+
done, _ = await asyncio.wait({llm_task, waiter_task}, return_when=asyncio.FIRST_COMPLETED)
|
|
48
|
+
|
|
49
|
+
if llm_task in done:
|
|
50
|
+
# The LLM call has finished before the timeout, cancel the timer
|
|
51
|
+
waiter_task.cancel()
|
|
52
|
+
return llm_task.result()
|
|
53
|
+
else:
|
|
54
|
+
# The timeout has been reached, display the message and wait for the call to finish
|
|
55
|
+
user_messages_logger.info("Waiting for LLM call response...")
|
|
56
|
+
return await llm_task
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def get_minitap_llm(
|
|
60
|
+
trace_id: str,
|
|
61
|
+
remote_tracing: bool = False,
|
|
62
|
+
model: str = "google/gemini-2.5-pro",
|
|
63
|
+
temperature: float | None = None,
|
|
64
|
+
max_retries: int | None = None,
|
|
65
|
+
api_key: str | None = None,
|
|
66
|
+
) -> ChatOpenAI:
|
|
67
|
+
if api_key:
|
|
68
|
+
effective_api_key = SecretStr(api_key)
|
|
69
|
+
elif settings.MINITAP_API_KEY:
|
|
70
|
+
effective_api_key = settings.MINITAP_API_KEY
|
|
71
|
+
else:
|
|
72
|
+
raise ValueError("MINITAP_API_KEY must be provided or set in environment")
|
|
73
|
+
|
|
74
|
+
if settings.MINITAP_API_BASE_URL is None:
|
|
75
|
+
raise ValueError("MINITAP_API_BASE_URL must be set in environment")
|
|
76
|
+
|
|
77
|
+
llm_base_url = f"{settings.MINITAP_API_BASE_URL}/api/v1"
|
|
78
|
+
|
|
79
|
+
if max_retries is None and model.startswith("google/"):
|
|
80
|
+
max_retries = 2
|
|
81
|
+
client = ChatOpenAI(
|
|
82
|
+
model=model,
|
|
83
|
+
temperature=temperature,
|
|
84
|
+
max_retries=max_retries,
|
|
85
|
+
api_key=effective_api_key,
|
|
86
|
+
base_url=llm_base_url,
|
|
87
|
+
default_query={
|
|
88
|
+
"sessionId": trace_id,
|
|
89
|
+
"traceOnlyUsage": remote_tracing,
|
|
90
|
+
},
|
|
91
|
+
)
|
|
92
|
+
return client
|
|
20
93
|
|
|
21
94
|
|
|
22
95
|
def get_google_llm(
|
|
@@ -139,6 +212,17 @@ def get_llm(
|
|
|
139
212
|
return get_openrouter_llm(llm.model, temperature)
|
|
140
213
|
elif llm.provider == "xai":
|
|
141
214
|
return get_grok_llm(llm.model, temperature)
|
|
215
|
+
elif llm.provider == "minitap":
|
|
216
|
+
remote_tracing = False
|
|
217
|
+
if ctx.execution_setup:
|
|
218
|
+
remote_tracing = ctx.execution_setup.enable_remote_tracing
|
|
219
|
+
return get_minitap_llm(
|
|
220
|
+
trace_id=ctx.trace_id,
|
|
221
|
+
remote_tracing=remote_tracing,
|
|
222
|
+
model=llm.model,
|
|
223
|
+
temperature=temperature,
|
|
224
|
+
api_key=ctx.minitap_api_key,
|
|
225
|
+
)
|
|
142
226
|
else:
|
|
143
227
|
raise ValueError(f"Unsupported provider: {llm.provider}")
|
|
144
228
|
|
|
@@ -154,9 +238,9 @@ async def with_fallback(
|
|
|
154
238
|
try:
|
|
155
239
|
result = await main_call()
|
|
156
240
|
if result is None and none_should_fallback:
|
|
157
|
-
|
|
241
|
+
llm_logger.warning("Main LLM inference returned None. Falling back...")
|
|
158
242
|
return await fallback_call()
|
|
159
243
|
return result
|
|
160
244
|
except Exception as e:
|
|
161
|
-
|
|
245
|
+
llm_logger.warning(f"❗ Main LLM inference failed: {e}. Falling back...")
|
|
162
246
|
return await fallback_call()
|
|
@@ -3,18 +3,15 @@ from langchain_core.tools import BaseTool
|
|
|
3
3
|
from minitap.mobile_use.context import MobileUseContext
|
|
4
4
|
from minitap.mobile_use.tools.mobile.back import back_wrapper
|
|
5
5
|
from minitap.mobile_use.tools.mobile.clear_text import clear_text_wrapper
|
|
6
|
-
from minitap.mobile_use.tools.mobile.copy_text_from import copy_text_from_wrapper
|
|
7
6
|
from minitap.mobile_use.tools.mobile.erase_one_char import erase_one_char_wrapper
|
|
8
|
-
from minitap.mobile_use.tools.mobile.
|
|
7
|
+
from minitap.mobile_use.tools.mobile.glimpse_screen import glimpse_screen_wrapper
|
|
9
8
|
from minitap.mobile_use.tools.mobile.input_text import input_text_wrapper
|
|
10
9
|
from minitap.mobile_use.tools.mobile.launch_app import launch_app_wrapper
|
|
11
10
|
from minitap.mobile_use.tools.mobile.long_press_on import long_press_on_wrapper
|
|
12
11
|
from minitap.mobile_use.tools.mobile.open_link import open_link_wrapper
|
|
13
|
-
from minitap.mobile_use.tools.mobile.paste_text import paste_text_wrapper
|
|
14
12
|
from minitap.mobile_use.tools.mobile.press_key import press_key_wrapper
|
|
15
13
|
from minitap.mobile_use.tools.mobile.stop_app import stop_app_wrapper
|
|
16
14
|
from minitap.mobile_use.tools.mobile.swipe import swipe_wrapper
|
|
17
|
-
from minitap.mobile_use.tools.mobile.take_screenshot import take_screenshot_wrapper
|
|
18
15
|
from minitap.mobile_use.tools.mobile.tap import tap_wrapper
|
|
19
16
|
from minitap.mobile_use.tools.mobile.wait_for_animation_to_end import (
|
|
20
17
|
wait_for_animation_to_end_wrapper,
|
|
@@ -27,14 +24,11 @@ EXECUTOR_WRAPPERS_TOOLS = [
|
|
|
27
24
|
tap_wrapper,
|
|
28
25
|
long_press_on_wrapper,
|
|
29
26
|
swipe_wrapper,
|
|
30
|
-
|
|
31
|
-
copy_text_from_wrapper,
|
|
27
|
+
glimpse_screen_wrapper,
|
|
32
28
|
input_text_wrapper,
|
|
33
29
|
erase_one_char_wrapper,
|
|
34
|
-
find_packages_wrapper,
|
|
35
30
|
launch_app_wrapper,
|
|
36
31
|
stop_app_wrapper,
|
|
37
|
-
paste_text_wrapper,
|
|
38
32
|
clear_text_wrapper,
|
|
39
33
|
press_key_wrapper,
|
|
40
34
|
wait_for_animation_to_end_wrapper,
|
|
@@ -13,11 +13,11 @@ from langgraph.prebuilt import InjectedState
|
|
|
13
13
|
|
|
14
14
|
def get_back_tool(ctx: MobileUseContext):
|
|
15
15
|
@tool
|
|
16
|
-
def back(
|
|
16
|
+
async def back(
|
|
17
17
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
18
18
|
state: Annotated[State, InjectedState],
|
|
19
19
|
agent_thought: str,
|
|
20
|
-
):
|
|
20
|
+
) -> Command:
|
|
21
21
|
"""Navigates to the previous screen. (Only works on Android for the moment)"""
|
|
22
22
|
output = back_controller(ctx=ctx)
|
|
23
23
|
has_failed = output is not None
|
|
@@ -28,7 +28,7 @@ def get_back_tool(ctx: MobileUseContext):
|
|
|
28
28
|
status="error" if has_failed else "success",
|
|
29
29
|
)
|
|
30
30
|
return Command(
|
|
31
|
-
update=state.
|
|
31
|
+
update=await state.asanitize_update(
|
|
32
32
|
ctx=ctx,
|
|
33
33
|
update={
|
|
34
34
|
"agents_thoughts": [agent_thought],
|
|
@@ -12,15 +12,11 @@ from minitap.mobile_use.context import MobileUseContext
|
|
|
12
12
|
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
13
13
|
erase_text as erase_text_controller,
|
|
14
14
|
)
|
|
15
|
-
from minitap.mobile_use.controllers.mobile_command_controller import
|
|
16
|
-
get_screen_data,
|
|
17
|
-
)
|
|
15
|
+
from minitap.mobile_use.controllers.mobile_command_controller import get_screen_data
|
|
18
16
|
from minitap.mobile_use.graph.state import State
|
|
19
17
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
20
|
-
from minitap.mobile_use.tools.
|
|
21
|
-
|
|
22
|
-
move_cursor_to_end_if_bounds,
|
|
23
|
-
)
|
|
18
|
+
from minitap.mobile_use.tools.types import Target
|
|
19
|
+
from minitap.mobile_use.tools.utils import focus_element_if_needed, move_cursor_to_end_if_bounds
|
|
24
20
|
from minitap.mobile_use.utils.logger import get_logger
|
|
25
21
|
from minitap.mobile_use.utils.ui_hierarchy import (
|
|
26
22
|
find_element_by_resource_id,
|
|
@@ -50,16 +46,20 @@ class TextClearer:
|
|
|
50
46
|
screen_data = get_screen_data(screen_api_client=self.ctx.screen_api_client)
|
|
51
47
|
self.state.latest_ui_hierarchy = screen_data.elements
|
|
52
48
|
|
|
53
|
-
def _get_element_info(
|
|
49
|
+
def _get_element_info(
|
|
50
|
+
self, resource_id: str | None
|
|
51
|
+
) -> tuple[object | None, str | None, str | None]:
|
|
54
52
|
if not self.state.latest_ui_hierarchy:
|
|
55
53
|
self._refresh_ui_hierarchy()
|
|
56
54
|
|
|
57
55
|
if not self.state.latest_ui_hierarchy:
|
|
58
56
|
return None, None, None
|
|
59
57
|
|
|
60
|
-
element =
|
|
61
|
-
|
|
62
|
-
|
|
58
|
+
element = None
|
|
59
|
+
if resource_id:
|
|
60
|
+
element = find_element_by_resource_id(
|
|
61
|
+
ui_hierarchy=self.state.latest_ui_hierarchy, resource_id=resource_id
|
|
62
|
+
)
|
|
63
63
|
|
|
64
64
|
if not element:
|
|
65
65
|
return None, None, None
|
|
@@ -83,11 +83,21 @@ class TextClearer:
|
|
|
83
83
|
def _should_clear_text(self, current_text: str | None, hint_text: str | None) -> bool:
|
|
84
84
|
return current_text is not None and current_text != "" and current_text != hint_text
|
|
85
85
|
|
|
86
|
-
def _prepare_element_for_clearing(
|
|
87
|
-
|
|
86
|
+
def _prepare_element_for_clearing(
|
|
87
|
+
self,
|
|
88
|
+
target: Target,
|
|
89
|
+
) -> bool:
|
|
90
|
+
if not focus_element_if_needed(
|
|
91
|
+
ctx=self.ctx,
|
|
92
|
+
target=target,
|
|
93
|
+
):
|
|
88
94
|
return False
|
|
89
95
|
|
|
90
|
-
move_cursor_to_end_if_bounds(
|
|
96
|
+
move_cursor_to_end_if_bounds(
|
|
97
|
+
ctx=self.ctx,
|
|
98
|
+
state=self.state,
|
|
99
|
+
target=target,
|
|
100
|
+
)
|
|
91
101
|
return True
|
|
92
102
|
|
|
93
103
|
def _erase_text_attempt(self, text_length: int) -> str | None:
|
|
@@ -102,7 +112,10 @@ class TextClearer:
|
|
|
102
112
|
return None
|
|
103
113
|
|
|
104
114
|
def _clear_with_retries(
|
|
105
|
-
self,
|
|
115
|
+
self,
|
|
116
|
+
target: Target,
|
|
117
|
+
initial_text: str,
|
|
118
|
+
hint_text: str | None,
|
|
106
119
|
) -> tuple[bool, str | None, int]:
|
|
107
120
|
current_text = initial_text
|
|
108
121
|
erased_chars = 0
|
|
@@ -118,18 +131,23 @@ class TextClearer:
|
|
|
118
131
|
erased_chars += chars_to_erase
|
|
119
132
|
|
|
120
133
|
self._refresh_ui_hierarchy()
|
|
121
|
-
elt =
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
134
|
+
elt = None
|
|
135
|
+
if target.resource_id:
|
|
136
|
+
elt = find_element_by_resource_id(
|
|
137
|
+
ui_hierarchy=self.state.latest_ui_hierarchy or [],
|
|
138
|
+
resource_id=target.resource_id,
|
|
139
|
+
)
|
|
140
|
+
if elt:
|
|
141
|
+
current_text = get_element_text(elt)
|
|
142
|
+
logger.info(f"Current text: {current_text}")
|
|
143
|
+
if text_input_is_empty(text=current_text, hint_text=hint_text):
|
|
144
|
+
break
|
|
130
145
|
|
|
131
146
|
move_cursor_to_end_if_bounds(
|
|
132
|
-
ctx=self.ctx,
|
|
147
|
+
ctx=self.ctx,
|
|
148
|
+
state=self.state,
|
|
149
|
+
target=target,
|
|
150
|
+
elt=elt,
|
|
133
151
|
)
|
|
134
152
|
|
|
135
153
|
return True, current_text, erased_chars
|
|
@@ -162,7 +180,9 @@ class TextClearer:
|
|
|
162
180
|
hint_text=hint_text,
|
|
163
181
|
)
|
|
164
182
|
|
|
165
|
-
def _handle_element_not_found(
|
|
183
|
+
def _handle_element_not_found(
|
|
184
|
+
self, resource_id: str | None, hint_text: str | None
|
|
185
|
+
) -> ClearTextResult:
|
|
166
186
|
error = erase_text_controller(ctx=self.ctx)
|
|
167
187
|
self._refresh_ui_hierarchy()
|
|
168
188
|
|
|
@@ -176,16 +196,23 @@ class TextClearer:
|
|
|
176
196
|
hint_text=hint_text,
|
|
177
197
|
)
|
|
178
198
|
|
|
179
|
-
def
|
|
180
|
-
|
|
199
|
+
def clear_input_text(
|
|
200
|
+
self,
|
|
201
|
+
target: Target,
|
|
202
|
+
) -> ClearTextResult:
|
|
203
|
+
element, current_text, hint_text = self._get_element_info(
|
|
204
|
+
resource_id=target.resource_id,
|
|
205
|
+
)
|
|
181
206
|
|
|
182
207
|
if not element:
|
|
183
|
-
return self._handle_element_not_found(resource_id, hint_text)
|
|
208
|
+
return self._handle_element_not_found(target.resource_id, hint_text)
|
|
184
209
|
|
|
185
210
|
if not self._should_clear_text(current_text, hint_text):
|
|
186
211
|
return self._handle_no_clearing_needed(current_text, hint_text)
|
|
187
212
|
|
|
188
|
-
if not self._prepare_element_for_clearing(
|
|
213
|
+
if not self._prepare_element_for_clearing(
|
|
214
|
+
target=target,
|
|
215
|
+
):
|
|
189
216
|
return self._create_result(
|
|
190
217
|
success=False,
|
|
191
218
|
error_message="Failed to focus element",
|
|
@@ -195,7 +222,7 @@ class TextClearer:
|
|
|
195
222
|
)
|
|
196
223
|
|
|
197
224
|
success, final_text, chars_erased = self._clear_with_retries(
|
|
198
|
-
|
|
225
|
+
target=target,
|
|
199
226
|
initial_text=current_text or "",
|
|
200
227
|
hint_text=hint_text,
|
|
201
228
|
)
|
|
@@ -213,19 +240,21 @@ class TextClearer:
|
|
|
213
240
|
|
|
214
241
|
def get_clear_text_tool(ctx: MobileUseContext):
|
|
215
242
|
@tool
|
|
216
|
-
def clear_text(
|
|
243
|
+
async def clear_text(
|
|
217
244
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
218
245
|
state: Annotated[State, InjectedState],
|
|
219
246
|
agent_thought: str,
|
|
220
|
-
|
|
247
|
+
target: Target,
|
|
221
248
|
):
|
|
222
249
|
"""
|
|
223
250
|
Clears all the text from the text field, by focusing it if needed.
|
|
224
251
|
"""
|
|
225
252
|
clearer = TextClearer(ctx, state)
|
|
226
|
-
result = clearer.
|
|
253
|
+
result = clearer.clear_input_text(
|
|
254
|
+
target=target,
|
|
255
|
+
)
|
|
227
256
|
|
|
228
|
-
|
|
257
|
+
agent_outcome = (
|
|
229
258
|
clear_text_wrapper.on_failure_fn(result.error_message)
|
|
230
259
|
if not result.success
|
|
231
260
|
else clear_text_wrapper.on_success_fn(
|
|
@@ -235,16 +264,16 @@ def get_clear_text_tool(ctx: MobileUseContext):
|
|
|
235
264
|
|
|
236
265
|
tool_message = ToolMessage(
|
|
237
266
|
tool_call_id=tool_call_id,
|
|
238
|
-
content=
|
|
267
|
+
content=agent_outcome,
|
|
239
268
|
additional_kwargs={"error": result.error_message} if not result.success else {},
|
|
240
269
|
status="error" if not result.success else "success",
|
|
241
270
|
)
|
|
242
271
|
|
|
243
272
|
return Command(
|
|
244
|
-
update=state.
|
|
273
|
+
update=await state.asanitize_update(
|
|
245
274
|
ctx=ctx,
|
|
246
275
|
update={
|
|
247
|
-
"agents_thoughts": [agent_thought],
|
|
276
|
+
"agents_thoughts": [agent_thought, agent_outcome],
|
|
248
277
|
EXECUTOR_MESSAGES_KEY: [tool_message],
|
|
249
278
|
},
|
|
250
279
|
agent="executor",
|
|
@@ -1,9 +1,10 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
1
3
|
from langchain_core.messages import ToolMessage
|
|
2
4
|
from langchain_core.tools import tool
|
|
3
5
|
from langchain_core.tools.base import InjectedToolCallId
|
|
4
6
|
from langgraph.prebuilt import InjectedState
|
|
5
7
|
from langgraph.types import Command
|
|
6
|
-
from typing import Annotated
|
|
7
8
|
|
|
8
9
|
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
9
10
|
from minitap.mobile_use.context import MobileUseContext
|
|
@@ -16,11 +17,11 @@ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
|
16
17
|
|
|
17
18
|
def get_erase_one_char_tool(ctx: MobileUseContext):
|
|
18
19
|
@tool
|
|
19
|
-
def erase_one_char(
|
|
20
|
+
async def erase_one_char(
|
|
20
21
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
21
22
|
state: Annotated[State, InjectedState],
|
|
22
23
|
agent_thought: str,
|
|
23
|
-
):
|
|
24
|
+
) -> Command:
|
|
24
25
|
"""
|
|
25
26
|
Erase one character from a text area.
|
|
26
27
|
It acts the same as pressing backspace a single time.
|
|
@@ -36,7 +37,7 @@ def get_erase_one_char_tool(ctx: MobileUseContext):
|
|
|
36
37
|
status="error" if has_failed else "success",
|
|
37
38
|
)
|
|
38
39
|
return Command(
|
|
39
|
-
update=state.
|
|
40
|
+
update=await state.asanitize_update(
|
|
40
41
|
ctx=ctx,
|
|
41
42
|
update={
|
|
42
43
|
"agents_thoughts": [agent_thought],
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
1
3
|
from langchain_core.messages import ToolMessage
|
|
2
4
|
from langchain_core.tools import tool
|
|
3
5
|
from langchain_core.tools.base import InjectedToolCallId
|
|
4
6
|
from langgraph.prebuilt import InjectedState
|
|
5
7
|
from langgraph.types import Command
|
|
8
|
+
|
|
6
9
|
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
7
10
|
from minitap.mobile_use.context import MobileUseContext
|
|
8
11
|
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
@@ -11,18 +14,18 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
|
11
14
|
from minitap.mobile_use.graph.state import State
|
|
12
15
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
13
16
|
from minitap.mobile_use.utils.media import compress_base64_jpeg
|
|
14
|
-
from typing import Annotated
|
|
15
17
|
|
|
16
18
|
|
|
17
|
-
def
|
|
19
|
+
def get_glimpse_screen_tool(ctx: MobileUseContext):
|
|
18
20
|
@tool
|
|
19
|
-
def
|
|
21
|
+
async def glimpse_screen(
|
|
20
22
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
21
23
|
state: Annotated[State, InjectedState],
|
|
22
24
|
agent_thought: str,
|
|
23
|
-
):
|
|
25
|
+
) -> Command:
|
|
24
26
|
"""
|
|
25
|
-
|
|
27
|
+
Captures the current screen as an image.
|
|
28
|
+
The resulting screenshot is added to the context for the next reasoning step.
|
|
26
29
|
"""
|
|
27
30
|
compressed_image_base64 = None
|
|
28
31
|
has_failed = False
|
|
@@ -34,33 +37,38 @@ def get_take_screenshot_tool(ctx: MobileUseContext):
|
|
|
34
37
|
output = str(e)
|
|
35
38
|
has_failed = True
|
|
36
39
|
|
|
40
|
+
agent_outcome = (
|
|
41
|
+
glimpse_screen_wrapper.on_failure_fn()
|
|
42
|
+
if has_failed
|
|
43
|
+
else glimpse_screen_wrapper.on_success_fn()
|
|
44
|
+
)
|
|
45
|
+
|
|
37
46
|
tool_message = ToolMessage(
|
|
38
47
|
tool_call_id=tool_call_id,
|
|
39
|
-
content=
|
|
40
|
-
if has_failed
|
|
41
|
-
else take_screenshot_wrapper.on_success_fn(),
|
|
48
|
+
content=agent_outcome,
|
|
42
49
|
additional_kwargs={"error": output} if has_failed else {},
|
|
43
50
|
status="error" if has_failed else "success",
|
|
44
51
|
)
|
|
45
52
|
updates = {
|
|
46
|
-
"agents_thoughts": [agent_thought],
|
|
53
|
+
"agents_thoughts": [agent_thought, agent_outcome],
|
|
47
54
|
EXECUTOR_MESSAGES_KEY: [tool_message],
|
|
48
55
|
}
|
|
49
56
|
if compressed_image_base64:
|
|
50
57
|
updates["latest_screenshot_base64"] = compressed_image_base64
|
|
51
58
|
return Command(
|
|
52
|
-
update=state.
|
|
59
|
+
update=await state.asanitize_update(
|
|
53
60
|
ctx=ctx,
|
|
54
61
|
update=updates,
|
|
55
62
|
agent="executor",
|
|
56
63
|
),
|
|
57
64
|
)
|
|
58
65
|
|
|
59
|
-
return
|
|
66
|
+
return glimpse_screen
|
|
60
67
|
|
|
61
68
|
|
|
62
|
-
|
|
63
|
-
tool_fn_getter=
|
|
64
|
-
on_success_fn=lambda: "
|
|
65
|
-
|
|
69
|
+
glimpse_screen_wrapper = ToolWrapper(
|
|
70
|
+
tool_fn_getter=get_glimpse_screen_tool,
|
|
71
|
+
on_success_fn=lambda: "Visual context captured successfully."
|
|
72
|
+
+ "It is now available for immediate analysis.",
|
|
73
|
+
on_failure_fn=lambda: "Failed to capture visual context.",
|
|
66
74
|
)
|