minitap-mobile-use 2.2.0__py3-none-any.whl → 2.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (59) hide show
  1. minitap/mobile_use/agents/contextor/contextor.py +6 -4
  2. minitap/mobile_use/agents/cortex/cortex.md +114 -27
  3. minitap/mobile_use/agents/cortex/cortex.py +8 -5
  4. minitap/mobile_use/agents/executor/executor.md +15 -10
  5. minitap/mobile_use/agents/executor/executor.py +6 -5
  6. minitap/mobile_use/agents/executor/utils.py +2 -1
  7. minitap/mobile_use/agents/hopper/hopper.py +6 -3
  8. minitap/mobile_use/agents/orchestrator/orchestrator.py +26 -11
  9. minitap/mobile_use/agents/outputter/outputter.py +6 -3
  10. minitap/mobile_use/agents/outputter/test_outputter.py +104 -42
  11. minitap/mobile_use/agents/planner/planner.md +20 -22
  12. minitap/mobile_use/agents/planner/planner.py +10 -7
  13. minitap/mobile_use/agents/planner/types.py +4 -2
  14. minitap/mobile_use/agents/planner/utils.py +14 -0
  15. minitap/mobile_use/agents/summarizer/summarizer.py +2 -2
  16. minitap/mobile_use/config.py +6 -1
  17. minitap/mobile_use/context.py +13 -3
  18. minitap/mobile_use/controllers/mobile_command_controller.py +1 -14
  19. minitap/mobile_use/graph/state.py +7 -3
  20. minitap/mobile_use/sdk/agent.py +204 -29
  21. minitap/mobile_use/sdk/examples/README.md +19 -1
  22. minitap/mobile_use/sdk/examples/platform_minimal_example.py +46 -0
  23. minitap/mobile_use/sdk/services/platform.py +244 -0
  24. minitap/mobile_use/sdk/types/__init__.py +14 -14
  25. minitap/mobile_use/sdk/types/exceptions.py +57 -0
  26. minitap/mobile_use/sdk/types/platform.py +125 -0
  27. minitap/mobile_use/sdk/types/task.py +60 -17
  28. minitap/mobile_use/servers/device_hardware_bridge.py +3 -2
  29. minitap/mobile_use/servers/stop_servers.py +11 -12
  30. minitap/mobile_use/servers/utils.py +6 -9
  31. minitap/mobile_use/services/llm.py +89 -5
  32. minitap/mobile_use/tools/index.py +2 -8
  33. minitap/mobile_use/tools/mobile/back.py +3 -3
  34. minitap/mobile_use/tools/mobile/clear_text.py +67 -38
  35. minitap/mobile_use/tools/mobile/erase_one_char.py +5 -4
  36. minitap/mobile_use/tools/mobile/{take_screenshot.py → glimpse_screen.py} +23 -15
  37. minitap/mobile_use/tools/mobile/input_text.py +67 -16
  38. minitap/mobile_use/tools/mobile/launch_app.py +54 -22
  39. minitap/mobile_use/tools/mobile/long_press_on.py +15 -8
  40. minitap/mobile_use/tools/mobile/open_link.py +15 -8
  41. minitap/mobile_use/tools/mobile/press_key.py +15 -8
  42. minitap/mobile_use/tools/mobile/stop_app.py +14 -8
  43. minitap/mobile_use/tools/mobile/swipe.py +11 -5
  44. minitap/mobile_use/tools/mobile/tap.py +103 -21
  45. minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +3 -3
  46. minitap/mobile_use/tools/test_utils.py +377 -0
  47. minitap/mobile_use/tools/types.py +35 -0
  48. minitap/mobile_use/tools/utils.py +149 -39
  49. minitap/mobile_use/utils/recorder.py +1 -1
  50. minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
  51. minitap/mobile_use/utils/ui_hierarchy.py +11 -4
  52. {minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/METADATA +6 -4
  53. minitap_mobile_use-2.4.0.dist-info/RECORD +99 -0
  54. minitap/mobile_use/tools/mobile/copy_text_from.py +0 -73
  55. minitap/mobile_use/tools/mobile/find_packages.py +0 -69
  56. minitap/mobile_use/tools/mobile/paste_text.py +0 -62
  57. minitap_mobile_use-2.2.0.dist-info/RECORD +0 -96
  58. {minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/WHEEL +0 -0
  59. {minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/entry_points.txt +0 -0
@@ -70,18 +70,17 @@ def stop_process_gracefully(process: psutil.Process, timeout: int = 5) -> bool:
70
70
  return False
71
71
 
72
72
 
73
- def check_service_health(port: int, service_name: str) -> bool:
73
+ def check_service_running(port: int, service_name: str) -> bool:
74
74
  try:
75
75
  if port == server_settings.DEVICE_SCREEN_API_PORT:
76
- response = requests.get(f"http://localhost:{port}/health", timeout=2)
76
+ requests.get(f"http://localhost:{port}/health", timeout=2)
77
77
  elif port == DEVICE_HARDWARE_BRIDGE_PORT:
78
- response = requests.get(f"http://localhost:{port}/api/banner-message", timeout=2)
78
+ requests.get(f"http://localhost:{port}/api/banner-message", timeout=2)
79
79
  else:
80
80
  return False
81
81
 
82
- if response.status_code == 200:
83
- logger.debug(f"{service_name} is still responding on port {port}")
84
- return True
82
+ logger.debug(f"{service_name} is still responding on port {port}")
83
+ return True
85
84
  except requests.exceptions.RequestException:
86
85
  pass
87
86
 
@@ -92,7 +91,7 @@ def stop_device_screen_api() -> bool:
92
91
  logger.info("Stopping Device Screen API...")
93
92
  api_port = server_settings.DEVICE_SCREEN_API_PORT
94
93
 
95
- if not check_service_health(api_port, "Device Screen API"):
94
+ if not check_service_running(api_port, "Device Screen API"):
96
95
  logger.success("Device Screen API is not running")
97
96
  return True
98
97
 
@@ -109,7 +108,7 @@ def stop_device_screen_api() -> bool:
109
108
  logger.warning("No Device Screen API processes found, but service is still responding")
110
109
  # Still try to verify if service actually stops
111
110
  time.sleep(1)
112
- if not check_service_health(api_port, "Device Screen API"):
111
+ if not check_service_running(api_port, "Device Screen API"):
113
112
  logger.success("Device Screen API stopped successfully (was orphaned)")
114
113
  return True
115
114
  return False
@@ -120,7 +119,7 @@ def stop_device_screen_api() -> bool:
120
119
 
121
120
  # Verify service is stopped
122
121
  time.sleep(1)
123
- if check_service_health(api_port, "Device Screen API"):
122
+ if check_service_running(api_port, "Device Screen API"):
124
123
  logger.error("Device Screen API is still running after stop attempt")
125
124
  return False
126
125
 
@@ -131,7 +130,7 @@ def stop_device_screen_api() -> bool:
131
130
  def stop_device_hardware_bridge() -> bool:
132
131
  logger.info("Stopping Device Hardware Bridge...")
133
132
 
134
- if not check_service_health(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
133
+ if not check_service_running(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
135
134
  logger.success("Device Hardware Bridge is not running")
136
135
  return True
137
136
 
@@ -145,7 +144,7 @@ def stop_device_hardware_bridge() -> bool:
145
144
  logger.warning("No Device Hardware Bridge processes found, but service is still responding")
146
145
  # Still try to verify if service actually stops
147
146
  time.sleep(1)
148
- if not check_service_health(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
147
+ if not check_service_running(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
149
148
  logger.success("Device Hardware Bridge stopped successfully (was orphaned)")
150
149
  return True
151
150
  return False
@@ -154,7 +153,7 @@ def stop_device_hardware_bridge() -> bool:
154
153
  stop_process_gracefully(proc)
155
154
 
156
155
  time.sleep(1)
157
- if check_service_health(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
156
+ if check_service_running(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
158
157
  logger.error("Device Hardware Bridge is still running after stop attempt")
159
158
  return False
160
159
 
@@ -1,11 +1,8 @@
1
- import psutil
1
+ import contextlib
2
+ import socket
2
3
 
3
4
 
4
- def is_port_in_use(port: int):
5
- for conn in psutil.net_connections():
6
- if conn.status == psutil.CONN_LISTEN and conn.laddr:
7
- if hasattr(conn.laddr, "port") and conn.laddr.port == port:
8
- return True
9
- elif isinstance(conn.laddr, tuple) and len(conn.laddr) >= 2 and conn.laddr[1] == port:
10
- return True
11
- return False
5
+ def is_port_in_use(port: int, host: str = "127.0.0.1") -> bool:
6
+ with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
7
+ s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
8
+ return s.connect_ex((host, port)) == 0
@@ -1,11 +1,13 @@
1
+ import asyncio
1
2
  import logging
2
- from collections.abc import Awaitable, Callable
3
- from typing import Literal, TypeVar, overload
3
+ from collections.abc import Awaitable, Callable, Coroutine
4
+ from typing import Any, Literal, TypeVar, overload
4
5
 
5
6
  from langchain_core.language_models.chat_models import BaseChatModel
6
7
  from langchain_google_genai import ChatGoogleGenerativeAI
7
8
  from langchain_google_vertexai import ChatVertexAI
8
9
  from langchain_openai import ChatOpenAI
10
+ from pydantic import SecretStr
9
11
 
10
12
  from minitap.mobile_use.config import (
11
13
  AgentNode,
@@ -15,8 +17,79 @@ from minitap.mobile_use.config import (
15
17
  settings,
16
18
  )
17
19
  from minitap.mobile_use.context import MobileUseContext
20
+ from minitap.mobile_use.utils.logger import get_logger
18
21
 
19
- logger = logging.getLogger(__name__)
22
+ # Logger for internal messages (ex: fallback)
23
+ llm_logger = logging.getLogger(__name__)
24
+ # Logger for user messages
25
+ user_messages_logger = get_logger(__name__)
26
+
27
+
28
+ async def invoke_llm_with_timeout_message[T](
29
+ llm_call: Coroutine[Any, Any, T],
30
+ agent_name: str,
31
+ timeout_seconds: int = 10,
32
+ ) -> T:
33
+ """
34
+ Send a LLM call and display a timeout message if it takes too long.
35
+
36
+ Args:
37
+ llm_call: The coroutine of the LLM call to execute.
38
+ agent_name: The name of the agent making the call (for the message).
39
+ timeout_seconds: The delay in seconds before displaying the message.
40
+
41
+ Returns:
42
+ The result of the LLM call.
43
+ """
44
+ llm_task = asyncio.create_task(llm_call)
45
+ waiter_task = asyncio.create_task(asyncio.sleep(timeout_seconds))
46
+
47
+ done, _ = await asyncio.wait({llm_task, waiter_task}, return_when=asyncio.FIRST_COMPLETED)
48
+
49
+ if llm_task in done:
50
+ # The LLM call has finished before the timeout, cancel the timer
51
+ waiter_task.cancel()
52
+ return llm_task.result()
53
+ else:
54
+ # The timeout has been reached, display the message and wait for the call to finish
55
+ user_messages_logger.info("Waiting for LLM call response...")
56
+ return await llm_task
57
+
58
+
59
+ def get_minitap_llm(
60
+ trace_id: str,
61
+ remote_tracing: bool = False,
62
+ model: str = "google/gemini-2.5-pro",
63
+ temperature: float | None = None,
64
+ max_retries: int | None = None,
65
+ api_key: str | None = None,
66
+ ) -> ChatOpenAI:
67
+ if api_key:
68
+ effective_api_key = SecretStr(api_key)
69
+ elif settings.MINITAP_API_KEY:
70
+ effective_api_key = settings.MINITAP_API_KEY
71
+ else:
72
+ raise ValueError("MINITAP_API_KEY must be provided or set in environment")
73
+
74
+ if settings.MINITAP_API_BASE_URL is None:
75
+ raise ValueError("MINITAP_API_BASE_URL must be set in environment")
76
+
77
+ llm_base_url = f"{settings.MINITAP_API_BASE_URL}/api/v1"
78
+
79
+ if max_retries is None and model.startswith("google/"):
80
+ max_retries = 2
81
+ client = ChatOpenAI(
82
+ model=model,
83
+ temperature=temperature,
84
+ max_retries=max_retries,
85
+ api_key=effective_api_key,
86
+ base_url=llm_base_url,
87
+ default_query={
88
+ "sessionId": trace_id,
89
+ "traceOnlyUsage": remote_tracing,
90
+ },
91
+ )
92
+ return client
20
93
 
21
94
 
22
95
  def get_google_llm(
@@ -139,6 +212,17 @@ def get_llm(
139
212
  return get_openrouter_llm(llm.model, temperature)
140
213
  elif llm.provider == "xai":
141
214
  return get_grok_llm(llm.model, temperature)
215
+ elif llm.provider == "minitap":
216
+ remote_tracing = False
217
+ if ctx.execution_setup:
218
+ remote_tracing = ctx.execution_setup.enable_remote_tracing
219
+ return get_minitap_llm(
220
+ trace_id=ctx.trace_id,
221
+ remote_tracing=remote_tracing,
222
+ model=llm.model,
223
+ temperature=temperature,
224
+ api_key=ctx.minitap_api_key,
225
+ )
142
226
  else:
143
227
  raise ValueError(f"Unsupported provider: {llm.provider}")
144
228
 
@@ -154,9 +238,9 @@ async def with_fallback(
154
238
  try:
155
239
  result = await main_call()
156
240
  if result is None and none_should_fallback:
157
- logger.warning("Main LLM inference returned None. Falling back...")
241
+ llm_logger.warning("Main LLM inference returned None. Falling back...")
158
242
  return await fallback_call()
159
243
  return result
160
244
  except Exception as e:
161
- logger.warning(f"❗ Main LLM inference failed: {e}. Falling back...")
245
+ llm_logger.warning(f"❗ Main LLM inference failed: {e}. Falling back...")
162
246
  return await fallback_call()
@@ -3,18 +3,15 @@ from langchain_core.tools import BaseTool
3
3
  from minitap.mobile_use.context import MobileUseContext
4
4
  from minitap.mobile_use.tools.mobile.back import back_wrapper
5
5
  from minitap.mobile_use.tools.mobile.clear_text import clear_text_wrapper
6
- from minitap.mobile_use.tools.mobile.copy_text_from import copy_text_from_wrapper
7
6
  from minitap.mobile_use.tools.mobile.erase_one_char import erase_one_char_wrapper
8
- from minitap.mobile_use.tools.mobile.find_packages import find_packages_wrapper
7
+ from minitap.mobile_use.tools.mobile.glimpse_screen import glimpse_screen_wrapper
9
8
  from minitap.mobile_use.tools.mobile.input_text import input_text_wrapper
10
9
  from minitap.mobile_use.tools.mobile.launch_app import launch_app_wrapper
11
10
  from minitap.mobile_use.tools.mobile.long_press_on import long_press_on_wrapper
12
11
  from minitap.mobile_use.tools.mobile.open_link import open_link_wrapper
13
- from minitap.mobile_use.tools.mobile.paste_text import paste_text_wrapper
14
12
  from minitap.mobile_use.tools.mobile.press_key import press_key_wrapper
15
13
  from minitap.mobile_use.tools.mobile.stop_app import stop_app_wrapper
16
14
  from minitap.mobile_use.tools.mobile.swipe import swipe_wrapper
17
- from minitap.mobile_use.tools.mobile.take_screenshot import take_screenshot_wrapper
18
15
  from minitap.mobile_use.tools.mobile.tap import tap_wrapper
19
16
  from minitap.mobile_use.tools.mobile.wait_for_animation_to_end import (
20
17
  wait_for_animation_to_end_wrapper,
@@ -27,14 +24,11 @@ EXECUTOR_WRAPPERS_TOOLS = [
27
24
  tap_wrapper,
28
25
  long_press_on_wrapper,
29
26
  swipe_wrapper,
30
- take_screenshot_wrapper,
31
- copy_text_from_wrapper,
27
+ glimpse_screen_wrapper,
32
28
  input_text_wrapper,
33
29
  erase_one_char_wrapper,
34
- find_packages_wrapper,
35
30
  launch_app_wrapper,
36
31
  stop_app_wrapper,
37
- paste_text_wrapper,
38
32
  clear_text_wrapper,
39
33
  press_key_wrapper,
40
34
  wait_for_animation_to_end_wrapper,
@@ -13,11 +13,11 @@ from langgraph.prebuilt import InjectedState
13
13
 
14
14
  def get_back_tool(ctx: MobileUseContext):
15
15
  @tool
16
- def back(
16
+ async def back(
17
17
  tool_call_id: Annotated[str, InjectedToolCallId],
18
18
  state: Annotated[State, InjectedState],
19
19
  agent_thought: str,
20
- ):
20
+ ) -> Command:
21
21
  """Navigates to the previous screen. (Only works on Android for the moment)"""
22
22
  output = back_controller(ctx=ctx)
23
23
  has_failed = output is not None
@@ -28,7 +28,7 @@ def get_back_tool(ctx: MobileUseContext):
28
28
  status="error" if has_failed else "success",
29
29
  )
30
30
  return Command(
31
- update=state.sanitize_update(
31
+ update=await state.asanitize_update(
32
32
  ctx=ctx,
33
33
  update={
34
34
  "agents_thoughts": [agent_thought],
@@ -12,15 +12,11 @@ from minitap.mobile_use.context import MobileUseContext
12
12
  from minitap.mobile_use.controllers.mobile_command_controller import (
13
13
  erase_text as erase_text_controller,
14
14
  )
15
- from minitap.mobile_use.controllers.mobile_command_controller import (
16
- get_screen_data,
17
- )
15
+ from minitap.mobile_use.controllers.mobile_command_controller import get_screen_data
18
16
  from minitap.mobile_use.graph.state import State
19
17
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
20
- from minitap.mobile_use.tools.utils import (
21
- focus_element_if_needed,
22
- move_cursor_to_end_if_bounds,
23
- )
18
+ from minitap.mobile_use.tools.types import Target
19
+ from minitap.mobile_use.tools.utils import focus_element_if_needed, move_cursor_to_end_if_bounds
24
20
  from minitap.mobile_use.utils.logger import get_logger
25
21
  from minitap.mobile_use.utils.ui_hierarchy import (
26
22
  find_element_by_resource_id,
@@ -50,16 +46,20 @@ class TextClearer:
50
46
  screen_data = get_screen_data(screen_api_client=self.ctx.screen_api_client)
51
47
  self.state.latest_ui_hierarchy = screen_data.elements
52
48
 
53
- def _get_element_info(self, resource_id: str) -> tuple[object | None, str | None, str | None]:
49
+ def _get_element_info(
50
+ self, resource_id: str | None
51
+ ) -> tuple[object | None, str | None, str | None]:
54
52
  if not self.state.latest_ui_hierarchy:
55
53
  self._refresh_ui_hierarchy()
56
54
 
57
55
  if not self.state.latest_ui_hierarchy:
58
56
  return None, None, None
59
57
 
60
- element = find_element_by_resource_id(
61
- ui_hierarchy=self.state.latest_ui_hierarchy, resource_id=resource_id
62
- )
58
+ element = None
59
+ if resource_id:
60
+ element = find_element_by_resource_id(
61
+ ui_hierarchy=self.state.latest_ui_hierarchy, resource_id=resource_id
62
+ )
63
63
 
64
64
  if not element:
65
65
  return None, None, None
@@ -83,11 +83,21 @@ class TextClearer:
83
83
  def _should_clear_text(self, current_text: str | None, hint_text: str | None) -> bool:
84
84
  return current_text is not None and current_text != "" and current_text != hint_text
85
85
 
86
- def _prepare_element_for_clearing(self, resource_id: str) -> bool:
87
- if not focus_element_if_needed(ctx=self.ctx, resource_id=resource_id):
86
+ def _prepare_element_for_clearing(
87
+ self,
88
+ target: Target,
89
+ ) -> bool:
90
+ if not focus_element_if_needed(
91
+ ctx=self.ctx,
92
+ target=target,
93
+ ):
88
94
  return False
89
95
 
90
- move_cursor_to_end_if_bounds(ctx=self.ctx, state=self.state, resource_id=resource_id)
96
+ move_cursor_to_end_if_bounds(
97
+ ctx=self.ctx,
98
+ state=self.state,
99
+ target=target,
100
+ )
91
101
  return True
92
102
 
93
103
  def _erase_text_attempt(self, text_length: int) -> str | None:
@@ -102,7 +112,10 @@ class TextClearer:
102
112
  return None
103
113
 
104
114
  def _clear_with_retries(
105
- self, resource_id: str, initial_text: str, hint_text: str | None
115
+ self,
116
+ target: Target,
117
+ initial_text: str,
118
+ hint_text: str | None,
106
119
  ) -> tuple[bool, str | None, int]:
107
120
  current_text = initial_text
108
121
  erased_chars = 0
@@ -118,18 +131,23 @@ class TextClearer:
118
131
  erased_chars += chars_to_erase
119
132
 
120
133
  self._refresh_ui_hierarchy()
121
- elt = find_element_by_resource_id(
122
- ui_hierarchy=self.state.latest_ui_hierarchy or [],
123
- resource_id=resource_id,
124
- )
125
- if elt:
126
- current_text = get_element_text(elt)
127
- logger.info(f"Current text: {current_text}")
128
- if text_input_is_empty(text=current_text, hint_text=hint_text):
129
- break
134
+ elt = None
135
+ if target.resource_id:
136
+ elt = find_element_by_resource_id(
137
+ ui_hierarchy=self.state.latest_ui_hierarchy or [],
138
+ resource_id=target.resource_id,
139
+ )
140
+ if elt:
141
+ current_text = get_element_text(elt)
142
+ logger.info(f"Current text: {current_text}")
143
+ if text_input_is_empty(text=current_text, hint_text=hint_text):
144
+ break
130
145
 
131
146
  move_cursor_to_end_if_bounds(
132
- ctx=self.ctx, state=self.state, resource_id=resource_id, elt=elt
147
+ ctx=self.ctx,
148
+ state=self.state,
149
+ target=target,
150
+ elt=elt,
133
151
  )
134
152
 
135
153
  return True, current_text, erased_chars
@@ -162,7 +180,9 @@ class TextClearer:
162
180
  hint_text=hint_text,
163
181
  )
164
182
 
165
- def _handle_element_not_found(self, resource_id: str, hint_text: str | None) -> ClearTextResult:
183
+ def _handle_element_not_found(
184
+ self, resource_id: str | None, hint_text: str | None
185
+ ) -> ClearTextResult:
166
186
  error = erase_text_controller(ctx=self.ctx)
167
187
  self._refresh_ui_hierarchy()
168
188
 
@@ -176,16 +196,23 @@ class TextClearer:
176
196
  hint_text=hint_text,
177
197
  )
178
198
 
179
- def clear_text_by_resource_id(self, resource_id: str) -> ClearTextResult:
180
- element, current_text, hint_text = self._get_element_info(resource_id)
199
+ def clear_input_text(
200
+ self,
201
+ target: Target,
202
+ ) -> ClearTextResult:
203
+ element, current_text, hint_text = self._get_element_info(
204
+ resource_id=target.resource_id,
205
+ )
181
206
 
182
207
  if not element:
183
- return self._handle_element_not_found(resource_id, hint_text)
208
+ return self._handle_element_not_found(target.resource_id, hint_text)
184
209
 
185
210
  if not self._should_clear_text(current_text, hint_text):
186
211
  return self._handle_no_clearing_needed(current_text, hint_text)
187
212
 
188
- if not self._prepare_element_for_clearing(resource_id):
213
+ if not self._prepare_element_for_clearing(
214
+ target=target,
215
+ ):
189
216
  return self._create_result(
190
217
  success=False,
191
218
  error_message="Failed to focus element",
@@ -195,7 +222,7 @@ class TextClearer:
195
222
  )
196
223
 
197
224
  success, final_text, chars_erased = self._clear_with_retries(
198
- resource_id=resource_id,
225
+ target=target,
199
226
  initial_text=current_text or "",
200
227
  hint_text=hint_text,
201
228
  )
@@ -213,19 +240,21 @@ class TextClearer:
213
240
 
214
241
  def get_clear_text_tool(ctx: MobileUseContext):
215
242
  @tool
216
- def clear_text(
243
+ async def clear_text(
217
244
  tool_call_id: Annotated[str, InjectedToolCallId],
218
245
  state: Annotated[State, InjectedState],
219
246
  agent_thought: str,
220
- text_input_resource_id: str,
247
+ target: Target,
221
248
  ):
222
249
  """
223
250
  Clears all the text from the text field, by focusing it if needed.
224
251
  """
225
252
  clearer = TextClearer(ctx, state)
226
- result = clearer.clear_text_by_resource_id(text_input_resource_id)
253
+ result = clearer.clear_input_text(
254
+ target=target,
255
+ )
227
256
 
228
- content = (
257
+ agent_outcome = (
229
258
  clear_text_wrapper.on_failure_fn(result.error_message)
230
259
  if not result.success
231
260
  else clear_text_wrapper.on_success_fn(
@@ -235,16 +264,16 @@ def get_clear_text_tool(ctx: MobileUseContext):
235
264
 
236
265
  tool_message = ToolMessage(
237
266
  tool_call_id=tool_call_id,
238
- content=content,
267
+ content=agent_outcome,
239
268
  additional_kwargs={"error": result.error_message} if not result.success else {},
240
269
  status="error" if not result.success else "success",
241
270
  )
242
271
 
243
272
  return Command(
244
- update=state.sanitize_update(
273
+ update=await state.asanitize_update(
245
274
  ctx=ctx,
246
275
  update={
247
- "agents_thoughts": [agent_thought],
276
+ "agents_thoughts": [agent_thought, agent_outcome],
248
277
  EXECUTOR_MESSAGES_KEY: [tool_message],
249
278
  },
250
279
  agent="executor",
@@ -1,9 +1,10 @@
1
+ from typing import Annotated
2
+
1
3
  from langchain_core.messages import ToolMessage
2
4
  from langchain_core.tools import tool
3
5
  from langchain_core.tools.base import InjectedToolCallId
4
6
  from langgraph.prebuilt import InjectedState
5
7
  from langgraph.types import Command
6
- from typing import Annotated
7
8
 
8
9
  from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
9
10
  from minitap.mobile_use.context import MobileUseContext
@@ -16,11 +17,11 @@ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
16
17
 
17
18
  def get_erase_one_char_tool(ctx: MobileUseContext):
18
19
  @tool
19
- def erase_one_char(
20
+ async def erase_one_char(
20
21
  tool_call_id: Annotated[str, InjectedToolCallId],
21
22
  state: Annotated[State, InjectedState],
22
23
  agent_thought: str,
23
- ):
24
+ ) -> Command:
24
25
  """
25
26
  Erase one character from a text area.
26
27
  It acts the same as pressing backspace a single time.
@@ -36,7 +37,7 @@ def get_erase_one_char_tool(ctx: MobileUseContext):
36
37
  status="error" if has_failed else "success",
37
38
  )
38
39
  return Command(
39
- update=state.sanitize_update(
40
+ update=await state.asanitize_update(
40
41
  ctx=ctx,
41
42
  update={
42
43
  "agents_thoughts": [agent_thought],
@@ -1,8 +1,11 @@
1
+ from typing import Annotated
2
+
1
3
  from langchain_core.messages import ToolMessage
2
4
  from langchain_core.tools import tool
3
5
  from langchain_core.tools.base import InjectedToolCallId
4
6
  from langgraph.prebuilt import InjectedState
5
7
  from langgraph.types import Command
8
+
6
9
  from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
7
10
  from minitap.mobile_use.context import MobileUseContext
8
11
  from minitap.mobile_use.controllers.mobile_command_controller import (
@@ -11,18 +14,18 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
11
14
  from minitap.mobile_use.graph.state import State
12
15
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
13
16
  from minitap.mobile_use.utils.media import compress_base64_jpeg
14
- from typing import Annotated
15
17
 
16
18
 
17
- def get_take_screenshot_tool(ctx: MobileUseContext):
19
+ def get_glimpse_screen_tool(ctx: MobileUseContext):
18
20
  @tool
19
- def take_screenshot(
21
+ async def glimpse_screen(
20
22
  tool_call_id: Annotated[str, InjectedToolCallId],
21
23
  state: Annotated[State, InjectedState],
22
24
  agent_thought: str,
23
- ):
25
+ ) -> Command:
24
26
  """
25
- Take a screenshot of the device.
27
+ Captures the current screen as an image.
28
+ The resulting screenshot is added to the context for the next reasoning step.
26
29
  """
27
30
  compressed_image_base64 = None
28
31
  has_failed = False
@@ -34,33 +37,38 @@ def get_take_screenshot_tool(ctx: MobileUseContext):
34
37
  output = str(e)
35
38
  has_failed = True
36
39
 
40
+ agent_outcome = (
41
+ glimpse_screen_wrapper.on_failure_fn()
42
+ if has_failed
43
+ else glimpse_screen_wrapper.on_success_fn()
44
+ )
45
+
37
46
  tool_message = ToolMessage(
38
47
  tool_call_id=tool_call_id,
39
- content=take_screenshot_wrapper.on_failure_fn()
40
- if has_failed
41
- else take_screenshot_wrapper.on_success_fn(),
48
+ content=agent_outcome,
42
49
  additional_kwargs={"error": output} if has_failed else {},
43
50
  status="error" if has_failed else "success",
44
51
  )
45
52
  updates = {
46
- "agents_thoughts": [agent_thought],
53
+ "agents_thoughts": [agent_thought, agent_outcome],
47
54
  EXECUTOR_MESSAGES_KEY: [tool_message],
48
55
  }
49
56
  if compressed_image_base64:
50
57
  updates["latest_screenshot_base64"] = compressed_image_base64
51
58
  return Command(
52
- update=state.sanitize_update(
59
+ update=await state.asanitize_update(
53
60
  ctx=ctx,
54
61
  update=updates,
55
62
  agent="executor",
56
63
  ),
57
64
  )
58
65
 
59
- return take_screenshot
66
+ return glimpse_screen
60
67
 
61
68
 
62
- take_screenshot_wrapper = ToolWrapper(
63
- tool_fn_getter=get_take_screenshot_tool,
64
- on_success_fn=lambda: "Screenshot taken successfully.",
65
- on_failure_fn=lambda: "Failed to take screenshot.",
69
+ glimpse_screen_wrapper = ToolWrapper(
70
+ tool_fn_getter=get_glimpse_screen_tool,
71
+ on_success_fn=lambda: "Visual context captured successfully."
72
+ + "It is now available for immediate analysis.",
73
+ on_failure_fn=lambda: "Failed to capture visual context.",
66
74
  )