PyPI - minitap-mobile-use - Versions diffs - 2.2.0__py3-none-any.whl → 2.4.0__py3-none-any.whl - Mend

minitap-mobile-use 2.2.0__py3-none-any.whl → 2.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (59)

minitap/mobile_use/agents/contextor/contextor.py +6 -4
minitap/mobile_use/agents/cortex/cortex.md +114 -27
minitap/mobile_use/agents/cortex/cortex.py +8 -5
minitap/mobile_use/agents/executor/executor.md +15 -10
minitap/mobile_use/agents/executor/executor.py +6 -5
minitap/mobile_use/agents/executor/utils.py +2 -1
minitap/mobile_use/agents/hopper/hopper.py +6 -3
minitap/mobile_use/agents/orchestrator/orchestrator.py +26 -11
minitap/mobile_use/agents/outputter/outputter.py +6 -3
minitap/mobile_use/agents/outputter/test_outputter.py +104 -42
minitap/mobile_use/agents/planner/planner.md +20 -22
minitap/mobile_use/agents/planner/planner.py +10 -7
minitap/mobile_use/agents/planner/types.py +4 -2
minitap/mobile_use/agents/planner/utils.py +14 -0
minitap/mobile_use/agents/summarizer/summarizer.py +2 -2
minitap/mobile_use/config.py +6 -1
minitap/mobile_use/context.py +13 -3
minitap/mobile_use/controllers/mobile_command_controller.py +1 -14
minitap/mobile_use/graph/state.py +7 -3
minitap/mobile_use/sdk/agent.py +204 -29
minitap/mobile_use/sdk/examples/README.md +19 -1
minitap/mobile_use/sdk/examples/platform_minimal_example.py +46 -0
minitap/mobile_use/sdk/services/platform.py +244 -0
minitap/mobile_use/sdk/types/__init__.py +14 -14
minitap/mobile_use/sdk/types/exceptions.py +57 -0
minitap/mobile_use/sdk/types/platform.py +125 -0
minitap/mobile_use/sdk/types/task.py +60 -17
minitap/mobile_use/servers/device_hardware_bridge.py +3 -2
minitap/mobile_use/servers/stop_servers.py +11 -12
minitap/mobile_use/servers/utils.py +6 -9
minitap/mobile_use/services/llm.py +89 -5
minitap/mobile_use/tools/index.py +2 -8
minitap/mobile_use/tools/mobile/back.py +3 -3
minitap/mobile_use/tools/mobile/clear_text.py +67 -38
minitap/mobile_use/tools/mobile/erase_one_char.py +5 -4
minitap/mobile_use/tools/mobile/{take_screenshot.py → glimpse_screen.py} +23 -15
minitap/mobile_use/tools/mobile/input_text.py +67 -16
minitap/mobile_use/tools/mobile/launch_app.py +54 -22
minitap/mobile_use/tools/mobile/long_press_on.py +15 -8
minitap/mobile_use/tools/mobile/open_link.py +15 -8
minitap/mobile_use/tools/mobile/press_key.py +15 -8
minitap/mobile_use/tools/mobile/stop_app.py +14 -8
minitap/mobile_use/tools/mobile/swipe.py +11 -5
minitap/mobile_use/tools/mobile/tap.py +103 -21
minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +3 -3
minitap/mobile_use/tools/test_utils.py +377 -0
minitap/mobile_use/tools/types.py +35 -0
minitap/mobile_use/tools/utils.py +149 -39
minitap/mobile_use/utils/recorder.py +1 -1
minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
minitap/mobile_use/utils/ui_hierarchy.py +11 -4
{minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/METADATA +6 -4
minitap_mobile_use-2.4.0.dist-info/RECORD +99 -0
minitap/mobile_use/tools/mobile/copy_text_from.py +0 -73
minitap/mobile_use/tools/mobile/find_packages.py +0 -69
minitap/mobile_use/tools/mobile/paste_text.py +0 -62
minitap_mobile_use-2.2.0.dist-info/RECORD +0 -96
{minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/WHEEL +0 -0
{minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/entry_points.txt +0 -0

minitap/mobile_use/servers/stop_servers.py CHANGED Viewed

@@ -70,18 +70,17 @@ def stop_process_gracefully(process: psutil.Process, timeout: int = 5) -> bool:
         return False
-def check_service_health(port: int, service_name: str) -> bool:
+def check_service_running(port: int, service_name: str) -> bool:
     try:
         if port == server_settings.DEVICE_SCREEN_API_PORT:
-            response = requests.get(f"http://localhost:{port}/health", timeout=2)
+            requests.get(f"http://localhost:{port}/health", timeout=2)
         elif port == DEVICE_HARDWARE_BRIDGE_PORT:
-            response = requests.get(f"http://localhost:{port}/api/banner-message", timeout=2)
+            requests.get(f"http://localhost:{port}/api/banner-message", timeout=2)
         else:
             return False
-        if response.status_code == 200:
-            logger.debug(f"{service_name} is still responding on port {port}")
-            return True
+        logger.debug(f"{service_name} is still responding on port {port}")
+        return True
     except requests.exceptions.RequestException:
         pass
@@ -92,7 +91,7 @@ def stop_device_screen_api() -> bool:
     logger.info("Stopping Device Screen API...")
     api_port = server_settings.DEVICE_SCREEN_API_PORT
-    if not check_service_health(api_port, "Device Screen API"):
+    if not check_service_running(api_port, "Device Screen API"):
         logger.success("Device Screen API is not running")
         return True
@@ -109,7 +108,7 @@ def stop_device_screen_api() -> bool:
         logger.warning("No Device Screen API processes found, but service is still responding")
         # Still try to verify if service actually stops
         time.sleep(1)
-        if not check_service_health(api_port, "Device Screen API"):
+        if not check_service_running(api_port, "Device Screen API"):
             logger.success("Device Screen API stopped successfully (was orphaned)")
             return True
         return False
@@ -120,7 +119,7 @@ def stop_device_screen_api() -> bool:
     # Verify service is stopped
     time.sleep(1)
-    if check_service_health(api_port, "Device Screen API"):
+    if check_service_running(api_port, "Device Screen API"):
         logger.error("Device Screen API is still running after stop attempt")
         return False
@@ -131,7 +130,7 @@ def stop_device_screen_api() -> bool:
 def stop_device_hardware_bridge() -> bool:
     logger.info("Stopping Device Hardware Bridge...")
-    if not check_service_health(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
+    if not check_service_running(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
         logger.success("Device Hardware Bridge is not running")
         return True
@@ -145,7 +144,7 @@ def stop_device_hardware_bridge() -> bool:
         logger.warning("No Device Hardware Bridge processes found, but service is still responding")
         # Still try to verify if service actually stops
         time.sleep(1)
-        if not check_service_health(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
+        if not check_service_running(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
             logger.success("Device Hardware Bridge stopped successfully (was orphaned)")
             return True
         return False
@@ -154,7 +153,7 @@ def stop_device_hardware_bridge() -> bool:
         stop_process_gracefully(proc)
     time.sleep(1)
-    if check_service_health(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
+    if check_service_running(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
         logger.error("Device Hardware Bridge is still running after stop attempt")
         return False

minitap/mobile_use/servers/utils.py CHANGED Viewed

@@ -1,11 +1,8 @@
-import psutil
+import contextlib
+import socket
-def is_port_in_use(port: int):
-    for conn in psutil.net_connections():
-        if conn.status == psutil.CONN_LISTEN and conn.laddr:
-            if hasattr(conn.laddr, "port") and conn.laddr.port == port:
-                return True
-            elif isinstance(conn.laddr, tuple) and len(conn.laddr) >= 2 and conn.laddr[1] == port:
-                return True
-    return False
+def is_port_in_use(port: int, host: str = "127.0.0.1") -> bool:
+    with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
+        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        return s.connect_ex((host, port)) == 0

minitap/mobile_use/services/llm.py CHANGED Viewed

@@ -1,11 +1,13 @@
+import asyncio
 import logging
-from collections.abc import Awaitable, Callable
-from typing import Literal, TypeVar, overload
+from collections.abc import Awaitable, Callable, Coroutine
+from typing import Any, Literal, TypeVar, overload
 from langchain_core.language_models.chat_models import BaseChatModel
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_google_vertexai import ChatVertexAI
 from langchain_openai import ChatOpenAI
+from pydantic import SecretStr
 from minitap.mobile_use.config import (
     AgentNode,
@@ -15,8 +17,79 @@ from minitap.mobile_use.config import (
     settings,
 )
 from minitap.mobile_use.context import MobileUseContext
+from minitap.mobile_use.utils.logger import get_logger
-logger = logging.getLogger(__name__)
+# Logger for internal messages (ex: fallback)
+llm_logger = logging.getLogger(__name__)
+# Logger for user messages
+user_messages_logger = get_logger(__name__)
+async def invoke_llm_with_timeout_message[T](
+    llm_call: Coroutine[Any, Any, T],
+    agent_name: str,
+    timeout_seconds: int = 10,
+) -> T:
+    """
+    Send a LLM call and display a timeout message if it takes too long.
+    Args:
+        llm_call: The coroutine of the LLM call to execute.
+        agent_name: The name of the agent making the call (for the message).
+        timeout_seconds: The delay in seconds before displaying the message.
+    Returns:
+        The result of the LLM call.
+    """
+    llm_task = asyncio.create_task(llm_call)
+    waiter_task = asyncio.create_task(asyncio.sleep(timeout_seconds))
+    done, _ = await asyncio.wait({llm_task, waiter_task}, return_when=asyncio.FIRST_COMPLETED)
+    if llm_task in done:
+        # The LLM call has finished before the timeout, cancel the timer
+        waiter_task.cancel()
+        return llm_task.result()
+    else:
+        # The timeout has been reached, display the message and wait for the call to finish
+        user_messages_logger.info("Waiting for LLM call response...")
+        return await llm_task
+def get_minitap_llm(
+    trace_id: str,
+    remote_tracing: bool = False,
+    model: str = "google/gemini-2.5-pro",
+    temperature: float | None = None,
+    max_retries: int | None = None,
+    api_key: str | None = None,
+) -> ChatOpenAI:
+    if api_key:
+        effective_api_key = SecretStr(api_key)
+    elif settings.MINITAP_API_KEY:
+        effective_api_key = settings.MINITAP_API_KEY
+    else:
+        raise ValueError("MINITAP_API_KEY must be provided or set in environment")
+    if settings.MINITAP_API_BASE_URL is None:
+        raise ValueError("MINITAP_API_BASE_URL must be set in environment")
+    llm_base_url = f"{settings.MINITAP_API_BASE_URL}/api/v1"
+    if max_retries is None and model.startswith("google/"):
+        max_retries = 2
+    client = ChatOpenAI(
+        model=model,
+        temperature=temperature,
+        max_retries=max_retries,
+        api_key=effective_api_key,
+        base_url=llm_base_url,
+        default_query={
+            "sessionId": trace_id,
+            "traceOnlyUsage": remote_tracing,
+        },
+    )
+    return client
 def get_google_llm(
@@ -139,6 +212,17 @@ def get_llm(
         return get_openrouter_llm(llm.model, temperature)
     elif llm.provider == "xai":
         return get_grok_llm(llm.model, temperature)
+    elif llm.provider == "minitap":
+        remote_tracing = False
+        if ctx.execution_setup:
+            remote_tracing = ctx.execution_setup.enable_remote_tracing
+        return get_minitap_llm(
+            trace_id=ctx.trace_id,
+            remote_tracing=remote_tracing,
+            model=llm.model,
+            temperature=temperature,
+            api_key=ctx.minitap_api_key,
+        )
     else:
         raise ValueError(f"Unsupported provider: {llm.provider}")
@@ -154,9 +238,9 @@ async def with_fallback(
     try:
         result = await main_call()
         if result is None and none_should_fallback:
-            logger.warning("Main LLM inference returned None. Falling back...")
+            llm_logger.warning("Main LLM inference returned None. Falling back...")
             return await fallback_call()
         return result
     except Exception as e:
-        logger.warning(f"❗ Main LLM inference failed: {e}. Falling back...")
+        llm_logger.warning(f"❗ Main LLM inference failed: {e}. Falling back...")
         return await fallback_call()

minitap/mobile_use/tools/index.py CHANGED Viewed

@@ -3,18 +3,15 @@ from langchain_core.tools import BaseTool
 from minitap.mobile_use.context import MobileUseContext
 from minitap.mobile_use.tools.mobile.back import back_wrapper
 from minitap.mobile_use.tools.mobile.clear_text import clear_text_wrapper
-from minitap.mobile_use.tools.mobile.copy_text_from import copy_text_from_wrapper
 from minitap.mobile_use.tools.mobile.erase_one_char import erase_one_char_wrapper
-from minitap.mobile_use.tools.mobile.find_packages import find_packages_wrapper
+from minitap.mobile_use.tools.mobile.glimpse_screen import glimpse_screen_wrapper
 from minitap.mobile_use.tools.mobile.input_text import input_text_wrapper
 from minitap.mobile_use.tools.mobile.launch_app import launch_app_wrapper
 from minitap.mobile_use.tools.mobile.long_press_on import long_press_on_wrapper
 from minitap.mobile_use.tools.mobile.open_link import open_link_wrapper
-from minitap.mobile_use.tools.mobile.paste_text import paste_text_wrapper
 from minitap.mobile_use.tools.mobile.press_key import press_key_wrapper
 from minitap.mobile_use.tools.mobile.stop_app import stop_app_wrapper
 from minitap.mobile_use.tools.mobile.swipe import swipe_wrapper
-from minitap.mobile_use.tools.mobile.take_screenshot import take_screenshot_wrapper
 from minitap.mobile_use.tools.mobile.tap import tap_wrapper
 from minitap.mobile_use.tools.mobile.wait_for_animation_to_end import (
     wait_for_animation_to_end_wrapper,
@@ -27,14 +24,11 @@ EXECUTOR_WRAPPERS_TOOLS = [
     tap_wrapper,
     long_press_on_wrapper,
     swipe_wrapper,
-    take_screenshot_wrapper,
-    copy_text_from_wrapper,
+    glimpse_screen_wrapper,
     input_text_wrapper,
     erase_one_char_wrapper,
-    find_packages_wrapper,
     launch_app_wrapper,
     stop_app_wrapper,
-    paste_text_wrapper,
     clear_text_wrapper,
     press_key_wrapper,
     wait_for_animation_to_end_wrapper,

minitap/mobile_use/tools/mobile/back.py CHANGED Viewed

@@ -13,11 +13,11 @@ from langgraph.prebuilt import InjectedState
 def get_back_tool(ctx: MobileUseContext):
     @tool
-    def back(
+    async def back(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
-    ):
+    ) -> Command:
         """Navigates to the previous screen. (Only works on Android for the moment)"""
         output = back_controller(ctx=ctx)
         has_failed = output is not None
@@ -28,7 +28,7 @@ def get_back_tool(ctx: MobileUseContext):
             status="error" if has_failed else "success",
         )
         return Command(
-            update=state.sanitize_update(
+            update=await state.asanitize_update(
                 ctx=ctx,
                 update={
                     "agents_thoughts": [agent_thought],

minitap/mobile_use/tools/mobile/clear_text.py CHANGED Viewed

@@ -12,15 +12,11 @@ from minitap.mobile_use.context import MobileUseContext
 from minitap.mobile_use.controllers.mobile_command_controller import (
     erase_text as erase_text_controller,
 )
-from minitap.mobile_use.controllers.mobile_command_controller import (
-    get_screen_data,
-)
+from minitap.mobile_use.controllers.mobile_command_controller import get_screen_data
 from minitap.mobile_use.graph.state import State
 from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
-from minitap.mobile_use.tools.utils import (
-    focus_element_if_needed,
-    move_cursor_to_end_if_bounds,
-)
+from minitap.mobile_use.tools.types import Target
+from minitap.mobile_use.tools.utils import focus_element_if_needed, move_cursor_to_end_if_bounds
 from minitap.mobile_use.utils.logger import get_logger
 from minitap.mobile_use.utils.ui_hierarchy import (
     find_element_by_resource_id,
@@ -50,16 +46,20 @@ class TextClearer:
         screen_data = get_screen_data(screen_api_client=self.ctx.screen_api_client)
         self.state.latest_ui_hierarchy = screen_data.elements
-    def _get_element_info(self, resource_id: str) -> tuple[object | None, str | None, str | None]:
+    def _get_element_info(
+        self, resource_id: str | None
+    ) -> tuple[object | None, str | None, str | None]:
         if not self.state.latest_ui_hierarchy:
             self._refresh_ui_hierarchy()
         if not self.state.latest_ui_hierarchy:
             return None, None, None
-        element = find_element_by_resource_id(
-            ui_hierarchy=self.state.latest_ui_hierarchy, resource_id=resource_id
-        )
+        element = None
+        if resource_id:
+            element = find_element_by_resource_id(
+                ui_hierarchy=self.state.latest_ui_hierarchy, resource_id=resource_id
+            )
         if not element:
             return None, None, None
@@ -83,11 +83,21 @@ class TextClearer:
     def _should_clear_text(self, current_text: str | None, hint_text: str | None) -> bool:
         return current_text is not None and current_text != "" and current_text != hint_text
-    def _prepare_element_for_clearing(self, resource_id: str) -> bool:
-        if not focus_element_if_needed(ctx=self.ctx, resource_id=resource_id):
+    def _prepare_element_for_clearing(
+        self,
+        target: Target,
+    ) -> bool:
+        if not focus_element_if_needed(
+            ctx=self.ctx,
+            target=target,
+        ):
             return False
-        move_cursor_to_end_if_bounds(ctx=self.ctx, state=self.state, resource_id=resource_id)
+        move_cursor_to_end_if_bounds(
+            ctx=self.ctx,
+            state=self.state,
+            target=target,
+        )
         return True
     def _erase_text_attempt(self, text_length: int) -> str | None:
@@ -102,7 +112,10 @@ class TextClearer:
         return None
     def _clear_with_retries(
-        self, resource_id: str, initial_text: str, hint_text: str | None
+        self,
+        target: Target,
+        initial_text: str,
+        hint_text: str | None,
     ) -> tuple[bool, str | None, int]:
         current_text = initial_text
         erased_chars = 0
@@ -118,18 +131,23 @@ class TextClearer:
             erased_chars += chars_to_erase
             self._refresh_ui_hierarchy()
-            elt = find_element_by_resource_id(
-                ui_hierarchy=self.state.latest_ui_hierarchy or [],
-                resource_id=resource_id,
-            )
-            if elt:
-                current_text = get_element_text(elt)
-                logger.info(f"Current text: {current_text}")
-                if text_input_is_empty(text=current_text, hint_text=hint_text):
-                    break
+            elt = None
+            if target.resource_id:
+                elt = find_element_by_resource_id(
+                    ui_hierarchy=self.state.latest_ui_hierarchy or [],
+                    resource_id=target.resource_id,
+                )
+                if elt:
+                    current_text = get_element_text(elt)
+                    logger.info(f"Current text: {current_text}")
+                    if text_input_is_empty(text=current_text, hint_text=hint_text):
+                        break
             move_cursor_to_end_if_bounds(
-                ctx=self.ctx, state=self.state, resource_id=resource_id, elt=elt
+                ctx=self.ctx,
+                state=self.state,
+                target=target,
+                elt=elt,
             )
         return True, current_text, erased_chars
@@ -162,7 +180,9 @@ class TextClearer:
             hint_text=hint_text,
         )
-    def _handle_element_not_found(self, resource_id: str, hint_text: str | None) -> ClearTextResult:
+    def _handle_element_not_found(
+        self, resource_id: str | None, hint_text: str | None
+    ) -> ClearTextResult:
         error = erase_text_controller(ctx=self.ctx)
         self._refresh_ui_hierarchy()
@@ -176,16 +196,23 @@ class TextClearer:
             hint_text=hint_text,
         )
-    def clear_text_by_resource_id(self, resource_id: str) -> ClearTextResult:
-        element, current_text, hint_text = self._get_element_info(resource_id)
+    def clear_input_text(
+        self,
+        target: Target,
+    ) -> ClearTextResult:
+        element, current_text, hint_text = self._get_element_info(
+            resource_id=target.resource_id,
+        )
         if not element:
-            return self._handle_element_not_found(resource_id, hint_text)
+            return self._handle_element_not_found(target.resource_id, hint_text)
         if not self._should_clear_text(current_text, hint_text):
             return self._handle_no_clearing_needed(current_text, hint_text)
-        if not self._prepare_element_for_clearing(resource_id):
+        if not self._prepare_element_for_clearing(
+            target=target,
+        ):
             return self._create_result(
                 success=False,
                 error_message="Failed to focus element",
@@ -195,7 +222,7 @@ class TextClearer:
             )
         success, final_text, chars_erased = self._clear_with_retries(
-            resource_id=resource_id,
+            target=target,
             initial_text=current_text or "",
             hint_text=hint_text,
         )
@@ -213,19 +240,21 @@ class TextClearer:
 def get_clear_text_tool(ctx: MobileUseContext):
     @tool
-    def clear_text(
+    async def clear_text(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
-        text_input_resource_id: str,
+        target: Target,
     ):
         """
         Clears all the text from the text field, by focusing it if needed.
         """
         clearer = TextClearer(ctx, state)
-        result = clearer.clear_text_by_resource_id(text_input_resource_id)
+        result = clearer.clear_input_text(
+            target=target,
+        )
-        content = (
+        agent_outcome = (
             clear_text_wrapper.on_failure_fn(result.error_message)
             if not result.success
             else clear_text_wrapper.on_success_fn(
@@ -235,16 +264,16 @@ def get_clear_text_tool(ctx: MobileUseContext):
         tool_message = ToolMessage(
             tool_call_id=tool_call_id,
-            content=content,
+            content=agent_outcome,
             additional_kwargs={"error": result.error_message} if not result.success else {},
             status="error" if not result.success else "success",
         )
         return Command(
-            update=state.sanitize_update(
+            update=await state.asanitize_update(
                 ctx=ctx,
                 update={
-                    "agents_thoughts": [agent_thought],
+                    "agents_thoughts": [agent_thought, agent_outcome],
                     EXECUTOR_MESSAGES_KEY: [tool_message],
                 },
                 agent="executor",

minitap/mobile_use/tools/mobile/erase_one_char.py CHANGED Viewed

@@ -1,9 +1,10 @@
+from typing import Annotated
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import tool
 from langchain_core.tools.base import InjectedToolCallId
 from langgraph.prebuilt import InjectedState
 from langgraph.types import Command
-from typing import Annotated
 from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
 from minitap.mobile_use.context import MobileUseContext
@@ -16,11 +17,11 @@ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
 def get_erase_one_char_tool(ctx: MobileUseContext):
     @tool
-    def erase_one_char(
+    async def erase_one_char(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
-    ):
+    ) -> Command:
         """
         Erase one character from a text area.
         It acts the same as pressing backspace a single time.
@@ -36,7 +37,7 @@ def get_erase_one_char_tool(ctx: MobileUseContext):
             status="error" if has_failed else "success",
         )
         return Command(
-            update=state.sanitize_update(
+            update=await state.asanitize_update(
                 ctx=ctx,
                 update={
                     "agents_thoughts": [agent_thought],

minitap/mobile_use/tools/mobile/{take_screenshot.py → glimpse_screen.py} RENAMED Viewed

@@ -1,8 +1,11 @@
+from typing import Annotated
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import tool
 from langchain_core.tools.base import InjectedToolCallId
 from langgraph.prebuilt import InjectedState
 from langgraph.types import Command
 from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
 from minitap.mobile_use.context import MobileUseContext
 from minitap.mobile_use.controllers.mobile_command_controller import (
@@ -11,18 +14,18 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
 from minitap.mobile_use.graph.state import State
 from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
 from minitap.mobile_use.utils.media import compress_base64_jpeg
-from typing import Annotated
-def get_take_screenshot_tool(ctx: MobileUseContext):
+def get_glimpse_screen_tool(ctx: MobileUseContext):
     @tool
-    def take_screenshot(
+    async def glimpse_screen(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
-    ):
+    ) -> Command:
         """
-        Take a screenshot of the device.
+        Captures the current screen as an image.
+        The resulting screenshot is added to the context for the next reasoning step.
         """
         compressed_image_base64 = None
         has_failed = False
@@ -34,33 +37,38 @@ def get_take_screenshot_tool(ctx: MobileUseContext):
             output = str(e)
             has_failed = True
+        agent_outcome = (
+            glimpse_screen_wrapper.on_failure_fn()
+            if has_failed
+            else glimpse_screen_wrapper.on_success_fn()
+        )
         tool_message = ToolMessage(
             tool_call_id=tool_call_id,
-            content=take_screenshot_wrapper.on_failure_fn()
-            if has_failed
-            else take_screenshot_wrapper.on_success_fn(),
+            content=agent_outcome,
             additional_kwargs={"error": output} if has_failed else {},
             status="error" if has_failed else "success",
         )
         updates = {
-            "agents_thoughts": [agent_thought],
+            "agents_thoughts": [agent_thought, agent_outcome],
             EXECUTOR_MESSAGES_KEY: [tool_message],
         }
         if compressed_image_base64:
             updates["latest_screenshot_base64"] = compressed_image_base64
         return Command(
-            update=state.sanitize_update(
+            update=await state.asanitize_update(
                 ctx=ctx,
                 update=updates,
                 agent="executor",
             ),
         )
-    return take_screenshot
+    return glimpse_screen
-take_screenshot_wrapper = ToolWrapper(
-    tool_fn_getter=get_take_screenshot_tool,
-    on_success_fn=lambda: "Screenshot taken successfully.",
-    on_failure_fn=lambda: "Failed to take screenshot.",
+glimpse_screen_wrapper = ToolWrapper(
+    tool_fn_getter=get_glimpse_screen_tool,
+    on_success_fn=lambda: "Visual context captured successfully."
+    + "It is now available for immediate analysis.",
+    on_failure_fn=lambda: "Failed to capture visual context.",
 )

minitap-mobile-use 2.2.0__py3-none-any.whl → 2.4.0__py3-none-any.whl

Potentially problematic release.

minitap-mobile-use 2.2.0__py3-none-any.whl → 2.4.0__py3-none-any.whl