PyPI - minitap-mobile-use - Versions diffs - 2.1.0__py3-none-any.whl → 2.3.0__py3-none-any.whl - Mend

minitap-mobile-use 2.1.0__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (36)

minitap/mobile_use/agents/contextor/contextor.py +4 -2
minitap/mobile_use/agents/cortex/cortex.md +72 -26
minitap/mobile_use/agents/cortex/cortex.py +1 -2
minitap/mobile_use/agents/executor/executor.md +6 -4
minitap/mobile_use/agents/executor/executor.py +3 -1
minitap/mobile_use/agents/executor/utils.py +2 -1
minitap/mobile_use/agents/outputter/test_outputter.py +104 -42
minitap/mobile_use/agents/planner/planner.md +1 -1
minitap/mobile_use/agents/planner/planner.py +4 -2
minitap/mobile_use/config.py +16 -1
minitap/mobile_use/controllers/mobile_command_controller.py +4 -4
minitap/mobile_use/main.py +2 -2
minitap/mobile_use/sdk/agent.py +17 -8
minitap/mobile_use/sdk/builders/agent_config_builder.py +2 -2
minitap/mobile_use/sdk/types/exceptions.py +30 -0
minitap/mobile_use/sdk/utils.py +3 -2
minitap/mobile_use/servers/device_hardware_bridge.py +2 -1
minitap/mobile_use/servers/utils.py +6 -9
minitap/mobile_use/services/llm.py +23 -6
minitap/mobile_use/tools/index.py +21 -15
minitap/mobile_use/tools/mobile/clear_text.py +73 -25
minitap/mobile_use/tools/mobile/copy_text_from.py +7 -5
minitap/mobile_use/tools/mobile/{take_screenshot.py → glimpse_screen.py} +15 -11
minitap/mobile_use/tools/mobile/input_text.py +94 -13
minitap/mobile_use/tools/mobile/paste_text.py +34 -8
minitap/mobile_use/tools/mobile/swipe.py +107 -9
minitap/mobile_use/tools/test_utils.py +351 -0
minitap/mobile_use/tools/tool_wrapper.py +5 -0
minitap/mobile_use/tools/utils.py +147 -40
minitap/mobile_use/utils/recorder.py +2 -9
minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
minitap/mobile_use/utils/ui_hierarchy.py +2 -2
{minitap_mobile_use-2.1.0.dist-info → minitap_mobile_use-2.3.0.dist-info}/METADATA +28 -8
{minitap_mobile_use-2.1.0.dist-info → minitap_mobile_use-2.3.0.dist-info}/RECORD +36 -34
{minitap_mobile_use-2.1.0.dist-info → minitap_mobile_use-2.3.0.dist-info}/WHEEL +0 -0
{minitap_mobile_use-2.1.0.dist-info → minitap_mobile_use-2.3.0.dist-info}/entry_points.txt +0 -0

minitap/mobile_use/sdk/types/exceptions.py CHANGED Viewed

@@ -4,6 +4,8 @@ Exceptions for the Mobile-use SDK.
 This module defines the exception hierarchy used throughout the Mobile-use SDK.
 """
+from typing import Literal
 class MobileUseError(Exception):
     """Base exception class for all Mobile-use SDK exceptions."""
@@ -72,3 +74,31 @@ class AgentProfileNotFoundError(AgentTaskRequestError):
     def __init__(self, profile_name: str):
         super().__init__(f"Agent profile {profile_name} not found")
+EXECUTABLES = Literal["adb", "maestro", "xcrun", "cli_tools"]
+class ExecutableNotFoundError(MobileUseError):
+    """Exception raised when a required executable is not found."""
+    def __init__(self, executable_name: EXECUTABLES):
+        install_instructions: dict[EXECUTABLES, str] = {
+            "adb": "https://developer.android.com/tools/adb",
+            "maestro": "https://docs.maestro.dev/getting-started/installing-maestro",
+            "xcrun": "Install with: xcode-select --install",
+        }
+        if executable_name == "cli_tools":
+            message = (
+                "ADB or Xcode Command Line Tools not found in PATH. "
+                "At least one of them is required to run mobile-use "
+                "depending on the device platform you wish to run (Android: adb, iOS: xcrun)."
+                "Refer to the following links for installation instructions :"
+                f"\n- ADB: {install_instructions['adb']}"
+                f"\n- Xcode Command Line Tools: {install_instructions['xcrun']}"
+            )
+        else:
+            message = f"Required executable '{executable_name}' not found in PATH."
+            if executable_name in install_instructions:
+                message += f"\nTo install it, please visit: {install_instructions[executable_name]}"
+        super().__init__(message)

minitap/mobile_use/sdk/utils.py CHANGED Viewed

@@ -2,11 +2,11 @@ import os
 from pathlib import Path
 from pydantic import ValidationError
 from minitap.mobile_use.config import LLMConfig, deep_merge_llm_config, get_default_llm_config
 from minitap.mobile_use.utils.file import load_jsonc
 from minitap.mobile_use.utils.logger import get_logger
 logger = get_logger(__name__)
@@ -24,5 +24,6 @@ def load_llm_config_override(path: Path) -> LLMConfig:
     try:
         return deep_merge_llm_config(default_config, override_config_dict)
     except ValidationError as e:
-        logger.error(f"Invalid LLM config: {e}. Falling back to default config")
+        logger.error(f"Invalid LLM config: {e}")
+        logger.info("Falling back to default config")
         return default_config

minitap/mobile_use/servers/device_hardware_bridge.py CHANGED Viewed

@@ -6,6 +6,7 @@ import time
 from enum import Enum
 import requests
 from minitap.mobile_use.context import DevicePlatform
 from minitap.mobile_use.servers.utils import is_port_in_use
@@ -175,7 +176,7 @@ class DeviceHardwareBridge:
         ]
     def start(self):
-        if is_port_in_use(DEVICE_HARDWARE_BRIDGE_PORT):
+        if is_port_in_use(port=DEVICE_HARDWARE_BRIDGE_PORT):
             print("Maestro port already in use - assuming Maestro is running.")
             self.status = BridgeStatus.RUNNING
             return True

minitap/mobile_use/servers/utils.py CHANGED Viewed

@@ -1,11 +1,8 @@
-import psutil
+import contextlib
+import socket
-def is_port_in_use(port: int):
-    for conn in psutil.net_connections():
-        if conn.status == psutil.CONN_LISTEN and conn.laddr:
-            if hasattr(conn.laddr, "port") and conn.laddr.port == port:
-                return True
-            elif isinstance(conn.laddr, tuple) and len(conn.laddr) >= 2 and conn.laddr[1] == port:
-                return True
-    return False
+def is_port_in_use(port: int, host: str = "127.0.0.1") -> bool:
+    with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
+        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        return s.connect_ex((host, port)) == 0

minitap/mobile_use/services/llm.py CHANGED Viewed

@@ -1,10 +1,12 @@
 import logging
-from typing import Literal, TypeVar
 from collections.abc import Awaitable, Callable
-from typing import overload
+from typing import Literal, TypeVar, overload
+from langchain_core.language_models.chat_models import BaseChatModel
 from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_google_vertexai import ChatVertexAI
 from langchain_openai import ChatOpenAI
 from minitap.mobile_use.config import (
     AgentNode,
     AgentNodeWithFallback,
@@ -32,6 +34,19 @@ def get_google_llm(
     return client
+def get_vertex_llm(
+    model_name: str = "gemini-2.5-pro",
+    temperature: float = 0.7,
+) -> ChatVertexAI:
+    client = ChatVertexAI(
+        model_name=model_name,
+        max_tokens=None,
+        temperature=temperature,
+        max_retries=2,
+    )
+    return client
 def get_openai_llm(
     model_name: str = "o3",
     temperature: float = 1,
@@ -75,7 +90,7 @@ def get_llm(
     *,
     use_fallback: bool = False,
     temperature: float = 1,
-): ...
+) -> BaseChatModel: ...
 @overload
@@ -84,7 +99,7 @@ def get_llm(
     name: AgentNode,
     *,
     temperature: float = 1,
-): ...
+) -> BaseChatModel: ...
 @overload
@@ -94,7 +109,7 @@ def get_llm(
     *,
     is_utils: Literal[True],
     temperature: float = 1,
-): ...
+) -> BaseChatModel: ...
 def get_llm(
@@ -103,7 +118,7 @@ def get_llm(
     is_utils: bool = False,
     use_fallback: bool = False,
     temperature: float = 1,
-):
+) -> BaseChatModel:
     llm = (
         ctx.llm_config.get_utils(name)  # type: ignore
         if is_utils
@@ -118,6 +133,8 @@ def get_llm(
         return get_openai_llm(llm.model, temperature)
     elif llm.provider == "google":
         return get_google_llm(llm.model, temperature)
+    elif llm.provider == "vertexai":
+        return get_vertex_llm(llm.model, temperature)
     elif llm.provider == "openrouter":
         return get_openrouter_llm(llm.model, temperature)
     elif llm.provider == "xai":

minitap/mobile_use/tools/index.py CHANGED Viewed

@@ -6,6 +6,7 @@ from minitap.mobile_use.tools.mobile.clear_text import clear_text_wrapper
 from minitap.mobile_use.tools.mobile.copy_text_from import copy_text_from_wrapper
 from minitap.mobile_use.tools.mobile.erase_one_char import erase_one_char_wrapper
 from minitap.mobile_use.tools.mobile.find_packages import find_packages_wrapper
+from minitap.mobile_use.tools.mobile.glimpse_screen import glimpse_screen_wrapper
 from minitap.mobile_use.tools.mobile.input_text import input_text_wrapper
 from minitap.mobile_use.tools.mobile.launch_app import launch_app_wrapper
 from minitap.mobile_use.tools.mobile.long_press_on import long_press_on_wrapper
@@ -14,12 +15,11 @@ from minitap.mobile_use.tools.mobile.paste_text import paste_text_wrapper
 from minitap.mobile_use.tools.mobile.press_key import press_key_wrapper
 from minitap.mobile_use.tools.mobile.stop_app import stop_app_wrapper
 from minitap.mobile_use.tools.mobile.swipe import swipe_wrapper
-from minitap.mobile_use.tools.mobile.take_screenshot import take_screenshot_wrapper
 from minitap.mobile_use.tools.mobile.tap import tap_wrapper
 from minitap.mobile_use.tools.mobile.wait_for_animation_to_end import (
     wait_for_animation_to_end_wrapper,
 )
-from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
+from minitap.mobile_use.tools.tool_wrapper import CompositeToolWrapper, ToolWrapper
 EXECUTOR_WRAPPERS_TOOLS = [
     back_wrapper,
@@ -27,7 +27,7 @@ EXECUTOR_WRAPPERS_TOOLS = [
     tap_wrapper,
     long_press_on_wrapper,
     swipe_wrapper,
-    take_screenshot_wrapper,
+    glimpse_screen_wrapper,
     copy_text_from_wrapper,
     input_text_wrapper,
     erase_one_char_wrapper,
@@ -41,18 +41,24 @@ EXECUTOR_WRAPPERS_TOOLS = [
 ]
-def get_tools_from_wrappers(ctx: MobileUseContext, wrappers: list[ToolWrapper]) -> list[BaseTool]:
-    """Get the tools from the wrappers."""
-    return [wrapper.tool_fn_getter(ctx) for wrapper in wrappers]
+def get_tools_from_wrappers(
+    ctx: "MobileUseContext",
+    wrappers: list[ToolWrapper],
+) -> list[BaseTool]:
+    tools: list[BaseTool] = []
+    for wrapper in wrappers:
+        if ctx.llm_config.get_agent("executor").provider == "vertexai":
+            # The main swipe tool argument structure is not supported by vertexai, we need to split
+            # this tool into multiple tools
+            if wrapper.tool_fn_getter == swipe_wrapper.tool_fn_getter and isinstance(
+                wrapper, CompositeToolWrapper
+            ):
+                tools.extend(wrapper.composite_tools_fn_getter(ctx))
+                continue
-def format_tools_list(ctx: MobileUseContext, wrappers: list[ToolWrapper]) -> str:
-    return "\n".join([tool.name for tool in get_tools_from_wrappers(ctx, wrappers)])
+        tools.append(wrapper.tool_fn_getter(ctx))
+    return tools
-def get_tool_wrapper_from_name(name: str) -> ToolWrapper | None:
-    """Get the tool wrapper from the name."""
-    for wrapper in EXECUTOR_WRAPPERS_TOOLS:
-        if wrapper.tool_fn_getter.__name__ == f"get_{name}_tool":
-            return wrapper
-    return None
+def format_tools_list(ctx: MobileUseContext, wrappers: list[ToolWrapper]) -> str:
+    return ", ".join([tool.name for tool in get_tools_from_wrappers(ctx, wrappers)])

minitap/mobile_use/tools/mobile/clear_text.py CHANGED Viewed

@@ -23,6 +23,7 @@ from minitap.mobile_use.tools.utils import (
 )
 from minitap.mobile_use.utils.logger import get_logger
 from minitap.mobile_use.utils.ui_hierarchy import (
+    ElementBounds,
     find_element_by_resource_id,
     get_element_text,
     text_input_is_empty,
@@ -50,16 +51,20 @@ class TextClearer:
         screen_data = get_screen_data(screen_api_client=self.ctx.screen_api_client)
         self.state.latest_ui_hierarchy = screen_data.elements
-    def _get_element_info(self, resource_id: str) -> tuple[object | None, str | None, str | None]:
+    def _get_element_info(
+        self, resource_id: str | None
+    ) -> tuple[object | None, str | None, str | None]:
         if not self.state.latest_ui_hierarchy:
             self._refresh_ui_hierarchy()
         if not self.state.latest_ui_hierarchy:
             return None, None, None
-        element = find_element_by_resource_id(
-            ui_hierarchy=self.state.latest_ui_hierarchy, resource_id=resource_id
-        )
+        element = None
+        if resource_id:
+            element = find_element_by_resource_id(
+                ui_hierarchy=self.state.latest_ui_hierarchy, resource_id=resource_id
+            )
         if not element:
             return None, None, None
@@ -83,11 +88,27 @@ class TextClearer:
     def _should_clear_text(self, current_text: str | None, hint_text: str | None) -> bool:
         return current_text is not None and current_text != "" and current_text != hint_text
-    def _prepare_element_for_clearing(self, resource_id: str) -> bool:
-        if not focus_element_if_needed(ctx=self.ctx, resource_id=resource_id):
+    def _prepare_element_for_clearing(
+        self,
+        text_input_resource_id: str | None,
+        text_input_coordinates: ElementBounds | None,
+        text_input_text: str | None,
+    ) -> bool:
+        if not focus_element_if_needed(
+            ctx=self.ctx,
+            input_resource_id=text_input_resource_id,
+            input_coordinates=text_input_coordinates,
+            input_text=text_input_text,
+        ):
             return False
-        move_cursor_to_end_if_bounds(ctx=self.ctx, state=self.state, resource_id=resource_id)
+        move_cursor_to_end_if_bounds(
+            ctx=self.ctx,
+            state=self.state,
+            text_input_resource_id=text_input_resource_id,
+            text_input_coordinates=text_input_coordinates,
+            text_input_text=text_input_text,
+        )
         return True
     def _erase_text_attempt(self, text_length: int) -> str | None:
@@ -102,7 +123,12 @@ class TextClearer:
         return None
     def _clear_with_retries(
-        self, resource_id: str, initial_text: str, hint_text: str | None
+        self,
+        text_input_resource_id: str | None,
+        text_input_coordinates: ElementBounds | None,
+        text_input_text: str | None,
+        initial_text: str,
+        hint_text: str | None,
     ) -> tuple[bool, str | None, int]:
         current_text = initial_text
         erased_chars = 0
@@ -118,18 +144,25 @@ class TextClearer:
             erased_chars += chars_to_erase
             self._refresh_ui_hierarchy()
-            elt = find_element_by_resource_id(
-                ui_hierarchy=self.state.latest_ui_hierarchy or [],
-                resource_id=resource_id,
-            )
-            if elt:
-                current_text = get_element_text(elt)
-                logger.info(f"Current text: {current_text}")
-                if text_input_is_empty(text=current_text, hint_text=hint_text):
-                    break
+            elt = None
+            if text_input_resource_id:
+                elt = find_element_by_resource_id(
+                    ui_hierarchy=self.state.latest_ui_hierarchy or [],
+                    resource_id=text_input_resource_id,
+                )
+                if elt:
+                    current_text = get_element_text(elt)
+                    logger.info(f"Current text: {current_text}")
+                    if text_input_is_empty(text=current_text, hint_text=hint_text):
+                        break
             move_cursor_to_end_if_bounds(
-                ctx=self.ctx, state=self.state, resource_id=resource_id, elt=elt
+                ctx=self.ctx,
+                state=self.state,
+                text_input_resource_id=text_input_resource_id,
+                text_input_coordinates=text_input_coordinates,
+                text_input_text=text_input_text,
+                elt=elt,
             )
         return True, current_text, erased_chars
@@ -162,7 +195,9 @@ class TextClearer:
             hint_text=hint_text,
         )
-    def _handle_element_not_found(self, resource_id: str, hint_text: str | None) -> ClearTextResult:
+    def _handle_element_not_found(
+        self, resource_id: str | None, hint_text: str | None
+    ) -> ClearTextResult:
         error = erase_text_controller(ctx=self.ctx)
         self._refresh_ui_hierarchy()
@@ -176,16 +211,23 @@ class TextClearer:
             hint_text=hint_text,
         )
-    def clear_text_by_resource_id(self, resource_id: str) -> ClearTextResult:
-        element, current_text, hint_text = self._get_element_info(resource_id)
+    def clear_input_text(
+        self,
+        text_input_resource_id: str | None,
+        text_input_coordinates: ElementBounds | None,
+        text_input_text: str | None,
+    ) -> ClearTextResult:
+        element, current_text, hint_text = self._get_element_info(text_input_resource_id)
         if not element:
-            return self._handle_element_not_found(resource_id, hint_text)
+            return self._handle_element_not_found(text_input_resource_id, hint_text)
         if not self._should_clear_text(current_text, hint_text):
             return self._handle_no_clearing_needed(current_text, hint_text)
-        if not self._prepare_element_for_clearing(resource_id):
+        if not self._prepare_element_for_clearing(
+            text_input_resource_id, text_input_coordinates, text_input_text
+        ):
             return self._create_result(
                 success=False,
                 error_message="Failed to focus element",
@@ -195,7 +237,9 @@ class TextClearer:
             )
         success, final_text, chars_erased = self._clear_with_retries(
-            resource_id=resource_id,
+            text_input_resource_id=text_input_resource_id,
+            text_input_coordinates=text_input_coordinates,
+            text_input_text=text_input_text,
             initial_text=current_text or "",
             hint_text=hint_text,
         )
@@ -218,12 +262,16 @@ def get_clear_text_tool(ctx: MobileUseContext):
         state: Annotated[State, InjectedState],
         agent_thought: str,
         text_input_resource_id: str,
+        text_input_coordinates: ElementBounds | None,
+        text_input_text: str | None,
     ):
         """
         Clears all the text from the text field, by focusing it if needed.
         """
         clearer = TextClearer(ctx, state)
-        result = clearer.clear_text_by_resource_id(text_input_resource_id)
+        result = clearer.clear_input_text(
+            text_input_resource_id, text_input_coordinates, text_input_text
+        )
         content = (
             clear_text_wrapper.on_failure_fn(result.error_message)

minitap/mobile_use/tools/mobile/copy_text_from.py CHANGED Viewed

@@ -1,18 +1,20 @@
+from typing import Annotated
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import tool
 from langchain_core.tools.base import InjectedToolCallId
+from langgraph.prebuilt import InjectedState
 from langgraph.types import Command
+from pydantic import Field
 from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
+from minitap.mobile_use.context import MobileUseContext
 from minitap.mobile_use.controllers.mobile_command_controller import SelectorRequest
 from minitap.mobile_use.controllers.mobile_command_controller import (
     copy_text_from as copy_text_from_controller,
 )
-from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
-from pydantic import Field
-from typing import Annotated
-from minitap.mobile_use.context import MobileUseContext
 from minitap.mobile_use.graph.state import State
-from langgraph.prebuilt import InjectedState
+from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
 def get_copy_text_from_tool(ctx: MobileUseContext):

minitap/mobile_use/tools/mobile/{take_screenshot.py → glimpse_screen.py} RENAMED Viewed

@@ -1,8 +1,11 @@
+from typing import Annotated
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import tool
 from langchain_core.tools.base import InjectedToolCallId
 from langgraph.prebuilt import InjectedState
 from langgraph.types import Command
 from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
 from minitap.mobile_use.context import MobileUseContext
 from minitap.mobile_use.controllers.mobile_command_controller import (
@@ -11,18 +14,18 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
 from minitap.mobile_use.graph.state import State
 from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
 from minitap.mobile_use.utils.media import compress_base64_jpeg
-from typing import Annotated
-def get_take_screenshot_tool(ctx: MobileUseContext):
+def get_glimpse_screen_tool(ctx: MobileUseContext):
     @tool
-    def take_screenshot(
+    def glimpse_screen(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
     ):
         """
-        Take a screenshot of the device.
+        Captures the current screen as an image.
+        The resulting screenshot is added to the context for the next reasoning step.
         """
         compressed_image_base64 = None
         has_failed = False
@@ -36,9 +39,9 @@ def get_take_screenshot_tool(ctx: MobileUseContext):
         tool_message = ToolMessage(
             tool_call_id=tool_call_id,
-            content=take_screenshot_wrapper.on_failure_fn()
+            content=glimpse_screen_wrapper.on_failure_fn()
             if has_failed
-            else take_screenshot_wrapper.on_success_fn(),
+            else glimpse_screen_wrapper.on_success_fn(),
             additional_kwargs={"error": output} if has_failed else {},
             status="error" if has_failed else "success",
         )
@@ -56,11 +59,12 @@ def get_take_screenshot_tool(ctx: MobileUseContext):
             ),
         )
-    return take_screenshot
+    return glimpse_screen
-take_screenshot_wrapper = ToolWrapper(
-    tool_fn_getter=get_take_screenshot_tool,
-    on_success_fn=lambda: "Screenshot taken successfully.",
-    on_failure_fn=lambda: "Failed to take screenshot.",
+glimpse_screen_wrapper = ToolWrapper(
+    tool_fn_getter=get_glimpse_screen_tool,
+    on_success_fn=lambda: "Visual context captured successfully."
+    + "It is now available for immediate analysis.",
+    on_failure_fn=lambda: "Failed to capture visual context.",
 )

minitap-mobile-use 2.1.0__py3-none-any.whl → 2.3.0__py3-none-any.whl

Potentially problematic release.

minitap-mobile-use 2.1.0__py3-none-any.whl → 2.3.0__py3-none-any.whl