PyPI - minitap-mobile-use - Versions diffs - 2.3.0__py3-none-any.whl → 2.5.0__py3-none-any.whl - Mend

minitap-mobile-use 2.3.0__py3-none-any.whl → 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (56)

minitap/mobile_use/agents/contextor/contextor.py +2 -2
minitap/mobile_use/agents/cortex/cortex.md +49 -8
minitap/mobile_use/agents/cortex/cortex.py +8 -4
minitap/mobile_use/agents/executor/executor.md +14 -11
minitap/mobile_use/agents/executor/executor.py +6 -5
minitap/mobile_use/agents/hopper/hopper.py +6 -3
minitap/mobile_use/agents/orchestrator/orchestrator.py +26 -11
minitap/mobile_use/agents/outputter/outputter.py +6 -3
minitap/mobile_use/agents/planner/planner.md +20 -22
minitap/mobile_use/agents/planner/planner.py +10 -7
minitap/mobile_use/agents/planner/types.py +4 -2
minitap/mobile_use/agents/planner/utils.py +14 -0
minitap/mobile_use/agents/summarizer/summarizer.py +2 -2
minitap/mobile_use/config.py +6 -1
minitap/mobile_use/context.py +13 -3
minitap/mobile_use/controllers/mobile_command_controller.py +1 -14
minitap/mobile_use/graph/state.py +7 -3
minitap/mobile_use/sdk/agent.py +188 -23
minitap/mobile_use/sdk/examples/README.md +19 -1
minitap/mobile_use/sdk/examples/platform_manual_task_example.py +65 -0
minitap/mobile_use/sdk/examples/platform_minimal_example.py +46 -0
minitap/mobile_use/sdk/services/platform.py +307 -0
minitap/mobile_use/sdk/types/__init__.py +16 -14
minitap/mobile_use/sdk/types/exceptions.py +27 -0
minitap/mobile_use/sdk/types/platform.py +127 -0
minitap/mobile_use/sdk/types/task.py +78 -17
minitap/mobile_use/servers/device_hardware_bridge.py +1 -1
minitap/mobile_use/servers/stop_servers.py +11 -12
minitap/mobile_use/services/llm.py +89 -5
minitap/mobile_use/tools/index.py +0 -6
minitap/mobile_use/tools/mobile/back.py +3 -3
minitap/mobile_use/tools/mobile/clear_text.py +24 -43
minitap/mobile_use/tools/mobile/erase_one_char.py +5 -4
minitap/mobile_use/tools/mobile/glimpse_screen.py +11 -7
minitap/mobile_use/tools/mobile/input_text.py +21 -51
minitap/mobile_use/tools/mobile/launch_app.py +54 -22
minitap/mobile_use/tools/mobile/long_press_on.py +15 -8
minitap/mobile_use/tools/mobile/open_link.py +15 -8
minitap/mobile_use/tools/mobile/press_key.py +15 -8
minitap/mobile_use/tools/mobile/stop_app.py +14 -8
minitap/mobile_use/tools/mobile/swipe.py +11 -5
minitap/mobile_use/tools/mobile/tap.py +103 -21
minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +3 -3
minitap/mobile_use/tools/test_utils.py +104 -78
minitap/mobile_use/tools/types.py +35 -0
minitap/mobile_use/tools/utils.py +51 -48
minitap/mobile_use/utils/recorder.py +1 -1
minitap/mobile_use/utils/ui_hierarchy.py +9 -2
{minitap_mobile_use-2.3.0.dist-info → minitap_mobile_use-2.5.0.dist-info}/METADATA +3 -1
minitap_mobile_use-2.5.0.dist-info/RECORD +100 -0
minitap/mobile_use/tools/mobile/copy_text_from.py +0 -75
minitap/mobile_use/tools/mobile/find_packages.py +0 -69
minitap/mobile_use/tools/mobile/paste_text.py +0 -88
minitap_mobile_use-2.3.0.dist-info/RECORD +0 -98
{minitap_mobile_use-2.3.0.dist-info → minitap_mobile_use-2.5.0.dist-info}/WHEEL +0 -0
{minitap_mobile_use-2.3.0.dist-info → minitap_mobile_use-2.5.0.dist-info}/entry_points.txt +0 -0

minitap/mobile_use/servers/stop_servers.py CHANGED Viewed

@@ -70,18 +70,17 @@ def stop_process_gracefully(process: psutil.Process, timeout: int = 5) -> bool:
         return False
-def check_service_health(port: int, service_name: str) -> bool:
+def check_service_running(port: int, service_name: str) -> bool:
     try:
         if port == server_settings.DEVICE_SCREEN_API_PORT:
-            response = requests.get(f"http://localhost:{port}/health", timeout=2)
+            requests.get(f"http://localhost:{port}/health", timeout=2)
         elif port == DEVICE_HARDWARE_BRIDGE_PORT:
-            response = requests.get(f"http://localhost:{port}/api/banner-message", timeout=2)
+            requests.get(f"http://localhost:{port}/api/banner-message", timeout=2)
         else:
             return False
-        if response.status_code == 200:
-            logger.debug(f"{service_name} is still responding on port {port}")
-            return True
+        logger.debug(f"{service_name} is still responding on port {port}")
+        return True
     except requests.exceptions.RequestException:
         pass
@@ -92,7 +91,7 @@ def stop_device_screen_api() -> bool:
     logger.info("Stopping Device Screen API...")
     api_port = server_settings.DEVICE_SCREEN_API_PORT
-    if not check_service_health(api_port, "Device Screen API"):
+    if not check_service_running(api_port, "Device Screen API"):
         logger.success("Device Screen API is not running")
         return True
@@ -109,7 +108,7 @@ def stop_device_screen_api() -> bool:
         logger.warning("No Device Screen API processes found, but service is still responding")
         # Still try to verify if service actually stops
         time.sleep(1)
-        if not check_service_health(api_port, "Device Screen API"):
+        if not check_service_running(api_port, "Device Screen API"):
             logger.success("Device Screen API stopped successfully (was orphaned)")
             return True
         return False
@@ -120,7 +119,7 @@ def stop_device_screen_api() -> bool:
     # Verify service is stopped
     time.sleep(1)
-    if check_service_health(api_port, "Device Screen API"):
+    if check_service_running(api_port, "Device Screen API"):
         logger.error("Device Screen API is still running after stop attempt")
         return False
@@ -131,7 +130,7 @@ def stop_device_screen_api() -> bool:
 def stop_device_hardware_bridge() -> bool:
     logger.info("Stopping Device Hardware Bridge...")
-    if not check_service_health(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
+    if not check_service_running(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
         logger.success("Device Hardware Bridge is not running")
         return True
@@ -145,7 +144,7 @@ def stop_device_hardware_bridge() -> bool:
         logger.warning("No Device Hardware Bridge processes found, but service is still responding")
         # Still try to verify if service actually stops
         time.sleep(1)
-        if not check_service_health(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
+        if not check_service_running(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
             logger.success("Device Hardware Bridge stopped successfully (was orphaned)")
             return True
         return False
@@ -154,7 +153,7 @@ def stop_device_hardware_bridge() -> bool:
         stop_process_gracefully(proc)
     time.sleep(1)
-    if check_service_health(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
+    if check_service_running(DEVICE_HARDWARE_BRIDGE_PORT, "Maestro Studio"):
         logger.error("Device Hardware Bridge is still running after stop attempt")
         return False

minitap/mobile_use/services/llm.py CHANGED Viewed

@@ -1,11 +1,13 @@
+import asyncio
 import logging
-from collections.abc import Awaitable, Callable
-from typing import Literal, TypeVar, overload
+from collections.abc import Awaitable, Callable, Coroutine
+from typing import Any, Literal, TypeVar, overload
 from langchain_core.language_models.chat_models import BaseChatModel
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_google_vertexai import ChatVertexAI
 from langchain_openai import ChatOpenAI
+from pydantic import SecretStr
 from minitap.mobile_use.config import (
     AgentNode,
@@ -15,8 +17,79 @@ from minitap.mobile_use.config import (
     settings,
 )
 from minitap.mobile_use.context import MobileUseContext
+from minitap.mobile_use.utils.logger import get_logger
-logger = logging.getLogger(__name__)
+# Logger for internal messages (ex: fallback)
+llm_logger = logging.getLogger(__name__)
+# Logger for user messages
+user_messages_logger = get_logger(__name__)
+async def invoke_llm_with_timeout_message[T](
+    llm_call: Coroutine[Any, Any, T],
+    agent_name: str,
+    timeout_seconds: int = 10,
+) -> T:
+    """
+    Send a LLM call and display a timeout message if it takes too long.
+    Args:
+        llm_call: The coroutine of the LLM call to execute.
+        agent_name: The name of the agent making the call (for the message).
+        timeout_seconds: The delay in seconds before displaying the message.
+    Returns:
+        The result of the LLM call.
+    """
+    llm_task = asyncio.create_task(llm_call)
+    waiter_task = asyncio.create_task(asyncio.sleep(timeout_seconds))
+    done, _ = await asyncio.wait({llm_task, waiter_task}, return_when=asyncio.FIRST_COMPLETED)
+    if llm_task in done:
+        # The LLM call has finished before the timeout, cancel the timer
+        waiter_task.cancel()
+        return llm_task.result()
+    else:
+        # The timeout has been reached, display the message and wait for the call to finish
+        user_messages_logger.info("Waiting for LLM call response...")
+        return await llm_task
+def get_minitap_llm(
+    trace_id: str,
+    remote_tracing: bool = False,
+    model: str = "google/gemini-2.5-pro",
+    temperature: float | None = None,
+    max_retries: int | None = None,
+    api_key: str | None = None,
+) -> ChatOpenAI:
+    if api_key:
+        effective_api_key = SecretStr(api_key)
+    elif settings.MINITAP_API_KEY:
+        effective_api_key = settings.MINITAP_API_KEY
+    else:
+        raise ValueError("MINITAP_API_KEY must be provided or set in environment")
+    if settings.MINITAP_API_BASE_URL is None:
+        raise ValueError("MINITAP_API_BASE_URL must be set in environment")
+    llm_base_url = f"{settings.MINITAP_API_BASE_URL}/api/v1"
+    if max_retries is None and model.startswith("google/"):
+        max_retries = 2
+    client = ChatOpenAI(
+        model=model,
+        temperature=temperature,
+        max_retries=max_retries,
+        api_key=effective_api_key,
+        base_url=llm_base_url,
+        default_query={
+            "sessionId": trace_id,
+            "traceOnlyUsage": remote_tracing,
+        },
+    )
+    return client
 def get_google_llm(
@@ -139,6 +212,17 @@ def get_llm(
         return get_openrouter_llm(llm.model, temperature)
     elif llm.provider == "xai":
         return get_grok_llm(llm.model, temperature)
+    elif llm.provider == "minitap":
+        remote_tracing = False
+        if ctx.execution_setup:
+            remote_tracing = ctx.execution_setup.enable_remote_tracing
+        return get_minitap_llm(
+            trace_id=ctx.trace_id,
+            remote_tracing=remote_tracing,
+            model=llm.model,
+            temperature=temperature,
+            api_key=ctx.minitap_api_key,
+        )
     else:
         raise ValueError(f"Unsupported provider: {llm.provider}")
@@ -154,9 +238,9 @@ async def with_fallback(
     try:
         result = await main_call()
         if result is None and none_should_fallback:
-            logger.warning("Main LLM inference returned None. Falling back...")
+            llm_logger.warning("Main LLM inference returned None. Falling back...")
             return await fallback_call()
         return result
     except Exception as e:
-        logger.warning(f"❗ Main LLM inference failed: {e}. Falling back...")
+        llm_logger.warning(f"❗ Main LLM inference failed: {e}. Falling back...")
         return await fallback_call()

minitap/mobile_use/tools/index.py CHANGED Viewed

@@ -3,15 +3,12 @@ from langchain_core.tools import BaseTool
 from minitap.mobile_use.context import MobileUseContext
 from minitap.mobile_use.tools.mobile.back import back_wrapper
 from minitap.mobile_use.tools.mobile.clear_text import clear_text_wrapper
-from minitap.mobile_use.tools.mobile.copy_text_from import copy_text_from_wrapper
 from minitap.mobile_use.tools.mobile.erase_one_char import erase_one_char_wrapper
-from minitap.mobile_use.tools.mobile.find_packages import find_packages_wrapper
 from minitap.mobile_use.tools.mobile.glimpse_screen import glimpse_screen_wrapper
 from minitap.mobile_use.tools.mobile.input_text import input_text_wrapper
 from minitap.mobile_use.tools.mobile.launch_app import launch_app_wrapper
 from minitap.mobile_use.tools.mobile.long_press_on import long_press_on_wrapper
 from minitap.mobile_use.tools.mobile.open_link import open_link_wrapper
-from minitap.mobile_use.tools.mobile.paste_text import paste_text_wrapper
 from minitap.mobile_use.tools.mobile.press_key import press_key_wrapper
 from minitap.mobile_use.tools.mobile.stop_app import stop_app_wrapper
 from minitap.mobile_use.tools.mobile.swipe import swipe_wrapper
@@ -28,13 +25,10 @@ EXECUTOR_WRAPPERS_TOOLS = [
     long_press_on_wrapper,
     swipe_wrapper,
     glimpse_screen_wrapper,
-    copy_text_from_wrapper,
     input_text_wrapper,
     erase_one_char_wrapper,
-    find_packages_wrapper,
     launch_app_wrapper,
     stop_app_wrapper,
-    paste_text_wrapper,
     clear_text_wrapper,
     press_key_wrapper,
     wait_for_animation_to_end_wrapper,

minitap/mobile_use/tools/mobile/back.py CHANGED Viewed

@@ -13,11 +13,11 @@ from langgraph.prebuilt import InjectedState
 def get_back_tool(ctx: MobileUseContext):
     @tool
-    def back(
+    async def back(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
-    ):
+    ) -> Command:
         """Navigates to the previous screen. (Only works on Android for the moment)"""
         output = back_controller(ctx=ctx)
         has_failed = output is not None
@@ -28,7 +28,7 @@ def get_back_tool(ctx: MobileUseContext):
             status="error" if has_failed else "success",
         )
         return Command(
-            update=state.sanitize_update(
+            update=await state.asanitize_update(
                 ctx=ctx,
                 update={
                     "agents_thoughts": [agent_thought],

minitap/mobile_use/tools/mobile/clear_text.py CHANGED Viewed

@@ -12,18 +12,13 @@ from minitap.mobile_use.context import MobileUseContext
 from minitap.mobile_use.controllers.mobile_command_controller import (
     erase_text as erase_text_controller,
 )
-from minitap.mobile_use.controllers.mobile_command_controller import (
-    get_screen_data,
-)
+from minitap.mobile_use.controllers.mobile_command_controller import get_screen_data
 from minitap.mobile_use.graph.state import State
 from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
-from minitap.mobile_use.tools.utils import (
-    focus_element_if_needed,
-    move_cursor_to_end_if_bounds,
-)
+from minitap.mobile_use.tools.types import Target
+from minitap.mobile_use.tools.utils import focus_element_if_needed, move_cursor_to_end_if_bounds
 from minitap.mobile_use.utils.logger import get_logger
 from minitap.mobile_use.utils.ui_hierarchy import (
-    ElementBounds,
     find_element_by_resource_id,
     get_element_text,
     text_input_is_empty,
@@ -90,24 +85,18 @@ class TextClearer:
     def _prepare_element_for_clearing(
         self,
-        text_input_resource_id: str | None,
-        text_input_coordinates: ElementBounds | None,
-        text_input_text: str | None,
+        target: Target,
     ) -> bool:
         if not focus_element_if_needed(
             ctx=self.ctx,
-            input_resource_id=text_input_resource_id,
-            input_coordinates=text_input_coordinates,
-            input_text=text_input_text,
+            target=target,
         ):
             return False
         move_cursor_to_end_if_bounds(
             ctx=self.ctx,
             state=self.state,
-            text_input_resource_id=text_input_resource_id,
-            text_input_coordinates=text_input_coordinates,
-            text_input_text=text_input_text,
+            target=target,
         )
         return True
@@ -124,9 +113,7 @@ class TextClearer:
     def _clear_with_retries(
         self,
-        text_input_resource_id: str | None,
-        text_input_coordinates: ElementBounds | None,
-        text_input_text: str | None,
+        target: Target,
         initial_text: str,
         hint_text: str | None,
     ) -> tuple[bool, str | None, int]:
@@ -145,10 +132,10 @@ class TextClearer:
             self._refresh_ui_hierarchy()
             elt = None
-            if text_input_resource_id:
+            if target.resource_id:
                 elt = find_element_by_resource_id(
                     ui_hierarchy=self.state.latest_ui_hierarchy or [],
-                    resource_id=text_input_resource_id,
+                    resource_id=target.resource_id,
                 )
                 if elt:
                     current_text = get_element_text(elt)
@@ -159,9 +146,7 @@ class TextClearer:
             move_cursor_to_end_if_bounds(
                 ctx=self.ctx,
                 state=self.state,
-                text_input_resource_id=text_input_resource_id,
-                text_input_coordinates=text_input_coordinates,
-                text_input_text=text_input_text,
+                target=target,
                 elt=elt,
             )
@@ -213,20 +198,20 @@ class TextClearer:
     def clear_input_text(
         self,
-        text_input_resource_id: str | None,
-        text_input_coordinates: ElementBounds | None,
-        text_input_text: str | None,
+        target: Target,
     ) -> ClearTextResult:
-        element, current_text, hint_text = self._get_element_info(text_input_resource_id)
+        element, current_text, hint_text = self._get_element_info(
+            resource_id=target.resource_id,
+        )
         if not element:
-            return self._handle_element_not_found(text_input_resource_id, hint_text)
+            return self._handle_element_not_found(target.resource_id, hint_text)
         if not self._should_clear_text(current_text, hint_text):
             return self._handle_no_clearing_needed(current_text, hint_text)
         if not self._prepare_element_for_clearing(
-            text_input_resource_id, text_input_coordinates, text_input_text
+            target=target,
         ):
             return self._create_result(
                 success=False,
@@ -237,9 +222,7 @@ class TextClearer:
             )
         success, final_text, chars_erased = self._clear_with_retries(
-            text_input_resource_id=text_input_resource_id,
-            text_input_coordinates=text_input_coordinates,
-            text_input_text=text_input_text,
+            target=target,
             initial_text=current_text or "",
             hint_text=hint_text,
         )
@@ -257,23 +240,21 @@ class TextClearer:
 def get_clear_text_tool(ctx: MobileUseContext):
     @tool
-    def clear_text(
+    async def clear_text(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
-        text_input_resource_id: str,
-        text_input_coordinates: ElementBounds | None,
-        text_input_text: str | None,
+        target: Target,
     ):
         """
         Clears all the text from the text field, by focusing it if needed.
         """
         clearer = TextClearer(ctx, state)
         result = clearer.clear_input_text(
-            text_input_resource_id, text_input_coordinates, text_input_text
+            target=target,
         )
-        content = (
+        agent_outcome = (
             clear_text_wrapper.on_failure_fn(result.error_message)
             if not result.success
             else clear_text_wrapper.on_success_fn(
@@ -283,16 +264,16 @@ def get_clear_text_tool(ctx: MobileUseContext):
         tool_message = ToolMessage(
             tool_call_id=tool_call_id,
-            content=content,
+            content=agent_outcome,
             additional_kwargs={"error": result.error_message} if not result.success else {},
             status="error" if not result.success else "success",
         )
         return Command(
-            update=state.sanitize_update(
+            update=await state.asanitize_update(
                 ctx=ctx,
                 update={
-                    "agents_thoughts": [agent_thought],
+                    "agents_thoughts": [agent_thought, agent_outcome],
                     EXECUTOR_MESSAGES_KEY: [tool_message],
                 },
                 agent="executor",

minitap/mobile_use/tools/mobile/erase_one_char.py CHANGED Viewed

@@ -1,9 +1,10 @@
+from typing import Annotated
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import tool
 from langchain_core.tools.base import InjectedToolCallId
 from langgraph.prebuilt import InjectedState
 from langgraph.types import Command
-from typing import Annotated
 from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
 from minitap.mobile_use.context import MobileUseContext
@@ -16,11 +17,11 @@ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
 def get_erase_one_char_tool(ctx: MobileUseContext):
     @tool
-    def erase_one_char(
+    async def erase_one_char(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
-    ):
+    ) -> Command:
         """
         Erase one character from a text area.
         It acts the same as pressing backspace a single time.
@@ -36,7 +37,7 @@ def get_erase_one_char_tool(ctx: MobileUseContext):
             status="error" if has_failed else "success",
         )
         return Command(
-            update=state.sanitize_update(
+            update=await state.asanitize_update(
                 ctx=ctx,
                 update={
                     "agents_thoughts": [agent_thought],

minitap/mobile_use/tools/mobile/glimpse_screen.py CHANGED Viewed

@@ -18,11 +18,11 @@ from minitap.mobile_use.utils.media import compress_base64_jpeg
 def get_glimpse_screen_tool(ctx: MobileUseContext):
     @tool
-    def glimpse_screen(
+    async def glimpse_screen(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
-    ):
+    ) -> Command:
         """
         Captures the current screen as an image.
         The resulting screenshot is added to the context for the next reasoning step.
@@ -37,22 +37,26 @@ def get_glimpse_screen_tool(ctx: MobileUseContext):
             output = str(e)
             has_failed = True
+        agent_outcome = (
+            glimpse_screen_wrapper.on_failure_fn()
+            if has_failed
+            else glimpse_screen_wrapper.on_success_fn()
+        )
         tool_message = ToolMessage(
             tool_call_id=tool_call_id,
-            content=glimpse_screen_wrapper.on_failure_fn()
-            if has_failed
-            else glimpse_screen_wrapper.on_success_fn(),
+            content=agent_outcome,
             additional_kwargs={"error": output} if has_failed else {},
             status="error" if has_failed else "success",
         )
         updates = {
-            "agents_thoughts": [agent_thought],
+            "agents_thoughts": [agent_thought, agent_outcome],
             EXECUTOR_MESSAGES_KEY: [tool_message],
         }
         if compressed_image_base64:
             updates["latest_screenshot_base64"] = compressed_image_base64
         return Command(
-            update=state.sanitize_update(
+            update=await state.asanitize_update(
                 ctx=ctx,
                 update=updates,
                 agent="executor",

minitap/mobile_use/tools/mobile/input_text.py CHANGED Viewed

@@ -11,21 +11,16 @@ from pydantic import BaseModel
 from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
 from minitap.mobile_use.context import MobileUseContext
-from minitap.mobile_use.controllers.mobile_command_controller import (
-    get_screen_data,
-)
+from minitap.mobile_use.controllers.mobile_command_controller import get_screen_data
 from minitap.mobile_use.controllers.mobile_command_controller import (
     input_text as input_text_controller,
 )
 from minitap.mobile_use.graph.state import State
 from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
+from minitap.mobile_use.tools.types import Target
 from minitap.mobile_use.tools.utils import focus_element_if_needed, move_cursor_to_end_if_bounds
 from minitap.mobile_use.utils.logger import get_logger
-from minitap.mobile_use.utils.ui_hierarchy import (
-    ElementBounds,
-    find_element_by_resource_id,
-    get_element_text,
-)
+from minitap.mobile_use.utils.ui_hierarchy import find_element_by_resource_id, get_element_text
 logger = get_logger(__name__)
@@ -49,14 +44,12 @@ def _controller_input_text(ctx: MobileUseContext, text: str) -> InputResult:
 def get_input_text_tool(ctx: MobileUseContext):
     @tool
-    def input_text(
+    async def input_text(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
         text: str,
-        text_input_resource_id: str | None,
-        text_input_coordinates: ElementBounds | None,
-        text_input_text: str | None,
+        target: Target,
     ):
         """
         Focus a text field and type text into it.
@@ -70,17 +63,9 @@ def get_input_text_tool(ctx: MobileUseContext):
             state: The state of the agent.
             agent_thought: The thought of the agent.
             text: The text to type.
-            text_input_resource_id: The resource ID of the text input (if available).
-            text_input_coordinates: The bounds (ElementBounds) of the text input (if available).
-            text_input_text: The current text content of the text input (if available).
+            target: The target of the text input (if available).
         """
-        focused = focus_element_if_needed(
-            ctx=ctx,
-            input_resource_id=text_input_resource_id,
-            input_coordinates=text_input_coordinates,
-            input_text=text_input_text,
-        )
+        focused = focus_element_if_needed(ctx=ctx, target=target)
         if not focused:
             error_message = "Failed to focus the text input element before typing."
             tool_message = ToolMessage(
@@ -90,7 +75,7 @@ def get_input_text_tool(ctx: MobileUseContext):
                 status="error",
             )
             return Command(
-                update=state.sanitize_update(
+                update=await state.asanitize_update(
                     ctx=ctx,
                     update={
                         "agents_thoughts": [agent_thought, error_message],
@@ -100,40 +85,25 @@ def get_input_text_tool(ctx: MobileUseContext):
                 ),
             )
-        move_cursor_to_end_if_bounds(
-            ctx=ctx,
-            state=state,
-            text_input_resource_id=text_input_resource_id,
-            text_input_coordinates=text_input_coordinates,
-            text_input_text=text_input_text,
-        )
+        move_cursor_to_end_if_bounds(ctx=ctx, state=state, target=target)
         result = _controller_input_text(ctx=ctx, text=text)
         status: Literal["success", "error"] = "success" if result.ok else "error"
         text_input_content = ""
-        if status == "success":
-            if text_input_resource_id is not None:
-                # Verification phase for elements with resource_id
-                screen_data = get_screen_data(screen_api_client=ctx.screen_api_client)
-                state.latest_ui_hierarchy = screen_data.elements
-                element = find_element_by_resource_id(
-                    ui_hierarchy=state.latest_ui_hierarchy, resource_id=text_input_resource_id
-                )
-                if not element:
-                    result = InputResult(ok=False, error="Element not found")
-                if element:
-                    text_input_content = get_element_text(element)
-            else:
-                # For elements without resource_id, skip verification and use direct message
-                pass
+        if status == "success" and target.resource_id:
+            screen_data = get_screen_data(screen_api_client=ctx.screen_api_client)
+            state.latest_ui_hierarchy = screen_data.elements
+            element = find_element_by_resource_id(
+                ui_hierarchy=state.latest_ui_hierarchy,
+                resource_id=target.resource_id,
+                index=target.resource_id_index,
+            )
+            if element:
+                text_input_content = get_element_text(element)
         agent_outcome = (
-            input_text_wrapper.on_success_fn(text, text_input_content, text_input_resource_id)
+            input_text_wrapper.on_success_fn(text, text_input_content, target.resource_id)
             if result.ok
             else input_text_wrapper.on_failure_fn(text, result.error)
         )
@@ -146,7 +116,7 @@ def get_input_text_tool(ctx: MobileUseContext):
         )
         return Command(
-            update=state.sanitize_update(
+            update=await state.asanitize_update(
                 ctx=ctx,
                 update={
                     "agents_thoughts": [agent_thought, agent_outcome],

minitap-mobile-use 2.3.0__py3-none-any.whl → 2.5.0__py3-none-any.whl

Potentially problematic release.

minitap-mobile-use 2.3.0__py3-none-any.whl → 2.5.0__py3-none-any.whl