PyPI - minitap-mobile-use - Versions diffs - 2.5.3__py3-none-any.whl → 2.6.0__py3-none-any.whl - Mend

minitap-mobile-use 2.5.3__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (40)

minitap/mobile_use/agents/contextor/contextor.py +0 -8
minitap/mobile_use/agents/cortex/cortex.md +122 -36
minitap/mobile_use/agents/cortex/cortex.py +32 -17
minitap/mobile_use/agents/cortex/types.py +18 -4
minitap/mobile_use/agents/executor/executor.md +3 -3
minitap/mobile_use/agents/executor/executor.py +10 -3
minitap/mobile_use/agents/hopper/hopper.md +30 -2
minitap/mobile_use/agents/hopper/hopper.py +19 -15
minitap/mobile_use/agents/orchestrator/orchestrator.py +14 -5
minitap/mobile_use/agents/outputter/outputter.py +13 -3
minitap/mobile_use/agents/planner/planner.md +20 -9
minitap/mobile_use/agents/planner/planner.py +12 -5
minitap/mobile_use/agents/screen_analyzer/human.md +16 -0
minitap/mobile_use/agents/screen_analyzer/screen_analyzer.py +111 -0
minitap/mobile_use/clients/ios_client.py +7 -3
minitap/mobile_use/config.py +87 -24
minitap/mobile_use/controllers/mobile_command_controller.py +354 -88
minitap/mobile_use/controllers/platform_specific_commands_controller.py +41 -27
minitap/mobile_use/controllers/types.py +95 -0
minitap/mobile_use/graph/graph.py +55 -11
minitap/mobile_use/graph/state.py +10 -3
minitap/mobile_use/main.py +12 -4
minitap/mobile_use/sdk/agent.py +109 -72
minitap/mobile_use/sdk/examples/smart_notification_assistant.py +59 -10
minitap/mobile_use/servers/device_hardware_bridge.py +13 -6
minitap/mobile_use/services/llm.py +5 -2
minitap/mobile_use/tools/index.py +7 -9
minitap/mobile_use/tools/mobile/{clear_text.py → focus_and_clear_text.py} +7 -7
minitap/mobile_use/tools/mobile/{input_text.py → focus_and_input_text.py} +8 -8
minitap/mobile_use/tools/mobile/long_press_on.py +130 -15
minitap/mobile_use/tools/mobile/swipe.py +3 -26
minitap/mobile_use/tools/mobile/tap.py +41 -28
minitap/mobile_use/tools/mobile/wait_for_delay.py +84 -0
minitap/mobile_use/utils/cli_helpers.py +10 -6
{minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.6.0.dist-info}/METADATA +1 -1
{minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.6.0.dist-info}/RECORD +38 -36
minitap/mobile_use/tools/mobile/glimpse_screen.py +0 -74
minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +0 -64
{minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.6.0.dist-info}/WHEEL +0 -0
{minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.6.0.dist-info}/entry_points.txt +0 -0

minitap/mobile_use/tools/index.py CHANGED Viewed

@@ -2,10 +2,9 @@ from langchain_core.tools import BaseTool
 from minitap.mobile_use.context import MobileUseContext
 from minitap.mobile_use.tools.mobile.back import back_wrapper
-from minitap.mobile_use.tools.mobile.clear_text import clear_text_wrapper
 from minitap.mobile_use.tools.mobile.erase_one_char import erase_one_char_wrapper
-from minitap.mobile_use.tools.mobile.glimpse_screen import glimpse_screen_wrapper
-from minitap.mobile_use.tools.mobile.input_text import input_text_wrapper
+from minitap.mobile_use.tools.mobile.focus_and_clear_text import focus_and_clear_text_wrapper
+from minitap.mobile_use.tools.mobile.focus_and_input_text import focus_and_input_text_wrapper
 from minitap.mobile_use.tools.mobile.launch_app import launch_app_wrapper
 from minitap.mobile_use.tools.mobile.long_press_on import long_press_on_wrapper
 from minitap.mobile_use.tools.mobile.open_link import open_link_wrapper
@@ -13,8 +12,8 @@ from minitap.mobile_use.tools.mobile.press_key import press_key_wrapper
 from minitap.mobile_use.tools.mobile.stop_app import stop_app_wrapper
 from minitap.mobile_use.tools.mobile.swipe import swipe_wrapper
 from minitap.mobile_use.tools.mobile.tap import tap_wrapper
-from minitap.mobile_use.tools.mobile.wait_for_animation_to_end import (
-    wait_for_animation_to_end_wrapper,
+from minitap.mobile_use.tools.mobile.wait_for_delay import (
+    wait_for_delay_wrapper,
 )
 from minitap.mobile_use.tools.tool_wrapper import CompositeToolWrapper, ToolWrapper
@@ -24,14 +23,13 @@ EXECUTOR_WRAPPERS_TOOLS = [
     tap_wrapper,
     long_press_on_wrapper,
     swipe_wrapper,
-    glimpse_screen_wrapper,
-    input_text_wrapper,
+    focus_and_input_text_wrapper,
     erase_one_char_wrapper,
     launch_app_wrapper,
     stop_app_wrapper,
-    clear_text_wrapper,
+    focus_and_clear_text_wrapper,
     press_key_wrapper,
-    wait_for_animation_to_end_wrapper,
+    wait_for_delay_wrapper,
 ]

minitap/mobile_use/tools/mobile/{clear_text.py → focus_and_clear_text.py} RENAMED Viewed

@@ -238,9 +238,9 @@ class TextClearer:
         )
-def get_clear_text_tool(ctx: MobileUseContext):
+def get_focus_and_clear_text_tool(ctx: MobileUseContext):
     @tool
-    async def clear_text(
+    async def focus_and_clear_text(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
@@ -255,9 +255,9 @@ def get_clear_text_tool(ctx: MobileUseContext):
         )
         agent_outcome = (
-            clear_text_wrapper.on_failure_fn(result.error_message)
+            focus_and_clear_text_wrapper.on_failure_fn(result.error_message)
             if not result.success
-            else clear_text_wrapper.on_success_fn(
+            else focus_and_clear_text_wrapper.on_success_fn(
                 nb_char_erased=result.chars_erased, new_text_value=result.final_text
             )
         )
@@ -280,7 +280,7 @@ def get_clear_text_tool(ctx: MobileUseContext):
             ),
         )
-    return clear_text
+    return focus_and_clear_text
 def _format_success_message(nb_char_erased: int, new_text_value: str | None) -> str:
@@ -299,8 +299,8 @@ def _format_failure_message(output: str | None) -> str:
     return "Failed to erase text. " + (str(output) if output else "")
-clear_text_wrapper = ToolWrapper(
-    tool_fn_getter=get_clear_text_tool,
+focus_and_clear_text_wrapper = ToolWrapper(
+    tool_fn_getter=get_focus_and_clear_text_tool,
     on_success_fn=_format_success_message,
     on_failure_fn=_format_failure_message,
 )

minitap/mobile_use/tools/mobile/{input_text.py → focus_and_input_text.py} RENAMED Viewed

@@ -42,9 +42,9 @@ def _controller_input_text(ctx: MobileUseContext, text: str) -> InputResult:
     return InputResult(ok=False, error=str(controller_out))
-def get_input_text_tool(ctx: MobileUseContext):
+def get_focus_and_input_text_tool(ctx: MobileUseContext):
     @tool
-    async def input_text(
+    async def focus_and_input_text(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
@@ -70,7 +70,7 @@ def get_input_text_tool(ctx: MobileUseContext):
             error_message = "Failed to focus the text input element before typing."
             tool_message = ToolMessage(
                 tool_call_id=tool_call_id,
-                content=input_text_wrapper.on_failure_fn(text, error_message),
+                content=focus_and_input_text_wrapper.on_failure_fn(text, error_message),
                 additional_kwargs={"error": error_message},
                 status="error",
             )
@@ -103,9 +103,9 @@ def get_input_text_tool(ctx: MobileUseContext):
                 text_input_content = get_element_text(element)
         agent_outcome = (
-            input_text_wrapper.on_success_fn(text, text_input_content, target.resource_id)
+            focus_and_input_text_wrapper.on_success_fn(text, text_input_content, target.resource_id)
             if result.ok
-            else input_text_wrapper.on_failure_fn(text, result.error)
+            else focus_and_input_text_wrapper.on_failure_fn(text, result.error)
         )
         tool_message = ToolMessage(
@@ -126,7 +126,7 @@ def get_input_text_tool(ctx: MobileUseContext):
             ),
         )
-    return input_text
+    return focus_and_input_text
 def _on_input_success(text, text_input_content, text_input_resource_id):
@@ -141,8 +141,8 @@ def _on_input_success(text, text_input_content, text_input_resource_id):
         return "Typed text, should now verify before moving forward"
-input_text_wrapper = ToolWrapper(
-    tool_fn_getter=get_input_text_tool,
+focus_and_input_text_wrapper = ToolWrapper(
+    tool_fn_getter=get_focus_and_input_text_tool,
     on_success_fn=_on_input_success,
     on_failure_fn=lambda text, error: f"Failed to input text {repr(text)}. Reason: {error}",
 )

minitap/mobile_use/tools/mobile/long_press_on.py CHANGED Viewed

@@ -2,46 +2,158 @@ from typing import Annotated
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import tool
-from langchain_core.tools.base import InjectedToolCallId
+from langchain_core.tools.base import BaseTool, InjectedToolCallId
 from langgraph.prebuilt import InjectedState
 from langgraph.types import Command
 from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
 from minitap.mobile_use.context import MobileUseContext
-from minitap.mobile_use.controllers.mobile_command_controller import SelectorRequest
+from minitap.mobile_use.controllers.mobile_command_controller import (
+    CoordinatesSelectorRequest,
+    IdSelectorRequest,
+    SelectorRequestWithCoordinates,
+    TextSelectorRequest,
+)
 from minitap.mobile_use.controllers.mobile_command_controller import (
     long_press_on as long_press_on_controller,
 )
 from minitap.mobile_use.graph.state import State
 from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
+from minitap.mobile_use.tools.types import Target
+from minitap.mobile_use.utils.logger import get_logger
+logger = get_logger(__name__)
-def get_long_press_on_tool(ctx: MobileUseContext):
+def get_long_press_on_tool(ctx: MobileUseContext) -> BaseTool:
     @tool
     async def long_press_on(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
-        selector_request: SelectorRequest,
-        index: int | None = None,
-    ) -> Command:
+        target: Target,
+        duration_ms: int = 1000,
+    ):
         """
-        Long press on a UI element identified by the given selector.
-        An index can be specified to select a specific element if multiple are found.
+        Long presses on a UI element identified by the 'target' object.
+        The 'target' object allows specifying an element by its resource_id
+        (with an optional index), its coordinates, or its text content (with an optional index).
+        The tool uses a fallback strategy, trying the locators in that order.
+        Args:
+            target: The UI element to long press on (coordinates, resource_id, or text).
+            duration_ms: Duration of the long press in milliseconds. Choose based on interaction:
+                        - 500-800ms: Quick long press (e.g., selecting text, haptic feedback)
+                        - 1000ms (default): Standard long press (most common use case)
+                        - 1500-2000ms: Extended long press (e.g., context menus, special actions)
+                        - 2500ms+: Very long press (e.g., accessibility, advanced gestures)
         """
-        output = long_press_on_controller(ctx=ctx, selector_request=selector_request, index=index)
-        has_failed = output is not None
+        error_obj: dict | None = {
+            "error": "No valid selector provided or all selectors failed."
+        }  # Default to failure
+        latest_selector_info: str | None = None
+        # 1. Try with COORDINATES FIRST (visual approach)
+        if target.coordinates:
+            try:
+                center_point = target.coordinates.get_center()
+                selector = SelectorRequestWithCoordinates(
+                    coordinates=CoordinatesSelectorRequest(x=center_point.x, y=center_point.y)
+                )
+                logger.info(
+                    f"Attempting to long press using coordinates: {center_point.x},{center_point.y}"
+                )
+                latest_selector_info = f"coordinates='{target.coordinates}'"
+                result = long_press_on_controller(
+                    ctx=ctx,
+                    selector_request=selector,
+                    ui_hierarchy=state.latest_ui_hierarchy,
+                    long_press_duration=duration_ms,
+                )
+                if result is None:  # Success
+                    error_obj = None
+                else:
+                    logger.warning(
+                        f"Long press with coordinates '{target.coordinates}' failed. "
+                        f"Error: {result}"
+                    )
+                    error_obj = {"error": result} if isinstance(result, str) else result
+            except Exception as e:
+                logger.warning(
+                    f"Exception during long press with coordinates '{target.coordinates}': {e}"
+                )
+                error_obj = {"error": str(e)}
+        # 2. If coordinates failed or weren't provided, try with resource_id
+        if error_obj is not None and target.resource_id:
+            try:
+                selector = IdSelectorRequest(id=target.resource_id)
+                logger.info(
+                    f"Attempting to long press using resource_id: '{target.resource_id}' "
+                    f"at index {target.resource_id_index}"
+                )
+                latest_selector_info = (
+                    f"resource_id='{target.resource_id}' (index={target.resource_id_index})"
+                )
+                result = long_press_on_controller(
+                    ctx=ctx,
+                    selector_request=selector,
+                    index=target.resource_id_index,
+                    ui_hierarchy=state.latest_ui_hierarchy,
+                    long_press_duration=duration_ms,
+                )
+                if result is None:  # Success
+                    error_obj = None
+                else:
+                    logger.warning(
+                        f"Long press with resource_id '{target.resource_id}' failed. "
+                        f"Error: {result}"
+                    )
+                    error_obj = {"error": result} if isinstance(result, str) else result
+            except Exception as e:
+                logger.warning(
+                    f"Exception during long press with resource_id '{target.resource_id}': {e}"
+                )
+                error_obj = {"error": str(e)}
+        # 3. If resource_id failed or wasn't provided, try with text (last resort)
+        if error_obj is not None and target.text:
+            try:
+                selector = TextSelectorRequest(text=target.text)
+                logger.info(
+                    f"Attempting to long press using text: '{target.text}' "
+                    f"at index {target.text_index}"
+                )
+                latest_selector_info = f"text='{target.text}' (index={target.text_index})"
+                result = long_press_on_controller(
+                    ctx=ctx,
+                    selector_request=selector,
+                    index=target.text_index,
+                    ui_hierarchy=state.latest_ui_hierarchy,
+                    long_press_duration=duration_ms,
+                )
+                if result is None:  # Success
+                    error_obj = None
+                else:
+                    logger.warning(f"Long press with text '{target.text}' failed. Error: {result}")
+                    error_obj = {"error": result} if isinstance(result, str) else result
+            except Exception as e:
+                logger.warning(f"Exception during long press with text '{target.text}': {e}")
+                error_obj = {"error": str(e)}
+        has_failed = error_obj is not None
+        final_selector_info = latest_selector_info if latest_selector_info else "N/A"
         agent_outcome = (
-            long_press_on_wrapper.on_failure_fn()
+            long_press_on_wrapper.on_failure_fn(final_selector_info)
             if has_failed
-            else long_press_on_wrapper.on_success_fn()
+            else long_press_on_wrapper.on_success_fn(final_selector_info)
         )
         tool_message = ToolMessage(
             tool_call_id=tool_call_id,
             content=agent_outcome,
-            additional_kwargs={"error": output} if has_failed else {},
+            additional_kwargs=error_obj if has_failed else {},
             status="error" if has_failed else "success",
         )
         return Command(
@@ -60,6 +172,9 @@ def get_long_press_on_tool(ctx: MobileUseContext):
 long_press_on_wrapper = ToolWrapper(
     tool_fn_getter=get_long_press_on_tool,
-    on_success_fn=lambda: "Long press on is successful.",
-    on_failure_fn=lambda: "Failed to long press on.",
+    on_success_fn=lambda selector_info: (
+        f"Long press on element with {selector_info} was successful."
+    ),
+    on_failure_fn=lambda selector_info: "Failed to long press on element. "
+    + f"Last attempt was with {selector_info}.",
 )

minitap/mobile_use/tools/mobile/swipe.py CHANGED Viewed

@@ -9,15 +9,14 @@ from pydantic import Field
 from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
 from minitap.mobile_use.context import MobileUseContext
-from minitap.mobile_use.controllers.mobile_command_controller import (
+from minitap.mobile_use.controllers.mobile_command_controller import swipe as swipe_controller
+from minitap.mobile_use.controllers.types import (
     CoordinatesSelectorRequest,
     PercentagesSelectorRequest,
-    SwipeDirection,
     SwipeRequest,
     SwipeStartEndCoordinatesRequest,
     SwipeStartEndPercentagesRequest,
 )
-from minitap.mobile_use.controllers.mobile_command_controller import swipe as swipe_controller
 from minitap.mobile_use.graph.state import State
 from minitap.mobile_use.tools.tool_wrapper import CompositeToolWrapper
@@ -123,29 +122,7 @@ def get_composite_swipe_tools(ctx: MobileUseContext) -> list[BaseTool]:
             }
         )
-    @tool
-    def swipe_direction(
-        agent_thought: str,
-        tool_call_id: Annotated[str, InjectedToolCallId],
-        state: Annotated[State, InjectedState],
-        direction: SwipeDirection,
-        duration: int = Field(description="Duration in ms", ge=1, le=10000, default=400),
-    ):
-        """Swipe in a specific direction across the screen."""
-        swipe_request = SwipeRequest(
-            swipe_mode=direction,
-            duration=duration,
-        )
-        return get_swipe_tool(ctx=ctx).invoke(
-            input={
-                "tool_call_id": tool_call_id,
-                "state": state,
-                "agent_thought": agent_thought,
-                "swipe_request": swipe_request,
-            }
-        )
-    return [swipe_coordinates, swipe_percentages, swipe_direction]
+    return [swipe_coordinates, swipe_percentages]
 swipe_wrapper = CompositeToolWrapper(

minitap/mobile_use/tools/mobile/tap.py CHANGED Viewed

@@ -2,7 +2,7 @@ from typing import Annotated
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import tool
-from langchain_core.tools.base import InjectedToolCallId
+from langchain_core.tools.base import BaseTool, InjectedToolCallId
 from langgraph.prebuilt import InjectedState
 from langgraph.types import Command
@@ -23,7 +23,7 @@ from minitap.mobile_use.utils.logger import get_logger
 logger = get_logger(__name__)
-def get_tap_tool(ctx: MobileUseContext):
+def get_tap_tool(ctx: MobileUseContext) -> BaseTool:
     @tool
     async def tap(
         tool_call_id: Annotated[str, InjectedToolCallId],
@@ -41,67 +41,79 @@ def get_tap_tool(ctx: MobileUseContext):
         output = {
             "error": "No valid selector provided or all selectors failed."
         }  # Default to failure
-        final_selector_info = "N/A"
+        latest_selector_info: str | None = None
-        # 1. Try with resource_id
-        if target.resource_id:
+        # 1. Try with COORDINATES FIRST (visual approach)
+        if target.coordinates:
             try:
-                selector = IdSelectorRequest(id=target.resource_id)
+                center_point = target.coordinates.get_center()
+                selector = SelectorRequestWithCoordinates(
+                    coordinates=CoordinatesSelectorRequest(x=center_point.x, y=center_point.y)
+                )
                 logger.info(
-                    f"Attempting to tap using resource_id: '{target.resource_id}' "
-                    f"at index {target.resource_id_index}"
+                    f"Attempting to tap using coordinates: {center_point.x},{center_point.y}"
                 )
+                latest_selector_info = f"coordinates='{target.coordinates}'"
                 result = tap_controller(
-                    ctx=ctx, selector_request=selector, index=target.resource_id_index
+                    ctx=ctx,
+                    selector_request=selector,
+                    ui_hierarchy=state.latest_ui_hierarchy,
                 )
                 if result is None:  # Success
                     output = None
-                    final_selector_info = (
-                        f"resource_id='{target.resource_id}' (index={target.resource_id_index})"
-                    )
                 else:
                     logger.warning(
-                        f"Tap with resource_id '{target.resource_id}' failed. Error: {result}"
+                        f"Tap with coordinates '{target.coordinates}' failed. Error: {result}"
                     )
                     output = result
             except Exception as e:
-                logger.warning(f"Exception during tap with resource_id '{target.resource_id}': {e}")
+                logger.warning(f"Exception during tap with coordinates '{target.coordinates}': {e}")
                 output = {"error": str(e)}
-        # 2. If resource_id failed or wasn't provided, try with coordinates
-        if output is not None and target.coordinates:
+        # 2. If coordinates failed or weren't provided, try with resource_id
+        if output is not None and target.resource_id:
             try:
-                center_point = target.coordinates.get_center()
-                selector = SelectorRequestWithCoordinates(
-                    coordinates=CoordinatesSelectorRequest(x=center_point.x, y=center_point.y)
-                )
+                selector = IdSelectorRequest(id=target.resource_id)
                 logger.info(
-                    f"Attempting to tap using coordinates: {center_point.x},{center_point.y}"
+                    f"Attempting to tap using resource_id: '{target.resource_id}' "
+                    f"at index {target.resource_id_index}"
+                )
+                latest_selector_info = (
+                    f"resource_id='{target.resource_id}' (index={target.resource_id_index})"
+                )
+                result = tap_controller(
+                    ctx=ctx,
+                    selector_request=selector,
+                    index=target.resource_id_index,
+                    ui_hierarchy=state.latest_ui_hierarchy,
                 )
-                result = tap_controller(ctx=ctx, selector_request=selector)
                 if result is None:  # Success
                     output = None
-                    final_selector_info = f"coordinates='{target.coordinates}'"
                 else:
                     logger.warning(
-                        f"Tap with coordinates '{target.coordinates}' failed. Error: {result}"
+                        f"Tap with resource_id '{target.resource_id}' failed. Error: {result}"
                     )
                     output = result
             except Exception as e:
-                logger.warning(f"Exception during tap with coordinates '{target.coordinates}': {e}")
+                logger.warning(f"Exception during tap with resource_id '{target.resource_id}': {e}")
                 output = {"error": str(e)}
-        # 3. If coordinates failed or weren't provided, try with text
+        # 3. If resource_id failed or wasn't provided, try with text (last resort)
         if output is not None and target.text:
             try:
                 selector = TextSelectorRequest(text=target.text)
                 logger.info(
                     f"Attempting to tap using text: '{target.text}' at index {target.text_index}"
                 )
-                result = tap_controller(ctx=ctx, selector_request=selector, index=target.text_index)
+                latest_selector_info = f"text='{target.text}' (index={target.text_index})"
+                result = tap_controller(
+                    ctx=ctx,
+                    selector_request=selector,
+                    index=target.text_index,
+                    ui_hierarchy=state.latest_ui_hierarchy,
+                )
                 if result is None:  # Success
                     output = None
-                    final_selector_info = f"text='{target.text}' (index={target.text_index})"
                 else:
                     logger.warning(f"Tap with text '{target.text}' failed. Error: {result}")
                     output = result
@@ -110,6 +122,7 @@ def get_tap_tool(ctx: MobileUseContext):
                 output = {"error": str(e)}
         has_failed = output is not None
+        final_selector_info = latest_selector_info if latest_selector_info else "N/A"
         agent_outcome = (
             tap_wrapper.on_failure_fn(final_selector_info)
             if has_failed

minitap/mobile_use/tools/mobile/wait_for_delay.py ADDED Viewed

@@ -0,0 +1,84 @@
+import asyncio
+from typing import Annotated
+from langchain_core.messages import ToolMessage
+from langchain_core.tools import tool
+from langchain_core.tools.base import InjectedToolCallId
+from langgraph.prebuilt import InjectedState
+from langgraph.types import Command
+from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
+from minitap.mobile_use.context import MobileUseContext
+from minitap.mobile_use.controllers.mobile_command_controller import (
+    wait_for_delay as wait_for_delay_controller,
+)
+from minitap.mobile_use.graph.state import State
+from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
+MAX_DELAY_MS = 60000
+def get_wait_for_delay_tool(ctx: MobileUseContext):
+    @tool
+    async def wait_for_delay(
+        tool_call_id: Annotated[str, InjectedToolCallId],
+        state: Annotated[State, InjectedState],
+        agent_thought: str,
+        time_in_ms: int,
+    ) -> Command:
+        """
+        Wait for a delay in milliseconds.
+        This tool pauses execution for a specified number of milliseconds.
+        Use this when you need to introduce a controlled delay to allow the UI
+        to update after an action, regardless of whether an animation is playing.
+        Args:
+            time_in_ms: The number of milliseconds to wait. (capped at 60 seconds)
+        Example:
+            - wait_for_delay with time_in_ms=1000 (waits 1 second)
+            - wait_for_delay with time_in_ms=500 (waits 0.5 seconds)
+        """
+        if time_in_ms < 0:
+            time_in_ms = 1000
+        if time_in_ms > MAX_DELAY_MS:
+            time_in_ms = MAX_DELAY_MS
+        try:
+            await asyncio.to_thread(wait_for_delay_controller, time_in_ms)
+            output = None
+            has_failed = False
+        except Exception as e:
+            output = str(e)
+            has_failed = True
+        agent_outcome = (
+            wait_for_delay_wrapper.on_failure_fn()
+            if has_failed
+            else wait_for_delay_wrapper.on_success_fn(time_in_ms)
+        )
+        tool_message = ToolMessage(
+            tool_call_id=tool_call_id,
+            content=agent_outcome,
+            additional_kwargs={"error": output} if has_failed else {},
+            status="error" if has_failed else "success",
+        )
+        return Command(
+            update=await state.asanitize_update(
+                ctx=ctx,
+                update={
+                    "agents_thoughts": [agent_thought, agent_outcome],
+                    EXECUTOR_MESSAGES_KEY: [tool_message],
+                },
+                agent="executor",
+            ),
+        )
+    return wait_for_delay
+wait_for_delay_wrapper = ToolWrapper(
+    tool_fn_getter=get_wait_for_delay_tool,
+    on_success_fn=lambda delay: f"Successfully waited for {delay} milliseconds.",
+    on_failure_fn=lambda: "Failed to wait for delay.",
+)

minitap/mobile_use/utils/cli_helpers.py CHANGED Viewed

@@ -1,9 +1,10 @@
 import sys
-from minitap.mobile_use.clients.ios_client import get_ios_devices
 from adbutils import AdbClient
 from rich.console import Console
+from minitap.mobile_use.clients.ios_client import get_ios_devices
 def display_device_status(console: Console, adb_client: AdbClient | None = None):
     """Checks for connected devices and displays the status."""
@@ -17,21 +18,24 @@ def display_device_status(console: Console, adb_client: AdbClient | None = None)
             console.print(f"  - {device.serial}")
     else:
         console.print("❌ [bold red]No Android device found.[/bold red]")
-        console.print("Please make sure your emulator is running or a device is connected via USB.")
         command = "emulator -avd <avd_name>"
         if sys.platform not in ["win32", "darwin"]:
             command = f"./{command}"
-        console.print(f"You can start an emulator using a command like: [bold]'{command}'[/bold]")
-        console.print("[italic]iOS detection coming soon...[/italic]")
+            console.print(
+                f"You can start an emulator using a command like: [bold]'{command}'[/bold]"
+            )
     xcrun_available, ios_devices, error_message = get_ios_devices()
     if xcrun_available:
         if ios_devices:
             console.print("✅ [bold green]iOS device(s) connected:[/bold green]")
             for device in ios_devices:
-                console.print(f"  - {device}")
+                console.print(f"  - [green]{device}[/green]")
         else:
-            console.print("❌ [bold red]No iOS device found.[/bold red]")
+            console.print(
+                "❌ [bold red]No iOS device found. We only support iOS simulators for now."
+                "[/bold red]"
+            )
             console.print(
                 "[iOS] Please make sure your emulator is running or a device is connected via USB."
             )

{minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.6.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: minitap-mobile-use
-Version: 2.5.3
+Version: 2.6.0
 Summary: AI-powered multi-agent system that automates real Android and iOS devices through low-level control using LangGraph.
 Author: Pierre-Louis Favreau, Jean-Pierre Lo, Nicolas Dehandschoewercker
 License: MIT License

minitap-mobile-use 2.5.3__py3-none-any.whl → 2.6.0__py3-none-any.whl

Potentially problematic release.

minitap-mobile-use 2.5.3__py3-none-any.whl → 2.6.0__py3-none-any.whl