PyPI - minitap-mobile-use - Versions diffs - 3.3.0__py3-none-any.whl - Mend

minitap-mobile-use 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (115) hide show

minitap/mobile_use/__init__.py +0 -0
minitap/mobile_use/agents/contextor/contextor.md +55 -0
minitap/mobile_use/agents/contextor/contextor.py +175 -0
minitap/mobile_use/agents/contextor/types.py +36 -0
minitap/mobile_use/agents/cortex/cortex.md +135 -0
minitap/mobile_use/agents/cortex/cortex.py +152 -0
minitap/mobile_use/agents/cortex/types.py +15 -0
minitap/mobile_use/agents/executor/executor.md +42 -0
minitap/mobile_use/agents/executor/executor.py +87 -0
minitap/mobile_use/agents/executor/tool_node.py +152 -0
minitap/mobile_use/agents/hopper/hopper.md +15 -0
minitap/mobile_use/agents/hopper/hopper.py +44 -0
minitap/mobile_use/agents/orchestrator/human.md +12 -0
minitap/mobile_use/agents/orchestrator/orchestrator.md +21 -0
minitap/mobile_use/agents/orchestrator/orchestrator.py +134 -0
minitap/mobile_use/agents/orchestrator/types.py +11 -0
minitap/mobile_use/agents/outputter/human.md +25 -0
minitap/mobile_use/agents/outputter/outputter.py +85 -0
minitap/mobile_use/agents/outputter/test_outputter.py +167 -0
minitap/mobile_use/agents/planner/human.md +14 -0
minitap/mobile_use/agents/planner/planner.md +126 -0
minitap/mobile_use/agents/planner/planner.py +101 -0
minitap/mobile_use/agents/planner/types.py +51 -0
minitap/mobile_use/agents/planner/utils.py +70 -0
minitap/mobile_use/agents/summarizer/summarizer.py +35 -0
minitap/mobile_use/agents/video_analyzer/__init__.py +5 -0
minitap/mobile_use/agents/video_analyzer/human.md +5 -0
minitap/mobile_use/agents/video_analyzer/video_analyzer.md +37 -0
minitap/mobile_use/agents/video_analyzer/video_analyzer.py +111 -0
minitap/mobile_use/clients/browserstack_client.py +477 -0
minitap/mobile_use/clients/idb_client.py +429 -0
minitap/mobile_use/clients/ios_client.py +332 -0
minitap/mobile_use/clients/ios_client_config.py +141 -0
minitap/mobile_use/clients/ui_automator_client.py +330 -0
minitap/mobile_use/clients/wda_client.py +526 -0
minitap/mobile_use/clients/wda_lifecycle.py +367 -0
minitap/mobile_use/config.py +413 -0
minitap/mobile_use/constants.py +3 -0
minitap/mobile_use/context.py +106 -0
minitap/mobile_use/controllers/__init__.py +0 -0
minitap/mobile_use/controllers/android_controller.py +524 -0
minitap/mobile_use/controllers/controller_factory.py +46 -0
minitap/mobile_use/controllers/device_controller.py +182 -0
minitap/mobile_use/controllers/ios_controller.py +436 -0
minitap/mobile_use/controllers/platform_specific_commands_controller.py +199 -0
minitap/mobile_use/controllers/types.py +106 -0
minitap/mobile_use/controllers/unified_controller.py +193 -0
minitap/mobile_use/graph/graph.py +160 -0
minitap/mobile_use/graph/state.py +115 -0
minitap/mobile_use/main.py +309 -0
minitap/mobile_use/sdk/__init__.py +12 -0
minitap/mobile_use/sdk/agent.py +1294 -0
minitap/mobile_use/sdk/builders/__init__.py +10 -0
minitap/mobile_use/sdk/builders/agent_config_builder.py +307 -0
minitap/mobile_use/sdk/builders/index.py +15 -0
minitap/mobile_use/sdk/builders/task_request_builder.py +236 -0
minitap/mobile_use/sdk/constants.py +1 -0
minitap/mobile_use/sdk/examples/README.md +83 -0
minitap/mobile_use/sdk/examples/__init__.py +1 -0
minitap/mobile_use/sdk/examples/app_lock_messaging.py +54 -0
minitap/mobile_use/sdk/examples/platform_manual_task_example.py +67 -0
minitap/mobile_use/sdk/examples/platform_minimal_example.py +48 -0
minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
minitap/mobile_use/sdk/examples/smart_notification_assistant.py +225 -0
minitap/mobile_use/sdk/examples/video_transcription_example.py +117 -0
minitap/mobile_use/sdk/services/cloud_mobile.py +656 -0
minitap/mobile_use/sdk/services/platform.py +434 -0
minitap/mobile_use/sdk/types/__init__.py +51 -0
minitap/mobile_use/sdk/types/agent.py +84 -0
minitap/mobile_use/sdk/types/exceptions.py +138 -0
minitap/mobile_use/sdk/types/platform.py +183 -0
minitap/mobile_use/sdk/types/task.py +269 -0
minitap/mobile_use/sdk/utils.py +29 -0
minitap/mobile_use/services/accessibility.py +100 -0
minitap/mobile_use/services/llm.py +247 -0
minitap/mobile_use/services/telemetry.py +421 -0
minitap/mobile_use/tools/index.py +67 -0
minitap/mobile_use/tools/mobile/back.py +52 -0
minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
minitap/mobile_use/tools/mobile/focus_and_clear_text.py +317 -0
minitap/mobile_use/tools/mobile/focus_and_input_text.py +153 -0
minitap/mobile_use/tools/mobile/launch_app.py +86 -0
minitap/mobile_use/tools/mobile/long_press_on.py +169 -0
minitap/mobile_use/tools/mobile/open_link.py +62 -0
minitap/mobile_use/tools/mobile/press_key.py +83 -0
minitap/mobile_use/tools/mobile/stop_app.py +62 -0
minitap/mobile_use/tools/mobile/swipe.py +156 -0
minitap/mobile_use/tools/mobile/tap.py +154 -0
minitap/mobile_use/tools/mobile/video_recording.py +177 -0
minitap/mobile_use/tools/mobile/wait_for_delay.py +81 -0
minitap/mobile_use/tools/scratchpad.py +147 -0
minitap/mobile_use/tools/test_utils.py +413 -0
minitap/mobile_use/tools/tool_wrapper.py +16 -0
minitap/mobile_use/tools/types.py +35 -0
minitap/mobile_use/tools/utils.py +336 -0
minitap/mobile_use/utils/app_launch_utils.py +173 -0
minitap/mobile_use/utils/cli_helpers.py +37 -0
minitap/mobile_use/utils/cli_selection.py +143 -0
minitap/mobile_use/utils/conversations.py +31 -0
minitap/mobile_use/utils/decorators.py +124 -0
minitap/mobile_use/utils/errors.py +6 -0
minitap/mobile_use/utils/file.py +13 -0
minitap/mobile_use/utils/logger.py +183 -0
minitap/mobile_use/utils/media.py +186 -0
minitap/mobile_use/utils/recorder.py +52 -0
minitap/mobile_use/utils/requests_utils.py +37 -0
minitap/mobile_use/utils/shell_utils.py +20 -0
minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
minitap/mobile_use/utils/time.py +6 -0
minitap/mobile_use/utils/ui_hierarchy.py +132 -0
minitap/mobile_use/utils/video.py +281 -0
minitap_mobile_use-3.3.0.dist-info/METADATA +329 -0
minitap_mobile_use-3.3.0.dist-info/RECORD +115 -0
minitap_mobile_use-3.3.0.dist-info/WHEEL +4 -0
minitap_mobile_use-3.3.0.dist-info/entry_points.txt +3 -0

minitap/mobile_use/tools/mobile/long_press_on.py ADDED Viewed

@@ -0,0 +1,169 @@
+from typing import Annotated
+from langchain_core.messages import ToolMessage
+from langchain_core.tools import tool
+from langchain_core.tools.base import BaseTool, InjectedToolCallId
+from langgraph.prebuilt import InjectedState
+from langgraph.types import Command
+from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
+from minitap.mobile_use.context import MobileUseContext
+from minitap.mobile_use.controllers.unified_controller import UnifiedMobileController
+from minitap.mobile_use.graph.state import State
+from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
+from minitap.mobile_use.tools.types import Target
+from minitap.mobile_use.utils.logger import get_logger
+logger = get_logger(__name__)
+def get_long_press_on_tool(ctx: MobileUseContext) -> BaseTool:
+    @tool
+    async def long_press_on(
+        agent_thought: str,
+        target: Target,
+        tool_call_id: Annotated[str, InjectedToolCallId],
+        state: Annotated[State, InjectedState],
+        duration_ms: int = 1000,
+    ):
+        """
+        Long presses on a UI element identified by the 'target' object.
+        The 'target' object allows specifying an element by its resource_id
+        (with an optional index), its bounds, or its text content (with an optional index).
+        The tool uses a fallback strategy, trying the locators in that order.
+        Args:
+            target: The UI element to long press on (bounds, resource_id, or text).
+            duration_ms: Duration of the long press in milliseconds. Choose based on interaction:
+                        - 500-800ms: Quick long press (e.g., selecting text, haptic feedback)
+                        - 1000ms (default): Standard long press (most common use case)
+                        - 1500-2000ms: Extended long press (e.g., context menus, special actions)
+                        - 2500ms+: Very long press (e.g., accessibility, advanced gestures)
+        """
+        error_obj: dict | None = {
+            "error": "No valid selector provided or all selectors failed."
+        }  # Default to failure
+        latest_selector_info: str | None = None
+        controller = UnifiedMobileController(ctx)
+        # 1. Try with COORDINATES FIRST (visual approach)
+        if target.bounds:
+            try:
+                center_point = target.bounds.get_center()
+                logger.info(
+                    f"Attempting to long press using coordinates: {center_point.x},{center_point.y}"
+                )
+                latest_selector_info = f"coordinates='{target.bounds}'"
+                result = await controller.tap_at(
+                    x=center_point.x,
+                    y=center_point.y,
+                    long_press=True,
+                    long_press_duration=duration_ms,
+                )
+                if result.error is None:  # Success
+                    error_obj = None
+                else:
+                    logger.warning(
+                        f"Long press with coordinates '{target.bounds}' failed. "
+                        f"Error: {result.error}"
+                    )
+                    error_obj = {"error": result.error}
+            except Exception as e:
+                logger.warning(
+                    f"Exception during long press with coordinates '{target.bounds}': {e}"
+                )
+                error_obj = {"error": str(e)}
+        # 2. If coordinates failed or weren't provided, try with resource_id
+        if error_obj is not None and target.resource_id:
+            try:
+                logger.info(
+                    f"Attempting to long press using resource_id: '{target.resource_id}' "
+                    f"at index {target.resource_id_index}"
+                )
+                latest_selector_info = (
+                    f"resource_id='{target.resource_id}' (index={target.resource_id_index})"
+                )
+                result = await controller.tap_element(
+                    resource_id=target.resource_id,
+                    index=target.resource_id_index or 0,
+                    long_press=True,
+                    long_press_duration=duration_ms,
+                )
+                if result.error is None:  # Success
+                    error_obj = None
+                else:
+                    logger.warning(
+                        f"Long press with resource_id '{target.resource_id}' failed. "
+                        f"Error: {result.error}"
+                    )
+                    error_obj = {"error": result.error}
+            except Exception as e:
+                logger.warning(
+                    f"Exception during long press with resource_id '{target.resource_id}': {e}"
+                )
+                error_obj = {"error": str(e)}
+        # 3. If resource_id failed or wasn't provided, try with text (last resort)
+        if error_obj is not None and target.text:
+            try:
+                logger.info(
+                    f"Attempting to long press using text: '{target.text}' "
+                    f"at index {target.text_index}"
+                )
+                latest_selector_info = f"text='{target.text}' (index={target.text_index})"
+                result = await controller.tap_element(
+                    text=target.text,
+                    index=target.text_index or 0,
+                    long_press=True,
+                    long_press_duration=duration_ms,
+                )
+                if result.error is None:  # Success
+                    error_obj = None
+                else:
+                    logger.warning(
+                        f"Long press with text '{target.text}' failed. Error: {result.error}"
+                    )
+                    error_obj = {"error": result.error}
+            except Exception as e:
+                logger.warning(f"Exception during long press with text '{target.text}': {e}")
+                error_obj = {"error": str(e)}
+        has_failed = error_obj is not None
+        final_selector_info = latest_selector_info if latest_selector_info else "N/A"
+        agent_outcome = (
+            long_press_on_wrapper.on_failure_fn(final_selector_info)
+            if has_failed
+            else long_press_on_wrapper.on_success_fn(final_selector_info)
+        )
+        tool_message = ToolMessage(
+            tool_call_id=tool_call_id,
+            content=agent_outcome,
+            additional_kwargs=error_obj if has_failed else {},
+            status="error" if has_failed else "success",
+        )
+        return Command(
+            update=await state.asanitize_update(
+                ctx=ctx,
+                update={
+                    "agents_thoughts": [agent_thought, agent_outcome],
+                    EXECUTOR_MESSAGES_KEY: [tool_message],
+                },
+                agent="executor",
+            ),
+        )
+    return long_press_on
+long_press_on_wrapper = ToolWrapper(
+    tool_fn_getter=get_long_press_on_tool,
+    on_success_fn=lambda selector_info: (
+        f"Long press on element with {selector_info} was successful."
+    ),
+    on_failure_fn=lambda selector_info: "Failed to long press on element. "
+    + f"Last attempt was with {selector_info}.",
+)

minitap/mobile_use/tools/mobile/open_link.py ADDED Viewed

@@ -0,0 +1,62 @@
+from typing import Annotated
+from langchain_core.messages import ToolMessage
+from langchain_core.tools import tool
+from langchain_core.tools.base import InjectedToolCallId
+from langgraph.prebuilt import InjectedState
+from langgraph.types import Command
+from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
+from minitap.mobile_use.context import MobileUseContext
+from minitap.mobile_use.controllers.unified_controller import UnifiedMobileController
+from minitap.mobile_use.graph.state import State
+from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
+def get_open_link_tool(ctx: MobileUseContext):
+    @tool
+    async def open_link(
+        agent_thought: str,
+        url: str,
+        tool_call_id: Annotated[str, InjectedToolCallId],
+        state: Annotated[State, InjectedState],
+    ) -> Command:
+        """
+        Open a link on a device (i.e. a deep link).
+        """
+        controller = UnifiedMobileController(ctx)
+        success = await controller.open_url(url)
+        has_failed = not success
+        output = "Failed to open URL" if has_failed else None
+        agent_outcome = (
+            open_link_wrapper.on_failure_fn()
+            if has_failed
+            else open_link_wrapper.on_success_fn(url)
+        )
+        tool_message = ToolMessage(
+            tool_call_id=tool_call_id,
+            content=agent_outcome,
+            additional_kwargs={"error": output} if has_failed else {},
+            status="error" if has_failed else "success",
+        )
+        return Command(
+            update=await state.asanitize_update(
+                ctx=ctx,
+                update={
+                    "agents_thoughts": [agent_thought, agent_outcome],
+                    EXECUTOR_MESSAGES_KEY: [tool_message],
+                },
+                agent="executor",
+            ),
+        )
+    return open_link
+open_link_wrapper = ToolWrapper(
+    tool_fn_getter=get_open_link_tool,
+    on_success_fn=lambda url: f"Link {url} opened successfully.",
+    on_failure_fn=lambda: "Failed to open link.",
+)

minitap/mobile_use/tools/mobile/press_key.py ADDED Viewed

@@ -0,0 +1,83 @@
+from enum import Enum
+from typing import Annotated
+from langchain_core.messages import ToolMessage
+from langchain_core.tools import tool
+from langchain_core.tools.base import InjectedToolCallId
+from langgraph.prebuilt import InjectedState
+from langgraph.types import Command
+from pydantic import BeforeValidator
+from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
+from minitap.mobile_use.context import MobileUseContext
+from minitap.mobile_use.controllers.unified_controller import UnifiedMobileController
+from minitap.mobile_use.graph.state import State
+from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
+class Key(Enum):
+    ENTER = "Enter"
+    HOME = "Home"
+    BACK = "Back"
+def normalize_key(value: str | Key) -> str:
+    """Convert key input to Title Case for case-insensitive matching."""
+    if isinstance(value, Key):
+        return value.value
+    return value.title()
+CaseInsensitiveKey = Annotated[Key, BeforeValidator(normalize_key)]
+def get_press_key_tool(ctx: MobileUseContext):
+    @tool
+    async def press_key(
+        agent_thought: str,
+        key: CaseInsensitiveKey,
+        tool_call_id: Annotated[str, InjectedToolCallId],
+        state: Annotated[State, InjectedState],
+    ) -> Command:
+        """Press a key on the device."""
+        controller = UnifiedMobileController(ctx)
+        match key:
+            case Key.HOME:
+                output = await controller.go_home()
+            case Key.BACK:
+                output = await controller.go_back()
+            case Key.ENTER:
+                output = await controller.press_enter()
+        has_failed = not output
+        agent_outcome = (
+            press_key_wrapper.on_failure_fn(key)
+            if has_failed
+            else press_key_wrapper.on_success_fn(key)
+        )
+        tool_message = ToolMessage(
+            tool_call_id=tool_call_id,
+            content=agent_outcome,
+            additional_kwargs={"error": output} if has_failed else {},
+            status="error" if has_failed else "success",
+        )
+        return Command(
+            update=await state.asanitize_update(
+                ctx=ctx,
+                update={
+                    "agents_thoughts": [agent_thought, agent_outcome],
+                    EXECUTOR_MESSAGES_KEY: [tool_message],
+                },
+                agent="executor",
+            ),
+        )
+    return press_key
+press_key_wrapper = ToolWrapper(
+    tool_fn_getter=get_press_key_tool,
+    on_success_fn=lambda key: f"Key {key.value} pressed successfully.",
+    on_failure_fn=lambda key: f"Failed to press key {key.value}.",
+)

minitap/mobile_use/tools/mobile/stop_app.py ADDED Viewed

@@ -0,0 +1,62 @@
+from typing import Annotated
+from langchain_core.messages import ToolMessage
+from langchain_core.tools import tool
+from langchain_core.tools.base import InjectedToolCallId
+from langgraph.prebuilt import InjectedState
+from langgraph.types import Command
+from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
+from minitap.mobile_use.context import MobileUseContext
+from minitap.mobile_use.controllers.unified_controller import UnifiedMobileController
+from minitap.mobile_use.graph.state import State
+from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
+def get_stop_app_tool(ctx: MobileUseContext):
+    @tool
+    async def stop_app(
+        agent_thought: str,
+        package_name: str | None = None,
+        tool_call_id: Annotated[str, InjectedToolCallId] = None,  # type: ignore
+        state: Annotated[State, InjectedState] = None,  # type: ignore
+    ) -> Command:
+        """
+        Stops current application if it is running.
+        You can also specify the package name of the app to be stopped.
+        """
+        controller = UnifiedMobileController(ctx)
+        success = await controller.terminate_app(package_name)
+        has_failed = not success
+        output = "Failed to terminate app" if has_failed else None
+        agent_outcome = (
+            stop_app_wrapper.on_failure_fn(package_name)
+            if has_failed
+            else stop_app_wrapper.on_success_fn(package_name)
+        )
+        tool_message = ToolMessage(
+            tool_call_id=tool_call_id,
+            content=agent_outcome,
+            additional_kwargs={"error": output} if has_failed else {},
+            status="error" if has_failed else "success",
+        )
+        return Command(
+            update=await state.asanitize_update(
+                ctx=ctx,
+                update={
+                    "agents_thoughts": [agent_thought, agent_outcome],
+                    EXECUTOR_MESSAGES_KEY: [tool_message],
+                },
+                agent="executor",
+            ),
+        )
+    return stop_app
+stop_app_wrapper = ToolWrapper(
+    tool_fn_getter=get_stop_app_tool,
+    on_success_fn=lambda package_name: f"App {package_name or 'current'} stopped successfully.",
+    on_failure_fn=lambda package_name: f"Failed to stop app {package_name or 'current'}.",
+)

minitap/mobile_use/tools/mobile/swipe.py ADDED Viewed

@@ -0,0 +1,156 @@
+from typing import Annotated
+from langchain_core.messages import ToolMessage
+from langchain_core.tools import tool
+from langchain_core.tools.base import BaseTool, InjectedToolCallId
+from langgraph.prebuilt import InjectedState
+from langgraph.types import Command
+from pydantic import Field
+from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
+from minitap.mobile_use.context import MobileUseContext
+from minitap.mobile_use.controllers.types import (
+    CoordinatesSelectorRequest,
+    PercentagesSelectorRequest,
+    SwipeRequest,
+    SwipeStartEndCoordinatesRequest,
+    SwipeStartEndPercentagesRequest,
+)
+from minitap.mobile_use.controllers.unified_controller import UnifiedMobileController
+from minitap.mobile_use.graph.state import State
+from minitap.mobile_use.tools.tool_wrapper import CompositeToolWrapper
+def get_swipe_tool(ctx: MobileUseContext) -> BaseTool:
+    @tool
+    async def swipe(
+        agent_thought: str,
+        swipe_request: SwipeRequest,
+        tool_call_id: Annotated[str, InjectedToolCallId],
+        state: Annotated[State, InjectedState],
+    ) -> Command:
+        """Swipe from start to end position on screen.
+        Supports percentage-based or coordinate-based positioning.
+        """
+        controller = UnifiedMobileController(ctx)
+        output = await controller.swipe_request(swipe_request)
+        has_failed = output is not None
+        agent_outcome = (
+            swipe_wrapper.on_success_fn() if not has_failed else swipe_wrapper.on_failure_fn()
+        )
+        tool_message = ToolMessage(
+            tool_call_id=tool_call_id,
+            content=agent_outcome,
+            additional_kwargs={"error": output} if has_failed else {},
+            status="error" if has_failed else "success",
+        )
+        return Command(
+            update=await state.asanitize_update(
+                ctx=ctx,
+                update={
+                    "agents_thoughts": [agent_thought, agent_outcome],
+                    EXECUTOR_MESSAGES_KEY: [tool_message],
+                },
+                agent="executor",
+            ),
+        )
+    return swipe
+def get_composite_swipe_tools(ctx: MobileUseContext) -> list[BaseTool]:
+    """
+    Returns composite swipe tools with flattened arguments.
+    Each tool handles a specific swipe mode to avoid complex Union type issues.
+    """
+    async def _execute_swipe(
+        tool_call_id: str,
+        state: State,
+        agent_thought: str,
+        swipe_request: SwipeRequest,
+    ) -> Command:
+        """Shared swipe execution logic."""
+        controller = UnifiedMobileController(ctx)
+        output = await controller.swipe_request(swipe_request)
+        has_failed = output is not None
+        agent_outcome = (
+            swipe_wrapper.on_success_fn() if not has_failed else swipe_wrapper.on_failure_fn()
+        )
+        tool_message = ToolMessage(
+            tool_call_id=tool_call_id,
+            content=agent_outcome,
+            additional_kwargs={"error": output} if has_failed else {},
+            status="error" if has_failed else "success",
+        )
+        return Command(
+            update=await state.asanitize_update(
+                ctx=ctx,
+                update={
+                    "agents_thoughts": [agent_thought, agent_outcome],
+                    EXECUTOR_MESSAGES_KEY: [tool_message],
+                },
+                agent="executor",
+            ),
+        )
+    @tool
+    async def swipe_coordinates(
+        agent_thought: str,
+        start_x: int = Field(description="Start X coordinate in pixels"),
+        start_y: int = Field(description="Start Y coordinate in pixels"),
+        end_x: int = Field(description="End X coordinate in pixels"),
+        end_y: int = Field(description="End Y coordinate in pixels"),
+        duration: int = Field(description="Duration in ms", ge=1, le=10000, default=400),
+        tool_call_id: Annotated[str, InjectedToolCallId] = None,  # type: ignore
+        state: Annotated[State, InjectedState] = None,  # type: ignore
+    ) -> Command:
+        """Swipe using pixel coordinates from start position to end position."""
+        swipe_request = SwipeRequest(
+            swipe_mode=SwipeStartEndCoordinatesRequest(
+                start=CoordinatesSelectorRequest(x=start_x, y=start_y),
+                end=CoordinatesSelectorRequest(x=end_x, y=end_y),
+            ),
+            duration=duration,
+        )
+        return await _execute_swipe(tool_call_id, state, agent_thought, swipe_request)
+    @tool
+    async def swipe_percentages(
+        agent_thought: str,
+        start_x_percent: int = Field(description="Start X percent (0-100)", ge=0, le=100),
+        start_y_percent: int = Field(description="Start Y percent (0-100)", ge=0, le=100),
+        end_x_percent: int = Field(description="End X percent (0-100)", ge=0, le=100),
+        end_y_percent: int = Field(description="End Y percent (0-100)", ge=0, le=100),
+        duration: int = Field(description="Duration in ms", ge=1, le=10000, default=400),
+        tool_call_id: Annotated[str, InjectedToolCallId] = None,  # type: ignore
+        state: Annotated[State, InjectedState] = None,  # type: ignore
+    ) -> Command:
+        """Swipe using percentage coordinates from start position to end position."""
+        swipe_request = SwipeRequest(
+            swipe_mode=SwipeStartEndPercentagesRequest(
+                start=PercentagesSelectorRequest(
+                    x_percent=start_x_percent, y_percent=start_y_percent
+                ),
+                end=PercentagesSelectorRequest(x_percent=end_x_percent, y_percent=end_y_percent),
+            ),
+            duration=duration,
+        )
+        return await _execute_swipe(tool_call_id, state, agent_thought, swipe_request)
+    return [swipe_coordinates, swipe_percentages]
+swipe_wrapper = CompositeToolWrapper(
+    tool_fn_getter=get_swipe_tool,
+    composite_tools_fn_getter=get_composite_swipe_tools,
+    on_success_fn=lambda: "Swipe is successful.",
+    on_failure_fn=lambda: "Failed to swipe.",
+)

minitap/mobile_use/tools/mobile/tap.py ADDED Viewed

@@ -0,0 +1,154 @@
+from typing import Annotated
+from langchain_core.messages import ToolMessage
+from langchain_core.tools import tool
+from langchain_core.tools.base import BaseTool, InjectedToolCallId
+from langgraph.prebuilt import InjectedState
+from langgraph.types import Command
+from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
+from minitap.mobile_use.context import MobileUseContext
+from minitap.mobile_use.controllers.unified_controller import UnifiedMobileController
+from minitap.mobile_use.graph.state import State
+from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
+from minitap.mobile_use.tools.types import Target
+from minitap.mobile_use.tools.utils import has_valid_selectors, validate_coordinates_bounds
+from minitap.mobile_use.utils.logger import get_logger
+logger = get_logger(__name__)
+def get_tap_tool(ctx: MobileUseContext) -> BaseTool:
+    @tool
+    async def tap(
+        agent_thought: str,
+        target: Target,
+        tool_call_id: Annotated[str, InjectedToolCallId],
+        state: Annotated[State, InjectedState],
+    ):
+        """
+        Taps on a UI element identified by the 'target' object.
+        The 'target' object allows specifying an element by its resource_id
+        (with an optional index), its bounds, or its text content (with an optional index).
+        The tool uses a fallback strategy, trying the locators in that order.
+        """
+        # Track all attempts for better error reporting
+        attempts: list[dict] = []
+        success = False
+        successful_selector: str | None = None
+        # Validate target has at least one selector
+        if not has_valid_selectors(target):
+            attempts.append(
+                {
+                    "selector": "none",
+                    "error": "No valid selector provided (need bounds, resource_id, or text)",
+                }
+            )
+        controller = UnifiedMobileController(ctx)
+        # 1. Try with COORDINATES FIRST (visual approach)
+        if not success and target.bounds:
+            center = target.bounds.get_center()
+            selector_info = f"coordinates ({center.x}, {center.y})"
+            # Validate bounds before attempting
+            bounds_error = validate_coordinates_bounds(
+                target, ctx.device.device_width, ctx.device.device_height
+            )
+            if bounds_error:
+                logger.warning(f"Coordinates out of bounds: {bounds_error}")
+                attempts.append(
+                    {"selector": selector_info, "error": f"Out of bounds: {bounds_error}"}
+                )
+            else:
+                try:
+                    center_point = target.bounds.get_center()
+                    logger.info(f"Attempting tap with {selector_info}")
+                    result = await controller.tap_at(x=center_point.x, y=center_point.y)
+                    if result.error is None:
+                        success = True
+                        successful_selector = selector_info
+                    else:
+                        error_msg = result.error
+                        logger.warning(f"Tap with {selector_info} failed: {error_msg}")
+                        attempts.append({"selector": selector_info, "error": error_msg})
+                except Exception as e:
+                    logger.warning(f"Exception during tap with {selector_info}: {e}")
+                    attempts.append({"selector": selector_info, "error": str(e)})
+        # 2. If coordinates failed or weren't provided, try with resource_id
+        if not success and target.resource_id:
+            selector_info = f"resource_id='{target.resource_id}' (index={target.resource_id_index})"
+            try:
+                logger.info(f"Attempting tap with {selector_info}")
+                result = await controller.tap_element(
+                    resource_id=target.resource_id,
+                    index=target.resource_id_index or 0,
+                )
+                if result.error is None:
+                    success = True
+                    successful_selector = selector_info
+                else:
+                    error_msg = result.error
+                    logger.warning(f"Tap with {selector_info} failed: {error_msg}")
+                    attempts.append({"selector": selector_info, "error": error_msg})
+            except Exception as e:
+                logger.warning(f"Exception during tap with {selector_info}: {e}")
+                attempts.append({"selector": selector_info, "error": str(e)})
+        # 3. If resource_id failed or wasn't provided, try with text (last resort)
+        if not success and target.text:
+            selector_info = f"text='{target.text}' (index={target.text_index})"
+            try:
+                logger.info(f"Attempting tap with {selector_info}")
+                result = await controller.tap_element(
+                    text=target.text,
+                    index=target.text_index or 0,
+                )
+                if result.error is None:
+                    success = True
+                    successful_selector = selector_info
+                else:
+                    error_msg = result.error
+                    logger.warning(f"Tap with {selector_info} failed: {error_msg}")
+                    attempts.append({"selector": selector_info, "error": error_msg})
+            except Exception as e:
+                logger.warning(f"Exception during tap with {selector_info}: {e}")
+                attempts.append({"selector": selector_info, "error": str(e)})
+        # Build result message
+        if success:
+            agent_outcome = tap_wrapper.on_success_fn(successful_selector)
+        else:
+            # Build detailed failure message with all attempts
+            failure_details = "; ".join([f"{a['selector']}: {a['error']}" for a in attempts])
+            agent_outcome = tap_wrapper.on_failure_fn(failure_details)
+        tool_message = ToolMessage(
+            tool_call_id=tool_call_id,
+            content=agent_outcome,
+            additional_kwargs={"attempts": attempts} if not success else {},
+            status="success" if success else "error",
+        )
+        return Command(
+            update=await state.asanitize_update(
+                ctx=ctx,
+                update={
+                    "agents_thoughts": [agent_thought, agent_outcome],
+                    EXECUTOR_MESSAGES_KEY: [tool_message],
+                },
+                agent="executor",
+            ),
+        )
+    return tap
+tap_wrapper = ToolWrapper(
+    tool_fn_getter=get_tap_tool,
+    on_success_fn=lambda selector_info: f"Tap on element with {selector_info} was successful.",
+    on_failure_fn=lambda failure_details: f"Failed to tap on element. Attempts: {failure_details}",
+)