PyPI - minitap-mobile-use - Versions diffs - 2.2.0__py3-none-any.whl → 2.4.0__py3-none-any.whl - Mend

minitap-mobile-use 2.2.0__py3-none-any.whl → 2.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (59) hide show

minitap/mobile_use/agents/contextor/contextor.py +6 -4
minitap/mobile_use/agents/cortex/cortex.md +114 -27
minitap/mobile_use/agents/cortex/cortex.py +8 -5
minitap/mobile_use/agents/executor/executor.md +15 -10
minitap/mobile_use/agents/executor/executor.py +6 -5
minitap/mobile_use/agents/executor/utils.py +2 -1
minitap/mobile_use/agents/hopper/hopper.py +6 -3
minitap/mobile_use/agents/orchestrator/orchestrator.py +26 -11
minitap/mobile_use/agents/outputter/outputter.py +6 -3
minitap/mobile_use/agents/outputter/test_outputter.py +104 -42
minitap/mobile_use/agents/planner/planner.md +20 -22
minitap/mobile_use/agents/planner/planner.py +10 -7
minitap/mobile_use/agents/planner/types.py +4 -2
minitap/mobile_use/agents/planner/utils.py +14 -0
minitap/mobile_use/agents/summarizer/summarizer.py +2 -2
minitap/mobile_use/config.py +6 -1
minitap/mobile_use/context.py +13 -3
minitap/mobile_use/controllers/mobile_command_controller.py +1 -14
minitap/mobile_use/graph/state.py +7 -3
minitap/mobile_use/sdk/agent.py +204 -29
minitap/mobile_use/sdk/examples/README.md +19 -1
minitap/mobile_use/sdk/examples/platform_minimal_example.py +46 -0
minitap/mobile_use/sdk/services/platform.py +244 -0
minitap/mobile_use/sdk/types/__init__.py +14 -14
minitap/mobile_use/sdk/types/exceptions.py +57 -0
minitap/mobile_use/sdk/types/platform.py +125 -0
minitap/mobile_use/sdk/types/task.py +60 -17
minitap/mobile_use/servers/device_hardware_bridge.py +3 -2
minitap/mobile_use/servers/stop_servers.py +11 -12
minitap/mobile_use/servers/utils.py +6 -9
minitap/mobile_use/services/llm.py +89 -5
minitap/mobile_use/tools/index.py +2 -8
minitap/mobile_use/tools/mobile/back.py +3 -3
minitap/mobile_use/tools/mobile/clear_text.py +67 -38
minitap/mobile_use/tools/mobile/erase_one_char.py +5 -4
minitap/mobile_use/tools/mobile/{take_screenshot.py → glimpse_screen.py} +23 -15
minitap/mobile_use/tools/mobile/input_text.py +67 -16
minitap/mobile_use/tools/mobile/launch_app.py +54 -22
minitap/mobile_use/tools/mobile/long_press_on.py +15 -8
minitap/mobile_use/tools/mobile/open_link.py +15 -8
minitap/mobile_use/tools/mobile/press_key.py +15 -8
minitap/mobile_use/tools/mobile/stop_app.py +14 -8
minitap/mobile_use/tools/mobile/swipe.py +11 -5
minitap/mobile_use/tools/mobile/tap.py +103 -21
minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +3 -3
minitap/mobile_use/tools/test_utils.py +377 -0
minitap/mobile_use/tools/types.py +35 -0
minitap/mobile_use/tools/utils.py +149 -39
minitap/mobile_use/utils/recorder.py +1 -1
minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
minitap/mobile_use/utils/ui_hierarchy.py +11 -4
{minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/METADATA +6 -4
minitap_mobile_use-2.4.0.dist-info/RECORD +99 -0
minitap/mobile_use/tools/mobile/copy_text_from.py +0 -73
minitap/mobile_use/tools/mobile/find_packages.py +0 -69
minitap/mobile_use/tools/mobile/paste_text.py +0 -62
minitap_mobile_use-2.2.0.dist-info/RECORD +0 -96
{minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/WHEEL +0 -0
{minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/entry_points.txt +0 -0

minitap/mobile_use/tools/mobile/input_text.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from __future__ import annotations
-from typing import Literal
+from typing import Annotated, Literal
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import tool
@@ -8,17 +8,19 @@ from langchain_core.tools.base import InjectedToolCallId
 from langgraph.prebuilt import InjectedState
 from langgraph.types import Command
 from pydantic import BaseModel
-from typing import Annotated
 from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
 from minitap.mobile_use.context import MobileUseContext
+from minitap.mobile_use.controllers.mobile_command_controller import get_screen_data
 from minitap.mobile_use.controllers.mobile_command_controller import (
     input_text as input_text_controller,
 )
 from minitap.mobile_use.graph.state import State
 from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
+from minitap.mobile_use.tools.types import Target
 from minitap.mobile_use.tools.utils import focus_element_if_needed, move_cursor_to_end_if_bounds
 from minitap.mobile_use.utils.logger import get_logger
+from minitap.mobile_use.utils.ui_hierarchy import find_element_by_resource_id, get_element_text
 logger = get_logger(__name__)
@@ -42,12 +44,12 @@ def _controller_input_text(ctx: MobileUseContext, text: str) -> InputResult:
 def get_input_text_tool(ctx: MobileUseContext):
     @tool
-    def input_text(
+    async def input_text(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
         text: str,
-        text_input_resource_id: str,
+        target: Target,
     ):
         """
         Focus a text field and type text into it.
@@ -55,32 +57,69 @@ def get_input_text_tool(ctx: MobileUseContext):
         - Ensure the corresponding element is focused (tap if necessary).
         - If bounds are available, tap near the end to place the cursor at the end.
         - Type the provided `text` using the controller.
+        Args:
+            tool_call_id: The ID of the tool call.
+            state: The state of the agent.
+            agent_thought: The thought of the agent.
+            text: The text to type.
+            target: The target of the text input (if available).
         """
-        focused = focus_element_if_needed(ctx=ctx, resource_id=text_input_resource_id)
-        if focused:
-            move_cursor_to_end_if_bounds(ctx=ctx, state=state, resource_id=text_input_resource_id)
+        focused = focus_element_if_needed(ctx=ctx, target=target)
+        if not focused:
+            error_message = "Failed to focus the text input element before typing."
+            tool_message = ToolMessage(
+                tool_call_id=tool_call_id,
+                content=input_text_wrapper.on_failure_fn(text, error_message),
+                additional_kwargs={"error": error_message},
+                status="error",
+            )
+            return Command(
+                update=await state.asanitize_update(
+                    ctx=ctx,
+                    update={
+                        "agents_thoughts": [agent_thought, error_message],
+                        EXECUTOR_MESSAGES_KEY: [tool_message],
+                    },
+                    agent="executor",
+                ),
+            )
+        move_cursor_to_end_if_bounds(ctx=ctx, state=state, target=target)
         result = _controller_input_text(ctx=ctx, text=text)
         status: Literal["success", "error"] = "success" if result.ok else "error"
-        content_msg = (
-            input_text_wrapper.on_success_fn(text)
+        text_input_content = ""
+        if status == "success" and target.resource_id:
+            screen_data = get_screen_data(screen_api_client=ctx.screen_api_client)
+            state.latest_ui_hierarchy = screen_data.elements
+            element = find_element_by_resource_id(
+                ui_hierarchy=state.latest_ui_hierarchy,
+                resource_id=target.resource_id,
+                index=target.resource_id_index,
+            )
+            if element:
+                text_input_content = get_element_text(element)
+        agent_outcome = (
+            input_text_wrapper.on_success_fn(text, text_input_content, target.resource_id)
             if result.ok
-            else input_text_wrapper.on_failure_fn(text)
+            else input_text_wrapper.on_failure_fn(text, result.error)
         )
         tool_message = ToolMessage(
             tool_call_id=tool_call_id,
-            content=content_msg,
+            content=agent_outcome,
             additional_kwargs={"error": result.error} if not result.ok else {},
             status=status,
         )
         return Command(
-            update=state.sanitize_update(
+            update=await state.asanitize_update(
                 ctx=ctx,
                 update={
-                    "agents_thoughts": [agent_thought],
+                    "agents_thoughts": [agent_thought, agent_outcome],
                     EXECUTOR_MESSAGES_KEY: [tool_message],
                 },
                 agent="executor",
@@ -90,8 +129,20 @@ def get_input_text_tool(ctx: MobileUseContext):
     return input_text
+def _on_input_success(text, text_input_content, text_input_resource_id):
+    """Success message handler for input text operations."""
+    if text_input_resource_id is not None:
+        return (
+            f"Typed {repr(text)}.\n"
+            f"Here is the whole content of input with id {repr(text_input_resource_id)}: "
+            f"{repr(text_input_content)}"
+        )
+    else:
+        return "Typed text, should now verify before moving forward"
 input_text_wrapper = ToolWrapper(
     tool_fn_getter=get_input_text_tool,
-    on_success_fn=lambda text: f"Successfully typed {text}",
-    on_failure_fn=lambda text: f"Failed to input text {text}",
+    on_success_fn=_on_input_success,
+    on_failure_fn=lambda text, error: f"Failed to input text {repr(text)}. Reason: {error}",
 )

minitap/mobile_use/tools/mobile/launch_app.py CHANGED Viewed

@@ -1,44 +1,76 @@
+from typing import Annotated
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import tool
 from langchain_core.tools.base import InjectedToolCallId
+from langgraph.prebuilt import InjectedState
 from langgraph.types import Command
+from minitap.mobile_use.agents.hopper.hopper import HopperOutput, hopper
 from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
+from minitap.mobile_use.context import MobileUseContext
 from minitap.mobile_use.controllers.mobile_command_controller import (
     launch_app as launch_app_controller,
 )
-from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
-from typing import Annotated
-from minitap.mobile_use.context import MobileUseContext
+from minitap.mobile_use.controllers.platform_specific_commands_controller import list_packages
 from minitap.mobile_use.graph.state import State
-from langgraph.prebuilt import InjectedState
+from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
+async def find_package(ctx: MobileUseContext, app_name: str) -> str | None:
+    """
+    Finds the package name for a given application name.
+    """
+    all_packages = list_packages(ctx=ctx)
+    try:
+        hopper_output: HopperOutput = await hopper(
+            ctx=ctx,
+            request=f"I'm looking for the package name of the following app: '{app_name}'",
+            data=all_packages,
+        )
+        # Assuming hopper_output.output directly contains the package name
+        return hopper_output.output
+    except Exception as e:
+        print(f"Failed to find package for '{app_name}': {e}")
+        return None
 def get_launch_app_tool(ctx: MobileUseContext):
     @tool
-    def launch_app(
+    async def launch_app(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
+        app_name: str,
         agent_thought: str,
-        package_name: str,
-    ):
+    ) -> Command:
         """
-        Launch an application on the device using the package name on Android, bundle id on iOS.
+        Finds and launches an application on the device using its natural language name.
         """
-        output = launch_app_controller(ctx=ctx, package_name=package_name)
-        has_failed = output is not None
-        tool_message = ToolMessage(
-            tool_call_id=tool_call_id,
-            content=launch_app_wrapper.on_failure_fn(package_name)
-            if has_failed
-            else launch_app_wrapper.on_success_fn(package_name),
-            additional_kwargs={"error": output} if has_failed else {},
-            status="error" if has_failed else "success",
-        )
+        package_name = await find_package(ctx=ctx, app_name=app_name)
+        if not package_name:
+            tool_message = ToolMessage(
+                tool_call_id=tool_call_id,
+                content=launch_app_wrapper.on_failure_fn(app_name, "Package not found."),
+                status="error",
+            )
+        else:
+            output = launch_app_controller(ctx=ctx, package_name=package_name)
+            has_failed = output is not None
+            tool_message = ToolMessage(
+                tool_call_id=tool_call_id,
+                content=launch_app_wrapper.on_failure_fn(app_name, output)
+                if has_failed
+                else launch_app_wrapper.on_success_fn(app_name),
+                additional_kwargs={"error": output} if has_failed else {},
+                status="error" if has_failed else "success",
+            )
         return Command(
-            update=state.sanitize_update(
+            update=await state.asanitize_update(
                 ctx=ctx,
                 update={
-                    "agents_thoughts": [agent_thought],
+                    "agents_thoughts": [agent_thought, tool_message.content],
                     EXECUTOR_MESSAGES_KEY: [tool_message],
                 },
                 agent="executor",
@@ -50,6 +82,6 @@ def get_launch_app_tool(ctx: MobileUseContext):
 launch_app_wrapper = ToolWrapper(
     tool_fn_getter=get_launch_app_tool,
-    on_success_fn=lambda package_name: f"App {package_name} launched successfully.",
-    on_failure_fn=lambda package_name: f"Failed to launch app {package_name}.",
+    on_success_fn=lambda app_name: f"App '{app_name}' launched successfully.",
+    on_failure_fn=lambda app_name, error: f"Failed to launch app '{app_name}': {error}",
 )

minitap/mobile_use/tools/mobile/long_press_on.py CHANGED Viewed

@@ -1,8 +1,11 @@
+from typing import Annotated
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import tool
 from langchain_core.tools.base import InjectedToolCallId
 from langgraph.prebuilt import InjectedState
 from langgraph.types import Command
 from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
 from minitap.mobile_use.context import MobileUseContext
 from minitap.mobile_use.controllers.mobile_command_controller import SelectorRequest
@@ -11,37 +14,41 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
 )
 from minitap.mobile_use.graph.state import State
 from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
-from typing import Annotated
 def get_long_press_on_tool(ctx: MobileUseContext):
     @tool
-    def long_press_on(
+    async def long_press_on(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
         selector_request: SelectorRequest,
         index: int | None = None,
-    ):
+    ) -> Command:
         """
         Long press on a UI element identified by the given selector.
         An index can be specified to select a specific element if multiple are found.
         """
         output = long_press_on_controller(ctx=ctx, selector_request=selector_request, index=index)
         has_failed = output is not None
+        agent_outcome = (
+            long_press_on_wrapper.on_failure_fn()
+            if has_failed
+            else long_press_on_wrapper.on_success_fn()
+        )
         tool_message = ToolMessage(
             tool_call_id=tool_call_id,
-            content=long_press_on_wrapper.on_failure_fn()
-            if has_failed
-            else long_press_on_wrapper.on_success_fn(),
+            content=agent_outcome,
             additional_kwargs={"error": output} if has_failed else {},
             status="error" if has_failed else "success",
         )
         return Command(
-            update=state.sanitize_update(
+            update=await state.asanitize_update(
                 ctx=ctx,
                 update={
-                    "agents_thoughts": [agent_thought],
+                    "agents_thoughts": [agent_thought, agent_outcome],
                     EXECUTOR_MESSAGES_KEY: [tool_message],
                 },
                 agent="executor",

minitap/mobile_use/tools/mobile/open_link.py CHANGED Viewed

@@ -1,8 +1,11 @@
+from typing import Annotated
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import tool
 from langchain_core.tools.base import InjectedToolCallId
 from langgraph.prebuilt import InjectedState
 from langgraph.types import Command
 from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
 from minitap.mobile_use.context import MobileUseContext
 from minitap.mobile_use.controllers.mobile_command_controller import (
@@ -10,35 +13,39 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
 )
 from minitap.mobile_use.graph.state import State
 from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
-from typing import Annotated
 def get_open_link_tool(ctx: MobileUseContext):
     @tool
-    def open_link(
+    async def open_link(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
         url: str,
-    ):
+    ) -> Command:
         """
         Open a link on a device (i.e. a deep link).
         """
         output = open_link_controller(ctx=ctx, url=url)
         has_failed = output is not None
+        agent_outcome = (
+            open_link_wrapper.on_failure_fn()
+            if has_failed
+            else open_link_wrapper.on_success_fn(url)
+        )
         tool_message = ToolMessage(
             tool_call_id=tool_call_id,
-            content=open_link_wrapper.on_failure_fn()
-            if has_failed
-            else open_link_wrapper.on_success_fn(url),
+            content=agent_outcome,
             additional_kwargs={"error": output} if has_failed else {},
             status="error" if has_failed else "success",
         )
         return Command(
-            update=state.sanitize_update(
+            update=await state.asanitize_update(
                 ctx=ctx,
                 update={
-                    "agents_thoughts": [agent_thought],
+                    "agents_thoughts": [agent_thought, agent_outcome],
                     EXECUTOR_MESSAGES_KEY: [tool_message],
                 },
                 agent="executor",

minitap/mobile_use/tools/mobile/press_key.py CHANGED Viewed

@@ -1,8 +1,11 @@
+from typing import Annotated
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import tool
 from langchain_core.tools.base import InjectedToolCallId
 from langgraph.prebuilt import InjectedState
 from langgraph.types import Command
 from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
 from minitap.mobile_use.context import MobileUseContext
 from minitap.mobile_use.controllers.mobile_command_controller import Key
@@ -11,33 +14,37 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
 )
 from minitap.mobile_use.graph.state import State
 from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
-from typing import Annotated
 def get_press_key_tool(ctx: MobileUseContext):
     @tool
-    def press_key(
+    async def press_key(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
         key: Key,
-    ):
+    ) -> Command:
         """Press a key on the device."""
         output = press_key_controller(ctx=ctx, key=key)
         has_failed = output is not None
+        agent_outcome = (
+            press_key_wrapper.on_failure_fn(key)
+            if has_failed
+            else press_key_wrapper.on_success_fn(key)
+        )
         tool_message = ToolMessage(
             tool_call_id=tool_call_id,
-            content=press_key_wrapper.on_failure_fn(key)
-            if has_failed
-            else press_key_wrapper.on_success_fn(key),
+            content=agent_outcome,
             additional_kwargs={"error": output} if has_failed else {},
             status="error" if has_failed else "success",
         )
         return Command(
-            update=state.sanitize_update(
+            update=await state.asanitize_update(
                 ctx=ctx,
                 update={
-                    "agents_thoughts": [agent_thought],
+                    "agents_thoughts": [agent_thought, agent_outcome],
                     EXECUTOR_MESSAGES_KEY: [tool_message],
                 },
                 agent="executor",

minitap/mobile_use/tools/mobile/stop_app.py CHANGED Viewed

@@ -1,43 +1,49 @@
+from typing import Annotated
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import tool
 from langchain_core.tools.base import InjectedToolCallId
 from langgraph.prebuilt import InjectedState
 from langgraph.types import Command
 from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
 from minitap.mobile_use.context import MobileUseContext
 from minitap.mobile_use.controllers.mobile_command_controller import stop_app as stop_app_controller
 from minitap.mobile_use.graph.state import State
 from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
-from typing import Annotated
 def get_stop_app_tool(ctx: MobileUseContext):
     @tool
-    def stop_app(
+    async def stop_app(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
         package_name: str | None = None,
-    ):
+    ) -> Command:
         """
         Stops current application if it is running.
         You can also specify the package name of the app to be stopped.
         """
         output = stop_app_controller(ctx=ctx, package_name=package_name)
         has_failed = output is not None
+        agent_outcome = (
+            stop_app_wrapper.on_failure_fn(package_name)
+            if has_failed
+            else stop_app_wrapper.on_success_fn(package_name)
+        )
         tool_message = ToolMessage(
             tool_call_id=tool_call_id,
-            content=stop_app_wrapper.on_failure_fn(package_name)
-            if has_failed
-            else stop_app_wrapper.on_success_fn(package_name),
+            content=agent_outcome,
             additional_kwargs={"error": output} if has_failed else {},
             status="error" if has_failed else "success",
         )
         return Command(
-            update=state.sanitize_update(
+            update=await state.asanitize_update(
                 ctx=ctx,
                 update={
-                    "agents_thoughts": [agent_thought],
+                    "agents_thoughts": [agent_thought, agent_outcome],
                     EXECUTOR_MESSAGES_KEY: [tool_message],
                 },
                 agent="executor",

minitap/mobile_use/tools/mobile/swipe.py CHANGED Viewed

@@ -24,26 +24,32 @@ from minitap.mobile_use.tools.tool_wrapper import CompositeToolWrapper
 def get_swipe_tool(ctx: MobileUseContext) -> BaseTool:
     @tool
-    def swipe(
+    async def swipe(
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[State, InjectedState],
         agent_thought: str,
         swipe_request: SwipeRequest,
-    ):
+    ) -> Command:
         """Swipes on the screen."""
         output = swipe_controller(ctx=ctx, swipe_request=swipe_request)
         has_failed = output is not None
+        agent_outcome = (
+            swipe_wrapper.on_success_fn() if not has_failed else swipe_wrapper.on_failure_fn()
+        )
         tool_message = ToolMessage(
             tool_call_id=tool_call_id,
-            content=swipe_wrapper.on_failure_fn() if has_failed else swipe_wrapper.on_success_fn(),
+            content=agent_outcome,
             additional_kwargs={"error": output} if has_failed else {},
             status="error" if has_failed else "success",
         )
         return Command(
-            update=state.sanitize_update(
+            update=await state.asanitize_update(
                 ctx=ctx,
                 update={
-                    "agents_thoughts": [agent_thought],
+                    "agents_thoughts": [agent_thought, agent_outcome],
                     EXECUTOR_MESSAGES_KEY: [tool_message],
                 },
                 agent="executor",

minitap-mobile-use 2.2.0__py3-none-any.whl → 2.4.0__py3-none-any.whl

Potentially problematic release.

minitap-mobile-use 2.2.0__py3-none-any.whl → 2.4.0__py3-none-any.whl