PyPI - minitap-mobile-use - Versions diffs - 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl - Mend

minitap-mobile-use 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (74) hide show

minitap/mobile_use/agents/cortex/cortex.md +19 -10
minitap/mobile_use/agents/cortex/cortex.py +15 -2
minitap/mobile_use/agents/cortex/types.py +2 -4
minitap/mobile_use/agents/executor/executor.md +20 -15
minitap/mobile_use/agents/executor/executor.py +6 -18
minitap/mobile_use/agents/executor/tool_node.py +105 -0
minitap/mobile_use/agents/hopper/hopper.md +2 -10
minitap/mobile_use/agents/hopper/hopper.py +4 -9
minitap/mobile_use/agents/orchestrator/human.md +3 -4
minitap/mobile_use/agents/orchestrator/orchestrator.md +25 -7
minitap/mobile_use/agents/orchestrator/orchestrator.py +56 -56
minitap/mobile_use/agents/orchestrator/types.py +5 -8
minitap/mobile_use/agents/outputter/outputter.py +1 -2
minitap/mobile_use/agents/planner/planner.md +25 -15
minitap/mobile_use/agents/planner/planner.py +7 -1
minitap/mobile_use/agents/planner/types.py +10 -5
minitap/mobile_use/agents/planner/utils.py +11 -0
minitap/mobile_use/agents/summarizer/summarizer.py +2 -1
minitap/mobile_use/clients/device_hardware_client.py +3 -0
minitap/mobile_use/config.py +16 -14
minitap/mobile_use/constants.py +1 -0
minitap/mobile_use/context.py +3 -4
minitap/mobile_use/controllers/mobile_command_controller.py +37 -26
minitap/mobile_use/controllers/platform_specific_commands_controller.py +3 -4
minitap/mobile_use/graph/graph.py +10 -31
minitap/mobile_use/graph/state.py +34 -14
minitap/mobile_use/main.py +11 -8
minitap/mobile_use/sdk/agent.py +78 -63
minitap/mobile_use/sdk/builders/agent_config_builder.py +23 -11
minitap/mobile_use/sdk/builders/task_request_builder.py +9 -9
minitap/mobile_use/sdk/examples/smart_notification_assistant.py +1 -2
minitap/mobile_use/sdk/types/agent.py +10 -5
minitap/mobile_use/sdk/types/task.py +19 -18
minitap/mobile_use/sdk/utils.py +1 -1
minitap/mobile_use/servers/config.py +1 -2
minitap/mobile_use/servers/device_hardware_bridge.py +3 -4
minitap/mobile_use/servers/start_servers.py +4 -4
minitap/mobile_use/servers/stop_servers.py +12 -18
minitap/mobile_use/services/llm.py +4 -2
minitap/mobile_use/tools/index.py +11 -7
minitap/mobile_use/tools/mobile/back.py +8 -12
minitap/mobile_use/tools/mobile/clear_text.py +277 -0
minitap/mobile_use/tools/mobile/copy_text_from.py +8 -12
minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
minitap/mobile_use/tools/mobile/find_packages.py +69 -0
minitap/mobile_use/tools/mobile/input_text.py +55 -32
minitap/mobile_use/tools/mobile/launch_app.py +8 -12
minitap/mobile_use/tools/mobile/long_press_on.py +9 -13
minitap/mobile_use/tools/mobile/open_link.py +8 -12
minitap/mobile_use/tools/mobile/paste_text.py +8 -12
minitap/mobile_use/tools/mobile/press_key.py +8 -12
minitap/mobile_use/tools/mobile/stop_app.py +9 -13
minitap/mobile_use/tools/mobile/swipe.py +8 -12
minitap/mobile_use/tools/mobile/take_screenshot.py +8 -12
minitap/mobile_use/tools/mobile/tap.py +9 -13
minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +9 -13
minitap/mobile_use/tools/tool_wrapper.py +1 -23
minitap/mobile_use/tools/utils.py +86 -0
minitap/mobile_use/utils/cli_helpers.py +1 -2
minitap/mobile_use/utils/cli_selection.py +5 -6
minitap/mobile_use/utils/decorators.py +21 -20
minitap/mobile_use/utils/logger.py +3 -4
minitap/mobile_use/utils/media.py +1 -1
minitap/mobile_use/utils/recorder.py +11 -10
minitap/mobile_use/utils/ui_hierarchy.py +98 -3
{minitap_mobile_use-2.0.0.dist-info → minitap_mobile_use-2.1.0.dist-info}/METADATA +12 -2
minitap_mobile_use-2.1.0.dist-info/RECORD +96 -0
minitap/mobile_use/agents/executor/executor_context_cleaner.py +0 -27
minitap/mobile_use/tools/mobile/erase_text.py +0 -124
minitap/mobile_use/tools/mobile/list_packages.py +0 -78
minitap/mobile_use/tools/mobile/run_flow.py +0 -57
minitap_mobile_use-2.0.0.dist-info/RECORD +0 -95
{minitap_mobile_use-2.0.0.dist-info → minitap_mobile_use-2.1.0.dist-info}/WHEEL +0 -0
{minitap_mobile_use-2.0.0.dist-info → minitap_mobile_use-2.1.0.dist-info}/entry_points.txt +0 -0

minitap/mobile_use/agents/orchestrator/types.py CHANGED Viewed

@@ -1,14 +1,11 @@
-from enum import Enum
+from typing import Annotated
 from pydantic import BaseModel
-class OrchestratorStatus(Enum):
-    CONTINUE = "continue"
-    RESUME = "resume"
-    REPLAN = "replan"
 class OrchestratorOutput(BaseModel):
-    status: OrchestratorStatus
+    completed_subgoal_ids: Annotated[
+        list[str], "IDs of subgoals that can now be marked as complete"
+    ] = []
+    needs_replaning: Annotated[bool, "Whether the orchestrator needs to replan the subgoal plan"]
     reason: str

minitap/mobile_use/agents/outputter/outputter.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import json
 from pathlib import Path
-from typing import Dict, Type, Union
 from jinja2 import Template
 from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
@@ -49,7 +48,7 @@ async def outputter(
     structured_llm = llm
     if output_config.structured_output:
-        schema: Union[Dict, Type[BaseModel], None] = None
+        schema: dict | type[BaseModel] | None = None
         so = output_config.structured_output
         if isinstance(so, dict):

minitap/mobile_use/agents/planner/planner.md CHANGED Viewed

@@ -12,7 +12,9 @@ You work like an agile tech lead: defining the key milestones without locking in
    - Subgoals should reflect real interactions with mobile UIs (e.g. "Open app", "Tap search bar", "Scroll to item", "Send message to Bob", etc).
    - Don't assume the full UI is visible yet. Plan based on how most mobile apps work, and keep flexibility.
    - List of agents thoughts is empty which is expected, since it is the first plan.
-   - Don't use precise UI actions when formulating subgoals like "copy", "paste", "tap", "swipe", ... unless explicitly asked in the initial goal.
+   - Avoid overly granular, UI-action-based tasks (e.g. "tap", "swipe", "copy", "paste") unless explicitly required.
+   - The executor has the following available tools: **{{ executor_tools_list }}**.
+     When one of these tools offers a direct shortcut (e.g. `openLink` instead of manually launching a browser and typing a URL), prefer it over decomposed manual steps.
 2. **Replanning**
    If you're asked to **revise a previous plan**, you'll also receive:
@@ -25,7 +27,7 @@ You work like an agile tech lead: defining the key milestones without locking in
 ### Output
-You must output a **list of strings**, each representing a clear subgoal.
+You must output a **list of subgoals (description + optional subgoal ID)**, each representing a clear subgoal.
 Each subgoal should be:
 - Focused on **realistic mobile interactions**
@@ -33,32 +35,40 @@ Each subgoal should be:
 - Sequential (later steps may depend on earlier ones)
 - Don't use loop-like formulation unless necessary (e.g. don't say "repeat this X times", instead reuse the same steps X times as subgoals)
+If you're replanning and need to keep a previous subgoal, you **must keep the same subgoal ID**.
 ### Examples
 #### **Initial Goal**: "Open WhatsApp and send 'I’m running late' to Alice"
 **Plan**:
-- Open the WhatsApp app
-- Locate or search for Alice
-- Open the conversation with Alice
-- Type the message "I’m running late"
-- Send the message
+- Open the WhatsApp app (ID: None -> will be generated as a UUID like bc3c362d-f498-4f1a-991e-4a2d1f8c1226)
+- Locate or search for Alice (ID: None)
+- Open the conversation with Alice (ID: None)
+- Type the message "I’m running late" (ID: None)
+- Send the message (ID: None)
+#### **Initial Goal**: "Go on https://tesla.com, and tell me what is the first car being displayed"
+**Plan**:
+- Open the link https://tesla.com (ID: None)
+- Find the first car displayed on the home page (ID: None)
 #### **Replanning Example**
-**Original Plan**: same as above
+**Original Plan**: same as above with IDs set
 **Agent Thoughts**:
-- Couldn’t find Alice in recent chats
+- Couldn't find Alice in recent chats
 - Search bar was present on top of the chat screen
 - Keyboard appeared after tapping search
 **New Plan**:
-- Unlock the phone if needed
-- Open WhatsApp
-- Tap the search bar
-- Search for "Alice"
-- Select the correct chat
-- Type and send "I’m running late"
+- Open WhatsApp (ID: bc3c362d-f498-4f1a-991e-4a2d1f8c1226)
+- Tap the search bar (ID: None)
+- Search for "Alice" (ID: None)
+- Select the correct chat (ID: None)
+- Type and send "I’m running late" (ID: None)

minitap/mobile_use/agents/planner/planner.py CHANGED Viewed

@@ -1,12 +1,15 @@
+import uuid
 from pathlib import Path
 from jinja2 import Template
 from langchain_core.messages import HumanMessage, SystemMessage
 from minitap.mobile_use.agents.planner.types import PlannerOutput, Subgoal, SubgoalStatus
 from minitap.mobile_use.agents.planner.utils import one_of_them_is_failure
 from minitap.mobile_use.context import MobileUseContext
 from minitap.mobile_use.graph.state import State
 from minitap.mobile_use.services.llm import get_llm
+from minitap.mobile_use.tools.index import EXECUTOR_WRAPPERS_TOOLS, format_tools_list
 from minitap.mobile_use.utils.decorators import wrap_with_callbacks
 from minitap.mobile_use.utils.logger import get_logger
@@ -35,6 +38,7 @@ class PlannerNode:
             initial_goal=state.initial_goal,
             previous_plan="\n".join(str(s) for s in state.subgoal_plan),
             agent_thoughts="\n".join(state.agents_thoughts),
+            executor_tools_list=format_tools_list(self.ctx, EXECUTOR_WRAPPERS_TOOLS),
         )
         messages = [
             SystemMessage(content=system_message),
@@ -47,7 +51,8 @@ class PlannerNode:
         subgoals_plan = [
             Subgoal(
-                description=subgoal,
+                id=subgoal.id or str(uuid.uuid4()),
+                description=subgoal.description,
                 status=SubgoalStatus.NOT_STARTED,
                 completion_reason=None,
             )
@@ -61,4 +66,5 @@ class PlannerNode:
             update={
                 "subgoal_plan": subgoals_plan,
             },
+            agent="planner",
         )

minitap/mobile_use/agents/planner/types.py CHANGED Viewed

@@ -1,12 +1,16 @@
 from enum import Enum
-from typing import Optional
 from pydantic import BaseModel
-from typing_extensions import Annotated
+from typing import Annotated
+class PlannerSubgoalOutput(BaseModel):
+    id: Annotated[str | None, "If not provided, it will be generated"] = None
+    description: str
 class PlannerOutput(BaseModel):
-    subgoals: list[str]
+    subgoals: list[PlannerSubgoalOutput]
 class SubgoalStatus(Enum):
@@ -17,9 +21,10 @@ class SubgoalStatus(Enum):
 class Subgoal(BaseModel):
+    id: Annotated[str, "Unique identifier of the subgoal"]
     description: Annotated[str, "Description of the subgoal"]
     completion_reason: Annotated[
-        Optional[str], "Reason why the subgoal was completed (failure or success)"
+        str | None, "Reason why the subgoal was completed (failure or success)"
     ] = None
     status: SubgoalStatus
@@ -35,7 +40,7 @@ class Subgoal(BaseModel):
             case SubgoalStatus.NOT_STARTED:
                 status_emoji = "(not started yet)"
-        output = f"- {self.description} : {status_emoji}."
+        output = f"- [ID:{self.id}]: {self.description} : {status_emoji}."
         if self.completion_reason:
             output += f" Completion reason: {self.completion_reason}"
         return output

minitap/mobile_use/agents/planner/utils.py CHANGED Viewed

@@ -5,6 +5,10 @@ def get_current_subgoal(subgoals: list[Subgoal]) -> Subgoal | None:
     return next((s for s in subgoals if s.status == SubgoalStatus.PENDING), None)
+def get_subgoals_by_ids(subgoals: list[Subgoal], ids: list[str]) -> list[Subgoal]:
+    return [s for s in subgoals if s.id in ids]
 def get_next_subgoal(subgoals: list[Subgoal]) -> Subgoal | None:
     return next((s for s in subgoals if s.status == SubgoalStatus.NOT_STARTED), None)
@@ -21,6 +25,13 @@ def complete_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
     return subgoals
+def complete_subgoals_by_ids(subgoals: list[Subgoal], ids: list[str]) -> list[Subgoal]:
+    for subgoal in subgoals:
+        if subgoal.id in ids:
+            subgoal.status = SubgoalStatus.SUCCESS
+    return subgoals
 def fail_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
     current_subgoal = get_current_subgoal(subgoals)
     if not current_subgoal:

minitap/mobile_use/agents/summarizer/summarizer.py CHANGED Viewed

@@ -3,6 +3,7 @@ from langchain_core.messages import (
     RemoveMessage,
     ToolMessage,
 )
 from minitap.mobile_use.constants import MAX_MESSAGES_IN_HISTORY
 from minitap.mobile_use.context import MobileUseContext
 from minitap.mobile_use.graph.state import State
@@ -22,7 +23,7 @@ class SummarizerNode:
         start_removal = False
         for msg in reversed(state.messages[:nb_removal_candidates]):
-            if isinstance(msg, (ToolMessage, HumanMessage)):
+            if isinstance(msg, ToolMessage | HumanMessage):
                 start_removal = True
             if start_removal and msg.id:
                 remove_messages.append(RemoveMessage(id=msg.id))

minitap/mobile_use/clients/device_hardware_client.py CHANGED Viewed

@@ -12,6 +12,9 @@ class DeviceHardwareClient:
         url = urljoin(self.base_url, f"/api/{path.lstrip('/')}")
         return self.session.get(url, **kwargs)
+    def get_rich_hierarchy(self) -> list[dict]:
+        return self.get("last-view-hierarchy").json().get("children", [])
     def post(self, path: str, **kwargs):
         url = urljoin(self.base_url, f"/api/{path.lstrip('/')}")
         return self.session.post(url, **kwargs)

minitap/mobile_use/config.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import json
 import os
 from pathlib import Path
-from typing import Annotated, Any, Literal, Optional, Union
+from typing import Annotated, Any, Literal
 from dotenv import load_dotenv
 from pydantic import BaseModel, Field, SecretStr, ValidationError, model_validator
@@ -17,15 +17,17 @@ logger = get_logger(__name__)
 class Settings(BaseSettings):
-    OPENAI_API_KEY: Optional[SecretStr] = None
-    GOOGLE_API_KEY: Optional[SecretStr] = None
-    XAI_API_KEY: Optional[SecretStr] = None
-    OPEN_ROUTER_API_KEY: Optional[SecretStr] = None
+    OPENAI_API_KEY: SecretStr | None = None
+    GOOGLE_API_KEY: SecretStr | None = None
+    XAI_API_KEY: SecretStr | None = None
+    OPEN_ROUTER_API_KEY: SecretStr | None = None
-    DEVICE_SCREEN_API_BASE_URL: Optional[str] = None
-    DEVICE_HARDWARE_BRIDGE_BASE_URL: Optional[str] = None
-    ADB_HOST: Optional[str] = None
-    ADB_PORT: Optional[int] = None
+    OPENAI_BASE_URL: str | None = None
+    DEVICE_SCREEN_API_BASE_URL: str | None = None
+    DEVICE_HARDWARE_BRIDGE_BASE_URL: str | None = None
+    ADB_HOST: str | None = None
+    ADB_PORT: int | None = None
     model_config = {"env_file": ".env", "extra": "ignore"}
@@ -69,7 +71,7 @@ def prepare_output_files() -> tuple[str | None, str | None]:
     return validated_events_path, validated_results_path
-def record_events(output_path: Path | None, events: Union[list[str], BaseModel, Any]):
+def record_events(output_path: Path | None, events: list[str] | BaseModel | Any):
     if not output_path:
         return
@@ -168,7 +170,7 @@ def get_default_llm_config() -> LLMConfig:
     try:
         if not os.path.exists(ROOT_DIR / DEFAULT_LLM_CONFIG_FILENAME):
             raise Exception("Default llm config not found")
-        with open(ROOT_DIR / DEFAULT_LLM_CONFIG_FILENAME, "r") as f:
+        with open(ROOT_DIR / DEFAULT_LLM_CONFIG_FILENAME) as f:
             default_config_dict = load_jsonc(f)
         return LLMConfig.model_validate(default_config_dict["default"])
     except Exception as e:
@@ -209,7 +211,7 @@ def parse_llm_config() -> LLMConfig:
     override_config_dict = {}
     if os.path.exists(ROOT_DIR / OVERRIDE_LLM_CONFIG_FILENAME):
         logger.info("Loading custom llm config...")
-        with open(ROOT_DIR / OVERRIDE_LLM_CONFIG_FILENAME, "r") as f:
+        with open(ROOT_DIR / OVERRIDE_LLM_CONFIG_FILENAME) as f:
             override_config_dict = load_jsonc(f)
     else:
         logger.warning("Custom llm config not found, loading default config")
@@ -235,7 +237,7 @@ def initialize_llm_config() -> LLMConfig:
 class OutputConfig(BaseModel):
     structured_output: Annotated[
-        Optional[Union[type[BaseModel], dict]],
+        type[BaseModel] | dict | None,
         Field(
             default=None,
             description=(
@@ -245,7 +247,7 @@ class OutputConfig(BaseModel):
         ),
     ]
     output_description: Annotated[
-        Optional[str],
+        str | None,
         Field(
             default=None,
             description=(

minitap/mobile_use/constants.py CHANGED Viewed

@@ -1,2 +1,3 @@
 RECURSION_LIMIT = 400
 MAX_MESSAGES_IN_HISTORY = 25
+EXECUTOR_MESSAGES_KEY = "executor_messages"

minitap/mobile_use/context.py CHANGED Viewed

@@ -6,12 +6,11 @@ Uses ContextVar to avoid prop drilling and maintain clean function signatures.
 from enum import Enum
 from pathlib import Path
-from typing import Optional
 from adbutils import AdbClient
 from openai import BaseModel
 from pydantic import ConfigDict
-from typing_extensions import Literal
+from typing import Literal
 from minitap.mobile_use.clients.device_hardware_client import DeviceHardwareClient
 from minitap.mobile_use.clients.screen_api_client import ScreenApiClient
@@ -56,8 +55,8 @@ class MobileUseContext(BaseModel):
     hw_bridge_client: DeviceHardwareClient
     screen_api_client: ScreenApiClient
     llm_config: LLMConfig
-    adb_client: Optional[AdbClient] = None
-    execution_setup: Optional[ExecutionSetup] = None
+    adb_client: AdbClient | None = None
+    execution_setup: ExecutionSetup | None = None
     def get_adb_client(self) -> AdbClient:
         if self.adb_client is None:

minitap/mobile_use/controllers/mobile_command_controller.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import uuid
 from enum import Enum
-from typing import Annotated, Literal, Optional, Union
+from typing import Annotated, Literal
 import yaml
 from langgraph.types import Command
@@ -9,6 +9,7 @@ from requests import JSONDecodeError
 from minitap.mobile_use.clients.device_hardware_client import DeviceHardwareClient
 from minitap.mobile_use.clients.screen_api_client import ScreenApiClient
+from minitap.mobile_use.config import initialize_llm_config
 from minitap.mobile_use.context import DeviceContext, DevicePlatform, MobileUseContext
 from minitap.mobile_use.utils.errors import ControllerErrors
 from minitap.mobile_use.utils.logger import get_logger
@@ -42,7 +43,7 @@ class RunFlowRequest(BaseModel):
     dry_run: bool = Field(default=False, alias="dryRun")
-def run_flow(ctx: MobileUseContext, flow_steps: list, dry_run: bool = False) -> Optional[dict]:
+def run_flow(ctx: MobileUseContext, flow_steps: list, dry_run: bool = False) -> dict | None:
     """
     Run a flow i.e, a sequence of commands.
     Returns None on success, or the response body of the failed command.
@@ -136,20 +137,20 @@ class SelectorRequestWithPercentages(BaseModel):
         return {"point": self.percentages.to_str()}
-SelectorRequest = Union[
-    IdSelectorRequest,
-    SelectorRequestWithCoordinates,
-    SelectorRequestWithPercentages,
-    TextSelectorRequest,
-    IdWithTextSelectorRequest,
-]
+SelectorRequest = (
+    IdSelectorRequest
+    | SelectorRequestWithCoordinates
+    | SelectorRequestWithPercentages
+    | TextSelectorRequest
+    | IdWithTextSelectorRequest
+)
 def tap(
     ctx: MobileUseContext,
     selector_request: SelectorRequest,
     dry_run: bool = False,
-    index: Optional[int] = None,
+    index: int | None = None,
 ):
     """
     Tap on a selector.
@@ -170,7 +171,7 @@ def long_press_on(
     ctx: MobileUseContext,
     selector_request: SelectorRequest,
     dry_run: bool = False,
-    index: Optional[int] = None,
+    index: int | None = None,
 ):
     long_press_on_body = selector_request.to_dict()
     if not long_press_on_body:
@@ -210,7 +211,7 @@ SwipeDirection = Annotated[
 class SwipeRequest(BaseModel):
     model_config = ConfigDict(extra="forbid")
     swipe_mode: SwipeStartEndCoordinatesRequest | SwipeStartEndPercentagesRequest | SwipeDirection
-    duration: Optional[int] = None  # in ms, default is 400ms
+    duration: int | None = None  # in ms, default is 400ms
     def to_dict(self):
         res = {}
@@ -256,7 +257,7 @@ def paste_text(ctx: MobileUseContext, dry_run: bool = False):
     return run_flow(ctx, ["pasteText"], dry_run=dry_run)
-def erase_text(ctx: MobileUseContext, nb_chars: Optional[int] = None, dry_run: bool = False):
+def erase_text(ctx: MobileUseContext, nb_chars: int | None = None, dry_run: bool = False):
     """
     Removes characters from the currently selected textfield (if any)
     Removes 50 characters if nb_chars is not specified.
@@ -274,7 +275,7 @@ def launch_app(ctx: MobileUseContext, package_name: str, dry_run: bool = False):
     return run_flow_with_wait_for_animation_to_end(ctx, flow_input, dry_run=dry_run)
-def stop_app(ctx: MobileUseContext, package_name: Optional[str] = None, dry_run: bool = False):
+def stop_app(ctx: MobileUseContext, package_name: str | None = None, dry_run: bool = False):
     if package_name is None:
         flow_input = ["stopApp"]
     else:
@@ -316,7 +317,7 @@ class WaitTimeout(Enum):
 def wait_for_animation_to_end(
-    ctx: MobileUseContext, timeout: Optional[WaitTimeout] = None, dry_run: bool = False
+    ctx: MobileUseContext, timeout: WaitTimeout | None = None, dry_run: bool = False
 ):
     if timeout is None:
         return run_flow(ctx, ["waitForAnimationToEnd"], dry_run=dry_run)
@@ -331,12 +332,10 @@ def run_flow_with_wait_for_animation_to_end(
 if __name__ == "__main__":
-    # long press, erase
-    # input_text(text="test")
-    # erase_text()
     ctx = MobileUseContext(
+        llm_config=initialize_llm_config(),
         device=DeviceContext(
-            host_platform="LINUX",
+            host_platform="WINDOWS",
             mobile_platform=DevicePlatform.ANDROID,
             device_id="emulator-5554",
             device_width=1080,
@@ -347,7 +346,6 @@ if __name__ == "__main__":
     )
     screen_data = get_screen_data(ctx.screen_api_client)
     from minitap.mobile_use.graph.state import State
-    from minitap.mobile_use.tools.mobile.erase_text import get_erase_text_tool
     dummy_state = State(
         latest_ui_hierarchy=screen_data.elements,
@@ -358,20 +356,33 @@ if __name__ == "__main__":
         focused_app_info=None,
         device_date="",
         structured_decisions=None,
-        executor_retrigger=False,
-        executor_failed=False,
+        complete_subgoals_by_ids=[],
         executor_messages=[],
         cortex_last_thought="",
         agents_thoughts=[],
     )
-    # invoke erase_text tool
-    input_resource_id = "com.google.android.settings.intelligence:id/open_search_view_edit_text"
-    command_output: Command = get_erase_text_tool(ctx=ctx).invoke(
+    # from minitap.mobile_use.tools.mobile.input_text import get_input_text_tool
+    # input_resource_id = "com.google.android.apps.nexuslauncher:id/search_container_hotseat"
+    # command_output: Command = get_input_text_tool(ctx=ctx).invoke(
+    #     {
+    #         "tool_call_id": uuid.uuid4().hex,
+    #         "agent_thought": "",
+    #         "text_input_resource_id": input_resource_id,
+    #         "text": "Hello World",
+    #         "state": dummy_state,
+    #         "executor_metadata": None,
+    #     }
+    # )
+    from minitap.mobile_use.tools.mobile.clear_text import get_clear_text_tool
+    input_resource_id = "com.google.android.apps.nexuslauncher:id/input"
+    command_output: Command = get_clear_text_tool(ctx=ctx).invoke(
         {
             "tool_call_id": uuid.uuid4().hex,
             "agent_thought": "",
-            "input_text_resource_id": input_resource_id,
+            "text_input_resource_id": input_resource_id,
             "state": dummy_state,
             "executor_metadata": None,
         }

minitap/mobile_use/controllers/platform_specific_commands_controller.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from datetime import date
 import json
-from typing import Optional
 from adbutils import AdbDevice
 from minitap.mobile_use.utils.logger import MobileUseLogger
@@ -20,8 +19,8 @@ def get_adb_device(ctx: MobileUseContext) -> AdbDevice:
 def get_first_device(
-    logger: Optional[MobileUseLogger] = None,
-) -> tuple[Optional[str], Optional[DevicePlatform]]:
+    logger: MobileUseLogger | None = None,
+) -> tuple[str | None, DevicePlatform | None]:
     """Gets the first available device."""
     try:
         android_output = run_shell_command_on_host("adb devices")
@@ -50,7 +49,7 @@ def get_first_device(
     return None, None
-def get_focused_app_info(ctx: MobileUseContext) -> Optional[str]:
+def get_focused_app_info(ctx: MobileUseContext) -> str | None:
     if ctx.device.mobile_platform == DevicePlatform.IOS:
         return None
     device = get_adb_device(ctx)

minitap/mobile_use/graph/graph.py CHANGED Viewed

@@ -6,13 +6,11 @@ from langchain_core.messages import (
 from langgraph.constants import END, START
 from langgraph.graph import StateGraph
 from langgraph.graph.state import CompiledStateGraph
-from langgraph.prebuilt import ToolNode
 from minitap.mobile_use.agents.contextor.contextor import ContextorNode
 from minitap.mobile_use.agents.cortex.cortex import CortexNode
 from minitap.mobile_use.agents.executor.executor import ExecutorNode
-from minitap.mobile_use.agents.executor.executor_context_cleaner import (
-    executor_context_cleaner_node,
-)
+from minitap.mobile_use.agents.executor.tool_node import ExecutorToolNode
 from minitap.mobile_use.agents.orchestrator.orchestrator import OrchestratorNode
 from minitap.mobile_use.agents.planner.planner import PlannerNode
 from minitap.mobile_use.agents.planner.utils import (
@@ -21,6 +19,7 @@ from minitap.mobile_use.agents.planner.utils import (
     one_of_them_is_failure,
 )
 from minitap.mobile_use.agents.summarizer.summarizer import SummarizerNode
+from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
 from minitap.mobile_use.context import MobileUseContext
 from minitap.mobile_use.graph.state import State
 from minitap.mobile_use.tools.index import EXECUTOR_WRAPPERS_TOOLS, get_tools_from_wrappers
@@ -53,7 +52,7 @@ def post_cortex_gate(
     state: State,
 ) -> Literal["continue", "end_subgoal"]:
     logger.info("Starting post_cortex_gate")
-    if not state.structured_decisions:
+    if len(state.complete_subgoals_by_ids) > 0:
         return "end_subgoal"
     return "continue"
@@ -62,7 +61,7 @@ def post_executor_gate(
     state: State,
 ) -> Literal["invoke_tools", "skip"]:
     logger.info("Starting post_executor_gate")
-    messages = state.messages
+    messages = state.executor_messages
     if not messages:
         return "skip"
     last_message = messages[-1]
@@ -77,17 +76,6 @@ def post_executor_gate(
     return "skip"
-def post_executor_tools_gate(
-    state: State,
-) -> Literal["continue", "failed", "done"]:
-    logger.info("Starting post_executor_tools_gate")
-    if state.executor_failed:
-        return "failed"
-    if state.executor_retrigger:
-        return "continue"
-    return "done"
 async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
     graph_builder = StateGraph(State)
@@ -100,12 +88,12 @@ async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
     graph_builder.add_node("cortex", CortexNode(ctx))
     graph_builder.add_node("executor", ExecutorNode(ctx))
-    executor_tool_node = ToolNode(
-        get_tools_from_wrappers(ctx=ctx, wrappers=EXECUTOR_WRAPPERS_TOOLS)
+    executor_tool_node = ExecutorToolNode(
+        tools=get_tools_from_wrappers(ctx=ctx, wrappers=EXECUTOR_WRAPPERS_TOOLS),
+        messages_key=EXECUTOR_MESSAGES_KEY,
     )
     graph_builder.add_node("executor_tools", executor_tool_node)
-    graph_builder.add_node("executor_context_cleaner", executor_context_cleaner_node)
     graph_builder.add_node("summarizer", SummarizerNode(ctx))
     # Linking nodes
@@ -132,18 +120,9 @@ async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
     graph_builder.add_conditional_edges(
         "executor",
         post_executor_gate,
-        {"invoke_tools": "executor_tools", "skip": "executor_context_cleaner"},
-    )
-    graph_builder.add_conditional_edges(
-        "executor_tools",
-        post_executor_tools_gate,
-        {
-            "continue": "executor",
-            "done": "executor_context_cleaner",
-            "failed": "executor_context_cleaner",
-        },
+        {"invoke_tools": "executor_tools", "skip": "summarizer"},
     )
-    graph_builder.add_edge("executor_context_cleaner", "summarizer")
+    graph_builder.add_edge("executor_tools", "summarizer")
     graph_builder.add_edge("summarizer", "contextor")
     return graph_builder.compile()

minitap-mobile-use 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl

Potentially problematic release.

minitap-mobile-use 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl