minitap-mobile-use 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (74)
  1. minitap/mobile_use/agents/cortex/cortex.md +19 -10
  2. minitap/mobile_use/agents/cortex/cortex.py +15 -2
  3. minitap/mobile_use/agents/cortex/types.py +2 -4
  4. minitap/mobile_use/agents/executor/executor.md +20 -15
  5. minitap/mobile_use/agents/executor/executor.py +6 -18
  6. minitap/mobile_use/agents/executor/tool_node.py +105 -0
  7. minitap/mobile_use/agents/hopper/hopper.md +2 -10
  8. minitap/mobile_use/agents/hopper/hopper.py +4 -9
  9. minitap/mobile_use/agents/orchestrator/human.md +3 -4
  10. minitap/mobile_use/agents/orchestrator/orchestrator.md +25 -7
  11. minitap/mobile_use/agents/orchestrator/orchestrator.py +56 -56
  12. minitap/mobile_use/agents/orchestrator/types.py +5 -8
  13. minitap/mobile_use/agents/outputter/outputter.py +1 -2
  14. minitap/mobile_use/agents/planner/planner.md +25 -15
  15. minitap/mobile_use/agents/planner/planner.py +7 -1
  16. minitap/mobile_use/agents/planner/types.py +10 -5
  17. minitap/mobile_use/agents/planner/utils.py +11 -0
  18. minitap/mobile_use/agents/summarizer/summarizer.py +2 -1
  19. minitap/mobile_use/clients/device_hardware_client.py +3 -0
  20. minitap/mobile_use/config.py +16 -14
  21. minitap/mobile_use/constants.py +1 -0
  22. minitap/mobile_use/context.py +3 -4
  23. minitap/mobile_use/controllers/mobile_command_controller.py +37 -26
  24. minitap/mobile_use/controllers/platform_specific_commands_controller.py +3 -4
  25. minitap/mobile_use/graph/graph.py +10 -31
  26. minitap/mobile_use/graph/state.py +34 -14
  27. minitap/mobile_use/main.py +11 -8
  28. minitap/mobile_use/sdk/agent.py +78 -63
  29. minitap/mobile_use/sdk/builders/agent_config_builder.py +23 -11
  30. minitap/mobile_use/sdk/builders/task_request_builder.py +9 -9
  31. minitap/mobile_use/sdk/examples/smart_notification_assistant.py +1 -2
  32. minitap/mobile_use/sdk/types/agent.py +10 -5
  33. minitap/mobile_use/sdk/types/task.py +19 -18
  34. minitap/mobile_use/sdk/utils.py +1 -1
  35. minitap/mobile_use/servers/config.py +1 -2
  36. minitap/mobile_use/servers/device_hardware_bridge.py +3 -4
  37. minitap/mobile_use/servers/start_servers.py +4 -4
  38. minitap/mobile_use/servers/stop_servers.py +12 -18
  39. minitap/mobile_use/services/llm.py +4 -2
  40. minitap/mobile_use/tools/index.py +11 -7
  41. minitap/mobile_use/tools/mobile/back.py +8 -12
  42. minitap/mobile_use/tools/mobile/clear_text.py +277 -0
  43. minitap/mobile_use/tools/mobile/copy_text_from.py +8 -12
  44. minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
  45. minitap/mobile_use/tools/mobile/find_packages.py +69 -0
  46. minitap/mobile_use/tools/mobile/input_text.py +55 -32
  47. minitap/mobile_use/tools/mobile/launch_app.py +8 -12
  48. minitap/mobile_use/tools/mobile/long_press_on.py +9 -13
  49. minitap/mobile_use/tools/mobile/open_link.py +8 -12
  50. minitap/mobile_use/tools/mobile/paste_text.py +8 -12
  51. minitap/mobile_use/tools/mobile/press_key.py +8 -12
  52. minitap/mobile_use/tools/mobile/stop_app.py +9 -13
  53. minitap/mobile_use/tools/mobile/swipe.py +8 -12
  54. minitap/mobile_use/tools/mobile/take_screenshot.py +8 -12
  55. minitap/mobile_use/tools/mobile/tap.py +9 -13
  56. minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +9 -13
  57. minitap/mobile_use/tools/tool_wrapper.py +1 -23
  58. minitap/mobile_use/tools/utils.py +86 -0
  59. minitap/mobile_use/utils/cli_helpers.py +1 -2
  60. minitap/mobile_use/utils/cli_selection.py +5 -6
  61. minitap/mobile_use/utils/decorators.py +21 -20
  62. minitap/mobile_use/utils/logger.py +3 -4
  63. minitap/mobile_use/utils/media.py +1 -1
  64. minitap/mobile_use/utils/recorder.py +11 -10
  65. minitap/mobile_use/utils/ui_hierarchy.py +98 -3
  66. {minitap_mobile_use-2.0.0.dist-info → minitap_mobile_use-2.1.0.dist-info}/METADATA +12 -2
  67. minitap_mobile_use-2.1.0.dist-info/RECORD +96 -0
  68. minitap/mobile_use/agents/executor/executor_context_cleaner.py +0 -27
  69. minitap/mobile_use/tools/mobile/erase_text.py +0 -124
  70. minitap/mobile_use/tools/mobile/list_packages.py +0 -78
  71. minitap/mobile_use/tools/mobile/run_flow.py +0 -57
  72. minitap_mobile_use-2.0.0.dist-info/RECORD +0 -95
  73. {minitap_mobile_use-2.0.0.dist-info → minitap_mobile_use-2.1.0.dist-info}/WHEEL +0 -0
  74. {minitap_mobile_use-2.0.0.dist-info → minitap_mobile_use-2.1.0.dist-info}/entry_points.txt +0 -0
@@ -1,14 +1,11 @@
1
- from enum import Enum
1
+ from typing import Annotated
2
2
 
3
3
  from pydantic import BaseModel
4
4
 
5
5
 
6
- class OrchestratorStatus(Enum):
7
- CONTINUE = "continue"
8
- RESUME = "resume"
9
- REPLAN = "replan"
10
-
11
-
12
6
  class OrchestratorOutput(BaseModel):
13
- status: OrchestratorStatus
7
+ completed_subgoal_ids: Annotated[
8
+ list[str], "IDs of subgoals that can now be marked as complete"
9
+ ] = []
10
+ needs_replaning: Annotated[bool, "Whether the orchestrator needs to replan the subgoal plan"]
14
11
  reason: str
@@ -1,6 +1,5 @@
1
1
  import json
2
2
  from pathlib import Path
3
- from typing import Dict, Type, Union
4
3
 
5
4
  from jinja2 import Template
6
5
  from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
@@ -49,7 +48,7 @@ async def outputter(
49
48
  structured_llm = llm
50
49
 
51
50
  if output_config.structured_output:
52
- schema: Union[Dict, Type[BaseModel], None] = None
51
+ schema: dict | type[BaseModel] | None = None
53
52
  so = output_config.structured_output
54
53
 
55
54
  if isinstance(so, dict):
@@ -12,7 +12,9 @@ You work like an agile tech lead: defining the key milestones without locking in
12
12
  - Subgoals should reflect real interactions with mobile UIs (e.g. "Open app", "Tap search bar", "Scroll to item", "Send message to Bob", etc).
13
13
  - Don't assume the full UI is visible yet. Plan based on how most mobile apps work, and keep flexibility.
14
14
  - The list of agent thoughts is empty, which is expected since this is the first plan.
15
- - Don't use precise UI actions when formulating subgoals like "copy", "paste", "tap", "swipe", ... unless explicitly asked in the initial goal.
15
+ - Avoid overly granular tasks based on individual UI actions (e.g. "tap", "swipe", "copy", "paste") unless explicitly required.
16
+ - The executor has the following available tools: **{{ executor_tools_list }}**.
17
+ When one of these tools offers a direct shortcut (e.g. `openLink` instead of manually launching a browser and typing a URL), prefer it over decomposed manual steps.
16
18
 
17
19
  2. **Replanning**
18
20
  If you're asked to **revise a previous plan**, you'll also receive:
@@ -25,7 +27,7 @@ You work like an agile tech lead: defining the key milestones without locking in
25
27
 
26
28
  ### Output
27
29
 
28
- You must output a **list of strings**, each representing a clear subgoal.
30
+ You must output a **list of subgoals (description + optional subgoal ID)**, each representing a clear subgoal.
29
31
  Each subgoal should be:
30
32
 
31
33
  - Focused on **realistic mobile interactions**
@@ -33,32 +35,40 @@ Each subgoal should be:
33
35
  - Sequential (later steps may depend on earlier ones)
34
36
  - Don't use loop-like formulation unless necessary (e.g. don't say "repeat this X times", instead reuse the same steps X times as subgoals)
35
37
 
38
+ If you're replanning and need to keep a previous subgoal, you **must keep the same subgoal ID**.
39
+
36
40
  ### Examples
37
41
 
38
42
  #### **Initial Goal**: "Open WhatsApp and send 'I’m running late' to Alice"
39
43
 
40
44
  **Plan**:
41
45
 
42
- - Open the WhatsApp app
43
- - Locate or search for Alice
44
- - Open the conversation with Alice
45
- - Type the message "I’m running late"
46
- - Send the message
46
+ - Open the WhatsApp app (ID: None -> will be generated as a UUID like bc3c362d-f498-4f1a-991e-4a2d1f8c1226)
47
+ - Locate or search for Alice (ID: None)
48
+ - Open the conversation with Alice (ID: None)
49
+ - Type the message "I’m running late" (ID: None)
50
+ - Send the message (ID: None)
51
+
52
+ #### **Initial Goal**: "Go on https://tesla.com, and tell me what is the first car being displayed"
53
+
54
+ **Plan**:
55
+
56
+ - Open the link https://tesla.com (ID: None)
57
+ - Find the first car displayed on the home page (ID: None)
47
58
 
48
59
  #### **Replanning Example**
49
60
 
50
- **Original Plan**: same as above
61
+ **Original Plan**: same as above with IDs set
51
62
  **Agent Thoughts**:
52
63
 
53
- - Couldnt find Alice in recent chats
64
+ - Couldn't find Alice in recent chats
54
65
  - Search bar was present on top of the chat screen
55
66
  - Keyboard appeared after tapping search
56
67
 
57
68
  **New Plan**:
58
69
 
59
- - Unlock the phone if needed
60
- - Open WhatsApp
61
- - Tap the search bar
62
- - Search for "Alice"
63
- - Select the correct chat
64
- - Type and send "I’m running late"
70
+ - Open WhatsApp (ID: bc3c362d-f498-4f1a-991e-4a2d1f8c1226)
71
+ - Tap the search bar (ID: None)
72
+ - Search for "Alice" (ID: None)
73
+ - Select the correct chat (ID: None)
74
+ - Type and send "I’m running late" (ID: None)
@@ -1,12 +1,15 @@
1
+ import uuid
1
2
  from pathlib import Path
2
3
 
3
4
  from jinja2 import Template
4
5
  from langchain_core.messages import HumanMessage, SystemMessage
6
+
5
7
  from minitap.mobile_use.agents.planner.types import PlannerOutput, Subgoal, SubgoalStatus
6
8
  from minitap.mobile_use.agents.planner.utils import one_of_them_is_failure
7
9
  from minitap.mobile_use.context import MobileUseContext
8
10
  from minitap.mobile_use.graph.state import State
9
11
  from minitap.mobile_use.services.llm import get_llm
12
+ from minitap.mobile_use.tools.index import EXECUTOR_WRAPPERS_TOOLS, format_tools_list
10
13
  from minitap.mobile_use.utils.decorators import wrap_with_callbacks
11
14
  from minitap.mobile_use.utils.logger import get_logger
12
15
 
@@ -35,6 +38,7 @@ class PlannerNode:
35
38
  initial_goal=state.initial_goal,
36
39
  previous_plan="\n".join(str(s) for s in state.subgoal_plan),
37
40
  agent_thoughts="\n".join(state.agents_thoughts),
41
+ executor_tools_list=format_tools_list(self.ctx, EXECUTOR_WRAPPERS_TOOLS),
38
42
  )
39
43
  messages = [
40
44
  SystemMessage(content=system_message),
@@ -47,7 +51,8 @@ class PlannerNode:
47
51
 
48
52
  subgoals_plan = [
49
53
  Subgoal(
50
- description=subgoal,
54
+ id=subgoal.id or str(uuid.uuid4()),
55
+ description=subgoal.description,
51
56
  status=SubgoalStatus.NOT_STARTED,
52
57
  completion_reason=None,
53
58
  )
@@ -61,4 +66,5 @@ class PlannerNode:
61
66
  update={
62
67
  "subgoal_plan": subgoals_plan,
63
68
  },
69
+ agent="planner",
64
70
  )
@@ -1,12 +1,16 @@
1
1
  from enum import Enum
2
- from typing import Optional
3
2
 
4
3
  from pydantic import BaseModel
5
- from typing_extensions import Annotated
4
+ from typing import Annotated
5
+
6
+
7
+ class PlannerSubgoalOutput(BaseModel):
8
+ id: Annotated[str | None, "If not provided, it will be generated"] = None
9
+ description: str
6
10
 
7
11
 
8
12
  class PlannerOutput(BaseModel):
9
- subgoals: list[str]
13
+ subgoals: list[PlannerSubgoalOutput]
10
14
 
11
15
 
12
16
  class SubgoalStatus(Enum):
@@ -17,9 +21,10 @@ class SubgoalStatus(Enum):
17
21
 
18
22
 
19
23
  class Subgoal(BaseModel):
24
+ id: Annotated[str, "Unique identifier of the subgoal"]
20
25
  description: Annotated[str, "Description of the subgoal"]
21
26
  completion_reason: Annotated[
22
- Optional[str], "Reason why the subgoal was completed (failure or success)"
27
+ str | None, "Reason why the subgoal was completed (failure or success)"
23
28
  ] = None
24
29
  status: SubgoalStatus
25
30
 
@@ -35,7 +40,7 @@ class Subgoal(BaseModel):
35
40
  case SubgoalStatus.NOT_STARTED:
36
41
  status_emoji = "(not started yet)"
37
42
 
38
- output = f"- {self.description} : {status_emoji}."
43
+ output = f"- [ID:{self.id}]: {self.description} : {status_emoji}."
39
44
  if self.completion_reason:
40
45
  output += f" Completion reason: {self.completion_reason}"
41
46
  return output
@@ -5,6 +5,10 @@ def get_current_subgoal(subgoals: list[Subgoal]) -> Subgoal | None:
5
5
  return next((s for s in subgoals if s.status == SubgoalStatus.PENDING), None)
6
6
 
7
7
 
8
+ def get_subgoals_by_ids(subgoals: list[Subgoal], ids: list[str]) -> list[Subgoal]:
9
+ return [s for s in subgoals if s.id in ids]
10
+
11
+
8
12
  def get_next_subgoal(subgoals: list[Subgoal]) -> Subgoal | None:
9
13
  return next((s for s in subgoals if s.status == SubgoalStatus.NOT_STARTED), None)
10
14
 
@@ -21,6 +25,13 @@ def complete_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
21
25
  return subgoals
22
26
 
23
27
 
28
+ def complete_subgoals_by_ids(subgoals: list[Subgoal], ids: list[str]) -> list[Subgoal]:
29
+ for subgoal in subgoals:
30
+ if subgoal.id in ids:
31
+ subgoal.status = SubgoalStatus.SUCCESS
32
+ return subgoals
33
+
34
+
24
35
  def fail_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
25
36
  current_subgoal = get_current_subgoal(subgoals)
26
37
  if not current_subgoal:
@@ -3,6 +3,7 @@ from langchain_core.messages import (
3
3
  RemoveMessage,
4
4
  ToolMessage,
5
5
  )
6
+
6
7
  from minitap.mobile_use.constants import MAX_MESSAGES_IN_HISTORY
7
8
  from minitap.mobile_use.context import MobileUseContext
8
9
  from minitap.mobile_use.graph.state import State
@@ -22,7 +23,7 @@ class SummarizerNode:
22
23
  start_removal = False
23
24
 
24
25
  for msg in reversed(state.messages[:nb_removal_candidates]):
25
- if isinstance(msg, (ToolMessage, HumanMessage)):
26
+ if isinstance(msg, ToolMessage | HumanMessage):
26
27
  start_removal = True
27
28
  if start_removal and msg.id:
28
29
  remove_messages.append(RemoveMessage(id=msg.id))
@@ -12,6 +12,9 @@ class DeviceHardwareClient:
12
12
  url = urljoin(self.base_url, f"/api/{path.lstrip('/')}")
13
13
  return self.session.get(url, **kwargs)
14
14
 
15
+ def get_rich_hierarchy(self) -> list[dict]:
16
+ return self.get("last-view-hierarchy").json().get("children", [])
17
+
15
18
  def post(self, path: str, **kwargs):
16
19
  url = urljoin(self.base_url, f"/api/{path.lstrip('/')}")
17
20
  return self.session.post(url, **kwargs)
@@ -1,7 +1,7 @@
1
1
  import json
2
2
  import os
3
3
  from pathlib import Path
4
- from typing import Annotated, Any, Literal, Optional, Union
4
+ from typing import Annotated, Any, Literal
5
5
 
6
6
  from dotenv import load_dotenv
7
7
  from pydantic import BaseModel, Field, SecretStr, ValidationError, model_validator
@@ -17,15 +17,17 @@ logger = get_logger(__name__)
17
17
 
18
18
 
19
19
  class Settings(BaseSettings):
20
- OPENAI_API_KEY: Optional[SecretStr] = None
21
- GOOGLE_API_KEY: Optional[SecretStr] = None
22
- XAI_API_KEY: Optional[SecretStr] = None
23
- OPEN_ROUTER_API_KEY: Optional[SecretStr] = None
20
+ OPENAI_API_KEY: SecretStr | None = None
21
+ GOOGLE_API_KEY: SecretStr | None = None
22
+ XAI_API_KEY: SecretStr | None = None
23
+ OPEN_ROUTER_API_KEY: SecretStr | None = None
24
24
 
25
- DEVICE_SCREEN_API_BASE_URL: Optional[str] = None
26
- DEVICE_HARDWARE_BRIDGE_BASE_URL: Optional[str] = None
27
- ADB_HOST: Optional[str] = None
28
- ADB_PORT: Optional[int] = None
25
+ OPENAI_BASE_URL: str | None = None
26
+
27
+ DEVICE_SCREEN_API_BASE_URL: str | None = None
28
+ DEVICE_HARDWARE_BRIDGE_BASE_URL: str | None = None
29
+ ADB_HOST: str | None = None
30
+ ADB_PORT: int | None = None
29
31
 
30
32
  model_config = {"env_file": ".env", "extra": "ignore"}
31
33
 
@@ -69,7 +71,7 @@ def prepare_output_files() -> tuple[str | None, str | None]:
69
71
  return validated_events_path, validated_results_path
70
72
 
71
73
 
72
- def record_events(output_path: Path | None, events: Union[list[str], BaseModel, Any]):
74
+ def record_events(output_path: Path | None, events: list[str] | BaseModel | Any):
73
75
  if not output_path:
74
76
  return
75
77
 
@@ -168,7 +170,7 @@ def get_default_llm_config() -> LLMConfig:
168
170
  try:
169
171
  if not os.path.exists(ROOT_DIR / DEFAULT_LLM_CONFIG_FILENAME):
170
172
  raise Exception("Default llm config not found")
171
- with open(ROOT_DIR / DEFAULT_LLM_CONFIG_FILENAME, "r") as f:
173
+ with open(ROOT_DIR / DEFAULT_LLM_CONFIG_FILENAME) as f:
172
174
  default_config_dict = load_jsonc(f)
173
175
  return LLMConfig.model_validate(default_config_dict["default"])
174
176
  except Exception as e:
@@ -209,7 +211,7 @@ def parse_llm_config() -> LLMConfig:
209
211
  override_config_dict = {}
210
212
  if os.path.exists(ROOT_DIR / OVERRIDE_LLM_CONFIG_FILENAME):
211
213
  logger.info("Loading custom llm config...")
212
- with open(ROOT_DIR / OVERRIDE_LLM_CONFIG_FILENAME, "r") as f:
214
+ with open(ROOT_DIR / OVERRIDE_LLM_CONFIG_FILENAME) as f:
213
215
  override_config_dict = load_jsonc(f)
214
216
  else:
215
217
  logger.warning("Custom llm config not found, loading default config")
@@ -235,7 +237,7 @@ def initialize_llm_config() -> LLMConfig:
235
237
 
236
238
  class OutputConfig(BaseModel):
237
239
  structured_output: Annotated[
238
- Optional[Union[type[BaseModel], dict]],
240
+ type[BaseModel] | dict | None,
239
241
  Field(
240
242
  default=None,
241
243
  description=(
@@ -245,7 +247,7 @@ class OutputConfig(BaseModel):
245
247
  ),
246
248
  ]
247
249
  output_description: Annotated[
248
- Optional[str],
250
+ str | None,
249
251
  Field(
250
252
  default=None,
251
253
  description=(
@@ -1,2 +1,3 @@
1
1
  RECURSION_LIMIT = 400
2
2
  MAX_MESSAGES_IN_HISTORY = 25
3
+ EXECUTOR_MESSAGES_KEY = "executor_messages"
@@ -6,12 +6,11 @@ Uses ContextVar to avoid prop drilling and maintain clean function signatures.
6
6
 
7
7
  from enum import Enum
8
8
  from pathlib import Path
9
- from typing import Optional
10
9
 
11
10
  from adbutils import AdbClient
12
11
  from openai import BaseModel
13
12
  from pydantic import ConfigDict
14
- from typing_extensions import Literal
13
+ from typing import Literal
15
14
 
16
15
  from minitap.mobile_use.clients.device_hardware_client import DeviceHardwareClient
17
16
  from minitap.mobile_use.clients.screen_api_client import ScreenApiClient
@@ -56,8 +55,8 @@ class MobileUseContext(BaseModel):
56
55
  hw_bridge_client: DeviceHardwareClient
57
56
  screen_api_client: ScreenApiClient
58
57
  llm_config: LLMConfig
59
- adb_client: Optional[AdbClient] = None
60
- execution_setup: Optional[ExecutionSetup] = None
58
+ adb_client: AdbClient | None = None
59
+ execution_setup: ExecutionSetup | None = None
61
60
 
62
61
  def get_adb_client(self) -> AdbClient:
63
62
  if self.adb_client is None:
@@ -1,6 +1,6 @@
1
1
  import uuid
2
2
  from enum import Enum
3
- from typing import Annotated, Literal, Optional, Union
3
+ from typing import Annotated, Literal
4
4
 
5
5
  import yaml
6
6
  from langgraph.types import Command
@@ -9,6 +9,7 @@ from requests import JSONDecodeError
9
9
 
10
10
  from minitap.mobile_use.clients.device_hardware_client import DeviceHardwareClient
11
11
  from minitap.mobile_use.clients.screen_api_client import ScreenApiClient
12
+ from minitap.mobile_use.config import initialize_llm_config
12
13
  from minitap.mobile_use.context import DeviceContext, DevicePlatform, MobileUseContext
13
14
  from minitap.mobile_use.utils.errors import ControllerErrors
14
15
  from minitap.mobile_use.utils.logger import get_logger
@@ -42,7 +43,7 @@ class RunFlowRequest(BaseModel):
42
43
  dry_run: bool = Field(default=False, alias="dryRun")
43
44
 
44
45
 
45
- def run_flow(ctx: MobileUseContext, flow_steps: list, dry_run: bool = False) -> Optional[dict]:
46
+ def run_flow(ctx: MobileUseContext, flow_steps: list, dry_run: bool = False) -> dict | None:
46
47
  """
47
48
  Run a flow i.e, a sequence of commands.
48
49
  Returns None on success, or the response body of the failed command.
@@ -136,20 +137,20 @@ class SelectorRequestWithPercentages(BaseModel):
136
137
  return {"point": self.percentages.to_str()}
137
138
 
138
139
 
139
- SelectorRequest = Union[
140
- IdSelectorRequest,
141
- SelectorRequestWithCoordinates,
142
- SelectorRequestWithPercentages,
143
- TextSelectorRequest,
144
- IdWithTextSelectorRequest,
145
- ]
140
+ SelectorRequest = (
141
+ IdSelectorRequest
142
+ | SelectorRequestWithCoordinates
143
+ | SelectorRequestWithPercentages
144
+ | TextSelectorRequest
145
+ | IdWithTextSelectorRequest
146
+ )
146
147
 
147
148
 
148
149
  def tap(
149
150
  ctx: MobileUseContext,
150
151
  selector_request: SelectorRequest,
151
152
  dry_run: bool = False,
152
- index: Optional[int] = None,
153
+ index: int | None = None,
153
154
  ):
154
155
  """
155
156
  Tap on a selector.
@@ -170,7 +171,7 @@ def long_press_on(
170
171
  ctx: MobileUseContext,
171
172
  selector_request: SelectorRequest,
172
173
  dry_run: bool = False,
173
- index: Optional[int] = None,
174
+ index: int | None = None,
174
175
  ):
175
176
  long_press_on_body = selector_request.to_dict()
176
177
  if not long_press_on_body:
@@ -210,7 +211,7 @@ SwipeDirection = Annotated[
210
211
  class SwipeRequest(BaseModel):
211
212
  model_config = ConfigDict(extra="forbid")
212
213
  swipe_mode: SwipeStartEndCoordinatesRequest | SwipeStartEndPercentagesRequest | SwipeDirection
213
- duration: Optional[int] = None # in ms, default is 400ms
214
+ duration: int | None = None # in ms, default is 400ms
214
215
 
215
216
  def to_dict(self):
216
217
  res = {}
@@ -256,7 +257,7 @@ def paste_text(ctx: MobileUseContext, dry_run: bool = False):
256
257
  return run_flow(ctx, ["pasteText"], dry_run=dry_run)
257
258
 
258
259
 
259
- def erase_text(ctx: MobileUseContext, nb_chars: Optional[int] = None, dry_run: bool = False):
260
+ def erase_text(ctx: MobileUseContext, nb_chars: int | None = None, dry_run: bool = False):
260
261
  """
261
262
  Removes characters from the currently selected textfield (if any)
262
263
  Removes 50 characters if nb_chars is not specified.
@@ -274,7 +275,7 @@ def launch_app(ctx: MobileUseContext, package_name: str, dry_run: bool = False):
274
275
  return run_flow_with_wait_for_animation_to_end(ctx, flow_input, dry_run=dry_run)
275
276
 
276
277
 
277
- def stop_app(ctx: MobileUseContext, package_name: Optional[str] = None, dry_run: bool = False):
278
+ def stop_app(ctx: MobileUseContext, package_name: str | None = None, dry_run: bool = False):
278
279
  if package_name is None:
279
280
  flow_input = ["stopApp"]
280
281
  else:
@@ -316,7 +317,7 @@ class WaitTimeout(Enum):
316
317
 
317
318
 
318
319
  def wait_for_animation_to_end(
319
- ctx: MobileUseContext, timeout: Optional[WaitTimeout] = None, dry_run: bool = False
320
+ ctx: MobileUseContext, timeout: WaitTimeout | None = None, dry_run: bool = False
320
321
  ):
321
322
  if timeout is None:
322
323
  return run_flow(ctx, ["waitForAnimationToEnd"], dry_run=dry_run)
@@ -331,12 +332,10 @@ def run_flow_with_wait_for_animation_to_end(
331
332
 
332
333
 
333
334
  if __name__ == "__main__":
334
- # long press, erase
335
- # input_text(text="test")
336
- # erase_text()
337
335
  ctx = MobileUseContext(
336
+ llm_config=initialize_llm_config(),
338
337
  device=DeviceContext(
339
- host_platform="LINUX",
338
+ host_platform="WINDOWS",
340
339
  mobile_platform=DevicePlatform.ANDROID,
341
340
  device_id="emulator-5554",
342
341
  device_width=1080,
@@ -347,7 +346,6 @@ if __name__ == "__main__":
347
346
  )
348
347
  screen_data = get_screen_data(ctx.screen_api_client)
349
348
  from minitap.mobile_use.graph.state import State
350
- from minitap.mobile_use.tools.mobile.erase_text import get_erase_text_tool
351
349
 
352
350
  dummy_state = State(
353
351
  latest_ui_hierarchy=screen_data.elements,
@@ -358,20 +356,33 @@ if __name__ == "__main__":
358
356
  focused_app_info=None,
359
357
  device_date="",
360
358
  structured_decisions=None,
361
- executor_retrigger=False,
362
- executor_failed=False,
359
+ complete_subgoals_by_ids=[],
363
360
  executor_messages=[],
364
361
  cortex_last_thought="",
365
362
  agents_thoughts=[],
366
363
  )
367
364
 
368
- # invoke erase_text tool
369
- input_resource_id = "com.google.android.settings.intelligence:id/open_search_view_edit_text"
370
- command_output: Command = get_erase_text_tool(ctx=ctx).invoke(
365
+ # from minitap.mobile_use.tools.mobile.input_text import get_input_text_tool
366
+
367
+ # input_resource_id = "com.google.android.apps.nexuslauncher:id/search_container_hotseat"
368
+ # command_output: Command = get_input_text_tool(ctx=ctx).invoke(
369
+ # {
370
+ # "tool_call_id": uuid.uuid4().hex,
371
+ # "agent_thought": "",
372
+ # "text_input_resource_id": input_resource_id,
373
+ # "text": "Hello World",
374
+ # "state": dummy_state,
375
+ # "executor_metadata": None,
376
+ # }
377
+ # )
378
+ from minitap.mobile_use.tools.mobile.clear_text import get_clear_text_tool
379
+
380
+ input_resource_id = "com.google.android.apps.nexuslauncher:id/input"
381
+ command_output: Command = get_clear_text_tool(ctx=ctx).invoke(
371
382
  {
372
383
  "tool_call_id": uuid.uuid4().hex,
373
384
  "agent_thought": "",
374
- "input_text_resource_id": input_resource_id,
385
+ "text_input_resource_id": input_resource_id,
375
386
  "state": dummy_state,
376
387
  "executor_metadata": None,
377
388
  }
@@ -1,6 +1,5 @@
1
1
  from datetime import date
2
2
  import json
3
- from typing import Optional
4
3
 
5
4
  from adbutils import AdbDevice
6
5
  from minitap.mobile_use.utils.logger import MobileUseLogger
@@ -20,8 +19,8 @@ def get_adb_device(ctx: MobileUseContext) -> AdbDevice:
20
19
 
21
20
 
22
21
  def get_first_device(
23
- logger: Optional[MobileUseLogger] = None,
24
- ) -> tuple[Optional[str], Optional[DevicePlatform]]:
22
+ logger: MobileUseLogger | None = None,
23
+ ) -> tuple[str | None, DevicePlatform | None]:
25
24
  """Gets the first available device."""
26
25
  try:
27
26
  android_output = run_shell_command_on_host("adb devices")
@@ -50,7 +49,7 @@ def get_first_device(
50
49
  return None, None
51
50
 
52
51
 
53
- def get_focused_app_info(ctx: MobileUseContext) -> Optional[str]:
52
+ def get_focused_app_info(ctx: MobileUseContext) -> str | None:
54
53
  if ctx.device.mobile_platform == DevicePlatform.IOS:
55
54
  return None
56
55
  device = get_adb_device(ctx)
@@ -6,13 +6,11 @@ from langchain_core.messages import (
6
6
  from langgraph.constants import END, START
7
7
  from langgraph.graph import StateGraph
8
8
  from langgraph.graph.state import CompiledStateGraph
9
- from langgraph.prebuilt import ToolNode
9
+
10
10
  from minitap.mobile_use.agents.contextor.contextor import ContextorNode
11
11
  from minitap.mobile_use.agents.cortex.cortex import CortexNode
12
12
  from minitap.mobile_use.agents.executor.executor import ExecutorNode
13
- from minitap.mobile_use.agents.executor.executor_context_cleaner import (
14
- executor_context_cleaner_node,
15
- )
13
+ from minitap.mobile_use.agents.executor.tool_node import ExecutorToolNode
16
14
  from minitap.mobile_use.agents.orchestrator.orchestrator import OrchestratorNode
17
15
  from minitap.mobile_use.agents.planner.planner import PlannerNode
18
16
  from minitap.mobile_use.agents.planner.utils import (
@@ -21,6 +19,7 @@ from minitap.mobile_use.agents.planner.utils import (
21
19
  one_of_them_is_failure,
22
20
  )
23
21
  from minitap.mobile_use.agents.summarizer.summarizer import SummarizerNode
22
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
24
23
  from minitap.mobile_use.context import MobileUseContext
25
24
  from minitap.mobile_use.graph.state import State
26
25
  from minitap.mobile_use.tools.index import EXECUTOR_WRAPPERS_TOOLS, get_tools_from_wrappers
@@ -53,7 +52,7 @@ def post_cortex_gate(
53
52
  state: State,
54
53
  ) -> Literal["continue", "end_subgoal"]:
55
54
  logger.info("Starting post_cortex_gate")
56
- if not state.structured_decisions:
55
+ if len(state.complete_subgoals_by_ids) > 0:
57
56
  return "end_subgoal"
58
57
  return "continue"
59
58
 
@@ -62,7 +61,7 @@ def post_executor_gate(
62
61
  state: State,
63
62
  ) -> Literal["invoke_tools", "skip"]:
64
63
  logger.info("Starting post_executor_gate")
65
- messages = state.messages
64
+ messages = state.executor_messages
66
65
  if not messages:
67
66
  return "skip"
68
67
  last_message = messages[-1]
@@ -77,17 +76,6 @@ def post_executor_gate(
77
76
  return "skip"
78
77
 
79
78
 
80
- def post_executor_tools_gate(
81
- state: State,
82
- ) -> Literal["continue", "failed", "done"]:
83
- logger.info("Starting post_executor_tools_gate")
84
- if state.executor_failed:
85
- return "failed"
86
- if state.executor_retrigger:
87
- return "continue"
88
- return "done"
89
-
90
-
91
79
  async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
92
80
  graph_builder = StateGraph(State)
93
81
 
@@ -100,12 +88,12 @@ async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
100
88
  graph_builder.add_node("cortex", CortexNode(ctx))
101
89
 
102
90
  graph_builder.add_node("executor", ExecutorNode(ctx))
103
- executor_tool_node = ToolNode(
104
- get_tools_from_wrappers(ctx=ctx, wrappers=EXECUTOR_WRAPPERS_TOOLS)
91
+ executor_tool_node = ExecutorToolNode(
92
+ tools=get_tools_from_wrappers(ctx=ctx, wrappers=EXECUTOR_WRAPPERS_TOOLS),
93
+ messages_key=EXECUTOR_MESSAGES_KEY,
105
94
  )
106
95
  graph_builder.add_node("executor_tools", executor_tool_node)
107
96
 
108
- graph_builder.add_node("executor_context_cleaner", executor_context_cleaner_node)
109
97
  graph_builder.add_node("summarizer", SummarizerNode(ctx))
110
98
 
111
99
  # Linking nodes
@@ -132,18 +120,9 @@ async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
132
120
  graph_builder.add_conditional_edges(
133
121
  "executor",
134
122
  post_executor_gate,
135
- {"invoke_tools": "executor_tools", "skip": "executor_context_cleaner"},
136
- )
137
- graph_builder.add_conditional_edges(
138
- "executor_tools",
139
- post_executor_tools_gate,
140
- {
141
- "continue": "executor",
142
- "done": "executor_context_cleaner",
143
- "failed": "executor_context_cleaner",
144
- },
123
+ {"invoke_tools": "executor_tools", "skip": "summarizer"},
145
124
  )
146
- graph_builder.add_edge("executor_context_cleaner", "summarizer")
125
+ graph_builder.add_edge("executor_tools", "summarizer")
147
126
  graph_builder.add_edge("summarizer", "contextor")
148
127
 
149
128
  return graph_builder.compile()