minitap-mobile-use: 2.0.0 (py3-none-any.whl) → 2.0.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (55)
  1. minitap/mobile_use/agents/cortex/cortex.md +17 -10
  2. minitap/mobile_use/agents/cortex/cortex.py +12 -2
  3. minitap/mobile_use/agents/cortex/types.py +2 -2
  4. minitap/mobile_use/agents/executor/executor.md +16 -10
  5. minitap/mobile_use/agents/executor/executor.py +6 -18
  6. minitap/mobile_use/agents/executor/tool_node.py +105 -0
  7. minitap/mobile_use/agents/hopper/hopper.md +2 -10
  8. minitap/mobile_use/agents/hopper/hopper.py +4 -9
  9. minitap/mobile_use/agents/orchestrator/human.md +3 -4
  10. minitap/mobile_use/agents/orchestrator/orchestrator.md +25 -7
  11. minitap/mobile_use/agents/orchestrator/orchestrator.py +56 -56
  12. minitap/mobile_use/agents/orchestrator/types.py +5 -8
  13. minitap/mobile_use/agents/planner/planner.md +14 -13
  14. minitap/mobile_use/agents/planner/planner.py +4 -1
  15. minitap/mobile_use/agents/planner/types.py +8 -2
  16. minitap/mobile_use/agents/planner/utils.py +11 -0
  17. minitap/mobile_use/clients/device_hardware_client.py +3 -0
  18. minitap/mobile_use/config.py +2 -0
  19. minitap/mobile_use/constants.py +1 -0
  20. minitap/mobile_use/controllers/mobile_command_controller.py +10 -11
  21. minitap/mobile_use/graph/graph.py +9 -31
  22. minitap/mobile_use/graph/state.py +26 -6
  23. minitap/mobile_use/main.py +6 -2
  24. minitap/mobile_use/sdk/agent.py +54 -39
  25. minitap/mobile_use/sdk/builders/agent_config_builder.py +17 -4
  26. minitap/mobile_use/sdk/types/agent.py +5 -0
  27. minitap/mobile_use/servers/stop_servers.py +10 -15
  28. minitap/mobile_use/services/llm.py +1 -0
  29. minitap/mobile_use/tools/index.py +2 -4
  30. minitap/mobile_use/tools/mobile/back.py +7 -11
  31. minitap/mobile_use/tools/mobile/copy_text_from.py +7 -11
  32. minitap/mobile_use/tools/mobile/erase_text.py +7 -9
  33. minitap/mobile_use/tools/mobile/find_packages.py +69 -0
  34. minitap/mobile_use/tools/mobile/input_text.py +131 -32
  35. minitap/mobile_use/tools/mobile/launch_app.py +7 -11
  36. minitap/mobile_use/tools/mobile/long_press_on.py +7 -9
  37. minitap/mobile_use/tools/mobile/open_link.py +7 -11
  38. minitap/mobile_use/tools/mobile/paste_text.py +7 -11
  39. minitap/mobile_use/tools/mobile/press_key.py +7 -11
  40. minitap/mobile_use/tools/mobile/stop_app.py +7 -9
  41. minitap/mobile_use/tools/mobile/swipe.py +7 -11
  42. minitap/mobile_use/tools/mobile/take_screenshot.py +7 -11
  43. minitap/mobile_use/tools/mobile/tap.py +7 -9
  44. minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +7 -9
  45. minitap/mobile_use/tools/tool_wrapper.py +1 -23
  46. minitap/mobile_use/utils/recorder.py +11 -10
  47. minitap/mobile_use/utils/ui_hierarchy.py +88 -1
  48. {minitap_mobile_use-2.0.0.dist-info → minitap_mobile_use-2.0.1.dist-info}/METADATA +2 -2
  49. minitap_mobile_use-2.0.1.dist-info/RECORD +94 -0
  50. minitap/mobile_use/agents/executor/executor_context_cleaner.py +0 -27
  51. minitap/mobile_use/tools/mobile/list_packages.py +0 -78
  52. minitap/mobile_use/tools/mobile/run_flow.py +0 -57
  53. minitap_mobile_use-2.0.0.dist-info/RECORD +0 -95
  54. {minitap_mobile_use-2.0.0.dist-info → minitap_mobile_use-2.0.1.dist-info}/WHEEL +0 -0
  55. {minitap_mobile_use-2.0.0.dist-info → minitap_mobile_use-2.0.1.dist-info}/entry_points.txt +0 -0
@@ -25,7 +25,7 @@ You work like an agile tech lead: defining the key milestones without locking in
25
25
 
26
26
  ### Output
27
27
 
28
- You must output a **list of strings**, each representing a clear subgoal.
28
+ You must output a **list of subgoals (description + optional subgoal ID)**, each representing a clear subgoal.
29
29
  Each subgoal should be:
30
30
 
31
31
  - Focused on **realistic mobile interactions**
@@ -33,21 +33,23 @@ Each subgoal should be:
33
33
  - Sequential (later steps may depend on earlier ones)
34
34
  - Don't use loop-like formulation unless necessary (e.g. don't say "repeat this X times", instead reuse the same steps X times as subgoals)
35
35
 
36
+ If you're replanning and need to keep a previous subgoal, you **must keep the same subgoal ID**.
37
+
36
38
  ### Examples
37
39
 
38
40
  #### **Initial Goal**: "Open WhatsApp and send 'I’m running late' to Alice"
39
41
 
40
42
  **Plan**:
41
43
 
42
- - Open the WhatsApp app
43
- - Locate or search for Alice
44
- - Open the conversation with Alice
45
- - Type the message "I’m running late"
46
- - Send the message
44
+ - Open the WhatsApp app (ID: None -> will be generated as a UUID like bc3c362d-f498-4f1a-991e-4a2d1f8c1226)
45
+ - Locate or search for Alice (ID: None)
46
+ - Open the conversation with Alice (ID: None)
47
+ - Type the message "I’m running late" (ID: None)
48
+ - Send the message (ID: None)
47
49
 
48
50
  #### **Replanning Example**
49
51
 
50
- **Original Plan**: same as above
52
+ **Original Plan**: same as above with IDs set
51
53
  **Agent Thoughts**:
52
54
 
53
55
  - Couldn’t find Alice in recent chats
@@ -56,9 +58,8 @@ Each subgoal should be:
56
58
 
57
59
  **New Plan**:
58
60
 
59
- - Unlock the phone if needed
60
- - Open WhatsApp
61
- - Tap the search bar
62
- - Search for "Alice"
63
- - Select the correct chat
64
- - Type and send "I’m running late"
61
+ - Open WhatsApp (ID: bc3c362d-f498-4f1a-991e-4a2d1f8c1226)
62
+ - Tap the search bar (ID: None)
63
+ - Search for "Alice" (ID: None)
64
+ - Select the correct chat (ID: None)
65
+ - Type and send "I’m running late" (ID: None)
@@ -1,4 +1,5 @@
1
1
  from pathlib import Path
2
+ import uuid
2
3
 
3
4
  from jinja2 import Template
4
5
  from langchain_core.messages import HumanMessage, SystemMessage
@@ -47,7 +48,8 @@ class PlannerNode:
47
48
 
48
49
  subgoals_plan = [
49
50
  Subgoal(
50
- description=subgoal,
51
+ id=subgoal.id or str(uuid.uuid4()),
52
+ description=subgoal.description,
51
53
  status=SubgoalStatus.NOT_STARTED,
52
54
  completion_reason=None,
53
55
  )
@@ -61,4 +63,5 @@ class PlannerNode:
61
63
  update={
62
64
  "subgoal_plan": subgoals_plan,
63
65
  },
66
+ agent="planner",
64
67
  )
@@ -5,8 +5,13 @@ from pydantic import BaseModel
5
5
  from typing_extensions import Annotated
6
6
 
7
7
 
8
+ class PlannerSubgoalOutput(BaseModel):
9
+ id: Annotated[Optional[str], "If not provided, it will be generated"] = None
10
+ description: str
11
+
12
+
8
13
  class PlannerOutput(BaseModel):
9
- subgoals: list[str]
14
+ subgoals: list[PlannerSubgoalOutput]
10
15
 
11
16
 
12
17
  class SubgoalStatus(Enum):
@@ -17,6 +22,7 @@ class SubgoalStatus(Enum):
17
22
 
18
23
 
19
24
  class Subgoal(BaseModel):
25
+ id: Annotated[str, "Unique identifier of the subgoal"]
20
26
  description: Annotated[str, "Description of the subgoal"]
21
27
  completion_reason: Annotated[
22
28
  Optional[str], "Reason why the subgoal was completed (failure or success)"
@@ -35,7 +41,7 @@ class Subgoal(BaseModel):
35
41
  case SubgoalStatus.NOT_STARTED:
36
42
  status_emoji = "(not started yet)"
37
43
 
38
- output = f"- {self.description} : {status_emoji}."
44
+ output = f"- [ID:{self.id}]: {self.description} : {status_emoji}."
39
45
  if self.completion_reason:
40
46
  output += f" Completion reason: {self.completion_reason}"
41
47
  return output
@@ -5,6 +5,10 @@ def get_current_subgoal(subgoals: list[Subgoal]) -> Subgoal | None:
5
5
  return next((s for s in subgoals if s.status == SubgoalStatus.PENDING), None)
6
6
 
7
7
 
8
+ def get_subgoals_by_ids(subgoals: list[Subgoal], ids: list[str]) -> list[Subgoal]:
9
+ return [s for s in subgoals if s.id in ids]
10
+
11
+
8
12
  def get_next_subgoal(subgoals: list[Subgoal]) -> Subgoal | None:
9
13
  return next((s for s in subgoals if s.status == SubgoalStatus.NOT_STARTED), None)
10
14
 
@@ -21,6 +25,13 @@ def complete_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
21
25
  return subgoals
22
26
 
23
27
 
28
+ def complete_subgoals_by_ids(subgoals: list[Subgoal], ids: list[str]) -> list[Subgoal]:
29
+ for subgoal in subgoals:
30
+ if subgoal.id in ids:
31
+ subgoal.status = SubgoalStatus.SUCCESS
32
+ return subgoals
33
+
34
+
24
35
  def fail_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
25
36
  current_subgoal = get_current_subgoal(subgoals)
26
37
  if not current_subgoal:
@@ -12,6 +12,9 @@ class DeviceHardwareClient:
12
12
  url = urljoin(self.base_url, f"/api/{path.lstrip('/')}")
13
13
  return self.session.get(url, **kwargs)
14
14
 
15
+ def get_rich_hierarchy(self) -> list[dict]:
16
+ return self.get("last-view-hierarchy").json().get("children", [])
17
+
15
18
  def post(self, path: str, **kwargs):
16
19
  url = urljoin(self.base_url, f"/api/{path.lstrip('/')}")
17
20
  return self.session.post(url, **kwargs)
@@ -22,6 +22,8 @@ class Settings(BaseSettings):
22
22
  XAI_API_KEY: Optional[SecretStr] = None
23
23
  OPEN_ROUTER_API_KEY: Optional[SecretStr] = None
24
24
 
25
+ OPENAI_BASE_URL: Optional[str] = None
26
+
25
27
  DEVICE_SCREEN_API_BASE_URL: Optional[str] = None
26
28
  DEVICE_HARDWARE_BRIDGE_BASE_URL: Optional[str] = None
27
29
  ADB_HOST: Optional[str] = None
@@ -1,2 +1,3 @@
1
1
  RECURSION_LIMIT = 400
2
2
  MAX_MESSAGES_IN_HISTORY = 25
3
+ EXECUTOR_MESSAGES_KEY = "executor_messages"
@@ -9,6 +9,7 @@ from requests import JSONDecodeError
9
9
 
10
10
  from minitap.mobile_use.clients.device_hardware_client import DeviceHardwareClient
11
11
  from minitap.mobile_use.clients.screen_api_client import ScreenApiClient
12
+ from minitap.mobile_use.config import initialize_llm_config
12
13
  from minitap.mobile_use.context import DeviceContext, DevicePlatform, MobileUseContext
13
14
  from minitap.mobile_use.utils.errors import ControllerErrors
14
15
  from minitap.mobile_use.utils.logger import get_logger
@@ -331,12 +332,10 @@ def run_flow_with_wait_for_animation_to_end(
331
332
 
332
333
 
333
334
  if __name__ == "__main__":
334
- # long press, erase
335
- # input_text(text="test")
336
- # erase_text()
337
335
  ctx = MobileUseContext(
336
+ llm_config=initialize_llm_config(),
338
337
  device=DeviceContext(
339
- host_platform="LINUX",
338
+ host_platform="WINDOWS",
340
339
  mobile_platform=DevicePlatform.ANDROID,
341
340
  device_id="emulator-5554",
342
341
  device_width=1080,
@@ -347,7 +346,6 @@ if __name__ == "__main__":
347
346
  )
348
347
  screen_data = get_screen_data(ctx.screen_api_client)
349
348
  from minitap.mobile_use.graph.state import State
350
- from minitap.mobile_use.tools.mobile.erase_text import get_erase_text_tool
351
349
 
352
350
  dummy_state = State(
353
351
  latest_ui_hierarchy=screen_data.elements,
@@ -358,20 +356,21 @@ if __name__ == "__main__":
358
356
  focused_app_info=None,
359
357
  device_date="",
360
358
  structured_decisions=None,
361
- executor_retrigger=False,
362
- executor_failed=False,
359
+ complete_subgoals_by_ids=[],
363
360
  executor_messages=[],
364
361
  cortex_last_thought="",
365
362
  agents_thoughts=[],
366
363
  )
367
364
 
368
- # invoke erase_text tool
369
- input_resource_id = "com.google.android.settings.intelligence:id/open_search_view_edit_text"
370
- command_output: Command = get_erase_text_tool(ctx=ctx).invoke(
365
+ from minitap.mobile_use.tools.mobile.input_text import get_input_text_tool
366
+
367
+ input_resource_id = "com.google.android.apps.nexuslauncher:id/search_container_hotseat"
368
+ command_output: Command = get_input_text_tool(ctx=ctx).invoke(
371
369
  {
372
370
  "tool_call_id": uuid.uuid4().hex,
373
371
  "agent_thought": "",
374
- "input_text_resource_id": input_resource_id,
372
+ "text_input_resource_id": input_resource_id,
373
+ "text": "Hello World",
375
374
  "state": dummy_state,
376
375
  "executor_metadata": None,
377
376
  }
@@ -6,13 +6,10 @@ from langchain_core.messages import (
6
6
  from langgraph.constants import END, START
7
7
  from langgraph.graph import StateGraph
8
8
  from langgraph.graph.state import CompiledStateGraph
9
- from langgraph.prebuilt import ToolNode
10
9
  from minitap.mobile_use.agents.contextor.contextor import ContextorNode
11
10
  from minitap.mobile_use.agents.cortex.cortex import CortexNode
12
11
  from minitap.mobile_use.agents.executor.executor import ExecutorNode
13
- from minitap.mobile_use.agents.executor.executor_context_cleaner import (
14
- executor_context_cleaner_node,
15
- )
12
+ from minitap.mobile_use.agents.executor.tool_node import ExecutorToolNode
16
13
  from minitap.mobile_use.agents.orchestrator.orchestrator import OrchestratorNode
17
14
  from minitap.mobile_use.agents.planner.planner import PlannerNode
18
15
  from minitap.mobile_use.agents.planner.utils import (
@@ -21,6 +18,7 @@ from minitap.mobile_use.agents.planner.utils import (
21
18
  one_of_them_is_failure,
22
19
  )
23
20
  from minitap.mobile_use.agents.summarizer.summarizer import SummarizerNode
21
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
24
22
  from minitap.mobile_use.context import MobileUseContext
25
23
  from minitap.mobile_use.graph.state import State
26
24
  from minitap.mobile_use.tools.index import EXECUTOR_WRAPPERS_TOOLS, get_tools_from_wrappers
@@ -53,7 +51,7 @@ def post_cortex_gate(
53
51
  state: State,
54
52
  ) -> Literal["continue", "end_subgoal"]:
55
53
  logger.info("Starting post_cortex_gate")
56
- if not state.structured_decisions:
54
+ if len(state.complete_subgoals_by_ids) > 0:
57
55
  return "end_subgoal"
58
56
  return "continue"
59
57
 
@@ -62,7 +60,7 @@ def post_executor_gate(
62
60
  state: State,
63
61
  ) -> Literal["invoke_tools", "skip"]:
64
62
  logger.info("Starting post_executor_gate")
65
- messages = state.messages
63
+ messages = state.executor_messages
66
64
  if not messages:
67
65
  return "skip"
68
66
  last_message = messages[-1]
@@ -77,17 +75,6 @@ def post_executor_gate(
77
75
  return "skip"
78
76
 
79
77
 
80
- def post_executor_tools_gate(
81
- state: State,
82
- ) -> Literal["continue", "failed", "done"]:
83
- logger.info("Starting post_executor_tools_gate")
84
- if state.executor_failed:
85
- return "failed"
86
- if state.executor_retrigger:
87
- return "continue"
88
- return "done"
89
-
90
-
91
78
  async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
92
79
  graph_builder = StateGraph(State)
93
80
 
@@ -100,12 +87,12 @@ async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
100
87
  graph_builder.add_node("cortex", CortexNode(ctx))
101
88
 
102
89
  graph_builder.add_node("executor", ExecutorNode(ctx))
103
- executor_tool_node = ToolNode(
104
- get_tools_from_wrappers(ctx=ctx, wrappers=EXECUTOR_WRAPPERS_TOOLS)
90
+ executor_tool_node = ExecutorToolNode(
91
+ tools=get_tools_from_wrappers(ctx=ctx, wrappers=EXECUTOR_WRAPPERS_TOOLS),
92
+ messages_key=EXECUTOR_MESSAGES_KEY,
105
93
  )
106
94
  graph_builder.add_node("executor_tools", executor_tool_node)
107
95
 
108
- graph_builder.add_node("executor_context_cleaner", executor_context_cleaner_node)
109
96
  graph_builder.add_node("summarizer", SummarizerNode(ctx))
110
97
 
111
98
  # Linking nodes
@@ -132,18 +119,9 @@ async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
132
119
  graph_builder.add_conditional_edges(
133
120
  "executor",
134
121
  post_executor_gate,
135
- {"invoke_tools": "executor_tools", "skip": "executor_context_cleaner"},
136
- )
137
- graph_builder.add_conditional_edges(
138
- "executor_tools",
139
- post_executor_tools_gate,
140
- {
141
- "continue": "executor",
142
- "done": "executor_context_cleaner",
143
- "failed": "executor_context_cleaner",
144
- },
122
+ {"invoke_tools": "executor_tools", "skip": "summarizer"},
145
123
  )
146
- graph_builder.add_edge("executor_context_cleaner", "summarizer")
124
+ graph_builder.add_edge("executor_tools", "summarizer")
147
125
  graph_builder.add_edge("summarizer", "contextor")
148
126
 
149
127
  return graph_builder.compile()
@@ -4,6 +4,7 @@ from langgraph.prebuilt.chat_agent_executor import AgentStatePydantic
4
4
  from typing_extensions import Annotated, Optional
5
5
 
6
6
  from minitap.mobile_use.agents.planner.types import Subgoal
7
+ from minitap.mobile_use.config import AgentNode
7
8
  from minitap.mobile_use.utils.logger import get_logger
8
9
  from minitap.mobile_use.utils.recorder import record_interaction
9
10
  from minitap.mobile_use.context import MobileUseContext
@@ -36,10 +37,13 @@ class State(AgentStatePydantic):
36
37
  "Structured decisions made by the cortex, for the executor to follow",
37
38
  take_last,
38
39
  ]
40
+ complete_subgoals_by_ids: Annotated[
41
+ list[str],
42
+ "List of subgoal IDs to complete",
43
+ take_last,
44
+ ]
39
45
 
40
46
  # executor related keys
41
- executor_retrigger: Annotated[Optional[bool], "Whether the executor must be retriggered"]
42
- executor_failed: Annotated[bool, "Whether a tool call made by the executor failed"]
43
47
  executor_messages: Annotated[list[AnyMessage], "Sequential Executor messages", add_messages]
44
48
  cortex_last_thought: Annotated[Optional[str], "Last thought of the cortex for the executor"]
45
49
 
@@ -47,11 +51,18 @@ class State(AgentStatePydantic):
47
51
  agents_thoughts: Annotated[
48
52
  list[str],
49
53
  "All thoughts and reasons that led to actions (why a tool was called, expected outcomes..)",
54
+ take_last,
50
55
  ]
51
56
 
52
- def sanitize_update(self, ctx: MobileUseContext, update: dict):
57
+ def sanitize_update(
58
+ self,
59
+ ctx: MobileUseContext,
60
+ update: dict,
61
+ agent: Optional[AgentNode] = None,
62
+ ):
53
63
  """
54
64
  Sanitizes the state update to ensure it is valid and apply side effect logic where required.
65
+ The agent is required if the update contains the "agents_thoughts" key.
55
66
  """
56
67
  updated_agents_thoughts: Optional[str | list[str]] = update.get("agents_thoughts", None)
57
68
  if updated_agents_thoughts is not None:
@@ -59,15 +70,24 @@ class State(AgentStatePydantic):
59
70
  updated_agents_thoughts = [updated_agents_thoughts]
60
71
  elif not isinstance(updated_agents_thoughts, list):
61
72
  raise ValueError("agents_thoughts must be a str or list[str]")
73
+ if agent is None:
74
+ raise ValueError("Agent is required when updating the 'agents_thoughts' key")
62
75
  update["agents_thoughts"] = _add_agent_thoughts(
63
76
  ctx=ctx,
64
77
  old=self.agents_thoughts,
65
78
  new=updated_agents_thoughts,
79
+ agent=agent,
66
80
  )
67
81
  return update
68
82
 
69
83
 
70
- def _add_agent_thoughts(ctx: MobileUseContext, old: list[str], new: list[str]) -> list[str]:
84
+ def _add_agent_thoughts(
85
+ ctx: MobileUseContext,
86
+ old: list[str],
87
+ new: list[str],
88
+ agent: AgentNode,
89
+ ) -> list[str]:
90
+ named_thoughts = [f"[{agent}] {thought}" for thought in new]
71
91
  if ctx.execution_setup:
72
- record_interaction(ctx, response=AIMessage(content=str(new)))
73
- return old + new
92
+ record_interaction(ctx, response=AIMessage(content=str(named_thoughts)))
93
+ return old + named_thoughts
@@ -1,9 +1,10 @@
1
1
  import asyncio
2
2
  import os
3
- from adbutils import AdbClient
4
3
  from typing import Optional
5
4
 
6
5
  import typer
6
+ from adbutils import AdbClient
7
+ from langchain.callbacks.base import Callbacks
7
8
  from rich.console import Console
8
9
  from typing_extensions import Annotated
9
10
 
@@ -26,6 +27,7 @@ async def run_automation(
26
27
  test_name: Optional[str] = None,
27
28
  traces_output_path_str: str = "traces",
28
29
  output_description: Optional[str] = None,
30
+ graph_config_callbacks: Callbacks = [],
29
31
  ):
30
32
  llm_config = initialize_llm_config()
31
33
  agent_profile = AgentProfile(name="default", llm_config=llm_config)
@@ -37,11 +39,13 @@ async def run_automation(
37
39
  config.with_hw_bridge_base_url(url=settings.DEVICE_HARDWARE_BRIDGE_BASE_URL)
38
40
  if settings.DEVICE_SCREEN_API_BASE_URL:
39
41
  config.with_screen_api_base_url(url=settings.DEVICE_SCREEN_API_BASE_URL)
42
+ if graph_config_callbacks:
43
+ config.with_graph_config_callbacks(graph_config_callbacks)
40
44
 
41
45
  agent = Agent(config=config.build())
42
46
  agent.init(
43
47
  retry_count=int(os.getenv("MOBILE_USE_HEALTH_RETRIES", 5)),
44
- retry_wait_seconds=int(os.getenv("MOBILE_USE_HEALTH_DELAY", 5)),
48
+ retry_wait_seconds=int(os.getenv("MOBILE_USE_HEALTH_DELAY", 2)),
45
49
  )
46
50
 
47
51
  task = agent.new_task(goal)
@@ -1,64 +1,63 @@
1
1
  import asyncio
2
- from datetime import datetime
3
- from pathlib import Path
4
2
  import sys
5
3
  import tempfile
6
4
  import time
5
+ import uuid
6
+ from datetime import datetime
7
+ from pathlib import Path
7
8
  from types import NoneType
8
9
  from typing import Optional, TypeVar, overload
9
- import uuid
10
+
10
11
  from adbutils import AdbClient
11
12
  from langchain_core.messages import AIMessage
12
13
  from pydantic import BaseModel
13
- from minitap.mobile_use.agents.outputter.outputter import outputter
14
14
 
15
+ from minitap.mobile_use.agents.outputter.outputter import outputter
16
+ from minitap.mobile_use.clients.device_hardware_client import DeviceHardwareClient
17
+ from minitap.mobile_use.clients.screen_api_client import ScreenApiClient
15
18
  from minitap.mobile_use.config import OutputConfig, record_events
16
- from minitap.mobile_use.graph.graph import get_graph
17
- from minitap.mobile_use.graph.state import State
18
- from minitap.mobile_use.sdk.builders.agent_config_builder import get_default_agent_config
19
- from minitap.mobile_use.sdk.builders.task_request_builder import TaskRequestBuilder
20
- from minitap.mobile_use.sdk.constants import (
21
- DEFAULT_HW_BRIDGE_BASE_URL,
22
- DEFAULT_SCREEN_API_BASE_URL,
23
- )
24
- from minitap.mobile_use.sdk.types.agent import AgentConfig
25
19
  from minitap.mobile_use.context import (
26
20
  DeviceContext,
27
21
  DevicePlatform,
28
22
  ExecutionSetup,
29
23
  MobileUseContext,
30
24
  )
31
- from minitap.mobile_use.clients.device_hardware_client import DeviceHardwareClient
32
- from minitap.mobile_use.clients.screen_api_client import ScreenApiClient
33
25
  from minitap.mobile_use.controllers.mobile_command_controller import (
34
26
  ScreenDataResponse,
35
27
  get_screen_data,
36
28
  )
37
29
  from minitap.mobile_use.controllers.platform_specific_commands_controller import get_first_device
38
-
39
- from minitap.mobile_use.servers.stop_servers import stop_servers
40
- from minitap.mobile_use.servers.device_hardware_bridge import BridgeStatus
41
- from minitap.mobile_use.servers.start_servers import (
42
- start_device_hardware_bridge,
43
- start_device_screen_api,
30
+ from minitap.mobile_use.graph.graph import get_graph
31
+ from minitap.mobile_use.graph.state import State
32
+ from minitap.mobile_use.sdk.builders.agent_config_builder import get_default_agent_config
33
+ from minitap.mobile_use.sdk.builders.task_request_builder import TaskRequestBuilder
34
+ from minitap.mobile_use.sdk.constants import (
35
+ DEFAULT_HW_BRIDGE_BASE_URL,
36
+ DEFAULT_SCREEN_API_BASE_URL,
44
37
  )
45
- from minitap.mobile_use.utils.logger import get_logger
38
+ from minitap.mobile_use.sdk.types.agent import AgentConfig
46
39
  from minitap.mobile_use.sdk.types.exceptions import (
40
+ AgentNotInitializedError,
47
41
  AgentProfileNotFoundError,
48
42
  AgentTaskRequestError,
49
43
  DeviceNotFoundError,
50
44
  ServerStartupError,
51
- AgentNotInitializedError,
52
45
  )
53
46
  from minitap.mobile_use.sdk.types.task import AgentProfile, Task, TaskRequest, TaskStatus
47
+ from minitap.mobile_use.servers.device_hardware_bridge import BridgeStatus
48
+ from minitap.mobile_use.servers.start_servers import (
49
+ start_device_hardware_bridge,
50
+ start_device_screen_api,
51
+ )
52
+ from minitap.mobile_use.servers.stop_servers import stop_servers
53
+ from minitap.mobile_use.utils.logger import get_logger
54
54
  from minitap.mobile_use.utils.media import (
55
55
  create_gif_from_trace_folder,
56
56
  create_steps_json_from_trace_folder,
57
57
  remove_images_from_trace_folder,
58
58
  remove_steps_json_from_trace_folder,
59
59
  )
60
- from minitap.mobile_use.utils.recorder import log_agent_thoughts
61
-
60
+ from minitap.mobile_use.utils.recorder import log_agent_thought
62
61
 
63
62
  logger = get_logger(__name__)
64
63
 
@@ -127,7 +126,10 @@ class Agent:
127
126
  f"Server start failed, attempting restart "
128
127
  f"{restart_attempt}/{server_restart_attempts}"
129
128
  )
130
- time.sleep(3)
129
+ stop_servers(
130
+ should_stop_screen_api=self._is_default_screen_api,
131
+ should_stop_hw_bridge=self._is_default_hw_bridge,
132
+ )
131
133
  else:
132
134
  error_msg = "Mobile-use servers failed to start after all restart attempts."
133
135
  logger.error(error_msg)
@@ -261,17 +263,31 @@ class Agent:
261
263
  input=graph_input,
262
264
  config={
263
265
  "recursion_limit": task.request.max_steps,
266
+ "callbacks": self._config.graph_config_callbacks,
264
267
  },
265
- stream_mode=["messages", "custom", "values"],
268
+ stream_mode=["messages", "custom", "updates", "values"],
266
269
  ):
267
- stream_mode, content = chunk
270
+ stream_mode, payload = chunk
268
271
  if stream_mode == "values":
269
- last_state_snapshot = content # type: ignore
272
+ last_state_snapshot = payload # type: ignore
270
273
  last_state = State(**last_state_snapshot) # type: ignore
271
- log_agent_thoughts(
272
- agents_thoughts=last_state.agents_thoughts,
273
- output_path=task.request.thoughts_output_path,
274
- )
274
+ if task.request.thoughts_output_path:
275
+ record_events(
276
+ output_path=task.request.thoughts_output_path,
277
+ events=last_state.agents_thoughts,
278
+ )
279
+
280
+ if stream_mode == "updates":
281
+ for key, value in payload.items(): # type: ignore
282
+ if value and "agents_thoughts" in value:
283
+ new_thoughts = value["agents_thoughts"]
284
+ last_item = new_thoughts[-1] if new_thoughts else None
285
+ if last_item:
286
+ log_agent_thought(
287
+ prefix=key,
288
+ agent_thought=last_item,
289
+ )
290
+
275
291
  if not last_state:
276
292
  err = f"[{task_name}] No result received from graph"
277
293
  logger.warning(err)
@@ -302,12 +318,12 @@ class Agent:
302
318
  self._finalize_tracing(task=task, context=context)
303
319
  return output
304
320
 
305
- def clean(self):
306
- if not self._initialized:
321
+ def clean(self, force: bool = False):
322
+ if not self._initialized and not force:
307
323
  return
308
324
  screen_api_ok, hw_bridge_ok = stop_servers(
309
- device_screen_api=not self._is_default_screen_api,
310
- device_hardware_bridge=not self._is_default_hw_bridge,
325
+ should_stop_screen_api=self._is_default_screen_api,
326
+ should_stop_hw_bridge=self._is_default_hw_bridge,
311
327
  )
312
328
  if not screen_api_ok:
313
329
  logger.warning("Failed to stop Device Screen API.")
@@ -402,10 +418,9 @@ class Agent:
402
418
  focused_app_info=None,
403
419
  device_date=None,
404
420
  structured_decisions=None,
421
+ complete_subgoals_by_ids=[],
405
422
  agents_thoughts=[],
406
423
  remaining_steps=task.request.max_steps,
407
- executor_retrigger=False,
408
- executor_failed=False,
409
424
  executor_messages=[],
410
425
  cortex_last_thought=None,
411
426
  )
@@ -2,19 +2,20 @@
2
2
  Builder for AgentConfig objects using a fluent interface.
3
3
  """
4
4
 
5
- from typing import Dict, Optional, List
6
5
  import copy
6
+ from typing import Dict, List, Optional
7
+
8
+ from langchain_core.callbacks.base import Callbacks
7
9
 
8
10
  from minitap.mobile_use.config import get_default_llm_config
11
+ from minitap.mobile_use.context import DevicePlatform
9
12
  from minitap.mobile_use.sdk.constants import (
10
13
  DEFAULT_HW_BRIDGE_BASE_URL,
11
14
  DEFAULT_PROFILE_NAME,
12
15
  DEFAULT_SCREEN_API_BASE_URL,
13
16
  )
14
- from minitap.mobile_use.sdk.types.agent import ApiBaseUrl, AgentConfig, ServerConfig
15
- from minitap.mobile_use.sdk.types.agent import AgentProfile
17
+ from minitap.mobile_use.sdk.types.agent import AgentConfig, AgentProfile, ApiBaseUrl, ServerConfig
16
18
  from minitap.mobile_use.sdk.types.task import TaskRequestCommon
17
- from minitap.mobile_use.context import DevicePlatform
18
19
 
19
20
 
20
21
  class AgentConfigBuilder:
@@ -44,6 +45,7 @@ class AgentConfigBuilder:
44
45
  self._device_id: Optional[str] = None
45
46
  self._device_platform: Optional[DevicePlatform] = None
46
47
  self._servers: ServerConfig = get_default_servers()
48
+ self._graph_config_callbacks: Callbacks = None
47
49
 
48
50
  def add_profile(self, profile: AgentProfile) -> "AgentConfigBuilder":
49
51
  """
@@ -151,6 +153,16 @@ class AgentConfigBuilder:
151
153
  self._servers = copy.deepcopy(servers)
152
154
  return self
153
155
 
156
+ def with_graph_config_callbacks(self, callbacks: Callbacks) -> "AgentConfigBuilder":
157
+ """
158
+ Set the graph config callbacks.
159
+
160
+ Args:
161
+ callbacks: The graph config callbacks to use
162
+ """
163
+ self._graph_config_callbacks = callbacks
164
+ return self
165
+
154
166
  def build(self) -> AgentConfig:
155
167
  """
156
168
  Build the mobile-use AgentConfig object.
@@ -197,6 +209,7 @@ class AgentConfigBuilder:
197
209
  device_id=self._device_id,
198
210
  device_platform=self._device_platform,
199
211
  servers=self._servers,
212
+ graph_config_callbacks=self._graph_config_callbacks,
200
213
  )
201
214
 
202
215
 
@@ -1,5 +1,7 @@
1
1
  from typing import Dict, Literal, Optional
2
2
  from urllib.parse import urlparse
3
+
4
+ from langchain_core.callbacks.base import Callbacks
3
5
  from pydantic import BaseModel
4
6
 
5
7
  from minitap.mobile_use.context import DevicePlatform
@@ -71,3 +73,6 @@ class AgentConfig(BaseModel):
71
73
  device_id: Optional[str] = None
72
74
  device_platform: Optional[DevicePlatform] = None
73
75
  servers: ServerConfig
76
+ graph_config_callbacks: Callbacks = None
77
+
78
+ model_config = {"arbitrary_types_allowed": True}