minitap-mobile-use 0.0.1.dev0__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of minitap-mobile-use might be problematic. Click here for more details.
- minitap/mobile_use/agents/cortex/cortex.md +17 -10
- minitap/mobile_use/agents/cortex/cortex.py +12 -2
- minitap/mobile_use/agents/cortex/types.py +2 -2
- minitap/mobile_use/agents/executor/executor.md +16 -10
- minitap/mobile_use/agents/executor/executor.py +6 -18
- minitap/mobile_use/agents/executor/tool_node.py +105 -0
- minitap/mobile_use/agents/hopper/hopper.md +2 -10
- minitap/mobile_use/agents/hopper/hopper.py +4 -9
- minitap/mobile_use/agents/orchestrator/human.md +3 -4
- minitap/mobile_use/agents/orchestrator/orchestrator.md +25 -7
- minitap/mobile_use/agents/orchestrator/orchestrator.py +56 -56
- minitap/mobile_use/agents/orchestrator/types.py +5 -8
- minitap/mobile_use/agents/planner/planner.md +14 -13
- minitap/mobile_use/agents/planner/planner.py +4 -1
- minitap/mobile_use/agents/planner/types.py +8 -2
- minitap/mobile_use/agents/planner/utils.py +11 -0
- minitap/mobile_use/clients/device_hardware_client.py +3 -0
- minitap/mobile_use/config.py +2 -0
- minitap/mobile_use/constants.py +1 -0
- minitap/mobile_use/controllers/mobile_command_controller.py +10 -11
- minitap/mobile_use/graph/graph.py +9 -31
- minitap/mobile_use/graph/state.py +26 -6
- minitap/mobile_use/main.py +6 -2
- minitap/mobile_use/sdk/agent.py +54 -39
- minitap/mobile_use/sdk/builders/agent_config_builder.py +17 -4
- minitap/mobile_use/sdk/types/agent.py +5 -0
- minitap/mobile_use/servers/stop_servers.py +10 -15
- minitap/mobile_use/services/llm.py +1 -0
- minitap/mobile_use/tools/index.py +2 -4
- minitap/mobile_use/tools/mobile/back.py +7 -11
- minitap/mobile_use/tools/mobile/copy_text_from.py +7 -11
- minitap/mobile_use/tools/mobile/erase_text.py +7 -9
- minitap/mobile_use/tools/mobile/find_packages.py +69 -0
- minitap/mobile_use/tools/mobile/input_text.py +131 -32
- minitap/mobile_use/tools/mobile/launch_app.py +7 -11
- minitap/mobile_use/tools/mobile/long_press_on.py +7 -9
- minitap/mobile_use/tools/mobile/open_link.py +7 -11
- minitap/mobile_use/tools/mobile/paste_text.py +7 -11
- minitap/mobile_use/tools/mobile/press_key.py +7 -11
- minitap/mobile_use/tools/mobile/stop_app.py +7 -9
- minitap/mobile_use/tools/mobile/swipe.py +7 -11
- minitap/mobile_use/tools/mobile/take_screenshot.py +7 -11
- minitap/mobile_use/tools/mobile/tap.py +7 -9
- minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +7 -9
- minitap/mobile_use/tools/tool_wrapper.py +1 -23
- minitap/mobile_use/utils/recorder.py +11 -10
- minitap/mobile_use/utils/ui_hierarchy.py +88 -1
- {minitap_mobile_use-0.0.1.dev0.dist-info → minitap_mobile_use-2.0.1.dist-info}/METADATA +2 -2
- minitap_mobile_use-2.0.1.dist-info/RECORD +94 -0
- minitap/mobile_use/agents/executor/executor_context_cleaner.py +0 -27
- minitap/mobile_use/tools/mobile/list_packages.py +0 -78
- minitap/mobile_use/tools/mobile/run_flow.py +0 -57
- minitap_mobile_use-0.0.1.dev0.dist-info/RECORD +0 -95
- {minitap_mobile_use-0.0.1.dev0.dist-info → minitap_mobile_use-2.0.1.dist-info}/WHEEL +0 -0
- {minitap_mobile_use-0.0.1.dev0.dist-info → minitap_mobile_use-2.0.1.dist-info}/entry_points.txt +0 -0
|
@@ -25,7 +25,7 @@ You work like an agile tech lead: defining the key milestones without locking in
|
|
|
25
25
|
|
|
26
26
|
### Output
|
|
27
27
|
|
|
28
|
-
You must output a **list of
|
|
28
|
+
You must output a **list of subgoals (description + optional subgoal ID)**, each representing a clear subgoal.
|
|
29
29
|
Each subgoal should be:
|
|
30
30
|
|
|
31
31
|
- Focused on **realistic mobile interactions**
|
|
@@ -33,21 +33,23 @@ Each subgoal should be:
|
|
|
33
33
|
- Sequential (later steps may depend on earlier ones)
|
|
34
34
|
- Don't use loop-like formulation unless necessary (e.g. don't say "repeat this X times", instead reuse the same steps X times as subgoals)
|
|
35
35
|
|
|
36
|
+
If you're replanning and need to keep a previous subgoal, you **must keep the same subgoal ID**.
|
|
37
|
+
|
|
36
38
|
### Examples
|
|
37
39
|
|
|
38
40
|
#### **Initial Goal**: "Open WhatsApp and send 'I’m running late' to Alice"
|
|
39
41
|
|
|
40
42
|
**Plan**:
|
|
41
43
|
|
|
42
|
-
- Open the WhatsApp app
|
|
43
|
-
- Locate or search for Alice
|
|
44
|
-
- Open the conversation with Alice
|
|
45
|
-
- Type the message "I’m running late"
|
|
46
|
-
- Send the message
|
|
44
|
+
- Open the WhatsApp app (ID: None -> will be generated as a UUID like bc3c362d-f498-4f1a-991e-4a2d1f8c1226)
|
|
45
|
+
- Locate or search for Alice (ID: None)
|
|
46
|
+
- Open the conversation with Alice (ID: None)
|
|
47
|
+
- Type the message "I’m running late" (ID: None)
|
|
48
|
+
- Send the message (ID: None)
|
|
47
49
|
|
|
48
50
|
#### **Replanning Example**
|
|
49
51
|
|
|
50
|
-
**Original Plan**: same as above
|
|
52
|
+
**Original Plan**: same as above with IDs set
|
|
51
53
|
**Agent Thoughts**:
|
|
52
54
|
|
|
53
55
|
- Couldn’t find Alice in recent chats
|
|
@@ -56,9 +58,8 @@ Each subgoal should be:
|
|
|
56
58
|
|
|
57
59
|
**New Plan**:
|
|
58
60
|
|
|
59
|
-
-
|
|
60
|
-
-
|
|
61
|
-
-
|
|
62
|
-
-
|
|
63
|
-
-
|
|
64
|
-
- Type and send "I’m running late"
|
|
61
|
+
- Open WhatsApp (ID: bc3c362d-f498-4f1a-991e-4a2d1f8c1226)
|
|
62
|
+
- Tap the search bar (ID: None)
|
|
63
|
+
- Search for "Alice" (ID: None)
|
|
64
|
+
- Select the correct chat (ID: None)
|
|
65
|
+
- Type and send "I’m running late" (ID: None)
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
+
import uuid
|
|
2
3
|
|
|
3
4
|
from jinja2 import Template
|
|
4
5
|
from langchain_core.messages import HumanMessage, SystemMessage
|
|
@@ -47,7 +48,8 @@ class PlannerNode:
|
|
|
47
48
|
|
|
48
49
|
subgoals_plan = [
|
|
49
50
|
Subgoal(
|
|
50
|
-
|
|
51
|
+
id=subgoal.id or str(uuid.uuid4()),
|
|
52
|
+
description=subgoal.description,
|
|
51
53
|
status=SubgoalStatus.NOT_STARTED,
|
|
52
54
|
completion_reason=None,
|
|
53
55
|
)
|
|
@@ -61,4 +63,5 @@ class PlannerNode:
|
|
|
61
63
|
update={
|
|
62
64
|
"subgoal_plan": subgoals_plan,
|
|
63
65
|
},
|
|
66
|
+
agent="planner",
|
|
64
67
|
)
|
|
@@ -5,8 +5,13 @@ from pydantic import BaseModel
|
|
|
5
5
|
from typing_extensions import Annotated
|
|
6
6
|
|
|
7
7
|
|
|
8
|
+
class PlannerSubgoalOutput(BaseModel):
|
|
9
|
+
id: Annotated[Optional[str], "If not provided, it will be generated"] = None
|
|
10
|
+
description: str
|
|
11
|
+
|
|
12
|
+
|
|
8
13
|
class PlannerOutput(BaseModel):
|
|
9
|
-
subgoals: list[
|
|
14
|
+
subgoals: list[PlannerSubgoalOutput]
|
|
10
15
|
|
|
11
16
|
|
|
12
17
|
class SubgoalStatus(Enum):
|
|
@@ -17,6 +22,7 @@ class SubgoalStatus(Enum):
|
|
|
17
22
|
|
|
18
23
|
|
|
19
24
|
class Subgoal(BaseModel):
|
|
25
|
+
id: Annotated[str, "Unique identifier of the subgoal"]
|
|
20
26
|
description: Annotated[str, "Description of the subgoal"]
|
|
21
27
|
completion_reason: Annotated[
|
|
22
28
|
Optional[str], "Reason why the subgoal was completed (failure or success)"
|
|
@@ -35,7 +41,7 @@ class Subgoal(BaseModel):
|
|
|
35
41
|
case SubgoalStatus.NOT_STARTED:
|
|
36
42
|
status_emoji = "(not started yet)"
|
|
37
43
|
|
|
38
|
-
output = f"- {self.description} : {status_emoji}."
|
|
44
|
+
output = f"- [ID:{self.id}]: {self.description} : {status_emoji}."
|
|
39
45
|
if self.completion_reason:
|
|
40
46
|
output += f" Completion reason: {self.completion_reason}"
|
|
41
47
|
return output
|
|
@@ -5,6 +5,10 @@ def get_current_subgoal(subgoals: list[Subgoal]) -> Subgoal | None:
|
|
|
5
5
|
return next((s for s in subgoals if s.status == SubgoalStatus.PENDING), None)
|
|
6
6
|
|
|
7
7
|
|
|
8
|
+
def get_subgoals_by_ids(subgoals: list[Subgoal], ids: list[str]) -> list[Subgoal]:
|
|
9
|
+
return [s for s in subgoals if s.id in ids]
|
|
10
|
+
|
|
11
|
+
|
|
8
12
|
def get_next_subgoal(subgoals: list[Subgoal]) -> Subgoal | None:
|
|
9
13
|
return next((s for s in subgoals if s.status == SubgoalStatus.NOT_STARTED), None)
|
|
10
14
|
|
|
@@ -21,6 +25,13 @@ def complete_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
|
|
|
21
25
|
return subgoals
|
|
22
26
|
|
|
23
27
|
|
|
28
|
+
def complete_subgoals_by_ids(subgoals: list[Subgoal], ids: list[str]) -> list[Subgoal]:
|
|
29
|
+
for subgoal in subgoals:
|
|
30
|
+
if subgoal.id in ids:
|
|
31
|
+
subgoal.status = SubgoalStatus.SUCCESS
|
|
32
|
+
return subgoals
|
|
33
|
+
|
|
34
|
+
|
|
24
35
|
def fail_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
|
|
25
36
|
current_subgoal = get_current_subgoal(subgoals)
|
|
26
37
|
if not current_subgoal:
|
|
@@ -12,6 +12,9 @@ class DeviceHardwareClient:
|
|
|
12
12
|
url = urljoin(self.base_url, f"/api/{path.lstrip('/')}")
|
|
13
13
|
return self.session.get(url, **kwargs)
|
|
14
14
|
|
|
15
|
+
def get_rich_hierarchy(self) -> list[dict]:
|
|
16
|
+
return self.get("last-view-hierarchy").json().get("children", [])
|
|
17
|
+
|
|
15
18
|
def post(self, path: str, **kwargs):
|
|
16
19
|
url = urljoin(self.base_url, f"/api/{path.lstrip('/')}")
|
|
17
20
|
return self.session.post(url, **kwargs)
|
minitap/mobile_use/config.py
CHANGED
|
@@ -22,6 +22,8 @@ class Settings(BaseSettings):
|
|
|
22
22
|
XAI_API_KEY: Optional[SecretStr] = None
|
|
23
23
|
OPEN_ROUTER_API_KEY: Optional[SecretStr] = None
|
|
24
24
|
|
|
25
|
+
OPENAI_BASE_URL: Optional[str] = None
|
|
26
|
+
|
|
25
27
|
DEVICE_SCREEN_API_BASE_URL: Optional[str] = None
|
|
26
28
|
DEVICE_HARDWARE_BRIDGE_BASE_URL: Optional[str] = None
|
|
27
29
|
ADB_HOST: Optional[str] = None
|
minitap/mobile_use/constants.py
CHANGED
|
@@ -9,6 +9,7 @@ from requests import JSONDecodeError
|
|
|
9
9
|
|
|
10
10
|
from minitap.mobile_use.clients.device_hardware_client import DeviceHardwareClient
|
|
11
11
|
from minitap.mobile_use.clients.screen_api_client import ScreenApiClient
|
|
12
|
+
from minitap.mobile_use.config import initialize_llm_config
|
|
12
13
|
from minitap.mobile_use.context import DeviceContext, DevicePlatform, MobileUseContext
|
|
13
14
|
from minitap.mobile_use.utils.errors import ControllerErrors
|
|
14
15
|
from minitap.mobile_use.utils.logger import get_logger
|
|
@@ -331,12 +332,10 @@ def run_flow_with_wait_for_animation_to_end(
|
|
|
331
332
|
|
|
332
333
|
|
|
333
334
|
if __name__ == "__main__":
|
|
334
|
-
# long press, erase
|
|
335
|
-
# input_text(text="test")
|
|
336
|
-
# erase_text()
|
|
337
335
|
ctx = MobileUseContext(
|
|
336
|
+
llm_config=initialize_llm_config(),
|
|
338
337
|
device=DeviceContext(
|
|
339
|
-
host_platform="
|
|
338
|
+
host_platform="WINDOWS",
|
|
340
339
|
mobile_platform=DevicePlatform.ANDROID,
|
|
341
340
|
device_id="emulator-5554",
|
|
342
341
|
device_width=1080,
|
|
@@ -347,7 +346,6 @@ if __name__ == "__main__":
|
|
|
347
346
|
)
|
|
348
347
|
screen_data = get_screen_data(ctx.screen_api_client)
|
|
349
348
|
from minitap.mobile_use.graph.state import State
|
|
350
|
-
from minitap.mobile_use.tools.mobile.erase_text import get_erase_text_tool
|
|
351
349
|
|
|
352
350
|
dummy_state = State(
|
|
353
351
|
latest_ui_hierarchy=screen_data.elements,
|
|
@@ -358,20 +356,21 @@ if __name__ == "__main__":
|
|
|
358
356
|
focused_app_info=None,
|
|
359
357
|
device_date="",
|
|
360
358
|
structured_decisions=None,
|
|
361
|
-
|
|
362
|
-
executor_failed=False,
|
|
359
|
+
complete_subgoals_by_ids=[],
|
|
363
360
|
executor_messages=[],
|
|
364
361
|
cortex_last_thought="",
|
|
365
362
|
agents_thoughts=[],
|
|
366
363
|
)
|
|
367
364
|
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
365
|
+
from minitap.mobile_use.tools.mobile.input_text import get_input_text_tool
|
|
366
|
+
|
|
367
|
+
input_resource_id = "com.google.android.apps.nexuslauncher:id/search_container_hotseat"
|
|
368
|
+
command_output: Command = get_input_text_tool(ctx=ctx).invoke(
|
|
371
369
|
{
|
|
372
370
|
"tool_call_id": uuid.uuid4().hex,
|
|
373
371
|
"agent_thought": "",
|
|
374
|
-
"
|
|
372
|
+
"text_input_resource_id": input_resource_id,
|
|
373
|
+
"text": "Hello World",
|
|
375
374
|
"state": dummy_state,
|
|
376
375
|
"executor_metadata": None,
|
|
377
376
|
}
|
|
@@ -6,13 +6,10 @@ from langchain_core.messages import (
|
|
|
6
6
|
from langgraph.constants import END, START
|
|
7
7
|
from langgraph.graph import StateGraph
|
|
8
8
|
from langgraph.graph.state import CompiledStateGraph
|
|
9
|
-
from langgraph.prebuilt import ToolNode
|
|
10
9
|
from minitap.mobile_use.agents.contextor.contextor import ContextorNode
|
|
11
10
|
from minitap.mobile_use.agents.cortex.cortex import CortexNode
|
|
12
11
|
from minitap.mobile_use.agents.executor.executor import ExecutorNode
|
|
13
|
-
from minitap.mobile_use.agents.executor.
|
|
14
|
-
executor_context_cleaner_node,
|
|
15
|
-
)
|
|
12
|
+
from minitap.mobile_use.agents.executor.tool_node import ExecutorToolNode
|
|
16
13
|
from minitap.mobile_use.agents.orchestrator.orchestrator import OrchestratorNode
|
|
17
14
|
from minitap.mobile_use.agents.planner.planner import PlannerNode
|
|
18
15
|
from minitap.mobile_use.agents.planner.utils import (
|
|
@@ -21,6 +18,7 @@ from minitap.mobile_use.agents.planner.utils import (
|
|
|
21
18
|
one_of_them_is_failure,
|
|
22
19
|
)
|
|
23
20
|
from minitap.mobile_use.agents.summarizer.summarizer import SummarizerNode
|
|
21
|
+
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
24
22
|
from minitap.mobile_use.context import MobileUseContext
|
|
25
23
|
from minitap.mobile_use.graph.state import State
|
|
26
24
|
from minitap.mobile_use.tools.index import EXECUTOR_WRAPPERS_TOOLS, get_tools_from_wrappers
|
|
@@ -53,7 +51,7 @@ def post_cortex_gate(
|
|
|
53
51
|
state: State,
|
|
54
52
|
) -> Literal["continue", "end_subgoal"]:
|
|
55
53
|
logger.info("Starting post_cortex_gate")
|
|
56
|
-
if
|
|
54
|
+
if len(state.complete_subgoals_by_ids) > 0:
|
|
57
55
|
return "end_subgoal"
|
|
58
56
|
return "continue"
|
|
59
57
|
|
|
@@ -62,7 +60,7 @@ def post_executor_gate(
|
|
|
62
60
|
state: State,
|
|
63
61
|
) -> Literal["invoke_tools", "skip"]:
|
|
64
62
|
logger.info("Starting post_executor_gate")
|
|
65
|
-
messages = state.
|
|
63
|
+
messages = state.executor_messages
|
|
66
64
|
if not messages:
|
|
67
65
|
return "skip"
|
|
68
66
|
last_message = messages[-1]
|
|
@@ -77,17 +75,6 @@ def post_executor_gate(
|
|
|
77
75
|
return "skip"
|
|
78
76
|
|
|
79
77
|
|
|
80
|
-
def post_executor_tools_gate(
|
|
81
|
-
state: State,
|
|
82
|
-
) -> Literal["continue", "failed", "done"]:
|
|
83
|
-
logger.info("Starting post_executor_tools_gate")
|
|
84
|
-
if state.executor_failed:
|
|
85
|
-
return "failed"
|
|
86
|
-
if state.executor_retrigger:
|
|
87
|
-
return "continue"
|
|
88
|
-
return "done"
|
|
89
|
-
|
|
90
|
-
|
|
91
78
|
async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
|
|
92
79
|
graph_builder = StateGraph(State)
|
|
93
80
|
|
|
@@ -100,12 +87,12 @@ async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
|
|
|
100
87
|
graph_builder.add_node("cortex", CortexNode(ctx))
|
|
101
88
|
|
|
102
89
|
graph_builder.add_node("executor", ExecutorNode(ctx))
|
|
103
|
-
executor_tool_node =
|
|
104
|
-
get_tools_from_wrappers(ctx=ctx, wrappers=EXECUTOR_WRAPPERS_TOOLS)
|
|
90
|
+
executor_tool_node = ExecutorToolNode(
|
|
91
|
+
tools=get_tools_from_wrappers(ctx=ctx, wrappers=EXECUTOR_WRAPPERS_TOOLS),
|
|
92
|
+
messages_key=EXECUTOR_MESSAGES_KEY,
|
|
105
93
|
)
|
|
106
94
|
graph_builder.add_node("executor_tools", executor_tool_node)
|
|
107
95
|
|
|
108
|
-
graph_builder.add_node("executor_context_cleaner", executor_context_cleaner_node)
|
|
109
96
|
graph_builder.add_node("summarizer", SummarizerNode(ctx))
|
|
110
97
|
|
|
111
98
|
# Linking nodes
|
|
@@ -132,18 +119,9 @@ async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
|
|
|
132
119
|
graph_builder.add_conditional_edges(
|
|
133
120
|
"executor",
|
|
134
121
|
post_executor_gate,
|
|
135
|
-
{"invoke_tools": "executor_tools", "skip": "
|
|
136
|
-
)
|
|
137
|
-
graph_builder.add_conditional_edges(
|
|
138
|
-
"executor_tools",
|
|
139
|
-
post_executor_tools_gate,
|
|
140
|
-
{
|
|
141
|
-
"continue": "executor",
|
|
142
|
-
"done": "executor_context_cleaner",
|
|
143
|
-
"failed": "executor_context_cleaner",
|
|
144
|
-
},
|
|
122
|
+
{"invoke_tools": "executor_tools", "skip": "summarizer"},
|
|
145
123
|
)
|
|
146
|
-
graph_builder.add_edge("
|
|
124
|
+
graph_builder.add_edge("executor_tools", "summarizer")
|
|
147
125
|
graph_builder.add_edge("summarizer", "contextor")
|
|
148
126
|
|
|
149
127
|
return graph_builder.compile()
|
|
@@ -4,6 +4,7 @@ from langgraph.prebuilt.chat_agent_executor import AgentStatePydantic
|
|
|
4
4
|
from typing_extensions import Annotated, Optional
|
|
5
5
|
|
|
6
6
|
from minitap.mobile_use.agents.planner.types import Subgoal
|
|
7
|
+
from minitap.mobile_use.config import AgentNode
|
|
7
8
|
from minitap.mobile_use.utils.logger import get_logger
|
|
8
9
|
from minitap.mobile_use.utils.recorder import record_interaction
|
|
9
10
|
from minitap.mobile_use.context import MobileUseContext
|
|
@@ -36,10 +37,13 @@ class State(AgentStatePydantic):
|
|
|
36
37
|
"Structured decisions made by the cortex, for the executor to follow",
|
|
37
38
|
take_last,
|
|
38
39
|
]
|
|
40
|
+
complete_subgoals_by_ids: Annotated[
|
|
41
|
+
list[str],
|
|
42
|
+
"List of subgoal IDs to complete",
|
|
43
|
+
take_last,
|
|
44
|
+
]
|
|
39
45
|
|
|
40
46
|
# executor related keys
|
|
41
|
-
executor_retrigger: Annotated[Optional[bool], "Whether the executor must be retriggered"]
|
|
42
|
-
executor_failed: Annotated[bool, "Whether a tool call made by the executor failed"]
|
|
43
47
|
executor_messages: Annotated[list[AnyMessage], "Sequential Executor messages", add_messages]
|
|
44
48
|
cortex_last_thought: Annotated[Optional[str], "Last thought of the cortex for the executor"]
|
|
45
49
|
|
|
@@ -47,11 +51,18 @@ class State(AgentStatePydantic):
|
|
|
47
51
|
agents_thoughts: Annotated[
|
|
48
52
|
list[str],
|
|
49
53
|
"All thoughts and reasons that led to actions (why a tool was called, expected outcomes..)",
|
|
54
|
+
take_last,
|
|
50
55
|
]
|
|
51
56
|
|
|
52
|
-
def sanitize_update(
|
|
57
|
+
def sanitize_update(
|
|
58
|
+
self,
|
|
59
|
+
ctx: MobileUseContext,
|
|
60
|
+
update: dict,
|
|
61
|
+
agent: Optional[AgentNode] = None,
|
|
62
|
+
):
|
|
53
63
|
"""
|
|
54
64
|
Sanitizes the state update to ensure it is valid and apply side effect logic where required.
|
|
65
|
+
The agent is required if the update contains the "agents_thoughts" key.
|
|
55
66
|
"""
|
|
56
67
|
updated_agents_thoughts: Optional[str | list[str]] = update.get("agents_thoughts", None)
|
|
57
68
|
if updated_agents_thoughts is not None:
|
|
@@ -59,15 +70,24 @@ class State(AgentStatePydantic):
|
|
|
59
70
|
updated_agents_thoughts = [updated_agents_thoughts]
|
|
60
71
|
elif not isinstance(updated_agents_thoughts, list):
|
|
61
72
|
raise ValueError("agents_thoughts must be a str or list[str]")
|
|
73
|
+
if agent is None:
|
|
74
|
+
raise ValueError("Agent is required when updating the 'agents_thoughts' key")
|
|
62
75
|
update["agents_thoughts"] = _add_agent_thoughts(
|
|
63
76
|
ctx=ctx,
|
|
64
77
|
old=self.agents_thoughts,
|
|
65
78
|
new=updated_agents_thoughts,
|
|
79
|
+
agent=agent,
|
|
66
80
|
)
|
|
67
81
|
return update
|
|
68
82
|
|
|
69
83
|
|
|
70
|
-
def _add_agent_thoughts(
|
|
84
|
+
def _add_agent_thoughts(
|
|
85
|
+
ctx: MobileUseContext,
|
|
86
|
+
old: list[str],
|
|
87
|
+
new: list[str],
|
|
88
|
+
agent: AgentNode,
|
|
89
|
+
) -> list[str]:
|
|
90
|
+
named_thoughts = [f"[{agent}] {thought}" for thought in new]
|
|
71
91
|
if ctx.execution_setup:
|
|
72
|
-
record_interaction(ctx, response=AIMessage(content=str(
|
|
73
|
-
return old +
|
|
92
|
+
record_interaction(ctx, response=AIMessage(content=str(named_thoughts)))
|
|
93
|
+
return old + named_thoughts
|
minitap/mobile_use/main.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import os
|
|
3
|
-
from adbutils import AdbClient
|
|
4
3
|
from typing import Optional
|
|
5
4
|
|
|
6
5
|
import typer
|
|
6
|
+
from adbutils import AdbClient
|
|
7
|
+
from langchain.callbacks.base import Callbacks
|
|
7
8
|
from rich.console import Console
|
|
8
9
|
from typing_extensions import Annotated
|
|
9
10
|
|
|
@@ -26,6 +27,7 @@ async def run_automation(
|
|
|
26
27
|
test_name: Optional[str] = None,
|
|
27
28
|
traces_output_path_str: str = "traces",
|
|
28
29
|
output_description: Optional[str] = None,
|
|
30
|
+
graph_config_callbacks: Callbacks = [],
|
|
29
31
|
):
|
|
30
32
|
llm_config = initialize_llm_config()
|
|
31
33
|
agent_profile = AgentProfile(name="default", llm_config=llm_config)
|
|
@@ -37,11 +39,13 @@ async def run_automation(
|
|
|
37
39
|
config.with_hw_bridge_base_url(url=settings.DEVICE_HARDWARE_BRIDGE_BASE_URL)
|
|
38
40
|
if settings.DEVICE_SCREEN_API_BASE_URL:
|
|
39
41
|
config.with_screen_api_base_url(url=settings.DEVICE_SCREEN_API_BASE_URL)
|
|
42
|
+
if graph_config_callbacks:
|
|
43
|
+
config.with_graph_config_callbacks(graph_config_callbacks)
|
|
40
44
|
|
|
41
45
|
agent = Agent(config=config.build())
|
|
42
46
|
agent.init(
|
|
43
47
|
retry_count=int(os.getenv("MOBILE_USE_HEALTH_RETRIES", 5)),
|
|
44
|
-
retry_wait_seconds=int(os.getenv("MOBILE_USE_HEALTH_DELAY",
|
|
48
|
+
retry_wait_seconds=int(os.getenv("MOBILE_USE_HEALTH_DELAY", 2)),
|
|
45
49
|
)
|
|
46
50
|
|
|
47
51
|
task = agent.new_task(goal)
|
minitap/mobile_use/sdk/agent.py
CHANGED
|
@@ -1,64 +1,63 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from datetime import datetime
|
|
3
|
-
from pathlib import Path
|
|
4
2
|
import sys
|
|
5
3
|
import tempfile
|
|
6
4
|
import time
|
|
5
|
+
import uuid
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from pathlib import Path
|
|
7
8
|
from types import NoneType
|
|
8
9
|
from typing import Optional, TypeVar, overload
|
|
9
|
-
|
|
10
|
+
|
|
10
11
|
from adbutils import AdbClient
|
|
11
12
|
from langchain_core.messages import AIMessage
|
|
12
13
|
from pydantic import BaseModel
|
|
13
|
-
from minitap.mobile_use.agents.outputter.outputter import outputter
|
|
14
14
|
|
|
15
|
+
from minitap.mobile_use.agents.outputter.outputter import outputter
|
|
16
|
+
from minitap.mobile_use.clients.device_hardware_client import DeviceHardwareClient
|
|
17
|
+
from minitap.mobile_use.clients.screen_api_client import ScreenApiClient
|
|
15
18
|
from minitap.mobile_use.config import OutputConfig, record_events
|
|
16
|
-
from minitap.mobile_use.graph.graph import get_graph
|
|
17
|
-
from minitap.mobile_use.graph.state import State
|
|
18
|
-
from minitap.mobile_use.sdk.builders.agent_config_builder import get_default_agent_config
|
|
19
|
-
from minitap.mobile_use.sdk.builders.task_request_builder import TaskRequestBuilder
|
|
20
|
-
from minitap.mobile_use.sdk.constants import (
|
|
21
|
-
DEFAULT_HW_BRIDGE_BASE_URL,
|
|
22
|
-
DEFAULT_SCREEN_API_BASE_URL,
|
|
23
|
-
)
|
|
24
|
-
from minitap.mobile_use.sdk.types.agent import AgentConfig
|
|
25
19
|
from minitap.mobile_use.context import (
|
|
26
20
|
DeviceContext,
|
|
27
21
|
DevicePlatform,
|
|
28
22
|
ExecutionSetup,
|
|
29
23
|
MobileUseContext,
|
|
30
24
|
)
|
|
31
|
-
from minitap.mobile_use.clients.device_hardware_client import DeviceHardwareClient
|
|
32
|
-
from minitap.mobile_use.clients.screen_api_client import ScreenApiClient
|
|
33
25
|
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
34
26
|
ScreenDataResponse,
|
|
35
27
|
get_screen_data,
|
|
36
28
|
)
|
|
37
29
|
from minitap.mobile_use.controllers.platform_specific_commands_controller import get_first_device
|
|
38
|
-
|
|
39
|
-
from minitap.mobile_use.
|
|
40
|
-
from minitap.mobile_use.
|
|
41
|
-
from minitap.mobile_use.
|
|
42
|
-
|
|
43
|
-
|
|
30
|
+
from minitap.mobile_use.graph.graph import get_graph
|
|
31
|
+
from minitap.mobile_use.graph.state import State
|
|
32
|
+
from minitap.mobile_use.sdk.builders.agent_config_builder import get_default_agent_config
|
|
33
|
+
from minitap.mobile_use.sdk.builders.task_request_builder import TaskRequestBuilder
|
|
34
|
+
from minitap.mobile_use.sdk.constants import (
|
|
35
|
+
DEFAULT_HW_BRIDGE_BASE_URL,
|
|
36
|
+
DEFAULT_SCREEN_API_BASE_URL,
|
|
44
37
|
)
|
|
45
|
-
from minitap.mobile_use.
|
|
38
|
+
from minitap.mobile_use.sdk.types.agent import AgentConfig
|
|
46
39
|
from minitap.mobile_use.sdk.types.exceptions import (
|
|
40
|
+
AgentNotInitializedError,
|
|
47
41
|
AgentProfileNotFoundError,
|
|
48
42
|
AgentTaskRequestError,
|
|
49
43
|
DeviceNotFoundError,
|
|
50
44
|
ServerStartupError,
|
|
51
|
-
AgentNotInitializedError,
|
|
52
45
|
)
|
|
53
46
|
from minitap.mobile_use.sdk.types.task import AgentProfile, Task, TaskRequest, TaskStatus
|
|
47
|
+
from minitap.mobile_use.servers.device_hardware_bridge import BridgeStatus
|
|
48
|
+
from minitap.mobile_use.servers.start_servers import (
|
|
49
|
+
start_device_hardware_bridge,
|
|
50
|
+
start_device_screen_api,
|
|
51
|
+
)
|
|
52
|
+
from minitap.mobile_use.servers.stop_servers import stop_servers
|
|
53
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
54
54
|
from minitap.mobile_use.utils.media import (
|
|
55
55
|
create_gif_from_trace_folder,
|
|
56
56
|
create_steps_json_from_trace_folder,
|
|
57
57
|
remove_images_from_trace_folder,
|
|
58
58
|
remove_steps_json_from_trace_folder,
|
|
59
59
|
)
|
|
60
|
-
from minitap.mobile_use.utils.recorder import
|
|
61
|
-
|
|
60
|
+
from minitap.mobile_use.utils.recorder import log_agent_thought
|
|
62
61
|
|
|
63
62
|
logger = get_logger(__name__)
|
|
64
63
|
|
|
@@ -127,7 +126,10 @@ class Agent:
|
|
|
127
126
|
f"Server start failed, attempting restart "
|
|
128
127
|
f"{restart_attempt}/{server_restart_attempts}"
|
|
129
128
|
)
|
|
130
|
-
|
|
129
|
+
stop_servers(
|
|
130
|
+
should_stop_screen_api=self._is_default_screen_api,
|
|
131
|
+
should_stop_hw_bridge=self._is_default_hw_bridge,
|
|
132
|
+
)
|
|
131
133
|
else:
|
|
132
134
|
error_msg = "Mobile-use servers failed to start after all restart attempts."
|
|
133
135
|
logger.error(error_msg)
|
|
@@ -261,17 +263,31 @@ class Agent:
|
|
|
261
263
|
input=graph_input,
|
|
262
264
|
config={
|
|
263
265
|
"recursion_limit": task.request.max_steps,
|
|
266
|
+
"callbacks": self._config.graph_config_callbacks,
|
|
264
267
|
},
|
|
265
|
-
stream_mode=["messages", "custom", "values"],
|
|
268
|
+
stream_mode=["messages", "custom", "updates", "values"],
|
|
266
269
|
):
|
|
267
|
-
stream_mode,
|
|
270
|
+
stream_mode, payload = chunk
|
|
268
271
|
if stream_mode == "values":
|
|
269
|
-
last_state_snapshot =
|
|
272
|
+
last_state_snapshot = payload # type: ignore
|
|
270
273
|
last_state = State(**last_state_snapshot) # type: ignore
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
274
|
+
if task.request.thoughts_output_path:
|
|
275
|
+
record_events(
|
|
276
|
+
output_path=task.request.thoughts_output_path,
|
|
277
|
+
events=last_state.agents_thoughts,
|
|
278
|
+
)
|
|
279
|
+
|
|
280
|
+
if stream_mode == "updates":
|
|
281
|
+
for key, value in payload.items(): # type: ignore
|
|
282
|
+
if value and "agents_thoughts" in value:
|
|
283
|
+
new_thoughts = value["agents_thoughts"]
|
|
284
|
+
last_item = new_thoughts[-1] if new_thoughts else None
|
|
285
|
+
if last_item:
|
|
286
|
+
log_agent_thought(
|
|
287
|
+
prefix=key,
|
|
288
|
+
agent_thought=last_item,
|
|
289
|
+
)
|
|
290
|
+
|
|
275
291
|
if not last_state:
|
|
276
292
|
err = f"[{task_name}] No result received from graph"
|
|
277
293
|
logger.warning(err)
|
|
@@ -302,12 +318,12 @@ class Agent:
|
|
|
302
318
|
self._finalize_tracing(task=task, context=context)
|
|
303
319
|
return output
|
|
304
320
|
|
|
305
|
-
def clean(self):
|
|
306
|
-
if not self._initialized:
|
|
321
|
+
def clean(self, force: bool = False):
|
|
322
|
+
if not self._initialized and not force:
|
|
307
323
|
return
|
|
308
324
|
screen_api_ok, hw_bridge_ok = stop_servers(
|
|
309
|
-
|
|
310
|
-
|
|
325
|
+
should_stop_screen_api=self._is_default_screen_api,
|
|
326
|
+
should_stop_hw_bridge=self._is_default_hw_bridge,
|
|
311
327
|
)
|
|
312
328
|
if not screen_api_ok:
|
|
313
329
|
logger.warning("Failed to stop Device Screen API.")
|
|
@@ -402,10 +418,9 @@ class Agent:
|
|
|
402
418
|
focused_app_info=None,
|
|
403
419
|
device_date=None,
|
|
404
420
|
structured_decisions=None,
|
|
421
|
+
complete_subgoals_by_ids=[],
|
|
405
422
|
agents_thoughts=[],
|
|
406
423
|
remaining_steps=task.request.max_steps,
|
|
407
|
-
executor_retrigger=False,
|
|
408
|
-
executor_failed=False,
|
|
409
424
|
executor_messages=[],
|
|
410
425
|
cortex_last_thought=None,
|
|
411
426
|
)
|
|
@@ -2,19 +2,20 @@
|
|
|
2
2
|
Builder for AgentConfig objects using a fluent interface.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
from typing import Dict, Optional, List
|
|
6
5
|
import copy
|
|
6
|
+
from typing import Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
from langchain_core.callbacks.base import Callbacks
|
|
7
9
|
|
|
8
10
|
from minitap.mobile_use.config import get_default_llm_config
|
|
11
|
+
from minitap.mobile_use.context import DevicePlatform
|
|
9
12
|
from minitap.mobile_use.sdk.constants import (
|
|
10
13
|
DEFAULT_HW_BRIDGE_BASE_URL,
|
|
11
14
|
DEFAULT_PROFILE_NAME,
|
|
12
15
|
DEFAULT_SCREEN_API_BASE_URL,
|
|
13
16
|
)
|
|
14
|
-
from minitap.mobile_use.sdk.types.agent import
|
|
15
|
-
from minitap.mobile_use.sdk.types.agent import AgentProfile
|
|
17
|
+
from minitap.mobile_use.sdk.types.agent import AgentConfig, AgentProfile, ApiBaseUrl, ServerConfig
|
|
16
18
|
from minitap.mobile_use.sdk.types.task import TaskRequestCommon
|
|
17
|
-
from minitap.mobile_use.context import DevicePlatform
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
class AgentConfigBuilder:
|
|
@@ -44,6 +45,7 @@ class AgentConfigBuilder:
|
|
|
44
45
|
self._device_id: Optional[str] = None
|
|
45
46
|
self._device_platform: Optional[DevicePlatform] = None
|
|
46
47
|
self._servers: ServerConfig = get_default_servers()
|
|
48
|
+
self._graph_config_callbacks: Callbacks = None
|
|
47
49
|
|
|
48
50
|
def add_profile(self, profile: AgentProfile) -> "AgentConfigBuilder":
|
|
49
51
|
"""
|
|
@@ -151,6 +153,16 @@ class AgentConfigBuilder:
|
|
|
151
153
|
self._servers = copy.deepcopy(servers)
|
|
152
154
|
return self
|
|
153
155
|
|
|
156
|
+
def with_graph_config_callbacks(self, callbacks: Callbacks) -> "AgentConfigBuilder":
|
|
157
|
+
"""
|
|
158
|
+
Set the graph config callbacks.
|
|
159
|
+
|
|
160
|
+
Args:
|
|
161
|
+
callbacks: The graph config callbacks to use
|
|
162
|
+
"""
|
|
163
|
+
self._graph_config_callbacks = callbacks
|
|
164
|
+
return self
|
|
165
|
+
|
|
154
166
|
def build(self) -> AgentConfig:
|
|
155
167
|
"""
|
|
156
168
|
Build the mobile-use AgentConfig object.
|
|
@@ -197,6 +209,7 @@ class AgentConfigBuilder:
|
|
|
197
209
|
device_id=self._device_id,
|
|
198
210
|
device_platform=self._device_platform,
|
|
199
211
|
servers=self._servers,
|
|
212
|
+
graph_config_callbacks=self._graph_config_callbacks,
|
|
200
213
|
)
|
|
201
214
|
|
|
202
215
|
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
from typing import Dict, Literal, Optional
|
|
2
2
|
from urllib.parse import urlparse
|
|
3
|
+
|
|
4
|
+
from langchain_core.callbacks.base import Callbacks
|
|
3
5
|
from pydantic import BaseModel
|
|
4
6
|
|
|
5
7
|
from minitap.mobile_use.context import DevicePlatform
|
|
@@ -71,3 +73,6 @@ class AgentConfig(BaseModel):
|
|
|
71
73
|
device_id: Optional[str] = None
|
|
72
74
|
device_platform: Optional[DevicePlatform] = None
|
|
73
75
|
servers: ServerConfig
|
|
76
|
+
graph_config_callbacks: Callbacks = None
|
|
77
|
+
|
|
78
|
+
model_config = {"arbitrary_types_allowed": True}
|