minitap_mobile_use-3.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minitap/mobile_use/__init__.py +0 -0
- minitap/mobile_use/agents/contextor/contextor.md +55 -0
- minitap/mobile_use/agents/contextor/contextor.py +175 -0
- minitap/mobile_use/agents/contextor/types.py +36 -0
- minitap/mobile_use/agents/cortex/cortex.md +135 -0
- minitap/mobile_use/agents/cortex/cortex.py +152 -0
- minitap/mobile_use/agents/cortex/types.py +15 -0
- minitap/mobile_use/agents/executor/executor.md +42 -0
- minitap/mobile_use/agents/executor/executor.py +87 -0
- minitap/mobile_use/agents/executor/tool_node.py +152 -0
- minitap/mobile_use/agents/hopper/hopper.md +15 -0
- minitap/mobile_use/agents/hopper/hopper.py +44 -0
- minitap/mobile_use/agents/orchestrator/human.md +12 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.md +21 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.py +134 -0
- minitap/mobile_use/agents/orchestrator/types.py +11 -0
- minitap/mobile_use/agents/outputter/human.md +25 -0
- minitap/mobile_use/agents/outputter/outputter.py +85 -0
- minitap/mobile_use/agents/outputter/test_outputter.py +167 -0
- minitap/mobile_use/agents/planner/human.md +14 -0
- minitap/mobile_use/agents/planner/planner.md +126 -0
- minitap/mobile_use/agents/planner/planner.py +101 -0
- minitap/mobile_use/agents/planner/types.py +51 -0
- minitap/mobile_use/agents/planner/utils.py +70 -0
- minitap/mobile_use/agents/summarizer/summarizer.py +35 -0
- minitap/mobile_use/agents/video_analyzer/__init__.py +5 -0
- minitap/mobile_use/agents/video_analyzer/human.md +5 -0
- minitap/mobile_use/agents/video_analyzer/video_analyzer.md +37 -0
- minitap/mobile_use/agents/video_analyzer/video_analyzer.py +111 -0
- minitap/mobile_use/clients/browserstack_client.py +477 -0
- minitap/mobile_use/clients/idb_client.py +429 -0
- minitap/mobile_use/clients/ios_client.py +332 -0
- minitap/mobile_use/clients/ios_client_config.py +141 -0
- minitap/mobile_use/clients/ui_automator_client.py +330 -0
- minitap/mobile_use/clients/wda_client.py +526 -0
- minitap/mobile_use/clients/wda_lifecycle.py +367 -0
- minitap/mobile_use/config.py +413 -0
- minitap/mobile_use/constants.py +3 -0
- minitap/mobile_use/context.py +106 -0
- minitap/mobile_use/controllers/__init__.py +0 -0
- minitap/mobile_use/controllers/android_controller.py +524 -0
- minitap/mobile_use/controllers/controller_factory.py +46 -0
- minitap/mobile_use/controllers/device_controller.py +182 -0
- minitap/mobile_use/controllers/ios_controller.py +436 -0
- minitap/mobile_use/controllers/platform_specific_commands_controller.py +199 -0
- minitap/mobile_use/controllers/types.py +106 -0
- minitap/mobile_use/controllers/unified_controller.py +193 -0
- minitap/mobile_use/graph/graph.py +160 -0
- minitap/mobile_use/graph/state.py +115 -0
- minitap/mobile_use/main.py +309 -0
- minitap/mobile_use/sdk/__init__.py +12 -0
- minitap/mobile_use/sdk/agent.py +1294 -0
- minitap/mobile_use/sdk/builders/__init__.py +10 -0
- minitap/mobile_use/sdk/builders/agent_config_builder.py +307 -0
- minitap/mobile_use/sdk/builders/index.py +15 -0
- minitap/mobile_use/sdk/builders/task_request_builder.py +236 -0
- minitap/mobile_use/sdk/constants.py +1 -0
- minitap/mobile_use/sdk/examples/README.md +83 -0
- minitap/mobile_use/sdk/examples/__init__.py +1 -0
- minitap/mobile_use/sdk/examples/app_lock_messaging.py +54 -0
- minitap/mobile_use/sdk/examples/platform_manual_task_example.py +67 -0
- minitap/mobile_use/sdk/examples/platform_minimal_example.py +48 -0
- minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
- minitap/mobile_use/sdk/examples/smart_notification_assistant.py +225 -0
- minitap/mobile_use/sdk/examples/video_transcription_example.py +117 -0
- minitap/mobile_use/sdk/services/cloud_mobile.py +656 -0
- minitap/mobile_use/sdk/services/platform.py +434 -0
- minitap/mobile_use/sdk/types/__init__.py +51 -0
- minitap/mobile_use/sdk/types/agent.py +84 -0
- minitap/mobile_use/sdk/types/exceptions.py +138 -0
- minitap/mobile_use/sdk/types/platform.py +183 -0
- minitap/mobile_use/sdk/types/task.py +269 -0
- minitap/mobile_use/sdk/utils.py +29 -0
- minitap/mobile_use/services/accessibility.py +100 -0
- minitap/mobile_use/services/llm.py +247 -0
- minitap/mobile_use/services/telemetry.py +421 -0
- minitap/mobile_use/tools/index.py +67 -0
- minitap/mobile_use/tools/mobile/back.py +52 -0
- minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
- minitap/mobile_use/tools/mobile/focus_and_clear_text.py +317 -0
- minitap/mobile_use/tools/mobile/focus_and_input_text.py +153 -0
- minitap/mobile_use/tools/mobile/launch_app.py +86 -0
- minitap/mobile_use/tools/mobile/long_press_on.py +169 -0
- minitap/mobile_use/tools/mobile/open_link.py +62 -0
- minitap/mobile_use/tools/mobile/press_key.py +83 -0
- minitap/mobile_use/tools/mobile/stop_app.py +62 -0
- minitap/mobile_use/tools/mobile/swipe.py +156 -0
- minitap/mobile_use/tools/mobile/tap.py +154 -0
- minitap/mobile_use/tools/mobile/video_recording.py +177 -0
- minitap/mobile_use/tools/mobile/wait_for_delay.py +81 -0
- minitap/mobile_use/tools/scratchpad.py +147 -0
- minitap/mobile_use/tools/test_utils.py +413 -0
- minitap/mobile_use/tools/tool_wrapper.py +16 -0
- minitap/mobile_use/tools/types.py +35 -0
- minitap/mobile_use/tools/utils.py +336 -0
- minitap/mobile_use/utils/app_launch_utils.py +173 -0
- minitap/mobile_use/utils/cli_helpers.py +37 -0
- minitap/mobile_use/utils/cli_selection.py +143 -0
- minitap/mobile_use/utils/conversations.py +31 -0
- minitap/mobile_use/utils/decorators.py +124 -0
- minitap/mobile_use/utils/errors.py +6 -0
- minitap/mobile_use/utils/file.py +13 -0
- minitap/mobile_use/utils/logger.py +183 -0
- minitap/mobile_use/utils/media.py +186 -0
- minitap/mobile_use/utils/recorder.py +52 -0
- minitap/mobile_use/utils/requests_utils.py +37 -0
- minitap/mobile_use/utils/shell_utils.py +20 -0
- minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
- minitap/mobile_use/utils/time.py +6 -0
- minitap/mobile_use/utils/ui_hierarchy.py +132 -0
- minitap/mobile_use/utils/video.py +281 -0
- minitap_mobile_use-3.3.0.dist-info/METADATA +329 -0
- minitap_mobile_use-3.3.0.dist-info/RECORD +115 -0
- minitap_mobile_use-3.3.0.dist-info/WHEEL +4 -0
- minitap_mobile_use-3.3.0.dist-info/entry_points.txt +3 -0
+++ b/minitap/mobile_use/agents/executor/executor.py
@@ -0,0 +1,87 @@
+from pathlib import Path
+
+from jinja2 import Template
+from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_google_vertexai.chat_models import ChatVertexAI
+
+from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
+from minitap.mobile_use.context import MobileUseContext
+from minitap.mobile_use.graph.state import State
+from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message, with_fallback
+from minitap.mobile_use.tools.index import (
+    EXECUTOR_WRAPPERS_TOOLS,
+    VIDEO_RECORDING_WRAPPERS,
+    get_tools_from_wrappers,
+)
+from minitap.mobile_use.utils.decorators import wrap_with_callbacks
+from minitap.mobile_use.utils.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+class ExecutorNode:
+    def __init__(self, ctx: MobileUseContext):
+        self.ctx = ctx
+
+    @wrap_with_callbacks(
+        before=lambda: logger.info("Starting Executor Agent..."),
+        on_success=lambda _: logger.success("Executor Agent"),
+        on_failure=lambda _: logger.error("Executor Agent"),
+    )
+    async def __call__(self, state: State):
+        structured_decisions = state.structured_decisions
+        if not structured_decisions:
+            logger.warning("No structured decisions found.")
+            return await state.asanitize_update(
+                ctx=self.ctx,
+                update={
+                    "agents_thoughts": [
+                        "No structured decisions found, I cannot execute anything."
+                    ],
+                },
+                agent="executor",
+            )
+
+        system_message = Template(
+            Path(__file__).parent.joinpath("executor.md").read_text(encoding="utf-8")
+        ).render(platform=self.ctx.device.mobile_platform.value)
+        cortex_last_thought = (
+            state.cortex_last_thought if state.cortex_last_thought else state.agents_thoughts[-1]
+        )
+        messages = [
+            SystemMessage(content=system_message),
+            HumanMessage(content=cortex_last_thought),
+            HumanMessage(content=structured_decisions),
+            *state.executor_messages,
+        ]
+
+        llm = get_llm(ctx=self.ctx, name="executor")
+        llm_fallback = get_llm(ctx=self.ctx, name="executor", use_fallback=True)
+
+        executor_wrappers = list(EXECUTOR_WRAPPERS_TOOLS)
+        if self.ctx.video_recording_enabled:
+            executor_wrappers.extend(VIDEO_RECORDING_WRAPPERS)
+
+        llm_bind_tools_kwargs: dict = {
+            "tools": get_tools_from_wrappers(self.ctx, executor_wrappers),
+        }
+
+        # Google chat models (ChatGoogleGenerativeAI, ChatVertexAI) do not support the "parallel_tool_calls" keyword
+        if not isinstance(llm, ChatGoogleGenerativeAI | ChatVertexAI):
+            llm_bind_tools_kwargs["parallel_tool_calls"] = True
+
+        llm = llm.bind_tools(**llm_bind_tools_kwargs)
+        llm_fallback = llm_fallback.bind_tools(**llm_bind_tools_kwargs)
+        response = await with_fallback(
+            main_call=lambda: invoke_llm_with_timeout_message(llm.ainvoke(messages)),
+            fallback_call=lambda: invoke_llm_with_timeout_message(llm_fallback.ainvoke(messages)),
+        )
+        return await state.asanitize_update(
+            ctx=self.ctx,
+            update={
+                "cortex_last_thought": cortex_last_thought,
+                EXECUTOR_MESSAGES_KEY: [response],
+            },
+            agent="executor",
+        )
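For orientation, here is a minimal sketch of how `ExecutorNode` and the sequential `ExecutorToolNode` from the next hunk could be wired into a LangGraph graph. The package's real wiring lives in `minitap/mobile_use/graph/graph.py`, which is not shown in this section, so the node names, edges, and the assumption that `ctx` is an already-built `MobileUseContext` are illustrative only:

```python
# Illustrative sketch only; the actual graph is defined in minitap/mobile_use/graph/graph.py.
from langgraph.graph import StateGraph

from minitap.mobile_use.agents.executor.executor import ExecutorNode
from minitap.mobile_use.agents.executor.tool_node import ExecutorToolNode
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
from minitap.mobile_use.graph.state import State
from minitap.mobile_use.tools.index import EXECUTOR_WRAPPERS_TOOLS, get_tools_from_wrappers


def build_executor_subgraph(ctx) -> StateGraph:
    graph = StateGraph(State)
    # The executor node asks the LLM which tool calls to make...
    graph.add_node("executor", ExecutorNode(ctx))
    # ...and the sequential tool node applies them one at a time,
    # accumulating ToolMessages under EXECUTOR_MESSAGES_KEY.
    graph.add_node(
        "executor_tools",
        ExecutorToolNode(
            tools=get_tools_from_wrappers(ctx, list(EXECUTOR_WRAPPERS_TOOLS)),
            messages_key=EXECUTOR_MESSAGES_KEY,
        ),
    )
    graph.add_edge("executor", "executor_tools")
    return graph
```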
+++ b/minitap/mobile_use/agents/executor/tool_node.py
@@ -0,0 +1,152 @@
+import asyncio
+import copy
+from typing import Any, override
+
+from langchain_core.messages import AnyMessage, ToolCall, ToolMessage
+from langchain_core.runnables import RunnableConfig
+from langgraph.prebuilt import ToolNode
+from langgraph.store.base import BaseStore
+from langgraph.types import Command
+from pydantic import BaseModel
+
+from minitap.mobile_use.services.telemetry import telemetry
+from minitap.mobile_use.utils.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+class ExecutorToolNode(ToolNode):
+    """
+    ToolNode that runs tool calls one after the other - not simultaneously.
+    If one error occurs, the remaining tool calls are aborted!
+    """
+
+    def __init__(self, tools, messages_key: str, trace_id: str | None = None):
+        super().__init__(tools=tools, messages_key=messages_key)
+        self._trace_id = trace_id
+
+    @override
+    async def _afunc(
+        self,
+        input: list[AnyMessage] | dict[str, Any] | BaseModel,
+        config: RunnableConfig,
+        *,
+        store: BaseStore | None,
+    ):
+        return await self.__func(is_async=True, input=input, config=config, store=store)
+
+    @override
+    def _func(
+        self,
+        input: list[AnyMessage] | dict[str, Any] | BaseModel,
+        config: RunnableConfig,
+        *,
+        store: BaseStore | None,
+    ) -> Any:
+        loop = asyncio.get_event_loop()
+        return loop.run_until_complete(
+            self.__func(is_async=False, input=input, config=config, store=store)
+        )
+
+    async def __func(
+        self,
+        is_async: bool,
+        input: list[AnyMessage] | dict[str, Any] | BaseModel,
+        config: RunnableConfig,
+        *,
+        store: BaseStore | None,
+    ) -> Any:
+        tool_calls, input_type = self._parse_input(input, store)
+        outputs: list[Command | ToolMessage] = []
+        failed = False
+        for call in tool_calls:
+            if failed:
+                output = self._get_erroneous_command(
+                    call=call,
+                    message="Aborted: a previous tool call failed!",
+                )
+            else:
+                if is_async:
+                    output = await self._arun_one(call, input_type, config)
+                else:
+                    output = self._run_one(call, input_type, config)
+                failed = self._has_tool_call_failed(call, output)
+                if failed is None:
+                    output = self._get_erroneous_command(
+                        call=call,
+                        message=f"Unexpected tool output type: {type(output)}",
+                    )
+                    failed = True
+
+            call_without_state = copy.deepcopy(call)
+            if "args" in call_without_state and "state" in call_without_state["args"]:
+                del call_without_state["args"]["state"]
+            if failed:
+                error_msg = ""
+                try:
+                    if isinstance(output, ToolMessage):
+                        error_msg = output.content
+                    elif isinstance(output, Command):
+                        tool_msg = self._get_tool_message(output)
+                        error_msg = tool_msg.content
+                except Exception:
+                    error_msg = "Could not extract error details"
+
+                logger.info(f"❌ Tool call failed: {call_without_state}")
+                logger.info(f" Error: {error_msg}")
+
+                # Capture executor action telemetry
+                if self._trace_id:
+                    telemetry.capture_executor_action(
+                        task_id=self._trace_id,
+                        tool_name=call["name"],
+                        success=False,
+                        error=str(error_msg)[:500] if error_msg else None,
+                    )
+            else:
+                logger.info("✅ Tool call succeeded: " + str(call_without_state))
+
+                # Capture executor action telemetry
+                if self._trace_id:
+                    telemetry.capture_executor_action(
+                        task_id=self._trace_id,
+                        tool_name=call["name"],
+                        success=True,
+                    )
+
+            outputs.append(output)
+        return self._combine_tool_outputs(outputs, input_type)  # type: ignore
+
+    def _has_tool_call_failed(
+        self,
+        call: ToolCall,
+        output: ToolMessage | Command,
+    ) -> bool | None:
+        if isinstance(output, ToolMessage):
+            return output.status == "error"
+        if isinstance(output, Command):
+            output_msg = self._get_tool_message(output)
+            return output_msg.status == "error"
+        return None
+
+    def _get_erroneous_command(self, call: ToolCall, message: str) -> Command:
+        tool_message = ToolMessage(
+            name=call["name"], tool_call_id=call["id"], content=message, status="error"
+        )
+        return Command(update={self.messages_key: [tool_message]})
+
+    def _get_tool_message(self, cmd: Command) -> ToolMessage:
+        if isinstance(cmd.update, dict):
+            msg = cmd.update.get(self.messages_key)
+            if isinstance(msg, list):
+                if len(msg) == 0:
+                    raise ValueError("No messages found in command update")
+                if not isinstance(msg[-1], ToolMessage):
+                    raise ValueError("Last message in command update is not a tool message")
+                return msg[-1]
+            elif isinstance(msg, ToolMessage):
+                return msg
+            elif msg is None:
+                raise ValueError(f"Missing '{self.messages_key}' in command update")
+            raise ValueError(f"Unexpected message type in command update: {type(msg)}")
+        raise ValueError("Command update is not a dict")
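The docstring's fail-fast contract is easiest to see with toy tools: once one call errors, every later call in the same batch is answered with an aborted error `ToolMessage` instead of running. A minimal sketch, assuming langgraph's default error handling turns a raised exception into a `ToolMessage` with `status="error"` (the toy tools and payload are hypothetical):

```python
import asyncio

from langchain_core.messages import AIMessage
from langchain_core.tools import tool

from minitap.mobile_use.agents.executor.tool_node import ExecutorToolNode


@tool
def fail() -> str:
    """Always raises, so the batch should abort here."""
    raise RuntimeError("boom")


@tool
def ping() -> str:
    """Would succeed, but is aborted when it follows a failure."""
    return "pong"


async def main() -> None:
    node = ExecutorToolNode(tools=[fail, ping], messages_key="executor_messages")
    result = await node.ainvoke(
        {
            "executor_messages": [
                AIMessage(
                    content="",
                    tool_calls=[
                        {"name": "fail", "args": {}, "id": "1"},
                        {"name": "ping", "args": {}, "id": "2"},
                    ],
                )
            ]
        }
    )
    # Expect the first result to carry the tool's error and the second to read
    # "Aborted: a previous tool call failed!" (wrapped in a Command update).
    print(result)


asyncio.run(main())
```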
+++ b/minitap/mobile_use/agents/hopper/hopper.md
@@ -0,0 +1,15 @@
+## Hopper
+
+Extract relevant information from batch data. **Keep extracted data exactly as-is** - no reformatting.
+
+## Output
+- **found**: `true` if data was found, `false` otherwise
+- **output**: Extracted information if found, `null` otherwise
+- **reason**: Brief explanation of search logic
+
+## Rules
+1. **Search entire input** - may contain hundreds of entries
+2. **For app package lookup**: Match app name (or variations) in package identifier
+   - Common patterns: lowercase app name, company+app, brand name, codenames
+3. **Prefer direct matches** over partial matches
+4. **Return `null`** if not found or if multiple ambiguous matches exist - don't guess
+++ b/minitap/mobile_use/agents/hopper/hopper.py
@@ -0,0 +1,44 @@
+from pathlib import Path
+
+from jinja2 import Template
+from langchain_core.messages import HumanMessage, SystemMessage
+from pydantic import BaseModel, Field
+
+from minitap.mobile_use.context import MobileUseContext
+from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message, with_fallback
+
+
+class HopperOutput(BaseModel):
+    found: bool = Field(description="True if the requested data was found, False otherwise.")
+    output: str | None = Field(description="The extracted data if found, null otherwise.")
+    reason: str = Field(
+        description="A short explanation of what you looked for"
+        + " and how you decided what to extract."
+    )
+
+
+async def hopper(
+    ctx: MobileUseContext,
+    request: str,
+    data: str,
+) -> HopperOutput:
+    print("Starting Hopper Agent", flush=True)
+    system_message = Template(
+        Path(__file__).parent.joinpath("hopper.md").read_text(encoding="utf-8")
+    ).render()
+    messages = [
+        SystemMessage(content=system_message),
+        HumanMessage(content=f"{request}\nHere is the data you must dig:\n{data}"),
+    ]
+
+    llm = get_llm(ctx=ctx, name="hopper", is_utils=True, temperature=0).with_structured_output(
+        HopperOutput
+    )
+    llm_fallback = get_llm(
+        ctx=ctx, name="hopper", is_utils=True, use_fallback=True, temperature=0
+    ).with_structured_output(HopperOutput)
+    response: HopperOutput = await with_fallback(
+        main_call=lambda: invoke_llm_with_timeout_message(llm.ainvoke(messages)),
+        fallback_call=lambda: invoke_llm_with_timeout_message(llm_fallback.ainvoke(messages)),
+    )  # type: ignore
+    return response
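Hopper is the needle-in-a-haystack lookup utility the other agents lean on. A typical call might look like the following sketch; the request string and package list are made up for illustration, and `ctx` is assumed to be an already-initialized `MobileUseContext`:

```python
import asyncio

from minitap.mobile_use.agents.hopper.hopper import hopper


async def find_package(ctx) -> None:
    result = await hopper(
        ctx=ctx,
        request="Find the package identifier for the Clock app.",
        data="com.google.android.deskclock\ncom.android.settings\ncom.whatsapp",
    )
    # Expected shape per HopperOutput, e.g.:
    #   found=True, output="com.google.android.deskclock",
    #   reason="'deskclock' matches the Clock app name."
    print(result.found, result.output, result.reason)
```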
+++ b/minitap/mobile_use/agents/orchestrator/orchestrator.md
@@ -0,0 +1,21 @@
+## You are the **Orchestrator**
+
+Decide what to do next based on {{ platform }} mobile device execution state.
+
+## Input
+- Current **subgoal plan** with statuses
+- **Subgoals to examine** (PENDING/NOT_STARTED)
+- **Agent thoughts** from execution
+- **Initial goal**
+
+## Your Decisions
+
+1. **Mark subgoals complete**: Add finished subgoal IDs to `completed_subgoal_ids`
+2. **Set `needs_replanning = TRUE`** if repeated failures make current plan unworkable
+3. **Fill `reason`**: Final answer if goal complete, or explanation of decisions
+
+## Agent Roles (for context)
+- **Planner**: Creates/updates subgoal plan
+- **Cortex**: Analyzes screen, decides actions (may complete multiple subgoals at once)
+- **Executor**: Executes actions on device
+- **You (Orchestrator)**: Coordinate, track completion, trigger replanning
+++ b/minitap/mobile_use/agents/orchestrator/orchestrator.py
@@ -0,0 +1,134 @@
+from pathlib import Path
+
+from jinja2 import Template
+from langchain_core.messages import HumanMessage, SystemMessage
+
+from minitap.mobile_use.agents.orchestrator.types import OrchestratorOutput
+from minitap.mobile_use.agents.planner.utils import (
+    all_completed,
+    complete_subgoals_by_ids,
+    fail_current_subgoal,
+    get_current_subgoal,
+    get_subgoals_by_ids,
+    nothing_started,
+    start_next_subgoal,
+)
+from minitap.mobile_use.context import MobileUseContext
+from minitap.mobile_use.graph.state import State
+from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message, with_fallback
+from minitap.mobile_use.utils.decorators import wrap_with_callbacks
+from minitap.mobile_use.utils.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+class OrchestratorNode:
+    def __init__(self, ctx: MobileUseContext):
+        self.ctx = ctx
+
+    @wrap_with_callbacks(
+        before=lambda: logger.info("Starting Orchestrator Agent..."),
+        on_success=lambda _: logger.success("Orchestrator Agent"),
+        on_failure=lambda _: logger.error("Orchestrator Agent"),
+    )
+    async def __call__(self, state: State):
+        no_subgoal_started = nothing_started(state.subgoal_plan)
+        current_subgoal = get_current_subgoal(state.subgoal_plan)
+
+        if no_subgoal_started or not current_subgoal:
+            state.subgoal_plan = start_next_subgoal(state.subgoal_plan)
+            new_subgoal = get_current_subgoal(state.subgoal_plan)
+            thoughts = [
+                (
+                    f"Starting the first subgoal: {new_subgoal}"
+                    if no_subgoal_started
+                    else f"Starting the next subgoal: {new_subgoal}"
+                )
+            ]
+            return await _get_state_update(
+                ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True
+            )
+
+        subgoals_to_examine = get_subgoals_by_ids(
+            subgoals=state.subgoal_plan,
+            ids=state.complete_subgoals_by_ids,
+        )
+        if len(subgoals_to_examine) <= 0:
+            return await _get_state_update(
+                ctx=self.ctx, state=state, thoughts=["No subgoal to examine."]
+            )
+
+        system_message = Template(
+            Path(__file__).parent.joinpath("orchestrator.md").read_text(encoding="utf-8")
+        ).render(platform=self.ctx.device.mobile_platform.value)
+        human_message = Template(
+            Path(__file__).parent.joinpath("human.md").read_text(encoding="utf-8")
+        ).render(
+            initial_goal=state.initial_goal,
+            subgoal_plan="\n".join(str(s) for s in state.subgoal_plan),
+            subgoals_to_examine="\n".join(str(s) for s in subgoals_to_examine),
+            agent_thoughts="\n".join(state.agents_thoughts),
+        )
+        messages = [
+            SystemMessage(content=system_message),
+            HumanMessage(content=human_message),
+        ]
+
+        llm = get_llm(ctx=self.ctx, name="orchestrator", temperature=1).with_structured_output(
+            OrchestratorOutput
+        )
+        llm_fallback = get_llm(
+            ctx=self.ctx, name="orchestrator", use_fallback=True, temperature=1
+        ).with_structured_output(OrchestratorOutput)
+        response: OrchestratorOutput = await with_fallback(
+            main_call=lambda: invoke_llm_with_timeout_message(llm.ainvoke(messages)),
+            fallback_call=lambda: invoke_llm_with_timeout_message(llm_fallback.ainvoke(messages)),
+        )  # type: ignore
+        if response.needs_replaning:
+            thoughts = [response.reason]
+            state.subgoal_plan = fail_current_subgoal(state.subgoal_plan)
+            thoughts.append("==== END OF PLAN, REPLANNING ====")
+            return await _get_state_update(
+                ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True
+            )
+
+        state.subgoal_plan = complete_subgoals_by_ids(
+            subgoals=state.subgoal_plan,
+            ids=response.completed_subgoal_ids,
+        )
+        thoughts = [response.reason]
+        if all_completed(state.subgoal_plan):
+            logger.success("All the subgoals have been completed successfully.")
+            return await _get_state_update(
+                ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True
+            )
+
+        if current_subgoal.id not in response.completed_subgoal_ids:
+            # The current subgoal is not yet complete.
+            return await _get_state_update(
+                ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True
+            )
+
+        state.subgoal_plan = start_next_subgoal(state.subgoal_plan)
+        new_subgoal = get_current_subgoal(state.subgoal_plan)
+        thoughts.append(f"==== NEXT SUBGOAL: {new_subgoal} ====")
+        return await _get_state_update(
+            ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True
+        )
+
+
+async def _get_state_update(
+    ctx: MobileUseContext,
+    state: State,
+    thoughts: list[str],
+    update_plan: bool = False,
+):
+    update = {
+        "agents_thoughts": thoughts,
+        "complete_subgoals_by_ids": [],
+    }
+    if update_plan:
+        update["subgoal_plan"] = state.subgoal_plan
+        if ctx.on_plan_changes:
+            await ctx.on_plan_changes(state.subgoal_plan, False)
+    return await state.asanitize_update(ctx=ctx, update=update, agent="orchestrator")
+++ b/minitap/mobile_use/agents/orchestrator/types.py
@@ -0,0 +1,11 @@
+from typing import Annotated
+
+from pydantic import BaseModel
+
+
+class OrchestratorOutput(BaseModel):
+    completed_subgoal_ids: Annotated[
+        list[str], "IDs of subgoals that can now be marked as complete"
+    ] = []
+    needs_replaning: Annotated[bool, "Whether the orchestrator needs to replan the subgoal plan"]
+    reason: str
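For reference, a decision payload parsed into this model might look like the following; the values are illustrative. Note that the released code spells the field `needs_replaning`, while the orchestrator prompt above refers to `needs_replanning`:

```python
from minitap.mobile_use.agents.orchestrator.types import OrchestratorOutput

# Illustrative payload, as would come back from with_structured_output(OrchestratorOutput).
decision = OrchestratorOutput(
    completed_subgoal_ids=["subgoal-1", "subgoal-2"],
    needs_replaning=False,
    reason="Both login subgoals finished; moving on to the next one.",
)
assert not decision.needs_replaning
```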
+++ b/minitap/mobile_use/agents/outputter/human.md
@@ -0,0 +1,25 @@
+You are a helpful assistant tasked with generating the final structured output of a multi-agent reasoning process.
+
+## The original goal was:
+{{ initial_goal }}
+
+{% if agents_thoughts %}
+Throughout the reasoning process, the following agent thoughts were collected:
+{% for thought in agents_thoughts %}
+- {{ thought }}
+{% endfor %}
+{% endif %}
+
+{% if last_ai_message %}
+The last message generated by the graph execution was:
+"{{ last_ai_message }}"
+{% endif %}
+
+{% if not structured_output %}
+Please generate a well-structured JSON object based on the following instructions:
+
+> "{{ output_description }}"
+
+Only return the JSON object, with fields matching the description as closely as possible.
+Do not include explanations or markdown, just the raw JSON.
+{% endif %}
+++ b/minitap/mobile_use/agents/outputter/outputter.py
@@ -0,0 +1,85 @@
+import json
+from pathlib import Path
+
+from jinja2 import Template
+from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
+from pydantic import BaseModel
+
+from minitap.mobile_use.config import OutputConfig
+from minitap.mobile_use.context import MobileUseContext
+from minitap.mobile_use.graph.state import State
+from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message, with_fallback
+from minitap.mobile_use.utils.conversations import is_ai_message
+from minitap.mobile_use.utils.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+async def outputter(
+    ctx: MobileUseContext, output_config: OutputConfig, graph_output: State
+) -> dict:
+    logger.info("Starting Outputter Agent")
+    last_message = graph_output.messages[-1] if graph_output.messages else None
+
+    system_message = (
+        "You are a helpful assistant tasked with generating "
+        + "the final structured output of a multi-agent reasoning process."
+    )
+    human_message = Template(
+        Path(__file__).parent.joinpath("human.md").read_text(encoding="utf-8")
+    ).render(
+        initial_goal=graph_output.initial_goal,
+        agents_thoughts=graph_output.agents_thoughts,
+        structured_output=output_config.structured_output,
+        output_description=output_config.output_description,
+        last_ai_message=last_message.content
+        if last_message and is_ai_message(message=last_message)
+        else None,
+    )
+
+    messages: list[BaseMessage] = [
+        SystemMessage(content=system_message),
+        HumanMessage(content=human_message),
+    ]
+
+    if output_config.output_description:
+        messages.append(HumanMessage(content=output_config.output_description))
+
+    llm = get_llm(ctx=ctx, name="outputter", is_utils=True, temperature=1)
+    llm_fallback = get_llm(
+        ctx=ctx, name="outputter", is_utils=True, use_fallback=True, temperature=1
+    )
+    structured_llm = llm
+    structured_llm_fallback = llm_fallback
+
+    if output_config.structured_output:
+        schema: dict | type[BaseModel] | None = None
+        so = output_config.structured_output
+
+        if isinstance(so, dict):
+            schema = so
+        elif isinstance(so, BaseModel):
+            schema = type(so)
+        elif isinstance(so, type) and issubclass(so, BaseModel):
+            schema = so
+
+        if schema is not None:
+            structured_llm = llm.with_structured_output(schema)
+            structured_llm_fallback = llm_fallback.with_structured_output(schema)
+
+    response = await with_fallback(
+        main_call=lambda: invoke_llm_with_timeout_message(structured_llm.ainvoke(messages)),
+        fallback_call=lambda: invoke_llm_with_timeout_message(
+            structured_llm_fallback.ainvoke(messages)
+        ),
+    )  # type: ignore
+    if isinstance(response, BaseModel):
+        if output_config.output_description and hasattr(response, "content"):
+            response = json.loads(response.content)  # type: ignore
+            return response
+        return response.model_dump()
+    elif hasattr(response, "content"):
+        return json.loads(response.content)  # type: ignore
+    else:
+        logger.info("Found unknown response type: " + str(type(response)))
+        return response