droidrun 0.3.10.dev5__py3-none-any.whl → 0.3.10.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/agent/codeact/codeact_agent.py +18 -25
- droidrun/agent/droid/events.py +4 -1
- droidrun/agent/executor/executor_agent.py +24 -38
- droidrun/agent/executor/prompts.py +0 -108
- droidrun/agent/manager/manager_agent.py +104 -87
- droidrun/agent/utils/llm_picker.py +63 -1
- droidrun/agent/utils/tools.py +29 -0
- droidrun/app_cards/app_card_provider.py +27 -0
- droidrun/app_cards/providers/__init__.py +7 -0
- droidrun/app_cards/providers/composite_provider.py +97 -0
- droidrun/app_cards/providers/local_provider.py +116 -0
- droidrun/app_cards/providers/server_provider.py +126 -0
- droidrun/cli/main.py +241 -30
- droidrun/config_manager/__init__.py +0 -2
- droidrun/config_manager/config_manager.py +45 -101
- droidrun/config_manager/path_resolver.py +1 -1
- droidrun/config_manager/prompt_loader.py +48 -51
- droidrun/portal.py +17 -0
- droidrun/tools/adb.py +13 -34
- {droidrun-0.3.10.dev5.dist-info → droidrun-0.3.10.dev6.dist-info}/METADATA +2 -9
- {droidrun-0.3.10.dev5.dist-info → droidrun-0.3.10.dev6.dist-info}/RECORD +24 -20
- droidrun/config_manager/app_card_loader.py +0 -148
- {droidrun-0.3.10.dev5.dist-info → droidrun-0.3.10.dev6.dist-info}/WHEEL +0 -0
- {droidrun-0.3.10.dev5.dist-info → droidrun-0.3.10.dev6.dist-info}/entry_points.txt +0 -0
- {droidrun-0.3.10.dev5.dist-info → droidrun-0.3.10.dev6.dist-info}/licenses/LICENSE +0 -0
@@ -85,15 +85,8 @@ class CodeActAgent(Workflow):
|
|
85
85
|
self.tool_list = {}
|
86
86
|
for action_name, signature in merged_signatures.items():
|
87
87
|
func = signature["function"]
|
88
|
-
|
89
|
-
|
90
|
-
def make_bound(f, ti):
|
91
|
-
async def bound_func(*args, **kwargs):
|
92
|
-
return await f(ti, *args, **kwargs)
|
93
|
-
return bound_func
|
94
|
-
self.tool_list[action_name] = make_bound(func, tools_instance)
|
95
|
-
else:
|
96
|
-
self.tool_list[action_name] = lambda *args, f=func, ti=tools_instance, **kwargs: f(ti, *args, **kwargs)
|
88
|
+
|
89
|
+
self.tool_list[action_name] = lambda *args, f=func, ti=tools_instance, **kwargs: f(ti, *args, **kwargs)
|
97
90
|
|
98
91
|
self.tool_list["remember"] = tools_instance.remember
|
99
92
|
self.tool_list["complete"] = tools_instance.complete
|
@@ -113,13 +106,10 @@ class CodeActAgent(Workflow):
|
|
113
106
|
)
|
114
107
|
self.system_prompt = ChatMessage(role="system", content=system_prompt_text)
|
115
108
|
|
116
|
-
self.user_prompt_template = PromptLoader.load_prompt(agent_config.get_codeact_user_prompt_path())
|
117
|
-
|
118
109
|
self.executor = SimpleCodeExecutor(
|
119
110
|
loop=asyncio.get_event_loop(),
|
120
111
|
locals={},
|
121
112
|
tools=self.tool_list,
|
122
|
-
tools_instance=tools_instance,
|
123
113
|
globals={"__builtins__": __builtins__},
|
124
114
|
)
|
125
115
|
|
@@ -293,27 +283,30 @@ Now, describe the next step you will take to address the original goal: {goal}""
|
|
293
283
|
try:
|
294
284
|
self.code_exec_counter += 1
|
295
285
|
result = await self.executor.execute(ExecuterState(ui_state=ctx.store.get("ui_state", None)), code)
|
296
|
-
logger.info(f"💡 Code execution successful. Result: {result
|
286
|
+
logger.info(f"💡 Code execution successful. Result: {result}")
|
297
287
|
await asyncio.sleep(self.agent_config.after_sleep_action)
|
298
|
-
screenshots = result['screenshots']
|
299
|
-
for screenshot in screenshots[:-1]: # the last screenshot will be captured by next step
|
300
|
-
ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
|
301
|
-
|
302
|
-
ui_states = result['ui_states']
|
303
|
-
for ui_state in ui_states[:-1]:
|
304
|
-
ctx.write_event_to_stream(RecordUIStateEvent(ui_state=ui_state['a11y_tree']))
|
305
288
|
|
289
|
+
# Check if complete() was called
|
306
290
|
if self.tools.finished:
|
307
|
-
logger.
|
308
|
-
|
309
|
-
|
310
|
-
|
291
|
+
logger.info("✅ Task marked as complete via complete() function")
|
292
|
+
|
293
|
+
# Validate completion state
|
294
|
+
success = self.tools.success if self.tools.success is not None else False
|
295
|
+
reason = self.tools.reason if self.tools.reason else "Task completed without reason"
|
296
|
+
|
297
|
+
# Reset finished flag for next execution
|
298
|
+
self.tools.finished = False
|
299
|
+
|
300
|
+
logger.info(f" - Success: {success}")
|
301
|
+
logger.info(f" - Reason: {reason}")
|
302
|
+
|
303
|
+
event = TaskEndEvent(success=success, reason=reason)
|
311
304
|
ctx.write_event_to_stream(event)
|
312
305
|
return event
|
313
306
|
|
314
307
|
self.remembered_info = self.tools.memory
|
315
308
|
|
316
|
-
event = TaskExecutionResultEvent(output=str(result
|
309
|
+
event = TaskExecutionResultEvent(output=str(result))
|
317
310
|
ctx.write_event_to_stream(event)
|
318
311
|
return event
|
319
312
|
|
droidrun/agent/droid/events.py
CHANGED
@@ -10,6 +10,7 @@ For internal events with full debugging metadata, see:
|
|
10
10
|
- codeact/events.py (Task*, EpisodicMemoryEvent)
|
11
11
|
"""
|
12
12
|
|
13
|
+
import asyncio
|
13
14
|
from typing import Dict, List
|
14
15
|
|
15
16
|
from llama_index.core.workflow import Event
|
@@ -49,7 +50,9 @@ class DroidAgentState(BaseModel):
|
|
49
50
|
|
50
51
|
# Task context
|
51
52
|
instruction: str = ""
|
52
|
-
|
53
|
+
# App Cards
|
54
|
+
app_card: str = ""
|
55
|
+
app_card_loading_task: asyncio.Task[str] | None = None
|
53
56
|
# Formatted device state for prompts (complete text)
|
54
57
|
formatted_device_state: str = ""
|
55
58
|
|
@@ -90,52 +90,38 @@ class ExecutorAgent(Workflow): # TODO: Fix a bug in bad prompt
|
|
90
90
|
subgoal = ev.get("subgoal", "")
|
91
91
|
logger.info(f"🧠 Executor thinking about action for: {subgoal}")
|
92
92
|
|
93
|
-
#
|
94
|
-
|
95
|
-
app_card_text = ""
|
96
|
-
if app_card.strip():
|
97
|
-
app_card_text = "App card gives information on how to operate the app and perform actions.\n### App Card ###\n" + app_card.strip() + "\n\n"
|
98
|
-
|
99
|
-
# Format device state (use unified state)
|
100
|
-
device_state_text = ""
|
101
|
-
if self.shared_state.formatted_device_state and self.shared_state.formatted_device_state.strip():
|
102
|
-
device_state_text = "### Device State ###\n" + self.shared_state.formatted_device_state.strip() + "\n\n"
|
103
|
-
|
104
|
-
# Format progress status
|
105
|
-
progress_status_text = self.shared_state.progress_status + "\n\n" if self.shared_state.progress_status else "No progress yet.\n\n"
|
106
|
-
|
107
|
-
# Format atomic actions
|
108
|
-
atomic_actions_text = chr(10).join(
|
109
|
-
f"- {action_name}({', '.join(action_info['arguments'])}): {action_info['description']}"
|
110
|
-
for action_name, action_info in ATOMIC_ACTION_SIGNATURES.items()
|
111
|
-
) + "\n"
|
112
|
-
|
113
|
-
# Format action history
|
93
|
+
# Prepare action history as structured data (last 5 actions)
|
94
|
+
action_history = []
|
114
95
|
if self.shared_state.action_history:
|
115
|
-
|
116
|
-
|
117
|
-
|
96
|
+
n = min(5, len(self.shared_state.action_history))
|
97
|
+
action_history = [
|
98
|
+
{
|
99
|
+
"action": act,
|
100
|
+
"summary": summ,
|
101
|
+
"outcome": outcome,
|
102
|
+
"error": err_des
|
103
|
+
}
|
118
104
|
for act, summ, outcome, err_des in zip(
|
119
|
-
self.shared_state.action_history[-
|
120
|
-
self.shared_state.summary_history[-
|
121
|
-
self.shared_state.action_outcomes[-
|
122
|
-
self.shared_state.error_descriptions[-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
#
|
105
|
+
self.shared_state.action_history[-n:],
|
106
|
+
self.shared_state.summary_history[-n:],
|
107
|
+
self.shared_state.action_outcomes[-n:],
|
108
|
+
self.shared_state.error_descriptions[-n:],
|
109
|
+
strict=True
|
110
|
+
)
|
111
|
+
]
|
112
|
+
|
113
|
+
# Let Jinja2 handle all formatting
|
128
114
|
system_prompt = PromptLoader.load_prompt(
|
129
115
|
self.agent_config.get_executor_system_prompt_path(),
|
130
116
|
{
|
131
117
|
"instruction": self.shared_state.instruction,
|
132
|
-
"app_card":
|
133
|
-
"
|
118
|
+
"app_card": "", # TODO: Implement app card loader
|
119
|
+
"device_state": self.shared_state.formatted_device_state,
|
134
120
|
"plan": self.shared_state.plan,
|
135
121
|
"subgoal": subgoal,
|
136
|
-
"progress_status":
|
137
|
-
"atomic_actions":
|
138
|
-
"action_history":
|
122
|
+
"progress_status": self.shared_state.progress_status,
|
123
|
+
"atomic_actions": ATOMIC_ACTION_SIGNATURES,
|
124
|
+
"action_history": action_history
|
139
125
|
}
|
140
126
|
)
|
141
127
|
|
@@ -2,114 +2,6 @@
|
|
2
2
|
Prompts for the ExecutorAgent.
|
3
3
|
"""
|
4
4
|
|
5
|
-
|
6
|
-
from droidrun.agent.droid.events import DroidAgentState
|
7
|
-
from droidrun.agent.utils.tools import ATOMIC_ACTION_SIGNATURES
|
8
|
-
|
9
|
-
|
10
|
-
def build_executor_system_prompt(
|
11
|
-
state: "DroidAgentState",
|
12
|
-
subgoal: str,
|
13
|
-
app_card: str = "",
|
14
|
-
) -> str:
|
15
|
-
"""
|
16
|
-
Build the complete Executor system prompt with all context.
|
17
|
-
|
18
|
-
Args:
|
19
|
-
state: Current DroidAgentState with all context
|
20
|
-
subgoal: Current subgoal to execute
|
21
|
-
app_card: Optional app-specific instructions
|
22
|
-
|
23
|
-
Returns:
|
24
|
-
Complete system prompt for the Executor
|
25
|
-
"""
|
26
|
-
prompt = f"""You are a LOW-LEVEL ACTION EXECUTOR for an Android phone. You do NOT answer questions or provide results. You ONLY perform individual atomic actions as specified in the current subgoal. You are part of a larger system - your job is to execute actions, not to think about or answer the user's original question.
|
27
|
-
|
28
|
-
### User Request ###
|
29
|
-
{state.instruction}
|
30
|
-
|
31
|
-
{("App card gives information on how to operate the app and perform actions.\n" + "### App Card ###\n" + app_card.strip() + "\n\n") if app_card.strip() else ""}{(("### Device State ###\n" + state.device_state_text.strip() + "\n\n") if state.device_state_text.strip() else "")}### Overall Plan ###
|
32
|
-
{state.plan}
|
33
|
-
|
34
|
-
### Current Subgoal ###
|
35
|
-
EXECUTE THIS SUBGOAL: {subgoal}
|
36
|
-
|
37
|
-
EXECUTION MODE: You are a dumb robot. Find the exact text/element mentioned in the subgoal above and perform the specified action on it. Do not read anything below this line until after you execute the subgoal.
|
38
|
-
|
39
|
-
### SUBGOAL PARSING MODE ###
|
40
|
-
Read the current subgoal exactly as written. Look for:
|
41
|
-
- Action words: "tap", "click", "swipe", "type", "press", "open" etc.
|
42
|
-
- Target elements: specific text, buttons, fields, coordinates mentioned
|
43
|
-
- Locations: "header", "bottom", "left", "right", specific coordinates
|
44
|
-
Convert directly to atomic action:
|
45
|
-
- "tap/click" → click action
|
46
|
-
- "swipe" → swipe action
|
47
|
-
- "type" → type action
|
48
|
-
- "press [system button]" → system_button action
|
49
|
-
- "open [app]" → open_app action
|
50
|
-
Execute the atomic action for the exact target mentioned. Ignore everything else.
|
51
|
-
|
52
|
-
### Progress Status ###
|
53
|
-
{(state.progress_status + "\n\n") if state.progress_status != "" else "No progress yet.\n\n"}
|
54
|
-
|
55
|
-
### Guidelines ###
|
56
|
-
General:
|
57
|
-
- For any pop-up window, such as a permission request, you need to close it (e.g., by clicking `Don't Allow` or `Accept & continue`) before proceeding. Never choose to add any account or log in.
|
58
|
-
Action Related:
|
59
|
-
- Use the `open_app` action whenever you want to open an app (nothing will happen if the app is not installed), do not use the app drawer to open an app.
|
60
|
-
- Consider exploring the screen by using the `swipe` action with different directions to reveal additional content. Or use search to quickly find a specific entry, if applicable.
|
61
|
-
- If you cannot change the page content by swiping in the same direction continuously, the page may have been swiped to the bottom. Please try another operation to display more content.
|
62
|
-
- For some horizontally distributed tags, you can swipe horizontally to view more.
|
63
|
-
Text Related Operations:
|
64
|
-
- Activated input box: If an input box is activated, it may have a cursor inside it and the keyboard is visible. If there is no cursor on the screen but the keyboard is visible, it may be because the cursor is blinking. The color of the activated input box will be highlighted. If you are not sure whether the input box is activated, click it before typing.
|
65
|
-
- To input some text: first click the input box that you want to input, make sure the correct input box is activated and the keyboard is visible, then use `type` action to enter the specified text.
|
66
|
-
- To clear the text: long press the backspace button in the keyboard.
|
67
|
-
- To copy some text: first long press the text you want to copy, then click the `copy` button in bar.
|
68
|
-
- To paste text into a text box: first long press the text box, then click the `paste` button in bar.
|
69
|
-
|
70
|
-
---
|
71
|
-
Execute the current subgoal mechanically. Do NOT examine the screen content or make decisions about what you see. Parse the current subgoal text to identify the required action and execute it exactly as written. You must choose your action from one of the atomic actions.
|
72
|
-
|
73
|
-
#### Atomic Actions ####
|
74
|
-
The atomic action functions are listed in the format of `action(arguments): description` as follows:
|
75
|
-
{chr(10).join(f"- {action_name}({', '.join(action_info['arguments'])}): {action_info['description']}" for action_name, action_info in ATOMIC_ACTION_SIGNATURES.items())}
|
76
|
-
\n
|
77
|
-
### Latest Action History ###
|
78
|
-
{(("Recent actions you took previously and whether they were successful:\n" + "\n".join(
|
79
|
-
(f"Action: {act} | Description: {summ} | Outcome: Successful" if outcome
|
80
|
-
else f"Action: {act} | Description: {summ} | Outcome: Failed | Feedback: {err_des}")
|
81
|
-
for act, summ, outcome, err_des in zip(
|
82
|
-
state.action_history[-min(5, len(state.action_history)):],
|
83
|
-
state.summary_history[-min(5, len(state.action_history)):],
|
84
|
-
state.action_outcomes[-min(5, len(state.action_history)):],
|
85
|
-
state.error_descriptions[-min(5, len(state.action_history)):], strict=True)
|
86
|
-
) + "\n\n")) if state.action_history else "No actions have been taken yet.\n\n"}
|
87
|
-
|
88
|
-
---
|
89
|
-
### LITERAL EXECUTION RULE ###
|
90
|
-
Whatever the current subgoal says to do, do that EXACTLY. Do not substitute with what you think is better. Do not optimize. Do not consider screen state. Parse the subgoal text literally and execute the matching atomic action.
|
91
|
-
|
92
|
-
IMPORTANT:
|
93
|
-
1. Do NOT repeat previously failed actions multiple times. Try changing to another action.
|
94
|
-
2. Must do the current subgoal.
|
95
|
-
|
96
|
-
Provide your output in the following format, which contains three parts:
|
97
|
-
|
98
|
-
### Thought ###
|
99
|
-
Break down the current subgoal into: (1) What atomic action is required? (2) What target/location is specified? (3) What parameters do I need? Do NOT reason about whether this makes sense - just mechanically convert the subgoal text into the appropriate action format.
|
100
|
-
|
101
|
-
### Action ###
|
102
|
-
Choose only one action or shortcut from the options provided.
|
103
|
-
You must provide your decision using a valid JSON format specifying the `action` and the arguments of the action. For example, if you want to open an App, you should write {{ "action":"open_app", "text": "app name" }}.
|
104
|
-
|
105
|
-
### Description ###
|
106
|
-
A brief description of the chosen action. Do not describe expected outcome.
|
107
|
-
"""
|
108
|
-
|
109
|
-
|
110
|
-
return prompt
|
111
|
-
|
112
|
-
|
113
5
|
def parse_executor_response(response: str) -> dict:
|
114
6
|
"""
|
115
7
|
Parse the Executor LLM response.
|
@@ -24,8 +24,14 @@ from droidrun.agent.utils.device_state_formatter import format_device_state
|
|
24
24
|
from droidrun.agent.utils.inference import acall_with_retries
|
25
25
|
from droidrun.agent.utils.tools import build_custom_tool_descriptions
|
26
26
|
from droidrun.config_manager.prompt_loader import PromptLoader
|
27
|
-
from droidrun.
|
28
|
-
|
27
|
+
from droidrun.app_cards.app_card_provider import AppCardProvider
|
28
|
+
from droidrun.app_cards.providers import (
|
29
|
+
LocalAppCardProvider,
|
30
|
+
ServerAppCardProvider,
|
31
|
+
CompositeAppCardProvider,
|
32
|
+
)
|
33
|
+
|
34
|
+
import asyncio
|
29
35
|
if TYPE_CHECKING:
|
30
36
|
from droidrun.agent.droid.events import DroidAgentState
|
31
37
|
from droidrun.tools import Tools
|
@@ -63,33 +69,75 @@ class ManagerAgent(Workflow):
|
|
63
69
|
self.shared_state = shared_state
|
64
70
|
self.custom_tools = custom_tools or {}
|
65
71
|
self.agent_config = agent_config
|
66
|
-
self.
|
72
|
+
self.app_card_config = self.agent_config.app_cards
|
73
|
+
|
74
|
+
# Initialize app card provider based on mode
|
75
|
+
self.app_card_provider: AppCardProvider = self._initialize_app_card_provider()
|
67
76
|
|
68
77
|
logger.info("✅ ManagerAgent initialized successfully.")
|
69
78
|
|
79
|
+
def _initialize_app_card_provider(self) -> AppCardProvider:
|
80
|
+
"""Initialize app card provider based on configuration mode."""
|
81
|
+
if not self.app_card_config.enabled:
|
82
|
+
# Return a dummy provider that always returns empty string
|
83
|
+
class DisabledProvider(AppCardProvider):
|
84
|
+
async def load_app_card(self, package_name: str, instruction: str = "") -> str:
|
85
|
+
return ""
|
86
|
+
return DisabledProvider()
|
87
|
+
|
88
|
+
mode = self.app_card_config.mode.lower()
|
89
|
+
|
90
|
+
if mode == "local":
|
91
|
+
logger.info(f"Initializing local app card provider (dir: {self.app_card_config.app_cards_dir})")
|
92
|
+
return LocalAppCardProvider(app_cards_dir=self.app_card_config.app_cards_dir)
|
93
|
+
|
94
|
+
elif mode == "server":
|
95
|
+
if not self.app_card_config.server_url:
|
96
|
+
logger.warning("Server mode enabled but no server_url configured, falling back to local")
|
97
|
+
return LocalAppCardProvider(app_cards_dir=self.app_card_config.app_cards_dir)
|
98
|
+
|
99
|
+
logger.info(f"Initializing server app card provider (url: {self.app_card_config.server_url})")
|
100
|
+
return ServerAppCardProvider(
|
101
|
+
server_url=self.app_card_config.server_url,
|
102
|
+
timeout=self.app_card_config.server_timeout,
|
103
|
+
max_retries=self.app_card_config.server_max_retries,
|
104
|
+
)
|
105
|
+
|
106
|
+
elif mode == "composite":
|
107
|
+
if not self.app_card_config.server_url:
|
108
|
+
logger.warning("Composite mode enabled but no server_url configured, falling back to local")
|
109
|
+
return LocalAppCardProvider(app_cards_dir=self.app_card_config.app_cards_dir)
|
110
|
+
|
111
|
+
logger.info(
|
112
|
+
f"Initializing composite app card provider "
|
113
|
+
f"(server: {self.app_card_config.server_url}, local: {self.app_card_config.app_cards_dir})"
|
114
|
+
)
|
115
|
+
return CompositeAppCardProvider(
|
116
|
+
server_url=self.app_card_config.server_url,
|
117
|
+
app_cards_dir=self.app_card_config.app_cards_dir,
|
118
|
+
server_timeout=self.app_card_config.server_timeout,
|
119
|
+
server_max_retries=self.app_card_config.server_max_retries,
|
120
|
+
)
|
121
|
+
|
122
|
+
else:
|
123
|
+
logger.warning(f"Unknown app_card mode '{mode}', falling back to local")
|
124
|
+
return LocalAppCardProvider(app_cards_dir=self.app_card_config.app_cards_dir)
|
125
|
+
|
70
126
|
# ========================================================================
|
71
127
|
# Helper Methods
|
72
128
|
# ========================================================================
|
73
129
|
|
74
130
|
def _build_system_prompt(
|
75
131
|
self,
|
76
|
-
has_text_to_modify: bool
|
77
|
-
app_card: str = ""
|
132
|
+
has_text_to_modify: bool
|
78
133
|
) -> str:
|
79
|
-
"""
|
80
|
-
Build system prompt with all context.
|
134
|
+
"""Build system prompt with all context."""
|
81
135
|
|
82
|
-
|
83
|
-
|
84
|
-
app_card: App card content
|
85
|
-
Returns:
|
86
|
-
Complete system prompt
|
87
|
-
"""
|
88
|
-
# Format error history
|
89
|
-
error_history_text = ""
|
136
|
+
# Prepare error history as structured data (if needed)
|
137
|
+
error_history = None
|
90
138
|
if self.shared_state.error_flag_plan:
|
91
139
|
k = self.shared_state.err_to_manager_thresh
|
92
|
-
|
140
|
+
error_history = [
|
93
141
|
{
|
94
142
|
"action": act,
|
95
143
|
"summary": summ,
|
@@ -98,78 +146,22 @@ class ManagerAgent(Workflow):
|
|
98
146
|
for act, summ, err_des in zip(
|
99
147
|
self.shared_state.action_history[-k:],
|
100
148
|
self.shared_state.summary_history[-k:],
|
101
|
-
self.shared_state.error_descriptions[-k:],
|
149
|
+
self.shared_state.error_descriptions[-k:],
|
150
|
+
strict=True
|
102
151
|
)
|
103
152
|
]
|
104
|
-
|
105
|
-
|
106
|
-
"You have encountered several failed attempts. Here are some logs:\n"
|
107
|
-
)
|
108
|
-
for error in errors:
|
109
|
-
error_history_text += (
|
110
|
-
f"- Attempt: Action: {error['action']} | "
|
111
|
-
f"Description: {error['summary']} | "
|
112
|
-
f"Outcome: Failed | "
|
113
|
-
f"Feedback: {error['error']}\n"
|
114
|
-
)
|
115
|
-
error_history_text += "</potentially_stuck>\n\n"
|
116
|
-
|
117
|
-
# Text manipulation section
|
118
|
-
text_manipulation_section = ""
|
119
|
-
if has_text_to_modify:
|
120
|
-
text_manipulation_section = """
|
121
|
-
|
122
|
-
<text_manipulation>
|
123
|
-
1. Use **TEXT_TASK:** prefix in your plan when you need to modify text in the currently focused text input field
|
124
|
-
2. TEXT_TASK is for editing, formatting, or transforming existing text content in text boxes using Python code
|
125
|
-
3. Do not use TEXT_TASK for extracting text from messages, typing new text, or composing messages
|
126
|
-
4. The focused text field contains editable text that you can modify
|
127
|
-
5. Example plan item: 'TEXT_TASK: Add "Hello World" at the beginning of the text'
|
128
|
-
6. Always use TEXT_TASK for modifying text, do not try to select the text to copy/cut/paste or adjust the text
|
129
|
-
</text_manipulation>"""
|
130
|
-
|
131
|
-
# Device date (include tags in variable value or empty string)
|
132
|
-
device_date = self.tools_instance.get_date()
|
133
|
-
device_date_text = ""
|
134
|
-
if device_date.strip():
|
135
|
-
device_date_text = f"<device_date>\n{device_date}\n</device_date>\n\n"
|
136
|
-
|
137
|
-
# App card (include tags in variable value or empty string)
|
138
|
-
app_card = app_card
|
139
|
-
app_card_text = ""
|
140
|
-
if app_card.strip():
|
141
|
-
app_card_text = "App card gives information on how to operate the app and perform actions.\n<app_card>\n" + app_card.strip() + "\n</app_card>\n\n"
|
142
|
-
|
143
|
-
# Important notes (include tags in variable value or empty string)
|
144
|
-
important_notes = "" # TODO: implement
|
145
|
-
important_notes_text = ""
|
146
|
-
if important_notes.strip():
|
147
|
-
important_notes_text = "<important_notes>\n" + important_notes + "\n</important_notes>\n\n"
|
148
|
-
|
149
|
-
# Custom tools
|
150
|
-
custom_tools_desc = build_custom_tool_descriptions(self.custom_tools)
|
151
|
-
custom_tools_text = ""
|
152
|
-
if custom_tools_desc.strip():
|
153
|
-
custom_tools_text = """
|
154
|
-
|
155
|
-
<custom_actions>
|
156
|
-
The executor has access to these additional custom actions beyond the standard actions (click, type, swipe, etc.):
|
157
|
-
""" + custom_tools_desc + """
|
158
|
-
|
159
|
-
You can reference these custom actions or tell the Executer agent to use them in your plan when they help achieve the user's goal.
|
160
|
-
</custom_actions>"""
|
161
|
-
|
162
|
-
# Load and format prompt
|
153
|
+
|
154
|
+
# Let Jinja2 handle all formatting and conditionals
|
163
155
|
return PromptLoader.load_prompt(
|
164
156
|
self.agent_config.get_manager_system_prompt_path(),
|
165
157
|
{
|
166
158
|
"instruction": self.shared_state.instruction,
|
167
|
-
"device_date":
|
168
|
-
"app_card":
|
169
|
-
"important_notes":
|
170
|
-
"error_history":
|
171
|
-
"
|
172
|
-
"custom_tools_descriptions":
|
159
|
+
"device_date": self.tools_instance.get_date(),
|
160
|
+
"app_card": self.shared_state.app_card,
|
161
|
+
"important_notes": "", # TODO: implement
|
162
|
+
"error_history": error_history,
|
163
|
+
"text_manipulation_enabled": has_text_to_modify,
|
164
|
+
"custom_tools_descriptions": build_custom_tool_descriptions(self.custom_tools)
|
173
165
|
}
|
174
166
|
)
|
175
167
|
|
@@ -339,7 +331,17 @@ You can reference these custom actions or tell the Executer agent to use them in
|
|
339
331
|
self.shared_state.current_package_name = phone_state.get('packageName', 'Unknown')
|
340
332
|
self.shared_state.current_app_name = phone_state.get('currentApp', 'Unknown')
|
341
333
|
|
342
|
-
#
|
334
|
+
# ====================================================================
|
335
|
+
# Step 1.5: Start loading app card in background
|
336
|
+
# ====================================================================
|
337
|
+
if self.app_card_config.enabled:
|
338
|
+
loading_task = asyncio.create_task(
|
339
|
+
self.app_card_provider.load_app_card(
|
340
|
+
package_name=self.shared_state.current_package_name,
|
341
|
+
instruction=self.shared_state.instruction
|
342
|
+
)
|
343
|
+
)
|
344
|
+
self.shared_state.app_card_loading_task = loading_task
|
343
345
|
|
344
346
|
# ====================================================================
|
345
347
|
# Step 2: Capture screenshot if vision enabled
|
@@ -417,15 +419,30 @@ You can reference these custom actions or tell the Executer agent to use them in
|
|
417
419
|
|
418
420
|
has_text_to_modify = self.shared_state.has_text_to_modify
|
419
421
|
screenshot = self.shared_state.screenshot
|
420
|
-
|
421
|
-
|
422
|
+
|
423
|
+
# ====================================================================
|
424
|
+
# Try to get app card from previous iteration's loading task
|
425
|
+
# ====================================================================
|
426
|
+
if self.app_card_config.enabled and self.shared_state.app_card_loading_task:
|
427
|
+
try:
|
428
|
+
# Wait briefly for the background task to complete (0.1s timeout)
|
429
|
+
self.shared_state.app_card = await asyncio.wait_for(
|
430
|
+
self.shared_state.app_card_loading_task,
|
431
|
+
timeout=0.1
|
432
|
+
)
|
433
|
+
except asyncio.TimeoutError:
|
434
|
+
# Task not ready yet, use empty string
|
435
|
+
self.shared_state.app_card = ""
|
436
|
+
except Exception as e:
|
437
|
+
logger.warning(f"Error getting app card: {e}")
|
438
|
+
self.shared_state.app_card = ""
|
422
439
|
else:
|
423
|
-
app_card = ""
|
440
|
+
self.shared_state.app_card = ""
|
424
441
|
|
425
442
|
# ====================================================================
|
426
443
|
# Step 1: Build system prompt
|
427
444
|
# ====================================================================
|
428
|
-
system_prompt = self._build_system_prompt(has_text_to_modify
|
445
|
+
system_prompt = self._build_system_prompt(has_text_to_modify)
|
429
446
|
|
430
447
|
# ====================================================================
|
431
448
|
# Step 2: Build messages with context
|
@@ -1,11 +1,14 @@
|
|
1
1
|
import importlib
|
2
2
|
import logging
|
3
|
-
from typing import Any
|
3
|
+
from typing import Any, TYPE_CHECKING
|
4
4
|
|
5
5
|
from llama_index.core.llms.llm import LLM
|
6
6
|
|
7
7
|
from droidrun.agent.usage import track_usage
|
8
8
|
|
9
|
+
if TYPE_CHECKING:
|
10
|
+
from droidrun.config_manager.config_manager import LLMProfile
|
11
|
+
|
9
12
|
# Configure logging
|
10
13
|
logger = logging.getLogger("droidrun")
|
11
14
|
|
@@ -104,6 +107,65 @@ def load_llm(provider_name: str, **kwargs: Any) -> LLM:
|
|
104
107
|
raise e
|
105
108
|
|
106
109
|
|
110
|
+
def load_llms_from_profiles(
|
111
|
+
profiles: dict[str, "LLMProfile"],
|
112
|
+
profile_names: list[str] | None = None,
|
113
|
+
**override_kwargs_per_profile
|
114
|
+
) -> dict[str, LLM]:
|
115
|
+
"""
|
116
|
+
Load multiple LLMs from LLMProfile objects.
|
117
|
+
|
118
|
+
Args:
|
119
|
+
profiles: Dict of profile_name -> LLMProfile objects
|
120
|
+
profile_names: List of profile names to load. If None, loads all profiles
|
121
|
+
**override_kwargs_per_profile: Dict of profile-specific overrides
|
122
|
+
Example: manager={'temperature': 0.1}, executor={'max_tokens': 8000}
|
123
|
+
|
124
|
+
Returns:
|
125
|
+
Dict mapping profile names to initialized LLM instances
|
126
|
+
|
127
|
+
Example:
|
128
|
+
>>> config = ConfigManager()
|
129
|
+
>>> llms = load_llms_from_profiles(config.llm_profiles)
|
130
|
+
>>> manager_llm = llms['manager']
|
131
|
+
|
132
|
+
>>> # Load specific profiles with overrides
|
133
|
+
>>> llms = load_llms_from_profiles(
|
134
|
+
... config.llm_profiles,
|
135
|
+
... profile_names=['manager', 'executor'],
|
136
|
+
... manager={'temperature': 0.1}
|
137
|
+
... )
|
138
|
+
"""
|
139
|
+
if profile_names is None:
|
140
|
+
profile_names = list(profiles.keys())
|
141
|
+
|
142
|
+
llms = {}
|
143
|
+
for profile_name in profile_names:
|
144
|
+
logger.debug(f"Loading LLM for profile: {profile_name}")
|
145
|
+
|
146
|
+
if profile_name not in profiles:
|
147
|
+
raise KeyError(
|
148
|
+
f"Profile '{profile_name}' not found. "
|
149
|
+
f"Available profiles: {list(profiles.keys())}"
|
150
|
+
)
|
151
|
+
|
152
|
+
profile = profiles[profile_name]
|
153
|
+
|
154
|
+
# Get base kwargs from profile
|
155
|
+
kwargs = profile.to_load_llm_kwargs()
|
156
|
+
|
157
|
+
# Apply profile-specific overrides if provided
|
158
|
+
if profile_name in override_kwargs_per_profile:
|
159
|
+
logger.debug(f"Applying overrides for {profile_name}: {override_kwargs_per_profile[profile_name]}")
|
160
|
+
kwargs.update(override_kwargs_per_profile[profile_name])
|
161
|
+
|
162
|
+
# Load the LLM
|
163
|
+
llms[profile_name] = load_llm(provider_name=profile.provider, **kwargs)
|
164
|
+
logger.debug(f"Successfully loaded {profile_name} LLM: {profile.provider}/{profile.model}")
|
165
|
+
|
166
|
+
return llms
|
167
|
+
|
168
|
+
|
107
169
|
# --- Example Usage ---
|
108
170
|
if __name__ == "__main__":
|
109
171
|
# Install the specific LLM integrations you want to test:
|
droidrun/agent/utils/tools.py
CHANGED
@@ -127,6 +127,35 @@ def open_app(tool_instance: "Tools", text: str) -> str:
|
|
127
127
|
return result
|
128
128
|
|
129
129
|
|
130
|
+
def remember(tool_instance: "Tools", information: str) -> str:
|
131
|
+
"""
|
132
|
+
Remember important information for later use.
|
133
|
+
|
134
|
+
Args:
|
135
|
+
tool_instance: The Tools instance
|
136
|
+
information: The information to remember
|
137
|
+
|
138
|
+
Returns:
|
139
|
+
Confirmation message
|
140
|
+
"""
|
141
|
+
return tool_instance.remember(information)
|
142
|
+
|
143
|
+
|
144
|
+
def complete(tool_instance: "Tools", success: bool, reason: str = "") -> None:
|
145
|
+
"""
|
146
|
+
Mark the task as complete.
|
147
|
+
|
148
|
+
Args:
|
149
|
+
tool_instance: The Tools instance
|
150
|
+
success: Whether the task was completed successfully
|
151
|
+
reason: Explanation for success or failure
|
152
|
+
|
153
|
+
Returns:
|
154
|
+
None
|
155
|
+
"""
|
156
|
+
tool_instance.complete(success, reason)
|
157
|
+
|
158
|
+
|
130
159
|
# =============================================================================
|
131
160
|
# ATOMIC ACTION SIGNATURES - Single source of truth for both Executor and CodeAct
|
132
161
|
# =============================================================================
|