droidrun 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/__init__.py +15 -8
- droidrun/__main__.py +2 -3
- droidrun/adb/device.py +1 -1
- droidrun/agent/codeact/__init__.py +13 -0
- droidrun/agent/codeact/codeact_agent.py +334 -0
- droidrun/agent/codeact/events.py +36 -0
- droidrun/agent/codeact/prompts.py +78 -0
- droidrun/agent/droid/__init__.py +13 -0
- droidrun/agent/droid/droid_agent.py +418 -0
- droidrun/agent/planner/__init__.py +15 -0
- droidrun/agent/planner/events.py +20 -0
- droidrun/agent/planner/prompts.py +144 -0
- droidrun/agent/planner/task_manager.py +355 -0
- droidrun/agent/planner/workflow.py +371 -0
- droidrun/agent/utils/async_utils.py +56 -0
- droidrun/agent/utils/chat_utils.py +92 -0
- droidrun/agent/utils/executer.py +97 -0
- droidrun/agent/utils/llm_picker.py +143 -0
- droidrun/cli/main.py +422 -107
- droidrun/tools/__init__.py +4 -25
- droidrun/tools/actions.py +767 -783
- droidrun/tools/device.py +1 -1
- droidrun/tools/loader.py +60 -0
- {droidrun-0.1.0.dist-info → droidrun-0.2.0.dist-info}/METADATA +134 -37
- droidrun-0.2.0.dist-info/RECORD +32 -0
- droidrun/agent/__init__.py +0 -16
- droidrun/agent/llm_reasoning.py +0 -567
- droidrun/agent/react_agent.py +0 -556
- droidrun/llm/__init__.py +0 -24
- droidrun-0.1.0.dist-info/RECORD +0 -20
- {droidrun-0.1.0.dist-info → droidrun-0.2.0.dist-info}/WHEEL +0 -0
- {droidrun-0.1.0.dist-info → droidrun-0.2.0.dist-info}/entry_points.txt +0 -0
- {droidrun-0.1.0.dist-info → droidrun-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,418 @@
|
|
1
|
+
"""
|
2
|
+
DroidAgent - A wrapper class that coordinates the planning and execution of tasks
|
3
|
+
to achieve a user's goal on an Android device.
|
4
|
+
"""
|
5
|
+
|
6
|
+
import asyncio
|
7
|
+
import logging
|
8
|
+
from typing import Dict, Any, List, Tuple
|
9
|
+
|
10
|
+
from llama_index.core.base.llms.types import ChatMessage
|
11
|
+
from llama_index.core.llms.llm import LLM
|
12
|
+
from llama_index.core.memory import ChatMemoryBuffer
|
13
|
+
from ..codeact import CodeActAgent
|
14
|
+
from ..planner import PlannerAgent, TaskManager
|
15
|
+
from ..utils.executer import SimpleCodeExecutor
|
16
|
+
from ...tools import Tools
|
17
|
+
|
18
|
+
logger = logging.getLogger("droidrun")
|
19
|
+
|
20
|
+
class DroidAgent:
    """
    A wrapper class that coordinates between PlannerAgent (creates plans) and
    CodeActAgent (executes tasks) to achieve a user's goal.

    With ``reasoning=True`` the agent repeatedly asks the planner for a plan,
    executes every pending task with the CodeActAgent and re-plans, until the
    planner marks the goal complete or a step/retry limit is hit. With
    ``reasoning=False`` the goal is executed as one single CodeActAgent task.
    """

    def __init__(
        self,
        goal: str,
        llm: "LLM",
        tools_instance: "Tools" = None,
        tool_list: Dict[str, Any] = None,
        max_steps: int = 15,
        vision: bool = False,
        timeout: int = 1000,
        max_retries: int = 3,
        reasoning: bool = True,
        enable_tracing: bool = False,
        debug: bool = False,
        device_serial: str = None,
        **kwargs
    ):
        """
        Initialize the DroidAgent wrapper.

        Args:
            goal: The user's goal or command to execute
            llm: The language model to use for both agents
            tools_instance: An instance of the Tools class
            tool_list: Dictionary of available tools (name -> callable).
                Defaults to an empty dict. The passed dict is extended in
                place with ``remember`` when ``tools_instance`` provides it.
            max_steps: Maximum number of planning cycles performed by ``run``.
                The CodeActAgent itself is created with a fixed, very large
                step limit.
            vision: Whether to enable vision capabilities
            timeout: Timeout for agent execution in seconds
            max_retries: Maximum number of consecutive failed tasks that
                trigger a re-plan before ``run`` gives up
            reasoning: Whether to use the PlannerAgent for complex reasoning (True)
                or send tasks directly to CodeActAgent (False)
            enable_tracing: Whether to enable Arize Phoenix tracing
                (forwarded to the PlannerAgent only)
            debug: Whether to enable verbose debug logging
            device_serial: Target Android device serial number (stored only)
            **kwargs: Accepted for compatibility; not used by this class
        """
        self.goal = goal
        self.llm = llm
        self.max_steps = max_steps
        self.vision = vision
        self.timeout = timeout
        self.max_retries = max_retries
        self.task_manager = TaskManager()
        self.reasoning = reasoning
        self.debug = debug
        self.device_serial = device_serial

        logger.info("🤖 Initializing DroidAgent wrapper...")

        # The signature default is None, but the code below needs a real
        # mapping ("in" test, .values()); normalise it here.
        if tool_list is None:
            tool_list = {}

        self.tools_instance = tools_instance
        self.tool_list = tool_list

        # Ensure remember tool is in the tool_list if available
        if hasattr(tools_instance, 'remember') and 'remember' not in tool_list:
            logger.debug("📝 Adding 'remember' tool to the available tools")
            tool_list['remember'] = tools_instance.remember

        # Create code executor
        logger.debug("🔧 Initializing Code Executor...")
        # NOTE(review): get_event_loop() outside a running loop is deprecated
        # in recent Python versions - confirm how SimpleCodeExecutor uses the
        # loop before changing this.
        loop = asyncio.get_event_loop()
        self.executor = SimpleCodeExecutor(
            loop=loop,
            locals={},
            tools=tool_list,
            globals={"__builtins__": __builtins__}
        )

        # Create memory buffer for the planning agent if reasoning is enabled
        if self.reasoning:
            self.planning_memory = ChatMemoryBuffer.from_defaults(llm=llm)

        # Create CodeActAgent
        logger.info("🧠 Initializing CodeAct Agent...")
        self.codeact_agent = CodeActAgent(
            llm=llm,
            code_execute_fn=self.executor.execute,
            available_tools=tool_list.values(),
            tools=tools_instance,
            max_steps=999999,
            vision=vision,
            debug=debug,
            timeout=timeout
        )

        if self.reasoning:
            logger.info("📝 Initializing Planner Agent...")
            self.planner_agent = PlannerAgent(
                goal=goal,
                llm=llm,
                agent=self.codeact_agent,
                tools_instance=tools_instance,
                timeout=timeout,
                max_retries=max_retries,
                enable_tracing=enable_tracing,
                debug=debug
            )

            # Give task manager to the planner
            self.planner_agent.task_manager = self.task_manager
        else:
            logger.debug("🚫 Planning disabled - will execute tasks directly with CodeActAgent")
            self.planner_agent = None

        logger.info("✅ DroidAgent initialized successfully.")

    def _build_task_history(self) -> str:
        """
        Render completed tasks, failed tasks and remembered information as
        the text block appended to the planner's user prompt.

        Returns:
            The history text, or an empty string when there is nothing to report
        """
        # Use the persistent task history methods to get ALL completed and failed tasks
        completed_tasks = self.task_manager.get_all_completed_tasks()
        failed_tasks = self.task_manager.get_all_failed_tasks()
        # tools_instance may be None or lack a memory attribute
        memory = getattr(self.tools_instance, 'memory', None)

        if not (completed_tasks or failed_tasks or memory):
            return ""

        parts = ["### Task Execution History:\n"]

        if completed_tasks:
            parts.append("✅ Completed Tasks:\n")
            parts.extend(f"- {task['description']}\n" for task in completed_tasks)

        if failed_tasks:
            parts.append("\n❌ Failed Tasks:\n")
            for task in failed_tasks:
                failure_reason = task.get('failure_reason', 'Unknown reason')
                parts.append(f"- {task['description']} (Failed: {failure_reason})\n")

        if memory:
            parts.append("\n### Remembered Information:\n")
            parts.extend(f"{idx}. {item}\n" for idx, item in enumerate(memory, 1))

        # Add a reminder to use this information
        parts.append("\n⚠️ Please use the above information in your planning. For example, if specific dates or locations were found, include them explicitly in your next tasks instead of just referring to 'the dates' or 'the location'.\n")

        return "".join(parts)

    async def _get_plan_from_planner(self) -> List[Dict]:
        """
        Get a plan (list of tasks) from the PlannerAgent.

        Sends the planner's system/user prompts (plus task history, if any)
        to the LLM, executes the returned planning code against the task
        manager's planning tools and returns the resulting task list.

        Returns:
            List of task dictionaries
        """
        logger.info("📋 Planning steps to accomplish the goal...")

        # Append task history to user prompt when there is any
        user_content = self.planner_agent.user_prompt
        task_history = self._build_task_history()
        if task_history:
            user_content = f"{self.planner_agent.user_prompt}\n\n{task_history}\n\nPlease consider the above task history and discovered information when creating your next plan. Incorporate specific data (dates, locations, etc.) directly into tasks rather than referring to them generally. Remember that previously completed or failed tasks will not be repeated."

        # Create message list
        messages = [
            ChatMessage(role="system", content=self.planner_agent.system_prompt),
            ChatMessage(role="user", content=user_content),
        ]
        if self.debug:
            logger.debug(f"Sending {len(messages)} messages to planner: {[msg.role for msg in messages]}")

        # Get response from LLM
        llm_response = await self.planner_agent._get_llm_response(messages)
        code, _thoughts = self.planner_agent._extract_code_and_thought(llm_response.message.content)

        # Execute the planning code (which should call set_tasks)
        if code:
            try:
                planning_tools = {
                    "set_tasks": self.task_manager.set_tasks,
                    "add_task": self.task_manager.add_task,
                    "get_all_tasks": self.task_manager.get_all_tasks,
                    "clear_tasks": self.task_manager.clear_tasks,
                    "complete_goal": self.task_manager.complete_goal
                }
                planning_executor = SimpleCodeExecutor(
                    # We are inside a coroutine, so a loop is always running.
                    loop=asyncio.get_running_loop(),
                    globals={},
                    locals={},
                    tools=planning_tools
                )
                await planning_executor.execute(code)
            except Exception as e:
                logger.error(f"Error executing planning code: {e}")
                # If there's an error, create a simple default task
                self.task_manager.set_tasks([f"Achieve the goal: {self.goal}"])

        # Get and display the tasks
        tasks = self.task_manager.get_all_tasks()
        if tasks:
            logger.info("📝 Plan created:")
            for i, task in enumerate(tasks, 1):
                if task["status"] == self.task_manager.STATUS_PENDING:
                    logger.info(f" {i}. {task['description']}")
        else:
            logger.warning("No tasks were generated in the plan")

        return tasks

    async def _execute_task_with_codeact(self, task: Dict) -> Tuple[bool, str]:
        """
        Execute a single task using the CodeActAgent.

        The task dict is updated in place: its ``status`` is set, and on
        failure ``failure_reason`` is filled in.

        Args:
            task: Task dictionary with description and status

        Returns:
            Tuple of (success, reason)
        """
        task_description = task["description"]
        logger.info(f"🔧 Executing task: {task_description}")

        # Update task status
        task["status"] = self.task_manager.STATUS_ATTEMPTING

        # Run the CodeActAgent
        try:
            # Reset the tools finished flag before execution
            self.tools_instance.finished = False
            self.tools_instance.success = None
            self.tools_instance.reason = None

            # Execute the CodeActAgent with the task description as input
            # Pass input as a keyword argument, not as a dictionary
            result = await self.codeact_agent.run(input=task_description)

            # Check if the tools instance was marked as finished by the 'complete' function
            if self.tools_instance.finished:
                if self.tools_instance.success:
                    task["status"] = self.task_manager.STATUS_COMPLETED
                    if self.debug:
                        logger.debug(f"Task completed successfully: {self.tools_instance.reason}")
                    return True, self.tools_instance.reason or "Task completed successfully"

                failure_reason = self.tools_instance.reason or "Task failed without specific reason"
                task["status"] = self.task_manager.STATUS_FAILED
                task["failure_reason"] = failure_reason
                logger.warning(f"Task failed: {failure_reason}")
                return False, failure_reason

            # If tools instance wasn't marked as finished, check the result directly
            if isinstance(result, dict) and result.get("success"):
                task["status"] = self.task_manager.STATUS_COMPLETED
                if self.debug:
                    logger.debug(f"Task completed with result: {result}")
                return True, result.get("reason", "Task completed successfully")

            if isinstance(result, dict):
                failure_reason = result.get("reason", "Unknown failure")
            else:
                failure_reason = "Task execution failed"
            task["status"] = self.task_manager.STATUS_FAILED
            task["failure_reason"] = failure_reason
            logger.warning(f"Task failed: {failure_reason}")
            return False, failure_reason

        except Exception as e:
            # Best-effort boundary: any executor error is reported as a task failure
            logger.error(f"Error during task execution: {e}")
            if self.debug:
                import traceback
                logger.error(traceback.format_exc())
            task["status"] = self.task_manager.STATUS_FAILED
            task["failure_reason"] = f"Error: {str(e)}"
            return False, f"Error: {str(e)}"

    async def run(self) -> Dict[str, Any]:
        """
        Main execution loop that coordinates between planning and execution.

        Returns:
            Dict containing the execution result, with the keys ``success``,
            ``reason``, ``steps`` and ``task_history``
        """
        logger.info(f"🚀 Running DroidAgent to achieve goal: {self.goal}")

        step_counter = 0
        retry_counter = 0
        overall_success = False
        final_message = ""

        try:
            # If reasoning is disabled, directly execute the goal as a single task in CodeActAgent
            if not self.reasoning:
                logger.info(f"🔄 Direct execution mode - executing goal: {self.goal}")
                # Create a simple task for the goal
                task = {
                    "description": self.goal,
                    "status": self.task_manager.STATUS_PENDING
                }

                # Execute the task directly with CodeActAgent
                success, reason = await self._execute_task_with_codeact(task)

                return {
                    "success": success,
                    "reason": reason,
                    "steps": 1,
                    "task_history": [task]  # Single task history
                }

            # Standard reasoning mode with planning
            while step_counter < self.max_steps:
                step_counter += 1
                if self.debug:
                    logger.debug(f"Planning step {step_counter}/{self.max_steps}")

                # 1. Get a plan from the planner
                tasks = await self._get_plan_from_planner()

                if self.task_manager.task_completed:
                    # Task is marked as complete by the planner
                    logger.info(f"✅ Goal completed: {self.task_manager.message}")
                    overall_success = True
                    final_message = self.task_manager.message
                    break

                if not tasks:
                    logger.warning("No tasks generated by planner")
                    final_message = "Planner did not generate any tasks"
                    break

                # 2. Execute each task in the plan sequentially.
                # enumerate gives the task's position directly; an equality
                # search could return the index of a different, equal dict.
                for task_idx, task in enumerate(tasks):
                    if task["status"] != self.task_manager.STATUS_PENDING:
                        continue

                    # Reset the CodeActAgent's step counter for this task
                    self.codeact_agent.steps_counter = 0

                    # Execute the task
                    success, reason = await self._execute_task_with_codeact(task)

                    # Detailed result for the planner
                    result_info = {
                        "execution_details": reason,
                        "step_executed": step_counter,
                        "codeact_steps": self.codeact_agent.steps_counter
                    }

                    if success:
                        self.task_manager.update_status(
                            task_idx,
                            self.task_manager.STATUS_COMPLETED,
                            result_info
                        )
                        logger.info(f"✅ Task completed: {task['description']}")
                        # A successful task starts a fresh retry budget. The
                        # reset must not run after a failure, otherwise
                        # max_retries could never be reached.
                        retry_counter = 0
                        continue

                    # Store detailed failure information if not already set
                    if "failure_reason" not in task:
                        self.task_manager.update_status(
                            task_idx,
                            self.task_manager.STATUS_FAILED,
                            {"failure_reason": reason, **result_info}
                        )

                    # Handle retries
                    if retry_counter >= self.max_retries:
                        logger.error("Max retries exceeded for task")
                        final_message = f"Failed after {self.max_retries} retries. Reason: {reason}"
                        return {
                            "success": False,
                            "reason": final_message,
                            "steps": step_counter,
                            "task_history": self.task_manager.get_task_history()
                        }

                    retry_counter += 1
                    logger.info(f"Retrying... ({retry_counter}/{self.max_retries})")
                    # Next iteration will generate a new plan based on current state
                    break

                # Whether or not all tasks completed, the next iteration asks
                # the planner again (it might decide we're done).
            else:
                # Runs only when the loop ended because the step budget ran
                # out, so a reason recorded just before a `break` is kept.
                final_message = f"Reached maximum number of steps ({self.max_steps})"

            return {
                "success": overall_success,
                "reason": final_message,
                "steps": step_counter,
                "task_history": self.task_manager.get_task_history()
            }

        except Exception as e:
            # Top-level boundary: report the error instead of propagating it
            logger.error(f"❌ Error during DroidAgent execution: {e}")
            if self.debug:
                import traceback
                logger.error(traceback.format_exc())
            return {
                "success": False,
                "reason": str(e),
                "steps": step_counter,
                "task_history": self.task_manager.get_task_history()
            }
|
@@ -0,0 +1,15 @@
|
|
1
|
+
from .workflow import PlannerAgent
|
2
|
+
from .task_manager import TaskManager
|
3
|
+
from .prompts import (
|
4
|
+
DEFAULT_PLANNER_SYSTEM_PROMPT,
|
5
|
+
DEFAULT_PLANNER_USER_PROMPT,
|
6
|
+
DEFAULT_PLANNER_TASK_FAILED_PROMPT
|
7
|
+
)
|
8
|
+
|
9
|
+
__all__ = [
|
10
|
+
"PlannerAgent",
|
11
|
+
"TaskManager",
|
12
|
+
"DEFAULT_PLANNER_SYSTEM_PROMPT",
|
13
|
+
"DEFAULT_PLANNER_USER_PROMPT",
|
14
|
+
"DEFAULT_PLANNER_TASK_FAILED_PROMPT"
|
15
|
+
]
|
@@ -0,0 +1,20 @@
|
|
1
|
+
from typing import List
|
2
|
+
from llama_index.core.llms import ChatMessage
|
3
|
+
from llama_index.core.workflow import Event
|
4
|
+
from llama_index.core.tools import FunctionTool
|
5
|
+
|
6
|
+
|
7
|
+
class InputEvent(Event):
    """Workflow event whose payload, ``input``, is a list of chat messages."""

    input: list[ChatMessage]
|
9
|
+
|
10
|
+
class ModelResponseEvent(Event):
    """Workflow event whose payload, ``response``, is a string.

    Presumably the raw model output - confirm against the planner workflow.
    """

    response: str
|
12
|
+
|
13
|
+
|
14
|
+
class ExecutePlan(Event):
    """Workflow event that declares no fields of its own."""
|
16
|
+
|
17
|
+
class TaskFailedEvent(Event):
    """Workflow event describing a failed task.

    Carries the task's description and the reported failure reason.
    """

    task_description: str
    reason: str
|
20
|
+
|
@@ -0,0 +1,144 @@
|
|
1
|
+
"""
|
2
|
+
Prompt templates for the PlannerAgent.
|
3
|
+
|
4
|
+
This module contains all the prompts used by the PlannerAgent,
|
5
|
+
separated from the workflow logic for better maintainability.
|
6
|
+
"""
|
7
|
+
|
8
|
+
# System prompt for the PlannerAgent that explains its role and capabilities.
# It is plain text with no format placeholders.
DEFAULT_PLANNER_SYSTEM_PROMPT = """You are an Android Task Planner. Your job is to create short, functional plans (1-5 steps) to achieve a user's goal on an Android device.

**Inputs You Receive:**
1. **User's Overall Goal.**
2. **Current Device State:**
* A **screenshot** of the current screen.
* **JSON data** of visible UI elements.
* The current visible Android activity
3. **Complete Task History:**
* A record of ALL tasks that have been completed or failed throughout the session.
* For completed tasks, the results and any discovered information.
* For failed tasks, the detailed reasons for failure.
* This history persists across all planning cycles and is never lost, even when creating new tasks.

**Your Task:**
Given the goal, current state, and task history, devise the **next 1-5 functional steps**. Focus on what to achieve, not how. Planning fewer steps at a time improves accuracy, as the state can change.

**Step Format:**
Each step must be a functional goal. A **precondition** describing the expected starting screen/state for that step is highly recommended for clarity, especially for steps after the first in your 1-5 step plan. Each task string can start with "Precondition: ... Goal: ...". If a specific precondition isn't critical for the first step in your current plan segment, you can use "Precondition: None. Goal: ..." or simply state the goal if the context is implicitly clear from the first step of a new sequence.

**Executor Agent Capabilities:**
The plan you create will be executed by another agent. This executor can:
* `swipe(direction: str, distance_percentage: int)`
* `input_text(text: str, element_hint: Optional[str] = None)`
* `press_key(keycode: int)` (Common: 3=HOME, 4=BACK)
* `tap_by_coordinates(x: int, y: int)` (This is a fallback; prefer functional goals)
* `start_app(package_name: str)`
* `remember(info: str)`
* (The executor will use the UI JSON to find elements for your functional goals like "Tap 'Settings button'" or "Enter text into 'Username field'").

**Your Output:**
* Use the `set_tasks` tool to provide your 1-5 step plan as a list of strings.
* **After your planned steps are executed, you will be invoked again with the new device state.** You will then:
1. Assess if the **overall user goal** is complete.
2. If complete, call the `complete_goal(message: str)` tool.
3. If not complete, generate the next 1-5 steps using `set_tasks`.

**Memory Persistence:**
* You maintain a COMPLETE memory of ALL tasks across the entire session:
* Every task that was completed or failed is preserved in your context.
* Previously completed steps are never lost when calling `set_tasks()` for new steps.
* You will see all historical tasks each time you're called.
* Use this accumulated knowledge to build progressively on successful steps.
* When you see discovered information (e.g., dates, locations), use it explicitly in future tasks.

**Key Rules:**
* **Functional Goals ONLY:** (e.g., "Navigate to Wi-Fi settings", "Enter 'MyPassword' into the password field").
* **NO Low-Level Actions:** Do NOT specify swipes, taps on coordinates, or element IDs in your plan.
* **Short Plans (1-5 steps):** Plan only the immediate next actions.
* **Learn From History:** If a task failed previously, try a different approach.
* **Use Tools:** Your response *must* be a Python code block calling `set_tasks` or `complete_goal`.

**Available Planning Tools:**
* `set_tasks(tasks: List[str])`: Defines the sequence of tasks. Each element in the list is a string representing a single task.
* `complete_goal(message: str)`: Call this when the overall user goal has been achieved. The message can summarize the completion.

---

**Example Interaction Flow:**

**User Goal:** Turn on Wi-Fi.

**(Round 1) Planner Input:**
* Goal: "Turn on Wi-Fi"
* Current State: Screenshot of Home screen, UI JSON.
* Task History: None (first planning cycle)

**Planner Thought Process (Round 1):**
Need to open settings first, then go to Network settings. This is the first plan.
1. Task 1: "Precondition: None. Goal: Open the Settings app."
2. Task 2: "Precondition: Settings main screen is open. Goal: Navigate to 'Network & internet' settings."

**Planner Output (Round 1):**
```python
set_tasks(tasks=[
"Precondition: None. Goal: Open the Settings app.",
"Precondition: Settings main screen is open. Goal: Navigate to 'Network & internet' settings."
])
```

**(After Executor performs these steps...)**

**(Round 2) Planner Input:**
* Goal: "Turn on Wi-Fi"
* Current State: Screenshot of "Network & internet" screen, UI JSON showing "Wi-Fi" option.
* Task History: Shows ALL previously completed tasks, including "Open the Settings app" and "Navigate to 'Network & internet' settings"

**Planner Thought Process (Round 2):**
Now on "Network & internet". Need to tap Wi-Fi, then enable it. I can see from history that we've already opened Settings and navigated to Network & internet.
1. Task 1: "Precondition: 'Network & internet' screen is open. Goal: Tap the 'Wi-Fi' option."
2. Task 2: "Precondition: Wi-Fi settings screen is open. Goal: Enable the Wi-Fi toggle if it's off."

**Planner Output (Round 2):**
```python
set_tasks(tasks=[
"Precondition: 'Network & internet' screen is open. Goal: Tap the 'Wi-Fi' option.",
"Precondition: Wi-Fi settings screen is open. Goal: Enable the Wi-Fi toggle if it's off."
])
```

**(After Executor performs these steps...)**

**(Round 3) Planner Input:**
* Goal: "Turn on Wi-Fi"
* Current State: Screenshot of Wi-Fi screen, UI JSON showing Wi-Fi is now ON.
* Task History: Shows ALL previous tasks completed successfully (all 4 tasks from previous rounds)

**Planner Output (Round 3):**
```python
complete_goal(message="Wi-Fi has been successfully enabled.")
```"""

# User prompt template that simply states the goal.
# Placeholder: {goal}
DEFAULT_PLANNER_USER_PROMPT = """Goal: {goal}"""

# Prompt template for when a task fails, to help recover and plan new steps.
# Placeholders: {task_description}, {reason}, {goal}
DEFAULT_PLANNER_TASK_FAILED_PROMPT = """
PLANNING UPDATE: The execution of a task failed.

Failed Task Description: "{task_description}"
Reported Reason: {reason}

The previous plan has been stopped. I have attached a screenshot representing the device's **current state** immediately after the failure. Please analyze this visual information.

Original Goal: {goal}

Instruction: Based **only** on the provided screenshot showing the current state and the reason for the previous failure ('{reason}'), generate a NEW plan starting from this observed state to achieve the original goal: '{goal}'.
"""

# Export all prompts
__all__ = [
    "DEFAULT_PLANNER_SYSTEM_PROMPT",
    "DEFAULT_PLANNER_USER_PROMPT",
    "DEFAULT_PLANNER_TASK_FAILED_PROMPT"
]
|