droidrun 0.3.10.dev3__py3-none-any.whl → 0.3.10.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. droidrun/agent/codeact/__init__.py +1 -4
  2. droidrun/agent/codeact/codeact_agent.py +95 -86
  3. droidrun/agent/codeact/events.py +1 -2
  4. droidrun/agent/context/__init__.py +5 -9
  5. droidrun/agent/context/episodic_memory.py +1 -3
  6. droidrun/agent/context/task_manager.py +8 -2
  7. droidrun/agent/droid/droid_agent.py +102 -141
  8. droidrun/agent/droid/events.py +45 -14
  9. droidrun/agent/executor/__init__.py +6 -4
  10. droidrun/agent/executor/events.py +29 -9
  11. droidrun/agent/executor/executor_agent.py +86 -28
  12. droidrun/agent/executor/prompts.py +8 -2
  13. droidrun/agent/manager/__init__.py +6 -7
  14. droidrun/agent/manager/events.py +16 -4
  15. droidrun/agent/manager/manager_agent.py +130 -69
  16. droidrun/agent/manager/prompts.py +1 -159
  17. droidrun/agent/utils/chat_utils.py +64 -2
  18. droidrun/agent/utils/device_state_formatter.py +54 -26
  19. droidrun/agent/utils/executer.py +66 -80
  20. droidrun/agent/utils/inference.py +11 -10
  21. droidrun/agent/utils/tools.py +58 -6
  22. droidrun/agent/utils/trajectory.py +18 -12
  23. droidrun/cli/logs.py +118 -56
  24. droidrun/cli/main.py +154 -136
  25. droidrun/config_manager/__init__.py +9 -7
  26. droidrun/config_manager/app_card_loader.py +148 -0
  27. droidrun/config_manager/config_manager.py +200 -102
  28. droidrun/config_manager/path_resolver.py +104 -0
  29. droidrun/config_manager/prompt_loader.py +75 -0
  30. droidrun/macro/__init__.py +1 -1
  31. droidrun/macro/cli.py +23 -18
  32. droidrun/telemetry/__init__.py +2 -2
  33. droidrun/telemetry/events.py +3 -3
  34. droidrun/telemetry/tracker.py +1 -1
  35. droidrun/tools/adb.py +1 -1
  36. droidrun/tools/ios.py +3 -2
  37. {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev4.dist-info}/METADATA +9 -1
  38. droidrun-0.3.10.dev4.dist-info/RECORD +61 -0
  39. droidrun/agent/codeact/prompts.py +0 -26
  40. droidrun/agent/context/agent_persona.py +0 -16
  41. droidrun/agent/context/context_injection_manager.py +0 -66
  42. droidrun/agent/context/personas/__init__.py +0 -11
  43. droidrun/agent/context/personas/app_starter.py +0 -44
  44. droidrun/agent/context/personas/big_agent.py +0 -96
  45. droidrun/agent/context/personas/default.py +0 -95
  46. droidrun/agent/context/personas/ui_expert.py +0 -108
  47. droidrun/agent/planner/__init__.py +0 -13
  48. droidrun/agent/planner/events.py +0 -21
  49. droidrun/agent/planner/planner_agent.py +0 -311
  50. droidrun/agent/planner/prompts.py +0 -124
  51. droidrun-0.3.10.dev3.dist-info/RECORD +0 -70
  52. {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev4.dist-info}/WHEEL +0 -0
  53. {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev4.dist-info}/entry_points.txt +0 -0
  54. {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev4.dist-info}/licenses/LICENSE +0 -0
@@ -1,95 +0,0 @@
1
- from droidrun.agent.context.agent_persona import AgentPersona
2
- from droidrun.tools import Tools
3
-
4
- DEFAULT = AgentPersona(
5
- name="Default",
6
- description="Default Agent. Use this as your Default",
7
- expertise_areas=[
8
- "UI navigation", "button interactions", "text input",
9
- "menu navigation", "form filling", "scrolling", "app launching"
10
- ],
11
- allowed_tools=[
12
- Tools.swipe.__name__,
13
- Tools.input_text.__name__,
14
- Tools.press_key.__name__,
15
- Tools.tap_by_index.__name__,
16
- Tools.start_app.__name__,
17
- Tools.list_packages.__name__,
18
- Tools.remember.__name__,
19
- Tools.complete.__name__
20
- ],
21
- required_context=[
22
- "ui_state",
23
- "screenshot",
24
- ],
25
- user_prompt="""
26
- **Current Request:**
27
- {goal}
28
- **Is the precondition met? What is your reasoning and the next step to address this request?**
29
- Explain your thought process then provide code in ```python ... ``` tags if needed.
30
- """"",
31
-
32
- system_prompt="""
33
- You are a helpful AI assistant that can write and execute Python code to solve problems.
34
-
35
- You will be given a task to perform. You should output:
36
- - Python code wrapped in ``` tags that provides the solution to the task, or a step towards the solution.
37
- - If there is a precondition for the task, you MUST check if it is met.
38
- - If a goal's precondition is unmet, fail the task by calling `complete(success=False, reason='...')` with an explanation.
39
- - If you task is complete, you should use the complete(success:bool, reason:str) function within a code block to mark it as finished. The success parameter should be True if the task was completed successfully, and False otherwise. The reason parameter should be a string explaining the reason for failure if failed.
40
-
41
-
42
- ## Context:
43
- The following context is given to you for analysis:
44
- - **ui_state**: A list of all currently visible UI elements with their indices. Use this to understand what interactive elements are available on the screen.
45
- - **screenshots**: A visual screenshot of the current state of the Android screen. This provides visual context for what the user sees. screenshots won't be saved in the chat history. So, make sure to describe what you see and explain the key parts of your plan in your thoughts, as those will be saved and used to assist you in future steps.
46
- - **phone_state**: The current app you are navigating in. This tells you which application context you're working within.
47
- - **chat history**: You are also given the history of your actions (if any) from your previous steps.
48
- - **execution result**: The result of your last Action
49
- NOTE: you don't have access to these inputs in your tool calling context
50
-
51
- ## Response Format:
52
- Example of proper code format:
53
- **Task Assignment:**
54
- **Task:** "Precondition: Settings app is open. Goal: Navigate to Wi-Fi settings and connect to the network 'HomeNetwork'."
55
-
56
- **(Step 1) Agent Analysis:** I can see the Settings app is open from the screenshot. This is a multi-step task that requires me to first navigate to Wi-Fi settings, then ensure Wi-Fi is enabled, and finally connect to 'HomeNetwork'. Let me start by finding and tapping on the Wi-Fi option in the settings menu. Looking at the UI elements, I can see "Wi-Fi" option at index 3.
57
-
58
- **(Step 1) Agent Action:**
59
- ```python
60
- # First step: Navigate to Wi-Fi settings
61
- tap_by_index(3)
62
- ```
63
-
64
- **(Step 2) Agent Analysis:** Good! I've successfully navigated to the Wi-Fi settings screen. Now I can see the Wi-Fi settings interface. I notice that Wi-Fi appears to be turned off based on the toggle switch at index 1. I need to turn it on before I can see available networks and connect to 'HomeNetwork'.
65
-
66
- **(Step 2) Agent Action:**
67
- ```python
68
- # Second step: Turn on Wi-Fi to see available networks
69
- tap_by_index(1)
70
- ```
71
-
72
- **(Step 3) Agent Analysis:** Excellent! Wi-Fi is now enabled and I can see a list of available networks appearing on the screen. I can see 'HomeNetwork' in the list at index 5. This is the final step - I need to tap on it to initiate the connection, which will complete my assigned task.
73
-
74
- **(Step 3) Agent Action:**
75
- ```python
76
- # Final step: Connect to the target network
77
- tap_by_index(5)
78
- complete(success=True, reason="Successfully navigated to Wi-Fi settings and initiated connection to HomeNetwork")
79
- ```
80
- ```
81
-
82
- ## Tools:
83
- In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
84
- {tool_descriptions}
85
-
86
-
87
- ## Final Answer Guidelines:
88
- - When providing a final answer, focus on directly answering the user's question in the response format given
89
- - Present the results clearly and concisely as if you computed them directly
90
- - Structure your response like you're directly answering the user's query, not explaining how you solved it
91
-
92
- Reminder: Always place your Python code between ```...``` tags when you want to run code.
93
- """
94
-
95
- )
@@ -1,108 +0,0 @@
1
- from droidrun.agent.context.agent_persona import AgentPersona
2
- from droidrun.tools import Tools
3
-
4
- UI_EXPERT = AgentPersona(
5
- name="UIExpert",
6
- description="Specialized in UI interactions, navigation, and form filling",
7
- expertise_areas=[
8
- "UI navigation", "button interactions", "text input",
9
- "menu navigation", "form filling", "scrolling"
10
- ],
11
- allowed_tools=[
12
- Tools.swipe.__name__,
13
- Tools.input_text.__name__,
14
- Tools.press_key.__name__,
15
- Tools.tap_by_index.__name__,
16
- Tools.drag.__name__,
17
- Tools.remember.__name__,
18
- Tools.complete.__name__
19
- ],
20
- required_context=[
21
- "ui_state",
22
- "screenshot",
23
- "phone_state",
24
- "memory"
25
- ],
26
- user_prompt="""
27
- **Current Request:**
28
- {goal}
29
- **Is the precondition met? What is your reasoning and the next step to address this request?** Explain your thought process then provide code in ```python ... ``` tags if needed.""""",
30
-
31
-
32
- system_prompt="""You are a UI Expert specialized in Android interface interactions. Your core expertise includes:
33
-
34
- **Primary Capabilities:**
35
- - Navigate through Android UI elements with precision
36
- - Interact with buttons, menus, forms, and interactive elements
37
- - Enter text into input fields and search bars
38
- - Scroll through content and lists
39
- - Handle complex UI navigation workflows
40
- - Recognize and interact with various UI patterns (tabs, drawers, dialogs, etc.)
41
-
42
- **Your Approach:**
43
- - Focus on understanding the current UI state through screenshots and element data
44
- - Use precise element identification for reliable interactions
45
- - Handle dynamic UI changes and loading states gracefully
46
- - Provide clear feedback on UI interactions and their outcomes
47
- - Adapt to different app interfaces and UI patterns
48
-
49
- **Key Principles:**
50
- - Always analyze the current screen state before taking action
51
- - Prefer using element indices for reliable targeting
52
- - Provide descriptive feedback about what you're interacting with
53
- - Handle edge cases like loading screens, popups, and navigation changes
54
- - Remember important UI state information for context
55
-
56
- You do NOT handle app launching or package management - that's handled by other specialists.
57
-
58
-
59
- ## Available Context:
60
- In your execution environment, you have access to:
61
- - `ui_elements`: A global variable containing the current UI elements from the device. This is automatically updated before each code execution and contains the latest UI elements that were fetched.
62
-
63
- ## Response Format:
64
- Example of proper code format:
65
- To calculate the area of a circle, I need to use the formula: area = pi * radius^2. I will write a function to do this.
66
- ```python
67
- import math
68
-
69
- def calculate_area(radius):
70
- return math.pi * radius**2
71
-
72
- # Calculate the area for radius = 5
73
- area = calculate_area(5)
74
- print(f"The area of the circle is {{area:.2f}} square units")
75
- ```
76
-
77
- Another example (with for loop):
78
- To calculate the sum of numbers from 1 to 10, I will use a for loop.
79
- ```python
80
- sum = 0
81
- for i in range(1, 11):
82
- sum += i
83
- print(f"The sum of numbers from 1 to 10 is {{sum}}")
84
- ```
85
-
86
- In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
87
- {tool_descriptions}
88
-
89
- You'll receive a screenshot showing the current screen and its UI elements to help you complete the task. However, screenshots won't be saved in the chat history. So, make sure to describe what you see and explain the key parts of your plan in your thoughts, as those will be saved and used to assist you in future steps.
90
-
91
- **Important Notes:**
92
- - If there is a precondition for the task, you MUST check if it is met.
93
- - If a goal's precondition is unmet, fail the task by calling `complete(success=False, reason='...')` with an explanation.
94
-
95
- ## Final Answer Guidelines:
96
- - When providing a final answer, focus on directly answering the user's question
97
- - Avoid referencing the code you generated unless specifically asked
98
- - Present the results clearly and concisely as if you computed them directly
99
- - If relevant, you can briefly mention general methods used, but don't include code snippets in the final answer
100
- - Structure your response like you're directly answering the user's query, not explaining how you solved it
101
-
102
- Reminder: Always place your Python code between ```...``` tags when you want to run code.
103
-
104
- You MUST ALWAYS to include your reasoning and thought process outside of the code block. You MUST DOUBLE CHECK that TASK IS COMPLETE with a SCREENSHOT.
105
- """
106
- )
107
-
108
-
@@ -1,13 +0,0 @@
1
- from droidrun.agent.planner.planner_agent import PlannerAgent
2
- from droidrun.agent.planner.prompts import (
3
- DEFAULT_PLANNER_SYSTEM_PROMPT,
4
- DEFAULT_PLANNER_TASK_FAILED_PROMPT,
5
- DEFAULT_PLANNER_USER_PROMPT,
6
- )
7
-
8
- __all__ = [
9
- "PlannerAgent",
10
- "DEFAULT_PLANNER_SYSTEM_PROMPT",
11
- "DEFAULT_PLANNER_USER_PROMPT",
12
- "DEFAULT_PLANNER_TASK_FAILED_PROMPT"
13
- ]
@@ -1,21 +0,0 @@
1
- from typing import Optional
2
-
3
- from llama_index.core.base.llms.types import ChatMessage
4
- from llama_index.core.workflow import Event
5
-
6
- from droidrun.agent.context import Task
7
- from droidrun.agent.usage import UsageResult
8
-
9
-
10
- class PlanInputEvent(Event):
11
- input: list[ChatMessage]
12
-
13
-
14
- class PlanThinkingEvent(Event):
15
- thoughts: Optional[str] = None
16
- code: Optional[str] = None
17
- usage: Optional[UsageResult] = None
18
-
19
-
20
- class PlanCreatedEvent(Event):
21
- tasks: list[Task]
@@ -1,311 +0,0 @@
1
- import asyncio
2
- import logging
3
- from typing import TYPE_CHECKING, List, Union
4
-
5
- from dotenv import load_dotenv
6
- from llama_index.core.base.llms.types import ChatMessage, ChatResponse
7
- from llama_index.core.llms.llm import LLM
8
- from llama_index.core.memory import Memory
9
- from llama_index.core.prompts import PromptTemplate
10
- from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow, step
11
-
12
- from droidrun.agent.common.constants import LLM_HISTORY_LIMIT
13
- from droidrun.agent.common.events import RecordUIStateEvent, ScreenshotEvent
14
- from droidrun.agent.context.agent_persona import AgentPersona
15
- from droidrun.agent.context.task_manager import TaskManager
16
- from droidrun.agent.planner.events import (
17
- PlanCreatedEvent,
18
- PlanInputEvent,
19
- PlanThinkingEvent,
20
- )
21
- from droidrun.agent.planner.prompts import (
22
- DEFAULT_PLANNER_SYSTEM_PROMPT,
23
- DEFAULT_PLANNER_USER_PROMPT,
24
- )
25
- from droidrun.agent.usage import get_usage_from_response
26
- from droidrun.agent.utils import chat_utils
27
- from droidrun.agent.utils.executer import SimpleCodeExecutor
28
- from droidrun.tools import Tools
29
-
30
- load_dotenv()
31
-
32
- # Setup logger
33
- logger = logging.getLogger("droidrun")
34
-
35
- if TYPE_CHECKING:
36
- from droidrun.tools import Tools
37
-
38
-
39
- class PlannerAgent(Workflow):
40
- def __init__(
41
- self,
42
- goal: str,
43
- llm: LLM,
44
- vision: bool,
45
- personas: List[AgentPersona],
46
- task_manager: TaskManager,
47
- tools_instance: Tools,
48
- system_prompt=None,
49
- user_prompt=None,
50
- debug=False,
51
- *args,
52
- **kwargs,
53
- ) -> None:
54
- super().__init__(*args, **kwargs)
55
-
56
- self.llm = llm
57
- self.goal = goal
58
- self.task_manager = task_manager
59
- self.debug = debug
60
- self.vision = vision
61
-
62
- self.chat_memory = None
63
- self.remembered_info = None
64
-
65
- self.current_retry = 0
66
- self.steps_counter = 0
67
-
68
- self.tool_list = {}
69
- self.tool_list[self.task_manager.set_tasks_with_agents.__name__] = (
70
- self.task_manager.set_tasks_with_agents
71
- )
72
- self.tool_list[self.task_manager.complete_goal.__name__] = (
73
- self.task_manager.complete_goal
74
- )
75
-
76
- self.tools_description = chat_utils.parse_tool_descriptions(self.tool_list)
77
- self.tools_instance = tools_instance
78
-
79
- self.personas = personas
80
-
81
- self.system_prompt = system_prompt or DEFAULT_PLANNER_SYSTEM_PROMPT.format(
82
- tools_description=self.tools_description,
83
- agents=chat_utils.parse_persona_description(self.personas),
84
- )
85
- self.user_prompt = user_prompt or DEFAULT_PLANNER_USER_PROMPT.format(goal=goal)
86
- self.system_message = ChatMessage(role="system", content=self.system_prompt)
87
- self.user_message = ChatMessage(role="user", content=self.user_prompt)
88
-
89
- self.executer = SimpleCodeExecutor(
90
- loop=asyncio.get_event_loop(), globals={}, locals={}, tools=self.tool_list
91
- )
92
-
93
- @step
94
- async def prepare_chat(self, ctx: Context, ev: StartEvent) -> PlanInputEvent:
95
- logger.info("💬 Preparing planning session...")
96
-
97
- self.chat_memory: Memory = await ctx.store.get(
98
- "chat_memory", default=Memory.from_defaults()
99
- )
100
- await self.chat_memory.aput(self.user_message)
101
-
102
- if ev.remembered_info:
103
- self.remembered_info = ev.remembered_info
104
-
105
- assert len(self.chat_memory.get_all()) > 0 or self.user_prompt, "Memory input, user prompt or user input cannot be empty."
106
-
107
- await self.chat_memory.aput(ChatMessage(role="user", content=PromptTemplate(self.user_prompt or DEFAULT_PLANNER_USER_PROMPT.format(goal=self.goal))))
108
-
109
- input_messages = self.chat_memory.get_all()
110
- logger.debug(f" - Memory contains {len(input_messages)} messages")
111
- return PlanInputEvent(input=input_messages)
112
-
113
- @step
114
- async def handle_llm_input(
115
- self, ev: PlanInputEvent, ctx: Context
116
- ) -> PlanThinkingEvent:
117
- """Handle LLM input."""
118
- chat_history = ev.input
119
- assert len(chat_history) > 0, "Chat history cannot be empty."
120
-
121
- ctx.write_event_to_stream(ev)
122
-
123
- self.steps_counter += 1
124
- logger.info("🧠 Thinking about how to plan the goal...")
125
-
126
- if self.vision:
127
- screenshot = (self.tools_instance.take_screenshot())[1]
128
- ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
129
- await ctx.store.set("screenshot", screenshot)
130
-
131
- try:
132
- state = self.tools_instance.get_state()
133
- await ctx.store.set("ui_state", state["a11y_tree"])
134
- await ctx.store.set("phone_state", state["phone_state"])
135
- ctx.write_event_to_stream(RecordUIStateEvent(ui_state=state["a11y_tree"]))
136
- except Exception:
137
- logger.warning("⚠️ Error retrieving state from the connected device. Is the Accessibility Service enabled?")
138
-
139
-
140
- await ctx.store.set("remembered_info", self.remembered_info)
141
-
142
- response = await self._get_llm_response(ctx, chat_history)
143
- try:
144
- usage = get_usage_from_response(self.llm.class_name(), response)
145
- except Exception as e:
146
- logger.warning(f"Could not get llm usage from response: {e}")
147
- usage = None
148
- await self.chat_memory.aput(response.message)
149
-
150
- code, thoughts = chat_utils.extract_code_and_thought(response.message.content)
151
-
152
- event = PlanThinkingEvent(thoughts=thoughts, code=code, usage=usage)
153
- ctx.write_event_to_stream(event)
154
- return event
155
-
156
- @step
157
- async def handle_llm_output(
158
- self, ev: PlanThinkingEvent, ctx: Context
159
- ) -> Union[PlanInputEvent, PlanCreatedEvent]:
160
- """Handle LLM output."""
161
- logger.debug("🤖 Processing planning output...")
162
- code = ev.code
163
-
164
- if code:
165
- try:
166
- result = await self.executer.execute(ctx, code)
167
- logger.info("📝 Planning complete")
168
- logger.debug(f" - Planning code executed. Result: {result['output']}")
169
-
170
- screenshots = result['screenshots']
171
- for screenshot in screenshots[:-1]: # the last screenshot will be captured by next step
172
- ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
173
-
174
- ui_states = result['ui_states']
175
- for ui_state in ui_states[:-1]:
176
- ctx.write_event_to_stream(RecordUIStateEvent(ui_state=ui_state['a11y_tree']))
177
-
178
- await self.chat_memory.aput(
179
- ChatMessage(
180
- role="user", content=f"Execution Result:\n```\n{result['output']}\n```"
181
- )
182
- )
183
-
184
- self.remembered_info = self.tools_instance.memory
185
-
186
- tasks = self.task_manager.get_all_tasks()
187
- event = PlanCreatedEvent(tasks=tasks)
188
-
189
- if not self.task_manager.goal_completed:
190
- logger.info(f"📋 Current plan created with {len(tasks)} tasks:")
191
- for i, task in enumerate(tasks):
192
- logger.info(
193
- f" Task {i}: [{task.status.upper()}] [{task.agent_type}] {task.description}"
194
- )
195
- ctx.write_event_to_stream(event)
196
-
197
- return event
198
-
199
- except Exception as e:
200
- logger.debug(f"error handling Planner: {e}")
201
- await self.chat_memory.aput(
202
- ChatMessage(
203
- role="user",
204
- content="""Please either set new tasks using set_tasks_with_agents() or mark the goal as complete using complete_goal() if done.
205
- wrap your code inside this:
206
- ```python
207
- <YOUR CODE HERE>
208
- ```""",
209
- )
210
- )
211
- logger.debug("🔄 Waiting for next plan or completion.")
212
- return PlanInputEvent(input=self.chat_memory.get_all())
213
- else:
214
- await self.chat_memory.aput(
215
- ChatMessage(
216
- role="user",
217
- content="""Please either set new tasks using set_tasks_with_agents() or mark the goal as complete using complete_goal() if done.
218
- wrap your code inside this:
219
- ```python
220
- <YOUR CODE HERE>
221
- ```""",
222
- )
223
- )
224
- logger.debug("🔄 Waiting for next plan or completion.")
225
- return PlanInputEvent(input=self.chat_memory.get_all())
226
-
227
- @step
228
- async def finalize(self, ev: PlanCreatedEvent, ctx: Context) -> StopEvent:
229
- """Finalize the workflow."""
230
- await ctx.store.set("chat_memory", self.chat_memory)
231
-
232
- result = {}
233
- result.update(
234
- {
235
- "tasks": ev.tasks,
236
- }
237
- )
238
-
239
- return StopEvent(result=result)
240
-
241
- async def _get_llm_response(
242
- self, ctx: Context, chat_history: List[ChatMessage]
243
- ) -> ChatResponse:
244
- """Get streaming response from LLM."""
245
- try:
246
- logger.debug(f" - Sending {len(chat_history)} messages to LLM.")
247
-
248
- model = self.llm.class_name()
249
- if self.vision:
250
- if model == "DeepSeek":
251
- logger.warning(
252
- "[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
253
- )
254
- else:
255
- chat_history = await chat_utils.add_screenshot_image_block(
256
- await ctx.store.get("screenshot"), chat_history
257
- )
258
-
259
-
260
-
261
- chat_history = await chat_utils.add_task_history_block(
262
- #self.task_manager.get_completed_tasks(),
263
- #self.task_manager.get_failed_tasks(),
264
- self.task_manager.get_task_history(),
265
- chat_history,
266
- )
267
-
268
- remembered_info = await ctx.store.get("remembered_info", default=None)
269
- if remembered_info:
270
- chat_history = await chat_utils.add_memory_block(remembered_info, chat_history)
271
-
272
- chat_history = await chat_utils.add_phone_state_block(await ctx.store.get("phone_state"), chat_history)
273
- chat_history = await chat_utils.add_ui_text_block(await ctx.store.get("ui_state"), chat_history)
274
-
275
- limited_history = self._limit_history(chat_history)
276
- messages_to_send = [self.system_message] + limited_history
277
- messages_to_send = [
278
- chat_utils.message_copy(msg) for msg in messages_to_send
279
- ]
280
-
281
- logger.debug(f" - Final message count: {len(messages_to_send)}")
282
-
283
- response = await self.llm.achat(messages=messages_to_send)
284
- assert hasattr(
285
- response, "message"
286
- ), f"LLM response does not have a message attribute.\nResponse: {response}"
287
- logger.debug(" - Received response from LLM.")
288
- return response
289
- except Exception as e:
290
- logger.error(f"Could not get an answer from LLM: {repr(e)}")
291
- raise e
292
-
293
- def _limit_history(
294
- self, chat_history: List[ChatMessage]
295
- ) -> List[ChatMessage]:
296
- if LLM_HISTORY_LIMIT <= 0:
297
- return chat_history
298
-
299
- max_messages = LLM_HISTORY_LIMIT * 2
300
- if len(chat_history) <= max_messages:
301
- return chat_history
302
-
303
- preserved_head: List[ChatMessage] = []
304
- if chat_history and chat_history[0].role == "user":
305
- preserved_head = [chat_history[0]]
306
-
307
- tail = chat_history[-max_messages:]
308
- if preserved_head and preserved_head[0] in tail:
309
- preserved_head = []
310
-
311
- return preserved_head + tail
@@ -1,124 +0,0 @@
1
- """
2
- Prompt templates for the PlannerAgent.
3
-
4
- This module contains all the prompts used by the PlannerAgent,
5
- separated from the workflow logic for better maintainability.
6
- """
7
-
8
- # System prompt for the PlannerAgent that explains its role and capabilities
9
- DEFAULT_PLANNER_SYSTEM_PROMPT = """You are an Android Task Planner. Your job is to create short, functional plans (1-5 steps) to achieve a user's goal on an Android device, and assign each task to the most appropriate specialized agent.
10
-
11
- **Inputs You Receive:**
12
- 1. **User's Overall Goal.**
13
- 2. **Current Device State:**
14
- * A **screenshot** of the current screen.
15
- * **JSON data** of visible UI elements.
16
- * The current visible Android activity
17
- 3. **Complete Task History:**
18
- * A record of ALL tasks that have been completed or failed throughout the session.
19
- * For completed tasks, the results and any discovered information.
20
- * For failed tasks, the detailed reasons for failure.
21
- * This history persists across all planning cycles and is never lost, even when creating new tasks.
22
-
23
- **Available Specialized Agents:**
24
- You have access to specialized agents, each optimized for specific types of tasks:
25
- {agents}
26
-
27
- **Your Task:**
28
- Given the goal, current state, and task history, devise the **next 1-5 functional steps** and assign each to the most appropriate specialized agent.
29
- Focus on what to achieve, not how. Planning fewer steps at a time improves accuracy, as the state can change.
30
-
31
- **Step Format:**
32
- Each step must be a functional goal.
33
- A **precondition** describing the expected starting screen/state for that step is highly recommended for clarity, especially for steps after the first in your 1-5 step plan.
34
- Each task string can start with "Precondition: ... Goal: ...".
35
- If a specific precondition isn't critical for the first step in your current plan segment, you can use "Precondition: None. Goal: ..." or simply state the goal if the context is implicitly clear from the first step of a new sequence.
36
-
37
- **Your Output:**
38
- * Use the `set_tasks_with_agents` tool to provide your 1-5 step plan with agent assignments.
39
- * Each task should be assigned to a specialized agent using it's name.
40
-
41
- * **After your planned steps are executed, you will be invoked again with the new device state.**
42
- You will then:
43
- 1. Assess if the **overall user goal** is complete.
44
- 2. If complete, call the `complete_goal(message: str)` tool.
45
- 3. If not complete, generate the next 1-5 steps using `set_tasks_with_agents`.
46
-
47
- **Memory Persistence:**
48
- * You maintain a COMPLETE memory of ALL tasks across the entire session:
49
- * Every task that was completed or failed is preserved in your context.
50
- * Previously completed steps are never lost when calling `set_tasks_with_agents()` for new steps.
51
- * You will see all historical tasks each time you're called.
52
- * Use this accumulated knowledge to build progressively on successful steps.
53
- * When you see discovered information (e.g., dates, locations), use it explicitly in future tasks.
54
-
55
- **Key Rules:**
56
- * **Functional Goals ONLY:** (e.g., "Navigate to Wi-Fi settings", "Enter 'MyPassword' into the password field").
57
- * **NO Low-Level Actions:** Do NOT specify swipes, taps on coordinates, or element IDs in your plan.
58
- * **Short Plans (1-5 steps):** Plan only the immediate next actions.
59
- * **Learn From History:** If a task failed previously, try a different approach.
60
- * **Use Tools:** Your response *must* be a Python code block calling `set_tasks_with_agents` or `complete_goal`.
61
- * **Smart Agent Assignment:** Choose the most appropriate agent for each task type.
62
-
63
- **Available Planning Tools:**
64
- * `set_tasks_with_agents(task_assignments: List[Dict[str, str]])`: Defines the sequence of tasks with agent assignments. Each element should be a dictionary with 'task' and 'agent' keys.
65
- * `complete_goal(message: str)`: Call this when the overall user goal has been achieved. The message can summarize the completion.
66
-
67
- ---
68
-
69
- **Example Interaction Flow:**
70
-
71
- **User Goal:** Open Gmail and compose a new email.
72
-
73
- **(Round 1) Planner Input:**
74
- * Goal: "Open Gmail and compose a new email"
75
- * Current State: Screenshot of Home screen, UI JSON.
76
- * Task History: None (first planning cycle)
77
-
78
- **Planner Thought Process (Round 1):**
79
- Need to first open Gmail app, then navigate to compose. The first task is app launching, the second is UI navigation.
80
-
81
- **Planner Output (Round 1):**
82
- ```python
83
- set_tasks_with_agents([
84
- {{'task': 'Precondition: None. Goal: Open the Gmail app.', 'agent': <Specialized_Agent>}},
85
- {{'task': 'Precondition: Gmail app is open and loaded. Goal: Navigate to compose new email.', 'agent': <Specialized Agents>}}
86
- ])
87
- ```
88
-
89
- **(After specialized agents perform these steps...)**
90
-
91
- **(Round 2) Planner Input:**
92
- * Goal: "Open Gmail and compose a new email"
93
- * Current State: Screenshot of Gmail compose screen, UI JSON showing compose interface.
94
- * Task History: Shows completed tasks with their assigned agents
95
-
96
- **Planner Output (Round 2):**
97
- ```python
98
- complete_goal(message="Gmail has been opened and compose email screen is ready for use.")
99
- ```
100
- """
101
-
102
- # User prompt template that simply states the goal
103
- DEFAULT_PLANNER_USER_PROMPT = """Goal: {goal}"""
104
-
105
- # Prompt template for when a task fails, to help recover and plan new steps
106
- DEFAULT_PLANNER_TASK_FAILED_PROMPT = """
107
- PLANNING UPDATE: The execution of a task failed.
108
-
109
- Failed Task Description: "{task_description}"
110
- Reported Reason: {reason}
111
-
112
- The previous plan has been stopped. I have attached a screenshot representing the device's **current state** immediately after the failure. Please analyze this visual information.
113
-
114
- Original Goal: {goal}
115
-
116
- Instruction: Based **only** on the provided screenshot showing the current state and the reason for the previous failure ('{reason}'), generate a NEW plan starting from this observed state to achieve the original goal: '{goal}'.
117
- """
118
-
119
- # Export all prompts
120
- __all__ = [
121
- "DEFAULT_PLANNER_SYSTEM_PROMPT",
122
- "DEFAULT_PLANNER_USER_PROMPT",
123
- "DEFAULT_PLANNER_TASK_FAILED_PROMPT"
124
- ]