droidrun 0.3.10.dev3__py3-none-any.whl → 0.3.10.dev5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/agent/codeact/__init__.py +1 -4
- droidrun/agent/codeact/codeact_agent.py +95 -86
- droidrun/agent/codeact/events.py +1 -2
- droidrun/agent/context/__init__.py +5 -9
- droidrun/agent/context/episodic_memory.py +1 -3
- droidrun/agent/context/task_manager.py +8 -2
- droidrun/agent/droid/droid_agent.py +102 -141
- droidrun/agent/droid/events.py +45 -14
- droidrun/agent/executor/__init__.py +6 -4
- droidrun/agent/executor/events.py +29 -9
- droidrun/agent/executor/executor_agent.py +86 -28
- droidrun/agent/executor/prompts.py +8 -2
- droidrun/agent/manager/__init__.py +6 -7
- droidrun/agent/manager/events.py +16 -4
- droidrun/agent/manager/manager_agent.py +130 -69
- droidrun/agent/manager/prompts.py +1 -159
- droidrun/agent/utils/chat_utils.py +64 -2
- droidrun/agent/utils/device_state_formatter.py +54 -26
- droidrun/agent/utils/executer.py +66 -80
- droidrun/agent/utils/inference.py +11 -10
- droidrun/agent/utils/tools.py +58 -6
- droidrun/agent/utils/trajectory.py +18 -12
- droidrun/cli/logs.py +118 -56
- droidrun/cli/main.py +154 -136
- droidrun/config_manager/__init__.py +9 -7
- droidrun/config_manager/app_card_loader.py +148 -0
- droidrun/config_manager/config_manager.py +200 -102
- droidrun/config_manager/path_resolver.py +104 -0
- droidrun/config_manager/prompt_loader.py +75 -0
- droidrun/macro/__init__.py +1 -1
- droidrun/macro/cli.py +23 -18
- droidrun/telemetry/__init__.py +2 -2
- droidrun/telemetry/events.py +3 -3
- droidrun/telemetry/tracker.py +1 -1
- droidrun/tools/adb.py +1 -1
- droidrun/tools/ios.py +3 -2
- {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev5.dist-info}/METADATA +10 -4
- droidrun-0.3.10.dev5.dist-info/RECORD +61 -0
- droidrun/agent/codeact/prompts.py +0 -26
- droidrun/agent/context/agent_persona.py +0 -16
- droidrun/agent/context/context_injection_manager.py +0 -66
- droidrun/agent/context/personas/__init__.py +0 -11
- droidrun/agent/context/personas/app_starter.py +0 -44
- droidrun/agent/context/personas/big_agent.py +0 -96
- droidrun/agent/context/personas/default.py +0 -95
- droidrun/agent/context/personas/ui_expert.py +0 -108
- droidrun/agent/planner/__init__.py +0 -13
- droidrun/agent/planner/events.py +0 -21
- droidrun/agent/planner/planner_agent.py +0 -311
- droidrun/agent/planner/prompts.py +0 -124
- droidrun-0.3.10.dev3.dist-info/RECORD +0 -70
- {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev5.dist-info}/WHEEL +0 -0
- {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev5.dist-info}/entry_points.txt +0 -0
- {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev5.dist-info}/licenses/LICENSE +0 -0
@@ -1,95 +0,0 @@
|
|
1
|
-
from droidrun.agent.context.agent_persona import AgentPersona
|
2
|
-
from droidrun.tools import Tools
|
3
|
-
|
4
|
-
DEFAULT = AgentPersona(
|
5
|
-
name="Default",
|
6
|
-
description="Default Agent. Use this as your Default",
|
7
|
-
expertise_areas=[
|
8
|
-
"UI navigation", "button interactions", "text input",
|
9
|
-
"menu navigation", "form filling", "scrolling", "app launching"
|
10
|
-
],
|
11
|
-
allowed_tools=[
|
12
|
-
Tools.swipe.__name__,
|
13
|
-
Tools.input_text.__name__,
|
14
|
-
Tools.press_key.__name__,
|
15
|
-
Tools.tap_by_index.__name__,
|
16
|
-
Tools.start_app.__name__,
|
17
|
-
Tools.list_packages.__name__,
|
18
|
-
Tools.remember.__name__,
|
19
|
-
Tools.complete.__name__
|
20
|
-
],
|
21
|
-
required_context=[
|
22
|
-
"ui_state",
|
23
|
-
"screenshot",
|
24
|
-
],
|
25
|
-
user_prompt="""
|
26
|
-
**Current Request:**
|
27
|
-
{goal}
|
28
|
-
**Is the precondition met? What is your reasoning and the next step to address this request?**
|
29
|
-
Explain your thought process then provide code in ```python ... ``` tags if needed.
|
30
|
-
""""",
|
31
|
-
|
32
|
-
system_prompt="""
|
33
|
-
You are a helpful AI assistant that can write and execute Python code to solve problems.
|
34
|
-
|
35
|
-
You will be given a task to perform. You should output:
|
36
|
-
- Python code wrapped in ``` tags that provides the solution to the task, or a step towards the solution.
|
37
|
-
- If there is a precondition for the task, you MUST check if it is met.
|
38
|
-
- If a goal's precondition is unmet, fail the task by calling `complete(success=False, reason='...')` with an explanation.
|
39
|
-
- If you task is complete, you should use the complete(success:bool, reason:str) function within a code block to mark it as finished. The success parameter should be True if the task was completed successfully, and False otherwise. The reason parameter should be a string explaining the reason for failure if failed.
|
40
|
-
|
41
|
-
|
42
|
-
## Context:
|
43
|
-
The following context is given to you for analysis:
|
44
|
-
- **ui_state**: A list of all currently visible UI elements with their indices. Use this to understand what interactive elements are available on the screen.
|
45
|
-
- **screenshots**: A visual screenshot of the current state of the Android screen. This provides visual context for what the user sees. screenshots won't be saved in the chat history. So, make sure to describe what you see and explain the key parts of your plan in your thoughts, as those will be saved and used to assist you in future steps.
|
46
|
-
- **phone_state**: The current app you are navigating in. This tells you which application context you're working within.
|
47
|
-
- **chat history**: You are also given the history of your actions (if any) from your previous steps.
|
48
|
-
- **execution result**: The result of your last Action
|
49
|
-
NOTE: you don't have access to these inputs in your tool calling context
|
50
|
-
|
51
|
-
## Response Format:
|
52
|
-
Example of proper code format:
|
53
|
-
**Task Assignment:**
|
54
|
-
**Task:** "Precondition: Settings app is open. Goal: Navigate to Wi-Fi settings and connect to the network 'HomeNetwork'."
|
55
|
-
|
56
|
-
**(Step 1) Agent Analysis:** I can see the Settings app is open from the screenshot. This is a multi-step task that requires me to first navigate to Wi-Fi settings, then ensure Wi-Fi is enabled, and finally connect to 'HomeNetwork'. Let me start by finding and tapping on the Wi-Fi option in the settings menu. Looking at the UI elements, I can see "Wi-Fi" option at index 3.
|
57
|
-
|
58
|
-
**(Step 1) Agent Action:**
|
59
|
-
```python
|
60
|
-
# First step: Navigate to Wi-Fi settings
|
61
|
-
tap_by_index(3)
|
62
|
-
```
|
63
|
-
|
64
|
-
**(Step 2) Agent Analysis:** Good! I've successfully navigated to the Wi-Fi settings screen. Now I can see the Wi-Fi settings interface. I notice that Wi-Fi appears to be turned off based on the toggle switch at index 1. I need to turn it on before I can see available networks and connect to 'HomeNetwork'.
|
65
|
-
|
66
|
-
**(Step 2) Agent Action:**
|
67
|
-
```python
|
68
|
-
# Second step: Turn on Wi-Fi to see available networks
|
69
|
-
tap_by_index(1)
|
70
|
-
```
|
71
|
-
|
72
|
-
**(Step 3) Agent Analysis:** Excellent! Wi-Fi is now enabled and I can see a list of available networks appearing on the screen. I can see 'HomeNetwork' in the list at index 5. This is the final step - I need to tap on it to initiate the connection, which will complete my assigned task.
|
73
|
-
|
74
|
-
**(Step 3) Agent Action:**
|
75
|
-
```python
|
76
|
-
# Final step: Connect to the target network
|
77
|
-
tap_by_index(5)
|
78
|
-
complete(success=True, reason="Successfully navigated to Wi-Fi settings and initiated connection to HomeNetwork")
|
79
|
-
```
|
80
|
-
```
|
81
|
-
|
82
|
-
## Tools:
|
83
|
-
In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
|
84
|
-
{tool_descriptions}
|
85
|
-
|
86
|
-
|
87
|
-
## Final Answer Guidelines:
|
88
|
-
- When providing a final answer, focus on directly answering the user's question in the response format given
|
89
|
-
- Present the results clearly and concisely as if you computed them directly
|
90
|
-
- Structure your response like you're directly answering the user's query, not explaining how you solved it
|
91
|
-
|
92
|
-
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
93
|
-
"""
|
94
|
-
|
95
|
-
)
|
@@ -1,108 +0,0 @@
|
|
1
|
-
from droidrun.agent.context.agent_persona import AgentPersona
|
2
|
-
from droidrun.tools import Tools
|
3
|
-
|
4
|
-
UI_EXPERT = AgentPersona(
|
5
|
-
name="UIExpert",
|
6
|
-
description="Specialized in UI interactions, navigation, and form filling",
|
7
|
-
expertise_areas=[
|
8
|
-
"UI navigation", "button interactions", "text input",
|
9
|
-
"menu navigation", "form filling", "scrolling"
|
10
|
-
],
|
11
|
-
allowed_tools=[
|
12
|
-
Tools.swipe.__name__,
|
13
|
-
Tools.input_text.__name__,
|
14
|
-
Tools.press_key.__name__,
|
15
|
-
Tools.tap_by_index.__name__,
|
16
|
-
Tools.drag.__name__,
|
17
|
-
Tools.remember.__name__,
|
18
|
-
Tools.complete.__name__
|
19
|
-
],
|
20
|
-
required_context=[
|
21
|
-
"ui_state",
|
22
|
-
"screenshot",
|
23
|
-
"phone_state",
|
24
|
-
"memory"
|
25
|
-
],
|
26
|
-
user_prompt="""
|
27
|
-
**Current Request:**
|
28
|
-
{goal}
|
29
|
-
**Is the precondition met? What is your reasoning and the next step to address this request?** Explain your thought process then provide code in ```python ... ``` tags if needed.""""",
|
30
|
-
|
31
|
-
|
32
|
-
system_prompt="""You are a UI Expert specialized in Android interface interactions. Your core expertise includes:
|
33
|
-
|
34
|
-
**Primary Capabilities:**
|
35
|
-
- Navigate through Android UI elements with precision
|
36
|
-
- Interact with buttons, menus, forms, and interactive elements
|
37
|
-
- Enter text into input fields and search bars
|
38
|
-
- Scroll through content and lists
|
39
|
-
- Handle complex UI navigation workflows
|
40
|
-
- Recognize and interact with various UI patterns (tabs, drawers, dialogs, etc.)
|
41
|
-
|
42
|
-
**Your Approach:**
|
43
|
-
- Focus on understanding the current UI state through screenshots and element data
|
44
|
-
- Use precise element identification for reliable interactions
|
45
|
-
- Handle dynamic UI changes and loading states gracefully
|
46
|
-
- Provide clear feedback on UI interactions and their outcomes
|
47
|
-
- Adapt to different app interfaces and UI patterns
|
48
|
-
|
49
|
-
**Key Principles:**
|
50
|
-
- Always analyze the current screen state before taking action
|
51
|
-
- Prefer using element indices for reliable targeting
|
52
|
-
- Provide descriptive feedback about what you're interacting with
|
53
|
-
- Handle edge cases like loading screens, popups, and navigation changes
|
54
|
-
- Remember important UI state information for context
|
55
|
-
|
56
|
-
You do NOT handle app launching or package management - that's handled by other specialists.
|
57
|
-
|
58
|
-
|
59
|
-
## Available Context:
|
60
|
-
In your execution environment, you have access to:
|
61
|
-
- `ui_elements`: A global variable containing the current UI elements from the device. This is automatically updated before each code execution and contains the latest UI elements that were fetched.
|
62
|
-
|
63
|
-
## Response Format:
|
64
|
-
Example of proper code format:
|
65
|
-
To calculate the area of a circle, I need to use the formula: area = pi * radius^2. I will write a function to do this.
|
66
|
-
```python
|
67
|
-
import math
|
68
|
-
|
69
|
-
def calculate_area(radius):
|
70
|
-
return math.pi * radius**2
|
71
|
-
|
72
|
-
# Calculate the area for radius = 5
|
73
|
-
area = calculate_area(5)
|
74
|
-
print(f"The area of the circle is {{area:.2f}} square units")
|
75
|
-
```
|
76
|
-
|
77
|
-
Another example (with for loop):
|
78
|
-
To calculate the sum of numbers from 1 to 10, I will use a for loop.
|
79
|
-
```python
|
80
|
-
sum = 0
|
81
|
-
for i in range(1, 11):
|
82
|
-
sum += i
|
83
|
-
print(f"The sum of numbers from 1 to 10 is {{sum}}")
|
84
|
-
```
|
85
|
-
|
86
|
-
In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
|
87
|
-
{tool_descriptions}
|
88
|
-
|
89
|
-
You'll receive a screenshot showing the current screen and its UI elements to help you complete the task. However, screenshots won't be saved in the chat history. So, make sure to describe what you see and explain the key parts of your plan in your thoughts, as those will be saved and used to assist you in future steps.
|
90
|
-
|
91
|
-
**Important Notes:**
|
92
|
-
- If there is a precondition for the task, you MUST check if it is met.
|
93
|
-
- If a goal's precondition is unmet, fail the task by calling `complete(success=False, reason='...')` with an explanation.
|
94
|
-
|
95
|
-
## Final Answer Guidelines:
|
96
|
-
- When providing a final answer, focus on directly answering the user's question
|
97
|
-
- Avoid referencing the code you generated unless specifically asked
|
98
|
-
- Present the results clearly and concisely as if you computed them directly
|
99
|
-
- If relevant, you can briefly mention general methods used, but don't include code snippets in the final answer
|
100
|
-
- Structure your response like you're directly answering the user's query, not explaining how you solved it
|
101
|
-
|
102
|
-
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
103
|
-
|
104
|
-
You MUST ALWAYS to include your reasoning and thought process outside of the code block. You MUST DOUBLE CHECK that TASK IS COMPLETE with a SCREENSHOT.
|
105
|
-
"""
|
106
|
-
)
|
107
|
-
|
108
|
-
|
@@ -1,13 +0,0 @@
|
|
1
|
-
from droidrun.agent.planner.planner_agent import PlannerAgent
|
2
|
-
from droidrun.agent.planner.prompts import (
|
3
|
-
DEFAULT_PLANNER_SYSTEM_PROMPT,
|
4
|
-
DEFAULT_PLANNER_TASK_FAILED_PROMPT,
|
5
|
-
DEFAULT_PLANNER_USER_PROMPT,
|
6
|
-
)
|
7
|
-
|
8
|
-
__all__ = [
|
9
|
-
"PlannerAgent",
|
10
|
-
"DEFAULT_PLANNER_SYSTEM_PROMPT",
|
11
|
-
"DEFAULT_PLANNER_USER_PROMPT",
|
12
|
-
"DEFAULT_PLANNER_TASK_FAILED_PROMPT"
|
13
|
-
]
|
droidrun/agent/planner/events.py
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
from typing import Optional
|
2
|
-
|
3
|
-
from llama_index.core.base.llms.types import ChatMessage
|
4
|
-
from llama_index.core.workflow import Event
|
5
|
-
|
6
|
-
from droidrun.agent.context import Task
|
7
|
-
from droidrun.agent.usage import UsageResult
|
8
|
-
|
9
|
-
|
10
|
-
class PlanInputEvent(Event):
|
11
|
-
input: list[ChatMessage]
|
12
|
-
|
13
|
-
|
14
|
-
class PlanThinkingEvent(Event):
|
15
|
-
thoughts: Optional[str] = None
|
16
|
-
code: Optional[str] = None
|
17
|
-
usage: Optional[UsageResult] = None
|
18
|
-
|
19
|
-
|
20
|
-
class PlanCreatedEvent(Event):
|
21
|
-
tasks: list[Task]
|
@@ -1,311 +0,0 @@
|
|
1
|
-
import asyncio
|
2
|
-
import logging
|
3
|
-
from typing import TYPE_CHECKING, List, Union
|
4
|
-
|
5
|
-
from dotenv import load_dotenv
|
6
|
-
from llama_index.core.base.llms.types import ChatMessage, ChatResponse
|
7
|
-
from llama_index.core.llms.llm import LLM
|
8
|
-
from llama_index.core.memory import Memory
|
9
|
-
from llama_index.core.prompts import PromptTemplate
|
10
|
-
from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow, step
|
11
|
-
|
12
|
-
from droidrun.agent.common.constants import LLM_HISTORY_LIMIT
|
13
|
-
from droidrun.agent.common.events import RecordUIStateEvent, ScreenshotEvent
|
14
|
-
from droidrun.agent.context.agent_persona import AgentPersona
|
15
|
-
from droidrun.agent.context.task_manager import TaskManager
|
16
|
-
from droidrun.agent.planner.events import (
|
17
|
-
PlanCreatedEvent,
|
18
|
-
PlanInputEvent,
|
19
|
-
PlanThinkingEvent,
|
20
|
-
)
|
21
|
-
from droidrun.agent.planner.prompts import (
|
22
|
-
DEFAULT_PLANNER_SYSTEM_PROMPT,
|
23
|
-
DEFAULT_PLANNER_USER_PROMPT,
|
24
|
-
)
|
25
|
-
from droidrun.agent.usage import get_usage_from_response
|
26
|
-
from droidrun.agent.utils import chat_utils
|
27
|
-
from droidrun.agent.utils.executer import SimpleCodeExecutor
|
28
|
-
from droidrun.tools import Tools
|
29
|
-
|
30
|
-
load_dotenv()
|
31
|
-
|
32
|
-
# Setup logger
|
33
|
-
logger = logging.getLogger("droidrun")
|
34
|
-
|
35
|
-
if TYPE_CHECKING:
|
36
|
-
from droidrun.tools import Tools
|
37
|
-
|
38
|
-
|
39
|
-
class PlannerAgent(Workflow):
|
40
|
-
def __init__(
|
41
|
-
self,
|
42
|
-
goal: str,
|
43
|
-
llm: LLM,
|
44
|
-
vision: bool,
|
45
|
-
personas: List[AgentPersona],
|
46
|
-
task_manager: TaskManager,
|
47
|
-
tools_instance: Tools,
|
48
|
-
system_prompt=None,
|
49
|
-
user_prompt=None,
|
50
|
-
debug=False,
|
51
|
-
*args,
|
52
|
-
**kwargs,
|
53
|
-
) -> None:
|
54
|
-
super().__init__(*args, **kwargs)
|
55
|
-
|
56
|
-
self.llm = llm
|
57
|
-
self.goal = goal
|
58
|
-
self.task_manager = task_manager
|
59
|
-
self.debug = debug
|
60
|
-
self.vision = vision
|
61
|
-
|
62
|
-
self.chat_memory = None
|
63
|
-
self.remembered_info = None
|
64
|
-
|
65
|
-
self.current_retry = 0
|
66
|
-
self.steps_counter = 0
|
67
|
-
|
68
|
-
self.tool_list = {}
|
69
|
-
self.tool_list[self.task_manager.set_tasks_with_agents.__name__] = (
|
70
|
-
self.task_manager.set_tasks_with_agents
|
71
|
-
)
|
72
|
-
self.tool_list[self.task_manager.complete_goal.__name__] = (
|
73
|
-
self.task_manager.complete_goal
|
74
|
-
)
|
75
|
-
|
76
|
-
self.tools_description = chat_utils.parse_tool_descriptions(self.tool_list)
|
77
|
-
self.tools_instance = tools_instance
|
78
|
-
|
79
|
-
self.personas = personas
|
80
|
-
|
81
|
-
self.system_prompt = system_prompt or DEFAULT_PLANNER_SYSTEM_PROMPT.format(
|
82
|
-
tools_description=self.tools_description,
|
83
|
-
agents=chat_utils.parse_persona_description(self.personas),
|
84
|
-
)
|
85
|
-
self.user_prompt = user_prompt or DEFAULT_PLANNER_USER_PROMPT.format(goal=goal)
|
86
|
-
self.system_message = ChatMessage(role="system", content=self.system_prompt)
|
87
|
-
self.user_message = ChatMessage(role="user", content=self.user_prompt)
|
88
|
-
|
89
|
-
self.executer = SimpleCodeExecutor(
|
90
|
-
loop=asyncio.get_event_loop(), globals={}, locals={}, tools=self.tool_list
|
91
|
-
)
|
92
|
-
|
93
|
-
@step
|
94
|
-
async def prepare_chat(self, ctx: Context, ev: StartEvent) -> PlanInputEvent:
|
95
|
-
logger.info("💬 Preparing planning session...")
|
96
|
-
|
97
|
-
self.chat_memory: Memory = await ctx.store.get(
|
98
|
-
"chat_memory", default=Memory.from_defaults()
|
99
|
-
)
|
100
|
-
await self.chat_memory.aput(self.user_message)
|
101
|
-
|
102
|
-
if ev.remembered_info:
|
103
|
-
self.remembered_info = ev.remembered_info
|
104
|
-
|
105
|
-
assert len(self.chat_memory.get_all()) > 0 or self.user_prompt, "Memory input, user prompt or user input cannot be empty."
|
106
|
-
|
107
|
-
await self.chat_memory.aput(ChatMessage(role="user", content=PromptTemplate(self.user_prompt or DEFAULT_PLANNER_USER_PROMPT.format(goal=self.goal))))
|
108
|
-
|
109
|
-
input_messages = self.chat_memory.get_all()
|
110
|
-
logger.debug(f" - Memory contains {len(input_messages)} messages")
|
111
|
-
return PlanInputEvent(input=input_messages)
|
112
|
-
|
113
|
-
@step
|
114
|
-
async def handle_llm_input(
|
115
|
-
self, ev: PlanInputEvent, ctx: Context
|
116
|
-
) -> PlanThinkingEvent:
|
117
|
-
"""Handle LLM input."""
|
118
|
-
chat_history = ev.input
|
119
|
-
assert len(chat_history) > 0, "Chat history cannot be empty."
|
120
|
-
|
121
|
-
ctx.write_event_to_stream(ev)
|
122
|
-
|
123
|
-
self.steps_counter += 1
|
124
|
-
logger.info("🧠 Thinking about how to plan the goal...")
|
125
|
-
|
126
|
-
if self.vision:
|
127
|
-
screenshot = (self.tools_instance.take_screenshot())[1]
|
128
|
-
ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
|
129
|
-
await ctx.store.set("screenshot", screenshot)
|
130
|
-
|
131
|
-
try:
|
132
|
-
state = self.tools_instance.get_state()
|
133
|
-
await ctx.store.set("ui_state", state["a11y_tree"])
|
134
|
-
await ctx.store.set("phone_state", state["phone_state"])
|
135
|
-
ctx.write_event_to_stream(RecordUIStateEvent(ui_state=state["a11y_tree"]))
|
136
|
-
except Exception:
|
137
|
-
logger.warning("⚠️ Error retrieving state from the connected device. Is the Accessibility Service enabled?")
|
138
|
-
|
139
|
-
|
140
|
-
await ctx.store.set("remembered_info", self.remembered_info)
|
141
|
-
|
142
|
-
response = await self._get_llm_response(ctx, chat_history)
|
143
|
-
try:
|
144
|
-
usage = get_usage_from_response(self.llm.class_name(), response)
|
145
|
-
except Exception as e:
|
146
|
-
logger.warning(f"Could not get llm usage from response: {e}")
|
147
|
-
usage = None
|
148
|
-
await self.chat_memory.aput(response.message)
|
149
|
-
|
150
|
-
code, thoughts = chat_utils.extract_code_and_thought(response.message.content)
|
151
|
-
|
152
|
-
event = PlanThinkingEvent(thoughts=thoughts, code=code, usage=usage)
|
153
|
-
ctx.write_event_to_stream(event)
|
154
|
-
return event
|
155
|
-
|
156
|
-
@step
|
157
|
-
async def handle_llm_output(
|
158
|
-
self, ev: PlanThinkingEvent, ctx: Context
|
159
|
-
) -> Union[PlanInputEvent, PlanCreatedEvent]:
|
160
|
-
"""Handle LLM output."""
|
161
|
-
logger.debug("🤖 Processing planning output...")
|
162
|
-
code = ev.code
|
163
|
-
|
164
|
-
if code:
|
165
|
-
try:
|
166
|
-
result = await self.executer.execute(ctx, code)
|
167
|
-
logger.info("📝 Planning complete")
|
168
|
-
logger.debug(f" - Planning code executed. Result: {result['output']}")
|
169
|
-
|
170
|
-
screenshots = result['screenshots']
|
171
|
-
for screenshot in screenshots[:-1]: # the last screenshot will be captured by next step
|
172
|
-
ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
|
173
|
-
|
174
|
-
ui_states = result['ui_states']
|
175
|
-
for ui_state in ui_states[:-1]:
|
176
|
-
ctx.write_event_to_stream(RecordUIStateEvent(ui_state=ui_state['a11y_tree']))
|
177
|
-
|
178
|
-
await self.chat_memory.aput(
|
179
|
-
ChatMessage(
|
180
|
-
role="user", content=f"Execution Result:\n```\n{result['output']}\n```"
|
181
|
-
)
|
182
|
-
)
|
183
|
-
|
184
|
-
self.remembered_info = self.tools_instance.memory
|
185
|
-
|
186
|
-
tasks = self.task_manager.get_all_tasks()
|
187
|
-
event = PlanCreatedEvent(tasks=tasks)
|
188
|
-
|
189
|
-
if not self.task_manager.goal_completed:
|
190
|
-
logger.info(f"📋 Current plan created with {len(tasks)} tasks:")
|
191
|
-
for i, task in enumerate(tasks):
|
192
|
-
logger.info(
|
193
|
-
f" Task {i}: [{task.status.upper()}] [{task.agent_type}] {task.description}"
|
194
|
-
)
|
195
|
-
ctx.write_event_to_stream(event)
|
196
|
-
|
197
|
-
return event
|
198
|
-
|
199
|
-
except Exception as e:
|
200
|
-
logger.debug(f"error handling Planner: {e}")
|
201
|
-
await self.chat_memory.aput(
|
202
|
-
ChatMessage(
|
203
|
-
role="user",
|
204
|
-
content="""Please either set new tasks using set_tasks_with_agents() or mark the goal as complete using complete_goal() if done.
|
205
|
-
wrap your code inside this:
|
206
|
-
```python
|
207
|
-
<YOUR CODE HERE>
|
208
|
-
```""",
|
209
|
-
)
|
210
|
-
)
|
211
|
-
logger.debug("🔄 Waiting for next plan or completion.")
|
212
|
-
return PlanInputEvent(input=self.chat_memory.get_all())
|
213
|
-
else:
|
214
|
-
await self.chat_memory.aput(
|
215
|
-
ChatMessage(
|
216
|
-
role="user",
|
217
|
-
content="""Please either set new tasks using set_tasks_with_agents() or mark the goal as complete using complete_goal() if done.
|
218
|
-
wrap your code inside this:
|
219
|
-
```python
|
220
|
-
<YOUR CODE HERE>
|
221
|
-
```""",
|
222
|
-
)
|
223
|
-
)
|
224
|
-
logger.debug("🔄 Waiting for next plan or completion.")
|
225
|
-
return PlanInputEvent(input=self.chat_memory.get_all())
|
226
|
-
|
227
|
-
@step
|
228
|
-
async def finalize(self, ev: PlanCreatedEvent, ctx: Context) -> StopEvent:
|
229
|
-
"""Finalize the workflow."""
|
230
|
-
await ctx.store.set("chat_memory", self.chat_memory)
|
231
|
-
|
232
|
-
result = {}
|
233
|
-
result.update(
|
234
|
-
{
|
235
|
-
"tasks": ev.tasks,
|
236
|
-
}
|
237
|
-
)
|
238
|
-
|
239
|
-
return StopEvent(result=result)
|
240
|
-
|
241
|
-
async def _get_llm_response(
|
242
|
-
self, ctx: Context, chat_history: List[ChatMessage]
|
243
|
-
) -> ChatResponse:
|
244
|
-
"""Get streaming response from LLM."""
|
245
|
-
try:
|
246
|
-
logger.debug(f" - Sending {len(chat_history)} messages to LLM.")
|
247
|
-
|
248
|
-
model = self.llm.class_name()
|
249
|
-
if self.vision:
|
250
|
-
if model == "DeepSeek":
|
251
|
-
logger.warning(
|
252
|
-
"[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
|
253
|
-
)
|
254
|
-
else:
|
255
|
-
chat_history = await chat_utils.add_screenshot_image_block(
|
256
|
-
await ctx.store.get("screenshot"), chat_history
|
257
|
-
)
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
chat_history = await chat_utils.add_task_history_block(
|
262
|
-
#self.task_manager.get_completed_tasks(),
|
263
|
-
#self.task_manager.get_failed_tasks(),
|
264
|
-
self.task_manager.get_task_history(),
|
265
|
-
chat_history,
|
266
|
-
)
|
267
|
-
|
268
|
-
remembered_info = await ctx.store.get("remembered_info", default=None)
|
269
|
-
if remembered_info:
|
270
|
-
chat_history = await chat_utils.add_memory_block(remembered_info, chat_history)
|
271
|
-
|
272
|
-
chat_history = await chat_utils.add_phone_state_block(await ctx.store.get("phone_state"), chat_history)
|
273
|
-
chat_history = await chat_utils.add_ui_text_block(await ctx.store.get("ui_state"), chat_history)
|
274
|
-
|
275
|
-
limited_history = self._limit_history(chat_history)
|
276
|
-
messages_to_send = [self.system_message] + limited_history
|
277
|
-
messages_to_send = [
|
278
|
-
chat_utils.message_copy(msg) for msg in messages_to_send
|
279
|
-
]
|
280
|
-
|
281
|
-
logger.debug(f" - Final message count: {len(messages_to_send)}")
|
282
|
-
|
283
|
-
response = await self.llm.achat(messages=messages_to_send)
|
284
|
-
assert hasattr(
|
285
|
-
response, "message"
|
286
|
-
), f"LLM response does not have a message attribute.\nResponse: {response}"
|
287
|
-
logger.debug(" - Received response from LLM.")
|
288
|
-
return response
|
289
|
-
except Exception as e:
|
290
|
-
logger.error(f"Could not get an answer from LLM: {repr(e)}")
|
291
|
-
raise e
|
292
|
-
|
293
|
-
def _limit_history(
|
294
|
-
self, chat_history: List[ChatMessage]
|
295
|
-
) -> List[ChatMessage]:
|
296
|
-
if LLM_HISTORY_LIMIT <= 0:
|
297
|
-
return chat_history
|
298
|
-
|
299
|
-
max_messages = LLM_HISTORY_LIMIT * 2
|
300
|
-
if len(chat_history) <= max_messages:
|
301
|
-
return chat_history
|
302
|
-
|
303
|
-
preserved_head: List[ChatMessage] = []
|
304
|
-
if chat_history and chat_history[0].role == "user":
|
305
|
-
preserved_head = [chat_history[0]]
|
306
|
-
|
307
|
-
tail = chat_history[-max_messages:]
|
308
|
-
if preserved_head and preserved_head[0] in tail:
|
309
|
-
preserved_head = []
|
310
|
-
|
311
|
-
return preserved_head + tail
|
@@ -1,124 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Prompt templates for the PlannerAgent.
|
3
|
-
|
4
|
-
This module contains all the prompts used by the PlannerAgent,
|
5
|
-
separated from the workflow logic for better maintainability.
|
6
|
-
"""
|
7
|
-
|
8
|
-
# System prompt for the PlannerAgent that explains its role and capabilities
|
9
|
-
DEFAULT_PLANNER_SYSTEM_PROMPT = """You are an Android Task Planner. Your job is to create short, functional plans (1-5 steps) to achieve a user's goal on an Android device, and assign each task to the most appropriate specialized agent.
|
10
|
-
|
11
|
-
**Inputs You Receive:**
|
12
|
-
1. **User's Overall Goal.**
|
13
|
-
2. **Current Device State:**
|
14
|
-
* A **screenshot** of the current screen.
|
15
|
-
* **JSON data** of visible UI elements.
|
16
|
-
* The current visible Android activity
|
17
|
-
3. **Complete Task History:**
|
18
|
-
* A record of ALL tasks that have been completed or failed throughout the session.
|
19
|
-
* For completed tasks, the results and any discovered information.
|
20
|
-
* For failed tasks, the detailed reasons for failure.
|
21
|
-
* This history persists across all planning cycles and is never lost, even when creating new tasks.
|
22
|
-
|
23
|
-
**Available Specialized Agents:**
|
24
|
-
You have access to specialized agents, each optimized for specific types of tasks:
|
25
|
-
{agents}
|
26
|
-
|
27
|
-
**Your Task:**
|
28
|
-
Given the goal, current state, and task history, devise the **next 1-5 functional steps** and assign each to the most appropriate specialized agent.
|
29
|
-
Focus on what to achieve, not how. Planning fewer steps at a time improves accuracy, as the state can change.
|
30
|
-
|
31
|
-
**Step Format:**
|
32
|
-
Each step must be a functional goal.
|
33
|
-
A **precondition** describing the expected starting screen/state for that step is highly recommended for clarity, especially for steps after the first in your 1-5 step plan.
|
34
|
-
Each task string can start with "Precondition: ... Goal: ...".
|
35
|
-
If a specific precondition isn't critical for the first step in your current plan segment, you can use "Precondition: None. Goal: ..." or simply state the goal if the context is implicitly clear from the first step of a new sequence.
|
36
|
-
|
37
|
-
**Your Output:**
|
38
|
-
* Use the `set_tasks_with_agents` tool to provide your 1-5 step plan with agent assignments.
|
39
|
-
* Each task should be assigned to a specialized agent using it's name.
|
40
|
-
|
41
|
-
* **After your planned steps are executed, you will be invoked again with the new device state.**
|
42
|
-
You will then:
|
43
|
-
1. Assess if the **overall user goal** is complete.
|
44
|
-
2. If complete, call the `complete_goal(message: str)` tool.
|
45
|
-
3. If not complete, generate the next 1-5 steps using `set_tasks_with_agents`.
|
46
|
-
|
47
|
-
**Memory Persistence:**
|
48
|
-
* You maintain a COMPLETE memory of ALL tasks across the entire session:
|
49
|
-
* Every task that was completed or failed is preserved in your context.
|
50
|
-
* Previously completed steps are never lost when calling `set_tasks_with_agents()` for new steps.
|
51
|
-
* You will see all historical tasks each time you're called.
|
52
|
-
* Use this accumulated knowledge to build progressively on successful steps.
|
53
|
-
* When you see discovered information (e.g., dates, locations), use it explicitly in future tasks.
|
54
|
-
|
55
|
-
**Key Rules:**
|
56
|
-
* **Functional Goals ONLY:** (e.g., "Navigate to Wi-Fi settings", "Enter 'MyPassword' into the password field").
|
57
|
-
* **NO Low-Level Actions:** Do NOT specify swipes, taps on coordinates, or element IDs in your plan.
|
58
|
-
* **Short Plans (1-5 steps):** Plan only the immediate next actions.
|
59
|
-
* **Learn From History:** If a task failed previously, try a different approach.
|
60
|
-
* **Use Tools:** Your response *must* be a Python code block calling `set_tasks_with_agents` or `complete_goal`.
|
61
|
-
* **Smart Agent Assignment:** Choose the most appropriate agent for each task type.
|
62
|
-
|
63
|
-
**Available Planning Tools:**
|
64
|
-
* `set_tasks_with_agents(task_assignments: List[Dict[str, str]])`: Defines the sequence of tasks with agent assignments. Each element should be a dictionary with 'task' and 'agent' keys.
|
65
|
-
* `complete_goal(message: str)`: Call this when the overall user goal has been achieved. The message can summarize the completion.
|
66
|
-
|
67
|
-
---
|
68
|
-
|
69
|
-
**Example Interaction Flow:**
|
70
|
-
|
71
|
-
**User Goal:** Open Gmail and compose a new email.
|
72
|
-
|
73
|
-
**(Round 1) Planner Input:**
|
74
|
-
* Goal: "Open Gmail and compose a new email"
|
75
|
-
* Current State: Screenshot of Home screen, UI JSON.
|
76
|
-
* Task History: None (first planning cycle)
|
77
|
-
|
78
|
-
**Planner Thought Process (Round 1):**
|
79
|
-
Need to first open Gmail app, then navigate to compose. The first task is app launching, the second is UI navigation.
|
80
|
-
|
81
|
-
**Planner Output (Round 1):**
|
82
|
-
```python
|
83
|
-
set_tasks_with_agents([
|
84
|
-
{{'task': 'Precondition: None. Goal: Open the Gmail app.', 'agent': <Specialized_Agent>}},
|
85
|
-
{{'task': 'Precondition: Gmail app is open and loaded. Goal: Navigate to compose new email.', 'agent': <Specialized Agents>}}
|
86
|
-
])
|
87
|
-
```
|
88
|
-
|
89
|
-
**(After specialized agents perform these steps...)**
|
90
|
-
|
91
|
-
**(Round 2) Planner Input:**
|
92
|
-
* Goal: "Open Gmail and compose a new email"
|
93
|
-
* Current State: Screenshot of Gmail compose screen, UI JSON showing compose interface.
|
94
|
-
* Task History: Shows completed tasks with their assigned agents
|
95
|
-
|
96
|
-
**Planner Output (Round 2):**
|
97
|
-
```python
|
98
|
-
complete_goal(message="Gmail has been opened and compose email screen is ready for use.")
|
99
|
-
```
|
100
|
-
"""
|
101
|
-
|
102
|
-
# User prompt template that simply states the goal
|
103
|
-
DEFAULT_PLANNER_USER_PROMPT = """Goal: {goal}"""
|
104
|
-
|
105
|
-
# Prompt template for when a task fails, to help recover and plan new steps
|
106
|
-
DEFAULT_PLANNER_TASK_FAILED_PROMPT = """
|
107
|
-
PLANNING UPDATE: The execution of a task failed.
|
108
|
-
|
109
|
-
Failed Task Description: "{task_description}"
|
110
|
-
Reported Reason: {reason}
|
111
|
-
|
112
|
-
The previous plan has been stopped. I have attached a screenshot representing the device's **current state** immediately after the failure. Please analyze this visual information.
|
113
|
-
|
114
|
-
Original Goal: {goal}
|
115
|
-
|
116
|
-
Instruction: Based **only** on the provided screenshot showing the current state and the reason for the previous failure ('{reason}'), generate a NEW plan starting from this observed state to achieve the original goal: '{goal}'.
|
117
|
-
"""
|
118
|
-
|
119
|
-
# Export all prompts
|
120
|
-
__all__ = [
|
121
|
-
"DEFAULT_PLANNER_SYSTEM_PROMPT",
|
122
|
-
"DEFAULT_PLANNER_USER_PROMPT",
|
123
|
-
"DEFAULT_PLANNER_TASK_FAILED_PROMPT"
|
124
|
-
]
|