droidrun 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/__init__.py +16 -11
- droidrun/__main__.py +1 -1
- droidrun/adb/__init__.py +3 -3
- droidrun/adb/device.py +1 -1
- droidrun/adb/manager.py +2 -2
- droidrun/agent/__init__.py +6 -0
- droidrun/agent/codeact/__init__.py +2 -4
- droidrun/agent/codeact/codeact_agent.py +321 -235
- droidrun/agent/codeact/events.py +12 -20
- droidrun/agent/codeact/prompts.py +0 -52
- droidrun/agent/common/default.py +5 -0
- droidrun/agent/common/events.py +4 -0
- droidrun/agent/context/__init__.py +23 -0
- droidrun/agent/context/agent_persona.py +15 -0
- droidrun/agent/context/context_injection_manager.py +66 -0
- droidrun/agent/context/episodic_memory.py +15 -0
- droidrun/agent/context/personas/__init__.py +11 -0
- droidrun/agent/context/personas/app_starter.py +44 -0
- droidrun/agent/context/personas/default.py +95 -0
- droidrun/agent/context/personas/extractor.py +52 -0
- droidrun/agent/context/personas/ui_expert.py +107 -0
- droidrun/agent/context/reflection.py +20 -0
- droidrun/agent/context/task_manager.py +124 -0
- droidrun/agent/context/todo.txt +4 -0
- droidrun/agent/droid/__init__.py +2 -2
- droidrun/agent/droid/droid_agent.py +264 -325
- droidrun/agent/droid/events.py +28 -0
- droidrun/agent/oneflows/reflector.py +265 -0
- droidrun/agent/planner/__init__.py +2 -4
- droidrun/agent/planner/events.py +9 -13
- droidrun/agent/planner/planner_agent.py +268 -0
- droidrun/agent/planner/prompts.py +33 -53
- droidrun/agent/utils/__init__.py +3 -0
- droidrun/agent/utils/async_utils.py +1 -40
- droidrun/agent/utils/chat_utils.py +268 -48
- droidrun/agent/utils/executer.py +49 -14
- droidrun/agent/utils/llm_picker.py +14 -10
- droidrun/agent/utils/trajectory.py +184 -0
- droidrun/cli/__init__.py +1 -1
- droidrun/cli/logs.py +283 -0
- droidrun/cli/main.py +333 -439
- droidrun/run.py +105 -0
- droidrun/tools/__init__.py +5 -10
- droidrun/tools/{actions.py → adb.py} +279 -238
- droidrun/tools/ios.py +594 -0
- droidrun/tools/tools.py +99 -0
- droidrun-0.3.0.dist-info/METADATA +149 -0
- droidrun-0.3.0.dist-info/RECORD +52 -0
- droidrun/agent/planner/task_manager.py +0 -355
- droidrun/agent/planner/workflow.py +0 -371
- droidrun/tools/device.py +0 -29
- droidrun/tools/loader.py +0 -60
- droidrun-0.2.0.dist-info/METADATA +0 -373
- droidrun-0.2.0.dist-info/RECORD +0 -32
- {droidrun-0.2.0.dist-info → droidrun-0.3.0.dist-info}/WHEEL +0 -0
- {droidrun-0.2.0.dist-info → droidrun-0.3.0.dist-info}/entry_points.txt +0 -0
- {droidrun-0.2.0.dist-info → droidrun-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,28 @@
|
|
1
|
+
from llama_index.core.workflow import Event
|
2
|
+
from droidrun.agent.context import Reflection, Task
|
3
|
+
from typing import List, Optional
|
4
|
+
|
5
|
+
class CodeActExecuteEvent(Event):
    """Workflow event that carries one task plus an optional reflection.

    NOTE(review): presumably emitted to hand a task to the CodeAct agent;
    the emitting and consuming steps are not visible here - confirm.
    """

    # The task attached to this event.
    task: Task
    # A reflection attached to this event, or None. No default is declared,
    # so the value is expected to be supplied explicitly.
    reflection: Optional[Reflection]
|
8
|
+
|
9
|
+
class CodeActResultEvent(Event):
    """Workflow event reporting an outcome as a success flag, a reason and a step count."""

    # Whether the reported run succeeded.
    success: bool
    # Free-text explanation accompanying the outcome.
    reason: str
    # Step count reported with the outcome.
    steps: int
|
13
|
+
|
14
|
+
class ReasoningLogicEvent(Event):
    """Workflow event that may carry a reflection; the reflection defaults to None."""

    # Optional reflection attached to this event.
    reflection: Optional[Reflection] = None
|
16
|
+
|
17
|
+
class FinalizeEvent(Event):
    """Workflow event carrying a final outcome: success flag, reason, tasks and step count."""

    # Whether the overall run succeeded.
    success: bool
    # Free-text explanation accompanying the outcome.
    reason: str
    # Despite the singular name, this field holds a list of tasks.
    task: List[Task]
    # Step count reported with the outcome; defaults to 1.
    steps: int = 1
|
22
|
+
|
23
|
+
class TaskRunnerEvent(Event):
    """Payload-free workflow event; it declares no fields of its own."""
|
25
|
+
|
26
|
+
class ReflectionEvent(Event):
    """Workflow event that carries the task a reflection relates to.

    NOTE(review): presumably triggers the reflection step for ``task``;
    the consuming step is not visible here - confirm.
    """

    # The task attached to this event.
    task: Task
|
@@ -0,0 +1,265 @@
|
|
1
|
+
from llama_index.core.llms.llm import LLM
|
2
|
+
from droidrun.agent.context import EpisodicMemory
|
3
|
+
from droidrun.agent.context.reflection import Reflection
|
4
|
+
from llama_index.core.base.llms.types import ChatMessage, ImageBlock
|
5
|
+
from droidrun.agent.utils.chat_utils import add_screenshot_image_block
|
6
|
+
from droidrun.agent.context.agent_persona import AgentPersona
|
7
|
+
import json
|
8
|
+
from typing import Dict, Any, List, Optional
|
9
|
+
import logging
|
10
|
+
from PIL import Image, ImageDraw, ImageFont
|
11
|
+
import io
|
12
|
+
|
13
|
+
logger = logging.getLogger("droidrun")
|
14
|
+
|
15
|
+
class Reflector:
|
16
|
+
def __init__(
|
17
|
+
self,
|
18
|
+
llm: LLM,
|
19
|
+
debug: bool = False,
|
20
|
+
*args,
|
21
|
+
**kwargs
|
22
|
+
):
|
23
|
+
self.llm = llm
|
24
|
+
self.debug = debug
|
25
|
+
|
26
|
+
async def reflect_on_episodic_memory(self, episodic_memory: EpisodicMemory, goal: str) -> Reflection:
|
27
|
+
"""Analyze episodic memory and provide reflection on the agent's performance."""
|
28
|
+
system_prompt_content = self._create_system_prompt()
|
29
|
+
system_prompt = ChatMessage(role="system", content=system_prompt_content)
|
30
|
+
|
31
|
+
episodic_memory_content = self._format_episodic_memory(episodic_memory)
|
32
|
+
persona_content = self._format_persona(episodic_memory.persona)
|
33
|
+
|
34
|
+
# Create user message content with persona information
|
35
|
+
user_content = f"{persona_content}\n\nGoal: {goal}\n\nEpisodic Memory Steps:\n{episodic_memory_content}\n\nPlease evaluate if the goal was achieved and provide your analysis in the specified JSON format."
|
36
|
+
|
37
|
+
# Create user message
|
38
|
+
user_message = ChatMessage(role="user", content=user_content)
|
39
|
+
|
40
|
+
# Create the screenshots grid and add as ImageBlock if screenshots exist
|
41
|
+
screenshots_grid = self._create_screenshots_grid(episodic_memory)
|
42
|
+
|
43
|
+
if screenshots_grid:
|
44
|
+
# Use the add_screenshot_image_block function to properly add the image
|
45
|
+
messages_list = [system_prompt, user_message]
|
46
|
+
messages_list = await add_screenshot_image_block(screenshots_grid, messages_list, copy=False)
|
47
|
+
messages = messages_list
|
48
|
+
else:
|
49
|
+
messages = [system_prompt, user_message]
|
50
|
+
response = await self.llm.achat(messages=messages)
|
51
|
+
|
52
|
+
logger.info(f"REFLECTION {response.message.content}")
|
53
|
+
|
54
|
+
try:
|
55
|
+
# Clean the response content to handle markdown code blocks
|
56
|
+
content = response.message.content.strip()
|
57
|
+
|
58
|
+
# Remove markdown code block formatting if present
|
59
|
+
if content.startswith('```json'):
|
60
|
+
content = content[7:] # Remove ```json
|
61
|
+
elif content.startswith('```'):
|
62
|
+
content = content[3:] # Remove ```
|
63
|
+
|
64
|
+
if content.endswith('```'):
|
65
|
+
content = content[:-3] # Remove trailing ```
|
66
|
+
|
67
|
+
content = content.strip()
|
68
|
+
|
69
|
+
parsed_response = json.loads(content)
|
70
|
+
return Reflection.from_dict(parsed_response)
|
71
|
+
except json.JSONDecodeError as e:
|
72
|
+
logger.error(f"Failed to parse reflection response: {e}")
|
73
|
+
logger.error(f"Raw response: {response.message.content}")
|
74
|
+
return await self.reflect_on_episodic_memory(episodic_memory=episodic_memory, goal=goal)
|
75
|
+
|
76
|
+
def _create_screenshots_grid(self, episodic_memory: EpisodicMemory) -> Optional[bytes]:
|
77
|
+
"""Create a 3x2 grid of screenshots from episodic memory steps."""
|
78
|
+
# Extract screenshots from steps
|
79
|
+
screenshots = []
|
80
|
+
for step in episodic_memory.steps:
|
81
|
+
if step.screenshot:
|
82
|
+
try:
|
83
|
+
# Convert bytes to PIL Image
|
84
|
+
screenshot_image = Image.open(io.BytesIO(step.screenshot))
|
85
|
+
screenshots.append(screenshot_image)
|
86
|
+
except Exception as e:
|
87
|
+
logger.warning(f"Failed to load screenshot: {e}")
|
88
|
+
continue
|
89
|
+
|
90
|
+
if not screenshots:
|
91
|
+
return None
|
92
|
+
|
93
|
+
num_screenshots = min(len(screenshots), 6)
|
94
|
+
cols, rows = num_screenshots, 1
|
95
|
+
|
96
|
+
screenshots = screenshots[:num_screenshots]
|
97
|
+
|
98
|
+
if not screenshots:
|
99
|
+
return None
|
100
|
+
|
101
|
+
if screenshots:
|
102
|
+
cell_width = screenshots[0].width // 2
|
103
|
+
cell_height = screenshots[0].height // 2
|
104
|
+
else:
|
105
|
+
return None
|
106
|
+
|
107
|
+
# Define header bar height
|
108
|
+
header_height = 60
|
109
|
+
|
110
|
+
# Create the grid image with space for header bars
|
111
|
+
grid_width = cols * cell_width
|
112
|
+
grid_height = rows * (cell_height + header_height)
|
113
|
+
grid_image = Image.new('RGB', (grid_width, grid_height), color='white')
|
114
|
+
|
115
|
+
# Set up font for step text
|
116
|
+
draw = ImageDraw.Draw(grid_image)
|
117
|
+
try:
|
118
|
+
# Use larger font for header text
|
119
|
+
font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 48)
|
120
|
+
except:
|
121
|
+
font = ImageFont.load_default()
|
122
|
+
|
123
|
+
# Place screenshots in the grid with header bars
|
124
|
+
for i, screenshot in enumerate(screenshots):
|
125
|
+
row = i // cols
|
126
|
+
col = i % cols
|
127
|
+
|
128
|
+
# Calculate positions
|
129
|
+
x = col * cell_width
|
130
|
+
header_y = row * (cell_height + header_height)
|
131
|
+
screenshot_y = header_y + header_height
|
132
|
+
|
133
|
+
# Create header bar
|
134
|
+
header_rect = [x, header_y, x + cell_width, header_y + header_height]
|
135
|
+
draw.rectangle(header_rect, fill='#2c3e50') # Dark blue header
|
136
|
+
|
137
|
+
# Draw step text in header bar
|
138
|
+
text = f"Step {i+1}"
|
139
|
+
# Get text dimensions for centering
|
140
|
+
bbox = draw.textbbox((0, 0), text, font=font)
|
141
|
+
text_width = bbox[2] - bbox[0]
|
142
|
+
text_height = bbox[3] - bbox[1]
|
143
|
+
|
144
|
+
# Center text in header bar
|
145
|
+
text_x = x + (cell_width - text_width) // 2
|
146
|
+
text_y = header_y + (header_height - text_height) // 2
|
147
|
+
|
148
|
+
draw.text((text_x, text_y), text, fill='white', font=font)
|
149
|
+
|
150
|
+
# Resize and place screenshot below header
|
151
|
+
resized_screenshot = screenshot.resize((cell_width, cell_height), Image.Resampling.LANCZOS)
|
152
|
+
grid_image.paste(resized_screenshot, (x, screenshot_y))
|
153
|
+
|
154
|
+
# Save grid to disk for debugging (only if debug flag is enabled)
|
155
|
+
if self.debug:
|
156
|
+
import os
|
157
|
+
from datetime import datetime
|
158
|
+
|
159
|
+
# Create debug directory if it doesn't exist
|
160
|
+
debug_dir = "reflection_screenshots"
|
161
|
+
os.makedirs(debug_dir, exist_ok=True)
|
162
|
+
|
163
|
+
# Save with timestamp
|
164
|
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
165
|
+
debug_filename = os.path.join(debug_dir, f"screenshot_grid_{timestamp}.png")
|
166
|
+
grid_image.save(debug_filename)
|
167
|
+
logger.info(f"Screenshot grid saved to: {debug_filename}")
|
168
|
+
|
169
|
+
# Convert to bytes for use with add_screenshot_image_block
|
170
|
+
buffer = io.BytesIO()
|
171
|
+
grid_image.save(buffer, format='PNG')
|
172
|
+
buffer.seek(0)
|
173
|
+
|
174
|
+
return buffer.getvalue()
|
175
|
+
|
176
|
+
def _create_system_prompt(self) -> str:
|
177
|
+
"""Create a system prompt with reflection instructions."""
|
178
|
+
system_prompt = """You are a Reflector AI that analyzes the performance of an Android Agent. Your role is to examine episodic memory steps and evaluate whether the agent achieved its goal.
|
179
|
+
|
180
|
+
EVALUATION PROCESS:
|
181
|
+
1. First, determine if the agent achieved the stated goal based on the episodic memory steps
|
182
|
+
2. If the goal was achieved, acknowledge the success
|
183
|
+
3. If the goal was NOT achieved, analyze what went wrong and provide direct advice
|
184
|
+
4. Use the provided screenshots (if any) to understand the visual context of each step
|
185
|
+
The screenshots show a screen the agent saw. It is in chronological order from left to right
|
186
|
+
|
187
|
+
ANALYSIS AREAS (for failed goals):
|
188
|
+
- Missed opportunities or inefficient actions
|
189
|
+
- Incorrect tool usage or navigation choices
|
190
|
+
- Failure to understand context or user intent
|
191
|
+
- Suboptimal decision-making patterns
|
192
|
+
|
193
|
+
ADVICE GUIDELINES (for failed goals):
|
194
|
+
- Address the agent directly using "you" form with present/future focus (e.g., "You need to...", "Look for...", "Focus on...")
|
195
|
+
- Provide situational awareness advice that helps with the current state after the failed attempt
|
196
|
+
- Give actionable guidance for what to do NOW when retrying the goal, not what went wrong before
|
197
|
+
- Consider the current app state and context the agent will face when retrying
|
198
|
+
- Focus on the key strategy or approach needed for success in the current situation
|
199
|
+
- Keep it concise but precise (1-2 sentences)
|
200
|
+
|
201
|
+
OUTPUT FORMAT:
|
202
|
+
You MUST respond with a valid JSON object in this exact format:
|
203
|
+
|
204
|
+
{{
|
205
|
+
"goal_achieved": true,
|
206
|
+
"advice": null,
|
207
|
+
"summary": "Brief summary of what happened"
|
208
|
+
}}
|
209
|
+
|
210
|
+
OR
|
211
|
+
|
212
|
+
{{
|
213
|
+
"goal_achieved": false,
|
214
|
+
"advice": "Direct advice using 'you' form focused on current situation - what you need to do NOW when retrying",
|
215
|
+
"summary": "Brief summary of what happened"
|
216
|
+
}}
|
217
|
+
|
218
|
+
IMPORTANT:
|
219
|
+
- If goal_achieved is true, set advice to null
|
220
|
+
- If goal_achieved is false, provide direct "you" form advice focused on what to do NOW in the current situation when retrying
|
221
|
+
- Advice should be forward-looking and situational, not retrospective about past mistakes
|
222
|
+
- Always include a brief summary of the agent's performance
|
223
|
+
- Ensure the JSON is valid and parsable
|
224
|
+
- ONLY return the JSON object, no additional text or formatting"""
|
225
|
+
|
226
|
+
return system_prompt
|
227
|
+
|
228
|
+
def _format_persona(self, persona: AgentPersona) -> str:
|
229
|
+
"""Format the agent persona information for the user prompt."""
|
230
|
+
persona_content = f"""ACTOR AGENT PERSONA:
|
231
|
+
- Name: {persona.name}
|
232
|
+
- Description: {persona.description}
|
233
|
+
- Available Tools: {', '.join(persona.allowed_tools)}
|
234
|
+
- Expertise Areas: {', '.join(persona.expertise_areas)}
|
235
|
+
- System Prompt: {persona.system_prompt}"""
|
236
|
+
|
237
|
+
return persona_content
|
238
|
+
|
239
|
+
def _format_episodic_memory(self, episodic_memory: EpisodicMemory) -> str:
|
240
|
+
"""Format the episodic memory steps into a readable format for analysis."""
|
241
|
+
formatted_steps = []
|
242
|
+
|
243
|
+
for i, step in enumerate(episodic_memory.steps, 1):
|
244
|
+
try:
|
245
|
+
# Parse the JSON strings to get the original content without escape characters
|
246
|
+
chat_history = json.loads(step.chat_history)
|
247
|
+
response = json.loads(step.response)
|
248
|
+
|
249
|
+
|
250
|
+
formatted_step = f"""Step {i}:
|
251
|
+
Chat History: {json.dumps(chat_history, indent=2)}
|
252
|
+
Response: {json.dumps(response, indent=2)}
|
253
|
+
Timestamp: {step.timestamp}
|
254
|
+
---"""
|
255
|
+
except json.JSONDecodeError as e:
|
256
|
+
# Fallback to original format if JSON parsing fails
|
257
|
+
logger.warning(f"Failed to parse JSON for step {i}: {e}")
|
258
|
+
formatted_step = f"""Step {i}:
|
259
|
+
Chat History: {step.chat_history}
|
260
|
+
Response: {step.response}
|
261
|
+
Timestamp: {step.timestamp}
|
262
|
+
---"""
|
263
|
+
formatted_steps.append(formatted_step)
|
264
|
+
|
265
|
+
return "\n".join(formatted_steps)
|
@@ -1,6 +1,5 @@
|
|
1
|
-
from .
|
2
|
-
from .
|
3
|
-
from .prompts import (
|
1
|
+
from droidrun.agent.planner.planner_agent import PlannerAgent
|
2
|
+
from droidrun.agent.planner.prompts import (
|
4
3
|
DEFAULT_PLANNER_SYSTEM_PROMPT,
|
5
4
|
DEFAULT_PLANNER_USER_PROMPT,
|
6
5
|
DEFAULT_PLANNER_TASK_FAILED_PROMPT
|
@@ -8,7 +7,6 @@ from .prompts import (
|
|
8
7
|
|
9
8
|
__all__ = [
|
10
9
|
"PlannerAgent",
|
11
|
-
"TaskManager",
|
12
10
|
"DEFAULT_PLANNER_SYSTEM_PROMPT",
|
13
11
|
"DEFAULT_PLANNER_USER_PROMPT",
|
14
12
|
"DEFAULT_PLANNER_TASK_FAILED_PROMPT"
|
droidrun/agent/planner/events.py
CHANGED
@@ -1,20 +1,16 @@
|
|
1
|
-
from typing import List
|
2
|
-
from llama_index.core.llms import ChatMessage
|
3
1
|
from llama_index.core.workflow import Event
|
4
|
-
from llama_index.core.
|
2
|
+
from llama_index.core.base.llms.types import ChatMessage
|
3
|
+
from typing import Optional, Any
|
4
|
+
from droidrun.agent.context import Task
|
5
5
|
|
6
|
-
|
7
|
-
class InputEvent(Event):
|
6
|
+
class PlanInputEvent(Event):
    """Workflow event carrying the chat messages to be sent to the planner LLM."""

    # Chat history handed to the next step.
    input: list[ChatMessage]
|
9
8
|
|
10
|
-
class ModelResponseEvent(Event):
|
11
|
-
response: str
|
12
|
-
|
13
9
|
|
14
|
-
class
|
15
|
-
|
10
|
+
class PlanThinkingEvent(Event):
    """Workflow event holding the thoughts and code taken from a planner LLM reply."""

    # Free-text reasoning from the reply, if any.
    thoughts: Optional[str] = None
    # Code from the reply, if any.
    code: Optional[str] = None
|
16
13
|
|
17
|
-
class TaskFailedEvent(Event):
|
18
|
-
task_description: str
|
19
|
-
reason: str
|
20
14
|
|
15
|
+
class PlanCreatedEvent(Event):
    """Workflow event carrying the list of tasks that make up a plan."""

    # Tasks attached to this event.
    tasks: list[Task]
|
@@ -0,0 +1,268 @@
|
|
1
|
+
from droidrun.agent.planner.events import *
|
2
|
+
from droidrun.agent.planner.prompts import (
|
3
|
+
DEFAULT_PLANNER_SYSTEM_PROMPT,
|
4
|
+
DEFAULT_PLANNER_USER_PROMPT,
|
5
|
+
)
|
6
|
+
import logging
|
7
|
+
import asyncio
|
8
|
+
from typing import List, TYPE_CHECKING, Union
|
9
|
+
import inspect
|
10
|
+
from llama_index.core.base.llms.types import ChatMessage, ChatResponse
|
11
|
+
from llama_index.core.prompts import PromptTemplate
|
12
|
+
from llama_index.core.llms.llm import LLM
|
13
|
+
from llama_index.core.workflow import Workflow, StartEvent, StopEvent, Context, step
|
14
|
+
from llama_index.core.memory import Memory
|
15
|
+
from llama_index.core.llms.llm import LLM
|
16
|
+
from droidrun.agent.utils.executer import SimpleCodeExecutor
|
17
|
+
from droidrun.agent.utils import chat_utils
|
18
|
+
from droidrun.agent.context.task_manager import TaskManager
|
19
|
+
from droidrun.tools import Tools
|
20
|
+
from droidrun.agent.common.events import ScreenshotEvent
|
21
|
+
from droidrun.agent.planner.events import (
|
22
|
+
PlanInputEvent,
|
23
|
+
PlanCreatedEvent,
|
24
|
+
PlanThinkingEvent,
|
25
|
+
)
|
26
|
+
from droidrun.agent.context.agent_persona import AgentPersona
|
27
|
+
from droidrun.agent.context.reflection import Reflection
|
28
|
+
|
29
|
+
from dotenv import load_dotenv
|
30
|
+
|
31
|
+
load_dotenv()
|
32
|
+
|
33
|
+
# Setup logger
|
34
|
+
logger = logging.getLogger("droidrun")
|
35
|
+
|
36
|
+
if TYPE_CHECKING:
|
37
|
+
from droidrun.tools import Tools
|
38
|
+
|
39
|
+
|
40
|
+
class PlannerAgent(Workflow):
    """Workflow that asks an LLM to plan a goal by emitting code for the task manager.

    Steps: ``prepare_chat`` seeds the chat memory, ``handle_llm_input`` gathers
    device state and queries the LLM, ``handle_llm_output`` executes the code
    from the reply (which is expected to call ``set_tasks_with_agents`` or
    ``complete_goal``), and ``finalize`` returns the resulting tasks.
    """

    # Reminder added to the chat when a reply contained no code or its code failed.
    _PLAN_REMINDER = "Please either set new tasks using set_tasks_with_agents() or mark the goal as complete using complete_goal() if done."

    def __init__(
        self,
        goal: str,
        llm: LLM,
        personas: List[AgentPersona],
        task_manager: TaskManager,
        tools_instance: Tools,
        system_prompt=None,
        user_prompt=None,
        debug=False,
        *args,
        **kwargs,
    ) -> None:
        """Set up prompts, the planner's tool table and the code executor.

        Args:
            goal: The goal to plan for; inserted into the default user prompt.
            llm: Chat model used for planning.
            personas: Agent personas described to the model in the default
                system prompt.
            task_manager: Provides ``set_tasks_with_agents`` and
                ``complete_goal``, the only tools exposed to the generated code.
            tools_instance: Device tools used for screenshots and UI/phone state.
            system_prompt: Overrides the default system prompt when truthy.
            user_prompt: Overrides the default user prompt when truthy.
            debug: Stored on the instance.
            *args, **kwargs: Forwarded to ``Workflow.__init__``.
        """
        super().__init__(*args, **kwargs)

        self.llm = llm
        self.goal = goal
        self.task_manager = task_manager
        self.debug = debug

        self.chat_memory = None
        self.remembered_info = None
        self.reflection: Union[Reflection, None] = None

        self.current_retry = 0
        self.steps_counter = 0

        # Name -> callable table of the tools the generated code may call.
        self.tool_list = {
            tool.__name__: tool
            for tool in (
                self.task_manager.set_tasks_with_agents,
                self.task_manager.complete_goal,
            )
        }

        self.tools_description = chat_utils.parse_tool_descriptions(self.tool_list)
        self.tools_instance = tools_instance

        self.personas = personas

        self.system_prompt = system_prompt or DEFAULT_PLANNER_SYSTEM_PROMPT.format(
            tools_description=self.tools_description,
            agents=chat_utils.parse_persona_description(self.personas),
        )
        self.user_prompt = user_prompt or DEFAULT_PLANNER_USER_PROMPT.format(goal=goal)
        self.system_message = ChatMessage(role="system", content=self.system_prompt)
        self.user_message = ChatMessage(role="user", content=self.user_prompt)

        self.executer = SimpleCodeExecutor(
            loop=asyncio.get_event_loop(), globals={}, locals={}, tools=self.tool_list
        )

    @step
    async def prepare_chat(self, ctx: Context, ev: StartEvent) -> PlanInputEvent:
        """Load the chat memory, add the user prompt and pick up start-event inputs.

        ``remembered_info`` and ``reflection`` are optional on the start event;
        a missing attribute is treated like an empty value.
        """
        logger.info("💬 Preparing planning session...")

        self.chat_memory: Memory = await ctx.get(
            "chat_memory", default=Memory.from_defaults()
        )
        # The user prompt is stored exactly once, as a plain-text message.
        # An earlier second insertion wrapped the prompt in a PromptTemplate
        # object, which is not valid message content.
        await self.chat_memory.aput(self.user_message)

        remembered_info = getattr(ev, "remembered_info", None)
        if remembered_info:
            self.remembered_info = remembered_info

        # A falsy or missing reflection resets any previous one.
        self.reflection = getattr(ev, "reflection", None) or None

        input_messages = self.chat_memory.get_all()
        logger.debug(f"  - Memory contains {len(input_messages)} messages")
        return PlanInputEvent(input=input_messages)

    @step
    async def handle_llm_input(
        self, ev: PlanInputEvent, ctx: Context
    ) -> PlanThinkingEvent:
        """Capture device state, query the LLM and split its reply into thoughts and code.

        Raises:
            ValueError: If the incoming chat history is empty.
        """
        chat_history = ev.input
        if not chat_history:
            raise ValueError("Chat history cannot be empty.")

        ctx.write_event_to_stream(ev)

        self.steps_counter += 1
        logger.info("🧠 Thinking about how to plan the goal...")

        # take_screenshot() returns an indexable result; element 1 is used as the image.
        screenshot = (await self.tools_instance.take_screenshot())[1]
        ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
        await ctx.set("screenshot", screenshot)

        await ctx.set("ui_state", await self.tools_instance.get_clickables())
        await ctx.set("phone_state", await self.tools_instance.get_phone_state())
        await ctx.set("remembered_info", self.remembered_info)
        await ctx.set("reflection", self.reflection)

        response = await self._get_llm_response(ctx, chat_history)
        await self.chat_memory.aput(response.message)

        code, thoughts = chat_utils.extract_code_and_thought(response.message.content)

        event = PlanThinkingEvent(thoughts=thoughts, code=code)
        ctx.write_event_to_stream(event)
        return event

    @step
    async def handle_llm_output(
        self, ev: PlanThinkingEvent, ctx: Context
    ) -> Union[PlanInputEvent, PlanCreatedEvent]:
        """Execute the planning code from the reply, or ask the model to try again.

        Returns:
            ``PlanCreatedEvent`` with the task manager's tasks when the code
            ran, otherwise a ``PlanInputEvent`` that loops back to the LLM.
        """
        logger.debug("🤖 Processing planning output...")
        code = ev.code

        if not code:
            return await self._request_plan()

        try:
            result = await self.executer.execute(ctx, code)
            logger.info("📝 Planning complete")
            logger.debug(f"  - Planning code executed. Result: {result}")

            await self.chat_memory.aput(
                ChatMessage(
                    role="user", content=f"Execution Result:\n```\n{result}\n```"
                )
            )

            self.remembered_info = self.tools_instance.memory

            tasks = self.task_manager.get_all_tasks()
            event = PlanCreatedEvent(tasks=tasks)

            if not self.task_manager.goal_completed:
                logger.info(f"📋 Current plan created with {len(tasks)} tasks:")
                for i, task in enumerate(tasks):
                    logger.info(
                        f"  Task {i}: [{task.status.upper()}] [{task.agent_type}] {task.description}"
                    )
                # NOTE(review): the source listing lost its indentation; streaming
                # the event only while the goal is incomplete is assumed - confirm.
                ctx.write_event_to_stream(event)

            return event

        except Exception as e:
            # Deliberate best effort: any failure leads to another planning round.
            logger.debug(f"error handling Planner: {e}")
            return await self._request_plan()

    async def _request_plan(self) -> PlanInputEvent:
        """Add the reminder message to the chat and return an event that re-queries the LLM."""
        await self.chat_memory.aput(
            ChatMessage(role="user", content=self._PLAN_REMINDER)
        )
        logger.debug("🔄 Waiting for next plan or completion.")
        return PlanInputEvent(input=self.chat_memory.get_all())

    @step
    async def finalize(self, ev: PlanCreatedEvent, ctx: Context) -> StopEvent:
        """Persist the chat memory in the context and stop with the planned tasks."""
        await ctx.set("chat_memory", self.chat_memory)
        return StopEvent(result={"tasks": ev.tasks})

    async def _get_llm_response(
        self, ctx: Context, chat_history: List[ChatMessage]
    ) -> ChatResponse:
        """Enrich the chat history with context blocks and get a chat response from the LLM.

        The screenshot (skipped for DeepSeek), task history, remembered info,
        reflection summary, phone state and UI text are added through the
        ``chat_utils`` helpers before the system message is prepended.

        Raises:
            ValueError: If the response has no ``message`` attribute.
            Exception: Any error from the helpers or the LLM is logged and re-raised.
        """
        try:
            logger.debug(f"  - Sending {len(chat_history)} messages to LLM.")

            if self.llm.class_name() != "DeepSeek":
                chat_history = await chat_utils.add_screenshot_image_block(
                    await ctx.get("screenshot"), chat_history
                )
            else:
                logger.warning(
                    "[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
                )

            chat_history = await chat_utils.add_task_history_block(
                self.task_manager.get_completed_tasks(),
                self.task_manager.get_failed_tasks(),
                chat_history,
            )

            remembered_info = await ctx.get("remembered_info", default=None)
            if remembered_info:
                chat_history = await chat_utils.add_memory_block(remembered_info, chat_history)

            reflection = await ctx.get("reflection", None)
            if reflection:
                chat_history = await chat_utils.add_reflection_summary(reflection, chat_history)

            chat_history = await chat_utils.add_phone_state_block(await ctx.get("phone_state"), chat_history)
            chat_history = await chat_utils.add_ui_text_block(await ctx.get("ui_state"), chat_history)

            # Copies are sent so the stored messages are not shared with the LLM call.
            messages_to_send = [
                chat_utils.message_copy(msg)
                for msg in [self.system_message] + chat_history
            ]

            logger.debug(f"  - Final message count: {len(messages_to_send)}")

            response = await self.llm.achat(messages=messages_to_send)
            if not hasattr(response, "message"):
                raise ValueError(
                    f"LLM response does not have a message attribute.\nResponse: {response}"
                )
            logger.debug("  - Received response from LLM.")
            return response
        except Exception as e:
            logger.error(f"Could not get an answer from LLM: {repr(e)}")
            raise
|