droidrun 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/__init__.py +22 -10
- droidrun/__main__.py +1 -2
- droidrun/adb/__init__.py +3 -3
- droidrun/adb/device.py +2 -2
- droidrun/adb/manager.py +2 -2
- droidrun/agent/__init__.py +5 -15
- droidrun/agent/codeact/__init__.py +11 -0
- droidrun/agent/codeact/codeact_agent.py +420 -0
- droidrun/agent/codeact/events.py +28 -0
- droidrun/agent/codeact/prompts.py +26 -0
- droidrun/agent/common/default.py +5 -0
- droidrun/agent/common/events.py +4 -0
- droidrun/agent/context/__init__.py +23 -0
- droidrun/agent/context/agent_persona.py +15 -0
- droidrun/agent/context/context_injection_manager.py +66 -0
- droidrun/agent/context/episodic_memory.py +15 -0
- droidrun/agent/context/personas/__init__.py +11 -0
- droidrun/agent/context/personas/app_starter.py +44 -0
- droidrun/agent/context/personas/default.py +95 -0
- droidrun/agent/context/personas/extractor.py +52 -0
- droidrun/agent/context/personas/ui_expert.py +107 -0
- droidrun/agent/context/reflection.py +20 -0
- droidrun/agent/context/task_manager.py +124 -0
- droidrun/agent/context/todo.txt +4 -0
- droidrun/agent/droid/__init__.py +13 -0
- droidrun/agent/droid/droid_agent.py +357 -0
- droidrun/agent/droid/events.py +28 -0
- droidrun/agent/oneflows/reflector.py +265 -0
- droidrun/agent/planner/__init__.py +13 -0
- droidrun/agent/planner/events.py +16 -0
- droidrun/agent/planner/planner_agent.py +268 -0
- droidrun/agent/planner/prompts.py +124 -0
- droidrun/agent/utils/__init__.py +3 -0
- droidrun/agent/utils/async_utils.py +17 -0
- droidrun/agent/utils/chat_utils.py +312 -0
- droidrun/agent/utils/executer.py +132 -0
- droidrun/agent/utils/llm_picker.py +147 -0
- droidrun/agent/utils/trajectory.py +184 -0
- droidrun/cli/__init__.py +1 -1
- droidrun/cli/logs.py +283 -0
- droidrun/cli/main.py +358 -149
- droidrun/run.py +105 -0
- droidrun/tools/__init__.py +4 -30
- droidrun/tools/adb.py +879 -0
- droidrun/tools/ios.py +594 -0
- droidrun/tools/tools.py +99 -0
- droidrun-0.3.0.dist-info/METADATA +149 -0
- droidrun-0.3.0.dist-info/RECORD +52 -0
- droidrun/agent/llm_reasoning.py +0 -567
- droidrun/agent/react_agent.py +0 -556
- droidrun/llm/__init__.py +0 -24
- droidrun/tools/actions.py +0 -854
- droidrun/tools/device.py +0 -29
- droidrun-0.1.0.dist-info/METADATA +0 -276
- droidrun-0.1.0.dist-info/RECORD +0 -20
- {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/WHEEL +0 -0
- {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/entry_points.txt +0 -0
- {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,357 @@
|
|
1
|
+
"""
|
2
|
+
DroidAgent - A wrapper class that coordinates the planning and execution of tasks
|
3
|
+
to achieve a user's goal on an Android device.
|
4
|
+
"""
|
5
|
+
|
6
|
+
import logging
|
7
|
+
from typing import List
|
8
|
+
|
9
|
+
from llama_index.core.llms.llm import LLM
|
10
|
+
from llama_index.core.workflow import step, StartEvent, StopEvent, Workflow, Context
|
11
|
+
from droidrun.agent.droid.events import *
|
12
|
+
from droidrun.agent.codeact import CodeActAgent
|
13
|
+
from droidrun.agent.codeact.events import EpisodicMemoryEvent
|
14
|
+
from droidrun.agent.planner import PlannerAgent
|
15
|
+
from droidrun.agent.context.task_manager import TaskManager
|
16
|
+
from droidrun.agent.utils.trajectory import Trajectory
|
17
|
+
from droidrun.tools import Tools, describe_tools
|
18
|
+
from droidrun.agent.common.events import ScreenshotEvent
|
19
|
+
from droidrun.agent.common.default import MockWorkflow
|
20
|
+
from droidrun.agent.context import ContextInjectionManager
|
21
|
+
from droidrun.agent.context.agent_persona import AgentPersona
|
22
|
+
from droidrun.agent.context.personas import DEFAULT
|
23
|
+
from droidrun.agent.oneflows.reflector import Reflector
|
24
|
+
|
25
|
+
|
26
|
+
logger = logging.getLogger("droidrun")
|
27
|
+
|
28
|
+
class DroidAgent(Workflow):
|
29
|
+
"""
|
30
|
+
A wrapper class that coordinates between PlannerAgent (creates plans) and
|
31
|
+
CodeActAgent (executes tasks) to achieve a user's goal.
|
32
|
+
"""
|
33
|
+
|
34
|
+
@staticmethod
def _configure_default_logging(debug: bool = False):
    """
    Attach a console handler to the shared "droidrun" logger on first use.

    Nothing happens when the logger already owns at least one handler, so
    an existing logging setup is never duplicated or overridden.

    Args:
        debug: When True, log at DEBUG level with a timestamp and level
            prefix; otherwise log at INFO level with the bare message.
    """
    # Guard clause: the logger is already configured, leave it alone.
    if logger.handlers:
        return

    if debug:
        log_format = logging.Formatter("%(asctime)s %(levelname)s: %(message)s", "%H:%M:%S")
        level = logging.DEBUG
    else:
        log_format = logging.Formatter("%(message)s")
        level = logging.INFO

    console = logging.StreamHandler()
    console.setFormatter(log_format)

    logger.addHandler(console)
    logger.setLevel(level)
    # Stop records from also being handed to ancestor loggers.
    logger.propagate = False
|
55
|
+
|
56
|
+
def __init__(
    self,
    goal: str,
    llm: LLM,
    tools: Tools,
    personas: List[AgentPersona] | None = None,
    max_steps: int = 15,
    timeout: int = 1000,
    reasoning: bool = False,
    reflection: bool = False,
    enable_tracing: bool = False,
    debug: bool = False,
    save_trajectories: bool = False,
    *args,
    **kwargs
):
    """
    Initialize the DroidAgent wrapper.

    Args:
        goal: The user's goal or command to execute
        llm: The language model to use for both agents
        tools: Tools instance handed to the agents; its description list is
            built once here via describe_tools()
        personas: Agent personas available for task execution. Defaults to
            a fresh ``[DEFAULT]`` list when omitted or None.
        max_steps: Maximum number of planning steps; also the CodeActAgent
            step budget when reasoning is disabled (with reasoning enabled
            each task gets 5 CodeAct steps)
        timeout: Timeout for agent execution in seconds
        reasoning: Whether to use the PlannerAgent for complex reasoning (True)
                  or send tasks directly to CodeActAgent (False)
        reflection: Whether to reflect on steps the CodeActAgent did to give the PlannerAgent advice
        enable_tracing: Whether to enable Arize Phoenix tracing
        debug: Whether to enable verbose debug logging
        save_trajectories: Whether finalize() should persist the recorded trajectory
        *args: Additional positional arguments forwarded to Workflow
        **kwargs: Additional keyword arguments forwarded to Workflow
    """
    super().__init__(*args, timeout=timeout, **kwargs)

    # A list literal as a default argument would be shared by every
    # instance; build a fresh one per call instead.
    if personas is None:
        personas = [DEFAULT]

    # Configure default logging if not already configured
    self._configure_default_logging(debug=debug)

    # Setup global tracing first if enabled
    if enable_tracing:
        try:
            from llama_index.core import set_global_handler
            set_global_handler("arize_phoenix")
            logger.info("🔍 Arize Phoenix tracing enabled globally")
        except ImportError:
            logger.warning("⚠️ Arize Phoenix package not found, tracing disabled")

    self.goal = goal
    self.llm = llm
    self.max_steps = max_steps
    self.max_codeact_steps = max_steps
    self.timeout = timeout
    self.reasoning = reasoning
    self.reflection = reflection
    self.debug = debug

    self.event_counter = 0
    self.save_trajectories = save_trajectories

    # Counters are reset again in start_handler; defining them here keeps
    # the error paths of the steps from hitting a missing attribute.
    self.step_counter = 0
    self.retry_counter = 0

    self.trajectory = Trajectory()
    self.task_manager = TaskManager()
    self.task_iter = None
    self.cim = ContextInjectionManager(personas=personas)
    self.current_episodic_memory = None
    # Only replaced by a real Reflector when reasoning AND reflection are on.
    self.reflector = None

    logger.info("🤖 Initializing DroidAgent...")

    self.tool_list = describe_tools(tools)
    self.tools_instance = tools

    if self.reasoning:
        logger.info("📝 Initializing Planner Agent...")
        self.planner_agent = PlannerAgent(
            goal=goal,
            llm=llm,
            personas=personas,
            task_manager=self.task_manager,
            tools_instance=tools,
            timeout=timeout,
            debug=debug
        )
        self.add_workflows(planner_agent=self.planner_agent)
        # With a planner in charge, each individual task gets a small budget.
        self.max_codeact_steps = 5

        if self.reflection:
            self.reflector = Reflector(llm=llm, debug=debug)

    else:
        logger.debug("🚫 Planning disabled - will execute tasks directly with CodeActAgent")
        self.planner_agent = None

    logger.info("✅ DroidAgent initialized successfully.")
|
148
|
+
|
149
|
+
@step
async def execute_task(
    self,
    ctx: Context,
    ev: CodeActExecuteEvent
) -> CodeActResultEvent:
    """
    Execute a single task using a freshly created CodeActAgent.

    Args:
        ctx: Workflow context; events streamed by the nested agent are
            forwarded to it through handle_stream_event().
        ev: Event carrying the task to run and an optional reflection.

    Returns:
        CodeActResultEvent with the success flag, the reason reported by
        the agent (or the error text), the task, and the number of CodeAct
        steps taken (0 when the agent failed before returning a result).
    """
    task: Task = ev.task
    reflection = ev.reflection
    persona = self.cim.get_persona(task.agent_type)

    logger.info(f"🔧 Executing task: {task.description}")

    # Kept outside the try block so the error path can always report a
    # step count, even if the agent never produced a result.
    steps_taken = 0

    try:
        codeact_agent = CodeActAgent(
            llm=self.llm,
            persona=persona,
            max_steps=self.max_codeact_steps,
            all_tools_list=self.tool_list,
            tools_instance=self.tools_instance,
            debug=self.debug,
            timeout=self.timeout,
        )

        handler = codeact_agent.run(
            input=task.description,
            remembered_info=self.tools_instance.memory,
            reflection=reflection
        )

        async for nested_ev in handler.stream_events():
            self.handle_stream_event(nested_ev, ctx)

        result = await handler
        steps_taken = result["codeact_steps"]

        succeeded = bool("success" in result and result["success"])
        return CodeActResultEvent(success=succeeded, reason=result["reason"], task=task, steps=steps_taken)

    except Exception as e:
        logger.error(f"Error during task execution: {e}")
        if self.debug:
            import traceback
            logger.error(traceback.format_exc())
        return CodeActResultEvent(success=False, reason=f"Error: {str(e)}", task=task, steps=steps_taken)
|
204
|
+
|
205
|
+
@step
async def handle_codeact_execute(self, ctx: Context, ev: CodeActResultEvent) -> FinalizeEvent | ReflectionEvent | ReasoningLogicEvent:
    """
    Route the outcome of a CodeAct run to the next workflow step.

    Args:
        ctx: Workflow context (unused here).
        ev: Result of the task that was just executed.

    Returns:
        FinalizeEvent when reasoning is disabled (or on an internal error),
        ReflectionEvent when reflection is enabled, otherwise a
        ReasoningLogicEvent that hands control back to the planner loop.
    """
    try:
        task = ev.task
        if not self.reasoning:
            # Direct mode: the single task's result is the final result.
            return FinalizeEvent(success=ev.success, reason=ev.reason, task=[task], steps=ev.steps)

        if self.reflection:
            return ReflectionEvent(task=task)

        return ReasoningLogicEvent()

    except Exception as e:
        logger.error(f"❌ Error during DroidAgent execution: {e}")
        if self.debug:
            import traceback
            logger.error(traceback.format_exc())
        return FinalizeEvent(success=False, reason=str(e), task=self.task_manager.get_task_history(), steps=self.step_counter)
|
223
|
+
|
224
|
+
|
225
|
+
@step
async def reflect(
    self,
    ctx: Context,
    ev: ReflectionEvent
) -> ReasoningLogicEvent | CodeActExecuteEvent:
    """
    Decide whether the task that just ran counts as completed.

    Tasks whose agent_type is "AppStarterExpert" are marked complete
    without consulting the reflector. Every other task is judged by the
    reflector against the most recently captured episodic memory.

    Returns:
        A plain ReasoningLogicEvent after marking the task complete, or a
        ReasoningLogicEvent carrying the reflection after marking it failed.
    """
    finished_task = ev.task

    if finished_task.agent_type != "AppStarterExpert":
        verdict = await self.reflector.reflect_on_episodic_memory(
            episodic_memory=self.current_episodic_memory,
            goal=finished_task.description,
        )

        if not verdict.goal_achieved:
            # Hand the reflection to the planner so it can re-plan.
            self.task_manager.fail_task(finished_task)
            return ReasoningLogicEvent(reflection=verdict)

    self.task_manager.complete_task(finished_task)
    return ReasoningLogicEvent()
|
247
|
+
|
248
|
+
|
249
|
+
@step
async def handle_reasoning_logic(
    self,
    ctx: Context,
    ev: ReasoningLogicEvent,
    planner_agent: Workflow = MockWorkflow()
) -> FinalizeEvent | CodeActExecuteEvent:
    """
    Planner loop: hand out the next queued task or ask the planner for more.

    Without a reflection on the event, the next task from the current task
    iterator is dispatched if one is left. Otherwise (or when a reflection
    is present) the planner agent is run, its streamed events are
    forwarded, and the task list is re-read from the task manager.

    Returns:
        CodeActExecuteEvent for the next task, or FinalizeEvent when the
        step limit is reached, the goal is reported complete, the planner
        produced no tasks, or an exception occurred.
    """
    try:
        if self.step_counter >= self.max_steps:
            return FinalizeEvent(success=False, reason=f"Reached maximum number of steps ({self.max_steps})", task=self.task_manager.get_task_history(), steps=self.step_counter)
        self.step_counter += 1

        planner_reflection = ev.reflection if ev.reflection else None

        if planner_reflection is None:
            if self.task_iter:
                try:
                    return CodeActExecuteEvent(task=next(self.task_iter), reflection=None)
                except StopIteration:
                    # Queue exhausted: fall through and plan again.
                    logger.info("Planning next steps...")

            logger.debug(f"Planning step {self.step_counter}/{self.max_steps}")

        plan_run = planner_agent.run(remembered_info=self.tools_instance.memory, reflection=planner_reflection)

        async for planner_ev in plan_run.stream_events():
            self.handle_stream_event(planner_ev, ctx)

        # The planner's return value is not used; its effect is read back
        # from the task manager below.
        await plan_run

        self.tasks = self.task_manager.get_all_tasks()
        self.task_iter = iter(self.tasks)

        if self.task_manager.goal_completed:
            logger.info(f"✅ Goal completed: {self.task_manager.message}")
            return FinalizeEvent(success=True, reason=self.task_manager.message, task=self.task_manager.get_task_history(), steps=self.step_counter)

        if not self.tasks:
            logger.warning("No tasks generated by planner")
            return FinalizeEvent(success=False, reason="Planner did not generate any tasks", task=self.task_manager.get_task_history(), steps=self.step_counter)

        first_task = next(self.task_iter)
        return CodeActExecuteEvent(task=first_task, reflection=None)

    except Exception as e:
        logger.error(f"❌ Error during DroidAgent execution: {e}")
        if self.debug:
            import traceback
            logger.error(traceback.format_exc())
        return FinalizeEvent(success=False, reason=str(e), task=self.task_manager.get_task_history(), steps=self.step_counter)
|
298
|
+
|
299
|
+
|
300
|
+
@step
async def start_handler(self, ctx: Context, ev: StartEvent) -> CodeActExecuteEvent | ReasoningLogicEvent:
    """
    Workflow entry point: reset the counters and choose the execution mode.

    Returns:
        ReasoningLogicEvent when reasoning is enabled; otherwise a
        CodeActExecuteEvent wrapping the whole goal as one pending task
        with agent_type "Default".
    """
    logger.info(f"🚀 Running DroidAgent to achieve goal: {self.goal}")

    self.step_counter = 0
    self.retry_counter = 0

    if self.reasoning:
        return ReasoningLogicEvent()

    logger.info(f"🔄 Direct execution mode - executing goal: {self.goal}")
    direct_task = Task(
        description=self.goal,
        status=self.task_manager.STATUS_PENDING,
        agent_type="Default"
    )
    return CodeActExecuteEvent(task=direct_task, reflection=None)
|
324
|
+
|
325
|
+
|
326
|
+
@step
async def finalize(self, ctx: Context, ev: FinalizeEvent) -> StopEvent:
    """
    Publish the final event, optionally save the trajectory, and stop.

    Returns:
        StopEvent whose result is a dict with the keys "success", "reason"
        and "steps" copied from the FinalizeEvent.
    """
    ctx.write_event_to_stream(ev)

    outcome = dict(success=ev.success, reason=ev.reason, steps=ev.steps)

    should_save = self.trajectory and self.save_trajectories
    if should_save:
        self.trajectory.save_trajectory()

    return StopEvent(outcome)
|
340
|
+
|
341
|
+
def handle_stream_event(self, ev: Event, ctx: Context):
    """
    Process one event streamed by a nested agent.

    EpisodicMemoryEvent: its episodic memory is stored on the agent and the
    event is not forwarded. StopEvent: ignored. Anything else is written to
    the outer stream and recorded in the trajectory (screenshots go to the
    screenshot list, all other events to the event list).
    """
    if isinstance(ev, EpisodicMemoryEvent):
        self.current_episodic_memory = ev.episodic_memory
        return

    if isinstance(ev, StopEvent):
        return

    ctx.write_event_to_stream(ev)

    if isinstance(ev, ScreenshotEvent):
        self.trajectory.screenshots.append(ev.screenshot)
        return

    self.trajectory.events.append(ev)
|
355
|
+
|
356
|
+
|
357
|
+
|
@@ -0,0 +1,28 @@
|
|
1
|
+
from llama_index.core.workflow import Event
|
2
|
+
from droidrun.agent.context import Reflection, Task
|
3
|
+
from typing import List, Optional
|
4
|
+
|
5
|
+
class CodeActExecuteEvent(Event):
    """Request to run one task with the CodeActAgent."""

    # Task to execute.
    task: Task
    # Reflection passed along to the agent run; None when there is none.
    reflection: Optional[Reflection]
|
8
|
+
|
9
|
+
class CodeActResultEvent(Event):
    """Outcome of a single CodeActAgent task run."""

    # NOTE(review): DroidAgent also passes and reads a `task` attribute on
    # this event although it is not declared here - confirm that Event
    # accepts undeclared fields, or declare it.

    # True when the agent reported success.
    success: bool
    # Reason reported by the agent, or an error description.
    reason: str
    # Number of CodeAct steps taken.
    steps: int
|
13
|
+
|
14
|
+
class ReasoningLogicEvent(Event):
    """Hands control to the planning step, optionally with a reflection."""

    # Reflection on a failed task; None when there is nothing to report.
    reflection: Optional[Reflection] = None
|
16
|
+
|
17
|
+
class FinalizeEvent(Event):
    """Final result of a DroidAgent run, consumed by the finalize step."""

    # Whether the overall goal was achieved.
    success: bool
    # Human-readable explanation of the outcome.
    reason: str
    # Tasks associated with the run.
    task: List[Task]
    # Number of steps taken.
    steps: int = 1
|
22
|
+
|
23
|
+
class TaskRunnerEvent(Event):
    """Marker event without any payload."""
|
25
|
+
|
26
|
+
class ReflectionEvent(Event):
    """Request to reflect on a task that has just been executed."""

    # Task whose execution should be evaluated.
    task: Task
|
@@ -0,0 +1,265 @@
|
|
1
|
+
from llama_index.core.llms.llm import LLM
|
2
|
+
from droidrun.agent.context import EpisodicMemory
|
3
|
+
from droidrun.agent.context.reflection import Reflection
|
4
|
+
from llama_index.core.base.llms.types import ChatMessage, ImageBlock
|
5
|
+
from droidrun.agent.utils.chat_utils import add_screenshot_image_block
|
6
|
+
from droidrun.agent.context.agent_persona import AgentPersona
|
7
|
+
import json
|
8
|
+
from typing import Dict, Any, List, Optional
|
9
|
+
import logging
|
10
|
+
from PIL import Image, ImageDraw, ImageFont
|
11
|
+
import io
|
12
|
+
|
13
|
+
logger = logging.getLogger("droidrun")
|
14
|
+
|
15
|
+
class Reflector:
|
16
|
+
def __init__(
|
17
|
+
self,
|
18
|
+
llm: LLM,
|
19
|
+
debug: bool = False,
|
20
|
+
*args,
|
21
|
+
**kwargs
|
22
|
+
):
|
23
|
+
self.llm = llm
|
24
|
+
self.debug = debug
|
25
|
+
|
26
|
+
async def reflect_on_episodic_memory(self, episodic_memory: EpisodicMemory, goal: str, max_attempts: int = 3) -> Reflection:
    """
    Analyze episodic memory and provide reflection on the agent's performance.

    The prompt (and the optional screenshot grid) is built once; the LLM is
    then queried until it returns parsable JSON or the attempt budget is
    used up. The previous implementation retried by unbounded recursion.

    Args:
        episodic_memory: Recorded steps (and persona) of the agent run.
        goal: Goal the agent was supposed to achieve.
        max_attempts: Maximum number of LLM calls before giving up
            (values below 1 are treated as 1).

    Returns:
        Reflection built from the JSON object returned by the LLM.

    Raises:
        json.JSONDecodeError: If every attempt returned unparsable content.
    """
    system_prompt = ChatMessage(role="system", content=self._create_system_prompt())

    episodic_memory_content = self._format_episodic_memory(episodic_memory)
    persona_content = self._format_persona(episodic_memory.persona)

    # Create user message content with persona information
    user_content = f"{persona_content}\n\nGoal: {goal}\n\nEpisodic Memory Steps:\n{episodic_memory_content}\n\nPlease evaluate if the goal was achieved and provide your analysis in the specified JSON format."
    user_message = ChatMessage(role="user", content=user_content)

    messages = [system_prompt, user_message]

    # Attach the screenshots grid as an image block if screenshots exist
    screenshots_grid = self._create_screenshots_grid(episodic_memory)
    if screenshots_grid:
        messages = await add_screenshot_image_block(screenshots_grid, messages, copy=False)

    last_error = None
    for _ in range(max(1, max_attempts)):
        response = await self.llm.achat(messages=messages)
        raw_content = response.message.content

        logger.info(f"REFLECTION {raw_content}")

        try:
            # Empty/None content is handled like any other unparsable reply.
            parsed_response = json.loads(self._strip_markdown_fence(raw_content or ""))
        except json.JSONDecodeError as e:
            last_error = e
            logger.error(f"Failed to parse reflection response: {e}")
            logger.error(f"Raw response: {raw_content}")
            continue

        return Reflection.from_dict(parsed_response)

    # All attempts failed: surface the last parse error instead of looping forever.
    raise last_error

@staticmethod
def _strip_markdown_fence(content: str) -> str:
    """Remove a leading ```json or ``` fence and a trailing ``` fence, then trim whitespace."""
    content = content.strip()

    if content.startswith('```json'):
        content = content[7:]  # Remove ```json
    elif content.startswith('```'):
        content = content[3:]  # Remove ```

    if content.endswith('```'):
        content = content[:-3]  # Remove trailing ```

    return content.strip()
|
75
|
+
|
76
|
+
def _create_screenshots_grid(self, episodic_memory: EpisodicMemory) -> Optional[bytes]:
    """
    Render up to six step screenshots side by side as a single PNG.

    The screenshots are laid out in one row, in step order from left to
    right. Each cell is half the width and height of the first screenshot
    and is topped by a header bar labelled with the number of the step the
    screenshot belongs to.

    Args:
        episodic_memory: Memory whose steps may carry screenshot bytes.

    Returns:
        PNG-encoded image bytes, or None when no screenshot could be loaded.
    """
    max_cells = 6
    header_height = 60

    # Collect (step number, image) pairs so that header labels keep
    # matching the step numbering even when some steps have no screenshot.
    cells = []
    for step_number, step in enumerate(episodic_memory.steps, 1):
        if len(cells) == max_cells:
            break
        if not step.screenshot:
            continue
        try:
            cells.append((step_number, Image.open(io.BytesIO(step.screenshot))))
        except Exception as e:
            # Best effort: a broken screenshot must not abort the reflection.
            logger.warning(f"Failed to load screenshot: {e}")

    if not cells:
        return None

    first_image = cells[0][1]
    cell_width = first_image.width // 2
    cell_height = first_image.height // 2

    # Single row: one column per screenshot, header bar above each cell
    grid_width = len(cells) * cell_width
    grid_height = cell_height + header_height
    grid_image = Image.new('RGB', (grid_width, grid_height), color='white')

    draw = ImageDraw.Draw(grid_image)
    try:
        # Use larger font for header text
        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 48)
    except (OSError, ImportError):
        # Font file missing/unreadable or FreeType support unavailable.
        font = ImageFont.load_default()

    for col, (step_number, screenshot) in enumerate(cells):
        x = col * cell_width

        # Dark blue header bar
        draw.rectangle([x, 0, x + cell_width, header_height], fill='#2c3e50')

        # Center the step label inside the header bar
        text = f"Step {step_number}"
        bbox = draw.textbbox((0, 0), text, font=font)
        text_width = bbox[2] - bbox[0]
        text_height = bbox[3] - bbox[1]
        text_x = x + (cell_width - text_width) // 2
        text_y = (header_height - text_height) // 2
        draw.text((text_x, text_y), text, fill='white', font=font)

        # Resize and place screenshot below header
        resized_screenshot = screenshot.resize((cell_width, cell_height), Image.Resampling.LANCZOS)
        grid_image.paste(resized_screenshot, (x, header_height))

    # Save grid to disk for debugging (only if debug flag is enabled)
    if self.debug:
        import os
        from datetime import datetime

        debug_dir = "reflection_screenshots"
        os.makedirs(debug_dir, exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        debug_filename = os.path.join(debug_dir, f"screenshot_grid_{timestamp}.png")
        grid_image.save(debug_filename)
        logger.info(f"Screenshot grid saved to: {debug_filename}")

    # Convert to bytes for use with add_screenshot_image_block
    buffer = io.BytesIO()
    grid_image.save(buffer, format='PNG')
    return buffer.getvalue()
|
175
|
+
|
176
|
+
def _create_system_prompt(self) -> str:
|
177
|
+
"""Create a system prompt with reflection instructions."""
|
178
|
+
system_prompt = """You are a Reflector AI that analyzes the performance of an Android Agent. Your role is to examine episodic memory steps and evaluate whether the agent achieved its goal.
|
179
|
+
|
180
|
+
EVALUATION PROCESS:
|
181
|
+
1. First, determine if the agent achieved the stated goal based on the episodic memory steps
|
182
|
+
2. If the goal was achieved, acknowledge the success
|
183
|
+
3. If the goal was NOT achieved, analyze what went wrong and provide direct advice
|
184
|
+
4. Use the provided screenshots (if any) to understand the visual context of each step
|
185
|
+
The screenshots show a screen the agent saw. It is in chronological order from left to right
|
186
|
+
|
187
|
+
ANALYSIS AREAS (for failed goals):
|
188
|
+
- Missed opportunities or inefficient actions
|
189
|
+
- Incorrect tool usage or navigation choices
|
190
|
+
- Failure to understand context or user intent
|
191
|
+
- Suboptimal decision-making patterns
|
192
|
+
|
193
|
+
ADVICE GUIDELINES (for failed goals):
|
194
|
+
- Address the agent directly using "you" form with present/future focus (e.g., "You need to...", "Look for...", "Focus on...")
|
195
|
+
- Provide situational awareness advice that helps with the current state after the failed attempt
|
196
|
+
- Give actionable guidance for what to do NOW when retrying the goal, not what went wrong before
|
197
|
+
- Consider the current app state and context the agent will face when retrying
|
198
|
+
- Focus on the key strategy or approach needed for success in the current situation
|
199
|
+
- Keep it concise but precise (1-2 sentences)
|
200
|
+
|
201
|
+
OUTPUT FORMAT:
|
202
|
+
You MUST respond with a valid JSON object in this exact format:
|
203
|
+
|
204
|
+
{{
|
205
|
+
"goal_achieved": true,
|
206
|
+
"advice": null,
|
207
|
+
"summary": "Brief summary of what happened"
|
208
|
+
}}
|
209
|
+
|
210
|
+
OR
|
211
|
+
|
212
|
+
{{
|
213
|
+
"goal_achieved": false,
|
214
|
+
"advice": "Direct advice using 'you' form focused on current situation - what you need to do NOW when retrying",
|
215
|
+
"summary": "Brief summary of what happened"
|
216
|
+
}}
|
217
|
+
|
218
|
+
IMPORTANT:
|
219
|
+
- If goal_achieved is true, set advice to null
|
220
|
+
- If goal_achieved is false, provide direct "you" form advice focused on what to do NOW in the current situation when retrying
|
221
|
+
- Advice should be forward-looking and situational, not retrospective about past mistakes
|
222
|
+
- Always include a brief summary of the agent's performance
|
223
|
+
- Ensure the JSON is valid and parsable
|
224
|
+
- ONLY return the JSON object, no additional text or formatting"""
|
225
|
+
|
226
|
+
return system_prompt
|
227
|
+
|
228
|
+
def _format_persona(self, persona: AgentPersona) -> str:
|
229
|
+
"""Format the agent persona information for the user prompt."""
|
230
|
+
persona_content = f"""ACTOR AGENT PERSONA:
|
231
|
+
- Name: {persona.name}
|
232
|
+
- Description: {persona.description}
|
233
|
+
- Available Tools: {', '.join(persona.allowed_tools)}
|
234
|
+
- Expertise Areas: {', '.join(persona.expertise_areas)}
|
235
|
+
- System Prompt: {persona.system_prompt}"""
|
236
|
+
|
237
|
+
return persona_content
|
238
|
+
|
239
|
+
def _format_episodic_memory(self, episodic_memory: EpisodicMemory) -> str:
|
240
|
+
"""Format the episodic memory steps into a readable format for analysis."""
|
241
|
+
formatted_steps = []
|
242
|
+
|
243
|
+
for i, step in enumerate(episodic_memory.steps, 1):
|
244
|
+
try:
|
245
|
+
# Parse the JSON strings to get the original content without escape characters
|
246
|
+
chat_history = json.loads(step.chat_history)
|
247
|
+
response = json.loads(step.response)
|
248
|
+
|
249
|
+
|
250
|
+
formatted_step = f"""Step {i}:
|
251
|
+
Chat History: {json.dumps(chat_history, indent=2)}
|
252
|
+
Response: {json.dumps(response, indent=2)}
|
253
|
+
Timestamp: {step.timestamp}
|
254
|
+
---"""
|
255
|
+
except json.JSONDecodeError as e:
|
256
|
+
# Fallback to original format if JSON parsing fails
|
257
|
+
logger.warning(f"Failed to parse JSON for step {i}: {e}")
|
258
|
+
formatted_step = f"""Step {i}:
|
259
|
+
Chat History: {step.chat_history}
|
260
|
+
Response: {step.response}
|
261
|
+
Timestamp: {step.timestamp}
|
262
|
+
---"""
|
263
|
+
formatted_steps.append(formatted_step)
|
264
|
+
|
265
|
+
return "\n".join(formatted_steps)
|
@@ -0,0 +1,13 @@
|
|
1
|
+
from droidrun.agent.planner.planner_agent import PlannerAgent
|
2
|
+
from droidrun.agent.planner.prompts import (
|
3
|
+
DEFAULT_PLANNER_SYSTEM_PROMPT,
|
4
|
+
DEFAULT_PLANNER_USER_PROMPT,
|
5
|
+
DEFAULT_PLANNER_TASK_FAILED_PROMPT
|
6
|
+
)
|
7
|
+
|
8
|
+
__all__ = [
|
9
|
+
"PlannerAgent",
|
10
|
+
"DEFAULT_PLANNER_SYSTEM_PROMPT",
|
11
|
+
"DEFAULT_PLANNER_USER_PROMPT",
|
12
|
+
"DEFAULT_PLANNER_TASK_FAILED_PROMPT"
|
13
|
+
]
|
@@ -0,0 +1,16 @@
|
|
1
|
+
from llama_index.core.workflow import Event
|
2
|
+
from llama_index.core.base.llms.types import ChatMessage
|
3
|
+
from typing import Optional, Any
|
4
|
+
from droidrun.agent.context import Task
|
5
|
+
|
6
|
+
class PlanInputEvent(Event):
    """Event carrying the chat messages used as planner input."""

    # Chat messages for the planner.
    input: list[ChatMessage]
|
8
|
+
|
9
|
+
|
10
|
+
class PlanThinkingEvent(Event):
    """Event carrying the planner's optional thoughts and code."""

    # Free-text thoughts; None when absent.
    thoughts: Optional[str] = None
    # Code text; None when absent.
    code: Optional[str] = None
|
13
|
+
|
14
|
+
|
15
|
+
class PlanCreatedEvent(Event):
    """Event carrying the list of tasks of a created plan."""

    # Tasks that make up the plan.
    tasks: list[Task]
|