droidrun 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. droidrun/__init__.py +22 -10
  2. droidrun/__main__.py +1 -2
  3. droidrun/adb/__init__.py +3 -3
  4. droidrun/adb/device.py +2 -2
  5. droidrun/adb/manager.py +2 -2
  6. droidrun/agent/__init__.py +5 -15
  7. droidrun/agent/codeact/__init__.py +11 -0
  8. droidrun/agent/codeact/codeact_agent.py +420 -0
  9. droidrun/agent/codeact/events.py +28 -0
  10. droidrun/agent/codeact/prompts.py +26 -0
  11. droidrun/agent/common/default.py +5 -0
  12. droidrun/agent/common/events.py +4 -0
  13. droidrun/agent/context/__init__.py +23 -0
  14. droidrun/agent/context/agent_persona.py +15 -0
  15. droidrun/agent/context/context_injection_manager.py +66 -0
  16. droidrun/agent/context/episodic_memory.py +15 -0
  17. droidrun/agent/context/personas/__init__.py +11 -0
  18. droidrun/agent/context/personas/app_starter.py +44 -0
  19. droidrun/agent/context/personas/default.py +95 -0
  20. droidrun/agent/context/personas/extractor.py +52 -0
  21. droidrun/agent/context/personas/ui_expert.py +107 -0
  22. droidrun/agent/context/reflection.py +20 -0
  23. droidrun/agent/context/task_manager.py +124 -0
  24. droidrun/agent/context/todo.txt +4 -0
  25. droidrun/agent/droid/__init__.py +13 -0
  26. droidrun/agent/droid/droid_agent.py +357 -0
  27. droidrun/agent/droid/events.py +28 -0
  28. droidrun/agent/oneflows/reflector.py +265 -0
  29. droidrun/agent/planner/__init__.py +13 -0
  30. droidrun/agent/planner/events.py +16 -0
  31. droidrun/agent/planner/planner_agent.py +268 -0
  32. droidrun/agent/planner/prompts.py +124 -0
  33. droidrun/agent/utils/__init__.py +3 -0
  34. droidrun/agent/utils/async_utils.py +17 -0
  35. droidrun/agent/utils/chat_utils.py +312 -0
  36. droidrun/agent/utils/executer.py +132 -0
  37. droidrun/agent/utils/llm_picker.py +147 -0
  38. droidrun/agent/utils/trajectory.py +184 -0
  39. droidrun/cli/__init__.py +1 -1
  40. droidrun/cli/logs.py +283 -0
  41. droidrun/cli/main.py +358 -149
  42. droidrun/run.py +105 -0
  43. droidrun/tools/__init__.py +4 -30
  44. droidrun/tools/adb.py +879 -0
  45. droidrun/tools/ios.py +594 -0
  46. droidrun/tools/tools.py +99 -0
  47. droidrun-0.3.0.dist-info/METADATA +149 -0
  48. droidrun-0.3.0.dist-info/RECORD +52 -0
  49. droidrun/agent/llm_reasoning.py +0 -567
  50. droidrun/agent/react_agent.py +0 -556
  51. droidrun/llm/__init__.py +0 -24
  52. droidrun/tools/actions.py +0 -854
  53. droidrun/tools/device.py +0 -29
  54. droidrun-0.1.0.dist-info/METADATA +0 -276
  55. droidrun-0.1.0.dist-info/RECORD +0 -20
  56. {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/WHEEL +0 -0
  57. {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/entry_points.txt +0 -0
  58. {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,357 @@
1
+ """
2
+ DroidAgent - A wrapper class that coordinates the planning and execution of tasks
3
+ to achieve a user's goal on an Android device.
4
+ """
5
+
6
+ import logging
7
+ from typing import List
8
+
9
+ from llama_index.core.llms.llm import LLM
10
+ from llama_index.core.workflow import step, StartEvent, StopEvent, Workflow, Context
11
+ from droidrun.agent.droid.events import *
12
+ from droidrun.agent.codeact import CodeActAgent
13
+ from droidrun.agent.codeact.events import EpisodicMemoryEvent
14
+ from droidrun.agent.planner import PlannerAgent
15
+ from droidrun.agent.context.task_manager import TaskManager
16
+ from droidrun.agent.utils.trajectory import Trajectory
17
+ from droidrun.tools import Tools, describe_tools
18
+ from droidrun.agent.common.events import ScreenshotEvent
19
+ from droidrun.agent.common.default import MockWorkflow
20
+ from droidrun.agent.context import ContextInjectionManager
21
+ from droidrun.agent.context.agent_persona import AgentPersona
22
+ from droidrun.agent.context.personas import DEFAULT
23
+ from droidrun.agent.oneflows.reflector import Reflector
24
+
25
+
26
+ logger = logging.getLogger("droidrun")
27
+
28
class DroidAgent(Workflow):
    """
    Workflow that coordinates a PlannerAgent (creates plans) and a
    CodeActAgent (executes tasks) to achieve a user's goal.

    Step routing, as wired by the event types each step accepts:
        StartEvent          -> start_handler
        CodeActExecuteEvent -> execute_task
        CodeActResultEvent  -> handle_codeact_execute
        ReflectionEvent     -> reflect
        ReasoningLogicEvent -> handle_reasoning_logic
        FinalizeEvent       -> finalize
    """

    @staticmethod
    def _configure_default_logging(debug: bool = False):
        """
        Attach a console handler to the "droidrun" logger if it has none.

        Args:
            debug: When True, log at DEBUG level with a timestamped format;
                otherwise log at INFO level with the bare message.
        """
        # Only configure if no handlers exist (avoid duplicate configuration)
        if logger.handlers:
            return

        if debug:
            formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s", "%H:%M:%S")
        else:
            formatter = logging.Formatter("%(message)s")

        handler = logging.StreamHandler()
        handler.setFormatter(formatter)
        logger.addHandler(handler)
        logger.setLevel(logging.DEBUG if debug else logging.INFO)
        logger.propagate = False

    def __init__(
        self,
        goal: str,
        llm: LLM,
        tools: Tools,
        personas: List[AgentPersona] | None = None,
        max_steps: int = 15,
        timeout: int = 1000,
        reasoning: bool = False,
        reflection: bool = False,
        enable_tracing: bool = False,
        debug: bool = False,
        save_trajectories: bool = False,
        *args,
        **kwargs
    ):
        """
        Initialize the DroidAgent wrapper.

        Args:
            goal: The user's goal or command to execute
            llm: The language model to use for both agents
            tools: Tools instance handed to the planner and to every CodeActAgent
            personas: Agent personas made available through the
                ContextInjectionManager; defaults to ``[DEFAULT]``
            max_steps: Maximum number of steps for both agents
            timeout: Timeout for agent execution in seconds
            reasoning: Whether to use the PlannerAgent for complex reasoning (True)
                or send tasks directly to CodeActAgent (False)
            reflection: Whether to reflect on steps the CodeActAgent did to give the PlannerAgent advice
            enable_tracing: Whether to enable Arize Phoenix tracing
            debug: Whether to enable verbose debug logging
            save_trajectories: Whether ``finalize`` saves the recorded trajectory
            *args: Additional positional arguments passed to ``Workflow.__init__``
            **kwargs: Additional keyword arguments passed to ``Workflow.__init__``
        """
        super().__init__(*args, timeout=timeout, **kwargs)

        # A list literal as the default would be shared by every instance.
        if personas is None:
            personas = [DEFAULT]

        # Configure default logging if not already configured
        self._configure_default_logging(debug=debug)

        # Setup global tracing first if enabled
        if enable_tracing:
            try:
                from llama_index.core import set_global_handler
                set_global_handler("arize_phoenix")
                logger.info("🔍 Arize Phoenix tracing enabled globally")
            except ImportError:
                logger.warning("⚠️ Arize Phoenix package not found, tracing disabled")

        self.goal = goal
        self.llm = llm
        self.max_steps = max_steps
        self.max_codeact_steps = max_steps
        self.timeout = timeout
        self.reasoning = reasoning
        self.reflection = reflection
        self.debug = debug

        self.event_counter = 0
        # start_handler resets these per run; they are created here so the
        # error paths that read step_counter never hit a missing attribute.
        self.step_counter = 0
        self.retry_counter = 0
        self.save_trajectories = save_trajectories

        self.trajectory = Trajectory()
        self.task_manager = TaskManager()
        self.task_iter = None
        self.cim = ContextInjectionManager(personas=personas)
        self.current_episodic_memory = None

        logger.info("🤖 Initializing DroidAgent...")

        self.tool_list = describe_tools(tools)
        self.tools_instance = tools

        if self.reasoning:
            logger.info("📝 Initializing Planner Agent...")
            self.planner_agent = PlannerAgent(
                goal=goal,
                llm=llm,
                personas=personas,
                task_manager=self.task_manager,
                tools_instance=tools,
                timeout=timeout,
                debug=debug
            )
            self.add_workflows(planner_agent=self.planner_agent)
            # With a planner in charge, each individual task gets a smaller budget.
            self.max_codeact_steps = 5

            # The reflector is only created (and only used) in reasoning mode.
            if self.reflection:
                self.reflector = Reflector(llm=llm, debug=debug)

        else:
            logger.debug("🚫 Planning disabled - will execute tasks directly with CodeActAgent")
            self.planner_agent = None

        logger.info("✅ DroidAgent initialized successfully.")

    @step
    async def execute_task(
        self,
        ctx: Context,
        ev: CodeActExecuteEvent
    ) -> CodeActResultEvent:
        """
        Execute a single task using a freshly created CodeActAgent.

        Args:
            ctx: Workflow context; nested agent events are forwarded to its stream
            ev: Event carrying the task to run and an optional reflection

        Returns:
            CodeActResultEvent with the success flag, reason and step count
            reported by the agent. If anything raises, a failed result is
            returned instead, carrying the step count known so far (0 when the
            agent never produced a result).
        """
        task: Task = ev.task
        reflection = ev.reflection
        persona = self.cim.get_persona(task.agent_type)

        logger.info(f"🔧 Executing task: {task.description}")

        # Bound before the try block so the except branch can always report it.
        steps = 0
        try:
            codeact_agent = CodeActAgent(
                llm=self.llm,
                persona=persona,
                max_steps=self.max_codeact_steps,
                all_tools_list=self.tool_list,
                tools_instance=self.tools_instance,
                debug=self.debug,
                timeout=self.timeout,
            )

            handler = codeact_agent.run(
                input=task.description,
                remembered_info=self.tools_instance.memory,
                reflection=reflection
            )

            async for nested_ev in handler.stream_events():
                self.handle_stream_event(nested_ev, ctx)

            result = await handler
            steps = result["codeact_steps"]

            success = bool("success" in result and result["success"])
            return CodeActResultEvent(success=success, reason=result["reason"], task=task, steps=steps)

        except Exception as e:
            logger.error(f"Error during task execution: {e}")
            if self.debug:
                import traceback
                logger.error(traceback.format_exc())
            return CodeActResultEvent(success=False, reason=f"Error: {str(e)}", task=task, steps=steps)

    @step
    async def handle_codeact_execute(
        self,
        ctx: Context,
        ev: CodeActResultEvent
    ) -> FinalizeEvent | ReflectionEvent | ReasoningLogicEvent:
        """
        Decide what follows a finished task.

        Without reasoning the run is finalized with the task's result. With
        reasoning, the task is sent to reflection when that is enabled, and
        otherwise control goes straight back to the reasoning loop.
        """
        try:
            task = ev.task
            if not self.reasoning:
                return FinalizeEvent(success=ev.success, reason=ev.reason, task=[task], steps=ev.steps)

            if self.reflection:
                return ReflectionEvent(task=task)

            return ReasoningLogicEvent()

        except Exception as e:
            logger.error(f"❌ Error during DroidAgent execution: {e}")
            if self.debug:
                import traceback
                logger.error(traceback.format_exc())
            return FinalizeEvent(success=False, reason=str(e), task=self.task_manager.get_task_history(), steps=self.step_counter)

    @step
    async def reflect(
        self,
        ctx: Context,
        ev: ReflectionEvent
    ) -> ReasoningLogicEvent | CodeActExecuteEvent:
        """
        Judge the last task from the current episodic memory and record the
        outcome in the task manager.

        Tasks of agent type "AppStarterExpert" are marked complete without
        consulting the reflector. For a failed task, the reflection is passed
        along on the returned ReasoningLogicEvent.
        """
        task = ev.task
        if ev.task.agent_type == "AppStarterExpert":
            self.task_manager.complete_task(task)
            return ReasoningLogicEvent()

        reflection = await self.reflector.reflect_on_episodic_memory(
            episodic_memory=self.current_episodic_memory,
            goal=task.description,
        )

        if reflection.goal_achieved:
            self.task_manager.complete_task(task)
            return ReasoningLogicEvent()

        self.task_manager.fail_task(task)
        return ReasoningLogicEvent(reflection=reflection)

    @step
    async def handle_reasoning_logic(
        self,
        ctx: Context,
        ev: ReasoningLogicEvent,
        planner_agent: Workflow = MockWorkflow()
    ) -> FinalizeEvent | CodeActExecuteEvent:
        """
        Pick the next task, invoking the planner when needed.

        Every pass through this step counts against ``max_steps``. When the
        event carries a reflection the planner is run with it; otherwise the
        next task queued from the previous plan is dispatched, and the planner
        is only run once that queue is exhausted.

        Returns:
            CodeActExecuteEvent for the next task, or FinalizeEvent when the
            step limit is reached, the task manager reports the goal as
            completed, the planner produced no tasks, or an error occurred.
        """
        try:
            if self.step_counter >= self.max_steps:
                return FinalizeEvent(
                    success=False,
                    reason=f"Reached maximum number of steps ({self.max_steps})",
                    task=self.task_manager.get_task_history(),
                    steps=self.step_counter,
                )
            self.step_counter += 1

            if ev.reflection:
                handler = planner_agent.run(remembered_info=self.tools_instance.memory, reflection=ev.reflection)
            else:
                if self.task_iter:
                    try:
                        task = next(self.task_iter)
                    except StopIteration:
                        logger.info("Planning next steps...")
                    else:
                        return CodeActExecuteEvent(task=task, reflection=None)

                logger.debug(f"Planning step {self.step_counter}/{self.max_steps}")

                handler = planner_agent.run(remembered_info=self.tools_instance.memory, reflection=None)

            async for nested_ev in handler.stream_events():
                self.handle_stream_event(nested_ev, ctx)

            # The planner's return value is not used here; its outcome is read
            # back from the shared task manager below.
            await handler

            self.tasks = self.task_manager.get_all_tasks()
            self.task_iter = iter(self.tasks)

            if self.task_manager.goal_completed:
                logger.info(f"✅ Goal completed: {self.task_manager.message}")
                return FinalizeEvent(
                    success=True,
                    reason=self.task_manager.message,
                    task=self.task_manager.get_task_history(),
                    steps=self.step_counter,
                )
            if not self.tasks:
                logger.warning("No tasks generated by planner")
                return FinalizeEvent(
                    success=False,
                    reason="Planner did not generate any tasks",
                    task=self.task_manager.get_task_history(),
                    steps=self.step_counter,
                )

            return CodeActExecuteEvent(task=next(self.task_iter), reflection=None)

        except Exception as e:
            logger.error(f"❌ Error during DroidAgent execution: {e}")
            if self.debug:
                import traceback
                logger.error(traceback.format_exc())
            return FinalizeEvent(success=False, reason=str(e), task=self.task_manager.get_task_history(), steps=self.step_counter)

    @step
    async def start_handler(self, ctx: Context, ev: StartEvent) -> CodeActExecuteEvent | ReasoningLogicEvent:
        """
        Entry step: reset the per-run counters and choose the execution mode.

        Returns:
            CodeActExecuteEvent wrapping the whole goal as a single "Default"
            task when reasoning is disabled, otherwise ReasoningLogicEvent to
            start the planning loop.
        """
        logger.info(f"🚀 Running DroidAgent to achieve goal: {self.goal}")

        self.step_counter = 0
        self.retry_counter = 0

        if not self.reasoning:
            logger.info(f"🔄 Direct execution mode - executing goal: {self.goal}")
            task = Task(
                description=self.goal,
                status=self.task_manager.STATUS_PENDING,
                agent_type="Default"
            )

            return CodeActExecuteEvent(task=task, reflection=None)

        return ReasoningLogicEvent()

    @step
    async def finalize(self, ctx: Context, ev: FinalizeEvent) -> StopEvent:
        """
        Publish the final event, optionally save the trajectory, and stop.

        Returns:
            StopEvent whose result is a dict with "success", "reason" and "steps".
        """
        ctx.write_event_to_stream(ev)

        result = {
            "success": ev.success,
            "reason": ev.reason,
            "steps": ev.steps,
        }

        if self.trajectory and self.save_trajectories:
            self.trajectory.save_trajectory()

        return StopEvent(result)

    def handle_stream_event(self, ev: Event, ctx: Context):
        """
        Process one event streamed from a nested agent run.

        EpisodicMemoryEvent is kept as the current episodic memory and not
        forwarded. Any other event except StopEvent is forwarded to this
        workflow's stream and recorded in the trajectory (screenshots and
        events are stored separately).
        """
        if isinstance(ev, EpisodicMemoryEvent):
            self.current_episodic_memory = ev.episodic_memory
            return

        if isinstance(ev, StopEvent):
            return

        ctx.write_event_to_stream(ev)

        if isinstance(ev, ScreenshotEvent):
            self.trajectory.screenshots.append(ev.screenshot)
        else:
            self.trajectory.events.append(ev)
355
+
356
+
357
+
@@ -0,0 +1,28 @@
1
+ from llama_index.core.workflow import Event
2
+ from droidrun.agent.context import Reflection, Task
3
+ from typing import List, Optional
4
+
5
class CodeActExecuteEvent(Event):
    """Request to execute one task, optionally accompanied by a reflection."""

    # The task to execute.
    task: Task
    # Reflection to pass along with the task, or None.
    reflection: Optional[Reflection]
8
+
9
class CodeActResultEvent(Event):
    """Outcome of executing a single task."""

    # Whether the task succeeded.
    success: bool
    # Explanation of the outcome (or the error text on failure).
    reason: str
    # The task this result belongs to. Declared explicitly, as on the sibling
    # events, because producers pass `task=` and consumers read `.task`.
    task: Task
    # Number of steps reported for the task.
    steps: int
13
+
14
class ReasoningLogicEvent(Event):
    """Hands control to the reasoning step, optionally carrying a reflection."""

    # Reflection on a previous task; None when there is nothing to report.
    reflection: Optional[Reflection] = None
16
+
17
class FinalizeEvent(Event):
    """Final outcome of a run."""

    # Whether the run succeeded.
    success: bool
    # Explanation of the outcome.
    reason: str
    # Tasks associated with the run.
    task: List[Task]
    # Number of steps taken; defaults to 1.
    steps: int = 1
22
+
23
class TaskRunnerEvent(Event):
    """Marker event that declares no fields of its own."""
25
+
26
class ReflectionEvent(Event):
    """Request to reflect on a task."""

    # The task to reflect on.
    task: Task
@@ -0,0 +1,265 @@
1
+ from llama_index.core.llms.llm import LLM
2
+ from droidrun.agent.context import EpisodicMemory
3
+ from droidrun.agent.context.reflection import Reflection
4
+ from llama_index.core.base.llms.types import ChatMessage, ImageBlock
5
+ from droidrun.agent.utils.chat_utils import add_screenshot_image_block
6
+ from droidrun.agent.context.agent_persona import AgentPersona
7
+ import json
8
+ from typing import Dict, Any, List, Optional
9
+ import logging
10
+ from PIL import Image, ImageDraw, ImageFont
11
+ import io
12
+
13
+ logger = logging.getLogger("droidrun")
14
+
15
class Reflector:
    """
    Asks an LLM whether a goal was achieved, based on an agent's episodic
    memory (formatted steps plus a strip of screenshots), and parses the
    JSON answer into a Reflection.
    """

    def __init__(
        self,
        llm: "LLM",
        debug: bool = False,
        *args,
        **kwargs
    ):
        """
        Args:
            llm: Language model used for the reflection chat call
            debug: When True, the screenshot grid is also written to disk
            *args: Accepted and ignored
            **kwargs: Accepted and ignored
        """
        self.llm = llm
        self.debug = debug

    async def reflect_on_episodic_memory(
        self,
        episodic_memory: "EpisodicMemory",
        goal: str,
        max_retries: int = 2,
    ) -> "Reflection":
        """
        Analyze episodic memory and provide reflection on the agent's performance.

        Args:
            episodic_memory: Memory whose steps, screenshots and persona are shown to the LLM
            goal: The goal the agent was trying to achieve
            max_retries: How many additional LLM calls to make when the
                response is not valid JSON (negative values are treated as 0)

        Returns:
            The Reflection built from the LLM's JSON response.

        Raises:
            ValueError: If no attempt produced parsable JSON.
        """
        system_prompt = ChatMessage(role="system", content=self._create_system_prompt())

        episodic_memory_content = self._format_episodic_memory(episodic_memory)
        persona_content = self._format_persona(episodic_memory.persona)

        # Create user message content with persona information
        user_content = f"{persona_content}\n\nGoal: {goal}\n\nEpisodic Memory Steps:\n{episodic_memory_content}\n\nPlease evaluate if the goal was achieved and provide your analysis in the specified JSON format."
        user_message = ChatMessage(role="user", content=user_content)

        # The prompt is identical for every attempt, so it is built once.
        messages = [system_prompt, user_message]
        screenshots_grid = self._create_screenshots_grid(episodic_memory)
        if screenshots_grid:
            messages = await add_screenshot_image_block(screenshots_grid, messages, copy=False)

        # Retries are bounded: a model that keeps answering with non-JSON must
        # not keep this coroutine (and the LLM bill) running forever.
        attempts = max(0, max_retries) + 1
        last_error = None
        for _ in range(attempts):
            response = await self.llm.achat(messages=messages)

            logger.info(f"REFLECTION {response.message.content}")

            try:
                parsed_response = json.loads(self._strip_code_fence(response.message.content))
            except json.JSONDecodeError as e:
                last_error = e
                logger.error(f"Failed to parse reflection response: {e}")
                logger.error(f"Raw response: {response.message.content}")
                continue

            return Reflection.from_dict(parsed_response)

        raise ValueError(
            f"Reflection response was not valid JSON after {attempts} attempt(s)"
        ) from last_error

    @staticmethod
    def _strip_code_fence(content: str) -> str:
        """Return *content* without surrounding whitespace or a markdown code fence."""
        fence = "```"
        json_fence = "```json"

        content = content.strip()
        if content.startswith(json_fence):
            content = content[len(json_fence):]
        elif content.startswith(fence):
            content = content[len(fence):]

        if content.endswith(fence):
            content = content[:-len(fence)]

        return content.strip()

    def _create_screenshots_grid(self, episodic_memory: "EpisodicMemory") -> Optional[bytes]:
        """
        Create a single-row strip of up to six screenshots from episodic memory steps.

        Each cell is half the width and height of the first screenshot and is
        topped by a header bar labelled "Step N", where N counts the
        screenshots in the strip (steps without a usable screenshot are
        skipped and not counted).

        Returns:
            PNG bytes of the strip, or None when no screenshot could be loaded.
        """
        max_screenshots = 6
        header_height = 60

        # Extract screenshots from steps, stopping once the strip is full
        screenshots = []
        for step in episodic_memory.steps:
            if len(screenshots) == max_screenshots:
                break
            if not step.screenshot:
                continue
            try:
                # Convert bytes to PIL Image
                screenshots.append(Image.open(io.BytesIO(step.screenshot)))
            except Exception as e:
                logger.warning(f"Failed to load screenshot: {e}")

        if not screenshots:
            return None

        cell_width = screenshots[0].width // 2
        cell_height = screenshots[0].height // 2

        # Create the grid image with space for the header bars
        grid_width = len(screenshots) * cell_width
        grid_height = cell_height + header_height
        grid_image = Image.new('RGB', (grid_width, grid_height), color='white')

        # Set up font for step text
        draw = ImageDraw.Draw(grid_image)
        try:
            # Use larger font for header text
            font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 48)
        except OSError:
            # Font file not available on this system
            font = ImageFont.load_default()

        # Place screenshots left to right, each below its header bar
        for i, screenshot in enumerate(screenshots):
            x = i * cell_width

            # Create header bar
            draw.rectangle([x, 0, x + cell_width, header_height], fill='#2c3e50')  # Dark blue header

            # Draw step text centered in the header bar
            text = f"Step {i+1}"
            bbox = draw.textbbox((0, 0), text, font=font)
            text_width = bbox[2] - bbox[0]
            text_height = bbox[3] - bbox[1]
            text_x = x + (cell_width - text_width) // 2
            text_y = (header_height - text_height) // 2
            draw.text((text_x, text_y), text, fill='white', font=font)

            # Resize and place screenshot below header
            resized_screenshot = screenshot.resize((cell_width, cell_height), Image.Resampling.LANCZOS)
            grid_image.paste(resized_screenshot, (x, header_height))

        # Save grid to disk for debugging (only if debug flag is enabled)
        if self.debug:
            import os
            from datetime import datetime

            # Create debug directory if it doesn't exist
            debug_dir = "reflection_screenshots"
            os.makedirs(debug_dir, exist_ok=True)

            # Save with timestamp
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            debug_filename = os.path.join(debug_dir, f"screenshot_grid_{timestamp}.png")
            grid_image.save(debug_filename)
            logger.info(f"Screenshot grid saved to: {debug_filename}")

        # Convert to bytes for use with add_screenshot_image_block
        buffer = io.BytesIO()
        grid_image.save(buffer, format='PNG')
        return buffer.getvalue()

    def _create_system_prompt(self) -> str:
        """
        Create a system prompt with reflection instructions.

        The JSON examples use single braces: this is a plain string, not a
        format template, so doubled braces would reach the model literally and
        make the "exact format" example invalid JSON.
        """
        system_prompt = """You are a Reflector AI that analyzes the performance of an Android Agent. Your role is to examine episodic memory steps and evaluate whether the agent achieved its goal.

EVALUATION PROCESS:
1. First, determine if the agent achieved the stated goal based on the episodic memory steps
2. If the goal was achieved, acknowledge the success
3. If the goal was NOT achieved, analyze what went wrong and provide direct advice
4. Use the provided screenshots (if any) to understand the visual context of each step
The screenshots show a screen the agent saw. It is in chronological order from left to right

ANALYSIS AREAS (for failed goals):
- Missed opportunities or inefficient actions
- Incorrect tool usage or navigation choices
- Failure to understand context or user intent
- Suboptimal decision-making patterns

ADVICE GUIDELINES (for failed goals):
- Address the agent directly using "you" form with present/future focus (e.g., "You need to...", "Look for...", "Focus on...")
- Provide situational awareness advice that helps with the current state after the failed attempt
- Give actionable guidance for what to do NOW when retrying the goal, not what went wrong before
- Consider the current app state and context the agent will face when retrying
- Focus on the key strategy or approach needed for success in the current situation
- Keep it concise but precise (1-2 sentences)

OUTPUT FORMAT:
You MUST respond with a valid JSON object in this exact format:

{
"goal_achieved": true,
"advice": null,
"summary": "Brief summary of what happened"
}

OR

{
"goal_achieved": false,
"advice": "Direct advice using 'you' form focused on current situation - what you need to do NOW when retrying",
"summary": "Brief summary of what happened"
}

IMPORTANT:
- If goal_achieved is true, set advice to null
- If goal_achieved is false, provide direct "you" form advice focused on what to do NOW in the current situation when retrying
- Advice should be forward-looking and situational, not retrospective about past mistakes
- Always include a brief summary of the agent's performance
- Ensure the JSON is valid and parsable
- ONLY return the JSON object, no additional text or formatting"""

        return system_prompt

    def _format_persona(self, persona: "AgentPersona") -> str:
        """Format the agent persona information for the user prompt."""
        return "\n".join([
            "ACTOR AGENT PERSONA:",
            f"- Name: {persona.name}",
            f"- Description: {persona.description}",
            f"- Available Tools: {', '.join(persona.allowed_tools)}",
            f"- Expertise Areas: {', '.join(persona.expertise_areas)}",
            f"- System Prompt: {persona.system_prompt}",
        ])

    def _format_episodic_memory(self, episodic_memory: "EpisodicMemory") -> str:
        """
        Format the episodic memory steps into a readable format for analysis.

        Each step's chat history and response are parsed as JSON and
        re-serialized with indentation; when either cannot be parsed, both are
        emitted as stored.
        """
        formatted_steps = []

        for i, step in enumerate(episodic_memory.steps, 1):
            try:
                # Parse the JSON strings to get the original content without escape characters
                chat_history = json.dumps(json.loads(step.chat_history), indent=2)
                response = json.dumps(json.loads(step.response), indent=2)
            except (json.JSONDecodeError, TypeError) as e:
                # Fallback to original format if JSON parsing fails; TypeError
                # covers values that are not str/bytes (e.g. None).
                logger.warning(f"Failed to parse JSON for step {i}: {e}")
                chat_history = step.chat_history
                response = step.response

            formatted_steps.append("\n".join([
                f"Step {i}:",
                f"Chat History: {chat_history}",
                f"Response: {response}",
                f"Timestamp: {step.timestamp}",
                "---",
            ]))

        return "\n".join(formatted_steps)
@@ -0,0 +1,13 @@
1
+ from droidrun.agent.planner.planner_agent import PlannerAgent
2
+ from droidrun.agent.planner.prompts import (
3
+ DEFAULT_PLANNER_SYSTEM_PROMPT,
4
+ DEFAULT_PLANNER_USER_PROMPT,
5
+ DEFAULT_PLANNER_TASK_FAILED_PROMPT
6
+ )
7
+
8
# Public API of the planner package.
__all__ = [
    "PlannerAgent",
    "DEFAULT_PLANNER_SYSTEM_PROMPT",
    "DEFAULT_PLANNER_USER_PROMPT",
    "DEFAULT_PLANNER_TASK_FAILED_PROMPT",
]
@@ -0,0 +1,16 @@
1
+ from llama_index.core.workflow import Event
2
+ from llama_index.core.base.llms.types import ChatMessage
3
+ from typing import Optional, Any
4
+ from droidrun.agent.context import Task
5
+
6
class PlanInputEvent(Event):
    """Event carrying a list of chat messages as its input."""

    # Chat messages carried by the event.
    input: list[ChatMessage]
8
+
9
+
10
class PlanThinkingEvent(Event):
    """Event carrying optional thoughts and optional code, both as text."""

    # Free-text thoughts; None when absent.
    thoughts: Optional[str] = None
    # Code text; None when absent.
    code: Optional[str] = None
13
+
14
+
15
class PlanCreatedEvent(Event):
    """Event carrying a list of tasks."""

    # Tasks carried by the event.
    tasks: list[Task]