droidrun 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
droidrun/__init__.py CHANGED
@@ -6,17 +6,21 @@ __version__ = "0.3.0"
6
6
 
7
7
  # Import main classes for easier access
8
8
  from droidrun.agent.utils.llm_picker import load_llm
9
- from droidrun.adb.manager import DeviceManager
10
9
  from droidrun.tools import Tools, AdbTools, IOSTools
11
10
  from droidrun.agent.droid import DroidAgent
12
11
 
12
+ # Import macro functionality
13
+ from droidrun.macro import MacroPlayer, replay_macro_file, replay_macro_folder
14
+
13
15
 
14
16
  # Make main components available at package level
15
17
  __all__ = [
16
18
  "DroidAgent",
17
- "DeviceManager",
18
19
  "load_llm",
19
20
  "Tools",
20
21
  "AdbTools",
21
22
  "IOSTools",
23
+ "MacroPlayer",
24
+ "replay_macro_file",
25
+ "replay_macro_folder",
22
26
  ]
@@ -97,6 +97,7 @@ class CodeActAgent(Workflow):
97
97
  loop=asyncio.get_event_loop(),
98
98
  locals={},
99
99
  tools=self.tool_list,
100
+ tools_instance=tools_instance,
100
101
  globals={"__builtins__": __builtins__},
101
102
  )
102
103
 
@@ -164,20 +165,22 @@ class CodeActAgent(Workflow):
164
165
  chat_history = await chat_utils.add_memory_block(self.remembered_info, chat_history)
165
166
 
166
167
  for context in self.required_context:
167
- if model == "DeepSeek":
168
- logger.warning(
169
- "[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
170
- )
171
- elif self.vision == True and context == "screenshot":
172
- screenshot = (await self.tools.take_screenshot())[1]
168
+ if context == "screenshot":
169
+ # Always capture the screenshot, even when vision is disabled, so it is still saved to the trajectory
170
+ screenshot = (self.tools.take_screenshot())[1]
173
171
  ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
174
172
 
175
173
  await ctx.set("screenshot", screenshot)
176
- chat_history = await chat_utils.add_screenshot_image_block(screenshot, chat_history)
174
+ if model == "DeepSeek":
175
+ logger.warning(
176
+ "[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
177
+ )
178
+ elif self.vision == True: # if vision is enabled, add screenshot to chat history
179
+ chat_history = await chat_utils.add_screenshot_image_block(screenshot, chat_history)
177
180
 
178
181
  if context == "ui_state":
179
182
  try:
180
- state = await self.tools.get_state()
183
+ state = self.tools.get_state()
181
184
  await ctx.set("ui_state", state["a11y_tree"])
182
185
  chat_history = await chat_utils.add_ui_text_block(
183
186
  state["a11y_tree"], chat_history
@@ -189,7 +192,7 @@ class CodeActAgent(Workflow):
189
192
 
190
193
  if context == "packages":
191
194
  chat_history = await chat_utils.add_packages_block(
192
- await self.tools.list_packages(include_system_apps=True),
195
+ self.tools.list_packages(include_system_apps=True),
193
196
  chat_history,
194
197
  )
195
198
 
@@ -242,12 +245,15 @@ class CodeActAgent(Workflow):
242
245
  code = ev.code
243
246
  assert code, "Code cannot be empty."
244
247
  logger.info(f"⚡ Executing action...")
245
- logger.debug(f"Code to execute:\n```python\n{code}\n```")
248
+ logger.info(f"Code to execute:\n```python\n{code}\n```")
246
249
 
247
250
  try:
248
251
  self.code_exec_counter += 1
249
252
  result = await self.executor.execute(ctx, code)
250
- logger.info(f"💡 Code execution successful. Result: {result}")
253
+ logger.info(f"💡 Code execution successful. Result: {result['output']}")
254
+ screenshots = result['screenshots']
255
+ for screenshot in screenshots[:-1]: # the last screenshot will be captured by next step
256
+ ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
251
257
 
252
258
  if self.tools.finished == True:
253
259
  logger.debug(" - Task completed.")
@@ -259,7 +265,7 @@ class CodeActAgent(Workflow):
259
265
 
260
266
  self.remembered_info = self.tools.memory
261
267
 
262
- event = TaskExecutionResultEvent(output=str(result))
268
+ event = TaskExecutionResultEvent(output=str(result['output']))
263
269
  ctx.write_event_to_stream(event)
264
270
  return event
265
271
 
@@ -398,13 +404,13 @@ class CodeActAgent(Workflow):
398
404
  ui_state = None
399
405
 
400
406
  try:
401
- _, screenshot_bytes = await self.tools.take_screenshot()
407
+ _, screenshot_bytes = self.tools.take_screenshot()
402
408
  screenshot = screenshot_bytes
403
409
  except Exception as e:
404
410
  logger.warning(f"Failed to capture final screenshot: {e}")
405
411
 
406
412
  try:
407
- (a11y_tree, phone_state) = await self.tools.get_state()
413
+ (a11y_tree, phone_state) = self.tools.get_state()
408
414
  except Exception as e:
409
415
  logger.warning(f"Failed to capture final UI state: {e}")
410
416
 
@@ -1,4 +1,47 @@
1
1
  from llama_index.core.workflow import Event
2
2
 
3
3
  class ScreenshotEvent(Event):
4
- screenshot: bytes
4
+ screenshot: bytes
5
+
6
+ class MacroEvent(Event):
7
+ """Base class for recorded macro action events (tap, swipe, drag, text input, key press, app start)"""
8
+ action_type: str
9
+ description: str
10
+
11
+ class TapActionEvent(MacroEvent):
12
+ """Event for tap actions with coordinates"""
13
+ x: int
14
+ y: int
15
+ element_index: int = None
16
+ element_text: str = ""
17
+ element_bounds: str = ""
18
+
19
+ class SwipeActionEvent(MacroEvent):
20
+ """Event for swipe actions with coordinates"""
21
+ start_x: int
22
+ start_y: int
23
+ end_x: int
24
+ end_y: int
25
+ duration_ms: int
26
+
27
+ class DragActionEvent(MacroEvent):
28
+ """Event for drag actions with coordinates"""
29
+ start_x: int
30
+ start_y: int
31
+ end_x: int
32
+ end_y: int
33
+ duration_ms: int
34
+
35
+ class InputTextActionEvent(MacroEvent):
36
+ """Event for text input actions"""
37
+ text: str
38
+
39
+ class KeyPressActionEvent(MacroEvent):
40
+ """Event for key press actions"""
41
+ keycode: int
42
+ key_name: str = ""
43
+
44
+ class StartAppEvent(MacroEvent):
45
+ """Event for starting an app"""
46
+ package: str
47
+ activity: str = None
@@ -1,9 +1,11 @@
1
1
  from .default import DEFAULT
2
2
  from .ui_expert import UI_EXPERT
3
3
  from .app_starter import APP_STARTER_EXPERT
4
+ from .big_agent import BIG_AGENT
4
5
 
5
6
  __all__ = [
6
7
  'DEFAULT',
7
8
  'UI_EXPERT',
8
9
  'APP_STARTER_EXPERT',
10
+ 'BIG_AGENT',
9
11
  ]
@@ -0,0 +1,96 @@
1
+ from droidrun.agent.context.agent_persona import AgentPersona
2
+ from droidrun.tools import Tools
3
+
4
+ BIG_AGENT = AgentPersona(
5
+ name="Big Agent",
6
+ description="Big Agent. Use this as your Big Agent",
7
+ expertise_areas=[
8
+ "UI navigation", "button interactions", "text input",
9
+ "menu navigation", "form filling", "scrolling", "app launching"
10
+ ],
11
+ allowed_tools=[
12
+ Tools.swipe.__name__,
13
+ Tools.input_text.__name__,
14
+ Tools.press_key.__name__,
15
+ Tools.drag.__name__,
16
+ Tools.tap_by_index.__name__,
17
+ Tools.start_app.__name__,
18
+ Tools.list_packages.__name__,
19
+ Tools.remember.__name__,
20
+ Tools.complete.__name__
21
+ ],
22
+ required_context=[
23
+ "ui_state",
24
+ "screenshot",
25
+ ],
26
+ user_prompt="""
27
+ **Current Request:**
28
+ {goal}
29
+ **Is the precondition met? What is your reasoning and the next step to address this request?**
30
+ Explain your thought process then provide code in ```python ... ``` tags if needed.
31
+ """"",
32
+
33
+ system_prompt="""
34
+ You are a helpful AI assistant that can write and execute Python code to solve problems.
35
+
36
+ You will be given a task to perform. You should output:
37
+ - Python code wrapped in ``` tags that provides the solution to the task, or a step towards the solution.
38
+ - If there is a precondition for the task, you MUST check if it is met.
39
+ - If a goal's precondition is unmet, fail the task by calling `complete(success=False, reason='...')` with an explanation.
40
+ - If you task is complete, you should use the complete(success:bool, reason:str) function within a code block to mark it as finished. The success parameter should be True if the task was completed successfully, and False otherwise. The reason parameter should be a string explaining the reason for failure if failed.
41
+
42
+
43
+ ## Context:
44
+ The following context is given to you for analysis:
45
+ - **ui_state**: A list of all currently visible UI elements with their indices. Use this to understand what interactive elements are available on the screen.
46
+ - **screenshots**: A visual screenshot of the current state of the Android screen. This provides visual context for what the user sees. screenshots won't be saved in the chat history. So, make sure to describe what you see and explain the key parts of your plan in your thoughts, as those will be saved and used to assist you in future steps.
47
+ - **phone_state**: The current app you are navigating in. This tells you which application context you're working within.
48
+ - **chat history**: You are also given the history of your actions (if any) from your previous steps.
49
+ - **execution result**: The result of your last Action
50
+ NOTE: you don't have access to these inputs in your tool calling context
51
+
52
+ ## Response Format:
53
+ Example of proper code format:
54
+ **Task Assignment:**
55
+ **Task:** "Precondition: Settings app is open. Goal: Navigate to Wi-Fi settings and connect to the network 'HomeNetwork'."
56
+
57
+ **(Step 1) Agent Analysis:** I can see the Settings app is open from the screenshot. This is a multi-step task that requires me to first navigate to Wi-Fi settings, then ensure Wi-Fi is enabled, and finally connect to 'HomeNetwork'. Let me start by finding and tapping on the Wi-Fi option in the settings menu. Looking at the UI elements, I can see "Wi-Fi" option at index 3.
58
+
59
+ **(Step 1) Agent Action:**
60
+ ```python
61
+ # First step: Navigate to Wi-Fi settings
62
+ tap_by_index(3)
63
+ ```
64
+
65
+ **(Step 2) Agent Analysis:** Good! I've successfully navigated to the Wi-Fi settings screen. Now I can see the Wi-Fi settings interface. I notice that Wi-Fi appears to be turned off based on the toggle switch at index 1. I need to turn it on before I can see available networks and connect to 'HomeNetwork'.
66
+
67
+ **(Step 2) Agent Action:**
68
+ ```python
69
+ # Second step: Turn on Wi-Fi to see available networks
70
+ tap_by_index(1)
71
+ ```
72
+
73
+ **(Step 3) Agent Analysis:** Excellent! Wi-Fi is now enabled and I can see a list of available networks appearing on the screen. I can see 'HomeNetwork' in the list at index 5. This is the final step - I need to tap on it to initiate the connection, which will complete my assigned task.
74
+
75
+ **(Step 3) Agent Action:**
76
+ ```python
77
+ # Final step: Connect to the target network
78
+ tap_by_index(5)
79
+ complete(success=True, reason="Successfully navigated to Wi-Fi settings and initiated connection to HomeNetwork")
80
+ ```
81
+ ```
82
+
83
+ ## Tools:
84
+ In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
85
+ {tool_descriptions}
86
+
87
+
88
+ ## Final Answer Guidelines:
89
+ - When providing a final answer, focus on directly answering the user's question in the response format given
90
+ - Present the results clearly and concisely as if you computed them directly
91
+ - Structure your response like you're directly answering the user's query, not explaining how you solved it
92
+
93
+ Reminder: Always place your Python code between ```...``` tags when you want to run code.
94
+ """
95
+
96
+ )
@@ -13,6 +13,7 @@ UI_EXPERT = AgentPersona(
13
13
  Tools.input_text.__name__,
14
14
  Tools.press_key.__name__,
15
15
  Tools.tap_by_index.__name__,
16
+ Tools.drag.__name__,
16
17
  Tools.remember.__name__,
17
18
  Tools.complete.__name__
18
19
  ],
@@ -1,5 +1,5 @@
1
1
  import os
2
- from typing import List, Dict
2
+ from typing import List, Dict, Optional
3
3
  from dataclasses import dataclass
4
4
  import copy
5
5
 
@@ -11,6 +11,9 @@ class Task:
11
11
  description: str
12
12
  status: str
13
13
  agent_type: str
14
+ # Optional fields to carry success/failure context back to the planner
15
+ message: Optional[str] = None
16
+ failure_reason: Optional[str] = None
14
17
 
15
18
 
16
19
  class TaskManager:
@@ -40,14 +43,16 @@ class TaskManager:
40
43
  def get_task_history(self):
41
44
  return self.task_history
42
45
 
43
- def complete_task(self, task: Task):
46
+ def complete_task(self, task: Task, message: Optional[str] = None):
44
47
  task = copy.deepcopy(task)
45
48
  task.status = self.STATUS_COMPLETED
49
+ task.message = message
46
50
  self.task_history.append(task)
47
51
 
48
- def fail_task(self, task: Task):
52
+ def fail_task(self, task: Task, failure_reason: Optional[str] = None):
49
53
  task = copy.deepcopy(task)
50
54
  task.status = self.STATUS_FAILED
55
+ task.failure_reason = failure_reason
51
56
  self.task_history.append(task)
52
57
 
53
58
  def get_completed_tasks(self) -> list[dict]:
@@ -16,7 +16,7 @@ from droidrun.agent.planner import PlannerAgent
16
16
  from droidrun.agent.context.task_manager import TaskManager
17
17
  from droidrun.agent.utils.trajectory import Trajectory
18
18
  from droidrun.tools import Tools, describe_tools
19
- from droidrun.agent.common.events import ScreenshotEvent
19
+ from droidrun.agent.common.events import ScreenshotEvent, MacroEvent
20
20
  from droidrun.agent.common.default import MockWorkflow
21
21
  from droidrun.agent.context import ContextInjectionManager
22
22
  from droidrun.agent.context.agent_persona import AgentPersona
@@ -68,7 +68,8 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
68
68
  reflection: bool = False,
69
69
  enable_tracing: bool = False,
70
70
  debug: bool = False,
71
- save_trajectories: bool = False,
71
+ save_trajectories: str = "none",
72
+ excluded_tools: List[str] = None,
72
73
  *args,
73
74
  **kwargs
74
75
  ):
@@ -85,8 +86,13 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
85
86
  reflection: Whether to reflect on steps the CodeActAgent did to give the PlannerAgent advice
86
87
  enable_tracing: Whether to enable Arize Phoenix tracing
87
88
  debug: Whether to enable verbose debug logging
89
+ save_trajectories: Trajectory saving level. Can be:
90
+ - "none" (no saving)
91
+ - "step" (save per step)
92
+ - "action" (save per action)
88
93
  **kwargs: Additional keyword arguments to pass to the agents
89
94
  """
95
+ self.user_id = kwargs.pop("user_id", None)
90
96
  super().__init__(timeout=timeout ,*args,**kwargs)
91
97
  # Configure default logging if not already configured
92
98
  self._configure_default_logging(debug=debug)
@@ -112,18 +118,33 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
112
118
  self.debug = debug
113
119
 
114
120
  self.event_counter = 0
115
- self.save_trajectories = save_trajectories
121
+ # Handle backward compatibility: bool -> str mapping
122
+ if isinstance(save_trajectories, bool):
123
+ self.save_trajectories = "step" if save_trajectories else "none"
124
+ else:
125
+ # Validate string values
126
+ valid_values = ["none", "step", "action"]
127
+ if save_trajectories not in valid_values:
128
+ logger.warning(f"Invalid save_trajectories value: {save_trajectories}. Using 'none' instead.")
129
+ self.save_trajectories = "none"
130
+ else:
131
+ self.save_trajectories = save_trajectories
116
132
 
117
- self.trajectory = Trajectory()
133
+ self.trajectory = Trajectory(goal=goal)
118
134
  self.task_manager = TaskManager()
119
135
  self.task_iter = None
136
+
137
+
120
138
  self.cim = ContextInjectionManager(personas=personas)
121
139
  self.current_episodic_memory = None
122
140
 
123
141
  logger.info("🤖 Initializing DroidAgent...")
142
+ logger.info(f"💾 Trajectory saving level: {self.save_trajectories}")
124
143
 
125
- self.tool_list = describe_tools(tools)
144
+ self.tool_list = describe_tools(tools, excluded_tools)
126
145
  self.tools_instance = tools
146
+
147
+ self.tools_instance.save_trajectories = self.save_trajectories
127
148
 
128
149
 
129
150
  if self.reasoning:
@@ -162,17 +183,18 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
162
183
  enable_tracing=enable_tracing,
163
184
  debug=debug,
164
185
  save_trajectories=save_trajectories,
165
- )
186
+ ),
187
+ self.user_id
166
188
  )
167
189
 
168
190
 
169
191
  logger.info("✅ DroidAgent initialized successfully.")
170
192
 
171
- def run(self) -> WorkflowHandler:
193
+ def run(self, *args, **kwargs) -> WorkflowHandler:
172
194
  """
173
195
  Run the DroidAgent workflow.
174
196
  """
175
- return super().run()
197
+ return super().run(*args, **kwargs)
176
198
 
177
199
  @step
178
200
  async def execute_task(
@@ -232,16 +254,24 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
232
254
  return CodeActResultEvent(success=False, reason=f"Error: {str(e)}", task=task, steps=[])
233
255
 
234
256
  @step
235
- async def handle_codeact_execute(self, ctx: Context, ev: CodeActResultEvent) -> FinalizeEvent | ReflectionEvent:
257
+ async def handle_codeact_execute(self, ctx: Context, ev: CodeActResultEvent) -> FinalizeEvent | ReflectionEvent | ReasoningLogicEvent:
236
258
  try:
237
259
  task = ev.task
238
260
  if not self.reasoning:
239
261
  return FinalizeEvent(success=ev.success, reason=ev.reason, output=ev.reason, task=[task], tasks=[task], steps=ev.steps)
240
262
 
241
- if self.reflection:
263
+ if self.reflection and ev.success:
242
264
  return ReflectionEvent(task=task)
243
-
244
- return ReasoningLogicEvent()
265
+
266
+ # Reasoning is enabled and no reflection will run (reflection is disabled, or the task failed).
267
+ # Success: mark complete and proceed to next step in reasoning loop.
268
+ # Failure: mark failed and trigger planner immediately without advancing to the next queued task.
269
+ if ev.success:
270
+ self.task_manager.complete_task(task, message=ev.reason)
271
+ return ReasoningLogicEvent()
272
+ else:
273
+ self.task_manager.fail_task(task, failure_reason=ev.reason)
274
+ return ReasoningLogicEvent(force_planning=True)
245
275
 
246
276
  except Exception as e:
247
277
  logger.error(f"❌ Error during DroidAgent execution: {e}")
@@ -293,7 +323,7 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
293
323
  if ev.reflection:
294
324
  handler = planner_agent.run(remembered_info=self.tools_instance.memory, reflection=ev.reflection)
295
325
  else:
296
- if self.task_iter:
326
+ if not ev.force_planning and self.task_iter:
297
327
  try:
298
328
  task = next(self.task_iter)
299
329
  return CodeActExecuteEvent(task=task, reflection=None)
@@ -369,7 +399,8 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
369
399
  success=ev.success,
370
400
  output=ev.output,
371
401
  steps=ev.steps,
372
- )
402
+ ),
403
+ self.user_id
373
404
  )
374
405
  flush()
375
406
 
@@ -381,7 +412,7 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
381
412
  "steps": ev.steps,
382
413
  }
383
414
 
384
- if self.trajectory and self.save_trajectories:
415
+ if self.trajectory and self.save_trajectories != "none":
385
416
  self.trajectory.save_trajectory()
386
417
 
387
418
  return StopEvent(result)
@@ -391,13 +422,16 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
391
422
  if isinstance(ev, EpisodicMemoryEvent):
392
423
  self.current_episodic_memory = ev.episodic_memory
393
424
  return
425
+
426
+
394
427
 
395
428
  if not isinstance(ev, StopEvent):
396
429
  ctx.write_event_to_stream(ev)
397
430
 
398
431
  if isinstance(ev, ScreenshotEvent):
399
432
  self.trajectory.screenshots.append(ev.screenshot)
400
-
433
+ elif isinstance(ev, MacroEvent):
434
+ self.trajectory.macro.append(ev)
401
435
  else:
402
436
  self.trajectory.events.append(ev)
403
437
 
@@ -13,6 +13,7 @@ class CodeActResultEvent(Event):
13
13
 
14
14
  class ReasoningLogicEvent(Event):
15
15
  reflection: Optional[Reflection] = None
16
+ force_planning: bool = False
16
17
 
17
18
  class FinalizeEvent(Event):
18
19
  success: bool
@@ -130,13 +130,14 @@ class PlannerAgent(Workflow):
130
130
  self.steps_counter += 1
131
131
  logger.info(f"🧠 Thinking about how to plan the goal...")
132
132
 
133
+ # Always capture the screenshot, even when vision is disabled, so it is still saved to the trajectory
134
+ screenshot = (self.tools_instance.take_screenshot())[1]
135
+ ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
133
136
  if self.vision:
134
- screenshot = (await self.tools_instance.take_screenshot())[1]
135
- ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
136
137
  await ctx.set("screenshot", screenshot)
137
138
 
138
139
  try:
139
- state = await self.tools_instance.get_state()
140
+ state = self.tools_instance.get_state()
140
141
  await ctx.set("ui_state", state["a11y_tree"])
141
142
  await ctx.set("phone_state", state["phone_state"])
142
143
  except Exception as e:
@@ -168,11 +169,15 @@ class PlannerAgent(Workflow):
168
169
  try:
169
170
  result = await self.executer.execute(ctx, code)
170
171
  logger.info(f"📝 Planning complete")
171
- logger.debug(f" - Planning code executed. Result: {result}")
172
+ logger.debug(f" - Planning code executed. Result: {result['output']}")
173
+
174
+ screenshots = result['screenshots']
175
+ for screenshot in screenshots[:-1]: # the last screenshot will be captured by next step
176
+ ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
172
177
 
173
178
  await self.chat_memory.aput(
174
179
  ChatMessage(
175
- role="user", content=f"Execution Result:\n```\n{result}\n```"
180
+ role="user", content=f"Execution Result:\n```\n{result['output']}\n```"
176
181
  )
177
182
  )
178
183
 
@@ -241,15 +246,15 @@ wrap your code inside this:
241
246
  logger.debug(f" - Sending {len(chat_history)} messages to LLM.")
242
247
 
243
248
  model = self.llm.class_name()
244
- if model == "DeepSeek":
245
- logger.warning(
246
- "[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
247
- )
248
-
249
- elif self.vision == True:
250
- chat_history = await chat_utils.add_screenshot_image_block(
251
- await ctx.get("screenshot"), chat_history
252
- )
249
+ if self.vision == True:
250
+ if model == "DeepSeek":
251
+ logger.warning(
252
+ "[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
253
+ )
254
+ else:
255
+ chat_history = await chat_utils.add_screenshot_image_block(
256
+ await ctx.get("screenshot"), chat_history
257
+ )
253
258
 
254
259
 
255
260
 
@@ -208,7 +208,7 @@ async def add_task_history_block(completed_tasks: list[dict], failed_tasks: list
208
208
  all_tasks = completed_tasks + failed_tasks
209
209
 
210
210
  if all_tasks:
211
- task_history += "Task History (chronological order):\n"
211
+ task_history += "### Task Execution History (chronological):\n"
212
212
  for i, task in enumerate(all_tasks, 1):
213
213
  if hasattr(task, 'description'):
214
214
  status_indicator = "[success]" if hasattr(task, 'status') and task.status == "completed" else "[failed]"
@@ -9,6 +9,7 @@ from llama_index.core.workflow import Context
9
9
  import asyncio
10
10
  from asyncio import AbstractEventLoop
11
11
  import threading
12
+ from droidrun.tools.adb import AdbTools
12
13
 
13
14
  logger = logging.getLogger("droidrun")
14
15
 
@@ -29,6 +30,7 @@ class SimpleCodeExecutor:
29
30
  locals: Dict[str, Any] = {},
30
31
  globals: Dict[str, Any] = {},
31
32
  tools={},
33
+ tools_instance=None,
32
34
  use_same_scope: bool = True,
33
35
  ):
34
36
  """
@@ -38,8 +40,11 @@ class SimpleCodeExecutor:
38
40
  locals: Local variables to use in the execution context
39
41
  globals: Global variables to use in the execution context
40
42
  tools: Dictionary mapping tool names to the functions available for execution
43
+ tools_instance: Original tools instance (e.g., AdbTools instance)
41
44
  """
42
45
 
46
+ self.tools_instance = tools_instance
47
+
43
48
  # Loop through the tools and add them to globals, but first check whether each tool is async and, if so, convert it to sync. tools is a dictionary of tool name: function
44
49
  # e.g. tools = {'tool_name': tool_function}
45
50
 
@@ -74,6 +79,7 @@ class SimpleCodeExecutor:
74
79
  self.locals = locals
75
80
  self.loop = loop
76
81
  self.use_same_scope = use_same_scope
82
+ self.tools = tools
77
83
  if self.use_same_scope:
78
84
  # If using the same scope, set the globals and locals to the same dictionary
79
85
  self.globals = self.locals = {
@@ -93,7 +99,12 @@ class SimpleCodeExecutor:
93
99
  """
94
100
  # Update UI elements before execution
95
101
  self.globals['ui_state'] = await ctx.get("ui_state", None)
102
+ self.globals['step_screenshots'] = []
103
+ self.globals['step_ui_states'] = []
96
104
 
105
+ if self.tools_instance and isinstance(self.tools_instance, AdbTools):
106
+ self.tools_instance._set_context(ctx)
107
+
97
108
  # Capture stdout and stderr
98
109
  stdout = io.StringIO()
99
110
  stderr = io.StringIO()
@@ -129,4 +140,9 @@ class SimpleCodeExecutor:
129
140
  output = f"Error: {type(e).__name__}: {str(e)}\n"
130
141
  output += traceback.format_exc()
131
142
 
132
- return output
143
+ result = {
144
+ 'output': output,
145
+ 'screenshots': self.globals['step_screenshots'],
146
+ 'ui_states': self.globals['step_ui_states']
147
+ }
148
+ return result