droidrun 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/__init__.py +6 -2
- droidrun/agent/codeact/codeact_agent.py +20 -14
- droidrun/agent/common/events.py +44 -1
- droidrun/agent/context/personas/__init__.py +2 -0
- droidrun/agent/context/personas/big_agent.py +96 -0
- droidrun/agent/context/personas/ui_expert.py +1 -0
- droidrun/agent/context/task_manager.py +8 -3
- droidrun/agent/droid/droid_agent.py +50 -16
- droidrun/agent/droid/events.py +1 -0
- droidrun/agent/planner/planner_agent.py +19 -14
- droidrun/agent/utils/chat_utils.py +1 -1
- droidrun/agent/utils/executer.py +17 -1
- droidrun/agent/utils/trajectory.py +258 -11
- droidrun/cli/main.py +108 -44
- droidrun/macro/__init__.py +14 -0
- droidrun/macro/__main__.py +10 -0
- droidrun/macro/cli.py +228 -0
- droidrun/macro/replay.py +309 -0
- droidrun/portal.py +37 -22
- droidrun/telemetry/events.py +1 -1
- droidrun/telemetry/tracker.py +3 -2
- droidrun/tools/adb.py +641 -185
- droidrun/tools/ios.py +163 -163
- droidrun/tools/tools.py +60 -14
- {droidrun-0.3.2.dist-info → droidrun-0.3.4.dist-info}/METADATA +20 -8
- droidrun-0.3.4.dist-info/RECORD +54 -0
- droidrun/adb/__init__.py +0 -13
- droidrun/adb/device.py +0 -345
- droidrun/adb/manager.py +0 -93
- droidrun/adb/wrapper.py +0 -226
- droidrun-0.3.2.dist-info/RECORD +0 -53
- {droidrun-0.3.2.dist-info → droidrun-0.3.4.dist-info}/WHEEL +0 -0
- {droidrun-0.3.2.dist-info → droidrun-0.3.4.dist-info}/entry_points.txt +0 -0
- {droidrun-0.3.2.dist-info → droidrun-0.3.4.dist-info}/licenses/LICENSE +0 -0
droidrun/__init__.py
CHANGED
@@ -6,17 +6,21 @@ __version__ = "0.3.0"
|
|
6
6
|
|
7
7
|
# Import main classes for easier access
|
8
8
|
from droidrun.agent.utils.llm_picker import load_llm
|
9
|
-
from droidrun.adb.manager import DeviceManager
|
10
9
|
from droidrun.tools import Tools, AdbTools, IOSTools
|
11
10
|
from droidrun.agent.droid import DroidAgent
|
12
11
|
|
12
|
+
# Import macro functionality
|
13
|
+
from droidrun.macro import MacroPlayer, replay_macro_file, replay_macro_folder
|
14
|
+
|
13
15
|
|
14
16
|
# Make main components available at package level
|
15
17
|
__all__ = [
|
16
18
|
"DroidAgent",
|
17
|
-
"DeviceManager",
|
18
19
|
"load_llm",
|
19
20
|
"Tools",
|
20
21
|
"AdbTools",
|
21
22
|
"IOSTools",
|
23
|
+
"MacroPlayer",
|
24
|
+
"replay_macro_file",
|
25
|
+
"replay_macro_folder",
|
22
26
|
]
|
@@ -97,6 +97,7 @@ class CodeActAgent(Workflow):
|
|
97
97
|
loop=asyncio.get_event_loop(),
|
98
98
|
locals={},
|
99
99
|
tools=self.tool_list,
|
100
|
+
tools_instance=tools_instance,
|
100
101
|
globals={"__builtins__": __builtins__},
|
101
102
|
)
|
102
103
|
|
@@ -164,20 +165,22 @@ class CodeActAgent(Workflow):
|
|
164
165
|
chat_history = await chat_utils.add_memory_block(self.remembered_info, chat_history)
|
165
166
|
|
166
167
|
for context in self.required_context:
|
167
|
-
if
|
168
|
-
|
169
|
-
|
170
|
-
)
|
171
|
-
elif self.vision == True and context == "screenshot":
|
172
|
-
screenshot = (await self.tools.take_screenshot())[1]
|
168
|
+
if context == "screenshot":
|
169
|
+
# if vision is disabled, screenshot should save to trajectory
|
170
|
+
screenshot = (self.tools.take_screenshot())[1]
|
173
171
|
ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
|
174
172
|
|
175
173
|
await ctx.set("screenshot", screenshot)
|
176
|
-
|
174
|
+
if model == "DeepSeek":
|
175
|
+
logger.warning(
|
176
|
+
"[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
|
177
|
+
)
|
178
|
+
elif self.vision == True: # if vision is enabled, add screenshot to chat history
|
179
|
+
chat_history = await chat_utils.add_screenshot_image_block(screenshot, chat_history)
|
177
180
|
|
178
181
|
if context == "ui_state":
|
179
182
|
try:
|
180
|
-
state =
|
183
|
+
state = self.tools.get_state()
|
181
184
|
await ctx.set("ui_state", state["a11y_tree"])
|
182
185
|
chat_history = await chat_utils.add_ui_text_block(
|
183
186
|
state["a11y_tree"], chat_history
|
@@ -189,7 +192,7 @@ class CodeActAgent(Workflow):
|
|
189
192
|
|
190
193
|
if context == "packages":
|
191
194
|
chat_history = await chat_utils.add_packages_block(
|
192
|
-
|
195
|
+
self.tools.list_packages(include_system_apps=True),
|
193
196
|
chat_history,
|
194
197
|
)
|
195
198
|
|
@@ -242,12 +245,15 @@ class CodeActAgent(Workflow):
|
|
242
245
|
code = ev.code
|
243
246
|
assert code, "Code cannot be empty."
|
244
247
|
logger.info(f"⚡ Executing action...")
|
245
|
-
logger.
|
248
|
+
logger.info(f"Code to execute:\n```python\n{code}\n```")
|
246
249
|
|
247
250
|
try:
|
248
251
|
self.code_exec_counter += 1
|
249
252
|
result = await self.executor.execute(ctx, code)
|
250
|
-
logger.info(f"💡 Code execution successful. Result: {result}")
|
253
|
+
logger.info(f"💡 Code execution successful. Result: {result['output']}")
|
254
|
+
screenshots = result['screenshots']
|
255
|
+
for screenshot in screenshots[:-1]: # the last screenshot will be captured by next step
|
256
|
+
ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
|
251
257
|
|
252
258
|
if self.tools.finished == True:
|
253
259
|
logger.debug(" - Task completed.")
|
@@ -259,7 +265,7 @@ class CodeActAgent(Workflow):
|
|
259
265
|
|
260
266
|
self.remembered_info = self.tools.memory
|
261
267
|
|
262
|
-
event = TaskExecutionResultEvent(output=str(result))
|
268
|
+
event = TaskExecutionResultEvent(output=str(result['output']))
|
263
269
|
ctx.write_event_to_stream(event)
|
264
270
|
return event
|
265
271
|
|
@@ -398,13 +404,13 @@ class CodeActAgent(Workflow):
|
|
398
404
|
ui_state = None
|
399
405
|
|
400
406
|
try:
|
401
|
-
_, screenshot_bytes =
|
407
|
+
_, screenshot_bytes = self.tools.take_screenshot()
|
402
408
|
screenshot = screenshot_bytes
|
403
409
|
except Exception as e:
|
404
410
|
logger.warning(f"Failed to capture final screenshot: {e}")
|
405
411
|
|
406
412
|
try:
|
407
|
-
(a11y_tree, phone_state) =
|
413
|
+
(a11y_tree, phone_state) = self.tools.get_state()
|
408
414
|
except Exception as e:
|
409
415
|
logger.warning(f"Failed to capture final UI state: {e}")
|
410
416
|
|
droidrun/agent/common/events.py
CHANGED
@@ -1,4 +1,47 @@
|
|
1
1
|
from llama_index.core.workflow import Event
|
2
2
|
|
3
3
|
class ScreenshotEvent(Event):
|
4
|
-
screenshot: bytes
|
4
|
+
screenshot: bytes
|
5
|
+
|
6
|
+
class MacroEvent(Event):
|
7
|
+
"""Base class for coordinate-based action events"""
|
8
|
+
action_type: str
|
9
|
+
description: str
|
10
|
+
|
11
|
+
class TapActionEvent(MacroEvent):
|
12
|
+
"""Event for tap actions with coordinates"""
|
13
|
+
x: int
|
14
|
+
y: int
|
15
|
+
element_index: int = None
|
16
|
+
element_text: str = ""
|
17
|
+
element_bounds: str = ""
|
18
|
+
|
19
|
+
class SwipeActionEvent(MacroEvent):
|
20
|
+
"""Event for swipe actions with coordinates"""
|
21
|
+
start_x: int
|
22
|
+
start_y: int
|
23
|
+
end_x: int
|
24
|
+
end_y: int
|
25
|
+
duration_ms: int
|
26
|
+
|
27
|
+
class DragActionEvent(MacroEvent):
|
28
|
+
"""Event for drag actions with coordinates"""
|
29
|
+
start_x: int
|
30
|
+
start_y: int
|
31
|
+
end_x: int
|
32
|
+
end_y: int
|
33
|
+
duration_ms: int
|
34
|
+
|
35
|
+
class InputTextActionEvent(MacroEvent):
|
36
|
+
"""Event for text input actions"""
|
37
|
+
text: str
|
38
|
+
|
39
|
+
class KeyPressActionEvent(MacroEvent):
|
40
|
+
"""Event for key press actions"""
|
41
|
+
keycode: int
|
42
|
+
key_name: str = ""
|
43
|
+
|
44
|
+
class StartAppEvent(MacroEvent):
|
45
|
+
""""Event for starting an app"""
|
46
|
+
package: str
|
47
|
+
activity: str = None
|
@@ -0,0 +1,96 @@
|
|
1
|
+
from droidrun.agent.context.agent_persona import AgentPersona
|
2
|
+
from droidrun.tools import Tools
|
3
|
+
|
4
|
+
BIG_AGENT = AgentPersona(
|
5
|
+
name="Big Agent",
|
6
|
+
description="Big Agent. Use this as your Big Agent",
|
7
|
+
expertise_areas=[
|
8
|
+
"UI navigation", "button interactions", "text input",
|
9
|
+
"menu navigation", "form filling", "scrolling", "app launching"
|
10
|
+
],
|
11
|
+
allowed_tools=[
|
12
|
+
Tools.swipe.__name__,
|
13
|
+
Tools.input_text.__name__,
|
14
|
+
Tools.press_key.__name__,
|
15
|
+
Tools.drag.__name__,
|
16
|
+
Tools.tap_by_index.__name__,
|
17
|
+
Tools.start_app.__name__,
|
18
|
+
Tools.list_packages.__name__,
|
19
|
+
Tools.remember.__name__,
|
20
|
+
Tools.complete.__name__
|
21
|
+
],
|
22
|
+
required_context=[
|
23
|
+
"ui_state",
|
24
|
+
"screenshot",
|
25
|
+
],
|
26
|
+
user_prompt="""
|
27
|
+
**Current Request:**
|
28
|
+
{goal}
|
29
|
+
**Is the precondition met? What is your reasoning and the next step to address this request?**
|
30
|
+
Explain your thought process then provide code in ```python ... ``` tags if needed.
|
31
|
+
""""",
|
32
|
+
|
33
|
+
system_prompt="""
|
34
|
+
You are a helpful AI assistant that can write and execute Python code to solve problems.
|
35
|
+
|
36
|
+
You will be given a task to perform. You should output:
|
37
|
+
- Python code wrapped in ``` tags that provides the solution to the task, or a step towards the solution.
|
38
|
+
- If there is a precondition for the task, you MUST check if it is met.
|
39
|
+
- If a goal's precondition is unmet, fail the task by calling `complete(success=False, reason='...')` with an explanation.
|
40
|
+
- If you task is complete, you should use the complete(success:bool, reason:str) function within a code block to mark it as finished. The success parameter should be True if the task was completed successfully, and False otherwise. The reason parameter should be a string explaining the reason for failure if failed.
|
41
|
+
|
42
|
+
|
43
|
+
## Context:
|
44
|
+
The following context is given to you for analysis:
|
45
|
+
- **ui_state**: A list of all currently visible UI elements with their indices. Use this to understand what interactive elements are available on the screen.
|
46
|
+
- **screenshots**: A visual screenshot of the current state of the Android screen. This provides visual context for what the user sees. screenshots won't be saved in the chat history. So, make sure to describe what you see and explain the key parts of your plan in your thoughts, as those will be saved and used to assist you in future steps.
|
47
|
+
- **phone_state**: The current app you are navigating in. This tells you which application context you're working within.
|
48
|
+
- **chat history**: You are also given the history of your actions (if any) from your previous steps.
|
49
|
+
- **execution result**: The result of your last Action
|
50
|
+
NOTE: you don't have access to these inputs in your tool calling context
|
51
|
+
|
52
|
+
## Response Format:
|
53
|
+
Example of proper code format:
|
54
|
+
**Task Assignment:**
|
55
|
+
**Task:** "Precondition: Settings app is open. Goal: Navigate to Wi-Fi settings and connect to the network 'HomeNetwork'."
|
56
|
+
|
57
|
+
**(Step 1) Agent Analysis:** I can see the Settings app is open from the screenshot. This is a multi-step task that requires me to first navigate to Wi-Fi settings, then ensure Wi-Fi is enabled, and finally connect to 'HomeNetwork'. Let me start by finding and tapping on the Wi-Fi option in the settings menu. Looking at the UI elements, I can see "Wi-Fi" option at index 3.
|
58
|
+
|
59
|
+
**(Step 1) Agent Action:**
|
60
|
+
```python
|
61
|
+
# First step: Navigate to Wi-Fi settings
|
62
|
+
tap_by_index(3)
|
63
|
+
```
|
64
|
+
|
65
|
+
**(Step 2) Agent Analysis:** Good! I've successfully navigated to the Wi-Fi settings screen. Now I can see the Wi-Fi settings interface. I notice that Wi-Fi appears to be turned off based on the toggle switch at index 1. I need to turn it on before I can see available networks and connect to 'HomeNetwork'.
|
66
|
+
|
67
|
+
**(Step 2) Agent Action:**
|
68
|
+
```python
|
69
|
+
# Second step: Turn on Wi-Fi to see available networks
|
70
|
+
tap_by_index(1)
|
71
|
+
```
|
72
|
+
|
73
|
+
**(Step 3) Agent Analysis:** Excellent! Wi-Fi is now enabled and I can see a list of available networks appearing on the screen. I can see 'HomeNetwork' in the list at index 5. This is the final step - I need to tap on it to initiate the connection, which will complete my assigned task.
|
74
|
+
|
75
|
+
**(Step 3) Agent Action:**
|
76
|
+
```python
|
77
|
+
# Final step: Connect to the target network
|
78
|
+
tap_by_index(5)
|
79
|
+
complete(success=True, reason="Successfully navigated to Wi-Fi settings and initiated connection to HomeNetwork")
|
80
|
+
```
|
81
|
+
```
|
82
|
+
|
83
|
+
## Tools:
|
84
|
+
In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
|
85
|
+
{tool_descriptions}
|
86
|
+
|
87
|
+
|
88
|
+
## Final Answer Guidelines:
|
89
|
+
- When providing a final answer, focus on directly answering the user's question in the response format given
|
90
|
+
- Present the results clearly and concisely as if you computed them directly
|
91
|
+
- Structure your response like you're directly answering the user's query, not explaining how you solved it
|
92
|
+
|
93
|
+
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
94
|
+
"""
|
95
|
+
|
96
|
+
)
|
@@ -1,5 +1,5 @@
|
|
1
1
|
import os
|
2
|
-
from typing import List, Dict
|
2
|
+
from typing import List, Dict, Optional
|
3
3
|
from dataclasses import dataclass
|
4
4
|
import copy
|
5
5
|
|
@@ -11,6 +11,9 @@ class Task:
|
|
11
11
|
description: str
|
12
12
|
status: str
|
13
13
|
agent_type: str
|
14
|
+
# Optional fields to carry success/failure context back to the planner
|
15
|
+
message: Optional[str] = None
|
16
|
+
failure_reason: Optional[str] = None
|
14
17
|
|
15
18
|
|
16
19
|
class TaskManager:
|
@@ -40,14 +43,16 @@ class TaskManager:
|
|
40
43
|
def get_task_history(self):
|
41
44
|
return self.task_history
|
42
45
|
|
43
|
-
def complete_task(self, task: Task):
|
46
|
+
def complete_task(self, task: Task, message: Optional[str] = None):
|
44
47
|
task = copy.deepcopy(task)
|
45
48
|
task.status = self.STATUS_COMPLETED
|
49
|
+
task.message = message
|
46
50
|
self.task_history.append(task)
|
47
51
|
|
48
|
-
def fail_task(self, task: Task):
|
52
|
+
def fail_task(self, task: Task, failure_reason: Optional[str] = None):
|
49
53
|
task = copy.deepcopy(task)
|
50
54
|
task.status = self.STATUS_FAILED
|
55
|
+
task.failure_reason = failure_reason
|
51
56
|
self.task_history.append(task)
|
52
57
|
|
53
58
|
def get_completed_tasks(self) -> list[dict]:
|
@@ -16,7 +16,7 @@ from droidrun.agent.planner import PlannerAgent
|
|
16
16
|
from droidrun.agent.context.task_manager import TaskManager
|
17
17
|
from droidrun.agent.utils.trajectory import Trajectory
|
18
18
|
from droidrun.tools import Tools, describe_tools
|
19
|
-
from droidrun.agent.common.events import ScreenshotEvent
|
19
|
+
from droidrun.agent.common.events import ScreenshotEvent, MacroEvent
|
20
20
|
from droidrun.agent.common.default import MockWorkflow
|
21
21
|
from droidrun.agent.context import ContextInjectionManager
|
22
22
|
from droidrun.agent.context.agent_persona import AgentPersona
|
@@ -68,7 +68,8 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
68
68
|
reflection: bool = False,
|
69
69
|
enable_tracing: bool = False,
|
70
70
|
debug: bool = False,
|
71
|
-
save_trajectories:
|
71
|
+
save_trajectories: str = "none",
|
72
|
+
excluded_tools: List[str] = None,
|
72
73
|
*args,
|
73
74
|
**kwargs
|
74
75
|
):
|
@@ -85,8 +86,13 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
85
86
|
reflection: Whether to reflect on steps the CodeActAgent did to give the PlannerAgent advice
|
86
87
|
enable_tracing: Whether to enable Arize Phoenix tracing
|
87
88
|
debug: Whether to enable verbose debug logging
|
89
|
+
save_trajectories: Trajectory saving level. Can be:
|
90
|
+
- "none" (no saving)
|
91
|
+
- "step" (save per step)
|
92
|
+
- "action" (save per action)
|
88
93
|
**kwargs: Additional keyword arguments to pass to the agents
|
89
94
|
"""
|
95
|
+
self.user_id = kwargs.pop("user_id", None)
|
90
96
|
super().__init__(timeout=timeout ,*args,**kwargs)
|
91
97
|
# Configure default logging if not already configured
|
92
98
|
self._configure_default_logging(debug=debug)
|
@@ -112,18 +118,33 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
112
118
|
self.debug = debug
|
113
119
|
|
114
120
|
self.event_counter = 0
|
115
|
-
|
121
|
+
# Handle backward compatibility: bool -> str mapping
|
122
|
+
if isinstance(save_trajectories, bool):
|
123
|
+
self.save_trajectories = "step" if save_trajectories else "none"
|
124
|
+
else:
|
125
|
+
# Validate string values
|
126
|
+
valid_values = ["none", "step", "action"]
|
127
|
+
if save_trajectories not in valid_values:
|
128
|
+
logger.warning(f"Invalid save_trajectories value: {save_trajectories}. Using 'none' instead.")
|
129
|
+
self.save_trajectories = "none"
|
130
|
+
else:
|
131
|
+
self.save_trajectories = save_trajectories
|
116
132
|
|
117
|
-
self.trajectory = Trajectory()
|
133
|
+
self.trajectory = Trajectory(goal=goal)
|
118
134
|
self.task_manager = TaskManager()
|
119
135
|
self.task_iter = None
|
136
|
+
|
137
|
+
|
120
138
|
self.cim = ContextInjectionManager(personas=personas)
|
121
139
|
self.current_episodic_memory = None
|
122
140
|
|
123
141
|
logger.info("🤖 Initializing DroidAgent...")
|
142
|
+
logger.info(f"💾 Trajectory saving level: {self.save_trajectories}")
|
124
143
|
|
125
|
-
self.tool_list = describe_tools(tools)
|
144
|
+
self.tool_list = describe_tools(tools, excluded_tools)
|
126
145
|
self.tools_instance = tools
|
146
|
+
|
147
|
+
self.tools_instance.save_trajectories = self.save_trajectories
|
127
148
|
|
128
149
|
|
129
150
|
if self.reasoning:
|
@@ -162,17 +183,18 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
162
183
|
enable_tracing=enable_tracing,
|
163
184
|
debug=debug,
|
164
185
|
save_trajectories=save_trajectories,
|
165
|
-
)
|
186
|
+
),
|
187
|
+
self.user_id
|
166
188
|
)
|
167
189
|
|
168
190
|
|
169
191
|
logger.info("✅ DroidAgent initialized successfully.")
|
170
192
|
|
171
|
-
def run(self) -> WorkflowHandler:
|
193
|
+
def run(self, *args, **kwargs) -> WorkflowHandler:
|
172
194
|
"""
|
173
195
|
Run the DroidAgent workflow.
|
174
196
|
"""
|
175
|
-
return super().run()
|
197
|
+
return super().run(*args, **kwargs)
|
176
198
|
|
177
199
|
@step
|
178
200
|
async def execute_task(
|
@@ -232,16 +254,24 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
232
254
|
return CodeActResultEvent(success=False, reason=f"Error: {str(e)}", task=task, steps=[])
|
233
255
|
|
234
256
|
@step
|
235
|
-
async def handle_codeact_execute(self, ctx: Context, ev: CodeActResultEvent) -> FinalizeEvent | ReflectionEvent:
|
257
|
+
async def handle_codeact_execute(self, ctx: Context, ev: CodeActResultEvent) -> FinalizeEvent | ReflectionEvent | ReasoningLogicEvent:
|
236
258
|
try:
|
237
259
|
task = ev.task
|
238
260
|
if not self.reasoning:
|
239
261
|
return FinalizeEvent(success=ev.success, reason=ev.reason, output=ev.reason, task=[task], tasks=[task], steps=ev.steps)
|
240
262
|
|
241
|
-
if self.reflection:
|
263
|
+
if self.reflection and ev.success:
|
242
264
|
return ReflectionEvent(task=task)
|
243
|
-
|
244
|
-
|
265
|
+
|
266
|
+
# Reasoning is enabled but reflection is disabled.
|
267
|
+
# Success: mark complete and proceed to next step in reasoning loop.
|
268
|
+
# Failure: mark failed and trigger planner immediately without advancing to the next queued task.
|
269
|
+
if ev.success:
|
270
|
+
self.task_manager.complete_task(task, message=ev.reason)
|
271
|
+
return ReasoningLogicEvent()
|
272
|
+
else:
|
273
|
+
self.task_manager.fail_task(task, failure_reason=ev.reason)
|
274
|
+
return ReasoningLogicEvent(force_planning=True)
|
245
275
|
|
246
276
|
except Exception as e:
|
247
277
|
logger.error(f"❌ Error during DroidAgent execution: {e}")
|
@@ -293,7 +323,7 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
293
323
|
if ev.reflection:
|
294
324
|
handler = planner_agent.run(remembered_info=self.tools_instance.memory, reflection=ev.reflection)
|
295
325
|
else:
|
296
|
-
if self.task_iter:
|
326
|
+
if not ev.force_planning and self.task_iter:
|
297
327
|
try:
|
298
328
|
task = next(self.task_iter)
|
299
329
|
return CodeActExecuteEvent(task=task, reflection=None)
|
@@ -369,7 +399,8 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
369
399
|
success=ev.success,
|
370
400
|
output=ev.output,
|
371
401
|
steps=ev.steps,
|
372
|
-
)
|
402
|
+
),
|
403
|
+
self.user_id
|
373
404
|
)
|
374
405
|
flush()
|
375
406
|
|
@@ -381,7 +412,7 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
381
412
|
"steps": ev.steps,
|
382
413
|
}
|
383
414
|
|
384
|
-
if self.trajectory and self.save_trajectories:
|
415
|
+
if self.trajectory and self.save_trajectories != "none":
|
385
416
|
self.trajectory.save_trajectory()
|
386
417
|
|
387
418
|
return StopEvent(result)
|
@@ -391,13 +422,16 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
391
422
|
if isinstance(ev, EpisodicMemoryEvent):
|
392
423
|
self.current_episodic_memory = ev.episodic_memory
|
393
424
|
return
|
425
|
+
|
426
|
+
|
394
427
|
|
395
428
|
if not isinstance(ev, StopEvent):
|
396
429
|
ctx.write_event_to_stream(ev)
|
397
430
|
|
398
431
|
if isinstance(ev, ScreenshotEvent):
|
399
432
|
self.trajectory.screenshots.append(ev.screenshot)
|
400
|
-
|
433
|
+
elif isinstance(ev, MacroEvent):
|
434
|
+
self.trajectory.macro.append(ev)
|
401
435
|
else:
|
402
436
|
self.trajectory.events.append(ev)
|
403
437
|
|
droidrun/agent/droid/events.py
CHANGED
@@ -130,13 +130,14 @@ class PlannerAgent(Workflow):
|
|
130
130
|
self.steps_counter += 1
|
131
131
|
logger.info(f"🧠 Thinking about how to plan the goal...")
|
132
132
|
|
133
|
+
# if vision is disabled, screenshot should save to trajectory
|
134
|
+
screenshot = (self.tools_instance.take_screenshot())[1]
|
135
|
+
ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
|
133
136
|
if self.vision:
|
134
|
-
screenshot = (await self.tools_instance.take_screenshot())[1]
|
135
|
-
ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
|
136
137
|
await ctx.set("screenshot", screenshot)
|
137
138
|
|
138
139
|
try:
|
139
|
-
state =
|
140
|
+
state = self.tools_instance.get_state()
|
140
141
|
await ctx.set("ui_state", state["a11y_tree"])
|
141
142
|
await ctx.set("phone_state", state["phone_state"])
|
142
143
|
except Exception as e:
|
@@ -168,11 +169,15 @@ class PlannerAgent(Workflow):
|
|
168
169
|
try:
|
169
170
|
result = await self.executer.execute(ctx, code)
|
170
171
|
logger.info(f"📝 Planning complete")
|
171
|
-
logger.debug(f" - Planning code executed. Result: {result}")
|
172
|
+
logger.debug(f" - Planning code executed. Result: {result['output']}")
|
173
|
+
|
174
|
+
screenshots = result['screenshots']
|
175
|
+
for screenshot in screenshots[:-1]: # the last screenshot will be captured by next step
|
176
|
+
ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
|
172
177
|
|
173
178
|
await self.chat_memory.aput(
|
174
179
|
ChatMessage(
|
175
|
-
role="user", content=f"Execution Result:\n```\n{result}\n```"
|
180
|
+
role="user", content=f"Execution Result:\n```\n{result['output']}\n```"
|
176
181
|
)
|
177
182
|
)
|
178
183
|
|
@@ -241,15 +246,15 @@ wrap your code inside this:
|
|
241
246
|
logger.debug(f" - Sending {len(chat_history)} messages to LLM.")
|
242
247
|
|
243
248
|
model = self.llm.class_name()
|
244
|
-
if
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
249
|
+
if self.vision == True:
|
250
|
+
if model == "DeepSeek":
|
251
|
+
logger.warning(
|
252
|
+
"[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
|
253
|
+
)
|
254
|
+
else:
|
255
|
+
chat_history = await chat_utils.add_screenshot_image_block(
|
256
|
+
await ctx.get("screenshot"), chat_history
|
257
|
+
)
|
253
258
|
|
254
259
|
|
255
260
|
|
@@ -208,7 +208,7 @@ async def add_task_history_block(completed_tasks: list[dict], failed_tasks: list
|
|
208
208
|
all_tasks = completed_tasks + failed_tasks
|
209
209
|
|
210
210
|
if all_tasks:
|
211
|
-
task_history += "Task History (chronological
|
211
|
+
task_history += "### Task Execution History (chronological):\n"
|
212
212
|
for i, task in enumerate(all_tasks, 1):
|
213
213
|
if hasattr(task, 'description'):
|
214
214
|
status_indicator = "[success]" if hasattr(task, 'status') and task.status == "completed" else "[failed]"
|
droidrun/agent/utils/executer.py
CHANGED
@@ -9,6 +9,7 @@ from llama_index.core.workflow import Context
|
|
9
9
|
import asyncio
|
10
10
|
from asyncio import AbstractEventLoop
|
11
11
|
import threading
|
12
|
+
from droidrun.tools.adb import AdbTools
|
12
13
|
|
13
14
|
logger = logging.getLogger("droidrun")
|
14
15
|
|
@@ -29,6 +30,7 @@ class SimpleCodeExecutor:
|
|
29
30
|
locals: Dict[str, Any] = {},
|
30
31
|
globals: Dict[str, Any] = {},
|
31
32
|
tools={},
|
33
|
+
tools_instance=None,
|
32
34
|
use_same_scope: bool = True,
|
33
35
|
):
|
34
36
|
"""
|
@@ -38,8 +40,11 @@ class SimpleCodeExecutor:
|
|
38
40
|
locals: Local variables to use in the execution context
|
39
41
|
globals: Global variables to use in the execution context
|
40
42
|
tools: List of tools available for execution
|
43
|
+
tools_instance: Original tools instance (e.g., AdbTools instance)
|
41
44
|
"""
|
42
45
|
|
46
|
+
self.tools_instance = tools_instance
|
47
|
+
|
43
48
|
# loop throught tools and add them to globals, but before that check if tool value is async, if so convert it to sync. tools is a dictionary of tool name: function
|
44
49
|
# e.g. tools = {'tool_name': tool_function}
|
45
50
|
|
@@ -74,6 +79,7 @@ class SimpleCodeExecutor:
|
|
74
79
|
self.locals = locals
|
75
80
|
self.loop = loop
|
76
81
|
self.use_same_scope = use_same_scope
|
82
|
+
self.tools = tools
|
77
83
|
if self.use_same_scope:
|
78
84
|
# If using the same scope, set the globals and locals to the same dictionary
|
79
85
|
self.globals = self.locals = {
|
@@ -93,7 +99,12 @@ class SimpleCodeExecutor:
|
|
93
99
|
"""
|
94
100
|
# Update UI elements before execution
|
95
101
|
self.globals['ui_state'] = await ctx.get("ui_state", None)
|
102
|
+
self.globals['step_screenshots'] = []
|
103
|
+
self.globals['step_ui_states'] = []
|
96
104
|
|
105
|
+
if self.tools_instance and isinstance(self.tools_instance, AdbTools):
|
106
|
+
self.tools_instance._set_context(ctx)
|
107
|
+
|
97
108
|
# Capture stdout and stderr
|
98
109
|
stdout = io.StringIO()
|
99
110
|
stderr = io.StringIO()
|
@@ -129,4 +140,9 @@ class SimpleCodeExecutor:
|
|
129
140
|
output = f"Error: {type(e).__name__}: {str(e)}\n"
|
130
141
|
output += traceback.format_exc()
|
131
142
|
|
132
|
-
|
143
|
+
result = {
|
144
|
+
'output': output,
|
145
|
+
'screenshots': self.globals['step_screenshots'],
|
146
|
+
'ui_states': self.globals['step_ui_states']
|
147
|
+
}
|
148
|
+
return result
|