droidrun 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/__init__.py +7 -12
- droidrun/agent/codeact/codeact_agent.py +9 -7
- droidrun/agent/common/events.py +44 -1
- droidrun/agent/context/personas/__init__.py +2 -2
- droidrun/agent/context/personas/big_agent.py +96 -0
- droidrun/agent/context/personas/ui_expert.py +1 -0
- droidrun/agent/droid/droid_agent.py +63 -11
- droidrun/agent/droid/events.py +4 -0
- droidrun/agent/planner/planner_agent.py +2 -2
- droidrun/agent/utils/executer.py +10 -2
- droidrun/agent/utils/llm_picker.py +1 -0
- droidrun/agent/utils/trajectory.py +258 -11
- droidrun/cli/main.py +179 -86
- droidrun/macro/__init__.py +14 -0
- droidrun/macro/__main__.py +10 -0
- droidrun/macro/cli.py +228 -0
- droidrun/macro/replay.py +309 -0
- droidrun/portal.py +138 -0
- droidrun/telemetry/__init__.py +4 -0
- droidrun/telemetry/events.py +27 -0
- droidrun/telemetry/tracker.py +84 -0
- droidrun/tools/adb.py +704 -372
- droidrun/tools/ios.py +169 -166
- droidrun/tools/tools.py +70 -17
- {droidrun-0.3.1.dist-info → droidrun-0.3.3.dist-info}/METADATA +31 -29
- droidrun-0.3.3.dist-info/RECORD +54 -0
- droidrun/adb/__init__.py +0 -13
- droidrun/adb/device.py +0 -315
- droidrun/adb/manager.py +0 -93
- droidrun/adb/wrapper.py +0 -226
- droidrun/agent/context/personas/extractor.py +0 -52
- droidrun-0.3.1.dist-info/RECORD +0 -50
- {droidrun-0.3.1.dist-info → droidrun-0.3.3.dist-info}/WHEEL +0 -0
- {droidrun-0.3.1.dist-info → droidrun-0.3.3.dist-info}/entry_points.txt +0 -0
- {droidrun-0.3.1.dist-info → droidrun-0.3.3.dist-info}/licenses/LICENSE +0 -0
droidrun/__init__.py
CHANGED
@@ -5,27 +5,22 @@ DroidRun - A framework for controlling Android devices through LLM agents.
|
|
5
5
|
__version__ = "0.3.0"
|
6
6
|
|
7
7
|
# Import main classes for easier access
|
8
|
-
from droidrun.agent.codeact.codeact_agent import CodeActAgent
|
9
|
-
from droidrun.agent.planner.planner_agent import PlannerAgent
|
10
|
-
from droidrun.agent.utils.executer import SimpleCodeExecutor
|
11
8
|
from droidrun.agent.utils.llm_picker import load_llm
|
12
|
-
from droidrun.
|
13
|
-
from droidrun.tools.tools import Tools
|
14
|
-
from droidrun.tools.adb import AdbTools
|
15
|
-
from droidrun.tools.ios import IOSTools
|
9
|
+
from droidrun.tools import Tools, AdbTools, IOSTools
|
16
10
|
from droidrun.agent.droid import DroidAgent
|
17
11
|
|
12
|
+
# Import macro functionality
|
13
|
+
from droidrun.macro import MacroPlayer, replay_macro_file, replay_macro_folder
|
14
|
+
|
18
15
|
|
19
16
|
# Make main components available at package level
|
20
17
|
__all__ = [
|
21
18
|
"DroidAgent",
|
22
|
-
"CodeActAgent",
|
23
|
-
"PlannerAgent",
|
24
|
-
"DeviceManager",
|
25
|
-
"Tools",
|
26
19
|
"load_llm",
|
27
|
-
"SimpleCodeExecutor",
|
28
20
|
"Tools",
|
29
21
|
"AdbTools",
|
30
22
|
"IOSTools",
|
23
|
+
"MacroPlayer",
|
24
|
+
"replay_macro_file",
|
25
|
+
"replay_macro_folder",
|
31
26
|
]
|
@@ -97,6 +97,7 @@ class CodeActAgent(Workflow):
|
|
97
97
|
loop=asyncio.get_event_loop(),
|
98
98
|
locals={},
|
99
99
|
tools=self.tool_list,
|
100
|
+
tools_instance=tools_instance,
|
100
101
|
globals={"__builtins__": __builtins__},
|
101
102
|
)
|
102
103
|
|
@@ -169,7 +170,7 @@ class CodeActAgent(Workflow):
|
|
169
170
|
"[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
|
170
171
|
)
|
171
172
|
elif self.vision == True and context == "screenshot":
|
172
|
-
screenshot = (
|
173
|
+
screenshot = (self.tools.take_screenshot())[1]
|
173
174
|
ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
|
174
175
|
|
175
176
|
await ctx.set("screenshot", screenshot)
|
@@ -177,7 +178,7 @@ class CodeActAgent(Workflow):
|
|
177
178
|
|
178
179
|
if context == "ui_state":
|
179
180
|
try:
|
180
|
-
state =
|
181
|
+
state = self.tools.get_state()
|
181
182
|
await ctx.set("ui_state", state["a11y_tree"])
|
182
183
|
chat_history = await chat_utils.add_ui_text_block(
|
183
184
|
state["a11y_tree"], chat_history
|
@@ -189,7 +190,7 @@ class CodeActAgent(Workflow):
|
|
189
190
|
|
190
191
|
if context == "packages":
|
191
192
|
chat_history = await chat_utils.add_packages_block(
|
192
|
-
|
193
|
+
self.tools.list_packages(include_system_apps=True),
|
193
194
|
chat_history,
|
194
195
|
)
|
195
196
|
|
@@ -242,7 +243,7 @@ class CodeActAgent(Workflow):
|
|
242
243
|
code = ev.code
|
243
244
|
assert code, "Code cannot be empty."
|
244
245
|
logger.info(f"⚡ Executing action...")
|
245
|
-
logger.
|
246
|
+
logger.info(f"Code to execute:\n```python\n{code}\n```")
|
246
247
|
|
247
248
|
try:
|
248
249
|
self.code_exec_counter += 1
|
@@ -312,6 +313,7 @@ class CodeActAgent(Workflow):
|
|
312
313
|
{
|
313
314
|
"success": ev.success,
|
314
315
|
"reason": ev.reason,
|
316
|
+
"output": ev.reason,
|
315
317
|
"codeact_steps": self.steps_counter,
|
316
318
|
"code_executions": self.code_exec_counter,
|
317
319
|
}
|
@@ -321,7 +323,7 @@ class CodeActAgent(Workflow):
|
|
321
323
|
EpisodicMemoryEvent(episodic_memory=self.episodic_memory)
|
322
324
|
)
|
323
325
|
|
324
|
-
return StopEvent(result
|
326
|
+
return StopEvent(result)
|
325
327
|
|
326
328
|
async def _get_llm_response(
|
327
329
|
self, ctx: Context, chat_history: List[ChatMessage]
|
@@ -397,13 +399,13 @@ class CodeActAgent(Workflow):
|
|
397
399
|
ui_state = None
|
398
400
|
|
399
401
|
try:
|
400
|
-
_, screenshot_bytes =
|
402
|
+
_, screenshot_bytes = self.tools.take_screenshot()
|
401
403
|
screenshot = screenshot_bytes
|
402
404
|
except Exception as e:
|
403
405
|
logger.warning(f"Failed to capture final screenshot: {e}")
|
404
406
|
|
405
407
|
try:
|
406
|
-
(a11y_tree, phone_state) =
|
408
|
+
(a11y_tree, phone_state) = self.tools.get_state()
|
407
409
|
except Exception as e:
|
408
410
|
logger.warning(f"Failed to capture final UI state: {e}")
|
409
411
|
|
droidrun/agent/common/events.py
CHANGED
@@ -1,4 +1,47 @@
|
|
1
1
|
from llama_index.core.workflow import Event
|
2
2
|
|
3
3
|
class ScreenshotEvent(Event):
|
4
|
-
screenshot: bytes
|
4
|
+
screenshot: bytes
|
5
|
+
|
6
|
+
class MacroEvent(Event):
|
7
|
+
"""Base class for coordinate-based action events"""
|
8
|
+
action_type: str
|
9
|
+
description: str
|
10
|
+
|
11
|
+
class TapActionEvent(MacroEvent):
|
12
|
+
"""Event for tap actions with coordinates"""
|
13
|
+
x: int
|
14
|
+
y: int
|
15
|
+
element_index: int = None
|
16
|
+
element_text: str = ""
|
17
|
+
element_bounds: str = ""
|
18
|
+
|
19
|
+
class SwipeActionEvent(MacroEvent):
|
20
|
+
"""Event for swipe actions with coordinates"""
|
21
|
+
start_x: int
|
22
|
+
start_y: int
|
23
|
+
end_x: int
|
24
|
+
end_y: int
|
25
|
+
duration_ms: int
|
26
|
+
|
27
|
+
class DragActionEvent(MacroEvent):
|
28
|
+
"""Event for drag actions with coordinates"""
|
29
|
+
start_x: int
|
30
|
+
start_y: int
|
31
|
+
end_x: int
|
32
|
+
end_y: int
|
33
|
+
duration_ms: int
|
34
|
+
|
35
|
+
class InputTextActionEvent(MacroEvent):
|
36
|
+
"""Event for text input actions"""
|
37
|
+
text: str
|
38
|
+
|
39
|
+
class KeyPressActionEvent(MacroEvent):
|
40
|
+
"""Event for key press actions"""
|
41
|
+
keycode: int
|
42
|
+
key_name: str = ""
|
43
|
+
|
44
|
+
class StartAppEvent(MacroEvent):
|
45
|
+
""""Event for starting an app"""
|
46
|
+
package: str
|
47
|
+
activity: str = None
|
@@ -1,11 +1,11 @@
|
|
1
1
|
from .default import DEFAULT
|
2
2
|
from .ui_expert import UI_EXPERT
|
3
3
|
from .app_starter import APP_STARTER_EXPERT
|
4
|
-
from .
|
4
|
+
from .big_agent import BIG_AGENT
|
5
5
|
|
6
6
|
__all__ = [
|
7
7
|
'DEFAULT',
|
8
8
|
'UI_EXPERT',
|
9
9
|
'APP_STARTER_EXPERT',
|
10
|
-
'
|
10
|
+
'BIG_AGENT',
|
11
11
|
]
|
@@ -0,0 +1,96 @@
|
|
1
|
+
from droidrun.agent.context.agent_persona import AgentPersona
|
2
|
+
from droidrun.tools import Tools
|
3
|
+
|
4
|
+
BIG_AGENT = AgentPersona(
|
5
|
+
name="Big Agent",
|
6
|
+
description="Big Agent. Use this as your Big Agent",
|
7
|
+
expertise_areas=[
|
8
|
+
"UI navigation", "button interactions", "text input",
|
9
|
+
"menu navigation", "form filling", "scrolling", "app launching"
|
10
|
+
],
|
11
|
+
allowed_tools=[
|
12
|
+
Tools.swipe.__name__,
|
13
|
+
Tools.input_text.__name__,
|
14
|
+
Tools.press_key.__name__,
|
15
|
+
Tools.drag.__name__,
|
16
|
+
Tools.tap_by_index.__name__,
|
17
|
+
Tools.start_app.__name__,
|
18
|
+
Tools.list_packages.__name__,
|
19
|
+
Tools.remember.__name__,
|
20
|
+
Tools.complete.__name__
|
21
|
+
],
|
22
|
+
required_context=[
|
23
|
+
"ui_state",
|
24
|
+
"screenshot",
|
25
|
+
],
|
26
|
+
user_prompt="""
|
27
|
+
**Current Request:**
|
28
|
+
{goal}
|
29
|
+
**Is the precondition met? What is your reasoning and the next step to address this request?**
|
30
|
+
Explain your thought process then provide code in ```python ... ``` tags if needed.
|
31
|
+
""""",
|
32
|
+
|
33
|
+
system_prompt="""
|
34
|
+
You are a helpful AI assistant that can write and execute Python code to solve problems.
|
35
|
+
|
36
|
+
You will be given a task to perform. You should output:
|
37
|
+
- Python code wrapped in ``` tags that provides the solution to the task, or a step towards the solution.
|
38
|
+
- If there is a precondition for the task, you MUST check if it is met.
|
39
|
+
- If a goal's precondition is unmet, fail the task by calling `complete(success=False, reason='...')` with an explanation.
|
40
|
+
- If you task is complete, you should use the complete(success:bool, reason:str) function within a code block to mark it as finished. The success parameter should be True if the task was completed successfully, and False otherwise. The reason parameter should be a string explaining the reason for failure if failed.
|
41
|
+
|
42
|
+
|
43
|
+
## Context:
|
44
|
+
The following context is given to you for analysis:
|
45
|
+
- **ui_state**: A list of all currently visible UI elements with their indices. Use this to understand what interactive elements are available on the screen.
|
46
|
+
- **screenshots**: A visual screenshot of the current state of the Android screen. This provides visual context for what the user sees. screenshots won't be saved in the chat history. So, make sure to describe what you see and explain the key parts of your plan in your thoughts, as those will be saved and used to assist you in future steps.
|
47
|
+
- **phone_state**: The current app you are navigating in. This tells you which application context you're working within.
|
48
|
+
- **chat history**: You are also given the history of your actions (if any) from your previous steps.
|
49
|
+
- **execution result**: The result of your last Action
|
50
|
+
NOTE: you don't have access to these inputs in your tool calling context
|
51
|
+
|
52
|
+
## Response Format:
|
53
|
+
Example of proper code format:
|
54
|
+
**Task Assignment:**
|
55
|
+
**Task:** "Precondition: Settings app is open. Goal: Navigate to Wi-Fi settings and connect to the network 'HomeNetwork'."
|
56
|
+
|
57
|
+
**(Step 1) Agent Analysis:** I can see the Settings app is open from the screenshot. This is a multi-step task that requires me to first navigate to Wi-Fi settings, then ensure Wi-Fi is enabled, and finally connect to 'HomeNetwork'. Let me start by finding and tapping on the Wi-Fi option in the settings menu. Looking at the UI elements, I can see "Wi-Fi" option at index 3.
|
58
|
+
|
59
|
+
**(Step 1) Agent Action:**
|
60
|
+
```python
|
61
|
+
# First step: Navigate to Wi-Fi settings
|
62
|
+
tap_by_index(3)
|
63
|
+
```
|
64
|
+
|
65
|
+
**(Step 2) Agent Analysis:** Good! I've successfully navigated to the Wi-Fi settings screen. Now I can see the Wi-Fi settings interface. I notice that Wi-Fi appears to be turned off based on the toggle switch at index 1. I need to turn it on before I can see available networks and connect to 'HomeNetwork'.
|
66
|
+
|
67
|
+
**(Step 2) Agent Action:**
|
68
|
+
```python
|
69
|
+
# Second step: Turn on Wi-Fi to see available networks
|
70
|
+
tap_by_index(1)
|
71
|
+
```
|
72
|
+
|
73
|
+
**(Step 3) Agent Analysis:** Excellent! Wi-Fi is now enabled and I can see a list of available networks appearing on the screen. I can see 'HomeNetwork' in the list at index 5. This is the final step - I need to tap on it to initiate the connection, which will complete my assigned task.
|
74
|
+
|
75
|
+
**(Step 3) Agent Action:**
|
76
|
+
```python
|
77
|
+
# Final step: Connect to the target network
|
78
|
+
tap_by_index(5)
|
79
|
+
complete(success=True, reason="Successfully navigated to Wi-Fi settings and initiated connection to HomeNetwork")
|
80
|
+
```
|
81
|
+
```
|
82
|
+
|
83
|
+
## Tools:
|
84
|
+
In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
|
85
|
+
{tool_descriptions}
|
86
|
+
|
87
|
+
|
88
|
+
## Final Answer Guidelines:
|
89
|
+
- When providing a final answer, focus on directly answering the user's question in the response format given
|
90
|
+
- Present the results clearly and concisely as if you computed them directly
|
91
|
+
- Structure your response like you're directly answering the user's query, not explaining how you solved it
|
92
|
+
|
93
|
+
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
94
|
+
"""
|
95
|
+
|
96
|
+
)
|
@@ -8,6 +8,7 @@ from typing import List
|
|
8
8
|
|
9
9
|
from llama_index.core.llms.llm import LLM
|
10
10
|
from llama_index.core.workflow import step, StartEvent, StopEvent, Workflow, Context
|
11
|
+
from llama_index.core.workflow.handler import WorkflowHandler
|
11
12
|
from droidrun.agent.droid.events import *
|
12
13
|
from droidrun.agent.codeact import CodeActAgent
|
13
14
|
from droidrun.agent.codeact.events import EpisodicMemoryEvent
|
@@ -15,12 +16,13 @@ from droidrun.agent.planner import PlannerAgent
|
|
15
16
|
from droidrun.agent.context.task_manager import TaskManager
|
16
17
|
from droidrun.agent.utils.trajectory import Trajectory
|
17
18
|
from droidrun.tools import Tools, describe_tools
|
18
|
-
from droidrun.agent.common.events import ScreenshotEvent
|
19
|
+
from droidrun.agent.common.events import ScreenshotEvent, MacroEvent
|
19
20
|
from droidrun.agent.common.default import MockWorkflow
|
20
21
|
from droidrun.agent.context import ContextInjectionManager
|
21
22
|
from droidrun.agent.context.agent_persona import AgentPersona
|
22
23
|
from droidrun.agent.context.personas import DEFAULT
|
23
24
|
from droidrun.agent.oneflows.reflector import Reflector
|
25
|
+
from droidrun.telemetry import capture, flush, DroidAgentInitEvent, DroidAgentFinalizeEvent
|
24
26
|
|
25
27
|
|
26
28
|
logger = logging.getLogger("droidrun")
|
@@ -67,6 +69,7 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
67
69
|
enable_tracing: bool = False,
|
68
70
|
debug: bool = False,
|
69
71
|
save_trajectories: bool = False,
|
72
|
+
excluded_tools: List[str] = None,
|
70
73
|
*args,
|
71
74
|
**kwargs
|
72
75
|
):
|
@@ -85,8 +88,8 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
85
88
|
debug: Whether to enable verbose debug logging
|
86
89
|
**kwargs: Additional keyword arguments to pass to the agents
|
87
90
|
"""
|
91
|
+
self.user_id = kwargs.pop("user_id", None)
|
88
92
|
super().__init__(timeout=timeout ,*args,**kwargs)
|
89
|
-
|
90
93
|
# Configure default logging if not already configured
|
91
94
|
self._configure_default_logging(debug=debug)
|
92
95
|
|
@@ -113,15 +116,17 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
113
116
|
self.event_counter = 0
|
114
117
|
self.save_trajectories = save_trajectories
|
115
118
|
|
116
|
-
self.trajectory = Trajectory()
|
119
|
+
self.trajectory = Trajectory(goal=goal)
|
117
120
|
self.task_manager = TaskManager()
|
118
121
|
self.task_iter = None
|
122
|
+
|
123
|
+
|
119
124
|
self.cim = ContextInjectionManager(personas=personas)
|
120
125
|
self.current_episodic_memory = None
|
121
126
|
|
122
127
|
logger.info("🤖 Initializing DroidAgent...")
|
123
128
|
|
124
|
-
self.tool_list = describe_tools(tools)
|
129
|
+
self.tool_list = describe_tools(tools, excluded_tools)
|
125
130
|
self.tools_instance = tools
|
126
131
|
|
127
132
|
|
@@ -146,8 +151,33 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
146
151
|
else:
|
147
152
|
logger.debug("🚫 Planning disabled - will execute tasks directly with CodeActAgent")
|
148
153
|
self.planner_agent = None
|
154
|
+
|
155
|
+
capture(
|
156
|
+
DroidAgentInitEvent(
|
157
|
+
goal=goal,
|
158
|
+
llm=llm.class_name(),
|
159
|
+
tools=",".join(self.tool_list),
|
160
|
+
personas=",".join([p.name for p in personas]),
|
161
|
+
max_steps=max_steps,
|
162
|
+
timeout=timeout,
|
163
|
+
vision=vision,
|
164
|
+
reasoning=reasoning,
|
165
|
+
reflection=reflection,
|
166
|
+
enable_tracing=enable_tracing,
|
167
|
+
debug=debug,
|
168
|
+
save_trajectories=save_trajectories,
|
169
|
+
),
|
170
|
+
self.user_id
|
171
|
+
)
|
172
|
+
|
149
173
|
|
150
174
|
logger.info("✅ DroidAgent initialized successfully.")
|
175
|
+
|
176
|
+
def run(self) -> WorkflowHandler:
|
177
|
+
"""
|
178
|
+
Run the DroidAgent workflow.
|
179
|
+
"""
|
180
|
+
return super().run()
|
151
181
|
|
152
182
|
@step
|
153
183
|
async def execute_task(
|
@@ -211,7 +241,7 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
211
241
|
try:
|
212
242
|
task = ev.task
|
213
243
|
if not self.reasoning:
|
214
|
-
return FinalizeEvent(success=ev.success, reason=ev.reason, task=[task], steps=ev.steps)
|
244
|
+
return FinalizeEvent(success=ev.success, reason=ev.reason, output=ev.reason, task=[task], tasks=[task], steps=ev.steps)
|
215
245
|
|
216
246
|
if self.reflection:
|
217
247
|
return ReflectionEvent(task=task)
|
@@ -223,7 +253,8 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
223
253
|
if self.debug:
|
224
254
|
import traceback
|
225
255
|
logger.error(traceback.format_exc())
|
226
|
-
|
256
|
+
tasks = self.task_manager.get_task_history()
|
257
|
+
return FinalizeEvent(success=False, reason=str(e), output=str(e), task=tasks, tasks=tasks, steps=self.step_counter)
|
227
258
|
|
228
259
|
|
229
260
|
@step
|
@@ -259,7 +290,9 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
259
290
|
) -> FinalizeEvent | CodeActExecuteEvent:
|
260
291
|
try:
|
261
292
|
if self.step_counter >= self.max_steps:
|
262
|
-
|
293
|
+
output = f"Reached maximum number of steps ({self.max_steps})"
|
294
|
+
tasks = self.task_manager.get_task_history()
|
295
|
+
return FinalizeEvent(success=False, reason=output, output=output, task=tasks, tasks=tasks, steps=self.step_counter)
|
263
296
|
self.step_counter += 1
|
264
297
|
|
265
298
|
if ev.reflection:
|
@@ -286,10 +319,13 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
286
319
|
|
287
320
|
if self.task_manager.goal_completed:
|
288
321
|
logger.info(f"✅ Goal completed: {self.task_manager.message}")
|
289
|
-
|
322
|
+
tasks = self.task_manager.get_task_history()
|
323
|
+
return FinalizeEvent(success=True, reason=self.task_manager.message, output=self.task_manager.message, task=tasks, tasks=tasks, steps=self.step_counter)
|
290
324
|
if not self.tasks:
|
291
325
|
logger.warning("No tasks generated by planner")
|
292
|
-
|
326
|
+
output = "Planner did not generate any tasks"
|
327
|
+
tasks = self.task_manager.get_task_history()
|
328
|
+
return FinalizeEvent(success=False, reason=output, output=output, task=tasks, tasks=tasks, steps=self.step_counter)
|
293
329
|
|
294
330
|
return CodeActExecuteEvent(task=next(self.task_iter), reflection=None)
|
295
331
|
|
@@ -298,7 +334,8 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
298
334
|
if self.debug:
|
299
335
|
import traceback
|
300
336
|
logger.error(traceback.format_exc())
|
301
|
-
|
337
|
+
tasks = self.task_manager.get_task_history()
|
338
|
+
return FinalizeEvent(success=False, reason=str(e), output=str(e), task=tasks, tasks=tasks, steps=self.step_counter)
|
302
339
|
|
303
340
|
|
304
341
|
@step
|
@@ -331,10 +368,22 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
331
368
|
@step
|
332
369
|
async def finalize(self, ctx: Context, ev: FinalizeEvent) -> StopEvent:
|
333
370
|
ctx.write_event_to_stream(ev)
|
371
|
+
capture(
|
372
|
+
DroidAgentFinalizeEvent(
|
373
|
+
tasks=",".join([f"{t.agent_type}:{t.description}" for t in ev.task]),
|
374
|
+
success=ev.success,
|
375
|
+
output=ev.output,
|
376
|
+
steps=ev.steps,
|
377
|
+
),
|
378
|
+
self.user_id
|
379
|
+
)
|
380
|
+
flush()
|
334
381
|
|
335
382
|
result = {
|
336
383
|
"success": ev.success,
|
384
|
+
# deprecated. use output instead.
|
337
385
|
"reason": ev.reason,
|
386
|
+
"output": ev.output,
|
338
387
|
"steps": ev.steps,
|
339
388
|
}
|
340
389
|
|
@@ -348,13 +397,16 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
348
397
|
if isinstance(ev, EpisodicMemoryEvent):
|
349
398
|
self.current_episodic_memory = ev.episodic_memory
|
350
399
|
return
|
400
|
+
|
401
|
+
|
351
402
|
|
352
403
|
if not isinstance(ev, StopEvent):
|
353
404
|
ctx.write_event_to_stream(ev)
|
354
405
|
|
355
406
|
if isinstance(ev, ScreenshotEvent):
|
356
407
|
self.trajectory.screenshots.append(ev.screenshot)
|
357
|
-
|
408
|
+
elif isinstance(ev, MacroEvent):
|
409
|
+
self.trajectory.macro.append(ev)
|
358
410
|
else:
|
359
411
|
self.trajectory.events.append(ev)
|
360
412
|
|
droidrun/agent/droid/events.py
CHANGED
@@ -16,8 +16,12 @@ class ReasoningLogicEvent(Event):
|
|
16
16
|
|
17
17
|
class FinalizeEvent(Event):
|
18
18
|
success: bool
|
19
|
+
# deprecated. use output instead.
|
19
20
|
reason: str
|
21
|
+
output: str
|
22
|
+
# deprecated. use tasks instead.
|
20
23
|
task: List[Task]
|
24
|
+
tasks: List[Task]
|
21
25
|
steps: int = 1
|
22
26
|
|
23
27
|
class TaskRunnerEvent(Event):
|
@@ -131,12 +131,12 @@ class PlannerAgent(Workflow):
|
|
131
131
|
logger.info(f"🧠 Thinking about how to plan the goal...")
|
132
132
|
|
133
133
|
if self.vision:
|
134
|
-
screenshot = (
|
134
|
+
screenshot = (self.tools_instance.take_screenshot())[1]
|
135
135
|
ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
|
136
136
|
await ctx.set("screenshot", screenshot)
|
137
137
|
|
138
138
|
try:
|
139
|
-
state =
|
139
|
+
state = self.tools_instance.get_state()
|
140
140
|
await ctx.set("ui_state", state["a11y_tree"])
|
141
141
|
await ctx.set("phone_state", state["phone_state"])
|
142
142
|
except Exception as e:
|
droidrun/agent/utils/executer.py
CHANGED
@@ -9,6 +9,7 @@ from llama_index.core.workflow import Context
|
|
9
9
|
import asyncio
|
10
10
|
from asyncio import AbstractEventLoop
|
11
11
|
import threading
|
12
|
+
from droidrun.tools.adb import AdbTools
|
12
13
|
|
13
14
|
logger = logging.getLogger("droidrun")
|
14
15
|
|
@@ -29,6 +30,7 @@ class SimpleCodeExecutor:
|
|
29
30
|
locals: Dict[str, Any] = {},
|
30
31
|
globals: Dict[str, Any] = {},
|
31
32
|
tools={},
|
33
|
+
tools_instance=None,
|
32
34
|
use_same_scope: bool = True,
|
33
35
|
):
|
34
36
|
"""
|
@@ -38,8 +40,11 @@ class SimpleCodeExecutor:
|
|
38
40
|
locals: Local variables to use in the execution context
|
39
41
|
globals: Global variables to use in the execution context
|
40
42
|
tools: List of tools available for execution
|
43
|
+
tools_instance: Original tools instance (e.g., AdbTools instance)
|
41
44
|
"""
|
42
45
|
|
46
|
+
self.tools_instance = tools_instance
|
47
|
+
|
43
48
|
# loop throught tools and add them to globals, but before that check if tool value is async, if so convert it to sync. tools is a dictionary of tool name: function
|
44
49
|
# e.g. tools = {'tool_name': tool_function}
|
45
50
|
|
@@ -74,6 +79,7 @@ class SimpleCodeExecutor:
|
|
74
79
|
self.locals = locals
|
75
80
|
self.loop = loop
|
76
81
|
self.use_same_scope = use_same_scope
|
82
|
+
self.tools = tools
|
77
83
|
if self.use_same_scope:
|
78
84
|
# If using the same scope, set the globals and locals to the same dictionary
|
79
85
|
self.globals = self.locals = {
|
@@ -93,8 +99,10 @@ class SimpleCodeExecutor:
|
|
93
99
|
"""
|
94
100
|
# Update UI elements before execution
|
95
101
|
self.globals['ui_state'] = await ctx.get("ui_state", None)
|
96
|
-
|
97
|
-
|
102
|
+
|
103
|
+
if self.tools_instance and isinstance(self.tools_instance, AdbTools):
|
104
|
+
self.tools_instance._set_context(ctx)
|
105
|
+
|
98
106
|
stdout = io.StringIO()
|
99
107
|
stderr = io.StringIO()
|
100
108
|
|
@@ -31,6 +31,7 @@ def load_llm(provider_name: str, **kwargs: Any) -> LLM:
|
|
31
31
|
raise ValueError("provider_name cannot be empty.")
|
32
32
|
if provider_name == "OpenAILike":
|
33
33
|
module_provider_part = "openai_like"
|
34
|
+
kwargs.setdefault("is_chat_model", True)
|
34
35
|
elif provider_name == "GoogleGenAI":
|
35
36
|
module_provider_part = "google_genai"
|
36
37
|
else:
|