droidrun-0.3.0-py3-none-any.whl → droidrun-0.3.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/__init__.py +1 -10
- droidrun/adb/device.py +101 -71
- droidrun/adb/manager.py +3 -3
- droidrun/agent/codeact/codeact_agent.py +22 -12
- droidrun/agent/context/personas/__init__.py +0 -2
- droidrun/agent/context/personas/default.py +1 -1
- droidrun/agent/droid/droid_agent.py +56 -8
- droidrun/agent/droid/events.py +4 -0
- droidrun/agent/planner/planner_agent.py +32 -12
- droidrun/agent/utils/chat_utils.py +4 -7
- droidrun/agent/utils/llm_picker.py +1 -0
- droidrun/cli/main.py +163 -78
- droidrun/portal.py +139 -0
- droidrun/telemetry/__init__.py +4 -0
- droidrun/telemetry/events.py +27 -0
- droidrun/telemetry/tracker.py +83 -0
- droidrun/tools/adb.py +199 -407
- droidrun/tools/ios.py +10 -5
- droidrun/tools/tools.py +42 -11
- {droidrun-0.3.0.dist-info → droidrun-0.3.2.dist-info}/METADATA +19 -29
- {droidrun-0.3.0.dist-info → droidrun-0.3.2.dist-info}/RECORD +24 -23
- droidrun/agent/context/personas/extractor.py +0 -52
- droidrun/agent/context/todo.txt +0 -4
- droidrun/run.py +0 -105
- {droidrun-0.3.0.dist-info → droidrun-0.3.2.dist-info}/WHEEL +0 -0
- {droidrun-0.3.0.dist-info → droidrun-0.3.2.dist-info}/entry_points.txt +0 -0
- {droidrun-0.3.0.dist-info → droidrun-0.3.2.dist-info}/licenses/LICENSE +0 -0
droidrun/agent/droid/droid_agent.py
CHANGED
@@ -8,6 +8,7 @@ from typing import List
|
|
8
8
|
|
9
9
|
from llama_index.core.llms.llm import LLM
|
10
10
|
from llama_index.core.workflow import step, StartEvent, StopEvent, Workflow, Context
|
11
|
+
from llama_index.core.workflow.handler import WorkflowHandler
|
11
12
|
from droidrun.agent.droid.events import *
|
12
13
|
from droidrun.agent.codeact import CodeActAgent
|
13
14
|
from droidrun.agent.codeact.events import EpisodicMemoryEvent
|
@@ -21,6 +22,7 @@ from droidrun.agent.context import ContextInjectionManager
|
|
21
22
|
from droidrun.agent.context.agent_persona import AgentPersona
|
22
23
|
from droidrun.agent.context.personas import DEFAULT
|
23
24
|
from droidrun.agent.oneflows.reflector import Reflector
|
25
|
+
from droidrun.telemetry import capture, flush, DroidAgentInitEvent, DroidAgentFinalizeEvent
|
24
26
|
|
25
27
|
|
26
28
|
logger = logging.getLogger("droidrun")
|
@@ -61,6 +63,7 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
61
63
|
personas: List[AgentPersona] = [DEFAULT],
|
62
64
|
max_steps: int = 15,
|
63
65
|
timeout: int = 1000,
|
66
|
+
vision: bool = False,
|
64
67
|
reasoning: bool = False,
|
65
68
|
reflection: bool = False,
|
66
69
|
enable_tracing: bool = False,
|
@@ -85,7 +88,6 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
85
88
|
**kwargs: Additional keyword arguments to pass to the agents
|
86
89
|
"""
|
87
90
|
super().__init__(timeout=timeout ,*args,**kwargs)
|
88
|
-
|
89
91
|
# Configure default logging if not already configured
|
90
92
|
self._configure_default_logging(debug=debug)
|
91
93
|
|
@@ -101,6 +103,7 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
101
103
|
|
102
104
|
self.goal = goal
|
103
105
|
self.llm = llm
|
106
|
+
self.vision = vision
|
104
107
|
self.max_steps = max_steps
|
105
108
|
self.max_codeact_steps = max_steps
|
106
109
|
self.timeout = timeout
|
@@ -128,6 +131,7 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
128
131
|
self.planner_agent = PlannerAgent(
|
129
132
|
goal=goal,
|
130
133
|
llm=llm,
|
134
|
+
vision=vision,
|
131
135
|
personas=personas,
|
132
136
|
task_manager=self.task_manager,
|
133
137
|
tools_instance=tools,
|
@@ -143,8 +147,32 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
143
147
|
else:
|
144
148
|
logger.debug("🚫 Planning disabled - will execute tasks directly with CodeActAgent")
|
145
149
|
self.planner_agent = None
|
150
|
+
|
151
|
+
capture(
|
152
|
+
DroidAgentInitEvent(
|
153
|
+
goal=goal,
|
154
|
+
llm=llm.class_name(),
|
155
|
+
tools=",".join(self.tool_list),
|
156
|
+
personas=",".join([p.name for p in personas]),
|
157
|
+
max_steps=max_steps,
|
158
|
+
timeout=timeout,
|
159
|
+
vision=vision,
|
160
|
+
reasoning=reasoning,
|
161
|
+
reflection=reflection,
|
162
|
+
enable_tracing=enable_tracing,
|
163
|
+
debug=debug,
|
164
|
+
save_trajectories=save_trajectories,
|
165
|
+
)
|
166
|
+
)
|
167
|
+
|
146
168
|
|
147
169
|
logger.info("✅ DroidAgent initialized successfully.")
|
170
|
+
|
171
|
+
def run(self) -> WorkflowHandler:
|
172
|
+
"""
|
173
|
+
Run the DroidAgent workflow.
|
174
|
+
"""
|
175
|
+
return super().run()
|
148
176
|
|
149
177
|
@step
|
150
178
|
async def execute_task(
|
@@ -171,6 +199,7 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
171
199
|
codeact_agent = CodeActAgent(
|
172
200
|
llm=self.llm,
|
173
201
|
persona=persona,
|
202
|
+
vision=self.vision,
|
174
203
|
max_steps=self.max_codeact_steps,
|
175
204
|
all_tools_list=self.tool_list,
|
176
205
|
tools_instance=self.tools_instance,
|
@@ -200,14 +229,14 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
200
229
|
if self.debug:
|
201
230
|
import traceback
|
202
231
|
logger.error(traceback.format_exc())
|
203
|
-
return CodeActResultEvent(success=False, reason=f"Error: {str(e)}", task=task, steps=
|
232
|
+
return CodeActResultEvent(success=False, reason=f"Error: {str(e)}", task=task, steps=[])
|
204
233
|
|
205
234
|
@step
|
206
235
|
async def handle_codeact_execute(self, ctx: Context, ev: CodeActResultEvent) -> FinalizeEvent | ReflectionEvent:
|
207
236
|
try:
|
208
237
|
task = ev.task
|
209
238
|
if not self.reasoning:
|
210
|
-
return FinalizeEvent(success=ev.success, reason=ev.reason, task=[task], steps=ev.steps)
|
239
|
+
return FinalizeEvent(success=ev.success, reason=ev.reason, output=ev.reason, task=[task], tasks=[task], steps=ev.steps)
|
211
240
|
|
212
241
|
if self.reflection:
|
213
242
|
return ReflectionEvent(task=task)
|
@@ -219,7 +248,8 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
219
248
|
if self.debug:
|
220
249
|
import traceback
|
221
250
|
logger.error(traceback.format_exc())
|
222
|
-
|
251
|
+
tasks = self.task_manager.get_task_history()
|
252
|
+
return FinalizeEvent(success=False, reason=str(e), output=str(e), task=tasks, tasks=tasks, steps=self.step_counter)
|
223
253
|
|
224
254
|
|
225
255
|
@step
|
@@ -255,7 +285,9 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
255
285
|
) -> FinalizeEvent | CodeActExecuteEvent:
|
256
286
|
try:
|
257
287
|
if self.step_counter >= self.max_steps:
|
258
|
-
|
288
|
+
output = f"Reached maximum number of steps ({self.max_steps})"
|
289
|
+
tasks = self.task_manager.get_task_history()
|
290
|
+
return FinalizeEvent(success=False, reason=output, output=output, task=tasks, tasks=tasks, steps=self.step_counter)
|
259
291
|
self.step_counter += 1
|
260
292
|
|
261
293
|
if ev.reflection:
|
@@ -282,10 +314,13 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
282
314
|
|
283
315
|
if self.task_manager.goal_completed:
|
284
316
|
logger.info(f"✅ Goal completed: {self.task_manager.message}")
|
285
|
-
|
317
|
+
tasks = self.task_manager.get_task_history()
|
318
|
+
return FinalizeEvent(success=True, reason=self.task_manager.message, output=self.task_manager.message, task=tasks, tasks=tasks, steps=self.step_counter)
|
286
319
|
if not self.tasks:
|
287
320
|
logger.warning("No tasks generated by planner")
|
288
|
-
|
321
|
+
output = "Planner did not generate any tasks"
|
322
|
+
tasks = self.task_manager.get_task_history()
|
323
|
+
return FinalizeEvent(success=False, reason=output, output=output, task=tasks, tasks=tasks, steps=self.step_counter)
|
289
324
|
|
290
325
|
return CodeActExecuteEvent(task=next(self.task_iter), reflection=None)
|
291
326
|
|
@@ -294,7 +329,8 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
294
329
|
if self.debug:
|
295
330
|
import traceback
|
296
331
|
logger.error(traceback.format_exc())
|
297
|
-
|
332
|
+
tasks = self.task_manager.get_task_history()
|
333
|
+
return FinalizeEvent(success=False, reason=str(e), output=str(e), task=tasks, tasks=tasks, steps=self.step_counter)
|
298
334
|
|
299
335
|
|
300
336
|
@step
|
@@ -306,6 +342,7 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
306
342
|
Dict containing the execution result
|
307
343
|
"""
|
308
344
|
logger.info(f"🚀 Running DroidAgent to achieve goal: {self.goal}")
|
345
|
+
ctx.write_event_to_stream(ev)
|
309
346
|
|
310
347
|
self.step_counter = 0
|
311
348
|
self.retry_counter = 0
|
@@ -326,10 +363,21 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
|
|
326
363
|
@step
|
327
364
|
async def finalize(self, ctx: Context, ev: FinalizeEvent) -> StopEvent:
|
328
365
|
ctx.write_event_to_stream(ev)
|
366
|
+
capture(
|
367
|
+
DroidAgentFinalizeEvent(
|
368
|
+
tasks=",".join([f"{t.agent_type}:{t.description}" for t in ev.task]),
|
369
|
+
success=ev.success,
|
370
|
+
output=ev.output,
|
371
|
+
steps=ev.steps,
|
372
|
+
)
|
373
|
+
)
|
374
|
+
flush()
|
329
375
|
|
330
376
|
result = {
|
331
377
|
"success": ev.success,
|
378
|
+
# deprecated. use output instead.
|
332
379
|
"reason": ev.reason,
|
380
|
+
"output": ev.output,
|
333
381
|
"steps": ev.steps,
|
334
382
|
}
|
335
383
|
|
droidrun/agent/droid/events.py
CHANGED
@@ -16,8 +16,12 @@ class ReasoningLogicEvent(Event):
|
|
16
16
|
|
17
17
|
class FinalizeEvent(Event):
|
18
18
|
success: bool
|
19
|
+
# deprecated. use output instead.
|
19
20
|
reason: str
|
21
|
+
output: str
|
22
|
+
# deprecated. use tasks instead.
|
20
23
|
task: List[Task]
|
24
|
+
tasks: List[Task]
|
21
25
|
steps: int = 1
|
22
26
|
|
23
27
|
class TaskRunnerEvent(Event):
|
droidrun/agent/planner/planner_agent.py
CHANGED
@@ -42,6 +42,7 @@ class PlannerAgent(Workflow):
|
|
42
42
|
self,
|
43
43
|
goal: str,
|
44
44
|
llm: LLM,
|
45
|
+
vision: bool,
|
45
46
|
personas: List[AgentPersona],
|
46
47
|
task_manager: TaskManager,
|
47
48
|
tools_instance: Tools,
|
@@ -57,6 +58,7 @@ class PlannerAgent(Workflow):
|
|
57
58
|
self.goal = goal
|
58
59
|
self.task_manager = task_manager
|
59
60
|
self.debug = debug
|
61
|
+
self.vision = vision
|
60
62
|
|
61
63
|
self.chat_memory = None
|
62
64
|
self.remembered_info = None
|
@@ -128,12 +130,19 @@ class PlannerAgent(Workflow):
|
|
128
130
|
self.steps_counter += 1
|
129
131
|
logger.info(f"🧠 Thinking about how to plan the goal...")
|
130
132
|
|
131
|
-
|
132
|
-
|
133
|
-
|
133
|
+
if self.vision:
|
134
|
+
screenshot = (await self.tools_instance.take_screenshot())[1]
|
135
|
+
ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
|
136
|
+
await ctx.set("screenshot", screenshot)
|
137
|
+
|
138
|
+
try:
|
139
|
+
state = await self.tools_instance.get_state()
|
140
|
+
await ctx.set("ui_state", state["a11y_tree"])
|
141
|
+
await ctx.set("phone_state", state["phone_state"])
|
142
|
+
except Exception as e:
|
143
|
+
logger.warning(f"⚠️ Error retrieving state from the connected device. Is the Accessibility Service enabled?")
|
144
|
+
|
134
145
|
|
135
|
-
await ctx.set("ui_state", await self.tools_instance.get_clickables())
|
136
|
-
await ctx.set("phone_state", await self.tools_instance.get_phone_state())
|
137
146
|
await ctx.set("remembered_info", self.remembered_info)
|
138
147
|
await ctx.set("reflection", self.reflection)
|
139
148
|
|
@@ -187,7 +196,11 @@ class PlannerAgent(Workflow):
|
|
187
196
|
await self.chat_memory.aput(
|
188
197
|
ChatMessage(
|
189
198
|
role="user",
|
190
|
-
content=
|
199
|
+
content="""Please either set new tasks using set_tasks_with_agents() or mark the goal as complete using complete_goal() if done.
|
200
|
+
wrap your code inside this:
|
201
|
+
```python
|
202
|
+
<YOUR CODE HERE>
|
203
|
+
```""",
|
191
204
|
)
|
192
205
|
)
|
193
206
|
logger.debug("🔄 Waiting for next plan or completion.")
|
@@ -196,7 +209,11 @@ class PlannerAgent(Workflow):
|
|
196
209
|
await self.chat_memory.aput(
|
197
210
|
ChatMessage(
|
198
211
|
role="user",
|
199
|
-
content=
|
212
|
+
content="""Please either set new tasks using set_tasks_with_agents() or mark the goal as complete using complete_goal() if done.
|
213
|
+
wrap your code inside this:
|
214
|
+
```python
|
215
|
+
<YOUR CODE HERE>
|
216
|
+
```""",
|
200
217
|
)
|
201
218
|
)
|
202
219
|
logger.debug("🔄 Waiting for next plan or completion.")
|
@@ -224,15 +241,18 @@ class PlannerAgent(Workflow):
|
|
224
241
|
logger.debug(f" - Sending {len(chat_history)} messages to LLM.")
|
225
242
|
|
226
243
|
model = self.llm.class_name()
|
227
|
-
if model
|
228
|
-
chat_history = await chat_utils.add_screenshot_image_block(
|
229
|
-
await ctx.get("screenshot"), chat_history
|
230
|
-
)
|
231
|
-
else:
|
244
|
+
if model == "DeepSeek":
|
232
245
|
logger.warning(
|
233
246
|
"[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
|
234
247
|
)
|
235
248
|
|
249
|
+
elif self.vision == True:
|
250
|
+
chat_history = await chat_utils.add_screenshot_image_block(
|
251
|
+
await ctx.get("screenshot"), chat_history
|
252
|
+
)
|
253
|
+
|
254
|
+
|
255
|
+
|
236
256
|
chat_history = await chat_utils.add_task_history_block(
|
237
257
|
self.task_manager.get_completed_tasks(),
|
238
258
|
self.task_manager.get_failed_tasks(),
|
droidrun/agent/utils/chat_utils.py
CHANGED
@@ -132,24 +132,21 @@ async def add_phone_state_block(phone_state, chat_history: List[ChatMessage]) ->
|
|
132
132
|
|
133
133
|
# Format the phone state data nicely
|
134
134
|
if isinstance(phone_state, dict) and 'error' not in phone_state:
|
135
|
-
current_app = phone_state.get('currentApp', '
|
135
|
+
current_app = phone_state.get('currentApp', '')
|
136
136
|
package_name = phone_state.get('packageName', 'Unknown')
|
137
137
|
keyboard_visible = phone_state.get('keyboardVisible', False)
|
138
138
|
focused_element = phone_state.get('focusedElement')
|
139
139
|
|
140
140
|
# Format the focused element
|
141
141
|
if focused_element:
|
142
|
-
element_text = focused_element.get('text', '
|
143
|
-
element_class = focused_element.get('className', '
|
144
|
-
element_bounds = focused_element.get('bounds', 'Unknown')
|
145
|
-
element_type = focused_element.get('type', 'unknown')
|
142
|
+
element_text = focused_element.get('text', '')
|
143
|
+
element_class = focused_element.get('className', '')
|
146
144
|
element_resource_id = focused_element.get('resourceId', '')
|
147
145
|
|
148
146
|
# Build focused element description
|
149
|
-
focused_desc = f"'{element_text}'
|
147
|
+
focused_desc = f"'{element_text}' {element_class}"
|
150
148
|
if element_resource_id:
|
151
149
|
focused_desc += f" | ID: {element_resource_id}"
|
152
|
-
focused_desc += f" | Bounds: {element_bounds} | Type: {element_type}"
|
153
150
|
else:
|
154
151
|
focused_desc = "None"
|
155
152
|
|
droidrun/agent/utils/llm_picker.py
CHANGED
@@ -31,6 +31,7 @@ def load_llm(provider_name: str, **kwargs: Any) -> LLM:
|
|
31
31
|
raise ValueError("provider_name cannot be empty.")
|
32
32
|
if provider_name == "OpenAILike":
|
33
33
|
module_provider_part = "openai_like"
|
34
|
+
kwargs.setdefault("is_chat_model", True)
|
34
35
|
elif provider_name == "GoogleGenAI":
|
35
36
|
module_provider_part = "google_genai"
|
36
37
|
else:
|