droidrun 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -8,6 +8,7 @@ from typing import List
8
8
 
9
9
  from llama_index.core.llms.llm import LLM
10
10
  from llama_index.core.workflow import step, StartEvent, StopEvent, Workflow, Context
11
+ from llama_index.core.workflow.handler import WorkflowHandler
11
12
  from droidrun.agent.droid.events import *
12
13
  from droidrun.agent.codeact import CodeActAgent
13
14
  from droidrun.agent.codeact.events import EpisodicMemoryEvent
@@ -21,6 +22,7 @@ from droidrun.agent.context import ContextInjectionManager
21
22
  from droidrun.agent.context.agent_persona import AgentPersona
22
23
  from droidrun.agent.context.personas import DEFAULT
23
24
  from droidrun.agent.oneflows.reflector import Reflector
25
+ from droidrun.telemetry import capture, flush, DroidAgentInitEvent, DroidAgentFinalizeEvent
24
26
 
25
27
 
26
28
  logger = logging.getLogger("droidrun")
@@ -61,6 +63,7 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
61
63
  personas: List[AgentPersona] = [DEFAULT],
62
64
  max_steps: int = 15,
63
65
  timeout: int = 1000,
66
+ vision: bool = False,
64
67
  reasoning: bool = False,
65
68
  reflection: bool = False,
66
69
  enable_tracing: bool = False,
@@ -85,7 +88,6 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
85
88
  **kwargs: Additional keyword arguments to pass to the agents
86
89
  """
87
90
  super().__init__(timeout=timeout ,*args,**kwargs)
88
-
89
91
  # Configure default logging if not already configured
90
92
  self._configure_default_logging(debug=debug)
91
93
 
@@ -101,6 +103,7 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
101
103
 
102
104
  self.goal = goal
103
105
  self.llm = llm
106
+ self.vision = vision
104
107
  self.max_steps = max_steps
105
108
  self.max_codeact_steps = max_steps
106
109
  self.timeout = timeout
@@ -128,6 +131,7 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
128
131
  self.planner_agent = PlannerAgent(
129
132
  goal=goal,
130
133
  llm=llm,
134
+ vision=vision,
131
135
  personas=personas,
132
136
  task_manager=self.task_manager,
133
137
  tools_instance=tools,
@@ -143,8 +147,32 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
143
147
  else:
144
148
  logger.debug("🚫 Planning disabled - will execute tasks directly with CodeActAgent")
145
149
  self.planner_agent = None
150
+
151
+ capture(
152
+ DroidAgentInitEvent(
153
+ goal=goal,
154
+ llm=llm.class_name(),
155
+ tools=",".join(self.tool_list),
156
+ personas=",".join([p.name for p in personas]),
157
+ max_steps=max_steps,
158
+ timeout=timeout,
159
+ vision=vision,
160
+ reasoning=reasoning,
161
+ reflection=reflection,
162
+ enable_tracing=enable_tracing,
163
+ debug=debug,
164
+ save_trajectories=save_trajectories,
165
+ )
166
+ )
167
+
146
168
 
147
169
  logger.info("✅ DroidAgent initialized successfully.")
170
+
171
+ def run(self) -> WorkflowHandler:
172
+ """
173
+ Run the DroidAgent workflow.
174
+ """
175
+ return super().run()
148
176
 
149
177
  @step
150
178
  async def execute_task(
@@ -171,6 +199,7 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
171
199
  codeact_agent = CodeActAgent(
172
200
  llm=self.llm,
173
201
  persona=persona,
202
+ vision=self.vision,
174
203
  max_steps=self.max_codeact_steps,
175
204
  all_tools_list=self.tool_list,
176
205
  tools_instance=self.tools_instance,
@@ -200,14 +229,14 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
200
229
  if self.debug:
201
230
  import traceback
202
231
  logger.error(traceback.format_exc())
203
- return CodeActResultEvent(success=False, reason=f"Error: {str(e)}", task=task, steps=result["codeact_steps"])
232
+ return CodeActResultEvent(success=False, reason=f"Error: {str(e)}", task=task, steps=[])
204
233
 
205
234
  @step
206
235
  async def handle_codeact_execute(self, ctx: Context, ev: CodeActResultEvent) -> FinalizeEvent | ReflectionEvent:
207
236
  try:
208
237
  task = ev.task
209
238
  if not self.reasoning:
210
- return FinalizeEvent(success=ev.success, reason=ev.reason, task=[task], steps=ev.steps)
239
+ return FinalizeEvent(success=ev.success, reason=ev.reason, output=ev.reason, task=[task], tasks=[task], steps=ev.steps)
211
240
 
212
241
  if self.reflection:
213
242
  return ReflectionEvent(task=task)
@@ -219,7 +248,8 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
219
248
  if self.debug:
220
249
  import traceback
221
250
  logger.error(traceback.format_exc())
222
- return FinalizeEvent(success=False, reason=str(e), task=self.task_manager.get_task_history(), steps=self.step_counter)
251
+ tasks = self.task_manager.get_task_history()
252
+ return FinalizeEvent(success=False, reason=str(e), output=str(e), task=tasks, tasks=tasks, steps=self.step_counter)
223
253
 
224
254
 
225
255
  @step
@@ -255,7 +285,9 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
255
285
  ) -> FinalizeEvent | CodeActExecuteEvent:
256
286
  try:
257
287
  if self.step_counter >= self.max_steps:
258
- return FinalizeEvent(success=False, reason=f"Reached maximum number of steps ({self.max_steps})", task=self.task_manager.get_task_history(), steps=self.step_counter)
288
+ output = f"Reached maximum number of steps ({self.max_steps})"
289
+ tasks = self.task_manager.get_task_history()
290
+ return FinalizeEvent(success=False, reason=output, output=output, task=tasks, tasks=tasks, steps=self.step_counter)
259
291
  self.step_counter += 1
260
292
 
261
293
  if ev.reflection:
@@ -282,10 +314,13 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
282
314
 
283
315
  if self.task_manager.goal_completed:
284
316
  logger.info(f"✅ Goal completed: {self.task_manager.message}")
285
- return FinalizeEvent(success=True, reason=self.task_manager.message, task=self.task_manager.get_task_history(), steps=self.step_counter)
317
+ tasks = self.task_manager.get_task_history()
318
+ return FinalizeEvent(success=True, reason=self.task_manager.message, output=self.task_manager.message, task=tasks, tasks=tasks, steps=self.step_counter)
286
319
  if not self.tasks:
287
320
  logger.warning("No tasks generated by planner")
288
- return FinalizeEvent(success=False, reason="Planner did not generate any tasks", task=self.task_manager.get_task_history(), steps=self.step_counter)
321
+ output = "Planner did not generate any tasks"
322
+ tasks = self.task_manager.get_task_history()
323
+ return FinalizeEvent(success=False, reason=output, output=output, task=tasks, tasks=tasks, steps=self.step_counter)
289
324
 
290
325
  return CodeActExecuteEvent(task=next(self.task_iter), reflection=None)
291
326
 
@@ -294,7 +329,8 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
294
329
  if self.debug:
295
330
  import traceback
296
331
  logger.error(traceback.format_exc())
297
- return FinalizeEvent(success=False, reason=str(e), task=self.task_manager.get_task_history(), steps=self.step_counter)
332
+ tasks = self.task_manager.get_task_history()
333
+ return FinalizeEvent(success=False, reason=str(e), output=str(e), task=tasks, tasks=tasks, steps=self.step_counter)
298
334
 
299
335
 
300
336
  @step
@@ -306,6 +342,7 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
306
342
  Dict containing the execution result
307
343
  """
308
344
  logger.info(f"🚀 Running DroidAgent to achieve goal: {self.goal}")
345
+ ctx.write_event_to_stream(ev)
309
346
 
310
347
  self.step_counter = 0
311
348
  self.retry_counter = 0
@@ -326,10 +363,21 @@ A wrapper class that coordinates between PlannerAgent (creates plans) and
326
363
  @step
327
364
  async def finalize(self, ctx: Context, ev: FinalizeEvent) -> StopEvent:
328
365
  ctx.write_event_to_stream(ev)
366
+ capture(
367
+ DroidAgentFinalizeEvent(
368
+ tasks=",".join([f"{t.agent_type}:{t.description}" for t in ev.task]),
369
+ success=ev.success,
370
+ output=ev.output,
371
+ steps=ev.steps,
372
+ )
373
+ )
374
+ flush()
329
375
 
330
376
  result = {
331
377
  "success": ev.success,
378
+ # deprecated. use output instead.
332
379
  "reason": ev.reason,
380
+ "output": ev.output,
333
381
  "steps": ev.steps,
334
382
  }
335
383
 
@@ -16,8 +16,12 @@ class ReasoningLogicEvent(Event):
16
16
 
17
17
  class FinalizeEvent(Event):
18
18
  success: bool
19
+ # deprecated. use output instead.
19
20
  reason: str
21
+ output: str
22
+ # deprecated. use tasks instead.
20
23
  task: List[Task]
24
+ tasks: List[Task]
21
25
  steps: int = 1
22
26
 
23
27
  class TaskRunnerEvent(Event):
@@ -42,6 +42,7 @@ class PlannerAgent(Workflow):
42
42
  self,
43
43
  goal: str,
44
44
  llm: LLM,
45
+ vision: bool,
45
46
  personas: List[AgentPersona],
46
47
  task_manager: TaskManager,
47
48
  tools_instance: Tools,
@@ -57,6 +58,7 @@ class PlannerAgent(Workflow):
57
58
  self.goal = goal
58
59
  self.task_manager = task_manager
59
60
  self.debug = debug
61
+ self.vision = vision
60
62
 
61
63
  self.chat_memory = None
62
64
  self.remembered_info = None
@@ -128,12 +130,19 @@ class PlannerAgent(Workflow):
128
130
  self.steps_counter += 1
129
131
  logger.info(f"🧠 Thinking about how to plan the goal...")
130
132
 
131
- screenshot = (await self.tools_instance.take_screenshot())[1]
132
- ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
133
- await ctx.set("screenshot", screenshot)
133
+ if self.vision:
134
+ screenshot = (await self.tools_instance.take_screenshot())[1]
135
+ ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
136
+ await ctx.set("screenshot", screenshot)
137
+
138
+ try:
139
+ state = await self.tools_instance.get_state()
140
+ await ctx.set("ui_state", state["a11y_tree"])
141
+ await ctx.set("phone_state", state["phone_state"])
142
+ except Exception as e:
143
+ logger.warning(f"⚠️ Error retrieving state from the connected device. Is the Accessibility Service enabled?")
144
+
134
145
 
135
- await ctx.set("ui_state", await self.tools_instance.get_clickables())
136
- await ctx.set("phone_state", await self.tools_instance.get_phone_state())
137
146
  await ctx.set("remembered_info", self.remembered_info)
138
147
  await ctx.set("reflection", self.reflection)
139
148
 
@@ -187,7 +196,11 @@ class PlannerAgent(Workflow):
187
196
  await self.chat_memory.aput(
188
197
  ChatMessage(
189
198
  role="user",
190
- content=f"Please either set new tasks using set_tasks_with_agents() or mark the goal as complete using complete_goal() if done.",
199
+ content="""Please either set new tasks using set_tasks_with_agents() or mark the goal as complete using complete_goal() if done.
200
+ wrap your code inside this:
201
+ ```python
202
+ <YOUR CODE HERE>
203
+ ```""",
191
204
  )
192
205
  )
193
206
  logger.debug("🔄 Waiting for next plan or completion.")
@@ -196,7 +209,11 @@ class PlannerAgent(Workflow):
196
209
  await self.chat_memory.aput(
197
210
  ChatMessage(
198
211
  role="user",
199
- content=f"Please either set new tasks using set_tasks_with_agents() or mark the goal as complete using complete_goal() if done.",
212
+ content="""Please either set new tasks using set_tasks_with_agents() or mark the goal as complete using complete_goal() if done.
213
+ wrap your code inside this:
214
+ ```python
215
+ <YOUR CODE HERE>
216
+ ```""",
200
217
  )
201
218
  )
202
219
  logger.debug("🔄 Waiting for next plan or completion.")
@@ -224,15 +241,18 @@ class PlannerAgent(Workflow):
224
241
  logger.debug(f" - Sending {len(chat_history)} messages to LLM.")
225
242
 
226
243
  model = self.llm.class_name()
227
- if model != "DeepSeek":
228
- chat_history = await chat_utils.add_screenshot_image_block(
229
- await ctx.get("screenshot"), chat_history
230
- )
231
- else:
244
+ if model == "DeepSeek":
232
245
  logger.warning(
233
246
  "[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
234
247
  )
235
248
 
249
+ elif self.vision == True:
250
+ chat_history = await chat_utils.add_screenshot_image_block(
251
+ await ctx.get("screenshot"), chat_history
252
+ )
253
+
254
+
255
+
236
256
  chat_history = await chat_utils.add_task_history_block(
237
257
  self.task_manager.get_completed_tasks(),
238
258
  self.task_manager.get_failed_tasks(),
@@ -132,24 +132,21 @@ async def add_phone_state_block(phone_state, chat_history: List[ChatMessage]) ->
132
132
 
133
133
  # Format the phone state data nicely
134
134
  if isinstance(phone_state, dict) and 'error' not in phone_state:
135
- current_app = phone_state.get('currentApp', 'Unknown')
135
+ current_app = phone_state.get('currentApp', '')
136
136
  package_name = phone_state.get('packageName', 'Unknown')
137
137
  keyboard_visible = phone_state.get('keyboardVisible', False)
138
138
  focused_element = phone_state.get('focusedElement')
139
139
 
140
140
  # Format the focused element
141
141
  if focused_element:
142
- element_text = focused_element.get('text', 'No text')
143
- element_class = focused_element.get('className', 'Unknown')
144
- element_bounds = focused_element.get('bounds', 'Unknown')
145
- element_type = focused_element.get('type', 'unknown')
142
+ element_text = focused_element.get('text', '')
143
+ element_class = focused_element.get('className', '')
146
144
  element_resource_id = focused_element.get('resourceId', '')
147
145
 
148
146
  # Build focused element description
149
- focused_desc = f"'{element_text}' ({element_class})"
147
+ focused_desc = f"'{element_text}' {element_class}"
150
148
  if element_resource_id:
151
149
  focused_desc += f" | ID: {element_resource_id}"
152
- focused_desc += f" | Bounds: {element_bounds} | Type: {element_type}"
153
150
  else:
154
151
  focused_desc = "None"
155
152
 
@@ -31,6 +31,7 @@ def load_llm(provider_name: str, **kwargs: Any) -> LLM:
31
31
  raise ValueError("provider_name cannot be empty.")
32
32
  if provider_name == "OpenAILike":
33
33
  module_provider_part = "openai_like"
34
+ kwargs.setdefault("is_chat_model", True)
34
35
  elif provider_name == "GoogleGenAI":
35
36
  module_provider_part = "google_genai"
36
37
  else: