droidrun 0.3.10.dev3__py3-none-any.whl → 0.3.10.dev5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. droidrun/agent/codeact/__init__.py +1 -4
  2. droidrun/agent/codeact/codeact_agent.py +95 -86
  3. droidrun/agent/codeact/events.py +1 -2
  4. droidrun/agent/context/__init__.py +5 -9
  5. droidrun/agent/context/episodic_memory.py +1 -3
  6. droidrun/agent/context/task_manager.py +8 -2
  7. droidrun/agent/droid/droid_agent.py +102 -141
  8. droidrun/agent/droid/events.py +45 -14
  9. droidrun/agent/executor/__init__.py +6 -4
  10. droidrun/agent/executor/events.py +29 -9
  11. droidrun/agent/executor/executor_agent.py +86 -28
  12. droidrun/agent/executor/prompts.py +8 -2
  13. droidrun/agent/manager/__init__.py +6 -7
  14. droidrun/agent/manager/events.py +16 -4
  15. droidrun/agent/manager/manager_agent.py +130 -69
  16. droidrun/agent/manager/prompts.py +1 -159
  17. droidrun/agent/utils/chat_utils.py +64 -2
  18. droidrun/agent/utils/device_state_formatter.py +54 -26
  19. droidrun/agent/utils/executer.py +66 -80
  20. droidrun/agent/utils/inference.py +11 -10
  21. droidrun/agent/utils/tools.py +58 -6
  22. droidrun/agent/utils/trajectory.py +18 -12
  23. droidrun/cli/logs.py +118 -56
  24. droidrun/cli/main.py +154 -136
  25. droidrun/config_manager/__init__.py +9 -7
  26. droidrun/config_manager/app_card_loader.py +148 -0
  27. droidrun/config_manager/config_manager.py +200 -102
  28. droidrun/config_manager/path_resolver.py +104 -0
  29. droidrun/config_manager/prompt_loader.py +75 -0
  30. droidrun/macro/__init__.py +1 -1
  31. droidrun/macro/cli.py +23 -18
  32. droidrun/telemetry/__init__.py +2 -2
  33. droidrun/telemetry/events.py +3 -3
  34. droidrun/telemetry/tracker.py +1 -1
  35. droidrun/tools/adb.py +1 -1
  36. droidrun/tools/ios.py +3 -2
  37. {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev5.dist-info}/METADATA +10 -4
  38. droidrun-0.3.10.dev5.dist-info/RECORD +61 -0
  39. droidrun/agent/codeact/prompts.py +0 -26
  40. droidrun/agent/context/agent_persona.py +0 -16
  41. droidrun/agent/context/context_injection_manager.py +0 -66
  42. droidrun/agent/context/personas/__init__.py +0 -11
  43. droidrun/agent/context/personas/app_starter.py +0 -44
  44. droidrun/agent/context/personas/big_agent.py +0 -96
  45. droidrun/agent/context/personas/default.py +0 -95
  46. droidrun/agent/context/personas/ui_expert.py +0 -108
  47. droidrun/agent/planner/__init__.py +0 -13
  48. droidrun/agent/planner/events.py +0 -21
  49. droidrun/agent/planner/planner_agent.py +0 -311
  50. droidrun/agent/planner/prompts.py +0 -124
  51. droidrun-0.3.10.dev3.dist-info/RECORD +0 -70
  52. {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev5.dist-info}/WHEEL +0 -0
  53. {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev5.dist-info}/entry_points.txt +0 -0
  54. {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev5.dist-info}/licenses/LICENSE +0 -0
@@ -16,14 +16,9 @@ from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow,
16
16
  from llama_index.core.workflow.handler import WorkflowHandler
17
17
  from workflows.events import Event
18
18
 
19
- from droidrun.config_manager.config_manager import VisionConfig
20
-
21
19
  from droidrun.agent.codeact import CodeActAgent
22
20
  from droidrun.agent.codeact.events import EpisodicMemoryEvent
23
21
  from droidrun.agent.common.events import MacroEvent, RecordUIStateEvent, ScreenshotEvent
24
- from droidrun.agent.context import ContextInjectionManager
25
- from droidrun.agent.context.agent_persona import AgentPersona
26
- from droidrun.agent.context.personas import DEFAULT
27
22
  from droidrun.agent.context.task_manager import Task, TaskManager
28
23
  from droidrun.agent.droid.events import (
29
24
  CodeActExecuteEvent,
@@ -39,6 +34,17 @@ from droidrun.agent.executor import ExecutorAgent
39
34
  from droidrun.agent.manager import ManagerAgent
40
35
  from droidrun.agent.utils.tools import ATOMIC_ACTION_SIGNATURES
41
36
  from droidrun.agent.utils.trajectory import Trajectory
37
+ from droidrun.config_manager.config_manager import (
38
+ AgentConfig,
39
+ DeviceConfig,
40
+ DroidRunConfig,
41
+ LoggingConfig,
42
+ TelemetryConfig,
43
+ ToolsConfig,
44
+ TracingConfig,
45
+ )
46
+
47
+ from droidrun.agent.utils.tools import open_app
42
48
  from droidrun.telemetry import (
43
49
  DroidAgentFinalizeEvent,
44
50
  DroidAgentInitEvent,
@@ -87,16 +93,16 @@ class DroidAgent(Workflow):
87
93
  goal: str,
88
94
  llms: dict[str, LLM] | LLM,
89
95
  tools: Tools,
90
- personas: List[AgentPersona] = [DEFAULT], # noqa: B006
91
- max_steps: int = 15,
92
- timeout: int = 1000,
93
- vision: "VisionConfig | dict | bool" = False,
94
- reasoning: bool = False,
95
- enable_tracing: bool = False,
96
- debug: bool = False,
97
- save_trajectories: str = "none",
96
+ config: DroidRunConfig | None = None,
97
+ agent_config: AgentConfig | None = None,
98
+ device_config: DeviceConfig | None = None,
99
+ tools_config: ToolsConfig | None = None,
100
+ logging_config: LoggingConfig | None = None,
101
+ tracing_config: TracingConfig | None = None,
102
+ telemetry_config: TelemetryConfig | None = None,
98
103
  excluded_tools: List[str] = None,
99
104
  custom_tools: dict = None,
105
+ timeout: int = 1000,
100
106
  *args,
101
107
  **kwargs,
102
108
  ):
@@ -104,35 +110,40 @@ class DroidAgent(Workflow):
104
110
  Initialize the DroidAgent wrapper.
105
111
 
106
112
  Args:
107
- goal: The user's goal or command to execute
108
- llm: The language model to use for both agents
109
- max_steps: Maximum number of steps for both agents
110
- timeout: Timeout for agent execution in seconds
111
- reasoning: Whether to use Manager+Executor for complex reasoning (True)
112
- or send tasks directly to CodeActAgent (False)
113
- enable_tracing: Whether to enable Arize Phoenix tracing
114
- debug: Whether to enable verbose debug logging
115
- save_trajectories: Trajectory saving level. Can be:
116
- - "none" (no saving)
117
- - "step" (save per step)
118
- - "action" (save per action)
119
- custom_tools: Dictionary of custom tools in ATOMIC_ACTION_SIGNATURES format:
120
- {
121
- "tool_name": {
122
- "arguments": ["arg1", "arg2"],
123
- "description": "Tool description with usage example",
124
- "function": callable
125
- }
126
- }
127
- **kwargs: Additional keyword arguments to pass to the agents
113
+ goal: User's goal or command
114
+ llms: Dict of agent-specific LLMs or single LLM for all
115
+ tools: Tools instance (AdbTools or IOSTools)
116
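+ config: Base config. Effectively required: llm_profiles is always read from it, and each section falls back to it when its override is not given, so passing None raises AttributeError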
117
+ agent_config: Agent config override (optional)
118
+ device_config: Device config override (optional)
119
+ tools_config: Tools config override (optional)
120
+ logging_config: Logging config override (optional)
121
+ tracing_config: Tracing config override (optional)
122
+ telemetry_config: Telemetry config override (optional)
123
+ excluded_tools: Tools to exclude
124
+ custom_tools: Custom tool definitions
125
+ timeout: Workflow timeout in seconds
128
126
  """
127
+
129
128
  self.user_id = kwargs.pop("user_id", None)
130
- super().__init__(timeout=timeout, *args, **kwargs) # noqa: B026
131
- # Configure default logging if not already configured
132
- self._configure_default_logging(debug=debug)
133
129
 
134
- # Setup global tracing first if enabled
135
- if enable_tracing:
130
+ base_config = config
131
+
132
+ self.config = DroidRunConfig(
133
+ agent=agent_config or base_config.agent,
134
+ device=device_config or base_config.device,
135
+ tools=tools_config or base_config.tools,
136
+ logging=logging_config or base_config.logging,
137
+ tracing=tracing_config or base_config.tracing,
138
+ telemetry=telemetry_config or base_config.telemetry,
139
+ llm_profiles=base_config.llm_profiles,
140
+ )
141
+
142
+ super().__init__(*args, timeout=timeout, **kwargs)
143
+
144
+ self._configure_default_logging(debug=self.config.logging.debug)
145
+
146
+ if self.config.tracing.enabled:
136
147
  try:
137
148
  handler = arize_phoenix_callback_handler()
138
149
  llama_index.core.global_handler = handler
@@ -144,148 +155,110 @@ class DroidAgent(Workflow):
144
155
  " • If installed via tool: `uv tool install droidrun[phoenix]`"
145
156
  " • If installed via pip: `uv pip install droidrun[phoenix]`\n"
146
157
  )
147
- enable_tracing = False
148
158
 
149
159
  self.goal = goal
150
- self.max_steps = max_steps
151
- self.max_codeact_steps = max_steps
152
160
  self.timeout = timeout
153
- self.reasoning = reasoning
154
- self.debug = debug
155
161
  self.custom_tools = custom_tools or {}
156
162
 
157
- # ====================================================================
158
- # Handle LLM parameter - support both dict and single LLM
159
- # ====================================================================
160
163
  if isinstance(llms, dict):
161
164
  self.manager_llm = llms.get('manager')
162
165
  self.executor_llm = llms.get('executor')
163
166
  self.codeact_llm = llms.get('codeact')
164
167
  self.text_manipulator_llm = llms.get('text_manipulator')
165
168
  self.app_opener_llm = llms.get('app_opener')
166
-
167
- # Validate required LLMs are present
168
- if reasoning and (not self.manager_llm or not self.executor_llm):
169
+
170
+ if self.config.agent.reasoning and (not self.manager_llm or not self.executor_llm):
169
171
  raise ValueError("When reasoning=True, 'manager' and 'executor' LLMs must be provided in llms dict")
170
172
  if not self.codeact_llm:
171
173
  raise ValueError("'codeact' LLM must be provided in llms dict")
172
-
174
+
173
175
  logger.info("📚 Using agent-specific LLMs from dictionary")
174
176
  else:
175
- # single LLM for all agents
176
- logger.info("📚 Using single LLM for all agents (backward compatibility mode)")
177
+ logger.info("📚 Using single LLM for all agents")
177
178
  self.manager_llm = llms
178
179
  self.executor_llm = llms
179
180
  self.codeact_llm = llms
180
181
  self.text_manipulator_llm = llms
181
182
  self.app_opener_llm = llms
182
183
 
183
- # ====================================================================
184
- # Handle vision parameter - support VisionConfig, dict, or bool
185
- # ====================================================================
186
- if isinstance(vision, VisionConfig):
187
- self.vision_config = vision
188
- elif isinstance(vision, dict):
189
- self.vision_config = VisionConfig.from_dict(vision)
190
- elif isinstance(vision, bool):
191
- # Backward compatibility: single bool for all agents
192
- logger.info(f"👁️ Using vision={vision} for all agents (backward compatibility mode)")
193
- self.vision_config = VisionConfig(manager=vision, executor=vision, codeact=vision)
194
- else:
195
- raise TypeError(f"vision must be VisionConfig, dict, or bool, got {type(vision)}")
196
-
197
- # Store individual vision flags for easy access
198
- self.manager_vision = self.vision_config.manager
199
- self.executor_vision = self.vision_config.executor
200
- self.codeact_vision = self.vision_config.codeact
201
-
202
184
 
203
185
  self.event_counter = 0
204
- # Handle backward compatibility: bool -> str mapping
205
- if isinstance(save_trajectories, bool):
206
- self.save_trajectories = "step" if save_trajectories else "none"
207
- else:
208
- # Validate string values
209
- valid_values = ["none", "step", "action"]
210
- if save_trajectories not in valid_values:
211
- logger.warning(
212
- f"Invalid save_trajectories value: {save_trajectories}. Using 'none' instead."
213
- )
214
- self.save_trajectories = "none"
215
- else:
216
- self.save_trajectories = save_trajectories
217
-
218
186
  self.trajectory = Trajectory(goal=goal)
219
187
  self.task_manager = TaskManager()
220
188
  self.task_iter = None
221
-
222
- self.cim = ContextInjectionManager(personas=personas)
223
189
  self.current_episodic_memory = None
224
190
 
191
+ open_app_tool = {
192
+ "arguments": ["text"],
193
+ "description": "Open an app by name. Usage example: {\"action\": \"open_app\", \"text\": \"the name of app\"}",
194
+ "function": open_app,
195
+ }
196
+ # Merge with user-provided custom tools; the built-in open_app entry is added last, so it replaces any user tool named "open_app"
197
+ self.custom_tools = {**self.custom_tools, "open_app": open_app_tool}
198
+
225
199
  logger.info("🤖 Initializing DroidAgent...")
226
- logger.info(f"💾 Trajectory saving level: {self.save_trajectories}")
200
+ logger.info(f"💾 Trajectory saving: {self.config.logging.save_trajectory}")
227
201
 
228
202
  self.tools_instance = tools
203
+ self.tools_instance.save_trajectories = self.config.logging.save_trajectory
204
+ # Set app_opener_llm on tools instance for open_app custom tool
205
+ self.tools_instance.app_opener_llm = self.app_opener_llm
229
206
 
230
- self.tools_instance.save_trajectories = self.save_trajectories
231
-
232
- # Create shared state instance for Manager/Executor workflows
233
207
  self.shared_state = DroidAgentState(
234
208
  instruction=goal,
235
209
  err_to_manager_thresh=2
236
210
  )
237
211
 
238
- if self.reasoning:
212
+ if self.config.agent.reasoning:
239
213
  logger.info("📝 Initializing Manager and Executor Agents...")
240
214
  self.manager_agent = ManagerAgent(
241
215
  llm=self.manager_llm,
242
- vision=self.manager_vision,
243
- personas=personas,
244
216
  tools_instance=tools,
245
217
  shared_state=self.shared_state,
218
+ agent_config=self.config.agent,
246
219
  custom_tools=self.custom_tools,
247
220
  timeout=timeout,
248
- debug=debug,
249
221
  )
250
222
  self.executor_agent = ExecutorAgent(
251
223
  llm=self.executor_llm,
252
- vision=self.executor_vision,
253
224
  tools_instance=tools,
254
225
  shared_state=self.shared_state,
255
- persona=None, # Need to figure this out
226
+ agent_config=self.config.agent,
256
227
  custom_tools=self.custom_tools,
257
228
  timeout=timeout,
258
- debug=debug,
259
229
  )
260
- self.max_codeact_steps = 5
261
-
262
-
263
- # Keep planner_agent for backward compatibility (can be removed later)
264
230
  self.planner_agent = None
265
-
266
231
  else:
267
- logger.debug("🚫 Reasoning disabled - will execute tasks directly with CodeActAgent")
232
+ logger.debug("🚫 Reasoning disabled - executing directly with CodeActAgent")
268
233
  self.manager_agent = None
269
234
  self.executor_agent = None
270
235
  self.planner_agent = None
271
236
 
272
- # Get tool names from ATOMIC_ACTION_SIGNATURES for telemetry
273
237
  atomic_tools = list(ATOMIC_ACTION_SIGNATURES.keys())
274
238
 
275
239
  capture(
276
- # TODO: do proper telemetry instead of this ductaped crap
277
240
  DroidAgentInitEvent(
278
241
  goal=goal,
279
- llm=self.llm.class_name(),
242
+ llms={
243
+ "manager": self.manager_llm.class_name() if self.manager_llm else "None",
244
+ "executor": self.executor_llm.class_name() if self.executor_llm else "None",
245
+ "codeact": self.codeact_llm.class_name() if self.codeact_llm else "None",
246
+ "text_manipulator": self.text_manipulator_llm.class_name() if
247
+ self.text_manipulator_llm else "None",
248
+ "app_opener": self.app_opener_llm.class_name() if self.app_opener_llm else "None",
249
+ },
280
250
  tools=",".join(atomic_tools + ["remember", "complete"]),
281
- personas=",".join([p.name for p in personas]),
282
- max_steps=max_steps,
251
+ max_steps=self.config.agent.max_steps,
283
252
  timeout=timeout,
284
- vision=self.vision,
285
- reasoning=reasoning,
286
- enable_tracing=enable_tracing,
287
- debug=debug,
288
- save_trajectories=save_trajectories,
253
+ vision={
254
+ "manager": self.config.agent.manager.vision,
255
+ "executor": self.config.agent.executor.vision,
256
+ "codeact": self.config.agent.codeact.vision
257
+ },
258
+ reasoning=self.config.agent.reasoning,
259
+ enable_tracing=self.config.tracing.enabled,
260
+ debug=self.config.logging.debug,
261
+ save_trajectories=self.config.logging.save_trajectory,
289
262
  ),
290
263
  self.user_id,
291
264
  )
@@ -339,19 +312,17 @@ class DroidAgent(Workflow):
339
312
  Tuple of (success, reason)
340
313
  """
341
314
  task: Task = ev.task
342
- persona = self.cim.get_persona(task.agent_type)
343
315
 
344
316
  logger.info(f"🔧 Executing task: {task.description}")
345
317
 
346
318
  try:
347
319
  codeact_agent = CodeActAgent(
348
320
  llm=self.codeact_llm,
349
- persona=persona,
350
- vision=self.codeact_vision,
351
- max_steps=self.max_codeact_steps,
321
+ agent_config=self.config.agent,
352
322
  tools_instance=self.tools_instance,
353
323
  custom_tools=self.custom_tools,
354
- debug=self.debug,
324
+ debug=self.config.logging.debug,
325
+ shared_state=self.shared_state,
355
326
  timeout=self.timeout,
356
327
  )
357
328
 
@@ -382,11 +353,10 @@ class DroidAgent(Workflow):
382
353
 
383
354
  except Exception as e:
384
355
  logger.error(f"Error during task execution: {e}")
385
- if self.debug:
356
+ if self.config.logging.debug:
386
357
  import traceback
387
-
388
358
  logger.error(traceback.format_exc())
389
- return CodeActResultEvent(success=False, reason=f"Error: {str(e)}", task=task, steps=[])
359
+ return CodeActResultEvent(success=False, reason=f"Error: {str(e)}", task=task, steps=0)
390
360
 
391
361
  @step
392
362
  async def handle_codeact_execute(
@@ -404,9 +374,8 @@ class DroidAgent(Workflow):
404
374
  )
405
375
  except Exception as e:
406
376
  logger.error(f"❌ Error during DroidAgent execution: {e}")
407
- if self.debug:
377
+ if self.config.logging.debug:
408
378
  import traceback
409
-
410
379
  logger.error(traceback.format_exc())
411
380
  tasks = self.task_manager.get_task_history()
412
381
  return FinalizeEvent(
@@ -434,17 +403,15 @@ class DroidAgent(Workflow):
434
403
  self.step_counter = 0
435
404
  self.retry_counter = 0
436
405
 
437
- if not self.reasoning:
406
+ if not self.config.agent.reasoning:
438
407
  logger.info(f"🔄 Direct execution mode - executing goal: {self.goal}")
439
408
  task = Task(
440
409
  description=self.goal,
441
410
  status=self.task_manager.STATUS_PENDING,
442
411
  agent_type="Default",
443
412
  )
444
-
445
413
  return CodeActExecuteEvent(task=task)
446
414
 
447
- # Reasoning mode - state already initialized in __init__, start with Manager
448
415
  logger.info("🧠 Reasoning mode - initializing Manager/Executor workflow")
449
416
  return ManagerInputEvent()
450
417
 
@@ -464,17 +431,15 @@ class DroidAgent(Workflow):
464
431
  Pre-flight checks for termination before running manager.
465
432
  The Manager analyzes current state and creates a plan with subgoals.
466
433
  """
467
- # Check if we've reached the maximum number of steps
468
- if self.step_counter >= self.max_steps:
469
- logger.warning(f"⚠️ Reached maximum steps ({self.max_steps})")
434
+ if self.step_counter >= self.config.agent.max_steps:
435
+ logger.warning(f"⚠️ Reached maximum steps ({self.config.agent.max_steps})")
470
436
  return self._create_finalize_event(
471
437
  success=False,
472
- reason=f"Reached maximum steps ({self.max_steps})",
473
- output=f"Reached maximum steps ({self.max_steps})"
438
+ reason=f"Reached maximum steps ({self.config.agent.max_steps})",
439
+ output=f"Reached maximum steps ({self.config.agent.max_steps})"
474
440
  )
475
441
 
476
- # Continue with Manager execution
477
- logger.info(f"📋 Running Manager for planning... (step {self.step_counter}/{self.max_steps})")
442
+ logger.info(f"📋 Running Manager for planning... (step {self.step_counter}/{self.config.agent.max_steps})")
478
443
 
479
444
  # Run Manager workflow
480
445
  handler = self.manager_agent.run()
@@ -489,7 +454,6 @@ class DroidAgent(Workflow):
489
454
  return ManagerPlanEvent(
490
455
  plan=result["plan"],
491
456
  current_subgoal=result["current_subgoal"],
492
- completed_plan=result["completed_plan"],
493
457
  thought=result["thought"],
494
458
  manager_answer=result.get("manager_answer", "")
495
459
  )
@@ -551,7 +515,6 @@ class DroidAgent(Workflow):
551
515
  self.shared_state.last_summary = result["summary"]
552
516
  self.shared_state.last_action_thought = result.get("thought", "")
553
517
  self.shared_state.action_pool.append(result["action_json"])
554
- self.shared_state.progress_status = self.shared_state.completed_plan
555
518
 
556
519
  return ExecutorResultEvent(
557
520
  action=result["action"],
@@ -583,9 +546,8 @@ class DroidAgent(Workflow):
583
546
  self.shared_state.error_flag_plan = True
584
547
 
585
548
  self.step_counter += 1
586
- logger.info(f"🔄 Step {self.step_counter}/{self.max_steps} complete, looping to Manager")
549
+ logger.info(f"🔄 Step {self.step_counter}/{self.config.agent.max_steps} complete, looping to Manager")
587
550
 
588
- # Always loop back to Manager (it will check max steps in pre-flight)
589
551
  return ManagerInputEvent()
590
552
 
591
553
  # ========================================================================
@@ -608,13 +570,12 @@ class DroidAgent(Workflow):
608
570
 
609
571
  result = {
610
572
  "success": ev.success,
611
- # deprecated. use output instead.
612
573
  "reason": ev.reason,
613
574
  "output": ev.output,
614
575
  "steps": ev.steps,
615
576
  }
616
577
 
617
- if self.trajectory and self.save_trajectories != "none":
578
+ if self.trajectory and self.config.logging.save_trajectory != "none":
618
579
  self.trajectory.save_trajectory()
619
580
 
620
581
  return StopEvent(result)
@@ -1,3 +1,15 @@
1
+ """
2
+ DroidAgent coordination events.
3
+
4
+ These events are used for WORKFLOW COORDINATION between DroidAgent and its child agents.
5
+ They carry minimal data needed for routing workflow steps.
6
+
7
+ For internal events with full debugging metadata, see:
8
+ - manager/events.py (ManagerInternalPlanEvent)
9
+ - executor/events.py (ExecutorInternalActionEvent, ExecutorInternalResultEvent)
10
+ - codeact/events.py (Task*, EpisodicMemoryEvent)
11
+ """
12
+
1
13
  from typing import Dict, List
2
14
 
3
15
  from llama_index.core.workflow import Event
@@ -12,6 +24,7 @@ class CodeActExecuteEvent(Event):
12
24
  class CodeActResultEvent(Event):
13
25
  success: bool
14
26
  reason: str
27
+ task: Task
15
28
  steps: int
16
29
 
17
30
 
@@ -29,11 +42,6 @@ class TaskRunnerEvent(Event):
29
42
  pass
30
43
 
31
44
 
32
-
33
- # ============================================================================
34
- # DroidAgentState - State model for llama-index Context
35
- # ============================================================================
36
-
37
45
  class DroidAgentState(BaseModel):
38
46
  """
39
47
  State model for DroidAgent workflow - shared across parent and child workflows.
@@ -42,21 +50,36 @@ class DroidAgentState(BaseModel):
42
50
  # Task context
43
51
  instruction: str = ""
44
52
 
45
- # UI State
46
- ui_elements_list_before: str = ""
47
- ui_elements_list_after: str = ""
53
+ # Formatted device state for prompts (complete text)
54
+ formatted_device_state: str = ""
55
+
56
+ # Focused element text
48
57
  focused_text: str = ""
49
- device_state_text: str = ""
58
+
59
+ # Raw device state components (for access to raw data)
60
+ a11y_tree: List[Dict] = Field(default_factory=list)
61
+ phone_state: Dict = Field(default_factory=dict)
62
+
63
+ # Derived fields (extracted from phone_state)
64
+ current_package_name: str = ""
65
+ current_app_name: str = ""
66
+
67
+ # Previous device state (for before/after comparison in Manager)
68
+ previous_formatted_device_state: str = ""
69
+
70
+ # Screen dimensions and screenshot
50
71
  width: int = 0
51
72
  height: int = 0
52
73
  screenshot: str | bytes | None = None
74
+
75
+ # Text manipulation flag
53
76
  has_text_to_modify: bool = False
54
77
 
55
78
  # Action tracking
56
79
  action_pool: List[Dict] = Field(default_factory=list)
57
80
  action_history: List[Dict] = Field(default_factory=list)
58
81
  summary_history: List[str] = Field(default_factory=list)
59
- action_outcomes: List[str] = Field(default_factory=list) # "A", "B", "C"
82
+ action_outcomes: List[bool] = Field(default_factory=list) # one bool per action (presumably True = success; the old "A"/"B"/"C" codes no longer apply)
60
83
  error_descriptions: List[str] = Field(default_factory=list)
61
84
 
62
85
  # Last action info
@@ -70,7 +93,6 @@ class DroidAgentState(BaseModel):
70
93
 
71
94
  # Planning
72
95
  plan: str = ""
73
- completed_plan: str = ""
74
96
  current_subgoal: str = ""
75
97
  finish_thought: str = ""
76
98
  progress_status: str = ""
@@ -94,10 +116,14 @@ class ManagerInputEvent(Event):
94
116
 
95
117
 
96
118
  class ManagerPlanEvent(Event):
97
- """Manager has created a plan"""
119
+ """
120
+ Coordination event from ManagerAgent to DroidAgent.
121
+
122
+ Used for workflow step routing only (NOT streamed to frontend).
123
+ For internal events with memory_update metadata, see ManagerInternalPlanEvent.
124
+ """
98
125
  plan: str
99
126
  current_subgoal: str
100
- completed_plan: str
101
127
  thought: str
102
128
  manager_answer: str = ""
103
129
 
@@ -108,7 +134,12 @@ class ExecutorInputEvent(Event):
108
134
 
109
135
 
110
136
  class ExecutorResultEvent(Event):
111
- """Executor action result"""
137
+ """
138
+ Coordination event from ExecutorAgent to DroidAgent.
139
+
140
+ Used for workflow step routing only (NOT streamed to frontend).
141
+ For internal events with thought/action_json metadata, see ExecutorInternalResultEvent.
142
+ """
112
143
  action: Dict
113
144
  outcome: bool
114
145
  error: str
@@ -2,12 +2,14 @@
2
2
  Executor Agent - Action execution workflow.
3
3
  """
4
4
 
5
- from droidrun.agent.executor.events import ExecutorActionEvent, ExecutorResultEvent
5
+ from droidrun.agent.droid.events import ExecutorInputEvent, ExecutorResultEvent
6
+ from droidrun.agent.executor.events import ExecutorInternalActionEvent, ExecutorInternalResultEvent
6
7
  from droidrun.agent.executor.executor_agent import ExecutorAgent
7
8
 
8
9
  __all__ = [
9
10
  "ExecutorAgent",
10
- "ExecutorThinkingEvent",
11
- "ExecutorActionEvent",
12
- "ExecutorResultEvent"
11
+ "ExecutorInputEvent",
12
+ "ExecutorResultEvent",
13
+ "ExecutorInternalActionEvent",
14
+ "ExecutorInternalResultEvent"
13
15
  ]
@@ -1,5 +1,11 @@
1
1
  """
2
2
  Events for the ExecutorAgent workflow.
3
+
4
+ These are INTERNAL events used within ExecutorAgent for:
5
+ - Streaming to frontend/logging
6
+ - Carrying full debug metadata (thought process, raw action JSON)
7
+
8
+ For workflow coordination with DroidAgent, see droid/events.py
3
9
  """
4
10
 
5
11
  from typing import Dict
@@ -7,18 +13,32 @@ from typing import Dict
7
13
  from llama_index.core.workflow.events import Event
8
14
 
9
15
 
10
- class ExecutorActionEvent(Event):
11
- """Executor has selected an action to execute"""
12
- action_json: str
13
- thought: str
14
- description: str
16
+ class ExecutorInternalActionEvent(Event):
17
+ """
18
+ Internal Executor action selection event with thought process.
19
+
20
+ This event is streamed to frontend/logging but NOT used for
21
+ workflow coordination between ExecutorAgent and DroidAgent.
22
+
23
+ For workflow coordination, see ExecutorInputEvent in droid/events.py
24
+ """
25
+ action_json: str # Raw JSON string of the action
26
+ thought: str # Debugging metadata: LLM's reasoning process
27
+ description: str # Human-readable action description
28
+
29
+
30
+ class ExecutorInternalResultEvent(Event):
31
+ """
32
+ Internal Executor result event with full debug information.
15
33
 
34
+ This event is streamed to frontend/logging but NOT used for
35
+ workflow coordination between ExecutorAgent and DroidAgent.
16
36
 
17
- class ExecutorResultEvent(Event):
18
- """Executor action result"""
37
+ For workflow coordination, see ExecutorResultEvent in droid/events.py
38
+ """
19
39
  action: Dict
20
40
  outcome: bool
21
41
  error: str
22
42
  summary: str
23
- thought: str = ""
24
- action_json: str = ""
43
+ thought: str = "" # Debugging metadata: LLM's thought process
44
+ action_json: str = "" # Debugging metadata: Raw action JSON