droidrun 0.3.10.dev3__py3-none-any.whl → 0.3.10.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. droidrun/agent/codeact/__init__.py +1 -4
  2. droidrun/agent/codeact/codeact_agent.py +95 -86
  3. droidrun/agent/codeact/events.py +1 -2
  4. droidrun/agent/context/__init__.py +5 -9
  5. droidrun/agent/context/episodic_memory.py +1 -3
  6. droidrun/agent/context/task_manager.py +8 -2
  7. droidrun/agent/droid/droid_agent.py +102 -141
  8. droidrun/agent/droid/events.py +45 -14
  9. droidrun/agent/executor/__init__.py +6 -4
  10. droidrun/agent/executor/events.py +29 -9
  11. droidrun/agent/executor/executor_agent.py +86 -28
  12. droidrun/agent/executor/prompts.py +8 -2
  13. droidrun/agent/manager/__init__.py +6 -7
  14. droidrun/agent/manager/events.py +16 -4
  15. droidrun/agent/manager/manager_agent.py +130 -69
  16. droidrun/agent/manager/prompts.py +1 -159
  17. droidrun/agent/utils/chat_utils.py +64 -2
  18. droidrun/agent/utils/device_state_formatter.py +54 -26
  19. droidrun/agent/utils/executer.py +66 -80
  20. droidrun/agent/utils/inference.py +11 -10
  21. droidrun/agent/utils/tools.py +58 -6
  22. droidrun/agent/utils/trajectory.py +18 -12
  23. droidrun/cli/logs.py +118 -56
  24. droidrun/cli/main.py +154 -136
  25. droidrun/config_manager/__init__.py +9 -7
  26. droidrun/config_manager/app_card_loader.py +148 -0
  27. droidrun/config_manager/config_manager.py +200 -102
  28. droidrun/config_manager/path_resolver.py +104 -0
  29. droidrun/config_manager/prompt_loader.py +75 -0
  30. droidrun/macro/__init__.py +1 -1
  31. droidrun/macro/cli.py +23 -18
  32. droidrun/telemetry/__init__.py +2 -2
  33. droidrun/telemetry/events.py +3 -3
  34. droidrun/telemetry/tracker.py +1 -1
  35. droidrun/tools/adb.py +1 -1
  36. droidrun/tools/ios.py +3 -2
  37. {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev4.dist-info}/METADATA +9 -1
  38. droidrun-0.3.10.dev4.dist-info/RECORD +61 -0
  39. droidrun/agent/codeact/prompts.py +0 -26
  40. droidrun/agent/context/agent_persona.py +0 -16
  41. droidrun/agent/context/context_injection_manager.py +0 -66
  42. droidrun/agent/context/personas/__init__.py +0 -11
  43. droidrun/agent/context/personas/app_starter.py +0 -44
  44. droidrun/agent/context/personas/big_agent.py +0 -96
  45. droidrun/agent/context/personas/default.py +0 -95
  46. droidrun/agent/context/personas/ui_expert.py +0 -108
  47. droidrun/agent/planner/__init__.py +0 -13
  48. droidrun/agent/planner/events.py +0 -21
  49. droidrun/agent/planner/planner_agent.py +0 -311
  50. droidrun/agent/planner/prompts.py +0 -124
  51. droidrun-0.3.10.dev3.dist-info/RECORD +0 -70
  52. {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev4.dist-info}/WHEEL +0 -0
  53. {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev4.dist-info}/entry_points.txt +0 -0
  54. {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev4.dist-info}/licenses/LICENSE +0 -0
@@ -1,8 +1,5 @@
1
1
  from droidrun.agent.codeact.codeact_agent import CodeActAgent
2
- from droidrun.agent.codeact.prompts import DEFAULT_CODE_ACT_USER_PROMPT, DEFAULT_NO_THOUGHTS_PROMPT
3
2
 
4
3
  __all__ = [
5
- "CodeActAgent",
6
- "DEFAULT_CODE_ACT_USER_PROMPT",
7
- "DEFAULT_NO_THOUGHTS_PROMPT"
4
+ "CodeActAgent"
8
5
  ]
@@ -3,12 +3,11 @@ import json
3
3
  import logging
4
4
  import re
5
5
  import time
6
- from typing import List, Union
6
+ from typing import List, Union, Optional, TYPE_CHECKING
7
7
 
8
8
  from llama_index.core.base.llms.types import ChatMessage, ChatResponse
9
9
  from llama_index.core.llms.llm import LLM
10
10
  from llama_index.core.memory import Memory
11
- from llama_index.core.prompts import PromptTemplate
12
11
  from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow, step
13
12
 
14
13
  from droidrun.agent.codeact.events import (
@@ -19,20 +18,26 @@ from droidrun.agent.codeact.events import (
19
18
  TaskInputEvent,
20
19
  TaskThinkingEvent,
21
20
  )
22
- from droidrun.agent.codeact.prompts import (
23
- DEFAULT_CODE_ACT_USER_PROMPT,
24
- DEFAULT_NO_THOUGHTS_PROMPT,
25
- )
26
21
  from droidrun.agent.common.constants import LLM_HISTORY_LIMIT
27
22
  from droidrun.agent.common.events import RecordUIStateEvent, ScreenshotEvent
28
- from droidrun.agent.context.agent_persona import AgentPersona
29
23
  from droidrun.agent.context.episodic_memory import EpisodicMemory, EpisodicMemoryStep
30
24
  from droidrun.agent.usage import get_usage_from_response
31
25
  from droidrun.agent.utils import chat_utils
32
- from droidrun.agent.utils.executer import SimpleCodeExecutor
33
- from droidrun.agent.utils.tools import ATOMIC_ACTION_SIGNATURES, get_atomic_tool_descriptions, build_custom_tool_descriptions
26
+ from droidrun.agent.utils.executer import SimpleCodeExecutor, ExecuterState
27
+ from droidrun.agent.utils.device_state_formatter import format_device_state
28
+
29
+ from droidrun.agent.utils.tools import (
30
+ ATOMIC_ACTION_SIGNATURES,
31
+ build_custom_tool_descriptions,
32
+ get_atomic_tool_descriptions,
33
+ )
34
+ from droidrun.config_manager.config_manager import AgentConfig
35
+ from droidrun.config_manager.prompt_loader import PromptLoader
34
36
  from droidrun.tools import Tools
35
37
 
38
+ if TYPE_CHECKING:
39
+ from droidrun.agent.droid.droid_agent import DroidAgentState
40
+
36
41
  logger = logging.getLogger("droidrun")
37
42
 
38
43
 
@@ -46,81 +51,69 @@ class CodeActAgent(Workflow):
46
51
  def __init__(
47
52
  self,
48
53
  llm: LLM,
49
- persona: AgentPersona,
50
- vision: bool,
54
+ agent_config: AgentConfig,
51
55
  tools_instance: "Tools",
52
- max_steps: int = 5,
53
56
  custom_tools: dict = None,
54
57
  debug: bool = False,
58
+ shared_state: Optional["DroidAgentState"] = None,
55
59
  *args,
56
60
  **kwargs,
57
61
  ):
58
- # assert instead of if
59
62
  assert llm, "llm must be provided."
60
63
  super().__init__(*args, **kwargs)
61
64
 
62
65
  self.llm = llm
63
- self.max_steps = max_steps
64
-
65
- self.user_prompt = persona.user_prompt
66
- self.no_thoughts_prompt = None
67
-
68
- self.vision = vision
66
+ self.agent_config = agent_config
67
+ self.config = agent_config.codeact # Shortcut to codeact config
68
+ self.max_steps = agent_config.max_steps
69
+ self.vision = agent_config.codeact.vision
70
+ self.debug = debug
71
+ self.tools = tools_instance
72
+ self.shared_state = shared_state
69
73
 
70
74
  self.chat_memory = None
71
- self.episodic_memory = EpisodicMemory(persona=persona)
75
+ self.episodic_memory = EpisodicMemory()
72
76
  self.remembered_info = None
73
77
 
74
78
  self.goal = None
75
79
  self.steps_counter = 0
76
80
  self.code_exec_counter = 0
77
- self.debug = debug
78
-
79
- self.tools = tools_instance
80
81
 
81
- # Merge custom_tools with ATOMIC_ACTION_SIGNATURES
82
- # Custom tools are treated the same as atomic actions by CodeAct
82
+ # Build tool list
83
83
  merged_signatures = {**ATOMIC_ACTION_SIGNATURES, **(custom_tools or {})}
84
84
 
85
- # Build tool_list from merged signatures
86
85
  self.tool_list = {}
87
86
  for action_name, signature in merged_signatures.items():
88
87
  func = signature["function"]
89
- # Create bound function (curry tools_instance as first argument)
90
- # Handle both sync and async functions
91
88
  if asyncio.iscoroutinefunction(func):
92
- async def make_async_bound(f, ti):
89
+ # Create async bound function with proper closure
90
+ def make_bound(f, ti):
93
91
  async def bound_func(*args, **kwargs):
94
92
  return await f(ti, *args, **kwargs)
95
93
  return bound_func
96
- self.tool_list[action_name] = asyncio.run(make_async_bound(func, tools_instance))
94
+ self.tool_list[action_name] = make_bound(func, tools_instance)
97
95
  else:
98
- self.tool_list[action_name] = lambda *args, f=func, ti=tools_instance: f(ti, *args)
96
+ self.tool_list[action_name] = lambda *args, f=func, ti=tools_instance, **kwargs: f(ti, *args, **kwargs)
99
97
 
100
- # Add non-atomic tools (remember, complete) from tools_instance
101
98
  self.tool_list["remember"] = tools_instance.remember
102
99
  self.tool_list["complete"] = tools_instance.complete
103
100
 
104
- # Get tool descriptions from ATOMIC_ACTION_SIGNATURES and custom_tools
101
+ # Build tool descriptions
105
102
  self.tool_descriptions = get_atomic_tool_descriptions()
106
-
107
- # Add custom tool descriptions if provided
108
103
  custom_descriptions = build_custom_tool_descriptions(custom_tools or {})
109
104
  if custom_descriptions:
110
105
  self.tool_descriptions += "\n" + custom_descriptions
111
-
112
- # Add descriptions for remember/complete
113
106
  self.tool_descriptions += "\n- remember(information: str): Remember information for later use"
114
107
  self.tool_descriptions += "\n- complete(success: bool, reason: str): Mark task as complete"
115
108
 
116
- self.system_prompt_content = persona.system_prompt.format(
117
- tool_descriptions=self.tool_descriptions
118
- )
119
- self.system_prompt = ChatMessage(
120
- role="system", content=self.system_prompt_content
109
+ # Load prompts from config
110
+ system_prompt_text = PromptLoader.load_prompt(
111
+ agent_config.get_codeact_system_prompt_path(),
112
+ {"tool_descriptions": self.tool_descriptions}
121
113
  )
114
+ self.system_prompt = ChatMessage(role="system", content=system_prompt_text)
122
115
 
123
- self.required_context = persona.required_context
116
+ self.user_prompt_template = PromptLoader.load_prompt(agent_config.get_codeact_user_prompt_path())
124
117
 
125
118
  self.executor = SimpleCodeExecutor(
126
119
  loop=asyncio.get_event_loop(),
@@ -150,16 +143,21 @@ class CodeActAgent(Workflow):
150
143
 
151
144
  logger.debug(" - Adding goal to memory.")
152
145
  goal = user_input
153
- self.user_message = ChatMessage(
154
- role="user",
155
- content=PromptTemplate(
156
- self.user_prompt or DEFAULT_CODE_ACT_USER_PROMPT
157
- ).format(goal=goal),
158
- )
159
- self.no_thoughts_prompt = ChatMessage(
160
- role="user",
161
- content=PromptTemplate(DEFAULT_NO_THOUGHTS_PROMPT).format(goal=goal),
146
+
147
+ # Format user prompt with goal
148
+ user_prompt_text = PromptLoader.load_prompt(
149
+ self.agent_config.get_codeact_user_prompt_path(),
150
+ {"goal": goal}
162
151
  )
152
+ self.user_message = ChatMessage(role="user", content=user_prompt_text)
153
+
154
+ # No thoughts prompt
155
+ no_thoughts_text = f"""Your previous response provided code without explaining your reasoning first. Remember to always describe your thought process and plan *before* providing the code block.
156
+
157
+ The code you provided will be executed below.
158
+
159
+ Now, describe the next step you will take to address the original goal: {goal}"""
160
+ self.no_thoughts_prompt = ChatMessage(role="user", content=no_thoughts_text)
163
161
 
164
162
 
165
163
  await self.chat_memory.aput(self.user_message)
@@ -194,38 +192,46 @@ class CodeActAgent(Workflow):
194
192
  await ctx.store.set("remembered_info", self.remembered_info)
195
193
  chat_history = await chat_utils.add_memory_block(self.remembered_info, chat_history)
196
194
 
197
- for context in self.required_context:
198
- if context == "screenshot":
199
- # if vision is disabled, screenshot should save to trajectory
200
- screenshot = (self.tools.take_screenshot())[1]
201
- ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
195
+ # Always capture screenshot for trajectory
196
+ screenshot = (self.tools.take_screenshot())[1]
197
+ ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
198
+ await ctx.store.set("screenshot", screenshot)
202
199
 
203
- await ctx.store.set("screenshot", screenshot)
204
- if model == "DeepSeek":
205
- logger.warning(
206
- "[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
207
- )
208
- elif self.vision: # if vision is enabled, add screenshot to chat history
209
- chat_history = await chat_utils.add_screenshot_image_block(screenshot, chat_history)
210
-
211
- if context == "ui_state":
212
- try:
213
- state = self.tools.get_state()
214
- await ctx.store.set("ui_state", state["a11y_tree"])
215
- ctx.write_event_to_stream(RecordUIStateEvent(ui_state=state["a11y_tree"]))
216
- chat_history = await chat_utils.add_ui_text_block(
217
- state["a11y_tree"], chat_history
218
- )
219
- chat_history = await chat_utils.add_phone_state_block(state["phone_state"], chat_history)
220
- except Exception:
221
- logger.warning("⚠️ Error retrieving state from the connected device. Is the Accessibility Service enabled?")
222
-
223
-
224
- if context == "packages":
225
- chat_history = await chat_utils.add_packages_block(
226
- self.tools.list_packages(include_system_apps=True),
227
- chat_history,
228
- )
200
+ # Add screenshot to chat only if vision enabled
201
+ if self.vision and model != "DeepSeek":
202
+ chat_history = await chat_utils.add_screenshot_image_block(screenshot, chat_history)
203
+
204
+ # Get and format device state using unified formatter
205
+ try:
206
+
207
+ # Get raw state from device
208
+ raw_state = self.tools.get_state()
209
+
210
+ # Format using unified function (returns 4 values)
211
+ formatted_text, focused_text, a11y_tree, phone_state = format_device_state(raw_state)
212
+
213
+ # Update shared_state if available
214
+ if self.shared_state is not None:
215
+ self.shared_state.formatted_device_state = formatted_text
216
+ self.shared_state.focused_text = focused_text
217
+ self.shared_state.a11y_tree = a11y_tree
218
+ self.shared_state.phone_state = phone_state
219
+
220
+ # Extract and store package/app name
221
+ self.shared_state.current_package_name = phone_state.get('packageName', 'Unknown')
222
+ self.shared_state.current_app_name = phone_state.get('currentApp', 'Unknown')
223
+
224
+ # Stream formatted state for trajectory
225
+ ctx.write_event_to_stream(RecordUIStateEvent(ui_state=a11y_tree))
226
+
227
+ # Add device state to chat using new chat_utils function
228
+ # This injects into LAST user message, doesn't create new message
229
+ chat_history = await chat_utils.add_device_state_block(formatted_text, chat_history)
230
+
231
+ except Exception as e:
232
+ logger.warning(f"⚠️ Error retrieving state from the connected device: {e}")
233
+ if self.debug:
234
+ logger.error("State retrieval error details:", exc_info=True)
229
235
 
230
236
  response = await self._get_llm_response(ctx, chat_history)
231
237
  if response is None:
@@ -286,8 +292,9 @@ class CodeActAgent(Workflow):
286
292
 
287
293
  try:
288
294
  self.code_exec_counter += 1
289
- result = await self.executor.execute(ctx, code)
295
+ result = await self.executor.execute(ExecuterState(ui_state=ctx.store.get("ui_state", None)), code)
290
296
  logger.info(f"💡 Code execution successful. Result: {result['output']}")
297
+ await asyncio.sleep(self.agent_config.after_sleep_action)
291
298
  screenshots = result['screenshots']
292
299
  for screenshot in screenshots[:-1]: # the last screenshot will be captured by next step
293
300
  ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
@@ -487,9 +494,11 @@ class CodeActAgent(Workflow):
487
494
  logger.warning(f"Failed to capture final screenshot: {e}")
488
495
 
489
496
  try:
490
- (a11y_tree, phone_state) = self.tools.get_state()
497
+ state = self.tools.get_state()
498
+ a11y_tree = state.get("a11y_tree", "")
499
+ phone_state = state.get("phone_state", "")
491
500
  except Exception as e:
492
- logger.warning(f"Failed to capture final UI state: {e}")
501
+ raise Exception(f"Failed to capture final UI state: {e}") from e
493
502
 
494
503
  # Create final observation chat history and response
495
504
  final_chat_history = [{"role": "system", "content": "Final state observation after task completion"}]
@@ -3,10 +3,9 @@ from typing import Optional
3
3
  from llama_index.core.llms import ChatMessage
4
4
  from llama_index.core.workflow import Event
5
5
 
6
+ from droidrun.agent.context.episodic_memory import EpisodicMemory
6
7
  from droidrun.agent.usage import UsageResult
7
8
 
8
- from ..context.episodic_memory import EpisodicMemory
9
-
10
9
 
11
10
  class TaskInputEvent(Event):
12
11
  input: list[ChatMessage]
@@ -1,19 +1,15 @@
1
1
  """
2
- Agent Context Module - Provides specialized agent personas and context injection management.
2
+ Agent Context Module - Provides episodic memory and task management.
3
3
 
4
4
  This module contains:
5
- - AgentPersona: Dataclass for defining specialized agent configurations
6
- - ContextInjectionManager: Manager for handling different agent personas and their contexts
5
+ - EpisodicMemory: Memory system for tracking agent steps
6
+ - TaskManager: Manages tasks and their execution
7
7
  """
8
8
 
9
- from .agent_persona import AgentPersona
10
- from .context_injection_manager import ContextInjectionManager
11
- from .episodic_memory import EpisodicMemory, EpisodicMemoryStep
12
- from .task_manager import Task, TaskManager
9
+ from droidrun.agent.context.episodic_memory import EpisodicMemory, EpisodicMemoryStep
10
+ from droidrun.agent.context.task_manager import Task, TaskManager
13
11
 
14
12
  __all__ = [
15
- "AgentPersona",
16
- "ContextInjectionManager",
17
13
  "EpisodicMemory",
18
14
  "EpisodicMemoryStep",
19
15
  "TaskManager",
@@ -1,8 +1,6 @@
1
1
  from dataclasses import dataclass, field
2
2
  from typing import List, Optional
3
3
 
4
- from droidrun.agent.context.agent_persona import AgentPersona
5
-
6
4
 
7
5
  @dataclass
8
6
  class EpisodicMemoryStep:
@@ -11,7 +9,7 @@ class EpisodicMemoryStep:
11
9
  timestamp: float
12
10
  screenshot: Optional[bytes]
13
11
 
12
+
14
13
  @dataclass
15
14
  class EpisodicMemory:
16
- persona: AgentPersona
17
15
  steps: List[EpisodicMemoryStep] = field(default_factory=list)
@@ -3,6 +3,8 @@ import os
3
3
  from dataclasses import dataclass
4
4
  from typing import Dict, List, Optional
5
5
 
6
+ from droidrun.config_manager.path_resolver import PathResolver
7
+
6
8
 
7
9
  @dataclass
8
10
  class Task:
@@ -36,7 +38,8 @@ class TaskManager:
36
38
  self.goal_completed = False
37
39
  self.message = None
38
40
  self.task_history = []
39
- self.file_path = os.path.join(os.path.dirname(__file__), "todo.txt")
41
+ # Save to working directory for user visibility
42
+ self.file_path = PathResolver.resolve("droidrun_tasks.txt", create_if_missing=True)
40
43
 
41
44
  def get_all_tasks(self) -> List[Task]:
42
45
  return self.tasks
@@ -71,8 +74,11 @@ class TaskManager:
71
74
 
72
75
 
73
76
  def save_to_file(self):
74
- """Saves the current task list to a Markdown file."""
77
+ """Saves the current task list to a text file."""
75
78
  try:
79
+ # Ensure parent directory exists
80
+ self.file_path.parent.mkdir(parents=True, exist_ok=True)
81
+
76
82
  with open(self.file_path, 'w', encoding='utf-8') as f:
77
83
  for i, task in enumerate(self.tasks, 1):
78
84
  f.write(f"Task {i}: {task.description}\n")