droidrun 0.3.10.dev3__py3-none-any.whl → 0.3.10.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/agent/codeact/__init__.py +1 -4
- droidrun/agent/codeact/codeact_agent.py +95 -86
- droidrun/agent/codeact/events.py +1 -2
- droidrun/agent/context/__init__.py +5 -9
- droidrun/agent/context/episodic_memory.py +1 -3
- droidrun/agent/context/task_manager.py +8 -2
- droidrun/agent/droid/droid_agent.py +102 -141
- droidrun/agent/droid/events.py +45 -14
- droidrun/agent/executor/__init__.py +6 -4
- droidrun/agent/executor/events.py +29 -9
- droidrun/agent/executor/executor_agent.py +86 -28
- droidrun/agent/executor/prompts.py +8 -2
- droidrun/agent/manager/__init__.py +6 -7
- droidrun/agent/manager/events.py +16 -4
- droidrun/agent/manager/manager_agent.py +130 -69
- droidrun/agent/manager/prompts.py +1 -159
- droidrun/agent/utils/chat_utils.py +64 -2
- droidrun/agent/utils/device_state_formatter.py +54 -26
- droidrun/agent/utils/executer.py +66 -80
- droidrun/agent/utils/inference.py +11 -10
- droidrun/agent/utils/tools.py +58 -6
- droidrun/agent/utils/trajectory.py +18 -12
- droidrun/cli/logs.py +118 -56
- droidrun/cli/main.py +154 -136
- droidrun/config_manager/__init__.py +9 -7
- droidrun/config_manager/app_card_loader.py +148 -0
- droidrun/config_manager/config_manager.py +200 -102
- droidrun/config_manager/path_resolver.py +104 -0
- droidrun/config_manager/prompt_loader.py +75 -0
- droidrun/macro/__init__.py +1 -1
- droidrun/macro/cli.py +23 -18
- droidrun/telemetry/__init__.py +2 -2
- droidrun/telemetry/events.py +3 -3
- droidrun/telemetry/tracker.py +1 -1
- droidrun/tools/adb.py +1 -1
- droidrun/tools/ios.py +3 -2
- {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev4.dist-info}/METADATA +9 -1
- droidrun-0.3.10.dev4.dist-info/RECORD +61 -0
- droidrun/agent/codeact/prompts.py +0 -26
- droidrun/agent/context/agent_persona.py +0 -16
- droidrun/agent/context/context_injection_manager.py +0 -66
- droidrun/agent/context/personas/__init__.py +0 -11
- droidrun/agent/context/personas/app_starter.py +0 -44
- droidrun/agent/context/personas/big_agent.py +0 -96
- droidrun/agent/context/personas/default.py +0 -95
- droidrun/agent/context/personas/ui_expert.py +0 -108
- droidrun/agent/planner/__init__.py +0 -13
- droidrun/agent/planner/events.py +0 -21
- droidrun/agent/planner/planner_agent.py +0 -311
- droidrun/agent/planner/prompts.py +0 -124
- droidrun-0.3.10.dev3.dist-info/RECORD +0 -70
- {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev4.dist-info}/WHEEL +0 -0
- {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev4.dist-info}/entry_points.txt +0 -0
- {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev4.dist-info}/licenses/LICENSE +0 -0
@@ -1,8 +1,5 @@
|
|
1
1
|
from droidrun.agent.codeact.codeact_agent import CodeActAgent
|
2
|
-
from droidrun.agent.codeact.prompts import DEFAULT_CODE_ACT_USER_PROMPT, DEFAULT_NO_THOUGHTS_PROMPT
|
3
2
|
|
4
3
|
__all__ = [
|
5
|
-
"CodeActAgent"
|
6
|
-
"DEFAULT_CODE_ACT_USER_PROMPT",
|
7
|
-
"DEFAULT_NO_THOUGHTS_PROMPT"
|
4
|
+
"CodeActAgent"
|
8
5
|
]
|
@@ -3,12 +3,11 @@ import json
|
|
3
3
|
import logging
|
4
4
|
import re
|
5
5
|
import time
|
6
|
-
from typing import List, Union
|
6
|
+
from typing import List, Union, Optional, TYPE_CHECKING
|
7
7
|
|
8
8
|
from llama_index.core.base.llms.types import ChatMessage, ChatResponse
|
9
9
|
from llama_index.core.llms.llm import LLM
|
10
10
|
from llama_index.core.memory import Memory
|
11
|
-
from llama_index.core.prompts import PromptTemplate
|
12
11
|
from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow, step
|
13
12
|
|
14
13
|
from droidrun.agent.codeact.events import (
|
@@ -19,20 +18,26 @@ from droidrun.agent.codeact.events import (
|
|
19
18
|
TaskInputEvent,
|
20
19
|
TaskThinkingEvent,
|
21
20
|
)
|
22
|
-
from droidrun.agent.codeact.prompts import (
|
23
|
-
DEFAULT_CODE_ACT_USER_PROMPT,
|
24
|
-
DEFAULT_NO_THOUGHTS_PROMPT,
|
25
|
-
)
|
26
21
|
from droidrun.agent.common.constants import LLM_HISTORY_LIMIT
|
27
22
|
from droidrun.agent.common.events import RecordUIStateEvent, ScreenshotEvent
|
28
|
-
from droidrun.agent.context.agent_persona import AgentPersona
|
29
23
|
from droidrun.agent.context.episodic_memory import EpisodicMemory, EpisodicMemoryStep
|
30
24
|
from droidrun.agent.usage import get_usage_from_response
|
31
25
|
from droidrun.agent.utils import chat_utils
|
32
|
-
from droidrun.agent.utils.executer import SimpleCodeExecutor
|
33
|
-
from droidrun.agent.utils.
|
26
|
+
from droidrun.agent.utils.executer import SimpleCodeExecutor, ExecuterState
|
27
|
+
from droidrun.agent.utils.device_state_formatter import format_device_state
|
28
|
+
|
29
|
+
from droidrun.agent.utils.tools import (
|
30
|
+
ATOMIC_ACTION_SIGNATURES,
|
31
|
+
build_custom_tool_descriptions,
|
32
|
+
get_atomic_tool_descriptions,
|
33
|
+
)
|
34
|
+
from droidrun.config_manager.config_manager import AgentConfig
|
35
|
+
from droidrun.config_manager.prompt_loader import PromptLoader
|
34
36
|
from droidrun.tools import Tools
|
35
37
|
|
38
|
+
if TYPE_CHECKING:
|
39
|
+
from droidrun.agent.droid.droid_agent import DroidAgentState
|
40
|
+
|
36
41
|
logger = logging.getLogger("droidrun")
|
37
42
|
|
38
43
|
|
@@ -46,81 +51,69 @@ class CodeActAgent(Workflow):
|
|
46
51
|
def __init__(
|
47
52
|
self,
|
48
53
|
llm: LLM,
|
49
|
-
|
50
|
-
vision: bool,
|
54
|
+
agent_config: AgentConfig,
|
51
55
|
tools_instance: "Tools",
|
52
|
-
max_steps: int = 5,
|
53
56
|
custom_tools: dict = None,
|
54
57
|
debug: bool = False,
|
58
|
+
shared_state: Optional["DroidAgentState"] = None,
|
55
59
|
*args,
|
56
60
|
**kwargs,
|
57
61
|
):
|
58
|
-
# assert instead of if
|
59
62
|
assert llm, "llm must be provided."
|
60
63
|
super().__init__(*args, **kwargs)
|
61
64
|
|
62
65
|
self.llm = llm
|
63
|
-
self.
|
64
|
-
|
65
|
-
self.
|
66
|
-
self.
|
67
|
-
|
68
|
-
self.
|
66
|
+
self.agent_config = agent_config
|
67
|
+
self.config = agent_config.codeact # Shortcut to codeact config
|
68
|
+
self.max_steps = agent_config.max_steps
|
69
|
+
self.vision = agent_config.codeact.vision
|
70
|
+
self.debug = debug
|
71
|
+
self.tools = tools_instance
|
72
|
+
self.shared_state = shared_state
|
69
73
|
|
70
74
|
self.chat_memory = None
|
71
|
-
self.episodic_memory = EpisodicMemory(
|
75
|
+
self.episodic_memory = EpisodicMemory()
|
72
76
|
self.remembered_info = None
|
73
77
|
|
74
78
|
self.goal = None
|
75
79
|
self.steps_counter = 0
|
76
80
|
self.code_exec_counter = 0
|
77
|
-
self.debug = debug
|
78
|
-
|
79
|
-
self.tools = tools_instance
|
80
81
|
|
81
|
-
#
|
82
|
-
# Custom tools are treated the same as atomic actions by CodeAct
|
82
|
+
# Build tool list
|
83
83
|
merged_signatures = {**ATOMIC_ACTION_SIGNATURES, **(custom_tools or {})}
|
84
84
|
|
85
|
-
# Build tool_list from merged signatures
|
86
85
|
self.tool_list = {}
|
87
86
|
for action_name, signature in merged_signatures.items():
|
88
87
|
func = signature["function"]
|
89
|
-
# Create bound function (curry tools_instance as first argument)
|
90
|
-
# Handle both sync and async functions
|
91
88
|
if asyncio.iscoroutinefunction(func):
|
92
|
-
async
|
89
|
+
# Create async bound function with proper closure
|
90
|
+
def make_bound(f, ti):
|
93
91
|
async def bound_func(*args, **kwargs):
|
94
92
|
return await f(ti, *args, **kwargs)
|
95
93
|
return bound_func
|
96
|
-
self.tool_list[action_name] =
|
94
|
+
self.tool_list[action_name] = make_bound(func, tools_instance)
|
97
95
|
else:
|
98
|
-
self.tool_list[action_name] = lambda *args, f=func, ti=tools_instance: f(ti, *args)
|
96
|
+
self.tool_list[action_name] = lambda *args, f=func, ti=tools_instance, **kwargs: f(ti, *args, **kwargs)
|
99
97
|
|
100
|
-
# Add non-atomic tools (remember, complete) from tools_instance
|
101
98
|
self.tool_list["remember"] = tools_instance.remember
|
102
99
|
self.tool_list["complete"] = tools_instance.complete
|
103
100
|
|
104
|
-
#
|
101
|
+
# Build tool descriptions
|
105
102
|
self.tool_descriptions = get_atomic_tool_descriptions()
|
106
|
-
|
107
|
-
# Add custom tool descriptions if provided
|
108
103
|
custom_descriptions = build_custom_tool_descriptions(custom_tools or {})
|
109
104
|
if custom_descriptions:
|
110
105
|
self.tool_descriptions += "\n" + custom_descriptions
|
111
|
-
|
112
|
-
# Add descriptions for remember/complete
|
113
106
|
self.tool_descriptions += "\n- remember(information: str): Remember information for later use"
|
114
107
|
self.tool_descriptions += "\n- complete(success: bool, reason: str): Mark task as complete"
|
115
108
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
role="system", content=self.system_prompt_content
|
109
|
+
# Load prompts from config
|
110
|
+
system_prompt_text = PromptLoader.load_prompt(
|
111
|
+
agent_config.get_codeact_system_prompt_path(),
|
112
|
+
{"tool_descriptions": self.tool_descriptions}
|
121
113
|
)
|
114
|
+
self.system_prompt = ChatMessage(role="system", content=system_prompt_text)
|
122
115
|
|
123
|
-
self.
|
116
|
+
self.user_prompt_template = PromptLoader.load_prompt(agent_config.get_codeact_user_prompt_path())
|
124
117
|
|
125
118
|
self.executor = SimpleCodeExecutor(
|
126
119
|
loop=asyncio.get_event_loop(),
|
@@ -150,16 +143,21 @@ class CodeActAgent(Workflow):
|
|
150
143
|
|
151
144
|
logger.debug(" - Adding goal to memory.")
|
152
145
|
goal = user_input
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
)
|
159
|
-
self.no_thoughts_prompt = ChatMessage(
|
160
|
-
role="user",
|
161
|
-
content=PromptTemplate(DEFAULT_NO_THOUGHTS_PROMPT).format(goal=goal),
|
146
|
+
|
147
|
+
# Format user prompt with goal
|
148
|
+
user_prompt_text = PromptLoader.load_prompt(
|
149
|
+
self.agent_config.get_codeact_user_prompt_path(),
|
150
|
+
{"goal": goal}
|
162
151
|
)
|
152
|
+
self.user_message = ChatMessage(role="user", content=user_prompt_text)
|
153
|
+
|
154
|
+
# No thoughts prompt
|
155
|
+
no_thoughts_text = f"""Your previous response provided code without explaining your reasoning first. Remember to always describe your thought process and plan *before* providing the code block.
|
156
|
+
|
157
|
+
The code you provided will be executed below.
|
158
|
+
|
159
|
+
Now, describe the next step you will take to address the original goal: {goal}"""
|
160
|
+
self.no_thoughts_prompt = ChatMessage(role="user", content=no_thoughts_text)
|
163
161
|
|
164
162
|
|
165
163
|
await self.chat_memory.aput(self.user_message)
|
@@ -194,38 +192,46 @@ class CodeActAgent(Workflow):
|
|
194
192
|
await ctx.store.set("remembered_info", self.remembered_info)
|
195
193
|
chat_history = await chat_utils.add_memory_block(self.remembered_info, chat_history)
|
196
194
|
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
|
195
|
+
# Always capture screenshot for trajectory
|
196
|
+
screenshot = (self.tools.take_screenshot())[1]
|
197
|
+
ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
|
198
|
+
await ctx.store.set("screenshot", screenshot)
|
202
199
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
200
|
+
# Add screenshot to chat only if vision enabled
|
201
|
+
if self.vision and model != "DeepSeek":
|
202
|
+
chat_history = await chat_utils.add_screenshot_image_block(screenshot, chat_history)
|
203
|
+
|
204
|
+
# Get and format device state using unified formatter
|
205
|
+
try:
|
206
|
+
|
207
|
+
# Get raw state from device
|
208
|
+
raw_state = self.tools.get_state()
|
209
|
+
|
210
|
+
# Format using unified function (returns 4 values)
|
211
|
+
formatted_text, focused_text, a11y_tree, phone_state = format_device_state(raw_state)
|
212
|
+
|
213
|
+
# Update shared_state if available
|
214
|
+
if self.shared_state is not None:
|
215
|
+
self.shared_state.formatted_device_state = formatted_text
|
216
|
+
self.shared_state.focused_text = focused_text
|
217
|
+
self.shared_state.a11y_tree = a11y_tree
|
218
|
+
self.shared_state.phone_state = phone_state
|
219
|
+
|
220
|
+
# Extract and store package/app name
|
221
|
+
self.shared_state.current_package_name = phone_state.get('packageName', 'Unknown')
|
222
|
+
self.shared_state.current_app_name = phone_state.get('currentApp', 'Unknown')
|
223
|
+
|
224
|
+
# Stream formatted state for trajectory
|
225
|
+
ctx.write_event_to_stream(RecordUIStateEvent(ui_state=a11y_tree))
|
226
|
+
|
227
|
+
# Add device state to chat using new chat_utils function
|
228
|
+
# This injects into LAST user message, doesn't create new message
|
229
|
+
chat_history = await chat_utils.add_device_state_block(formatted_text, chat_history)
|
230
|
+
|
231
|
+
except Exception as e:
|
232
|
+
logger.warning(f"⚠️ Error retrieving state from the connected device: {e}")
|
233
|
+
if self.debug:
|
234
|
+
logger.error("State retrieval error details:", exc_info=True)
|
229
235
|
|
230
236
|
response = await self._get_llm_response(ctx, chat_history)
|
231
237
|
if response is None:
|
@@ -286,8 +292,9 @@ class CodeActAgent(Workflow):
|
|
286
292
|
|
287
293
|
try:
|
288
294
|
self.code_exec_counter += 1
|
289
|
-
result = await self.executor.execute(ctx, code)
|
295
|
+
result = await self.executor.execute(ExecuterState(ui_state=ctx.store.get("ui_state", None)), code)
|
290
296
|
logger.info(f"💡 Code execution successful. Result: {result['output']}")
|
297
|
+
await asyncio.sleep(self.agent_config.after_sleep_action)
|
291
298
|
screenshots = result['screenshots']
|
292
299
|
for screenshot in screenshots[:-1]: # the last screenshot will be captured by next step
|
293
300
|
ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
|
@@ -487,9 +494,11 @@ class CodeActAgent(Workflow):
|
|
487
494
|
logger.warning(f"Failed to capture final screenshot: {e}")
|
488
495
|
|
489
496
|
try:
|
490
|
-
|
497
|
+
state = self.tools.get_state()
|
498
|
+
a11y_tree = state.get("a11y_tree", "")
|
499
|
+
phone_state = state.get("phone_state", "")
|
491
500
|
except Exception as e:
|
492
|
-
|
501
|
+
raise Exception(f"Failed to capture final UI state: {e}") from e
|
493
502
|
|
494
503
|
# Create final observation chat history and response
|
495
504
|
final_chat_history = [{"role": "system", "content": "Final state observation after task completion"}]
|
droidrun/agent/codeact/events.py
CHANGED
@@ -3,10 +3,9 @@ from typing import Optional
|
|
3
3
|
from llama_index.core.llms import ChatMessage
|
4
4
|
from llama_index.core.workflow import Event
|
5
5
|
|
6
|
+
from droidrun.agent.context.episodic_memory import EpisodicMemory
|
6
7
|
from droidrun.agent.usage import UsageResult
|
7
8
|
|
8
|
-
from ..context.episodic_memory import EpisodicMemory
|
9
|
-
|
10
9
|
|
11
10
|
class TaskInputEvent(Event):
|
12
11
|
input: list[ChatMessage]
|
@@ -1,19 +1,15 @@
|
|
1
1
|
"""
|
2
|
-
Agent Context Module - Provides
|
2
|
+
Agent Context Module - Provides episodic memory and task management.
|
3
3
|
|
4
4
|
This module contains:
|
5
|
-
-
|
6
|
-
-
|
5
|
+
- EpisodicMemory: Memory system for tracking agent steps
|
6
|
+
- TaskManager: Manages tasks and their execution
|
7
7
|
"""
|
8
8
|
|
9
|
-
from .
|
10
|
-
from .
|
11
|
-
from .episodic_memory import EpisodicMemory, EpisodicMemoryStep
|
12
|
-
from .task_manager import Task, TaskManager
|
9
|
+
from droidrun.agent.context.episodic_memory import EpisodicMemory, EpisodicMemoryStep
|
10
|
+
from droidrun.agent.context.task_manager import Task, TaskManager
|
13
11
|
|
14
12
|
__all__ = [
|
15
|
-
"AgentPersona",
|
16
|
-
"ContextInjectionManager",
|
17
13
|
"EpisodicMemory",
|
18
14
|
"EpisodicMemoryStep",
|
19
15
|
"TaskManager",
|
@@ -1,8 +1,6 @@
|
|
1
1
|
from dataclasses import dataclass, field
|
2
2
|
from typing import List, Optional
|
3
3
|
|
4
|
-
from droidrun.agent.context.agent_persona import AgentPersona
|
5
|
-
|
6
4
|
|
7
5
|
@dataclass
|
8
6
|
class EpisodicMemoryStep:
|
@@ -11,7 +9,7 @@ class EpisodicMemoryStep:
|
|
11
9
|
timestamp: float
|
12
10
|
screenshot: Optional[bytes]
|
13
11
|
|
12
|
+
|
14
13
|
@dataclass
|
15
14
|
class EpisodicMemory:
|
16
|
-
persona: AgentPersona
|
17
15
|
steps: List[EpisodicMemoryStep] = field(default_factory=list)
|
@@ -3,6 +3,8 @@ import os
|
|
3
3
|
from dataclasses import dataclass
|
4
4
|
from typing import Dict, List, Optional
|
5
5
|
|
6
|
+
from droidrun.config_manager.path_resolver import PathResolver
|
7
|
+
|
6
8
|
|
7
9
|
@dataclass
|
8
10
|
class Task:
|
@@ -36,7 +38,8 @@ class TaskManager:
|
|
36
38
|
self.goal_completed = False
|
37
39
|
self.message = None
|
38
40
|
self.task_history = []
|
39
|
-
|
41
|
+
# Save to working directory for user visibility
|
42
|
+
self.file_path = PathResolver.resolve("droidrun_tasks.txt", create_if_missing=True)
|
40
43
|
|
41
44
|
def get_all_tasks(self) -> List[Task]:
|
42
45
|
return self.tasks
|
@@ -71,8 +74,11 @@ class TaskManager:
|
|
71
74
|
|
72
75
|
|
73
76
|
def save_to_file(self):
|
74
|
-
"""Saves the current task list to a
|
77
|
+
"""Saves the current task list to a text file."""
|
75
78
|
try:
|
79
|
+
# Ensure parent directory exists
|
80
|
+
self.file_path.parent.mkdir(parents=True, exist_ok=True)
|
81
|
+
|
76
82
|
with open(self.file_path, 'w', encoding='utf-8') as f:
|
77
83
|
for i, task in enumerate(self.tasks, 1):
|
78
84
|
f.write(f"Task {i}: {task.description}\n")
|