droidrun 0.3.10.dev2__py3-none-any.whl → 0.3.10.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/agent/codeact/__init__.py +1 -4
- droidrun/agent/codeact/codeact_agent.py +95 -86
- droidrun/agent/codeact/events.py +1 -2
- droidrun/agent/context/__init__.py +5 -9
- droidrun/agent/context/episodic_memory.py +1 -3
- droidrun/agent/context/task_manager.py +8 -2
- droidrun/agent/droid/droid_agent.py +102 -141
- droidrun/agent/droid/events.py +45 -14
- droidrun/agent/executor/__init__.py +6 -4
- droidrun/agent/executor/events.py +29 -9
- droidrun/agent/executor/executor_agent.py +86 -28
- droidrun/agent/executor/prompts.py +8 -2
- droidrun/agent/manager/__init__.py +6 -7
- droidrun/agent/manager/events.py +16 -4
- droidrun/agent/manager/manager_agent.py +130 -69
- droidrun/agent/manager/prompts.py +1 -159
- droidrun/agent/utils/chat_utils.py +64 -2
- droidrun/agent/utils/device_state_formatter.py +54 -26
- droidrun/agent/utils/executer.py +66 -80
- droidrun/agent/utils/inference.py +11 -10
- droidrun/agent/utils/tools.py +58 -6
- droidrun/agent/utils/trajectory.py +18 -12
- droidrun/cli/logs.py +118 -56
- droidrun/cli/main.py +154 -136
- droidrun/config_manager/__init__.py +9 -7
- droidrun/config_manager/app_card_loader.py +148 -0
- droidrun/config_manager/config_manager.py +200 -102
- droidrun/config_manager/path_resolver.py +104 -0
- droidrun/config_manager/prompt_loader.py +75 -0
- droidrun/macro/__init__.py +1 -1
- droidrun/macro/cli.py +23 -18
- droidrun/telemetry/__init__.py +2 -2
- droidrun/telemetry/events.py +3 -3
- droidrun/telemetry/tracker.py +1 -1
- droidrun/tools/adb.py +1 -1
- droidrun/tools/ios.py +3 -2
- {droidrun-0.3.10.dev2.dist-info → droidrun-0.3.10.dev4.dist-info}/METADATA +10 -3
- droidrun-0.3.10.dev4.dist-info/RECORD +61 -0
- droidrun/agent/codeact/prompts.py +0 -26
- droidrun/agent/context/agent_persona.py +0 -16
- droidrun/agent/context/context_injection_manager.py +0 -66
- droidrun/agent/context/personas/__init__.py +0 -11
- droidrun/agent/context/personas/app_starter.py +0 -44
- droidrun/agent/context/personas/big_agent.py +0 -96
- droidrun/agent/context/personas/default.py +0 -95
- droidrun/agent/context/personas/ui_expert.py +0 -108
- droidrun/agent/planner/__init__.py +0 -13
- droidrun/agent/planner/events.py +0 -21
- droidrun/agent/planner/planner_agent.py +0 -311
- droidrun/agent/planner/prompts.py +0 -124
- droidrun-0.3.10.dev2.dist-info/RECORD +0 -70
- {droidrun-0.3.10.dev2.dist-info → droidrun-0.3.10.dev4.dist-info}/WHEEL +0 -0
- {droidrun-0.3.10.dev2.dist-info → droidrun-0.3.10.dev4.dist-info}/entry_points.txt +0 -0
- {droidrun-0.3.10.dev2.dist-info → droidrun-0.3.10.dev4.dist-info}/licenses/LICENSE +0 -0
droidrun/agent/utils/tools.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
from typing import TYPE_CHECKING, List
|
2
|
+
import time
|
2
3
|
|
3
4
|
if TYPE_CHECKING:
|
4
5
|
from droidrun.tools import Tools
|
@@ -116,12 +117,13 @@ def open_app(tool_instance: "Tools", text: str) -> str:
|
|
116
117
|
"app_opener_llm not configured. "
|
117
118
|
"provide app_opener_llm when initializing Tools."
|
118
119
|
)
|
119
|
-
|
120
|
+
|
120
121
|
# Create workflow instance
|
121
122
|
workflow = AppStarter(tools=tool_instance, llm=tool_instance.app_opener_llm, timeout=60, verbose=True)
|
122
123
|
|
123
124
|
# Run workflow to open an app
|
124
125
|
result = workflow.run(app_description=text)
|
126
|
+
time.sleep(1)
|
125
127
|
return result
|
126
128
|
|
127
129
|
|
@@ -155,11 +157,6 @@ ATOMIC_ACTION_SIGNATURES = {
|
|
155
157
|
"description": "Scroll from the position with coordinate to the position with coordinate2. Please make sure the start and end points of your swipe are within the swipeable area and away from the keyboard (y1 < 1400). Usage Example: {\"action\": \"swipe\", \"coordinate\": [x1, y1], \"coordinate2\": [x2, y2]}",
|
156
158
|
"function": swipe,
|
157
159
|
},
|
158
|
-
"open_app": {
|
159
|
-
"arguments": ["text"],
|
160
|
-
"description": "Open an app. Usage example: {\"action\": \"open_app\", \"text\": \"the name of app\"}",
|
161
|
-
"function": open_app,
|
162
|
-
},
|
163
160
|
# "copy": {
|
164
161
|
# "arguments": ["text"],
|
165
162
|
# "description": "Copy the specified text to the clipboard. Provide the text to copy using the 'text' argument. Example: {\"action\": \"copy\", \"text\": \"the text you want to copy\"}\nAlways use copy action to copy text to clipboard."
|
@@ -218,3 +215,58 @@ def build_custom_tool_descriptions(custom_tools: dict) -> str:
|
|
218
215
|
descriptions.append(f"- {action_name}({args}): {desc}")
|
219
216
|
|
220
217
|
return "\n".join(descriptions)
|
218
|
+
|
219
|
+
|
220
|
+
|
221
|
+
async def test_open_app(mock_tools, text: str) -> str:
|
222
|
+
return await open_app(mock_tools, text)
|
223
|
+
|
224
|
+
if __name__ == "__main__":
|
225
|
+
"""
|
226
|
+
Simple test for the tool functions.
|
227
|
+
Tests the atomic action wrapper functions.
|
228
|
+
"""
|
229
|
+
import asyncio
|
230
|
+
from typing import List
|
231
|
+
|
232
|
+
from llama_index.llms.google_genai import GoogleGenAI
|
233
|
+
|
234
|
+
from droidrun.tools.adb import AdbTools
|
235
|
+
llm = GoogleGenAI(model="gemini-2.5-pro", temperature=0.0)
|
236
|
+
# Create mock tools instance
|
237
|
+
mock_tools = AdbTools(app_opener_llm=llm, text_manipulator_llm=llm)
|
238
|
+
# print("=== Testing click ===")
|
239
|
+
# result = click(mock_tools, 0)
|
240
|
+
mock_tools.get_state()
|
241
|
+
print("\n=== Testing long_press ===")
|
242
|
+
result = long_press(mock_tools, 5)
|
243
|
+
print(f"Result: {result}")
|
244
|
+
input("Press Enter to continue...")
|
245
|
+
print("\n=== Testing type ===")
|
246
|
+
result = type(mock_tools, "Hello World", -1)
|
247
|
+
print(f"Result: {result}")
|
248
|
+
input("Press Enter to continue...")
|
249
|
+
|
250
|
+
print("\n=== Testing system_button ===")
|
251
|
+
result = system_button(mock_tools, "back")
|
252
|
+
print(f"Result: {result}")
|
253
|
+
input("Press Enter to continue...")
|
254
|
+
|
255
|
+
|
256
|
+
print("\n=== Testing swipe ===")
|
257
|
+
result = swipe(mock_tools, [500, 0], [500, 1000])
|
258
|
+
print(f"Result: {result}")
|
259
|
+
input("Press Enter to continue...")
|
260
|
+
|
261
|
+
print("\n=== Testing open_app ===")
|
262
|
+
# This one is more complex and requires real LLM setup, so just show the structure
|
263
|
+
try:
|
264
|
+
result = asyncio.run(test_open_app(mock_tools, "Calculator"))
|
265
|
+
print(f"Result: {result}")
|
266
|
+
input("Press Enter to continue...")
|
267
|
+
except Exception as e:
|
268
|
+
print(f"Expected error (no LLM): {e}")
|
269
|
+
input("Press Enter to continue...")
|
270
|
+
|
271
|
+
print("\n=== All tests completed ===")
|
272
|
+
|
@@ -16,6 +16,8 @@ from typing import Any, Dict, List
|
|
16
16
|
from llama_index.core.workflow import Event
|
17
17
|
from PIL import Image
|
18
18
|
|
19
|
+
from droidrun.config_manager.path_resolver import PathResolver
|
20
|
+
|
19
21
|
logger = logging.getLogger("droidrun")
|
20
22
|
|
21
23
|
|
@@ -136,16 +138,19 @@ class Trajectory:
|
|
136
138
|
Creates a dedicated folder for each trajectory containing all related files.
|
137
139
|
|
138
140
|
Args:
|
139
|
-
directory: Base directory to save the trajectory files
|
141
|
+
directory: Base directory to save the trajectory files (relative or absolute)
|
140
142
|
|
141
143
|
Returns:
|
142
144
|
Path to the trajectory folder
|
143
145
|
"""
|
144
|
-
|
146
|
+
# Resolve directory (prefer working dir for output)
|
147
|
+
base_dir = PathResolver.resolve(directory, create_if_missing=True)
|
148
|
+
base_dir.mkdir(parents=True, exist_ok=True)
|
149
|
+
|
145
150
|
timestamp = time.strftime("%Y%m%d_%H%M%S")
|
146
151
|
unique_id = str(uuid.uuid4())[:8]
|
147
|
-
trajectory_folder =
|
148
|
-
|
152
|
+
trajectory_folder = base_dir / f"{timestamp}_{unique_id}"
|
153
|
+
trajectory_folder.mkdir(parents=True, exist_ok=True)
|
149
154
|
|
150
155
|
serializable_events = []
|
151
156
|
for event in self.events:
|
@@ -189,7 +194,7 @@ class Trajectory:
|
|
189
194
|
serializable_events.append(event_dict)
|
190
195
|
|
191
196
|
|
192
|
-
trajectory_json_path =
|
197
|
+
trajectory_json_path = trajectory_folder / "trajectory.json"
|
193
198
|
with open(trajectory_json_path, "w") as f:
|
194
199
|
json.dump(serializable_events, f, indent=2)
|
195
200
|
|
@@ -207,7 +212,7 @@ class Trajectory:
|
|
207
212
|
}
|
208
213
|
macro_data.append(macro_dict)
|
209
214
|
|
210
|
-
macro_json_path =
|
215
|
+
macro_json_path = trajectory_folder / "macro.json"
|
211
216
|
with open(macro_json_path, "w") as f:
|
212
217
|
json.dump(
|
213
218
|
{
|
@@ -224,11 +229,11 @@ class Trajectory:
|
|
224
229
|
logger.info(
|
225
230
|
f"💾 Saved macro sequence with {len(macro_data)} actions to {macro_json_path}"
|
226
231
|
)
|
227
|
-
screenshots_folder =
|
228
|
-
|
232
|
+
screenshots_folder = trajectory_folder / "screenshots"
|
233
|
+
screenshots_folder.mkdir(parents=True, exist_ok=True)
|
229
234
|
|
230
235
|
gif_path = self.create_screenshot_gif(
|
231
|
-
screenshots_folder
|
236
|
+
str(screenshots_folder)
|
232
237
|
)
|
233
238
|
if gif_path:
|
234
239
|
logger.info(f"🎬 Saved screenshot GIF to {gif_path}")
|
@@ -238,12 +243,13 @@ class Trajectory:
|
|
238
243
|
if len(self.ui_states) != len(self.screenshots):
|
239
244
|
logger.warning("UI states and screenshots are not the same length!")
|
240
245
|
|
241
|
-
|
246
|
+
ui_states_folder = trajectory_folder / "ui_states"
|
247
|
+
ui_states_folder.mkdir(parents=True, exist_ok=True)
|
242
248
|
for idx, ui_state in enumerate(self.ui_states):
|
243
|
-
ui_states_path =
|
249
|
+
ui_states_path = ui_states_folder / f"{idx}.json"
|
244
250
|
with open(ui_states_path, "w", encoding="utf-8") as f:
|
245
251
|
json.dump(ui_state, f, ensure_ascii=False, indent=2)
|
246
|
-
return trajectory_folder
|
252
|
+
return str(trajectory_folder)
|
247
253
|
|
248
254
|
@staticmethod
|
249
255
|
def load_trajectory_folder(trajectory_folder: str) -> Dict[str, Any]:
|
droidrun/cli/logs.py
CHANGED
@@ -21,54 +21,80 @@ from droidrun.agent.droid.events import (
|
|
21
21
|
FinalizeEvent,
|
22
22
|
TaskRunnerEvent,
|
23
23
|
)
|
24
|
-
from droidrun.agent.
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
from droidrun.agent.manager.events import (
|
25
|
+
ManagerInternalPlanEvent,
|
26
|
+
ManagerThinkingEvent,
|
27
|
+
)
|
28
|
+
from droidrun.agent.executor.events import (
|
29
|
+
ExecutorInternalActionEvent,
|
30
|
+
ExecutorInternalResultEvent,
|
28
31
|
)
|
29
32
|
|
30
33
|
|
31
34
|
class LogHandler(logging.Handler):
|
32
|
-
def __init__(self, goal: str, current_step: str = "Initializing..."):
|
35
|
+
def __init__(self, goal: str, current_step: str = "Initializing...", rich_text: bool = True):
|
33
36
|
super().__init__()
|
34
37
|
|
35
38
|
self.goal = goal
|
36
39
|
self.current_step = current_step
|
37
40
|
self.is_completed = False
|
38
41
|
self.is_success = False
|
39
|
-
self.
|
40
|
-
|
41
|
-
|
42
|
-
|
42
|
+
self.rich_text = rich_text
|
43
|
+
|
44
|
+
if self.rich_text:
|
45
|
+
self.spinner = Spinner("dots")
|
46
|
+
self.console = Console()
|
47
|
+
self.layout = self._create_layout()
|
48
|
+
self.logs: List[str] = []
|
49
|
+
else:
|
50
|
+
self.console = Console()
|
51
|
+
self.logs: List[str] = []
|
43
52
|
|
44
53
|
def emit(self, record):
|
45
54
|
msg = self.format(record)
|
46
55
|
lines = msg.splitlines()
|
47
56
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
self.logs
|
53
|
-
|
54
|
-
|
57
|
+
if self.rich_text:
|
58
|
+
for line in lines:
|
59
|
+
self.logs.append(line)
|
60
|
+
# Optionally, limit the log list size
|
61
|
+
if len(self.logs) > 100:
|
62
|
+
self.logs.pop(0)
|
63
|
+
self.rerender()
|
64
|
+
else:
|
65
|
+
# Simple console output for non-rich mode
|
66
|
+
for line in lines:
|
67
|
+
self.console.print(line)
|
55
68
|
|
56
69
|
def render(self):
|
57
|
-
|
70
|
+
if self.rich_text:
|
71
|
+
return Live(self.layout, refresh_per_second=4, console=self.console)
|
72
|
+
else:
|
73
|
+
# Return a no-op context manager for non-rich mode
|
74
|
+
from contextlib import nullcontext
|
75
|
+
return nullcontext()
|
58
76
|
|
59
77
|
def rerender(self):
|
60
|
-
self.
|
61
|
-
self.
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
78
|
+
if self.rich_text:
|
79
|
+
self._update_layout(
|
80
|
+
self.layout,
|
81
|
+
self.logs,
|
82
|
+
self.current_step,
|
83
|
+
self.goal,
|
84
|
+
self.is_completed,
|
85
|
+
self.is_success,
|
86
|
+
)
|
68
87
|
|
69
88
|
def update_step(self, step: str):
|
70
89
|
self.current_step = step
|
71
|
-
self.
|
90
|
+
if self.rich_text:
|
91
|
+
self.rerender()
|
92
|
+
else:
|
93
|
+
# Simple console output for status updates
|
94
|
+
status_symbol = "⚡"
|
95
|
+
if self.is_completed:
|
96
|
+
status_symbol = "✓" if self.is_success else "✗"
|
97
|
+
self.console.print(f"{status_symbol} {step}")
|
72
98
|
|
73
99
|
def _create_layout(self):
|
74
100
|
"""Create a layout with logs at top and status at bottom"""
|
@@ -170,7 +196,7 @@ class LogHandler(logging.Handler):
|
|
170
196
|
)
|
171
197
|
)
|
172
198
|
|
173
|
-
def handle_event(self, event):
|
199
|
+
def handle_event(self, event):
|
174
200
|
"""Handle streaming events from the agent workflow."""
|
175
201
|
logger = logging.getLogger("droidrun")
|
176
202
|
|
@@ -181,32 +207,72 @@ class LogHandler(logging.Handler):
|
|
181
207
|
elif isinstance(event, RecordUIStateEvent):
|
182
208
|
logger.debug("✏️ Recording UI state")
|
183
209
|
|
184
|
-
#
|
185
|
-
elif isinstance(event,
|
186
|
-
self.current_step = "
|
187
|
-
logger.info("
|
188
|
-
|
189
|
-
elif isinstance(event,
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
210
|
+
# Manager events (reasoning mode - planning)
|
211
|
+
elif isinstance(event, ManagerThinkingEvent):
|
212
|
+
self.current_step = "Manager analyzing state..."
|
213
|
+
logger.info("🧠 Manager analyzing current state...")
|
214
|
+
|
215
|
+
elif isinstance(event, ManagerInternalPlanEvent):
|
216
|
+
self.current_step = "Plan created"
|
217
|
+
# Show thought (concise reasoning)
|
218
|
+
if hasattr(event, "thought") and event.thought:
|
219
|
+
thought_preview = (
|
220
|
+
event.thought[:120] + "..."
|
221
|
+
if len(event.thought) > 120
|
222
|
+
else event.thought
|
223
|
+
)
|
224
|
+
logger.info(f"💭 Thought: {thought_preview}")
|
225
|
+
|
226
|
+
# Show current subgoal (what we're working on next)
|
227
|
+
if hasattr(event, "current_subgoal") and event.current_subgoal:
|
228
|
+
subgoal_preview = (
|
229
|
+
event.current_subgoal[:150] + "..."
|
230
|
+
if len(event.current_subgoal) > 150
|
231
|
+
else event.current_subgoal
|
195
232
|
)
|
196
|
-
logger.info(f"
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
233
|
+
logger.info(f"📋 Next step: {subgoal_preview}")
|
234
|
+
|
235
|
+
# Show answer if provided (task complete)
|
236
|
+
if hasattr(event, "manager_answer") and event.manager_answer:
|
237
|
+
answer_preview = (
|
238
|
+
event.manager_answer[:200] + "..."
|
239
|
+
if len(event.manager_answer) > 200
|
240
|
+
else event.manager_answer
|
241
|
+
)
|
242
|
+
logger.info(f"💬 Answer: {answer_preview}")
|
243
|
+
|
244
|
+
# Debug: show memory updates
|
245
|
+
if hasattr(event, "memory_update") and event.memory_update:
|
246
|
+
logger.debug(f"🧠 Memory: {event.memory_update[:100]}...")
|
247
|
+
|
248
|
+
# Executor events (reasoning mode - action execution)
|
249
|
+
elif isinstance(event, ExecutorInternalActionEvent):
|
250
|
+
self.current_step = "Selecting action..."
|
251
|
+
# Show what action was chosen
|
252
|
+
if hasattr(event, "description") and event.description:
|
253
|
+
logger.info(f"🎯 Action: {event.description}")
|
254
|
+
|
255
|
+
# Debug: show executor's reasoning
|
256
|
+
if hasattr(event, "thought") and event.thought:
|
257
|
+
thought_preview = (
|
258
|
+
event.thought[:120] + "..."
|
259
|
+
if len(event.thought) > 120
|
260
|
+
else event.thought
|
261
|
+
)
|
262
|
+
logger.debug(f"💭 Reasoning: {thought_preview}")
|
263
|
+
|
264
|
+
elif isinstance(event, ExecutorInternalResultEvent):
|
265
|
+
# Show result with appropriate emoji
|
266
|
+
if hasattr(event, "outcome") and hasattr(event, "summary"):
|
267
|
+
if event.outcome:
|
268
|
+
self.current_step = "Action completed"
|
269
|
+
logger.info(f"✅ {event.summary}")
|
270
|
+
else:
|
271
|
+
self.current_step = "Action failed"
|
272
|
+
error_msg = event.error if hasattr(event, "error") else "Unknown error"
|
273
|
+
logger.info(f"❌ {event.summary} ({error_msg})")
|
274
|
+
|
275
|
+
# CodeAct events (direct mode)
|
210
276
|
elif isinstance(event, TaskInputEvent):
|
211
277
|
self.current_step = "Processing task input..."
|
212
278
|
logger.info("💬 Task input received...")
|
@@ -264,10 +330,6 @@ class LogHandler(logging.Handler):
|
|
264
330
|
self.current_step = "Task failed"
|
265
331
|
logger.info(f"❌ Task failed: {event.reason}")
|
266
332
|
|
267
|
-
# elif isinstance(event, ReasoningLogicEvent): TODO: fix event handling
|
268
|
-
# self.current_step = "Planning..."
|
269
|
-
# logger.info("🤔 Planning next steps...")
|
270
|
-
|
271
333
|
elif isinstance(event, TaskRunnerEvent):
|
272
334
|
self.current_step = "Processing tasks..."
|
273
335
|
logger.info("🏃 Processing task queue...")
|