droidrun 0.3.10.dev3__py3-none-any.whl → 0.3.10.dev5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. droidrun/agent/codeact/__init__.py +1 -4
  2. droidrun/agent/codeact/codeact_agent.py +95 -86
  3. droidrun/agent/codeact/events.py +1 -2
  4. droidrun/agent/context/__init__.py +5 -9
  5. droidrun/agent/context/episodic_memory.py +1 -3
  6. droidrun/agent/context/task_manager.py +8 -2
  7. droidrun/agent/droid/droid_agent.py +102 -141
  8. droidrun/agent/droid/events.py +45 -14
  9. droidrun/agent/executor/__init__.py +6 -4
  10. droidrun/agent/executor/events.py +29 -9
  11. droidrun/agent/executor/executor_agent.py +86 -28
  12. droidrun/agent/executor/prompts.py +8 -2
  13. droidrun/agent/manager/__init__.py +6 -7
  14. droidrun/agent/manager/events.py +16 -4
  15. droidrun/agent/manager/manager_agent.py +130 -69
  16. droidrun/agent/manager/prompts.py +1 -159
  17. droidrun/agent/utils/chat_utils.py +64 -2
  18. droidrun/agent/utils/device_state_formatter.py +54 -26
  19. droidrun/agent/utils/executer.py +66 -80
  20. droidrun/agent/utils/inference.py +11 -10
  21. droidrun/agent/utils/tools.py +58 -6
  22. droidrun/agent/utils/trajectory.py +18 -12
  23. droidrun/cli/logs.py +118 -56
  24. droidrun/cli/main.py +154 -136
  25. droidrun/config_manager/__init__.py +9 -7
  26. droidrun/config_manager/app_card_loader.py +148 -0
  27. droidrun/config_manager/config_manager.py +200 -102
  28. droidrun/config_manager/path_resolver.py +104 -0
  29. droidrun/config_manager/prompt_loader.py +75 -0
  30. droidrun/macro/__init__.py +1 -1
  31. droidrun/macro/cli.py +23 -18
  32. droidrun/telemetry/__init__.py +2 -2
  33. droidrun/telemetry/events.py +3 -3
  34. droidrun/telemetry/tracker.py +1 -1
  35. droidrun/tools/adb.py +1 -1
  36. droidrun/tools/ios.py +3 -2
  37. {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev5.dist-info}/METADATA +10 -4
  38. droidrun-0.3.10.dev5.dist-info/RECORD +61 -0
  39. droidrun/agent/codeact/prompts.py +0 -26
  40. droidrun/agent/context/agent_persona.py +0 -16
  41. droidrun/agent/context/context_injection_manager.py +0 -66
  42. droidrun/agent/context/personas/__init__.py +0 -11
  43. droidrun/agent/context/personas/app_starter.py +0 -44
  44. droidrun/agent/context/personas/big_agent.py +0 -96
  45. droidrun/agent/context/personas/default.py +0 -95
  46. droidrun/agent/context/personas/ui_expert.py +0 -108
  47. droidrun/agent/planner/__init__.py +0 -13
  48. droidrun/agent/planner/events.py +0 -21
  49. droidrun/agent/planner/planner_agent.py +0 -311
  50. droidrun/agent/planner/prompts.py +0 -124
  51. droidrun-0.3.10.dev3.dist-info/RECORD +0 -70
  52. {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev5.dist-info}/WHEEL +0 -0
  53. {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev5.dist-info}/entry_points.txt +0 -0
  54. {droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev5.dist-info}/licenses/LICENSE +0 -0
@@ -1,4 +1,5 @@
1
1
  from typing import TYPE_CHECKING, List
2
+ import time
2
3
 
3
4
  if TYPE_CHECKING:
4
5
  from droidrun.tools import Tools
@@ -116,12 +117,13 @@ def open_app(tool_instance: "Tools", text: str) -> str:
116
117
  "app_opener_llm not configured. "
117
118
  "provide app_opener_llm when initializing Tools."
118
119
  )
119
-
120
+
120
121
  # Create workflow instance
121
122
  workflow = AppStarter(tools=tool_instance, llm=tool_instance.app_opener_llm, timeout=60, verbose=True)
122
123
 
123
124
  # Run workflow to open an app
124
125
  result = workflow.run(app_description=text)
126
+ time.sleep(1)
125
127
  return result
126
128
 
127
129
 
@@ -155,11 +157,6 @@ ATOMIC_ACTION_SIGNATURES = {
155
157
  "description": "Scroll from the position with coordinate to the position with coordinate2. Please make sure the start and end points of your swipe are within the swipeable area and away from the keyboard (y1 < 1400). Usage Example: {\"action\": \"swipe\", \"coordinate\": [x1, y1], \"coordinate2\": [x2, y2]}",
156
158
  "function": swipe,
157
159
  },
158
- "open_app": {
159
- "arguments": ["text"],
160
- "description": "Open an app. Usage example: {\"action\": \"open_app\", \"text\": \"the name of app\"}",
161
- "function": open_app,
162
- },
163
160
  # "copy": {
164
161
  # "arguments": ["text"],
165
162
  # "description": "Copy the specified text to the clipboard. Provide the text to copy using the 'text' argument. Example: {\"action\": \"copy\", \"text\": \"the text you want to copy\"}\nAlways use copy action to copy text to clipboard."
@@ -218,3 +215,58 @@ def build_custom_tool_descriptions(custom_tools: dict) -> str:
218
215
  descriptions.append(f"- {action_name}({args}): {desc}")
219
216
 
220
217
  return "\n".join(descriptions)
218
+
219
+
220
+
221
+ async def test_open_app(mock_tools, text: str) -> str:
222
+ return await open_app(mock_tools, text)
223
+
224
+ if __name__ == "__main__":
225
+ """
226
+ Simple test for the tool functions.
227
+ Tests the atomic action wrapper functions.
228
+ """
229
+ import asyncio
230
+ from typing import List
231
+
232
+ from llama_index.llms.google_genai import GoogleGenAI
233
+
234
+ from droidrun.tools.adb import AdbTools
235
+ llm = GoogleGenAI(model="gemini-2.5-pro", temperature=0.0)
236
+ # Create mock tools instance
237
+ mock_tools = AdbTools(app_opener_llm=llm, text_manipulator_llm=llm)
238
+ # print("=== Testing click ===")
239
+ # result = click(mock_tools, 0)
240
+ mock_tools.get_state()
241
+ print("\n=== Testing long_press ===")
242
+ result = long_press(mock_tools, 5)
243
+ print(f"Result: {result}")
244
+ input("Press Enter to continue...")
245
+ print("\n=== Testing type ===")
246
+ result = type(mock_tools, "Hello World", -1)
247
+ print(f"Result: {result}")
248
+ input("Press Enter to continue...")
249
+
250
+ print("\n=== Testing system_button ===")
251
+ result = system_button(mock_tools, "back")
252
+ print(f"Result: {result}")
253
+ input("Press Enter to continue...")
254
+
255
+
256
+ print("\n=== Testing swipe ===")
257
+ result = swipe(mock_tools, [500, 0], [500, 1000])
258
+ print(f"Result: {result}")
259
+ input("Press Enter to continue...")
260
+
261
+ print("\n=== Testing open_app ===")
262
+ # This one is more complex and requires real LLM setup, so just show the structure
263
+ try:
264
+ result = asyncio.run(test_open_app(mock_tools, "Calculator"))
265
+ print(f"Result: {result}")
266
+ input("Press Enter to continue...")
267
+ except Exception as e:
268
+ print(f"Expected error (no LLM): {e}")
269
+ input("Press Enter to continue...")
270
+
271
+ print("\n=== All tests completed ===")
272
+
@@ -16,6 +16,8 @@ from typing import Any, Dict, List
16
16
  from llama_index.core.workflow import Event
17
17
  from PIL import Image
18
18
 
19
+ from droidrun.config_manager.path_resolver import PathResolver
20
+
19
21
  logger = logging.getLogger("droidrun")
20
22
 
21
23
 
@@ -136,16 +138,19 @@ class Trajectory:
136
138
  Creates a dedicated folder for each trajectory containing all related files.
137
139
 
138
140
  Args:
139
- directory: Base directory to save the trajectory files
141
+ directory: Base directory to save the trajectory files (relative or absolute)
140
142
 
141
143
  Returns:
142
144
  Path to the trajectory folder
143
145
  """
144
- os.makedirs(directory, exist_ok=True)
146
+ # Resolve directory (prefer working dir for output)
147
+ base_dir = PathResolver.resolve(directory, create_if_missing=True)
148
+ base_dir.mkdir(parents=True, exist_ok=True)
149
+
145
150
  timestamp = time.strftime("%Y%m%d_%H%M%S")
146
151
  unique_id = str(uuid.uuid4())[:8]
147
- trajectory_folder = os.path.join(directory, f"{timestamp}_{unique_id}")
148
- os.makedirs(trajectory_folder, exist_ok=True)
152
+ trajectory_folder = base_dir / f"{timestamp}_{unique_id}"
153
+ trajectory_folder.mkdir(parents=True, exist_ok=True)
149
154
 
150
155
  serializable_events = []
151
156
  for event in self.events:
@@ -189,7 +194,7 @@ class Trajectory:
189
194
  serializable_events.append(event_dict)
190
195
 
191
196
 
192
- trajectory_json_path = os.path.join(trajectory_folder, "trajectory.json")
197
+ trajectory_json_path = trajectory_folder / "trajectory.json"
193
198
  with open(trajectory_json_path, "w") as f:
194
199
  json.dump(serializable_events, f, indent=2)
195
200
 
@@ -207,7 +212,7 @@ class Trajectory:
207
212
  }
208
213
  macro_data.append(macro_dict)
209
214
 
210
- macro_json_path = os.path.join(trajectory_folder, "macro.json")
215
+ macro_json_path = trajectory_folder / "macro.json"
211
216
  with open(macro_json_path, "w") as f:
212
217
  json.dump(
213
218
  {
@@ -224,11 +229,11 @@ class Trajectory:
224
229
  logger.info(
225
230
  f"💾 Saved macro sequence with {len(macro_data)} actions to {macro_json_path}"
226
231
  )
227
- screenshots_folder = os.path.join(trajectory_folder, "screenshots")
228
- os.makedirs(screenshots_folder, exist_ok=True)
232
+ screenshots_folder = trajectory_folder / "screenshots"
233
+ screenshots_folder.mkdir(parents=True, exist_ok=True)
229
234
 
230
235
  gif_path = self.create_screenshot_gif(
231
- screenshots_folder
236
+ str(screenshots_folder)
232
237
  )
233
238
  if gif_path:
234
239
  logger.info(f"🎬 Saved screenshot GIF to {gif_path}")
@@ -238,12 +243,13 @@ class Trajectory:
238
243
  if len(self.ui_states) != len(self.screenshots):
239
244
  logger.warning("UI states and screenshots are not the same length!")
240
245
 
241
- os.makedirs(os.path.join(trajectory_folder, "ui_states"), exist_ok=True)
246
+ ui_states_folder = trajectory_folder / "ui_states"
247
+ ui_states_folder.mkdir(parents=True, exist_ok=True)
242
248
  for idx, ui_state in enumerate(self.ui_states):
243
- ui_states_path = os.path.join(trajectory_folder, "ui_states", f"{idx}.json")
249
+ ui_states_path = ui_states_folder / f"{idx}.json"
244
250
  with open(ui_states_path, "w", encoding="utf-8") as f:
245
251
  json.dump(ui_state, f, ensure_ascii=False, indent=2)
246
- return trajectory_folder
252
+ return str(trajectory_folder)
247
253
 
248
254
  @staticmethod
249
255
  def load_trajectory_folder(trajectory_folder: str) -> Dict[str, Any]:
droidrun/cli/logs.py CHANGED
@@ -21,54 +21,80 @@ from droidrun.agent.droid.events import (
21
21
  FinalizeEvent,
22
22
  TaskRunnerEvent,
23
23
  )
24
- from droidrun.agent.planner.events import (
25
- PlanCreatedEvent,
26
- PlanInputEvent,
27
- PlanThinkingEvent,
24
+ from droidrun.agent.manager.events import (
25
+ ManagerInternalPlanEvent,
26
+ ManagerThinkingEvent,
27
+ )
28
+ from droidrun.agent.executor.events import (
29
+ ExecutorInternalActionEvent,
30
+ ExecutorInternalResultEvent,
28
31
  )
29
32
 
30
33
 
31
34
  class LogHandler(logging.Handler):
32
- def __init__(self, goal: str, current_step: str = "Initializing..."):
35
+ def __init__(self, goal: str, current_step: str = "Initializing...", rich_text: bool = True):
33
36
  super().__init__()
34
37
 
35
38
  self.goal = goal
36
39
  self.current_step = current_step
37
40
  self.is_completed = False
38
41
  self.is_success = False
39
- self.spinner = Spinner("dots")
40
- self.console = Console()
41
- self.layout = self._create_layout()
42
- self.logs: List[str] = []
42
+ self.rich_text = rich_text
43
+
44
+ if self.rich_text:
45
+ self.spinner = Spinner("dots")
46
+ self.console = Console()
47
+ self.layout = self._create_layout()
48
+ self.logs: List[str] = []
49
+ else:
50
+ self.console = Console()
51
+ self.logs: List[str] = []
43
52
 
44
53
  def emit(self, record):
45
54
  msg = self.format(record)
46
55
  lines = msg.splitlines()
47
56
 
48
- for line in lines:
49
- self.logs.append(line)
50
- # Optionally, limit the log list size
51
- if len(self.logs) > 100:
52
- self.logs.pop(0)
53
-
54
- self.rerender()
57
+ if self.rich_text:
58
+ for line in lines:
59
+ self.logs.append(line)
60
+ # Optionally, limit the log list size
61
+ if len(self.logs) > 100:
62
+ self.logs.pop(0)
63
+ self.rerender()
64
+ else:
65
+ # Simple console output for non-rich mode
66
+ for line in lines:
67
+ self.console.print(line)
55
68
 
56
69
  def render(self):
57
- return Live(self.layout, refresh_per_second=4, console=self.console)
70
+ if self.rich_text:
71
+ return Live(self.layout, refresh_per_second=4, console=self.console)
72
+ else:
73
+ # Return a no-op context manager for non-rich mode
74
+ from contextlib import nullcontext
75
+ return nullcontext()
58
76
 
59
77
  def rerender(self):
60
- self._update_layout(
61
- self.layout,
62
- self.logs,
63
- self.current_step,
64
- self.goal,
65
- self.is_completed,
66
- self.is_success,
67
- )
78
+ if self.rich_text:
79
+ self._update_layout(
80
+ self.layout,
81
+ self.logs,
82
+ self.current_step,
83
+ self.goal,
84
+ self.is_completed,
85
+ self.is_success,
86
+ )
68
87
 
69
88
  def update_step(self, step: str):
70
89
  self.current_step = step
71
- self.rerender()
90
+ if self.rich_text:
91
+ self.rerender()
92
+ else:
93
+ # Simple console output for status updates
94
+ status_symbol = "⚡"
95
+ if self.is_completed:
96
+ status_symbol = "✓" if self.is_success else "✗"
97
+ self.console.print(f"{status_symbol} {step}")
72
98
 
73
99
  def _create_layout(self):
74
100
  """Create a layout with logs at top and status at bottom"""
@@ -170,7 +196,7 @@ class LogHandler(logging.Handler):
170
196
  )
171
197
  )
172
198
 
173
- def handle_event(self, event): # TODO: fix event handling for the refactor
199
+ def handle_event(self, event):
174
200
  """Handle streaming events from the agent workflow."""
175
201
  logger = logging.getLogger("droidrun")
176
202
 
@@ -181,32 +207,72 @@ class LogHandler(logging.Handler):
181
207
  elif isinstance(event, RecordUIStateEvent):
182
208
  logger.debug("✏️ Recording UI state")
183
209
 
184
- # Planner events
185
- elif isinstance(event, PlanInputEvent):
186
- self.current_step = "Planning..."
187
- logger.info("💭 Planner receiving input...")
188
-
189
- elif isinstance(event, PlanThinkingEvent):
190
- if event.thoughts:
191
- thoughts_preview = (
192
- event.thoughts[:150] + "..."
193
- if len(event.thoughts) > 150
194
- else event.thoughts
210
+ # Manager events (reasoning mode - planning)
211
+ elif isinstance(event, ManagerThinkingEvent):
212
+ self.current_step = "Manager analyzing state..."
213
+ logger.info("🧠 Manager analyzing current state...")
214
+
215
+ elif isinstance(event, ManagerInternalPlanEvent):
216
+ self.current_step = "Plan created"
217
+ # Show thought (concise reasoning)
218
+ if hasattr(event, "thought") and event.thought:
219
+ thought_preview = (
220
+ event.thought[:120] + "..."
221
+ if len(event.thought) > 120
222
+ else event.thought
223
+ )
224
+ logger.info(f"💭 Thought: {thought_preview}")
225
+
226
+ # Show current subgoal (what we're working on next)
227
+ if hasattr(event, "current_subgoal") and event.current_subgoal:
228
+ subgoal_preview = (
229
+ event.current_subgoal[:150] + "..."
230
+ if len(event.current_subgoal) > 150
231
+ else event.current_subgoal
195
232
  )
196
- logger.info(f"🧠 Planning: {thoughts_preview}")
197
- if event.code:
198
- logger.info("📝 Generated plan code")
199
-
200
- elif isinstance(event, PlanCreatedEvent):
201
- if event.tasks:
202
- task_count = len(event.tasks) if event.tasks else 0
203
- self.current_step = f"Plan ready ({task_count} tasks)"
204
- logger.info(f"📋 Plan created with {task_count} tasks")
205
- for task in event.tasks:
206
- desc = task.description
207
- logger.info(f"- {desc}")
208
-
209
- # CodeAct events
233
+ logger.info(f"📋 Next step: {subgoal_preview}")
234
+
235
+ # Show answer if provided (task complete)
236
+ if hasattr(event, "manager_answer") and event.manager_answer:
237
+ answer_preview = (
238
+ event.manager_answer[:200] + "..."
239
+ if len(event.manager_answer) > 200
240
+ else event.manager_answer
241
+ )
242
+ logger.info(f"💬 Answer: {answer_preview}")
243
+
244
+ # Debug: show memory updates
245
+ if hasattr(event, "memory_update") and event.memory_update:
246
+ logger.debug(f"🧠 Memory: {event.memory_update[:100]}...")
247
+
248
+ # Executor events (reasoning mode - action execution)
249
+ elif isinstance(event, ExecutorInternalActionEvent):
250
+ self.current_step = "Selecting action..."
251
+ # Show what action was chosen
252
+ if hasattr(event, "description") and event.description:
253
+ logger.info(f"🎯 Action: {event.description}")
254
+
255
+ # Debug: show executor's reasoning
256
+ if hasattr(event, "thought") and event.thought:
257
+ thought_preview = (
258
+ event.thought[:120] + "..."
259
+ if len(event.thought) > 120
260
+ else event.thought
261
+ )
262
+ logger.debug(f"💭 Reasoning: {thought_preview}")
263
+
264
+ elif isinstance(event, ExecutorInternalResultEvent):
265
+ # Show result with appropriate emoji
266
+ if hasattr(event, "outcome") and hasattr(event, "summary"):
267
+ if event.outcome:
268
+ self.current_step = "Action completed"
269
+ logger.info(f"✅ {event.summary}")
270
+ else:
271
+ self.current_step = "Action failed"
272
+ error_msg = event.error if hasattr(event, "error") else "Unknown error"
273
+ logger.info(f"❌ {event.summary} ({error_msg})")
274
+
275
+ # CodeAct events (direct mode)
210
276
  elif isinstance(event, TaskInputEvent):
211
277
  self.current_step = "Processing task input..."
212
278
  logger.info("💬 Task input received...")
@@ -264,10 +330,6 @@ class LogHandler(logging.Handler):
264
330
  self.current_step = "Task failed"
265
331
  logger.info(f"❌ Task failed: {event.reason}")
266
332
 
267
- # elif isinstance(event, ReasoningLogicEvent): TODO: fix event handling
268
- # self.current_step = "Planning..."
269
- # logger.info("🤔 Planning next steps...")
270
-
271
333
  elif isinstance(event, TaskRunnerEvent):
272
334
  self.current_step = "Processing tasks..."
273
335
  logger.info("🏃 Processing task queue...")