droidrun 0.3.8__py3-none-any.whl → 0.3.10.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. droidrun/__init__.py +2 -3
  2. droidrun/__main__.py +1 -1
  3. droidrun/agent/__init__.py +1 -1
  4. droidrun/agent/codeact/__init__.py +1 -4
  5. droidrun/agent/codeact/codeact_agent.py +112 -48
  6. droidrun/agent/codeact/events.py +6 -3
  7. droidrun/agent/codeact/prompts.py +2 -2
  8. droidrun/agent/common/constants.py +2 -0
  9. droidrun/agent/common/events.py +5 -3
  10. droidrun/agent/context/__init__.py +1 -3
  11. droidrun/agent/context/agent_persona.py +2 -1
  12. droidrun/agent/context/context_injection_manager.py +6 -6
  13. droidrun/agent/context/episodic_memory.py +5 -3
  14. droidrun/agent/context/personas/__init__.py +3 -3
  15. droidrun/agent/context/personas/app_starter.py +3 -3
  16. droidrun/agent/context/personas/big_agent.py +3 -3
  17. droidrun/agent/context/personas/default.py +3 -3
  18. droidrun/agent/context/personas/ui_expert.py +5 -5
  19. droidrun/agent/context/task_manager.py +15 -17
  20. droidrun/agent/droid/__init__.py +1 -1
  21. droidrun/agent/droid/droid_agent.py +327 -182
  22. droidrun/agent/droid/events.py +91 -9
  23. droidrun/agent/executor/__init__.py +13 -0
  24. droidrun/agent/executor/events.py +24 -0
  25. droidrun/agent/executor/executor_agent.py +327 -0
  26. droidrun/agent/executor/prompts.py +136 -0
  27. droidrun/agent/manager/__init__.py +18 -0
  28. droidrun/agent/manager/events.py +20 -0
  29. droidrun/agent/manager/manager_agent.py +459 -0
  30. droidrun/agent/manager/prompts.py +223 -0
  31. droidrun/agent/oneflows/app_starter_workflow.py +118 -0
  32. droidrun/agent/oneflows/text_manipulator.py +204 -0
  33. droidrun/agent/planner/__init__.py +3 -3
  34. droidrun/agent/planner/events.py +6 -3
  35. droidrun/agent/planner/planner_agent.py +60 -53
  36. droidrun/agent/planner/prompts.py +2 -2
  37. droidrun/agent/usage.py +15 -13
  38. droidrun/agent/utils/__init__.py +11 -1
  39. droidrun/agent/utils/async_utils.py +2 -1
  40. droidrun/agent/utils/chat_utils.py +48 -60
  41. droidrun/agent/utils/device_state_formatter.py +177 -0
  42. droidrun/agent/utils/executer.py +13 -12
  43. droidrun/agent/utils/inference.py +114 -0
  44. droidrun/agent/utils/llm_picker.py +2 -0
  45. droidrun/agent/utils/message_utils.py +85 -0
  46. droidrun/agent/utils/tools.py +220 -0
  47. droidrun/agent/utils/trajectory.py +8 -7
  48. droidrun/cli/__init__.py +1 -1
  49. droidrun/cli/logs.py +29 -28
  50. droidrun/cli/main.py +279 -143
  51. droidrun/config_manager/__init__.py +25 -0
  52. droidrun/config_manager/config_manager.py +583 -0
  53. droidrun/macro/__init__.py +2 -2
  54. droidrun/macro/__main__.py +1 -1
  55. droidrun/macro/cli.py +36 -34
  56. droidrun/macro/replay.py +7 -9
  57. droidrun/portal.py +1 -1
  58. droidrun/telemetry/__init__.py +2 -2
  59. droidrun/telemetry/events.py +3 -4
  60. droidrun/telemetry/phoenix.py +173 -0
  61. droidrun/telemetry/tracker.py +7 -5
  62. droidrun/tools/__init__.py +1 -1
  63. droidrun/tools/adb.py +210 -82
  64. droidrun/tools/ios.py +7 -5
  65. droidrun/tools/tools.py +25 -8
  66. {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/METADATA +13 -7
  67. droidrun-0.3.10.dev2.dist-info/RECORD +70 -0
  68. droidrun/agent/common/default.py +0 -5
  69. droidrun/agent/context/reflection.py +0 -20
  70. droidrun/agent/oneflows/reflector.py +0 -265
  71. droidrun-0.3.8.dist-info/RECORD +0 -55
  72. {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/WHEEL +0 -0
  73. {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/entry_points.txt +0 -0
  74. {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,85 @@
1
+ """
2
+ Message conversion utilities for Manager Agent.
3
+
4
+ Converts between dict message format and llama-index ChatMessage format.
5
+ """
6
+
7
+ from io import BytesIO
8
+ from pathlib import Path
9
+ from typing import Union
10
+
11
+ from llama_index.core.llms import ChatMessage, ImageBlock, TextBlock
12
+ from PIL import Image
13
+
14
+
15
def image_to_image_bytes(image_source: Union[str, Path, Image.Image, bytes]) -> bytes:
    """
    Convert an image to raw bytes suitable for an ImageBlock.

    Args:
        image_source: Can be:
            - str/Path: path to an image file (opened, re-encoded as PNG, then closed)
            - PIL.Image.Image: PIL Image object (re-encoded as PNG; left open for the caller)
            - bytes: already-encoded image bytes (returned unchanged, in whatever
              format they already are)

    Returns:
        PNG-encoded bytes for path / PIL inputs; the input object itself for bytes.

    Raises:
        ValueError: If image_source is none of the supported types.
    """
    if isinstance(image_source, bytes):
        # Already encoded: pass through untouched, no PIL round-trip.
        return image_source

    buffer = BytesIO()
    if isinstance(image_source, (str, Path)):
        # Image.open keeps the underlying file open until the image is closed;
        # the context manager releases the handle as soon as we have encoded it.
        with Image.open(image_source) as image:
            image.save(buffer, format="PNG")
    elif isinstance(image_source, Image.Image):
        # The caller owns this image, so it is deliberately not closed here.
        image_source.save(buffer, format="PNG")
    else:
        raise ValueError(f"Unsupported image source type: {type(image_source)}")

    return buffer.getvalue()
40
+
41
+
42
def convert_messages_to_chatmessages(messages: list[dict]) -> list[ChatMessage]:
    """
    Convert dict messages to llama-index ChatMessage format.

    Dict format (input):
        {
            "role": "user" | "assistant" | "system",
            "content": [
                {"text": "some text"},
                {"image": "/path/to/image.png"}  # or PIL Image / bytes
            ]
        }

    "content" may also be a plain string, which is treated as a single
    text item (previously a string was iterated character by character and
    produced a message with no blocks).

    ChatMessage format (output):
        ChatMessage(
            role="user",
            blocks=[
                TextBlock(text="some text"),
                ImageBlock(image=b"...bytes...")
            ]
        )

    Args:
        messages: List of message dicts, each with "role" and "content" keys

    Returns:
        List of ChatMessage objects, one per input message, in the same order

    Raises:
        KeyError: If a message lacks "role" or "content".
        ValueError: If an image item has an unsupported type
            (raised by image_to_image_bytes).
    """
    chat_messages = []

    for message in messages:
        content = message['content']
        if isinstance(content, str):
            # Normalise the common plain-string form to the list-of-items form.
            content = [{'text': content}]

        blocks = []
        for item in content:
            if 'text' in item:
                blocks.append(TextBlock(text=item['text']))
            elif 'image' in item:
                # ImageBlock takes raw bytes, so paths / PIL images are encoded first.
                image_bytes = image_to_image_bytes(item['image'])
                blocks.append(ImageBlock(image=image_bytes))
            # Items carrying neither key are skipped, as before.

        chat_messages.append(ChatMessage(role=message['role'], blocks=blocks))

    return chat_messages
@@ -0,0 +1,220 @@
1
+ from typing import TYPE_CHECKING, List
2
+
3
+ if TYPE_CHECKING:
4
+ from droidrun.tools import Tools
5
+
6
+ from droidrun.agent.oneflows.app_starter_workflow import AppStarter
7
+
8
+
9
def click(tool_instance: "Tools", index: int) -> str:
    """
    Tap the UI element identified by ``index``.

    Args:
        tool_instance: The Tools instance that performs the tap
        index: Index of the element to tap

    Returns:
        Whatever ``tool_instance.tap_by_index`` returns for that index
    """
    tap_result = tool_instance.tap_by_index(index)
    return tap_result
21
+
22
+
23
def long_press(tool_instance: "Tools", index: int) -> bool:
    """
    Long-press the UI element identified by ``index``.

    The press is issued as a swipe whose start and end points coincide.

    Args:
        tool_instance: The Tools instance that performs the gesture
        index: Index of the element to long press

    Returns:
        The result of ``tool_instance.swipe`` (annotated as a success flag)
    """
    press_x, press_y = tool_instance._extract_element_coordinates_by_index(index)
    # Fifth positional argument is 1000 — presumably the hold duration in ms
    # (the module's swipe helper passes duration_ms by keyword); confirm
    # against Tools.swipe.
    outcome = tool_instance.swipe(press_x, press_y, press_x, press_y, 1000)
    return outcome
36
+
37
+
38
def type(tool_instance: "Tools", text: str, index: int) -> str:
    """
    Enter ``text`` into the UI element identified by ``index``.

    NOTE: this function intentionally keeps the name ``type`` (it is the
    action name exposed to agents), which shadows the builtin inside this
    module.

    Args:
        tool_instance: The Tools instance that performs the input
        text: The text to enter
        index: Index of the element to type into

    Returns:
        Whatever ``tool_instance.input_text`` returns
    """
    input_result = tool_instance.input_text(text, index)
    return input_result
51
+
52
+
53
def system_button(tool_instance: "Tools", button: str) -> str:
    """
    Press a system button (back, home, or enter).

    Args:
        tool_instance: The Tools instance
        button: The button name: "back", "home", or "enter". Matching is
            case insensitive and ignores surrounding whitespace.

    Returns:
        Result message from the key press operation, or an "Error: ..."
        message if the button name is not recognised (including non-string
        input); no key is pressed in that case.
    """
    # Map button names to the keycodes handed to press_key
    button_map = {
        "back": 4,
        "home": 3,
        "enter": 66,
    }

    # Names often arrive from model output, so tolerate stray whitespace and
    # report non-string values through the same error message instead of
    # failing with AttributeError.
    normalized = button.strip().lower() if isinstance(button, str) else None
    keycode = button_map.get(normalized)
    if keycode is None:
        valid_options = ", ".join(button_map)  # stays in sync with the map
        return f"Error: Unknown system button '{button}'. Valid options: {valid_options}"

    return tool_instance.press_key(keycode)
77
+
78
+
79
def swipe(tool_instance: "Tools", coordinate: List[int], coordinate2: List[int]) -> bool:
    """
    Swipe from one coordinate to another.

    Args:
        tool_instance: The Tools instance
        coordinate: Starting coordinate as [x, y] (a list or tuple of length 2)
        coordinate2: Ending coordinate as [x, y] (a list or tuple of length 2)

    Returns:
        The result of ``tool_instance.swipe`` (annotated as a success flag)

    Raises:
        ValueError: If either coordinate is not a 2-element list/tuple.
    """
    # Accept tuples as well as lists: code-generating callers naturally write
    # (x, y). Strings and other sequences are still rejected.
    if not isinstance(coordinate, (list, tuple)) or len(coordinate) != 2:
        raise ValueError(f"coordinate must be a list of 2 integers, got: {coordinate}")
    if not isinstance(coordinate2, (list, tuple)) or len(coordinate2) != 2:
        raise ValueError(f"coordinate2 must be a list of 2 integers, got: {coordinate2}")

    start_x, start_y = coordinate
    end_x, end_y = coordinate2

    return tool_instance.swipe(start_x, start_y, end_x, end_y, duration_ms=300)
100
+
101
+
102
def open_app(tool_instance: "Tools", text: str) -> str:
    """
    Open an app described by ``text`` via the AppStarter workflow.

    Args:
        tool_instance: The Tools instance; must expose a non-None
            ``app_opener_llm``
        text: The name (description) of the app to open

    Returns:
        Whatever ``AppStarter.run`` returns for the given description.
        NOTE(review): if AppStarter is a llama-index Workflow, ``run`` hands
        back an awaitable handler rather than a finished string — confirm
        that callers await it.

    Raises:
        RuntimeError: If ``tool_instance.app_opener_llm`` is None.
    """
    # The workflow needs the LLM configured on the Tools instance
    opener_llm = tool_instance.app_opener_llm
    if opener_llm is None:
        raise RuntimeError(
            "app_opener_llm not configured. "
            "provide app_opener_llm when initializing Tools."
        )

    # Build the workflow and start it with the requested app description
    starter = AppStarter(tools=tool_instance, llm=opener_llm, timeout=60, verbose=True)
    return starter.run(app_description=text)
126
+
127
+
128
+ # =============================================================================
129
+ # ATOMIC ACTION SIGNATURES - Single source of truth for both Executor and CodeAct
130
+ # =============================================================================
131
+
132
# Registry of atomic actions. Each entry maps an action name to:
#   "arguments":   ordered argument names (excluding the Tools instance)
#   "description": prompt text shown to the LLM, including a usage example
#   "function":    the module-level callable implementing the action
ATOMIC_ACTION_SIGNATURES = {
    "click": {
        "arguments": ["index"],
        "description": (
            'Click the point on the screen with specified index. '
            'Usage Example: {"action": "click", "index": element_index}'
        ),
        "function": click,
    },
    "long_press": {
        "arguments": ["index"],
        "description": (
            'Long press on the position with specified index. '
            'Usage Example: {"action": "long_press", "index": element_index}'
        ),
        "function": long_press,
    },
    "type": {
        "arguments": ["text", "index"],
        "description": (
            'Type text into an input box or text field. '
            'Specify the element with index to focus the input field before typing. '
            'Usage Example: {"action": "type", "text": "the text you want to type", "index": element_index}'
        ),
        "function": type,
    },
    "system_button": {
        "arguments": ["button"],
        "description": (
            'Press a system button, including back, home, and enter. '
            'Usage example: {"action": "system_button", "button": "Home"}'
        ),
        "function": system_button,
    },
    "swipe": {
        "arguments": ["coordinate", "coordinate2"],
        "description": (
            'Scroll from the position with coordinate to the position with coordinate2. '
            'Please make sure the start and end points of your swipe are within the swipeable area '
            'and away from the keyboard (y1 < 1400). '
            'Usage Example: {"action": "swipe", "coordinate": [x1, y1], "coordinate2": [x2, y2]}'
        ),
        "function": swipe,
    },
    "open_app": {
        "arguments": ["text"],
        "description": 'Open an app. Usage example: {"action": "open_app", "text": "the name of app"}',
        "function": open_app,
    },
    # Disabled actions, kept for reference (no copy/paste functions exist in this module):
    # "copy": {
    #     "arguments": ["text"],
    #     "description": "Copy the specified text to the clipboard. Provide the text to copy using the 'text' argument. Example: {\"action\": \"copy\", \"text\": \"the text you want to copy\"}\nAlways use copy action to copy text to clipboard."
    #     "function": copy,
    # },
    # "paste": {
    #     "arguments": ["index", "clear"],
    #     "description": "Paste clipboard text into a text box. 'index' specifies which text box to focus on and paste into. Set 'clear' to true to clear existing text before pasting. Example: {\"action\": \"paste\", \"index\": 0, \"clear\": true}\nAlways use paste action to paste text from clipboard."
    #     "function": paste,
    # },
}
174
+
175
+
176
def get_atomic_tool_descriptions() -> str:
    """
    Render ATOMIC_ACTION_SIGNATURES as prompt-ready text.

    Each action becomes one line of the form
    ``- name(arg1, arg2): description``, in registry order.

    Returns:
        The lines joined with newlines (empty string if the registry is empty)
    """
    return "\n".join(
        f"- {action_name}({', '.join(signature['arguments'])}): {signature['description']}"
        for action_name, signature in ATOMIC_ACTION_SIGNATURES.items()
    )
192
+
193
+
194
def build_custom_tool_descriptions(custom_tools: dict) -> str:
    """
    Render a custom-tools mapping as prompt-ready text.

    Args:
        custom_tools: Mapping of tool name to a signature dict shaped like
            the entries of ATOMIC_ACTION_SIGNATURES:
            {
                "tool_name": {
                    "arguments": ["arg1", "arg2"],
                    "description": "Tool description with usage",
                    "function": callable
                }
            }
            A missing "arguments" key is treated as no arguments; a missing
            "description" falls back to "Custom action: <tool_name>".

    Returns:
        One ``- name(args): description`` line per tool, newline-joined;
        an empty string when custom_tools is empty or None
    """
    if custom_tools:
        lines = []
        for tool_name, spec in custom_tools.items():
            arg_list = ", ".join(spec.get("arguments", []))
            summary = spec.get("description", f"Custom action: {tool_name}")
            lines.append(f"- {tool_name}({arg_list}): {summary}")
        return "\n".join(lines)

    return ""
@@ -5,15 +5,16 @@ This module provides helper functions for working with agent trajectories,
5
5
  including saving, loading, and analyzing them.
6
6
  """
7
7
 
8
+ import io
8
9
  import json
9
10
  import logging
10
11
  import os
11
12
  import time
12
13
  import uuid
13
- from typing import Dict, List, Any
14
- from PIL import Image
15
- import io
14
+ from typing import Any, Dict, List
15
+
16
16
  from llama_index.core.workflow import Event
17
+ from PIL import Image
17
18
 
18
19
  logger = logging.getLogger("droidrun")
19
20
 
@@ -66,7 +67,7 @@ class Trajectory:
66
67
  Args:
67
68
  goal: The goal/prompt that this trajectory is trying to achieve
68
69
  """
69
- self.events: List[Event] = []
70
+ self.events: List[Event] = []
70
71
  self.screenshots: List[bytes] = []
71
72
  self.ui_states: List[Dict[str, Any]] = []
72
73
  self.macro: List[Event] = []
@@ -183,7 +184,7 @@ class Trajectory:
183
184
  f"Serialized event contains tokens: {event_dict['tokens']}"
184
185
  )
185
186
  else:
186
- logger.debug(f"Serialized event does NOT contain tokens")
187
+ logger.debug("Serialized event does NOT contain tokens")
187
188
 
188
189
  serializable_events.append(event_dict)
189
190
 
@@ -223,7 +224,7 @@ class Trajectory:
223
224
  logger.info(
224
225
  f"💾 Saved macro sequence with {len(macro_data)} actions to {macro_json_path}"
225
226
  )
226
- screenshots_folder = os.path.join(trajectory_folder, "screenshots");
227
+ screenshots_folder = os.path.join(trajectory_folder, "screenshots")
227
228
  os.makedirs(screenshots_folder, exist_ok=True)
228
229
 
229
230
  gif_path = self.create_screenshot_gif(
@@ -418,7 +419,7 @@ class Trajectory:
418
419
  print(f" - {action_type}: {count}")
419
420
 
420
421
  if folder_data["trajectory_data"]:
421
- print(f"\n--- Trajectory Summary ---")
422
+ print("\n--- Trajectory Summary ---")
422
423
  print(f"Total events: {len(folder_data['trajectory_data'])}")
423
424
 
424
425
  print("=================================")
droidrun/cli/__init__.py CHANGED
@@ -6,4 +6,4 @@ This module provides command-line interfaces for interacting with Android device
6
6
 
7
7
  from droidrun.cli.main import cli
8
8
 
9
- __all__ = ["cli"]
9
+ __all__ = ["cli"]
droidrun/cli/logs.py CHANGED
@@ -1,30 +1,30 @@
1
1
  import logging
2
+ from typing import List
3
+
4
+ from rich.console import Console
2
5
  from rich.layout import Layout
6
+ from rich.live import Live
3
7
  from rich.panel import Panel
4
8
  from rich.spinner import Spinner
5
- from rich.console import Console
6
- from rich.live import Live
7
- from typing import List
8
9
 
9
- from droidrun.agent.common.events import ScreenshotEvent, RecordUIStateEvent
10
- from droidrun.agent.planner.events import (
11
- PlanInputEvent,
12
- PlanThinkingEvent,
13
- PlanCreatedEvent,
14
- )
15
10
  from droidrun.agent.codeact.events import (
16
- TaskInputEvent,
17
- TaskThinkingEvent,
11
+ TaskEndEvent,
18
12
  TaskExecutionEvent,
19
13
  TaskExecutionResultEvent,
20
- TaskEndEvent,
14
+ TaskInputEvent,
15
+ TaskThinkingEvent,
21
16
  )
17
+ from droidrun.agent.common.events import RecordUIStateEvent, ScreenshotEvent
22
18
  from droidrun.agent.droid.events import (
23
19
  CodeActExecuteEvent,
24
20
  CodeActResultEvent,
25
- ReasoningLogicEvent,
26
- TaskRunnerEvent,
27
21
  FinalizeEvent,
22
+ TaskRunnerEvent,
23
+ )
24
+ from droidrun.agent.planner.events import (
25
+ PlanCreatedEvent,
26
+ PlanInputEvent,
27
+ PlanThinkingEvent,
28
28
  )
29
29
 
30
30
 
@@ -90,13 +90,14 @@ class LogHandler(logging.Handler):
90
90
  success: bool = False,
91
91
  ):
92
92
  """Update the layout with current logs and step information"""
93
- from rich.text import Text
94
93
  import shutil
95
94
 
95
+ from rich.text import Text
96
+
96
97
  # Cache terminal size to avoid frequent recalculation
97
98
  try:
98
99
  terminal_height = shutil.get_terminal_size().lines
99
- except:
100
+ except: # noqa: E722
100
101
  terminal_height = 24 # fallback
101
102
 
102
103
  # Reserve space for panels and borders (more conservative estimate)
@@ -169,7 +170,7 @@ class LogHandler(logging.Handler):
169
170
  )
170
171
  )
171
172
 
172
- def handle_event(self, event):
173
+ def handle_event(self, event): # TODO: fix event handling for the refactor
173
174
  """Handle streaming events from the agent workflow."""
174
175
  logger = logging.getLogger("droidrun")
175
176
 
@@ -178,7 +179,7 @@ class LogHandler(logging.Handler):
178
179
  logger.debug("📸 Taking screenshot...")
179
180
 
180
181
  elif isinstance(event, RecordUIStateEvent):
181
- logger.debug(f"✏️ Recording UI state")
182
+ logger.debug("✏️ Recording UI state")
182
183
 
183
184
  # Planner events
184
185
  elif isinstance(event, PlanInputEvent):
@@ -194,7 +195,7 @@ class LogHandler(logging.Handler):
194
195
  )
195
196
  logger.info(f"🧠 Planning: {thoughts_preview}")
196
197
  if event.code:
197
- logger.info(f"📝 Generated plan code")
198
+ logger.info("📝 Generated plan code")
198
199
 
199
200
  elif isinstance(event, PlanCreatedEvent):
200
201
  if event.tasks:
@@ -219,12 +220,12 @@ class LogHandler(logging.Handler):
219
220
  )
220
221
  logger.info(f"🧠 Thinking: {thoughts_preview}")
221
222
  if hasattr(event, "code") and event.code:
222
- logger.info(f"💻 Executing action code")
223
+ logger.info("💻 Executing action code")
223
224
  logger.debug(f"{event.code}")
224
225
 
225
226
  elif isinstance(event, TaskExecutionEvent):
226
227
  self.current_step = "Executing action..."
227
- logger.info(f"⚡ Executing action...")
228
+ logger.info("⚡ Executing action...")
228
229
 
229
230
  elif isinstance(event, TaskExecutionResultEvent):
230
231
  if hasattr(event, "output") and event.output:
@@ -246,13 +247,13 @@ class LogHandler(logging.Handler):
246
247
  self.current_step = event.reason
247
248
  logger.info(f"✅ Task completed: {event.reason}")
248
249
  else:
249
- self.current_step = f"Task failed"
250
+ self.current_step = "Task failed"
250
251
  logger.info(f"❌ Task failed: {event.reason}")
251
252
 
252
253
  # Droid coordination events
253
254
  elif isinstance(event, CodeActExecuteEvent):
254
255
  self.current_step = "Executing task..."
255
- logger.info(f"🔧 Starting task execution...")
256
+ logger.info("🔧 Starting task execution...")
256
257
 
257
258
  elif isinstance(event, CodeActResultEvent):
258
259
  if hasattr(event, "success") and hasattr(event, "reason"):
@@ -260,16 +261,16 @@ class LogHandler(logging.Handler):
260
261
  self.current_step = event.reason
261
262
  logger.info(f"✅ Task completed: {event.reason}")
262
263
  else:
263
- self.current_step = f"Task failed"
264
+ self.current_step = "Task failed"
264
265
  logger.info(f"❌ Task failed: {event.reason}")
265
266
 
266
- elif isinstance(event, ReasoningLogicEvent):
267
- self.current_step = "Planning..."
268
- logger.info(f"🤔 Planning next steps...")
267
+ # elif isinstance(event, ReasoningLogicEvent): TODO: fix event handling
268
+ # self.current_step = "Planning..."
269
+ # logger.info("🤔 Planning next steps...")
269
270
 
270
271
  elif isinstance(event, TaskRunnerEvent):
271
272
  self.current_step = "Processing tasks..."
272
- logger.info(f"🏃 Processing task queue...")
273
+ logger.info("🏃 Processing task queue...")
273
274
 
274
275
  elif isinstance(event, FinalizeEvent):
275
276
  if hasattr(event, "success") and hasattr(event, "reason"):