droidrun-0.3.0-py3-none-any.whl → droidrun-0.3.2-py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
droidrun/__init__.py CHANGED
@@ -5,26 +5,17 @@ DroidRun - A framework for controlling Android devices through LLM agents.
5
5
  __version__ = "0.3.0"
6
6
 
7
7
  # Import main classes for easier access
8
- from droidrun.agent.codeact.codeact_agent import CodeActAgent
9
- from droidrun.agent.planner.planner_agent import PlannerAgent
10
- from droidrun.agent.utils.executer import SimpleCodeExecutor
11
8
  from droidrun.agent.utils.llm_picker import load_llm
12
9
  from droidrun.adb.manager import DeviceManager
13
- from droidrun.tools.tools import Tools
14
- from droidrun.tools.adb import AdbTools
15
- from droidrun.tools.ios import IOSTools
10
+ from droidrun.tools import Tools, AdbTools, IOSTools
16
11
  from droidrun.agent.droid import DroidAgent
17
12
 
18
13
 
19
14
  # Make main components available at package level
20
15
  __all__ = [
21
16
  "DroidAgent",
22
- "CodeActAgent",
23
- "PlannerAgent",
24
17
  "DeviceManager",
25
- "Tools",
26
18
  "load_llm",
27
- "SimpleCodeExecutor",
28
19
  "Tools",
29
20
  "AdbTools",
30
21
  "IOSTools",
droidrun/adb/device.py CHANGED
@@ -10,12 +10,13 @@ import string
10
10
  from typing import Dict, Optional, Tuple, List
11
11
  from droidrun.adb.wrapper import ADBWrapper
12
12
 
13
+
13
14
  class Device:
14
15
  """High-level representation of an Android device."""
15
16
 
16
17
  def __init__(self, serial: str, adb: ADBWrapper):
17
18
  """Initialize device.
18
-
19
+
19
20
  Args:
20
21
  serial: Device serial number
21
22
  adb: ADB wrapper instance
@@ -60,9 +61,13 @@ class Device:
60
61
  """Get SDK level."""
61
62
  return await self.get_property("ro.build.version.sdk")
62
63
 
64
+ async def shell(self, command: str, timeout: float | None = None) -> str:
65
+ """Execute a shell command on the device."""
66
+ return await self._adb.shell(self._serial, command, timeout)
67
+
63
68
  async def tap(self, x: int, y: int) -> None:
64
69
  """Tap at coordinates.
65
-
70
+
66
71
  Args:
67
72
  x: X coordinate
68
73
  y: Y coordinate
@@ -70,15 +75,10 @@ class Device:
70
75
  await self._adb.shell(self._serial, f"input tap {x} {y}")
71
76
 
72
77
  async def swipe(
73
- self,
74
- start_x: int,
75
- start_y: int,
76
- end_x: int,
77
- end_y: int,
78
- duration_ms: int = 300
78
+ self, start_x: int, start_y: int, end_x: int, end_y: int, duration_ms: int = 300
79
79
  ) -> None:
80
80
  """Perform swipe gesture.
81
-
81
+
82
82
  Args:
83
83
  start_x: Starting X coordinate
84
84
  start_y: Starting Y coordinate
@@ -88,12 +88,12 @@ class Device:
88
88
  """
89
89
  await self._adb.shell(
90
90
  self._serial,
91
- f"input swipe {start_x} {start_y} {end_x} {end_y} {duration_ms}"
91
+ f"input swipe {start_x} {start_y} {end_x} {end_y} {duration_ms}",
92
92
  )
93
93
 
94
94
  async def input_text(self, text: str) -> None:
95
95
  """Input text.
96
-
96
+
97
97
  Args:
98
98
  text: Text to input
99
99
  """
@@ -101,7 +101,7 @@ class Device:
101
101
 
102
102
  async def press_key(self, keycode: int) -> None:
103
103
  """Press a key.
104
-
104
+
105
105
  Args:
106
106
  keycode: Android keycode to press
107
107
  """
@@ -111,10 +111,10 @@ class Device:
111
111
  self,
112
112
  package: str,
113
113
  activity: str = ".MainActivity",
114
- extras: Optional[Dict[str, str]] = None
114
+ extras: Optional[Dict[str, str]] = None,
115
115
  ) -> None:
116
116
  """Start an app activity.
117
-
117
+
118
118
  Args:
119
119
  package: Package name
120
120
  activity: Activity name
@@ -125,48 +125,56 @@ class Device:
125
125
  for key, value in extras.items():
126
126
  cmd += f" -e {key} {value}"
127
127
  await self._adb.shell(self._serial, cmd)
128
-
128
+
129
129
  async def start_app(self, package: str, activity: str = "") -> str:
130
130
  """Start an app on the device.
131
-
131
+
132
132
  Args:
133
133
  package: Package name
134
134
  activity: Optional activity name (if empty, launches default activity)
135
-
135
+
136
136
  Returns:
137
137
  Result message
138
138
  """
139
139
  if activity:
140
140
  if not activity.startswith(".") and "." not in activity:
141
141
  activity = f".{activity}"
142
-
143
- if not activity.startswith(".") and "." in activity and not activity.startswith(package):
142
+
143
+ if (
144
+ not activity.startswith(".")
145
+ and "." in activity
146
+ and not activity.startswith(package)
147
+ ):
144
148
  # Fully qualified activity name
145
149
  component = activity.split("/", 1)
146
- return await self.start_activity(component[0], component[1] if len(component) > 1 else activity)
147
-
150
+ return await self.start_activity(
151
+ component[0], component[1] if len(component) > 1 else activity
152
+ )
153
+
148
154
  # Relative activity name
149
155
  return await self.start_activity(package, activity)
150
-
156
+
151
157
  # Start main activity using monkey
152
158
  cmd = f"monkey -p {package} -c android.intent.category.LAUNCHER 1"
153
159
  result = await self._adb.shell(self._serial, cmd)
154
160
  return f"Started {package}"
155
-
156
- async def install_app(self, apk_path: str, reinstall: bool = False, grant_permissions: bool = True) -> str:
161
+
162
+ async def install_app(
163
+ self, apk_path: str, reinstall: bool = False, grant_permissions: bool = True
164
+ ) -> str:
157
165
  """Install an APK on the device.
158
-
166
+
159
167
  Args:
160
168
  apk_path: Path to the APK file
161
169
  reinstall: Whether to reinstall if app exists
162
170
  grant_permissions: Whether to grant all requested permissions
163
-
171
+
164
172
  Returns:
165
173
  Installation result
166
174
  """
167
175
  if not os.path.exists(apk_path):
168
176
  return f"Error: APK file not found: {apk_path}"
169
-
177
+
170
178
  # Build install command args
171
179
  install_args = ["install"]
172
180
  if reinstall:
@@ -174,28 +182,28 @@ class Device:
174
182
  if grant_permissions:
175
183
  install_args.append("-g")
176
184
  install_args.append(apk_path)
177
-
185
+
178
186
  try:
179
187
  stdout, stderr = await self._adb._run_device_command(
180
188
  self._serial,
181
189
  install_args,
182
- timeout=120 # Longer timeout for installation
190
+ timeout=120, # Longer timeout for installation
183
191
  )
184
-
192
+
185
193
  if "success" in stdout.lower():
186
194
  return f"Successfully installed {os.path.basename(apk_path)}"
187
195
  return f"Installation failed: {stdout or stderr}"
188
-
196
+
189
197
  except Exception as e:
190
198
  return f"Installation failed: {str(e)}"
191
-
199
+
192
200
  async def uninstall_app(self, package: str, keep_data: bool = False) -> str:
193
201
  """Uninstall an app from the device.
194
-
202
+
195
203
  Args:
196
204
  package: Package name to uninstall
197
205
  keep_data: Whether to keep app data and cache directories
198
-
206
+
199
207
  Returns:
200
208
  Uninstallation result
201
209
  """
@@ -203,41 +211,42 @@ class Device:
203
211
  if keep_data:
204
212
  cmd.append("-k")
205
213
  cmd.append(package)
206
-
214
+
207
215
  result = await self._adb.shell(self._serial, " ".join(cmd))
208
216
  return result.strip()
209
-
217
+
210
218
  async def take_screenshot(self, quality: int = 75) -> Tuple[str, bytes]:
211
219
  """Take a screenshot of the device and compress it.
212
-
220
+
213
221
  Args:
214
222
  quality: JPEG quality (1-100, lower means smaller file size)
215
-
223
+
216
224
  Returns:
217
225
  Tuple of (local file path, screenshot data as bytes)
218
226
  """
219
227
  # Create a temporary file for the screenshot
220
228
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp:
221
229
  screenshot_path = temp.name
222
-
230
+
223
231
  try:
224
232
  # Generate a random filename for the device
225
233
  timestamp = int(time.time())
226
- random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
234
+ random_suffix = "".join(
235
+ random.choices(string.ascii_lowercase + string.digits, k=8)
236
+ )
227
237
  device_path = f"/sdcard/screenshot_{timestamp}_{random_suffix}.png"
228
-
238
+
229
239
  # Take screenshot using screencap command
230
240
  await self._adb.shell(self._serial, f"screencap -p {device_path}")
231
-
241
+
232
242
  # Pull screenshot to local machine
233
243
  await self._adb._run_device_command(
234
- self._serial,
235
- ["pull", device_path, screenshot_path]
244
+ self._serial, ["pull", device_path, screenshot_path]
236
245
  )
237
-
246
+
238
247
  # Clean up on device
239
248
  await self._adb.shell(self._serial, f"rm {device_path}")
240
-
249
+
241
250
  # Read the screenshot file
242
251
  with open(screenshot_path, "rb") as f:
243
252
  screenshot_data = f.read()
@@ -249,12 +258,14 @@ class Device:
249
258
 
250
259
  # Create buffer for the compressed image
251
260
  buffer = io.BytesIO()
252
-
261
+
253
262
  # Load the PNG data into a PIL Image
254
263
  with Image.open(io.BytesIO(screenshot_data)) as img:
255
264
  # Convert to RGB (removing alpha channel if present) and save as JPEG
256
265
  converted_img = img.convert("RGB") if img.mode == "RGBA" else img
257
- converted_img.save(buffer, format="JPEG", quality=quality, optimize=True)
266
+ converted_img.save(
267
+ buffer, format="JPEG", quality=quality, optimize=True
268
+ )
258
269
  compressed_data = buffer.getvalue()
259
270
 
260
271
  # Get size reduction info for logging
@@ -263,6 +274,7 @@ class Device:
263
274
  reduction = 100 - (jpg_size / png_size * 100) if png_size > 0 else 0
264
275
 
265
276
  import logging
277
+
266
278
  logger = logging.getLogger("droidrun")
267
279
  logger.debug(
268
280
  f"Screenshot compressed successfully: {png_size:.1f}KB → {jpg_size:.1f}KB ({reduction:.1f}% reduction)"
@@ -275,9 +287,11 @@ class Device:
275
287
  return screenshot_path, screenshot_data
276
288
  except Exception as e:
277
289
  # If compression fails, return the original PNG data
278
- logger.warning(f"Screenshot compression failed: {e}, returning uncompressed")
290
+ logger.warning(
291
+ f"Screenshot compression failed: {e}, returning uncompressed"
292
+ )
279
293
  return screenshot_path, screenshot_data
280
-
294
+
281
295
  except Exception as e:
282
296
  # Clean up in case of error
283
297
  try:
@@ -285,31 +299,47 @@ class Device:
285
299
  except OSError:
286
300
  pass
287
301
  raise RuntimeError(f"Screenshot capture failed: {str(e)}")
288
-
289
- async def list_packages(self, include_system_apps: bool = False) -> List[Dict[str, str]]:
290
- """List installed packages on the device.
291
302
 
303
+ def _parse_package_list(self, output: str) -> List[Dict[str, str]]:
304
+ """Parse the output of 'pm list packages -f' command.
305
+
306
+ Args:
307
+ output: Raw command output from 'pm list packages -f'
308
+
309
+ Returns:
310
+ List of dictionaries containing package info with 'package' and 'path' keys
311
+ """
312
+ apps = []
313
+ for line in output.splitlines():
314
+ if line.startswith("package:"):
315
+ # Format is: "package:/path/to/base.apk=com.package.name"
316
+ path_and_pkg = line[8:] # Strip "package:"
317
+ if "=" in path_and_pkg:
318
+ path, package = path_and_pkg.rsplit("=", 1)
319
+ apps.append({"package": package.strip(), "path": path.strip()})
320
+ return apps
321
+
322
+ async def list_packages(self, include_system_apps: bool = False) -> List[str]:
323
+ """
324
+ List installed packages on the device.
325
+
292
326
  Args:
293
- include_system_apps: Whether to include system apps
294
-
327
+ include_system_apps: Whether to include system apps (default: False)
328
+
295
329
  Returns:
296
- List of package dictionaries with 'package' and 'path' keys
330
+ List of package names
297
331
  """
332
+ # Use the direct ADB command to get packages with paths
298
333
  cmd = ["pm", "list", "packages", "-f"]
299
334
  if not include_system_apps:
300
335
  cmd.append("-3")
301
-
302
- output = await self._adb.shell(self._serial, " ".join(cmd))
303
-
304
- packages = []
305
- for line in output.splitlines():
306
- if line.startswith("package:"):
307
- parts = line[8:].split("=")
308
- if len(parts) == 2:
309
- path, package = parts
310
- packages.append({
311
- "package": package,
312
- "path": path
313
- })
314
-
315
- return packages
336
+
337
+ output = await self.shell(" ".join(cmd))
338
+
339
+ # Parse the package list using the function
340
+ packages = self._parse_package_list(output)
341
+ # Format package list for better readability
342
+ package_list = [pack["package"] for pack in packages]
343
+ #for package in package_list:
344
+ # print(package)
345
+ return package_list
droidrun/adb/manager.py CHANGED
@@ -42,7 +42,7 @@ class DeviceManager:
42
42
 
43
43
  return list(self._devices.values())
44
44
 
45
- async def get_device(self, serial: str) -> Optional[Device]:
45
+ async def get_device(self, serial: str | None = None) -> Optional[Device]:
46
46
  """Get a specific device.
47
47
 
48
48
  Args:
@@ -51,13 +51,13 @@ class DeviceManager:
51
51
  Returns:
52
52
  Device instance if found, None otherwise
53
53
  """
54
- if serial in self._devices:
54
+ if serial and serial in self._devices:
55
55
  return self._devices[serial]
56
56
 
57
57
  # Try to find the device
58
58
  devices = await self.list_devices()
59
59
  for device in devices:
60
- if device.serial == serial:
60
+ if device.serial == serial or not serial:
61
61
  return device
62
62
 
63
63
  return None
droidrun/agent/codeact/codeact_agent.py CHANGED
@@ -45,6 +45,7 @@ class CodeActAgent(Workflow):
45
45
  self,
46
46
  llm: LLM,
47
47
  persona: AgentPersona,
48
+ vision: bool,
48
49
  tools_instance: "Tools",
49
50
  all_tools_list: Dict[str, Callable[..., Any]],
50
51
  max_steps: int = 5,
@@ -62,6 +63,8 @@ class CodeActAgent(Workflow):
62
63
  self.user_prompt = persona.user_prompt
63
64
  self.no_thoughts_prompt = None
64
65
 
66
+ self.vision = vision
67
+
65
68
  self.chat_memory = None
66
69
  self.episodic_memory = EpisodicMemory(persona=persona)
67
70
  self.remembered_info = None
@@ -161,22 +164,28 @@ class CodeActAgent(Workflow):
161
164
  chat_history = await chat_utils.add_memory_block(self.remembered_info, chat_history)
162
165
 
163
166
  for context in self.required_context:
164
- if context == "screenshot" and model != "DeepSeek":
167
+ if model == "DeepSeek":
168
+ logger.warning(
169
+ "[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
170
+ )
171
+ elif self.vision == True and context == "screenshot":
165
172
  screenshot = (await self.tools.take_screenshot())[1]
166
173
  ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
167
174
 
168
175
  await ctx.set("screenshot", screenshot)
169
176
  chat_history = await chat_utils.add_screenshot_image_block(screenshot, chat_history)
170
177
 
171
- if context == "phone_state":
172
- chat_history = await chat_utils.add_phone_state_block(await self.tools.get_phone_state(), chat_history)
173
-
174
178
  if context == "ui_state":
175
- ui_state = await self.tools.get_clickables()
176
- await ctx.set("ui_state", ui_state)
177
- chat_history = await chat_utils.add_ui_text_block(
178
- ui_state, chat_history
179
- )
179
+ try:
180
+ state = await self.tools.get_state()
181
+ await ctx.set("ui_state", state["a11y_tree"])
182
+ chat_history = await chat_utils.add_ui_text_block(
183
+ state["a11y_tree"], chat_history
184
+ )
185
+ chat_history = await chat_utils.add_phone_state_block(state["phone_state"], chat_history)
186
+ except Exception as e:
187
+ logger.warning(f"⚠️ Error retrieving state from the connected device. Is the Accessibility Service enabled?")
188
+
180
189
 
181
190
  if context == "packages":
182
191
  chat_history = await chat_utils.add_packages_block(
@@ -303,6 +312,7 @@ class CodeActAgent(Workflow):
303
312
  {
304
313
  "success": ev.success,
305
314
  "reason": ev.reason,
315
+ "output": ev.reason,
306
316
  "codeact_steps": self.steps_counter,
307
317
  "code_executions": self.code_exec_counter,
308
318
  }
@@ -312,7 +322,7 @@ class CodeActAgent(Workflow):
312
322
  EpisodicMemoryEvent(episodic_memory=self.episodic_memory)
313
323
  )
314
324
 
315
- return StopEvent(result=result)
325
+ return StopEvent(result)
316
326
 
317
327
  async def _get_llm_response(
318
328
  self, ctx: Context, chat_history: List[ChatMessage]
@@ -394,7 +404,7 @@ class CodeActAgent(Workflow):
394
404
  logger.warning(f"Failed to capture final screenshot: {e}")
395
405
 
396
406
  try:
397
- ui_state = await self.tools.get_clickables()
407
+ (a11y_tree, phone_state) = await self.tools.get_state()
398
408
  except Exception as e:
399
409
  logger.warning(f"Failed to capture final UI state: {e}")
400
410
 
@@ -402,7 +412,7 @@ class CodeActAgent(Workflow):
402
412
  final_chat_history = [{"role": "system", "content": "Final state observation after task completion"}]
403
413
  final_response = {
404
414
  "role": "user",
405
- "content": f"Final State Observation:\nUI State: {ui_state}\nScreenshot: {'Available' if screenshot else 'Not available'}"
415
+ "content": f"Final State Observation:\nUI State: {a11y_tree}\nScreenshot: {'Available' if screenshot else 'Not available'}"
406
416
  }
407
417
 
408
418
  # Create final episodic memory step
@@ -1,11 +1,9 @@
1
1
  from .default import DEFAULT
2
2
  from .ui_expert import UI_EXPERT
3
3
  from .app_starter import APP_STARTER_EXPERT
4
- from .extractor import EXTRACTOR
5
4
 
6
5
  __all__ = [
7
6
  'DEFAULT',
8
7
  'UI_EXPERT',
9
8
  'APP_STARTER_EXPERT',
10
- 'EXTRACTOR'
11
9
  ]
@@ -21,7 +21,6 @@ DEFAULT = AgentPersona(
21
21
  required_context=[
22
22
  "ui_state",
23
23
  "screenshot",
24
- "phone_state"
25
24
  ],
26
25
  user_prompt="""
27
26
  **Current Request:**
@@ -46,6 +45,7 @@ DEFAULT = AgentPersona(
46
45
  - **screenshots**: A visual screenshot of the current state of the Android screen. This provides visual context for what the user sees. screenshots won't be saved in the chat history. So, make sure to describe what you see and explain the key parts of your plan in your thoughts, as those will be saved and used to assist you in future steps.
47
46
  - **phone_state**: The current app you are navigating in. This tells you which application context you're working within.
48
47
  - **chat history**: You are also given the history of your actions (if any) from your previous steps.
48
+ - **execution result**: The result of your last Action
49
49
  NOTE: you don't have access to these inputs in your tool calling context
50
50
 
51
51
  ## Response Format: