droidrun 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. droidrun/__init__.py +22 -10
  2. droidrun/__main__.py +1 -2
  3. droidrun/adb/__init__.py +3 -3
  4. droidrun/adb/device.py +2 -2
  5. droidrun/adb/manager.py +2 -2
  6. droidrun/agent/__init__.py +5 -15
  7. droidrun/agent/codeact/__init__.py +11 -0
  8. droidrun/agent/codeact/codeact_agent.py +420 -0
  9. droidrun/agent/codeact/events.py +28 -0
  10. droidrun/agent/codeact/prompts.py +26 -0
  11. droidrun/agent/common/default.py +5 -0
  12. droidrun/agent/common/events.py +4 -0
  13. droidrun/agent/context/__init__.py +23 -0
  14. droidrun/agent/context/agent_persona.py +15 -0
  15. droidrun/agent/context/context_injection_manager.py +66 -0
  16. droidrun/agent/context/episodic_memory.py +15 -0
  17. droidrun/agent/context/personas/__init__.py +11 -0
  18. droidrun/agent/context/personas/app_starter.py +44 -0
  19. droidrun/agent/context/personas/default.py +95 -0
  20. droidrun/agent/context/personas/extractor.py +52 -0
  21. droidrun/agent/context/personas/ui_expert.py +107 -0
  22. droidrun/agent/context/reflection.py +20 -0
  23. droidrun/agent/context/task_manager.py +124 -0
  24. droidrun/agent/context/todo.txt +4 -0
  25. droidrun/agent/droid/__init__.py +13 -0
  26. droidrun/agent/droid/droid_agent.py +357 -0
  27. droidrun/agent/droid/events.py +28 -0
  28. droidrun/agent/oneflows/reflector.py +265 -0
  29. droidrun/agent/planner/__init__.py +13 -0
  30. droidrun/agent/planner/events.py +16 -0
  31. droidrun/agent/planner/planner_agent.py +268 -0
  32. droidrun/agent/planner/prompts.py +124 -0
  33. droidrun/agent/utils/__init__.py +3 -0
  34. droidrun/agent/utils/async_utils.py +17 -0
  35. droidrun/agent/utils/chat_utils.py +312 -0
  36. droidrun/agent/utils/executer.py +132 -0
  37. droidrun/agent/utils/llm_picker.py +147 -0
  38. droidrun/agent/utils/trajectory.py +184 -0
  39. droidrun/cli/__init__.py +1 -1
  40. droidrun/cli/logs.py +283 -0
  41. droidrun/cli/main.py +358 -149
  42. droidrun/run.py +105 -0
  43. droidrun/tools/__init__.py +4 -30
  44. droidrun/tools/adb.py +879 -0
  45. droidrun/tools/ios.py +594 -0
  46. droidrun/tools/tools.py +99 -0
  47. droidrun-0.3.0.dist-info/METADATA +149 -0
  48. droidrun-0.3.0.dist-info/RECORD +52 -0
  49. droidrun/agent/llm_reasoning.py +0 -567
  50. droidrun/agent/react_agent.py +0 -556
  51. droidrun/llm/__init__.py +0 -24
  52. droidrun/tools/actions.py +0 -854
  53. droidrun/tools/device.py +0 -29
  54. droidrun-0.1.0.dist-info/METADATA +0 -276
  55. droidrun-0.1.0.dist-info/RECORD +0 -20
  56. {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/WHEEL +0 -0
  57. {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/entry_points.txt +0 -0
  58. {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/licenses/LICENSE +0 -0
droidrun/tools/adb.py ADDED
@@ -0,0 +1,879 @@
1
+ """
2
+ UI Actions - Core UI interaction tools for Android device control.
3
+ """
4
+
5
+ import os
6
+ import re
7
+ import json
8
+ import time
9
+ import tempfile
10
+ import asyncio
11
+ import aiofiles
12
+ import contextlib
13
+ from typing import Optional, Dict, Tuple, List, Any
14
+ from droidrun.adb.device import Device
15
+ from droidrun.adb.manager import DeviceManager
16
+ from droidrun.tools.tools import Tools
17
+
18
+
19
+ class AdbTools(Tools):
20
+ """Core UI interaction tools for Android device control."""
21
+
22
def __init__(self, serial: str = "emulator-5554") -> None:
    """Set up the per-instance state for one Android device.

    Args:
        serial: ADB serial of the device this instance talks to.
    """
    self.serial = serial
    self.device_manager = DeviceManager()

    # Instance-level slot intended for the clickable-element cache
    # (index-based tapping).
    self.clickable_elements_cache: List[Dict[str, Any]] = []

    # Latest screenshot payload, plus every screenshot taken so far
    # (each entry is a dict that carries a timestamp).
    self.last_screenshot = None
    self.screenshots: List[Dict[str, Any]] = []

    # Task-completion bookkeeping; written by complete().
    self.finished = False
    self.success = None
    self.reason = None

    # Notes stored through remember().
    self.memory: List[str] = []
35
+
36
def get_device_serial(self) -> Optional[str]:
    """Return the serial configured on this instance.

    No environment variable is consulted; only ``self.serial`` is read.

    Returns:
        ``self.serial`` when it is truthy, otherwise ``None``.
    """
    if self.serial:
        return self.serial
    # Made explicit: the function used to fall off the end here, which
    # contradicted its ``-> str`` annotation.
    return None
41
+
42
async def get_device(self) -> Optional[Device]:
    """Resolve this instance's serial to a device object.

    Returns:
        The device returned by ``self.device_manager`` for the serial.

    Raises:
        ValueError: If no serial is available, or the device manager
            returns nothing for it.
    """
    target = self.get_device_serial()
    if not target:
        raise ValueError("No device serial specified - set device_serial parameter")

    found = await self.device_manager.get_device(target)
    if found:
        return found
    raise ValueError(f"Device {target} not found")
57
+
58
def parse_package_list(self, output: str) -> List[Dict[str, str]]:
    """Turn ``pm list packages -f`` output into package/path records.

    Each useful line looks like ``package:/path/to/base.apk=com.package.name``.
    Lines without the ``package:`` prefix, or without an ``=`` after it,
    are ignored.

    Args:
        output: Raw command output from 'pm list packages -f'

    Returns:
        One ``{"package": ..., "path": ...}`` dict per parsed line, in
        input order, with surrounding whitespace stripped from both values.
    """
    prefix = "package:"
    parsed = []
    for raw_line in output.splitlines():
        if not raw_line.startswith(prefix):
            continue
        remainder = raw_line[len(prefix):]
        # Split on the LAST '=' because the APK path may itself contain '='.
        apk_path, separator, package_name = remainder.rpartition("=")
        if not separator:
            continue
        parsed.append({"package": package_name.strip(), "path": apk_path.strip()})
    return parsed
76
+
77
async def get_clickables(self, serial: Optional[str] = None) -> List[Dict[str, Any]]:
    """Fetch the interactive UI elements currently reported by the device.

    Each attempt clears logcat, sends the
    ``com.droidrun.portal.GET_ELEMENTS`` broadcast, watches logcat for a
    ``DROIDRUN_FILE`` line announcing where the JSON dump was written,
    pulls that file and parses it. Attempts repeat until a non-empty
    element list is obtained or the overall time budget is exhausted.

    The ``type`` key is removed from every element and from each of its
    direct children. The result is stored on
    ``self.clickable_elements_cache`` so index-based tapping can use it.

    Args:
        serial: Optional device serial number; when omitted the
            instance's own device is used.

    Returns:
        The filtered list of element dictionaries (a list, not a JSON
        string).

    Raises:
        ValueError: If the device is not found, no elements could be
            retrieved within the time budget, or any other step fails.
    """
    max_total_time = 30  # overall retry budget, seconds
    retry_interval = 1.0  # pause between full attempts, seconds
    max_wait_time = 10  # how long to watch logcat per attempt, seconds
    poll_interval = 0.2  # pause between logcat checks, seconds

    try:
        if serial:
            device = await DeviceManager().get_device(serial)
            if not device:
                raise ValueError(f"Device {serial} not found")
        else:
            device = await self.get_device()

        # Reserve a local path for the pulled dump. The file is removed in
        # the ``finally`` below on every exit path, including success.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as temp:
            local_path = temp.name

        try:
            loop = asyncio.get_event_loop()
            start_total_time = loop.time()

            while True:
                if loop.time() - start_total_time > max_total_time:
                    raise ValueError(
                        f"Failed to get UI elements after {max_total_time} seconds of retries"
                    )

                # Clear logcat so only output from this attempt is matched.
                await device._adb.shell(device._serial, "logcat -c")

                # Ask the on-device service for interactive elements only.
                await device._adb.shell(
                    device._serial,
                    "am broadcast -a com.droidrun.portal.GET_ELEMENTS",
                )

                # Poll logcat for the path of the JSON file on the device.
                device_path = None
                start_time = loop.time()
                while loop.time() - start_time < max_wait_time:
                    logcat_output = await device._adb.shell(
                        device._serial,
                        'logcat -d | grep "DROIDRUN_FILE" | grep "JSON data written to" | tail -1',
                    )
                    match = re.search(r"JSON data written to: (.*)", logcat_output)
                    if match:
                        device_path = match.group(1).strip()
                        break
                    await asyncio.sleep(poll_interval)

                if not device_path:
                    await asyncio.sleep(retry_interval)
                    continue

                await device._adb.pull_file(device._serial, device_path, local_path)

                async with aiofiles.open(local_path, "r", encoding="utf-8") as f:
                    json_content = await f.read()

                try:
                    ui_data = json.loads(json_content)
                except json.JSONDecodeError:
                    # Unparseable dump: wait and retry.
                    await asyncio.sleep(retry_interval)
                    continue

                filtered_data = []
                for element in ui_data:
                    filtered_element = {
                        k: v for k, v in element.items() if k != "type"
                    }
                    if "children" in filtered_element:
                        filtered_element["children"] = [
                            {k: v for k, v in child.items() if k != "type"}
                            for child in filtered_element["children"]
                        ]
                    filtered_data.append(filtered_element)

                if filtered_data:
                    # Cache on the instance. The module-level name is also
                    # kept in sync because this code historically
                    # published the cache there.
                    self.clickable_elements_cache = filtered_data
                    global CLICKABLE_ELEMENTS_CACHE
                    CLICKABLE_ELEMENTS_CACHE = filtered_data

                    # Small pause to let the UI settle before the caller acts.
                    await asyncio.sleep(0.5)
                    return filtered_data

                # Parsed fine but empty: wait and retry.
                await asyncio.sleep(retry_interval)

        except Exception as e:
            raise ValueError(f"Error retrieving clickable elements: {e}") from e
        finally:
            with contextlib.suppress(OSError):
                os.unlink(local_path)

    except Exception as e:
        raise ValueError(f"Error getting clickable elements: {e}") from e
213
+
214
async def tap_by_index(self, index: int, serial: Optional[str] = None) -> str:
    """Tap the centre of the cached UI element that has the given index.

    Elements are looked up in ``self.clickable_elements_cache`` first. If
    that is empty, a module-level ``CLICKABLE_ELEMENTS_CACHE`` is used when
    one exists. Nested ``children`` are searched too.

    Args:
        index: Index of the element to tap
        serial: Optional device serial; when omitted the instance's own
            device is used.

    Returns:
        A human-readable result message. Failures such as no cache, an
        unknown index, missing or malformed bounds, or a missing device
        are reported as strings starting with ``"Error:"``.
    """

    def collect_all_indices(elements):
        """Recursively gather every non-None ``index`` value."""
        indices = []
        for item in elements:
            if item.get("index") is not None:
                indices.append(item.get("index"))
            indices.extend(collect_all_indices(item.get("children", [])))
        return indices

    def find_element_by_index(elements, target_index):
        """Depth-first search for the element whose ``index`` matches."""
        for item in elements:
            if item.get("index") == target_index:
                return item
            result = find_element_by_index(item.get("children", []), target_index)
            if result:
                return result
        return None

    # Read the instance cache, falling back to the module-level name.
    # Looking it up via globals() avoids the NameError the bare reference
    # raised when that global had never been assigned.
    cached = getattr(self, "clickable_elements_cache", None) or globals().get(
        "CLICKABLE_ELEMENTS_CACHE"
    )

    try:
        if not cached:
            return "Error: No UI elements cached. Call get_clickables first."

        element = find_element_by_index(cached, index)

        if not element:
            # List (up to 20) available indices to help the caller.
            indices = sorted(collect_all_indices(cached))
            indices_str = ", ".join(str(idx) for idx in indices[:20])
            if len(indices) > 20:
                indices_str += f"... and {len(indices) - 20} more"

            return f"Error: No element found with index {index}. Available indices: {indices_str}"

        bounds_str = element.get("bounds")
        if not bounds_str:
            element_text = element.get("text", "No text")
            element_type = element.get("type", "unknown")
            element_class = element.get("className", "Unknown class")
            return f"Error: Element with index {index} ('{element_text}', {element_class}, type: {element_type}) has no bounds and cannot be tapped"

        # Bounds format: "left,top,right,bottom"
        try:
            left, top, right, bottom = map(int, bounds_str.split(","))
        except ValueError:
            return f"Error: Invalid bounds format for element with index {index}: {bounds_str}"

        x = (left + right) // 2
        y = (top + bottom) // 2

        if serial:
            device = await DeviceManager().get_device(serial)
            if not device:
                return f"Error: Device {serial} not found"
        else:
            device = await self.get_device()

        await device.tap(x, y)

        # Give the UI a moment to react to the tap.
        await asyncio.sleep(0.5)

        response_parts = [
            f"Tapped element with index {index}",
            f"Text: '{element.get('text', 'No text')}'",
            f"Class: {element.get('className', 'Unknown class')}",
            f"Type: {element.get('type', 'unknown')}",
        ]

        child_texts = [
            child.get("text")
            for child in element.get("children", [])
            if child.get("text")
        ]
        if child_texts:
            response_parts.append(f"Contains text: {' | '.join(child_texts)}")

        response_parts.append(f"Coordinates: ({x}, {y})")

        return " | ".join(response_parts)
    except ValueError as e:
        return f"Error: {str(e)}"
321
+
322
+ # Rename the old tap function to tap_by_coordinates for backward compatibility
323
async def tap_by_coordinates(self, x: int, y: int) -> bool:
    """Tap on the device screen at specific coordinates.

    Args:
        x: X coordinate
        y: Y coordinate

    Returns:
        True once the tap has been issued; False if the device could not
        be resolved or a ValueError was raised along the way (the error
        is printed).
    """
    try:
        if self.serial:
            device = await DeviceManager().get_device(self.serial)
            if not device:
                # Raise so the handler below prints and returns False. The
                # old code returned an error *string*, which is truthy and
                # so looked like success to callers testing the bool.
                raise ValueError(f"Device {self.serial} not found")
        else:
            device = await self.get_device()

        await device.tap(x, y)
        print(f"Tapped at coordinates ({x}, {y})")
        return True
    except ValueError as e:
        print(f"Error: {str(e)}")
        return False
349
+
350
+ # Replace the old tap function with the new one
351
async def tap(self, index: int) -> str:
    """Tap a UI element identified by its index.

    Thin alias that forwards to ``tap_by_index`` using the instance's own
    device.

    Args:
        index: Index of the element to tap

    Returns:
        Result message
    """
    outcome = await self.tap_by_index(index)
    return outcome
365
+
366
async def swipe(
    self, start_x: int, start_y: int, end_x: int, end_y: int, duration_ms: int = 300
) -> bool:
    """Perform a straight-line swipe gesture on the device screen.

    To perform a hold (long press), set the start and end coordinates to
    the same values and increase the duration as needed.

    Args:
        start_x: Starting X coordinate
        start_y: Starting Y coordinate
        end_x: Ending X coordinate
        end_y: Ending Y coordinate
        duration_ms: Duration of swipe in milliseconds

    Returns:
        True once the swipe has been issued; False if the device could
        not be resolved or a ValueError was raised (the error is printed).
    """
    try:
        if self.serial:
            device = await DeviceManager().get_device(self.serial)
            if not device:
                # Raise so the handler below prints and returns False. The
                # old code returned a truthy error string from a method
                # whose contract is a bool.
                raise ValueError(f"Device {self.serial} not found")
        else:
            device = await self.get_device()

        await device.swipe(start_x, start_y, end_x, end_y, duration_ms)
        # Fixed pause after the gesture before reporting success.
        await asyncio.sleep(1)
        print(f"Swiped from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration_ms}ms")
        return True
    except ValueError as e:
        print(f"Error: {str(e)}")
        return False
397
+
398
async def input_text(self, text: str, serial: Optional[str] = None) -> str:
    """Input text on the device through the Droidrun keyboard IME.

    Always make sure that the Focused Element is not None before
    inputting text.

    The current default input method is read, the Droidrun IME is enabled
    and selected, the text is sent Base64-encoded in a broadcast, and the
    previous input method is then restored. The restore also runs when
    sending fails.

    Args:
        text: Text to input. Can contain spaces, newlines, and special
            characters including non-ASCII.
        serial: Optional device serial; when omitted the instance's own
            device is used.

    Returns:
        Result message (an ``"Error..."`` string on failure).
    """
    import base64  # only this method needs it

    droidrun_ime = "com.droidrun.portal/.DroidrunKeyboardIME"

    try:
        if serial:
            device = await DeviceManager().get_device(serial)
            if not device:
                return f"Error: Device {serial} not found"
        else:
            device = await self.get_device()

        # Remember the current keyboard so it can be put back afterwards.
        original_ime = await device._adb.shell(
            device._serial, "settings get secure default_input_method"
        )
        original_ime = original_ime.strip()

        try:
            await device._adb.shell(device._serial, f"ime enable {droidrun_ime}")
            await device._adb.shell(device._serial, f"ime set {droidrun_ime}")

            # Wait for the keyboard switch to take effect.
            await asyncio.sleep(0.2)

            # Base64 keeps spaces, quotes and non-ASCII intact on the
            # shell command line.
            encoded_text = base64.b64encode(text.encode("utf-8")).decode()

            cmd = f'am broadcast -a com.droidrun.portal.DROIDRUN_INPUT_B64 --es msg "{encoded_text}" -p com.droidrun.portal'
            await device._adb.shell(device._serial, cmd)

            # Wait for text input to complete.
            await asyncio.sleep(0.5)
        finally:
            # Restore the previous keyboard even if a step above failed,
            # so the device is not left on the Droidrun IME.
            if original_ime and "com.droidrun.portal" not in original_ime:
                await device._adb.shell(device._serial, f"ime set {original_ime}")

        return f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"
    except ValueError as e:
        return f"Error: {str(e)}"
    except Exception as e:
        return f"Error sending text input: {str(e)}"
457
+
458
async def back(self) -> str:
    """Go back on the current view by pressing the Android BACK key.

    Returns:
        ``"Pressed key BACK"`` on success, otherwise an ``"Error: ..."``
        message.
    """
    try:
        if self.serial:
            device = await DeviceManager().get_device(self.serial)
            if not device:
                return f"Error: Device {self.serial} not found"
        else:
            device = await self.get_device()

        # Android KEYCODE_BACK is 4. The previous value, 3, is
        # KEYCODE_HOME, so this used to send the user to the launcher.
        await device.press_key(4)
        return "Pressed key BACK"
    except ValueError as e:
        return f"Error: {str(e)}"
476
+
477
async def press_key(self, keycode: int) -> str:
    """Send a single key event to the Android device.

    Common keycodes:
    - 4: BACK
    - 66: ENTER
    - 67: DELETE

    Args:
        keycode: Android keycode to press

    Returns:
        ``"Pressed key <NAME>"``, where NAME is the friendly name for the
        codes listed above or the numeric code otherwise; an
        ``"Error: ..."`` message on failure.
    """
    friendly_names = {4: "BACK", 66: "ENTER", 67: "DELETE"}
    try:
        if not self.serial:
            device = await self.get_device()
        else:
            device = await DeviceManager().get_device(self.serial)
            if not device:
                return f"Error: Device {self.serial} not found"

        label = friendly_names.get(keycode, str(keycode))
        await device.press_key(keycode)
    except ValueError as e:
        return f"Error: {str(e)}"
    return f"Pressed key {label}"
509
+
510
async def start_app(self, package: str, activity: str = "") -> str:
    """Launch an app on the device.

    Args:
        package: Package name (e.g., "com.android.settings")
        activity: Optional activity name

    Returns:
        Whatever the device's ``start_app`` returns, or an
        ``"Error: ..."`` message on failure.
    """
    try:
        if not self.serial:
            device = await self.get_device()
        else:
            device = await DeviceManager().get_device(self.serial)
            if not device:
                return f"Error: Device {self.serial} not found"

        return await device.start_app(package, activity)
    except ValueError as e:
        return f"Error: {str(e)}"
531
+
532
async def install_app(
    self, apk_path: str, reinstall: bool = False, grant_permissions: bool = True
) -> str:
    """Install an APK from the local filesystem onto the device.

    The device is resolved first; only then is the APK path checked.

    Args:
        apk_path: Path to the APK file
        reinstall: Whether to reinstall if app exists
        grant_permissions: Whether to grant all permissions

    Returns:
        Whatever the device's ``install_app`` returns, or an
        ``"Error: ..."`` message on failure.
    """
    try:
        if not self.serial:
            device = await self.get_device()
        else:
            device = await DeviceManager().get_device(self.serial)
            if not device:
                return f"Error: Device {self.serial} not found"

        if os.path.exists(apk_path):
            return await device.install_app(apk_path, reinstall, grant_permissions)
        return f"Error: APK file not found at {apk_path}"
    except ValueError as e:
        return f"Error: {str(e)}"
559
+
560
async def take_screenshot(self) -> Tuple[str, bytes]:
    """Capture the device screen and record it on this instance.

    The second item of the device's result is stored as
    ``self.last_screenshot``. A dict holding the current timestamp, that
    same item as ``image_data`` and the first item as ``format`` is
    appended to ``self.screenshots``.

    Returns:
        The tuple returned by the device's ``take_screenshot``.

    Raises:
        ValueError: If the device is not found or a ValueError occurs
            while capturing.
    """
    try:
        if not self.serial:
            device = await self.get_device()
        else:
            device = await DeviceManager().get_device(self.serial)
            if not device:
                raise ValueError(f"Device {self.serial} not found")

        shot = await device.take_screenshot()
        self.last_screenshot = shot[1]

        # Keep a timestamped history of every capture.
        record = {
            "timestamp": time.time(),
            "image_data": shot[1],
            "format": shot[0],  # Usually 'PNG'
        }
        self.screenshots.append(record)
        return shot
    except ValueError as e:
        raise ValueError(f"Error taking screenshot: {str(e)}")
588
+
589
async def list_packages(self, include_system_apps: bool = False) -> List[str]:
    """List the names of packages installed on the device.

    Runs ``pm list packages -f`` on the device, adding ``-3`` unless
    system apps were requested, and parses the output with
    ``parse_package_list``.

    Args:
        include_system_apps: Whether to include system apps (default: False)

    Returns:
        List of package names

    Raises:
        ValueError: If the device is not found or a ValueError occurs
            while listing.
    """
    try:
        if not self.serial:
            device = await self.get_device()
        else:
            device = await DeviceManager().get_device(self.serial)
            if not device:
                raise ValueError(f"Device {self.serial} not found")

        shell_cmd = "pm list packages -f"
        if not include_system_apps:
            shell_cmd += " -3"

        raw_output = await device._adb.shell(device._serial, shell_cmd)

        names = [entry["package"] for entry in self.parse_package_list(raw_output)]
        print(f"Returning {len(names)} packages")
        return names
    except ValueError as e:
        raise ValueError(f"Error listing packages: {str(e)}")
623
+
624
async def extract(self, filename: Optional[str] = None) -> str:
    """Extract and save the current UI state to a JSON file.

    Calls ``get_all_elements`` and writes its result, pretty-printed, to
    the given file, resolved to an absolute path.

    Args:
        filename: Optional filename to save the UI state (defaults to
            ``ui_state_TIMESTAMP.json``). ``.json`` is appended when
            missing.

    Returns:
        A message containing the absolute path of the saved file, or an
        ``"Error extracting UI state: ..."`` message on failure.
    """
    try:
        if not filename:
            filename = f"ui_state_{int(time.time())}.json"

        if not filename.endswith(".json"):
            filename += ".json"

        # Call with no arguments: get_all_elements already targets
        # self.serial. Passing the serial positionally raised a TypeError
        # that the handler below turned into a permanent error message.
        ui_elements = await self.get_all_elements()

        save_path = os.path.abspath(filename)
        async with aiofiles.open(save_path, "w", encoding="utf-8") as f:
            await f.write(json.dumps(ui_elements, indent=2))

        return f"UI state extracted and saved to {save_path}"

    except Exception as e:
        return f"Error extracting UI state: {e}"
658
+
659
async def get_all_elements(self, serial: Optional[str] = None) -> Dict[str, Any]:
    """Get all UI elements from the device, including non-interactive ones.

    Clears logcat, sends the ``com.droidrun.portal.GET_ALL_ELEMENTS``
    broadcast, watches logcat for a ``DROIDRUN_FILE`` line announcing
    where the JSON dump was written, pulls that file and parses it.

    Args:
        serial: Optional device serial; defaults to ``self.serial``.
            Accepted so callers that pass the serial explicitly keep
            working.

    Returns:
        A dict with ``all_elements`` (the parsed JSON), ``count`` (its
        length when it is a list, otherwise the length of its
        ``"elements"`` entry) and a ``message`` string.

    Raises:
        ValueError: If the device is not found, the file path never shows
            up in logcat, the JSON cannot be parsed, or any other step
            fails.
    """
    max_wait_time = 10  # how long to watch logcat, seconds
    poll_interval = 0.2  # pause between logcat checks, seconds

    try:
        target = serial or self.serial
        device = await DeviceManager().get_device(target)
        if not device:
            raise ValueError(f"Device {target} not found")

        # Reserve a local path for the pulled dump; removed in ``finally``.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as temp:
            local_path = temp.name

        try:
            # Clear logcat so only output from this request is matched.
            await device._adb.shell(device._serial, "logcat -c")

            # Ask the on-device service for ALL elements.
            await device._adb.shell(
                device._serial,
                "am broadcast -a com.droidrun.portal.GET_ALL_ELEMENTS",
            )

            loop = asyncio.get_event_loop()
            start_time = loop.time()
            device_path = None
            while loop.time() - start_time < max_wait_time:
                logcat_output = await device._adb.shell(
                    device._serial,
                    'logcat -d | grep "DROIDRUN_FILE" | grep "JSON data written to" | tail -1',
                )
                match = re.search(r"JSON data written to: (.*)", logcat_output)
                if match:
                    device_path = match.group(1).strip()
                    break
                await asyncio.sleep(poll_interval)

            if not device_path:
                raise ValueError(
                    f"Failed to find the JSON file path in logcat after {max_wait_time} seconds"
                )

            await device._adb.pull_file(device._serial, device_path, local_path)

            async with aiofiles.open(local_path, "r", encoding="utf-8") as f:
                json_content = await f.read()

            try:
                ui_data = json.loads(json_content)
            except json.JSONDecodeError:
                raise ValueError("Failed to parse UI elements JSON data")

            if isinstance(ui_data, list):
                count = len(ui_data)
            else:
                count = len(ui_data.get("elements", []))

            return {
                "all_elements": ui_data,
                "count": count,
                "message": "Retrieved all UI elements from the device screen",
            }

        except Exception as e:
            raise ValueError(f"Error retrieving all UI elements: {e}") from e
        finally:
            with contextlib.suppress(OSError):
                os.unlink(local_path)

    except Exception as e:
        raise ValueError(f"Error getting all UI elements: {e}") from e
756
+
757
def complete(self, success: bool, reason: str = ""):
    """Mark the task as finished and record its outcome.

    Args:
        success: Indicates if the task was successful.
        reason: Reason for failure/success. Optional on success (a
            default message is stored), required on failure.

    Raises:
        ValueError: If ``success`` is falsy and no reason is given. In
            that case ``self.success`` has already been set to False,
            while ``self.reason`` and ``self.finished`` are left untouched.
    """
    if not success:
        self.success = False
        if not reason:
            raise ValueError("Reason for failure is required if success is False.")
        self.reason = reason
        self.finished = True
        return

    self.success = True
    self.reason = reason if reason else "Task completed successfully."
    self.finished = True
775
+
776
async def get_phone_state(self, serial: Optional[str] = None) -> Dict[str, Any]:
    """Ask the on-device service for the current phone state.

    Clears logcat, sends the ``com.droidrun.portal.GET_PHONE_STATE``
    broadcast, then polls logcat for a ``DROIDRUN_PHONE_STATE_DATA`` line
    of the form ``...CHUNK|0|1|{json_data}`` and parses the JSON part.

    Args:
        serial: Optional device serial number

    Returns:
        The parsed phone-state dictionary. On timeout or any exception, a
        dict with ``error`` and ``message`` keys is returned instead of
        raising.
    """
    max_wait_time = 10  # Maximum wait time in seconds
    poll_interval = 0.2  # Check every 200ms

    try:
        if not serial:
            device = await self.get_device()
        else:
            device = await DeviceManager().get_device(serial)
            if not device:
                raise ValueError(f"Device {serial} not found")

        # Start from a clean logcat so only the fresh reply is matched.
        await device._adb.shell(device._serial, "logcat -c")
        await device._adb.shell(
            device._serial, "am broadcast -a com.droidrun.portal.GET_PHONE_STATE"
        )

        clock = asyncio.get_event_loop()
        began = clock.time()
        while clock.time() - began < max_wait_time:
            latest = await device._adb.shell(
                device._serial,
                'logcat -d | grep "DROIDRUN_PHONE_STATE_DATA" | tail -1',
            )

            if "CHUNK|" in latest:
                # Split at most three times so everything after the third
                # '|' stays intact, in case the JSON contains '|' itself.
                pieces = latest.split("|", 3)
                if len(pieces) == 4:
                    try:
                        return json.loads(pieces[3])
                    except json.JSONDecodeError:
                        # Not parseable yet: fall through and poll again.
                        pass

            await asyncio.sleep(poll_interval)

        return {
            "error": "Timeout",
            "message": f"Failed to get phone state data after {max_wait_time} seconds",
        }

    except Exception as e:
        return {"error": str(e), "message": f"Error getting phone state: {str(e)}"}
844
+
845
async def remember(self, information: str) -> str:
    """Store a piece of information in this instance's memory list.

    The stripped text is appended to ``self.memory``. Only the 10 most
    recent entries are kept, to prevent context overflow.

    Args:
        information: The information to remember

    Returns:
        Confirmation message echoing the information as given, or an
        error message when it is empty or not a string.
    """
    if not (information and isinstance(information, str)):
        return "Error: Please provide valid information to remember."

    keep_last = 10
    self.memory.append(information.strip())

    # Drop the oldest entries once the limit is exceeded.
    overflow = len(self.memory) - keep_last
    if overflow > 0:
        self.memory = self.memory[-keep_last:]

    return f"Remembered: {information}"
871
+
872
def get_memory(self) -> List[str]:
    """Return a snapshot of the stored memory items.

    Returns:
        A new list with the same items as ``self.memory``; changing it
        does not affect the stored list.
    """
    snapshot = list(self.memory)
    return snapshot