droidrun 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. droidrun/__init__.py +16 -11
  2. droidrun/__main__.py +1 -1
  3. droidrun/adb/__init__.py +3 -3
  4. droidrun/adb/device.py +1 -1
  5. droidrun/adb/manager.py +2 -2
  6. droidrun/agent/__init__.py +6 -0
  7. droidrun/agent/codeact/__init__.py +2 -4
  8. droidrun/agent/codeact/codeact_agent.py +330 -235
  9. droidrun/agent/codeact/events.py +12 -20
  10. droidrun/agent/codeact/prompts.py +0 -52
  11. droidrun/agent/common/default.py +5 -0
  12. droidrun/agent/common/events.py +4 -0
  13. droidrun/agent/context/__init__.py +23 -0
  14. droidrun/agent/context/agent_persona.py +15 -0
  15. droidrun/agent/context/context_injection_manager.py +66 -0
  16. droidrun/agent/context/episodic_memory.py +15 -0
  17. droidrun/agent/context/personas/__init__.py +11 -0
  18. droidrun/agent/context/personas/app_starter.py +44 -0
  19. droidrun/agent/context/personas/default.py +95 -0
  20. droidrun/agent/context/personas/extractor.py +52 -0
  21. droidrun/agent/context/personas/ui_expert.py +107 -0
  22. droidrun/agent/context/reflection.py +20 -0
  23. droidrun/agent/context/task_manager.py +124 -0
  24. droidrun/agent/droid/__init__.py +2 -2
  25. droidrun/agent/droid/droid_agent.py +269 -325
  26. droidrun/agent/droid/events.py +28 -0
  27. droidrun/agent/oneflows/reflector.py +265 -0
  28. droidrun/agent/planner/__init__.py +2 -4
  29. droidrun/agent/planner/events.py +9 -13
  30. droidrun/agent/planner/planner_agent.py +288 -0
  31. droidrun/agent/planner/prompts.py +33 -53
  32. droidrun/agent/utils/__init__.py +3 -0
  33. droidrun/agent/utils/async_utils.py +1 -40
  34. droidrun/agent/utils/chat_utils.py +265 -48
  35. droidrun/agent/utils/executer.py +49 -14
  36. droidrun/agent/utils/llm_picker.py +14 -10
  37. droidrun/agent/utils/trajectory.py +184 -0
  38. droidrun/cli/__init__.py +1 -1
  39. droidrun/cli/logs.py +283 -0
  40. droidrun/cli/main.py +364 -441
  41. droidrun/tools/__init__.py +5 -10
  42. droidrun/tools/{actions.py → adb.py} +381 -412
  43. droidrun/tools/ios.py +596 -0
  44. droidrun/tools/tools.py +95 -0
  45. droidrun-0.3.1.dist-info/METADATA +150 -0
  46. droidrun-0.3.1.dist-info/RECORD +50 -0
  47. droidrun/agent/planner/task_manager.py +0 -355
  48. droidrun/agent/planner/workflow.py +0 -371
  49. droidrun/tools/device.py +0 -29
  50. droidrun/tools/loader.py +0 -60
  51. droidrun-0.2.0.dist-info/METADATA +0 -373
  52. droidrun-0.2.0.dist-info/RECORD +0 -32
  53. {droidrun-0.2.0.dist-info → droidrun-0.3.1.dist-info}/WHEEL +0 -0
  54. {droidrun-0.2.0.dist-info → droidrun-0.3.1.dist-info}/entry_points.txt +0 -0
  55. {droidrun-0.2.0.dist-info → droidrun-0.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -10,14 +10,18 @@ import tempfile
10
10
  import asyncio
11
11
  import aiofiles
12
12
  import contextlib
13
+ import logging
13
14
  from typing import Optional, Dict, Tuple, List, Any
14
- from ..adb import Device, DeviceManager
15
+ from droidrun.adb.device import Device
16
+ from droidrun.adb.manager import DeviceManager
17
+ from droidrun.tools.tools import Tools
15
18
 
19
+ logger = logging.getLogger("droidrun-adb-tools")
16
20
 
17
- class Tools:
21
+ class AdbTools(Tools):
18
22
  """Core UI interaction tools for Android device control."""
19
23
 
20
- def __init__(self, serial: str) -> None:
24
+ def __init__(self, serial: str = "emulator-5554") -> None:
21
25
  # Instance‐level cache for clickable elements (index-based tapping)
22
26
  self.clickable_elements_cache: List[Dict[str, Any]] = []
23
27
  self.serial = serial
@@ -28,30 +32,29 @@ class Tools:
28
32
  self.finished = False
29
33
  # Memory storage for remembering important information
30
34
  self.memory: List[str] = []
35
+ # Store all screenshots with timestamps
36
+ self.screenshots: List[Dict[str, Any]] = []
31
37
 
32
38
  def get_device_serial(self) -> str:
33
39
  """Get the device serial from the instance or environment variable."""
34
40
  # First try using the instance's serial
35
41
  if self.serial:
36
42
  return self.serial
37
-
38
- # Fall back to environment variable if not set on the instance
39
- return os.environ.get("DROIDRUN_DEVICE_SERIAL", "")
40
43
 
41
44
  async def get_device(self) -> Optional[Device]:
42
45
  """Get the device instance using the instance's serial or from environment variable.
43
-
46
+
44
47
  Returns:
45
48
  Device instance or None if not found
46
49
  """
47
50
  serial = self.get_device_serial()
48
51
  if not serial:
49
- raise ValueError("No device serial specified - set DROIDRUN_DEVICE_SERIAL environment variable or provide device_serial parameter")
50
-
52
+ raise ValueError("No device serial specified - set device_serial parameter")
53
+
51
54
  device = await self.device_manager.get_device(serial)
52
55
  if not device:
53
56
  raise ValueError(f"Device {serial} not found")
54
-
57
+
55
58
  return device
56
59
 
57
60
  def parse_package_list(self, output: str) -> List[Dict[str, str]]:
@@ -73,270 +76,178 @@ class Tools:
73
76
  apps.append({"package": package.strip(), "path": path.strip()})
74
77
  return apps
75
78
 
76
- async def get_clickables(self, serial: Optional[str] = None) -> str:
79
+ def _parse_content_provider_output(self, raw_output: str) -> Optional[Dict[str, Any]]:
77
80
  """
78
- Get all clickable UI elements from the device using the custom TopViewService.
79
-
80
- This function interacts with the TopViewService app installed on the device
81
- to capture UI elements. The service writes UI data to a JSON file on the device,
82
- which is then pulled to the host. If no elements are found initially, it will
83
- retry for up to 30 seconds.
81
+ Parse the raw ADB content provider output and extract JSON data.
84
82
 
85
83
  Args:
86
- serial: Optional device serial number
87
-
84
+ raw_output (str): Raw output from ADB content query command
85
+
88
86
  Returns:
89
- JSON string containing UI elements extracted from the device screen
87
+ dict: Parsed JSON data or None if parsing failed
90
88
  """
91
- try:
92
- # Get the device
93
- if serial:
94
- from droidrun.adb import DeviceManager
95
- device_manager = DeviceManager()
96
- device = await device_manager.get_device(serial)
97
- if not device:
98
- raise ValueError(f"Device {serial} not found")
99
- else:
100
- device = await self.get_device()
101
-
102
- # Create a temporary file for the JSON
103
- with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as temp:
104
- local_path = temp.name
89
+ # The ADB content query output format is: "Row: 0 result={json_data}"
90
+ # We need to extract the JSON part after "result="
91
+ lines = raw_output.strip().split('\n')
92
+
93
+ for line in lines:
94
+ line = line.strip()
105
95
 
106
- try:
107
- # Set retry parameters
108
- max_total_time = 30 # Maximum total time to try in seconds
109
- retry_interval = 1.0 # Time between retries in seconds
110
- start_total_time = asyncio.get_event_loop().time()
96
+ # Look for lines that contain "result=" pattern
97
+ if "result=" in line:
98
+ # Extract everything after "result="
99
+ result_start = line.find("result=") + 7
100
+ json_str = line[result_start:]
111
101
 
112
- while True:
113
- # Check if we've exceeded total time
114
- current_time = asyncio.get_event_loop().time()
115
- if current_time - start_total_time > max_total_time:
116
- raise ValueError(f"Failed to get UI elements after {max_total_time} seconds of retries")
117
-
118
- # Clear logcat to make it easier to find our output
119
- await device._adb.shell(device._serial, "logcat -c")
120
-
121
- # Trigger the custom service via broadcast to get only interactive elements
122
- await device._adb.shell(device._serial, "am broadcast -a com.droidrun.portal.GET_ELEMENTS")
123
-
124
- # Poll for the JSON file path
125
- start_time = asyncio.get_event_loop().time()
126
- max_wait_time = 10 # Maximum wait time in seconds
127
- poll_interval = 0.2 # Check every 200ms
128
-
129
- device_path = None
130
- while asyncio.get_event_loop().time() - start_time < max_wait_time:
131
- # Check logcat for the file path
132
- logcat_output = await device._adb.shell(device._serial, "logcat -d | grep \"DROIDRUN_FILE\" | grep \"JSON data written to\" | tail -1")
133
-
134
- # Parse the file path if present
135
- match = re.search(r"JSON data written to: (.*)", logcat_output)
136
- if match:
137
- device_path = match.group(1).strip()
138
- break
139
-
140
- # Wait before polling again
141
- await asyncio.sleep(poll_interval)
142
-
143
- # Check if we found the file path
144
- if not device_path:
145
- await asyncio.sleep(retry_interval)
146
- continue
147
-
148
- # Pull the JSON file from the device
149
- await device._adb.pull_file(device._serial, device_path, local_path)
150
-
151
- # Read the JSON file
152
- async with aiofiles.open(local_path, "r", encoding="utf-8") as f:
153
- json_content = await f.read()
154
-
155
- # Try to parse the JSON
156
- try:
157
- ui_data = json.loads(json_content)
158
-
159
- # Filter out the "type" attribute from all elements
160
- filtered_data = []
161
- for element in ui_data:
162
- # Create a copy of the element without the "type" attribute
163
- filtered_element = {k: v for k, v in element.items() if k != "type"}
164
-
165
- # Also filter children if present
166
- if "children" in filtered_element:
167
- filtered_element["children"] = [
168
- {k: v for k, v in child.items() if k != "type"}
169
- for child in filtered_element["children"]
170
- ]
171
-
172
- filtered_data.append(filtered_element)
173
-
174
- # If we got elements, store them and return
175
- if filtered_data:
176
- # Store the filtered UI data in cache
177
- global CLICKABLE_ELEMENTS_CACHE
178
- CLICKABLE_ELEMENTS_CACHE = filtered_data
179
-
180
- # Add a small sleep to ensure UI is fully loaded/processed
181
- await asyncio.sleep(0.5) # 500ms sleep
182
-
183
- # Convert the dictionary to a JSON string before returning
184
- result = {
185
- "clickable_elements": filtered_data,
186
- "count": len(filtered_data),
187
- "message": f"Found {len(filtered_data)} UI elements after retrying"
188
- }
189
-
190
- return result
191
-
192
- # If no elements found, wait and retry
193
- await asyncio.sleep(retry_interval)
194
-
195
- except json.JSONDecodeError:
196
- # If JSON parsing failed, wait and retry
197
- await asyncio.sleep(retry_interval)
198
- continue
199
-
200
- except Exception as e:
201
- # Clean up in case of error
202
- with contextlib.suppress(OSError):
203
- os.unlink(local_path)
204
- raise ValueError(f"Error retrieving clickable elements: {e}")
205
-
206
- except Exception as e:
207
- raise ValueError(f"Error getting clickable elements: {e}")
208
-
102
+ try:
103
+ # Parse the JSON string
104
+ json_data = json.loads(json_str)
105
+ return json_data
106
+ except json.JSONDecodeError:
107
+ continue
108
+
109
+ # Fallback: try to parse lines that start with { or [
110
+ elif line.startswith('{') or line.startswith('['):
111
+ try:
112
+ json_data = json.loads(line)
113
+ return json_data
114
+ except json.JSONDecodeError:
115
+ continue
116
+
117
+ # If no valid JSON found in individual lines, try the entire output
118
+ try:
119
+ json_data = json.loads(raw_output.strip())
120
+ return json_data
121
+ except json.JSONDecodeError:
122
+ return None
209
123
 
210
124
  async def tap_by_index(self, index: int, serial: Optional[str] = None) -> str:
211
125
  """
212
126
  Tap on a UI element by its index.
213
-
127
+
214
128
  This function uses the cached clickable elements
215
129
  to find the element with the given index and tap on its center coordinates.
216
-
130
+
217
131
  Args:
218
132
  index: Index of the element to tap
219
-
133
+
220
134
  Returns:
221
135
  Result message
222
136
  """
223
-
137
+
224
138
  def collect_all_indices(elements):
225
139
  """Recursively collect all indices from elements and their children."""
226
140
  indices = []
227
141
  for item in elements:
228
- if item.get('index') is not None:
229
- indices.append(item.get('index'))
142
+ if item.get("index") is not None:
143
+ indices.append(item.get("index"))
230
144
  # Check children if present
231
- children = item.get('children', [])
145
+ children = item.get("children", [])
232
146
  indices.extend(collect_all_indices(children))
233
147
  return indices
234
148
 
235
149
  def find_element_by_index(elements, target_index):
236
150
  """Recursively find an element with the given index."""
237
151
  for item in elements:
238
- if item.get('index') == target_index:
152
+ if item.get("index") == target_index:
239
153
  return item
240
154
  # Check children if present
241
- children = item.get('children', [])
155
+ children = item.get("children", [])
242
156
  result = find_element_by_index(children, target_index)
243
157
  if result:
244
158
  return result
245
159
  return None
246
-
160
+
247
161
  try:
248
162
  # Check if we have cached elements
249
- if not CLICKABLE_ELEMENTS_CACHE:
250
- return "Error: No UI elements cached. Call get_clickables first."
251
-
163
+ if not self.clickable_elements_cache:
164
+ return "Error: No UI elements cached. Call get_state first."
165
+
252
166
  # Find the element with the given index (including in children)
253
- element = find_element_by_index(CLICKABLE_ELEMENTS_CACHE, index)
254
-
167
+ element = find_element_by_index(self.clickable_elements_cache, index)
168
+
255
169
  if not element:
256
170
  # List available indices to help the user
257
- indices = sorted(collect_all_indices(CLICKABLE_ELEMENTS_CACHE))
171
+ indices = sorted(collect_all_indices(self.clickable_elements_cache))
258
172
  indices_str = ", ".join(str(idx) for idx in indices[:20])
259
173
  if len(indices) > 20:
260
174
  indices_str += f"... and {len(indices) - 20} more"
261
-
175
+
262
176
  return f"Error: No element found with index {index}. Available indices: {indices_str}"
263
-
177
+
264
178
  # Get the bounds of the element
265
- bounds_str = element.get('bounds')
179
+ bounds_str = element.get("bounds")
266
180
  if not bounds_str:
267
- element_text = element.get('text', 'No text')
268
- element_type = element.get('type', 'unknown')
269
- element_class = element.get('className', 'Unknown class')
181
+ element_text = element.get("text", "No text")
182
+ element_type = element.get("type", "unknown")
183
+ element_class = element.get("className", "Unknown class")
270
184
  return f"Error: Element with index {index} ('{element_text}', {element_class}, type: {element_type}) has no bounds and cannot be tapped"
271
-
185
+
272
186
  # Parse the bounds (format: "left,top,right,bottom")
273
187
  try:
274
- left, top, right, bottom = map(int, bounds_str.split(','))
188
+ left, top, right, bottom = map(int, bounds_str.split(","))
275
189
  except ValueError:
276
190
  return f"Error: Invalid bounds format for element with index {index}: {bounds_str}"
277
-
191
+
278
192
  # Calculate the center of the element
279
193
  x = (left + right) // 2
280
194
  y = (top + bottom) // 2
281
-
195
+
282
196
  # Get the device and tap at the coordinates
283
197
  if serial:
284
- from droidrun.adb import DeviceManager
285
- device_manager = DeviceManager()
286
- device = await device_manager.get_device(serial)
198
+ device = await self.device_manager.get_device(serial)
287
199
  if not device:
288
200
  return f"Error: Device {serial} not found"
289
201
  else:
290
202
  device = await self.get_device()
291
-
203
+
292
204
  await device.tap(x, y)
293
-
205
+
294
206
  # Add a small delay to allow UI to update
295
207
  await asyncio.sleep(0.5)
296
-
297
-
208
+
298
209
  # Create a descriptive response
299
210
  response_parts = []
300
211
  response_parts.append(f"Tapped element with index {index}")
301
212
  response_parts.append(f"Text: '{element.get('text', 'No text')}'")
302
213
  response_parts.append(f"Class: {element.get('className', 'Unknown class')}")
303
214
  response_parts.append(f"Type: {element.get('type', 'unknown')}")
304
-
215
+
305
216
  # Add information about children if present
306
- children = element.get('children', [])
217
+ children = element.get("children", [])
307
218
  if children:
308
- child_texts = [child.get('text') for child in children if child.get('text')]
219
+ child_texts = [
220
+ child.get("text") for child in children if child.get("text")
221
+ ]
309
222
  if child_texts:
310
223
  response_parts.append(f"Contains text: {' | '.join(child_texts)}")
311
-
224
+
312
225
  response_parts.append(f"Coordinates: ({x}, {y})")
313
-
226
+
314
227
  return " | ".join(response_parts)
315
228
  except ValueError as e:
316
229
  return f"Error: {str(e)}"
317
230
 
318
-
319
231
  # Rename the old tap function to tap_by_coordinates for backward compatibility
320
232
  async def tap_by_coordinates(self, x: int, y: int) -> bool:
321
233
  """
322
- Tap on the device screen at specific coordinates.
323
-
234
+ Tap on the device screen at specific coordinates.
235
+
324
236
  Args:
325
237
  x: X coordinate
326
238
  y: Y coordinate
327
-
239
+
328
240
  Returns:
329
241
  Bool indicating success or failure
330
242
  """
331
243
  try:
332
244
  if self.serial:
333
- device_manager = DeviceManager()
334
- device = await device_manager.get_device(self.serial)
245
+ device = await self.device_manager.get_device(self.serial)
335
246
  if not device:
336
247
  return f"Error: Device {self.serial} not found"
337
248
  else:
338
249
  device = await self.get_device()
339
-
250
+
340
251
  await device.tap(x, y)
341
252
  print(f"Tapped at coordinates ({x}, {y})")
342
253
  return True
@@ -348,29 +259,24 @@ class Tools:
348
259
  async def tap(self, index: int) -> str:
349
260
  """
350
261
  Tap on a UI element by its index.
351
-
262
+
352
263
  This function uses the cached clickable elements from the last get_clickables call
353
264
  to find the element with the given index and tap on its center coordinates.
354
-
265
+
355
266
  Args:
356
267
  index: Index of the element to tap
357
-
268
+
358
269
  Returns:
359
270
  Result message
360
271
  """
361
272
  return await self.tap_by_index(index)
362
273
 
363
274
  async def swipe(
364
- self,
365
- start_x: int,
366
- start_y: int,
367
- end_x: int,
368
- end_y: int,
369
- duration_ms: int = 300
275
+ self, start_x: int, start_y: int, end_x: int, end_y: int, duration_ms: int = 300
370
276
  ) -> bool:
371
277
  """
372
278
  Performs a straight-line swipe gesture on the device screen.
373
- To perform a hold (long press), set the start and end coordinates to the same values and increase the duration as needed.
279
+ To perform a hold (long press), set the start and end coordinates to the same values and increase the duration as needed.
374
280
  Args:
375
281
  start_x: Starting X coordinate
376
282
  start_y: Starting Y coordinate
@@ -382,14 +288,14 @@ class Tools:
382
288
  """
383
289
  try:
384
290
  if self.serial:
385
- device_manager = DeviceManager()
386
- device = await device_manager.get_device(self.serial)
291
+ device = await self.device_manager.get_device(self.serial)
387
292
  if not device:
388
293
  return f"Error: Device {self.serial} not found"
389
294
  else:
390
295
  device = await self.get_device()
391
-
296
+
392
297
  await device.swipe(start_x, start_y, end_x, end_y, duration_ms)
298
+ await asyncio.sleep(1)
393
299
  print(f"Swiped from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration_ms}ms")
394
300
  return True
395
301
  except ValueError as e:
@@ -398,132 +304,142 @@ class Tools:
398
304
 
399
305
  async def input_text(self, text: str, serial: Optional[str] = None) -> str:
400
306
  """
401
- Input text on the device using Base64 encoding and broadcast intent.
402
-
307
+ Input text on the device.
308
+ Always make sure that the Focused Element is not None before inputting text.
309
+
403
310
  Args:
404
311
  text: Text to input. Can contain spaces, newlines, and special characters including non-ASCII.
405
- serial: Optional device serial (for backward compatibility)
406
-
312
+
407
313
  Returns:
408
314
  Result message
409
315
  """
410
316
  try:
411
317
  if serial:
412
- device_manager = DeviceManager()
413
- device = await device_manager.get_device(serial)
318
+ device = await self.device_manager.get_device(serial)
414
319
  if not device:
415
320
  return f"Error: Device {serial} not found"
416
321
  else:
417
322
  device = await self.get_device()
418
-
323
+
419
324
  # Save the current keyboard
420
- original_ime = await device._adb.shell(device._serial, "settings get secure default_input_method")
325
+ original_ime = await device._adb.shell(
326
+ device._serial, "settings get secure default_input_method"
327
+ )
421
328
  original_ime = original_ime.strip()
422
-
329
+
423
330
  # Enable the Droidrun keyboard
424
- await device._adb.shell(device._serial, "ime enable com.droidrun.portal/.DroidrunKeyboardIME")
425
-
331
+ await device._adb.shell(
332
+ device._serial, "ime enable com.droidrun.portal/.DroidrunKeyboardIME"
333
+ )
334
+
426
335
  # Set the Droidrun keyboard as the default
427
- await device._adb.shell(device._serial, "ime set com.droidrun.portal/.DroidrunKeyboardIME")
428
-
336
+ await device._adb.shell(
337
+ device._serial, "ime set com.droidrun.portal/.DroidrunKeyboardIME"
338
+ )
339
+
429
340
  # Wait for keyboard to change
430
- await asyncio.sleep(0.2)
431
-
341
+ await asyncio.sleep(1)
342
+
432
343
  # Encode the text to Base64
433
344
  import base64
345
+
434
346
  encoded_text = base64.b64encode(text.encode()).decode()
435
-
436
- # Send the broadcast intent with the Base64-encoded text
437
- cmd = f'am broadcast -a DROIDRUN_INPUT_B64 --es msg "{encoded_text}"'
347
+
348
+ cmd = f'content insert --uri "content://com.droidrun.portal/keyboard/input" --bind base64_text:s:"{encoded_text}"'
438
349
  await device._adb.shell(device._serial, cmd)
439
-
350
+
440
351
  # Wait for text input to complete
441
352
  await asyncio.sleep(0.5)
442
-
353
+
443
354
  # Restore the original keyboard
444
355
  if original_ime and "com.droidrun.portal" not in original_ime:
445
356
  await device._adb.shell(device._serial, f"ime set {original_ime}")
446
-
357
+
447
358
  return f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"
448
359
  except ValueError as e:
449
360
  return f"Error: {str(e)}"
450
361
  except Exception as e:
451
362
  return f"Error sending text input: {str(e)}"
452
363
 
364
+ async def back(self) -> str:
365
+ """
366
+ Go back on the current view.
367
+ This presses the Android back button.
368
+ """
369
+ try:
370
+ if self.serial:
371
+ device = await self.device_manager.get_device(self.serial)
372
+ if not device:
373
+ return f"Error: Device {self.serial} not found"
374
+ else:
375
+ device = await self.get_device()
376
+
377
+ await device.press_key(3)
378
+ return f"Pressed key BACK"
379
+ except ValueError as e:
380
+ return f"Error: {str(e)}"
381
+
453
382
  async def press_key(self, keycode: int) -> str:
454
383
  """
455
- Press a key on the device.
456
-
384
+ Press a key on the Android device.
385
+
457
386
  Common keycodes:
458
387
  - 3: HOME
459
388
  - 4: BACK
460
- - 24: VOLUME UP
461
- - 25: VOLUME DOWN
462
- - 26: POWER
463
- - 82: MENU
464
-
389
+ - 66: ENTER
390
+ - 67: DELETE
391
+
465
392
  Args:
466
393
  keycode: Android keycode to press
467
394
  """
468
395
  try:
469
396
  if self.serial:
470
- device_manager = DeviceManager()
471
- device = await device_manager.get_device(self.serial)
397
+ device = await self.device_manager.get_device(self.serial)
472
398
  if not device:
473
399
  return f"Error: Device {self.serial} not found"
474
400
  else:
475
401
  device = await self.get_device()
476
-
402
+
477
403
  key_names = {
478
- 3: "HOME",
404
+ 66: "ENTER",
479
405
  4: "BACK",
480
- 24: "VOLUME UP",
481
- 25: "VOLUME DOWN",
482
- 26: "POWER",
483
- 82: "MENU",
406
+ 3: "HOME",
407
+ 67: "DELETE",
484
408
  }
485
409
  key_name = key_names.get(keycode, str(keycode))
486
-
410
+
487
411
  await device.press_key(keycode)
488
412
  return f"Pressed key {key_name}"
489
413
  except ValueError as e:
490
414
  return f"Error: {str(e)}"
491
415
 
492
- async def start_app(
493
- self,
494
- package: str,
495
- activity: str = ""
496
- ) -> str:
416
+ async def start_app(self, package: str, activity: str = "") -> str:
497
417
  """
498
418
  Start an app on the device.
499
-
419
+
500
420
  Args:
501
421
  package: Package name (e.g., "com.android.settings")
502
422
  activity: Optional activity name
503
423
  """
504
424
  try:
505
425
  if self.serial:
506
- device_manager = DeviceManager()
507
- device = await device_manager.get_device(self.serial)
426
+ device = await self.device_manager.get_device(self.serial)
508
427
  if not device:
509
428
  return f"Error: Device {self.serial} not found"
510
429
  else:
511
430
  device = await self.get_device()
512
-
431
+
513
432
  result = await device.start_app(package, activity)
514
433
  return result
515
434
  except ValueError as e:
516
435
  return f"Error: {str(e)}"
517
436
 
518
437
  async def install_app(
519
- self,
520
- apk_path: str,
521
- reinstall: bool = False,
522
- grant_permissions: bool = True
438
+ self, apk_path: str, reinstall: bool = False, grant_permissions: bool = True
523
439
  ) -> str:
524
440
  """
525
441
  Install an app on the device.
526
-
442
+
527
443
  Args:
528
444
  apk_path: Path to the APK file
529
445
  reinstall: Whether to reinstall if app exists
@@ -531,93 +447,92 @@ class Tools:
531
447
  """
532
448
  try:
533
449
  if self.serial:
534
- device_manager = DeviceManager()
535
- device = await device_manager.get_device(self.serial)
450
+ device = await self.device_manager.get_device(self.serial)
536
451
  if not device:
537
452
  return f"Error: Device {self.serial} not found"
538
453
  else:
539
454
  device = await self.get_device()
540
-
455
+
541
456
  if not os.path.exists(apk_path):
542
457
  return f"Error: APK file not found at {apk_path}"
543
-
458
+
544
459
  result = await device.install_app(apk_path, reinstall, grant_permissions)
545
460
  return result
546
461
  except ValueError as e:
547
462
  return f"Error: {str(e)}"
548
463
 
549
- async def take_screenshot(self) -> bool:
464
+ async def take_screenshot(self) -> Tuple[str, bytes]:
550
465
  """
551
466
  Take a screenshot of the device.
552
-
553
467
  This function captures the current screen and adds the screenshot to context in the next message.
554
-
555
- This does not save the screenshot anywhere on the phone, it just attaches it to the next message.
556
-
557
- Returns:
558
- True if successful, False otherwise
468
+ Also stores the screenshot in the screenshots list with timestamp for later GIF creation.
559
469
  """
560
470
  try:
561
471
  if self.serial:
562
- device_manager = DeviceManager()
563
- device = await device_manager.get_device(self.serial)
472
+ device = await self.device_manager.get_device(self.serial)
564
473
  if not device:
565
474
  raise ValueError(f"Device {self.serial} not found")
566
475
  else:
567
476
  device = await self.get_device()
568
477
  screen_tuple = await device.take_screenshot()
569
478
  self.last_screenshot = screen_tuple[1]
570
- return True
479
+
480
+ # Store screenshot with timestamp
481
+ self.screenshots.append(
482
+ {
483
+ "timestamp": time.time(),
484
+ "image_data": screen_tuple[1],
485
+ "format": screen_tuple[0], # Usually 'PNG'
486
+ }
487
+ )
488
+ return screen_tuple
571
489
  except ValueError as e:
572
490
  raise ValueError(f"Error taking screenshot: {str(e)}")
573
491
 
574
- async def list_packages(
575
- self,
576
- include_system_apps: bool = False
577
- ) -> List[str]:
492
+ async def list_packages(self, include_system_apps: bool = False) -> List[str]:
578
493
  """
579
494
  List installed packages on the device.
580
-
495
+
581
496
  Args:
582
497
  include_system_apps: Whether to include system apps (default: False)
583
-
498
+
584
499
  Returns:
585
500
  List of package names
586
501
  """
587
502
  try:
588
503
  if self.serial:
589
- device_manager = DeviceManager()
590
- device = await device_manager.get_device(self.serial)
504
+ device = await self.device_manager.get_device(self.serial)
591
505
  if not device:
592
506
  raise ValueError(f"Device {self.serial} not found")
593
507
  else:
594
508
  device = await self.get_device()
595
-
509
+
596
510
  # Use the direct ADB command to get packages with paths
597
511
  cmd = ["pm", "list", "packages", "-f"]
598
512
  if not include_system_apps:
599
513
  cmd.append("-3")
600
-
514
+
601
515
  output = await device._adb.shell(device._serial, " ".join(cmd))
602
-
516
+
603
517
  # Parse the package list using the function
604
- packages = self.parse_package_list(output)
518
+ packages = self.parse_package_list(output)
605
519
  # Format package list for better readability
606
520
  package_list = [pack["package"] for pack in packages]
607
- print(f"Returning {len(package_list)} packages")
521
+ for package in package_list:
522
+ print(package)
608
523
  return package_list
609
524
  except ValueError as e:
610
525
  raise ValueError(f"Error listing packages: {str(e)}")
611
526
 
612
527
  async def extract(self, filename: Optional[str] = None) -> str:
613
528
  """Extract and save the current UI state to a JSON file.
614
-
529
+
615
530
  This function captures the current UI state including all UI elements
616
531
  and saves it to a JSON file for later analysis or reference.
617
-
532
+
618
533
  Args:
619
534
  filename: Optional filename to save the UI state (defaults to ui_state_TIMESTAMP.json)
620
-
535
+
621
536
  Returns:
622
537
  Path to the saved JSON file
623
538
  """
@@ -626,120 +541,133 @@ class Tools:
626
541
  if not filename:
627
542
  timestamp = int(time.time())
628
543
  filename = f"ui_state_{timestamp}.json"
629
-
544
+
630
545
  # Ensure the filename ends with .json
631
546
  if not filename.endswith(".json"):
632
547
  filename += ".json"
633
-
548
+
634
549
  # Get the UI elements
635
550
  ui_elements = await self.get_all_elements(self.serial)
636
-
551
+
637
552
  # Save to file
638
553
  save_path = os.path.abspath(filename)
639
554
  async with aiofiles.open(save_path, "w", encoding="utf-8") as f:
640
555
  await f.write(json.dumps(ui_elements, indent=2))
641
-
556
+
642
557
  return f"UI state extracted and saved to {save_path}"
643
-
558
+
644
559
  except Exception as e:
645
560
  return f"Error extracting UI state: {e}"
646
561
 
647
562
  async def get_all_elements(self) -> Dict[str, Any]:
648
563
  """
649
564
  Get all UI elements from the device, including non-interactive elements.
650
-
565
+
651
566
  This function interacts with the TopViewService app installed on the device
652
567
  to capture all UI elements, even those that are not interactive. This provides
653
568
  a complete view of the UI hierarchy for analysis or debugging purposes.
654
-
569
+
655
570
  Returns:
656
571
  Dictionary containing all UI elements extracted from the device screen
657
572
  """
658
573
  try:
659
574
  # Get the device
660
- device_manager = DeviceManager()
661
- device = await device_manager.get_device(self.serial)
575
+ device = await self.device_manager.get_device(self.serial)
662
576
  if not device:
663
577
  raise ValueError(f"Device {self.serial} not found")
664
-
578
+
665
579
  # Create a temporary file for the JSON
666
- with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as temp:
580
+ with tempfile.NamedTemporaryFile(suffix=".json") as temp:
667
581
  local_path = temp.name
668
-
669
- try:
670
- # Clear logcat to make it easier to find our output
671
- await device._adb.shell(device._serial, "logcat -c")
672
-
673
- # Trigger the custom service via broadcast to get ALL elements
674
- await device._adb.shell(device._serial, "am broadcast -a com.droidrun.portal.GET_ALL_ELEMENTS")
675
-
676
- # Poll for the JSON file path
677
- start_time = asyncio.get_event_loop().time()
678
- max_wait_time = 10 # Maximum wait time in seconds
679
- poll_interval = 0.2 # Check every 200ms
680
-
681
- device_path = None
682
- while asyncio.get_event_loop().time() - start_time < max_wait_time:
683
- # Check logcat for the file path
684
- logcat_output = await device._adb.shell(device._serial, "logcat -d | grep \"DROIDRUN_FILE\" | grep \"JSON data written to\" | tail -1")
685
-
686
- # Parse the file path if present
687
- match = re.search(r"JSON data written to: (.*)", logcat_output)
688
- if match:
689
- device_path = match.group(1).strip()
690
- break
691
-
692
- # Wait before polling again
693
- await asyncio.sleep(poll_interval)
694
-
695
- # Check if we found the file path
696
- if not device_path:
697
- raise ValueError(f"Failed to find the JSON file path in logcat after {max_wait_time} seconds")
698
-
699
- # Pull the JSON file from the device
700
- await device._adb.pull_file(device._serial, device_path, local_path)
701
-
702
- # Read the JSON file
703
- async with aiofiles.open(local_path, "r", encoding="utf-8") as f:
704
- json_content = await f.read()
705
-
706
- # Clean up the temporary file
707
- with contextlib.suppress(OSError):
708
- os.unlink(local_path)
709
-
710
- # Try to parse the JSON
711
- import json
582
+
712
583
  try:
713
- ui_data = json.loads(json_content)
714
-
715
- return {
716
- "all_elements": ui_data,
717
- "count": len(ui_data) if isinstance(ui_data, list) else sum(1 for _ in ui_data.get("elements", [])),
718
- "message": "Retrieved all UI elements from the device screen"
719
- }
720
- except json.JSONDecodeError:
721
- raise ValueError("Failed to parse UI elements JSON data")
722
-
723
- except Exception as e:
724
- # Clean up in case of error
725
- with contextlib.suppress(OSError):
726
- os.unlink(local_path)
727
- raise ValueError(f"Error retrieving all UI elements: {e}")
728
-
584
+ # Clear logcat to make it easier to find our output
585
+ await device._adb.shell(device._serial, "logcat -c")
586
+
587
+ # Trigger the custom service via broadcast to get ALL elements
588
+ await device._adb.shell(
589
+ device._serial,
590
+ "am broadcast -a com.droidrun.portal.GET_ALL_ELEMENTS",
591
+ )
592
+
593
+ # Poll for the JSON file path
594
+ start_time = asyncio.get_event_loop().time()
595
+ max_wait_time = 10 # Maximum wait time in seconds
596
+ poll_interval = 0.2 # Check every 200ms
597
+
598
+ device_path = None
599
+ while asyncio.get_event_loop().time() - start_time < max_wait_time:
600
+ # Check logcat for the file path
601
+ logcat_output = await device._adb.shell(
602
+ device._serial,
603
+ 'logcat -d | grep "DROIDRUN_FILE" | grep "JSON data written to" | tail -1',
604
+ )
605
+
606
+ # Parse the file path if present
607
+ match = re.search(r"JSON data written to: (.*)", logcat_output)
608
+ if match:
609
+ device_path = match.group(1).strip()
610
+ break
611
+
612
+ # Wait before polling again
613
+ await asyncio.sleep(poll_interval)
614
+
615
+ # Check if we found the file path
616
+ if not device_path:
617
+ raise ValueError(
618
+ f"Failed to find the JSON file path in logcat after {max_wait_time} seconds"
619
+ )
620
+
621
+ logger.debug(f"Pulling file from {device_path} to {local_path}")
622
+ # Pull the JSON file from the device
623
+ await device._adb.pull_file(device._serial, device_path, local_path)
624
+
625
+ # Read the JSON file
626
+ async with aiofiles.open(local_path, "r", encoding="utf-8") as f:
627
+ json_content = await f.read()
628
+
629
+ # Clean up the temporary file
630
+ with contextlib.suppress(OSError):
631
+ os.unlink(local_path)
632
+
633
+ # Try to parse the JSON
634
+ import json
635
+
636
+ try:
637
+ ui_data = json.loads(json_content)
638
+
639
+ return {
640
+ "all_elements": ui_data,
641
+ "count": (
642
+ len(ui_data)
643
+ if isinstance(ui_data, list)
644
+ else sum(1 for _ in ui_data.get("elements", []))
645
+ ),
646
+ "message": "Retrieved all UI elements from the device screen",
647
+ }
648
+ except json.JSONDecodeError:
649
+ raise ValueError("Failed to parse UI elements JSON data")
650
+
651
+ except Exception as e:
652
+ # Clean up in case of error
653
+ with contextlib.suppress(OSError):
654
+ os.unlink(local_path)
655
+ raise ValueError(f"Error retrieving all UI elements: {e}")
656
+
729
657
  except Exception as e:
730
658
  raise ValueError(f"Error getting all UI elements: {e}")
731
-
659
+
732
660
  def complete(self, success: bool, reason: str = ""):
733
661
  """
734
662
  Mark the task as finished.
735
663
 
736
664
  Args:
737
665
  success: Indicates if the task was successful.
738
- reason: Reason for failure, if any. (required if success is False)
666
+ reason: Reason for failure/success
739
667
  """
740
668
  if success:
741
669
  self.success = True
742
- self.reason = self.reason or "Task completed successfully."
670
+ self.reason = reason or "Task completed successfully."
743
671
  self.finished = True
744
672
  else:
745
673
  self.success = False
@@ -748,91 +676,132 @@ class Tools:
748
676
  self.reason = reason
749
677
  self.finished = True
750
678
 
751
-
752
- async def get_phone_state(self, serial: Optional[str] = None) -> Dict[str, Any]:
753
- """
754
- Get the current phone state including current activity and keyboard visibility.
755
-
756
- Args:
757
- serial: Optional device serial number
758
-
759
- Returns:
760
- Dictionary with current phone state information
761
- """
762
- try:
763
- # Get the device
764
- if serial:
765
- device_manager = DeviceManager()
766
- device = await device_manager.get_device(serial)
767
- if not device:
768
- raise ValueError(f"Device {serial} not found")
769
- else:
770
- device = await self.get_device()
771
-
772
- # Get the top resumed activity
773
- activity_output = await device._adb.shell(device._serial, "dumpsys activity activities | grep topResumedActivity")
774
-
775
- if not activity_output:
776
- # Try alternative command for older Android versions
777
- activity_output = await device._adb.shell(device._serial, "dumpsys activity activities | grep ResumedActivity")
778
-
779
- # Get keyboard visibility state
780
- keyboard_output = await device._adb.shell(device._serial, "dumpsys input_method | grep mInputShown")
781
-
782
- # Process activity information
783
- current_activity = "Unable to determine current activity"
784
- if activity_output:
785
- current_activity = activity_output.strip()
786
-
787
- # Process keyboard information
788
- is_keyboard_shown = False
789
- if keyboard_output:
790
- is_keyboard_shown = "mInputShown=true" in keyboard_output
791
-
792
- # Return combined state
793
- return {
794
- "current_activity": current_activity,
795
- "keyboard_shown": is_keyboard_shown,
796
- }
797
-
798
- except Exception as e:
799
- return {
800
- "error": str(e),
801
- "message": f"Error getting phone state: {str(e)}"
802
- }
803
-
804
679
  async def remember(self, information: str) -> str:
805
680
  """
806
681
  Store important information to remember for future context.
807
-
808
- This information will be included in future LLM prompts to help maintain context
682
+
683
+ This information will be extracted and included into your next steps to maintain context
809
684
  across interactions. Use this for critical facts, observations, or user preferences
810
685
  that should influence future decisions.
811
-
686
+
812
687
  Args:
813
688
  information: The information to remember
814
-
689
+
815
690
  Returns:
816
691
  Confirmation message
817
692
  """
818
693
  if not information or not isinstance(information, str):
819
694
  return "Error: Please provide valid information to remember."
820
-
695
+
821
696
  # Add the information to memory
822
697
  self.memory.append(information.strip())
823
-
698
+
824
699
  # Limit memory size to prevent context overflow (keep most recent items)
825
700
  max_memory_items = 10
826
701
  if len(self.memory) > max_memory_items:
827
702
  self.memory = self.memory[-max_memory_items:]
828
-
703
+
829
704
  return f"Remembered: {information}"
830
-
705
+
831
706
  def get_memory(self) -> List[str]:
832
707
  """
833
708
  Retrieve all stored memory items.
834
-
709
+
835
710
  Returns:
836
711
  List of stored memory items
837
712
  """
838
- return self.memory.copy()
713
+ return self.memory.copy()
714
+
715
+ async def get_state(self, serial: Optional[str] = None) -> Dict[str, Any]:
716
+ """
717
+ Get both the a11y tree and phone state in a single call using the combined /state endpoint.
718
+
719
+ Args:
720
+ serial: Optional device serial number
721
+
722
+ Returns:
723
+ Dictionary containing both 'a11y_tree' and 'phone_state' data
724
+ """
725
+
726
+ try:
727
+ if serial:
728
+ device = await self.device_manager.get_device(serial)
729
+ if not device:
730
+ raise ValueError(f"Device {serial} not found")
731
+ else:
732
+ device = await self.get_device()
733
+
734
+ adb_output = await device._adb.shell(
735
+ device._serial,
736
+ 'content query --uri content://com.droidrun.portal/state'
737
+ )
738
+
739
+ state_data = self._parse_content_provider_output(adb_output)
740
+
741
+ if state_data is None:
742
+ return {
743
+ "error": "Parse Error",
744
+ "message": "Failed to parse state data from ContentProvider response"
745
+ }
746
+
747
+ if isinstance(state_data, dict) and "data" in state_data:
748
+ data_str = state_data["data"]
749
+ try:
750
+ combined_data = json.loads(data_str)
751
+ except json.JSONDecodeError:
752
+ return {
753
+ "error": "Parse Error",
754
+ "message": "Failed to parse JSON data from ContentProvider data field"
755
+ }
756
+ else:
757
+ return {
758
+ "error": "Format Error",
759
+ "message": f"Unexpected state data format: {type(state_data)}"
760
+ }
761
+
762
+ # Validate that both a11y_tree and phone_state are present
763
+ if "a11y_tree" not in combined_data:
764
+ return {
765
+ "error": "Missing Data",
766
+ "message": "a11y_tree not found in combined state data"
767
+ }
768
+
769
+ if "phone_state" not in combined_data:
770
+ return {
771
+ "error": "Missing Data",
772
+ "message": "phone_state not found in combined state data"
773
+ }
774
+
775
+ # Filter out the "type" attribute from all a11y_tree elements
776
+ elements = combined_data["a11y_tree"]
777
+ filtered_elements = []
778
+ for element in elements:
779
+ # Create a copy of the element without the "type" attribute
780
+ filtered_element = {
781
+ k: v for k, v in element.items() if k != "type"
782
+ }
783
+
784
+ # Also filter children if present
785
+ if "children" in filtered_element:
786
+ filtered_element["children"] = [
787
+ {k: v for k, v in child.items() if k != "type"}
788
+ for child in filtered_element["children"]
789
+ ]
790
+
791
+ filtered_elements.append(filtered_element)
792
+
793
+ self.clickable_elements_cache = filtered_elements
794
+
795
+ return {
796
+ "a11y_tree": filtered_elements,
797
+ "phone_state": combined_data["phone_state"]
798
+ }
799
+
800
+ except Exception as e:
801
+ return {"error": str(e), "message": f"Error getting combined state: {str(e)}"}
802
+
803
+ if __name__ == "__main__":
804
+ async def main():
805
+ tools = AdbTools()
806
+
807
+ asyncio.run(main())