droidrun 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/__init__.py +22 -10
- droidrun/__main__.py +1 -2
- droidrun/adb/__init__.py +3 -3
- droidrun/adb/device.py +2 -2
- droidrun/adb/manager.py +2 -2
- droidrun/agent/__init__.py +5 -15
- droidrun/agent/codeact/__init__.py +11 -0
- droidrun/agent/codeact/codeact_agent.py +420 -0
- droidrun/agent/codeact/events.py +28 -0
- droidrun/agent/codeact/prompts.py +26 -0
- droidrun/agent/common/default.py +5 -0
- droidrun/agent/common/events.py +4 -0
- droidrun/agent/context/__init__.py +23 -0
- droidrun/agent/context/agent_persona.py +15 -0
- droidrun/agent/context/context_injection_manager.py +66 -0
- droidrun/agent/context/episodic_memory.py +15 -0
- droidrun/agent/context/personas/__init__.py +11 -0
- droidrun/agent/context/personas/app_starter.py +44 -0
- droidrun/agent/context/personas/default.py +95 -0
- droidrun/agent/context/personas/extractor.py +52 -0
- droidrun/agent/context/personas/ui_expert.py +107 -0
- droidrun/agent/context/reflection.py +20 -0
- droidrun/agent/context/task_manager.py +124 -0
- droidrun/agent/context/todo.txt +4 -0
- droidrun/agent/droid/__init__.py +13 -0
- droidrun/agent/droid/droid_agent.py +357 -0
- droidrun/agent/droid/events.py +28 -0
- droidrun/agent/oneflows/reflector.py +265 -0
- droidrun/agent/planner/__init__.py +13 -0
- droidrun/agent/planner/events.py +16 -0
- droidrun/agent/planner/planner_agent.py +268 -0
- droidrun/agent/planner/prompts.py +124 -0
- droidrun/agent/utils/__init__.py +3 -0
- droidrun/agent/utils/async_utils.py +17 -0
- droidrun/agent/utils/chat_utils.py +312 -0
- droidrun/agent/utils/executer.py +132 -0
- droidrun/agent/utils/llm_picker.py +147 -0
- droidrun/agent/utils/trajectory.py +184 -0
- droidrun/cli/__init__.py +1 -1
- droidrun/cli/logs.py +283 -0
- droidrun/cli/main.py +358 -149
- droidrun/run.py +105 -0
- droidrun/tools/__init__.py +4 -30
- droidrun/tools/adb.py +879 -0
- droidrun/tools/ios.py +594 -0
- droidrun/tools/tools.py +99 -0
- droidrun-0.3.0.dist-info/METADATA +149 -0
- droidrun-0.3.0.dist-info/RECORD +52 -0
- droidrun/agent/llm_reasoning.py +0 -567
- droidrun/agent/react_agent.py +0 -556
- droidrun/llm/__init__.py +0 -24
- droidrun/tools/actions.py +0 -854
- droidrun/tools/device.py +0 -29
- droidrun-0.1.0.dist-info/METADATA +0 -276
- droidrun-0.1.0.dist-info/RECORD +0 -20
- {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/WHEEL +0 -0
- {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/entry_points.txt +0 -0
- {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/licenses/LICENSE +0 -0
droidrun/tools/adb.py
ADDED
@@ -0,0 +1,879 @@
|
|
1
|
+
"""
|
2
|
+
UI Actions - Core UI interaction tools for Android device control.
|
3
|
+
"""
|
4
|
+
|
5
|
+
import os
|
6
|
+
import re
|
7
|
+
import json
|
8
|
+
import time
|
9
|
+
import tempfile
|
10
|
+
import asyncio
|
11
|
+
import aiofiles
|
12
|
+
import contextlib
|
13
|
+
from typing import Optional, Dict, Tuple, List, Any
|
14
|
+
from droidrun.adb.device import Device
|
15
|
+
from droidrun.adb.manager import DeviceManager
|
16
|
+
from droidrun.tools.tools import Tools
|
17
|
+
|
18
|
+
|
19
|
+
class AdbTools(Tools):
|
20
|
+
"""Core UI interaction tools for Android device control."""
|
21
|
+
|
22
|
+
def __init__(self, serial: str = "emulator-5554") -> None:
    """Set up the per-instance state for one Android device.

    Args:
        serial: ADB serial of the device this instance talks to.
    """
    # Cache of clickable elements, intended for index-based tapping.
    self.clickable_elements_cache: List[Dict[str, Any]] = []

    # Target device and the manager used to resolve it.
    self.serial = serial
    self.device_manager = DeviceManager()

    # Most recent screenshot bytes; the full timestamped history is below.
    self.last_screenshot = None

    # Task-completion bookkeeping, written by complete().
    self.reason = None
    self.success = None
    self.finished = False

    # Notes stored through remember().
    self.memory: List[str] = []

    # Every screenshot taken, each stored with its timestamp.
    self.screenshots: List[Dict[str, Any]] = []
|
35
|
+
|
36
|
+
def get_device_serial(self) -> str:
    """Return the serial configured on this instance.

    Returns:
        ``self.serial`` when it holds a non-empty value, otherwise ``None``.
        No environment variable is consulted by this method.
    """
    configured = self.serial
    return configured if configured else None
|
41
|
+
|
42
|
+
async def get_device(self) -> Optional[Device]:
    """Resolve the Device object for this instance's serial.

    Returns:
        The device returned by ``self.device_manager`` for the serial.

    Raises:
        ValueError: If no serial is configured, or if the device manager
            returns nothing for that serial.
    """
    serial = self.get_device_serial()
    if serial:
        found = await self.device_manager.get_device(serial)
        if found:
            return found
        raise ValueError(f"Device {serial} not found")
    raise ValueError("No device serial specified - set device_serial parameter")
|
57
|
+
|
58
|
+
def parse_package_list(self, output: str) -> List[Dict[str, str]]:
    """Turn 'pm list packages -f' output into package/path records.

    Each useful line looks like ``package:/path/to/base.apk=com.package.name``.
    Lines that do not start with ``package:`` or contain no ``=`` are skipped.

    Args:
        output: Raw command output from 'pm list packages -f'.

    Returns:
        One dict per parsed line, with 'package' and 'path' keys, in input order.
    """
    prefix = "package:"
    parsed: List[Dict[str, str]] = []
    for raw in output.splitlines():
        if not raw.startswith(prefix):
            continue
        # Split on the LAST "=" so a path containing "=" stays intact.
        path, separator, package = raw[len(prefix):].rpartition("=")
        if not separator:
            continue
        parsed.append({"package": package.strip(), "path": path.strip()})
    return parsed
|
76
|
+
|
77
|
+
async def get_clickables(self, serial: Optional[str] = None) -> List[Dict[str, Any]]:
    """
    Get the clickable UI elements from the device via the portal service.

    A ``GET_ELEMENTS`` broadcast asks the on-device service to dump the UI to a
    JSON file; the file path is read back from logcat, the file is pulled to a
    local temporary file and parsed. The whole cycle is retried until a
    non-empty element list is obtained or roughly 30 seconds have elapsed.

    The "type" key is removed from every top-level element and from its direct
    children. The result is stored on ``self.clickable_elements_cache`` and,
    for backward compatibility, in the module-level ``CLICKABLE_ELEMENTS_CACHE``.

    Args:
        serial: Optional device serial number; defaults to this instance's device.

    Returns:
        The filtered list of element dictionaries (not a JSON string).

    Raises:
        ValueError: If the device is not found, the retries time out, or any
            other error occurs while retrieving the elements.
    """
    # Still published as a module global because other code in this file reads
    # that name; the instance attribute is the preferred source.
    global CLICKABLE_ELEMENTS_CACHE

    def without_type(node):
        """Copy *node* without its "type" key."""
        return {key: value for key, value in node.items() if key != "type"}

    try:
        # Resolve the device
        if serial:
            device = await DeviceManager().get_device(serial)
            if not device:
                raise ValueError(f"Device {serial} not found")
        else:
            device = await self.get_device()

        # Reserve a local path for the pulled JSON; removed in ``finally`` below.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as temp:
            local_path = temp.name

        try:
            max_total_time = 30  # Maximum total time to try in seconds
            retry_interval = 1.0  # Time between retries in seconds
            max_wait_time = 10  # Maximum wait per attempt in seconds
            poll_interval = 0.2  # Check logcat every 200ms

            loop = asyncio.get_running_loop()
            start_total_time = loop.time()

            while True:
                if loop.time() - start_total_time > max_total_time:
                    raise ValueError(
                        f"Failed to get UI elements after {max_total_time} seconds of retries"
                    )

                # Clear logcat to make it easier to find our output
                await device._adb.shell(device._serial, "logcat -c")

                # Trigger the custom service via broadcast to get only interactive elements
                await device._adb.shell(
                    device._serial,
                    "am broadcast -a com.droidrun.portal.GET_ELEMENTS",
                )

                # Poll logcat for the path of the JSON file the service wrote
                device_path = None
                start_time = loop.time()
                while loop.time() - start_time < max_wait_time:
                    logcat_output = await device._adb.shell(
                        device._serial,
                        'logcat -d | grep "DROIDRUN_FILE" | grep "JSON data written to" | tail -1',
                    )
                    match = re.search(r"JSON data written to: (.*)", logcat_output)
                    if match:
                        device_path = match.group(1).strip()
                        break
                    await asyncio.sleep(poll_interval)

                if not device_path:
                    await asyncio.sleep(retry_interval)
                    continue

                # Pull and read the JSON file
                await device._adb.pull_file(device._serial, device_path, local_path)
                async with aiofiles.open(local_path, "r", encoding="utf-8") as f:
                    json_content = await f.read()

                try:
                    ui_data = json.loads(json_content)
                except json.JSONDecodeError:
                    # Unparseable content is treated as a transient failure: retry.
                    await asyncio.sleep(retry_interval)
                    continue

                filtered_data = []
                for element in ui_data:
                    filtered_element = without_type(element)
                    if "children" in filtered_element:
                        filtered_element["children"] = [
                            without_type(child) for child in filtered_element["children"]
                        ]
                    filtered_data.append(filtered_element)

                if filtered_data:
                    self.clickable_elements_cache = filtered_data
                    CLICKABLE_ELEMENTS_CACHE = filtered_data

                    # Small pause carried over from the original implementation
                    await asyncio.sleep(0.5)
                    return filtered_data

                # No elements found yet: wait and retry
                await asyncio.sleep(retry_interval)

        except Exception as e:
            raise ValueError(f"Error retrieving clickable elements: {e}") from e
        finally:
            # Always remove the temporary file, on success as well as on error.
            with contextlib.suppress(OSError):
                os.unlink(local_path)

    except Exception as e:
        raise ValueError(f"Error getting clickable elements: {e}") from e
|
213
|
+
|
214
|
+
async def tap_by_index(self, index: int, serial: Optional[str] = None) -> str:
    """
    Tap on a UI element by its index.

    The element is looked up (recursively, through "children") in the cached
    clickable elements and tapped at the centre of its bounds. The cache is
    taken from ``self.clickable_elements_cache``; if that is empty, a
    module-level ``CLICKABLE_ELEMENTS_CACHE`` is used when one exists.

    Args:
        index: Index of the element to tap
        serial: Optional device serial number; defaults to this instance's device.

    Returns:
        Result message describing the tap, or a message starting with "Error:".
    """

    def walk(elements):
        """Yield every element depth-first, each parent before its children."""
        for item in elements:
            yield item
            yield from walk(item.get("children", []))

    try:
        # Looking the global up through globals() avoids a NameError when it
        # was never created (e.g. get_clickables has not run yet).
        cache = getattr(self, "clickable_elements_cache", None) or globals().get(
            "CLICKABLE_ELEMENTS_CACHE"
        )
        if not cache:
            return "Error: No UI elements cached. Call get_clickables first."

        # Find the element with the given index (including in children)
        element = next(
            (item for item in walk(cache) if item.get("index") == index), None
        )

        if not element:
            # List available indices to help the caller
            indices = sorted(
                item.get("index")
                for item in walk(cache)
                if item.get("index") is not None
            )
            indices_str = ", ".join(str(idx) for idx in indices[:20])
            if len(indices) > 20:
                indices_str += f"... and {len(indices) - 20} more"

            return f"Error: No element found with index {index}. Available indices: {indices_str}"

        # Get the bounds of the element
        bounds_str = element.get("bounds")
        if not bounds_str:
            element_text = element.get("text", "No text")
            element_type = element.get("type", "unknown")
            element_class = element.get("className", "Unknown class")
            return f"Error: Element with index {index} ('{element_text}', {element_class}, type: {element_type}) has no bounds and cannot be tapped"

        # Parse the bounds (format: "left,top,right,bottom")
        try:
            left, top, right, bottom = map(int, bounds_str.split(","))
        except ValueError:
            return f"Error: Invalid bounds format for element with index {index}: {bounds_str}"

        # Calculate the center of the element
        x = (left + right) // 2
        y = (top + bottom) // 2

        # Get the device and tap at the coordinates
        if serial:
            device = await DeviceManager().get_device(serial)
            if not device:
                return f"Error: Device {serial} not found"
        else:
            device = await self.get_device()

        await device.tap(x, y)

        # Add a small delay to allow UI to update
        await asyncio.sleep(0.5)

        # Create a descriptive response
        response_parts = [
            f"Tapped element with index {index}",
            f"Text: '{element.get('text', 'No text')}'",
            f"Class: {element.get('className', 'Unknown class')}",
            f"Type: {element.get('type', 'unknown')}",
        ]

        # Add the text of direct children, if any
        child_texts = [
            child.get("text")
            for child in element.get("children", [])
            if child.get("text")
        ]
        if child_texts:
            response_parts.append(f"Contains text: {' | '.join(child_texts)}")

        response_parts.append(f"Coordinates: ({x}, {y})")

        return " | ".join(response_parts)
    except ValueError as e:
        return f"Error: {str(e)}"
|
321
|
+
|
322
|
+
# Rename the old tap function to tap_by_coordinates for backward compatibility
|
323
|
+
async def tap_by_coordinates(self, x: int, y: int) -> bool:
    """
    Tap on the device screen at specific coordinates.

    Args:
        x: X coordinate
        y: Y coordinate

    Returns:
        True when the tap was sent; False when the device could not be
        resolved or a ValueError was raised while tapping.
    """
    try:
        if self.serial:
            device = await DeviceManager().get_device(self.serial)
            if not device:
                # Previously a (truthy) error string was returned here, which a
                # caller testing the bool result would read as success.
                print(f"Error: Device {self.serial} not found")
                return False
        else:
            device = await self.get_device()

        await device.tap(x, y)
        print(f"Tapped at coordinates ({x}, {y})")
        return True
    except ValueError as e:
        print(f"Error: {str(e)}")
        return False
|
349
|
+
|
350
|
+
# Replace the old tap function with the new one
|
351
|
+
async def tap(self, index: int) -> str:
    """
    Tap the UI element that carries the given index.

    Thin alias: the work is delegated to ``tap_by_index`` with the default
    device, which resolves the element from the cached clickable elements and
    taps the centre of its bounds.

    Args:
        index: Index of the element to tap

    Returns:
        The result message produced by ``tap_by_index``
    """
    outcome = await self.tap_by_index(index)
    return outcome
|
365
|
+
|
366
|
+
async def swipe(
    self, start_x: int, start_y: int, end_x: int, end_y: int, duration_ms: int = 300
) -> bool:
    """
    Performs a straight-line swipe gesture on the device screen.
    To perform a hold (long press), set the start and end coordinates to the same values and increase the duration as needed.

    Args:
        start_x: Starting X coordinate
        start_y: Starting Y coordinate
        end_x: Ending X coordinate
        end_y: Ending Y coordinate
        duration_ms: Duration of swipe in milliseconds

    Returns:
        True when the swipe was sent; False when the device could not be
        resolved or a ValueError was raised while swiping.
    """
    try:
        if self.serial:
            device = await DeviceManager().get_device(self.serial)
            if not device:
                # Previously a (truthy) error string was returned here, which a
                # caller testing the bool result would read as success.
                print(f"Error: Device {self.serial} not found")
                return False
        else:
            device = await self.get_device()

        await device.swipe(start_x, start_y, end_x, end_y, duration_ms)
        # Fixed one-second pause after the gesture, kept from the original.
        await asyncio.sleep(1)
        print(f"Swiped from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration_ms}ms")
        return True
    except ValueError as e:
        print(f"Error: {str(e)}")
        return False
|
397
|
+
|
398
|
+
async def input_text(self, text: str, serial: Optional[str] = None) -> str:
    """
    Input text on the device.
    Always make sure that the Focused Element is not None before inputting text.

    The Droidrun keyboard IME is enabled and selected, the text is sent to it
    Base64-encoded through a broadcast, and the previously selected keyboard is
    then restored, even if sending the text fails.

    Args:
        text: Text to input. Can contain spaces, newlines, and special characters including non-ASCII.
        serial: Optional device serial number; defaults to this instance's device.

    Returns:
        Result message (a message starting with "Error" on failure)
    """
    try:
        if serial:
            device = await DeviceManager().get_device(serial)
            if not device:
                return f"Error: Device {serial} not found"
        else:
            device = await self.get_device()

        # Save the current keyboard
        original_ime = await device._adb.shell(
            device._serial, "settings get secure default_input_method"
        )
        original_ime = original_ime.strip()

        try:
            # Enable the Droidrun keyboard
            await device._adb.shell(
                device._serial, "ime enable com.droidrun.portal/.DroidrunKeyboardIME"
            )

            # Set the Droidrun keyboard as the default
            await device._adb.shell(
                device._serial, "ime set com.droidrun.portal/.DroidrunKeyboardIME"
            )

            # Wait for keyboard to change
            await asyncio.sleep(0.2)

            # Base64 keeps spaces, quotes and non-ASCII text intact inside the
            # shell command line.
            import base64

            encoded_text = base64.b64encode(text.encode()).decode()

            cmd = f'am broadcast -a com.droidrun.portal.DROIDRUN_INPUT_B64 --es msg "{encoded_text}" -p com.droidrun.portal'
            await device._adb.shell(device._serial, cmd)

            # Wait for text input to complete
            await asyncio.sleep(0.5)
        finally:
            # Restore the original keyboard whether or not the input succeeded,
            # so a failed broadcast does not leave the Droidrun IME selected.
            if original_ime and "com.droidrun.portal" not in original_ime:
                await device._adb.shell(device._serial, f"ime set {original_ime}")

        return f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"
    except ValueError as e:
        return f"Error: {str(e)}"
    except Exception as e:
        return f"Error sending text input: {str(e)}"
|
457
|
+
|
458
|
+
async def back(self) -> str:
    """
    Go back on the current view.
    This presses the Android back button.

    Returns:
        "Pressed key BACK" on success, or a message starting with "Error:".
    """
    try:
        if self.serial:
            device = await DeviceManager().get_device(self.serial)
            if not device:
                return f"Error: Device {self.serial} not found"
        else:
            device = await self.get_device()

        # Android KEYCODE_BACK is 4. The previous value 3 is KEYCODE_HOME, which
        # sent the user to the home screen instead of going back.
        await device.press_key(4)
        return "Pressed key BACK"
    except ValueError as e:
        return f"Error: {str(e)}"
|
476
|
+
|
477
|
+
async def press_key(self, keycode: int) -> str:
    """
    Send a single key press to the Android device.

    Keycodes that get a readable name in the result message:
    - 4: BACK
    - 66: ENTER
    - 67: DELETE

    Args:
        keycode: Android keycode to press

    Returns:
        "Pressed key <name or code>" on success, or a message starting with "Error:".
    """
    try:
        if not self.serial:
            device = await self.get_device()
        else:
            device = await DeviceManager().get_device(self.serial)
            if not device:
                return f"Error: Device {self.serial} not found"

        # Unknown keycodes are reported by their number.
        labels = {4: "BACK", 66: "ENTER", 67: "DELETE"}
        label = labels.get(keycode, str(keycode))

        await device.press_key(keycode)
        return f"Pressed key {label}"
    except ValueError as err:
        return f"Error: {str(err)}"
|
509
|
+
|
510
|
+
async def start_app(self, package: str, activity: str = "") -> str:
    """
    Launch an app on the device.

    Args:
        package: Package name (e.g., "com.android.settings")
        activity: Optional activity name

    Returns:
        Whatever the device's ``start_app`` returns, or a message starting
        with "Error:" when the device is not found or a ValueError is raised.
    """
    try:
        if not self.serial:
            device = await self.get_device()
        else:
            device = await DeviceManager().get_device(self.serial)
            if not device:
                return f"Error: Device {self.serial} not found"

        return await device.start_app(package, activity)
    except ValueError as err:
        return f"Error: {str(err)}"
|
531
|
+
|
532
|
+
async def install_app(
    self, apk_path: str, reinstall: bool = False, grant_permissions: bool = True
) -> str:
    """
    Install an APK from the local filesystem onto the device.

    The device is resolved first; the APK path is then checked on the host
    before the install is attempted.

    Args:
        apk_path: Path to the APK file
        reinstall: Whether to reinstall if app exists
        grant_permissions: Whether to grant all permissions

    Returns:
        Whatever the device's ``install_app`` returns, or a message starting
        with "Error:" when the device or the APK file is missing.
    """
    try:
        if not self.serial:
            device = await self.get_device()
        else:
            device = await DeviceManager().get_device(self.serial)
            if not device:
                return f"Error: Device {self.serial} not found"

        if os.path.exists(apk_path):
            return await device.install_app(apk_path, reinstall, grant_permissions)
        return f"Error: APK file not found at {apk_path}"
    except ValueError as err:
        return f"Error: {str(err)}"
|
559
|
+
|
560
|
+
async def take_screenshot(self) -> Tuple[str, bytes]:
    """
    Capture the device screen.

    The second item of the captured tuple is kept in ``self.last_screenshot``,
    and a record with the capture time, the image data and the format is
    appended to ``self.screenshots``.

    Returns:
        The tuple returned by the device's ``take_screenshot`` (format first,
        image data second).

    Raises:
        ValueError: If the device is not found or the capture raises ValueError.
    """
    try:
        if not self.serial:
            device = await self.get_device()
        else:
            device = await DeviceManager().get_device(self.serial)
            if not device:
                raise ValueError(f"Device {self.serial} not found")

        captured = await device.take_screenshot()
        self.last_screenshot = captured[1]

        # Keep a timestamped history entry alongside the latest screenshot.
        record = {
            "timestamp": time.time(),
            "image_data": captured[1],
            "format": captured[0],  # Usually 'PNG'
        }
        self.screenshots.append(record)
        return captured
    except ValueError as err:
        raise ValueError(f"Error taking screenshot: {str(err)}")
|
588
|
+
|
589
|
+
async def list_packages(self, include_system_apps: bool = False) -> List[str]:
    """
    List the packages installed on the device.

    Runs 'pm list packages -f' on the device (adding '-3' unless system apps
    are requested) and parses the output with ``parse_package_list``.

    Args:
        include_system_apps: Whether to include system apps (default: False)

    Returns:
        List of package names

    Raises:
        ValueError: If the device is not found or a ValueError is raised
            while listing.
    """
    try:
        if not self.serial:
            device = await self.get_device()
        else:
            device = await DeviceManager().get_device(self.serial)
            if not device:
                raise ValueError(f"Device {self.serial} not found")

        # "-f" adds the APK path to each line; "-3" restricts to third-party apps.
        command = "pm list packages -f"
        if not include_system_apps:
            command += " -3"

        raw_output = await device._adb.shell(device._serial, command)

        names = [entry["package"] for entry in self.parse_package_list(raw_output)]
        print(f"Returning {len(names)} packages")
        return names
    except ValueError as err:
        raise ValueError(f"Error listing packages: {str(err)}")
|
623
|
+
|
624
|
+
async def extract(self, filename: Optional[str] = None) -> str:
    """Extract and save the current UI state to a JSON file.

    The UI state is obtained from ``get_all_elements`` and written, pretty
    printed, to ``filename`` resolved to an absolute path.

    Args:
        filename: Optional filename to save the UI state (defaults to
            ui_state_TIMESTAMP.json); ".json" is appended when missing.

    Returns:
        A message naming the saved file, or a message starting with
        "Error extracting UI state:" on any failure.
    """
    try:
        # Generate default filename if not provided
        if not filename:
            filename = f"ui_state_{int(time.time())}.json"

        # Ensure the filename ends with .json
        if not filename.endswith(".json"):
            filename += ".json"

        # get_all_elements() takes no arguments and uses self.serial itself.
        # Passing the serial raised a TypeError that the handler below
        # swallowed, so every extraction failed.
        ui_elements = await self.get_all_elements()

        # Save to file
        save_path = os.path.abspath(filename)
        async with aiofiles.open(save_path, "w", encoding="utf-8") as f:
            await f.write(json.dumps(ui_elements, indent=2))

        return f"UI state extracted and saved to {save_path}"

    except Exception as e:
        return f"Error extracting UI state: {e}"
|
658
|
+
|
659
|
+
async def get_all_elements(self) -> Dict[str, Any]:
    """
    Get all UI elements from the device, including non-interactive elements.

    Sends the ``com.droidrun.portal.GET_ALL_ELEMENTS`` broadcast, polls logcat
    for up to 10 seconds for the path of the JSON file that was written, pulls
    that file to a local temporary file and parses it. Unlike the clickable
    element lookup there is no outer retry loop: a missing path is an error.

    Returns:
        Dictionary with three keys: "all_elements" (the parsed JSON), "count"
        (its length when it is a list, otherwise the length of its "elements"
        entry) and "message" (a fixed status string).

    Raises:
        ValueError: For every failure (device not found, path not found in
            logcat, pull/read/parse errors). Inner errors are wrapped twice,
            so messages are prefixed "Error getting all UI elements: Error
            retrieving all UI elements: ...".
    """
    try:
        # Get the device (always this instance's serial; no override parameter)
        device_manager = DeviceManager()
        device = await device_manager.get_device(self.serial)
        if not device:
            raise ValueError(f"Device {self.serial} not found")

        # Create a temporary file for the JSON; delete=False because the file
        # is filled later by pull_file and removed explicitly below.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as temp:
            local_path = temp.name

        try:
            # Clear logcat to make it easier to find our output
            await device._adb.shell(device._serial, "logcat -c")

            # Trigger the custom service via broadcast to get ALL elements
            await device._adb.shell(
                device._serial,
                "am broadcast -a com.droidrun.portal.GET_ALL_ELEMENTS",
            )

            # Poll for the JSON file path
            start_time = asyncio.get_event_loop().time()
            max_wait_time = 10  # Maximum wait time in seconds
            poll_interval = 0.2  # Check every 200ms

            device_path = None
            while asyncio.get_event_loop().time() - start_time < max_wait_time:
                # Check logcat for the file path (only the newest matching line)
                logcat_output = await device._adb.shell(
                    device._serial,
                    'logcat -d | grep "DROIDRUN_FILE" | grep "JSON data written to" | tail -1',
                )

                # Parse the file path if present
                match = re.search(r"JSON data written to: (.*)", logcat_output)
                if match:
                    device_path = match.group(1).strip()
                    break

                # Wait before polling again
                await asyncio.sleep(poll_interval)

            # Check if we found the file path
            if not device_path:
                raise ValueError(
                    f"Failed to find the JSON file path in logcat after {max_wait_time} seconds"
                )

            # Pull the JSON file from the device
            await device._adb.pull_file(device._serial, device_path, local_path)

            # Read the JSON file
            async with aiofiles.open(local_path, "r", encoding="utf-8") as f:
                json_content = await f.read()

            # Clean up the temporary file (success path; the error path below
            # repeats this, and a second unlink is silently ignored)
            with contextlib.suppress(OSError):
                os.unlink(local_path)

            # Try to parse the JSON
            # NOTE(review): this function-scope import makes ``json`` a local
            # name for the whole function; it is bound before its first use.
            import json

            try:
                ui_data = json.loads(json_content)

                return {
                    "all_elements": ui_data,
                    "count": (
                        len(ui_data)
                        if isinstance(ui_data, list)
                        else sum(1 for _ in ui_data.get("elements", []))
                    ),
                    "message": "Retrieved all UI elements from the device screen",
                }
            except json.JSONDecodeError:
                raise ValueError("Failed to parse UI elements JSON data")

        except Exception as e:
            # Clean up in case of error
            with contextlib.suppress(OSError):
                os.unlink(local_path)
            raise ValueError(f"Error retrieving all UI elements: {e}")

    except Exception as e:
        # Outer wrapper: also covers device lookup and temp-file creation.
        raise ValueError(f"Error getting all UI elements: {e}")
|
756
|
+
|
757
|
+
def complete(self, success: bool, reason: str = ""):
    """
    Record the final outcome of the task and mark it finished.

    Args:
        success: Indicates if the task was successful.
        reason: Reason for failure/success. Optional on success (a default
            text is stored); required on failure.

    Raises:
        ValueError: If ``success`` is falsy and no reason is given. In that
            case ``self.success`` is already False but the task is not
            marked finished.
    """
    if not success:
        self.success = False
        if not reason:
            raise ValueError("Reason for failure is required if success is False.")
        self.reason = reason
    else:
        self.success = True
        self.reason = reason or "Task completed successfully."
    self.finished = True
|
775
|
+
|
776
|
+
async def get_phone_state(self, serial: Optional[str] = None) -> Dict[str, Any]:
    """
    Ask the portal service for the current phone state.

    Sends the ``GET_PHONE_STATE`` broadcast and polls logcat for up to 10
    seconds for a ``DROIDRUN_PHONE_STATE_DATA`` line of the form
    ``...CHUNK|0|1|{json_data}``; the JSON after the third "|" is returned.

    Args:
        serial: Optional device serial number

    Returns:
        The decoded phone state dictionary, or a dictionary with "error" and
        "message" keys on timeout or on any exception.
    """
    max_wait_time = 10  # Maximum wait time in seconds
    poll_interval = 0.2  # Check every 200ms

    try:
        if not serial:
            device = await self.get_device()
        else:
            device = await DeviceManager().get_device(serial)
            if not device:
                raise ValueError(f"Device {serial} not found")

        # Start from an empty logcat so only the fresh answer can match.
        await device._adb.shell(device._serial, "logcat -c")
        await device._adb.shell(
            device._serial, "am broadcast -a com.droidrun.portal.GET_PHONE_STATE"
        )

        clock = asyncio.get_event_loop()
        started = clock.time()
        while clock.time() - started < max_wait_time:
            latest = await device._adb.shell(
                device._serial,
                'logcat -d | grep "DROIDRUN_PHONE_STATE_DATA" | tail -1',
            )

            if "CHUNK|" in latest:
                # At most three splits: the 4th field keeps any "|" that is
                # part of the JSON payload itself.
                fields = latest.split("|", 3)
                if len(fields) == 4:
                    try:
                        return json.loads(fields[3])
                    except json.JSONDecodeError:
                        # Unparseable payload: fall through, wait, poll again.
                        pass

            await asyncio.sleep(poll_interval)

        return {
            "error": "Timeout",
            "message": f"Failed to get phone state data after {max_wait_time} seconds",
        }

    except Exception as e:
        return {"error": str(e), "message": f"Error getting phone state: {str(e)}"}
|
844
|
+
|
845
|
+
async def remember(self, information: str) -> str:
    """
    Append a note to the instance memory.

    The note is stored with surrounding whitespace stripped. Only the 10 most
    recent notes are kept so the memory cannot grow without bound.

    Args:
        information: The information to remember

    Returns:
        Confirmation message echoing the information as given, or an error
        message when it is empty or not a string.
    """
    is_usable = bool(information) and isinstance(information, str)
    if not is_usable:
        return "Error: Please provide valid information to remember."

    self.memory.append(information.strip())

    # Drop the oldest notes once the limit is exceeded.
    limit = 10
    if len(self.memory) > limit:
        self.memory = self.memory[-limit:]

    return f"Remembered: {information}"
|
871
|
+
|
872
|
+
def get_memory(self) -> List[str]:
    """
    Return the stored memory items.

    Returns:
        A new list holding the stored items, so callers can modify it
        without affecting the instance's memory.
    """
    snapshot = self.memory.copy()
    return snapshot
|