droidrun 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/__init__.py +16 -11
- droidrun/__main__.py +1 -1
- droidrun/adb/__init__.py +3 -3
- droidrun/adb/device.py +1 -1
- droidrun/adb/manager.py +2 -2
- droidrun/agent/__init__.py +6 -0
- droidrun/agent/codeact/__init__.py +2 -4
- droidrun/agent/codeact/codeact_agent.py +321 -235
- droidrun/agent/codeact/events.py +12 -20
- droidrun/agent/codeact/prompts.py +0 -52
- droidrun/agent/common/default.py +5 -0
- droidrun/agent/common/events.py +4 -0
- droidrun/agent/context/__init__.py +23 -0
- droidrun/agent/context/agent_persona.py +15 -0
- droidrun/agent/context/context_injection_manager.py +66 -0
- droidrun/agent/context/episodic_memory.py +15 -0
- droidrun/agent/context/personas/__init__.py +11 -0
- droidrun/agent/context/personas/app_starter.py +44 -0
- droidrun/agent/context/personas/default.py +95 -0
- droidrun/agent/context/personas/extractor.py +52 -0
- droidrun/agent/context/personas/ui_expert.py +107 -0
- droidrun/agent/context/reflection.py +20 -0
- droidrun/agent/context/task_manager.py +124 -0
- droidrun/agent/context/todo.txt +4 -0
- droidrun/agent/droid/__init__.py +2 -2
- droidrun/agent/droid/droid_agent.py +264 -325
- droidrun/agent/droid/events.py +28 -0
- droidrun/agent/oneflows/reflector.py +265 -0
- droidrun/agent/planner/__init__.py +2 -4
- droidrun/agent/planner/events.py +9 -13
- droidrun/agent/planner/planner_agent.py +268 -0
- droidrun/agent/planner/prompts.py +33 -53
- droidrun/agent/utils/__init__.py +3 -0
- droidrun/agent/utils/async_utils.py +1 -40
- droidrun/agent/utils/chat_utils.py +268 -48
- droidrun/agent/utils/executer.py +49 -14
- droidrun/agent/utils/llm_picker.py +14 -10
- droidrun/agent/utils/trajectory.py +184 -0
- droidrun/cli/__init__.py +1 -1
- droidrun/cli/logs.py +283 -0
- droidrun/cli/main.py +333 -439
- droidrun/run.py +105 -0
- droidrun/tools/__init__.py +5 -10
- droidrun/tools/{actions.py → adb.py} +279 -238
- droidrun/tools/ios.py +594 -0
- droidrun/tools/tools.py +99 -0
- droidrun-0.3.0.dist-info/METADATA +149 -0
- droidrun-0.3.0.dist-info/RECORD +52 -0
- droidrun/agent/planner/task_manager.py +0 -355
- droidrun/agent/planner/workflow.py +0 -371
- droidrun/tools/device.py +0 -29
- droidrun/tools/loader.py +0 -60
- droidrun-0.2.0.dist-info/METADATA +0 -373
- droidrun-0.2.0.dist-info/RECORD +0 -32
- {droidrun-0.2.0.dist-info → droidrun-0.3.0.dist-info}/WHEEL +0 -0
- {droidrun-0.2.0.dist-info → droidrun-0.3.0.dist-info}/entry_points.txt +0 -0
- {droidrun-0.2.0.dist-info → droidrun-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -11,13 +11,15 @@ import asyncio
|
|
11
11
|
import aiofiles
|
12
12
|
import contextlib
|
13
13
|
from typing import Optional, Dict, Tuple, List, Any
|
14
|
-
from
|
14
|
+
from droidrun.adb.device import Device
|
15
|
+
from droidrun.adb.manager import DeviceManager
|
16
|
+
from droidrun.tools.tools import Tools
|
15
17
|
|
16
18
|
|
17
|
-
class Tools:
|
19
|
+
class AdbTools(Tools):
|
18
20
|
"""Core UI interaction tools for Android device control."""
|
19
21
|
|
20
|
-
def __init__(self, serial: str) -> None:
|
22
|
+
def __init__(self, serial: str = "emulator-5554") -> None:
|
21
23
|
# Instance‐level cache for clickable elements (index-based tapping)
|
22
24
|
self.clickable_elements_cache: List[Dict[str, Any]] = []
|
23
25
|
self.serial = serial
|
@@ -28,30 +30,29 @@ class Tools:
|
|
28
30
|
self.finished = False
|
29
31
|
# Memory storage for remembering important information
|
30
32
|
self.memory: List[str] = []
|
33
|
+
# Store all screenshots with timestamps
|
34
|
+
self.screenshots: List[Dict[str, Any]] = []
|
31
35
|
|
32
36
|
def get_device_serial(self) -> str:
|
33
37
|
"""Get the device serial from the instance or environment variable."""
|
34
38
|
# First try using the instance's serial
|
35
39
|
if self.serial:
|
36
40
|
return self.serial
|
37
|
-
|
38
|
-
# Fall back to environment variable if not set on the instance
|
39
|
-
return os.environ.get("DROIDRUN_DEVICE_SERIAL", "")
|
40
41
|
|
41
42
|
async def get_device(self) -> Optional[Device]:
|
42
43
|
"""Get the device instance using the instance's serial or from environment variable.
|
43
|
-
|
44
|
+
|
44
45
|
Returns:
|
45
46
|
Device instance or None if not found
|
46
47
|
"""
|
47
48
|
serial = self.get_device_serial()
|
48
49
|
if not serial:
|
49
|
-
raise ValueError("No device serial specified - set
|
50
|
-
|
50
|
+
raise ValueError("No device serial specified - set device_serial parameter")
|
51
|
+
|
51
52
|
device = await self.device_manager.get_device(serial)
|
52
53
|
if not device:
|
53
54
|
raise ValueError(f"Device {serial} not found")
|
54
|
-
|
55
|
+
|
55
56
|
return device
|
56
57
|
|
57
58
|
def parse_package_list(self, output: str) -> List[Dict[str, str]]:
|
@@ -76,255 +77,257 @@ class Tools:
|
|
76
77
|
async def get_clickables(self, serial: Optional[str] = None) -> str:
|
77
78
|
"""
|
78
79
|
Get all clickable UI elements from the device using the custom TopViewService.
|
79
|
-
|
80
|
+
|
80
81
|
This function interacts with the TopViewService app installed on the device
|
81
82
|
to capture UI elements. The service writes UI data to a JSON file on the device,
|
82
83
|
which is then pulled to the host. If no elements are found initially, it will
|
83
84
|
retry for up to 30 seconds.
|
84
|
-
|
85
|
+
|
85
86
|
Args:
|
86
87
|
serial: Optional device serial number
|
87
|
-
|
88
|
+
|
88
89
|
Returns:
|
89
90
|
JSON string containing UI elements extracted from the device screen
|
90
91
|
"""
|
91
92
|
try:
|
92
93
|
# Get the device
|
93
94
|
if serial:
|
94
|
-
from droidrun.adb import DeviceManager
|
95
95
|
device_manager = DeviceManager()
|
96
96
|
device = await device_manager.get_device(serial)
|
97
97
|
if not device:
|
98
98
|
raise ValueError(f"Device {serial} not found")
|
99
99
|
else:
|
100
100
|
device = await self.get_device()
|
101
|
-
|
101
|
+
|
102
102
|
# Create a temporary file for the JSON
|
103
103
|
with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as temp:
|
104
104
|
local_path = temp.name
|
105
|
-
|
105
|
+
|
106
106
|
try:
|
107
107
|
# Set retry parameters
|
108
108
|
max_total_time = 30 # Maximum total time to try in seconds
|
109
109
|
retry_interval = 1.0 # Time between retries in seconds
|
110
110
|
start_total_time = asyncio.get_event_loop().time()
|
111
|
-
|
111
|
+
|
112
112
|
while True:
|
113
113
|
# Check if we've exceeded total time
|
114
114
|
current_time = asyncio.get_event_loop().time()
|
115
115
|
if current_time - start_total_time > max_total_time:
|
116
|
-
raise ValueError(
|
117
|
-
|
116
|
+
raise ValueError(
|
117
|
+
f"Failed to get UI elements after {max_total_time} seconds of retries"
|
118
|
+
)
|
119
|
+
|
118
120
|
# Clear logcat to make it easier to find our output
|
119
121
|
await device._adb.shell(device._serial, "logcat -c")
|
120
|
-
|
122
|
+
|
121
123
|
# Trigger the custom service via broadcast to get only interactive elements
|
122
|
-
await device._adb.shell(
|
123
|
-
|
124
|
+
await device._adb.shell(
|
125
|
+
device._serial,
|
126
|
+
"am broadcast -a com.droidrun.portal.GET_ELEMENTS",
|
127
|
+
)
|
128
|
+
|
124
129
|
# Poll for the JSON file path
|
125
130
|
start_time = asyncio.get_event_loop().time()
|
126
131
|
max_wait_time = 10 # Maximum wait time in seconds
|
127
132
|
poll_interval = 0.2 # Check every 200ms
|
128
|
-
|
133
|
+
|
129
134
|
device_path = None
|
130
135
|
while asyncio.get_event_loop().time() - start_time < max_wait_time:
|
131
136
|
# Check logcat for the file path
|
132
|
-
logcat_output = await device._adb.shell(
|
133
|
-
|
137
|
+
logcat_output = await device._adb.shell(
|
138
|
+
device._serial,
|
139
|
+
'logcat -d | grep "DROIDRUN_FILE" | grep "JSON data written to" | tail -1',
|
140
|
+
)
|
141
|
+
|
134
142
|
# Parse the file path if present
|
135
143
|
match = re.search(r"JSON data written to: (.*)", logcat_output)
|
136
144
|
if match:
|
137
145
|
device_path = match.group(1).strip()
|
138
146
|
break
|
139
|
-
|
147
|
+
|
140
148
|
# Wait before polling again
|
141
149
|
await asyncio.sleep(poll_interval)
|
142
|
-
|
150
|
+
|
143
151
|
# Check if we found the file path
|
144
152
|
if not device_path:
|
145
153
|
await asyncio.sleep(retry_interval)
|
146
154
|
continue
|
147
|
-
|
155
|
+
|
148
156
|
# Pull the JSON file from the device
|
149
157
|
await device._adb.pull_file(device._serial, device_path, local_path)
|
150
|
-
|
158
|
+
|
151
159
|
# Read the JSON file
|
152
160
|
async with aiofiles.open(local_path, "r", encoding="utf-8") as f:
|
153
161
|
json_content = await f.read()
|
154
|
-
|
162
|
+
|
155
163
|
# Try to parse the JSON
|
156
164
|
try:
|
157
165
|
ui_data = json.loads(json_content)
|
158
|
-
|
166
|
+
|
159
167
|
# Filter out the "type" attribute from all elements
|
160
168
|
filtered_data = []
|
161
169
|
for element in ui_data:
|
162
170
|
# Create a copy of the element without the "type" attribute
|
163
|
-
filtered_element = {
|
164
|
-
|
171
|
+
filtered_element = {
|
172
|
+
k: v for k, v in element.items() if k != "type"
|
173
|
+
}
|
174
|
+
|
165
175
|
# Also filter children if present
|
166
176
|
if "children" in filtered_element:
|
167
177
|
filtered_element["children"] = [
|
168
178
|
{k: v for k, v in child.items() if k != "type"}
|
169
179
|
for child in filtered_element["children"]
|
170
180
|
]
|
171
|
-
|
181
|
+
|
172
182
|
filtered_data.append(filtered_element)
|
173
|
-
|
183
|
+
|
174
184
|
# If we got elements, store them and return
|
175
185
|
if filtered_data:
|
176
186
|
# Store the filtered UI data in cache
|
177
187
|
global CLICKABLE_ELEMENTS_CACHE
|
178
188
|
CLICKABLE_ELEMENTS_CACHE = filtered_data
|
179
|
-
|
189
|
+
|
180
190
|
# Add a small sleep to ensure UI is fully loaded/processed
|
181
191
|
await asyncio.sleep(0.5) # 500ms sleep
|
182
|
-
|
192
|
+
|
183
193
|
# Convert the dictionary to a JSON string before returning
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
"message": f"Found {len(filtered_data)} UI elements after retrying"
|
188
|
-
}
|
189
|
-
|
190
|
-
return result
|
191
|
-
|
194
|
+
|
195
|
+
return filtered_data
|
196
|
+
|
192
197
|
# If no elements found, wait and retry
|
193
198
|
await asyncio.sleep(retry_interval)
|
194
|
-
|
199
|
+
|
195
200
|
except json.JSONDecodeError:
|
196
201
|
# If JSON parsing failed, wait and retry
|
197
202
|
await asyncio.sleep(retry_interval)
|
198
203
|
continue
|
199
|
-
|
204
|
+
|
200
205
|
except Exception as e:
|
201
206
|
# Clean up in case of error
|
202
207
|
with contextlib.suppress(OSError):
|
203
208
|
os.unlink(local_path)
|
204
209
|
raise ValueError(f"Error retrieving clickable elements: {e}")
|
205
|
-
|
210
|
+
|
206
211
|
except Exception as e:
|
207
212
|
raise ValueError(f"Error getting clickable elements: {e}")
|
208
213
|
|
209
|
-
|
210
214
|
async def tap_by_index(self, index: int, serial: Optional[str] = None) -> str:
|
211
215
|
"""
|
212
216
|
Tap on a UI element by its index.
|
213
|
-
|
217
|
+
|
214
218
|
This function uses the cached clickable elements
|
215
219
|
to find the element with the given index and tap on its center coordinates.
|
216
|
-
|
220
|
+
|
217
221
|
Args:
|
218
222
|
index: Index of the element to tap
|
219
|
-
|
223
|
+
|
220
224
|
Returns:
|
221
225
|
Result message
|
222
226
|
"""
|
223
|
-
|
227
|
+
|
224
228
|
def collect_all_indices(elements):
|
225
229
|
"""Recursively collect all indices from elements and their children."""
|
226
230
|
indices = []
|
227
231
|
for item in elements:
|
228
|
-
if item.get(
|
229
|
-
indices.append(item.get(
|
232
|
+
if item.get("index") is not None:
|
233
|
+
indices.append(item.get("index"))
|
230
234
|
# Check children if present
|
231
|
-
children = item.get(
|
235
|
+
children = item.get("children", [])
|
232
236
|
indices.extend(collect_all_indices(children))
|
233
237
|
return indices
|
234
238
|
|
235
239
|
def find_element_by_index(elements, target_index):
|
236
240
|
"""Recursively find an element with the given index."""
|
237
241
|
for item in elements:
|
238
|
-
if item.get(
|
242
|
+
if item.get("index") == target_index:
|
239
243
|
return item
|
240
244
|
# Check children if present
|
241
|
-
children = item.get(
|
245
|
+
children = item.get("children", [])
|
242
246
|
result = find_element_by_index(children, target_index)
|
243
247
|
if result:
|
244
248
|
return result
|
245
249
|
return None
|
246
|
-
|
250
|
+
|
247
251
|
try:
|
248
252
|
# Check if we have cached elements
|
249
253
|
if not CLICKABLE_ELEMENTS_CACHE:
|
250
254
|
return "Error: No UI elements cached. Call get_clickables first."
|
251
|
-
|
255
|
+
|
252
256
|
# Find the element with the given index (including in children)
|
253
257
|
element = find_element_by_index(CLICKABLE_ELEMENTS_CACHE, index)
|
254
|
-
|
258
|
+
|
255
259
|
if not element:
|
256
260
|
# List available indices to help the user
|
257
261
|
indices = sorted(collect_all_indices(CLICKABLE_ELEMENTS_CACHE))
|
258
262
|
indices_str = ", ".join(str(idx) for idx in indices[:20])
|
259
263
|
if len(indices) > 20:
|
260
264
|
indices_str += f"... and {len(indices) - 20} more"
|
261
|
-
|
265
|
+
|
262
266
|
return f"Error: No element found with index {index}. Available indices: {indices_str}"
|
263
|
-
|
267
|
+
|
264
268
|
# Get the bounds of the element
|
265
|
-
bounds_str = element.get(
|
269
|
+
bounds_str = element.get("bounds")
|
266
270
|
if not bounds_str:
|
267
|
-
element_text = element.get(
|
268
|
-
element_type = element.get(
|
269
|
-
element_class = element.get(
|
271
|
+
element_text = element.get("text", "No text")
|
272
|
+
element_type = element.get("type", "unknown")
|
273
|
+
element_class = element.get("className", "Unknown class")
|
270
274
|
return f"Error: Element with index {index} ('{element_text}', {element_class}, type: {element_type}) has no bounds and cannot be tapped"
|
271
|
-
|
275
|
+
|
272
276
|
# Parse the bounds (format: "left,top,right,bottom")
|
273
277
|
try:
|
274
|
-
left, top, right, bottom = map(int, bounds_str.split(
|
278
|
+
left, top, right, bottom = map(int, bounds_str.split(","))
|
275
279
|
except ValueError:
|
276
280
|
return f"Error: Invalid bounds format for element with index {index}: {bounds_str}"
|
277
|
-
|
281
|
+
|
278
282
|
# Calculate the center of the element
|
279
283
|
x = (left + right) // 2
|
280
284
|
y = (top + bottom) // 2
|
281
|
-
|
285
|
+
|
282
286
|
# Get the device and tap at the coordinates
|
283
287
|
if serial:
|
284
|
-
from droidrun.adb import DeviceManager
|
285
288
|
device_manager = DeviceManager()
|
286
289
|
device = await device_manager.get_device(serial)
|
287
290
|
if not device:
|
288
291
|
return f"Error: Device {serial} not found"
|
289
292
|
else:
|
290
293
|
device = await self.get_device()
|
291
|
-
|
294
|
+
|
292
295
|
await device.tap(x, y)
|
293
|
-
|
296
|
+
|
294
297
|
# Add a small delay to allow UI to update
|
295
298
|
await asyncio.sleep(0.5)
|
296
|
-
|
297
|
-
|
299
|
+
|
298
300
|
# Create a descriptive response
|
299
301
|
response_parts = []
|
300
302
|
response_parts.append(f"Tapped element with index {index}")
|
301
303
|
response_parts.append(f"Text: '{element.get('text', 'No text')}'")
|
302
304
|
response_parts.append(f"Class: {element.get('className', 'Unknown class')}")
|
303
305
|
response_parts.append(f"Type: {element.get('type', 'unknown')}")
|
304
|
-
|
306
|
+
|
305
307
|
# Add information about children if present
|
306
|
-
children = element.get(
|
308
|
+
children = element.get("children", [])
|
307
309
|
if children:
|
308
|
-
child_texts = [
|
310
|
+
child_texts = [
|
311
|
+
child.get("text") for child in children if child.get("text")
|
312
|
+
]
|
309
313
|
if child_texts:
|
310
314
|
response_parts.append(f"Contains text: {' | '.join(child_texts)}")
|
311
|
-
|
315
|
+
|
312
316
|
response_parts.append(f"Coordinates: ({x}, {y})")
|
313
|
-
|
317
|
+
|
314
318
|
return " | ".join(response_parts)
|
315
319
|
except ValueError as e:
|
316
320
|
return f"Error: {str(e)}"
|
317
321
|
|
318
|
-
|
319
322
|
# Rename the old tap function to tap_by_coordinates for backward compatibility
|
320
323
|
async def tap_by_coordinates(self, x: int, y: int) -> bool:
|
321
324
|
"""
|
322
|
-
Tap on the device screen at specific coordinates.
|
323
|
-
|
325
|
+
Tap on the device screen at specific coordinates.
|
326
|
+
|
324
327
|
Args:
|
325
328
|
x: X coordinate
|
326
329
|
y: Y coordinate
|
327
|
-
|
330
|
+
|
328
331
|
Returns:
|
329
332
|
Bool indicating success or failure
|
330
333
|
"""
|
@@ -336,7 +339,7 @@ class Tools:
|
|
336
339
|
return f"Error: Device {self.serial} not found"
|
337
340
|
else:
|
338
341
|
device = await self.get_device()
|
339
|
-
|
342
|
+
|
340
343
|
await device.tap(x, y)
|
341
344
|
print(f"Tapped at coordinates ({x}, {y})")
|
342
345
|
return True
|
@@ -348,29 +351,24 @@ class Tools:
|
|
348
351
|
async def tap(self, index: int) -> str:
|
349
352
|
"""
|
350
353
|
Tap on a UI element by its index.
|
351
|
-
|
354
|
+
|
352
355
|
This function uses the cached clickable elements from the last get_clickables call
|
353
356
|
to find the element with the given index and tap on its center coordinates.
|
354
|
-
|
357
|
+
|
355
358
|
Args:
|
356
359
|
index: Index of the element to tap
|
357
|
-
|
360
|
+
|
358
361
|
Returns:
|
359
362
|
Result message
|
360
363
|
"""
|
361
364
|
return await self.tap_by_index(index)
|
362
365
|
|
363
366
|
async def swipe(
|
364
|
-
self,
|
365
|
-
start_x: int,
|
366
|
-
start_y: int,
|
367
|
-
end_x: int,
|
368
|
-
end_y: int,
|
369
|
-
duration_ms: int = 300
|
367
|
+
self, start_x: int, start_y: int, end_x: int, end_y: int, duration_ms: int = 300
|
370
368
|
) -> bool:
|
371
369
|
"""
|
372
370
|
Performs a straight-line swipe gesture on the device screen.
|
373
|
-
To perform a hold (long press), set the start and end coordinates to the same values and increase the duration as needed.
|
371
|
+
To perform a hold (long press), set the start and end coordinates to the same values and increase the duration as needed.
|
374
372
|
Args:
|
375
373
|
start_x: Starting X coordinate
|
376
374
|
start_y: Starting Y coordinate
|
@@ -388,8 +386,9 @@ class Tools:
|
|
388
386
|
return f"Error: Device {self.serial} not found"
|
389
387
|
else:
|
390
388
|
device = await self.get_device()
|
391
|
-
|
389
|
+
|
392
390
|
await device.swipe(start_x, start_y, end_x, end_y, duration_ms)
|
391
|
+
await asyncio.sleep(1)
|
393
392
|
print(f"Swiped from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration_ms}ms")
|
394
393
|
return True
|
395
394
|
except ValueError as e:
|
@@ -398,12 +397,12 @@ class Tools:
|
|
398
397
|
|
399
398
|
async def input_text(self, text: str, serial: Optional[str] = None) -> str:
|
400
399
|
"""
|
401
|
-
Input text on the device
|
402
|
-
|
400
|
+
Input text on the device.
|
401
|
+
Always make sure that the Focused Element is not None before inputting text.
|
402
|
+
|
403
403
|
Args:
|
404
404
|
text: Text to input. Can contain spaces, newlines, and special characters including non-ASCII.
|
405
|
-
|
406
|
-
|
405
|
+
|
407
406
|
Returns:
|
408
407
|
Result message
|
409
408
|
"""
|
@@ -415,53 +414,75 @@ class Tools:
|
|
415
414
|
return f"Error: Device {serial} not found"
|
416
415
|
else:
|
417
416
|
device = await self.get_device()
|
418
|
-
|
417
|
+
|
419
418
|
# Save the current keyboard
|
420
|
-
original_ime = await device._adb.shell(
|
419
|
+
original_ime = await device._adb.shell(
|
420
|
+
device._serial, "settings get secure default_input_method"
|
421
|
+
)
|
421
422
|
original_ime = original_ime.strip()
|
422
|
-
|
423
|
+
|
423
424
|
# Enable the Droidrun keyboard
|
424
|
-
await device._adb.shell(
|
425
|
-
|
425
|
+
await device._adb.shell(
|
426
|
+
device._serial, "ime enable com.droidrun.portal/.DroidrunKeyboardIME"
|
427
|
+
)
|
428
|
+
|
426
429
|
# Set the Droidrun keyboard as the default
|
427
|
-
await device._adb.shell(
|
428
|
-
|
430
|
+
await device._adb.shell(
|
431
|
+
device._serial, "ime set com.droidrun.portal/.DroidrunKeyboardIME"
|
432
|
+
)
|
433
|
+
|
429
434
|
# Wait for keyboard to change
|
430
435
|
await asyncio.sleep(0.2)
|
431
|
-
|
436
|
+
|
432
437
|
# Encode the text to Base64
|
433
438
|
import base64
|
439
|
+
|
434
440
|
encoded_text = base64.b64encode(text.encode()).decode()
|
435
|
-
|
436
|
-
|
437
|
-
cmd = f'am broadcast -a DROIDRUN_INPUT_B64 --es msg "{encoded_text}"'
|
441
|
+
|
442
|
+
cmd = f'am broadcast -a com.droidrun.portal.DROIDRUN_INPUT_B64 --es msg "{encoded_text}" -p com.droidrun.portal'
|
438
443
|
await device._adb.shell(device._serial, cmd)
|
439
|
-
|
444
|
+
|
440
445
|
# Wait for text input to complete
|
441
446
|
await asyncio.sleep(0.5)
|
442
|
-
|
447
|
+
|
443
448
|
# Restore the original keyboard
|
444
449
|
if original_ime and "com.droidrun.portal" not in original_ime:
|
445
450
|
await device._adb.shell(device._serial, f"ime set {original_ime}")
|
446
|
-
|
451
|
+
|
447
452
|
return f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"
|
448
453
|
except ValueError as e:
|
449
454
|
return f"Error: {str(e)}"
|
450
455
|
except Exception as e:
|
451
456
|
return f"Error sending text input: {str(e)}"
|
452
457
|
|
458
|
+
async def back(self) -> str:
|
459
|
+
"""
|
460
|
+
Go back on the current view.
|
461
|
+
This presses the Android back button.
|
462
|
+
"""
|
463
|
+
try:
|
464
|
+
if self.serial:
|
465
|
+
device_manager = DeviceManager()
|
466
|
+
device = await device_manager.get_device(self.serial)
|
467
|
+
if not device:
|
468
|
+
return f"Error: Device {self.serial} not found"
|
469
|
+
else:
|
470
|
+
device = await self.get_device()
|
471
|
+
|
472
|
+
await device.press_key(3)
|
473
|
+
return f"Pressed key BACK"
|
474
|
+
except ValueError as e:
|
475
|
+
return f"Error: {str(e)}"
|
476
|
+
|
453
477
|
async def press_key(self, keycode: int) -> str:
|
454
478
|
"""
|
455
|
-
Press a key on the device.
|
456
|
-
|
479
|
+
Press a key on the Android device.
|
480
|
+
|
457
481
|
Common keycodes:
|
458
|
-
- 3: HOME
|
459
482
|
- 4: BACK
|
460
|
-
-
|
461
|
-
-
|
462
|
-
|
463
|
-
- 82: MENU
|
464
|
-
|
483
|
+
- 66: ENTER
|
484
|
+
- 67: DELETE
|
485
|
+
|
465
486
|
Args:
|
466
487
|
keycode: Android keycode to press
|
467
488
|
"""
|
@@ -473,30 +494,23 @@ class Tools:
|
|
473
494
|
return f"Error: Device {self.serial} not found"
|
474
495
|
else:
|
475
496
|
device = await self.get_device()
|
476
|
-
|
497
|
+
|
477
498
|
key_names = {
|
478
|
-
|
499
|
+
66: "ENTER",
|
479
500
|
4: "BACK",
|
480
|
-
|
481
|
-
25: "VOLUME DOWN",
|
482
|
-
26: "POWER",
|
483
|
-
82: "MENU",
|
501
|
+
67: "DELETE",
|
484
502
|
}
|
485
503
|
key_name = key_names.get(keycode, str(keycode))
|
486
|
-
|
504
|
+
|
487
505
|
await device.press_key(keycode)
|
488
506
|
return f"Pressed key {key_name}"
|
489
507
|
except ValueError as e:
|
490
508
|
return f"Error: {str(e)}"
|
491
509
|
|
492
|
-
async def start_app(
|
493
|
-
self,
|
494
|
-
package: str,
|
495
|
-
activity: str = ""
|
496
|
-
) -> str:
|
510
|
+
async def start_app(self, package: str, activity: str = "") -> str:
|
497
511
|
"""
|
498
512
|
Start an app on the device.
|
499
|
-
|
513
|
+
|
500
514
|
Args:
|
501
515
|
package: Package name (e.g., "com.android.settings")
|
502
516
|
activity: Optional activity name
|
@@ -509,21 +523,18 @@ class Tools:
|
|
509
523
|
return f"Error: Device {self.serial} not found"
|
510
524
|
else:
|
511
525
|
device = await self.get_device()
|
512
|
-
|
526
|
+
|
513
527
|
result = await device.start_app(package, activity)
|
514
528
|
return result
|
515
529
|
except ValueError as e:
|
516
530
|
return f"Error: {str(e)}"
|
517
531
|
|
518
532
|
async def install_app(
|
519
|
-
self,
|
520
|
-
apk_path: str,
|
521
|
-
reinstall: bool = False,
|
522
|
-
grant_permissions: bool = True
|
533
|
+
self, apk_path: str, reinstall: bool = False, grant_permissions: bool = True
|
523
534
|
) -> str:
|
524
535
|
"""
|
525
536
|
Install an app on the device.
|
526
|
-
|
537
|
+
|
527
538
|
Args:
|
528
539
|
apk_path: Path to the APK file
|
529
540
|
reinstall: Whether to reinstall if app exists
|
@@ -537,25 +548,20 @@ class Tools:
|
|
537
548
|
return f"Error: Device {self.serial} not found"
|
538
549
|
else:
|
539
550
|
device = await self.get_device()
|
540
|
-
|
551
|
+
|
541
552
|
if not os.path.exists(apk_path):
|
542
553
|
return f"Error: APK file not found at {apk_path}"
|
543
|
-
|
554
|
+
|
544
555
|
result = await device.install_app(apk_path, reinstall, grant_permissions)
|
545
556
|
return result
|
546
557
|
except ValueError as e:
|
547
558
|
return f"Error: {str(e)}"
|
548
559
|
|
549
|
-
async def take_screenshot(self) ->
|
560
|
+
async def take_screenshot(self) -> Tuple[str, bytes]:
|
550
561
|
"""
|
551
562
|
Take a screenshot of the device.
|
552
|
-
|
553
563
|
This function captures the current screen and adds the screenshot to context in the next message.
|
554
|
-
|
555
|
-
This does not save the screenshot anywhere on the phone, it just attaches it to the next message.
|
556
|
-
|
557
|
-
Returns:
|
558
|
-
True if successful, False otherwise
|
564
|
+
Also stores the screenshot in the screenshots list with timestamp for later GIF creation.
|
559
565
|
"""
|
560
566
|
try:
|
561
567
|
if self.serial:
|
@@ -567,20 +573,26 @@ class Tools:
|
|
567
573
|
device = await self.get_device()
|
568
574
|
screen_tuple = await device.take_screenshot()
|
569
575
|
self.last_screenshot = screen_tuple[1]
|
570
|
-
|
576
|
+
|
577
|
+
# Store screenshot with timestamp
|
578
|
+
self.screenshots.append(
|
579
|
+
{
|
580
|
+
"timestamp": time.time(),
|
581
|
+
"image_data": screen_tuple[1],
|
582
|
+
"format": screen_tuple[0], # Usually 'PNG'
|
583
|
+
}
|
584
|
+
)
|
585
|
+
return screen_tuple
|
571
586
|
except ValueError as e:
|
572
587
|
raise ValueError(f"Error taking screenshot: {str(e)}")
|
573
588
|
|
574
|
-
async def list_packages(
|
575
|
-
self,
|
576
|
-
include_system_apps: bool = False
|
577
|
-
) -> List[str]:
|
589
|
+
async def list_packages(self, include_system_apps: bool = False) -> List[str]:
|
578
590
|
"""
|
579
591
|
List installed packages on the device.
|
580
|
-
|
592
|
+
|
581
593
|
Args:
|
582
594
|
include_system_apps: Whether to include system apps (default: False)
|
583
|
-
|
595
|
+
|
584
596
|
Returns:
|
585
597
|
List of package names
|
586
598
|
"""
|
@@ -592,32 +604,32 @@ class Tools:
|
|
592
604
|
raise ValueError(f"Device {self.serial} not found")
|
593
605
|
else:
|
594
606
|
device = await self.get_device()
|
595
|
-
|
607
|
+
|
596
608
|
# Use the direct ADB command to get packages with paths
|
597
609
|
cmd = ["pm", "list", "packages", "-f"]
|
598
610
|
if not include_system_apps:
|
599
611
|
cmd.append("-3")
|
600
|
-
|
612
|
+
|
601
613
|
output = await device._adb.shell(device._serial, " ".join(cmd))
|
602
|
-
|
614
|
+
|
603
615
|
# Parse the package list using the function
|
604
|
-
packages = self.parse_package_list(output)
|
616
|
+
packages = self.parse_package_list(output)
|
605
617
|
# Format package list for better readability
|
606
618
|
package_list = [pack["package"] for pack in packages]
|
607
|
-
print(f"Returning {len(package_list)} packages")
|
619
|
+
print(f"Returning {len(package_list)} packages")
|
608
620
|
return package_list
|
609
621
|
except ValueError as e:
|
610
622
|
raise ValueError(f"Error listing packages: {str(e)}")
|
611
623
|
|
612
624
|
async def extract(self, filename: Optional[str] = None) -> str:
|
613
625
|
"""Extract and save the current UI state to a JSON file.
|
614
|
-
|
626
|
+
|
615
627
|
This function captures the current UI state including all UI elements
|
616
628
|
and saves it to a JSON file for later analysis or reference.
|
617
|
-
|
629
|
+
|
618
630
|
Args:
|
619
631
|
filename: Optional filename to save the UI state (defaults to ui_state_TIMESTAMP.json)
|
620
|
-
|
632
|
+
|
621
633
|
Returns:
|
622
634
|
Path to the saved JSON file
|
623
635
|
"""
|
@@ -626,32 +638,32 @@ class Tools:
|
|
626
638
|
if not filename:
|
627
639
|
timestamp = int(time.time())
|
628
640
|
filename = f"ui_state_{timestamp}.json"
|
629
|
-
|
641
|
+
|
630
642
|
# Ensure the filename ends with .json
|
631
643
|
if not filename.endswith(".json"):
|
632
644
|
filename += ".json"
|
633
|
-
|
645
|
+
|
634
646
|
# Get the UI elements
|
635
647
|
ui_elements = await self.get_all_elements(self.serial)
|
636
|
-
|
648
|
+
|
637
649
|
# Save to file
|
638
650
|
save_path = os.path.abspath(filename)
|
639
651
|
async with aiofiles.open(save_path, "w", encoding="utf-8") as f:
|
640
652
|
await f.write(json.dumps(ui_elements, indent=2))
|
641
|
-
|
653
|
+
|
642
654
|
return f"UI state extracted and saved to {save_path}"
|
643
|
-
|
655
|
+
|
644
656
|
except Exception as e:
|
645
657
|
return f"Error extracting UI state: {e}"
|
646
658
|
|
647
659
|
async def get_all_elements(self) -> Dict[str, Any]:
|
648
660
|
"""
|
649
661
|
Get all UI elements from the device, including non-interactive elements.
|
650
|
-
|
662
|
+
|
651
663
|
This function interacts with the TopViewService app installed on the device
|
652
664
|
to capture all UI elements, even those that are not interactive. This provides
|
653
665
|
a complete view of the UI hierarchy for analysis or debugging purposes.
|
654
|
-
|
666
|
+
|
655
667
|
Returns:
|
656
668
|
Dictionary containing all UI elements extracted from the device screen
|
657
669
|
"""
|
@@ -661,85 +673,98 @@ class Tools:
|
|
661
673
|
device = await device_manager.get_device(self.serial)
|
662
674
|
if not device:
|
663
675
|
raise ValueError(f"Device {self.serial} not found")
|
664
|
-
|
676
|
+
|
665
677
|
# Create a temporary file for the JSON
|
666
678
|
with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as temp:
|
667
679
|
local_path = temp.name
|
668
|
-
|
680
|
+
|
669
681
|
try:
|
670
682
|
# Clear logcat to make it easier to find our output
|
671
683
|
await device._adb.shell(device._serial, "logcat -c")
|
672
|
-
|
684
|
+
|
673
685
|
# Trigger the custom service via broadcast to get ALL elements
|
674
|
-
await device._adb.shell(
|
675
|
-
|
686
|
+
await device._adb.shell(
|
687
|
+
device._serial,
|
688
|
+
"am broadcast -a com.droidrun.portal.GET_ALL_ELEMENTS",
|
689
|
+
)
|
690
|
+
|
676
691
|
# Poll for the JSON file path
|
677
692
|
start_time = asyncio.get_event_loop().time()
|
678
693
|
max_wait_time = 10 # Maximum wait time in seconds
|
679
694
|
poll_interval = 0.2 # Check every 200ms
|
680
|
-
|
695
|
+
|
681
696
|
device_path = None
|
682
697
|
while asyncio.get_event_loop().time() - start_time < max_wait_time:
|
683
698
|
# Check logcat for the file path
|
684
|
-
logcat_output = await device._adb.shell(
|
685
|
-
|
699
|
+
logcat_output = await device._adb.shell(
|
700
|
+
device._serial,
|
701
|
+
'logcat -d | grep "DROIDRUN_FILE" | grep "JSON data written to" | tail -1',
|
702
|
+
)
|
703
|
+
|
686
704
|
# Parse the file path if present
|
687
705
|
match = re.search(r"JSON data written to: (.*)", logcat_output)
|
688
706
|
if match:
|
689
707
|
device_path = match.group(1).strip()
|
690
708
|
break
|
691
|
-
|
709
|
+
|
692
710
|
# Wait before polling again
|
693
711
|
await asyncio.sleep(poll_interval)
|
694
|
-
|
712
|
+
|
695
713
|
# Check if we found the file path
|
696
714
|
if not device_path:
|
697
|
-
raise ValueError(
|
698
|
-
|
715
|
+
raise ValueError(
|
716
|
+
f"Failed to find the JSON file path in logcat after {max_wait_time} seconds"
|
717
|
+
)
|
718
|
+
|
699
719
|
# Pull the JSON file from the device
|
700
720
|
await device._adb.pull_file(device._serial, device_path, local_path)
|
701
|
-
|
721
|
+
|
702
722
|
# Read the JSON file
|
703
723
|
async with aiofiles.open(local_path, "r", encoding="utf-8") as f:
|
704
724
|
json_content = await f.read()
|
705
|
-
|
725
|
+
|
706
726
|
# Clean up the temporary file
|
707
727
|
with contextlib.suppress(OSError):
|
708
728
|
os.unlink(local_path)
|
709
|
-
|
729
|
+
|
710
730
|
# Try to parse the JSON
|
711
731
|
import json
|
732
|
+
|
712
733
|
try:
|
713
734
|
ui_data = json.loads(json_content)
|
714
|
-
|
735
|
+
|
715
736
|
return {
|
716
737
|
"all_elements": ui_data,
|
717
|
-
"count":
|
718
|
-
|
738
|
+
"count": (
|
739
|
+
len(ui_data)
|
740
|
+
if isinstance(ui_data, list)
|
741
|
+
else sum(1 for _ in ui_data.get("elements", []))
|
742
|
+
),
|
743
|
+
"message": "Retrieved all UI elements from the device screen",
|
719
744
|
}
|
720
745
|
except json.JSONDecodeError:
|
721
746
|
raise ValueError("Failed to parse UI elements JSON data")
|
722
|
-
|
747
|
+
|
723
748
|
except Exception as e:
|
724
749
|
# Clean up in case of error
|
725
750
|
with contextlib.suppress(OSError):
|
726
751
|
os.unlink(local_path)
|
727
752
|
raise ValueError(f"Error retrieving all UI elements: {e}")
|
728
|
-
|
753
|
+
|
729
754
|
except Exception as e:
|
730
755
|
raise ValueError(f"Error getting all UI elements: {e}")
|
731
|
-
|
756
|
+
|
732
757
|
def complete(self, success: bool, reason: str = ""):
|
733
758
|
"""
|
734
759
|
Mark the task as finished.
|
735
760
|
|
736
761
|
Args:
|
737
762
|
success: Indicates if the task was successful.
|
738
|
-
reason: Reason for failure
|
763
|
+
reason: Reason for failure/success
|
739
764
|
"""
|
740
765
|
if success:
|
741
766
|
self.success = True
|
742
|
-
self.reason =
|
767
|
+
self.reason = reason or "Task completed successfully."
|
743
768
|
self.finished = True
|
744
769
|
else:
|
745
770
|
self.success = False
|
@@ -748,14 +773,13 @@ class Tools:
|
|
748
773
|
self.reason = reason
|
749
774
|
self.finished = True
|
750
775
|
|
751
|
-
|
752
776
|
async def get_phone_state(self, serial: Optional[str] = None) -> Dict[str, Any]:
|
753
777
|
"""
|
754
778
|
Get the current phone state including current activity and keyboard visibility.
|
755
|
-
|
779
|
+
|
756
780
|
Args:
|
757
781
|
serial: Optional device serial number
|
758
|
-
|
782
|
+
|
759
783
|
Returns:
|
760
784
|
Dictionary with current phone state information
|
761
785
|
"""
|
@@ -768,71 +792,88 @@ class Tools:
|
|
768
792
|
raise ValueError(f"Device {serial} not found")
|
769
793
|
else:
|
770
794
|
device = await self.get_device()
|
771
|
-
|
772
|
-
#
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
#
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
|
792
|
-
|
795
|
+
|
796
|
+
# Clear logcat to make it easier to find our output
|
797
|
+
await device._adb.shell(device._serial, "logcat -c")
|
798
|
+
|
799
|
+
# Trigger the custom service via broadcast to get phone state
|
800
|
+
await device._adb.shell(
|
801
|
+
device._serial, "am broadcast -a com.droidrun.portal.GET_PHONE_STATE"
|
802
|
+
)
|
803
|
+
|
804
|
+
# Poll for the phone state data in logcat
|
805
|
+
start_time = asyncio.get_event_loop().time()
|
806
|
+
max_wait_time = 10 # Maximum wait time in seconds
|
807
|
+
poll_interval = 0.2 # Check every 200ms
|
808
|
+
|
809
|
+
while asyncio.get_event_loop().time() - start_time < max_wait_time:
|
810
|
+
# Check logcat for the phone state data
|
811
|
+
logcat_output = await device._adb.shell(
|
812
|
+
device._serial,
|
813
|
+
'logcat -d | grep "DROIDRUN_PHONE_STATE_DATA" | tail -1',
|
814
|
+
)
|
815
|
+
|
816
|
+
# Parse the JSON data if present
|
817
|
+
if "CHUNK|" in logcat_output:
|
818
|
+
# Format: DROIDRUN_PHONE_STATE_DATA: CHUNK|0|1|{json_data}
|
819
|
+
# Extract the JSON part after the last |
|
820
|
+
parts = logcat_output.split("|")
|
821
|
+
if len(parts) >= 4:
|
822
|
+
json_data = "|".join(
|
823
|
+
parts[3:]
|
824
|
+
) # In case JSON contains | characters
|
825
|
+
try:
|
826
|
+
phone_state = json.loads(json_data)
|
827
|
+
return phone_state
|
828
|
+
except json.JSONDecodeError:
|
829
|
+
# If JSON parsing failed, wait and retry
|
830
|
+
await asyncio.sleep(poll_interval)
|
831
|
+
continue
|
832
|
+
|
833
|
+
# Wait before polling again
|
834
|
+
await asyncio.sleep(poll_interval)
|
835
|
+
|
836
|
+
# If we couldn't get the phone state, return error
|
793
837
|
return {
|
794
|
-
"
|
795
|
-
"
|
838
|
+
"error": "Timeout",
|
839
|
+
"message": f"Failed to get phone state data after {max_wait_time} seconds",
|
796
840
|
}
|
797
|
-
|
841
|
+
|
798
842
|
except Exception as e:
|
799
|
-
return {
|
800
|
-
"error": str(e),
|
801
|
-
"message": f"Error getting phone state: {str(e)}"
|
802
|
-
}
|
843
|
+
return {"error": str(e), "message": f"Error getting phone state: {str(e)}"}
|
803
844
|
|
804
845
|
async def remember(self, information: str) -> str:
    """
    Store important information to remember for future context.

    This information will be extracted and included into your next steps to maintain context
    across interactions. Use this for critical facts, observations, or user preferences
    that should influence future decisions.

    Args:
        information: The information to remember. Must be a string that is
            non-empty after surrounding whitespace is removed.

    Returns:
        Confirmation message, or an error message if the input is not usable.
    """
    if not isinstance(information, str):
        return "Error: Please provide valid information to remember."

    # Validate the stripped text: the stripped form is what gets stored, so a
    # whitespace-only input would otherwise add an empty entry to memory.
    note = information.strip()
    if not note:
        return "Error: Please provide valid information to remember."

    # Add the information to memory
    self.memory.append(note)

    # Limit memory size to prevent context overflow (keep most recent items)
    max_memory_items = 10
    if len(self.memory) > max_memory_items:
        self.memory = self.memory[-max_memory_items:]

    return f"Remembered: {information}"
|
830
|
-
|
871
|
+
|
831
872
|
def get_memory(self) -> List[str]:
    """
    Return a snapshot of everything currently stored in memory.

    The result is a new list, so callers may modify it without changing
    the stored memory.

    Returns:
        List of stored memory items
    """
    snapshot = list(self.memory)
    return snapshot
|