droidrun 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/__init__.py +15 -8
- droidrun/__main__.py +2 -3
- droidrun/adb/device.py +1 -1
- droidrun/agent/codeact/__init__.py +13 -0
- droidrun/agent/codeact/codeact_agent.py +334 -0
- droidrun/agent/codeact/events.py +36 -0
- droidrun/agent/codeact/prompts.py +78 -0
- droidrun/agent/droid/__init__.py +13 -0
- droidrun/agent/droid/droid_agent.py +418 -0
- droidrun/agent/planner/__init__.py +15 -0
- droidrun/agent/planner/events.py +20 -0
- droidrun/agent/planner/prompts.py +144 -0
- droidrun/agent/planner/task_manager.py +355 -0
- droidrun/agent/planner/workflow.py +371 -0
- droidrun/agent/utils/async_utils.py +56 -0
- droidrun/agent/utils/chat_utils.py +92 -0
- droidrun/agent/utils/executer.py +97 -0
- droidrun/agent/utils/llm_picker.py +143 -0
- droidrun/cli/main.py +422 -107
- droidrun/tools/__init__.py +4 -25
- droidrun/tools/actions.py +767 -783
- droidrun/tools/device.py +1 -1
- droidrun/tools/loader.py +60 -0
- {droidrun-0.1.0.dist-info → droidrun-0.2.0.dist-info}/METADATA +134 -37
- droidrun-0.2.0.dist-info/RECORD +32 -0
- droidrun/agent/__init__.py +0 -16
- droidrun/agent/llm_reasoning.py +0 -567
- droidrun/agent/react_agent.py +0 -556
- droidrun/llm/__init__.py +0 -24
- droidrun-0.1.0.dist-info/RECORD +0 -20
- {droidrun-0.1.0.dist-info → droidrun-0.2.0.dist-info}/WHEEL +0 -0
- {droidrun-0.1.0.dist-info → droidrun-0.2.0.dist-info}/entry_points.txt +0 -0
- {droidrun-0.1.0.dist-info → droidrun-0.2.0.dist-info}/licenses/LICENSE +0 -0
droidrun/tools/actions.py
CHANGED
@@ -11,844 +11,828 @@ import asyncio
|
|
11
11
|
import aiofiles
|
12
12
|
import contextlib
|
13
13
|
from typing import Optional, Dict, Tuple, List, Any
|
14
|
-
from
|
14
|
+
from ..adb import Device, DeviceManager
|
15
15
|
|
16
|
-
# Global variable to store clickable elements for index-based tapping
|
17
|
-
CLICKABLE_ELEMENTS_CACHE = []
|
18
16
|
|
19
|
-
|
20
|
-
|
21
|
-
"""Get the device serial from environment variable.
|
22
|
-
|
23
|
-
Returns:
|
24
|
-
Device serial from environment or None
|
25
|
-
"""
|
26
|
-
return os.environ.get("DROIDRUN_DEVICE_SERIAL", "")
|
17
|
+
class Tools:
|
18
|
+
"""Core UI interaction tools for Android device control."""
|
27
19
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
device = await device_manager.get_device(serial)
|
40
|
-
if not device:
|
41
|
-
raise ValueError(f"Device {serial} not found")
|
42
|
-
|
43
|
-
return device
|
20
|
+
def __init__(self, serial: str) -> None:
    """Set up the per-instance state used by the tool methods.

    Args:
        serial: ADB serial of the device this instance should drive.
    """
    # Device addressing.
    self.serial = serial
    self.device_manager = DeviceManager()

    # Instance-level cache for clickable elements (index-based tapping).
    self.clickable_elements_cache: List[Dict[str, Any]] = []
    self.last_screenshot = None

    # NOTE(review): presumably task-completion bookkeeping written by the
    # agent layer; nothing in this part of the file reads these three.
    self.finished = False
    self.success = None
    self.reason = None

    # Free-form notes remembered across steps.
    self.memory: List[str] = []
|
44
31
|
|
45
|
-
def
|
46
|
-
|
32
|
+
def get_device_serial(self) -> str:
    """Return the serial to use for device lookups.

    Returns:
        ``self.serial`` when it is truthy; otherwise the value of the
        ``DROIDRUN_DEVICE_SERIAL`` environment variable, or ``""`` when
        that variable is not set.
    """
    # The instance's own serial wins; the environment is only a fallback.
    return self.serial or os.environ.get("DROIDRUN_DEVICE_SERIAL", "")
|
47
40
|
|
48
|
-
|
49
|
-
|
41
|
+
async def get_device(self) -> Optional[Device]:
    """Look up the device this instance operates on.

    The serial comes from ``get_device_serial()`` and the lookup goes
    through ``self.device_manager``.

    Returns:
        The object returned by the device manager for the resolved serial.
        A missing device raises rather than returning ``None``.

    Raises:
        ValueError: If no serial can be resolved, or the device manager
            returns a falsy value for it.
    """
    resolved = self.get_device_serial()
    if not resolved:
        raise ValueError("No device serial specified - set DROIDRUN_DEVICE_SERIAL environment variable or provide device_serial parameter")

    found = await self.device_manager.get_device(resolved)
    if found:
        return found
    raise ValueError(f"Device {resolved} not found")
|
50
56
|
|
51
|
-
|
52
|
-
|
53
|
-
"""
|
54
|
-
apps = []
|
55
|
-
for line in output.splitlines():
|
56
|
-
if line.startswith("package:"):
|
57
|
-
# Format is: "package:/path/to/base.apk=com.package.name"
|
58
|
-
path_and_pkg = line[8:] # Strip "package:"
|
59
|
-
if "=" in path_and_pkg:
|
60
|
-
path, package = path_and_pkg.rsplit("=", 1)
|
61
|
-
apps.append({"package": package.strip(), "path": path.strip()})
|
62
|
-
return apps
|
57
|
+
def parse_package_list(self, output: str) -> List[Dict[str, str]]:
    """Turn ``pm list packages -f`` output into package/path records.

    Each relevant line looks like ``package:/path/to/base.apk=com.package.name``.
    Lines without the ``package:`` prefix, or without an ``=`` after it,
    are ignored.

    Args:
        output: Raw command output from 'pm list packages -f'.

    Returns:
        One dict per parsed line with the keys 'package' and 'path',
        both stripped of surrounding whitespace.
    """
    prefix = "package:"
    payloads = (
        row[len(prefix):]
        for row in output.splitlines()
        if row.startswith(prefix)
    )
    # Split on the LAST "=" because the APK path itself may contain "=".
    return [
        {"package": name.strip(), "path": apk.strip()}
        for apk, sep, name in (payload.rpartition("=") for payload in payloads)
        if sep
    ]
|
75
|
+
|
76
|
+
async def get_clickables(self, serial: Optional[str] = None) -> Dict[str, Any]:
    """Fetch the current UI elements from the device.

    Each attempt clears logcat, sends the
    ``com.droidrun.portal.GET_ELEMENTS`` broadcast, polls logcat (up to
    10 s) for the path of the JSON file written on the device, pulls that
    file to a local temp file and parses it. Attempts repeat for up to
    30 seconds until a non-empty element list is obtained.

    The "type" key is removed from every top-level element and from its
    direct children. The result is stored in
    ``self.clickable_elements_cache`` and, for code that still reads it,
    in the module-level ``CLICKABLE_ELEMENTS_CACHE``.

    Args:
        serial: Optional device serial; when omitted the device comes
            from ``self.get_device()``.

    Returns:
        Dict with the keys "clickable_elements" (the filtered list),
        "count" and "message".

    Raises:
        ValueError: On any failure (device not found, timeout, I/O or
            parsing problems); the original error text is embedded in
            the message.
    """
    global CLICKABLE_ELEMENTS_CACHE

    def strip_type(node):
        """Return a shallow copy of *node* without its "type" key."""
        return {k: v for k, v in node.items() if k != "type"}

    try:
        # Get the device
        if serial:
            device = await DeviceManager().get_device(serial)
            if not device:
                raise ValueError(f"Device {serial} not found")
        else:
            device = await self.get_device()

        # Reserve a local path for the pulled JSON. delete=False because
        # the file is re-opened by name after this handle is closed.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as temp:
            local_path = temp.name

        try:
            max_total_time = 30  # Maximum total time to try in seconds
            retry_interval = 1.0  # Time between retries in seconds
            now = asyncio.get_event_loop().time
            start_total_time = now()

            while True:
                # The overall deadline is only checked between attempts.
                if now() - start_total_time > max_total_time:
                    raise ValueError(f"Failed to get UI elements after {max_total_time} seconds of retries")

                # Clear logcat so only this attempt's output is matched.
                await device._adb.shell(device._serial, "logcat -c")

                # Ask the on-device service to dump the elements.
                await device._adb.shell(device._serial, "am broadcast -a com.droidrun.portal.GET_ELEMENTS")

                # Poll logcat for the path of the JSON file.
                max_wait_time = 10  # Maximum wait time in seconds
                poll_interval = 0.2  # Check every 200ms
                device_path = None
                start_time = now()
                while now() - start_time < max_wait_time:
                    logcat_output = await device._adb.shell(device._serial, "logcat -d | grep \"DROIDRUN_FILE\" | grep \"JSON data written to\" | tail -1")
                    match = re.search(r"JSON data written to: (.*)", logcat_output)
                    if match:
                        device_path = match.group(1).strip()
                        break
                    await asyncio.sleep(poll_interval)

                if not device_path:
                    await asyncio.sleep(retry_interval)
                    continue

                # Pull the JSON file from the device and read it.
                await device._adb.pull_file(device._serial, device_path, local_path)
                async with aiofiles.open(local_path, "r", encoding="utf-8") as f:
                    json_content = await f.read()

                try:
                    ui_data = json.loads(json_content)
                except json.JSONDecodeError:
                    # Unparsable file: wait and retry.
                    await asyncio.sleep(retry_interval)
                    continue

                filtered_data = []
                for element in ui_data:
                    filtered_element = strip_type(element)
                    if "children" in filtered_element:
                        filtered_element["children"] = [
                            strip_type(child)
                            for child in filtered_element["children"]
                        ]
                    filtered_data.append(filtered_element)

                if not filtered_data:
                    # No elements yet: wait and retry.
                    await asyncio.sleep(retry_interval)
                    continue

                # Keep the instance cache authoritative; the module global
                # is still written for readers that have not migrated.
                self.clickable_elements_cache = filtered_data
                CLICKABLE_ELEMENTS_CACHE = filtered_data

                # Add a small sleep to ensure UI is fully loaded/processed
                await asyncio.sleep(0.5)

                return {
                    "clickable_elements": filtered_data,
                    "count": len(filtered_data),
                    "message": f"Found {len(filtered_data)} UI elements after retrying"
                }

        except Exception as e:
            raise ValueError(f"Error retrieving clickable elements: {e}") from e
        finally:
            # Remove the temp file on success as well as on failure.
            with contextlib.suppress(OSError):
                os.unlink(local_path)

    except Exception as e:
        raise ValueError(f"Error getting clickable elements: {e}") from e
|
248
208
|
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
# Get the bounds of the element
|
287
|
-
bounds_str = element.get('bounds')
|
288
|
-
if not bounds_str:
|
289
|
-
element_text = element.get('text', 'No text')
|
290
|
-
element_type = element.get('type', 'unknown')
|
291
|
-
element_class = element.get('className', 'Unknown class')
|
292
|
-
|
293
|
-
# Check if this is a child element with a parent that can be tapped instead
|
294
|
-
parent_suggestion = ""
|
295
|
-
if 'parentIndex' in element:
|
296
|
-
parent_idx = element.get('parentIndex')
|
297
|
-
parent_suggestion = f" You might want to tap its parent element with index {parent_idx} instead."
|
298
|
-
|
299
|
-
return f"Error: Element with index {index} ('{element_text}', {element_class}, type: {element_type}) has no bounds and cannot be tapped directly.{parent_suggestion}"
|
209
|
+
|
210
|
+
async def tap_by_index(self, index: int, serial: Optional[str] = None) -> str:
    """Tap the centre of the cached UI element that has the given index.

    Elements are looked up in ``self.clickable_elements_cache``. If that
    is empty, the module-level ``CLICKABLE_ELEMENTS_CACHE`` is used when
    it exists, so results stored there by ``get_clickables`` are still
    found. The search includes nested ``children``.

    Args:
        index: Index of the element to tap.
        serial: Optional device serial; when omitted the device comes
            from ``self.get_device()``.

    Returns:
        A " | "-joined description of the tapped element, or a string
        starting with "Error:" when nothing is cached, the index is
        unknown, the element has no usable bounds, or the device cannot
        be resolved.
    """

    def collect_all_indices(elements):
        """Recursively collect all indices from elements and their children."""
        indices = []
        for item in elements:
            if item.get('index') is not None:
                indices.append(item.get('index'))
            indices.extend(collect_all_indices(item.get('children', [])))
        return indices

    def find_element_by_index(elements, target_index):
        """Recursively find an element with the given index."""
        for item in elements:
            if item.get('index') == target_index:
                return item
            result = find_element_by_index(item.get('children', []), target_index)
            if result is not None:
                return result
        return None

    try:
        # Prefer the instance cache. The module global may not exist at
        # all, so it is read via globals() rather than by bare name, which
        # would raise NameError before the first get_clickables call.
        cached = self.clickable_elements_cache or globals().get("CLICKABLE_ELEMENTS_CACHE")
        if not cached:
            return "Error: No UI elements cached. Call get_clickables first."

        # Find the element with the given index (including in children)
        element = find_element_by_index(cached, index)

        if element is None:
            # List available indices to help the user
            indices = sorted(collect_all_indices(cached))
            indices_str = ", ".join(str(idx) for idx in indices[:20])
            if len(indices) > 20:
                indices_str += f"... and {len(indices) - 20} more"

            return f"Error: No element found with index {index}. Available indices: {indices_str}"

        # Get the bounds of the element
        bounds_str = element.get('bounds')
        if not bounds_str:
            element_text = element.get('text', 'No text')
            element_type = element.get('type', 'unknown')
            element_class = element.get('className', 'Unknown class')
            return f"Error: Element with index {index} ('{element_text}', {element_class}, type: {element_type}) has no bounds and cannot be tapped"

        # Parse the bounds (format: "left,top,right,bottom")
        try:
            left, top, right, bottom = map(int, bounds_str.split(','))
        except ValueError:
            return f"Error: Invalid bounds format for element with index {index}: {bounds_str}"

        # Calculate the center of the element
        x = (left + right) // 2
        y = (top + bottom) // 2

        # Get the device and tap at the coordinates
        if serial:
            device = await DeviceManager().get_device(serial)
            if not device:
                return f"Error: Device {serial} not found"
        else:
            device = await self.get_device()

        await device.tap(x, y)

        # Add a small delay to allow UI to update
        await asyncio.sleep(0.5)

        # Create a descriptive response
        response_parts = [
            f"Tapped element with index {index}",
            f"Text: '{element.get('text', 'No text')}'",
            f"Class: {element.get('className', 'Unknown class')}",
            f"Type: {element.get('type', 'unknown')}",
        ]

        # Add information about children if present
        child_texts = [
            child.get('text')
            for child in element.get('children', [])
            if child.get('text')
        ]
        if child_texts:
            response_parts.append(f"Contains text: {' | '.join(child_texts)}")

        response_parts.append(f"Coordinates: ({x}, {y})")

        return " | ".join(response_parts)
    except ValueError as e:
        return f"Error: {str(e)}"
|
317
|
+
|
318
|
+
|
319
|
+
# Rename the old tap function to tap_by_coordinates for backward compatibility
|
320
|
+
async def tap_by_coordinates(self, x: int, y: int) -> bool:
    """Tap on the device screen at specific coordinates.

    Args:
        x: X coordinate
        y: Y coordinate

    Returns:
        True after the tap was sent; False when the device could not be
        resolved or a ValueError was raised (the reason is printed).
    """
    try:
        if self.serial:
            device = await DeviceManager().get_device(self.serial)
            if not device:
                # Raise instead of returning an error string: a non-empty
                # string is truthy and would look like success to callers
                # of this bool-returning method.
                raise ValueError(f"Device {self.serial} not found")
        else:
            device = await self.get_device()

        await device.tap(x, y)
        print(f"Tapped at coordinates ({x}, {y})")
        return True
    except ValueError as e:
        print(f"Error: {str(e)}")
        return False
|
346
|
+
|
347
|
+
# Replace the old tap function with the new one
|
348
|
+
async def tap(self, index: int) -> str:
    """Tap a UI element identified by its index.

    Thin alias that forwards to ``tap_by_index`` with the default device.

    Args:
        index: Index of the element to tap

    Returns:
        The result message produced by ``tap_by_index``.
    """
    outcome = await self.tap_by_index(index)
    return outcome
|
380
362
|
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
363
|
+
async def swipe(
    self,
    start_x: int,
    start_y: int,
    end_x: int,
    end_y: int,
    duration_ms: int = 300
) -> bool:
    """Perform a straight-line swipe gesture on the device screen.

    To perform a hold (long press), set the start and end coordinates to
    the same values and increase the duration as needed.

    Args:
        start_x: Starting X coordinate
        start_y: Starting Y coordinate
        end_x: Ending X coordinate
        end_y: Ending Y coordinate
        duration_ms: Duration of swipe in milliseconds

    Returns:
        True after the swipe was sent; False when the device could not be
        resolved or a ValueError was raised (the reason is printed).
    """
    try:
        if self.serial:
            device = await DeviceManager().get_device(self.serial)
            if not device:
                # Raise instead of returning an error string: a non-empty
                # string is truthy and would look like success to callers
                # of this bool-returning method.
                raise ValueError(f"Device {self.serial} not found")
        else:
            device = await self.get_device()

        await device.swipe(start_x, start_y, end_x, end_y, duration_ms)
        print(f"Swiped from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration_ms}ms")
        return True
    except ValueError as e:
        print(f"Error: {str(e)}")
        return False
|
398
|
+
|
399
|
+
async def input_text(self, text: str, serial: Optional[str] = None) -> str:
    """Input text on the device using Base64 encoding and a broadcast intent.

    The current default input method is read first. The Droidrun keyboard
    is then enabled and selected, the text is sent Base64-encoded via the
    ``DROIDRUN_INPUT_B64`` broadcast, and the previous input method is
    selected again, even when sending fails part-way.

    Args:
        text: Text to input. Can contain spaces, newlines, and special
            characters including non-ASCII.
        serial: Optional device serial (for backward compatibility)

    Returns:
        A confirmation containing the first 50 characters of the text,
        or a message starting with "Error" on failure.
    """
    try:
        if serial:
            device = await DeviceManager().get_device(serial)
            if not device:
                return f"Error: Device {serial} not found"
        else:
            device = await self.get_device()

        # Save the current keyboard
        original_ime = await device._adb.shell(device._serial, "settings get secure default_input_method")
        original_ime = original_ime.strip()

        try:
            # Enable the Droidrun keyboard and make it the default
            await device._adb.shell(device._serial, "ime enable com.droidrun.portal/.DroidrunKeyboardIME")
            await device._adb.shell(device._serial, "ime set com.droidrun.portal/.DroidrunKeyboardIME")

            # Wait for keyboard to change
            await asyncio.sleep(0.2)

            # Base64 keeps arbitrary characters intact on the shell command line
            import base64
            encoded_text = base64.b64encode(text.encode("utf-8")).decode()

            # Send the broadcast intent with the Base64-encoded text
            cmd = f'am broadcast -a DROIDRUN_INPUT_B64 --es msg "{encoded_text}"'
            await device._adb.shell(device._serial, cmd)

            # Wait for text input to complete
            await asyncio.sleep(0.5)
        finally:
            # Always put the user's keyboard back, so a failed input does
            # not leave the Droidrun IME selected on the device.
            if original_ime and "com.droidrun.portal" not in original_ime:
                await device._adb.shell(device._serial, f"ime set {original_ime}")

        return f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"
    except ValueError as e:
        return f"Error: {str(e)}"
    except Exception as e:
        return f"Error sending text input: {str(e)}"
|
404
452
|
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
453
|
+
async def press_key(self, keycode: int) -> str:
    """Send a single Android key event to the device.

    Keycodes with a friendly name in the result message:
    - 3: HOME
    - 4: BACK
    - 24: VOLUME UP
    - 25: VOLUME DOWN
    - 26: POWER
    - 82: MENU

    Args:
        keycode: Android keycode to press

    Returns:
        "Pressed key <name-or-number>" on success, otherwise a message
        starting with "Error:".
    """
    labels = {
        3: "HOME",
        4: "BACK",
        24: "VOLUME UP",
        25: "VOLUME DOWN",
        26: "POWER",
        82: "MENU",
    }
    try:
        if not self.serial:
            target = await self.get_device()
        else:
            target = await DeviceManager().get_device(self.serial)
            if not target:
                return f"Error: Device {self.serial} not found"

        # Unnamed keycodes are reported by their number.
        label = labels.get(keycode, str(keycode))
        await target.press_key(keycode)
        return f"Pressed key {label}"
    except ValueError as err:
        return f"Error: {str(err)}"
|
421
491
|
|
422
|
-
async def
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
else:
|
448
|
-
device = await get_device()
|
449
|
-
|
450
|
-
await device.swipe(start_x, start_y, end_x, end_y, duration_ms)
|
451
|
-
return f"Swiped from ({start_x}, {start_y}) to ({end_x}, {end_y})"
|
452
|
-
except ValueError as e:
|
453
|
-
return f"Error: {str(e)}"
|
492
|
+
async def start_app(self, package: str, activity: str = "") -> str:
    """Launch an app on the device.

    Args:
        package: Package name (e.g., "com.android.settings")
        activity: Optional activity name

    Returns:
        Whatever the device's ``start_app`` returns, or a message starting
        with "Error:" when the device cannot be resolved.
    """
    try:
        if not self.serial:
            target = await self.get_device()
        else:
            target = await DeviceManager().get_device(self.serial)
            if not target:
                return f"Error: Device {self.serial} not found"

        launched = await target.start_app(package, activity)
        return launched
    except ValueError as err:
        return f"Error: {str(err)}"
|
454
517
|
|
455
|
-
async def
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
return
|
485
|
-
|
486
|
-
# Split text into smaller chunks (max 500 chars)
|
487
|
-
chunk_size = 500
|
488
|
-
chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
|
489
|
-
|
490
|
-
for chunk in chunks:
|
491
|
-
# Escape the text chunk
|
492
|
-
escaped_chunk = escape_text(chunk)
|
493
|
-
|
494
|
-
# Try different input methods if one fails
|
495
|
-
methods = [
|
496
|
-
f'input text "{escaped_chunk}"', # Standard method
|
497
|
-
f'am broadcast -a ADB_INPUT_TEXT --es msg "{escaped_chunk}"', # Broadcast intent method
|
498
|
-
f'input keyboard text "{escaped_chunk}"' # Keyboard method
|
499
|
-
]
|
500
|
-
|
501
|
-
success = False
|
502
|
-
last_error = None
|
503
|
-
|
504
|
-
for method in methods:
|
505
|
-
try:
|
506
|
-
await device._adb.shell(device._serial, method)
|
507
|
-
success = True
|
508
|
-
break
|
509
|
-
except Exception as e:
|
510
|
-
last_error = str(e)
|
511
|
-
continue
|
512
|
-
|
513
|
-
if not success:
|
514
|
-
return f"Error: Failed to input text chunk. Last error: {last_error}"
|
515
|
-
|
516
|
-
# Small delay between chunks
|
517
|
-
await asyncio.sleep(0.1)
|
518
|
-
|
519
|
-
return f"Text input completed: {text}"
|
520
|
-
except ValueError as e:
|
521
|
-
return f"Error: {str(e)}"
|
518
|
+
async def install_app(
    self,
    apk_path: str,
    reinstall: bool = False,
    grant_permissions: bool = True
) -> str:
    """
    Install an APK on the target device.

    The device is resolved first (via ``DeviceManager`` when ``self.serial`` is
    set, otherwise via ``self.get_device()``); only then is the local APK path
    checked.

    Args:
        apk_path: Local path to the APK file
        reinstall: Forwarded to the device's ``install_app`` call
        grant_permissions: Forwarded to the device's ``install_app`` call

    Returns:
        The value returned by the device's ``install_app`` call, or an
        ``"Error: ..."`` string when the device or the APK file is missing or a
        ``ValueError`` is raised.
    """
    try:
        if not self.serial:
            target = await self.get_device()
        else:
            target = await DeviceManager().get_device(self.serial)
            if not target:
                return f"Error: Device {self.serial} not found"

        if os.path.exists(apk_path):
            return await target.install_app(apk_path, reinstall, grant_permissions)
        return f"Error: APK file not found at {apk_path}"
    except ValueError as err:
        return f"Error: {str(err)}"
|
522
548
|
|
523
|
-
async def
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
Common keycodes:
|
528
|
-
- 3: HOME
|
529
|
-
- 4: BACK
|
530
|
-
- 24: VOLUME UP
|
531
|
-
- 25: VOLUME DOWN
|
532
|
-
- 26: POWER
|
533
|
-
- 82: MENU
|
534
|
-
|
535
|
-
Args:
|
536
|
-
keycode: Android keycode to press
|
537
|
-
serial: Optional device serial (for backward compatibility)
|
538
|
-
"""
|
539
|
-
try:
|
540
|
-
if serial:
|
541
|
-
device_manager = DeviceManager()
|
542
|
-
device = await device_manager.get_device(serial)
|
543
|
-
if not device:
|
544
|
-
return f"Error: Device {serial} not found"
|
545
|
-
else:
|
546
|
-
device = await get_device()
|
547
|
-
|
548
|
-
key_names = {
|
549
|
-
3: "HOME",
|
550
|
-
4: "BACK",
|
551
|
-
24: "VOLUME UP",
|
552
|
-
25: "VOLUME DOWN",
|
553
|
-
26: "POWER",
|
554
|
-
82: "MENU",
|
555
|
-
}
|
556
|
-
key_name = key_names.get(keycode, str(keycode))
|
557
|
-
|
558
|
-
await device.press_key(keycode)
|
559
|
-
return f"Pressed key {key_name}"
|
560
|
-
except ValueError as e:
|
561
|
-
return f"Error: {str(e)}"
|
549
|
+
async def take_screenshot(self) -> bool:
    """
    Take a screenshot of the device.

    The second element of the tuple returned by the device's
    ``take_screenshot`` call is stored on ``self.last_screenshot``; nothing is
    returned to the caller besides the success flag.

    Returns:
        True once the screenshot has been stored.

    Raises:
        ValueError: If the device is not found or a ``ValueError`` occurs while
            capturing; the message is prefixed with "Error taking screenshot: ".
    """
    try:
        if not self.serial:
            target = await self.get_device()
        else:
            target = await DeviceManager().get_device(self.serial)
            if not target:
                raise ValueError(f"Device {self.serial} not found")

        capture = await target.take_screenshot()
        self.last_screenshot = capture[1]
        return True
    except ValueError as err:
        raise ValueError(f"Error taking screenshot: {str(err)}")
|
589
573
|
|
590
|
-
async def
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
574
|
+
async def list_packages(
    self,
    include_system_apps: bool = False
) -> List[str]:
    """
    List installed packages on the device.

    Runs ``pm list packages -f`` through the device's ADB shell (adding ``-3``
    unless system apps are requested), hands the output to
    ``self.parse_package_list`` and keeps the ``"package"`` field of each entry.

    Args:
        include_system_apps: Whether to include system apps (default: False)

    Returns:
        List of package names

    Raises:
        ValueError: If the device is not found or a ``ValueError`` occurs while
            listing; the message is prefixed with "Error listing packages: ".
    """
    try:
        if not self.serial:
            target = await self.get_device()
        else:
            target = await DeviceManager().get_device(self.serial)
            if not target:
                raise ValueError(f"Device {self.serial} not found")

        # "-f" makes pm print the APK path next to each package name.
        pm_command = "pm list packages -f"
        if not include_system_apps:
            pm_command += " -3"

        raw_listing = await target._adb.shell(target._serial, pm_command)

        names = [entry["package"] for entry in self.parse_package_list(raw_listing)]
        print(f"Returning {len(names)} packages")
        return names
    except ValueError as err:
        raise ValueError(f"Error listing packages: {str(err)}")
|
611
|
+
|
612
|
+
async def extract(self, filename: Optional[str] = None) -> str:
    """Extract and save the current UI state to a JSON file.

    Captures the UI elements via ``self.get_all_elements()`` and writes them,
    JSON-encoded with a two-space indent, to ``filename``.

    Args:
        filename: Optional filename to save the UI state (defaults to
            ui_state_TIMESTAMP.json); ".json" is appended when missing.

    Returns:
        A confirmation message containing the absolute path of the saved file,
        or an "Error extracting UI state: ..." message if anything fails.
    """
    # Imported locally, as get_all_elements does, so this method does not rely
    # on a module-level json import.
    import json

    try:
        # Generate default filename if not provided
        if not filename:
            filename = f"ui_state_{int(time.time())}.json"

        # Ensure the filename ends with .json
        if not filename.endswith(".json"):
            filename += ".json"

        # get_all_elements() takes no arguments (it reads self.serial itself);
        # passing the serial positionally raised a TypeError that the broad
        # except below turned into a permanent error message.
        ui_elements = await self.get_all_elements()

        # Save to file
        save_path = os.path.abspath(filename)
        async with aiofiles.open(save_path, "w", encoding="utf-8") as f:
            await f.write(json.dumps(ui_elements, indent=2))

        return f"UI state extracted and saved to {save_path}"

    except Exception as e:
        # Best-effort tool call: report the failure as text instead of raising.
        return f"Error extracting UI state: {e}"
|
621
646
|
|
622
|
-
async def
|
623
|
-
|
624
|
-
|
625
|
-
serial: Optional[str] = None
|
626
|
-
) -> str:
|
627
|
-
"""
|
628
|
-
Uninstall an app from the device.
|
629
|
-
|
630
|
-
Args:
|
631
|
-
package: Package name to uninstall
|
632
|
-
keep_data: Whether to keep app data and cache
|
633
|
-
serial: Optional device serial (for backward compatibility)
|
634
|
-
"""
|
635
|
-
try:
|
636
|
-
if serial:
|
637
|
-
device_manager = DeviceManager()
|
638
|
-
device = await device_manager.get_device(serial)
|
639
|
-
if not device:
|
640
|
-
return f"Error: Device {serial} not found"
|
641
|
-
else:
|
642
|
-
device = await get_device()
|
647
|
+
async def get_all_elements(self) -> Dict[str, Any]:
    """
    Get all UI elements from the device, including non-interactive elements.

    The method clears logcat, sends the
    ``com.droidrun.portal.GET_ALL_ELEMENTS`` broadcast, then polls logcat
    (every 0.2 s, for up to 10 s) for a ``DROIDRUN_FILE`` line announcing
    "JSON data written to: <path>". That file is pulled from the device into a
    local temporary file, read, and parsed as JSON.

    Returns:
        Dictionary with the parsed data under "all_elements", the element
        count under "count" and a fixed "message".

    Raises:
        ValueError: If the device is not found, the file path never shows up in
            logcat, the JSON cannot be parsed, or any other error occurs.
    """
    import json

    try:
        # Get the device
        device_manager = DeviceManager()
        device = await device_manager.get_device(self.serial)
        if not device:
            raise ValueError(f"Device {self.serial} not found")

        # Reserve a local path for the pulled JSON; delete=False because the
        # file is reopened by name after this context manager closes it.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as temp:
            local_path = temp.name

        try:
            # Clear logcat to make it easier to find our output
            await device._adb.shell(device._serial, "logcat -c")

            # Trigger the custom service via broadcast to get ALL elements
            await device._adb.shell(device._serial, "am broadcast -a com.droidrun.portal.GET_ALL_ELEMENTS")

            # Poll for the JSON file path
            loop = asyncio.get_running_loop()
            start_time = loop.time()
            max_wait_time = 10  # Maximum wait time in seconds
            poll_interval = 0.2  # Check every 200ms

            device_path = None
            while loop.time() - start_time < max_wait_time:
                # Check logcat for the file path
                logcat_output = await device._adb.shell(device._serial, "logcat -d | grep \"DROIDRUN_FILE\" | grep \"JSON data written to\" | tail -1")

                # Parse the file path if present
                match = re.search(r"JSON data written to: (.*)", logcat_output)
                if match:
                    device_path = match.group(1).strip()
                    break

                # Wait before polling again
                await asyncio.sleep(poll_interval)

            # Check if we found the file path
            if not device_path:
                raise ValueError(f"Failed to find the JSON file path in logcat after {max_wait_time} seconds")

            # Pull the JSON file from the device
            await device._adb.pull_file(device._serial, device_path, local_path)

            # Read the JSON file
            async with aiofiles.open(local_path, "r", encoding="utf-8") as f:
                json_content = await f.read()

            try:
                ui_data = json.loads(json_content)
            except json.JSONDecodeError as err:
                raise ValueError("Failed to parse UI elements JSON data") from err

            return {
                "all_elements": ui_data,
                "count": len(ui_data) if isinstance(ui_data, list) else len(ui_data.get("elements", [])),
                "message": "Retrieved all UI elements from the device screen"
            }

        except Exception as e:
            raise ValueError(f"Error retrieving all UI elements: {e}") from e
        finally:
            # Runs on success, on failure and on task cancellation, so the
            # temporary file is never left behind.
            with contextlib.suppress(OSError):
                os.unlink(local_path)

    except Exception as e:
        raise ValueError(f"Error getting all UI elements: {e}") from e
|
667
731
|
|
668
|
-
|
669
|
-
|
670
|
-
|
732
|
+
def complete(self, success: bool, reason: str = ""):
    """
    Mark the task as finished.

    Sets ``self.success``, ``self.reason`` and ``self.finished``.

    Args:
        success: Indicates if the task was successful.
        reason: Explanation of the outcome. Optional on success (a default
            message is used when empty); required if success is False.

    Raises:
        ValueError: If ``success`` is False and no ``reason`` is given. In that
            case ``self.success`` has already been set to False but
            ``self.finished`` is left untouched.
    """
    if success:
        self.success = True
        # Use the caller-supplied reason; previously this read self.reason,
        # which discarded the argument and kept any stale value.
        self.reason = reason or "Task completed successfully."
        self.finished = True
    else:
        self.success = False
        if not reason:
            raise ValueError("Reason for failure is required if success is False.")
        self.reason = reason
        self.finished = True
|
717
750
|
|
718
|
-
async def complete(result: str) -> str:
    """Wrap a result message in the task-completion notice.

    Args:
        result: The result message

    Returns:
        The result message prefixed with "Task completed: "
    """
    return "Task completed: {}".format(result)
|
728
751
|
|
729
|
-
async def
|
730
|
-
|
731
|
-
|
732
|
-
This function captures the current UI state including all UI elements
|
733
|
-
and saves it to a JSON file for later analysis or reference.
|
734
|
-
|
735
|
-
Args:
|
736
|
-
filename: Optional filename to save the UI state (defaults to ui_state_TIMESTAMP.json)
|
737
|
-
serial: Optional device serial number
|
738
|
-
|
739
|
-
Returns:
|
740
|
-
Path to the saved JSON file
|
741
|
-
"""
|
742
|
-
try:
|
743
|
-
# Generate default filename if not provided
|
744
|
-
if not filename:
|
745
|
-
timestamp = int(time.time())
|
746
|
-
filename = f"ui_state_{timestamp}.json"
|
747
|
-
|
748
|
-
# Ensure the filename ends with .json
|
749
|
-
if not filename.endswith(".json"):
|
750
|
-
filename += ".json"
|
751
|
-
|
752
|
-
# Get the UI elements
|
753
|
-
ui_elements = await get_all_elements(serial)
|
754
|
-
|
755
|
-
# Save to file
|
756
|
-
save_path = os.path.abspath(filename)
|
757
|
-
async with aiofiles.open(save_path, "w", encoding="utf-8") as f:
|
758
|
-
await f.write(json.dumps(ui_elements, indent=2))
|
759
|
-
|
760
|
-
return f"UI state extracted and saved to {save_path}"
|
761
|
-
|
762
|
-
except Exception as e:
|
763
|
-
return f"Error extracting UI state: {e}"
|
764
|
-
|
765
|
-
async def get_all_elements(serial: Optional[str] = None) -> Dict[str, Any]:
|
766
|
-
"""
|
767
|
-
Get all UI elements from the device, including non-interactive elements.
|
768
|
-
|
769
|
-
This function interacts with the TopViewService app installed on the device
|
770
|
-
to capture all UI elements, even those that are not interactive. This provides
|
771
|
-
a complete view of the UI hierarchy for analysis or debugging purposes.
|
772
|
-
|
773
|
-
Args:
|
774
|
-
serial: Optional device serial number
|
775
|
-
|
776
|
-
Returns:
|
777
|
-
Dictionary containing all UI elements extracted from the device screen
|
778
|
-
"""
|
779
|
-
try:
|
780
|
-
# Get the device
|
781
|
-
if serial:
|
782
|
-
device_manager = DeviceManager()
|
783
|
-
device = await device_manager.get_device(serial)
|
784
|
-
if not device:
|
785
|
-
raise ValueError(f"Device {serial} not found")
|
786
|
-
else:
|
787
|
-
device = await get_device()
|
752
|
+
async def get_phone_state(self, serial: Optional[str] = None) -> Dict[str, Any]:
    """
    Get the current phone state including current activity and keyboard visibility.

    Both values come from ``dumpsys`` output filtered with ``grep`` on the
    device; the activity line is returned stripped but otherwise unparsed.

    Args:
        serial: Optional device serial number. When omitted, ``self.serial`` is
            used, matching the other device methods; if that is empty too,
            ``self.get_device()`` supplies the device.

    Returns:
        Dictionary with "current_activity" and "keyboard_shown" on success, or
        with "error" and "message" if anything goes wrong.
    """
    try:
        # Get the device. Fall back to the instance's serial so that a call
        # without arguments targets the same device as the sibling methods.
        target_serial = serial or self.serial
        if target_serial:
            device_manager = DeviceManager()
            device = await device_manager.get_device(target_serial)
            if not device:
                raise ValueError(f"Device {target_serial} not found")
        else:
            device = await self.get_device()

        # Get the top resumed activity
        activity_output = await device._adb.shell(device._serial, "dumpsys activity activities | grep topResumedActivity")

        if not activity_output:
            # Try alternative command for older Android versions
            activity_output = await device._adb.shell(device._serial, "dumpsys activity activities | grep ResumedActivity")

        # Get keyboard visibility state
        keyboard_output = await device._adb.shell(device._serial, "dumpsys input_method | grep mInputShown")

        # Process activity information
        current_activity = "Unable to determine current activity"
        if activity_output:
            current_activity = activity_output.strip()

        # Process keyboard information
        is_keyboard_shown = False
        if keyboard_output:
            is_keyboard_shown = "mInputShown=true" in keyboard_output

        # Return combined state
        return {
            "current_activity": current_activity,
            "keyboard_shown": is_keyboard_shown,
        }

    except Exception as e:
        # Best-effort tool call: report the failure as data instead of raising.
        return {
            "error": str(e),
            "message": f"Error getting phone state: {str(e)}"
        }
|
803
|
+
|
804
|
+
async def remember(self, information: str) -> str:
    """
    Store important information to remember for future context.

    The stripped text is appended to ``self.memory``, which is then trimmed to
    its 10 most recent entries.

    Args:
        information: The information to remember

    Returns:
        Confirmation message, or an error message when ``information`` is not a
        string or contains nothing but whitespace.
    """
    # Validate on the stripped text so whitespace-only input is rejected
    # instead of being stored as an empty memory entry.
    if not isinstance(information, str) or not information.strip():
        return "Error: Please provide valid information to remember."

    # Add the information to memory
    self.memory.append(information.strip())

    # Limit memory size to prevent context overflow (keep most recent items)
    max_memory_items = 10
    if len(self.memory) > max_memory_items:
        self.memory = self.memory[-max_memory_items:]

    return f"Remembered: {information}"
|
830
|
+
|
831
|
+
def get_memory(self) -> List[str]:
    """
    Return a snapshot of everything stored in memory.

    Returns:
        A new list holding the stored memory items, so callers can modify it
        without affecting ``self.memory``.
    """
    snapshot = list(self.memory)
    return snapshot
|