droidrun 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
droidrun/tools/actions.py CHANGED
@@ -11,844 +11,828 @@ import asyncio
11
11
  import aiofiles
12
12
  import contextlib
13
13
  from typing import Optional, Dict, Tuple, List, Any
14
- from droidrun.adb import Device, DeviceManager
14
+ from ..adb import Device, DeviceManager
15
15
 
16
- # Global variable to store clickable elements for index-based tapping
17
- CLICKABLE_ELEMENTS_CACHE = []
18
16
 
19
- # Default device serial will be read from environment variable
20
- def get_device_serial() -> str:
21
- """Get the device serial from environment variable.
22
-
23
- Returns:
24
- Device serial from environment or None
25
- """
26
- return os.environ.get("DROIDRUN_DEVICE_SERIAL", "")
17
+ class Tools:
18
+ """Core UI interaction tools for Android device control."""
27
19
 
28
- async def get_device() -> Optional[Device]:
29
- """Get the device instance using the serial from environment variable.
30
-
31
- Returns:
32
- Device instance or None if not found
33
- """
34
- serial = get_device_serial()
35
- if not serial:
36
- raise ValueError("DROIDRUN_DEVICE_SERIAL environment variable not set")
37
-
38
- device_manager = DeviceManager()
39
- device = await device_manager.get_device(serial)
40
- if not device:
41
- raise ValueError(f"Device {serial} not found")
42
-
43
- return device
20
+ def __init__(self, serial: str) -> None:
21
+ # Instance‐level cache for clickable elements (index-based tapping)
22
+ self.clickable_elements_cache: List[Dict[str, Any]] = []
23
+ self.serial = serial
24
+ self.device_manager = DeviceManager()
25
+ self.last_screenshot = None
26
+ self.reason = None
27
+ self.success = None
28
+ self.finished = False
29
+ # Memory storage for remembering important information
30
+ self.memory: List[str] = []
44
31
 
45
- def parse_package_list(output: str) -> List[Dict[str, str]]:
46
- """Parse the output of 'pm list packages -f' command.
32
+ def get_device_serial(self) -> str:
33
+ """Get the device serial from the instance or environment variable."""
34
+ # First try using the instance's serial
35
+ if self.serial:
36
+ return self.serial
37
+
38
+ # Fall back to environment variable if not set on the instance
39
+ return os.environ.get("DROIDRUN_DEVICE_SERIAL", "")
47
40
 
48
- Args:
49
- output: Raw command output from 'pm list packages -f'
41
+ async def get_device(self) -> Optional[Device]:
42
+ """Get the device instance using the instance's serial or from environment variable.
43
+
44
+ Returns:
45
+ Device instance or None if not found
46
+ """
47
+ serial = self.get_device_serial()
48
+ if not serial:
49
+ raise ValueError("No device serial specified - set DROIDRUN_DEVICE_SERIAL environment variable or provide device_serial parameter")
50
+
51
+ device = await self.device_manager.get_device(serial)
52
+ if not device:
53
+ raise ValueError(f"Device {serial} not found")
54
+
55
+ return device
50
56
 
51
- Returns:
52
- List of dictionaries containing package info with 'package' and 'path' keys
53
- """
54
- apps = []
55
- for line in output.splitlines():
56
- if line.startswith("package:"):
57
- # Format is: "package:/path/to/base.apk=com.package.name"
58
- path_and_pkg = line[8:] # Strip "package:"
59
- if "=" in path_and_pkg:
60
- path, package = path_and_pkg.rsplit("=", 1)
61
- apps.append({"package": package.strip(), "path": path.strip()})
62
- return apps
57
+ def parse_package_list(self, output: str) -> List[Dict[str, str]]:
58
+ """Parse the output of 'pm list packages -f' command.
63
59
 
64
- async def get_clickables(serial: Optional[str] = None) -> Dict[str, Any]:
65
- """
66
- Get all clickable UI elements from the device using the custom TopViewService.
67
-
68
- This function interacts with the TopViewService app installed on the device
69
- to capture only the clickable UI elements. The service writes UI data
70
- to a JSON file on the device, which is then pulled to the host.
71
-
72
- Args:
73
- serial: Optional device serial number
74
-
75
- Returns:
76
- Dictionary containing clickable UI elements extracted from the device screen
77
- """
78
- global CLICKABLE_ELEMENTS_CACHE
79
-
80
- try:
81
- # Get the device
82
- if serial:
83
- device_manager = DeviceManager()
84
- device = await device_manager.get_device(serial)
85
- if not device:
86
- raise ValueError(f"Device {serial} not found")
87
- else:
88
- device = await get_device()
60
+ Args:
61
+ output: Raw command output from 'pm list packages -f'
62
+
63
+ Returns:
64
+ List of dictionaries containing package info with 'package' and 'path' keys
65
+ """
66
+ apps = []
67
+ for line in output.splitlines():
68
+ if line.startswith("package:"):
69
+ # Format is: "package:/path/to/base.apk=com.package.name"
70
+ path_and_pkg = line[8:] # Strip "package:"
71
+ if "=" in path_and_pkg:
72
+ path, package = path_and_pkg.rsplit("=", 1)
73
+ apps.append({"package": package.strip(), "path": path.strip()})
74
+ return apps
75
+
76
+ async def get_clickables(self, serial: Optional[str] = None) -> str:
77
+ """
78
+ Get all clickable UI elements from the device using the custom TopViewService.
79
+
80
+ This function interacts with the TopViewService app installed on the device
81
+ to capture UI elements. The service writes UI data to a JSON file on the device,
82
+ which is then pulled to the host. If no elements are found initially, it will
83
+ retry for up to 30 seconds.
89
84
 
90
- # Create a temporary file for the JSON
91
- with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as temp:
92
- local_path = temp.name
85
+ Args:
86
+ serial: Optional device serial number
93
87
 
88
+ Returns:
89
+ JSON string containing UI elements extracted from the device screen
90
+ """
94
91
  try:
95
- # Clear logcat to make it easier to find our output
96
- await device._adb.shell(device._serial, "logcat -c")
97
-
98
- # Trigger the custom service via broadcast to get only interactive elements
99
- await device._adb.shell(device._serial, "am broadcast -a com.droidrun.portal.GET_ELEMENTS")
100
-
101
- # Poll for the JSON file path
102
- start_time = asyncio.get_event_loop().time()
103
- max_wait_time = 10 # Maximum wait time in seconds
104
- poll_interval = 0.2 # Check every 200ms
105
-
106
- device_path = None
107
- while asyncio.get_event_loop().time() - start_time < max_wait_time:
108
- # Check logcat for the file path
109
- logcat_output = await device._adb.shell(device._serial, "logcat -d | grep \"DROIDRUN_FILE\" | grep \"JSON data written to\" | tail -1")
110
-
111
- # Parse the file path if present
112
- match = re.search(r"JSON data written to: (.*)", logcat_output)
113
- if match:
114
- device_path = match.group(1).strip()
115
- break
116
-
117
- # Wait before polling again
118
- await asyncio.sleep(poll_interval)
119
-
120
- # Check if we found the file path
121
- if not device_path:
122
- raise ValueError(f"Failed to find the JSON file path in logcat after {max_wait_time} seconds")
123
-
124
- # Pull the JSON file from the device
125
- await device._adb.pull_file(device._serial, device_path, local_path)
92
+ # Get the device
93
+ if serial:
94
+ from droidrun.adb import DeviceManager
95
+ device_manager = DeviceManager()
96
+ device = await device_manager.get_device(serial)
97
+ if not device:
98
+ raise ValueError(f"Device {serial} not found")
99
+ else:
100
+ device = await self.get_device()
126
101
 
127
- # Read the JSON file
128
- async with aiofiles.open(local_path, "r", encoding="utf-8") as f:
129
- json_content = await f.read()
130
-
131
- # Clean up the temporary file
132
- with contextlib.suppress(OSError):
133
- os.unlink(local_path)
102
+ # Create a temporary file for the JSON
103
+ with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as temp:
104
+ local_path = temp.name
134
105
 
135
- # Try to parse the JSON
136
- import json
137
106
  try:
138
- ui_data = json.loads(json_content)
139
-
140
- # Process the JSON to extract elements
141
- flattened_elements = []
142
-
143
- # Process the nested elements structure
144
- if isinstance(ui_data, list):
145
- # For each parent element in the list
146
- for parent in ui_data:
147
- # Add the parent if it's clickable (type should be 'clickable')
148
- if parent.get('type') == 'clickable' and parent.get('index', -1) != -1:
149
- parent_copy = {k: v for k, v in parent.items() if k != 'children'}
150
- parent_copy['isParent'] = True
151
- flattened_elements.append(parent_copy)
152
-
153
- # Process children
154
- children = parent.get('children', [])
155
- for child in children:
156
- # Add all children that have valid indices, regardless of type
157
- # Include text elements as well, not just clickable ones
158
- if child.get('index', -1) != -1:
159
- child_copy = child.copy()
160
- child_copy['isParent'] = False
161
- child_copy['parentIndex'] = parent.get('index')
162
- flattened_elements.append(child_copy)
163
-
164
- # Also process nested children if present
165
- nested_children = child.get('children', [])
166
- for nested_child in nested_children:
167
- if nested_child.get('index', -1) != -1:
168
- nested_copy = nested_child.copy()
169
- nested_copy['isParent'] = False
170
- nested_copy['parentIndex'] = child.get('index')
171
- nested_copy['grandparentIndex'] = parent.get('index')
172
- flattened_elements.append(nested_copy)
173
- else:
174
- # Old format handling (dictionary with clickable_elements)
175
- clickable_elements = ui_data.get("clickable_elements", [])
176
- for element in clickable_elements:
177
- if element.get('index', -1) != -1:
178
- element_copy = {k: v for k, v in element.items() if k != 'isClickable'}
179
- flattened_elements.append(element_copy)
180
-
181
- # Update the global cache with the processed elements
182
- CLICKABLE_ELEMENTS_CACHE = flattened_elements
183
-
184
- # Sort by index
185
- flattened_elements.sort(key=lambda x: x.get('index', 0))
186
-
187
- # Create a summary of important text elements for each clickable parent
188
- text_summary = []
189
- parent_texts = {}
190
- tappable_elements = []
107
+ # Set retry parameters
108
+ max_total_time = 30 # Maximum total time to try in seconds
109
+ retry_interval = 1.0 # Time between retries in seconds
110
+ start_total_time = asyncio.get_event_loop().time()
191
111
 
192
- # Group text elements by their parent and identify tappable elements
193
- for elem in flattened_elements:
194
- # Track elements that are actually tappable (have bounds and either type clickable or are parents)
195
- if elem.get('bounds') and (elem.get('type') == 'clickable' or elem.get('isParent')):
196
- tappable_elements.append(elem.get('index'))
112
+ while True:
113
+ # Check if we've exceeded total time
114
+ current_time = asyncio.get_event_loop().time()
115
+ if current_time - start_total_time > max_total_time:
116
+ raise ValueError(f"Failed to get UI elements after {max_total_time} seconds of retries")
197
117
 
198
- if elem.get('type') == 'text' and elem.get('text'):
199
- parent_id = elem.get('parentIndex')
200
- if parent_id is not None:
201
- if parent_id not in parent_texts:
202
- parent_texts[parent_id] = []
203
- parent_texts[parent_id].append(elem.get('text'))
204
-
205
- # Create a text summary for parents with text children
206
- for parent_id, texts in parent_texts.items():
207
- # Find the parent element
208
- parent = None
209
- for elem in flattened_elements:
210
- if elem.get('index') == parent_id:
211
- parent = elem
118
+ # Clear logcat to make it easier to find our output
119
+ await device._adb.shell(device._serial, "logcat -c")
120
+
121
+ # Trigger the custom service via broadcast to get only interactive elements
122
+ await device._adb.shell(device._serial, "am broadcast -a com.droidrun.portal.GET_ELEMENTS")
123
+
124
+ # Poll for the JSON file path
125
+ start_time = asyncio.get_event_loop().time()
126
+ max_wait_time = 10 # Maximum wait time in seconds
127
+ poll_interval = 0.2 # Check every 200ms
128
+
129
+ device_path = None
130
+ while asyncio.get_event_loop().time() - start_time < max_wait_time:
131
+ # Check logcat for the file path
132
+ logcat_output = await device._adb.shell(device._serial, "logcat -d | grep \"DROIDRUN_FILE\" | grep \"JSON data written to\" | tail -1")
133
+
134
+ # Parse the file path if present
135
+ match = re.search(r"JSON data written to: (.*)", logcat_output)
136
+ if match:
137
+ device_path = match.group(1).strip()
212
138
  break
139
+
140
+ # Wait before polling again
141
+ await asyncio.sleep(poll_interval)
213
142
 
214
- if parent:
215
- # Mark if this element is directly tappable
216
- tappable_marker = "🔘" if parent_id in tappable_elements else "📄"
217
- summary = f"{tappable_marker} Element {parent_id} ({parent.get('className', 'Unknown')}): " + " | ".join(texts)
218
- text_summary.append(summary)
219
-
220
- # Sort the text summary for better readability
221
- text_summary.sort()
222
-
223
- # Count how many elements are actually tappable
224
- tappable_count = len(tappable_elements)
143
+ # Check if we found the file path
144
+ if not device_path:
145
+ await asyncio.sleep(retry_interval)
146
+ continue
147
+
148
+ # Pull the JSON file from the device
149
+ await device._adb.pull_file(device._serial, device_path, local_path)
150
+
151
+ # Read the JSON file
152
+ async with aiofiles.open(local_path, "r", encoding="utf-8") as f:
153
+ json_content = await f.read()
154
+
155
+ # Try to parse the JSON
156
+ try:
157
+ ui_data = json.loads(json_content)
158
+
159
+ # Filter out the "type" attribute from all elements
160
+ filtered_data = []
161
+ for element in ui_data:
162
+ # Create a copy of the element without the "type" attribute
163
+ filtered_element = {k: v for k, v in element.items() if k != "type"}
164
+
165
+ # Also filter children if present
166
+ if "children" in filtered_element:
167
+ filtered_element["children"] = [
168
+ {k: v for k, v in child.items() if k != "type"}
169
+ for child in filtered_element["children"]
170
+ ]
171
+
172
+ filtered_data.append(filtered_element)
173
+
174
+ # If we got elements, store them and return
175
+ if filtered_data:
176
+ # Store the filtered UI data in cache
177
+ global CLICKABLE_ELEMENTS_CACHE
178
+ CLICKABLE_ELEMENTS_CACHE = filtered_data
179
+
180
+ # Add a small sleep to ensure UI is fully loaded/processed
181
+ await asyncio.sleep(0.5) # 500ms sleep
182
+
183
+ # Convert the dictionary to a JSON string before returning
184
+ result = {
185
+ "clickable_elements": filtered_data,
186
+ "count": len(filtered_data),
187
+ "message": f"Found {len(filtered_data)} UI elements after retrying"
188
+ }
189
+
190
+ return result
191
+
192
+ # If no elements found, wait and retry
193
+ await asyncio.sleep(retry_interval)
194
+
195
+ except json.JSONDecodeError:
196
+ # If JSON parsing failed, wait and retry
197
+ await asyncio.sleep(retry_interval)
198
+ continue
225
199
 
226
- # Add a short sleep to ensure UI is fully loaded/processed
227
- await asyncio.sleep(0.5) # 500ms sleep
200
+ except Exception as e:
201
+ # Clean up in case of error
202
+ with contextlib.suppress(OSError):
203
+ os.unlink(local_path)
204
+ raise ValueError(f"Error retrieving clickable elements: {e}")
228
205
 
229
- return {
230
- "clickable_elements": flattened_elements,
231
- "count": len(flattened_elements),
232
- "tappable_count": tappable_count,
233
- "tappable_indices": sorted(tappable_elements),
234
- "text_summary": text_summary,
235
- "message": f"Found {tappable_count} tappable elements out of {len(flattened_elements)} total elements"
236
- }
237
- except json.JSONDecodeError:
238
- raise ValueError("Failed to parse UI elements JSON data")
239
-
240
206
  except Exception as e:
241
- # Clean up in case of error
242
- with contextlib.suppress(OSError):
243
- os.unlink(local_path)
244
- raise ValueError(f"Error retrieving clickable elements: {e}")
245
-
246
- except Exception as e:
247
- raise ValueError(f"Error getting clickable elements: {e}")
207
+ raise ValueError(f"Error getting clickable elements: {e}")
248
208
 
249
- async def tap_by_index(index: int, serial: Optional[str] = None) -> str:
250
- """
251
- Tap on a UI element by its index.
252
-
253
- This function uses the cached clickable elements from the last get_clickables call
254
- to find the element with the given index and tap on its center coordinates.
255
-
256
- Args:
257
- index: Index of the element to tap
258
- serial: Optional device serial (for backward compatibility)
259
-
260
- Returns:
261
- Result message
262
- """
263
- global CLICKABLE_ELEMENTS_CACHE
264
-
265
- try:
266
- # Check if we have cached elements
267
- if not CLICKABLE_ELEMENTS_CACHE:
268
- return "Error: No UI elements cached. Call get_clickables first."
269
-
270
- # Find the element with the given index
271
- element = None
272
- for item in CLICKABLE_ELEMENTS_CACHE:
273
- if item.get('index') == index:
274
- element = item
275
- break
276
-
277
- if not element:
278
- # List available indices to help the user
279
- indices = sorted([item.get('index') for item in CLICKABLE_ELEMENTS_CACHE if item.get('index') is not None])
280
- indices_str = ", ".join(str(idx) for idx in indices[:20])
281
- if len(indices) > 20:
282
- indices_str += f"... and {len(indices) - 20} more"
283
-
284
- return f"Error: No element found with index {index}. Available indices: {indices_str}"
285
-
286
- # Get the bounds of the element
287
- bounds_str = element.get('bounds')
288
- if not bounds_str:
289
- element_text = element.get('text', 'No text')
290
- element_type = element.get('type', 'unknown')
291
- element_class = element.get('className', 'Unknown class')
292
-
293
- # Check if this is a child element with a parent that can be tapped instead
294
- parent_suggestion = ""
295
- if 'parentIndex' in element:
296
- parent_idx = element.get('parentIndex')
297
- parent_suggestion = f" You might want to tap its parent element with index {parent_idx} instead."
298
-
299
- return f"Error: Element with index {index} ('{element_text}', {element_class}, type: {element_type}) has no bounds and cannot be tapped directly.{parent_suggestion}"
209
+
210
+ async def tap_by_index(self, index: int, serial: Optional[str] = None) -> str:
211
+ """
212
+ Tap on a UI element by its index.
213
+
214
+ This function uses the cached clickable elements
215
+ to find the element with the given index and tap on its center coordinates.
216
+
217
+ Args:
218
+ index: Index of the element to tap
219
+
220
+ Returns:
221
+ Result message
222
+ """
223
+
224
+ def collect_all_indices(elements):
225
+ """Recursively collect all indices from elements and their children."""
226
+ indices = []
227
+ for item in elements:
228
+ if item.get('index') is not None:
229
+ indices.append(item.get('index'))
230
+ # Check children if present
231
+ children = item.get('children', [])
232
+ indices.extend(collect_all_indices(children))
233
+ return indices
234
+
235
+ def find_element_by_index(elements, target_index):
236
+ """Recursively find an element with the given index."""
237
+ for item in elements:
238
+ if item.get('index') == target_index:
239
+ return item
240
+ # Check children if present
241
+ children = item.get('children', [])
242
+ result = find_element_by_index(children, target_index)
243
+ if result:
244
+ return result
245
+ return None
300
246
 
301
- # Parse the bounds (format: "left,top,right,bottom")
302
247
  try:
303
- left, top, right, bottom = map(int, bounds_str.split(','))
304
- except ValueError:
305
- return f"Error: Invalid bounds format for element with index {index}: {bounds_str}"
306
-
307
- # Calculate the center of the element
308
- x = (left + right) // 2
309
- y = (top + bottom) // 2
248
+ # Check if we have cached elements
249
+ if not CLICKABLE_ELEMENTS_CACHE:
250
+ return "Error: No UI elements cached. Call get_clickables first."
251
+
252
+ # Find the element with the given index (including in children)
253
+ element = find_element_by_index(CLICKABLE_ELEMENTS_CACHE, index)
254
+
255
+ if not element:
256
+ # List available indices to help the user
257
+ indices = sorted(collect_all_indices(CLICKABLE_ELEMENTS_CACHE))
258
+ indices_str = ", ".join(str(idx) for idx in indices[:20])
259
+ if len(indices) > 20:
260
+ indices_str += f"... and {len(indices) - 20} more"
261
+
262
+ return f"Error: No element found with index {index}. Available indices: {indices_str}"
263
+
264
+ # Get the bounds of the element
265
+ bounds_str = element.get('bounds')
266
+ if not bounds_str:
267
+ element_text = element.get('text', 'No text')
268
+ element_type = element.get('type', 'unknown')
269
+ element_class = element.get('className', 'Unknown class')
270
+ return f"Error: Element with index {index} ('{element_text}', {element_class}, type: {element_type}) has no bounds and cannot be tapped"
271
+
272
+ # Parse the bounds (format: "left,top,right,bottom")
273
+ try:
274
+ left, top, right, bottom = map(int, bounds_str.split(','))
275
+ except ValueError:
276
+ return f"Error: Invalid bounds format for element with index {index}: {bounds_str}"
277
+
278
+ # Calculate the center of the element
279
+ x = (left + right) // 2
280
+ y = (top + bottom) // 2
281
+
282
+ # Get the device and tap at the coordinates
283
+ if serial:
284
+ from droidrun.adb import DeviceManager
285
+ device_manager = DeviceManager()
286
+ device = await device_manager.get_device(serial)
287
+ if not device:
288
+ return f"Error: Device {serial} not found"
289
+ else:
290
+ device = await self.get_device()
291
+
292
+ await device.tap(x, y)
293
+
294
+ # Add a small delay to allow UI to update
295
+ await asyncio.sleep(0.5)
296
+
297
+
298
+ # Create a descriptive response
299
+ response_parts = []
300
+ response_parts.append(f"Tapped element with index {index}")
301
+ response_parts.append(f"Text: '{element.get('text', 'No text')}'")
302
+ response_parts.append(f"Class: {element.get('className', 'Unknown class')}")
303
+ response_parts.append(f"Type: {element.get('type', 'unknown')}")
304
+
305
+ # Add information about children if present
306
+ children = element.get('children', [])
307
+ if children:
308
+ child_texts = [child.get('text') for child in children if child.get('text')]
309
+ if child_texts:
310
+ response_parts.append(f"Contains text: {' | '.join(child_texts)}")
311
+
312
+ response_parts.append(f"Coordinates: ({x}, {y})")
313
+
314
+ return " | ".join(response_parts)
315
+ except ValueError as e:
316
+ return f"Error: {str(e)}"
317
+
318
+
319
+ # Rename the old tap function to tap_by_coordinates for backward compatibility
320
+ async def tap_by_coordinates(self, x: int, y: int) -> bool:
321
+ """
322
+ Tap on the device screen at specific coordinates.
323
+
324
+ Args:
325
+ x: X coordinate
326
+ y: Y coordinate
327
+
328
+ Returns:
329
+ Bool indicating success or failure
330
+ """
331
+ try:
332
+ if self.serial:
333
+ device_manager = DeviceManager()
334
+ device = await device_manager.get_device(self.serial)
335
+ if not device:
336
+ return f"Error: Device {self.serial} not found"
337
+ else:
338
+ device = await self.get_device()
339
+
340
+ await device.tap(x, y)
341
+ print(f"Tapped at coordinates ({x}, {y})")
342
+ return True
343
+ except ValueError as e:
344
+ print(f"Error: {str(e)}")
345
+ return False
346
+
347
+ # Replace the old tap function with the new one
348
+ async def tap(self, index: int) -> str:
349
+ """
350
+ Tap on a UI element by its index.
310
351
 
311
- # Get the device and tap at the coordinates
312
- if serial:
313
- device_manager = DeviceManager()
314
- device = await device_manager.get_device(serial)
315
- if not device:
316
- return f"Error: Device {serial} not found"
317
- else:
318
- device = await get_device()
319
-
320
- await device.tap(x, y)
321
-
322
- # Gather element details for the response
323
- element_text = element.get('text', 'No text')
324
- element_class = element.get('className', 'Unknown class')
325
- element_type = element.get('type', 'unknown')
326
- is_parent = element.get('isParent', False)
327
-
328
- # Create a descriptive response
329
- response_parts = []
330
- response_parts.append(f"Tapped element with index {index}")
331
- response_parts.append(f"Text: '{element_text}'")
332
- response_parts.append(f"Class: {element_class}")
333
- response_parts.append(f"Type: {element_type}")
334
- response_parts.append(f"Role: {'parent' if is_parent else 'child'}")
335
-
336
- # If it's a parent element, include information about its text children
337
- if is_parent:
338
- # Find all child elements that are text elements
339
- text_children = []
340
- for item in CLICKABLE_ELEMENTS_CACHE:
341
- if (item.get('parentIndex') == index and
342
- item.get('type') == 'text' and
343
- item.get('text')):
344
- text_children.append(item.get('text'))
345
-
346
- if text_children:
347
- response_parts.append(f"Contains text: {' | '.join(text_children)}")
348
-
349
- # If it's a child element, include parent information
350
- if not is_parent and 'parentIndex' in element:
351
- parent_index = element.get('parentIndex')
352
- # Find the parent element
353
- parent = None
354
- for item in CLICKABLE_ELEMENTS_CACHE:
355
- if item.get('index') == parent_index:
356
- parent = item
357
- break
358
-
359
- if parent:
360
- parent_text = parent.get('text', 'No text')
361
- response_parts.append(f"Parent: {parent_index} ('{parent_text}')")
362
-
363
- # Find sibling text elements (other children of the same parent)
364
- sibling_texts = []
365
- for item in CLICKABLE_ELEMENTS_CACHE:
366
- if (item.get('parentIndex') == parent_index and
367
- item.get('index') != index and
368
- item.get('type') == 'text' and
369
- item.get('text')):
370
- sibling_texts.append(item.get('text'))
371
-
372
- if sibling_texts:
373
- response_parts.append(f"Related text: {' | '.join(sibling_texts)}")
352
+ This function uses the cached clickable elements from the last get_clickables call
353
+ to find the element with the given index and tap on its center coordinates.
374
354
 
375
- response_parts.append(f"Coordinates: ({x}, {y})")
355
+ Args:
356
+ index: Index of the element to tap
376
357
 
377
- return " | ".join(response_parts)
378
- except ValueError as e:
379
- return f"Error: {str(e)}"
358
+ Returns:
359
+ Result message
360
+ """
361
+ return await self.tap_by_index(index)
380
362
 
381
- # Rename the old tap function to tap_by_coordinates for backward compatibility
382
- async def tap_by_coordinates(x: int, y: int, serial: Optional[str] = None) -> str:
383
- """
384
- Tap on the device screen at specific coordinates.
385
-
386
- Args:
387
- x: X coordinate
388
- y: Y coordinate
389
- serial: Optional device serial (for backward compatibility)
390
- """
391
- try:
392
- if serial:
393
- device_manager = DeviceManager()
394
- device = await device_manager.get_device(serial)
395
- if not device:
396
- return f"Error: Device {serial} not found"
397
- else:
398
- device = await get_device()
363
+ async def swipe(
364
+ self,
365
+ start_x: int,
366
+ start_y: int,
367
+ end_x: int,
368
+ end_y: int,
369
+ duration_ms: int = 300
370
+ ) -> bool:
371
+ """
372
+ Performs a straight-line swipe gesture on the device screen.
373
+ To perform a hold (long press), set the start and end coordinates to the same values and increase the duration as needed.
374
+ Args:
375
+ start_x: Starting X coordinate
376
+ start_y: Starting Y coordinate
377
+ end_x: Ending X coordinate
378
+ end_y: Ending Y coordinate
379
+ duration_ms: Duration of swipe in milliseconds
380
+ Returns:
381
+ Bool indicating success or failure
382
+ """
383
+ try:
384
+ if self.serial:
385
+ device_manager = DeviceManager()
386
+ device = await device_manager.get_device(self.serial)
387
+ if not device:
388
+ return f"Error: Device {self.serial} not found"
389
+ else:
390
+ device = await self.get_device()
391
+
392
+ await device.swipe(start_x, start_y, end_x, end_y, duration_ms)
393
+ print(f"Swiped from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration_ms}ms")
394
+ return True
395
+ except ValueError as e:
396
+ print(f"Error: {str(e)}")
397
+ return False
398
+
399
+ async def input_text(self, text: str, serial: Optional[str] = None) -> str:
400
+ """
401
+ Input text on the device using Base64 encoding and broadcast intent.
399
402
 
400
- await device.tap(x, y)
401
- return f"Tapped at ({x}, {y})"
402
- except ValueError as e:
403
- return f"Error: {str(e)}"
403
+ Args:
404
+ text: Text to input. Can contain spaces, newlines, and special characters including non-ASCII.
405
+ serial: Optional device serial (for backward compatibility)
406
+
407
+ Returns:
408
+ Result message
409
+ """
410
+ try:
411
+ if serial:
412
+ device_manager = DeviceManager()
413
+ device = await device_manager.get_device(serial)
414
+ if not device:
415
+ return f"Error: Device {serial} not found"
416
+ else:
417
+ device = await self.get_device()
418
+
419
+ # Save the current keyboard
420
+ original_ime = await device._adb.shell(device._serial, "settings get secure default_input_method")
421
+ original_ime = original_ime.strip()
422
+
423
+ # Enable the Droidrun keyboard
424
+ await device._adb.shell(device._serial, "ime enable com.droidrun.portal/.DroidrunKeyboardIME")
425
+
426
+ # Set the Droidrun keyboard as the default
427
+ await device._adb.shell(device._serial, "ime set com.droidrun.portal/.DroidrunKeyboardIME")
428
+
429
+ # Wait for keyboard to change
430
+ await asyncio.sleep(0.2)
431
+
432
+ # Encode the text to Base64
433
+ import base64
434
+ encoded_text = base64.b64encode(text.encode()).decode()
435
+
436
+ # Send the broadcast intent with the Base64-encoded text
437
+ cmd = f'am broadcast -a DROIDRUN_INPUT_B64 --es msg "{encoded_text}"'
438
+ await device._adb.shell(device._serial, cmd)
439
+
440
+ # Wait for text input to complete
441
+ await asyncio.sleep(0.5)
442
+
443
+ # Restore the original keyboard
444
+ if original_ime and "com.droidrun.portal" not in original_ime:
445
+ await device._adb.shell(device._serial, f"ime set {original_ime}")
446
+
447
+ return f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"
448
+ except ValueError as e:
449
+ return f"Error: {str(e)}"
450
+ except Exception as e:
451
+ return f"Error sending text input: {str(e)}"
404
452
 
405
- # Replace the old tap function with the new one
406
- async def tap(index: int, serial: Optional[str] = None) -> str:
407
- """
408
- Tap on a UI element by its index.
409
-
410
- This function uses the cached clickable elements from the last get_clickables call
411
- to find the element with the given index and tap on its center coordinates.
412
-
413
- Args:
414
- index: Index of the element to tap
415
- serial: Optional device serial (for backward compatibility)
416
-
417
- Returns:
418
- Result message
419
- """
420
- return await tap_by_index(index, serial)
453
+ async def press_key(self, keycode: int) -> str:
454
+ """
455
+ Press a key on the device.
456
+
457
+ Common keycodes:
458
+ - 3: HOME
459
+ - 4: BACK
460
+ - 24: VOLUME UP
461
+ - 25: VOLUME DOWN
462
+ - 26: POWER
463
+ - 82: MENU
464
+
465
+ Args:
466
+ keycode: Android keycode to press
467
+ """
468
+ try:
469
+ if self.serial:
470
+ device_manager = DeviceManager()
471
+ device = await device_manager.get_device(self.serial)
472
+ if not device:
473
+ return f"Error: Device {self.serial} not found"
474
+ else:
475
+ device = await self.get_device()
476
+
477
+ key_names = {
478
+ 3: "HOME",
479
+ 4: "BACK",
480
+ 24: "VOLUME UP",
481
+ 25: "VOLUME DOWN",
482
+ 26: "POWER",
483
+ 82: "MENU",
484
+ }
485
+ key_name = key_names.get(keycode, str(keycode))
486
+
487
+ await device.press_key(keycode)
488
+ return f"Pressed key {key_name}"
489
+ except ValueError as e:
490
+ return f"Error: {str(e)}"
421
491
 
422
- async def swipe(
423
- start_x: int,
424
- start_y: int,
425
- end_x: int,
426
- end_y: int,
427
- duration_ms: int = 300,
428
- serial: Optional[str] = None
429
- ) -> str:
430
- """
431
- Perform a swipe gesture on the device screen.
432
-
433
- Args:
434
- start_x: Starting X coordinate
435
- start_y: Starting Y coordinate
436
- end_x: Ending X coordinate
437
- end_y: Ending Y coordinate
438
- duration_ms: Duration of swipe in milliseconds
439
- serial: Optional device serial (for backward compatibility)
440
- """
441
- try:
442
- if serial:
443
- device_manager = DeviceManager()
444
- device = await device_manager.get_device(serial)
445
- if not device:
446
- return f"Error: Device {serial} not found"
447
- else:
448
- device = await get_device()
449
-
450
- await device.swipe(start_x, start_y, end_x, end_y, duration_ms)
451
- return f"Swiped from ({start_x}, {start_y}) to ({end_x}, {end_y})"
452
- except ValueError as e:
453
- return f"Error: {str(e)}"
492
async def start_app(
    self,
    package: str,
    activity: str = ""
) -> str:
    """
    Start an app on the device.

    Args:
        package: Package name (e.g., "com.android.settings")
        activity: Optional activity name

    Returns:
        Whatever the device's start_app call returns, or an "Error: ..."
        message when the device cannot be found or a ValueError is raised.
    """
    try:
        if self.serial:
            # Reuse the DeviceManager created in __init__ rather than
            # instantiating a new one on every call.
            device = await self.device_manager.get_device(self.serial)
            if not device:
                return f"Error: Device {self.serial} not found"
        else:
            device = await self.get_device()

        return await device.start_app(package, activity)
    except ValueError as e:
        return f"Error: {str(e)}"
454
517
 
455
- async def input_text(text: str, serial: Optional[str] = None) -> str:
456
- """
457
- Input text on the device.
458
-
459
- Args:
460
- text: Text to input. Can contain spaces and special characters.
461
- serial: Optional device serial (for backward compatibility)
462
- """
463
- try:
464
- if serial:
465
- device_manager = DeviceManager()
466
- device = await device_manager.get_device(serial)
467
- if not device:
468
- return f"Error: Device {serial} not found"
469
- else:
470
- device = await get_device()
471
-
472
- # Function to escape special characters
473
- def escape_text(s: str) -> str:
474
- # Escape special characters that need shell escaping, excluding space
475
- special_chars = '[]()|&;$<>\\`"\'{}#!?^~' # Removed space from special chars
476
- escaped = ''
477
- for c in s:
478
- if c == ' ':
479
- escaped += ' ' # Just add space without escaping
480
- elif c in special_chars:
481
- escaped += '\\' + c
482
- else:
483
- escaped += c
484
- return escaped
485
-
486
- # Split text into smaller chunks (max 500 chars)
487
- chunk_size = 500
488
- chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
489
-
490
- for chunk in chunks:
491
- # Escape the text chunk
492
- escaped_chunk = escape_text(chunk)
493
-
494
- # Try different input methods if one fails
495
- methods = [
496
- f'input text "{escaped_chunk}"', # Standard method
497
- f'am broadcast -a ADB_INPUT_TEXT --es msg "{escaped_chunk}"', # Broadcast intent method
498
- f'input keyboard text "{escaped_chunk}"' # Keyboard method
499
- ]
500
-
501
- success = False
502
- last_error = None
503
-
504
- for method in methods:
505
- try:
506
- await device._adb.shell(device._serial, method)
507
- success = True
508
- break
509
- except Exception as e:
510
- last_error = str(e)
511
- continue
512
-
513
- if not success:
514
- return f"Error: Failed to input text chunk. Last error: {last_error}"
515
-
516
- # Small delay between chunks
517
- await asyncio.sleep(0.1)
518
-
519
- return f"Text input completed: {text}"
520
- except ValueError as e:
521
- return f"Error: {str(e)}"
518
async def install_app(
    self,
    apk_path: str,
    reinstall: bool = False,
    grant_permissions: bool = True
) -> str:
    """
    Install an app on the device.

    Args:
        apk_path: Path to the APK file
        reinstall: Whether to reinstall if app exists
        grant_permissions: Whether to grant all permissions

    Returns:
        Whatever the device's install_app call returns, or an "Error: ..."
        message when the device or the APK file cannot be found, or a
        ValueError is raised along the way.
    """
    try:
        if self.serial:
            # Reuse the DeviceManager created in __init__ rather than
            # instantiating a new one on every call.
            device = await self.device_manager.get_device(self.serial)
            if not device:
                return f"Error: Device {self.serial} not found"
        else:
            device = await self.get_device()

        # The APK is checked on the local filesystem before it is handed
        # to the device.
        if not os.path.exists(apk_path):
            return f"Error: APK file not found at {apk_path}"

        return await device.install_app(apk_path, reinstall, grant_permissions)
    except ValueError as e:
        return f"Error: {str(e)}"
522
548
 
523
- async def press_key(keycode: int, serial: Optional[str] = None) -> str:
524
- """
525
- Press a key on the device.
526
-
527
- Common keycodes:
528
- - 3: HOME
529
- - 4: BACK
530
- - 24: VOLUME UP
531
- - 25: VOLUME DOWN
532
- - 26: POWER
533
- - 82: MENU
534
-
535
- Args:
536
- keycode: Android keycode to press
537
- serial: Optional device serial (for backward compatibility)
538
- """
539
- try:
540
- if serial:
541
- device_manager = DeviceManager()
542
- device = await device_manager.get_device(serial)
543
- if not device:
544
- return f"Error: Device {serial} not found"
545
- else:
546
- device = await get_device()
547
-
548
- key_names = {
549
- 3: "HOME",
550
- 4: "BACK",
551
- 24: "VOLUME UP",
552
- 25: "VOLUME DOWN",
553
- 26: "POWER",
554
- 82: "MENU",
555
- }
556
- key_name = key_names.get(keycode, str(keycode))
557
-
558
- await device.press_key(keycode)
559
- return f"Pressed key {key_name}"
560
- except ValueError as e:
561
- return f"Error: {str(e)}"
549
async def take_screenshot(self) -> bool:
    """
    Take a screenshot of the device.

    This function captures the current screen and stores it on
    ``self.last_screenshot`` so it can be attached to the next message.
    It does not save the screenshot anywhere on the phone.

    Returns:
        True once the screenshot has been captured and stored.

    Raises:
        ValueError: If the device cannot be found or a ValueError is raised
            while capturing. This method never returns False; failure is
            always signalled by the exception.
    """
    try:
        if self.serial:
            # Reuse the DeviceManager created in __init__ rather than
            # instantiating a new one on every call.
            device = await self.device_manager.get_device(self.serial)
            if not device:
                raise ValueError(f"Device {self.serial} not found")
        else:
            device = await self.get_device()

        screen_tuple = await device.take_screenshot()
        # Only element 1 of the returned tuple is kept — presumably the raw
        # image bytes; confirm against Device.take_screenshot.
        self.last_screenshot = screen_tuple[1]
        return True
    except ValueError as e:
        # Chain explicitly so the original error stays attached as __cause__.
        raise ValueError(f"Error taking screenshot: {str(e)}") from e
589
573
 
590
- async def install_app(
591
- apk_path: str,
592
- reinstall: bool = False,
593
- grant_permissions: bool = True,
594
- serial: Optional[str] = None
595
- ) -> str:
596
- """
597
- Install an app on the device.
598
-
599
- Args:
600
- apk_path: Path to the APK file
601
- reinstall: Whether to reinstall if app exists
602
- grant_permissions: Whether to grant all permissions
603
- serial: Optional device serial (for backward compatibility)
604
- """
605
- try:
606
- if serial:
607
- device_manager = DeviceManager()
608
- device = await device_manager.get_device(serial)
609
- if not device:
610
- return f"Error: Device {serial} not found"
611
- else:
612
- device = await get_device()
574
async def list_packages(
    self,
    include_system_apps: bool = False
) -> List[str]:
    """
    List installed packages on the device.

    Args:
        include_system_apps: Whether to include system apps (default: False)

    Returns:
        List of package names

    Raises:
        ValueError: If the device cannot be found or a ValueError is raised
            while querying or parsing the package list.
    """
    try:
        if self.serial:
            # Reuse the DeviceManager created in __init__ rather than
            # instantiating a new one on every call.
            device = await self.device_manager.get_device(self.serial)
            if not device:
                raise ValueError(f"Device {self.serial} not found")
        else:
            device = await self.get_device()

        # "-f" asks pm to include each package's APK path; "-3" restricts
        # the listing to third-party (non-system) packages.
        cmd = "pm list packages -f"
        if not include_system_apps:
            cmd += " -3"

        output = await device._adb.shell(device._serial, cmd)

        # parse_package_list yields dicts; only the "package" field is returned.
        package_list = [entry["package"] for entry in self.parse_package_list(output)]
        print(f"Returning {len(package_list)} packages")
        return package_list
    except ValueError as e:
        # Chain explicitly so the original error stays attached as __cause__.
        raise ValueError(f"Error listing packages: {str(e)}") from e
611
+
612
async def extract(self, filename: Optional[str] = None) -> str:
    """Extract and save the current UI state to a JSON file.

    This function captures the current UI state including all UI elements
    and saves it to a JSON file for later analysis or reference.

    Args:
        filename: Optional filename to save the UI state (defaults to
            ui_state_TIMESTAMP.json). ".json" is appended when missing.

    Returns:
        A message containing the absolute path of the saved JSON file, or an
        "Error extracting UI state: ..." message if anything fails.
    """
    # Local import mirrors get_all_elements, which also imports json locally;
    # harmless if the module imports it at the top as well.
    import json

    try:
        # Generate default filename if not provided
        if not filename:
            filename = f"ui_state_{int(time.time())}.json"

        # Ensure the filename ends with .json
        if not filename.endswith(".json"):
            filename += ".json"

        # get_all_elements() takes no arguments: it reads the serial from
        # self. Passing self.serial raised TypeError on every call.
        ui_elements = await self.get_all_elements()

        # Save to file
        save_path = os.path.abspath(filename)
        async with aiofiles.open(save_path, "w", encoding="utf-8") as f:
            await f.write(json.dumps(ui_elements, indent=2))

        return f"UI state extracted and saved to {save_path}"

    except Exception as e:
        return f"Error extracting UI state: {e}"
621
646
 
622
- async def uninstall_app(
623
- package: str,
624
- keep_data: bool = False,
625
- serial: Optional[str] = None
626
- ) -> str:
627
- """
628
- Uninstall an app from the device.
629
-
630
- Args:
631
- package: Package name to uninstall
632
- keep_data: Whether to keep app data and cache
633
- serial: Optional device serial (for backward compatibility)
634
- """
635
- try:
636
- if serial:
637
- device_manager = DeviceManager()
638
- device = await device_manager.get_device(serial)
639
- if not device:
640
- return f"Error: Device {serial} not found"
641
- else:
642
- device = await get_device()
647
async def get_all_elements(self) -> Dict[str, Any]:
    """
    Get all UI elements from the device, including non-interactive elements.

    This function broadcasts com.droidrun.portal.GET_ALL_ELEMENTS to the
    device, polls logcat for the path of the JSON file written in response,
    pulls that file and parses it.

    Returns:
        Dictionary with:
        - "all_elements": the parsed JSON payload
        - "count": number of elements (length of the payload if it is a
          list, otherwise length of its "elements" entry)
        - "message": a fixed confirmation string

    Raises:
        ValueError: On any failure (device not found, no file path seen in
            logcat within the wait window, pull/read/parse errors).
    """
    import json

    max_wait_time = 10  # Maximum wait time in seconds
    poll_interval = 0.2  # Check every 200ms

    try:
        # Reuse the DeviceManager created in __init__ rather than
        # instantiating a new one on every call.
        device = await self.device_manager.get_device(self.serial)
        if not device:
            raise ValueError(f"Device {self.serial} not found")

        # Reserve a local path for the pulled JSON; delete=False because the
        # file is filled by the pull after this handle is closed.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as temp:
            local_path = temp.name

        try:
            # Clear logcat to make it easier to find our output
            await device._adb.shell(device._serial, "logcat -c")

            # Trigger the custom service via broadcast to get ALL elements
            await device._adb.shell(device._serial, "am broadcast -a com.droidrun.portal.GET_ALL_ELEMENTS")

            # Poll logcat until the service reports where it wrote the JSON
            loop = asyncio.get_running_loop()
            deadline = loop.time() + max_wait_time
            device_path = None
            while loop.time() < deadline:
                logcat_output = await device._adb.shell(device._serial, "logcat -d | grep \"DROIDRUN_FILE\" | grep \"JSON data written to\" | tail -1")

                match = re.search(r"JSON data written to: (.*)", logcat_output)
                if match:
                    device_path = match.group(1).strip()
                    break

                await asyncio.sleep(poll_interval)

            if not device_path:
                raise ValueError(f"Failed to find the JSON file path in logcat after {max_wait_time} seconds")

            # Pull the JSON file from the device and read it back
            await device._adb.pull_file(device._serial, device_path, local_path)
            async with aiofiles.open(local_path, "r", encoding="utf-8") as f:
                json_content = await f.read()

            try:
                ui_data = json.loads(json_content)
            except json.JSONDecodeError as e:
                raise ValueError("Failed to parse UI elements JSON data") from e

            if isinstance(ui_data, list):
                count = len(ui_data)
            else:
                count = len(ui_data.get("elements", []))

            return {
                "all_elements": ui_data,
                "count": count,
                "message": "Retrieved all UI elements from the device screen"
            }

        except Exception as e:
            raise ValueError(f"Error retrieving all UI elements: {e}") from e
        finally:
            # Single cleanup point for both the success and the error path.
            with contextlib.suppress(OSError):
                os.unlink(local_path)

    except Exception as e:
        raise ValueError(f"Error getting all UI elements: {e}") from e
667
731
 
668
- return await device.take_screenshot()
669
- except ValueError as e:
670
- raise ValueError(f"Error taking screenshot: {str(e)}")
732
def complete(self, success: bool, reason: str = "") -> None:
    """
    Mark the task as finished.

    Args:
        success: Indicates if the task was successful.
        reason: Explanation of the outcome. Optional on success (a default
            message is used when empty); required if success is False.

    Raises:
        ValueError: If success is False and no reason is given. In that case
            no state on the instance is modified.
    """
    if success:
        self.success = True
        # Use the reason passed by the caller. Falling back to self.reason
        # would keep a stale message from an earlier call.
        self.reason = reason or "Task completed successfully."
    else:
        # Validate before mutating so a rejected call leaves state untouched.
        if not reason:
            raise ValueError("Reason for failure is required if success is False.")
        self.success = False
        self.reason = reason
    self.finished = True
717
750
 
718
- async def complete(result: str) -> str:
719
- """Complete the task with a result message.
720
-
721
- Args:
722
- result: The result message
723
-
724
- Returns:
725
- Success message
726
- """
727
- return f"Task completed: {result}"
728
751
 
729
- async def extract(filename: Optional[str] = None, serial: Optional[str] = None) -> str:
730
- """Extract and save the current UI state to a JSON file.
731
-
732
- This function captures the current UI state including all UI elements
733
- and saves it to a JSON file for later analysis or reference.
734
-
735
- Args:
736
- filename: Optional filename to save the UI state (defaults to ui_state_TIMESTAMP.json)
737
- serial: Optional device serial number
738
-
739
- Returns:
740
- Path to the saved JSON file
741
- """
742
- try:
743
- # Generate default filename if not provided
744
- if not filename:
745
- timestamp = int(time.time())
746
- filename = f"ui_state_{timestamp}.json"
747
-
748
- # Ensure the filename ends with .json
749
- if not filename.endswith(".json"):
750
- filename += ".json"
751
-
752
- # Get the UI elements
753
- ui_elements = await get_all_elements(serial)
754
-
755
- # Save to file
756
- save_path = os.path.abspath(filename)
757
- async with aiofiles.open(save_path, "w", encoding="utf-8") as f:
758
- await f.write(json.dumps(ui_elements, indent=2))
759
-
760
- return f"UI state extracted and saved to {save_path}"
761
-
762
- except Exception as e:
763
- return f"Error extracting UI state: {e}"
764
-
765
- async def get_all_elements(serial: Optional[str] = None) -> Dict[str, Any]:
766
- """
767
- Get all UI elements from the device, including non-interactive elements.
768
-
769
- This function interacts with the TopViewService app installed on the device
770
- to capture all UI elements, even those that are not interactive. This provides
771
- a complete view of the UI hierarchy for analysis or debugging purposes.
772
-
773
- Args:
774
- serial: Optional device serial number
775
-
776
- Returns:
777
- Dictionary containing all UI elements extracted from the device screen
778
- """
779
- try:
780
- # Get the device
781
- if serial:
782
- device_manager = DeviceManager()
783
- device = await device_manager.get_device(serial)
784
- if not device:
785
- raise ValueError(f"Device {serial} not found")
786
- else:
787
- device = await get_device()
752
async def get_phone_state(self, serial: Optional[str] = None) -> Dict[str, Any]:
    """
    Get the current phone state including current activity and keyboard visibility.

    Args:
        serial: Optional device serial number. When omitted, the serial this
            instance was created with is used.

    Returns:
        On success, a dictionary with:
        - "current_activity": the stripped dumpsys line naming the resumed
          activity, or a placeholder string when none was found
        - "keyboard_shown": True when dumpsys reports mInputShown=true
        On failure, a dictionary with "error" and "message" keys instead;
        no exception is raised.
    """
    try:
        # Resolve the device the same way the sibling methods do: an explicit
        # serial wins, otherwise fall back to the instance's serial.
        target_serial = serial or self.serial
        if target_serial:
            device = await self.device_manager.get_device(target_serial)
            if not device:
                raise ValueError(f"Device {target_serial} not found")
        else:
            device = await self.get_device()

        # Get the top resumed activity
        activity_output = await device._adb.shell(device._serial, "dumpsys activity activities | grep topResumedActivity")
        if not activity_output:
            # Try alternative command for older Android versions
            activity_output = await device._adb.shell(device._serial, "dumpsys activity activities | grep ResumedActivity")

        # Get keyboard visibility state
        keyboard_output = await device._adb.shell(device._serial, "dumpsys input_method | grep mInputShown")

        if activity_output:
            current_activity = activity_output.strip()
        else:
            current_activity = "Unable to determine current activity"

        is_keyboard_shown = bool(keyboard_output) and "mInputShown=true" in keyboard_output

        return {
            "current_activity": current_activity,
            "keyboard_shown": is_keyboard_shown,
        }

    except Exception as e:
        return {
            "error": str(e),
            "message": f"Error getting phone state: {str(e)}"
        }
803
+
804
async def remember(self, information: str) -> str:
    """
    Store important information to remember for future context.

    The text is stripped of surrounding whitespace and appended to
    ``self.memory``; only the 10 most recent entries are kept.

    Args:
        information: The information to remember

    Returns:
        "Remembered: <information>" on success, or an error message when
        the input is not a string or is empty/whitespace-only.
    """
    # Reject non-strings and blank input. Without the strip() check, a
    # whitespace-only string would be stored as an empty entry.
    if not isinstance(information, str) or not information.strip():
        return "Error: Please provide valid information to remember."

    # Add the information to memory
    self.memory.append(information.strip())

    # Limit memory size to prevent context overflow (keep most recent items)
    max_memory_items = 10
    if len(self.memory) > max_memory_items:
        self.memory = self.memory[-max_memory_items:]

    return f"Remembered: {information}"
830
+
831
def get_memory(self) -> List[str]:
    """
    Return the remembered items.

    Returns:
        A new list holding the stored memory items; mutating it does not
        affect the instance's own memory list.
    """
    snapshot = list(self.memory)
    return snapshot