droidrun 0.3.9__py3-none-any.whl → 0.3.10.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/__init__.py +2 -3
- droidrun/__main__.py +1 -1
- droidrun/agent/__init__.py +1 -1
- droidrun/agent/codeact/__init__.py +1 -4
- droidrun/agent/codeact/codeact_agent.py +66 -40
- droidrun/agent/codeact/events.py +6 -3
- droidrun/agent/codeact/prompts.py +2 -2
- droidrun/agent/common/events.py +4 -2
- droidrun/agent/context/__init__.py +1 -3
- droidrun/agent/context/agent_persona.py +2 -1
- droidrun/agent/context/context_injection_manager.py +6 -6
- droidrun/agent/context/episodic_memory.py +5 -3
- droidrun/agent/context/personas/__init__.py +3 -3
- droidrun/agent/context/personas/app_starter.py +3 -3
- droidrun/agent/context/personas/big_agent.py +3 -3
- droidrun/agent/context/personas/default.py +3 -3
- droidrun/agent/context/personas/ui_expert.py +5 -5
- droidrun/agent/context/task_manager.py +15 -17
- droidrun/agent/droid/__init__.py +1 -1
- droidrun/agent/droid/droid_agent.py +327 -180
- droidrun/agent/droid/events.py +91 -9
- droidrun/agent/executor/__init__.py +13 -0
- droidrun/agent/executor/events.py +24 -0
- droidrun/agent/executor/executor_agent.py +327 -0
- droidrun/agent/executor/prompts.py +136 -0
- droidrun/agent/manager/__init__.py +18 -0
- droidrun/agent/manager/events.py +20 -0
- droidrun/agent/manager/manager_agent.py +459 -0
- droidrun/agent/manager/prompts.py +223 -0
- droidrun/agent/oneflows/app_starter_workflow.py +118 -0
- droidrun/agent/oneflows/text_manipulator.py +204 -0
- droidrun/agent/planner/__init__.py +3 -3
- droidrun/agent/planner/events.py +6 -3
- droidrun/agent/planner/planner_agent.py +27 -42
- droidrun/agent/planner/prompts.py +2 -2
- droidrun/agent/usage.py +11 -11
- droidrun/agent/utils/__init__.py +11 -1
- droidrun/agent/utils/async_utils.py +2 -1
- droidrun/agent/utils/chat_utils.py +48 -60
- droidrun/agent/utils/device_state_formatter.py +177 -0
- droidrun/agent/utils/executer.py +12 -11
- droidrun/agent/utils/inference.py +114 -0
- droidrun/agent/utils/llm_picker.py +2 -0
- droidrun/agent/utils/message_utils.py +85 -0
- droidrun/agent/utils/tools.py +220 -0
- droidrun/agent/utils/trajectory.py +8 -7
- droidrun/cli/__init__.py +1 -1
- droidrun/cli/logs.py +29 -28
- droidrun/cli/main.py +279 -143
- droidrun/config_manager/__init__.py +25 -0
- droidrun/config_manager/config_manager.py +583 -0
- droidrun/macro/__init__.py +2 -2
- droidrun/macro/__main__.py +1 -1
- droidrun/macro/cli.py +36 -34
- droidrun/macro/replay.py +7 -9
- droidrun/portal.py +1 -1
- droidrun/telemetry/__init__.py +2 -2
- droidrun/telemetry/events.py +3 -4
- droidrun/telemetry/phoenix.py +173 -0
- droidrun/telemetry/tracker.py +7 -5
- droidrun/tools/__init__.py +1 -1
- droidrun/tools/adb.py +210 -82
- droidrun/tools/ios.py +7 -5
- droidrun/tools/tools.py +25 -8
- {droidrun-0.3.9.dist-info → droidrun-0.3.10.dev2.dist-info}/METADATA +6 -3
- droidrun-0.3.10.dev2.dist-info/RECORD +70 -0
- droidrun/agent/common/default.py +0 -5
- droidrun/agent/context/reflection.py +0 -20
- droidrun/agent/oneflows/reflector.py +0 -265
- droidrun-0.3.9.dist-info/RECORD +0 -56
- {droidrun-0.3.9.dist-info → droidrun-0.3.10.dev2.dist-info}/WHEEL +0 -0
- {droidrun-0.3.9.dist-info → droidrun-0.3.10.dev2.dist-info}/entry_points.txt +0 -0
- {droidrun-0.3.9.dist-info → droidrun-0.3.10.dev2.dist-info}/licenses/LICENSE +0 -0
droidrun/tools/adb.py
CHANGED
@@ -2,25 +2,27 @@
|
|
2
2
|
UI Actions - Core UI interaction tools for Android device control.
|
3
3
|
"""
|
4
4
|
|
5
|
-
import
|
5
|
+
import base64
|
6
6
|
import io
|
7
7
|
import json
|
8
|
-
import time
|
9
8
|
import logging
|
9
|
+
import os
|
10
|
+
import time
|
11
|
+
from typing import Any, Dict, List, Optional, Tuple
|
12
|
+
|
13
|
+
import requests
|
14
|
+
from adbutils import adb
|
10
15
|
from llama_index.core.workflow import Context
|
11
|
-
|
16
|
+
|
12
17
|
from droidrun.agent.common.events import (
|
18
|
+
DragActionEvent,
|
13
19
|
InputTextActionEvent,
|
14
20
|
KeyPressActionEvent,
|
15
21
|
StartAppEvent,
|
16
22
|
SwipeActionEvent,
|
17
23
|
TapActionEvent,
|
18
|
-
DragActionEvent,
|
19
24
|
)
|
20
25
|
from droidrun.tools.tools import Tools
|
21
|
-
from adbutils import adb
|
22
|
-
import requests
|
23
|
-
import base64
|
24
26
|
|
25
27
|
logger = logging.getLogger("droidrun-tools")
|
26
28
|
PORTAL_DEFAULT_TCP_PORT = 8080
|
@@ -34,6 +36,8 @@ class AdbTools(Tools):
|
|
34
36
|
serial: str | None = None,
|
35
37
|
use_tcp: bool = False,
|
36
38
|
remote_tcp_port: int = PORTAL_DEFAULT_TCP_PORT,
|
39
|
+
app_opener_llm = None,
|
40
|
+
text_manipulator_llm = None,
|
37
41
|
) -> None:
|
38
42
|
"""Initialize the AdbTools instance.
|
39
43
|
|
@@ -41,6 +45,8 @@ class AdbTools(Tools):
|
|
41
45
|
serial: Device serial number
|
42
46
|
use_tcp: Whether to use TCP communication (default: False)
|
43
47
|
tcp_port: TCP port for communication (default: 8080)
|
48
|
+
app_opener_llm: LLM instance for app opening workflow (optional)
|
49
|
+
text_manipulator_llm: LLM instance for text manipulation (optional)
|
44
50
|
"""
|
45
51
|
self.device = adb.device(serial=serial)
|
46
52
|
self.use_tcp = use_tcp
|
@@ -60,6 +66,10 @@ class AdbTools(Tools):
|
|
60
66
|
self.screenshots: List[Dict[str, Any]] = []
|
61
67
|
# Trajectory saving level
|
62
68
|
self.save_trajectories = "none"
|
69
|
+
|
70
|
+
# LLM instances for specialized workflows
|
71
|
+
self.app_opener_llm = app_opener_llm
|
72
|
+
self.text_manipulator_llm = text_manipulator_llm
|
63
73
|
|
64
74
|
self.setup_keyboard()
|
65
75
|
|
@@ -68,6 +78,13 @@ class AdbTools(Tools):
|
|
68
78
|
self.setup_tcp_forward()
|
69
79
|
|
70
80
|
|
81
|
+
def get_date(self) -> str:
|
82
|
+
"""
|
83
|
+
Get the current date and time on device.
|
84
|
+
"""
|
85
|
+
return self.device.shell("date").strip()
|
86
|
+
|
87
|
+
|
71
88
|
def setup_tcp_forward(self) -> bool:
|
72
89
|
"""
|
73
90
|
Set up ADB TCP port forwarding for communication with the portal app.
|
@@ -126,7 +143,7 @@ class AdbTools(Tools):
|
|
126
143
|
c.close()
|
127
144
|
|
128
145
|
self.tcp_forwarded = False
|
129
|
-
logger.debug(
|
146
|
+
logger.debug("TCP port forwarding removed")
|
130
147
|
return True
|
131
148
|
return True
|
132
149
|
except Exception as e:
|
@@ -137,7 +154,7 @@ class AdbTools(Tools):
|
|
137
154
|
"""
|
138
155
|
Set up the DroidRun keyboard as the default input method.
|
139
156
|
Simple setup that just switches to DroidRun keyboard without saving/restoring.
|
140
|
-
|
157
|
+
|
141
158
|
Returns:
|
142
159
|
bool: True if setup was successful, False otherwise
|
143
160
|
"""
|
@@ -207,20 +224,19 @@ class AdbTools(Tools):
|
|
207
224
|
return None
|
208
225
|
|
209
226
|
@Tools.ui_action
|
210
|
-
def tap_by_index(self, index: int) -> str:
|
227
|
+
def _extract_element_coordinates_by_index(self, index: int) -> Tuple[int, int]:
|
211
228
|
"""
|
212
|
-
|
213
|
-
|
214
|
-
This function uses the cached clickable elements
|
215
|
-
to find the element with the given index and tap on its center coordinates.
|
229
|
+
Extract center coordinates from an element by its index.
|
216
230
|
|
217
231
|
Args:
|
218
|
-
index: Index of the element to
|
232
|
+
index: Index of the element to find and extract coordinates from
|
219
233
|
|
220
234
|
Returns:
|
221
|
-
|
222
|
-
"""
|
235
|
+
Tuple of (x, y) center coordinates
|
223
236
|
|
237
|
+
Raises:
|
238
|
+
ValueError: If element not found, bounds format is invalid, or missing bounds
|
239
|
+
"""
|
224
240
|
def collect_all_indices(elements):
|
225
241
|
"""Recursively collect all indices from elements and their children."""
|
226
242
|
indices = []
|
@@ -244,40 +260,57 @@ class AdbTools(Tools):
|
|
244
260
|
return result
|
245
261
|
return None
|
246
262
|
|
263
|
+
# Check if we have cached elements
|
264
|
+
if not self.clickable_elements_cache:
|
265
|
+
raise ValueError("No UI elements cached. Call get_state first.")
|
266
|
+
|
267
|
+
# Find the element with the given index (including in children)
|
268
|
+
element = find_element_by_index(self.clickable_elements_cache, index)
|
269
|
+
|
270
|
+
if not element:
|
271
|
+
# List available indices to help the user
|
272
|
+
indices = sorted(collect_all_indices(self.clickable_elements_cache))
|
273
|
+
indices_str = ", ".join(str(idx) for idx in indices[:20])
|
274
|
+
if len(indices) > 20:
|
275
|
+
indices_str += f"... and {len(indices) - 20} more"
|
276
|
+
raise ValueError(f"No element found with index {index}. Available indices: {indices_str}")
|
277
|
+
|
278
|
+
# Get the bounds of the element
|
279
|
+
bounds_str = element.get("bounds")
|
280
|
+
if not bounds_str:
|
281
|
+
element_text = element.get("text", "No text")
|
282
|
+
element_type = element.get("type", "unknown")
|
283
|
+
element_class = element.get("className", "Unknown class")
|
284
|
+
raise ValueError(f"Element with index {index} ('{element_text}', {element_class}, type: {element_type}) has no bounds and cannot be tapped")
|
285
|
+
|
286
|
+
# Parse the bounds (format: "left,top,right,bottom")
|
247
287
|
try:
|
248
|
-
|
249
|
-
|
250
|
-
|
288
|
+
left, top, right, bottom = map(int, bounds_str.split(","))
|
289
|
+
except ValueError:
|
290
|
+
raise ValueError(f"Invalid bounds format for element with index {index}: {bounds_str}") from ValueError
|
251
291
|
|
252
|
-
|
253
|
-
|
292
|
+
# Calculate the center of the element
|
293
|
+
x = (left + right) // 2
|
294
|
+
y = (top + bottom) // 2
|
254
295
|
|
255
|
-
|
256
|
-
# List available indices to help the user
|
257
|
-
indices = sorted(collect_all_indices(self.clickable_elements_cache))
|
258
|
-
indices_str = ", ".join(str(idx) for idx in indices[:20])
|
259
|
-
if len(indices) > 20:
|
260
|
-
indices_str += f"... and {len(indices) - 20} more"
|
296
|
+
return x, y
|
261
297
|
|
262
|
-
|
298
|
+
def tap_by_index(self, index: int) -> str:
|
299
|
+
"""
|
300
|
+
Tap on a UI element by its index.
|
263
301
|
|
264
|
-
|
265
|
-
|
266
|
-
if not bounds_str:
|
267
|
-
element_text = element.get("text", "No text")
|
268
|
-
element_type = element.get("type", "unknown")
|
269
|
-
element_class = element.get("className", "Unknown class")
|
270
|
-
return f"Error: Element with index {index} ('{element_text}', {element_class}, type: {element_type}) has no bounds and cannot be tapped"
|
302
|
+
This function uses the cached clickable elements
|
303
|
+
to find the element with the given index and tap on its center coordinates.
|
271
304
|
|
272
|
-
|
273
|
-
|
274
|
-
left, top, right, bottom = map(int, bounds_str.split(","))
|
275
|
-
except ValueError:
|
276
|
-
return f"Error: Invalid bounds format for element with index {index}: {bounds_str}"
|
305
|
+
Args:
|
306
|
+
index: Index of the element to tap
|
277
307
|
|
278
|
-
|
279
|
-
|
280
|
-
|
308
|
+
Returns:
|
309
|
+
Result message
|
310
|
+
"""
|
311
|
+
try:
|
312
|
+
# Extract coordinates using the helper function
|
313
|
+
x, y = self._extract_element_coordinates_by_index(index)
|
281
314
|
|
282
315
|
logger.debug(
|
283
316
|
f"Tapping element with index {index} at coordinates ({x}, {y})"
|
@@ -287,10 +320,24 @@ class AdbTools(Tools):
|
|
287
320
|
logger.debug(f"Tapped element with index {index} at coordinates ({x}, {y})")
|
288
321
|
|
289
322
|
# Emit coordinate action event for trajectory recording
|
290
|
-
|
291
323
|
if self._ctx:
|
292
|
-
|
293
|
-
|
324
|
+
# Find element again for event details
|
325
|
+
def find_element_by_index(elements, target_index):
|
326
|
+
"""Recursively find an element with the given index."""
|
327
|
+
for item in elements:
|
328
|
+
if item.get("index") == target_index:
|
329
|
+
return item
|
330
|
+
# Check children if present
|
331
|
+
children = item.get("children", [])
|
332
|
+
result = find_element_by_index(children, target_index)
|
333
|
+
if result:
|
334
|
+
return result
|
335
|
+
return None
|
336
|
+
|
337
|
+
element = find_element_by_index(self.clickable_elements_cache, index)
|
338
|
+
element_text = element.get("text", "No text") if element else "No text"
|
339
|
+
element_class = element.get("className", "Unknown class") if element else "Unknown class"
|
340
|
+
bounds_str = element.get("bounds", "") if element else ""
|
294
341
|
|
295
342
|
tap_event = TapActionEvent(
|
296
343
|
action_type="tap",
|
@@ -307,20 +354,34 @@ class AdbTools(Tools):
|
|
307
354
|
time.sleep(0.5)
|
308
355
|
|
309
356
|
# Create a descriptive response
|
357
|
+
def find_element_by_index(elements, target_index):
|
358
|
+
"""Recursively find an element with the given index."""
|
359
|
+
for item in elements:
|
360
|
+
if item.get("index") == target_index:
|
361
|
+
return item
|
362
|
+
# Check children if present
|
363
|
+
children = item.get("children", [])
|
364
|
+
result = find_element_by_index(children, target_index)
|
365
|
+
if result:
|
366
|
+
return result
|
367
|
+
return None
|
368
|
+
|
369
|
+
element = find_element_by_index(self.clickable_elements_cache, index)
|
310
370
|
response_parts = []
|
311
371
|
response_parts.append(f"Tapped element with index {index}")
|
312
|
-
response_parts.append(f"Text: '{element.get('text', 'No text')}'")
|
313
|
-
response_parts.append(f"Class: {element.get('className', 'Unknown class')}")
|
314
|
-
response_parts.append(f"Type: {element.get('type', 'unknown')}")
|
372
|
+
response_parts.append(f"Text: '{element.get('text', 'No text') if element else 'No text'}'")
|
373
|
+
response_parts.append(f"Class: {element.get('className', 'Unknown class') if element else 'Unknown class'}")
|
374
|
+
response_parts.append(f"Type: {element.get('type', 'unknown') if element else 'unknown'}")
|
315
375
|
|
316
376
|
# Add information about children if present
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
377
|
+
if element:
|
378
|
+
children = element.get("children", [])
|
379
|
+
if children:
|
380
|
+
child_texts = [
|
381
|
+
child.get("text") for child in children if child.get("text")
|
382
|
+
]
|
383
|
+
if child_texts:
|
384
|
+
response_parts.append(f"Contains text: {' | '.join(child_texts)}")
|
324
385
|
|
325
386
|
response_parts.append(f"Coordinates: ({x}, {y})")
|
326
387
|
|
@@ -453,24 +514,31 @@ class AdbTools(Tools):
|
|
453
514
|
return False
|
454
515
|
|
455
516
|
@Tools.ui_action
|
456
|
-
def input_text(self, text: str) -> str:
|
517
|
+
def input_text(self, text: str, index: int = -1, clear: bool = False) -> str:
|
457
518
|
"""
|
458
519
|
Input text on the device.
|
459
520
|
Always make sure that the Focused Element is not None before inputting text.
|
460
521
|
|
461
522
|
Args:
|
462
523
|
text: Text to input. Can contain spaces, newlines, and special characters including non-ASCII.
|
524
|
+
index: Index of the element to input text into. If -1, the focused element will be used.
|
525
|
+
clear: Whether to clear the text before inputting.
|
463
526
|
|
464
527
|
Returns:
|
465
528
|
Result message
|
466
529
|
"""
|
467
530
|
try:
|
531
|
+
if index != -1:
|
532
|
+
self.tap_by_index(index)
|
533
|
+
# Encode the text to Base64 (needed for both TCP and content provider)
|
534
|
+
encoded_text = base64.b64encode(text.encode()).decode()
|
468
535
|
|
469
536
|
if self.use_tcp and self.tcp_forwarded:
|
470
537
|
# Use TCP communication
|
471
|
-
|
472
|
-
|
473
|
-
|
538
|
+
payload = {
|
539
|
+
"base64_text": encoded_text,
|
540
|
+
"clear": clear # Include clear parameter for TCP
|
541
|
+
}
|
474
542
|
response = requests.post(
|
475
543
|
f"{self.tcp_base_url}/keyboard/input",
|
476
544
|
json=payload,
|
@@ -479,32 +547,48 @@ class AdbTools(Tools):
|
|
479
547
|
)
|
480
548
|
|
481
549
|
logger.debug(
|
482
|
-
|
550
|
+
f"Keyboard input TCP response: {response.status_code}, {response.text}"
|
483
551
|
)
|
484
552
|
|
485
553
|
if response.status_code != 200:
|
486
554
|
return f"Error: HTTP request failed with status {response.status_code}: {response.text}"
|
487
555
|
|
556
|
+
# For TCP, you might want to parse the response for success/error details
|
557
|
+
try:
|
558
|
+
result_data = response.json()
|
559
|
+
if result_data.get("status") == "success":
|
560
|
+
return f"Text input completed (clear={clear}): {text[:50]}{'...' if len(text) > 50 else ''}"
|
561
|
+
else:
|
562
|
+
return f"Error: {result_data.get('error', 'Unknown error')}"
|
563
|
+
except: # noqa: E722
|
564
|
+
return f"Text input completed (clear={clear}): {text[:50]}{'...' if len(text) > 50 else ''}"
|
565
|
+
|
488
566
|
else:
|
489
567
|
# Fallback to content provider method
|
490
|
-
#
|
491
|
-
|
568
|
+
# Build the content insert command with clear parameter
|
569
|
+
clear_str = "true" if clear else "false"
|
570
|
+
cmd = (
|
571
|
+
f'content insert --uri "content://com.droidrun.portal/keyboard/input" '
|
572
|
+
f'--bind base64_text:s:"{encoded_text}" '
|
573
|
+
f'--bind clear:b:{clear_str}'
|
574
|
+
)
|
492
575
|
|
493
|
-
|
494
|
-
self.device.shell(cmd)
|
576
|
+
# Execute the command and capture output for better error handling
|
577
|
+
result = self.device.shell(cmd)
|
578
|
+
logger.debug(f"Content provider result: {result}")
|
495
579
|
|
496
580
|
if self._ctx:
|
497
581
|
input_event = InputTextActionEvent(
|
498
582
|
action_type="input_text",
|
499
|
-
description=f"Input text: '{text[:50]}{'...' if len(text) > 50 else ''}'",
|
583
|
+
description=f"Input text: '{text[:50]}{'...' if len(text) > 50 else ''}' (clear={clear})",
|
500
584
|
text=text,
|
501
585
|
)
|
502
586
|
self._ctx.write_event_to_stream(input_event)
|
503
587
|
|
504
588
|
logger.debug(
|
505
|
-
f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"
|
589
|
+
f"Text input completed (clear={clear}): {text[:50]}{'...' if len(text) > 50 else ''}"
|
506
590
|
)
|
507
|
-
return f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"
|
591
|
+
return f"Text input completed (clear={clear}): {text[:50]}{'...' if len(text) > 50 else ''}"
|
508
592
|
|
509
593
|
except requests.exceptions.RequestException as e:
|
510
594
|
return f"Error: TCP request failed: {str(e)}"
|
@@ -526,13 +610,13 @@ class AdbTools(Tools):
|
|
526
610
|
if self._ctx:
|
527
611
|
key_event = KeyPressActionEvent(
|
528
612
|
action_type="key_press",
|
529
|
-
description=
|
613
|
+
description="Pressed key BACK",
|
530
614
|
keycode=4,
|
531
615
|
key_name="BACK",
|
532
616
|
)
|
533
617
|
self._ctx.write_event_to_stream(key_event)
|
534
618
|
|
535
|
-
return
|
619
|
+
return "Pressed key BACK"
|
536
620
|
except ValueError as e:
|
537
621
|
return f"Error: {str(e)}"
|
538
622
|
|
@@ -645,7 +729,7 @@ class AdbTools(Tools):
|
|
645
729
|
Take a screenshot of the device.
|
646
730
|
This function captures the current screen and adds the screenshot to context in the next message.
|
647
731
|
Also stores the screenshot in the screenshots list with timestamp for later GIF creation.
|
648
|
-
|
732
|
+
|
649
733
|
Args:
|
650
734
|
hide_overlay: Whether to hide the overlay elements during screenshot (default: True)
|
651
735
|
"""
|
@@ -659,11 +743,11 @@ class AdbTools(Tools):
|
|
659
743
|
url = f"{self.tcp_base_url}/screenshot"
|
660
744
|
if not hide_overlay:
|
661
745
|
url += "?hideOverlay=false"
|
662
|
-
|
746
|
+
|
663
747
|
response = requests.get(url, timeout=10)
|
664
748
|
if response.status_code == 200:
|
665
749
|
tcp_response = response.json()
|
666
|
-
|
750
|
+
|
667
751
|
# Check if response has the expected format with data field
|
668
752
|
if tcp_response.get("status") == "success" and "data" in tcp_response:
|
669
753
|
# Decode base64 string to bytes
|
@@ -696,11 +780,12 @@ class AdbTools(Tools):
|
|
696
780
|
return img_format, image_bytes
|
697
781
|
|
698
782
|
except requests.exceptions.RequestException as e:
|
699
|
-
raise ValueError(f"Error taking screenshot via TCP: {str(e)}")
|
783
|
+
raise ValueError(f"Error taking screenshot via TCP: {str(e)}") from e
|
700
784
|
except ValueError as e:
|
701
|
-
raise ValueError(f"Error taking screenshot: {str(e)}")
|
785
|
+
raise ValueError(f"Error taking screenshot: {str(e)}") from e
|
702
786
|
except Exception as e:
|
703
|
-
raise ValueError(f"Unexpected error taking screenshot: {str(e)}")
|
787
|
+
raise ValueError(f"Unexpected error taking screenshot: {str(e)}") from e
|
788
|
+
|
704
789
|
|
705
790
|
def list_packages(self, include_system_apps: bool = False) -> List[str]:
|
706
791
|
"""
|
@@ -716,7 +801,50 @@ class AdbTools(Tools):
|
|
716
801
|
logger.debug("Listing packages")
|
717
802
|
return self.device.list_packages(["-3"] if not include_system_apps else [])
|
718
803
|
except ValueError as e:
|
719
|
-
raise ValueError(f"Error listing packages: {str(e)}")
|
804
|
+
raise ValueError(f"Error listing packages: {str(e)}") from e
|
805
|
+
|
806
|
+
def get_apps(self, include_system: bool = True) -> List[Dict[str, str]]:
|
807
|
+
"""
|
808
|
+
Get installed apps with package name and label in human readable format.
|
809
|
+
|
810
|
+
Args:
|
811
|
+
include_system: Whether to include system apps (default: True)
|
812
|
+
|
813
|
+
Returns:
|
814
|
+
List of dictionaries containing 'package' and 'label' keys
|
815
|
+
"""
|
816
|
+
try:
|
817
|
+
logger.debug("Getting apps via content provider")
|
818
|
+
|
819
|
+
# Query the content provider for packages
|
820
|
+
adb_output = self.device.shell(
|
821
|
+
"content query --uri content://com.droidrun.portal/packages"
|
822
|
+
)
|
823
|
+
|
824
|
+
# Parse the content provider output
|
825
|
+
packages_data = self._parse_content_provider_output(adb_output)
|
826
|
+
|
827
|
+
if not packages_data or "packages" not in packages_data:
|
828
|
+
logger.warning("No packages data found in content provider response")
|
829
|
+
return []
|
830
|
+
|
831
|
+
apps = []
|
832
|
+
for package_info in packages_data["packages"]:
|
833
|
+
# Filter system apps if requested
|
834
|
+
if not include_system and package_info.get("isSystemApp", False):
|
835
|
+
continue
|
836
|
+
|
837
|
+
apps.append({
|
838
|
+
"package": package_info.get("packageName", ""),
|
839
|
+
"label": package_info.get("label", "")
|
840
|
+
})
|
841
|
+
|
842
|
+
logger.debug(f"Found {len(apps)} apps")
|
843
|
+
return apps
|
844
|
+
|
845
|
+
except Exception as e:
|
846
|
+
logger.error(f"Error getting apps: {str(e)}")
|
847
|
+
raise ValueError(f"Error getting apps: {str(e)}") from e
|
720
848
|
|
721
849
|
@Tools.ui_action
|
722
850
|
def complete(self, success: bool, reason: str = ""):
|
@@ -831,7 +959,7 @@ class AdbTools(Tools):
|
|
831
959
|
data_str = None
|
832
960
|
if "data" in state_data:
|
833
961
|
data_str = state_data["data"]
|
834
|
-
|
962
|
+
|
835
963
|
if data_str:
|
836
964
|
try:
|
837
965
|
combined_data = json.loads(data_str)
|
@@ -842,7 +970,7 @@ class AdbTools(Tools):
|
|
842
970
|
}
|
843
971
|
else:
|
844
972
|
return {
|
845
|
-
"error": "Format Error",
|
973
|
+
"error": "Format Error",
|
846
974
|
"message": "Neither 'data' nor 'message' field found in ContentProvider response",
|
847
975
|
}
|
848
976
|
else:
|
@@ -956,8 +1084,8 @@ def _shell_test_cli(serial: str, command: str) -> tuple[str, float]:
|
|
956
1084
|
Returns:
|
957
1085
|
Tuple of (output, elapsed_time)
|
958
1086
|
"""
|
959
|
-
import time
|
960
1087
|
import subprocess
|
1088
|
+
import time
|
961
1089
|
|
962
1090
|
adb_cmd = ["adb", "-s", serial, "shell", command]
|
963
1091
|
start = time.perf_counter()
|
@@ -1009,8 +1137,8 @@ def _shell_test_cli(serial: str, command: str) -> tuple[str, float]:
|
|
1009
1137
|
Returns:
|
1010
1138
|
Tuple of (output, elapsed_time)
|
1011
1139
|
"""
|
1012
|
-
import time
|
1013
1140
|
import subprocess
|
1141
|
+
import time
|
1014
1142
|
|
1015
1143
|
adb_cmd = ["adb", "-s", serial, "shell", command]
|
1016
1144
|
start = time.perf_counter()
|
droidrun/tools/ios.py
CHANGED
@@ -2,11 +2,13 @@
|
|
2
2
|
UI Actions - Core UI interaction tools for iOS device control.
|
3
3
|
"""
|
4
4
|
|
5
|
+
import logging
|
5
6
|
import re
|
6
7
|
import time
|
7
|
-
from typing import
|
8
|
-
|
8
|
+
from typing import Any, Dict, List, Optional, Tuple
|
9
|
+
|
9
10
|
import requests
|
11
|
+
|
10
12
|
from droidrun.tools.tools import Tools
|
11
13
|
|
12
14
|
logger = logging.getLogger("IOS")
|
@@ -37,7 +39,7 @@ SYSTEM_BUNDLE_IDENTIFIERS = [
|
|
37
39
|
class IOSTools(Tools):
|
38
40
|
"""Core UI interaction tools for iOS device control."""
|
39
41
|
|
40
|
-
def __init__(self, url: str, bundle_identifiers: List[str] = []) -> None:
|
42
|
+
def __init__(self, url: str, bundle_identifiers: List[str] = []) -> None: # noqa: B006
|
41
43
|
"""Initialize the IOSTools instance.
|
42
44
|
|
43
45
|
Args:
|
@@ -387,7 +389,7 @@ class IOSTools(Tools):
|
|
387
389
|
Bool indicating success or failure
|
388
390
|
"""
|
389
391
|
# TODO: implement this
|
390
|
-
logger.info(
|
392
|
+
logger.info("Drag action FAILED! Not implemented for iOS")
|
391
393
|
return False
|
392
394
|
|
393
395
|
def input_text(self, text: str) -> str:
|
@@ -506,7 +508,7 @@ class IOSTools(Tools):
|
|
506
508
|
|
507
509
|
except Exception as e:
|
508
510
|
logger.error(f"Error capturing screenshot: {e}")
|
509
|
-
raise ValueError(f"Error taking screenshot: {str(e)}")
|
511
|
+
raise ValueError(f"Error taking screenshot: {str(e)}") from e
|
510
512
|
|
511
513
|
def _get_phone_state(self) -> Dict[str, Any]:
|
512
514
|
"""
|
droidrun/tools/tools.py
CHANGED
@@ -1,9 +1,8 @@
|
|
1
|
-
from abc import ABC, abstractmethod
|
2
|
-
from typing import List, Optional, Dict, Any
|
3
1
|
import logging
|
4
|
-
from typing import Tuple, Dict, Callable, Any, Optional
|
5
|
-
from functools import wraps
|
6
2
|
import sys
|
3
|
+
from abc import ABC, abstractmethod
|
4
|
+
from functools import wraps
|
5
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple
|
7
6
|
|
8
7
|
# Get a logger for this module
|
9
8
|
logger = logging.getLogger(__name__)
|
@@ -24,15 +23,15 @@ class Tools(ABC):
|
|
24
23
|
def wrapper(*args, **kwargs):
|
25
24
|
self = args[0]
|
26
25
|
result = func(*args, **kwargs)
|
27
|
-
|
26
|
+
|
28
27
|
# Check if save_trajectories attribute exists and is set to "action"
|
29
28
|
if hasattr(self, 'save_trajectories') and self.save_trajectories == "action":
|
30
29
|
frame = sys._getframe(1)
|
31
30
|
caller_globals = frame.f_globals
|
32
|
-
|
31
|
+
|
33
32
|
step_screenshots = caller_globals.get('step_screenshots')
|
34
33
|
step_ui_states = caller_globals.get('step_ui_states')
|
35
|
-
|
34
|
+
|
36
35
|
if step_screenshots is not None:
|
37
36
|
step_screenshots.append(self.take_screenshot()[1])
|
38
37
|
if step_ui_states is not None:
|
@@ -46,6 +45,12 @@ class Tools(ABC):
|
|
46
45
|
Get the current state of the tool.
|
47
46
|
"""
|
48
47
|
pass
|
48
|
+
@abstractmethod
|
49
|
+
def get_date(self) -> str:
|
50
|
+
"""
|
51
|
+
Get the current date on device.
|
52
|
+
"""
|
53
|
+
pass
|
49
54
|
|
50
55
|
@abstractmethod
|
51
56
|
def tap_by_index(self, index: int) -> str:
|
@@ -77,7 +82,7 @@ class Tools(ABC):
|
|
77
82
|
pass
|
78
83
|
|
79
84
|
@abstractmethod
|
80
|
-
def input_text(self, text: str) -> str:
|
85
|
+
def input_text(self, text: str, index: int = -1, clear: bool = False) -> str:
|
81
86
|
"""
|
82
87
|
Input the given text into a focused input field.
|
83
88
|
"""
|
@@ -117,6 +122,12 @@ class Tools(ABC):
|
|
117
122
|
List all packages on the device.
|
118
123
|
"""
|
119
124
|
pass
|
125
|
+
@abstractmethod
|
126
|
+
def get_apps(self, include_system_apps: bool = True) -> List[Dict[str, Any]]:
|
127
|
+
"""
|
128
|
+
List all apps on the device.
|
129
|
+
"""
|
130
|
+
pass
|
120
131
|
|
121
132
|
@abstractmethod
|
122
133
|
def remember(self, information: str) -> str:
|
@@ -138,6 +149,12 @@ class Tools(ABC):
|
|
138
149
|
Complete the tool. This is used to indicate that the tool has completed its task.
|
139
150
|
"""
|
140
151
|
pass
|
152
|
+
@abstractmethod
|
153
|
+
def _extract_element_coordinates_by_index(self, index: int) -> Tuple[int, int]:
|
154
|
+
"""
|
155
|
+
Extract the coordinates of the element with the given index.
|
156
|
+
"""
|
157
|
+
pass
|
141
158
|
|
142
159
|
|
143
160
|
def describe_tools(tools: Tools, exclude_tools: Optional[List[str]] = None) -> Dict[str, Callable[..., Any]]:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: droidrun
|
3
|
-
Version: 0.3.9
|
3
|
+
Version: 0.3.10.dev2
|
4
4
|
Summary: A framework for controlling Android devices through LLM agents
|
5
5
|
Project-URL: Homepage, https://github.com/droidrun/droidrun
|
6
6
|
Project-URL: Bug Tracker, https://github.com/droidrun/droidrun/issues
|
@@ -27,10 +27,9 @@ Classifier: Topic :: Software Development :: Testing
|
|
27
27
|
Classifier: Topic :: Software Development :: Testing :: Acceptance
|
28
28
|
Classifier: Topic :: System :: Emulators
|
29
29
|
Classifier: Topic :: Utilities
|
30
|
-
Requires-Python: >=3.
|
30
|
+
Requires-Python: >=3.13
|
31
31
|
Requires-Dist: adbutils>=2.10.2
|
32
32
|
Requires-Dist: apkutils==2.0.0
|
33
|
-
Requires-Dist: llama-index-llms-google-genai>=0.6.2
|
34
33
|
Requires-Dist: llama-index==0.14.4
|
35
34
|
Requires-Dist: posthog>=6.7.6
|
36
35
|
Requires-Dist: pydantic>=2.11.10
|
@@ -54,6 +53,10 @@ Provides-Extra: openai
|
|
54
53
|
Requires-Dist: llama-index-llms-openai-like>=0.5.1; extra == 'openai'
|
55
54
|
Requires-Dist: llama-index-llms-openai>=0.5.6; extra == 'openai'
|
56
55
|
Requires-Dist: openai>=1.99.1; extra == 'openai'
|
56
|
+
Provides-Extra: openrouter
|
57
|
+
Requires-Dist: llama-index-llms-openrouter>=0.4.2; extra == 'openrouter'
|
58
|
+
Provides-Extra: phoenix
|
59
|
+
Requires-Dist: arize-phoenix>=12.3.0; extra == 'phoenix'
|
57
60
|
Description-Content-Type: text/markdown
|
58
61
|
|
59
62
|
<picture>
|