droidrun 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/__init__.py +6 -2
- droidrun/agent/codeact/codeact_agent.py +20 -14
- droidrun/agent/common/events.py +44 -1
- droidrun/agent/context/personas/__init__.py +2 -0
- droidrun/agent/context/personas/big_agent.py +96 -0
- droidrun/agent/context/personas/ui_expert.py +1 -0
- droidrun/agent/context/task_manager.py +8 -3
- droidrun/agent/droid/droid_agent.py +50 -16
- droidrun/agent/droid/events.py +1 -0
- droidrun/agent/planner/planner_agent.py +19 -14
- droidrun/agent/utils/chat_utils.py +1 -1
- droidrun/agent/utils/executer.py +17 -1
- droidrun/agent/utils/trajectory.py +258 -11
- droidrun/cli/main.py +108 -44
- droidrun/macro/__init__.py +14 -0
- droidrun/macro/__main__.py +10 -0
- droidrun/macro/cli.py +228 -0
- droidrun/macro/replay.py +309 -0
- droidrun/portal.py +37 -22
- droidrun/telemetry/events.py +1 -1
- droidrun/telemetry/tracker.py +3 -2
- droidrun/tools/adb.py +641 -185
- droidrun/tools/ios.py +163 -163
- droidrun/tools/tools.py +60 -14
- {droidrun-0.3.2.dist-info → droidrun-0.3.4.dist-info}/METADATA +20 -8
- droidrun-0.3.4.dist-info/RECORD +54 -0
- droidrun/adb/__init__.py +0 -13
- droidrun/adb/device.py +0 -345
- droidrun/adb/manager.py +0 -93
- droidrun/adb/wrapper.py +0 -226
- droidrun-0.3.2.dist-info/RECORD +0 -53
- {droidrun-0.3.2.dist-info → droidrun-0.3.4.dist-info}/WHEEL +0 -0
- {droidrun-0.3.2.dist-info → droidrun-0.3.4.dist-info}/entry_points.txt +0 -0
- {droidrun-0.3.2.dist-info → droidrun-0.3.4.dist-info}/licenses/LICENSE +0 -0
droidrun/tools/adb.py
CHANGED
@@ -3,31 +3,53 @@ UI Actions - Core UI interaction tools for Android device control.
|
|
3
3
|
"""
|
4
4
|
|
5
5
|
import os
|
6
|
+
import io
|
6
7
|
import json
|
7
8
|
import time
|
8
|
-
import asyncio
|
9
9
|
import logging
|
10
|
-
from
|
11
|
-
from
|
12
|
-
from droidrun.
|
10
|
+
from llama_index.core.workflow import Context
|
11
|
+
from typing import Optional, Dict, Tuple, List, Any
|
12
|
+
from droidrun.agent.common.events import (
|
13
|
+
InputTextActionEvent,
|
14
|
+
KeyPressActionEvent,
|
15
|
+
StartAppEvent,
|
16
|
+
SwipeActionEvent,
|
17
|
+
TapActionEvent,
|
18
|
+
DragActionEvent,
|
19
|
+
)
|
13
20
|
from droidrun.tools.tools import Tools
|
21
|
+
from adbutils import adb
|
22
|
+
import requests
|
23
|
+
import base64
|
14
24
|
|
15
|
-
logger = logging.getLogger("droidrun-
|
25
|
+
logger = logging.getLogger("droidrun-tools")
|
26
|
+
PORTAL_DEFAULT_TCP_PORT = 8080
|
16
27
|
|
17
28
|
|
18
29
|
class AdbTools(Tools):
|
19
30
|
"""Core UI interaction tools for Android device control."""
|
20
31
|
|
21
|
-
def __init__(
|
32
|
+
def __init__(
|
33
|
+
self,
|
34
|
+
serial: str | None = None,
|
35
|
+
use_tcp: bool = False,
|
36
|
+
remote_tcp_port: int = PORTAL_DEFAULT_TCP_PORT,
|
37
|
+
) -> None:
|
22
38
|
"""Initialize the AdbTools instance.
|
23
39
|
|
24
40
|
Args:
|
25
41
|
serial: Device serial number
|
42
|
+
use_tcp: Whether to use TCP communication (default: False)
|
43
|
+
tcp_port: TCP port for communication (default: 8080)
|
26
44
|
"""
|
27
|
-
self.
|
45
|
+
self.device = adb.device(serial=serial)
|
46
|
+
self.use_tcp = use_tcp
|
47
|
+
self.remote_tcp_port = remote_tcp_port
|
48
|
+
self.tcp_forwarded = False
|
49
|
+
|
50
|
+
self._ctx = None
|
28
51
|
# Instance‐level cache for clickable elements (index-based tapping)
|
29
52
|
self.clickable_elements_cache: List[Dict[str, Any]] = []
|
30
|
-
self.serial = serial
|
31
53
|
self.last_screenshot = None
|
32
54
|
self.reason = None
|
33
55
|
self.success = None
|
@@ -36,47 +58,85 @@ class AdbTools(Tools):
|
|
36
58
|
self.memory: List[str] = []
|
37
59
|
# Store all screenshots with timestamps
|
38
60
|
self.screenshots: List[Dict[str, Any]] = []
|
61
|
+
# Trajectory saving level
|
62
|
+
self.save_trajectories = "none"
|
39
63
|
|
40
|
-
|
41
|
-
|
42
|
-
|
64
|
+
# Set up TCP forwarding if requested
|
65
|
+
if self.use_tcp:
|
66
|
+
self.setup_tcp_forward()
|
43
67
|
|
44
|
-
|
45
|
-
|
68
|
+
def setup_tcp_forward(self) -> bool:
    """
    Set up ADB TCP port forwarding for communication with the portal app.

    Forwards a local port to ``self.remote_tcp_port`` on the device, stores
    the chosen local port in ``self.local_tcp_port`` and the matching URL in
    ``self.tcp_base_url``, then verifies the link with a GET to ``/ping``.
    ``self.tcp_forwarded`` is set to True only when the ping answers 200.
    When the ping fails, the forward that was just created is removed again
    so it does not stay behind unused.

    Returns:
        bool: True if forwarding was set up successfully, False otherwise
    """
    try:
        logger.debug(
            f"Setting up TCP port forwarding for port tcp:{self.remote_tcp_port} on device {self.device.serial}"
        )
        # Use adb forward command to set up port forwarding
        self.local_tcp_port = self.device.forward_port(self.remote_tcp_port)
        self.tcp_base_url = f"http://localhost:{self.local_tcp_port}"
        logger.debug(
            f"TCP port forwarding set up successfully to {self.tcp_base_url}"
        )

        # Test the connection with a ping
        try:
            response = requests.get(f"{self.tcp_base_url}/ping", timeout=5)
        except requests.exceptions.RequestException as e:
            logger.warning(f"TCP connection test failed: {e}")
        else:
            if response.status_code == 200:
                logger.debug("TCP connection test successful")
                self.tcp_forwarded = True
                return True
            logger.warning(
                f"TCP connection test failed with status: {response.status_code}"
            )

        # The ping failed: teardown_tcp_forward() only acts when tcp_forwarded
        # is True, so the forward has to be removed here or it would linger.
        self.tcp_forwarded = False
        try:
            self.device.open_transport(
                f"killforward:tcp:{self.local_tcp_port}"
            ).close()
        except Exception as cleanup_error:
            # Best effort only; the caller is already told that setup failed.
            logger.debug(
                f"Could not remove unused TCP port forwarding: {cleanup_error}"
            )
        return False

    except Exception as e:
        logger.error(f"Failed to set up TCP port forwarding: {e}")
        self.tcp_forwarded = False
        return False
|
64
106
|
|
65
|
-
|
66
|
-
"""
|
107
|
+
def teardown_tcp_forward(self) -> bool:
    """
    Remove ADB TCP port forwarding.

    Does nothing (and reports success) when ``self.tcp_forwarded`` is not
    set. Otherwise sends a ``killforward`` request for ``self.local_tcp_port``
    to the device and clears ``self.tcp_forwarded``.

    Returns:
        bool: True if forwarding was removed successfully, False otherwise
    """
    try:
        if not self.tcp_forwarded:
            # Nothing is forwarded, so there is nothing to remove.
            return True

        logger.debug(
            f"Removing TCP port forwarding for port {self.local_tcp_port}"
        )
        # remove forwarding
        cmd = f"killforward:tcp:{self.local_tcp_port}"
        logger.debug(f"Removing TCP port forwarding: {cmd}")
        c = self.device.open_transport(cmd)
        c.close()

        self.tcp_forwarded = False
        logger.debug("TCP port forwarding removed")
        return True
    except Exception as e:
        logger.error(f"Failed to remove TCP port forwarding: {e}")
        return False
|
74
132
|
|
75
|
-
|
76
|
-
|
77
|
-
|
133
|
+
def __del__(self):
    """Cleanup when the object is destroyed."""
    # The attribute may be missing if the instance was never fully
    # initialised, hence the default instead of a direct attribute read.
    if getattr(self, "tcp_forwarded", False):
        self.teardown_tcp_forward()
|
78
137
|
|
79
|
-
|
138
|
+
def _set_context(self, ctx: Context):
    """
    Store the workflow context on this instance.

    Args:
        ctx: Context that is kept as ``self._ctx``
    """
    self._ctx = ctx
|
80
140
|
|
81
141
|
def _parse_content_provider_output(
|
82
142
|
self, raw_output: str
|
@@ -125,7 +185,8 @@ class AdbTools(Tools):
|
|
125
185
|
except json.JSONDecodeError:
|
126
186
|
return None
|
127
187
|
|
128
|
-
|
188
|
+
@Tools.ui_action
|
189
|
+
def tap_by_index(self, index: int) -> str:
|
129
190
|
"""
|
130
191
|
Tap on a UI element by its index.
|
131
192
|
|
@@ -197,18 +258,32 @@ class AdbTools(Tools):
|
|
197
258
|
x = (left + right) // 2
|
198
259
|
y = (top + bottom) // 2
|
199
260
|
|
261
|
+
logger.debug(
|
262
|
+
f"Tapping element with index {index} at coordinates ({x}, {y})"
|
263
|
+
)
|
200
264
|
# Get the device and tap at the coordinates
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
265
|
+
self.device.click(x, y)
|
266
|
+
logger.debug(f"Tapped element with index {index} at coordinates ({x}, {y})")
|
267
|
+
|
268
|
+
# Emit coordinate action event for trajectory recording
|
269
|
+
|
270
|
+
if self._ctx:
|
271
|
+
element_text = element.get("text", "No text")
|
272
|
+
element_class = element.get("className", "Unknown class")
|
207
273
|
|
208
|
-
|
274
|
+
tap_event = TapActionEvent(
|
275
|
+
action_type="tap",
|
276
|
+
description=f"Tap element at index {index}: '{element_text}' ({element_class}) at coordinates ({x}, {y})",
|
277
|
+
x=x,
|
278
|
+
y=y,
|
279
|
+
element_index=index,
|
280
|
+
element_text=element_text,
|
281
|
+
element_bounds=bounds_str,
|
282
|
+
)
|
283
|
+
self._ctx.write_event_to_stream(tap_event)
|
209
284
|
|
210
285
|
# Add a small delay to allow UI to update
|
211
|
-
|
286
|
+
time.sleep(0.5)
|
212
287
|
|
213
288
|
# Create a descriptive response
|
214
289
|
response_parts = []
|
@@ -233,7 +308,7 @@ class AdbTools(Tools):
|
|
233
308
|
return f"Error: {str(e)}"
|
234
309
|
|
235
310
|
# Rename the old tap function to tap_by_coordinates for backward compatibility
|
236
|
-
|
311
|
+
def tap_by_coordinates(self, x: int, y: int) -> bool:
    """
    Tap on the device screen at specific coordinates.

    Args:
        x: X coordinate
        y: Y coordinate

    Returns:
        Bool indicating success or failure
    """
    position = f"({x}, {y})"
    try:
        logger.debug(f"Tapping at coordinates {position}")
        self.device.click(x, y)
        logger.debug(f"Tapped at coordinates {position}")
    except ValueError as e:
        # Only ValueError is turned into a False result; anything else
        # propagates to the caller.
        logger.debug(f"Error: {str(e)}")
        return False
    return True
|
261
330
|
|
262
331
|
# Replace the old tap function with the new one
|
263
|
-
|
332
|
+
def tap(self, index: int) -> str:
    """
    Tap on a UI element by its index.

    Thin alias that delegates to ``tap_by_index``.

    Args:
        index: Index of the element to tap

    Returns:
        Result message
    """
    result_message = self.tap_by_index(index)
    return result_message
|
346
|
+
|
347
|
+
@Tools.ui_action
def swipe(
    self,
    start_x: int,
    start_y: int,
    end_x: int,
    end_y: int,
    duration_ms: float = 300,
) -> bool:
    """
    Performs a straight-line swipe gesture on the device screen.

    Args:
        start_x: Starting X coordinate
        start_y: Starting Y coordinate
        end_x: Ending X coordinate
        end_y: Ending Y coordinate
        duration_ms: Duration of swipe in milliseconds
    Returns:
        Bool indicating success or failure
    """
    try:
        # The event is written before the gesture is performed.
        if self._ctx:
            swipe_event = SwipeActionEvent(
                action_type="swipe",
                description=f"Swipe from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration_ms} milliseconds",
                start_x=start_x,
                start_y=start_y,
                end_x=end_x,
                end_y=end_y,
                duration_ms=duration_ms,
            )
            self._ctx.write_event_to_stream(swipe_event)

        # The device call and the wait both take seconds, not milliseconds.
        duration_s = duration_ms / 1000
        self.device.swipe(start_x, start_y, end_x, end_y, float(duration_s))
        time.sleep(duration_s)
        logger.debug(
            f"Swiped from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration_ms} milliseconds"
        )
        return True
    except ValueError as e:
        # Report through the module logger like the other tools do,
        # instead of printing to stdout.
        logger.error(f"Error: {str(e)}")
        return False
|
310
391
|
|
311
|
-
|
392
|
+
@Tools.ui_action
def drag(
    self, start_x: int, start_y: int, end_x: int, end_y: int, duration: float = 3
) -> bool:
    """
    Performs a straight-line drag and drop gesture on the device screen.

    Args:
        start_x: Starting X coordinate
        start_y: Starting Y coordinate
        end_x: Ending X coordinate
        end_y: Ending Y coordinate
        duration: Duration of drag in seconds
    Returns:
        Bool indicating success or failure
    """
    try:
        logger.debug(
            f"Dragging from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration} seconds"
        )
        self.device.drag(start_x, start_y, end_x, end_y, duration)

        # The event is written after the drag call has returned.
        if self._ctx:
            drag_event = DragActionEvent(
                action_type="drag",
                description=f"Drag from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration} seconds",
                start_x=start_x,
                start_y=start_y,
                end_x=end_x,
                end_y=end_y,
                duration=duration,
            )
            self._ctx.write_event_to_stream(drag_event)

        time.sleep(duration)
        logger.debug(
            f"Dragged from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration} seconds"
        )
        return True
    except ValueError as e:
        # Report through the module logger like the other tools do,
        # instead of printing to stdout.
        logger.error(f"Error: {str(e)}")
        return False
|
433
|
+
|
434
|
+
@Tools.ui_action
def input_text(self, text: str) -> str:
    """
    Input text on the device.
    Always make sure that the Focused Element is not None before inputting text.

    The text is Base64-encoded and handed to the Droidrun keyboard through
    the portal content provider. The Droidrun keyboard is switched in for the
    input and the previous keyboard is restored afterwards.

    Args:
        text: Text to input

    Returns:
        Result message
    """
    try:
        logger.debug(f"Inputting text: {text}")

        # NOTE: only the content provider path is implemented here; the
        # portal's TCP endpoint is not used for keyboard input.

        # Save the current keyboard
        original_ime = self.device.shell(
            "settings get secure default_input_method"
        )
        original_ime = original_ime.strip()

        # Enable the Droidrun keyboard
        self.device.shell("ime enable com.droidrun.portal/.DroidrunKeyboardIME")

        # Set the Droidrun keyboard as the default
        self.device.shell("ime set com.droidrun.portal/.DroidrunKeyboardIME")

        # Wait for keyboard to change
        time.sleep(1)

        # Encode the text to Base64
        encoded_text = base64.b64encode(text.encode()).decode()

        cmd = f'content insert --uri "content://com.droidrun.portal/keyboard/input" --bind base64_text:s:"{encoded_text}"'
        self.device.shell(cmd)

        # Wait for text input to complete
        time.sleep(0.5)

        # Restore the original keyboard
        if original_ime and "com.droidrun.portal" not in original_ime:
            self.device.shell(f"ime set {original_ime}")

        # Shortened form of the text used in the event, the log and the result.
        preview = f"{text[:50]}{'...' if len(text) > 50 else ''}"

        # Emit the event before returning; an earlier return statement used
        # to make this unreachable.
        if self._ctx:
            input_event = InputTextActionEvent(
                action_type="input_text",
                description=f"Input text: '{preview}'",
                text=text,
            )
            self._ctx.write_event_to_stream(input_event)

        result = f"Text input completed: {preview}"
        logger.debug(result)
        return result

    except requests.exceptions.RequestException as e:
        return f"Error: TCP request failed: {str(e)}"
    except ValueError as e:
        return f"Error: {str(e)}"
    except Exception as e:
        return f"Error sending text input: {str(e)}"
|
369
522
|
|
370
|
-
|
523
|
+
@Tools.ui_action
def back(self) -> str:
    """
    Go back on the current view.
    This presses the Android back button.
    """
    # Android's KEYCODE_BACK is 4; keycode 3 is KEYCODE_HOME and would leave
    # the current app instead of navigating back.
    keycode_back = 4
    try:
        logger.debug("Pressing key BACK")
        self.device.keyevent(keycode_back)

        if self._ctx:
            key_event = KeyPressActionEvent(
                action_type="key_press",
                description="Pressed key BACK",
                keycode=keycode_back,
                key_name="BACK",
            )
            self._ctx.write_event_to_stream(key_event)

        return "Pressed key BACK"
    except ValueError as e:
        return f"Error: {str(e)}"
|
387
545
|
|
388
|
-
|
546
|
+
@Tools.ui_action
|
547
|
+
def press_key(self, keycode: int) -> str:
|
389
548
|
"""
|
390
549
|
Press a key on the Android device.
|
391
550
|
|
@@ -399,13 +558,6 @@ class AdbTools(Tools):
|
|
399
558
|
keycode: Android keycode to press
|
400
559
|
"""
|
401
560
|
try:
|
402
|
-
if self.serial:
|
403
|
-
device = await self.device_manager.get_device(self.serial)
|
404
|
-
if not device:
|
405
|
-
return f"Error: Device {self.serial} not found"
|
406
|
-
else:
|
407
|
-
device = await self._get_device()
|
408
|
-
|
409
561
|
key_names = {
|
410
562
|
66: "ENTER",
|
411
563
|
4: "BACK",
|
@@ -414,12 +566,24 @@ class AdbTools(Tools):
|
|
414
566
|
}
|
415
567
|
key_name = key_names.get(keycode, str(keycode))
|
416
568
|
|
417
|
-
|
569
|
+
if self._ctx:
|
570
|
+
key_event = KeyPressActionEvent(
|
571
|
+
action_type="key_press",
|
572
|
+
description=f"Pressed key {key_name}",
|
573
|
+
keycode=keycode,
|
574
|
+
key_name=key_name,
|
575
|
+
)
|
576
|
+
self._ctx.write_event_to_stream(key_event)
|
577
|
+
|
578
|
+
logger.debug(f"Pressing key {key_name}")
|
579
|
+
self.device.keyevent(keycode)
|
580
|
+
logger.debug(f"Pressed key {key_name}")
|
418
581
|
return f"Pressed key {key_name}"
|
419
582
|
except ValueError as e:
|
420
583
|
return f"Error: {str(e)}"
|
421
584
|
|
422
|
-
|
585
|
+
@Tools.ui_action
def start_app(self, package: str, activity: str | None = None) -> str:
    """
    Start an app on the device.

    When no activity is given, it is looked up on the device with
    ``cmd package resolve-activity --brief``.

    Args:
        package: Package name
        activity: Optional activity name

    Returns:
        Result message, or a message starting with "Error:" on failure
    """
    try:
        logger.debug(f"Starting app {package} with activity {activity}")
        if not activity:
            resolve_output = self.device.shell(
                f"cmd package resolve-activity --brief {package}"
            )
            # The component is printed as "<package>/<activity>"; pick the
            # line that has that shape instead of relying on a fixed index.
            component = next(
                (
                    line.strip()
                    for line in resolve_output.splitlines()
                    if "/" in line
                ),
                None,
            )
            if component is None:
                return f"Error: Could not resolve a launchable activity for {package}"
            activity = component.split("/")[1]

        if self._ctx:
            start_app_event = StartAppEvent(
                action_type="start_app",
                description=f"Start app {package}",
                package=package,
                activity=activity,
            )
            self._ctx.write_event_to_stream(start_app_event)

        logger.debug(f"Activity: {activity}")

        self.device.app_start(package, activity)
        logger.debug(f"App started: {package} with activity {activity}")
        return f"App started: {package} with activity {activity}"
    except Exception as e:
        return f"Error: {str(e)}"
|
442
619
|
|
443
|
-
|
620
|
+
def install_app(
    self, apk_path: str, reinstall: bool = False, grant_permissions: bool = True
) -> str:
    """
    Install an app on the device.

    Args:
        apk_path: Path to the APK file
        reinstall: Passed to the device install call as ``uninstall``
        grant_permissions: Whether to grant all permissions

    Returns:
        The value returned by the device install call, or a message starting
        with "Error:" when the APK is missing or a ValueError occurs
    """
    try:
        apk_missing = not os.path.exists(apk_path)
        if apk_missing:
            return f"Error: APK file not found at {apk_path}"

        logger.debug(
            f"Installing app: {apk_path} with reinstall: {reinstall} and grant_permissions: {grant_permissions}"
        )
        # "-g" is the only flag passed, and only when permissions are requested.
        install_flags = ["-g"] if grant_permissions else []
        result = self.device.install(
            apk_path,
            nolaunch=True,
            uninstall=reinstall,
            flags=install_flags,
            silent=True,
        )
        logger.debug(f"Installed app: {apk_path} with result: {result}")
        return result
    except ValueError as e:
        return f"Error: {str(e)}"
|
469
649
|
|
470
|
-
|
650
|
+
def take_screenshot(self) -> Tuple[str, bytes]:
    """
    Take a screenshot of the device.
    This function captures the current screen and adds the screenshot to context in the next message.
    Also stores the screenshot in the screenshots list with timestamp for later GIF creation.

    Returns:
        Tuple of (image format, image bytes); the format is always "PNG"

    Raises:
        ValueError: If the screenshot could not be taken or encoded; the
            original exception is attached as the cause
    """
    try:
        logger.debug("Taking screenshot")

        # Capture through ADB and encode the image as PNG in memory
        img = self.device.screenshot()
        img_buf = io.BytesIO()
        img_format = "PNG"
        img.save(img_buf, format=img_format)
        image_bytes = img_buf.getvalue()
        logger.debug("Screenshot taken via ADB")

        # Store screenshot with timestamp
        self.screenshots.append(
            {
                "timestamp": time.time(),
                "image_data": image_bytes,
                "format": img_format,
            }
        )
        return img_format, image_bytes

    except requests.exceptions.RequestException as e:
        raise ValueError(f"Error taking screenshot via TCP: {str(e)}") from e
    except ValueError as e:
        raise ValueError(f"Error taking screenshot: {str(e)}") from e
    except Exception as e:
        raise ValueError(f"Unexpected error taking screenshot: {str(e)}") from e
|
497
683
|
|
498
|
-
|
684
|
+
def list_packages(self, include_system_apps: bool = False) -> List[str]:
    """
    List installed packages on the device.

    Args:
        include_system_apps: Whether to include system apps; when False the
            ``-3`` filter is passed to the device's package listing

    Returns:
        List of package names

    Raises:
        ValueError: If the device call raises a ValueError; the original
            exception is attached as the cause
    """
    try:
        logger.debug("Listing packages")
        return self.device.list_packages(["-3"] if not include_system_apps else [])
    except ValueError as e:
        raise ValueError(f"Error listing packages: {str(e)}") from e
|
519
699
|
|
700
|
+
@Tools.ui_action
|
520
701
|
def complete(self, success: bool, reason: str = ""):
|
521
702
|
"""
|
522
703
|
Mark the task as finished.
|
@@ -536,7 +717,7 @@ class AdbTools(Tools):
|
|
536
717
|
self.reason = reason
|
537
718
|
self.finished = True
|
538
719
|
|
539
|
-
|
720
|
+
def remember(self, information: str) -> str:
|
540
721
|
"""
|
541
722
|
Store important information to remember for future context.
|
542
723
|
|
@@ -572,7 +753,7 @@ class AdbTools(Tools):
|
|
572
753
|
"""
|
573
754
|
return self.memory.copy()
|
574
755
|
|
575
|
-
|
756
|
+
def get_state(self, serial: Optional[str] = None) -> Dict[str, Any]:
|
576
757
|
"""
|
577
758
|
Get both the a11y tree and phone state in a single call using the combined /state endpoint.
|
578
759
|
|
@@ -584,40 +765,61 @@ class AdbTools(Tools):
|
|
584
765
|
"""
|
585
766
|
|
586
767
|
try:
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
768
|
+
logger.debug("Getting state")
|
769
|
+
|
770
|
+
if self.use_tcp and self.tcp_forwarded:
|
771
|
+
# Use TCP communication
|
772
|
+
response = requests.get(f"{self.tcp_base_url}/state", timeout=10)
|
773
|
+
|
774
|
+
if response.status_code == 200:
|
775
|
+
tcp_response = response.json()
|
776
|
+
|
777
|
+
# Check if response has the expected format
|
778
|
+
if isinstance(tcp_response, dict) and "data" in tcp_response:
|
779
|
+
data_str = tcp_response["data"]
|
780
|
+
try:
|
781
|
+
combined_data = json.loads(data_str)
|
782
|
+
except json.JSONDecodeError:
|
783
|
+
return {
|
784
|
+
"error": "Parse Error",
|
785
|
+
"message": "Failed to parse JSON data from TCP response data field",
|
786
|
+
}
|
787
|
+
else:
|
788
|
+
# Fallback: assume direct JSON format
|
789
|
+
combined_data = tcp_response
|
790
|
+
else:
|
791
|
+
return {
|
792
|
+
"error": "HTTP Error",
|
793
|
+
"message": f"HTTP request failed with status {response.status_code}",
|
794
|
+
}
|
591
795
|
else:
|
592
|
-
|
796
|
+
# Fallback to content provider method
|
797
|
+
adb_output = self.device.shell(
|
798
|
+
"content query --uri content://com.droidrun.portal/state",
|
799
|
+
)
|
593
800
|
|
594
|
-
|
595
|
-
device._serial,
|
596
|
-
"content query --uri content://com.droidrun.portal/state",
|
597
|
-
)
|
598
|
-
|
599
|
-
state_data = self._parse_content_provider_output(adb_output)
|
600
|
-
|
601
|
-
if state_data is None:
|
602
|
-
return {
|
603
|
-
"error": "Parse Error",
|
604
|
-
"message": "Failed to parse state data from ContentProvider response",
|
605
|
-
}
|
801
|
+
state_data = self._parse_content_provider_output(adb_output)
|
606
802
|
|
607
|
-
|
608
|
-
data_str = state_data["data"]
|
609
|
-
try:
|
610
|
-
combined_data = json.loads(data_str)
|
611
|
-
except json.JSONDecodeError:
|
803
|
+
if state_data is None:
|
612
804
|
return {
|
613
805
|
"error": "Parse Error",
|
614
|
-
"message": "Failed to parse
|
806
|
+
"message": "Failed to parse state data from ContentProvider response",
|
807
|
+
}
|
808
|
+
|
809
|
+
if isinstance(state_data, dict) and "data" in state_data:
|
810
|
+
data_str = state_data["data"]
|
811
|
+
try:
|
812
|
+
combined_data = json.loads(data_str)
|
813
|
+
except json.JSONDecodeError:
|
814
|
+
return {
|
815
|
+
"error": "Parse Error",
|
816
|
+
"message": "Failed to parse JSON data from ContentProvider data field",
|
817
|
+
}
|
818
|
+
else:
|
819
|
+
return {
|
820
|
+
"error": "Format Error",
|
821
|
+
"message": f"Unexpected state data format: {type(state_data)}",
|
615
822
|
}
|
616
|
-
else:
|
617
|
-
return {
|
618
|
-
"error": "Format Error",
|
619
|
-
"message": f"Unexpected state data format: {type(state_data)}",
|
620
|
-
}
|
621
823
|
|
622
824
|
# Validate that both a11y_tree and phone_state are present
|
623
825
|
if "a11y_tree" not in combined_data:
|
@@ -655,17 +857,271 @@ class AdbTools(Tools):
|
|
655
857
|
"phone_state": combined_data["phone_state"],
|
656
858
|
}
|
657
859
|
|
860
|
+
except requests.exceptions.RequestException as e:
|
861
|
+
return {
|
862
|
+
"error": "TCP Error",
|
863
|
+
"message": f"TCP request failed: {str(e)}",
|
864
|
+
}
|
658
865
|
except Exception as e:
|
659
866
|
return {
|
660
867
|
"error": str(e),
|
661
868
|
"message": f"Error getting combined state: {str(e)}",
|
662
869
|
}
|
663
870
|
|
871
|
+
def get_a11y_tree(self) -> Dict[str, Any]:
    """
    Get just the accessibility tree using the /a11y_tree endpoint.

    The endpoint is only queried when TCP is enabled and forwarded;
    otherwise the tree is taken from ``get_state()``.

    Returns:
        Dictionary containing accessibility tree data, or a dictionary with
        "error" and "message" keys when something went wrong
    """
    try:
        tcp_available = self.use_tcp and self.tcp_forwarded
        if not tcp_available:
            # Fallback: use get_state and extract a11y_tree
            full_state = self.get_state()
            if "error" in full_state:
                return full_state
            return {"a11y_tree": full_state.get("a11y_tree", [])}

        response = requests.get(f"{self.tcp_base_url}/a11y_tree", timeout=10)
        if response.status_code != 200:
            return {
                "error": "HTTP Error",
                "message": f"HTTP request failed with status {response.status_code}",
            }

        payload = response.json()

        # Fallback: assume direct JSON format
        if not (isinstance(payload, dict) and "data" in payload):
            return payload

        # Expected format: the tree is a JSON string inside the data field
        try:
            return json.loads(payload["data"])
        except json.JSONDecodeError:
            return {
                "error": "Parse Error",
                "message": "Failed to parse JSON data from TCP response data field",
            }

    except requests.exceptions.RequestException as e:
        return {
            "error": "TCP Error",
            "message": f"TCP request failed: {str(e)}",
        }
    except Exception as e:
        return {
            "error": str(e),
            "message": f"Error getting a11y tree: {str(e)}",
        }
|
920
|
+
|
921
|
+
def get_phone_state(self) -> Dict[str, Any]:
    """
    Get just the phone state using the /phone_state endpoint.

    The endpoint is only queried when TCP is enabled and forwarded;
    otherwise the phone state is taken from ``get_state()``.

    Returns:
        Dictionary containing phone state data, or a dictionary with
        "error" and "message" keys when something went wrong
    """
    try:
        tcp_available = self.use_tcp and self.tcp_forwarded
        if not tcp_available:
            # Fallback: use get_state and extract phone_state
            full_state = self.get_state()
            if "error" in full_state:
                return full_state
            return {"phone_state": full_state.get("phone_state", {})}

        response = requests.get(f"{self.tcp_base_url}/phone_state", timeout=10)
        if response.status_code != 200:
            return {
                "error": "HTTP Error",
                "message": f"HTTP request failed with status {response.status_code}",
            }

        payload = response.json()

        # Fallback: assume direct JSON format
        if not (isinstance(payload, dict) and "data" in payload):
            return payload

        # Expected format: the state is a JSON string inside the data field
        try:
            return json.loads(payload["data"])
        except json.JSONDecodeError:
            return {
                "error": "Parse Error",
                "message": "Failed to parse JSON data from TCP response data field",
            }

    except requests.exceptions.RequestException as e:
        return {
            "error": "TCP Error",
            "message": f"TCP request failed: {str(e)}",
        }
    except Exception as e:
        return {
            "error": str(e),
            "message": f"Error getting phone state: {str(e)}",
        }
|
970
|
+
|
971
|
+
def ping(self) -> Dict[str, Any]:
    """
    Test the TCP connection using the /ping endpoint.

    Returns:
        Dictionary with a "status" of "success" or "error" and a
        human-readable "message". Successful pings also carry the
        endpoint's reply under "response": the parsed JSON body (an empty
        dict when the body is empty), or the raw text when the body is
        not valid JSON.
    """
    try:
        if not (self.use_tcp and self.tcp_forwarded):
            return {
                "status": "error",
                "message": "TCP communication is not enabled",
            }

        response = requests.get(f"{self.tcp_base_url}/ping", timeout=5)

        if response.status_code != 200:
            return {
                "status": "error",
                "message": f"Ping failed with status {response.status_code}: {response.text}",
            }

        try:
            tcp_response = response.json() if response.content else {}
        except ValueError:
            # Catch ValueError rather than json.JSONDecodeError: depending on
            # the requests version and whether simplejson is installed, the
            # decode error may not be a stdlib JSONDecodeError, but it is
            # always a ValueError. Handling it here keeps a reachable
            # endpoint with a non-JSON body from being reported as a failed
            # ping by the RequestException handler below.
            return {
                "status": "success",
                "message": "Ping successful (non-JSON response)",
                "response": response.text,
            }

        logger.debug(f"Ping TCP response: {tcp_response}")
        return {
            "status": "success",
            "message": "Ping successful",
            "response": tcp_response,
        }

    except requests.exceptions.RequestException as e:
        return {
            "status": "error",
            "message": f"Ping failed: {str(e)}",
        }
    except Exception as e:
        return {
            "status": "error",
            "message": f"Error during ping: {str(e)}",
        }
|
1018
|
+
|
1019
|
+
|
1020
|
+
def _shell_test_cli(serial: str, command: str) -> tuple[str, float]:
    """
    Run an adb shell command using the adb CLI and measure execution time.

    Args:
        serial: Device serial number
        command: Shell command to run

    Returns:
        Tuple of (output, elapsed_time). The output is the stripped stdout
        when adb exits with code 0, otherwise the stripped stderr; the
        elapsed time is in seconds.
    """
    import subprocess
    import time

    # Argument list (no shell=True): the command is handed to adb verbatim.
    adb_cmd = ["adb", "-s", serial, "shell", command]
    start = time.perf_counter()
    result = subprocess.run(adb_cmd, capture_output=True, text=True)
    elapsed = time.perf_counter() - start
    output = result.stdout.strip() if result.returncode == 0 else result.stderr.strip()
    return output, elapsed


def _shell_test(serial: str = "emulator-5554"):
    """
    Compare shell latency of the Python adb client against the adb CLI.

    Runs an echo command and a Portal state content query through both
    paths and prints how long each call took.

    Args:
        serial: Device serial number to run the comparison against
    """
    # Imported locally so this helper does not rely on a module-level import.
    import time

    echo_cmd = "echo 'Hello, World!'"
    state_cmd = "content query --uri content://com.droidrun.portal/state"

    device = adb.device(serial)

    # Native Python adb client
    start = time.perf_counter()
    res = device.shell(echo_cmd)
    elapsed = time.perf_counter() - start
    print(f"[Native] Shell execution took {elapsed:.3f} seconds: {res}")

    # The state payload itself is not printed; only the timing is of interest.
    start = time.perf_counter()
    device.shell(state_cmd)
    elapsed = time.perf_counter() - start
    print(f"[Native] Shell execution took {elapsed:.3f} seconds: phone_state")

    # CLI version
    output, elapsed = _shell_test_cli(serial, echo_cmd)
    print(f"[CLI] Shell execution took {elapsed:.3f} seconds: {output}")

    _, elapsed = _shell_test_cli(serial, state_cmd)
    print(f"[CLI] Shell execution took {elapsed:.3f} seconds: phone_state")


def _list_packages():
    """Print the result of AdbTools.list_packages() for a default AdbTools instance."""
    tools = AdbTools()
    print(tools.list_packages())


def _start_app():
    """Start the Android Settings activity through a default AdbTools instance."""
    tools = AdbTools()
    tools.start_app("com.android.settings", ".Settings")
|
1124
|
+
|
1125
|
+
|
1126
|
+
# Manual entry point: running this module directly calls the _start_app()
# helper, which starts the Settings app via AdbTools.
if __name__ == "__main__":
    _start_app()
|