droidrun 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
droidrun/tools/adb.py CHANGED
@@ -3,31 +3,53 @@ UI Actions - Core UI interaction tools for Android device control.
3
3
  """
4
4
 
5
5
  import os
6
+ import io
6
7
  import json
7
8
  import time
8
- import asyncio
9
9
  import logging
10
- from typing import Optional, Dict, Tuple, List, Any, Type, Self
11
- from droidrun.adb.device import Device
12
- from droidrun.adb.manager import DeviceManager
10
+ from llama_index.core.workflow import Context
11
+ from typing import Optional, Dict, Tuple, List, Any
12
+ from droidrun.agent.common.events import (
13
+ InputTextActionEvent,
14
+ KeyPressActionEvent,
15
+ StartAppEvent,
16
+ SwipeActionEvent,
17
+ TapActionEvent,
18
+ DragActionEvent,
19
+ )
13
20
  from droidrun.tools.tools import Tools
21
+ from adbutils import adb
22
+ import requests
23
+ import base64
14
24
 
15
- logger = logging.getLogger("droidrun-adb-tools")
25
+ logger = logging.getLogger("droidrun-tools")
26
+ PORTAL_DEFAULT_TCP_PORT = 8080
16
27
 
17
28
 
18
29
  class AdbTools(Tools):
19
30
  """Core UI interaction tools for Android device control."""
20
31
 
21
- def __init__(self, serial: str) -> None:
32
+ def __init__(
33
+ self,
34
+ serial: str | None = None,
35
+ use_tcp: bool = False,
36
+ remote_tcp_port: int = PORTAL_DEFAULT_TCP_PORT,
37
+ ) -> None:
22
38
  """Initialize the AdbTools instance.
23
39
 
24
40
  Args:
25
41
  serial: Device serial number
42
+ use_tcp: Whether to use TCP communication (default: False)
43
+ tcp_port: TCP port for communication (default: 8080)
26
44
  """
27
- self.device_manager = DeviceManager()
45
+ self.device = adb.device(serial=serial)
46
+ self.use_tcp = use_tcp
47
+ self.remote_tcp_port = remote_tcp_port
48
+ self.tcp_forwarded = False
49
+
50
+ self._ctx = None
28
51
  # Instance‐level cache for clickable elements (index-based tapping)
29
52
  self.clickable_elements_cache: List[Dict[str, Any]] = []
30
- self.serial = serial
31
53
  self.last_screenshot = None
32
54
  self.reason = None
33
55
  self.success = None
@@ -36,47 +58,85 @@ class AdbTools(Tools):
36
58
  self.memory: List[str] = []
37
59
  # Store all screenshots with timestamps
38
60
  self.screenshots: List[Dict[str, Any]] = []
61
+ # Trajectory saving level
62
+ self.save_trajectories = "none"
39
63
 
40
- @classmethod
41
- async def create(cls: Type[Self], serial: str = None) -> Self:
42
- """Create an AdbTools instance.
64
+ # Set up TCP forwarding if requested
65
+ if self.use_tcp:
66
+ self.setup_tcp_forward()
43
67
 
44
- Args:
45
- serial: Optional device serial number. If not provided, the first device found will be used.
68
+ def setup_tcp_forward(self) -> bool:
69
+ """
70
+ Set up ADB TCP port forwarding for communication with the portal app.
46
71
 
47
72
  Returns:
48
- AdbTools instance
73
+ bool: True if forwarding was set up successfully, False otherwise
49
74
  """
50
- if not serial:
51
- dvm = DeviceManager()
52
- devices = await dvm.list_devices()
53
- if not devices or len(devices) < 1:
54
- raise ValueError("No devices found")
55
- serial = devices[0].serial
75
+ try:
76
+ logger.debug(
77
+ f"Setting up TCP port forwarding for port tcp:{self.remote_tcp_port} on device {self.device.serial}"
78
+ )
79
+ # Use adb forward command to set up port forwarding
80
+ self.local_tcp_port = self.device.forward_port(self.remote_tcp_port)
81
+ self.tcp_base_url = f"http://localhost:{self.local_tcp_port}"
82
+ logger.debug(
83
+ f"TCP port forwarding set up successfully to {self.tcp_base_url}"
84
+ )
56
85
 
57
- return AdbTools(serial)
86
+ # Test the connection with a ping
87
+ try:
88
+ response = requests.get(f"{self.tcp_base_url}/ping", timeout=5)
89
+ if response.status_code == 200:
90
+ logger.debug("TCP connection test successful")
91
+ self.tcp_forwarded = True
92
+ return True
93
+ else:
94
+ logger.warning(
95
+ f"TCP connection test failed with status: {response.status_code}"
96
+ )
97
+ return False
98
+ except requests.exceptions.RequestException as e:
99
+ logger.warning(f"TCP connection test failed: {e}")
100
+ return False
58
101
 
59
- def _get_device_serial(self) -> str:
60
- """Get the device serial from the instance or environment variable."""
61
- # First try using the instance's serial
62
- if self.serial:
63
- return self.serial
102
+ except Exception as e:
103
+ logger.error(f"Failed to set up TCP port forwarding: {e}")
104
+ self.tcp_forwarded = False
105
+ return False
64
106
 
65
- async def _get_device(self) -> Optional[Device]:
66
- """Get the device instance using the instance's serial or from environment variable.
107
+ def teardown_tcp_forward(self) -> bool:
108
+ """
109
+ Remove ADB TCP port forwarding.
67
110
 
68
111
  Returns:
69
- Device instance or None if not found
112
+ bool: True if forwarding was removed successfully, False otherwise
70
113
  """
71
- serial = self._get_device_serial()
72
- if not serial:
73
- raise ValueError("No device serial specified - set device_serial parameter")
114
+ try:
115
+ if self.tcp_forwarded:
116
+ logger.debug(
117
+ f"Removing TCP port forwarding for port {self.local_tcp_port}"
118
+ )
119
+ # remove forwarding
120
+ cmd = f"killforward:tcp:{self.local_tcp_port}"
121
+ logger.debug(f"Removing TCP port forwarding: {cmd}")
122
+ c = self.device.open_transport(cmd)
123
+ c.close()
124
+
125
+ self.tcp_forwarded = False
126
+ logger.debug(f"TCP port forwarding removed")
127
+ return True
128
+ return True
129
+ except Exception as e:
130
+ logger.error(f"Failed to remove TCP port forwarding: {e}")
131
+ return False
74
132
 
75
- device = await self.device_manager.get_device(serial)
76
- if not device:
77
- raise ValueError(f"Device {serial} not found")
133
+ def __del__(self):
134
+ """Cleanup when the object is destroyed."""
135
+ if hasattr(self, "tcp_forwarded") and self.tcp_forwarded:
136
+ self.teardown_tcp_forward()
78
137
 
79
- return device
138
+ def _set_context(self, ctx: Context):
139
+ self._ctx = ctx
80
140
 
81
141
  def _parse_content_provider_output(
82
142
  self, raw_output: str
@@ -125,7 +185,8 @@ class AdbTools(Tools):
125
185
  except json.JSONDecodeError:
126
186
  return None
127
187
 
128
- async def tap_by_index(self, index: int, serial: Optional[str] = None) -> str:
188
+ @Tools.ui_action
189
+ def tap_by_index(self, index: int) -> str:
129
190
  """
130
191
  Tap on a UI element by its index.
131
192
 
@@ -197,18 +258,32 @@ class AdbTools(Tools):
197
258
  x = (left + right) // 2
198
259
  y = (top + bottom) // 2
199
260
 
261
+ logger.debug(
262
+ f"Tapping element with index {index} at coordinates ({x}, {y})"
263
+ )
200
264
  # Get the device and tap at the coordinates
201
- if serial:
202
- device = await self.device_manager.get_device(serial)
203
- if not device:
204
- return f"Error: Device {serial} not found"
205
- else:
206
- device = await self._get_device()
265
+ self.device.click(x, y)
266
+ logger.debug(f"Tapped element with index {index} at coordinates ({x}, {y})")
267
+
268
+ # Emit coordinate action event for trajectory recording
269
+
270
+ if self._ctx:
271
+ element_text = element.get("text", "No text")
272
+ element_class = element.get("className", "Unknown class")
207
273
 
208
- await device.tap(x, y)
274
+ tap_event = TapActionEvent(
275
+ action_type="tap",
276
+ description=f"Tap element at index {index}: '{element_text}' ({element_class}) at coordinates ({x}, {y})",
277
+ x=x,
278
+ y=y,
279
+ element_index=index,
280
+ element_text=element_text,
281
+ element_bounds=bounds_str,
282
+ )
283
+ self._ctx.write_event_to_stream(tap_event)
209
284
 
210
285
  # Add a small delay to allow UI to update
211
- await asyncio.sleep(0.5)
286
+ time.sleep(0.5)
212
287
 
213
288
  # Create a descriptive response
214
289
  response_parts = []
@@ -233,7 +308,7 @@ class AdbTools(Tools):
233
308
  return f"Error: {str(e)}"
234
309
 
235
310
  # Rename the old tap function to tap_by_coordinates for backward compatibility
236
- async def tap_by_coordinates(self, x: int, y: int) -> bool:
311
+ def tap_by_coordinates(self, x: int, y: int) -> bool:
237
312
  """
238
313
  Tap on the device screen at specific coordinates.
239
314
 
@@ -245,22 +320,16 @@ class AdbTools(Tools):
245
320
  Bool indicating success or failure
246
321
  """
247
322
  try:
248
- if self.serial:
249
- device = await self.device_manager.get_device(self.serial)
250
- if not device:
251
- return f"Error: Device {self.serial} not found"
252
- else:
253
- device = await self._get_device()
254
-
255
- await device.tap(x, y)
256
- print(f"Tapped at coordinates ({x}, {y})")
323
+ logger.debug(f"Tapping at coordinates ({x}, {y})")
324
+ self.device.click(x, y)
325
+ logger.debug(f"Tapped at coordinates ({x}, {y})")
257
326
  return True
258
327
  except ValueError as e:
259
- print(f"Error: {str(e)}")
328
+ logger.debug(f"Error: {str(e)}")
260
329
  return False
261
330
 
262
331
  # Replace the old tap function with the new one
263
- async def tap(self, index: int) -> str:
332
+ def tap(self, index: int) -> str:
264
333
  """
265
334
  Tap on a UI element by its index.
266
335
 
@@ -273,10 +342,16 @@ class AdbTools(Tools):
273
342
  Returns:
274
343
  Result message
275
344
  """
276
- return await self.tap_by_index(index)
277
-
278
- async def swipe(
279
- self, start_x: int, start_y: int, end_x: int, end_y: int, duration_ms: int = 300
345
+ return self.tap_by_index(index)
346
+
347
+ @Tools.ui_action
348
+ def swipe(
349
+ self,
350
+ start_x: int,
351
+ start_y: int,
352
+ end_x: int,
353
+ end_y: int,
354
+ duration_ms: float = 300,
280
355
  ) -> bool:
281
356
  """
282
357
  Performs a straight-line swipe gesture on the device screen.
@@ -286,29 +361,78 @@ class AdbTools(Tools):
286
361
  start_y: Starting Y coordinate
287
362
  end_x: Ending X coordinate
288
363
  end_y: Ending Y coordinate
289
- duration_ms: Duration of swipe in milliseconds
364
+ duration: Duration of swipe in seconds
290
365
  Returns:
291
366
  Bool indicating success or failure
292
367
  """
293
368
  try:
294
- if self.serial:
295
- device = await self.device_manager.get_device(self.serial)
296
- if not device:
297
- return f"Error: Device {self.serial} not found"
298
- else:
299
- device = await self._get_device()
300
369
 
301
- await device.swipe(start_x, start_y, end_x, end_y, duration_ms)
302
- await asyncio.sleep(1)
303
- print(
304
- f"Swiped from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration_ms}ms"
370
+ if self._ctx:
371
+ swipe_event = SwipeActionEvent(
372
+ action_type="swipe",
373
+ description=f"Swipe from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration_ms} milliseconds",
374
+ start_x=start_x,
375
+ start_y=start_y,
376
+ end_x=end_x,
377
+ end_y=end_y,
378
+ duration_ms=duration_ms,
379
+ )
380
+ self._ctx.write_event_to_stream(swipe_event)
381
+
382
+ self.device.swipe(start_x, start_y, end_x, end_y, float(duration_ms / 1000))
383
+ time.sleep(duration_ms / 1000)
384
+ logger.debug(
385
+ f"Swiped from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration_ms} milliseconds"
305
386
  )
306
387
  return True
307
388
  except ValueError as e:
308
389
  print(f"Error: {str(e)}")
309
390
  return False
310
391
 
311
- async def input_text(self, text: str, serial: Optional[str] = None) -> str:
392
+ @Tools.ui_action
393
+ def drag(
394
+ self, start_x: int, start_y: int, end_x: int, end_y: int, duration: float = 3
395
+ ) -> bool:
396
+ """
397
+ Performs a straight-line drag and drop gesture on the device screen.
398
+ Args:
399
+ start_x: Starting X coordinate
400
+ start_y: Starting Y coordinate
401
+ end_x: Ending X coordinate
402
+ end_y: Ending Y coordinate
403
+ duration: Duration of swipe in seconds
404
+ Returns:
405
+ Bool indicating success or failure
406
+ """
407
+ try:
408
+ logger.debug(
409
+ f"Dragging from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration} seconds"
410
+ )
411
+ self.device.drag(start_x, start_y, end_x, end_y, duration)
412
+
413
+ if self._ctx:
414
+ drag_event = DragActionEvent(
415
+ action_type="drag",
416
+ description=f"Drag from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration} seconds",
417
+ start_x=start_x,
418
+ start_y=start_y,
419
+ end_x=end_x,
420
+ end_y=end_y,
421
+ duration=duration,
422
+ )
423
+ self._ctx.write_event_to_stream(drag_event)
424
+
425
+ time.sleep(duration)
426
+ logger.debug(
427
+ f"Dragged from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration} seconds"
428
+ )
429
+ return True
430
+ except ValueError as e:
431
+ print(f"Error: {str(e)}")
432
+ return False
433
+
434
+ @Tools.ui_action
435
+ def input_text(self, text: str) -> str:
312
436
  """
313
437
  Input text on the device.
314
438
  Always make sure that the Focused Element is not None before inputting text.
@@ -320,72 +444,107 @@ class AdbTools(Tools):
320
444
  Result message
321
445
  """
322
446
  try:
323
- if serial:
324
- device = await self.device_manager.get_device(serial)
325
- if not device:
326
- return f"Error: Device {serial} not found"
327
- else:
328
- device = await self._get_device()
447
+ logger.debug(f"Inputting text: {text}")
448
+
449
+ # if self.use_tcp and self.tcp_forwarded:
450
+ # # Use TCP communication
451
+ # encoded_text = base64.b64encode(text.encode()).decode()
329
452
 
453
+ # payload = {"base64_text": encoded_text}
454
+ # response = requests.post(
455
+ # f"{self.tcp_base_url}/keyboard/input",
456
+ # json=payload,
457
+ # headers={"Content-Type": "application/json"},
458
+ # timeout=10,
459
+ # )
460
+
461
+ # logger.debug(
462
+ # f"Keyboard input TCP response: {response.status_code}, {response.text}"
463
+ # )
464
+
465
+ # if response.status_code != 200:
466
+ # return f"Error: HTTP request failed with status {response.status_code}: {response.text}"
467
+
468
+ # else:
469
+ # Fallback to content provider method
330
470
  # Save the current keyboard
331
- original_ime = await device._adb.shell(
332
- device._serial, "settings get secure default_input_method"
471
+ original_ime = self.device.shell(
472
+ "settings get secure default_input_method"
333
473
  )
334
474
  original_ime = original_ime.strip()
335
475
 
336
476
  # Enable the Droidrun keyboard
337
- await device._adb.shell(
338
- device._serial, "ime enable com.droidrun.portal/.DroidrunKeyboardIME"
339
- )
477
+ self.device.shell("ime enable com.droidrun.portal/.DroidrunKeyboardIME")
340
478
 
341
479
  # Set the Droidrun keyboard as the default
342
- await device._adb.shell(
343
- device._serial, "ime set com.droidrun.portal/.DroidrunKeyboardIME"
344
- )
480
+ self.device.shell("ime set com.droidrun.portal/.DroidrunKeyboardIME")
345
481
 
346
482
  # Wait for keyboard to change
347
- await asyncio.sleep(1)
483
+ time.sleep(1)
348
484
 
349
485
  # Encode the text to Base64
350
- import base64
351
-
352
486
  encoded_text = base64.b64encode(text.encode()).decode()
353
487
 
354
488
  cmd = f'content insert --uri "content://com.droidrun.portal/keyboard/input" --bind base64_text:s:"{encoded_text}"'
355
- await device._adb.shell(device._serial, cmd)
489
+ self.device.shell(cmd)
356
490
 
357
491
  # Wait for text input to complete
358
- await asyncio.sleep(0.5)
492
+ time.sleep(0.5)
359
493
 
360
494
  # Restore the original keyboard
361
495
  if original_ime and "com.droidrun.portal" not in original_ime:
362
- await device._adb.shell(device._serial, f"ime set {original_ime}")
496
+ self.device.shell(f"ime set {original_ime}")
363
497
 
498
+ logger.debug(
499
+ f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"
500
+ )
364
501
  return f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"
502
+
503
+ if self._ctx:
504
+ input_event = InputTextActionEvent(
505
+ action_type="input_text",
506
+ description=f"Input text: '{text[:50]}{'...' if len(text) > 50 else ''}'",
507
+ text=text,
508
+ )
509
+ self._ctx.write_event_to_stream(input_event)
510
+
511
+ logger.debug(
512
+ f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"
513
+ )
514
+ return f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"
515
+
516
+ except requests.exceptions.RequestException as e:
517
+ return f"Error: TCP request failed: {str(e)}"
365
518
  except ValueError as e:
366
519
  return f"Error: {str(e)}"
367
520
  except Exception as e:
368
521
  return f"Error sending text input: {str(e)}"
369
522
 
370
- async def back(self) -> str:
523
+ @Tools.ui_action
524
+ def back(self) -> str:
371
525
  """
372
526
  Go back on the current view.
373
527
  This presses the Android back button.
374
528
  """
375
529
  try:
376
- if self.serial:
377
- device = await self.device_manager.get_device(self.serial)
378
- if not device:
379
- return f"Error: Device {self.serial} not found"
380
- else:
381
- device = await self._get_device()
530
+ logger.debug("Pressing key BACK")
531
+ self.device.keyevent(3)
532
+
533
+ if self._ctx:
534
+ key_event = KeyPressActionEvent(
535
+ action_type="key_press",
536
+ description=f"Pressed key BACK",
537
+ keycode=3,
538
+ key_name="BACK",
539
+ )
540
+ self._ctx.write_event_to_stream(key_event)
382
541
 
383
- await device.press_key(3)
384
542
  return f"Pressed key BACK"
385
543
  except ValueError as e:
386
544
  return f"Error: {str(e)}"
387
545
 
388
- async def press_key(self, keycode: int) -> str:
546
+ @Tools.ui_action
547
+ def press_key(self, keycode: int) -> str:
389
548
  """
390
549
  Press a key on the Android device.
391
550
 
@@ -399,13 +558,6 @@ class AdbTools(Tools):
399
558
  keycode: Android keycode to press
400
559
  """
401
560
  try:
402
- if self.serial:
403
- device = await self.device_manager.get_device(self.serial)
404
- if not device:
405
- return f"Error: Device {self.serial} not found"
406
- else:
407
- device = await self._get_device()
408
-
409
561
  key_names = {
410
562
  66: "ENTER",
411
563
  4: "BACK",
@@ -414,12 +566,24 @@ class AdbTools(Tools):
414
566
  }
415
567
  key_name = key_names.get(keycode, str(keycode))
416
568
 
417
- await device.press_key(keycode)
569
+ if self._ctx:
570
+ key_event = KeyPressActionEvent(
571
+ action_type="key_press",
572
+ description=f"Pressed key {key_name}",
573
+ keycode=keycode,
574
+ key_name=key_name,
575
+ )
576
+ self._ctx.write_event_to_stream(key_event)
577
+
578
+ logger.debug(f"Pressing key {key_name}")
579
+ self.device.keyevent(keycode)
580
+ logger.debug(f"Pressed key {key_name}")
418
581
  return f"Pressed key {key_name}"
419
582
  except ValueError as e:
420
583
  return f"Error: {str(e)}"
421
584
 
422
- async def start_app(self, package: str, activity: str = "") -> str:
585
+ @Tools.ui_action
586
+ def start_app(self, package: str, activity: str | None = None) -> str:
423
587
  """
424
588
  Start an app on the device.
425
589
 
@@ -428,19 +592,32 @@ class AdbTools(Tools):
428
592
  activity: Optional activity name
429
593
  """
430
594
  try:
431
- if self.serial:
432
- device = await self.device_manager.get_device(self.serial)
433
- if not device:
434
- return f"Error: Device {self.serial} not found"
435
- else:
436
- device = await self._get_device()
437
595
 
438
- result = await device.start_app(package, activity)
439
- return result
440
- except ValueError as e:
596
+ logger.debug(f"Starting app {package} with activity {activity}")
597
+ if not activity:
598
+ dumpsys_output = self.device.shell(
599
+ f"cmd package resolve-activity --brief {package}"
600
+ )
601
+ activity = dumpsys_output.splitlines()[1].split("/")[1]
602
+
603
+ if self._ctx:
604
+ start_app_event = StartAppEvent(
605
+ action_type="start_app",
606
+ description=f"Start app {package}",
607
+ package=package,
608
+ activity=activity,
609
+ )
610
+ self._ctx.write_event_to_stream(start_app_event)
611
+
612
+ print(f"Activity: {activity}")
613
+
614
+ self.device.app_start(package, activity)
615
+ logger.debug(f"App started: {package} with activity {activity}")
616
+ return f"App started: {package} with activity {activity}"
617
+ except Exception as e:
441
618
  return f"Error: {str(e)}"
442
619
 
443
- async def install_app(
620
+ def install_app(
444
621
  self, apk_path: str, reinstall: bool = False, grant_permissions: bool = True
445
622
  ) -> str:
446
623
  """
@@ -452,50 +629,59 @@ class AdbTools(Tools):
452
629
  grant_permissions: Whether to grant all permissions
453
630
  """
454
631
  try:
455
- if self.serial:
456
- device = await self.device_manager.get_device(self.serial)
457
- if not device:
458
- return f"Error: Device {self.serial} not found"
459
- else:
460
- device = await self._get_device()
461
-
462
632
  if not os.path.exists(apk_path):
463
633
  return f"Error: APK file not found at {apk_path}"
464
634
 
465
- result = await device.install_app(apk_path, reinstall, grant_permissions)
635
+ logger.debug(
636
+ f"Installing app: {apk_path} with reinstall: {reinstall} and grant_permissions: {grant_permissions}"
637
+ )
638
+ result = self.device.install(
639
+ apk_path,
640
+ nolaunch=True,
641
+ uninstall=reinstall,
642
+ flags=["-g"] if grant_permissions else [],
643
+ silent=True,
644
+ )
645
+ logger.debug(f"Installed app: {apk_path} with result: {result}")
466
646
  return result
467
647
  except ValueError as e:
468
648
  return f"Error: {str(e)}"
469
649
 
470
- async def take_screenshot(self) -> Tuple[str, bytes]:
650
+ def take_screenshot(self) -> Tuple[str, bytes]:
471
651
  """
472
652
  Take a screenshot of the device.
473
653
  This function captures the current screen and adds the screenshot to context in the next message.
474
654
  Also stores the screenshot in the screenshots list with timestamp for later GIF creation.
475
655
  """
476
656
  try:
477
- if self.serial:
478
- device = await self.device_manager.get_device(self.serial)
479
- if not device:
480
- raise ValueError(f"Device {self.serial} not found")
481
- else:
482
- device = await self._get_device()
483
- screen_tuple = await device.take_screenshot()
484
- self.last_screenshot = screen_tuple[1]
657
+ logger.debug("Taking screenshot")
658
+
659
+ # Fallback to ADB screenshot method
660
+ img = self.device.screenshot()
661
+ img_buf = io.BytesIO()
662
+ img_format = "PNG"
663
+ img.save(img_buf, format=img_format)
664
+ image_bytes = img_buf.getvalue()
665
+ logger.debug("Screenshot taken via ADB")
485
666
 
486
667
  # Store screenshot with timestamp
487
668
  self.screenshots.append(
488
669
  {
489
670
  "timestamp": time.time(),
490
- "image_data": screen_tuple[1],
491
- "format": screen_tuple[0], # Usually 'PNG'
671
+ "image_data": image_bytes,
672
+ "format": img_format,
492
673
  }
493
674
  )
494
- return screen_tuple
675
+ return img_format, image_bytes
676
+
677
+ except requests.exceptions.RequestException as e:
678
+ raise ValueError(f"Error taking screenshot via TCP: {str(e)}")
495
679
  except ValueError as e:
496
680
  raise ValueError(f"Error taking screenshot: {str(e)}")
681
+ except Exception as e:
682
+ raise ValueError(f"Unexpected error taking screenshot: {str(e)}")
497
683
 
498
- async def list_packages(self, include_system_apps: bool = False) -> List[str]:
684
+ def list_packages(self, include_system_apps: bool = False) -> List[str]:
499
685
  """
500
686
  List installed packages on the device.
501
687
 
@@ -506,17 +692,12 @@ class AdbTools(Tools):
506
692
  List of package names
507
693
  """
508
694
  try:
509
- if self.serial:
510
- device = await self.device_manager.get_device(self.serial)
511
- if not device:
512
- raise ValueError(f"Device {self.serial} not found")
513
- else:
514
- device = await self._get_device()
515
-
516
- return await device.list_packages(include_system_apps)
695
+ logger.debug("Listing packages")
696
+ return self.device.list_packages(["-3"] if not include_system_apps else [])
517
697
  except ValueError as e:
518
698
  raise ValueError(f"Error listing packages: {str(e)}")
519
699
 
700
+ @Tools.ui_action
520
701
  def complete(self, success: bool, reason: str = ""):
521
702
  """
522
703
  Mark the task as finished.
@@ -536,7 +717,7 @@ class AdbTools(Tools):
536
717
  self.reason = reason
537
718
  self.finished = True
538
719
 
539
- async def remember(self, information: str) -> str:
720
+ def remember(self, information: str) -> str:
540
721
  """
541
722
  Store important information to remember for future context.
542
723
 
@@ -572,7 +753,7 @@ class AdbTools(Tools):
572
753
  """
573
754
  return self.memory.copy()
574
755
 
575
- async def get_state(self, serial: Optional[str] = None) -> Dict[str, Any]:
756
+ def get_state(self, serial: Optional[str] = None) -> Dict[str, Any]:
576
757
  """
577
758
  Get both the a11y tree and phone state in a single call using the combined /state endpoint.
578
759
 
@@ -584,40 +765,61 @@ class AdbTools(Tools):
584
765
  """
585
766
 
586
767
  try:
587
- if serial:
588
- device = await self.device_manager.get_device(serial)
589
- if not device:
590
- raise ValueError(f"Device {serial} not found")
768
+ logger.debug("Getting state")
769
+
770
+ if self.use_tcp and self.tcp_forwarded:
771
+ # Use TCP communication
772
+ response = requests.get(f"{self.tcp_base_url}/state", timeout=10)
773
+
774
+ if response.status_code == 200:
775
+ tcp_response = response.json()
776
+
777
+ # Check if response has the expected format
778
+ if isinstance(tcp_response, dict) and "data" in tcp_response:
779
+ data_str = tcp_response["data"]
780
+ try:
781
+ combined_data = json.loads(data_str)
782
+ except json.JSONDecodeError:
783
+ return {
784
+ "error": "Parse Error",
785
+ "message": "Failed to parse JSON data from TCP response data field",
786
+ }
787
+ else:
788
+ # Fallback: assume direct JSON format
789
+ combined_data = tcp_response
790
+ else:
791
+ return {
792
+ "error": "HTTP Error",
793
+ "message": f"HTTP request failed with status {response.status_code}",
794
+ }
591
795
  else:
592
- device = await self._get_device()
796
+ # Fallback to content provider method
797
+ adb_output = self.device.shell(
798
+ "content query --uri content://com.droidrun.portal/state",
799
+ )
593
800
 
594
- adb_output = await device._adb.shell(
595
- device._serial,
596
- "content query --uri content://com.droidrun.portal/state",
597
- )
598
-
599
- state_data = self._parse_content_provider_output(adb_output)
600
-
601
- if state_data is None:
602
- return {
603
- "error": "Parse Error",
604
- "message": "Failed to parse state data from ContentProvider response",
605
- }
801
+ state_data = self._parse_content_provider_output(adb_output)
606
802
 
607
- if isinstance(state_data, dict) and "data" in state_data:
608
- data_str = state_data["data"]
609
- try:
610
- combined_data = json.loads(data_str)
611
- except json.JSONDecodeError:
803
+ if state_data is None:
612
804
  return {
613
805
  "error": "Parse Error",
614
- "message": "Failed to parse JSON data from ContentProvider data field",
806
+ "message": "Failed to parse state data from ContentProvider response",
807
+ }
808
+
809
+ if isinstance(state_data, dict) and "data" in state_data:
810
+ data_str = state_data["data"]
811
+ try:
812
+ combined_data = json.loads(data_str)
813
+ except json.JSONDecodeError:
814
+ return {
815
+ "error": "Parse Error",
816
+ "message": "Failed to parse JSON data from ContentProvider data field",
817
+ }
818
+ else:
819
+ return {
820
+ "error": "Format Error",
821
+ "message": f"Unexpected state data format: {type(state_data)}",
615
822
  }
616
- else:
617
- return {
618
- "error": "Format Error",
619
- "message": f"Unexpected state data format: {type(state_data)}",
620
- }
621
823
 
622
824
  # Validate that both a11y_tree and phone_state are present
623
825
  if "a11y_tree" not in combined_data:
@@ -655,17 +857,271 @@ class AdbTools(Tools):
655
857
  "phone_state": combined_data["phone_state"],
656
858
  }
657
859
 
860
+ except requests.exceptions.RequestException as e:
861
+ return {
862
+ "error": "TCP Error",
863
+ "message": f"TCP request failed: {str(e)}",
864
+ }
658
865
  except Exception as e:
659
866
  return {
660
867
  "error": str(e),
661
868
  "message": f"Error getting combined state: {str(e)}",
662
869
  }
663
870
 
871
+ def get_a11y_tree(self) -> Dict[str, Any]:
872
+ """
873
+ Get just the accessibility tree using the /a11y_tree endpoint.
664
874
 
665
- if __name__ == "__main__":
875
+ Returns:
876
+ Dictionary containing accessibility tree data
877
+ """
878
+ try:
879
+ if self.use_tcp and self.tcp_forwarded:
880
+ response = requests.get(f"{self.tcp_base_url}/a11y_tree", timeout=10)
881
+
882
+ if response.status_code == 200:
883
+ tcp_response = response.json()
884
+
885
+ # Check if response has the expected format with data field
886
+ if isinstance(tcp_response, dict) and "data" in tcp_response:
887
+ data_str = tcp_response["data"]
888
+ try:
889
+ return json.loads(data_str)
890
+ except json.JSONDecodeError:
891
+ return {
892
+ "error": "Parse Error",
893
+ "message": "Failed to parse JSON data from TCP response data field",
894
+ }
895
+ else:
896
+ # Fallback: assume direct JSON format
897
+ return tcp_response
898
+ else:
899
+ return {
900
+ "error": "HTTP Error",
901
+ "message": f"HTTP request failed with status {response.status_code}",
902
+ }
903
+ else:
904
+ # Fallback: use get_state and extract a11y_tree
905
+ state = self.get_state()
906
+ if "error" in state:
907
+ return state
908
+ return {"a11y_tree": state.get("a11y_tree", [])}
666
909
 
667
- async def main():
668
- tools = await AdbTools.create()
669
- print(tools.serial)
910
+ except requests.exceptions.RequestException as e:
911
+ return {
912
+ "error": "TCP Error",
913
+ "message": f"TCP request failed: {str(e)}",
914
+ }
915
+ except Exception as e:
916
+ return {
917
+ "error": str(e),
918
+ "message": f"Error getting a11y tree: {str(e)}",
919
+ }
920
+
921
+ def get_phone_state(self) -> Dict[str, Any]:
922
+ """
923
+ Get just the phone state using the /phone_state endpoint.
670
924
 
671
- asyncio.run(main())
925
+ Returns:
926
+ Dictionary containing phone state data
927
+ """
928
+ try:
929
+ if self.use_tcp and self.tcp_forwarded:
930
+ response = requests.get(f"{self.tcp_base_url}/phone_state", timeout=10)
931
+
932
+ if response.status_code == 200:
933
+ tcp_response = response.json()
934
+
935
+ # Check if response has the expected format with data field
936
+ if isinstance(tcp_response, dict) and "data" in tcp_response:
937
+ data_str = tcp_response["data"]
938
+ try:
939
+ return json.loads(data_str)
940
+ except json.JSONDecodeError:
941
+ return {
942
+ "error": "Parse Error",
943
+ "message": "Failed to parse JSON data from TCP response data field",
944
+ }
945
+ else:
946
+ # Fallback: assume direct JSON format
947
+ return tcp_response
948
+ else:
949
+ return {
950
+ "error": "HTTP Error",
951
+ "message": f"HTTP request failed with status {response.status_code}",
952
+ }
953
+ else:
954
+ # Fallback: use get_state and extract phone_state
955
+ state = self.get_state()
956
+ if "error" in state:
957
+ return state
958
+ return {"phone_state": state.get("phone_state", {})}
959
+
960
+ except requests.exceptions.RequestException as e:
961
+ return {
962
+ "error": "TCP Error",
963
+ "message": f"TCP request failed: {str(e)}",
964
+ }
965
+ except Exception as e:
966
+ return {
967
+ "error": str(e),
968
+ "message": f"Error getting phone state: {str(e)}",
969
+ }
970
+
971
+ def ping(self) -> Dict[str, Any]:
972
+ """
973
+ Test the TCP connection using the /ping endpoint.
974
+
975
+ Returns:
976
+ Dictionary with ping result
977
+ """
978
+ try:
979
+ if self.use_tcp and self.tcp_forwarded:
980
+ response = requests.get(f"{self.tcp_base_url}/ping", timeout=5)
981
+
982
+ if response.status_code == 200:
983
+ try:
984
+ tcp_response = response.json() if response.content else {}
985
+ logger.debug(f"Ping TCP response: {tcp_response}")
986
+ return {
987
+ "status": "success",
988
+ "message": "Ping successful",
989
+ "response": tcp_response,
990
+ }
991
+ except json.JSONDecodeError:
992
+ return {
993
+ "status": "success",
994
+ "message": "Ping successful (non-JSON response)",
995
+ "response": response.text,
996
+ }
997
+ else:
998
+ return {
999
+ "status": "error",
1000
+ "message": f"Ping failed with status {response.status_code}: {response.text}",
1001
+ }
1002
+ else:
1003
+ return {
1004
+ "status": "error",
1005
+ "message": "TCP communication is not enabled",
1006
+ }
1007
+
1008
+ except requests.exceptions.RequestException as e:
1009
+ return {
1010
+ "status": "error",
1011
+ "message": f"Ping failed: {str(e)}",
1012
+ }
1013
+ except Exception as e:
1014
+ return {
1015
+ "status": "error",
1016
+ "message": f"Error during ping: {str(e)}",
1017
+ }
1018
+
1019
+
1020
+ def _shell_test_cli(serial: str, command: str) -> tuple[str, float]:
1021
+ """
1022
+ Run an adb shell command using the adb CLI and measure execution time.
1023
+ Args:
1024
+ serial: Device serial number
1025
+ command: Shell command to run
1026
+ Returns:
1027
+ Tuple of (output, elapsed_time)
1028
+ """
1029
+ import time
1030
+ import subprocess
1031
+
1032
+ adb_cmd = ["adb", "-s", serial, "shell", command]
1033
+ start = time.perf_counter()
1034
+ result = subprocess.run(adb_cmd, capture_output=True, text=True)
1035
+ elapsed = time.perf_counter() - start
1036
+ output = result.stdout.strip() if result.returncode == 0 else result.stderr.strip()
1037
+ return output, elapsed
1038
+
1039
+
1040
+ def _shell_test():
1041
+ device = adb.device("emulator-5554")
1042
+ # Native Python adb client
1043
+ start = time.time()
1044
+ res = device.shell("echo 'Hello, World!'")
1045
+ end = time.time()
1046
+ print(f"[Native] Shell execution took {end - start:.3f} seconds: {res}")
1047
+
1048
+ start = time.time()
1049
+ res = device.shell("content query --uri content://com.droidrun.portal/state")
1050
+ end = time.time()
1051
+ print(f"[Native] Shell execution took {end - start:.3f} seconds: phone_state")
1052
+
1053
+ # CLI version
1054
+ output, elapsed = _shell_test_cli("emulator-5554", "echo 'Hello, World!'")
1055
+ print(f"[CLI] Shell execution took {elapsed:.3f} seconds: {output}")
1056
+
1057
+ output, elapsed = _shell_test_cli(
1058
+ "emulator-5554", "content query --uri content://com.droidrun.portal/state"
1059
+ )
1060
+ print(f"[CLI] Shell execution took {elapsed:.3f} seconds: phone_state")
1061
+
1062
+
1063
+ def _list_packages():
1064
+ tools = AdbTools()
1065
+ print(tools.list_packages())
1066
+
1067
+
1068
+ def _start_app():
1069
+ tools = AdbTools()
1070
+ tools.start_app("com.android.settings", ".Settings")
1071
+
1072
+
1073
+ def _shell_test_cli(serial: str, command: str) -> tuple[str, float]:
1074
+ """
1075
+ Run an adb shell command using the adb CLI and measure execution time.
1076
+ Args:
1077
+ serial: Device serial number
1078
+ command: Shell command to run
1079
+ Returns:
1080
+ Tuple of (output, elapsed_time)
1081
+ """
1082
+ import time
1083
+ import subprocess
1084
+
1085
+ adb_cmd = ["adb", "-s", serial, "shell", command]
1086
+ start = time.perf_counter()
1087
+ result = subprocess.run(adb_cmd, capture_output=True, text=True)
1088
+ elapsed = time.perf_counter() - start
1089
+ output = result.stdout.strip() if result.returncode == 0 else result.stderr.strip()
1090
+ return output, elapsed
1091
+
1092
+
1093
+ def _shell_test():
1094
+ device = adb.device("emulator-5554")
1095
+ # Native Python adb client
1096
+ start = time.time()
1097
+ res = device.shell("echo 'Hello, World!'")
1098
+ end = time.time()
1099
+ print(f"[Native] Shell execution took {end - start:.3f} seconds: {res}")
1100
+
1101
+ start = time.time()
1102
+ res = device.shell("content query --uri content://com.droidrun.portal/state")
1103
+ end = time.time()
1104
+ print(f"[Native] Shell execution took {end - start:.3f} seconds: phone_state")
1105
+
1106
+ # CLI version
1107
+ output, elapsed = _shell_test_cli("emulator-5554", "echo 'Hello, World!'")
1108
+ print(f"[CLI] Shell execution took {elapsed:.3f} seconds: {output}")
1109
+
1110
+ output, elapsed = _shell_test_cli(
1111
+ "emulator-5554", "content query --uri content://com.droidrun.portal/state"
1112
+ )
1113
+ print(f"[CLI] Shell execution took {elapsed:.3f} seconds: phone_state")
1114
+
1115
+
1116
+ def _list_packages():
1117
+ tools = AdbTools()
1118
+ print(tools.list_packages())
1119
+
1120
+
1121
+ def _start_app():
1122
+ tools = AdbTools()
1123
+ tools.start_app("com.android.settings", ".Settings")
1124
+
1125
+
1126
+ if __name__ == "__main__":
1127
+ _start_app()