droidrun 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
droidrun/tools/adb.py CHANGED
@@ -3,31 +3,50 @@ UI Actions - Core UI interaction tools for Android device control.
3
3
  """
4
4
 
5
5
  import os
6
+ import io
6
7
  import json
7
8
  import time
8
- import asyncio
9
9
  import logging
10
- from typing import Optional, Dict, Tuple, List, Any, Type, Self
11
- from droidrun.adb.device import Device
12
- from droidrun.adb.manager import DeviceManager
10
+ from llama_index.core.workflow import Context
11
+ from typing_extensions import Optional, Dict, Tuple, List, Any, Type, Self
12
+ from droidrun.agent.common.events import (
13
+ InputTextActionEvent,
14
+ KeyPressActionEvent,
15
+ StartAppEvent,
16
+ SwipeActionEvent,
17
+ TapActionEvent,
18
+ DragActionEvent,
19
+ )
13
20
  from droidrun.tools.tools import Tools
21
+ from adbutils import adb
22
+ import requests
23
+ import base64
14
24
 
15
- logger = logging.getLogger("droidrun-adb-tools")
25
+ logger = logging.getLogger("droidrun-tools")
16
26
 
17
27
 
18
28
  class AdbTools(Tools):
19
29
  """Core UI interaction tools for Android device control."""
20
30
 
21
- def __init__(self, serial: str) -> None:
31
+ def __init__(
32
+ self, serial: str | None = None, use_tcp: bool = False, tcp_port: int = 8080
33
+ ) -> None:
22
34
  """Initialize the AdbTools instance.
23
35
 
24
36
  Args:
25
37
  serial: Device serial number
38
+ use_tcp: Whether to use TCP communication (default: False)
39
+ tcp_port: TCP port for communication (default: 8080)
26
40
  """
27
- self.device_manager = DeviceManager()
41
+ self.device = adb.device(serial=serial)
42
+ self.use_tcp = use_tcp
43
+ self.tcp_port = tcp_port
44
+ self.tcp_base_url = f"http://localhost:{tcp_port}"
45
+ self.tcp_forwarded = False
46
+
47
+ self._ctx = None
28
48
  # Instance‐level cache for clickable elements (index-based tapping)
29
49
  self.clickable_elements_cache: List[Dict[str, Any]] = []
30
- self.serial = serial
31
50
  self.last_screenshot = None
32
51
  self.reason = None
33
52
  self.success = None
@@ -37,46 +56,72 @@ class AdbTools(Tools):
37
56
  # Store all screenshots with timestamps
38
57
  self.screenshots: List[Dict[str, Any]] = []
39
58
 
40
- @classmethod
41
- async def create(cls: Type[Self], serial: str = None) -> Self:
42
- """Create an AdbTools instance.
59
+ # Set up TCP forwarding if requested
60
+ if self.use_tcp:
61
+ self.setup_tcp_forward()
43
62
 
44
- Args:
45
- serial: Optional device serial number. If not provided, the first device found will be used.
63
+ def setup_tcp_forward(self) -> bool:
64
+ """
65
+ Set up ADB TCP port forwarding for communication with the portal app.
46
66
 
47
67
  Returns:
48
- AdbTools instance
68
+ bool: True if forwarding was set up successfully, False otherwise
49
69
  """
50
- if not serial:
51
- dvm = DeviceManager()
52
- devices = await dvm.list_devices()
53
- if not devices or len(devices) < 1:
54
- raise ValueError("No devices found")
55
- serial = devices[0].serial
70
+ try:
71
+ logger.debug(
72
+ f"Setting up TCP port forwarding: tcp:{self.tcp_port} tcp:{self.tcp_port}"
73
+ )
74
+ # Use adb forward command to set up port forwarding
75
+ result = self.device.forward(f"tcp:{self.tcp_port}", f"tcp:{self.tcp_port}")
76
+ self.tcp_forwarded = True
77
+ logger.debug(f"TCP port forwarding set up successfully: {result}")
56
78
 
57
- return AdbTools(serial)
79
+ # Test the connection with a ping
80
+ try:
81
+ response = requests.get(f"{self.tcp_base_url}/ping", timeout=5)
82
+ if response.status_code == 200:
83
+ logger.debug("TCP connection test successful")
84
+ return True
85
+ else:
86
+ logger.warning(
87
+ f"TCP connection test failed with status: {response.status_code}"
88
+ )
89
+ return False
90
+ except requests.exceptions.RequestException as e:
91
+ logger.warning(f"TCP connection test failed: {e}")
92
+ return False
58
93
 
59
- def _get_device_serial(self) -> str:
60
- """Get the device serial from the instance or environment variable."""
61
- # First try using the instance's serial
62
- if self.serial:
63
- return self.serial
94
+ except Exception as e:
95
+ logger.error(f"Failed to set up TCP port forwarding: {e}")
96
+ self.tcp_forwarded = False
97
+ return False
64
98
 
65
- async def _get_device(self) -> Optional[Device]:
66
- """Get the device instance using the instance's serial or from environment variable.
99
+ def teardown_tcp_forward(self) -> bool:
100
+ """
101
+ Remove ADB TCP port forwarding.
67
102
 
68
103
  Returns:
69
- Device instance or None if not found
104
+ bool: True if forwarding was removed successfully, False otherwise
70
105
  """
71
- serial = self._get_device_serial()
72
- if not serial:
73
- raise ValueError("No device serial specified - set device_serial parameter")
106
+ try:
107
+ if self.tcp_forwarded:
108
+ logger.debug(f"Removing TCP port forwarding for port {self.tcp_port}")
109
+ result = self.device.forward_remove(f"tcp:{self.tcp_port}")
110
+ self.tcp_forwarded = False
111
+ logger.debug(f"TCP port forwarding removed: {result}")
112
+ return True
113
+ return True
114
+ except Exception as e:
115
+ logger.error(f"Failed to remove TCP port forwarding: {e}")
116
+ return False
74
117
 
75
- device = await self.device_manager.get_device(serial)
76
- if not device:
77
- raise ValueError(f"Device {serial} not found")
118
+ def __del__(self):
119
+ """Cleanup when the object is destroyed."""
120
+ if hasattr(self, "tcp_forwarded") and self.tcp_forwarded:
121
+ self.teardown_tcp_forward()
78
122
 
79
- return device
123
+ def _set_context(self, ctx: Context):
124
+ self._ctx = ctx
80
125
 
81
126
  def _parse_content_provider_output(
82
127
  self, raw_output: str
@@ -125,7 +170,7 @@ class AdbTools(Tools):
125
170
  except json.JSONDecodeError:
126
171
  return None
127
172
 
128
- async def tap_by_index(self, index: int, serial: Optional[str] = None) -> str:
173
+ def tap_by_index(self, index: int) -> str:
129
174
  """
130
175
  Tap on a UI element by its index.
131
176
 
@@ -197,18 +242,32 @@ class AdbTools(Tools):
197
242
  x = (left + right) // 2
198
243
  y = (top + bottom) // 2
199
244
 
245
+ logger.debug(
246
+ f"Tapping element with index {index} at coordinates ({x}, {y})"
247
+ )
200
248
  # Get the device and tap at the coordinates
201
- if serial:
202
- device = await self.device_manager.get_device(serial)
203
- if not device:
204
- return f"Error: Device {serial} not found"
205
- else:
206
- device = await self._get_device()
249
+ self.device.click(x, y)
250
+ logger.debug(f"Tapped element with index {index} at coordinates ({x}, {y})")
251
+
252
+ # Emit coordinate action event for trajectory recording
207
253
 
208
- await device.tap(x, y)
254
+ if self._ctx:
255
+ element_text = element.get("text", "No text")
256
+ element_class = element.get("className", "Unknown class")
257
+
258
+ tap_event = TapActionEvent(
259
+ action_type="tap",
260
+ description=f"Tap element at index {index}: '{element_text}' ({element_class}) at coordinates ({x}, {y})",
261
+ x=x,
262
+ y=y,
263
+ element_index=index,
264
+ element_text=element_text,
265
+ element_bounds=bounds_str,
266
+ )
267
+ self._ctx.write_event_to_stream(tap_event)
209
268
 
210
269
  # Add a small delay to allow UI to update
211
- await asyncio.sleep(0.5)
270
+ time.sleep(0.5)
212
271
 
213
272
  # Create a descriptive response
214
273
  response_parts = []
@@ -233,7 +292,7 @@ class AdbTools(Tools):
233
292
  return f"Error: {str(e)}"
234
293
 
235
294
  # Rename the old tap function to tap_by_coordinates for backward compatibility
236
- async def tap_by_coordinates(self, x: int, y: int) -> bool:
295
+ def tap_by_coordinates(self, x: int, y: int) -> bool:
237
296
  """
238
297
  Tap on the device screen at specific coordinates.
239
298
 
@@ -245,22 +304,16 @@ class AdbTools(Tools):
245
304
  Bool indicating success or failure
246
305
  """
247
306
  try:
248
- if self.serial:
249
- device = await self.device_manager.get_device(self.serial)
250
- if not device:
251
- return f"Error: Device {self.serial} not found"
252
- else:
253
- device = await self._get_device()
254
-
255
- await device.tap(x, y)
256
- print(f"Tapped at coordinates ({x}, {y})")
307
+ logger.debug(f"Tapping at coordinates ({x}, {y})")
308
+ self.device.click(x, y)
309
+ logger.debug(f"Tapped at coordinates ({x}, {y})")
257
310
  return True
258
311
  except ValueError as e:
259
- print(f"Error: {str(e)}")
312
+ logger.debug(f"Error: {str(e)}")
260
313
  return False
261
314
 
262
315
  # Replace the old tap function with the new one
263
- async def tap(self, index: int) -> str:
316
+ def tap(self, index: int) -> str:
264
317
  """
265
318
  Tap on a UI element by its index.
266
319
 
@@ -273,10 +326,15 @@ class AdbTools(Tools):
273
326
  Returns:
274
327
  Result message
275
328
  """
276
- return await self.tap_by_index(index)
277
-
278
- async def swipe(
279
- self, start_x: int, start_y: int, end_x: int, end_y: int, duration_ms: int = 300
329
+ return self.tap_by_index(index)
330
+
331
+ def swipe(
332
+ self,
333
+ start_x: int,
334
+ start_y: int,
335
+ end_x: int,
336
+ end_y: int,
337
+ duration_ms: float = 300,
280
338
  ) -> bool:
281
339
  """
282
340
  Performs a straight-line swipe gesture on the device screen.
@@ -286,29 +344,76 @@ class AdbTools(Tools):
286
344
  start_y: Starting Y coordinate
287
345
  end_x: Ending X coordinate
288
346
  end_y: Ending Y coordinate
289
- duration_ms: Duration of swipe in milliseconds
347
+ duration: Duration of swipe in seconds
290
348
  Returns:
291
349
  Bool indicating success or failure
292
350
  """
293
351
  try:
294
- if self.serial:
295
- device = await self.device_manager.get_device(self.serial)
296
- if not device:
297
- return f"Error: Device {self.serial} not found"
298
- else:
299
- device = await self._get_device()
300
352
 
301
- await device.swipe(start_x, start_y, end_x, end_y, duration_ms)
302
- await asyncio.sleep(1)
303
- print(
304
- f"Swiped from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration_ms}ms"
353
+ if self._ctx:
354
+ swipe_event = SwipeActionEvent(
355
+ action_type="swipe",
356
+ description=f"Swipe from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration_ms} milliseconds",
357
+ start_x=start_x,
358
+ start_y=start_y,
359
+ end_x=end_x,
360
+ end_y=end_y,
361
+ duration_ms=duration_ms,
362
+ )
363
+ self._ctx.write_event_to_stream(swipe_event)
364
+
365
+ self.device.swipe(start_x, start_y, end_x, end_y, float(duration_ms / 1000))
366
+ time.sleep(duration_ms / 1000)
367
+ logger.debug(
368
+ f"Swiped from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration_ms} milliseconds"
305
369
  )
306
370
  return True
307
371
  except ValueError as e:
308
372
  print(f"Error: {str(e)}")
309
373
  return False
310
374
 
311
- async def input_text(self, text: str, serial: Optional[str] = None) -> str:
375
+ def drag(
376
+ self, start_x: int, start_y: int, end_x: int, end_y: int, duration: float = 3
377
+ ) -> bool:
378
+ """
379
+ Performs a straight-line drag and drop gesture on the device screen.
380
+ Args:
381
+ start_x: Starting X coordinate
382
+ start_y: Starting Y coordinate
383
+ end_x: Ending X coordinate
384
+ end_y: Ending Y coordinate
385
+ duration: Duration of swipe in seconds
386
+ Returns:
387
+ Bool indicating success or failure
388
+ """
389
+ try:
390
+ logger.debug(
391
+ f"Dragging from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration} seconds"
392
+ )
393
+ self.device.drag(start_x, start_y, end_x, end_y, duration)
394
+
395
+ if self._ctx:
396
+ drag_event = DragActionEvent(
397
+ action_type="drag",
398
+ description=f"Drag from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration} seconds",
399
+ start_x=start_x,
400
+ start_y=start_y,
401
+ end_x=end_x,
402
+ end_y=end_y,
403
+ duration=duration,
404
+ )
405
+ self._ctx.write_event_to_stream(drag_event)
406
+
407
+ time.sleep(duration)
408
+ logger.debug(
409
+ f"Dragged from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration} seconds"
410
+ )
411
+ return True
412
+ except ValueError as e:
413
+ print(f"Error: {str(e)}")
414
+ return False
415
+
416
+ def input_text(self, text: str) -> str:
312
417
  """
313
418
  Input text on the device.
314
419
  Always make sure that the Focused Element is not None before inputting text.
@@ -320,72 +425,105 @@ class AdbTools(Tools):
320
425
  Result message
321
426
  """
322
427
  try:
323
- if serial:
324
- device = await self.device_manager.get_device(serial)
325
- if not device:
326
- return f"Error: Device {serial} not found"
327
- else:
328
- device = await self._get_device()
428
+ logger.debug(f"Inputting text: {text}")
329
429
 
330
- # Save the current keyboard
331
- original_ime = await device._adb.shell(
332
- device._serial, "settings get secure default_input_method"
333
- )
334
- original_ime = original_ime.strip()
430
+ if self.use_tcp and self.tcp_forwarded:
431
+ # Use TCP communication
432
+ encoded_text = base64.b64encode(text.encode()).decode()
335
433
 
336
- # Enable the Droidrun keyboard
337
- await device._adb.shell(
338
- device._serial, "ime enable com.droidrun.portal/.DroidrunKeyboardIME"
339
- )
340
-
341
- # Set the Droidrun keyboard as the default
342
- await device._adb.shell(
343
- device._serial, "ime set com.droidrun.portal/.DroidrunKeyboardIME"
344
- )
345
-
346
- # Wait for keyboard to change
347
- await asyncio.sleep(1)
434
+ payload = {"base64_text": encoded_text}
435
+ response = requests.post(
436
+ f"{self.tcp_base_url}/keyboard/input",
437
+ json=payload,
438
+ headers={"Content-Type": "application/json"},
439
+ timeout=10,
440
+ )
348
441
 
349
- # Encode the text to Base64
350
- import base64
442
+ logger.debug(
443
+ f"Keyboard input TCP response: {response.status_code}, {response.text}"
444
+ )
351
445
 
352
- encoded_text = base64.b64encode(text.encode()).decode()
353
-
354
- cmd = f'content insert --uri "content://com.droidrun.portal/keyboard/input" --bind base64_text:s:"{encoded_text}"'
355
- await device._adb.shell(device._serial, cmd)
356
-
357
- # Wait for text input to complete
358
- await asyncio.sleep(0.5)
359
-
360
- # Restore the original keyboard
361
- if original_ime and "com.droidrun.portal" not in original_ime:
362
- await device._adb.shell(device._serial, f"ime set {original_ime}")
446
+ if response.status_code != 200:
447
+ return f"Error: HTTP request failed with status {response.status_code}: {response.text}"
363
448
 
449
+ else:
450
+ # Fallback to content provider method
451
+ # Save the current keyboard
452
+ original_ime = self.device.shell(
453
+ "settings get secure default_input_method"
454
+ )
455
+ original_ime = original_ime.strip()
456
+
457
+ # Enable the Droidrun keyboard
458
+ self.device.shell("ime enable com.droidrun.portal/.DroidrunKeyboardIME")
459
+
460
+ # Set the Droidrun keyboard as the default
461
+ self.device.shell("ime set com.droidrun.portal/.DroidrunKeyboardIME")
462
+
463
+ # Wait for keyboard to change
464
+ time.sleep(1)
465
+
466
+ # Encode the text to Base64
467
+ encoded_text = base64.b64encode(text.encode()).decode()
468
+
469
+ cmd = f'content insert --uri "content://com.droidrun.portal/keyboard/input" --bind base64_text:s:"{encoded_text}"'
470
+ self.device.shell(cmd)
471
+
472
+ # Wait for text input to complete
473
+ time.sleep(0.5)
474
+
475
+ # Restore the original keyboard
476
+ if original_ime and "com.droidrun.portal" not in original_ime:
477
+ self.device.shell(f"ime set {original_ime}")
478
+
479
+ logger.debug(
480
+ f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"
481
+ )
482
+ return f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"
483
+
484
+ if self._ctx:
485
+ input_event = InputTextActionEvent(
486
+ action_type="input_text",
487
+ description=f"Input text: '{text[:50]}{'...' if len(text) > 50 else ''}'",
488
+ text=text,
489
+ )
490
+ self._ctx.write_event_to_stream(input_event)
491
+
492
+ logger.debug(
493
+ f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"
494
+ )
364
495
  return f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"
496
+
497
+ except requests.exceptions.RequestException as e:
498
+ return f"Error: TCP request failed: {str(e)}"
365
499
  except ValueError as e:
366
500
  return f"Error: {str(e)}"
367
501
  except Exception as e:
368
502
  return f"Error sending text input: {str(e)}"
369
503
 
370
- async def back(self) -> str:
504
+ def back(self) -> str:
371
505
  """
372
506
  Go back on the current view.
373
507
  This presses the Android back button.
374
508
  """
375
509
  try:
376
- if self.serial:
377
- device = await self.device_manager.get_device(self.serial)
378
- if not device:
379
- return f"Error: Device {self.serial} not found"
380
- else:
381
- device = await self._get_device()
510
+ logger.debug("Pressing key BACK")
511
+ self.device.keyevent(3)
512
+
513
+ if self._ctx:
514
+ key_event = KeyPressActionEvent(
515
+ action_type="key_press",
516
+ description=f"Pressed key BACK",
517
+ keycode=3,
518
+ key_name="BACK",
519
+ )
520
+ self._ctx.write_event_to_stream(key_event)
382
521
 
383
- await device.press_key(3)
384
522
  return f"Pressed key BACK"
385
523
  except ValueError as e:
386
524
  return f"Error: {str(e)}"
387
525
 
388
- async def press_key(self, keycode: int) -> str:
526
+ def press_key(self, keycode: int) -> str:
389
527
  """
390
528
  Press a key on the Android device.
391
529
 
@@ -399,13 +537,6 @@ class AdbTools(Tools):
399
537
  keycode: Android keycode to press
400
538
  """
401
539
  try:
402
- if self.serial:
403
- device = await self.device_manager.get_device(self.serial)
404
- if not device:
405
- return f"Error: Device {self.serial} not found"
406
- else:
407
- device = await self._get_device()
408
-
409
540
  key_names = {
410
541
  66: "ENTER",
411
542
  4: "BACK",
@@ -414,12 +545,23 @@ class AdbTools(Tools):
414
545
  }
415
546
  key_name = key_names.get(keycode, str(keycode))
416
547
 
417
- await device.press_key(keycode)
548
+ if self._ctx:
549
+ key_event = KeyPressActionEvent(
550
+ action_type="key_press",
551
+ description=f"Pressed key {key_name}",
552
+ keycode=keycode,
553
+ key_name=key_name,
554
+ )
555
+ self._ctx.write_event_to_stream(key_event)
556
+
557
+ logger.debug(f"Pressing key {key_name}")
558
+ self.device.keyevent(keycode)
559
+ logger.debug(f"Pressed key {key_name}")
418
560
  return f"Pressed key {key_name}"
419
561
  except ValueError as e:
420
562
  return f"Error: {str(e)}"
421
563
 
422
- async def start_app(self, package: str, activity: str = "") -> str:
564
+ def start_app(self, package: str, activity: str | None = None) -> str:
423
565
  """
424
566
  Start an app on the device.
425
567
 
@@ -428,19 +570,32 @@ class AdbTools(Tools):
428
570
  activity: Optional activity name
429
571
  """
430
572
  try:
431
- if self.serial:
432
- device = await self.device_manager.get_device(self.serial)
433
- if not device:
434
- return f"Error: Device {self.serial} not found"
435
- else:
436
- device = await self._get_device()
437
573
 
438
- result = await device.start_app(package, activity)
439
- return result
440
- except ValueError as e:
574
+ logger.debug(f"Starting app {package} with activity {activity}")
575
+ if not activity:
576
+ dumpsys_output = self.device.shell(
577
+ f"cmd package resolve-activity --brief {package}"
578
+ )
579
+ activity = dumpsys_output.splitlines()[1].split("/")[1]
580
+
581
+ if self._ctx:
582
+ start_app_event = StartAppEvent(
583
+ action_type="start_app",
584
+ description=f"Start app {package}",
585
+ package=package,
586
+ activity=activity,
587
+ )
588
+ self._ctx.write_event_to_stream(start_app_event)
589
+
590
+ print(f"Activity: {activity}")
591
+
592
+ self.device.app_start(package, activity)
593
+ logger.debug(f"App started: {package} with activity {activity}")
594
+ return f"App started: {package} with activity {activity}"
595
+ except Exception as e:
441
596
  return f"Error: {str(e)}"
442
597
 
443
- async def install_app(
598
+ def install_app(
444
599
  self, apk_path: str, reinstall: bool = False, grant_permissions: bool = True
445
600
  ) -> str:
446
601
  """
@@ -452,50 +607,94 @@ class AdbTools(Tools):
452
607
  grant_permissions: Whether to grant all permissions
453
608
  """
454
609
  try:
455
- if self.serial:
456
- device = await self.device_manager.get_device(self.serial)
457
- if not device:
458
- return f"Error: Device {self.serial} not found"
459
- else:
460
- device = await self._get_device()
461
-
462
610
  if not os.path.exists(apk_path):
463
611
  return f"Error: APK file not found at {apk_path}"
464
612
 
465
- result = await device.install_app(apk_path, reinstall, grant_permissions)
613
+ logger.debug(
614
+ f"Installing app: {apk_path} with reinstall: {reinstall} and grant_permissions: {grant_permissions}"
615
+ )
616
+ result = self.device.install(
617
+ apk_path,
618
+ nolaunch=True,
619
+ uninstall=reinstall,
620
+ flags=["-g"] if grant_permissions else [],
621
+ silent=True,
622
+ )
623
+ logger.debug(f"Installed app: {apk_path} with result: {result}")
466
624
  return result
467
625
  except ValueError as e:
468
626
  return f"Error: {str(e)}"
469
627
 
470
- async def take_screenshot(self) -> Tuple[str, bytes]:
628
+ def take_screenshot(self) -> Tuple[str, bytes]:
471
629
  """
472
630
  Take a screenshot of the device.
473
631
  This function captures the current screen and adds the screenshot to context in the next message.
474
632
  Also stores the screenshot in the screenshots list with timestamp for later GIF creation.
475
633
  """
476
634
  try:
477
- if self.serial:
478
- device = await self.device_manager.get_device(self.serial)
479
- if not device:
480
- raise ValueError(f"Device {self.serial} not found")
635
+ logger.debug("Taking screenshot")
636
+
637
+ if self.use_tcp and self.tcp_forwarded:
638
+ # Use TCP communication
639
+ response = requests.get(f"{self.tcp_base_url}/screenshot", timeout=15)
640
+
641
+ if response.status_code == 200:
642
+ tcp_response = response.json()
643
+
644
+ # Check if response has the expected format with data field
645
+ if isinstance(tcp_response, dict) and "data" in tcp_response:
646
+ base64_data = tcp_response["data"]
647
+ try:
648
+ # Decode base64 to get image bytes
649
+ image_bytes = base64.b64decode(base64_data)
650
+ img_format = "PNG" # Assuming PNG format from TCP endpoint
651
+ logger.debug("Screenshot taken via TCP")
652
+ except Exception as e:
653
+ raise ValueError(
654
+ f"Failed to decode base64 screenshot data: {str(e)}"
655
+ )
656
+ else:
657
+ # Fallback: assume direct base64 format
658
+ try:
659
+ image_bytes = base64.b64decode(tcp_response)
660
+ img_format = "PNG"
661
+ logger.debug("Screenshot taken via TCP (direct base64)")
662
+ except Exception as e:
663
+ raise ValueError(
664
+ f"Failed to decode screenshot response: {str(e)}"
665
+ )
666
+ else:
667
+ raise ValueError(
668
+ f"HTTP request failed with status {response.status_code}: {response.text}"
669
+ )
670
+
481
671
  else:
482
- device = await self._get_device()
483
- screen_tuple = await device.take_screenshot()
484
- self.last_screenshot = screen_tuple[1]
672
+ # Fallback to ADB screenshot method
673
+ img = self.device.screenshot()
674
+ img_buf = io.BytesIO()
675
+ img_format = "PNG"
676
+ img.save(img_buf, format=img_format)
677
+ image_bytes = img_buf.getvalue()
678
+ logger.debug("Screenshot taken via ADB")
485
679
 
486
680
  # Store screenshot with timestamp
487
681
  self.screenshots.append(
488
682
  {
489
683
  "timestamp": time.time(),
490
- "image_data": screen_tuple[1],
491
- "format": screen_tuple[0], # Usually 'PNG'
684
+ "image_data": image_bytes,
685
+ "format": img_format,
492
686
  }
493
687
  )
494
- return screen_tuple
688
+ return img_format, image_bytes
689
+
690
+ except requests.exceptions.RequestException as e:
691
+ raise ValueError(f"Error taking screenshot via TCP: {str(e)}")
495
692
  except ValueError as e:
496
693
  raise ValueError(f"Error taking screenshot: {str(e)}")
694
+ except Exception as e:
695
+ raise ValueError(f"Unexpected error taking screenshot: {str(e)}")
497
696
 
498
- async def list_packages(self, include_system_apps: bool = False) -> List[str]:
697
+ def list_packages(self, include_system_apps: bool = False) -> List[str]:
499
698
  """
500
699
  List installed packages on the device.
501
700
 
@@ -506,14 +705,8 @@ class AdbTools(Tools):
506
705
  List of package names
507
706
  """
508
707
  try:
509
- if self.serial:
510
- device = await self.device_manager.get_device(self.serial)
511
- if not device:
512
- raise ValueError(f"Device {self.serial} not found")
513
- else:
514
- device = await self._get_device()
515
-
516
- return await device.list_packages(include_system_apps)
708
+ logger.debug("Listing packages")
709
+ return self.device.list_packages(["-3"] if not include_system_apps else [])
517
710
  except ValueError as e:
518
711
  raise ValueError(f"Error listing packages: {str(e)}")
519
712
 
@@ -536,7 +729,7 @@ class AdbTools(Tools):
536
729
  self.reason = reason
537
730
  self.finished = True
538
731
 
539
- async def remember(self, information: str) -> str:
732
+ def remember(self, information: str) -> str:
540
733
  """
541
734
  Store important information to remember for future context.
542
735
 
@@ -572,7 +765,7 @@ class AdbTools(Tools):
572
765
  """
573
766
  return self.memory.copy()
574
767
 
575
- async def get_state(self, serial: Optional[str] = None) -> Dict[str, Any]:
768
+ def get_state(self, serial: Optional[str] = None) -> Dict[str, Any]:
576
769
  """
577
770
  Get both the a11y tree and phone state in a single call using the combined /state endpoint.
578
771
 
@@ -584,40 +777,61 @@ class AdbTools(Tools):
584
777
  """
585
778
 
586
779
  try:
587
- if serial:
588
- device = await self.device_manager.get_device(serial)
589
- if not device:
590
- raise ValueError(f"Device {serial} not found")
780
+ logger.debug("Getting state")
781
+
782
+ if self.use_tcp and self.tcp_forwarded:
783
+ # Use TCP communication
784
+ response = requests.get(f"{self.tcp_base_url}/state", timeout=10)
785
+
786
+ if response.status_code == 200:
787
+ tcp_response = response.json()
788
+
789
+ # Check if response has the expected format
790
+ if isinstance(tcp_response, dict) and "data" in tcp_response:
791
+ data_str = tcp_response["data"]
792
+ try:
793
+ combined_data = json.loads(data_str)
794
+ except json.JSONDecodeError:
795
+ return {
796
+ "error": "Parse Error",
797
+ "message": "Failed to parse JSON data from TCP response data field",
798
+ }
799
+ else:
800
+ # Fallback: assume direct JSON format
801
+ combined_data = tcp_response
802
+ else:
803
+ return {
804
+ "error": "HTTP Error",
805
+ "message": f"HTTP request failed with status {response.status_code}",
806
+ }
591
807
  else:
592
- device = await self._get_device()
593
-
594
- adb_output = await device._adb.shell(
595
- device._serial,
596
- "content query --uri content://com.droidrun.portal/state",
597
- )
808
+ # Fallback to content provider method
809
+ adb_output = self.device.shell(
810
+ "content query --uri content://com.droidrun.portal/state",
811
+ )
598
812
 
599
- state_data = self._parse_content_provider_output(adb_output)
813
+ state_data = self._parse_content_provider_output(adb_output)
600
814
 
601
- if state_data is None:
602
- return {
603
- "error": "Parse Error",
604
- "message": "Failed to parse state data from ContentProvider response",
605
- }
606
-
607
- if isinstance(state_data, dict) and "data" in state_data:
608
- data_str = state_data["data"]
609
- try:
610
- combined_data = json.loads(data_str)
611
- except json.JSONDecodeError:
815
+ if state_data is None:
612
816
  return {
613
817
  "error": "Parse Error",
614
- "message": "Failed to parse JSON data from ContentProvider data field",
818
+ "message": "Failed to parse state data from ContentProvider response",
819
+ }
820
+
821
+ if isinstance(state_data, dict) and "data" in state_data:
822
+ data_str = state_data["data"]
823
+ try:
824
+ combined_data = json.loads(data_str)
825
+ except json.JSONDecodeError:
826
+ return {
827
+ "error": "Parse Error",
828
+ "message": "Failed to parse JSON data from ContentProvider data field",
829
+ }
830
+ else:
831
+ return {
832
+ "error": "Format Error",
833
+ "message": f"Unexpected state data format: {type(state_data)}",
615
834
  }
616
- else:
617
- return {
618
- "error": "Format Error",
619
- "message": f"Unexpected state data format: {type(state_data)}",
620
- }
621
835
 
622
836
  # Validate that both a11y_tree and phone_state are present
623
837
  if "a11y_tree" not in combined_data:
@@ -655,17 +869,271 @@ class AdbTools(Tools):
655
869
  "phone_state": combined_data["phone_state"],
656
870
  }
657
871
 
872
+ except requests.exceptions.RequestException as e:
873
+ return {
874
+ "error": "TCP Error",
875
+ "message": f"TCP request failed: {str(e)}",
876
+ }
658
877
  except Exception as e:
659
878
  return {
660
879
  "error": str(e),
661
880
  "message": f"Error getting combined state: {str(e)}",
662
881
  }
663
882
 
883
def get_a11y_tree(self) -> Dict[str, Any]:
    """
    Fetch only the accessibility tree.

    When TCP is enabled and forwarded, the tree is requested from the
    /a11y_tree endpoint; otherwise it is extracted from get_state().

    Returns:
        TCP path: the JSON decoded from the response's "data" field when the
        response is a dict carrying that field, else the response body as-is.
        Fallback path: {"a11y_tree": ...} (empty list when the key is absent),
        or the get_state() result unchanged when it contains "error".
        Any failure: a dict with "error" and "message" keys.
    """
    try:
        if not (self.use_tcp and self.tcp_forwarded):
            # No TCP channel available: derive the tree from the combined state.
            combined = self.get_state()
            if "error" in combined:
                return combined
            return {"a11y_tree": combined.get("a11y_tree", [])}

        reply = requests.get(f"{self.tcp_base_url}/a11y_tree", timeout=10)
        if reply.status_code != 200:
            return {
                "error": "HTTP Error",
                "message": f"HTTP request failed with status {reply.status_code}",
            }

        payload = reply.json()
        has_data_field = isinstance(payload, dict) and "data" in payload
        if not has_data_field:
            # Fallback: assume the body already is the tree in direct JSON form.
            return payload

        # Expected format: the tree is a JSON document nested in "data".
        try:
            return json.loads(payload["data"])
        except json.JSONDecodeError:
            return {
                "error": "Parse Error",
                "message": "Failed to parse JSON data from TCP response data field",
            }

    except requests.exceptions.RequestException as e:
        return {
            "error": "TCP Error",
            "message": f"TCP request failed: {str(e)}",
        }
    except Exception as e:
        detail = str(e)
        return {
            "error": detail,
            "message": f"Error getting a11y tree: {detail}",
        }
932
+
933
def get_phone_state(self) -> Dict[str, Any]:
    """
    Fetch only the phone state.

    When TCP is enabled and forwarded, the state is requested from the
    /phone_state endpoint; otherwise it is extracted from get_state().

    Returns:
        TCP path: the JSON decoded from the response's "data" field when the
        response is a dict carrying that field, else the response body as-is.
        Fallback path: {"phone_state": ...} (empty dict when the key is
        absent), or the get_state() result unchanged when it contains "error".
        Any failure: a dict with "error" and "message" keys.
    """
    try:
        if not (self.use_tcp and self.tcp_forwarded):
            # No TCP channel available: derive the value from the combined state.
            combined = self.get_state()
            if "error" in combined:
                return combined
            return {"phone_state": combined.get("phone_state", {})}

        reply = requests.get(f"{self.tcp_base_url}/phone_state", timeout=10)
        if reply.status_code != 200:
            return {
                "error": "HTTP Error",
                "message": f"HTTP request failed with status {reply.status_code}",
            }

        payload = reply.json()
        has_data_field = isinstance(payload, dict) and "data" in payload
        if not has_data_field:
            # Fallback: assume the body already is the state in direct JSON form.
            return payload

        # Expected format: the state is a JSON document nested in "data".
        try:
            return json.loads(payload["data"])
        except json.JSONDecodeError:
            return {
                "error": "Parse Error",
                "message": "Failed to parse JSON data from TCP response data field",
            }

    except requests.exceptions.RequestException as e:
        return {
            "error": "TCP Error",
            "message": f"TCP request failed: {str(e)}",
        }
    except Exception as e:
        detail = str(e)
        return {
            "error": detail,
            "message": f"Error getting phone state: {detail}",
        }
982
+
983
def ping(self) -> Dict[str, Any]:
    """
    Test the TCP connection using the /ping endpoint.

    Returns:
        {"status": "success", "message": ..., "response": ...} when the
        endpoint answers HTTP 200; "response" is the decoded JSON body ({} for
        an empty body) or the raw text when the body is not JSON.
        {"status": "error", "message": ...} when TCP is not enabled/forwarded,
        the endpoint answers with another status, or the request fails.
    """
    try:
        if not (self.use_tcp and self.tcp_forwarded):
            return {
                "status": "error",
                "message": "TCP communication is not enabled",
            }

        response = requests.get(f"{self.tcp_base_url}/ping", timeout=5)
        if response.status_code != 200:
            return {
                "status": "error",
                "message": f"Ping failed with status {response.status_code}: {response.text}",
            }

        try:
            tcp_response = response.json() if response.content else {}
        except ValueError:
            # Every JSON decode error requests can raise (stdlib json,
            # simplejson, requests.exceptions.JSONDecodeError) is a ValueError.
            # Catching only json.JSONDecodeError lets the simplejson-backed
            # variant fall through to the RequestException handler below and
            # turns a reachable endpoint into a reported failure.
            return {
                "status": "success",
                "message": "Ping successful (non-JSON response)",
                "response": response.text,
            }

        logger.debug("Ping TCP response: %s", tcp_response)
        return {
            "status": "success",
            "message": "Ping successful",
            "response": tcp_response,
        }

    except requests.exceptions.RequestException as e:
        return {
            "status": "error",
            "message": f"Ping failed: {str(e)}",
        }
    except Exception as e:
        return {
            "status": "error",
            "message": f"Error during ping: {str(e)}",
        }
1030
+
1031
+
1032
def _shell_test_cli(serial: str, command: str) -> tuple[str, float]:
    """
    Time one `adb -s <serial> shell <command>` invocation of the adb CLI.

    Args:
        serial: Device serial number
        command: Shell command to run
    Returns:
        Tuple of (output, elapsed_time). The output is the stripped stdout
        when adb exits with status 0 and the stripped stderr otherwise; the
        elapsed time is in seconds as measured by perf_counter.
    """
    import subprocess
    from time import perf_counter

    argv = ["adb", "-s", serial, "shell", command]

    started = perf_counter()
    completed = subprocess.run(argv, capture_output=True, text=True)
    elapsed = perf_counter() - started

    if completed.returncode == 0:
        text = completed.stdout
    else:
        text = completed.stderr
    return text.strip(), elapsed
1050
+
1051
+
1052
def _shell_test(serial: str = "emulator-5554") -> None:
    """
    Manually compare shell latency of the adbutils client and the adb CLI.

    Runs an echo command and the portal state content query first through
    adb.device(serial).shell() and then through _shell_test_cli(), printing
    the elapsed time of each call. Output of the state query is not printed.

    Args:
        serial: Device serial number to run against (default: "emulator-5554")
    """
    echo_cmd = "echo 'Hello, World!'"
    state_cmd = "content query --uri content://com.droidrun.portal/state"

    device = adb.device(serial)

    # Native Python adb client. perf_counter matches the clock used by
    # _shell_test_cli, so both sets of timings are comparable.
    start = time.perf_counter()
    res = device.shell(echo_cmd)
    elapsed = time.perf_counter() - start
    print(f"[Native] Shell execution took {elapsed:.3f} seconds: {res}")

    start = time.perf_counter()
    device.shell(state_cmd)
    elapsed = time.perf_counter() - start
    print(f"[Native] Shell execution took {elapsed:.3f} seconds: phone_state")

    # CLI version
    output, elapsed = _shell_test_cli(serial, echo_cmd)
    print(f"[CLI] Shell execution took {elapsed:.3f} seconds: {output}")

    output, elapsed = _shell_test_cli(serial, state_cmd)
    print(f"[CLI] Shell execution took {elapsed:.3f} seconds: phone_state")
1073
+
1074
+
1075
def _list_packages():
    """Manual check: print what list_packages() returns for an AdbTools() built with no arguments."""
    print(AdbTools().list_packages())
1078
+
1079
+
1080
def _start_app():
    """Manual check: call start_app("com.android.settings", ".Settings") on an AdbTools() built with no arguments."""
    AdbTools().start_app("com.android.settings", ".Settings")
1083
+
1084
+
1085
# Script entry point for manual testing. The helpers _shell_test_cli,
# _shell_test, _list_packages and _start_app are defined once, earlier in this
# module; defining them a second time here would only rebind the same names.
if __name__ == "__main__":
    _start_app()