droidrun-0.3.9-py3-none-any.whl → droidrun-0.3.10.dev3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. droidrun/__init__.py +2 -3
  2. droidrun/__main__.py +1 -1
  3. droidrun/agent/__init__.py +1 -1
  4. droidrun/agent/codeact/__init__.py +1 -4
  5. droidrun/agent/codeact/codeact_agent.py +66 -40
  6. droidrun/agent/codeact/events.py +6 -3
  7. droidrun/agent/codeact/prompts.py +2 -2
  8. droidrun/agent/common/events.py +4 -2
  9. droidrun/agent/context/__init__.py +1 -3
  10. droidrun/agent/context/agent_persona.py +2 -1
  11. droidrun/agent/context/context_injection_manager.py +6 -6
  12. droidrun/agent/context/episodic_memory.py +5 -3
  13. droidrun/agent/context/personas/__init__.py +3 -3
  14. droidrun/agent/context/personas/app_starter.py +3 -3
  15. droidrun/agent/context/personas/big_agent.py +3 -3
  16. droidrun/agent/context/personas/default.py +3 -3
  17. droidrun/agent/context/personas/ui_expert.py +5 -5
  18. droidrun/agent/context/task_manager.py +15 -17
  19. droidrun/agent/droid/__init__.py +1 -1
  20. droidrun/agent/droid/droid_agent.py +327 -180
  21. droidrun/agent/droid/events.py +91 -9
  22. droidrun/agent/executor/__init__.py +13 -0
  23. droidrun/agent/executor/events.py +24 -0
  24. droidrun/agent/executor/executor_agent.py +327 -0
  25. droidrun/agent/executor/prompts.py +136 -0
  26. droidrun/agent/manager/__init__.py +18 -0
  27. droidrun/agent/manager/events.py +20 -0
  28. droidrun/agent/manager/manager_agent.py +459 -0
  29. droidrun/agent/manager/prompts.py +223 -0
  30. droidrun/agent/oneflows/app_starter_workflow.py +118 -0
  31. droidrun/agent/oneflows/text_manipulator.py +204 -0
  32. droidrun/agent/planner/__init__.py +3 -3
  33. droidrun/agent/planner/events.py +6 -3
  34. droidrun/agent/planner/planner_agent.py +27 -42
  35. droidrun/agent/planner/prompts.py +2 -2
  36. droidrun/agent/usage.py +11 -11
  37. droidrun/agent/utils/__init__.py +11 -1
  38. droidrun/agent/utils/async_utils.py +2 -1
  39. droidrun/agent/utils/chat_utils.py +48 -60
  40. droidrun/agent/utils/device_state_formatter.py +177 -0
  41. droidrun/agent/utils/executer.py +12 -11
  42. droidrun/agent/utils/inference.py +114 -0
  43. droidrun/agent/utils/llm_picker.py +2 -0
  44. droidrun/agent/utils/message_utils.py +85 -0
  45. droidrun/agent/utils/tools.py +220 -0
  46. droidrun/agent/utils/trajectory.py +8 -7
  47. droidrun/cli/__init__.py +1 -1
  48. droidrun/cli/logs.py +29 -28
  49. droidrun/cli/main.py +279 -143
  50. droidrun/config_manager/__init__.py +25 -0
  51. droidrun/config_manager/config_manager.py +583 -0
  52. droidrun/macro/__init__.py +2 -2
  53. droidrun/macro/__main__.py +1 -1
  54. droidrun/macro/cli.py +36 -34
  55. droidrun/macro/replay.py +7 -9
  56. droidrun/portal.py +1 -1
  57. droidrun/telemetry/__init__.py +2 -2
  58. droidrun/telemetry/events.py +3 -4
  59. droidrun/telemetry/phoenix.py +173 -0
  60. droidrun/telemetry/tracker.py +7 -5
  61. droidrun/tools/__init__.py +1 -1
  62. droidrun/tools/adb.py +210 -82
  63. droidrun/tools/ios.py +7 -5
  64. droidrun/tools/tools.py +25 -8
  65. {droidrun-0.3.9.dist-info → droidrun-0.3.10.dev3.dist-info}/METADATA +5 -3
  66. droidrun-0.3.10.dev3.dist-info/RECORD +70 -0
  67. droidrun/agent/common/default.py +0 -5
  68. droidrun/agent/context/reflection.py +0 -20
  69. droidrun/agent/oneflows/reflector.py +0 -265
  70. droidrun-0.3.9.dist-info/RECORD +0 -56
  71. {droidrun-0.3.9.dist-info → droidrun-0.3.10.dev3.dist-info}/WHEEL +0 -0
  72. {droidrun-0.3.9.dist-info → droidrun-0.3.10.dev3.dist-info}/entry_points.txt +0 -0
  73. {droidrun-0.3.9.dist-info → droidrun-0.3.10.dev3.dist-info}/licenses/LICENSE +0 -0
droidrun/tools/adb.py CHANGED
@@ -2,25 +2,27 @@
2
2
  UI Actions - Core UI interaction tools for Android device control.
3
3
  """
4
4
 
5
- import os
5
+ import base64
6
6
  import io
7
7
  import json
8
- import time
9
8
  import logging
9
+ import os
10
+ import time
11
+ from typing import Any, Dict, List, Optional, Tuple
12
+
13
+ import requests
14
+ from adbutils import adb
10
15
  from llama_index.core.workflow import Context
11
- from typing import Optional, Dict, Tuple, List, Any
16
+
12
17
  from droidrun.agent.common.events import (
18
+ DragActionEvent,
13
19
  InputTextActionEvent,
14
20
  KeyPressActionEvent,
15
21
  StartAppEvent,
16
22
  SwipeActionEvent,
17
23
  TapActionEvent,
18
- DragActionEvent,
19
24
  )
20
25
  from droidrun.tools.tools import Tools
21
- from adbutils import adb
22
- import requests
23
- import base64
24
26
 
25
27
  logger = logging.getLogger("droidrun-tools")
26
28
  PORTAL_DEFAULT_TCP_PORT = 8080
@@ -34,6 +36,8 @@ class AdbTools(Tools):
34
36
  serial: str | None = None,
35
37
  use_tcp: bool = False,
36
38
  remote_tcp_port: int = PORTAL_DEFAULT_TCP_PORT,
39
+ app_opener_llm = None,
40
+ text_manipulator_llm = None,
37
41
  ) -> None:
38
42
  """Initialize the AdbTools instance.
39
43
 
@@ -41,6 +45,8 @@ class AdbTools(Tools):
41
45
  serial: Device serial number
42
46
  use_tcp: Whether to use TCP communication (default: False)
43
47
  tcp_port: TCP port for communication (default: 8080)
48
+ app_opener_llm: LLM instance for app opening workflow (optional)
49
+ text_manipulator_llm: LLM instance for text manipulation (optional)
44
50
  """
45
51
  self.device = adb.device(serial=serial)
46
52
  self.use_tcp = use_tcp
@@ -60,6 +66,10 @@ class AdbTools(Tools):
60
66
  self.screenshots: List[Dict[str, Any]] = []
61
67
  # Trajectory saving level
62
68
  self.save_trajectories = "none"
69
+
70
+ # LLM instances for specialized workflows
71
+ self.app_opener_llm = app_opener_llm
72
+ self.text_manipulator_llm = text_manipulator_llm
63
73
 
64
74
  self.setup_keyboard()
65
75
 
@@ -68,6 +78,13 @@ class AdbTools(Tools):
68
78
  self.setup_tcp_forward()
69
79
 
70
80
 
81
+ def get_date(self) -> str:
82
+ """
83
+ Get the current date and time on device.
84
+ """
85
+ return self.device.shell("date").strip()
86
+
87
+
71
88
  def setup_tcp_forward(self) -> bool:
72
89
  """
73
90
  Set up ADB TCP port forwarding for communication with the portal app.
@@ -126,7 +143,7 @@ class AdbTools(Tools):
126
143
  c.close()
127
144
 
128
145
  self.tcp_forwarded = False
129
- logger.debug(f"TCP port forwarding removed")
146
+ logger.debug("TCP port forwarding removed")
130
147
  return True
131
148
  return True
132
149
  except Exception as e:
@@ -137,7 +154,7 @@ class AdbTools(Tools):
137
154
  """
138
155
  Set up the DroidRun keyboard as the default input method.
139
156
  Simple setup that just switches to DroidRun keyboard without saving/restoring.
140
-
157
+
141
158
  Returns:
142
159
  bool: True if setup was successful, False otherwise
143
160
  """
@@ -207,20 +224,19 @@ class AdbTools(Tools):
207
224
  return None
208
225
 
209
226
  @Tools.ui_action
210
- def tap_by_index(self, index: int) -> str:
227
+ def _extract_element_coordinates_by_index(self, index: int) -> Tuple[int, int]:
211
228
  """
212
- Tap on a UI element by its index.
213
-
214
- This function uses the cached clickable elements
215
- to find the element with the given index and tap on its center coordinates.
229
+ Extract center coordinates from an element by its index.
216
230
 
217
231
  Args:
218
- index: Index of the element to tap
232
+ index: Index of the element to find and extract coordinates from
219
233
 
220
234
  Returns:
221
- Result message
222
- """
235
+ Tuple of (x, y) center coordinates
223
236
 
237
+ Raises:
238
+ ValueError: If element not found, bounds format is invalid, or missing bounds
239
+ """
224
240
  def collect_all_indices(elements):
225
241
  """Recursively collect all indices from elements and their children."""
226
242
  indices = []
@@ -244,40 +260,57 @@ class AdbTools(Tools):
244
260
  return result
245
261
  return None
246
262
 
263
+ # Check if we have cached elements
264
+ if not self.clickable_elements_cache:
265
+ raise ValueError("No UI elements cached. Call get_state first.")
266
+
267
+ # Find the element with the given index (including in children)
268
+ element = find_element_by_index(self.clickable_elements_cache, index)
269
+
270
+ if not element:
271
+ # List available indices to help the user
272
+ indices = sorted(collect_all_indices(self.clickable_elements_cache))
273
+ indices_str = ", ".join(str(idx) for idx in indices[:20])
274
+ if len(indices) > 20:
275
+ indices_str += f"... and {len(indices) - 20} more"
276
+ raise ValueError(f"No element found with index {index}. Available indices: {indices_str}")
277
+
278
+ # Get the bounds of the element
279
+ bounds_str = element.get("bounds")
280
+ if not bounds_str:
281
+ element_text = element.get("text", "No text")
282
+ element_type = element.get("type", "unknown")
283
+ element_class = element.get("className", "Unknown class")
284
+ raise ValueError(f"Element with index {index} ('{element_text}', {element_class}, type: {element_type}) has no bounds and cannot be tapped")
285
+
286
+ # Parse the bounds (format: "left,top,right,bottom")
247
287
  try:
248
- # Check if we have cached elements
249
- if not self.clickable_elements_cache:
250
- return "Error: No UI elements cached. Call get_state first."
288
+ left, top, right, bottom = map(int, bounds_str.split(","))
289
+ except ValueError:
290
+ raise ValueError(f"Invalid bounds format for element with index {index}: {bounds_str}") from ValueError
251
291
 
252
- # Find the element with the given index (including in children)
253
- element = find_element_by_index(self.clickable_elements_cache, index)
292
+ # Calculate the center of the element
293
+ x = (left + right) // 2
294
+ y = (top + bottom) // 2
254
295
 
255
- if not element:
256
- # List available indices to help the user
257
- indices = sorted(collect_all_indices(self.clickable_elements_cache))
258
- indices_str = ", ".join(str(idx) for idx in indices[:20])
259
- if len(indices) > 20:
260
- indices_str += f"... and {len(indices) - 20} more"
296
+ return x, y
261
297
 
262
- return f"Error: No element found with index {index}. Available indices: {indices_str}"
298
+ def tap_by_index(self, index: int) -> str:
299
+ """
300
+ Tap on a UI element by its index.
263
301
 
264
- # Get the bounds of the element
265
- bounds_str = element.get("bounds")
266
- if not bounds_str:
267
- element_text = element.get("text", "No text")
268
- element_type = element.get("type", "unknown")
269
- element_class = element.get("className", "Unknown class")
270
- return f"Error: Element with index {index} ('{element_text}', {element_class}, type: {element_type}) has no bounds and cannot be tapped"
302
+ This function uses the cached clickable elements
303
+ to find the element with the given index and tap on its center coordinates.
271
304
 
272
- # Parse the bounds (format: "left,top,right,bottom")
273
- try:
274
- left, top, right, bottom = map(int, bounds_str.split(","))
275
- except ValueError:
276
- return f"Error: Invalid bounds format for element with index {index}: {bounds_str}"
305
+ Args:
306
+ index: Index of the element to tap
277
307
 
278
- # Calculate the center of the element
279
- x = (left + right) // 2
280
- y = (top + bottom) // 2
308
+ Returns:
309
+ Result message
310
+ """
311
+ try:
312
+ # Extract coordinates using the helper function
313
+ x, y = self._extract_element_coordinates_by_index(index)
281
314
 
282
315
  logger.debug(
283
316
  f"Tapping element with index {index} at coordinates ({x}, {y})"
@@ -287,10 +320,24 @@ class AdbTools(Tools):
287
320
  logger.debug(f"Tapped element with index {index} at coordinates ({x}, {y})")
288
321
 
289
322
  # Emit coordinate action event for trajectory recording
290
-
291
323
  if self._ctx:
292
- element_text = element.get("text", "No text")
293
- element_class = element.get("className", "Unknown class")
324
+ # Find element again for event details
325
+ def find_element_by_index(elements, target_index):
326
+ """Recursively find an element with the given index."""
327
+ for item in elements:
328
+ if item.get("index") == target_index:
329
+ return item
330
+ # Check children if present
331
+ children = item.get("children", [])
332
+ result = find_element_by_index(children, target_index)
333
+ if result:
334
+ return result
335
+ return None
336
+
337
+ element = find_element_by_index(self.clickable_elements_cache, index)
338
+ element_text = element.get("text", "No text") if element else "No text"
339
+ element_class = element.get("className", "Unknown class") if element else "Unknown class"
340
+ bounds_str = element.get("bounds", "") if element else ""
294
341
 
295
342
  tap_event = TapActionEvent(
296
343
  action_type="tap",
@@ -307,20 +354,34 @@ class AdbTools(Tools):
307
354
  time.sleep(0.5)
308
355
 
309
356
  # Create a descriptive response
357
+ def find_element_by_index(elements, target_index):
358
+ """Recursively find an element with the given index."""
359
+ for item in elements:
360
+ if item.get("index") == target_index:
361
+ return item
362
+ # Check children if present
363
+ children = item.get("children", [])
364
+ result = find_element_by_index(children, target_index)
365
+ if result:
366
+ return result
367
+ return None
368
+
369
+ element = find_element_by_index(self.clickable_elements_cache, index)
310
370
  response_parts = []
311
371
  response_parts.append(f"Tapped element with index {index}")
312
- response_parts.append(f"Text: '{element.get('text', 'No text')}'")
313
- response_parts.append(f"Class: {element.get('className', 'Unknown class')}")
314
- response_parts.append(f"Type: {element.get('type', 'unknown')}")
372
+ response_parts.append(f"Text: '{element.get('text', 'No text') if element else 'No text'}'")
373
+ response_parts.append(f"Class: {element.get('className', 'Unknown class') if element else 'Unknown class'}")
374
+ response_parts.append(f"Type: {element.get('type', 'unknown') if element else 'unknown'}")
315
375
 
316
376
  # Add information about children if present
317
- children = element.get("children", [])
318
- if children:
319
- child_texts = [
320
- child.get("text") for child in children if child.get("text")
321
- ]
322
- if child_texts:
323
- response_parts.append(f"Contains text: {' | '.join(child_texts)}")
377
+ if element:
378
+ children = element.get("children", [])
379
+ if children:
380
+ child_texts = [
381
+ child.get("text") for child in children if child.get("text")
382
+ ]
383
+ if child_texts:
384
+ response_parts.append(f"Contains text: {' | '.join(child_texts)}")
324
385
 
325
386
  response_parts.append(f"Coordinates: ({x}, {y})")
326
387
 
@@ -453,24 +514,31 @@ class AdbTools(Tools):
453
514
  return False
454
515
 
455
516
  @Tools.ui_action
456
- def input_text(self, text: str) -> str:
517
+ def input_text(self, text: str, index: int = -1, clear: bool = False) -> str:
457
518
  """
458
519
  Input text on the device.
459
520
  Always make sure that the Focused Element is not None before inputting text.
460
521
 
461
522
  Args:
462
523
  text: Text to input. Can contain spaces, newlines, and special characters including non-ASCII.
524
+ index: Index of the element to input text into. If -1, the focused element will be used.
525
+ clear: Whether to clear the text before inputting.
463
526
 
464
527
  Returns:
465
528
  Result message
466
529
  """
467
530
  try:
531
+ if index != -1:
532
+ self.tap_by_index(index)
533
+ # Encode the text to Base64 (needed for both TCP and content provider)
534
+ encoded_text = base64.b64encode(text.encode()).decode()
468
535
 
469
536
  if self.use_tcp and self.tcp_forwarded:
470
537
  # Use TCP communication
471
- encoded_text = base64.b64encode(text.encode()).decode()
472
-
473
- payload = {"base64_text": encoded_text}
538
+ payload = {
539
+ "base64_text": encoded_text,
540
+ "clear": clear # Include clear parameter for TCP
541
+ }
474
542
  response = requests.post(
475
543
  f"{self.tcp_base_url}/keyboard/input",
476
544
  json=payload,
@@ -479,32 +547,48 @@ class AdbTools(Tools):
479
547
  )
480
548
 
481
549
  logger.debug(
482
- f"Keyboard input TCP response: {response.status_code}, {response.text}"
550
+ f"Keyboard input TCP response: {response.status_code}, {response.text}"
483
551
  )
484
552
 
485
553
  if response.status_code != 200:
486
554
  return f"Error: HTTP request failed with status {response.status_code}: {response.text}"
487
555
 
556
+ # For TCP, you might want to parse the response for success/error details
557
+ try:
558
+ result_data = response.json()
559
+ if result_data.get("status") == "success":
560
+ return f"Text input completed (clear={clear}): {text[:50]}{'...' if len(text) > 50 else ''}"
561
+ else:
562
+ return f"Error: {result_data.get('error', 'Unknown error')}"
563
+ except: # noqa: E722
564
+ return f"Text input completed (clear={clear}): {text[:50]}{'...' if len(text) > 50 else ''}"
565
+
488
566
  else:
489
567
  # Fallback to content provider method
490
- # Encode the text to Base64
491
- encoded_text = base64.b64encode(text.encode()).decode()
568
+ # Build the content insert command with clear parameter
569
+ clear_str = "true" if clear else "false"
570
+ cmd = (
571
+ f'content insert --uri "content://com.droidrun.portal/keyboard/input" '
572
+ f'--bind base64_text:s:"{encoded_text}" '
573
+ f'--bind clear:b:{clear_str}'
574
+ )
492
575
 
493
- cmd = f'content insert --uri "content://com.droidrun.portal/keyboard/input" --bind base64_text:s:"{encoded_text}"'
494
- self.device.shell(cmd)
576
+ # Execute the command and capture output for better error handling
577
+ result = self.device.shell(cmd)
578
+ logger.debug(f"Content provider result: {result}")
495
579
 
496
580
  if self._ctx:
497
581
  input_event = InputTextActionEvent(
498
582
  action_type="input_text",
499
- description=f"Input text: '{text[:50]}{'...' if len(text) > 50 else ''}'",
583
+ description=f"Input text: '{text[:50]}{'...' if len(text) > 50 else ''}' (clear={clear})",
500
584
  text=text,
501
585
  )
502
586
  self._ctx.write_event_to_stream(input_event)
503
587
 
504
588
  logger.debug(
505
- f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"
589
+ f"Text input completed (clear={clear}): {text[:50]}{'...' if len(text) > 50 else ''}"
506
590
  )
507
- return f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"
591
+ return f"Text input completed (clear={clear}): {text[:50]}{'...' if len(text) > 50 else ''}"
508
592
 
509
593
  except requests.exceptions.RequestException as e:
510
594
  return f"Error: TCP request failed: {str(e)}"
@@ -526,13 +610,13 @@ class AdbTools(Tools):
526
610
  if self._ctx:
527
611
  key_event = KeyPressActionEvent(
528
612
  action_type="key_press",
529
- description=f"Pressed key BACK",
613
+ description="Pressed key BACK",
530
614
  keycode=4,
531
615
  key_name="BACK",
532
616
  )
533
617
  self._ctx.write_event_to_stream(key_event)
534
618
 
535
- return f"Pressed key BACK"
619
+ return "Pressed key BACK"
536
620
  except ValueError as e:
537
621
  return f"Error: {str(e)}"
538
622
 
@@ -645,7 +729,7 @@ class AdbTools(Tools):
645
729
  Take a screenshot of the device.
646
730
  This function captures the current screen and adds the screenshot to context in the next message.
647
731
  Also stores the screenshot in the screenshots list with timestamp for later GIF creation.
648
-
732
+
649
733
  Args:
650
734
  hide_overlay: Whether to hide the overlay elements during screenshot (default: True)
651
735
  """
@@ -659,11 +743,11 @@ class AdbTools(Tools):
659
743
  url = f"{self.tcp_base_url}/screenshot"
660
744
  if not hide_overlay:
661
745
  url += "?hideOverlay=false"
662
-
746
+
663
747
  response = requests.get(url, timeout=10)
664
748
  if response.status_code == 200:
665
749
  tcp_response = response.json()
666
-
750
+
667
751
  # Check if response has the expected format with data field
668
752
  if tcp_response.get("status") == "success" and "data" in tcp_response:
669
753
  # Decode base64 string to bytes
@@ -696,11 +780,12 @@ class AdbTools(Tools):
696
780
  return img_format, image_bytes
697
781
 
698
782
  except requests.exceptions.RequestException as e:
699
- raise ValueError(f"Error taking screenshot via TCP: {str(e)}")
783
+ raise ValueError(f"Error taking screenshot via TCP: {str(e)}") from e
700
784
  except ValueError as e:
701
- raise ValueError(f"Error taking screenshot: {str(e)}")
785
+ raise ValueError(f"Error taking screenshot: {str(e)}") from e
702
786
  except Exception as e:
703
- raise ValueError(f"Unexpected error taking screenshot: {str(e)}")
787
+ raise ValueError(f"Unexpected error taking screenshot: {str(e)}") from e
788
+
704
789
 
705
790
  def list_packages(self, include_system_apps: bool = False) -> List[str]:
706
791
  """
@@ -716,7 +801,50 @@ class AdbTools(Tools):
716
801
  logger.debug("Listing packages")
717
802
  return self.device.list_packages(["-3"] if not include_system_apps else [])
718
803
  except ValueError as e:
719
- raise ValueError(f"Error listing packages: {str(e)}")
804
+ raise ValueError(f"Error listing packages: {str(e)}") from e
805
+
806
+ def get_apps(self, include_system: bool = True) -> List[Dict[str, str]]:
807
+ """
808
+ Get installed apps with package name and label in human readable format.
809
+
810
+ Args:
811
+ include_system: Whether to include system apps (default: True)
812
+
813
+ Returns:
814
+ List of dictionaries containing 'package' and 'label' keys
815
+ """
816
+ try:
817
+ logger.debug("Getting apps via content provider")
818
+
819
+ # Query the content provider for packages
820
+ adb_output = self.device.shell(
821
+ "content query --uri content://com.droidrun.portal/packages"
822
+ )
823
+
824
+ # Parse the content provider output
825
+ packages_data = self._parse_content_provider_output(adb_output)
826
+
827
+ if not packages_data or "packages" not in packages_data:
828
+ logger.warning("No packages data found in content provider response")
829
+ return []
830
+
831
+ apps = []
832
+ for package_info in packages_data["packages"]:
833
+ # Filter system apps if requested
834
+ if not include_system and package_info.get("isSystemApp", False):
835
+ continue
836
+
837
+ apps.append({
838
+ "package": package_info.get("packageName", ""),
839
+ "label": package_info.get("label", "")
840
+ })
841
+
842
+ logger.debug(f"Found {len(apps)} apps")
843
+ return apps
844
+
845
+ except Exception as e:
846
+ logger.error(f"Error getting apps: {str(e)}")
847
+ raise ValueError(f"Error getting apps: {str(e)}") from e
720
848
 
721
849
  @Tools.ui_action
722
850
  def complete(self, success: bool, reason: str = ""):
@@ -831,7 +959,7 @@ class AdbTools(Tools):
831
959
  data_str = None
832
960
  if "data" in state_data:
833
961
  data_str = state_data["data"]
834
-
962
+
835
963
  if data_str:
836
964
  try:
837
965
  combined_data = json.loads(data_str)
@@ -842,7 +970,7 @@ class AdbTools(Tools):
842
970
  }
843
971
  else:
844
972
  return {
845
- "error": "Format Error",
973
+ "error": "Format Error",
846
974
  "message": "Neither 'data' nor 'message' field found in ContentProvider response",
847
975
  }
848
976
  else:
@@ -956,8 +1084,8 @@ def _shell_test_cli(serial: str, command: str) -> tuple[str, float]:
956
1084
  Returns:
957
1085
  Tuple of (output, elapsed_time)
958
1086
  """
959
- import time
960
1087
  import subprocess
1088
+ import time
961
1089
 
962
1090
  adb_cmd = ["adb", "-s", serial, "shell", command]
963
1091
  start = time.perf_counter()
@@ -1009,8 +1137,8 @@ def _shell_test_cli(serial: str, command: str) -> tuple[str, float]:
1009
1137
  Returns:
1010
1138
  Tuple of (output, elapsed_time)
1011
1139
  """
1012
- import time
1013
1140
  import subprocess
1141
+ import time
1014
1142
 
1015
1143
  adb_cmd = ["adb", "-s", serial, "shell", command]
1016
1144
  start = time.perf_counter()
droidrun/tools/ios.py CHANGED
@@ -2,11 +2,13 @@
2
2
  UI Actions - Core UI interaction tools for iOS device control.
3
3
  """
4
4
 
5
+ import logging
5
6
  import re
6
7
  import time
7
- from typing import Optional, Dict, Tuple, List, Any
8
- import logging
8
+ from typing import Any, Dict, List, Optional, Tuple
9
+
9
10
  import requests
11
+
10
12
  from droidrun.tools.tools import Tools
11
13
 
12
14
  logger = logging.getLogger("IOS")
@@ -37,7 +39,7 @@ SYSTEM_BUNDLE_IDENTIFIERS = [
37
39
  class IOSTools(Tools):
38
40
  """Core UI interaction tools for iOS device control."""
39
41
 
40
- def __init__(self, url: str, bundle_identifiers: List[str] = []) -> None:
42
+ def __init__(self, url: str, bundle_identifiers: List[str] = []) -> None: # noqa: B006
41
43
  """Initialize the IOSTools instance.
42
44
 
43
45
  Args:
@@ -387,7 +389,7 @@ class IOSTools(Tools):
387
389
  Bool indicating success or failure
388
390
  """
389
391
  # TODO: implement this
390
- logger.info(f"Drag action FAILED! Not implemented for iOS")
392
+ logger.info("Drag action FAILED! Not implemented for iOS")
391
393
  return False
392
394
 
393
395
  def input_text(self, text: str) -> str:
@@ -506,7 +508,7 @@ class IOSTools(Tools):
506
508
 
507
509
  except Exception as e:
508
510
  logger.error(f"Error capturing screenshot: {e}")
509
- raise ValueError(f"Error taking screenshot: {str(e)}")
511
+ raise ValueError(f"Error taking screenshot: {str(e)}") from e
510
512
 
511
513
  def _get_phone_state(self) -> Dict[str, Any]:
512
514
  """
droidrun/tools/tools.py CHANGED
@@ -1,9 +1,8 @@
1
- from abc import ABC, abstractmethod
2
- from typing import List, Optional, Dict, Any
3
1
  import logging
4
- from typing import Tuple, Dict, Callable, Any, Optional
5
- from functools import wraps
6
2
  import sys
3
+ from abc import ABC, abstractmethod
4
+ from functools import wraps
5
+ from typing import Any, Callable, Dict, List, Optional, Tuple
7
6
 
8
7
  # Get a logger for this module
9
8
  logger = logging.getLogger(__name__)
@@ -24,15 +23,15 @@ class Tools(ABC):
24
23
  def wrapper(*args, **kwargs):
25
24
  self = args[0]
26
25
  result = func(*args, **kwargs)
27
-
26
+
28
27
  # Check if save_trajectories attribute exists and is set to "action"
29
28
  if hasattr(self, 'save_trajectories') and self.save_trajectories == "action":
30
29
  frame = sys._getframe(1)
31
30
  caller_globals = frame.f_globals
32
-
31
+
33
32
  step_screenshots = caller_globals.get('step_screenshots')
34
33
  step_ui_states = caller_globals.get('step_ui_states')
35
-
34
+
36
35
  if step_screenshots is not None:
37
36
  step_screenshots.append(self.take_screenshot()[1])
38
37
  if step_ui_states is not None:
@@ -46,6 +45,12 @@ class Tools(ABC):
46
45
  Get the current state of the tool.
47
46
  """
48
47
  pass
48
+ @abstractmethod
49
+ def get_date(self) -> str:
50
+ """
51
+ Get the current date on device.
52
+ """
53
+ pass
49
54
 
50
55
  @abstractmethod
51
56
  def tap_by_index(self, index: int) -> str:
@@ -77,7 +82,7 @@ class Tools(ABC):
77
82
  pass
78
83
 
79
84
  @abstractmethod
80
- def input_text(self, text: str) -> str:
85
+ def input_text(self, text: str, index: int = -1, clear: bool = False) -> str:
81
86
  """
82
87
  Input the given text into a focused input field.
83
88
  """
@@ -117,6 +122,12 @@ class Tools(ABC):
117
122
  List all packages on the device.
118
123
  """
119
124
  pass
125
+ @abstractmethod
126
+ def get_apps(self, include_system_apps: bool = True) -> List[Dict[str, Any]]:
127
+ """
128
+ List all apps on the device.
129
+ """
130
+ pass
120
131
 
121
132
  @abstractmethod
122
133
  def remember(self, information: str) -> str:
@@ -138,6 +149,12 @@ class Tools(ABC):
138
149
  Complete the tool. This is used to indicate that the tool has completed its task.
139
150
  """
140
151
  pass
152
+ @abstractmethod
153
+ def _extract_element_coordinates_by_index(self, index: int) -> Tuple[int, int]:
154
+ """
155
+ Extract the coordinates of the element with the given index.
156
+ """
157
+ pass
141
158
 
142
159
 
143
160
  def describe_tools(tools: Tools, exclude_tools: Optional[List[str]] = None) -> Dict[str, Callable[..., Any]]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: droidrun
3
- Version: 0.3.9
3
+ Version: 0.3.10.dev3
4
4
  Summary: A framework for controlling Android devices through LLM agents
5
5
  Project-URL: Homepage, https://github.com/droidrun/droidrun
6
6
  Project-URL: Bug Tracker, https://github.com/droidrun/droidrun/issues
@@ -27,10 +27,10 @@ Classifier: Topic :: Software Development :: Testing
27
27
  Classifier: Topic :: Software Development :: Testing :: Acceptance
28
28
  Classifier: Topic :: System :: Emulators
29
29
  Classifier: Topic :: Utilities
30
- Requires-Python: >=3.11
30
+ Requires-Python: >=3.13
31
31
  Requires-Dist: adbutils>=2.10.2
32
32
  Requires-Dist: apkutils==2.0.0
33
- Requires-Dist: llama-index-llms-google-genai>=0.6.2
33
+ Requires-Dist: arize-phoenix>=12.3.0
34
34
  Requires-Dist: llama-index==0.14.4
35
35
  Requires-Dist: posthog>=6.7.6
36
36
  Requires-Dist: pydantic>=2.11.10
@@ -54,6 +54,8 @@ Provides-Extra: openai
54
54
  Requires-Dist: llama-index-llms-openai-like>=0.5.1; extra == 'openai'
55
55
  Requires-Dist: llama-index-llms-openai>=0.5.6; extra == 'openai'
56
56
  Requires-Dist: openai>=1.99.1; extra == 'openai'
57
+ Provides-Extra: openrouter
58
+ Requires-Dist: llama-index-llms-openrouter>=0.4.2; extra == 'openrouter'
57
59
  Description-Content-Type: text/markdown
58
60
 
59
61
  <picture>