PyPI - droidrun - Versions diffs - droidrun-0.3.0-py3-none-any.whl → droidrun-0.3.2-py3-none-any.whl - Mend

droidrun-0.3.0-py3-none-any.whl → droidrun-0.3.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

droidrun/__init__.py +1 -10
droidrun/adb/device.py +101 -71
droidrun/adb/manager.py +3 -3
droidrun/agent/codeact/codeact_agent.py +22 -12
droidrun/agent/context/personas/__init__.py +0 -2
droidrun/agent/context/personas/default.py +1 -1
droidrun/agent/droid/droid_agent.py +56 -8
droidrun/agent/droid/events.py +4 -0
droidrun/agent/planner/planner_agent.py +32 -12
droidrun/agent/utils/chat_utils.py +4 -7
droidrun/agent/utils/llm_picker.py +1 -0
droidrun/cli/main.py +163 -78
droidrun/portal.py +139 -0
droidrun/telemetry/__init__.py +4 -0
droidrun/telemetry/events.py +27 -0
droidrun/telemetry/tracker.py +83 -0
droidrun/tools/adb.py +199 -407
droidrun/tools/ios.py +10 -5
droidrun/tools/tools.py +42 -11
{droidrun-0.3.0.dist-info → droidrun-0.3.2.dist-info}/METADATA +19 -29
{droidrun-0.3.0.dist-info → droidrun-0.3.2.dist-info}/RECORD +24 -23
droidrun/agent/context/personas/extractor.py +0 -52
droidrun/agent/context/todo.txt +0 -4
droidrun/run.py +0 -105
{droidrun-0.3.0.dist-info → droidrun-0.3.2.dist-info}/WHEEL +0 -0
{droidrun-0.3.0.dist-info → droidrun-0.3.2.dist-info}/entry_points.txt +0 -0
{droidrun-0.3.0.dist-info → droidrun-0.3.2.dist-info}/licenses/LICENSE +0 -0

droidrun/__init__.py CHANGED Viewed

@@ -5,26 +5,17 @@ DroidRun - A framework for controlling Android devices through LLM agents.
 __version__ = "0.3.0"
 # Import main classes for easier access
-from droidrun.agent.codeact.codeact_agent import CodeActAgent
-from droidrun.agent.planner.planner_agent import PlannerAgent
-from droidrun.agent.utils.executer import SimpleCodeExecutor
 from droidrun.agent.utils.llm_picker import load_llm
 from droidrun.adb.manager import DeviceManager
-from droidrun.tools.tools import Tools
-from droidrun.tools.adb import AdbTools
-from droidrun.tools.ios import IOSTools
+from droidrun.tools import Tools, AdbTools, IOSTools
 from droidrun.agent.droid import DroidAgent
 # Make main components available at package level
 __all__ = [
     "DroidAgent",
-    "CodeActAgent",
-    "PlannerAgent",
     "DeviceManager",
-    "Tools",
     "load_llm",
-    "SimpleCodeExecutor",
     "Tools",
     "AdbTools",
     "IOSTools",

droidrun/adb/device.py CHANGED Viewed

@@ -10,12 +10,13 @@ import string
 from typing import Dict, Optional, Tuple, List
 from droidrun.adb.wrapper import ADBWrapper
 class Device:
     """High-level representation of an Android device."""
     def __init__(self, serial: str, adb: ADBWrapper):
         """Initialize device.
         Args:
             serial: Device serial number
             adb: ADB wrapper instance
@@ -60,9 +61,13 @@ class Device:
         """Get SDK level."""
         return await self.get_property("ro.build.version.sdk")
+    async def shell(self, command: str, timeout: float | None = None) -> str:
+        """Execute a shell command on the device."""
+        return await self._adb.shell(self._serial, command, timeout)
     async def tap(self, x: int, y: int) -> None:
         """Tap at coordinates.
         Args:
             x: X coordinate
             y: Y coordinate
@@ -70,15 +75,10 @@ class Device:
         await self._adb.shell(self._serial, f"input tap {x} {y}")
     async def swipe(
-        self,
-        start_x: int,
-        start_y: int,
-        end_x: int,
-        end_y: int,
-        duration_ms: int = 300
+        self, start_x: int, start_y: int, end_x: int, end_y: int, duration_ms: int = 300
     ) -> None:
         """Perform swipe gesture.
         Args:
             start_x: Starting X coordinate
             start_y: Starting Y coordinate
@@ -88,12 +88,12 @@ class Device:
         """
         await self._adb.shell(
             self._serial,
-            f"input swipe {start_x} {start_y} {end_x} {end_y} {duration_ms}"
+            f"input swipe {start_x} {start_y} {end_x} {end_y} {duration_ms}",
         )
     async def input_text(self, text: str) -> None:
         """Input text.
         Args:
             text: Text to input
         """
@@ -101,7 +101,7 @@ class Device:
     async def press_key(self, keycode: int) -> None:
         """Press a key.
         Args:
             keycode: Android keycode to press
         """
@@ -111,10 +111,10 @@ class Device:
         self,
         package: str,
         activity: str = ".MainActivity",
-        extras: Optional[Dict[str, str]] = None
+        extras: Optional[Dict[str, str]] = None,
     ) -> None:
         """Start an app activity.
         Args:
             package: Package name
             activity: Activity name
@@ -125,48 +125,56 @@ class Device:
             for key, value in extras.items():
                 cmd += f" -e {key} {value}"
         await self._adb.shell(self._serial, cmd)
     async def start_app(self, package: str, activity: str = "") -> str:
         """Start an app on the device.
         Args:
             package: Package name
             activity: Optional activity name (if empty, launches default activity)
         Returns:
             Result message
         """
         if activity:
             if not activity.startswith(".") and "." not in activity:
                 activity = f".{activity}"
-            if not activity.startswith(".") and "." in activity and not activity.startswith(package):
+            if (
+                not activity.startswith(".")
+                and "." in activity
+                and not activity.startswith(package)
+            ):
                 # Fully qualified activity name
                 component = activity.split("/", 1)
-                return await self.start_activity(component[0], component[1] if len(component) > 1 else activity)
+                return await self.start_activity(
+                    component[0], component[1] if len(component) > 1 else activity
+                )
             # Relative activity name
             return await self.start_activity(package, activity)
         # Start main activity using monkey
         cmd = f"monkey -p {package} -c android.intent.category.LAUNCHER 1"
         result = await self._adb.shell(self._serial, cmd)
         return f"Started {package}"
-    async def install_app(self, apk_path: str, reinstall: bool = False, grant_permissions: bool = True) -> str:
+    async def install_app(
+        self, apk_path: str, reinstall: bool = False, grant_permissions: bool = True
+    ) -> str:
         """Install an APK on the device.
         Args:
             apk_path: Path to the APK file
             reinstall: Whether to reinstall if app exists
             grant_permissions: Whether to grant all requested permissions
         Returns:
             Installation result
         """
         if not os.path.exists(apk_path):
             return f"Error: APK file not found: {apk_path}"
         # Build install command args
         install_args = ["install"]
         if reinstall:
@@ -174,28 +182,28 @@ class Device:
         if grant_permissions:
             install_args.append("-g")
         install_args.append(apk_path)
         try:
             stdout, stderr = await self._adb._run_device_command(
                 self._serial,
                 install_args,
-                timeout=120  # Longer timeout for installation
+                timeout=120,  # Longer timeout for installation
             )
             if "success" in stdout.lower():
                 return f"Successfully installed {os.path.basename(apk_path)}"
             return f"Installation failed: {stdout or stderr}"
         except Exception as e:
             return f"Installation failed: {str(e)}"
     async def uninstall_app(self, package: str, keep_data: bool = False) -> str:
         """Uninstall an app from the device.
         Args:
             package: Package name to uninstall
             keep_data: Whether to keep app data and cache directories
         Returns:
             Uninstallation result
         """
@@ -203,41 +211,42 @@ class Device:
         if keep_data:
             cmd.append("-k")
         cmd.append(package)
         result = await self._adb.shell(self._serial, " ".join(cmd))
         return result.strip()
     async def take_screenshot(self, quality: int = 75) -> Tuple[str, bytes]:
         """Take a screenshot of the device and compress it.
         Args:
             quality: JPEG quality (1-100, lower means smaller file size)
         Returns:
             Tuple of (local file path, screenshot data as bytes)
         """
         # Create a temporary file for the screenshot
         with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp:
             screenshot_path = temp.name
         try:
             # Generate a random filename for the device
             timestamp = int(time.time())
-            random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
+            random_suffix = "".join(
+                random.choices(string.ascii_lowercase + string.digits, k=8)
+            )
             device_path = f"/sdcard/screenshot_{timestamp}_{random_suffix}.png"
             # Take screenshot using screencap command
             await self._adb.shell(self._serial, f"screencap -p {device_path}")
             # Pull screenshot to local machine
             await self._adb._run_device_command(
-                self._serial,
-                ["pull", device_path, screenshot_path]
+                self._serial, ["pull", device_path, screenshot_path]
             )
             # Clean up on device
             await self._adb.shell(self._serial, f"rm {device_path}")
             # Read the screenshot file
             with open(screenshot_path, "rb") as f:
                 screenshot_data = f.read()
@@ -249,12 +258,14 @@ class Device:
                 # Create buffer for the compressed image
                 buffer = io.BytesIO()
                 # Load the PNG data into a PIL Image
                 with Image.open(io.BytesIO(screenshot_data)) as img:
                     # Convert to RGB (removing alpha channel if present) and save as JPEG
                     converted_img = img.convert("RGB") if img.mode == "RGBA" else img
-                    converted_img.save(buffer, format="JPEG", quality=quality, optimize=True)
+                    converted_img.save(
+                        buffer, format="JPEG", quality=quality, optimize=True
+                    )
                     compressed_data = buffer.getvalue()
                 # Get size reduction info for logging
@@ -263,6 +274,7 @@ class Device:
                 reduction = 100 - (jpg_size / png_size * 100) if png_size > 0 else 0
                 import logging
                 logger = logging.getLogger("droidrun")
                 logger.debug(
                     f"Screenshot compressed successfully: {png_size:.1f}KB → {jpg_size:.1f}KB ({reduction:.1f}% reduction)"
@@ -275,9 +287,11 @@ class Device:
                 return screenshot_path, screenshot_data
             except Exception as e:
                 # If compression fails, return the original PNG data
-                logger.warning(f"Screenshot compression failed: {e}, returning uncompressed")
+                logger.warning(
+                    f"Screenshot compression failed: {e}, returning uncompressed"
+                )
                 return screenshot_path, screenshot_data
         except Exception as e:
             # Clean up in case of error
             try:
@@ -285,31 +299,47 @@ class Device:
             except OSError:
                 pass
             raise RuntimeError(f"Screenshot capture failed: {str(e)}")
-    async def list_packages(self, include_system_apps: bool = False) -> List[Dict[str, str]]:
-        """List installed packages on the device.
+    def _parse_package_list(self, output: str) -> List[Dict[str, str]]:
+        """Parse the output of 'pm list packages -f' command.
+        Args:
+            output: Raw command output from 'pm list packages -f'
+        Returns:
+            List of dictionaries containing package info with 'package' and 'path' keys
+        """
+        apps = []
+        for line in output.splitlines():
+            if line.startswith("package:"):
+                # Format is: "package:/path/to/base.apk=com.package.name"
+                path_and_pkg = line[8:]  # Strip "package:"
+                if "=" in path_and_pkg:
+                    path, package = path_and_pkg.rsplit("=", 1)
+                    apps.append({"package": package.strip(), "path": path.strip()})
+        return apps
+    async def list_packages(self, include_system_apps: bool = False) -> List[str]:
+        """
+        List installed packages on the device.
         Args:
-            include_system_apps: Whether to include system apps
+            include_system_apps: Whether to include system apps (default: False)
         Returns:
-            List of package dictionaries with 'package' and 'path' keys
+            List of package names
         """
+        # Use the direct ADB command to get packages with paths
         cmd = ["pm", "list", "packages", "-f"]
         if not include_system_apps:
             cmd.append("-3")
-        output = await self._adb.shell(self._serial, " ".join(cmd))
-        packages = []
-        for line in output.splitlines():
-            if line.startswith("package:"):
-                parts = line[8:].split("=")
-                if len(parts) == 2:
-                    path, package = parts
-                    packages.append({
-                        "package": package,
-                        "path": path
-                    })
-        return packages
+        output = await self.shell(" ".join(cmd))
+        # Parse the package list using the function
+        packages = self._parse_package_list(output)
+        # Format package list for better readability
+        package_list = [pack["package"] for pack in packages]
+        #for package in package_list:
+        #    print(package)
+        return package_list

droidrun/adb/manager.py CHANGED Viewed

@@ -42,7 +42,7 @@ class DeviceManager:
         return list(self._devices.values())
-    async def get_device(self, serial: str) -> Optional[Device]:
+    async def get_device(self, serial: str | None = None) -> Optional[Device]:
         """Get a specific device.
         Args:
@@ -51,13 +51,13 @@ class DeviceManager:
         Returns:
             Device instance if found, None otherwise
         """
-        if serial in self._devices:
+        if serial and serial in self._devices:
             return self._devices[serial]
         # Try to find the device
         devices = await self.list_devices()
         for device in devices:
-            if device.serial == serial:
+            if device.serial == serial or not serial:
                 return device
         return None

droidrun/agent/codeact/codeact_agent.py CHANGED Viewed

@@ -45,6 +45,7 @@ class CodeActAgent(Workflow):
         self,
         llm: LLM,
         persona: AgentPersona,
+        vision: bool,
         tools_instance: "Tools",
         all_tools_list: Dict[str, Callable[..., Any]],
         max_steps: int = 5,
@@ -62,6 +63,8 @@ class CodeActAgent(Workflow):
         self.user_prompt = persona.user_prompt
         self.no_thoughts_prompt = None
+        self.vision = vision
         self.chat_memory = None
         self.episodic_memory = EpisodicMemory(persona=persona)
         self.remembered_info = None
@@ -161,22 +164,28 @@ class CodeActAgent(Workflow):
             chat_history = await chat_utils.add_memory_block(self.remembered_info, chat_history)
         for context in self.required_context:
-            if context == "screenshot" and model != "DeepSeek":
+            if model == "DeepSeek":
+                logger.warning(
+                    "[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
+                )
+            elif self.vision == True and context == "screenshot":
                 screenshot = (await self.tools.take_screenshot())[1]
                 ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
                 await ctx.set("screenshot", screenshot)
                 chat_history = await chat_utils.add_screenshot_image_block(screenshot, chat_history)
-            if context == "phone_state":
-                chat_history = await chat_utils.add_phone_state_block(await self.tools.get_phone_state(), chat_history)
             if context == "ui_state":
-                ui_state = await self.tools.get_clickables()
-                await ctx.set("ui_state", ui_state)
-                chat_history = await chat_utils.add_ui_text_block(
-                    ui_state, chat_history
-                )
+                try:
+                    state = await self.tools.get_state()
+                    await ctx.set("ui_state", state["a11y_tree"])
+                    chat_history = await chat_utils.add_ui_text_block(
+                        state["a11y_tree"], chat_history
+                    )
+                    chat_history = await chat_utils.add_phone_state_block(state["phone_state"], chat_history)
+                except Exception as e:
+                    logger.warning(f"⚠️ Error retrieving state from the connected device. Is the Accessibility Service enabled?")
             if context == "packages":
                 chat_history = await chat_utils.add_packages_block(
@@ -303,6 +312,7 @@ class CodeActAgent(Workflow):
             {
                 "success": ev.success,
                 "reason": ev.reason,
+                "output": ev.reason,
                 "codeact_steps": self.steps_counter,
                 "code_executions": self.code_exec_counter,
             }
@@ -312,7 +322,7 @@ class CodeActAgent(Workflow):
             EpisodicMemoryEvent(episodic_memory=self.episodic_memory)
         )
-        return StopEvent(result=result)
+        return StopEvent(result)
     async def _get_llm_response(
         self, ctx: Context, chat_history: List[ChatMessage]
@@ -394,7 +404,7 @@ class CodeActAgent(Workflow):
                 logger.warning(f"Failed to capture final screenshot: {e}")
             try:
-                ui_state = await self.tools.get_clickables()
+                (a11y_tree, phone_state) = await self.tools.get_state()
             except Exception as e:
                 logger.warning(f"Failed to capture final UI state: {e}")
@@ -402,7 +412,7 @@ class CodeActAgent(Workflow):
             final_chat_history = [{"role": "system", "content": "Final state observation after task completion"}]
             final_response = {
                 "role": "user",
-                "content": f"Final State Observation:\nUI State: {ui_state}\nScreenshot: {'Available' if screenshot else 'Not available'}"
+                "content": f"Final State Observation:\nUI State: {a11y_tree}\nScreenshot: {'Available' if screenshot else 'Not available'}"
             }
             # Create final episodic memory step

droidrun/agent/context/personas/__init__.py CHANGED Viewed

@@ -1,11 +1,9 @@
 from .default import DEFAULT
 from .ui_expert import UI_EXPERT
 from .app_starter import APP_STARTER_EXPERT
-from .extractor import EXTRACTOR
 __all__ = [
     'DEFAULT',
     'UI_EXPERT',
     'APP_STARTER_EXPERT',
-    'EXTRACTOR'
     ]

droidrun/agent/context/personas/default.py CHANGED Viewed

@@ -21,7 +21,6 @@ DEFAULT = AgentPersona(
     required_context=[
         "ui_state",
         "screenshot",
-        "phone_state"
     ],
     user_prompt="""
     **Current Request:**
@@ -46,6 +45,7 @@ DEFAULT = AgentPersona(
     - **screenshots**: A visual screenshot of the current state of the Android screen. This provides visual context for what the user sees. screenshots won't be saved in the chat history. So, make sure to describe what you see and explain the key parts of your plan in your thoughts, as those will be saved and used to assist you in future steps.
     - **phone_state**: The current app you are navigating in. This tells you which application context you're working within.
     - **chat history**: You are also given the history of your actions (if any) from your previous steps.
+    - **execution result**: The result of your last Action
     NOTE: you don't have access to these inputs in your tool calling context
     ## Response Format:

droidrun-0.3.0-py3-none-any.whl → droidrun-0.3.2-py3-none-any.whl

droidrun-0.3.0-py3-none-any.whl → droidrun-0.3.2-py3-none-any.whl