PyPI - droidrun - Versions diffs - 0.3.8__py3-none-any.whl → 0.3.10.dev2__py3-none-any.whl - Mend

droidrun 0.3.8py3-none-any.whl → 0.3.10.dev2py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

droidrun/__init__.py +2 -3
droidrun/__main__.py +1 -1
droidrun/agent/__init__.py +1 -1
droidrun/agent/codeact/__init__.py +1 -4
droidrun/agent/codeact/codeact_agent.py +112 -48
droidrun/agent/codeact/events.py +6 -3
droidrun/agent/codeact/prompts.py +2 -2
droidrun/agent/common/constants.py +2 -0
droidrun/agent/common/events.py +5 -3
droidrun/agent/context/__init__.py +1 -3
droidrun/agent/context/agent_persona.py +2 -1
droidrun/agent/context/context_injection_manager.py +6 -6
droidrun/agent/context/episodic_memory.py +5 -3
droidrun/agent/context/personas/__init__.py +3 -3
droidrun/agent/context/personas/app_starter.py +3 -3
droidrun/agent/context/personas/big_agent.py +3 -3
droidrun/agent/context/personas/default.py +3 -3
droidrun/agent/context/personas/ui_expert.py +5 -5
droidrun/agent/context/task_manager.py +15 -17
droidrun/agent/droid/__init__.py +1 -1
droidrun/agent/droid/droid_agent.py +327 -182
droidrun/agent/droid/events.py +91 -9
droidrun/agent/executor/__init__.py +13 -0
droidrun/agent/executor/events.py +24 -0
droidrun/agent/executor/executor_agent.py +327 -0
droidrun/agent/executor/prompts.py +136 -0
droidrun/agent/manager/__init__.py +18 -0
droidrun/agent/manager/events.py +20 -0
droidrun/agent/manager/manager_agent.py +459 -0
droidrun/agent/manager/prompts.py +223 -0
droidrun/agent/oneflows/app_starter_workflow.py +118 -0
droidrun/agent/oneflows/text_manipulator.py +204 -0
droidrun/agent/planner/__init__.py +3 -3
droidrun/agent/planner/events.py +6 -3
droidrun/agent/planner/planner_agent.py +60 -53
droidrun/agent/planner/prompts.py +2 -2
droidrun/agent/usage.py +15 -13
droidrun/agent/utils/__init__.py +11 -1
droidrun/agent/utils/async_utils.py +2 -1
droidrun/agent/utils/chat_utils.py +48 -60
droidrun/agent/utils/device_state_formatter.py +177 -0
droidrun/agent/utils/executer.py +13 -12
droidrun/agent/utils/inference.py +114 -0
droidrun/agent/utils/llm_picker.py +2 -0
droidrun/agent/utils/message_utils.py +85 -0
droidrun/agent/utils/tools.py +220 -0
droidrun/agent/utils/trajectory.py +8 -7
droidrun/cli/__init__.py +1 -1
droidrun/cli/logs.py +29 -28
droidrun/cli/main.py +279 -143
droidrun/config_manager/__init__.py +25 -0
droidrun/config_manager/config_manager.py +583 -0
droidrun/macro/__init__.py +2 -2
droidrun/macro/__main__.py +1 -1
droidrun/macro/cli.py +36 -34
droidrun/macro/replay.py +7 -9
droidrun/portal.py +1 -1
droidrun/telemetry/__init__.py +2 -2
droidrun/telemetry/events.py +3 -4
droidrun/telemetry/phoenix.py +173 -0
droidrun/telemetry/tracker.py +7 -5
droidrun/tools/__init__.py +1 -1
droidrun/tools/adb.py +210 -82
droidrun/tools/ios.py +7 -5
droidrun/tools/tools.py +25 -8
{droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/METADATA +13 -7
droidrun-0.3.10.dev2.dist-info/RECORD +70 -0
droidrun/agent/common/default.py +0 -5
droidrun/agent/context/reflection.py +0 -20
droidrun/agent/oneflows/reflector.py +0 -265
droidrun-0.3.8.dist-info/RECORD +0 -55
{droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/WHEEL +0 -0
{droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/entry_points.txt +0 -0
{droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/licenses/LICENSE +0 -0

droidrun/agent/utils/message_utils.py ADDED Viewed

@@ -0,0 +1,85 @@
+"""
+Message conversion utilities for Manager Agent.
+Converts between dict message format and llama-index ChatMessage format.
+"""
+from io import BytesIO
+from pathlib import Path
+from typing import Union
+from llama_index.core.llms import ChatMessage, ImageBlock, TextBlock
+from PIL import Image
+def image_to_image_bytes(image_source: Union[str, Path, Image.Image, bytes]) -> bytes:
+    """
+    Convert an image to raw bytes suitable for an ImageBlock.
+    Args:
+        image_source: Can be:
+            - str/Path: path to an image file (opened, encoded, then closed)
+            - PIL.Image.Image: PIL Image object (left open; the caller owns it)
+            - bytes: already-encoded image bytes
+    Returns:
+        PNG-encoded bytes for path and PIL Image inputs. A ``bytes`` input is
+        returned unchanged, in whatever format it already has.
+    Raises:
+        ValueError: If ``image_source`` is none of the supported types.
+    """
+    if isinstance(image_source, bytes):
+        # Pass-through: the data is neither decoded nor re-encoded.
+        return image_source
+    buffer = BytesIO()
+    if isinstance(image_source, (str, Path)):
+        # Image.open() keeps the underlying file open; the context manager
+        # releases the handle as soon as the PNG has been written.
+        with Image.open(image_source) as image:
+            image.save(buffer, format="PNG")
+    elif isinstance(image_source, Image.Image):
+        image_source.save(buffer, format="PNG")
+    else:
+        raise ValueError(f"Unsupported image source type: {type(image_source)}")
+    return buffer.getvalue()
+def convert_messages_to_chatmessages(messages: list[dict]) -> list[ChatMessage]:
+    """
+    Translate dict-style messages into llama-index ChatMessage objects.
+    Input (one dict per message):
+        {
+            "role": "user" | "assistant" | "system",
+            "content": [
+                {"text": "some text"},
+                {"image": "/path/to/image.png"}  # or PIL Image
+            ]
+        }
+    Output (one ChatMessage per message):
+        ChatMessage(
+            role="user",
+            blocks=[
+                TextBlock(text="some text"),
+                ImageBlock(image=b"...bytes...")
+            ]
+        )
+    Each content item becomes at most one block: an item with a "text" key
+    yields a TextBlock (even if it also has "image"); otherwise an item with
+    an "image" key yields an ImageBlock. Items with neither key are skipped.
+    Args:
+        messages: List of message dicts
+    Returns:
+        List of ChatMessage objects, in the same order as the input
+    """
+    converted = []
+    for msg in messages:
+        parts = []
+        for entry in msg['content']:
+            if 'text' in entry:
+                parts.append(TextBlock(text=entry['text']))
+                continue
+            if 'image' in entry:
+                # ImageBlock takes raw bytes, so normalise the source first
+                parts.append(ImageBlock(image=image_to_image_bytes(entry['image'])))
+        converted.append(ChatMessage(role=msg['role'], blocks=parts))
+    return converted

droidrun/agent/utils/tools.py ADDED Viewed

@@ -0,0 +1,220 @@
+from typing import TYPE_CHECKING, List
+if TYPE_CHECKING:
+    from droidrun.tools import Tools
+from droidrun.agent.oneflows.app_starter_workflow import AppStarter
+def click(tool_instance: "Tools", index: int) -> str:
+    """
+    Tap the UI element identified by ``index``.
+    Args:
+        tool_instance: The Tools instance that performs the tap
+        index: The index of the element to click
+    Returns:
+        Whatever ``tool_instance.tap_by_index`` returns for that index
+    """
+    tap_result = tool_instance.tap_by_index(index)
+    return tap_result
+def long_press(tool_instance: "Tools", index: int) -> bool:
+    """
+    Long press the UI element identified by ``index``.
+    The press is issued as a swipe whose start and end points are the same
+    element coordinates, with 1000 passed as the final swipe argument.
+    Args:
+        tool_instance: The Tools instance
+        index: The index of the element to long press
+    Returns:
+        The result of ``tool_instance.swipe``
+    """
+    coords = tool_instance._extract_element_coordinates_by_index(index)
+    press_x, press_y = coords
+    # Same start and end point: the finger stays put for the whole gesture.
+    return tool_instance.swipe(press_x, press_y, press_x, press_y, 1000)
+def type(tool_instance: "Tools", text: str, index: int) -> str:
+    """
+    Enter ``text`` into the UI element identified by ``index``.
+    Note: inside this module the name shadows the builtin ``type``.
+    Args:
+        tool_instance: The Tools instance
+        text: The text to type
+        index: The index of the element to type into
+    Returns:
+        Whatever ``tool_instance.input_text`` returns
+    """
+    input_result = tool_instance.input_text(text, index)
+    return input_result
+def system_button(tool_instance: "Tools", button: str) -> str:
+    """
+    Press one of the supported system buttons.
+    Args:
+        tool_instance: The Tools instance
+        button: "back", "home", or "enter" (matched case-insensitively)
+    Returns:
+        The result of ``tool_instance.press_key`` for the mapped keycode, or
+        an error string (no key is pressed) when the button name is unknown
+    """
+    # Lower-cased button name -> keycode handed to press_key
+    keycodes = {"back": 4, "home": 3, "enter": 66}
+    keycode = keycodes.get(button.lower())
+    if keycode is None:
+        return f"Error: Unknown system button '{button}'. Valid options: back, home, enter"
+    return tool_instance.press_key(keycode)
+def swipe(tool_instance: "Tools", coordinate: List[int], coordinate2: List[int]) -> bool:
+    """
+    Swipe from one coordinate to another.
+    Args:
+        tool_instance: The Tools instance
+        coordinate: Starting coordinate as [x, y] (a 2-item list or tuple)
+        coordinate2: Ending coordinate as [x, y] (a 2-item list or tuple)
+    Returns:
+        The result of ``tool_instance.swipe`` called with ``duration_ms=300``
+    Raises:
+        ValueError: If either coordinate is not a list/tuple of exactly 2 items
+    """
+    # Tuples unpack exactly like lists, so accept both instead of rejecting
+    # an otherwise valid (x, y) pair.
+    for label, value in (("coordinate", coordinate), ("coordinate2", coordinate2)):
+        if not isinstance(value, (list, tuple)) or len(value) != 2:
+            raise ValueError(f"{label} must be a list of 2 integers, got: {value}")
+    start_x, start_y = coordinate
+    end_x, end_y = coordinate2
+    return tool_instance.swipe(start_x, start_y, end_x, end_y, duration_ms=300)
+def open_app(tool_instance: "Tools", text: str) -> str:
+    """
+    Open an app described by ``text`` using the AppStarter workflow.
+    Args:
+        tool_instance: The Tools instance; its ``app_opener_llm`` must be set
+        text: The name of the app to open
+    Returns:
+        Whatever ``AppStarter.run`` returns for the given description
+    Raises:
+        RuntimeError: If ``tool_instance.app_opener_llm`` is None
+    """
+    opener_llm = tool_instance.app_opener_llm
+    if opener_llm is None:
+        raise RuntimeError(
+            "app_opener_llm not configured. provide app_opener_llm when initializing Tools."
+        )
+    # NOTE(review): run() is returned as-is and never awaited here — confirm
+    # that AppStarter.run yields the final result rather than an awaitable.
+    starter = AppStarter(tools=tool_instance, llm=opener_llm, timeout=60, verbose=True)
+    return starter.run(app_description=text)
+# =============================================================================
+# ATOMIC ACTION SIGNATURES - Single source of truth for both Executor and CodeAct
+# =============================================================================
+# Maps each action name to a dict with three keys:
+#   "arguments":   ordered argument names (joined into the signature text by
+#                  get_atomic_tool_descriptions)
+#   "description": prompt text for the action, including a JSON usage example
+#   "function":    the module-level callable that implements the action
+ATOMIC_ACTION_SIGNATURES = {
+    "click": {
+        "arguments": ["index"],
+        "description": "Click the point on the screen with specified index. Usage Example: {\"action\": \"click\", \"index\": element_index}",
+        "function": click,
+    },
+    "long_press": {
+        "arguments": ["index"],
+        "description": "Long press on the position with specified index. Usage Example: {\"action\": \"long_press\", \"index\": element_index}",
+        "function": long_press,
+    },
+    "type": {
+        "arguments": ["text", "index"],
+        "description": "Type text into an input box or text field. Specify the element with index to focus the input field before typing. Usage Example: {\"action\": \"type\", \"text\": \"the text you want to type\", \"index\": element_index}",
+        "function": type,
+    },
+    "system_button": {
+        "arguments": ["button"],
+        "description": "Press a system button, including back, home, and enter. Usage example: {\"action\": \"system_button\", \"button\": \"Home\"}",
+        "function": system_button,
+    },
+    "swipe": {
+        "arguments": ["coordinate", "coordinate2"],
+        "description": "Scroll from the position with coordinate to the position with coordinate2. Please make sure the start and end points of your swipe are within the swipeable area and away from the keyboard (y1 < 1400). Usage Example: {\"action\": \"swipe\", \"coordinate\": [x1, y1], \"coordinate2\": [x2, y2]}",
+        "function": swipe,
+    },
+    "open_app": {
+        "arguments": ["text"],
+        "description": "Open an app. Usage example: {\"action\": \"open_app\", \"text\": \"the name of app\"}",
+        "function": open_app,
+    },
+    # NOTE(review): the disabled "copy"/"paste" entries below name functions
+    # that are not defined in the visible part of this module, and each is
+    # missing the comma after its "description" string — fix both before
+    # re-enabling them.
+    # "copy": {
+    #     "arguments": ["text"],
+    #     "description": "Copy the specified text to the clipboard. Provide the text to copy using the 'text' argument. Example: {\"action\": \"copy\", \"text\": \"the text you want to copy\"}\nAlways use copy action to copy text to clipboard."
+    #     "function": copy,
+    # },
+    # "paste": {
+    #     "arguments": ["index", "clear"],
+    #     "description": "Paste clipboard text into a text box. 'index' specifies which text box to focus on and paste into. Set 'clear' to true to clear existing text before pasting. Example: {\"action\": \"paste\", \"index\": 0, \"clear\": true}\nAlways use paste action to paste text from clipboard."
+    #     "function": paste,
+    # },
+}
+def get_atomic_tool_descriptions() -> str:
+    """
+    Render ATOMIC_ACTION_SIGNATURES as a bullet list for the CodeAct system prompt.
+    Each action becomes one line of the form
+    ``- name(arg1, arg2): description``, in the dict's insertion order.
+    Returns:
+        The lines joined with newlines
+    """
+    return "\n".join(
+        f"- {name}({', '.join(spec['arguments'])}): {spec['description']}"
+        for name, spec in ATOMIC_ACTION_SIGNATURES.items()
+    )
+def build_custom_tool_descriptions(custom_tools: dict) -> str:
+    """
+    Render a custom_tools dict as a bullet list for the LLM prompt.
+    Args:
+        custom_tools: Dictionary of custom tools in ATOMIC_ACTION_SIGNATURES format
+            {
+                "tool_name": {
+                    "arguments": ["arg1", "arg2"],
+                    "description": "Tool description with usage",
+                    "function": callable
+                }
+            }
+            A missing "arguments" key is treated as no arguments; a missing
+            "description" falls back to "Custom action: <tool_name>".
+    Returns:
+        One ``- name(args): description`` line per tool, joined with newlines,
+        or an empty string when ``custom_tools`` is empty or None
+    """
+    if not custom_tools:
+        return ""
+    lines = [
+        "- {}({}): {}".format(
+            name,
+            ", ".join(spec.get("arguments", [])),
+            spec.get("description", f"Custom action: {name}"),
+        )
+        for name, spec in custom_tools.items()
+    ]
+    return "\n".join(lines)

droidrun/agent/utils/trajectory.py CHANGED Viewed

@@ -5,15 +5,16 @@ This module provides helper functions for working with agent trajectories,
 including saving, loading, and analyzing them.
 """
+import io
 import json
 import logging
 import os
 import time
 import uuid
-from typing import Dict, List, Any
-from PIL import Image
-import io
+from typing import Any, Dict, List
 from llama_index.core.workflow import Event
+from PIL import Image
 logger = logging.getLogger("droidrun")
@@ -66,7 +67,7 @@ class Trajectory:
         Args:
             goal: The goal/prompt that this trajectory is trying to achieve
         """
-        self.events: List[Event] = []
+        self.events: List[Event] = []
         self.screenshots: List[bytes] = []
         self.ui_states: List[Dict[str, Any]] = []
         self.macro: List[Event] = []
@@ -183,7 +184,7 @@ class Trajectory:
                     f"Serialized event contains tokens: {event_dict['tokens']}"
                 )
             else:
-                logger.debug(f"Serialized event does NOT contain tokens")
+                logger.debug("Serialized event does NOT contain tokens")
             serializable_events.append(event_dict)
@@ -223,7 +224,7 @@ class Trajectory:
             logger.info(
                 f"💾 Saved macro sequence with {len(macro_data)} actions to {macro_json_path}"
             )
-        screenshots_folder = os.path.join(trajectory_folder, "screenshots");
+        screenshots_folder = os.path.join(trajectory_folder, "screenshots")
         os.makedirs(screenshots_folder, exist_ok=True)
         gif_path = self.create_screenshot_gif(
@@ -418,7 +419,7 @@ class Trajectory:
                 print(f"  - {action_type}: {count}")
         if folder_data["trajectory_data"]:
-            print(f"\n--- Trajectory Summary ---")
+            print("\n--- Trajectory Summary ---")
             print(f"Total events: {len(folder_data['trajectory_data'])}")
         print("=================================")

droidrun/cli/__init__.py CHANGED Viewed

@@ -6,4 +6,4 @@ This module provides command-line interfaces for interacting with Android device
 from droidrun.cli.main import cli
-__all__ = ["cli"]
+__all__ = ["cli"]

droidrun/cli/logs.py CHANGED Viewed

@@ -1,30 +1,30 @@
 import logging
+from typing import List
+from rich.console import Console
 from rich.layout import Layout
+from rich.live import Live
 from rich.panel import Panel
 from rich.spinner import Spinner
-from rich.console import Console
-from rich.live import Live
-from typing import List
-from droidrun.agent.common.events import ScreenshotEvent, RecordUIStateEvent
-from droidrun.agent.planner.events import (
-    PlanInputEvent,
-    PlanThinkingEvent,
-    PlanCreatedEvent,
-)
 from droidrun.agent.codeact.events import (
-    TaskInputEvent,
-    TaskThinkingEvent,
+    TaskEndEvent,
     TaskExecutionEvent,
     TaskExecutionResultEvent,
-    TaskEndEvent,
+    TaskInputEvent,
+    TaskThinkingEvent,
 )
+from droidrun.agent.common.events import RecordUIStateEvent, ScreenshotEvent
 from droidrun.agent.droid.events import (
     CodeActExecuteEvent,
     CodeActResultEvent,
-    ReasoningLogicEvent,
-    TaskRunnerEvent,
     FinalizeEvent,
+    TaskRunnerEvent,
+)
+from droidrun.agent.planner.events import (
+    PlanCreatedEvent,
+    PlanInputEvent,
+    PlanThinkingEvent,
 )
@@ -90,13 +90,14 @@ class LogHandler(logging.Handler):
         success: bool = False,
     ):
         """Update the layout with current logs and step information"""
-        from rich.text import Text
         import shutil
+        from rich.text import Text
         # Cache terminal size to avoid frequent recalculation
         try:
             terminal_height = shutil.get_terminal_size().lines
-        except:
+        except:  # noqa: E722
             terminal_height = 24  # fallback
         # Reserve space for panels and borders (more conservative estimate)
@@ -169,7 +170,7 @@ class LogHandler(logging.Handler):
             )
         )
-    def handle_event(self, event):
+    def handle_event(self, event): # TODO: fix event handling for the refactor
         """Handle streaming events from the agent workflow."""
         logger = logging.getLogger("droidrun")
@@ -178,7 +179,7 @@ class LogHandler(logging.Handler):
             logger.debug("📸 Taking screenshot...")
         elif isinstance(event, RecordUIStateEvent):
-            logger.debug(f"✏️ Recording UI state")
+            logger.debug("✏️ Recording UI state")
         # Planner events
         elif isinstance(event, PlanInputEvent):
@@ -194,7 +195,7 @@ class LogHandler(logging.Handler):
                 )
                 logger.info(f"🧠 Planning: {thoughts_preview}")
             if event.code:
-                logger.info(f"📝 Generated plan code")
+                logger.info("📝 Generated plan code")
         elif isinstance(event, PlanCreatedEvent):
             if event.tasks:
@@ -219,12 +220,12 @@ class LogHandler(logging.Handler):
                 )
                 logger.info(f"🧠 Thinking: {thoughts_preview}")
             if hasattr(event, "code") and event.code:
-                logger.info(f"💻 Executing action code")
+                logger.info("💻 Executing action code")
                 logger.debug(f"{event.code}")
         elif isinstance(event, TaskExecutionEvent):
             self.current_step = "Executing action..."
-            logger.info(f"⚡ Executing action...")
+            logger.info("⚡ Executing action...")
         elif isinstance(event, TaskExecutionResultEvent):
             if hasattr(event, "output") and event.output:
@@ -246,13 +247,13 @@ class LogHandler(logging.Handler):
                     self.current_step = event.reason
                     logger.info(f"✅ Task completed: {event.reason}")
                 else:
-                    self.current_step = f"Task failed"
+                    self.current_step = "Task failed"
                     logger.info(f"❌ Task failed: {event.reason}")
         # Droid coordination events
         elif isinstance(event, CodeActExecuteEvent):
             self.current_step = "Executing task..."
-            logger.info(f"🔧 Starting task execution...")
+            logger.info("🔧 Starting task execution...")
         elif isinstance(event, CodeActResultEvent):
             if hasattr(event, "success") and hasattr(event, "reason"):
@@ -260,16 +261,16 @@ class LogHandler(logging.Handler):
                     self.current_step = event.reason
                     logger.info(f"✅ Task completed: {event.reason}")
                 else:
-                    self.current_step = f"Task failed"
+                    self.current_step = "Task failed"
                     logger.info(f"❌ Task failed: {event.reason}")
-        elif isinstance(event, ReasoningLogicEvent):
-            self.current_step = "Planning..."
-            logger.info(f"🤔 Planning next steps...")
+        # elif isinstance(event, ReasoningLogicEvent): TODO: fix event handling
+        #     self.current_step = "Planning..."
+        #     logger.info("🤔 Planning next steps...")
         elif isinstance(event, TaskRunnerEvent):
             self.current_step = "Processing tasks..."
-            logger.info(f"🏃 Processing task queue...")
+            logger.info("🏃 Processing task queue...")
         elif isinstance(event, FinalizeEvent):
             if hasattr(event, "success") and hasattr(event, "reason"):

droidrun 0.3.8__py3-none-any.whl → 0.3.10.dev2__py3-none-any.whl

droidrun 0.3.8py3-none-any.whl → 0.3.10.dev2py3-none-any.whl