PyPI - droidrun - Versions diffs - 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

droidrun 0.1.0-py3-none-any.whl → 0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

droidrun/__init__.py +22 -10
droidrun/__main__.py +1 -2
droidrun/adb/__init__.py +3 -3
droidrun/adb/device.py +2 -2
droidrun/adb/manager.py +2 -2
droidrun/agent/__init__.py +5 -15
droidrun/agent/codeact/__init__.py +11 -0
droidrun/agent/codeact/codeact_agent.py +420 -0
droidrun/agent/codeact/events.py +28 -0
droidrun/agent/codeact/prompts.py +26 -0
droidrun/agent/common/default.py +5 -0
droidrun/agent/common/events.py +4 -0
droidrun/agent/context/__init__.py +23 -0
droidrun/agent/context/agent_persona.py +15 -0
droidrun/agent/context/context_injection_manager.py +66 -0
droidrun/agent/context/episodic_memory.py +15 -0
droidrun/agent/context/personas/__init__.py +11 -0
droidrun/agent/context/personas/app_starter.py +44 -0
droidrun/agent/context/personas/default.py +95 -0
droidrun/agent/context/personas/extractor.py +52 -0
droidrun/agent/context/personas/ui_expert.py +107 -0
droidrun/agent/context/reflection.py +20 -0
droidrun/agent/context/task_manager.py +124 -0
droidrun/agent/context/todo.txt +4 -0
droidrun/agent/droid/__init__.py +13 -0
droidrun/agent/droid/droid_agent.py +357 -0
droidrun/agent/droid/events.py +28 -0
droidrun/agent/oneflows/reflector.py +265 -0
droidrun/agent/planner/__init__.py +13 -0
droidrun/agent/planner/events.py +16 -0
droidrun/agent/planner/planner_agent.py +268 -0
droidrun/agent/planner/prompts.py +124 -0
droidrun/agent/utils/__init__.py +3 -0
droidrun/agent/utils/async_utils.py +17 -0
droidrun/agent/utils/chat_utils.py +312 -0
droidrun/agent/utils/executer.py +132 -0
droidrun/agent/utils/llm_picker.py +147 -0
droidrun/agent/utils/trajectory.py +184 -0
droidrun/cli/__init__.py +1 -1
droidrun/cli/logs.py +283 -0
droidrun/cli/main.py +358 -149
droidrun/run.py +105 -0
droidrun/tools/__init__.py +4 -30
droidrun/tools/adb.py +879 -0
droidrun/tools/ios.py +594 -0
droidrun/tools/tools.py +99 -0
droidrun-0.3.0.dist-info/METADATA +149 -0
droidrun-0.3.0.dist-info/RECORD +52 -0
droidrun/agent/llm_reasoning.py +0 -567
droidrun/agent/react_agent.py +0 -556
droidrun/llm/__init__.py +0 -24
droidrun/tools/actions.py +0 -854
droidrun/tools/device.py +0 -29
droidrun-0.1.0.dist-info/METADATA +0 -276
droidrun-0.1.0.dist-info/RECORD +0 -20
{droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/WHEEL +0 -0
{droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/entry_points.txt +0 -0
{droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/licenses/LICENSE +0 -0

droidrun/agent/context/context_injection_manager.py ADDED Viewed

@@ -0,0 +1,66 @@
+"""
+Context Injection Manager - Manages specialized agent personas with dynamic tool and context injection.
+This module provides the ContextInjectionManager class that manages different agent personas,
+each with specific system prompts, contexts, and tool subsets tailored for specialized tasks.
+"""
+import logging
+from typing import Optional, List
+from droidrun.agent.context.agent_persona import AgentPersona
+#import chromadb
+import json
+from pathlib import Path
+logger = logging.getLogger("droidrun")
+class ContextInjectionManager:
+    """
+    Manages different agent personas with specialized contexts and tool subsets.
+    This class is responsible for:
+    - Defining agent personas with specific capabilities
+    - Injecting appropriate system prompts based on agent type
+    - Filtering tool lists to match agent specialization
+    - Providing context-aware configurations for CodeActAgent instances
+    """
+    def __init__(
+            self,
+            personas: List[AgentPersona]
+        ):
+        """Initialize the Context Injection Manager with predefined personas."""
+        self.personas = {}
+        for persona in personas:
+            self.personas[persona.name] = persona
+    def _load_persona(self, data: str) -> AgentPersona:
+        persona = json.loads(data)
+        logger.info(f"🎭 Loaded persona: {persona['name']}")
+        return AgentPersona(
+            name=persona['name'],
+            system_prompt=persona['system_prompt'],
+            allowed_tools=persona['allowed_tools'],
+            description=persona['description'],
+            expertise_areas=persona['expertise_areas'],
+            user_prompt=persona['user_prompt'],
+            required_context=persona['required_context'],
+        )
+    def get_persona(self, agent_type: str) -> Optional[AgentPersona]:
+        """
+        Get a specific agent persona by type.
+        Args:
+            agent_type: The type of agent ("UIExpert" or "AppStarterExpert")
+        Returns:
+            AgentPersona instance or None if not found
+        """
+        return self.personas.get(agent_type)
+    def get_all_personas(self) -> List[str]:
+        return self.personas

droidrun/agent/context/episodic_memory.py ADDED Viewed

@@ -0,0 +1,15 @@
+from dataclasses import dataclass, field
+from droidrun.agent.context.agent_persona import AgentPersona
+from typing import List, Optional
+@dataclass
+class EpisodicMemoryStep:
+    """One recorded step of an episode.
+    NOTE(review): the per-field notes below are inferred from the field names
+    and types only; confirm against the code that creates these records.
+    """
+    chat_history: str  # presumably a serialized chat history for this step
+    response: str  # presumably the model response produced at this step
+    timestamp: float  # presumably an epoch timestamp (e.g. time.time()) - TODO confirm
+    screenshot: Optional[bytes]  # raw screenshot bytes, or None when no screenshot is attached
+@dataclass
+class EpisodicMemory:
+    """An ordered list of EpisodicMemoryStep records tied to one AgentPersona."""
+    persona: AgentPersona  # the persona this memory belongs to
+    # default_factory gives every instance its own fresh list (no shared mutable default)
+    steps: List[EpisodicMemoryStep] = field(default_factory=list)

droidrun/agent/context/personas/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+from .default import DEFAULT
+from .ui_expert import UI_EXPERT
+from .app_starter import APP_STARTER_EXPERT
+from .extractor import EXTRACTOR
+# Persona constants re-exported by this package (imported from the sibling modules above).
+__all__ = [
+    'DEFAULT',
+    'UI_EXPERT',
+    'APP_STARTER_EXPERT',
+    'EXTRACTOR'
+    ]

droidrun/agent/context/personas/app_starter.py ADDED Viewed

@@ -0,0 +1,44 @@
+from droidrun.agent.context.agent_persona import AgentPersona
+from droidrun.tools import Tools
+APP_STARTER_EXPERT = AgentPersona(
+    name="AppStarterExpert",
+    description="Specialized in app launching",
+    expertise_areas=[
+        "app launching"
+    ],
+    allowed_tools=[
+        Tools.start_app.__name__,
+        Tools.complete.__name__
+    ],
+    required_context=[
+        "packages"
+    ],
+    user_prompt="""
+    **Current Request:**
+    {goal}
+    **Is the precondition met? What is your reasoning and the next step to address this request?** Explain your thought process then provide code in ```python ... ``` tags if needed.""""",
+    system_prompt= """You are an App Starter Expert specialized in Android application lifecycle management. Your core expertise includes:
+    **Primary Capabilities:**
+    - Launch Android applications by package name
+    - Use proper package name format (com.example.app)
+    ## Response Format:
+    Example of proper code format:
+    To launch the Calculator app, I need to use the start_app function with the correct package name.
+    ```python
+    # Launch the Calculator app
+    start_app("com.android.calculator2")
+    complete(success=True)
+    ```
+    In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
+    {tool_descriptions}
+    Reminder: Always place your Python code between ```...``` tags when you want to run code.
+    You focus ONLY on app launching and package management - UI interactions within apps are handled by UI specialists.""",
+)

droidrun/agent/context/personas/default.py ADDED Viewed

@@ -0,0 +1,95 @@
+from droidrun.agent.context.agent_persona import AgentPersona
+from droidrun.tools import Tools
+DEFAULT = AgentPersona(
+    name="Default",
+    description="Default Agent. Use this as your Default",
+    expertise_areas=[
+        "UI navigation", "button interactions", "text input",
+        "menu navigation", "form filling", "scrolling", "app launching"
+    ],
+    allowed_tools=[
+        Tools.swipe.__name__,
+        Tools.input_text.__name__,
+        Tools.press_key.__name__,
+        Tools.tap_by_index.__name__,
+        Tools.start_app.__name__,
+        Tools.list_packages.__name__,
+        Tools.remember.__name__,
+        Tools.complete.__name__
+    ],
+    required_context=[
+        "ui_state",
+        "screenshot",
+        "phone_state"
+    ],
+    user_prompt="""
+    **Current Request:**
+    {goal}
+    **Is the precondition met? What is your reasoning and the next step to address this request?**
+    Explain your thought process then provide code in ```python ... ``` tags if needed.
+    """"",
+    system_prompt="""
+    You are a helpful AI assistant that can write and execute Python code to solve problems.
+    You will be given a task to perform. You should output:
+    - Python code wrapped in ``` tags that provides the solution to the task, or a step towards the solution.
+    - If there is a precondition for the task, you MUST check if it is met.
+    - If a goal's precondition is unmet, fail the task by calling `complete(success=False, reason='...')` with an explanation.
+    - If you task is complete, you should use the complete(success:bool, reason:str) function within a code block to mark it as finished. The success parameter should be True if the task was completed successfully, and False otherwise. The reason parameter should be a string explaining the reason for failure if failed.
+    ## Context:
+    The following context is given to you for analysis:
+    - **ui_state**: A list of all currently visible UI elements with their indices. Use this to understand what interactive elements are available on the screen.
+    - **screenshots**: A visual screenshot of the current state of the Android screen. This provides visual context for what the user sees. screenshots won't be saved in the chat history. So, make sure to describe what you see and explain the key parts of your plan in your thoughts, as those will be saved and used to assist you in future steps.
+    - **phone_state**: The current app you are navigating in. This tells you which application context you're working within.
+    - **chat history**: You are also given the history of your actions (if any) from your previous steps.
+    NOTE: you don't have access to these inputs in your tool calling context
+    ## Response Format:
+    Example of proper code format:
+    **Task Assignment:**
+    **Task:** "Precondition: Settings app is open. Goal: Navigate to Wi-Fi settings and connect to the network 'HomeNetwork'."
+    **(Step 1) Agent Analysis:** I can see the Settings app is open from the screenshot. This is a multi-step task that requires me to first navigate to Wi-Fi settings, then ensure Wi-Fi is enabled, and finally connect to 'HomeNetwork'. Let me start by finding and tapping on the Wi-Fi option in the settings menu. Looking at the UI elements, I can see "Wi-Fi" option at index 3.
+    **(Step 1) Agent Action:**
+    ```python
+    # First step: Navigate to Wi-Fi settings
+    tap_by_index(3)
+    ```
+    **(Step 2) Agent Analysis:** Good! I've successfully navigated to the Wi-Fi settings screen. Now I can see the Wi-Fi settings interface. I notice that Wi-Fi appears to be turned off based on the toggle switch at index 1. I need to turn it on before I can see available networks and connect to 'HomeNetwork'.
+    **(Step 2) Agent Action:**
+    ```python
+    # Second step: Turn on Wi-Fi to see available networks
+    tap_by_index(1)
+    ```
+    **(Step 3) Agent Analysis:** Excellent! Wi-Fi is now enabled and I can see a list of available networks appearing on the screen. I can see 'HomeNetwork' in the list at index 5. This is the final step - I need to tap on it to initiate the connection, which will complete my assigned task.
+    **(Step 3) Agent Action:**
+    ```python
+    # Final step: Connect to the target network
+    tap_by_index(5)
+    complete(success=True, reason="Successfully navigated to Wi-Fi settings and initiated connection to HomeNetwork")
+    ```
+    ```
+    ## Tools:
+    In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
+    {tool_descriptions}
+    ## Final Answer Guidelines:
+    - When providing a final answer, focus on directly answering the user's question in the response format given
+    - Present the results clearly and concisely as if you computed them directly
+    - Structure your response like you're directly answering the user's query, not explaining how you solved it
+    Reminder: Always place your Python code between ```...``` tags when you want to run code.
+"""
+)

droidrun/agent/context/personas/extractor.py ADDED Viewed

@@ -0,0 +1,52 @@
+from droidrun.agent.context.agent_persona import AgentPersona
+from droidrun.tools import Tools
+EXTRACTOR = AgentPersona(
+    name="DataExtractor",
+    description="Specialized in extracting data from UI elements and screenshots",
+    expertise_areas=[
+        "data extraction",
+        "UI analysis",
+        "text recognition"
+    ],
+    allowed_tools=[
+        Tools.extract.__name__,
+        Tools.complete.__name__
+    ],
+    required_context=[
+        "ui_state",
+        "screenshot"
+    ],
+    user_prompt="""
+    **Current Request:**
+    {goal}
+    **What data needs to be extracted?
+    Analyze the current UI state and screenshot, then extract the requested information.
+    ** Explain your thought process then provide code in ```python ... ``` tags if needed.""",
+    system_prompt= """
+    You are a Data Extractor Expert specialized in analyzing Android UI states and screenshots to extract specific information. Your core expertise includes:
+    **Primary Capabilities:**
+    - Analyze UI elements from ui_state data
+    - Extract text, values, and structured data from screenshots
+    - Identify and parse specific UI components (buttons, text fields, lists, etc.)
+    - Extract data based on user requirements
+    ## Response Format:
+    Example of proper code format:
+    To extract the current battery percentage from the status bar:
+    ```python
+    # Extract battery percentage from UI state
+    battery_data = extract("battery percentage")
+    complete(success=True)
+    ```
+    In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
+    {tool_descriptions}
+    Reminder: Always place your Python code between ```...``` tags when you want to run code.
+    You focus ONLY on data extraction from the current UI state and screenshot - navigation and UI interactions are handled by other specialists.""",
+)

droidrun/agent/context/personas/ui_expert.py ADDED Viewed

@@ -0,0 +1,107 @@
+from droidrun.agent.context.agent_persona import AgentPersona
+from droidrun.tools import Tools
+UI_EXPERT = AgentPersona(
+    name="UIExpert",
+    description="Specialized in UI interactions, navigation, and form filling",
+    expertise_areas=[
+        "UI navigation", "button interactions", "text input",
+        "menu navigation", "form filling", "scrolling"
+    ],
+    allowed_tools=[
+        Tools.swipe.__name__,
+        Tools.input_text.__name__,
+        Tools.press_key.__name__,
+        Tools.tap_by_index.__name__,
+        Tools.remember.__name__,
+        Tools.complete.__name__
+    ],
+    required_context=[
+        "ui_state",
+        "screenshot",
+        "phone_state",
+        "memory"
+    ],
+    user_prompt="""
+    **Current Request:**
+    {goal}
+    **Is the precondition met? What is your reasoning and the next step to address this request?** Explain your thought process then provide code in ```python ... ``` tags if needed.""""",
+    system_prompt="""You are a UI Expert specialized in Android interface interactions. Your core expertise includes:
+    **Primary Capabilities:**
+    - Navigate through Android UI elements with precision
+    - Interact with buttons, menus, forms, and interactive elements
+    - Enter text into input fields and search bars
+    - Scroll through content and lists
+    - Handle complex UI navigation workflows
+    - Recognize and interact with various UI patterns (tabs, drawers, dialogs, etc.)
+    **Your Approach:**
+    - Focus on understanding the current UI state through screenshots and element data
+    - Use precise element identification for reliable interactions
+    - Handle dynamic UI changes and loading states gracefully
+    - Provide clear feedback on UI interactions and their outcomes
+    - Adapt to different app interfaces and UI patterns
+    **Key Principles:**
+    - Always analyze the current screen state before taking action
+    - Prefer using element indices for reliable targeting
+    - Provide descriptive feedback about what you're interacting with
+    - Handle edge cases like loading screens, popups, and navigation changes
+    - Remember important UI state information for context
+    You do NOT handle app launching or package management - that's handled by other specialists.
+    ## Available Context:
+    In your execution environment, you have access to:
+    - `ui_elements`: A global variable containing the current UI elements from the device. This is automatically updated before each code execution and contains the latest UI elements that were fetched.
+    ## Response Format:
+    Example of proper code format:
+    To calculate the area of a circle, I need to use the formula: area = pi * radius^2. I will write a function to do this.
+    ```python
+    import math
+    def calculate_area(radius):
+        return math.pi * radius**2
+    # Calculate the area for radius = 5
+    area = calculate_area(5)
+    print(f"The area of the circle is {{area:.2f}} square units")
+    ```
+    Another example (with for loop):
+    To calculate the sum of numbers from 1 to 10, I will use a for loop.
+    ```python
+    sum = 0
+    for i in range(1, 11):
+        sum += i
+    print(f"The sum of numbers from 1 to 10 is {{sum}}")
+    ```
+    In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
+    {tool_descriptions}
+    You'll receive a screenshot showing the current screen and its UI elements to help you complete the task. However, screenshots won't be saved in the chat history. So, make sure to describe what you see and explain the key parts of your plan in your thoughts, as those will be saved and used to assist you in future steps.
+    **Important Notes:**
+    - If there is a precondition for the task, you MUST check if it is met.
+    - If a goal's precondition is unmet, fail the task by calling `complete(success=False, reason='...')` with an explanation.
+    ## Final Answer Guidelines:
+    - When providing a final answer, focus on directly answering the user's question
+    - Avoid referencing the code you generated unless specifically asked
+    - Present the results clearly and concisely as if you computed them directly
+    - If relevant, you can briefly mention general methods used, but don't include code snippets in the final answer
+    - Structure your response like you're directly answering the user's query, not explaining how you solved it
+    Reminder: Always place your Python code between ```...``` tags when you want to run code.
+    You MUST ALWAYS to include your reasoning and thought process outside of the code block. You MUST DOUBLE CHECK that TASK IS COMPLETE with a SCREENSHOT.
+    """
+)

droidrun/agent/context/reflection.py ADDED Viewed

@@ -0,0 +1,20 @@
+from dataclasses import dataclass
+from typing import Optional
+@dataclass
+class Reflection:
+    """Represents the result of a reflection analysis on episodic memory."""
+    goal_achieved: bool
+    summary: str
+    advice: Optional[str] = None
+    raw_response: Optional[str] = None
+    @classmethod
+    def from_dict(cls, data: dict) -> 'Reflection':
+        """Create a Reflection from a dictionary (e.g., parsed JSON)."""
+        return cls(
+            goal_achieved=data.get('goal_achieved', False),
+            summary=data.get('summary', ''),
+            advice=data.get('advice'),
+            raw_response=data.get('raw_response')
+        )

droidrun/agent/context/task_manager.py ADDED Viewed

@@ -0,0 +1,124 @@
+import os
+from typing import List, Dict
+from dataclasses import dataclass
+import copy
+@dataclass
+class Task:
+    """
+    Represents a single task with its properties.
+    """
+    description: str
+    status: str
+    agent_type: str
+class TaskManager:
+    """
+    Manages a list of tasks for an agent, each with a status and assigned specialized agent.
+    """
+    STATUS_PENDING = "pending"
+    STATUS_COMPLETED = "completed"
+    STATUS_FAILED = "failed"
+    VALID_STATUSES = {
+        STATUS_PENDING,
+        STATUS_COMPLETED,
+        STATUS_FAILED
+    }
+    def __init__(self):
+        """Initializes an empty task list."""
+        self.tasks: List[Task] = []
+        self.goal_completed = False
+        self.message = None
+        self.task_history = []
+        self.file_path = os.path.join(os.path.dirname(__file__), "todo.txt")
+    def get_all_tasks(self) -> List[Task]:
+        return self.tasks
+    def get_task_history(self):
+        return self.task_history
+    def complete_task(self, task: Task):
+        task = copy.deepcopy(task)
+        task.status = self.STATUS_COMPLETED
+        self.task_history.append(task)
+    def fail_task(self, task: Task):
+        task = copy.deepcopy(task)
+        task.status = self.STATUS_FAILED
+        self.task_history.append(task)
+    def get_completed_tasks(self) -> list[dict]:
+        return [task for task in self.task_history if task.status == self.STATUS_COMPLETED]
+    def get_failed_tasks(self) -> list[dict]:
+        return [task for task in self.task_history if task.status == self.STATUS_FAILED]
+    def save_to_file(self):
+        """Saves the current task list to a Markdown file."""
+        try:
+            with open(self.file_path, 'w', encoding='utf-8') as f:
+                for i, task in enumerate(self.tasks, 1):
+                    f.write(f"Task {i}: {task.description}\n")
+                    f.write(f"Status: {task.status}\n")
+                    f.write(f"Agent: {task.agent_type}\n")
+                    f.write("-" * 40 + "\n")
+        except Exception as e:
+            print(f"Error saving tasks to file: {e}")
+    def set_tasks_with_agents(self, task_assignments: List[Dict[str, str]]):
+        """
+        Clears the current task list and sets new tasks with their assigned agents.
+        Args:
+            task_assignments: A list of dictionaries, each containing:
+                            - 'task': The task description string
+                            - 'agent': The agent type
+        Example:
+            task_manager.set_tasks_with_agents([
+                {'task': 'Open Gmail app', 'agent': 'AppStarterExpert'},
+                {'task': 'Navigate to compose email', 'agent': 'UIExpert'}
+            ])
+        """
+        try:
+            self.tasks = []
+            for i, assignment in enumerate(task_assignments):
+                if not isinstance(assignment, dict) or 'task' not in assignment:
+                    raise ValueError(f"Each task assignment must be a dictionary with 'task' key at index {i}.")
+                task_description = assignment['task']
+                if not isinstance(task_description, str) or not task_description.strip():
+                    raise ValueError(f"Task description must be a non-empty string at index {i}.")
+                agent_type = assignment.get('agent', 'Default')
+                task_obj = Task(
+                    description=task_description.strip(),
+                    status=self.STATUS_PENDING,
+                    agent_type=agent_type
+                )
+                self.tasks.append(task_obj)
+            print(f"Tasks set with agents: {len(self.tasks)} tasks added.")
+            self.save_to_file()
+        except Exception as e:
+            print(f"Error setting tasks with agents: {e}")
+    def complete_goal(self, message: str):
+        """
+        Marks the goal as completed, use this whether the task completion was successful or on failure.
+        This method should be called when the task is finished, regardless of the outcome.
+        Args:
+            message: The message to be logged.
+        """
+        self.goal_completed = True
+        self.message = message
+        print(f"Goal completed: {message}")

droidrun/agent/context/todo.txt ADDED Viewed

@@ -0,0 +1,4 @@
+Task 1: Precondition: None. Goal: Open the notification shade to find the settings button.
+Status: pending
+Agent: Default
+----------------------------------------

droidrun/agent/droid/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""
+Droidrun Agent Module.
+This module exposes the CodeActAgent and DroidAgent classes for automating Android devices.
+"""
+from droidrun.agent.codeact.codeact_agent import CodeActAgent
+from droidrun.agent.droid.droid_agent import DroidAgent
+# Public API of the droidrun.agent.droid package.
+__all__ = [
+    "CodeActAgent",
+    "DroidAgent"
+]

droidrun 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

droidrun 0.1.0-py3-none-any.whl → 0.3.0-py3-none-any.whl