PyPI - droidrun - Versions diffs - 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

droidrun 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

droidrun/__init__.py +22 -10
droidrun/__main__.py +1 -2
droidrun/adb/__init__.py +3 -3
droidrun/adb/device.py +2 -2
droidrun/adb/manager.py +2 -2
droidrun/agent/__init__.py +5 -15
droidrun/agent/codeact/__init__.py +11 -0
droidrun/agent/codeact/codeact_agent.py +420 -0
droidrun/agent/codeact/events.py +28 -0
droidrun/agent/codeact/prompts.py +26 -0
droidrun/agent/common/default.py +5 -0
droidrun/agent/common/events.py +4 -0
droidrun/agent/context/__init__.py +23 -0
droidrun/agent/context/agent_persona.py +15 -0
droidrun/agent/context/context_injection_manager.py +66 -0
droidrun/agent/context/episodic_memory.py +15 -0
droidrun/agent/context/personas/__init__.py +11 -0
droidrun/agent/context/personas/app_starter.py +44 -0
droidrun/agent/context/personas/default.py +95 -0
droidrun/agent/context/personas/extractor.py +52 -0
droidrun/agent/context/personas/ui_expert.py +107 -0
droidrun/agent/context/reflection.py +20 -0
droidrun/agent/context/task_manager.py +124 -0
droidrun/agent/context/todo.txt +4 -0
droidrun/agent/droid/__init__.py +13 -0
droidrun/agent/droid/droid_agent.py +357 -0
droidrun/agent/droid/events.py +28 -0
droidrun/agent/oneflows/reflector.py +265 -0
droidrun/agent/planner/__init__.py +13 -0
droidrun/agent/planner/events.py +16 -0
droidrun/agent/planner/planner_agent.py +268 -0
droidrun/agent/planner/prompts.py +124 -0
droidrun/agent/utils/__init__.py +3 -0
droidrun/agent/utils/async_utils.py +17 -0
droidrun/agent/utils/chat_utils.py +312 -0
droidrun/agent/utils/executer.py +132 -0
droidrun/agent/utils/llm_picker.py +147 -0
droidrun/agent/utils/trajectory.py +184 -0
droidrun/cli/__init__.py +1 -1
droidrun/cli/logs.py +283 -0
droidrun/cli/main.py +358 -149
droidrun/run.py +105 -0
droidrun/tools/__init__.py +4 -30
droidrun/tools/adb.py +879 -0
droidrun/tools/ios.py +594 -0
droidrun/tools/tools.py +99 -0
droidrun-0.3.0.dist-info/METADATA +149 -0
droidrun-0.3.0.dist-info/RECORD +52 -0
droidrun/agent/llm_reasoning.py +0 -567
droidrun/agent/react_agent.py +0 -556
droidrun/llm/__init__.py +0 -24
droidrun/tools/actions.py +0 -854
droidrun/tools/device.py +0 -29
droidrun-0.1.0.dist-info/METADATA +0 -276
droidrun-0.1.0.dist-info/RECORD +0 -20
{droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/WHEEL +0 -0
{droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/entry_points.txt +0 -0
{droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/licenses/LICENSE +0 -0

droidrun/agent/planner/planner_agent.py ADDED Viewed

@@ -0,0 +1,268 @@
+from droidrun.agent.planner.events import *
+from droidrun.agent.planner.prompts import (
+    DEFAULT_PLANNER_SYSTEM_PROMPT,
+    DEFAULT_PLANNER_USER_PROMPT,
+)
+import logging
+import asyncio
+from typing import List, TYPE_CHECKING, Union
+import inspect
+from llama_index.core.base.llms.types import ChatMessage, ChatResponse
+from llama_index.core.prompts import PromptTemplate
+from llama_index.core.llms.llm import LLM
+from llama_index.core.workflow import Workflow, StartEvent, StopEvent, Context, step
+from llama_index.core.memory import Memory
+from llama_index.core.llms.llm import LLM
+from droidrun.agent.utils.executer import SimpleCodeExecutor
+from droidrun.agent.utils import chat_utils
+from droidrun.agent.context.task_manager import TaskManager
+from droidrun.tools import Tools
+from droidrun.agent.common.events import ScreenshotEvent
+from droidrun.agent.planner.events import (
+    PlanInputEvent,
+    PlanCreatedEvent,
+    PlanThinkingEvent,
+)
+from droidrun.agent.context.agent_persona import AgentPersona
+from droidrun.agent.context.reflection import Reflection
+from dotenv import load_dotenv
+# Import-time side effect: load_dotenv() is called with no arguments as soon
+# as this module is imported.
+load_dotenv()
+# Setup logger. The logger is named "droidrun" rather than after this module.
+logger = logging.getLogger("droidrun")
+# NOTE(review): Tools is already imported unconditionally above, so this
+# TYPE_CHECKING-guarded import looks redundant -- confirm before removing.
+if TYPE_CHECKING:
+    from droidrun.tools import Tools
+class PlannerAgent(Workflow):
+    """
+    Workflow that asks an LLM to plan the next tasks for a goal.
+    Steps run in this order: ``prepare_chat`` -> ``handle_llm_input`` ->
+    ``handle_llm_output`` -> ``finalize``. ``handle_llm_output`` loops back to
+    ``handle_llm_input`` (by returning a ``PlanInputEvent``) when the LLM reply
+    contains no code or when executing that code raises.
+    The only callables handed to the code executor are the task manager's
+    ``set_tasks_with_agents`` and ``complete_goal``.
+    """
+    def __init__(
+        self,
+        goal: str,
+        llm: LLM,
+        personas: List[AgentPersona],
+        task_manager: TaskManager,
+        tools_instance: Tools,
+        system_prompt=None,
+        user_prompt=None,
+        debug=False,
+        *args,
+        **kwargs,
+    ) -> None:
+        """
+        Set up prompts, planning tools and the code executor.
+        Args:
+            goal: Goal text inserted into the default user prompt.
+            llm: Chat model; ``class_name()`` and ``achat()`` are called on it.
+            personas: Agent personas described in the default system prompt.
+            task_manager: Provides the two planning tools and the task lists.
+            tools_instance: Device tools used for the screenshot, clickable
+                elements, phone state and the ``memory`` attribute.
+            system_prompt: Used instead of the default system prompt when truthy.
+            user_prompt: Used instead of the default user prompt when truthy.
+            debug: Stored on the instance; not read anywhere in this class.
+            *args: Forwarded to ``Workflow.__init__``.
+            **kwargs: Forwarded to ``Workflow.__init__``.
+        """
+        super().__init__(*args, **kwargs)
+        self.llm = llm
+        self.goal = goal
+        self.task_manager = task_manager
+        self.debug = debug
+        self.chat_memory = None
+        self.remembered_info = None
+        self.reflection: Reflection = None
+        self.current_retry = 0
+        self.steps_counter = 0
+        # The planner may only call these two task-manager methods; they are
+        # registered under their own function names.
+        planning_tools = (
+            self.task_manager.set_tasks_with_agents,
+            self.task_manager.complete_goal,
+        )
+        self.tool_list = {tool.__name__: tool for tool in planning_tools}
+        self.tools_description = chat_utils.parse_tool_descriptions(self.tool_list)
+        self.tools_instance = tools_instance
+        self.personas = personas
+        # NOTE(review): the default system prompt template contains no
+        # {tools_description} placeholder, so str.format() ignores that keyword
+        # for the default prompt -- confirm whether the template should use it.
+        self.system_prompt = system_prompt or DEFAULT_PLANNER_SYSTEM_PROMPT.format(
+            tools_description=self.tools_description,
+            agents=chat_utils.parse_persona_description(self.personas),
+        )
+        self.user_prompt = user_prompt or DEFAULT_PLANNER_USER_PROMPT.format(goal=goal)
+        self.system_message = ChatMessage(role="system", content=self.system_prompt)
+        self.user_message = ChatMessage(role="user", content=self.user_prompt)
+        # NOTE(review): asyncio.get_event_loop() is deprecated when no event
+        # loop is running -- confirm this class is always constructed from
+        # inside a running loop.
+        self.executer = SimpleCodeExecutor(
+            loop=asyncio.get_event_loop(), globals={}, locals={}, tools=self.tool_list
+        )
+    @step
+    async def prepare_chat(self, ctx: Context, ev: StartEvent) -> PlanInputEvent:
+        """
+        Load the chat memory from the context and seed it for a planning round.
+        Reads the optional ``remembered_info`` and ``reflection`` values from
+        the start event and returns every message currently in memory.
+        """
+        logger.info("💬 Preparing planning session...")
+        self.chat_memory: Memory = await ctx.get(
+            "chat_memory", default=Memory.from_defaults()
+        )
+        await self.chat_memory.aput(self.user_message)
+        # getattr() with a default so that a run() call which omits either
+        # keyword does not fail on a missing start-event attribute.
+        remembered_info = getattr(ev, "remembered_info", None)
+        if remembered_info:
+            self.remembered_info = remembered_info
+        # A falsy or missing reflection always resets the stored one to None.
+        self.reflection = getattr(ev, "reflection", None) or None
+        assert len(self.chat_memory.get_all()) > 0 or self.user_prompt, "Memory input, user prompt or user input cannot be empty."
+        # NOTE(review): this appends a second user message whose content is a
+        # PromptTemplate object rather than a string. Left untouched because
+        # later message-augmentation helpers may rely on this trailing message
+        # -- confirm intent before changing.
+        await self.chat_memory.aput(ChatMessage(role="user", content=PromptTemplate(self.user_prompt or DEFAULT_PLANNER_USER_PROMPT.format(goal=self.goal))))
+        input_messages = self.chat_memory.get_all()
+        logger.debug(f"  - Memory contains {len(input_messages)} messages")
+        return PlanInputEvent(input=input_messages)
+    @step
+    async def handle_llm_input(
+        self, ev: PlanInputEvent, ctx: Context
+    ) -> PlanThinkingEvent:
+        """
+        Capture the device state, query the LLM and parse its reply.
+        The screenshot, clickable elements, phone state, remembered info and
+        reflection are stored in the context before the LLM is called. The
+        reply is appended to chat memory and split into thoughts and code.
+        """
+        chat_history = ev.input
+        assert len(chat_history) > 0, "Chat history cannot be empty."
+        ctx.write_event_to_stream(ev)
+        self.steps_counter += 1
+        logger.info("🧠 Thinking about how to plan the goal...")
+        # take_screenshot() returns an indexable result; element 1 is what is
+        # streamed and stored as the screenshot.
+        screenshot = (await self.tools_instance.take_screenshot())[1]
+        ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
+        await ctx.set("screenshot", screenshot)
+        await ctx.set("ui_state", await self.tools_instance.get_clickables())
+        await ctx.set("phone_state", await self.tools_instance.get_phone_state())
+        await ctx.set("remembered_info", self.remembered_info)
+        await ctx.set("reflection", self.reflection)
+        response = await self._get_llm_response(ctx, chat_history)
+        await self.chat_memory.aput(response.message)
+        code, thoughts = chat_utils.extract_code_and_thought(response.message.content)
+        event = PlanThinkingEvent(thoughts=thoughts, code=code)
+        ctx.write_event_to_stream(event)
+        return event
+    @step
+    async def handle_llm_output(
+        self, ev: PlanThinkingEvent, ctx: Context
+    ) -> Union[PlanInputEvent, PlanCreatedEvent]:
+        """
+        Execute the code produced by the LLM and report the resulting plan.
+        Returns a ``PlanCreatedEvent`` carrying all tasks when the code ran
+        without raising. Returns a ``PlanInputEvent`` (another LLM round) when
+        the reply had no code or anything in the execution path raised.
+        """
+        logger.debug("🤖 Processing planning output...")
+        code = ev.code
+        if not code:
+            return await self._request_next_plan()
+        try:
+            result = await self.executer.execute(ctx, code)
+            logger.info("📝 Planning complete")
+            logger.debug(f"  - Planning code executed. Result: {result}")
+            await self.chat_memory.aput(
+                ChatMessage(
+                    role="user", content=f"Execution Result:\n```\n{result}\n```"
+                )
+            )
+            self.remembered_info = self.tools_instance.memory
+            tasks = self.task_manager.get_all_tasks()
+            event = PlanCreatedEvent(tasks=tasks)
+            # The plan is only logged and streamed while the goal is still open.
+            if not self.task_manager.goal_completed:
+                logger.info(f"📋 Current plan created with {len(tasks)} tasks:")
+                for i, task in enumerate(tasks):
+                    logger.info(
+                        f"  Task {i}: [{task.status.upper()}] [{task.agent_type}] {task.description}"
+                    )
+                ctx.write_event_to_stream(event)
+            return event
+        except Exception as e:
+            # Deliberately best-effort: the failure is logged at debug level
+            # and the LLM is simply asked to try again.
+            logger.debug(f"error handling Planner: {e}")
+            return await self._request_next_plan()
+    @step
+    async def finalize(self, ev: PlanCreatedEvent, ctx: Context) -> StopEvent:
+        """Store the chat memory in the context and stop with ``{"tasks": ...}``."""
+        await ctx.set("chat_memory", self.chat_memory)
+        return StopEvent(result={"tasks": ev.tasks})
+    async def _request_next_plan(self) -> PlanInputEvent:
+        """
+        Ask the LLM to plan again.
+        Appends the fixed reminder message to chat memory and returns a
+        ``PlanInputEvent`` holding the full memory contents.
+        """
+        await self.chat_memory.aput(
+            ChatMessage(
+                role="user",
+                content="Please either set new tasks using set_tasks_with_agents() or mark the goal as complete using complete_goal() if done.",
+            )
+        )
+        logger.debug("🔄 Waiting for next plan or completion.")
+        return PlanInputEvent(input=self.chat_memory.get_all())
+    async def _get_llm_response(
+        self, ctx: Context, chat_history: List[ChatMessage]
+    ) -> ChatResponse:
+        """
+        Build the full message list and get a chat response from the LLM.
+        The chat history is augmented, in order, with the screenshot (skipped
+        when the model class name is "DeepSeek"), the completed/failed task
+        history, remembered info, the reflection summary, the phone state and
+        the UI state. The system message is then prepended and ``achat`` is called.
+        Raises:
+            AssertionError: If the response has no ``message`` attribute.
+            Exception: Any other error is logged and re-raised unchanged.
+        """
+        try:
+            logger.debug(f"  - Sending {len(chat_history)} messages to LLM.")
+            model = self.llm.class_name()
+            if model != "DeepSeek":
+                chat_history = await chat_utils.add_screenshot_image_block(
+                    await ctx.get("screenshot"), chat_history
+                )
+            else:
+                logger.warning(
+                    "[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
+                )
+            chat_history = await chat_utils.add_task_history_block(
+                self.task_manager.get_completed_tasks(),
+                self.task_manager.get_failed_tasks(),
+                chat_history,
+            )
+            remembered_info = await ctx.get("remembered_info", default=None)
+            if remembered_info:
+                chat_history = await chat_utils.add_memory_block(remembered_info, chat_history)
+            reflection = await ctx.get("reflection", default=None)
+            if reflection:
+                chat_history = await chat_utils.add_reflection_summary(reflection, chat_history)
+            chat_history = await chat_utils.add_phone_state_block(await ctx.get("phone_state"), chat_history)
+            chat_history = await chat_utils.add_ui_text_block(await ctx.get("ui_state"), chat_history)
+            messages_to_send = [
+                chat_utils.message_copy(msg)
+                for msg in [self.system_message] + chat_history
+            ]
+            logger.debug(f"  - Final message count: {len(messages_to_send)}")
+            response = await self.llm.achat(messages=messages_to_send)
+            if not hasattr(response, "message"):
+                # Raised explicitly instead of via ``assert`` so the check is
+                # still performed when Python runs with -O.
+                raise AssertionError(
+                    f"LLM response does not have a message attribute.\nResponse: {response}"
+                )
+            logger.debug("  - Received response from LLM.")
+            return response
+        except Exception as e:
+            logger.error(f"Could not get an answer from LLM: {repr(e)}")
+            raise

droidrun/agent/planner/prompts.py ADDED Viewed

@@ -0,0 +1,124 @@
+"""
+Prompt templates for the PlannerAgent.
+This module contains all the prompts used by the PlannerAgent,
+separated from the workflow logic for better maintainability.
+"""
+# System prompt for the PlannerAgent that explains its role and capabilities.
+# This is a str.format() template: {agents} is the only placeholder, and the
+# doubled braces in the Round 1 example render as literal braces once formatted.
+DEFAULT_PLANNER_SYSTEM_PROMPT = """You are an Android Task Planner. Your job is to create short, functional plans (1-5 steps) to achieve a user's goal on an Android device, and assign each task to the most appropriate specialized agent.
+**Inputs You Receive:**
+1.  **User's Overall Goal.**
+2.  **Current Device State:**
+    *   A **screenshot** of the current screen.
+    *   **JSON data** of visible UI elements.
+    *   The current visible Android activity
+3.  **Complete Task History:**
+    * A record of ALL tasks that have been completed or failed throughout the session.
+    * For completed tasks, the results and any discovered information.
+    * For failed tasks, the detailed reasons for failure.
+    * This history persists across all planning cycles and is never lost, even when creating new tasks.
+**Available Specialized Agents:**
+You have access to specialized agents, each optimized for specific types of tasks:
+{agents}
+**Your Task:**
+Given the goal, current state, and task history, devise the **next 1-5 functional steps** and assign each to the most appropriate specialized agent.
+Focus on what to achieve, not how. Planning fewer steps at a time improves accuracy, as the state can change.
+**Step Format:**
+Each step must be a functional goal.
+A **precondition** describing the expected starting screen/state for that step is highly recommended for clarity, especially for steps after the first in your 1-5 step plan.
+Each task string can start with "Precondition: ... Goal: ...".
+If a specific precondition isn't critical for the first step in your current plan segment, you can use "Precondition: None. Goal: ..." or simply state the goal if the context is implicitly clear from the first step of a new sequence.
+**Your Output:**
+*   Use the `set_tasks_with_agents` tool to provide your 1-5 step plan with agent assignments.
+*   Each task should be assigned to a specialized agent using it's name.
+*   **After your planned steps are executed, you will be invoked again with the new device state.**
+You will then:
+    1.  Assess if the **overall user goal** is complete.
+    2.  If complete, call the `complete_goal(message: str)` tool.
+    3.  If not complete, generate the next 1-5 steps using `set_tasks_with_agents`.
+**Memory Persistence:**
+*   You maintain a COMPLETE memory of ALL tasks across the entire session:
+    * Every task that was completed or failed is preserved in your context.
+    * Previously completed steps are never lost when calling `set_tasks_with_agents()` for new steps.
+    * You will see all historical tasks each time you're called.
+    * Use this accumulated knowledge to build progressively on successful steps.
+    * When you see discovered information (e.g., dates, locations), use it explicitly in future tasks.
+**Key Rules:**
+*   **Functional Goals ONLY:** (e.g., "Navigate to Wi-Fi settings", "Enter 'MyPassword' into the password field").
+*   **NO Low-Level Actions:** Do NOT specify swipes, taps on coordinates, or element IDs in your plan.
+*   **Short Plans (1-5 steps):** Plan only the immediate next actions.
+*   **Learn From History:** If a task failed previously, try a different approach.
+*   **Use Tools:** Your response *must* be a Python code block calling `set_tasks_with_agents` or `complete_goal`.
+*   **Smart Agent Assignment:** Choose the most appropriate agent for each task type.
+**Available Planning Tools:**
+*   `set_tasks_with_agents(task_assignments: List[Dict[str, str]])`: Defines the sequence of tasks with agent assignments. Each element should be a dictionary with 'task' and 'agent' keys.
+*   `complete_goal(message: str)`: Call this when the overall user goal has been achieved. The message can summarize the completion.
+---
+**Example Interaction Flow:**
+**User Goal:** Open Gmail and compose a new email.
+**(Round 1) Planner Input:**
+*   Goal: "Open Gmail and compose a new email"
+*   Current State: Screenshot of Home screen, UI JSON.
+*   Task History: None (first planning cycle)
+**Planner Thought Process (Round 1):**
+Need to first open Gmail app, then navigate to compose. The first task is app launching, the second is UI navigation.
+**Planner Output (Round 1):**
+```python
+set_tasks_with_agents([
+    {{'task': 'Precondition: None. Goal: Open the Gmail app.', 'agent': <Specialized_Agent>}},
+    {{'task': 'Precondition: Gmail app is open and loaded. Goal: Navigate to compose new email.', 'agent': <Specialized Agents>}}
+])
+```
+**(After specialized agents perform these steps...)**
+**(Round 2) Planner Input:**
+*   Goal: "Open Gmail and compose a new email"
+*   Current State: Screenshot of Gmail compose screen, UI JSON showing compose interface.
+*   Task History: Shows completed tasks with their assigned agents
+**Planner Output (Round 2):**
+```python
+complete_goal(message="Gmail has been opened and compose email screen is ready for use.")
+```
+"""
+# User prompt template that simply states the goal.
+# str.format() placeholder: {goal}.
+DEFAULT_PLANNER_USER_PROMPT = """Goal: {goal}"""
+# Prompt template for when a task fails, to help recover and plan new steps.
+# str.format() placeholders: {task_description}, {reason} (used twice) and
+# {goal} (used twice). The text starts and ends with a newline.
+DEFAULT_PLANNER_TASK_FAILED_PROMPT = """
+PLANNING UPDATE: The execution of a task failed.
+Failed Task Description: "{task_description}"
+Reported Reason: {reason}
+The previous plan has been stopped. I have attached a screenshot representing the device's **current state** immediately after the failure. Please analyze this visual information.
+Original Goal: {goal}
+Instruction: Based **only** on the provided screenshot showing the current state and the reason for the previous failure ('{reason}'), generate a NEW plan starting from this observed state to achieve the original goal: '{goal}'.
+"""
+# Export all prompts: the three template constants defined in this module.
+__all__ = [
+    "DEFAULT_PLANNER_SYSTEM_PROMPT",
+    "DEFAULT_PLANNER_USER_PROMPT",
+    "DEFAULT_PLANNER_TASK_FAILED_PROMPT"
+]

droidrun/agent/utils/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""
+Utility modules for DroidRun agents.
+"""

droidrun/agent/utils/async_utils.py ADDED Viewed

@@ -0,0 +1,17 @@
+import asyncio
+def async_to_sync(func):
+    """
+    Convert an async function to a sync function.
+    Each call of the returned function runs the coroutine to completion with
+    ``asyncio.run`` and returns its result. ``asyncio.run`` cannot be used
+    while another event loop is running in the same thread, so the wrapper
+    must only be called from synchronous code.
+    Args:
+        func: Async function to convert
+    Returns:
+        Callable: Synchronous version of the async function, keeping the
+        original function's name and docstring
+    """
+    # Imported locally so this block does not depend on a module-level import.
+    from functools import wraps
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        return asyncio.run(func(*args, **kwargs))
+    return wrapper

droidrun 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

droidrun 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl