PyPI - droidrun - Versions diffs - 0.3.10.dev3__py3-none-any.whl → 0.3.10.dev4__py3-none-any.whl - Mend

droidrun 0.3.10.dev3__py3-none-any.whl → 0.3.10.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

droidrun/agent/codeact/__init__.py +1 -4
droidrun/agent/codeact/codeact_agent.py +95 -86
droidrun/agent/codeact/events.py +1 -2
droidrun/agent/context/__init__.py +5 -9
droidrun/agent/context/episodic_memory.py +1 -3
droidrun/agent/context/task_manager.py +8 -2
droidrun/agent/droid/droid_agent.py +102 -141
droidrun/agent/droid/events.py +45 -14
droidrun/agent/executor/__init__.py +6 -4
droidrun/agent/executor/events.py +29 -9
droidrun/agent/executor/executor_agent.py +86 -28
droidrun/agent/executor/prompts.py +8 -2
droidrun/agent/manager/__init__.py +6 -7
droidrun/agent/manager/events.py +16 -4
droidrun/agent/manager/manager_agent.py +130 -69
droidrun/agent/manager/prompts.py +1 -159
droidrun/agent/utils/chat_utils.py +64 -2
droidrun/agent/utils/device_state_formatter.py +54 -26
droidrun/agent/utils/executer.py +66 -80
droidrun/agent/utils/inference.py +11 -10
droidrun/agent/utils/tools.py +58 -6
droidrun/agent/utils/trajectory.py +18 -12
droidrun/cli/logs.py +118 -56
droidrun/cli/main.py +154 -136
droidrun/config_manager/__init__.py +9 -7
droidrun/config_manager/app_card_loader.py +148 -0
droidrun/config_manager/config_manager.py +200 -102
droidrun/config_manager/path_resolver.py +104 -0
droidrun/config_manager/prompt_loader.py +75 -0
droidrun/macro/__init__.py +1 -1
droidrun/macro/cli.py +23 -18
droidrun/telemetry/__init__.py +2 -2
droidrun/telemetry/events.py +3 -3
droidrun/telemetry/tracker.py +1 -1
droidrun/tools/adb.py +1 -1
droidrun/tools/ios.py +3 -2
{droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev4.dist-info}/METADATA +9 -1
droidrun-0.3.10.dev4.dist-info/RECORD +61 -0
droidrun/agent/codeact/prompts.py +0 -26
droidrun/agent/context/agent_persona.py +0 -16
droidrun/agent/context/context_injection_manager.py +0 -66
droidrun/agent/context/personas/__init__.py +0 -11
droidrun/agent/context/personas/app_starter.py +0 -44
droidrun/agent/context/personas/big_agent.py +0 -96
droidrun/agent/context/personas/default.py +0 -95
droidrun/agent/context/personas/ui_expert.py +0 -108
droidrun/agent/planner/__init__.py +0 -13
droidrun/agent/planner/events.py +0 -21
droidrun/agent/planner/planner_agent.py +0 -311
droidrun/agent/planner/prompts.py +0 -124
droidrun-0.3.10.dev3.dist-info/RECORD +0 -70
{droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev4.dist-info}/WHEEL +0 -0
{droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev4.dist-info}/entry_points.txt +0 -0
{droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev4.dist-info}/licenses/LICENSE +0 -0

droidrun/agent/codeact/__init__.py CHANGED Viewed

@@ -1,8 +1,5 @@
 from droidrun.agent.codeact.codeact_agent import CodeActAgent
-from droidrun.agent.codeact.prompts import DEFAULT_CODE_ACT_USER_PROMPT, DEFAULT_NO_THOUGHTS_PROMPT
 __all__ = [
-    "CodeActAgent",
-    "DEFAULT_CODE_ACT_USER_PROMPT",
-    "DEFAULT_NO_THOUGHTS_PROMPT"
+    "CodeActAgent"
 ]

droidrun/agent/codeact/codeact_agent.py CHANGED Viewed

@@ -3,12 +3,11 @@ import json
 import logging
 import re
 import time
-from typing import List, Union
+from typing import List, Union, Optional, TYPE_CHECKING
 from llama_index.core.base.llms.types import ChatMessage, ChatResponse
 from llama_index.core.llms.llm import LLM
 from llama_index.core.memory import Memory
-from llama_index.core.prompts import PromptTemplate
 from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow, step
 from droidrun.agent.codeact.events import (
@@ -19,20 +18,26 @@ from droidrun.agent.codeact.events import (
     TaskInputEvent,
     TaskThinkingEvent,
 )
-from droidrun.agent.codeact.prompts import (
-    DEFAULT_CODE_ACT_USER_PROMPT,
-    DEFAULT_NO_THOUGHTS_PROMPT,
-)
 from droidrun.agent.common.constants import LLM_HISTORY_LIMIT
 from droidrun.agent.common.events import RecordUIStateEvent, ScreenshotEvent
-from droidrun.agent.context.agent_persona import AgentPersona
 from droidrun.agent.context.episodic_memory import EpisodicMemory, EpisodicMemoryStep
 from droidrun.agent.usage import get_usage_from_response
 from droidrun.agent.utils import chat_utils
-from droidrun.agent.utils.executer import SimpleCodeExecutor
-from droidrun.agent.utils.tools import ATOMIC_ACTION_SIGNATURES, get_atomic_tool_descriptions, build_custom_tool_descriptions
+from droidrun.agent.utils.executer import SimpleCodeExecutor, ExecuterState
+from droidrun.agent.utils.device_state_formatter import format_device_state
+from droidrun.agent.utils.tools import (
+    ATOMIC_ACTION_SIGNATURES,
+    build_custom_tool_descriptions,
+    get_atomic_tool_descriptions,
+)
+from droidrun.config_manager.config_manager import AgentConfig
+from droidrun.config_manager.prompt_loader import PromptLoader
 from droidrun.tools import Tools
+if TYPE_CHECKING:
+    from droidrun.agent.droid.droid_agent import DroidAgentState
 logger = logging.getLogger("droidrun")
@@ -46,81 +51,69 @@ class CodeActAgent(Workflow):
     def __init__(
         self,
         llm: LLM,
-        persona: AgentPersona,
-        vision: bool,
+        agent_config: AgentConfig,
         tools_instance: "Tools",
-        max_steps: int = 5,
         custom_tools: dict = None,
         debug: bool = False,
+        shared_state: Optional["DroidAgentState"] = None,
         *args,
         **kwargs,
     ):
-        # assert instead of if
         assert llm, "llm must be provided."
         super().__init__(*args, **kwargs)
         self.llm = llm
-        self.max_steps = max_steps
-        self.user_prompt = persona.user_prompt
-        self.no_thoughts_prompt = None
-        self.vision = vision
+        self.agent_config = agent_config
+        self.config = agent_config.codeact  # Shortcut to codeact config
+        self.max_steps = agent_config.max_steps
+        self.vision = agent_config.codeact.vision
+        self.debug = debug
+        self.tools = tools_instance
+        self.shared_state = shared_state
         self.chat_memory = None
-        self.episodic_memory = EpisodicMemory(persona=persona)
+        self.episodic_memory = EpisodicMemory()
         self.remembered_info = None
         self.goal = None
         self.steps_counter = 0
         self.code_exec_counter = 0
-        self.debug = debug
-        self.tools = tools_instance
-        # Merge custom_tools with ATOMIC_ACTION_SIGNATURES
-        # Custom tools are treated the same as atomic actions by CodeAct
+        # Build tool list
         merged_signatures = {**ATOMIC_ACTION_SIGNATURES, **(custom_tools or {})}
-        # Build tool_list from merged signatures
         self.tool_list = {}
         for action_name, signature in merged_signatures.items():
             func = signature["function"]
-            # Create bound function (curry tools_instance as first argument)
-            # Handle both sync and async functions
             if asyncio.iscoroutinefunction(func):
-                async def make_async_bound(f, ti):
+                # Create async bound function with proper closure
+                def make_bound(f, ti):
                     async def bound_func(*args, **kwargs):
                         return await f(ti, *args, **kwargs)
                     return bound_func
-                self.tool_list[action_name] = asyncio.run(make_async_bound(func, tools_instance))
+                self.tool_list[action_name] = make_bound(func, tools_instance)
             else:
-                self.tool_list[action_name] = lambda *args, f=func, ti=tools_instance: f(ti, *args)
+                self.tool_list[action_name] = lambda *args, f=func, ti=tools_instance, **kwargs: f(ti, *args, **kwargs)
-        # Add non-atomic tools (remember, complete) from tools_instance
         self.tool_list["remember"] = tools_instance.remember
         self.tool_list["complete"] = tools_instance.complete
-        # Get tool descriptions from ATOMIC_ACTION_SIGNATURES and custom_tools
+        # Build tool descriptions
         self.tool_descriptions = get_atomic_tool_descriptions()
-        # Add custom tool descriptions if provided
         custom_descriptions = build_custom_tool_descriptions(custom_tools or {})
         if custom_descriptions:
             self.tool_descriptions += "\n" + custom_descriptions
-        # Add descriptions for remember/complete
         self.tool_descriptions += "\n- remember(information: str): Remember information for later use"
         self.tool_descriptions += "\n- complete(success: bool, reason: str): Mark task as complete"
-        self.system_prompt_content = persona.system_prompt.format(
-            tool_descriptions=self.tool_descriptions
-        )
-        self.system_prompt = ChatMessage(
-            role="system", content=self.system_prompt_content
+        # Load prompts from config
+        system_prompt_text = PromptLoader.load_prompt(
+            agent_config.get_codeact_system_prompt_path(),
+            {"tool_descriptions": self.tool_descriptions}
         )
+        self.system_prompt = ChatMessage(role="system", content=system_prompt_text)
-        self.required_context = persona.required_context
+        self.user_prompt_template = PromptLoader.load_prompt(agent_config.get_codeact_user_prompt_path())
         self.executor = SimpleCodeExecutor(
             loop=asyncio.get_event_loop(),
@@ -150,16 +143,21 @@ class CodeActAgent(Workflow):
         logger.debug("  - Adding goal to memory.")
         goal = user_input
-        self.user_message = ChatMessage(
-            role="user",
-            content=PromptTemplate(
-                self.user_prompt or DEFAULT_CODE_ACT_USER_PROMPT
-            ).format(goal=goal),
-        )
-        self.no_thoughts_prompt = ChatMessage(
-            role="user",
-            content=PromptTemplate(DEFAULT_NO_THOUGHTS_PROMPT).format(goal=goal),
+        # Format user prompt with goal
+        user_prompt_text = PromptLoader.load_prompt(
+            self.agent_config.get_codeact_user_prompt_path(),
+            {"goal": goal}
         )
+        self.user_message = ChatMessage(role="user", content=user_prompt_text)
+        # No thoughts prompt
+        no_thoughts_text = f"""Your previous response provided code without explaining your reasoning first. Remember to always describe your thought process and plan *before* providing the code block.
+The code you provided will be executed below.
+Now, describe the next step you will take to address the original goal: {goal}"""
+        self.no_thoughts_prompt = ChatMessage(role="user", content=no_thoughts_text)
         await self.chat_memory.aput(self.user_message)
@@ -194,38 +192,46 @@ class CodeActAgent(Workflow):
             await ctx.store.set("remembered_info", self.remembered_info)
             chat_history = await chat_utils.add_memory_block(self.remembered_info, chat_history)
-        for context in self.required_context:
-            if context == "screenshot":
-                # if vision is disabled, screenshot should save to trajectory
-                screenshot = (self.tools.take_screenshot())[1]
-                ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
+        # Always capture screenshot for trajectory
+        screenshot = (self.tools.take_screenshot())[1]
+        ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
+        await ctx.store.set("screenshot", screenshot)
-                await ctx.store.set("screenshot", screenshot)
-                if model == "DeepSeek":
-                    logger.warning(
-                        "[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
-                    )
-                elif self.vision: # if vision is enabled, add screenshot to chat history
-                    chat_history = await chat_utils.add_screenshot_image_block(screenshot, chat_history)
-            if context == "ui_state":
-                try:
-                    state = self.tools.get_state()
-                    await ctx.store.set("ui_state", state["a11y_tree"])
-                    ctx.write_event_to_stream(RecordUIStateEvent(ui_state=state["a11y_tree"]))
-                    chat_history = await chat_utils.add_ui_text_block(
-                        state["a11y_tree"], chat_history
-                    )
-                    chat_history = await chat_utils.add_phone_state_block(state["phone_state"], chat_history)
-                except Exception:
-                    logger.warning("⚠️ Error retrieving state from the connected device. Is the Accessibility Service enabled?")
-            if context == "packages":
-                chat_history = await chat_utils.add_packages_block(
-                    self.tools.list_packages(include_system_apps=True),
-                    chat_history,
-                )
+        # Add screenshot to chat only if vision enabled
+        if self.vision and model != "DeepSeek":
+            chat_history = await chat_utils.add_screenshot_image_block(screenshot, chat_history)
+        # Get and format device state using unified formatter
+        try:
+            # Get raw state from device
+            raw_state = self.tools.get_state()
+            # Format using unified function (returns 4 values)
+            formatted_text, focused_text, a11y_tree, phone_state = format_device_state(raw_state)
+            # Update shared_state if available
+            if self.shared_state is not None:
+                self.shared_state.formatted_device_state = formatted_text
+                self.shared_state.focused_text = focused_text
+                self.shared_state.a11y_tree = a11y_tree
+                self.shared_state.phone_state = phone_state
+                # Extract and store package/app name
+                self.shared_state.current_package_name = phone_state.get('packageName', 'Unknown')
+                self.shared_state.current_app_name = phone_state.get('currentApp', 'Unknown')
+            # Stream formatted state for trajectory
+            ctx.write_event_to_stream(RecordUIStateEvent(ui_state=a11y_tree))
+            # Add device state to chat using new chat_utils function
+            # This injects into LAST user message, doesn't create new message
+            chat_history = await chat_utils.add_device_state_block(formatted_text, chat_history)
+        except Exception as e:
+            logger.warning(f"⚠️ Error retrieving state from the connected device: {e}")
+            if self.debug:
+                logger.error("State retrieval error details:", exc_info=True)
         response = await self._get_llm_response(ctx, chat_history)
         if response is None:
@@ -286,8 +292,9 @@ class CodeActAgent(Workflow):
         try:
             self.code_exec_counter += 1
-            result = await self.executor.execute(ctx, code)
+            result = await self.executor.execute(ExecuterState(ui_state=ctx.store.get("ui_state", None)), code)
             logger.info(f"💡 Code execution successful. Result: {result['output']}")
+            await asyncio.sleep(self.agent_config.after_sleep_action)
             screenshots = result['screenshots']
             for screenshot in screenshots[:-1]: # the last screenshot will be captured by next step
                 ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
@@ -487,9 +494,11 @@ class CodeActAgent(Workflow):
                 logger.warning(f"Failed to capture final screenshot: {e}")
             try:
-                (a11y_tree, phone_state) = self.tools.get_state()
+                state = self.tools.get_state()
+                a11y_tree = state.get("a11y_tree", "")
+                phone_state = state.get("phone_state", "")
             except Exception as e:
-                logger.warning(f"Failed to capture final UI state: {e}")
+                raise Exception(f"Failed to capture final UI state: {e}") from e
             # Create final observation chat history and response
             final_chat_history = [{"role": "system", "content": "Final state observation after task completion"}]

droidrun/agent/codeact/events.py CHANGED Viewed

@@ -3,10 +3,9 @@ from typing import Optional
 from llama_index.core.llms import ChatMessage
 from llama_index.core.workflow import Event
+from droidrun.agent.context.episodic_memory import EpisodicMemory
 from droidrun.agent.usage import UsageResult
-from ..context.episodic_memory import EpisodicMemory
 class TaskInputEvent(Event):
     input: list[ChatMessage]

droidrun/agent/context/__init__.py CHANGED Viewed

@@ -1,19 +1,15 @@
 """
-Agent Context Module - Provides specialized agent personas and context injection management.
+Agent Context Module - Provides episodic memory and task management.
 This module contains:
-- AgentPersona: Dataclass for defining specialized agent configurations
-- ContextInjectionManager: Manager for handling different agent personas and their contexts
+- EpisodicMemory: Memory system for tracking agent steps
+- TaskManager: Manages tasks and their execution
 """
-from .agent_persona import AgentPersona
-from .context_injection_manager import ContextInjectionManager
-from .episodic_memory import EpisodicMemory, EpisodicMemoryStep
-from .task_manager import Task, TaskManager
+from droidrun.agent.context.episodic_memory import EpisodicMemory, EpisodicMemoryStep
+from droidrun.agent.context.task_manager import Task, TaskManager
 __all__ = [
-    "AgentPersona",
-    "ContextInjectionManager",
     "EpisodicMemory",
     "EpisodicMemoryStep",
     "TaskManager",

droidrun/agent/context/episodic_memory.py CHANGED Viewed

@@ -1,8 +1,6 @@
 from dataclasses import dataclass, field
 from typing import List, Optional
-from droidrun.agent.context.agent_persona import AgentPersona
 @dataclass
 class EpisodicMemoryStep:
@@ -11,7 +9,7 @@ class EpisodicMemoryStep:
     timestamp: float
     screenshot: Optional[bytes]
 @dataclass
 class EpisodicMemory:
-    persona: AgentPersona
     steps: List[EpisodicMemoryStep] = field(default_factory=list)

droidrun/agent/context/task_manager.py CHANGED Viewed

@@ -3,6 +3,8 @@ import os
 from dataclasses import dataclass
 from typing import Dict, List, Optional
+from droidrun.config_manager.path_resolver import PathResolver
 @dataclass
 class Task:
@@ -36,7 +38,8 @@ class TaskManager:
         self.goal_completed = False
         self.message = None
         self.task_history = []
-        self.file_path = os.path.join(os.path.dirname(__file__), "todo.txt")
+        # Save to working directory for user visibility
+        self.file_path = PathResolver.resolve("droidrun_tasks.txt", create_if_missing=True)
     def get_all_tasks(self) -> List[Task]:
         return self.tasks
@@ -71,8 +74,11 @@ class TaskManager:
     def save_to_file(self):
-        """Saves the current task list to a Markdown file."""
+        """Saves the current task list to a text file."""
         try:
+            # Ensure parent directory exists
+            self.file_path.parent.mkdir(parents=True, exist_ok=True)
             with open(self.file_path, 'w', encoding='utf-8') as f:
                 for i, task in enumerate(self.tasks, 1):
                     f.write(f"Task {i}: {task.description}\n")

droidrun 0.3.10.dev3__py3-none-any.whl → 0.3.10.dev4__py3-none-any.whl

droidrun 0.3.10.dev3__py3-none-any.whl → 0.3.10.dev4__py3-none-any.whl