droidrun 0.3.8__py3-none-any.whl → 0.3.10.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/__init__.py +2 -3
- droidrun/__main__.py +1 -1
- droidrun/agent/__init__.py +1 -1
- droidrun/agent/codeact/__init__.py +1 -4
- droidrun/agent/codeact/codeact_agent.py +112 -48
- droidrun/agent/codeact/events.py +6 -3
- droidrun/agent/codeact/prompts.py +2 -2
- droidrun/agent/common/constants.py +2 -0
- droidrun/agent/common/events.py +5 -3
- droidrun/agent/context/__init__.py +1 -3
- droidrun/agent/context/agent_persona.py +2 -1
- droidrun/agent/context/context_injection_manager.py +6 -6
- droidrun/agent/context/episodic_memory.py +5 -3
- droidrun/agent/context/personas/__init__.py +3 -3
- droidrun/agent/context/personas/app_starter.py +3 -3
- droidrun/agent/context/personas/big_agent.py +3 -3
- droidrun/agent/context/personas/default.py +3 -3
- droidrun/agent/context/personas/ui_expert.py +5 -5
- droidrun/agent/context/task_manager.py +15 -17
- droidrun/agent/droid/__init__.py +1 -1
- droidrun/agent/droid/droid_agent.py +327 -182
- droidrun/agent/droid/events.py +91 -9
- droidrun/agent/executor/__init__.py +13 -0
- droidrun/agent/executor/events.py +24 -0
- droidrun/agent/executor/executor_agent.py +327 -0
- droidrun/agent/executor/prompts.py +136 -0
- droidrun/agent/manager/__init__.py +18 -0
- droidrun/agent/manager/events.py +20 -0
- droidrun/agent/manager/manager_agent.py +459 -0
- droidrun/agent/manager/prompts.py +223 -0
- droidrun/agent/oneflows/app_starter_workflow.py +118 -0
- droidrun/agent/oneflows/text_manipulator.py +204 -0
- droidrun/agent/planner/__init__.py +3 -3
- droidrun/agent/planner/events.py +6 -3
- droidrun/agent/planner/planner_agent.py +60 -53
- droidrun/agent/planner/prompts.py +2 -2
- droidrun/agent/usage.py +15 -13
- droidrun/agent/utils/__init__.py +11 -1
- droidrun/agent/utils/async_utils.py +2 -1
- droidrun/agent/utils/chat_utils.py +48 -60
- droidrun/agent/utils/device_state_formatter.py +177 -0
- droidrun/agent/utils/executer.py +13 -12
- droidrun/agent/utils/inference.py +114 -0
- droidrun/agent/utils/llm_picker.py +2 -0
- droidrun/agent/utils/message_utils.py +85 -0
- droidrun/agent/utils/tools.py +220 -0
- droidrun/agent/utils/trajectory.py +8 -7
- droidrun/cli/__init__.py +1 -1
- droidrun/cli/logs.py +29 -28
- droidrun/cli/main.py +279 -143
- droidrun/config_manager/__init__.py +25 -0
- droidrun/config_manager/config_manager.py +583 -0
- droidrun/macro/__init__.py +2 -2
- droidrun/macro/__main__.py +1 -1
- droidrun/macro/cli.py +36 -34
- droidrun/macro/replay.py +7 -9
- droidrun/portal.py +1 -1
- droidrun/telemetry/__init__.py +2 -2
- droidrun/telemetry/events.py +3 -4
- droidrun/telemetry/phoenix.py +173 -0
- droidrun/telemetry/tracker.py +7 -5
- droidrun/tools/__init__.py +1 -1
- droidrun/tools/adb.py +210 -82
- droidrun/tools/ios.py +7 -5
- droidrun/tools/tools.py +25 -8
- {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/METADATA +13 -7
- droidrun-0.3.10.dev2.dist-info/RECORD +70 -0
- droidrun/agent/common/default.py +0 -5
- droidrun/agent/context/reflection.py +0 -20
- droidrun/agent/oneflows/reflector.py +0 -265
- droidrun-0.3.8.dist-info/RECORD +0 -55
- {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/WHEEL +0 -0
- {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/entry_points.txt +0 -0
- {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/licenses/LICENSE +0 -0
droidrun/agent/oneflows/app_starter_workflow.py
ADDED
@@ -0,0 +1,118 @@
+"""
+Simple workflow to open an app based on a description.
+"""
+
+import json
+
+from workflows import Context, Workflow, step
+from workflows.events import StartEvent, StopEvent
+
+from droidrun.tools.tools import Tools
+
+
+class AppStarter(Workflow):
+    """
+    A simple workflow that opens an app based on a description.
+
+    The workflow uses an LLM to intelligently match the app description
+    to an installed app's package name, then opens it.
+    """
+
+    def __init__(self, tools: Tools, llm, timeout: int = 60, **kwargs):
+        """
+        Initialize the OpenAppWorkflow.
+
+        Args:
+            tools: An instance of Tools (e.g., AdbTools) to interact with the device
+            llm: An LLM instance (e.g., OpenAI) to determine which app to open
+            timeout: Workflow timeout in seconds (default: 60)
+            **kwargs: Additional arguments passed to Workflow
+        """
+        super().__init__(timeout=timeout, **kwargs)
+        self.tools = tools
+        self.llm = llm
+
+    @step
+    async def open_app_step(self, ev: StartEvent, ctx: Context) -> StopEvent:
+        """
+        Opens an app based on the provided description.
+
+        Expected StartEvent attributes:
+        - app_description (str): The name or description of the app to open
+
+        Returns:
+            StopEvent with the result of the open_app operation
+        """
+        app_description = ev.app_description
+
+        # Get list of installed apps
+        apps = self.tools.get_apps(include_system=True)
+
+        # Format apps list for LLM
+        apps_list = "\n".join([
+            f"- {app['label']} (package: {app['package']})"
+            for app in apps
+        ])
+
+        # Construct prompt for LLM
+        prompt = f"""Given the following list of installed apps and a user's description, determine which app package name to open.
+
+Installed Apps:
+{apps_list}
+
+User's Request: "{app_description}"
+
+Return ONLY a JSON object with the following structure:
+{{
+    "package": "com.example.package"
+}}
+
+Choose the most appropriate app based on the description. Return the package name of the best match."""
+
+        # Get LLM response
+        response = await self.llm.acomplete(prompt)
+        response_text = str(response).strip()
+
+        # Parse JSON response - extract content between { and }
+        try:
+            start = response_text.find("{")
+            end = response_text.rfind("}") + 1
+            json_str = response_text[start:end]
+            result_json = json.loads(json_str)
+            package_name = result_json["package"]
+        except (json.JSONDecodeError, KeyError, ValueError) as e:
+            return StopEvent(result=f"Error parsing LLM response: {e}. Response: {response_text}")
+
+        # Open the selected app using the package name
+        result = self.tools.start_app(package_name)
+
+        return StopEvent(result=result)
+
+
+# Example usage
+async def main():
+    """
+    Example of how to use the OpenAppWorkflow.
+    """
+    from llama_index.llms.openai import OpenAI
+
+    from droidrun.tools.adb import AdbTools
+
+    # Initialize tools with device serial (None for default device)
+    tools = AdbTools(serial=None)
+
+    # Initialize LLM
+    llm = OpenAI(model="gpt-4o-mini")
+
+    # Create workflow instance
+    workflow = AppStarter(tools=tools, llm=llm, timeout=60, verbose=True)
+
+    # Run workflow to open an app
+    result = await workflow.run(app_description="Settings")
+
+    print(f"Result: {result}")
+
+
+if __name__ == "__main__":
+    import asyncio
+    asyncio.run(main())
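
Note: open_app_step above does not require the model to return pure JSON; it slices from the first "{" to the last "}" before parsing. A minimal sketch of that tolerance (the sample reply is made up for illustration):

import json

reply = 'Sure! Here you go: {"package": "com.android.settings"} Hope that helps.'
start = reply.find("{")
end = reply.rfind("}") + 1  # rfind tolerates trailing prose after the JSON
result = json.loads(reply[start:end])
assert result["package"] == "com.android.settings"
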
droidrun/agent/oneflows/text_manipulator.py
ADDED
@@ -0,0 +1,204 @@
+'''CodeAct-style agent for text manipulation via constrained Python execution.
+
+This agent receives two inputs:
+- current_text: the current content of the focused text box
+- task_instruction: a natural language instruction describing how to modify the text
+
+It asks an LLM to produce Python code that:
+- Uses ONLY a single provided function: input_text(text: str)
+- Constructs the final text to type as a triple-quoted big string, assigned
+  to a variable of the model's choice (e.g., new_text = """...""")
+- May reference the predefined variable ORIGINAL which contains the current text
+  from the text box
+- Calls input_text(new_text) exactly once to clear the field and input the new text
+
+The produced code is executed in a restricted sandbox exposing ONLY:
+- ORIGINAL: str (the original text content)
+- input_text: function (captures the final text; semantically clears and types)
+
+If the generated code produces execution errors, the agent automatically sends the
+stack trace back to the LLM for correction, with up to 3 retry attempts by default.
+This enables iterative refinement of the generated code.
+
+The agent returns the final text that should be entered into the text box and the
+raw code produced by the model (potentially after corrections).
+'''
+
+
+import traceback
+
+from llama_index.core.llms import ChatMessage
+from llama_index.core.llms.llm import LLM
+
+from droidrun.agent.utils.inference import call_with_retries
+from droidrun.telemetry.phoenix import clean_span
+
+
+@clean_span("text_manipulator")
+def run_text_manipulation_agent(instruction: str, current_subgoal: str, current_text: str, overall_plan, hitorical_plan, llm: LLM, max_retries: int = 4) -> tuple[str, str]:
+    """Convenience function to run CodeAct text manipulation with error correction.
+
+    Args:
+        instruction: User's overall instruction
+        current_subgoal: Current subgoal to accomplish
+        current_text: The current content of the focused text field
+        overall_plan: Overall plan context
+        hitorical_plan: Historical progress
+        llm: LLM instance to use for text manipulation
+        max_retries: Maximum number of retry attempts if code execution fails
+
+    Returns:
+        Tuple of (final_text, raw_code) - the final text to input and the generated code
+    """
+    system_prompt = (
+        "You are CODEACT_TEXT_AGENT, a constrained Python code generator for editing text in an Android text box.\n"
+        "You will be given: (1) the current text in the focused text box as ORIGINAL, and (2) a TASK that describes how to modify it.\n\n"
+        "Your job is to output ONLY a single Python code block in ```python format that:\n"
+        "- Defines NO new functions, classes, or imports.\n"
+        "- Uses ONLY the provided function input_text(text: str).\n"
+        "- Builds the final content in a triple-quoted big string assigned to a variable of your choice, e.g.:\n"
+        "  new_text = \"\"\"...\"\"\"\n"
+        "- Includes ORIGINAL in the new_text if needed to fulfill the TASK.\n"
+        "- Calls input_text(new_text) exactly once to clear the field and input the new content.\n\n"
+        "STRICT FORMAT RULES:\n"
+        "- Respond with ONLY a fenced Python code block: ```python\n<code>\n```\n"
+        "- Do NOT print anything. Do NOT use input().\n"
+        "- Do NOT import any modules. Do NOT define additional functions or classes.\n"
+        "- Do NOT access files, network, or system.\n"
+        "If you are unsure about the ORIGINAL, use it by referencing ORIGINAL variable so you dont make mistake with white space or new line characters\n"
+        "below is ORIGINAL use it by referencing ORIGINAL variable or directly typing it out:\n<ORIGINAL>\n{current_text}\n</ORIGINAL>\n"
+        f"""
+<user_request>
+{instruction}
+</user_request>
+<overall_plan>
+{overall_plan}
+</overall_plan>
+<progress_status>
+{hitorical_plan}
+</progress_status>
+<current_subgoal>
+{current_subgoal}
+</current_subgoal>
+"""
+    )
+
+    error_correction_prompt = (
+        "You are CODEACT_TEXT_AGENT, correcting your previous code that had execution errors.\n\n"
+        "The code you generated previously failed with this error:\n{error_message}\n\n"
+        "Please fix the code and output ONLY a new Python code block in ```python format.\n"
+        "Follow the same rules as before:\n"
+        "- Use ONLY the provided function input_text(text: str)\n"
+        "- Build the final content in a triple-quoted big string\n"
+        "- Include ORIGINAL in the new_text if needed\n"
+        "- Call input_text(new_text) exactly once\n"
+        "- Respond with ONLY a fenced Python code block\n"
+        "If you are unsure about the ORIGINAL, use it by referencing ORIGINAL variable so you dont make mistake with white space or new line characters"
+        "below is ORIGINAL use it by referencing ORIGINAL variable or directly typing it out:\n<ORIGINAL>{current_text}</ORIGINAL>\n"
+    )
+
+    user_prompt = (
+        "TASK:\n{task_instruction}\n\n"
+        "CURRENT TEXT (ORIGINAL):\n{current_text}\n\n"
+        "Write the Python code now."
+    ).format(
+        task_instruction=current_subgoal.strip(),
+        current_text=current_text,
+    )
+
+    messages = [ChatMessage(role="system", content=system_prompt.format(overall_plan=overall_plan, hitorical_plan=hitorical_plan, current_subgoal=current_subgoal, instruction=instruction, current_text=current_text)), ChatMessage(role="user", content=user_prompt)]
+
+    for attempt in range(max_retries + 1):  # +1 for initial attempt
+        # Call the LLM with current messages
+        response_message = call_with_retries(llm, messages).message
+        content = response_message.content
+        messages.append(response_message)
+
+        # Extract code from ```python blocks
+        code = _extract_python_code(content)
+        if not code:
+            # Fallback: if no code block found, use entire response as code
+            code = content.strip()
+
+        # Execute the code in a sandbox
+        final_text, error_message = _execute_sandbox(code, current_text)
+
+        # If successful (no error), return the result
+        if not error_message:
+            return final_text, code
+
+        # If this was the last attempt, return what we have
+        if attempt == max_retries:
+            return final_text, code
+
+        # Add error correction message to conversation
+        correction_message = error_correction_prompt.format(error_message=error_message)
+        messages.append(ChatMessage(role="user", content=correction_message))
+
+    # This should never be reached, but just in case
+    return current_text, ""
+
+
+def _extract_python_code(text: str) -> str:
+    """Extract Python code from ```python fenced blocks using simple string operations."""
+    if not text:
+        return ""
+
+    # Try different variations of code block markers
+    patterns = [
+        # ```python with newlines
+        ("```python\n", "\n```"),
+        # ```python without newlines
+        ("```python", "```"),
+        # Generic ``` with newlines
+        ("```\n", "\n```"),
+        # Generic ``` without newlines
+        ("```", "```"),
+    ]
+
+    for start_marker, end_marker in patterns:
+        if start_marker in text and end_marker in text:
+            # Find the start position after the marker
+            start_idx = text.find(start_marker) + len(start_marker)
+            # Find the end position before the marker
+            end_idx = text.find(end_marker, start_idx)
+            if end_idx != -1:
+                code = text[start_idx:end_idx].strip()
+                # Only return if we actually extracted some code
+                if code:
+                    return code
+
+    return ""
+
+
+def _execute_sandbox(code: str, original_text: str) -> tuple[str, str]:
+    """Execute model code in a locked-down environment with exec().
+
+    Returns:
+        Tuple of (result_text, error_message) - result_text is the final text if successful,
+        or original_text if failed. error_message is the stack trace if execution failed,
+        or empty string if successful.
+    """
+    if not code:
+        return original_text, ""
+
+    captured = {"value": None}
+
+    def input_text(text: str) -> None:
+        """Capture the final text to be input."""
+        captured["value"] = text
+
+    # Create restricted environment
+    sandbox_globals = {
+        "__builtins__": {},  # Empty builtins for security
+        "input_text": input_text,
+        "ORIGINAL": original_text
+    }
+    sandbox_locals = {}
+
+    try:
+        exec(code, sandbox_globals, sandbox_locals)
+        return captured["value"] if captured["value"] is not None else original_text, ""
+    except Exception:
+        error_message = traceback.format_exc()
+        return original_text, error_message
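
Note: a self-contained sketch of the sandbox contract that _execute_sandbox above enforces: model code runs under exec() with empty __builtins__ and only ORIGINAL and input_text in scope. The model_code string here is hypothetical, not real model output:

model_code = 'new_text = ORIGINAL + "\\n-- edited"\ninput_text(new_text)'

captured = {"value": None}

def input_text(text):
    captured["value"] = text  # captures the text instead of typing on a device

exec(model_code, {"__builtins__": {}, "input_text": input_text, "ORIGINAL": "Hello"}, {})
assert captured["value"] == "Hello\n-- edited"
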
droidrun/agent/planner/__init__.py
CHANGED
@@ -1,13 +1,13 @@
 from droidrun.agent.planner.planner_agent import PlannerAgent
 from droidrun.agent.planner.prompts import (
     DEFAULT_PLANNER_SYSTEM_PROMPT,
+    DEFAULT_PLANNER_TASK_FAILED_PROMPT,
     DEFAULT_PLANNER_USER_PROMPT,
-    DEFAULT_PLANNER_TASK_FAILED_PROMPT
 )
 
 __all__ = [
-    "PlannerAgent",
+    "PlannerAgent",
     "DEFAULT_PLANNER_SYSTEM_PROMPT",
     "DEFAULT_PLANNER_USER_PROMPT",
     "DEFAULT_PLANNER_TASK_FAILED_PROMPT"
-]
+]
droidrun/agent/planner/events.py
CHANGED
@@ -1,16 +1,19 @@
-from
+from typing import Optional
+
 from llama_index.core.base.llms.types import ChatMessage
-from
+from llama_index.core.workflow import Event
+
 from droidrun.agent.context import Task
 from droidrun.agent.usage import UsageResult
 
+
 class PlanInputEvent(Event):
     input: list[ChatMessage]
 
 
 class PlanThinkingEvent(Event):
     thoughts: Optional[str] = None
-    code: Optional[str] = None
+    code: Optional[str] = None
     usage: Optional[UsageResult] = None
 
 
droidrun/agent/planner/planner_agent.py
CHANGED
@@ -1,33 +1,31 @@
-from droidrun.agent.planner.events import *
-from droidrun.agent.planner.prompts import (
-    DEFAULT_PLANNER_SYSTEM_PROMPT,
-    DEFAULT_PLANNER_USER_PROMPT,
-)
-import logging
 import asyncio
-
-import
+import logging
+from typing import TYPE_CHECKING, List, Union
+
+from dotenv import load_dotenv
 from llama_index.core.base.llms.types import ChatMessage, ChatResponse
-from llama_index.core.prompts import PromptTemplate
 from llama_index.core.llms.llm import LLM
-from llama_index.core.workflow import Workflow, StartEvent, StopEvent, Context, step
 from llama_index.core.memory import Memory
-from llama_index.core.
-from
-
-from droidrun.agent.
+from llama_index.core.prompts import PromptTemplate
+from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow, step
+
+from droidrun.agent.common.constants import LLM_HISTORY_LIMIT
+from droidrun.agent.common.events import RecordUIStateEvent, ScreenshotEvent
+from droidrun.agent.context.agent_persona import AgentPersona
 from droidrun.agent.context.task_manager import TaskManager
-from droidrun.tools import Tools
-from droidrun.agent.common.events import ScreenshotEvent, RecordUIStateEvent
 from droidrun.agent.planner.events import (
-    PlanInputEvent,
     PlanCreatedEvent,
+    PlanInputEvent,
     PlanThinkingEvent,
 )
-from droidrun.agent.
-
-
-
+from droidrun.agent.planner.prompts import (
+    DEFAULT_PLANNER_SYSTEM_PROMPT,
+    DEFAULT_PLANNER_USER_PROMPT,
+)
+from droidrun.agent.usage import get_usage_from_response
+from droidrun.agent.utils import chat_utils
+from droidrun.agent.utils.executer import SimpleCodeExecutor
+from droidrun.tools import Tools
 
 load_dotenv()
 
@@ -63,7 +61,6 @@ class PlannerAgent(Workflow):
 
         self.chat_memory = None
         self.remembered_info = None
-        self.reflection: Reflection = None
 
         self.current_retry = 0
         self.steps_counter = 0
@@ -97,7 +94,7 @@ class PlannerAgent(Workflow):
     async def prepare_chat(self, ctx: Context, ev: StartEvent) -> PlanInputEvent:
        logger.info("💬 Preparing planning session...")
 
-        self.chat_memory: Memory = await ctx.get(
+        self.chat_memory: Memory = await ctx.store.get(
             "chat_memory", default=Memory.from_defaults()
         )
         await self.chat_memory.aput(self.user_message)
@@ -105,15 +102,10 @@ class PlannerAgent(Workflow):
         if ev.remembered_info:
             self.remembered_info = ev.remembered_info
 
-        if ev.reflection:
-            self.reflection = ev.reflection
-        else:
-            self.reflection = None
-
         assert len(self.chat_memory.get_all()) > 0 or self.user_prompt, "Memory input, user prompt or user input cannot be empty."
-
+
         await self.chat_memory.aput(ChatMessage(role="user", content=PromptTemplate(self.user_prompt or DEFAULT_PLANNER_USER_PROMPT.format(goal=self.goal))))
-
+
         input_messages = self.chat_memory.get_all()
         logger.debug(f" - Memory contains {len(input_messages)} messages")
         return PlanInputEvent(input=input_messages)
@@ -129,24 +121,23 @@ class PlannerAgent(Workflow):
         ctx.write_event_to_stream(ev)
 
         self.steps_counter += 1
-        logger.info(
+        logger.info("🧠 Thinking about how to plan the goal...")
 
         if self.vision:
             screenshot = (self.tools_instance.take_screenshot())[1]
             ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
-            await ctx.set("screenshot", screenshot)
+            await ctx.store.set("screenshot", screenshot)
 
         try:
             state = self.tools_instance.get_state()
-            await ctx.set("ui_state", state["a11y_tree"])
-            await ctx.set("phone_state", state["phone_state"])
+            await ctx.store.set("ui_state", state["a11y_tree"])
+            await ctx.store.set("phone_state", state["phone_state"])
             ctx.write_event_to_stream(RecordUIStateEvent(ui_state=state["a11y_tree"]))
-        except Exception
-            logger.warning(
+        except Exception:
+            logger.warning("⚠️ Error retrieving state from the connected device. Is the Accessibility Service enabled?")
 
 
-        await ctx.set("remembered_info", self.remembered_info)
-        await ctx.set("reflection", self.reflection)
+        await ctx.store.set("remembered_info", self.remembered_info)
 
         response = await self._get_llm_response(ctx, chat_history)
         try:
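
Note: the hunk above is part of a release-wide move from the deprecated Context.get/Context.set calls to the Context.store API of llama_index workflows. A minimal sketch of the new pattern, using a hypothetical one-step workflow:

from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow, step

class StateDemo(Workflow):
    @step
    async def remember(self, ctx: Context, ev: StartEvent) -> StopEvent:
        await ctx.store.set("ui_state", "<a11y tree>")          # was: await ctx.set(...)
        state = await ctx.store.get("ui_state", default=None)   # was: await ctx.get(...)
        return StopEvent(result=state)
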
@@ -169,18 +160,17 @@ class PlannerAgent(Workflow):
         """Handle LLM output."""
         logger.debug("🤖 Processing planning output...")
         code = ev.code
-        thoughts = ev.thoughts
 
         if code:
             try:
                 result = await self.executer.execute(ctx, code)
-                logger.info(
+                logger.info("📝 Planning complete")
                 logger.debug(f" - Planning code executed. Result: {result['output']}")
 
                 screenshots = result['screenshots']
                 for screenshot in screenshots[:-1]: # the last screenshot will be captured by next step
                     ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
-
+
                 ui_states = result['ui_states']
                 for ui_state in ui_states[:-1]:
                     ctx.write_event_to_stream(RecordUIStateEvent(ui_state=ui_state['a11y_tree']))
@@ -237,7 +227,7 @@ wrap your code inside this:
     @step
     async def finalize(self, ev: PlanCreatedEvent, ctx: Context) -> StopEvent:
         """Finalize the workflow."""
-        await ctx.set("chat_memory", self.chat_memory)
+        await ctx.store.set("chat_memory", self.chat_memory)
 
         result = {}
         result.update(
@@ -256,15 +246,15 @@ wrap your code inside this:
         logger.debug(f" - Sending {len(chat_history)} messages to LLM.")
 
         model = self.llm.class_name()
-        if self.vision
+        if self.vision:
             if model == "DeepSeek":
                 logger.warning(
                     "[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
                 )
             else:
                 chat_history = await chat_utils.add_screenshot_image_block(
-                    await ctx.get("screenshot"), chat_history
-                )
+                    await ctx.store.get("screenshot"), chat_history
+                )
 
 
 
@@ -275,18 +265,15 @@ wrap your code inside this:
             chat_history,
         )
 
-        remembered_info = await ctx.get("remembered_info", default=None)
+        remembered_info = await ctx.store.get("remembered_info", default=None)
         if remembered_info:
             chat_history = await chat_utils.add_memory_block(remembered_info, chat_history)
 
-
-
-        chat_history = await chat_utils.add_reflection_summary(reflection, chat_history)
-
-        chat_history = await chat_utils.add_phone_state_block(await ctx.get("phone_state"), chat_history)
-        chat_history = await chat_utils.add_ui_text_block(await ctx.get("ui_state"), chat_history)
+        chat_history = await chat_utils.add_phone_state_block(await ctx.store.get("phone_state"), chat_history)
+        chat_history = await chat_utils.add_ui_text_block(await ctx.store.get("ui_state"), chat_history)
 
-
+        limited_history = self._limit_history(chat_history)
+        messages_to_send = [self.system_message] + limited_history
         messages_to_send = [
             chat_utils.message_copy(msg) for msg in messages_to_send
         ]
@@ -302,3 +289,23 @@ wrap your code inside this:
         except Exception as e:
             logger.error(f"Could not get an answer from LLM: {repr(e)}")
             raise e
+
+    def _limit_history(
+        self, chat_history: List[ChatMessage]
+    ) -> List[ChatMessage]:
+        if LLM_HISTORY_LIMIT <= 0:
+            return chat_history
+
+        max_messages = LLM_HISTORY_LIMIT * 2
+        if len(chat_history) <= max_messages:
+            return chat_history
+
+        preserved_head: List[ChatMessage] = []
+        if chat_history and chat_history[0].role == "user":
+            preserved_head = [chat_history[0]]
+
+        tail = chat_history[-max_messages:]
+        if preserved_head and preserved_head[0] in tail:
+            preserved_head = []
+
+        return preserved_head + tail
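
Note: a rough illustration of the windowing rule in the new _limit_history method, assuming LLM_HISTORY_LIMIT = 2 for the sake of the example (the real value lives in droidrun.agent.common.constants):

from llama_index.core.base.llms.types import ChatMessage

history = [ChatMessage(role="user", content=f"msg {i}") for i in range(10)]
max_messages = 2 * 2                     # LLM_HISTORY_LIMIT * 2
tail = history[-max_messages:]           # keep only the most recent window
head = [history[0]] if history[0] not in tail else []  # first user message survives
limited = head + tail
assert len(limited) == 5                 # first user message + last four messages
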
droidrun/agent/planner/prompts.py
CHANGED
@@ -119,6 +119,6 @@ Instruction: Based **only** on the provided screenshot showing the current state
 # Export all prompts
 __all__ = [
     "DEFAULT_PLANNER_SYSTEM_PROMPT",
-    "DEFAULT_PLANNER_USER_PROMPT",
+    "DEFAULT_PLANNER_USER_PROMPT",
     "DEFAULT_PLANNER_TASK_FAILED_PROMPT"
-]
+]
droidrun/agent/usage.py
CHANGED
@@ -1,18 +1,20 @@
 import contextlib
-
+import logging
+from typing import Any, Dict, List, Optional
+from uuid import uuid4
+
 from llama_index.core.callbacks.base_handler import BaseCallbackHandler
 from llama_index.core.callbacks.schema import CBEventType, EventPayload
 from llama_index.core.llms import LLM, ChatResponse
 from pydantic import BaseModel
-from typing import Any, Dict, List, Optional
-from uuid import uuid4
-import logging
 
 logger = logging.getLogger("droidrun")
 SUPPORTED_PROVIDERS = [
     "Gemini",
     "GoogleGenAI",
+    "GenAI",
     "OpenAI",
+    "openai_llm",
     "Anthropic",
     "Ollama",
     "DeepSeek",
@@ -32,14 +34,14 @@ def get_usage_from_response(provider: str, chat_rsp: ChatResponse) -> UsageResul
 
     print(f"rsp: {rsp.__class__.__name__}")
 
-    if provider == "Gemini" or provider == "GoogleGenAI":
+    if provider == "Gemini" or provider == "GoogleGenAI" or provider == "GenAI":
         return UsageResult(
             request_tokens=rsp["usage_metadata"]["prompt_token_count"],
             response_tokens=rsp["usage_metadata"]["candidates_token_count"],
             total_tokens=rsp["usage_metadata"]["total_token_count"],
             requests=1,
         )
-    elif provider == "OpenAI":
+    elif provider == "OpenAI" or provider == "openai_llm":
         from openai.types import CompletionUsage as OpenAIUsage
 
         usage: OpenAIUsage = rsp.usage
@@ -109,7 +111,7 @@ class TokenCountingHandler(BaseCallbackHandler):
         )
 
     def _get_event_usage(self, payload: Dict[str, Any]) -> UsageResult:
-        if
+        if EventPayload.RESPONSE not in payload:
             raise ValueError("No response in payload")
 
         chat_rsp: ChatResponse = payload.get(EventPayload.RESPONSE)
@@ -178,26 +180,26 @@ def create_tracker(llm: LLM) -> TokenCountingHandler:
 
 def track_usage(llm: LLM) -> TokenCountingHandler:
     """Track token usage for an LLM instance across all requests.
-
+
     This function:
     - Creates a new TokenCountingHandler for the LLM provider
     - Registers that handler as an LLM callback to monitor all requests
     - Returns the handler for accessing cumulative usage statistics
-
+
     The handler counts tokens for total LLM usage across all requests. For fine-grained
     per-request counting, use either:
     - `create_tracker()` with `llm_callback()` context manager for temporary tracking
     - `get_usage_from_response()` to extract usage from individual responses
-
+
     Args:
         llm: The LLamaIndex LLM instance to track usage for
-
+
     Returns:
         TokenCountingHandler: The registered handler that accumulates usage statistics
-
+
     Raises:
         ValueError: If the LLM provider is not supported for tracking
-
+
     Example:
         >>> llm = OpenAI()
        >>> tracker = track_usage(llm)