droidrun 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (55)
  1. droidrun/__init__.py +16 -11
  2. droidrun/__main__.py +1 -1
  3. droidrun/adb/__init__.py +3 -3
  4. droidrun/adb/device.py +1 -1
  5. droidrun/adb/manager.py +2 -2
  6. droidrun/agent/__init__.py +6 -0
  7. droidrun/agent/codeact/__init__.py +2 -4
  8. droidrun/agent/codeact/codeact_agent.py +330 -235
  9. droidrun/agent/codeact/events.py +12 -20
  10. droidrun/agent/codeact/prompts.py +0 -52
  11. droidrun/agent/common/default.py +5 -0
  12. droidrun/agent/common/events.py +4 -0
  13. droidrun/agent/context/__init__.py +23 -0
  14. droidrun/agent/context/agent_persona.py +15 -0
  15. droidrun/agent/context/context_injection_manager.py +66 -0
  16. droidrun/agent/context/episodic_memory.py +15 -0
  17. droidrun/agent/context/personas/__init__.py +11 -0
  18. droidrun/agent/context/personas/app_starter.py +44 -0
  19. droidrun/agent/context/personas/default.py +95 -0
  20. droidrun/agent/context/personas/extractor.py +52 -0
  21. droidrun/agent/context/personas/ui_expert.py +107 -0
  22. droidrun/agent/context/reflection.py +20 -0
  23. droidrun/agent/context/task_manager.py +124 -0
  24. droidrun/agent/droid/__init__.py +2 -2
  25. droidrun/agent/droid/droid_agent.py +269 -325
  26. droidrun/agent/droid/events.py +28 -0
  27. droidrun/agent/oneflows/reflector.py +265 -0
  28. droidrun/agent/planner/__init__.py +2 -4
  29. droidrun/agent/planner/events.py +9 -13
  30. droidrun/agent/planner/planner_agent.py +288 -0
  31. droidrun/agent/planner/prompts.py +33 -53
  32. droidrun/agent/utils/__init__.py +3 -0
  33. droidrun/agent/utils/async_utils.py +1 -40
  34. droidrun/agent/utils/chat_utils.py +265 -48
  35. droidrun/agent/utils/executer.py +49 -14
  36. droidrun/agent/utils/llm_picker.py +14 -10
  37. droidrun/agent/utils/trajectory.py +184 -0
  38. droidrun/cli/__init__.py +1 -1
  39. droidrun/cli/logs.py +283 -0
  40. droidrun/cli/main.py +364 -441
  41. droidrun/tools/__init__.py +5 -10
  42. droidrun/tools/{actions.py → adb.py} +381 -412
  43. droidrun/tools/ios.py +596 -0
  44. droidrun/tools/tools.py +95 -0
  45. droidrun-0.3.1.dist-info/METADATA +150 -0
  46. droidrun-0.3.1.dist-info/RECORD +50 -0
  47. droidrun/agent/planner/task_manager.py +0 -355
  48. droidrun/agent/planner/workflow.py +0 -371
  49. droidrun/tools/device.py +0 -29
  50. droidrun/tools/loader.py +0 -60
  51. droidrun-0.2.0.dist-info/METADATA +0 -373
  52. droidrun-0.2.0.dist-info/RECORD +0 -32
  53. {droidrun-0.2.0.dist-info → droidrun-0.3.1.dist-info}/WHEEL +0 -0
  54. {droidrun-0.2.0.dist-info → droidrun-0.3.1.dist-info}/entry_points.txt +0 -0
  55. {droidrun-0.2.0.dist-info → droidrun-0.3.1.dist-info}/licenses/LICENSE +0 -0
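
Migration note: in 0.3.x the CodeActAgent constructor drops code_execute_fn/available_tools in favor of an AgentPersona plus an explicit tool registry, and code execution moves into a built-in SimpleCodeExecutor (see the codeact_agent.py diff below). A minimal sketch of the new call shape, assuming an already-loaded LLM, a Tools implementation, and a persona; the llm, persona, tools, and tool_map variables are hypothetical placeholders, not part of the diff:

    from droidrun.agent.codeact.codeact_agent import CodeActAgent

    # Hypothetical wiring for illustration; parameter names follow the diff below.
    agent = CodeActAgent(
        llm=llm,                  # any llama_index LLM instance
        persona=persona,          # AgentPersona: prompts, allowed_tools, required_context
        vision=True,              # attach screenshots when the persona requires them
        tools_instance=tools,     # Tools implementation (e.g. the adb-backed one)
        all_tools_list=tool_map,  # Dict[str, Callable]; filtered by persona.allowed_tools
        max_steps=5,              # now enforced per task (was 10 and unenforced in 0.2.0)
    )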
@@ -1,23 +1,35 @@
  import logging
  import re
- import inspect
  import time
- from typing import Awaitable, Callable, List, Optional, Dict, Any, Tuple, TYPE_CHECKING, Union
- from llama_index.core.base.llms.types import ChatMessage, ChatResponse, TextBlock
+ import asyncio
+ import json
+ import os
+ from typing import List, Optional, Tuple, Union
+ from llama_index.core.base.llms.types import ChatMessage, ChatResponse
  from llama_index.core.prompts import PromptTemplate
  from llama_index.core.llms.llm import LLM
  from llama_index.core.workflow import Workflow, StartEvent, StopEvent, Context, step
- from llama_index.core.memory import ChatMemoryBuffer
- from .events import FinalizeEvent, InputEvent, ModelOutputEvent, ExecutionEvent, ExecutionResultEvent
- from ..utils.chat_utils import add_screenshot, add_screenshot_image_block, add_ui_text_block, message_copy
- from .prompts import (
-     DEFAULT_CODE_ACT_SYSTEM_PROMPT,
-     DEFAULT_CODE_ACT_USER_PROMPT,
-     DEFAULT_NO_THOUGHTS_PROMPT
+ from llama_index.core.memory import Memory
+ from droidrun.agent.codeact.events import (
+     TaskInputEvent,
+     TaskEndEvent,
+     TaskExecutionEvent,
+     TaskExecutionResultEvent,
+     TaskThinkingEvent,
+     EpisodicMemoryEvent,
+ )
+ from droidrun.agent.common.events import ScreenshotEvent
+ from droidrun.agent.utils import chat_utils
+ from droidrun.agent.utils.executer import SimpleCodeExecutor
+ from droidrun.agent.codeact.prompts import (
+     DEFAULT_CODE_ACT_USER_PROMPT,
+     DEFAULT_NO_THOUGHTS_PROMPT,
  )

- if TYPE_CHECKING:
-     from ...tools import Tools
+ from droidrun.agent.context.episodic_memory import EpisodicMemory, EpisodicMemoryStep
+ from droidrun.tools import Tools
+ from typing import Optional, Dict, Tuple, List, Any, Callable
+ from droidrun.agent.context.agent_persona import AgentPersona

  logger = logging.getLogger("droidrun")

@@ -28,307 +40,390 @@ class CodeActAgent(Workflow):
      to solve problems requiring code execution. It extracts code from
      Markdown blocks and uses specific step types for tracking.
      """
+
      def __init__(
          self,
          llm: LLM,
-         code_execute_fn: Callable[[str], Awaitable[Dict[str, Any]]],
-         tools: 'Tools',
-         available_tools: List = [],
-         max_steps: int = 10, # Default max steps (kept for backwards compatibility but no longer enforced)
-         system_prompt: Optional[str] = None,
-         user_prompt: Optional[str] = None,
-         vision: bool = False,
+         persona: AgentPersona,
+         vision: bool,
+         tools_instance: "Tools",
+         all_tools_list: Dict[str, Callable[..., Any]],
+         max_steps: int = 5,
          debug: bool = False,
          *args,
-         **kwargs
+         **kwargs,
      ):
          # assert instead of if
          assert llm, "llm must be provided."
-         assert code_execute_fn, "code_execute_fn must be provided"
          super().__init__(*args, **kwargs)

          self.llm = llm
-         self.code_execute_fn = code_execute_fn
-         self.available_tools = available_tools or []
-         self.tools = tools
-         self.max_steps = max_steps # Kept for backwards compatibility but not enforced
-         self.tool_descriptions = self.parse_tool_descriptions() # Parse tool descriptions once at initialization
-         self.system_prompt_content = (system_prompt or DEFAULT_CODE_ACT_SYSTEM_PROMPT).format(tool_descriptions=self.tool_descriptions)
-         self.system_prompt = ChatMessage(role="system", content=self.system_prompt_content)
-         self.user_prompt = user_prompt
+         self.max_steps = max_steps
+
+         self.user_prompt = persona.user_prompt
          self.no_thoughts_prompt = None
-         self.memory = None
-         self.goal = None
-         self.steps_counter = 0 # Initialize step counter (kept for tracking purposes)
-         self.code_exec_counter = 0 # Initialize execution counter
+
          self.vision = vision
+
+         self.chat_memory = None
+         self.episodic_memory = EpisodicMemory(persona=persona)
+         self.remembered_info = None
+
+         self.goal = None
+         self.steps_counter = 0
+         self.code_exec_counter = 0
          self.debug = debug
-         logger.info("✅ CodeActAgent initialized successfully.")

-     def parse_tool_descriptions(self) -> str:
-         """Parses the available tools and their descriptions for the system prompt."""
-         logger.info("🛠️ Parsing tool descriptions...")
-         # self.available_tools is a list of functions, we need to get their docstrings, names, and signatures and display them as `def name(args) -> return_type:\n"""docstring""" ...\n`
-         tool_descriptions = []
-         excluded_tools = ["take_screenshot"] # List of tools to exclude
-
-         for tool in self.available_tools:
-             assert callable(tool), f"Tool {tool} is not callable."
-             tool_name = tool.__name__
-
-             # Skip excluded tools
-             if tool_name in excluded_tools:
-                 logger.debug(f" - Skipping excluded tool: {tool_name}")
-                 continue
-
-             tool_signature = inspect.signature(tool)
-             tool_docstring = tool.__doc__ or "No description available."
-             # Format the function signature and docstring
-             formatted_signature = f"def {tool_name}{tool_signature}:\n \"\"\"{tool_docstring}\"\"\"\n..."
-             tool_descriptions.append(formatted_signature)
-             logger.debug(f" - Parsed tool: {tool_name}")
-         # Join all tool descriptions into a single string
-         descriptions = "\n".join(tool_descriptions)
-         logger.info(f"🔩 Found {len(tool_descriptions)} tools.")
-         return descriptions
-
-     def _extract_code_and_thought(self, response_text: str) -> Tuple[Optional[str], str]:
-         """
-         Extracts code from Markdown blocks (```python ... ```) and the surrounding text (thought),
-         handling indented code blocks.
-
-         Returns:
-             Tuple[Optional[code_string], thought_string]
-         """
-         if self.debug:
-             logger.debug("✂️ Extracting code and thought from response...")
-         code_pattern = r"^\s*```python\s*\n(.*?)\n^\s*```\s*?$" # Added ^\s*, re.MULTILINE, and made closing fence match more robust
-         # Use re.DOTALL to make '.' match newlines and re.MULTILINE to make '^' match start of lines
-         code_matches = list(re.finditer(code_pattern, response_text, re.DOTALL | re.MULTILINE))
-
-         if not code_matches:
-             # No code found, the entire response is thought
-             if self.debug:
-                 logger.debug(" - No code block found. Entire response is thought.")
-             return None, response_text.strip()
-
-         extracted_code_parts = []
-         for match in code_matches:
-             # group(1) is the (.*?) part - the actual code content
-             code_content = match.group(1)
-             extracted_code_parts.append(code_content) # Keep original indentation for now
-
-         extracted_code = "\n\n".join(extracted_code_parts)
-         if self.debug:
-             logger.debug(f" - Combined extracted code:\n```python\n{extracted_code}\n```")
-
-
-         # Extract thought text (text before the first code block, between blocks, and after the last)
-         thought_parts = []
-         last_end = 0
-         for match in code_matches:
-             # Use span(0) to get the start/end of the *entire* match (including fences and indentation)
-             start, end = match.span(0)
-             thought_parts.append(response_text[last_end:start])
-             last_end = end
-         thought_parts.append(response_text[last_end:]) # Text after the last block
-
-         thought_text = "".join(thought_parts).strip()
-         # Avoid overly long debug messages for thought
-         if self.debug:
-             thought_preview = (thought_text[:100] + '...') if len(thought_text) > 100 else thought_text
-             logger.debug(f" - Extracted thought: {thought_preview}")
-
-         return extracted_code, thought_text
+         self.tools = tools_instance
+
+         self.tool_list = {}
+
+         for tool_name in persona.allowed_tools:
+             if tool_name in all_tools_list:
+                 self.tool_list[tool_name] = all_tools_list[tool_name]
+
+         self.tool_descriptions = chat_utils.parse_tool_descriptions(self.tool_list)
+
+         self.system_prompt_content = persona.system_prompt.format(
+             tool_descriptions=self.tool_descriptions
+         )
+         self.system_prompt = ChatMessage(
+             role="system", content=self.system_prompt_content
+         )
+
+         self.required_context = persona.required_context
+
+         self.executor = SimpleCodeExecutor(
+             loop=asyncio.get_event_loop(),
+             locals={},
+             tools=self.tool_list,
+             globals={"__builtins__": __builtins__},
+         )
+
+         logger.info("✅ CodeActAgent initialized successfully.")

      @step
-     async def prepare_chat(self, ev: StartEvent, ctx: Context) -> InputEvent:
+     async def prepare_chat(self, ctx: Context, ev: StartEvent) -> TaskInputEvent:
          """Prepare chat history from user input."""
          logger.info("💬 Preparing chat for task execution...")
-         # Get or create memory
-         self.memory: ChatMemoryBuffer = await ctx.get(
-             "memory", default=ChatMemoryBuffer.from_defaults(llm=self.llm)
+
+         self.chat_memory: Memory = await ctx.get(
+             "chat_memory", default=Memory.from_defaults()
          )
+
          user_input = ev.get("input", default=None)
          assert user_input, "User input cannot be empty."
-         # Add user input to memory
-         if self.debug:
-             logger.debug(" - Adding goal to memory.")
+
+         if ev.remembered_info:
+             self.remembered_info = ev.remembered_info
+
+         logger.debug(" - Adding goal to memory.")
          goal = user_input
-         self.user_message = ChatMessage(role="user", content=PromptTemplate(self.user_prompt or DEFAULT_CODE_ACT_USER_PROMPT).format(goal=goal))
-         self.no_thoughts_prompt = ChatMessage(role="user", content=PromptTemplate(DEFAULT_NO_THOUGHTS_PROMPT).format(goal=goal))
-         await self.memory.aput(self.user_message)
-         # Update context
-         await ctx.set("memory", self.memory)
-         input_messages = self.memory.get_all()
-         return InputEvent(input=input_messages)
+         self.user_message = ChatMessage(
+             role="user",
+             content=PromptTemplate(
+                 self.user_prompt or DEFAULT_CODE_ACT_USER_PROMPT
+             ).format(goal=goal),
+         )
+         self.no_thoughts_prompt = ChatMessage(
+             role="user",
+             content=PromptTemplate(DEFAULT_NO_THOUGHTS_PROMPT).format(goal=goal),
+         )
+
+
+         await self.chat_memory.aput(self.user_message)
+
+         await ctx.set("chat_memory", self.chat_memory)
+         input_messages = self.chat_memory.get_all()
+         return TaskInputEvent(input=input_messages)
+
      @step
-     async def handle_llm_input(self, ev: InputEvent, ctx: Context) -> Union[ModelOutputEvent, FinalizeEvent]:
+     async def handle_llm_input(
+         self, ctx: Context, ev: TaskInputEvent
+     ) -> TaskThinkingEvent | TaskEndEvent:
          """Handle LLM input."""
-         # Get chat history from event
          chat_history = ev.input
          assert len(chat_history) > 0, "Chat history cannot be empty."
+         ctx.write_event_to_stream(ev)
+
+         if self.steps_counter >= self.max_steps:
+             ev = TaskEndEvent(
+                 success=False,
+                 reason=f"Reached max step count of {self.max_steps} steps",
+             )
+             ctx.write_event_to_stream(ev)
+             return ev

          self.steps_counter += 1
          logger.info(f"🧠 Step {self.steps_counter}: Thinking...")
+
+         model = self.llm.class_name()

-         # Get LLM response
-         response = await self._get_llm_response(chat_history)
-         # Add response to memory
-         await self.memory.aput(response.message)
-         if self.debug:
-             logger.debug("🤖 LLM response received.")
-         code, thoughts = self._extract_code_and_thought(response.message.content)
-         if self.debug:
-             logger.debug(f" - Thoughts: {'Yes' if thoughts else 'No'}, Code: {'Yes' if code else 'No'}")
-         return ModelOutputEvent(thoughts=thoughts, code=code)
+         if "remember" in self.tool_list and self.remembered_info:
+             await ctx.set("remembered_info", self.remembered_info)
+             chat_history = await chat_utils.add_memory_block(self.remembered_info, chat_history)
+
+         for context in self.required_context:
+             if model == "DeepSeek":
+                 logger.warning(
+                     "[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
+                 )
+             elif self.vision == True and context == "screenshot":
+                 screenshot = (await self.tools.take_screenshot())[1]
+                 ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
+
+                 await ctx.set("screenshot", screenshot)
+                 chat_history = await chat_utils.add_screenshot_image_block(screenshot, chat_history)
+
+             if context == "ui_state":
+                 try:
+                     state = await self.tools.get_state()
+                     await ctx.set("ui_state", state["a11y_tree"])
+                     chat_history = await chat_utils.add_ui_text_block(
+                         state["a11y_tree"], chat_history
+                     )
+                     chat_history = await chat_utils.add_phone_state_block(state["phone_state"], chat_history)
+                 except Exception as e:
+                     logger.warning(f"⚠️ Error retrieving state from the connected device. Is the Accessibility Service enabled?")
+
+
+             if context == "packages":
+                 chat_history = await chat_utils.add_packages_block(
+                     await self.tools.list_packages(include_system_apps=True),
+                     chat_history,
+                 )
+
+         response = await self._get_llm_response(ctx, chat_history)
+         if response is None:
+             return TaskEndEvent(
+                 success=False, reason="LLM response is None. This is a critical error."
+             )
+
+         await self.chat_memory.aput(response.message)
+
+         code, thoughts = chat_utils.extract_code_and_thought(response.message.content)
+
+         event = TaskThinkingEvent(thoughts=thoughts, code=code)
+         ctx.write_event_to_stream(event)
+         return event

      @step
-     async def handle_llm_output(self, ev: ModelOutputEvent, ctx: Context) -> Union[ExecutionEvent, FinalizeEvent]:
+     async def handle_llm_output(
+         self, ctx: Context, ev: TaskThinkingEvent
+     ) -> Union[TaskExecutionEvent, TaskInputEvent]:
          """Handle LLM output."""
-         if self.debug:
-             logger.debug("⚙️ Handling LLM output...")
-         # Get code and thoughts from event
+         logger.debug("⚙️ Handling LLM output...")
          code = ev.code
          thoughts = ev.thoughts

-         # Warning if no thoughts are provided
          if not thoughts:
-             logger.warning("🤔 LLM provided code without thoughts. Adding reminder prompt.")
-             await self.memory.aput(self.no_thoughts_prompt)
+             logger.warning(
+                 "🤔 LLM provided code without thoughts. Adding reminder prompt."
+             )
+             await self.chat_memory.aput(self.no_thoughts_prompt)
          else:
-             # print thought but start with emoji at the start of the log
              logger.info(f"🤔 Reasoning: {thoughts}")

-         # If code is present, execute it
          if code:
-             return ExecutionEvent(code=code)
+             return TaskExecutionEvent(code=code)
          else:
-             message = ChatMessage(role="user", content="No code was provided. If you want to mark task as complete (whether it failed or succeeded), use complete(success:bool, reason:str) function within a code block ```pythn\n```.")
-             await self.memory.aput(message)
-             return InputEvent(input=self.memory.get_all())
+             message = ChatMessage(
+                 role="user",
+                 content="No code was provided. If you want to mark task as complete (whether it failed or succeeded), use complete(success:bool, reason:str) function within a code block ```pythn\n```.",
+             )
+             await self.chat_memory.aput(message)
+             return TaskInputEvent(input=self.chat_memory.get_all())

      @step
-     async def execute_code(self, ev: ExecutionEvent, ctx: Context) -> ExecutionResultEvent:
+     async def execute_code(
+         self, ctx: Context, ev: TaskExecutionEvent
+     ) -> Union[TaskExecutionResultEvent, TaskEndEvent]:
          """Execute the code and return the result."""
          code = ev.code
          assert code, "Code cannot be empty."
          logger.info(f"⚡ Executing action...")
-         if self.debug:
-             logger.debug(f"Code to execute:\n```python\n{code}\n```")
-         # Execute the code using the provided function
+         logger.debug(f"Code to execute:\n```python\n{code}\n```")
+
          try:
              self.code_exec_counter += 1
-             result = await self.code_execute_fn(code)
+             result = await self.executor.execute(ctx, code)
              logger.info(f"💡 Code execution successful. Result: {result}")
+
              if self.tools.finished == True:
                  logger.debug(" - Task completed.")
-                 return FinalizeEvent(result={'success': self.tools.success, 'reason': self.tools.reason})
-             return ExecutionResultEvent(output=str(result)) # Ensure output is string
+                 event = TaskEndEvent(
+                     success=self.tools.success, reason=self.tools.reason
+                 )
+                 ctx.write_event_to_stream(event)
+                 return event
+
+             self.remembered_info = self.tools.memory
+
+             event = TaskExecutionResultEvent(output=str(result))
+             ctx.write_event_to_stream(event)
+             return event
+
          except Exception as e:
              logger.error(f"💥 Action failed: {e}")
              if self.debug:
                  logger.error("Exception details:", exc_info=True)
              error_message = f"Error during execution: {e}"
-             return ExecutionResultEvent(output=error_message) # Return error message as output
+
+             event = TaskExecutionResultEvent(output=error_message)
+             ctx.write_event_to_stream(event)
+             return event

      @step
-     async def handle_execution_result(self, ev: ExecutionResultEvent, ctx: Context) -> InputEvent:
+     async def handle_execution_result(
+         self, ctx: Context, ev: TaskExecutionResultEvent
+     ) -> TaskInputEvent:
          """Handle the execution result. Currently it just returns InputEvent."""
-         if self.debug:
-             logger.debug("📊 Handling execution result...")
+         logger.debug("📊 Handling execution result...")
          # Get the output from the event
          output = ev.output
          if output is None:
              output = "Code executed, but produced no output."
              logger.warning(" - Execution produced no output.")
          else:
-             if self.debug:
-                 logger.debug(f" - Execution output: {output[:100]}..." if len(output) > 100 else f" - Execution output: {output}")
+             logger.debug(
+                 f" - Execution output: {output[:100]}..."
+                 if len(output) > 100
+                 else f" - Execution output: {output}"
+             )
          # Add the output to memory as an user message (observation)
-         observation_message = ChatMessage(role="user", content=f"Execution Result:\n```\n{output}\n```")
-         await self.memory.aput(observation_message)
-         if self.debug:
-             logger.debug(" - Added execution result to memory.")
-         return InputEvent(input=self.memory.get_all())
-
+         observation_message = ChatMessage(
+             role="user", content=f"Execution Result:\n```\n{output}\n```"
+         )
+         await self.chat_memory.aput(observation_message)
+
+         return TaskInputEvent(input=self.chat_memory.get_all())

      @step
-     async def finalize(self, ev: FinalizeEvent, ctx: Context) -> StopEvent:
+     async def finalize(self, ev: TaskEndEvent, ctx: Context) -> StopEvent:
          """Finalize the workflow."""
-         self.tools.finished = False # Reset finished flag
-         await ctx.set("memory", self.memory) # Ensure memory is set in context
+         self.tools.finished = False
+         await ctx.set("chat_memory", self.chat_memory)

-         # Include steps and code execution information in the result
-         result = ev.result or {}
-         result.update({
-             "codeact_steps": self.steps_counter,
-             "code_executions": self.code_exec_counter
-         })
+         # Add final state observation to episodic memory
+         await self._add_final_state_observation(ctx)

-         return StopEvent(result=result)
+         result = {}
+         result.update(
+             {
+                 "success": ev.success,
+                 "reason": ev.reason,
+                 "codeact_steps": self.steps_counter,
+                 "code_executions": self.code_exec_counter,
+             }
+         )

-     async def _get_llm_response(self, chat_history: List[ChatMessage]) -> ChatResponse:
-         """Get streaming response from LLM."""
-         if self.debug:
-             logger.debug(f" - Sending {len(chat_history)} messages to LLM.")
-         # Combine system prompt with chat history
-         if self.vision:
-             chat_history = await add_screenshot_image_block(self.tools, chat_history)
-         elif self.tools.last_screenshot:
-             chat_history = await add_screenshot(chat_history, self.tools.last_screenshot)
-             self.tools.last_screenshot = None # Reset last screenshot after sending it
-
-         # always add ui
-         chat_history = await add_ui_text_block(self.tools, chat_history)
-
-         # Add remembered information if available
-         if hasattr(self.tools, 'memory') and self.tools.memory:
-             memory_block = "\n### Remembered Information:\n"
-             for idx, item in enumerate(self.tools.memory, 1):
-                 memory_block += f"{idx}. {item}\n"
-
-             # Find the first user message and inject memory before it
-             for i, msg in enumerate(chat_history):
-                 if msg.role == "user":
-                     if isinstance(msg.content, str):
-                         # For text-only messages
-                         updated_content = f"{memory_block}\n\n{msg.content}"
-                         chat_history[i] = ChatMessage(role="user", content=updated_content)
-                     elif isinstance(msg.content, list):
-                         # For multimodal content
-                         memory_text_block = TextBlock(text=memory_block)
-                         # Insert memory text block at beginning
-                         content_blocks = [memory_text_block] + msg.content
-                         chat_history[i] = ChatMessage(role="user", content=content_blocks)
-                     break
-
-         messages_to_send = [self.system_prompt] + chat_history
+         ctx.write_event_to_stream(
+             EpisodicMemoryEvent(episodic_memory=self.episodic_memory)
+         )
+
+         return StopEvent(result=result)

-         messages_to_send = [message_copy(msg) for msg in messages_to_send]
+     async def _get_llm_response(
+         self, ctx: Context, chat_history: List[ChatMessage]
+     ) -> ChatResponse | None:
+         logger.debug("🔍 Getting LLM response...")
+         messages_to_send = [self.system_prompt] + chat_history
+         messages_to_send = [chat_utils.message_copy(msg) for msg in messages_to_send]
          try:
-             response = await self.llm.achat(
-                 messages=messages_to_send
+             response = await self.llm.achat(messages=messages_to_send)
+             logger.debug("🔍 Received LLM response.")
+
+             filtered_chat_history = []
+             for msg in chat_history:
+                 filtered_msg = chat_utils.message_copy(msg)
+                 if hasattr(filtered_msg, "blocks") and filtered_msg.blocks:
+                     filtered_msg.blocks = [
+                         block
+                         for block in filtered_msg.blocks
+                         if not isinstance(block, chat_utils.ImageBlock)
+                     ]
+                 filtered_chat_history.append(filtered_msg)
+
+             # Convert chat history and response to JSON strings
+             chat_history_str = json.dumps(
+                 [
+                     {"role": msg.role, "content": msg.content}
+                     for msg in filtered_chat_history
+                 ]
+             )
+             response_str = json.dumps(
+                 {"role": response.message.role, "content": response.message.content}
              )
-             assert hasattr(response, "message"), f"LLM response does not have a message attribute.\nResponse: {response}"
+
+             step = EpisodicMemoryStep(
+                 chat_history=chat_history_str,
+                 response=response_str,
+                 timestamp=time.time(),
+                 screenshot=(await ctx.get("screenshot", None))
+             )
+
+             self.episodic_memory.steps.append(step)
+
+             assert hasattr(
+                 response, "message"
+             ), f"LLM response does not have a message attribute.\nResponse: {response}"
          except Exception as e:
-             if self.llm.class_name() == "Gemini_LLM" and "You exceeded your current quota" in str(e):
-                 s = str(e._details[2])
-                 match = re.search(r'seconds:\s*(\d+)', s)
-                 if match:
-                     seconds = int(match.group(1)) + 1
-                     logger.error(f"Rate limit error. Retrying in {seconds} seconds...")
-                     time.sleep(seconds)
-                 else:
-                     logger.error(f"Rate limit error. Retrying in 5 seconds...")
-                     time.sleep(40)
-                 response = await self.llm.achat(
-                     messages=messages_to_send
-                 )
+             if (
+                 self.llm.class_name() == "Gemini_LLM"
+                 and "You exceeded your current quota" in str(e)
+             ):
+                 s = str(e._details[2])
+                 match = re.search(r"seconds:\s*(\d+)", s)
+                 if match:
+                     seconds = int(match.group(1)) + 1
+                     logger.error(f"Rate limit error. Retrying in {seconds} seconds...")
+                     time.sleep(seconds)
+                 else:
+                     logger.error(f"Rate limit error. Retrying in 5 seconds...")
+                     time.sleep(40)
+                 logger.debug("🔍 Retrying call to LLM...")
+                 response = await self.llm.achat(messages=messages_to_send)
              else:
-                 logger.error(f"Error getting LLM response: {e}")
-                 return StopEvent(result={'finished': True, 'message': f"Error getting LLM response: {e}", 'steps': self.steps_counter, 'code_executions': self.code_exec_counter}) # Return final message and steps
-         if self.debug:
-             logger.debug(" - Received response from LLM.")
+                 logger.error(f"Could not get an answer from LLM: {repr(e)}")
+                 raise e
+         logger.debug(" - Received response from LLM.")
          return response
-
+
+     async def _add_final_state_observation(self, ctx: Context) -> None:
+         """Add the current UI state and screenshot as the final observation step."""
+         try:
+             # Get current screenshot and UI state
+             screenshot = None
+             ui_state = None
+
+             try:
+                 _, screenshot_bytes = await self.tools.take_screenshot()
+                 screenshot = screenshot_bytes
+             except Exception as e:
+                 logger.warning(f"Failed to capture final screenshot: {e}")
+
+             try:
+                 (a11y_tree, phone_state) = await self.tools.get_state()
+             except Exception as e:
+                 logger.warning(f"Failed to capture final UI state: {e}")
+
+             # Create final observation chat history and response
+             final_chat_history = [{"role": "system", "content": "Final state observation after task completion"}]
+             final_response = {
+                 "role": "user",
+                 "content": f"Final State Observation:\nUI State: {a11y_tree}\nScreenshot: {'Available' if screenshot else 'Not available'}"
+             }
+
+             # Create final episodic memory step
+             final_step = EpisodicMemoryStep(
+                 chat_history=json.dumps(final_chat_history),
+                 response=json.dumps(final_response),
+                 timestamp=time.time(),
+                 screenshot=screenshot
+             )
+
+             self.episodic_memory.steps.append(final_step)
+             logger.info("Added final state observation to episodic memory")
+
+         except Exception as e:
+             logger.error(f"Failed to add final state observation: {e}")