PyPI - droidrun - Versions diffs - 0.3.10.dev5__py3-none-any.whl → 0.3.10.dev7__py3-none-any.whl - Mend

droidrun 0.3.10.dev5__py3-none-any.whl → 0.3.10.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

droidrun/agent/codeact/codeact_agent.py +21 -29
droidrun/agent/context/task_manager.py +0 -1
droidrun/agent/droid/droid_agent.py +1 -3
droidrun/agent/droid/events.py +6 -3
droidrun/agent/executor/executor_agent.py +24 -38
droidrun/agent/executor/prompts.py +0 -108
droidrun/agent/manager/__init__.py +1 -1
droidrun/agent/manager/manager_agent.py +104 -87
droidrun/agent/utils/executer.py +11 -10
droidrun/agent/utils/llm_picker.py +63 -1
droidrun/agent/utils/tools.py +30 -1
droidrun/app_cards/app_card_provider.py +26 -0
droidrun/app_cards/providers/__init__.py +7 -0
droidrun/app_cards/providers/composite_provider.py +97 -0
droidrun/app_cards/providers/local_provider.py +115 -0
droidrun/app_cards/providers/server_provider.py +126 -0
droidrun/cli/logs.py +4 -4
droidrun/cli/main.py +244 -34
droidrun/config_manager/__init__.py +0 -2
droidrun/config_manager/config_manager.py +45 -102
droidrun/config_manager/path_resolver.py +1 -1
droidrun/config_manager/prompt_loader.py +48 -51
droidrun/macro/cli.py +0 -1
droidrun/portal.py +17 -0
droidrun/tools/adb.py +13 -34
{droidrun-0.3.10.dev5.dist-info → droidrun-0.3.10.dev7.dist-info}/METADATA +2 -9
{droidrun-0.3.10.dev5.dist-info → droidrun-0.3.10.dev7.dist-info}/RECORD +30 -26
droidrun/config_manager/app_card_loader.py +0 -148
{droidrun-0.3.10.dev5.dist-info → droidrun-0.3.10.dev7.dist-info}/WHEEL +0 -0
{droidrun-0.3.10.dev5.dist-info → droidrun-0.3.10.dev7.dist-info}/entry_points.txt +0 -0
{droidrun-0.3.10.dev5.dist-info → droidrun-0.3.10.dev7.dist-info}/licenses/LICENSE +0 -0

droidrun/agent/codeact/codeact_agent.py CHANGED Viewed

@@ -3,7 +3,7 @@ import json
 import logging
 import re
 import time
-from typing import List, Union, Optional, TYPE_CHECKING
+from typing import TYPE_CHECKING, List, Optional, Union
 from llama_index.core.base.llms.types import ChatMessage, ChatResponse
 from llama_index.core.llms.llm import LLM
@@ -23,9 +23,8 @@ from droidrun.agent.common.events import RecordUIStateEvent, ScreenshotEvent
 from droidrun.agent.context.episodic_memory import EpisodicMemory, EpisodicMemoryStep
 from droidrun.agent.usage import get_usage_from_response
 from droidrun.agent.utils import chat_utils
-from droidrun.agent.utils.executer import SimpleCodeExecutor, ExecuterState
 from droidrun.agent.utils.device_state_formatter import format_device_state
+from droidrun.agent.utils.executer import ExecuterState, SimpleCodeExecutor
 from droidrun.agent.utils.tools import (
     ATOMIC_ACTION_SIGNATURES,
     build_custom_tool_descriptions,
@@ -85,15 +84,8 @@ class CodeActAgent(Workflow):
         self.tool_list = {}
         for action_name, signature in merged_signatures.items():
             func = signature["function"]
-            if asyncio.iscoroutinefunction(func):
-                # Create async bound function with proper closure
-                def make_bound(f, ti):
-                    async def bound_func(*args, **kwargs):
-                        return await f(ti, *args, **kwargs)
-                    return bound_func
-                self.tool_list[action_name] = make_bound(func, tools_instance)
-            else:
-                self.tool_list[action_name] = lambda *args, f=func, ti=tools_instance, **kwargs: f(ti, *args, **kwargs)
+            self.tool_list[action_name] = lambda *args, f=func, ti=tools_instance, **kwargs: f(ti, *args, **kwargs)
         self.tool_list["remember"] = tools_instance.remember
         self.tool_list["complete"] = tools_instance.complete
@@ -113,13 +105,10 @@ class CodeActAgent(Workflow):
         )
         self.system_prompt = ChatMessage(role="system", content=system_prompt_text)
-        self.user_prompt_template = PromptLoader.load_prompt(agent_config.get_codeact_user_prompt_path())
         self.executor = SimpleCodeExecutor(
             loop=asyncio.get_event_loop(),
             locals={},
             tools=self.tool_list,
-            tools_instance=tools_instance,
             globals={"__builtins__": __builtins__},
         )
@@ -293,27 +282,30 @@ Now, describe the next step you will take to address the original goal: {goal}""
         try:
             self.code_exec_counter += 1
             result = await self.executor.execute(ExecuterState(ui_state=ctx.store.get("ui_state", None)), code)
-            logger.info(f"💡 Code execution successful. Result: {result['output']}")
+            logger.info(f"💡 Code execution successful. Result: {result}")
             await asyncio.sleep(self.agent_config.after_sleep_action)
-            screenshots = result['screenshots']
-            for screenshot in screenshots[:-1]: # the last screenshot will be captured by next step
-                ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
-            ui_states = result['ui_states']
-            for ui_state in ui_states[:-1]:
-                ctx.write_event_to_stream(RecordUIStateEvent(ui_state=ui_state['a11y_tree']))
+            # Check if complete() was called
             if self.tools.finished:
-                logger.debug("  - Task completed.")
-                event = TaskEndEvent(
-                    success=self.tools.success, reason=self.tools.reason
-                )
+                logger.info("✅ Task marked as complete via complete() function")
+                # Validate completion state
+                success = self.tools.success if self.tools.success is not None else False
+                reason = self.tools.reason if self.tools.reason else "Task completed without reason"
+                # Reset finished flag for next execution
+                self.tools.finished = False
+                logger.info(f"  - Success: {success}")
+                logger.info(f"  - Reason: {reason}")
+                event = TaskEndEvent(success=success, reason=reason)
                 ctx.write_event_to_stream(event)
                 return event
             self.remembered_info = self.tools.memory
-            event = TaskExecutionResultEvent(output=str(result['output']))
+            event = TaskExecutionResultEvent(output=str(result))
             ctx.write_event_to_stream(event)
             return event
@@ -496,7 +488,7 @@ Now, describe the next step you will take to address the original goal: {goal}""
             try:
                 state = self.tools.get_state()
                 a11y_tree = state.get("a11y_tree", "")
-                phone_state = state.get("phone_state", "")
+                phone_state = state.get("phone_state", "")  # noqa: F841
             except Exception as e:
                 raise Exception(f"Failed to capture final UI state: {e}") from e

droidrun/agent/context/task_manager.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import copy
-import os
 from dataclasses import dataclass
 from typing import Dict, List, Optional

droidrun/agent/droid/droid_agent.py CHANGED Viewed

@@ -32,7 +32,7 @@ from droidrun.agent.droid.events import (
 )
 from droidrun.agent.executor import ExecutorAgent
 from droidrun.agent.manager import ManagerAgent
-from droidrun.agent.utils.tools import ATOMIC_ACTION_SIGNATURES
+from droidrun.agent.utils.tools import ATOMIC_ACTION_SIGNATURES, open_app
 from droidrun.agent.utils.trajectory import Trajectory
 from droidrun.config_manager.config_manager import (
     AgentConfig,
@@ -43,8 +43,6 @@ from droidrun.config_manager.config_manager import (
     ToolsConfig,
     TracingConfig,
 )
-from droidrun.agent.utils.tools import open_app
 from droidrun.telemetry import (
     DroidAgentFinalizeEvent,
     DroidAgentInitEvent,

droidrun/agent/droid/events.py CHANGED Viewed

@@ -10,10 +10,11 @@ For internal events with full debugging metadata, see:
 - codeact/events.py (Task*, EpisodicMemoryEvent)
 """
+import asyncio
 from typing import Dict, List
 from llama_index.core.workflow import Event
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 from droidrun.agent.context import Task
@@ -46,10 +47,12 @@ class DroidAgentState(BaseModel):
     """
     State model for DroidAgent workflow - shared across parent and child workflows.
     """
+    model_config = ConfigDict(arbitrary_types_allowed=True)
     # Task context
     instruction: str = ""
+    # App Cards
+    app_card: str = ""
+    app_card_loading_task: asyncio.Task[str] | None = None
     # Formatted device state for prompts (complete text)
     formatted_device_state: str = ""

droidrun/agent/executor/executor_agent.py CHANGED Viewed

@@ -90,52 +90,38 @@ class ExecutorAgent(Workflow): # TODO: Fix a bug in bad prompt
         subgoal = ev.get("subgoal", "")
         logger.info(f"🧠 Executor thinking about action for: {subgoal}")
-        # Format app card (include tags in variable value or empty string)
-        app_card = ""  # TODO: Implement app card retrieval
-        app_card_text = ""
-        if app_card.strip():
-            app_card_text = "App card gives information on how to operate the app and perform actions.\n### App Card ###\n" + app_card.strip() + "\n\n"
-        # Format device state (use unified state)
-        device_state_text = ""
-        if self.shared_state.formatted_device_state and self.shared_state.formatted_device_state.strip():
-            device_state_text = "### Device State ###\n" + self.shared_state.formatted_device_state.strip() + "\n\n"
-        # Format progress status
-        progress_status_text = self.shared_state.progress_status + "\n\n" if self.shared_state.progress_status else "No progress yet.\n\n"
-        # Format atomic actions
-        atomic_actions_text = chr(10).join(
-            f"- {action_name}({', '.join(action_info['arguments'])}): {action_info['description']}"
-            for action_name, action_info in ATOMIC_ACTION_SIGNATURES.items()
-        ) + "\n"
-        # Format action history
+        # Prepare action history as structured data (last 5 actions)
+        action_history = []
         if self.shared_state.action_history:
-            action_history_text = "Recent actions you took previously and whether they were successful:\n" + "\n".join(
-                (f"Action: {act} | Description: {summ} | Outcome: Successful" if outcome
-                 else f"Action: {act} | Description: {summ} | Outcome: Failed | Feedback: {err_des}")
+            n = min(5, len(self.shared_state.action_history))
+            action_history = [
+                {
+                    "action": act,
+                    "summary": summ,
+                    "outcome": outcome,
+                    "error": err_des
+                }
                 for act, summ, outcome, err_des in zip(
-                    self.shared_state.action_history[-min(5, len(self.shared_state.action_history)):],
-                    self.shared_state.summary_history[-min(5, len(self.shared_state.action_history)):],
-                    self.shared_state.action_outcomes[-min(5, len(self.shared_state.action_history)):],
-                    self.shared_state.error_descriptions[-min(5, len(self.shared_state.action_history)):], strict=True)
-            ) + "\n\n"
-        else:
-            action_history_text = "No actions have been taken yet.\n\n"
-        # Load and format prompt
+                    self.shared_state.action_history[-n:],
+                    self.shared_state.summary_history[-n:],
+                    self.shared_state.action_outcomes[-n:],
+                    self.shared_state.error_descriptions[-n:],
+                    strict=True
+                )
+            ]
+        # Let Jinja2 handle all formatting
         system_prompt = PromptLoader.load_prompt(
             self.agent_config.get_executor_system_prompt_path(),
             {
                 "instruction": self.shared_state.instruction,
-                "app_card": app_card_text,
-                "device_state_text": device_state_text,
+                "app_card": "",  # TODO: Implement app card loader
+                "device_state": self.shared_state.formatted_device_state,
                 "plan": self.shared_state.plan,
                 "subgoal": subgoal,
-                "progress_status": progress_status_text,
-                "atomic_actions": atomic_actions_text,
-                "action_history": action_history_text
+                "progress_status": self.shared_state.progress_status,
+                "atomic_actions": ATOMIC_ACTION_SIGNATURES,
+                "action_history": action_history
             }
         )

droidrun/agent/executor/prompts.py CHANGED Viewed

@@ -2,114 +2,6 @@
 Prompts for the ExecutorAgent.
 """
-from droidrun.agent.droid.events import DroidAgentState
-from droidrun.agent.utils.tools import ATOMIC_ACTION_SIGNATURES
-def build_executor_system_prompt(
-    state: "DroidAgentState",
-    subgoal: str,
-    app_card: str = "",
-) -> str:
-    """
-    Build the complete Executor system prompt with all context.
-    Args:
-        state: Current DroidAgentState with all context
-        subgoal: Current subgoal to execute
-        app_card: Optional app-specific instructions
-    Returns:
-        Complete system prompt for the Executor
-    """
-    prompt = f"""You are a LOW-LEVEL ACTION EXECUTOR for an Android phone. You do NOT answer questions or provide results. You ONLY perform individual atomic actions as specified in the current subgoal. You are part of a larger system - your job is to execute actions, not to think about or answer the user's original question.
-### User Request ###
-{state.instruction}
-{("App card gives information on how to operate the app and perform actions.\n" + "### App Card ###\n" + app_card.strip() + "\n\n") if app_card.strip() else ""}{(("### Device State ###\n" + state.device_state_text.strip() + "\n\n") if state.device_state_text.strip() else "")}### Overall Plan ###
-{state.plan}
-### Current Subgoal ###
-EXECUTE THIS SUBGOAL: {subgoal}
-EXECUTION MODE: You are a dumb robot. Find the exact text/element mentioned in the subgoal above and perform the specified action on it. Do not read anything below this line until after you execute the subgoal.
-### SUBGOAL PARSING MODE ###
-Read the current subgoal exactly as written. Look for:
-- Action words: "tap", "click", "swipe", "type", "press", "open" etc.
-- Target elements: specific text, buttons, fields, coordinates mentioned
-- Locations: "header", "bottom", "left", "right", specific coordinates
-Convert directly to atomic action:
-- "tap/click" → click action
-- "swipe" → swipe action
-- "type" → type action
-- "press [system button]" → system_button action
-- "open [app]" → open_app action
-Execute the atomic action for the exact target mentioned. Ignore everything else.
-### Progress Status ###
-{(state.progress_status + "\n\n") if state.progress_status != "" else "No progress yet.\n\n"}
-### Guidelines ###
-General:
-- For any pop-up window, such as a permission request, you need to close it (e.g., by clicking `Don't Allow` or `Accept & continue`) before proceeding. Never choose to add any account or log in.
-Action Related:
-- Use the `open_app` action whenever you want to open an app (nothing will happen if the app is not installed), do not use the app drawer to open an app.
-- Consider exploring the screen by using the `swipe` action with different directions to reveal additional content. Or use search to quickly find a specific entry, if applicable.
-- If you cannot change the page content by swiping in the same direction continuously, the page may have been swiped to the bottom. Please try another operation to display more content.
-- For some horizontally distributed tags, you can swipe horizontally to view more.
-Text Related Operations:
-- Activated input box: If an input box is activated, it may have a cursor inside it and the keyboard is visible. If there is no cursor on the screen but the keyboard is visible, it may be because the cursor is blinking. The color of the activated input box will be highlighted. If you are not sure whether the input box is activated, click it before typing.
-- To input some text: first click the input box that you want to input, make sure the correct input box is activated and the keyboard is visible, then use `type` action to enter the specified text.
-- To clear the text: long press the backspace button in the keyboard.
-- To copy some text: first long press the text you want to copy, then click the `copy` button in bar.
-- To paste text into a text box: first long press the text box, then click the `paste` button in bar.
----
-Execute the current subgoal mechanically. Do NOT examine the screen content or make decisions about what you see. Parse the current subgoal text to identify the required action and execute it exactly as written. You must choose your action from one of the atomic actions.
-#### Atomic Actions ####
-The atomic action functions are listed in the format of `action(arguments): description` as follows:
-{chr(10).join(f"- {action_name}({', '.join(action_info['arguments'])}): {action_info['description']}" for action_name, action_info in ATOMIC_ACTION_SIGNATURES.items())}
-\n
-### Latest Action History ###
-{(("Recent actions you took previously and whether they were successful:\n" + "\n".join(
-    (f"Action: {act} | Description: {summ} | Outcome: Successful" if outcome
-     else f"Action: {act} | Description: {summ} | Outcome: Failed | Feedback: {err_des}")
-    for act, summ, outcome, err_des in zip(
-        state.action_history[-min(5, len(state.action_history)):],
-        state.summary_history[-min(5, len(state.action_history)):],
-        state.action_outcomes[-min(5, len(state.action_history)):],
-        state.error_descriptions[-min(5, len(state.action_history)):], strict=True)
-) + "\n\n")) if state.action_history else "No actions have been taken yet.\n\n"}
----
-### LITERAL EXECUTION RULE ###
-Whatever the current subgoal says to do, do that EXACTLY. Do not substitute with what you think is better. Do not optimize. Do not consider screen state. Parse the subgoal text literally and execute the matching atomic action.
-IMPORTANT:
-1. Do NOT repeat previously failed actions multiple times. Try changing to another action.
-2. Must do the current subgoal.
-Provide your output in the following format, which contains three parts:
-### Thought ###
-Break down the current subgoal into: (1) What atomic action is required? (2) What target/location is specified? (3) What parameters do I need? Do NOT reason about whether this makes sense - just mechanically convert the subgoal text into the appropriate action format.
-### Action ###
-Choose only one action or shortcut from the options provided.
-You must provide your decision using a valid JSON format specifying the `action` and the arguments of the action. For example, if you want to open an App, you should write {{ "action":"open_app", "text": "app name" }}.
-### Description ###
-A brief description of the chosen action. Do not describe expected outcome.
-"""
-    return prompt
 def parse_executor_response(response: str) -> dict:
     """
     Parse the Executor LLM response.

droidrun/agent/manager/__init__.py CHANGED Viewed

@@ -3,7 +3,7 @@ Manager Agent - Planning and reasoning workflow.
 """
 from droidrun.agent.droid.events import ManagerInputEvent, ManagerPlanEvent
-from droidrun.agent.manager.events import ManagerThinkingEvent, ManagerInternalPlanEvent
+from droidrun.agent.manager.events import ManagerInternalPlanEvent, ManagerThinkingEvent
 from droidrun.agent.manager.manager_agent import ManagerAgent
 from droidrun.agent.manager.prompts import parse_manager_response

droidrun/agent/manager/manager_agent.py CHANGED Viewed

@@ -10,6 +10,7 @@ This agent is responsible for:
 from __future__ import annotations
+import asyncio
 import logging
 from typing import TYPE_CHECKING
@@ -23,13 +24,18 @@ from droidrun.agent.utils.chat_utils import remove_empty_messages
 from droidrun.agent.utils.device_state_formatter import format_device_state
 from droidrun.agent.utils.inference import acall_with_retries
 from droidrun.agent.utils.tools import build_custom_tool_descriptions
+from droidrun.app_cards.app_card_provider import AppCardProvider
+from droidrun.app_cards.providers import (
+    CompositeAppCardProvider,
+    LocalAppCardProvider,
+    ServerAppCardProvider,
+)
 from droidrun.config_manager.prompt_loader import PromptLoader
-from droidrun.config_manager.app_card_loader import AppCardLoader
 if TYPE_CHECKING:
     from droidrun.agent.droid.events import DroidAgentState
-    from droidrun.tools import Tools
     from droidrun.config_manager.config_manager import AgentConfig
+    from droidrun.tools import Tools
 logger = logging.getLogger("droidrun")
@@ -63,33 +69,75 @@ class ManagerAgent(Workflow):
         self.shared_state = shared_state
         self.custom_tools = custom_tools or {}
         self.agent_config = agent_config
-        self.app_card_loader = self.agent_config.app_cards
+        self.app_card_config = self.agent_config.app_cards
+        # Initialize app card provider based on mode
+        self.app_card_provider: AppCardProvider = self._initialize_app_card_provider()
         logger.info("✅ ManagerAgent initialized successfully.")
+    def _initialize_app_card_provider(self) -> AppCardProvider:
+        """Initialize app card provider based on configuration mode."""
+        if not self.app_card_config.enabled:
+            # Return a dummy provider that always returns empty string
+            class DisabledProvider(AppCardProvider):
+                async def load_app_card(self, package_name: str, instruction: str = "") -> str:
+                    return ""
+            return DisabledProvider()
+        mode = self.app_card_config.mode.lower()
+        if mode == "local":
+            logger.info(f"Initializing local app card provider (dir: {self.app_card_config.app_cards_dir})")
+            return LocalAppCardProvider(app_cards_dir=self.app_card_config.app_cards_dir)
+        elif mode == "server":
+            if not self.app_card_config.server_url:
+                logger.warning("Server mode enabled but no server_url configured, falling back to local")
+                return LocalAppCardProvider(app_cards_dir=self.app_card_config.app_cards_dir)
+            logger.info(f"Initializing server app card provider (url: {self.app_card_config.server_url})")
+            return ServerAppCardProvider(
+                server_url=self.app_card_config.server_url,
+                timeout=self.app_card_config.server_timeout,
+                max_retries=self.app_card_config.server_max_retries,
+            )
+        elif mode == "composite":
+            if not self.app_card_config.server_url:
+                logger.warning("Composite mode enabled but no server_url configured, falling back to local")
+                return LocalAppCardProvider(app_cards_dir=self.app_card_config.app_cards_dir)
+            logger.info(
+                f"Initializing composite app card provider "
+                f"(server: {self.app_card_config.server_url}, local: {self.app_card_config.app_cards_dir})"
+            )
+            return CompositeAppCardProvider(
+                server_url=self.app_card_config.server_url,
+                app_cards_dir=self.app_card_config.app_cards_dir,
+                server_timeout=self.app_card_config.server_timeout,
+                server_max_retries=self.app_card_config.server_max_retries,
+            )
+        else:
+            logger.warning(f"Unknown app_card mode '{mode}', falling back to local")
+            return LocalAppCardProvider(app_cards_dir=self.app_card_config.app_cards_dir)
     # ========================================================================
     # Helper Methods
     # ========================================================================
     def _build_system_prompt(
         self,
-        has_text_to_modify: bool,
-        app_card: str = ""
+        has_text_to_modify: bool
     ) -> str:
-        """
-        Build system prompt with all context.
+        """Build system prompt with all context."""
-        Args:
-            has_text_to_modify: Whether text manipulation mode is enabled
-            app_card: App card content
-        Returns:
-            Complete system prompt
-        """
-        # Format error history
-        error_history_text = ""
+        # Prepare error history as structured data (if needed)
+        error_history = None
         if self.shared_state.error_flag_plan:
             k = self.shared_state.err_to_manager_thresh
-            errors = [
+            error_history = [
                 {
                     "action": act,
                     "summary": summ,
@@ -98,78 +146,22 @@ class ManagerAgent(Workflow):
                 for act, summ, err_des in zip(
                     self.shared_state.action_history[-k:],
                     self.shared_state.summary_history[-k:],
-                    self.shared_state.error_descriptions[-k:], strict=True
+                    self.shared_state.error_descriptions[-k:],
+                    strict=True
                 )
             ]
-            error_history_text = (
-                "<potentially_stuck>\n"
-                "You have encountered several failed attempts. Here are some logs:\n"
-            )
-            for error in errors:
-                error_history_text += (
-                    f"- Attempt: Action: {error['action']} | "
-                    f"Description: {error['summary']} | "
-                    f"Outcome: Failed | "
-                    f"Feedback: {error['error']}\n"
-                )
-            error_history_text += "</potentially_stuck>\n\n"
-        # Text manipulation section
-        text_manipulation_section = ""
-        if has_text_to_modify:
-            text_manipulation_section = """
-<text_manipulation>
-1. Use **TEXT_TASK:** prefix in your plan when you need to modify text in the currently focused text input field
-2. TEXT_TASK is for editing, formatting, or transforming existing text content in text boxes using Python code
-3. Do not use TEXT_TASK for extracting text from messages, typing new text, or composing messages
-4. The focused text field contains editable text that you can modify
-5. Example plan item: 'TEXT_TASK: Add "Hello World" at the beginning of the text'
-6. Always use TEXT_TASK for modifying text, do not try to select the text to copy/cut/paste or adjust the text
-</text_manipulation>"""
-        # Device date (include tags in variable value or empty string)
-        device_date = self.tools_instance.get_date()
-        device_date_text = ""
-        if device_date.strip():
-            device_date_text = f"<device_date>\n{device_date}\n</device_date>\n\n"
-        # App card (include tags in variable value or empty string)
-        app_card = app_card
-        app_card_text = ""
-        if app_card.strip():
-            app_card_text = "App card gives information on how to operate the app and perform actions.\n<app_card>\n" + app_card.strip() + "\n</app_card>\n\n"
-        # Important notes (include tags in variable value or empty string)
-        important_notes = ""  # TODO: implement
-        important_notes_text = ""
-        if important_notes.strip():
-            important_notes_text = "<important_notes>\n" + important_notes + "\n</important_notes>\n\n"
-        # Custom tools
-        custom_tools_desc = build_custom_tool_descriptions(self.custom_tools)
-        custom_tools_text = ""
-        if custom_tools_desc.strip():
-            custom_tools_text = """
-<custom_actions>
-The executor has access to these additional custom actions beyond the standard actions (click, type, swipe, etc.):
-""" + custom_tools_desc + """
-You can reference these custom actions or tell the Executer agent to use them in your plan when they help achieve the user's goal.
-</custom_actions>"""
-        # Load and format prompt
+        # Let Jinja2 handle all formatting and conditionals
         return PromptLoader.load_prompt(
             self.agent_config.get_manager_system_prompt_path(),
             {
                 "instruction": self.shared_state.instruction,
-                "device_date": device_date_text,
-                "app_card": app_card_text,
-                "important_notes": important_notes_text,
-                "error_history": error_history_text,
-                "text_manipulation_section": text_manipulation_section,
-                "custom_tools_descriptions": custom_tools_text
+                "device_date": self.tools_instance.get_date(),
+                "app_card": self.shared_state.app_card,
+                "important_notes": "",  # TODO: implement
+                "error_history": error_history,
+                "text_manipulation_enabled": has_text_to_modify,
+                "custom_tools_descriptions": build_custom_tool_descriptions(self.custom_tools)
             }
         )
@@ -339,7 +331,17 @@ You can reference these custom actions or tell the Executer agent to use them in
         self.shared_state.current_package_name = phone_state.get('packageName', 'Unknown')
         self.shared_state.current_app_name = phone_state.get('currentApp', 'Unknown')
-        # App cards
+        # ====================================================================
+        # Step 1.5: Start loading app card in background
+        # ====================================================================
+        if self.app_card_config.enabled:
+            loading_task = asyncio.create_task(
+                self.app_card_provider.load_app_card(
+                    package_name=self.shared_state.current_package_name,
+                    instruction=self.shared_state.instruction
+                )
+            )
+            self.shared_state.app_card_loading_task = loading_task
         # ====================================================================
         # Step 2: Capture screenshot if vision enabled
@@ -417,15 +419,30 @@ You can reference these custom actions or tell the Executer agent to use them in
         has_text_to_modify = self.shared_state.has_text_to_modify
         screenshot = self.shared_state.screenshot
-        if self.app_card_loader.enabled:
-            app_card = AppCardLoader.load_app_card(self.shared_state.current_package_name, self.app_card_loader.app_cards_dir)
+        # ====================================================================
+        # Try to get app card from previous iteration's loading task
+        # ====================================================================
+        if self.app_card_config.enabled and self.shared_state.app_card_loading_task:
+            try:
+                # Wait briefly for the background task to complete (0.1s timeout)
+                self.shared_state.app_card = await asyncio.wait_for(
+                    self.shared_state.app_card_loading_task,
+                    timeout=0.1
+                )
+            except asyncio.TimeoutError:
+                # Task not ready yet, use empty string
+                self.shared_state.app_card = ""
+            except Exception as e:
+                logger.warning(f"Error getting app card: {e}")
+                self.shared_state.app_card = ""
         else:
-            app_card = ""
+            self.shared_state.app_card = ""
         # ====================================================================
         # Step 1: Build system prompt
         # ====================================================================
-        system_prompt = self._build_system_prompt(has_text_to_modify, app_card)
+        system_prompt = self._build_system_prompt(has_text_to_modify)
         # ====================================================================
         # Step 2: Build messages with context

droidrun 0.3.10.dev5__py3-none-any.whl → 0.3.10.dev7__py3-none-any.whl

droidrun 0.3.10.dev5__py3-none-any.whl → 0.3.10.dev7__py3-none-any.whl