PyPI - droidrun - Versions diffs - 0.3.10.dev3__py3-none-any.whl → 0.3.10.dev5__py3-none-any.whl - Mend

droidrun 0.3.10.dev3__py3-none-any.whl → 0.3.10.dev5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

droidrun/agent/codeact/__init__.py +1 -4
droidrun/agent/codeact/codeact_agent.py +95 -86
droidrun/agent/codeact/events.py +1 -2
droidrun/agent/context/__init__.py +5 -9
droidrun/agent/context/episodic_memory.py +1 -3
droidrun/agent/context/task_manager.py +8 -2
droidrun/agent/droid/droid_agent.py +102 -141
droidrun/agent/droid/events.py +45 -14
droidrun/agent/executor/__init__.py +6 -4
droidrun/agent/executor/events.py +29 -9
droidrun/agent/executor/executor_agent.py +86 -28
droidrun/agent/executor/prompts.py +8 -2
droidrun/agent/manager/__init__.py +6 -7
droidrun/agent/manager/events.py +16 -4
droidrun/agent/manager/manager_agent.py +130 -69
droidrun/agent/manager/prompts.py +1 -159
droidrun/agent/utils/chat_utils.py +64 -2
droidrun/agent/utils/device_state_formatter.py +54 -26
droidrun/agent/utils/executer.py +66 -80
droidrun/agent/utils/inference.py +11 -10
droidrun/agent/utils/tools.py +58 -6
droidrun/agent/utils/trajectory.py +18 -12
droidrun/cli/logs.py +118 -56
droidrun/cli/main.py +154 -136
droidrun/config_manager/__init__.py +9 -7
droidrun/config_manager/app_card_loader.py +148 -0
droidrun/config_manager/config_manager.py +200 -102
droidrun/config_manager/path_resolver.py +104 -0
droidrun/config_manager/prompt_loader.py +75 -0
droidrun/macro/__init__.py +1 -1
droidrun/macro/cli.py +23 -18
droidrun/telemetry/__init__.py +2 -2
droidrun/telemetry/events.py +3 -3
droidrun/telemetry/tracker.py +1 -1
droidrun/tools/adb.py +1 -1
droidrun/tools/ios.py +3 -2
{droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev5.dist-info}/METADATA +10 -4
droidrun-0.3.10.dev5.dist-info/RECORD +61 -0
droidrun/agent/codeact/prompts.py +0 -26
droidrun/agent/context/agent_persona.py +0 -16
droidrun/agent/context/context_injection_manager.py +0 -66
droidrun/agent/context/personas/__init__.py +0 -11
droidrun/agent/context/personas/app_starter.py +0 -44
droidrun/agent/context/personas/big_agent.py +0 -96
droidrun/agent/context/personas/default.py +0 -95
droidrun/agent/context/personas/ui_expert.py +0 -108
droidrun/agent/planner/__init__.py +0 -13
droidrun/agent/planner/events.py +0 -21
droidrun/agent/planner/planner_agent.py +0 -311
droidrun/agent/planner/prompts.py +0 -124
droidrun-0.3.10.dev3.dist-info/RECORD +0 -70
{droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev5.dist-info}/WHEEL +0 -0
{droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev5.dist-info}/entry_points.txt +0 -0
{droidrun-0.3.10.dev3.dist-info → droidrun-0.3.10.dev5.dist-info}/licenses/LICENSE +0 -0

droidrun/agent/executor/executor_agent.py CHANGED Viewed

@@ -9,6 +9,7 @@ This agent is responsible for:
 from __future__ import annotations
+import asyncio
 import json
 import logging
 from typing import TYPE_CHECKING
@@ -17,12 +18,20 @@ from llama_index.core.llms import ChatMessage, ImageBlock, TextBlock
 from llama_index.core.llms.llm import LLM
 from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow, step
-from droidrun.agent.executor.events import ExecutorActionEvent, ExecutorResultEvent
-from droidrun.agent.executor.prompts import build_executor_system_prompt, parse_executor_response
-from droidrun.agent.utils.tools import click, long_press, open_app, swipe, system_button, type
+from droidrun.agent.executor.events import ExecutorInternalActionEvent, ExecutorInternalResultEvent
+from droidrun.agent.executor.prompts import parse_executor_response
 from droidrun.agent.utils.inference import acall_with_retries
-from droidrun.config_manager import config
-import asyncio
+from droidrun.agent.utils.tools import (
+    ATOMIC_ACTION_SIGNATURES,
+    click,
+    long_press,
+    open_app,
+    swipe,
+    system_button,
+    type,
+)
+from droidrun.config_manager.config_manager import AgentConfig
+from droidrun.config_manager.prompt_loader import PromptLoader
 if TYPE_CHECKING:
     from droidrun.agent.droid.events import DroidAgentState
@@ -30,7 +39,7 @@ if TYPE_CHECKING:
 logger = logging.getLogger("droidrun")
-class ExecutorAgent(Workflow):
+class ExecutorAgent(Workflow): # TODO: Fix a bug in bad prompt
     """
     Action execution agent that performs specific actions.
@@ -45,22 +54,20 @@ class ExecutorAgent(Workflow):
     def __init__(
         self,
         llm: LLM,
-        vision: bool,
         tools_instance,
         shared_state: "DroidAgentState",
-        persona=None,
+        agent_config: AgentConfig,
         custom_tools: dict = None,
-        debug: bool = False,
         **kwargs
     ):
         super().__init__(**kwargs)
         self.llm = llm
-        self.vision = vision
+        self.agent_config = agent_config
+        self.config = agent_config.executor
+        self.vision = agent_config.executor.vision
         self.tools_instance = tools_instance
         self.shared_state = shared_state
-        self.persona = persona
         self.custom_tools = custom_tools or {}
-        self.debug = debug
         logger.info("✅ ExecutorAgent initialized successfully.")
@@ -70,7 +77,7 @@ class ExecutorAgent(Workflow):
         self,
         ctx: Context,
         ev: StartEvent
-    ) -> ExecutorActionEvent:
+    ) -> ExecutorInternalActionEvent:
         """
         Executor decides which action to take.
@@ -83,12 +90,53 @@ class ExecutorAgent(Workflow):
         subgoal = ev.get("subgoal", "")
         logger.info(f"🧠 Executor thinking about action for: {subgoal}")
+        # Format app card (include tags in variable value or empty string)
         app_card = ""  # TODO: Implement app card retrieval
-        system_prompt = build_executor_system_prompt(
-            state=self.shared_state,
-            subgoal=subgoal,
-            app_card=app_card
+        app_card_text = ""
+        if app_card.strip():
+            app_card_text = "App card gives information on how to operate the app and perform actions.\n### App Card ###\n" + app_card.strip() + "\n\n"
+        # Format device state (use unified state)
+        device_state_text = ""
+        if self.shared_state.formatted_device_state and self.shared_state.formatted_device_state.strip():
+            device_state_text = "### Device State ###\n" + self.shared_state.formatted_device_state.strip() + "\n\n"
+        # Format progress status
+        progress_status_text = self.shared_state.progress_status + "\n\n" if self.shared_state.progress_status else "No progress yet.\n\n"
+        # Format atomic actions
+        atomic_actions_text = chr(10).join(
+            f"- {action_name}({', '.join(action_info['arguments'])}): {action_info['description']}"
+            for action_name, action_info in ATOMIC_ACTION_SIGNATURES.items()
+        ) + "\n"
+        # Format action history
+        if self.shared_state.action_history:
+            action_history_text = "Recent actions you took previously and whether they were successful:\n" + "\n".join(
+                (f"Action: {act} | Description: {summ} | Outcome: Successful" if outcome
+                 else f"Action: {act} | Description: {summ} | Outcome: Failed | Feedback: {err_des}")
+                for act, summ, outcome, err_des in zip(
+                    self.shared_state.action_history[-min(5, len(self.shared_state.action_history)):],
+                    self.shared_state.summary_history[-min(5, len(self.shared_state.action_history)):],
+                    self.shared_state.action_outcomes[-min(5, len(self.shared_state.action_history)):],
+                    self.shared_state.error_descriptions[-min(5, len(self.shared_state.action_history)):], strict=True)
+            ) + "\n\n"
+        else:
+            action_history_text = "No actions have been taken yet.\n\n"
+        # Load and format prompt
+        system_prompt = PromptLoader.load_prompt(
+            self.agent_config.get_executor_system_prompt_path(),
+            {
+                "instruction": self.shared_state.instruction,
+                "app_card": app_card_text,
+                "device_state_text": device_state_text,
+                "plan": self.shared_state.plan,
+                "subgoal": subgoal,
+                "progress_status": progress_status_text,
+                "atomic_actions": atomic_actions_text,
+                "action_history": action_history_text
+            }
         )
         blocks = [TextBlock(text=system_prompt)]
@@ -112,7 +160,7 @@ class ExecutorAgent(Workflow):
             parsed = parse_executor_response(response_text)
         except Exception as e:
             logger.error(f"❌ Failed to parse executor response: {e}")
-            return ExecutorActionEvent(
+            return ExecutorInternalActionEvent(
                 action_json=json.dumps({"action": "invalid"}),
                 thought=f"Failed to parse response: {str(e)}",
                 description="Invalid response format from LLM"
@@ -122,18 +170,23 @@ class ExecutorAgent(Workflow):
         logger.info(f"🎯 Action: {parsed['action']}")
         logger.debug(f"  - Description: {parsed['description']}")
-        return ExecutorActionEvent(
+        event = ExecutorInternalActionEvent(
             action_json=parsed["action"],
             thought=parsed["thought"],
             description=parsed["description"]
         )
+        # Write event to stream for web interface
+        ctx.write_event_to_stream(event)
+        return event
     @step
     async def execute(
         self,
         ctx: Context,
-        ev: ExecutorActionEvent
-    ) -> ExecutorResultEvent:
+        ev: ExecutorInternalActionEvent
+    ) -> ExecutorInternalResultEvent:
         """
         Execute the selected action using the tools instance.
@@ -146,7 +199,7 @@ class ExecutorAgent(Workflow):
             action_dict = json.loads(ev.action_json)
         except json.JSONDecodeError as e:
             logger.error(f"❌ Failed to parse action JSON: {e}")
-            return ExecutorResultEvent(
+            return ExecutorInternalResultEvent(
                 action={"action": "invalid"},
                 outcome=False,
                 error=f"Invalid action JSON: {str(e)}",
@@ -155,15 +208,15 @@ class ExecutorAgent(Workflow):
                 action_json=ev.action_json
             )
-        # Execute the action
         outcome, error, summary = await self._execute_action(action_dict, ev.description)
-        if outcome:
-            await asyncio.sleep(config.agent.after_sleep_action)
+        # TODO: Add sleep after action (should be in DroidAgent.handle_executor_result)
+        # Available via: self.agent_config.after_sleep_action
+        # await asyncio.sleep(self.agent_config.after_sleep_action)
         logger.info(f"{'✅' if outcome else '❌'} Execution complete: {summary}")
-        return ExecutorResultEvent(
+        result_event = ExecutorInternalResultEvent(
             action=action_dict,
             outcome=outcome,
             error=error,
@@ -172,6 +225,11 @@ class ExecutorAgent(Workflow):
             action_json=ev.action_json
         )
+        # Write event to stream for web interface
+        ctx.write_event_to_stream(result_event)
+        return result_event
     async def _execute_action(self, action_dict: dict, description: str) -> tuple[bool, str, str]:
         """
         Execute a single action based on the action dictionary.
@@ -312,7 +370,7 @@ class ExecutorAgent(Workflow):
     async def finalize(
         self,
         ctx: Context,
-        ev: ExecutorResultEvent
+        ev: ExecutorInternalResultEvent
     ) -> StopEvent:
         """Return executor results to parent workflow."""
         logger.debug("✅ Executor execution complete")

droidrun/agent/executor/prompts.py CHANGED Viewed

@@ -76,7 +76,7 @@ The atomic action functions are listed in the format of `action(arguments): desc
 \n
 ### Latest Action History ###
 {(("Recent actions you took previously and whether they were successful:\n" + "\n".join(
-    (f"Action: {act} | Description: {summ} | Outcome: Successful" if outcome == "A"
+    (f"Action: {act} | Description: {summ} | Outcome: Successful" if outcome
      else f"Action: {act} | Description: {summ} | Outcome: Failed | Feedback: {err_des}")
     for act, summ, outcome, err_des in zip(
         state.action_history[-min(5, len(state.action_history)):],
@@ -126,7 +126,13 @@ def parse_executor_response(response: str) -> dict:
         Dictionary with 'thought', 'action', 'description' keys
     """
     thought = response.split("### Thought")[-1].split("### Action")[0].replace("\n", " ").replace("  ", " ").replace("###", "").strip()
-    action = response.split("### Action")[-1].split("### Description")[0].replace("\n", " ").replace("  ", " ").replace("###", "").strip()
+    action_raw = response.split("### Action")[-1].split("### Description")[0].replace("\n", " ").replace("  ", " ").replace("###", "").strip()
+    start_idx = action_raw.find('{')
+    end_idx = action_raw.rfind('}')
+    if start_idx != -1 and end_idx != -1:
+        action = action_raw[start_idx:end_idx + 1]
+    else:
+        action = action_raw
     description = response.split("### Description")[-1].replace("\n", " ").replace("  ", " ").replace("###", "").strip()
     return {

droidrun/agent/manager/__init__.py CHANGED Viewed

@@ -2,17 +2,16 @@
 Manager Agent - Planning and reasoning workflow.
 """
-from droidrun.agent.manager.events import ManagerPlanEvent, ManagerThinkingEvent
+from droidrun.agent.droid.events import ManagerInputEvent, ManagerPlanEvent
+from droidrun.agent.manager.events import ManagerThinkingEvent, ManagerInternalPlanEvent
 from droidrun.agent.manager.manager_agent import ManagerAgent
-from droidrun.agent.manager.prompts import (
-    build_manager_system_prompt,
-    parse_manager_response,
-)
+from droidrun.agent.manager.prompts import parse_manager_response
 __all__ = [
     "ManagerAgent",
-    "ManagerThinkingEvent",
+    "ManagerInputEvent",
     "ManagerPlanEvent",
-    "build_manager_system_prompt",
+    "ManagerThinkingEvent",
+    "ManagerInternalPlanEvent",
     "parse_manager_response",
 ]

droidrun/agent/manager/events.py CHANGED Viewed

@@ -1,5 +1,11 @@
 """
 Events for the ManagerAgent workflow.
+These are INTERNAL events used within ManagerAgent for:
+- Streaming to frontend/logging
+- Carrying full debug metadata
+For workflow coordination with DroidAgent, see droid/events.py
 """
 from llama_index.core.workflow.events import Event
@@ -10,11 +16,17 @@ class ManagerThinkingEvent(Event):
     pass
-class ManagerPlanEvent(Event):
-    """Manager has created a plan"""
+class ManagerInternalPlanEvent(Event):
+    """
+    Internal Manager planning event with full state and metadata.
+    This event is streamed to frontend/logging but NOT used for
+    workflow coordination between ManagerAgent and DroidAgent.
+    For workflow coordination, see ManagerPlanEvent in droid/events.py
+    """
     plan: str
     current_subgoal: str
-    completed_plan: str
     thought: str
     manager_answer: str = ""
-    memory_update: str = ""
+    memory_update: str = ""  # Debugging metadata: LLM's memory additions

droidrun/agent/manager/manager_agent.py CHANGED Viewed

@@ -11,22 +11,26 @@ This agent is responsible for:
 from __future__ import annotations
 import logging
-from typing import TYPE_CHECKING, List
+from typing import TYPE_CHECKING
 from llama_index.core.llms.llm import LLM
 from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow, step
-from droidrun.agent.manager.events import ManagerPlanEvent, ManagerThinkingEvent
-from droidrun.agent.manager.prompts import build_manager_system_prompt, parse_manager_response
+from droidrun.agent.manager.events import ManagerInternalPlanEvent, ManagerThinkingEvent
+from droidrun.agent.manager.prompts import parse_manager_response
 from droidrun.agent.utils import convert_messages_to_chatmessages
 from droidrun.agent.utils.chat_utils import remove_empty_messages
-from droidrun.agent.utils.device_state_formatter import get_device_state_exact_format
+from droidrun.agent.utils.device_state_formatter import format_device_state
 from droidrun.agent.utils.inference import acall_with_retries
 from droidrun.agent.utils.tools import build_custom_tool_descriptions
+from droidrun.config_manager.prompt_loader import PromptLoader
+from droidrun.config_manager.app_card_loader import AppCardLoader
 if TYPE_CHECKING:
     from droidrun.agent.droid.events import DroidAgentState
     from droidrun.tools import Tools
+    from droidrun.config_manager.config_manager import AgentConfig
 logger = logging.getLogger("droidrun")
@@ -45,22 +49,21 @@ class ManagerAgent(Workflow):
     def __init__(
         self,
         llm: LLM,
-        vision: bool,
-        personas: List,
         tools_instance: "Tools",
         shared_state: "DroidAgentState",
+        agent_config: "AgentConfig",
         custom_tools: dict = None,
-        debug: bool = False,
         **kwargs
     ):
         super().__init__(**kwargs)
         self.llm = llm
-        self.vision = vision
-        self.personas = personas
+        self.config = agent_config.manager
+        self.vision = self.config.vision
         self.tools_instance = tools_instance
         self.shared_state = shared_state
         self.custom_tools = custom_tools or {}
-        self.debug = debug
+        self.agent_config = agent_config
+        self.app_card_loader = self.agent_config.app_cards
         logger.info("✅ ManagerAgent initialized successfully.")
@@ -70,23 +73,23 @@ class ManagerAgent(Workflow):
     def _build_system_prompt(
         self,
-        has_text_to_modify: bool
+        has_text_to_modify: bool,
+        app_card: str = ""
     ) -> str:
         """
         Build system prompt with all context.
         Args:
             has_text_to_modify: Whether text manipulation mode is enabled
+            app_card: App card content
         Returns:
             Complete system prompt
         """
-        # Get error history if error_flag_plan is set
-        error_history = []
+        # Format error history
+        error_history_text = ""
         if self.shared_state.error_flag_plan:
             k = self.shared_state.err_to_manager_thresh
-            error_history = [
+            errors = [
                 {
                     "action": act,
                     "summary": summ,
@@ -98,19 +101,76 @@ class ManagerAgent(Workflow):
                     self.shared_state.error_descriptions[-k:], strict=True
                 )
             ]
-        # Build custom tools descriptions
-        custom_tools_descriptions = build_custom_tool_descriptions(self.custom_tools)
-        return build_manager_system_prompt(
-            instruction=self.shared_state.instruction,
-            has_text_to_modify=has_text_to_modify,
-            app_card="",  # TODO: implement app card retrieval system
-            device_date=self.tools_instance.get_date(),
-            important_notes="",  # TODO: expose important_notes in DroidAgentState if needed
-            error_flag=self.shared_state.error_flag_plan,
-            error_history=error_history,
-            custom_tools_descriptions=custom_tools_descriptions
+            error_history_text = (
+                "<potentially_stuck>\n"
+                "You have encountered several failed attempts. Here are some logs:\n"
+            )
+            for error in errors:
+                error_history_text += (
+                    f"- Attempt: Action: {error['action']} | "
+                    f"Description: {error['summary']} | "
+                    f"Outcome: Failed | "
+                    f"Feedback: {error['error']}\n"
+                )
+            error_history_text += "</potentially_stuck>\n\n"
+        # Text manipulation section
+        text_manipulation_section = ""
+        if has_text_to_modify:
+            text_manipulation_section = """
+<text_manipulation>
+1. Use **TEXT_TASK:** prefix in your plan when you need to modify text in the currently focused text input field
+2. TEXT_TASK is for editing, formatting, or transforming existing text content in text boxes using Python code
+3. Do not use TEXT_TASK for extracting text from messages, typing new text, or composing messages
+4. The focused text field contains editable text that you can modify
+5. Example plan item: 'TEXT_TASK: Add "Hello World" at the beginning of the text'
+6. Always use TEXT_TASK for modifying text, do not try to select the text to copy/cut/paste or adjust the text
+</text_manipulation>"""
+        # Device date (include tags in variable value or empty string)
+        device_date = self.tools_instance.get_date()
+        device_date_text = ""
+        if device_date.strip():
+            device_date_text = f"<device_date>\n{device_date}\n</device_date>\n\n"
+        # App card (include tags in variable value or empty string)
+        app_card = app_card
+        app_card_text = ""
+        if app_card.strip():
+            app_card_text = "App card gives information on how to operate the app and perform actions.\n<app_card>\n" + app_card.strip() + "\n</app_card>\n\n"
+        # Important notes (include tags in variable value or empty string)
+        important_notes = ""  # TODO: implement
+        important_notes_text = ""
+        if important_notes.strip():
+            important_notes_text = "<important_notes>\n" + important_notes + "\n</important_notes>\n\n"
+        # Custom tools
+        custom_tools_desc = build_custom_tool_descriptions(self.custom_tools)
+        custom_tools_text = ""
+        if custom_tools_desc.strip():
+            custom_tools_text = """
+<custom_actions>
+The executor has access to these additional custom actions beyond the standard actions (click, type, swipe, etc.):
+""" + custom_tools_desc + """
+You can reference these custom actions or tell the Executer agent to use them in your plan when they help achieve the user's goal.
+</custom_actions>"""
+        # Load and format prompt
+        return PromptLoader.load_prompt(
+            self.agent_config.get_manager_system_prompt_path(),
+            {
+                "instruction": self.shared_state.instruction,
+                "device_date": device_date_text,
+                "app_card": app_card_text,
+                "important_notes": important_notes_text,
+                "error_history": error_history_text,
+                "text_manipulation_section": text_manipulation_section,
+                "custom_tools_descriptions": custom_tools_text
+            }
         )
     def _build_messages_with_context(
@@ -155,28 +215,28 @@ class ManagerAgent(Workflow):
                 else:
                     messages[last_user_idx]['content'].insert(0, {"text": f"<memory>\n{current_memory}\n</memory>\n"})
-            # Add device state to last user message
-            current_a11y = (self.shared_state.ui_elements_list_after or self.shared_state.device_state_text or "").strip()
-            if current_a11y:
+            # Add CURRENT device state to last user message (use unified state)
+            current_state = self.shared_state.formatted_device_state.strip()
+            if current_state:
                 if messages[last_user_idx]['content'] and 'text' in messages[last_user_idx]['content'][0]:
-                    messages[last_user_idx]['content'][0]['text'] += f"\n<device_state>\n{current_a11y}\n</device_state>\n"
+                    messages[last_user_idx]['content'][0]['text'] += f"\n<device_state>\n{current_state}\n</device_state>\n"
                 else:
-                    messages[last_user_idx]['content'].insert(0, {"text": f"<device_state>\n{current_a11y}\n</device_state>\n"})
+                    messages[last_user_idx]['content'].insert(0, {"text": f"<device_state>\n{current_state}\n</device_state>\n"})
             # Add screenshot to last user message
             if screenshot and self.vision:
                 messages[last_user_idx]['content'].append({"image": screenshot})
-            # Add previous device state to SECOND-TO-LAST user message (if exists)
+            # Add PREVIOUS device state to SECOND-TO-LAST user message (if exists)
             if len(user_indices) >= 2:
                 second_last_user_idx = user_indices[-2]
-                prev_a11y = (self.shared_state.ui_elements_list_before or "").strip()
+                prev_state = self.shared_state.previous_formatted_device_state.strip()
-                if prev_a11y:
+                if prev_state:
                     if messages[second_last_user_idx]['content'] and 'text' in messages[second_last_user_idx]['content'][0]:
-                        messages[second_last_user_idx]['content'][0]['text'] += f"\n<device_state>\n{prev_a11y}\n</device_state>\n"
+                        messages[second_last_user_idx]['content'][0]['text'] += f"\n<device_state>\n{prev_state}\n</device_state>\n"
                     else:
-                        messages[second_last_user_idx]['content'].insert(0, {"text": f"<device_state>\n{prev_a11y}\n</device_state>\n"})
+                        messages[second_last_user_idx]['content'].insert(0, {"text": f"<device_state>\n{prev_state}\n</device_state>\n"})
         messages = remove_empty_messages(messages)
         return messages
@@ -263,9 +323,23 @@ class ManagerAgent(Workflow):
         logger.info("💬 Preparing manager input...")
         # ====================================================================
-        # Step 1: Get device state (UI elements accessibility tree)
+        # Step 1: Get and format device state using unified formatter
         # ====================================================================
-        device_state_text, focused_text = get_device_state_exact_format(self.tools_instance.get_state())
+        raw_state = self.tools_instance.get_state()
+        formatted_text, focused_text, a11y_tree, phone_state = format_device_state(raw_state)
+        # Update shared state (previous ← current, current ← new)
+        self.shared_state.previous_formatted_device_state = self.shared_state.formatted_device_state
+        self.shared_state.formatted_device_state = formatted_text
+        self.shared_state.focused_text = focused_text
+        self.shared_state.a11y_tree = a11y_tree
+        self.shared_state.phone_state = phone_state
+        # Extract and store package/app name
+        self.shared_state.current_package_name = phone_state.get('packageName', 'Unknown')
+        self.shared_state.current_app_name = phone_state.get('currentApp', 'Unknown')
+        # App cards
         # ====================================================================
         # Step 2: Capture screenshot if vision enabled
@@ -278,6 +352,7 @@ class ManagerAgent(Workflow):
                     success, screenshot = result
                     if not success:
                         screenshot = None
                 else:
                     screenshot = result
                 logger.debug("📸 Screenshot captured for Manager")
@@ -288,29 +363,9 @@ class ManagerAgent(Workflow):
         # ====================================================================
         # Step 3: Detect text manipulation mode
         # ====================================================================
-        focused_text = focused_text or ""
         focused_text_clean = focused_text.replace("'", "").strip()
-        # Check if focused text differs from last typed text
-        # last_typed_text = ""
-        # if self.shared_state.action_history:
-        #     recent_actions = self.shared_state.action_history[-1:] if len(self.shared_state.action_history) >= 1 else []
-        #     for action in reversed(recent_actions):
-        #         if isinstance(action, dict) and action.get('action') == 'type':
-        #             last_typed_text = action.get('text', '')
-        #             break
         has_text_to_modify = (focused_text_clean != "")
-        # ====================================================================
-        # Step 4: Update state with device info
-        # ====================================================================
-        self.shared_state.device_state_text = device_state_text
-        self.shared_state.focused_text = focused_text
-        # Shift UI elements: before ← after, after ← current
-        self.shared_state.ui_elements_list_before = self.shared_state.ui_elements_list_after
-        self.shared_state.ui_elements_list_after = device_state_text
         # ====================================================================
         # Step 5: Build user message entry
         # ====================================================================
@@ -328,7 +383,7 @@ class ManagerAgent(Workflow):
         if self.shared_state.last_summary:
             parts.append(f"<last_action_description>\n{self.shared_state.last_summary}\n</last_action_description>\n")
         self.shared_state.message_history.append({
             "role": "user",
             "content": [{"text": "".join(parts)}]
@@ -346,7 +401,7 @@ class ManagerAgent(Workflow):
         self,
         ctx: Context,
         ev: ManagerThinkingEvent
-    ) -> ManagerPlanEvent:
+    ) -> ManagerInternalPlanEvent:
         """
         Manager reasons and creates plan.
@@ -362,11 +417,15 @@ class ManagerAgent(Workflow):
         has_text_to_modify = self.shared_state.has_text_to_modify
         screenshot = self.shared_state.screenshot
+        if self.app_card_loader.enabled:
+            app_card = AppCardLoader.load_app_card(self.shared_state.current_package_name, self.app_card_loader.app_cards_dir)
+        else:
+            app_card = ""
         # ====================================================================
         # Step 1: Build system prompt
         # ====================================================================
-        system_prompt = self._build_system_prompt(has_text_to_modify)
+        system_prompt = self._build_system_prompt(has_text_to_modify, app_card)
         # ====================================================================
         # Step 2: Build messages with context
@@ -423,7 +482,6 @@ class ManagerAgent(Workflow):
         # Update planning fields
         self.shared_state.plan = parsed["plan"]
         self.shared_state.current_subgoal = parsed["current_subgoal"]
-        self.shared_state.completed_plan = parsed.get("completed_subgoal", "No completed subgoal.")
         self.shared_state.finish_thought = parsed["thought"]
         self.shared_state.manager_answer = parsed["answer"]
@@ -431,20 +489,24 @@ class ManagerAgent(Workflow):
         logger.debug(f"  - Current subgoal: {parsed['current_subgoal']}")
         logger.debug(f"  - Manager answer: {parsed['answer'][:50] if parsed['answer'] else 'None'}")
-        return ManagerPlanEvent(
+        event = ManagerInternalPlanEvent(
             plan=parsed["plan"],
             current_subgoal=parsed["current_subgoal"],
-            completed_plan=parsed.get("completed_subgoal", "No completed subgoal."),
             thought=parsed["thought"],
             manager_answer=parsed["answer"],
             memory_update=memory_update
         )
+        # Write event to stream for web interface
+        ctx.write_event_to_stream(event)
+        return event
     @step
     async def finalize(
         self,
         ctx: Context,
-        ev: ManagerPlanEvent
+        ev: ManagerInternalPlanEvent
     ) -> StopEvent:
         """Return manager results to parent workflow."""
         logger.debug("✅ Manager planning complete")
@@ -452,7 +514,6 @@ class ManagerAgent(Workflow):
         return StopEvent(result={
             "plan": ev.plan,
             "current_subgoal": ev.current_subgoal,
-            "completed_plan": ev.completed_plan,
             "thought": ev.thought,
             "manager_answer": ev.manager_answer,
             "memory_update": ev.memory_update

droidrun 0.3.10.dev3__py3-none-any.whl → 0.3.10.dev5__py3-none-any.whl

droidrun 0.3.10.dev3__py3-none-any.whl → 0.3.10.dev5__py3-none-any.whl