PyPI - droidrun - Versions diffs - 0.3.10.dev2__py3-none-any.whl → 0.3.10.dev4__py3-none-any.whl - Mend

droidrun 0.3.10.dev2__py3-none-any.whl → 0.3.10.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

droidrun/agent/codeact/__init__.py +1 -4
droidrun/agent/codeact/codeact_agent.py +95 -86
droidrun/agent/codeact/events.py +1 -2
droidrun/agent/context/__init__.py +5 -9
droidrun/agent/context/episodic_memory.py +1 -3
droidrun/agent/context/task_manager.py +8 -2
droidrun/agent/droid/droid_agent.py +102 -141
droidrun/agent/droid/events.py +45 -14
droidrun/agent/executor/__init__.py +6 -4
droidrun/agent/executor/events.py +29 -9
droidrun/agent/executor/executor_agent.py +86 -28
droidrun/agent/executor/prompts.py +8 -2
droidrun/agent/manager/__init__.py +6 -7
droidrun/agent/manager/events.py +16 -4
droidrun/agent/manager/manager_agent.py +130 -69
droidrun/agent/manager/prompts.py +1 -159
droidrun/agent/utils/chat_utils.py +64 -2
droidrun/agent/utils/device_state_formatter.py +54 -26
droidrun/agent/utils/executer.py +66 -80
droidrun/agent/utils/inference.py +11 -10
droidrun/agent/utils/tools.py +58 -6
droidrun/agent/utils/trajectory.py +18 -12
droidrun/cli/logs.py +118 -56
droidrun/cli/main.py +154 -136
droidrun/config_manager/__init__.py +9 -7
droidrun/config_manager/app_card_loader.py +148 -0
droidrun/config_manager/config_manager.py +200 -102
droidrun/config_manager/path_resolver.py +104 -0
droidrun/config_manager/prompt_loader.py +75 -0
droidrun/macro/__init__.py +1 -1
droidrun/macro/cli.py +23 -18
droidrun/telemetry/__init__.py +2 -2
droidrun/telemetry/events.py +3 -3
droidrun/telemetry/tracker.py +1 -1
droidrun/tools/adb.py +1 -1
droidrun/tools/ios.py +3 -2
{droidrun-0.3.10.dev2.dist-info → droidrun-0.3.10.dev4.dist-info}/METADATA +10 -3
droidrun-0.3.10.dev4.dist-info/RECORD +61 -0
droidrun/agent/codeact/prompts.py +0 -26
droidrun/agent/context/agent_persona.py +0 -16
droidrun/agent/context/context_injection_manager.py +0 -66
droidrun/agent/context/personas/__init__.py +0 -11
droidrun/agent/context/personas/app_starter.py +0 -44
droidrun/agent/context/personas/big_agent.py +0 -96
droidrun/agent/context/personas/default.py +0 -95
droidrun/agent/context/personas/ui_expert.py +0 -108
droidrun/agent/planner/__init__.py +0 -13
droidrun/agent/planner/events.py +0 -21
droidrun/agent/planner/planner_agent.py +0 -311
droidrun/agent/planner/prompts.py +0 -124
droidrun-0.3.10.dev2.dist-info/RECORD +0 -70
{droidrun-0.3.10.dev2.dist-info → droidrun-0.3.10.dev4.dist-info}/WHEEL +0 -0
{droidrun-0.3.10.dev2.dist-info → droidrun-0.3.10.dev4.dist-info}/entry_points.txt +0 -0
{droidrun-0.3.10.dev2.dist-info → droidrun-0.3.10.dev4.dist-info}/licenses/LICENSE +0 -0

droidrun/agent/utils/tools.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from typing import TYPE_CHECKING, List
+import time
 if TYPE_CHECKING:
     from droidrun.tools import Tools
@@ -116,12 +117,13 @@ def open_app(tool_instance: "Tools", text: str) -> str:
             "app_opener_llm not configured. "
             "provide app_opener_llm when initializing Tools."
         )
     # Create workflow instance
     workflow = AppStarter(tools=tool_instance, llm=tool_instance.app_opener_llm, timeout=60, verbose=True)
     # Run workflow to open an app
     result = workflow.run(app_description=text)
+    time.sleep(1)
     return result
@@ -155,11 +157,6 @@ ATOMIC_ACTION_SIGNATURES = {
         "description": "Scroll from the position with coordinate to the position with coordinate2. Please make sure the start and end points of your swipe are within the swipeable area and away from the keyboard (y1 < 1400). Usage Example: {\"action\": \"swipe\", \"coordinate\": [x1, y1], \"coordinate2\": [x2, y2]}",
         "function": swipe,
     },
-    "open_app": {
-        "arguments": ["text"],
-        "description": "Open an app. Usage example: {\"action\": \"open_app\", \"text\": \"the name of app\"}",
-        "function": open_app,
-    },
     # "copy": {
     #     "arguments": ["text"],
     #     "description": "Copy the specified text to the clipboard. Provide the text to copy using the 'text' argument. Example: {\"action\": \"copy\", \"text\": \"the text you want to copy\"}\nAlways use copy action to copy text to clipboard."
@@ -218,3 +215,58 @@ def build_custom_tool_descriptions(custom_tools: dict) -> str:
         descriptions.append(f"- {action_name}({args}): {desc}")
     return "\n".join(descriptions)
+async def test_open_app(mock_tools, text: str) -> str:
+    return await open_app(mock_tools, text)
+if __name__ == "__main__":
+    """
+    Simple test for the tool functions.
+    Tests the atomic action wrapper functions.
+    """
+    import asyncio
+    from typing import List
+    from llama_index.llms.google_genai import GoogleGenAI
+    from droidrun.tools.adb import AdbTools
+    llm = GoogleGenAI(model="gemini-2.5-pro", temperature=0.0)
+    # Create mock tools instance
+    mock_tools = AdbTools(app_opener_llm=llm, text_manipulator_llm=llm)
+    # print("=== Testing click ===")
+    # result = click(mock_tools, 0)
+    mock_tools.get_state()
+    print("\n=== Testing long_press ===")
+    result = long_press(mock_tools, 5)
+    print(f"Result: {result}")
+    input("Press Enter to continue...")
+    print("\n=== Testing type ===")
+    result = type(mock_tools, "Hello World", -1)
+    print(f"Result: {result}")
+    input("Press Enter to continue...")
+    print("\n=== Testing system_button ===")
+    result = system_button(mock_tools, "back")
+    print(f"Result: {result}")
+    input("Press Enter to continue...")
+    print("\n=== Testing swipe ===")
+    result = swipe(mock_tools, [500, 0], [500, 1000])
+    print(f"Result: {result}")
+    input("Press Enter to continue...")
+    print("\n=== Testing open_app ===")
+    # This one is more complex and requires real LLM setup, so just show the structure
+    try:
+        result = asyncio.run(test_open_app(mock_tools, "Calculator"))
+        print(f"Result: {result}")
+        input("Press Enter to continue...")
+    except Exception as e:
+        print(f"Expected error (no LLM): {e}")
+        input("Press Enter to continue...")
+    print("\n=== All tests completed ===")

droidrun/agent/utils/trajectory.py CHANGED Viewed

@@ -16,6 +16,8 @@ from typing import Any, Dict, List
 from llama_index.core.workflow import Event
 from PIL import Image
+from droidrun.config_manager.path_resolver import PathResolver
 logger = logging.getLogger("droidrun")
@@ -136,16 +138,19 @@ class Trajectory:
         Creates a dedicated folder for each trajectory containing all related files.
         Args:
-            directory: Base directory to save the trajectory files
+            directory: Base directory to save the trajectory files (relative or absolute)
         Returns:
             Path to the trajectory folder
         """
-        os.makedirs(directory, exist_ok=True)
+        # Resolve directory (prefer working dir for output)
+        base_dir = PathResolver.resolve(directory, create_if_missing=True)
+        base_dir.mkdir(parents=True, exist_ok=True)
         timestamp = time.strftime("%Y%m%d_%H%M%S")
         unique_id = str(uuid.uuid4())[:8]
-        trajectory_folder = os.path.join(directory, f"{timestamp}_{unique_id}")
-        os.makedirs(trajectory_folder, exist_ok=True)
+        trajectory_folder = base_dir / f"{timestamp}_{unique_id}"
+        trajectory_folder.mkdir(parents=True, exist_ok=True)
         serializable_events = []
         for event in self.events:
@@ -189,7 +194,7 @@ class Trajectory:
             serializable_events.append(event_dict)
-        trajectory_json_path = os.path.join(trajectory_folder, "trajectory.json")
+        trajectory_json_path = trajectory_folder / "trajectory.json"
         with open(trajectory_json_path, "w") as f:
             json.dump(serializable_events, f, indent=2)
@@ -207,7 +212,7 @@ class Trajectory:
                 }
                 macro_data.append(macro_dict)
-            macro_json_path = os.path.join(trajectory_folder, "macro.json")
+            macro_json_path = trajectory_folder / "macro.json"
             with open(macro_json_path, "w") as f:
                 json.dump(
                     {
@@ -224,11 +229,11 @@ class Trajectory:
             logger.info(
                 f"💾 Saved macro sequence with {len(macro_data)} actions to {macro_json_path}"
             )
-        screenshots_folder = os.path.join(trajectory_folder, "screenshots")
-        os.makedirs(screenshots_folder, exist_ok=True)
+        screenshots_folder = trajectory_folder / "screenshots"
+        screenshots_folder.mkdir(parents=True, exist_ok=True)
         gif_path = self.create_screenshot_gif(
-            screenshots_folder
+            str(screenshots_folder)
         )
         if gif_path:
             logger.info(f"🎬 Saved screenshot GIF to {gif_path}")
@@ -238,12 +243,13 @@ class Trajectory:
         if len(self.ui_states) != len(self.screenshots):
             logger.warning("UI states and screenshots are not the same length!")
-        os.makedirs(os.path.join(trajectory_folder, "ui_states"), exist_ok=True)
+        ui_states_folder = trajectory_folder / "ui_states"
+        ui_states_folder.mkdir(parents=True, exist_ok=True)
         for idx, ui_state in enumerate(self.ui_states):
-            ui_states_path = os.path.join(trajectory_folder, "ui_states", f"{idx}.json")
+            ui_states_path = ui_states_folder / f"{idx}.json"
             with open(ui_states_path, "w", encoding="utf-8") as f:
                 json.dump(ui_state, f, ensure_ascii=False, indent=2)
-        return trajectory_folder
+        return str(trajectory_folder)
     @staticmethod
     def load_trajectory_folder(trajectory_folder: str) -> Dict[str, Any]:

droidrun/cli/logs.py CHANGED Viewed

@@ -21,54 +21,80 @@ from droidrun.agent.droid.events import (
     FinalizeEvent,
     TaskRunnerEvent,
 )
-from droidrun.agent.planner.events import (
-    PlanCreatedEvent,
-    PlanInputEvent,
-    PlanThinkingEvent,
+from droidrun.agent.manager.events import (
+    ManagerInternalPlanEvent,
+    ManagerThinkingEvent,
+)
+from droidrun.agent.executor.events import (
+    ExecutorInternalActionEvent,
+    ExecutorInternalResultEvent,
 )
 class LogHandler(logging.Handler):
-    def __init__(self, goal: str, current_step: str = "Initializing..."):
+    def __init__(self, goal: str, current_step: str = "Initializing...", rich_text: bool = True):
         super().__init__()
         self.goal = goal
         self.current_step = current_step
         self.is_completed = False
         self.is_success = False
-        self.spinner = Spinner("dots")
-        self.console = Console()
-        self.layout = self._create_layout()
-        self.logs: List[str] = []
+        self.rich_text = rich_text
+        if self.rich_text:
+            self.spinner = Spinner("dots")
+            self.console = Console()
+            self.layout = self._create_layout()
+            self.logs: List[str] = []
+        else:
+            self.console = Console()
+            self.logs: List[str] = []
     def emit(self, record):
         msg = self.format(record)
         lines = msg.splitlines()
-        for line in lines:
-            self.logs.append(line)
-            # Optionally, limit the log list size
-            if len(self.logs) > 100:
-                self.logs.pop(0)
-        self.rerender()
+        if self.rich_text:
+            for line in lines:
+                self.logs.append(line)
+                # Optionally, limit the log list size
+                if len(self.logs) > 100:
+                    self.logs.pop(0)
+            self.rerender()
+        else:
+            # Simple console output for non-rich mode
+            for line in lines:
+                self.console.print(line)
     def render(self):
-        return Live(self.layout, refresh_per_second=4, console=self.console)
+        if self.rich_text:
+            return Live(self.layout, refresh_per_second=4, console=self.console)
+        else:
+            # Return a no-op context manager for non-rich mode
+            from contextlib import nullcontext
+            return nullcontext()
     def rerender(self):
-        self._update_layout(
-            self.layout,
-            self.logs,
-            self.current_step,
-            self.goal,
-            self.is_completed,
-            self.is_success,
-        )
+        if self.rich_text:
+            self._update_layout(
+                self.layout,
+                self.logs,
+                self.current_step,
+                self.goal,
+                self.is_completed,
+                self.is_success,
+            )
     def update_step(self, step: str):
         self.current_step = step
-        self.rerender()
+        if self.rich_text:
+            self.rerender()
+        else:
+            # Simple console output for status updates
+            status_symbol = "⚡"
+            if self.is_completed:
+                status_symbol = "✓" if self.is_success else "✗"
+            self.console.print(f"{status_symbol} {step}")
     def _create_layout(self):
         """Create a layout with logs at top and status at bottom"""
@@ -170,7 +196,7 @@ class LogHandler(logging.Handler):
             )
         )
-    def handle_event(self, event): # TODO: fix event handling for the refactor
+    def handle_event(self, event):
         """Handle streaming events from the agent workflow."""
         logger = logging.getLogger("droidrun")
@@ -181,32 +207,72 @@ class LogHandler(logging.Handler):
         elif isinstance(event, RecordUIStateEvent):
             logger.debug("✏️ Recording UI state")
-        # Planner events
-        elif isinstance(event, PlanInputEvent):
-            self.current_step = "Planning..."
-            logger.info("💭 Planner receiving input...")
-        elif isinstance(event, PlanThinkingEvent):
-            if event.thoughts:
-                thoughts_preview = (
-                    event.thoughts[:150] + "..."
-                    if len(event.thoughts) > 150
-                    else event.thoughts
+        # Manager events (reasoning mode - planning)
+        elif isinstance(event, ManagerThinkingEvent):
+            self.current_step = "Manager analyzing state..."
+            logger.info("🧠 Manager analyzing current state...")
+        elif isinstance(event, ManagerInternalPlanEvent):
+            self.current_step = "Plan created"
+            # Show thought (concise reasoning)
+            if hasattr(event, "thought") and event.thought:
+                thought_preview = (
+                    event.thought[:120] + "..."
+                    if len(event.thought) > 120
+                    else event.thought
+                )
+                logger.info(f"💭 Thought: {thought_preview}")
+            # Show current subgoal (what we're working on next)
+            if hasattr(event, "current_subgoal") and event.current_subgoal:
+                subgoal_preview = (
+                    event.current_subgoal[:150] + "..."
+                    if len(event.current_subgoal) > 150
+                    else event.current_subgoal
                 )
-                logger.info(f"🧠 Planning: {thoughts_preview}")
-            if event.code:
-                logger.info("📝 Generated plan code")
-        elif isinstance(event, PlanCreatedEvent):
-            if event.tasks:
-                task_count = len(event.tasks) if event.tasks else 0
-                self.current_step = f"Plan ready ({task_count} tasks)"
-                logger.info(f"📋 Plan created with {task_count} tasks")
-                for task in event.tasks:
-                    desc = task.description
-                    logger.info(f"- {desc}")
-        # CodeAct events
+                logger.info(f"📋 Next step: {subgoal_preview}")
+            # Show answer if provided (task complete)
+            if hasattr(event, "manager_answer") and event.manager_answer:
+                answer_preview = (
+                    event.manager_answer[:200] + "..."
+                    if len(event.manager_answer) > 200
+                    else event.manager_answer
+                )
+                logger.info(f"💬 Answer: {answer_preview}")
+            # Debug: show memory updates
+            if hasattr(event, "memory_update") and event.memory_update:
+                logger.debug(f"🧠 Memory: {event.memory_update[:100]}...")
+        # Executor events (reasoning mode - action execution)
+        elif isinstance(event, ExecutorInternalActionEvent):
+            self.current_step = "Selecting action..."
+            # Show what action was chosen
+            if hasattr(event, "description") and event.description:
+                logger.info(f"🎯 Action: {event.description}")
+            # Debug: show executor's reasoning
+            if hasattr(event, "thought") and event.thought:
+                thought_preview = (
+                    event.thought[:120] + "..."
+                    if len(event.thought) > 120
+                    else event.thought
+                )
+                logger.debug(f"💭 Reasoning: {thought_preview}")
+        elif isinstance(event, ExecutorInternalResultEvent):
+            # Show result with appropriate emoji
+            if hasattr(event, "outcome") and hasattr(event, "summary"):
+                if event.outcome:
+                    self.current_step = "Action completed"
+                    logger.info(f"✅ {event.summary}")
+                else:
+                    self.current_step = "Action failed"
+                    error_msg = event.error if hasattr(event, "error") else "Unknown error"
+                    logger.info(f"❌ {event.summary} ({error_msg})")
+        # CodeAct events (direct mode)
         elif isinstance(event, TaskInputEvent):
             self.current_step = "Processing task input..."
             logger.info("💬 Task input received...")
@@ -264,10 +330,6 @@ class LogHandler(logging.Handler):
                     self.current_step = "Task failed"
                     logger.info(f"❌ Task failed: {event.reason}")
-        # elif isinstance(event, ReasoningLogicEvent): TODO: fix event handling
-        #     self.current_step = "Planning..."
-        #     logger.info("🤔 Planning next steps...")
         elif isinstance(event, TaskRunnerEvent):
             self.current_step = "Processing tasks..."
             logger.info("🏃 Processing task queue...")

droidrun 0.3.10.dev2__py3-none-any.whl → 0.3.10.dev4__py3-none-any.whl

droidrun 0.3.10.dev2__py3-none-any.whl → 0.3.10.dev4__py3-none-any.whl