PyPI - daveloop - Versions diffs - 1.3.0__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

daveloop-1.3.0-py3-none-any.whl → daveloop-1.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

daveloop-1.5.0.dist-info/METADATA +392 -0
daveloop-1.5.0.dist-info/RECORD +7 -0
{daveloop-1.3.0.dist-info → daveloop-1.5.0.dist-info}/WHEEL +1 -1
daveloop.py +369 -20
daveloop-1.3.0.dist-info/METADATA +0 -391
daveloop-1.3.0.dist-info/RECORD +0 -7
{daveloop-1.3.0.dist-info → daveloop-1.5.0.dist-info}/entry_points.txt +0 -0
{daveloop-1.3.0.dist-info → daveloop-1.5.0.dist-info}/top_level.txt +0 -0

daveloop.py CHANGED Viewed

@@ -20,6 +20,8 @@ MAX_ITERATIONS = 20
 DEFAULT_TIMEOUT = 600  # 10 minutes in seconds
 SCRIPT_DIR = Path(__file__).parent
 PROMPT_FILE = SCRIPT_DIR / "daveloop_prompt.md"
+MAESTRO_PROMPT_FILE = SCRIPT_DIR / "daveloop_maestro_prompt.md"
+WEB_PROMPT_FILE = SCRIPT_DIR / "daveloop_web_prompt.md"
 LOG_DIR = SCRIPT_DIR / "logs"
 # Exit signals from Claude Code
@@ -27,6 +29,12 @@ SIGNAL_RESOLVED = "[DAVELOOP:RESOLVED]"
 SIGNAL_BLOCKED = "[DAVELOOP:BLOCKED]"
 SIGNAL_CLARIFY = "[DAVELOOP:CLARIFY]"
+# Allowed tools for Claude Code CLI
+# Default: no Task tool (prevents recursive sub-agent spawning)
+ALLOWED_TOOLS_DEFAULT = "Bash,Read,Write,Edit,Glob,Grep"
+# Swarm mode: Task tool enabled for controlled sub-agent spawning
+ALLOWED_TOOLS_SWARM = "Bash,Read,Write,Edit,Glob,Grep,Task"
 # ============================================================================
 # ANSI Color Codes
 # ============================================================================
@@ -266,6 +274,128 @@ class TaskQueue:
         print()
+# ============================================================================
+# Swarm Budget
+# ============================================================================
+class SwarmBudget:
+    """Tracks and enforces sub-agent spawn budget for swarm mode.
+    The class only keeps counters: callers ask ``can_spawn()`` before a spawn
+    and report progress via ``record_spawn()`` / ``record_completion()``.
+    Attributes:
+        max_spawns: Number of spawns allowed before the budget is exhausted.
+        max_depth: Maximum sub-agent depth; stored only, not enforced here.
+        spawn_count: Spawns recorded so far.
+        active_agents: Recorded spawns that have not been completed yet.
+        completed_agents: Completions recorded so far.
+    """
+    def __init__(self, max_spawns: int = 5, max_depth: int = 1):
+        self.max_spawns = max_spawns
+        self.max_depth = max_depth
+        self.spawn_count = 0
+        self.active_agents = 0
+        self.completed_agents = 0
+    def can_spawn(self) -> bool:
+        """Return True while fewer than ``max_spawns`` spawns were recorded."""
+        return self.spawn_count < self.max_spawns
+    def record_spawn(self, description: str):
+        """Count one sub-agent spawn and print a progress line for it.
+        Args:
+            description: Short text shown next to the spawn counter.
+        """
+        self.spawn_count += 1
+        self.active_agents += 1
+        print(f"  {C.BRIGHT_CYAN}[Swarm]{C.RESET} Sub-agent {self.spawn_count}/{self.max_spawns}: {description}")
+    def record_completion(self):
+        """Move one sub-agent from active to completed.
+        A completion with no active sub-agent is ignored, so the counters can
+        never go negative or report more completions than spawns.
+        """
+        if self.active_agents <= 0:
+            return
+        self.active_agents -= 1
+        self.completed_agents += 1
+    def budget_exhausted_message(self) -> str:
+        """Return the instruction text to use once the budget is spent."""
+        return (
+            f"Sub-agent budget exhausted ({self.spawn_count}/{self.max_spawns}). "
+            f"Complete remaining work directly without spawning more sub-agents."
+        )
+    def summary(self) -> dict:
+        """Return the spawn, completion and budget counters as a dict."""
+        return {
+            "total_spawned": self.spawn_count,
+            "completed": self.completed_agents,
+            "budget": self.max_spawns,
+        }
+# ============================================================================
+# Token Tracker
+# ============================================================================
+class TokenTracker:
+    """Tracks token usage across API turns in a DaveLoop session.
+    ``record_usage()`` accumulates running totals, the single largest turn,
+    and a per-tool breakdown. A turn is attributed to whichever tool name was
+    last passed to ``set_current_tool()``; turns recorded before any tool is
+    set count toward the totals only.
+    """
+    def __init__(self):
+        self.total_input = 0
+        self.total_output = 0
+        self.turn_count = 0
+        self.peak_input = 0
+        self.peak_output = 0
+        self.peak_total = 0
+        self.per_tool = {}  # tool_name -> {"input": int, "output": int, "count": int}
+        self._current_tool = None  # Track which tool is active for per-tool attribution
+        # NOTE(review): the two accumulators below are never read or updated
+        # in this class; kept in case other code references them.
+        self._turn_input = 0
+        self._turn_output = 0
+    def set_current_tool(self, tool_name: str):
+        """Set the currently active tool for per-tool token attribution."""
+        self._current_tool = tool_name
+    def record_usage(self, input_tokens: int, output_tokens: int):
+        """Record token usage from an API turn.
+        Args:
+            input_tokens: Input tokens for the turn; None is treated as 0.
+            output_tokens: Output tokens for the turn; None is treated as 0.
+        """
+        # Coerce None to 0 so a null count cannot raise TypeError mid-session.
+        input_tokens = input_tokens or 0
+        output_tokens = output_tokens or 0
+        self.total_input += input_tokens
+        self.total_output += output_tokens
+        self.turn_count += 1
+        turn_total = input_tokens + output_tokens
+        # The peak is the largest combined turn; its in/out split is kept too.
+        if turn_total > self.peak_total:
+            self.peak_total = turn_total
+            self.peak_input = input_tokens
+            self.peak_output = output_tokens
+        # Attribute to current tool if one is active
+        if self._current_tool:
+            stats = self.per_tool.setdefault(
+                self._current_tool, {"input": 0, "output": 0, "count": 0}
+            )
+            stats["input"] += input_tokens
+            stats["output"] += output_tokens
+            stats["count"] += 1
+    @property
+    def total_tokens(self) -> int:
+        """Sum of all input and output tokens recorded so far."""
+        return self.total_input + self.total_output
+    def summary(self) -> dict:
+        """Return a dict with all token stats.
+        The ``per_tool`` entry is a copy down to the per-tool dicts, so
+        mutating the returned value does not alter the tracker.
+        """
+        return {
+            "input_tokens": self.total_input,
+            "output_tokens": self.total_output,
+            "total_tokens": self.total_tokens,
+            "turn_count": self.turn_count,
+            "peak_turn": {
+                "input": self.peak_input,
+                "output": self.peak_output,
+                "total": self.peak_total,
+            },
+            "per_tool": {tool: dict(stats) for tool, stats in self.per_tool.items()},
+        }
+    def summary_line(self) -> str:
+        """Return a one-line summary string for display."""
+        return (
+            f"Tokens: {self.total_input:,} in / {self.total_output:,} out / "
+            f"{self.total_tokens:,} total ({self.turn_count} turns)"
+        )
+    def verbose_turn_line(self, input_tokens: int, output_tokens: int) -> str:
+        """Return a per-turn detail line for --show-tokens mode.
+        The turn number shown is the current ``turn_count``, so call this
+        after ``record_usage()`` for the same turn. None counts render as 0.
+        """
+        input_tokens = input_tokens or 0
+        output_tokens = output_tokens or 0
+        total = input_tokens + output_tokens
+        tool_info = f" [{self._current_tool}]" if self._current_tool else ""
+        return (
+            f"  Turn {self.turn_count}: {input_tokens:,} in / {output_tokens:,} out / "
+            f"{total:,} total{tool_info}"
+        )
 # ============================================================================
 # Session Memory
 # ============================================================================
@@ -292,16 +422,21 @@ def save_history(working_dir: str, history_data: dict):
     history_file.write_text(json.dumps(history_data, indent=2), encoding="utf-8")
-def summarize_session(bug: str, outcome: str, iterations: int) -> dict:
+def summarize_session(bug: str, outcome: str, iterations: int, token_tracker: "TokenTracker" = None) -> dict:
     """Return a dict summarizing a session."""
     now = datetime.now()
-    return {
+    entry = {
         "session_id": now.strftime("%Y%m%d_%H%M%S"),
         "bug": bug,
         "outcome": outcome,
         "iterations": iterations,
         "timestamp": now.isoformat()
     }
+    if token_tracker and token_tracker.turn_count > 0:
+        entry["tokens_in"] = token_tracker.total_input
+        entry["tokens_out"] = token_tracker.total_output
+        entry["tokens_total"] = token_tracker.total_tokens
+    return entry
 def format_history_context(sessions: list) -> str:
@@ -327,10 +462,12 @@ def print_history_box(sessions: list):
         outcome = s.get("outcome", "UNKNOWN")
         bug = s.get("bug", "unknown")[:55]
         iters = s.get("iterations", "?")
+        tokens_total = s.get("tokens_total")
+        token_str = f" · {tokens_total:,} tok" if tokens_total else ""
         if outcome == "RESOLVED":
-            print(f"  {C.BRIGHT_GREEN}✓{C.RESET} {C.WHITE}{bug}{C.RESET} {C.DIM}({iters} iter){C.RESET}")
+            print(f"  {C.BRIGHT_GREEN}✓{C.RESET} {C.WHITE}{bug}{C.RESET} {C.DIM}({iters} iter{token_str}){C.RESET}")
         else:
-            print(f"  {C.BRIGHT_RED}✗{C.RESET} {C.WHITE}{bug}{C.RESET} {C.DIM}({iters} iter){C.RESET}")
+            print(f"  {C.BRIGHT_RED}✗{C.RESET} {C.WHITE}{bug}{C.RESET} {C.DIM}({iters} iter{token_str}){C.RESET}")
     print()
@@ -424,7 +561,7 @@ class InputMonitor:
     Call resume_reading() after the main thread is done with input().
     """
-    VALID_COMMANDS = ("wait", "pause", "add", "done")
+    VALID_COMMANDS = ("wait", "pause", "add", "done", "stop")
     def __init__(self):
         self._command = None
@@ -493,6 +630,24 @@ def load_prompt() -> str:
         return "You are debugging. Fix the bug. Output [DAVELOOP:RESOLVED] when done."
+def _load_optional_prompt(prompt_file: Path, label: str) -> "str | None":
+    """Return the UTF-8 text of *prompt_file*, or None if it does not exist.
+    Reads directly instead of checking ``exists()`` first, so a file removed
+    between check and read cannot raise. Other I/O errors still propagate.
+    Args:
+        prompt_file: Path of the prompt file to read.
+        label: Name used in the warning shown when the file is missing.
+    """
+    try:
+        return prompt_file.read_text(encoding="utf-8")
+    except FileNotFoundError:
+        print_warning_box(f"{label} prompt file not found: {prompt_file}")
+        return None
+def load_maestro_prompt() -> "str | None":
+    """Load the Maestro mobile testing prompt, or None (with a warning) if missing."""
+    return _load_optional_prompt(MAESTRO_PROMPT_FILE, "Maestro")
+def load_web_prompt() -> "str | None":
+    """Load the Web UI testing prompt, or None (with a warning) if missing."""
+    return _load_optional_prompt(WEB_PROMPT_FILE, "Web")
 def find_claude_cli():
     """Find Claude CLI executable path."""
     import platform
@@ -534,12 +689,17 @@ def find_claude_cli():
     return None
-def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool = False, stream: bool = True, timeout: int = DEFAULT_TIMEOUT, input_monitor=None) -> str:
+def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool = False, stream: bool = True, timeout: int = DEFAULT_TIMEOUT, input_monitor=None, swarm_mode: bool = False, swarm_budget_max: int = 5, swarm_depth_max: int = 1, token_tracker: "TokenTracker" = None, show_tokens: bool = False) -> str:
     """Execute Claude Code CLI with the given prompt.
     If stream=True, output is printed in real-time and also returned.
     timeout is in seconds (default 600 = 10 minutes).
     input_monitor: optional InputMonitor to check for user commands during execution.
+    swarm_mode: if True, enables Task tool for sub-agent spawning.
+    swarm_budget_max: max sub-agents per session in swarm mode.
+    swarm_depth_max: max sub-agent depth in swarm mode.
+    token_tracker: optional TokenTracker to accumulate token usage from the stream.
+    show_tokens: if True, print per-turn token usage during execution.
     """
     claude_cmd = find_claude_cli()
     if not claude_cmd:
@@ -558,7 +718,8 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
     if continue_session:
         cmd.append("--continue")
-    cmd.extend(["-p", "--verbose", "--output-format", "stream-json", "--allowedTools", "Bash,Read,Write,Edit,Glob,Grep,Task"])
+    allowed = ALLOWED_TOOLS_SWARM if swarm_mode else ALLOWED_TOOLS_DEFAULT
+    cmd.extend(["-p", "--verbose", "--output-format", "stream-json", "--allowedTools", allowed])
     try:
         if stream:
@@ -582,6 +743,9 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
             # Track start time
             start_time = time.time()
+            # Swarm budget tracking (only active in swarm mode)
+            swarm_budget = SwarmBudget(max_spawns=swarm_budget_max, max_depth=swarm_depth_max) if swarm_mode else None
             # Read and display JSON stream output
             output_lines = []
             full_text = []
@@ -597,6 +761,19 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
                     msg_type = data.get("type", "")
+                    # Extract token usage from any message that has it
+                    if token_tracker:
+                        usage = (data.get("message", {}).get("usage")
+                                 or data.get("usage")
+                                 or None)
+                        if usage and isinstance(usage, dict):
+                            inp = usage.get("input_tokens", 0)
+                            outp = usage.get("output_tokens", 0)
+                            if inp or outp:
+                                token_tracker.record_usage(inp, outp)
+                                if show_tokens:
+                                    print(f"  {C.DIM}{token_tracker.verbose_turn_line(inp, outp)}{C.RESET}")
                     # Handle different message types
                     if msg_type == "assistant":
                         # Assistant text message
@@ -611,6 +788,8 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
                             elif block.get("type") == "tool_use":
                                 # Tool being called - show what Claude is doing
                                 tool_name = block.get("name", "unknown")
+                                if token_tracker:
+                                    token_tracker.set_current_tool(tool_name)
                                 tool_input = block.get("input", {})
                                 # Format tool call based on type
@@ -640,6 +819,18 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
                                 elif tool_name == "Task":
                                     desc = tool_input.get("description", "")
                                     tool_display = f"{C.BRIGHT_BLUE}Task{C.RESET}({C.WHITE}{desc}{C.RESET})"
+                                    # Swarm budget enforcement
+                                    if swarm_budget:
+                                        if not swarm_budget.can_spawn():
+                                            print(f"  {C.BRIGHT_YELLOW}[Swarm] Budget exhausted. Terminating to restart without Task tool.{C.RESET}")
+                                            process.terminate()
+                                            try:
+                                                process.wait(timeout=10)
+                                            except Exception:
+                                                process.kill()
+                                            return '\n'.join(full_text) + "\n[DAVELOOP:SWARM_BUDGET_EXHAUSTED]"
+                                        else:
+                                            swarm_budget.record_spawn(desc)
                                 else:
                                     tool_display = f"{C.BRIGHT_BLUE}{tool_name}{C.RESET}"
@@ -657,6 +848,8 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
                     elif msg_type == "tool_use":
                         # Tool being used - show what Claude is doing
                         tool_name = data.get("name", "unknown")
+                        if token_tracker:
+                            token_tracker.set_current_tool(tool_name)
                         tool_input = data.get("input", {})
                         # Format tool call based on type
@@ -686,6 +879,18 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
                         elif tool_name == "Task":
                             desc = tool_input.get("description", "")
                             tool_display = f"{C.BRIGHT_BLUE}Task{C.RESET}({C.WHITE}{desc}{C.RESET})"
+                            # Swarm budget enforcement
+                            if swarm_budget:
+                                if not swarm_budget.can_spawn():
+                                    print(f"  {C.BRIGHT_YELLOW}[Swarm] Budget exhausted. Terminating to restart without Task tool.{C.RESET}")
+                                    process.terminate()
+                                    try:
+                                        process.wait(timeout=10)
+                                    except Exception:
+                                        process.kill()
+                                    return '\n'.join(full_text) + "\n[DAVELOOP:SWARM_BUDGET_EXHAUSTED]"
+                                else:
+                                    swarm_budget.record_spawn(desc)
                         else:
                             tool_display = f"{C.BRIGHT_BLUE}{tool_name}{C.RESET}"
@@ -837,6 +1042,16 @@ def main():
     parser.add_argument("-t", "--timeout", type=int, default=DEFAULT_TIMEOUT,
                         help="Timeout per iteration in seconds (default: 600)")
     parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
+    parser.add_argument("--maestro", action="store_true", help="Enable Maestro mobile testing mode")
+    parser.add_argument("--web", action="store_true", help="Enable Playwright web UI testing mode")
+    parser.add_argument("--swarm", action="store_true",
+                        help="Enable swarm mode: DaveLoop can spawn sub-agents via Task tool")
+    parser.add_argument("--swarm-budget", type=int, default=5,
+                        help="Max sub-agents per DaveLoop worker in swarm mode (default: 5)")
+    parser.add_argument("--swarm-depth", type=int, default=1, choices=[1, 2],
+                        help="Max sub-agent depth in swarm mode (default: 1, no recursive spawning)")
+    parser.add_argument("--show-tokens", action="store_true",
+                        help="Show verbose per-turn token usage during execution")
     args = parser.parse_args()
@@ -863,6 +1078,14 @@ def main():
     # Setup
     session_id = datetime.now().strftime("%Y%m%d_%H%M%S")
     system_prompt = load_prompt()
+    if args.maestro:
+        maestro_prompt = load_maestro_prompt()
+        if maestro_prompt:
+            system_prompt = system_prompt + "\n\n---\n\n" + maestro_prompt
+    elif args.web:
+        web_prompt = load_web_prompt()
+        if web_prompt:
+            system_prompt = system_prompt + "\n\n---\n\n" + web_prompt
     working_dir = args.dir or os.getcwd()
     # Load session history
@@ -876,7 +1099,13 @@ def main():
     print_status("Iterations", str(args.max_iterations), C.WHITE)
     print_status("Timeout", f"{args.timeout // 60}m per iteration", C.WHITE)
     print_status("Tasks", str(len(bug_descriptions)), C.WHITE)
-    print_status("Mode", "Autonomous", C.WHITE)
+    mode_name = "Maestro Mobile Testing" if args.maestro else "Playwright Web Testing" if args.web else "Autonomous"
+    print_status("Mode", mode_name, C.WHITE)
+    if args.swarm:
+        print_status("Swarm", f"ENABLED (budget: {args.swarm_budget}, depth: {args.swarm_depth})", C.BRIGHT_CYAN)
+        print_status("Tools", ALLOWED_TOOLS_SWARM, C.WHITE)
+    else:
+        print_status("Tools", ALLOWED_TOOLS_DEFAULT, C.WHITE)
     print(f"{C.BRIGHT_BLUE}└{'─' * 70}┘{C.RESET}")
     # Build task queue
@@ -886,7 +1115,7 @@ def main():
     # Print controls hint
     print(f"\n{C.BRIGHT_BLUE}{C.BOLD}┌─ CONTROLS {'─' * 58}┐{C.RESET}")
-    print(f"{C.BRIGHT_BLUE}│{C.RESET} Type while running:  {C.BRIGHT_WHITE}wait{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}pause{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}add{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}done{C.RESET}                     {C.BRIGHT_BLUE}│{C.RESET}")
+    print(f"{C.BRIGHT_BLUE}│{C.RESET} Type while running:  {C.BRIGHT_WHITE}wait{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}pause{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}add{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}done{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}stop{C.RESET}                {C.BRIGHT_BLUE}│{C.RESET}")
     print(f"{C.BRIGHT_BLUE}└{'─' * 70}┘{C.RESET}")
     # Start input monitor
@@ -898,6 +1127,9 @@ def main():
     if history_data["sessions"]:
         history_context = "\n\n" + format_history_context(history_data["sessions"])
+    # Session-wide token tracking (aggregates across all tasks)
+    session_token_tracker = TokenTracker()
     # === OUTER LOOP: iterate over tasks ===
     while True:
         task = task_queue.next()
@@ -907,15 +1139,60 @@ def main():
         bug_input = task["description"]
         task_queue.summary_display()
-        print_section("BUG REPORT", C.BRIGHT_RED)
+        if args.maestro:
+            print_section("MAESTRO TASK", C.BRIGHT_CYAN)
+            section_color = C.BRIGHT_CYAN
+        elif args.web:
+            print_section("WEB UI TASK", C.BRIGHT_MAGENTA)
+            section_color = C.BRIGHT_MAGENTA
+        else:
+            print_section("BUG REPORT", C.BRIGHT_RED)
+            section_color = C.BRIGHT_RED
         for line in bug_input.split('\n')[:8]:
-            print(f"  {C.BRIGHT_RED}{line[:70]}{C.RESET}")
+            print(f"  {section_color}{line[:70]}{C.RESET}")
         if len(bug_input.split('\n')) > 8:
-            print(f"  {C.RED}... +{len(bug_input.split(chr(10))) - 8} more lines{C.RESET}")
+            print(f"  {section_color}... +{len(bug_input.split(chr(10))) - 8} more lines{C.RESET}")
         sys.stdout.flush()
         # Initial context for this task
-        context = f"""
+        if args.maestro:
+            context = f"""
+## Maestro Mobile Testing Task
+{bug_input}
+{history_context}
+## Instructions
+1. First, detect connected devices/emulators (run `adb devices` and/or `xcrun simctl list devices available`)
+2. If no device is found, auto-launch an emulator/simulator
+3. Ensure the target app is installed on the device
+4. Proceed with the Maestro testing task described above
+5. Before declaring success, verify by running the flow(s) 3 consecutive times - all must pass
+Use the reasoning protocol before each action.
+"""
+        elif args.web:
+            context = f"""
+## Web UI Testing Task
+{bug_input}
+{history_context}
+## Instructions
+1. First, explore the project to detect the framework and find the dev server command
+2. Install Playwright if not already installed (`npm install -D @playwright/test && npx playwright install chromium`)
+3. Start the dev server if not already running
+4. Read the source code to understand the UI components, especially any gesture/drag/interactive elements
+5. Write Playwright tests in an `e2e/` directory that test the app like a real human would - use actual mouse movements, drags, clicks, hovers, keyboard input
+6. Test gestures and buttons SEPARATELY - a working button does not prove the gesture works
+7. Before declaring success, verify by running the tests 3 consecutive times - all must pass
+Use the reasoning protocol before each action.
+"""
+        else:
+            context = f"""
 ## Bug Report
 {bug_input}
@@ -928,6 +1205,7 @@ Then fix it. Use the reasoning protocol before each action.
 """
         iteration_history = []
+        task_token_tracker = TokenTracker()
         # === INNER LOOP: iterations for current task ===
         for iteration in range(1, args.max_iterations + 1):
@@ -951,11 +1229,20 @@ Then fix it. Use the reasoning protocol before each action.
                 full_prompt, working_dir,
                 continue_session=continue_session,
                 stream=True, timeout=args.timeout,
-                input_monitor=input_monitor
+                input_monitor=input_monitor,
+                swarm_mode=args.swarm,
+                swarm_budget_max=args.swarm_budget,
+                swarm_depth_max=args.swarm_depth,
+                token_tracker=task_token_tracker,
+                show_tokens=args.show_tokens
             )
             print(f"\n{C.BRIGHT_BLUE}  {'─' * 70}{C.RESET}")
+            # Print token usage summary for this iteration
+            if task_token_tracker.turn_count > 0:
+                print(f"  {C.BRIGHT_CYAN}⊛ {task_token_tracker.summary_line()}{C.RESET}")
             # Save log
             save_log(iteration, output, session_id)
             iteration_history.append(output)
@@ -1009,22 +1296,34 @@ Continue the current debugging task. Use the reasoning protocol before each acti
                 elif user_cmd == "done":
                     # Clean exit
                     input_monitor.stop()
-                    session_entry = summarize_session(bug_input, "DONE_BY_USER", iteration)
+                    session_entry = summarize_session(bug_input, "DONE_BY_USER", iteration, task_token_tracker)
                     history_data["sessions"].append(session_entry)
                     save_history(working_dir, history_data)
                     print(f"\n  {C.GREEN}✓{C.RESET} Session saved. Exiting by user request.")
                     return 0
+                elif user_cmd == "stop":
+                    # Boris-commanded stop - terminate this iteration immediately
+                    print(f"\n  {C.BRIGHT_RED}{C.BOLD}  ■ STOPPED BY BORIS{C.RESET}")
+                    print(f"{C.BRIGHT_RED}  {'─' * 70}{C.RESET}")
+                    input_monitor.stop()
+                    session_entry = summarize_session(bug_input, "STOPPED_BY_BORIS", iteration, task_token_tracker)
+                    history_data["sessions"].append(session_entry)
+                    save_history(working_dir, history_data)
+                    return 1
             # Check exit condition
             signal, should_exit = check_exit_condition(output)
             if should_exit:
                 if signal == "RESOLVED":
                     print_success_box("")
+                    if task_token_tracker.turn_count > 0:
+                        print(f"  {C.BRIGHT_CYAN}⊛ {task_token_tracker.summary_line()}{C.RESET}")
                     print(f"  {C.DIM}Session: {session_id}{C.RESET}")
                     print(f"  {C.DIM}Logs: {LOG_DIR}{C.RESET}\n")
                     task_queue.mark_done()
-                    session_entry = summarize_session(bug_input, "RESOLVED", iteration)
+                    session_entry = summarize_session(bug_input, "RESOLVED", iteration, task_token_tracker)
                     history_data["sessions"].append(session_entry)
                     save_history(working_dir, history_data)
                     break  # Move to next task
@@ -1050,20 +1349,42 @@ Continue debugging with this information. Use the reasoning protocol before each
                     print_status("Logs", str(LOG_DIR), C.WHITE)
                     print()
                     task_queue.mark_failed()
-                    session_entry = summarize_session(bug_input, "BLOCKED", iteration)
+                    session_entry = summarize_session(bug_input, "BLOCKED", iteration, task_token_tracker)
                     history_data["sessions"].append(session_entry)
                     save_history(working_dir, history_data)
                     break  # Move to next task
                 else:
                     print_error_box(f"Error occurred: {signal}")
                     task_queue.mark_failed()
-                    session_entry = summarize_session(bug_input, "ERROR", iteration)
+                    session_entry = summarize_session(bug_input, "ERROR", iteration, task_token_tracker)
                     history_data["sessions"].append(session_entry)
                     save_history(working_dir, history_data)
                     break  # Move to next task
             # Prepare context for next iteration
-            context = f"""
+            if args.maestro:
+                context = f"""
+## Iteration {iteration + 1}
+The Maestro flow(s) are NOT yet passing reliably. You have full context from previous iterations.
+Continue working on the flows. Check device status, inspect the UI hierarchy, fix selectors or timing issues, and re-run.
+Remember: all flows must pass 3 consecutive times before resolving.
+Use the reasoning protocol before each action.
+"""
+            elif args.web:
+                context = f"""
+## Iteration {iteration + 1}
+The Playwright tests are NOT yet passing reliably. You have full context from previous iterations.
+Continue working on the tests. Check selectors, timing, server status, and re-run.
+Make sure you are testing like a real human - use actual mouse gestures, not just button clicks.
+Remember: all tests must pass 3 consecutive times before resolving.
+Use the reasoning protocol before each action.
+"""
+            else:
+                context = f"""
 ## Iteration {iteration + 1}
 The bug is NOT yet resolved. You have full context from previous iterations.
@@ -1075,15 +1396,33 @@ Use the reasoning protocol before each action.
             # Max iterations reached for this task (for-else)
             print_warning_box(f"Max iterations ({args.max_iterations}) reached for current task")
             task_queue.mark_failed()
-            session_entry = summarize_session(bug_input, "MAX_ITERATIONS", args.max_iterations)
+            session_entry = summarize_session(bug_input, "MAX_ITERATIONS", args.max_iterations, task_token_tracker)
             history_data["sessions"].append(session_entry)
             save_history(working_dir, history_data)
+        # Aggregate task tokens into session-level tracker
+        if task_token_tracker.turn_count > 0:
+            session_token_tracker.total_input += task_token_tracker.total_input
+            session_token_tracker.total_output += task_token_tracker.total_output
+            session_token_tracker.turn_count += task_token_tracker.turn_count
+            if task_token_tracker.peak_total > session_token_tracker.peak_total:
+                session_token_tracker.peak_total = task_token_tracker.peak_total
+                session_token_tracker.peak_input = task_token_tracker.peak_input
+                session_token_tracker.peak_output = task_token_tracker.peak_output
+            for tool, stats in task_token_tracker.per_tool.items():
+                if tool not in session_token_tracker.per_tool:
+                    session_token_tracker.per_tool[tool] = {"input": 0, "output": 0, "count": 0}
+                session_token_tracker.per_tool[tool]["input"] += stats["input"]
+                session_token_tracker.per_tool[tool]["output"] += stats["output"]
+                session_token_tracker.per_tool[tool]["count"] += stats["count"]
         # Save iteration summary for this task
         LOG_DIR.mkdir(exist_ok=True)
         summary = f"# DaveLoop Session {session_id}\n\n"
         summary += f"Bug: {bug_input[:200]}...\n\n"
         summary += f"Iterations: {len(iteration_history)}\n\n"
+        if task_token_tracker.turn_count > 0:
+            summary += f"Token Usage: {task_token_tracker.summary_line()}\n\n"
         summary += "## Iteration History\n\n"
         for i, hist in enumerate(iteration_history, 1):
             summary += f"### Iteration {i}\n```\n{hist[:500]}...\n```\n\n"
@@ -1105,6 +1444,16 @@ Use the reasoning protocol before each action.
             print(f"  {C.DIM}○ {desc}{C.RESET}")
     print()
+    # Print session-wide token usage
+    if session_token_tracker.turn_count > 0:
+        print(f"  {C.BRIGHT_CYAN}⊛ {session_token_tracker.summary_line()}{C.RESET}")
+        if session_token_tracker.per_tool:
+            print(f"  {C.DIM}  Per tool:{C.RESET}")
+            for tool, stats in sorted(session_token_tracker.per_tool.items(), key=lambda x: x[1]["input"] + x[1]["output"], reverse=True):
+                tool_total = stats["input"] + stats["output"]
+                print(f"  {C.DIM}    {tool}: {stats['input']:,} in / {stats['output']:,} out / {tool_total:,} total ({stats['count']} calls){C.RESET}")
+        print()
     print(f"  {C.DIM}Session: {session_id}{C.RESET}")
     print(f"  {C.DIM}Logs: {LOG_DIR}{C.RESET}\n")

daveloop 1.3.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

daveloop-1.3.0-py3-none-any.whl → daveloop-1.5.0-py3-none-any.whl