daveloop 1.3.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
daveloop.py CHANGED
@@ -20,6 +20,8 @@ MAX_ITERATIONS = 20
20
20
  DEFAULT_TIMEOUT = 600 # 10 minutes in seconds
21
21
  SCRIPT_DIR = Path(__file__).parent
22
22
  PROMPT_FILE = SCRIPT_DIR / "daveloop_prompt.md"
23
+ MAESTRO_PROMPT_FILE = SCRIPT_DIR / "daveloop_maestro_prompt.md"
24
+ WEB_PROMPT_FILE = SCRIPT_DIR / "daveloop_web_prompt.md"
23
25
  LOG_DIR = SCRIPT_DIR / "logs"
24
26
 
25
27
  # Exit signals from Claude Code
@@ -27,6 +29,12 @@ SIGNAL_RESOLVED = "[DAVELOOP:RESOLVED]"
27
29
  SIGNAL_BLOCKED = "[DAVELOOP:BLOCKED]"
28
30
  SIGNAL_CLARIFY = "[DAVELOOP:CLARIFY]"
29
31
 
32
+ # Allowed tools for Claude Code CLI
33
+ # Default: no Task tool (prevents recursive sub-agent spawning)
34
+ ALLOWED_TOOLS_DEFAULT = "Bash,Read,Write,Edit,Glob,Grep"
35
+ # Swarm mode: Task tool enabled for controlled sub-agent spawning
36
+ ALLOWED_TOOLS_SWARM = "Bash,Read,Write,Edit,Glob,Grep,Task"
37
+
30
38
  # ============================================================================
31
39
  # ANSI Color Codes
32
40
  # ============================================================================
@@ -266,6 +274,128 @@ class TaskQueue:
266
274
  print()
267
275
 
268
276
 
277
+ # ============================================================================
278
+ # Swarm Budget
279
+ # ============================================================================
280
class SwarmBudget:
    """Tracks and enforces the sub-agent spawn budget for swarm mode.

    The budget is a simple counter: every recorded spawn consumes one unit
    and units are never returned, so ``can_spawn()`` turns False permanently
    once ``max_spawns`` spawns have been recorded.

    Attributes:
        max_spawns: Maximum number of sub-agents that may be spawned.
        max_depth: Configured maximum sub-agent depth. It is stored for
            callers but not consulted by any method of this class.
        spawn_count: Number of spawns recorded so far.
        active_agents: Spawns recorded but not yet reported as completed.
        completed_agents: Number of completions recorded so far.
    """

    def __init__(self, max_spawns: int = 5, max_depth: int = 1):
        self.max_spawns = max_spawns
        self.max_depth = max_depth
        self.spawn_count = 0
        self.active_agents = 0
        self.completed_agents = 0

    def can_spawn(self) -> bool:
        """Return True while fewer than ``max_spawns`` spawns were recorded."""
        return self.spawn_count < self.max_spawns

    def record_spawn(self, description: str):
        """Record a sub-agent spawn and print a progress line.

        This does not check the budget itself; callers are expected to
        consult ``can_spawn()`` first.
        """
        self.spawn_count += 1
        self.active_agents += 1
        print(f" {C.BRIGHT_CYAN}[Swarm]{C.RESET} Sub-agent {self.spawn_count}/{self.max_spawns}: {description}")

    def record_completion(self):
        """Record that one active sub-agent has finished.

        A completion reported while no agent is active is ignored, so
        ``active_agents`` can never go negative and ``completed_agents``
        can never exceed the number of agents that were actually active.
        """
        if self.active_agents <= 0:
            return
        self.active_agents -= 1
        self.completed_agents += 1

    def budget_exhausted_message(self) -> str:
        """Return the instruction text used once the budget is used up."""
        return (
            f"Sub-agent budget exhausted ({self.spawn_count}/{self.max_spawns}). "
            "Complete remaining work directly without spawning more sub-agents."
        )

    def summary(self) -> dict:
        """Return the spawn, completion and budget counters as a dict."""
        return {
            "total_spawned": self.spawn_count,
            "completed": self.completed_agents,
            "budget": self.max_spawns,
        }
319
+
320
+
321
+ # ============================================================================
322
+ # Token Tracker
323
+ # ============================================================================
324
class TokenTracker:
    """Tracks token usage across API turns in a DaveLoop session.

    Totals, the single largest turn ("peak") and a per-tool breakdown are
    accumulated by ``record_usage``. Usage is attributed to whichever tool
    was most recently passed to ``set_current_tool``; usage recorded before
    any tool was set only counts towards the totals.
    """

    def __init__(self):
        self.total_input = 0
        self.total_output = 0
        self.turn_count = 0
        self.peak_input = 0
        self.peak_output = 0
        self.peak_total = 0
        self.per_tool = {}  # tool_name -> {"input": int, "output": int, "count": int}
        self._current_tool = None  # Tool that newly recorded usage is attributed to
        # Initialised for per-turn accumulation; no method of this class updates them.
        self._turn_input = 0
        self._turn_output = 0

    def set_current_tool(self, tool_name: str):
        """Set the currently active tool for per-tool token attribution."""
        self._current_tool = tool_name

    def record_usage(self, input_tokens: int, output_tokens: int):
        """Record token usage from one API turn.

        A count of ``None`` (for example a JSON ``null`` in a usage payload)
        is treated as 0 instead of raising ``TypeError``.
        """
        input_tokens = input_tokens or 0
        output_tokens = output_tokens or 0

        self.total_input += input_tokens
        self.total_output += output_tokens
        self.turn_count += 1

        # The peak is the single turn with the largest combined token count.
        turn_total = input_tokens + output_tokens
        if turn_total > self.peak_total:
            self.peak_total = turn_total
            self.peak_input = input_tokens
            self.peak_output = output_tokens

        # Attribute to the current tool if one is active. "count" is the
        # number of usage records attributed to the tool.
        if self._current_tool:
            stats = self.per_tool.setdefault(
                self._current_tool, {"input": 0, "output": 0, "count": 0}
            )
            stats["input"] += input_tokens
            stats["output"] += output_tokens
            stats["count"] += 1

    def merge(self, other: "TokenTracker"):
        """Fold the totals, peak and per-tool stats of ``other`` into this tracker.

        ``other`` is left unchanged and no dict objects are shared between
        the two trackers afterwards.
        """
        self.total_input += other.total_input
        self.total_output += other.total_output
        self.turn_count += other.turn_count
        if other.peak_total > self.peak_total:
            self.peak_total = other.peak_total
            self.peak_input = other.peak_input
            self.peak_output = other.peak_output
        for tool, stats in other.per_tool.items():
            mine = self.per_tool.setdefault(tool, {"input": 0, "output": 0, "count": 0})
            mine["input"] += stats["input"]
            mine["output"] += stats["output"]
            mine["count"] += stats["count"]

    @property
    def total_tokens(self) -> int:
        """Sum of all recorded input and output tokens."""
        return self.total_input + self.total_output

    def summary(self) -> dict:
        """Return a dict with all token stats.

        The returned ``per_tool`` mapping is an independent copy, including
        the per-tool stat dicts, so callers may modify the summary without
        corrupting the tracker's own counters.
        """
        return {
            "input_tokens": self.total_input,
            "output_tokens": self.total_output,
            "total_tokens": self.total_tokens,
            "turn_count": self.turn_count,
            "peak_turn": {
                "input": self.peak_input,
                "output": self.peak_output,
                "total": self.peak_total,
            },
            "per_tool": {tool: dict(stats) for tool, stats in self.per_tool.items()},
        }

    def summary_line(self) -> str:
        """Return a one-line summary string for display."""
        return (
            f"Tokens: {self.total_input:,} in / {self.total_output:,} out / "
            f"{self.total_tokens:,} total ({self.turn_count} turns)"
        )

    def verbose_turn_line(self, input_tokens: int, output_tokens: int) -> str:
        """Return a per-turn detail line for --show-tokens mode.

        The turn number shown is the current ``turn_count``, so this is
        meant to be called right after ``record_usage``. ``None`` counts
        are shown as 0, matching ``record_usage``.
        """
        input_tokens = input_tokens or 0
        output_tokens = output_tokens or 0
        total = input_tokens + output_tokens
        tool_info = f" [{self._current_tool}]" if self._current_tool else ""
        return (
            f" Turn {self.turn_count}: {input_tokens:,} in / {output_tokens:,} out / "
            f"{total:,} total{tool_info}"
        )
397
+
398
+
269
399
  # ============================================================================
270
400
  # Session Memory
271
401
  # ============================================================================
@@ -292,16 +422,21 @@ def save_history(working_dir: str, history_data: dict):
292
422
  history_file.write_text(json.dumps(history_data, indent=2), encoding="utf-8")
293
423
 
294
424
 
295
def summarize_session(bug: str, outcome: str, iterations: int, token_tracker: "TokenTracker | None" = None) -> dict:
    """Return a dict summarizing a session.

    Args:
        bug: Task/bug description the session worked on.
        outcome: Outcome label stored verbatim under "outcome".
        iterations: Iteration count stored verbatim under "iterations".
        token_tracker: Optional tracker. When given and it has recorded at
            least one turn, its totals are added as "tokens_in",
            "tokens_out" and "tokens_total".

    Returns:
        A dict with "session_id", "bug", "outcome", "iterations" and
        "timestamp", plus the three token keys when usage was recorded.
    """
    # One clock reading feeds both fields so they always agree.
    now = datetime.now()
    entry = {
        "session_id": now.strftime("%Y%m%d_%H%M%S"),
        "bug": bug,
        "outcome": outcome,
        "iterations": iterations,
        "timestamp": now.isoformat(),
    }
    # Compare against None explicitly: relying on the tracker's truthiness
    # would skip the token fields for any tracker object that is falsy.
    if token_tracker is not None and token_tracker.turn_count > 0:
        entry["tokens_in"] = token_tracker.total_input
        entry["tokens_out"] = token_tracker.total_output
        entry["tokens_total"] = token_tracker.total_tokens
    return entry
305
440
 
306
441
 
307
442
  def format_history_context(sessions: list) -> str:
@@ -327,10 +462,12 @@ def print_history_box(sessions: list):
327
462
  outcome = s.get("outcome", "UNKNOWN")
328
463
  bug = s.get("bug", "unknown")[:55]
329
464
  iters = s.get("iterations", "?")
465
+ tokens_total = s.get("tokens_total")
466
+ token_str = f" · {tokens_total:,} tok" if tokens_total else ""
330
467
  if outcome == "RESOLVED":
331
- print(f" {C.BRIGHT_GREEN}✓{C.RESET} {C.WHITE}{bug}{C.RESET} {C.DIM}({iters} iter){C.RESET}")
468
+ print(f" {C.BRIGHT_GREEN}✓{C.RESET} {C.WHITE}{bug}{C.RESET} {C.DIM}({iters} iter{token_str}){C.RESET}")
332
469
  else:
333
- print(f" {C.BRIGHT_RED}✗{C.RESET} {C.WHITE}{bug}{C.RESET} {C.DIM}({iters} iter){C.RESET}")
470
+ print(f" {C.BRIGHT_RED}✗{C.RESET} {C.WHITE}{bug}{C.RESET} {C.DIM}({iters} iter{token_str}){C.RESET}")
334
471
  print()
335
472
 
336
473
 
@@ -424,7 +561,7 @@ class InputMonitor:
424
561
  Call resume_reading() after the main thread is done with input().
425
562
  """
426
563
 
427
- VALID_COMMANDS = ("wait", "pause", "add", "done")
564
+ VALID_COMMANDS = ("wait", "pause", "add", "done", "stop")
428
565
 
429
566
  def __init__(self):
430
567
  self._command = None
@@ -493,6 +630,24 @@ def load_prompt() -> str:
493
630
  return "You are debugging. Fix the bug. Output [DAVELOOP:RESOLVED] when done."
494
631
 
495
632
 
633
def _read_optional_prompt(prompt_file, label: str) -> "str | None":
    """Read an optional prompt file as UTF-8 text.

    Args:
        prompt_file: Path object of the prompt file to read.
        label: Human-readable prompt name used in the warning message.

    Returns:
        The file's text, or None (after printing a warning box) when the
        file does not exist.
    """
    # Read first and handle absence afterwards: this avoids the window
    # between an exists() check and the read in which the file could vanish.
    try:
        return prompt_file.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        print_warning_box(f"{label} prompt file not found: {prompt_file}")
        return None


def load_maestro_prompt() -> "str | None":
    """Load the Maestro mobile testing prompt, or return None if it is missing."""
    return _read_optional_prompt(MAESTRO_PROMPT_FILE, "Maestro")


def load_web_prompt() -> "str | None":
    """Load the Web UI testing prompt, or return None if it is missing."""
    return _read_optional_prompt(WEB_PROMPT_FILE, "Web")
649
+
650
+
496
651
  def find_claude_cli():
497
652
  """Find Claude CLI executable path."""
498
653
  import platform
@@ -534,12 +689,17 @@ def find_claude_cli():
534
689
  return None
535
690
 
536
691
 
537
- def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool = False, stream: bool = True, timeout: int = DEFAULT_TIMEOUT, input_monitor=None) -> str:
692
+ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool = False, stream: bool = True, timeout: int = DEFAULT_TIMEOUT, input_monitor=None, swarm_mode: bool = False, swarm_budget_max: int = 5, swarm_depth_max: int = 1, token_tracker: "TokenTracker" = None, show_tokens: bool = False) -> str:
538
693
  """Execute Claude Code CLI with the given prompt.
539
694
 
540
695
  If stream=True, output is printed in real-time and also returned.
541
696
  timeout is in seconds (default 600 = 10 minutes).
542
697
  input_monitor: optional InputMonitor to check for user commands during execution.
698
+ swarm_mode: if True, enables Task tool for sub-agent spawning.
699
+ swarm_budget_max: max sub-agents per session in swarm mode.
700
+ swarm_depth_max: max sub-agent depth in swarm mode.
701
+ token_tracker: optional TokenTracker to accumulate token usage from the stream.
702
+ show_tokens: if True, print per-turn token usage during execution.
543
703
  """
544
704
  claude_cmd = find_claude_cli()
545
705
  if not claude_cmd:
@@ -558,7 +718,8 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
558
718
  if continue_session:
559
719
  cmd.append("--continue")
560
720
 
561
- cmd.extend(["-p", "--verbose", "--output-format", "stream-json", "--allowedTools", "Bash,Read,Write,Edit,Glob,Grep,Task"])
721
+ allowed = ALLOWED_TOOLS_SWARM if swarm_mode else ALLOWED_TOOLS_DEFAULT
722
+ cmd.extend(["-p", "--verbose", "--output-format", "stream-json", "--allowedTools", allowed])
562
723
 
563
724
  try:
564
725
  if stream:
@@ -582,6 +743,9 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
582
743
  # Track start time
583
744
  start_time = time.time()
584
745
 
746
+ # Swarm budget tracking (only active in swarm mode)
747
+ swarm_budget = SwarmBudget(max_spawns=swarm_budget_max, max_depth=swarm_depth_max) if swarm_mode else None
748
+
585
749
  # Read and display JSON stream output
586
750
  output_lines = []
587
751
  full_text = []
@@ -597,6 +761,19 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
597
761
  msg_type = data.get("type", "")
598
762
 
599
763
 
764
+ # Extract token usage from any message that has it
765
+ if token_tracker:
766
+ usage = (data.get("message", {}).get("usage")
767
+ or data.get("usage")
768
+ or None)
769
+ if usage and isinstance(usage, dict):
770
+ inp = usage.get("input_tokens", 0)
771
+ outp = usage.get("output_tokens", 0)
772
+ if inp or outp:
773
+ token_tracker.record_usage(inp, outp)
774
+ if show_tokens:
775
+ print(f" {C.DIM}{token_tracker.verbose_turn_line(inp, outp)}{C.RESET}")
776
+
600
777
  # Handle different message types
601
778
  if msg_type == "assistant":
602
779
  # Assistant text message
@@ -611,6 +788,8 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
611
788
  elif block.get("type") == "tool_use":
612
789
  # Tool being called - show what Claude is doing
613
790
  tool_name = block.get("name", "unknown")
791
+ if token_tracker:
792
+ token_tracker.set_current_tool(tool_name)
614
793
  tool_input = block.get("input", {})
615
794
 
616
795
  # Format tool call based on type
@@ -640,6 +819,18 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
640
819
  elif tool_name == "Task":
641
820
  desc = tool_input.get("description", "")
642
821
  tool_display = f"{C.BRIGHT_BLUE}Task{C.RESET}({C.WHITE}{desc}{C.RESET})"
822
+ # Swarm budget enforcement
823
+ if swarm_budget:
824
+ if not swarm_budget.can_spawn():
825
+ print(f" {C.BRIGHT_YELLOW}[Swarm] Budget exhausted. Terminating to restart without Task tool.{C.RESET}")
826
+ process.terminate()
827
+ try:
828
+ process.wait(timeout=10)
829
+ except Exception:
830
+ process.kill()
831
+ return '\n'.join(full_text) + "\n[DAVELOOP:SWARM_BUDGET_EXHAUSTED]"
832
+ else:
833
+ swarm_budget.record_spawn(desc)
643
834
  else:
644
835
  tool_display = f"{C.BRIGHT_BLUE}{tool_name}{C.RESET}"
645
836
 
@@ -657,6 +848,8 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
657
848
  elif msg_type == "tool_use":
658
849
  # Tool being used - show what Claude is doing
659
850
  tool_name = data.get("name", "unknown")
851
+ if token_tracker:
852
+ token_tracker.set_current_tool(tool_name)
660
853
  tool_input = data.get("input", {})
661
854
 
662
855
  # Format tool call based on type
@@ -686,6 +879,18 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
686
879
  elif tool_name == "Task":
687
880
  desc = tool_input.get("description", "")
688
881
  tool_display = f"{C.BRIGHT_BLUE}Task{C.RESET}({C.WHITE}{desc}{C.RESET})"
882
+ # Swarm budget enforcement
883
+ if swarm_budget:
884
+ if not swarm_budget.can_spawn():
885
+ print(f" {C.BRIGHT_YELLOW}[Swarm] Budget exhausted. Terminating to restart without Task tool.{C.RESET}")
886
+ process.terminate()
887
+ try:
888
+ process.wait(timeout=10)
889
+ except Exception:
890
+ process.kill()
891
+ return '\n'.join(full_text) + "\n[DAVELOOP:SWARM_BUDGET_EXHAUSTED]"
892
+ else:
893
+ swarm_budget.record_spawn(desc)
689
894
  else:
690
895
  tool_display = f"{C.BRIGHT_BLUE}{tool_name}{C.RESET}"
691
896
 
@@ -837,6 +1042,16 @@ def main():
837
1042
  parser.add_argument("-t", "--timeout", type=int, default=DEFAULT_TIMEOUT,
838
1043
  help="Timeout per iteration in seconds (default: 600)")
839
1044
  parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
1045
+ parser.add_argument("--maestro", action="store_true", help="Enable Maestro mobile testing mode")
1046
+ parser.add_argument("--web", action="store_true", help="Enable Playwright web UI testing mode")
1047
+ parser.add_argument("--swarm", action="store_true",
1048
+ help="Enable swarm mode: DaveLoop can spawn sub-agents via Task tool")
1049
+ parser.add_argument("--swarm-budget", type=int, default=5,
1050
+ help="Max sub-agents per DaveLoop worker in swarm mode (default: 5)")
1051
+ parser.add_argument("--swarm-depth", type=int, default=1, choices=[1, 2],
1052
+ help="Max sub-agent depth in swarm mode (default: 1, no recursive spawning)")
1053
+ parser.add_argument("--show-tokens", action="store_true",
1054
+ help="Show verbose per-turn token usage during execution")
840
1055
 
841
1056
  args = parser.parse_args()
842
1057
 
@@ -863,6 +1078,14 @@ def main():
863
1078
  # Setup
864
1079
  session_id = datetime.now().strftime("%Y%m%d_%H%M%S")
865
1080
  system_prompt = load_prompt()
1081
+ if args.maestro:
1082
+ maestro_prompt = load_maestro_prompt()
1083
+ if maestro_prompt:
1084
+ system_prompt = system_prompt + "\n\n---\n\n" + maestro_prompt
1085
+ elif args.web:
1086
+ web_prompt = load_web_prompt()
1087
+ if web_prompt:
1088
+ system_prompt = system_prompt + "\n\n---\n\n" + web_prompt
866
1089
  working_dir = args.dir or os.getcwd()
867
1090
 
868
1091
  # Load session history
@@ -876,7 +1099,13 @@ def main():
876
1099
  print_status("Iterations", str(args.max_iterations), C.WHITE)
877
1100
  print_status("Timeout", f"{args.timeout // 60}m per iteration", C.WHITE)
878
1101
  print_status("Tasks", str(len(bug_descriptions)), C.WHITE)
879
- print_status("Mode", "Autonomous", C.WHITE)
1102
+ mode_name = "Maestro Mobile Testing" if args.maestro else "Playwright Web Testing" if args.web else "Autonomous"
1103
+ print_status("Mode", mode_name, C.WHITE)
1104
+ if args.swarm:
1105
+ print_status("Swarm", f"ENABLED (budget: {args.swarm_budget}, depth: {args.swarm_depth})", C.BRIGHT_CYAN)
1106
+ print_status("Tools", ALLOWED_TOOLS_SWARM, C.WHITE)
1107
+ else:
1108
+ print_status("Tools", ALLOWED_TOOLS_DEFAULT, C.WHITE)
880
1109
  print(f"{C.BRIGHT_BLUE}└{'─' * 70}┘{C.RESET}")
881
1110
 
882
1111
  # Build task queue
@@ -886,7 +1115,7 @@ def main():
886
1115
 
887
1116
  # Print controls hint
888
1117
  print(f"\n{C.BRIGHT_BLUE}{C.BOLD}┌─ CONTROLS {'─' * 58}┐{C.RESET}")
889
- print(f"{C.BRIGHT_BLUE}│{C.RESET} Type while running: {C.BRIGHT_WHITE}wait{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}pause{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}add{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}done{C.RESET} {C.BRIGHT_BLUE}│{C.RESET}")
1118
+ print(f"{C.BRIGHT_BLUE}│{C.RESET} Type while running: {C.BRIGHT_WHITE}wait{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}pause{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}add{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}done{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}stop{C.RESET} {C.BRIGHT_BLUE}│{C.RESET}")
890
1119
  print(f"{C.BRIGHT_BLUE}└{'─' * 70}┘{C.RESET}")
891
1120
 
892
1121
  # Start input monitor
@@ -898,6 +1127,9 @@ def main():
898
1127
  if history_data["sessions"]:
899
1128
  history_context = "\n\n" + format_history_context(history_data["sessions"])
900
1129
 
1130
+ # Session-wide token tracking (aggregates across all tasks)
1131
+ session_token_tracker = TokenTracker()
1132
+
901
1133
  # === OUTER LOOP: iterate over tasks ===
902
1134
  while True:
903
1135
  task = task_queue.next()
@@ -907,15 +1139,60 @@ def main():
907
1139
  bug_input = task["description"]
908
1140
  task_queue.summary_display()
909
1141
 
910
- print_section("BUG REPORT", C.BRIGHT_RED)
1142
+ if args.maestro:
1143
+ print_section("MAESTRO TASK", C.BRIGHT_CYAN)
1144
+ section_color = C.BRIGHT_CYAN
1145
+ elif args.web:
1146
+ print_section("WEB UI TASK", C.BRIGHT_MAGENTA)
1147
+ section_color = C.BRIGHT_MAGENTA
1148
+ else:
1149
+ print_section("BUG REPORT", C.BRIGHT_RED)
1150
+ section_color = C.BRIGHT_RED
911
1151
  for line in bug_input.split('\n')[:8]:
912
- print(f" {C.BRIGHT_RED}{line[:70]}{C.RESET}")
1152
+ print(f" {section_color}{line[:70]}{C.RESET}")
913
1153
  if len(bug_input.split('\n')) > 8:
914
- print(f" {C.RED}... +{len(bug_input.split(chr(10))) - 8} more lines{C.RESET}")
1154
+ print(f" {section_color}... +{len(bug_input.split(chr(10))) - 8} more lines{C.RESET}")
915
1155
  sys.stdout.flush()
916
1156
 
917
1157
  # Initial context for this task
918
- context = f"""
1158
+ if args.maestro:
1159
+ context = f"""
1160
+ ## Maestro Mobile Testing Task
1161
+
1162
+ {bug_input}
1163
+ {history_context}
1164
+
1165
+ ## Instructions
1166
+
1167
+ 1. First, detect connected devices/emulators (run `adb devices` and/or `xcrun simctl list devices available`)
1168
+ 2. If no device is found, auto-launch an emulator/simulator
1169
+ 3. Ensure the target app is installed on the device
1170
+ 4. Proceed with the Maestro testing task described above
1171
+ 5. Before declaring success, verify by running the flow(s) 3 consecutive times - all must pass
1172
+
1173
+ Use the reasoning protocol before each action.
1174
+ """
1175
+ elif args.web:
1176
+ context = f"""
1177
+ ## Web UI Testing Task
1178
+
1179
+ {bug_input}
1180
+ {history_context}
1181
+
1182
+ ## Instructions
1183
+
1184
+ 1. First, explore the project to detect the framework and find the dev server command
1185
+ 2. Install Playwright if not already installed (`npm install -D @playwright/test && npx playwright install chromium`)
1186
+ 3. Start the dev server if not already running
1187
+ 4. Read the source code to understand the UI components, especially any gesture/drag/interactive elements
1188
+ 5. Write Playwright tests in an `e2e/` directory that test the app like a real human would - use actual mouse movements, drags, clicks, hovers, keyboard input
1189
+ 6. Test gestures and buttons SEPARATELY - a working button does not prove the gesture works
1190
+ 7. Before declaring success, verify by running the tests 3 consecutive times - all must pass
1191
+
1192
+ Use the reasoning protocol before each action.
1193
+ """
1194
+ else:
1195
+ context = f"""
919
1196
  ## Bug Report
920
1197
 
921
1198
  {bug_input}
@@ -928,6 +1205,7 @@ Then fix it. Use the reasoning protocol before each action.
928
1205
  """
929
1206
 
930
1207
  iteration_history = []
1208
+ task_token_tracker = TokenTracker()
931
1209
 
932
1210
  # === INNER LOOP: iterations for current task ===
933
1211
  for iteration in range(1, args.max_iterations + 1):
@@ -951,11 +1229,20 @@ Then fix it. Use the reasoning protocol before each action.
951
1229
  full_prompt, working_dir,
952
1230
  continue_session=continue_session,
953
1231
  stream=True, timeout=args.timeout,
954
- input_monitor=input_monitor
1232
+ input_monitor=input_monitor,
1233
+ swarm_mode=args.swarm,
1234
+ swarm_budget_max=args.swarm_budget,
1235
+ swarm_depth_max=args.swarm_depth,
1236
+ token_tracker=task_token_tracker,
1237
+ show_tokens=args.show_tokens
955
1238
  )
956
1239
 
957
1240
  print(f"\n{C.BRIGHT_BLUE} {'─' * 70}{C.RESET}")
958
1241
 
1242
+ # Print token usage summary for this iteration
1243
+ if task_token_tracker.turn_count > 0:
1244
+ print(f" {C.BRIGHT_CYAN}⊛ {task_token_tracker.summary_line()}{C.RESET}")
1245
+
959
1246
  # Save log
960
1247
  save_log(iteration, output, session_id)
961
1248
  iteration_history.append(output)
@@ -1009,22 +1296,34 @@ Continue the current debugging task. Use the reasoning protocol before each acti
1009
1296
  elif user_cmd == "done":
1010
1297
  # Clean exit
1011
1298
  input_monitor.stop()
1012
- session_entry = summarize_session(bug_input, "DONE_BY_USER", iteration)
1299
+ session_entry = summarize_session(bug_input, "DONE_BY_USER", iteration, task_token_tracker)
1013
1300
  history_data["sessions"].append(session_entry)
1014
1301
  save_history(working_dir, history_data)
1015
1302
  print(f"\n {C.GREEN}✓{C.RESET} Session saved. Exiting by user request.")
1016
1303
  return 0
1017
1304
 
1305
+ elif user_cmd == "stop":
1306
+ # Boris-commanded stop - terminate this iteration immediately
1307
+ print(f"\n {C.BRIGHT_RED}{C.BOLD} ■ STOPPED BY BORIS{C.RESET}")
1308
+ print(f"{C.BRIGHT_RED} {'─' * 70}{C.RESET}")
1309
+ input_monitor.stop()
1310
+ session_entry = summarize_session(bug_input, "STOPPED_BY_BORIS", iteration, task_token_tracker)
1311
+ history_data["sessions"].append(session_entry)
1312
+ save_history(working_dir, history_data)
1313
+ return 1
1314
+
1018
1315
  # Check exit condition
1019
1316
  signal, should_exit = check_exit_condition(output)
1020
1317
 
1021
1318
  if should_exit:
1022
1319
  if signal == "RESOLVED":
1023
1320
  print_success_box("")
1321
+ if task_token_tracker.turn_count > 0:
1322
+ print(f" {C.BRIGHT_CYAN}⊛ {task_token_tracker.summary_line()}{C.RESET}")
1024
1323
  print(f" {C.DIM}Session: {session_id}{C.RESET}")
1025
1324
  print(f" {C.DIM}Logs: {LOG_DIR}{C.RESET}\n")
1026
1325
  task_queue.mark_done()
1027
- session_entry = summarize_session(bug_input, "RESOLVED", iteration)
1326
+ session_entry = summarize_session(bug_input, "RESOLVED", iteration, task_token_tracker)
1028
1327
  history_data["sessions"].append(session_entry)
1029
1328
  save_history(working_dir, history_data)
1030
1329
  break # Move to next task
@@ -1050,20 +1349,42 @@ Continue debugging with this information. Use the reasoning protocol before each
1050
1349
  print_status("Logs", str(LOG_DIR), C.WHITE)
1051
1350
  print()
1052
1351
  task_queue.mark_failed()
1053
- session_entry = summarize_session(bug_input, "BLOCKED", iteration)
1352
+ session_entry = summarize_session(bug_input, "BLOCKED", iteration, task_token_tracker)
1054
1353
  history_data["sessions"].append(session_entry)
1055
1354
  save_history(working_dir, history_data)
1056
1355
  break # Move to next task
1057
1356
  else:
1058
1357
  print_error_box(f"Error occurred: {signal}")
1059
1358
  task_queue.mark_failed()
1060
- session_entry = summarize_session(bug_input, "ERROR", iteration)
1359
+ session_entry = summarize_session(bug_input, "ERROR", iteration, task_token_tracker)
1061
1360
  history_data["sessions"].append(session_entry)
1062
1361
  save_history(working_dir, history_data)
1063
1362
  break # Move to next task
1064
1363
 
1065
1364
  # Prepare context for next iteration
1066
- context = f"""
1365
+ if args.maestro:
1366
+ context = f"""
1367
+ ## Iteration {iteration + 1}
1368
+
1369
+ The Maestro flow(s) are NOT yet passing reliably. You have full context from previous iterations.
1370
+
1371
+ Continue working on the flows. Check device status, inspect the UI hierarchy, fix selectors or timing issues, and re-run.
1372
+ Remember: all flows must pass 3 consecutive times before resolving.
1373
+ Use the reasoning protocol before each action.
1374
+ """
1375
+ elif args.web:
1376
+ context = f"""
1377
+ ## Iteration {iteration + 1}
1378
+
1379
+ The Playwright tests are NOT yet passing reliably. You have full context from previous iterations.
1380
+
1381
+ Continue working on the tests. Check selectors, timing, server status, and re-run.
1382
+ Make sure you are testing like a real human - use actual mouse gestures, not just button clicks.
1383
+ Remember: all tests must pass 3 consecutive times before resolving.
1384
+ Use the reasoning protocol before each action.
1385
+ """
1386
+ else:
1387
+ context = f"""
1067
1388
  ## Iteration {iteration + 1}
1068
1389
 
1069
1390
  The bug is NOT yet resolved. You have full context from previous iterations.
@@ -1075,15 +1396,33 @@ Use the reasoning protocol before each action.
1075
1396
  # Max iterations reached for this task (for-else)
1076
1397
  print_warning_box(f"Max iterations ({args.max_iterations}) reached for current task")
1077
1398
  task_queue.mark_failed()
1078
- session_entry = summarize_session(bug_input, "MAX_ITERATIONS", args.max_iterations)
1399
+ session_entry = summarize_session(bug_input, "MAX_ITERATIONS", args.max_iterations, task_token_tracker)
1079
1400
  history_data["sessions"].append(session_entry)
1080
1401
  save_history(working_dir, history_data)
1081
1402
 
1403
+ # Aggregate task tokens into session-level tracker
1404
+ if task_token_tracker.turn_count > 0:
1405
+ session_token_tracker.total_input += task_token_tracker.total_input
1406
+ session_token_tracker.total_output += task_token_tracker.total_output
1407
+ session_token_tracker.turn_count += task_token_tracker.turn_count
1408
+ if task_token_tracker.peak_total > session_token_tracker.peak_total:
1409
+ session_token_tracker.peak_total = task_token_tracker.peak_total
1410
+ session_token_tracker.peak_input = task_token_tracker.peak_input
1411
+ session_token_tracker.peak_output = task_token_tracker.peak_output
1412
+ for tool, stats in task_token_tracker.per_tool.items():
1413
+ if tool not in session_token_tracker.per_tool:
1414
+ session_token_tracker.per_tool[tool] = {"input": 0, "output": 0, "count": 0}
1415
+ session_token_tracker.per_tool[tool]["input"] += stats["input"]
1416
+ session_token_tracker.per_tool[tool]["output"] += stats["output"]
1417
+ session_token_tracker.per_tool[tool]["count"] += stats["count"]
1418
+
1082
1419
  # Save iteration summary for this task
1083
1420
  LOG_DIR.mkdir(exist_ok=True)
1084
1421
  summary = f"# DaveLoop Session {session_id}\n\n"
1085
1422
  summary += f"Bug: {bug_input[:200]}...\n\n"
1086
1423
  summary += f"Iterations: {len(iteration_history)}\n\n"
1424
+ if task_token_tracker.turn_count > 0:
1425
+ summary += f"Token Usage: {task_token_tracker.summary_line()}\n\n"
1087
1426
  summary += "## Iteration History\n\n"
1088
1427
  for i, hist in enumerate(iteration_history, 1):
1089
1428
  summary += f"### Iteration {i}\n```\n{hist[:500]}...\n```\n\n"
@@ -1105,6 +1444,16 @@ Use the reasoning protocol before each action.
1105
1444
  print(f" {C.DIM}○ {desc}{C.RESET}")
1106
1445
  print()
1107
1446
 
1447
+ # Print session-wide token usage
1448
+ if session_token_tracker.turn_count > 0:
1449
+ print(f" {C.BRIGHT_CYAN}⊛ {session_token_tracker.summary_line()}{C.RESET}")
1450
+ if session_token_tracker.per_tool:
1451
+ print(f" {C.DIM} Per tool:{C.RESET}")
1452
+ for tool, stats in sorted(session_token_tracker.per_tool.items(), key=lambda x: x[1]["input"] + x[1]["output"], reverse=True):
1453
+ tool_total = stats["input"] + stats["output"]
1454
+ print(f" {C.DIM} {tool}: {stats['input']:,} in / {stats['output']:,} out / {tool_total:,} total ({stats['count']} calls){C.RESET}")
1455
+ print()
1456
+
1108
1457
  print(f" {C.DIM}Session: {session_id}{C.RESET}")
1109
1458
  print(f" {C.DIM}Logs: {LOG_DIR}{C.RESET}\n")
1110
1459