daveloop 1.3.0-py3-none-any.whl → 1.5.0-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in the supported public registries, and is provided for informational purposes only.
- daveloop-1.5.0.dist-info/METADATA +392 -0
- daveloop-1.5.0.dist-info/RECORD +7 -0
- {daveloop-1.3.0.dist-info → daveloop-1.5.0.dist-info}/WHEEL +1 -1
- daveloop.py +369 -20
- daveloop-1.3.0.dist-info/METADATA +0 -391
- daveloop-1.3.0.dist-info/RECORD +0 -7
- {daveloop-1.3.0.dist-info → daveloop-1.5.0.dist-info}/entry_points.txt +0 -0
- {daveloop-1.3.0.dist-info → daveloop-1.5.0.dist-info}/top_level.txt +0 -0
daveloop.py
CHANGED
@@ -20,6 +20,8 @@ MAX_ITERATIONS = 20
 DEFAULT_TIMEOUT = 600  # 10 minutes in seconds
 SCRIPT_DIR = Path(__file__).parent
 PROMPT_FILE = SCRIPT_DIR / "daveloop_prompt.md"
+MAESTRO_PROMPT_FILE = SCRIPT_DIR / "daveloop_maestro_prompt.md"
+WEB_PROMPT_FILE = SCRIPT_DIR / "daveloop_web_prompt.md"
 LOG_DIR = SCRIPT_DIR / "logs"

 # Exit signals from Claude Code
@@ -27,6 +29,12 @@ SIGNAL_RESOLVED = "[DAVELOOP:RESOLVED]"
 SIGNAL_BLOCKED = "[DAVELOOP:BLOCKED]"
 SIGNAL_CLARIFY = "[DAVELOOP:CLARIFY]"

+# Allowed tools for Claude Code CLI
+# Default: no Task tool (prevents recursive sub-agent spawning)
+ALLOWED_TOOLS_DEFAULT = "Bash,Read,Write,Edit,Glob,Grep"
+# Swarm mode: Task tool enabled for controlled sub-agent spawning
+ALLOWED_TOOLS_SWARM = "Bash,Read,Write,Edit,Glob,Grep,Task"
+
 # ============================================================================
 # ANSI Color Codes
 # ============================================================================
@@ -266,6 +274,128 @@ class TaskQueue:
         print()


+# ============================================================================
+# Swarm Budget
+# ============================================================================
+class SwarmBudget:
+    """Tracks and enforces sub-agent spawn budget for swarm mode."""
+
+    def __init__(self, max_spawns: int = 5, max_depth: int = 1):
+        self.max_spawns = max_spawns
+        self.max_depth = max_depth
+        self.spawn_count = 0
+        self.active_agents = 0
+        self.completed_agents = 0
+
+    def can_spawn(self) -> bool:
+        """Check if spawning another sub-agent is within budget."""
+        return self.spawn_count < self.max_spawns
+
+    def record_spawn(self, description: str):
+        """Record a sub-agent spawn."""
+        self.spawn_count += 1
+        self.active_agents += 1
+        print(f" {C.BRIGHT_CYAN}[Swarm]{C.RESET} Sub-agent {self.spawn_count}/{self.max_spawns}: {description}")
+
+    def record_completion(self):
+        """Record a sub-agent completion."""
+        self.active_agents -= 1
+        self.completed_agents += 1
+
+    def budget_exhausted_message(self) -> str:
+        """Return message when budget is exhausted."""
+        return (
+            f"Sub-agent budget exhausted ({self.spawn_count}/{self.max_spawns}). "
+            f"Complete remaining work directly without spawning more sub-agents."
+        )
+
+    def summary(self) -> dict:
+        """Return budget tracking summary."""
+        return {
+            "total_spawned": self.spawn_count,
+            "completed": self.completed_agents,
+            "budget": self.max_spawns,
+        }
+
+
+# ============================================================================
+# Token Tracker
+# ============================================================================
+class TokenTracker:
+    """Tracks token usage across API turns in a DaveLoop session."""
+
+    def __init__(self):
+        self.total_input = 0
+        self.total_output = 0
+        self.turn_count = 0
+        self.peak_input = 0
+        self.peak_output = 0
+        self.peak_total = 0
+        self.per_tool = {}  # tool_name -> {"input": int, "output": int, "count": int}
+        self._current_tool = None  # Track which tool is active for per-tool attribution
+        self._turn_input = 0  # Accumulate within a turn for per-tool attribution
+        self._turn_output = 0
+
+    def set_current_tool(self, tool_name: str):
+        """Set the currently active tool for per-tool token attribution."""
+        self._current_tool = tool_name
+
+    def record_usage(self, input_tokens: int, output_tokens: int):
+        """Record token usage from an API turn."""
+        self.total_input += input_tokens
+        self.total_output += output_tokens
+        self.turn_count += 1
+
+        turn_total = input_tokens + output_tokens
+        if turn_total > self.peak_total:
+            self.peak_total = turn_total
+            self.peak_input = input_tokens
+            self.peak_output = output_tokens
+
+        # Attribute to current tool if one is active
+        if self._current_tool:
+            if self._current_tool not in self.per_tool:
+                self.per_tool[self._current_tool] = {"input": 0, "output": 0, "count": 0}
+            self.per_tool[self._current_tool]["input"] += input_tokens
+            self.per_tool[self._current_tool]["output"] += output_tokens
+            self.per_tool[self._current_tool]["count"] += 1
+
+    @property
+    def total_tokens(self) -> int:
+        return self.total_input + self.total_output
+
+    def summary(self) -> dict:
+        """Return a dict with all token stats."""
+        return {
+            "input_tokens": self.total_input,
+            "output_tokens": self.total_output,
+            "total_tokens": self.total_tokens,
+            "turn_count": self.turn_count,
+            "peak_turn": {
+                "input": self.peak_input,
+                "output": self.peak_output,
+                "total": self.peak_total,
+            },
+            "per_tool": dict(self.per_tool),
+        }
+
+    def summary_line(self) -> str:
+        """Return a one-line summary string for display."""
+        return (
+            f"Tokens: {self.total_input:,} in / {self.total_output:,} out / "
+            f"{self.total_tokens:,} total ({self.turn_count} turns)"
+        )
+
+    def verbose_turn_line(self, input_tokens: int, output_tokens: int) -> str:
+        """Return a per-turn detail line for --show-tokens mode."""
+        total = input_tokens + output_tokens
+        tool_info = f" [{self._current_tool}]" if self._current_tool else ""
+        return (
+            f" Turn {self.turn_count}: {input_tokens:,} in / {output_tokens:,} out / "
+            f"{total:,} total{tool_info}"
+        )
+
+
 # ============================================================================
 # Session Memory
 # ============================================================================
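For orientation, here is a minimal standalone sketch of how the two new classes are driven, using only methods that appear in this diff. It assumes SwarmBudget and TokenTracker are importable from the top-level daveloop module (per top_level.txt); the token counts and task names are invented for illustration.

    from daveloop import SwarmBudget, TokenTracker

    tracker = TokenTracker()
    tracker.set_current_tool("Bash")      # attribute the following turn to the Bash tool
    tracker.record_usage(1200, 350)       # one API turn: input tokens, output tokens
    tracker.set_current_tool("Edit")
    tracker.record_usage(800, 90)
    print(tracker.summary_line())         # Tokens: 2,000 in / 440 out / 2,440 total (2 turns)
    print(tracker.summary()["per_tool"])  # per-tool breakdown keyed by tool name

    budget = SwarmBudget(max_spawns=2)
    for task in ("reproduce bug", "bisect regression", "write failing test"):
        if budget.can_spawn():
            budget.record_spawn(task)     # prints a "[Swarm] Sub-agent i/N: ..." line
            budget.record_completion()
        else:
            print(budget.budget_exhausted_message())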
@@ -292,16 +422,21 @@ def save_history(working_dir: str, history_data: dict):
     history_file.write_text(json.dumps(history_data, indent=2), encoding="utf-8")


-def summarize_session(bug: str, outcome: str, iterations: int) -> dict:
+def summarize_session(bug: str, outcome: str, iterations: int, token_tracker: "TokenTracker" = None) -> dict:
     """Return a dict summarizing a session."""
     now = datetime.now()
-    return {
+    entry = {
         "session_id": now.strftime("%Y%m%d_%H%M%S"),
         "bug": bug,
         "outcome": outcome,
         "iterations": iterations,
         "timestamp": now.isoformat()
     }
+    if token_tracker and token_tracker.turn_count > 0:
+        entry["tokens_in"] = token_tracker.total_input
+        entry["tokens_out"] = token_tracker.total_output
+        entry["tokens_total"] = token_tracker.total_tokens
+    return entry


 def format_history_context(sessions: list) -> str:
@@ -327,10 +462,12 @@ def print_history_box(sessions: list):
         outcome = s.get("outcome", "UNKNOWN")
         bug = s.get("bug", "unknown")[:55]
         iters = s.get("iterations", "?")
+        tokens_total = s.get("tokens_total")
+        token_str = f" · {tokens_total:,} tok" if tokens_total else ""
         if outcome == "RESOLVED":
-            print(f" {C.BRIGHT_GREEN}✓{C.RESET} {C.WHITE}{bug}{C.RESET} {C.DIM}({iters} iter){C.RESET}")
+            print(f" {C.BRIGHT_GREEN}✓{C.RESET} {C.WHITE}{bug}{C.RESET} {C.DIM}({iters} iter{token_str}){C.RESET}")
         else:
-            print(f" {C.BRIGHT_RED}✗{C.RESET} {C.WHITE}{bug}{C.RESET} {C.DIM}({iters} iter){C.RESET}")
+            print(f" {C.BRIGHT_RED}✗{C.RESET} {C.WHITE}{bug}{C.RESET} {C.DIM}({iters} iter{token_str}){C.RESET}")
     print()


@@ -424,7 +561,7 @@ class InputMonitor:
     Call resume_reading() after the main thread is done with input().
     """

-    VALID_COMMANDS = ("wait", "pause", "add", "done")
+    VALID_COMMANDS = ("wait", "pause", "add", "done", "stop")

    def __init__(self):
        self._command = None
@@ -493,6 +630,24 @@ def load_prompt() -> str:
         return "You are debugging. Fix the bug. Output [DAVELOOP:RESOLVED] when done."


+def load_maestro_prompt() -> str:
+    """Load the Maestro mobile testing prompt."""
+    if MAESTRO_PROMPT_FILE.exists():
+        return MAESTRO_PROMPT_FILE.read_text(encoding="utf-8")
+    else:
+        print_warning_box(f"Maestro prompt file not found: {MAESTRO_PROMPT_FILE}")
+        return None
+
+
+def load_web_prompt() -> str:
+    """Load the Web UI testing prompt."""
+    if WEB_PROMPT_FILE.exists():
+        return WEB_PROMPT_FILE.read_text(encoding="utf-8")
+    else:
+        print_warning_box(f"Web prompt file not found: {WEB_PROMPT_FILE}")
+        return None
+
+
 def find_claude_cli():
     """Find Claude CLI executable path."""
     import platform
@@ -534,12 +689,17 @@ def find_claude_cli():
     return None


-def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool = False, stream: bool = True, timeout: int = DEFAULT_TIMEOUT, input_monitor=None) -> str:
+def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool = False, stream: bool = True, timeout: int = DEFAULT_TIMEOUT, input_monitor=None, swarm_mode: bool = False, swarm_budget_max: int = 5, swarm_depth_max: int = 1, token_tracker: "TokenTracker" = None, show_tokens: bool = False) -> str:
     """Execute Claude Code CLI with the given prompt.

     If stream=True, output is printed in real-time and also returned.
     timeout is in seconds (default 600 = 10 minutes).
     input_monitor: optional InputMonitor to check for user commands during execution.
+    swarm_mode: if True, enables Task tool for sub-agent spawning.
+    swarm_budget_max: max sub-agents per session in swarm mode.
+    swarm_depth_max: max sub-agent depth in swarm mode.
+    token_tracker: optional TokenTracker to accumulate token usage from the stream.
+    show_tokens: if True, print per-turn token usage during execution.
     """
     claude_cmd = find_claude_cli()
     if not claude_cmd:
@@ -558,7 +718,8 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
     if continue_session:
         cmd.append("--continue")

-    cmd.extend(["-p", "--verbose", "--output-format", "stream-json"])
+    allowed = ALLOWED_TOOLS_SWARM if swarm_mode else ALLOWED_TOOLS_DEFAULT
+    cmd.extend(["-p", "--verbose", "--output-format", "stream-json", "--allowedTools", allowed])

     try:
         if stream:
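The only difference between the two tool sets is whether Task is passed to --allowedTools. A standalone sketch of this flag selection follows; build_flags is a hypothetical helper for illustration, and the base command is a placeholder since the CLI path and prompt handling are built outside this hunk.

    ALLOWED_TOOLS_DEFAULT = "Bash,Read,Write,Edit,Glob,Grep"
    ALLOWED_TOOLS_SWARM = "Bash,Read,Write,Edit,Glob,Grep,Task"

    def build_flags(swarm_mode: bool, continue_session: bool) -> list:
        cmd = ["<claude-cli>"]  # placeholder for the resolved Claude CLI path
        if continue_session:
            cmd.append("--continue")
        allowed = ALLOWED_TOOLS_SWARM if swarm_mode else ALLOWED_TOOLS_DEFAULT
        cmd.extend(["-p", "--verbose", "--output-format", "stream-json",
                    "--allowedTools", allowed])
        return cmd

    print(build_flags(swarm_mode=False, continue_session=False))
    print(build_flags(swarm_mode=True, continue_session=True))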
@@ -582,6 +743,9 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
             # Track start time
             start_time = time.time()

+            # Swarm budget tracking (only active in swarm mode)
+            swarm_budget = SwarmBudget(max_spawns=swarm_budget_max, max_depth=swarm_depth_max) if swarm_mode else None
+
             # Read and display JSON stream output
             output_lines = []
             full_text = []
@@ -597,6 +761,19 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
                 msg_type = data.get("type", "")


+                # Extract token usage from any message that has it
+                if token_tracker:
+                    usage = (data.get("message", {}).get("usage")
+                             or data.get("usage")
+                             or None)
+                    if usage and isinstance(usage, dict):
+                        inp = usage.get("input_tokens", 0)
+                        outp = usage.get("output_tokens", 0)
+                        if inp or outp:
+                            token_tracker.record_usage(inp, outp)
+                            if show_tokens:
+                                print(f" {C.DIM}{token_tracker.verbose_turn_line(inp, outp)}{C.RESET}")
+
                 # Handle different message types
                 if msg_type == "assistant":
                     # Assistant text message
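The behaviour of this extraction can be seen on a couple of synthetic stream-json lines. The exact message shapes emitted by the Claude Code CLI are an assumption here; the code above only relies on finding a usage dict either under "message" or at the top level of the event.

    import json

    from daveloop import TokenTracker  # assumes the class is importable as added above

    sample_lines = [
        '{"type": "assistant", "message": {"usage": {"input_tokens": 2100, "output_tokens": 180}}}',
        '{"type": "result", "usage": {"input_tokens": 300, "output_tokens": 45}}',
        '{"type": "system"}',  # no usage field -> ignored
    ]

    tracker = TokenTracker()
    for raw in sample_lines:
        data = json.loads(raw)
        usage = data.get("message", {}).get("usage") or data.get("usage") or None
        if usage and isinstance(usage, dict):
            inp = usage.get("input_tokens", 0)
            outp = usage.get("output_tokens", 0)
            if inp or outp:
                tracker.record_usage(inp, outp)

    print(tracker.summary_line())  # Tokens: 2,400 in / 225 out / 2,625 total (2 turns)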
@@ -611,6 +788,8 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
                     elif block.get("type") == "tool_use":
                         # Tool being called - show what Claude is doing
                         tool_name = block.get("name", "unknown")
+                        if token_tracker:
+                            token_tracker.set_current_tool(tool_name)
                         tool_input = block.get("input", {})

                         # Format tool call based on type
@@ -640,6 +819,18 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
                         elif tool_name == "Task":
                             desc = tool_input.get("description", "")
                             tool_display = f"{C.BRIGHT_BLUE}Task{C.RESET}({C.WHITE}{desc}{C.RESET})"
+                            # Swarm budget enforcement
+                            if swarm_budget:
+                                if not swarm_budget.can_spawn():
+                                    print(f" {C.BRIGHT_YELLOW}[Swarm] Budget exhausted. Terminating to restart without Task tool.{C.RESET}")
+                                    process.terminate()
+                                    try:
+                                        process.wait(timeout=10)
+                                    except Exception:
+                                        process.kill()
+                                    return '\n'.join(full_text) + "\n[DAVELOOP:SWARM_BUDGET_EXHAUSTED]"
+                                else:
+                                    swarm_budget.record_spawn(desc)
                         else:
                             tool_display = f"{C.BRIGHT_BLUE}{tool_name}{C.RESET}"

@@ -657,6 +848,8 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
                 elif msg_type == "tool_use":
                     # Tool being used - show what Claude is doing
                     tool_name = data.get("name", "unknown")
+                    if token_tracker:
+                        token_tracker.set_current_tool(tool_name)
                     tool_input = data.get("input", {})

                     # Format tool call based on type
@@ -686,6 +879,18 @@ def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool
                     elif tool_name == "Task":
                         desc = tool_input.get("description", "")
                         tool_display = f"{C.BRIGHT_BLUE}Task{C.RESET}({C.WHITE}{desc}{C.RESET})"
+                        # Swarm budget enforcement
+                        if swarm_budget:
+                            if not swarm_budget.can_spawn():
+                                print(f" {C.BRIGHT_YELLOW}[Swarm] Budget exhausted. Terminating to restart without Task tool.{C.RESET}")
+                                process.terminate()
+                                try:
+                                    process.wait(timeout=10)
+                                except Exception:
+                                    process.kill()
+                                return '\n'.join(full_text) + "\n[DAVELOOP:SWARM_BUDGET_EXHAUSTED]"
+                            else:
+                                swarm_budget.record_spawn(desc)
                     else:
                         tool_display = f"{C.BRIGHT_BLUE}{tool_name}{C.RESET}"

@@ -837,6 +1042,16 @@ def main():
     parser.add_argument("-t", "--timeout", type=int, default=DEFAULT_TIMEOUT,
                         help="Timeout per iteration in seconds (default: 600)")
     parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
+    parser.add_argument("--maestro", action="store_true", help="Enable Maestro mobile testing mode")
+    parser.add_argument("--web", action="store_true", help="Enable Playwright web UI testing mode")
+    parser.add_argument("--swarm", action="store_true",
+                        help="Enable swarm mode: DaveLoop can spawn sub-agents via Task tool")
+    parser.add_argument("--swarm-budget", type=int, default=5,
+                        help="Max sub-agents per DaveLoop worker in swarm mode (default: 5)")
+    parser.add_argument("--swarm-depth", type=int, default=1, choices=[1, 2],
+                        help="Max sub-agent depth in swarm mode (default: 1, no recursive spawning)")
+    parser.add_argument("--show-tokens", action="store_true",
+                        help="Show verbose per-turn token usage during execution")

     args = parser.parse_args()

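A standalone sketch mirroring just the options added above, to show the resulting attribute names and defaults. This is not the real daveloop parser, which also defines the pre-existing arguments; it only reproduces the flags introduced in this hunk.

    import argparse

    parser = argparse.ArgumentParser(prog="daveloop-sketch")
    parser.add_argument("--maestro", action="store_true")
    parser.add_argument("--web", action="store_true")
    parser.add_argument("--swarm", action="store_true")
    parser.add_argument("--swarm-budget", type=int, default=5)
    parser.add_argument("--swarm-depth", type=int, default=1, choices=[1, 2])
    parser.add_argument("--show-tokens", action="store_true")

    args = parser.parse_args(["--swarm", "--swarm-budget", "3", "--show-tokens"])
    print(args.swarm, args.swarm_budget, args.swarm_depth, args.show_tokens)
    # -> True 3 1 True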
@@ -863,6 +1078,14 @@ def main():
     # Setup
     session_id = datetime.now().strftime("%Y%m%d_%H%M%S")
     system_prompt = load_prompt()
+    if args.maestro:
+        maestro_prompt = load_maestro_prompt()
+        if maestro_prompt:
+            system_prompt = system_prompt + "\n\n---\n\n" + maestro_prompt
+    elif args.web:
+        web_prompt = load_web_prompt()
+        if web_prompt:
+            system_prompt = system_prompt + "\n\n---\n\n" + web_prompt
     working_dir = args.dir or os.getcwd()

     # Load session history
@@ -876,7 +1099,13 @@
     print_status("Iterations", str(args.max_iterations), C.WHITE)
     print_status("Timeout", f"{args.timeout // 60}m per iteration", C.WHITE)
     print_status("Tasks", str(len(bug_descriptions)), C.WHITE)
-
+    mode_name = "Maestro Mobile Testing" if args.maestro else "Playwright Web Testing" if args.web else "Autonomous"
+    print_status("Mode", mode_name, C.WHITE)
+    if args.swarm:
+        print_status("Swarm", f"ENABLED (budget: {args.swarm_budget}, depth: {args.swarm_depth})", C.BRIGHT_CYAN)
+        print_status("Tools", ALLOWED_TOOLS_SWARM, C.WHITE)
+    else:
+        print_status("Tools", ALLOWED_TOOLS_DEFAULT, C.WHITE)
     print(f"{C.BRIGHT_BLUE}└{'─' * 70}┘{C.RESET}")

     # Build task queue
@@ -886,7 +1115,7 @@

     # Print controls hint
     print(f"\n{C.BRIGHT_BLUE}{C.BOLD}┌─ CONTROLS {'─' * 58}┐{C.RESET}")
-    print(f"{C.BRIGHT_BLUE}│{C.RESET} Type while running: {C.BRIGHT_WHITE}wait{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}pause{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}add{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}done{C.RESET} {C.BRIGHT_BLUE}│{C.RESET}")
+    print(f"{C.BRIGHT_BLUE}│{C.RESET} Type while running: {C.BRIGHT_WHITE}wait{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}pause{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}add{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}done{C.RESET} {C.DIM}·{C.RESET} {C.BRIGHT_WHITE}stop{C.RESET} {C.BRIGHT_BLUE}│{C.RESET}")
     print(f"{C.BRIGHT_BLUE}└{'─' * 70}┘{C.RESET}")

     # Start input monitor
@@ -898,6 +1127,9 @@
     if history_data["sessions"]:
         history_context = "\n\n" + format_history_context(history_data["sessions"])

+    # Session-wide token tracking (aggregates across all tasks)
+    session_token_tracker = TokenTracker()
+
     # === OUTER LOOP: iterate over tasks ===
     while True:
         task = task_queue.next()
@@ -907,15 +1139,60 @@
         bug_input = task["description"]
         task_queue.summary_display()

-        print_section("BUG REPORT", C.BRIGHT_RED)
+        if args.maestro:
+            print_section("MAESTRO TASK", C.BRIGHT_CYAN)
+            section_color = C.BRIGHT_CYAN
+        elif args.web:
+            print_section("WEB UI TASK", C.BRIGHT_MAGENTA)
+            section_color = C.BRIGHT_MAGENTA
+        else:
+            print_section("BUG REPORT", C.BRIGHT_RED)
+            section_color = C.BRIGHT_RED
         for line in bug_input.split('\n')[:8]:
-            print(f" {C.BRIGHT_RED}{line[:70]}{C.RESET}")
+            print(f" {section_color}{line[:70]}{C.RESET}")
         if len(bug_input.split('\n')) > 8:
-            print(f" {C.BRIGHT_RED}... +{len(bug_input.split(chr(10))) - 8} more lines{C.RESET}")
+            print(f" {section_color}... +{len(bug_input.split(chr(10))) - 8} more lines{C.RESET}")
         sys.stdout.flush()

         # Initial context for this task
-        context = f"""
+        if args.maestro:
+            context = f"""
+## Maestro Mobile Testing Task
+
+{bug_input}
+{history_context}
+
+## Instructions
+
+1. First, detect connected devices/emulators (run `adb devices` and/or `xcrun simctl list devices available`)
+2. If no device is found, auto-launch an emulator/simulator
+3. Ensure the target app is installed on the device
+4. Proceed with the Maestro testing task described above
+5. Before declaring success, verify by running the flow(s) 3 consecutive times - all must pass
+
+Use the reasoning protocol before each action.
+"""
+        elif args.web:
+            context = f"""
+## Web UI Testing Task
+
+{bug_input}
+{history_context}
+
+## Instructions
+
+1. First, explore the project to detect the framework and find the dev server command
+2. Install Playwright if not already installed (`npm install -D @playwright/test && npx playwright install chromium`)
+3. Start the dev server if not already running
+4. Read the source code to understand the UI components, especially any gesture/drag/interactive elements
+5. Write Playwright tests in an `e2e/` directory that test the app like a real human would - use actual mouse movements, drags, clicks, hovers, keyboard input
+6. Test gestures and buttons SEPARATELY - a working button does not prove the gesture works
+7. Before declaring success, verify by running the tests 3 consecutive times - all must pass
+
+Use the reasoning protocol before each action.
+"""
+        else:
+            context = f"""
 ## Bug Report

 {bug_input}
@@ -928,6 +1205,7 @@ Then fix it. Use the reasoning protocol before each action.
 """

         iteration_history = []
+        task_token_tracker = TokenTracker()

         # === INNER LOOP: iterations for current task ===
         for iteration in range(1, args.max_iterations + 1):
@@ -951,11 +1229,20 @@ Then fix it. Use the reasoning protocol before each action.
                 full_prompt, working_dir,
                 continue_session=continue_session,
                 stream=True, timeout=args.timeout,
-                input_monitor=input_monitor
+                input_monitor=input_monitor,
+                swarm_mode=args.swarm,
+                swarm_budget_max=args.swarm_budget,
+                swarm_depth_max=args.swarm_depth,
+                token_tracker=task_token_tracker,
+                show_tokens=args.show_tokens
             )

            print(f"\n{C.BRIGHT_BLUE} {'─' * 70}{C.RESET}")

+            # Print token usage summary for this iteration
+            if task_token_tracker.turn_count > 0:
+                print(f" {C.BRIGHT_CYAN}⊛ {task_token_tracker.summary_line()}{C.RESET}")
+
             # Save log
             save_log(iteration, output, session_id)
             iteration_history.append(output)
@@ -1009,22 +1296,34 @@ Continue the current debugging task. Use the reasoning protocol before each action.
             elif user_cmd == "done":
                 # Clean exit
                 input_monitor.stop()
-                session_entry = summarize_session(bug_input, "DONE_BY_USER", iteration)
+                session_entry = summarize_session(bug_input, "DONE_BY_USER", iteration, task_token_tracker)
                 history_data["sessions"].append(session_entry)
                 save_history(working_dir, history_data)
                 print(f"\n {C.GREEN}✓{C.RESET} Session saved. Exiting by user request.")
                 return 0

+            elif user_cmd == "stop":
+                # Boris-commanded stop - terminate this iteration immediately
+                print(f"\n {C.BRIGHT_RED}{C.BOLD} ■ STOPPED BY BORIS{C.RESET}")
+                print(f"{C.BRIGHT_RED} {'─' * 70}{C.RESET}")
+                input_monitor.stop()
+                session_entry = summarize_session(bug_input, "STOPPED_BY_BORIS", iteration, task_token_tracker)
+                history_data["sessions"].append(session_entry)
+                save_history(working_dir, history_data)
+                return 1
+
             # Check exit condition
             signal, should_exit = check_exit_condition(output)

             if should_exit:
                 if signal == "RESOLVED":
                     print_success_box("")
+                    if task_token_tracker.turn_count > 0:
+                        print(f" {C.BRIGHT_CYAN}⊛ {task_token_tracker.summary_line()}{C.RESET}")
                     print(f" {C.DIM}Session: {session_id}{C.RESET}")
                     print(f" {C.DIM}Logs: {LOG_DIR}{C.RESET}\n")
                     task_queue.mark_done()
-                    session_entry = summarize_session(bug_input, "RESOLVED", iteration)
+                    session_entry = summarize_session(bug_input, "RESOLVED", iteration, task_token_tracker)
                     history_data["sessions"].append(session_entry)
                     save_history(working_dir, history_data)
                     break  # Move to next task
@@ -1050,20 +1349,42 @@ Continue debugging with this information. Use the reasoning protocol before each action.
                     print_status("Logs", str(LOG_DIR), C.WHITE)
                     print()
                     task_queue.mark_failed()
-                    session_entry = summarize_session(bug_input, "BLOCKED", iteration)
+                    session_entry = summarize_session(bug_input, "BLOCKED", iteration, task_token_tracker)
                     history_data["sessions"].append(session_entry)
                     save_history(working_dir, history_data)
                     break  # Move to next task
                 else:
                     print_error_box(f"Error occurred: {signal}")
                     task_queue.mark_failed()
-                    session_entry = summarize_session(bug_input, "ERROR", iteration)
+                    session_entry = summarize_session(bug_input, "ERROR", iteration, task_token_tracker)
                     history_data["sessions"].append(session_entry)
                     save_history(working_dir, history_data)
                     break  # Move to next task

             # Prepare context for next iteration
-            context = f"""
+            if args.maestro:
+                context = f"""
+## Iteration {iteration + 1}
+
+The Maestro flow(s) are NOT yet passing reliably. You have full context from previous iterations.
+
+Continue working on the flows. Check device status, inspect the UI hierarchy, fix selectors or timing issues, and re-run.
+Remember: all flows must pass 3 consecutive times before resolving.
+Use the reasoning protocol before each action.
+"""
+            elif args.web:
+                context = f"""
+## Iteration {iteration + 1}
+
+The Playwright tests are NOT yet passing reliably. You have full context from previous iterations.
+
+Continue working on the tests. Check selectors, timing, server status, and re-run.
+Make sure you are testing like a real human - use actual mouse gestures, not just button clicks.
+Remember: all tests must pass 3 consecutive times before resolving.
+Use the reasoning protocol before each action.
+"""
+            else:
+                context = f"""
 ## Iteration {iteration + 1}

 The bug is NOT yet resolved. You have full context from previous iterations.
@@ -1075,15 +1396,33 @@ Use the reasoning protocol before each action.
             # Max iterations reached for this task (for-else)
             print_warning_box(f"Max iterations ({args.max_iterations}) reached for current task")
             task_queue.mark_failed()
-            session_entry = summarize_session(bug_input, "MAX_ITERATIONS", args.max_iterations)
+            session_entry = summarize_session(bug_input, "MAX_ITERATIONS", args.max_iterations, task_token_tracker)
             history_data["sessions"].append(session_entry)
             save_history(working_dir, history_data)

+        # Aggregate task tokens into session-level tracker
+        if task_token_tracker.turn_count > 0:
+            session_token_tracker.total_input += task_token_tracker.total_input
+            session_token_tracker.total_output += task_token_tracker.total_output
+            session_token_tracker.turn_count += task_token_tracker.turn_count
+            if task_token_tracker.peak_total > session_token_tracker.peak_total:
+                session_token_tracker.peak_total = task_token_tracker.peak_total
+                session_token_tracker.peak_input = task_token_tracker.peak_input
+                session_token_tracker.peak_output = task_token_tracker.peak_output
+            for tool, stats in task_token_tracker.per_tool.items():
+                if tool not in session_token_tracker.per_tool:
+                    session_token_tracker.per_tool[tool] = {"input": 0, "output": 0, "count": 0}
+                session_token_tracker.per_tool[tool]["input"] += stats["input"]
+                session_token_tracker.per_tool[tool]["output"] += stats["output"]
+                session_token_tracker.per_tool[tool]["count"] += stats["count"]
+
         # Save iteration summary for this task
         LOG_DIR.mkdir(exist_ok=True)
         summary = f"# DaveLoop Session {session_id}\n\n"
         summary += f"Bug: {bug_input[:200]}...\n\n"
         summary += f"Iterations: {len(iteration_history)}\n\n"
+        if task_token_tracker.turn_count > 0:
+            summary += f"Token Usage: {task_token_tracker.summary_line()}\n\n"
         summary += "## Iteration History\n\n"
         for i, hist in enumerate(iteration_history, 1):
             summary += f"### Iteration {i}\n```\n{hist[:500]}...\n```\n\n"
@@ -1105,6 +1444,16 @@ Use the reasoning protocol before each action.
         print(f" {C.DIM}○ {desc}{C.RESET}")
     print()

+    # Print session-wide token usage
+    if session_token_tracker.turn_count > 0:
+        print(f" {C.BRIGHT_CYAN}⊛ {session_token_tracker.summary_line()}{C.RESET}")
+        if session_token_tracker.per_tool:
+            print(f" {C.DIM} Per tool:{C.RESET}")
+            for tool, stats in sorted(session_token_tracker.per_tool.items(), key=lambda x: x[1]["input"] + x[1]["output"], reverse=True):
+                tool_total = stats["input"] + stats["output"]
+                print(f" {C.DIM} {tool}: {stats['input']:,} in / {stats['output']:,} out / {tool_total:,} total ({stats['count']} calls){C.RESET}")
+        print()
+
     print(f" {C.DIM}Session: {session_id}{C.RESET}")
     print(f" {C.DIM}Logs: {LOG_DIR}{C.RESET}\n")
