PyPI - daveloop - Versions diffs - 1.0.0__py3-none-any.whl - Mend

daveloop 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

daveloop-1.0.0.dist-info/METADATA +78 -0
daveloop-1.0.0.dist-info/RECORD +7 -0
daveloop-1.0.0.dist-info/WHEEL +5 -0
daveloop-1.0.0.dist-info/entry_points.txt +3 -0
daveloop-1.0.0.dist-info/top_level.txt +2 -0
daveloop.py +716 -0
daveloop_swebench.py +432 -0

daveloop-1.0.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,78 @@
+Metadata-Version: 2.4
+Name: daveloop
+Version: 1.0.0
+Summary: Self-healing debug agent powered by Claude Code CLI
+Home-page: https://github.com/davebruzil/DaveLoop
+Author: Dave Bruzil
+Keywords: debugging ai claude automation agent
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Software Development :: Debuggers
+Classifier: Topic :: Software Development :: Quality Assurance
+Requires-Python: >=3.7
+Description-Content-Type: text/markdown
+Dynamic: author
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: keywords
+Dynamic: requires-python
+Dynamic: summary
+# DaveLoop: Autonomous Debugging Agent
+DaveLoop is a Claude CLI-based debugging tool that iteratively solves complex bugs through multiple attempts. Rather than relying on single-shot fixes, it uses persistent context via the `--continue` flag to build solutions incrementally.
+## Installation
+### From Source (Current Method)
+```bash
+git clone https://github.com/davebruzil/DaveLoop.git
+cd DaveLoop
+pip install -e .
+```
+### Via pip (Coming Soon)
+Once published to PyPI, you'll be able to install with:
+```bash
+pip install daveloop
+```
+## Core Functionality
+The tool operates through a systematic loop: users provide bug descriptions, Claude analyzes and attempts fixes, and if unsuccessful, the process repeats with accumulated context. The agent signals completion with `[DAVELOOP:RESOLVED]` or exits when blocked.
+## Key Capabilities
+**4-Level Reasoning Protocol**: The system structures debugging through KNOWN facts, UNKNOWN gaps, HYPOTHESIS formulation, and concrete NEXT ACTIONs. This prevents random modifications and enforces methodical problem-solving.
+**Persistent Memory**: Unlike isolated attempts, each iteration builds on previous findings through Claude's context continuation feature.
+**Autonomous Operation**: The agent works without requiring manual permission prompts between iterations, enabling hands-free debugging sessions.
+**Exit Signals**: Clear indicators show whether bugs are resolved, the agent is blocked, or iteration limits have been reached.
+## Usage
+Basic invocation requires a bug description:
+```bash
+python daveloop.py "your bug description here"
+```
+The tool accepts detailed bug reports via files, custom working directories, and configurable iteration limits. Sessions automatically generate timestamped logs documenting the agent's reasoning and actions across all iterations.
+## Tested Domains
+The agent has demonstrated effectiveness on security vulnerabilities, race conditions, multi-file refactors, and real-world benchmark problems from Django, Pytest, and SymPy codebases.

daveloop-1.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+daveloop.py,sha256=qBaRmxB7pJrpxeJh1bvodMfkz3BJKFiV-i3UlOWSeBg,28348
+daveloop_swebench.py,sha256=iD9AU3XRiMQpt7TknFNlvnmPCNp64V-JaTfqTFgsGBM,15996
+daveloop-1.0.0.dist-info/METADATA,sha256=edBrqn1_7JCe0CKGVg91oQicbCJKA251oAObNjG2J4A,3190
+daveloop-1.0.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+daveloop-1.0.0.dist-info/entry_points.txt,sha256=QcFAZgFrDfPtIikNQb7eW9DxOpBK7T-qWrKqbGAS9Ww,86
+daveloop-1.0.0.dist-info/top_level.txt,sha256=36DiYt70m4DIK8t7IhV_y6hAzUIyeb5-qDUf3-gbDdg,27
+daveloop-1.0.0.dist-info/RECORD,,

daveloop-1.0.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (80.10.2)
+Root-Is-Purelib: true
+Tag: py3-none-any

daveloop-1.0.0.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,3 @@
+[console_scripts]
+daveloop = daveloop:main
+daveloop-swebench = daveloop_swebench:main

daveloop-1.0.0.dist-info/top_level.txt ADDED Viewed

@@ -0,0 +1,2 @@
+daveloop
+daveloop_swebench

daveloop.py ADDED Viewed

@@ -0,0 +1,716 @@
+#!/usr/bin/env python3
+"""
+DaveLoop - Self-Healing Debug Agent
+Orchestrates Claude Code CLI in a feedback loop until bugs are resolved.
+"""
+import subprocess
+import sys
+import os
+import argparse
+import threading
+import time
+import itertools
+from datetime import datetime
+from pathlib import Path
+# Configuration
+# Default iteration cap; used as the default value of the -m/--max-iterations option.
+MAX_ITERATIONS = 20
+# Find prompt file - works both when running as script and when installed as package
+def find_prompt_file():
+    """Find the prompt file in the correct location."""
+    # Try script directory first (for development)
+    script_dir = Path(__file__).parent
+    script_prompt = script_dir / "daveloop_prompt.md"
+    if script_prompt.exists():
+        return script_prompt
+    # Try package resources (for pip install)
+    try:
+        import importlib.resources as pkg_resources
+        try:
+            # Python 3.9+
+            files = pkg_resources.files(__package__ or __name__.split('.')[0])
+            return files / "daveloop_prompt.md"
+        except AttributeError:
+            # Python 3.7-3.8
+            with pkg_resources.path(__package__ or __name__.split('.')[0], "daveloop_prompt.md") as p:
+                return p
+    except (ImportError, FileNotFoundError):
+        pass
+    # Fallback to script directory
+    return script_prompt
+# Directory containing this module.
+SCRIPT_DIR = Path(__file__).parent
+# Resolved once at import time; load_prompt() checks whether the file actually exists.
+PROMPT_FILE = find_prompt_file()
+LOG_DIR = Path.cwd() / "logs"  # Use current working directory for logs
+# Exit signals from Claude Code
+# check_exit_condition() searches Claude's output for these marker strings.
+SIGNAL_RESOLVED = "[DAVELOOP:RESOLVED]"
+SIGNAL_BLOCKED = "[DAVELOOP:BLOCKED]"
+SIGNAL_CLARIFY = "[DAVELOOP:CLARIFY]"
+# ============================================================================
+# ANSI Color Codes
+# ============================================================================
+class Colors:
+    RESET = "\033[0m"
+    BOLD = "\033[1m"
+    DIM = "\033[2m"
+    # Foreground
+    BLACK = "\033[30m"
+    RED = "\033[31m"
+    GREEN = "\033[32m"
+    YELLOW = "\033[33m"
+    BLUE = "\033[34m"
+    MAGENTA = "\033[35m"
+    CYAN = "\033[36m"
+    WHITE = "\033[37m"
+    # Bright foreground
+    BRIGHT_RED = "\033[91m"
+    BRIGHT_GREEN = "\033[92m"
+    BRIGHT_YELLOW = "\033[93m"
+    BRIGHT_BLUE = "\033[94m"
+    BRIGHT_MAGENTA = "\033[95m"
+    BRIGHT_CYAN = "\033[96m"
+    BRIGHT_WHITE = "\033[97m"
+    # Background
+    BG_BLACK = "\033[40m"
+    BG_RED = "\033[41m"
+    BG_GREEN = "\033[42m"
+    BG_BLUE = "\033[44m"
+    BG_MAGENTA = "\033[45m"
+    BG_CYAN = "\033[46m"
+C = Colors  # Shorthand
+# Enable ANSI and UTF-8 on Windows
+# Runs at import time and only when sys.platform is "win32".
+if sys.platform == "win32":
+    os.system("chcp 65001 >nul 2>&1")  # Set console to UTF-8 (command output is discarded)
+    os.system("")  # Enables ANSI escape sequences in Windows terminal
+    # Force UTF-8 encoding for stdout/stderr (only if not already wrapped).
+    # A stream is re-wrapped when it is not a TextIOWrapper or its encoding is
+    # not 'utf-8'; streams without a .buffer attribute are left untouched.
+    import io
+    if not isinstance(sys.stdout, io.TextIOWrapper) or sys.stdout.encoding != 'utf-8':
+        if hasattr(sys.stdout, 'buffer'):
+            sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
+    if not isinstance(sys.stderr, io.TextIOWrapper) or sys.stderr.encoding != 'utf-8':
+        if hasattr(sys.stderr, 'buffer'):
+            sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
+# ============================================================================
+# ASCII Art Banner
+# ============================================================================
+# Startup banner printed by main() after the screen is cleared. The color
+# codes are interpolated once, when this module is imported.
+BANNER = f"""
+{C.BRIGHT_BLUE}{C.BOLD}
+   ██████╗  █████╗ ██╗   ██╗███████╗██╗      ██████╗  ██████╗ ██████╗
+   ██╔══██╗██╔══██╗██║   ██║██╔════╝██║     ██╔═══██╗██╔═══██╗██╔══██╗
+   ██║  ██║███████║██║   ██║█████╗  ██║     ██║   ██║██║   ██║██████╔╝
+   ██║  ██║██╔══██║╚██╗ ██╔╝██╔══╝  ██║     ██║   ██║██║   ██║██╔═══╝
+   ██████╔╝██║  ██║ ╚████╔╝ ███████╗███████╗╚██████╔╝╚██████╔╝██║
+   ╚═════╝ ╚═╝  ╚═╝  ╚═══╝  ╚══════╝╚══════╝ ╚═════╝  ╚═════╝ ╚═╝
+{C.RESET}
+{C.BRIGHT_WHITE}{C.BOLD}                  Self-Healing Debug Agent{C.RESET}
+{C.WHITE}            Powered by Claude Code - Autonomous Mode{C.RESET}
+"""
+# ============================================================================
+# UI Components
+# ============================================================================
+def print_header_box(title: str, color: str = C.BRIGHT_BLUE):
+    """Print a header."""
+    print(f"\n{color}{C.BOLD}{title}{C.RESET}")
+    print(f"{color}{'─'*len(title)}{C.RESET}\n")
+def print_section(title: str, color: str = C.BRIGHT_BLUE):
+    """Print a section divider."""
+    print(f"\n{color}{C.BOLD}{title}{C.RESET}")
+    print(f"{color}{'─'*len(title)}{C.RESET}\n")
+def print_status(label: str, value: str, color: str = C.WHITE):
+    """Print a status line."""
+    print(f"  {C.WHITE}{label}:{C.RESET} {color}{value}{C.RESET}")
+def print_iteration_header(iteration: int, max_iter: int):
+    """Print the iteration header with visual progress."""
+    progress = iteration / max_iter
+    bar_width = 30
+    filled = int(bar_width * progress)
+    bar = f"{C.BLUE}{'█' * filled}{C.DIM}{'░' * (bar_width - filled)}{C.RESET}"
+    iteration_text = f"ITERATION {iteration}/{max_iter}"
+    percentage_text = f"{int(progress*100)}%"
+    print(f"\n{C.BOLD}{C.WHITE}{iteration_text}{C.RESET}  {bar}  {C.BRIGHT_BLUE}{percentage_text}{C.RESET}\n")
+def print_success_box(message: str):
+    """Print an epic success message."""
+    print(f"\n{C.BRIGHT_GREEN}{C.BOLD}")
+    print("    ███████╗ ██╗   ██╗  ██████╗  ██████╗ ███████╗ ███████╗ ███████╗")
+    print("    ██╔════╝ ██║   ██║ ██╔════╝ ██╔════╝ ██╔════╝ ██╔════╝ ██╔════╝")
+    print("    ███████╗ ██║   ██║ ██║      ██║      █████╗   ███████╗ ███████╗")
+    print("    ╚════██║ ██║   ██║ ██║      ██║      ██╔══╝   ╚════██║ ╚════██║")
+    print("    ███████║ ╚██████╔╝ ╚██████╗ ╚██████╗ ███████╗ ███████║ ███████║")
+    print("    ╚══════╝  ╚═════╝   ╚═════╝  ╚═════╝ ╚══════╝ ╚══════╝ ╚══════╝")
+    print()
+    print(f"         {C.BRIGHT_YELLOW}★ ★ ★{C.RESET}{C.BRIGHT_GREEN}{C.BOLD}  {C.BRIGHT_WHITE}BUG SUCCESSFULLY RESOLVED{C.RESET}{C.BRIGHT_GREEN}{C.BOLD}  {C.BRIGHT_YELLOW}★ ★ ★{C.RESET}")
+    print()
+    print(f"                      {C.WHITE}{message}{C.RESET}")
+    print(f"{C.RESET}\n")
+def print_error_box(message: str):
+    """Print an error message."""
+    print(f"\n{C.BRIGHT_RED}{C.BOLD}✗ ERROR: {C.WHITE}{message}{C.RESET}\n")
+def print_warning_box(message: str):
+    """Print a warning message."""
+    print(f"\n{C.BRIGHT_YELLOW}{C.BOLD}⚠ WARNING: {C.WHITE}{message}{C.RESET}\n")
+# ============================================================================
+# Spinner Animation
+# ============================================================================
+class Spinner:
+    """Animated spinner for showing work in progress."""
+    def __init__(self, message: str = "Processing"):
+        self.message = message
+        self.running = False
+        self.thread = None
+        self.frames = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
+        self.start_time = None
+    def spin(self):
+        idx = 0
+        while self.running:
+            elapsed = time.time() - self.start_time
+            frame = self.frames[idx % len(self.frames)]
+            sys.stdout.write(f"\r  {C.BRIGHT_CYAN}{frame}{C.RESET} {C.BOLD}{self.message}{C.RESET} {C.DIM}({elapsed:.0f}s){C.RESET}  ")
+            sys.stdout.flush()
+            idx += 1
+            time.sleep(0.1)
+    def start(self):
+        self.running = True
+        self.start_time = time.time()
+        self.thread = threading.Thread(target=self.spin)
+        self.thread.start()
+    def stop(self, final_message: str = None):
+        self.running = False
+        if self.thread:
+            self.thread.join()
+        elapsed = time.time() - self.start_time
+        if final_message:
+            sys.stdout.write(f"\r  {C.GREEN}✓{C.RESET} {final_message} {C.DIM}({elapsed:.1f}s){C.RESET}                    \n")
+        else:
+            sys.stdout.write(f"\r  {C.GREEN}✓{C.RESET} {self.message} complete {C.DIM}({elapsed:.1f}s){C.RESET}                    \n")
+        sys.stdout.flush()
+# ============================================================================
+# Output Formatter
+# ============================================================================
+def format_claude_output(output: str) -> str:
+    """Format Claude's output with colors and sections."""
+    lines = output.split('\n')
+    formatted = []
+    in_reasoning = False
+    in_code = False
+    for line in lines:
+        # Reasoning block
+        if "=== DAVELOOP REASONING ===" in line:
+            in_reasoning = True
+            formatted.append(f"\n{C.BRIGHT_YELLOW}┌{'─'*50}┐{C.RESET}")
+            formatted.append(f"{C.BRIGHT_YELLOW}│{C.BOLD} 🧠 REASONING{C.RESET}")
+            formatted.append(f"{C.BRIGHT_YELLOW}├{'─'*50}┤{C.RESET}")
+            continue
+        elif "===========================" in line and in_reasoning:
+            in_reasoning = False
+            formatted.append(f"{C.BRIGHT_YELLOW}└{'─'*50}┘{C.RESET}\n")
+            continue
+        # Verification block
+        if "=== VERIFICATION ===" in line:
+            formatted.append(f"\n{C.BRIGHT_GREEN}┌{'─'*50}┐{C.RESET}")
+            formatted.append(f"{C.BRIGHT_GREEN}│{C.BOLD} ✓ VERIFICATION{C.RESET}")
+            formatted.append(f"{C.BRIGHT_GREEN}├{'─'*50}┤{C.RESET}")
+            continue
+        elif "====================" in line:
+            formatted.append(f"{C.BRIGHT_GREEN}└{'─'*50}┘{C.RESET}\n")
+            continue
+        # Code blocks
+        if line.strip().startswith("```"):
+            in_code = not in_code
+            if in_code:
+                formatted.append(f"{C.DIM}┌─ code ────────────────────────────────{C.RESET}")
+            else:
+                formatted.append(f"{C.DIM}└───────────────────────────────────────{C.RESET}")
+            continue
+        # Reasoning labels
+        if in_reasoning:
+            if line.startswith("KNOWN:"):
+                formatted.append(f"{C.BRIGHT_YELLOW}│{C.RESET} {C.CYAN}KNOWN:{C.RESET}{line[6:]}")
+            elif line.startswith("UNKNOWN:"):
+                formatted.append(f"{C.BRIGHT_YELLOW}│{C.RESET} {C.MAGENTA}UNKNOWN:{C.RESET}{line[8:]}")
+            elif line.startswith("HYPOTHESIS:"):
+                formatted.append(f"{C.BRIGHT_YELLOW}│{C.RESET} {C.YELLOW}HYPOTHESIS:{C.RESET}{line[11:]}")
+            elif line.startswith("NEXT ACTION:"):
+                formatted.append(f"{C.BRIGHT_YELLOW}│{C.RESET} {C.GREEN}NEXT ACTION:{C.RESET}{line[12:]}")
+            elif line.startswith("WHY:"):
+                formatted.append(f"{C.BRIGHT_YELLOW}│{C.RESET} {C.BLUE}WHY:{C.RESET}{line[4:]}")
+            else:
+                formatted.append(f"{C.BRIGHT_YELLOW}│{C.RESET} {line}")
+            continue
+        # Exit signals - dim them out, don't make prominent
+        if "[DAVELOOP:RESOLVED]" in line:
+            formatted.append(f"  {C.DIM}→ [Exit signal: RESOLVED]{C.RESET}")
+            continue
+        elif "[DAVELOOP:BLOCKED]" in line:
+            formatted.append(f"  {C.DIM}→ [Exit signal: BLOCKED]{C.RESET}")
+            continue
+        elif "[DAVELOOP:CLARIFY]" in line:
+            formatted.append(f"  {C.DIM}→ [Exit signal: CLARIFY]{C.RESET}")
+            continue
+        # Code content
+        if in_code:
+            formatted.append(f"{C.DIM}│{C.RESET} {C.WHITE}{line}{C.RESET}")
+            continue
+        # Regular content
+        formatted.append(f"  {line}")
+    return '\n'.join(formatted)
+# ============================================================================
+# Core Functions
+# ============================================================================
+def load_prompt() -> str:
+    """Load the DaveLoop system prompt."""
+    if PROMPT_FILE.exists():
+        return PROMPT_FILE.read_text(encoding="utf-8")
+    else:
+        print_warning_box(f"Prompt file not found: {PROMPT_FILE}")
+        return "You are debugging. Fix the bug. Output [DAVELOOP:RESOLVED] when done."
+def find_claude_cli():
+    """Find Claude CLI executable path."""
+    import platform
+    import shutil
+    # 1. Check environment variable (highest priority)
+    env_path = os.environ.get('CLAUDE_CLI_PATH')
+    if env_path and os.path.exists(env_path):
+        return env_path
+    # 2. Try common installation paths
+    is_windows = platform.system() == "Windows"
+    if is_windows:
+        common_paths = [
+            os.path.expanduser("~\\AppData\\Local\\Programs\\claude\\claude.cmd"),
+            os.path.expanduser("~\\AppData\\Roaming\\npm\\claude.cmd"),
+            "C:\\Program Files\\Claude\\claude.cmd",
+            "C:\\Program Files (x86)\\Claude\\claude.cmd",
+        ]
+        for path in common_paths:
+            if os.path.exists(path):
+                return path
+    else:
+        common_paths = [
+            "/usr/local/bin/claude",
+            "/usr/bin/claude",
+            os.path.expanduser("~/.local/bin/claude"),
+        ]
+        for path in common_paths:
+            if os.path.exists(path):
+                return path
+    # 3. Check if it's in PATH
+    claude_name = "claude.cmd" if is_windows else "claude"
+    if shutil.which(claude_name):
+        return claude_name
+    # 4. Not found
+    return None
+def run_claude_code(prompt: str, working_dir: str = None, continue_session: bool = False, stream: bool = True) -> str:
+    """Execute Claude Code CLI with the given prompt.
+    If stream=True, output is printed in real-time and also returned.
+    """
+    claude_cmd = find_claude_cli()
+    if not claude_cmd:
+        error_msg = (
+            "Claude CLI not found!\n\n"
+            "Please install Claude Code CLI or set CLAUDE_CLI_PATH environment variable:\n"
+            "  Windows: set CLAUDE_CLI_PATH=C:\\path\\to\\claude.cmd\n"
+            "  Linux/Mac: export CLAUDE_CLI_PATH=/path/to/claude\n\n"
+            "Install from: https://github.com/anthropics/claude-code"
+        )
+        print_error_box(error_msg)
+        return "[DAVELOOP:ERROR] Claude CLI not found"
+    cmd = [claude_cmd]
+    if continue_session:
+        cmd.append("--continue")
+    cmd.extend(["-p", "--verbose", "--output-format", "stream-json", "--allowedTools", "Bash,Read,Write,Edit,Glob,Grep,Task"])
+    try:
+        if stream:
+            # Stream output in real-time
+            process = subprocess.Popen(
+                cmd,
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                text=True,
+                encoding='utf-8',
+                errors='replace',
+                cwd=working_dir,
+                bufsize=1  # Line buffered
+            )
+            # Send prompt and close stdin
+            process.stdin.write(prompt)
+            process.stdin.close()
+            # Heartbeat thread to show we're alive
+            start_time = time.time()
+            heartbeat_active = True
+            def heartbeat():
+                while heartbeat_active:
+                    elapsed = int(time.time() - start_time)
+                    print(f"\r  {C.BLUE}[{elapsed}s elapsed...]{C.RESET}    ", end='')
+                    sys.stdout.flush()
+                    time.sleep(3)
+            heartbeat_thread = threading.Thread(target=heartbeat, daemon=True)
+            heartbeat_thread.start()
+            # Read and display JSON stream output
+            import json
+            output_lines = []
+            full_text = []
+            for line in process.stdout:
+                # Clear heartbeat line
+                print(f"\r{' '*40}\r", end='')
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    data = json.loads(line)
+                    msg_type = data.get("type", "")
+                    # Handle different message types
+                    if msg_type == "assistant":
+                        # Assistant text message
+                        content = data.get("message", {}).get("content", [])
+                        for block in content:
+                            if block.get("type") == "text":
+                                text = block.get("text", "")
+                                for line_text in text.split('\n'):
+                                    formatted = format_output_line(line_text)
+                                    print(formatted)
+                                full_text.append(text)
+                    elif msg_type == "content_block_delta":
+                        # Streaming text delta
+                        delta = data.get("delta", {})
+                        if delta.get("type") == "text_delta":
+                            text = delta.get("text", "")
+                            print(text, end='')
+                            full_text.append(text)
+                    elif msg_type == "tool_use":
+                        # Tool being used
+                        tool_name = data.get("name", "unknown")
+                        print(f"\n  {C.BLUE}🔧 Using tool: {tool_name}{C.RESET}")
+                    elif msg_type == "tool_result":
+                        # Tool result
+                        print(f"  {C.BLUE}✓ Tool completed{C.RESET}\n")
+                    elif msg_type == "result":
+                        # Final result
+                        text = data.get("result", "")
+                        if text:
+                            for line_text in text.split('\n'):
+                                formatted = format_output_line(line_text)
+                                print(formatted)
+                            full_text.append(text)
+                    elif msg_type == "error":
+                        error_msg = data.get("error", {}).get("message", "Unknown error")
+                        print(f"  {C.RED}ERROR: {error_msg}{C.RESET}")
+                    sys.stdout.flush()
+                except json.JSONDecodeError:
+                    # Not JSON, just print as-is
+                    print(f"  {line}")
+                    full_text.append(line)
+                output_lines.append(line)
+            heartbeat_active = False
+            print(f"\r{' '*40}\r", end='')  # Clear final heartbeat
+            process.wait(timeout=600)
+            return '\n'.join(full_text)
+        else:
+            # Non-streaming mode
+            result = subprocess.run(
+                cmd,
+                input=prompt,
+                capture_output=True,
+                text=True,
+                encoding='utf-8',
+                errors='replace',
+                cwd=working_dir,
+                timeout=600
+            )
+            output = result.stdout
+            if result.stderr:
+                output += f"\n{C.RED}[STDERR]{C.RESET}\n{result.stderr}"
+            return output
+    except subprocess.TimeoutExpired:
+        return "[DAVELOOP:TIMEOUT] Claude Code iteration timed out after 10 minutes"
+    except FileNotFoundError:
+        return "[DAVELOOP:ERROR] Claude Code CLI not found. Is it installed?"
+    except Exception as e:
+        return f"[DAVELOOP:ERROR] {str(e)}"
+def format_output_line(line: str) -> str:
+    """Format a single line of Claude's output with colors."""
+    # Reasoning markers
+    if "=== DAVELOOP REASONING ===" in line:
+        return f"\n{C.BRIGHT_BLUE}{'─'*50}\n  🧠 REASONING\n{'─'*50}{C.RESET}"
+    if "===========================" in line:
+        return f"{C.BRIGHT_BLUE}{'─'*50}{C.RESET}\n"
+    # Reasoning labels
+    if line.startswith("KNOWN:"):
+        return f"  {C.BLUE}KNOWN:{C.RESET}{C.WHITE}{line[6:]}{C.RESET}"
+    if line.startswith("UNKNOWN:"):
+        return f"  {C.BLUE}UNKNOWN:{C.RESET}{C.WHITE}{line[8:]}{C.RESET}"
+    if line.startswith("HYPOTHESIS:"):
+        return f"  {C.BLUE}HYPOTHESIS:{C.RESET}{C.WHITE}{line[11:]}{C.RESET}"
+    if line.startswith("NEXT ACTION:"):
+        return f"  {C.BLUE}NEXT ACTION:{C.RESET}{C.WHITE}{line[12:]}{C.RESET}"
+    if line.startswith("WHY:"):
+        return f"  {C.BLUE}WHY:{C.RESET}{C.WHITE}{line[4:]}{C.RESET}"
+    # Exit signals - just dim them out in the stream, don't make them prominent
+    # The actual success/error boxes will be shown after iteration completes
+    if "[DAVELOOP:RESOLVED]" in line:
+        return f"  {C.DIM}→ [Exit signal detected: RESOLVED]{C.RESET}"
+    if "[DAVELOOP:BLOCKED]" in line:
+        return f"  {C.DIM}→ [Exit signal detected: BLOCKED]{C.RESET}"
+    if "[DAVELOOP:CLARIFY]" in line:
+        return f"  {C.DIM}→ [Exit signal detected: CLARIFY]{C.RESET}"
+    # Code blocks
+    if line.strip().startswith("```"):
+        return f"{C.BLUE}{'─'*40}{C.RESET}"
+    # Default - white text
+    return f"  {C.WHITE}{line}{C.RESET}"
+def check_exit_condition(output: str) -> tuple[str, bool]:
+    """Check if we should exit the loop."""
+    if SIGNAL_RESOLVED in output:
+        return "RESOLVED", True
+    if SIGNAL_BLOCKED in output:
+        return "BLOCKED", True
+    if SIGNAL_CLARIFY in output:
+        return "CLARIFY", True
+    if "[DAVELOOP:ERROR]" in output:
+        return "ERROR", True
+    if "[DAVELOOP:TIMEOUT]" in output:
+        return "TIMEOUT", False
+    return "CONTINUE", False
+def save_log(iteration: int, content: str, session_id: str):
+    """Save iteration log to file."""
+    LOG_DIR.mkdir(exist_ok=True)
+    log_file = LOG_DIR / f"{session_id}_iteration_{iteration:02d}.log"
+    log_file.write_text(content, encoding="utf-8")
+# ============================================================================
+# Main Entry Point
+# ============================================================================
+def main():
+    parser = argparse.ArgumentParser(
+        description="DaveLoop - Self-Healing Debug Agent",
+        formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    parser.add_argument("bug", nargs="?", help="Bug description or error message")
+    parser.add_argument("-f", "--file", help="Read bug description from file")
+    parser.add_argument("-d", "--dir", help="Working directory for Claude Code")
+    parser.add_argument("-m", "--max-iterations", type=int, default=MAX_ITERATIONS)
+    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
+    args = parser.parse_args()
+    # Clear screen and show banner
+    os.system('cls' if os.name == 'nt' else 'clear')
+    print(BANNER)
+    # Get bug description
+    if args.file:
+        bug_input = Path(args.file).read_text(encoding="utf-8")
+    elif args.bug:
+        bug_input = args.bug
+    else:
+        print(f"  {C.CYAN}Describe the bug (Ctrl+D or Ctrl+Z to finish):{C.RESET}")
+        bug_input = sys.stdin.read().strip()
+    if not bug_input:
+        print_error_box("No bug description provided")
+        return 1
+    # Setup
+    session_id = datetime.now().strftime("%Y%m%d_%H%M%S")
+    system_prompt = load_prompt()
+    working_dir = args.dir or os.getcwd()
+    # Session info
+    print_header_box(f"SESSION: {session_id}", C.BRIGHT_BLUE)
+    print_status("Working Directory", working_dir, C.WHITE)
+    print_status("Max Iterations", str(args.max_iterations), C.WHITE)
+    print_status("Context Mode", "PERSISTENT (--continue)", C.WHITE)
+    print_status("System Prompt", f"{len(system_prompt)} chars loaded", C.WHITE)
+    print()
+    print_section("BUG REPORT", C.BRIGHT_RED)
+    # Wrap bug input nicely
+    for line in bug_input.split('\n')[:10]:
+        print(f"  {C.RED}{line[:80]}{C.RESET}")
+    if len(bug_input.split('\n')) > 10:
+        print(f"  {C.RED}... ({len(bug_input.split(chr(10))) - 10} more lines){C.RESET}")
+    print()
+    sys.stdout.flush()
+    # Initial context
+    context = f"""
+## Bug Report
+{bug_input}
+## Instructions
+Analyze this bug. Gather whatever logs/information you need to understand it.
+Then fix it. Use the reasoning protocol before each action.
+"""
+    iteration_history = []
+    for iteration in range(1, args.max_iterations + 1):
+        print_iteration_header(iteration, args.max_iterations)
+        if iteration == 1:
+            full_prompt = f"{system_prompt}\n\n---\n\n{context}"
+            continue_session = False
+        else:
+            full_prompt = context
+            continue_session = True
+        if args.verbose:
+            print(f"  {C.DIM}[DEBUG] Prompt: {len(full_prompt)} chars, continue={continue_session}{C.RESET}")
+        # Show "Claude is working" indicator
+        print(f"\n  {C.BRIGHT_BLUE}▶ Claude is working...{C.RESET}\n")
+        sys.stdout.flush()
+        # Run Claude with real-time streaming output
+        output = run_claude_code(full_prompt, working_dir, continue_session=continue_session, stream=True)
+        print(f"\n  {C.BLUE}✓ Iteration complete{C.RESET}\n")
+        # Save log
+        save_log(iteration, output, session_id)
+        iteration_history.append(output)
+        # Check exit condition
+        signal, should_exit = check_exit_condition(output)
+        if should_exit:
+            if signal == "RESOLVED":
+                print_success_box(f"Bug fixed in {iteration} iteration(s)!")
+                print_status("Session", session_id, C.WHITE)
+                print_status("Logs", str(LOG_DIR), C.WHITE)
+                print()
+                return 0
+            elif signal == "CLARIFY":
+                print_warning_box("Claude needs clarification")
+                print(f"\n  {C.BLUE}Your response:{C.RESET}")
+                human_input = input(f"  {C.WHITE}> {C.RESET}")
+                context = f"""
+## Human Clarification
+{human_input}
+Continue debugging with this information. Use the reasoning protocol before each action.
+"""
+                continue
+            elif signal == "BLOCKED":
+                print_error_box(f"Claude is blocked - needs human help")
+                print_status("Session", session_id, C.WHITE)
+                print_status("Logs", str(LOG_DIR), C.WHITE)
+                print()
+                return 1
+            else:
+                print_error_box(f"Error occurred: {signal}")
+                return 1
+        # Prepare context for next iteration
+        context = f"""
+## Iteration {iteration + 1}
+The bug is NOT yet resolved. You have full context from previous iterations.
+Continue debugging. Analyze what happened, determine next steps, and proceed.
+Use the reasoning protocol before each action.
+"""
+    # Max iterations reached
+    print_warning_box(f"Max iterations ({args.max_iterations}) reached")
+    print_status("Session", session_id, C.WHITE)
+    print_status("Logs", str(LOG_DIR), C.WHITE)
+    print()
+    # Save summary
+    summary = f"# DaveLoop Session {session_id}\n\n"
+    summary += f"Bug: {bug_input[:200]}...\n\n"
+    summary += f"Iterations: {args.max_iterations}\n\n"
+    summary += "## Iteration History\n\n"
+    for i, hist in enumerate(iteration_history, 1):
+        summary += f"### Iteration {i}\n```\n{hist[:500]}...\n```\n\n"
+    (LOG_DIR / f"{session_id}_summary.md").write_text(summary, encoding="utf-8")
+    return 1
+if __name__ == "__main__":
+    sys.exit(main())

daveloop_swebench.py ADDED Viewed

@@ -0,0 +1,432 @@
+#!/usr/bin/env python3
+"""
+DaveLoop SWE-bench Runner
+Evaluates DaveLoop agent against SWE-bench benchmark tasks.
+"""
+import subprocess
+import sys
+import os
+import json
+import argparse
+import shutil
+from pathlib import Path
+from datetime import datetime
+from typing import Dict, List, Optional
+import tempfile
+# Import DaveLoop components
+from daveloop import (
+    Colors as C, print_header_box, print_section, print_status,
+    print_success_box, print_error_box, print_warning_box,
+    run_claude_code, check_exit_condition, SIGNAL_RESOLVED
+)
+# Configuration
+# Directory containing this script; the paths below are built relative to it.
+SCRIPT_DIR = Path(__file__).parent
+# Directory where main() places the results_<session>.json file.
+RESULTS_DIR = SCRIPT_DIR / "swebench_results"
+# Root directory under which task repositories are cloned.
+WORK_DIR = SCRIPT_DIR / "swebench_work"
+# System prompt file read by main(); a built-in fallback prompt is used when it is missing.
+PROMPT_FILE = SCRIPT_DIR / "daveloop_prompt.md"
+# Default value of the --max-iterations command-line option.
+MAX_ITERATIONS_PER_TASK = 10
+# Startup banner printed by main() right after the screen is cleared.
+BANNER = f"""
+{C.BRIGHT_BLUE}{C.BOLD}
+   ███████╗██╗    ██╗███████╗      ██████╗ ███████╗███╗   ██╗ ██████╗██╗  ██╗
+   ██╔════╝██║    ██║██╔════╝      ██╔══██╗██╔════╝████╗  ██║██╔════╝██║  ██║
+   ███████╗██║ █╗ ██║█████╗  █████╗██████╔╝█████╗  ██╔██╗ ██║██║     ███████║
+   ╚════██║██║███╗██║██╔══╝  ╚════╝██╔══██╗██╔══╝  ██║╚██╗██║██║     ██╔══██║
+   ███████║╚███╔███╔╝███████╗      ██████╔╝███████╗██║ ╚████║╚██████╗██║  ██║
+   ╚══════╝ ╚══╝╚══╝ ╚══════╝      ╚═════╝ ╚══════╝╚═╝  ╚═══╝ ╚═════╝╚═╝  ╚═╝
+{C.RESET}
+{C.WHITE}              DaveLoop × SWE-bench: Real-World Bug Benchmark{C.RESET}
+"""
+# ============================================================================
+# SWE-bench Dataset Interface
+# ============================================================================
+class SWEBenchTask:
+    """Represents a single SWE-bench task."""
+    def __init__(self, data: Dict):
+        self.instance_id = data.get('instance_id', '')
+        self.repo = data.get('repo', '')
+        self.base_commit = data.get('base_commit', '')
+        self.problem_statement = data.get('problem_statement', '')
+        self.hints_text = data.get('hints_text', '')
+        self.patch = data.get('patch', '')
+        self.test_patch = data.get('test_patch', '')
+        self.version = data.get('version', '')
+        self.environment_setup_commit = data.get('environment_setup_commit', '')
+    def __str__(self):
+        return f"{self.instance_id} ({self.repo})"
+def load_swebench_dataset(dataset_name: str = "princeton-nlp/SWE-bench_Lite", split: str = "test", limit: Optional[int] = None) -> List[SWEBenchTask]:
+    """Load SWE-bench dataset from Hugging Face."""
+    try:
+        from datasets import load_dataset
+        print_section(f"Loading {dataset_name}", C.BRIGHT_CYAN)
+        print(f"  {C.CYAN}Downloading dataset from Hugging Face...{C.RESET}")
+        dataset = load_dataset(dataset_name, split=split)
+        if limit:
+            dataset = dataset.select(range(min(limit, len(dataset))))
+        tasks = [SWEBenchTask(item) for item in dataset]
+        print(f"  {C.GREEN}✓ Loaded {len(tasks)} tasks{C.RESET}\n")
+        return tasks
+    except ImportError:
+        print_error_box("datasets library not installed. Run: pip install datasets")
+        sys.exit(1)
+    except Exception as e:
+        print_error_box(f"Failed to load dataset: {e}")
+        sys.exit(1)
+def load_swebench_local(json_file: Path) -> List[SWEBenchTask]:
+    """Load SWE-bench tasks from local JSON file."""
+    try:
+        with open(json_file, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+        if isinstance(data, list):
+            tasks = [SWEBenchTask(item) for item in data]
+        else:
+            tasks = [SWEBenchTask(data)]
+        print(f"  {C.GREEN}✓ Loaded {len(tasks)} tasks from {json_file}{C.RESET}\n")
+        return tasks
+    except Exception as e:
+        print_error_box(f"Failed to load local file: {e}")
+        sys.exit(1)
+# ============================================================================
+# Repository Setup
+# ============================================================================
+def remove_readonly(func, path, excinfo):
+    """Helper to handle readonly files on Windows."""
+    import stat
+    os.chmod(path, stat.S_IWRITE)
+    func(path)
+def safe_rmtree(path):
+    """Safely remove directory tree, handling Windows permission issues."""
+    try:
+        shutil.rmtree(path, onerror=remove_readonly)
+    except Exception as e:
+        print(f"  {C.YELLOW}Warning: Could not fully clean directory: {e}{C.RESET}")
+def setup_task_repo(task: SWEBenchTask, work_dir: Path) -> Optional[Path]:
+    """Clone and setup repository for a task."""
+    repo_name = task.repo.replace('/', '_')
+    repo_path = work_dir / repo_name / task.instance_id
+    print_section(f"Setting up: {task.instance_id}", C.BRIGHT_CYAN)
+    # Clean existing directory
+    if repo_path.exists():
+        print(f"  {C.YELLOW}Cleaning existing directory...{C.RESET}")
+        safe_rmtree(repo_path)
+    repo_path.mkdir(parents=True, exist_ok=True)
+    # Clone repository
+    repo_url = f"https://github.com/{task.repo}.git"
+    print(f"  {C.CYAN}Cloning {repo_url}...{C.RESET}")
+    try:
+        subprocess.run(
+            ["git", "clone", "--quiet", repo_url, str(repo_path)],
+            check=True,
+            capture_output=True,
+            timeout=300
+        )
+    except subprocess.CalledProcessError as e:
+        print_error_box(f"Failed to clone repository: {e.stderr.decode()}")
+        return None
+    except subprocess.TimeoutExpired:
+        print_error_box("Git clone timed out after 5 minutes")
+        return None
+    # Checkout base commit
+    print(f"  {C.CYAN}Checking out commit {task.base_commit[:8]}...{C.RESET}")
+    try:
+        subprocess.run(
+            ["git", "checkout", task.base_commit],
+            cwd=repo_path,
+            check=True,
+            capture_output=True
+        )
+    except subprocess.CalledProcessError as e:
+        print_error_box(f"Failed to checkout commit: {e.stderr.decode()}")
+        return None
+    print(f"  {C.GREEN}✓ Repository ready at {repo_path}{C.RESET}\n")
+    return repo_path
+# ============================================================================
+# Task Execution
+# ============================================================================
+def create_task_prompt(task: SWEBenchTask) -> str:
+    """Create DaveLoop prompt for a SWE-bench task."""
+    prompt = f"""# SWE-bench Task: {task.instance_id}
+## Repository
+{task.repo}
+## Problem Statement
+{task.problem_statement}
+"""
+    if task.hints_text:
+        prompt += f"""
+## Hints
+{task.hints_text}
+"""
+    prompt += """
+## Your Task
+Analyze and fix the issue described above. Use the reasoning protocol before each action.
+When you believe the issue is resolved, run the tests and output [DAVELOOP:RESOLVED].
+"""
+    return prompt
+def run_task(task: SWEBenchTask, repo_path: Path, system_prompt: str, max_iterations: int = 10) -> Dict:
+    """Run DaveLoop on a single SWE-bench task."""
+    result = {
+        'instance_id': task.instance_id,
+        'repo': task.repo,
+        'resolved': False,
+        'iterations': 0,
+        'error': None,
+        'start_time': datetime.now().isoformat(),
+        'end_time': None
+    }
+    print_header_box(f"RUNNING: {task.instance_id}", C.BRIGHT_MAGENTA)
+    task_prompt = create_task_prompt(task)
+    context = task_prompt
+    for iteration in range(1, max_iterations + 1):
+        result['iterations'] = iteration
+        print(f"\n{C.BRIGHT_BLUE}{'─'*70}")
+        print(f"  ITERATION {iteration}/{max_iterations}")
+        print(f"{'─'*70}{C.RESET}\n")
+        # Build prompt
+        if iteration == 1:
+            full_prompt = f"{system_prompt}\n\n---\n\n{context}"
+            continue_session = False
+        else:
+            full_prompt = context
+            continue_session = True
+        # Run Claude
+        print(f"  {C.BRIGHT_BLUE}▶ Claude is working...{C.RESET}\n")
+        output = run_claude_code(full_prompt, str(repo_path), continue_session=continue_session, stream=True)
+        print(f"\n  {C.BLUE}✓ Iteration complete{C.RESET}\n")
+        # Check exit condition
+        signal, should_exit = check_exit_condition(output)
+        if should_exit:
+            if signal == "RESOLVED":
+                result['resolved'] = True
+                result['end_time'] = datetime.now().isoformat()
+                print_success_box(f"Task resolved in {iteration} iteration(s)!")
+                return result
+            elif signal in ["BLOCKED", "ERROR"]:
+                result['error'] = signal
+                result['end_time'] = datetime.now().isoformat()
+                print_error_box(f"Task failed: {signal}")
+                return result
+        # Continue to next iteration
+        context = f"""
+## Iteration {iteration + 1}
+The issue is NOT yet resolved. You have full context from previous iterations.
+Continue debugging and fixing the issue. Use the reasoning protocol before each action.
+"""
+    # Max iterations reached
+    result['error'] = 'MAX_ITERATIONS'
+    result['end_time'] = datetime.now().isoformat()
+    print_warning_box(f"Max iterations ({max_iterations}) reached without resolution")
+    return result
+# ============================================================================
+# Evaluation & Reporting
+# ============================================================================
+def save_results(results: List[Dict], output_file: Path):
+    """Save results to JSON file."""
+    RESULTS_DIR.mkdir(exist_ok=True)
+    with open(output_file, 'w', encoding='utf-8') as f:
+        json.dump(results, f, indent=2)
+    print(f"\n  {C.GREEN}✓ Results saved to {output_file}{C.RESET}")
+def print_summary(results: List[Dict]):
+    """Print summary statistics."""
+    total = len(results)
+    resolved = sum(1 for r in results if r['resolved'])
+    failed = sum(1 for r in results if r.get('error'))
+    print_header_box("EVALUATION SUMMARY", C.BRIGHT_GREEN)
+    print_status("Total Tasks", str(total), C.WHITE)
+    print_status("Resolved", f"{resolved} ({resolved/total*100:.1f}%)", C.GREEN)
+    print_status("Failed", f"{failed} ({failed/total*100:.1f}%)", C.RED)
+    print()
+    # Breakdown by error type
+    if failed > 0:
+        print(f"  {C.BLUE}│{C.RESET} {C.WHITE}Failure Breakdown:{C.RESET}")
+        error_types = {}
+        for r in results:
+            if r.get('error'):
+                error_types[r['error']] = error_types.get(r['error'], 0) + 1
+        for error_type, count in error_types.items():
+            print(f"  {C.BLUE}│{C.RESET}   - {error_type}: {count}")
+        print()
+# ============================================================================
+# Main Entry Point
+# ============================================================================
+def main():
+    parser = argparse.ArgumentParser(
+        description="DaveLoop SWE-bench Evaluation",
+        formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    parser.add_argument("-d", "--dataset",
+                       default="princeton-nlp/SWE-bench_Lite",
+                       help="Hugging Face dataset name (default: SWE-bench_Lite)")
+    parser.add_argument("-f", "--file",
+                       help="Load tasks from local JSON file instead of Hugging Face")
+    parser.add_argument("-l", "--limit", type=int,
+                       help="Limit number of tasks to run")
+    parser.add_argument("-m", "--max-iterations", type=int,
+                       default=MAX_ITERATIONS_PER_TASK,
+                       help="Max iterations per task")
+    parser.add_argument("-s", "--start-from", type=int, default=0,
+                       help="Start from task index (0-based)")
+    parser.add_argument("--keep-repos", action="store_true",
+                       help="Keep cloned repositories after evaluation")
+    args = parser.parse_args()
+    # Clear screen and show banner
+    os.system('cls' if os.name == 'nt' else 'clear')
+    print(BANNER)
+    # Load system prompt
+    if PROMPT_FILE.exists():
+        system_prompt = PROMPT_FILE.read_text(encoding="utf-8")
+    else:
+        print_warning_box(f"Prompt file not found: {PROMPT_FILE}")
+        system_prompt = "You are a debugging agent. Fix bugs and output [DAVELOOP:RESOLVED] when done."
+    # Load tasks
+    if args.file:
+        tasks = load_swebench_local(Path(args.file))
+    else:
+        tasks = load_swebench_dataset(args.dataset, limit=args.limit)
+    if args.start_from > 0:
+        tasks = tasks[args.start_from:]
+        print(f"  {C.YELLOW}Starting from task index {args.start_from}{C.RESET}\n")
+    # Setup work directory
+    WORK_DIR.mkdir(exist_ok=True)
+    # Run evaluation
+    session_id = datetime.now().strftime("%Y%m%d_%H%M%S")
+    results = []
+    print_header_box(f"SESSION: {session_id}", C.BRIGHT_BLUE)
+    print_status("Dataset", args.dataset if not args.file else args.file, C.WHITE)
+    print_status("Tasks", str(len(tasks)), C.WHITE)
+    print_status("Max Iterations/Task", str(args.max_iterations), C.WHITE)
+    print_status("Work Directory", str(WORK_DIR), C.WHITE)
+    print()
+    for i, task in enumerate(tasks, 1):
+        print(f"\n{C.BRIGHT_MAGENTA}{'='*70}")
+        print(f"  TASK {i}/{len(tasks)}: {task.instance_id}")
+        print(f"{'='*70}{C.RESET}\n")
+        # Setup repository
+        repo_path = setup_task_repo(task, WORK_DIR)
+        if not repo_path:
+            results.append({
+                'instance_id': task.instance_id,
+                'repo': task.repo,
+                'resolved': False,
+                'error': 'SETUP_FAILED',
+                'iterations': 0
+            })
+            continue
+        # Run task
+        try:
+            result = run_task(task, repo_path, system_prompt, args.max_iterations)
+            results.append(result)
+        except KeyboardInterrupt:
+            print_warning_box("Evaluation interrupted by user")
+            break
+        except Exception as e:
+            print_error_box(f"Unexpected error: {e}")
+            results.append({
+                'instance_id': task.instance_id,
+                'repo': task.repo,
+                'resolved': False,
+                'error': f'EXCEPTION: {str(e)}',
+                'iterations': 0
+            })
+        finally:
+            # Cleanup repository unless --keep-repos
+            if not args.keep_repos and repo_path.exists():
+                print(f"  {C.DIM}Cleaning up repository...{C.RESET}")
+                safe_rmtree(repo_path)
+    # Save and display results
+    output_file = RESULTS_DIR / f"results_{session_id}.json"
+    save_results(results, output_file)
+    print_summary(results)
+    print(f"\n{C.BRIGHT_BLUE}{'='*70}{C.RESET}")
+    print(f"  {C.BOLD}Evaluation complete!{C.RESET}")
+    print(f"  {C.DIM}Results: {output_file}{C.RESET}")
+    print(f"{C.BRIGHT_BLUE}{'='*70}{C.RESET}\n")
+    return 0 if all(r['resolved'] for r in results) else 1
+# Script entry point: run main() and use its return value as the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())