claude-evolve 1.3.40 → 1.3.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -354
- package/bin/claude-evolve-csv-fix +100 -0
- package/bin/claude-evolve-edit +33 -12
- package/bin/claude-evolve-ideate +79 -12
- package/bin/claude-evolve-main +16 -14
- package/bin/claude-evolve-run +3 -22
- package/bin/claude-evolve-status +27 -5
- package/bin/claude-evolve-worker +154 -34
- package/lib/config.sh +8 -0
- package/lib/csv-lock.sh +14 -4
- package/lib/csv_helper.py +40 -2
- package/lib/evolution_processor.py +10 -3
- package/package.json +1 -1
- package/templates/config.yaml +4 -0
package/bin/claude-evolve-worker
CHANGED
@@ -18,6 +18,59 @@ cleanup_temp() {
 # Set trap to clean up temp files on exit
 trap cleanup_temp EXIT INT TERM
 
+# Function to handle failures with retry logic
+handle_failure() {
+  local candidate_id="$1"
+  local current_status="$2"
+  local performance="${3:-0}"
+
+  # If this is already a retry, increment the retry count
+  if [[ $current_status =~ ^failed-retry([0-9]+)$ ]]; then
+    local retry_num=${BASH_REMATCH[1]}
+    local new_retry_num=$((retry_num + 1))
+
+    if [[ $new_retry_num -le $MAX_RETRIES ]]; then
+      local new_status="failed-retry${new_retry_num}"
+      update_csv_row_with_lock "$candidate_id" "status" "$new_status"
+      update_csv_row_with_lock "$candidate_id" "performance" "$performance"
+      echo "[WORKER-$$] ✗ Retry $retry_num failed, marked as $new_status"
+      exit 1
+    else
+      update_csv_row_with_lock "$candidate_id" "status" "failed"
+      update_csv_row_with_lock "$candidate_id" "performance" "$performance"
+      echo "[WORKER-$$] ✗ Max retries ($MAX_RETRIES) exceeded, marking as permanently failed"
+      exit 1
+    fi
+  elif [[ $current_status == "failed" ]]; then
+    # Initial failure, convert to retry1 if retries are enabled
+    if [[ $MAX_RETRIES -gt 0 ]]; then
+      update_csv_row_with_lock "$candidate_id" "status" "failed-retry1"
+      update_csv_row_with_lock "$candidate_id" "performance" "$performance"
+      echo "[WORKER-$$] ✗ Initial failure, marked as failed-retry1 for retry"
+      exit 1
+    else
+      update_csv_row_with_lock "$candidate_id" "status" "failed"
+      update_csv_row_with_lock "$candidate_id" "performance" "$performance"
+      echo "[WORKER-$$] ✗ Failed (retries disabled)"
+      # Use exit code 1 - systemic issue since retries are disabled
+      exit 1
+    fi
+  else
+    # Not a failure scenario, convert to retry1 if retries enabled
+    if [[ $MAX_RETRIES -gt 0 ]]; then
+      update_csv_row_with_lock "$candidate_id" "status" "failed-retry1"
+      update_csv_row_with_lock "$candidate_id" "performance" "$performance"
+      echo "[WORKER-$$] ✗ Evaluation failed, marked as failed-retry1 for retry"
+      exit 1
+    else
+      update_csv_row_with_lock "$candidate_id" "status" "failed"
+      update_csv_row_with_lock "$candidate_id" "performance" "$performance"
+      echo "[WORKER-$$] ✗ Evaluation failed (retries disabled)"
+      exit 1
+    fi
+  fi
+}
+
 # Load configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # shellcheck source=../lib/config.sh
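handle_failure drives the new retry lifecycle: a candidate escalates from failed-retry1 up to failed-retryN and is marked permanently failed once the count passes MAX_RETRIES. A minimal standalone sketch of that escalation rule, with the CSV updates left out and a hypothetical next_status helper standing in for the real function:

```bash
#!/usr/bin/env bash
# Sketch only - next_status is not part of the package; it just mirrors
# the status transitions performed by handle_failure.
MAX_RETRIES=3

next_status() {
  local current="$1"
  if [[ $current =~ ^failed-retry([0-9]+)$ ]]; then
    local n=$((BASH_REMATCH[1] + 1))
    if ((n <= MAX_RETRIES)); then echo "failed-retry$n"; else echo "failed"; fi
  else
    # Initial failure, or any other status being treated as a failure
    if ((MAX_RETRIES > 0)); then echo "failed-retry1"; else echo "failed"; fi
  fi
}

next_status "failed"          # -> failed-retry1
next_status "failed-retry1"   # -> failed-retry2
next_status "failed-retry3"   # -> failed (permanent, with MAX_RETRIES=3)
```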
@@ -59,14 +112,25 @@ done
 
 # If no ID provided, find next pending
 if [[ -z $candidate_id ]]; then
-
-  if [[ -z $
+  candidate_result=$(find_next_pending_with_lock)
+  if [[ -z $candidate_result ]]; then
     echo "[DEBUG] No pending candidates found" >&2
     exit 0
   fi
+
+  # Parse candidate_id|original_status format
+  if [[ $candidate_result == *"|"* ]]; then
+    candidate_id="${candidate_result%|*}"  # Everything before |
+    original_candidate_status="${candidate_result#*|}"  # Everything after |
+  else
+    # Fallback for old format (shouldn't happen)
+    candidate_id="$candidate_result"
+    original_candidate_status=""
+  fi
 else
   # Mark specified candidate as running
   update_csv_row_with_lock "$candidate_id" "status" "running"
+  original_candidate_status=""  # Unknown for manually specified candidates
 fi
 
 echo "[WORKER-$$] Processing candidate ID: $candidate_id"
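The worker now receives `<id>|<original status>` as a single string from find_next_pending_with_lock and splits it with bash parameter expansion. A quick illustration with made-up values:

```bash
# Made-up value illustrating the split used above.
candidate_result="42|failed-retry2"

candidate_id="${candidate_result%|*}"               # "42"            (everything before |)
original_candidate_status="${candidate_result#*|}"  # "failed-retry2" (everything after |)

echo "$candidate_id / $original_candidate_status"
```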
@@ -124,6 +188,23 @@ fi
 echo "[WORKER-$$] Description: $description"
 echo "[WORKER-$$] Based on ID: $based_on_id"
 
+# AIDEV-NOTE: Retry logic - detect if this is a retry attempt
+is_retry=false
+retry_count=0
+# Use original_candidate_status for retry detection (if available), otherwise fall back to CSV status
+retry_status="$original_candidate_status"
+if [[ -z "$retry_status" ]]; then
+  retry_status="$status"
+fi
+
+if [[ $retry_status =~ ^failed-retry([0-9]+)$ ]]; then
+  is_retry=true
+  retry_count=${BASH_REMATCH[1]}
+  echo "[WORKER-$$] 🔄 Processing retry attempt #$retry_count"
+elif [[ $retry_status == "failed" ]]; then
+  echo "[WORKER-$$] ⚠️ Initial failure detected - this should be converted to failed-retry1 to enable retries"
+fi
+
 # AIDEV-NOTE: Using common evolution processor logic for consistent handling
 # Determine parent algorithm
 if [[ -z $based_on_id || $based_on_id == "0" || $based_on_id == '""' ]]; then
@@ -139,7 +220,7 @@ else
 
   if [[ ! -f $parent_file ]]; then
     echo "[ERROR] Parent algorithm not found: $parent_file" >&2
-
+    handle_failure "$candidate_id" "$retry_status" "0"
     exit 1
   fi
 fi
@@ -155,11 +236,28 @@ fi
 temp_file="${output_file}.tmp$$"
 
 # Check if processing should be skipped using common logic
+# Set environment variable for retry detection
+if [[ $is_retry == "true" ]]; then
+  export RETRY_CANDIDATE=true
+else
+  export RETRY_CANDIDATE=false
+fi
+
 eval "$("$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_processor.py" "$id" "$based_on_id" "$FULL_OUTPUT_DIR" "$ROOT_DIR" "$parent_file" "$output_file")"
 
 # Handle copy operation to temp file
 if [[ "$skip_copy" == "True" ]]; then
   echo "[WORKER-$$] ⚠️ Skipping copy - $reason"
+elif [[ $is_retry == "true" ]]; then
+  # For retries, edit the existing failed algorithm in-place
+  if [[ -f "$output_file" ]]; then
+    cp "$output_file" "$temp_file"
+    echo "[WORKER-$$] 🔄 Copied existing algorithm for retry: $temp_file"
+  else
+    # Fallback to parent if existing file doesn't exist
+    cp "$parent_file" "$temp_file"
+    echo "[WORKER-$$] ⚠️ Existing algorithm not found, using parent: $temp_file"
+  fi
 else
   cp "$parent_file" "$temp_file"
   echo "[WORKER-$$] Copied parent to temp file: $temp_file"
@@ -178,15 +276,28 @@ else
 claude_cmd="${CLAUDE_CMD:-claude}"
 if ! command -v "$claude_cmd" >/dev/null 2>&1; then
   echo "[ERROR] Claude CLI not found" >&2
-
+  handle_failure "$candidate_id" "$retry_status" "0"
   exit 1
 fi
 
 CLAUDE_MODEL="sonnet"
 echo "[WORKER-$$] Using Claude $CLAUDE_MODEL for mutation"
 
-# Create mutation prompt
-
+# Create mutation prompt (different for retries vs initial attempts)
+if [[ $is_retry == "true" ]]; then
+  prompt="Fix the bugs in the file $temp_file. This algorithm was attempting to implement: $description
+
+The algorithm failed during evaluation. Please:
+- Analyze the code for potential bugs (syntax errors, logical issues, missing imports, etc.)
+- Fix any problems you find
+- Ensure the code runs without errors
+- Make sure it still implements the intended change: $description
+- Add appropriate error handling and validation
+- If possible, suggest a simple way to test this fix
+
+This is retry attempt #$retry_count. Focus on making the code robust and correct."
+else
+  prompt="Edit the file $temp_file to implement this specific change: $description
 
 Requirements:
 - Edit the file directly (don't just provide comments or suggestions)
@@ -196,6 +307,7 @@ Requirements:
 - Add proper error handling if needed
 
 The file currently contains the parent algorithm. Modify it according to the description above."
+fi
 
 # Log prompt
 {
@@ -213,7 +325,9 @@ The file currently contains the parent algorithm. Modify it according to the description above.
 
 # Check for rate limit (multiple possible messages)
 if echo "$claude_output" | grep -q -E "(usage limit|rate limit|limit reached|too many requests)"; then
-  echo "
+  echo "⚠️ Claude API rate limit reached" >&2
+  echo "⚠️ Claude output:" >&2
+  echo "$claude_output" >&2
   # Clean up the temp file
   if [[ -f "$temp_file" ]]; then
     rm "$temp_file"
@@ -225,30 +339,29 @@ The file currently contains the parent algorithm. Modify it according to the description above.
 fi
 
 if [[ $claude_exit_code -ne 0 ]]; then
-  echo "
+  echo "⚠️ Claude failed to mutate algorithm (exit code: $claude_exit_code)" >&2
+  echo "⚠️ Claude output:" >&2
+  echo "$claude_output" >&2
   # Clean up the temp file
   if [[ -f "$temp_file" ]]; then
     rm "$temp_file"
     echo "[WORKER-$$] Cleaned up temp file due to Claude failure" >&2
   fi
-
+  handle_failure "$candidate_id" "$retry_status" "0"
   exit 1
 fi
 
 # Verify that Claude actually modified the file
 if [[ -f "$temp_file" && -f "$parent_file" ]]; then
   if cmp -s "$temp_file" "$parent_file"; then
-    echo "" >&2
-    echo "
-    echo "
-    echo "
-    echo "ERROR: Marking as failed - no evaluation will run" >&2
-    echo "🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨" >&2
-    echo "" >&2
+    echo "⚠️ Unchanged algorithm detected - Claude didn't modify the file" >&2
+    echo "⚠️ Description was: $description" >&2
+    echo "⚠️ Claude's response:" >&2
+    echo "$claude_output" >&2
 
     # Clean up temp file and mark as failed
     rm "$temp_file"
-
+    handle_failure "$candidate_id" "$retry_status" "0"
     exit 1
   else
     # Changes were made - move temp file to final location
@@ -269,28 +382,41 @@ echo "[WORKER-$$] Running evaluation..."
 eval_output=""
 eval_exit_code=0
 
+# Stream evaluator output in real-time while capturing it
+eval_tempfile=$(mktemp)
 if [[ -n $timeout_seconds ]]; then
   echo "[WORKER-$$] Evaluation timeout: ${timeout_seconds}s"
-  #
-  if
-
+  # Stream output to both log and temp file
+  if EXPERIMENT_ID="$id" timeout "$timeout_seconds" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1 | tee "$eval_tempfile" | while IFS= read -r line; do
+    echo "[EVALUATOR] $line" >> "$LOGFILE"
+    echo "[EVALUATOR] $line" >&2
+  done; then
+    eval_exit_code=${PIPESTATUS[0]}
   else
     eval_exit_code=$?
     if [[ $eval_exit_code -eq 124 ]]; then
       echo "[ERROR] Evaluation timed out" >&2
       update_csv_row_with_lock "$candidate_id" "status" "timeout"
+      rm -f "$eval_tempfile"
       exit 1
     fi
   fi
 else
-  #
-  if
-
+  # Stream output to both log and temp file
+  if EXPERIMENT_ID="$id" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1 | tee "$eval_tempfile" | while IFS= read -r line; do
+    echo "[EVALUATOR] $line" >> "$LOGFILE"
+    echo "[EVALUATOR] $line" >&2
+  done; then
+    eval_exit_code=${PIPESTATUS[0]}
   else
     eval_exit_code=$?
   fi
 fi
 
+# Read the complete output from temp file
+eval_output=$(cat "$eval_tempfile")
+rm -f "$eval_tempfile"
+
 # Log evaluator output
 {
   echo "=== WORKER $$ - EVALUATOR OUTPUT ==="
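The evaluation block now streams evaluator output as it arrives while still capturing the complete output and the evaluator's own exit code. The key ingredients are tee into a temp file and PIPESTATUS[0]; a reduced sketch with placeholder names (./evaluator.py and worker.log are not package paths):

```bash
# Reduced sketch of the stream-and-capture pattern; names are placeholders.
capture=$(mktemp)
if python3 ./evaluator.py 2>&1 | tee "$capture" | while IFS= read -r line; do
  echo "[EVALUATOR] $line" >> worker.log
  echo "[EVALUATOR] $line" >&2
done; then
  exit_code=${PIPESTATUS[0]}   # the evaluator's exit code, not tee's or the loop's
else
  exit_code=$?
fi
output=$(cat "$capture")       # full output, re-read from the tee file
rm -f "$capture"
echo "evaluator exited with $exit_code, captured ${#output} bytes"
```

Because the while loop runs in a subshell, nothing it reads survives the pipeline, which is why the full output is re-read from the tee'd file afterwards.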
@@ -305,9 +431,7 @@ if [[ $eval_exit_code -eq 0 ]]; then
   if [[ $eval_output =~ ^[[:space:]]*-?[0-9]+\.?[0-9]*[[:space:]]*$ ]]; then
     score=$(echo "$eval_output" | tr -d ' ')
     if [[ $(echo "$score == 0" | bc -l) == "1" ]]; then
-
-      update_csv_row_with_lock "$candidate_id" "performance" "$score"
-      echo "[WORKER-$$] ✗ Evaluation failed with score 0"
+      handle_failure "$candidate_id" "$retry_status" "$score"
       exit 1
     else
       update_csv_row_with_lock "$candidate_id" "performance" "$score"
@@ -347,9 +471,7 @@ if [[ $eval_exit_code -eq 0 ]]; then
     if score=$(echo "$eval_output" | grep -o '"score"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
       if [[ -n $score ]]; then
         if [[ $(echo "$score == 0" | bc -l) == "1" ]]; then
-
-          update_csv_row_with_lock "$candidate_id" "performance" "$score"
-          echo "[WORKER-$$] ✗ Evaluation failed with score 0"
+          handle_failure "$candidate_id" "$retry_status" "$score"
           exit 1
         else
           update_csv_row_with_lock "$candidate_id" "performance" "$score"
@@ -364,9 +486,7 @@ if [[ $eval_exit_code -eq 0 ]]; then
     if score=$(echo "$eval_output" | grep -o '"performance"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
       if [[ -n $score ]]; then
         if [[ $(echo "$score == 0" | bc -l) == "1" ]]; then
-
-          update_csv_row_with_lock "$candidate_id" "performance" "$score"
-          echo "[WORKER-$$] ✗ Evaluation failed with score 0"
+          handle_failure "$candidate_id" "$retry_status" "$score"
           exit 1
         else
           update_csv_row_with_lock "$candidate_id" "performance" "$score"
@@ -381,10 +501,10 @@ if [[ $eval_exit_code -eq 0 ]]; then
   echo "[ERROR] Expected: plain number (e.g., 1.23) or JSON with 'score' or 'performance' field" >&2
   echo "[ERROR] Actual evaluator output was:" >&2
   echo "$eval_output" >&2
-
+  handle_failure "$candidate_id" "$retry_status" "0"
   exit 1
 else
   echo "[ERROR] Evaluator failed with exit code $eval_exit_code" >&2
-
+  handle_failure "$candidate_id" "$retry_status" "0"
   exit 1
 fi
package/lib/config.sh
CHANGED
@@ -45,6 +45,9 @@ DEFAULT_LOCK_TIMEOUT=10
 # Default auto ideation value
 DEFAULT_AUTO_IDEATE=true
 
+# Default retry value
+DEFAULT_MAX_RETRIES=3
+
 # Load configuration from config file
 load_config() {
   # Accept config file path as parameter
@@ -76,6 +79,9 @@ load_config() {
   # Set auto ideation default
   AUTO_IDEATE="$DEFAULT_AUTO_IDEATE"
 
+  # Set retry default
+  MAX_RETRIES="$DEFAULT_MAX_RETRIES"
+
   # Load config if found
   if [[ -f "$config_file" ]]; then
     echo "[DEBUG] Loading configuration from: $config_file" >&2
@@ -151,6 +157,7 @@ load_config() {
       parent_selection) PARENT_SELECTION="$value" ;;
      python_cmd) PYTHON_CMD="$value" ;;
      auto_ideate) AUTO_IDEATE="$value" ;;
+      max_retries) MAX_RETRIES="$value" ;;
      evolution_dir)
        echo "[WARN] evolution_dir in config is ignored - automatically inferred from config file location" >&2
        ;;
@@ -245,4 +252,5 @@ show_config() {
   echo " Max workers: $MAX_WORKERS"
   echo " Lock timeout: $LOCK_TIMEOUT"
   echo " Auto ideate: $AUTO_IDEATE"
+  echo " Max retries: $MAX_RETRIES"
 }
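With these defaults, a max_retries entry in config.yaml ends up in MAX_RETRIES once load_config has run. A rough sketch, assuming lib/config.sh can be sourced on its own and using an illustrative config path:

```bash
# Rough sketch - the config path and standalone sourcing are assumptions.
source lib/config.sh
load_config "evolution/config.yaml"
echo "Retries per failed candidate: $MAX_RETRIES"   # 3 unless max_retries overrides it
```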
package/lib/csv-lock.sh
CHANGED
@@ -230,29 +230,39 @@ find_next_pending_with_lock() {
     return 1
   fi
 
-  # Find oldest pending candidate and update to running using Python
+  # Find oldest pending candidate (including retries) and update to running using Python
   local candidate=$("$PYTHON_CMD" -c "
 import csv
 import sys
+import re
+
+def is_pending_retry(status):
+    '''Check if status is pending (empty, pending, or retry status).'''
+    if not status or status == 'pending':
+        return True
+    return status.startswith('failed-retry')
 
 # Read CSV
 with open('$csv_file', 'r') as f:
     reader = csv.reader(f)
     rows = list(reader)
 
-# Find first pending candidate
+# Find first pending candidate (including retries)
 candidate_id = None
+original_status = None
 for i in range(1, len(rows)):
     # If row has fewer than 5 fields, it's pending
     if len(rows[i]) < 5:
         candidate_id = rows[i][0]
+        original_status = ''  # Empty status means pending
         # Ensure row has 5 fields before setting status
         while len(rows[i]) < 5:
             rows[i].append('')
         rows[i][4] = 'running'  # Update status
         break
-    elif len(rows[i]) >= 5 and (rows[i][4]
+    elif len(rows[i]) >= 5 and is_pending_retry(rows[i][4]):
         candidate_id = rows[i][0]
+        original_status = rows[i][4]  # Save original status before overwriting
         rows[i][4] = 'running'  # Update status
         break
 
@@ -261,7 +271,7 @@ if candidate_id:
     with open('${csv_file}.tmp', 'w', newline='') as f:
         writer = csv.writer(f)
         writer.writerows(rows)
-    print(candidate_id)
+    print(f'{candidate_id}|{original_status}')  # Return both ID and original status
 ")
 
   if [ -n "$candidate" ]; then
package/lib/csv_helper.py
CHANGED
@@ -8,6 +8,7 @@ import csv
 import json
 import sys
 import os
+import re
 from typing import Dict, List, Any
 
 
@@ -50,6 +51,40 @@ def ensure_columns(headers: list[str], rows: list[list[str]], new_fields: dict)
     return headers, rows
 
 
+def parse_retry_status(status: str) -> tuple[str, int]:
+    """Parse retry status and return (base_status, retry_count).
+
+    Examples:
+        'failed' -> ('failed', 0)
+        'failed-retry1' -> ('failed', 1)
+        'failed-retry3' -> ('failed', 3)
+        'complete' -> ('complete', 0)
+    """
+    if not status:
+        return ('', 0)
+
+    match = re.match(r'^(.*)-retry(\d+)$', status)
+    if match:
+        base_status = match.group(1)
+        retry_count = int(match.group(2))
+        return (base_status, retry_count)
+    else:
+        return (status, 0)
+
+
+def is_retry_candidate(status: str) -> bool:
+    """Check if a status represents a retry candidate."""
+    base_status, _ = parse_retry_status(status)
+    return base_status == 'failed' and status.startswith('failed-retry')
+
+
+def is_pending_retry(status: str) -> bool:
+    """Check if status is pending (empty, 'pending', or retry status)."""
+    if not status or status == 'pending':
+        return True
+    return is_retry_candidate(status)
+
+
 def update_row_with_fields(headers: list[str], rows: list[list[str]], target_id: str, fields: dict):
     """Update a specific row with multiple fields."""
     # Find column indices
@@ -162,9 +197,12 @@ def main():
     try:
         headers, rows = read_csv(csv_file)
 
-        # Find first row with empty status or status
+        # Find first row with empty status, "pending", or retry status
         for i, row in enumerate(rows, start=2):  # Start at 2 (1-indexed, skip header)
-            if len(row) < 5
+            if len(row) < 5:
+                print(i)
+                sys.exit(0)
+            elif len(row) >= 5 and is_pending_retry(row[4]):
                 print(i)
                 sys.exit(0)
 
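These helpers give the shell scripts one place to interpret retry statuses. A quick behavioral check from the shell, assuming the package sources are available under package/lib:

```bash
# Assumes package/lib is reachable from the current directory.
python3 -c "
import sys
sys.path.insert(0, 'package/lib')
from csv_helper import parse_retry_status, is_pending_retry

print(parse_retry_status('failed-retry2'))  # ('failed', 2)
print(parse_retry_status('complete'))       # ('complete', 0)
print(is_pending_retry(''))                 # True  - empty status is pending
print(is_pending_retry('failed-retry1'))    # True  - eligible for another attempt
print(is_pending_retry('failed'))           # False - plain 'failed' is permanent
"
```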
package/lib/evolution_processor.py
CHANGED
@@ -14,7 +14,8 @@ def should_skip_processing(id_val, based_on_id, parent_file, output_file):
     """
     Determine if evolution processing should be skipped.
 
-    Simple rule: If file exists, skip everything
+    Simple rule: If file exists, skip everything UNLESS this is a retry candidate.
+    For retry candidates, we want Claude to process the existing file to fix bugs.
 
     Returns tuple: (skip_copy, skip_claude, reason)
     """
@@ -23,9 +24,15 @@ def should_skip_processing(id_val, based_on_id, parent_file, output_file):
         return True, True, "Baseline algorithm - no processing needed"
 
     # File existence check - if file exists, skip both copy and Claude
-    #
+    # EXCEPT for retry candidates which need Claude to fix the existing file
     if os.path.exists(output_file):
-
+        # Check if this might be a retry candidate by looking for retry status in environment
+        # The worker sets RETRY_CANDIDATE=true for retry processing
+        retry_env = os.environ.get('RETRY_CANDIDATE')
+        if retry_env == 'true':
+            return True, False, "Retry candidate - skip copy but run Claude for bug fixing"
+        else:
+            return True, True, "File already exists - skipping all processing"
 
     # File doesn't exist - proceed with copy and Claude
     return False, False, None
package/package.json
CHANGED
package/templates/config.yaml
CHANGED
@@ -42,6 +42,10 @@ python_cmd: "python3"
 # When true, automatically generate new ideas when no pending candidates remain
 auto_ideate: true
 
+# Retry configuration
+# Maximum number of retries for failed candidates before marking as permanently failed
+max_retries: 3
+
 # Parallel execution configuration
 parallel:
   # Enable parallel execution of evolution candidates