npm - claude-evolve - Versions diffs - 1.2.5 → 1.2.6 - Mend

claude-evolve 1.2.5 → 1.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md +37 -0
package/bin/claude-evolve-analyze +64 -7
package/bin/claude-evolve-ideate +96 -18
package/bin/claude-evolve-run +185 -12
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -130,6 +130,43 @@ This isn't sci-fi level "sleep through the entire evolution" automation - it's m
 - **Restart later** with `claude-evolve run` to continue from where you left off
 - **Perfect for long-term optimization** - run overnight, over weekends, or while working on other projects
+## Handling Failures and Recovery
+Evolution experiments can fail for various reasons. The system tracks these failures and provides recovery options.
+**Common failure types:**
+- **Infrastructure failures** - Missing dependencies (e.g., xgboost not installed)
+- **Code generation bugs** - Claude occasionally generates syntactically incorrect code
+- **Evaluation errors** - Evaluator crashes or returns invalid output
+- **Performance score 0** - Algorithm runs but produces no meaningful results (now marked as "failed")
+**Failure tracking in evolution.csv:**
+- `failed` - Evaluation error or performance score of 0
+- `timeout` - Evaluation exceeded time limit
+- `interrupted` - User interrupted with Ctrl+C
+- Check the `status` column to identify failed candidates
+**Manual recovery strategies:**
+1. **Force retry of failed candidates:**
+   - Edit `evolution.csv` and change status from "failed" to "pending"
+   - Clear the performance value for that row
+   - Run `claude-evolve run` to retry the candidate
+2. **Fix infrastructure issues:**
+   - Install missing dependencies: `pip install xgboost numpy scipy`
+   - Update Python environment if needed
+   - Check that evaluator.py has proper error handling
+3. **Guide around persistent failures:**
+   - If a specific approach keeps failing, add constraints to BRIEF.md
+   - Use `claude-evolve ideate` with explicit directions to avoid problematic patterns
+   - Consider updating evaluator.py to catch and handle specific error types
+**Future auto-recovery (planned):**
+- Automatic retry with different prompts for code generation failures
+- Dependency detection and installation suggestions
+- Smart failure pattern recognition to avoid similar mutations
 ## Requirements
 ### Required

package/bin/claude-evolve-analyze CHANGED Viewed

@@ -126,31 +126,88 @@ else
   echo "No completed candidates yet"
 fi
+# Generation analysis
+echo
+echo "=== Generation Analysis ==="
+declare -A gen_count gen_sum gen_completed
+while IFS=, read -r id _ desc perf status; do
+  [[ $id == "id" ]] && continue # Skip header
+  # Extract generation from ID
+  gen="gen01" # default for old numeric IDs
+  if [[ $id =~ ^(gen[0-9]+)- ]]; then
+    gen="${BASH_REMATCH[1]}"
+  elif [[ $id =~ ^[0-9]+$ ]]; then
+    gen="gen00" # Mark old numeric IDs as gen00
+  fi
+  # Track generation stats
+  : ${gen_count[$gen]:=0}
+  ((gen_count[$gen]++))
+  if [[ $status =~ ^(complete|completed)$ && -n $perf && $perf != "" ]]; then
+    : ${gen_completed[$gen]:=0}
+    : ${gen_sum[$gen]:=0}
+    ((gen_completed[$gen]++))
+    gen_sum[$gen]=$(echo "${gen_sum[$gen]} + $perf" | bc -l 2>/dev/null || echo "${gen_sum[$gen]}")
+  fi
+done <"$csv_file"
+# Display generation stats
+for gen in $(printf '%s\n' "${!gen_count[@]}" | sort); do
+  total_in_gen=${gen_count[$gen]}
+  completed_in_gen=${gen_completed[$gen]:-0}
+  echo -n "$gen: $total_in_gen candidates"
+  if [[ $completed_in_gen -gt 0 ]]; then
+    avg=$(echo "scale=4; ${gen_sum[$gen]} / $completed_in_gen" | bc -l 2>/dev/null || echo "0")
+    echo " ($completed_in_gen completed, avg: $avg)"
+  else
+    echo " (0 completed)"
+  fi
+done
 # Simple chart generation using gnuplot if available
 if command -v gnuplot >/dev/null 2>&1 && [[ $count_with_performance -gt 0 ]]; then
   echo
   echo "Generating performance chart: $output_file"
-  # Create data file for gnuplot
+  # Create data file for gnuplot with row numbers
   data_file="/tmp/evolution_data_$$.dat"
-  echo "# ID Performance" >"$data_file"
+  echo "# Row ID Performance Generation" >"$data_file"
+  row_num=0
   while IFS=, read -r id _ desc perf status; do
     [[ $id == "id" ]] && continue # Skip header
-    if [[ -n $perf && $perf != "" ]]; then
-      echo "$id $perf" >>"$data_file"
+    ((row_num++))
+    if [[ -n $perf && $perf != "" && $status =~ ^(complete|completed)$ ]]; then
+      # Extract generation number for coloring
+      gen_num=0
+      if [[ $id =~ ^gen([0-9]+)- ]]; then
+        gen_num=$((10#${BASH_REMATCH[1]}))
+      fi
+      echo "$row_num \"$id\" $perf $gen_num" >>"$data_file"
     fi
   done <"$csv_file"
   # Generate plot
   gnuplot <<EOF
-set terminal png size 800,600
+set terminal png size 1000,600
 set output "$output_file"
 set title "Algorithm Evolution Performance"
-set xlabel "Evolution ID"
+set xlabel "Evolution Order"
 set ylabel "Performance Score"
 set grid
-plot "$data_file" using 1:2 with linespoints title "Performance"
+set key outside right
+# Define color palette for generations
+set palette defined (0 "dark-red", 1 "red", 2 "orange", 3 "yellow", 4 "green", 5 "blue", 6 "violet")
+# Main plot with lines and points colored by generation
+plot "$data_file" using 1:3:4 with linespoints palette title "Performance", \
+     "$data_file" using 1:3:(sprintf("%g", \$3)) with labels offset 0,0.5 notitle
 EOF
   rm -f "$data_file"

package/bin/claude-evolve-ideate CHANGED Viewed

@@ -8,6 +8,43 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "$SCRIPT_DIR/../lib/config.sh"
 load_config
+# Helper function to call Claude with usage limit detection
+call_claude_with_limit_check() {
+  local prompt="$1"
+  local model="${2:-opus}"
+  # Call Claude and capture output
+  local claude_output
+  claude_output=$(echo "$prompt" | claude --dangerously-skip-permissions --model "$model" -p 2>&1)
+  local claude_exit_code=$?
+  # Check for usage limit
+  if echo "$claude_output" | grep -q "Claude AI usage limit reached"; then
+    # Extract timestamp if available
+    local limit_timestamp=$(echo "$claude_output" | grep -o "Claude AI usage limit reached|[0-9]*" | cut -d'|' -f2)
+    # Print red error message
+    echo -e "\033[31m[ERROR] CLAUDE AI USAGE LIMIT REACHED!\033[0m" >&2
+    echo -e "\033[31m[ERROR] Ideation halted due to API rate limits.\033[0m" >&2
+    if [[ -n $limit_timestamp ]]; then
+      # Convert timestamp to human-readable format
+      local limit_date=$(date -r "$limit_timestamp" "+%Y-%m-%d %H:%M:%S" 2>/dev/null || echo "Unknown time")
+      echo -e "\033[31m[ERROR] Limit will be released at: $limit_date\033[0m" >&2
+    fi
+    echo -e "\033[33m[INFO] Please wait for the rate limit to reset before continuing.\033[0m" >&2
+    echo -e "\033[33m[INFO] No ideas were generated. Run ideate again when the limit resets.\033[0m" >&2
+    exit 1
+  fi
+  # Output Claude's response
+  echo "$claude_output"
+  return $claude_exit_code
+}
 # Parse arguments
 use_strategies=true
 no_ai=false
@@ -80,28 +117,60 @@ if [[ $use_strategies == true ]]; then
   fi
 fi
-# Get next available ID
+# Get next generation number
+get_next_generation() {
+  if [[ ! -f "$FULL_CSV_PATH" ]]; then
+    echo "01"
+    return
+  fi
+  # Find highest generation number
+  local max_gen=0
+  while IFS=, read -r id rest; do
+    # Extract generation number from IDs like "gen01-001"
+    if [[ $id =~ ^gen([0-9]+)- ]]; then
+      local gen_num=$((10#${BASH_REMATCH[1]}))
+      if (( gen_num > max_gen )); then
+        max_gen=$gen_num
+      fi
+    fi
+  done < <(tail -n +2 "$FULL_CSV_PATH")
+  # Increment and format with leading zero
+  printf "%02d" $((max_gen + 1))
+}
+# Get next available ID for current generation
 get_next_id() {
+  local generation="$1"
   if [[ ! -f "$FULL_CSV_PATH" ]]; then
-    echo "1"
+    echo "gen${generation}-001"
     return
   fi
-  # Find highest ID and increment (pure shell)
+  # Find highest ID within this generation
   local max_id=0
   while IFS=, read -r id rest; do
-    if [[ $id =~ ^[0-9]+$ ]] && (( 10#$id > max_id )); then
-      max_id=$((10#$id))
+    # Match IDs for this specific generation
+    if [[ $id =~ ^gen${generation}-([0-9]+)$ ]]; then
+      local id_num=$((10#${BASH_REMATCH[1]}))
+      if (( id_num > max_id )); then
+        max_id=$id_num
+      fi
     fi
   done < <(tail -n +2 "$FULL_CSV_PATH")
-  echo $((max_id + 1))
+  # Format next ID with generation and 3-digit number
+  printf "gen%s-%03d" "$generation" $((max_id + 1))
 }
 # Add idea to CSV manually (fallback for manual mode)
 add_idea_manual() {
   local description="$1"
   local based_on_id="$2"
+  local generation="$3"
   local id
-  id=$(get_next_id)
+  id=$(get_next_id "$generation")
   # Escape quotes in description
   local escaped_desc="${description//\"/\"\"}"
@@ -141,7 +210,7 @@ ideate_manual() {
       continue
     fi
-    add_idea_manual "$description" ""
+    add_idea_manual "$description" "" "$CURRENT_GENERATION"
     ((ideas_added++))
     if [[ $i -lt $TOTAL_IDEAS ]]; then
@@ -209,7 +278,8 @@ Project Brief:
 $(cat "$FULL_BRIEF_PATH")
 Requirements for new CSV rows:
-- IDs must be numbers only (suitable for filenames)
+- IDs must use format: gen$CURRENT_GENERATION-XXX (e.g., gen$CURRENT_GENERATION-001, gen$CURRENT_GENERATION-002)
+- Continue numbering from the highest existing ID in generation $CURRENT_GENERATION
 - basedOnId should be empty (these are novel approaches)
 - Each description should be one clear sentence describing a specific algorithmic change
 - Descriptions should explore completely different approaches than existing ones
@@ -223,7 +293,7 @@ Example descriptions:
 Add exactly $count rows to the CSV file now."
   echo "[INFO] Calling Claude Opus to generate $count novel exploration ideas..."
-  if ! echo "$prompt" | claude --dangerously-skip-permissions --model opus -p; then
+  if ! call_claude_with_limit_check "$prompt" "opus"; then
     echo "[WARN] Claude failed to generate novel ideas" >&2
     return 1
   fi
@@ -251,7 +321,8 @@ Project Brief:
 $(cat "$FULL_BRIEF_PATH")
 Requirements for new CSV rows:
-- IDs must be numbers only (suitable for filenames)
+- IDs must use format: gen$CURRENT_GENERATION-XXX (e.g., gen$CURRENT_GENERATION-001, gen$CURRENT_GENERATION-002)
+- Continue numbering from the highest existing ID in generation $CURRENT_GENERATION
 - basedOnId should reference ONE of the successful algorithm IDs above (pick the best one)
 - Each description should be one clear sentence about parameter tuning
 - Focus on adjusting hyperparameters, thresholds, sizes, learning rates
@@ -265,7 +336,7 @@ Example descriptions:
 Add exactly $count parameter tuning rows to the CSV file now."
   echo "[INFO] Calling Claude Opus to generate $count hill climbing ideas..."
-  if ! echo "$prompt" | claude --dangerously-skip-permissions --model opus -p; then
+  if ! call_claude_with_limit_check "$prompt" "opus"; then
     echo "[WARN] Claude failed to generate hill climbing ideas" >&2
     return 1
   fi
@@ -293,7 +364,8 @@ Project Brief:
 $(cat "$FULL_BRIEF_PATH")
 Requirements for new CSV rows:
-- IDs must be numbers only (suitable for filenames)
+- IDs must use format: gen$CURRENT_GENERATION-XXX (e.g., gen$CURRENT_GENERATION-001, gen$CURRENT_GENERATION-002)
+- Continue numbering from the highest existing ID in generation $CURRENT_GENERATION
 - basedOnId should reference ONE of the successful algorithm IDs above (pick the best one)
 - Each description should be one clear sentence about architectural changes
 - Keep core insights but change implementation approach
@@ -307,7 +379,7 @@ Example descriptions:
 Add exactly $count structural modification rows to the CSV file now."
   echo "[INFO] Calling Claude Opus to generate $count structural mutation ideas..."
-  if ! echo "$prompt" | claude --dangerously-skip-permissions --model opus -p; then
+  if ! call_claude_with_limit_check "$prompt" "opus"; then
     echo "[WARN] Claude failed to generate structural mutation ideas" >&2
     return 1
   fi
@@ -335,7 +407,8 @@ Project Brief:
 $(cat "$FULL_BRIEF_PATH")
 Requirements for new CSV rows:
-- IDs must be numbers only (suitable for filenames)
+- IDs must use format: gen$CURRENT_GENERATION-XXX (e.g., gen$CURRENT_GENERATION-001, gen$CURRENT_GENERATION-002)
+- Continue numbering from the highest existing ID in generation $CURRENT_GENERATION
 - basedOnId should reference ONE of the successful algorithm IDs above (pick the best one as base)
 - Each description should be one clear sentence combining elements from different algorithms
 - Be specific about what elements to merge
@@ -349,7 +422,7 @@ Example descriptions:
 Add exactly $count hybrid combination rows to the CSV file now."
   echo "[INFO] Calling Claude Opus to generate $count crossover hybrid ideas..."
-  if ! echo "$prompt" | claude --dangerously-skip-permissions --model opus -p; then
+  if ! call_claude_with_limit_check "$prompt" "opus"; then
     echo "[WARN] Claude failed to generate crossover ideas" >&2
     return 1
   fi
@@ -399,7 +472,8 @@ $top_performers"
   prompt+="
 Requirements for new CSV rows:
-- IDs must be numbers only (suitable for filenames)
+- IDs must use format: gen$CURRENT_GENERATION-XXX (e.g., gen$CURRENT_GENERATION-001, gen$CURRENT_GENERATION-002)
+- Continue numbering from the highest existing ID in generation $CURRENT_GENERATION
 - basedOnId should be empty or reference existing algorithm ID
 - Each description should be one clear sentence describing an algorithmic approach
 - All new rows should have empty performance and status fields
@@ -407,13 +481,17 @@ Requirements for new CSV rows:
 Add exactly $TOTAL_IDEAS algorithm variation rows to the CSV file now."
   echo "[INFO] Calling Claude Opus to generate $TOTAL_IDEAS ideas (legacy mode)..."
-  if ! echo "$prompt" | claude --dangerously-skip-permissions --model opus -p; then
+  if ! call_claude_with_limit_check "$prompt" "opus"; then
     echo "[WARN] Claude failed to generate ideas" >&2
     return 1
   fi
   echo "[INFO] Legacy ideas generated"
 }
+# Determine generation number for this ideation run
+CURRENT_GENERATION=$(get_next_generation)
+echo "[INFO] Starting ideation for generation $CURRENT_GENERATION"
 # Main execution
 if [[ $no_ai == true ]]; then
   echo "[INFO] Manual entry mode"

package/bin/claude-evolve-run CHANGED Viewed

@@ -141,6 +141,71 @@ update_csv_row() {
   mv "$temp_file" "$FULL_CSV_PATH"
 }
+# Auto-recovery mechanism for common failures.
+# Globals:
+#   FULL_CSV_PATH - CSV file, rewritten in place for "syntax_error"
+#   PYTHON_CMD    - when set, pip is invoked as "$PYTHON_CMD -m pip"
+# Arguments:
+#   $1 - failure type: "import_error", "syntax_error" or "zero_score"
+#   $2 - error output to inspect
+#   $3 - line number of the candidate's row in the CSV (counting starts at 1
+#        on the first line of the file)
+#   $4 - candidate ID (currently unused)
+#   $5 - candidate description
+# Returns:
+#   0 when the caller should retry, 1 when no recovery was possible.
+attempt_recovery() {
+  local failure_type="$1"
+  local error_output="$2"
+  local row_num="$3"
+  local id="$4"
+  local description="$5"
+  local retry_marker="[RETRY: Fix syntax]"
+  # Working variables are local so they cannot clobber the caller's state
+  local missing_module temp_file current_row line id_field remainder based_field tail_fields
+  local -a pip_cmd
+  echo "[INFO] Attempting auto-recovery for $failure_type..."
+  case "$failure_type" in
+    "import_error")
+      # Extract missing module name
+      missing_module=$(echo "$error_output" | grep -o "No module named '[^']*'" | sed "s/No module named '\\([^']*\\)'/\\1/" | head -1)
+      # The name is scraped from program output, so only plain package-name
+      # characters are handed to pip (no leading "-", no paths or URLs).
+      if [[ $missing_module =~ ^[A-Za-z0-9][A-Za-z0-9._-]*$ ]]; then
+        echo "[INFO] Detected missing module: $missing_module"
+        echo "[INFO] Attempting to install via pip..."
+        # Use the configured interpreter's own pip when one is configured, so
+        # the install targets that interpreter rather than whatever "pip" is on PATH.
+        if [[ -n ${PYTHON_CMD:-} ]]; then
+          pip_cmd=("$PYTHON_CMD" -m pip)
+        else
+          pip_cmd=(pip)
+        fi
+        # Try to install the missing module
+        if "${pip_cmd[@]}" install "$missing_module" 2>&1; then
+          echo "[INFO] Successfully installed $missing_module. Retrying evaluation..."
+          return 0  # Signal to retry
+        else
+          echo "[WARN] Failed to install $missing_module automatically"
+          echo "[INFO] Please install manually: pip install $missing_module"
+        fi
+      fi
+      ;;
+    "syntax_error")
+      # For syntax errors, retry the mutation with additional guidance
+      echo "[INFO] Detected syntax error in generated code"
+      # A description that already carries the marker has been retried once;
+      # retrying again could repeat forever. NOTE(review): this presumes the
+      # caller passes the description as stored in the CSV - confirm.
+      if [[ $description == *"$retry_marker"* ]]; then
+        echo "[WARN] Syntax error persists after retry; not retrying again"
+        return 1
+      fi
+      echo "[INFO] Retrying mutation with additional constraints..."
+      # Reset performance/status so this candidate is picked up again
+      update_csv_row "$row_num" "" "pending"
+      # Add a recovery marker to the description to guide the next attempt.
+      # Only the first two commas of the target line are interpreted; the rest
+      # (description, performance, status) is kept verbatim, so a description
+      # containing commas is not torn apart. Status is left as written by
+      # update_csv_row above.
+      temp_file=$(mktemp) || return 1
+      current_row=1
+      while IFS= read -r line || [[ -n $line ]]; do
+        if [[ $current_row -eq $row_num && $line == *,*,* ]]; then
+          id_field="${line%%,*}"
+          remainder="${line#*,}"
+          based_field="${remainder%%,*}"
+          tail_fields="${remainder#*,}"
+          if [[ $tail_fields == \"* ]]; then
+            # Quoted description: the marker goes inside the opening quote
+            tail_fields="\"$retry_marker ${tail_fields#\"}"
+          else
+            tail_fields="$retry_marker $tail_fields"
+          fi
+          printf '%s,%s,%s\n' "$id_field" "$based_field" "$tail_fields"
+        else
+          printf '%s\n' "$line"
+        fi
+        current_row=$((current_row + 1))
+      done <"$FULL_CSV_PATH" >"$temp_file"
+      mv "$temp_file" "$FULL_CSV_PATH"
+      return 0  # Signal to retry
+      ;;
+    "zero_score")
+      # For zero scores, log additional diagnostic info
+      echo "[INFO] Algorithm produced zero score - likely a logic error"
+      echo "[INFO] Consider adding constraints to BRIEF.md to avoid this approach"
+      ;;
+  esac
+  return 1  # No recovery possible
+}
 # Main evolution loop
 while true; do
   # Find next candidate
@@ -198,7 +263,15 @@ if [[ -z $based_on_id || $based_on_id == "0" || $based_on_id == '""' ]]; then
   parent_file="$FULL_ALGORITHM_PATH"
   echo "[INFO] Using base algorithm (basedonID is empty or 0)"
 else
-  parent_file="$FULL_OUTPUT_DIR/evolution_id${based_on_id}.py"
+  # Handle both old format (numeric) and new format (genXX-XXX)
+  if [[ $based_on_id =~ ^[0-9]+$ ]]; then
+    # Old numeric format
+    parent_file="$FULL_OUTPUT_DIR/evolution_id${based_on_id}.py"
+  else
+    # New generation format
+    parent_file="$FULL_OUTPUT_DIR/evolution_${based_on_id}.py"
+  fi
   if [[ ! -f $parent_file ]]; then
     echo "[ERROR] Parent algorithm file not found: $parent_file" >&2
     update_csv_row "$row_num" "" "failed"
@@ -212,8 +285,15 @@ fi
 echo "[INFO] Using parent algorithm: $parent_file"
-# Generate mutation
-output_file="$FULL_OUTPUT_DIR/evolution_id${id}.py"
+# Generate mutation output file
+# Handle both old format (numeric) and new format (genXX-XXX)
+if [[ $id =~ ^[0-9]+$ ]]; then
+  # Old numeric format
+  output_file="$FULL_OUTPUT_DIR/evolution_id${id}.py"
+else
+  # New generation format
+  output_file="$FULL_OUTPUT_DIR/evolution_${id}.py"
+fi
 echo "[INFO] Generating algorithm mutation..."
 # Copy parent algorithm to output file first
@@ -267,8 +347,34 @@ else
     echo "=== CLAUDE RESPONSE ==="
   } >> "${LOGFILE}-developer"
-  # Use tee to show output and log simultaneously
-  if ! echo "$prompt" | "$claude_cmd" --dangerously-skip-permissions --model $CLAUDE_MODEL -p 2>&1 | tee -a "${LOGFILE}-developer"; then
+  # Append Claude's output to the developer log via tee and capture it for
+  # limit detection (tee's stdout feeds the capture).
+  # PIPESTATUS has to be read inside the substitution's own shell: once the
+  # assignment completes, the outer PIPESTATUS describes the assignment and
+  # has no element 1, so the claude stage's status is passed out via "exit".
+  claude_exit_code=0
+  claude_output=$(
+    echo "$prompt" | "$claude_cmd" --dangerously-skip-permissions --model "$CLAUDE_MODEL" -p 2>&1 | tee -a "${LOGFILE}-developer"
+    exit "${PIPESTATUS[1]}" # Exit code from claude command, not tee
+  ) || claude_exit_code=$?
+  # Check for usage limit
+  if echo "$claude_output" | grep -q "Claude AI usage limit reached"; then
+    # Extract timestamp if available (first match only)
+    limit_timestamp=$(echo "$claude_output" | grep -o "Claude AI usage limit reached|[0-9]*" | head -n 1 | cut -d'|' -f2)
+    # Print red error message
+    echo -e "\033[31m[ERROR] CLAUDE AI USAGE LIMIT REACHED!\033[0m" >&2
+    echo -e "\033[31m[ERROR] Evolution halted due to API rate limits.\033[0m" >&2
+    if [[ -n $limit_timestamp ]]; then
+      # Convert timestamp to human-readable format.
+      # BSD/macOS date reads epoch seconds with "-r N"; GNU date treats the
+      # "-r" argument as a file name and needs "-d @N", so both are tried.
+      limit_date=$(date -r "$limit_timestamp" "+%Y-%m-%d %H:%M:%S" 2>/dev/null \
+        || date -d "@$limit_timestamp" "+%Y-%m-%d %H:%M:%S" 2>/dev/null \
+        || echo "Unknown time")
+      echo -e "\033[31m[ERROR] Limit will be released at: $limit_date\033[0m" >&2
+    fi
+    echo -e "\033[33m[INFO] Please wait for the rate limit to reset before continuing.\033[0m" >&2
+    echo -e "\033[33m[INFO] The current candidate will remain in 'pending' status.\033[0m" >&2
+    # Don't mark as failed - leave it pending for retry later
+    exit 1
+  fi
+  # Check for other failures
+  if [[ $claude_exit_code -ne 0 ]]; then
     echo "[ERROR] Claude failed to mutate algorithm" >&2
     update_csv_row "$row_num" "" "failed"
     if should_continue_after_failure; then
@@ -338,15 +444,39 @@ if [[ $eval_exit_code -eq 0 ]]; then
   # Extract score from JSON (simple grep approach)
   if score=$(echo "$eval_output" | grep -o '"score"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
     if [[ -n $score ]]; then
-      update_csv_row "$row_num" "$score" "complete"
-      echo "[INFO] ✓ Evaluation completed successfully"
-      echo "[INFO] Performance score: $score"
-    else
-      # Try "performance" field
-      if score=$(echo "$eval_output" | grep -o '"performance"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
+      # Check if score is 0 and mark as failed
+      if (( $(echo "$score == 0" | bc -l) )); then
+        update_csv_row "$row_num" "$score" "failed"
+        echo "[INFO] ✗ Evaluation failed with score 0"
+        echo "[INFO] Performance score: $score"
+        if should_continue_after_failure; then
+          continue 2
+        else
+          exit 1
+        fi
+      else
         update_csv_row "$row_num" "$score" "complete"
         echo "[INFO] ✓ Evaluation completed successfully"
         echo "[INFO] Performance score: $score"
+      fi
+    else
+      # Try "performance" field
+      if score=$(echo "$eval_output" | grep -o '"performance"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
+        # Check if score is 0 and mark as failed
+        if (( $(echo "$score == 0" | bc -l) )); then
+          update_csv_row "$row_num" "$score" "failed"
+          echo "[INFO] ✗ Evaluation failed with score 0"
+          echo "[INFO] Performance score: $score"
+          if should_continue_after_failure; then
+            continue 2
+          else
+            exit 1
+          fi
+        else
+          update_csv_row "$row_num" "$score" "complete"
+          echo "[INFO] ✓ Evaluation completed successfully"
+          echo "[INFO] Performance score: $score"
+        fi
       else
         echo "[ERROR] No score found in evaluator output" >&2
         echo "[ERROR] Output: $eval_output" >&2
@@ -371,7 +501,50 @@ if [[ $eval_exit_code -eq 0 ]]; then
 else
   echo "[ERROR] Evaluator failed with exit code $eval_exit_code" >&2
   echo "[ERROR] Output: $eval_output" >&2
-  update_csv_row "$row_num" "" "failed"
+  # Check for common failure patterns and attempt recovery.
+  # status_recorded says whether this row's final status has already been
+  # written; every path that falls through without it is marked "failed"
+  # below, so a recovery attempt that does not end in a score can no longer
+  # leave the row without a failure status.
+  status_recorded=false
+  # Check for import errors
+  if echo "$eval_output" | grep -q "No module named"; then
+    if attempt_recovery "import_error" "$eval_output" "$row_num" "$id" "$description"; then
+      # Retry the evaluation
+      echo "[INFO] Retrying evaluation after recovery attempt..."
+      if eval_output=$("$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1); then
+        # Re-process the successful result
+        score=$(echo "$eval_output" | grep -o '"score"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' ') || score=""
+        if [[ -z $score ]]; then
+          # Same fallback field the first evaluation pass accepts
+          score=$(echo "$eval_output" | grep -o '"performance"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' ') || score=""
+        fi
+        if [[ -n $score ]]; then
+          if (( $(echo "$score == 0" | bc -l) )); then
+            update_csv_row "$row_num" "$score" "failed"
+            echo "[INFO] ✗ Evaluation failed with score 0"
+            status_recorded=true
+          else
+            update_csv_row "$row_num" "$score" "complete"
+            echo "[INFO] ✓ Recovery successful! Performance score: $score"
+            consecutive_failures=0
+            continue
+          fi
+        fi
+      fi
+    fi
+  fi
+  # Check for syntax errors
+  if echo "$eval_output" | grep -q "SyntaxError"; then
+    if attempt_recovery "syntax_error" "$eval_output" "$row_num" "$id" "$description"; then
+      # Skip to next iteration to retry with enhanced prompt
+      consecutive_failures=0
+      continue
+    fi
+  fi
+  # If no branch above recorded a status for this row, mark it as failed
+  if [[ $status_recorded == false ]]; then
+    update_csv_row "$row_num" "" "failed"
+  fi
   if should_continue_after_failure; then
     continue
   else

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-evolve",
-  "version": "1.2.5",
+  "version": "1.2.6",
   "bin": {
     "claude-evolve": "./bin/claude-evolve",
     "claude-evolve-main": "./bin/claude-evolve-main",