claude-evolve 1.2.5 → 1.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -130,6 +130,43 @@ This isn't sci-fi level "sleep through the entire evolution" automation - it's m
130
130
  - **Restart later** with `claude-evolve run` to continue from where you left off
131
131
  - **Perfect for long-term optimization** - run overnight, over weekends, or while working on other projects
132
132
 
133
+ ## Handling Failures and Recovery
134
+
135
+ Evolution experiments can fail for various reasons. The system tracks these failures and provides recovery options.
136
+
137
+ **Common failure types:**
138
+ - **Infrastructure failures** - Missing dependencies (e.g., xgboost not installed)
139
+ - **Code generation bugs** - Claude occasionally generates syntactically incorrect code
140
+ - **Evaluation errors** - Evaluator crashes or returns invalid output
141
+ - **Performance score 0** - Algorithm runs but produces no meaningful results (now marked as "failed")
142
+
143
+ **Failure tracking in evolution.csv:**
144
+ - `failed` - Evaluation error or performance score of 0
145
+ - `timeout` - Evaluation exceeded time limit
146
+ - `interrupted` - User interrupted with Ctrl+C
147
+ - Check the `status` column to identify failed candidates
148
+
149
+ **Manual recovery strategies:**
150
+ 1. **Force retry of failed candidates:**
151
+ - Edit `evolution.csv` and change status from "failed" to "pending"
152
+ - Clear the performance value for that row
153
+ - Run `claude-evolve run` to retry the candidate
154
+
155
+ 2. **Fix infrastructure issues:**
156
+ - Install missing dependencies: `pip install xgboost numpy scipy`
157
+ - Update Python environment if needed
158
+ - Check that evaluator.py has proper error handling
159
+
160
+ 3. **Guide around persistent failures:**
161
+ - If a specific approach keeps failing, add constraints to BRIEF.md
162
+ - Use `claude-evolve ideate` with explicit directions to avoid problematic patterns
163
+ - Consider updating evaluator.py to catch and handle specific error types
164
+
165
+ **Future auto-recovery (planned):**
166
+ - Automatic retry with different prompts for code generation failures
167
+ - Dependency detection and installation suggestions
168
+ - Smart failure pattern recognition to avoid similar mutations
169
+
133
170
  ## Requirements
134
171
 
135
172
  ### Required
@@ -126,31 +126,88 @@ else
126
126
  echo "No completed candidates yet"
127
127
  fi
128
128
 
129
+ # Generation analysis
130
+ echo
131
+ echo "=== Generation Analysis ==="
132
+ declare -A gen_count gen_sum gen_completed
133
+
134
+ while IFS=, read -r id _ desc perf status; do
135
+ [[ $id == "id" ]] && continue # Skip header
136
+
137
+ # Extract generation from ID
138
+ gen="gen01" # default for old numeric IDs
139
+ if [[ $id =~ ^(gen[0-9]+)- ]]; then
140
+ gen="${BASH_REMATCH[1]}"
141
+ elif [[ $id =~ ^[0-9]+$ ]]; then
142
+ gen="gen00" # Mark old numeric IDs as gen00
143
+ fi
144
+
145
+ # Track generation stats
146
+ : ${gen_count[$gen]:=0}
147
+ ((gen_count[$gen]++))
148
+
149
+ if [[ $status =~ ^(complete|completed)$ && -n $perf && $perf != "" ]]; then
150
+ : ${gen_completed[$gen]:=0}
151
+ : ${gen_sum[$gen]:=0}
152
+ ((gen_completed[$gen]++))
153
+ gen_sum[$gen]=$(echo "${gen_sum[$gen]} + $perf" | bc -l 2>/dev/null || echo "${gen_sum[$gen]}")
154
+ fi
155
+ done <"$csv_file"
156
+
157
+ # Display generation stats
158
+ for gen in $(printf '%s\n' "${!gen_count[@]}" | sort); do
159
+ total_in_gen=${gen_count[$gen]}
160
+ completed_in_gen=${gen_completed[$gen]:-0}
161
+
162
+ echo -n "$gen: $total_in_gen candidates"
163
+
164
+ if [[ $completed_in_gen -gt 0 ]]; then
165
+ avg=$(echo "scale=4; ${gen_sum[$gen]} / $completed_in_gen" | bc -l 2>/dev/null || echo "0")
166
+ echo " ($completed_in_gen completed, avg: $avg)"
167
+ else
168
+ echo " (0 completed)"
169
+ fi
170
+ done
171
+
129
172
  # Simple chart generation using gnuplot if available
130
173
  if command -v gnuplot >/dev/null 2>&1 && [[ $count_with_performance -gt 0 ]]; then
131
174
  echo
132
175
  echo "Generating performance chart: $output_file"
133
176
 
134
- # Create data file for gnuplot
177
+ # Create data file for gnuplot with row numbers
135
178
  data_file="/tmp/evolution_data_$$.dat"
136
- echo "# ID Performance" >"$data_file"
179
+ echo "# Row ID Performance Generation" >"$data_file"
137
180
 
181
+ row_num=0
138
182
  while IFS=, read -r id _ desc perf status; do
139
183
  [[ $id == "id" ]] && continue # Skip header
140
- if [[ -n $perf && $perf != "" ]]; then
141
- echo "$id $perf" >>"$data_file"
184
+ ((row_num++))
185
+ if [[ -n $perf && $perf != "" && $status =~ ^(complete|completed)$ ]]; then
186
+ # Extract generation number for coloring
187
+ gen_num=0
188
+ if [[ $id =~ ^gen([0-9]+)- ]]; then
189
+ gen_num=$((10#${BASH_REMATCH[1]}))
190
+ fi
191
+ echo "$row_num \"$id\" $perf $gen_num" >>"$data_file"
142
192
  fi
143
193
  done <"$csv_file"
144
194
 
145
195
  # Generate plot
146
196
  gnuplot <<EOF
147
- set terminal png size 800,600
197
+ set terminal png size 1000,600
148
198
  set output "$output_file"
149
199
  set title "Algorithm Evolution Performance"
150
- set xlabel "Evolution ID"
200
+ set xlabel "Evolution Order"
151
201
  set ylabel "Performance Score"
152
202
  set grid
153
- plot "$data_file" using 1:2 with linespoints title "Performance"
203
+ set key outside right
204
+
205
+ # Define color palette for generations
206
+ set palette defined (0 "dark-red", 1 "red", 2 "orange", 3 "yellow", 4 "green", 5 "blue", 6 "violet")
207
+
208
+ # Main plot with lines and points colored by generation
209
+ plot "$data_file" using 1:3:4 with linespoints palette title "Performance", \
210
+ "$data_file" using 1:3:(sprintf("%g", \$3)) with labels offset 0,0.5 notitle
154
211
  EOF
155
212
 
156
213
  rm -f "$data_file"
@@ -8,6 +8,43 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
8
8
  source "$SCRIPT_DIR/../lib/config.sh"
9
9
  load_config
10
10
 
#######################################
# Send a prompt to the Claude CLI and detect usage-limit responses.
#
# Arguments:
#   $1 - prompt text, passed to `claude` on stdin
#   $2 - model name (optional, default: opus)
# Outputs:
#   Claude's combined stdout/stderr on stdout; diagnostics on stderr.
# Returns:
#   The exit status of the `claude` command. If the output contains
#   "Claude AI usage limit reached", an error is printed and the current
#   shell exits with status 1 instead of returning.
#######################################
call_claude_with_limit_check() {
  local prompt="$1"
  local model="${2:-opus}"

  # Capture output (stderr included) so it can be scanned for the limit marker.
  # printf is used instead of echo so prompts starting with -n/-e pass intact.
  local claude_output
  claude_output=$(printf '%s\n' "$prompt" \
    | claude --dangerously-skip-permissions --model "$model" -p 2>&1)
  local claude_exit_code=$?

  if grep -q "Claude AI usage limit reached" <<<"$claude_output"; then
    # The marker may carry an epoch timestamp after a literal '|'.
    # Declared separately from the assignment so `local` cannot mask a failure.
    local limit_timestamp
    limit_timestamp=$(grep -o "Claude AI usage limit reached|[0-9]*" <<<"$claude_output" \
      | head -n 1 \
      | cut -d'|' -f2)

    printf '\033[31m%s\033[0m\n' "[ERROR] CLAUDE AI USAGE LIMIT REACHED!" >&2
    printf '\033[31m%s\033[0m\n' "[ERROR] Ideation halted due to API rate limits." >&2

    if [[ -n $limit_timestamp ]]; then
      # BSD/macOS date converts an epoch with -r; GNU date treats -r as a
      # reference file and needs -d "@epoch". Try both before giving up.
      local limit_date
      limit_date=$(date -r "$limit_timestamp" "+%Y-%m-%d %H:%M:%S" 2>/dev/null) \
        || limit_date=$(date -d "@$limit_timestamp" "+%Y-%m-%d %H:%M:%S" 2>/dev/null) \
        || limit_date="Unknown time"
      printf '\033[31m%s\033[0m\n' "[ERROR] Limit will be released at: $limit_date" >&2
    fi

    printf '\033[33m%s\033[0m\n' "[INFO] Please wait for the rate limit to reset before continuing." >&2
    printf '\033[33m%s\033[0m\n' "[INFO] No ideas were generated. Run ideate again when the limit resets." >&2

    # Deliberately exits the whole script: no further Claude call can succeed.
    exit 1
  fi

  # Pass Claude's response through to the caller.
  printf '%s\n' "$claude_output"

  return "$claude_exit_code"
}
47
+
11
48
  # Parse arguments
12
49
  use_strategies=true
13
50
  no_ai=false
@@ -80,28 +117,60 @@ if [[ $use_strategies == true ]]; then
80
117
  fi
81
118
  fi
82
119
 
83
- # Get next available ID
#######################################
# Compute the generation number to use for a new ideation run.
#
# Scans the first CSV column for IDs of the form "genNN-XXX" and returns
# one more than the highest NN found. Legacy numeric IDs are ignored.
#
# Globals:
#   FULL_CSV_PATH (read) - path to the evolution CSV
# Outputs:
#   The next generation number on stdout, zero-padded to at least two
#   digits ("01" when the CSV is missing or has no genNN-XXX rows).
#######################################
get_next_generation() {
  if [[ ! -f "$FULL_CSV_PATH" ]]; then
    echo "01"
    return
  fi

  # Keep loop variables local so callers' $id / $rest are not clobbered.
  local max_gen=0
  local id rest gen_num

  # `|| [[ -n $id ]]` processes the last row even when the file has no
  # trailing newline (read returns non-zero there but still fills $id).
  while IFS=, read -r id rest || [[ -n $id ]]; do
    if [[ $id =~ ^gen([0-9]+)- ]]; then
      # 10# forces base 10 so "08"/"09" are not parsed as invalid octal.
      gen_num=$((10#${BASH_REMATCH[1]}))
      if (( gen_num > max_gen )); then
        max_gen=$gen_num
      fi
    fi
  done < <(tail -n +2 "$FULL_CSV_PATH")

  # Increment and format with a leading zero.
  printf "%02d" $((max_gen + 1))
}
142
+
#######################################
# Compute the next free candidate ID inside one generation.
#
# Arguments:
#   $1 - generation number as it appears in IDs (e.g. "01")
# Globals:
#   FULL_CSV_PATH (read) - path to the evolution CSV
# Outputs:
#   "gen<generation>-NNN" on stdout, where NNN is one more than the highest
#   existing number for that generation, zero-padded to three digits
#   ("gen<generation>-001" when the CSV is missing or has no matching rows).
#######################################
get_next_id() {
  local generation="$1"

  if [[ ! -f "$FULL_CSV_PATH" ]]; then
    echo "gen${generation}-001"
    return
  fi

  # Keep loop variables local so callers' $id / $rest are not clobbered.
  local max_id=0
  local id rest id_num

  # `|| [[ -n $id ]]` processes the last row even when the file has no
  # trailing newline; skipping it could hand out a duplicate ID.
  while IFS=, read -r id rest || [[ -n $id ]]; do
    # Only IDs belonging to this specific generation count.
    if [[ $id =~ ^gen${generation}-([0-9]+)$ ]]; then
      # 10# forces base 10 so "008"/"009" are not parsed as invalid octal.
      id_num=$((10#${BASH_REMATCH[1]}))
      if (( id_num > max_id )); then
        max_id=$id_num
      fi
    fi
  done < <(tail -n +2 "$FULL_CSV_PATH")

  # Format next ID with generation and 3-digit number.
  printf "gen%s-%03d" "$generation" $((max_id + 1))
}
98
166
 
99
167
  # Add idea to CSV manually (fallback for manual mode)
100
168
  add_idea_manual() {
101
169
  local description="$1"
102
170
  local based_on_id="$2"
171
+ local generation="$3"
103
172
  local id
104
- id=$(get_next_id)
173
+ id=$(get_next_id "$generation")
105
174
 
106
175
  # Escape quotes in description
107
176
  local escaped_desc="${description//\"/\"\"}"
@@ -141,7 +210,7 @@ ideate_manual() {
141
210
  continue
142
211
  fi
143
212
 
144
- add_idea_manual "$description" ""
213
+ add_idea_manual "$description" "" "$CURRENT_GENERATION"
145
214
  ((ideas_added++))
146
215
 
147
216
  if [[ $i -lt $TOTAL_IDEAS ]]; then
@@ -209,7 +278,8 @@ Project Brief:
209
278
  $(cat "$FULL_BRIEF_PATH")
210
279
 
211
280
  Requirements for new CSV rows:
212
- - IDs must be numbers only (suitable for filenames)
281
+ - IDs must use format: gen$CURRENT_GENERATION-XXX (e.g., gen$CURRENT_GENERATION-001, gen$CURRENT_GENERATION-002)
282
+ - Continue numbering from the highest existing ID in generation $CURRENT_GENERATION
213
283
  - basedOnId should be empty (these are novel approaches)
214
284
  - Each description should be one clear sentence describing a specific algorithmic change
215
285
  - Descriptions should explore completely different approaches than existing ones
@@ -223,7 +293,7 @@ Example descriptions:
223
293
  Add exactly $count rows to the CSV file now."
224
294
 
225
295
  echo "[INFO] Calling Claude Opus to generate $count novel exploration ideas..."
226
- if ! echo "$prompt" | claude --dangerously-skip-permissions --model opus -p; then
296
+ if ! call_claude_with_limit_check "$prompt" "opus"; then
227
297
  echo "[WARN] Claude failed to generate novel ideas" >&2
228
298
  return 1
229
299
  fi
@@ -251,7 +321,8 @@ Project Brief:
251
321
  $(cat "$FULL_BRIEF_PATH")
252
322
 
253
323
  Requirements for new CSV rows:
254
- - IDs must be numbers only (suitable for filenames)
324
+ - IDs must use format: gen$CURRENT_GENERATION-XXX (e.g., gen$CURRENT_GENERATION-001, gen$CURRENT_GENERATION-002)
325
+ - Continue numbering from the highest existing ID in generation $CURRENT_GENERATION
255
326
  - basedOnId should reference ONE of the successful algorithm IDs above (pick the best one)
256
327
  - Each description should be one clear sentence about parameter tuning
257
328
  - Focus on adjusting hyperparameters, thresholds, sizes, learning rates
@@ -265,7 +336,7 @@ Example descriptions:
265
336
  Add exactly $count parameter tuning rows to the CSV file now."
266
337
 
267
338
  echo "[INFO] Calling Claude Opus to generate $count hill climbing ideas..."
268
- if ! echo "$prompt" | claude --dangerously-skip-permissions --model opus -p; then
339
+ if ! call_claude_with_limit_check "$prompt" "opus"; then
269
340
  echo "[WARN] Claude failed to generate hill climbing ideas" >&2
270
341
  return 1
271
342
  fi
@@ -293,7 +364,8 @@ Project Brief:
293
364
  $(cat "$FULL_BRIEF_PATH")
294
365
 
295
366
  Requirements for new CSV rows:
296
- - IDs must be numbers only (suitable for filenames)
367
+ - IDs must use format: gen$CURRENT_GENERATION-XXX (e.g., gen$CURRENT_GENERATION-001, gen$CURRENT_GENERATION-002)
368
+ - Continue numbering from the highest existing ID in generation $CURRENT_GENERATION
297
369
  - basedOnId should reference ONE of the successful algorithm IDs above (pick the best one)
298
370
  - Each description should be one clear sentence about architectural changes
299
371
  - Keep core insights but change implementation approach
@@ -307,7 +379,7 @@ Example descriptions:
307
379
  Add exactly $count structural modification rows to the CSV file now."
308
380
 
309
381
  echo "[INFO] Calling Claude Opus to generate $count structural mutation ideas..."
310
- if ! echo "$prompt" | claude --dangerously-skip-permissions --model opus -p; then
382
+ if ! call_claude_with_limit_check "$prompt" "opus"; then
311
383
  echo "[WARN] Claude failed to generate structural mutation ideas" >&2
312
384
  return 1
313
385
  fi
@@ -335,7 +407,8 @@ Project Brief:
335
407
  $(cat "$FULL_BRIEF_PATH")
336
408
 
337
409
  Requirements for new CSV rows:
338
- - IDs must be numbers only (suitable for filenames)
410
+ - IDs must use format: gen$CURRENT_GENERATION-XXX (e.g., gen$CURRENT_GENERATION-001, gen$CURRENT_GENERATION-002)
411
+ - Continue numbering from the highest existing ID in generation $CURRENT_GENERATION
339
412
  - basedOnId should reference ONE of the successful algorithm IDs above (pick the best one as base)
340
413
  - Each description should be one clear sentence combining elements from different algorithms
341
414
  - Be specific about what elements to merge
@@ -349,7 +422,7 @@ Example descriptions:
349
422
  Add exactly $count hybrid combination rows to the CSV file now."
350
423
 
351
424
  echo "[INFO] Calling Claude Opus to generate $count crossover hybrid ideas..."
352
- if ! echo "$prompt" | claude --dangerously-skip-permissions --model opus -p; then
425
+ if ! call_claude_with_limit_check "$prompt" "opus"; then
353
426
  echo "[WARN] Claude failed to generate crossover ideas" >&2
354
427
  return 1
355
428
  fi
@@ -399,7 +472,8 @@ $top_performers"
399
472
  prompt+="
400
473
 
401
474
  Requirements for new CSV rows:
402
- - IDs must be numbers only (suitable for filenames)
475
+ - IDs must use format: gen$CURRENT_GENERATION-XXX (e.g., gen$CURRENT_GENERATION-001, gen$CURRENT_GENERATION-002)
476
+ - Continue numbering from the highest existing ID in generation $CURRENT_GENERATION
403
477
  - basedOnId should be empty or reference existing algorithm ID
404
478
  - Each description should be one clear sentence describing an algorithmic approach
405
479
  - All new rows should have empty performance and status fields
@@ -407,13 +481,17 @@ Requirements for new CSV rows:
407
481
  Add exactly $TOTAL_IDEAS algorithm variation rows to the CSV file now."
408
482
 
409
483
  echo "[INFO] Calling Claude Opus to generate $TOTAL_IDEAS ideas (legacy mode)..."
410
- if ! echo "$prompt" | claude --dangerously-skip-permissions --model opus -p; then
484
+ if ! call_claude_with_limit_check "$prompt" "opus"; then
411
485
  echo "[WARN] Claude failed to generate ideas" >&2
412
486
  return 1
413
487
  fi
414
488
  echo "[INFO] Legacy ideas generated"
415
489
  }
416
490
 
491
+ # Determine generation number for this ideation run
492
+ CURRENT_GENERATION=$(get_next_generation)
493
+ echo "[INFO] Starting ideation for generation $CURRENT_GENERATION"
494
+
417
495
  # Main execution
418
496
  if [[ $no_ai == true ]]; then
419
497
  echo "[INFO] Manual entry mode"
@@ -141,6 +141,71 @@ update_csv_row() {
141
141
  mv "$temp_file" "$FULL_CSV_PATH"
142
142
  }
143
143
 
#######################################
# Auto-recovery mechanism for common evaluation failures.
#
# Arguments:
#   $1 - failure type: "import_error", "syntax_error" or "zero_score"
#   $2 - captured evaluator output (scanned for the missing module name)
#   $3 - row number of the candidate in the CSV (1 = first line of the file)
#   $4 - candidate ID (used in messages only)
#   $5 - candidate description; used only when the CSV row's own
#        description field cannot be read
# Globals:
#   FULL_CSV_PATH (read/written) - path to the evolution CSV
# Calls:
#   update_csv_row (defined elsewhere in this file), pip
# Returns:
#   0 when the caller should retry the candidate, 1 when no recovery
#   was possible.
#######################################
attempt_recovery() {
  local failure_type="$1"
  local error_output="$2"
  local row_num="$3"
  local id="$4"
  local description="$5"

  echo "[INFO] Attempting auto-recovery for $failure_type..."

  case "$failure_type" in
    "import_error")
      # Extract the first module named in a "No module named '...'" message.
      local missing_module package
      missing_module=$(grep -o "No module named '[^']*'" <<<"$error_output" | head -n 1)
      missing_module=${missing_module#"No module named '"}
      missing_module=${missing_module%"'"}

      # A dotted name (pkg.sub) is a submodule; pip can only install the
      # top-level distribution.
      package=${missing_module%%.*}

      # The name comes from output of generated code, so never hand pip
      # anything that could be read as an option, URL or path.
      if [[ -n $missing_module && ! $package =~ ^[A-Za-z0-9_][A-Za-z0-9_-]*$ ]]; then
        echo "[WARN] Refusing to auto-install unexpected module name: $missing_module"
        return 1
      fi

      if [[ -n $package ]]; then
        echo "[INFO] Detected missing module: $missing_module"
        echo "[INFO] Attempting to install via pip..."

        if pip install "$package" 2>&1; then
          echo "[INFO] Successfully installed $package. Retrying evaluation..."
          return 0 # Signal to retry
        else
          echo "[WARN] Failed to install $package automatically"
          echo "[INFO] Please install manually: pip install $package"
        fi
      fi
      ;;

    "syntax_error")
      echo "[INFO] Detected syntax error in generated code"

      if [[ ! -r "$FULL_CSV_PATH" ]]; then
        echo "[WARN] Cannot read $FULL_CSV_PATH; skipping recovery"
        return 1
      fi

      local marker="[RETRY: Fix syntax]"
      local line target_line="" current_row=1

      # Look at the candidate's row first: a row that already carries the
      # marker has been retried once, so retrying again would loop forever.
      while IFS= read -r line || [[ -n $line ]]; do
        if [[ $current_row -eq $row_num ]]; then
          target_line=$line
          break
        fi
        current_row=$((current_row + 1))
      done <"$FULL_CSV_PATH"

      if [[ $target_line == *"$marker"* ]]; then
        echo "[WARN] Candidate $id was already retried for a syntax error; giving up"
        return 1
      fi

      echo "[INFO] Retrying mutation with additional constraints..."

      # Update the CSV to retry this candidate with enhanced prompt
      update_csv_row "$row_num" "" "pending"

      local temp_file
      temp_file=$(mktemp) || return 1

      local csv_id csv_based_on csv_desc csv_perf remainder commas new_desc
      current_row=1
      while IFS= read -r line || [[ -n $line ]]; do
        if [[ $current_row -eq $row_num ]]; then
          # id and basedOnId are peeled from the left, status and performance
          # from the right; whatever remains is the description, which may be
          # quoted and contain commas.
          commas=${line//[^,]/}
          csv_id=${line%%,*}
          csv_based_on="" csv_desc="" csv_perf=""
          if (( ${#commas} >= 4 )); then
            remainder=${line#*,}
            csv_based_on=${remainder%%,*}
            remainder=${remainder#*,}
            remainder=${remainder%,*} # drop status
            csv_perf=${remainder##*,}
            csv_desc=${remainder%,*}
          elif (( ${#commas} >= 1 )); then
            remainder=${line#*,}
            csv_based_on=${remainder%%,*}
          fi

          if [[ $csv_desc == \"* ]]; then
            # Already CSV-quoted: insert the marker after the opening quote.
            new_desc="\"$marker ${csv_desc:1}"
          else
            [[ -n $csv_desc ]] || csv_desc=$description
            new_desc="$marker $csv_desc"
            # Quote the field if it would otherwise break the CSV structure.
            if [[ $new_desc == *[\",]* ]]; then
              new_desc="\"${new_desc//\"/\"\"}\""
            fi
          fi

          printf '%s,%s,%s,%s,pending\n' "$csv_id" "$csv_based_on" "$new_desc" "$csv_perf"
        else
          # Every other row is copied through byte-for-byte.
          printf '%s\n' "$line"
        fi
        current_row=$((current_row + 1))
      done <"$FULL_CSV_PATH" >"$temp_file"

      mv "$temp_file" "$FULL_CSV_PATH"
      return 0 # Signal to retry
      ;;

    "zero_score")
      # For zero scores, log additional diagnostic info
      echo "[INFO] Algorithm produced zero score - likely a logic error"
      echo "[INFO] Consider adding constraints to BRIEF.md to avoid this approach"
      ;;
  esac

  return 1 # No recovery possible
}
208
+
144
209
  # Main evolution loop
145
210
  while true; do
146
211
  # Find next candidate
@@ -198,7 +263,15 @@ if [[ -z $based_on_id || $based_on_id == "0" || $based_on_id == '""' ]]; then
198
263
  parent_file="$FULL_ALGORITHM_PATH"
199
264
  echo "[INFO] Using base algorithm (basedonID is empty or 0)"
200
265
  else
201
- parent_file="$FULL_OUTPUT_DIR/evolution_id${based_on_id}.py"
266
+ # Handle both old format (numeric) and new format (genXX-XXX)
267
+ if [[ $based_on_id =~ ^[0-9]+$ ]]; then
268
+ # Old numeric format
269
+ parent_file="$FULL_OUTPUT_DIR/evolution_id${based_on_id}.py"
270
+ else
271
+ # New generation format
272
+ parent_file="$FULL_OUTPUT_DIR/evolution_${based_on_id}.py"
273
+ fi
274
+
202
275
  if [[ ! -f $parent_file ]]; then
203
276
  echo "[ERROR] Parent algorithm file not found: $parent_file" >&2
204
277
  update_csv_row "$row_num" "" "failed"
@@ -212,8 +285,15 @@ fi
212
285
 
213
286
  echo "[INFO] Using parent algorithm: $parent_file"
214
287
 
215
- # Generate mutation
216
- output_file="$FULL_OUTPUT_DIR/evolution_id${id}.py"
288
+ # Generate mutation output file
289
+ # Handle both old format (numeric) and new format (genXX-XXX)
290
+ if [[ $id =~ ^[0-9]+$ ]]; then
291
+ # Old numeric format
292
+ output_file="$FULL_OUTPUT_DIR/evolution_id${id}.py"
293
+ else
294
+ # New generation format
295
+ output_file="$FULL_OUTPUT_DIR/evolution_${id}.py"
296
+ fi
217
297
  echo "[INFO] Generating algorithm mutation..."
218
298
 
219
299
  # Copy parent algorithm to output file first
@@ -267,8 +347,34 @@ else
267
347
  echo "=== CLAUDE RESPONSE ==="
268
348
  } >> "${LOGFILE}-developer"
269
349
 
270
- # Use tee to show output and log simultaneously
271
- if ! echo "$prompt" | "$claude_cmd" --dangerously-skip-permissions --model $CLAUDE_MODEL -p 2>&1 | tee -a "${LOGFILE}-developer"; then
350
+ # Use tee to show output and log simultaneously, and capture output for limit detection
351
+ claude_output=$(echo "$prompt" | "$claude_cmd" --dangerously-skip-permissions --model $CLAUDE_MODEL -p 2>&1 | tee -a "${LOGFILE}-developer")
352
+ claude_exit_code=${PIPESTATUS[1]} # Get exit code from claude command, not tee
353
+
354
+ # Check for usage limit
355
+ if echo "$claude_output" | grep -q "Claude AI usage limit reached"; then
356
+ # Extract timestamp if available
357
+ limit_timestamp=$(echo "$claude_output" | grep -o "Claude AI usage limit reached|[0-9]*" | cut -d'|' -f2)
358
+
359
+ # Print red error message
360
+ echo -e "\033[31m[ERROR] CLAUDE AI USAGE LIMIT REACHED!\033[0m" >&2
361
+ echo -e "\033[31m[ERROR] Evolution halted due to API rate limits.\033[0m" >&2
362
+
363
+ if [[ -n $limit_timestamp ]]; then
364
+ # Convert timestamp to human-readable format
365
+ limit_date=$(date -r "$limit_timestamp" "+%Y-%m-%d %H:%M:%S" 2>/dev/null || echo "Unknown time")
366
+ echo -e "\033[31m[ERROR] Limit will be released at: $limit_date\033[0m" >&2
367
+ fi
368
+
369
+ echo -e "\033[33m[INFO] Please wait for the rate limit to reset before continuing.\033[0m" >&2
370
+ echo -e "\033[33m[INFO] The current candidate will remain in 'pending' status.\033[0m" >&2
371
+
372
+ # Don't mark as failed - leave it pending for retry later
373
+ exit 1
374
+ fi
375
+
376
+ # Check for other failures
377
+ if [[ $claude_exit_code -ne 0 ]]; then
272
378
  echo "[ERROR] Claude failed to mutate algorithm" >&2
273
379
  update_csv_row "$row_num" "" "failed"
274
380
  if should_continue_after_failure; then
@@ -338,15 +444,39 @@ if [[ $eval_exit_code -eq 0 ]]; then
338
444
  # Extract score from JSON (simple grep approach)
339
445
  if score=$(echo "$eval_output" | grep -o '"score"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
340
446
  if [[ -n $score ]]; then
341
- update_csv_row "$row_num" "$score" "complete"
342
- echo "[INFO] Evaluation completed successfully"
343
- echo "[INFO] Performance score: $score"
344
- else
345
- # Try "performance" field
346
- if score=$(echo "$eval_output" | grep -o '"performance"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
447
+ # Check if score is 0 and mark as failed
448
+ if (( $(echo "$score == 0" | bc -l) )); then
449
+ update_csv_row "$row_num" "$score" "failed"
450
+ echo "[INFO] ✗ Evaluation failed with score 0"
451
+ echo "[INFO] Performance score: $score"
452
+ if should_continue_after_failure; then
453
+ continue 2
454
+ else
455
+ exit 1
456
+ fi
457
+ else
347
458
  update_csv_row "$row_num" "$score" "complete"
348
459
  echo "[INFO] ✓ Evaluation completed successfully"
349
460
  echo "[INFO] Performance score: $score"
461
+ fi
462
+ else
463
+ # Try "performance" field
464
+ if score=$(echo "$eval_output" | grep -o '"performance"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
465
+ # Check if score is 0 and mark as failed
466
+ if (( $(echo "$score == 0" | bc -l) )); then
467
+ update_csv_row "$row_num" "$score" "failed"
468
+ echo "[INFO] ✗ Evaluation failed with score 0"
469
+ echo "[INFO] Performance score: $score"
470
+ if should_continue_after_failure; then
471
+ continue 2
472
+ else
473
+ exit 1
474
+ fi
475
+ else
476
+ update_csv_row "$row_num" "$score" "complete"
477
+ echo "[INFO] ✓ Evaluation completed successfully"
478
+ echo "[INFO] Performance score: $score"
479
+ fi
350
480
  else
351
481
  echo "[ERROR] No score found in evaluator output" >&2
352
482
  echo "[ERROR] Output: $eval_output" >&2
@@ -371,7 +501,50 @@ if [[ $eval_exit_code -eq 0 ]]; then
371
501
  else
372
502
  echo "[ERROR] Evaluator failed with exit code $eval_exit_code" >&2
373
503
  echo "[ERROR] Output: $eval_output" >&2
374
- update_csv_row "$row_num" "" "failed"
504
+
505
+ # Check for common failure patterns and attempt recovery
506
+ recovery_attempted=false
507
+
508
+ # Check for import errors
509
+ if echo "$eval_output" | grep -q "No module named"; then
510
+ if attempt_recovery "import_error" "$eval_output" "$row_num" "$id" "$description"; then
511
+ recovery_attempted=true
512
+ # Retry the evaluation
513
+ echo "[INFO] Retrying evaluation after recovery attempt..."
514
+ if eval_output=$("$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1); then
515
+ # Re-process the successful result
516
+ if score=$(echo "$eval_output" | grep -o '"score"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
517
+ if [[ -n $score ]]; then
518
+ if (( $(echo "$score == 0" | bc -l) )); then
519
+ update_csv_row "$row_num" "$score" "failed"
520
+ echo "[INFO] ✗ Evaluation failed with score 0"
521
+ else
522
+ update_csv_row "$row_num" "$score" "complete"
523
+ echo "[INFO] ✓ Recovery successful! Performance score: $score"
524
+ consecutive_failures=0
525
+ continue
526
+ fi
527
+ fi
528
+ fi
529
+ fi
530
+ fi
531
+ fi
532
+
533
+ # Check for syntax errors
534
+ if echo "$eval_output" | grep -q "SyntaxError"; then
535
+ if attempt_recovery "syntax_error" "$eval_output" "$row_num" "$id" "$description"; then
536
+ recovery_attempted=true
537
+ # Skip to next iteration to retry with enhanced prompt
538
+ consecutive_failures=0
539
+ continue
540
+ fi
541
+ fi
542
+
543
+ # If no recovery was successful, mark as failed
544
+ if [[ $recovery_attempted == false ]]; then
545
+ update_csv_row "$row_num" "" "failed"
546
+ fi
547
+
375
548
  if should_continue_after_failure; then
376
549
  continue
377
550
  else
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-evolve",
3
- "version": "1.2.5",
3
+ "version": "1.2.6",
4
4
  "bin": {
5
5
  "claude-evolve": "./bin/claude-evolve",
6
6
  "claude-evolve-main": "./bin/claude-evolve-main",