claude-evolve 1.2.5 → 1.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -0
- package/bin/claude-evolve-analyze +64 -7
- package/bin/claude-evolve-ideate +96 -18
- package/bin/claude-evolve-run +185 -12
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -130,6 +130,43 @@ This isn't sci-fi level "sleep through the entire evolution" automation - it's m
|
|
|
130
130
|
- **Restart later** with `claude-evolve run` to continue from where you left off
|
|
131
131
|
- **Perfect for long-term optimization** - run overnight, over weekends, or while working on other projects
|
|
132
132
|
|
|
133
|
+
## Handling Failures and Recovery
|
|
134
|
+
|
|
135
|
+
Evolution experiments can fail for various reasons. The system tracks these failures and provides recovery options.
|
|
136
|
+
|
|
137
|
+
**Common failure types:**
|
|
138
|
+
- **Infrastructure failures** - Missing dependencies (e.g., xgboost not installed)
|
|
139
|
+
- **Code generation bugs** - Claude occasionally generates syntactically incorrect code
|
|
140
|
+
- **Evaluation errors** - Evaluator crashes or returns invalid output
|
|
141
|
+
- **Performance score 0** - Algorithm runs but produces no meaningful results (now marked as "failed")
|
|
142
|
+
|
|
143
|
+
**Failure tracking in evolution.csv:**
|
|
144
|
+
- `failed` - Evaluation error or performance score of 0
|
|
145
|
+
- `timeout` - Evaluation exceeded time limit
|
|
146
|
+
- `interrupted` - User interrupted with Ctrl+C
|
|
147
|
+
- Check the `status` column to identify failed candidates
|
|
148
|
+
|
|
149
|
+
**Manual recovery strategies:**
|
|
150
|
+
1. **Force retry of failed candidates:**
|
|
151
|
+
- Edit `evolution.csv` and change status from "failed" to "pending"
|
|
152
|
+
- Clear the performance value for that row
|
|
153
|
+
- Run `claude-evolve run` to retry the candidate
|
|
154
|
+
|
|
155
|
+
2. **Fix infrastructure issues:**
|
|
156
|
+
- Install missing dependencies: `pip install xgboost numpy scipy`
|
|
157
|
+
- Update Python environment if needed
|
|
158
|
+
- Check that evaluator.py has proper error handling
|
|
159
|
+
|
|
160
|
+
3. **Guide around persistent failures:**
|
|
161
|
+
- If a specific approach keeps failing, add constraints to BRIEF.md
|
|
162
|
+
- Use `claude-evolve ideate` with explicit directions to avoid problematic patterns
|
|
163
|
+
- Consider updating evaluator.py to catch and handle specific error types
|
|
164
|
+
|
|
165
|
+
**Future auto-recovery (planned):**
|
|
166
|
+
- Automatic retry with different prompts for code generation failures
|
|
167
|
+
- Dependency detection and installation suggestions
|
|
168
|
+
- Smart failure pattern recognition to avoid similar mutations
|
|
169
|
+
|
|
133
170
|
## Requirements
|
|
134
171
|
|
|
135
172
|
### Required
|
|
@@ -126,31 +126,88 @@ else
|
|
|
126
126
|
echo "No completed candidates yet"
|
|
127
127
|
fi
|
|
128
128
|
|
|
129
|
+
# Generation analysis
|
|
130
|
+
echo
|
|
131
|
+
echo "=== Generation Analysis ==="
|
|
132
|
+
declare -A gen_count gen_sum gen_completed
|
|
133
|
+
|
|
134
|
+
while IFS=, read -r id _ desc perf status; do
|
|
135
|
+
[[ $id == "id" ]] && continue # Skip header
|
|
136
|
+
|
|
137
|
+
# Extract generation from ID
|
|
138
|
+
gen="gen01" # default for old numeric IDs
|
|
139
|
+
if [[ $id =~ ^(gen[0-9]+)- ]]; then
|
|
140
|
+
gen="${BASH_REMATCH[1]}"
|
|
141
|
+
elif [[ $id =~ ^[0-9]+$ ]]; then
|
|
142
|
+
gen="gen00" # Mark old numeric IDs as gen00
|
|
143
|
+
fi
|
|
144
|
+
|
|
145
|
+
# Track generation stats
|
|
146
|
+
: ${gen_count[$gen]:=0}
|
|
147
|
+
((gen_count[$gen]++))
|
|
148
|
+
|
|
149
|
+
if [[ $status =~ ^(complete|completed)$ && -n $perf && $perf != "" ]]; then
|
|
150
|
+
: ${gen_completed[$gen]:=0}
|
|
151
|
+
: ${gen_sum[$gen]:=0}
|
|
152
|
+
((gen_completed[$gen]++))
|
|
153
|
+
gen_sum[$gen]=$(echo "${gen_sum[$gen]} + $perf" | bc -l 2>/dev/null || echo "${gen_sum[$gen]}")
|
|
154
|
+
fi
|
|
155
|
+
done <"$csv_file"
|
|
156
|
+
|
|
157
|
+
# Display generation stats
|
|
158
|
+
for gen in $(printf '%s\n' "${!gen_count[@]}" | sort); do
|
|
159
|
+
total_in_gen=${gen_count[$gen]}
|
|
160
|
+
completed_in_gen=${gen_completed[$gen]:-0}
|
|
161
|
+
|
|
162
|
+
echo -n "$gen: $total_in_gen candidates"
|
|
163
|
+
|
|
164
|
+
if [[ $completed_in_gen -gt 0 ]]; then
|
|
165
|
+
avg=$(echo "scale=4; ${gen_sum[$gen]} / $completed_in_gen" | bc -l 2>/dev/null || echo "0")
|
|
166
|
+
echo " ($completed_in_gen completed, avg: $avg)"
|
|
167
|
+
else
|
|
168
|
+
echo " (0 completed)"
|
|
169
|
+
fi
|
|
170
|
+
done
|
|
171
|
+
|
|
129
172
|
# Simple chart generation using gnuplot if available
|
|
130
173
|
if command -v gnuplot >/dev/null 2>&1 && [[ $count_with_performance -gt 0 ]]; then
|
|
131
174
|
echo
|
|
132
175
|
echo "Generating performance chart: $output_file"
|
|
133
176
|
|
|
134
|
-
# Create data file for gnuplot
|
|
177
|
+
# Create data file for gnuplot with row numbers
|
|
135
178
|
data_file="/tmp/evolution_data_$$.dat"
|
|
136
|
-
echo "# ID Performance" >"$data_file"
|
|
179
|
+
echo "# Row ID Performance Generation" >"$data_file"
|
|
137
180
|
|
|
181
|
+
row_num=0
|
|
138
182
|
while IFS=, read -r id _ desc perf status; do
|
|
139
183
|
[[ $id == "id" ]] && continue # Skip header
|
|
140
|
-
|
|
141
|
-
|
|
184
|
+
((row_num++))
|
|
185
|
+
if [[ -n $perf && $perf != "" && $status =~ ^(complete|completed)$ ]]; then
|
|
186
|
+
# Extract generation number for coloring
|
|
187
|
+
gen_num=0
|
|
188
|
+
if [[ $id =~ ^gen([0-9]+)- ]]; then
|
|
189
|
+
gen_num=$((10#${BASH_REMATCH[1]}))
|
|
190
|
+
fi
|
|
191
|
+
echo "$row_num \"$id\" $perf $gen_num" >>"$data_file"
|
|
142
192
|
fi
|
|
143
193
|
done <"$csv_file"
|
|
144
194
|
|
|
145
195
|
# Generate plot
|
|
146
196
|
gnuplot <<EOF
|
|
147
|
-
set terminal png size
|
|
197
|
+
set terminal png size 1000,600
|
|
148
198
|
set output "$output_file"
|
|
149
199
|
set title "Algorithm Evolution Performance"
|
|
150
|
-
set xlabel "Evolution
|
|
200
|
+
set xlabel "Evolution Order"
|
|
151
201
|
set ylabel "Performance Score"
|
|
152
202
|
set grid
|
|
153
|
-
|
|
203
|
+
set key outside right
|
|
204
|
+
|
|
205
|
+
# Define color palette for generations
|
|
206
|
+
set palette defined (0 "dark-red", 1 "red", 2 "orange", 3 "yellow", 4 "green", 5 "blue", 6 "violet")
|
|
207
|
+
|
|
208
|
+
# Main plot with lines and points colored by generation
|
|
209
|
+
plot "$data_file" using 1:3:4 with linespoints palette title "Performance", \
|
|
210
|
+
"$data_file" using 1:3:(sprintf("%g", \$3)) with labels offset 0,0.5 notitle
|
|
154
211
|
EOF
|
|
155
212
|
|
|
156
213
|
rm -f "$data_file"
|
package/bin/claude-evolve-ideate
CHANGED
|
@@ -8,6 +8,43 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
|
8
8
|
source "$SCRIPT_DIR/../lib/config.sh"
|
|
9
9
|
load_config
|
|
10
10
|
|
|
11
|
+
# Helper function to call Claude with usage limit detection.
#
# Arguments:
#   $1 - prompt text, piped to the claude CLI on stdin
#   $2 - model name (optional, defaults to "opus")
# Outputs:
#   stdout - Claude's combined stdout/stderr output
#   stderr - coloured diagnostics when the usage limit marker is seen
# Returns:
#   The exit status of the claude command. When the output contains
#   "Claude AI usage limit reached" the whole script EXITS with status 1
#   (callers invoke this in the current shell, so ideation stops).
call_claude_with_limit_check() {
  local prompt="$1"
  local model="${2:-opus}"

  # Call Claude and capture output. The status is captured with `||` so the
  # function keeps working if the calling script runs under `set -e`.
  local claude_output
  local claude_exit_code=0
  claude_output=$(printf '%s\n' "$prompt" | claude --dangerously-skip-permissions --model "$model" -p 2>&1) || claude_exit_code=$?

  # Check for usage limit
  if grep -q "Claude AI usage limit reached" <<<"$claude_output"; then
    # Extract timestamp if available (marker format: "...reached|<epoch seconds>")
    local limit_timestamp
    limit_timestamp=$(grep -o "Claude AI usage limit reached|[0-9]*" <<<"$claude_output" | head -n 1 | cut -d'|' -f2)

    # Print red error message
    echo -e "\033[31m[ERROR] CLAUDE AI USAGE LIMIT REACHED!\033[0m" >&2
    echo -e "\033[31m[ERROR] Ideation halted due to API rate limits.\033[0m" >&2

    if [[ -n $limit_timestamp ]]; then
      # Convert timestamp to human-readable format.
      # GNU date wants `-d @EPOCH`; BSD/macOS date wants `-r EPOCH`. On GNU,
      # `-r` means "reference file", so try the GNU form first and fall back.
      local limit_date
      limit_date=$(date -d "@$limit_timestamp" "+%Y-%m-%d %H:%M:%S" 2>/dev/null) ||
        limit_date=$(date -r "$limit_timestamp" "+%Y-%m-%d %H:%M:%S" 2>/dev/null) ||
        limit_date="Unknown time"
      echo -e "\033[31m[ERROR] Limit will be released at: $limit_date\033[0m" >&2
    fi

    echo -e "\033[33m[INFO] Please wait for the rate limit to reset before continuing.\033[0m" >&2
    echo -e "\033[33m[INFO] No ideas were generated. Run ideate again when the limit resets.\033[0m" >&2

    exit 1
  fi

  # Output Claude's response
  echo "$claude_output"

  return "$claude_exit_code"
}
|
|
47
|
+
|
|
11
48
|
# Parse arguments
|
|
12
49
|
use_strategies=true
|
|
13
50
|
no_ai=false
|
|
@@ -80,28 +117,60 @@ if [[ $use_strategies == true ]]; then
|
|
|
80
117
|
fi
|
|
81
118
|
fi
|
|
82
119
|
|
|
83
|
-
# Get next
|
|
120
|
+
# Get next generation number.
#
# Globals:
#   FULL_CSV_PATH (read) - path to the evolution CSV
# Outputs:
#   stdout - the next generation number, zero-padded to two digits ("01" when
#            the CSV does not exist or holds no genNN-* IDs)
get_next_generation() {
  if [[ ! -f "$FULL_CSV_PATH" ]]; then
    echo "01"
    return
  fi

  # Find highest generation number
  local max_gen=0
  local id rest gen_num
  # `|| [[ -n $id ]]` keeps the final row when the file has no trailing
  # newline; without it the newest generation could be missed and reused.
  while IFS=, read -r id rest || [[ -n $id ]]; do
    # Extract generation number from IDs like "gen01-001"
    if [[ $id =~ ^gen([0-9]+)- ]]; then
      # 10# forces base 10 so "08"/"09" are not parsed as invalid octal
      gen_num=$((10#${BASH_REMATCH[1]}))
      if ((gen_num > max_gen)); then
        max_gen=$gen_num
      fi
    fi
  done < <(tail -n +2 "$FULL_CSV_PATH")

  # Increment and format with leading zero
  printf "%02d" $((max_gen + 1))
}
|
|
142
|
+
|
|
143
|
+
# Get next available ID for current generation.
#
# Globals:
#   FULL_CSV_PATH (read) - path to the evolution CSV
# Arguments:
#   $1 - generation number as it appears in IDs (e.g. "02")
# Outputs:
#   stdout - next free ID in the form gen<generation>-NNN
get_next_id() {
  local generation="$1"
  if [[ ! -f "$FULL_CSV_PATH" ]]; then
    echo "gen${generation}-001"
    return
  fi

  # Find highest ID within this generation
  local max_id=0
  local id rest id_num
  # `|| [[ -n $id ]]` keeps the final row when the file has no trailing
  # newline; otherwise the newest ID is skipped and handed out a second time.
  while IFS=, read -r id rest || [[ -n $id ]]; do
    # Match IDs for this specific generation
    if [[ $id =~ ^gen${generation}-([0-9]+)$ ]]; then
      # 10# forces base 10 so "008"/"009" are not parsed as invalid octal
      id_num=$((10#${BASH_REMATCH[1]}))
      if ((id_num > max_id)); then
        max_id=$id_num
      fi
    fi
  done < <(tail -n +2 "$FULL_CSV_PATH")

  # Format next ID with generation and 3-digit number
  printf "gen%s-%03d" "$generation" $((max_id + 1))
}
|
|
98
166
|
|
|
99
167
|
# Add idea to CSV manually (fallback for manual mode)
|
|
100
168
|
add_idea_manual() {
|
|
101
169
|
local description="$1"
|
|
102
170
|
local based_on_id="$2"
|
|
171
|
+
local generation="$3"
|
|
103
172
|
local id
|
|
104
|
-
id=$(get_next_id)
|
|
173
|
+
id=$(get_next_id "$generation")
|
|
105
174
|
|
|
106
175
|
# Escape quotes in description
|
|
107
176
|
local escaped_desc="${description//\"/\"\"}"
|
|
@@ -141,7 +210,7 @@ ideate_manual() {
|
|
|
141
210
|
continue
|
|
142
211
|
fi
|
|
143
212
|
|
|
144
|
-
add_idea_manual "$description" ""
|
|
213
|
+
add_idea_manual "$description" "" "$CURRENT_GENERATION"
|
|
145
214
|
((ideas_added++))
|
|
146
215
|
|
|
147
216
|
if [[ $i -lt $TOTAL_IDEAS ]]; then
|
|
@@ -209,7 +278,8 @@ Project Brief:
|
|
|
209
278
|
$(cat "$FULL_BRIEF_PATH")
|
|
210
279
|
|
|
211
280
|
Requirements for new CSV rows:
|
|
212
|
-
- IDs must
|
|
281
|
+
- IDs must use format: gen$CURRENT_GENERATION-XXX (e.g., gen$CURRENT_GENERATION-001, gen$CURRENT_GENERATION-002)
|
|
282
|
+
- Continue numbering from the highest existing ID in generation $CURRENT_GENERATION
|
|
213
283
|
- basedOnId should be empty (these are novel approaches)
|
|
214
284
|
- Each description should be one clear sentence describing a specific algorithmic change
|
|
215
285
|
- Descriptions should explore completely different approaches than existing ones
|
|
@@ -223,7 +293,7 @@ Example descriptions:
|
|
|
223
293
|
Add exactly $count rows to the CSV file now."
|
|
224
294
|
|
|
225
295
|
echo "[INFO] Calling Claude Opus to generate $count novel exploration ideas..."
|
|
226
|
-
if !
|
|
296
|
+
if ! call_claude_with_limit_check "$prompt" "opus"; then
|
|
227
297
|
echo "[WARN] Claude failed to generate novel ideas" >&2
|
|
228
298
|
return 1
|
|
229
299
|
fi
|
|
@@ -251,7 +321,8 @@ Project Brief:
|
|
|
251
321
|
$(cat "$FULL_BRIEF_PATH")
|
|
252
322
|
|
|
253
323
|
Requirements for new CSV rows:
|
|
254
|
-
- IDs must
|
|
324
|
+
- IDs must use format: gen$CURRENT_GENERATION-XXX (e.g., gen$CURRENT_GENERATION-001, gen$CURRENT_GENERATION-002)
|
|
325
|
+
- Continue numbering from the highest existing ID in generation $CURRENT_GENERATION
|
|
255
326
|
- basedOnId should reference ONE of the successful algorithm IDs above (pick the best one)
|
|
256
327
|
- Each description should be one clear sentence about parameter tuning
|
|
257
328
|
- Focus on adjusting hyperparameters, thresholds, sizes, learning rates
|
|
@@ -265,7 +336,7 @@ Example descriptions:
|
|
|
265
336
|
Add exactly $count parameter tuning rows to the CSV file now."
|
|
266
337
|
|
|
267
338
|
echo "[INFO] Calling Claude Opus to generate $count hill climbing ideas..."
|
|
268
|
-
if !
|
|
339
|
+
if ! call_claude_with_limit_check "$prompt" "opus"; then
|
|
269
340
|
echo "[WARN] Claude failed to generate hill climbing ideas" >&2
|
|
270
341
|
return 1
|
|
271
342
|
fi
|
|
@@ -293,7 +364,8 @@ Project Brief:
|
|
|
293
364
|
$(cat "$FULL_BRIEF_PATH")
|
|
294
365
|
|
|
295
366
|
Requirements for new CSV rows:
|
|
296
|
-
- IDs must
|
|
367
|
+
- IDs must use format: gen$CURRENT_GENERATION-XXX (e.g., gen$CURRENT_GENERATION-001, gen$CURRENT_GENERATION-002)
|
|
368
|
+
- Continue numbering from the highest existing ID in generation $CURRENT_GENERATION
|
|
297
369
|
- basedOnId should reference ONE of the successful algorithm IDs above (pick the best one)
|
|
298
370
|
- Each description should be one clear sentence about architectural changes
|
|
299
371
|
- Keep core insights but change implementation approach
|
|
@@ -307,7 +379,7 @@ Example descriptions:
|
|
|
307
379
|
Add exactly $count structural modification rows to the CSV file now."
|
|
308
380
|
|
|
309
381
|
echo "[INFO] Calling Claude Opus to generate $count structural mutation ideas..."
|
|
310
|
-
if !
|
|
382
|
+
if ! call_claude_with_limit_check "$prompt" "opus"; then
|
|
311
383
|
echo "[WARN] Claude failed to generate structural mutation ideas" >&2
|
|
312
384
|
return 1
|
|
313
385
|
fi
|
|
@@ -335,7 +407,8 @@ Project Brief:
|
|
|
335
407
|
$(cat "$FULL_BRIEF_PATH")
|
|
336
408
|
|
|
337
409
|
Requirements for new CSV rows:
|
|
338
|
-
- IDs must
|
|
410
|
+
- IDs must use format: gen$CURRENT_GENERATION-XXX (e.g., gen$CURRENT_GENERATION-001, gen$CURRENT_GENERATION-002)
|
|
411
|
+
- Continue numbering from the highest existing ID in generation $CURRENT_GENERATION
|
|
339
412
|
- basedOnId should reference ONE of the successful algorithm IDs above (pick the best one as base)
|
|
340
413
|
- Each description should be one clear sentence combining elements from different algorithms
|
|
341
414
|
- Be specific about what elements to merge
|
|
@@ -349,7 +422,7 @@ Example descriptions:
|
|
|
349
422
|
Add exactly $count hybrid combination rows to the CSV file now."
|
|
350
423
|
|
|
351
424
|
echo "[INFO] Calling Claude Opus to generate $count crossover hybrid ideas..."
|
|
352
|
-
if !
|
|
425
|
+
if ! call_claude_with_limit_check "$prompt" "opus"; then
|
|
353
426
|
echo "[WARN] Claude failed to generate crossover ideas" >&2
|
|
354
427
|
return 1
|
|
355
428
|
fi
|
|
@@ -399,7 +472,8 @@ $top_performers"
|
|
|
399
472
|
prompt+="
|
|
400
473
|
|
|
401
474
|
Requirements for new CSV rows:
|
|
402
|
-
- IDs must
|
|
475
|
+
- IDs must use format: gen$CURRENT_GENERATION-XXX (e.g., gen$CURRENT_GENERATION-001, gen$CURRENT_GENERATION-002)
|
|
476
|
+
- Continue numbering from the highest existing ID in generation $CURRENT_GENERATION
|
|
403
477
|
- basedOnId should be empty or reference existing algorithm ID
|
|
404
478
|
- Each description should be one clear sentence describing an algorithmic approach
|
|
405
479
|
- All new rows should have empty performance and status fields
|
|
@@ -407,13 +481,17 @@ Requirements for new CSV rows:
|
|
|
407
481
|
Add exactly $TOTAL_IDEAS algorithm variation rows to the CSV file now."
|
|
408
482
|
|
|
409
483
|
echo "[INFO] Calling Claude Opus to generate $TOTAL_IDEAS ideas (legacy mode)..."
|
|
410
|
-
if !
|
|
484
|
+
if ! call_claude_with_limit_check "$prompt" "opus"; then
|
|
411
485
|
echo "[WARN] Claude failed to generate ideas" >&2
|
|
412
486
|
return 1
|
|
413
487
|
fi
|
|
414
488
|
echo "[INFO] Legacy ideas generated"
|
|
415
489
|
}
|
|
416
490
|
|
|
491
|
+
# Determine generation number for this ideation run
|
|
492
|
+
CURRENT_GENERATION=$(get_next_generation)
|
|
493
|
+
echo "[INFO] Starting ideation for generation $CURRENT_GENERATION"
|
|
494
|
+
|
|
417
495
|
# Main execution
|
|
418
496
|
if [[ $no_ai == true ]]; then
|
|
419
497
|
echo "[INFO] Manual entry mode"
|
package/bin/claude-evolve-run
CHANGED
|
@@ -141,6 +141,71 @@ update_csv_row() {
|
|
|
141
141
|
mv "$temp_file" "$FULL_CSV_PATH"
|
|
142
142
|
}
|
|
143
143
|
|
|
144
|
+
# Auto-recovery mechanism for common failures.
#
# Globals:
#   FULL_CSV_PATH (read, rewritten in the syntax_error path)
# Arguments:
#   $1 - failure type: "import_error", "syntax_error" or "zero_score"
#   $2 - captured evaluator output
#   $3 - row number of the candidate; compared against a 1-based line counter
#        over the CSV file (same comparison the original code made)
#   $4 - candidate ID (currently unused)
#   $5 - candidate description
# Returns:
#   0 when the caller should retry, 1 when no recovery is possible.
attempt_recovery() {
  local failure_type="$1"
  local error_output="$2"
  local row_num="$3"
  local id="$4"
  local description="$5"

  # Everything below is local so the function cannot clobber same-named
  # variables (temp_file, current_row, ...) in the calling script.
  local retry_marker="[RETRY: Fix syntax]"
  local quoted_field_re='^("([^"]|"")*")(,(.*))?$'
  local missing_module temp_file current_row line
  local csv_id csv_based_on csv_perf rest trailing raw_desc new_desc

  echo "[INFO] Attempting auto-recovery for $failure_type..."

  case "$failure_type" in
    "import_error")
      # Extract missing module name
      missing_module=$(echo "$error_output" | grep -o "No module named '[^']*'" | sed "s/No module named '\\([^']*\\)'/\\1/" | head -1)
      if [[ -n $missing_module ]]; then
        echo "[INFO] Detected missing module: $missing_module"

        # The name comes from the output of generated code, so treat it as
        # untrusted: only a plain top-level identifier is handed to pip. This
        # blocks option/URL injection ("-r file", "--index-url=...") and
        # dotted submodule names that are not installable packages.
        # NOTE(review): a module name is not always the PyPI package name
        # (e.g. sklearn, cv2, yaml) - auto-install can still pick the wrong
        # package; consider an allow-list.
        if [[ ! $missing_module =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
          echo "[WARN] Refusing to auto-install '$missing_module' (not a plain top-level module name)"
          echo "[INFO] Please install manually: pip install $missing_module"
          return 1
        fi

        echo "[INFO] Attempting to install via pip..."

        # Try to install the missing module
        if pip install "$missing_module" 2>&1; then
          echo "[INFO] Successfully installed $missing_module. Retrying evaluation..."
          return 0 # Signal to retry
        else
          echo "[WARN] Failed to install $missing_module automatically"
          echo "[INFO] Please install manually: pip install $missing_module"
        fi
      fi
      ;;

    "syntax_error")
      # For syntax errors, retry the mutation once with additional guidance
      echo "[INFO] Detected syntax error in generated code"

      # A candidate that already carries the marker has been retried before;
      # retrying again would loop forever, because the caller resets its
      # failure counter whenever this function returns 0.
      if [[ $description == *"$retry_marker"* ]]; then
        echo "[WARN] Candidate already retried after a syntax error; not retrying again"
        return 1
      fi

      echo "[INFO] Retrying mutation with additional constraints..."

      # Update the CSV to retry this candidate with enhanced prompt
      update_csv_row "$row_num" "" "pending"

      # Add a recovery marker to the description to guide the next attempt.
      # Only the target row is rewritten; all other lines pass through as-is.
      temp_file=$(mktemp) || return 1
      current_row=1

      while IFS= read -r line || [[ -n $line ]]; do
        if [[ $current_row -eq $row_num && $line == *,*,* ]]; then
          # id and basedOnId never contain commas, so peel them off the front
          csv_id=${line%%,*}
          rest=${line#*,}
          csv_based_on=${rest%%,*}
          rest=${rest#*,}

          if [[ $rest =~ $quoted_field_re ]]; then
            # Quoted description (may contain commas and "" escapes): keep the
            # quoting intact and insert the marker after the opening quote.
            raw_desc=${BASH_REMATCH[1]}
            raw_desc=${raw_desc#\"}
            new_desc="\"$retry_marker $raw_desc"
            trailing=${BASH_REMATCH[4]}
          else
            new_desc="$retry_marker ${rest%%,*}"
            trailing=""
            if [[ $rest == *,* ]]; then
              trailing=${rest#*,}
            fi
          fi

          csv_perf=${trailing%%,*}
          printf '%s,%s,%s,%s,pending\n' "$csv_id" "$csv_based_on" "$new_desc" "$csv_perf"
        else
          printf '%s\n' "$line"
        fi
        current_row=$((current_row + 1))
      done <"$FULL_CSV_PATH" >"$temp_file"

      mv "$temp_file" "$FULL_CSV_PATH"
      return 0 # Signal to retry
      ;;

    "zero_score")
      # For zero scores, log additional diagnostic info
      echo "[INFO] Algorithm produced zero score - likely a logic error"
      echo "[INFO] Consider adding constraints to BRIEF.md to avoid this approach"
      ;;
  esac

  return 1 # No recovery possible
}
|
|
208
|
+
|
|
144
209
|
# Main evolution loop
|
|
145
210
|
while true; do
|
|
146
211
|
# Find next candidate
|
|
@@ -198,7 +263,15 @@ if [[ -z $based_on_id || $based_on_id == "0" || $based_on_id == '""' ]]; then
|
|
|
198
263
|
parent_file="$FULL_ALGORITHM_PATH"
|
|
199
264
|
echo "[INFO] Using base algorithm (basedonID is empty or 0)"
|
|
200
265
|
else
|
|
201
|
-
|
|
266
|
+
# Handle both old format (numeric) and new format (genXX-XXX)
|
|
267
|
+
if [[ $based_on_id =~ ^[0-9]+$ ]]; then
|
|
268
|
+
# Old numeric format
|
|
269
|
+
parent_file="$FULL_OUTPUT_DIR/evolution_id${based_on_id}.py"
|
|
270
|
+
else
|
|
271
|
+
# New generation format
|
|
272
|
+
parent_file="$FULL_OUTPUT_DIR/evolution_${based_on_id}.py"
|
|
273
|
+
fi
|
|
274
|
+
|
|
202
275
|
if [[ ! -f $parent_file ]]; then
|
|
203
276
|
echo "[ERROR] Parent algorithm file not found: $parent_file" >&2
|
|
204
277
|
update_csv_row "$row_num" "" "failed"
|
|
@@ -212,8 +285,15 @@ fi
|
|
|
212
285
|
|
|
213
286
|
echo "[INFO] Using parent algorithm: $parent_file"
|
|
214
287
|
|
|
215
|
-
# Generate mutation
|
|
216
|
-
|
|
288
|
+
# Generate mutation output file
|
|
289
|
+
# Handle both old format (numeric) and new format (genXX-XXX)
|
|
290
|
+
if [[ $id =~ ^[0-9]+$ ]]; then
|
|
291
|
+
# Old numeric format
|
|
292
|
+
output_file="$FULL_OUTPUT_DIR/evolution_id${id}.py"
|
|
293
|
+
else
|
|
294
|
+
# New generation format
|
|
295
|
+
output_file="$FULL_OUTPUT_DIR/evolution_${id}.py"
|
|
296
|
+
fi
|
|
217
297
|
echo "[INFO] Generating algorithm mutation..."
|
|
218
298
|
|
|
219
299
|
# Copy parent algorithm to output file first
|
|
@@ -267,8 +347,34 @@ else
|
|
|
267
347
|
echo "=== CLAUDE RESPONSE ==="
|
|
268
348
|
} >> "${LOGFILE}-developer"
|
|
269
349
|
|
|
270
|
-
# Use tee to show output and log simultaneously
|
|
271
|
-
|
|
350
|
+
# Use tee to show output and log simultaneously, and capture output for limit detection
|
|
351
|
+
claude_output=$(echo "$prompt" | "$claude_cmd" --dangerously-skip-permissions --model $CLAUDE_MODEL -p 2>&1 | tee -a "${LOGFILE}-developer")
|
|
352
|
+
claude_exit_code=${PIPESTATUS[1]} # Get exit code from claude command, not tee
|
|
353
|
+
|
|
354
|
+
# Check for usage limit
|
|
355
|
+
if echo "$claude_output" | grep -q "Claude AI usage limit reached"; then
|
|
356
|
+
# Extract timestamp if available
|
|
357
|
+
limit_timestamp=$(echo "$claude_output" | grep -o "Claude AI usage limit reached|[0-9]*" | cut -d'|' -f2)
|
|
358
|
+
|
|
359
|
+
# Print red error message
|
|
360
|
+
echo -e "\033[31m[ERROR] CLAUDE AI USAGE LIMIT REACHED!\033[0m" >&2
|
|
361
|
+
echo -e "\033[31m[ERROR] Evolution halted due to API rate limits.\033[0m" >&2
|
|
362
|
+
|
|
363
|
+
if [[ -n $limit_timestamp ]]; then
|
|
364
|
+
# Convert timestamp to human-readable format
|
|
365
|
+
limit_date=$(date -r "$limit_timestamp" "+%Y-%m-%d %H:%M:%S" 2>/dev/null || echo "Unknown time")
|
|
366
|
+
echo -e "\033[31m[ERROR] Limit will be released at: $limit_date\033[0m" >&2
|
|
367
|
+
fi
|
|
368
|
+
|
|
369
|
+
echo -e "\033[33m[INFO] Please wait for the rate limit to reset before continuing.\033[0m" >&2
|
|
370
|
+
echo -e "\033[33m[INFO] The current candidate will remain in 'pending' status.\033[0m" >&2
|
|
371
|
+
|
|
372
|
+
# Don't mark as failed - leave it pending for retry later
|
|
373
|
+
exit 1
|
|
374
|
+
fi
|
|
375
|
+
|
|
376
|
+
# Check for other failures
|
|
377
|
+
if [[ $claude_exit_code -ne 0 ]]; then
|
|
272
378
|
echo "[ERROR] Claude failed to mutate algorithm" >&2
|
|
273
379
|
update_csv_row "$row_num" "" "failed"
|
|
274
380
|
if should_continue_after_failure; then
|
|
@@ -338,15 +444,39 @@ if [[ $eval_exit_code -eq 0 ]]; then
|
|
|
338
444
|
# Extract score from JSON (simple grep approach)
|
|
339
445
|
if score=$(echo "$eval_output" | grep -o '"score"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
|
|
340
446
|
if [[ -n $score ]]; then
|
|
341
|
-
|
|
342
|
-
echo "
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
447
|
+
# Check if score is 0 and mark as failed
|
|
448
|
+
if (( $(echo "$score == 0" | bc -l) )); then
|
|
449
|
+
update_csv_row "$row_num" "$score" "failed"
|
|
450
|
+
echo "[INFO] ✗ Evaluation failed with score 0"
|
|
451
|
+
echo "[INFO] Performance score: $score"
|
|
452
|
+
if should_continue_after_failure; then
|
|
453
|
+
continue 2
|
|
454
|
+
else
|
|
455
|
+
exit 1
|
|
456
|
+
fi
|
|
457
|
+
else
|
|
347
458
|
update_csv_row "$row_num" "$score" "complete"
|
|
348
459
|
echo "[INFO] ✓ Evaluation completed successfully"
|
|
349
460
|
echo "[INFO] Performance score: $score"
|
|
461
|
+
fi
|
|
462
|
+
else
|
|
463
|
+
# Try "performance" field
|
|
464
|
+
if score=$(echo "$eval_output" | grep -o '"performance"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
|
|
465
|
+
# Check if score is 0 and mark as failed
|
|
466
|
+
if (( $(echo "$score == 0" | bc -l) )); then
|
|
467
|
+
update_csv_row "$row_num" "$score" "failed"
|
|
468
|
+
echo "[INFO] ✗ Evaluation failed with score 0"
|
|
469
|
+
echo "[INFO] Performance score: $score"
|
|
470
|
+
if should_continue_after_failure; then
|
|
471
|
+
continue 2
|
|
472
|
+
else
|
|
473
|
+
exit 1
|
|
474
|
+
fi
|
|
475
|
+
else
|
|
476
|
+
update_csv_row "$row_num" "$score" "complete"
|
|
477
|
+
echo "[INFO] ✓ Evaluation completed successfully"
|
|
478
|
+
echo "[INFO] Performance score: $score"
|
|
479
|
+
fi
|
|
350
480
|
else
|
|
351
481
|
echo "[ERROR] No score found in evaluator output" >&2
|
|
352
482
|
echo "[ERROR] Output: $eval_output" >&2
|
|
@@ -371,7 +501,50 @@ if [[ $eval_exit_code -eq 0 ]]; then
|
|
|
371
501
|
else
|
|
372
502
|
echo "[ERROR] Evaluator failed with exit code $eval_exit_code" >&2
|
|
373
503
|
echo "[ERROR] Output: $eval_output" >&2
|
|
374
|
-
|
|
504
|
+
|
|
505
|
+
# Check for common failure patterns and attempt recovery
|
|
506
|
+
recovery_attempted=false
|
|
507
|
+
|
|
508
|
+
# Check for import errors
|
|
509
|
+
if echo "$eval_output" | grep -q "No module named"; then
|
|
510
|
+
if attempt_recovery "import_error" "$eval_output" "$row_num" "$id" "$description"; then
|
|
511
|
+
recovery_attempted=true
|
|
512
|
+
# Retry the evaluation
|
|
513
|
+
echo "[INFO] Retrying evaluation after recovery attempt..."
|
|
514
|
+
if eval_output=$("$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1); then
|
|
515
|
+
# Re-process the successful result
|
|
516
|
+
if score=$(echo "$eval_output" | grep -o '"score"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
|
|
517
|
+
if [[ -n $score ]]; then
|
|
518
|
+
if (( $(echo "$score == 0" | bc -l) )); then
|
|
519
|
+
update_csv_row "$row_num" "$score" "failed"
|
|
520
|
+
echo "[INFO] ✗ Evaluation failed with score 0"
|
|
521
|
+
else
|
|
522
|
+
update_csv_row "$row_num" "$score" "complete"
|
|
523
|
+
echo "[INFO] ✓ Recovery successful! Performance score: $score"
|
|
524
|
+
consecutive_failures=0
|
|
525
|
+
continue
|
|
526
|
+
fi
|
|
527
|
+
fi
|
|
528
|
+
fi
|
|
529
|
+
fi
|
|
530
|
+
fi
|
|
531
|
+
fi
|
|
532
|
+
|
|
533
|
+
# Check for syntax errors
|
|
534
|
+
if echo "$eval_output" | grep -q "SyntaxError"; then
|
|
535
|
+
if attempt_recovery "syntax_error" "$eval_output" "$row_num" "$id" "$description"; then
|
|
536
|
+
recovery_attempted=true
|
|
537
|
+
# Skip to next iteration to retry with enhanced prompt
|
|
538
|
+
consecutive_failures=0
|
|
539
|
+
continue
|
|
540
|
+
fi
|
|
541
|
+
fi
|
|
542
|
+
|
|
543
|
+
# If no recovery was successful, mark as failed
|
|
544
|
+
if [[ $recovery_attempted == false ]]; then
|
|
545
|
+
update_csv_row "$row_num" "" "failed"
|
|
546
|
+
fi
|
|
547
|
+
|
|
375
548
|
if should_continue_after_failure; then
|
|
376
549
|
continue
|
|
377
550
|
else
|