claude-evolve 1.5.3 → 1.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/claude-evolve-analyze +36 -12
- package/bin/claude-evolve-edit +182 -17
- package/bin/claude-evolve-ideate +79 -34
- package/bin/claude-evolve-run +71 -1
- package/bin/claude-evolve-worker +96 -39
- package/lib/ai-cli.sh +7 -2
- package/lib/config.sh +22 -16
- package/lib/csv_fixer.py +35 -0
- package/lib/memory_limit_wrapper.py +192 -0
- package/package.json +1 -1
- package/templates/config.yaml +10 -10
|
@@ -513,6 +513,9 @@ print(f'max_desc=\"{desc_escaped}\"')
|
|
|
513
513
|
# Calculate total data points for dynamic sizing
|
|
514
514
|
total_data_points=$(awk 'END {print NR-1}' "$data_file") # Subtract header row
|
|
515
515
|
|
|
516
|
+
# Count unique generations
|
|
517
|
+
unique_generations=$(awk '{if(NR>1) print $4}' "$data_file" | sort -nu | wc -l)
|
|
518
|
+
|
|
516
519
|
# AIDEV-NOTE: Dynamic dot sizing based on data point count
|
|
517
520
|
# Use significantly larger dots when there are fewer data points for better visibility
|
|
518
521
|
if [[ $total_data_points -lt 35 ]]; then
|
|
@@ -530,16 +533,24 @@ print(f'max_desc=\"{desc_escaped}\"')
|
|
|
530
533
|
# Find all generations that have data
|
|
531
534
|
generations=($(awk '{if(NR>1) print $4}' "$data_file" | sort -n | uniq))
|
|
532
535
|
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
536
|
+
# If too many generations (>10), use a simplified plot without individual generation legends
|
|
537
|
+
if [[ $unique_generations -gt 10 ]]; then
|
|
538
|
+
# Single plot with color gradient based on generation number
|
|
539
|
+
plot_cmd="\"$data_file\" using 1:3:(\$4) with points palette pointsize $regular_dot_size notitle"
|
|
540
|
+
gen_plots_added=1
|
|
541
|
+
else
|
|
542
|
+
# Original plotting with individual generation legends
|
|
543
|
+
for gen_num in "${generations[@]}"; do
|
|
544
|
+
if [[ -n $gen_num ]]; then
|
|
545
|
+
color=$(get_gen_color "$gen_num")
|
|
546
|
+
if [[ $gen_plots_added -gt 0 ]]; then
|
|
547
|
+
plot_cmd="$plot_cmd, \\"$'\n'
|
|
548
|
+
fi
|
|
549
|
+
plot_cmd="${plot_cmd} \"$data_file\" using (\$4==$gen_num?\$1:1/0):3 with points linecolor rgb \"$color\" pointsize $regular_dot_size title \"Gen $gen_num\""
|
|
550
|
+
((gen_plots_added++))
|
|
538
551
|
fi
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
fi
|
|
542
|
-
done
|
|
552
|
+
done
|
|
553
|
+
fi
|
|
543
554
|
|
|
544
555
|
# Add novel candidates
|
|
545
556
|
if [[ -s "$novel_file" ]] && [[ $(wc -l < "$novel_file") -gt 1 ]]; then
|
|
@@ -586,6 +597,9 @@ print(f'max_desc=\"{desc_escaped}\"')
|
|
|
586
597
|
set terminal png size 1200,800
|
|
587
598
|
set output "$output_file"
|
|
588
599
|
|
|
600
|
+
# Define unique generations count
|
|
601
|
+
unique_gens = $unique_generations
|
|
602
|
+
|
|
589
603
|
# Set up multiplot with proper spacing
|
|
590
604
|
set multiplot layout 2,1 margins 0.08,0.82,0.15,0.95 spacing 0.1,0.15
|
|
591
605
|
|
|
@@ -595,7 +609,9 @@ set title "$EVOLUTION_CONTEXT Algorithm Evolution Performance Over Time" font ",
|
|
|
595
609
|
unset xlabel
|
|
596
610
|
set ylabel "Performance Score"
|
|
597
611
|
set grid y # Only show horizontal grid lines
|
|
598
|
-
|
|
612
|
+
|
|
613
|
+
# Show legend only if 10 or fewer generations
|
|
614
|
+
if (unique_gens <= 10) set key outside right; else unset key
|
|
599
615
|
|
|
600
616
|
# AIDEV-NOTE: Remove x-axis entirely to avoid tick problems with large datasets
|
|
601
617
|
unset xtics
|
|
@@ -603,6 +619,9 @@ set autoscale
|
|
|
603
619
|
set yrange [*:*] # Auto-scale y-axis only
|
|
604
620
|
|
|
605
621
|
# Define colors for generations
|
|
622
|
+
# Use palette for many generations
|
|
623
|
+
if (unique_gens > 10) set palette model RGB defined (0 "#1f77b4", 1 "#ff7f0e", 2 "#2ca02c", 3 "#d62728", 4 "#9467bd", 5 "#8c564b", 6 "#e377c2")
|
|
624
|
+
|
|
606
625
|
plot $plot_cmd
|
|
607
626
|
|
|
608
627
|
#=================== BOTTOM PLOT: Generation Medians ===================
|
|
@@ -614,8 +633,13 @@ set boxwidth 0.6
|
|
|
614
633
|
unset key
|
|
615
634
|
set grid y
|
|
616
635
|
|
|
617
|
-
# Set custom x-axis labels
|
|
618
|
-
|
|
636
|
+
# Set custom x-axis labels (but hide if too many generations)
|
|
637
|
+
if (unique_gens > 10) {
|
|
638
|
+
set xtics auto
|
|
639
|
+
set xtics rotate by -45
|
|
640
|
+
} else {
|
|
641
|
+
set xtics ($xtics_labels)
|
|
642
|
+
}
|
|
619
643
|
|
|
620
644
|
# Auto-scale for generation plot too
|
|
621
645
|
set autoscale
|
package/bin/claude-evolve-edit
CHANGED
|
@@ -20,7 +20,10 @@ show_help() {
|
|
|
20
20
|
claude-evolve edit - Manage evolution candidate statuses by generation or status
|
|
21
21
|
|
|
22
22
|
USAGE:
|
|
23
|
-
claude-evolve edit <selector> <action>
|
|
23
|
+
claude-evolve edit [--recent-generations=N] <selector> <action>
|
|
24
|
+
|
|
25
|
+
OPTIONS:
|
|
26
|
+
--recent-generations=N Limit operations to the most recent N generations only
|
|
24
27
|
|
|
25
28
|
SELECTORS:
|
|
26
29
|
gen01, gen02, etc. Target specific generation
|
|
@@ -41,29 +44,56 @@ ACTIONS:
|
|
|
41
44
|
delete Delete candidates from CSV and remove .py files (asks confirmation)
|
|
42
45
|
|
|
43
46
|
EXAMPLES:
|
|
44
|
-
claude-evolve edit gen03 failed
|
|
45
|
-
claude-evolve edit failed pending
|
|
46
|
-
claude-evolve edit
|
|
47
|
-
claude-evolve edit complete failed
|
|
48
|
-
claude-evolve edit
|
|
49
|
-
claude-evolve edit
|
|
50
|
-
claude-evolve edit
|
|
47
|
+
claude-evolve edit gen03 failed # Mark all gen03 as failed
|
|
48
|
+
claude-evolve edit failed pending # Reset all failed candidates to pending
|
|
49
|
+
claude-evolve edit --recent-generations=15 failed pending # Reset only recent 15 gen failures
|
|
50
|
+
claude-evolve edit --recent-generations=5 complete failed # Re-run recent 5 gen completions
|
|
51
|
+
claude-evolve edit failed failed-retry1 # Convert failed to retry status (bug fixing)
|
|
52
|
+
claude-evolve edit complete failed # Mark all complete as failed for re-run
|
|
53
|
+
claude-evolve edit all pending # Mark everything as pending for re-run
|
|
54
|
+
claude-evolve edit gen02 reboot # Full reset of gen02 (delete files + clear data)
|
|
55
|
+
claude-evolve edit gen02 delete # Delete gen02 from CSV and remove .py files
|
|
51
56
|
|
|
52
57
|
DESCRIPTION:
|
|
53
58
|
This command helps manage evolution runs when you need to re-evaluate candidates.
|
|
54
59
|
Use status selectors (failed, complete, etc.) to bulk-change candidates by status.
|
|
55
60
|
Use 'reboot' for complete reset including file deletion.
|
|
61
|
+
Use --recent-generations to limit operations to recent work only, useful for large systems.
|
|
56
62
|
EOF
|
|
57
63
|
}
|
|
58
64
|
|
|
59
65
|
# Parse arguments
|
|
60
|
-
|
|
66
|
+
recent_generations=""
|
|
67
|
+
args=()
|
|
68
|
+
|
|
69
|
+
while [[ $# -gt 0 ]]; do
|
|
70
|
+
case "$1" in
|
|
71
|
+
--recent-generations=*)
|
|
72
|
+
recent_generations="${1#*=}"
|
|
73
|
+
if [[ ! "$recent_generations" =~ ^[1-9][0-9]*$ ]]; then
|
|
74
|
+
echo "[ERROR] --recent-generations must be a positive integer" >&2
|
|
75
|
+
exit 1
|
|
76
|
+
fi
|
|
77
|
+
shift
|
|
78
|
+
;;
|
|
79
|
+
--help|-h)
|
|
80
|
+
show_help
|
|
81
|
+
exit 0
|
|
82
|
+
;;
|
|
83
|
+
*)
|
|
84
|
+
args+=("$1")
|
|
85
|
+
shift
|
|
86
|
+
;;
|
|
87
|
+
esac
|
|
88
|
+
done
|
|
89
|
+
|
|
90
|
+
if [[ ${#args[@]} -ne 2 ]]; then
|
|
61
91
|
show_help
|
|
62
92
|
exit 1
|
|
63
93
|
fi
|
|
64
94
|
|
|
65
|
-
SELECTOR="$
|
|
66
|
-
ACTION="$
|
|
95
|
+
SELECTOR="${args[0]}"
|
|
96
|
+
ACTION="${args[1]}"
|
|
67
97
|
|
|
68
98
|
# Validate configuration
|
|
69
99
|
if ! validate_config; then
|
|
@@ -99,7 +129,11 @@ update_candidates_status() {
|
|
|
99
129
|
local new_status="$2"
|
|
100
130
|
local clear_scores="$3"
|
|
101
131
|
|
|
102
|
-
|
|
132
|
+
local filter_msg=""
|
|
133
|
+
if [[ -n "$recent_generations" ]]; then
|
|
134
|
+
filter_msg=" (limited to recent $recent_generations generations)"
|
|
135
|
+
fi
|
|
136
|
+
echo "[INFO] Updating candidates matching '$selector' to status: $new_status${filter_msg}"
|
|
103
137
|
|
|
104
138
|
# Use Python to safely edit the CSV
|
|
105
139
|
"$PYTHON_CMD" -c "
|
|
@@ -112,6 +146,7 @@ csv_file = '$FULL_CSV_PATH'
|
|
|
112
146
|
selector = '$selector'
|
|
113
147
|
new_status = '$new_status'
|
|
114
148
|
clear_scores = '$clear_scores' == 'true'
|
|
149
|
+
recent_generations = '$recent_generations'
|
|
115
150
|
|
|
116
151
|
|
|
117
152
|
try:
|
|
@@ -127,6 +162,31 @@ try:
|
|
|
127
162
|
header = rows[0]
|
|
128
163
|
updated_count = 0
|
|
129
164
|
|
|
165
|
+
# If recent_generations is specified, determine which generations to include
|
|
166
|
+
recent_gen_set = set()
|
|
167
|
+
if recent_generations and recent_generations.isdigit():
|
|
168
|
+
n_recent = int(recent_generations)
|
|
169
|
+
|
|
170
|
+
# Find all generation numbers from candidate IDs
|
|
171
|
+
all_generations = set()
|
|
172
|
+
for i in range(1, len(rows)):
|
|
173
|
+
row = rows[i]
|
|
174
|
+
if len(row) < 1:
|
|
175
|
+
continue
|
|
176
|
+
candidate_id = row[0]
|
|
177
|
+
|
|
178
|
+
# Extract generation number from candidate_id (e.g., gen01-001 -> 1)
|
|
179
|
+
match = re.match(r'^gen(\d+)-', candidate_id)
|
|
180
|
+
if match:
|
|
181
|
+
gen_num = int(match.group(1))
|
|
182
|
+
all_generations.add(gen_num)
|
|
183
|
+
|
|
184
|
+
# Get the most recent N generations
|
|
185
|
+
if all_generations:
|
|
186
|
+
sorted_generations = sorted(all_generations, reverse=True)
|
|
187
|
+
recent_gen_set = set(sorted_generations[:n_recent])
|
|
188
|
+
print(f'[INFO] Filtering to recent generations: {sorted(recent_gen_set)}', file=sys.stderr)
|
|
189
|
+
|
|
130
190
|
# Update matching rows
|
|
131
191
|
for i in range(1, len(rows)):
|
|
132
192
|
row = rows[i]
|
|
@@ -152,6 +212,18 @@ try:
|
|
|
152
212
|
else:
|
|
153
213
|
matches = current_status == selector
|
|
154
214
|
|
|
215
|
+
# Apply recent generations filter if specified
|
|
216
|
+
if matches and recent_gen_set:
|
|
217
|
+
# Extract generation number from candidate_id
|
|
218
|
+
gen_match = re.match(r'^gen(\d+)-', candidate_id)
|
|
219
|
+
if gen_match:
|
|
220
|
+
candidate_gen = int(gen_match.group(1))
|
|
221
|
+
if candidate_gen not in recent_gen_set:
|
|
222
|
+
matches = False # Filter out this candidate
|
|
223
|
+
else:
|
|
224
|
+
# Non-generation candidates (like baseline) - exclude when filtering by recent generations
|
|
225
|
+
matches = False
|
|
226
|
+
|
|
155
227
|
if matches:
|
|
156
228
|
if clear_scores:
|
|
157
229
|
# Clear everything after description (keep id, basedOnId, description)
|
|
@@ -192,6 +264,12 @@ delete_evolution_files() {
|
|
|
192
264
|
return
|
|
193
265
|
fi
|
|
194
266
|
|
|
267
|
+
local filter_msg=""
|
|
268
|
+
if [[ -n "$recent_generations" ]]; then
|
|
269
|
+
filter_msg=" (limited to recent $recent_generations generations)"
|
|
270
|
+
fi
|
|
271
|
+
echo "[INFO] Deleting evolution files for '$selector'${filter_msg}..."
|
|
272
|
+
|
|
195
273
|
local deleted_count=0
|
|
196
274
|
|
|
197
275
|
if [[ "$selector" == "all" ]]; then
|
|
@@ -225,15 +303,46 @@ import re
|
|
|
225
303
|
|
|
226
304
|
csv_file = '$FULL_CSV_PATH'
|
|
227
305
|
selector = '$selector'
|
|
306
|
+
recent_generations = '$recent_generations'
|
|
228
307
|
|
|
229
308
|
|
|
230
309
|
try:
|
|
231
310
|
with open(csv_file, 'r') as f:
|
|
232
311
|
reader = csv.reader(f)
|
|
233
|
-
|
|
312
|
+
rows = list(reader)
|
|
313
|
+
|
|
314
|
+
if not rows:
|
|
315
|
+
print('')
|
|
316
|
+
sys.exit(0)
|
|
317
|
+
|
|
318
|
+
# Skip header if present
|
|
319
|
+
start_idx = 1 if rows and rows[0] and rows[0][0].lower() == 'id' else 0
|
|
320
|
+
|
|
321
|
+
# Determine recent generations if filtering is requested
|
|
322
|
+
recent_gen_set = set()
|
|
323
|
+
if recent_generations and recent_generations.isdigit():
|
|
324
|
+
n_recent = int(recent_generations)
|
|
325
|
+
|
|
326
|
+
# Find all generation numbers from candidate IDs
|
|
327
|
+
all_generations = set()
|
|
328
|
+
for row in rows[start_idx:]:
|
|
329
|
+
if len(row) < 1:
|
|
330
|
+
continue
|
|
331
|
+
candidate_id = row[0]
|
|
332
|
+
|
|
333
|
+
# Extract generation number from candidate_id (e.g., gen01-001 -> 1)
|
|
334
|
+
match = re.match(r'^gen(\d+)-', candidate_id)
|
|
335
|
+
if match:
|
|
336
|
+
gen_num = int(match.group(1))
|
|
337
|
+
all_generations.add(gen_num)
|
|
338
|
+
|
|
339
|
+
# Get the most recent N generations
|
|
340
|
+
if all_generations:
|
|
341
|
+
sorted_generations = sorted(all_generations, reverse=True)
|
|
342
|
+
recent_gen_set = set(sorted_generations[:n_recent])
|
|
234
343
|
|
|
235
344
|
candidates = []
|
|
236
|
-
for row in
|
|
345
|
+
for row in rows[start_idx:]:
|
|
237
346
|
if len(row) < 1:
|
|
238
347
|
continue
|
|
239
348
|
|
|
@@ -249,6 +358,18 @@ try:
|
|
|
249
358
|
else:
|
|
250
359
|
matches = current_status == selector
|
|
251
360
|
|
|
361
|
+
# Apply recent generations filter if specified
|
|
362
|
+
if matches and recent_gen_set:
|
|
363
|
+
# Extract generation number from candidate_id
|
|
364
|
+
gen_match = re.match(r'^gen(\d+)-', candidate_id)
|
|
365
|
+
if gen_match:
|
|
366
|
+
candidate_gen = int(gen_match.group(1))
|
|
367
|
+
if candidate_gen not in recent_gen_set:
|
|
368
|
+
matches = False # Filter out this candidate
|
|
369
|
+
else:
|
|
370
|
+
# Non-generation candidates (like baseline) - exclude when filtering by recent generations
|
|
371
|
+
matches = False
|
|
372
|
+
|
|
252
373
|
if matches:
|
|
253
374
|
candidates.append(candidate_id)
|
|
254
375
|
|
|
@@ -284,7 +405,11 @@ except Exception as e:
|
|
|
284
405
|
delete_candidates_from_csv() {
|
|
285
406
|
local selector="$1"
|
|
286
407
|
|
|
287
|
-
|
|
408
|
+
local filter_msg=""
|
|
409
|
+
if [[ -n "$recent_generations" ]]; then
|
|
410
|
+
filter_msg=" (limited to recent $recent_generations generations)"
|
|
411
|
+
fi
|
|
412
|
+
echo "[INFO] Deleting candidates matching '$selector' from CSV${filter_msg}..."
|
|
288
413
|
|
|
289
414
|
"$PYTHON_CMD" -c "
|
|
290
415
|
import sys
|
|
@@ -293,6 +418,7 @@ from lib.evolution_csv import EvolutionCSV
|
|
|
293
418
|
import re
|
|
294
419
|
|
|
295
420
|
selector = '$selector'
|
|
421
|
+
recent_generations = '$recent_generations'
|
|
296
422
|
deleted_count = 0
|
|
297
423
|
|
|
298
424
|
with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
@@ -306,6 +432,29 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
|
306
432
|
has_header = rows and rows[0] and rows[0][0].lower() == 'id'
|
|
307
433
|
start_idx = 1 if has_header else 0
|
|
308
434
|
|
|
435
|
+
# Determine recent generations if filtering is requested
|
|
436
|
+
recent_gen_set = set()
|
|
437
|
+
if recent_generations and recent_generations.isdigit():
|
|
438
|
+
n_recent = int(recent_generations)
|
|
439
|
+
|
|
440
|
+
# Find all generation numbers from candidate IDs
|
|
441
|
+
all_generations = set()
|
|
442
|
+
for row in rows[start_idx:]:
|
|
443
|
+
if not row or not row[0].strip():
|
|
444
|
+
continue
|
|
445
|
+
candidate_id = row[0].strip()
|
|
446
|
+
|
|
447
|
+
# Extract generation number from candidate_id (e.g., gen01-001 -> 1)
|
|
448
|
+
match = re.match(r'^gen(\d+)-', candidate_id)
|
|
449
|
+
if match:
|
|
450
|
+
gen_num = int(match.group(1))
|
|
451
|
+
all_generations.add(gen_num)
|
|
452
|
+
|
|
453
|
+
# Get the most recent N generations
|
|
454
|
+
if all_generations:
|
|
455
|
+
sorted_generations = sorted(all_generations, reverse=True)
|
|
456
|
+
recent_gen_set = set(sorted_generations[:n_recent])
|
|
457
|
+
|
|
309
458
|
for row in rows[start_idx:]:
|
|
310
459
|
if not row or not row[0].strip():
|
|
311
460
|
continue
|
|
@@ -327,6 +476,18 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
|
327
476
|
else:
|
|
328
477
|
matches = current_status == selector
|
|
329
478
|
|
|
479
|
+
# Apply recent generations filter if specified
|
|
480
|
+
if matches and recent_gen_set:
|
|
481
|
+
# Extract generation number from candidate_id
|
|
482
|
+
gen_match = re.match(r'^gen(\d+)-', candidate_id)
|
|
483
|
+
if gen_match:
|
|
484
|
+
candidate_gen = int(gen_match.group(1))
|
|
485
|
+
if candidate_gen not in recent_gen_set:
|
|
486
|
+
matches = False # Filter out this candidate
|
|
487
|
+
else:
|
|
488
|
+
# Non-generation candidates (like baseline) - exclude when filtering by recent generations
|
|
489
|
+
matches = False
|
|
490
|
+
|
|
330
491
|
if matches:
|
|
331
492
|
candidates_to_delete.append(candidate_id)
|
|
332
493
|
|
|
@@ -341,7 +502,11 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
|
341
502
|
}
|
|
342
503
|
|
|
343
504
|
# Main execution
|
|
344
|
-
|
|
505
|
+
info_msg="Processing '$SELECTOR' with action: $ACTION"
|
|
506
|
+
if [[ -n "$recent_generations" ]]; then
|
|
507
|
+
info_msg="$info_msg (limited to recent $recent_generations generations)"
|
|
508
|
+
fi
|
|
509
|
+
echo "[INFO] $info_msg"
|
|
345
510
|
|
|
346
511
|
case "$ACTION" in
|
|
347
512
|
failed)
|
|
@@ -387,4 +552,4 @@ echo "[INFO] Edit operation complete"
|
|
|
387
552
|
|
|
388
553
|
# Call status command to show current state
|
|
389
554
|
echo ""
|
|
390
|
-
"$SCRIPT_DIR/claude-evolve-status" --brief
|
|
555
|
+
"$SCRIPT_DIR/claude-evolve-status" --brief
|
package/bin/claude-evolve-ideate
CHANGED
|
@@ -84,32 +84,77 @@ call_ai_for_ideation() {
|
|
|
84
84
|
original_csv_count=0
|
|
85
85
|
fi
|
|
86
86
|
|
|
87
|
-
|
|
88
|
-
local ai_output
|
|
89
|
-
ai_output=$(call_ai_with_round_robin "$prompt" "ideate" "$gen_num")
|
|
90
|
-
local ai_exit_code=$?
|
|
87
|
+
echo "[DEBUG] Original CSV has $original_csv_count data rows" >&2
|
|
91
88
|
|
|
92
|
-
#
|
|
93
|
-
|
|
94
|
-
|
|
89
|
+
# Get models for ideation
|
|
90
|
+
local model_list
|
|
91
|
+
model_list=$(get_models_for_command "ideate")
|
|
92
|
+
local models=()
|
|
93
|
+
read -ra models <<< "$model_list"
|
|
95
94
|
|
|
96
|
-
if [[ -
|
|
97
|
-
|
|
98
|
-
|
|
95
|
+
if [[ ${#models[@]} -eq 0 ]]; then
|
|
96
|
+
echo "[ERROR] No models configured for ideation" >&2
|
|
97
|
+
return 1
|
|
98
|
+
fi
|
|
99
|
+
|
|
100
|
+
# Calculate starting index for round-robin
|
|
101
|
+
local num_models=${#models[@]}
|
|
102
|
+
local start_index=$((gen_num % num_models))
|
|
103
|
+
|
|
104
|
+
# Create ordered list based on round-robin
|
|
105
|
+
local ordered_models=()
|
|
106
|
+
for ((i=0; i<num_models; i++)); do
|
|
107
|
+
local idx=$(((start_index + i) % num_models))
|
|
108
|
+
ordered_models+=("${models[$idx]}")
|
|
109
|
+
done
|
|
110
|
+
|
|
111
|
+
echo "[AI] Model order for ideate (round-robin): ${ordered_models[*]}" >&2
|
|
112
|
+
|
|
113
|
+
# Try each model until CSV changes
|
|
114
|
+
for model in "${ordered_models[@]}"; do
|
|
115
|
+
echo "[AI] Attempting ideate with $model" >&2
|
|
116
|
+
|
|
117
|
+
# Call the model directly
|
|
118
|
+
local ai_output
|
|
119
|
+
ai_output=$(call_ai_model_configured "$model" "$prompt")
|
|
120
|
+
local ai_exit_code=$?
|
|
121
|
+
|
|
122
|
+
echo "[AI] $model completed with exit code $ai_exit_code" >&2
|
|
99
123
|
|
|
100
|
-
if
|
|
101
|
-
|
|
102
|
-
|
|
124
|
+
# Check if the file was modified - this is ALL that matters
|
|
125
|
+
if [[ -f "$temp_csv_file" ]]; then
|
|
126
|
+
local new_csv_count
|
|
127
|
+
new_csv_count=$(grep -v '^[[:space:]]*$' "$temp_csv_file" | tail -n +2 | wc -l)
|
|
128
|
+
|
|
129
|
+
if [[ $new_csv_count -gt $original_csv_count ]]; then
|
|
130
|
+
echo "[INFO] CSV was modified by $model ($new_csv_count vs $original_csv_count rows) - validating format..." >&2
|
|
131
|
+
|
|
132
|
+
# Post-process to ensure all description fields are quoted
|
|
133
|
+
local fixed_csv_file="${temp_csv_file}.fixed"
|
|
134
|
+
|
|
135
|
+
# Use the CSV fixer script
|
|
136
|
+
if "$PYTHON_CMD" "$SCRIPT_DIR/../lib/csv_fixer.py" "$temp_csv_file" "$fixed_csv_file"; then
|
|
137
|
+
mv "$fixed_csv_file" "$temp_csv_file"
|
|
138
|
+
echo "[INFO] CSV format validated and fixed if needed" >&2
|
|
139
|
+
else
|
|
140
|
+
echo "[WARN] CSV format validation failed, using original" >&2
|
|
141
|
+
fi
|
|
142
|
+
|
|
143
|
+
return 0
|
|
144
|
+
else
|
|
145
|
+
echo "[INFO] CSV unchanged after $model (exit code: $ai_exit_code)" >&2
|
|
146
|
+
echo "[DEBUG] Original count: $original_csv_count, New count: $new_csv_count" >&2
|
|
147
|
+
# Continue to next model
|
|
148
|
+
fi
|
|
103
149
|
else
|
|
104
|
-
echo "[INFO] CSV
|
|
105
|
-
|
|
106
|
-
return 1
|
|
150
|
+
echo "[INFO] Temp CSV file not found after $model: $temp_csv_file" >&2
|
|
151
|
+
# Continue to next model
|
|
107
152
|
fi
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
153
|
+
done
|
|
154
|
+
|
|
155
|
+
# All models tried, none changed the file
|
|
156
|
+
echo "[ERROR] All AI models failed to generate ideas" >&2
|
|
157
|
+
return 1
|
|
113
158
|
}
|
|
114
159
|
|
|
115
160
|
# Parse arguments
|
|
@@ -455,8 +500,8 @@ Instructions:
|
|
|
455
500
|
3. For each idea, create a row with: id,parent_id,description,,pending
|
|
456
501
|
4. CRITICAL CSV FORMATTING RULES:
|
|
457
502
|
- ALWAYS wrap the description field in double quotes
|
|
458
|
-
- If the description contains quotes, escape them by doubling them (" becomes "")
|
|
459
|
-
- Example: gen01-001,gen00-000
|
|
503
|
+
- If the description contains quotes, escape them by doubling them (\" becomes \"\")
|
|
504
|
+
- Example: gen01-001,gen00-000,\"Implement adaptive RSI thresholds\",,pending
|
|
460
505
|
- BAD: gen01-001,gen00-000,Implement adaptive RSI thresholds,,pending
|
|
461
506
|
- NEVER omit quotes - unquoted descriptions cause CSV corruption
|
|
462
507
|
5. For novel ideas: leave parent_id empty
|
|
@@ -839,8 +884,8 @@ CRITICAL INSTRUCTIONS:
|
|
|
839
884
|
7. For each idea, create a row with: id,,description,,pending (empty parent_id for novel ideas)
|
|
840
885
|
8. CRITICAL CSV FORMATTING RULES:
|
|
841
886
|
- ALWAYS wrap the description field in double quotes
|
|
842
|
-
- If the description contains quotes, escape them by doubling them (" becomes "")
|
|
843
|
-
- Example: gen01-001
|
|
887
|
+
- If the description contains quotes, escape them by doubling them (\" becomes \"\")
|
|
888
|
+
- Example: gen01-001,,\"Implement adaptive RSI thresholds based on volatility\",,pending
|
|
844
889
|
- BAD: gen01-001,,Implement adaptive RSI thresholds based on volatility,,pending
|
|
845
890
|
- NEVER omit quotes around descriptions - this causes CSV parsing errors
|
|
846
891
|
9. Each description should be one clear sentence describing a novel algorithmic approach
|
|
@@ -945,12 +990,12 @@ CRITICAL INSTRUCTIONS:
|
|
|
945
990
|
7. Each parent_id MUST be one of: $valid_parent_ids
|
|
946
991
|
8. CRITICAL CSV FORMATTING RULES:
|
|
947
992
|
- ALWAYS wrap the description field in double quotes
|
|
948
|
-
- If the description contains quotes, escape them by doubling them (" becomes "")
|
|
949
|
-
- Example: gen01-001,gen00-000
|
|
993
|
+
- If the description contains quotes, escape them by doubling them (\" becomes \"\")
|
|
994
|
+
- Example: gen01-001,gen00-000,\"Lower rsi_entry from 21 to 18\",,pending
|
|
950
995
|
- BAD: gen01-001,gen00-000,Lower rsi_entry from 21 to 18,,pending
|
|
951
996
|
- NEVER omit quotes around descriptions - this causes CSV parsing errors
|
|
952
997
|
9. Each description should focus on adjusting specific parameters that exist in the parent's source code
|
|
953
|
-
10. Include current and new parameter values
|
|
998
|
+
10. Include current and new parameter values - for example: \"Lower rsi_entry from 21 to 18\"
|
|
954
999
|
|
|
955
1000
|
IMPORTANT: You must APPEND new rows to the existing CSV file. DO NOT replace the file contents. All existing rows must remain unchanged.
|
|
956
1001
|
CRITICAL: You must use your file editing tools (Edit/MultiEdit) to modify the CSV file. DO NOT return CSV text - use your tools to edit the file directly.
|
|
@@ -1032,8 +1077,8 @@ CRITICAL INSTRUCTIONS:
|
|
|
1032
1077
|
7. Each parent_id MUST be one of: $valid_parent_ids
|
|
1033
1078
|
8. CRITICAL CSV FORMATTING RULES:
|
|
1034
1079
|
- ALWAYS wrap the description field in double quotes
|
|
1035
|
-
- If the description contains quotes, escape them by doubling them (" becomes "")
|
|
1036
|
-
- Example: gen01-001,gen00-000
|
|
1080
|
+
- If the description contains quotes, escape them by doubling them (\" becomes \"\")
|
|
1081
|
+
- Example: gen01-001,gen00-000,\"Add ML-based regime detection using LSTM\",,pending
|
|
1037
1082
|
- BAD: gen01-001,gen00-000,Add ML-based regime detection using LSTM,,pending
|
|
1038
1083
|
- NEVER omit quotes around descriptions - this causes CSV parsing errors
|
|
1039
1084
|
9. Each description should focus on architectural/structural changes based on the parent's actual code
|
|
@@ -1119,8 +1164,8 @@ CRITICAL INSTRUCTIONS:
|
|
|
1119
1164
|
7. Each parent_id MUST be one of: $valid_parent_ids (choose the primary parent)
|
|
1120
1165
|
8. CRITICAL CSV FORMATTING RULES:
|
|
1121
1166
|
- ALWAYS wrap the description field in double quotes
|
|
1122
|
-
- If the description contains quotes, escape them by doubling them (" becomes "")
|
|
1123
|
-
- Example: gen01-001,gen00-000
|
|
1167
|
+
- If the description contains quotes, escape them by doubling them (\" becomes \"\")
|
|
1168
|
+
- Example: gen01-001,gen00-000,\"Combine gen01-123's RSI logic with gen01-456's volatility scaling\",,pending
|
|
1124
1169
|
- BAD: gen01-001,gen00-000,Combine gen01-123's RSI logic with gen01-456's volatility scaling,,pending
|
|
1125
1170
|
- NEVER omit quotes around descriptions - this causes CSV parsing errors
|
|
1126
1171
|
9. Each description should combine actual elements from 2+ algorithms based on their source code
|
|
@@ -1219,8 +1264,8 @@ CRITICAL INSTRUCTIONS:
|
|
|
1219
1264
|
6. For each idea, create a row with: id,parent_id,description,,pending
|
|
1220
1265
|
7. CRITICAL CSV FORMATTING RULES:
|
|
1221
1266
|
- ALWAYS wrap the description field in double quotes
|
|
1222
|
-
- If the description contains quotes, escape them by doubling them (" becomes "")
|
|
1223
|
-
- Example: gen01-001,gen00-000
|
|
1267
|
+
- If the description contains quotes, escape them by doubling them (\" becomes \"\")
|
|
1268
|
+
- Example: gen01-001,gen00-000,\"Implement adaptive RSI thresholds based on volatility\",,pending
|
|
1224
1269
|
- BAD: gen01-001,gen00-000,Implement adaptive RSI thresholds based on volatility,,pending
|
|
1225
1270
|
- NEVER omit quotes around descriptions - this causes CSV parsing errors that corrupt the data
|
|
1226
1271
|
8. Mix both parameter tuning and structural changes
|
package/bin/claude-evolve-run
CHANGED
|
@@ -478,6 +478,66 @@ ensure_baseline_entry
|
|
|
478
478
|
# Flag to track API limit status
|
|
479
479
|
api_limit_reached=false
|
|
480
480
|
|
|
481
|
+
# Check if previous generation has at least one completed item
|
|
482
|
+
check_previous_generation_has_completed() {
|
|
483
|
+
local csv_path="$1"
|
|
484
|
+
|
|
485
|
+
if [[ ! -f "$csv_path" ]]; then
|
|
486
|
+
# No CSV file yet - allow first ideation
|
|
487
|
+
return 0
|
|
488
|
+
fi
|
|
489
|
+
|
|
490
|
+
# Use Python to find the maximum generation and check if it has completed items
|
|
491
|
+
"$PYTHON_CMD" -c "
|
|
492
|
+
import csv
|
|
493
|
+
import sys
|
|
494
|
+
|
|
495
|
+
max_gen = 0
|
|
496
|
+
gen_completed = {}
|
|
497
|
+
|
|
498
|
+
try:
|
|
499
|
+
with open('$csv_path', 'r') as f:
|
|
500
|
+
reader = csv.reader(f)
|
|
501
|
+
next(reader, None) # Skip header
|
|
502
|
+
for row in reader:
|
|
503
|
+
if row and len(row) >= 5:
|
|
504
|
+
id_field = row[0].strip()
|
|
505
|
+
status = row[4].strip() if len(row) > 4 else 'pending'
|
|
506
|
+
|
|
507
|
+
if id_field.startswith('gen') and '-' in id_field:
|
|
508
|
+
try:
|
|
509
|
+
gen_part = id_field.split('-')[0] # e.g., 'gen01'
|
|
510
|
+
gen_num = int(gen_part[3:]) # Extract number after 'gen'
|
|
511
|
+
max_gen = max(max_gen, gen_num)
|
|
512
|
+
|
|
513
|
+
if gen_num not in gen_completed:
|
|
514
|
+
gen_completed[gen_num] = 0
|
|
515
|
+
|
|
516
|
+
if status == 'complete':
|
|
517
|
+
gen_completed[gen_num] += 1
|
|
518
|
+
except (ValueError, IndexError):
|
|
519
|
+
pass
|
|
520
|
+
|
|
521
|
+
# If max_gen is 0, no generations exist yet - allow ideation
|
|
522
|
+
if max_gen == 0:
|
|
523
|
+
sys.exit(0)
|
|
524
|
+
|
|
525
|
+
# Check if the most recent generation has at least one completed item
|
|
526
|
+
if gen_completed.get(max_gen, 0) > 0:
|
|
527
|
+
print(f'[INFO] Generation {max_gen:02d} has {gen_completed[max_gen]} completed items - allowing ideation', file=sys.stderr)
|
|
528
|
+
sys.exit(0)
|
|
529
|
+
else:
|
|
530
|
+
print(f'[INFO] Generation {max_gen:02d} has no completed items - blocking ideation to prevent endless loops', file=sys.stderr)
|
|
531
|
+
sys.exit(1)
|
|
532
|
+
|
|
533
|
+
except Exception as e:
|
|
534
|
+
print(f'[ERROR] Failed to check previous generation: {e}', file=sys.stderr)
|
|
535
|
+
# On error, allow ideation to maintain backward compatibility
|
|
536
|
+
sys.exit(0)
|
|
537
|
+
"
|
|
538
|
+
return $?
|
|
539
|
+
}
|
|
540
|
+
|
|
481
541
|
# Main dispatch loop
|
|
482
542
|
while true; do
|
|
483
543
|
# Clean up finished workers
|
|
@@ -506,7 +566,17 @@ while true; do
|
|
|
506
566
|
|
|
507
567
|
# Check if auto ideation is enabled
|
|
508
568
|
if [[ "$AUTO_IDEATE" == "true" || "$AUTO_IDEATE" == "1" ]]; then
|
|
509
|
-
echo "[DISPATCHER] Auto ideation is enabled.
|
|
569
|
+
echo "[DISPATCHER] Auto ideation is enabled. Checking prerequisites..."
|
|
570
|
+
|
|
571
|
+
# Check if previous generation has at least one completed item
|
|
572
|
+
if ! check_previous_generation_has_completed "$FULL_CSV_PATH"; then
|
|
573
|
+
echo "[DISPATCHER] Evolution complete - previous generation has no completed items."
|
|
574
|
+
echo "[DISPATCHER] This prevents endless ideation loops when API limits are hit."
|
|
575
|
+
echo "[DISPATCHER] Wait for current generation to complete, then run 'claude-evolve ideate' manually."
|
|
576
|
+
break
|
|
577
|
+
fi
|
|
578
|
+
|
|
579
|
+
echo "[DISPATCHER] Prerequisites met. Generating new ideas..."
|
|
510
580
|
|
|
511
581
|
# Check if claude-evolve-ideate exists
|
|
512
582
|
ideate_script="$SCRIPT_DIR/claude-evolve-ideate"
|
package/bin/claude-evolve-worker
CHANGED
|
@@ -14,34 +14,11 @@ TERMINATION_SIGNAL=""
|
|
|
14
14
|
# Cleanup function to handle termination
|
|
15
15
|
cleanup_on_exit() {
|
|
16
16
|
if [[ -n "$CURRENT_CANDIDATE_ID" ]]; then
|
|
17
|
-
|
|
18
|
-
#
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
import sys
|
|
23
|
-
sys.path.insert(0, '$SCRIPT_DIR/..')
|
|
24
|
-
from lib.evolution_csv import EvolutionCSV
|
|
25
|
-
try:
|
|
26
|
-
with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
27
|
-
csv.update_candidate_status('$CURRENT_CANDIDATE_ID', 'failed')
|
|
28
|
-
except:
|
|
29
|
-
pass # Best effort cleanup
|
|
30
|
-
" 2>/dev/null || true
|
|
31
|
-
else
|
|
32
|
-
echo "[WORKER-$$] Interrupted, leaving $CURRENT_CANDIDATE_ID for retry" >&2
|
|
33
|
-
# Optionally reset to pending instead of leaving as running
|
|
34
|
-
"$PYTHON_CMD" -c "
|
|
35
|
-
import sys
|
|
36
|
-
sys.path.insert(0, '$SCRIPT_DIR/..')
|
|
37
|
-
from lib.evolution_csv import EvolutionCSV
|
|
38
|
-
try:
|
|
39
|
-
with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
40
|
-
csv.update_candidate_status('$CURRENT_CANDIDATE_ID', 'pending')
|
|
41
|
-
except:
|
|
42
|
-
pass # Best effort cleanup
|
|
43
|
-
" 2>/dev/null || true
|
|
44
|
-
fi
|
|
17
|
+
echo "[WORKER-$$] Worker terminated while processing $CURRENT_CANDIDATE_ID" >&2
|
|
18
|
+
# If we're interrupted while processing, leave it as "running"
|
|
19
|
+
# This prevents other workers from picking it up in the same session
|
|
20
|
+
# A human can manually reset to pending if needed
|
|
21
|
+
echo "[WORKER-$$] Leaving $CURRENT_CANDIDATE_ID in current state" >&2
|
|
45
22
|
fi
|
|
46
23
|
}
|
|
47
24
|
|
|
@@ -135,6 +112,12 @@ process_candidate() {
|
|
|
135
112
|
echo "[WORKER-$$] Description: $description"
|
|
136
113
|
echo "[WORKER-$$] Based on ID: $parent_id"
|
|
137
114
|
|
|
115
|
+
# Treat "baseline-000" parent ID as empty/baseline
|
|
116
|
+
if [[ "$parent_id" == "baseline-000" ]]; then
|
|
117
|
+
parent_id=""
|
|
118
|
+
echo "[WORKER-$$] Parent ID 'baseline-000' treated as baseline (empty parent)"
|
|
119
|
+
fi
|
|
120
|
+
|
|
138
121
|
# Determine source algorithm
|
|
139
122
|
local source_file
|
|
140
123
|
if [[ -z "$parent_id" ]]; then
|
|
@@ -213,16 +196,26 @@ CRITICAL: Do NOT use any git commands (git add, git commit, git reset, etc.). On
|
|
|
213
196
|
|
|
214
197
|
# Try AI models with round-robin based on candidate ID
|
|
215
198
|
if ! call_ai_for_evolution "$evolution_prompt" "$candidate_id"; then
|
|
216
|
-
echo "[WORKER-$$] ERROR: All AI models failed to generate code" >&2
|
|
199
|
+
echo "[WORKER-$$] ERROR: All AI models failed to generate code - leaving as pending for retry" >&2
|
|
217
200
|
cd "$original_pwd"
|
|
218
201
|
rm -f "$target_file" # Clean up on failure
|
|
219
|
-
|
|
202
|
+
# Return with special code to indicate AI failure (should remain pending)
|
|
203
|
+
return 77
|
|
220
204
|
fi
|
|
221
205
|
|
|
222
206
|
# Restore working directory
|
|
223
207
|
cd "$original_pwd"
|
|
224
208
|
|
|
225
209
|
echo "[WORKER-$$] Evolution applied successfully"
|
|
210
|
+
|
|
211
|
+
# Check if the generated Python file has syntax errors
|
|
212
|
+
echo "[WORKER-$$] Checking Python syntax..." >&2
|
|
213
|
+
if ! "$PYTHON_CMD" -m py_compile "$target_file" 2>&1; then
|
|
214
|
+
echo "[WORKER-$$] ERROR: Generated Python file has syntax errors!" >&2
|
|
215
|
+
echo "[WORKER-$$] File: $target_file" >&2
|
|
216
|
+
# This is still an evaluation failure, not an AI failure
|
|
217
|
+
return 1
|
|
218
|
+
fi
|
|
226
219
|
fi
|
|
227
220
|
fi
|
|
228
221
|
|
|
@@ -239,11 +232,22 @@ CRITICAL: Do NOT use any git commands (git add, git commit, git reset, etc.). On
|
|
|
239
232
|
eval_arg=""
|
|
240
233
|
fi
|
|
241
234
|
local eval_cmd=("$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$eval_arg")
|
|
235
|
+
|
|
236
|
+
# Add memory limiting if configured
|
|
237
|
+
if [[ -n "$MEMORY_LIMIT_MB" ]] && [[ "$MEMORY_LIMIT_MB" -gt 0 ]]; then
|
|
238
|
+
eval_cmd=("$PYTHON_CMD" "$SCRIPT_DIR/../lib/memory_limit_wrapper.py" "$MEMORY_LIMIT_MB" "${eval_cmd[@]}")
|
|
239
|
+
fi
|
|
240
|
+
|
|
241
|
+
# Add timeout if configured
|
|
242
242
|
[[ -n "$timeout_seconds" ]] && eval_cmd=(timeout "$timeout_seconds" "${eval_cmd[@]}")
|
|
243
243
|
|
|
244
244
|
# Run evaluation with tee to both display and capture output
|
|
245
245
|
# Use stdbuf to disable buffering for real-time output
|
|
246
|
-
|
|
246
|
+
# IMPORTANT: Use PIPESTATUS to get the exit code of the evaluation command, not tee
|
|
247
|
+
stdbuf -o0 -e0 "${eval_cmd[@]}" 2>&1 | tee "$eval_output_file" >&2
|
|
248
|
+
local eval_exit_code=${PIPESTATUS[0]} # Get exit code of first command in pipe
|
|
249
|
+
|
|
250
|
+
if [[ $eval_exit_code -eq 0 ]]; then
|
|
247
251
|
local eval_end=$(date +%s)
|
|
248
252
|
local eval_duration=$((eval_end - eval_start))
|
|
249
253
|
|
|
@@ -353,14 +357,19 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
|
353
357
|
echo "[WORKER-$$] Output: $eval_output" >&2
|
|
354
358
|
# rm -f "$eval_output_file" # Keep for debugging
|
|
355
359
|
echo "[WORKER-$$] Evaluation output saved to: $eval_output_file" >&2
|
|
360
|
+
# Clear CURRENT_CANDIDATE_ID before returning
|
|
361
|
+
CURRENT_CANDIDATE_ID=""
|
|
356
362
|
return 1
|
|
357
363
|
fi
|
|
358
364
|
|
|
359
365
|
# Clean up temp file (comment out to keep for debugging)
|
|
360
366
|
# rm -f "$eval_output_file"
|
|
361
367
|
echo "[WORKER-$$] Evaluation output saved to: $eval_output_file" >&2
|
|
368
|
+
|
|
369
|
+
# Clear CURRENT_CANDIDATE_ID on successful completion
|
|
370
|
+
CURRENT_CANDIDATE_ID=""
|
|
362
371
|
else
|
|
363
|
-
local exit_code
|
|
372
|
+
local exit_code=$eval_exit_code
|
|
364
373
|
# Read any output that was captured before failure
|
|
365
374
|
eval_output=$(<"$eval_output_file")
|
|
366
375
|
# rm -f "$eval_output_file" # Keep for debugging
|
|
@@ -370,22 +379,56 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
|
370
379
|
echo "[WORKER-$$] Output: $eval_output" >&2
|
|
371
380
|
|
|
372
381
|
# Mark as failed in CSV
|
|
373
|
-
"$
|
|
382
|
+
echo "[WORKER-$$] Marking $candidate_id as failed in CSV" >&2
|
|
383
|
+
if ! "$PYTHON_CMD" -c "
|
|
374
384
|
import sys
|
|
375
385
|
sys.path.insert(0, '$SCRIPT_DIR/..')
|
|
376
386
|
from lib.evolution_csv import EvolutionCSV
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
387
|
+
try:
|
|
388
|
+
with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
389
|
+
success = csv.update_candidate_status('$candidate_id', 'failed')
|
|
390
|
+
if not success:
|
|
391
|
+
print(f'ERROR: Failed to update status for {candidate_id}', file=sys.stderr)
|
|
392
|
+
sys.exit(1)
|
|
393
|
+
except Exception as e:
|
|
394
|
+
print(f'ERROR: Exception updating status: {e}', file=sys.stderr)
|
|
395
|
+
sys.exit(1)
|
|
396
|
+
" 2>&1; then
|
|
397
|
+
echo "[WORKER-$$] ERROR: Failed to update CSV status to failed" >&2
|
|
398
|
+
else
|
|
399
|
+
echo "[WORKER-$$] Successfully marked $candidate_id as failed" >&2
|
|
400
|
+
fi
|
|
380
401
|
|
|
402
|
+
# Clear CURRENT_CANDIDATE_ID before returning to prevent cleanup handler from resetting it
|
|
403
|
+
CURRENT_CANDIDATE_ID=""
|
|
381
404
|
return $exit_code
|
|
382
405
|
fi
|
|
383
406
|
}
|
|
384
407
|
|
|
408
|
+
# Don't reset running candidates on startup - they might be legitimately being processed by another worker
|
|
409
|
+
|
|
385
410
|
# Main worker loop
|
|
386
411
|
echo "[WORKER-$$] Worker started"
|
|
387
412
|
|
|
388
413
|
while true; do
|
|
414
|
+
# Debug: Show current status of all candidates
|
|
415
|
+
echo "[WORKER-$$] Current candidate statuses:" >&2
|
|
416
|
+
"$PYTHON_CMD" -c "
|
|
417
|
+
import sys
|
|
418
|
+
sys.path.insert(0, '$SCRIPT_DIR/..')
|
|
419
|
+
from lib.evolution_csv import EvolutionCSV
|
|
420
|
+
with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
421
|
+
rows = csv._read_csv()
|
|
422
|
+
if rows:
|
|
423
|
+
start_idx = 1 if rows and rows[0] and rows[0][0].lower() == 'id' else 0
|
|
424
|
+
status_count = {}
|
|
425
|
+
for row in rows[start_idx:]:
|
|
426
|
+
if len(row) > 4:
|
|
427
|
+
status = row[4].strip() or 'pending'
|
|
428
|
+
status_count[status] = status_count.get(status, 0) + 1
|
|
429
|
+
print(f'Status counts: {status_count}', file=sys.stderr)
|
|
430
|
+
" 2>&1 || true
|
|
431
|
+
|
|
389
432
|
# Try to claim a pending candidate
|
|
390
433
|
candidate_info=$("$PYTHON_CMD" -c "
|
|
391
434
|
import sys
|
|
@@ -412,12 +455,26 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
|
412
455
|
# Set current candidate for cleanup
|
|
413
456
|
CURRENT_CANDIDATE_ID="$candidate_id"
|
|
414
457
|
|
|
415
|
-
# Process the candidate
|
|
416
|
-
|
|
458
|
+
# Process the candidate and capture exit code
|
|
459
|
+
process_candidate "$candidate_id" "$parent_id" "$description"
|
|
460
|
+
process_exit_code=$?
|
|
461
|
+
|
|
462
|
+
if [[ $process_exit_code -eq 0 ]]; then
|
|
417
463
|
echo "[WORKER-$$] Successfully processed $candidate_id"
|
|
464
|
+
elif [[ $process_exit_code -eq 77 ]]; then
|
|
465
|
+
# Special exit code 77 means AI failed to generate code
|
|
466
|
+
echo "[WORKER-$$] AI generation failed for $candidate_id - marking as failed-ai-retry"
|
|
467
|
+
# Mark with special status that indicates AI generation failed (not evaluation)
|
|
468
|
+
"$PYTHON_CMD" -c "
|
|
469
|
+
import sys
|
|
470
|
+
sys.path.insert(0, '$SCRIPT_DIR/..')
|
|
471
|
+
from lib.evolution_csv import EvolutionCSV
|
|
472
|
+
with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
473
|
+
csv.update_candidate_status('$candidate_id', 'failed-ai-retry')
|
|
474
|
+
" 2>/dev/null || true
|
|
418
475
|
else
|
|
419
476
|
echo "[WORKER-$$] Failed to process $candidate_id"
|
|
420
|
-
#
|
|
477
|
+
# Other failures (evaluation errors, etc) mark as failed
|
|
421
478
|
"$PYTHON_CMD" -c "
|
|
422
479
|
import sys
|
|
423
480
|
sys.path.insert(0, '$SCRIPT_DIR/..')
|
package/lib/ai-cli.sh
CHANGED
|
@@ -19,6 +19,11 @@ call_ai_model_configured() {
|
|
|
19
19
|
ai_output=$(timeout 300 claude --dangerously-skip-permissions --model "$model_name" -p "$prompt" 2>&1)
|
|
20
20
|
local ai_exit_code=$?
|
|
21
21
|
;;
|
|
22
|
+
gpt-5)
|
|
23
|
+
local ai_output
|
|
24
|
+
ai_output=$(timeout 300 codex exec -m gpt-5 --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
|
|
25
|
+
local ai_exit_code=$?
|
|
26
|
+
;;
|
|
22
27
|
o3)
|
|
23
28
|
local ai_output
|
|
24
29
|
ai_output=$(timeout 300 codex exec -m o3 --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
|
|
@@ -100,7 +105,7 @@ clean_ai_output() {
|
|
|
100
105
|
local model_name="$2"
|
|
101
106
|
|
|
102
107
|
# Handle codex-specific output format
|
|
103
|
-
if [[ "$model_name" == "codex" || "$model_name" == "o3" ]]; then
|
|
108
|
+
if [[ "$model_name" == "codex" || "$model_name" == "o3" || "$model_name" == "gpt-5" ]]; then
|
|
104
109
|
# Clean codex output - extract content between "codex" marker and "tokens used"
|
|
105
110
|
if echo "$output" | grep -q "^\[.*\] codex$"; then
|
|
106
111
|
# Extract content between "codex" line and "tokens used" line
|
|
@@ -191,7 +196,7 @@ call_ai_with_round_robin() {
|
|
|
191
196
|
ai_output=$(call_ai_model_configured "$model" "$prompt")
|
|
192
197
|
local ai_exit_code=$?
|
|
193
198
|
|
|
194
|
-
# Just check exit code
|
|
199
|
+
# Just check exit code
|
|
195
200
|
if [[ $ai_exit_code -eq 0 ]]; then
|
|
196
201
|
# Clean output if needed
|
|
197
202
|
ai_output=$(clean_ai_output "$ai_output" "$model")
|
package/lib/config.sh
CHANGED
|
@@ -49,17 +49,13 @@ DEFAULT_AUTO_IDEATE=true
|
|
|
49
49
|
# Default retry value
|
|
50
50
|
DEFAULT_MAX_RETRIES=3
|
|
51
51
|
|
|
52
|
-
# Default
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
DEFAULT_LLM_CLI_VALUES[3]='claude --dangerously-skip-permissions --model opus -p "{{PROMPT}}"'
|
|
60
|
-
DEFAULT_LLM_CLI_VALUES[4]='claude --dangerously-skip-permissions --model sonnet -p "{{PROMPT}}"'
|
|
61
|
-
DEFAULT_LLM_RUN="sonnet"
|
|
62
|
-
DEFAULT_LLM_IDEATE="gemini o3 opus"
|
|
52
|
+
# Default memory limit (in MB, 0 means no limit)
|
|
53
|
+
# Set to reasonable limit for ML workloads - about half of available system RAM
|
|
54
|
+
DEFAULT_MEMORY_LIMIT_MB=12288
|
|
55
|
+
|
|
56
|
+
# Default LLM CLI configuration - use simple variables instead of arrays
|
|
57
|
+
DEFAULT_LLM_RUN="sonnet gpt-5 sonnet gpt-5"
|
|
58
|
+
DEFAULT_LLM_IDEATE="gemini gpt-5 opus"
|
|
63
59
|
|
|
64
60
|
# Load configuration from config file
|
|
65
61
|
load_config() {
|
|
@@ -96,9 +92,13 @@ load_config() {
|
|
|
96
92
|
# Set retry default
|
|
97
93
|
MAX_RETRIES="$DEFAULT_MAX_RETRIES"
|
|
98
94
|
|
|
95
|
+
# Set memory limit default
|
|
96
|
+
MEMORY_LIMIT_MB="$DEFAULT_MEMORY_LIMIT_MB"
|
|
97
|
+
|
|
99
98
|
# Set LLM CLI defaults (compatibility for older bash)
|
|
100
99
|
# Initialize associative array for LLM commands
|
|
101
100
|
# Use simpler approach for compatibility
|
|
101
|
+
LLM_CLI_gpt_5='codex exec -m gpt-5 --dangerously-bypass-approvals-and-sandbox "{{PROMPT}}"'
|
|
102
102
|
LLM_CLI_o3='codex exec -m o3 --dangerously-bypass-approvals-and-sandbox "{{PROMPT}}"'
|
|
103
103
|
LLM_CLI_codex='codex exec --dangerously-bypass-approvals-and-sandbox "{{PROMPT}}"'
|
|
104
104
|
LLM_CLI_gemini='gemini -y -p "{{PROMPT}}"'
|
|
@@ -202,12 +202,14 @@ load_config() {
|
|
|
202
202
|
# Model definition - key is model name, value is command template
|
|
203
203
|
# Remove single quotes from value if present
|
|
204
204
|
value=$(echo "$value" | sed "s/^'//;s/'$//")
|
|
205
|
+
# Convert dashes to underscores for bash variable names
|
|
206
|
+
var_key=$(echo "$key" | sed 's/-/_/g')
|
|
205
207
|
# Debug config loading
|
|
206
208
|
if [[ "${DEBUG_CONFIG:-}" == "true" ]]; then
|
|
207
|
-
echo "[CONFIG DEBUG] Setting LLM_CLI_${
|
|
209
|
+
echo "[CONFIG DEBUG] Setting LLM_CLI_${var_key} = '$value'" >&2
|
|
208
210
|
fi
|
|
209
211
|
# Use dynamic variable name for compatibility
|
|
210
|
-
eval "LLM_CLI_${
|
|
212
|
+
eval "LLM_CLI_${var_key}=\"$value\""
|
|
211
213
|
fi
|
|
212
214
|
else
|
|
213
215
|
# Handle top-level keys
|
|
@@ -221,6 +223,7 @@ load_config() {
|
|
|
221
223
|
python_cmd) PYTHON_CMD="$value" ;;
|
|
222
224
|
auto_ideate) AUTO_IDEATE="$value" ;;
|
|
223
225
|
max_retries) MAX_RETRIES="$value" ;;
|
|
226
|
+
memory_limit_mb) MEMORY_LIMIT_MB="$value" ;;
|
|
224
227
|
evolution_dir)
|
|
225
228
|
echo "[WARN] evolution_dir in config is ignored - automatically inferred from config file location" >&2
|
|
226
229
|
;;
|
|
@@ -316,14 +319,17 @@ show_config() {
|
|
|
316
319
|
echo " Lock timeout: $LOCK_TIMEOUT"
|
|
317
320
|
echo " Auto ideate: $AUTO_IDEATE"
|
|
318
321
|
echo " Max retries: $MAX_RETRIES"
|
|
322
|
+
echo " Memory limit: ${MEMORY_LIMIT_MB}MB"
|
|
319
323
|
echo " LLM configuration:"
|
|
320
324
|
# Show LLM configurations using dynamic variable names
|
|
321
|
-
for model in o3 codex gemini opus sonnet; do
|
|
325
|
+
for model in gpt_5 o3 codex gemini opus sonnet; do
|
|
322
326
|
var_name="LLM_CLI_${model}"
|
|
323
327
|
if [[ -n "${!var_name}" ]]; then
|
|
324
|
-
|
|
328
|
+
# Convert underscore back to dash for display
|
|
329
|
+
display_name=$(echo "$model" | sed 's/_/-/g')
|
|
330
|
+
echo " $display_name: ${!var_name}"
|
|
325
331
|
fi
|
|
326
332
|
done
|
|
327
333
|
echo " LLM for run: $LLM_RUN"
|
|
328
334
|
echo " LLM for ideate: $LLM_IDEATE"
|
|
329
|
-
}
|
|
335
|
+
}
|
package/lib/csv_fixer.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
CSV format fixer for claude-evolve
|
|
4
|
+
Ensures proper quoting of CSV fields, especially descriptions
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import csv
|
|
8
|
+
import sys
|
|
9
|
+
|
|
10
|
+
def fix_csv_format(input_file, output_file):
    """
    Rewrite a CSV file so that every field is emitted with explicit quoting.

    All rows are read into memory before the output file is opened, so
    ``input_file`` and ``output_file`` may name the same path.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the CSV file to write.

    Raises:
        OSError: If either file cannot be opened.
        csv.Error: If the input cannot be parsed as CSV.
    """
    # newline='' hands line-ending handling to the csv module, so newlines
    # embedded inside quoted fields are preserved instead of being translated.
    with open(input_file, 'r', newline='') as infile:
        rows = list(csv.reader(infile))

    with open(output_file, 'w', newline='') as outfile:
        # csv.reader yields every field as str, so QUOTE_NONNUMERIC ends up
        # quoting all of them (including values that look like numbers).
        writer = csv.writer(outfile, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerows(rows)


if __name__ == "__main__":
    # Script entry point: exactly two arguments, the input and output paths.
    if len(sys.argv) != 3:
        print("Usage: csv_fixer.py <input_file> <output_file>", file=sys.stderr)
        sys.exit(1)

    try:
        fix_csv_format(sys.argv[1], sys.argv[2])
    except Exception as e:
        print(f"Error fixing CSV: {e}", file=sys.stderr)
        sys.exit(1)
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Memory-limited execution wrapper for claude-evolve evaluations.
|
|
4
|
+
|
|
5
|
+
This script runs a command with memory limits to prevent runaway algorithms
|
|
6
|
+
from consuming all system memory and crashing the machine.
|
|
7
|
+
"""
|
|
8
|
+
import sys
|
|
9
|
+
import os
|
|
10
|
+
import subprocess
|
|
11
|
+
import signal
|
|
12
|
+
import time
|
|
13
|
+
import resource
|
|
14
|
+
from typing import Optional
|
|
15
|
+
|
|
16
|
+
def set_memory_limit(limit_mb: int) -> None:
    """Cap the calling process's memory through ``resource.setrlimit``.

    The cap is applied to the address-space limit (RLIMIT_AS) and, on a
    best-effort basis, to the data-segment limit (RLIMIT_DATA). Both the soft
    and the hard limit are set. Failures are reported on stderr as a warning
    and never raised.

    Args:
        limit_mb: Requested limit in megabytes.
    """
    requested_bytes = limit_mb * 1024 * 1024

    def _cap(which: int) -> int:
        """Apply the cap to one rlimit and return the byte value actually set."""
        _, hard = resource.getrlimit(which)
        # Asking for more than the existing hard limit makes setrlimit fail,
        # which would leave no limit applied at all; clamp to the hard limit.
        if hard != resource.RLIM_INFINITY and hard < requested_bytes:
            ceiling = hard
        else:
            ceiling = requested_bytes
        resource.setrlimit(which, (ceiling, ceiling))
        return ceiling

    try:
        applied_bytes = _cap(resource.RLIMIT_AS)

        # Also try to set the data segment limit if available
        try:
            _cap(resource.RLIMIT_DATA)
        except (OSError, ValueError):
            # Not available on all systems
            pass

        applied_mb = applied_bytes // (1024 * 1024)
        print(f"[MEMORY] Set memory limit to {applied_mb}MB", file=sys.stderr)

    except (OSError, ValueError) as e:
        print(f"[MEMORY] Warning: Could not set memory limit: {e}", file=sys.stderr)
|
|
37
|
+
|
|
38
|
+
def _terminate_process_group(process: subprocess.Popen, grace_seconds: float = 2.0) -> None:
    """Send SIGTERM to the process group of ``process``; SIGKILL it if still alive after the grace period."""
    try:
        pgid = os.getpgid(process.pid)
        os.killpg(pgid, signal.SIGTERM)
        try:
            # Returns as soon as the process exits instead of always sleeping.
            process.wait(timeout=grace_seconds)
        except subprocess.TimeoutExpired:
            os.killpg(pgid, signal.SIGKILL)
    except ProcessLookupError:
        # The process (group) disappeared on its own.
        pass


def monitor_memory_usage_native(process: subprocess.Popen, limit_mb: int) -> Optional[str]:
    """Poll the RSS of ``process`` with ``ps`` and kill its process group once it exceeds ``limit_mb``.

    Args:
        process: The running child process to watch.
        limit_mb: Resident-set-size limit in megabytes.

    Returns:
        A "Memory limit exceeded" message if the process was terminated,
        otherwise None (the process ended on its own, or ``ps`` cannot be run).
    """
    while process.poll() is None:
        try:
            ps_result = subprocess.run(
                ["ps", "-o", "rss=", "-p", str(process.pid)],
                capture_output=True,
                text=True,
                timeout=1
            )
        except subprocess.TimeoutExpired:
            time.sleep(0.5)
            continue
        except OSError as e:
            # ps is missing or not executable: retrying cannot help.
            print(f"[MEMORY] Warning: Could not run ps, memory monitoring disabled: {e}", file=sys.stderr)
            return None

        rss_text = ps_result.stdout.strip()
        if ps_result.returncode == 0 and rss_text:
            try:
                # ps returns RSS in KB, convert to MB
                memory_mb = int(rss_text) / 1024
            except ValueError:
                memory_mb = None

            if memory_mb is not None and memory_mb > limit_mb:
                print(f"[MEMORY] Process exceeded {limit_mb}MB limit (using {memory_mb:.1f}MB), terminating", file=sys.stderr)
                _terminate_process_group(process)
                return f"Memory limit exceeded: {memory_mb:.1f}MB > {limit_mb}MB"

        time.sleep(0.5)  # Check every 500ms

    return None


def monitor_memory_usage(process: subprocess.Popen, limit_mb: int) -> Optional[str]:
    """Watch the RSS of ``process`` and kill its process group once it exceeds ``limit_mb``.

    Uses psutil when it can be imported and falls back to
    ``monitor_memory_usage_native`` otherwise.

    Args:
        process: The running child process to watch.
        limit_mb: Resident-set-size limit in megabytes.

    Returns:
        A "Memory limit exceeded" message if the process was terminated,
        otherwise None.
    """
    try:
        import psutil
    except ImportError:
        # psutil not available, use native monitoring
        return monitor_memory_usage_native(process, limit_mb)

    try:
        # Constructing the handle can itself fail if the child already exited,
        # so it sits inside the same handler as the polling loop.
        ps_process = psutil.Process(process.pid)

        while process.poll() is None:
            memory_mb = ps_process.memory_info().rss / (1024 * 1024)

            if memory_mb > limit_mb:
                print(f"[MEMORY] Process exceeded {limit_mb}MB limit (using {memory_mb:.1f}MB), terminating", file=sys.stderr)
                _terminate_process_group(process)
                return f"Memory limit exceeded: {memory_mb:.1f}MB > {limit_mb}MB"

            time.sleep(0.5)  # Check every 500ms
    except (psutil.NoSuchProcess, psutil.AccessDenied):
        # Process already terminated, or it cannot be inspected
        pass

    return None
|
|
114
|
+
|
|
115
|
+
def main():
    """Run ``<command> [args...]`` under a memory limit and mirror its output.

    Usage: memory_limit_wrapper.py <memory_limit_mb> <command> [args...]

    The child's stdout and stderr are merged and streamed to this process's
    stdout. Relies on ``set_memory_limit`` and ``monitor_memory_usage`` from
    this module.

    Exit status:
        1    bad arguments or an unexpected error
        127  command not found
        130  interrupted by the user
        137  child was terminated for exceeding the memory limit
        128+N  child was killed by signal N
        otherwise the child's own exit status
    """
    if len(sys.argv) < 3:
        print("Usage: memory_limit_wrapper.py <memory_limit_mb> <command> [args...]", file=sys.stderr)
        sys.exit(1)

    try:
        memory_limit_mb = int(sys.argv[1])
    except ValueError:
        print(f"Error: Invalid memory limit '{sys.argv[1]}' - must be integer MB", file=sys.stderr)
        sys.exit(1)

    command = sys.argv[2:]

    if memory_limit_mb <= 0:
        print("[MEMORY] No memory limit set (0 or negative value)", file=sys.stderr)
        # Just exec the command directly without limits
        try:
            os.execvp(command[0], command)
        except FileNotFoundError:
            print(f"Error: Command not found: {command[0]}", file=sys.stderr)
            sys.exit(127)

    # Set memory limits for this process (inherited by subprocess)
    set_memory_limit(memory_limit_mb)

    import threading

    # Bound before the try block so the interrupt handler can always test it.
    process = None
    try:
        # Start the child in its own session/process group for easier cleanup
        process = subprocess.Popen(
            command,
            start_new_session=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            bufsize=1  # Line buffered
        )

        # Monitor memory usage in background
        monitor_result = []

        def memory_monitor():
            monitor_result.append(monitor_memory_usage(process, memory_limit_mb))

        monitor_thread = threading.Thread(target=memory_monitor, daemon=True)
        monitor_thread.start()

        # Stream output in real time, byte-for-byte as the child wrote it
        for line in process.stdout:
            sys.stdout.write(line)
            sys.stdout.flush()

        # Wait for completion
        return_code = process.wait()

        # The monitor may still be finishing its kill sequence; wait for its
        # verdict before deciding how to exit, otherwise a memory kill would
        # be reported as an ordinary signal death.
        monitor_thread.join(timeout=5)
        memory_error = monitor_result[0] if monitor_result else None

        # Check if we killed it due to memory
        if memory_error:
            print(f"[MEMORY] {memory_error}", file=sys.stderr)
            sys.exit(137)  # 128 + SIGKILL

        if return_code < 0:
            # Popen reports death-by-signal as -N; use the shell's 128+N form.
            return_code = 128 - return_code
        sys.exit(return_code)

    except FileNotFoundError:
        print(f"Error: Command not found: {command[0]}", file=sys.stderr)
        sys.exit(127)
    except KeyboardInterrupt:
        print("[MEMORY] Interrupted by user", file=sys.stderr)
        if process is not None:
            try:
                os.killpg(os.getpgid(process.pid), signal.SIGTERM)
            except OSError:
                # Child already gone or not signalable; nothing left to clean up.
                pass
        sys.exit(130)
    except Exception as e:
        print(f"[MEMORY] Unexpected error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
|
package/package.json
CHANGED
package/templates/config.yaml
CHANGED
|
@@ -46,6 +46,11 @@ auto_ideate: true
|
|
|
46
46
|
# Maximum number of retries for failed candidates before marking as permanently failed
|
|
47
47
|
max_retries: 3
|
|
48
48
|
|
|
49
|
+
# Memory protection configuration
|
|
50
|
+
# Memory limit in MB for evaluation processes (0 = no limit)
|
|
51
|
+
# This prevents runaway algorithms from consuming all system memory
|
|
52
|
+
memory_limit_mb: 2048
|
|
53
|
+
|
|
49
54
|
# Parallel execution configuration
|
|
50
55
|
parallel:
|
|
51
56
|
# Enable parallel execution of evolution candidates
|
|
@@ -59,16 +64,11 @@ parallel:
|
|
|
59
64
|
|
|
60
65
|
# LLM/AI CLI configuration
|
|
61
66
|
llm_cli:
|
|
62
|
-
# How to run each CLI for each LLM option
|
|
63
|
-
# {{PROMPT}} will be replaced with the actual prompt text
|
|
64
|
-
o3: 'codex exec -m o3 --dangerously-bypass-approvals-and-sandbox "{{PROMPT}}"'
|
|
65
|
-
codex: 'codex exec --dangerously-bypass-approvals-and-sandbox "{{PROMPT}}"'
|
|
66
|
-
gemini: 'gemini -y -p "{{PROMPT}}"'
|
|
67
|
-
opus: 'claude --dangerously-skip-permissions --model opus -p "{{PROMPT}}"'
|
|
68
|
-
sonnet: 'claude --dangerously-skip-permissions --model sonnet -p "{{PROMPT}}"'
|
|
69
|
-
|
|
70
67
|
# What to run for each sub-command
|
|
71
68
|
# Models are tried in order, with round-robin distribution across candidates
|
|
72
69
|
# You can repeat models for weighted selection (e.g., "sonnet sonnet gemini" for 2:1 ratio)
|
|
73
|
-
|
|
74
|
-
|
|
70
|
+
|
|
71
|
+
# commented out because these change over time; if you want to fix them in a particular
|
|
72
|
+
# configuration, uncomment them and set them
|
|
73
|
+
#run: sonnet
|
|
74
|
+
#ideate: gemini gpt-5 opus
|