claude-evolve 1.3.3 → 1.3.5
This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +21 -0
- package/bin/claude-evolve-analyze +33 -16
- package/bin/claude-evolve-run +5 -5
- package/bin/claude-evolve-worker +2 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -196,6 +196,27 @@ your-project/
|
|
|
196
196
|
└── (your main project files)
|
|
197
197
|
```
|
|
198
198
|
|
|
199
|
+
## Environment Variables for Evaluators
|
|
200
|
+
|
|
201
|
+
When your evaluator.py runs, it has access to the `EXPERIMENT_ID` environment variable containing the current experiment's ID (e.g., `gen07-001`). This allows evaluators to:
|
|
202
|
+
|
|
203
|
+
- Save experiment-specific output files
|
|
204
|
+
- Log metrics with experiment identifiers
|
|
205
|
+
- Implement experiment-aware logic
|
|
206
|
+
- Track which algorithm variant is being evaluated
|
|
207
|
+
|
|
208
|
+
Example usage in evaluator.py:
|
|
209
|
+
```python
|
|
210
|
+
import os
|
|
211
|
+
|
|
212
|
+
# Get the current experiment ID
|
|
213
|
+
experiment_id = os.environ.get('EXPERIMENT_ID', 'unknown')
|
|
214
|
+
|
|
215
|
+
# Use it for logging or file naming
|
|
216
|
+
output_file = f"results_{experiment_id}.json"
|
|
217
|
+
print(f"Evaluating experiment: {experiment_id}")
|
|
218
|
+
```
|
|
219
|
+
|
|
199
220
|
## Configuration
|
|
200
221
|
|
|
201
222
|
Edit `evolution/config.yaml` to customize:
|
|
@@ -222,21 +222,16 @@ if command -v gnuplot >/dev/null 2>&1 && [[ $valid_performance_count -gt 0 ]]; t
|
|
|
222
222
|
echo "# Row ID Performance Generation" >"$data_file"
|
|
223
223
|
echo "# Generation AvgPerformance Color" >"$gen_avg_file"
|
|
224
224
|
|
|
225
|
-
# Get color by generation number
|
|
225
|
+
# Get color by generation number (rotates through 5 colors)
|
|
226
226
|
get_gen_color() {
|
|
227
227
|
local gen_num="$1"
|
|
228
|
-
|
|
228
|
+
local color_index=$(( (gen_num - 1) % 5 + 1 ))
|
|
229
|
+
case $color_index in
|
|
229
230
|
1) echo "#1f77b4" ;; # blue
|
|
230
231
|
2) echo "#ff7f0e" ;; # orange
|
|
231
232
|
3) echo "#2ca02c" ;; # green
|
|
232
233
|
4) echo "#d62728" ;; # red
|
|
233
234
|
5) echo "#9467bd" ;; # purple
|
|
234
|
-
6) echo "#8c564b" ;; # brown
|
|
235
|
-
7) echo "#e377c2" ;; # pink
|
|
236
|
-
8) echo "#7f7f7f" ;; # gray
|
|
237
|
-
9) echo "#bcbd22" ;; # olive
|
|
238
|
-
10) echo "#17becf" ;; # cyan
|
|
239
|
-
*) echo "#cccccc" ;; # default gray
|
|
240
235
|
esac
|
|
241
236
|
}
|
|
242
237
|
|
|
@@ -284,8 +279,9 @@ if command -v gnuplot >/dev/null 2>&1 && [[ $valid_performance_count -gt 0 ]]; t
|
|
|
284
279
|
fi
|
|
285
280
|
done <"$csv_file"
|
|
286
281
|
|
|
287
|
-
# Create generation averages file
|
|
282
|
+
# Create generation averages file and track max generation
|
|
288
283
|
gen_index=1
|
|
284
|
+
max_gen_num=0
|
|
289
285
|
for gen in $(cut -d' ' -f1 "$gen_data_temp" | sort -u); do
|
|
290
286
|
if grep -q "^$gen " "$gen_data_temp"; then
|
|
291
287
|
# Calculate average for this generation
|
|
@@ -294,6 +290,10 @@ if command -v gnuplot >/dev/null 2>&1 && [[ $valid_performance_count -gt 0 ]]; t
|
|
|
294
290
|
if [[ $count -gt 0 ]]; then
|
|
295
291
|
avg=$(echo "scale=4; $sum / $count" | bc -l 2>/dev/null || echo "0")
|
|
296
292
|
gen_num=$(echo "$gen" | sed 's/gen0*//')
|
|
293
|
+
# Track max generation number
|
|
294
|
+
if [[ $gen_num -gt $max_gen_num ]]; then
|
|
295
|
+
max_gen_num=$gen_num
|
|
296
|
+
fi
|
|
297
297
|
color=$(get_gen_color "$gen_num")
|
|
298
298
|
echo "$gen_index \"$gen\" $avg \"$color\"" >>"$gen_avg_file"
|
|
299
299
|
((gen_index++))
|
|
@@ -308,6 +308,28 @@ if command -v gnuplot >/dev/null 2>&1 && [[ $valid_performance_count -gt 0 ]]; t
|
|
|
308
308
|
|
|
309
309
|
# Generate dual plot
|
|
310
310
|
if [[ -s "$data_file" ]]; then
|
|
311
|
+
# Build dynamic plot command for generations
|
|
312
|
+
plot_cmd=""
|
|
313
|
+
for ((i=1; i<=max_gen_num; i++)); do
|
|
314
|
+
color=$(get_gen_color "$i")
|
|
315
|
+
if [[ -n $plot_cmd ]]; then
|
|
316
|
+
plot_cmd="$plot_cmd, \\"$'\n'
|
|
317
|
+
fi
|
|
318
|
+
plot_cmd="${plot_cmd} \"$data_file\" using (\$4==$i?\$1:1/0):3 with linespoints linewidth 2 linecolor rgb \"$color\" pointsize 0.8 title \"Gen $i\""
|
|
319
|
+
done
|
|
320
|
+
# Add winner point
|
|
321
|
+
plot_cmd="$plot_cmd, \\"$'\n'
|
|
322
|
+
plot_cmd="${plot_cmd} \"$winner_file\" using 1:3 with points pointtype 7 pointsize 2 linecolor rgb \"#0066cc\" title \"Winner\""
|
|
323
|
+
|
|
324
|
+
# Build x-axis labels for generation chart
|
|
325
|
+
xtics_labels=""
|
|
326
|
+
for ((i=1; i<=max_gen_num; i++)); do
|
|
327
|
+
if [[ -n $xtics_labels ]]; then
|
|
328
|
+
xtics_labels="$xtics_labels, "
|
|
329
|
+
fi
|
|
330
|
+
xtics_labels="${xtics_labels}\"Gen$i\" $i"
|
|
331
|
+
done
|
|
332
|
+
|
|
311
333
|
gnuplot <<EOF
|
|
312
334
|
set terminal png size 1200,800
|
|
313
335
|
set output "$output_file"
|
|
@@ -324,12 +346,7 @@ set key outside right
|
|
|
324
346
|
set xtics auto
|
|
325
347
|
|
|
326
348
|
# Define colors for generations
|
|
327
|
-
plot
|
|
328
|
-
"$data_file" using (\$4==2?\$1:1/0):3 with linespoints linewidth 2 linecolor rgb "#ff7f0e" pointsize 0.8 title "Gen 2", \\
|
|
329
|
-
"$data_file" using (\$4==3?\$1:1/0):3 with linespoints linewidth 2 linecolor rgb "#2ca02c" pointsize 0.8 title "Gen 3", \\
|
|
330
|
-
"$data_file" using (\$4==4?\$1:1/0):3 with linespoints linewidth 2 linecolor rgb "#d62728" pointsize 0.8 title "Gen 4", \\
|
|
331
|
-
"$data_file" using (\$4==5?\$1:1/0):3 with linespoints linewidth 2 linecolor rgb "#9467bd" pointsize 0.8 title "Gen 5", \\
|
|
332
|
-
"$winner_file" using 1:3 with points pointtype 7 pointsize 2 linecolor rgb "#0066cc" title "Winner"
|
|
349
|
+
plot $plot_cmd
|
|
333
350
|
|
|
334
351
|
#=================== BOTTOM PLOT: Generation Averages ===================
|
|
335
352
|
set title "Average Performance by Generation" font ",14"
|
|
@@ -341,7 +358,7 @@ unset key
|
|
|
341
358
|
set grid y
|
|
342
359
|
|
|
343
360
|
# Set custom x-axis labels
|
|
344
|
-
set xtics (
|
|
361
|
+
set xtics ($xtics_labels)
|
|
345
362
|
|
|
346
363
|
plot "$gen_avg_file" using 1:3 with boxes linecolor rgb "#4CAF50" notitle
|
|
347
364
|
|
package/bin/claude-evolve-run
CHANGED
|
@@ -434,13 +434,13 @@ echo "[INFO] Algorithm ready at: $output_file"
|
|
|
434
434
|
|
|
435
435
|
# Run evaluator
|
|
436
436
|
echo "[INFO] Running evaluation..."
|
|
437
|
-
echo "[INFO] Executing: $PYTHON_CMD $FULL_EVALUATOR_PATH $output_file"
|
|
437
|
+
echo "[INFO] Executing: EXPERIMENT_ID=$id $PYTHON_CMD $FULL_EVALUATOR_PATH $output_file"
|
|
438
438
|
eval_output=""
|
|
439
439
|
eval_exit_code=0
|
|
440
440
|
|
|
441
441
|
if [[ -n $timeout_seconds ]]; then
|
|
442
442
|
echo "[INFO] Evaluation timeout: ${timeout_seconds}s"
|
|
443
|
-
if eval_output=$(timeout "$timeout_seconds" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1); then
|
|
443
|
+
if eval_output=$(EXPERIMENT_ID="$id" timeout "$timeout_seconds" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1); then
|
|
444
444
|
eval_exit_code=0
|
|
445
445
|
else
|
|
446
446
|
eval_exit_code=$?
|
|
@@ -455,7 +455,7 @@ if [[ -n $timeout_seconds ]]; then
|
|
|
455
455
|
fi
|
|
456
456
|
fi
|
|
457
457
|
else
|
|
458
|
-
if eval_output=$("$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1); then
|
|
458
|
+
if eval_output=$(EXPERIMENT_ID="$id" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1); then
|
|
459
459
|
eval_exit_code=0
|
|
460
460
|
else
|
|
461
461
|
eval_exit_code=$?
|
|
@@ -473,7 +473,7 @@ echo "----------------------------------------"
|
|
|
473
473
|
echo "=== EVALUATOR EXECUTION ==="
|
|
474
474
|
echo "ID: $id"
|
|
475
475
|
echo "Algorithm: $output_file"
|
|
476
|
-
echo "Command: $PYTHON_CMD $FULL_EVALUATOR_PATH $output_file"
|
|
476
|
+
echo "Command: EXPERIMENT_ID=$id $PYTHON_CMD $FULL_EVALUATOR_PATH $output_file"
|
|
477
477
|
echo "Exit code: $eval_exit_code"
|
|
478
478
|
echo "Timestamp: $(date)"
|
|
479
479
|
echo
|
|
@@ -554,7 +554,7 @@ else
|
|
|
554
554
|
recovery_attempted=true
|
|
555
555
|
# Retry the evaluation
|
|
556
556
|
echo "[INFO] Retrying evaluation after recovery attempt..."
|
|
557
|
-
if eval_output=$("$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1); then
|
|
557
|
+
if eval_output=$(EXPERIMENT_ID="$id" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1); then
|
|
558
558
|
# Re-process the successful result
|
|
559
559
|
if score=$(echo "$eval_output" | grep -o '"score"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
|
|
560
560
|
if [[ -n $score ]]; then
|
package/bin/claude-evolve-worker
CHANGED
|
@@ -188,7 +188,7 @@ eval_exit_code=0
|
|
|
188
188
|
if [[ -n $timeout_seconds ]]; then
|
|
189
189
|
echo "[WORKER-$$] Evaluation timeout: ${timeout_seconds}s"
|
|
190
190
|
# For Modal compatibility, don't capture stderr
|
|
191
|
-
if eval_output=$(timeout "$timeout_seconds" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file"); then
|
|
191
|
+
if eval_output=$(EXPERIMENT_ID="$id" timeout "$timeout_seconds" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file"); then
|
|
192
192
|
eval_exit_code=0
|
|
193
193
|
else
|
|
194
194
|
eval_exit_code=$?
|
|
@@ -200,7 +200,7 @@ if [[ -n $timeout_seconds ]]; then
|
|
|
200
200
|
fi
|
|
201
201
|
else
|
|
202
202
|
# For Modal compatibility, don't capture stderr
|
|
203
|
-
if eval_output=$("$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file"); then
|
|
203
|
+
if eval_output=$(EXPERIMENT_ID="$id" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file"); then
|
|
204
204
|
eval_exit_code=0
|
|
205
205
|
else
|
|
206
206
|
eval_exit_code=$?
|