claude-evolve 1.3.3 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -196,6 +196,27 @@ your-project/
196
196
  └── (your main project files)
197
197
  ```
198
198
 
199
+ ## Environment Variables for Evaluators
200
+
201
+ When your evaluator.py runs, it has access to the `EXPERIMENT_ID` environment variable containing the current experiment's ID (e.g., `gen07-001`). This allows evaluators to:
202
+
203
+ - Save experiment-specific output files
204
+ - Log metrics with experiment identifiers
205
+ - Implement experiment-aware logic
206
+ - Track which algorithm variant is being evaluated
207
+
208
+ Example usage in evaluator.py:
209
+ ```python
210
+ import os
211
+
212
+ # Get the current experiment ID
213
+ experiment_id = os.environ.get('EXPERIMENT_ID', 'unknown')
214
+
215
+ # Use it for logging or file naming
216
+ output_file = f"results_{experiment_id}.json"
217
+ print(f"Evaluating experiment: {experiment_id}")
218
+ ```
219
+
199
220
  ## Configuration
200
221
 
201
222
  Edit `evolution/config.yaml` to customize:
@@ -222,21 +222,16 @@ if command -v gnuplot >/dev/null 2>&1 && [[ $valid_performance_count -gt 0 ]]; t
222
222
  echo "# Row ID Performance Generation" >"$data_file"
223
223
  echo "# Generation AvgPerformance Color" >"$gen_avg_file"
224
224
 
225
- # Get color by generation number
225
+ # Get color by generation number (rotates through 5 colors)
226
226
  get_gen_color() {
227
227
  local gen_num="$1"
228
- case $gen_num in
228
+ local color_index=$(( (gen_num - 1) % 5 + 1 ))
229
+ case $color_index in
229
230
  1) echo "#1f77b4" ;; # blue
230
231
  2) echo "#ff7f0e" ;; # orange
231
232
  3) echo "#2ca02c" ;; # green
232
233
  4) echo "#d62728" ;; # red
233
234
  5) echo "#9467bd" ;; # purple
234
- 6) echo "#8c564b" ;; # brown
235
- 7) echo "#e377c2" ;; # pink
236
- 8) echo "#7f7f7f" ;; # gray
237
- 9) echo "#bcbd22" ;; # olive
238
- 10) echo "#17becf" ;; # cyan
239
- *) echo "#cccccc" ;; # default gray
240
235
  esac
241
236
  }
242
237
 
@@ -284,8 +279,9 @@ if command -v gnuplot >/dev/null 2>&1 && [[ $valid_performance_count -gt 0 ]]; t
284
279
  fi
285
280
  done <"$csv_file"
286
281
 
287
- # Create generation averages file
282
+ # Create generation averages file and track max generation
288
283
  gen_index=1
284
+ max_gen_num=0
289
285
  for gen in $(cut -d' ' -f1 "$gen_data_temp" | sort -u); do
290
286
  if grep -q "^$gen " "$gen_data_temp"; then
291
287
  # Calculate average for this generation
@@ -294,6 +290,10 @@ if command -v gnuplot >/dev/null 2>&1 && [[ $valid_performance_count -gt 0 ]]; t
294
290
  if [[ $count -gt 0 ]]; then
295
291
  avg=$(echo "scale=4; $sum / $count" | bc -l 2>/dev/null || echo "0")
296
292
  gen_num=$(echo "$gen" | sed 's/gen0*//')
293
+ # Track max generation number
294
+ if [[ $gen_num -gt $max_gen_num ]]; then
295
+ max_gen_num=$gen_num
296
+ fi
297
297
  color=$(get_gen_color "$gen_num")
298
298
  echo "$gen_index \"$gen\" $avg \"$color\"" >>"$gen_avg_file"
299
299
  ((gen_index++))
@@ -308,6 +308,28 @@ if command -v gnuplot >/dev/null 2>&1 && [[ $valid_performance_count -gt 0 ]]; t
308
308
 
309
309
  # Generate dual plot
310
310
  if [[ -s "$data_file" ]]; then
311
+ # Build dynamic plot command for generations
312
+ plot_cmd=""
313
+ for ((i=1; i<=max_gen_num; i++)); do
314
+ color=$(get_gen_color "$i")
315
+ if [[ -n $plot_cmd ]]; then
316
+ plot_cmd="$plot_cmd, \\"$'\n'
317
+ fi
318
+ plot_cmd="${plot_cmd} \"$data_file\" using (\$4==$i?\$1:1/0):3 with linespoints linewidth 2 linecolor rgb \"$color\" pointsize 0.8 title \"Gen $i\""
319
+ done
320
+ # Add winner point
321
+ plot_cmd="$plot_cmd, \\"$'\n'
322
+ plot_cmd="${plot_cmd} \"$winner_file\" using 1:3 with points pointtype 7 pointsize 2 linecolor rgb \"#0066cc\" title \"Winner\""
323
+
324
+ # Build x-axis labels for generation chart
325
+ xtics_labels=""
326
+ for ((i=1; i<=max_gen_num; i++)); do
327
+ if [[ -n $xtics_labels ]]; then
328
+ xtics_labels="$xtics_labels, "
329
+ fi
330
+ xtics_labels="${xtics_labels}\"Gen$i\" $i"
331
+ done
332
+
311
333
  gnuplot <<EOF
312
334
  set terminal png size 1200,800
313
335
  set output "$output_file"
@@ -324,12 +346,7 @@ set key outside right
324
346
  set xtics auto
325
347
 
326
348
  # Define colors for generations
327
- plot "$data_file" using (\$4==1?\$1:1/0):3 with linespoints linewidth 2 linecolor rgb "#1f77b4" pointsize 0.8 title "Gen 1", \\
328
- "$data_file" using (\$4==2?\$1:1/0):3 with linespoints linewidth 2 linecolor rgb "#ff7f0e" pointsize 0.8 title "Gen 2", \\
329
- "$data_file" using (\$4==3?\$1:1/0):3 with linespoints linewidth 2 linecolor rgb "#2ca02c" pointsize 0.8 title "Gen 3", \\
330
- "$data_file" using (\$4==4?\$1:1/0):3 with linespoints linewidth 2 linecolor rgb "#d62728" pointsize 0.8 title "Gen 4", \\
331
- "$data_file" using (\$4==5?\$1:1/0):3 with linespoints linewidth 2 linecolor rgb "#9467bd" pointsize 0.8 title "Gen 5", \\
332
- "$winner_file" using 1:3 with points pointtype 7 pointsize 2 linecolor rgb "#0066cc" title "Winner"
349
+ plot $plot_cmd
333
350
 
334
351
  #=================== BOTTOM PLOT: Generation Averages ===================
335
352
  set title "Average Performance by Generation" font ",14"
@@ -341,7 +358,7 @@ unset key
341
358
  set grid y
342
359
 
343
360
  # Set custom x-axis labels
344
- set xtics ("Gen1" 1, "Gen2" 2, "Gen3" 3, "Gen4" 4, "Gen5" 5)
361
+ set xtics ($xtics_labels)
345
362
 
346
363
  plot "$gen_avg_file" using 1:3 with boxes linecolor rgb "#4CAF50" notitle
347
364
 
@@ -434,13 +434,13 @@ echo "[INFO] Algorithm ready at: $output_file"
434
434
 
435
435
  # Run evaluator
436
436
  echo "[INFO] Running evaluation..."
437
- echo "[INFO] Executing: $PYTHON_CMD $FULL_EVALUATOR_PATH $output_file"
437
+ echo "[INFO] Executing: EXPERIMENT_ID=$id $PYTHON_CMD $FULL_EVALUATOR_PATH $output_file"
438
438
  eval_output=""
439
439
  eval_exit_code=0
440
440
 
441
441
  if [[ -n $timeout_seconds ]]; then
442
442
  echo "[INFO] Evaluation timeout: ${timeout_seconds}s"
443
- if eval_output=$(timeout "$timeout_seconds" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1); then
443
+ if eval_output=$(EXPERIMENT_ID="$id" timeout "$timeout_seconds" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1); then
444
444
  eval_exit_code=0
445
445
  else
446
446
  eval_exit_code=$?
@@ -455,7 +455,7 @@ if [[ -n $timeout_seconds ]]; then
455
455
  fi
456
456
  fi
457
457
  else
458
- if eval_output=$("$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1); then
458
+ if eval_output=$(EXPERIMENT_ID="$id" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1); then
459
459
  eval_exit_code=0
460
460
  else
461
461
  eval_exit_code=$?
@@ -473,7 +473,7 @@ echo "----------------------------------------"
473
473
  echo "=== EVALUATOR EXECUTION ==="
474
474
  echo "ID: $id"
475
475
  echo "Algorithm: $output_file"
476
- echo "Command: $PYTHON_CMD $FULL_EVALUATOR_PATH $output_file"
476
+ echo "Command: EXPERIMENT_ID=$id $PYTHON_CMD $FULL_EVALUATOR_PATH $output_file"
477
477
  echo "Exit code: $eval_exit_code"
478
478
  echo "Timestamp: $(date)"
479
479
  echo
@@ -554,7 +554,7 @@ else
554
554
  recovery_attempted=true
555
555
  # Retry the evaluation
556
556
  echo "[INFO] Retrying evaluation after recovery attempt..."
557
- if eval_output=$("$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1); then
557
+ if eval_output=$(EXPERIMENT_ID="$id" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1); then
558
558
  # Re-process the successful result
559
559
  if score=$(echo "$eval_output" | grep -o '"score"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
560
560
  if [[ -n $score ]]; then
@@ -188,7 +188,7 @@ eval_exit_code=0
188
188
  if [[ -n $timeout_seconds ]]; then
189
189
  echo "[WORKER-$$] Evaluation timeout: ${timeout_seconds}s"
190
190
  # For Modal compatibility, don't capture stderr
191
- if eval_output=$(timeout "$timeout_seconds" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file"); then
191
+ if eval_output=$(EXPERIMENT_ID="$id" timeout "$timeout_seconds" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file"); then
192
192
  eval_exit_code=0
193
193
  else
194
194
  eval_exit_code=$?
@@ -200,7 +200,7 @@ if [[ -n $timeout_seconds ]]; then
200
200
  fi
201
201
  else
202
202
  # For Modal compatibility, don't capture stderr
203
- if eval_output=$("$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file"); then
203
+ if eval_output=$(EXPERIMENT_ID="$id" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file"); then
204
204
  eval_exit_code=0
205
205
  else
206
206
  eval_exit_code=$?
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-evolve",
3
- "version": "1.3.3",
3
+ "version": "1.3.5",
4
4
  "bin": {
5
5
  "claude-evolve": "./bin/claude-evolve",
6
6
  "claude-evolve-main": "./bin/claude-evolve-main",