claude-evolve 1.3.25 → 1.3.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -1
- package/bin/claude-evolve-ideate +60 -79
- package/bin/claude-evolve-worker +19 -2
- package/package.json +1 -1
- package/templates/evaluator.py +14 -12
package/README.md
CHANGED
|
@@ -203,6 +203,32 @@ your-project/
|
|
|
203
203
|
└── (your main project files)
|
|
204
204
|
```
|
|
205
205
|
|
|
206
|
+
## Evaluator Output Format
|
|
207
|
+
|
|
208
|
+
Your evaluator must output a performance score to stdout. Three formats are supported:
|
|
209
|
+
|
|
210
|
+
### 1. Plain Number (Simplest)
|
|
211
|
+
Just output a single floating-point number:
|
|
212
|
+
```
|
|
213
|
+
1.077506371224117
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
### 2. JSON with "score" field
|
|
217
|
+
```json
|
|
218
|
+
{"score": 0.95}
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
### 3. JSON with "performance" field
|
|
222
|
+
```json
|
|
223
|
+
{"performance": 1.234}
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
**Important notes:**
|
|
227
|
+
- Higher scores indicate better performance
|
|
228
|
+
- A score of 0 indicates complete failure
|
|
229
|
+
- Non-zero exit codes indicate evaluation errors
|
|
230
|
+
- Any additional output (warnings, logs) should go to stderr, not stdout
|
|
231
|
+
|
|
206
232
|
## Environment Variables for Evaluators
|
|
207
233
|
|
|
208
234
|
When your evaluator.py runs, it has access to the `EXPERIMENT_ID` environment variable containing the current experiment's ID (e.g., `gen07-001`). This allows evaluators to:
|
|
@@ -221,7 +247,10 @@ experiment_id = os.environ.get('EXPERIMENT_ID', 'unknown')
|
|
|
221
247
|
|
|
222
248
|
# Use it for logging or file naming
|
|
223
249
|
output_file = f"results_{experiment_id}.json"
|
|
224
|
-
print(f"Evaluating experiment: {experiment_id}")
|
|
250
|
+
print(f"Evaluating experiment: {experiment_id}", file=sys.stderr) # Use stderr for logs!
|
|
251
|
+
|
|
252
|
+
# Output just the score
|
|
253
|
+
print(score) # Simple number to stdout
|
|
225
254
|
```
|
|
226
255
|
|
|
227
256
|
## Configuration
|
package/bin/claude-evolve-ideate
CHANGED
|
@@ -78,7 +78,6 @@ call_claude_with_limit_check() {
|
|
|
78
78
|
|
|
79
79
|
# Parse arguments
|
|
80
80
|
use_strategies=true
|
|
81
|
-
no_ai=false
|
|
82
81
|
|
|
83
82
|
while [[ $# -gt 0 ]]; do
|
|
84
83
|
case $1 in
|
|
@@ -87,11 +86,10 @@ while [[ $# -gt 0 ]]; do
|
|
|
87
86
|
claude-evolve ideate - Generate new algorithm ideas using evolutionary strategies
|
|
88
87
|
|
|
89
88
|
USAGE:
|
|
90
|
-
claude-evolve ideate [--legacy N] [--no-ai]
|
|
89
|
+
claude-evolve ideate [--legacy N]
|
|
91
90
|
|
|
92
91
|
OPTIONS:
|
|
93
92
|
--legacy N Use legacy mode with N ideas (ignores strategy config)
|
|
94
|
-
--no-ai Use manual entry mode instead of AI generation
|
|
95
93
|
--help Show this help message
|
|
96
94
|
|
|
97
95
|
DESCRIPTION:
|
|
@@ -116,10 +114,6 @@ EOF
|
|
|
116
114
|
exit 1
|
|
117
115
|
fi
|
|
118
116
|
;;
|
|
119
|
-
--no-ai)
|
|
120
|
-
no_ai=true
|
|
121
|
-
shift
|
|
122
|
-
;;
|
|
123
117
|
*)
|
|
124
118
|
echo "[ERROR] Unknown option: $1" >&2
|
|
125
119
|
exit 1
|
|
@@ -195,21 +189,6 @@ get_next_id() {
|
|
|
195
189
|
printf "gen%s-%03d" "$generation" $((max_id + 1))
|
|
196
190
|
}
|
|
197
191
|
|
|
198
|
-
# Add idea to CSV manually (fallback for manual mode)
|
|
199
|
-
add_idea_manual() {
|
|
200
|
-
local description="$1"
|
|
201
|
-
local based_on_id="$2"
|
|
202
|
-
local generation="$3"
|
|
203
|
-
local id
|
|
204
|
-
id=$(get_next_id "$generation")
|
|
205
|
-
|
|
206
|
-
# Escape quotes in description
|
|
207
|
-
local escaped_desc="${description//\"/\"\"}"
|
|
208
|
-
|
|
209
|
-
# Append to CSV
|
|
210
|
-
echo "${id},${based_on_id},\"${escaped_desc}\",," >>"$FULL_CSV_PATH"
|
|
211
|
-
echo "[INFO] Added idea: $description"
|
|
212
|
-
}
|
|
213
192
|
|
|
214
193
|
# Get top performers for parent selection
|
|
215
194
|
get_top_performers() {
|
|
@@ -246,47 +225,12 @@ with open('$FULL_CSV_PATH', 'r') as f:
|
|
|
246
225
|
"
|
|
247
226
|
}
|
|
248
227
|
|
|
249
|
-
# Manual entry mode
|
|
250
|
-
ideate_manual() {
|
|
251
|
-
local ideas_added=0
|
|
252
|
-
|
|
253
|
-
for ((i = 1; i <= TOTAL_IDEAS; i++)); do
|
|
254
|
-
if [[ $TOTAL_IDEAS -eq 1 ]]; then
|
|
255
|
-
read -r -p "Enter algorithm idea (or empty to skip): " description
|
|
256
|
-
else
|
|
257
|
-
read -r -p "Enter algorithm idea $i/$TOTAL_IDEAS (or empty to skip): " description
|
|
258
|
-
fi
|
|
259
|
-
|
|
260
|
-
if [[ -z $description ]]; then
|
|
261
|
-
echo "[INFO] Empty description, skipping idea"
|
|
262
|
-
continue
|
|
263
|
-
fi
|
|
264
|
-
|
|
265
|
-
add_idea_manual "$description" "" "$CURRENT_GENERATION"
|
|
266
|
-
((ideas_added++))
|
|
267
|
-
|
|
268
|
-
if [[ $i -lt $TOTAL_IDEAS ]]; then
|
|
269
|
-
read -r -p "Add another idea? (y/N) " continue_adding
|
|
270
|
-
if [[ $continue_adding != "y" && $continue_adding != "Y" ]]; then
|
|
271
|
-
break
|
|
272
|
-
fi
|
|
273
|
-
fi
|
|
274
|
-
done
|
|
275
|
-
|
|
276
|
-
echo "[INFO] Added $ideas_added idea(s) to $EVOLUTION_CSV"
|
|
277
|
-
}
|
|
278
228
|
|
|
279
229
|
# Generate ideas using AI with multi-strategy approach
|
|
280
230
|
ideate_ai_strategies() {
|
|
281
|
-
# Check for AI CLI (codex or claude)
|
|
282
|
-
if ! command -v codex >/dev/null 2>&1 && ! command -v claude >/dev/null 2>&1; then
|
|
283
|
-
echo "[WARN] No AI CLI found (codex or claude). Falling back to manual entry."
|
|
284
|
-
return 1
|
|
285
|
-
fi
|
|
286
|
-
|
|
287
231
|
if [[ ! -f "$FULL_BRIEF_PATH" ]]; then
|
|
288
|
-
echo "[
|
|
289
|
-
|
|
232
|
+
echo "[ERROR] $BRIEF_FILE not found. Run 'claude-evolve setup' first." >&2
|
|
233
|
+
exit 1
|
|
290
234
|
fi
|
|
291
235
|
|
|
292
236
|
# Get top performers
|
|
@@ -342,6 +286,16 @@ Requirements for new CSV rows:
|
|
|
342
286
|
- Each description should be one clear sentence describing a specific algorithmic change
|
|
343
287
|
- Descriptions should explore completely different approaches than existing ones
|
|
344
288
|
- All new rows should have empty performance and status fields
|
|
289
|
+
|
|
290
|
+
CRITICAL CSV FORMAT RULES:
|
|
291
|
+
- DO NOT modify the CSV header row
|
|
292
|
+
- DO NOT change the column order
|
|
293
|
+
- DO NOT add extra columns or fields
|
|
294
|
+
- DO NOT modify existing rows - only append new ones
|
|
295
|
+
- DO NOT add extra blank lines or formatting
|
|
296
|
+
- Maintain exact CSV format: id,basedOnId,description,performance,status
|
|
297
|
+
- Leave performance and status fields completely empty (just commas)
|
|
298
|
+
- Use proper CSV quoting only when descriptions contain commas
|
|
345
299
|
- CRITICAL: You must read existing evolution files to avoid suggesting changes that:
|
|
346
300
|
* Have already been tried and failed
|
|
347
301
|
* Are impossible given the codebase structure
|
|
@@ -395,6 +349,16 @@ Requirements for new CSV rows:
|
|
|
395
349
|
- Each description should be one clear sentence about parameter tuning
|
|
396
350
|
- Focus on adjusting hyperparameters, thresholds, sizes, learning rates
|
|
397
351
|
- All new rows should have empty performance and status fields
|
|
352
|
+
|
|
353
|
+
CRITICAL CSV FORMAT RULES:
|
|
354
|
+
- DO NOT modify the CSV header row
|
|
355
|
+
- DO NOT change the column order
|
|
356
|
+
- DO NOT add extra columns or fields
|
|
357
|
+
- DO NOT modify existing rows - only append new ones
|
|
358
|
+
- DO NOT add extra blank lines or formatting
|
|
359
|
+
- Maintain exact CSV format: id,basedOnId,description,performance,status
|
|
360
|
+
- Leave performance and status fields completely empty (just commas)
|
|
361
|
+
- Use proper CSV quoting only when descriptions contain commas
|
|
398
362
|
- CRITICAL: You must read the parent algorithm file to understand:
|
|
399
363
|
* What parameters are actually tunable in the code
|
|
400
364
|
* What changes made this algorithm successful vs its parent
|
|
@@ -448,6 +412,16 @@ Requirements for new CSV rows:
|
|
|
448
412
|
- Each description should be one clear sentence about architectural changes
|
|
449
413
|
- Keep core insights but change implementation approach
|
|
450
414
|
- All new rows should have empty performance and status fields
|
|
415
|
+
|
|
416
|
+
CRITICAL CSV FORMAT RULES:
|
|
417
|
+
- DO NOT modify the CSV header row
|
|
418
|
+
- DO NOT change the column order
|
|
419
|
+
- DO NOT add extra columns or fields
|
|
420
|
+
- DO NOT modify existing rows - only append new ones
|
|
421
|
+
- DO NOT add extra blank lines or formatting
|
|
422
|
+
- Maintain exact CSV format: id,basedOnId,description,performance,status
|
|
423
|
+
- Leave performance and status fields completely empty (just commas)
|
|
424
|
+
- Use proper CSV quoting only when descriptions contain commas
|
|
451
425
|
- CRITICAL: You must read the parent algorithm file to understand:
|
|
452
426
|
* What structural elements can be modified within the codebase constraints
|
|
453
427
|
* What architectural decisions led to this algorithm's success
|
|
@@ -501,6 +475,16 @@ Requirements for new CSV rows:
|
|
|
501
475
|
- Each description should be one clear sentence combining elements from different algorithms
|
|
502
476
|
- Be specific about what elements to merge
|
|
503
477
|
- All new rows should have empty performance and status fields
|
|
478
|
+
|
|
479
|
+
CRITICAL CSV FORMAT RULES:
|
|
480
|
+
- DO NOT modify the CSV header row
|
|
481
|
+
- DO NOT change the column order
|
|
482
|
+
- DO NOT add extra columns or fields
|
|
483
|
+
- DO NOT modify existing rows - only append new ones
|
|
484
|
+
- DO NOT add extra blank lines or formatting
|
|
485
|
+
- Maintain exact CSV format: id,basedOnId,description,performance,status
|
|
486
|
+
- Leave performance and status fields completely empty (just commas)
|
|
487
|
+
- Use proper CSV quoting only when descriptions contain commas
|
|
504
488
|
- CRITICAL: You must read the relevant algorithm files to:
|
|
505
489
|
* Identify the specific improvements that made each algorithm successful
|
|
506
490
|
* Understand which components are compatible for merging
|
|
@@ -523,15 +507,9 @@ Add exactly $count hybrid combination rows to the CSV file now."
|
|
|
523
507
|
|
|
524
508
|
# Legacy AI generation mode (for backward compatibility)
|
|
525
509
|
ideate_ai_legacy() {
|
|
526
|
-
# Check for AI CLI (codex or claude)
|
|
527
|
-
if ! command -v codex >/dev/null 2>&1 && ! command -v claude >/dev/null 2>&1; then
|
|
528
|
-
echo "[WARN] No AI CLI found (codex or claude). Falling back to manual entry."
|
|
529
|
-
return 1
|
|
530
|
-
fi
|
|
531
|
-
|
|
532
510
|
if [[ ! -f "$FULL_BRIEF_PATH" ]]; then
|
|
533
|
-
echo "[
|
|
534
|
-
|
|
511
|
+
echo "[ERROR] $BRIEF_FILE not found. Run 'claude-evolve setup' first." >&2
|
|
512
|
+
exit 1
|
|
535
513
|
fi
|
|
536
514
|
|
|
537
515
|
# Get top performers (pure shell)
|
|
@@ -576,6 +554,16 @@ Requirements for new CSV rows:
|
|
|
576
554
|
- Each description should be one clear sentence describing an algorithmic approach
|
|
577
555
|
- All new rows should have empty performance and status fields
|
|
578
556
|
|
|
557
|
+
CRITICAL CSV FORMAT RULES:
|
|
558
|
+
- DO NOT modify the CSV header row
|
|
559
|
+
- DO NOT change the column order
|
|
560
|
+
- DO NOT add extra columns or fields
|
|
561
|
+
- DO NOT modify existing rows - only append new ones
|
|
562
|
+
- DO NOT add extra blank lines or formatting
|
|
563
|
+
- Maintain exact CSV format: id,basedOnId,description,performance,status
|
|
564
|
+
- Leave performance and status fields completely empty (just commas)
|
|
565
|
+
- Use proper CSV quoting only when descriptions contain commas
|
|
566
|
+
|
|
579
567
|
Add exactly $TOTAL_IDEAS algorithm variation rows to the CSV file now."
|
|
580
568
|
|
|
581
569
|
echo "[INFO] Generating $TOTAL_IDEAS ideas (legacy mode)..."
|
|
@@ -591,19 +579,12 @@ CURRENT_GENERATION=$(get_next_generation)
|
|
|
591
579
|
echo "[INFO] Starting ideation for generation $CURRENT_GENERATION"
|
|
592
580
|
|
|
593
581
|
# Main execution
|
|
594
|
-
if [[ $no_ai == true ]]; then
|
|
595
|
-
echo "[INFO] Manual entry mode"
|
|
596
|
-
ideate_manual
|
|
597
|
-
elif [[ $use_strategies == true ]]; then
|
|
582
|
+
if [[ $use_strategies == true ]]; then
|
|
598
583
|
echo "[INFO] Multi-strategy AI generation mode"
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
ideate_manual
|
|
602
|
-
fi
|
|
584
|
+
ideate_ai_strategies
|
|
585
|
+
echo "[INFO] Ideation complete! Check $EVOLUTION_CSV for new ideas."
|
|
603
586
|
else
|
|
604
587
|
echo "[INFO] Legacy AI generation mode"
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
ideate_manual
|
|
608
|
-
fi
|
|
588
|
+
ideate_ai_legacy
|
|
589
|
+
echo "[INFO] Ideation complete! Check $EVOLUTION_CSV for new ideas."
|
|
609
590
|
fi
|
package/bin/claude-evolve-worker
CHANGED
|
@@ -239,7 +239,23 @@ fi
|
|
|
239
239
|
|
|
240
240
|
# Process results
|
|
241
241
|
if [[ $eval_exit_code -eq 0 ]]; then
|
|
242
|
-
#
|
|
242
|
+
# First, check if output is just a plain number
|
|
243
|
+
if [[ $eval_output =~ ^[[:space:]]*-?[0-9]+\.?[0-9]*[[:space:]]*$ ]]; then
|
|
244
|
+
score=$(echo "$eval_output" | tr -d ' ')
|
|
245
|
+
if (( $(echo "$score == 0" | bc -l) )); then
|
|
246
|
+
update_csv_row_with_lock "$candidate_id" "status" "failed"
|
|
247
|
+
update_csv_row_with_lock "$candidate_id" "performance" "$score"
|
|
248
|
+
echo "[WORKER-$$] ✗ Evaluation failed with score 0"
|
|
249
|
+
exit 1
|
|
250
|
+
else
|
|
251
|
+
update_csv_row_with_lock "$candidate_id" "performance" "$score"
|
|
252
|
+
update_csv_row_with_lock "$candidate_id" "status" "complete"
|
|
253
|
+
echo "[WORKER-$$] ✓ Evaluation complete, score: $score"
|
|
254
|
+
exit 0
|
|
255
|
+
fi
|
|
256
|
+
fi
|
|
257
|
+
|
|
258
|
+
# Try JSON "score" field
|
|
243
259
|
if score=$(echo "$eval_output" | grep -o '"score"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
|
|
244
260
|
if [[ -n $score ]]; then
|
|
245
261
|
if (( $(echo "$score == 0" | bc -l) )); then
|
|
@@ -256,7 +272,7 @@ if [[ $eval_exit_code -eq 0 ]]; then
|
|
|
256
272
|
fi
|
|
257
273
|
fi
|
|
258
274
|
|
|
259
|
-
# Try "performance" field
|
|
275
|
+
# Try JSON "performance" field
|
|
260
276
|
if score=$(echo "$eval_output" | grep -o '"performance"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
|
|
261
277
|
if [[ -n $score ]]; then
|
|
262
278
|
if (( $(echo "$score == 0" | bc -l) )); then
|
|
@@ -274,6 +290,7 @@ if [[ $eval_exit_code -eq 0 ]]; then
|
|
|
274
290
|
fi
|
|
275
291
|
|
|
276
292
|
echo "[ERROR] No score found in evaluator output" >&2
|
|
293
|
+
echo "[ERROR] Expected: plain number (e.g., 1.23) or JSON with 'score' or 'performance' field" >&2
|
|
277
294
|
update_csv_row_with_lock "$candidate_id" "status" "failed"
|
|
278
295
|
exit 1
|
|
279
296
|
else
|
package/package.json
CHANGED
package/templates/evaluator.py
CHANGED
|
@@ -40,11 +40,10 @@ def evaluate_performance(algorithm_module):
|
|
|
40
40
|
end_time = time.time()
|
|
41
41
|
execution_time = end_time - start_time
|
|
42
42
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
}
|
|
43
|
+
# Calculate a performance score (higher is better)
|
|
44
|
+
score = 1.0 / execution_time if execution_time > 0 else 0
|
|
45
|
+
|
|
46
|
+
return score # Simple: just return the number
|
|
48
47
|
|
|
49
48
|
|
|
50
49
|
def main():
|
|
@@ -60,15 +59,18 @@ def main():
|
|
|
60
59
|
|
|
61
60
|
try:
|
|
62
61
|
algorithm_module = load_algorithm(algorithm_file)
|
|
63
|
-
|
|
64
|
-
|
|
62
|
+
score = evaluate_performance(algorithm_module)
|
|
63
|
+
|
|
64
|
+
# Option 1: Just print the number (simplest)
|
|
65
|
+
print(score)
|
|
66
|
+
|
|
67
|
+
# Option 2: Print as JSON (if you need more structure)
|
|
68
|
+
# print(json.dumps({"score": score}))
|
|
69
|
+
|
|
65
70
|
sys.exit(0)
|
|
66
71
|
except Exception as e:
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
"status": "failed"
|
|
70
|
-
}
|
|
71
|
-
print(json.dumps(error_result))
|
|
72
|
+
# Log errors to stderr, not stdout
|
|
73
|
+
print(f"Error: {e}", file=sys.stderr)
|
|
72
74
|
sys.exit(1)
|
|
73
75
|
|
|
74
76
|
|