claude-evolve 1.3.25 → 1.3.27

This diff reflects the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
package/README.md CHANGED
@@ -203,6 +203,32 @@ your-project/
  └── (your main project files)
  ```
 
+ ## Evaluator Output Format
+
+ Your evaluator must output a performance score to stdout. Three formats are supported:
+
+ ### 1. Plain Number (Simplest)
+ Just output a single floating-point number:
+ ```
+ 1.077506371224117
+ ```
+
+ ### 2. JSON with "score" field
+ ```json
+ {"score": 0.95}
+ ```
+
+ ### 3. JSON with "performance" field
+ ```json
+ {"performance": 1.234}
+ ```
+
+ **Important notes:**
+ - Higher scores indicate better performance
+ - A score of 0 indicates complete failure
+ - Non-zero exit codes indicate evaluation errors
+ - Any additional output (warnings, logs) should go to stderr, not stdout
+
  ## Environment Variables for Evaluators
 
  When your evaluator.py runs, it has access to the `EXPERIMENT_ID` environment variable containing the current experiment's ID (e.g., `gen07-001`). This allows evaluators to:
@@ -221,7 +247,10 @@ experiment_id = os.environ.get('EXPERIMENT_ID', 'unknown')
 
  # Use it for logging or file naming
  output_file = f"results_{experiment_id}.json"
- print(f"Evaluating experiment: {experiment_id}")
+ print(f"Evaluating experiment: {experiment_id}", file=sys.stderr) # Use stderr for logs!
+
+ # Output just the score
+ print(score) # Simple number to stdout
  ```
 
  ## Configuration
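
Taken together, the README additions above define the evaluator contract: read `EXPERIMENT_ID` from the environment, keep logs on stderr, print only the score to stdout, and exit non-zero on error. The following sketch is a minimal illustration of that contract, not code shipped in the package; the `run_benchmark` helper and its fixed return value are hypothetical stand-ins for whatever your evaluator actually measures.

```python
#!/usr/bin/env python3
"""Minimal evaluator sketch following the claude-evolve output contract.

Only the I/O contract (score to stdout, logs to stderr, non-zero exit on
error) comes from the README above; the scoring logic is a placeholder.
"""
import os
import sys


def run_benchmark() -> float:
    # Hypothetical placeholder for whatever your evaluation actually measures.
    return 1.077506371224117


def main() -> int:
    experiment_id = os.environ.get("EXPERIMENT_ID", "unknown")
    # Logs and progress messages go to stderr so stdout stays parseable.
    print(f"Evaluating experiment: {experiment_id}", file=sys.stderr)

    try:
        score = run_benchmark()
    except Exception as exc:
        # A non-zero exit code signals an evaluation error to the worker.
        print(f"Error: {exc}", file=sys.stderr)
        return 1

    # Exactly one of the supported formats on stdout: here, a plain number.
    print(score)
    return 0


if __name__ == "__main__":
    sys.exit(main())
```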
@@ -78,7 +78,6 @@ call_claude_with_limit_check() {
 
  # Parse arguments
  use_strategies=true
- no_ai=false
 
  while [[ $# -gt 0 ]]; do
  case $1 in
@@ -87,11 +86,10 @@ while [[ $# -gt 0 ]]; do
  claude-evolve ideate - Generate new algorithm ideas using evolutionary strategies
 
  USAGE:
- claude-evolve ideate [--legacy N] [--no-ai]
+ claude-evolve ideate [--legacy N]
 
  OPTIONS:
  --legacy N Use legacy mode with N ideas (ignores strategy config)
- --no-ai Use manual entry mode instead of AI generation
  --help Show this help message
 
  DESCRIPTION:
@@ -116,10 +114,6 @@ EOF
  exit 1
  fi
  ;;
- --no-ai)
- no_ai=true
- shift
- ;;
  *)
  echo "[ERROR] Unknown option: $1" >&2
  exit 1
@@ -195,21 +189,6 @@ get_next_id() {
  printf "gen%s-%03d" "$generation" $((max_id + 1))
  }
 
- # Add idea to CSV manually (fallback for manual mode)
- add_idea_manual() {
- local description="$1"
- local based_on_id="$2"
- local generation="$3"
- local id
- id=$(get_next_id "$generation")
-
- # Escape quotes in description
- local escaped_desc="${description//\"/\"\"}"
-
- # Append to CSV
- echo "${id},${based_on_id},\"${escaped_desc}\",," >>"$FULL_CSV_PATH"
- echo "[INFO] Added idea: $description"
- }
 
  # Get top performers for parent selection
  get_top_performers() {
@@ -246,47 +225,12 @@ with open('$FULL_CSV_PATH', 'r') as f:
  "
  }
 
- # Manual entry mode
- ideate_manual() {
- local ideas_added=0
-
- for ((i = 1; i <= TOTAL_IDEAS; i++)); do
- if [[ $TOTAL_IDEAS -eq 1 ]]; then
- read -r -p "Enter algorithm idea (or empty to skip): " description
- else
- read -r -p "Enter algorithm idea $i/$TOTAL_IDEAS (or empty to skip): " description
- fi
-
- if [[ -z $description ]]; then
- echo "[INFO] Empty description, skipping idea"
- continue
- fi
-
- add_idea_manual "$description" "" "$CURRENT_GENERATION"
- ((ideas_added++))
-
- if [[ $i -lt $TOTAL_IDEAS ]]; then
- read -r -p "Add another idea? (y/N) " continue_adding
- if [[ $continue_adding != "y" && $continue_adding != "Y" ]]; then
- break
- fi
- fi
- done
-
- echo "[INFO] Added $ideas_added idea(s) to $EVOLUTION_CSV"
- }
 
  # Generate ideas using AI with multi-strategy approach
  ideate_ai_strategies() {
- # Check for AI CLI (codex or claude)
- if ! command -v codex >/dev/null 2>&1 && ! command -v claude >/dev/null 2>&1; then
- echo "[WARN] No AI CLI found (codex or claude). Falling back to manual entry."
- return 1
- fi
-
  if [[ ! -f "$FULL_BRIEF_PATH" ]]; then
- echo "[WARN] $BRIEF_FILE not found. Falling back to manual entry."
- return 1
+ echo "[ERROR] $BRIEF_FILE not found. Run 'claude-evolve setup' first." >&2
+ exit 1
  fi
 
  # Get top performers
@@ -342,6 +286,16 @@ Requirements for new CSV rows:
  - Each description should be one clear sentence describing a specific algorithmic change
  - Descriptions should explore completely different approaches than existing ones
  - All new rows should have empty performance and status fields
+
+ CRITICAL CSV FORMAT RULES:
+ - DO NOT modify the CSV header row
+ - DO NOT change the column order
+ - DO NOT add extra columns or fields
+ - DO NOT modify existing rows - only append new ones
+ - DO NOT add extra blank lines or formatting
+ - Maintain exact CSV format: id,basedOnId,description,performance,status
+ - Leave performance and status fields completely empty (just commas)
+ - Use proper CSV quoting only when descriptions contain commas
  - CRITICAL: You must read existing evolution files to avoid suggesting changes that:
  * Have already been tried and failed
  * Are impossible given the codebase structure
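
The CSV format rules above (repeated in each strategy prompt that follows) fix the row layout as `id,basedOnId,description,performance,status`, with performance and status left empty and quoting used only when a description contains a comma. As a rough illustration of rows that satisfy those rules — not part of the package, which has the AI append rows directly, and with a hypothetical file name standing in for the configured CSV path — Python's `csv` module with minimal quoting produces exactly this shape:

```python
import csv

# Illustration only: made-up rows in the id,basedOnId,description,performance,status
# layout described above, with performance/status left empty.
rows = [
    ("gen08-001", "gen07-003", "Replace fixed threshold with rolling median", "", ""),
    ("gen08-002", "", "Try a simpler baseline, no smoothing at all", "", ""),
]

with open("evolution.csv", "a", newline="") as f:
    # QUOTE_MINIMAL quotes a field only when it contains a comma, quote, or newline.
    writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
    writer.writerows(rows)
```

With `QUOTE_MINIMAL`, only the second description (which contains a comma) is wrapped in quotes, matching the last rule above.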
@@ -395,6 +349,16 @@ Requirements for new CSV rows:
  - Each description should be one clear sentence about parameter tuning
  - Focus on adjusting hyperparameters, thresholds, sizes, learning rates
  - All new rows should have empty performance and status fields
+
+ CRITICAL CSV FORMAT RULES:
+ - DO NOT modify the CSV header row
+ - DO NOT change the column order
+ - DO NOT add extra columns or fields
+ - DO NOT modify existing rows - only append new ones
+ - DO NOT add extra blank lines or formatting
+ - Maintain exact CSV format: id,basedOnId,description,performance,status
+ - Leave performance and status fields completely empty (just commas)
+ - Use proper CSV quoting only when descriptions contain commas
  - CRITICAL: You must read the parent algorithm file to understand:
  * What parameters are actually tunable in the code
  * What changes made this algorithm successful vs its parent
@@ -448,6 +412,16 @@ Requirements for new CSV rows:
  - Each description should be one clear sentence about architectural changes
  - Keep core insights but change implementation approach
  - All new rows should have empty performance and status fields
+
+ CRITICAL CSV FORMAT RULES:
+ - DO NOT modify the CSV header row
+ - DO NOT change the column order
+ - DO NOT add extra columns or fields
+ - DO NOT modify existing rows - only append new ones
+ - DO NOT add extra blank lines or formatting
+ - Maintain exact CSV format: id,basedOnId,description,performance,status
+ - Leave performance and status fields completely empty (just commas)
+ - Use proper CSV quoting only when descriptions contain commas
  - CRITICAL: You must read the parent algorithm file to understand:
  * What structural elements can be modified within the codebase constraints
  * What architectural decisions led to this algorithm's success
@@ -501,6 +475,16 @@ Requirements for new CSV rows:
  - Each description should be one clear sentence combining elements from different algorithms
  - Be specific about what elements to merge
  - All new rows should have empty performance and status fields
+
+ CRITICAL CSV FORMAT RULES:
+ - DO NOT modify the CSV header row
+ - DO NOT change the column order
+ - DO NOT add extra columns or fields
+ - DO NOT modify existing rows - only append new ones
+ - DO NOT add extra blank lines or formatting
+ - Maintain exact CSV format: id,basedOnId,description,performance,status
+ - Leave performance and status fields completely empty (just commas)
+ - Use proper CSV quoting only when descriptions contain commas
  - CRITICAL: You must read the relevant algorithm files to:
  * Identify the specific improvements that made each algorithm successful
  * Understand which components are compatible for merging
@@ -523,15 +507,9 @@ Add exactly $count hybrid combination rows to the CSV file now."
 
  # Legacy AI generation mode (for backward compatibility)
  ideate_ai_legacy() {
- # Check for AI CLI (codex or claude)
- if ! command -v codex >/dev/null 2>&1 && ! command -v claude >/dev/null 2>&1; then
- echo "[WARN] No AI CLI found (codex or claude). Falling back to manual entry."
- return 1
- fi
-
  if [[ ! -f "$FULL_BRIEF_PATH" ]]; then
- echo "[WARN] $BRIEF_FILE not found. Falling back to manual entry."
- return 1
+ echo "[ERROR] $BRIEF_FILE not found. Run 'claude-evolve setup' first." >&2
+ exit 1
  fi
 
  # Get top performers (pure shell)
@@ -576,6 +554,16 @@ Requirements for new CSV rows:
  - Each description should be one clear sentence describing an algorithmic approach
  - All new rows should have empty performance and status fields
 
+ CRITICAL CSV FORMAT RULES:
+ - DO NOT modify the CSV header row
+ - DO NOT change the column order
+ - DO NOT add extra columns or fields
+ - DO NOT modify existing rows - only append new ones
+ - DO NOT add extra blank lines or formatting
+ - Maintain exact CSV format: id,basedOnId,description,performance,status
+ - Leave performance and status fields completely empty (just commas)
+ - Use proper CSV quoting only when descriptions contain commas
+
  Add exactly $TOTAL_IDEAS algorithm variation rows to the CSV file now."
 
  echo "[INFO] Generating $TOTAL_IDEAS ideas (legacy mode)..."
@@ -591,19 +579,12 @@ CURRENT_GENERATION=$(get_next_generation)
  echo "[INFO] Starting ideation for generation $CURRENT_GENERATION"
 
  # Main execution
- if [[ $no_ai == true ]]; then
- echo "[INFO] Manual entry mode"
- ideate_manual
- elif [[ $use_strategies == true ]]; then
+ if [[ $use_strategies == true ]]; then
  echo "[INFO] Multi-strategy AI generation mode"
- if ! ideate_ai_strategies; then
- echo "[INFO] Falling back to manual entry"
- ideate_manual
- fi
+ ideate_ai_strategies
+ echo "[INFO] Ideation complete! Check $EVOLUTION_CSV for new ideas."
  else
  echo "[INFO] Legacy AI generation mode"
- if ! ideate_ai_legacy; then
- echo "[INFO] Falling back to manual entry"
- ideate_manual
- fi
+ ideate_ai_legacy
+ echo "[INFO] Ideation complete! Check $EVOLUTION_CSV for new ideas."
  fi
@@ -239,7 +239,23 @@ fi
 
  # Process results
  if [[ $eval_exit_code -eq 0 ]]; then
- # Extract score
+ # First, check if output is just a plain number
+ if [[ $eval_output =~ ^[[:space:]]*-?[0-9]+\.?[0-9]*[[:space:]]*$ ]]; then
+ score=$(echo "$eval_output" | tr -d ' ')
+ if (( $(echo "$score == 0" | bc -l) )); then
+ update_csv_row_with_lock "$candidate_id" "status" "failed"
+ update_csv_row_with_lock "$candidate_id" "performance" "$score"
+ echo "[WORKER-$$] ✗ Evaluation failed with score 0"
+ exit 1
+ else
+ update_csv_row_with_lock "$candidate_id" "performance" "$score"
+ update_csv_row_with_lock "$candidate_id" "status" "complete"
+ echo "[WORKER-$$] ✓ Evaluation complete, score: $score"
+ exit 0
+ fi
+ fi
+
+ # Try JSON "score" field
  if score=$(echo "$eval_output" | grep -o '"score"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
  if [[ -n $score ]]; then
  if (( $(echo "$score == 0" | bc -l) )); then
@@ -256,7 +272,7 @@ if [[ $eval_exit_code -eq 0 ]]; then
  fi
  fi
 
- # Try "performance" field
+ # Try JSON "performance" field
  if score=$(echo "$eval_output" | grep -o '"performance"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
  if [[ -n $score ]]; then
  if (( $(echo "$score == 0" | bc -l) )); then
@@ -274,6 +290,7 @@ if [[ $eval_exit_code -eq 0 ]]; then
  fi
 
  echo "[ERROR] No score found in evaluator output" >&2
+ echo "[ERROR] Expected: plain number (e.g., 1.23) or JSON with 'score' or 'performance' field" >&2
  update_csv_row_with_lock "$candidate_id" "status" "failed"
  exit 1
  else
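
For evaluator authors, the parsing order introduced in the worker hunks above is: stdout consisting solely of a number wins, then a JSON "score" field, then a JSON "performance" field; a score of 0 marks the candidate failed, and anything unparseable is reported as "No score found". The sketch below restates that precedence in Python for illustration only; the actual worker uses the bash regex and grep pipelines shown above rather than a JSON parser, so it is more permissive about surrounding text than this simplification.

```python
import json
import re


def parse_score(stdout_text: str):
    """Simplified restatement of the worker's score-parsing order (illustration only)."""
    # 1. Plain number on its own line(s)
    if re.fullmatch(r"\s*-?\d+\.?\d*\s*", stdout_text):
        return float(stdout_text)
    # 2./3. JSON object with a "score" or "performance" field
    try:
        data = json.loads(stdout_text)
    except json.JSONDecodeError:
        return None  # worker reports "No score found" and marks the run failed
    for key in ("score", "performance"):
        if key in data:
            return float(data[key])
    return None


# A parsed 0.0 is still recorded, but the candidate is marked failed, matching the bash logic.
print(parse_score('{"score": 0.95}'))  # -> 0.95
```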
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "claude-evolve",
- "version": "1.3.25",
+ "version": "1.3.27",
  "bin": {
  "claude-evolve": "./bin/claude-evolve",
  "claude-evolve-main": "./bin/claude-evolve-main",
@@ -40,11 +40,10 @@ def evaluate_performance(algorithm_module):
  end_time = time.time()
  execution_time = end_time - start_time
 
- return {
- "execution_time": execution_time,
- "score": 1.0 / execution_time if execution_time > 0 else 0,
- "status": "success"
- }
+ # Calculate a performance score (higher is better)
+ score = 1.0 / execution_time if execution_time > 0 else 0
+
+ return score # Simple: just return the number
 
 
  def main():
@@ -60,15 +59,18 @@ def main():
 
  try:
  algorithm_module = load_algorithm(algorithm_file)
- metrics = evaluate_performance(algorithm_module)
- print(json.dumps(metrics))
+ score = evaluate_performance(algorithm_module)
+
+ # Option 1: Just print the number (simplest)
+ print(score)
+
+ # Option 2: Print as JSON (if you need more structure)
+ # print(json.dumps({"score": score}))
+
  sys.exit(0)
  except Exception as e:
- error_result = {
- "error": str(e),
- "status": "failed"
- }
- print(json.dumps(error_result))
+ # Log errors to stderr, not stdout
+ print(f"Error: {e}", file=sys.stderr)
  sys.exit(1)
 