claude-evolve 1.3.38 → 1.3.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,662 +0,0 @@
1
- #!/bin/bash
2
-
3
- # Removed 'set -e' to prevent silent exits on CSV helper failures
4
-
5
# Resolve the directory this script lives in, so sibling libraries can be
# found no matter what the caller's working directory is.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Shared configuration helpers; load_config (called below) is expected to
# come from this file.
# shellcheck source=../lib/config.sh
source "$SCRIPT_DIR/../lib/config.sh"

# An explicit config path in CLAUDE_EVOLVE_CONFIG takes priority. When the
# variable is unset or empty the expansion yields no word at all, so
# load_config is invoked without arguments.
load_config ${CLAUDE_EVOLVE_CONFIG:+"$CLAUDE_EVOLVE_CONFIG"}
16
-
17
# Parse arguments
#
# Option state is initialised explicitly so that identically named variables
# inherited from the environment cannot flip a mode by accident.
timeout_seconds=""
force_parallel=""
force_sequential=""
use_caffeinate=""
# Every option except the keep-awake flags, in original order. Used to
# re-execute this script under caffeinate without losing the user's options.
passthrough_args=()

#######################################
# Print the usage text for `claude-evolve run` to stdout.
#######################################
show_run_help() {
  cat <<EOF
claude-evolve run - Execute evolution candidates

USAGE:
  claude-evolve run [OPTIONS]

OPTIONS:
  --timeout <sec>  Kill evaluator after specified seconds (default: no timeout)
  --parallel       Force parallel execution mode
  --sequential     Force sequential execution mode
  --keep-awake     Prevent system sleep during execution (macOS only)
  --help           Show this help message

DESCRIPTION:
  Processes the oldest pending candidate from evolution.csv by:
  1. Generating algorithm mutation using Claude
  2. Running evaluator.py on the generated algorithm
  3. Updating CSV with performance score and completion status

  Use --timeout to prevent runaway evaluations from blocking progress.
EOF
}

#######################################
# Parse the command-line options of `claude-evolve run`.
# Globals (written): timeout_seconds, force_parallel, force_sequential,
#                    use_caffeinate, passthrough_args
# Arguments: the script's command-line arguments
# Exits:     0 after printing help; 1 on an invalid or unknown option
#######################################
parse_run_args() {
  timeout_seconds=""
  force_parallel=""
  force_sequential=""
  use_caffeinate=""
  passthrough_args=()

  while [[ $# -gt 0 ]]; do
    case $1 in
      --timeout)
        # 10# forces base 10 so values such as "08" are not parsed as octal.
        if [[ -z ${2:-} ]] || [[ ! $2 =~ ^[0-9]+$ ]] || ((10#$2 == 0)); then
          echo "[ERROR] --timeout requires a positive integer (seconds)" >&2
          exit 1
        fi
        timeout_seconds="$2"
        passthrough_args+=("$1" "$2")
        shift 2
        ;;
      --parallel)
        force_parallel="true"
        passthrough_args+=("$1")
        shift
        ;;
      --sequential)
        force_sequential="true"
        passthrough_args+=("$1")
        shift
        ;;
      --keep-awake | --caffeinate)
        # Deliberately NOT forwarded: the re-executed script must not try to
        # wrap itself in caffeinate again.
        use_caffeinate="true"
        shift
        ;;
      --help)
        show_run_help
        exit 0
        ;;
      *)
        echo "[ERROR] Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_run_args "$@"
# The options have been consumed; leave no positional parameters behind.
set --

# Check if caffeinate should be used
if [[ "$use_caffeinate" == "true" ]]; then
  if command -v caffeinate >/dev/null 2>&1; then
    echo "[INFO] Using caffeinate to prevent system sleep"
    # Re-run this script under caffeinate, forwarding every other option the
    # user supplied (previously they were all dropped at this point).
    exec caffeinate -dims "$0" "${passthrough_args[@]}"
  else
    echo "[WARN] --keep-awake requested but 'caffeinate' was not found; continuing without it" >&2
  fi
fi
79
-
80
# Choose between parallel and sequential execution.
# Precedence: --parallel, then --sequential, then the PARALLEL_ENABLED
# config value ("true" or "1"), otherwise sequential.
use_parallel=false
mode_note="sequential mode (default)"
if [[ "$force_parallel" == "true" ]]; then
  use_parallel=true
  mode_note="parallel mode (forced via --parallel)"
elif [[ "$force_sequential" == "true" ]]; then
  mode_note="sequential mode (forced via --sequential)"
else
  case "$PARALLEL_ENABLED" in
    true | 1)
      use_parallel=true
      mode_note="parallel mode (enabled in config)"
      ;;
  esac
fi
echo "[INFO] Using $mode_note"

if [[ "$use_parallel" != "true" ]]; then
  echo "[INFO] Starting continuous evolution run..."
  echo "[INFO] Will continue running until no more pending candidates or 5 consecutive failures"
else
  echo "[INFO] Starting parallel evolution run with up to $MAX_WORKERS workers"
  # Hand over to the parallel runner; --timeout is forwarded only when set.
  exec "$SCRIPT_DIR/claude-evolve-run-parallel" ${timeout_seconds:+--timeout "$timeout_seconds"}
fi
102
-
103
if [[ -n $timeout_seconds ]]; then
  echo "[INFO] Using timeout: ${timeout_seconds} seconds per evaluation"
fi

# Log files are written under ./logs, relative to the current directory.
mkdir -p logs

# Failure budget: the run stops once MAX_FAILURES candidates fail in a row.
MAX_FAILURES=5
consecutive_failures=0
111
-
112
#######################################
# Register one more consecutive failure and tell the caller whether the run
# may carry on.
# Globals: consecutive_failures (incremented), MAX_FAILURES (read)
# Outputs: a warning or error on stderr; a separator line on stdout when
#          the run continues
# Returns: 0 while the failure count is below MAX_FAILURES, 1 otherwise
#######################################
should_continue_after_failure() {
  consecutive_failures=$((consecutive_failures + 1))

  if [[ $consecutive_failures -lt $MAX_FAILURES ]]; then
    echo "[WARN] Failure $consecutive_failures of $MAX_FAILURES. Continuing to next candidate..." >&2
    echo "----------------------------------------"
    return 0
  fi

  echo "[ERROR] Too many consecutive failures ($consecutive_failures). Stopping evolution run." >&2
  return 1
}
125
-
126
# The evolution workspace directory must exist before anything else is
# checked.
if ! [[ -d "$FULL_EVOLUTION_DIR" ]]; then
  echo "[ERROR] Evolution directory not found: $FULL_EVOLUTION_DIR. Run 'claude-evolve setup' first." >&2
  exit 1
fi

# Required regular files, checked in this order; the first one missing
# aborts the run. Each entry is "<label>:<path>" and is split at the FIRST
# colon only, so paths that themselves contain colons stay intact.
for required_entry in \
  "CSV file:$FULL_CSV_PATH" \
  "Evaluator:$FULL_EVALUATOR_PATH" \
  "Algorithm:$FULL_ALGORITHM_PATH"; do
  required_label="${required_entry%%:*}"
  required_path="${required_entry#*:}"
  if [[ ! -f "$required_path" ]]; then
    echo "[ERROR] ${required_label} not found: ${required_path}" >&2
    exit 1
  fi
done
unset required_entry required_label required_path
147
-
148
#######################################
# Ask the CSV helper for the next pending row of the evolution CSV.
# Globals: PYTHON_CMD, SCRIPT_DIR, FULL_CSV_PATH (all read)
# Outputs: whatever `csv_helper.py find_pending` prints
# Returns: the helper's exit status
#######################################
find_empty_row() {
  local csv_helper="$SCRIPT_DIR/../lib/csv_helper.py"
  "$PYTHON_CMD" "$csv_helper" find_pending "$FULL_CSV_PATH"
}
152
-
153
- # Get CSV row - replaced by csv_helper.py
154
-
155
#######################################
# Write a performance value and a status into one row of the evolution CSV
# by delegating to `csv_helper.py update_row`.
# Globals:   PYTHON_CMD, SCRIPT_DIR, FULL_CSV_PATH (all read)
# Arguments: $1 - row number
#            $2 - performance value (may be empty)
#            $3 - status
# Returns:   the helper's exit status
#######################################
update_csv_row() {
  local target_row="$1" new_performance="$2" new_status="$3"
  local -a helper_call=(
    "$PYTHON_CMD" "$SCRIPT_DIR/../lib/csv_helper.py" update_row "$FULL_CSV_PATH"
  )

  "${helper_call[@]}" "$target_row" "$new_performance" "$new_status"
}
163
-
164
#######################################
# Auto-recovery mechanism for common evaluator failures.
#
# Globals:   FULL_CSV_PATH (read; the file is rewritten for syntax errors)
#            calls update_csv_row and pip
# Arguments: $1 - failure type: "import_error", "syntax_error", "zero_score"
#            $2 - captured evaluator output
#            $3 - CSV row number of the candidate; as before, this is
#                 compared against the 1-based physical line number of the
#                 file (header line = 1)
#                 NOTE(review): confirm csv_helper.py counts rows the same way
#            $4 - candidate ID (accepted for interface compatibility; unused)
#            $5 - candidate description
# Returns:   0 when the caller should retry, 1 when no recovery is possible
#######################################
attempt_recovery() {
  local failure_type="$1"
  local error_output="$2"
  local row_num="$3"
  local id="$4" # unused, kept so the positional interface stays the same
  local description="$5"

  local retry_marker="[RETRY: Fix syntax]"
  # A dotted Python identifier. This also rejects anything that pip could
  # mistake for an option (for example a name starting with '-').
  local module_re="No module named '([A-Za-z_][A-Za-z0-9_.]*)'"
  # Splits a CSV line into "id,basedOnId," and everything after it.
  local split_re='^([^,]*,[^,]*,)(.*)$'
  local missing_module="" temp_file="" line="" head="" rest=""
  local current_row=0

  echo "[INFO] Attempting auto-recovery for $failure_type..."

  case "$failure_type" in
    import_error)
      # Extract the first missing module name from the output.
      if [[ $error_output =~ $module_re ]]; then
        missing_module="${BASH_REMATCH[1]}"
      fi
      if [[ -n $missing_module ]]; then
        echo "[INFO] Detected missing module: $missing_module"
        echo "[INFO] Attempting to install via pip..."

        # NOTE(review): the module name originates from the output of
        # generated code. Installing it executes third-party package code
        # and is open to typosquatting; consider an allow-list.
        if pip install "$missing_module" 2>&1; then
          echo "[INFO] Successfully installed $missing_module. Retrying evaluation..."
          return 0 # Signal to retry
        else
          echo "[WARN] Failed to install $missing_module automatically"
          echo "[INFO] Please install manually: pip install $missing_module"
        fi
      fi
      ;;

    syntax_error)
      echo "[INFO] Detected syntax error in generated code"

      # Retry at most once per candidate: a description that already carries
      # the marker has been retried before, and retrying again could loop
      # forever on a candidate that keeps producing broken code.
      if [[ $description == *"$retry_marker"* ]]; then
        echo "[WARN] Candidate was already retried after a syntax error; not retrying again"
        return 1
      fi
      if [[ ! $row_num =~ ^[0-9]+$ ]]; then
        echo "[WARN] Cannot schedule retry: invalid row number '$row_num'"
        return 1
      fi

      echo "[INFO] Retrying mutation with additional constraints..."

      # Put the candidate back into the queue.
      update_csv_row "$row_num" "" "pending" || return 1

      # Tag the description so the next attempt is told to fix the syntax.
      # Only the target line is edited, and only by inserting the marker at
      # the start of its third field; all other bytes are copied unchanged.
      # This keeps quoted fields with embedded commas and any extra columns
      # intact, and the `|| [[ -n $line ]]` keeps a final line that has no
      # trailing newline.
      # NOTE(review): a quoted field spanning several physical lines would
      # still throw off the line count.
      # The temp file is created next to the CSV so the final mv is a rename
      # on the same filesystem.
      temp_file=$(mktemp "${FULL_CSV_PATH}.XXXXXX") || return 1
      while IFS= read -r line || [[ -n $line ]]; do
        current_row=$((current_row + 1))
        if [[ $current_row -eq $row_num && $line =~ $split_re ]]; then
          head="${BASH_REMATCH[1]}"
          rest="${BASH_REMATCH[2]}"
          if [[ $rest == \"* ]]; then
            # Quoted description: keep the opening quote in front.
            line="${head}\"${retry_marker} ${rest#\"}"
          else
            line="${head}${retry_marker} ${rest}"
          fi
        fi
        printf '%s\n' "$line"
      done <"$FULL_CSV_PATH" >"$temp_file" || {
        rm -f -- "$temp_file"
        return 1
      }

      if ! mv -- "$temp_file" "$FULL_CSV_PATH"; then
        rm -f -- "$temp_file"
        return 1
      fi
      return 0 # Signal to retry
      ;;

    zero_score)
      # Nothing to repair automatically; just give the operator a hint.
      echo "[INFO] Algorithm produced zero score - likely a logic error"
      echo "[INFO] Consider adding constraints to BRIEF.md to avoid this approach"
      ;;
  esac

  return 1 # No recovery possible
}
228
-
229
# ---------------------------------------------------------------------------
# Helpers for the main evolution loop
# ---------------------------------------------------------------------------

# Count one failure; end the whole run with status 1 once
# should_continue_after_failure reports that the failure budget is spent.
_run_fail_or_exit() {
  should_continue_after_failure || exit 1
}

# Print the last line of $1 that looks like a single-line JSON object.
_run_last_json_line() {
  printf '%s\n' "$1" | grep '^{.*}$' | tail -1
}

# Print the score for one evaluation, or nothing when none can be found.
#   $1 - JSON line from the evaluator (may be empty)
#   $2 - complete evaluator output, used for the regex fallback
# The JSON keys "performance" and then "score" are preferred; if the JSON is
# missing or has neither key, the raw output is scanned for the last
# `"performance": <number>` and then the last `"score": <number>`.
_run_extract_score() {
  local json_text="$1" raw_output="$2" found="" key

  if [[ -n $json_text ]]; then
    found=$(printf '%s' "$json_text" | "$PYTHON_CMD" -c '
import sys, json
try:
    data = json.loads(sys.stdin.read().strip())
except Exception:
    sys.exit(1)
for key in ("performance", "score"):
    if key in data:
        print(data[key])
        break
else:
    sys.exit(1)
' 2>/dev/null)
  fi

  for key in performance score; do
    if [[ -n $found ]]; then
      break
    fi
    found=$(printf '%s\n' "$raw_output" \
      | grep -o "\"$key\"[[:space:]]*:[[:space:]]*[0-9.]*" \
      | cut -d: -f2 | tr -d ' ' | tail -1)
  done

  printf '%s' "$found"
}

# Succeed when score $1 is numerically zero. awk is used instead of bc so
# that exponent notation such as 1e-05 (which Python prints for small
# floats) is compared correctly. Text awk cannot read as a number counts as
# zero in common awk implementations.
_run_score_is_zero() {
  awk -v s="$1" 'BEGIN { exit !(s + 0 == 0) }'
}

# Store an evaluation result in the CSV.
#   $1 row number  $2 candidate ID  $3 score  $4 status  $5 JSON line (may be empty)
# With JSON available the full JSON is handed to `csv_helper.py
# update_with_json`. If that fails, or there is no JSON, the plain
# score/status update is used so the row never silently stays pending.
# NOTE(review): whether update_with_json marks a zero score as failed is
# decided inside csv_helper.py, which is not visible here.
_run_record_result() {
  local row="$1" cand_id="$2" value="$3" new_status="$4" json_text="$5"

  if [[ -n $json_text ]]; then
    if "$PYTHON_CMD" "${SCRIPT_DIR}/../lib/csv_helper.py" update_with_json "${FULL_CSV_PATH}" "$cand_id" "$json_text"; then
      return 0
    fi
    echo "[WARN] CSV helper update_with_json failed; falling back to a plain row update" >&2
  fi
  update_csv_row "$row" "$value" "$new_status"
}

# Run the evaluator on $output_file for candidate $id.
# Sets: eval_output (stdout+stderr), eval_exit_code, eval_timed_out.
# Honours $timeout_seconds through `timeout` (or `gtimeout`) when available.
_run_evaluate() {
  local -a runner=("$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file")
  local timeout_bin="" limited=false

  if [[ -n $timeout_seconds ]]; then
    if timeout_bin=$(command -v timeout || command -v gtimeout); then
      echo "[INFO] Evaluation timeout: ${timeout_seconds}s"
      runner=("$timeout_bin" "$timeout_seconds" "${runner[@]}")
      limited=true
    else
      echo "[WARN] Neither 'timeout' nor 'gtimeout' found; running evaluator without a time limit" >&2
    fi
  fi

  eval_exit_code=0
  eval_timed_out=false
  eval_output=$(EXPERIMENT_ID="$id" "${runner[@]}" 2>&1) || eval_exit_code=$?

  # `timeout` reports an expired limit with exit status 124.
  if [[ $limited == true && $eval_exit_code -eq 124 ]]; then
    eval_timed_out=true
  fi
}

# Show the evaluator output on stdout and append it to the evaluator log.
_run_log_evaluation() {
  echo "[INFO] Evaluator output:"
  echo "----------------------------------------"
  echo "$eval_output"
  echo "----------------------------------------"

  {
    echo "=== EVALUATOR EXECUTION ==="
    echo "ID: $id"
    echo "Algorithm: $output_file"
    echo "Command: EXPERIMENT_ID=$id $PYTHON_CMD $FULL_EVALUATOR_PATH $output_file"
    echo "Exit code: $eval_exit_code"
    echo "Timestamp: $(date)"
    echo
    echo "=== EVALUATOR OUTPUT ==="
    echo "$eval_output"
    echo
  } >>"${LOGFILE}-evaluator"
}

# Interrupt handler - just exit without updating CSV status.
trap 'echo "[INFO] Evolution interrupted"; exit 130' INT

# ---------------------------------------------------------------------------
# Main evolution loop
# ---------------------------------------------------------------------------
while true; do
  # Find next candidate
  if ! row_num=$(find_empty_row); then
    echo "[INFO] No more pending candidates found."

    # Check if auto ideation is enabled
    echo "[DEBUG] AUTO_IDEATE value: '$AUTO_IDEATE'"
    if [[ "$AUTO_IDEATE" != "true" && "$AUTO_IDEATE" != "1" ]]; then
      echo "[INFO] Auto ideation is disabled. Evolution run complete."
      exit 0
    fi
    echo "[INFO] Auto ideation is enabled. Generating new ideas..."

    ideate_script="$SCRIPT_DIR/claude-evolve-ideate"
    if [[ ! -f "$ideate_script" ]]; then
      echo "[ERROR] claude-evolve-ideate script not found: $ideate_script" >&2
      echo "[INFO] Evolution run complete - no way to generate more ideas."
      exit 0
    fi

    # Generate new ideas using the multi-strategy approach
    echo "[INFO] Calling claude-evolve-ideate to generate new candidates..."
    if ! "$ideate_script"; then
      echo "[ERROR] Failed to generate new ideas" >&2
      echo "[INFO] Evolution run complete - ideation failed."
      exit 1
    fi

    # NOTE(review): if ideation succeeds without adding any pending rows,
    # this loops back into ideation indefinitely.
    echo "[INFO] New ideas generated successfully. Continuing evolution..."
    continue # Go back to start of loop to find the new candidates
  fi

  # Create log file name for this iteration
  LOGFILE="logs/claude-$(date +%Y%m%d_%H%M%S).txt"

  # Clear per-candidate state first so that a failing helper cannot leave
  # the previous candidate's values in place.
  id=""
  based_on_id=""
  description=""
  score=""
  last_json=""

  # Get row data using the CSV helper. The helper prints shell assignments;
  # this loop reads id, based_on_id and description from them.
  # NOTE(review): eval of helper output - csv_helper.py must shell-quote
  # every value it emits, since descriptions are free text.
  eval "$("$PYTHON_CMD" "$SCRIPT_DIR/../lib/csv_helper.py" get_row "$FULL_CSV_PATH" "$row_num")"

  # Check if ID is empty
  if [[ -z $id ]]; then
    echo "[ERROR] Empty ID found at row $row_num. CSV may be malformed." >&2
    exit 1
  fi

  echo "[INFO] Processing candidate ID: $id"
  echo "[INFO] Description: $description"
  echo "[INFO] Based on ID: $based_on_id"

  # Determine parent algorithm path
  if [[ -z $based_on_id || $based_on_id == "0" || $based_on_id == '""' ]]; then
    parent_file="$FULL_ALGORITHM_PATH"
    echo "[INFO] Using base algorithm (basedonID is empty or 0)"
  else
    # Handle both old format (numeric) and new format (genXX-XXX)
    if [[ $based_on_id =~ ^[0-9]+$ ]]; then
      parent_file="$FULL_OUTPUT_DIR/evolution_id${based_on_id}.py"
    else
      parent_file="$FULL_OUTPUT_DIR/evolution_${based_on_id}.py"
    fi

    if [[ ! -f $parent_file ]]; then
      echo "[ERROR] Parent algorithm file not found: $parent_file" >&2
      update_csv_row "$row_num" "" "failed"
      _run_fail_or_exit
      continue
    fi
  fi

  echo "[INFO] Using parent algorithm: $parent_file"

  # Generate output file path (same numeric / genXX-XXX naming as above)
  if [[ $id =~ ^[0-9]+$ ]]; then
    output_file="$FULL_OUTPUT_DIR/evolution_id${id}.py"
  else
    output_file="$FULL_OUTPUT_DIR/evolution_${id}.py"
  fi
  echo "[INFO] Generating algorithm mutation..."

  # AIDEV-NOTE: Using common evolution processor logic to decide whether the
  # copy and/or the Claude step should be skipped. The processor prints
  # shell assignments; skip_copy, skip_claude and reason are read below and
  # are cleared first so stale values are never reused.
  skip_copy=""
  skip_claude=""
  reason=""
  eval "$("$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_processor.py" "$id" "$based_on_id" "$FULL_OUTPUT_DIR" "$ROOT_DIR" "$parent_file" "$output_file")"

  # Handle copy operation
  if [[ "$skip_copy" == "True" ]]; then
    echo "[INFO] ⚠️ Skipping copy - $reason"
  elif cp "$parent_file" "$output_file"; then
    echo "[INFO] Copied parent algorithm to: $output_file"
  else
    echo "[ERROR] Failed to copy parent algorithm to: $output_file" >&2
    update_csv_row "$row_num" "" "failed"
    _run_fail_or_exit
    continue
  fi

  # Check for claude CLI
  claude_cmd="${CLAUDE_CMD:-claude}"
  if ! command -v "$claude_cmd" >/dev/null 2>&1; then
    echo "[ERROR] Claude CLI not found. Please install 'claude' CLI tool or set CLAUDE_CMD environment variable." >&2
    update_csv_row "$row_num" "" "failed"
    exit 1
  fi

  # Always use Claude Sonnet
  CLAUDE_MODEL="sonnet"
  echo "[INFO] Using Claude Sonnet for development"

  # Create mutation prompt - Claude will edit the file directly
  prompt="Edit the file $output_file to implement this specific change: $description

IMPORTANT: Before starting the task, you MUST read and understand:
1. The project's CLAUDE.md file (if it exists) - this contains project-specific instructions
2. The user's global CLAUDE.md file at ~/.claude/CLAUDE.md (if it exists) - this contains general development principles
3. Ensure all your work follows the architectural and development guidelines from both files

Requirements:
- Edit the file directly (don't just provide comments or suggestions)
- Maintain the same function signatures and interfaces
- Make the specific change described above
- Ensure the code runs without syntax errors
- Add proper error handling if needed

The file currently contains the parent algorithm. Modify it according to the description above while adhering to all guidelines from the CLAUDE.md files."

  # Handle Claude mutation based on skip flags
  if [[ "$skip_claude" == "True" ]]; then
    echo "[INFO] ⚠️ Skipping Claude processing - $reason"
  else
    echo "[INFO] Calling Claude $CLAUDE_MODEL to apply mutation..."
    echo "[INFO] Claude will edit: $output_file"
    echo "[INFO] Logging to: ${LOGFILE}-developer"

    # Log both prompt and response
    {
      echo "=== EVOLUTION MUTATION PROMPT ==="
      echo "ID: $id"
      echo "Based on: $based_on_id"
      echo "Description: $description"
      echo "Output file: $output_file"
      echo "Model: $CLAUDE_MODEL"
      echo "Timestamp: $(date)"
      echo
      echo "$prompt"
      echo
      echo "=== CLAUDE RESPONSE ==="
    } >>"${LOGFILE}-developer"

    # Capture Claude's output for limit detection while tee appends it to
    # the log. PIPESTATUS of a pipeline inside $(...) is not visible to the
    # parent shell, so pipefail is enabled inside the substitution and the
    # substitution's own exit status is used instead.
    claude_output=$(
      set -o pipefail
      "$claude_cmd" --dangerously-skip-permissions --model "$CLAUDE_MODEL" -p <<<"$prompt" 2>&1 \
        | tee -a "${LOGFILE}-developer"
    )
    claude_exit_code=$?

    # Check for usage limit
    if grep -q "Claude AI usage limit reached" <<<"$claude_output"; then
      # Extract timestamp if available
      limit_timestamp=$(grep -o "Claude AI usage limit reached|[0-9]*" <<<"$claude_output" | cut -d'|' -f2 | head -1)

      echo -e "\033[31m[ERROR] CLAUDE AI USAGE LIMIT REACHED!\033[0m" >&2
      echo -e "\033[31m[ERROR] Evolution halted due to API rate limits.\033[0m" >&2

      if [[ -n $limit_timestamp ]]; then
        # Epoch seconds to readable time: BSD/macOS `date -r` first, then
        # GNU `date -d @...`.
        limit_date=$(date -r "$limit_timestamp" "+%Y-%m-%d %H:%M:%S" 2>/dev/null \
          || date -d "@$limit_timestamp" "+%Y-%m-%d %H:%M:%S" 2>/dev/null \
          || echo "Unknown time")
        echo -e "\033[31m[ERROR] Limit will be released at: $limit_date\033[0m" >&2
      fi

      echo -e "\033[33m[INFO] Please wait for the rate limit to reset before continuing.\033[0m" >&2
      echo -e "\033[33m[INFO] The current candidate will remain in 'pending' status.\033[0m" >&2

      # Don't mark as failed - leave it pending for retry later
      exit 1
    fi

    # Check for other failures
    if [[ $claude_exit_code -ne 0 ]]; then
      echo "[ERROR] Claude failed to mutate algorithm" >&2
      update_csv_row "$row_num" "" "failed"
      _run_fail_or_exit
      continue
    fi

    echo "[INFO] Claude completed mutation"
  fi

  echo "[INFO] Algorithm ready at: $output_file"

  # Run evaluator
  echo "[INFO] Running evaluation..."
  echo "[INFO] Executing: EXPERIMENT_ID=$id $PYTHON_CMD $FULL_EVALUATOR_PATH $output_file"
  _run_evaluate
  _run_log_evaluation

  # One automatic retry after a missing module has been installed. The
  # retried run goes through exactly the same result handling (including
  # the timeout) as a first run.
  if [[ $eval_exit_code -ne 0 && $eval_timed_out == false ]] \
    && grep -q "No module named" <<<"$eval_output"; then
    if attempt_recovery "import_error" "$eval_output" "$row_num" "$id" "$description"; then
      echo "[INFO] Retrying evaluation after recovery attempt..."
      _run_evaluate
      _run_log_evaluation
    fi
  fi

  if [[ $eval_timed_out == true ]]; then
    echo "[ERROR] Evaluation timed out after ${timeout_seconds} seconds" >&2
    update_csv_row "$row_num" "" "timeout"
    _run_fail_or_exit
    continue
  fi

  if [[ $eval_exit_code -ne 0 ]]; then
    echo "[ERROR] Evaluator failed with exit code $eval_exit_code" >&2
    echo "[ERROR] Output: $eval_output" >&2

    # Syntax errors: attempt_recovery puts the candidate back to pending
    # with a retry hint. The attempt still counts against the failure
    # budget, so a candidate that keeps producing broken code cannot keep
    # the loop running forever.
    if grep -q "SyntaxError" <<<"$eval_output" \
      && attempt_recovery "syntax_error" "$eval_output" "$row_num" "$id" "$description"; then
      _run_fail_or_exit
      continue
    fi

    update_csv_row "$row_num" "" "failed"
    _run_fail_or_exit
    continue
  fi

  # Evaluator succeeded: extract the last valid JSON line and the score
  last_json=$(_run_last_json_line "$eval_output")
  if [[ -n $last_json ]]; then
    echo "[DEBUG] Found JSON: $last_json"
  fi
  score=$(_run_extract_score "$last_json" "$eval_output")

  if [[ -z $score ]]; then
    echo "[ERROR] No score found in evaluator output" >&2
    echo "[ERROR] Output: $eval_output" >&2
    update_csv_row "$row_num" "" "failed"
    _run_fail_or_exit
    continue
  fi
  echo "[DEBUG] Extracted score: '$score'"

  # A score of 0 is treated as a failed candidate
  if _run_score_is_zero "$score"; then
    _run_record_result "$row_num" "$id" "$score" "failed" "$last_json"
    echo "[INFO] ✗ Evaluation failed with score 0"
    echo "[INFO] Performance score: $score"
    _run_fail_or_exit
    continue
  fi

  _run_record_result "$row_num" "$id" "$score" "complete" "$last_json"
  echo "[INFO] ✓ Evaluation completed successfully"
  echo "[INFO] Performance score: $score"

  echo "[INFO] Evolution cycle completed successfully!"
  consecutive_failures=0 # Reset failure counter on success

  echo "[INFO] Looking for next candidate..."
  echo "----------------------------------------"
done # End of main evolution loop