claude-evolve 1.5.2 → 1.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -84,39 +84,76 @@ call_ai_for_ideation() {
84
84
  original_csv_count=0
85
85
  fi
86
86
 
87
- # Use centralized AI library
88
- local ai_output
89
- ai_output=$(call_ai_with_round_robin "$prompt" "ideate" "$gen_num")
90
- local ai_exit_code=$?
87
+ echo "[DEBUG] Original CSV has $original_csv_count data rows" >&2
91
88
 
92
- # Handle special exit codes
93
- # No special handling for exit codes anymore
89
+ # Get models for ideation
90
+ local model_list
91
+ model_list=$(get_models_for_command "ideate")
92
+ local models=()
93
+ read -ra models <<< "$model_list"
94
94
 
95
- if [[ $ai_exit_code -eq 0 ]]; then
96
- # For ideation, we need to verify the CSV file was actually modified
95
+ if [[ ${#models[@]} -eq 0 ]]; then
96
+ echo "[ERROR] No models configured for ideation" >&2
97
+ return 1
98
+ fi
99
+
100
+ # Calculate starting index for round-robin
101
+ local num_models=${#models[@]}
102
+ local start_index=$((gen_num % num_models))
103
+
104
+ # Create ordered list based on round-robin
105
+ local ordered_models=()
106
+ for ((i=0; i<num_models; i++)); do
107
+ local idx=$(((start_index + i) % num_models))
108
+ ordered_models+=("${models[$idx]}")
109
+ done
110
+
111
+ echo "[AI] Model order for ideate (round-robin): ${ordered_models[*]}" >&2
112
+
113
+ # Try each model until CSV changes
114
+ for model in "${ordered_models[@]}"; do
115
+ echo "[AI] Attempting ideate with $model" >&2
116
+
117
+ # Call the model directly
118
+ local ai_output
119
+ ai_output=$(call_ai_model_configured "$model" "$prompt")
120
+ local ai_exit_code=$?
121
+
122
+ echo "[AI] $model completed with exit code $ai_exit_code" >&2
123
+
124
+ # Check if the file was modified - this is ALL that matters
97
125
  if [[ -f "$temp_csv_file" ]]; then
98
126
  local new_csv_count
99
127
  new_csv_count=$(grep -v '^[[:space:]]*$' "$temp_csv_file" | tail -n +2 | wc -l)
100
128
 
101
129
  if [[ $new_csv_count -gt $original_csv_count ]]; then
102
- echo "[INFO] AI completed successfully and modified CSV ($new_csv_count vs $original_csv_count rows)" >&2
130
+ echo "[INFO] CSV was modified by $model ($new_csv_count vs $original_csv_count rows) - validating format..." >&2
131
+
132
+ # Post-process to ensure all description fields are quoted
133
+ local fixed_csv_file="${temp_csv_file}.fixed"
134
+
135
+ # Use the CSV fixer script
136
+ if "$PYTHON_CMD" "$SCRIPT_DIR/../lib/csv_fixer.py" "$temp_csv_file" "$fixed_csv_file"; then
137
+ mv "$fixed_csv_file" "$temp_csv_file"
138
+ echo "[INFO] CSV format validated and fixed if needed" >&2
139
+ else
140
+ echo "[WARN] CSV format validation failed, using original" >&2
141
+ fi
142
+
103
143
  return 0
104
144
  else
105
- echo "[INFO] $model returned exit code 0 but didn't modify CSV file" >&2
106
- echo "[DEBUG] Expected file: $temp_csv_file" >&2
145
+ echo "[INFO] CSV unchanged after $model (exit code: $ai_exit_code)" >&2
107
146
  echo "[DEBUG] Original count: $original_csv_count, New count: $new_csv_count" >&2
108
- return 1
147
+ # Continue to next model
109
148
  fi
110
149
  else
111
- echo "[INFO] Exit code 0 but temp CSV file not found: $temp_csv_file" >&2
112
- echo "[DEBUG] Current directory: $(pwd)" >&2
113
- echo "[DEBUG] Files matching temp-csv-*.csv:" >&2
114
- ls -la temp-csv-*.csv 2>&1 >&2
115
- return 1
150
+ echo "[INFO] Temp CSV file not found after $model: $temp_csv_file" >&2
151
+ # Continue to next model
116
152
  fi
117
- fi
153
+ done
118
154
 
119
- echo "[INFO] No AI model successfully modified the CSV file" >&2
155
+ # All models tried, none changed the file
156
+ echo "[ERROR] All AI models failed to generate ideas" >&2
120
157
  return 1
121
158
  }
122
159
 
@@ -461,8 +498,14 @@ Instructions:
461
498
  1. Add exactly $count new rows to the CSV
462
499
  2. Use the next available generation numbers (gen$CURRENT_GENERATION-XXX format)
463
500
  3. For each idea, create a row with: id,parent_id,description,,pending
464
- 4. For novel ideas: leave parent_id empty
465
- 5. For other idea types: use appropriate parent IDs from these top performers:
501
+ 4. CRITICAL CSV FORMATTING RULES:
502
+ - ALWAYS wrap the description field in double quotes
503
+ - If the description contains quotes, escape them by doubling them (\" becomes \"\")
504
+ - Example: gen01-001,gen00-000,\"Implement adaptive RSI thresholds\",,pending
505
+ - BAD: gen01-001,gen00-000,Implement adaptive RSI thresholds,,pending
506
+ - NEVER omit quotes - unquoted descriptions cause CSV corruption
507
+ 5. For novel ideas: leave parent_id empty
508
+ 6. For other idea types: use appropriate parent IDs from these top performers:
466
509
  $top_performers
467
510
 
468
511
  IMPORTANT: Output the complete modified CSV file. Do not add any explanation or other text - just output the CSV."
@@ -839,9 +882,15 @@ CRITICAL INSTRUCTIONS:
839
882
  prompt+="
840
883
  6. Use the Edit or MultiEdit tool to APPEND exactly $count new rows AT THE END of the CSV file
841
884
  7. For each idea, create a row with: id,,description,,pending (empty parent_id for novel ideas)
842
- 8. Each description should be one clear sentence describing a novel algorithmic approach
843
- 9. Focus on creative, ambitious ideas that haven't been tried yet
844
- 10. Consider machine learning, new indicators, regime detection, risk management, etc.
885
+ 8. CRITICAL CSV FORMATTING RULES:
886
+ - ALWAYS wrap the description field in double quotes
887
+ - If the description contains quotes, escape them by doubling them (\" becomes \"\")
888
+ - Example: gen01-001,,\"Implement adaptive RSI thresholds based on volatility\",,pending
889
+ - BAD: gen01-001,,Implement adaptive RSI thresholds based on volatility,,pending
890
+ - NEVER omit quotes around descriptions - this causes CSV parsing errors
891
+ 9. Each description should be one clear sentence describing a novel algorithmic approach
892
+ 10. Focus on creative, ambitious ideas that haven't been tried yet
893
+ 11. Consider machine learning, new indicators, regime detection, risk management, etc.
845
894
 
846
895
  IMPORTANT: You must APPEND new rows to the existing CSV file. DO NOT replace the file contents. All existing rows must remain unchanged.
847
896
  CRITICAL: You must use your file editing tools (Edit/MultiEdit) to modify the CSV file. DO NOT return CSV text - use your tools to edit the file directly.
@@ -939,8 +988,14 @@ CRITICAL INSTRUCTIONS:
939
988
  5. Use the Edit or MultiEdit tool to APPEND exactly $count new rows AT THE END of the CSV file
940
989
  6. For each idea, create a row with: id,parent_id,description,,pending
941
990
  7. Each parent_id MUST be one of: $valid_parent_ids
942
- 8. Each description should focus on adjusting specific parameters that exist in the parent's source code
943
- 9. Include current and new parameter values (e.g., \"Lower rsi_entry from 21 to 18\")
991
+ 8. CRITICAL CSV FORMATTING RULES:
992
+ - ALWAYS wrap the description field in double quotes
993
+ - If the description contains quotes, escape them by doubling them (\" becomes \"\")
994
+ - Example: gen01-001,gen00-000,\"Lower rsi_entry from 21 to 18\",,pending
995
+ - BAD: gen01-001,gen00-000,Lower rsi_entry from 21 to 18,,pending
996
+ - NEVER omit quotes around descriptions - this causes CSV parsing errors
997
+ 9. Each description should focus on adjusting specific parameters that exist in the parent's source code
998
+ 10. Include current and new parameter values - for example: \"Lower rsi_entry from 21 to 18\"
944
999
 
945
1000
  IMPORTANT: You must APPEND new rows to the existing CSV file. DO NOT replace the file contents. All existing rows must remain unchanged.
946
1001
  CRITICAL: You must use your file editing tools (Edit/MultiEdit) to modify the CSV file. DO NOT return CSV text - use your tools to edit the file directly.
@@ -1020,8 +1075,14 @@ CRITICAL INSTRUCTIONS:
1020
1075
  5. Use the Edit or MultiEdit tool to APPEND exactly $count new rows AT THE END of the CSV file
1021
1076
  6. For each idea, create a row with: id,parent_id,description,,pending
1022
1077
  7. Each parent_id MUST be one of: $valid_parent_ids
1023
- 8. Each description should focus on architectural/structural changes based on the parent's actual code
1024
- 9. Reference actual components/methods found in the source code
1078
+ 8. CRITICAL CSV FORMATTING RULES:
1079
+ - ALWAYS wrap the description field in double quotes
1080
+ - If the description contains quotes, escape them by doubling them (\" becomes \"\")
1081
+ - Example: gen01-001,gen00-000,\"Add ML-based regime detection using LSTM\",,pending
1082
+ - BAD: gen01-001,gen00-000,Add ML-based regime detection using LSTM,,pending
1083
+ - NEVER omit quotes around descriptions - this causes CSV parsing errors
1084
+ 9. Each description should focus on architectural/structural changes based on the parent's actual code
1085
+ 10. Reference actual components/methods found in the source code
1025
1086
 
1026
1087
  IMPORTANT: You must APPEND new rows to the existing CSV file. DO NOT replace the file contents. All existing rows must remain unchanged.
1027
1088
  CRITICAL: You must use your file editing tools (Edit/MultiEdit) to modify the CSV file. DO NOT return CSV text - use your tools to edit the file directly.
@@ -1101,8 +1162,14 @@ CRITICAL INSTRUCTIONS:
1101
1162
  5. Use the Edit or MultiEdit tool to APPEND exactly $count new rows AT THE END of the CSV file
1102
1163
  6. For each idea, create a row with: id,parent_id,description,,pending
1103
1164
  7. Each parent_id MUST be one of: $valid_parent_ids (choose the primary parent)
1104
- 8. Each description should combine actual elements from 2+ algorithms based on their source code
1105
- 9. Reference specific components/features found in the actual source code
1165
+ 8. CRITICAL CSV FORMATTING RULES:
1166
+ - ALWAYS wrap the description field in double quotes
1167
+ - If the description contains quotes, escape them by doubling them (\" becomes \"\")
1168
+ - Example: gen01-001,gen00-000,\"Combine gen01-123's RSI logic with gen01-456's volatility scaling\",,pending
1169
+ - BAD: gen01-001,gen00-000,Combine gen01-123's RSI logic with gen01-456's volatility scaling,,pending
1170
+ - NEVER omit quotes around descriptions - this causes CSV parsing errors
1171
+ 9. Each description should combine actual elements from 2+ algorithms based on their source code
1172
+ 10. Reference specific components/features found in the actual source code
1106
1173
 
1107
1174
  IMPORTANT: You must APPEND new rows to the existing CSV file. DO NOT replace the file contents. All existing rows must remain unchanged.
1108
1175
  CRITICAL: You must use your file editing tools (Edit/MultiEdit) to modify the CSV file. DO NOT return CSV text - use your tools to edit the file directly.
@@ -1195,8 +1262,14 @@ CRITICAL INSTRUCTIONS:
1195
1262
  4. If no gen$CURRENT_GENERATION entries exist yet, start with gen$CURRENT_GENERATION-001
1196
1263
  5. Use the Edit or MultiEdit tool to APPEND exactly $TOTAL_IDEAS new rows AT THE END of the CSV file
1197
1264
  6. For each idea, create a row with: id,parent_id,description,,pending
1198
- 7. Mix both parameter tuning and structural changes
1199
- 8. If building on existing algorithms, use their ID as parent_id, otherwise leave parent_id empty
1265
+ 7. CRITICAL CSV FORMATTING RULES:
1266
+ - ALWAYS wrap the description field in double quotes
1267
+ - If the description contains quotes, escape them by doubling them (\" becomes \"\")
1268
+ - Example: gen01-001,gen00-000,\"Implement adaptive RSI thresholds based on volatility\",,pending
1269
+ - BAD: gen01-001,gen00-000,Implement adaptive RSI thresholds based on volatility,,pending
1270
+ - NEVER omit quotes around descriptions - this causes CSV parsing errors that corrupt the data
1271
+ 8. Mix both parameter tuning and structural changes
1272
+ 9. If building on existing algorithms, use their ID as parent_id, otherwise leave parent_id empty
1200
1273
 
1201
1274
  ⚠️ AVOID ONLY: Kelly floor/cap adjustments that assume leverage > 1.0 (these get clamped and have no effect)
1202
1275
 
@@ -478,6 +478,66 @@ ensure_baseline_entry
478
478
  # Flag to track API limit status
479
479
  api_limit_reached=false
480
480
 
481
+ # Check if previous generation has at least one completed item
482
+ check_previous_generation_has_completed() {
483
+ local csv_path="$1"
484
+
485
+ if [[ ! -f "$csv_path" ]]; then
486
+ # No CSV file yet - allow first ideation
487
+ return 0
488
+ fi
489
+
490
+ # Use Python to find the maximum generation and check if it has completed items
491
+ "$PYTHON_CMD" -c "
492
+ import csv
493
+ import sys
494
+
495
+ max_gen = 0
496
+ gen_completed = {}
497
+
498
+ try:
499
+ with open('$csv_path', 'r') as f:
500
+ reader = csv.reader(f)
501
+ next(reader, None) # Skip header
502
+ for row in reader:
503
+ if row and len(row) >= 5:
504
+ id_field = row[0].strip()
505
+ status = row[4].strip() if len(row) > 4 else 'pending'
506
+
507
+ if id_field.startswith('gen') and '-' in id_field:
508
+ try:
509
+ gen_part = id_field.split('-')[0] # e.g., 'gen01'
510
+ gen_num = int(gen_part[3:]) # Extract number after 'gen'
511
+ max_gen = max(max_gen, gen_num)
512
+
513
+ if gen_num not in gen_completed:
514
+ gen_completed[gen_num] = 0
515
+
516
+ if status == 'complete':
517
+ gen_completed[gen_num] += 1
518
+ except (ValueError, IndexError):
519
+ pass
520
+
521
+ # If max_gen is 0, no generations exist yet - allow ideation
522
+ if max_gen == 0:
523
+ sys.exit(0)
524
+
525
+ # Check if the most recent generation has at least one completed item
526
+ if gen_completed.get(max_gen, 0) > 0:
527
+ print(f'[INFO] Generation {max_gen:02d} has {gen_completed[max_gen]} completed items - allowing ideation', file=sys.stderr)
528
+ sys.exit(0)
529
+ else:
530
+ print(f'[INFO] Generation {max_gen:02d} has no completed items - blocking ideation to prevent endless loops', file=sys.stderr)
531
+ sys.exit(1)
532
+
533
+ except Exception as e:
534
+ print(f'[ERROR] Failed to check previous generation: {e}', file=sys.stderr)
535
+ # On error, allow ideation to maintain backward compatibility
536
+ sys.exit(0)
537
+ "
538
+ return $?
539
+ }
540
+
481
541
  # Main dispatch loop
482
542
  while true; do
483
543
  # Clean up finished workers
@@ -506,7 +566,17 @@ while true; do
506
566
 
507
567
  # Check if auto ideation is enabled
508
568
  if [[ "$AUTO_IDEATE" == "true" || "$AUTO_IDEATE" == "1" ]]; then
509
- echo "[DISPATCHER] Auto ideation is enabled. Generating new ideas..."
569
+ echo "[DISPATCHER] Auto ideation is enabled. Checking prerequisites..."
570
+
571
+ # Check if previous generation has at least one completed item
572
+ if ! check_previous_generation_has_completed "$FULL_CSV_PATH"; then
573
+ echo "[DISPATCHER] Evolution complete - previous generation has no completed items."
574
+ echo "[DISPATCHER] This prevents endless ideation loops when API limits are hit."
575
+ echo "[DISPATCHER] Wait for current generation to complete, then run 'claude-evolve ideate' manually."
576
+ break
577
+ fi
578
+
579
+ echo "[DISPATCHER] Prerequisites met. Generating new ideas..."
510
580
 
511
581
  # Check if claude-evolve-ideate exists
512
582
  ideate_script="$SCRIPT_DIR/claude-evolve-ideate"
@@ -244,7 +244,9 @@ try:
244
244
  # Show per-generation breakdown (unless brief mode)
245
245
  if not show_brief and stats_by_gen:
246
246
  print('📈 BY GENERATION:')
247
- for gen in sorted(stats_by_gen.keys()):
247
+ # Sort generations numerically by extracting the number after 'gen'
248
+ sorted_gens = sorted(stats_by_gen.keys(), key=lambda g: int(g[3:]) if g.startswith('gen') and g[3:].isdigit() else 0)
249
+ for gen in sorted_gens:
248
250
  data = stats_by_gen[gen]
249
251
  total = sum(data.values())
250
252
 
@@ -14,34 +14,11 @@ TERMINATION_SIGNAL=""
14
14
  # Cleanup function to handle termination
15
15
  cleanup_on_exit() {
16
16
  if [[ -n "$CURRENT_CANDIDATE_ID" ]]; then
17
- # Only mark as failed if it was a timeout (SIGTERM from timeout command)
18
- # For user interruption (Ctrl-C) or kill, leave it for retry
19
- if [[ "$TERMINATION_SIGNAL" == "TERM" ]]; then
20
- echo "[WORKER-$$] Timeout detected, marking $CURRENT_CANDIDATE_ID as failed" >&2
21
- "$PYTHON_CMD" -c "
22
- import sys
23
- sys.path.insert(0, '$SCRIPT_DIR/..')
24
- from lib.evolution_csv import EvolutionCSV
25
- try:
26
- with EvolutionCSV('$FULL_CSV_PATH') as csv:
27
- csv.update_candidate_status('$CURRENT_CANDIDATE_ID', 'failed')
28
- except:
29
- pass # Best effort cleanup
30
- " 2>/dev/null || true
31
- else
32
- echo "[WORKER-$$] Interrupted, leaving $CURRENT_CANDIDATE_ID for retry" >&2
33
- # Optionally reset to pending instead of leaving as running
34
- "$PYTHON_CMD" -c "
35
- import sys
36
- sys.path.insert(0, '$SCRIPT_DIR/..')
37
- from lib.evolution_csv import EvolutionCSV
38
- try:
39
- with EvolutionCSV('$FULL_CSV_PATH') as csv:
40
- csv.update_candidate_status('$CURRENT_CANDIDATE_ID', 'pending')
41
- except:
42
- pass # Best effort cleanup
43
- " 2>/dev/null || true
44
- fi
17
+ echo "[WORKER-$$] Worker terminated while processing $CURRENT_CANDIDATE_ID" >&2
18
+ # If we're interrupted while processing, leave it as "running"
19
+ # This prevents other workers from picking it up in the same session
20
+ # A human can manually reset to pending if needed
21
+ echo "[WORKER-$$] Leaving $CURRENT_CANDIDATE_ID in current state" >&2
45
22
  fi
46
23
  }
47
24
 
@@ -135,6 +112,12 @@ process_candidate() {
135
112
  echo "[WORKER-$$] Description: $description"
136
113
  echo "[WORKER-$$] Based on ID: $parent_id"
137
114
 
115
+ # Treat "baseline-000" parent ID as empty/baseline
116
+ if [[ "$parent_id" == "baseline-000" ]]; then
117
+ parent_id=""
118
+ echo "[WORKER-$$] Parent ID 'baseline-000' treated as baseline (empty parent)"
119
+ fi
120
+
138
121
  # Determine source algorithm
139
122
  local source_file
140
123
  if [[ -z "$parent_id" ]]; then
@@ -213,16 +196,26 @@ CRITICAL: Do NOT use any git commands (git add, git commit, git reset, etc.). On
213
196
 
214
197
  # Try AI models with round-robin based on candidate ID
215
198
  if ! call_ai_for_evolution "$evolution_prompt" "$candidate_id"; then
216
- echo "[WORKER-$$] ERROR: All AI models failed to generate code" >&2
199
+ echo "[WORKER-$$] ERROR: All AI models failed to generate code - leaving as pending for retry" >&2
217
200
  cd "$original_pwd"
218
201
  rm -f "$target_file" # Clean up on failure
219
- return 1
202
+ # Return with special code to indicate AI failure (should remain pending)
203
+ return 77
220
204
  fi
221
205
 
222
206
  # Restore working directory
223
207
  cd "$original_pwd"
224
208
 
225
209
  echo "[WORKER-$$] Evolution applied successfully"
210
+
211
+ # Check if the generated Python file has syntax errors
212
+ echo "[WORKER-$$] Checking Python syntax..." >&2
213
+ if ! "$PYTHON_CMD" -m py_compile "$target_file" 2>&1; then
214
+ echo "[WORKER-$$] ERROR: Generated Python file has syntax errors!" >&2
215
+ echo "[WORKER-$$] File: $target_file" >&2
216
+ # This is still an evaluation failure, not an AI failure
217
+ return 1
218
+ fi
226
219
  fi
227
220
  fi
228
221
 
@@ -239,11 +232,22 @@ CRITICAL: Do NOT use any git commands (git add, git commit, git reset, etc.). On
239
232
  eval_arg=""
240
233
  fi
241
234
  local eval_cmd=("$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$eval_arg")
235
+
236
+ # Add memory limiting if configured
237
+ if [[ -n "$MEMORY_LIMIT_MB" ]] && [[ "$MEMORY_LIMIT_MB" -gt 0 ]]; then
238
+ eval_cmd=("$PYTHON_CMD" "$SCRIPT_DIR/../lib/memory_limit_wrapper.py" "$MEMORY_LIMIT_MB" "${eval_cmd[@]}")
239
+ fi
240
+
241
+ # Add timeout if configured
242
242
  [[ -n "$timeout_seconds" ]] && eval_cmd=(timeout "$timeout_seconds" "${eval_cmd[@]}")
243
243
 
244
244
  # Run evaluation with tee to both display and capture output
245
245
  # Use stdbuf to disable buffering for real-time output
246
- if stdbuf -o0 -e0 "${eval_cmd[@]}" 2>&1 | tee "$eval_output_file" >&2; then
246
+ # IMPORTANT: Use PIPESTATUS to get the exit code of the evaluation command, not tee
247
+ stdbuf -o0 -e0 "${eval_cmd[@]}" 2>&1 | tee "$eval_output_file" >&2
248
+ local eval_exit_code=${PIPESTATUS[0]} # Get exit code of first command in pipe
249
+
250
+ if [[ $eval_exit_code -eq 0 ]]; then
247
251
  local eval_end=$(date +%s)
248
252
  local eval_duration=$((eval_end - eval_start))
249
253
 
@@ -353,14 +357,19 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
353
357
  echo "[WORKER-$$] Output: $eval_output" >&2
354
358
  # rm -f "$eval_output_file" # Keep for debugging
355
359
  echo "[WORKER-$$] Evaluation output saved to: $eval_output_file" >&2
360
+ # Clear CURRENT_CANDIDATE_ID before returning
361
+ CURRENT_CANDIDATE_ID=""
356
362
  return 1
357
363
  fi
358
364
 
359
365
  # Clean up temp file (comment out to keep for debugging)
360
366
  # rm -f "$eval_output_file"
361
367
  echo "[WORKER-$$] Evaluation output saved to: $eval_output_file" >&2
368
+
369
+ # Clear CURRENT_CANDIDATE_ID on successful completion
370
+ CURRENT_CANDIDATE_ID=""
362
371
  else
363
- local exit_code=$?
372
+ local exit_code=$eval_exit_code
364
373
  # Read any output that was captured before failure
365
374
  eval_output=$(<"$eval_output_file")
366
375
  # rm -f "$eval_output_file" # Keep for debugging
@@ -370,22 +379,56 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
370
379
  echo "[WORKER-$$] Output: $eval_output" >&2
371
380
 
372
381
  # Mark as failed in CSV
373
- "$PYTHON_CMD" -c "
382
+ echo "[WORKER-$$] Marking $candidate_id as failed in CSV" >&2
383
+ if ! "$PYTHON_CMD" -c "
374
384
  import sys
375
385
  sys.path.insert(0, '$SCRIPT_DIR/..')
376
386
  from lib.evolution_csv import EvolutionCSV
377
- with EvolutionCSV('$FULL_CSV_PATH') as csv:
378
- csv.update_candidate_status('$candidate_id', 'failed')
379
- "
387
+ try:
388
+ with EvolutionCSV('$FULL_CSV_PATH') as csv:
389
+ success = csv.update_candidate_status('$candidate_id', 'failed')
390
+ if not success:
391
+ print(f'ERROR: Failed to update status for {candidate_id}', file=sys.stderr)
392
+ sys.exit(1)
393
+ except Exception as e:
394
+ print(f'ERROR: Exception updating status: {e}', file=sys.stderr)
395
+ sys.exit(1)
396
+ " 2>&1; then
397
+ echo "[WORKER-$$] ERROR: Failed to update CSV status to failed" >&2
398
+ else
399
+ echo "[WORKER-$$] Successfully marked $candidate_id as failed" >&2
400
+ fi
380
401
 
402
+ # Clear CURRENT_CANDIDATE_ID before returning to prevent cleanup handler from resetting it
403
+ CURRENT_CANDIDATE_ID=""
381
404
  return $exit_code
382
405
  fi
383
406
  }
384
407
 
408
+ # Don't reset running candidates on startup - they might be legitimately being processed by another worker
409
+
385
410
  # Main worker loop
386
411
  echo "[WORKER-$$] Worker started"
387
412
 
388
413
  while true; do
414
+ # Debug: Show current status of all candidates
415
+ echo "[WORKER-$$] Current candidate statuses:" >&2
416
+ "$PYTHON_CMD" -c "
417
+ import sys
418
+ sys.path.insert(0, '$SCRIPT_DIR/..')
419
+ from lib.evolution_csv import EvolutionCSV
420
+ with EvolutionCSV('$FULL_CSV_PATH') as csv:
421
+ rows = csv._read_csv()
422
+ if rows:
423
+ start_idx = 1 if rows and rows[0] and rows[0][0].lower() == 'id' else 0
424
+ status_count = {}
425
+ for row in rows[start_idx:]:
426
+ if len(row) > 4:
427
+ status = row[4].strip() or 'pending'
428
+ status_count[status] = status_count.get(status, 0) + 1
429
+ print(f'Status counts: {status_count}', file=sys.stderr)
430
+ " 2>&1 || true
431
+
389
432
  # Try to claim a pending candidate
390
433
  candidate_info=$("$PYTHON_CMD" -c "
391
434
  import sys
@@ -412,12 +455,26 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
412
455
  # Set current candidate for cleanup
413
456
  CURRENT_CANDIDATE_ID="$candidate_id"
414
457
 
415
- # Process the candidate
416
- if process_candidate "$candidate_id" "$parent_id" "$description"; then
458
+ # Process the candidate and capture exit code
459
+ process_candidate "$candidate_id" "$parent_id" "$description"
460
+ process_exit_code=$?
461
+
462
+ if [[ $process_exit_code -eq 0 ]]; then
417
463
  echo "[WORKER-$$] Successfully processed $candidate_id"
464
+ elif [[ $process_exit_code -eq 77 ]]; then
465
+ # Special exit code 77 means AI failed to generate code
466
+ echo "[WORKER-$$] AI generation failed for $candidate_id - marking as failed-ai-retry"
467
+ # Mark with special status that indicates AI generation failed (not evaluation)
468
+ "$PYTHON_CMD" -c "
469
+ import sys
470
+ sys.path.insert(0, '$SCRIPT_DIR/..')
471
+ from lib.evolution_csv import EvolutionCSV
472
+ with EvolutionCSV('$FULL_CSV_PATH') as csv:
473
+ csv.update_candidate_status('$candidate_id', 'failed-ai-retry')
474
+ " 2>/dev/null || true
418
475
  else
419
476
  echo "[WORKER-$$] Failed to process $candidate_id"
420
- # Ensure status is set to failed (might already be done in process_candidate)
477
+ # Other failures (evaluation errors, etc) mark as failed
421
478
  "$PYTHON_CMD" -c "
422
479
  import sys
423
480
  sys.path.insert(0, '$SCRIPT_DIR/..')
package/lib/ai-cli.sh CHANGED
@@ -19,6 +19,11 @@ call_ai_model_configured() {
19
19
  ai_output=$(timeout 300 claude --dangerously-skip-permissions --model "$model_name" -p "$prompt" 2>&1)
20
20
  local ai_exit_code=$?
21
21
  ;;
22
+ gpt-5)
23
+ local ai_output
24
+ ai_output=$(timeout 300 codex exec -m gpt-5 --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
25
+ local ai_exit_code=$?
26
+ ;;
22
27
  o3)
23
28
  local ai_output
24
29
  ai_output=$(timeout 300 codex exec -m o3 --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
@@ -100,7 +105,7 @@ clean_ai_output() {
100
105
  local model_name="$2"
101
106
 
102
107
  # Handle codex-specific output format
103
- if [[ "$model_name" == "codex" || "$model_name" == "o3" ]]; then
108
+ if [[ "$model_name" == "codex" || "$model_name" == "o3" || "$model_name" == "gpt-5" ]]; then
104
109
  # Clean codex output - extract content between "codex" marker and "tokens used"
105
110
  if echo "$output" | grep -q "^\[.*\] codex$"; then
106
111
  # Extract content between "codex" line and "tokens used" line
@@ -191,7 +196,7 @@ call_ai_with_round_robin() {
191
196
  ai_output=$(call_ai_model_configured "$model" "$prompt")
192
197
  local ai_exit_code=$?
193
198
 
194
- # Just check exit code - no interpretation
199
+ # Just check exit code
195
200
  if [[ $ai_exit_code -eq 0 ]]; then
196
201
  # Clean output if needed
197
202
  ai_output=$(clean_ai_output "$ai_output" "$model")
package/lib/config.sh CHANGED
@@ -49,17 +49,13 @@ DEFAULT_AUTO_IDEATE=true
49
49
  # Default retry value
50
50
  DEFAULT_MAX_RETRIES=3
51
51
 
52
- # Default LLM CLI configuration (using eval for compatibility)
53
- declare -a DEFAULT_LLM_CLI_KEYS
54
- declare -a DEFAULT_LLM_CLI_VALUES
55
- DEFAULT_LLM_CLI_KEYS=(o3 codex gemini opus sonnet)
56
- DEFAULT_LLM_CLI_VALUES[0]='codex exec -m o3 --dangerously-bypass-approvals-and-sandbox "{{PROMPT}}"'
57
- DEFAULT_LLM_CLI_VALUES[1]='codex exec --dangerously-bypass-approvals-and-sandbox "{{PROMPT}}"'
58
- DEFAULT_LLM_CLI_VALUES[2]='gemini -y -p "{{PROMPT}}"'
59
- DEFAULT_LLM_CLI_VALUES[3]='claude --dangerously-skip-permissions --model opus -p "{{PROMPT}}"'
60
- DEFAULT_LLM_CLI_VALUES[4]='claude --dangerously-skip-permissions --model sonnet -p "{{PROMPT}}"'
61
- DEFAULT_LLM_RUN="sonnet gemini"
62
- DEFAULT_LLM_IDEATE="opus o3"
52
+ # Default memory limit (in MB, 0 means no limit)
53
+ # Set to reasonable limit for ML workloads - about half of available system RAM
54
+ DEFAULT_MEMORY_LIMIT_MB=12288
55
+
56
+ # Default LLM CLI configuration - use simple variables instead of arrays
57
+ DEFAULT_LLM_RUN="sonnet gpt-5 sonnet gpt-5"
58
+ DEFAULT_LLM_IDEATE="gemini gpt-5 opus"
63
59
 
64
60
  # Load configuration from config file
65
61
  load_config() {
@@ -96,9 +92,13 @@ load_config() {
96
92
  # Set retry default
97
93
  MAX_RETRIES="$DEFAULT_MAX_RETRIES"
98
94
 
95
+ # Set memory limit default
96
+ MEMORY_LIMIT_MB="$DEFAULT_MEMORY_LIMIT_MB"
97
+
99
98
  # Set LLM CLI defaults (compatibility for older bash)
100
99
  # Initialize associative array for LLM commands
101
100
  # Use simpler approach for compatibility
101
+ LLM_CLI_gpt_5='codex exec -m gpt-5 --dangerously-bypass-approvals-and-sandbox "{{PROMPT}}"'
102
102
  LLM_CLI_o3='codex exec -m o3 --dangerously-bypass-approvals-and-sandbox "{{PROMPT}}"'
103
103
  LLM_CLI_codex='codex exec --dangerously-bypass-approvals-and-sandbox "{{PROMPT}}"'
104
104
  LLM_CLI_gemini='gemini -y -p "{{PROMPT}}"'
@@ -202,12 +202,14 @@ load_config() {
202
202
  # Model definition - key is model name, value is command template
203
203
  # Remove single quotes from value if present
204
204
  value=$(echo "$value" | sed "s/^'//;s/'$//")
205
+ # Convert dashes to underscores for bash variable names
206
+ var_key=$(echo "$key" | sed 's/-/_/g')
205
207
  # Debug config loading
206
208
  if [[ "${DEBUG_CONFIG:-}" == "true" ]]; then
207
- echo "[CONFIG DEBUG] Setting LLM_CLI_${key} = '$value'" >&2
209
+ echo "[CONFIG DEBUG] Setting LLM_CLI_${var_key} = '$value'" >&2
208
210
  fi
209
211
  # Use dynamic variable name for compatibility
210
- eval "LLM_CLI_${key}=\"$value\""
212
+ eval "LLM_CLI_${var_key}=\"$value\""
211
213
  fi
212
214
  else
213
215
  # Handle top-level keys
@@ -221,6 +223,7 @@ load_config() {
221
223
  python_cmd) PYTHON_CMD="$value" ;;
222
224
  auto_ideate) AUTO_IDEATE="$value" ;;
223
225
  max_retries) MAX_RETRIES="$value" ;;
226
+ memory_limit_mb) MEMORY_LIMIT_MB="$value" ;;
224
227
  evolution_dir)
225
228
  echo "[WARN] evolution_dir in config is ignored - automatically inferred from config file location" >&2
226
229
  ;;
@@ -316,14 +319,17 @@ show_config() {
316
319
  echo " Lock timeout: $LOCK_TIMEOUT"
317
320
  echo " Auto ideate: $AUTO_IDEATE"
318
321
  echo " Max retries: $MAX_RETRIES"
322
+ echo " Memory limit: ${MEMORY_LIMIT_MB}MB"
319
323
  echo " LLM configuration:"
320
324
  # Show LLM configurations using dynamic variable names
321
- for model in o3 codex gemini opus sonnet; do
325
+ for model in gpt_5 o3 codex gemini opus sonnet; do
322
326
  var_name="LLM_CLI_${model}"
323
327
  if [[ -n "${!var_name}" ]]; then
324
- echo " $model: ${!var_name}"
328
+ # Convert underscore back to dash for display
329
+ display_name=$(echo "$model" | sed 's/_/-/g')
330
+ echo " $display_name: ${!var_name}"
325
331
  fi
326
332
  done
327
333
  echo " LLM for run: $LLM_RUN"
328
334
  echo " LLM for ideate: $LLM_IDEATE"
329
- }
335
+ }