npm - claude-evolve - Versions diffs - 1.5.2 → 1.5.4 - Mend

claude-evolve 1.5.2 → 1.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/README.md +16 -1
package/bin/claude-evolve-analyze +42 -15
package/bin/claude-evolve-autostatus +2 -2
package/bin/claude-evolve-edit +182 -17
package/bin/claude-evolve-ideate +105 -32
package/bin/claude-evolve-run +71 -1
package/bin/claude-evolve-status +3 -1
package/bin/claude-evolve-worker +96 -39
package/lib/ai-cli.sh +7 -2
package/lib/config.sh +22 -16
package/lib/csv_fixer.py +35 -0
package/lib/memory_limit_wrapper.py +192 -0
package/package.json +1 -1
package/templates/config.yaml +10 -10

package/bin/claude-evolve-ideate CHANGED

@@ -84,39 +84,76 @@ call_ai_for_ideation() {
     original_csv_count=0
   fi
-  # Use centralized AI library
-  local ai_output
-  ai_output=$(call_ai_with_round_robin "$prompt" "ideate" "$gen_num")
-  local ai_exit_code=$?
+  echo "[DEBUG] Original CSV has $original_csv_count data rows" >&2
-  # Handle special exit codes
-  # No special handling for exit codes anymore
+  # Get models for ideation
+  local model_list
+  model_list=$(get_models_for_command "ideate")
+  local models=()
+  read -ra models <<< "$model_list"
-  if [[ $ai_exit_code -eq 0 ]]; then
-    # For ideation, we need to verify the CSV file was actually modified
+  if [[ ${#models[@]} -eq 0 ]]; then
+    echo "[ERROR] No models configured for ideation" >&2
+    return 1
+  fi
+  # Calculate starting index for round-robin
+  local num_models=${#models[@]}
+  local start_index=$((gen_num % num_models))
+  # Create ordered list based on round-robin
+  local ordered_models=()
+  for ((i=0; i<num_models; i++)); do
+    local idx=$(((start_index + i) % num_models))
+    ordered_models+=("${models[$idx]}")
+  done
+  echo "[AI] Model order for ideate (round-robin): ${ordered_models[*]}" >&2
+  # Try each model until CSV changes
+  for model in "${ordered_models[@]}"; do
+    echo "[AI] Attempting ideate with $model" >&2
+    # Call the model directly
+    local ai_output
+    ai_output=$(call_ai_model_configured "$model" "$prompt")
+    local ai_exit_code=$?
+    echo "[AI] $model completed with exit code $ai_exit_code" >&2
+    # Check if the file was modified - this is ALL that matters
     if [[ -f "$temp_csv_file" ]]; then
       local new_csv_count
       new_csv_count=$(grep -v '^[[:space:]]*$' "$temp_csv_file" | tail -n +2 | wc -l)
       if [[ $new_csv_count -gt $original_csv_count ]]; then
-        echo "[INFO] AI completed successfully and modified CSV ($new_csv_count vs $original_csv_count rows)" >&2
+        echo "[INFO] CSV was modified by $model ($new_csv_count vs $original_csv_count rows) - validating format..." >&2
+        # Post-process to ensure all description fields are quoted
+        local fixed_csv_file="${temp_csv_file}.fixed"
+        # Use the CSV fixer script
+        if "$PYTHON_CMD" "$SCRIPT_DIR/../lib/csv_fixer.py" "$temp_csv_file" "$fixed_csv_file"; then
+          mv "$fixed_csv_file" "$temp_csv_file"
+          echo "[INFO] CSV format validated and fixed if needed" >&2
+        else
+          echo "[WARN] CSV format validation failed, using original" >&2
+        fi
         return 0
       else
-        echo "[INFO] $model returned exit code 0 but didn't modify CSV file" >&2
-        echo "[DEBUG] Expected file: $temp_csv_file" >&2
+        echo "[INFO] CSV unchanged after $model (exit code: $ai_exit_code)" >&2
         echo "[DEBUG] Original count: $original_csv_count, New count: $new_csv_count" >&2
-        return 1
+        # Continue to next model
       fi
     else
-      echo "[INFO] Exit code 0 but temp CSV file not found: $temp_csv_file" >&2
-      echo "[DEBUG] Current directory: $(pwd)" >&2
-      echo "[DEBUG] Files matching temp-csv-*.csv:" >&2
-      ls -la temp-csv-*.csv 2>&1 >&2
-      return 1
+      echo "[INFO] Temp CSV file not found after $model: $temp_csv_file" >&2
+      # Continue to next model
     fi
-  fi
+  done
-  echo "[INFO] No AI model successfully modified the CSV file" >&2
+  # All models tried, none changed the file
+  echo "[ERROR] All AI models failed to generate ideas" >&2
   return 1
 }
@@ -461,8 +498,14 @@ Instructions:
 1. Add exactly $count new rows to the CSV
 2. Use the next available generation numbers (gen$CURRENT_GENERATION-XXX format)
 3. For each idea, create a row with: id,parent_id,description,,pending
-4. For novel ideas: leave parent_id empty
-5. For other idea types: use appropriate parent IDs from these top performers:
+4. CRITICAL CSV FORMATTING RULES:
+   - ALWAYS wrap the description field in double quotes
+   - If the description contains quotes, escape them by doubling them (\" becomes \"\")
+   - Example: gen01-001,gen00-000,\"Implement adaptive RSI thresholds\",,pending
+   - BAD: gen01-001,gen00-000,Implement adaptive RSI thresholds,,pending
+   - NEVER omit quotes - unquoted descriptions cause CSV corruption
+5. For novel ideas: leave parent_id empty
+6. For other idea types: use appropriate parent IDs from these top performers:
 $top_performers
 IMPORTANT: Output the complete modified CSV file. Do not add any explanation or other text - just output the CSV."
@@ -839,9 +882,15 @@ CRITICAL INSTRUCTIONS:
   prompt+="
 6. Use the Edit or MultiEdit tool to APPEND exactly $count new rows AT THE END of the CSV file
 7. For each idea, create a row with: id,,description,,pending (empty parent_id for novel ideas)
-8. Each description should be one clear sentence describing a novel algorithmic approach
-9. Focus on creative, ambitious ideas that haven't been tried yet
-10. Consider machine learning, new indicators, regime detection, risk management, etc.
+8. CRITICAL CSV FORMATTING RULES:
+   - ALWAYS wrap the description field in double quotes
+   - If the description contains quotes, escape them by doubling them (\" becomes \"\")
+   - Example: gen01-001,,\"Implement adaptive RSI thresholds based on volatility\",,pending
+   - BAD: gen01-001,,Implement adaptive RSI thresholds based on volatility,,pending
+   - NEVER omit quotes around descriptions - this causes CSV parsing errors
+9. Each description should be one clear sentence describing a novel algorithmic approach
+10. Focus on creative, ambitious ideas that haven't been tried yet
+11. Consider machine learning, new indicators, regime detection, risk management, etc.
 IMPORTANT: You must APPEND new rows to the existing CSV file. DO NOT replace the file contents. All existing rows must remain unchanged.
 CRITICAL: You must use your file editing tools (Edit/MultiEdit) to modify the CSV file. DO NOT return CSV text - use your tools to edit the file directly.
@@ -939,8 +988,14 @@ CRITICAL INSTRUCTIONS:
 5. Use the Edit or MultiEdit tool to APPEND exactly $count new rows AT THE END of the CSV file
 6. For each idea, create a row with: id,parent_id,description,,pending
 7. Each parent_id MUST be one of: $valid_parent_ids
-8. Each description should focus on adjusting specific parameters that exist in the parent's source code
-9. Include current and new parameter values (e.g., \"Lower rsi_entry from 21 to 18\")
+8. CRITICAL CSV FORMATTING RULES:
+   - ALWAYS wrap the description field in double quotes
+   - If the description contains quotes, escape them by doubling them (\" becomes \"\")
+   - Example: gen01-001,gen00-000,\"Lower rsi_entry from 21 to 18\",,pending
+   - BAD: gen01-001,gen00-000,Lower rsi_entry from 21 to 18,,pending
+   - NEVER omit quotes around descriptions - this causes CSV parsing errors
+9. Each description should focus on adjusting specific parameters that exist in the parent's source code
+10. Include current and new parameter values - for example: \"Lower rsi_entry from 21 to 18\"
 IMPORTANT: You must APPEND new rows to the existing CSV file. DO NOT replace the file contents. All existing rows must remain unchanged.
 CRITICAL: You must use your file editing tools (Edit/MultiEdit) to modify the CSV file. DO NOT return CSV text - use your tools to edit the file directly.
@@ -1020,8 +1075,14 @@ CRITICAL INSTRUCTIONS:
 5. Use the Edit or MultiEdit tool to APPEND exactly $count new rows AT THE END of the CSV file
 6. For each idea, create a row with: id,parent_id,description,,pending
 7. Each parent_id MUST be one of: $valid_parent_ids
-8. Each description should focus on architectural/structural changes based on the parent's actual code
-9. Reference actual components/methods found in the source code
+8. CRITICAL CSV FORMATTING RULES:
+   - ALWAYS wrap the description field in double quotes
+   - If the description contains quotes, escape them by doubling them (\" becomes \"\")
+   - Example: gen01-001,gen00-000,\"Add ML-based regime detection using LSTM\",,pending
+   - BAD: gen01-001,gen00-000,Add ML-based regime detection using LSTM,,pending
+   - NEVER omit quotes around descriptions - this causes CSV parsing errors
+9. Each description should focus on architectural/structural changes based on the parent's actual code
+10. Reference actual components/methods found in the source code
 IMPORTANT: You must APPEND new rows to the existing CSV file. DO NOT replace the file contents. All existing rows must remain unchanged.
 CRITICAL: You must use your file editing tools (Edit/MultiEdit) to modify the CSV file. DO NOT return CSV text - use your tools to edit the file directly.
@@ -1101,8 +1162,14 @@ CRITICAL INSTRUCTIONS:
 5. Use the Edit or MultiEdit tool to APPEND exactly $count new rows AT THE END of the CSV file
 6. For each idea, create a row with: id,parent_id,description,,pending
 7. Each parent_id MUST be one of: $valid_parent_ids (choose the primary parent)
-8. Each description should combine actual elements from 2+ algorithms based on their source code
-9. Reference specific components/features found in the actual source code
+8. CRITICAL CSV FORMATTING RULES:
+   - ALWAYS wrap the description field in double quotes
+   - If the description contains quotes, escape them by doubling them (\" becomes \"\")
+   - Example: gen01-001,gen00-000,\"Combine gen01-123's RSI logic with gen01-456's volatility scaling\",,pending
+   - BAD: gen01-001,gen00-000,Combine gen01-123's RSI logic with gen01-456's volatility scaling,,pending
+   - NEVER omit quotes around descriptions - this causes CSV parsing errors
+9. Each description should combine actual elements from 2+ algorithms based on their source code
+10. Reference specific components/features found in the actual source code
 IMPORTANT: You must APPEND new rows to the existing CSV file. DO NOT replace the file contents. All existing rows must remain unchanged.
 CRITICAL: You must use your file editing tools (Edit/MultiEdit) to modify the CSV file. DO NOT return CSV text - use your tools to edit the file directly.
@@ -1195,8 +1262,14 @@ CRITICAL INSTRUCTIONS:
 4. If no gen$CURRENT_GENERATION entries exist yet, start with gen$CURRENT_GENERATION-001
 5. Use the Edit or MultiEdit tool to APPEND exactly $TOTAL_IDEAS new rows AT THE END of the CSV file
 6. For each idea, create a row with: id,parent_id,description,,pending
-7. Mix both parameter tuning and structural changes
-8. If building on existing algorithms, use their ID as parent_id, otherwise leave parent_id empty
+7. CRITICAL CSV FORMATTING RULES:
+   - ALWAYS wrap the description field in double quotes
+   - If the description contains quotes, escape them by doubling them (\" becomes \"\")
+   - Example: gen01-001,gen00-000,\"Implement adaptive RSI thresholds based on volatility\",,pending
+   - BAD: gen01-001,gen00-000,Implement adaptive RSI thresholds based on volatility,,pending
+   - NEVER omit quotes around descriptions - this causes CSV parsing errors that corrupt the data
+8. Mix both parameter tuning and structural changes
+9. If building on existing algorithms, use their ID as parent_id, otherwise leave parent_id empty
 ⚠️ AVOID ONLY: Kelly floor/cap adjustments that assume leverage > 1.0 (these get clamped and have no effect)

package/bin/claude-evolve-run CHANGED

@@ -478,6 +478,66 @@ ensure_baseline_entry
 # Flag to track API limit status
 api_limit_reached=false
+# Check if previous generation has at least one completed item
+check_previous_generation_has_completed() {
+  local csv_path="$1"
+  if [[ ! -f "$csv_path" ]]; then
+    # No CSV file yet - allow first ideation
+    return 0
+  fi
+  # Use Python to find the maximum generation and check if it has completed items
+  "$PYTHON_CMD" -c "
+import csv
+import sys
+max_gen = 0
+gen_completed = {}
+try:
+    with open('$csv_path', 'r') as f:
+        reader = csv.reader(f)
+        next(reader, None)  # Skip header
+        for row in reader:
+            if row and len(row) >= 5:
+                id_field = row[0].strip()
+                status = row[4].strip() if len(row) > 4 else 'pending'
+                if id_field.startswith('gen') and '-' in id_field:
+                    try:
+                        gen_part = id_field.split('-')[0]  # e.g., 'gen01'
+                        gen_num = int(gen_part[3:])  # Extract number after 'gen'
+                        max_gen = max(max_gen, gen_num)
+                        if gen_num not in gen_completed:
+                            gen_completed[gen_num] = 0
+                        if status == 'complete':
+                            gen_completed[gen_num] += 1
+                    except (ValueError, IndexError):
+                        pass
+    # If max_gen is 0, no generations exist yet - allow ideation
+    if max_gen == 0:
+        sys.exit(0)
+    # Check if the most recent generation has at least one completed item
+    if gen_completed.get(max_gen, 0) > 0:
+        print(f'[INFO] Generation {max_gen:02d} has {gen_completed[max_gen]} completed items - allowing ideation', file=sys.stderr)
+        sys.exit(0)
+    else:
+        print(f'[INFO] Generation {max_gen:02d} has no completed items - blocking ideation to prevent endless loops', file=sys.stderr)
+        sys.exit(1)
+except Exception as e:
+    print(f'[ERROR] Failed to check previous generation: {e}', file=sys.stderr)
+    # On error, allow ideation to maintain backward compatibility
+    sys.exit(0)
+"
+  return $?
+}
 # Main dispatch loop
 while true; do
   # Clean up finished workers
@@ -506,7 +566,17 @@ while true; do
     # Check if auto ideation is enabled
     if [[ "$AUTO_IDEATE" == "true" || "$AUTO_IDEATE" == "1" ]]; then
-      echo "[DISPATCHER] Auto ideation is enabled. Generating new ideas..."
+      echo "[DISPATCHER] Auto ideation is enabled. Checking prerequisites..."
+      # Check if previous generation has at least one completed item
+      if ! check_previous_generation_has_completed "$FULL_CSV_PATH"; then
+        echo "[DISPATCHER] Evolution complete - previous generation has no completed items."
+        echo "[DISPATCHER] This prevents endless ideation loops when API limits are hit."
+        echo "[DISPATCHER] Wait for current generation to complete, then run 'claude-evolve ideate' manually."
+        break
+      fi
+      echo "[DISPATCHER] Prerequisites met. Generating new ideas..."
       # Check if claude-evolve-ideate exists
       ideate_script="$SCRIPT_DIR/claude-evolve-ideate"

package/bin/claude-evolve-status CHANGED

@@ -244,7 +244,9 @@ try:
     # Show per-generation breakdown (unless brief mode)
     if not show_brief and stats_by_gen:
         print('📈 BY GENERATION:')
-        for gen in sorted(stats_by_gen.keys()):
+        # Sort generations numerically by extracting the number after 'gen'
+        sorted_gens = sorted(stats_by_gen.keys(), key=lambda g: int(g[3:]) if g.startswith('gen') and g[3:].isdigit() else 0)
+        for gen in sorted_gens:
             data = stats_by_gen[gen]
             total = sum(data.values())

package/bin/claude-evolve-worker CHANGED

@@ -14,34 +14,11 @@ TERMINATION_SIGNAL=""
 # Cleanup function to handle termination
 cleanup_on_exit() {
   if [[ -n "$CURRENT_CANDIDATE_ID" ]]; then
-    # Only mark as failed if it was a timeout (SIGTERM from timeout command)
-    # For user interruption (Ctrl-C) or kill, leave it for retry
-    if [[ "$TERMINATION_SIGNAL" == "TERM" ]]; then
-      echo "[WORKER-$$] Timeout detected, marking $CURRENT_CANDIDATE_ID as failed" >&2
-      "$PYTHON_CMD" -c "
-import sys
-sys.path.insert(0, '$SCRIPT_DIR/..')
-from lib.evolution_csv import EvolutionCSV
-try:
-    with EvolutionCSV('$FULL_CSV_PATH') as csv:
-        csv.update_candidate_status('$CURRENT_CANDIDATE_ID', 'failed')
-except:
-    pass  # Best effort cleanup
-" 2>/dev/null || true
-    else
-      echo "[WORKER-$$] Interrupted, leaving $CURRENT_CANDIDATE_ID for retry" >&2
-      # Optionally reset to pending instead of leaving as running
-      "$PYTHON_CMD" -c "
-import sys
-sys.path.insert(0, '$SCRIPT_DIR/..')
-from lib.evolution_csv import EvolutionCSV
-try:
-    with EvolutionCSV('$FULL_CSV_PATH') as csv:
-        csv.update_candidate_status('$CURRENT_CANDIDATE_ID', 'pending')
-except:
-    pass  # Best effort cleanup
-" 2>/dev/null || true
-    fi
+    echo "[WORKER-$$] Worker terminated while processing $CURRENT_CANDIDATE_ID" >&2
+    # If we're interrupted while processing, leave it as "running"
+    # This prevents other workers from picking it up in the same session
+    # A human can manually reset to pending if needed
+    echo "[WORKER-$$] Leaving $CURRENT_CANDIDATE_ID in current state" >&2
   fi
 }
@@ -135,6 +112,12 @@ process_candidate() {
   echo "[WORKER-$$] Description: $description"
   echo "[WORKER-$$] Based on ID: $parent_id"
+  # Treat "baseline-000" parent ID as empty/baseline
+  if [[ "$parent_id" == "baseline-000" ]]; then
+    parent_id=""
+    echo "[WORKER-$$] Parent ID 'baseline-000' treated as baseline (empty parent)"
+  fi
   # Determine source algorithm
   local source_file
   if [[ -z "$parent_id" ]]; then
@@ -213,16 +196,26 @@ CRITICAL: Do NOT use any git commands (git add, git commit, git reset, etc.). On
       # Try AI models with round-robin based on candidate ID
       if ! call_ai_for_evolution "$evolution_prompt" "$candidate_id"; then
-        echo "[WORKER-$$] ERROR: All AI models failed to generate code" >&2
+        echo "[WORKER-$$] ERROR: All AI models failed to generate code - leaving as pending for retry" >&2
         cd "$original_pwd"
         rm -f "$target_file"  # Clean up on failure
-        return 1
+        # Return with special code to indicate AI failure (should remain pending)
+        return 77
       fi
       # Restore working directory
       cd "$original_pwd"
       echo "[WORKER-$$] Evolution applied successfully"
+      # Check if the generated Python file has syntax errors
+      echo "[WORKER-$$] Checking Python syntax..." >&2
+      if ! "$PYTHON_CMD" -m py_compile "$target_file" 2>&1; then
+        echo "[WORKER-$$] ERROR: Generated Python file has syntax errors!" >&2
+        echo "[WORKER-$$] File: $target_file" >&2
+        # This is still an evaluation failure, not an AI failure
+        return 1
+      fi
     fi
   fi
@@ -239,11 +232,22 @@ CRITICAL: Do NOT use any git commands (git add, git commit, git reset, etc.). On
     eval_arg=""
   fi
   local eval_cmd=("$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$eval_arg")
+  # Add memory limiting if configured
+  if [[ -n "$MEMORY_LIMIT_MB" ]] && [[ "$MEMORY_LIMIT_MB" -gt 0 ]]; then
+    eval_cmd=("$PYTHON_CMD" "$SCRIPT_DIR/../lib/memory_limit_wrapper.py" "$MEMORY_LIMIT_MB" "${eval_cmd[@]}")
+  fi
+  # Add timeout if configured
   [[ -n "$timeout_seconds" ]] && eval_cmd=(timeout "$timeout_seconds" "${eval_cmd[@]}")
   # Run evaluation with tee to both display and capture output
   # Use stdbuf to disable buffering for real-time output
-  if stdbuf -o0 -e0 "${eval_cmd[@]}" 2>&1 | tee "$eval_output_file" >&2; then
+  # IMPORTANT: Use PIPESTATUS to get the exit code of the evaluation command, not tee
+  stdbuf -o0 -e0 "${eval_cmd[@]}" 2>&1 | tee "$eval_output_file" >&2
+  local eval_exit_code=${PIPESTATUS[0]}  # Get exit code of first command in pipe
+  if [[ $eval_exit_code -eq 0 ]]; then
     local eval_end=$(date +%s)
     local eval_duration=$((eval_end - eval_start))
@@ -353,14 +357,19 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
       echo "[WORKER-$$] Output: $eval_output" >&2
       # rm -f "$eval_output_file"  # Keep for debugging
       echo "[WORKER-$$] Evaluation output saved to: $eval_output_file" >&2
+      # Clear CURRENT_CANDIDATE_ID before returning
+      CURRENT_CANDIDATE_ID=""
       return 1
     fi
     # Clean up temp file (comment out to keep for debugging)
     # rm -f "$eval_output_file"
     echo "[WORKER-$$] Evaluation output saved to: $eval_output_file" >&2
+    # Clear CURRENT_CANDIDATE_ID on successful completion
+    CURRENT_CANDIDATE_ID=""
   else
-    local exit_code=$?
+    local exit_code=$eval_exit_code
     # Read any output that was captured before failure
     eval_output=$(<"$eval_output_file")
     # rm -f "$eval_output_file"  # Keep for debugging
@@ -370,22 +379,56 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
     echo "[WORKER-$$] Output: $eval_output" >&2
     # Mark as failed in CSV
-    "$PYTHON_CMD" -c "
+    echo "[WORKER-$$] Marking $candidate_id as failed in CSV" >&2
+    if ! "$PYTHON_CMD" -c "
 import sys
 sys.path.insert(0, '$SCRIPT_DIR/..')
 from lib.evolution_csv import EvolutionCSV
-with EvolutionCSV('$FULL_CSV_PATH') as csv:
-    csv.update_candidate_status('$candidate_id', 'failed')
-"
+try:
+    with EvolutionCSV('$FULL_CSV_PATH') as csv:
+        success = csv.update_candidate_status('$candidate_id', 'failed')
+        if not success:
+            print(f'ERROR: Failed to update status for {candidate_id}', file=sys.stderr)
+            sys.exit(1)
+except Exception as e:
+    print(f'ERROR: Exception updating status: {e}', file=sys.stderr)
+    sys.exit(1)
+" 2>&1; then
+      echo "[WORKER-$$] ERROR: Failed to update CSV status to failed" >&2
+    else
+      echo "[WORKER-$$] Successfully marked $candidate_id as failed" >&2
+    fi
+    # Clear CURRENT_CANDIDATE_ID before returning to prevent cleanup handler from resetting it
+    CURRENT_CANDIDATE_ID=""
     return $exit_code
   fi
 }
+# Don't reset running candidates on startup - they might be legitimately being processed by another worker
 # Main worker loop
 echo "[WORKER-$$] Worker started"
 while true; do
+  # Debug: Show current status of all candidates
+  echo "[WORKER-$$] Current candidate statuses:" >&2
+  "$PYTHON_CMD" -c "
+import sys
+sys.path.insert(0, '$SCRIPT_DIR/..')
+from lib.evolution_csv import EvolutionCSV
+with EvolutionCSV('$FULL_CSV_PATH') as csv:
+    rows = csv._read_csv()
+    if rows:
+        start_idx = 1 if rows and rows[0] and rows[0][0].lower() == 'id' else 0
+        status_count = {}
+        for row in rows[start_idx:]:
+            if len(row) > 4:
+                status = row[4].strip() or 'pending'
+                status_count[status] = status_count.get(status, 0) + 1
+        print(f'Status counts: {status_count}', file=sys.stderr)
+" 2>&1 || true
   # Try to claim a pending candidate
   candidate_info=$("$PYTHON_CMD" -c "
 import sys
@@ -412,12 +455,26 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
   # Set current candidate for cleanup
   CURRENT_CANDIDATE_ID="$candidate_id"
-  # Process the candidate
-  if process_candidate "$candidate_id" "$parent_id" "$description"; then
+  # Process the candidate and capture exit code
+  process_candidate "$candidate_id" "$parent_id" "$description"
+  process_exit_code=$?
+  if [[ $process_exit_code -eq 0 ]]; then
     echo "[WORKER-$$] Successfully processed $candidate_id"
+  elif [[ $process_exit_code -eq 77 ]]; then
+    # Special exit code 77 means AI failed to generate code
+    echo "[WORKER-$$] AI generation failed for $candidate_id - marking as failed-ai-retry"
+    # Mark with special status that indicates AI generation failed (not evaluation)
+    "$PYTHON_CMD" -c "
+import sys
+sys.path.insert(0, '$SCRIPT_DIR/..')
+from lib.evolution_csv import EvolutionCSV
+with EvolutionCSV('$FULL_CSV_PATH') as csv:
+    csv.update_candidate_status('$candidate_id', 'failed-ai-retry')
+" 2>/dev/null || true
   else
     echo "[WORKER-$$] Failed to process $candidate_id"
-    # Ensure status is set to failed (might already be done in process_candidate)
+    # Other failures (evaluation errors, etc) mark as failed
     "$PYTHON_CMD" -c "
 import sys
 sys.path.insert(0, '$SCRIPT_DIR/..')

package/lib/ai-cli.sh CHANGED

@@ -19,6 +19,11 @@ call_ai_model_configured() {
       ai_output=$(timeout 300 claude --dangerously-skip-permissions --model "$model_name" -p "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
+    gpt-5)
+      local ai_output
+      ai_output=$(timeout 300 codex exec -m gpt-5 --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+      local ai_exit_code=$?
+      ;;
     o3)
       local ai_output
       ai_output=$(timeout 300 codex exec -m o3 --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
@@ -100,7 +105,7 @@ clean_ai_output() {
   local model_name="$2"
   # Handle codex-specific output format
-  if [[ "$model_name" == "codex" || "$model_name" == "o3" ]]; then
+  if [[ "$model_name" == "codex" || "$model_name" == "o3" || "$model_name" == "gpt-5" ]]; then
     # Clean codex output - extract content between "codex" marker and "tokens used"
     if echo "$output" | grep -q "^\[.*\] codex$"; then
       # Extract content between "codex" line and "tokens used" line
@@ -191,7 +196,7 @@ call_ai_with_round_robin() {
     ai_output=$(call_ai_model_configured "$model" "$prompt")
     local ai_exit_code=$?
-    # Just check exit code - no interpretation
+    # Just check exit code
     if [[ $ai_exit_code -eq 0 ]]; then
       # Clean output if needed
       ai_output=$(clean_ai_output "$ai_output" "$model")

package/lib/config.sh CHANGED

@@ -49,17 +49,13 @@ DEFAULT_AUTO_IDEATE=true
 # Default retry value
 DEFAULT_MAX_RETRIES=3
-# Default LLM CLI configuration (using eval for compatibility)
-declare -a DEFAULT_LLM_CLI_KEYS
-declare -a DEFAULT_LLM_CLI_VALUES
-DEFAULT_LLM_CLI_KEYS=(o3 codex gemini opus sonnet)
-DEFAULT_LLM_CLI_VALUES[0]='codex exec -m o3 --dangerously-bypass-approvals-and-sandbox "{{PROMPT}}"'
-DEFAULT_LLM_CLI_VALUES[1]='codex exec --dangerously-bypass-approvals-and-sandbox "{{PROMPT}}"'
-DEFAULT_LLM_CLI_VALUES[2]='gemini -y -p "{{PROMPT}}"'
-DEFAULT_LLM_CLI_VALUES[3]='claude --dangerously-skip-permissions --model opus -p "{{PROMPT}}"'
-DEFAULT_LLM_CLI_VALUES[4]='claude --dangerously-skip-permissions --model sonnet -p "{{PROMPT}}"'
-DEFAULT_LLM_RUN="sonnet gemini"
-DEFAULT_LLM_IDEATE="opus o3"
+# Default memory limit (in MB, 0 means no limit)
+# Set to reasonable limit for ML workloads - about half of available system RAM
+DEFAULT_MEMORY_LIMIT_MB=12288
+# Default LLM CLI configuration - use simple variables instead of arrays
+DEFAULT_LLM_RUN="sonnet gpt-5 sonnet gpt-5"
+DEFAULT_LLM_IDEATE="gemini gpt-5 opus"
 # Load configuration from config file
 load_config() {
@@ -96,9 +92,13 @@ load_config() {
   # Set retry default
   MAX_RETRIES="$DEFAULT_MAX_RETRIES"
+  # Set memory limit default
+  MEMORY_LIMIT_MB="$DEFAULT_MEMORY_LIMIT_MB"
   # Set LLM CLI defaults (compatibility for older bash)
   # Initialize associative array for LLM commands
   # Use simpler approach for compatibility
+  LLM_CLI_gpt_5='codex exec -m gpt-5 --dangerously-bypass-approvals-and-sandbox "{{PROMPT}}"'
   LLM_CLI_o3='codex exec -m o3 --dangerously-bypass-approvals-and-sandbox "{{PROMPT}}"'
   LLM_CLI_codex='codex exec --dangerously-bypass-approvals-and-sandbox "{{PROMPT}}"'
   LLM_CLI_gemini='gemini -y -p "{{PROMPT}}"'
@@ -202,12 +202,14 @@ load_config() {
           # Model definition - key is model name, value is command template
           # Remove single quotes from value if present
           value=$(echo "$value" | sed "s/^'//;s/'$//")
+          # Convert dashes to underscores for bash variable names
+          var_key=$(echo "$key" | sed 's/-/_/g')
           # Debug config loading
           if [[ "${DEBUG_CONFIG:-}" == "true" ]]; then
-            echo "[CONFIG DEBUG] Setting LLM_CLI_${key} = '$value'" >&2
+            echo "[CONFIG DEBUG] Setting LLM_CLI_${var_key} = '$value'" >&2
           fi
           # Use dynamic variable name for compatibility
-          eval "LLM_CLI_${key}=\"$value\""
+          eval "LLM_CLI_${var_key}=\"$value\""
         fi
       else
         # Handle top-level keys
@@ -221,6 +223,7 @@ load_config() {
           python_cmd) PYTHON_CMD="$value" ;;
           auto_ideate) AUTO_IDEATE="$value" ;;
           max_retries) MAX_RETRIES="$value" ;;
+          memory_limit_mb) MEMORY_LIMIT_MB="$value" ;;
           evolution_dir)
             echo "[WARN] evolution_dir in config is ignored - automatically inferred from config file location" >&2
             ;;
@@ -316,14 +319,17 @@ show_config() {
   echo "  Lock timeout: $LOCK_TIMEOUT"
   echo "  Auto ideate: $AUTO_IDEATE"
   echo "  Max retries: $MAX_RETRIES"
+  echo "  Memory limit: ${MEMORY_LIMIT_MB}MB"
   echo "  LLM configuration:"
   # Show LLM configurations using dynamic variable names
-  for model in o3 codex gemini opus sonnet; do
+  for model in gpt_5 o3 codex gemini opus sonnet; do
     var_name="LLM_CLI_${model}"
     if [[ -n "${!var_name}" ]]; then
-      echo "    $model: ${!var_name}"
+      # Convert underscore back to dash for display
+      display_name=$(echo "$model" | sed 's/_/-/g')
+      echo "    $display_name: ${!var_name}"
     fi
   done
   echo "  LLM for run: $LLM_RUN"
   echo "  LLM for ideate: $LLM_IDEATE"
-}
+}