claude-evolve 1.5.2 → 1.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -1
- package/bin/claude-evolve-analyze +42 -15
- package/bin/claude-evolve-autostatus +2 -2
- package/bin/claude-evolve-edit +182 -17
- package/bin/claude-evolve-ideate +105 -32
- package/bin/claude-evolve-run +71 -1
- package/bin/claude-evolve-status +3 -1
- package/bin/claude-evolve-worker +96 -39
- package/lib/ai-cli.sh +7 -2
- package/lib/config.sh +22 -16
- package/lib/csv_fixer.py +35 -0
- package/lib/memory_limit_wrapper.py +192 -0
- package/package.json +1 -1
- package/templates/config.yaml +10 -10
package/bin/claude-evolve-ideate
CHANGED
|
@@ -84,39 +84,76 @@ call_ai_for_ideation() {
|
|
|
84
84
|
original_csv_count=0
|
|
85
85
|
fi
|
|
86
86
|
|
|
87
|
-
|
|
88
|
-
local ai_output
|
|
89
|
-
ai_output=$(call_ai_with_round_robin "$prompt" "ideate" "$gen_num")
|
|
90
|
-
local ai_exit_code=$?
|
|
87
|
+
echo "[DEBUG] Original CSV has $original_csv_count data rows" >&2
|
|
91
88
|
|
|
92
|
-
#
|
|
93
|
-
|
|
89
|
+
# Get models for ideation
|
|
90
|
+
local model_list
|
|
91
|
+
model_list=$(get_models_for_command "ideate")
|
|
92
|
+
local models=()
|
|
93
|
+
read -ra models <<< "$model_list"
|
|
94
94
|
|
|
95
|
-
if [[ $
|
|
96
|
-
|
|
95
|
+
if [[ ${#models[@]} -eq 0 ]]; then
|
|
96
|
+
echo "[ERROR] No models configured for ideation" >&2
|
|
97
|
+
return 1
|
|
98
|
+
fi
|
|
99
|
+
|
|
100
|
+
# Calculate starting index for round-robin
|
|
101
|
+
local num_models=${#models[@]}
|
|
102
|
+
local start_index=$((gen_num % num_models))
|
|
103
|
+
|
|
104
|
+
# Create ordered list based on round-robin
|
|
105
|
+
local ordered_models=()
|
|
106
|
+
for ((i=0; i<num_models; i++)); do
|
|
107
|
+
local idx=$(((start_index + i) % num_models))
|
|
108
|
+
ordered_models+=("${models[$idx]}")
|
|
109
|
+
done
|
|
110
|
+
|
|
111
|
+
echo "[AI] Model order for ideate (round-robin): ${ordered_models[*]}" >&2
|
|
112
|
+
|
|
113
|
+
# Try each model until CSV changes
|
|
114
|
+
for model in "${ordered_models[@]}"; do
|
|
115
|
+
echo "[AI] Attempting ideate with $model" >&2
|
|
116
|
+
|
|
117
|
+
# Call the model directly
|
|
118
|
+
local ai_output
|
|
119
|
+
ai_output=$(call_ai_model_configured "$model" "$prompt")
|
|
120
|
+
local ai_exit_code=$?
|
|
121
|
+
|
|
122
|
+
echo "[AI] $model completed with exit code $ai_exit_code" >&2
|
|
123
|
+
|
|
124
|
+
# Check if the file was modified - this is ALL that matters
|
|
97
125
|
if [[ -f "$temp_csv_file" ]]; then
|
|
98
126
|
local new_csv_count
|
|
99
127
|
new_csv_count=$(grep -v '^[[:space:]]*$' "$temp_csv_file" | tail -n +2 | wc -l)
|
|
100
128
|
|
|
101
129
|
if [[ $new_csv_count -gt $original_csv_count ]]; then
|
|
102
|
-
echo "[INFO]
|
|
130
|
+
echo "[INFO] CSV was modified by $model ($new_csv_count vs $original_csv_count rows) - validating format..." >&2
|
|
131
|
+
|
|
132
|
+
# Post-process to ensure all description fields are quoted
|
|
133
|
+
local fixed_csv_file="${temp_csv_file}.fixed"
|
|
134
|
+
|
|
135
|
+
# Use the CSV fixer script
|
|
136
|
+
if "$PYTHON_CMD" "$SCRIPT_DIR/../lib/csv_fixer.py" "$temp_csv_file" "$fixed_csv_file"; then
|
|
137
|
+
mv "$fixed_csv_file" "$temp_csv_file"
|
|
138
|
+
echo "[INFO] CSV format validated and fixed if needed" >&2
|
|
139
|
+
else
|
|
140
|
+
echo "[WARN] CSV format validation failed, using original" >&2
|
|
141
|
+
fi
|
|
142
|
+
|
|
103
143
|
return 0
|
|
104
144
|
else
|
|
105
|
-
echo "[INFO] $model
|
|
106
|
-
echo "[DEBUG] Expected file: $temp_csv_file" >&2
|
|
145
|
+
echo "[INFO] CSV unchanged after $model (exit code: $ai_exit_code)" >&2
|
|
107
146
|
echo "[DEBUG] Original count: $original_csv_count, New count: $new_csv_count" >&2
|
|
108
|
-
|
|
147
|
+
# Continue to next model
|
|
109
148
|
fi
|
|
110
149
|
else
|
|
111
|
-
echo "[INFO]
|
|
112
|
-
|
|
113
|
-
echo "[DEBUG] Files matching temp-csv-*.csv:" >&2
|
|
114
|
-
ls -la temp-csv-*.csv 2>&1 >&2
|
|
115
|
-
return 1
|
|
150
|
+
echo "[INFO] Temp CSV file not found after $model: $temp_csv_file" >&2
|
|
151
|
+
# Continue to next model
|
|
116
152
|
fi
|
|
117
|
-
|
|
153
|
+
done
|
|
118
154
|
|
|
119
|
-
|
|
155
|
+
# All models tried, none changed the file
|
|
156
|
+
echo "[ERROR] All AI models failed to generate ideas" >&2
|
|
120
157
|
return 1
|
|
121
158
|
}
|
|
122
159
|
|
|
@@ -461,8 +498,14 @@ Instructions:
|
|
|
461
498
|
1. Add exactly $count new rows to the CSV
|
|
462
499
|
2. Use the next available generation numbers (gen$CURRENT_GENERATION-XXX format)
|
|
463
500
|
3. For each idea, create a row with: id,parent_id,description,,pending
|
|
464
|
-
4.
|
|
465
|
-
|
|
501
|
+
4. CRITICAL CSV FORMATTING RULES:
|
|
502
|
+
- ALWAYS wrap the description field in double quotes
|
|
503
|
+
- If the description contains quotes, escape them by doubling them (\" becomes \"\")
|
|
504
|
+
- Example: gen01-001,gen00-000,\"Implement adaptive RSI thresholds\",,pending
|
|
505
|
+
- BAD: gen01-001,gen00-000,Implement adaptive RSI thresholds,,pending
|
|
506
|
+
- NEVER omit quotes - unquoted descriptions cause CSV corruption
|
|
507
|
+
5. For novel ideas: leave parent_id empty
|
|
508
|
+
6. For other idea types: use appropriate parent IDs from these top performers:
|
|
466
509
|
$top_performers
|
|
467
510
|
|
|
468
511
|
IMPORTANT: Output the complete modified CSV file. Do not add any explanation or other text - just output the CSV."
|
|
@@ -839,9 +882,15 @@ CRITICAL INSTRUCTIONS:
|
|
|
839
882
|
prompt+="
|
|
840
883
|
6. Use the Edit or MultiEdit tool to APPEND exactly $count new rows AT THE END of the CSV file
|
|
841
884
|
7. For each idea, create a row with: id,,description,,pending (empty parent_id for novel ideas)
|
|
842
|
-
8.
|
|
843
|
-
|
|
844
|
-
|
|
885
|
+
8. CRITICAL CSV FORMATTING RULES:
|
|
886
|
+
- ALWAYS wrap the description field in double quotes
|
|
887
|
+
- If the description contains quotes, escape them by doubling them (\" becomes \"\")
|
|
888
|
+
- Example: gen01-001,,\"Implement adaptive RSI thresholds based on volatility\",,pending
|
|
889
|
+
- BAD: gen01-001,,Implement adaptive RSI thresholds based on volatility,,pending
|
|
890
|
+
- NEVER omit quotes around descriptions - this causes CSV parsing errors
|
|
891
|
+
9. Each description should be one clear sentence describing a novel algorithmic approach
|
|
892
|
+
10. Focus on creative, ambitious ideas that haven't been tried yet
|
|
893
|
+
11. Consider machine learning, new indicators, regime detection, risk management, etc.
|
|
845
894
|
|
|
846
895
|
IMPORTANT: You must APPEND new rows to the existing CSV file. DO NOT replace the file contents. All existing rows must remain unchanged.
|
|
847
896
|
CRITICAL: You must use your file editing tools (Edit/MultiEdit) to modify the CSV file. DO NOT return CSV text - use your tools to edit the file directly.
|
|
@@ -939,8 +988,14 @@ CRITICAL INSTRUCTIONS:
|
|
|
939
988
|
5. Use the Edit or MultiEdit tool to APPEND exactly $count new rows AT THE END of the CSV file
|
|
940
989
|
6. For each idea, create a row with: id,parent_id,description,,pending
|
|
941
990
|
7. Each parent_id MUST be one of: $valid_parent_ids
|
|
942
|
-
8.
|
|
943
|
-
|
|
991
|
+
8. CRITICAL CSV FORMATTING RULES:
|
|
992
|
+
- ALWAYS wrap the description field in double quotes
|
|
993
|
+
- If the description contains quotes, escape them by doubling them (\" becomes \"\")
|
|
994
|
+
- Example: gen01-001,gen00-000,\"Lower rsi_entry from 21 to 18\",,pending
|
|
995
|
+
- BAD: gen01-001,gen00-000,Lower rsi_entry from 21 to 18,,pending
|
|
996
|
+
- NEVER omit quotes around descriptions - this causes CSV parsing errors
|
|
997
|
+
9. Each description should focus on adjusting specific parameters that exist in the parent's source code
|
|
998
|
+
10. Include current and new parameter values - for example: \"Lower rsi_entry from 21 to 18\"
|
|
944
999
|
|
|
945
1000
|
IMPORTANT: You must APPEND new rows to the existing CSV file. DO NOT replace the file contents. All existing rows must remain unchanged.
|
|
946
1001
|
CRITICAL: You must use your file editing tools (Edit/MultiEdit) to modify the CSV file. DO NOT return CSV text - use your tools to edit the file directly.
|
|
@@ -1020,8 +1075,14 @@ CRITICAL INSTRUCTIONS:
|
|
|
1020
1075
|
5. Use the Edit or MultiEdit tool to APPEND exactly $count new rows AT THE END of the CSV file
|
|
1021
1076
|
6. For each idea, create a row with: id,parent_id,description,,pending
|
|
1022
1077
|
7. Each parent_id MUST be one of: $valid_parent_ids
|
|
1023
|
-
8.
|
|
1024
|
-
|
|
1078
|
+
8. CRITICAL CSV FORMATTING RULES:
|
|
1079
|
+
- ALWAYS wrap the description field in double quotes
|
|
1080
|
+
- If the description contains quotes, escape them by doubling them (\" becomes \"\")
|
|
1081
|
+
- Example: gen01-001,gen00-000,\"Add ML-based regime detection using LSTM\",,pending
|
|
1082
|
+
- BAD: gen01-001,gen00-000,Add ML-based regime detection using LSTM,,pending
|
|
1083
|
+
- NEVER omit quotes around descriptions - this causes CSV parsing errors
|
|
1084
|
+
9. Each description should focus on architectural/structural changes based on the parent's actual code
|
|
1085
|
+
10. Reference actual components/methods found in the source code
|
|
1025
1086
|
|
|
1026
1087
|
IMPORTANT: You must APPEND new rows to the existing CSV file. DO NOT replace the file contents. All existing rows must remain unchanged.
|
|
1027
1088
|
CRITICAL: You must use your file editing tools (Edit/MultiEdit) to modify the CSV file. DO NOT return CSV text - use your tools to edit the file directly.
|
|
@@ -1101,8 +1162,14 @@ CRITICAL INSTRUCTIONS:
|
|
|
1101
1162
|
5. Use the Edit or MultiEdit tool to APPEND exactly $count new rows AT THE END of the CSV file
|
|
1102
1163
|
6. For each idea, create a row with: id,parent_id,description,,pending
|
|
1103
1164
|
7. Each parent_id MUST be one of: $valid_parent_ids (choose the primary parent)
|
|
1104
|
-
8.
|
|
1105
|
-
|
|
1165
|
+
8. CRITICAL CSV FORMATTING RULES:
|
|
1166
|
+
- ALWAYS wrap the description field in double quotes
|
|
1167
|
+
- If the description contains quotes, escape them by doubling them (\" becomes \"\")
|
|
1168
|
+
- Example: gen01-001,gen00-000,\"Combine gen01-123's RSI logic with gen01-456's volatility scaling\",,pending
|
|
1169
|
+
- BAD: gen01-001,gen00-000,Combine gen01-123's RSI logic with gen01-456's volatility scaling,,pending
|
|
1170
|
+
- NEVER omit quotes around descriptions - this causes CSV parsing errors
|
|
1171
|
+
9. Each description should combine actual elements from 2+ algorithms based on their source code
|
|
1172
|
+
10. Reference specific components/features found in the actual source code
|
|
1106
1173
|
|
|
1107
1174
|
IMPORTANT: You must APPEND new rows to the existing CSV file. DO NOT replace the file contents. All existing rows must remain unchanged.
|
|
1108
1175
|
CRITICAL: You must use your file editing tools (Edit/MultiEdit) to modify the CSV file. DO NOT return CSV text - use your tools to edit the file directly.
|
|
@@ -1195,8 +1262,14 @@ CRITICAL INSTRUCTIONS:
|
|
|
1195
1262
|
4. If no gen$CURRENT_GENERATION entries exist yet, start with gen$CURRENT_GENERATION-001
|
|
1196
1263
|
5. Use the Edit or MultiEdit tool to APPEND exactly $TOTAL_IDEAS new rows AT THE END of the CSV file
|
|
1197
1264
|
6. For each idea, create a row with: id,parent_id,description,,pending
|
|
1198
|
-
7.
|
|
1199
|
-
|
|
1265
|
+
7. CRITICAL CSV FORMATTING RULES:
|
|
1266
|
+
- ALWAYS wrap the description field in double quotes
|
|
1267
|
+
- If the description contains quotes, escape them by doubling them (\" becomes \"\")
|
|
1268
|
+
- Example: gen01-001,gen00-000,\"Implement adaptive RSI thresholds based on volatility\",,pending
|
|
1269
|
+
- BAD: gen01-001,gen00-000,Implement adaptive RSI thresholds based on volatility,,pending
|
|
1270
|
+
- NEVER omit quotes around descriptions - this causes CSV parsing errors that corrupt the data
|
|
1271
|
+
8. Mix both parameter tuning and structural changes
|
|
1272
|
+
9. If building on existing algorithms, use their ID as parent_id, otherwise leave parent_id empty
|
|
1200
1273
|
|
|
1201
1274
|
⚠️ AVOID ONLY: Kelly floor/cap adjustments that assume leverage > 1.0 (these get clamped and have no effect)
|
|
1202
1275
|
|
package/bin/claude-evolve-run
CHANGED
|
@@ -478,6 +478,66 @@ ensure_baseline_entry
|
|
|
478
478
|
# Flag to track API limit status
|
|
479
479
|
api_limit_reached=false
|
|
480
480
|
|
|
481
|
+
# Check if previous generation has at least one completed item
|
|
482
|
+
#######################################
# Decide whether auto-ideation may create a new generation.
#
# Scans the evolution CSV for ids of the form genNN-XXX, finds the highest
# generation number, and allows ideation only when that generation has at
# least one row whose status column is 'complete'.
#
# Globals:   PYTHON_CMD (read) - Python interpreter used for CSV parsing
# Arguments: $1 - path to the evolution CSV
# Outputs:   [INFO]/[ERROR] diagnostics on stderr
# Returns:   0 when ideation is allowed (CSV missing, no generation > 0
#            found, newest generation has a completed row, or the CSV
#            could not be parsed); 1 when the newest generation has no
#            completed rows.
#######################################
check_previous_generation_has_completed() {
  local csv_path="$1"

  if [[ ! -f "$csv_path" ]]; then
    # No CSV file yet - allow first ideation
    return 0
  fi

  # The path is handed to Python as an argument instead of being pasted into
  # the program text, so quotes or backslashes in the path can neither break
  # the script nor be executed as code. The quoted here-document keeps the
  # shell from expanding anything inside the Python source.
  # The function's exit status is the interpreter's exit status.
  "$PYTHON_CMD" - "$csv_path" <<'PY'
import csv
import sys

csv_path = sys.argv[1]
max_gen = 0
gen_completed = {}

try:
    # newline='' lets the csv module handle quoted fields with embedded newlines
    with open(csv_path, 'r', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # Skip header
        for row in reader:
            # Rows without a status column (fewer than 5 fields) are ignored
            if len(row) < 5:
                continue
            id_field = row[0].strip()
            status = row[4].strip()

            if id_field.startswith('gen') and '-' in id_field:
                try:
                    gen_part = id_field.split('-')[0]  # e.g., 'gen01'
                    gen_num = int(gen_part[3:])  # Extract number after 'gen'
                except (ValueError, IndexError):
                    continue  # Not a genNN-XXX id; skip the row
                max_gen = max(max_gen, gen_num)
                gen_completed.setdefault(gen_num, 0)
                if status == 'complete':
                    gen_completed[gen_num] += 1

    # If max_gen is 0, no generations exist yet - allow ideation
    if max_gen == 0:
        sys.exit(0)

    # Check if the most recent generation has at least one completed item
    if gen_completed.get(max_gen, 0) > 0:
        print(f'[INFO] Generation {max_gen:02d} has {gen_completed[max_gen]} completed items - allowing ideation', file=sys.stderr)
        sys.exit(0)
    else:
        print(f'[INFO] Generation {max_gen:02d} has no completed items - blocking ideation to prevent endless loops', file=sys.stderr)
        sys.exit(1)

except Exception as e:
    # sys.exit() raises SystemExit, which is not an Exception subclass, so the
    # exit codes chosen above pass through this handler untouched.
    print(f'[ERROR] Failed to check previous generation: {e}', file=sys.stderr)
    # On error, allow ideation to maintain backward compatibility
    sys.exit(0)
PY
}
|
|
540
|
+
|
|
481
541
|
# Main dispatch loop
|
|
482
542
|
while true; do
|
|
483
543
|
# Clean up finished workers
|
|
@@ -506,7 +566,17 @@ while true; do
|
|
|
506
566
|
|
|
507
567
|
# Check if auto ideation is enabled
|
|
508
568
|
if [[ "$AUTO_IDEATE" == "true" || "$AUTO_IDEATE" == "1" ]]; then
|
|
509
|
-
echo "[DISPATCHER] Auto ideation is enabled.
|
|
569
|
+
echo "[DISPATCHER] Auto ideation is enabled. Checking prerequisites..."
|
|
570
|
+
|
|
571
|
+
# Check if previous generation has at least one completed item
|
|
572
|
+
if ! check_previous_generation_has_completed "$FULL_CSV_PATH"; then
|
|
573
|
+
echo "[DISPATCHER] Evolution complete - previous generation has no completed items."
|
|
574
|
+
echo "[DISPATCHER] This prevents endless ideation loops when API limits are hit."
|
|
575
|
+
echo "[DISPATCHER] Wait for current generation to complete, then run 'claude-evolve ideate' manually."
|
|
576
|
+
break
|
|
577
|
+
fi
|
|
578
|
+
|
|
579
|
+
echo "[DISPATCHER] Prerequisites met. Generating new ideas..."
|
|
510
580
|
|
|
511
581
|
# Check if claude-evolve-ideate exists
|
|
512
582
|
ideate_script="$SCRIPT_DIR/claude-evolve-ideate"
|
package/bin/claude-evolve-status
CHANGED
|
@@ -244,7 +244,9 @@ try:
|
|
|
244
244
|
# Show per-generation breakdown (unless brief mode)
|
|
245
245
|
if not show_brief and stats_by_gen:
|
|
246
246
|
print('📈 BY GENERATION:')
|
|
247
|
-
|
|
247
|
+
# Sort generations numerically by extracting the number after 'gen'
|
|
248
|
+
sorted_gens = sorted(stats_by_gen.keys(), key=lambda g: int(g[3:]) if g.startswith('gen') and g[3:].isdigit() else 0)
|
|
249
|
+
for gen in sorted_gens:
|
|
248
250
|
data = stats_by_gen[gen]
|
|
249
251
|
total = sum(data.values())
|
|
250
252
|
|
package/bin/claude-evolve-worker
CHANGED
|
@@ -14,34 +14,11 @@ TERMINATION_SIGNAL=""
|
|
|
14
14
|
# Cleanup function to handle termination
|
|
15
15
|
cleanup_on_exit() {
|
|
16
16
|
if [[ -n "$CURRENT_CANDIDATE_ID" ]]; then
|
|
17
|
-
|
|
18
|
-
#
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
import sys
|
|
23
|
-
sys.path.insert(0, '$SCRIPT_DIR/..')
|
|
24
|
-
from lib.evolution_csv import EvolutionCSV
|
|
25
|
-
try:
|
|
26
|
-
with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
27
|
-
csv.update_candidate_status('$CURRENT_CANDIDATE_ID', 'failed')
|
|
28
|
-
except:
|
|
29
|
-
pass # Best effort cleanup
|
|
30
|
-
" 2>/dev/null || true
|
|
31
|
-
else
|
|
32
|
-
echo "[WORKER-$$] Interrupted, leaving $CURRENT_CANDIDATE_ID for retry" >&2
|
|
33
|
-
# Optionally reset to pending instead of leaving as running
|
|
34
|
-
"$PYTHON_CMD" -c "
|
|
35
|
-
import sys
|
|
36
|
-
sys.path.insert(0, '$SCRIPT_DIR/..')
|
|
37
|
-
from lib.evolution_csv import EvolutionCSV
|
|
38
|
-
try:
|
|
39
|
-
with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
40
|
-
csv.update_candidate_status('$CURRENT_CANDIDATE_ID', 'pending')
|
|
41
|
-
except:
|
|
42
|
-
pass # Best effort cleanup
|
|
43
|
-
" 2>/dev/null || true
|
|
44
|
-
fi
|
|
17
|
+
echo "[WORKER-$$] Worker terminated while processing $CURRENT_CANDIDATE_ID" >&2
|
|
18
|
+
# If we're interrupted while processing, leave it as "running"
|
|
19
|
+
# This prevents other workers from picking it up in the same session
|
|
20
|
+
# A human can manually reset to pending if needed
|
|
21
|
+
echo "[WORKER-$$] Leaving $CURRENT_CANDIDATE_ID in current state" >&2
|
|
45
22
|
fi
|
|
46
23
|
}
|
|
47
24
|
|
|
@@ -135,6 +112,12 @@ process_candidate() {
|
|
|
135
112
|
echo "[WORKER-$$] Description: $description"
|
|
136
113
|
echo "[WORKER-$$] Based on ID: $parent_id"
|
|
137
114
|
|
|
115
|
+
# Treat "baseline-000" parent ID as empty/baseline
|
|
116
|
+
if [[ "$parent_id" == "baseline-000" ]]; then
|
|
117
|
+
parent_id=""
|
|
118
|
+
echo "[WORKER-$$] Parent ID 'baseline-000' treated as baseline (empty parent)"
|
|
119
|
+
fi
|
|
120
|
+
|
|
138
121
|
# Determine source algorithm
|
|
139
122
|
local source_file
|
|
140
123
|
if [[ -z "$parent_id" ]]; then
|
|
@@ -213,16 +196,26 @@ CRITICAL: Do NOT use any git commands (git add, git commit, git reset, etc.). On
|
|
|
213
196
|
|
|
214
197
|
# Try AI models with round-robin based on candidate ID
|
|
215
198
|
if ! call_ai_for_evolution "$evolution_prompt" "$candidate_id"; then
|
|
216
|
-
echo "[WORKER-$$] ERROR: All AI models failed to generate code" >&2
|
|
199
|
+
echo "[WORKER-$$] ERROR: All AI models failed to generate code - leaving as pending for retry" >&2
|
|
217
200
|
cd "$original_pwd"
|
|
218
201
|
rm -f "$target_file" # Clean up on failure
|
|
219
|
-
|
|
202
|
+
# Return with special code to indicate AI failure (should remain pending)
|
|
203
|
+
return 77
|
|
220
204
|
fi
|
|
221
205
|
|
|
222
206
|
# Restore working directory
|
|
223
207
|
cd "$original_pwd"
|
|
224
208
|
|
|
225
209
|
echo "[WORKER-$$] Evolution applied successfully"
|
|
210
|
+
|
|
211
|
+
# Check if the generated Python file has syntax errors
|
|
212
|
+
echo "[WORKER-$$] Checking Python syntax..." >&2
|
|
213
|
+
if ! "$PYTHON_CMD" -m py_compile "$target_file" 2>&1; then
|
|
214
|
+
echo "[WORKER-$$] ERROR: Generated Python file has syntax errors!" >&2
|
|
215
|
+
echo "[WORKER-$$] File: $target_file" >&2
|
|
216
|
+
# This is still an evaluation failure, not an AI failure
|
|
217
|
+
return 1
|
|
218
|
+
fi
|
|
226
219
|
fi
|
|
227
220
|
fi
|
|
228
221
|
|
|
@@ -239,11 +232,22 @@ CRITICAL: Do NOT use any git commands (git add, git commit, git reset, etc.). On
|
|
|
239
232
|
eval_arg=""
|
|
240
233
|
fi
|
|
241
234
|
local eval_cmd=("$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$eval_arg")
|
|
235
|
+
|
|
236
|
+
# Add memory limiting if configured
|
|
237
|
+
if [[ -n "$MEMORY_LIMIT_MB" ]] && [[ "$MEMORY_LIMIT_MB" -gt 0 ]]; then
|
|
238
|
+
eval_cmd=("$PYTHON_CMD" "$SCRIPT_DIR/../lib/memory_limit_wrapper.py" "$MEMORY_LIMIT_MB" "${eval_cmd[@]}")
|
|
239
|
+
fi
|
|
240
|
+
|
|
241
|
+
# Add timeout if configured
|
|
242
242
|
[[ -n "$timeout_seconds" ]] && eval_cmd=(timeout "$timeout_seconds" "${eval_cmd[@]}")
|
|
243
243
|
|
|
244
244
|
# Run evaluation with tee to both display and capture output
|
|
245
245
|
# Use stdbuf to disable buffering for real-time output
|
|
246
|
-
|
|
246
|
+
# IMPORTANT: Use PIPESTATUS to get the exit code of the evaluation command, not tee
|
|
247
|
+
stdbuf -o0 -e0 "${eval_cmd[@]}" 2>&1 | tee "$eval_output_file" >&2
|
|
248
|
+
local eval_exit_code=${PIPESTATUS[0]} # Get exit code of first command in pipe
|
|
249
|
+
|
|
250
|
+
if [[ $eval_exit_code -eq 0 ]]; then
|
|
247
251
|
local eval_end=$(date +%s)
|
|
248
252
|
local eval_duration=$((eval_end - eval_start))
|
|
249
253
|
|
|
@@ -353,14 +357,19 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
|
353
357
|
echo "[WORKER-$$] Output: $eval_output" >&2
|
|
354
358
|
# rm -f "$eval_output_file" # Keep for debugging
|
|
355
359
|
echo "[WORKER-$$] Evaluation output saved to: $eval_output_file" >&2
|
|
360
|
+
# Clear CURRENT_CANDIDATE_ID before returning
|
|
361
|
+
CURRENT_CANDIDATE_ID=""
|
|
356
362
|
return 1
|
|
357
363
|
fi
|
|
358
364
|
|
|
359
365
|
# Clean up temp file (comment out to keep for debugging)
|
|
360
366
|
# rm -f "$eval_output_file"
|
|
361
367
|
echo "[WORKER-$$] Evaluation output saved to: $eval_output_file" >&2
|
|
368
|
+
|
|
369
|
+
# Clear CURRENT_CANDIDATE_ID on successful completion
|
|
370
|
+
CURRENT_CANDIDATE_ID=""
|
|
362
371
|
else
|
|
363
|
-
local exit_code
|
|
372
|
+
local exit_code=$eval_exit_code
|
|
364
373
|
# Read any output that was captured before failure
|
|
365
374
|
eval_output=$(<"$eval_output_file")
|
|
366
375
|
# rm -f "$eval_output_file" # Keep for debugging
|
|
@@ -370,22 +379,56 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
|
370
379
|
echo "[WORKER-$$] Output: $eval_output" >&2
|
|
371
380
|
|
|
372
381
|
# Mark as failed in CSV
|
|
373
|
-
"$
|
|
382
|
+
echo "[WORKER-$$] Marking $candidate_id as failed in CSV" >&2
|
|
383
|
+
if ! "$PYTHON_CMD" -c "
|
|
374
384
|
import sys
|
|
375
385
|
sys.path.insert(0, '$SCRIPT_DIR/..')
|
|
376
386
|
from lib.evolution_csv import EvolutionCSV
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
387
|
+
try:
|
|
388
|
+
with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
389
|
+
success = csv.update_candidate_status('$candidate_id', 'failed')
|
|
390
|
+
if not success:
|
|
391
|
+
print(f'ERROR: Failed to update status for {candidate_id}', file=sys.stderr)
|
|
392
|
+
sys.exit(1)
|
|
393
|
+
except Exception as e:
|
|
394
|
+
print(f'ERROR: Exception updating status: {e}', file=sys.stderr)
|
|
395
|
+
sys.exit(1)
|
|
396
|
+
" 2>&1; then
|
|
397
|
+
echo "[WORKER-$$] ERROR: Failed to update CSV status to failed" >&2
|
|
398
|
+
else
|
|
399
|
+
echo "[WORKER-$$] Successfully marked $candidate_id as failed" >&2
|
|
400
|
+
fi
|
|
380
401
|
|
|
402
|
+
# Clear CURRENT_CANDIDATE_ID before returning to prevent cleanup handler from resetting it
|
|
403
|
+
CURRENT_CANDIDATE_ID=""
|
|
381
404
|
return $exit_code
|
|
382
405
|
fi
|
|
383
406
|
}
|
|
384
407
|
|
|
408
|
+
# Don't reset running candidates on startup - they might be legitimately being processed by another worker
|
|
409
|
+
|
|
385
410
|
# Main worker loop
|
|
386
411
|
echo "[WORKER-$$] Worker started"
|
|
387
412
|
|
|
388
413
|
while true; do
|
|
414
|
+
# Debug: Show current status of all candidates
|
|
415
|
+
echo "[WORKER-$$] Current candidate statuses:" >&2
|
|
416
|
+
"$PYTHON_CMD" -c "
|
|
417
|
+
import sys
|
|
418
|
+
sys.path.insert(0, '$SCRIPT_DIR/..')
|
|
419
|
+
from lib.evolution_csv import EvolutionCSV
|
|
420
|
+
with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
421
|
+
rows = csv._read_csv()
|
|
422
|
+
if rows:
|
|
423
|
+
start_idx = 1 if rows and rows[0] and rows[0][0].lower() == 'id' else 0
|
|
424
|
+
status_count = {}
|
|
425
|
+
for row in rows[start_idx:]:
|
|
426
|
+
if len(row) > 4:
|
|
427
|
+
status = row[4].strip() or 'pending'
|
|
428
|
+
status_count[status] = status_count.get(status, 0) + 1
|
|
429
|
+
print(f'Status counts: {status_count}', file=sys.stderr)
|
|
430
|
+
" 2>&1 || true
|
|
431
|
+
|
|
389
432
|
# Try to claim a pending candidate
|
|
390
433
|
candidate_info=$("$PYTHON_CMD" -c "
|
|
391
434
|
import sys
|
|
@@ -412,12 +455,26 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
|
412
455
|
# Set current candidate for cleanup
|
|
413
456
|
CURRENT_CANDIDATE_ID="$candidate_id"
|
|
414
457
|
|
|
415
|
-
# Process the candidate
|
|
416
|
-
|
|
458
|
+
# Process the candidate and capture exit code
|
|
459
|
+
process_candidate "$candidate_id" "$parent_id" "$description"
|
|
460
|
+
process_exit_code=$?
|
|
461
|
+
|
|
462
|
+
if [[ $process_exit_code -eq 0 ]]; then
|
|
417
463
|
echo "[WORKER-$$] Successfully processed $candidate_id"
|
|
464
|
+
elif [[ $process_exit_code -eq 77 ]]; then
|
|
465
|
+
# Special exit code 77 means AI failed to generate code
|
|
466
|
+
echo "[WORKER-$$] AI generation failed for $candidate_id - marking as failed-ai-retry"
|
|
467
|
+
# Mark with special status that indicates AI generation failed (not evaluation)
|
|
468
|
+
"$PYTHON_CMD" -c "
|
|
469
|
+
import sys
|
|
470
|
+
sys.path.insert(0, '$SCRIPT_DIR/..')
|
|
471
|
+
from lib.evolution_csv import EvolutionCSV
|
|
472
|
+
with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
473
|
+
csv.update_candidate_status('$candidate_id', 'failed-ai-retry')
|
|
474
|
+
" 2>/dev/null || true
|
|
418
475
|
else
|
|
419
476
|
echo "[WORKER-$$] Failed to process $candidate_id"
|
|
420
|
-
#
|
|
477
|
+
# Other failures (evaluation errors, etc) mark as failed
|
|
421
478
|
"$PYTHON_CMD" -c "
|
|
422
479
|
import sys
|
|
423
480
|
sys.path.insert(0, '$SCRIPT_DIR/..')
|
package/lib/ai-cli.sh
CHANGED
|
@@ -19,6 +19,11 @@ call_ai_model_configured() {
|
|
|
19
19
|
ai_output=$(timeout 300 claude --dangerously-skip-permissions --model "$model_name" -p "$prompt" 2>&1)
|
|
20
20
|
local ai_exit_code=$?
|
|
21
21
|
;;
|
|
22
|
+
gpt-5)
|
|
23
|
+
local ai_output
|
|
24
|
+
ai_output=$(timeout 300 codex exec -m gpt-5 --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
|
|
25
|
+
local ai_exit_code=$?
|
|
26
|
+
;;
|
|
22
27
|
o3)
|
|
23
28
|
local ai_output
|
|
24
29
|
ai_output=$(timeout 300 codex exec -m o3 --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
|
|
@@ -100,7 +105,7 @@ clean_ai_output() {
|
|
|
100
105
|
local model_name="$2"
|
|
101
106
|
|
|
102
107
|
# Handle codex-specific output format
|
|
103
|
-
if [[ "$model_name" == "codex" || "$model_name" == "o3" ]]; then
|
|
108
|
+
if [[ "$model_name" == "codex" || "$model_name" == "o3" || "$model_name" == "gpt-5" ]]; then
|
|
104
109
|
# Clean codex output - extract content between "codex" marker and "tokens used"
|
|
105
110
|
if echo "$output" | grep -q "^\[.*\] codex$"; then
|
|
106
111
|
# Extract content between "codex" line and "tokens used" line
|
|
@@ -191,7 +196,7 @@ call_ai_with_round_robin() {
|
|
|
191
196
|
ai_output=$(call_ai_model_configured "$model" "$prompt")
|
|
192
197
|
local ai_exit_code=$?
|
|
193
198
|
|
|
194
|
-
# Just check exit code
|
|
199
|
+
# Just check exit code
|
|
195
200
|
if [[ $ai_exit_code -eq 0 ]]; then
|
|
196
201
|
# Clean output if needed
|
|
197
202
|
ai_output=$(clean_ai_output "$ai_output" "$model")
|
package/lib/config.sh
CHANGED
|
@@ -49,17 +49,13 @@ DEFAULT_AUTO_IDEATE=true
|
|
|
49
49
|
# Default retry value
|
|
50
50
|
DEFAULT_MAX_RETRIES=3
|
|
51
51
|
|
|
52
|
-
# Default
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
DEFAULT_LLM_CLI_VALUES[3]='claude --dangerously-skip-permissions --model opus -p "{{PROMPT}}"'
|
|
60
|
-
DEFAULT_LLM_CLI_VALUES[4]='claude --dangerously-skip-permissions --model sonnet -p "{{PROMPT}}"'
|
|
61
|
-
DEFAULT_LLM_RUN="sonnet gemini"
|
|
62
|
-
DEFAULT_LLM_IDEATE="opus o3"
|
|
52
|
+
# Default memory limit (in MB, 0 means no limit)
|
|
53
|
+
# Set to reasonable limit for ML workloads - about half of available system RAM
|
|
54
|
+
DEFAULT_MEMORY_LIMIT_MB=12288
|
|
55
|
+
|
|
56
|
+
# Default LLM CLI configuration - use simple variables instead of arrays
|
|
57
|
+
DEFAULT_LLM_RUN="sonnet gpt-5 sonnet gpt-5"
|
|
58
|
+
DEFAULT_LLM_IDEATE="gemini gpt-5 opus"
|
|
63
59
|
|
|
64
60
|
# Load configuration from config file
|
|
65
61
|
load_config() {
|
|
@@ -96,9 +92,13 @@ load_config() {
|
|
|
96
92
|
# Set retry default
|
|
97
93
|
MAX_RETRIES="$DEFAULT_MAX_RETRIES"
|
|
98
94
|
|
|
95
|
+
# Set memory limit default
|
|
96
|
+
MEMORY_LIMIT_MB="$DEFAULT_MEMORY_LIMIT_MB"
|
|
97
|
+
|
|
99
98
|
# Set LLM CLI defaults (compatibility for older bash)
|
|
100
99
|
# Initialize associative array for LLM commands
|
|
101
100
|
# Use simpler approach for compatibility
|
|
101
|
+
LLM_CLI_gpt_5='codex exec -m gpt-5 --dangerously-bypass-approvals-and-sandbox "{{PROMPT}}"'
|
|
102
102
|
LLM_CLI_o3='codex exec -m o3 --dangerously-bypass-approvals-and-sandbox "{{PROMPT}}"'
|
|
103
103
|
LLM_CLI_codex='codex exec --dangerously-bypass-approvals-and-sandbox "{{PROMPT}}"'
|
|
104
104
|
LLM_CLI_gemini='gemini -y -p "{{PROMPT}}"'
|
|
@@ -202,12 +202,14 @@ load_config() {
|
|
|
202
202
|
# Model definition - key is model name, value is command template
|
|
203
203
|
# Remove single quotes from value if present
|
|
204
204
|
value=$(echo "$value" | sed "s/^'//;s/'$//")
|
|
205
|
+
# Convert dashes to underscores for bash variable names
|
|
206
|
+
var_key=$(echo "$key" | sed 's/-/_/g')
|
|
205
207
|
# Debug config loading
|
|
206
208
|
if [[ "${DEBUG_CONFIG:-}" == "true" ]]; then
|
|
207
|
-
echo "[CONFIG DEBUG] Setting LLM_CLI_${
|
|
209
|
+
echo "[CONFIG DEBUG] Setting LLM_CLI_${var_key} = '$value'" >&2
|
|
208
210
|
fi
|
|
209
211
|
# Use dynamic variable name for compatibility
|
|
210
|
-
eval "LLM_CLI_${
|
|
212
|
+
eval "LLM_CLI_${var_key}=\"$value\""
|
|
211
213
|
fi
|
|
212
214
|
else
|
|
213
215
|
# Handle top-level keys
|
|
@@ -221,6 +223,7 @@ load_config() {
|
|
|
221
223
|
python_cmd) PYTHON_CMD="$value" ;;
|
|
222
224
|
auto_ideate) AUTO_IDEATE="$value" ;;
|
|
223
225
|
max_retries) MAX_RETRIES="$value" ;;
|
|
226
|
+
memory_limit_mb) MEMORY_LIMIT_MB="$value" ;;
|
|
224
227
|
evolution_dir)
|
|
225
228
|
echo "[WARN] evolution_dir in config is ignored - automatically inferred from config file location" >&2
|
|
226
229
|
;;
|
|
@@ -316,14 +319,17 @@ show_config() {
|
|
|
316
319
|
echo " Lock timeout: $LOCK_TIMEOUT"
|
|
317
320
|
echo " Auto ideate: $AUTO_IDEATE"
|
|
318
321
|
echo " Max retries: $MAX_RETRIES"
|
|
322
|
+
echo " Memory limit: ${MEMORY_LIMIT_MB}MB"
|
|
319
323
|
echo " LLM configuration:"
|
|
320
324
|
# Show LLM configurations using dynamic variable names
|
|
321
|
-
for model in o3 codex gemini opus sonnet; do
|
|
325
|
+
for model in gpt_5 o3 codex gemini opus sonnet; do
|
|
322
326
|
var_name="LLM_CLI_${model}"
|
|
323
327
|
if [[ -n "${!var_name}" ]]; then
|
|
324
|
-
|
|
328
|
+
# Convert underscore back to dash for display
|
|
329
|
+
display_name=$(echo "$model" | sed 's/_/-/g')
|
|
330
|
+
echo " $display_name: ${!var_name}"
|
|
325
331
|
fi
|
|
326
332
|
done
|
|
327
333
|
echo " LLM for run: $LLM_RUN"
|
|
328
334
|
echo " LLM for ideate: $LLM_IDEATE"
|
|
329
|
-
}
|
|
335
|
+
}
|