claude-evolve 1.3.38 → 1.3.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -5
- package/bin/claude-evolve-analyze +34 -5
- package/bin/claude-evolve-cleanup +297 -0
- package/bin/claude-evolve-edit +293 -0
- package/bin/claude-evolve-ideate +6 -6
- package/bin/claude-evolve-main +35 -15
- package/bin/{claude-evolve-run-unified → claude-evolve-run} +143 -8
- package/bin/claude-evolve-status +220 -0
- package/bin/claude-evolve-worker +73 -11
- package/lib/config.sh +5 -3
- package/lib/csv-lock.sh +26 -4
- package/lib/csv_helper.py +1 -1
- package/package.json +1 -2
- package/templates/config.yaml +8 -7
- package/bin/claude-evolve-run-parallel.OLD +0 -389
- package/bin/claude-evolve-run.OLD +0 -662
|
@@ -1,662 +0,0 @@
|
|
|
1
|
-
#!/bin/bash

# 'set -e' is deliberately not enabled: a failing CSV helper call must not
# make the whole run exit silently.

# Locate the directory containing this script so the shared library can be
# sourced relative to it, wherever the script is invoked from.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=../lib/config.sh
source "$SCRIPT_DIR/../lib/config.sh"

# Load configuration. When CLAUDE_EVOLVE_CONFIG is set and non-empty it is
# passed to load_config as its single argument; otherwise load_config is
# called with no arguments.
load_config ${CLAUDE_EVOLVE_CONFIG:+"$CLAUDE_EVOLVE_CONFIG"}
|
|
16
|
-
|
|
17
|
-
# Parse arguments
#
# parse_run_args consumes the command-line options it is given and records
# them in these globals:
#   timeout_seconds  - value passed to --timeout, or empty
#   force_parallel   - "true" when --parallel was given, else "false"
#   force_sequential - "true" when --sequential was given, else "false"
#   use_caffeinate   - "true" when --keep-awake/--caffeinate was given
#   forward_args     - array of the accepted options WITHOUT the keep-awake
#                      flags, used to re-run this script under caffeinate
# Exits with status 1 on an invalid option and 0 after printing --help.
parse_run_args() {
  # Explicit defaults so stray environment variables cannot flip a flag.
  timeout_seconds=""
  force_parallel="false"
  force_sequential="false"
  use_caffeinate="false"
  forward_args=()

  while [[ $# -gt 0 ]]; do
    case $1 in
      --timeout)
        # 10# forces base 10 so values such as "08" are not read as octal.
        if [[ -z ${2:-} ]] || [[ ! $2 =~ ^[0-9]+$ ]] || ((10#$2 == 0)); then
          echo "[ERROR] --timeout requires a positive integer (seconds)" >&2
          exit 1
        fi
        timeout_seconds="$2"
        forward_args+=(--timeout "$2")
        shift 2
        ;;
      --parallel)
        force_parallel="true"
        forward_args+=("$1")
        shift
        ;;
      --sequential)
        force_sequential="true"
        forward_args+=("$1")
        shift
        ;;
      --keep-awake | --caffeinate)
        # Not forwarded: the re-executed script must not wrap itself again.
        use_caffeinate="true"
        shift
        ;;
      --help)
        cat <<EOF
claude-evolve run - Execute evolution candidates

USAGE:
  claude-evolve run [OPTIONS]

OPTIONS:
  --timeout <sec>  Kill evaluator after specified seconds (default: no timeout)
  --parallel       Force parallel execution mode
  --sequential     Force sequential execution mode
  --keep-awake     Prevent system sleep during execution (macOS only)
  --help           Show this help message

DESCRIPTION:
  Processes the oldest pending candidate from evolution.csv by:
  1. Generating algorithm mutation using Claude
  2. Running evaluator.py on the generated algorithm
  3. Updating CSV with performance score and completion status

  Use --timeout to prevent runaway evaluations from blocking progress.
EOF
        exit 0
        ;;
      *)
        echo "[ERROR] Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_run_args "$@"

# Check if caffeinate should be used
if [[ "$use_caffeinate" == "true" ]]; then
  if command -v caffeinate >/dev/null 2>&1; then
    echo "[INFO] Using caffeinate to prevent system sleep"
    # Re-run this script under caffeinate, passing along every option except
    # the keep-awake flag itself (which would cause an endless re-exec).
    exec caffeinate -dims "$0" "${forward_args[@]}"
  else
    echo "[WARN] caffeinate not found; continuing without sleep prevention" >&2
  fi
fi
|
|
79
|
-
|
|
80
|
-
# Determine execution mode
# Precedence: --parallel beats --sequential, and either flag beats the
# PARALLEL_ENABLED config value; with none of them the run is sequential.
use_parallel=false
if [[ "$force_parallel" != "true" && "$force_sequential" == "true" ]]; then
  echo "[INFO] Using sequential mode (forced via --sequential)"
elif [[ "$force_parallel" == "true" ]]; then
  use_parallel=true
  echo "[INFO] Using parallel mode (forced via --parallel)"
else
  case "$PARALLEL_ENABLED" in
    true | 1)
      use_parallel=true
      echo "[INFO] Using parallel mode (enabled in config)"
      ;;
    *)
      echo "[INFO] Using sequential mode (default)"
      ;;
  esac
fi

if [[ "$use_parallel" != "true" ]]; then
  echo "[INFO] Starting continuous evolution run..."
  echo "[INFO] Will continue running until no more pending candidates or 5 consecutive failures"
else
  # Parallel runs are handed over entirely to the dedicated runner; the
  # --timeout option is forwarded only when one was given.
  echo "[INFO] Starting parallel evolution run with up to $MAX_WORKERS workers"
  exec "$SCRIPT_DIR/claude-evolve-run-parallel" ${timeout_seconds:+--timeout "$timeout_seconds"}
fi

if [[ -n $timeout_seconds ]]; then
  echo "[INFO] Using timeout: ${timeout_seconds} seconds per evaluation"
fi

# Prepare logging directory (relative to the current working directory)
mkdir -p logs
|
|
107
|
-
|
|
108
|
-
# Track consecutive failures
consecutive_failures=0
MAX_FAILURES=5

# Record one more failure and decide whether the run may go on.
# Globals:  consecutive_failures (incremented), MAX_FAILURES (read)
# Outputs:  a warning or error on stderr; a separator line on stdout when
#           the run continues
# Returns:  0 while the failure count is below MAX_FAILURES, 1 once the
#           limit has been reached
should_continue_after_failure() {
  consecutive_failures=$((consecutive_failures + 1))

  if [[ $consecutive_failures -lt $MAX_FAILURES ]]; then
    echo "[WARN] Failure $consecutive_failures of $MAX_FAILURES. Continuing to next candidate..." >&2
    echo "----------------------------------------"
    return 0
  fi

  echo "[ERROR] Too many consecutive failures ($consecutive_failures). Stopping evolution run." >&2
  return 1
}
|
|
125
|
-
|
|
126
|
-
# Validate workspace using config
# Each guard aborts the run with status 1 as soon as a prerequisite is
# missing; the checks run in the same order as the messages below.
[[ -d "$FULL_EVOLUTION_DIR" ]] || {
  echo "[ERROR] Evolution directory not found: $FULL_EVOLUTION_DIR. Run 'claude-evolve setup' first." >&2
  exit 1
}

# Check required files
[[ -f "$FULL_CSV_PATH" ]] || {
  echo "[ERROR] CSV file not found: $FULL_CSV_PATH" >&2
  exit 1
}

[[ -f "$FULL_EVALUATOR_PATH" ]] || {
  echo "[ERROR] Evaluator not found: $FULL_EVALUATOR_PATH" >&2
  exit 1
}

[[ -f "$FULL_ALGORITHM_PATH" ]] || {
  echo "[ERROR] Algorithm not found: $FULL_ALGORITHM_PATH" >&2
  exit 1
}
|
|
147
|
-
|
|
148
|
-
# Find oldest pending row (using CSV helper)
# Runs csv_helper.py's find_pending command on FULL_CSV_PATH and passes its
# stdout and exit status straight through to the caller.
find_empty_row() {
  local helper_script="$SCRIPT_DIR/../lib/csv_helper.py"
  "$PYTHON_CMD" "$helper_script" find_pending "$FULL_CSV_PATH"
}
|
|
152
|
-
|
|
153
|
-
# Get CSV row - replaced by csv_helper.py
|
|
154
|
-
|
|
155
|
-
# Update CSV row (using CSV helper)
# Arguments:
#   $1 - row number to update
#   $2 - performance value (may be empty)
#   $3 - new status
# Delegates to csv_helper.py's update_row command and returns its exit status.
update_csv_row() {
  local -a helper_cmd=(
    "$PYTHON_CMD" "$SCRIPT_DIR/../lib/csv_helper.py" update_row "$FULL_CSV_PATH"
  )
  "${helper_cmd[@]}" "$1" "$2" "$3"
}
|
|
163
|
-
|
|
164
|
-
# Auto-recovery mechanism for common failures
#
# Arguments:
#   $1 - failure type: "import_error", "syntax_error" or "zero_score"
#   $2 - captured evaluator output (searched for the missing module name)
#   $3 - CSV row number of the candidate (1-based, first line of the file = 1)
#   $4 - candidate ID (accepted for interface compatibility; not used)
#   $5 - candidate description
# Globals:  PYTHON_CMD, FULL_CSV_PATH (read); calls update_csv_row
# Returns:  0 when the caller should retry, 1 when no recovery was possible
attempt_recovery() {
  local failure_type="$1"
  local error_output="$2"
  local row_num="$3"
  local id="$4"
  local description="$5"
  local missing_module="" package="" new_desc=""
  local retry_marker="[RETRY: Fix syntax]"
  local module_re="No module named '([^']+)'"
  local package_re='^[A-Za-z0-9_][A-Za-z0-9_-]*$'

  echo "[INFO] Attempting auto-recovery for $failure_type..."

  case "$failure_type" in
    "import_error")
      # Extract the first missing module name from the evaluator output.
      if [[ $error_output =~ $module_re ]]; then
        missing_module="${BASH_REMATCH[1]}"
      fi

      if [[ -n $missing_module ]]; then
        echo "[INFO] Detected missing module: $missing_module"

        # For "pkg.sub" the installable unit is the top-level package.
        package="${missing_module%%.*}"

        # NOTE(review): the name originates from the output of generated
        # code, so auto-installing it is a supply-chain risk. At minimum,
        # refuse anything that is not a plain identifier-like name (this also
        # blocks values that pip would parse as options or URLs).
        if [[ ! $package =~ $package_re ]]; then
          echo "[WARN] Refusing to auto-install unexpected module name: $missing_module"
          return 1
        fi

        echo "[INFO] Attempting to install via pip..."

        # Use the interpreter that runs the evaluator so the package lands in
        # the environment that actually reported the import failure.
        if "${PYTHON_CMD:-python3}" -m pip install "$package" 2>&1; then
          echo "[INFO] Successfully installed $package. Retrying evaluation..."
          return 0 # Signal to retry
        else
          echo "[WARN] Failed to install $package automatically"
          echo "[INFO] Please install manually: pip install $package"
        fi
      fi
      ;;

    "syntax_error")
      echo "[INFO] Detected syntax error in generated code"

      # Only one retry per candidate: without this guard a candidate that
      # keeps producing syntax errors is re-queued forever.
      if [[ $description == "$retry_marker"* ]]; then
        echo "[WARN] Candidate was already retried after a syntax error; not retrying again"
        return 1
      fi

      echo "[INFO] Retrying mutation with additional constraints..."

      # Put the candidate back in the queue
      update_csv_row "$row_num" "" "pending"

      # Add a recovery marker to the description to guide the next attempt.
      # The rewrite goes through Python's csv module so quoted fields that
      # contain commas, and any columns after the fifth, survive intact.
      new_desc="$retry_marker $description"
      if ! "${PYTHON_CMD:-python3}" - "$FULL_CSV_PATH" "$row_num" "$new_desc" <<'PY'
import csv
import os
import shutil
import sys
import tempfile

path, row_num, new_desc = sys.argv[1], int(sys.argv[2]), sys.argv[3]

with open(path, newline="") as src:
    rows = list(csv.reader(src))

if not 1 <= row_num <= len(rows):
    sys.exit(1)

row = rows[row_num - 1]
row.extend([""] * (5 - len(row)))  # make sure a status column exists
row[2] = new_desc
row[4] = "pending"

# Write next to the target, then rename over it, so the CSV is never seen
# half-written.
fd, tmp = tempfile.mkstemp(dir=os.path.dirname(os.path.abspath(path)))
try:
    with os.fdopen(fd, "w", newline="") as dst:
        csv.writer(dst, lineterminator="\n").writerows(rows)
    shutil.copymode(path, tmp)
    os.replace(tmp, path)
finally:
    if os.path.exists(tmp):
        os.unlink(tmp)
PY
      then
        echo "[WARN] Could not update the description of row $row_num" >&2
        return 1
      fi
      return 0 # Signal to retry
      ;;

    "zero_score")
      # For zero scores, only log diagnostic hints
      echo "[INFO] Algorithm produced zero score - likely a logic error"
      echo "[INFO] Consider adding constraints to BRIEF.md to avoid this approach"
      ;;
  esac

  return 1 # No recovery possible
}
|
|
228
|
-
|
|
229
|
-
# Main evolution loop
#
# Repeatedly takes the next pending candidate from the CSV, copies its parent
# algorithm, asks Claude to apply the described mutation, runs the evaluator
# on the result and records the outcome. The loop ends when
#   - no pending candidate is left and auto ideation is off (exit 0),
#   - ideation fails, the Claude usage limit is hit, or the Claude CLI is
#     missing (exit 1),
#   - should_continue_after_failure reports too many consecutive failures
#     (exit 1), or
#   - the user interrupts the run (exit 130).
#
# The helpers below work on the loop's globals (id, output_file, LOGFILE,
# eval_output, eval_exit_code, timeout_seconds) and are only meant to be
# called from inside the loop.

# Print the value of the "performance" (preferred) or "score" key of one line
# of evaluator JSON. Prints nothing and returns non-zero when the line is not
# valid JSON or has neither key.
_extract_score() {
  printf '%s\n' "$1" | "$PYTHON_CMD" -c '
import json
import sys

try:
    data = json.loads(sys.stdin.read().strip())
    if "performance" in data:
        print(data["performance"])
    elif "score" in data:
        print(data["score"])
    else:
        sys.exit(1)
except Exception:
    sys.exit(1)
' 2>/dev/null
}

# Return 0 when $1 parses as a number equal to zero, non-zero otherwise
# (including when it is not a number at all).
_score_is_zero() {
  "$PYTHON_CMD" -c 'import sys; sys.exit(0 if float(sys.argv[1]) == 0 else 1)' "$1" 2>/dev/null
}

# Run the evaluator on $output_file, honouring --timeout when one was given.
# Sets eval_output (combined stdout/stderr) and eval_exit_code.
_run_evaluator() {
  eval_exit_code=0
  if [[ -n $timeout_seconds ]]; then
    eval_output=$(EXPERIMENT_ID="$id" timeout "$timeout_seconds" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1) || eval_exit_code=$?
  else
    eval_output=$(EXPERIMENT_ID="$id" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1) || eval_exit_code=$?
  fi
}

# True when the last evaluator run was stopped by the --timeout limit
# (timeout(1) reports that case with exit status 124).
_evaluation_timed_out() {
  [[ -n $timeout_seconds && $eval_exit_code -eq 124 ]]
}

# Show the evaluator output on the terminal and append it to the
# per-iteration evaluator log.
_report_evaluator_run() {
  echo "[INFO] Evaluator output:"
  echo "----------------------------------------"
  printf '%s\n' "$eval_output"
  echo "----------------------------------------"

  {
    echo "=== EVALUATOR EXECUTION ==="
    echo "ID: $id"
    echo "Algorithm: $output_file"
    echo "Command: EXPERIMENT_ID=$id $PYTHON_CMD $FULL_EVALUATOR_PATH $output_file"
    echo "Exit code: $eval_exit_code"
    echo "Timestamp: $(date)"
    echo
    echo "=== EVALUATOR OUTPUT ==="
    printf '%s\n' "$eval_output"
    echo
  } >>"${LOGFILE}-evaluator"
}

# Set interrupt handler - just exit without updating CSV status
trap 'echo "[INFO] Evolution interrupted"; exit 130' INT

while true; do
  # Find next candidate
  if ! row_num=$(find_empty_row); then
    echo "[INFO] No more pending candidates found."

    # Check if auto ideation is enabled
    echo "[DEBUG] AUTO_IDEATE value: '$AUTO_IDEATE'"
    if [[ "$AUTO_IDEATE" != "true" && "$AUTO_IDEATE" != "1" ]]; then
      echo "[INFO] Auto ideation is disabled. Evolution run complete."
      exit 0
    fi
    echo "[INFO] Auto ideation is enabled. Generating new ideas..."

    # Check if claude-evolve-ideate exists
    ideate_script="$SCRIPT_DIR/claude-evolve-ideate"
    if [[ ! -f "$ideate_script" ]]; then
      echo "[ERROR] claude-evolve-ideate script not found: $ideate_script" >&2
      echo "[INFO] Evolution run complete - no way to generate more ideas."
      exit 0
    fi

    # Generate new ideas using the multi-strategy approach
    echo "[INFO] Calling claude-evolve-ideate to generate new candidates..."
    if ! "$ideate_script"; then
      echo "[ERROR] Failed to generate new ideas" >&2
      echo "[INFO] Evolution run complete - ideation failed."
      exit 1
    fi

    echo "[INFO] New ideas generated successfully. Continuing evolution..."
    continue # Go back to start of loop to find the new candidates
  fi

  # Create log file prefix for this iteration
  LOGFILE="logs/claude-$(date +%Y%m%d_%H%M%S).txt"

  # Reset per-candidate state so nothing from the previous iteration can be
  # mistaken for data about this candidate.
  id="" based_on_id="" description=""
  skip_copy="" skip_claude="" reason=""
  score="" last_json=""

  # Get row data using CSV helper.
  # NOTE(review): the helper's stdout is eval'ed as shell code; that is only
  # safe if csv_helper.py shell-quotes every CSV value - confirm.
  if ! row_vars=$("$PYTHON_CMD" "$SCRIPT_DIR/../lib/csv_helper.py" get_row "$FULL_CSV_PATH" "$row_num"); then
    echo "[ERROR] Could not read row $row_num from $FULL_CSV_PATH" >&2
    exit 1
  fi
  eval "$row_vars"

  # The helper is expected to have set at least: id, based_on_id, description
  if [[ -z $id ]]; then
    echo "[ERROR] Empty ID found at row $row_num. CSV may be malformed." >&2
    exit 1
  fi

  echo "[INFO] Processing candidate ID: $id"
  echo "[INFO] Description: $description"
  echo "[INFO] Based on ID: $based_on_id"

  # Determine parent algorithm path
  if [[ -z $based_on_id || $based_on_id == "0" || $based_on_id == '""' ]]; then
    parent_file="$FULL_ALGORITHM_PATH"
    echo "[INFO] Using base algorithm (basedonID is empty or 0)"
  else
    # Handle both old format (numeric) and new format (genXX-XXX)
    if [[ $based_on_id =~ ^[0-9]+$ ]]; then
      parent_file="$FULL_OUTPUT_DIR/evolution_id${based_on_id}.py"
    else
      parent_file="$FULL_OUTPUT_DIR/evolution_${based_on_id}.py"
    fi

    if [[ ! -f $parent_file ]]; then
      echo "[ERROR] Parent algorithm file not found: $parent_file" >&2
      update_csv_row "$row_num" "" "failed"
      should_continue_after_failure || exit 1
      continue
    fi
  fi

  echo "[INFO] Using parent algorithm: $parent_file"

  # Generate output file path (same old/new ID formats as for the parent)
  if [[ $id =~ ^[0-9]+$ ]]; then
    output_file="$FULL_OUTPUT_DIR/evolution_id${id}.py"
  else
    output_file="$FULL_OUTPUT_DIR/evolution_${id}.py"
  fi
  echo "[INFO] Generating algorithm mutation..."

  # AIDEV-NOTE: Using common evolution processor logic to decide whether the
  # copy and/or the Claude step should be skipped (handles self-parent
  # detection). The processor reports its decision as shell assignments for
  # skip_copy, skip_claude and reason.
  # NOTE(review): same eval caveat as for csv_helper.py above.
  if processor_vars=$("$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_processor.py" "$id" "$based_on_id" "$FULL_OUTPUT_DIR" "$ROOT_DIR" "$parent_file" "$output_file"); then
    eval "$processor_vars"
  else
    echo "[WARN] evolution_processor.py failed; continuing without skip checks" >&2
  fi

  # Handle copy operation
  if [[ "$skip_copy" == "True" ]]; then
    echo "[INFO] ⚠️ Skipping copy - $reason"
  elif cp "$parent_file" "$output_file"; then
    echo "[INFO] Copied parent algorithm to: $output_file"
  else
    echo "[ERROR] Failed to copy parent algorithm to: $output_file" >&2
    update_csv_row "$row_num" "" "failed"
    should_continue_after_failure || exit 1
    continue
  fi

  # Check for claude CLI
  claude_cmd="${CLAUDE_CMD:-claude}"
  if ! command -v "$claude_cmd" >/dev/null 2>&1; then
    echo "[ERROR] Claude CLI not found. Please install 'claude' CLI tool or set CLAUDE_CMD environment variable." >&2
    update_csv_row "$row_num" "" "failed"
    exit 1
  fi

  # Always use Claude Sonnet
  CLAUDE_MODEL="sonnet"
  echo "[INFO] Using Claude Sonnet for development"

  # Create mutation prompt - Claude will edit the file directly
  prompt="Edit the file $output_file to implement this specific change: $description

IMPORTANT: Before starting the task, you MUST read and understand:
1. The project's CLAUDE.md file (if it exists) - this contains project-specific instructions
2. The user's global CLAUDE.md file at ~/.claude/CLAUDE.md (if it exists) - this contains general development principles
3. Ensure all your work follows the architectural and development guidelines from both files

Requirements:
- Edit the file directly (don't just provide comments or suggestions)
- Maintain the same function signatures and interfaces
- Make the specific change described above
- Ensure the code runs without syntax errors
- Add proper error handling if needed

The file currently contains the parent algorithm. Modify it according to the description above while adhering to all guidelines from the CLAUDE.md files."

  # Handle Claude mutation based on skip flags
  if [[ "$skip_claude" == "True" ]]; then
    echo "[INFO] ⚠️ Skipping Claude processing - $reason"
  else
    echo "[INFO] Calling Claude $CLAUDE_MODEL to apply mutation..."
    echo "[INFO] Claude will edit: $output_file"
    echo "[INFO] Logging to: ${LOGFILE}-developer"

    # Log the prompt; the response is appended by tee below
    {
      echo "=== EVOLUTION MUTATION PROMPT ==="
      echo "ID: $id"
      echo "Based on: $based_on_id"
      echo "Description: $description"
      echo "Output file: $output_file"
      echo "Model: $CLAUDE_MODEL"
      echo "Timestamp: $(date)"
      echo
      echo "$prompt"
      echo
      echo "=== CLAUDE RESPONSE ==="
    } >>"${LOGFILE}-developer"

    # Capture Claude's output for limit detection while tee appends it to
    # the log. PIPESTATUS has to be read inside the substitution: after the
    # assignment it no longer describes this pipeline, so the subshell exits
    # with Claude's own status (stage 1 of the pipeline).
    claude_output=$(
      printf '%s\n' "$prompt" \
        | "$claude_cmd" --dangerously-skip-permissions --model "$CLAUDE_MODEL" -p 2>&1 \
        | tee -a "${LOGFILE}-developer"
      exit "${PIPESTATUS[1]}"
    )
    claude_exit_code=$?

    # Check for usage limit
    if grep -q "Claude AI usage limit reached" <<<"$claude_output"; then
      # Extract the epoch timestamp that follows the message, if any
      limit_timestamp=$(grep -o "Claude AI usage limit reached|[0-9]*" <<<"$claude_output" | head -1 | cut -d'|' -f2)

      # Print red error message
      echo -e "\033[31m[ERROR] CLAUDE AI USAGE LIMIT REACHED!\033[0m" >&2
      echo -e "\033[31m[ERROR] Evolution halted due to API rate limits.\033[0m" >&2

      if [[ -n $limit_timestamp ]]; then
        # Convert timestamp to human-readable format: 'date -r <epoch>' is
        # the BSD/macOS spelling, 'date -d @<epoch>' the GNU one.
        limit_date=$(date -r "$limit_timestamp" "+%Y-%m-%d %H:%M:%S" 2>/dev/null \
          || date -d "@$limit_timestamp" "+%Y-%m-%d %H:%M:%S" 2>/dev/null \
          || echo "Unknown time")
        echo -e "\033[31m[ERROR] Limit will be released at: $limit_date\033[0m" >&2
      fi

      echo -e "\033[33m[INFO] Please wait for the rate limit to reset before continuing.\033[0m" >&2
      echo -e "\033[33m[INFO] The current candidate will remain in 'pending' status.\033[0m" >&2

      # Don't mark as failed - leave it pending for retry later
      exit 1
    fi

    # Check for other failures
    if [[ $claude_exit_code -ne 0 ]]; then
      echo "[ERROR] Claude failed to mutate algorithm" >&2
      update_csv_row "$row_num" "" "failed"
      should_continue_after_failure || exit 1
      continue
    fi

    echo "[INFO] Claude completed mutation"
  fi

  echo "[INFO] Algorithm ready at: $output_file"

  # Run evaluator
  echo "[INFO] Running evaluation..."
  echo "[INFO] Executing: EXPERIMENT_ID=$id $PYTHON_CMD $FULL_EVALUATOR_PATH $output_file"
  if [[ -n $timeout_seconds ]]; then
    echo "[INFO] Evaluation timeout: ${timeout_seconds}s"
  fi
  _run_evaluator
  _report_evaluator_run

  # A missing Python module gets one install-and-retry attempt; the retry
  # goes through the same runner, so it honours --timeout and is logged too.
  if [[ $eval_exit_code -ne 0 ]] && ! _evaluation_timed_out \
    && grep -q "No module named" <<<"$eval_output" \
    && attempt_recovery "import_error" "$eval_output" "$row_num" "$id" "$description"; then
    echo "[INFO] Retrying evaluation after recovery attempt..."
    _run_evaluator
    _report_evaluator_run
  fi

  if _evaluation_timed_out; then
    echo "[ERROR] Evaluation timed out after ${timeout_seconds} seconds" >&2
    update_csv_row "$row_num" "" "timeout"
    should_continue_after_failure || exit 1
    continue
  fi

  if [[ $eval_exit_code -ne 0 ]]; then
    echo "[ERROR] Evaluator failed with exit code $eval_exit_code" >&2
    echo "[ERROR] Output: $eval_output" >&2

    # A syntax error re-queues the candidate (attempt_recovery resets it to
    # pending), so it is not counted as a failure.
    if grep -q "SyntaxError" <<<"$eval_output" \
      && attempt_recovery "syntax_error" "$eval_output" "$row_num" "$id" "$description"; then
      consecutive_failures=0
      continue
    fi

    # No recovery worked (including a retry that failed again): mark failed
    update_csv_row "$row_num" "" "failed"
    should_continue_after_failure || exit 1
    continue
  fi

  # Process results: the score comes from the last line of evaluator output
  # that looks like a complete JSON object.
  last_json=$(grep '^{.*}$' <<<"$eval_output" | tail -1)
  if [[ -n $last_json ]]; then
    echo "[DEBUG] Found JSON: $last_json"
    score=$(_extract_score "$last_json")
  fi

  if [[ -z $score ]]; then
    echo "[ERROR] Failed to parse evaluator output" >&2
    echo "[ERROR] Output: $eval_output" >&2
    update_csv_row "$row_num" "" "failed"
    should_continue_after_failure || exit 1
    continue
  fi
  echo "[DEBUG] Extracted score via 'score' field: '$score'"

  # Update CSV with full JSON data.
  # NOTE(review): for a zero score the status is whatever update_with_json
  # writes; confirm that csv_helper.py records such rows as failed.
  echo "[DEBUG] Updating CSV with full JSON data"
  echo "[DEBUG] CSV path: ${FULL_CSV_PATH}"
  echo "[DEBUG] Target ID: $id (row $row_num)"
  if "$PYTHON_CMD" "${SCRIPT_DIR}/../lib/csv_helper.py" update_with_json "${FULL_CSV_PATH}" "$id" "$last_json"; then
    echo "[DEBUG] CSV helper succeeded"
  else
    echo "[DEBUG] CSV helper failed with exit code $?"
  fi

  # A score of 0 counts as a failed evaluation
  if _score_is_zero "$score"; then
    echo "[INFO] ✗ Evaluation failed with score 0"
    echo "[INFO] Performance score: $score"
    should_continue_after_failure || exit 1
    continue
  fi

  echo "[INFO] ✓ Evaluation completed successfully"
  echo "[INFO] Performance score: $score"

  echo "[INFO] Evolution cycle completed successfully!"
  consecutive_failures=0 # Reset failure counter on success

  echo "[INFO] Looking for next candidate..."
  echo "----------------------------------------"
done # End of main evolution loop
|