claude-evolve 1.4.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/claude-evolve-status +6 -1
- package/bin/claude-evolve-worker +138 -459
- package/package.json +1 -1
package/bin/claude-evolve-status
CHANGED
|
@@ -255,7 +255,12 @@ try:
|
|
|
255
255
|
status_str = f'{data[\"pending\"]}p {data[\"complete\"]}c {data[\"failed\"]}f {data[\"running\"]}r'
|
|
256
256
|
|
|
257
257
|
if gen_best:
|
|
258
|
-
|
|
258
|
+
# Check if this generation's best is the overall winner
|
|
259
|
+
if winner and gen_best[0] == winner[0]:
|
|
260
|
+
# Highlight in green if it's the overall winner
|
|
261
|
+
print(f' {gen}: {total} total ({status_str}) - best: \033[32m{gen_best[0]} ({gen_best[2]:.4f})\033[0m')
|
|
262
|
+
else:
|
|
263
|
+
print(f' {gen}: {total} total ({status_str}) - best: {gen_best[0]} ({gen_best[2]:.4f})')
|
|
259
264
|
else:
|
|
260
265
|
print(f' {gen}: {total} total ({status_str}) - best: none')
|
|
261
266
|
|
package/bin/claude-evolve-worker
CHANGED
|
@@ -1,497 +1,176 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
|
-
# Worker process for parallel evolution execution
|
|
3
|
-
# Processes a single evolution candidate and exits
|
|
4
|
-
|
|
5
2
|
set -e
|
|
6
3
|
|
|
7
|
-
#
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
# Cleanup function for temp files
|
|
11
|
-
cleanup_temp() {
|
|
12
|
-
if [[ -n "$temp_file" && -f "$temp_file" ]]; then
|
|
13
|
-
rm -f "$temp_file"
|
|
14
|
-
echo "[WORKER-$$] Cleaned up temp file: $temp_file" >&2
|
|
15
|
-
fi
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
# Set trap to clean up temp files on exit
|
|
19
|
-
trap cleanup_temp EXIT INT TERM
|
|
20
|
-
|
|
21
|
-
# Function to handle failures with retry logic
|
|
22
|
-
handle_failure() {
|
|
23
|
-
local candidate_id="$1"
|
|
24
|
-
local current_status="$2"
|
|
25
|
-
local performance="${3:-0}"
|
|
26
|
-
|
|
27
|
-
# If this is already a retry, increment the retry count
|
|
28
|
-
if [[ $current_status =~ ^failed-retry([0-9]+)$ ]]; then
|
|
29
|
-
local retry_num=${BASH_REMATCH[1]}
|
|
30
|
-
local new_retry_num=$((retry_num + 1))
|
|
31
|
-
|
|
32
|
-
if [[ $new_retry_num -le $MAX_RETRIES ]]; then
|
|
33
|
-
local new_status="failed-retry${new_retry_num}"
|
|
34
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" update "$candidate_id" "$new_status"
|
|
35
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" perf "$candidate_id" "$performance"
|
|
36
|
-
echo "[WORKER-$$] ✗ Retry $retry_num failed, marked as $new_status"
|
|
37
|
-
exit 1
|
|
38
|
-
else
|
|
39
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" update "$candidate_id" "failed"
|
|
40
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" perf "$candidate_id" "$performance"
|
|
41
|
-
echo "[WORKER-$$] ✗ Max retries ($MAX_RETRIES) exceeded, marking as permanently failed"
|
|
42
|
-
exit 1
|
|
43
|
-
fi
|
|
44
|
-
elif [[ $current_status == "failed" ]]; then
|
|
45
|
-
# Initial failure, convert to retry1 if retries are enabled
|
|
46
|
-
if [[ $MAX_RETRIES -gt 0 ]]; then
|
|
47
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" update "$candidate_id" "failed-retry1"
|
|
48
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" perf "$candidate_id" "$performance"
|
|
49
|
-
echo "[WORKER-$$] ✗ Initial failure, marked as failed-retry1 for retry"
|
|
50
|
-
exit 1
|
|
51
|
-
else
|
|
52
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" update "$candidate_id" "failed"
|
|
53
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" perf "$candidate_id" "$performance"
|
|
54
|
-
echo "[WORKER-$$] ✗ Failed (retries disabled)"
|
|
55
|
-
# Use exit code 1 - systemic issue since retries are disabled
|
|
56
|
-
exit 1
|
|
57
|
-
fi
|
|
58
|
-
else
|
|
59
|
-
# Not a failure scenario, convert to retry1 if retries enabled
|
|
60
|
-
if [[ $MAX_RETRIES -gt 0 ]]; then
|
|
61
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" update "$candidate_id" "failed-retry1"
|
|
62
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" perf "$candidate_id" "$performance"
|
|
63
|
-
echo "[WORKER-$$] ✗ Evaluation failed, marked as failed-retry1 for retry"
|
|
64
|
-
exit 1
|
|
65
|
-
else
|
|
66
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" update "$candidate_id" "failed"
|
|
67
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" perf "$candidate_id" "$performance"
|
|
68
|
-
echo "[WORKER-$$] ✗ Evaluation failed (retries disabled)"
|
|
69
|
-
exit 1
|
|
70
|
-
fi
|
|
71
|
-
fi
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
# Load configuration
|
|
75
|
-
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
76
|
-
# shellcheck source=../lib/config.sh
|
|
4
|
+
# Source configuration
|
|
5
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"
|
|
77
6
|
source "$SCRIPT_DIR/../lib/config.sh"
|
|
78
|
-
# shellcheck source=../lib/csv-lock.sh
|
|
79
7
|
source "$SCRIPT_DIR/../lib/csv-lock.sh"
|
|
80
8
|
|
|
81
|
-
# Use CLAUDE_EVOLVE_CONFIG if set, otherwise default
|
|
82
|
-
if [[ -n ${CLAUDE_EVOLVE_CONFIG:-} ]]; then
|
|
83
|
-
load_config "$CLAUDE_EVOLVE_CONFIG"
|
|
84
|
-
else
|
|
85
|
-
load_config
|
|
86
|
-
fi
|
|
87
|
-
|
|
88
9
|
# Parse arguments
|
|
89
10
|
timeout_seconds=""
|
|
90
|
-
candidate_id=""
|
|
91
|
-
|
|
92
11
|
while [[ $# -gt 0 ]]; do
|
|
93
|
-
case $1 in
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
12
|
+
case "$1" in
|
|
13
|
+
--timeout)
|
|
14
|
+
timeout_seconds="$2"
|
|
15
|
+
shift 2
|
|
16
|
+
;;
|
|
17
|
+
*)
|
|
18
|
+
echo "[ERROR] Unknown argument: $1" >&2
|
|
97
19
|
exit 1
|
|
98
|
-
|
|
99
|
-
timeout_seconds="$2"
|
|
100
|
-
shift 2
|
|
101
|
-
;;
|
|
102
|
-
--id)
|
|
103
|
-
candidate_id="$2"
|
|
104
|
-
shift 2
|
|
105
|
-
;;
|
|
106
|
-
*)
|
|
107
|
-
echo "[ERROR] Unknown option: $1" >&2
|
|
108
|
-
exit 1
|
|
109
|
-
;;
|
|
20
|
+
;;
|
|
110
21
|
esac
|
|
111
22
|
done
|
|
112
23
|
|
|
113
|
-
#
|
|
114
|
-
if [[ -
|
|
115
|
-
|
|
116
|
-
if [[ -z $candidate_result ]]; then
|
|
117
|
-
echo "[DEBUG] No pending candidates found" >&2
|
|
118
|
-
exit 0
|
|
119
|
-
fi
|
|
120
|
-
|
|
121
|
-
# Parse candidate_id|original_status format
|
|
122
|
-
if [[ $candidate_result == *"|"* ]]; then
|
|
123
|
-
candidate_id="${candidate_result%|*}" # Everything before |
|
|
124
|
-
original_candidate_status="${candidate_result#*|}" # Everything after |
|
|
125
|
-
else
|
|
126
|
-
# Fallback for old format (shouldn't happen)
|
|
127
|
-
candidate_id="$candidate_result"
|
|
128
|
-
original_candidate_status=""
|
|
129
|
-
fi
|
|
130
|
-
else
|
|
131
|
-
# Mark specified candidate as running using UNIFIED LOGIC
|
|
132
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" update "$candidate_id" "running"
|
|
133
|
-
original_candidate_status="" # Unknown for manually specified candidates
|
|
134
|
-
fi
|
|
135
|
-
|
|
136
|
-
echo "[WORKER-$$] Processing candidate ID: $candidate_id"
|
|
137
|
-
|
|
138
|
-
# Validate workspace
|
|
139
|
-
if [[ ! -d "$FULL_EVOLUTION_DIR" ]]; then
|
|
140
|
-
echo "[ERROR] Evolution directory not found: $FULL_EVOLUTION_DIR" >&2
|
|
141
|
-
exit 1
|
|
142
|
-
fi
|
|
143
|
-
|
|
144
|
-
# Create log file for this run
|
|
145
|
-
mkdir -p logs
|
|
146
|
-
LOGFILE="logs/worker-${candidate_id}-$(date +%Y%m%d_%H%M%S).txt"
|
|
147
|
-
|
|
148
|
-
# Find candidate in CSV
|
|
149
|
-
row_data=""
|
|
150
|
-
if ! read_csv_with_lock csv_content; then
|
|
151
|
-
echo "[ERROR] Failed to read CSV" >&2
|
|
24
|
+
# Validate paths
|
|
25
|
+
if [[ ! -f "$FULL_CSV_PATH" ]]; then
|
|
26
|
+
echo "[WORKER-$$] CSV file not found: $FULL_CSV_PATH" >&2
|
|
152
27
|
exit 1
|
|
153
28
|
fi
|
|
154
29
|
|
|
155
|
-
#
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
csv_content = '''$csv_content'''
|
|
162
|
-
reader = csv.reader(io.StringIO(csv_content))
|
|
163
|
-
next(reader) # Skip header
|
|
164
|
-
|
|
165
|
-
found = False
|
|
166
|
-
for row in reader:
|
|
167
|
-
if len(row) >= 5 and row[0] == '$candidate_id':
|
|
168
|
-
# Escape special characters for shell
|
|
169
|
-
desc = row[2].replace('\\\\', '\\\\\\\\').replace('\"', '\\\\\"').replace('\$', '\\\\\$').replace('\`', '\\\\\`')
|
|
170
|
-
print(f'id=\"{row[0]}\"')
|
|
171
|
-
print(f'based_on_id=\"{row[1]}\"')
|
|
172
|
-
print(f'description=\"{desc}\"')
|
|
173
|
-
print(f'performance=\"{row[3]}\"')
|
|
174
|
-
print(f'status=\"{row[4]}\"')
|
|
175
|
-
print('found=true')
|
|
176
|
-
found = True
|
|
177
|
-
break
|
|
178
|
-
|
|
179
|
-
if not found:
|
|
180
|
-
print('found=false')
|
|
181
|
-
")"
|
|
182
|
-
|
|
183
|
-
if [[ $found == false ]]; then
|
|
184
|
-
echo "[ERROR] Candidate ID not found: $candidate_id" >&2
|
|
185
|
-
exit 1
|
|
186
|
-
fi
|
|
187
|
-
|
|
188
|
-
echo "[WORKER-$$] Description: $description"
|
|
189
|
-
echo "[WORKER-$$] Based on ID: $based_on_id"
|
|
190
|
-
|
|
191
|
-
# AIDEV-NOTE: Retry logic - detect if this is a retry attempt
|
|
192
|
-
is_retry=false
|
|
193
|
-
retry_count=0
|
|
194
|
-
# Use original_candidate_status for retry detection (if available), otherwise fall back to CSV status
|
|
195
|
-
retry_status="$original_candidate_status"
|
|
196
|
-
if [[ -z "$retry_status" ]]; then
|
|
197
|
-
retry_status="$status"
|
|
198
|
-
fi
|
|
199
|
-
|
|
200
|
-
if [[ $retry_status =~ ^failed-retry([0-9]+)$ ]]; then
|
|
201
|
-
is_retry=true
|
|
202
|
-
retry_count=${BASH_REMATCH[1]}
|
|
203
|
-
echo "[WORKER-$$] 🔄 Processing retry attempt #$retry_count"
|
|
204
|
-
elif [[ $retry_status == "failed" ]]; then
|
|
205
|
-
echo "[WORKER-$$] ⚠️ Initial failure detected - this should be converted to failed-retry1 to enable retries"
|
|
206
|
-
fi
|
|
207
|
-
|
|
208
|
-
# AIDEV-NOTE: Using common evolution processor logic for consistent handling
|
|
209
|
-
# Determine parent algorithm
|
|
210
|
-
if [[ -z $based_on_id || $based_on_id == "0" || $based_on_id == '""' ]]; then
|
|
211
|
-
parent_file="$FULL_ALGORITHM_PATH"
|
|
212
|
-
echo "[WORKER-$$] Using base algorithm"
|
|
213
|
-
else
|
|
214
|
-
# Handle both old and new format IDs
|
|
215
|
-
if [[ $based_on_id =~ ^[0-9]+$ ]]; then
|
|
216
|
-
parent_file="$FULL_OUTPUT_DIR/evolution_id${based_on_id}.py"
|
|
217
|
-
else
|
|
218
|
-
parent_file="$FULL_OUTPUT_DIR/evolution_${based_on_id}.py"
|
|
219
|
-
fi
|
|
220
|
-
|
|
221
|
-
if [[ ! -f $parent_file ]]; then
|
|
222
|
-
echo "[ERROR] Parent algorithm not found: $parent_file" >&2
|
|
223
|
-
handle_failure "$candidate_id" "$retry_status" "0"
|
|
224
|
-
exit 1
|
|
225
|
-
fi
|
|
226
|
-
fi
|
|
227
|
-
|
|
228
|
-
# Generate output file path
|
|
229
|
-
if [[ $id =~ ^[0-9]+$ ]]; then
|
|
230
|
-
output_file="$FULL_OUTPUT_DIR/evolution_id${id}.py"
|
|
231
|
-
else
|
|
232
|
-
output_file="$FULL_OUTPUT_DIR/evolution_${id}.py"
|
|
233
|
-
fi
|
|
234
|
-
|
|
235
|
-
# Use temp file for mutations to avoid partial/failed edits
|
|
236
|
-
temp_file="${output_file}.tmp$$"
|
|
237
|
-
|
|
238
|
-
# Check if processing should be skipped using common logic
|
|
239
|
-
# Set environment variable for retry detection
|
|
240
|
-
if [[ $is_retry == "true" ]]; then
|
|
241
|
-
export RETRY_CANDIDATE=true
|
|
242
|
-
else
|
|
243
|
-
export RETRY_CANDIDATE=false
|
|
244
|
-
fi
|
|
245
|
-
|
|
246
|
-
eval "$("$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_processor.py" "$id" "$based_on_id" "$FULL_OUTPUT_DIR" "$ROOT_DIR" "$parent_file" "$output_file")"
|
|
247
|
-
|
|
248
|
-
# Handle copy operation to temp file
|
|
249
|
-
if [[ "$skip_copy" == "True" ]]; then
|
|
250
|
-
echo "[WORKER-$$] ⚠️ Skipping copy - $reason"
|
|
251
|
-
elif [[ $is_retry == "true" ]]; then
|
|
252
|
-
# For retries, edit the existing failed algorithm in-place
|
|
253
|
-
if [[ -f "$output_file" ]]; then
|
|
254
|
-
cp "$output_file" "$temp_file"
|
|
255
|
-
echo "[WORKER-$$] 🔄 Copied existing algorithm for retry: $temp_file"
|
|
256
|
-
else
|
|
257
|
-
# Fallback to parent if existing file doesn't exist
|
|
258
|
-
cp "$parent_file" "$temp_file"
|
|
259
|
-
echo "[WORKER-$$] ⚠️ Existing algorithm not found, using parent: $temp_file"
|
|
260
|
-
fi
|
|
261
|
-
else
|
|
262
|
-
cp "$parent_file" "$temp_file"
|
|
263
|
-
echo "[WORKER-$$] Copied parent to temp file: $temp_file"
|
|
264
|
-
fi
|
|
265
|
-
|
|
266
|
-
# Handle Claude mutation based on skip flags
|
|
267
|
-
if [[ "$skip_claude" == "True" ]]; then
|
|
268
|
-
echo "[WORKER-$$] ⚠️ Skipping Claude processing - $reason"
|
|
269
|
-
# If we have a temp file but are skipping Claude, move it to final location
|
|
270
|
-
if [[ -f "$temp_file" ]]; then
|
|
271
|
-
mv "$temp_file" "$output_file"
|
|
272
|
-
echo "[WORKER-$$] Moved temp file to final location (no Claude processing)"
|
|
273
|
-
fi
|
|
274
|
-
else
|
|
275
|
-
# Check for claude CLI
|
|
276
|
-
claude_cmd="${CLAUDE_CMD:-claude}"
|
|
277
|
-
if ! command -v "$claude_cmd" >/dev/null 2>&1; then
|
|
278
|
-
echo "[ERROR] Claude CLI not found" >&2
|
|
279
|
-
handle_failure "$candidate_id" "$retry_status" "0"
|
|
280
|
-
exit 1
|
|
281
|
-
fi
|
|
30
|
+
# Process a single candidate
|
|
31
|
+
process_candidate() {
|
|
32
|
+
local candidate_id="$1"
|
|
33
|
+
local parent_id="$2"
|
|
34
|
+
local description="$3"
|
|
282
35
|
|
|
283
|
-
|
|
284
|
-
echo "[WORKER-$$]
|
|
36
|
+
echo "[WORKER-$$] Processing candidate ID: $candidate_id"
|
|
37
|
+
echo "[WORKER-$$] Description: $description"
|
|
38
|
+
echo "[WORKER-$$] Based on ID: $parent_id"
|
|
285
39
|
|
|
286
|
-
#
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
- Analyze the code for potential bugs (syntax errors, logical issues, missing imports, etc.)
|
|
292
|
-
- Fix any problems you find
|
|
293
|
-
- Ensure the code runs without errors
|
|
294
|
-
- Make sure it still implements the intended change: $description
|
|
295
|
-
- Add appropriate error handling and validation
|
|
296
|
-
- If possible, suggest a simple way to test this fix
|
|
297
|
-
|
|
298
|
-
This is retry attempt #$retry_count. Focus on making the code robust and correct."
|
|
40
|
+
# Determine source algorithm
|
|
41
|
+
local source_file
|
|
42
|
+
if [[ -z "$parent_id" ]]; then
|
|
43
|
+
echo "[WORKER-$$] Using base algorithm"
|
|
44
|
+
source_file="$FULL_ALGORITHM_PATH"
|
|
299
45
|
else
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
- Make the specific change described above
|
|
306
|
-
- Ensure the code runs without syntax errors
|
|
307
|
-
- Add proper error handling if needed
|
|
308
|
-
|
|
309
|
-
The file currently contains the parent algorithm. Modify it according to the description above."
|
|
310
|
-
fi
|
|
311
|
-
|
|
312
|
-
# Log prompt
|
|
313
|
-
{
|
|
314
|
-
echo "=== WORKER $$ - MUTATION PROMPT ==="
|
|
315
|
-
echo "ID: $id"
|
|
316
|
-
echo "Timestamp: $(date)"
|
|
317
|
-
echo "$prompt"
|
|
318
|
-
echo
|
|
319
|
-
} >> "$LOGFILE"
|
|
320
|
-
|
|
321
|
-
# Call Claude
|
|
322
|
-
echo "[WORKER-$$] Calling Claude to apply mutation..."
|
|
323
|
-
claude_output=$(echo "$prompt" | "$claude_cmd" --dangerously-skip-permissions --model $CLAUDE_MODEL -p 2>&1 | tee -a "$LOGFILE")
|
|
324
|
-
claude_exit_code=${PIPESTATUS[1]}
|
|
325
|
-
|
|
326
|
-
# Check for rate limit (multiple possible messages)
|
|
327
|
-
if echo "$claude_output" | grep -q -E "(usage limit|rate limit|limit reached|too many requests)"; then
|
|
328
|
-
echo "⚠️ Claude API rate limit reached" >&2
|
|
329
|
-
echo "⚠️ Claude output:" >&2
|
|
330
|
-
echo "$claude_output" >&2
|
|
331
|
-
# Clean up the temp file
|
|
332
|
-
if [[ -f "$temp_file" ]]; then
|
|
333
|
-
rm "$temp_file"
|
|
334
|
-
echo "[WORKER-$$] Cleaned up temp file due to rate limit" >&2
|
|
46
|
+
echo "[WORKER-$$] Using parent algorithm: $parent_id"
|
|
47
|
+
source_file="$FULL_OUTPUT_DIR/evolution_${parent_id}.py"
|
|
48
|
+
if [[ ! -f "$source_file" ]]; then
|
|
49
|
+
echo "[WORKER-$$] ERROR: Parent algorithm not found: $source_file" >&2
|
|
50
|
+
return 1
|
|
335
51
|
fi
|
|
336
|
-
# Reset to pending so it can be retried later
|
|
337
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" update "$candidate_id" "pending"
|
|
338
|
-
exit 2 # Special exit code for rate limit
|
|
339
52
|
fi
|
|
340
53
|
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
echo "⚠️ Claude output:" >&2
|
|
344
|
-
echo "$claude_output" >&2
|
|
345
|
-
# Clean up the temp file
|
|
346
|
-
if [[ -f "$temp_file" ]]; then
|
|
347
|
-
rm "$temp_file"
|
|
348
|
-
echo "[WORKER-$$] Cleaned up temp file due to Claude failure" >&2
|
|
349
|
-
fi
|
|
350
|
-
handle_failure "$candidate_id" "$retry_status" "0"
|
|
351
|
-
exit 1
|
|
352
|
-
fi
|
|
54
|
+
# Target file for evolution
|
|
55
|
+
local target_file="$FULL_OUTPUT_DIR/evolution_${candidate_id}.py"
|
|
353
56
|
|
|
354
|
-
#
|
|
355
|
-
if [[ -f "$
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
echo "[WORKER-$$]
|
|
370
|
-
|
|
371
|
-
else
|
|
372
|
-
# If we can't compare, assume it's okay and move the file
|
|
373
|
-
if [[ -f "$temp_file" ]]; then
|
|
374
|
-
mv "$temp_file" "$output_file"
|
|
375
|
-
echo "[WORKER-$$] Moved temp file to: $output_file"
|
|
376
|
-
fi
|
|
377
|
-
fi
|
|
378
|
-
fi
|
|
379
|
-
|
|
380
|
-
# Run evaluator
|
|
381
|
-
echo "[WORKER-$$] Running evaluation..."
|
|
382
|
-
eval_output=""
|
|
383
|
-
eval_exit_code=0
|
|
384
|
-
|
|
385
|
-
if [[ -n $timeout_seconds ]]; then
|
|
386
|
-
echo "[WORKER-$$] Evaluation timeout: ${timeout_seconds}s"
|
|
387
|
-
# For Modal compatibility, don't capture stderr
|
|
388
|
-
if eval_output=$(EXPERIMENT_ID="$id" timeout "$timeout_seconds" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file"); then
|
|
389
|
-
eval_exit_code=0
|
|
390
|
-
else
|
|
391
|
-
eval_exit_code=$?
|
|
392
|
-
if [[ $eval_exit_code -eq 124 ]]; then
|
|
393
|
-
echo "[ERROR] Evaluation timed out" >&2
|
|
394
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" update "$candidate_id" "timeout"
|
|
395
|
-
exit 1
|
|
57
|
+
# Check if processing should be skipped
|
|
58
|
+
if [[ -f "$target_file" ]]; then
|
|
59
|
+
echo "[WORKER-$$] ⚠️ Skipping copy - File already exists - skipping all processing"
|
|
60
|
+
echo "[WORKER-$$] ⚠️ Skipping Claude processing - File already exists - skipping all processing"
|
|
61
|
+
|
|
62
|
+
# Check if already evaluated
|
|
63
|
+
local current_status
|
|
64
|
+
current_status=$("$PYTHON_CMD" -c "
|
|
65
|
+
from lib.evolution_csv import EvolutionCSV
|
|
66
|
+
csv = EvolutionCSV('$FULL_CSV_PATH')
|
|
67
|
+
status = csv.get_candidate_status('$candidate_id')
|
|
68
|
+
print(status if status else 'unknown')
|
|
69
|
+
")
|
|
70
|
+
|
|
71
|
+
if [[ "$current_status" == "complete" ]]; then
|
|
72
|
+
echo "[WORKER-$$] Already evaluated - skipping"
|
|
73
|
+
return 0
|
|
396
74
|
fi
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
if eval_output=$(EXPERIMENT_ID="$id" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file"); then
|
|
401
|
-
eval_exit_code=0
|
|
75
|
+
|
|
76
|
+
# Run evaluation only
|
|
77
|
+
echo "[WORKER-$$] Running evaluation..."
|
|
402
78
|
else
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
#
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
echo
|
|
413
|
-
} >> "$LOGFILE"
|
|
79
|
+
# Copy source to target
|
|
80
|
+
echo "[WORKER-$$] Copying $source_file to $target_file"
|
|
81
|
+
cp "$source_file" "$target_file"
|
|
82
|
+
|
|
83
|
+
# Apply evolution using Claude
|
|
84
|
+
echo "[WORKER-$$] Applying evolution with Claude..."
|
|
85
|
+
local evolution_prompt="Modify the algorithm in $target_file based on this description: $description
|
|
86
|
+
|
|
87
|
+
The modification should be substantial and follow the description exactly. Make sure the algorithm still follows all interface requirements and can run properly.
|
|
414
88
|
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
handle_failure "$candidate_id" "$retry_status" "$score"
|
|
422
|
-
exit 1
|
|
423
|
-
else
|
|
424
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" perf "$candidate_id" "$score"
|
|
425
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" update "$candidate_id" "complete"
|
|
426
|
-
echo "[WORKER-$$] ✓ Evaluation complete, score: $score"
|
|
427
|
-
exit 0
|
|
89
|
+
Important: Make meaningful changes that match the description. Don't just add comments or make trivial adjustments."
|
|
90
|
+
|
|
91
|
+
if ! echo "$evolution_prompt" | claude --dangerously-skip-permissions -p 2>&1; then
|
|
92
|
+
echo "[WORKER-$$] ERROR: Claude evolution failed" >&2
|
|
93
|
+
rm -f "$target_file" # Clean up on failure
|
|
94
|
+
return 1
|
|
428
95
|
fi
|
|
96
|
+
|
|
97
|
+
echo "[WORKER-$$] Evolution applied successfully"
|
|
429
98
|
fi
|
|
430
99
|
|
|
431
|
-
#
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
100
|
+
# Run evaluation
|
|
101
|
+
echo "[WORKER-$$] Evaluating algorithm..."
|
|
102
|
+
local eval_output
|
|
103
|
+
local eval_start=$(date +%s)
|
|
104
|
+
|
|
105
|
+
# Prepare evaluation command
|
|
106
|
+
local eval_cmd=("$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$candidate_id")
|
|
107
|
+
[[ -n "$timeout_seconds" ]] && eval_cmd=(timeout "$timeout_seconds" "${eval_cmd[@]}")
|
|
108
|
+
|
|
109
|
+
# Run evaluation and capture output
|
|
110
|
+
if eval_output=$("${eval_cmd[@]}" 2>&1); then
|
|
111
|
+
local eval_end=$(date +%s)
|
|
112
|
+
local eval_duration=$((eval_end - eval_start))
|
|
438
113
|
|
|
439
|
-
|
|
440
|
-
|
|
114
|
+
# Extract performance score
|
|
115
|
+
local score=$(echo "$eval_output" | grep -E "^SCORE:" | cut -d: -f2 | tr -d ' ')
|
|
441
116
|
|
|
442
|
-
if [[ -n $score
|
|
443
|
-
echo "[WORKER-$$]
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
117
|
+
if [[ -n "$score" ]]; then
|
|
118
|
+
echo "[WORKER-$$] Evaluation complete: score=$score (${eval_duration}s)"
|
|
119
|
+
|
|
120
|
+
# Update CSV with result
|
|
121
|
+
"$PYTHON_CMD" -c "
|
|
122
|
+
from lib.evolution_csv import EvolutionCSV
|
|
123
|
+
csv = EvolutionCSV('$FULL_CSV_PATH')
|
|
124
|
+
csv.update_candidate_status('$candidate_id', 'complete', performance='$score')
|
|
125
|
+
"
|
|
451
126
|
else
|
|
452
|
-
echo "[WORKER-$$]
|
|
453
|
-
|
|
127
|
+
echo "[WORKER-$$] ERROR: No score found in evaluation output" >&2
|
|
128
|
+
echo "[WORKER-$$] Output: $eval_output" >&2
|
|
129
|
+
return 1
|
|
454
130
|
fi
|
|
131
|
+
else
|
|
132
|
+
local exit_code=$?
|
|
133
|
+
echo "[WORKER-$$] ERROR: Evaluation failed with exit code $exit_code" >&2
|
|
134
|
+
echo "[WORKER-$$] Output: $eval_output" >&2
|
|
135
|
+
|
|
136
|
+
# Mark as failed in CSV
|
|
137
|
+
"$PYTHON_CMD" -c "
|
|
138
|
+
from lib.evolution_csv import EvolutionCSV
|
|
139
|
+
csv = EvolutionCSV('$FULL_CSV_PATH')
|
|
140
|
+
csv.update_candidate_status('$candidate_id', 'failed')
|
|
141
|
+
"
|
|
142
|
+
|
|
143
|
+
return $exit_code
|
|
455
144
|
fi
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
# Main worker loop
|
|
148
|
+
echo "[WORKER-$$] Worker started"
|
|
149
|
+
|
|
150
|
+
while true; do
|
|
151
|
+
# Try to claim a pending candidate
|
|
152
|
+
candidate_info=$("$PYTHON_CMD" -c "
|
|
153
|
+
from lib.evolution_csv import EvolutionCSV
|
|
154
|
+
csv = EvolutionCSV('$FULL_CSV_PATH')
|
|
155
|
+
candidate = csv.claim_next_pending()
|
|
156
|
+
if candidate:
|
|
157
|
+
print(f'{candidate[\"id\"]}|{candidate.get(\"parent_id\", \"\")}|{candidate[\"description\"]}')
|
|
158
|
+
")
|
|
456
159
|
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
if [[ $(echo "$score == 0" | bc -l) == "1" ]]; then
|
|
461
|
-
handle_failure "$candidate_id" "$retry_status" "$score"
|
|
462
|
-
exit 1
|
|
463
|
-
else
|
|
464
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" perf "$candidate_id" "$score"
|
|
465
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" update "$candidate_id" "complete"
|
|
466
|
-
echo "[WORKER-$$] ✓ Evaluation complete, score: $score"
|
|
467
|
-
exit 0
|
|
468
|
-
fi
|
|
469
|
-
fi
|
|
160
|
+
if [[ -z "$candidate_info" ]]; then
|
|
161
|
+
# No pending work
|
|
162
|
+
break
|
|
470
163
|
fi
|
|
471
164
|
|
|
472
|
-
#
|
|
473
|
-
|
|
474
|
-
if [[ -n $score ]]; then
|
|
475
|
-
if [[ $(echo "$score == 0" | bc -l) == "1" ]]; then
|
|
476
|
-
handle_failure "$candidate_id" "$retry_status" "$score"
|
|
477
|
-
exit 1
|
|
478
|
-
else
|
|
479
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" perf "$candidate_id" "$score"
|
|
480
|
-
"$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$FULL_CSV_PATH" update "$candidate_id" "complete"
|
|
481
|
-
echo "[WORKER-$$] ✓ Evaluation complete, score: $score"
|
|
482
|
-
exit 0
|
|
483
|
-
fi
|
|
484
|
-
fi
|
|
485
|
-
fi
|
|
165
|
+
# Parse candidate info
|
|
166
|
+
IFS='|' read -r candidate_id parent_id description <<< "$candidate_info"
|
|
486
167
|
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
exit 1
|
|
497
|
-
fi
|
|
168
|
+
# Process the candidate
|
|
169
|
+
if process_candidate "$candidate_id" "$parent_id" "$description"; then
|
|
170
|
+
echo "[WORKER-$$] Successfully processed $candidate_id"
|
|
171
|
+
else
|
|
172
|
+
echo "[WORKER-$$] Failed to process $candidate_id"
|
|
173
|
+
fi
|
|
174
|
+
done
|
|
175
|
+
|
|
176
|
+
echo "[WORKER-$$] No more pending candidates, worker exiting"
|