claude-evolve 1.0.19 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/claude-evolve-analyze +26 -25
- package/bin/claude-evolve-run +80 -40
- package/package.json +1 -1
|
@@ -81,24 +81,25 @@ while IFS=, read -r id _ desc perf status; do
|
|
|
81
81
|
((total++))
|
|
82
82
|
|
|
83
83
|
case "$status" in
|
|
84
|
-
"
|
|
84
|
+
"complete" | "completed")
|
|
85
|
+
((completed++))
|
|
86
|
+
# Only count performance for completed runs
|
|
87
|
+
if [[ -n $perf && $perf != "" ]]; then
|
|
88
|
+
total_performance=$(echo "$total_performance + $perf" | bc -l 2>/dev/null || echo "$total_performance")
|
|
89
|
+
((count_with_performance++))
|
|
90
|
+
|
|
91
|
+
# Check if this is the top performer
|
|
92
|
+
if [[ -z $top_score ]] || (($(echo "$perf > $top_score" | bc -l 2>/dev/null || echo "0"))); then
|
|
93
|
+
top_score="$perf"
|
|
94
|
+
top_id="$id"
|
|
95
|
+
top_desc="$desc"
|
|
96
|
+
fi
|
|
97
|
+
fi
|
|
98
|
+
;;
|
|
85
99
|
"running") ((running++)) ;;
|
|
86
100
|
"failed" | "timeout" | "interrupted") ((failed++)) ;;
|
|
87
101
|
*) ((pending++)) ;;
|
|
88
102
|
esac
|
|
89
|
-
|
|
90
|
-
# Track performance stats
|
|
91
|
-
if [[ -n $perf && $perf != "" ]]; then
|
|
92
|
-
total_performance=$(echo "$total_performance + $perf" | bc -l 2>/dev/null || echo "$total_performance")
|
|
93
|
-
((count_with_performance++))
|
|
94
|
-
|
|
95
|
-
# Check if this is the top performer
|
|
96
|
-
if [[ -z $top_score ]] || (($(echo "$perf > $top_score" | bc -l 2>/dev/null || echo "0"))); then
|
|
97
|
-
top_score="$perf"
|
|
98
|
-
top_id="$id"
|
|
99
|
-
top_desc="$desc"
|
|
100
|
-
fi
|
|
101
|
-
fi
|
|
102
103
|
done <"$csv_file"
|
|
103
104
|
|
|
104
105
|
# Display summary
|
|
@@ -153,17 +154,17 @@ plot "$data_file" using 1:2 with linespoints title "Performance"
|
|
|
153
154
|
EOF
|
|
154
155
|
|
|
155
156
|
rm -f "$data_file"
|
|
156
|
-
echo "Chart saved
|
|
157
|
-
|
|
158
|
-
#
|
|
159
|
-
if
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
157
|
+
echo "Chart saved to: $output_file"
|
|
158
|
+
|
|
159
|
+
# Always try to open chart (not just when --open is used)
|
|
160
|
+
if command -v open >/dev/null 2>&1; then
|
|
161
|
+
open "$output_file"
|
|
162
|
+
echo "Opening chart..."
|
|
163
|
+
elif command -v xdg-open >/dev/null 2>&1; then
|
|
164
|
+
xdg-open "$output_file"
|
|
165
|
+
echo "Opening chart..."
|
|
166
|
+
else
|
|
167
|
+
echo "[WARN] Cannot open chart automatically. View: $output_file"
|
|
167
168
|
fi
|
|
168
169
|
else
|
|
169
170
|
if [[ $count_with_performance -eq 0 ]]; then
|
package/bin/claude-evolve-run
CHANGED
|
@@ -49,8 +49,27 @@ EOF
|
|
|
49
49
|
esac
|
|
50
50
|
done
|
|
51
51
|
|
|
52
|
-
echo "[INFO] Starting evolution run..."
|
|
53
|
-
[
|
|
52
|
+
echo "[INFO] Starting continuous evolution run..."
|
|
53
|
+
echo "[INFO] Will continue running until no more pending candidates or 5 consecutive failures"
|
|
54
|
+
[[ -n $timeout_seconds ]] && echo "[INFO] Using timeout: ${timeout_seconds} seconds per evaluation"
|
|
55
|
+
|
|
56
|
+
# Track consecutive failures
|
|
57
|
+
consecutive_failures=0
|
|
58
|
+
MAX_FAILURES=5
|
|
59
|
+
|
|
60
|
+
# Track if we should continue after a failure
|
|
61
|
+
should_continue_after_failure() {
|
|
62
|
+
((consecutive_failures++))
|
|
63
|
+
|
|
64
|
+
if [[ $consecutive_failures -ge $MAX_FAILURES ]]; then
|
|
65
|
+
echo "[ERROR] Too many consecutive failures ($consecutive_failures). Stopping evolution run." >&2
|
|
66
|
+
return 1
|
|
67
|
+
else
|
|
68
|
+
echo "[WARN] Failure $consecutive_failures of $MAX_FAILURES. Continuing to next candidate..." >&2
|
|
69
|
+
echo "----------------------------------------"
|
|
70
|
+
return 0
|
|
71
|
+
fi
|
|
72
|
+
}
|
|
54
73
|
|
|
55
74
|
# Validate workspace using config
|
|
56
75
|
if [[ ! -d "$FULL_EVOLUTION_DIR" ]]; then
|
|
@@ -119,11 +138,13 @@ update_csv_row() {
|
|
|
119
138
|
mv "$temp_file" "$FULL_CSV_PATH"
|
|
120
139
|
}
|
|
121
140
|
|
|
122
|
-
#
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
141
|
+
# Main evolution loop
|
|
142
|
+
while true; do
|
|
143
|
+
# Find next candidate
|
|
144
|
+
if ! row_num=$(find_empty_row); then
|
|
145
|
+
echo "[INFO] No more pending candidates found. Evolution run complete!"
|
|
146
|
+
exit 0
|
|
147
|
+
fi
|
|
127
148
|
|
|
128
149
|
# Get row data
|
|
129
150
|
row_data=$(get_csv_row "$row_num")
|
|
@@ -157,7 +178,11 @@ else
|
|
|
157
178
|
if [[ ! -f $parent_file ]]; then
|
|
158
179
|
echo "[ERROR] Parent algorithm file not found: $parent_file" >&2
|
|
159
180
|
update_csv_row "$row_num" "" "failed"
|
|
160
|
-
|
|
181
|
+
if should_continue_after_failure; then
|
|
182
|
+
continue
|
|
183
|
+
else
|
|
184
|
+
exit 1
|
|
185
|
+
fi
|
|
161
186
|
fi
|
|
162
187
|
fi
|
|
163
188
|
|
|
@@ -192,53 +217,47 @@ if [ $((LOOP_COUNTER % 4)) -eq 0 ]; then
|
|
|
192
217
|
echo -e "\033[33m**** MEGATHINKING MODE ACTIVATED ****\033[0m"
|
|
193
218
|
CLAUDE_MODEL="opus"
|
|
194
219
|
MEGATHINK_PREFIX="megathink: "
|
|
195
|
-
echo "[INFO] Using Claude Opus for architectural thinking
|
|
220
|
+
echo "[INFO] Using Claude Opus for architectural thinking"
|
|
196
221
|
else
|
|
197
222
|
CLAUDE_MODEL="sonnet"
|
|
198
223
|
MEGATHINK_PREFIX=""
|
|
199
|
-
echo "[INFO] Using Claude Sonnet for development
|
|
224
|
+
echo "[INFO] Using Claude Sonnet for development"
|
|
200
225
|
fi
|
|
201
226
|
|
|
202
227
|
# Increment and save counter
|
|
203
228
|
echo $((LOOP_COUNTER + 1)) > "$FULL_EVOLUTION_DIR/.loop_counter"
|
|
204
229
|
|
|
205
|
-
# Create mutation prompt
|
|
206
|
-
prompt="${MEGATHINK_PREFIX}
|
|
230
|
+
# Create mutation prompt - Claude will edit the file directly
|
|
231
|
+
prompt="${MEGATHINK_PREFIX}Please edit file $output_file to implement the following change for iteration $id: $description
|
|
207
232
|
|
|
208
|
-
|
|
209
|
-
$(cat "$FULL_BRIEF_PATH" 2>/dev/null || echo "No brief available")
|
|
233
|
+
There may be extra information in documentation in the $FULL_EVOLUTION_DIR/*.md files.
|
|
210
234
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
REQUESTED MODIFICATION:
|
|
214
|
-
$description
|
|
215
|
-
|
|
216
|
-
INSTRUCTIONS:
|
|
217
|
-
1. Read the existing algorithm file at $output_file
|
|
218
|
-
2. Apply the requested modification while preserving the core structure
|
|
219
|
-
3. Ensure the modified algorithm maintains the same interface (function signatures)
|
|
220
|
-
4. Include proper error handling and documentation
|
|
221
|
-
5. Overwrite the file with your improved version
|
|
222
|
-
6. Return ONLY the complete Python code without explanation
|
|
223
|
-
|
|
224
|
-
The output should be a complete, executable Python file that builds upon the existing algorithm."
|
|
235
|
+
The algorithm should maintain the same interface (function signatures) and include proper error handling."
|
|
225
236
|
|
|
226
237
|
# Generate mutation (skip for baseline)
|
|
227
238
|
if [[ $id == "000" || $id == "0" ]]; then
|
|
228
239
|
echo "[INFO] Baseline algorithm - skipping mutation, using original"
|
|
229
240
|
else
|
|
230
241
|
echo "[INFO] Calling Claude $CLAUDE_MODEL to apply mutation..."
|
|
242
|
+
echo "[INFO] Claude will edit: $output_file"
|
|
231
243
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
echo "[ERROR] Claude
|
|
244
|
+
# Claude will edit the file directly - we just need to run the command
|
|
245
|
+
if ! output=$(echo "$prompt" | "$claude_cmd" --model $CLAUDE_MODEL -p 2>&1); then
|
|
246
|
+
echo "[ERROR] Claude failed to mutate algorithm" >&2
|
|
247
|
+
echo "[ERROR] Claude output: $output" >&2
|
|
235
248
|
update_csv_row "$row_num" "" "failed"
|
|
236
|
-
|
|
249
|
+
if should_continue_after_failure; then
|
|
250
|
+
continue
|
|
251
|
+
else
|
|
252
|
+
exit 1
|
|
253
|
+
fi
|
|
237
254
|
fi
|
|
238
255
|
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
256
|
+
echo "[INFO] Claude completed mutation"
|
|
257
|
+
# Show Claude's response (it might contain useful information about what was changed)
|
|
258
|
+
if [[ -n $output ]]; then
|
|
259
|
+
echo "[INFO] Claude response: $output"
|
|
260
|
+
fi
|
|
242
261
|
fi
|
|
243
262
|
|
|
244
263
|
echo "[INFO] Algorithm ready at: $output_file"
|
|
@@ -258,7 +277,11 @@ if [[ -n $timeout_seconds ]]; then
|
|
|
258
277
|
if [[ $eval_exit_code -eq 124 ]]; then
|
|
259
278
|
echo "[ERROR] Evaluation timed out after ${timeout_seconds} seconds" >&2
|
|
260
279
|
update_csv_row "$row_num" "" "timeout"
|
|
261
|
-
|
|
280
|
+
if should_continue_after_failure; then
|
|
281
|
+
continue 2 # Continue outer while loop from nested context
|
|
282
|
+
else
|
|
283
|
+
exit 1
|
|
284
|
+
fi
|
|
262
285
|
fi
|
|
263
286
|
fi
|
|
264
287
|
else
|
|
@@ -293,20 +316,37 @@ if [[ $eval_exit_code -eq 0 ]]; then
|
|
|
293
316
|
echo "[ERROR] No score found in evaluator output" >&2
|
|
294
317
|
echo "[ERROR] Output: $eval_output" >&2
|
|
295
318
|
update_csv_row "$row_num" "" "failed"
|
|
296
|
-
|
|
319
|
+
if should_continue_after_failure; then
|
|
320
|
+
continue 2
|
|
321
|
+
else
|
|
322
|
+
exit 1
|
|
323
|
+
fi
|
|
297
324
|
fi
|
|
298
325
|
fi
|
|
299
326
|
else
|
|
300
327
|
echo "[ERROR] Failed to parse evaluator output" >&2
|
|
301
328
|
echo "[ERROR] Output: $eval_output" >&2
|
|
302
329
|
update_csv_row "$row_num" "" "failed"
|
|
303
|
-
|
|
330
|
+
if should_continue_after_failure; then
|
|
331
|
+
continue
|
|
332
|
+
else
|
|
333
|
+
exit 1
|
|
334
|
+
fi
|
|
304
335
|
fi
|
|
305
336
|
else
|
|
306
337
|
echo "[ERROR] Evaluator failed with exit code $eval_exit_code" >&2
|
|
307
338
|
echo "[ERROR] Output: $eval_output" >&2
|
|
308
339
|
update_csv_row "$row_num" "" "failed"
|
|
309
|
-
|
|
340
|
+
if should_continue_after_failure; then
|
|
341
|
+
continue
|
|
342
|
+
else
|
|
343
|
+
exit 1
|
|
344
|
+
fi
|
|
310
345
|
fi
|
|
311
346
|
|
|
312
|
-
echo "[INFO] Evolution cycle completed successfully!"
|
|
347
|
+
echo "[INFO] Evolution cycle completed successfully!"
|
|
348
|
+
consecutive_failures=0 # Reset failure counter on success
|
|
349
|
+
|
|
350
|
+
echo "[INFO] Looking for next candidate..."
|
|
351
|
+
echo "----------------------------------------"
|
|
352
|
+
done # End of main evolution loop
|