claude-evolve 1.0.16 → 1.0.17
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/bin/claude-evolve-run +25 -11
- package/package.json +1 -1
package/bin/claude-evolve-run
CHANGED
|
@@ -223,24 +223,32 @@ INSTRUCTIONS:
|
|
|
223
223
|
|
|
224
224
|
The output should be a complete, executable Python file that builds upon the existing algorithm."
|
|
225
225
|
|
|
226
|
-
# Generate mutation
|
|
227
|
-
if
|
|
228
|
-
echo "[
|
|
229
|
-
|
|
230
|
-
|
|
226
|
+
# Generate mutation (skip for baseline)
|
|
227
|
+
if [[ $id == "000" || $id == "0" ]]; then
|
|
228
|
+
echo "[INFO] Baseline algorithm - skipping mutation, using original"
|
|
229
|
+
else
|
|
230
|
+
echo "[INFO] Calling Claude $CLAUDE_MODEL to apply mutation..."
|
|
231
|
+
if ! generated_code=$(echo "$prompt" | "$claude_cmd" --model $CLAUDE_MODEL); then
|
|
232
|
+
echo "[ERROR] Claude failed to generate algorithm mutation" >&2
|
|
233
|
+
update_csv_row "$row_num" "" "failed"
|
|
234
|
+
exit 1
|
|
235
|
+
fi
|
|
236
|
+
|
|
237
|
+
# Save generated algorithm (overwrite the copied file)
|
|
238
|
+
echo "$generated_code" >"$output_file"
|
|
239
|
+
echo "[INFO] Claude successfully mutated algorithm"
|
|
231
240
|
fi
|
|
232
241
|
|
|
233
|
-
|
|
234
|
-
echo "$generated_code" >"$output_file"
|
|
235
|
-
echo "[INFO] Updated algorithm: $output_file"
|
|
242
|
+
echo "[INFO] Algorithm ready at: $output_file"
|
|
236
243
|
|
|
237
244
|
# Run evaluator
|
|
238
245
|
echo "[INFO] Running evaluation..."
|
|
246
|
+
echo "[INFO] Executing: $PYTHON_CMD $FULL_EVALUATOR_PATH $output_file"
|
|
239
247
|
eval_output=""
|
|
240
248
|
eval_exit_code=0
|
|
241
249
|
|
|
242
250
|
if [[ -n $timeout_seconds ]]; then
|
|
243
|
-
echo "[INFO]
|
|
251
|
+
echo "[INFO] Evaluation timeout: ${timeout_seconds}s"
|
|
244
252
|
if eval_output=$(timeout "$timeout_seconds" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1); then
|
|
245
253
|
eval_exit_code=0
|
|
246
254
|
else
|
|
@@ -259,18 +267,24 @@ else
|
|
|
259
267
|
fi
|
|
260
268
|
fi
|
|
261
269
|
|
|
270
|
+
# Show evaluator output
|
|
271
|
+
echo "[INFO] Evaluator output:"
|
|
272
|
+
echo "----------------------------------------"
|
|
273
|
+
echo "$eval_output"
|
|
274
|
+
echo "----------------------------------------"
|
|
275
|
+
|
|
262
276
|
# Process results
|
|
263
277
|
if [[ $eval_exit_code -eq 0 ]]; then
|
|
264
278
|
# Extract score from JSON (simple grep approach)
|
|
265
279
|
if score=$(echo "$eval_output" | grep -o '"score"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
|
|
266
280
|
if [[ -n $score ]]; then
|
|
267
|
-
update_csv_row "$row_num" "$score" "
|
|
281
|
+
update_csv_row "$row_num" "$score" "complete"
|
|
268
282
|
echo "[INFO] ✓ Evaluation completed successfully"
|
|
269
283
|
echo "[INFO] Performance score: $score"
|
|
270
284
|
else
|
|
271
285
|
# Try "performance" field
|
|
272
286
|
if score=$(echo "$eval_output" | grep -o '"performance"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
|
|
273
|
-
update_csv_row "$row_num" "$score" "
|
|
287
|
+
update_csv_row "$row_num" "$score" "complete"
|
|
274
288
|
echo "[INFO] ✓ Evaluation completed successfully"
|
|
275
289
|
echo "[INFO] Performance score: $score"
|
|
276
290
|
else
|