claude-evolve 1.0.16 → 1.0.18
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/bin/claude-evolve-run +37 -11
- package/package.json +1 -1
package/bin/claude-evolve-run
CHANGED
|
@@ -223,24 +223,44 @@ INSTRUCTIONS:
|
|
|
223
223
|
|
|
224
224
|
The output should be a complete, executable Python file that builds upon the existing algorithm."
|
|
225
225
|
|
|
226
|
-
# Generate mutation
|
|
227
|
-
if
|
|
228
|
-
echo "[
|
|
229
|
-
|
|
230
|
-
|
|
226
|
+
# Generate mutation (skip for baseline)
|
|
227
|
+
if [[ $id == "000" || $id == "0" ]]; then
|
|
228
|
+
echo "[INFO] Baseline algorithm - skipping mutation, using original"
|
|
229
|
+
else
|
|
230
|
+
echo "[INFO] Calling Claude $CLAUDE_MODEL to apply mutation..."
|
|
231
|
+
echo "[DEBUG] Claude command: $claude_cmd --model $CLAUDE_MODEL -p"
|
|
232
|
+
|
|
233
|
+
# Save prompt to temp file for debugging
|
|
234
|
+
local prompt_file="/tmp/claude-evolve-prompt-$$.txt"
|
|
235
|
+
echo "$prompt" > "$prompt_file"
|
|
236
|
+
echo "[DEBUG] Prompt saved to: $prompt_file ($(wc -l < "$prompt_file") lines)"
|
|
237
|
+
|
|
238
|
+
# Try with explicit pipe and error capture - NEED -p flag for piped input!
|
|
239
|
+
if ! generated_code=$(echo "$prompt" | "$claude_cmd" --model $CLAUDE_MODEL -p 2>&1); then
|
|
240
|
+
echo "[ERROR] Claude failed to generate algorithm mutation" >&2
|
|
241
|
+
echo "[ERROR] Claude output: $generated_code" >&2
|
|
242
|
+
rm -f "$prompt_file"
|
|
243
|
+
update_csv_row "$row_num" "" "failed"
|
|
244
|
+
exit 1
|
|
245
|
+
fi
|
|
246
|
+
|
|
247
|
+
rm -f "$prompt_file"
|
|
248
|
+
|
|
249
|
+
# Save generated algorithm (overwrite the copied file)
|
|
250
|
+
echo "$generated_code" >"$output_file"
|
|
251
|
+
echo "[INFO] Claude successfully mutated algorithm"
|
|
231
252
|
fi
|
|
232
253
|
|
|
233
|
-
|
|
234
|
-
echo "$generated_code" >"$output_file"
|
|
235
|
-
echo "[INFO] Updated algorithm: $output_file"
|
|
254
|
+
echo "[INFO] Algorithm ready at: $output_file"
|
|
236
255
|
|
|
237
256
|
# Run evaluator
|
|
238
257
|
echo "[INFO] Running evaluation..."
|
|
258
|
+
echo "[INFO] Executing: $PYTHON_CMD $FULL_EVALUATOR_PATH $output_file"
|
|
239
259
|
eval_output=""
|
|
240
260
|
eval_exit_code=0
|
|
241
261
|
|
|
242
262
|
if [[ -n $timeout_seconds ]]; then
|
|
243
|
-
echo "[INFO]
|
|
263
|
+
echo "[INFO] Evaluation timeout: ${timeout_seconds}s"
|
|
244
264
|
if eval_output=$(timeout "$timeout_seconds" "$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$output_file" 2>&1); then
|
|
245
265
|
eval_exit_code=0
|
|
246
266
|
else
|
|
@@ -259,18 +279,24 @@ else
|
|
|
259
279
|
fi
|
|
260
280
|
fi
|
|
261
281
|
|
|
282
|
+
# Show evaluator output
|
|
283
|
+
echo "[INFO] Evaluator output:"
|
|
284
|
+
echo "----------------------------------------"
|
|
285
|
+
echo "$eval_output"
|
|
286
|
+
echo "----------------------------------------"
|
|
287
|
+
|
|
262
288
|
# Process results
|
|
263
289
|
if [[ $eval_exit_code -eq 0 ]]; then
|
|
264
290
|
# Extract score from JSON (simple grep approach)
|
|
265
291
|
if score=$(echo "$eval_output" | grep -o '"score"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
|
|
266
292
|
if [[ -n $score ]]; then
|
|
267
|
-
update_csv_row "$row_num" "$score" "
|
|
293
|
+
update_csv_row "$row_num" "$score" "complete"
|
|
268
294
|
echo "[INFO] ✓ Evaluation completed successfully"
|
|
269
295
|
echo "[INFO] Performance score: $score"
|
|
270
296
|
else
|
|
271
297
|
# Try "performance" field
|
|
272
298
|
if score=$(echo "$eval_output" | grep -o '"performance"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
|
|
273
|
-
update_csv_row "$row_num" "$score" "
|
|
299
|
+
update_csv_row "$row_num" "$score" "complete"
|
|
274
300
|
echo "[INFO] ✓ Evaluation completed successfully"
|
|
275
301
|
echo "[INFO] Performance score: $score"
|
|
276
302
|
else
|