claude-evolve 1.3.30 → 1.3.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/claude-evolve-run +76 -10
- package/bin/claude-evolve-run-clean +94 -0
- package/lib/csv_helper.py +1 -0
- package/package.json +1 -1
package/bin/claude-evolve-run
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
2
|
|
|
3
|
-
set -e
|
|
3
|
+
# Removed 'set -e' to prevent silent exits on CSV helper failures
|
|
4
4
|
|
|
5
5
|
# Load configuration
|
|
6
6
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
@@ -328,7 +328,7 @@ echo "[INFO] Copied parent algorithm to: $output_file"
|
|
|
328
328
|
# Check for claude CLI
|
|
329
329
|
claude_cmd="${CLAUDE_CMD:-claude}"
|
|
330
330
|
if ! command -v "$claude_cmd" >/dev/null 2>&1; then
|
|
331
|
-
echo "[ERROR] Claude CLI not found. Please install claude
|
|
331
|
+
echo "[ERROR] Claude CLI not found. Please install 'claude' CLI tool or set CLAUDE_CMD environment variable." >&2
|
|
332
332
|
update_csv_row "$row_num" "" "failed"
|
|
333
333
|
exit 1
|
|
334
334
|
fi
|
|
@@ -471,12 +471,45 @@ echo "----------------------------------------"
|
|
|
471
471
|
|
|
472
472
|
# Process results
|
|
473
473
|
if [[ $eval_exit_code -eq 0 ]]; then
|
|
474
|
-
#
|
|
475
|
-
|
|
474
|
+
# DEBUG: Show raw evaluator output
|
|
475
|
+
echo "[DEBUG] Raw evaluator output:"
|
|
476
|
+
echo "----------------------------------------"
|
|
477
|
+
echo "$eval_output"
|
|
478
|
+
echo "----------------------------------------"
|
|
479
|
+
|
|
480
|
+
# Extract the last valid JSON line and score
|
|
481
|
+
last_json=$(echo "$eval_output" | grep '^{.*}$' | tail -1)
|
|
482
|
+
|
|
483
|
+
if [[ -n "$last_json" ]]; then
|
|
484
|
+
echo "[DEBUG] Found JSON: $last_json"
|
|
485
|
+
|
|
486
|
+
# Extract score from JSON
|
|
487
|
+
score=$(echo "$last_json" | python3 -c "
|
|
488
|
+
import sys, json
|
|
489
|
+
try:
|
|
490
|
+
data = json.loads(sys.stdin.read().strip())
|
|
491
|
+
if 'performance' in data:
|
|
492
|
+
print(data['performance'])
|
|
493
|
+
elif 'score' in data:
|
|
494
|
+
print(data['score'])
|
|
495
|
+
else:
|
|
496
|
+
sys.exit(1)
|
|
497
|
+
except:
|
|
498
|
+
sys.exit(1)
|
|
499
|
+
" 2>/dev/null)
|
|
500
|
+
fi
|
|
501
|
+
|
|
502
|
+
if [[ -n "$score" ]]; then
|
|
503
|
+
echo "[DEBUG] Extracted score via 'score' field: '$score'"
|
|
476
504
|
if [[ -n $score ]]; then
|
|
477
505
|
# Check if score is 0 and mark as failed
|
|
478
506
|
if (( $(echo "$score == 0" | bc -l) )); then
|
|
479
|
-
|
|
507
|
+
# Update CSV with full JSON data
|
|
508
|
+
if [[ -n "$last_json" ]]; then
|
|
509
|
+
python3 "${SCRIPT_DIR}/../lib/csv_helper.py" update_with_json "${FULL_CSV_PATH}" "$id" "$last_json"
|
|
510
|
+
else
|
|
511
|
+
update_csv_row "$row_num" "$score" "failed"
|
|
512
|
+
fi
|
|
480
513
|
echo "[INFO] ✗ Evaluation failed with score 0"
|
|
481
514
|
echo "[INFO] Performance score: $score"
|
|
482
515
|
if should_continue_after_failure; then
|
|
@@ -485,16 +518,35 @@ if [[ $eval_exit_code -eq 0 ]]; then
|
|
|
485
518
|
exit 1
|
|
486
519
|
fi
|
|
487
520
|
else
|
|
488
|
-
|
|
521
|
+
# Update CSV with full JSON data
|
|
522
|
+
if [[ -n "$last_json" ]]; then
|
|
523
|
+
echo "[DEBUG] Updating CSV with full JSON data (branch 1)"
|
|
524
|
+
echo "[DEBUG] CSV path: ${FULL_CSV_PATH}"
|
|
525
|
+
echo "[DEBUG] Target ID: $id (row $row_num)"
|
|
526
|
+
echo "[DEBUG] JSON data: $last_json"
|
|
527
|
+
echo "[DEBUG] Running CSV helper..."
|
|
528
|
+
if python3 "${SCRIPT_DIR}/../lib/csv_helper.py" update_with_json "${FULL_CSV_PATH}" "$id" "$last_json"; then
|
|
529
|
+
echo "[DEBUG] CSV helper succeeded"
|
|
530
|
+
else
|
|
531
|
+
echo "[DEBUG] CSV helper failed with exit code $?"
|
|
532
|
+
fi
|
|
533
|
+
else
|
|
534
|
+
update_csv_row "$row_num" "$score" "complete"
|
|
535
|
+
fi
|
|
489
536
|
echo "[INFO] ✓ Evaluation completed successfully"
|
|
490
537
|
echo "[INFO] Performance score: $score"
|
|
491
538
|
fi
|
|
492
539
|
else
|
|
493
|
-
# Try "performance" field
|
|
494
|
-
if score=$(echo "$eval_output" | grep -o '"performance"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' '); then
|
|
540
|
+
# Try "performance" field - get only the last occurrence
|
|
541
|
+
if score=$(echo "$eval_output" | grep -o '"performance"[[:space:]]*:[[:space:]]*[0-9.]*' | cut -d: -f2 | tr -d ' ' | tail -1); then
|
|
542
|
+
echo "[DEBUG] Extracted score via 'performance' field: '$score'"
|
|
495
543
|
# Check if score is 0 and mark as failed
|
|
496
544
|
if [ "$(echo "$score == 0" | bc -l)" = "1" ]; then
|
|
497
|
-
|
|
545
|
+
if [[ -n "$last_json" ]]; then
|
|
546
|
+
python3 "${SCRIPT_DIR}/../lib/csv_helper.py" update_with_json "${FULL_CSV_PATH}" "$id" "$last_json"
|
|
547
|
+
else
|
|
548
|
+
update_csv_row "$row_num" "$score" "failed"
|
|
549
|
+
fi
|
|
498
550
|
echo "[INFO] ✗ Evaluation failed with score 0"
|
|
499
551
|
echo "[INFO] Performance score: $score"
|
|
500
552
|
if should_continue_after_failure; then
|
|
@@ -503,7 +555,21 @@ if [[ $eval_exit_code -eq 0 ]]; then
|
|
|
503
555
|
exit 1
|
|
504
556
|
fi
|
|
505
557
|
else
|
|
506
|
-
|
|
558
|
+
# Update CSV with full JSON data
|
|
559
|
+
if [[ -n "$last_json" ]]; then
|
|
560
|
+
echo "[DEBUG] Updating CSV with full JSON data (branch 2)"
|
|
561
|
+
echo "[DEBUG] CSV path: ${FULL_CSV_PATH}"
|
|
562
|
+
echo "[DEBUG] Target ID: $id (row $row_num)"
|
|
563
|
+
echo "[DEBUG] JSON data: $last_json"
|
|
564
|
+
echo "[DEBUG] Running CSV helper..."
|
|
565
|
+
if python3 "${SCRIPT_DIR}/../lib/csv_helper.py" update_with_json "${FULL_CSV_PATH}" "$id" "$last_json"; then
|
|
566
|
+
echo "[DEBUG] CSV helper succeeded"
|
|
567
|
+
else
|
|
568
|
+
echo "[DEBUG] CSV helper failed with exit code $?"
|
|
569
|
+
fi
|
|
570
|
+
else
|
|
571
|
+
update_csv_row "$row_num" "$score" "complete"
|
|
572
|
+
fi
|
|
507
573
|
echo "[INFO] ✓ Evaluation completed successfully"
|
|
508
574
|
echo "[INFO] Performance score: $score"
|
|
509
575
|
fi
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
|
|
3
|
+
# Simple, robust version - fail fast on malformed evaluator output
|
|
4
|
+
|
|
5
|
+
# Extract the score parsing section only
|
|
6
|
+
# Extract the numeric score from raw evaluator output.
#
# Finds the last line that is a complete single-line JSON object and reads its
# "performance" field, falling back to "score".
#
# Arguments:
#   $1 - full evaluator output (may contain arbitrary log lines).
# Outputs:
#   stdout - the score only, so `result=$(parse_evaluator_output ...)` captures
#            a clean number.
#   stderr - every [DEBUG] and [ERROR] diagnostic, including the raw output dump.
# Globals:
#   last_json - set to the matched JSON line (empty when none is found). Not
#               visible to the caller when invoked inside $(...), because the
#               assignment happens in the subshell.
# Returns:
#   0 if a numeric score was printed, 1 otherwise.
parse_evaluator_output() {
    local eval_output="$1"
    local score=""
    # Optional sign, digits, optional fractional part (e.g. 3, 1.07, -0.2).
    local number_re='^-?[0-9]+\.?[0-9]*$'

    # Diagnostics must not share stdout with the score: callers capture stdout.
    {
        echo "[DEBUG] Raw evaluator output:"
        echo "----------------------------------------"
        printf '%s\n' "$eval_output"
        echo "----------------------------------------"
    } >&2

    # Try to extract the last valid JSON line.
    # printf (not echo) so output beginning with "-n"/"-e" is passed through intact.
    last_json=$(printf '%s\n' "$eval_output" | grep '^{.*}$' | tail -1)

    if [[ -z "$last_json" ]]; then
        echo "[ERROR] No valid JSON found in evaluator output" >&2
        echo "[ERROR] Evaluator must output clean JSON with 'score' or 'performance' field" >&2
        echo "[ERROR] Mixed logging/output is not allowed" >&2
        return 1
    fi

    echo "[DEBUG] Found JSON line: $last_json" >&2

    # Parse with Python for robustness. The program is single-quoted so the
    # shell performs no expansion inside it; it exits 1 on malformed JSON or
    # when neither field is present, leaving $score empty.
    score=$(printf '%s\n' "$last_json" | python3 -c '
import json
import sys

try:
    data = json.loads(sys.stdin.read())
except ValueError:
    sys.exit(1)

if not isinstance(data, dict):
    sys.exit(1)

for key in ("performance", "score"):
    if key in data:
        print(data[key])
        break
else:
    sys.exit(1)
' 2>/dev/null)

    if [[ -z "$score" ]]; then
        echo "[ERROR] Could not extract score from JSON" >&2
        return 1
    fi

    echo "[DEBUG] Extracted score: $score" >&2

    # Validate it's a clean number (rejects values such as "None", "True", "abc").
    if [[ "$score" =~ $number_re ]]; then
        echo "$score"
        return 0
    fi

    echo "[ERROR] Score is not a valid number: '$score'" >&2
    return 1
}
|
|
58
|
+
|
|
59
|
+
# Test with your sample
|
|
60
|
+
test_output='Evaluating algorithm: /Users/willer/Documents/GitHub/trading-strategies/evolution-mats/evolution_gen01-001.py
|
|
61
|
+
Copied /Users/willer/Documents/GitHub/trading-strategies/evolution-mats/evolution_gen01-001.py -> algorithm.py for evaluation
|
|
62
|
+
Starting MATS evaluation...
|
|
63
|
+
Running backtest from 2015-01-01 to 2025-06-01...
|
|
64
|
+
|
|
65
|
+
Evaluation completed successfully!
|
|
66
|
+
Performance: 1.0775
|
|
67
|
+
Total Return: 2839.30%
|
|
68
|
+
Sharpe: 1.310, Sortino: 1.661
|
|
69
|
+
Max Drawdown: -20.87%
|
|
70
|
+
Total Trades: 2604, Win Rate: 64.0%
|
|
71
|
+
|
|
72
|
+
Full results JSON:
|
|
73
|
+
{
|
|
74
|
+
"performance": 1.077506371224117,
|
|
75
|
+
"total_return": 28.392963641910857,
|
|
76
|
+
"yearly_return": 0.24689362518462943,
|
|
77
|
+
"sharpe": 1.3100255343786216,
|
|
78
|
+
"sortino": 1.6612254920789753,
|
|
79
|
+
"max_drawdown": -0.2086643676866254,
|
|
80
|
+
"volatility": 0.18846474263704896,
|
|
81
|
+
"total_trades": 2604,
|
|
82
|
+
"win_rate": 0.6401689708141322,
|
|
83
|
+
"profit_factor": 1.9441283380055399,
|
|
84
|
+
"final_value": 2939296.3641910856
|
|
85
|
+
}
|
|
86
|
+
Restored original algorithm.py
|
|
87
|
+
{"performance": 1.077506371224117, "total_return": 28.392963641910857, "yearly_return": 0.24689362518462943, "sharpe": 1.3100255343786216, "sortino": 1.6612254920789753, "max_drawdown": -0.2086643676866254, "volatility": 0.18846474263704896, "total_trades": 2604, "win_rate": 0.6401689708141322, "profit_factor": 1.9441283380055399, "final_value": 2939296.3641910856}'
|
|
88
|
+
|
|
89
|
+
# Smoke-test the parser against the sample output and report the outcome.
printf '%s\n' "Testing parser..."
if ! result=$(parse_evaluator_output "$test_output"); then
    printf '%s\n' "FAILED: Could not parse"
else
    printf '%s\n' "SUCCESS: Got score: $result"
fi
|
package/lib/csv_helper.py
CHANGED
|
@@ -60,6 +60,7 @@ def update_row_with_fields(headers: list[str], rows: list[list[str]], target_id:
|
|
|
60
60
|
if row[0] == target_id:
|
|
61
61
|
for field, value in fields.items():
|
|
62
62
|
if field in col_indices:
|
|
63
|
+
print(f"[DEBUG] Updating field '{field}' with value: {repr(value)}", file=sys.stderr)
|
|
63
64
|
row[col_indices[field]] = str(value)
|
|
64
65
|
break
|
|
65
66
|
|