claude-evolve 1.11.11 → 1.11.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/claude-evolve-check +146 -20
- package/lib/ai-cli.sh +7 -0
- package/lib/config.sh +7 -4
- package/package.json +1 -1
package/bin/claude-evolve-check
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
2
|
# claude-evolve-check - Health check for AI model configurations
|
|
3
3
|
# Tests all configured AI models to verify they're working before starting evolution runs
|
|
4
|
+
#
|
|
5
|
+
# AIDEV-NOTE: Claude CLI has issues with timeout command and subshells.
|
|
6
|
+
# We test each model by running it directly and checking the result file.
|
|
4
7
|
|
|
5
8
|
set -e
|
|
6
9
|
|
|
@@ -15,19 +18,21 @@ NC='\033[0m' # No Color
|
|
|
15
18
|
SCRIPT_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}" 2>/dev/null || echo "${BASH_SOURCE[0]}")")" && pwd)"
|
|
16
19
|
LIB_DIR="$(dirname "$SCRIPT_DIR")/lib"
|
|
17
20
|
|
|
18
|
-
# Source configuration
|
|
21
|
+
# Source configuration to get model lists
|
|
19
22
|
source "$LIB_DIR/config.sh"
|
|
20
23
|
load_config
|
|
21
24
|
|
|
22
|
-
#
|
|
23
|
-
source "$LIB_DIR/ai-cli.sh"
|
|
24
|
-
|
|
25
|
-
# Test timeout in seconds (short to fail fast)
|
|
25
|
+
# Test timeout in seconds
|
|
26
26
|
TEST_TIMEOUT=30
|
|
27
27
|
|
|
28
28
|
# Simple test prompt
|
|
29
29
|
TEST_PROMPT="Say hello in exactly 3 words."
|
|
30
30
|
|
|
31
|
+
# Temp files
|
|
32
|
+
# Scratch files used by the health check; both are removed when the script exits.
TEMP_OUTPUT=$(mktemp)
TEMP_PID=$(mktemp)
# Single-quoted so the variables are expanded (and quoted) when the trap fires,
# which keeps cleanup working even if the temp directory path contains spaces.
trap 'rm -f -- "$TEMP_OUTPUT" "$TEMP_PID"' EXIT
|
|
35
|
+
|
|
31
36
|
echo -e "${CYAN}🔍 Claude Evolve - AI Model Health Check${NC}"
|
|
32
37
|
echo "============================================"
|
|
33
38
|
echo
|
|
@@ -35,12 +40,11 @@ echo
|
|
|
35
40
|
# Collect all unique models into a simple list
|
|
36
41
|
# Print the given model names as a single space-separated list with
# duplicates removed, keeping the first occurrence of each name.
# Arguments: model names, one per argument
# Outputs:   the de-duplicated list on stdout (empty when no arguments)
collect_unique_models() {
  local seen=" "
  local candidate
  for candidate in "$@"; do
    # Compare whole space-delimited tokens literally. A word-boundary regex
    # match would treat "opus" as already present once "opus-think" is listed,
    # and would let the "." in "kimi-k2.5" match any character.
    case "$seen" in
      *" $candidate "*) ;;
      *) seen="$seen$candidate " ;;
    esac
  done
  seen="${seen# }"
  printf '%s' "${seen% }"
}

# Intentionally unquoted: each LLM_* variable holds a space-separated list of names.
ALL_MODELS=$(collect_unique_models $LLM_RUN $LLM_RUN_FALLBACK $LLM_IDEATE $LLM_IDEATE_FALLBACK)
|
|
44
48
|
|
|
45
49
|
echo "Models to test: $ALL_MODELS"
|
|
46
50
|
echo
|
|
@@ -51,38 +55,160 @@ FAILED=0
|
|
|
51
55
|
FAILED_MODELS=""
|
|
52
56
|
TIMEOUT_MODELS=""
|
|
53
57
|
|
|
58
|
+
# Test a single model by writing a tiny test script and running it
|
|
59
|
+
# Fill the caller-visible array `model_cmd` with the CLI argv for a model alias.
# Relies on bash dynamic scoping: test_model declares `model_cmd` as a local
# array before calling this helper.
# Arguments: $1 - model alias (e.g. sonnet, gemini-pro)
#            $2 - prompt text
# Returns:   0 if the alias is known, 1 otherwise (model_cmd is left empty)
_build_model_cmd() {
  local name="$1"
  local prompt="$2"
  # The "-think" aliases prefix the prompt with "ultrathink" and a blank line.
  local think_prompt=$'ultrathink\n\n'"$prompt"

  model_cmd=()
  case "$name" in
    opus|sonnet|haiku)
      model_cmd=(claude --dangerously-skip-permissions --mcp-config '' --model "$name" -p "$prompt")
      ;;
    opus-think)
      model_cmd=(claude --dangerously-skip-permissions --mcp-config '' --model opus -p "$think_prompt")
      ;;
    sonnet-think)
      model_cmd=(claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "$think_prompt")
      ;;
    gemini-pro)
      model_cmd=(gemini -y -m gemini-3-pro-preview -p "$prompt")
      ;;
    gemini-flash|gemini-3-flash)
      model_cmd=(gemini -y -m gemini-2.5-flash -p "$prompt")
      ;;
    kimi-coder)
      model_cmd=(kimi --print -y -m kimi-for-coding -c "$prompt")
      ;;
    kimi-k2.5)
      model_cmd=(opencode -m openrouter/moonshotai/kimi-k2.5 run "$prompt")
      ;;
    glm-5)
      model_cmd=(opencode -m openrouter/z-ai/glm-5 run "$prompt")
      ;;
    glm-5-zai)
      model_cmd=(opencode -m zai-coding-plan/glm-5 run "$prompt")
      ;;
    glm-zai)
      model_cmd=(opencode -m zai-coding-plan/glm-4.7 run "$prompt")
      ;;
    codex-oss-local)
      model_cmd=(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama "$prompt")
      ;;
    gpt5|gpt5high)
      model_cmd=(codex exec --dangerously-bypass-approvals-and-sandbox "$prompt")
      ;;
    qwen)
      model_cmd=(opencode -m openrouter/qwen/qwen3.5-plus-02-15 run "$prompt")
      ;;
    *)
      return 1
      ;;
  esac
}

# Test a single model by running its CLI through a tiny exec trampoline script
# and polling until it finishes or the time budget is used up.
# Arguments: $1 - model alias
#            $2 - prompt to send
#            $3 - file that receives the CLI's combined stdout/stderr
#            $4 - maximum number of one-second polls before giving up
# Returns:   the CLI's exit status; 124 on timeout; 1 for an unknown model
#            or when the temporary script cannot be created
test_model() {
  local model="$1"
  local prompt="$2"
  local outfile="$3"
  local max_wait="$4"

  local -a model_cmd=()
  if ! _build_model_cmd "$model" "$prompt"; then
    echo "Unknown model: $model" > "$outfile"
    return 1
  fi

  # Declare and assign separately so a mktemp failure is not masked by `local`.
  local test_script
  if ! test_script=$(mktemp); then
    echo "Failed to create temporary test script" > "$outfile"
    return 1
  fi

  # Self-contained trampoline: `exec` replaces the script process with the CLI,
  # so the PID recorded below is the CLI process itself.
  printf '%s\n' '#!/usr/bin/env bash' 'exec "$@"' > "$test_script"
  chmod +x "$test_script"

  # Run in background, capturing all output to the result file.
  "$test_script" "${model_cmd[@]}" > "$outfile" 2>&1 &
  local pid=$!

  # Poll for completion once per second.
  local elapsed=0
  while kill -0 "$pid" 2>/dev/null; do
    if [[ $elapsed -ge $max_wait ]]; then
      # Ask politely first, then force-kill whatever is left.
      kill "$pid" 2>/dev/null || true
      sleep 1
      kill -9 "$pid" 2>/dev/null || true
      wait "$pid" 2>/dev/null || true
      rm -f "$test_script"
      return 124
    fi
    sleep 1
    # Not ((elapsed++)): that returns status 1 when elapsed is 0, which would
    # abort the script if this function is called while `set -e` is active.
    elapsed=$((elapsed + 1))
  done

  # Capture the status without letting `set -e` skip the cleanup below.
  local rc=0
  wait "$pid" || rc=$?
  rm -f "$test_script"
  return $rc
}
|
|
182
|
+
|
|
54
183
|
# Test each model
|
|
55
184
|
for model in $ALL_MODELS; do
|
|
56
185
|
echo -n "Testing $model... "
|
|
57
186
|
|
|
58
|
-
# Run the test with timeout
|
|
59
187
|
START_TIME=$(date +%s)
|
|
60
188
|
|
|
61
|
-
# Call the model directly using the configured command
|
|
62
189
|
set +e
|
|
63
|
-
|
|
64
|
-
source '$LIB_DIR/config.sh'
|
|
65
|
-
load_config
|
|
66
|
-
source '$LIB_DIR/ai-cli.sh'
|
|
67
|
-
call_ai_model_configured '$model' '$TEST_PROMPT' 2>/dev/null
|
|
68
|
-
" 2>&1)
|
|
190
|
+
test_model "$model" "$TEST_PROMPT" "$TEMP_OUTPUT" "$TEST_TIMEOUT"
|
|
69
191
|
EXIT_CODE=$?
|
|
70
192
|
set -e
|
|
71
193
|
|
|
72
194
|
END_TIME=$(date +%s)
|
|
73
195
|
DURATION=$((END_TIME - START_TIME))
|
|
74
196
|
|
|
197
|
+
# Read output
|
|
198
|
+
OUTPUT=$(cat "$TEMP_OUTPUT" 2>/dev/null || true)
|
|
199
|
+
OUTPUT_LEN=${#OUTPUT}
|
|
200
|
+
|
|
75
201
|
# Check result
|
|
76
|
-
if [[ $EXIT_CODE -eq 0 ]] && [[ -n "$OUTPUT" ]] && [[ $
|
|
77
|
-
echo -e "${GREEN}✓ OK${NC} (${DURATION}s, ${
|
|
202
|
+
if [[ $EXIT_CODE -eq 0 ]] && [[ -n "$OUTPUT" ]] && [[ $OUTPUT_LEN -gt 10 ]]; then
|
|
203
|
+
echo -e "${GREEN}✓ OK${NC} (${DURATION}s, ${OUTPUT_LEN} chars)"
|
|
78
204
|
((PASSED++))
|
|
79
205
|
elif [[ $EXIT_CODE -eq 124 ]]; then
|
|
80
|
-
echo -e "${RED}✗ TIMEOUT${NC} (${TEST_TIMEOUT}s
|
|
206
|
+
echo -e "${RED}✗ TIMEOUT${NC} (${TEST_TIMEOUT}s)"
|
|
81
207
|
TIMEOUT_MODELS="$TIMEOUT_MODELS $model"
|
|
82
208
|
((FAILED++))
|
|
83
209
|
else
|
|
84
|
-
echo -e "${RED}✗ FAILED${NC} (exit=$EXIT_CODE, ${
|
|
85
|
-
if [[ -n "$OUTPUT" ]] && [[ $
|
|
210
|
+
echo -e "${RED}✗ FAILED${NC} (exit=$EXIT_CODE, ${OUTPUT_LEN} chars)"
|
|
211
|
+
if [[ -n "$OUTPUT" ]] && [[ $OUTPUT_LEN -lt 200 ]]; then
|
|
86
212
|
echo " Output: $OUTPUT"
|
|
87
213
|
fi
|
|
88
214
|
FAILED_MODELS="$FAILED_MODELS $model"
|
package/lib/ai-cli.sh
CHANGED
|
@@ -224,6 +224,13 @@ $prompt"
|
|
|
224
224
|
ai_output=$(timeout -k 30 600 opencode -m openrouter/moonshotai/kimi-k2.5 run "$prompt" 2>&1)
|
|
225
225
|
local ai_exit_code=$?
|
|
226
226
|
;;
|
|
227
|
+
qwen)
|
|
228
|
+
local ai_output
|
|
229
|
+
# Qwen latest - Alibaba's flagship model (currently qwen3.5-plus)
|
|
230
|
+
# Linear attention + sparse MoE, strong multimodal capabilities
|
|
231
|
+
ai_output=$(timeout -k 30 600 opencode -m openrouter/qwen/qwen3.5-plus-02-15 run "$prompt" 2>&1)
|
|
232
|
+
local ai_exit_code=$?
|
|
233
|
+
;;
|
|
227
234
|
codex-oss-local)
|
|
228
235
|
# Codex-OSS via Codex CLI with Ollama backend
|
|
229
236
|
local ai_output
|
package/lib/config.sh
CHANGED
|
@@ -61,16 +61,19 @@ DEFAULT_WORKER_MAX_CANDIDATES=3
|
|
|
61
61
|
# Primary: Strong models used in normal operation
|
|
62
62
|
# Fallback: Cheap/backup models used only when primary tier exhausted
|
|
63
63
|
#
|
|
64
|
-
# Run:
|
|
65
|
-
|
|
66
|
-
|
|
64
|
+
# Run: Primary models for code generation
|
|
65
|
+
# Sonnet 4.6: 79.6% SWE-bench, $3/$15 per M tokens
|
|
66
|
+
# GLM-5: 744B MoE, $0.80/M tokens, 77.8% SWE-bench
|
|
67
|
+
# Qwen: hybrid linear attention + sparse MoE, strong reasoning
|
|
68
|
+
DEFAULT_LLM_RUN="sonnet glm-5 glm-5 qwen kimi-k2.5"
|
|
69
|
+
DEFAULT_LLM_RUN_FALLBACK="gemini-3-flash codex-oss-local"
|
|
67
70
|
#
|
|
68
71
|
# Ideate: Only agentic models that can edit files reliably
|
|
69
72
|
# AIDEV-NOTE: Ideation REQUIRES file editing - non-agentic models (opencode run, codex) return text
|
|
70
73
|
# but don't actually edit files. Only use claude/gemini CLI, cursor-agent, or zai-coding-plan models.
|
|
71
74
|
# OpenRouter models (via opencode) are chat-only and CANNOT edit files for ideation.
|
|
72
75
|
DEFAULT_LLM_IDEATE="opus-think sonnet-think glm-5-zai gemini-pro kimi-coder"
|
|
73
|
-
DEFAULT_LLM_IDEATE_FALLBACK="sonnet glm-zai
|
|
76
|
+
DEFAULT_LLM_IDEATE_FALLBACK="sonnet glm-5-zai"
|
|
74
77
|
|
|
75
78
|
# Load configuration from a YAML file and update variables
|
|
76
79
|
_load_yaml_config() {
|