claude-evolve 1.11.11 → 1.11.13

This diff shows the changes between publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
@@ -1,6 +1,9 @@
1
1
  #!/usr/bin/env bash
2
2
  # claude-evolve-check - Health check for AI model configurations
3
3
  # Tests all configured AI models to verify they're working before starting evolution runs
4
+ #
5
+ # AIDEV-NOTE: Claude CLI has issues with timeout command and subshells.
6
+ # We test each model by running it directly and checking the result file.
4
7
 
5
8
  set -e
6
9
 
@@ -15,19 +18,21 @@ NC='\033[0m' # No Color
15
18
  SCRIPT_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}" 2>/dev/null || echo "${BASH_SOURCE[0]}")")" && pwd)"
16
19
  LIB_DIR="$(dirname "$SCRIPT_DIR")/lib"
17
20
 
18
- # Source configuration
21
+ # Source configuration to get model lists
19
22
  source "$LIB_DIR/config.sh"
20
23
  load_config
21
24
 
22
- # Source AI CLI for model calls
23
- source "$LIB_DIR/ai-cli.sh"
24
-
25
- # Test timeout in seconds (short to fail fast)
25
+ # Test timeout in seconds
26
26
  TEST_TIMEOUT=30
27
27
 
28
28
  # Simple test prompt
29
29
  TEST_PROMPT="Say hello in exactly 3 words."
30
30
 
31
+ # Temp files
32
+ TEMP_OUTPUT=$(mktemp)
33
+ TEMP_PID=$(mktemp)
34
+ trap "rm -f $TEMP_OUTPUT $TEMP_PID" EXIT
35
+
31
36
  echo -e "${CYAN}🔍 Claude Evolve - AI Model Health Check${NC}"
32
37
  echo "============================================"
33
38
  echo
@@ -35,12 +40,11 @@ echo
35
40
  # Collect all unique models into a simple list
36
41
  ALL_MODELS=""
37
42
  for model in $LLM_RUN $LLM_RUN_FALLBACK $LLM_IDEATE $LLM_IDEATE_FALLBACK; do
38
- # Check if model already in list
39
43
  if ! echo "$ALL_MODELS" | grep -q -w "$model"; then
40
44
  ALL_MODELS="$ALL_MODELS $model"
41
45
  fi
42
46
  done
43
- ALL_MODELS=$(echo "$ALL_MODELS" | xargs) # Trim whitespace
47
+ ALL_MODELS=$(echo "$ALL_MODELS" | xargs)
44
48
 
45
49
  echo "Models to test: $ALL_MODELS"
46
50
  echo
@@ -51,38 +55,160 @@ FAILED=0
51
55
  FAILED_MODELS=""
52
56
  TIMEOUT_MODELS=""
53
57
 
58
+ # Test a single model by writing a tiny test script and running it
59
+ test_model() {
60
+ local model="$1"
61
+ local prompt="$2"
62
+ local outfile="$3"
63
+ local max_wait="$4"
64
+
65
+ # Write a self-contained test script
66
+ local test_script=$(mktemp)
67
+ case "$model" in
68
+ opus|sonnet|haiku)
69
+ cat > "$test_script" << SCRIPT
70
+ #!/usr/bin/env bash
71
+ exec claude --dangerously-skip-permissions --mcp-config '' --model $model -p "\$1"
72
+ SCRIPT
73
+ ;;
74
+ opus-think)
75
+ cat > "$test_script" << 'SCRIPT'
76
+ #!/usr/bin/env bash
77
+ exec claude --dangerously-skip-permissions --mcp-config '' --model opus -p "ultrathink
78
+
79
+ $1"
80
+ SCRIPT
81
+ ;;
82
+ sonnet-think)
83
+ cat > "$test_script" << 'SCRIPT'
84
+ #!/usr/bin/env bash
85
+ exec claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "ultrathink
86
+
87
+ $1"
88
+ SCRIPT
89
+ ;;
90
+ gemini-pro)
91
+ cat > "$test_script" << 'SCRIPT'
92
+ #!/usr/bin/env bash
93
+ exec gemini -y -m gemini-3-pro-preview -p "$1"
94
+ SCRIPT
95
+ ;;
96
+ gemini-flash|gemini-3-flash)
97
+ cat > "$test_script" << 'SCRIPT'
98
+ #!/usr/bin/env bash
99
+ exec gemini -y -m gemini-2.5-flash -p "$1"
100
+ SCRIPT
101
+ ;;
102
+ kimi-coder)
103
+ cat > "$test_script" << 'SCRIPT'
104
+ #!/usr/bin/env bash
105
+ exec kimi --print -y -m kimi-for-coding -c "$1"
106
+ SCRIPT
107
+ ;;
108
+ kimi-k2.5)
109
+ cat > "$test_script" << 'SCRIPT'
110
+ #!/usr/bin/env bash
111
+ exec opencode -m openrouter/moonshotai/kimi-k2.5 run "$1"
112
+ SCRIPT
113
+ ;;
114
+ glm-5)
115
+ cat > "$test_script" << 'SCRIPT'
116
+ #!/usr/bin/env bash
117
+ exec opencode -m openrouter/z-ai/glm-5 run "$1"
118
+ SCRIPT
119
+ ;;
120
+ glm-5-zai)
121
+ cat > "$test_script" << 'SCRIPT'
122
+ #!/usr/bin/env bash
123
+ exec opencode -m zai-coding-plan/glm-5 run "$1"
124
+ SCRIPT
125
+ ;;
126
+ glm-zai)
127
+ cat > "$test_script" << 'SCRIPT'
128
+ #!/usr/bin/env bash
129
+ exec opencode -m zai-coding-plan/glm-4.7 run "$1"
130
+ SCRIPT
131
+ ;;
132
+ codex-oss-local)
133
+ cat > "$test_script" << 'SCRIPT'
134
+ #!/usr/bin/env bash
135
+ exec codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama "$1"
136
+ SCRIPT
137
+ ;;
138
+ gpt5|gpt5high)
139
+ cat > "$test_script" << 'SCRIPT'
140
+ #!/usr/bin/env bash
141
+ exec codex exec --dangerously-bypass-approvals-and-sandbox "$1"
142
+ SCRIPT
143
+ ;;
144
+ qwen)
145
+ cat > "$test_script" << 'SCRIPT'
146
+ #!/usr/bin/env bash
147
+ exec opencode -m openrouter/qwen/qwen3.5-plus-02-15 run "$1"
148
+ SCRIPT
149
+ ;;
150
+ *)
151
+ echo "Unknown model: $model" > "$outfile"
152
+ rm -f "$test_script"
153
+ return 1
154
+ ;;
155
+ esac
156
+ chmod +x "$test_script"
157
+
158
+ # Run the test script in background, capture output to file
159
+ "$test_script" "$prompt" > "$outfile" 2>&1 &
160
+ local pid=$!
161
+
162
+ # Poll for completion
163
+ local elapsed=0
164
+ while kill -0 "$pid" 2>/dev/null; do
165
+ if [[ $elapsed -ge $max_wait ]]; then
166
+ kill "$pid" 2>/dev/null
167
+ sleep 1
168
+ kill -9 "$pid" 2>/dev/null
169
+ wait "$pid" 2>/dev/null
170
+ rm -f "$test_script"
171
+ return 124
172
+ fi
173
+ sleep 1
174
+ ((elapsed++))
175
+ done
176
+
177
+ wait "$pid"
178
+ local rc=$?
179
+ rm -f "$test_script"
180
+ return $rc
181
+ }
182
+
54
183
  # Test each model
55
184
  for model in $ALL_MODELS; do
56
185
  echo -n "Testing $model... "
57
186
 
58
- # Run the test with timeout
59
187
  START_TIME=$(date +%s)
60
188
 
61
- # Call the model directly using the configured command
62
189
  set +e
63
- OUTPUT=$(timeout -k 5 $TEST_TIMEOUT bash -c "
64
- source '$LIB_DIR/config.sh'
65
- load_config
66
- source '$LIB_DIR/ai-cli.sh'
67
- call_ai_model_configured '$model' '$TEST_PROMPT' 2>/dev/null
68
- " 2>&1)
190
+ test_model "$model" "$TEST_PROMPT" "$TEMP_OUTPUT" "$TEST_TIMEOUT"
69
191
  EXIT_CODE=$?
70
192
  set -e
71
193
 
72
194
  END_TIME=$(date +%s)
73
195
  DURATION=$((END_TIME - START_TIME))
74
196
 
197
+ # Read output
198
+ OUTPUT=$(cat "$TEMP_OUTPUT" 2>/dev/null || true)
199
+ OUTPUT_LEN=${#OUTPUT}
200
+
75
201
  # Check result
76
- if [[ $EXIT_CODE -eq 0 ]] && [[ -n "$OUTPUT" ]] && [[ ${#OUTPUT} -gt 10 ]]; then
77
- echo -e "${GREEN}✓ OK${NC} (${DURATION}s, ${#OUTPUT} chars)"
202
+ if [[ $EXIT_CODE -eq 0 ]] && [[ -n "$OUTPUT" ]] && [[ $OUTPUT_LEN -gt 10 ]]; then
203
+ echo -e "${GREEN}✓ OK${NC} (${DURATION}s, ${OUTPUT_LEN} chars)"
78
204
  ((PASSED++))
79
205
  elif [[ $EXIT_CODE -eq 124 ]]; then
80
- echo -e "${RED}✗ TIMEOUT${NC} (${TEST_TIMEOUT}s - likely permission dialog or hanging)"
206
+ echo -e "${RED}✗ TIMEOUT${NC} (${TEST_TIMEOUT}s)"
81
207
  TIMEOUT_MODELS="$TIMEOUT_MODELS $model"
82
208
  ((FAILED++))
83
209
  else
84
- echo -e "${RED}✗ FAILED${NC} (exit=$EXIT_CODE, ${#OUTPUT} chars)"
85
- if [[ -n "$OUTPUT" ]] && [[ ${#OUTPUT} -lt 200 ]]; then
210
+ echo -e "${RED}✗ FAILED${NC} (exit=$EXIT_CODE, ${OUTPUT_LEN} chars)"
211
+ if [[ -n "$OUTPUT" ]] && [[ $OUTPUT_LEN -lt 200 ]]; then
86
212
  echo " Output: $OUTPUT"
87
213
  fi
88
214
  FAILED_MODELS="$FAILED_MODELS $model"
package/lib/ai-cli.sh CHANGED
@@ -224,6 +224,13 @@ $prompt"
224
224
  ai_output=$(timeout -k 30 600 opencode -m openrouter/moonshotai/kimi-k2.5 run "$prompt" 2>&1)
225
225
  local ai_exit_code=$?
226
226
  ;;
227
+ qwen)
228
+ local ai_output
229
+ # Qwen latest - Alibaba's flagship model (currently qwen3.5-plus)
230
+ # Linear attention + sparse MoE, strong multimodal capabilities
231
+ ai_output=$(timeout -k 30 600 opencode -m openrouter/qwen/qwen3.5-plus-02-15 run "$prompt" 2>&1)
232
+ local ai_exit_code=$?
233
+ ;;
227
234
  codex-oss-local)
228
235
  # Codex-OSS via Codex CLI with Ollama backend
229
236
  local ai_output
package/lib/config.sh CHANGED
@@ -61,16 +61,19 @@ DEFAULT_WORKER_MAX_CANDIDATES=3
61
61
  # Primary: Strong models used in normal operation
62
62
  # Fallback: Cheap/backup models used only when primary tier exhausted
63
63
  #
64
- # Run: GLM-5 is now the primary model (744B MoE, $0.80/M tokens, 77.8% SWE-bench)
65
- DEFAULT_LLM_RUN="glm-5 glm-5 glm-5 kimi-k2.5 kimi-k2.5"
66
- DEFAULT_LLM_RUN_FALLBACK="gemini-3-flash codex-oss-local haiku"
64
+ # Run: Primary models for code generation
65
+ # Sonnet 4.6: 79.6% SWE-bench, $3/$15 per M tokens
66
+ # GLM-5: 744B MoE, $0.80/M tokens, 77.8% SWE-bench
67
+ # Qwen: hybrid linear attention + sparse MoE, strong reasoning
68
+ DEFAULT_LLM_RUN="sonnet glm-5 glm-5 qwen kimi-k2.5"
69
+ DEFAULT_LLM_RUN_FALLBACK="gemini-3-flash codex-oss-local"
67
70
  #
68
71
  # Ideate: Only agentic models that can edit files reliably
69
72
  # AIDEV-NOTE: Ideation REQUIRES file editing - non-agentic models (opencode run, codex) return text
70
73
  # but don't actually edit files. Only use claude/gemini CLI, cursor-agent, or zai-coding-plan models.
71
74
  # OpenRouter models (via opencode) are chat-only and CANNOT edit files for ideation.
72
75
  DEFAULT_LLM_IDEATE="opus-think sonnet-think glm-5-zai gemini-pro kimi-coder"
73
- DEFAULT_LLM_IDEATE_FALLBACK="sonnet glm-zai haiku"
76
+ DEFAULT_LLM_IDEATE_FALLBACK="sonnet glm-5-zai"
74
77
 
75
78
  # Load configuration from a YAML file and update variables
76
79
  _load_yaml_config() {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-evolve",
3
- "version": "1.11.11",
3
+ "version": "1.11.13",
4
4
  "bin": {
5
5
  "claude-evolve": "bin/claude-evolve",
6
6
  "claude-evolve-main": "bin/claude-evolve-main",