claude-evolve 1.11.12 → 1.11.13

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -2,8 +2,8 @@
2
2
  # claude-evolve-check - Health check for AI model configurations
3
3
  # Tests all configured AI models to verify they're working before starting evolution runs
4
4
  #
5
- # AIDEV-NOTE: Claude CLI hangs when called from subshells (command substitution).
6
- # We use temp files instead of $() to avoid this issue.
5
+ # AIDEV-NOTE: Claude CLI has issues with timeout command and subshells.
6
+ # We test each model by running it directly and checking the result file.
7
7
 
8
8
  set -e
9
9
 
@@ -22,15 +22,16 @@ LIB_DIR="$(dirname "$SCRIPT_DIR")/lib"
22
22
  source "$LIB_DIR/config.sh"
23
23
  load_config
24
24
 
25
- # Test timeout in seconds (short to fail fast)
25
+ # Test timeout in seconds
26
26
  TEST_TIMEOUT=30
27
27
 
28
28
  # Simple test prompt
29
29
  TEST_PROMPT="Say hello in exactly 3 words."
30
30
 
31
- # Temp file for output (avoids subshell issues with claude CLI)
31
+ # Temp files
32
32
  TEMP_OUTPUT=$(mktemp)
33
- trap "rm -f $TEMP_OUTPUT" EXIT
33
+ TEMP_PID=$(mktemp)
34
+ trap "rm -f $TEMP_OUTPUT $TEMP_PID" EXIT
34
35
 
35
36
  echo -e "${CYAN}🔍 Claude Evolve - AI Model Health Check${NC}"
36
37
  echo "============================================"
@@ -39,12 +40,11 @@ echo
39
40
  # Collect all unique models into a simple list
40
41
  ALL_MODELS=""
41
42
  for model in $LLM_RUN $LLM_RUN_FALLBACK $LLM_IDEATE $LLM_IDEATE_FALLBACK; do
42
- # Check if model already in list
43
43
  if ! echo "$ALL_MODELS" | grep -q -w "$model"; then
44
44
  ALL_MODELS="$ALL_MODELS $model"
45
45
  fi
46
46
  done
47
- ALL_MODELS=$(echo "$ALL_MODELS" | xargs) # Trim whitespace
47
+ ALL_MODELS=$(echo "$ALL_MODELS" | xargs)
48
48
 
49
49
  echo "Models to test: $ALL_MODELS"
50
50
  echo
@@ -55,78 +55,147 @@ FAILED=0
55
55
  FAILED_MODELS=""
56
56
  TIMEOUT_MODELS=""
57
57
 
58
- # Test each model - inline commands to avoid subshell issues
59
- for model in $ALL_MODELS; do
60
- echo -n "Testing $model... "
61
-
62
- START_TIME=$(date +%s)
58
+ # Test a single model by writing a tiny test script and running it
59
+ test_model() {
60
+ local model="$1"
61
+ local prompt="$2"
62
+ local outfile="$3"
63
+ local max_wait="$4"
63
64
 
64
- # Run directly, write to temp file to avoid subshell
65
- set +e
65
+ # Write a self-contained test script
66
+ local test_script=$(mktemp)
66
67
  case "$model" in
67
68
  opus|sonnet|haiku)
68
- timeout -k 5 $TEST_TIMEOUT claude --dangerously-skip-permissions --mcp-config '' --model "$model" -p "$TEST_PROMPT" > "$TEMP_OUTPUT" 2>&1
69
- EXIT_CODE=$?
69
+ cat > "$test_script" << SCRIPT
70
+ #!/usr/bin/env bash
71
+ exec claude --dangerously-skip-permissions --mcp-config '' --model $model -p "\$1"
72
+ SCRIPT
70
73
  ;;
71
- opus-think|sonnet-think)
72
- base_model="${model%-think}"
73
- timeout -k 5 $TEST_TIMEOUT claude --dangerously-skip-permissions --mcp-config '' --model "$base_model" -p "ultrathink
74
+ opus-think)
75
+ cat > "$test_script" << 'SCRIPT'
76
+ #!/usr/bin/env bash
77
+ exec claude --dangerously-skip-permissions --mcp-config '' --model opus -p "ultrathink
74
78
 
75
- $TEST_PROMPT" > "$TEMP_OUTPUT" 2>&1
76
- EXIT_CODE=$?
79
+ $1"
80
+ SCRIPT
81
+ ;;
82
+ sonnet-think)
83
+ cat > "$test_script" << 'SCRIPT'
84
+ #!/usr/bin/env bash
85
+ exec claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "ultrathink
86
+
87
+ $1"
88
+ SCRIPT
77
89
  ;;
78
90
  gemini-pro)
79
- timeout -k 5 $TEST_TIMEOUT gemini -y -m gemini-3-pro-preview -p "$TEST_PROMPT" > "$TEMP_OUTPUT" 2>&1
80
- EXIT_CODE=$?
91
+ cat > "$test_script" << 'SCRIPT'
92
+ #!/usr/bin/env bash
93
+ exec gemini -y -m gemini-3-pro-preview -p "$1"
94
+ SCRIPT
81
95
  ;;
82
96
  gemini-flash|gemini-3-flash)
83
- timeout -k 5 $TEST_TIMEOUT gemini -y -m gemini-2.5-flash -p "$TEST_PROMPT" > "$TEMP_OUTPUT" 2>&1
84
- EXIT_CODE=$?
97
+ cat > "$test_script" << 'SCRIPT'
98
+ #!/usr/bin/env bash
99
+ exec gemini -y -m gemini-2.5-flash -p "$1"
100
+ SCRIPT
85
101
  ;;
86
102
  kimi-coder)
87
- timeout -k 5 $TEST_TIMEOUT kimi --print -y -m kimi-for-coding -c "$TEST_PROMPT" > "$TEMP_OUTPUT" 2>&1
88
- EXIT_CODE=$?
103
+ cat > "$test_script" << 'SCRIPT'
104
+ #!/usr/bin/env bash
105
+ exec kimi --print -y -m kimi-for-coding -c "$1"
106
+ SCRIPT
89
107
  ;;
90
108
  kimi-k2.5)
91
- timeout -k 5 $TEST_TIMEOUT opencode -m openrouter/moonshotai/kimi-k2.5 run "$TEST_PROMPT" > "$TEMP_OUTPUT" 2>&1
92
- EXIT_CODE=$?
109
+ cat > "$test_script" << 'SCRIPT'
110
+ #!/usr/bin/env bash
111
+ exec opencode -m openrouter/moonshotai/kimi-k2.5 run "$1"
112
+ SCRIPT
93
113
  ;;
94
114
  glm-5)
95
- timeout -k 5 $TEST_TIMEOUT opencode -m openrouter/z-ai/glm-5 run "$TEST_PROMPT" > "$TEMP_OUTPUT" 2>&1
96
- EXIT_CODE=$?
115
+ cat > "$test_script" << 'SCRIPT'
116
+ #!/usr/bin/env bash
117
+ exec opencode -m openrouter/z-ai/glm-5 run "$1"
118
+ SCRIPT
97
119
  ;;
98
120
  glm-5-zai)
99
- timeout -k 5 $TEST_TIMEOUT opencode -m zai-coding-plan/glm-5 run "$TEST_PROMPT" > "$TEMP_OUTPUT" 2>&1
100
- EXIT_CODE=$?
121
+ cat > "$test_script" << 'SCRIPT'
122
+ #!/usr/bin/env bash
123
+ exec opencode -m zai-coding-plan/glm-5 run "$1"
124
+ SCRIPT
101
125
  ;;
102
126
  glm-zai)
103
- timeout -k 5 $TEST_TIMEOUT opencode -m zai-coding-plan/glm-4.7 run "$TEST_PROMPT" > "$TEMP_OUTPUT" 2>&1
104
- EXIT_CODE=$?
127
+ cat > "$test_script" << 'SCRIPT'
128
+ #!/usr/bin/env bash
129
+ exec opencode -m zai-coding-plan/glm-4.7 run "$1"
130
+ SCRIPT
105
131
  ;;
106
132
  codex-oss-local)
107
- timeout -k 5 $TEST_TIMEOUT codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama "$TEST_PROMPT" > "$TEMP_OUTPUT" 2>&1
108
- EXIT_CODE=$?
133
+ cat > "$test_script" << 'SCRIPT'
134
+ #!/usr/bin/env bash
135
+ exec codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama "$1"
136
+ SCRIPT
109
137
  ;;
110
138
  gpt5|gpt5high)
111
- timeout -k 5 $TEST_TIMEOUT codex exec --dangerously-bypass-approvals-and-sandbox "$TEST_PROMPT" > "$TEMP_OUTPUT" 2>&1
112
- EXIT_CODE=$?
139
+ cat > "$test_script" << 'SCRIPT'
140
+ #!/usr/bin/env bash
141
+ exec codex exec --dangerously-bypass-approvals-and-sandbox "$1"
142
+ SCRIPT
113
143
  ;;
114
144
  qwen)
115
- timeout -k 5 $TEST_TIMEOUT opencode -m openrouter/qwen/qwen3.5-plus-02-15 run "$TEST_PROMPT" > "$TEMP_OUTPUT" 2>&1
116
- EXIT_CODE=$?
145
+ cat > "$test_script" << 'SCRIPT'
146
+ #!/usr/bin/env bash
147
+ exec opencode -m openrouter/qwen/qwen3.5-plus-02-15 run "$1"
148
+ SCRIPT
117
149
  ;;
118
150
  *)
119
- echo "Unknown model: $model" > "$TEMP_OUTPUT"
120
- EXIT_CODE=1
151
+ echo "Unknown model: $model" > "$outfile"
152
+ rm -f "$test_script"
153
+ return 1
121
154
  ;;
122
155
  esac
156
+ chmod +x "$test_script"
157
+
158
+ # Run the test script in background, capture output to file
159
+ "$test_script" "$prompt" > "$outfile" 2>&1 &
160
+ local pid=$!
161
+
162
+ # Poll for completion
163
+ local elapsed=0
164
+ while kill -0 "$pid" 2>/dev/null; do
165
+ if [[ $elapsed -ge $max_wait ]]; then
166
+ kill "$pid" 2>/dev/null
167
+ sleep 1
168
+ kill -9 "$pid" 2>/dev/null
169
+ wait "$pid" 2>/dev/null
170
+ rm -f "$test_script"
171
+ return 124
172
+ fi
173
+ sleep 1
174
+ ((elapsed++))
175
+ done
176
+
177
+ wait "$pid"
178
+ local rc=$?
179
+ rm -f "$test_script"
180
+ return $rc
181
+ }
182
+
183
+ # Test each model
184
+ for model in $ALL_MODELS; do
185
+ echo -n "Testing $model... "
186
+
187
+ START_TIME=$(date +%s)
188
+
189
+ set +e
190
+ test_model "$model" "$TEST_PROMPT" "$TEMP_OUTPUT" "$TEST_TIMEOUT"
191
+ EXIT_CODE=$?
123
192
  set -e
124
193
 
125
194
  END_TIME=$(date +%s)
126
195
  DURATION=$((END_TIME - START_TIME))
127
196
 
128
- # Read output from temp file
129
- OUTPUT=$(cat "$TEMP_OUTPUT")
197
+ # Read output
198
+ OUTPUT=$(cat "$TEMP_OUTPUT" 2>/dev/null || true)
130
199
  OUTPUT_LEN=${#OUTPUT}
131
200
 
132
201
  # Check result
@@ -134,7 +203,7 @@ $TEST_PROMPT" > "$TEMP_OUTPUT" 2>&1
134
203
  echo -e "${GREEN}✓ OK${NC} (${DURATION}s, ${OUTPUT_LEN} chars)"
135
204
  ((PASSED++))
136
205
  elif [[ $EXIT_CODE -eq 124 ]]; then
137
- echo -e "${RED}✗ TIMEOUT${NC} (${TEST_TIMEOUT}s - likely permission dialog or hanging)"
206
+ echo -e "${RED}✗ TIMEOUT${NC} (${TEST_TIMEOUT}s)"
138
207
  TIMEOUT_MODELS="$TIMEOUT_MODELS $model"
139
208
  ((FAILED++))
140
209
  else
package/lib/config.sh CHANGED
@@ -61,18 +61,19 @@ DEFAULT_WORKER_MAX_CANDIDATES=3
61
61
  # Primary: Strong models used in normal operation
62
62
  # Fallback: Cheap/backup models used only when primary tier exhausted
63
63
  #
64
- # Run: GLM-5 and Qwen are primary models for code generation
64
+ # Run: Primary models for code generation
65
+ # Sonnet 4.6: 79.6% SWE-bench, $3/$15 per M tokens
65
66
  # GLM-5: 744B MoE, $0.80/M tokens, 77.8% SWE-bench
66
67
  # Qwen: hybrid linear attention + sparse MoE, strong reasoning
67
- DEFAULT_LLM_RUN="glm-5 glm-5 qwen kimi-k2.5 kimi-k2.5"
68
- DEFAULT_LLM_RUN_FALLBACK="gemini-3-flash codex-oss-local haiku"
68
+ DEFAULT_LLM_RUN="sonnet glm-5 glm-5 qwen kimi-k2.5"
69
+ DEFAULT_LLM_RUN_FALLBACK="gemini-3-flash codex-oss-local"
69
70
  #
70
71
  # Ideate: Only agentic models that can edit files reliably
71
72
  # AIDEV-NOTE: Ideation REQUIRES file editing - non-agentic models (opencode run, codex) return text
72
73
  # but don't actually edit files. Only use claude/gemini CLI, cursor-agent, or zai-coding-plan models.
73
74
  # OpenRouter models (via opencode) are chat-only and CANNOT edit files for ideation.
74
75
  DEFAULT_LLM_IDEATE="opus-think sonnet-think glm-5-zai gemini-pro kimi-coder"
75
- DEFAULT_LLM_IDEATE_FALLBACK="sonnet glm-zai haiku"
76
+ DEFAULT_LLM_IDEATE_FALLBACK="sonnet glm-5-zai"
76
77
 
77
78
  # Load configuration from a YAML file and update variables
78
79
  _load_yaml_config() {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-evolve",
3
- "version": "1.11.12",
3
+ "version": "1.11.13",
4
4
  "bin": {
5
5
  "claude-evolve": "bin/claude-evolve",
6
6
  "claude-evolve-main": "bin/claude-evolve-main",