claude-evolve 1.11.19 → 1.13.0

@@ -2,8 +2,8 @@
  # claude-evolve-check - Health check for AI model configurations
  # Tests all configured AI models to verify they're working before starting evolution runs
  #
- # AIDEV-NOTE: Claude CLI has issues with timeout command and subshells.
- # We test each model by running it directly and checking the result file.
+ # AIDEV-NOTE: Uses call_ai_model_configured from ai-cli.sh directly instead of
+ # maintaining a duplicate case statement. Single source of truth for model commands.

  set -e

@@ -18,9 +18,10 @@ NC='\033[0m' # No Color
  SCRIPT_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}" 2>/dev/null || echo "${BASH_SOURCE[0]}")")" && pwd)"
  LIB_DIR="$(dirname "$SCRIPT_DIR")/lib"

- # Source configuration to get model lists
+ # Source configuration and ai-cli to get model lists and call function
  source "$LIB_DIR/config.sh"
  load_config
+ source "$LIB_DIR/ai-cli.sh"

  # Test timeout in seconds
  TEST_TIMEOUT=30
@@ -30,8 +31,7 @@ TEST_PROMPT="Say hello in exactly 3 words."

  # Temp files
  TEMP_OUTPUT=$(mktemp)
- TEMP_PID=$(mktemp)
- trap "rm -f $TEMP_OUTPUT $TEMP_PID" EXIT
+ trap "rm -f $TEMP_OUTPUT" EXIT

  echo -e "${CYAN}🔍 Claude Evolve - AI Model Health Check${NC}"
  echo "============================================"
@@ -55,150 +55,15 @@ FAILED=0
  FAILED_MODELS=""
  TIMEOUT_MODELS=""

- # Test a single model by writing a tiny test script and running it
+ # Test a single model using call_ai_model_configured from ai-cli.sh
  test_model() {
    local model="$1"
    local prompt="$2"
    local outfile="$3"
    local max_wait="$4"

-   # Write a self-contained test script
-   local test_script=$(mktemp)
-   case "$model" in
-     opus|sonnet|haiku)
-       cat > "$test_script" << SCRIPT
- #!/usr/bin/env bash
- exec claude --dangerously-skip-permissions --mcp-config '' --model $model -p "\$1"
- SCRIPT
-       ;;
-     opus-think)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec claude --dangerously-skip-permissions --mcp-config '' --model opus -p "ultrathink
-
- $1"
- SCRIPT
-       ;;
-     sonnet-think)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "ultrathink
-
- $1"
- SCRIPT
-       ;;
-     gemini-pro)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec gemini -y -m auto-gemini-3 -p "$1"
- SCRIPT
-       ;;
-     gemini-flash|gemini-3-flash)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec gemini -y -m gemini-2.5-flash -p "$1"
- SCRIPT
-       ;;
-     kimi-coder)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec kimi --print -y -m kimi-for-coding -c "$1"
- SCRIPT
-       ;;
-     kimi-k2.5)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec opencode -m openrouter/moonshotai/kimi-k2.5 run "$1"
- SCRIPT
-       ;;
-     glm-5)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec opencode -m openrouter/z-ai/glm-5 run "$1"
- SCRIPT
-       ;;
-     glm-5-zai)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec opencode -m zai-coding-plan/glm-5 run "$1"
- SCRIPT
-       ;;
-     glm-zai)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec opencode -m zai-coding-plan/glm-4.7 run "$1"
- SCRIPT
-       ;;
-     codex-oss-local)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama "$1"
- SCRIPT
-       ;;
-     gpt5|gpt5high)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec codex exec --dangerously-bypass-approvals-and-sandbox "$1"
- SCRIPT
-       ;;
-     codex-think)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec codex exec -m gpt-5.4 -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$1"
- SCRIPT
-       ;;
-     codex-coding)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec codex exec -m gpt-5.4 -c model_reasoning_effort="medium" --dangerously-bypass-approvals-and-sandbox "$1"
- SCRIPT
-       ;;
-     gpt-5.4)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec codex exec -m gpt-5.4 --dangerously-bypass-approvals-and-sandbox "$1"
- SCRIPT
-       ;;
-     gpt-5.2)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec codex exec -m gpt-5.2 --dangerously-bypass-approvals-and-sandbox "$1"
- SCRIPT
-       ;;
-     gpt-5.3-codex)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec codex exec -m gpt-5.3-codex --dangerously-bypass-approvals-and-sandbox "$1"
- SCRIPT
-       ;;
-     codex-spark|gpt-5.1-codex-mini)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec codex exec -m gpt-5.1-codex-mini --dangerously-bypass-approvals-and-sandbox "$1"
- SCRIPT
-       ;;
-     gemini-5-flash)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec gemini -y -m gemini-3-flash-preview -p "$1"
- SCRIPT
-       ;;
-     qwen-openrouter)
-       cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec opencode -m openrouter/qwen/qwen3.6-plus:free run "$1"
- SCRIPT
-       ;;
-     *)
-       echo "Unknown model: $model" > "$outfile"
-       rm -f "$test_script"
-       return 1
-       ;;
-   esac
-   chmod +x "$test_script"
-
-   # Run the test script in background, capture output to file
-   "$test_script" "$prompt" > "$outfile" 2>&1 &
+   # Run call_ai_model_configured in background, capture output to file
+   call_ai_model_configured "$model" "$prompt" > "$outfile" 2>&1 &
    local pid=$!

    # Poll for completion
@@ -209,7 +74,6 @@ SCRIPT
        sleep 1
        kill -9 "$pid" 2>/dev/null
        wait "$pid" 2>/dev/null
-       rm -f "$test_script"
        return 124
      fi
      sleep 1
@@ -217,9 +81,7 @@ SCRIPT
    done

    wait "$pid"
-   local rc=$?
-   rm -f "$test_script"
-   return $rc
+   return $?
  }

  # Test each model
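
The check still avoids timeout(1): the old AIDEV-NOTE records that the bash timeout command makes the claude CLI hang when invoked through subshells, so test_model launches the call in the background and polls the PID itself. A minimal sketch of that poll-and-kill pattern under the same assumptions (run_with_deadline is an illustrative name, not part of the package):

  run_with_deadline() {
    local max_wait="$1"; shift
    "$@" &                                 # launch the CLI call in the background
    local pid=$! waited=0
    while kill -0 "$pid" 2>/dev/null; do   # still running?
      if [ "$waited" -ge "$max_wait" ]; then
        kill -9 "$pid" 2>/dev/null         # hard-kill at the deadline
        wait "$pid" 2>/dev/null            # reap the killed process
        return 124                         # the exit code timeout(1) would use
      fi
      sleep 1
      waited=$((waited + 1))
    done
    wait "$pid"                            # propagate the command's exit status
  }

Returning 124 lets callers tell a deadline kill from an ordinary failure, presumably how the check separates TIMEOUT_MODELS from FAILED_MODELS.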
package/lib/ai-cli.sh CHANGED
@@ -53,13 +53,16 @@ EOF
  call_ai_model_configured() {
    local model_name="$1"
    local prompt="$2"
-   local codex_gpt5_model="${CODEX_GPT5_MODEL:-gpt-5.2}"
+   local codex_gpt_model="${CODEX_GPT_MODEL:-${CODEX_GPT5_MODEL:-gpt-5.2}}"

    # Record start time
    local start_time=$(date +%s)

    # Build command directly based on model
+   # AIDEV-NOTE: Model names are role-based, never versioned. When upgrading a model,
+   # update the model ID in the command below, not the case label.
    case "$model_name" in
+     # --- Claude (subscription) ---
      opus)
        local ai_output
        ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model opus -p "$prompt" 2>&1)
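
The new codex_gpt_model line keeps the legacy CODEX_GPT5_MODEL variable working while introducing the version-neutral CODEX_GPT_MODEL; resolution is ordinary nested default expansion. A quick sketch of the precedence (the exported values are illustrative):

  unset CODEX_GPT_MODEL CODEX_GPT5_MODEL
  echo "${CODEX_GPT_MODEL:-${CODEX_GPT5_MODEL:-gpt-5.2}}"   # gpt-5.2 (built-in default)
  export CODEX_GPT5_MODEL=gpt-5.3-codex
  echo "${CODEX_GPT_MODEL:-${CODEX_GPT5_MODEL:-gpt-5.2}}"   # gpt-5.3-codex (legacy name still honored)
  export CODEX_GPT_MODEL=gpt-5.4
  echo "${CODEX_GPT_MODEL:-${CODEX_GPT5_MODEL:-gpt-5.2}}"   # gpt-5.4 (new name wins)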
@@ -70,219 +73,200 @@ call_ai_model_configured() {
        ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     sonnet-think)
+     haiku)
        local ai_output
-       # Use extended thinking with sonnet 4.5 - prepend ultrathink instruction
-       # AIDEV-NOTE: Extended thinking can take long for complex ideation
-       local think_prompt="ultrathink
-
- $prompt"
-       ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "$think_prompt" 2>&1)
+       ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model haiku -p "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
      opus-think)
        local ai_output
-       # Use extended thinking with opus - prepend ultrathink instruction
-       # AIDEV-NOTE: Extended thinking can take long for complex ideation
        local think_prompt="ultrathink

  $prompt"
        ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model opus -p "$think_prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     haiku)
+     sonnet-think)
        local ai_output
-       ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model haiku -p "$prompt" 2>&1)
+       local think_prompt="ultrathink
+
+ $prompt"
+       ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "$think_prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     gpt5high)
+     opus-openrouter)
        local ai_output
-       ai_output=$(codex exec -m "$codex_gpt5_model" -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+       ai_output=$(opencode -m openrouter/anthropic/claude-opus-4.7 run "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     gpt5)
+     cursor-sonnet)
        local ai_output
-       ai_output=$(codex exec -m "$codex_gpt5_model" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+       ai_output=$(cursor-agent sonnet-4.6 -p "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     codex-think)
+     cursor-opus)
        local ai_output
-       # GPT-5.4 high reasoning - for ideation tasks requiring deep thinking
-       ai_output=$(codex exec -m gpt-5.4 -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+       ai_output=$(cursor-agent opus -p "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     codex-coding)
+     # --- Codex/GPT (subscription) ---
+     gpt)
        local ai_output
-       # GPT-5.4 medium reasoning - for coding/implementation tasks
-       ai_output=$(codex exec -m gpt-5.4 -c model_reasoning_effort="medium" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+       ai_output=$(codex exec -m "$codex_gpt_model" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     gpt-5.4)
+     gpt-high)
        local ai_output
-       # GPT-5.4 - latest frontier agentic coding model via Codex CLI
-       ai_output=$(codex exec -m gpt-5.4 --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+       ai_output=$(codex exec -m "$codex_gpt_model" -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     gpt-5.2)
+     codex-think)
        local ai_output
-       # GPT-5.2 via Codex CLI
-       ai_output=$(codex exec -m gpt-5.2 --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+       # High reasoning - for ideation tasks requiring deep thinking
+       ai_output=$(codex exec -m gpt-5.4 -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     gpt-5.3-codex)
+     codex-coding)
        local ai_output
-       # GPT-5.3 Codex via Codex CLI
-       ai_output=$(codex exec -m gpt-5.3-codex --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+       # Medium reasoning - for coding/implementation tasks
+       ai_output=$(codex exec -m gpt-5.4 -c model_reasoning_effort="medium" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     codex-spark|gpt-5.1-codex-mini)
+     codex-spark)
        local ai_output
-       # GPT-5.1 Codex Mini - cheap/fast lightweight fallback via Codex CLI
+       # Cheap/fast lightweight fallback
        ai_output=$(codex exec -m gpt-5.1-codex-mini --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     o3high)
-       local ai_output
-       ai_output=$(codex exec -m o3-mini -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
-       local ai_exit_code=$?
-       ;;
+     # --- Gemini (subscription) ---
      gemini-pro)
        local ai_output
-       # Gemini 3 auto-routing (gemini-3.1-pro / gemini-3-flash) - streams output while working
+       # Auto-routing to best Gemini model - streams output while working
        ai_output=$(gemini -y -m auto-gemini-3 -p "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
      gemini-flash)
        local ai_output
-       # Gemini streams output while working
        ai_output=$(gemini -y -m gemini-2.5-flash -p "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     gemini-5-flash)
+     gemini-cheap)
        local ai_output
-       # Gemini 5 Flash - cheap fallback model
+       # Fast cheap fallback via gemini CLI
        ai_output=$(gemini -y -m gemini-3-flash-preview -p "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     gemini-3-pro-preview)
+     gemini-pro-openrouter)
        local ai_output
-       # Gemini v3 Pro Preview via OpenRouter - EXPENSIVE
+       # Gemini Pro via OpenRouter - EXPENSIVE
        ai_output=$(opencode -m openrouter/google/gemini-3-pro-preview run "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     gemini-3-flash)
+     # --- GLM / Z.AI ---
+     glm)
        local ai_output
-       # Gemini 3 Flash - fast, cheap, strong thinker
-       ai_output=$(opencode -m openrouter/google/gemini-3-flash-preview run "$prompt" 2>&1)
+       # Latest GLM flagship via OpenRouter
+       ai_output=$(opencode -m openrouter/z-ai/glm-5.1 run "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     cursor-sonnet)
+     glm-zai)
        local ai_output
-       ai_output=$(cursor-agent sonnet-4.5 -p "$prompt" 2>&1)
+       # Latest GLM via Z.AI agentic mode (may lag OpenRouter by one version)
+       ai_output=$(opencode -m zai-coding-plan/glm-5 run "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     cursor-opus)
+     # --- Qwen / Alibaba ---
+     qwen)
        local ai_output
-       ai_output=$(cursor-agent opus -p "$prompt" 2>&1)
+       # Latest Qwen flagship via OpenRouter
+       ai_output=$(opencode -m openrouter/qwen/qwen3.6-plus run "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     glm-openrouter)
+     qwen-coder)
        local ai_output
-       ai_output=$(opencode -m openrouter/z-ai/glm-4.7 run "$prompt" 2>&1)
+       # Qwen coding specialist - large MoE
+       ai_output=$(opencode -m openrouter/qwen/qwen3-coder run "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     glm-5)
+     # --- DeepSeek ---
+     deepseek)
        local ai_output
-       # GLM-5: 744B MoE model, very cheap ($0.80/$2.56 per 1M tokens), 200K context
-       # Released Feb 2026 - scores 77.8% SWE-bench, MIT license
-       ai_output=$(opencode -m openrouter/z-ai/glm-5 run "$prompt" 2>&1)
+       # Latest DeepSeek via OpenRouter
+       ai_output=$(opencode -m openrouter/deepseek/deepseek-v3.2 run "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     glm-zai)
-       # GLM 4.7 via Z.AI agentic mode -- can be slow sometimes
+     deepseek-local)
        local ai_output
-       ai_output=$(opencode -m zai-coding-plan/glm-4.7 run "$prompt" 2>&1)
+       # DeepSeek via Codex CLI with Ollama cloud backend
+       ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m deepseek-v3.1:671b-cloud "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     glm-5-zai)
-       # GLM-5 via Z.AI agentic mode - supports file editing for ideation
-       # 744B MoE, strong reasoning, can edit files
+     # --- Kimi / Moonshot ---
+     kimi-coder)
        local ai_output
-       ai_output=$(opencode -m zai-coding-plan/glm-5 run "$prompt" 2>&1)
+       # Kimi coding model via kimi CLI
+       ai_output=$(kimi --print -y -m kimi-for-coding -c "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     deepseek-openrouter)
+     kimi-think)
        local ai_output
-       ai_output=$(opencode -m openrouter/deepseek/deepseek-v3.2 run "$prompt" 2>&1)
+       # Kimi thinking via kimi CLI
+       ai_output=$(kimi --print -c "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     grok-code-fast-openrouter)
+     kimi-openrouter)
        local ai_output
-       ai_output=$(opencode -m openrouter/x-ai/grok-code-fast-1 run "$prompt" 2>&1)
+       # Latest Kimi via OpenRouter
+       ai_output=$(opencode -m openrouter/moonshotai/kimi-k2.5 run "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     grok-4-openrouter)
+     # --- Grok / xAI ---
+     grok)
        local ai_output
-       # EXPENSIVE - consider grok-4.1-fast instead
+       # Latest Grok via OpenRouter - EXPENSIVE
        ai_output=$(opencode -m openrouter/x-ai/grok-4 run "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     grok-4.1-fast)
+     grok-fast)
        local ai_output
-       # Grok 4.1 Fast - close to Grok 4 quality, much cheaper
+       # Grok fast variant - close to full quality, much cheaper
        ai_output=$(opencode -m openrouter/x-ai/grok-4.1-fast run "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     opus-openrouter)
+     # --- MiniMax ---
+     minimax)
        local ai_output
-       ai_output=$(opencode -m openrouter/anthropic/claude-opus-4.1 run "$prompt" 2>&1)
+       # Latest MiniMax reasoning model via OpenRouter
+       ai_output=$(opencode -m openrouter/minimax/minimax-m2.7 run "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     kimi-k2-openrouter)
+     # --- Ollama cloud models (flat-rate subscription) ---
+     ollama-glm)
        local ai_output
-       # Kimi K2 Thinking via OpenRouter (no separate auth needed)
-       ai_output=$(opencode -m openrouter/moonshotai/kimi-k2-thinking run "$prompt" 2>&1)
+       ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m glm-5.1:cloud "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     kimi-k2-think-moonshot)
+     ollama-gemma)
        local ai_output
-       # Use kimi CLI directly (assumes kimi is installed and configured)
-       ai_output=$(kimi --print -c "$prompt" 2>&1)
+       ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m gemma4:31b-cloud "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     kimi-coder)
+     ollama-minimax)
        local ai_output
-       # Kimi for Coding model via kimi CLI (fast coding-focused model)
-       # Use --print to see agent actions while still allowing file modifications
-       ai_output=$(kimi --print -y -m kimi-for-coding -c "$prompt" 2>&1)
+       ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m minimax-m2.7:cloud "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     kimi-k2.5)
+     ollama-qwen)
        local ai_output
-       # Kimi K2.5 - Moonshot's most powerful model (Jan 2025)
-       # Native multimodal agentic model, stronger than GLM-4.7
-       ai_output=$(opencode -m openrouter/moonshotai/kimi-k2.5 run "$prompt" 2>&1)
+       ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m qwen3.6:cloud "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     qwen-openrouter)
-       local ai_output
-       # Qwen latest - Alibaba's flagship model (currently qwen3.6-plus, free promotional tier)
-       # Linear attention + sparse MoE, strong multimodal capabilities
-       ai_output=$(opencode -m openrouter/qwen/qwen3.6-plus:free run "$prompt" 2>&1)
-       local ai_exit_code=$?
-       ;;
-     codex-oss-local)
-       # Codex-OSS via Codex CLI with Ollama backend
+     # --- Local inference ---
+     codex-local)
        local ai_output
        ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama "$prompt" 2>&1)
        local ai_exit_code=$?
        ;;
-     deepseek-v3-llamacloud)
-       # Deepseek via Codex CLI with Ollama cloud backend
-       local ai_output
-       ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss -m deepseek-v3.1:671b-cloud "$prompt" 2>&1)
-       local ai_exit_code=$?
-       ;;
    esac

    # Debug: log model and prompt size
@@ -344,7 +328,7 @@ clean_ai_output() {
    local model_name="$2"

    # Handle codex-specific output format
-   if [[ "$model_name" == "codex" || "$model_name" == "o3high" || "$model_name" == "gpt5high" ]]; then
+   if [[ "$model_name" == "codex" || "$model_name" == "gpt" || "$model_name" == "gpt-high" ]]; then
      # Clean codex output - extract content between "codex" marker and "tokens used"
      if echo "$output" | grep -q "^\[.*\] codex$"; then
        # Extract content between "codex" line and "tokens used" line
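
Per the new AIDEV-NOTE, each case label is a stable role name and a model upgrade only touches the model ID inside the command. A hypothetical entry following that convention (the example-coder role and the openrouter/example/example-coder-v1 ID are invented for illustration):

    example-coder)
      local ai_output
      # Upgrade path: bump only the model ID below; callers keep saying "example-coder"
      ai_output=$(opencode -m openrouter/example/example-coder-v1 run "$prompt" 2>&1)
      local ai_exit_code=$?
      ;;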
package/lib/ai_cli.py CHANGED
@@ -241,20 +241,33 @@ def get_fallback_models_for_command(command: str) -> List[str]:
  # in bash, because the bash `timeout` command causes claude CLI (and sometimes
  # gemini CLI) to hang when called from nested subprocess contexts.
  MODEL_TIMEOUTS = {
-     # Claude models - 5 minutes for standard, 30 minutes for thinking
+     # Claude - 5 min standard, 30 min thinking
      'opus': 300, 'sonnet': 300, 'haiku': 300,
      'opus-think': 1800, 'sonnet-think': 1800,
-     # Gemini - 30 min for pro (streams while working), 20 min for flash
-     'gemini-pro': 1800, 'gemini-flash': 1200, 'gemini-3-flash': 600,
-     'gemini-3-pro-preview': 1800, 'gemini-5-flash': 600,
-     # Codex/OpenAI models - 10 min standard
+     'opus-openrouter': 600, 'cursor-sonnet': 300, 'cursor-opus': 300,
+     # Codex/GPT - 10 min standard
+     'gpt': 600, 'gpt-high': 900,
      'codex-think': 900, 'codex-coding': 600, 'codex-spark': 300,
-     'gpt-5.4': 600, 'gpt-5.2': 600, 'gpt-5.3-codex': 600,
-     'gpt-5.1-codex-mini': 300,
-     # Z.AI agentic modes - 30 min (can be slow)
-     'glm-zai': 1800, 'glm-5-zai': 1800,
-     # Codex local - 40 min (local inference can be slow)
-     'codex-oss-local': 2400,
+     # Gemini - 30 min for pro (streams), 10 min for flash
+     'gemini-pro': 1800, 'gemini-flash': 1200,
+     'gemini-cheap': 600, 'gemini-pro-openrouter': 1800,
+     # GLM / Z.AI - 30 min (Z.AI can be slow)
+     'glm': 600, 'glm-zai': 1800,
+     # Qwen
+     'qwen': 600, 'qwen-coder': 1200,
+     # DeepSeek
+     'deepseek': 600, 'deepseek-local': 2400,
+     # Kimi
+     'kimi-coder': 600, 'kimi-think': 900, 'kimi-openrouter': 600,
+     # Grok
+     'grok': 600, 'grok-fast': 600,
+     # MiniMax
+     'minimax': 600,
+     # Ollama cloud
+     'ollama-glm': 1200, 'ollama-gemma': 1200,
+     'ollama-minimax': 1200, 'ollama-qwen': 1200,
+     # Local inference
+     'codex-local': 2400,
  }
  DEFAULT_MODEL_TIMEOUT = 600  # 10 minutes for everything else
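
MODEL_TIMEOUTS is keyed by the same role names, and DEFAULT_MODEL_TIMEOUT covers anything unlisted, so a lookup reduces to a dict.get. A sketch of what that implies (get_model_timeout is an illustrative helper; only the two constants come from the file):

  def get_model_timeout(model_name: str) -> int:
      # Roles absent from the table fall back to the 10-minute default.
      return MODEL_TIMEOUTS.get(model_name, DEFAULT_MODEL_TIMEOUT)

  get_model_timeout('ollama-qwen')    # 1200 - Ollama cloud gets 20 minutes
  get_model_timeout('some-new-role')  # 600  - DEFAULT_MODEL_TIMEOUT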
package/lib/config.sh CHANGED
@@ -63,13 +63,14 @@ DEFAULT_WORKER_MAX_CANDIDATES=3
  #
  # Run: Subscription-based agentic models for code generation
  # All CLI tools (opencode, claude, gemini, kimi) are agentic and can edit files
- DEFAULT_LLM_RUN="gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder codex-coding codex-coding sonnet"
- DEFAULT_LLM_RUN_FALLBACK="haiku glm-5-zai gemini-5-flash codex-spark"
+ # Ollama cloud models are flat-rate (subscription), so prefer them over per-token OpenRouter
+ DEFAULT_LLM_RUN="gemini-pro gemini-pro ollama-glm ollama-glm ollama-qwen ollama-qwen ollama-minimax ollama-minimax ollama-gemma ollama-gemma kimi-coder kimi-coder codex-coding codex-coding glm-zai qwen-coder minimax sonnet"
+ DEFAULT_LLM_RUN_FALLBACK="haiku ollama-glm ollama-gemma ollama-minimax ollama-qwen glm-zai gemini-cheap codex-spark qwen"
  #
  # Ideate: Agentic models that can edit files for ideation
  # All CLI tools (opencode, claude, gemini, kimi) are agentic and can edit files
- DEFAULT_LLM_IDEATE="opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2 codex-think qwen-openrouter"
- DEFAULT_LLM_IDEATE_FALLBACK="haiku glm-5-zai gemini-5-flash codex-spark"
+ DEFAULT_LLM_IDEATE="opus-think ollama-glm ollama-glm gemini-pro ollama-qwen ollama-minimax ollama-gemma kimi-coder gpt codex-think glm-zai qwen-coder minimax qwen"
+ DEFAULT_LLM_IDEATE_FALLBACK="haiku ollama-glm ollama-gemma ollama-minimax ollama-qwen glm-zai gemini-cheap codex-spark qwen"

  # Load configuration from a YAML file and update variables
  _load_yaml_config() {
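
The DEFAULT_LLM_* values are plain space-separated lists, and names such as gemini-pro and ollama-glm are listed twice, which reads as giving those models extra turns in the rotation. A hedged sketch of consuming such a list (the override value is illustrative):

  # Hypothetical user override - same format as the defaults above:
  LLM_RUN="gemini-pro gemini-pro ollama-glm sonnet"
  for model in $LLM_RUN; do          # unquoted on purpose: split on spaces
    echo "run candidate: $model"     # gemini-pro appears twice, so it is offered twice
  done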
@@ -316,17 +317,6 @@ show_config() {
    echo "  Max retries: $MAX_RETRIES"
    echo "  Memory limit: ${MEMORY_LIMIT_MB}MB"
    echo "  Worker max candidates: $WORKER_MAX_CANDIDATES"
-   echo "  LLM configuration:"
-   # Show LLM configurations using dynamic variable names
-   for model in gpt5high o3high gpt_5_codex gpt_5_2 gpt_5_3_codex gpt_5_3_codex_spark codex gemini gemini_5_flash opus opus_think sonnet sonnet_think cursor_sonnet cursor_opus glm deepseek; do
-     var_name="LLM_CLI_${model}"
-     var_value=$(eval echo "\$$var_name")
-     if [[ -n "$var_value" ]]; then
-       # Convert underscore back to dash for display
-       display_name=$(echo "$model" | sed 's/_/-/g')
-       echo "    $display_name: $var_value"
-     fi
-   done
    echo "  LLM for run: $LLM_RUN"
    echo "  LLM for run (fallback): $LLM_RUN_FALLBACK"
    echo "  LLM for ideate: $LLM_IDEATE"
@@ -164,7 +164,7 @@ Important: Make meaningful changes that match the description. Don't just add co

  IMPORTANT: If you need to read Python (.py) or CSV files, read them in chunks using offset and limit parameters to avoid context overload
  Example: Read(file_path='evolution_gen01-001.py', offset=0, limit=100) then Read(offset=100, limit=100), etc.
- This is especially important for models with smaller context windows (like GLM).
+ This is especially important for models with smaller context windows.

  CRITICAL: If you do not know how to implement what was asked for, or if the requested change is unclear or not feasible, you MUST refuse to make any changes. DO NOT modify the code if you are uncertain about the implementation. Simply respond that you cannot implement the requested change and explain why. It is better to refuse than to make incorrect or random changes."""

package/lib/llm_bandit.py CHANGED
@@ -356,7 +356,7 @@ if __name__ == "__main__":
      # Test the bandit
      print("Testing LLM Bandit...")

-     models = ["opus", "sonnet", "gemini-pro", "gpt5"]
+     models = ["opus", "sonnet", "gemini-pro", "gpt"]
      bandit = LLMBandit(models, state_file="/tmp/test_bandit.json")

      # Simulate some runs
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "claude-evolve",
-   "version": "1.11.19",
+   "version": "1.13.0",
    "bin": {
      "claude-evolve": "bin/claude-evolve",
      "claude-evolve-main": "bin/claude-evolve-main",