claude-evolve 1.11.18 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/README.md +0 -0
  2. package/bin/claude-evolve-check +9 -135
  3. package/lib/__pycache__/ai_cli.cpython-310.pyc +0 -0
  4. package/lib/__pycache__/ai_cli.cpython-314.pyc +0 -0
  5. package/lib/__pycache__/embedding.cpython-310.pyc +0 -0
  6. package/lib/__pycache__/embedding.cpython-314.pyc +0 -0
  7. package/lib/__pycache__/evolution_csv.cpython-310.pyc +0 -0
  8. package/lib/__pycache__/evolution_csv.cpython-314.pyc +0 -0
  9. package/lib/__pycache__/evolve_ideate.cpython-310.pyc +0 -0
  10. package/lib/__pycache__/evolve_ideate.cpython-314.pyc +0 -0
  11. package/lib/__pycache__/log.cpython-310.pyc +0 -0
  12. package/lib/ai-cli.sh +93 -97
  13. package/lib/ai_cli.py +25 -11
  14. package/lib/config.py +0 -0
  15. package/lib/config.sh +5 -15
  16. package/lib/csv-lock.sh +0 -0
  17. package/lib/editor.sh +0 -0
  18. package/lib/evolution_csv.py +0 -0
  19. package/lib/evolution_processor.py +0 -0
  20. package/lib/evolve_ideate.py +0 -0
  21. package/lib/evolve_worker.py +1 -1
  22. package/lib/llm_bandit.py +1 -1
  23. package/lib/log.py +0 -0
  24. package/lib/meta_learning.py +0 -0
  25. package/lib/sandbox.sb +0 -0
  26. package/lib/sandbox_wrapper.py +0 -0
  27. package/package.json +1 -1
  28. package/templates/BRIEF.md +0 -0
  29. package/templates/algorithm.py +0 -0
  30. package/templates/config.yaml +9 -7
  31. package/templates/evaluator.py +0 -0
  32. package/lib/__pycache__/ai_cli.cpython-311.pyc +0 -0
  33. package/lib/__pycache__/evolution_csv.cpython-311.pyc +0 -0
  34. package/lib/__pycache__/evolution_csv.cpython-313.pyc +0 -0
  35. package/lib/__pycache__/evolve_run.cpython-311.pyc +0 -0
  36. package/lib/__pycache__/evolve_run.cpython-314.pyc +0 -0
  37. package/lib/__pycache__/evolve_worker.cpython-314.pyc +0 -0
  38. package/lib/__pycache__/llm_bandit.cpython-314.pyc +0 -0
  39. package/lib/__pycache__/log.cpython-311.pyc +0 -0
  40. package/lib/__pycache__/log.cpython-314.pyc +0 -0
  41. package/lib/__pycache__/meta_learning.cpython-314.pyc +0 -0
  42. package/lib/__pycache__/sandbox_wrapper.cpython-314.pyc +0 -0
package/README.md CHANGED
File without changes
package/bin/claude-evolve-check CHANGED
@@ -2,8 +2,8 @@
  # claude-evolve-check - Health check for AI model configurations
  # Tests all configured AI models to verify they're working before starting evolution runs
  #
- # AIDEV-NOTE: Claude CLI has issues with timeout command and subshells.
- # We test each model by running it directly and checking the result file.
+ # AIDEV-NOTE: Uses call_ai_model_configured from ai-cli.sh directly instead of
+ # maintaining a duplicate case statement. Single source of truth for model commands.

  set -e

@@ -18,9 +18,10 @@ NC='\033[0m' # No Color
  SCRIPT_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}" 2>/dev/null || echo "${BASH_SOURCE[0]}")")" && pwd)"
  LIB_DIR="$(dirname "$SCRIPT_DIR")/lib"

- # Source configuration to get model lists
+ # Source configuration and ai-cli to get model lists and call function
  source "$LIB_DIR/config.sh"
  load_config
+ source "$LIB_DIR/ai-cli.sh"

  # Test timeout in seconds
  TEST_TIMEOUT=30
@@ -30,8 +31,7 @@ TEST_PROMPT="Say hello in exactly 3 words."

  # Temp files
  TEMP_OUTPUT=$(mktemp)
- TEMP_PID=$(mktemp)
- trap "rm -f $TEMP_OUTPUT $TEMP_PID" EXIT
+ trap "rm -f $TEMP_OUTPUT" EXIT

  echo -e "${CYAN}🔍 Claude Evolve - AI Model Health Check${NC}"
  echo "============================================"
@@ -55,138 +55,15 @@ FAILED=0
  FAILED_MODELS=""
  TIMEOUT_MODELS=""

- # Test a single model by writing a tiny test script and running it
+ # Test a single model using call_ai_model_configured from ai-cli.sh
  test_model() {
  local model="$1"
  local prompt="$2"
  local outfile="$3"
  local max_wait="$4"

- # Write a self-contained test script
- local test_script=$(mktemp)
- case "$model" in
- opus|sonnet|haiku)
- cat > "$test_script" << SCRIPT
- #!/usr/bin/env bash
- exec claude --dangerously-skip-permissions --mcp-config '' --model $model -p "\$1"
- SCRIPT
- ;;
- opus-think)
- cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec claude --dangerously-skip-permissions --mcp-config '' --model opus -p "ultrathink
-
- $1"
- SCRIPT
- ;;
- sonnet-think)
- cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "ultrathink
-
- $1"
- SCRIPT
- ;;
- gemini-pro)
- cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec gemini -y -m gemini-3-pro-preview -p "$1"
- SCRIPT
- ;;
- gemini-flash|gemini-3-flash)
- cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec gemini -y -m gemini-2.5-flash -p "$1"
- SCRIPT
- ;;
- kimi-coder)
- cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec kimi --print -y -m kimi-for-coding -c "$1"
- SCRIPT
- ;;
- kimi-k2.5)
- cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec opencode -m openrouter/moonshotai/kimi-k2.5 run "$1"
- SCRIPT
- ;;
- glm-5)
- cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec opencode -m openrouter/z-ai/glm-5 run "$1"
- SCRIPT
- ;;
- glm-5-zai)
- cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec opencode -m zai-coding-plan/glm-5 run "$1"
- SCRIPT
- ;;
- glm-zai)
- cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec opencode -m zai-coding-plan/glm-4.7 run "$1"
- SCRIPT
- ;;
- codex-oss-local)
- cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama "$1"
- SCRIPT
- ;;
- gpt5|gpt5high)
- cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec codex exec --dangerously-bypass-approvals-and-sandbox "$1"
- SCRIPT
- ;;
- gpt-5-codex)
- cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec codex exec -m gpt-5-codex --dangerously-bypass-approvals-and-sandbox "$1"
- SCRIPT
- ;;
- gpt-5.2)
- cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec codex exec -m gpt-5.2 --dangerously-bypass-approvals-and-sandbox "$1"
- SCRIPT
- ;;
- gpt-5.3-codex)
- cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec codex exec -m gpt-5.3-codex --dangerously-bypass-approvals-and-sandbox "$1"
- SCRIPT
- ;;
- gpt-5.3-codex-spark)
- cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec codex exec -m gpt-5.3-codex-spark --dangerously-bypass-approvals-and-sandbox "$1"
- SCRIPT
- ;;
- gemini-5-flash)
- cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec gemini -y -m gemini-5-flash -p "$1"
- SCRIPT
- ;;
- qwen-openrouter)
- cat > "$test_script" << 'SCRIPT'
- #!/usr/bin/env bash
- exec opencode -m openrouter/qwen/qwen3.5-plus-02-15 run "$1"
- SCRIPT
- ;;
- *)
- echo "Unknown model: $model" > "$outfile"
- rm -f "$test_script"
- return 1
- ;;
- esac
- chmod +x "$test_script"
-
- # Run the test script in background, capture output to file
- "$test_script" "$prompt" > "$outfile" 2>&1 &
+ # Run call_ai_model_configured in background, capture output to file
+ call_ai_model_configured "$model" "$prompt" > "$outfile" 2>&1 &
  local pid=$!

  # Poll for completion
@@ -197,7 +74,6 @@ SCRIPT
  sleep 1
  kill -9 "$pid" 2>/dev/null
  wait "$pid" 2>/dev/null
- rm -f "$test_script"
  return 124
  fi
  sleep 1
@@ -205,9 +81,7 @@ SCRIPT
  done

  wait "$pid"
- local rc=$?
- rm -f "$test_script"
- return $rc
+ return $?
  }

  # Test each model
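Note: the rewritten test_model keeps the poll-based timeout pattern shown in the hunks above, because the bash `timeout` command makes the claude CLI (and sometimes the gemini CLI) hang in nested subprocess contexts. A minimal, self-contained sketch of that pattern follows; run_with_poll_timeout and its OUTFILE argument are illustrative names, not part of the package. Only the background job, the kill -9 on expiry, and the `timeout`-compatible exit code 124 mirror the diff itself.

    run_with_poll_timeout() {
      # Usage: run_with_poll_timeout MAX_WAIT OUTFILE CMD [ARGS...]
      local max_wait="$1" outfile="$2"; shift 2
      "$@" >"$outfile" 2>&1 &                 # run the command in the background
      local pid=$! waited=0
      while kill -0 "$pid" 2>/dev/null; do    # process still alive?
        if (( waited >= max_wait )); then
          kill -9 "$pid" 2>/dev/null
          wait "$pid" 2>/dev/null             # reap the killed job
          return 124                          # match GNU timeout's exit code
        fi
        sleep 1
        waited=$((waited + 1))
      done
      wait "$pid"                             # propagate the command's exit status
    }
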
package/lib/ai-cli.sh CHANGED
@@ -53,13 +53,16 @@ EOF
  call_ai_model_configured() {
  local model_name="$1"
  local prompt="$2"
- local codex_gpt5_model="${CODEX_GPT5_MODEL:-gpt-5.2}"
+ local codex_gpt_model="${CODEX_GPT_MODEL:-${CODEX_GPT5_MODEL:-gpt-5.2}}"

  # Record start time
  local start_time=$(date +%s)

  # Build command directly based on model
+ # AIDEV-NOTE: Model names are role-based, never versioned. When upgrading a model,
+ # update the model ID in the command below, not the case label.
  case "$model_name" in
+ # --- Claude (subscription) ---
  opus)
  local ai_output
  ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model opus -p "$prompt" 2>&1)
@@ -70,207 +73,200 @@ call_ai_model_configured() {
  ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- sonnet-think)
+ haiku)
+ local ai_output
+ ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model haiku -p "$prompt" 2>&1)
+ local ai_exit_code=$?
+ ;;
+ opus-think)
  local ai_output
- # Use extended thinking with sonnet 4.5 - prepend ultrathink instruction
- # AIDEV-NOTE: Extended thinking can take long for complex ideation
  local think_prompt="ultrathink

  $prompt"
- ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "$think_prompt" 2>&1)
+ ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model opus -p "$think_prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- opus-think)
+ sonnet-think)
  local ai_output
- # Use extended thinking with opus - prepend ultrathink instruction
- # AIDEV-NOTE: Extended thinking can take long for complex ideation
  local think_prompt="ultrathink

  $prompt"
- ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model opus -p "$think_prompt" 2>&1)
+ ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "$think_prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- haiku)
+ opus-openrouter)
  local ai_output
- ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model haiku -p "$prompt" 2>&1)
+ ai_output=$(opencode -m openrouter/anthropic/claude-opus-4.1 run "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- gpt5high)
+ cursor-sonnet)
  local ai_output
- ai_output=$(codex exec -m "$codex_gpt5_model" -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+ ai_output=$(cursor-agent sonnet-4.5 -p "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- gpt5)
+ cursor-opus)
  local ai_output
- ai_output=$(codex exec -m "$codex_gpt5_model" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+ ai_output=$(cursor-agent opus -p "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- gpt-5-codex)
+ # --- Codex/GPT (subscription) ---
+ gpt)
  local ai_output
- # GPT-5 Codex - code-specialized variant via Codex CLI
- ai_output=$(codex exec -m gpt-5-codex --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+ ai_output=$(codex exec -m "$codex_gpt_model" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- gpt-5.2)
+ gpt-high)
  local ai_output
- # GPT-5.2 via Codex CLI
- ai_output=$(codex exec -m gpt-5.2 --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+ ai_output=$(codex exec -m "$codex_gpt_model" -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- gpt-5.3-codex)
+ codex-think)
  local ai_output
- # GPT-5.3 Codex via Codex CLI
- ai_output=$(codex exec -m gpt-5.3-codex --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+ # High reasoning - for ideation tasks requiring deep thinking
+ ai_output=$(codex exec -m gpt-5.4 -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- gpt-5.3-codex-spark)
+ codex-coding)
  local ai_output
- # GPT-5.3 Codex Spark - lightweight fallback via Codex CLI
- ai_output=$(codex exec -m gpt-5.3-codex-spark --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+ # Medium reasoning - for coding/implementation tasks
+ ai_output=$(codex exec -m gpt-5.4 -c model_reasoning_effort="medium" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- o3high)
+ codex-spark)
  local ai_output
- ai_output=$(codex exec -m o3-mini -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+ # Cheap/fast lightweight fallback
+ ai_output=$(codex exec -m gpt-5.1-codex-mini --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
+ # --- Gemini (subscription) ---
  gemini-pro)
  local ai_output
- # Gemini streams output while working
- ai_output=$(gemini -y -m gemini-3-pro-preview -p "$prompt" 2>&1)
+ # Auto-routing to best Gemini model - streams output while working
+ ai_output=$(gemini -y -m auto-gemini-3 -p "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
  gemini-flash)
  local ai_output
- # Gemini streams output while working
  ai_output=$(gemini -y -m gemini-2.5-flash -p "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- gemini-5-flash)
+ gemini-cheap)
  local ai_output
- # Gemini 5 Flash - cheap fallback model
- ai_output=$(gemini -y -m gemini-5-flash -p "$prompt" 2>&1)
+ # Fast cheap fallback via gemini CLI
+ ai_output=$(gemini -y -m gemini-3-flash-preview -p "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- gemini-3-pro-preview)
+ gemini-pro-openrouter)
  local ai_output
- # Gemini v3 Pro Preview via OpenRouter - EXPENSIVE
+ # Gemini Pro via OpenRouter - EXPENSIVE
  ai_output=$(opencode -m openrouter/google/gemini-3-pro-preview run "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- gemini-3-flash)
+ # --- GLM / Z.AI ---
+ glm)
  local ai_output
- # Gemini 3 Flash - fast, cheap, strong thinker
- ai_output=$(opencode -m openrouter/google/gemini-3-flash-preview run "$prompt" 2>&1)
+ # Latest GLM flagship via OpenRouter
+ ai_output=$(opencode -m openrouter/z-ai/glm-5.1 run "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- cursor-sonnet)
+ glm-zai)
  local ai_output
- ai_output=$(cursor-agent sonnet-4.5 -p "$prompt" 2>&1)
+ # Latest GLM via Z.AI agentic mode (may lag OpenRouter by one version)
+ ai_output=$(opencode -m zai-coding-plan/glm-5 run "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- cursor-opus)
+ # --- Qwen / Alibaba ---
+ qwen)
  local ai_output
- ai_output=$(cursor-agent opus -p "$prompt" 2>&1)
+ # Latest Qwen flagship via OpenRouter
+ ai_output=$(opencode -m openrouter/qwen/qwen3.6-plus run "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- glm-openrouter)
+ qwen-coder)
  local ai_output
- ai_output=$(opencode -m openrouter/z-ai/glm-4.7 run "$prompt" 2>&1)
+ # Qwen coding specialist - large MoE
+ ai_output=$(opencode -m openrouter/qwen/qwen3-coder run "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- glm-5)
+ # --- DeepSeek ---
+ deepseek)
  local ai_output
- # GLM-5: 744B MoE model, very cheap ($0.80/$2.56 per 1M tokens), 200K context
- # Released Feb 2026 - scores 77.8% SWE-bench, MIT license
- ai_output=$(opencode -m openrouter/z-ai/glm-5 run "$prompt" 2>&1)
+ # Latest DeepSeek via OpenRouter
+ ai_output=$(opencode -m openrouter/deepseek/deepseek-v3.2 run "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- glm-zai)
- # GLM 4.7 via Z.AI agentic mode -- can be slow sometimes
+ deepseek-local)
  local ai_output
- ai_output=$(opencode -m zai-coding-plan/glm-4.7 run "$prompt" 2>&1)
+ # DeepSeek via Codex CLI with Ollama cloud backend
+ ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m deepseek-v3.1:671b-cloud "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- glm-5-zai)
- # GLM-5 via Z.AI agentic mode - supports file editing for ideation
- # 744B MoE, strong reasoning, can edit files
+ # --- Kimi / Moonshot ---
+ kimi-coder)
  local ai_output
- ai_output=$(opencode -m zai-coding-plan/glm-5 run "$prompt" 2>&1)
+ # Kimi coding model via kimi CLI
+ ai_output=$(kimi --print -y -m kimi-for-coding -c "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- deepseek-openrouter)
+ kimi-think)
  local ai_output
- ai_output=$(opencode -m openrouter/deepseek/deepseek-v3.2 run "$prompt" 2>&1)
+ # Kimi thinking via kimi CLI
+ ai_output=$(kimi --print -c "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- grok-code-fast-openrouter)
+ kimi-openrouter)
  local ai_output
- ai_output=$(opencode -m openrouter/x-ai/grok-code-fast-1 run "$prompt" 2>&1)
+ # Latest Kimi via OpenRouter
+ ai_output=$(opencode -m openrouter/moonshotai/kimi-k2.5 run "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- grok-4-openrouter)
+ # --- Grok / xAI ---
+ grok)
  local ai_output
- # EXPENSIVE - consider grok-4.1-fast instead
+ # Latest Grok via OpenRouter - EXPENSIVE
  ai_output=$(opencode -m openrouter/x-ai/grok-4 run "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- grok-4.1-fast)
+ grok-fast)
  local ai_output
- # Grok 4.1 Fast - close to Grok 4 quality, much cheaper
+ # Grok fast variant - close to full quality, much cheaper
  ai_output=$(opencode -m openrouter/x-ai/grok-4.1-fast run "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- opus-openrouter)
+ # --- MiniMax ---
+ minimax)
  local ai_output
- ai_output=$(opencode -m openrouter/anthropic/claude-opus-4.1 run "$prompt" 2>&1)
+ # Latest MiniMax reasoning model via OpenRouter
+ ai_output=$(opencode -m openrouter/minimax/minimax-m2.7 run "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- kimi-k2-openrouter)
+ # --- Ollama cloud models (flat-rate subscription) ---
+ ollama-glm)
  local ai_output
- # Kimi K2 Thinking via OpenRouter (no separate auth needed)
- ai_output=$(opencode -m openrouter/moonshotai/kimi-k2-thinking run "$prompt" 2>&1)
+ ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m glm-5.1:cloud "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- kimi-k2-think-moonshot)
+ ollama-gemma)
  local ai_output
- # Use kimi CLI directly (assumes kimi is installed and configured)
- ai_output=$(kimi --print -c "$prompt" 2>&1)
+ ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m gemma4:31b-cloud "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- kimi-coder)
+ ollama-minimax)
  local ai_output
- # Kimi for Coding model via kimi CLI (fast coding-focused model)
- # Use --print to see agent actions while still allowing file modifications
- ai_output=$(kimi --print -y -m kimi-for-coding -c "$prompt" 2>&1)
+ ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m minimax-m2.7:cloud "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- kimi-k2.5)
+ ollama-qwen)
  local ai_output
- # Kimi K2.5 - Moonshot's most powerful model (Jan 2025)
- # Native multimodal agentic model, stronger than GLM-4.7
- ai_output=$(opencode -m openrouter/moonshotai/kimi-k2.5 run "$prompt" 2>&1)
+ ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m qwen3.5:cloud "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- qwen-openrouter)
- local ai_output
- # Qwen latest - Alibaba's flagship model (currently qwen3.5-plus)
- # Linear attention + sparse MoE, strong multimodal capabilities
- ai_output=$(opencode -m openrouter/qwen/qwen3.5-plus-02-15 run "$prompt" 2>&1)
- local ai_exit_code=$?
- ;;
- codex-oss-local)
- # Codex-OSS via Codex CLI with Ollama backend
+ # --- Local inference ---
+ codex-local)
  local ai_output
  ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama "$prompt" 2>&1)
  local ai_exit_code=$?
  ;;
- deepseek-v3-llamacloud)
- # Deepseek via Codex CLI with Ollama cloud backend
- local ai_output
- ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss -m deepseek-v3.1:671b-cloud "$prompt" 2>&1)
- local ai_exit_code=$?
- ;;
  esac

  # Debug: log model and prompt size
@@ -332,7 +328,7 @@ clean_ai_output() {
  local model_name="$2"

  # Handle codex-specific output format
- if [[ "$model_name" == "codex" || "$model_name" == "o3high" || "$model_name" == "gpt5high" ]]; then
+ if [[ "$model_name" == "codex" || "$model_name" == "gpt" || "$model_name" == "gpt-high" ]]; then
  # Clean codex output - extract content between "codex" marker and "tokens used"
  if echo "$output" | grep -q "^\[.*\] codex$"; then
  # Extract content between "codex" line and "tokens used" line
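With the dispatcher as the single source of truth, any script in the package can probe a model the same way claude-evolve-check now does. A minimal sketch, assuming the bin/ and lib/ layout from the file list above; the role name "gpt" and the test prompt come from the diffs, everything else is illustrative:

    #!/usr/bin/env bash
    # Probe one role-based model name through the shared dispatcher.
    LIB_DIR="$(cd "$(dirname "$0")/../lib" && pwd)"
    source "$LIB_DIR/config.sh"
    load_config                     # apply config.yaml overrides
    source "$LIB_DIR/ai-cli.sh"

    if output=$(call_ai_model_configured "gpt" "Say hello in exactly 3 words."); then
      echo "gpt OK: $output"
    else
      echo "gpt FAILED (exit $?)" >&2
    fi
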
package/lib/ai_cli.py CHANGED
@@ -241,19 +241,33 @@ def get_fallback_models_for_command(command: str) -> List[str]:
  # in bash, because the bash `timeout` command causes claude CLI (and sometimes
  # gemini CLI) to hang when called from nested subprocess contexts.
  MODEL_TIMEOUTS = {
- # Claude models - 5 minutes for standard, 30 minutes for thinking
+ # Claude - 5 min standard, 30 min thinking
  'opus': 300, 'sonnet': 300, 'haiku': 300,
  'opus-think': 1800, 'sonnet-think': 1800,
- # Gemini - 30 min for pro (streams while working), 20 min for flash
- 'gemini-pro': 1800, 'gemini-flash': 1200, 'gemini-3-flash': 600,
- 'gemini-3-pro-preview': 1800, 'gemini-5-flash': 600,
- # Codex/OpenAI models - 10 min standard
- 'gpt-5-codex': 600, 'gpt-5.2': 600, 'gpt-5.3-codex': 600,
- 'gpt-5.3-codex-spark': 300,
- # Z.AI agentic modes - 30 min (can be slow)
- 'glm-zai': 1800, 'glm-5-zai': 1800,
- # Codex local - 40 min (local inference can be slow)
- 'codex-oss-local': 2400,
+ 'opus-openrouter': 600, 'cursor-sonnet': 300, 'cursor-opus': 300,
+ # Codex/GPT - 10 min standard
+ 'gpt': 600, 'gpt-high': 900,
+ 'codex-think': 900, 'codex-coding': 600, 'codex-spark': 300,
+ # Gemini - 30 min for pro (streams), 20 min for flash
+ 'gemini-pro': 1800, 'gemini-flash': 1200,
+ 'gemini-cheap': 600, 'gemini-pro-openrouter': 1800,
+ # GLM / Z.AI - 30 min (Z.AI can be slow)
+ 'glm': 600, 'glm-zai': 1800,
+ # Qwen
+ 'qwen': 600, 'qwen-coder': 1200,
+ # DeepSeek
+ 'deepseek': 600, 'deepseek-local': 2400,
+ # Kimi
+ 'kimi-coder': 600, 'kimi-think': 900, 'kimi-openrouter': 600,
+ # Grok
+ 'grok': 600, 'grok-fast': 600,
+ # MiniMax
+ 'minimax': 600,
+ # Ollama cloud
+ 'ollama-glm': 1200, 'ollama-gemma': 1200,
+ 'ollama-minimax': 1200, 'ollama-qwen': 1200,
+ # Local inference
+ 'codex-local': 2400,
  }
  DEFAULT_MODEL_TIMEOUT = 600 # 10 minutes for everything else

package/lib/config.py CHANGED
File without changes
package/lib/config.sh CHANGED
@@ -63,13 +63,14 @@ DEFAULT_WORKER_MAX_CANDIDATES=3
  #
  # Run: Subscription-based agentic models for code generation
  # All CLI tools (opencode, claude, gemini, kimi) are agentic and can edit files
- DEFAULT_LLM_RUN="gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder gpt-5-codex gpt-5-codex sonnet"
- DEFAULT_LLM_RUN_FALLBACK="haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark"
+ # Ollama cloud models are flat-rate (subscription), so prefer them over per-token OpenRouter
+ DEFAULT_LLM_RUN="gemini-pro gemini-pro ollama-glm ollama-glm ollama-qwen ollama-qwen ollama-minimax ollama-minimax ollama-gemma ollama-gemma kimi-coder kimi-coder codex-coding codex-coding glm-zai qwen-coder minimax sonnet"
+ DEFAULT_LLM_RUN_FALLBACK="haiku ollama-glm ollama-gemma ollama-minimax ollama-qwen glm-zai gemini-cheap codex-spark qwen"
  #
  # Ideate: Agentic models that can edit files for ideation
  # All CLI tools (opencode, claude, gemini, kimi) are agentic and can edit files
- DEFAULT_LLM_IDEATE="opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2 gpt-5.3-codex qwen-openrouter"
- DEFAULT_LLM_IDEATE_FALLBACK="haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark"
+ DEFAULT_LLM_IDEATE="opus-think ollama-glm ollama-glm gemini-pro ollama-qwen ollama-minimax ollama-gemma kimi-coder gpt codex-think glm-zai qwen-coder minimax qwen"
+ DEFAULT_LLM_IDEATE_FALLBACK="haiku ollama-glm ollama-gemma ollama-minimax ollama-qwen glm-zai gemini-cheap codex-spark qwen"

  # Load configuration from a YAML file and update variables
  _load_yaml_config() {
@@ -316,17 +317,6 @@ show_config() {
  echo " Max retries: $MAX_RETRIES"
  echo " Memory limit: ${MEMORY_LIMIT_MB}MB"
  echo " Worker max candidates: $WORKER_MAX_CANDIDATES"
- echo " LLM configuration:"
- # Show LLM configurations using dynamic variable names
- for model in gpt5high o3high gpt_5_codex gpt_5_2 gpt_5_3_codex gpt_5_3_codex_spark codex gemini gemini_5_flash opus opus_think sonnet sonnet_think cursor_sonnet cursor_opus glm deepseek; do
- var_name="LLM_CLI_${model}"
- var_value=$(eval echo "\$$var_name")
- if [[ -n "$var_value" ]]; then
- # Convert underscore back to dash for display
- display_name=$(echo "$model" | sed 's/_/-/g')
- echo " $display_name: $var_value"
- fi
- done
  echo " LLM for run: $LLM_RUN"
  echo " LLM for run (fallback): $LLM_RUN_FALLBACK"
  echo " LLM for ideate: $LLM_IDEATE"
package/lib/csv-lock.sh CHANGED
File without changes
package/lib/editor.sh CHANGED
File without changes
package/lib/evolution_csv.py CHANGED
File without changes
package/lib/evolution_processor.py CHANGED
File without changes
package/lib/evolve_ideate.py CHANGED
File without changes
package/lib/evolve_worker.py CHANGED
@@ -164,7 +164,7 @@ Important: Make meaningful changes that match the description. Don't just add co

  IMPORTANT: If you need to read Python (.py) or CSV files, read them in chunks using offset and limit parameters to avoid context overload
  Example: Read(file_path='evolution_gen01-001.py', offset=0, limit=100) then Read(offset=100, limit=100), etc.
- This is especially important for models with smaller context windows (like GLM).
+ This is especially important for models with smaller context windows.

  CRITICAL: If you do not know how to implement what was asked for, or if the requested change is unclear or not feasible, you MUST refuse to make any changes. DO NOT modify the code if you are uncertain about the implementation. Simply respond that you cannot implement the requested change and explain why. It is better to refuse than to make incorrect or random changes."""

package/lib/llm_bandit.py CHANGED
@@ -356,7 +356,7 @@ if __name__ == "__main__":
  # Test the bandit
  print("Testing LLM Bandit...")

- models = ["opus", "sonnet", "gemini-pro", "gpt5"]
+ models = ["opus", "sonnet", "gemini-pro", "gpt"]
  bandit = LLMBandit(models, state_file="/tmp/test_bandit.json")

  # Simulate some runs
package/lib/log.py CHANGED
File without changes
package/lib/meta_learning.py CHANGED
File without changes
package/lib/sandbox.sb CHANGED
File without changes
package/lib/sandbox_wrapper.py CHANGED
File without changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "claude-evolve",
- "version": "1.11.18",
+ "version": "1.12.0",
  "bin": {
  "claude-evolve": "bin/claude-evolve",
  "claude-evolve-main": "bin/claude-evolve-main",
package/templates/BRIEF.md CHANGED
File without changes
package/templates/algorithm.py CHANGED
File without changes
package/templates/config.yaml CHANGED
@@ -94,10 +94,10 @@ llm_cli:

  # Default configuration: sonnet at ~11%, rest doubled for cost savings
  # Commented out because these change over time; uncomment to override
- #run: gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder gpt-5-codex gpt-5-codex sonnet
- #ideate: opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2 gpt-5.3-codex
- #run_fallback: haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark
- #ideate_fallback: haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark
+ #run: gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder codex-coding codex-coding sonnet
+ #ideate: opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2 codex-think qwen-openrouter
+ #run_fallback: haiku glm-5-zai gemini-5-flash codex-spark
+ #ideate_fallback: haiku glm-5-zai gemini-5-flash codex-spark

  # Available models:
  # Claude (subscription-based, watch usage limits):
@@ -108,10 +108,12 @@ llm_cli:
  # - haiku: Claude Haiku via Claude CLI (cheap fallback)
  #
  # Codex/OpenAI (subscription-based):
- # - gpt-5-codex: GPT-5 Codex (code-specialized) via Codex CLI
+ # - codex-think: GPT-5.4 high reasoning effort (ideation)
+ # - codex-coding: GPT-5.4 medium reasoning effort (coding/run)
+ # - codex-spark: GPT-5.1 Codex Mini (lightweight fallback)
+ # - gpt-5.4: GPT-5.4 no reasoning effort override via Codex CLI
  # - gpt-5.2: GPT-5.2 via Codex CLI
- # - gpt-5.3-codex: GPT-5.3 Codex via Codex CLI
- # - gpt-5.3-codex-spark: GPT-5.3 Codex Spark (lightweight fallback) via Codex CLI
+ # - gpt-5.3-codex: GPT-5.3 Codex (code-specialized) via Codex CLI
  # - gpt5: GPT-5 via Codex CLI (legacy alias)
  # - gpt5high: GPT-5 via Codex CLI (high reasoning)
  # - o3high: O3 via Codex CLI (high reasoning)
package/templates/evaluator.py CHANGED
File without changes
package/lib/__pycache__/ai_cli.cpython-311.pyc CHANGED
Binary file
package/lib/__pycache__/evolution_csv.cpython-311.pyc CHANGED
Binary file