claude-evolve 1.11.19 → 1.13.0
- package/bin/claude-evolve-check +9 -147
- package/lib/ai-cli.sh +88 -104
- package/lib/ai_cli.py +24 -11
- package/lib/config.sh +5 -15
- package/lib/evolve_worker.py +1 -1
- package/lib/llm_bandit.py +1 -1
- package/package.json +1 -1
package/bin/claude-evolve-check
CHANGED
@@ -2,8 +2,8 @@
 # claude-evolve-check - Health check for AI model configurations
 # Tests all configured AI models to verify they're working before starting evolution runs
 #
-# AIDEV-NOTE:
-#
+# AIDEV-NOTE: Uses call_ai_model_configured from ai-cli.sh directly instead of
+# maintaining a duplicate case statement. Single source of truth for model commands.
 
 set -e
 
@@ -18,9 +18,10 @@ NC='\033[0m' # No Color
 SCRIPT_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}" 2>/dev/null || echo "${BASH_SOURCE[0]}")")" && pwd)"
 LIB_DIR="$(dirname "$SCRIPT_DIR")/lib"
 
-# Source configuration to get model lists
+# Source configuration and ai-cli to get model lists and call function
 source "$LIB_DIR/config.sh"
 load_config
+source "$LIB_DIR/ai-cli.sh"
 
 # Test timeout in seconds
 TEST_TIMEOUT=30
@@ -30,8 +31,7 @@ TEST_PROMPT="Say hello in exactly 3 words."
 
 # Temp files
 TEMP_OUTPUT=$(mktemp)
-
-trap "rm -f $TEMP_OUTPUT $TEMP_PID" EXIT
+trap "rm -f $TEMP_OUTPUT" EXIT
 
 echo -e "${CYAN}🔍 Claude Evolve - AI Model Health Check${NC}"
 echo "============================================"
@@ -55,150 +55,15 @@ FAILED=0
 FAILED_MODELS=""
 TIMEOUT_MODELS=""
 
-# Test a single model
+# Test a single model using call_ai_model_configured from ai-cli.sh
 test_model() {
   local model="$1"
   local prompt="$2"
   local outfile="$3"
   local max_wait="$4"
 
-  #
-
-  case "$model" in
-    opus|sonnet|haiku)
-      cat > "$test_script" << SCRIPT
-#!/usr/bin/env bash
-exec claude --dangerously-skip-permissions --mcp-config '' --model $model -p "\$1"
-SCRIPT
-      ;;
-    opus-think)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec claude --dangerously-skip-permissions --mcp-config '' --model opus -p "ultrathink
-
-$1"
-SCRIPT
-      ;;
-    sonnet-think)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "ultrathink
-
-$1"
-SCRIPT
-      ;;
-    gemini-pro)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec gemini -y -m auto-gemini-3 -p "$1"
-SCRIPT
-      ;;
-    gemini-flash|gemini-3-flash)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec gemini -y -m gemini-2.5-flash -p "$1"
-SCRIPT
-      ;;
-    kimi-coder)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec kimi --print -y -m kimi-for-coding -c "$1"
-SCRIPT
-      ;;
-    kimi-k2.5)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec opencode -m openrouter/moonshotai/kimi-k2.5 run "$1"
-SCRIPT
-      ;;
-    glm-5)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec opencode -m openrouter/z-ai/glm-5 run "$1"
-SCRIPT
-      ;;
-    glm-5-zai)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec opencode -m zai-coding-plan/glm-5 run "$1"
-SCRIPT
-      ;;
-    glm-zai)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec opencode -m zai-coding-plan/glm-4.7 run "$1"
-SCRIPT
-      ;;
-    codex-oss-local)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama "$1"
-SCRIPT
-      ;;
-    gpt5|gpt5high)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec --dangerously-bypass-approvals-and-sandbox "$1"
-SCRIPT
-      ;;
-    codex-think)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec -m gpt-5.4 -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$1"
-SCRIPT
-      ;;
-    codex-coding)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec -m gpt-5.4 -c model_reasoning_effort="medium" --dangerously-bypass-approvals-and-sandbox "$1"
-SCRIPT
-      ;;
-    gpt-5.4)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec -m gpt-5.4 --dangerously-bypass-approvals-and-sandbox "$1"
-SCRIPT
-      ;;
-    gpt-5.2)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec -m gpt-5.2 --dangerously-bypass-approvals-and-sandbox "$1"
-SCRIPT
-      ;;
-    gpt-5.3-codex)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec -m gpt-5.3-codex --dangerously-bypass-approvals-and-sandbox "$1"
-SCRIPT
-      ;;
-    codex-spark|gpt-5.1-codex-mini)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec -m gpt-5.1-codex-mini --dangerously-bypass-approvals-and-sandbox "$1"
-SCRIPT
-      ;;
-    gemini-5-flash)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec gemini -y -m gemini-3-flash-preview -p "$1"
-SCRIPT
-      ;;
-    qwen-openrouter)
-      cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec opencode -m openrouter/qwen/qwen3.6-plus:free run "$1"
-SCRIPT
-      ;;
-    *)
-      echo "Unknown model: $model" > "$outfile"
-      rm -f "$test_script"
-      return 1
-      ;;
-  esac
-  chmod +x "$test_script"
-
-  # Run the test script in background, capture output to file
-  "$test_script" "$prompt" > "$outfile" 2>&1 &
+  # Run call_ai_model_configured in background, capture output to file
+  call_ai_model_configured "$model" "$prompt" > "$outfile" 2>&1 &
   local pid=$!
 
   # Poll for completion
@@ -209,7 +74,6 @@ SCRIPT
       sleep 1
       kill -9 "$pid" 2>/dev/null
      wait "$pid" 2>/dev/null
-      rm -f "$test_script"
      return 124
     fi
     sleep 1
@@ -217,9 +81,7 @@ SCRIPT
   done
 
   wait "$pid"
-
-  rm -f "$test_script"
-  return $rc
+  return $?
 }
 
 # Test each model
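The rewritten test_model no longer generates throwaway wrapper scripts; it backgrounds call_ai_model_configured and polls the PID, sending kill -9 once max_wait elapses. Below is a minimal sketch of that poll-and-kill pattern as a standalone helper (run_with_timeout is an illustrative name, not part of the package):

run_with_timeout() {
  local cmd="$1" max_wait="$2" outfile="$3"
  bash -c "$cmd" > "$outfile" 2>&1 &
  local pid=$!
  local waited=0
  # Poll once per second until the child exits or the budget runs out
  while kill -0 "$pid" 2>/dev/null; do
    if [ "$waited" -ge "$max_wait" ]; then
      kill -9 "$pid" 2>/dev/null
      wait "$pid" 2>/dev/null
      return 124        # same exit code GNU timeout uses
    fi
    sleep 1
    waited=$((waited + 1))
  done
  wait "$pid"           # reap the child and propagate its exit status
}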
package/lib/ai-cli.sh
CHANGED
@@ -53,13 +53,16 @@ EOF
 call_ai_model_configured() {
   local model_name="$1"
   local prompt="$2"
-  local
+  local codex_gpt_model="${CODEX_GPT_MODEL:-${CODEX_GPT5_MODEL:-gpt-5.2}}"
 
   # Record start time
   local start_time=$(date +%s)
 
   # Build command directly based on model
+  # AIDEV-NOTE: Model names are role-based, never versioned. When upgrading a model,
+  # update the model ID in the command below, not the case label.
   case "$model_name" in
+    # --- Claude (subscription) ---
     opus)
       local ai_output
       ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model opus -p "$prompt" 2>&1)
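The new codex_gpt_model default is plain bash parameter expansion: prefer CODEX_GPT_MODEL, fall back to the older CODEX_GPT5_MODEL, then to the literal gpt-5.2. A quick illustration of the resolution order (the values assigned below are made up):

unset CODEX_GPT_MODEL CODEX_GPT5_MODEL
echo "${CODEX_GPT_MODEL:-${CODEX_GPT5_MODEL:-gpt-5.2}}"   # gpt-5.2 (built-in default)
CODEX_GPT5_MODEL=gpt-5.3-codex
echo "${CODEX_GPT_MODEL:-${CODEX_GPT5_MODEL:-gpt-5.2}}"   # gpt-5.3-codex (legacy variable honored)
CODEX_GPT_MODEL=gpt-5.4
echo "${CODEX_GPT_MODEL:-${CODEX_GPT5_MODEL:-gpt-5.2}}"   # gpt-5.4 (new variable wins)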
@@ -70,219 +73,200 @@ call_ai_model_configured() {
       ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    haiku)
       local ai_output
-
-      # AIDEV-NOTE: Extended thinking can take long for complex ideation
-      local think_prompt="ultrathink
-
-$prompt"
-      ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "$think_prompt" 2>&1)
+      ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model haiku -p "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
     opus-think)
       local ai_output
-      # Use extended thinking with opus - prepend ultrathink instruction
-      # AIDEV-NOTE: Extended thinking can take long for complex ideation
       local think_prompt="ultrathink
 
 $prompt"
       ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model opus -p "$think_prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    sonnet-think)
       local ai_output
-
+      local think_prompt="ultrathink
+
+$prompt"
+      ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "$think_prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    opus-openrouter)
       local ai_output
-      ai_output=$(
+      ai_output=$(opencode -m openrouter/anthropic/claude-opus-4.7 run "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    cursor-sonnet)
       local ai_output
-      ai_output=$(
+      ai_output=$(cursor-agent sonnet-4.6 -p "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    cursor-opus)
       local ai_output
-
-      ai_output=$(codex exec -m gpt-5.4 -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+      ai_output=$(cursor-agent opus -p "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    # --- Codex/GPT (subscription) ---
+    gpt)
       local ai_output
-
-      ai_output=$(codex exec -m gpt-5.4 -c model_reasoning_effort="medium" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+      ai_output=$(codex exec -m "$codex_gpt_model" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-    gpt-
+    gpt-high)
       local ai_output
-
-      ai_output=$(codex exec -m gpt-5.4 --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+      ai_output=$(codex exec -m "$codex_gpt_model" -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    codex-think)
       local ai_output
-      #
-      ai_output=$(codex exec -m gpt-5.
+      # High reasoning - for ideation tasks requiring deep thinking
+      ai_output=$(codex exec -m gpt-5.4 -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    codex-coding)
       local ai_output
-      #
-      ai_output=$(codex exec -m gpt-5.
+      # Medium reasoning - for coding/implementation tasks
+      ai_output=$(codex exec -m gpt-5.4 -c model_reasoning_effort="medium" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-    codex-spark
+    codex-spark)
       local ai_output
-      #
+      # Cheap/fast lightweight fallback
       ai_output=$(codex exec -m gpt-5.1-codex-mini --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
-      local ai_output
-      ai_output=$(codex exec -m o3-mini -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
-      local ai_exit_code=$?
-      ;;
+    # --- Gemini (subscription) ---
     gemini-pro)
       local ai_output
-      #
+      # Auto-routing to best Gemini model - streams output while working
       ai_output=$(gemini -y -m auto-gemini-3 -p "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
     gemini-flash)
       local ai_output
-      # Gemini streams output while working
       ai_output=$(gemini -y -m gemini-2.5-flash -p "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-    gemini-
+    gemini-cheap)
       local ai_output
-      #
+      # Fast cheap fallback via gemini CLI
       ai_output=$(gemini -y -m gemini-3-flash-preview -p "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-    gemini-
+    gemini-pro-openrouter)
       local ai_output
-      # Gemini
+      # Gemini Pro via OpenRouter - EXPENSIVE
       ai_output=$(opencode -m openrouter/google/gemini-3-pro-preview run "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    # --- GLM / Z.AI ---
+    glm)
       local ai_output
-      #
-      ai_output=$(opencode -m openrouter/
+      # Latest GLM flagship via OpenRouter
+      ai_output=$(opencode -m openrouter/z-ai/glm-5.1 run "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    glm-zai)
       local ai_output
-
+      # Latest GLM via Z.AI agentic mode (may lag OpenRouter by one version)
+      ai_output=$(opencode -m zai-coding-plan/glm-5 run "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    # --- Qwen / Alibaba ---
+    qwen)
       local ai_output
-
+      # Latest Qwen flagship via OpenRouter
+      ai_output=$(opencode -m openrouter/qwen/qwen3.6-plus run "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    qwen-coder)
       local ai_output
-
+      # Qwen coding specialist - large MoE
+      ai_output=$(opencode -m openrouter/qwen/qwen3-coder run "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    # --- DeepSeek ---
+    deepseek)
       local ai_output
-      #
-
-      ai_output=$(opencode -m openrouter/z-ai/glm-5 run "$prompt" 2>&1)
+      # Latest DeepSeek via OpenRouter
+      ai_output=$(opencode -m openrouter/deepseek/deepseek-v3.2 run "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
-      # GLM 4.7 via Z.AI agentic mode -- can be slow sometimes
+    deepseek-local)
       local ai_output
-
+      # DeepSeek via Codex CLI with Ollama cloud backend
+      ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m deepseek-v3.1:671b-cloud "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
-
-      # 744B MoE, strong reasoning, can edit files
+    # --- Kimi / Moonshot ---
+    kimi-coder)
       local ai_output
-
+      # Kimi coding model via kimi CLI
+      ai_output=$(kimi --print -y -m kimi-for-coding -c "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    kimi-think)
       local ai_output
-
+      # Kimi thinking via kimi CLI
+      ai_output=$(kimi --print -c "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    kimi-openrouter)
       local ai_output
-
+      # Latest Kimi via OpenRouter
+      ai_output=$(opencode -m openrouter/moonshotai/kimi-k2.5 run "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    # --- Grok / xAI ---
+    grok)
       local ai_output
-      #
+      # Latest Grok via OpenRouter - EXPENSIVE
       ai_output=$(opencode -m openrouter/x-ai/grok-4 run "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-    grok-
+    grok-fast)
       local ai_output
-      # Grok
+      # Grok fast variant - close to full quality, much cheaper
       ai_output=$(opencode -m openrouter/x-ai/grok-4.1-fast run "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    # --- MiniMax ---
+    minimax)
       local ai_output
-
+      # Latest MiniMax reasoning model via OpenRouter
+      ai_output=$(opencode -m openrouter/minimax/minimax-m2.7 run "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    # --- Ollama cloud models (flat-rate subscription) ---
+    ollama-glm)
       local ai_output
-
-      ai_output=$(opencode -m openrouter/moonshotai/kimi-k2-thinking run "$prompt" 2>&1)
+      ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m glm-5.1:cloud "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    ollama-gemma)
       local ai_output
-
-      ai_output=$(kimi --print -c "$prompt" 2>&1)
+      ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m gemma4:31b-cloud "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    ollama-minimax)
       local ai_output
-
-      # Use --print to see agent actions while still allowing file modifications
-      ai_output=$(kimi --print -y -m kimi-for-coding -c "$prompt" 2>&1)
+      ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m minimax-m2.7:cloud "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
+    ollama-qwen)
       local ai_output
-
-      # Native multimodal agentic model, stronger than GLM-4.7
-      ai_output=$(opencode -m openrouter/moonshotai/kimi-k2.5 run "$prompt" 2>&1)
+      ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m qwen3.6:cloud "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-
-
-      # Qwen latest - Alibaba's flagship model (currently qwen3.6-plus, free promotional tier)
-      # Linear attention + sparse MoE, strong multimodal capabilities
-      ai_output=$(opencode -m openrouter/qwen/qwen3.6-plus:free run "$prompt" 2>&1)
-      local ai_exit_code=$?
-      ;;
-    codex-oss-local)
-      # Codex-OSS via Codex CLI with Ollama backend
+    # --- Local inference ---
+    codex-local)
       local ai_output
       ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-    deepseek-v3-llamacloud)
-      # Deepseek via Codex CLI with Ollama cloud backend
-      local ai_output
-      ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss -m deepseek-v3.1:671b-cloud "$prompt" 2>&1)
-      local ai_exit_code=$?
-      ;;
   esac
 
   # Debug: log model and prompt size
@@ -344,7 +328,7 @@ clean_ai_output() {
   local model_name="$2"
 
   # Handle codex-specific output format
-  if [[ "$model_name" == "codex" || "$model_name" == "
+  if [[ "$model_name" == "codex" || "$model_name" == "gpt" || "$model_name" == "gpt-high" ]]; then
     # Clean codex output - extract content between "codex" marker and "tokens used"
     if echo "$output" | grep -q "^\[.*\] codex$"; then
       # Extract content between "codex" line and "tokens used" line
package/lib/ai_cli.py
CHANGED
@@ -241,20 +241,33 @@ def get_fallback_models_for_command(command: str) -> List[str]:
 # in bash, because the bash `timeout` command causes claude CLI (and sometimes
 # gemini CLI) to hang when called from nested subprocess contexts.
 MODEL_TIMEOUTS = {
-    # Claude
+    # Claude - 5 min standard, 30 min thinking
     'opus': 300, 'sonnet': 300, 'haiku': 300,
     'opus-think': 1800, 'sonnet-think': 1800,
-
-
-    '
-    # Codex/OpenAI models - 10 min standard
+    'opus-openrouter': 600, 'cursor-sonnet': 300, 'cursor-opus': 300,
+    # Codex/GPT - 10 min standard
+    'gpt': 600, 'gpt-high': 900,
     'codex-think': 900, 'codex-coding': 600, 'codex-spark': 300,
-
-    '
-
-
-
+    # Gemini - 30 min for pro (streams), 10 min for flash
+    'gemini-pro': 1800, 'gemini-flash': 1200,
+    'gemini-cheap': 600, 'gemini-pro-openrouter': 1800,
+    # GLM / Z.AI - 30 min (Z.AI can be slow)
+    'glm': 600, 'glm-zai': 1800,
+    # Qwen
+    'qwen': 600, 'qwen-coder': 1200,
+    # DeepSeek
+    'deepseek': 600, 'deepseek-local': 2400,
+    # Kimi
+    'kimi-coder': 600, 'kimi-think': 900, 'kimi-openrouter': 600,
+    # Grok
+    'grok': 600, 'grok-fast': 600,
+    # MiniMax
+    'minimax': 600,
+    # Ollama cloud
+    'ollama-glm': 1200, 'ollama-gemma': 1200,
+    'ollama-minimax': 1200, 'ollama-qwen': 1200,
+    # Local inference
+    'codex-local': 2400,
 }
 DEFAULT_MODEL_TIMEOUT = 600  # 10 minutes for everything else
 
package/lib/config.sh
CHANGED
@@ -63,13 +63,14 @@ DEFAULT_WORKER_MAX_CANDIDATES=3
 #
 # Run: Subscription-based agentic models for code generation
 # All CLI tools (opencode, claude, gemini, kimi) are agentic and can edit files
-
-
+# Ollama cloud models are flat-rate (subscription), so prefer them over per-token OpenRouter
+DEFAULT_LLM_RUN="gemini-pro gemini-pro ollama-glm ollama-glm ollama-qwen ollama-qwen ollama-minimax ollama-minimax ollama-gemma ollama-gemma kimi-coder kimi-coder codex-coding codex-coding glm-zai qwen-coder minimax sonnet"
+DEFAULT_LLM_RUN_FALLBACK="haiku ollama-glm ollama-gemma ollama-minimax ollama-qwen glm-zai gemini-cheap codex-spark qwen"
 #
 # Ideate: Agentic models that can edit files for ideation
 # All CLI tools (opencode, claude, gemini, kimi) are agentic and can edit files
-DEFAULT_LLM_IDEATE="opus-think glm-
-DEFAULT_LLM_IDEATE_FALLBACK="haiku glm-
+DEFAULT_LLM_IDEATE="opus-think ollama-glm ollama-glm gemini-pro ollama-qwen ollama-minimax ollama-gemma kimi-coder gpt codex-think glm-zai qwen-coder minimax qwen"
+DEFAULT_LLM_IDEATE_FALLBACK="haiku ollama-glm ollama-gemma ollama-minimax ollama-qwen glm-zai gemini-cheap codex-spark qwen"
 
 # Load configuration from a YAML file and update variables
 _load_yaml_config() {
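The DEFAULT_LLM_* values are plain space-separated lists, and the duplicated names (e.g. gemini-pro gemini-pro) appear intended to give those models more weight when an entry is drawn from the list. A minimal sketch of consuming such a list, assuming uniform random selection over entries (illustrative only; the package's real scheduler is the LLM bandit):

read -ra run_models <<< "$DEFAULT_LLM_RUN"
model="${run_models[RANDOM % ${#run_models[@]}]}"   # duplicates get proportionally more draws
echo "selected: $model"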
@@ -316,17 +317,6 @@ show_config() {
   echo "  Max retries: $MAX_RETRIES"
   echo "  Memory limit: ${MEMORY_LIMIT_MB}MB"
   echo "  Worker max candidates: $WORKER_MAX_CANDIDATES"
-  echo "  LLM configuration:"
-  # Show LLM configurations using dynamic variable names
-  for model in gpt5high o3high gpt_5_codex gpt_5_2 gpt_5_3_codex gpt_5_3_codex_spark codex gemini gemini_5_flash opus opus_think sonnet sonnet_think cursor_sonnet cursor_opus glm deepseek; do
-    var_name="LLM_CLI_${model}"
-    var_value=$(eval echo "\$$var_name")
-    if [[ -n "$var_value" ]]; then
-      # Convert underscore back to dash for display
-      display_name=$(echo "$model" | sed 's/_/-/g')
-      echo "    $display_name: $var_value"
-    fi
-  done
   echo "  LLM for run: $LLM_RUN"
   echo "  LLM for run (fallback): $LLM_RUN_FALLBACK"
   echo "  LLM for ideate: $LLM_IDEATE"
package/lib/evolve_worker.py
CHANGED
@@ -164,7 +164,7 @@ Important: Make meaningful changes that match the description. Don't just add co
 
 IMPORTANT: If you need to read Python (.py) or CSV files, read them in chunks using offset and limit parameters to avoid context overload
 Example: Read(file_path='evolution_gen01-001.py', offset=0, limit=100) then Read(offset=100, limit=100), etc.
-This is especially important for models with smaller context windows
+This is especially important for models with smaller context windows.
 
 CRITICAL: If you do not know how to implement what was asked for, or if the requested change is unclear or not feasible, you MUST refuse to make any changes. DO NOT modify the code if you are uncertain about the implementation. Simply respond that you cannot implement the requested change and explain why. It is better to refuse than to make incorrect or random changes."""
 
|
package/lib/llm_bandit.py
CHANGED
|
@@ -356,7 +356,7 @@ if __name__ == "__main__":
|
|
|
356
356
|
# Test the bandit
|
|
357
357
|
print("Testing LLM Bandit...")
|
|
358
358
|
|
|
359
|
-
models = ["opus", "sonnet", "gemini-pro", "
|
|
359
|
+
models = ["opus", "sonnet", "gemini-pro", "gpt"]
|
|
360
360
|
bandit = LLMBandit(models, state_file="/tmp/test_bandit.json")
|
|
361
361
|
|
|
362
362
|
# Simulate some runs
|