claude-evolve 1.11.18 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -0
- package/bin/claude-evolve-check +9 -135
- package/lib/__pycache__/ai_cli.cpython-310.pyc +0 -0
- package/lib/__pycache__/ai_cli.cpython-314.pyc +0 -0
- package/lib/__pycache__/embedding.cpython-310.pyc +0 -0
- package/lib/__pycache__/embedding.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolution_csv.cpython-310.pyc +0 -0
- package/lib/__pycache__/evolution_csv.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolve_ideate.cpython-310.pyc +0 -0
- package/lib/__pycache__/evolve_ideate.cpython-314.pyc +0 -0
- package/lib/__pycache__/log.cpython-310.pyc +0 -0
- package/lib/ai-cli.sh +93 -97
- package/lib/ai_cli.py +25 -11
- package/lib/config.py +0 -0
- package/lib/config.sh +5 -15
- package/lib/csv-lock.sh +0 -0
- package/lib/editor.sh +0 -0
- package/lib/evolution_csv.py +0 -0
- package/lib/evolution_processor.py +0 -0
- package/lib/evolve_ideate.py +0 -0
- package/lib/evolve_worker.py +1 -1
- package/lib/llm_bandit.py +1 -1
- package/lib/log.py +0 -0
- package/lib/meta_learning.py +0 -0
- package/lib/sandbox.sb +0 -0
- package/lib/sandbox_wrapper.py +0 -0
- package/package.json +1 -1
- package/templates/BRIEF.md +0 -0
- package/templates/algorithm.py +0 -0
- package/templates/config.yaml +9 -7
- package/templates/evaluator.py +0 -0
- package/lib/__pycache__/ai_cli.cpython-311.pyc +0 -0
- package/lib/__pycache__/evolution_csv.cpython-311.pyc +0 -0
- package/lib/__pycache__/evolution_csv.cpython-313.pyc +0 -0
- package/lib/__pycache__/evolve_run.cpython-311.pyc +0 -0
- package/lib/__pycache__/evolve_run.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolve_worker.cpython-314.pyc +0 -0
- package/lib/__pycache__/llm_bandit.cpython-314.pyc +0 -0
- package/lib/__pycache__/log.cpython-311.pyc +0 -0
- package/lib/__pycache__/log.cpython-314.pyc +0 -0
- package/lib/__pycache__/meta_learning.cpython-314.pyc +0 -0
- package/lib/__pycache__/sandbox_wrapper.cpython-314.pyc +0 -0
package/README.md
CHANGED
File without changes

package/bin/claude-evolve-check
CHANGED

@@ -2,8 +2,8 @@
 # claude-evolve-check - Health check for AI model configurations
 # Tests all configured AI models to verify they're working before starting evolution runs
 #
-# AIDEV-NOTE:
-#
+# AIDEV-NOTE: Uses call_ai_model_configured from ai-cli.sh directly instead of
+# maintaining a duplicate case statement. Single source of truth for model commands.
 
 set -e
 
@@ -18,9 +18,10 @@ NC='\033[0m' # No Color
 SCRIPT_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}" 2>/dev/null || echo "${BASH_SOURCE[0]}")")" && pwd)"
 LIB_DIR="$(dirname "$SCRIPT_DIR")/lib"
 
-# Source configuration to get model lists
+# Source configuration and ai-cli to get model lists and call function
 source "$LIB_DIR/config.sh"
 load_config
+source "$LIB_DIR/ai-cli.sh"
 
 # Test timeout in seconds
 TEST_TIMEOUT=30
@@ -30,8 +31,7 @@ TEST_PROMPT="Say hello in exactly 3 words."
 
 # Temp files
 TEMP_OUTPUT=$(mktemp)
-
-trap "rm -f $TEMP_OUTPUT $TEMP_PID" EXIT
+trap "rm -f $TEMP_OUTPUT" EXIT
 
 echo -e "${CYAN}🔍 Claude Evolve - AI Model Health Check${NC}"
 echo "============================================"
@@ -55,138 +55,15 @@ FAILED=0
 FAILED_MODELS=""
 TIMEOUT_MODELS=""
 
-# Test a single model
+# Test a single model using call_ai_model_configured from ai-cli.sh
 test_model() {
     local model="$1"
     local prompt="$2"
     local outfile="$3"
     local max_wait="$4"
 
-    #
-
-    case "$model" in
-        opus|sonnet|haiku)
-            cat > "$test_script" << SCRIPT
-#!/usr/bin/env bash
-exec claude --dangerously-skip-permissions --mcp-config '' --model $model -p "\$1"
-SCRIPT
-            ;;
-        opus-think)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec claude --dangerously-skip-permissions --mcp-config '' --model opus -p "ultrathink
-
-$1"
-SCRIPT
-            ;;
-        sonnet-think)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "ultrathink
-
-$1"
-SCRIPT
-            ;;
-        gemini-pro)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec gemini -y -m gemini-3-pro-preview -p "$1"
-SCRIPT
-            ;;
-        gemini-flash|gemini-3-flash)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec gemini -y -m gemini-2.5-flash -p "$1"
-SCRIPT
-            ;;
-        kimi-coder)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec kimi --print -y -m kimi-for-coding -c "$1"
-SCRIPT
-            ;;
-        kimi-k2.5)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec opencode -m openrouter/moonshotai/kimi-k2.5 run "$1"
-SCRIPT
-            ;;
-        glm-5)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec opencode -m openrouter/z-ai/glm-5 run "$1"
-SCRIPT
-            ;;
-        glm-5-zai)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec opencode -m zai-coding-plan/glm-5 run "$1"
-SCRIPT
-            ;;
-        glm-zai)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec opencode -m zai-coding-plan/glm-4.7 run "$1"
-SCRIPT
-            ;;
-        codex-oss-local)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama "$1"
-SCRIPT
-            ;;
-        gpt5|gpt5high)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec --dangerously-bypass-approvals-and-sandbox "$1"
-SCRIPT
-            ;;
-        gpt-5-codex)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec -m gpt-5-codex --dangerously-bypass-approvals-and-sandbox "$1"
-SCRIPT
-            ;;
-        gpt-5.2)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec -m gpt-5.2 --dangerously-bypass-approvals-and-sandbox "$1"
-SCRIPT
-            ;;
-        gpt-5.3-codex)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec -m gpt-5.3-codex --dangerously-bypass-approvals-and-sandbox "$1"
-SCRIPT
-            ;;
-        gpt-5.3-codex-spark)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec -m gpt-5.3-codex-spark --dangerously-bypass-approvals-and-sandbox "$1"
-SCRIPT
-            ;;
-        gemini-5-flash)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec gemini -y -m gemini-5-flash -p "$1"
-SCRIPT
-            ;;
-        qwen-openrouter)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec opencode -m openrouter/qwen/qwen3.5-plus-02-15 run "$1"
-SCRIPT
-            ;;
-        *)
-            echo "Unknown model: $model" > "$outfile"
-            rm -f "$test_script"
-            return 1
-            ;;
-    esac
-    chmod +x "$test_script"
-
-    # Run the test script in background, capture output to file
-    "$test_script" "$prompt" > "$outfile" 2>&1 &
+    # Run call_ai_model_configured in background, capture output to file
+    call_ai_model_configured "$model" "$prompt" > "$outfile" 2>&1 &
     local pid=$!
 
     # Poll for completion
@@ -197,7 +74,6 @@ SCRIPT
             sleep 1
             kill -9 "$pid" 2>/dev/null
             wait "$pid" 2>/dev/null
-            rm -f "$test_script"
             return 124
         fi
         sleep 1
@@ -205,9 +81,7 @@ SCRIPT
     done
 
     wait "$pid"
-
-    rm -f "$test_script"
-    return $rc
+    return $?
 }
 
 # Test each model
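
Note: the refactored test_model no longer writes per-model wrapper scripts. It backgrounds the shared call_ai_model_configured and polls once a second, returning 124 (the conventional timeout exit code) once the deadline passes. A minimal Python sketch of the same poll-and-kill pattern, for illustration only; run_with_deadline and its signature are hypothetical, not part of the package:

import subprocess
import time

# Hypothetical sketch of test_model's timeout loop, not package code.
def run_with_deadline(cmd, max_wait, outfile):
    with open(outfile, "w") as out:
        proc = subprocess.Popen(cmd, stdout=out, stderr=subprocess.STDOUT)
        deadline = time.monotonic() + max_wait
        while proc.poll() is None:           # still running?
            if time.monotonic() > deadline:
                proc.kill()                  # SIGKILL, like `kill -9 "$pid"`
                proc.wait()
                return 124                   # same code the script returns on timeout
            time.sleep(1)                    # poll once a second, like the bash loop
        return proc.returncode
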
package/lib/__pycache__/ai_cli.cpython-310.pyc
Binary file
package/lib/__pycache__/ai_cli.cpython-314.pyc
Binary file
package/lib/__pycache__/embedding.cpython-310.pyc
Binary file
package/lib/__pycache__/embedding.cpython-314.pyc
Binary file
package/lib/__pycache__/evolution_csv.cpython-310.pyc
Binary file
package/lib/__pycache__/evolution_csv.cpython-314.pyc
Binary file
package/lib/__pycache__/evolve_ideate.cpython-310.pyc
Binary file
package/lib/__pycache__/evolve_ideate.cpython-314.pyc
Binary file
package/lib/__pycache__/log.cpython-310.pyc
Binary file

package/lib/ai-cli.sh
CHANGED

@@ -53,13 +53,16 @@ EOF
 call_ai_model_configured() {
     local model_name="$1"
     local prompt="$2"
-    local
+    local codex_gpt_model="${CODEX_GPT_MODEL:-${CODEX_GPT5_MODEL:-gpt-5.2}}"
 
     # Record start time
     local start_time=$(date +%s)
 
     # Build command directly based on model
+    # AIDEV-NOTE: Model names are role-based, never versioned. When upgrading a model,
+    # update the model ID in the command below, not the case label.
     case "$model_name" in
+        # --- Claude (subscription) ---
        opus)
             local ai_output
             ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model opus -p "$prompt" 2>&1)
@@ -70,207 +73,200 @@ call_ai_model_configured() {
             ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        haiku)
+            local ai_output
+            ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model haiku -p "$prompt" 2>&1)
+            local ai_exit_code=$?
+            ;;
+        opus-think)
             local ai_output
-            # Use extended thinking with sonnet 4.5 - prepend ultrathink instruction
-            # AIDEV-NOTE: Extended thinking can take long for complex ideation
             local think_prompt="ultrathink
 
 $prompt"
-            ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model
+            ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model opus -p "$think_prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        sonnet-think)
             local ai_output
-            # Use extended thinking with opus - prepend ultrathink instruction
-            # AIDEV-NOTE: Extended thinking can take long for complex ideation
             local think_prompt="ultrathink
 
 $prompt"
-            ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model
+            ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "$think_prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        opus-openrouter)
             local ai_output
-            ai_output=$(
+            ai_output=$(opencode -m openrouter/anthropic/claude-opus-4.1 run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        cursor-sonnet)
             local ai_output
-            ai_output=$(
+            ai_output=$(cursor-agent sonnet-4.5 -p "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        cursor-opus)
             local ai_output
-            ai_output=$(
+            ai_output=$(cursor-agent opus -p "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        # --- Codex/GPT (subscription) ---
+        gpt)
             local ai_output
-
-            ai_output=$(codex exec -m gpt-5-codex --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+            ai_output=$(codex exec -m "$codex_gpt_model" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-        gpt-
+        gpt-high)
             local ai_output
-
-            ai_output=$(codex exec -m gpt-5.2 --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+            ai_output=$(codex exec -m "$codex_gpt_model" -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        codex-think)
             local ai_output
-            #
-            ai_output=$(codex exec -m gpt-5.
+            # High reasoning - for ideation tasks requiring deep thinking
+            ai_output=$(codex exec -m gpt-5.4 -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        codex-coding)
             local ai_output
-            #
-            ai_output=$(codex exec -m gpt-5.
+            # Medium reasoning - for coding/implementation tasks
+            ai_output=$(codex exec -m gpt-5.4 -c model_reasoning_effort="medium" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        codex-spark)
             local ai_output
-
+            # Cheap/fast lightweight fallback
+            ai_output=$(codex exec -m gpt-5.1-codex-mini --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
+        # --- Gemini (subscription) ---
         gemini-pro)
             local ai_output
-            # Gemini streams output while working
-            ai_output=$(gemini -y -m gemini-3
+            # Auto-routing to best Gemini model - streams output while working
+            ai_output=$(gemini -y -m auto-gemini-3 -p "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
         gemini-flash)
             local ai_output
-            # Gemini streams output while working
             ai_output=$(gemini -y -m gemini-2.5-flash -p "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-        gemini-
+        gemini-cheap)
             local ai_output
-            #
-            ai_output=$(gemini -y -m gemini-
+            # Fast cheap fallback via gemini CLI
+            ai_output=$(gemini -y -m gemini-3-flash-preview -p "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-        gemini-
+        gemini-pro-openrouter)
             local ai_output
-            # Gemini
+            # Gemini Pro via OpenRouter - EXPENSIVE
             ai_output=$(opencode -m openrouter/google/gemini-3-pro-preview run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        # --- GLM / Z.AI ---
+        glm)
             local ai_output
-            #
-            ai_output=$(opencode -m openrouter/
+            # Latest GLM flagship via OpenRouter
+            ai_output=$(opencode -m openrouter/z-ai/glm-5.1 run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        glm-zai)
             local ai_output
-
+            # Latest GLM via Z.AI agentic mode (may lag OpenRouter by one version)
+            ai_output=$(opencode -m zai-coding-plan/glm-5 run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        # --- Qwen / Alibaba ---
+        qwen)
             local ai_output
-
+            # Latest Qwen flagship via OpenRouter
+            ai_output=$(opencode -m openrouter/qwen/qwen3.6-plus run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        qwen-coder)
             local ai_output
-
+            # Qwen coding specialist - large MoE
+            ai_output=$(opencode -m openrouter/qwen/qwen3-coder run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        # --- DeepSeek ---
+        deepseek)
             local ai_output
-            #
-
-            ai_output=$(opencode -m openrouter/z-ai/glm-5 run "$prompt" 2>&1)
+            # Latest DeepSeek via OpenRouter
+            ai_output=$(opencode -m openrouter/deepseek/deepseek-v3.2 run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
-            # GLM 4.7 via Z.AI agentic mode -- can be slow sometimes
+        deepseek-local)
             local ai_output
-
+            # DeepSeek via Codex CLI with Ollama cloud backend
+            ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m deepseek-v3.1:671b-cloud "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
-
-            # 744B MoE, strong reasoning, can edit files
+        # --- Kimi / Moonshot ---
+        kimi-coder)
             local ai_output
-
+            # Kimi coding model via kimi CLI
+            ai_output=$(kimi --print -y -m kimi-for-coding -c "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        kimi-think)
             local ai_output
-
+            # Kimi thinking via kimi CLI
+            ai_output=$(kimi --print -c "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        kimi-openrouter)
             local ai_output
-
+            # Latest Kimi via OpenRouter
+            ai_output=$(opencode -m openrouter/moonshotai/kimi-k2.5 run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        # --- Grok / xAI ---
+        grok)
             local ai_output
-            #
+            # Latest Grok via OpenRouter - EXPENSIVE
             ai_output=$(opencode -m openrouter/x-ai/grok-4 run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-        grok-
+        grok-fast)
             local ai_output
-            # Grok
+            # Grok fast variant - close to full quality, much cheaper
             ai_output=$(opencode -m openrouter/x-ai/grok-4.1-fast run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        # --- MiniMax ---
+        minimax)
             local ai_output
-
+            # Latest MiniMax reasoning model via OpenRouter
+            ai_output=$(opencode -m openrouter/minimax/minimax-m2.7 run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        # --- Ollama cloud models (flat-rate subscription) ---
+        ollama-glm)
             local ai_output
-
-            ai_output=$(opencode -m openrouter/moonshotai/kimi-k2-thinking run "$prompt" 2>&1)
+            ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m glm-5.1:cloud "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        ollama-gemma)
             local ai_output
-
-            ai_output=$(kimi --print -c "$prompt" 2>&1)
+            ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m gemma4:31b-cloud "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        ollama-minimax)
             local ai_output
-
-            # Use --print to see agent actions while still allowing file modifications
-            ai_output=$(kimi --print -y -m kimi-for-coding -c "$prompt" 2>&1)
+            ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m minimax-m2.7:cloud "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        ollama-qwen)
             local ai_output
-
-            # Native multimodal agentic model, stronger than GLM-4.7
-            ai_output=$(opencode -m openrouter/moonshotai/kimi-k2.5 run "$prompt" 2>&1)
+            ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m qwen3.5:cloud "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
-
-            # Qwen latest - Alibaba's flagship model (currently qwen3.5-plus)
-            # Linear attention + sparse MoE, strong multimodal capabilities
-            ai_output=$(opencode -m openrouter/qwen/qwen3.5-plus-02-15 run "$prompt" 2>&1)
-            local ai_exit_code=$?
-            ;;
-        codex-oss-local)
-            # Codex-OSS via Codex CLI with Ollama backend
+        # --- Local inference ---
+        codex-local)
             local ai_output
             ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-        deepseek-v3-llamacloud)
-            # Deepseek via Codex CLI with Ollama cloud backend
-            local ai_output
-            ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss -m deepseek-v3.1:671b-cloud "$prompt" 2>&1)
-            local ai_exit_code=$?
-            ;;
     esac
 
     # Debug: log model and prompt size
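
Note: the case statement above is the single dispatch point from role-based model names to concrete CLI invocations, and the codex_gpt_model default chain means CODEX_GPT_MODEL overrides CODEX_GPT5_MODEL, which falls back to gpt-5.2. A dict-based Python sketch of the same idea, assuming nothing beyond what the diff shows (only three arms reproduced; call_model itself is hypothetical):

import os
import subprocess

# Same empty-or-unset fallback chain as "${CODEX_GPT_MODEL:-${CODEX_GPT5_MODEL:-gpt-5.2}}".
CODEX_GPT_MODEL = (os.environ.get("CODEX_GPT_MODEL")
                   or os.environ.get("CODEX_GPT5_MODEL") or "gpt-5.2")

# Role-based names map to commands; upgrading a model changes the ID, not the key.
COMMANDS = {
    "opus": lambda p: ["claude", "--dangerously-skip-permissions",
                       "--mcp-config", "", "--model", "opus", "-p", p],
    "gpt":  lambda p: ["codex", "exec", "-m", CODEX_GPT_MODEL,
                       "--dangerously-bypass-approvals-and-sandbox", p],
    "glm":  lambda p: ["opencode", "-m", "openrouter/z-ai/glm-5.1", "run", p],
}

def call_model(name, prompt):
    # Hypothetical helper: run the mapped command, return exit code and combined output.
    result = subprocess.run(COMMANDS[name](prompt), capture_output=True, text=True)
    return result.returncode, result.stdout + result.stderr
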
@@ -332,7 +328,7 @@ clean_ai_output() {
     local model_name="$2"
 
     # Handle codex-specific output format
-    if [[ "$model_name" == "codex" || "$model_name" == "
+    if [[ "$model_name" == "codex" || "$model_name" == "gpt" || "$model_name" == "gpt-high" ]]; then
         # Clean codex output - extract content between "codex" marker and "tokens used"
         if echo "$output" | grep -q "^\[.*\] codex$"; then
             # Extract content between "codex" line and "tokens used" line
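
Note: clean_ai_output's codex branch keeps only the text between the "[...] codex" marker line and the "tokens used" footer. A hedged Python sketch of that extraction (clean_codex_output is illustrative, not the package's function):

import re

def clean_codex_output(output):
    # Keep lines strictly between the "[...] codex" marker and the "tokens used" footer.
    lines = output.splitlines()
    start = next((i + 1 for i, line in enumerate(lines)
                  if re.fullmatch(r"\[.*\] codex", line)), 0)
    end = next((i for i, line in enumerate(lines) if "tokens used" in line), len(lines))
    return "\n".join(lines[start:end]).strip()
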
package/lib/ai_cli.py
CHANGED

@@ -241,19 +241,33 @@ def get_fallback_models_for_command(command: str) -> List[str]:
 # in bash, because the bash `timeout` command causes claude CLI (and sometimes
 # gemini CLI) to hang when called from nested subprocess contexts.
 MODEL_TIMEOUTS = {
-    # Claude
+    # Claude - 5 min standard, 30 min thinking
     'opus': 300, 'sonnet': 300, 'haiku': 300,
     'opus-think': 1800, 'sonnet-think': 1800,
-
-
-    '
-
-
-    '
-
-
-
-
+    'opus-openrouter': 600, 'cursor-sonnet': 300, 'cursor-opus': 300,
+    # Codex/GPT - 10 min standard
+    'gpt': 600, 'gpt-high': 900,
+    'codex-think': 900, 'codex-coding': 600, 'codex-spark': 300,
+    # Gemini - 30 min for pro (streams), 10 min for flash
+    'gemini-pro': 1800, 'gemini-flash': 1200,
+    'gemini-cheap': 600, 'gemini-pro-openrouter': 1800,
+    # GLM / Z.AI - 30 min (Z.AI can be slow)
+    'glm': 600, 'glm-zai': 1800,
+    # Qwen
+    'qwen': 600, 'qwen-coder': 1200,
+    # DeepSeek
+    'deepseek': 600, 'deepseek-local': 2400,
+    # Kimi
+    'kimi-coder': 600, 'kimi-think': 900, 'kimi-openrouter': 600,
+    # Grok
+    'grok': 600, 'grok-fast': 600,
+    # MiniMax
+    'minimax': 600,
+    # Ollama cloud
+    'ollama-glm': 1200, 'ollama-gemma': 1200,
+    'ollama-minimax': 1200, 'ollama-qwen': 1200,
+    # Local inference
+    'codex-local': 2400,
 }
 DEFAULT_MODEL_TIMEOUT = 600  # 10 minutes for everything else
 
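
Note: per the AIDEV-NOTE, timeouts are enforced on the Python side because bash `timeout` can hang the claude and gemini CLIs in nested subprocess contexts. A sketch of how such a table is typically consumed; the .get() lookup with a default matches the dict above, while run_with_model_timeout itself is an assumption, not the package's API:

import subprocess

def run_with_model_timeout(cmd, model):
    # Assumed consumer of MODEL_TIMEOUTS / DEFAULT_MODEL_TIMEOUT defined above.
    timeout = MODEL_TIMEOUTS.get(model, DEFAULT_MODEL_TIMEOUT)
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
        return result.returncode, result.stdout
    except subprocess.TimeoutExpired:
        return 124, ""  # mirror the timeout exit-code convention used elsewhere
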
package/lib/config.py
CHANGED
File without changes

package/lib/config.sh
CHANGED

@@ -63,13 +63,14 @@ DEFAULT_WORKER_MAX_CANDIDATES=3
 #
 # Run: Subscription-based agentic models for code generation
 # All CLI tools (opencode, claude, gemini, kimi) are agentic and can edit files
-
-
+# Ollama cloud models are flat-rate (subscription), so prefer them over per-token OpenRouter
+DEFAULT_LLM_RUN="gemini-pro gemini-pro ollama-glm ollama-glm ollama-qwen ollama-qwen ollama-minimax ollama-minimax ollama-gemma ollama-gemma kimi-coder kimi-coder codex-coding codex-coding glm-zai qwen-coder minimax sonnet"
+DEFAULT_LLM_RUN_FALLBACK="haiku ollama-glm ollama-gemma ollama-minimax ollama-qwen glm-zai gemini-cheap codex-spark qwen"
 #
 # Ideate: Agentic models that can edit files for ideation
 # All CLI tools (opencode, claude, gemini, kimi) are agentic and can edit files
-DEFAULT_LLM_IDEATE="opus-think glm-
-DEFAULT_LLM_IDEATE_FALLBACK="haiku glm-
+DEFAULT_LLM_IDEATE="opus-think ollama-glm ollama-glm gemini-pro ollama-qwen ollama-minimax ollama-gemma kimi-coder gpt codex-think glm-zai qwen-coder minimax qwen"
+DEFAULT_LLM_IDEATE_FALLBACK="haiku ollama-glm ollama-gemma ollama-minimax ollama-qwen glm-zai gemini-cheap codex-spark qwen"
 
 # Load configuration from a YAML file and update variables
 _load_yaml_config() {
@@ -316,17 +317,6 @@ show_config() {
     echo " Max retries: $MAX_RETRIES"
     echo " Memory limit: ${MEMORY_LIMIT_MB}MB"
     echo " Worker max candidates: $WORKER_MAX_CANDIDATES"
-    echo " LLM configuration:"
-    # Show LLM configurations using dynamic variable names
-    for model in gpt5high o3high gpt_5_codex gpt_5_2 gpt_5_3_codex gpt_5_3_codex_spark codex gemini gemini_5_flash opus opus_think sonnet sonnet_think cursor_sonnet cursor_opus glm deepseek; do
-        var_name="LLM_CLI_${model}"
-        var_value=$(eval echo "\$$var_name")
-        if [[ -n "$var_value" ]]; then
-            # Convert underscore back to dash for display
-            display_name=$(echo "$model" | sed 's/_/-/g')
-            echo " $display_name: $var_value"
-        fi
-    done
     echo " LLM for run: $LLM_RUN"
     echo " LLM for run (fallback): $LLM_RUN_FALLBACK"
     echo " LLM for ideate: $LLM_IDEATE"
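
Note: DEFAULT_LLM_RUN repeats entries (gemini-pro, the ollama-* models, kimi-coder, and codex-coding each appear twice). Under a uniform draw over the whitespace-separated tokens, duplication doubles a model's selection odds. A sketch under that assumption only; the actual sampling logic is not shown in this diff:

import random

run_list = ("gemini-pro gemini-pro ollama-glm ollama-glm ollama-qwen ollama-qwen "
            "ollama-minimax ollama-minimax ollama-gemma ollama-gemma kimi-coder "
            "kimi-coder codex-coding codex-coding glm-zai qwen-coder minimax sonnet")

# Uniform choice over tokens: duplicated names get proportionally more weight.
model = random.choice(run_list.split())
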
package/lib/csv-lock.sh
CHANGED
File without changes

package/lib/editor.sh
CHANGED
File without changes

package/lib/evolution_csv.py
CHANGED
File without changes

package/lib/evolution_processor.py
CHANGED
File without changes

package/lib/evolve_ideate.py
CHANGED
File without changes

package/lib/evolve_worker.py
CHANGED

@@ -164,7 +164,7 @@ Important: Make meaningful changes that match the description. Don't just add co
 
 IMPORTANT: If you need to read Python (.py) or CSV files, read them in chunks using offset and limit parameters to avoid context overload
 Example: Read(file_path='evolution_gen01-001.py', offset=0, limit=100) then Read(offset=100, limit=100), etc.
-This is especially important for models with smaller context windows
+This is especially important for models with smaller context windows.
 
 CRITICAL: If you do not know how to implement what was asked for, or if the requested change is unclear or not feasible, you MUST refuse to make any changes. DO NOT modify the code if you are uncertain about the implementation. Simply respond that you cannot implement the requested change and explain why. It is better to refuse than to make incorrect or random changes."""
 
package/lib/llm_bandit.py
CHANGED

@@ -356,7 +356,7 @@ if __name__ == "__main__":
     # Test the bandit
     print("Testing LLM Bandit...")
 
-    models = ["opus", "sonnet", "gemini-pro", "
+    models = ["opus", "sonnet", "gemini-pro", "gpt"]
     bandit = LLMBandit(models, state_file="/tmp/test_bandit.json")
 
     # Simulate some runs
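
Note: the test harness only reveals the LLMBandit constructor, so as a hedged illustration of what a model-selection bandit does (favor the model with the best observed reward while still exploring), here is a generic epsilon-greedy sketch. None of these names or methods are claimed to match llm_bandit.py:

import random
from collections import defaultdict

class EpsilonGreedyBandit:
    # Hypothetical stand-in, not the package's LLMBandit.
    def __init__(self, models, epsilon=0.1):
        self.models = models
        self.epsilon = epsilon
        self.counts = defaultdict(int)    # pulls per model
        self.totals = defaultdict(float)  # summed reward per model

    def select(self):
        if random.random() < self.epsilon:
            return random.choice(self.models)   # explore
        return max(self.models,                 # exploit best mean reward
                   key=lambda m: self.totals[m] / max(self.counts[m], 1))

    def update(self, model, reward):
        self.counts[model] += 1
        self.totals[model] += reward
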
package/lib/log.py
CHANGED
File without changes

package/lib/meta_learning.py
CHANGED
File without changes

package/lib/sandbox.sb
CHANGED
File without changes

package/lib/sandbox_wrapper.py
CHANGED
File without changes

package/package.json
CHANGED
package/templates/BRIEF.md
CHANGED
File without changes

package/templates/algorithm.py
CHANGED
File without changes

package/templates/config.yaml
CHANGED

@@ -94,10 +94,10 @@ llm_cli:
 
   # Default configuration: sonnet at ~11%, rest doubled for cost savings
   # Commented out because these change over time; uncomment to override
-  #run: gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder
-  #ideate: opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2
-  #run_fallback: haiku glm-5-zai gemini-5-flash
-  #ideate_fallback: haiku glm-5-zai gemini-5-flash
+  #run: gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder codex-coding codex-coding sonnet
+  #ideate: opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2 codex-think qwen-openrouter
+  #run_fallback: haiku glm-5-zai gemini-5-flash codex-spark
+  #ideate_fallback: haiku glm-5-zai gemini-5-flash codex-spark
 
   # Available models:
   # Claude (subscription-based, watch usage limits):
@@ -108,10 +108,12 @@ llm_cli:
   # - haiku: Claude Haiku via Claude CLI (cheap fallback)
   #
   # Codex/OpenAI (subscription-based):
-  # -
+  # - codex-think: GPT-5.4 high reasoning effort (ideation)
+  # - codex-coding: GPT-5.4 medium reasoning effort (coding/run)
+  # - codex-spark: GPT-5.1 Codex Mini (lightweight fallback)
+  # - gpt-5.4: GPT-5.4 no reasoning effort override via Codex CLI
   # - gpt-5.2: GPT-5.2 via Codex CLI
-  # - gpt-5.3-codex: GPT-5.3 Codex via Codex CLI
-  # - gpt-5.3-codex-spark: GPT-5.3 Codex Spark (lightweight fallback) via Codex CLI
+  # - gpt-5.3-codex: GPT-5.3 Codex (code-specialized) via Codex CLI
   # - gpt5: GPT-5 via Codex CLI (legacy alias)
   # - gpt5high: GPT-5 via Codex CLI (high reasoning)
   # - o3high: O3 via Codex CLI (high reasoning)
package/templates/evaluator.py
CHANGED
File without changes

package/lib/__pycache__/ai_cli.cpython-311.pyc
Binary file
package/lib/__pycache__/evolution_csv.cpython-311.pyc
Binary file
package/lib/__pycache__/evolution_csv.cpython-313.pyc
Binary file
package/lib/__pycache__/evolve_run.cpython-311.pyc
Binary file
package/lib/__pycache__/evolve_run.cpython-314.pyc
Binary file
package/lib/__pycache__/evolve_worker.cpython-314.pyc
Binary file
package/lib/__pycache__/llm_bandit.cpython-314.pyc
Binary file
package/lib/__pycache__/log.cpython-311.pyc
Binary file
package/lib/__pycache__/log.cpython-314.pyc
Binary file
package/lib/__pycache__/meta_learning.cpython-314.pyc
Binary file
package/lib/__pycache__/sandbox_wrapper.cpython-314.pyc
Binary file