claude-evolve 1.11.16 → 1.11.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/claude-evolve-check +31 -1
- package/lib/__pycache__/ai_cli.cpython-311.pyc +0 -0
- package/lib/__pycache__/ai_cli.cpython-314.pyc +0 -0
- package/lib/__pycache__/embedding.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolution_csv.cpython-311.pyc +0 -0
- package/lib/__pycache__/evolution_csv.cpython-313.pyc +0 -0
- package/lib/__pycache__/evolution_csv.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolve_ideate.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolve_run.cpython-311.pyc +0 -0
- package/lib/__pycache__/evolve_run.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolve_worker.cpython-314.pyc +0 -0
- package/lib/__pycache__/llm_bandit.cpython-314.pyc +0 -0
- package/lib/__pycache__/log.cpython-311.pyc +0 -0
- package/lib/__pycache__/log.cpython-314.pyc +0 -0
- package/lib/__pycache__/meta_learning.cpython-314.pyc +0 -0
- package/lib/__pycache__/sandbox_wrapper.cpython-314.pyc +0 -0
- package/lib/ai-cli.sh +33 -3
- package/lib/ai_cli.py +4 -1
- package/lib/config.sh +5 -5
- package/lib/evolve_run.py +5 -1
- package/lib/evolve_worker.py +7 -1
- package/lib/llm_bandit.py +21 -5
- package/package.json +1 -1
- package/templates/config.yaml +32 -21
- package/lib/__pycache__/ai_cli.cpython-310.pyc +0 -0
- package/lib/__pycache__/embedding.cpython-310.pyc +0 -0
- package/lib/__pycache__/evolution_csv.cpython-310.pyc +0 -0
- package/lib/__pycache__/evolve_ideate.cpython-310.pyc +0 -0
- package/lib/__pycache__/log.cpython-310.pyc +0 -0
package/bin/claude-evolve-check
CHANGED
|
@@ -141,7 +141,37 @@ SCRIPT
|
|
|
141
141
|
exec codex exec --dangerously-bypass-approvals-and-sandbox "$1"
|
|
142
142
|
SCRIPT
|
|
143
143
|
;;
|
|
144
|
-
|
|
144
|
+
gpt-5-codex)
|
|
145
|
+
cat > "$test_script" << 'SCRIPT'
|
|
146
|
+
#!/usr/bin/env bash
|
|
147
|
+
exec codex exec -m gpt-5-codex --dangerously-bypass-approvals-and-sandbox "$1"
|
|
148
|
+
SCRIPT
|
|
149
|
+
;;
|
|
150
|
+
gpt-5.2)
|
|
151
|
+
cat > "$test_script" << 'SCRIPT'
|
|
152
|
+
#!/usr/bin/env bash
|
|
153
|
+
exec codex exec -m gpt-5.2 --dangerously-bypass-approvals-and-sandbox "$1"
|
|
154
|
+
SCRIPT
|
|
155
|
+
;;
|
|
156
|
+
gpt-5.3-codex)
|
|
157
|
+
cat > "$test_script" << 'SCRIPT'
|
|
158
|
+
#!/usr/bin/env bash
|
|
159
|
+
exec codex exec -m gpt-5.3-codex --dangerously-bypass-approvals-and-sandbox "$1"
|
|
160
|
+
SCRIPT
|
|
161
|
+
;;
|
|
162
|
+
gpt-5.3-codex-spark)
|
|
163
|
+
cat > "$test_script" << 'SCRIPT'
|
|
164
|
+
#!/usr/bin/env bash
|
|
165
|
+
exec codex exec -m gpt-5.3-codex-spark --dangerously-bypass-approvals-and-sandbox "$1"
|
|
166
|
+
SCRIPT
|
|
167
|
+
;;
|
|
168
|
+
gemini-5-flash)
|
|
169
|
+
cat > "$test_script" << 'SCRIPT'
|
|
170
|
+
#!/usr/bin/env bash
|
|
171
|
+
exec gemini -y -m gemini-5-flash -p "$1"
|
|
172
|
+
SCRIPT
|
|
173
|
+
;;
|
|
174
|
+
qwen-openrouter)
|
|
145
175
|
cat > "$test_script" << 'SCRIPT'
|
|
146
176
|
#!/usr/bin/env bash
|
|
147
177
|
exec opencode -m openrouter/qwen/qwen3.5-plus-02-15 run "$1"
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/lib/ai-cli.sh
CHANGED
|
@@ -105,6 +105,30 @@ $prompt"
|
|
|
105
105
|
ai_output=$(codex exec -m "$codex_gpt5_model" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
|
|
106
106
|
local ai_exit_code=$?
|
|
107
107
|
;;
|
|
108
|
+
gpt-5-codex)
|
|
109
|
+
local ai_output
|
|
110
|
+
# GPT-5 Codex - code-specialized variant via Codex CLI
|
|
111
|
+
ai_output=$(codex exec -m gpt-5-codex --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
|
|
112
|
+
local ai_exit_code=$?
|
|
113
|
+
;;
|
|
114
|
+
gpt-5.2)
|
|
115
|
+
local ai_output
|
|
116
|
+
# GPT-5.2 via Codex CLI
|
|
117
|
+
ai_output=$(codex exec -m gpt-5.2 --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
|
|
118
|
+
local ai_exit_code=$?
|
|
119
|
+
;;
|
|
120
|
+
gpt-5.3-codex)
|
|
121
|
+
local ai_output
|
|
122
|
+
# GPT-5.3 Codex via Codex CLI
|
|
123
|
+
ai_output=$(codex exec -m gpt-5.3-codex --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
|
|
124
|
+
local ai_exit_code=$?
|
|
125
|
+
;;
|
|
126
|
+
gpt-5.3-codex-spark)
|
|
127
|
+
local ai_output
|
|
128
|
+
# GPT-5.3 Codex Spark - lightweight fallback via Codex CLI
|
|
129
|
+
ai_output=$(codex exec -m gpt-5.3-codex-spark --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
|
|
130
|
+
local ai_exit_code=$?
|
|
131
|
+
;;
|
|
108
132
|
o3high)
|
|
109
133
|
local ai_output
|
|
110
134
|
ai_output=$(codex exec -m o3-mini -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
|
|
@@ -122,6 +146,12 @@ $prompt"
|
|
|
122
146
|
ai_output=$(gemini -y -m gemini-2.5-flash -p "$prompt" 2>&1)
|
|
123
147
|
local ai_exit_code=$?
|
|
124
148
|
;;
|
|
149
|
+
gemini-5-flash)
|
|
150
|
+
local ai_output
|
|
151
|
+
# Gemini 5 Flash - cheap fallback model
|
|
152
|
+
ai_output=$(gemini -y -m gemini-5-flash -p "$prompt" 2>&1)
|
|
153
|
+
local ai_exit_code=$?
|
|
154
|
+
;;
|
|
125
155
|
gemini-3-pro-preview)
|
|
126
156
|
local ai_output
|
|
127
157
|
# Gemini v3 Pro Preview via OpenRouter - EXPENSIVE
|
|
@@ -222,7 +252,7 @@ $prompt"
|
|
|
222
252
|
ai_output=$(opencode -m openrouter/moonshotai/kimi-k2.5 run "$prompt" 2>&1)
|
|
223
253
|
local ai_exit_code=$?
|
|
224
254
|
;;
|
|
225
|
-
qwen)
|
|
255
|
+
qwen-openrouter)
|
|
226
256
|
local ai_output
|
|
227
257
|
# Qwen latest - Alibaba's flagship model (currently qwen3.5-plus)
|
|
228
258
|
# Linear attention + sparse MoE, strong multimodal capabilities
|
|
@@ -258,8 +288,8 @@ $prompt"
|
|
|
258
288
|
echo "[AI] Raw output from $model_name:" >&2
|
|
259
289
|
echo "----------------------------------------" >&2
|
|
260
290
|
if [[ ${#ai_output} -gt 2000 ]]; then
|
|
261
|
-
echo "$ai_output
|
|
262
|
-
echo "
|
|
291
|
+
echo "... (truncated from ${#ai_output} characters to last 50 lines) ..." >&2
|
|
292
|
+
echo "$ai_output" | tail -50 >&2
|
|
263
293
|
else
|
|
264
294
|
echo "$ai_output" >&2
|
|
265
295
|
fi
|
package/lib/ai_cli.py
CHANGED
|
@@ -246,7 +246,10 @@ MODEL_TIMEOUTS = {
|
|
|
246
246
|
'opus-think': 1800, 'sonnet-think': 1800,
|
|
247
247
|
# Gemini - 30 min for pro (streams while working), 20 min for flash
|
|
248
248
|
'gemini-pro': 1800, 'gemini-flash': 1200, 'gemini-3-flash': 600,
|
|
249
|
-
'gemini-3-pro-preview': 1800,
|
|
249
|
+
'gemini-3-pro-preview': 1800, 'gemini-5-flash': 600,
|
|
250
|
+
# Codex/OpenAI models - 10 min standard, 5 min for the lightweight spark variant
|
|
251
|
+
'gpt-5-codex': 600, 'gpt-5.2': 600, 'gpt-5.3-codex': 600,
|
|
252
|
+
'gpt-5.3-codex-spark': 300,
|
|
250
253
|
# Z.AI agentic modes - 30 min (can be slow)
|
|
251
254
|
'glm-zai': 1800, 'glm-5-zai': 1800,
|
|
252
255
|
# Codex local - 40 min (local inference can be slow)
|
package/lib/config.sh
CHANGED
|
@@ -63,13 +63,13 @@ DEFAULT_WORKER_MAX_CANDIDATES=3
|
|
|
63
63
|
#
|
|
64
64
|
# Run: Subscription-based agentic models for code generation
|
|
65
65
|
# All CLI tools (opencode, claude, gemini, kimi, codex) are agentic and can edit files
|
|
66
|
-
DEFAULT_LLM_RUN="
|
|
67
|
-
DEFAULT_LLM_RUN_FALLBACK="codex-
|
|
66
|
+
DEFAULT_LLM_RUN="gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder gpt-5-codex gpt-5-codex sonnet"
|
|
67
|
+
DEFAULT_LLM_RUN_FALLBACK="haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark"
|
|
68
68
|
#
|
|
69
69
|
# Ideate: Agentic models that can edit files for ideation
|
|
70
70
|
# All CLI tools (opencode, claude, gemini, kimi) are agentic and can edit files
|
|
71
|
-
DEFAULT_LLM_IDEATE="opus-think
|
|
72
|
-
DEFAULT_LLM_IDEATE_FALLBACK="
|
|
71
|
+
DEFAULT_LLM_IDEATE="opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2 gpt-5.3-codex qwen-openrouter"
|
|
72
|
+
DEFAULT_LLM_IDEATE_FALLBACK="haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark"
|
|
73
73
|
|
|
74
74
|
# Load configuration from a YAML file and update variables
|
|
75
75
|
_load_yaml_config() {
|
|
@@ -318,7 +318,7 @@ show_config() {
|
|
|
318
318
|
echo " Worker max candidates: $WORKER_MAX_CANDIDATES"
|
|
319
319
|
echo " LLM configuration:"
|
|
320
320
|
# Show LLM configurations using dynamic variable names
|
|
321
|
-
for model in gpt5high o3high codex gemini opus opus_think sonnet sonnet_think cursor_sonnet cursor_opus glm deepseek; do
|
|
321
|
+
for model in gpt5high o3high gpt_5_codex gpt_5_2 gpt_5_3_codex gpt_5_3_codex_spark codex gemini gemini_5_flash opus opus_think sonnet sonnet_think cursor_sonnet cursor_opus glm deepseek; do
|
|
322
322
|
var_name="LLM_CLI_${model}"
|
|
323
323
|
var_value=$(eval echo "\$$var_name")
|
|
324
324
|
if [[ -n "$var_value" ]]; then
|
package/lib/evolve_run.py
CHANGED
|
@@ -71,7 +71,11 @@ class WorkerPool:
|
|
|
71
71
|
try:
|
|
72
72
|
# Don't capture output - let it stream directly to terminal
|
|
73
73
|
# This provides real-time visibility into which models are being used
|
|
74
|
-
|
|
74
|
+
# AIDEV-NOTE: Explicitly pass stdin=DEVNULL so workers don't inherit
|
|
75
|
+
# a closed/bad stdin FD from parent (e.g. when run via nohup or after
|
|
76
|
+
# terminal disconnect). Without this, Python workers crash at startup
|
|
77
|
+
# with "OSError: [Errno 9] Bad file descriptor" on sys stream init.
|
|
78
|
+
proc = subprocess.Popen(cmd, stdin=subprocess.DEVNULL)
|
|
75
79
|
self.workers[proc.pid] = proc
|
|
76
80
|
log(f"Spawned worker {proc.pid}")
|
|
77
81
|
return proc.pid
|
package/lib/evolve_worker.py
CHANGED
|
@@ -205,9 +205,15 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
|
|
|
205
205
|
preview = output[-300:] if output else "(empty)"
|
|
206
206
|
log(f"Bandit model {selected_model} completed but didn't modify file ({len(output)} chars), trying fallback...")
|
|
207
207
|
log(f"AI output preview: {preview}")
|
|
208
|
+
# AIDEV-NOTE: Report no-modification as failure to bandit
|
|
209
|
+
self.bandit.update(selected_model, child_score=None, parent_score=self._parent_score)
|
|
210
|
+
log(f"Bandit update: {selected_model} no file modification")
|
|
208
211
|
|
|
209
212
|
except AIError as e:
|
|
210
213
|
log(f"Bandit model {selected_model} failed: {e}, trying fallback...")
|
|
214
|
+
# AIDEV-NOTE: Report AI-level failure to bandit so it learns to avoid broken models
|
|
215
|
+
self.bandit.update(selected_model, child_score=None, parent_score=self._parent_score)
|
|
216
|
+
log(f"Bandit update: {selected_model} AI call failed")
|
|
211
217
|
|
|
212
218
|
# Fallback to round-based retry with all models
|
|
213
219
|
try:
|
|
@@ -229,7 +235,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
|
|
|
229
235
|
return True, model
|
|
230
236
|
else:
|
|
231
237
|
# AIDEV-NOTE: Log output so we can diagnose why file wasn't modified
|
|
232
|
-
preview = output[:
|
|
238
|
+
preview = output[-300:] if output else "(empty)"
|
|
233
239
|
log(f"AI completed but did not modify file ({len(output)} chars)")
|
|
234
240
|
log(f"AI output preview: {preview}")
|
|
235
241
|
return False, model
|
package/lib/llm_bandit.py
CHANGED
|
@@ -86,6 +86,12 @@ class LLMBandit:
|
|
|
86
86
|
# Baseline score for normalizing improvements
|
|
87
87
|
self._baseline_score: float = 0.0
|
|
88
88
|
|
|
89
|
+
# AIDEV-NOTE: Decay counter - only apply decay every N updates to prevent
|
|
90
|
+
# aggressive memory loss. With decay_factor=0.95 applied every update,
|
|
91
|
+
# n_completed floors at 1 after ~50 updates and the bandit can't learn.
|
|
92
|
+
self._updates_since_decay: int = 0
|
|
93
|
+
self._decay_interval: int = 50 # Apply decay every 50 updates
|
|
94
|
+
|
|
89
95
|
# Load existing state if available
|
|
90
96
|
if state_file and Path(state_file).exists():
|
|
91
97
|
self.load()
|
|
@@ -215,13 +221,23 @@ class LLMBandit:
|
|
|
215
221
|
return improvement
|
|
216
222
|
|
|
217
223
|
def _apply_decay(self) -> None:
|
|
218
|
-
"""Apply decay to reduce influence of old observations.
|
|
224
|
+
"""Apply decay to reduce influence of old observations.
|
|
225
|
+
|
|
226
|
+
AIDEV-NOTE: Only applies every _decay_interval updates to prevent
|
|
227
|
+
aggressive memory loss. The int() truncation on n_completed was
|
|
228
|
+
destroying the bandit's ability to learn from failures.
|
|
229
|
+
"""
|
|
230
|
+
self._updates_since_decay += 1
|
|
231
|
+
if self._updates_since_decay < self._decay_interval:
|
|
232
|
+
return
|
|
233
|
+
|
|
234
|
+
self._updates_since_decay = 0
|
|
219
235
|
for stats in self.models.values():
|
|
220
|
-
# Decay
|
|
236
|
+
# Decay totals to reduce influence of old observations
|
|
221
237
|
stats.total_improvement *= self.decay_factor
|
|
222
|
-
#
|
|
223
|
-
if stats.n_completed >
|
|
224
|
-
stats.n_completed = max(
|
|
238
|
+
# Decay counts but preserve enough memory to differentiate models
|
|
239
|
+
if stats.n_completed > 2:
|
|
240
|
+
stats.n_completed = max(2, int(stats.n_completed * self.decay_factor))
|
|
225
241
|
|
|
226
242
|
def save(self) -> None:
|
|
227
243
|
"""Persist state to file."""
|
package/package.json
CHANGED
package/templates/config.yaml
CHANGED
|
@@ -90,30 +90,41 @@ parallel:
|
|
|
90
90
|
llm_cli:
|
|
91
91
|
# What to run for each sub-command
|
|
92
92
|
# Models are tried in order, with round-robin distribution across candidates
|
|
93
|
-
# You can repeat models for weighted selection (e.g., "
|
|
93
|
+
# You can repeat models for weighted selection (e.g., "gemini-pro gemini-pro sonnet" for 2:1 ratio)
|
|
94
94
|
|
|
95
|
-
# Default configuration:
|
|
95
|
+
# Default configuration: sonnet at ~11%, rest doubled for cost savings
|
|
96
96
|
# Commented out because these change over time; uncomment to override
|
|
97
|
-
#run:
|
|
98
|
-
#ideate: opus-
|
|
97
|
+
#run: gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder gpt-5-codex gpt-5-codex sonnet
|
|
98
|
+
#ideate: opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2 gpt-5.3-codex qwen-openrouter
|
|
99
|
+
#run_fallback: haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark
|
|
100
|
+
#ideate_fallback: haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark
|
|
99
101
|
|
|
100
102
|
# Available models:
|
|
101
|
-
#
|
|
102
|
-
# - sonnet
|
|
103
|
-
# -
|
|
104
|
-
# - opus
|
|
105
|
-
# -
|
|
106
|
-
# -
|
|
103
|
+
# Claude (subscription-based, watch usage limits):
|
|
104
|
+
# - sonnet: Claude Sonnet via Claude CLI
|
|
105
|
+
# - sonnet-think: Claude Sonnet with extended thinking (ultrathink prompt)
|
|
106
|
+
# - opus: Claude Opus via Claude CLI
|
|
107
|
+
# - opus-think: Claude Opus with extended thinking (ultrathink prompt)
|
|
108
|
+
# - haiku: Claude Haiku via Claude CLI (cheap fallback)
|
|
109
|
+
#
|
|
110
|
+
# Codex/OpenAI (subscription-based):
|
|
111
|
+
# - gpt-5-codex: GPT-5 Codex (code-specialized) via Codex CLI
|
|
112
|
+
# - gpt-5.2: GPT-5.2 via Codex CLI
|
|
113
|
+
# - gpt-5.3-codex: GPT-5.3 Codex via Codex CLI
|
|
114
|
+
# - gpt-5.3-codex-spark: GPT-5.3 Codex Spark (lightweight fallback) via Codex CLI
|
|
115
|
+
# - gpt5: GPT-5 via Codex CLI (legacy alias)
|
|
107
116
|
# - gpt5high: GPT-5 via Codex CLI (high reasoning)
|
|
108
117
|
# - o3high: O3 via Codex CLI (high reasoning)
|
|
109
|
-
#
|
|
110
|
-
#
|
|
111
|
-
# -
|
|
112
|
-
# -
|
|
113
|
-
# -
|
|
114
|
-
#
|
|
115
|
-
#
|
|
116
|
-
# -
|
|
117
|
-
# - kimi-coder: Kimi for Coding
|
|
118
|
-
# -
|
|
119
|
-
# -
|
|
118
|
+
#
|
|
119
|
+
# Gemini (free tier available):
|
|
120
|
+
# - gemini-pro: Gemini 3 Pro Preview via Gemini CLI
|
|
121
|
+
# - gemini-5-flash: Gemini 5 Flash via Gemini CLI (cheap fallback)
|
|
122
|
+
# - gemini-flash: Gemini 2.5 Flash via Gemini CLI (legacy)
|
|
123
|
+
#
|
|
124
|
+
# Other free/cheap models:
|
|
125
|
+
# - glm-5-zai: GLM-5 via Z.AI agentic mode
|
|
126
|
+
# - kimi-coder: Kimi for Coding via kimi CLI (fast, good for code gen)
|
|
127
|
+
# - kimi-k2-openrouter: Kimi K2 Thinking via OpenRouter
|
|
128
|
+
# - codex-oss-local: Local model via Codex + Ollama
|
|
129
|
+
# - cursor-sonnet: Claude Sonnet via Cursor Agent CLI
|
|
130
|
+
# - cursor-opus: Claude Opus via Cursor Agent CLI
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|