claude-evolve 1.11.17 → 1.11.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -90,7 +90,7 @@ SCRIPT
90
90
  gemini-pro)
91
91
  cat > "$test_script" << 'SCRIPT'
92
92
  #!/usr/bin/env bash
93
- exec gemini -y -m gemini-3-pro-preview -p "$1"
93
+ exec gemini -y -m auto-gemini-3 -p "$1"
94
94
  SCRIPT
95
95
  ;;
96
96
  gemini-flash|gemini-3-flash)
@@ -141,10 +141,22 @@ SCRIPT
141
141
  exec codex exec --dangerously-bypass-approvals-and-sandbox "$1"
142
142
  SCRIPT
143
143
  ;;
144
- gpt-5-codex)
144
+ codex-think)
145
145
  cat > "$test_script" << 'SCRIPT'
146
146
  #!/usr/bin/env bash
147
- exec codex exec -m gpt-5-codex --dangerously-bypass-approvals-and-sandbox "$1"
147
+ exec codex exec -m gpt-5.4 -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$1"
148
+ SCRIPT
149
+ ;;
150
+ codex-coding)
151
+ cat > "$test_script" << 'SCRIPT'
152
+ #!/usr/bin/env bash
153
+ exec codex exec -m gpt-5.4 -c model_reasoning_effort="medium" --dangerously-bypass-approvals-and-sandbox "$1"
154
+ SCRIPT
155
+ ;;
156
+ gpt-5.4)
157
+ cat > "$test_script" << 'SCRIPT'
158
+ #!/usr/bin/env bash
159
+ exec codex exec -m gpt-5.4 --dangerously-bypass-approvals-and-sandbox "$1"
148
160
  SCRIPT
149
161
  ;;
150
162
  gpt-5.2)
@@ -159,22 +171,22 @@ SCRIPT
159
171
  exec codex exec -m gpt-5.3-codex --dangerously-bypass-approvals-and-sandbox "$1"
160
172
  SCRIPT
161
173
  ;;
162
- gpt-5.3-codex-spark)
174
+ codex-spark|gpt-5.1-codex-mini)
163
175
  cat > "$test_script" << 'SCRIPT'
164
176
  #!/usr/bin/env bash
165
- exec codex exec -m gpt-5.3-codex-spark --dangerously-bypass-approvals-and-sandbox "$1"
177
+ exec codex exec -m gpt-5.1-codex-mini --dangerously-bypass-approvals-and-sandbox "$1"
166
178
  SCRIPT
167
179
  ;;
168
180
  gemini-5-flash)
169
181
  cat > "$test_script" << 'SCRIPT'
170
182
  #!/usr/bin/env bash
171
- exec gemini -y -m gemini-5-flash -p "$1"
183
+ exec gemini -y -m gemini-3-flash-preview -p "$1"
172
184
  SCRIPT
173
185
  ;;
174
- qwen)
186
+ qwen-openrouter)
175
187
  cat > "$test_script" << 'SCRIPT'
176
188
  #!/usr/bin/env bash
177
- exec opencode -m openrouter/qwen/qwen3.5-plus-02-15 run "$1"
189
+ exec opencode -m openrouter/qwen/qwen3.6-plus:free run "$1"
178
190
  SCRIPT
179
191
  ;;
180
192
  *)
package/lib/ai-cli.sh CHANGED
@@ -105,10 +105,22 @@ $prompt"
105
105
  ai_output=$(codex exec -m "$codex_gpt5_model" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
106
106
  local ai_exit_code=$?
107
107
  ;;
108
- gpt-5-codex)
108
+ codex-think)
109
109
  local ai_output
110
- # GPT-5 Codex - code-specialized variant via Codex CLI
111
- ai_output=$(codex exec -m gpt-5-codex --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
110
+ # GPT-5.4 high reasoning - for ideation tasks requiring deep thinking
111
+ ai_output=$(codex exec -m gpt-5.4 -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
112
+ local ai_exit_code=$?
113
+ ;;
114
+ codex-coding)
115
+ local ai_output
116
+ # GPT-5.4 medium reasoning - for coding/implementation tasks
117
+ ai_output=$(codex exec -m gpt-5.4 -c model_reasoning_effort="medium" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
118
+ local ai_exit_code=$?
119
+ ;;
120
+ gpt-5.4)
121
+ local ai_output
122
+ # GPT-5.4 - latest frontier agentic coding model via Codex CLI
123
+ ai_output=$(codex exec -m gpt-5.4 --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
112
124
  local ai_exit_code=$?
113
125
  ;;
114
126
  gpt-5.2)
@@ -123,10 +135,10 @@ $prompt"
123
135
  ai_output=$(codex exec -m gpt-5.3-codex --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
124
136
  local ai_exit_code=$?
125
137
  ;;
126
- gpt-5.3-codex-spark)
138
+ codex-spark|gpt-5.1-codex-mini)
127
139
  local ai_output
128
- # GPT-5.3 Codex Spark - lightweight fallback via Codex CLI
129
- ai_output=$(codex exec -m gpt-5.3-codex-spark --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
140
+ # GPT-5.1 Codex Mini - cheap/fast lightweight fallback via Codex CLI
141
+ ai_output=$(codex exec -m gpt-5.1-codex-mini --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
130
142
  local ai_exit_code=$?
131
143
  ;;
132
144
  o3high)
@@ -136,8 +148,8 @@ $prompt"
136
148
  ;;
137
149
  gemini-pro)
138
150
  local ai_output
139
- # Gemini streams output while working
140
- ai_output=$(gemini -y -m gemini-3-pro-preview -p "$prompt" 2>&1)
151
+ # Gemini 3 auto-routing (gemini-3.1-pro / gemini-3-flash) - streams output while working
152
+ ai_output=$(gemini -y -m auto-gemini-3 -p "$prompt" 2>&1)
141
153
  local ai_exit_code=$?
142
154
  ;;
143
155
  gemini-flash)
@@ -149,7 +161,7 @@ $prompt"
149
161
  gemini-5-flash)
150
162
  local ai_output
151
163
  # gemini-5-flash alias - cheap fallback model (invokes gemini-3-flash-preview)
152
- ai_output=$(gemini -y -m gemini-5-flash -p "$prompt" 2>&1)
164
+ ai_output=$(gemini -y -m gemini-3-flash-preview -p "$prompt" 2>&1)
153
165
  local ai_exit_code=$?
154
166
  ;;
155
167
  gemini-3-pro-preview)
@@ -252,11 +264,11 @@ $prompt"
252
264
  ai_output=$(opencode -m openrouter/moonshotai/kimi-k2.5 run "$prompt" 2>&1)
253
265
  local ai_exit_code=$?
254
266
  ;;
255
- qwen)
267
+ qwen-openrouter)
256
268
  local ai_output
257
- # Qwen latest - Alibaba's flagship model (currently qwen3.5-plus)
269
+ # Qwen latest - Alibaba's flagship model (currently qwen3.6-plus, free promotional tier)
258
270
  # Linear attention + sparse MoE, strong multimodal capabilities
259
- ai_output=$(opencode -m openrouter/qwen/qwen3.5-plus-02-15 run "$prompt" 2>&1)
271
+ ai_output=$(opencode -m openrouter/qwen/qwen3.6-plus:free run "$prompt" 2>&1)
260
272
  local ai_exit_code=$?
261
273
  ;;
262
274
  codex-oss-local)
@@ -288,8 +300,8 @@ $prompt"
288
300
  echo "[AI] Raw output from $model_name:" >&2
289
301
  echo "----------------------------------------" >&2
290
302
  if [[ ${#ai_output} -gt 2000 ]]; then
291
- echo "$ai_output" | head -50 >&2
292
- echo "... (truncated from ${#ai_output} characters to first 50 lines) ..." >&2
303
+ echo "... (truncated from ${#ai_output} characters to last 50 lines) ..." >&2
304
+ echo "$ai_output" | tail -50 >&2
293
305
  else
294
306
  echo "$ai_output" >&2
295
307
  fi
package/lib/ai_cli.py CHANGED
@@ -248,8 +248,9 @@ MODEL_TIMEOUTS = {
248
248
  'gemini-pro': 1800, 'gemini-flash': 1200, 'gemini-3-flash': 600,
249
249
  'gemini-3-pro-preview': 1800, 'gemini-5-flash': 600,
250
250
  # Codex/OpenAI models - 10 min standard (codex-think gets 15 min; lightweight fallbacks get 5 min)
251
- 'gpt-5-codex': 600, 'gpt-5.2': 600, 'gpt-5.3-codex': 600,
252
- 'gpt-5.3-codex-spark': 300,
251
+ 'codex-think': 900, 'codex-coding': 600, 'codex-spark': 300,
252
+ 'gpt-5.4': 600, 'gpt-5.2': 600, 'gpt-5.3-codex': 600,
253
+ 'gpt-5.1-codex-mini': 300,
253
254
  # Z.AI agentic modes - 30 min (can be slow)
254
255
  'glm-zai': 1800, 'glm-5-zai': 1800,
255
256
  # Codex local - 40 min (local inference can be slow)
package/lib/config.sh CHANGED
@@ -63,13 +63,13 @@ DEFAULT_WORKER_MAX_CANDIDATES=3
63
63
  #
64
64
  # Run: Subscription-based agentic models for code generation
65
65
  # All CLI tools (opencode, claude, gemini, kimi) are agentic and can edit files
66
- DEFAULT_LLM_RUN="gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder gpt-5-codex gpt-5-codex sonnet"
67
- DEFAULT_LLM_RUN_FALLBACK="haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark"
66
+ DEFAULT_LLM_RUN="gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder codex-coding codex-coding sonnet"
67
+ DEFAULT_LLM_RUN_FALLBACK="haiku glm-5-zai gemini-5-flash codex-spark"
68
68
  #
69
69
  # Ideate: Agentic models that can edit files for ideation
70
70
  # All CLI tools (opencode, claude, gemini, kimi) are agentic and can edit files
71
- DEFAULT_LLM_IDEATE="opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2 gpt-5.3-codex"
72
- DEFAULT_LLM_IDEATE_FALLBACK="haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark"
71
+ DEFAULT_LLM_IDEATE="opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2 codex-think qwen-openrouter"
72
+ DEFAULT_LLM_IDEATE_FALLBACK="haiku glm-5-zai gemini-5-flash codex-spark"
73
73
 
74
74
  # Load configuration from a YAML file and update variables
75
75
  _load_yaml_config() {
package/lib/evolve_run.py CHANGED
@@ -71,7 +71,11 @@ class WorkerPool:
71
71
  try:
72
72
  # Don't capture output - let it stream directly to terminal
73
73
  # This provides real-time visibility into which models are being used
74
- proc = subprocess.Popen(cmd)
74
+ # AIDEV-NOTE: Explicitly pass stdin=DEVNULL so workers don't inherit
75
+ # a closed/bad stdin FD from parent (e.g. when run via nohup or after
76
+ # terminal disconnect). Without this, Python workers crash at startup
77
+ # with "OSError: [Errno 9] Bad file descriptor" on sys stream init.
78
+ proc = subprocess.Popen(cmd, stdin=subprocess.DEVNULL)
75
79
  self.workers[proc.pid] = proc
76
80
  log(f"Spawned worker {proc.pid}")
77
81
  return proc.pid
@@ -205,9 +205,15 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
205
205
  preview = output[-300:] if output else "(empty)"
206
206
  log(f"Bandit model {selected_model} completed but didn't modify file ({len(output)} chars), trying fallback...")
207
207
  log(f"AI output preview: {preview}")
208
+ # AIDEV-NOTE: Report no-modification as failure to bandit
209
+ self.bandit.update(selected_model, child_score=None, parent_score=self._parent_score)
210
+ log(f"Bandit update: {selected_model} no file modification")
208
211
 
209
212
  except AIError as e:
210
213
  log(f"Bandit model {selected_model} failed: {e}, trying fallback...")
214
+ # AIDEV-NOTE: Report AI-level failure to bandit so it learns to avoid broken models
215
+ self.bandit.update(selected_model, child_score=None, parent_score=self._parent_score)
216
+ log(f"Bandit update: {selected_model} AI call failed")
211
217
 
212
218
  # Fallback to round-based retry with all models
213
219
  try:
@@ -229,7 +235,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
229
235
  return True, model
230
236
  else:
231
237
  # AIDEV-NOTE: Log output so we can diagnose why file wasn't modified
232
- preview = output[:300] if output else "(empty)"
238
+ preview = output[-300:] if output else "(empty)"
233
239
  log(f"AI completed but did not modify file ({len(output)} chars)")
234
240
  log(f"AI output preview: {preview}")
235
241
  return False, model
package/lib/llm_bandit.py CHANGED
@@ -86,6 +86,12 @@ class LLMBandit:
86
86
  # Baseline score for normalizing improvements
87
87
  self._baseline_score: float = 0.0
88
88
 
89
+ # AIDEV-NOTE: Decay counter - only apply decay every N updates to prevent
90
+ # aggressive memory loss. With decay_factor=0.95 applied every update,
91
+ # n_completed floors at 1 after ~50 updates and the bandit can't learn.
92
+ self._updates_since_decay: int = 0
93
+ self._decay_interval: int = 50 # Apply decay every 50 updates
94
+
89
95
  # Load existing state if available
90
96
  if state_file and Path(state_file).exists():
91
97
  self.load()
@@ -215,13 +221,23 @@ class LLMBandit:
215
221
  return improvement
216
222
 
217
223
  def _apply_decay(self) -> None:
218
- """Apply decay to reduce influence of old observations."""
224
+ """Apply decay to reduce influence of old observations.
225
+
226
+ AIDEV-NOTE: Only applies every _decay_interval updates to prevent
227
+ aggressive memory loss. The int() truncation on n_completed was
228
+ destroying the bandit's ability to learn from failures.
229
+ """
230
+ self._updates_since_decay += 1
231
+ if self._updates_since_decay < self._decay_interval:
232
+ return
233
+
234
+ self._updates_since_decay = 0
219
235
  for stats in self.models.values():
220
- # Decay both counts and totals proportionally
236
+ # Decay totals to reduce influence of old observations
221
237
  stats.total_improvement *= self.decay_factor
222
- # Don't decay counts below a small floor to preserve some memory
223
- if stats.n_completed > 1:
224
- stats.n_completed = max(1, int(stats.n_completed * self.decay_factor))
238
+ # Decay counts but preserve enough memory to differentiate models
239
+ if stats.n_completed > 2:
240
+ stats.n_completed = max(2, int(stats.n_completed * self.decay_factor))
225
241
 
226
242
  def save(self) -> None:
227
243
  """Persist state to file."""
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-evolve",
3
- "version": "1.11.17",
3
+ "version": "1.11.19",
4
4
  "bin": {
5
5
  "claude-evolve": "bin/claude-evolve",
6
6
  "claude-evolve-main": "bin/claude-evolve-main",
@@ -94,10 +94,10 @@ llm_cli:
94
94
 
95
95
  # Default configuration: sonnet at ~11%, rest doubled for cost savings
96
96
  # Commented out because these change over time; uncomment to override
97
- #run: gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder gpt-5-codex gpt-5-codex sonnet
98
- #ideate: opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2 gpt-5.3-codex
99
- #run_fallback: haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark
100
- #ideate_fallback: haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark
97
+ #run: gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder codex-coding codex-coding sonnet
98
+ #ideate: opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2 codex-think qwen-openrouter
99
+ #run_fallback: haiku glm-5-zai gemini-5-flash codex-spark
100
+ #ideate_fallback: haiku glm-5-zai gemini-5-flash codex-spark
101
101
 
102
102
  # Available models:
103
103
  # Claude (subscription-based, watch usage limits):
@@ -108,10 +108,12 @@ llm_cli:
108
108
  # - haiku: Claude Haiku via Claude CLI (cheap fallback)
109
109
  #
110
110
  # Codex/OpenAI (subscription-based):
111
- # - gpt-5-codex: GPT-5 Codex (code-specialized) via Codex CLI
111
+ # - codex-think: GPT-5.4 high reasoning effort (ideation)
112
+ # - codex-coding: GPT-5.4 medium reasoning effort (coding/run)
113
+ # - codex-spark: GPT-5.1 Codex Mini (lightweight fallback)
114
+ # - gpt-5.4: GPT-5.4 via Codex CLI (no reasoning-effort override)
112
115
  # - gpt-5.2: GPT-5.2 via Codex CLI
113
- # - gpt-5.3-codex: GPT-5.3 Codex via Codex CLI
114
- # - gpt-5.3-codex-spark: GPT-5.3 Codex Spark (lightweight fallback) via Codex CLI
116
+ # - gpt-5.3-codex: GPT-5.3 Codex (code-specialized) via Codex CLI
115
117
  # - gpt5: GPT-5 via Codex CLI (legacy alias)
116
118
  # - gpt5high: GPT-5 via Codex CLI (high reasoning)
117
119
  # - o3high: O3 via Codex CLI (high reasoning)