npm - claude-evolve - Versions diffs - 1.11.17 → 1.11.19 - Mend

claude-evolve 1.11.17 → 1.11.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/bin/claude-evolve-check CHANGED

@@ -90,7 +90,7 @@ SCRIPT
     gemini-pro)
       cat > "$test_script" << 'SCRIPT'
 #!/usr/bin/env bash
-exec gemini -y -m gemini-3-pro-preview -p "$1"
+exec gemini -y -m auto-gemini-3 -p "$1"
 SCRIPT
       ;;
     gemini-flash|gemini-3-flash)
@@ -141,10 +141,22 @@ SCRIPT
 exec codex exec --dangerously-bypass-approvals-and-sandbox "$1"
 SCRIPT
       ;;
-    gpt-5-codex)
+    codex-think)
       cat > "$test_script" << 'SCRIPT'
 #!/usr/bin/env bash
-exec codex exec -m gpt-5-codex --dangerously-bypass-approvals-and-sandbox "$1"
+exec codex exec -m gpt-5.4 -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$1"
+SCRIPT
+      ;;
+    codex-coding)
+      cat > "$test_script" << 'SCRIPT'
+#!/usr/bin/env bash
+exec codex exec -m gpt-5.4 -c model_reasoning_effort="medium" --dangerously-bypass-approvals-and-sandbox "$1"
+SCRIPT
+      ;;
+    gpt-5.4)
+      cat > "$test_script" << 'SCRIPT'
+#!/usr/bin/env bash
+exec codex exec -m gpt-5.4 --dangerously-bypass-approvals-and-sandbox "$1"
 SCRIPT
       ;;
     gpt-5.2)
@@ -159,22 +171,22 @@ SCRIPT
 exec codex exec -m gpt-5.3-codex --dangerously-bypass-approvals-and-sandbox "$1"
 SCRIPT
       ;;
-    gpt-5.3-codex-spark)
+    codex-spark|gpt-5.1-codex-mini)
       cat > "$test_script" << 'SCRIPT'
 #!/usr/bin/env bash
-exec codex exec -m gpt-5.3-codex-spark --dangerously-bypass-approvals-and-sandbox "$1"
+exec codex exec -m gpt-5.1-codex-mini --dangerously-bypass-approvals-and-sandbox "$1"
 SCRIPT
       ;;
     gemini-5-flash)
       cat > "$test_script" << 'SCRIPT'
 #!/usr/bin/env bash
-exec gemini -y -m gemini-5-flash -p "$1"
+exec gemini -y -m gemini-3-flash-preview -p "$1"
 SCRIPT
       ;;
-    qwen)
+    qwen-openrouter)
       cat > "$test_script" << 'SCRIPT'
 #!/usr/bin/env bash
-exec opencode -m openrouter/qwen/qwen3.5-plus-02-15 run "$1"
+exec opencode -m openrouter/qwen/qwen3.6-plus:free run "$1"
 SCRIPT
       ;;
     *)

package/lib/ai-cli.sh CHANGED

@@ -105,10 +105,22 @@ $prompt"
       ai_output=$(codex exec -m "$codex_gpt5_model" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-    gpt-5-codex)
+    codex-think)
       local ai_output
-      # GPT-5 Codex - code-specialized variant via Codex CLI
-      ai_output=$(codex exec -m gpt-5-codex --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+      # GPT-5.4 high reasoning - for ideation tasks requiring deep thinking
+      ai_output=$(codex exec -m gpt-5.4 -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+      local ai_exit_code=$?
+      ;;
+    codex-coding)
+      local ai_output
+      # GPT-5.4 medium reasoning - for coding/implementation tasks
+      ai_output=$(codex exec -m gpt-5.4 -c model_reasoning_effort="medium" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+      local ai_exit_code=$?
+      ;;
+    gpt-5.4)
+      local ai_output
+      # GPT-5.4 - latest frontier agentic coding model via Codex CLI
+      ai_output=$(codex exec -m gpt-5.4 --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
     gpt-5.2)
@@ -123,10 +135,10 @@ $prompt"
       ai_output=$(codex exec -m gpt-5.3-codex --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-    gpt-5.3-codex-spark)
+    codex-spark|gpt-5.1-codex-mini)
       local ai_output
-      # GPT-5.3 Codex Spark - lightweight fallback via Codex CLI
-      ai_output=$(codex exec -m gpt-5.3-codex-spark --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+      # GPT-5.1 Codex Mini - cheap/fast lightweight fallback via Codex CLI
+      ai_output=$(codex exec -m gpt-5.1-codex-mini --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
     o3high)
@@ -136,8 +148,8 @@ $prompt"
       ;;
     gemini-pro)
       local ai_output
-      # Gemini streams output while working
-      ai_output=$(gemini -y -m gemini-3-pro-preview -p "$prompt" 2>&1)
+      # Gemini 3 auto-routing (gemini-3.1-pro / gemini-3-flash) - streams output while working
+      ai_output=$(gemini -y -m auto-gemini-3 -p "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
     gemini-flash)
@@ -149,7 +161,7 @@ $prompt"
     gemini-5-flash)
       local ai_output
       # Gemini 5 Flash - cheap fallback model
-      ai_output=$(gemini -y -m gemini-5-flash -p "$prompt" 2>&1)
+      ai_output=$(gemini -y -m gemini-3-flash-preview -p "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
     gemini-3-pro-preview)
@@ -252,11 +264,11 @@ $prompt"
       ai_output=$(opencode -m openrouter/moonshotai/kimi-k2.5 run "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-    qwen)
+    qwen-openrouter)
       local ai_output
-      # Qwen latest - Alibaba's flagship model (currently qwen3.5-plus)
+      # Qwen latest - Alibaba's flagship model (currently qwen3.6-plus, free promotional tier)
       # Linear attention + sparse MoE, strong multimodal capabilities
-      ai_output=$(opencode -m openrouter/qwen/qwen3.5-plus-02-15 run "$prompt" 2>&1)
+      ai_output=$(opencode -m openrouter/qwen/qwen3.6-plus:free run "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
     codex-oss-local)
@@ -288,8 +300,8 @@ $prompt"
     echo "[AI] Raw output from $model_name:" >&2
     echo "----------------------------------------" >&2
     if [[ ${#ai_output} -gt 2000 ]]; then
-      echo "$ai_output" | head -50 >&2
-      echo "... (truncated from ${#ai_output} characters to first 50 lines) ..." >&2
+      echo "... (truncated from ${#ai_output} characters to last 50 lines) ..." >&2
+      echo "$ai_output" | tail -50 >&2
     else
       echo "$ai_output" >&2
     fi

package/lib/ai_cli.py CHANGED

@@ -248,8 +248,9 @@ MODEL_TIMEOUTS = {
     'gemini-pro': 1800, 'gemini-flash': 1200, 'gemini-3-flash': 600,
     'gemini-3-pro-preview': 1800, 'gemini-5-flash': 600,
     # Codex/OpenAI models - 10 min standard
-    'gpt-5-codex': 600, 'gpt-5.2': 600, 'gpt-5.3-codex': 600,
-    'gpt-5.3-codex-spark': 300,
+    'codex-think': 900, 'codex-coding': 600, 'codex-spark': 300,
+    'gpt-5.4': 600, 'gpt-5.2': 600, 'gpt-5.3-codex': 600,
+    'gpt-5.1-codex-mini': 300,
     # Z.AI agentic modes - 30 min (can be slow)
     'glm-zai': 1800, 'glm-5-zai': 1800,
     # Codex local - 40 min (local inference can be slow)

package/lib/config.sh CHANGED

@@ -63,13 +63,13 @@ DEFAULT_WORKER_MAX_CANDIDATES=3
 #
 # Run: Subscription-based agentic models for code generation
 # All CLI tools (opencode, claude, gemini, kimi) are agentic and can edit files
-DEFAULT_LLM_RUN="gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder gpt-5-codex gpt-5-codex sonnet"
-DEFAULT_LLM_RUN_FALLBACK="haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark"
+DEFAULT_LLM_RUN="gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder codex-coding codex-coding sonnet"
+DEFAULT_LLM_RUN_FALLBACK="haiku glm-5-zai gemini-5-flash codex-spark"
 #
 # Ideate: Agentic models that can edit files for ideation
 # All CLI tools (opencode, claude, gemini, kimi) are agentic and can edit files
-DEFAULT_LLM_IDEATE="opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2 gpt-5.3-codex"
-DEFAULT_LLM_IDEATE_FALLBACK="haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark"
+DEFAULT_LLM_IDEATE="opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2 codex-think qwen-openrouter"
+DEFAULT_LLM_IDEATE_FALLBACK="haiku glm-5-zai gemini-5-flash codex-spark"
 # Load configuration from a YAML file and update variables
 _load_yaml_config() {

package/lib/evolve_run.py CHANGED

@@ -71,7 +71,11 @@ class WorkerPool:
         try:
             # Don't capture output - let it stream directly to terminal
             # This provides real-time visibility into which models are being used
-            proc = subprocess.Popen(cmd)
+            # AIDEV-NOTE: Explicitly pass stdin=DEVNULL so workers don't inherit
+            # a closed/bad stdin FD from parent (e.g. when run via nohup or after
+            # terminal disconnect). Without this, Python workers crash at startup
+            # with "OSError: [Errno 9] Bad file descriptor" on sys stream init.
+            proc = subprocess.Popen(cmd, stdin=subprocess.DEVNULL)
             self.workers[proc.pid] = proc
             log(f"Spawned worker {proc.pid}")
             return proc.pid

package/lib/evolve_worker.py CHANGED

@@ -205,9 +205,15 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
                     preview = output[-300:] if output else "(empty)"
                     log(f"Bandit model {selected_model} completed but didn't modify file ({len(output)} chars), trying fallback...")
                     log(f"AI output preview: {preview}")
+                    # AIDEV-NOTE: Report no-modification as failure to bandit
+                    self.bandit.update(selected_model, child_score=None, parent_score=self._parent_score)
+                    log(f"Bandit update: {selected_model} no file modification")
             except AIError as e:
                 log(f"Bandit model {selected_model} failed: {e}, trying fallback...")
+                # AIDEV-NOTE: Report AI-level failure to bandit so it learns to avoid broken models
+                self.bandit.update(selected_model, child_score=None, parent_score=self._parent_score)
+                log(f"Bandit update: {selected_model} AI call failed")
         # Fallback to round-based retry with all models
         try:
@@ -229,7 +235,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
                 return True, model
             else:
                 # AIDEV-NOTE: Log output so we can diagnose why file wasn't modified
-                preview = output[:300] if output else "(empty)"
+                preview = output[-300:] if output else "(empty)"
                 log(f"AI completed but did not modify file ({len(output)} chars)")
                 log(f"AI output preview: {preview}")
                 return False, model

package/lib/llm_bandit.py CHANGED

@@ -86,6 +86,12 @@ class LLMBandit:
         # Baseline score for normalizing improvements
         self._baseline_score: float = 0.0
+        # AIDEV-NOTE: Decay counter - only apply decay every N updates to prevent
+        # aggressive memory loss. With decay_factor=0.95 applied every update,
+        # n_completed floors at 1 after ~50 updates and the bandit can't learn.
+        self._updates_since_decay: int = 0
+        self._decay_interval: int = 50  # Apply decay every 50 updates
         # Load existing state if available
         if state_file and Path(state_file).exists():
             self.load()
@@ -215,13 +221,23 @@ class LLMBandit:
         return improvement
     def _apply_decay(self) -> None:
-        """Apply decay to reduce influence of old observations."""
+        """Apply decay to reduce influence of old observations.
+        AIDEV-NOTE: Only applies every _decay_interval updates to prevent
+        aggressive memory loss. The int() truncation on n_completed was
+        destroying the bandit's ability to learn from failures.
+        """
+        self._updates_since_decay += 1
+        if self._updates_since_decay < self._decay_interval:
+            return
+        self._updates_since_decay = 0
         for stats in self.models.values():
-            # Decay both counts and totals proportionally
+            # Decay totals to reduce influence of old observations
             stats.total_improvement *= self.decay_factor
-            # Don't decay counts below a small floor to preserve some memory
-            if stats.n_completed > 1:
-                stats.n_completed = max(1, int(stats.n_completed * self.decay_factor))
+            # Decay counts but preserve enough memory to differentiate models
+            if stats.n_completed > 2:
+                stats.n_completed = max(2, int(stats.n_completed * self.decay_factor))
     def save(self) -> None:
         """Persist state to file."""

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "claude-evolve",
-  "version": "1.11.17",
+  "version": "1.11.19",
   "bin": {
     "claude-evolve": "bin/claude-evolve",
     "claude-evolve-main": "bin/claude-evolve-main",

package/templates/config.yaml CHANGED

@@ -94,10 +94,10 @@ llm_cli:
   # Default configuration: sonnet at ~11%, rest doubled for cost savings
   # Commented out because these change over time; uncomment to override
-  #run: gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder gpt-5-codex gpt-5-codex sonnet
-  #ideate: opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2 gpt-5.3-codex
-  #run_fallback: haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark
-  #ideate_fallback: haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark
+  #run: gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder codex-coding codex-coding sonnet
+  #ideate: opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2 codex-think qwen-openrouter
+  #run_fallback: haiku glm-5-zai gemini-5-flash codex-spark
+  #ideate_fallback: haiku glm-5-zai gemini-5-flash codex-spark
   # Available models:
   # Claude (subscription-based, watch usage limits):
@@ -108,10 +108,12 @@ llm_cli:
   # - haiku: Claude Haiku via Claude CLI (cheap fallback)
   #
   # Codex/OpenAI (subscription-based):
-  # - gpt-5-codex: GPT-5 Codex (code-specialized) via Codex CLI
+  # - codex-think: GPT-5.4 high reasoning effort (ideation)
+  # - codex-coding: GPT-5.4 medium reasoning effort (coding/run)
+  # - codex-spark: GPT-5.1 Codex Mini (lightweight fallback)
+  # - gpt-5.4: GPT-5.4 no reasoning effort override via Codex CLI
   # - gpt-5.2: GPT-5.2 via Codex CLI
-  # - gpt-5.3-codex: GPT-5.3 Codex via Codex CLI
-  # - gpt-5.3-codex-spark: GPT-5.3 Codex Spark (lightweight fallback) via Codex CLI
+  # - gpt-5.3-codex: GPT-5.3 Codex (code-specialized) via Codex CLI
   # - gpt5: GPT-5 via Codex CLI (legacy alias)
   # - gpt5high: GPT-5 via Codex CLI (high reasoning)
   # - o3high: O3 via Codex CLI (high reasoning)