npm - claude-evolve - Versions diffs - 1.11.16 → 1.11.18 - Mend

claude-evolve 1.11.16 → 1.11.18

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/bin/claude-evolve-check +31 -1
package/lib/__pycache__/ai_cli.cpython-311.pyc +0 -0
package/lib/__pycache__/ai_cli.cpython-314.pyc +0 -0
package/lib/__pycache__/embedding.cpython-314.pyc +0 -0
package/lib/__pycache__/evolution_csv.cpython-311.pyc +0 -0
package/lib/__pycache__/evolution_csv.cpython-313.pyc +0 -0
package/lib/__pycache__/evolution_csv.cpython-314.pyc +0 -0
package/lib/__pycache__/evolve_ideate.cpython-314.pyc +0 -0
package/lib/__pycache__/evolve_run.cpython-311.pyc +0 -0
package/lib/__pycache__/evolve_run.cpython-314.pyc +0 -0
package/lib/__pycache__/evolve_worker.cpython-314.pyc +0 -0
package/lib/__pycache__/llm_bandit.cpython-314.pyc +0 -0
package/lib/__pycache__/log.cpython-311.pyc +0 -0
package/lib/__pycache__/log.cpython-314.pyc +0 -0
package/lib/__pycache__/meta_learning.cpython-314.pyc +0 -0
package/lib/__pycache__/sandbox_wrapper.cpython-314.pyc +0 -0
package/lib/ai-cli.sh +33 -3
package/lib/ai_cli.py +4 -1
package/lib/config.sh +5 -5
package/lib/evolve_run.py +5 -1
package/lib/evolve_worker.py +7 -1
package/lib/llm_bandit.py +21 -5
package/package.json +1 -1
package/templates/config.yaml +32 -21
package/lib/__pycache__/ai_cli.cpython-310.pyc +0 -0
package/lib/__pycache__/embedding.cpython-310.pyc +0 -0
package/lib/__pycache__/evolution_csv.cpython-310.pyc +0 -0
package/lib/__pycache__/evolve_ideate.cpython-310.pyc +0 -0
package/lib/__pycache__/log.cpython-310.pyc +0 -0

package/bin/claude-evolve-check CHANGED Viewed

@@ -141,7 +141,37 @@ SCRIPT
 exec codex exec --dangerously-bypass-approvals-and-sandbox "$1"
 SCRIPT
       ;;
-    qwen)
+    gpt-5-codex)
+      cat > "$test_script" << 'SCRIPT'
+#!/usr/bin/env bash
+exec codex exec -m gpt-5-codex --dangerously-bypass-approvals-and-sandbox "$1"
+SCRIPT
+      ;;
+    gpt-5.2)
+      cat > "$test_script" << 'SCRIPT'
+#!/usr/bin/env bash
+exec codex exec -m gpt-5.2 --dangerously-bypass-approvals-and-sandbox "$1"
+SCRIPT
+      ;;
+    gpt-5.3-codex)
+      cat > "$test_script" << 'SCRIPT'
+#!/usr/bin/env bash
+exec codex exec -m gpt-5.3-codex --dangerously-bypass-approvals-and-sandbox "$1"
+SCRIPT
+      ;;
+    gpt-5.3-codex-spark)
+      cat > "$test_script" << 'SCRIPT'
+#!/usr/bin/env bash
+exec codex exec -m gpt-5.3-codex-spark --dangerously-bypass-approvals-and-sandbox "$1"
+SCRIPT
+      ;;
+    gemini-5-flash)
+      cat > "$test_script" << 'SCRIPT'
+#!/usr/bin/env bash
+exec gemini -y -m gemini-5-flash -p "$1"
+SCRIPT
+      ;;
+    qwen-openrouter)
       cat > "$test_script" << 'SCRIPT'
 #!/usr/bin/env bash
 exec opencode -m openrouter/qwen/qwen3.5-plus-02-15 run "$1"

package/lib/__pycache__/ai_cli.cpython-311.pyc ADDED Viewed

Binary file

package/lib/__pycache__/ai_cli.cpython-314.pyc CHANGED Viewed

Binary file

package/lib/__pycache__/embedding.cpython-314.pyc CHANGED Viewed

Binary file

package/lib/__pycache__/evolution_csv.cpython-311.pyc ADDED Viewed

Binary file

package/lib/__pycache__/evolution_csv.cpython-313.pyc ADDED Viewed

Binary file

package/lib/__pycache__/evolution_csv.cpython-314.pyc CHANGED Viewed

Binary file

package/lib/__pycache__/evolve_ideate.cpython-314.pyc CHANGED Viewed

Binary file

package/lib/__pycache__/evolve_run.cpython-311.pyc ADDED Viewed

Binary file

package/lib/__pycache__/evolve_run.cpython-314.pyc ADDED Viewed

Binary file

package/lib/__pycache__/evolve_worker.cpython-314.pyc ADDED Viewed

Binary file

package/lib/__pycache__/llm_bandit.cpython-314.pyc ADDED Viewed

Binary file

package/lib/__pycache__/log.cpython-311.pyc ADDED Viewed

Binary file

package/lib/__pycache__/log.cpython-314.pyc ADDED Viewed

Binary file

package/lib/__pycache__/meta_learning.cpython-314.pyc ADDED Viewed

Binary file

package/lib/__pycache__/sandbox_wrapper.cpython-314.pyc ADDED Viewed

Binary file

package/lib/ai-cli.sh CHANGED Viewed

@@ -105,6 +105,30 @@ $prompt"
       ai_output=$(codex exec -m "$codex_gpt5_model" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
+    gpt-5-codex)
+      local ai_output
+      # GPT-5 Codex - code-specialized variant via Codex CLI
+      ai_output=$(codex exec -m gpt-5-codex --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+      local ai_exit_code=$?
+      ;;
+    gpt-5.2)
+      local ai_output
+      # GPT-5.2 via Codex CLI
+      ai_output=$(codex exec -m gpt-5.2 --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+      local ai_exit_code=$?
+      ;;
+    gpt-5.3-codex)
+      local ai_output
+      # GPT-5.3 Codex via Codex CLI
+      ai_output=$(codex exec -m gpt-5.3-codex --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+      local ai_exit_code=$?
+      ;;
+    gpt-5.3-codex-spark)
+      local ai_output
+      # GPT-5.3 Codex Spark - lightweight fallback via Codex CLI
+      ai_output=$(codex exec -m gpt-5.3-codex-spark --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+      local ai_exit_code=$?
+      ;;
     o3high)
       local ai_output
       ai_output=$(codex exec -m o3-mini -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
@@ -122,6 +146,12 @@ $prompt"
       ai_output=$(gemini -y -m gemini-2.5-flash -p "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
+    gemini-5-flash)
+      local ai_output
+      # Gemini 5 Flash - cheap fallback model
+      ai_output=$(gemini -y -m gemini-5-flash -p "$prompt" 2>&1)
+      local ai_exit_code=$?
+      ;;
     gemini-3-pro-preview)
       local ai_output
       # Gemini v3 Pro Preview via OpenRouter - EXPENSIVE
@@ -222,7 +252,7 @@ $prompt"
       ai_output=$(opencode -m openrouter/moonshotai/kimi-k2.5 run "$prompt" 2>&1)
       local ai_exit_code=$?
       ;;
-    qwen)
+    qwen-openrouter)
       local ai_output
       # Qwen latest - Alibaba's flagship model (currently qwen3.5-plus)
       # Linear attention + sparse MoE, strong multimodal capabilities
@@ -258,8 +288,8 @@ $prompt"
     echo "[AI] Raw output from $model_name:" >&2
     echo "----------------------------------------" >&2
     if [[ ${#ai_output} -gt 2000 ]]; then
-      echo "$ai_output" | head -50 >&2
-      echo "... (truncated from ${#ai_output} characters to first 50 lines) ..." >&2
+      echo "... (truncated from ${#ai_output} characters to last 50 lines) ..." >&2
+      echo "$ai_output" | tail -50 >&2
     else
       echo "$ai_output" >&2
     fi

package/lib/ai_cli.py CHANGED Viewed

@@ -246,7 +246,10 @@ MODEL_TIMEOUTS = {
     'opus-think': 1800, 'sonnet-think': 1800,
     # Gemini - 30 min for pro (streams while working), 20 min for flash
     'gemini-pro': 1800, 'gemini-flash': 1200, 'gemini-3-flash': 600,
-    'gemini-3-pro-preview': 1800,
+    'gemini-3-pro-preview': 1800, 'gemini-5-flash': 600,
+    # Codex/OpenAI models - 10 min standard
+    'gpt-5-codex': 600, 'gpt-5.2': 600, 'gpt-5.3-codex': 600,
+    'gpt-5.3-codex-spark': 300,
     # Z.AI agentic modes - 30 min (can be slow)
     'glm-zai': 1800, 'glm-5-zai': 1800,
     # Codex local - 40 min (local inference can be slow)

package/lib/config.sh CHANGED Viewed

@@ -63,13 +63,13 @@ DEFAULT_WORKER_MAX_CANDIDATES=3
 #
 # Run: Subscription-based agentic models for code generation
 # All CLI tools (opencode, claude, gemini, kimi) are agentic and can edit files
-DEFAULT_LLM_RUN="sonnet glm-5-zai kimi-coder gemini-pro"
-DEFAULT_LLM_RUN_FALLBACK="codex-oss-local"
+DEFAULT_LLM_RUN="gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder gpt-5-codex gpt-5-codex sonnet"
+DEFAULT_LLM_RUN_FALLBACK="haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark"
 #
 # Ideate: Agentic models that can edit files for ideation
 # All CLI tools (opencode, claude, gemini, kimi) are agentic and can edit files
-DEFAULT_LLM_IDEATE="opus-think sonnet-think glm-5-zai gemini-pro kimi-coder"
-DEFAULT_LLM_IDEATE_FALLBACK="sonnet glm-5-zai"
+DEFAULT_LLM_IDEATE="opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2 gpt-5.3-codex qwen-openrouter"
+DEFAULT_LLM_IDEATE_FALLBACK="haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark"
 # Load configuration from a YAML file and update variables
 _load_yaml_config() {
@@ -318,7 +318,7 @@ show_config() {
   echo "  Worker max candidates: $WORKER_MAX_CANDIDATES"
   echo "  LLM configuration:"
   # Show LLM configurations using dynamic variable names
-  for model in gpt5high o3high codex gemini opus opus_think sonnet sonnet_think cursor_sonnet cursor_opus glm deepseek; do
+  for model in gpt5high o3high gpt_5_codex gpt_5_2 gpt_5_3_codex gpt_5_3_codex_spark codex gemini gemini_5_flash opus opus_think sonnet sonnet_think cursor_sonnet cursor_opus glm deepseek; do
     var_name="LLM_CLI_${model}"
     var_value=$(eval echo "\$$var_name")
     if [[ -n "$var_value" ]]; then

package/lib/evolve_run.py CHANGED Viewed

@@ -71,7 +71,11 @@ class WorkerPool:
         try:
             # Don't capture output - let it stream directly to terminal
             # This provides real-time visibility into which models are being used
-            proc = subprocess.Popen(cmd)
+            # AIDEV-NOTE: Explicitly pass stdin=DEVNULL so workers don't inherit
+            # a closed/bad stdin FD from parent (e.g. when run via nohup or after
+            # terminal disconnect). Without this, Python workers crash at startup
+            # with "OSError: [Errno 9] Bad file descriptor" on sys stream init.
+            proc = subprocess.Popen(cmd, stdin=subprocess.DEVNULL)
             self.workers[proc.pid] = proc
             log(f"Spawned worker {proc.pid}")
             return proc.pid

package/lib/evolve_worker.py CHANGED Viewed

@@ -205,9 +205,15 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
                     preview = output[-300:] if output else "(empty)"
                     log(f"Bandit model {selected_model} completed but didn't modify file ({len(output)} chars), trying fallback...")
                     log(f"AI output preview: {preview}")
+                    # AIDEV-NOTE: Report no-modification as failure to bandit
+                    self.bandit.update(selected_model, child_score=None, parent_score=self._parent_score)
+                    log(f"Bandit update: {selected_model} no file modification")
             except AIError as e:
                 log(f"Bandit model {selected_model} failed: {e}, trying fallback...")
+                # AIDEV-NOTE: Report AI-level failure to bandit so it learns to avoid broken models
+                self.bandit.update(selected_model, child_score=None, parent_score=self._parent_score)
+                log(f"Bandit update: {selected_model} AI call failed")
         # Fallback to round-based retry with all models
         try:
@@ -229,7 +235,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
                 return True, model
             else:
                 # AIDEV-NOTE: Log output so we can diagnose why file wasn't modified
-                preview = output[:300] if output else "(empty)"
+                preview = output[-300:] if output else "(empty)"
                 log(f"AI completed but did not modify file ({len(output)} chars)")
                 log(f"AI output preview: {preview}")
                 return False, model

package/lib/llm_bandit.py CHANGED Viewed

@@ -86,6 +86,12 @@ class LLMBandit:
         # Baseline score for normalizing improvements
         self._baseline_score: float = 0.0
+        # AIDEV-NOTE: Decay counter - only apply decay every N updates to prevent
+        # aggressive memory loss. With decay_factor=0.95 applied every update,
+        # n_completed floors at 1 after ~50 updates and the bandit can't learn.
+        self._updates_since_decay: int = 0
+        self._decay_interval: int = 50  # Apply decay every 50 updates
         # Load existing state if available
         if state_file and Path(state_file).exists():
             self.load()
@@ -215,13 +221,23 @@ class LLMBandit:
         return improvement
     def _apply_decay(self) -> None:
-        """Apply decay to reduce influence of old observations."""
+        """Apply decay to reduce influence of old observations.
+        AIDEV-NOTE: Only applies every _decay_interval updates to prevent
+        aggressive memory loss. The int() truncation on n_completed was
+        destroying the bandit's ability to learn from failures.
+        """
+        self._updates_since_decay += 1
+        if self._updates_since_decay < self._decay_interval:
+            return
+        self._updates_since_decay = 0
         for stats in self.models.values():
-            # Decay both counts and totals proportionally
+            # Decay totals to reduce influence of old observations
             stats.total_improvement *= self.decay_factor
-            # Don't decay counts below a small floor to preserve some memory
-            if stats.n_completed > 1:
-                stats.n_completed = max(1, int(stats.n_completed * self.decay_factor))
+            # Decay counts but preserve enough memory to differentiate models
+            if stats.n_completed > 2:
+                stats.n_completed = max(2, int(stats.n_completed * self.decay_factor))
     def save(self) -> None:
         """Persist state to file."""

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-evolve",
-  "version": "1.11.16",
+  "version": "1.11.18",
   "bin": {
     "claude-evolve": "bin/claude-evolve",
     "claude-evolve-main": "bin/claude-evolve-main",

package/templates/config.yaml CHANGED Viewed

@@ -90,30 +90,41 @@ parallel:
 llm_cli:
   # What to run for each sub-command
   # Models are tried in order, with round-robin distribution across candidates
-  # You can repeat models for weighted selection (e.g., "sonnet sonnet gemini" for 2:1 ratio)
+  # You can repeat models for weighted selection (e.g., "gemini-pro gemini-pro sonnet" for 2:1 ratio)
-  # Default configuration: 50/50 split between glm-zai and kimi-coder, commercial ideation
+  # Default configuration: sonnet at ~11%, rest doubled for cost savings
   # Commented out because these change over time; uncomment to override
-  #run: glm-zai kimi-coder glm-zai kimi-coder glm-zai kimi-coder codex-oss-local
-  #ideate: opus-openrouter kimi-k2-openrouter gemini-pro sonnet-think gpt5high grok-4-openrouter deepseek-openrouter glm-zai
+  #run: gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder gpt-5-codex gpt-5-codex sonnet
+  #ideate: opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2 gpt-5.3-codex
+  #run_fallback: haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark
+  #ideate_fallback: haiku glm-5-zai gemini-5-flash gpt-5.3-codex-spark
   # Available models:
-  # - sonnet: Claude 3.5 Sonnet via Claude CLI
-  # - sonnet-think: Claude 3.5 Sonnet with extended thinking (ultrathink prompt)
-  # - opus: Claude 3 Opus via Claude CLI
-  # - opus-think: Claude 3 Opus with extended thinking (ultrathink prompt)
-  # - gemini: Gemini via Gemini CLI
-  # - gpt5: GPT-5 via Codex CLI (standard)
+  # Claude (subscription-based, watch usage limits):
+  # - sonnet: Claude Sonnet via Claude CLI
+  # - sonnet-think: Claude Sonnet with extended thinking (ultrathink prompt)
+  # - opus: Claude Opus via Claude CLI
+  # - opus-think: Claude Opus with extended thinking (ultrathink prompt)
+  # - haiku: Claude Haiku via Claude CLI (cheap fallback)
+  #
+  # Codex/OpenAI (subscription-based):
+  # - gpt-5-codex: GPT-5 Codex (code-specialized) via Codex CLI
+  # - gpt-5.2: GPT-5.2 via Codex CLI
+  # - gpt-5.3-codex: GPT-5.3 Codex via Codex CLI
+  # - gpt-5.3-codex-spark: GPT-5.3 Codex Spark (lightweight fallback) via Codex CLI
+  # - gpt5: GPT-5 via Codex CLI (legacy alias)
   # - gpt5high: GPT-5 via Codex CLI (high reasoning)
   # - o3high: O3 via Codex CLI (high reasoning)
-  # - cursor-sonnet: Claude 3.5 Sonnet via Cursor Agent CLI
-  # - cursor-opus: Claude 3 Opus via Cursor Agent CLI
-  # - glm: GLM-4.6 via OpenCode CLI
-  # - grok-code-fast: Grok Code Fast 1 via OpenRouter
-  # - grok-4: Grok 4 via OpenRouter
-  # - opus-openrouter: Claude Opus 4.1 via OpenRouter
-  # - kimi-k2-openrouter: Kimi K2 Thinking via OpenRouter (RECOMMENDED - no separate auth)
-  # - kimi-k2-think-moonshot: Kimi K2 Thinking via Moonshot CLI (requires separate kimi CLI setup)
-  # - kimi-coder: Kimi for Coding model via kimi CLI (fast, good for code generation)
-  # - codex-qwen3: Qwen3-Coder via Codex + Ollama (local, free, RECOMMENDED)
-  # - aider-qwen3: Qwen3-Coder via Aider + Ollama (local, free, experimental)
+  #
+  # Gemini (free tier available):
+  # - gemini-pro: Gemini 3 Pro Preview via Gemini CLI
+  # - gemini-5-flash: Gemini 5 Flash via Gemini CLI (cheap fallback)
+  # - gemini-flash: Gemini 2.5 Flash via Gemini CLI (legacy)
+  #
+  # Other free/cheap models:
+  # - glm-5-zai: GLM-5 via Z.AI agentic mode
+  # - kimi-coder: Kimi for Coding via kimi CLI (fast, good for code gen)
+  # - kimi-k2-openrouter: Kimi K2 Thinking via OpenRouter
+  # - codex-oss-local: Local model via Codex + Ollama
+  # - cursor-sonnet: Claude Sonnet via Cursor Agent CLI
+  # - cursor-opus: Claude Opus via Cursor Agent CLI

package/lib/__pycache__/ai_cli.cpython-310.pyc DELETED Viewed

Binary file

package/lib/__pycache__/embedding.cpython-310.pyc DELETED Viewed

Binary file

package/lib/__pycache__/evolution_csv.cpython-310.pyc DELETED Viewed

Binary file

package/lib/__pycache__/evolve_ideate.cpython-310.pyc DELETED Viewed

Binary file

package/lib/__pycache__/log.cpython-310.pyc DELETED Viewed

Binary file