claude-evolve 1.11.18 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -0
- package/bin/claude-evolve-check +9 -135
- package/lib/__pycache__/ai_cli.cpython-310.pyc +0 -0
- package/lib/__pycache__/ai_cli.cpython-314.pyc +0 -0
- package/lib/__pycache__/embedding.cpython-310.pyc +0 -0
- package/lib/__pycache__/embedding.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolution_csv.cpython-310.pyc +0 -0
- package/lib/__pycache__/evolution_csv.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolve_ideate.cpython-310.pyc +0 -0
- package/lib/__pycache__/evolve_ideate.cpython-314.pyc +0 -0
- package/lib/__pycache__/log.cpython-310.pyc +0 -0
- package/lib/ai-cli.sh +93 -97
- package/lib/ai_cli.py +25 -11
- package/lib/config.py +0 -0
- package/lib/config.sh +5 -15
- package/lib/csv-lock.sh +0 -0
- package/lib/editor.sh +0 -0
- package/lib/evolution_csv.py +0 -0
- package/lib/evolution_processor.py +0 -0
- package/lib/evolve_ideate.py +0 -0
- package/lib/evolve_worker.py +1 -1
- package/lib/llm_bandit.py +1 -1
- package/lib/log.py +0 -0
- package/lib/meta_learning.py +0 -0
- package/lib/sandbox.sb +0 -0
- package/lib/sandbox_wrapper.py +0 -0
- package/package.json +1 -1
- package/templates/BRIEF.md +0 -0
- package/templates/algorithm.py +0 -0
- package/templates/config.yaml +9 -7
- package/templates/evaluator.py +0 -0
- package/lib/__pycache__/ai_cli.cpython-311.pyc +0 -0
- package/lib/__pycache__/evolution_csv.cpython-311.pyc +0 -0
- package/lib/__pycache__/evolution_csv.cpython-313.pyc +0 -0
- package/lib/__pycache__/evolve_run.cpython-311.pyc +0 -0
- package/lib/__pycache__/evolve_run.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolve_worker.cpython-314.pyc +0 -0
- package/lib/__pycache__/llm_bandit.cpython-314.pyc +0 -0
- package/lib/__pycache__/log.cpython-311.pyc +0 -0
- package/lib/__pycache__/log.cpython-314.pyc +0 -0
- package/lib/__pycache__/meta_learning.cpython-314.pyc +0 -0
- package/lib/__pycache__/sandbox_wrapper.cpython-314.pyc +0 -0
package/README.md
CHANGED
File without changes

package/bin/claude-evolve-check
CHANGED

@@ -2,8 +2,8 @@
 # claude-evolve-check - Health check for AI model configurations
 # Tests all configured AI models to verify they're working before starting evolution runs
 #
-# AIDEV-NOTE:
-#
+# AIDEV-NOTE: Uses call_ai_model_configured from ai-cli.sh directly instead of
+# maintaining a duplicate case statement. Single source of truth for model commands.
 
 set -e
 
@@ -18,9 +18,10 @@ NC='\033[0m' # No Color
 SCRIPT_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}" 2>/dev/null || echo "${BASH_SOURCE[0]}")")" && pwd)"
 LIB_DIR="$(dirname "$SCRIPT_DIR")/lib"
 
-# Source configuration to get model lists
+# Source configuration and ai-cli to get model lists and call function
 source "$LIB_DIR/config.sh"
 load_config
+source "$LIB_DIR/ai-cli.sh"
 
 # Test timeout in seconds
 TEST_TIMEOUT=30
@@ -30,8 +31,7 @@ TEST_PROMPT="Say hello in exactly 3 words."
 
 # Temp files
 TEMP_OUTPUT=$(mktemp)
-
-trap "rm -f $TEMP_OUTPUT $TEMP_PID" EXIT
+trap "rm -f $TEMP_OUTPUT" EXIT
 
 echo -e "${CYAN}🔍 Claude Evolve - AI Model Health Check${NC}"
 echo "============================================"
@@ -55,138 +55,15 @@ FAILED=0
 FAILED_MODELS=""
 TIMEOUT_MODELS=""
 
-# Test a single model
+# Test a single model using call_ai_model_configured from ai-cli.sh
 test_model() {
     local model="$1"
     local prompt="$2"
     local outfile="$3"
     local max_wait="$4"
 
-    #
-
-    case "$model" in
-        opus|sonnet|haiku)
-            cat > "$test_script" << SCRIPT
-#!/usr/bin/env bash
-exec claude --dangerously-skip-permissions --mcp-config '' --model $model -p "\$1"
-SCRIPT
-            ;;
-        opus-think)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec claude --dangerously-skip-permissions --mcp-config '' --model opus -p "ultrathink
-
-$1"
-SCRIPT
-            ;;
-        sonnet-think)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "ultrathink
-
-$1"
-SCRIPT
-            ;;
-        gemini-pro)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec gemini -y -m gemini-3-pro-preview -p "$1"
-SCRIPT
-            ;;
-        gemini-flash|gemini-3-flash)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec gemini -y -m gemini-2.5-flash -p "$1"
-SCRIPT
-            ;;
-        kimi-coder)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec kimi --print -y -m kimi-for-coding -c "$1"
-SCRIPT
-            ;;
-        kimi-k2.5)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec opencode -m openrouter/moonshotai/kimi-k2.5 run "$1"
-SCRIPT
-            ;;
-        glm-5)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec opencode -m openrouter/z-ai/glm-5 run "$1"
-SCRIPT
-            ;;
-        glm-5-zai)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec opencode -m zai-coding-plan/glm-5 run "$1"
-SCRIPT
-            ;;
-        glm-zai)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec opencode -m zai-coding-plan/glm-4.7 run "$1"
-SCRIPT
-            ;;
-        codex-oss-local)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama "$1"
-SCRIPT
-            ;;
-        gpt5|gpt5high)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec --dangerously-bypass-approvals-and-sandbox "$1"
-SCRIPT
-            ;;
-        gpt-5-codex)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec -m gpt-5-codex --dangerously-bypass-approvals-and-sandbox "$1"
-SCRIPT
-            ;;
-        gpt-5.2)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec -m gpt-5.2 --dangerously-bypass-approvals-and-sandbox "$1"
-SCRIPT
-            ;;
-        gpt-5.3-codex)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec -m gpt-5.3-codex --dangerously-bypass-approvals-and-sandbox "$1"
-SCRIPT
-            ;;
-        gpt-5.3-codex-spark)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec codex exec -m gpt-5.3-codex-spark --dangerously-bypass-approvals-and-sandbox "$1"
-SCRIPT
-            ;;
-        gemini-5-flash)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec gemini -y -m gemini-5-flash -p "$1"
-SCRIPT
-            ;;
-        qwen-openrouter)
-            cat > "$test_script" << 'SCRIPT'
-#!/usr/bin/env bash
-exec opencode -m openrouter/qwen/qwen3.5-plus-02-15 run "$1"
-SCRIPT
-            ;;
-        *)
-            echo "Unknown model: $model" > "$outfile"
-            rm -f "$test_script"
-            return 1
-            ;;
-    esac
-    chmod +x "$test_script"
-
-    # Run the test script in background, capture output to file
-    "$test_script" "$prompt" > "$outfile" 2>&1 &
+    # Run call_ai_model_configured in background, capture output to file
+    call_ai_model_configured "$model" "$prompt" > "$outfile" 2>&1 &
     local pid=$!
 
     # Poll for completion
@@ -197,7 +74,6 @@ SCRIPT
             sleep 1
             kill -9 "$pid" 2>/dev/null
             wait "$pid" 2>/dev/null
-            rm -f "$test_script"
             return 124
         fi
         sleep 1
@@ -205,9 +81,7 @@ SCRIPT
     done
 
     wait "$pid"
-
-    rm -f "$test_script"
-    return $rc
+    return $?
 }
 
 # Test each model
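
Note: the refactored test_model no longer writes per-model wrapper scripts. It backgrounds the shared call_ai_model_configured and polls once a second, returning 124 (the conventional timeout exit code) once the deadline passes. A minimal Python sketch of the same poll-and-kill pattern, for illustration only; run_with_deadline and its signature are hypothetical, not part of the package:

import subprocess
import time

# Hypothetical sketch of test_model's timeout loop, not package code.
def run_with_deadline(cmd, max_wait, outfile):
    with open(outfile, "w") as out:
        proc = subprocess.Popen(cmd, stdout=out, stderr=subprocess.STDOUT)
        deadline = time.monotonic() + max_wait
        while proc.poll() is None:           # still running?
            if time.monotonic() > deadline:
                proc.kill()                  # SIGKILL, like `kill -9 "$pid"`
                proc.wait()
                return 124                   # same code the script returns on timeout
            time.sleep(1)                    # poll once a second, like the bash loop
        return proc.returncode
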
package/lib/__pycache__/ai_cli.cpython-310.pyc
Binary file
package/lib/__pycache__/ai_cli.cpython-314.pyc
Binary file
package/lib/__pycache__/embedding.cpython-310.pyc
Binary file
package/lib/__pycache__/embedding.cpython-314.pyc
Binary file
package/lib/__pycache__/evolution_csv.cpython-310.pyc
Binary file
package/lib/__pycache__/evolution_csv.cpython-314.pyc
Binary file
package/lib/__pycache__/evolve_ideate.cpython-310.pyc
Binary file
package/lib/__pycache__/evolve_ideate.cpython-314.pyc
Binary file
package/lib/__pycache__/log.cpython-310.pyc
Binary file

package/lib/ai-cli.sh
CHANGED

@@ -53,13 +53,16 @@ EOF
 call_ai_model_configured() {
     local model_name="$1"
     local prompt="$2"
-    local
+    local codex_gpt_model="${CODEX_GPT_MODEL:-${CODEX_GPT5_MODEL:-gpt-5.2}}"
 
     # Record start time
     local start_time=$(date +%s)
 
     # Build command directly based on model
+    # AIDEV-NOTE: Model names are role-based, never versioned. When upgrading a model,
+    # update the model ID in the command below, not the case label.
     case "$model_name" in
+        # --- Claude (subscription) ---
        opus)
             local ai_output
             ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model opus -p "$prompt" 2>&1)
@@ -70,207 +73,200 @@ call_ai_model_configured() {
             ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        haiku)
+            local ai_output
+            ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model haiku -p "$prompt" 2>&1)
+            local ai_exit_code=$?
+            ;;
+        opus-think)
             local ai_output
-            # Use extended thinking with sonnet 4.5 - prepend ultrathink instruction
-            # AIDEV-NOTE: Extended thinking can take long for complex ideation
             local think_prompt="ultrathink
 
 $prompt"
-            ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model
+            ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model opus -p "$think_prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        sonnet-think)
             local ai_output
-            # Use extended thinking with opus - prepend ultrathink instruction
-            # AIDEV-NOTE: Extended thinking can take long for complex ideation
             local think_prompt="ultrathink
 
 $prompt"
-            ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model
+            ai_output=$(claude --dangerously-skip-permissions --mcp-config '' --model sonnet -p "$think_prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        opus-openrouter)
             local ai_output
-            ai_output=$(
+            ai_output=$(opencode -m openrouter/anthropic/claude-opus-4.1 run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        cursor-sonnet)
             local ai_output
-            ai_output=$(
+            ai_output=$(cursor-agent sonnet-4.5 -p "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        cursor-opus)
             local ai_output
-            ai_output=$(
+            ai_output=$(cursor-agent opus -p "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        # --- Codex/GPT (subscription) ---
+        gpt)
             local ai_output
-
-            ai_output=$(codex exec -m gpt-5-codex --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+            ai_output=$(codex exec -m "$codex_gpt_model" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-        gpt-
+        gpt-high)
             local ai_output
-
-            ai_output=$(codex exec -m gpt-5.2 --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
+            ai_output=$(codex exec -m "$codex_gpt_model" -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        codex-think)
             local ai_output
-            #
-            ai_output=$(codex exec -m gpt-5.
+            # High reasoning - for ideation tasks requiring deep thinking
+            ai_output=$(codex exec -m gpt-5.4 -c model_reasoning_effort="high" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        codex-coding)
             local ai_output
-            #
-            ai_output=$(codex exec -m gpt-5.
+            # Medium reasoning - for coding/implementation tasks
+            ai_output=$(codex exec -m gpt-5.4 -c model_reasoning_effort="medium" --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        codex-spark)
             local ai_output
-
+            # Cheap/fast lightweight fallback
+            ai_output=$(codex exec -m gpt-5.1-codex-mini --dangerously-bypass-approvals-and-sandbox "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
+        # --- Gemini (subscription) ---
         gemini-pro)
             local ai_output
-            # Gemini streams output while working
-            ai_output=$(gemini -y -m gemini-3
+            # Auto-routing to best Gemini model - streams output while working
+            ai_output=$(gemini -y -m auto-gemini-3 -p "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
         gemini-flash)
             local ai_output
-            # Gemini streams output while working
             ai_output=$(gemini -y -m gemini-2.5-flash -p "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-        gemini-
+        gemini-cheap)
             local ai_output
-            #
-            ai_output=$(gemini -y -m gemini-
+            # Fast cheap fallback via gemini CLI
+            ai_output=$(gemini -y -m gemini-3-flash-preview -p "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-        gemini-
+        gemini-pro-openrouter)
             local ai_output
-            # Gemini
+            # Gemini Pro via OpenRouter - EXPENSIVE
             ai_output=$(opencode -m openrouter/google/gemini-3-pro-preview run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        # --- GLM / Z.AI ---
+        glm)
             local ai_output
-            #
-            ai_output=$(opencode -m openrouter/
+            # Latest GLM flagship via OpenRouter
+            ai_output=$(opencode -m openrouter/z-ai/glm-5.1 run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        glm-zai)
             local ai_output
-
+            # Latest GLM via Z.AI agentic mode (may lag OpenRouter by one version)
+            ai_output=$(opencode -m zai-coding-plan/glm-5 run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        # --- Qwen / Alibaba ---
+        qwen)
             local ai_output
-
+            # Latest Qwen flagship via OpenRouter
+            ai_output=$(opencode -m openrouter/qwen/qwen3.6-plus run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        qwen-coder)
             local ai_output
-
+            # Qwen coding specialist - large MoE
+            ai_output=$(opencode -m openrouter/qwen/qwen3-coder run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        # --- DeepSeek ---
+        deepseek)
             local ai_output
-            #
-
-            ai_output=$(opencode -m openrouter/z-ai/glm-5 run "$prompt" 2>&1)
+            # Latest DeepSeek via OpenRouter
+            ai_output=$(opencode -m openrouter/deepseek/deepseek-v3.2 run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
-            # GLM 4.7 via Z.AI agentic mode -- can be slow sometimes
+        deepseek-local)
             local ai_output
-
+            # DeepSeek via Codex CLI with Ollama cloud backend
+            ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m deepseek-v3.1:671b-cloud "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
-
-            # 744B MoE, strong reasoning, can edit files
+        # --- Kimi / Moonshot ---
+        kimi-coder)
             local ai_output
-
+            # Kimi coding model via kimi CLI
+            ai_output=$(kimi --print -y -m kimi-for-coding -c "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        kimi-think)
             local ai_output
-
+            # Kimi thinking via kimi CLI
+            ai_output=$(kimi --print -c "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        kimi-openrouter)
             local ai_output
-
+            # Latest Kimi via OpenRouter
+            ai_output=$(opencode -m openrouter/moonshotai/kimi-k2.5 run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        # --- Grok / xAI ---
+        grok)
             local ai_output
-            #
+            # Latest Grok via OpenRouter - EXPENSIVE
             ai_output=$(opencode -m openrouter/x-ai/grok-4 run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-        grok-
+        grok-fast)
             local ai_output
-            # Grok
+            # Grok fast variant - close to full quality, much cheaper
             ai_output=$(opencode -m openrouter/x-ai/grok-4.1-fast run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        # --- MiniMax ---
+        minimax)
             local ai_output
-
+            # Latest MiniMax reasoning model via OpenRouter
+            ai_output=$(opencode -m openrouter/minimax/minimax-m2.7 run "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        # --- Ollama cloud models (flat-rate subscription) ---
+        ollama-glm)
             local ai_output
-
-            ai_output=$(opencode -m openrouter/moonshotai/kimi-k2-thinking run "$prompt" 2>&1)
+            ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m glm-5.1:cloud "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        ollama-gemma)
             local ai_output
-
-            ai_output=$(kimi --print -c "$prompt" 2>&1)
+            ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m gemma4:31b-cloud "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        ollama-minimax)
             local ai_output
-
-            # Use --print to see agent actions while still allowing file modifications
-            ai_output=$(kimi --print -y -m kimi-for-coding -c "$prompt" 2>&1)
+            ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m minimax-m2.7:cloud "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
+        ollama-qwen)
             local ai_output
-
-            # Native multimodal agentic model, stronger than GLM-4.7
-            ai_output=$(opencode -m openrouter/moonshotai/kimi-k2.5 run "$prompt" 2>&1)
+            ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama -m qwen3.5:cloud "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-
-
-            # Qwen latest - Alibaba's flagship model (currently qwen3.5-plus)
-            # Linear attention + sparse MoE, strong multimodal capabilities
-            ai_output=$(opencode -m openrouter/qwen/qwen3.5-plus-02-15 run "$prompt" 2>&1)
-            local ai_exit_code=$?
-            ;;
-        codex-oss-local)
-            # Codex-OSS via Codex CLI with Ollama backend
+        # --- Local inference ---
+        codex-local)
             local ai_output
             ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss --local-provider=ollama "$prompt" 2>&1)
             local ai_exit_code=$?
             ;;
-        deepseek-v3-llamacloud)
-            # Deepseek via Codex CLI with Ollama cloud backend
-            local ai_output
-            ai_output=$(codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --oss -m deepseek-v3.1:671b-cloud "$prompt" 2>&1)
-            local ai_exit_code=$?
-            ;;
     esac
 
     # Debug: log model and prompt size
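
Note: the case statement above is the single dispatch point from role-based model names to concrete CLI invocations, and the codex_gpt_model default chain means CODEX_GPT_MODEL overrides CODEX_GPT5_MODEL, which falls back to gpt-5.2. A dict-based Python sketch of the same idea, assuming nothing beyond what the diff shows (only three arms reproduced; call_model itself is hypothetical):

import os
import subprocess

# Same empty-or-unset fallback chain as "${CODEX_GPT_MODEL:-${CODEX_GPT5_MODEL:-gpt-5.2}}".
CODEX_GPT_MODEL = (os.environ.get("CODEX_GPT_MODEL")
                   or os.environ.get("CODEX_GPT5_MODEL") or "gpt-5.2")

# Role-based names map to commands; upgrading a model changes the ID, not the key.
COMMANDS = {
    "opus": lambda p: ["claude", "--dangerously-skip-permissions",
                       "--mcp-config", "", "--model", "opus", "-p", p],
    "gpt":  lambda p: ["codex", "exec", "-m", CODEX_GPT_MODEL,
                       "--dangerously-bypass-approvals-and-sandbox", p],
    "glm":  lambda p: ["opencode", "-m", "openrouter/z-ai/glm-5.1", "run", p],
}

def call_model(name, prompt):
    # Hypothetical helper: run the mapped command, return exit code and combined output.
    result = subprocess.run(COMMANDS[name](prompt), capture_output=True, text=True)
    return result.returncode, result.stdout + result.stderr
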
@@ -332,7 +328,7 @@ clean_ai_output() {
     local model_name="$2"
 
     # Handle codex-specific output format
-    if [[ "$model_name" == "codex" || "$model_name" == "
+    if [[ "$model_name" == "codex" || "$model_name" == "gpt" || "$model_name" == "gpt-high" ]]; then
         # Clean codex output - extract content between "codex" marker and "tokens used"
         if echo "$output" | grep -q "^\[.*\] codex$"; then
             # Extract content between "codex" line and "tokens used" line
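
Note: clean_ai_output's codex branch keeps only the text between the "[...] codex" marker line and the "tokens used" footer. A hedged Python sketch of that extraction (clean_codex_output is illustrative, not the package's function):

import re

def clean_codex_output(output):
    # Keep lines strictly between the "[...] codex" marker and the "tokens used" footer.
    lines = output.splitlines()
    start = next((i + 1 for i, line in enumerate(lines)
                  if re.fullmatch(r"\[.*\] codex", line)), 0)
    end = next((i for i, line in enumerate(lines) if "tokens used" in line), len(lines))
    return "\n".join(lines[start:end]).strip()
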
package/lib/ai_cli.py
CHANGED

@@ -241,19 +241,33 @@ def get_fallback_models_for_command(command: str) -> List[str]:
 # in bash, because the bash `timeout` command causes claude CLI (and sometimes
 # gemini CLI) to hang when called from nested subprocess contexts.
 MODEL_TIMEOUTS = {
-    # Claude
+    # Claude - 5 min standard, 30 min thinking
     'opus': 300, 'sonnet': 300, 'haiku': 300,
     'opus-think': 1800, 'sonnet-think': 1800,
-
-
-    '
-
-
-    '
-
-
-
-
+    'opus-openrouter': 600, 'cursor-sonnet': 300, 'cursor-opus': 300,
+    # Codex/GPT - 10 min standard
+    'gpt': 600, 'gpt-high': 900,
+    'codex-think': 900, 'codex-coding': 600, 'codex-spark': 300,
+    # Gemini - 30 min for pro (streams), 10 min for flash
+    'gemini-pro': 1800, 'gemini-flash': 1200,
+    'gemini-cheap': 600, 'gemini-pro-openrouter': 1800,
+    # GLM / Z.AI - 30 min (Z.AI can be slow)
+    'glm': 600, 'glm-zai': 1800,
+    # Qwen
+    'qwen': 600, 'qwen-coder': 1200,
+    # DeepSeek
+    'deepseek': 600, 'deepseek-local': 2400,
+    # Kimi
+    'kimi-coder': 600, 'kimi-think': 900, 'kimi-openrouter': 600,
+    # Grok
+    'grok': 600, 'grok-fast': 600,
+    # MiniMax
+    'minimax': 600,
+    # Ollama cloud
+    'ollama-glm': 1200, 'ollama-gemma': 1200,
+    'ollama-minimax': 1200, 'ollama-qwen': 1200,
+    # Local inference
+    'codex-local': 2400,
 }
 DEFAULT_MODEL_TIMEOUT = 600  # 10 minutes for everything else
 
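
Note: per the AIDEV-NOTE, timeouts are enforced on the Python side because bash `timeout` can hang the claude and gemini CLIs in nested subprocess contexts. A sketch of how such a table is typically consumed; the .get() lookup with a default matches the dict above, while run_with_model_timeout itself is an assumption, not the package's API:

import subprocess

def run_with_model_timeout(cmd, model):
    # Assumed consumer of MODEL_TIMEOUTS / DEFAULT_MODEL_TIMEOUT defined above.
    timeout = MODEL_TIMEOUTS.get(model, DEFAULT_MODEL_TIMEOUT)
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
        return result.returncode, result.stdout
    except subprocess.TimeoutExpired:
        return 124, ""  # mirror the timeout exit-code convention used elsewhere
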
package/lib/config.py
CHANGED
File without changes

package/lib/config.sh
CHANGED

@@ -63,13 +63,14 @@ DEFAULT_WORKER_MAX_CANDIDATES=3
 #
 # Run: Subscription-based agentic models for code generation
 # All CLI tools (opencode, claude, gemini, kimi) are agentic and can edit files
-
-
+# Ollama cloud models are flat-rate (subscription), so prefer them over per-token OpenRouter
+DEFAULT_LLM_RUN="gemini-pro gemini-pro ollama-glm ollama-glm ollama-qwen ollama-qwen ollama-minimax ollama-minimax ollama-gemma ollama-gemma kimi-coder kimi-coder codex-coding codex-coding glm-zai qwen-coder minimax sonnet"
+DEFAULT_LLM_RUN_FALLBACK="haiku ollama-glm ollama-gemma ollama-minimax ollama-qwen glm-zai gemini-cheap codex-spark qwen"
 #
 # Ideate: Agentic models that can edit files for ideation
 # All CLI tools (opencode, claude, gemini, kimi) are agentic and can edit files
-DEFAULT_LLM_IDEATE="opus-think glm-
-DEFAULT_LLM_IDEATE_FALLBACK="haiku glm-
+DEFAULT_LLM_IDEATE="opus-think ollama-glm ollama-glm gemini-pro ollama-qwen ollama-minimax ollama-gemma kimi-coder gpt codex-think glm-zai qwen-coder minimax qwen"
+DEFAULT_LLM_IDEATE_FALLBACK="haiku ollama-glm ollama-gemma ollama-minimax ollama-qwen glm-zai gemini-cheap codex-spark qwen"
 
 # Load configuration from a YAML file and update variables
 _load_yaml_config() {
@@ -316,17 +317,6 @@ show_config() {
     echo " Max retries: $MAX_RETRIES"
     echo " Memory limit: ${MEMORY_LIMIT_MB}MB"
     echo " Worker max candidates: $WORKER_MAX_CANDIDATES"
-    echo " LLM configuration:"
-    # Show LLM configurations using dynamic variable names
-    for model in gpt5high o3high gpt_5_codex gpt_5_2 gpt_5_3_codex gpt_5_3_codex_spark codex gemini gemini_5_flash opus opus_think sonnet sonnet_think cursor_sonnet cursor_opus glm deepseek; do
-        var_name="LLM_CLI_${model}"
-        var_value=$(eval echo "\$$var_name")
-        if [[ -n "$var_value" ]]; then
-            # Convert underscore back to dash for display
-            display_name=$(echo "$model" | sed 's/_/-/g')
-            echo " $display_name: $var_value"
-        fi
-    done
     echo " LLM for run: $LLM_RUN"
     echo " LLM for run (fallback): $LLM_RUN_FALLBACK"
     echo " LLM for ideate: $LLM_IDEATE"
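
Note: DEFAULT_LLM_RUN repeats entries (gemini-pro, the ollama-* models, kimi-coder, and codex-coding each appear twice). Under a uniform draw over the whitespace-separated tokens, duplication doubles a model's selection odds. A sketch under that assumption only; the actual sampling logic is not shown in this diff:

import random

run_list = ("gemini-pro gemini-pro ollama-glm ollama-glm ollama-qwen ollama-qwen "
            "ollama-minimax ollama-minimax ollama-gemma ollama-gemma kimi-coder "
            "kimi-coder codex-coding codex-coding glm-zai qwen-coder minimax sonnet")

# Uniform choice over tokens: duplicated names get proportionally more weight.
model = random.choice(run_list.split())
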
package/lib/csv-lock.sh
CHANGED
File without changes

package/lib/editor.sh
CHANGED
File without changes

package/lib/evolution_csv.py
CHANGED
File without changes

package/lib/evolution_processor.py
CHANGED
File without changes

package/lib/evolve_ideate.py
CHANGED
File without changes

package/lib/evolve_worker.py
CHANGED

@@ -164,7 +164,7 @@ Important: Make meaningful changes that match the description. Don't just add co
 
 IMPORTANT: If you need to read Python (.py) or CSV files, read them in chunks using offset and limit parameters to avoid context overload
 Example: Read(file_path='evolution_gen01-001.py', offset=0, limit=100) then Read(offset=100, limit=100), etc.
-This is especially important for models with smaller context windows
+This is especially important for models with smaller context windows.
 
 CRITICAL: If you do not know how to implement what was asked for, or if the requested change is unclear or not feasible, you MUST refuse to make any changes. DO NOT modify the code if you are uncertain about the implementation. Simply respond that you cannot implement the requested change and explain why. It is better to refuse than to make incorrect or random changes."""
 
package/lib/llm_bandit.py
CHANGED

@@ -356,7 +356,7 @@ if __name__ == "__main__":
     # Test the bandit
     print("Testing LLM Bandit...")
 
-    models = ["opus", "sonnet", "gemini-pro", "
+    models = ["opus", "sonnet", "gemini-pro", "gpt"]
     bandit = LLMBandit(models, state_file="/tmp/test_bandit.json")
 
     # Simulate some runs
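
Note: the test harness only reveals the LLMBandit constructor, so as a hedged illustration of what a model-selection bandit does (favor the model with the best observed reward while still exploring), here is a generic epsilon-greedy sketch. None of these names or methods are claimed to match llm_bandit.py:

import random
from collections import defaultdict

class EpsilonGreedyBandit:
    # Hypothetical stand-in, not the package's LLMBandit.
    def __init__(self, models, epsilon=0.1):
        self.models = models
        self.epsilon = epsilon
        self.counts = defaultdict(int)    # pulls per model
        self.totals = defaultdict(float)  # summed reward per model

    def select(self):
        if random.random() < self.epsilon:
            return random.choice(self.models)   # explore
        return max(self.models,                 # exploit best mean reward
                   key=lambda m: self.totals[m] / max(self.counts[m], 1))

    def update(self, model, reward):
        self.counts[model] += 1
        self.totals[model] += reward
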
package/lib/log.py
CHANGED
File without changes

package/lib/meta_learning.py
CHANGED
File without changes

package/lib/sandbox.sb
CHANGED
File without changes

package/lib/sandbox_wrapper.py
CHANGED
File without changes

package/package.json
CHANGED
package/templates/BRIEF.md
CHANGED
File without changes

package/templates/algorithm.py
CHANGED
File without changes

package/templates/config.yaml
CHANGED

@@ -94,10 +94,10 @@ llm_cli:
 
   # Default configuration: sonnet at ~11%, rest doubled for cost savings
   # Commented out because these change over time; uncomment to override
-  #run: gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder
-  #ideate: opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2
-  #run_fallback: haiku glm-5-zai gemini-5-flash
-  #ideate_fallback: haiku glm-5-zai gemini-5-flash
+  #run: gemini-pro gemini-pro glm-5-zai glm-5-zai kimi-coder kimi-coder codex-coding codex-coding sonnet
+  #ideate: opus-think glm-5-zai gemini-pro kimi-coder gpt-5.2 codex-think qwen-openrouter
+  #run_fallback: haiku glm-5-zai gemini-5-flash codex-spark
+  #ideate_fallback: haiku glm-5-zai gemini-5-flash codex-spark
 
   # Available models:
   # Claude (subscription-based, watch usage limits):
@@ -108,10 +108,12 @@ llm_cli:
   # - haiku: Claude Haiku via Claude CLI (cheap fallback)
   #
   # Codex/OpenAI (subscription-based):
-  # -
+  # - codex-think: GPT-5.4 high reasoning effort (ideation)
+  # - codex-coding: GPT-5.4 medium reasoning effort (coding/run)
+  # - codex-spark: GPT-5.1 Codex Mini (lightweight fallback)
+  # - gpt-5.4: GPT-5.4 no reasoning effort override via Codex CLI
   # - gpt-5.2: GPT-5.2 via Codex CLI
-  # - gpt-5.3-codex: GPT-5.3 Codex via Codex CLI
-  # - gpt-5.3-codex-spark: GPT-5.3 Codex Spark (lightweight fallback) via Codex CLI
+  # - gpt-5.3-codex: GPT-5.3 Codex (code-specialized) via Codex CLI
   # - gpt5: GPT-5 via Codex CLI (legacy alias)
   # - gpt5high: GPT-5 via Codex CLI (high reasoning)
   # - o3high: O3 via Codex CLI (high reasoning)
package/templates/evaluator.py
CHANGED
File without changes

package/lib/__pycache__/ai_cli.cpython-311.pyc
Binary file
package/lib/__pycache__/evolution_csv.cpython-311.pyc
Binary file
package/lib/__pycache__/evolution_csv.cpython-313.pyc
Binary file
package/lib/__pycache__/evolve_run.cpython-311.pyc
Binary file
package/lib/__pycache__/evolve_run.cpython-314.pyc
Binary file
package/lib/__pycache__/evolve_worker.cpython-314.pyc
Binary file
package/lib/__pycache__/llm_bandit.cpython-314.pyc
Binary file
package/lib/__pycache__/log.cpython-311.pyc
Binary file
package/lib/__pycache__/log.cpython-314.pyc
Binary file
package/lib/__pycache__/meta_learning.cpython-314.pyc
Binary file
package/lib/__pycache__/sandbox_wrapper.cpython-314.pyc
Binary file