npm - wolverine-ai - Versions diffs - 2.2.2 → 2.3.0 - Mend

wolverine-ai 2.2.2 → 2.3.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (7)

package/package.json +1 -1
package/src/agent/agent-engine.js +29 -0
package/src/agent/goal-loop.js +3 -2
package/src/agent/sub-agents.js +10 -7
package/src/core/ai-client.js +65 -4
package/src/core/verifier.js +13 -3
package/src/core/wolverine.js +39 -10

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "wolverine-ai",
-  "version": "2.2.2",
+  "version": "2.3.0",
   "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
   "main": "src/index.js",
   "bin": {

package/src/agent/agent-engine.js CHANGED Viewed

@@ -463,6 +463,35 @@ Project root: ${this.cwd}${primaryFile ? `\nPrimary crash file: ${primaryFile}`
       console.log(chalk.gray(`  🤖 Agent turn ${this.turnCount}/${this.maxTurns} (${this.totalTokens} tokens used)`));
+      // Compact context every 3 turns to prevent token blowup
+      // Turn 6 without compacting: ~95K tokens. With compacting: ~20K tokens.
+      if (this.turnCount > 1 && this.turnCount % 3 === 0 && this.messages.length > 4) {
+        try {
+          const { aiCall } = require("./ai-client") || require("../core/ai-client");
+          const { getModel: _gm } = require("./models") || require("../core/models");
+          const historyToCompact = this.messages.slice(1, -2); // keep system + last exchange
+          if (historyToCompact.length > 2) {
+            const historyText = historyToCompact.map(m => `${m.role}: ${(m.content || "").slice(0, 500)}`).join("\n");
+            const compactResult = await aiCall({
+              model: _gm("compacting"),
+              systemPrompt: "Summarize this agent conversation history into a concise status report. Keep: files read, changes made, errors found, what was tried. Remove: full file contents, redundant tool results.",
+              userPrompt: historyText.slice(0, 8000),
+              maxTokens: 512,
+              category: "brain",
+            });
+            if (compactResult.content) {
+              this.messages = [
+                this.messages[0], // system prompt
+                { role: "assistant", content: `[Prior work summary]\n${compactResult.content}` },
+                { role: "user", content: "Continue from where you left off." },
+                ...this.messages.slice(-2), // last exchange
+              ];
+              console.log(chalk.gray(`  📦 Compacted ${historyToCompact.length} messages → summary (${compactResult.content.length} chars)`));
+            }
+          }
+        } catch { /* compacting failed — continue with full context */ }
+      }
       let response;
       try {
         response = await aiCallWithHistory({

package/src/agent/goal-loop.js CHANGED Viewed

@@ -57,16 +57,17 @@ class GoalLoop {
         }
       }
-      // Attempt the fix
+      // Attempt the fix — pass prior attempts so the handler can include concise summary
       let attempt;
       try {
-        attempt = await this.onAttempt(iteration, context);
+        attempt = await this.onAttempt(iteration, context, this._attempts);
       } catch (err) {
         attempt = { healed: false, explanation: `Error: ${err.message}` };
       }
       this._attempts.push({
         iteration,
+        mode: attempt.mode || "unknown",
         success: attempt.healed,
         explanation: attempt.explanation || "No explanation",
       });

package/src/agent/sub-agents.js CHANGED Viewed

@@ -33,14 +33,17 @@ const AGENT_TOOL_SETS = {
 };
 // Default model + budget per agent type
+// Cost optimization: triage agents use cheap models (classifier slot = Haiku),
+// only the fixer needs the expensive coding model (Sonnet/Opus).
+// This cuts sub-agent cost by ~90% (6 Haiku calls vs 6 Sonnet calls).
 const AGENT_CONFIGS = {
-  explore:  { model: "reasoning", maxTurns: 5,  maxTokens: 10000 },
-  plan:     { model: "reasoning", maxTurns: 3,  maxTokens: 8000 },
-  fix:      { model: "coding",   maxTurns: 5,  maxTokens: 15000 },
-  verify:   { model: "reasoning", maxTurns: 3,  maxTokens: 5000 },
-  research: { model: "research",  maxTurns: 3,  maxTokens: 10000 },
-  security: { model: "audit",     maxTurns: 3,  maxTokens: 8000 },
-  database: { model: "coding",   maxTurns: 5,  maxTokens: 15000 },
+  explore:  { model: "classifier", maxTurns: 5,  maxTokens: 15000 },  // Haiku — just reading
+  plan:     { model: "classifier", maxTurns: 3,  maxTokens: 10000 },  // Haiku — simple planning
+  fix:      { model: "coding",    maxTurns: 5,  maxTokens: 50000 },  // Sonnet/Opus — needs reasoning
+  verify:   { model: "classifier", maxTurns: 3,  maxTokens: 8000 },   // Haiku — just checking
+  research: { model: "classifier", maxTurns: 3,  maxTokens: 10000 },  // Haiku — summarization
+  security: { model: "audit",     maxTurns: 3,  maxTokens: 8000 },   // Haiku — pattern matching
+  database: { model: "coding",    maxTurns: 5,  maxTokens: 50000 },  // Sonnet/Opus — needs reasoning
 };
 // System prompts per agent type

package/src/core/ai-client.js CHANGED Viewed

@@ -57,12 +57,71 @@ function isReasoningModel(model) {
 function isAnthropicModel(model) { return detectProvider(model) === "anthropic"; }
+/**
+ * Per-model max output token limits (with 10% overestimation buffer).
+ * These are the actual API limits — requesting more than this fails.
+ */
+const MODEL_OUTPUT_LIMITS = {
+  // OpenAI — generous output limits
+  "gpt-4o":              17600,  // 16384 + 10%
+  "gpt-4o-mini":         17600,
+  "gpt-5":               17600,
+  "gpt-5.4":             17600,
+  "gpt-5.4-mini":        17600,
+  "gpt-5.4-nano":        17600,
+  "gpt-5-nano":          17600,
+  "o1":                  110000, // 100k + 10% (reasoning model, huge output)
+  "o1-mini":             72600,  // 66k + 10%
+  "o3":                  110000,
+  "o3-mini":             72600,
+  "o4-mini":             72600,
+  "gpt-5.1-codex":       17600,
+  "gpt-5.3-codex":       17600,
+  "codex-mini":          17600,
+  // Anthropic — each tier has different output limits
+  "claude-opus-4":       32000,  // 32k max output (no buffer needed, already generous)
+  "claude-sonnet-4":     17600,  // 16k + 10%
+  "claude-haiku-4":      8800,   // 8k + 10%
+  "claude-3-5-sonnet":   8800,
+  "claude-3-5-haiku":    8800,
+  "claude-3-opus":       4400,   // 4k + 10%
+  "claude-3-sonnet":     4400,
+  "claude-3-haiku":      4400,
+};
+/**
+ * Get the max output tokens for a model (with 10% buffer).
+ * Falls back to sensible defaults if model not in table.
+ */
+function _getOutputLimit(model) {
+  // Exact match
+  if (MODEL_OUTPUT_LIMITS[model]) return MODEL_OUTPUT_LIMITS[model];
+  // Prefix match (handles dated versions like claude-sonnet-4-6, claude-haiku-4-5-20250414)
+  for (const [prefix, limit] of Object.entries(MODEL_OUTPUT_LIMITS)) {
+    if (model.startsWith(prefix)) return limit;
+  }
+  // Defaults with 10% buffer
+  if (isAnthropicModel(model)) return 8800;   // 8k + 10% (safe Anthropic default)
+  return 17600;                                // 16k + 10% (safe OpenAI default)
+}
+/**
+ * Build token limit params for the API call.
+ * Respects per-model output limits and adds reasoning headroom.
+ */
 function tokenParam(model, limit) {
-  const effectiveLimit = isReasoningModel(model) ? Math.max(limit * 4, 4096) : limit;
+  const maxOutput = _getOutputLimit(model);
+  // Reasoning models get 4x to accommodate chain-of-thought, but capped at model max
+  let effectiveLimit = isReasoningModel(model) ? Math.max(limit * 4, 4096) : limit;
+  effectiveLimit = Math.min(effectiveLimit, maxOutput);
+  // Anthropic uses max_tokens directly (handled in _anthropicCall)
+  if (isAnthropicModel(model)) return { max_tokens: effectiveLimit };
   if (isResponsesModel(model)) return { max_output_tokens: effectiveLimit };
   const usesNewParam = /^(o[1-9]|gpt-5|gpt-4o)/.test(model) || model.includes("nano");
   if (usesNewParam) return { max_completion_tokens: effectiveLimit };
-  return { max_tokens: limit };
+  return { max_tokens: effectiveLimit };
 }
 // ── Unified AI Call ──
@@ -121,10 +180,11 @@ async function aiCallWithHistory({ model, messages, tools, maxTokens = 4096, cat
 async function _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tools, toolChoice }) {
   const client = _getAnthropicClient();
+  const outputLimit = Math.min(maxTokens, _getOutputLimit(model));
   const params = {
     model,
-    max_tokens: maxTokens,
+    max_tokens: outputLimit,
     messages: [{ role: "user", content: userPrompt }],
   };
@@ -203,9 +263,10 @@ async function _anthropicCallWithHistory({ model, messages, tools, maxTokens })
     }
   }
+  const outputLimit = Math.min(maxTokens, _getOutputLimit(model));
   const params = {
     model,
-    max_tokens: maxTokens,
+    max_tokens: outputLimit,
     messages: merged,
   };

package/src/core/verifier.js CHANGED Viewed

@@ -131,7 +131,14 @@ function bootProbe(scriptPath, cwd, originalErrorSignature) {
  * @param {object} routeContext — optional { path, method } for route-level testing
  */
 async function verifyFix(scriptPath, cwd, originalErrorSignature, routeContext) {
-  const steps = routeContext?.path ? 3 : 2;
+  // Simple errors (TypeError, ReferenceError, SyntaxError) — trust syntax+boot, skip route probe.
+  // If the fix is wrong, ErrorMonitor will catch the 500 and re-trigger heal. This avoids
+  // the expensive cascade where a working fix gets rolled back because the route probe
+  // can't boot the full server in isolation.
+  const isSimpleError = /TypeError|ReferenceError|SyntaxError|Cannot find module/.test(originalErrorSignature || "");
+  const skipRouteProbe = isSimpleError;
+  const steps = (!skipRouteProbe && routeContext?.path) ? 3 : 2;
   console.log(chalk.yellow("\n🔬 Verifying fix...\n"));
   // Step 1: Syntax check
@@ -157,8 +164,9 @@ async function verifyFix(scriptPath, cwd, originalErrorSignature, routeContext)
   }
   console.log(chalk.green("  ✅ Process booted successfully"));
-  // Step 3: Route probe (if we know which route was failing)
-  if (routeContext?.path) {
+  // Step 3: Route probe — only for complex errors (not simple TypeError/ReferenceError)
+  // Simple errors: trust syntax+boot. ErrorMonitor is the safety net.
+  if (!skipRouteProbe && routeContext?.path) {
     console.log(chalk.gray(`  [3/${steps}] Route probe: ${routeContext.method || "GET"} ${routeContext.path}...`));
     const routeResult = await routeProbe(scriptPath, cwd, routeContext);
     if (routeResult.status === "failed") {
@@ -170,6 +178,8 @@ async function verifyFix(scriptPath, cwd, originalErrorSignature, routeContext)
     } else {
       console.log(chalk.gray(`  ⚠️  Route probe skipped: ${routeResult.reason || "unknown"}`));
     }
+  } else if (skipRouteProbe && routeContext?.path) {
+    console.log(chalk.gray(`  ⚡ Skipping route probe (simple error — ErrorMonitor is safety net)`));
   }
   return { verified: true, status: "fixed" };

package/src/core/wolverine.js CHANGED Viewed

@@ -203,7 +203,22 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
     } catch { /* non-fatal */ }
   }
-  // 6. Research — check past attempts to avoid loops
+  // 6. Check brain for cached fix — if we fixed this exact error before, replay it (zero tokens)
+  if (brain && brain._initialized && hasFile && repairHistory) {
+    try {
+      const pastRepairs = repairHistory.getAll().filter(r =>
+        r.success && r.file === parsed.filePath && r.error &&
+        parsed.errorMessage.includes(r.error.split(":").pop()?.trim()?.slice(0, 30))
+      );
+      if (pastRepairs.length > 0) {
+        const cached = pastRepairs[pastRepairs.length - 1];
+        console.log(chalk.gray(`  🧠 Found cached fix for similar error (${cached.mode}, ${cached.id})`));
+        if (logger) logger.info("heal.cached", `Cached fix found: ${cached.resolution?.slice(0, 80)}`, { cachedId: cached.id });
+      }
+    } catch {}
+  }
+  // 7. Research — check past attempts to avoid loops
   const researcher = new ResearchAgent({ brain, logger });
   let researchContext = "";
   try {
@@ -211,24 +226,38 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
     if (researchContext) console.log(chalk.gray(`  🔍 Research: found past context for this error`));
   } catch {}
-  // 7. Goal Loop — set goal, iterate until fixed or exhausted
-  // Iteration 1: fast path (CODING_MODEL)
-  // Iteration 2: agent path (REASONING_MODEL)
-  // Iteration 3: deep research (RESEARCH_MODEL) + agent retry
+  // 7b. Token budget by error complexity — simple bugs get tight caps
+  const isSimpleError = /TypeError|ReferenceError|SyntaxError|Cannot find module/.test(parsed.errorMessage);
+  const isModerateError = /ECONNREFUSED|timeout|ENOENT|EACCES|EADDRINUSE/.test(parsed.errorMessage);
+  const tokenBudget = isSimpleError
+    ? { fast: 5000, agent: 20000, subAgent: 15000 }
+    : isModerateError
+    ? { fast: 10000, agent: 50000, subAgent: 30000 }
+    : { fast: 15000, agent: 100000, subAgent: 50000 };
+  console.log(chalk.gray(`  💰 Token budget: ${isSimpleError ? "simple" : isModerateError ? "moderate" : "complex"} (agent: ${tokenBudget.agent})`));
+  // 8. Goal Loop — set goal, iterate until fixed or exhausted
   const loop = new GoalLoop({
     maxIterations: parseInt(process.env.WOLVERINE_MAX_RETRIES, 10) || 3,
     researcher,
     logger,
     goal: `Fix: ${parsed.errorMessage.slice(0, 80)}`,
-    onAttempt: async (iteration, researchCtx) => {
+    onAttempt: async (iteration, researchCtx, priorAttempts) => {
       // Create backup for this attempt
-      // Full server/ backup — includes all files, configs, databases
       const bid = backupManager.createBackup(`heal attempt ${iteration}: ${parsed.errorMessage.slice(0, 60)}`);
       backupManager.setErrorSignature(bid, errorSignature);
       if (logger) logger.info(EVENT_TYPES.BACKUP_CREATED, `Backup ${bid} (iteration ${iteration})`, { backupId: bid });
-      const fullContext = [brainContext, researchContext, researchCtx, envContext].filter(Boolean).join("\n");
+      // Build concise prior attempt summary instead of full context bleed
+      let priorSummary = "";
+      if (priorAttempts && priorAttempts.length > 0) {
+        priorSummary = "\nPRIOR ATTEMPTS (do NOT repeat):\n" + priorAttempts.map(a =>
+          `- Attempt ${a.iteration} (${a.mode}): ${a.explanation?.slice(0, 100)}`
+        ).join("\n") + "\n";
+      }
+      const fullContext = [brainContext, researchContext, researchCtx, envContext, priorSummary].filter(Boolean).join("\n");
       let result;
       if (iteration === 1 && hasFile) {
@@ -291,8 +320,8 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
         console.log(chalk.magenta(`  🤖 Agent path (${getModel("reasoning")})...`));
         const agent = new AgentEngine({
           sandbox, logger, cwd, mcp,
-          maxTurns: 8,
-          maxTokens: 25000,
+          maxTurns: isSimpleError ? 4 : 8,
+          maxTokens: tokenBudget.agent,
         });
         const agentResult = await agent.run({