npm - wolverine-ai - Versions diffs - 4.0.4 → 4.1.0 - Mend

wolverine-ai 4.0.4 → 4.1.0

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (6) hide show

package/package.json +1 -1
package/src/brain/brain.js +4 -0
package/src/core/ai-client.js +9 -4
package/src/core/runner.js +31 -4
package/src/core/wolverine.js +16 -3
package/src/templates/server/config/settings.json +6 -6

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "wolverine-ai",
-  "version": "4.0.4",
+  "version": "4.1.0",
   "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
   "main": "src/index.js",
   "bin": {

package/src/brain/brain.js CHANGED Viewed

@@ -133,6 +133,10 @@ const SEED_DOCS = [
     text: "Configuration: hybrid-always architecture — no provider selection. Users pick the best model for each of 8 task roles directly in settings.json 'models' section. Mix and match: wolverine for audit, claude for reasoning, gpt for coding. Provider auto-detected from model name. Embedding is separate ('embedding' key) — always wolverine-embedding-1 billed through credits (proxies to text-embedding-3-small at 2x markup). Secrets in .env.local. Config priority: env vars > settings.json > defaults.",
     metadata: { topic: "configuration" },
   },
+  {
+    text: "AI client prompt caching: all 3 providers cache automatically. Anthropic: system prompt marked cache_control:ephemeral, 90% cheaper on repeat calls within 5 min TTL. OpenAI: automatic prefix caching for >=1024 token prefixes, 50% cheaper on cached input, tracked via usage.prompt_tokens_details.cached_tokens. Wolverine/llama.cpp: cache_prompt:true in request body reuses KV cache for identical prefixes between requests, near-zero TTFT on second+ call in a heal pipeline. Cache savings tracked in analytics: cacheCreation (tokens written to cache) and cacheRead (tokens served from cache).",
+    metadata: { topic: "prompt-caching" },
+  },
   {
     text: "Platform telemetry: lightweight background process, zero-config. Default platform: api.wolverinenode.xyz. Auto-registers on first run (retries every 60s until platform responds), saves key to .wolverine/platform-key. Heartbeat payload matches PLATFORM.md spec: instanceId, server (name/port/uptime/status/pid), process (memoryMB/cpuPercent), routes, repairs, usage (tokens/cost/calls/byCategory), brain, backups. Offline-resilient: queues up to 1440 heartbeats locally, drains on reconnect. No chalk dependency, cached version/key in memory, minimal IO. Opt out: WOLVERINE_TELEMETRY=false. Override URL: WOLVERINE_PLATFORM_URL.",
     metadata: { topic: "platform-telemetry" },

package/src/core/ai-client.js CHANGED Viewed

@@ -21,8 +21,9 @@ function _extractTokens(usage) {
     output: usage.completion_tokens || usage.output_tokens || 0,
     // Anthropic cache fields
     cacheCreation: usage.cache_creation_input_tokens || usage.cache_write_tokens || 0,
-    // OpenAI uses cache_read_tokens, Anthropic uses cache_read_input_tokens
-    cacheRead: usage.cache_read_input_tokens || usage.cache_read_tokens || 0,
+    // OpenAI prompt_tokens_details.cached_tokens + Anthropic cache_read_input_tokens
+    cacheRead: usage.cache_read_input_tokens || usage.cache_read_tokens
+      || usage.prompt_tokens_details?.cached_tokens || 0,
   };
 }
@@ -520,19 +521,20 @@ async function _chatCall(openai, { model, systemPrompt, userPrompt, maxTokens, t
   if (systemPrompt) messages.push({ role: "system", content: systemPrompt });
   messages.push({ role: "user", content: userPrompt });
-  // No temperature for o-series and gpt-5+ (forbidden, causes error)
   const noTemp = /^(o[1-9]|gpt-5)/.test(model);
+  const isWolverine = detectProvider(model) === "wolverine";
   const params = {
     model, messages,
     ...(!noTemp ? { temperature: 0 } : {}),
     ...tokenParam(model, maxTokens),
     ..._reasoningParams(model),
+    // Prompt caching: llama.cpp reuses KV cache for identical prefixes
+    ...(isWolverine ? { cache_prompt: true } : {}),
   };
   if (tools && tools.length > 0) {
     params.tools = tools;
     params.tool_choice = toolChoice || "auto";
-    // Disable parallel calls for reliability — sequential is more predictable for healing
     params.parallel_tool_calls = false;
   }
@@ -589,11 +591,14 @@ async function _responsesCallWithHistory(openai, { model, messages, tools, maxTo
 async function _chatCallWithHistory(openai, { model, messages, tools, maxTokens }) {
   const noTemp = /^(o[1-9]|gpt-5)/.test(model);
+  const isWolverine = detectProvider(model) === "wolverine";
   const params = {
     model, messages,
     ...(!noTemp ? { temperature: 0 } : {}),
     ...tokenParam(model, maxTokens),
     ..._reasoningParams(model),
+    // Prompt caching: llama.cpp KV cache reuse for multi-turn agent conversations
+    ...(isWolverine ? { cache_prompt: true } : {}),
   };
   if (tools && tools.length > 0) {
     params.tools = tools;

package/src/core/runner.js CHANGED Viewed

@@ -459,6 +459,13 @@ class WolverineRunner {
         return;
       }
+      // #28: SIGKILL = likely OOM — synthesize useful stderr for the heal pipeline
+      if (signal === "SIGKILL" && (!this._stderrBuffer.trim() || this._stderrBuffer.trim().length < 10)) {
+        this._stderrBuffer = `Process killed by SIGKILL (possible OOM). Memory limit may have been exceeded. Check memory usage patterns and reduce memory consumption.\nExit code: ${code}, Signal: ${signal}`;
+        console.log(chalk.red(`\n💀 Process killed by SIGKILL (possible OOM)`));
+        this.logger.error(EVENT_TYPES.PROCESS_CRASH, "SIGKILL — possible OOM", { exitCode: code, signal });
+      }
       // Killed by signal with no stderr — just restart, don't waste tokens healing
       if (!this._stderrBuffer.trim() || this._stderrBuffer.trim().length < 10) {
         console.log(chalk.yellow(`\n⚠️  Process killed (code: ${code}, signal: ${signal}) — no error to heal, restarting`));
@@ -483,13 +490,28 @@ class WolverineRunner {
       }
       this.retryCount++;
-      await this._healAndRestart();
+      // #3: Guard against unhandled rejections — don't let heal errors crash the parent
+      try {
+        await this._healAndRestart();
+      } catch (healErr) {
+        console.log(chalk.red(`  ⚠️  Heal error (recovering): ${healErr.message}`));
+        this._healInProgress = false;
+        this._healStatus = null;
+        if (this.running) this._spawn(); // restart without healing
+      }
     });
     this.child.on("error", (err) => {
       console.log(chalk.red(`Failed to start process: ${err.message}`));
       this.logger.error(EVENT_TYPES.PROCESS_CRASH, `Failed to start: ${err.message}`);
-      this.running = false;
+      // #10: Retry spawn after delay instead of permanently dying
+      if (this.running && this.retryCount < this.maxRetries) {
+        this.retryCount++;
+        console.log(chalk.yellow(`   Retrying spawn in 5s (attempt ${this.retryCount}/${this.maxRetries})...`));
+        setTimeout(() => { if (this.running) this._spawn(); }, 5000);
+      } else {
+        this.running = false;
+      }
     });
     // IPC channel: child reports caught 500 errors (Fastify/Express)
@@ -626,9 +648,14 @@ class WolverineRunner {
         }
       }
     } catch (err) {
-      console.log(chalk.red(`\n🐺 Wolverine encountered an error: ${err.message}`));
+      // #4: Don't permanently die on transient errors — restart without healing
+      console.log(chalk.red(`\n🐺 Wolverine heal error (recovering): ${err.message}`));
       this._healInProgress = false;
-      this.running = false;
+      this._healStatus = null;
+      if (this.running) {
+        console.log(chalk.yellow("   Restarting without healing..."));
+        this._spawn();
+      }
     }
   }

package/src/core/wolverine.js CHANGED Viewed

@@ -35,6 +35,17 @@ async function heal(opts) {
     if (err.message === "timeout") {
       console.log(chalk.red(`\n🐺 Heal timed out after ${HEAL_TIMEOUT_MS / 1000}s`));
       if (opts.logger) opts.logger.error(EVENT_TYPES.HEAL_FAILED, `Heal timed out after ${HEAL_TIMEOUT_MS / 1000}s`);
+      // #11: Rollback on timeout — the background _healImpl may have partially applied patches
+      if (opts.backupManager) {
+        try {
+          const all = opts.backupManager.getAll();
+          const latest = all.find(b => b.status === "unstable");
+          if (latest) {
+            opts.backupManager.rollbackTo(latest.id);
+            console.log(chalk.yellow(`  ↩️  Rolled back to ${latest.id} (timeout cleanup)`));
+          }
+        } catch {}
+      }
       return { healed: false, explanation: `Heal timed out after ${HEAL_TIMEOUT_MS / 1000}s` };
     }
     throw err;
@@ -312,7 +323,7 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
             errorMessage: parsed.errorMessage, stackTrace: parsed.stackTrace,
             extraContext: envContext,
           });
-          rateLimiter.record(errorSignature);
+          // #15: Don't record rate limit until AFTER verification — failed attempts shouldn't exhaust the limit
           // Execute shell commands first (npm install, mkdir, etc.)
           if (repair.commands && Array.isArray(repair.commands)) {
@@ -351,6 +362,7 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
           const verification = await verifyFix(parsed.filePath, cwd, errorSignature, routeContext);
           if (verification.verified) {
             backupManager.markVerified(bid);
+            rateLimiter.record(errorSignature);
             rateLimiter.clearSignature(errorSignature);
             // Track tool operations: file read + patch + verify + any commands
             // These are the same operations an agent would do with read_file/write_file/bash_exec
@@ -360,10 +372,11 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
             return { healed: true, explanation: repair.explanation, backupId: bid, mode: "fast" };
           }
-          backupManager.rollbackTo(bid);
+          // #13: Safe rollback — wrap in try/catch to prevent rollback-of-rollback loop
+          try { backupManager.rollbackTo(bid); } catch (rbErr) { console.log(chalk.red(`  ⚠️  Rollback failed: ${rbErr.message}`)); }
           return { healed: false, explanation: `Fast path: ${verification.status}` };
         } catch (err) {
-          backupManager.rollbackTo(bid);
+          try { backupManager.rollbackTo(bid); } catch (rbErr) { console.log(chalk.red(`  ⚠️  Rollback failed: ${rbErr.message}`)); }
           return { healed: false, explanation: `Fast path error: ${err.message}` };
         }
       } else if (iteration <= 2) {

package/src/templates/server/config/settings.json CHANGED Viewed

@@ -6,14 +6,14 @@
   },
   "models": {
-    "reasoning": "gpt-4o",
-    "coding": "gpt-4o",
-    "chat": "gpt-4o-mini",
+    "reasoning": "claude-sonnet-4-6",
+    "coding": "claude-sonnet-4-6",
+    "chat": "gpt-5.4-mini",
     "tool": "gpt-4o-mini",
     "classifier": "gpt-4o-mini",
-    "audit": "gpt-4o-mini",
-    "compacting": "gpt-4o-mini",
-    "research": "gpt-4o"
+    "audit": "wolverine-test-1",
+    "compacting": "wolverine-test-1",
+    "research": "claude-sonnet-4-6"
   },
   "embedding": "wolverine-embedding-1",