wolverine-ai 3.0.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -450,6 +450,8 @@ Three layers prevent token waste:
450
450
 
451
451
  | Technique | What it does | Cost |
452
452
  |-----------|-------------|------|
453
+ | **Prompt caching** | Anthropic system prompt cached server-side — 90% cheaper on repeat calls | 12-16K tokens saved per heal |
454
+ | **Tool result truncation** | Tool output capped at 4K chars — prevents context blowup from large reads | Up to 30K saved per turn |
453
455
  | **Zero-cost compaction** | Extracts structural signals (tools, files, errors) from history — no LLM call | $0.00 |
454
456
  | **Token estimation** | `text.length / 4` approximation — fast budget checks without tokenizer | 0ms |
455
457
  | **Error-graceful tools** | Tool errors returned as `[ERROR]` results, not thrown — agent decides next step | More resilient |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wolverine-ai",
3
- "version": "3.0.0",
3
+ "version": "3.0.1",
4
4
  "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -548,10 +548,19 @@ Project root: ${this.cwd}${primaryFile ? `\nPrimary crash file: ${primaryFile}`
548
548
  // Post-hook: audit/modify result
549
549
  _runPostHook(toolCall.function?.name, toolCall.function?.arguments, result.content, isError, this.cwd);
550
550
 
551
+ // Tool result truncation: cap at 4K chars to prevent context blowup.
552
+ // One grep_code can return 30K+ chars — the model doesn't need all of it.
553
+ const MAX_TOOL_RESULT = 4000;
554
+ let toolContent = isError ? `[ERROR] ${result.content}` : result.content;
555
+ if (toolContent && toolContent.length > MAX_TOOL_RESULT) {
556
+ const truncated = toolContent.length - MAX_TOOL_RESULT;
557
+ toolContent = toolContent.slice(0, MAX_TOOL_RESULT) + `\n\n... (truncated ${truncated} chars. Use offset/limit for large results.)`;
558
+ }
559
+
551
560
  this.messages.push({
552
561
  role: "tool",
553
562
  tool_call_id: toolCall.id,
554
- content: isError ? `[ERROR] ${result.content}` : result.content,
563
+ content: toolContent,
555
564
  });
556
565
 
557
566
  if (result.done) {
@@ -258,7 +258,7 @@ const SEED_DOCS = [
258
258
  metadata: { topic: "token-protection" },
259
259
  },
260
260
  {
261
- text: "Agent efficiency (claw-code patterns): (1) Zero-cost structural compaction — extracts signals (tools used, files touched, errors found, actions taken) from message history WITHOUT an LLM call. Costs $0.00 vs old method that burned tokens on a compacting model. Triggers when estimated tokens > 10K (text.length/4 approximation). Preserves last 4 messages verbatim. (2) Token estimation — text.length/4+1, fast approximation without tokenizer, ~10% accurate. Used for budget decisions before API calls. (3) Error-graceful tools — tool errors returned as [ERROR] prefixed results, not thrown. Model sees the error and decides how to proceed. (4) Pre/post tool hooks — shell commands in .wolverine/hooks.json, exit 0=allow, 2=deny. Enables audit logging and policy enforcement without hard-coding.",
261
+ text: "Agent efficiency (claw-code patterns): (1) Anthropic prompt caching — system prompt marked with cache_control:{type:'ephemeral'}, cached server-side across agent turns, 90% cheaper on repeat calls (12-16K saved tokens per heal). (2) Tool result truncation — capped at 4K chars before entering message history, prevents context blowup from large grep/file reads. (3) Zero-cost structural compaction — extracts signals (tools used, files touched, errors found, actions taken) from message history WITHOUT an LLM call. Costs $0.00 vs old method that burned tokens on a compacting model. Triggers when estimated tokens > 10K (text.length/4 approximation). Preserves last 4 messages verbatim. (4) Token estimation — text.length/4+1, fast approximation without tokenizer, ~10% accurate. Used for budget decisions before API calls. (5) Error-graceful tools — tool errors returned as [ERROR] prefixed results, not thrown. Model sees the error and decides how to proceed. (6) Pre/post tool hooks — shell commands in .wolverine/hooks.json, exit 0=allow, 2=deny. Enables audit logging and policy enforcement without hard-coding.",
262
262
  metadata: { topic: "agent-efficiency" },
263
263
  },
264
264
  {
@@ -13,6 +13,8 @@ function _extractTokens(usage) {
13
13
  return {
14
14
  input: usage.prompt_tokens || usage.input_tokens || 0,
15
15
  output: usage.completion_tokens || usage.output_tokens || 0,
16
+ cacheCreation: usage.cache_creation_input_tokens || 0,
17
+ cacheRead: usage.cache_read_input_tokens || 0,
16
18
  };
17
19
  }
18
20
 
@@ -188,9 +190,16 @@ async function _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tool
188
190
  messages: [{ role: "user", content: userPrompt }],
189
191
  };
190
192
 
191
- if (systemPrompt) params.system = systemPrompt;
193
+ // Prompt caching: mark system prompt for Anthropic's server-side cache.
194
+ // Same system prompt across agent turns gets cached after first call — 90% cheaper.
195
+ if (systemPrompt) {
196
+ params.system = [{
197
+ type: "text",
198
+ text: systemPrompt,
199
+ cache_control: { type: "ephemeral" },
200
+ }];
201
+ }
192
202
 
193
- // Convert OpenAI-style tools to Anthropic format
194
203
  if (tools && tools.length > 0) {
195
204
  params.tools = tools.map(_toAnthropicTool).filter(Boolean);
196
205
  if (toolChoice === "required") params.tool_choice = { type: "any" };
@@ -270,7 +279,14 @@ async function _anthropicCallWithHistory({ model, messages, tools, maxTokens })
270
279
  messages: merged,
271
280
  };
272
281
 
273
- if (systemPrompt) params.system = systemPrompt;
282
+ // Prompt caching for multi-turn: system prompt cached across all turns
283
+ if (systemPrompt) {
284
+ params.system = [{
285
+ type: "text",
286
+ text: systemPrompt,
287
+ cache_control: { type: "ephemeral" },
288
+ }];
289
+ }
274
290
 
275
291
  if (tools && tools.length > 0) {
276
292
  params.tools = tools.map(_toAnthropicTool).filter(Boolean);