wolverine-ai 3.0.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -450,6 +450,8 @@ Three layers prevent token waste:
450
450
 
451
451
  | Technique | What it does | Cost |
452
452
  |-----------|-------------|------|
453
+ | **Prompt caching** | Anthropic system prompt cached server-side — 90% cheaper on repeat calls | 12-16K tokens saved per heal |
454
+ | **Tool result truncation** | Tool output capped at 4K chars — prevents context blowup from large reads | Up to 30K saved per turn |
453
455
  | **Zero-cost compaction** | Extracts structural signals (tools, files, errors) from history — no LLM call | $0.00 |
454
456
  | **Token estimation** | `text.length / 4` approximation — fast budget checks without tokenizer | 0ms |
455
457
  | **Error-graceful tools** | Tool errors returned as `[ERROR]` results, not thrown — agent decides next step | More resilient |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wolverine-ai",
3
- "version": "3.0.0",
3
+ "version": "3.0.1",
4
4
  "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -548,10 +548,19 @@ Project root: ${this.cwd}${primaryFile ? `\nPrimary crash file: ${primaryFile}`
548
548
  // Post-hook: audit/modify result
549
549
  _runPostHook(toolCall.function?.name, toolCall.function?.arguments, result.content, isError, this.cwd);
550
550
 
551
+ // Tool result truncation: cap at 4K chars to prevent context blowup.
552
+ // One grep_code can return 30K+ chars — the model doesn't need all of it.
553
+ const MAX_TOOL_RESULT = 4000;
554
+ let toolContent = isError ? `[ERROR] ${result.content}` : result.content;
555
+ if (toolContent && toolContent.length > MAX_TOOL_RESULT) {
556
+ const truncated = toolContent.length - MAX_TOOL_RESULT;
557
+ toolContent = toolContent.slice(0, MAX_TOOL_RESULT) + `\n\n... (truncated ${truncated} chars. Use offset/limit for large results.)`;
558
+ }
559
+
551
560
  this.messages.push({
552
561
  role: "tool",
553
562
  tool_call_id: toolCall.id,
554
- content: isError ? `[ERROR] ${result.content}` : result.content,
563
+ content: toolContent,
555
564
  });
556
565
 
557
566
  if (result.done) {
@@ -258,7 +258,7 @@ const SEED_DOCS = [
258
258
  metadata: { topic: "token-protection" },
259
259
  },
260
260
  {
261
- text: "Agent efficiency (claw-code patterns): (1) Zero-cost structural compaction — extracts signals (tools used, files touched, errors found, actions taken) from message history WITHOUT an LLM call. Costs $0.00 vs old method that burned tokens on a compacting model. Triggers when estimated tokens > 10K (text.length/4 approximation). Preserves last 4 messages verbatim. (2) Token estimation — text.length/4+1, fast approximation without tokenizer, ~10% accurate. Used for budget decisions before API calls. (3) Error-graceful tools — tool errors returned as [ERROR] prefixed results, not thrown. Model sees the error and decides how to proceed. (4) Pre/post tool hooks — shell commands in .wolverine/hooks.json, exit 0=allow, 2=deny. Enables audit logging and policy enforcement without hard-coding.",
261
+ text: "Agent efficiency (claw-code patterns): (1) Anthropic prompt caching — system prompt marked with cache_control:{type:'ephemeral'}, cached server-side across agent turns, 90% cheaper on repeat calls (12-16K saved tokens per heal). (2) Tool result truncation — capped at 4K chars before entering message history, prevents context blowup from large grep/file reads. (3) Zero-cost structural compaction — extracts signals (tools used, files touched, errors found, actions taken) from message history WITHOUT an LLM call. Costs $0.00 vs old method that burned tokens on a compacting model. Triggers when estimated tokens > 10K (text.length/4 approximation). Preserves last 4 messages verbatim. (4) Token estimation — text.length/4+1, fast approximation without tokenizer, ~10% accurate. Used for budget decisions before API calls. (5) Error-graceful tools — tool errors returned as [ERROR] prefixed results, not thrown. Model sees the error and decides how to proceed. (6) Pre/post tool hooks — shell commands in .wolverine/hooks.json, exit 0=allow, 2=deny. Enables audit logging and policy enforcement without hard-coding.",
262
262
  metadata: { topic: "agent-efficiency" },
263
263
  },
264
264
  {
@@ -13,6 +13,8 @@ function _extractTokens(usage) {
13
13
  return {
14
14
  input: usage.prompt_tokens || usage.input_tokens || 0,
15
15
  output: usage.completion_tokens || usage.output_tokens || 0,
16
+ cacheCreation: usage.cache_creation_input_tokens || 0,
17
+ cacheRead: usage.cache_read_input_tokens || 0,
16
18
  };
17
19
  }
18
20
 
@@ -188,9 +190,16 @@ async function _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tool
188
190
  messages: [{ role: "user", content: userPrompt }],
189
191
  };
190
192
 
191
- if (systemPrompt) params.system = systemPrompt;
193
+ // Prompt caching: mark system prompt for Anthropic's server-side cache.
194
+ // Same system prompt across agent turns gets cached after first call — 90% cheaper.
195
+ if (systemPrompt) {
196
+ params.system = [{
197
+ type: "text",
198
+ text: systemPrompt,
199
+ cache_control: { type: "ephemeral" },
200
+ }];
201
+ }
192
202
 
193
- // Convert OpenAI-style tools to Anthropic format
194
203
  if (tools && tools.length > 0) {
195
204
  params.tools = tools.map(_toAnthropicTool).filter(Boolean);
196
205
  if (toolChoice === "required") params.tool_choice = { type: "any" };
@@ -270,7 +279,14 @@ async function _anthropicCallWithHistory({ model, messages, tools, maxTokens })
270
279
  messages: merged,
271
280
  };
272
281
 
273
- if (systemPrompt) params.system = systemPrompt;
282
+ // Prompt caching for multi-turn: system prompt cached across all turns
283
+ if (systemPrompt) {
284
+ params.system = [{
285
+ type: "text",
286
+ text: systemPrompt,
287
+ cache_control: { type: "ephemeral" },
288
+ }];
289
+ }
274
290
 
275
291
  if (tools && tools.length > 0) {
276
292
  params.tools = tools.map(_toAnthropicTool).filter(Boolean);