wolverine-ai 3.0.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/package.json +1 -1
- package/src/agent/agent-engine.js +10 -1
- package/src/brain/brain.js +1 -1
- package/src/core/ai-client.js +19 -3
package/README.md
CHANGED
|
@@ -450,6 +450,8 @@ Three layers prevent token waste:
|
|
|
450
450
|
|
|
451
451
|
| Technique | What it does | Cost |
|
|
452
452
|
|-----------|-------------|------|
|
|
453
|
+
| **Prompt caching** | Anthropic system prompt cached server-side — 90% cheaper on repeat calls | 12-16K tokens saved per heal |
|
|
454
|
+
| **Tool result truncation** | Tool output capped at 4K chars — prevents context blowup from large reads | Up to 30K saved per turn |
|
|
453
455
|
| **Zero-cost compaction** | Extracts structural signals (tools, files, errors) from history — no LLM call | $0.00 |
|
|
454
456
|
| **Token estimation** | `text.length / 4` approximation — fast budget checks without tokenizer | 0ms |
|
|
455
457
|
| **Error-graceful tools** | Tool errors returned as `[ERROR]` results, not thrown — agent decides next step | More resilient |
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wolverine-ai",
|
|
3
|
-
"version": "3.0.0",
|
|
3
|
+
"version": "3.0.1",
|
|
4
4
|
"description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
package/src/agent/agent-engine.js
CHANGED
|
@@ -548,10 +548,19 @@ Project root: ${this.cwd}${primaryFile ? `\nPrimary crash file: ${primaryFile}`
|
|
|
548
548
|
// Post-hook: audit/modify result
|
|
549
549
|
_runPostHook(toolCall.function?.name, toolCall.function?.arguments, result.content, isError, this.cwd);
|
|
550
550
|
|
|
551
|
+
// Tool result truncation: cap at 4K chars to prevent context blowup.
|
|
552
|
+
// One grep_code can return 30K+ chars — the model doesn't need all of it.
|
|
553
|
+
const MAX_TOOL_RESULT = 4000;
|
|
554
|
+
let toolContent = isError ? `[ERROR] ${result.content}` : result.content;
|
|
555
|
+
if (toolContent && toolContent.length > MAX_TOOL_RESULT) {
|
|
556
|
+
const truncated = toolContent.length - MAX_TOOL_RESULT;
|
|
557
|
+
toolContent = toolContent.slice(0, MAX_TOOL_RESULT) + `\n\n... (truncated ${truncated} chars. Use offset/limit for large results.)`;
|
|
558
|
+
}
|
|
559
|
+
|
|
551
560
|
this.messages.push({
|
|
552
561
|
role: "tool",
|
|
553
562
|
tool_call_id: toolCall.id,
|
|
554
|
-
content: isError ? `[ERROR] ${result.content}` : result.content,
|
|
563
|
+
content: toolContent,
|
|
555
564
|
});
|
|
556
565
|
|
|
557
566
|
if (result.done) {
|
package/src/brain/brain.js
CHANGED
|
@@ -258,7 +258,7 @@ const SEED_DOCS = [
|
|
|
258
258
|
metadata: { topic: "token-protection" },
|
|
259
259
|
},
|
|
260
260
|
{
|
|
261
|
-
text: "Agent efficiency (claw-code patterns): (1) Zero-cost structural compaction — extracts signals (tools used, files touched, errors found, actions taken) from message history WITHOUT an LLM call. Costs $0.00 vs old method that burned tokens on a compacting model. Triggers when estimated tokens > 10K (text.length/4 approximation). Preserves last 4 messages verbatim. (2) Token estimation — text.length/4+1, fast approximation without tokenizer, ~10% accurate. Used for budget decisions before API calls. (3) Error-graceful tools — tool errors returned as [ERROR] prefixed results, not thrown. Model sees the error and decides how to proceed. (4) Pre/post tool hooks — shell commands in .wolverine/hooks.json, exit 0=allow, 2=deny. Enables audit logging and policy enforcement without hard-coding.",
|
|
261
|
+
text: "Agent efficiency (claw-code patterns): (1) Anthropic prompt caching — system prompt marked with cache_control:{type:'ephemeral'}, cached server-side across agent turns, 90% cheaper on repeat calls (12-16K saved tokens per heal). (2) Tool result truncation — capped at 4K chars before entering message history, prevents context blowup from large grep/file reads. (3) Zero-cost structural compaction — extracts signals (tools used, files touched, errors found, actions taken) from message history WITHOUT an LLM call. Costs $0.00 vs old method that burned tokens on a compacting model. Triggers when estimated tokens > 10K (text.length/4 approximation). Preserves last 4 messages verbatim. (4) Token estimation — text.length/4+1, fast approximation without tokenizer, ~10% accurate. Used for budget decisions before API calls. (5) Error-graceful tools — tool errors returned as [ERROR] prefixed results, not thrown. Model sees the error and decides how to proceed. (6) Pre/post tool hooks — shell commands in .wolverine/hooks.json, exit 0=allow, 2=deny. Enables audit logging and policy enforcement without hard-coding.",
|
|
262
262
|
metadata: { topic: "agent-efficiency" },
|
|
263
263
|
},
|
|
264
264
|
{
|
package/src/core/ai-client.js
CHANGED
|
@@ -13,6 +13,8 @@ function _extractTokens(usage) {
|
|
|
13
13
|
return {
|
|
14
14
|
input: usage.prompt_tokens || usage.input_tokens || 0,
|
|
15
15
|
output: usage.completion_tokens || usage.output_tokens || 0,
|
|
16
|
+
cacheCreation: usage.cache_creation_input_tokens || 0,
|
|
17
|
+
cacheRead: usage.cache_read_input_tokens || 0,
|
|
16
18
|
};
|
|
17
19
|
}
|
|
18
20
|
|
|
@@ -188,9 +190,16 @@ async function _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tool
|
|
|
188
190
|
messages: [{ role: "user", content: userPrompt }],
|
|
189
191
|
};
|
|
190
192
|
|
|
191
|
-
|
|
193
|
+
// Prompt caching: mark system prompt for Anthropic's server-side cache.
|
|
194
|
+
// Same system prompt across agent turns gets cached after first call — 90% cheaper.
|
|
195
|
+
if (systemPrompt) {
|
|
196
|
+
params.system = [{
|
|
197
|
+
type: "text",
|
|
198
|
+
text: systemPrompt,
|
|
199
|
+
cache_control: { type: "ephemeral" },
|
|
200
|
+
}];
|
|
201
|
+
}
|
|
192
202
|
|
|
193
|
-
// Convert OpenAI-style tools to Anthropic format
|
|
194
203
|
if (tools && tools.length > 0) {
|
|
195
204
|
params.tools = tools.map(_toAnthropicTool).filter(Boolean);
|
|
196
205
|
if (toolChoice === "required") params.tool_choice = { type: "any" };
|
|
@@ -270,7 +279,14 @@ async function _anthropicCallWithHistory({ model, messages, tools, maxTokens })
|
|
|
270
279
|
messages: merged,
|
|
271
280
|
};
|
|
272
281
|
|
|
273
|
-
|
|
282
|
+
// Prompt caching for multi-turn: system prompt cached across all turns
|
|
283
|
+
if (systemPrompt) {
|
|
284
|
+
params.system = [{
|
|
285
|
+
type: "text",
|
|
286
|
+
text: systemPrompt,
|
|
287
|
+
cache_control: { type: "ephemeral" },
|
|
288
|
+
}];
|
|
289
|
+
}
|
|
274
290
|
|
|
275
291
|
if (tools && tools.length > 0) {
|
|
276
292
|
params.tools = tools.map(_toAnthropicTool).filter(Boolean);
|