wolverine-ai 2.9.1 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -0
- package/package.json +1 -1
- package/src/agent/agent-engine.js +179 -29
- package/src/brain/brain.js +4 -0
- package/src/core/ai-client.js +19 -3
package/README.md
CHANGED
|
@@ -446,6 +446,27 @@ Three layers prevent token waste:
|
|
|
446
446
|
|
|
447
447
|
---
|
|
448
448
|
|
|
449
|
+
## Agent Efficiency (claw-code patterns)
|
|
450
|
+
|
|
451
|
+
| Technique | What it does | Cost / benefit |
|
|
452
|
+
|-----------|-------------|------|
|
|
453
|
+
| **Prompt caching** | Anthropic system prompt cached server-side — 90% cheaper on repeat calls | 12-16K tokens saved per heal |
|
|
454
|
+
| **Tool result truncation** | Tool output capped at 4K chars — prevents context blowup from large reads | Up to 30K saved per turn |
|
|
455
|
+
| **Zero-cost compaction** | Extracts structural signals (tools, files, errors) from history — no LLM call | $0.00 |
|
|
456
|
+
| **Token estimation** | `text.length / 4` approximation — fast budget checks without tokenizer | 0ms |
|
|
457
|
+
| **Error-graceful tools** | Tool errors returned as `[ERROR]` results, not thrown — agent decides next step | More resilient |
|
|
458
|
+
| **Pre/post tool hooks** | Shell commands in `.wolverine/hooks.json` — exit 0=allow, 2=deny | Extensible |
|
|
459
|
+
|
|
460
|
+
**Hook configuration** (`.wolverine/hooks.json`):
|
|
461
|
+
```json
|
|
462
|
+
{
|
|
463
|
+
"pre_tool_use": ["bash -c 'if [ \"$HOOK_TOOL_NAME\" = \"bash_exec\" ]; then exit 2; fi'"],
|
|
464
|
+
"post_tool_use": ["bash -c 'echo \"Tool: $HOOK_TOOL_NAME\" >> /tmp/audit.log'"]
|
|
465
|
+
}
|
|
466
|
+
```
|
|
467
|
+
|
|
468
|
+
---
|
|
469
|
+
|
|
449
470
|
## Cost Optimization
|
|
450
471
|
|
|
451
472
|
Wolverine minimizes AI spend through 7 techniques:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wolverine-ai",
|
|
3
|
-
"version": "2.9.1",
|
|
3
|
+
"version": "3.0.1",
|
|
4
4
|
"description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -463,33 +463,24 @@ Project root: ${this.cwd}${primaryFile ? `\nPrimary crash file: ${primaryFile}`
|
|
|
463
463
|
|
|
464
464
|
console.log(chalk.gray(` 🤖 Agent turn ${this.turnCount}/${this.maxTurns} (${this.totalTokens} tokens used)`));
|
|
465
465
|
|
|
466
|
-
//
|
|
467
|
-
//
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
this.messages[0], // system prompt
|
|
485
|
-
{ role: "assistant", content: `[Prior work summary]\n${compactResult.content}` },
|
|
486
|
-
{ role: "user", content: "Continue from where you left off." },
|
|
487
|
-
...this.messages.slice(-2), // last exchange
|
|
488
|
-
];
|
|
489
|
-
console.log(chalk.gray(` 📦 Compacted ${historyToCompact.length} messages → summary (${compactResult.content.length} chars)`));
|
|
490
|
-
}
|
|
491
|
-
}
|
|
492
|
-
} catch { /* compacting failed — continue with full context */ }
|
|
466
|
+
// Zero-cost structural compaction (claw-code pattern)
|
|
467
|
+
// Extracts signals from message history WITHOUT an LLM call.
|
|
468
|
+
// Preserves last 4 messages verbatim, summarizes older ones structurally.
|
|
469
|
+
// Triggers when estimated tokens > 10K (text.length / 4 approximation).
|
|
470
|
+
const estimatedTokens = this.messages.reduce((s, m) => s + _estimateTokens(m), 0);
|
|
471
|
+
if (this.messages.length > 6 && estimatedTokens > 10000) {
|
|
472
|
+
const preserveCount = 4; // keep the last 4 messages verbatim (the system prompt is re-added separately)
|
|
473
|
+
const toCompact = this.messages.slice(1, -preserveCount);
|
|
474
|
+
if (toCompact.length > 2) {
|
|
475
|
+
const summary = _structuralSummary(toCompact, this.filesRead, this.filesModified, this.toolCalls);
|
|
476
|
+
this.messages = [
|
|
477
|
+
this.messages[0], // system prompt
|
|
478
|
+
{ role: "assistant", content: summary },
|
|
479
|
+
{ role: "user", content: "Continue from where you left off." },
|
|
480
|
+
...this.messages.slice(-preserveCount),
|
|
481
|
+
];
|
|
482
|
+
console.log(chalk.gray(` 📦 Compacted ${toCompact.length} messages (${estimatedTokens} → ~${_estimateTokens({ content: summary })} tokens) — $0.00`));
|
|
483
|
+
}
|
|
493
484
|
}
|
|
494
485
|
|
|
495
486
|
let response;
|
|
@@ -533,11 +524,43 @@ Project root: ${this.cwd}${primaryFile ? `\nPrimary crash file: ${primaryFile}`
|
|
|
533
524
|
}
|
|
534
525
|
|
|
535
526
|
for (const toolCall of assistantMessage.tool_calls) {
|
|
536
|
-
|
|
527
|
+
// Error-graceful tool execution (claw-code pattern)
|
|
528
|
+
// Tool errors are returned as is_error results, not thrown.
|
|
529
|
+
// This lets the model see the error and decide how to proceed.
|
|
530
|
+
let result;
|
|
531
|
+
let isError = false;
|
|
532
|
+
try {
|
|
533
|
+
// Pre-hook: check if tool should be blocked
|
|
534
|
+
const hookResult = _runPreHook(toolCall.function?.name, toolCall.function?.arguments, this.cwd);
|
|
535
|
+
if (hookResult.denied) {
|
|
536
|
+
result = { content: `Blocked by hook: ${hookResult.message}` };
|
|
537
|
+
isError = true;
|
|
538
|
+
} else {
|
|
539
|
+
result = await this._executeTool(toolCall);
|
|
540
|
+
}
|
|
541
|
+
} catch (err) {
|
|
542
|
+
// Error-graceful: return error as tool result, don't break the loop
|
|
543
|
+
result = { content: `Tool error: ${err.message?.slice(0, 200)}` };
|
|
544
|
+
isError = true;
|
|
545
|
+
console.log(chalk.yellow(` ⚠️ Tool error (${toolCall.function?.name}): ${err.message?.slice(0, 80)}`));
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
// Post-hook: audit/modify result
|
|
549
|
+
_runPostHook(toolCall.function?.name, toolCall.function?.arguments, result.content, isError, this.cwd);
|
|
550
|
+
|
|
551
|
+
// Tool result truncation: cap at 4K chars to prevent context blowup.
|
|
552
|
+
// One grep_code can return 30K+ chars — the model doesn't need all of it.
|
|
553
|
+
const MAX_TOOL_RESULT = 4000;
|
|
554
|
+
let toolContent = isError ? `[ERROR] ${result.content}` : result.content;
|
|
555
|
+
if (toolContent && toolContent.length > MAX_TOOL_RESULT) {
|
|
556
|
+
const truncated = toolContent.length - MAX_TOOL_RESULT;
|
|
557
|
+
toolContent = toolContent.slice(0, MAX_TOOL_RESULT) + `\n\n... (truncated ${truncated} chars. Use offset/limit for large results.)`;
|
|
558
|
+
}
|
|
559
|
+
|
|
537
560
|
this.messages.push({
|
|
538
561
|
role: "tool",
|
|
539
562
|
tool_call_id: toolCall.id,
|
|
540
|
-
content:
|
|
563
|
+
content: toolContent,
|
|
541
564
|
});
|
|
542
565
|
|
|
543
566
|
if (result.done) {
|
|
@@ -547,6 +570,7 @@ Project root: ${this.cwd}${primaryFile ? `\nPrimary crash file: ${primaryFile}`
|
|
|
547
570
|
filesModified: result.filesModified || this.filesModified,
|
|
548
571
|
turnCount: this.turnCount,
|
|
549
572
|
totalTokens: this.totalTokens,
|
|
573
|
+
toolCalls: this.toolCalls,
|
|
550
574
|
};
|
|
551
575
|
}
|
|
552
576
|
}
|
|
@@ -1090,4 +1114,130 @@ Project root: ${this.cwd}${primaryFile ? `\nPrimary crash file: ${primaryFile}`
|
|
|
1090
1114
|
}
|
|
1091
1115
|
}
|
|
1092
1116
|
|
|
1117
|
+
// ── Zero-Cost Compaction Helpers (claw-code pattern) ──
|
|
1118
|
+
|
|
1119
|
+
/**
 * Estimate how many tokens a chat message costs without running a tokenizer.
 *
 * Heuristic: ceil(characters / 4) + 1, where "characters" is the size of the
 * message content plus the arguments of every tool call attached to it.
 * This is a rough budget figure only; it is not an exact token count.
 *
 * String payloads are measured by their length. Structured payloads (for
 * example an array of content blocks, or tool arguments that are already
 * parsed into an object) are measured by the length of their JSON form, so
 * they are not undercounted or turned into NaN.
 *
 * @param {object|null|undefined} message - Chat message with optional
 *   `content` and `tool_calls` fields.
 * @returns {number} Estimated token count; 0 when `message` is missing.
 */
function _estimateTokens(message) {
  if (!message) return 0;

  const charCount = (value) => {
    if (!value) return 0;
    if (typeof value === "string") return value.length;
    try {
      // JSON.stringify returns undefined for functions/symbols.
      return JSON.stringify(value)?.length ?? 0;
    } catch {
      // Unserializable payload (e.g. circular): contributes nothing to the estimate.
      return 0;
    }
  };

  // Guard against a malformed `tool_calls` so a budget check can never throw.
  const toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
  const toolArgChars = toolCalls.reduce(
    (sum, call) => sum + charCount(call?.function?.arguments),
    0,
  );

  return Math.ceil((charCount(message.content) + toolArgChars) / 4) + 1;
}
|
|
1129
|
+
|
|
1130
|
+
/**
 * Build a plain-text summary of a slice of conversation history without
 * calling an LLM.
 *
 * The summary lists: number of messages, tool names invoked, files read and
 * modified (as tracked by the caller), file-like strings referenced in tool
 * arguments/results, the first few tool errors, the most recent user
 * requests, and the most recent assistant statements.
 *
 * Robustness notes:
 * - Tool calls without a name are ignored instead of adding `undefined`.
 * - Non-string message content is serialized to JSON before inspection, so
 *   structured content cannot throw.
 * - Missing `filesRead` / `filesModified` are treated as empty.
 *
 * @param {Array<object>} messages - Messages being compacted.
 * @param {Iterable<string>} [filesRead] - Files the agent has read so far.
 * @param {Iterable<string>} [filesModified] - Files the agent has modified so far.
 * @param {*} [toolCalls] - Accepted for interface compatibility; not used.
 * @returns {string} Newline-separated summary text.
 */
function _structuralSummary(messages, filesRead, filesModified, toolCalls) {
  const asText = (value) => {
    if (typeof value === "string") return value;
    if (value == null) return "";
    try {
      return JSON.stringify(value) ?? "";
    } catch {
      return String(value);
    }
  };
  const asList = (value) => {
    if (typeof value === "string") return [value];
    if (value != null && typeof value[Symbol.iterator] === "function") return [...value];
    return [];
  };

  const history = asList(messages);
  const toolsUsed = new Set();
  const filesReferenced = new Set();
  const errors = [];
  const userRequests = [];
  const actions = [];

  for (const msg of history) {
    if (!msg) continue;
    const text = asText(msg.content);

    if (msg.role === "user") {
      if (text) userRequests.push(text.slice(0, 160));
    } else if (msg.role === "assistant") {
      const calls = Array.isArray(msg.tool_calls) ? msg.tool_calls : [];
      for (const call of calls) {
        const name = call?.function?.name;
        if (name) toolsUsed.add(name);

        // Pull path-like hints out of the tool arguments.
        try {
          const raw = call?.function?.arguments;
          const args = typeof raw === "string" ? JSON.parse(raw || "{}") : raw;
          if (args?.path) filesReferenced.add(String(args.path));
          if (args?.pattern) filesReferenced.add(String(args.pattern));
        } catch {
          // Malformed argument JSON: deliberately ignored, the summary is best-effort.
        }
      }
      // Bracketed content (e.g. a previous compaction summary) is not an "action".
      if (text && !text.startsWith("[")) actions.push(text.slice(0, 100));
    } else if (msg.role === "tool") {
      if (text.startsWith("[ERROR]") || text.includes("Error:")) {
        errors.push(text.slice(0, 100));
      }
      // Pull project-relative paths out of the tool output.
      const pathMatches = text.match(/(?:server|src)\/[^\s"']+/g);
      if (pathMatches) pathMatches.forEach((p) => filesReferenced.add(p));
    }
  }

  const lines = [
    "[Compacted conversation summary — $0.00, no LLM call]",
    `Messages compacted: ${history.length}`,
    `Tools used: ${[...toolsUsed].join(", ") || "none"}`,
    `Files read: ${asList(filesRead).slice(0, 10).join(", ") || "none"}`,
    `Files modified: ${asList(filesModified).join(", ") || "none"}`,
    `Files referenced: ${[...filesReferenced].slice(0, 10).join(", ") || "none"}`,
    errors.length > 0 ? `Errors encountered: ${errors.slice(0, 3).join("; ")}` : null,
    userRequests.length > 0 ? `User requests: ${userRequests.slice(-2).join(" | ")}` : null,
    actions.length > 0 ? `Actions taken: ${actions.slice(-3).join(" | ")}` : null,
  ].filter(Boolean);

  return lines.join("\n");
}
|
|
1187
|
+
|
|
1188
|
+
// ── Pre/Post Tool Hooks (claw-code pattern) ──
|
|
1189
|
+
|
|
1190
|
+
/**
|
|
1191
|
+
* Pre-tool hook: check if tool execution should be blocked.
|
|
1192
|
+
* Reads hooks from .wolverine/hooks.json if it exists.
|
|
1193
|
+
* Exit code 0 = allow, 2 = deny.
|
|
1194
|
+
*/
|
|
1195
|
+
function _runPreHook(toolName, toolInput, cwd) {
|
|
1196
|
+
try {
|
|
1197
|
+
const hooksPath = path.join(cwd, ".wolverine", "hooks.json");
|
|
1198
|
+
if (!fs.existsSync(hooksPath)) return { denied: false };
|
|
1199
|
+
const hooks = JSON.parse(fs.readFileSync(hooksPath, "utf-8"));
|
|
1200
|
+
if (!hooks.pre_tool_use || hooks.pre_tool_use.length === 0) return { denied: false };
|
|
1201
|
+
|
|
1202
|
+
for (const cmd of hooks.pre_tool_use) {
|
|
1203
|
+
try {
|
|
1204
|
+
const { execSync } = require("child_process");
|
|
1205
|
+
execSync(cmd, {
|
|
1206
|
+
input: JSON.stringify({ event: "PreToolUse", tool_name: toolName, tool_input: toolInput }),
|
|
1207
|
+
env: { ...process.env, HOOK_TOOL_NAME: toolName || "", HOOK_TOOL_INPUT: (toolInput || "").slice(0, 1000) },
|
|
1208
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
1209
|
+
timeout: 5000,
|
|
1210
|
+
});
|
|
1211
|
+
} catch (e) {
|
|
1212
|
+
if (e.status === 2) return { denied: true, message: (e.stdout?.toString() || "Hook denied").trim() };
|
|
1213
|
+
}
|
|
1214
|
+
}
|
|
1215
|
+
} catch {}
|
|
1216
|
+
return { denied: false };
|
|
1217
|
+
}
|
|
1218
|
+
|
|
1219
|
+
/**
|
|
1220
|
+
* Post-tool hook: audit/log tool execution.
|
|
1221
|
+
*/
|
|
1222
|
+
function _runPostHook(toolName, toolInput, toolOutput, isError, cwd) {
|
|
1223
|
+
try {
|
|
1224
|
+
const hooksPath = path.join(cwd, ".wolverine", "hooks.json");
|
|
1225
|
+
if (!fs.existsSync(hooksPath)) return;
|
|
1226
|
+
const hooks = JSON.parse(fs.readFileSync(hooksPath, "utf-8"));
|
|
1227
|
+
if (!hooks.post_tool_use || hooks.post_tool_use.length === 0) return;
|
|
1228
|
+
|
|
1229
|
+
for (const cmd of hooks.post_tool_use) {
|
|
1230
|
+
try {
|
|
1231
|
+
const { execSync } = require("child_process");
|
|
1232
|
+
execSync(cmd, {
|
|
1233
|
+
input: JSON.stringify({ event: "PostToolUse", tool_name: toolName, tool_input: toolInput, tool_output: (toolOutput || "").slice(0, 500), is_error: isError }),
|
|
1234
|
+
env: { ...process.env, HOOK_TOOL_NAME: toolName || "", HOOK_TOOL_IS_ERROR: isError ? "1" : "0" },
|
|
1235
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
1236
|
+
timeout: 5000,
|
|
1237
|
+
});
|
|
1238
|
+
} catch {}
|
|
1239
|
+
}
|
|
1240
|
+
} catch {}
|
|
1241
|
+
}
|
|
1242
|
+
|
|
1093
1243
|
module.exports = { AgentEngine, TOOL_DEFINITIONS, BLOCKED_COMMANDS };
|
package/src/brain/brain.js
CHANGED
|
@@ -257,6 +257,10 @@ const SEED_DOCS = [
|
|
|
257
257
|
text: "Token waste prevention: 3 layers. (1) Empty stderr guard — signal kills with no error output just restart, no AI ($0.00). (2) Loop guard — 3 failed heals on same error → stop and file bug report, no more AI calls. (3) Global rate limit — max 5 heals per 5 minutes regardless of error signature. Idle server burns exactly $0.00 in tokens.",
|
|
258
258
|
metadata: { topic: "token-protection" },
|
|
259
259
|
},
|
|
260
|
+
{
|
|
261
|
+
text: "Agent efficiency (claw-code patterns): (1) Anthropic prompt caching — system prompt marked with cache_control:{type:'ephemeral'}, cached server-side across agent turns, 90% cheaper on repeat calls (12-16K saved tokens per heal). (2) Tool result truncation — capped at 4K chars before entering message history, prevents context blowup from large grep/file reads. (3) Zero-cost structural compaction — extracts signals (tools used, files touched, errors found, actions taken) from message history WITHOUT an LLM call. Costs $0.00 vs old method that burned tokens on a compacting model. Triggers when estimated tokens > 10K (text.length/4 approximation). Preserves last 4 messages verbatim. (2) Token estimation — text.length/4+1, fast approximation without tokenizer, ~10% accurate. Used for budget decisions before API calls. (3) Error-graceful tools — tool errors returned as [ERROR] prefixed results, not thrown. Model sees the error and decides how to proceed. (4) Pre/post tool hooks — shell commands in .wolverine/hooks.json, exit 0=allow, 2=deny. Enables audit logging and policy enforcement without hard-coding.",
|
|
262
|
+
metadata: { topic: "agent-efficiency" },
|
|
263
|
+
},
|
|
260
264
|
{
|
|
261
265
|
text: "Cost optimization: 7 techniques reduce heal cost from $0.31 to $0.02 for simple errors. (1) Verifier skips route probe for simple errors (TypeError/ReferenceError/SyntaxError) — trusts syntax+boot, ErrorMonitor is safety net. Prevents false-rejection cascades. (2) Sub-agents use Haiku (classifier model) for explore/plan/verify/research — only fixer uses Sonnet/Opus. 6 Haiku calls=$0.006 vs 6 Sonnet calls=$0.12. (3) Agent context compacted every 3 turns using compacting model — prevents 15K→95K token blowup. (4) Brain checked for cached fix patterns before AI — repeat errors cost $0. (5) Token budgets capped by error complexity: simple=20K agent budget, moderate=50K, complex=100K. Simple errors get 4 agent turns max. (6) Prior attempt summaries (not full context) passed between iterations — concise 'do NOT repeat' directives. (7) Fast path includes last known good backup code so AI can revert broken additions instead of patching around them.",
|
|
262
266
|
metadata: { topic: "cost-optimization" },
|
package/src/core/ai-client.js
CHANGED
|
@@ -13,6 +13,8 @@ function _extractTokens(usage) {
|
|
|
13
13
|
return {
|
|
14
14
|
input: usage.prompt_tokens || usage.input_tokens || 0,
|
|
15
15
|
output: usage.completion_tokens || usage.output_tokens || 0,
|
|
16
|
+
cacheCreation: usage.cache_creation_input_tokens || 0,
|
|
17
|
+
cacheRead: usage.cache_read_input_tokens || 0,
|
|
16
18
|
};
|
|
17
19
|
}
|
|
18
20
|
|
|
@@ -188,9 +190,16 @@ async function _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tool
|
|
|
188
190
|
messages: [{ role: "user", content: userPrompt }],
|
|
189
191
|
};
|
|
190
192
|
|
|
191
|
-
|
|
193
|
+
// Prompt caching: mark system prompt for Anthropic's server-side cache.
|
|
194
|
+
// Same system prompt across agent turns gets cached after first call — 90% cheaper.
|
|
195
|
+
if (systemPrompt) {
|
|
196
|
+
params.system = [{
|
|
197
|
+
type: "text",
|
|
198
|
+
text: systemPrompt,
|
|
199
|
+
cache_control: { type: "ephemeral" },
|
|
200
|
+
}];
|
|
201
|
+
}
|
|
192
202
|
|
|
193
|
-
// Convert OpenAI-style tools to Anthropic format
|
|
194
203
|
if (tools && tools.length > 0) {
|
|
195
204
|
params.tools = tools.map(_toAnthropicTool).filter(Boolean);
|
|
196
205
|
if (toolChoice === "required") params.tool_choice = { type: "any" };
|
|
@@ -270,7 +279,14 @@ async function _anthropicCallWithHistory({ model, messages, tools, maxTokens })
|
|
|
270
279
|
messages: merged,
|
|
271
280
|
};
|
|
272
281
|
|
|
273
|
-
|
|
282
|
+
// Prompt caching for multi-turn: system prompt cached across all turns
|
|
283
|
+
if (systemPrompt) {
|
|
284
|
+
params.system = [{
|
|
285
|
+
type: "text",
|
|
286
|
+
text: systemPrompt,
|
|
287
|
+
cache_control: { type: "ephemeral" },
|
|
288
|
+
}];
|
|
289
|
+
}
|
|
274
290
|
|
|
275
291
|
if (tools && tools.length > 0) {
|
|
276
292
|
params.tools = tools.map(_toAnthropicTool).filter(Boolean);
|