wolverine-ai 2.9.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -358,11 +358,12 @@ High-performance vector database that grows without slowing down:
358
358
 
359
359
  **Search performance** (scales gracefully):
360
360
 
361
- | Entries | Semantic Search | Keyword (BM25) |
362
- |---------|----------------|----------------|
363
- | 100 | 0.2ms | 0.005ms |
364
- | 1,000 | 0.4ms | 0.01ms |
365
- | 10,000 | 4.4ms | 0.1ms |
361
+ | Entries | Semantic Search | Keyword (BM25) | Clusters |
362
+ |---------|----------------|----------------|----------|
363
+ | 100 | 0.2ms | 0.005ms | 10 |
364
+ | 1,000 | 0.4ms | 0.01ms | 32 |
365
+ | 10,000 | 4.4ms | 0.1ms | 100 |
366
+ | 50,000 | 23.7ms | 0.5ms | 224 |
366
367
 
367
368
  **4 optimization techniques:**
368
369
  1. **Pre-normalized vectors** — cosine similarity = dot product (no sqrt per query)
@@ -445,6 +446,25 @@ Three layers prevent token waste:
445
446
 
446
447
  ---
447
448
 
449
+ ## Agent Efficiency (claw-code patterns)
450
+
451
+ | Technique | What it does | Cost |
452
+ |-----------|-------------|------|
453
+ | **Zero-cost compaction** | Extracts structural signals (tools, files, errors) from history — no LLM call | $0.00 |
454
+ | **Token estimation** | `text.length / 4` approximation — fast budget checks without tokenizer | 0ms |
455
+ | **Error-graceful tools** | Tool errors returned as `[ERROR]` results, not thrown — agent decides next step | More resilient |
456
+ | **Pre/post tool hooks** | Shell commands in `.wolverine/hooks.json` — exit 0=allow, 2=deny | Extensible |
457
+
458
+ **Hook configuration** (`.wolverine/hooks.json`):
459
+ ```json
460
+ {
461
+ "pre_tool_use": ["bash -c 'if [ \"$HOOK_TOOL_NAME\" = \"bash_exec\" ]; then exit 2; fi'"],
462
+ "post_tool_use": ["bash -c 'echo \"Tool: $HOOK_TOOL_NAME\" >> /tmp/audit.log'"]
463
+ }
464
+ ```
465
+
466
+ ---
467
+
448
468
  ## Cost Optimization
449
469
 
450
470
  Wolverine minimizes AI spend through 7 techniques:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wolverine-ai",
3
- "version": "2.9.0",
3
+ "version": "3.0.0",
4
4
  "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -463,33 +463,24 @@ Project root: ${this.cwd}${primaryFile ? `\nPrimary crash file: ${primaryFile}`
463
463
 
464
464
  console.log(chalk.gray(` 🤖 Agent turn ${this.turnCount}/${this.maxTurns} (${this.totalTokens} tokens used)`));
465
465
 
466
- // Compact context every 3 turns to prevent token blowup
467
- // Turn 6 without compacting: ~95K tokens. With compacting: ~20K tokens.
468
- if (this.turnCount > 1 && this.turnCount % 3 === 0 && this.messages.length > 4) {
469
- try {
470
- const { aiCall } = require("./ai-client") || require("../core/ai-client");
471
- const { getModel: _gm } = require("./models") || require("../core/models");
472
- const historyToCompact = this.messages.slice(1, -2); // keep system + last exchange
473
- if (historyToCompact.length > 2) {
474
- const historyText = historyToCompact.map(m => `${m.role}: ${(m.content || "").slice(0, 500)}`).join("\n");
475
- const compactResult = await aiCall({
476
- model: _gm("compacting"),
477
- systemPrompt: "Summarize this agent conversation history into a concise status report. Keep: files read, changes made, errors found, what was tried. Remove: full file contents, redundant tool results.",
478
- userPrompt: historyText.slice(0, 8000),
479
- maxTokens: 512,
480
- category: "brain",
481
- });
482
- if (compactResult.content) {
483
- this.messages = [
484
- this.messages[0], // system prompt
485
- { role: "assistant", content: `[Prior work summary]\n${compactResult.content}` },
486
- { role: "user", content: "Continue from where you left off." },
487
- ...this.messages.slice(-2), // last exchange
488
- ];
489
- console.log(chalk.gray(` 📦 Compacted ${historyToCompact.length} messages → summary (${compactResult.content.length} chars)`));
490
- }
491
- }
492
- } catch { /* compacting failed — continue with full context */ }
466
+ // Zero-cost structural compaction (claw-code pattern)
467
+ // Extracts signals from message history WITHOUT an LLM call.
468
+ // Preserves last 4 messages verbatim, summarizes older ones structurally.
469
+ // Triggers when estimated tokens > 10K (text.length / 4 approximation).
470
+ const estimatedTokens = this.messages.reduce((s, m) => s + _estimateTokens(m), 0);
471
+ if (this.messages.length > 6 && estimatedTokens > 10000) {
472
+ const preserveCount = 4; // keep system + last 3 exchanges
473
+ const toCompact = this.messages.slice(1, -preserveCount);
474
+ if (toCompact.length > 2) {
475
+ const summary = _structuralSummary(toCompact, this.filesRead, this.filesModified, this.toolCalls);
476
+ this.messages = [
477
+ this.messages[0], // system prompt
478
+ { role: "assistant", content: summary },
479
+ { role: "user", content: "Continue from where you left off." },
480
+ ...this.messages.slice(-preserveCount),
481
+ ];
482
+ console.log(chalk.gray(` 📦 Compacted ${toCompact.length} messages (${estimatedTokens} → ~${_estimateTokens({ content: summary })} tokens) — $0.00`));
483
+ }
493
484
  }
494
485
 
495
486
  let response;
@@ -533,11 +524,34 @@ Project root: ${this.cwd}${primaryFile ? `\nPrimary crash file: ${primaryFile}`
533
524
  }
534
525
 
535
526
  for (const toolCall of assistantMessage.tool_calls) {
536
- const result = await this._executeTool(toolCall);
527
+ // Error-graceful tool execution (claw-code pattern)
528
+ // Tool errors are returned as is_error results, not thrown.
529
+ // This lets the model see the error and decide how to proceed.
530
+ let result;
531
+ let isError = false;
532
+ try {
533
+ // Pre-hook: check if tool should be blocked
534
+ const hookResult = _runPreHook(toolCall.function?.name, toolCall.function?.arguments, this.cwd);
535
+ if (hookResult.denied) {
536
+ result = { content: `Blocked by hook: ${hookResult.message}` };
537
+ isError = true;
538
+ } else {
539
+ result = await this._executeTool(toolCall);
540
+ }
541
+ } catch (err) {
542
+ // Error-graceful: return error as tool result, don't break the loop
543
+ result = { content: `Tool error: ${err.message?.slice(0, 200)}` };
544
+ isError = true;
545
+ console.log(chalk.yellow(` ⚠️ Tool error (${toolCall.function?.name}): ${err.message?.slice(0, 80)}`));
546
+ }
547
+
548
+ // Post-hook: audit/modify result
549
+ _runPostHook(toolCall.function?.name, toolCall.function?.arguments, result.content, isError, this.cwd);
550
+
537
551
  this.messages.push({
538
552
  role: "tool",
539
553
  tool_call_id: toolCall.id,
540
- content: result.content,
554
+ content: isError ? `[ERROR] ${result.content}` : result.content,
541
555
  });
542
556
 
543
557
  if (result.done) {
@@ -547,6 +561,7 @@ Project root: ${this.cwd}${primaryFile ? `\nPrimary crash file: ${primaryFile}`
547
561
  filesModified: result.filesModified || this.filesModified,
548
562
  turnCount: this.turnCount,
549
563
  totalTokens: this.totalTokens,
564
+ toolCalls: this.toolCalls,
550
565
  };
551
566
  }
552
567
  }
@@ -1090,4 +1105,130 @@ Project root: ${this.cwd}${primaryFile ? `\nPrimary crash file: ${primaryFile}`
1090
1105
  }
1091
1106
  }
1092
1107
 
1108
+ // ── Zero-Cost Compaction Helpers (claw-code pattern) ──
1109
+
1110
/**
 * Cheap token estimate with no tokenizer: roughly one token per 4 characters,
 * plus 1 for per-message overhead. Counts the message text plus any tool-call
 * argument strings. Off by ~10%, which is fine for budget decisions.
 *
 * @param {object|null|undefined} message - chat message ({ content, tool_calls })
 * @returns {number} estimated token count (0 for a missing message)
 */
function _estimateTokens(message) {
  if (!message) return 0;
  let chars = (message.content || "").length;
  for (const call of message.tool_calls || []) {
    chars += call.function?.arguments?.length || 0;
  }
  return Math.ceil(chars / 4) + 1;
}
1120
+
1121
/**
 * Extract structural signals from message history WITHOUT an LLM call.
 * Returns a concise summary preserving: tools used, files touched, errors found,
 * what was tried, and pending work. Costs $0.00.
 *
 * Fixes vs. previous version: skips tool calls with a missing name (they used
 * to inject `undefined` into the "Tools used" line), reports the tool-call
 * count (the `toolCalls` parameter was previously accepted but never used),
 * and tolerates null/undefined `filesRead` / `filesModified`.
 *
 * @param {Array<object>} messages - chat messages being compacted away
 * @param {Iterable<string>|null|undefined} filesRead - files the agent has read
 * @param {Iterable<string>|null|undefined} filesModified - files the agent has changed
 * @param {Array<object>|null|undefined} toolCalls - tool-call log (only its length is reported)
 * @returns {string} newline-joined summary block
 */
function _structuralSummary(messages, filesRead, filesModified, toolCalls) {
  const toolsUsed = new Set();
  const filesReferenced = new Set();
  const errors = [];
  const userRequests = [];
  const actions = [];

  for (const msg of messages) {
    if (msg.role === "user") {
      const text = (msg.content || "").slice(0, 160);
      if (text) userRequests.push(text);
    }
    if (msg.role === "assistant" && msg.tool_calls) {
      for (const tc of msg.tool_calls) {
        // Skip unnamed calls — `undefined` would otherwise pollute the Set.
        if (tc.function?.name) toolsUsed.add(tc.function.name);
        // Extract file paths from tool args (best-effort; args may not be JSON).
        try {
          const args = JSON.parse(tc.function?.arguments || "{}");
          if (args.path) filesReferenced.add(args.path);
          if (args.pattern) filesReferenced.add(args.pattern);
        } catch { /* malformed arguments — nothing to extract */ }
      }
    }
    if (msg.role === "tool") {
      const content = msg.content || "";
      if (content.startsWith("[ERROR]") || content.includes("Error:")) {
        errors.push(content.slice(0, 100));
      }
      // Extract file paths mentioned in tool results.
      const pathMatches = content.match(/(?:server|src)\/[^\s"']+/g);
      if (pathMatches) pathMatches.forEach((p) => filesReferenced.add(p));
    }
    if (msg.role === "assistant" && msg.content) {
      const text = msg.content.slice(0, 100);
      // "[...]"-prefixed messages are prior summaries — don't re-summarize them.
      if (text && !text.startsWith("[")) actions.push(text);
    }
  }

  const lines = [
    "[Compacted conversation summary — $0.00, no LLM call]",
    `Messages compacted: ${messages.length}`,
    `Tools used: ${[...toolsUsed].join(", ") || "none"}`,
    Array.isArray(toolCalls) ? `Tool calls so far: ${toolCalls.length}` : null,
    `Files read: ${[...(filesRead || [])].slice(0, 10).join(", ") || "none"}`,
    `Files modified: ${[...(filesModified || [])].join(", ") || "none"}`,
    `Files referenced: ${[...filesReferenced].slice(0, 10).join(", ") || "none"}`,
    errors.length > 0 ? `Errors encountered: ${errors.slice(0, 3).join("; ")}` : null,
    userRequests.length > 0 ? `User requests: ${userRequests.slice(-2).join(" | ")}` : null,
    actions.length > 0 ? `Actions taken: ${actions.slice(-3).join(" | ")}` : null,
  ].filter(Boolean);

  return lines.join("\n");
}
1178
+
1179
+ // ── Pre/Post Tool Hooks (claw-code pattern) ──
1180
+
1181
/**
 * Pre-tool hook: decide whether a tool execution should be blocked.
 * Reads shell commands from `.wolverine/hooks.json` (key `pre_tool_use`)
 * under `cwd`, if present. Each command receives a JSON event payload on
 * stdin and HOOK_TOOL_NAME / HOOK_TOOL_INPUT in its environment.
 * Exit code 0 = allow, 2 = deny. Any other failure (non-2 exit, timeout,
 * spawn error, unreadable config) fails open — hooks are best-effort and
 * must never break the agent loop.
 *
 * @param {string|undefined} toolName - name of the tool about to run
 * @param {string|undefined} toolInput - raw JSON argument string for the tool
 * @param {string} cwd - project root containing the .wolverine directory
 * @returns {{denied: boolean, message?: string}} denial verdict; `message`
 *   is the denying hook's stdout (trimmed) or "Hook denied".
 */
function _runPreHook(toolName, toolInput, cwd) {
  try {
    const hooksPath = path.join(cwd, ".wolverine", "hooks.json");
    if (!fs.existsSync(hooksPath)) return { denied: false };
    const hooks = JSON.parse(fs.readFileSync(hooksPath, "utf-8"));
    // Array.isArray guards against malformed config (e.g. a string, which
    // for...of would otherwise iterate character by character).
    if (!Array.isArray(hooks.pre_tool_use) || hooks.pre_tool_use.length === 0) {
      return { denied: false };
    }

    // Hoisted out of the loop — same module for every command.
    const { execSync } = require("child_process");
    for (const cmd of hooks.pre_tool_use) {
      try {
        execSync(cmd, {
          input: JSON.stringify({ event: "PreToolUse", tool_name: toolName, tool_input: toolInput }),
          env: { ...process.env, HOOK_TOOL_NAME: toolName || "", HOOK_TOOL_INPUT: (toolInput || "").slice(0, 1000) },
          stdio: ["pipe", "pipe", "pipe"],
          timeout: 5000,
        });
      } catch (e) {
        // Exit status 2 is the deny signal; everything else is ignored.
        if (e.status === 2) return { denied: true, message: (e.stdout?.toString() || "Hook denied").trim() };
      }
    }
  } catch { /* unreadable/invalid hooks.json — fail open */ }
  return { denied: false };
}
1209
+
1210
/**
 * Post-tool hook: audit/log a completed tool execution.
 * Runs each shell command in `.wolverine/hooks.json` (key `post_tool_use`)
 * with a JSON event payload on stdin and HOOK_TOOL_NAME / HOOK_TOOL_IS_ERROR
 * in the environment. All failures are swallowed — auditing is best-effort
 * and must never interfere with the agent loop.
 *
 * @param {string|undefined} toolName - name of the tool that ran
 * @param {string|undefined} toolInput - raw JSON argument string for the tool
 * @param {string|undefined} toolOutput - tool result (truncated to 500 chars for the payload)
 * @param {boolean} isError - whether the tool call failed
 * @param {string} cwd - project root containing the .wolverine directory
 * @returns {void}
 */
function _runPostHook(toolName, toolInput, toolOutput, isError, cwd) {
  try {
    const hooksPath = path.join(cwd, ".wolverine", "hooks.json");
    if (!fs.existsSync(hooksPath)) return;
    const hooks = JSON.parse(fs.readFileSync(hooksPath, "utf-8"));
    // Array.isArray guards against malformed config (mirrors _runPreHook).
    if (!Array.isArray(hooks.post_tool_use) || hooks.post_tool_use.length === 0) return;

    // Hoisted out of the loop — same module for every command.
    const { execSync } = require("child_process");
    for (const cmd of hooks.post_tool_use) {
      try {
        execSync(cmd, {
          input: JSON.stringify({ event: "PostToolUse", tool_name: toolName, tool_input: toolInput, tool_output: (toolOutput || "").slice(0, 500), is_error: isError }),
          env: { ...process.env, HOOK_TOOL_NAME: toolName || "", HOOK_TOOL_IS_ERROR: isError ? "1" : "0" },
          stdio: ["pipe", "pipe", "pipe"],
          timeout: 5000,
        });
      } catch { /* hook failed — ignore, audit is best-effort */ }
    }
  } catch { /* unreadable/invalid hooks.json — ignore */ }
}
1233
+
1093
1234
  module.exports = { AgentEngine, TOOL_DEFINITIONS, BLOCKED_COMMANDS };
@@ -54,7 +54,7 @@ const SEED_DOCS = [
54
54
  metadata: { topic: "perf-monitoring" },
55
55
  },
56
56
  {
57
- text: "Wolverine brain: high-performance vector database for long-term memory. 4 search optimizations: (1) Pre-normalized vectors — cosine similarity = dot product (no sqrt), 7x faster. (2) IVF index — vectors clustered into √N buckets via k-means++, search probes nearest 20% of clusters only. 10K entries: 4ms instead of 31ms. (3) BM25 keyword search — proper inverted index with TF-IDF scoring, O(query_tokens) not O(N). (4) Binary persistence — Float32Array buffers, 10x faster load than JSON. Grows gracefully: 100=0.2ms, 1K=0.4ms, 5K=2ms, 10K=4ms. Stores: function maps, errors, fixes, learnings, seed docs. Persisted to .wolverine/brain/.",
57
+ text: "Wolverine brain: high-performance vector database for long-term memory. 4 search optimizations: (1) Pre-normalized vectors — cosine similarity = dot product (no sqrt), 7x faster. (2) IVF index — k-means++ clustering into √N buckets (10 at 100 entries, 100 at 10K, 224 at 50K), search probes nearest 20% of clusters. (3) BM25 keyword search — inverted index with TF-IDF scoring, O(query_tokens) not O(N). (4) Binary persistence — Float32Array buffers, 10x faster load. Benchmarks: 100=0.2ms, 1K=0.4ms, 5K=2ms, 10K=4.4ms, 50K=23.7ms (was 160ms brute force). Stores: function maps, errors, fixes, learnings, seed docs. New seeds merged on framework update without erasing existing memories.",
58
58
  metadata: { topic: "brain" },
59
59
  },
60
60
  {
@@ -257,6 +257,10 @@ const SEED_DOCS = [
257
257
  text: "Token waste prevention: 3 layers. (1) Empty stderr guard — signal kills with no error output just restart, no AI ($0.00). (2) Loop guard — 3 failed heals on same error → stop and file bug report, no more AI calls. (3) Global rate limit — max 5 heals per 5 minutes regardless of error signature. Idle server burns exactly $0.00 in tokens.",
258
258
  metadata: { topic: "token-protection" },
259
259
  },
260
+ {
261
+ text: "Agent efficiency (claw-code patterns): (1) Zero-cost structural compaction — extracts signals (tools used, files touched, errors found, actions taken) from message history WITHOUT an LLM call. Costs $0.00 vs old method that burned tokens on a compacting model. Triggers when estimated tokens > 10K (text.length/4 approximation). Preserves last 4 messages verbatim. (2) Token estimation — text.length/4+1, fast approximation without tokenizer, ~10% accurate. Used for budget decisions before API calls. (3) Error-graceful tools — tool errors returned as [ERROR] prefixed results, not thrown. Model sees the error and decides how to proceed. (4) Pre/post tool hooks — shell commands in .wolverine/hooks.json, exit 0=allow, 2=deny. Enables audit logging and policy enforcement without hard-coding.",
262
+ metadata: { topic: "agent-efficiency" },
263
+ },
260
264
  {
261
265
  text: "Cost optimization: 7 techniques reduce heal cost from $0.31 to $0.02 for simple errors. (1) Verifier skips route probe for simple errors (TypeError/ReferenceError/SyntaxError) — trusts syntax+boot, ErrorMonitor is safety net. Prevents false-rejection cascades. (2) Sub-agents use Haiku (classifier model) for explore/plan/verify/research — only fixer uses Sonnet/Opus. 6 Haiku calls=$0.006 vs 6 Sonnet calls=$0.12. (3) Agent context compacted every 3 turns using compacting model — prevents 15K→95K token blowup. (4) Brain checked for cached fix patterns before AI — repeat errors cost $0. (5) Token budgets capped by error complexity: simple=20K agent budget, moderate=50K, complex=100K. Simple errors get 4 agent turns max. (6) Prior attempt summaries (not full context) passed between iterations — concise 'do NOT repeat' directives. (7) Fast path includes last known good backup code so AI can revert broken additions instead of patching around them.",
262
266
  metadata: { topic: "cost-optimization" },