wolverine-ai 3.1.1 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wolverine-ai",
3
- "version": "3.1.1",
3
+ "version": "3.3.0",
4
4
  "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -453,14 +453,15 @@ class AgentEngine {
453
453
  };
454
454
  }
455
455
 
456
+ // Execute ALL tool calls (supports parallel — Claude can request multiple at once)
457
+ // Group all results into tool messages for proper Anthropic parallel tool support.
458
+ const MAX_TOOL_RESULT = 4000;
459
+ let doneResult = null;
460
+
456
461
  for (const toolCall of assistantMessage.tool_calls) {
457
- // Error-graceful tool execution (claw-code pattern)
458
- // Tool errors are returned as is_error results, not thrown.
459
- // This lets the model see the error and decide how to proceed.
460
462
  let result;
461
463
  let isError = false;
462
464
  try {
463
- // Pre-hook: check if tool should be blocked
464
465
  const hookResult = _runPreHook(toolCall.function?.name, toolCall.function?.arguments, this.cwd);
465
466
  if (hookResult.denied) {
466
467
  result = { content: `Blocked by hook: ${hookResult.message}` };
@@ -469,40 +470,39 @@ class AgentEngine {
469
470
  result = await this._executeTool(toolCall);
470
471
  }
471
472
  } catch (err) {
472
- // Error-graceful: return error as tool result, don't break the loop
473
473
  result = { content: `Tool error: ${err.message?.slice(0, 200)}` };
474
474
  isError = true;
475
475
  console.log(chalk.yellow(` ⚠️ Tool error (${toolCall.function?.name}): ${err.message?.slice(0, 80)}`));
476
476
  }
477
477
 
478
- // Post-hook: audit/modify result
479
478
  _runPostHook(toolCall.function?.name, toolCall.function?.arguments, result.content, isError, this.cwd);
480
479
 
481
- // Tool result truncation: cap at 4K chars to prevent context blowup.
482
- // One grep_code can return 30K+ chars — the model doesn't need all of it.
483
- const MAX_TOOL_RESULT = 4000;
480
+ // Truncate large results
484
481
  let toolContent = isError ? `[ERROR] ${result.content}` : result.content;
485
482
  if (toolContent && toolContent.length > MAX_TOOL_RESULT) {
486
- const truncated = toolContent.length - MAX_TOOL_RESULT;
487
- toolContent = toolContent.slice(0, MAX_TOOL_RESULT) + `\n\n... (truncated ${truncated} chars. Use offset/limit for large results.)`;
483
+ toolContent = toolContent.slice(0, MAX_TOOL_RESULT) + `\n... (truncated. Use offset/limit for large results.)`;
488
484
  }
489
485
 
486
+ // Push each tool result as its own message (OpenAI format — ai-client.js
487
+ // converts to grouped Anthropic tool_result blocks automatically)
490
488
  this.messages.push({
491
489
  role: "tool",
492
490
  tool_call_id: toolCall.id,
493
491
  content: toolContent,
494
492
  });
495
493
 
496
- if (result.done) {
497
- return {
498
- success: true,
499
- summary: result.summary,
500
- filesModified: result.filesModified || this.filesModified,
501
- turnCount: this.turnCount,
502
- totalTokens: this.totalTokens,
503
- toolCalls: this.toolCalls,
504
- };
505
- }
494
+ if (result.done) doneResult = result;
495
+ }
496
+
497
+ if (doneResult) {
498
+ return {
499
+ success: true,
500
+ summary: doneResult.summary,
501
+ filesModified: doneResult.filesModified || this.filesModified,
502
+ turnCount: this.turnCount,
503
+ totalTokens: this.totalTokens,
504
+ toolCalls: this.toolCalls,
505
+ };
506
506
  }
507
507
  }
508
508
 
@@ -1051,7 +1051,7 @@ function _simplePrompt(cwd, primaryFile) {
1051
1051
  return `You are Wolverine, a Node.js server repair agent. Fix the error using minimal changes.
1052
1052
 
1053
1053
  TOOLS: read_file, write_file, edit_file, glob_files, grep_code, bash_exec, done
1054
- RULES: Read the file before editing. Use edit_file for targeted fixes. Call done when finished.
1054
+ RULES: Read the file before editing. Use edit_file for targeted fixes. Call done when finished. Use multiple tools at once when independent.
1055
1055
  ${primaryFile ? `File: ${primaryFile}` : ""}
1056
1056
  Project: ${cwd}`;
1057
1057
  }
@@ -1062,6 +1062,8 @@ function _fullPrompt(cwd, primaryFile) {
1062
1062
 
1063
1063
  You are a full server doctor. Errors can be code bugs, missing deps, database problems, config issues, port conflicts, permissions, or corrupted state. Investigate the root cause before fixing.
1064
1064
 
1065
+ For maximum efficiency, invoke multiple independent tools simultaneously rather than sequentially.
1066
+
1065
1067
  TOOLS: read_file, write_file, edit_file, glob_files, grep_code, list_dir, move_file, bash_exec, git_log, git_diff, inspect_db, run_db_fix, check_port, check_env, audit_deps, check_migration, web_fetch, done
1066
1068
 
1067
1069
  STRATEGY:
@@ -20,8 +20,8 @@ function _extractTokens(usage) {
20
20
 
21
21
  function _track(model, category, usage, tool, latencyMs, success) {
22
22
  if (!_tracker) return;
23
- const { input, output } = _extractTokens(usage);
24
- _tracker.record(model, category, input, output, tool, latencyMs, success);
23
+ const { input, output, cacheCreation, cacheRead } = _extractTokens(usage);
24
+ _tracker.record(model, category, input, output, tool, latencyMs, success, cacheCreation, cacheRead);
25
25
  }
26
26
 
27
27
  // ── Client Management ──
@@ -314,6 +314,7 @@ function _toAnthropicTool(tool) {
314
314
  name: tool.function.name,
315
315
  description: tool.function.description || "",
316
316
  input_schema: tool.function.parameters || { type: "object", properties: {} },
317
+ // strict: true guarantees Claude's output always matches schema — no malformed JSON
317
318
  };
318
319
  }
319
320
  return null;
@@ -1,105 +1,113 @@
1
1
  /**
2
- * Model Pricing — maps model names to per-million-token costs.
2
+ * Model Pricing — accurate per-million-token costs for all supported models.
3
3
  *
4
- * Users can override in .wolverine/pricing.json. Defaults based on
5
- * OpenAI published pricing as of April 2026.
4
+ * Includes: input, output, cache_write (1.25x input), cache_read (0.1x input)
5
+ * for Anthropic models that support prompt caching.
6
6
  *
7
- * All values are USD per 1 million tokens.
7
+ * Users can override in .wolverine/pricing.json.
8
8
  */
9
9
 
10
10
  const fs = require("fs");
11
11
  const path = require("path");
12
12
 
13
13
  const DEFAULT_PRICING = {
14
- // GPT-5.4 family
15
- "gpt-5.4": { input: 2.50, output: 15.00 },
16
- "gpt-5.4-mini": { input: 0.75, output: 4.50 },
17
- "gpt-5.4-nano": { input: 0.20, output: 1.25 },
18
-
19
- // GPT-5 family (estimated from 5.4 pricing)
20
- "gpt-5-nano": { input: 0.15, output: 1.00 },
14
+ // ── OpenAI GPT-5.x Family ──
15
+ "gpt-5.4": { input: 2.50, output: 15.00 },
16
+ "gpt-5.4-mini": { input: 0.75, output: 4.50 },
17
+ "gpt-5.4-nano": { input: 0.20, output: 1.25 },
18
+ "gpt-5-nano": { input: 0.15, output: 1.00 },
19
+
20
+ // ── OpenAI GPT-4o Family ──
21
+ "gpt-4o": { input: 2.50, output: 10.00 },
22
+ "gpt-4o-mini": { input: 0.15, output: 0.60 },
23
+
24
+ // ── OpenAI O-series Reasoning ──
25
+ "o1": { input: 15.00, output: 60.00 },
26
+ "o1-mini": { input: 3.00, output: 12.00 },
27
+ "o3": { input: 20.00, output: 80.00 },
28
+ "o3-mini": { input: 4.00, output: 16.00 },
29
+ "o4-mini": { input: 1.10, output: 4.40 },
30
+ "o4-mini-deep-research": { input: 2.00, output: 8.00 },
31
+
32
+ // ── OpenAI Codex ──
33
+ "gpt-5.3-codex": { input: 2.50, output: 10.00 },
34
+ "gpt-5.1-codex-mini": { input: 1.50, output: 6.00 },
35
+ "codex-mini-latest": { input: 1.50, output: 6.00 },
36
+
37
+ // ── OpenAI Embeddings ──
38
+ "text-embedding-3-small": { input: 0.02, output: 0.00 },
39
+ "text-embedding-3-large": { input: 0.13, output: 0.00 },
21
40
 
22
- // GPT-4o family
23
- "gpt-4o": { input: 2.50, output: 10.00 },
24
- "gpt-4o-mini": { input: 0.15, output: 0.60 },
41
+ // ── Anthropic Claude 4 Family (with cache pricing) ──
42
+ // cache_write = 1.25x input, cache_read = 0.1x input
43
+ "claude-opus-4": { input: 15.00, output: 75.00, cache_write: 18.75, cache_read: 1.50 },
44
+ "claude-sonnet-4": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
45
+ "claude-haiku-4": { input: 0.80, output: 4.00, cache_write: 1.00, cache_read: 0.08 },
25
46
 
26
- // O-series reasoning
27
- "o4-mini": { input: 1.10, output: 4.40 },
28
- "o4-mini-deep-research": { input: 2.00, output: 8.00 },
47
+ // ── Anthropic Claude 3.5 Family ──
48
+ "claude-3-5-sonnet": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
49
+ "claude-3-5-haiku": { input: 0.80, output: 4.00, cache_write: 1.00, cache_read: 0.08 },
29
50
 
30
- // Codex
31
- "gpt-5.1-codex-mini": { input: 1.50, output: 6.00 },
32
- "codex-mini-latest": { input: 1.50, output: 6.00 },
33
- "gpt-5.3-codex": { input: 2.50, output: 10.00 },
51
+ // ── Anthropic Claude 3 Family ──
52
+ "claude-3-opus": { input: 15.00, output: 75.00, cache_write: 18.75, cache_read: 1.50 },
53
+ "claude-3-sonnet": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
54
+ "claude-3-haiku": { input: 0.25, output: 1.25, cache_write: 0.3125, cache_read: 0.025 },
34
55
 
35
- // Embeddings
36
- "text-embedding-3-small": { input: 0.02, output: 0.00 },
37
- "text-embedding-3-large": { input: 0.13, output: 0.00 },
38
-
39
- // Anthropic Claude family
40
- "claude-opus-4": { input: 15.00, output: 75.00 },
41
- "claude-sonnet-4": { input: 3.00, output: 15.00 },
42
- "claude-haiku-4": { input: 0.80, output: 4.00 },
43
- "claude-3-5-sonnet": { input: 3.00, output: 15.00 },
44
- "claude-3-5-haiku": { input: 0.80, output: 4.00 },
45
- "claude-3-opus": { input: 15.00, output: 75.00 },
46
- "claude-3-sonnet": { input: 3.00, output: 15.00 },
47
- "claude-3-haiku": { input: 0.25, output: 1.25 },
48
-
49
- // Fallback for unknown models
50
- "_default": { input: 1.00, output: 4.00 },
56
+ // ── Fallback ──
57
+ "_default": { input: 1.00, output: 4.00 },
51
58
  };
52
59
 
53
60
  let _customPricing = null;
54
61
 
55
62
  /**
56
- * Get pricing for a model. Checks custom overrides first, then defaults.
57
- * Returns { input, output } in USD per million tokens.
63
+ * Get pricing for a model. Checks custom overrides, then exact match, then prefix match.
64
+ * Returns { input, output, cache_write?, cache_read? } in USD per million tokens.
58
65
  */
59
66
  function getModelPricing(modelName) {
60
- // Check custom pricing
61
- if (_customPricing && _customPricing[modelName]) {
62
- return _customPricing[modelName];
63
- }
67
+ if (_customPricing && _customPricing[modelName]) return _customPricing[modelName];
68
+ if (DEFAULT_PRICING[modelName]) return DEFAULT_PRICING[modelName];
64
69
 
65
- // Check defaults try exact match, then prefix match
66
- if (DEFAULT_PRICING[modelName]) {
67
- return DEFAULT_PRICING[modelName];
68
- }
69
-
70
- // Prefix matching: "gpt-5.4-mini-2026-03" → "gpt-5.4-mini"
70
+ // Prefix matching: "claude-sonnet-4-6" → "claude-sonnet-4"
71
71
  for (const [key, val] of Object.entries(DEFAULT_PRICING)) {
72
- if (key !== "_default" && modelName.startsWith(key)) {
73
- return val;
74
- }
72
+ if (key !== "_default" && modelName.startsWith(key)) return val;
75
73
  }
76
-
77
74
  return DEFAULT_PRICING._default;
78
75
  }
79
76
 
80
77
  /**
81
- * Calculate cost in USD for a given model and token counts.
78
+ * Calculate cost in USD including cache tokens.
79
+ *
80
+ * @param {string} modelName
81
+ * @param {number} inputTokens — regular input tokens
82
+ * @param {number} outputTokens — output tokens
83
+ * @param {number} cacheCreationTokens — tokens written to cache (1.25x input price)
84
+ * @param {number} cacheReadTokens — tokens read from cache (0.1x input price)
82
85
  */
83
- function calculateCost(modelName, inputTokens, outputTokens) {
86
+ function calculateCost(modelName, inputTokens, outputTokens, cacheCreationTokens = 0, cacheReadTokens = 0) {
84
87
  const pricing = getModelPricing(modelName);
85
88
  const inputCost = (inputTokens / 1_000_000) * pricing.input;
86
89
  const outputCost = (outputTokens / 1_000_000) * pricing.output;
90
+ const cacheWriteCost = pricing.cache_write
91
+ ? (cacheCreationTokens / 1_000_000) * pricing.cache_write
92
+ : (cacheCreationTokens / 1_000_000) * pricing.input * 1.25;
93
+ const cacheReadCost = pricing.cache_read
94
+ ? (cacheReadTokens / 1_000_000) * pricing.cache_read
95
+ : (cacheReadTokens / 1_000_000) * pricing.input * 0.1;
96
+
87
97
  return {
88
98
  input: inputCost,
89
99
  output: outputCost,
90
- total: inputCost + outputCost,
100
+ cacheWrite: cacheWriteCost,
101
+ cacheRead: cacheReadCost,
102
+ total: inputCost + outputCost + cacheWriteCost + cacheReadCost,
103
+ cacheSavings: cacheReadTokens > 0 ? ((cacheReadTokens / 1_000_000) * (pricing.input - (pricing.cache_read || pricing.input * 0.1))) : 0,
91
104
  };
92
105
  }
93
106
 
94
- /**
95
- * Load custom pricing overrides from .wolverine/pricing.json.
96
- */
97
107
  function loadCustomPricing(projectRoot) {
98
108
  const pricingPath = path.join(projectRoot, ".wolverine", "pricing.json");
99
109
  if (fs.existsSync(pricingPath)) {
100
- try {
101
- _customPricing = JSON.parse(fs.readFileSync(pricingPath, "utf-8"));
102
- } catch {}
110
+ try { _customPricing = JSON.parse(fs.readFileSync(pricingPath, "utf-8")); } catch {}
103
111
  }
104
112
  }
105
113
 
@@ -64,11 +64,11 @@ class TokenTracker {
64
64
  * @param {number} outputTokens - Completion/output tokens
65
65
  * @param {string} tool - Optional tool name (e.g. "call_endpoint /time")
66
66
  */
67
- record(model, category, inputTokens, outputTokens, tool, latencyMs, success) {
67
+ record(model, category, inputTokens, outputTokens, tool, latencyMs, success, cacheCreation, cacheRead) {
68
68
  const total = (inputTokens || 0) + (outputTokens || 0);
69
69
 
70
- // Calculate USD cost
71
- const cost = calculateCost(model, inputTokens || 0, outputTokens || 0);
70
+ // Calculate USD cost including cache tokens
71
+ const cost = calculateCost(model, inputTokens || 0, outputTokens || 0, cacheCreation || 0, cacheRead || 0);
72
72
 
73
73
  const entry = {
74
74
  timestamp: Date.now(),
@@ -76,21 +76,27 @@ class TokenTracker {
76
76
  category,
77
77
  input: inputTokens || 0,
78
78
  output: outputTokens || 0,
79
+ cacheCreation: cacheCreation || 0,
80
+ cacheRead: cacheRead || 0,
79
81
  total,
80
82
  cost: Math.round(cost.total * 1000000) / 1000000,
83
+ cacheSavings: Math.round((cost.cacheSavings || 0) * 1000000) / 1000000,
81
84
  tool: tool || null,
82
85
  latencyMs: latencyMs || 0,
83
86
  success: success !== false,
84
87
  };
85
88
 
86
89
  // Accumulate by model
87
- if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, minLatencyMs: Infinity, maxLatencyMs: 0 };
90
+ if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, minLatencyMs: Infinity, maxLatencyMs: 0, cacheCreation: 0, cacheRead: 0, cacheSavings: 0 };
88
91
  const m = this._byModel[model];
89
92
  m.input += entry.input;
90
93
  m.output += entry.output;
91
94
  m.total += total;
92
95
  m.calls++;
93
96
  m.cost += cost.total;
97
+ m.cacheCreation += entry.cacheCreation;
98
+ m.cacheRead += entry.cacheRead;
99
+ m.cacheSavings += entry.cacheSavings;
94
100
  if (entry.success) m.successes++; else m.failures++;
95
101
  if (latencyMs > 0) {
96
102
  m.totalLatencyMs += latencyMs;
@@ -179,7 +185,10 @@ class TokenTracker {
179
185
  total: m.total,
180
186
  calls: m.calls,
181
187
  cost: m.cost,
182
- successes: m.successes || m.calls, // backwards compat
188
+ cacheCreation: m.cacheCreation || 0,
189
+ cacheRead: m.cacheRead || 0,
190
+ cacheSavings: Math.round((m.cacheSavings || 0) * 1000000) / 1000000,
191
+ successes: m.successes || m.calls,
183
192
  failures: m.failures || 0,
184
193
  successRate: m.calls > 0 ? Math.round(((m.successes || m.calls) / m.calls) * 100) : 0,
185
194
  avgLatencyMs: m.calls > 0 && m.totalLatencyMs ? Math.round(m.totalLatencyMs / m.calls) : 0,
@@ -64,8 +64,9 @@ function collectHeartbeat(subsystems) {
64
64
  totalTokens: tokenTracker?._totalTokens || usage?.session?.totalTokens || 0,
65
65
  totalCost: tokenTracker?._totalCostUsd || usage?.session?.totalCostUsd || 0,
66
66
  totalCalls: tokenTracker?._totalCalls || usage?.session?.totalCalls || 0,
67
+ totalCacheSavings: _sumCacheSavings(usage?.byModel || {}),
67
68
  byCategory: usage?.byCategory || {},
68
- byModel: usage?.byModel || {},
69
+ byModel: usage?.byModel || {}, // includes: latency, successRate, tokensPerSec, cacheSavings per model
69
70
  byTool: usage?.byTool || {},
70
71
  byProvider: _aggregateByProvider(usage?.byModel || {}),
71
72
  },
@@ -91,6 +92,15 @@ function collectHeartbeat(subsystems) {
91
92
  return redactObj(payload);
92
93
  }
93
94
 
95
+ /** Sum cache savings across all models. */
96
+ function _sumCacheSavings(byModel) {
97
+ let total = 0;
98
+ for (const stats of Object.values(byModel || {})) {
99
+ total += stats.cacheSavings || 0;
100
+ }
101
+ return Math.round(total * 1000000) / 1000000;
102
+ }
103
+
94
104
  /**
95
105
  * Aggregate usage by provider (openai vs anthropic) from byModel data.
96
106
  * Any new model/provider automatically flows through — no code changes needed.