wolverine-ai 3.1.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "wolverine-ai",
- "version": "3.1.1",
+ "version": "3.2.0",
  "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
  "main": "src/index.js",
  "bin": {
@@ -20,8 +20,8 @@ function _extractTokens(usage) {
 
  function _track(model, category, usage, tool, latencyMs, success) {
  if (!_tracker) return;
- const { input, output } = _extractTokens(usage);
- _tracker.record(model, category, input, output, tool, latencyMs, success);
+ const { input, output, cacheCreation, cacheRead } = _extractTokens(usage);
+ _tracker.record(model, category, input, output, tool, latencyMs, success, cacheCreation, cacheRead);
  }
 
  // ── Client Management ──
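 
The diff doesn't show `_extractTokens` itself, only that it now yields cache token counts alongside input and output. A minimal sketch of what the updated helper presumably does, assuming it normalizes the usage objects returned by the OpenAI and Anthropic SDKs (the field names follow each provider's published usage schema; the body itself is an assumption):

// Sketch only — the real _extractTokens is not shown in this diff.
function _extractTokens(usage) {
  if (!usage) return { input: 0, output: 0, cacheCreation: 0, cacheRead: 0 };
  return {
    // OpenAI reports prompt_tokens/completion_tokens; Anthropic reports input_tokens/output_tokens.
    input: usage.prompt_tokens ?? usage.input_tokens ?? 0,
    output: usage.completion_tokens ?? usage.output_tokens ?? 0,
    // Anthropic prompt-caching fields; absent on OpenAI responses, so they default to 0.
    cacheCreation: usage.cache_creation_input_tokens ?? 0,
    cacheRead: usage.cache_read_input_tokens ?? 0,
  };
}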
@@ -1,105 +1,113 @@
  /**
- * Model Pricing — maps model names to per-million-token costs.
+ * Model Pricing — accurate per-million-token costs for all supported models.
  *
- * Users can override in .wolverine/pricing.json. Defaults based on
- * OpenAI published pricing as of April 2026.
+ * Includes: input, output, cache_write (1.25x input), cache_read (0.1x input)
+ * for Anthropic models that support prompt caching.
  *
- * All values are USD per 1 million tokens.
+ * Users can override in .wolverine/pricing.json.
  */
 
  const fs = require("fs");
  const path = require("path");
 
  const DEFAULT_PRICING = {
- // GPT-5.4 family
- "gpt-5.4": { input: 2.50, output: 15.00 },
- "gpt-5.4-mini": { input: 0.75, output: 4.50 },
- "gpt-5.4-nano": { input: 0.20, output: 1.25 },
-
- // GPT-5 family (estimated from 5.4 pricing)
- "gpt-5-nano": { input: 0.15, output: 1.00 },
+ // ── OpenAI GPT-5.x Family ──
+ "gpt-5.4": { input: 2.50, output: 15.00 },
+ "gpt-5.4-mini": { input: 0.75, output: 4.50 },
+ "gpt-5.4-nano": { input: 0.20, output: 1.25 },
+ "gpt-5-nano": { input: 0.15, output: 1.00 },
+
+ // ── OpenAI GPT-4o Family ──
+ "gpt-4o": { input: 2.50, output: 10.00 },
+ "gpt-4o-mini": { input: 0.15, output: 0.60 },
+
+ // ── OpenAI O-series Reasoning ──
+ "o1": { input: 15.00, output: 60.00 },
+ "o1-mini": { input: 3.00, output: 12.00 },
+ "o3": { input: 20.00, output: 80.00 },
+ "o3-mini": { input: 4.00, output: 16.00 },
+ "o4-mini": { input: 1.10, output: 4.40 },
+ "o4-mini-deep-research": { input: 2.00, output: 8.00 },
+
+ // ── OpenAI Codex ──
+ "gpt-5.3-codex": { input: 2.50, output: 10.00 },
+ "gpt-5.1-codex-mini": { input: 1.50, output: 6.00 },
+ "codex-mini-latest": { input: 1.50, output: 6.00 },
+
+ // ── OpenAI Embeddings ──
+ "text-embedding-3-small": { input: 0.02, output: 0.00 },
+ "text-embedding-3-large": { input: 0.13, output: 0.00 },
 
- // GPT-4o family
- "gpt-4o": { input: 2.50, output: 10.00 },
- "gpt-4o-mini": { input: 0.15, output: 0.60 },
+ // ── Anthropic Claude 4 Family (with cache pricing) ──
+ // cache_write = 1.25x input, cache_read = 0.1x input
+ "claude-opus-4": { input: 15.00, output: 75.00, cache_write: 18.75, cache_read: 1.50 },
+ "claude-sonnet-4": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
+ "claude-haiku-4": { input: 0.80, output: 4.00, cache_write: 1.00, cache_read: 0.08 },
 
- // O-series reasoning
- "o4-mini": { input: 1.10, output: 4.40 },
- "o4-mini-deep-research": { input: 2.00, output: 8.00 },
+ // ── Anthropic Claude 3.5 Family ──
+ "claude-3-5-sonnet": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
+ "claude-3-5-haiku": { input: 0.80, output: 4.00, cache_write: 1.00, cache_read: 0.08 },
 
- // Codex
- "gpt-5.1-codex-mini": { input: 1.50, output: 6.00 },
- "codex-mini-latest": { input: 1.50, output: 6.00 },
- "gpt-5.3-codex": { input: 2.50, output: 10.00 },
+ // ── Anthropic Claude 3 Family ──
+ "claude-3-opus": { input: 15.00, output: 75.00, cache_write: 18.75, cache_read: 1.50 },
+ "claude-3-sonnet": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
+ "claude-3-haiku": { input: 0.25, output: 1.25, cache_write: 0.3125, cache_read: 0.025 },
 
- // Embeddings
- "text-embedding-3-small": { input: 0.02, output: 0.00 },
- "text-embedding-3-large": { input: 0.13, output: 0.00 },
-
- // Anthropic Claude family
- "claude-opus-4": { input: 15.00, output: 75.00 },
- "claude-sonnet-4": { input: 3.00, output: 15.00 },
- "claude-haiku-4": { input: 0.80, output: 4.00 },
- "claude-3-5-sonnet": { input: 3.00, output: 15.00 },
- "claude-3-5-haiku": { input: 0.80, output: 4.00 },
- "claude-3-opus": { input: 15.00, output: 75.00 },
- "claude-3-sonnet": { input: 3.00, output: 15.00 },
- "claude-3-haiku": { input: 0.25, output: 1.25 },
-
- // Fallback for unknown models
- "_default": { input: 1.00, output: 4.00 },
+ // ── Fallback ──
+ "_default": { input: 1.00, output: 4.00 },
  };
 
  let _customPricing = null;
 
  /**
- * Get pricing for a model. Checks custom overrides first, then defaults.
- * Returns { input, output } in USD per million tokens.
+ * Get pricing for a model. Checks custom overrides, then exact match, then prefix match.
+ * Returns { input, output, cache_write?, cache_read? } in USD per million tokens.
  */
  function getModelPricing(modelName) {
- // Check custom pricing
- if (_customPricing && _customPricing[modelName]) {
- return _customPricing[modelName];
- }
+ if (_customPricing && _customPricing[modelName]) return _customPricing[modelName];
+ if (DEFAULT_PRICING[modelName]) return DEFAULT_PRICING[modelName];
 
- // Check defaults try exact match, then prefix match
- if (DEFAULT_PRICING[modelName]) {
- return DEFAULT_PRICING[modelName];
- }
-
- // Prefix matching: "gpt-5.4-mini-2026-03" → "gpt-5.4-mini"
+ // Prefix matching: "claude-sonnet-4-6" → "claude-sonnet-4"
  for (const [key, val] of Object.entries(DEFAULT_PRICING)) {
- if (key !== "_default" && modelName.startsWith(key)) {
- return val;
- }
+ if (key !== "_default" && modelName.startsWith(key)) return val;
  }
-
  return DEFAULT_PRICING._default;
  }
 
  /**
- * Calculate cost in USD for a given model and token counts.
+ * Calculate cost in USD including cache tokens.
+ *
+ * @param {string} modelName
+ * @param {number} inputTokens — regular input tokens
+ * @param {number} outputTokens — output tokens
+ * @param {number} cacheCreationTokens — tokens written to cache (1.25x input price)
+ * @param {number} cacheReadTokens — tokens read from cache (0.1x input price)
  */
- function calculateCost(modelName, inputTokens, outputTokens) {
+ function calculateCost(modelName, inputTokens, outputTokens, cacheCreationTokens = 0, cacheReadTokens = 0) {
  const pricing = getModelPricing(modelName);
  const inputCost = (inputTokens / 1_000_000) * pricing.input;
  const outputCost = (outputTokens / 1_000_000) * pricing.output;
+ const cacheWriteCost = pricing.cache_write
+ ? (cacheCreationTokens / 1_000_000) * pricing.cache_write
+ : (cacheCreationTokens / 1_000_000) * pricing.input * 1.25;
+ const cacheReadCost = pricing.cache_read
+ ? (cacheReadTokens / 1_000_000) * pricing.cache_read
+ : (cacheReadTokens / 1_000_000) * pricing.input * 0.1;
+
  return {
  input: inputCost,
  output: outputCost,
- total: inputCost + outputCost,
+ cacheWrite: cacheWriteCost,
+ cacheRead: cacheReadCost,
+ total: inputCost + outputCost + cacheWriteCost + cacheReadCost,
+ cacheSavings: cacheReadTokens > 0 ? ((cacheReadTokens / 1_000_000) * (pricing.input - (pricing.cache_read || pricing.input * 0.1))) : 0,
  };
  }
 
- /**
- * Load custom pricing overrides from .wolverine/pricing.json.
- */
  function loadCustomPricing(projectRoot) {
  const pricingPath = path.join(projectRoot, ".wolverine", "pricing.json");
  if (fs.existsSync(pricingPath)) {
- try {
- _customPricing = JSON.parse(fs.readFileSync(pricingPath, "utf-8"));
- } catch {}
+ try { _customPricing = JSON.parse(fs.readFileSync(pricingPath, "utf-8")); } catch {}
  }
  }
 
@@ -64,11 +64,11 @@ class TokenTracker {
  * @param {number} outputTokens - Completion/output tokens
  * @param {string} tool - Optional tool name (e.g. "call_endpoint /time")
  */
- record(model, category, inputTokens, outputTokens, tool, latencyMs, success) {
+ record(model, category, inputTokens, outputTokens, tool, latencyMs, success, cacheCreation, cacheRead) {
  const total = (inputTokens || 0) + (outputTokens || 0);
 
- // Calculate USD cost
- const cost = calculateCost(model, inputTokens || 0, outputTokens || 0);
+ // Calculate USD cost including cache tokens
+ const cost = calculateCost(model, inputTokens || 0, outputTokens || 0, cacheCreation || 0, cacheRead || 0);
 
  const entry = {
  timestamp: Date.now(),
@@ -76,21 +76,27 @@ class TokenTracker {
  category,
  input: inputTokens || 0,
  output: outputTokens || 0,
+ cacheCreation: cacheCreation || 0,
+ cacheRead: cacheRead || 0,
  total,
  cost: Math.round(cost.total * 1000000) / 1000000,
+ cacheSavings: Math.round((cost.cacheSavings || 0) * 1000000) / 1000000,
  tool: tool || null,
  latencyMs: latencyMs || 0,
  success: success !== false,
  };
 
  // Accumulate by model
- if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, minLatencyMs: Infinity, maxLatencyMs: 0 };
+ if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, minLatencyMs: Infinity, maxLatencyMs: 0, cacheCreation: 0, cacheRead: 0, cacheSavings: 0 };
  const m = this._byModel[model];
  m.input += entry.input;
  m.output += entry.output;
  m.total += total;
  m.calls++;
  m.cost += cost.total;
+ m.cacheCreation += entry.cacheCreation;
+ m.cacheRead += entry.cacheRead;
+ m.cacheSavings += entry.cacheSavings;
  if (entry.success) m.successes++; else m.failures++;
  if (latencyMs > 0) {
  m.totalLatencyMs += latencyMs;
@@ -179,7 +185,10 @@ class TokenTracker {
  total: m.total,
  calls: m.calls,
  cost: m.cost,
- successes: m.successes || m.calls, // backwards compat
+ cacheCreation: m.cacheCreation || 0,
+ cacheRead: m.cacheRead || 0,
+ cacheSavings: Math.round((m.cacheSavings || 0) * 1000000) / 1000000,
+ successes: m.successes || m.calls,
  failures: m.failures || 0,
  successRate: m.calls > 0 ? Math.round(((m.successes || m.calls) / m.calls) * 100) : 0,
  avgLatencyMs: m.calls > 0 && m.totalLatencyMs ? Math.round(m.totalLatencyMs / m.calls) : 0,
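 
Putting the tracker changes together, a hypothetical call site might look like the following (the constructor and the category value are assumptions; only record()'s new signature appears in this diff):

// Hypothetical usage — only record()'s signature is confirmed by the diff.
const tracker = new TokenTracker();
tracker.record(
  "claude-sonnet-4", // model
  "diagnose",        // category (hypothetical)
  1_000,             // inputTokens
  500,               // outputTokens
  null,              // tool
  850,               // latencyMs
  true,              // success
  0,                 // cacheCreation
  20_000             // cacheRead
);
// The per-model accumulator now carries cache fields, so a summary entry
// resembles: { input: 1000, output: 500, calls: 1, cost: 0.0165,
//              cacheRead: 20000, cacheSavings: 0.054, successRate: 100, ... }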
@@ -64,8 +64,9 @@ function collectHeartbeat(subsystems) {
  totalTokens: tokenTracker?._totalTokens || usage?.session?.totalTokens || 0,
  totalCost: tokenTracker?._totalCostUsd || usage?.session?.totalCostUsd || 0,
  totalCalls: tokenTracker?._totalCalls || usage?.session?.totalCalls || 0,
+ totalCacheSavings: _sumCacheSavings(usage?.byModel || {}),
  byCategory: usage?.byCategory || {},
- byModel: usage?.byModel || {},
+ byModel: usage?.byModel || {}, // includes: latency, successRate, tokensPerSec, cacheSavings per model
  byTool: usage?.byTool || {},
  byProvider: _aggregateByProvider(usage?.byModel || {}),
  },
@@ -91,6 +92,15 @@ function collectHeartbeat(subsystems) {
  return redactObj(payload);
  }
 
+ /** Sum cache savings across all models. */
+ function _sumCacheSavings(byModel) {
+ let total = 0;
+ for (const stats of Object.values(byModel || {})) {
+ total += stats.cacheSavings || 0;
+ }
+ return Math.round(total * 1000000) / 1000000;
+ }
+
  /**
  * Aggregate usage by provider (openai vs anthropic) from byModel data.
  * Any new model/provider automatically flows through — no code changes needed.
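 
_aggregateByProvider itself falls outside the hunks shown. Given the comment above, a plausible sketch infers the provider from the model-name prefix, which would explain why new models flow through without code changes (the prefix rule and field list are assumptions):

// Sketch only — the real implementation is not part of this diff.
function _aggregateByProvider(byModel) {
  const out = {};
  for (const [model, stats] of Object.entries(byModel)) {
    // Assumed heuristic: "claude-*" models are Anthropic, everything else OpenAI.
    const provider = model.startsWith("claude") ? "anthropic" : "openai";
    if (!out[provider]) out[provider] = { total: 0, cost: 0, calls: 0, cacheSavings: 0 };
    out[provider].total += stats.total || 0;
    out[provider].cost += stats.cost || 0;
    out[provider].calls += stats.calls || 0;
    out[provider].cacheSavings += stats.cacheSavings || 0;
  }
  return out;
}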