wolverine-ai 3.1.1 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/core/ai-client.js +2 -2
- package/src/logger/pricing.js +70 -62
- package/src/logger/token-tracker.js +14 -5
- package/src/platform/telemetry.js +11 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wolverine-ai",
|
|
3
|
-
"version": "3.1.1",
|
|
3
|
+
"version": "3.2.0",
|
|
4
4
|
"description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
package/src/core/ai-client.js
CHANGED
|
@@ -20,8 +20,8 @@ function _extractTokens(usage) {
|
|
|
20
20
|
|
|
21
21
|
function _track(model, category, usage, tool, latencyMs, success) {
|
|
22
22
|
if (!_tracker) return;
|
|
23
|
-
const { input, output } = _extractTokens(usage);
|
|
24
|
-
_tracker.record(model, category, input, output, tool, latencyMs, success);
|
|
23
|
+
const { input, output, cacheCreation, cacheRead } = _extractTokens(usage);
|
|
24
|
+
_tracker.record(model, category, input, output, tool, latencyMs, success, cacheCreation, cacheRead);
|
|
25
25
|
}
|
|
26
26
|
|
|
27
27
|
// ── Client Management ──
|
package/src/logger/pricing.js
CHANGED
|
@@ -1,105 +1,113 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Model Pricing —
|
|
2
|
+
* Model Pricing — accurate per-million-token costs for all supported models.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
4
|
+
* Includes: input, output, cache_write (1.25x input), cache_read (0.1x input)
|
|
5
|
+
* for Anthropic models that support prompt caching.
|
|
6
6
|
*
|
|
7
|
-
*
|
|
7
|
+
* Users can override in .wolverine/pricing.json.
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
10
|
const fs = require("fs");
|
|
11
11
|
const path = require("path");
|
|
12
12
|
|
|
13
13
|
const DEFAULT_PRICING = {
|
|
14
|
-
// GPT-5.
|
|
15
|
-
"gpt-5.4":
|
|
16
|
-
"gpt-5.4-mini":
|
|
17
|
-
"gpt-5.4-nano":
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
14
|
+
// ── OpenAI GPT-5.x Family ──
|
|
15
|
+
"gpt-5.4": { input: 2.50, output: 15.00 },
|
|
16
|
+
"gpt-5.4-mini": { input: 0.75, output: 4.50 },
|
|
17
|
+
"gpt-5.4-nano": { input: 0.20, output: 1.25 },
|
|
18
|
+
"gpt-5-nano": { input: 0.15, output: 1.00 },
|
|
19
|
+
|
|
20
|
+
// ── OpenAI GPT-4o Family ──
|
|
21
|
+
"gpt-4o": { input: 2.50, output: 10.00 },
|
|
22
|
+
"gpt-4o-mini": { input: 0.15, output: 0.60 },
|
|
23
|
+
|
|
24
|
+
// ── OpenAI O-series Reasoning ──
|
|
25
|
+
"o1": { input: 15.00, output: 60.00 },
|
|
26
|
+
"o1-mini": { input: 3.00, output: 12.00 },
|
|
27
|
+
"o3": { input: 20.00, output: 80.00 },
|
|
28
|
+
"o3-mini": { input: 4.00, output: 16.00 },
|
|
29
|
+
"o4-mini": { input: 1.10, output: 4.40 },
|
|
30
|
+
"o4-mini-deep-research": { input: 2.00, output: 8.00 },
|
|
31
|
+
|
|
32
|
+
// ── OpenAI Codex ──
|
|
33
|
+
"gpt-5.3-codex": { input: 2.50, output: 10.00 },
|
|
34
|
+
"gpt-5.1-codex-mini": { input: 1.50, output: 6.00 },
|
|
35
|
+
"codex-mini-latest": { input: 1.50, output: 6.00 },
|
|
36
|
+
|
|
37
|
+
// ── OpenAI Embeddings ──
|
|
38
|
+
"text-embedding-3-small": { input: 0.02, output: 0.00 },
|
|
39
|
+
"text-embedding-3-large": { input: 0.13, output: 0.00 },
|
|
21
40
|
|
|
22
|
-
//
|
|
23
|
-
|
|
24
|
-
"
|
|
41
|
+
// ── Anthropic Claude 4 Family (with cache pricing) ──
|
|
42
|
+
// cache_write = 1.25x input, cache_read = 0.1x input
|
|
43
|
+
"claude-opus-4": { input: 15.00, output: 75.00, cache_write: 18.75, cache_read: 1.50 },
|
|
44
|
+
"claude-sonnet-4": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
|
|
45
|
+
"claude-haiku-4": { input: 0.80, output: 4.00, cache_write: 1.00, cache_read: 0.08 },
|
|
25
46
|
|
|
26
|
-
//
|
|
27
|
-
"
|
|
28
|
-
"
|
|
47
|
+
// ── Anthropic Claude 3.5 Family ──
|
|
48
|
+
"claude-3-5-sonnet": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
|
|
49
|
+
"claude-3-5-haiku": { input: 0.80, output: 4.00, cache_write: 1.00, cache_read: 0.08 },
|
|
29
50
|
|
|
30
|
-
//
|
|
31
|
-
"
|
|
32
|
-
"
|
|
33
|
-
"
|
|
51
|
+
// ── Anthropic Claude 3 Family ──
|
|
52
|
+
"claude-3-opus": { input: 15.00, output: 75.00, cache_write: 18.75, cache_read: 1.50 },
|
|
53
|
+
"claude-3-sonnet": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
|
|
54
|
+
"claude-3-haiku": { input: 0.25, output: 1.25, cache_write: 0.3125, cache_read: 0.025 },
|
|
34
55
|
|
|
35
|
-
//
|
|
36
|
-
"
|
|
37
|
-
"text-embedding-3-large": { input: 0.13, output: 0.00 },
|
|
38
|
-
|
|
39
|
-
// Anthropic Claude family
|
|
40
|
-
"claude-opus-4": { input: 15.00, output: 75.00 },
|
|
41
|
-
"claude-sonnet-4": { input: 3.00, output: 15.00 },
|
|
42
|
-
"claude-haiku-4": { input: 0.80, output: 4.00 },
|
|
43
|
-
"claude-3-5-sonnet": { input: 3.00, output: 15.00 },
|
|
44
|
-
"claude-3-5-haiku": { input: 0.80, output: 4.00 },
|
|
45
|
-
"claude-3-opus": { input: 15.00, output: 75.00 },
|
|
46
|
-
"claude-3-sonnet": { input: 3.00, output: 15.00 },
|
|
47
|
-
"claude-3-haiku": { input: 0.25, output: 1.25 },
|
|
48
|
-
|
|
49
|
-
// Fallback for unknown models
|
|
50
|
-
"_default": { input: 1.00, output: 4.00 },
|
|
56
|
+
// ── Fallback ──
|
|
57
|
+
"_default": { input: 1.00, output: 4.00 },
|
|
51
58
|
};
|
|
52
59
|
|
|
53
60
|
let _customPricing = null;
|
|
54
61
|
|
|
55
62
|
/**
|
|
56
|
-
* Get pricing for a model. Checks custom overrides
|
|
57
|
-
* Returns { input, output } in USD per million tokens.
|
|
63
|
+
* Get pricing for a model. Checks custom overrides, then exact match, then prefix match.
|
|
64
|
+
* Returns { input, output, cache_write?, cache_read? } in USD per million tokens.
|
|
58
65
|
*/
|
|
59
66
|
function getModelPricing(modelName) {
|
|
60
|
-
|
|
61
|
-
if (
|
|
62
|
-
return _customPricing[modelName];
|
|
63
|
-
}
|
|
67
|
+
if (_customPricing && _customPricing[modelName]) return _customPricing[modelName];
|
|
68
|
+
if (DEFAULT_PRICING[modelName]) return DEFAULT_PRICING[modelName];
|
|
64
69
|
|
|
65
|
-
//
|
|
66
|
-
if (DEFAULT_PRICING[modelName]) {
|
|
67
|
-
return DEFAULT_PRICING[modelName];
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
// Prefix matching: "gpt-5.4-mini-2026-03" → "gpt-5.4-mini"
|
|
70
|
+
// Prefix matching: "claude-sonnet-4-6" → "claude-sonnet-4"
|
|
71
71
|
for (const [key, val] of Object.entries(DEFAULT_PRICING)) {
|
|
72
|
-
if (key !== "_default" && modelName.startsWith(key))
|
|
73
|
-
return val;
|
|
74
|
-
}
|
|
72
|
+
if (key !== "_default" && modelName.startsWith(key)) return val;
|
|
75
73
|
}
|
|
76
|
-
|
|
77
74
|
return DEFAULT_PRICING._default;
|
|
78
75
|
}
|
|
79
76
|
|
|
80
77
|
/**
|
|
81
|
-
* Calculate cost in USD
|
|
78
|
+
* Calculate cost in USD including cache tokens.
|
|
79
|
+
*
|
|
80
|
+
* @param {string} modelName
|
|
81
|
+
* @param {number} inputTokens — regular input tokens
|
|
82
|
+
* @param {number} outputTokens — output tokens
|
|
83
|
+
* @param {number} cacheCreationTokens — tokens written to cache (1.25x input price)
|
|
84
|
+
* @param {number} cacheReadTokens — tokens read from cache (0.1x input price)
|
|
82
85
|
*/
|
|
83
|
-
function calculateCost(modelName, inputTokens, outputTokens) {
|
|
86
|
+
function calculateCost(modelName, inputTokens, outputTokens, cacheCreationTokens = 0, cacheReadTokens = 0) {
|
|
84
87
|
const pricing = getModelPricing(modelName);
|
|
85
88
|
const inputCost = (inputTokens / 1_000_000) * pricing.input;
|
|
86
89
|
const outputCost = (outputTokens / 1_000_000) * pricing.output;
|
|
90
|
+
const cacheWriteCost = pricing.cache_write
|
|
91
|
+
? (cacheCreationTokens / 1_000_000) * pricing.cache_write
|
|
92
|
+
: (cacheCreationTokens / 1_000_000) * pricing.input * 1.25;
|
|
93
|
+
const cacheReadCost = pricing.cache_read
|
|
94
|
+
? (cacheReadTokens / 1_000_000) * pricing.cache_read
|
|
95
|
+
: (cacheReadTokens / 1_000_000) * pricing.input * 0.1;
|
|
96
|
+
|
|
87
97
|
return {
|
|
88
98
|
input: inputCost,
|
|
89
99
|
output: outputCost,
|
|
90
|
-
|
|
100
|
+
cacheWrite: cacheWriteCost,
|
|
101
|
+
cacheRead: cacheReadCost,
|
|
102
|
+
total: inputCost + outputCost + cacheWriteCost + cacheReadCost,
|
|
103
|
+
cacheSavings: cacheReadTokens > 0 ? ((cacheReadTokens / 1_000_000) * (pricing.input - (pricing.cache_read || pricing.input * 0.1))) : 0,
|
|
91
104
|
};
|
|
92
105
|
}
|
|
93
106
|
|
|
94
|
-
/**
|
|
95
|
-
* Load custom pricing overrides from .wolverine/pricing.json.
|
|
96
|
-
*/
|
|
97
107
|
function loadCustomPricing(projectRoot) {
|
|
98
108
|
const pricingPath = path.join(projectRoot, ".wolverine", "pricing.json");
|
|
99
109
|
if (fs.existsSync(pricingPath)) {
|
|
100
|
-
try {
|
|
101
|
-
_customPricing = JSON.parse(fs.readFileSync(pricingPath, "utf-8"));
|
|
102
|
-
} catch {}
|
|
110
|
+
try { _customPricing = JSON.parse(fs.readFileSync(pricingPath, "utf-8")); } catch {}
|
|
103
111
|
}
|
|
104
112
|
}
|
|
105
113
|
|
|
package/src/logger/token-tracker.js
CHANGED
|
@@ -64,11 +64,11 @@ class TokenTracker {
|
|
|
64
64
|
* @param {number} outputTokens - Completion/output tokens
|
|
65
65
|
* @param {string} tool - Optional tool name (e.g. "call_endpoint /time")
|
|
66
66
|
*/
|
|
67
|
-
record(model, category, inputTokens, outputTokens, tool, latencyMs, success) {
|
|
67
|
+
record(model, category, inputTokens, outputTokens, tool, latencyMs, success, cacheCreation, cacheRead) {
|
|
68
68
|
const total = (inputTokens || 0) + (outputTokens || 0);
|
|
69
69
|
|
|
70
|
-
// Calculate USD cost
|
|
71
|
-
const cost = calculateCost(model, inputTokens || 0, outputTokens || 0);
|
|
70
|
+
// Calculate USD cost including cache tokens
|
|
71
|
+
const cost = calculateCost(model, inputTokens || 0, outputTokens || 0, cacheCreation || 0, cacheRead || 0);
|
|
72
72
|
|
|
73
73
|
const entry = {
|
|
74
74
|
timestamp: Date.now(),
|
|
@@ -76,21 +76,27 @@ class TokenTracker {
|
|
|
76
76
|
category,
|
|
77
77
|
input: inputTokens || 0,
|
|
78
78
|
output: outputTokens || 0,
|
|
79
|
+
cacheCreation: cacheCreation || 0,
|
|
80
|
+
cacheRead: cacheRead || 0,
|
|
79
81
|
total,
|
|
80
82
|
cost: Math.round(cost.total * 1000000) / 1000000,
|
|
83
|
+
cacheSavings: Math.round((cost.cacheSavings || 0) * 1000000) / 1000000,
|
|
81
84
|
tool: tool || null,
|
|
82
85
|
latencyMs: latencyMs || 0,
|
|
83
86
|
success: success !== false,
|
|
84
87
|
};
|
|
85
88
|
|
|
86
89
|
// Accumulate by model
|
|
87
|
-
if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, minLatencyMs: Infinity, maxLatencyMs: 0 };
|
|
90
|
+
if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, minLatencyMs: Infinity, maxLatencyMs: 0, cacheCreation: 0, cacheRead: 0, cacheSavings: 0 };
|
|
88
91
|
const m = this._byModel[model];
|
|
89
92
|
m.input += entry.input;
|
|
90
93
|
m.output += entry.output;
|
|
91
94
|
m.total += total;
|
|
92
95
|
m.calls++;
|
|
93
96
|
m.cost += cost.total;
|
|
97
|
+
m.cacheCreation += entry.cacheCreation;
|
|
98
|
+
m.cacheRead += entry.cacheRead;
|
|
99
|
+
m.cacheSavings += entry.cacheSavings;
|
|
94
100
|
if (entry.success) m.successes++; else m.failures++;
|
|
95
101
|
if (latencyMs > 0) {
|
|
96
102
|
m.totalLatencyMs += latencyMs;
|
|
@@ -179,7 +185,10 @@ class TokenTracker {
|
|
|
179
185
|
total: m.total,
|
|
180
186
|
calls: m.calls,
|
|
181
187
|
cost: m.cost,
|
|
182
|
-
|
|
188
|
+
cacheCreation: m.cacheCreation || 0,
|
|
189
|
+
cacheRead: m.cacheRead || 0,
|
|
190
|
+
cacheSavings: Math.round((m.cacheSavings || 0) * 1000000) / 1000000,
|
|
191
|
+
successes: m.successes || m.calls,
|
|
183
192
|
failures: m.failures || 0,
|
|
184
193
|
successRate: m.calls > 0 ? Math.round(((m.successes || m.calls) / m.calls) * 100) : 0,
|
|
185
194
|
avgLatencyMs: m.calls > 0 && m.totalLatencyMs ? Math.round(m.totalLatencyMs / m.calls) : 0,
|
|
package/src/platform/telemetry.js
CHANGED
|
@@ -64,8 +64,9 @@ function collectHeartbeat(subsystems) {
|
|
|
64
64
|
totalTokens: tokenTracker?._totalTokens || usage?.session?.totalTokens || 0,
|
|
65
65
|
totalCost: tokenTracker?._totalCostUsd || usage?.session?.totalCostUsd || 0,
|
|
66
66
|
totalCalls: tokenTracker?._totalCalls || usage?.session?.totalCalls || 0,
|
|
67
|
+
totalCacheSavings: _sumCacheSavings(usage?.byModel || {}),
|
|
67
68
|
byCategory: usage?.byCategory || {},
|
|
68
|
-
byModel: usage?.byModel || {},
|
|
69
|
+
byModel: usage?.byModel || {}, // includes: latency, successRate, tokensPerSec, cacheSavings per model
|
|
69
70
|
byTool: usage?.byTool || {},
|
|
70
71
|
byProvider: _aggregateByProvider(usage?.byModel || {}),
|
|
71
72
|
},
|
|
@@ -91,6 +92,15 @@ function collectHeartbeat(subsystems) {
|
|
|
91
92
|
return redactObj(payload);
|
|
92
93
|
}
|
|
93
94
|
|
|
95
|
+
/** Sum cache savings across all models. */
|
|
96
|
+
function _sumCacheSavings(byModel) {
|
|
97
|
+
let total = 0;
|
|
98
|
+
for (const stats of Object.values(byModel || {})) {
|
|
99
|
+
total += stats.cacheSavings || 0;
|
|
100
|
+
}
|
|
101
|
+
return Math.round(total * 1000000) / 1000000;
|
|
102
|
+
}
|
|
103
|
+
|
|
94
104
|
/**
|
|
95
105
|
* Aggregate usage by provider (openai vs anthropic) from byModel data.
|
|
96
106
|
* Any new model/provider automatically flows through — no code changes needed.
|