wolverine-ai 2.1.0 → 2.1.1

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wolverine-ai",
3
- "version": "2.1.0",
3
+ "version": "2.1.1",
4
4
  "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -36,9 +36,9 @@
36
36
  "coding": "claude-opus-4-6",
37
37
  "chat": "claude-haiku-4-5",
38
38
  "tool": "claude-opus-4-6",
39
- "classifier": "claude-haiku-4-5",
39
+ "classifier": "gpt-4o-mini",
40
40
  "audit": "gpt-4o-mini",
41
- "compacting": "gpt-4o-mini",
41
+ "compacting": "claude-sonnet-4-6",
42
42
  "research": "claude-sonnet-4-6",
43
43
  "embedding": "text-embedding-3-small"
44
44
  },
@@ -16,10 +16,10 @@ function _extractTokens(usage) {
16
16
  };
17
17
  }
18
18
 
19
- function _track(model, category, usage, tool) {
19
+ function _track(model, category, usage, tool, latencyMs, success) {
20
20
  if (!_tracker) return;
21
21
  const { input, output } = _extractTokens(usage);
22
- _tracker.record(model, category, input, output, tool);
22
+ _tracker.record(model, category, input, output, tool, latencyMs, success);
23
23
  }
24
24
 
25
25
  // ── Client Management ──
@@ -70,34 +70,50 @@ function tokenParam(model, limit) {
70
70
 
71
71
  async function aiCall({ model, systemPrompt, userPrompt, maxTokens = 2048, tools, toolChoice, category = "chat", tool }) {
72
72
  const provider = detectProvider(model);
73
+ const startMs = Date.now();
73
74
  let result;
74
75
 
75
- if (provider === "anthropic") {
76
- result = await _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
77
- } else if (isResponsesModel(model)) {
78
- result = await _responsesCall(_getOpenAIClient(), { model, systemPrompt, userPrompt, maxTokens, tools });
79
- } else {
80
- result = await _chatCall(_getOpenAIClient(), { model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
81
- }
76
+ try {
77
+ if (provider === "anthropic") {
78
+ result = await _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
79
+ } else if (isResponsesModel(model)) {
80
+ result = await _responsesCall(_getOpenAIClient(), { model, systemPrompt, userPrompt, maxTokens, tools });
81
+ } else {
82
+ result = await _chatCall(_getOpenAIClient(), { model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
83
+ }
82
84
 
83
- _track(model, category, result.usage, tool);
84
- return result;
85
+ const latencyMs = Date.now() - startMs;
86
+ _track(model, category, result.usage, tool, latencyMs, true);
87
+ return result;
88
+ } catch (err) {
89
+ const latencyMs = Date.now() - startMs;
90
+ _track(model, category, {}, tool, latencyMs, false);
91
+ throw err;
92
+ }
85
93
  }
86
94
 
87
95
  async function aiCallWithHistory({ model, messages, tools, maxTokens = 4096, category = "chat", tool }) {
88
96
  const provider = detectProvider(model);
97
+ const startMs = Date.now();
89
98
  let result;
90
99
 
91
- if (provider === "anthropic") {
92
- result = await _anthropicCallWithHistory({ model, messages, tools, maxTokens });
93
- } else if (isResponsesModel(model)) {
94
- result = await _responsesCallWithHistory(_getOpenAIClient(), { model, messages, tools, maxTokens });
95
- } else {
96
- result = await _chatCallWithHistory(_getOpenAIClient(), { model, messages, tools, maxTokens });
97
- }
100
+ try {
101
+ if (provider === "anthropic") {
102
+ result = await _anthropicCallWithHistory({ model, messages, tools, maxTokens });
103
+ } else if (isResponsesModel(model)) {
104
+ result = await _responsesCallWithHistory(_getOpenAIClient(), { model, messages, tools, maxTokens });
105
+ } else {
106
+ result = await _chatCallWithHistory(_getOpenAIClient(), { model, messages, tools, maxTokens });
107
+ }
98
108
 
99
- _track(model, category, result.usage, tool);
100
- return result;
109
+ const latencyMs = Date.now() - startMs;
110
+ _track(model, category, result.usage, tool, latencyMs, true);
111
+ return result;
112
+ } catch (err) {
113
+ const latencyMs = Date.now() - startMs;
114
+ _track(model, category, {}, tool, latencyMs, false);
115
+ throw err;
116
+ }
101
117
  }
102
118
 
103
119
  // ── Anthropic Implementation ──
@@ -64,9 +64,8 @@ class TokenTracker {
64
64
  * @param {number} outputTokens - Completion/output tokens
65
65
  * @param {string} tool - Optional tool name (e.g. "call_endpoint /time")
66
66
  */
67
- record(model, category, inputTokens, outputTokens, tool) {
67
+ record(model, category, inputTokens, outputTokens, tool, latencyMs, success) {
68
68
  const total = (inputTokens || 0) + (outputTokens || 0);
69
- if (total === 0) return;
70
69
 
71
70
  // Calculate USD cost
72
71
  const cost = calculateCost(model, inputTokens || 0, outputTokens || 0);
@@ -78,17 +77,26 @@ class TokenTracker {
78
77
  input: inputTokens || 0,
79
78
  output: outputTokens || 0,
80
79
  total,
81
- cost: Math.round(cost.total * 1000000) / 1000000, // 6 decimal places
80
+ cost: Math.round(cost.total * 1000000) / 1000000,
82
81
  tool: tool || null,
82
+ latencyMs: latencyMs || 0,
83
+ success: success !== false,
83
84
  };
84
85
 
85
86
  // Accumulate by model
86
- if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0 };
87
- this._byModel[model].input += entry.input;
88
- this._byModel[model].output += entry.output;
89
- this._byModel[model].total += total;
90
- this._byModel[model].calls++;
91
- this._byModel[model].cost += cost.total;
87
+ if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, minLatencyMs: Infinity, maxLatencyMs: 0 };
88
+ const m = this._byModel[model];
89
+ m.input += entry.input;
90
+ m.output += entry.output;
91
+ m.total += total;
92
+ m.calls++;
93
+ m.cost += cost.total;
94
+ if (entry.success) m.successes++; else m.failures++;
95
+ if (latencyMs > 0) {
96
+ m.totalLatencyMs += latencyMs;
97
+ if (latencyMs < m.minLatencyMs) m.minLatencyMs = latencyMs;
98
+ if (latencyMs > m.maxLatencyMs) m.maxLatencyMs = latencyMs;
99
+ }
92
100
 
93
101
  // Accumulate by category
94
102
  if (!this._byCategory[category]) this._byCategory[category] = { input: 0, output: 0, total: 0, calls: 0, cost: 0 };
@@ -142,7 +150,7 @@ class TokenTracker {
142
150
  duration: sessionDuration,
143
151
  tokensPerMinute,
144
152
  },
145
- byModel: this._byModel,
153
+ byModel: this._formatModelStats(),
146
154
  byCategory: this._byCategory,
147
155
  byTool: this._byTool,
148
156
  // Recent in-memory timeline
@@ -153,10 +161,37 @@ class TokenTracker {
153
161
  output: e.output,
154
162
  cat: e.category,
155
163
  model: e.model,
164
+ latencyMs: e.latencyMs || 0,
165
+ success: e.success !== false,
156
166
  })),
157
167
  };
158
168
  }
159
169
 
170
+ /**
171
+ * Format model stats with computed performance metrics.
172
+ */
173
+ _formatModelStats() {
174
+ const result = {};
175
+ for (const [model, m] of Object.entries(this._byModel)) {
176
+ result[model] = {
177
+ input: m.input,
178
+ output: m.output,
179
+ total: m.total,
180
+ calls: m.calls,
181
+ cost: m.cost,
182
+ successes: m.successes || m.calls, // backwards compat
183
+ failures: m.failures || 0,
184
+ successRate: m.calls > 0 ? Math.round(((m.successes || m.calls) / m.calls) * 100) : 0,
185
+ avgLatencyMs: m.calls > 0 && m.totalLatencyMs ? Math.round(m.totalLatencyMs / m.calls) : 0,
186
+ minLatencyMs: m.minLatencyMs === Infinity ? 0 : (m.minLatencyMs || 0),
187
+ maxLatencyMs: m.maxLatencyMs || 0,
188
+ tokensPerSecond: m.totalLatencyMs > 0 ? Math.round((m.total / (m.totalLatencyMs / 1000)) * 10) / 10 : 0,
189
+ costPerCall: m.calls > 0 ? Math.round((m.cost / m.calls) * 1000000) / 1000000 : 0,
190
+ };
191
+ }
192
+ return result;
193
+ }
194
+
160
195
  /**
161
196
  * Load full history from JSONL file. For dashboard charts across sessions.
162
197
  * @param {number} limit — max entries to return (default: 500)