wolverine-ai 3.3.0 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/core/ai-client.js +79 -17
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "wolverine-ai",
-  "version": "3.3.0",
+  "version": "3.4.0",
   "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
   "main": "src/index.js",
   "bin": {
package/src/core/ai-client.js
CHANGED
@@ -1,5 +1,6 @@
 const OpenAI = require("openai");
 const Anthropic = require("@anthropic-ai/sdk");
+const chalk = require("chalk");
 const { getModel, detectProvider } = require("./models");
 
 let _openaiClient = null;
@@ -9,12 +10,14 @@ let _tracker = null;
 function setTokenTracker(tracker) { _tracker = tracker; }
 
 function _extractTokens(usage) {
-  if (!usage) return { input: 0, output: 0 };
+  if (!usage) return { input: 0, output: 0, cacheCreation: 0, cacheRead: 0 };
   return {
     input: usage.prompt_tokens || usage.input_tokens || 0,
     output: usage.completion_tokens || usage.output_tokens || 0,
-
-
+    // Anthropic cache fields
+    cacheCreation: usage.cache_creation_input_tokens || usage.cache_write_tokens || 0,
+    // OpenAI uses cache_read_tokens, Anthropic uses cache_read_input_tokens
+    cacheRead: usage.cache_read_input_tokens || usage.cache_read_tokens || 0,
   };
 }
 
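The cacheCreation/cacheRead fields added to _extractTokens fall back across both providers' naming for cached tokens. A minimal sketch of the normalization, using hypothetical token counts and the Anthropic field names shown in the hunk:

// Anthropic-style usage object (values are hypothetical)
const usage = {
  input_tokens: 1200,
  output_tokens: 340,
  cache_creation_input_tokens: 800,
  cache_read_input_tokens: 400,
};

_extractTokens(usage);
// → { input: 1200, output: 340, cacheCreation: 800, cacheRead: 400 }
// An OpenAI chat-completions usage object would instead populate input/output
// from prompt_tokens/completion_tokens, with the cache fields defaulting to 0.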
@@ -121,9 +124,41 @@ function tokenParam(model, limit) {
   // Anthropic uses max_tokens directly (handled in _anthropicCall)
   if (isAnthropicModel(model)) return { max_tokens: effectiveLimit };
   if (isResponsesModel(model)) return { max_output_tokens: effectiveLimit };
-
-
-
+  // All modern OpenAI models use max_completion_tokens (max_tokens is deprecated)
+  return { max_completion_tokens: effectiveLimit };
+}
+
+/**
+ * Build OpenAI-specific params for reasoning models (o-series).
+ * - reasoning_effort: controls compute allocation (low/medium/high)
+ * - No temperature/top_p (forbidden on o-series)
+ */
+function _reasoningParams(model) {
+  if (!isReasoningModel(model)) return {};
+  // Default to medium effort — balances cost vs quality
+  // High effort for complex multi-file debugging, low for classification
+  return { reasoning_effort: process.env.WOLVERINE_REASONING_EFFORT || "medium" };
+}
+
+/**
+ * Retry with exponential backoff + jitter for rate limits.
+ */
+async function _withRetry(fn, maxRetries = 3) {
+  for (let attempt = 0; attempt <= maxRetries; attempt++) {
+    try {
+      return await fn();
+    } catch (err) {
+      const isRateLimit = err.status === 429 || err.code === "rate_limit_exceeded";
+      const isServerError = err.status >= 500;
+      if ((isRateLimit || isServerError) && attempt < maxRetries) {
+        const delay = Math.min(1000 * Math.pow(2, attempt) + Math.random() * 1000, 30000);
+        console.log(chalk.yellow(`  ⏱️  API ${isRateLimit ? "rate limited" : "error"} — retrying in ${Math.round(delay / 1000)}s (attempt ${attempt + 1}/${maxRetries})`));
+        await new Promise(r => setTimeout(r, delay));
+        continue;
+      }
+      throw err;
+    }
+  }
 }
 
 // ── Unified AI Call ──
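The _withRetry helper added above is what the later hunks wrap every provider call in. A minimal usage sketch (someProviderCall is a hypothetical stand-in, not part of the package):

// Retries only 429s and 5xx responses; anything else is rethrown immediately.
const response = await _withRetry(() => someProviderCall(params), 3);

// Backoff per the formula in the hunk: min(1000 * 2^attempt + jitter, 30000) ms,
// so roughly 1-2 s after the first failure, 2-3 s after the second, 4-5 s after
// the third, and the fourth failure propagates to the caller.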
@@ -206,7 +241,7 @@ async function _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tool
     else if (toolChoice && toolChoice !== "auto") params.tool_choice = { type: "auto" };
   }
 
-  const response = await client.messages.create(params);
+  const response = await _withRetry(() => client.messages.create(params));
   return _normalizeAnthropicResponse(response);
 }
 
@@ -292,7 +327,7 @@ async function _anthropicCallWithHistory({ model, messages, tools, maxTokens })
     params.tools = tools.map(_toAnthropicTool).filter(Boolean);
   }
 
-  const response = await client.messages.create(params);
+  const response = await _withRetry(() => client.messages.create(params));
 
   // Return in chat-compatible format
   const normalized = _normalizeAnthropicResponse(response);
@@ -377,7 +412,7 @@ async function _responsesCall(openai, { model, systemPrompt, userPrompt, maxToke
     });
   }
 
-  const response = await openai.responses.create(params);
+  const response = await _withRetry(() => openai.responses.create(params));
   let content = "";
   let toolCalls = null;
 
@@ -403,13 +438,31 @@ async function _chatCall(openai, { model, systemPrompt, userPrompt, maxTokens, t
   if (systemPrompt) messages.push({ role: "system", content: systemPrompt });
   messages.push({ role: "user", content: userPrompt });
 
+  // No temperature for o-series and gpt-5+ (forbidden, causes error)
   const noTemp = /^(o[1-9]|gpt-5)/.test(model);
-  const params = {
-
+  const params = {
+    model, messages,
+    ...(!noTemp ? { temperature: 0 } : {}),
+    ...tokenParam(model, maxTokens),
+    ..._reasoningParams(model),
+  };
+
+  if (tools && tools.length > 0) {
+    params.tools = tools;
+    params.tool_choice = toolChoice || "auto";
+    // Disable parallel calls for reliability — sequential is more predictable for healing
+    params.parallel_tool_calls = false;
+  }
 
-  const response = await openai.chat.completions.create(params);
+  const response = await _withRetry(() => openai.chat.completions.create(params));
   const choice = response.choices[0];
-  return {
+  return {
+    content: (choice.message.content || "").trim(),
+    toolCalls: choice.message.tool_calls || null,
+    usage: response.usage || {},
+    _raw: response,
+    _message: choice.message,
+  };
 }
 
 // ── OpenAI: Multi-turn (Responses + Chat) ──
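A short sketch of how the normalized return value from _chatCall above can be consumed, combined with the updated _extractTokens (handleToolCall is a hypothetical consumer, not part of this package):

const result = await _chatCall(openai, { model, systemPrompt, userPrompt, maxTokens });
if (result.toolCalls) {
  // passthrough of choice.message.tool_calls
  for (const call of result.toolCalls) await handleToolCall(call);
} else {
  console.log(result.content); // trimmed assistant text
}
const { input, output, cacheRead } = _extractTokens(result.usage);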
@@ -435,7 +488,7 @@ async function _responsesCallWithHistory(openai, { model, messages, tools, maxTo
     });
   }
 
-  const response = await openai.responses.create(params);
+  const response = await _withRetry(() => openai.responses.create(params));
   let content = "";
   let toolCalls = null;
 
@@ -454,9 +507,18 @@ async function _responsesCallWithHistory(openai, { model, messages, tools, maxTo
 
 async function _chatCallWithHistory(openai, { model, messages, tools, maxTokens }) {
   const noTemp = /^(o[1-9]|gpt-5)/.test(model);
-  const params = {
-
-
+  const params = {
+    model, messages,
+    ...(!noTemp ? { temperature: 0 } : {}),
+    ...tokenParam(model, maxTokens),
+    ..._reasoningParams(model),
+  };
+  if (tools && tools.length > 0) {
+    params.tools = tools;
+    params.tool_choice = "auto";
+    params.parallel_tool_calls = false;
+  }
+  return _withRetry(() => openai.chat.completions.create(params));
 }
 
 // ── Fast Path Repair ──