wolverine-ai 2.2.2 → 2.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/core/ai-client.js +65 -4
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wolverine-ai",
|
|
3
|
-
"version": "2.2.2",
|
|
3
|
+
"version": "2.2.3",
|
|
4
4
|
"description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
package/src/core/ai-client.js
CHANGED
|
@@ -57,12 +57,71 @@ function isReasoningModel(model) {
|
|
|
57
57
|
|
|
58
58
|
/**
 * Tell whether a model is served by Anthropic.
 *
 * @param {string} model - Model identifier, passed straight to detectProvider.
 * @returns {boolean} True when detectProvider(model) is exactly "anthropic".
 */
function isAnthropicModel(model) {
  const provider = detectProvider(model);
  return provider === "anthropic";
}
|
|
59
59
|
|
|
60
|
+
/**
 * Per-model output-token caps, keyed by model name.
 *
 * _getOutputLimit looks a model up here by exact name or by treating each key
 * as a prefix of a longer (e.g. dated) model id.
 *
 * Most values are a nominal output limit padded by 10% (16k -> 17600,
 * 8k -> 8800, 4k -> 4400, 66k -> 72600, 100k -> 110000).
 * NOTE(review): because of that padding an entry can be larger than the cap the
 * provider really enforces, so a request clamped to one of these values may
 * still be rejected — confirm each figure against the provider's model docs.
 *
 * The table is frozen: it is shared module state and must not be edited at runtime.
 */
const MODEL_OUTPUT_LIMITS = Object.freeze({
  // OpenAI
  "gpt-4o": 17600,            // 16k + 10%
  "gpt-4o-mini": 17600,
  "gpt-5": 17600,
  "gpt-5.4": 17600,
  "gpt-5.4-mini": 17600,
  "gpt-5.4-nano": 17600,
  "gpt-5-nano": 17600,
  "o1": 110000,               // 100k + 10% (reasoning model)
  "o1-mini": 72600,           // 66k + 10%
  "o3": 110000,
  "o3-mini": 72600,
  "o4-mini": 72600,
  "gpt-5.1-codex": 17600,
  "gpt-5.3-codex": 17600,
  "codex-mini": 17600,
  // Anthropic — each tier has a different output limit
  "claude-opus-4": 32000,     // 32k, stored without the 10% padding
  "claude-sonnet-4": 17600,   // 16k + 10%
  "claude-haiku-4": 8800,     // 8k + 10%
  "claude-3-5-sonnet": 8800,
  "claude-3-5-haiku": 8800,
  "claude-3-opus": 4400,      // 4k + 10%
  "claude-3-sonnet": 4400,
  "claude-3-haiku": 4400,
});
|
|
91
|
+
|
|
92
|
+
/**
 * Resolve the output-token cap for a model.
 *
 * Chooses the MODEL_OUTPUT_LIMITS entry whose key is the LONGEST prefix of
 * `model`; an exact name is simply the longest possible prefix. Taking the
 * first matching key instead would be wrong for ids such as
 * "o1-mini-2024-09-12", which also start with the shorter "o1" key listed
 * earlier in the table. Only the table's own entries are consulted, so names
 * like "constructor" cannot pick up an inherited property.
 *
 * @param {string} model - Model identifier (bare or with a version/date suffix).
 * @returns {number} The matching table value, or a default when nothing
 *   matches: 8800 if isAnthropicModel(model) is true, otherwise 17600.
 */
function _getOutputLimit(model) {
  let bestPrefix = "";
  let bestLimit;
  for (const [prefix, limit] of Object.entries(MODEL_OUTPUT_LIMITS)) {
    // Strictly longer wins, so table order no longer affects the result.
    if (prefix.length > bestPrefix.length && model.startsWith(prefix)) {
      bestPrefix = prefix;
      bestLimit = limit;
    }
  }
  if (bestLimit !== undefined) return bestLimit;

  // Defaults with 10% buffer
  if (isAnthropicModel(model)) return 8800; // 8k + 10%
  return 17600; // 16k + 10%
}
|
|
107
|
+
|
|
108
|
+
/**
 * Produce the token-limit field for an API request.
 *
 * The requested limit is quadrupled (with a floor of 4096) when
 * isReasoningModel(model) is true, then clamped to _getOutputLimit(model).
 * The field name depends on the model:
 *   - isAnthropicModel  -> max_tokens
 *   - isResponsesModel  -> max_output_tokens
 *   - name starts with o1..o9 / gpt-5 / gpt-4o, or contains "nano"
 *                       -> max_completion_tokens
 *   - anything else     -> max_tokens
 *
 * @param {string} model - Model identifier.
 * @param {number} limit - Output tokens the caller would like.
 * @returns {object} Object with exactly one of the fields above set to the
 *   effective limit.
 */
function tokenParam(model, limit) {
  const ceiling = _getOutputLimit(model);

  // Reasoning models also spend output budget on chain-of-thought.
  let requested = limit;
  if (isReasoningModel(model)) {
    requested = Math.max(limit * 4, 4096);
  }
  const effectiveLimit = Math.min(requested, ceiling);

  let fieldName;
  if (isAnthropicModel(model)) {
    fieldName = "max_tokens";
  } else if (isResponsesModel(model)) {
    fieldName = "max_output_tokens";
  } else if (/^(o[1-9]|gpt-5|gpt-4o)/.test(model) || model.includes("nano")) {
    fieldName = "max_completion_tokens";
  } else {
    fieldName = "max_tokens";
  }
  return { [fieldName]: effectiveLimit };
}
|
|
67
126
|
|
|
68
127
|
// ── Unified AI Call ──
|
|
@@ -121,10 +180,11 @@ async function aiCallWithHistory({ model, messages, tools, maxTokens = 4096, cat
|
|
|
121
180
|
|
|
122
181
|
async function _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tools, toolChoice }) {
|
|
123
182
|
const client = _getAnthropicClient();
|
|
183
|
+
const outputLimit = Math.min(maxTokens, _getOutputLimit(model));
|
|
124
184
|
|
|
125
185
|
const params = {
|
|
126
186
|
model,
|
|
127
|
-
max_tokens:
|
|
187
|
+
max_tokens: outputLimit,
|
|
128
188
|
messages: [{ role: "user", content: userPrompt }],
|
|
129
189
|
};
|
|
130
190
|
|
|
@@ -203,9 +263,10 @@ async function _anthropicCallWithHistory({ model, messages, tools, maxTokens })
|
|
|
203
263
|
}
|
|
204
264
|
}
|
|
205
265
|
|
|
266
|
+
const outputLimit = Math.min(maxTokens, _getOutputLimit(model));
|
|
206
267
|
const params = {
|
|
207
268
|
model,
|
|
208
|
-
max_tokens:
|
|
269
|
+
max_tokens: outputLimit,
|
|
209
270
|
messages: merged,
|
|
210
271
|
};
|
|
211
272
|
|