wolverine-ai 2.2.1 → 2.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wolverine-ai",
3
- "version": "2.2.1",
3
+ "version": "2.2.3",
4
4
  "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -57,12 +57,71 @@ function isReasoningModel(model) {
57
57
 
58
58
  function isAnthropicModel(model) { return detectProvider(model) === "anthropic"; }
59
59
 
60
+ /**
61
+ * Per-model max output token limits (with 10% overestimation buffer).
62
+ * These are the actual API limits — requesting more than this fails.
63
+ */
64
+ const MODEL_OUTPUT_LIMITS = {
65
+ // OpenAI — generous output limits
66
+ "gpt-4o": 17600, // 16384 + 10%
67
+ "gpt-4o-mini": 17600,
68
+ "gpt-5": 17600,
69
+ "gpt-5.4": 17600,
70
+ "gpt-5.4-mini": 17600,
71
+ "gpt-5.4-nano": 17600,
72
+ "gpt-5-nano": 17600,
73
+ "o1": 110000, // 100k + 10% (reasoning model, huge output)
74
+ "o1-mini": 72600, // 66k + 10%
75
+ "o3": 110000,
76
+ "o3-mini": 72600,
77
+ "o4-mini": 72600,
78
+ "gpt-5.1-codex": 17600,
79
+ "gpt-5.3-codex": 17600,
80
+ "codex-mini": 17600,
81
+ // Anthropic — each tier has different output limits
82
+ "claude-opus-4": 32000, // 32k max output (no buffer needed, already generous)
83
+ "claude-sonnet-4": 17600, // 16k + 10%
84
+ "claude-haiku-4": 8800, // 8k + 10%
85
+ "claude-3-5-sonnet": 8800,
86
+ "claude-3-5-haiku": 8800,
87
+ "claude-3-opus": 4400, // 4k + 10%
88
+ "claude-3-sonnet": 4400,
89
+ "claude-3-haiku": 4400,
90
+ };
91
+
92
+ /**
93
+ * Get the max output tokens for a model (with 10% buffer).
94
+ * Falls back to sensible defaults if model not in table.
95
+ */
96
+ function _getOutputLimit(model) {
97
+ // Exact match
98
+ if (MODEL_OUTPUT_LIMITS[model]) return MODEL_OUTPUT_LIMITS[model];
99
+ // Prefix match for dated versions (e.g. claude-sonnet-4-6, claude-haiku-4-5-20250414). NOTE: matches in insertion order, so a longer key shadowed by a shorter prefix (e.g. "o1-mini-…" matching "o1" first) resolves to the shorter key's limit.
100
+ for (const [prefix, limit] of Object.entries(MODEL_OUTPUT_LIMITS)) {
101
+ if (model.startsWith(prefix)) return limit;
102
+ }
103
+ // Defaults with 10% buffer
104
+ if (isAnthropicModel(model)) return 8800; // 8k + 10% (safe Anthropic default)
105
+ return 17600; // 16k + 10% (safe OpenAI default)
106
+ }
107
+
108
+ /**
109
+ * Build token limit params for the API call.
110
+ * Respects per-model output limits and adds reasoning headroom.
111
+ */
60
112
  function tokenParam(model, limit) {
61
- const effectiveLimit = isReasoningModel(model) ? Math.max(limit * 4, 4096) : limit;
113
+ const maxOutput = _getOutputLimit(model);
114
+
115
+ // Reasoning models get 4x to accommodate chain-of-thought, but capped at model max
116
+ let effectiveLimit = isReasoningModel(model) ? Math.max(limit * 4, 4096) : limit;
117
+ effectiveLimit = Math.min(effectiveLimit, maxOutput);
118
+
119
+ // Anthropic uses max_tokens directly (handled in _anthropicCall)
120
+ if (isAnthropicModel(model)) return { max_tokens: effectiveLimit };
62
121
  if (isResponsesModel(model)) return { max_output_tokens: effectiveLimit };
63
122
  const usesNewParam = /^(o[1-9]|gpt-5|gpt-4o)/.test(model) || model.includes("nano");
64
123
  if (usesNewParam) return { max_completion_tokens: effectiveLimit };
65
- return { max_tokens: limit };
124
+ return { max_tokens: effectiveLimit };
66
125
  }
67
126
 
68
127
  // ── Unified AI Call ──
@@ -121,10 +180,11 @@ async function aiCallWithHistory({ model, messages, tools, maxTokens = 4096, cat
121
180
 
122
181
  async function _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tools, toolChoice }) {
123
182
  const client = _getAnthropicClient();
183
+ const outputLimit = Math.min(maxTokens, _getOutputLimit(model));
124
184
 
125
185
  const params = {
126
186
  model,
127
- max_tokens: maxTokens,
187
+ max_tokens: outputLimit,
128
188
  messages: [{ role: "user", content: userPrompt }],
129
189
  };
130
190
 
@@ -203,9 +263,10 @@ async function _anthropicCallWithHistory({ model, messages, tools, maxTokens })
203
263
  }
204
264
  }
205
265
 
266
+ const outputLimit = Math.min(maxTokens, _getOutputLimit(model));
206
267
  const params = {
207
268
  model,
208
- max_tokens: maxTokens,
269
+ max_tokens: outputLimit,
209
270
  messages: merged,
210
271
  };
211
272
 
@@ -28,7 +28,7 @@ function normalizeRoute(routePath) {
28
28
  */
29
29
 
30
30
  class ErrorMonitor {
31
- constructor({ threshold = 3, windowMs = 30000, cooldownMs = 60000, onError, logger } = {}) {
31
+ constructor({ threshold = 1, windowMs = 30000, cooldownMs = 60000, onError, logger } = {}) {
32
32
  this.threshold = threshold; // consecutive 5xx before triggering heal
33
33
  this.windowMs = windowMs; // time window for counting errors
34
34
  this.cooldownMs = cooldownMs; // cooldown after triggering (prevent heal spam)