wolverine-ai 3.3.0 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "wolverine-ai",
-  "version": "3.3.0",
+  "version": "3.4.0",
   "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
   "main": "src/index.js",
   "bin": {
@@ -1,5 +1,6 @@
 const OpenAI = require("openai");
 const Anthropic = require("@anthropic-ai/sdk");
+const chalk = require("chalk");
 const { getModel, detectProvider } = require("./models");

 let _openaiClient = null;
@@ -9,12 +10,14 @@ let _tracker = null;
 function setTokenTracker(tracker) { _tracker = tracker; }

 function _extractTokens(usage) {
-  if (!usage) return { input: 0, output: 0 };
+  if (!usage) return { input: 0, output: 0, cacheCreation: 0, cacheRead: 0 };
   return {
     input: usage.prompt_tokens || usage.input_tokens || 0,
     output: usage.completion_tokens || usage.output_tokens || 0,
-    cacheCreation: usage.cache_creation_input_tokens || 0,
-    cacheRead: usage.cache_read_input_tokens || 0,
+    // Anthropic cache fields
+    cacheCreation: usage.cache_creation_input_tokens || usage.cache_write_tokens || 0,
+    // OpenAI uses cache_read_tokens, Anthropic uses cache_read_input_tokens
+    cacheRead: usage.cache_read_input_tokens || usage.cache_read_tokens || 0,
   };
 }

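For reference, the widened _extractTokens() now returns the same four-field shape for both providers. A minimal sketch of the mapping, assuming an Anthropic-style usage payload (the numbers are made up):

    _extractTokens({
      input_tokens: 1200,
      output_tokens: 340,
      cache_creation_input_tokens: 800,
      cache_read_input_tokens: 0,
    });
    // → { input: 1200, output: 340, cacheCreation: 800, cacheRead: 0 }

An OpenAI-style payload (prompt_tokens / completion_tokens) falls through the same || chains to the same shape.
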
@@ -121,9 +124,41 @@ function tokenParam(model, limit) {
   // Anthropic uses max_tokens directly (handled in _anthropicCall)
   if (isAnthropicModel(model)) return { max_tokens: effectiveLimit };
   if (isResponsesModel(model)) return { max_output_tokens: effectiveLimit };
-  const usesNewParam = /^(o[1-9]|gpt-5|gpt-4o)/.test(model) || model.includes("nano");
-  if (usesNewParam) return { max_completion_tokens: effectiveLimit };
-  return { max_tokens: effectiveLimit };
+  // All modern OpenAI models use max_completion_tokens (max_tokens is deprecated)
+  return { max_completion_tokens: effectiveLimit };
+}
+
+/**
+ * Build OpenAI-specific params for reasoning models (o-series).
+ * - reasoning_effort: controls compute allocation (low/medium/high)
+ * - No temperature/top_p (forbidden on o-series)
+ */
+function _reasoningParams(model) {
+  if (!isReasoningModel(model)) return {};
+  // Default to medium effort — balances cost vs quality
+  // High effort for complex multi-file debugging, low for classification
+  return { reasoning_effort: process.env.WOLVERINE_REASONING_EFFORT || "medium" };
+}
+
+/**
+ * Retry with exponential backoff + jitter for rate limits.
+ */
+async function _withRetry(fn, maxRetries = 3) {
+  for (let attempt = 0; attempt <= maxRetries; attempt++) {
+    try {
+      return await fn();
+    } catch (err) {
+      const isRateLimit = err.status === 429 || err.code === "rate_limit_exceeded";
+      const isServerError = err.status >= 500;
+      if ((isRateLimit || isServerError) && attempt < maxRetries) {
+        const delay = Math.min(1000 * Math.pow(2, attempt) + Math.random() * 1000, 30000);
+        console.log(chalk.yellow(` ⏱️ API ${isRateLimit ? "rate limited" : "error"} — retrying in ${Math.round(delay / 1000)}s (attempt ${attempt + 1}/${maxRetries})`));
+        await new Promise(r => setTimeout(r, delay));
+        continue;
+      }
+      throw err;
+    }
+  }
 }

 // ── Unified AI Call ──
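
The new _withRetry() helper retries 429s and 5xx responses with exponential backoff plus random jitter, capped at 30 seconds, and allows up to three retries by default before the error is rethrown. Roughly, the schedule works out to (exact delays vary with the jitter term):

    attempt 0: 1000 * 2^0 + jitter ≈ 1–2 s
    attempt 1: 1000 * 2^1 + jitter ≈ 2–3 s
    attempt 2: 1000 * 2^2 + jitter ≈ 4–5 s
    (each computed delay is clamped to 30000 ms by Math.min)
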
@@ -206,7 +241,7 @@ async function _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tool
     else if (toolChoice && toolChoice !== "auto") params.tool_choice = { type: "auto" };
   }

-  const response = await client.messages.create(params);
+  const response = await _withRetry(() => client.messages.create(params));
   return _normalizeAnthropicResponse(response);
 }

@@ -292,7 +327,7 @@ async function _anthropicCallWithHistory({ model, messages, tools, maxTokens })
     params.tools = tools.map(_toAnthropicTool).filter(Boolean);
   }

-  const response = await client.messages.create(params);
+  const response = await _withRetry(() => client.messages.create(params));

   // Return in chat-compatible format
   const normalized = _normalizeAnthropicResponse(response);
@@ -377,7 +412,7 @@ async function _responsesCall(openai, { model, systemPrompt, userPrompt, maxToke
     });
   }

-  const response = await openai.responses.create(params);
+  const response = await _withRetry(() => openai.responses.create(params));
   let content = "";
   let toolCalls = null;

@@ -403,13 +438,31 @@ async function _chatCall(openai, { model, systemPrompt, userPrompt, maxTokens, t
   if (systemPrompt) messages.push({ role: "system", content: systemPrompt });
   messages.push({ role: "user", content: userPrompt });

+  // No temperature for o-series and gpt-5+ (forbidden, causes error)
   const noTemp = /^(o[1-9]|gpt-5)/.test(model);
-  const params = { model, messages, ...(!noTemp ? { temperature: 0 } : {}), ...tokenParam(model, maxTokens) };
-  if (tools && tools.length > 0) { params.tools = tools; params.tool_choice = toolChoice || "auto"; }
+  const params = {
+    model, messages,
+    ...(!noTemp ? { temperature: 0 } : {}),
+    ...tokenParam(model, maxTokens),
+    ..._reasoningParams(model),
+  };
+
+  if (tools && tools.length > 0) {
+    params.tools = tools;
+    params.tool_choice = toolChoice || "auto";
+    // Disable parallel calls for reliability — sequential is more predictable for healing
+    params.parallel_tool_calls = false;
+  }

-  const response = await openai.chat.completions.create(params);
+  const response = await _withRetry(() => openai.chat.completions.create(params));
   const choice = response.choices[0];
-  return { content: (choice.message.content || "").trim(), toolCalls: choice.message.tool_calls || null, usage: response.usage || {}, _raw: response, _message: choice.message };
+  return {
+    content: (choice.message.content || "").trim(),
+    toolCalls: choice.message.tool_calls || null,
+    usage: response.usage || {},
+    _raw: response,
+    _message: choice.message,
+  };
 }

 // ── OpenAI: Multi-turn (Responses + Chat) ──
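
Taken together, the _chatCall() changes mean a Chat Completions request for an o-series model now looks roughly like the sketch below. The model name and token limit are placeholder values, and this assumes isReasoningModel() matches o-series models; reasoning_effort comes from _reasoningParams() and can be overridden via WOLVERINE_REASONING_EFFORT:

    {
      model: "o3-mini",              // placeholder
      messages: [...],
      max_completion_tokens: 4096,   // from tokenParam(); temperature omitted for o-series
      reasoning_effort: "medium",    // from _reasoningParams()
      tools: [...],
      tool_choice: "auto",
      parallel_tool_calls: false,    // sequential tool calls for predictable healing
    }
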
@@ -435,7 +488,7 @@ async function _responsesCallWithHistory(openai, { model, messages, tools, maxTo
     });
   }

-  const response = await openai.responses.create(params);
+  const response = await _withRetry(() => openai.responses.create(params));
   let content = "";
   let toolCalls = null;

@@ -454,9 +507,18 @@ async function _responsesCallWithHistory(openai, { model, messages, tools, maxTo

 async function _chatCallWithHistory(openai, { model, messages, tools, maxTokens }) {
   const noTemp = /^(o[1-9]|gpt-5)/.test(model);
-  const params = { model, messages, ...(!noTemp ? { temperature: 0 } : {}), ...tokenParam(model, maxTokens) };
-  if (tools && tools.length > 0) { params.tools = tools; params.tool_choice = "auto"; }
-  return openai.chat.completions.create(params);
+  const params = {
+    model, messages,
+    ...(!noTemp ? { temperature: 0 } : {}),
+    ...tokenParam(model, maxTokens),
+    ..._reasoningParams(model),
+  };
+  if (tools && tools.length > 0) {
+    params.tools = tools;
+    params.tool_choice = "auto";
+    params.parallel_tool_calls = false;
+  }
+  return _withRetry(() => openai.chat.completions.create(params));
 }

 // ── Fast Path Repair ──