wolverine-ai 3.3.0 → 3.4.1

This diff shows the content changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "wolverine-ai",
-  "version": "3.3.0",
+  "version": "3.4.1",
   "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
   "main": "src/index.js",
   "bin": {
@@ -1,5 +1,6 @@
 const OpenAI = require("openai");
 const Anthropic = require("@anthropic-ai/sdk");
+const chalk = require("chalk");
 const { getModel, detectProvider } = require("./models");
 
 let _openaiClient = null;
@@ -7,14 +8,20 @@ let _anthropicClient = null;
 let _tracker = null;
 
 function setTokenTracker(tracker) { _tracker = tracker; }
+function getTrackerSnapshot() {
+  if (!_tracker) return { tokens: 0, cost: 0, calls: 0 };
+  return { tokens: _tracker._totalTokens || 0, cost: _tracker._totalCostUsd || 0, calls: _tracker._totalCalls || 0 };
+}
 
 function _extractTokens(usage) {
-  if (!usage) return { input: 0, output: 0 };
+  if (!usage) return { input: 0, output: 0, cacheCreation: 0, cacheRead: 0 };
   return {
     input: usage.prompt_tokens || usage.input_tokens || 0,
     output: usage.completion_tokens || usage.output_tokens || 0,
-    cacheCreation: usage.cache_creation_input_tokens || 0,
-    cacheRead: usage.cache_read_input_tokens || 0,
+    // Anthropic cache fields
+    cacheCreation: usage.cache_creation_input_tokens || usage.cache_write_tokens || 0,
+    // OpenAI uses cache_read_tokens, Anthropic uses cache_read_input_tokens
+    cacheRead: usage.cache_read_input_tokens || usage.cache_read_tokens || 0,
   };
 }
 
@@ -121,9 +128,41 @@ function tokenParam(model, limit) {
   // Anthropic uses max_tokens directly (handled in _anthropicCall)
   if (isAnthropicModel(model)) return { max_tokens: effectiveLimit };
   if (isResponsesModel(model)) return { max_output_tokens: effectiveLimit };
-  const usesNewParam = /^(o[1-9]|gpt-5|gpt-4o)/.test(model) || model.includes("nano");
-  if (usesNewParam) return { max_completion_tokens: effectiveLimit };
-  return { max_tokens: effectiveLimit };
+  // All modern OpenAI models use max_completion_tokens (max_tokens is deprecated)
+  return { max_completion_tokens: effectiveLimit };
+}
+
+/**
+ * Build OpenAI-specific params for reasoning models (o-series).
+ * - reasoning_effort: controls compute allocation (low/medium/high)
+ * - No temperature/top_p (forbidden on o-series)
+ */
+function _reasoningParams(model) {
+  if (!isReasoningModel(model)) return {};
+  // Default to medium effort — balances cost vs quality
+  // High effort for complex multi-file debugging, low for classification
+  return { reasoning_effort: process.env.WOLVERINE_REASONING_EFFORT || "medium" };
+}
+
+/**
+ * Retry with exponential backoff + jitter for rate limits.
+ */
+async function _withRetry(fn, maxRetries = 3) {
+  for (let attempt = 0; attempt <= maxRetries; attempt++) {
+    try {
+      return await fn();
+    } catch (err) {
+      const isRateLimit = err.status === 429 || err.code === "rate_limit_exceeded";
+      const isServerError = err.status >= 500;
+      if ((isRateLimit || isServerError) && attempt < maxRetries) {
+        const delay = Math.min(1000 * Math.pow(2, attempt) + Math.random() * 1000, 30000);
+        console.log(chalk.yellow(` ⏱️ API ${isRateLimit ? "rate limited" : "error"} — retrying in ${Math.round(delay / 1000)}s (attempt ${attempt + 1}/${maxRetries})`));
+        await new Promise(r => setTimeout(r, delay));
+        continue;
+      }
+      throw err;
+    }
+  }
 }
 
 // ── Unified AI Call ──
@@ -206,7 +245,7 @@ async function _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tool
     else if (toolChoice && toolChoice !== "auto") params.tool_choice = { type: "auto" };
   }
 
-  const response = await client.messages.create(params);
+  const response = await _withRetry(() => client.messages.create(params));
   return _normalizeAnthropicResponse(response);
 }
 
@@ -292,7 +331,7 @@ async function _anthropicCallWithHistory({ model, messages, tools, maxTokens })
     params.tools = tools.map(_toAnthropicTool).filter(Boolean);
   }
 
-  const response = await client.messages.create(params);
+  const response = await _withRetry(() => client.messages.create(params));
 
   // Return in chat-compatible format
   const normalized = _normalizeAnthropicResponse(response);
@@ -377,7 +416,7 @@ async function _responsesCall(openai, { model, systemPrompt, userPrompt, maxToke
     });
   }
 
-  const response = await openai.responses.create(params);
+  const response = await _withRetry(() => openai.responses.create(params));
   let content = "";
   let toolCalls = null;
 
@@ -403,13 +442,31 @@ async function _chatCall(openai, { model, systemPrompt, userPrompt, maxTokens, t
   if (systemPrompt) messages.push({ role: "system", content: systemPrompt });
   messages.push({ role: "user", content: userPrompt });
 
+  // No temperature for o-series and gpt-5+ (forbidden, causes error)
   const noTemp = /^(o[1-9]|gpt-5)/.test(model);
-  const params = { model, messages, ...(!noTemp ? { temperature: 0 } : {}), ...tokenParam(model, maxTokens) };
-  if (tools && tools.length > 0) { params.tools = tools; params.tool_choice = toolChoice || "auto"; }
+  const params = {
+    model, messages,
+    ...(!noTemp ? { temperature: 0 } : {}),
+    ...tokenParam(model, maxTokens),
+    ..._reasoningParams(model),
+  };
+
+  if (tools && tools.length > 0) {
+    params.tools = tools;
+    params.tool_choice = toolChoice || "auto";
+    // Disable parallel calls for reliability — sequential is more predictable for healing
+    params.parallel_tool_calls = false;
+  }
 
-  const response = await openai.chat.completions.create(params);
+  const response = await _withRetry(() => openai.chat.completions.create(params));
   const choice = response.choices[0];
-  return { content: (choice.message.content || "").trim(), toolCalls: choice.message.tool_calls || null, usage: response.usage || {}, _raw: response, _message: choice.message };
+  return {
+    content: (choice.message.content || "").trim(),
+    toolCalls: choice.message.tool_calls || null,
+    usage: response.usage || {},
+    _raw: response,
+    _message: choice.message,
+  };
 }
 
 // ── OpenAI: Multi-turn (Responses + Chat) ──
@@ -435,7 +492,7 @@ async function _responsesCallWithHistory(openai, { model, messages, tools, maxTo
     });
   }
 
-  const response = await openai.responses.create(params);
+  const response = await _withRetry(() => openai.responses.create(params));
   let content = "";
   let toolCalls = null;
 
@@ -454,9 +511,18 @@ async function _responsesCallWithHistory(openai, { model, messages, tools, maxTo
 
 async function _chatCallWithHistory(openai, { model, messages, tools, maxTokens }) {
   const noTemp = /^(o[1-9]|gpt-5)/.test(model);
-  const params = { model, messages, ...(!noTemp ? { temperature: 0 } : {}), ...tokenParam(model, maxTokens) };
-  if (tools && tools.length > 0) { params.tools = tools; params.tool_choice = "auto"; }
-  return openai.chat.completions.create(params);
+  const params = {
+    model, messages,
+    ...(!noTemp ? { temperature: 0 } : {}),
+    ...tokenParam(model, maxTokens),
+    ..._reasoningParams(model),
+  };
+  if (tools && tools.length > 0) {
+    params.tools = tools;
+    params.tool_choice = "auto";
+    params.parallel_tool_calls = false;
+  }
+  return _withRetry(() => openai.chat.completions.create(params));
 }
 
 // ── Fast Path Repair ──
@@ -518,4 +584,4 @@ Include both if needed, or just one.`;
   }
 }
 
-module.exports = { requestRepair, getClient, tokenParam, aiCall, aiCallWithHistory, isResponsesModel, isAnthropicModel, setTokenTracker, detectProvider };
+module.exports = { requestRepair, getClient, tokenParam, aiCall, aiCallWithHistory, isResponsesModel, isAnthropicModel, setTokenTracker, getTrackerSnapshot, detectProvider };
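
Note: the new `getTrackerSnapshot` reads `_totalTokens`, `_totalCostUsd`, and `_totalCalls` from whatever object was registered through `setTokenTracker`; the tracker itself is not part of this diff. A minimal sketch of an object compatible with those reads (illustrative only, not the package's tracker implementation):

    // Hypothetical tracker exposing the three fields getTrackerSnapshot reads above.
    // Only the field names come from the diff; record() is a made-up helper.
    class IllustrativeTokenTracker {
      constructor() {
        this._totalTokens = 0;
        this._totalCostUsd = 0;
        this._totalCalls = 0;
      }
      record({ tokens = 0, costUsd = 0 } = {}) {
        this._totalTokens += tokens;
        this._totalCostUsd += costUsd;
        this._totalCalls += 1;
      }
    }

If no tracker has been registered, the snapshot falls back to zeros, so the pipeline token and cost figures recorded in the heal changes below degrade to 0 rather than throwing.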
@@ -43,6 +43,9 @@ async function heal(opts) {
 
 async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupManager, logger, brain, mcp, skills, repairHistory, routeContext }) {
   const healStartTime = Date.now();
+  // Snapshot token tracker at heal start — diff at end = FULL pipeline cost
+  const { getTrackerSnapshot } = require("./ai-client");
+  const _snapshot = getTrackerSnapshot();
   const { redact, hasSecrets } = require("../security/secret-redactor");
 
   // Guard: don't burn tokens on empty stderr (signal kills, clean shutdowns, etc.)
@@ -155,11 +158,15 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
   if (opsFix.fixed) {
     console.log(chalk.green(` ⚡ Operational fix applied: ${opsFix.action}`));
     if (logger) logger.info(EVENT_TYPES.HEAL_SUCCESS, `Operational fix: ${opsFix.action}`, { action: opsFix.action });
+    // Record with FULL pipeline cost (includes injection scan, brain lookup, etc.)
+    const _endSnap = getTrackerSnapshot();
+    const pipelineTokens = _endSnap.tokens - _snapshot.tokens;
+    const pipelineCost = _endSnap.cost - _snapshot.cost;
     if (repairHistory) {
       repairHistory.record({
         error: parsed.errorMessage, file: parsed.filePath, line: parsed.line,
         resolution: opsFix.action, success: true, mode: "operational",
-        model: "none", tokens: 0, cost: 0, iteration: 0,
+        model: getModel("audit"), tokens: pipelineTokens, cost: pipelineCost, iteration: 0,
         duration: Date.now() - healStartTime, filesModified: [],
       });
     }
@@ -396,13 +403,13 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
 
   backupManager.prune();
 
-  // Record to repair history
+  // Record to repair history — use FULL pipeline cost (injection scan + brain + fix)
   if (repairHistory) {
     const duration = Date.now() - healStartTime;
-    const tokenUsage = goalResult.agentStats?.totalTokens || 0;
-    const { calculateCost } = require("../logger/pricing");
+    const _endSnap = getTrackerSnapshot();
+    const pipelineTokens = _endSnap.tokens - _snapshot.tokens;
+    const pipelineCost = _endSnap.cost - _snapshot.cost;
     const model = goalResult.mode === "fast" ? getModel("coding") : getModel("reasoning");
-    const cost = calculateCost(model, tokenUsage * 0.7, tokenUsage * 0.3); // estimate in/out split
 
     repairHistory.record({
       error: parsed.errorMessage,
@@ -412,8 +419,8 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
       success: goalResult.success,
       mode: goalResult.mode || "unknown",
       model,
-      tokens: tokenUsage,
-      cost: cost.total,
+      tokens: pipelineTokens,
+      cost: pipelineCost,
       iteration: goalResult.iteration,
       duration,
       filesModified: goalResult.agentStats?.filesModified || [],
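
Taken together, the heal changes replace the previous estimate (the agent's token total split 70/30 between input and output and priced via `calculateCost`) with a start/end snapshot diff over the shared tracker, so the recorded tokens and cost cover the full pipeline rather than only the final fix call. A rough standalone sketch of that accounting pattern, using illustrative names (`tracker`, `runPipeline`, and `history` are not the package's API):

    // Snapshot-diff accounting: capture totals before a multi-step pipeline,
    // capture again after, and attribute the difference to this run.
    async function runWithCostAccounting(tracker, runPipeline, history) {
      const before = { tokens: tracker._totalTokens, cost: tracker._totalCostUsd };
      const result = await runPipeline();
      const after = { tokens: tracker._totalTokens, cost: tracker._totalCostUsd };
      history.push({
        success: result.success,
        tokens: after.tokens - before.tokens, // whole pipeline, not just the final model call
        cost: after.cost - before.cost,
      });
      return result;
    }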