wolverine-ai 3.3.0 → 3.4.1

This diff shows the content changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "wolverine-ai",
-  "version": "3.3.0",
+  "version": "3.4.1",
   "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
   "main": "src/index.js",
   "bin": {
@@ -1,5 +1,6 @@
 const OpenAI = require("openai");
 const Anthropic = require("@anthropic-ai/sdk");
+const chalk = require("chalk");
 const { getModel, detectProvider } = require("./models");
 
 let _openaiClient = null;
@@ -7,14 +8,20 @@ let _anthropicClient = null;
 let _tracker = null;
 
 function setTokenTracker(tracker) { _tracker = tracker; }
+function getTrackerSnapshot() {
+  if (!_tracker) return { tokens: 0, cost: 0, calls: 0 };
+  return { tokens: _tracker._totalTokens || 0, cost: _tracker._totalCostUsd || 0, calls: _tracker._totalCalls || 0 };
+}
 
 function _extractTokens(usage) {
-  if (!usage) return { input: 0, output: 0 };
+  if (!usage) return { input: 0, output: 0, cacheCreation: 0, cacheRead: 0 };
   return {
     input: usage.prompt_tokens || usage.input_tokens || 0,
     output: usage.completion_tokens || usage.output_tokens || 0,
-    cacheCreation: usage.cache_creation_input_tokens || 0,
-    cacheRead: usage.cache_read_input_tokens || 0,
+    // Anthropic cache fields
+    cacheCreation: usage.cache_creation_input_tokens || usage.cache_write_tokens || 0,
+    // OpenAI uses cache_read_tokens, Anthropic uses cache_read_input_tokens
+    cacheRead: usage.cache_read_input_tokens || usage.cache_read_tokens || 0,
   };
 }
 
@@ -121,9 +128,41 @@ function tokenParam(model, limit) {
   // Anthropic uses max_tokens directly (handled in _anthropicCall)
   if (isAnthropicModel(model)) return { max_tokens: effectiveLimit };
   if (isResponsesModel(model)) return { max_output_tokens: effectiveLimit };
-  const usesNewParam = /^(o[1-9]|gpt-5|gpt-4o)/.test(model) || model.includes("nano");
-  if (usesNewParam) return { max_completion_tokens: effectiveLimit };
-  return { max_tokens: effectiveLimit };
+  // All modern OpenAI models use max_completion_tokens (max_tokens is deprecated)
+  return { max_completion_tokens: effectiveLimit };
+}
+
+/**
+ * Build OpenAI-specific params for reasoning models (o-series).
+ * - reasoning_effort: controls compute allocation (low/medium/high)
+ * - No temperature/top_p (forbidden on o-series)
+ */
+function _reasoningParams(model) {
+  if (!isReasoningModel(model)) return {};
+  // Default to medium effort — balances cost vs quality
+  // High effort for complex multi-file debugging, low for classification
+  return { reasoning_effort: process.env.WOLVERINE_REASONING_EFFORT || "medium" };
+}
+
+/**
+ * Retry with exponential backoff + jitter for rate limits.
+ */
+async function _withRetry(fn, maxRetries = 3) {
+  for (let attempt = 0; attempt <= maxRetries; attempt++) {
+    try {
+      return await fn();
+    } catch (err) {
+      const isRateLimit = err.status === 429 || err.code === "rate_limit_exceeded";
+      const isServerError = err.status >= 500;
+      if ((isRateLimit || isServerError) && attempt < maxRetries) {
+        const delay = Math.min(1000 * Math.pow(2, attempt) + Math.random() * 1000, 30000);
+        console.log(chalk.yellow(` ⏱️ API ${isRateLimit ? "rate limited" : "error"} — retrying in ${Math.round(delay / 1000)}s (attempt ${attempt + 1}/${maxRetries})`));
+        await new Promise(r => setTimeout(r, delay));
+        continue;
+      }
+      throw err;
+    }
+  }
 }
 
 // ── Unified AI Call ──
@@ -206,7 +245,7 @@ async function _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tool
     else if (toolChoice && toolChoice !== "auto") params.tool_choice = { type: "auto" };
   }
 
-  const response = await client.messages.create(params);
+  const response = await _withRetry(() => client.messages.create(params));
   return _normalizeAnthropicResponse(response);
 }
 
@@ -292,7 +331,7 @@ async function _anthropicCallWithHistory({ model, messages, tools, maxTokens })
     params.tools = tools.map(_toAnthropicTool).filter(Boolean);
   }
 
-  const response = await client.messages.create(params);
+  const response = await _withRetry(() => client.messages.create(params));
 
   // Return in chat-compatible format
   const normalized = _normalizeAnthropicResponse(response);
@@ -377,7 +416,7 @@ async function _responsesCall(openai, { model, systemPrompt, userPrompt, maxToke
     });
   }
 
-  const response = await openai.responses.create(params);
+  const response = await _withRetry(() => openai.responses.create(params));
   let content = "";
   let toolCalls = null;
 
@@ -403,13 +442,31 @@ async function _chatCall(openai, { model, systemPrompt, userPrompt, maxTokens, t
   if (systemPrompt) messages.push({ role: "system", content: systemPrompt });
   messages.push({ role: "user", content: userPrompt });
 
+  // No temperature for o-series and gpt-5+ (forbidden, causes error)
   const noTemp = /^(o[1-9]|gpt-5)/.test(model);
-  const params = { model, messages, ...(!noTemp ? { temperature: 0 } : {}), ...tokenParam(model, maxTokens) };
-  if (tools && tools.length > 0) { params.tools = tools; params.tool_choice = toolChoice || "auto"; }
+  const params = {
+    model, messages,
+    ...(!noTemp ? { temperature: 0 } : {}),
+    ...tokenParam(model, maxTokens),
+    ..._reasoningParams(model),
+  };
+
+  if (tools && tools.length > 0) {
+    params.tools = tools;
+    params.tool_choice = toolChoice || "auto";
+    // Disable parallel calls for reliability — sequential is more predictable for healing
+    params.parallel_tool_calls = false;
+  }
 
-  const response = await openai.chat.completions.create(params);
+  const response = await _withRetry(() => openai.chat.completions.create(params));
   const choice = response.choices[0];
-  return { content: (choice.message.content || "").trim(), toolCalls: choice.message.tool_calls || null, usage: response.usage || {}, _raw: response, _message: choice.message };
+  return {
+    content: (choice.message.content || "").trim(),
+    toolCalls: choice.message.tool_calls || null,
+    usage: response.usage || {},
+    _raw: response,
+    _message: choice.message,
+  };
 }
 
 // ── OpenAI: Multi-turn (Responses + Chat) ──
@@ -435,7 +492,7 @@ async function _responsesCallWithHistory(openai, { model, messages, tools, maxTo
     });
   }
 
-  const response = await openai.responses.create(params);
+  const response = await _withRetry(() => openai.responses.create(params));
   let content = "";
   let toolCalls = null;
 
@@ -454,9 +511,18 @@ async function _responsesCallWithHistory(openai, { model, messages, tools, maxTo
 
 async function _chatCallWithHistory(openai, { model, messages, tools, maxTokens }) {
   const noTemp = /^(o[1-9]|gpt-5)/.test(model);
-  const params = { model, messages, ...(!noTemp ? { temperature: 0 } : {}), ...tokenParam(model, maxTokens) };
-  if (tools && tools.length > 0) { params.tools = tools; params.tool_choice = "auto"; }
-  return openai.chat.completions.create(params);
+  const params = {
+    model, messages,
+    ...(!noTemp ? { temperature: 0 } : {}),
+    ...tokenParam(model, maxTokens),
+    ..._reasoningParams(model),
+  };
+  if (tools && tools.length > 0) {
+    params.tools = tools;
+    params.tool_choice = "auto";
+    params.parallel_tool_calls = false;
+  }
+  return _withRetry(() => openai.chat.completions.create(params));
 }
 
 // ── Fast Path Repair ──
@@ -518,4 +584,4 @@ Include both if needed, or just one.`;
   }
 }
 
-module.exports = { requestRepair, getClient, tokenParam, aiCall, aiCallWithHistory, isResponsesModel, isAnthropicModel, setTokenTracker, detectProvider };
+module.exports = { requestRepair, getClient, tokenParam, aiCall, aiCallWithHistory, isResponsesModel, isAnthropicModel, setTokenTracker, getTrackerSnapshot, detectProvider };
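
Note: the new `getTrackerSnapshot` reads `_totalTokens`, `_totalCostUsd`, and `_totalCalls` from whatever object was registered through `setTokenTracker`; the tracker itself is not part of this diff. A minimal sketch of an object compatible with those reads (illustrative only, not the package's tracker implementation):

    // Hypothetical tracker exposing the three fields getTrackerSnapshot reads above.
    // Only the field names come from the diff; record() is a made-up helper.
    class IllustrativeTokenTracker {
      constructor() {
        this._totalTokens = 0;
        this._totalCostUsd = 0;
        this._totalCalls = 0;
      }
      record({ tokens = 0, costUsd = 0 } = {}) {
        this._totalTokens += tokens;
        this._totalCostUsd += costUsd;
        this._totalCalls += 1;
      }
    }

If no tracker has been registered, the snapshot falls back to zeros, so the pipeline token and cost figures recorded in the heal changes below degrade to 0 rather than throwing.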
@@ -43,6 +43,9 @@ async function heal(opts) {
 
 async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupManager, logger, brain, mcp, skills, repairHistory, routeContext }) {
   const healStartTime = Date.now();
+  // Snapshot token tracker at heal start — diff at end = FULL pipeline cost
+  const { getTrackerSnapshot } = require("./ai-client");
+  const _snapshot = getTrackerSnapshot();
   const { redact, hasSecrets } = require("../security/secret-redactor");
 
   // Guard: don't burn tokens on empty stderr (signal kills, clean shutdowns, etc.)
@@ -155,11 +158,15 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
   if (opsFix.fixed) {
     console.log(chalk.green(` ⚡ Operational fix applied: ${opsFix.action}`));
     if (logger) logger.info(EVENT_TYPES.HEAL_SUCCESS, `Operational fix: ${opsFix.action}`, { action: opsFix.action });
+    // Record with FULL pipeline cost (includes injection scan, brain lookup, etc.)
+    const _endSnap = getTrackerSnapshot();
+    const pipelineTokens = _endSnap.tokens - _snapshot.tokens;
+    const pipelineCost = _endSnap.cost - _snapshot.cost;
     if (repairHistory) {
       repairHistory.record({
         error: parsed.errorMessage, file: parsed.filePath, line: parsed.line,
         resolution: opsFix.action, success: true, mode: "operational",
-        model: "none", tokens: 0, cost: 0, iteration: 0,
+        model: getModel("audit"), tokens: pipelineTokens, cost: pipelineCost, iteration: 0,
         duration: Date.now() - healStartTime, filesModified: [],
       });
     }
@@ -396,13 +403,13 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
 
   backupManager.prune();
 
-  // Record to repair history
+  // Record to repair history — use FULL pipeline cost (injection scan + brain + fix)
   if (repairHistory) {
     const duration = Date.now() - healStartTime;
-    const tokenUsage = goalResult.agentStats?.totalTokens || 0;
-    const { calculateCost } = require("../logger/pricing");
+    const _endSnap = getTrackerSnapshot();
+    const pipelineTokens = _endSnap.tokens - _snapshot.tokens;
+    const pipelineCost = _endSnap.cost - _snapshot.cost;
     const model = goalResult.mode === "fast" ? getModel("coding") : getModel("reasoning");
-    const cost = calculateCost(model, tokenUsage * 0.7, tokenUsage * 0.3); // estimate in/out split
 
     repairHistory.record({
       error: parsed.errorMessage,
@@ -412,8 +419,8 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
       success: goalResult.success,
       mode: goalResult.mode || "unknown",
       model,
-      tokens: tokenUsage,
-      cost: cost.total,
+      tokens: pipelineTokens,
+      cost: pipelineCost,
       iteration: goalResult.iteration,
       duration,
       filesModified: goalResult.agentStats?.filesModified || [],
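
Taken together, the heal changes replace the previous estimate (the agent's token total split 70/30 between input and output and priced via `calculateCost`) with a start/end snapshot diff over the shared tracker, so the recorded tokens and cost cover the full pipeline rather than only the final fix call. A rough standalone sketch of that accounting pattern, using illustrative names (`tracker`, `runPipeline`, and `history` are not the package's API):

    // Snapshot-diff accounting: capture totals before a multi-step pipeline,
    // capture again after, and attribute the difference to this run.
    async function runWithCostAccounting(tracker, runPipeline, history) {
      const before = { tokens: tracker._totalTokens, cost: tracker._totalCostUsd };
      const result = await runPipeline();
      const after = { tokens: tracker._totalTokens, cost: tracker._totalCostUsd };
      history.push({
        success: result.success,
        tokens: after.tokens - before.tokens, // whole pipeline, not just the final model call
        cost: after.cost - before.cost,
      });
      return result;
    }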