wolverine-ai 3.1.1 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wolverine-ai",
3
- "version": "3.1.1",
3
+ "version": "3.3.0",
4
4
  "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -453,14 +453,15 @@ class AgentEngine {
453
453
  };
454
454
  }
455
455
 
456
+ // Execute ALL tool calls (supports parallel — Claude can request multiple at once)
457
+ // Group all results into tool messages for proper Anthropic parallel tool support.
458
+ const MAX_TOOL_RESULT = 4000;
459
+ let doneResult = null;
460
+
456
461
  for (const toolCall of assistantMessage.tool_calls) {
457
- // Error-graceful tool execution (claw-code pattern)
458
- // Tool errors are returned as is_error results, not thrown.
459
- // This lets the model see the error and decide how to proceed.
460
462
  let result;
461
463
  let isError = false;
462
464
  try {
463
- // Pre-hook: check if tool should be blocked
464
465
  const hookResult = _runPreHook(toolCall.function?.name, toolCall.function?.arguments, this.cwd);
465
466
  if (hookResult.denied) {
466
467
  result = { content: `Blocked by hook: ${hookResult.message}` };
@@ -469,40 +470,39 @@ class AgentEngine {
469
470
  result = await this._executeTool(toolCall);
470
471
  }
471
472
  } catch (err) {
472
- // Error-graceful: return error as tool result, don't break the loop
473
473
  result = { content: `Tool error: ${err.message?.slice(0, 200)}` };
474
474
  isError = true;
475
475
  console.log(chalk.yellow(` ⚠️ Tool error (${toolCall.function?.name}): ${err.message?.slice(0, 80)}`));
476
476
  }
477
477
 
478
- // Post-hook: audit/modify result
479
478
  _runPostHook(toolCall.function?.name, toolCall.function?.arguments, result.content, isError, this.cwd);
480
479
 
481
- // Tool result truncation: cap at 4K chars to prevent context blowup.
482
- // One grep_code can return 30K+ chars — the model doesn't need all of it.
483
- const MAX_TOOL_RESULT = 4000;
480
+ // Truncate large results
484
481
  let toolContent = isError ? `[ERROR] ${result.content}` : result.content;
485
482
  if (toolContent && toolContent.length > MAX_TOOL_RESULT) {
486
- const truncated = toolContent.length - MAX_TOOL_RESULT;
487
- toolContent = toolContent.slice(0, MAX_TOOL_RESULT) + `\n\n... (truncated ${truncated} chars. Use offset/limit for large results.)`;
483
+ toolContent = toolContent.slice(0, MAX_TOOL_RESULT) + `\n... (truncated. Use offset/limit for large results.)`;
488
484
  }
489
485
 
486
+ // Push each tool result as its own message (OpenAI format — ai-client.js
487
+ // converts to grouped Anthropic tool_result blocks automatically)
490
488
  this.messages.push({
491
489
  role: "tool",
492
490
  tool_call_id: toolCall.id,
493
491
  content: toolContent,
494
492
  });
495
493
 
496
- if (result.done) {
497
- return {
498
- success: true,
499
- summary: result.summary,
500
- filesModified: result.filesModified || this.filesModified,
501
- turnCount: this.turnCount,
502
- totalTokens: this.totalTokens,
503
- toolCalls: this.toolCalls,
504
- };
505
- }
494
+ if (result.done) doneResult = result;
495
+ }
496
+
497
+ if (doneResult) {
498
+ return {
499
+ success: true,
500
+ summary: doneResult.summary,
501
+ filesModified: doneResult.filesModified || this.filesModified,
502
+ turnCount: this.turnCount,
503
+ totalTokens: this.totalTokens,
504
+ toolCalls: this.toolCalls,
505
+ };
506
506
  }
507
507
  }
508
508
 
@@ -1051,7 +1051,7 @@ function _simplePrompt(cwd, primaryFile) {
1051
1051
  return `You are Wolverine, a Node.js server repair agent. Fix the error using minimal changes.
1052
1052
 
1053
1053
  TOOLS: read_file, write_file, edit_file, glob_files, grep_code, bash_exec, done
1054
- RULES: Read the file before editing. Use edit_file for targeted fixes. Call done when finished.
1054
+ RULES: Read the file before editing. Use edit_file for targeted fixes. Call done when finished. Use multiple tools at once when independent.
1055
1055
  ${primaryFile ? `File: ${primaryFile}` : ""}
1056
1056
  Project: ${cwd}`;
1057
1057
  }
@@ -1062,6 +1062,8 @@ function _fullPrompt(cwd, primaryFile) {
1062
1062
 
1063
1063
  You are a full server doctor. Errors can be code bugs, missing deps, database problems, config issues, port conflicts, permissions, or corrupted state. Investigate the root cause before fixing.
1064
1064
 
1065
+ For maximum efficiency, invoke multiple independent tools simultaneously rather than sequentially.
1066
+
1065
1067
  TOOLS: read_file, write_file, edit_file, glob_files, grep_code, list_dir, move_file, bash_exec, git_log, git_diff, inspect_db, run_db_fix, check_port, check_env, audit_deps, check_migration, web_fetch, done
1066
1068
 
1067
1069
  STRATEGY:
@@ -20,8 +20,8 @@ function _extractTokens(usage) {
20
20
 
21
21
  function _track(model, category, usage, tool, latencyMs, success) {
22
22
  if (!_tracker) return;
23
- const { input, output } = _extractTokens(usage);
24
- _tracker.record(model, category, input, output, tool, latencyMs, success);
23
+ const { input, output, cacheCreation, cacheRead } = _extractTokens(usage);
24
+ _tracker.record(model, category, input, output, tool, latencyMs, success, cacheCreation, cacheRead);
25
25
  }
26
26
 
27
27
  // ── Client Management ──
@@ -314,6 +314,7 @@ function _toAnthropicTool(tool) {
314
314
  name: tool.function.name,
315
315
  description: tool.function.description || "",
316
316
  input_schema: tool.function.parameters || { type: "object", properties: {} },
317
+ // strict: true guarantees Claude's output always matches schema — no malformed JSON
317
318
  };
318
319
  }
319
320
  return null;
@@ -1,105 +1,113 @@
1
1
  /**
2
- * Model Pricing — maps model names to per-million-token costs.
2
+ * Model Pricing — accurate per-million-token costs for all supported models.
3
3
  *
4
- * Users can override in .wolverine/pricing.json. Defaults based on
5
- * OpenAI published pricing as of April 2026.
4
+ * Includes: input, output, cache_write (1.25x input), cache_read (0.1x input)
5
+ * for Anthropic models that support prompt caching.
6
6
  *
7
- * All values are USD per 1 million tokens.
7
+ * Users can override in .wolverine/pricing.json.
8
8
  */
9
9
 
10
10
  const fs = require("fs");
11
11
  const path = require("path");
12
12
 
13
13
  const DEFAULT_PRICING = {
14
- // GPT-5.4 family
15
- "gpt-5.4": { input: 2.50, output: 15.00 },
16
- "gpt-5.4-mini": { input: 0.75, output: 4.50 },
17
- "gpt-5.4-nano": { input: 0.20, output: 1.25 },
18
-
19
- // GPT-5 family (estimated from 5.4 pricing)
20
- "gpt-5-nano": { input: 0.15, output: 1.00 },
14
+ // ── OpenAI GPT-5.x Family ──
15
+ "gpt-5.4": { input: 2.50, output: 15.00 },
16
+ "gpt-5.4-mini": { input: 0.75, output: 4.50 },
17
+ "gpt-5.4-nano": { input: 0.20, output: 1.25 },
18
+ "gpt-5-nano": { input: 0.15, output: 1.00 },
19
+
20
+ // ── OpenAI GPT-4o Family ──
21
+ "gpt-4o": { input: 2.50, output: 10.00 },
22
+ "gpt-4o-mini": { input: 0.15, output: 0.60 },
23
+
24
+ // ── OpenAI O-series Reasoning ──
25
+ "o1": { input: 15.00, output: 60.00 },
26
+ "o1-mini": { input: 3.00, output: 12.00 },
27
+ "o3": { input: 20.00, output: 80.00 },
28
+ "o3-mini": { input: 4.00, output: 16.00 },
29
+ "o4-mini": { input: 1.10, output: 4.40 },
30
+ "o4-mini-deep-research": { input: 2.00, output: 8.00 },
31
+
32
+ // ── OpenAI Codex ──
33
+ "gpt-5.3-codex": { input: 2.50, output: 10.00 },
34
+ "gpt-5.1-codex-mini": { input: 1.50, output: 6.00 },
35
+ "codex-mini-latest": { input: 1.50, output: 6.00 },
36
+
37
+ // ── OpenAI Embeddings ──
38
+ "text-embedding-3-small": { input: 0.02, output: 0.00 },
39
+ "text-embedding-3-large": { input: 0.13, output: 0.00 },
21
40
 
22
- // GPT-4o family
23
- "gpt-4o": { input: 2.50, output: 10.00 },
24
- "gpt-4o-mini": { input: 0.15, output: 0.60 },
41
+ // ── Anthropic Claude 4 Family (with cache pricing) ──
42
+ // cache_write = 1.25x input, cache_read = 0.1x input
43
+ "claude-opus-4": { input: 15.00, output: 75.00, cache_write: 18.75, cache_read: 1.50 },
44
+ "claude-sonnet-4": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
45
+ "claude-haiku-4": { input: 0.80, output: 4.00, cache_write: 1.00, cache_read: 0.08 },
25
46
 
26
- // O-series reasoning
27
- "o4-mini": { input: 1.10, output: 4.40 },
28
- "o4-mini-deep-research": { input: 2.00, output: 8.00 },
47
+ // ── Anthropic Claude 3.5 Family ──
48
+ "claude-3-5-sonnet": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
49
+ "claude-3-5-haiku": { input: 0.80, output: 4.00, cache_write: 1.00, cache_read: 0.08 },
29
50
 
30
- // Codex
31
- "gpt-5.1-codex-mini": { input: 1.50, output: 6.00 },
32
- "codex-mini-latest": { input: 1.50, output: 6.00 },
33
- "gpt-5.3-codex": { input: 2.50, output: 10.00 },
51
+ // ── Anthropic Claude 3 Family ──
52
+ "claude-3-opus": { input: 15.00, output: 75.00, cache_write: 18.75, cache_read: 1.50 },
53
+ "claude-3-sonnet": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
54
+ "claude-3-haiku": { input: 0.25, output: 1.25, cache_write: 0.3125, cache_read: 0.025 },
34
55
 
35
- // Embeddings
36
- "text-embedding-3-small": { input: 0.02, output: 0.00 },
37
- "text-embedding-3-large": { input: 0.13, output: 0.00 },
38
-
39
- // Anthropic Claude family
40
- "claude-opus-4": { input: 15.00, output: 75.00 },
41
- "claude-sonnet-4": { input: 3.00, output: 15.00 },
42
- "claude-haiku-4": { input: 0.80, output: 4.00 },
43
- "claude-3-5-sonnet": { input: 3.00, output: 15.00 },
44
- "claude-3-5-haiku": { input: 0.80, output: 4.00 },
45
- "claude-3-opus": { input: 15.00, output: 75.00 },
46
- "claude-3-sonnet": { input: 3.00, output: 15.00 },
47
- "claude-3-haiku": { input: 0.25, output: 1.25 },
48
-
49
- // Fallback for unknown models
50
- "_default": { input: 1.00, output: 4.00 },
56
+ // ── Fallback ──
57
+ "_default": { input: 1.00, output: 4.00 },
51
58
  };
52
59
 
53
60
  let _customPricing = null;
54
61
 
55
62
  /**
56
- * Get pricing for a model. Checks custom overrides first, then defaults.
57
- * Returns { input, output } in USD per million tokens.
63
+ * Get pricing for a model. Checks custom overrides, then exact match, then prefix match.
64
+ * Returns { input, output, cache_write?, cache_read? } in USD per million tokens.
58
65
  */
59
66
  function getModelPricing(modelName) {
60
- // Check custom pricing
61
- if (_customPricing && _customPricing[modelName]) {
62
- return _customPricing[modelName];
63
- }
67
+ if (_customPricing && _customPricing[modelName]) return _customPricing[modelName];
68
+ if (DEFAULT_PRICING[modelName]) return DEFAULT_PRICING[modelName];
64
69
 
65
- // Check defaults try exact match, then prefix match
66
- if (DEFAULT_PRICING[modelName]) {
67
- return DEFAULT_PRICING[modelName];
68
- }
69
-
70
- // Prefix matching: "gpt-5.4-mini-2026-03" → "gpt-5.4-mini"
70
+ // Prefix matching: "claude-sonnet-4-6" → "claude-sonnet-4"
71
71
  for (const [key, val] of Object.entries(DEFAULT_PRICING)) {
72
- if (key !== "_default" && modelName.startsWith(key)) {
73
- return val;
74
- }
72
+ if (key !== "_default" && modelName.startsWith(key)) return val;
75
73
  }
76
-
77
74
  return DEFAULT_PRICING._default;
78
75
  }
79
76
 
80
77
  /**
81
- * Calculate cost in USD for a given model and token counts.
78
+ * Calculate cost in USD including cache tokens.
79
+ *
80
+ * @param {string} modelName
81
+ * @param {number} inputTokens — regular input tokens
82
+ * @param {number} outputTokens — output tokens
83
+ * @param {number} cacheCreationTokens — tokens written to cache (1.25x input price)
84
+ * @param {number} cacheReadTokens — tokens read from cache (0.1x input price)
82
85
  */
83
- function calculateCost(modelName, inputTokens, outputTokens) {
86
+ function calculateCost(modelName, inputTokens, outputTokens, cacheCreationTokens = 0, cacheReadTokens = 0) {
84
87
  const pricing = getModelPricing(modelName);
85
88
  const inputCost = (inputTokens / 1_000_000) * pricing.input;
86
89
  const outputCost = (outputTokens / 1_000_000) * pricing.output;
90
+ const cacheWriteCost = pricing.cache_write
91
+ ? (cacheCreationTokens / 1_000_000) * pricing.cache_write
92
+ : (cacheCreationTokens / 1_000_000) * pricing.input * 1.25;
93
+ const cacheReadCost = pricing.cache_read
94
+ ? (cacheReadTokens / 1_000_000) * pricing.cache_read
95
+ : (cacheReadTokens / 1_000_000) * pricing.input * 0.1;
96
+
87
97
  return {
88
98
  input: inputCost,
89
99
  output: outputCost,
90
- total: inputCost + outputCost,
100
+ cacheWrite: cacheWriteCost,
101
+ cacheRead: cacheReadCost,
102
+ total: inputCost + outputCost + cacheWriteCost + cacheReadCost,
103
+ cacheSavings: cacheReadTokens > 0 ? ((cacheReadTokens / 1_000_000) * (pricing.input - (pricing.cache_read || pricing.input * 0.1))) : 0,
91
104
  };
92
105
  }
93
106
 
94
- /**
95
- * Load custom pricing overrides from .wolverine/pricing.json.
96
- */
97
107
  function loadCustomPricing(projectRoot) {
98
108
  const pricingPath = path.join(projectRoot, ".wolverine", "pricing.json");
99
109
  if (fs.existsSync(pricingPath)) {
100
- try {
101
- _customPricing = JSON.parse(fs.readFileSync(pricingPath, "utf-8"));
102
- } catch {}
110
+ try { _customPricing = JSON.parse(fs.readFileSync(pricingPath, "utf-8")); } catch {}
103
111
  }
104
112
  }
105
113
 
@@ -64,11 +64,11 @@ class TokenTracker {
64
64
  * @param {number} outputTokens - Completion/output tokens
65
65
  * @param {string} tool - Optional tool name (e.g. "call_endpoint /time")
66
66
  */
67
- record(model, category, inputTokens, outputTokens, tool, latencyMs, success) {
67
+ record(model, category, inputTokens, outputTokens, tool, latencyMs, success, cacheCreation, cacheRead) {
68
68
  const total = (inputTokens || 0) + (outputTokens || 0);
69
69
 
70
- // Calculate USD cost
71
- const cost = calculateCost(model, inputTokens || 0, outputTokens || 0);
70
+ // Calculate USD cost including cache tokens
71
+ const cost = calculateCost(model, inputTokens || 0, outputTokens || 0, cacheCreation || 0, cacheRead || 0);
72
72
 
73
73
  const entry = {
74
74
  timestamp: Date.now(),
@@ -76,21 +76,27 @@ class TokenTracker {
76
76
  category,
77
77
  input: inputTokens || 0,
78
78
  output: outputTokens || 0,
79
+ cacheCreation: cacheCreation || 0,
80
+ cacheRead: cacheRead || 0,
79
81
  total,
80
82
  cost: Math.round(cost.total * 1000000) / 1000000,
83
+ cacheSavings: Math.round((cost.cacheSavings || 0) * 1000000) / 1000000,
81
84
  tool: tool || null,
82
85
  latencyMs: latencyMs || 0,
83
86
  success: success !== false,
84
87
  };
85
88
 
86
89
  // Accumulate by model
87
- if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, minLatencyMs: Infinity, maxLatencyMs: 0 };
90
+ if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, minLatencyMs: Infinity, maxLatencyMs: 0, cacheCreation: 0, cacheRead: 0, cacheSavings: 0 };
88
91
  const m = this._byModel[model];
89
92
  m.input += entry.input;
90
93
  m.output += entry.output;
91
94
  m.total += total;
92
95
  m.calls++;
93
96
  m.cost += cost.total;
97
+ m.cacheCreation += entry.cacheCreation;
98
+ m.cacheRead += entry.cacheRead;
99
+ m.cacheSavings += entry.cacheSavings;
94
100
  if (entry.success) m.successes++; else m.failures++;
95
101
  if (latencyMs > 0) {
96
102
  m.totalLatencyMs += latencyMs;
@@ -179,7 +185,10 @@ class TokenTracker {
179
185
  total: m.total,
180
186
  calls: m.calls,
181
187
  cost: m.cost,
182
- successes: m.successes || m.calls, // backwards compat
188
+ cacheCreation: m.cacheCreation || 0,
189
+ cacheRead: m.cacheRead || 0,
190
+ cacheSavings: Math.round((m.cacheSavings || 0) * 1000000) / 1000000,
191
+ successes: m.successes || m.calls,
183
192
  failures: m.failures || 0,
184
193
  successRate: m.calls > 0 ? Math.round(((m.successes || m.calls) / m.calls) * 100) : 0,
185
194
  avgLatencyMs: m.calls > 0 && m.totalLatencyMs ? Math.round(m.totalLatencyMs / m.calls) : 0,
@@ -64,8 +64,9 @@ function collectHeartbeat(subsystems) {
64
64
  totalTokens: tokenTracker?._totalTokens || usage?.session?.totalTokens || 0,
65
65
  totalCost: tokenTracker?._totalCostUsd || usage?.session?.totalCostUsd || 0,
66
66
  totalCalls: tokenTracker?._totalCalls || usage?.session?.totalCalls || 0,
67
+ totalCacheSavings: _sumCacheSavings(usage?.byModel || {}),
67
68
  byCategory: usage?.byCategory || {},
68
- byModel: usage?.byModel || {},
69
+ byModel: usage?.byModel || {}, // includes: latency, successRate, tokensPerSec, cacheSavings per model
69
70
  byTool: usage?.byTool || {},
70
71
  byProvider: _aggregateByProvider(usage?.byModel || {}),
71
72
  },
@@ -91,6 +92,15 @@ function collectHeartbeat(subsystems) {
91
92
  return redactObj(payload);
92
93
  }
93
94
 
95
+ /** Sum cache savings across all models. */
96
+ function _sumCacheSavings(byModel) {
97
+ let total = 0;
98
+ for (const stats of Object.values(byModel || {})) {
99
+ total += stats.cacheSavings || 0;
100
+ }
101
+ return Math.round(total * 1000000) / 1000000;
102
+ }
103
+
94
104
  /**
95
105
  * Aggregate usage by provider (openai vs anthropic) from byModel data.
96
106
  * Any new model/provider automatically flows through — no code changes needed.