wolverine-ai 3.1.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "wolverine-ai",
- "version": "3.1.1",
+ "version": "3.2.0",
  "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
  "main": "src/index.js",
  "bin": {
@@ -20,8 +20,8 @@ function _extractTokens(usage) {
 
  function _track(model, category, usage, tool, latencyMs, success) {
  if (!_tracker) return;
- const { input, output } = _extractTokens(usage);
- _tracker.record(model, category, input, output, tool, latencyMs, success);
+ const { input, output, cacheCreation, cacheRead } = _extractTokens(usage);
+ _tracker.record(model, category, input, output, tool, latencyMs, success, cacheCreation, cacheRead);
  }
 
  // ── Client Management ──
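 
The diff doesn't show `_extractTokens` itself, only that it now yields cache token counts alongside input and output. A minimal sketch of what the updated helper presumably does, assuming it normalizes the usage objects returned by the OpenAI and Anthropic SDKs (the field names follow each provider's published usage schema; the body itself is an assumption):

// Sketch only — the real _extractTokens is not shown in this diff.
function _extractTokens(usage) {
  if (!usage) return { input: 0, output: 0, cacheCreation: 0, cacheRead: 0 };
  return {
    // OpenAI reports prompt_tokens/completion_tokens; Anthropic reports input_tokens/output_tokens.
    input: usage.prompt_tokens ?? usage.input_tokens ?? 0,
    output: usage.completion_tokens ?? usage.output_tokens ?? 0,
    // Anthropic prompt-caching fields; absent on OpenAI responses, so they default to 0.
    cacheCreation: usage.cache_creation_input_tokens ?? 0,
    cacheRead: usage.cache_read_input_tokens ?? 0,
  };
}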
@@ -1,105 +1,113 @@
  /**
- * Model Pricing — maps model names to per-million-token costs.
+ * Model Pricing — accurate per-million-token costs for all supported models.
  *
- * Users can override in .wolverine/pricing.json. Defaults based on
- * OpenAI published pricing as of April 2026.
+ * Includes: input, output, cache_write (1.25x input), cache_read (0.1x input)
+ * for Anthropic models that support prompt caching.
  *
- * All values are USD per 1 million tokens.
+ * Users can override in .wolverine/pricing.json.
  */
 
  const fs = require("fs");
  const path = require("path");
 
  const DEFAULT_PRICING = {
- // GPT-5.4 family
- "gpt-5.4": { input: 2.50, output: 15.00 },
- "gpt-5.4-mini": { input: 0.75, output: 4.50 },
- "gpt-5.4-nano": { input: 0.20, output: 1.25 },
-
- // GPT-5 family (estimated from 5.4 pricing)
- "gpt-5-nano": { input: 0.15, output: 1.00 },
+ // ── OpenAI GPT-5.x Family ──
+ "gpt-5.4": { input: 2.50, output: 15.00 },
+ "gpt-5.4-mini": { input: 0.75, output: 4.50 },
+ "gpt-5.4-nano": { input: 0.20, output: 1.25 },
+ "gpt-5-nano": { input: 0.15, output: 1.00 },
+
+ // ── OpenAI GPT-4o Family ──
+ "gpt-4o": { input: 2.50, output: 10.00 },
+ "gpt-4o-mini": { input: 0.15, output: 0.60 },
+
+ // ── OpenAI O-series Reasoning ──
+ "o1": { input: 15.00, output: 60.00 },
+ "o1-mini": { input: 3.00, output: 12.00 },
+ "o3": { input: 20.00, output: 80.00 },
+ "o3-mini": { input: 4.00, output: 16.00 },
+ "o4-mini": { input: 1.10, output: 4.40 },
+ "o4-mini-deep-research": { input: 2.00, output: 8.00 },
+
+ // ── OpenAI Codex ──
+ "gpt-5.3-codex": { input: 2.50, output: 10.00 },
+ "gpt-5.1-codex-mini": { input: 1.50, output: 6.00 },
+ "codex-mini-latest": { input: 1.50, output: 6.00 },
+
+ // ── OpenAI Embeddings ──
+ "text-embedding-3-small": { input: 0.02, output: 0.00 },
+ "text-embedding-3-large": { input: 0.13, output: 0.00 },
 
- // GPT-4o family
- "gpt-4o": { input: 2.50, output: 10.00 },
- "gpt-4o-mini": { input: 0.15, output: 0.60 },
+ // ── Anthropic Claude 4 Family (with cache pricing) ──
+ // cache_write = 1.25x input, cache_read = 0.1x input
+ "claude-opus-4": { input: 15.00, output: 75.00, cache_write: 18.75, cache_read: 1.50 },
+ "claude-sonnet-4": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
+ "claude-haiku-4": { input: 0.80, output: 4.00, cache_write: 1.00, cache_read: 0.08 },
 
- // O-series reasoning
- "o4-mini": { input: 1.10, output: 4.40 },
- "o4-mini-deep-research": { input: 2.00, output: 8.00 },
+ // ── Anthropic Claude 3.5 Family ──
+ "claude-3-5-sonnet": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
+ "claude-3-5-haiku": { input: 0.80, output: 4.00, cache_write: 1.00, cache_read: 0.08 },
 
- // Codex
- "gpt-5.1-codex-mini": { input: 1.50, output: 6.00 },
- "codex-mini-latest": { input: 1.50, output: 6.00 },
- "gpt-5.3-codex": { input: 2.50, output: 10.00 },
+ // ── Anthropic Claude 3 Family ──
+ "claude-3-opus": { input: 15.00, output: 75.00, cache_write: 18.75, cache_read: 1.50 },
+ "claude-3-sonnet": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
+ "claude-3-haiku": { input: 0.25, output: 1.25, cache_write: 0.3125, cache_read: 0.025 },
 
- // Embeddings
- "text-embedding-3-small": { input: 0.02, output: 0.00 },
- "text-embedding-3-large": { input: 0.13, output: 0.00 },
-
- // Anthropic Claude family
- "claude-opus-4": { input: 15.00, output: 75.00 },
- "claude-sonnet-4": { input: 3.00, output: 15.00 },
- "claude-haiku-4": { input: 0.80, output: 4.00 },
- "claude-3-5-sonnet": { input: 3.00, output: 15.00 },
- "claude-3-5-haiku": { input: 0.80, output: 4.00 },
- "claude-3-opus": { input: 15.00, output: 75.00 },
- "claude-3-sonnet": { input: 3.00, output: 15.00 },
- "claude-3-haiku": { input: 0.25, output: 1.25 },
-
- // Fallback for unknown models
- "_default": { input: 1.00, output: 4.00 },
+ // ── Fallback ──
+ "_default": { input: 1.00, output: 4.00 },
  };
 
  let _customPricing = null;
 
  /**
- * Get pricing for a model. Checks custom overrides first, then defaults.
- * Returns { input, output } in USD per million tokens.
+ * Get pricing for a model. Checks custom overrides, then exact match, then prefix match.
+ * Returns { input, output, cache_write?, cache_read? } in USD per million tokens.
  */
  function getModelPricing(modelName) {
- // Check custom pricing
- if (_customPricing && _customPricing[modelName]) {
- return _customPricing[modelName];
- }
+ if (_customPricing && _customPricing[modelName]) return _customPricing[modelName];
+ if (DEFAULT_PRICING[modelName]) return DEFAULT_PRICING[modelName];
 
- // Check defaults try exact match, then prefix match
- if (DEFAULT_PRICING[modelName]) {
- return DEFAULT_PRICING[modelName];
- }
-
- // Prefix matching: "gpt-5.4-mini-2026-03" → "gpt-5.4-mini"
+ // Prefix matching: "claude-sonnet-4-6" → "claude-sonnet-4"
  for (const [key, val] of Object.entries(DEFAULT_PRICING)) {
- if (key !== "_default" && modelName.startsWith(key)) {
- return val;
- }
+ if (key !== "_default" && modelName.startsWith(key)) return val;
  }
-
  return DEFAULT_PRICING._default;
  }
 
  /**
- * Calculate cost in USD for a given model and token counts.
+ * Calculate cost in USD including cache tokens.
+ *
+ * @param {string} modelName
+ * @param {number} inputTokens — regular input tokens
+ * @param {number} outputTokens — output tokens
+ * @param {number} cacheCreationTokens — tokens written to cache (1.25x input price)
+ * @param {number} cacheReadTokens — tokens read from cache (0.1x input price)
  */
- function calculateCost(modelName, inputTokens, outputTokens) {
+ function calculateCost(modelName, inputTokens, outputTokens, cacheCreationTokens = 0, cacheReadTokens = 0) {
  const pricing = getModelPricing(modelName);
  const inputCost = (inputTokens / 1_000_000) * pricing.input;
  const outputCost = (outputTokens / 1_000_000) * pricing.output;
+ const cacheWriteCost = pricing.cache_write
+ ? (cacheCreationTokens / 1_000_000) * pricing.cache_write
+ : (cacheCreationTokens / 1_000_000) * pricing.input * 1.25;
+ const cacheReadCost = pricing.cache_read
+ ? (cacheReadTokens / 1_000_000) * pricing.cache_read
+ : (cacheReadTokens / 1_000_000) * pricing.input * 0.1;
+
  return {
  input: inputCost,
  output: outputCost,
- total: inputCost + outputCost,
+ cacheWrite: cacheWriteCost,
+ cacheRead: cacheReadCost,
+ total: inputCost + outputCost + cacheWriteCost + cacheReadCost,
+ cacheSavings: cacheReadTokens > 0 ? ((cacheReadTokens / 1_000_000) * (pricing.input - (pricing.cache_read || pricing.input * 0.1))) : 0,
  };
  }
 
- /**
- * Load custom pricing overrides from .wolverine/pricing.json.
- */
  function loadCustomPricing(projectRoot) {
  const pricingPath = path.join(projectRoot, ".wolverine", "pricing.json");
  if (fs.existsSync(pricingPath)) {
- try {
- _customPricing = JSON.parse(fs.readFileSync(pricingPath, "utf-8"));
- } catch {}
+ try { _customPricing = JSON.parse(fs.readFileSync(pricingPath, "utf-8")); } catch {}
  }
  }
 
@@ -64,11 +64,11 @@ class TokenTracker {
  * @param {number} outputTokens - Completion/output tokens
  * @param {string} tool - Optional tool name (e.g. "call_endpoint /time")
  */
- record(model, category, inputTokens, outputTokens, tool, latencyMs, success) {
+ record(model, category, inputTokens, outputTokens, tool, latencyMs, success, cacheCreation, cacheRead) {
  const total = (inputTokens || 0) + (outputTokens || 0);
 
- // Calculate USD cost
- const cost = calculateCost(model, inputTokens || 0, outputTokens || 0);
+ // Calculate USD cost including cache tokens
+ const cost = calculateCost(model, inputTokens || 0, outputTokens || 0, cacheCreation || 0, cacheRead || 0);
 
  const entry = {
  timestamp: Date.now(),
@@ -76,21 +76,27 @@ class TokenTracker {
  category,
  input: inputTokens || 0,
  output: outputTokens || 0,
+ cacheCreation: cacheCreation || 0,
+ cacheRead: cacheRead || 0,
  total,
  cost: Math.round(cost.total * 1000000) / 1000000,
+ cacheSavings: Math.round((cost.cacheSavings || 0) * 1000000) / 1000000,
  tool: tool || null,
  latencyMs: latencyMs || 0,
  success: success !== false,
  };
 
  // Accumulate by model
- if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, minLatencyMs: Infinity, maxLatencyMs: 0 };
+ if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, minLatencyMs: Infinity, maxLatencyMs: 0, cacheCreation: 0, cacheRead: 0, cacheSavings: 0 };
  const m = this._byModel[model];
  m.input += entry.input;
  m.output += entry.output;
  m.total += total;
  m.calls++;
  m.cost += cost.total;
+ m.cacheCreation += entry.cacheCreation;
+ m.cacheRead += entry.cacheRead;
+ m.cacheSavings += entry.cacheSavings;
  if (entry.success) m.successes++; else m.failures++;
  if (latencyMs > 0) {
  m.totalLatencyMs += latencyMs;
@@ -179,7 +185,10 @@ class TokenTracker {
  total: m.total,
  calls: m.calls,
  cost: m.cost,
- successes: m.successes || m.calls, // backwards compat
+ cacheCreation: m.cacheCreation || 0,
+ cacheRead: m.cacheRead || 0,
+ cacheSavings: Math.round((m.cacheSavings || 0) * 1000000) / 1000000,
+ successes: m.successes || m.calls,
  failures: m.failures || 0,
  successRate: m.calls > 0 ? Math.round(((m.successes || m.calls) / m.calls) * 100) : 0,
  avgLatencyMs: m.calls > 0 && m.totalLatencyMs ? Math.round(m.totalLatencyMs / m.calls) : 0,
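 
Putting the tracker changes together, a hypothetical call site might look like the following (the constructor and the category value are assumptions; only record()'s new signature appears in this diff):

// Hypothetical usage — only record()'s signature is confirmed by the diff.
const tracker = new TokenTracker();
tracker.record(
  "claude-sonnet-4", // model
  "diagnose",        // category (hypothetical)
  1_000,             // inputTokens
  500,               // outputTokens
  null,              // tool
  850,               // latencyMs
  true,              // success
  0,                 // cacheCreation
  20_000             // cacheRead
);
// The per-model accumulator now carries cache fields, so a summary entry
// resembles: { input: 1000, output: 500, calls: 1, cost: 0.0165,
//              cacheRead: 20000, cacheSavings: 0.054, successRate: 100, ... }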
@@ -64,8 +64,9 @@ function collectHeartbeat(subsystems) {
  totalTokens: tokenTracker?._totalTokens || usage?.session?.totalTokens || 0,
  totalCost: tokenTracker?._totalCostUsd || usage?.session?.totalCostUsd || 0,
  totalCalls: tokenTracker?._totalCalls || usage?.session?.totalCalls || 0,
+ totalCacheSavings: _sumCacheSavings(usage?.byModel || {}),
  byCategory: usage?.byCategory || {},
- byModel: usage?.byModel || {},
+ byModel: usage?.byModel || {}, // includes: latency, successRate, tokensPerSec, cacheSavings per model
  byTool: usage?.byTool || {},
  byProvider: _aggregateByProvider(usage?.byModel || {}),
  },
@@ -91,6 +92,15 @@ function collectHeartbeat(subsystems) {
  return redactObj(payload);
  }
 
+ /** Sum cache savings across all models. */
+ function _sumCacheSavings(byModel) {
+ let total = 0;
+ for (const stats of Object.values(byModel || {})) {
+ total += stats.cacheSavings || 0;
+ }
+ return Math.round(total * 1000000) / 1000000;
+ }
+
  /**
  * Aggregate usage by provider (openai vs anthropic) from byModel data.
  * Any new model/provider automatically flows through — no code changes needed.
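 
_aggregateByProvider itself falls outside the hunks shown. Given the comment above, a plausible sketch infers the provider from the model-name prefix, which would explain why new models flow through without code changes (the prefix rule and field list are assumptions):

// Sketch only — the real implementation is not part of this diff.
function _aggregateByProvider(byModel) {
  const out = {};
  for (const [model, stats] of Object.entries(byModel)) {
    // Assumed heuristic: "claude-*" models are Anthropic, everything else OpenAI.
    const provider = model.startsWith("claude") ? "anthropic" : "openai";
    if (!out[provider]) out[provider] = { total: 0, cost: 0, calls: 0, cacheSavings: 0 };
    out[provider].total += stats.total || 0;
    out[provider].cost += stats.cost || 0;
    out[provider].calls += stats.calls || 0;
    out[provider].cacheSavings += stats.cacheSavings || 0;
  }
  return out;
}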