wolverine-ai 3.5.0 → 3.6.0

This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
package/.env.example CHANGED
@@ -6,6 +6,11 @@
  # Your OpenAI API key (required)
  OPENAI_API_KEY=
  ANTHROPIC_API_KEY=
+
+ # ── Wolverine Inference (self-hosted models) ─────────────────────
+ # Get your API key at wolverinenode.xyz — $1 = 100 credits
+ # Set provider to "wolverine" in server/config/settings.json
+ WOLVERINE_API_KEY=
  # ── Dashboard Admin Key (make your own) ──────────────────────────────────────────
  # Required for the agent command interface on the dashboard.
  # Generate: node -e "console.log(require('crypto').randomBytes(32).toString('hex'))"
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "wolverine-ai",
- "version": "3.5.0",
+ "version": "3.6.0",
  "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
  "main": "src/index.js",
  "bin": {
@@ -5,7 +5,7 @@
  "env": "development"
  },

- "provider": "hybrid",
+ "provider": "wolverine",

  "openai_settings": {
  "reasoning": "gpt-5.4-mini",
@@ -43,6 +43,18 @@
  "embedding": "text-embedding-3-small"
  },

+ "wolverine_settings": {
+ "reasoning": "wolverine-test-1",
+ "coding": "wolverine-test-1",
+ "chat": "wolverine-test-1",
+ "tool": "wolverine-test-1",
+ "classifier": "wolverine-test-1",
+ "audit": "wolverine-test-1",
+ "compacting": "wolverine-test-1",
+ "research": "wolverine-test-1",
+ "embedding": "text-embedding-3-small"
+ },
+
  "server": {
  "port": 3000,
  "maxRetries": 3,
@@ -84,6 +96,11 @@
  "intervalMs": 300000
  },

+ "platform": {
+ "apiKey": "",
+ "cors": ["http://localhost:3000"]
+ },
+
  "dashboard": {},

  "cors": {
@@ -0,0 +1,324 @@
+ const https = require("https");
+ const http = require("http");
+ const crypto = require("crypto");
+
+ /**
+ * Wolverine Inference API
+ *
+ * Credit system: $1 = 100 credits. 1 credit = $0.01 of compute.
+ * Token pricing (in credits per million tokens):
+ * wolverine-test-1: 1 credit input / 4 credits output per 1M tokens
+ * (= $0.01/$0.04 per 1M — 15x cheaper than gpt-4o-mini, 80x cheaper than haiku)
+ *
+ * Rate limiting: per API key, configurable per tier.
+ * Queue: when GPU is at capacity, requests queue with timeout.
+ */
+
+ const INFERENCE_URL = process.env.WOLVERINE_INFERENCE_URL || "https://clips-third-players-binding.trycloudflare.com";
+
+ // Pricing in CREDITS per million tokens ($1 = 100 credits)
+ const MODEL_PRICING = {
+ "wolverine-test-1": { input: 1.0, output: 4.0 }, // $0.01/$0.04 per 1M
+ "wolverine-coding": { input: 1.0, output: 4.0 },
+ "wolverine-reasoning": { input: 2.5, output: 10.0 }, // heavier model when available
+ };
+
+ const MODEL_MAP = {
+ "wolverine-test-1": "wolverine-test-1",
+ "wolverine-coding": "wolverine-test-1",
+ "wolverine-reasoning": "wolverine-test-1",
+ };
+
+ const TIER_LIMITS = {
+ free: { rpm: 10, maxTokens: 1024 },
+ starter: { rpm: 60, maxTokens: 4096 },
+ pro: { rpm: 300, maxTokens: 4096 },
+ admin: { rpm: 9999, maxTokens: 4096 },
+ };
+
+ function tokenCost(model, inputTokens, outputTokens) {
+ const p = MODEL_PRICING[model] || MODEL_PRICING["wolverine-test-1"];
+ return ((inputTokens / 1_000_000) * p.input) + ((outputTokens / 1_000_000) * p.output);
+ }
+
+ // ── Request Queue (handles GPU saturation) ──
+ const queue = [];
+ let activeRequests = 0;
+ const MAX_CONCURRENT = 8; // vLLM max-num-seqs
+ const QUEUE_TIMEOUT_MS = 30000;
+
+ function enqueue() {
+ return new Promise((resolve, reject) => {
+ if (activeRequests < MAX_CONCURRENT) {
+ activeRequests++;
+ resolve();
+ return;
+ }
+ const timer = setTimeout(() => {
+ const idx = queue.indexOf(entry);
+ if (idx >= 0) queue.splice(idx, 1);
+ reject(new Error("Queue timeout — GPU at capacity. Try again in a few seconds."));
+ }, QUEUE_TIMEOUT_MS);
+ const entry = { resolve: () => { clearTimeout(timer); activeRequests++; resolve(); }, reject };
+ queue.push(entry);
+ });
+ }
+
+ function dequeue() {
+ activeRequests = Math.max(0, activeRequests - 1);
+ if (queue.length > 0) {
+ const next = queue.shift();
+ next.resolve();
+ }
+ }
+
+ async function routes(fastify) {
+ const { pool } = require("../lib/db");
+
+ // Rate limit state (in-memory)
+ const rateWindows = new Map();
+
+ async function authenticate(request, reply) {
+ const apiKey = request.headers.authorization?.replace("Bearer ", "") || request.headers["x-api-key"];
+ if (!apiKey) return reply.code(401).send({ error: { message: "API key required. Pass via Authorization: Bearer <key>", type: "auth_error" } });
+
+ // Platform key bypass
+ let settings = {};
+ try { settings = require("../config/settings.json"); } catch {}
+ if (apiKey === settings.platform?.apiKey) {
+ request.account = { api_key: apiKey, owner: "platform", tier: "admin", credits_remaining: 999999, rate_limit_rpm: 9999 };
+ return;
+ }
+
+ const result = await pool.query("SELECT * FROM api_credits WHERE api_key = $1", [apiKey]);
+ if (result.rows.length === 0) return reply.code(401).send({ error: { message: "Invalid API key", type: "auth_error" } });
+
+ const account = result.rows[0];
+
+ // Credit check
+ if (parseFloat(account.credits_remaining) <= 0) {
+ return reply.code(402).send({ error: { message: "Insufficient credits. Add credits at wolverinenode.xyz", type: "billing_error", credits_remaining: 0 } });
+ }
+
+ // Rate limit
+ const now = Date.now();
+ const window = rateWindows.get(apiKey) || { count: 0, resetAt: now + 60000 };
+ if (now > window.resetAt) { window.count = 0; window.resetAt = now + 60000; }
+ const limit = account.rate_limit_rpm || TIER_LIMITS[account.tier]?.rpm || 10;
+ if (window.count >= limit) {
+ const retryAfter = Math.ceil((window.resetAt - now) / 1000);
+ return reply.code(429).send({ error: { message: `Rate limit: ${limit} requests/min. Retry in ${retryAfter}s`, type: "rate_limit", retry_after: retryAfter } });
+ }
+ window.count++;
+ rateWindows.set(apiKey, window);
+
+ request.account = account;
+ }
+
+ // ── POST /chat/completions ──
+ fastify.post("/chat/completions", { preHandler: authenticate }, async (request, reply) => {
+ const body = request.body || {};
+ const requestedModel = body.model || "wolverine-test-1";
+ const account = request.account;
+ const tier = TIER_LIMITS[account.tier] || TIER_LIMITS.free;
+ const startMs = Date.now();
+
+ // Enforce max tokens per tier
+ if (body.max_tokens && body.max_tokens > tier.maxTokens) {
+ body.max_tokens = tier.maxTokens;
+ }
+
+ // Map model name for backend
+ const backendBody = { ...body, model: MODEL_MAP[requestedModel] || requestedModel };
+
+ // Queue if GPU saturated
+ try {
+ await enqueue();
+ } catch (err) {
+ return reply.code(503).send({ error: { message: err.message, type: "capacity_error", queue_length: queue.length } });
+ }
+
+ try {
+ const result = await proxyToInference("/v1/chat/completions", backendBody);
+ const latencyMs = Date.now() - startMs;
+
+ const usage = result.usage || {};
+ const inputTokens = usage.prompt_tokens || 0;
+ const outputTokens = usage.completion_tokens || 0;
+ const cost = tokenCost(requestedModel, inputTokens, outputTokens);
+
+ // Bill credits (skip for platform)
+ if (account.owner !== "platform") {
+ await pool.query(
+ "UPDATE api_credits SET credits_remaining = credits_remaining - $1, credits_used = credits_used + $1, last_used = NOW() WHERE api_key = $2",
+ [cost, account.api_key]
+ );
+ await pool.query(
+ "INSERT INTO api_usage_log (api_key, model, input_tokens, output_tokens, total_tokens, cost, latency_ms, success, endpoint) VALUES ($1, $2, $3, $4, $5, $6, $7, true, $8)",
+ [account.api_key, requestedModel, inputTokens, outputTokens, inputTokens + outputTokens, cost, latencyMs, "/v1/chat/completions"]
+ );
+ }
+
+ // Rewrite response
+ if (result.model) result.model = requestedModel;
+ result.x_wolverine = {
+ credits_used: Math.round(cost * 1000000) / 1000000,
+ credits_remaining: Math.max(0, parseFloat(account.credits_remaining) - cost),
+ latency_ms: latencyMs,
+ queued: activeRequests > MAX_CONCURRENT,
+ };
+
+ return result;
+ } catch (err) {
+ if (account.owner !== "platform") {
+ await pool.query(
+ "INSERT INTO api_usage_log (api_key, model, input_tokens, output_tokens, total_tokens, cost, latency_ms, success, endpoint) VALUES ($1, $2, 0, 0, 0, 0, $3, false, $4)",
+ [account.api_key, requestedModel, Date.now() - startMs, "/v1/chat/completions"]
+ ).catch(() => {});
+ }
+ return reply.code(502).send({ error: { message: `Inference error: ${err.message}`, type: "inference_error" } });
+ } finally {
+ dequeue();
+ }
+ });
+
+ // ── GET /models ──
+ fastify.get("/models", async () => ({
+ object: "list",
+ data: Object.entries(MODEL_PRICING).map(([id, p]) => ({
+ id, object: "model", owned_by: "wolverine",
+ created: Math.floor(Date.now() / 1000),
+ pricing: { input_credits_per_million: p.input, output_credits_per_million: p.output, usd_per_credit: 0.01 },
+ })),
+ }));
+
+ // ── POST /keys/create — generate new API key ──
+ fastify.post("/keys/create", { preHandler: authenticate }, async (request, reply) => {
+ const account = request.account;
+ if (account.tier !== "admin") return reply.code(403).send({ error: { message: "Only admins can create API keys", type: "auth_error" } });
+
+ const { owner, email, credits, tier, rpm } = request.body || {};
+ if (!owner) return reply.code(400).send({ error: { message: "owner required", type: "validation_error" } });
+
+ const newKey = "wlv_" + crypto.randomBytes(24).toString("hex");
+ const keyTier = tier || "free";
+ const keyCredits = credits || (keyTier === "free" ? 10 : 0);
+ const keyRpm = rpm || TIER_LIMITS[keyTier]?.rpm || 10;
+
+ await pool.query(
+ "INSERT INTO api_credits (api_key, owner, email, credits_remaining, tier, plan_name, rate_limit_rpm) VALUES ($1, $2, $3, $4, $5, $6, $7)",
+ [newKey, owner, email || null, keyCredits, keyTier, keyTier, keyRpm]
+ );
+
+ return { api_key: newKey, owner, tier: keyTier, credits: keyCredits, rate_limit_rpm: keyRpm };
+ });
+
+ // ── POST /keys/add-credits — add credits to a key ──
+ fastify.post("/keys/add-credits", { preHandler: authenticate }, async (request, reply) => {
+ const account = request.account;
+ if (account.tier !== "admin") return reply.code(403).send({ error: { message: "Only admins can add credits", type: "auth_error" } });
+
+ const { api_key, credits } = request.body || {};
+ if (!api_key || !credits) return reply.code(400).send({ error: { message: "api_key and credits required" } });
+
+ await pool.query("UPDATE api_credits SET credits_remaining = credits_remaining + $1 WHERE api_key = $2", [credits, api_key]);
+ const updated = await pool.query("SELECT credits_remaining FROM api_credits WHERE api_key = $1", [api_key]);
+ return { api_key, credits_added: credits, credits_remaining: parseFloat(updated.rows[0]?.credits_remaining || 0) };
+ });
+
+ // ── GET /keys — list all keys (admin only) ──
+ fastify.get("/keys", { preHandler: authenticate }, async (request, reply) => {
+ if (request.account.tier !== "admin") return reply.code(403).send({ error: { message: "Admin only" } });
+ const { rows } = await pool.query("SELECT api_key, owner, email, tier, credits_remaining, credits_used, rate_limit_rpm, created_at, last_used FROM api_credits ORDER BY created_at DESC");
+ return { keys: rows };
+ });
+
+ // ── GET /credits ──
+ fastify.get("/credits", { preHandler: authenticate }, async (request, reply) => {
+ const a = request.account;
+ return {
+ credits_remaining: parseFloat(a.credits_remaining),
+ credits_used: parseFloat(a.credits_used || 0),
+ usd_remaining: parseFloat(a.credits_remaining) * 0.01,
+ usd_used: parseFloat(a.credits_used || 0) * 0.01,
+ tier: a.tier, rate_limit_rpm: a.rate_limit_rpm, owner: a.owner,
+ };
+ });
+
+ // ── GET /usage ──
+ fastify.get("/usage", { preHandler: authenticate }, async (request, reply) => {
+ const apiKey = request.account.api_key;
+ const period = request.query.period || "7d";
+ const interval = { "1h": "1 hour", "1d": "1 day", "7d": "7 days", "30d": "30 days" }[period] || "7 days";
+
+ const summary = await pool.query(
+ `SELECT model, COUNT(*) AS calls, SUM(input_tokens) AS input, SUM(output_tokens) AS output,
+ SUM(total_tokens) AS tokens, SUM(cost) AS credits_spent, AVG(latency_ms) AS avg_latency,
+ COUNT(*) FILTER (WHERE success) AS successes
+ FROM api_usage_log WHERE api_key = $1 AND timestamp > NOW() - $2::interval
+ GROUP BY model ORDER BY credits_spent DESC`, [apiKey, interval]
+ );
+
+ const timeline = await pool.query(
+ `SELECT date_trunc('hour', timestamp) AS hour, SUM(cost) AS credits, SUM(total_tokens) AS tokens, COUNT(*) AS calls
+ FROM api_usage_log WHERE api_key = $1 AND timestamp > NOW() - $2::interval
+ GROUP BY hour ORDER BY hour`, [apiKey, interval]
+ );
+
+ const totalCredits = summary.rows.reduce((s, r) => s + parseFloat(r.credits_spent || 0), 0);
+
+ return {
+ period,
+ total_credits_spent: Math.round(totalCredits * 1000000) / 1000000,
+ total_usd_spent: Math.round(totalCredits * 0.01 * 1000000) / 1000000,
+ byModel: summary.rows.map(r => ({
+ model: r.model, calls: parseInt(r.calls), input: parseInt(r.input || 0), output: parseInt(r.output || 0),
+ tokens: parseInt(r.tokens || 0), credits_spent: parseFloat(r.credits_spent || 0),
+ usd_spent: parseFloat(r.credits_spent || 0) * 0.01,
+ avgLatencyMs: Math.round(parseFloat(r.avg_latency || 0)),
+ successRate: parseInt(r.calls) > 0 ? parseFloat(((parseInt(r.successes) / parseInt(r.calls)) * 100).toFixed(2)) : 0,
+ })),
+ timeline: timeline.rows.map(r => ({
+ hour: r.hour, credits: parseFloat(r.credits), tokens: parseInt(r.tokens), calls: parseInt(r.calls),
+ })),
+ queue: { active: activeRequests, waiting: queue.length, max: MAX_CONCURRENT },
+ };
+ });
+
+ // ── GET /health ──
+ fastify.get("/health", async () => {
+ try {
+ const result = await proxyToInference("/health", null, "GET");
+ return { status: "ok", inference: result, queue: { active: activeRequests, waiting: queue.length, max: MAX_CONCURRENT } };
+ } catch (err) {
+ return { status: "down", error: err.message, queue: { active: activeRequests, waiting: queue.length, max: MAX_CONCURRENT } };
+ }
+ });
+ }
+
+ function proxyToInference(path, body, method = "POST") {
+ return new Promise((resolve, reject) => {
+ const url = new (require("url").URL)(INFERENCE_URL + path);
+ const client = url.protocol === "https:" ? https : http;
+ const bodyStr = body ? JSON.stringify(body) : null;
+
+ const req = client.request({
+ hostname: url.hostname,
+ port: url.port || (url.protocol === "https:" ? 443 : 80),
+ path: url.pathname,
+ method,
+ timeout: 120000,
+ headers: { "Content-Type": "application/json", ...(bodyStr ? { "Content-Length": Buffer.byteLength(bodyStr) } : {}) },
+ }, (res) => {
+ let data = "";
+ res.on("data", (c) => { data += c; });
+ res.on("end", () => { try { resolve(JSON.parse(data)); } catch { resolve({ raw: data }); } });
+ });
+ req.on("error", reject);
+ req.on("timeout", () => { req.destroy(); reject(new Error("Inference timeout")); });
+ if (bodyStr) req.write(bodyStr);
+ req.end();
+ });
+ }
+
+ module.exports = routes;
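
The new file above (its path is not shown in this diff view) is an OpenAI-compatible billing proxy built on Fastify and Postgres. A minimal client sketch follows; the base URL is an assumption taken from the api.wolverinenode.xyz/v1 default used elsewhere in this release, and Node 18+ is assumed for global fetch:

// hypothetical client usage, not code from the package
(async () => {
  const res = await fetch("https://api.wolverinenode.xyz/v1/chat/completions", {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${process.env.WOLVERINE_API_KEY}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: "wolverine-test-1",
      messages: [{ role: "user", content: "Hello" }],
      max_tokens: 256, // clamped server-side to the key's tier limit
    }),
  });
  // Per the handlers above: 402 = out of credits, 429 = rate limit, 503 = queue timeout
  const data = await res.json();
  console.log(data.x_wolverine); // { credits_used, credits_remaining, latency_ms, queued }
})();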
@@ -448,6 +448,15 @@ class AgentEngine {
  const assistantMessage = choice.message || choice;
  this.messages.push(assistantMessage);

+ // Parse Gemma-style text tool calls: "call:tool_name{json_args}" → structured tool_calls
+ if ((!assistantMessage.tool_calls || assistantMessage.tool_calls.length === 0) && assistantMessage.content) {
+ const parsed = _parseTextToolCalls(assistantMessage.content);
+ if (parsed.length > 0) {
+ assistantMessage.tool_calls = parsed;
+ console.log(chalk.gray(` 🔧 Parsed ${parsed.length} tool call(s) from text output`));
+ }
+ }
+
  if (!assistantMessage.tool_calls || assistantMessage.tool_calls.length === 0) {
  if (assistantMessage.content) {
  console.log(chalk.gray(` 💬 ${(assistantMessage.content || "").slice(0, 200)}`));
@@ -1223,4 +1232,50 @@ function _runPostHook(toolName, toolInput, toolOutput, isError, cwd) {
  } catch {}
  }

+ /**
+ * Parse Gemma-style text tool calls into OpenAI tool_calls format.
+ * Gemma outputs: "call:tool_name{json_args}" or "<|tool_call>call:tool_name{args}<tool_call|>"
+ * We convert to: [{ id, type: "function", function: { name, arguments } }]
+ */
+ function _parseTextToolCalls(content) {
+ if (!content) return [];
+ const calls = [];
+ // Match patterns: call:name{args} or call:name{"key":"val"}
+ const patterns = [
+ /call:(\w+)\{([^}]*(?:\{[^}]*\}[^}]*)*)\}/g, // call:name{args with nested braces}
+ /call:(\w+)\(([^)]*)\)/g, // call:name(args)
+ ];
+ for (const regex of patterns) {
+ let match;
+ while ((match = regex.exec(content)) !== null) {
+ const name = match[1];
+ let argsStr = match[2];
+ // Try to parse as JSON, otherwise build from key:value pairs
+ let args;
+ try {
+ // Clean up Gemma's quoting: path:"value" → "path":"value"
+ const cleaned = argsStr.replace(/(\w+)\s*:\s*/g, '"$1":').replace(/<\|"\|>/g, '"');
+ args = JSON.parse("{" + cleaned + "}");
+ } catch {
+ try { args = JSON.parse(argsStr); } catch {
+ // Last resort: treat as single string argument for the most common param
+ const paramGuess = argsStr.replace(/['"<|>]/g, "").trim();
+ if (name === "read_file" || name === "glob_files") args = { path: paramGuess };
+ else if (name === "grep_code") args = { pattern: paramGuess };
+ else if (name === "bash_exec") args = { command: paramGuess };
+ else if (name === "write_file") args = { path: paramGuess, content: "" };
+ else args = { input: paramGuess };
+ }
+ }
+ calls.push({
+ id: "call_" + Date.now().toString(36) + "_" + calls.length,
+ type: "function",
+ function: { name, arguments: JSON.stringify(args) },
+ });
+ }
+ if (calls.length > 0) break; // use first matching pattern
+ }
+ return calls;
+ }
+
  module.exports = { AgentEngine, TOOL_DEFINITIONS, BLOCKED_COMMANDS };
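
To make the parser's behavior concrete, a worked example (not from the package) tracing the first regex and the JSON cleanup above:

// _parseTextToolCalls('call:read_file{path:"src/index.js"}') returns:
// [{
//   id: "call_<base36 timestamp>_0",
//   type: "function",
//   function: { name: "read_file", arguments: '{"path":"src/index.js"}' }
// }]
// The cleanup step rewrites path:"src/index.js" to "path":"src/index.js" so the
// braces parse as JSON; unparseable args fall back to the per-tool single-argument guesses.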
@@ -115,7 +115,7 @@ async function compact(text) {
  systemPrompt: "Compress the following text into a dense, semantically rich summary. Keep all technical terms, function names, file paths, and error messages. Remove filler words. Output ONLY the compressed text, nothing else.",
  userPrompt: text,
  maxTokens: 256,
- category: "brain",
+ category: "compacting",
  });

  return result.content || text;
@@ -17,7 +17,7 @@ const path = require("path");
  * - Config files (.env, .json, .yaml)
  */

- const SKIP_DIRS = new Set(["node_modules", ".wolverine", ".git", "dist", "build", "coverage", "src", "bin", "tests"]);
+ const SKIP_DIRS = new Set(["node_modules", ".wolverine", ".git", "dist", "build", "coverage", "src", "bin", "tests", "examples", "public", "static", "assets", "__tests__", ".next", ".nuxt"]);
  const CODE_EXTENSIONS = new Set([".js", ".ts", ".mjs", ".cjs", ".jsx", ".tsx"]);
  const CONFIG_EXTENSIONS = new Set([".json", ".yaml", ".yml", ".toml", ".env"]);

@@ -52,6 +52,11 @@ function scanProject(projectRoot) {
  // Recursive scan
  _scanDir(root, root, map);

+ // Cap collections to prevent memory bloat on large projects
+ if (map.functions.length > 500) map.functions = map.functions.slice(0, 500);
+ if (map.classes.length > 200) map.classes = map.classes.slice(0, 200);
+ if (map.exports.length > 300) map.exports = map.exports.slice(0, 300);
+
  // Build summary
  map.summary = _buildSummary(map);

@@ -88,12 +93,21 @@ function _scanDir(dir, root, map) {

  map.files.push({ path: relPath, type: "code" });

+ // Skip large/minified files — they bloat memory and aren't useful for repair context
+ let stat;
+ try { stat = fs.statSync(fullPath); } catch { continue; }
+ if (stat.size > 100000) continue; // Skip files > 100KB (bundles, minified, generated)
+
  // Parse the file for patterns
  let content;
  try {
  content = fs.readFileSync(fullPath, "utf-8");
  } catch { continue; }

+ // Skip minified code (avg line length > 200 chars = likely minified)
+ const lines = content.split("\n");
+ if (lines.length > 0 && content.length / lines.length > 200) continue;
+
  _extractRoutes(content, relPath, map);
  _extractExports(content, relPath, map);
  _extractFunctions(content, relPath, map);
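
For a sense of the two thresholds above (numbers illustrative): a 120 KB bundle is already excluded by the 100 KB size cap, and at 400 physical lines it would also fail the density check at 300 characters per line; a 60 KB hand-written module with 2,000 lines averages about 30 characters per line and is parsed normally.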
@@ -5,6 +5,7 @@ const { getModel, detectProvider } = require("./models");

  let _openaiClient = null;
  let _anthropicClient = null;
+ let _wolverineClient = null;
  let _tracker = null;

  function setTokenTracker(tracker) { _tracker = tracker; }
@@ -35,9 +36,23 @@ function _track(model, category, usage, tool, latencyMs, success) {

  function getClient(provider) {
  if (provider === "anthropic") return _getAnthropicClient();
+ if (provider === "wolverine") return _getWolverineClient();
  return _getOpenAIClient();
  }

+ function _getWolverineClient() {
+ if (!_wolverineClient) {
+ // Wolverine inference: direct to GPU (WOLVERINE_INFERENCE_URL) or via proxy (api.wolverinenode.xyz/v1)
+ // Direct URL = no auth needed (Vast tunnel). Proxy URL = needs WOLVERINE_API_KEY for billing.
+ const baseURL = process.env.WOLVERINE_INFERENCE_URL
+ ? process.env.WOLVERINE_INFERENCE_URL + "/v1"
+ : "https://api.wolverinenode.xyz/v1";
+ const apiKey = process.env.WOLVERINE_API_KEY || "none";
+ _wolverineClient = new OpenAI({ apiKey, baseURL });
+ }
+ return _wolverineClient;
+ }
+
  function _getOpenAIClient() {
  if (!_openaiClient) {
  const apiKey = process.env.OPENAI_API_KEY;
@@ -65,6 +80,7 @@ function isReasoningModel(model) {
  }

  function isAnthropicModel(model) { return detectProvider(model) === "anthropic"; }
+ function isWolverineModel(model) { return detectProvider(model) === "wolverine"; }

  /**
  * Per-model max output token limits (with 10% overestimation buffer).
@@ -176,6 +192,8 @@ async function aiCall({ model, systemPrompt, userPrompt, maxTokens = 2048, tools
  try {
  if (provider === "anthropic") {
  result = await _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
+ } else if (provider === "wolverine") {
+ result = await _chatCall(_getWolverineClient(), { model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
  } else if (isResponsesModel(model)) {
  result = await _responsesCall(_getOpenAIClient(), { model, systemPrompt, userPrompt, maxTokens, tools });
  } else {
@@ -200,6 +218,8 @@ async function aiCallWithHistory({ model, messages, tools, maxTokens = 4096, cat
  try {
  if (provider === "anthropic") {
  result = await _anthropicCallWithHistory({ model, messages, tools, maxTokens });
+ } else if (provider === "wolverine") {
+ result = await _chatCallWithHistory(_getWolverineClient(), { model, messages, tools, maxTokens });
  } else if (isResponsesModel(model)) {
  result = await _responsesCallWithHistory(_getOpenAIClient(), { model, messages, tools, maxTokens });
  } else {
@@ -573,7 +593,7 @@ ${backupSourceCode ? `## Last Known Working Version\n\`\`\`javascript\n${backupS
  "changes" is for code edits (optional, use for actual code fixes).
  Include both if needed, or just one.`;

- const result = await aiCall({ model, systemPrompt, userPrompt, maxTokens: 2048, category: "heal" });
+ const result = await aiCall({ model, systemPrompt, userPrompt, maxTokens: 2048, category: "coding" });
  const content = result.content;
  const cleaned = content.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");

@@ -15,7 +15,14 @@
  */
  function detectProvider(model) {
  if (!model) return "openai";
- if (/^claude/i.test(model)) return "anthropic";
+ if (/^wolverine/i.test(model) || /^gemma/i.test(model)) return "wolverine";
+ if (/^claude/i.test(model) || /^anthropic/i.test(model)) return "anthropic";
+ if (/^gemini/i.test(model) || /^google/i.test(model)) return "google";
+ if (/^mistral/i.test(model) || /^codestral/i.test(model) || /^pixtral/i.test(model)) return "mistral";
+ if (/^llama/i.test(model) || /^meta/i.test(model)) return "meta";
+ if (/^deepseek/i.test(model)) return "deepseek";
+ if (/^command/i.test(model) || /^cohere/i.test(model)) return "cohere";
+ // Default: OpenAI (gpt-*, o1-*, o3-*, o4-*, codex-*, text-embedding-*, dall-e-*, etc.)
  return "openai";
  }

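For illustration (these calls are not in the diff), the new prefix routing resolves as:

// detectProvider("wolverine-test-1") → "wolverine"
// detectProvider("gemma-2-9b")       → "wolverine" (Gemma models route to the self-hosted backend)
// detectProvider("claude-3-haiku")   → "anthropic"
// detectProvider("deepseek-chat")    → "deepseek"
// detectProvider("gpt-4o-mini")      → "openai" (the default)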
@@ -336,7 +336,7 @@ class DashboardServer {
  systemPrompt: "Route a command. Respond with two words: ROUTE SIZE.\nROUTE: SIMPLE (general knowledge/explanation, no live data needed), TOOLS (needs live server data, file contents, or endpoint calls), AGENT (create/modify/fix code).\nSIZE: SMALL, MEDIUM, LARGE.\nExamples: 'what is wolverine' → SIMPLE SMALL. 'what time is it' → TOOLS SMALL. 'show me index.js' → TOOLS SMALL. 'add endpoint' → AGENT SMALL. 'build auth' → AGENT LARGE.",
  userPrompt: command,
  maxTokens: 10,
- category: "classify",
+ category: "classifier",
  });

  const raw = (result.content || "").trim().toUpperCase();
@@ -424,7 +424,7 @@ ${indexContent}
  Existing route files:
  ${existingRoutes || "(none)"}`,
  maxTokens: 2048,
- category: "develop",
+ category: "tool",
  });

  const raw = (result.content || "").trim().replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
@@ -53,6 +53,14 @@ const DEFAULT_PRICING = {
  "claude-3-sonnet": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
  "claude-3-haiku": { input: 0.25, output: 1.25, cache_write: 0.3125, cache_read: 0.025 },

+ // ── Wolverine Self-Hosted (Gemma 4 via api.wolverinenode.xyz) ──
+ // Priced between Anthropic and OpenAI — cheaper than both
+ "wolverine-test-1": { input: 0.10, output: 0.40 },
+ "wolverine-gemma-26b": { input: 0.25, output: 1.00 },
+ "wolverine-gemma-8b": { input: 0.10, output: 0.40 },
+ "wolverine-coding": { input: 0.10, output: 0.40 },
+ "wolverine-reasoning": { input: 0.25, output: 1.00 },
+
  // ── Fallback ──
  "_default": { input: 1.00, output: 4.00 },
  };
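
Taking the units from the Claude rows (USD per million tokens), a worked example of what the new defaults imply; the traffic numbers are invented for illustration:

// 200,000 input + 50,000 output tokens on "wolverine-test-1":
//   input:  (200000 / 1e6) * 0.10 = $0.020
//   output: ( 50000 / 1e6) * 0.40 = $0.020
//   total:  $0.04, versus $0.1125 for the same traffic on claude-3-haiku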
@@ -33,6 +33,8 @@ class TokenTracker {
  this._byModel = {};
  // Per-category totals
  this._byCategory = {};
+ // Per-model-per-category cross-reference (model::category → stats)
+ this._byModelCategory = {};
  // Per-tool totals
  this._byTool = {};
  // Timeline: recent entries for charts (in-memory)
@@ -87,7 +89,7 @@ class TokenTracker {
  };

  // Accumulate by model
- if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, minLatencyMs: Infinity, maxLatencyMs: 0, cacheCreation: 0, cacheRead: 0, cacheSavings: 0 };
+ if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, totalLatencyTokens: 0, timedCalls: 0, minLatencyMs: Infinity, maxLatencyMs: 0, cacheCreation: 0, cacheRead: 0, cacheSavings: 0 };
  const m = this._byModel[model];
  m.input += entry.input;
  m.output += entry.output;
@@ -100,6 +102,8 @@
  if (entry.success) m.successes++; else m.failures++;
  if (latencyMs > 0) {
  m.totalLatencyMs += latencyMs;
+ m.totalLatencyTokens += total;
+ m.timedCalls++;
  if (latencyMs < m.minLatencyMs) m.minLatencyMs = latencyMs;
  if (latencyMs > m.maxLatencyMs) m.maxLatencyMs = latencyMs;
  }
@@ -112,6 +116,18 @@
  this._byCategory[category].calls++;
  this._byCategory[category].cost += cost.total;

+ // Accumulate by model+category cross-reference
+ const mcKey = `${model}::${category}`;
+ if (!this._byModelCategory[mcKey]) this._byModelCategory[mcKey] = { model, category, input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0 };
+ const mc = this._byModelCategory[mcKey];
+ mc.input += entry.input;
+ mc.output += entry.output;
+ mc.total += total;
+ mc.calls++;
+ mc.cost += cost.total;
+ if (entry.success) mc.successes++; else mc.failures++;
+ if (latencyMs > 0) mc.totalLatencyMs += latencyMs;
+
  // Accumulate by tool
  if (tool) {
  const toolKey = tool.split(" ")[0];
@@ -158,6 +174,7 @@
  },
  byModel: this._formatModelStats(),
  byCategory: this._byCategory,
+ byModelCategory: this._formatModelCategoryStats(),
  byTool: this._byTool,
  // Recent in-memory timeline
  timeline: this._timeline.slice(-100).map(e => ({
@@ -188,19 +205,42 @@
  cacheCreation: m.cacheCreation || 0,
  cacheRead: m.cacheRead || 0,
  cacheSavings: Math.round((m.cacheSavings || 0) * 1000000) / 1000000,
- successes: m.successes || m.calls,
+ successes: m.successes != null ? m.successes : m.calls - (m.failures || 0),
  failures: m.failures || 0,
- successRate: m.calls > 0 ? Math.round(((m.successes || m.calls) / m.calls) * 100) : 0,
- avgLatencyMs: m.calls > 0 && m.totalLatencyMs ? Math.round(m.totalLatencyMs / m.calls) : 0,
+ successRate: m.calls > 0 ? parseFloat((((m.calls - (m.failures || 0)) / m.calls) * 100).toFixed(2)) : 0,
+ // Latency normalized by token count
+ avgLatencyMs: (m.timedCalls || 0) > 0 ? Math.round(m.totalLatencyMs / m.timedCalls) : 0,
+ msPerKToken: (m.totalLatencyTokens || 0) > 0 ? Math.round((m.totalLatencyMs / m.totalLatencyTokens) * 1000) : 0,
+ tokensPerSecond: m.totalLatencyMs > 0 ? Math.round((m.totalLatencyTokens || m.total) / (m.totalLatencyMs / 1000) * 10) / 10 : 0,
+ outputTokPerSecond: m.totalLatencyMs > 0 && m.output > 0 ? Math.round((m.output / (m.totalLatencyMs / 1000)) * 10) / 10 : 0,
+ timedCalls: m.timedCalls || 0,
  minLatencyMs: m.minLatencyMs === Infinity ? 0 : (m.minLatencyMs || 0),
  maxLatencyMs: m.maxLatencyMs || 0,
- tokensPerSecond: m.totalLatencyMs > 0 ? Math.round((m.total / (m.totalLatencyMs / 1000)) * 10) / 10 : 0,
  costPerCall: m.calls > 0 ? Math.round((m.cost / m.calls) * 1000000) / 1000000 : 0,
  };
  }
  return result;
  }

+ /**
+ * Format model+category cross-reference for analytics.
+ * Returns array of { model, category, calls, cost, tokens, successRate, avgLatencyMs }
+ */
+ _formatModelCategoryStats() {
+ return Object.values(this._byModelCategory).map(mc => ({
+ model: mc.model,
+ category: mc.category,
+ calls: mc.calls,
+ cost: Math.round(mc.cost * 1000000) / 1000000,
+ tokens: mc.total,
+ input: mc.input,
+ output: mc.output,
+ successRate: mc.calls > 0 ? parseFloat((((mc.calls - (mc.failures || 0)) / mc.calls) * 100).toFixed(2)) : 100,
+ avgLatencyMs: mc.calls > 0 && mc.totalLatencyMs > 0 ? Math.round(mc.totalLatencyMs / mc.calls) : 0,
+ tokensPerSecond: mc.totalLatencyMs > 0 ? Math.round((mc.total / (mc.totalLatencyMs / 1000)) * 10) / 10 : 0,
+ }));
+ }
+
  /**
  * Load full history from JSONL file. For dashboard charts across sessions.
  * @param {number} limit — max entries to return (default: 500)
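
For illustration (values invented), one entry of the byModelCategory array produced by the formatter above would look like:

// key "wolverine-test-1::coding" (12 calls, 1 failure, 22,080 ms of timed latency) formats to:
// { model: "wolverine-test-1", category: "coding", calls: 12, cost: 0.0031,
//   tokens: 4820, input: 4010, output: 810, successRate: 91.67,
//   avgLatencyMs: 1840, tokensPerSecond: 218.3 }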
@@ -253,6 +293,7 @@
  lastSaved: Date.now(),
  byModel: this._byModel,
  byCategory: this._byCategory,
+ byModelCategory: this._byModelCategory,
  byTool: this._byTool,
  totalTokens: this._totalTokens,
  totalCalls: this._totalCalls,
@@ -275,6 +316,7 @@
  const data = JSON.parse(fs.readFileSync(this.usagePath, "utf-8"));
  this._byModel = data.byModel || {};
  this._byCategory = data.byCategory || {};
+ this._byModelCategory = data.byModelCategory || {};
  this._byTool = data.byTool || {};
  this._totalTokens = data.totalTokens || 0;
  this._totalCalls = data.totalCalls || 0;
@@ -236,7 +236,7 @@ Provide a brief analysis and actionable suggestions. Focus on:

  Keep your response under 300 words. Be specific and actionable.`,
  maxTokens: 512,
- category: "security",
+ category: "audit",
  });

  const analysis = result.content;
@@ -172,7 +172,7 @@ class Notifier {
  systemPrompt: "You summarize server errors for developers. Write 1-2 short sentences. Be direct and actionable. Do not include any secrets, passwords, or API key values — only refer to them by name (e.g. 'the OPENAI_API_KEY').",
  userPrompt: `Summarize this error for a developer notification:\n\nCategory: ${classification.category}\nError: ${safeError}\n\nStack (first 300 chars): ${safeStack.slice(0, 300)}`,
  maxTokens: 100,
- category: "security",
+ category: "audit",
  });

  // Double-sanitize the AI response (in case the AI echoes something)
@@ -66,7 +66,8 @@ function collectHeartbeat(subsystems) {
  totalCalls: tokenTracker?._totalCalls || usage?.session?.totalCalls || 0,
  totalCacheSavings: _sumCacheSavings(usage?.byModel || {}),
  byCategory: usage?.byCategory || {},
- byModel: usage?.byModel || {}, // includes: latency, successRate, tokensPerSec, cacheSavings per model
+ byModel: usage?.byModel || {},
+ byModelCategory: usage?.byModelCategory || [],
  byTool: usage?.byTool || {},
  byProvider: _aggregateByProvider(usage?.byModel || {}),
  },
@@ -95,7 +95,7 @@ Respond with ONLY valid JSON:
  systemPrompt: "You are a security analyst. Respond with ONLY valid JSON.",
  userPrompt,
  maxTokens: 128,
- category: "security",
+ category: "audit",
  });

  const content = result.content;