wolverine-ai 3.4.1 → 3.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -34,7 +34,7 @@ const SEED_DOCS = [
34
34
  metadata: { topic: "overview" },
35
35
  },
36
36
  {
37
- text: "Wolverine heal pipeline: crash detected → error parsed (file, line, message, errorType) → prompt injection scan (AUDIT_MODEL) → rate limit check → operational fix attempt (missing_module → npm install, missing_file → create file, permission → chmod — zero AI tokens) → if operational fix doesn't apply → fast path repair (CODING_MODEL, supports both code changes AND shell commands like npm install) → if fast path fails → agent path (REASONING_MODEL with tools including bash_exec for npm install) → if agent fails → sub-agents (explore → plan → fix, fixer has bash_exec) → verify fix (syntax check + boot probe) → rollback on failure. Error types classified: missing_module, missing_file, permission, port_conflict, syntax, runtime, unknown.",
37
+ text: "Wolverine heal pipeline: crash detected → error parsed (file, line, message, errorType) → prompt injection scan (AUDIT_MODEL) → rate limit check (per-signature + global 5/5min cap) → operational fix attempt (missing_module → npm install, missing_file → create file with inferred config, permission → chmod, port conflict → kill stale process — zero AI tokens) → if operational fix doesn't apply → fast path repair (CODING_MODEL, supports both code changes AND shell commands like npm install) → if fast path fails → agent path (REASONING_MODEL with tools including bash_exec, 45s per-API-call timeout) → if agent fails → sub-agents (explore → plan → fix, fixer has bash_exec) → verify fix (syntax check + boot probe + error classification comparison) → rollback on failure. Error types classified: missing_module, missing_file, permission, port_conflict, syntax, runtime, unknown. Heal timeout: 5 minutes via Promise.race. Config-aware turn budget: simple=4, config/ENOENT=5, complex=8 turns.",
38
38
  metadata: { topic: "heal-pipeline" },
39
39
  },
40
40
  {
@@ -66,7 +66,7 @@ const SEED_DOCS = [
66
66
  metadata: { topic: "verification" },
67
67
  },
68
68
  {
69
- text: "Wolverine multi-file agent: 15-turn agent loop with 18 tools across 7 categories. FILE: read_file (offset/limit), write_file (creates dirs), edit_file (find-and-replace), glob_files (pattern search), grep_code (regex with context), list_dir (directory listing with sizes), move_file (rename/relocate). SHELL: bash_exec (30s default, 60s cap), git_log, git_diff. DATABASE: inspect_db (tables/schema/SELECT on SQLite), run_db_fix (UPDATE/DELETE/ALTER with auto-backup). DIAGNOSTICS: check_port (find what uses a port), check_env (env vars, values redacted). DEPS: audit_deps (full npm health check), check_migration (known upgrade paths). RESEARCH: web_fetch. CONTROL: done. Used when fast path fails. Token budget 50k max.",
69
+ text: "Wolverine multi-file agent: turn-limited agent loop with 18 tools across 7 categories. Turn budget adapts to error type: simple (TypeError)=4, config/ENOENT=5, complex=8. Each AI call has 45s timeout via Promise.race — prevents indefinite hangs. If timeout occurs mid-fix, partial results returned. FILE: read_file (offset/limit), write_file (creates dirs), edit_file (find-and-replace), glob_files (pattern search), grep_code (regex with context), list_dir (directory listing with sizes), move_file (rename/relocate). SHELL: bash_exec (30s default, 60s cap), git_log, git_diff. DATABASE: inspect_db (tables/schema/SELECT on SQLite), run_db_fix (UPDATE/DELETE/ALTER with auto-backup). DIAGNOSTICS: check_port (find what uses a port), check_env (env vars, values redacted). DEPS: audit_deps (full npm health check), check_migration (known upgrade paths). RESEARCH: web_fetch (10s timeout). CONTROL: done. Prompt emphasizes fast action: fix immediately when solution is obvious, investigate only when cause unclear.",
70
70
  metadata: { topic: "agent" },
71
71
  },
72
72
  {
@@ -202,7 +202,7 @@ const SEED_DOCS = [
202
202
  metadata: { topic: "admin-auth" },
203
203
  },
204
204
  {
205
- text: "Operational fix layer: before calling AI, wolverine checks for common non-code errors that can be fixed instantly with zero tokens. Pattern 1: 'Cannot find module X' (where X is a package name, not a relative path) → runs npm install X (or just npm install if package is already in package.json). Pattern 2: ENOENT on config/data files (.json, .yaml, .env, .log, etc.) → creates the missing file with sensible defaults (empty JSON {}, empty string). Pattern 3: EACCES/EPERM → chmod 755 on the file. This layer runs before the AI repair loop and handles ~30% of production crashes at zero cost.",
205
+ text: "Operational fix layer: before calling AI, wolverine checks for common non-code errors that can be fixed instantly with zero tokens. Pattern 1: 'Cannot find module X' (where X is a package name, not a relative path) → runs npm install X via deps skill diagnosis. Pattern 2: ENOENT on config/data files (.json, .yaml, .env, .log, etc.) → for JSON configs, reads the source code that loads the file to infer expected fields (apiUrl, timeout, etc.) and creates the file with correct structure; for other types, creates empty file. Pattern 3: EACCES/EPERM → chmod 755 on the file. Pattern 4: EADDRINUSE → finds and kills stale process on the port (lsof on Linux, netstat on Windows). This layer runs before the AI repair loop and handles ~30% of production crashes at zero cost.",
206
206
  metadata: { topic: "operational-fix" },
207
207
  },
208
208
  {
@@ -214,7 +214,7 @@ const SEED_DOCS = [
214
214
  metadata: { topic: "agent-fix-strategy" },
215
215
  },
216
216
  {
217
- text: "Error Monitor: detects caught 500 errors that don't crash the process. Most production bugs are caught by Fastify/Express error handlers — the server stays alive but routes return 500. Wolverine's crash-based heal pipeline never triggers for these. ErrorMonitor tracks 5xx errors per route via IPC from child process. After N consecutive 500s within a time window (default: 3 failures in 30s), triggers the heal pipeline without killing the server. Error hook auto-injected via --require preload (no user code changes). Cooldown prevents heal spam (default: 60s per route). Stats available in dashboard and telemetry. Config: WOLVERINE_ERROR_THRESHOLD, WOLVERINE_ERROR_WINDOW_MS, WOLVERINE_ERROR_COOLDOWN_MS.",
217
+ text: "Error Monitor: detects caught 500 errors that don't crash the process. Most production bugs are caught by Fastify/Express error handlers — the server stays alive but routes return 500. Wolverine's crash-based heal pipeline never triggers for these. ErrorMonitor tracks 5xx errors per normalized route (/api/users/123 → /api/users/:id) via IPC from child process. Single error triggers heal (threshold=1, configurable). Error hook auto-injected via --require preload (no user code changes) hooks Fastify onError + setErrorHandler wrapper + auto-registers default error handler if user never sets one (catches async route throws). Cooldown prevents heal spam (default: 60s per route). Health check failures also trigger heal (not just restart). Config: WOLVERINE_ERROR_THRESHOLD, WOLVERINE_ERROR_WINDOW_MS, WOLVERINE_ERROR_COOLDOWN_MS.",
218
218
  metadata: { topic: "error-monitor" },
219
219
  },
220
220
  {
@@ -265,6 +265,10 @@ const SEED_DOCS = [
265
265
  text: "Agent efficiency (claw-code patterns): (1) Anthropic prompt caching — system prompt marked with cache_control:{type:'ephemeral'}, cached server-side across agent turns, 90% cheaper on repeat calls (12-16K saved tokens per heal). (2) Tool result truncation — capped at 4K chars before entering message history, prevents context blowup from large grep/file reads. (3) Zero-cost structural compaction — extracts signals (tools used, files touched, errors found, actions taken) from message history WITHOUT an LLM call. Costs $0.00 vs old method that burned tokens on a compacting model. Triggers when estimated tokens > 10K (text.length/4 approximation). Preserves last 4 messages verbatim. (4) Token estimation — text.length/4+1, fast approximation without tokenizer, ~10% accurate. Used for budget decisions before API calls. (5) Error-graceful tools — tool errors returned as [ERROR] prefixed results, not thrown. Model sees the error and decides how to proceed. (6) Pre/post tool hooks — shell commands in .wolverine/hooks.json, exit 0=allow, 2=deny. Enables audit logging and policy enforcement without hard-coding.",
266
266
  metadata: { topic: "agent-efficiency" },
267
267
  },
268
+ {
269
+ text: "Robustness guards: (1) Heal concurrency guard — _healInProgress flag prevents parallel heals from health monitor + crash handler racing. (2) Global rate limit — 5 heals per 5 minutes regardless of error signature, prevents infinite loop of different errors burning API quota. (3) Heal timeout — Promise.race wraps _healImpl() with 5-minute timeout, clears _healInProgress on timeout. (4) Per-API-call timeout — 45s timeout in agent engine via Promise.race, returns partial results if files already modified. (5) bash_exec enforced timeout — 30s default, 60s hard cap via Math.min(). (6) PID file race prevention — exit handler only deletes PID file if it still belongs to current process. (7) SIGTERM startup grace — 3s grace period ignores SIGTERM on startup, prevents restart scripts from killing both old and new processes. (8) Research timeout — deep research capped at 30s, deferred to iteration 3+ to avoid slowing early fix attempts.",
270
+ metadata: { topic: "robustness-guards" },
271
+ },
268
272
  {
269
273
  text: "Cost optimization: 7 techniques reduce heal cost from $0.31 to $0.02 for simple errors. (1) Verifier skips route probe for simple errors (TypeError/ReferenceError/SyntaxError) — trusts syntax+boot, ErrorMonitor is safety net. Prevents false-rejection cascades. (2) Sub-agents use Haiku (classifier model) for explore/plan/verify/research — only fixer uses Sonnet/Opus. 6 Haiku calls=$0.006 vs 6 Sonnet calls=$0.12. (3) Agent context compacted every 3 turns using compacting model — prevents 15K→95K token blowup. (4) Brain checked for cached fix patterns before AI — repeat errors cost $0. (5) Token budgets capped by error complexity: simple=20K agent budget, moderate=50K, complex=100K. Simple errors get 4 agent turns max. (6) Prior attempt summaries (not full context) passed between iterations — concise 'do NOT repeat' directives. (7) Fast path includes last known good backup code so AI can revert broken additions instead of patching around them.",
270
274
  metadata: { topic: "cost-optimization" },
@@ -115,7 +115,7 @@ async function compact(text) {
115
115
  systemPrompt: "Compress the following text into a dense, semantically rich summary. Keep all technical terms, function names, file paths, and error messages. Remove filler words. Output ONLY the compressed text, nothing else.",
116
116
  userPrompt: text,
117
117
  maxTokens: 256,
118
- category: "brain",
118
+ category: "compacting",
119
119
  });
120
120
 
121
121
  return result.content || text;
@@ -17,7 +17,7 @@ const path = require("path");
17
17
  * - Config files (.env, .json, .yaml)
18
18
  */
19
19
 
20
- const SKIP_DIRS = new Set(["node_modules", ".wolverine", ".git", "dist", "build", "coverage", "src", "bin", "tests"]);
20
+ const SKIP_DIRS = new Set(["node_modules", ".wolverine", ".git", "dist", "build", "coverage", "src", "bin", "tests", "examples", "public", "static", "assets", "__tests__", ".next", ".nuxt"]);
21
21
  const CODE_EXTENSIONS = new Set([".js", ".ts", ".mjs", ".cjs", ".jsx", ".tsx"]);
22
22
  const CONFIG_EXTENSIONS = new Set([".json", ".yaml", ".yml", ".toml", ".env"]);
23
23
 
@@ -52,6 +52,11 @@ function scanProject(projectRoot) {
52
52
  // Recursive scan
53
53
  _scanDir(root, root, map);
54
54
 
55
+ // Cap collections to prevent memory bloat on large projects
56
+ if (map.functions.length > 500) map.functions = map.functions.slice(0, 500);
57
+ if (map.classes.length > 200) map.classes = map.classes.slice(0, 200);
58
+ if (map.exports.length > 300) map.exports = map.exports.slice(0, 300);
59
+
55
60
  // Build summary
56
61
  map.summary = _buildSummary(map);
57
62
 
@@ -88,12 +93,21 @@ function _scanDir(dir, root, map) {
88
93
 
89
94
  map.files.push({ path: relPath, type: "code" });
90
95
 
96
+ // Skip large/minified files — they bloat memory and aren't useful for repair context
97
+ let stat;
98
+ try { stat = fs.statSync(fullPath); } catch { continue; }
99
+ if (stat.size > 100000) continue; // Skip files > 100KB (bundles, minified, generated)
100
+
91
101
  // Parse the file for patterns
92
102
  let content;
93
103
  try {
94
104
  content = fs.readFileSync(fullPath, "utf-8");
95
105
  } catch { continue; }
96
106
 
107
+ // Skip minified code (avg line length > 200 chars = likely minified)
108
+ const lines = content.split("\n");
109
+ if (lines.length > 0 && content.length / lines.length > 200) continue;
110
+
97
111
  _extractRoutes(content, relPath, map);
98
112
  _extractExports(content, relPath, map);
99
113
  _extractFunctions(content, relPath, map);
@@ -5,6 +5,7 @@ const { getModel, detectProvider } = require("./models");
5
5
 
6
6
  let _openaiClient = null;
7
7
  let _anthropicClient = null;
8
+ let _wolverineClient = null;
8
9
  let _tracker = null;
9
10
 
10
11
  function setTokenTracker(tracker) { _tracker = tracker; }
@@ -35,9 +36,23 @@ function _track(model, category, usage, tool, latencyMs, success) {
35
36
 
36
37
  function getClient(provider) {
37
38
  if (provider === "anthropic") return _getAnthropicClient();
39
+ if (provider === "wolverine") return _getWolverineClient();
38
40
  return _getOpenAIClient();
39
41
  }
40
42
 
43
+ function _getWolverineClient() {
44
+ if (!_wolverineClient) {
45
+ // Wolverine inference: direct to GPU (WOLVERINE_INFERENCE_URL) or via proxy (api.wolverinenode.xyz/v1)
46
+ // Direct URL = no auth needed (Vast tunnel). Proxy URL = needs WOLVERINE_API_KEY for billing.
47
+ const baseURL = process.env.WOLVERINE_INFERENCE_URL
48
+ ? process.env.WOLVERINE_INFERENCE_URL + "/v1"
49
+ : "https://api.wolverinenode.xyz/v1";
50
+ const apiKey = process.env.WOLVERINE_API_KEY || "none";
51
+ _wolverineClient = new OpenAI({ apiKey, baseURL });
52
+ }
53
+ return _wolverineClient;
54
+ }
55
+
41
56
  function _getOpenAIClient() {
42
57
  if (!_openaiClient) {
43
58
  const apiKey = process.env.OPENAI_API_KEY;
@@ -65,6 +80,7 @@ function isReasoningModel(model) {
65
80
  }
66
81
 
67
82
  function isAnthropicModel(model) { return detectProvider(model) === "anthropic"; }
83
+ function isWolverineModel(model) { return detectProvider(model) === "wolverine"; }
68
84
 
69
85
  /**
70
86
  * Per-model max output token limits (with 10% overestimation buffer).
@@ -176,6 +192,8 @@ async function aiCall({ model, systemPrompt, userPrompt, maxTokens = 2048, tools
176
192
  try {
177
193
  if (provider === "anthropic") {
178
194
  result = await _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
195
+ } else if (provider === "wolverine") {
196
+ result = await _chatCall(_getWolverineClient(), { model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
179
197
  } else if (isResponsesModel(model)) {
180
198
  result = await _responsesCall(_getOpenAIClient(), { model, systemPrompt, userPrompt, maxTokens, tools });
181
199
  } else {
@@ -200,6 +218,8 @@ async function aiCallWithHistory({ model, messages, tools, maxTokens = 4096, cat
200
218
  try {
201
219
  if (provider === "anthropic") {
202
220
  result = await _anthropicCallWithHistory({ model, messages, tools, maxTokens });
221
+ } else if (provider === "wolverine") {
222
+ result = await _chatCallWithHistory(_getWolverineClient(), { model, messages, tools, maxTokens });
203
223
  } else if (isResponsesModel(model)) {
204
224
  result = await _responsesCallWithHistory(_getOpenAIClient(), { model, messages, tools, maxTokens });
205
225
  } else {
@@ -573,7 +593,7 @@ ${backupSourceCode ? `## Last Known Working Version\n\`\`\`javascript\n${backupS
573
593
  "changes" is for code edits (optional, use for actual code fixes).
574
594
  Include both if needed, or just one.`;
575
595
 
576
- const result = await aiCall({ model, systemPrompt, userPrompt, maxTokens: 2048, category: "heal" });
596
+ const result = await aiCall({ model, systemPrompt, userPrompt, maxTokens: 2048, category: "coding" });
577
597
  const content = result.content;
578
598
  const cleaned = content.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
579
599
 
@@ -58,20 +58,36 @@ Module._load = function (request, parent, isMain) {
58
58
  function _hookFastify(fastify) {
59
59
  // Wrap setErrorHandler so our IPC reporting runs BEFORE the user's handler
60
60
  const origSetError = fastify.setErrorHandler;
61
+ let customErrorHandlerSet = false;
61
62
  fastify.setErrorHandler = function (userHandler) {
63
+ customErrorHandlerSet = true;
62
64
  return origSetError.call(this, function (error, request, reply) {
63
65
  _reportError(request.url, request.method, error);
64
66
  return userHandler.call(this, error, request, reply);
65
67
  });
66
68
  };
67
69
 
68
- // Also add onError hook as a fallback (fires even if no custom error handler)
70
+ // Add onError hook as primary fallback — fires for all route errors in Fastify
69
71
  try {
70
72
  fastify.addHook("onError", function (request, reply, error, done) {
71
73
  _reportError(request.url, request.method, error);
72
74
  done();
73
75
  });
74
76
  } catch { /* addHook may fail if server is already started */ }
77
+
78
+ // Register a default error handler if user never calls setErrorHandler
79
+ // This ensures we catch async route throws even without a custom handler
80
+ try {
81
+ fastify.addHook("onReady", function (done) {
82
+ if (!customErrorHandlerSet) {
83
+ origSetError.call(fastify, function (error, request, reply) {
84
+ _reportError(request.url, request.method, error);
85
+ reply.code(error.statusCode || 500).send({ error: error.message });
86
+ });
87
+ }
88
+ done();
89
+ });
90
+ } catch { /* non-fatal */ }
75
91
  }
76
92
 
77
93
  function _hookExpress(app) {
@@ -15,7 +15,14 @@
15
15
  */
16
16
  function detectProvider(model) {
17
17
  if (!model) return "openai";
18
- if (/^claude/i.test(model)) return "anthropic";
18
+ if (/^wolverine/i.test(model) || /^gemma/i.test(model)) return "wolverine";
19
+ if (/^claude/i.test(model) || /^anthropic/i.test(model)) return "anthropic";
20
+ if (/^gemini/i.test(model) || /^google/i.test(model)) return "google";
21
+ if (/^mistral/i.test(model) || /^codestral/i.test(model) || /^pixtral/i.test(model)) return "mistral";
22
+ if (/^llama/i.test(model) || /^meta/i.test(model)) return "meta";
23
+ if (/^deepseek/i.test(model)) return "deepseek";
24
+ if (/^command/i.test(model) || /^cohere/i.test(model)) return "cohere";
25
+ // Default: OpenAI (gpt-*, o1-*, o3-*, o4-*, codex-*, text-embedding-*, dall-e-*, etc.)
19
26
  return "openai";
20
27
  }
21
28
 
@@ -332,9 +332,12 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
332
332
  } else if (iteration <= 2) {
333
333
  // Agent path — REASONING_MODEL (also handles iteration 1 when no file)
334
334
  console.log(chalk.magenta(` 🤖 Agent path (${getModel("reasoning")})...`));
335
+ // Tight turn budget: simple errors get 4 turns, ENOENT/config gets 5, complex gets 8
336
+ const isConfigError = /ENOENT|missing.*config|missing.*file|no such file/i.test(parsed.errorMessage);
337
+ const agentMaxTurns = isSimpleError ? 4 : isConfigError ? 5 : 8;
335
338
  const agent = new AgentEngine({
336
339
  sandbox, logger, cwd, mcp,
337
- maxTurns: isSimpleError ? 4 : 8,
340
+ maxTurns: agentMaxTurns,
338
341
  maxTokens: tokenBudget.agent,
339
342
  });
340
343
 
@@ -496,12 +499,20 @@ async function tryOperationalFix(parsed, cwd, logger) {
496
499
  if (!rel.startsWith("..") && /\.(json|yaml|yml|toml|ini|conf|cfg|env|log|txt|csv|db|sqlite)$/i.test(missingFile)) {
497
500
  try {
498
501
  fs.mkdirSync(path.dirname(missingFile), { recursive: true });
499
- // Create empty file or sensible default
500
502
  const ext = path.extname(missingFile).toLowerCase();
501
- const defaults = { ".json": "{}", ".yaml": "", ".yml": "", ".log": "", ".txt": "", ".csv": "", ".env": "" };
502
- fs.writeFileSync(missingFile, defaults[ext] || "", "utf-8");
503
+
504
+ // For JSON config files, try to infer expected structure from the code that loads them
505
+ let content = "";
506
+ if (ext === ".json") {
507
+ content = _inferJsonConfig(missingFile, cwd, parsed) || "{}";
508
+ } else {
509
+ const defaults = { ".yaml": "", ".yml": "", ".log": "", ".txt": "", ".csv": "", ".env": "" };
510
+ content = defaults[ext] || "";
511
+ }
512
+
513
+ fs.writeFileSync(missingFile, content, "utf-8");
503
514
  console.log(chalk.blue(` 📄 Created missing file: ${rel}`));
504
- return { fixed: true, action: `Created missing file: ${rel}` };
515
+ return { fixed: true, action: `Created missing file: ${rel} with ${content === "{}" ? "empty" : "inferred"} config` };
505
516
  } catch {}
506
517
  }
507
518
  }
@@ -544,4 +555,57 @@ async function tryOperationalFix(parsed, cwd, logger) {
544
555
  return { fixed: false };
545
556
  }
546
557
 
558
+ /**
559
+ * Try to infer JSON config structure by scanning the code that loads the file.
560
+ * Looks for property access patterns after require/readFile of the missing file.
561
+ * Returns a JSON string with empty/default values, or null if can't infer.
562
+ */
563
+ function _inferJsonConfig(missingFile, cwd, parsed) {
564
+ const fs = require("fs");
565
+ const path = require("path");
566
+
567
+ // Find which source file loads the missing config
568
+ const basename = path.basename(missingFile);
569
+ const sourceFile = parsed.filePath;
570
+ if (!sourceFile) return null;
571
+
572
+ try {
573
+ const source = fs.readFileSync(sourceFile, "utf-8");
574
+ // Look for property accesses on the loaded config: config.apiUrl, config.timeout, etc.
575
+ const configVarMatch = source.match(new RegExp(`(?:const|let|var)\\s+(\\w+)\\s*=\\s*(?:require|JSON\\.parse).*${basename.replace(".", "\\.")}`));
576
+ if (!configVarMatch) return null;
577
+
578
+ const varName = configVarMatch[1];
579
+ // Find all property accesses: varName.prop or varName["prop"]
580
+ const propRegex = new RegExp(`${varName}\\.(\\w+)`, "g");
581
+ const bracketRegex = new RegExp(`${varName}\\["(\\w+)"\\]`, "g");
582
+ const props = new Set();
583
+ let m;
584
+ while ((m = propRegex.exec(source)) !== null) props.add(m[1]);
585
+ while ((m = bracketRegex.exec(source)) !== null) props.add(m[1]);
586
+
587
+ if (props.size === 0) return null;
588
+
589
+ // Build config with sensible defaults based on property names
590
+ const config = {};
591
+ for (const prop of props) {
592
+ const lower = prop.toLowerCase();
593
+ if (/url|endpoint|host|uri/.test(lower)) config[prop] = "http://localhost:3000";
594
+ else if (/port/.test(lower)) config[prop] = 3000;
595
+ else if (/timeout|delay|interval|ttl/.test(lower)) config[prop] = 5000;
596
+ else if (/key|token|secret/.test(lower)) config[prop] = "placeholder";
597
+ else if (/name/.test(lower)) config[prop] = "default";
598
+ else if (/enabled|active|debug/.test(lower)) config[prop] = true;
599
+ else if (/count|max|min|limit|size/.test(lower)) config[prop] = 10;
600
+ else if (/path|dir|file/.test(lower)) config[prop] = "./";
601
+ else config[prop] = "";
602
+ }
603
+
604
+ console.log(chalk.gray(` 🔍 Inferred ${props.size} config fields from ${path.basename(sourceFile)}: ${[...props].join(", ")}`));
605
+ return JSON.stringify(config, null, 2);
606
+ } catch {
607
+ return null;
608
+ }
609
+ }
610
+
547
611
  module.exports = { heal };
@@ -336,7 +336,7 @@ class DashboardServer {
336
336
  systemPrompt: "Route a command. Respond with two words: ROUTE SIZE.\nROUTE: SIMPLE (general knowledge/explanation, no live data needed), TOOLS (needs live server data, file contents, or endpoint calls), AGENT (create/modify/fix code).\nSIZE: SMALL, MEDIUM, LARGE.\nExamples: 'what is wolverine' → SIMPLE SMALL. 'what time is it' → TOOLS SMALL. 'show me index.js' → TOOLS SMALL. 'add endpoint' → AGENT SMALL. 'build auth' → AGENT LARGE.",
337
337
  userPrompt: command,
338
338
  maxTokens: 10,
339
- category: "classify",
339
+ category: "classifier",
340
340
  });
341
341
 
342
342
  const raw = (result.content || "").trim().toUpperCase();
@@ -424,7 +424,7 @@ ${indexContent}
424
424
  Existing route files:
425
425
  ${existingRoutes || "(none)"}`,
426
426
  maxTokens: 2048,
427
- category: "develop",
427
+ category: "tool",
428
428
  });
429
429
 
430
430
  const raw = (result.content || "").trim().replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
@@ -53,6 +53,14 @@ const DEFAULT_PRICING = {
53
53
  "claude-3-sonnet": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
54
54
  "claude-3-haiku": { input: 0.25, output: 1.25, cache_write: 0.3125, cache_read: 0.025 },
55
55
 
56
+ // ── Wolverine Self-Hosted (Gemma 4 via api.wolverinenode.xyz) ──
57
+ // Priced below comparable Anthropic and OpenAI models — cheaper than both
58
+ "wolverine-test-1": { input: 0.10, output: 0.40 },
59
+ "wolverine-gemma-26b": { input: 0.25, output: 1.00 },
60
+ "wolverine-gemma-8b": { input: 0.10, output: 0.40 },
61
+ "wolverine-coding": { input: 0.10, output: 0.40 },
62
+ "wolverine-reasoning": { input: 0.25, output: 1.00 },
63
+
56
64
  // ── Fallback ──
57
65
  "_default": { input: 1.00, output: 4.00 },
58
66
  };
@@ -33,6 +33,8 @@ class TokenTracker {
33
33
  this._byModel = {};
34
34
  // Per-category totals
35
35
  this._byCategory = {};
36
+ // Per-model-per-category cross-reference (model::category → stats)
37
+ this._byModelCategory = {};
36
38
  // Per-tool totals
37
39
  this._byTool = {};
38
40
  // Timeline: recent entries for charts (in-memory)
@@ -87,7 +89,7 @@ class TokenTracker {
87
89
  };
88
90
 
89
91
  // Accumulate by model
90
- if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, minLatencyMs: Infinity, maxLatencyMs: 0, cacheCreation: 0, cacheRead: 0, cacheSavings: 0 };
92
+ if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, totalLatencyTokens: 0, timedCalls: 0, minLatencyMs: Infinity, maxLatencyMs: 0, cacheCreation: 0, cacheRead: 0, cacheSavings: 0 };
91
93
  const m = this._byModel[model];
92
94
  m.input += entry.input;
93
95
  m.output += entry.output;
@@ -100,6 +102,8 @@ class TokenTracker {
100
102
  if (entry.success) m.successes++; else m.failures++;
101
103
  if (latencyMs > 0) {
102
104
  m.totalLatencyMs += latencyMs;
105
+ m.totalLatencyTokens += total;
106
+ m.timedCalls++;
103
107
  if (latencyMs < m.minLatencyMs) m.minLatencyMs = latencyMs;
104
108
  if (latencyMs > m.maxLatencyMs) m.maxLatencyMs = latencyMs;
105
109
  }
@@ -112,6 +116,18 @@ class TokenTracker {
112
116
  this._byCategory[category].calls++;
113
117
  this._byCategory[category].cost += cost.total;
114
118
 
119
+ // Accumulate by model+category cross-reference
120
+ const mcKey = `${model}::${category}`;
121
+ if (!this._byModelCategory[mcKey]) this._byModelCategory[mcKey] = { model, category, input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0 };
122
+ const mc = this._byModelCategory[mcKey];
123
+ mc.input += entry.input;
124
+ mc.output += entry.output;
125
+ mc.total += total;
126
+ mc.calls++;
127
+ mc.cost += cost.total;
128
+ if (entry.success) mc.successes++; else mc.failures++;
129
+ if (latencyMs > 0) mc.totalLatencyMs += latencyMs;
130
+
115
131
  // Accumulate by tool
116
132
  if (tool) {
117
133
  const toolKey = tool.split(" ")[0];
@@ -158,6 +174,7 @@ class TokenTracker {
158
174
  },
159
175
  byModel: this._formatModelStats(),
160
176
  byCategory: this._byCategory,
177
+ byModelCategory: this._formatModelCategoryStats(),
161
178
  byTool: this._byTool,
162
179
  // Recent in-memory timeline
163
180
  timeline: this._timeline.slice(-100).map(e => ({
@@ -188,19 +205,42 @@ class TokenTracker {
188
205
  cacheCreation: m.cacheCreation || 0,
189
206
  cacheRead: m.cacheRead || 0,
190
207
  cacheSavings: Math.round((m.cacheSavings || 0) * 1000000) / 1000000,
191
- successes: m.successes || m.calls,
208
+ successes: m.successes != null ? m.successes : m.calls - (m.failures || 0),
192
209
  failures: m.failures || 0,
193
- successRate: m.calls > 0 ? Math.round(((m.successes || m.calls) / m.calls) * 100) : 0,
194
- avgLatencyMs: m.calls > 0 && m.totalLatencyMs ? Math.round(m.totalLatencyMs / m.calls) : 0,
210
+ successRate: m.calls > 0 ? parseFloat((((m.calls - (m.failures || 0)) / m.calls) * 100).toFixed(2)) : 0,
211
+ // Latency normalized by token count
212
+ avgLatencyMs: (m.timedCalls || 0) > 0 ? Math.round(m.totalLatencyMs / m.timedCalls) : 0,
213
+ msPerKToken: (m.totalLatencyTokens || 0) > 0 ? Math.round((m.totalLatencyMs / m.totalLatencyTokens) * 1000) : 0,
214
+ tokensPerSecond: m.totalLatencyMs > 0 ? Math.round((m.totalLatencyTokens || m.total) / (m.totalLatencyMs / 1000) * 10) / 10 : 0,
215
+ outputTokPerSecond: m.totalLatencyMs > 0 && m.output > 0 ? Math.round((m.output / (m.totalLatencyMs / 1000)) * 10) / 10 : 0,
216
+ timedCalls: m.timedCalls || 0,
195
217
  minLatencyMs: m.minLatencyMs === Infinity ? 0 : (m.minLatencyMs || 0),
196
218
  maxLatencyMs: m.maxLatencyMs || 0,
197
- tokensPerSecond: m.totalLatencyMs > 0 ? Math.round((m.total / (m.totalLatencyMs / 1000)) * 10) / 10 : 0,
198
219
  costPerCall: m.calls > 0 ? Math.round((m.cost / m.calls) * 1000000) / 1000000 : 0,
199
220
  };
200
221
  }
201
222
  return result;
202
223
  }
203
224
 
225
+ /**
226
+ * Format model+category cross-reference for analytics.
227
+ * Returns array of { model, category, calls, cost, tokens, successRate, avgLatencyMs }
228
+ */
229
+ _formatModelCategoryStats() {
230
+ return Object.values(this._byModelCategory).map(mc => ({
231
+ model: mc.model,
232
+ category: mc.category,
233
+ calls: mc.calls,
234
+ cost: Math.round(mc.cost * 1000000) / 1000000,
235
+ tokens: mc.total,
236
+ input: mc.input,
237
+ output: mc.output,
238
+ successRate: mc.calls > 0 ? parseFloat((((mc.calls - (mc.failures || 0)) / mc.calls) * 100).toFixed(2)) : 100,
239
+ avgLatencyMs: mc.calls > 0 && mc.totalLatencyMs > 0 ? Math.round(mc.totalLatencyMs / mc.calls) : 0,
240
+ tokensPerSecond: mc.totalLatencyMs > 0 ? Math.round((mc.total / (mc.totalLatencyMs / 1000)) * 10) / 10 : 0,
241
+ }));
242
+ }
243
+
204
244
  /**
205
245
  * Load full history from JSONL file. For dashboard charts across sessions.
206
246
  * @param {number} limit — max entries to return (default: 500)
@@ -253,6 +293,7 @@ class TokenTracker {
253
293
  lastSaved: Date.now(),
254
294
  byModel: this._byModel,
255
295
  byCategory: this._byCategory,
296
+ byModelCategory: this._byModelCategory,
256
297
  byTool: this._byTool,
257
298
  totalTokens: this._totalTokens,
258
299
  totalCalls: this._totalCalls,
@@ -275,6 +316,7 @@ class TokenTracker {
275
316
  const data = JSON.parse(fs.readFileSync(this.usagePath, "utf-8"));
276
317
  this._byModel = data.byModel || {};
277
318
  this._byCategory = data.byCategory || {};
319
+ this._byModelCategory = data.byModelCategory || {};
278
320
  this._byTool = data.byTool || {};
279
321
  this._totalTokens = data.totalTokens || 0;
280
322
  this._totalCalls = data.totalCalls || 0;
@@ -236,7 +236,7 @@ Provide a brief analysis and actionable suggestions. Focus on:
236
236
 
237
237
  Keep your response under 300 words. Be specific and actionable.`,
238
238
  maxTokens: 512,
239
- category: "security",
239
+ category: "audit",
240
240
  });
241
241
 
242
242
  const analysis = result.content;
@@ -172,7 +172,7 @@ class Notifier {
172
172
  systemPrompt: "You summarize server errors for developers. Write 1-2 short sentences. Be direct and actionable. Do not include any secrets, passwords, or API key values — only refer to them by name (e.g. 'the OPENAI_API_KEY').",
173
173
  userPrompt: `Summarize this error for a developer notification:\n\nCategory: ${classification.category}\nError: ${safeError}\n\nStack (first 300 chars): ${safeStack.slice(0, 300)}`,
174
174
  maxTokens: 100,
175
- category: "security",
175
+ category: "audit",
176
176
  });
177
177
 
178
178
  // Double-sanitize the AI response (in case the AI echoes something)
@@ -66,7 +66,8 @@ function collectHeartbeat(subsystems) {
66
66
  totalCalls: tokenTracker?._totalCalls || usage?.session?.totalCalls || 0,
67
67
  totalCacheSavings: _sumCacheSavings(usage?.byModel || {}),
68
68
  byCategory: usage?.byCategory || {},
69
- byModel: usage?.byModel || {}, // includes: latency, successRate, tokensPerSec, cacheSavings per model
69
+ byModel: usage?.byModel || {},
70
+ byModelCategory: usage?.byModelCategory || [],
70
71
  byTool: usage?.byTool || {},
71
72
  byProvider: _aggregateByProvider(usage?.byModel || {}),
72
73
  },
@@ -95,7 +95,7 @@ Respond with ONLY valid JSON:
95
95
  systemPrompt: "You are a security analyst. Respond with ONLY valid JSON.",
96
96
  userPrompt,
97
97
  maxTokens: 128,
98
- category: "security",
98
+ category: "audit",
99
99
  });
100
100
 
101
101
  const content = result.content;
@@ -242,8 +242,15 @@ function ensureSingleProcess(cwd) {
242
242
  fs.writeFileSync(pidFile, String(process.pid), "utf-8");
243
243
  } catch {}
244
244
 
245
- // Clean up on exit
246
- process.on("exit", () => { try { fs.unlinkSync(pidFile); } catch {} });
245
+ // Clean up on exit — only delete if PID file still belongs to us
246
+ // (prevents race condition where old process deletes new process's PID)
247
+ const myPid = process.pid;
248
+ process.on("exit", () => {
249
+ try {
250
+ const current = parseInt(fs.readFileSync(pidFile, "utf-8").trim(), 10);
251
+ if (current === myPid) fs.unlinkSync(pidFile);
252
+ } catch {}
253
+ });
247
254
  }
248
255
 
249
256
  // ── Skill Metadata ──