wolverine-ai 3.4.1 → 3.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -34,7 +34,7 @@ const SEED_DOCS = [
34
34
  metadata: { topic: "overview" },
35
35
  },
36
36
  {
37
- text: "Wolverine heal pipeline: crash detected → error parsed (file, line, message, errorType) → prompt injection scan (AUDIT_MODEL) → rate limit check → operational fix attempt (missing_module → npm install, missing_file → create file, permission → chmod — zero AI tokens) → if operational fix doesn't apply → fast path repair (CODING_MODEL, supports both code changes AND shell commands like npm install) → if fast path fails → agent path (REASONING_MODEL with tools including bash_exec for npm install) → if agent fails → sub-agents (explore → plan → fix, fixer has bash_exec) → verify fix (syntax check + boot probe) → rollback on failure. Error types classified: missing_module, missing_file, permission, port_conflict, syntax, runtime, unknown.",
37
+ text: "Wolverine heal pipeline: crash detected → error parsed (file, line, message, errorType) → prompt injection scan (AUDIT_MODEL) → rate limit check (per-signature + global 5/5min cap) → operational fix attempt (missing_module → npm install, missing_file → create file with inferred config, permission → chmod, port conflict → kill stale process — zero AI tokens) → if operational fix doesn't apply → fast path repair (CODING_MODEL, supports both code changes AND shell commands like npm install) → if fast path fails → agent path (REASONING_MODEL with tools including bash_exec, 45s per-API-call timeout) → if agent fails → sub-agents (explore → plan → fix, fixer has bash_exec) → verify fix (syntax check + boot probe + error classification comparison) → rollback on failure. Error types classified: missing_module, missing_file, permission, port_conflict, syntax, runtime, unknown. Heal timeout: 5 minutes via Promise.race. Config-aware turn budget: simple=4, config/ENOENT=5, complex=8 turns.",
38
38
  metadata: { topic: "heal-pipeline" },
39
39
  },
40
40
  {
@@ -66,7 +66,7 @@ const SEED_DOCS = [
66
66
  metadata: { topic: "verification" },
67
67
  },
68
68
  {
69
- text: "Wolverine multi-file agent: 15-turn agent loop with 18 tools across 7 categories. FILE: read_file (offset/limit), write_file (creates dirs), edit_file (find-and-replace), glob_files (pattern search), grep_code (regex with context), list_dir (directory listing with sizes), move_file (rename/relocate). SHELL: bash_exec (30s default, 60s cap), git_log, git_diff. DATABASE: inspect_db (tables/schema/SELECT on SQLite), run_db_fix (UPDATE/DELETE/ALTER with auto-backup). DIAGNOSTICS: check_port (find what uses a port), check_env (env vars, values redacted). DEPS: audit_deps (full npm health check), check_migration (known upgrade paths). RESEARCH: web_fetch. CONTROL: done. Used when fast path fails. Token budget 50k max.",
69
+ text: "Wolverine multi-file agent: turn-limited agent loop with 18 tools across 7 categories. Turn budget adapts to error type: simple (TypeError)=4, config/ENOENT=5, complex=8. Each AI call has 45s timeout via Promise.race — prevents indefinite hangs. If timeout occurs mid-fix, partial results returned. FILE: read_file (offset/limit), write_file (creates dirs), edit_file (find-and-replace), glob_files (pattern search), grep_code (regex with context), list_dir (directory listing with sizes), move_file (rename/relocate). SHELL: bash_exec (30s default, 60s cap), git_log, git_diff. DATABASE: inspect_db (tables/schema/SELECT on SQLite), run_db_fix (UPDATE/DELETE/ALTER with auto-backup). DIAGNOSTICS: check_port (find what uses a port), check_env (env vars, values redacted). DEPS: audit_deps (full npm health check), check_migration (known upgrade paths). RESEARCH: web_fetch (10s timeout). CONTROL: done. Prompt emphasizes fast action: fix immediately when solution is obvious, investigate only when cause unclear.",
70
70
  metadata: { topic: "agent" },
71
71
  },
72
72
  {
@@ -202,7 +202,7 @@ const SEED_DOCS = [
202
202
  metadata: { topic: "admin-auth" },
203
203
  },
204
204
  {
205
- text: "Operational fix layer: before calling AI, wolverine checks for common non-code errors that can be fixed instantly with zero tokens. Pattern 1: 'Cannot find module X' (where X is a package name, not a relative path) → runs npm install X (or just npm install if package is already in package.json). Pattern 2: ENOENT on config/data files (.json, .yaml, .env, .log, etc.) → creates the missing file with sensible defaults (empty JSON {}, empty string). Pattern 3: EACCES/EPERM → chmod 755 on the file. This layer runs before the AI repair loop and handles ~30% of production crashes at zero cost.",
205
+ text: "Operational fix layer: before calling AI, wolverine checks for common non-code errors that can be fixed instantly with zero tokens. Pattern 1: 'Cannot find module X' (where X is a package name, not a relative path) → runs npm install X via deps skill diagnosis. Pattern 2: ENOENT on config/data files (.json, .yaml, .env, .log, etc.) → for JSON configs, reads the source code that loads the file to infer expected fields (apiUrl, timeout, etc.) and creates the file with correct structure; for other types, creates empty file. Pattern 3: EACCES/EPERM → chmod 755 on the file. Pattern 4: EADDRINUSE → finds and kills stale process on the port (lsof on Linux, netstat on Windows). This layer runs before the AI repair loop and handles ~30% of production crashes at zero cost.",
206
206
  metadata: { topic: "operational-fix" },
207
207
  },
208
208
  {
@@ -214,7 +214,7 @@ const SEED_DOCS = [
214
214
  metadata: { topic: "agent-fix-strategy" },
215
215
  },
216
216
  {
217
- text: "Error Monitor: detects caught 500 errors that don't crash the process. Most production bugs are caught by Fastify/Express error handlers — the server stays alive but routes return 500. Wolverine's crash-based heal pipeline never triggers for these. ErrorMonitor tracks 5xx errors per route via IPC from child process. After N consecutive 500s within a time window (default: 3 failures in 30s), triggers the heal pipeline without killing the server. Error hook auto-injected via --require preload (no user code changes). Cooldown prevents heal spam (default: 60s per route). Stats available in dashboard and telemetry. Config: WOLVERINE_ERROR_THRESHOLD, WOLVERINE_ERROR_WINDOW_MS, WOLVERINE_ERROR_COOLDOWN_MS.",
217
+ text: "Error Monitor: detects caught 500 errors that don't crash the process. Most production bugs are caught by Fastify/Express error handlers — the server stays alive but routes return 500. Wolverine's crash-based heal pipeline never triggers for these. ErrorMonitor tracks 5xx errors per normalized route (/api/users/123 → /api/users/:id) via IPC from child process. Single error triggers heal (threshold=1, configurable). Error hook auto-injected via --require preload (no user code changes) hooks Fastify onError + setErrorHandler wrapper + auto-registers default error handler if user never sets one (catches async route throws). Cooldown prevents heal spam (default: 60s per route). Health check failures also trigger heal (not just restart). Config: WOLVERINE_ERROR_THRESHOLD, WOLVERINE_ERROR_WINDOW_MS, WOLVERINE_ERROR_COOLDOWN_MS.",
218
218
  metadata: { topic: "error-monitor" },
219
219
  },
220
220
  {
@@ -265,6 +265,10 @@ const SEED_DOCS = [
265
265
  text: "Agent efficiency (claw-code patterns): (1) Anthropic prompt caching — system prompt marked with cache_control:{type:'ephemeral'}, cached server-side across agent turns, 90% cheaper on repeat calls (12-16K saved tokens per heal). (2) Tool result truncation — capped at 4K chars before entering message history, prevents context blowup from large grep/file reads. (3) Zero-cost structural compaction — extracts signals (tools used, files touched, errors found, actions taken) from message history WITHOUT an LLM call. Costs $0.00 vs old method that burned tokens on a compacting model. Triggers when estimated tokens > 10K (text.length/4 approximation). Preserves last 4 messages verbatim. (4) Token estimation — text.length/4+1, fast approximation without tokenizer, ~10% accurate. Used for budget decisions before API calls. (5) Error-graceful tools — tool errors returned as [ERROR] prefixed results, not thrown. Model sees the error and decides how to proceed. (6) Pre/post tool hooks — shell commands in .wolverine/hooks.json, exit 0=allow, 2=deny. Enables audit logging and policy enforcement without hard-coding.",
266
266
  metadata: { topic: "agent-efficiency" },
267
267
  },
268
+ {
269
+ text: "Robustness guards: (1) Heal concurrency guard — _healInProgress flag prevents parallel heals from health monitor + crash handler racing. (2) Global rate limit — 5 heals per 5 minutes regardless of error signature, prevents infinite loop of different errors burning API quota. (3) Heal timeout — Promise.race wraps _healImpl() with 5-minute timeout, clears _healInProgress on timeout. (4) Per-API-call timeout — 45s timeout in agent engine via Promise.race, returns partial results if files already modified. (5) bash_exec enforced timeout — 30s default, 60s hard cap via Math.min(). (6) PID file race prevention — exit handler only deletes PID file if it still belongs to current process. (7) SIGTERM startup grace — 3s grace period ignores SIGTERM on startup, prevents restart scripts from killing both old and new processes. (8) Research timeout — deep research capped at 30s, deferred to iteration 3+ to avoid slowing early fix attempts.",
270
+ metadata: { topic: "robustness-guards" },
271
+ },
268
272
  {
269
273
  text: "Cost optimization: 7 techniques reduce heal cost from $0.31 to $0.02 for simple errors. (1) Verifier skips route probe for simple errors (TypeError/ReferenceError/SyntaxError) — trusts syntax+boot, ErrorMonitor is safety net. Prevents false-rejection cascades. (2) Sub-agents use Haiku (classifier model) for explore/plan/verify/research — only fixer uses Sonnet/Opus. 6 Haiku calls=$0.006 vs 6 Sonnet calls=$0.12. (3) Agent context compacted every 3 turns using compacting model — prevents 15K→95K token blowup. (4) Brain checked for cached fix patterns before AI — repeat errors cost $0. (5) Token budgets capped by error complexity: simple=20K agent budget, moderate=50K, complex=100K. Simple errors get 4 agent turns max. (6) Prior attempt summaries (not full context) passed between iterations — concise 'do NOT repeat' directives. (7) Fast path includes last known good backup code so AI can revert broken additions instead of patching around them.",
270
274
  metadata: { topic: "cost-optimization" },
@@ -115,7 +115,7 @@ async function compact(text) {
115
115
  systemPrompt: "Compress the following text into a dense, semantically rich summary. Keep all technical terms, function names, file paths, and error messages. Remove filler words. Output ONLY the compressed text, nothing else.",
116
116
  userPrompt: text,
117
117
  maxTokens: 256,
118
- category: "brain",
118
+ category: "compacting",
119
119
  });
120
120
 
121
121
  return result.content || text;
@@ -17,7 +17,7 @@ const path = require("path");
17
17
  * - Config files (.env, .json, .yaml)
18
18
  */
19
19
 
20
- const SKIP_DIRS = new Set(["node_modules", ".wolverine", ".git", "dist", "build", "coverage", "src", "bin", "tests"]);
20
+ const SKIP_DIRS = new Set(["node_modules", ".wolverine", ".git", "dist", "build", "coverage", "src", "bin", "tests", "examples", "public", "static", "assets", "__tests__", ".next", ".nuxt"]);
21
21
  const CODE_EXTENSIONS = new Set([".js", ".ts", ".mjs", ".cjs", ".jsx", ".tsx"]);
22
22
  const CONFIG_EXTENSIONS = new Set([".json", ".yaml", ".yml", ".toml", ".env"]);
23
23
 
@@ -52,6 +52,11 @@ function scanProject(projectRoot) {
52
52
  // Recursive scan
53
53
  _scanDir(root, root, map);
54
54
 
55
+ // Cap collections to prevent memory bloat on large projects
56
+ if (map.functions.length > 500) map.functions = map.functions.slice(0, 500);
57
+ if (map.classes.length > 200) map.classes = map.classes.slice(0, 200);
58
+ if (map.exports.length > 300) map.exports = map.exports.slice(0, 300);
59
+
55
60
  // Build summary
56
61
  map.summary = _buildSummary(map);
57
62
 
@@ -88,12 +93,21 @@ function _scanDir(dir, root, map) {
88
93
 
89
94
  map.files.push({ path: relPath, type: "code" });
90
95
 
96
+ // Skip large/minified files — they bloat memory and aren't useful for repair context
97
+ let stat;
98
+ try { stat = fs.statSync(fullPath); } catch { continue; }
99
+ if (stat.size > 100000) continue; // Skip files > 100KB (bundles, minified, generated)
100
+
91
101
  // Parse the file for patterns
92
102
  let content;
93
103
  try {
94
104
  content = fs.readFileSync(fullPath, "utf-8");
95
105
  } catch { continue; }
96
106
 
107
+ // Skip minified code (avg line length > 200 chars = likely minified)
108
+ const lines = content.split("\n");
109
+ if (lines.length > 0 && content.length / lines.length > 200) continue;
110
+
97
111
  _extractRoutes(content, relPath, map);
98
112
  _extractExports(content, relPath, map);
99
113
  _extractFunctions(content, relPath, map);
@@ -5,6 +5,7 @@ const { getModel, detectProvider } = require("./models");
5
5
 
6
6
  let _openaiClient = null;
7
7
  let _anthropicClient = null;
8
+ let _wolverineClient = null;
8
9
  let _tracker = null;
9
10
 
10
11
  function setTokenTracker(tracker) { _tracker = tracker; }
@@ -35,9 +36,23 @@ function _track(model, category, usage, tool, latencyMs, success) {
35
36
 
36
37
  function getClient(provider) {
37
38
  if (provider === "anthropic") return _getAnthropicClient();
39
+ if (provider === "wolverine") return _getWolverineClient();
38
40
  return _getOpenAIClient();
39
41
  }
40
42
 
43
+ function _getWolverineClient() {
44
+ if (!_wolverineClient) {
45
+ // Wolverine inference: direct to GPU (WOLVERINE_INFERENCE_URL) or via proxy (api.wolverinenode.xyz/v1)
46
+ // Direct URL = no auth needed (Vast tunnel). Proxy URL = needs WOLVERINE_API_KEY for billing.
47
+ const baseURL = process.env.WOLVERINE_INFERENCE_URL
48
+ ? process.env.WOLVERINE_INFERENCE_URL + "/v1"
49
+ : "https://api.wolverinenode.xyz/v1";
50
+ const apiKey = process.env.WOLVERINE_API_KEY || "none";
51
+ _wolverineClient = new OpenAI({ apiKey, baseURL });
52
+ }
53
+ return _wolverineClient;
54
+ }
55
+
41
56
  function _getOpenAIClient() {
42
57
  if (!_openaiClient) {
43
58
  const apiKey = process.env.OPENAI_API_KEY;
@@ -65,6 +80,7 @@ function isReasoningModel(model) {
65
80
  }
66
81
 
67
82
  function isAnthropicModel(model) { return detectProvider(model) === "anthropic"; }
83
+ function isWolverineModel(model) { return detectProvider(model) === "wolverine"; }
68
84
 
69
85
  /**
70
86
  * Per-model max output token limits (with 10% overestimation buffer).
@@ -176,6 +192,8 @@ async function aiCall({ model, systemPrompt, userPrompt, maxTokens = 2048, tools
176
192
  try {
177
193
  if (provider === "anthropic") {
178
194
  result = await _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
195
+ } else if (provider === "wolverine") {
196
+ result = await _chatCall(_getWolverineClient(), { model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
179
197
  } else if (isResponsesModel(model)) {
180
198
  result = await _responsesCall(_getOpenAIClient(), { model, systemPrompt, userPrompt, maxTokens, tools });
181
199
  } else {
@@ -200,6 +218,8 @@ async function aiCallWithHistory({ model, messages, tools, maxTokens = 4096, cat
200
218
  try {
201
219
  if (provider === "anthropic") {
202
220
  result = await _anthropicCallWithHistory({ model, messages, tools, maxTokens });
221
+ } else if (provider === "wolverine") {
222
+ result = await _chatCallWithHistory(_getWolverineClient(), { model, messages, tools, maxTokens });
203
223
  } else if (isResponsesModel(model)) {
204
224
  result = await _responsesCallWithHistory(_getOpenAIClient(), { model, messages, tools, maxTokens });
205
225
  } else {
@@ -573,7 +593,7 @@ ${backupSourceCode ? `## Last Known Working Version\n\`\`\`javascript\n${backupS
573
593
  "changes" is for code edits (optional, use for actual code fixes).
574
594
  Include both if needed, or just one.`;
575
595
 
576
- const result = await aiCall({ model, systemPrompt, userPrompt, maxTokens: 2048, category: "heal" });
596
+ const result = await aiCall({ model, systemPrompt, userPrompt, maxTokens: 2048, category: "coding" });
577
597
  const content = result.content;
578
598
  const cleaned = content.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
579
599
 
@@ -58,20 +58,36 @@ Module._load = function (request, parent, isMain) {
58
58
  function _hookFastify(fastify) {
59
59
  // Wrap setErrorHandler so our IPC reporting runs BEFORE the user's handler
60
60
  const origSetError = fastify.setErrorHandler;
61
+ let customErrorHandlerSet = false;
61
62
  fastify.setErrorHandler = function (userHandler) {
63
+ customErrorHandlerSet = true;
62
64
  return origSetError.call(this, function (error, request, reply) {
63
65
  _reportError(request.url, request.method, error);
64
66
  return userHandler.call(this, error, request, reply);
65
67
  });
66
68
  };
67
69
 
68
- // Also add onError hook as a fallback (fires even if no custom error handler)
70
+ // Add onError hook as primary fallback — fires for all route errors in Fastify
69
71
  try {
70
72
  fastify.addHook("onError", function (request, reply, error, done) {
71
73
  _reportError(request.url, request.method, error);
72
74
  done();
73
75
  });
74
76
  } catch { /* addHook may fail if server is already started */ }
77
+
78
+ // Register a default error handler if user never calls setErrorHandler
79
+ // This ensures we catch async route throws even without a custom handler
80
+ try {
81
+ fastify.addHook("onReady", function (done) {
82
+ if (!customErrorHandlerSet) {
83
+ origSetError.call(fastify, function (error, request, reply) {
84
+ _reportError(request.url, request.method, error);
85
+ reply.code(error.statusCode || 500).send({ error: error.message });
86
+ });
87
+ }
88
+ done();
89
+ });
90
+ } catch { /* non-fatal */ }
75
91
  }
76
92
 
77
93
  function _hookExpress(app) {
@@ -15,7 +15,14 @@
15
15
  */
16
16
  function detectProvider(model) {
17
17
  if (!model) return "openai";
18
- if (/^claude/i.test(model)) return "anthropic";
18
+ if (/^wolverine/i.test(model) || /^gemma/i.test(model)) return "wolverine";
19
+ if (/^claude/i.test(model) || /^anthropic/i.test(model)) return "anthropic";
20
+ if (/^gemini/i.test(model) || /^google/i.test(model)) return "google";
21
+ if (/^mistral/i.test(model) || /^codestral/i.test(model) || /^pixtral/i.test(model)) return "mistral";
22
+ if (/^llama/i.test(model) || /^meta/i.test(model)) return "meta";
23
+ if (/^deepseek/i.test(model)) return "deepseek";
24
+ if (/^command/i.test(model) || /^cohere/i.test(model)) return "cohere";
25
+ // Default: OpenAI (gpt-*, o1-*, o3-*, o4-*, codex-*, text-embedding-*, dall-e-*, etc.)
19
26
  return "openai";
20
27
  }
21
28
 
@@ -332,9 +332,12 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
332
332
  } else if (iteration <= 2) {
333
333
  // Agent path — REASONING_MODEL (also handles iteration 1 when no file)
334
334
  console.log(chalk.magenta(` 🤖 Agent path (${getModel("reasoning")})...`));
335
+ // Tight turn budget: simple errors get 4 turns, ENOENT/config gets 5, complex gets 8
336
+ const isConfigError = /ENOENT|missing.*config|missing.*file|no such file/i.test(parsed.errorMessage);
337
+ const agentMaxTurns = isSimpleError ? 4 : isConfigError ? 5 : 8;
335
338
  const agent = new AgentEngine({
336
339
  sandbox, logger, cwd, mcp,
337
- maxTurns: isSimpleError ? 4 : 8,
340
+ maxTurns: agentMaxTurns,
338
341
  maxTokens: tokenBudget.agent,
339
342
  });
340
343
 
@@ -496,12 +499,20 @@ async function tryOperationalFix(parsed, cwd, logger) {
496
499
  if (!rel.startsWith("..") && /\.(json|yaml|yml|toml|ini|conf|cfg|env|log|txt|csv|db|sqlite)$/i.test(missingFile)) {
497
500
  try {
498
501
  fs.mkdirSync(path.dirname(missingFile), { recursive: true });
499
- // Create empty file or sensible default
500
502
  const ext = path.extname(missingFile).toLowerCase();
501
- const defaults = { ".json": "{}", ".yaml": "", ".yml": "", ".log": "", ".txt": "", ".csv": "", ".env": "" };
502
- fs.writeFileSync(missingFile, defaults[ext] || "", "utf-8");
503
+
504
+ // For JSON config files, try to infer expected structure from the code that loads them
505
+ let content = "";
506
+ if (ext === ".json") {
507
+ content = _inferJsonConfig(missingFile, cwd, parsed) || "{}";
508
+ } else {
509
+ const defaults = { ".yaml": "", ".yml": "", ".log": "", ".txt": "", ".csv": "", ".env": "" };
510
+ content = defaults[ext] || "";
511
+ }
512
+
513
+ fs.writeFileSync(missingFile, content, "utf-8");
503
514
  console.log(chalk.blue(` 📄 Created missing file: ${rel}`));
504
- return { fixed: true, action: `Created missing file: ${rel}` };
515
+ return { fixed: true, action: `Created missing file: ${rel} with ${content === "{}" ? "empty" : "inferred"} config` };
505
516
  } catch {}
506
517
  }
507
518
  }
@@ -544,4 +555,57 @@ async function tryOperationalFix(parsed, cwd, logger) {
544
555
  return { fixed: false };
545
556
  }
546
557
 
558
+ /**
559
+ * Try to infer JSON config structure by scanning the code that loads the file.
560
+ * Looks for property access patterns after require/readFile of the missing file.
561
+ * Returns a JSON string with empty/default values, or null if can't infer.
562
+ */
563
+ function _inferJsonConfig(missingFile, cwd, parsed) {
564
+ const fs = require("fs");
565
+ const path = require("path");
566
+
567
+ // Find which source file loads the missing config
568
+ const basename = path.basename(missingFile);
569
+ const sourceFile = parsed.filePath;
570
+ if (!sourceFile) return null;
571
+
572
+ try {
573
+ const source = fs.readFileSync(sourceFile, "utf-8");
574
+ // Look for property accesses on the loaded config: config.apiUrl, config.timeout, etc.
575
+ const configVarMatch = source.match(new RegExp(`(?:const|let|var)\\s+(\\w+)\\s*=\\s*(?:require|JSON\\.parse).*${basename.replace(".", "\\.")}`));
576
+ if (!configVarMatch) return null;
577
+
578
+ const varName = configVarMatch[1];
579
+ // Find all property accesses: varName.prop or varName["prop"]
580
+ const propRegex = new RegExp(`${varName}\\.(\\w+)`, "g");
581
+ const bracketRegex = new RegExp(`${varName}\\["(\\w+)"\\]`, "g");
582
+ const props = new Set();
583
+ let m;
584
+ while ((m = propRegex.exec(source)) !== null) props.add(m[1]);
585
+ while ((m = bracketRegex.exec(source)) !== null) props.add(m[1]);
586
+
587
+ if (props.size === 0) return null;
588
+
589
+ // Build config with sensible defaults based on property names
590
+ const config = {};
591
+ for (const prop of props) {
592
+ const lower = prop.toLowerCase();
593
+ if (/url|endpoint|host|uri/.test(lower)) config[prop] = "http://localhost:3000";
594
+ else if (/port/.test(lower)) config[prop] = 3000;
595
+ else if (/timeout|delay|interval|ttl/.test(lower)) config[prop] = 5000;
596
+ else if (/key|token|secret/.test(lower)) config[prop] = "placeholder";
597
+ else if (/name/.test(lower)) config[prop] = "default";
598
+ else if (/enabled|active|debug/.test(lower)) config[prop] = true;
599
+ else if (/count|max|min|limit|size/.test(lower)) config[prop] = 10;
600
+ else if (/path|dir|file/.test(lower)) config[prop] = "./";
601
+ else config[prop] = "";
602
+ }
603
+
604
+ console.log(chalk.gray(` 🔍 Inferred ${props.size} config fields from ${path.basename(sourceFile)}: ${[...props].join(", ")}`));
605
+ return JSON.stringify(config, null, 2);
606
+ } catch {
607
+ return null;
608
+ }
609
+ }
610
+
547
611
  module.exports = { heal };
@@ -336,7 +336,7 @@ class DashboardServer {
336
336
  systemPrompt: "Route a command. Respond with two words: ROUTE SIZE.\nROUTE: SIMPLE (general knowledge/explanation, no live data needed), TOOLS (needs live server data, file contents, or endpoint calls), AGENT (create/modify/fix code).\nSIZE: SMALL, MEDIUM, LARGE.\nExamples: 'what is wolverine' → SIMPLE SMALL. 'what time is it' → TOOLS SMALL. 'show me index.js' → TOOLS SMALL. 'add endpoint' → AGENT SMALL. 'build auth' → AGENT LARGE.",
337
337
  userPrompt: command,
338
338
  maxTokens: 10,
339
- category: "classify",
339
+ category: "classifier",
340
340
  });
341
341
 
342
342
  const raw = (result.content || "").trim().toUpperCase();
@@ -424,7 +424,7 @@ ${indexContent}
424
424
  Existing route files:
425
425
  ${existingRoutes || "(none)"}`,
426
426
  maxTokens: 2048,
427
- category: "develop",
427
+ category: "tool",
428
428
  });
429
429
 
430
430
  const raw = (result.content || "").trim().replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
@@ -53,6 +53,14 @@ const DEFAULT_PRICING = {
53
53
  "claude-3-sonnet": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
54
54
  "claude-3-haiku": { input: 0.25, output: 1.25, cache_write: 0.3125, cache_read: 0.025 },
55
55
 
56
+ // ── Wolverine Self-Hosted (Gemma 4 via api.wolverinenode.xyz) ──
57
+ // Priced below comparable Anthropic and OpenAI models — cheaper than both
58
+ "wolverine-test-1": { input: 0.10, output: 0.40 },
59
+ "wolverine-gemma-26b": { input: 0.25, output: 1.00 },
60
+ "wolverine-gemma-8b": { input: 0.10, output: 0.40 },
61
+ "wolverine-coding": { input: 0.10, output: 0.40 },
62
+ "wolverine-reasoning": { input: 0.25, output: 1.00 },
63
+
56
64
  // ── Fallback ──
57
65
  "_default": { input: 1.00, output: 4.00 },
58
66
  };
@@ -33,6 +33,8 @@ class TokenTracker {
33
33
  this._byModel = {};
34
34
  // Per-category totals
35
35
  this._byCategory = {};
36
+ // Per-model-per-category cross-reference (model::category → stats)
37
+ this._byModelCategory = {};
36
38
  // Per-tool totals
37
39
  this._byTool = {};
38
40
  // Timeline: recent entries for charts (in-memory)
@@ -87,7 +89,7 @@ class TokenTracker {
87
89
  };
88
90
 
89
91
  // Accumulate by model
90
- if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, minLatencyMs: Infinity, maxLatencyMs: 0, cacheCreation: 0, cacheRead: 0, cacheSavings: 0 };
92
+ if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, totalLatencyTokens: 0, timedCalls: 0, minLatencyMs: Infinity, maxLatencyMs: 0, cacheCreation: 0, cacheRead: 0, cacheSavings: 0 };
91
93
  const m = this._byModel[model];
92
94
  m.input += entry.input;
93
95
  m.output += entry.output;
@@ -100,6 +102,8 @@ class TokenTracker {
100
102
  if (entry.success) m.successes++; else m.failures++;
101
103
  if (latencyMs > 0) {
102
104
  m.totalLatencyMs += latencyMs;
105
+ m.totalLatencyTokens += total;
106
+ m.timedCalls++;
103
107
  if (latencyMs < m.minLatencyMs) m.minLatencyMs = latencyMs;
104
108
  if (latencyMs > m.maxLatencyMs) m.maxLatencyMs = latencyMs;
105
109
  }
@@ -112,6 +116,18 @@ class TokenTracker {
112
116
  this._byCategory[category].calls++;
113
117
  this._byCategory[category].cost += cost.total;
114
118
 
119
+ // Accumulate by model+category cross-reference
120
+ const mcKey = `${model}::${category}`;
121
+ if (!this._byModelCategory[mcKey]) this._byModelCategory[mcKey] = { model, category, input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0 };
122
+ const mc = this._byModelCategory[mcKey];
123
+ mc.input += entry.input;
124
+ mc.output += entry.output;
125
+ mc.total += total;
126
+ mc.calls++;
127
+ mc.cost += cost.total;
128
+ if (entry.success) mc.successes++; else mc.failures++;
129
+ if (latencyMs > 0) mc.totalLatencyMs += latencyMs;
130
+
115
131
  // Accumulate by tool
116
132
  if (tool) {
117
133
  const toolKey = tool.split(" ")[0];
@@ -158,6 +174,7 @@ class TokenTracker {
158
174
  },
159
175
  byModel: this._formatModelStats(),
160
176
  byCategory: this._byCategory,
177
+ byModelCategory: this._formatModelCategoryStats(),
161
178
  byTool: this._byTool,
162
179
  // Recent in-memory timeline
163
180
  timeline: this._timeline.slice(-100).map(e => ({
@@ -188,19 +205,42 @@ class TokenTracker {
188
205
  cacheCreation: m.cacheCreation || 0,
189
206
  cacheRead: m.cacheRead || 0,
190
207
  cacheSavings: Math.round((m.cacheSavings || 0) * 1000000) / 1000000,
191
- successes: m.successes || m.calls,
208
+ successes: m.successes != null ? m.successes : m.calls - (m.failures || 0),
192
209
  failures: m.failures || 0,
193
- successRate: m.calls > 0 ? Math.round(((m.successes || m.calls) / m.calls) * 100) : 0,
194
- avgLatencyMs: m.calls > 0 && m.totalLatencyMs ? Math.round(m.totalLatencyMs / m.calls) : 0,
210
+ successRate: m.calls > 0 ? parseFloat((((m.calls - (m.failures || 0)) / m.calls) * 100).toFixed(2)) : 0,
211
+ // Latency normalized by token count
212
+ avgLatencyMs: (m.timedCalls || 0) > 0 ? Math.round(m.totalLatencyMs / m.timedCalls) : 0,
213
+ msPerKToken: (m.totalLatencyTokens || 0) > 0 ? Math.round((m.totalLatencyMs / m.totalLatencyTokens) * 1000) : 0,
214
+ tokensPerSecond: m.totalLatencyMs > 0 ? Math.round((m.totalLatencyTokens || m.total) / (m.totalLatencyMs / 1000) * 10) / 10 : 0,
215
+ outputTokPerSecond: m.totalLatencyMs > 0 && m.output > 0 ? Math.round((m.output / (m.totalLatencyMs / 1000)) * 10) / 10 : 0,
216
+ timedCalls: m.timedCalls || 0,
195
217
  minLatencyMs: m.minLatencyMs === Infinity ? 0 : (m.minLatencyMs || 0),
196
218
  maxLatencyMs: m.maxLatencyMs || 0,
197
- tokensPerSecond: m.totalLatencyMs > 0 ? Math.round((m.total / (m.totalLatencyMs / 1000)) * 10) / 10 : 0,
198
219
  costPerCall: m.calls > 0 ? Math.round((m.cost / m.calls) * 1000000) / 1000000 : 0,
199
220
  };
200
221
  }
201
222
  return result;
202
223
  }
203
224
 
225
+ /**
226
+ * Format model+category cross-reference for analytics.
227
+ * Returns array of { model, category, calls, cost, tokens, successRate, avgLatencyMs }
228
+ */
229
+ _formatModelCategoryStats() {
230
+ return Object.values(this._byModelCategory).map(mc => ({
231
+ model: mc.model,
232
+ category: mc.category,
233
+ calls: mc.calls,
234
+ cost: Math.round(mc.cost * 1000000) / 1000000,
235
+ tokens: mc.total,
236
+ input: mc.input,
237
+ output: mc.output,
238
+ successRate: mc.calls > 0 ? parseFloat((((mc.calls - (mc.failures || 0)) / mc.calls) * 100).toFixed(2)) : 100,
239
+ avgLatencyMs: mc.calls > 0 && mc.totalLatencyMs > 0 ? Math.round(mc.totalLatencyMs / mc.calls) : 0,
240
+ tokensPerSecond: mc.totalLatencyMs > 0 ? Math.round((mc.total / (mc.totalLatencyMs / 1000)) * 10) / 10 : 0,
241
+ }));
242
+ }
243
+
204
244
  /**
205
245
  * Load full history from JSONL file. For dashboard charts across sessions.
206
246
  * @param {number} limit — max entries to return (default: 500)
@@ -253,6 +293,7 @@ class TokenTracker {
253
293
  lastSaved: Date.now(),
254
294
  byModel: this._byModel,
255
295
  byCategory: this._byCategory,
296
+ byModelCategory: this._byModelCategory,
256
297
  byTool: this._byTool,
257
298
  totalTokens: this._totalTokens,
258
299
  totalCalls: this._totalCalls,
@@ -275,6 +316,7 @@ class TokenTracker {
275
316
  const data = JSON.parse(fs.readFileSync(this.usagePath, "utf-8"));
276
317
  this._byModel = data.byModel || {};
277
318
  this._byCategory = data.byCategory || {};
319
+ this._byModelCategory = data.byModelCategory || {};
278
320
  this._byTool = data.byTool || {};
279
321
  this._totalTokens = data.totalTokens || 0;
280
322
  this._totalCalls = data.totalCalls || 0;
@@ -236,7 +236,7 @@ Provide a brief analysis and actionable suggestions. Focus on:
236
236
 
237
237
  Keep your response under 300 words. Be specific and actionable.`,
238
238
  maxTokens: 512,
239
- category: "security",
239
+ category: "audit",
240
240
  });
241
241
 
242
242
  const analysis = result.content;
@@ -172,7 +172,7 @@ class Notifier {
172
172
  systemPrompt: "You summarize server errors for developers. Write 1-2 short sentences. Be direct and actionable. Do not include any secrets, passwords, or API key values — only refer to them by name (e.g. 'the OPENAI_API_KEY').",
173
173
  userPrompt: `Summarize this error for a developer notification:\n\nCategory: ${classification.category}\nError: ${safeError}\n\nStack (first 300 chars): ${safeStack.slice(0, 300)}`,
174
174
  maxTokens: 100,
175
- category: "security",
175
+ category: "audit",
176
176
  });
177
177
 
178
178
  // Double-sanitize the AI response (in case the AI echoes something)
@@ -66,7 +66,8 @@ function collectHeartbeat(subsystems) {
66
66
  totalCalls: tokenTracker?._totalCalls || usage?.session?.totalCalls || 0,
67
67
  totalCacheSavings: _sumCacheSavings(usage?.byModel || {}),
68
68
  byCategory: usage?.byCategory || {},
69
- byModel: usage?.byModel || {}, // includes: latency, successRate, tokensPerSec, cacheSavings per model
69
+ byModel: usage?.byModel || {},
70
+ byModelCategory: usage?.byModelCategory || [],
70
71
  byTool: usage?.byTool || {},
71
72
  byProvider: _aggregateByProvider(usage?.byModel || {}),
72
73
  },
@@ -95,7 +95,7 @@ Respond with ONLY valid JSON:
95
95
  systemPrompt: "You are a security analyst. Respond with ONLY valid JSON.",
96
96
  userPrompt,
97
97
  maxTokens: 128,
98
- category: "security",
98
+ category: "audit",
99
99
  });
100
100
 
101
101
  const content = result.content;
@@ -242,8 +242,15 @@ function ensureSingleProcess(cwd) {
242
242
  fs.writeFileSync(pidFile, String(process.pid), "utf-8");
243
243
  } catch {}
244
244
 
245
- // Clean up on exit
246
- process.on("exit", () => { try { fs.unlinkSync(pidFile); } catch {} });
245
+ // Clean up on exit — only delete if PID file still belongs to us
246
+ // (prevents race condition where old process deletes new process's PID)
247
+ const myPid = process.pid;
248
+ process.on("exit", () => {
249
+ try {
250
+ const current = parseInt(fs.readFileSync(pidFile, "utf-8").trim(), 10);
251
+ if (current === myPid) fs.unlinkSync(pidFile);
252
+ } catch {}
253
+ });
247
254
  }
248
255
 
249
256
  // ── Skill Metadata ──