wolverine-ai 3.5.0 → 3.6.0

This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
package/.env.example CHANGED
@@ -6,6 +6,11 @@
  # Your OpenAI API key (required)
  OPENAI_API_KEY=
  ANTHROPIC_API_KEY=
+
+ # ── Wolverine Inference (self-hosted models) ─────────────────────
+ # Get your API key at wolverinenode.xyz — $1 = 100 credits
+ # Set provider to "wolverine" in server/config/settings.json
+ WOLVERINE_API_KEY=
  # ── Dashboard Admin Key (make your own) ──────────────────────────────────────────
  # Required for the agent command interface on the dashboard.
  # Generate: node -e "console.log(require('crypto').randomBytes(32).toString('hex'))"
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "wolverine-ai",
- "version": "3.5.0",
+ "version": "3.6.0",
  "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
  "main": "src/index.js",
  "bin": {
@@ -5,7 +5,7 @@
  "env": "development"
  },

- "provider": "hybrid",
+ "provider": "wolverine",

  "openai_settings": {
  "reasoning": "gpt-5.4-mini",
@@ -43,6 +43,18 @@
  "embedding": "text-embedding-3-small"
  },

+ "wolverine_settings": {
+ "reasoning": "wolverine-test-1",
+ "coding": "wolverine-test-1",
+ "chat": "wolverine-test-1",
+ "tool": "wolverine-test-1",
+ "classifier": "wolverine-test-1",
+ "audit": "wolverine-test-1",
+ "compacting": "wolverine-test-1",
+ "research": "wolverine-test-1",
+ "embedding": "text-embedding-3-small"
+ },
+
  "server": {
  "port": 3000,
  "maxRetries": 3,
@@ -84,6 +96,11 @@
  "intervalMs": 300000
  },

+ "platform": {
+ "apiKey": "",
+ "cors": ["http://localhost:3000"]
+ },
+
  "dashboard": {},

  "cors": {
@@ -0,0 +1,324 @@
+ const https = require("https");
+ const http = require("http");
+ const crypto = require("crypto");
+
+ /**
+ * Wolverine Inference API
+ *
+ * Credit system: $1 = 100 credits. 1 credit = $0.01 of compute.
+ * Token pricing (in credits per million tokens):
+ * wolverine-test-1: 1 credit input / 4 credits output per 1M tokens
+ * (= $0.01/$0.04 per 1M — 15x cheaper than gpt-4o-mini, 80x cheaper than haiku)
+ *
+ * Rate limiting: per API key, configurable per tier.
+ * Queue: when GPU is at capacity, requests queue with timeout.
+ */
+
+ const INFERENCE_URL = process.env.WOLVERINE_INFERENCE_URL || "https://clips-third-players-binding.trycloudflare.com";
+
+ // Pricing in CREDITS per million tokens ($1 = 100 credits)
+ const MODEL_PRICING = {
+ "wolverine-test-1": { input: 1.0, output: 4.0 }, // $0.01/$0.04 per 1M
+ "wolverine-coding": { input: 1.0, output: 4.0 },
+ "wolverine-reasoning": { input: 2.5, output: 10.0 }, // heavier model when available
+ };
+
+ const MODEL_MAP = {
+ "wolverine-test-1": "wolverine-test-1",
+ "wolverine-coding": "wolverine-test-1",
+ "wolverine-reasoning": "wolverine-test-1",
+ };
+
+ const TIER_LIMITS = {
+ free: { rpm: 10, maxTokens: 1024 },
+ starter: { rpm: 60, maxTokens: 4096 },
+ pro: { rpm: 300, maxTokens: 4096 },
+ admin: { rpm: 9999, maxTokens: 4096 },
+ };
+
+ function tokenCost(model, inputTokens, outputTokens) {
+ const p = MODEL_PRICING[model] || MODEL_PRICING["wolverine-test-1"];
+ return ((inputTokens / 1_000_000) * p.input) + ((outputTokens / 1_000_000) * p.output);
+ }
+
+ // ── Request Queue (handles GPU saturation) ──
+ const queue = [];
+ let activeRequests = 0;
+ const MAX_CONCURRENT = 8; // vLLM max-num-seqs
+ const QUEUE_TIMEOUT_MS = 30000;
+
+ function enqueue() {
+ return new Promise((resolve, reject) => {
+ if (activeRequests < MAX_CONCURRENT) {
+ activeRequests++;
+ resolve();
+ return;
+ }
+ const timer = setTimeout(() => {
+ const idx = queue.indexOf(entry);
+ if (idx >= 0) queue.splice(idx, 1);
+ reject(new Error("Queue timeout — GPU at capacity. Try again in a few seconds."));
+ }, QUEUE_TIMEOUT_MS);
+ const entry = { resolve: () => { clearTimeout(timer); activeRequests++; resolve(); }, reject };
+ queue.push(entry);
+ });
+ }
+
+ function dequeue() {
+ activeRequests = Math.max(0, activeRequests - 1);
+ if (queue.length > 0) {
+ const next = queue.shift();
+ next.resolve();
+ }
+ }
+
+ async function routes(fastify) {
+ const { pool } = require("../lib/db");
+
+ // Rate limit state (in-memory)
+ const rateWindows = new Map();
+
+ async function authenticate(request, reply) {
+ const apiKey = request.headers.authorization?.replace("Bearer ", "") || request.headers["x-api-key"];
+ if (!apiKey) return reply.code(401).send({ error: { message: "API key required. Pass via Authorization: Bearer <key>", type: "auth_error" } });
+
+ // Platform key bypass
+ let settings = {};
+ try { settings = require("../config/settings.json"); } catch {}
+ if (apiKey === settings.platform?.apiKey) {
+ request.account = { api_key: apiKey, owner: "platform", tier: "admin", credits_remaining: 999999, rate_limit_rpm: 9999 };
+ return;
+ }
+
+ const result = await pool.query("SELECT * FROM api_credits WHERE api_key = $1", [apiKey]);
+ if (result.rows.length === 0) return reply.code(401).send({ error: { message: "Invalid API key", type: "auth_error" } });
+
+ const account = result.rows[0];
+
+ // Credit check
+ if (parseFloat(account.credits_remaining) <= 0) {
+ return reply.code(402).send({ error: { message: "Insufficient credits. Add credits at wolverinenode.xyz", type: "billing_error", credits_remaining: 0 } });
+ }
+
+ // Rate limit
+ const now = Date.now();
+ const window = rateWindows.get(apiKey) || { count: 0, resetAt: now + 60000 };
+ if (now > window.resetAt) { window.count = 0; window.resetAt = now + 60000; }
+ const limit = account.rate_limit_rpm || TIER_LIMITS[account.tier]?.rpm || 10;
+ if (window.count >= limit) {
+ const retryAfter = Math.ceil((window.resetAt - now) / 1000);
+ return reply.code(429).send({ error: { message: `Rate limit: ${limit} requests/min. Retry in ${retryAfter}s`, type: "rate_limit", retry_after: retryAfter } });
+ }
+ window.count++;
+ rateWindows.set(apiKey, window);
+
+ request.account = account;
+ }
+
+ // ── POST /chat/completions ──
+ fastify.post("/chat/completions", { preHandler: authenticate }, async (request, reply) => {
+ const body = request.body || {};
+ const requestedModel = body.model || "wolverine-test-1";
+ const account = request.account;
+ const tier = TIER_LIMITS[account.tier] || TIER_LIMITS.free;
+ const startMs = Date.now();
+
+ // Enforce max tokens per tier
+ if (body.max_tokens && body.max_tokens > tier.maxTokens) {
+ body.max_tokens = tier.maxTokens;
+ }
+
+ // Map model name for backend
+ const backendBody = { ...body, model: MODEL_MAP[requestedModel] || requestedModel };
+
+ // Queue if GPU saturated
+ try {
+ await enqueue();
+ } catch (err) {
+ return reply.code(503).send({ error: { message: err.message, type: "capacity_error", queue_length: queue.length } });
+ }
+
+ try {
+ const result = await proxyToInference("/v1/chat/completions", backendBody);
+ const latencyMs = Date.now() - startMs;
+
+ const usage = result.usage || {};
+ const inputTokens = usage.prompt_tokens || 0;
+ const outputTokens = usage.completion_tokens || 0;
+ const cost = tokenCost(requestedModel, inputTokens, outputTokens);
+
+ // Bill credits (skip for platform)
+ if (account.owner !== "platform") {
+ await pool.query(
+ "UPDATE api_credits SET credits_remaining = credits_remaining - $1, credits_used = credits_used + $1, last_used = NOW() WHERE api_key = $2",
+ [cost, account.api_key]
+ );
+ await pool.query(
+ "INSERT INTO api_usage_log (api_key, model, input_tokens, output_tokens, total_tokens, cost, latency_ms, success, endpoint) VALUES ($1, $2, $3, $4, $5, $6, $7, true, $8)",
+ [account.api_key, requestedModel, inputTokens, outputTokens, inputTokens + outputTokens, cost, latencyMs, "/v1/chat/completions"]
+ );
+ }
+
+ // Rewrite response
+ if (result.model) result.model = requestedModel;
+ result.x_wolverine = {
+ credits_used: Math.round(cost * 1000000) / 1000000,
+ credits_remaining: Math.max(0, parseFloat(account.credits_remaining) - cost),
+ latency_ms: latencyMs,
+ queued: activeRequests > MAX_CONCURRENT,
+ };
+
+ return result;
+ } catch (err) {
+ if (account.owner !== "platform") {
+ await pool.query(
+ "INSERT INTO api_usage_log (api_key, model, input_tokens, output_tokens, total_tokens, cost, latency_ms, success, endpoint) VALUES ($1, $2, 0, 0, 0, 0, $3, false, $4)",
+ [account.api_key, requestedModel, Date.now() - startMs, "/v1/chat/completions"]
+ ).catch(() => {});
+ }
+ return reply.code(502).send({ error: { message: `Inference error: ${err.message}`, type: "inference_error" } });
+ } finally {
+ dequeue();
+ }
+ });
+
+ // ── GET /models ──
+ fastify.get("/models", async () => ({
+ object: "list",
+ data: Object.entries(MODEL_PRICING).map(([id, p]) => ({
+ id, object: "model", owned_by: "wolverine",
+ created: Math.floor(Date.now() / 1000),
+ pricing: { input_credits_per_million: p.input, output_credits_per_million: p.output, usd_per_credit: 0.01 },
+ })),
+ }));
+
+ // ── POST /keys/create — generate new API key ──
+ fastify.post("/keys/create", { preHandler: authenticate }, async (request, reply) => {
+ const account = request.account;
+ if (account.tier !== "admin") return reply.code(403).send({ error: { message: "Only admins can create API keys", type: "auth_error" } });
+
+ const { owner, email, credits, tier, rpm } = request.body || {};
+ if (!owner) return reply.code(400).send({ error: { message: "owner required", type: "validation_error" } });
+
+ const newKey = "wlv_" + crypto.randomBytes(24).toString("hex");
+ const keyTier = tier || "free";
+ const keyCredits = credits || (keyTier === "free" ? 10 : 0);
+ const keyRpm = rpm || TIER_LIMITS[keyTier]?.rpm || 10;
+
+ await pool.query(
+ "INSERT INTO api_credits (api_key, owner, email, credits_remaining, tier, plan_name, rate_limit_rpm) VALUES ($1, $2, $3, $4, $5, $6, $7)",
+ [newKey, owner, email || null, keyCredits, keyTier, keyTier, keyRpm]
+ );
+
+ return { api_key: newKey, owner, tier: keyTier, credits: keyCredits, rate_limit_rpm: keyRpm };
+ });
+
+ // ── POST /keys/add-credits — add credits to a key ──
+ fastify.post("/keys/add-credits", { preHandler: authenticate }, async (request, reply) => {
+ const account = request.account;
+ if (account.tier !== "admin") return reply.code(403).send({ error: { message: "Only admins can add credits", type: "auth_error" } });
+
+ const { api_key, credits } = request.body || {};
+ if (!api_key || !credits) return reply.code(400).send({ error: { message: "api_key and credits required" } });
+
+ await pool.query("UPDATE api_credits SET credits_remaining = credits_remaining + $1 WHERE api_key = $2", [credits, api_key]);
+ const updated = await pool.query("SELECT credits_remaining FROM api_credits WHERE api_key = $1", [api_key]);
+ return { api_key, credits_added: credits, credits_remaining: parseFloat(updated.rows[0]?.credits_remaining || 0) };
+ });
+
+ // ── GET /keys — list all keys (admin only) ──
+ fastify.get("/keys", { preHandler: authenticate }, async (request, reply) => {
+ if (request.account.tier !== "admin") return reply.code(403).send({ error: { message: "Admin only" } });
+ const { rows } = await pool.query("SELECT api_key, owner, email, tier, credits_remaining, credits_used, rate_limit_rpm, created_at, last_used FROM api_credits ORDER BY created_at DESC");
+ return { keys: rows };
+ });
+
+ // ── GET /credits ──
+ fastify.get("/credits", { preHandler: authenticate }, async (request, reply) => {
+ const a = request.account;
+ return {
+ credits_remaining: parseFloat(a.credits_remaining),
+ credits_used: parseFloat(a.credits_used || 0),
+ usd_remaining: parseFloat(a.credits_remaining) * 0.01,
+ usd_used: parseFloat(a.credits_used || 0) * 0.01,
+ tier: a.tier, rate_limit_rpm: a.rate_limit_rpm, owner: a.owner,
+ };
+ });
+
+ // ── GET /usage ──
+ fastify.get("/usage", { preHandler: authenticate }, async (request, reply) => {
+ const apiKey = request.account.api_key;
+ const period = request.query.period || "7d";
+ const interval = { "1h": "1 hour", "1d": "1 day", "7d": "7 days", "30d": "30 days" }[period] || "7 days";
+
+ const summary = await pool.query(
+ `SELECT model, COUNT(*) AS calls, SUM(input_tokens) AS input, SUM(output_tokens) AS output,
+ SUM(total_tokens) AS tokens, SUM(cost) AS credits_spent, AVG(latency_ms) AS avg_latency,
+ COUNT(*) FILTER (WHERE success) AS successes
+ FROM api_usage_log WHERE api_key = $1 AND timestamp > NOW() - $2::interval
+ GROUP BY model ORDER BY credits_spent DESC`, [apiKey, interval]
+ );
+
+ const timeline = await pool.query(
+ `SELECT date_trunc('hour', timestamp) AS hour, SUM(cost) AS credits, SUM(total_tokens) AS tokens, COUNT(*) AS calls
+ FROM api_usage_log WHERE api_key = $1 AND timestamp > NOW() - $2::interval
+ GROUP BY hour ORDER BY hour`, [apiKey, interval]
+ );
+
+ const totalCredits = summary.rows.reduce((s, r) => s + parseFloat(r.credits_spent || 0), 0);
+
+ return {
+ period,
+ total_credits_spent: Math.round(totalCredits * 1000000) / 1000000,
+ total_usd_spent: Math.round(totalCredits * 0.01 * 1000000) / 1000000,
+ byModel: summary.rows.map(r => ({
+ model: r.model, calls: parseInt(r.calls), input: parseInt(r.input || 0), output: parseInt(r.output || 0),
+ tokens: parseInt(r.tokens || 0), credits_spent: parseFloat(r.credits_spent || 0),
+ usd_spent: parseFloat(r.credits_spent || 0) * 0.01,
+ avgLatencyMs: Math.round(parseFloat(r.avg_latency || 0)),
+ successRate: parseInt(r.calls) > 0 ? parseFloat(((parseInt(r.successes) / parseInt(r.calls)) * 100).toFixed(2)) : 0,
+ })),
+ timeline: timeline.rows.map(r => ({
+ hour: r.hour, credits: parseFloat(r.credits), tokens: parseInt(r.tokens), calls: parseInt(r.calls),
+ })),
+ queue: { active: activeRequests, waiting: queue.length, max: MAX_CONCURRENT },
+ };
+ });
+
+ // ── GET /health ──
+ fastify.get("/health", async () => {
+ try {
+ const result = await proxyToInference("/health", null, "GET");
+ return { status: "ok", inference: result, queue: { active: activeRequests, waiting: queue.length, max: MAX_CONCURRENT } };
+ } catch (err) {
+ return { status: "down", error: err.message, queue: { active: activeRequests, waiting: queue.length, max: MAX_CONCURRENT } };
+ }
+ });
+ }
+
+ function proxyToInference(path, body, method = "POST") {
+ return new Promise((resolve, reject) => {
+ const url = new (require("url").URL)(INFERENCE_URL + path);
+ const client = url.protocol === "https:" ? https : http;
+ const bodyStr = body ? JSON.stringify(body) : null;
+
+ const req = client.request({
+ hostname: url.hostname,
+ port: url.port || (url.protocol === "https:" ? 443 : 80),
+ path: url.pathname,
+ method,
+ timeout: 120000,
+ headers: { "Content-Type": "application/json", ...(bodyStr ? { "Content-Length": Buffer.byteLength(bodyStr) } : {}) },
+ }, (res) => {
+ let data = "";
+ res.on("data", (c) => { data += c; });
+ res.on("end", () => { try { resolve(JSON.parse(data)); } catch { resolve({ raw: data }); } });
+ });
+ req.on("error", reject);
+ req.on("timeout", () => { req.destroy(); reject(new Error("Inference timeout")); });
+ if (bodyStr) req.write(bodyStr);
+ req.end();
+ });
+ }
+
+ module.exports = routes;
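
The new file above (its path is not shown in this diff view) is an OpenAI-compatible billing proxy built on Fastify and Postgres. A minimal client sketch follows; the base URL is an assumption taken from the api.wolverinenode.xyz/v1 default used elsewhere in this release, and Node 18+ is assumed for global fetch:

// hypothetical client usage, not code from the package
(async () => {
  const res = await fetch("https://api.wolverinenode.xyz/v1/chat/completions", {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${process.env.WOLVERINE_API_KEY}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: "wolverine-test-1",
      messages: [{ role: "user", content: "Hello" }],
      max_tokens: 256, // clamped server-side to the key's tier limit
    }),
  });
  // Per the handlers above: 402 = out of credits, 429 = rate limit, 503 = queue timeout
  const data = await res.json();
  console.log(data.x_wolverine); // { credits_used, credits_remaining, latency_ms, queued }
})();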
@@ -448,6 +448,15 @@ class AgentEngine {
  const assistantMessage = choice.message || choice;
  this.messages.push(assistantMessage);

+ // Parse Gemma-style text tool calls: "call:tool_name{json_args}" → structured tool_calls
+ if ((!assistantMessage.tool_calls || assistantMessage.tool_calls.length === 0) && assistantMessage.content) {
+ const parsed = _parseTextToolCalls(assistantMessage.content);
+ if (parsed.length > 0) {
+ assistantMessage.tool_calls = parsed;
+ console.log(chalk.gray(` 🔧 Parsed ${parsed.length} tool call(s) from text output`));
+ }
+ }
+
  if (!assistantMessage.tool_calls || assistantMessage.tool_calls.length === 0) {
  if (assistantMessage.content) {
  console.log(chalk.gray(` 💬 ${(assistantMessage.content || "").slice(0, 200)}`));
@@ -1223,4 +1232,50 @@ function _runPostHook(toolName, toolInput, toolOutput, isError, cwd) {
  } catch {}
  }

+ /**
+ * Parse Gemma-style text tool calls into OpenAI tool_calls format.
+ * Gemma outputs: "call:tool_name{json_args}" or "<|tool_call>call:tool_name{args}<tool_call|>"
+ * We convert to: [{ id, type: "function", function: { name, arguments } }]
+ */
+ function _parseTextToolCalls(content) {
+ if (!content) return [];
+ const calls = [];
+ // Match patterns: call:name{args} or call:name{"key":"val"}
+ const patterns = [
+ /call:(\w+)\{([^}]*(?:\{[^}]*\}[^}]*)*)\}/g, // call:name{args with nested braces}
+ /call:(\w+)\(([^)]*)\)/g, // call:name(args)
+ ];
+ for (const regex of patterns) {
+ let match;
+ while ((match = regex.exec(content)) !== null) {
+ const name = match[1];
+ let argsStr = match[2];
+ // Try to parse as JSON, otherwise build from key:value pairs
+ let args;
+ try {
+ // Clean up Gemma's quoting: path:"value" → "path":"value"
+ const cleaned = argsStr.replace(/(\w+)\s*:\s*/g, '"$1":').replace(/<\|"\|>/g, '"');
+ args = JSON.parse("{" + cleaned + "}");
+ } catch {
+ try { args = JSON.parse(argsStr); } catch {
+ // Last resort: treat as single string argument for the most common param
+ const paramGuess = argsStr.replace(/['"<|>]/g, "").trim();
+ if (name === "read_file" || name === "glob_files") args = { path: paramGuess };
+ else if (name === "grep_code") args = { pattern: paramGuess };
+ else if (name === "bash_exec") args = { command: paramGuess };
+ else if (name === "write_file") args = { path: paramGuess, content: "" };
+ else args = { input: paramGuess };
+ }
+ }
+ calls.push({
+ id: "call_" + Date.now().toString(36) + "_" + calls.length,
+ type: "function",
+ function: { name, arguments: JSON.stringify(args) },
+ });
+ }
+ if (calls.length > 0) break; // use first matching pattern
+ }
+ return calls;
+ }
+
  module.exports = { AgentEngine, TOOL_DEFINITIONS, BLOCKED_COMMANDS };
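
To make the parser's behavior concrete, a worked example (not from the package) tracing the first regex and the JSON cleanup above:

// _parseTextToolCalls('call:read_file{path:"src/index.js"}') returns:
// [{
//   id: "call_<base36 timestamp>_0",
//   type: "function",
//   function: { name: "read_file", arguments: '{"path":"src/index.js"}' }
// }]
// The cleanup step rewrites path:"src/index.js" to "path":"src/index.js" so the
// braces parse as JSON; unparseable args fall back to the per-tool single-argument guesses.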
@@ -115,7 +115,7 @@ async function compact(text) {
  systemPrompt: "Compress the following text into a dense, semantically rich summary. Keep all technical terms, function names, file paths, and error messages. Remove filler words. Output ONLY the compressed text, nothing else.",
  userPrompt: text,
  maxTokens: 256,
- category: "brain",
+ category: "compacting",
  });

  return result.content || text;
@@ -17,7 +17,7 @@ const path = require("path");
  * - Config files (.env, .json, .yaml)
  */

- const SKIP_DIRS = new Set(["node_modules", ".wolverine", ".git", "dist", "build", "coverage", "src", "bin", "tests"]);
+ const SKIP_DIRS = new Set(["node_modules", ".wolverine", ".git", "dist", "build", "coverage", "src", "bin", "tests", "examples", "public", "static", "assets", "__tests__", ".next", ".nuxt"]);
  const CODE_EXTENSIONS = new Set([".js", ".ts", ".mjs", ".cjs", ".jsx", ".tsx"]);
  const CONFIG_EXTENSIONS = new Set([".json", ".yaml", ".yml", ".toml", ".env"]);

@@ -52,6 +52,11 @@ function scanProject(projectRoot) {
  // Recursive scan
  _scanDir(root, root, map);

+ // Cap collections to prevent memory bloat on large projects
+ if (map.functions.length > 500) map.functions = map.functions.slice(0, 500);
+ if (map.classes.length > 200) map.classes = map.classes.slice(0, 200);
+ if (map.exports.length > 300) map.exports = map.exports.slice(0, 300);
+
  // Build summary
  map.summary = _buildSummary(map);

@@ -88,12 +93,21 @@ function _scanDir(dir, root, map) {

  map.files.push({ path: relPath, type: "code" });

+ // Skip large/minified files — they bloat memory and aren't useful for repair context
+ let stat;
+ try { stat = fs.statSync(fullPath); } catch { continue; }
+ if (stat.size > 100000) continue; // Skip files > 100KB (bundles, minified, generated)
+
  // Parse the file for patterns
  let content;
  try {
  content = fs.readFileSync(fullPath, "utf-8");
  } catch { continue; }

+ // Skip minified code (avg line length > 200 chars = likely minified)
+ const lines = content.split("\n");
+ if (lines.length > 0 && content.length / lines.length > 200) continue;
+
  _extractRoutes(content, relPath, map);
  _extractExports(content, relPath, map);
  _extractFunctions(content, relPath, map);
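
For a sense of the two thresholds above (numbers illustrative): a 120 KB bundle is already excluded by the 100 KB size cap, and at 400 physical lines it would also fail the density check at 300 characters per line; a 60 KB hand-written module with 2,000 lines averages about 30 characters per line and is parsed normally.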
@@ -5,6 +5,7 @@ const { getModel, detectProvider } = require("./models");

  let _openaiClient = null;
  let _anthropicClient = null;
+ let _wolverineClient = null;
  let _tracker = null;

  function setTokenTracker(tracker) { _tracker = tracker; }
@@ -35,9 +36,23 @@ function _track(model, category, usage, tool, latencyMs, success) {

  function getClient(provider) {
  if (provider === "anthropic") return _getAnthropicClient();
+ if (provider === "wolverine") return _getWolverineClient();
  return _getOpenAIClient();
  }

+ function _getWolverineClient() {
+ if (!_wolverineClient) {
+ // Wolverine inference: direct to GPU (WOLVERINE_INFERENCE_URL) or via proxy (api.wolverinenode.xyz/v1)
+ // Direct URL = no auth needed (Vast tunnel). Proxy URL = needs WOLVERINE_API_KEY for billing.
+ const baseURL = process.env.WOLVERINE_INFERENCE_URL
+ ? process.env.WOLVERINE_INFERENCE_URL + "/v1"
+ : "https://api.wolverinenode.xyz/v1";
+ const apiKey = process.env.WOLVERINE_API_KEY || "none";
+ _wolverineClient = new OpenAI({ apiKey, baseURL });
+ }
+ return _wolverineClient;
+ }
+
  function _getOpenAIClient() {
  if (!_openaiClient) {
  const apiKey = process.env.OPENAI_API_KEY;
@@ -65,6 +80,7 @@ function isReasoningModel(model) {
  }

  function isAnthropicModel(model) { return detectProvider(model) === "anthropic"; }
+ function isWolverineModel(model) { return detectProvider(model) === "wolverine"; }

  /**
  * Per-model max output token limits (with 10% overestimation buffer).
@@ -176,6 +192,8 @@ async function aiCall({ model, systemPrompt, userPrompt, maxTokens = 2048, tools
  try {
  if (provider === "anthropic") {
  result = await _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
+ } else if (provider === "wolverine") {
+ result = await _chatCall(_getWolverineClient(), { model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
  } else if (isResponsesModel(model)) {
  result = await _responsesCall(_getOpenAIClient(), { model, systemPrompt, userPrompt, maxTokens, tools });
  } else {
@@ -200,6 +218,8 @@ async function aiCallWithHistory({ model, messages, tools, maxTokens = 4096, cat
  try {
  if (provider === "anthropic") {
  result = await _anthropicCallWithHistory({ model, messages, tools, maxTokens });
+ } else if (provider === "wolverine") {
+ result = await _chatCallWithHistory(_getWolverineClient(), { model, messages, tools, maxTokens });
  } else if (isResponsesModel(model)) {
  result = await _responsesCallWithHistory(_getOpenAIClient(), { model, messages, tools, maxTokens });
  } else {
@@ -573,7 +593,7 @@ ${backupSourceCode ? `## Last Known Working Version\n\`\`\`javascript\n${backupS
  "changes" is for code edits (optional, use for actual code fixes).
  Include both if needed, or just one.`;

- const result = await aiCall({ model, systemPrompt, userPrompt, maxTokens: 2048, category: "heal" });
+ const result = await aiCall({ model, systemPrompt, userPrompt, maxTokens: 2048, category: "coding" });
  const content = result.content;
  const cleaned = content.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");

@@ -15,7 +15,14 @@
  */
  function detectProvider(model) {
  if (!model) return "openai";
- if (/^claude/i.test(model)) return "anthropic";
+ if (/^wolverine/i.test(model) || /^gemma/i.test(model)) return "wolverine";
+ if (/^claude/i.test(model) || /^anthropic/i.test(model)) return "anthropic";
+ if (/^gemini/i.test(model) || /^google/i.test(model)) return "google";
+ if (/^mistral/i.test(model) || /^codestral/i.test(model) || /^pixtral/i.test(model)) return "mistral";
+ if (/^llama/i.test(model) || /^meta/i.test(model)) return "meta";
+ if (/^deepseek/i.test(model)) return "deepseek";
+ if (/^command/i.test(model) || /^cohere/i.test(model)) return "cohere";
+ // Default: OpenAI (gpt-*, o1-*, o3-*, o4-*, codex-*, text-embedding-*, dall-e-*, etc.)
  return "openai";
  }

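For illustration (these calls are not in the diff), the new prefix routing resolves as:

// detectProvider("wolverine-test-1") → "wolverine"
// detectProvider("gemma-2-9b")       → "wolverine" (Gemma models route to the self-hosted backend)
// detectProvider("claude-3-haiku")   → "anthropic"
// detectProvider("deepseek-chat")    → "deepseek"
// detectProvider("gpt-4o-mini")      → "openai" (the default)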
@@ -336,7 +336,7 @@ class DashboardServer {
  systemPrompt: "Route a command. Respond with two words: ROUTE SIZE.\nROUTE: SIMPLE (general knowledge/explanation, no live data needed), TOOLS (needs live server data, file contents, or endpoint calls), AGENT (create/modify/fix code).\nSIZE: SMALL, MEDIUM, LARGE.\nExamples: 'what is wolverine' → SIMPLE SMALL. 'what time is it' → TOOLS SMALL. 'show me index.js' → TOOLS SMALL. 'add endpoint' → AGENT SMALL. 'build auth' → AGENT LARGE.",
  userPrompt: command,
  maxTokens: 10,
- category: "classify",
+ category: "classifier",
  });

  const raw = (result.content || "").trim().toUpperCase();
@@ -424,7 +424,7 @@ ${indexContent}
  Existing route files:
  ${existingRoutes || "(none)"}`,
  maxTokens: 2048,
- category: "develop",
+ category: "tool",
  });

  const raw = (result.content || "").trim().replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
@@ -53,6 +53,14 @@ const DEFAULT_PRICING = {
  "claude-3-sonnet": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
  "claude-3-haiku": { input: 0.25, output: 1.25, cache_write: 0.3125, cache_read: 0.025 },

+ // ── Wolverine Self-Hosted (Gemma 4 via api.wolverinenode.xyz) ──
+ // Priced between Anthropic and OpenAI — cheaper than both
+ "wolverine-test-1": { input: 0.10, output: 0.40 },
+ "wolverine-gemma-26b": { input: 0.25, output: 1.00 },
+ "wolverine-gemma-8b": { input: 0.10, output: 0.40 },
+ "wolverine-coding": { input: 0.10, output: 0.40 },
+ "wolverine-reasoning": { input: 0.25, output: 1.00 },
+
  // ── Fallback ──
  "_default": { input: 1.00, output: 4.00 },
  };
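
Taking the units from the Claude rows (USD per million tokens), a worked example of what the new defaults imply; the traffic numbers are invented for illustration:

// 200,000 input + 50,000 output tokens on "wolverine-test-1":
//   input:  (200000 / 1e6) * 0.10 = $0.020
//   output: ( 50000 / 1e6) * 0.40 = $0.020
//   total:  $0.04, versus $0.1125 for the same traffic on claude-3-haiku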
@@ -33,6 +33,8 @@ class TokenTracker {
  this._byModel = {};
  // Per-category totals
  this._byCategory = {};
+ // Per-model-per-category cross-reference (model::category → stats)
+ this._byModelCategory = {};
  // Per-tool totals
  this._byTool = {};
  // Timeline: recent entries for charts (in-memory)
@@ -87,7 +89,7 @@ class TokenTracker {
  };

  // Accumulate by model
- if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, minLatencyMs: Infinity, maxLatencyMs: 0, cacheCreation: 0, cacheRead: 0, cacheSavings: 0 };
+ if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, totalLatencyTokens: 0, timedCalls: 0, minLatencyMs: Infinity, maxLatencyMs: 0, cacheCreation: 0, cacheRead: 0, cacheSavings: 0 };
  const m = this._byModel[model];
  m.input += entry.input;
  m.output += entry.output;
@@ -100,6 +102,8 @@
  if (entry.success) m.successes++; else m.failures++;
  if (latencyMs > 0) {
  m.totalLatencyMs += latencyMs;
+ m.totalLatencyTokens += total;
+ m.timedCalls++;
  if (latencyMs < m.minLatencyMs) m.minLatencyMs = latencyMs;
  if (latencyMs > m.maxLatencyMs) m.maxLatencyMs = latencyMs;
  }
@@ -112,6 +116,18 @@
  this._byCategory[category].calls++;
  this._byCategory[category].cost += cost.total;

+ // Accumulate by model+category cross-reference
+ const mcKey = `${model}::${category}`;
+ if (!this._byModelCategory[mcKey]) this._byModelCategory[mcKey] = { model, category, input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0 };
+ const mc = this._byModelCategory[mcKey];
+ mc.input += entry.input;
+ mc.output += entry.output;
+ mc.total += total;
+ mc.calls++;
+ mc.cost += cost.total;
+ if (entry.success) mc.successes++; else mc.failures++;
+ if (latencyMs > 0) mc.totalLatencyMs += latencyMs;
+
  // Accumulate by tool
  if (tool) {
  const toolKey = tool.split(" ")[0];
@@ -158,6 +174,7 @@
  },
  byModel: this._formatModelStats(),
  byCategory: this._byCategory,
+ byModelCategory: this._formatModelCategoryStats(),
  byTool: this._byTool,
  // Recent in-memory timeline
  timeline: this._timeline.slice(-100).map(e => ({
@@ -188,19 +205,42 @@
  cacheCreation: m.cacheCreation || 0,
  cacheRead: m.cacheRead || 0,
  cacheSavings: Math.round((m.cacheSavings || 0) * 1000000) / 1000000,
- successes: m.successes || m.calls,
+ successes: m.successes != null ? m.successes : m.calls - (m.failures || 0),
  failures: m.failures || 0,
- successRate: m.calls > 0 ? Math.round(((m.successes || m.calls) / m.calls) * 100) : 0,
- avgLatencyMs: m.calls > 0 && m.totalLatencyMs ? Math.round(m.totalLatencyMs / m.calls) : 0,
+ successRate: m.calls > 0 ? parseFloat((((m.calls - (m.failures || 0)) / m.calls) * 100).toFixed(2)) : 0,
+ // Latency normalized by token count
+ avgLatencyMs: (m.timedCalls || 0) > 0 ? Math.round(m.totalLatencyMs / m.timedCalls) : 0,
+ msPerKToken: (m.totalLatencyTokens || 0) > 0 ? Math.round((m.totalLatencyMs / m.totalLatencyTokens) * 1000) : 0,
+ tokensPerSecond: m.totalLatencyMs > 0 ? Math.round((m.totalLatencyTokens || m.total) / (m.totalLatencyMs / 1000) * 10) / 10 : 0,
+ outputTokPerSecond: m.totalLatencyMs > 0 && m.output > 0 ? Math.round((m.output / (m.totalLatencyMs / 1000)) * 10) / 10 : 0,
+ timedCalls: m.timedCalls || 0,
  minLatencyMs: m.minLatencyMs === Infinity ? 0 : (m.minLatencyMs || 0),
  maxLatencyMs: m.maxLatencyMs || 0,
- tokensPerSecond: m.totalLatencyMs > 0 ? Math.round((m.total / (m.totalLatencyMs / 1000)) * 10) / 10 : 0,
  costPerCall: m.calls > 0 ? Math.round((m.cost / m.calls) * 1000000) / 1000000 : 0,
  };
  }
  return result;
  }

+ /**
+ * Format model+category cross-reference for analytics.
+ * Returns array of { model, category, calls, cost, tokens, successRate, avgLatencyMs }
+ */
+ _formatModelCategoryStats() {
+ return Object.values(this._byModelCategory).map(mc => ({
+ model: mc.model,
+ category: mc.category,
+ calls: mc.calls,
+ cost: Math.round(mc.cost * 1000000) / 1000000,
+ tokens: mc.total,
+ input: mc.input,
+ output: mc.output,
+ successRate: mc.calls > 0 ? parseFloat((((mc.calls - (mc.failures || 0)) / mc.calls) * 100).toFixed(2)) : 100,
+ avgLatencyMs: mc.calls > 0 && mc.totalLatencyMs > 0 ? Math.round(mc.totalLatencyMs / mc.calls) : 0,
+ tokensPerSecond: mc.totalLatencyMs > 0 ? Math.round((mc.total / (mc.totalLatencyMs / 1000)) * 10) / 10 : 0,
+ }));
+ }
+
  /**
  * Load full history from JSONL file. For dashboard charts across sessions.
  * @param {number} limit — max entries to return (default: 500)
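
For illustration (values invented), one entry of the byModelCategory array produced by the formatter above would look like:

// key "wolverine-test-1::coding" (12 calls, 1 failure, 22,080 ms of timed latency) formats to:
// { model: "wolverine-test-1", category: "coding", calls: 12, cost: 0.0031,
//   tokens: 4820, input: 4010, output: 810, successRate: 91.67,
//   avgLatencyMs: 1840, tokensPerSecond: 218.3 }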
@@ -253,6 +293,7 @@
  lastSaved: Date.now(),
  byModel: this._byModel,
  byCategory: this._byCategory,
+ byModelCategory: this._byModelCategory,
  byTool: this._byTool,
  totalTokens: this._totalTokens,
  totalCalls: this._totalCalls,
@@ -275,6 +316,7 @@
  const data = JSON.parse(fs.readFileSync(this.usagePath, "utf-8"));
  this._byModel = data.byModel || {};
  this._byCategory = data.byCategory || {};
+ this._byModelCategory = data.byModelCategory || {};
  this._byTool = data.byTool || {};
  this._totalTokens = data.totalTokens || 0;
  this._totalCalls = data.totalCalls || 0;
@@ -236,7 +236,7 @@ Provide a brief analysis and actionable suggestions. Focus on:

  Keep your response under 300 words. Be specific and actionable.`,
  maxTokens: 512,
- category: "security",
+ category: "audit",
  });

  const analysis = result.content;
@@ -172,7 +172,7 @@ class Notifier {
  systemPrompt: "You summarize server errors for developers. Write 1-2 short sentences. Be direct and actionable. Do not include any secrets, passwords, or API key values — only refer to them by name (e.g. 'the OPENAI_API_KEY').",
  userPrompt: `Summarize this error for a developer notification:\n\nCategory: ${classification.category}\nError: ${safeError}\n\nStack (first 300 chars): ${safeStack.slice(0, 300)}`,
  maxTokens: 100,
- category: "security",
+ category: "audit",
  });

  // Double-sanitize the AI response (in case the AI echoes something)
@@ -66,7 +66,8 @@ function collectHeartbeat(subsystems) {
  totalCalls: tokenTracker?._totalCalls || usage?.session?.totalCalls || 0,
  totalCacheSavings: _sumCacheSavings(usage?.byModel || {}),
  byCategory: usage?.byCategory || {},
- byModel: usage?.byModel || {}, // includes: latency, successRate, tokensPerSec, cacheSavings per model
+ byModel: usage?.byModel || {},
+ byModelCategory: usage?.byModelCategory || [],
  byTool: usage?.byTool || {},
  byProvider: _aggregateByProvider(usage?.byModel || {}),
  },
@@ -95,7 +95,7 @@ Respond with ONLY valid JSON:
  systemPrompt: "You are a security analyst. Respond with ONLY valid JSON.",
  userPrompt,
  maxTokens: 128,
- category: "security",
+ category: "audit",
  });

  const content = result.content;