wolverine-ai 3.5.0 → 3.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,329 @@
1
+ const https = require("https");
2
+ const http = require("http");
3
+ const crypto = require("crypto");
4
+
5
+ /**
6
+ * Wolverine Inference API
7
+ *
8
+ * Credit system: $1 = 100 credits. 1 credit = $0.01 of compute.
9
+ * Token pricing (in credits per million tokens):
10
+ * wolverine-test-1: 1 credit input / 4 credits output per 1M tokens
11
+ * (= $0.01/$0.04 per 1M — 15x cheaper than gpt-4o-mini, 80x cheaper than haiku)
12
+ *
13
+ * Rate limiting: per API key, configurable per tier.
14
+ * Queue: when GPU is at capacity, requests queue with timeout.
15
+ */
16
+
17
// Inference backend location; overridable for local dev / GPU relocation.
const INFERENCE_URL = process.env.WOLVERINE_INFERENCE_URL || "http://ssh8.vast.ai:24233";
// Optional bearer key forwarded to the GPU backend; empty string = no auth header.
const GPU_KEY = process.env.WOLVERINE_GPU_KEY || "";

// Pricing in CREDITS per million tokens ($1 = 100 credits)
const MODEL_PRICING = {
  "wolverine-test-1": { input: 1.0, output: 4.0 }, // $0.01/$0.04 per 1M
  "wolverine-coding": { input: 1.0, output: 4.0 },
  "wolverine-reasoning": { input: 2.5, output: 10.0 }, // heavier model when available
};

// Public alias → backend model id. All aliases currently resolve to the
// single deployed model, wolverine-test-1.
const MODEL_MAP = {
  "wolverine-test-1": "wolverine-test-1",
  "wolverine-coding": "wolverine-test-1",
  "wolverine-reasoning": "wolverine-test-1",
};

// Per-tier defaults: requests/minute and max_tokens clamp applied per request.
// Per-account rate_limit_rpm (DB column) overrides the tier rpm when set.
const TIER_LIMITS = {
  free: { rpm: 10, maxTokens: 1024 },
  starter: { rpm: 60, maxTokens: 4096 },
  pro: { rpm: 300, maxTokens: 4096 },
  admin: { rpm: 9999, maxTokens: 4096 },
};
39
+
40
/**
 * Compute the credit cost of one request.
 * Unknown models are billed at the wolverine-test-1 rate.
 *
 * @param {string} model - public model id (key of MODEL_PRICING)
 * @param {number} inputTokens - prompt tokens consumed
 * @param {number} outputTokens - completion tokens produced
 * @returns {number} cost in credits (1 credit = $0.01)
 */
function tokenCost(model, inputTokens, outputTokens) {
  const MILLION = 1_000_000;
  const pricing = MODEL_PRICING[model] ?? MODEL_PRICING["wolverine-test-1"];
  const inputCost = (inputTokens / MILLION) * pricing.input;
  const outputCost = (outputTokens / MILLION) * pricing.output;
  return inputCost + outputCost;
}
44
+
45
// ── Request Queue (handles GPU saturation) ──
const queue = []; // FIFO of waiters ({ resolve, reject }) parked by enqueue()
let activeRequests = 0; // requests currently holding a GPU slot
const MAX_CONCURRENT = 8; // vLLM max-num-seqs
const QUEUE_TIMEOUT_MS = 30000; // max time a request may wait for a slot
50
+
51
/**
 * Acquire a GPU slot. Resolves immediately if a slot is free; otherwise the
 * caller is parked in FIFO order and resolved by dequeue(), or rejected after
 * QUEUE_TIMEOUT_MS. Every resolved enqueue() must be paired with dequeue().
 *
 * Fix: the original declared `entry` AFTER the setTimeout callback that
 * referenced it (TDZ-fragile ordering); declarations are now ordered so no
 * closure captures a not-yet-declared binding, and reject also clears the
 * timer so a cancelled waiter cannot leave a live timeout behind.
 *
 * @returns {Promise<void>} resolves when a slot is held, rejects on timeout
 */
function enqueue() {
  return new Promise((resolve, reject) => {
    // Fast path: capacity available, take a slot immediately.
    if (activeRequests < MAX_CONCURRENT) {
      activeRequests++;
      resolve();
      return;
    }
    // Slow path: park this request until dequeue() hands us a slot.
    let timer = null;
    const entry = {
      resolve: () => { clearTimeout(timer); activeRequests++; resolve(); },
      reject: (err) => { clearTimeout(timer); reject(err); },
    };
    timer = setTimeout(() => {
      // Give up: remove ourselves from the queue (if still there) and fail.
      const idx = queue.indexOf(entry);
      if (idx >= 0) queue.splice(idx, 1);
      reject(new Error("Queue timeout — GPU at capacity. Try again in a few seconds."));
    }, QUEUE_TIMEOUT_MS);
    queue.push(entry);
  });
}
67
+
68
/**
 * Release a GPU slot and, if anyone is waiting, hand the freed slot to the
 * oldest queued request (its entry.resolve re-increments activeRequests).
 */
function dequeue() {
  // Never let the counter go negative, even on unbalanced calls.
  if (activeRequests > 0) activeRequests -= 1;
  const waiter = queue.shift();
  if (waiter) waiter.resolve();
}
75
+
76
/**
 * Fastify plugin exposing the Wolverine public API: billed chat completions,
 * model catalog, admin key management, credit/usage reporting, health probe.
 *
 * Billed routes run `authenticate` as a preHandler, which attaches the
 * resolved api_credits row to `request.account`.
 */
async function routes(fastify) {
  const { pool } = require("../lib/db");

  // Rate limit state (in-memory)
  // NOTE(review): entries are never evicted, so this Map grows with every
  // distinct API key seen since process start — confirm this is acceptable.
  const rateWindows = new Map();

  // Resolve the API key to an account and enforce credits + rate limit.
  // Sends 401/402/429 itself on failure; on success sets request.account.
  async function authenticate(request, reply) {
    // Accept either "Authorization: Bearer <key>" or an "x-api-key" header.
    const apiKey = request.headers.authorization?.replace("Bearer ", "") || request.headers["x-api-key"];
    if (!apiKey) return reply.code(401).send({ error: { message: "API key required. Pass via Authorization: Bearer <key>", type: "auth_error" } });

    // Platform key bypass
    let settings = {};
    try { settings = require("../config/settings.json"); } catch {}
    if (apiKey === settings.platform?.apiKey) {
      // Synthetic admin account: effectively unlimited credits and rate.
      request.account = { api_key: apiKey, owner: "platform", tier: "admin", credits_remaining: 999999, rate_limit_rpm: 9999 };
      return;
    }

    const result = await pool.query("SELECT * FROM api_credits WHERE api_key = $1", [apiKey]);
    if (result.rows.length === 0) return reply.code(401).send({ error: { message: "Invalid API key", type: "auth_error" } });

    const account = result.rows[0];

    // Credit check (credits_remaining arrives from pg as a string → parseFloat)
    if (parseFloat(account.credits_remaining) <= 0) {
      return reply.code(402).send({ error: { message: "Insufficient credits. Add credits at wolverinenode.xyz", type: "billing_error", credits_remaining: 0 } });
    }

    // Rate limit: fixed 60-second window counted per API key.
    const now = Date.now();
    const window = rateWindows.get(apiKey) || { count: 0, resetAt: now + 60000 };
    if (now > window.resetAt) { window.count = 0; window.resetAt = now + 60000; }
    // Per-account override wins, then tier default, then the free-tier floor.
    const limit = account.rate_limit_rpm || TIER_LIMITS[account.tier]?.rpm || 10;
    if (window.count >= limit) {
      const retryAfter = Math.ceil((window.resetAt - now) / 1000);
      return reply.code(429).send({ error: { message: `Rate limit: ${limit} requests/min. Retry in ${retryAfter}s`, type: "rate_limit", retry_after: retryAfter } });
    }
    window.count++;
    rateWindows.set(apiKey, window);

    request.account = account;
  }

  // ── POST /chat/completions ──
  // OpenAI-compatible endpoint: clamps max_tokens by tier, proxies to the GPU
  // backend, bills credits, and annotates the response with x_wolverine data.
  fastify.post("/chat/completions", { preHandler: authenticate }, async (request, reply) => {
    const body = request.body || {};
    const requestedModel = body.model || "wolverine-test-1";
    const account = request.account;
    const tier = TIER_LIMITS[account.tier] || TIER_LIMITS.free;
    const startMs = Date.now();

    // Enforce max tokens per tier (silently clamps rather than rejecting)
    if (body.max_tokens && body.max_tokens > tier.maxTokens) {
      body.max_tokens = tier.maxTokens;
    }

    // Map model name for backend (public aliases resolve via MODEL_MAP)
    const backendBody = { ...body, model: MODEL_MAP[requestedModel] || requestedModel };

    // Queue if GPU saturated; enqueue() rejects after QUEUE_TIMEOUT_MS.
    try {
      await enqueue();
    } catch (err) {
      return reply.code(503).send({ error: { message: err.message, type: "capacity_error", queue_length: queue.length } });
    }

    try {
      const result = await proxyToInference("/v1/chat/completions", backendBody);
      const latencyMs = Date.now() - startMs;

      // Token counts come from the backend's usage block; missing fields bill 0.
      const usage = result.usage || {};
      const inputTokens = usage.prompt_tokens || 0;
      const outputTokens = usage.completion_tokens || 0;
      const cost = tokenCost(requestedModel, inputTokens, outputTokens);

      // Bill credits (skip for platform)
      // NOTE(review): the UPDATE and INSERT are not in one transaction, so a
      // crash between them bills without logging — confirm acceptable.
      if (account.owner !== "platform") {
        await pool.query(
          "UPDATE api_credits SET credits_remaining = credits_remaining - $1, credits_used = credits_used + $1, last_used = NOW() WHERE api_key = $2",
          [cost, account.api_key]
        );
        await pool.query(
          "INSERT INTO api_usage_log (api_key, model, input_tokens, output_tokens, total_tokens, cost, latency_ms, success, endpoint) VALUES ($1, $2, $3, $4, $5, $6, $7, true, $8)",
          [account.api_key, requestedModel, inputTokens, outputTokens, inputTokens + outputTokens, cost, latencyMs, "/v1/chat/completions"]
        );
      }

      // Rewrite response so the caller sees the alias it asked for.
      if (result.model) result.model = requestedModel;
      result.x_wolverine = {
        credits_used: Math.round(cost * 1000000) / 1000000,
        // Derived from the row read at auth time — may be slightly stale
        // under concurrent requests on the same key.
        credits_remaining: Math.max(0, parseFloat(account.credits_remaining) - cost),
        latency_ms: latencyMs,
        // NOTE(review): activeRequests never exceeds MAX_CONCURRENT under the
        // enqueue/dequeue scheme above, so this appears to always be false —
        // confirm intent (likely ">=" or a flag captured at enqueue time).
        queued: activeRequests > MAX_CONCURRENT,
      };

      return result;
    } catch (err) {
      // Best-effort failure log; .catch keeps log errors from masking the 502.
      if (account.owner !== "platform") {
        await pool.query(
          "INSERT INTO api_usage_log (api_key, model, input_tokens, output_tokens, total_tokens, cost, latency_ms, success, endpoint) VALUES ($1, $2, 0, 0, 0, 0, $3, false, $4)",
          [account.api_key, requestedModel, Date.now() - startMs, "/v1/chat/completions"]
        ).catch(() => {});
      }
      return reply.code(502).send({ error: { message: `Inference error: ${err.message}`, type: "inference_error" } });
    } finally {
      // Always free the GPU slot, success or failure.
      dequeue();
    }
  });

  // ── GET /models ── public catalog with credit pricing (no auth)
  fastify.get("/models", async () => ({
    object: "list",
    data: Object.entries(MODEL_PRICING).map(([id, p]) => ({
      id, object: "model", owned_by: "wolverine",
      created: Math.floor(Date.now() / 1000),
      pricing: { input_credits_per_million: p.input, output_credits_per_million: p.output, usd_per_credit: 0.01 },
    })),
  }));

  // ── POST /keys/create — generate new API key (admin only) ──
  fastify.post("/keys/create", { preHandler: authenticate }, async (request, reply) => {
    const account = request.account;
    if (account.tier !== "admin") return reply.code(403).send({ error: { message: "Only admins can create API keys", type: "auth_error" } });

    const { owner, email, credits, tier, rpm } = request.body || {};
    if (!owner) return reply.code(400).send({ error: { message: "owner required", type: "validation_error" } });

    // 24 random bytes → 48 hex chars, prefixed for easy identification.
    const newKey = "wlv_" + crypto.randomBytes(24).toString("hex");
    const keyTier = tier || "free";
    // Free keys get 10 starter credits; paid tiers start at 0 until topped up.
    const keyCredits = credits || (keyTier === "free" ? 10 : 0);
    const keyRpm = rpm || TIER_LIMITS[keyTier]?.rpm || 10;

    await pool.query(
      "INSERT INTO api_credits (api_key, owner, email, credits_remaining, tier, plan_name, rate_limit_rpm) VALUES ($1, $2, $3, $4, $5, $6, $7)",
      [newKey, owner, email || null, keyCredits, keyTier, keyTier, keyRpm]
    );

    return { api_key: newKey, owner, tier: keyTier, credits: keyCredits, rate_limit_rpm: keyRpm };
  });

  // ── POST /keys/add-credits — add credits to a key (admin only) ──
  fastify.post("/keys/add-credits", { preHandler: authenticate }, async (request, reply) => {
    const account = request.account;
    if (account.tier !== "admin") return reply.code(403).send({ error: { message: "Only admins can add credits", type: "auth_error" } });

    const { api_key, credits } = request.body || {};
    if (!api_key || !credits) return reply.code(400).send({ error: { message: "api_key and credits required" } });

    await pool.query("UPDATE api_credits SET credits_remaining = credits_remaining + $1 WHERE api_key = $2", [credits, api_key]);
    // Re-read so the response reflects the post-update balance.
    const updated = await pool.query("SELECT credits_remaining FROM api_credits WHERE api_key = $1", [api_key]);
    return { api_key, credits_added: credits, credits_remaining: parseFloat(updated.rows[0]?.credits_remaining || 0) };
  });

  // ── GET /keys — list all keys (admin only) ──
  fastify.get("/keys", { preHandler: authenticate }, async (request, reply) => {
    if (request.account.tier !== "admin") return reply.code(403).send({ error: { message: "Admin only" } });
    const { rows } = await pool.query("SELECT api_key, owner, email, tier, credits_remaining, credits_used, rate_limit_rpm, created_at, last_used FROM api_credits ORDER BY created_at DESC");
    return { keys: rows };
  });

  // ── GET /credits — balance snapshot for the calling key ──
  fastify.get("/credits", { preHandler: authenticate }, async (request, reply) => {
    const a = request.account;
    return {
      credits_remaining: parseFloat(a.credits_remaining),
      credits_used: parseFloat(a.credits_used || 0),
      // 1 credit = $0.01
      usd_remaining: parseFloat(a.credits_remaining) * 0.01,
      usd_used: parseFloat(a.credits_used || 0) * 0.01,
      tier: a.tier, rate_limit_rpm: a.rate_limit_rpm, owner: a.owner,
    };
  });

  // ── GET /usage — per-model summary + hourly timeline over a period ──
  fastify.get("/usage", { preHandler: authenticate }, async (request, reply) => {
    const apiKey = request.account.api_key;
    // ?period=1h|1d|7d|30d — anything else falls back to 7 days.
    const period = request.query.period || "7d";
    const interval = { "1h": "1 hour", "1d": "1 day", "7d": "7 days", "30d": "30 days" }[period] || "7 days";

    const summary = await pool.query(
      `SELECT model, COUNT(*) AS calls, SUM(input_tokens) AS input, SUM(output_tokens) AS output,
              SUM(total_tokens) AS tokens, SUM(cost) AS credits_spent, AVG(latency_ms) AS avg_latency,
              COUNT(*) FILTER (WHERE success) AS successes
       FROM api_usage_log WHERE api_key = $1 AND timestamp > NOW() - $2::interval
       GROUP BY model ORDER BY credits_spent DESC`, [apiKey, interval]
    );

    const timeline = await pool.query(
      `SELECT date_trunc('hour', timestamp) AS hour, SUM(cost) AS credits, SUM(total_tokens) AS tokens, COUNT(*) AS calls
       FROM api_usage_log WHERE api_key = $1 AND timestamp > NOW() - $2::interval
       GROUP BY hour ORDER BY hour`, [apiKey, interval]
    );

    const totalCredits = summary.rows.reduce((s, r) => s + parseFloat(r.credits_spent || 0), 0);

    return {
      period,
      total_credits_spent: Math.round(totalCredits * 1000000) / 1000000,
      total_usd_spent: Math.round(totalCredits * 0.01 * 1000000) / 1000000,
      // pg returns aggregates as strings → parseInt/parseFloat everywhere.
      byModel: summary.rows.map(r => ({
        model: r.model, calls: parseInt(r.calls), input: parseInt(r.input || 0), output: parseInt(r.output || 0),
        tokens: parseInt(r.tokens || 0), credits_spent: parseFloat(r.credits_spent || 0),
        usd_spent: parseFloat(r.credits_spent || 0) * 0.01,
        avgLatencyMs: Math.round(parseFloat(r.avg_latency || 0)),
        successRate: parseInt(r.calls) > 0 ? parseFloat(((parseInt(r.successes) / parseInt(r.calls)) * 100).toFixed(2)) : 0,
      })),
      timeline: timeline.rows.map(r => ({
        hour: r.hour, credits: parseFloat(r.credits), tokens: parseInt(r.tokens), calls: parseInt(r.calls),
      })),
      queue: { active: activeRequests, waiting: queue.length, max: MAX_CONCURRENT },
    };
  });

  // ── GET /health — pings the inference backend (no auth) ──
  fastify.get("/health", async () => {
    try {
      const result = await proxyToInference("/health", null, "GET");
      return { status: "ok", inference: result, queue: { active: activeRequests, waiting: queue.length, max: MAX_CONCURRENT } };
    } catch (err) {
      return { status: "down", error: err.message, queue: { active: activeRequests, waiting: queue.length, max: MAX_CONCURRENT } };
    }
  });
}
299
+
300
/**
 * Forward a request to the inference backend and parse the JSON reply.
 *
 * Fixes:
 * - The original sent only `url.pathname`, silently dropping any query string
 *   in `path`; now forwards `pathname + search`.
 * - Uses the global WHATWG `URL` class instead of `new (require("url").URL)`.
 *
 * @param {string} path - backend path (may include a query string)
 * @param {object|null} body - JSON body, or null for body-less requests
 * @param {string} [method="POST"] - HTTP method
 * @returns {Promise<object>} parsed JSON, or { raw } when the reply isn't JSON
 * @throws rejects on network error or after the 120s timeout
 */
function proxyToInference(path, body, method = "POST") {
  return new Promise((resolve, reject) => {
    const url = new URL(INFERENCE_URL + path);
    const client = url.protocol === "https:" ? https : http;
    const bodyStr = body ? JSON.stringify(body) : null;

    const req = client.request({
      hostname: url.hostname,
      port: url.port || (url.protocol === "https:" ? 443 : 80),
      path: url.pathname + url.search, // keep query string (was dropped)
      method,
      timeout: 120000, // socket-level idle timeout; inference can be slow
      headers: {
        "Content-Type": "application/json",
        // Only attach backend auth when a GPU key is configured.
        ...(GPU_KEY ? { "Authorization": `Bearer ${GPU_KEY}` } : {}),
        ...(bodyStr ? { "Content-Length": Buffer.byteLength(bodyStr) } : {}),
      },
    }, (res) => {
      let data = "";
      res.on("data", (c) => { data += c; });
      // Non-JSON replies are surfaced as { raw } rather than rejecting.
      res.on("end", () => { try { resolve(JSON.parse(data)); } catch { resolve({ raw: data }); } });
    });
    req.on("error", reject);
    req.on("timeout", () => { req.destroy(); reject(new Error("Inference timeout")); });
    if (bodyStr) req.write(bodyStr);
    req.end();
  });
}
328
+
329
+ module.exports = routes;
@@ -250,7 +250,7 @@ const TOOL_DEFINITIONS = [
250
250
  type: "function",
251
251
  function: {
252
252
  name: "run_db_fix",
253
- description: "Run a write query on a SQLite database to fix data issues: UPDATE invalid entries, DELETE corrupt rows, ALTER schema. Creates a backup first.",
253
+ description: "Run a write query on a SQLite database to fix data issues. IMPORTANT: Always use inspect_db FIRST to see the current state before writing. This tool auto-snapshots affected rows before and after the write. Creates a backup. Returns before/after state so you can verify the fix is correct.",
254
254
  parameters: {
255
255
  type: "object",
256
256
  properties: {
@@ -448,6 +448,15 @@ class AgentEngine {
448
448
  const assistantMessage = choice.message || choice;
449
449
  this.messages.push(assistantMessage);
450
450
 
451
+ // Parse Gemma-style text tool calls: "call:tool_name{json_args}" → structured tool_calls
452
+ if ((!assistantMessage.tool_calls || assistantMessage.tool_calls.length === 0) && assistantMessage.content) {
453
+ const parsed = _parseTextToolCalls(assistantMessage.content);
454
+ if (parsed.length > 0) {
455
+ assistantMessage.tool_calls = parsed;
456
+ console.log(chalk.gray(` 🔧 Parsed ${parsed.length} tool call(s) from text output`));
457
+ }
458
+ }
459
+
451
460
  if (!assistantMessage.tool_calls || assistantMessage.tool_calls.length === 0) {
452
461
  if (assistantMessage.content) {
453
462
  console.log(chalk.gray(` 💬 ${(assistantMessage.content || "").slice(0, 200)}`));
@@ -944,15 +953,60 @@ class AgentEngine {
944
953
  if (upper.startsWith("DROP DATABASE") || upper.includes("DROP TABLE sqlite_")) {
945
954
  return { content: "BLOCKED: Cannot drop system tables" };
946
955
  }
956
+
947
957
  // Backup the DB file first
948
958
  const backupPath = dbPath + ".wolverine-backup";
949
959
  fs.copyFileSync(dbPath, backupPath);
960
+
950
961
  const db = new Database(dbPath);
962
+
963
+ // SAFETY: Snapshot affected rows BEFORE the write
964
+ // Extract table name and WHERE clause to SELECT the rows that will change
965
+ let beforeSnapshot = "";
966
+ try {
967
+ const tableMatch = upper.match(/(?:UPDATE|DELETE\s+FROM|INSERT\s+INTO)\s+(\w+)/i);
968
+ const whereMatch = args.sql.match(/WHERE\s+(.+?)(?:;|$)/i);
969
+ if (tableMatch) {
970
+ const table = tableMatch[1];
971
+ const whereClause = whereMatch ? `WHERE ${whereMatch[1]}` : "";
972
+ const selectSql = `SELECT * FROM ${table} ${whereClause} LIMIT 20`;
973
+ try {
974
+ const before = db.prepare(selectSql).all();
975
+ if (before.length > 0) {
976
+ beforeSnapshot = `\n\nBEFORE STATE (${before.length} rows affected):\n${JSON.stringify(before, null, 2).slice(0, 2000)}`;
977
+ console.log(chalk.gray(` 🗃️ Snapshot: ${before.length} rows from ${table} ${whereClause ? whereClause.slice(0, 40) : "(all)"}`));
978
+ }
979
+ } catch { /* SELECT failed, might be INSERT into new table — that's fine */ }
980
+ }
981
+ } catch { /* snapshot failed, proceed with caution */ }
982
+
983
+ // Execute the fix
951
984
  const result = db.prepare(args.sql).run();
985
+
986
+ // SAFETY: Snapshot AFTER to show what changed
987
+ let afterSnapshot = "";
988
+ try {
989
+ const tableMatch = upper.match(/(?:UPDATE|DELETE\s+FROM|INSERT\s+INTO)\s+(\w+)/i);
990
+ const whereMatch = args.sql.match(/WHERE\s+(.+?)(?:;|$)/i);
991
+ if (tableMatch) {
992
+ const table = tableMatch[1];
993
+ const whereClause = whereMatch ? `WHERE ${whereMatch[1]}` : "";
994
+ const selectSql = `SELECT * FROM ${table} ${whereClause} LIMIT 20`;
995
+ try {
996
+ const after = db.prepare(selectSql).all();
997
+ afterSnapshot = `\n\nAFTER STATE (${after.length} rows):\n${JSON.stringify(after, null, 2).slice(0, 2000)}`;
998
+ } catch {}
999
+ }
1000
+ } catch {}
1001
+
952
1002
  db.close();
953
1003
  this.filesModified.push(args.db_path);
1004
+
1005
+ const summary = `SQL executed. Changes: ${result.changes}. Backup at: ${backupPath}${beforeSnapshot}${afterSnapshot}`;
954
1006
  console.log(chalk.green(` 🗃️ DB fix applied: ${args.sql.slice(0, 60)} (changes: ${result.changes})`));
955
- return { content: `SQL executed. Changes: ${result.changes}. Backup at: ${backupPath}` };
1007
+ if (beforeSnapshot) console.log(chalk.gray(` 🗃️ Before/after snapshot captured for audit`));
1008
+
1009
+ return { content: summary };
956
1010
  } catch (e) { return { content: `DB error: ${e.message}` }; }
957
1011
  }
958
1012
 
@@ -1085,14 +1139,23 @@ FAST FIXES (act immediately, don't investigate):
1085
1139
  - Missing env var → check_env → report it → done
1086
1140
 
1087
1141
  INVESTIGATION (only when cause is unclear):
1088
- - Database error → inspect_db then run_db_fix
1142
+ - Database error → inspect_db FIRST to see current state → understand what went wrong → run_db_fix with targeted fix
1089
1143
  - Unknown errors → grep_code, list_dir to find root cause
1090
1144
 
1145
+ DATABASE SAFETY:
1146
+ - ALWAYS inspect_db before run_db_fix — never write blind
1147
+ - run_db_fix auto-snapshots affected rows before/after — check the response to verify your fix
1148
+ - For bad data: understand WHY the data is wrong before changing it
1149
+ - For NaN/null errors: check if the data was corrupted or if the code should handle it
1150
+ - Prefer fixing code to handle edge cases over modifying production data
1151
+ - A database backup is created automatically before every write
1152
+
1091
1153
  RULES:
1092
1154
  1. Fix on turn 1-2 when possible. Investigation is a last resort.
1093
1155
  2. For ENOENT config files: read the code that requires the file, then create it with the expected structure.
1094
1156
  3. bash_exec for operational fixes, edit_file for code, write_file for missing files, run_db_fix for data
1095
- 4. Always call done with summary when finished never end without calling done.
1157
+ 4. For database errors: inspect first, fix data only when code can't reasonably handle the edge case
1158
+ 5. Always call done with summary when finished — never end without calling done.
1096
1159
  ${primaryFile ? `\nFile: ${primaryFile}` : ""}
1097
1160
  Project: ${cwd}`;
1098
1161
  }
@@ -1223,4 +1286,50 @@ function _runPostHook(toolName, toolInput, toolOutput, isError, cwd) {
1223
1286
  } catch {}
1224
1287
  }
1225
1288
 
1289
/**
 * Parse Gemma-style text tool calls into OpenAI tool_calls format.
 * Gemma outputs: "call:tool_name{json_args}" or "call:tool_name(args)".
 * Returns: [{ id, type: "function", function: { name, arguments } }]
 */
function _parseTextToolCalls(content) {
  if (!content) return [];

  // Last-resort recovery: strip quoting noise and map the bare string onto
  // the tool's most likely parameter.
  const guessArgs = (toolName, raw) => {
    const value = raw.replace(/['"<|>]/g, "").trim();
    if (toolName === "read_file" || toolName === "glob_files") return { path: value };
    if (toolName === "grep_code") return { pattern: value };
    if (toolName === "bash_exec") return { command: value };
    if (toolName === "write_file") return { path: value, content: "" };
    return { input: value };
  };

  // Try, in order: Gemma-quoted JSON (key: → "key":), raw JSON, guess.
  const parseArgs = (toolName, raw) => {
    try {
      const normalized = raw.replace(/(\w+)\s*:\s*/g, '"$1":').replace(/<\|"\|>/g, '"');
      return JSON.parse("{" + normalized + "}");
    } catch {
      try {
        return JSON.parse(raw);
      } catch {
        return guessArgs(toolName, raw);
      }
    }
  };

  const patterns = [
    /call:(\w+)\{([^}]*(?:\{[^}]*\}[^}]*)*)\}/g, // call:name{args with nested braces}
    /call:(\w+)\(([^)]*)\)/g, // call:name(args)
  ];

  const calls = [];
  for (const pattern of patterns) {
    for (const match of content.matchAll(pattern)) {
      const args = parseArgs(match[1], match[2]);
      calls.push({
        id: "call_" + Date.now().toString(36) + "_" + calls.length,
        type: "function",
        function: { name: match[1], arguments: JSON.stringify(args) },
      });
    }
    if (calls.length > 0) break; // use first matching pattern only
  }
  return calls;
}
1334
+
1226
1335
  module.exports = { AgentEngine, TOOL_DEFINITIONS, BLOCKED_COMMANDS };
@@ -218,7 +218,7 @@ const SEED_DOCS = [
218
218
  metadata: { topic: "error-monitor" },
219
219
  },
220
220
  {
221
- text: "Agent tool details: read_file supports offset/limit for large files. edit_file does surgical find-and-replace (preferred for small fixes). glob_files discovers files by pattern (**/*.js). grep_code does regex search with context lines. list_dir shows directory contents with file sizes. move_file relocates/renames files. bash_exec runs shell commands (30s default timeout, 60s hard cap, dangerous commands blocked: rm -rf /, git push --force, npm publish). inspect_db reads SQLite: action=tables (list), action=schema (CREATE statements), action=query (SELECT/PRAGMA only). run_db_fix writes SQLite: UPDATE/DELETE/INSERT/ALTER, auto-backs up db file first. check_port finds what process is using a port (netstat/lsof). check_env lists environment variables with values redacted. audit_deps runs full npm health check (vulnerabilities, outdated, peer deps, unused, lock file). check_migration returns known upgrade paths with before/after code patterns. web_fetch retrieves URL content.",
221
+ text: "Agent tool details: read_file supports offset/limit for large files. edit_file does surgical find-and-replace (preferred for small fixes). glob_files discovers files by pattern (**/*.js). grep_code does regex search with context lines. list_dir shows directory contents with file sizes. move_file relocates/renames files. bash_exec runs shell commands (30s default timeout, 60s hard cap, dangerous commands blocked: rm -rf /, git push --force, npm publish). inspect_db reads SQLite: action=tables (list), action=schema (CREATE statements), action=query (SELECT/PRAGMA only). run_db_fix writes SQLite with SAFETY: auto-snapshots affected rows BEFORE write (SELECT WHERE matching the UPDATE/DELETE), executes the fix, snapshots AFTER, returns before/after comparison so agent can verify. Always backs up the DB file. Agent MUST inspect_db before run_db_fix — never write blind. For NaN/null data errors: prefer fixing code to handle edge cases over modifying production data. check_port finds what process is using a port (netstat/lsof). check_env lists environment variables with values redacted. audit_deps runs full npm health check. check_migration returns known upgrade paths. web_fetch retrieves URL content.",
222
222
  metadata: { topic: "agent-tools-detail" },
223
223
  },
224
224
  {
@@ -115,7 +115,7 @@ async function compact(text) {
115
115
  systemPrompt: "Compress the following text into a dense, semantically rich summary. Keep all technical terms, function names, file paths, and error messages. Remove filler words. Output ONLY the compressed text, nothing else.",
116
116
  userPrompt: text,
117
117
  maxTokens: 256,
118
- category: "brain",
118
+ category: "compacting",
119
119
  });
120
120
 
121
121
  return result.content || text;
@@ -17,7 +17,7 @@ const path = require("path");
17
17
  * - Config files (.env, .json, .yaml)
18
18
  */
19
19
 
20
- const SKIP_DIRS = new Set(["node_modules", ".wolverine", ".git", "dist", "build", "coverage", "src", "bin", "tests"]);
20
+ const SKIP_DIRS = new Set(["node_modules", ".wolverine", ".git", "dist", "build", "coverage", "src", "bin", "tests", "examples", "public", "static", "assets", "__tests__", ".next", ".nuxt"]);
21
21
  const CODE_EXTENSIONS = new Set([".js", ".ts", ".mjs", ".cjs", ".jsx", ".tsx"]);
22
22
  const CONFIG_EXTENSIONS = new Set([".json", ".yaml", ".yml", ".toml", ".env"]);
23
23
 
@@ -52,6 +52,11 @@ function scanProject(projectRoot) {
52
52
  // Recursive scan
53
53
  _scanDir(root, root, map);
54
54
 
55
+ // Cap collections to prevent memory bloat on large projects
56
+ if (map.functions.length > 500) map.functions = map.functions.slice(0, 500);
57
+ if (map.classes.length > 200) map.classes = map.classes.slice(0, 200);
58
+ if (map.exports.length > 300) map.exports = map.exports.slice(0, 300);
59
+
55
60
  // Build summary
56
61
  map.summary = _buildSummary(map);
57
62
 
@@ -88,12 +93,21 @@ function _scanDir(dir, root, map) {
88
93
 
89
94
  map.files.push({ path: relPath, type: "code" });
90
95
 
96
+ // Skip large/minified files — they bloat memory and aren't useful for repair context
97
+ let stat;
98
+ try { stat = fs.statSync(fullPath); } catch { continue; }
99
+ if (stat.size > 100000) continue; // Skip files > 100KB (bundles, minified, generated)
100
+
91
101
  // Parse the file for patterns
92
102
  let content;
93
103
  try {
94
104
  content = fs.readFileSync(fullPath, "utf-8");
95
105
  } catch { continue; }
96
106
 
107
+ // Skip minified code (avg line length > 200 chars = likely minified)
108
+ const lines = content.split("\n");
109
+ if (lines.length > 0 && content.length / lines.length > 200) continue;
110
+
97
111
  _extractRoutes(content, relPath, map);
98
112
  _extractExports(content, relPath, map);
99
113
  _extractFunctions(content, relPath, map);
@@ -5,6 +5,7 @@ const { getModel, detectProvider } = require("./models");
5
5
 
6
6
  let _openaiClient = null;
7
7
  let _anthropicClient = null;
8
+ let _wolverineClient = null;
8
9
  let _tracker = null;
9
10
 
10
11
  function setTokenTracker(tracker) { _tracker = tracker; }
@@ -35,9 +36,24 @@ function _track(model, category, usage, tool, latencyMs, success) {
35
36
 
36
37
  function getClient(provider) {
37
38
  if (provider === "anthropic") return _getAnthropicClient();
39
+ if (provider === "wolverine") return _getWolverineClient();
38
40
  return _getOpenAIClient();
39
41
  }
40
42
 
43
+ function _getWolverineClient() {
44
+ if (!_wolverineClient) {
45
+ // Wolverine inference: direct to GPU or via proxy
46
+ // WOLVERINE_GPU_KEY = internal key for direct GPU access (llama.cpp --api-key)
47
+ // WOLVERINE_API_KEY = user key for billed proxy access (api.wolverinenode.xyz)
48
+ const baseURL = process.env.WOLVERINE_INFERENCE_URL
49
+ ? process.env.WOLVERINE_INFERENCE_URL + "/v1"
50
+ : "https://api.wolverinenode.xyz/v1";
51
+ const apiKey = process.env.WOLVERINE_GPU_KEY || process.env.WOLVERINE_API_KEY || "none";
52
+ _wolverineClient = new OpenAI({ apiKey, baseURL });
53
+ }
54
+ return _wolverineClient;
55
+ }
56
+
41
57
  function _getOpenAIClient() {
42
58
  if (!_openaiClient) {
43
59
  const apiKey = process.env.OPENAI_API_KEY;
@@ -65,6 +81,7 @@ function isReasoningModel(model) {
65
81
  }
66
82
 
67
83
  function isAnthropicModel(model) { return detectProvider(model) === "anthropic"; }
84
+ function isWolverineModel(model) { return detectProvider(model) === "wolverine"; }
68
85
 
69
86
  /**
70
87
  * Per-model max output token limits (with 10% overestimation buffer).
@@ -176,6 +193,8 @@ async function aiCall({ model, systemPrompt, userPrompt, maxTokens = 2048, tools
176
193
  try {
177
194
  if (provider === "anthropic") {
178
195
  result = await _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
196
+ } else if (provider === "wolverine") {
197
+ result = await _chatCall(_getWolverineClient(), { model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
179
198
  } else if (isResponsesModel(model)) {
180
199
  result = await _responsesCall(_getOpenAIClient(), { model, systemPrompt, userPrompt, maxTokens, tools });
181
200
  } else {
@@ -200,6 +219,8 @@ async function aiCallWithHistory({ model, messages, tools, maxTokens = 4096, cat
200
219
  try {
201
220
  if (provider === "anthropic") {
202
221
  result = await _anthropicCallWithHistory({ model, messages, tools, maxTokens });
222
+ } else if (provider === "wolverine") {
223
+ result = await _chatCallWithHistory(_getWolverineClient(), { model, messages, tools, maxTokens });
203
224
  } else if (isResponsesModel(model)) {
204
225
  result = await _responsesCallWithHistory(_getOpenAIClient(), { model, messages, tools, maxTokens });
205
226
  } else {
@@ -573,7 +594,7 @@ ${backupSourceCode ? `## Last Known Working Version\n\`\`\`javascript\n${backupS
573
594
  "changes" is for code edits (optional, use for actual code fixes).
574
595
  Include both if needed, or just one.`;
575
596
 
576
- const result = await aiCall({ model, systemPrompt, userPrompt, maxTokens: 2048, category: "heal" });
597
+ const result = await aiCall({ model, systemPrompt, userPrompt, maxTokens: 2048, category: "coding" });
577
598
  const content = result.content;
578
599
  const cleaned = content.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
579
600
 
@@ -97,11 +97,11 @@ function classifyError(errorMessage, fullStderr) {
97
97
  const full = (fullStderr || "").toLowerCase();
98
98
 
99
99
  // Missing npm package: Cannot find module 'cors' (not a relative path)
100
- if (/cannot find module '(?![./\\])/.test(msg) || /module_not_found/.test(full)) {
100
+ if (/cannot find module ['"](?![./\\])/.test(msg) || /module_not_found/.test(full)) {
101
101
  return "missing_module";
102
102
  }
103
103
  // Missing local file: Cannot find module './routes/api'
104
- if (/cannot find module '[./\\]/.test(msg) || /enoent/.test(msg)) {
104
+ if (/cannot find module ['"][./\\]/.test(msg) || /enoent/.test(msg)) {
105
105
  return "missing_file";
106
106
  }
107
107
  // Permission denied
@@ -15,7 +15,14 @@
15
15
  */
16
16
/**
 * Map a model name to its provider by prefix.
 * First matching rule wins; anything unmatched (gpt-*, o1-*, o3-*, o4-*,
 * codex-*, text-embedding-*, dall-e-*, etc.) defaults to "openai".
 *
 * @param {string} model - model identifier (may be falsy)
 * @returns {string} provider key
 */
function detectProvider(model) {
  if (!model) return "openai";
  const rules = [
    [/^(wolverine|gemma)/i, "wolverine"],
    [/^(claude|anthropic)/i, "anthropic"],
    [/^(gemini|google)/i, "google"],
    [/^(mistral|codestral|pixtral)/i, "mistral"],
    [/^(llama|meta)/i, "meta"],
    [/^deepseek/i, "deepseek"],
    [/^(command|cohere)/i, "cohere"],
  ];
  const hit = rules.find(([prefix]) => prefix.test(model));
  return hit ? hit[1] : "openai";
}
21
28