wolverine-ai 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wolverine-ai",
3
- "version": "1.6.0",
3
+ "version": "1.7.0",
4
4
  "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -24,7 +24,7 @@
24
24
  },
25
25
 
26
26
  "cluster": {
27
- "mode": "auto",
27
+ "enabled": false,
28
28
  "workers": 0
29
29
  },
30
30
 
@@ -48,7 +48,7 @@
48
48
  },
49
49
 
50
50
  "errorMonitor": {
51
- "defaultThreshold": 3,
51
+ "defaultThreshold": 1,
52
52
  "windowMs": 30000,
53
53
  "cooldownMs": 60000
54
54
  },
package/server/index.js CHANGED
@@ -1,61 +1,85 @@
1
- const fastify = require("fastify")({ logger: false });
1
+ const cluster = require("cluster");
2
+ const os = require("os");
2
3
  const PORT = process.env.PORT || 3000;
3
4
 
4
- // Routes
5
- fastify.register(require("./routes/health"), { prefix: "/health" });
6
- fastify.register(require("./routes/api"), { prefix: "/api" });
7
- fastify.register(require("./routes/time"), { prefix: "/time" });
8
-
9
- // Root
10
- fastify.get("/", async () => ({
11
- name: "Wolverine Server",
12
- version: "1.0.0",
13
- status: "running",
14
- uptime: process.uptime(),
15
- }));
16
-
17
- // 404
18
- fastify.setNotFoundHandler((req, reply) => {
19
- reply.code(404).send({ error: "Not found", path: req.url });
20
- });
21
-
22
- // Error handler reports to Wolverine parent via IPC for auto-healing
23
- fastify.setErrorHandler((err, req, reply) => {
24
- console.error(`[ERROR] ${err.message}`);
25
- reply.code(500).send({ error: err.message });
26
-
27
- // Report to Wolverine via IPC (if running under wolverine)
28
- if (typeof process.send === "function") {
29
- try {
30
- // Extract file/line from stack trace
31
- let file = null, line = null;
32
- if (err.stack) {
33
- const frames = err.stack.split("\n");
34
- for (const frame of frames) {
35
- const m = frame.match(/\(([^)]+):(\d+):(\d+)\)/) || frame.match(/at\s+([^\s(]+):(\d+):(\d+)/);
36
- if (m && !m[1].includes("node_modules") && !m[1].includes("node:")) {
37
- file = m[1]; line = parseInt(m[2], 10); break;
5
+ // Cluster mode: master forks workers, workers run the server.
6
+ // Wolverine sets WOLVERINE_RECOMMENDED_WORKERS based on system detection.
7
+ // Set cluster.enabled=true in settings.json or WOLVERINE_CLUSTER=true to enable.
8
+ const clusterEnabled = process.env.WOLVERINE_CLUSTER === "true";
9
+ const workerCount = parseInt(process.env.WOLVERINE_RECOMMENDED_WORKERS, 10) || os.cpus().length;
10
+
11
+ if (clusterEnabled && cluster.isPrimary && workerCount > 1) {
12
+ console.log(`[CLUSTER] Primary ${process.pid} forking ${workerCount} workers`);
13
+ for (let i = 0; i < workerCount; i++) cluster.fork();
14
+
15
+ cluster.on("exit", (worker, code) => {
16
+ if (code !== 0) {
17
+ console.log(`[CLUSTER] Worker ${worker.process.pid} died (code ${code}), respawning...`);
18
+ cluster.fork();
19
+ }
20
+ });
21
+ } else {
22
+ // Single worker or cluster worker — run the server
23
+ const fastify = require("fastify")({ logger: false });
24
+
25
+ // Routes
26
+ fastify.register(require("./routes/health"), { prefix: "/health" });
27
+ fastify.register(require("./routes/api"), { prefix: "/api" });
28
+ fastify.register(require("./routes/time"), { prefix: "/time" });
29
+
30
+ // Root
31
+ fastify.get("/", async () => ({
32
+ name: "Wolverine Server",
33
+ version: "1.0.0",
34
+ status: "running",
35
+ uptime: process.uptime(),
36
+ pid: process.pid,
37
+ worker: cluster.isWorker ? cluster.worker.id : "primary",
38
+ }));
39
+
40
+ // 404
41
+ fastify.setNotFoundHandler((req, reply) => {
42
+ reply.code(404).send({ error: "Not found", path: req.url });
43
+ });
44
+
45
+ // Error handler — reports to Wolverine parent via IPC for auto-healing
46
+ fastify.setErrorHandler((err, req, reply) => {
47
+ console.error(`[ERROR] ${err.message}`);
48
+ reply.code(500).send({ error: err.message });
49
+
50
+ // Report to Wolverine via IPC (if running under wolverine)
51
+ if (typeof process.send === "function") {
52
+ try {
53
+ let file = null, line = null;
54
+ if (err.stack) {
55
+ const frames = err.stack.split("\n");
56
+ for (const frame of frames) {
57
+ const m = frame.match(/\(([^)]+):(\d+):(\d+)\)/) || frame.match(/at\s+([^\s(]+):(\d+):(\d+)/);
58
+ if (m && !m[1].includes("node_modules") && !m[1].includes("node:")) {
59
+ file = m[1]; line = parseInt(m[2], 10); break;
60
+ }
38
61
  }
39
62
  }
40
- }
41
- process.send({
42
- type: "route_error",
43
- path: req.url,
44
- method: req.method,
45
- statusCode: 500,
46
- message: err.message,
47
- stack: err.stack,
48
- file,
49
- line,
50
- timestamp: Date.now(),
51
- });
52
- } catch (_) { /* IPC send failed — non-fatal */ }
53
- }
54
- });
55
-
56
- fastify.listen({ port: PORT, host: "0.0.0.0" }, (err) => {
57
- if (err) { console.error(err); process.exit(1); }
58
- console.log(`Server running on http://localhost:${PORT}`);
59
- console.log(`Health: http://localhost:${PORT}/health`);
60
- console.log(`API: http://localhost:${PORT}/api`);
61
- });
63
+ process.send({
64
+ type: "route_error",
65
+ path: req.url,
66
+ method: req.method,
67
+ statusCode: 500,
68
+ message: err.message,
69
+ stack: err.stack,
70
+ file,
71
+ line,
72
+ timestamp: Date.now(),
73
+ });
74
+ } catch (_) { /* IPC send failed — non-fatal */ }
75
+ }
76
+ });
77
+
78
+ fastify.listen({ port: PORT, host: "0.0.0.0", reusePort: clusterEnabled }, (err) => {
79
+ if (err) { console.error(err); process.exit(1); }
80
+ const label = cluster.isWorker ? ` (worker ${cluster.worker.id})` : "";
81
+ console.log(`Server running on http://localhost:${PORT}${label}`);
82
+ console.log(`Health: http://localhost:${PORT}/health`);
83
+ console.log(`API: http://localhost:${PORT}/api`);
84
+ });
85
+ }
@@ -96,7 +96,7 @@ const SEED_DOCS = [
96
96
  metadata: { topic: "skill-sql-patterns" },
97
97
  },
98
98
  {
99
- text: "Database best practices: SafeDB uses split connections — separate read connection (concurrent, never waits) and write connection (single writer, FIFO queue). Write queue drains synchronously in one microtask, zero delays. WAL mode means readers never block writers. Each write is microseconds. db.transaction(fn) queues as single atomic unit. No busy_timeout, no blocking, no IPC. Reads: db.get(), db.all() are instant. Writes: db.run(), db.exec() go through queue.",
99
+ text: "Database best practices: SafeDB uses split connections — separate read connection (concurrent, never waits) and write connection (single writer, FIFO queue). Write queue drains synchronously in one microtask, zero delays. WAL mode means readers never block writers. Each write is microseconds. db.transaction(fn) queues as single atomic unit. No busy_timeout, no blocking, no IPC. Reads: db.get(), db.all() are instant. Writes: db.run(), db.exec() go through queue. Idempotent writes: db.idempotent(key, fn, ttlSeconds) executes fn only once per key — prevents double-charge/double-insert when retries or cluster workers duplicate a request. Idempotency keys stored in _idempotency table (auto-created on connect), shared across all workers via WAL mode.",
100
100
  metadata: { topic: "skill-sql-best-practices" },
101
101
  },
102
102
  {
@@ -120,7 +120,7 @@ const SEED_DOCS = [
120
120
  metadata: { topic: "process-manager" },
121
121
  },
122
122
  {
123
- text: "Auto-clustering: wolverine detects machine capabilities (cores, RAM, disk, platform, Docker/K8s, cloud provider) and forks optimal workers. 2 cores = 2 workers, 3-4 = cores-1, 5-8 = cores-1 cap 6, 9+ = cores/2 cap 16. Workers auto-respawn on crash with exponential backoff. CLI: --single (no cluster), --workers N (fixed), --info (show system). Settings in server/config/settings.json cluster.mode.",
123
+ text: "Cluster mode: server handles its own clustering (not wolverine-level). WOLVERINE_CLUSTER=true enables it. Server forks N workers (WOLVERINE_RECOMMENDED_WORKERS set by system detection). Workers share port 3000 via reusePort. Wolverine kills entire process tree on restart (_killProcessTree: taskkill /T on Windows, kill -pgid + pgrep -P on Linux). Idempotency protection prevents double-fire: idempotencyGuard() middleware deduplicates write requests across workers using shared SQLite _idempotency table. Client sends X-Idempotency-Key header, or auto-generated from method+path+body hash. All workers see the same table via WAL mode. SafeDB.idempotent(key, fn) for database-level dedup.",
124
124
  metadata: { topic: "clustering" },
125
125
  },
126
126
  {
@@ -220,9 +220,13 @@ const SEED_DOCS = [
220
220
  metadata: { topic: "agent-tools-v2" },
221
221
  },
222
222
  {
223
- text: "Server problem categories the agent can fix: CODE BUGS (SyntaxError, TypeError, ReferenceError → edit_file), DEPENDENCIES (Cannot find module → npm install, corrupted node_modules → rm + reinstall), DATABASE (invalid entries → run_db_fix UPDATE, missing table → CREATE TABLE, schema mismatch → ALTER TABLE, constraint violation → fix data or schema), CONFIG (invalid JSON → edit_file, missing env vars → write .env, wrong port → edit config), FILESYSTEM (misplaced files → move_file, missing directories → bash_exec mkdir, wrong permissions → chmod), NETWORK (port conflict → check_port + kill, service down → restart, connection refused → check config), STATE (corrupted cache → delete + restart, stale locks → remove lock file, git conflicts → resolve markers). The agent investigates before fixing — reads files, checks directories, inspects databases, never guesses.",
223
+ text: "Server problem categories the agent can fix: CODE BUGS (SyntaxError, TypeError, ReferenceError → edit_file), DEPENDENCIES (Cannot find module → npm install, corrupted node_modules → rm + reinstall), DATABASE (invalid entries → run_db_fix UPDATE, missing table → CREATE TABLE, schema mismatch → ALTER TABLE, constraint violation → fix data or schema), CONFIG (invalid JSON → edit_file, missing env vars → write .env, wrong port → edit config), FILESYSTEM (misplaced files → move_file, missing directories → bash_exec mkdir, wrong permissions → chmod), NETWORK (port conflict → check_port + kill, service down → restart, connection refused → check config), STATE (corrupted cache → delete + restart, stale locks → remove lock file, git conflicts → resolve markers), IDEMPOTENCY (double-fire → add idempotencyGuard middleware, missing idempotency key → add X-Idempotency-Key header support, duplicate DB entries → add UNIQUE constraint or use db.idempotent()). The agent investigates before fixing — reads files, checks directories, inspects databases, never guesses.",
224
224
  metadata: { topic: "server-problems" },
225
225
  },
226
+ {
227
+ text: "Idempotency protection: two layers prevent double-fire in cluster mode. Layer 1: idempotencyGuard() Fastify middleware — intercepts POST/PUT/PATCH/DELETE, checks X-Idempotency-Key header (or auto-generates key from method+path+body hash), queries _idempotency table. If key exists and not expired → return cached response with X-Idempotency-Cached:true header, skip handler. If new → pass through, idempotencyAfterHook() stores response. Layer 2: SafeDB.idempotent(key, fn) — database-level dedup. Wraps fn in transaction, checks key, executes only if new. Returns {executed:true/false, result, cached}. Keys expire after TTL (default 24h). All workers share the SQLite _idempotency table via WAL mode — globally consistent. Auto-pruned on connect and via db.pruneIdempotency().",
228
+ metadata: { topic: "idempotency" },
229
+ },
226
230
  {
227
231
  text: "Heal pipeline no longer requires a file path. When no file is identified from the error (database errors, config problems, port conflicts), the pipeline skips fast path and goes straight to the agent, which uses investigation tools (glob_files, grep_code, list_dir, inspect_db, check_env, check_port) to find the root cause. Agent verification for no-file errors: if agent made changes or ran commands, trust the agent's assessment. For file-based errors, verification uses syntax check + boot probe as before.",
228
232
  metadata: { topic: "fileless-heal" },
@@ -95,7 +95,7 @@ class WolverineRunner {
95
95
 
96
96
  // Error monitor — detects caught 500 errors without process crash
97
97
  this.errorMonitor = new ErrorMonitor({
98
- threshold: parseInt(process.env.WOLVERINE_ERROR_THRESHOLD, 10) || 3,
98
+ threshold: parseInt(process.env.WOLVERINE_ERROR_THRESHOLD, 10) || 1,
99
99
  windowMs: parseInt(process.env.WOLVERINE_ERROR_WINDOW_MS, 10) || 30000,
100
100
  cooldownMs: parseInt(process.env.WOLVERINE_ERROR_COOLDOWN_MS, 10) || 60000,
101
101
  logger: this.logger,
@@ -236,11 +236,11 @@ class WolverineRunner {
236
236
 
237
237
  oldChild.removeAllListeners("exit");
238
238
  oldChild.once("exit", onExit);
239
- oldChild.kill("SIGTERM");
239
+ this._killProcessTree(oldChild.pid, "SIGTERM");
240
240
 
241
241
  // Force kill if it doesn't exit in 3s
242
242
  setTimeout(() => {
243
- try { oldChild.kill("SIGKILL"); } catch {}
243
+ this._killProcessTree(oldChild.pid, "SIGKILL");
244
244
  onExit();
245
245
  }, 3000);
246
246
  } else {
@@ -278,13 +278,14 @@ class WolverineRunner {
278
278
 
279
279
  this.logger.info(EVENT_TYPES.PROCESS_STOP, "Wolverine stopped (graceful shutdown)");
280
280
 
281
- // Kill child — remove exit listener first so it doesn't trigger heal
281
+ // Kill child + all its descendants — remove exit listener first so it doesn't trigger heal
282
282
  if (this.child) {
283
+ const pid = this.child.pid;
283
284
  this.child.removeAllListeners("exit");
284
- this.child.kill("SIGTERM");
285
+ this._killProcessTree(pid, "SIGTERM");
285
286
  // Force kill after 3s if it doesn't respond
286
287
  setTimeout(() => {
287
- try { if (this.child) this.child.kill("SIGKILL"); } catch {}
288
+ this._killProcessTree(pid, "SIGKILL");
288
289
  }, 3000);
289
290
  this.child = null;
290
291
  }
@@ -304,9 +305,15 @@ class WolverineRunner {
304
305
  // Spawn with --require error-hook.js for IPC error reporting
305
306
  // The error hook auto-patches Fastify/Express to report caught 500s
306
307
  const errorHookPath = path.join(__dirname, "error-hook.js");
308
+ const sysInfo = require("./system-info").detect();
307
309
  this.child = spawn("node", ["--require", errorHookPath, this.scriptPath], {
308
310
  cwd: this.cwd,
309
- env: { ...process.env },
311
+ env: {
312
+ ...process.env,
313
+ // Tell the user's server how many workers to fork (if it uses clustering)
314
+ WOLVERINE_RECOMMENDED_WORKERS: String(sysInfo.recommended?.workers || 1),
315
+ WOLVERINE_MANAGED: "1", // Signal that wolverine is managing this process
316
+ },
310
317
  stdio: ["inherit", "inherit", "pipe", "ipc"],
311
318
  });
312
319
 
@@ -347,8 +354,9 @@ class WolverineRunner {
347
354
 
348
355
  // Kill the hung process — remove exit listener to prevent double-heal
349
356
  if (this.child) {
357
+ const pid = this.child.pid;
350
358
  this.child.removeAllListeners("exit");
351
- this.child.kill("SIGKILL");
359
+ this._killProcessTree(pid, "SIGKILL");
352
360
  this.child = null;
353
361
  }
354
362
 
@@ -587,6 +595,34 @@ class WolverineRunner {
587
595
  }
588
596
  }
589
597
 
598
+ /**
599
+ * Kill a process and all its children (process tree kill).
600
+ * Handles servers that fork workers internally — prevents orphaned processes.
601
+ */
602
+ _killProcessTree(pid, signal = "SIGTERM") {
603
+ if (!pid) return;
604
+ try {
605
+ if (process.platform === "win32") {
606
+ // taskkill /T kills the process tree
607
+ execSync(`taskkill /PID ${pid} /T /F`, { timeout: 3000, stdio: "ignore" });
608
+ } else {
609
+ // Kill the process group (negative PID)
610
+ try { process.kill(-pid, signal); } catch {}
611
+ // Also kill individual PID in case it's not a group leader
612
+ try { process.kill(pid, signal); } catch {}
613
+ // Find and kill children via pgrep
614
+ try {
615
+ const children = execSync(`pgrep -P ${pid} 2>/dev/null`, { encoding: "utf-8", timeout: 3000 }).trim();
616
+ if (children) {
617
+ for (const cpid of children.split("\n").map(p => parseInt(p, 10)).filter(Boolean)) {
618
+ try { process.kill(cpid, signal); } catch {}
619
+ }
620
+ }
621
+ } catch { /* no children or pgrep not available */ }
622
+ }
623
+ } catch { /* process already dead */ }
624
+ }
625
+
590
626
  _ensurePortFree() {
591
627
  const port = parseInt(process.env.PORT, 10) || 3000;
592
628
  try {
package/src/index.js CHANGED
@@ -33,7 +33,7 @@ const { scanProject } = require("./brain/function-map");
33
33
  const { detect: detectSystem } = require("./core/system-info");
34
34
  const { ClusterManager } = require("./core/cluster-manager");
35
35
  const { loadConfig, getConfig } = require("./core/config");
36
- const { sqlGuard, SafeDB, scanForInjection } = require("./skills/sql");
36
+ const { sqlGuard, SafeDB, scanForInjection, idempotencyGuard, idempotencyAfterHook } = require("./skills/sql");
37
37
 
38
38
  module.exports = {
39
39
  // Core
@@ -93,4 +93,6 @@ module.exports = {
93
93
  sqlGuard,
94
94
  SafeDB,
95
95
  scanForInjection,
96
+ idempotencyGuard,
97
+ idempotencyAfterHook,
96
98
  };
package/src/skills/sql.js CHANGED
@@ -201,6 +201,18 @@ class SafeDB {
201
201
  this._writer.pragma("foreign_keys = ON");
202
202
  this._writer.pragma("synchronous = NORMAL");
203
203
 
204
+ // Idempotency table — prevents double-execution of writes in cluster mode
205
+ this._writer.exec(`
206
+ CREATE TABLE IF NOT EXISTS _idempotency (
207
+ key TEXT PRIMARY KEY,
208
+ result TEXT,
209
+ created_at INTEGER DEFAULT (strftime('%s','now')),
210
+ expires_at INTEGER
211
+ )
212
+ `);
213
+ // Clean expired keys on connect
214
+ this._writer.exec(`DELETE FROM _idempotency WHERE expires_at < strftime('%s','now')`);
215
+
204
216
  } catch (err) {
205
217
  if (err.code === "MODULE_NOT_FOUND") {
206
218
  throw new Error("Install better-sqlite3: npm install better-sqlite3");
@@ -216,6 +228,49 @@ class SafeDB {
216
228
  process.on("exit", () => this.close());
217
229
  }
218
230
 
231
+ /**
232
+ * Idempotent write — execute fn only if this key hasn't been seen before.
233
+ * In cluster mode, prevents the same request from double-firing across workers.
234
+ *
235
+ * @param {string} key — unique idempotency key (e.g. from X-Idempotency-Key header)
236
+ * @param {Function} fn — function that performs the write, receives writerProxy
237
+ * @param {number} ttlSeconds — how long to remember this key (default: 86400 = 24h)
238
+ * @returns {{ executed: boolean, result: any }} — executed=false if key was already seen
239
+ */
240
+ idempotent(key, fn, ttlSeconds = 86400) {
241
+ this._assertOpen();
242
+ return this._enqueueWrite(() => {
243
+ // Check if key already executed
244
+ const existing = this._writer.prepare("SELECT result FROM _idempotency WHERE key = ? AND expires_at > strftime('%s','now')").get(key);
245
+ if (existing) {
246
+ return { executed: false, result: JSON.parse(existing.result || "null"), cached: true };
247
+ }
248
+
249
+ // Execute the write
250
+ const txn = this._writer.transaction(() => {
251
+ const result = fn(this._writerProxy());
252
+ // Store the key so duplicates are rejected
253
+ this._writer.prepare(
254
+ "INSERT OR REPLACE INTO _idempotency (key, result, expires_at) VALUES (?, ?, strftime('%s','now') + ?)"
255
+ ).run(key, JSON.stringify(result ?? null), ttlSeconds);
256
+ return result;
257
+ });
258
+
259
+ const result = txn();
260
+ return { executed: true, result, cached: false };
261
+ });
262
+ }
263
+
264
+ /**
265
+ * Clean up expired idempotency keys. Call periodically (e.g., every hour).
266
+ */
267
+ pruneIdempotency() {
268
+ this._assertOpen();
269
+ return this._enqueueWrite(() => {
270
+ return this._writer.prepare("DELETE FROM _idempotency WHERE expires_at < strftime('%s','now')").run();
271
+ });
272
+ }
273
+
219
274
  /**
220
275
  * Write query (INSERT, UPDATE, DELETE, CREATE).
221
276
  * Queued and executed in order. Returns a promise that resolves with the result.
@@ -327,27 +382,137 @@ class SafeDB {
327
382
  }
328
383
  }
329
384
 
385
+ // ── Idempotency Middleware ──────────────────────────────────────
386
+
387
+ /**
388
+ * Request idempotency middleware — prevents double-fire in cluster mode.
389
+ *
390
+ * How it works:
391
+ * 1. Client sends write request (POST/PUT/PATCH/DELETE) with X-Idempotency-Key header
392
+ * 2. Middleware checks if this key was already processed
393
+ * 3. If yes: return cached response (no re-execution)
394
+ * 4. If no: execute handler, cache response, return result
395
+ *
396
+ * Without the header, mutating requests get an auto-generated key based on
397
+ * method + path + body hash. This means identical retries are deduplicated
398
+ * even without client cooperation.
399
+ *
400
+ * In cluster mode (reusePort), a retry can land on a different worker.
401
+ * Since all workers share the same SQLite database (WAL mode), the
402
+ * idempotency table is visible to all workers instantly.
403
+ *
404
+ * Safe methods (GET, HEAD, OPTIONS) are always passed through — they're
405
+ * inherently idempotent.
406
+ *
407
+ * @param {object} options
408
+ * @param {SafeDB} options.db — SafeDB instance (must be connected)
409
+ * @param {number} options.ttlSeconds — how long to cache responses (default: 86400)
410
+ * @param {object} options.logger — wolverine EventLogger (optional)
411
+ */
412
+ function idempotencyGuard(options = {}) {
413
+ const db = options.db;
414
+ const ttlSeconds = options.ttlSeconds || 86400;
415
+ const logger = options.logger || null;
416
+ const crypto = require("crypto");
417
+
418
+ return async (req, res, next) => {
419
+ // Safe methods are inherently idempotent — pass through
420
+ const method = (req.method || "GET").toUpperCase();
421
+ if (["GET", "HEAD", "OPTIONS"].includes(method)) return next();
422
+
423
+ // Get or generate idempotency key
424
+ let key = req.headers["x-idempotency-key"] || req.headers["idempotency-key"];
425
+ if (!key) {
426
+ // Auto-generate from method + path + body hash
427
+ const bodyStr = typeof req.body === "string" ? req.body : JSON.stringify(req.body || "");
428
+ key = crypto.createHash("sha256").update(`${method}:${req.url}:${bodyStr}`).digest("hex");
429
+ }
430
+
431
+ if (!db || !db._writer) return next(); // No DB — can't check, pass through
432
+
433
+ try {
434
+ // Check idempotency table directly (read from writer for consistency)
435
+ const existing = db._writer.prepare(
436
+ "SELECT result FROM _idempotency WHERE key = ? AND expires_at > strftime('%s','now')"
437
+ ).get(key);
438
+
439
+ if (existing) {
440
+ // Already processed — return cached response
441
+ const cached = JSON.parse(existing.result || "null");
442
+ if (logger) logger.debug("idempotency.hit", `Duplicate request blocked: ${method} ${req.url}`, { key: key.slice(0, 16) });
443
+
444
+ const status = cached?.statusCode || 200;
445
+ const body = cached?.body || cached;
446
+ if (typeof res.code === "function") {
447
+ // Fastify
448
+ res.code(status).header("X-Idempotency-Cached", "true").send(body);
449
+ } else {
450
+ // Express
451
+ res.status(status).set("X-Idempotency-Cached", "true").json(body);
452
+ }
453
+ return;
454
+ }
455
+
456
+ // Not seen — attach key to request for the route handler to use
457
+ req._idempotencyKey = key;
458
+ req._idempotencyTtl = ttlSeconds;
459
+ } catch {
460
+ // DB error — don't block the request, just pass through
461
+ }
462
+
463
+ next();
464
+ };
465
+ }
466
+
467
+ /**
468
+ * After-response hook — stores the response for future idempotency checks.
469
+ * For Fastify, add as onSend hook. For Express, monkey-patch res.json.
470
+ *
471
+ * @param {SafeDB} db — connected SafeDB instance
472
+ */
473
+ function idempotencyAfterHook(db) {
474
+ return (req, reply, payload, done) => {
475
+ if (req._idempotencyKey && db && db._writer) {
476
+ try {
477
+ const statusCode = reply.statusCode || 200;
478
+ const result = JSON.stringify({ statusCode, body: typeof payload === "string" ? JSON.parse(payload) : payload });
479
+ db._writer.prepare(
480
+ "INSERT OR IGNORE INTO _idempotency (key, result, expires_at) VALUES (?, ?, strftime('%s','now') + ?)"
481
+ ).run(req._idempotencyKey, result, req._idempotencyTtl || 86400);
482
+ } catch { /* non-fatal */ }
483
+ }
484
+ done();
485
+ };
486
+ }
487
+
330
488
  // ── Skill Metadata (for SkillRegistry discovery) ──
331
489
 
332
490
  const SKILL_NAME = "sql";
333
- const SKILL_DESCRIPTION = "SQL database interface with injection prevention. Provides sqlGuard() middleware to block SQL injection on all endpoints, and SafeDB class for parameterized-only database queries.";
334
- const SKILL_KEYWORDS = ["sql", "database", "db", "query", "injection", "sqlite", "postgres", "mysql", "select", "insert", "update", "delete", "table", "schema", "migration", "parameterized"];
335
- const SKILL_USAGE = `// Protect all routes from SQL injection
336
- const { sqlGuard } = require("../src/skills/sql");
337
- app.use(sqlGuard({ logger: wolverineLogger }));
338
-
339
- // Cluster-safe database (each worker gets its own connection)
340
- const { SafeDB } = require("../src/skills/sql");
491
+ const SKILL_DESCRIPTION = "SQL database interface with injection prevention + idempotency. Provides sqlGuard() middleware to block SQL injection, idempotencyGuard() middleware to prevent double-fire in cluster mode, and SafeDB class for parameterized-only database queries with built-in idempotency key support.";
492
+ const SKILL_KEYWORDS = ["sql", "database", "db", "query", "injection", "sqlite", "postgres", "mysql", "select", "insert", "update", "delete", "table", "schema", "migration", "parameterized", "idempotent", "idempotency", "duplicate", "double", "cluster", "transaction"];
493
+ const SKILL_USAGE = `// Protect routes from SQL injection + double-fire
494
+ const { sqlGuard, idempotencyGuard, idempotencyAfterHook, SafeDB } = require("../src/skills/sql");
341
495
  const db = new SafeDB({ type: "sqlite", path: "./server/data.db" });
342
- await db.connect(); // WAL mode, busy_timeout=5s, write serialization
496
+ await db.connect();
497
+
498
+ // Middleware: injection prevention + idempotency (cluster-safe)
499
+ fastify.addHook("preHandler", sqlGuard({ logger }));
500
+ fastify.addHook("preHandler", idempotencyGuard({ db, logger }));
501
+ fastify.addHook("onSend", idempotencyAfterHook(db));
343
502
 
344
- // Reads (concurrent across workers)
503
+ // Reads (concurrent across workers — never waits)
345
504
  const users = db.all("SELECT * FROM users WHERE role = ?", ["admin"]);
346
505
 
347
- // Writes (serialized — no corruption)
506
+ // Writes (serialized FIFO queue — no corruption)
348
507
  db.run("INSERT INTO users (name, role) VALUES (?, ?)", ["Alice", "admin"]);
349
508
 
350
- // Batch writes (atomic transaction, single lock)
509
+ // Idempotent write prevents double-charge/double-insert in cluster mode
510
+ const result = await db.idempotent("order-abc-123", (tx) => {
511
+ tx.run("INSERT INTO orders (id, total) VALUES (?, ?)", ["abc-123", 99.99]);
512
+ return { orderId: "abc-123" };
513
+ }); // result.executed=true first time, false on retry
514
+
515
+ // Atomic transaction (all-or-nothing)
351
516
  db.transaction((tx) => {
352
517
  tx.run("INSERT INTO orders (user_id, total) VALUES (?, ?)", [1, 99.99]);
353
518
  tx.run("UPDATE users SET order_count = order_count + 1 WHERE id = ?", [1]);
@@ -364,6 +529,8 @@ module.exports = {
364
529
 
365
530
  // Middleware
366
531
  sqlGuard,
532
+ idempotencyGuard,
533
+ idempotencyAfterHook,
367
534
  scanForInjection,
368
535
  deepScan,
369
536
 
package/PLATFORM.md DELETED
@@ -1,450 +0,0 @@
1
- # Wolverine Platform — Multi-Server Analytics & Management
2
-
3
- ## Overview
4
-
5
- The Wolverine Platform aggregates data from hundreds/thousands of wolverine server instances into a single backend + frontend dashboard. Each wolverine instance runs independently and broadcasts lightweight telemetry to the platform.
6
-
7
- ```
8
- ┌──────────────┐ ┌──────────────┐ ┌──────────────┐
9
- │ Wolverine #1 │ │ Wolverine #2 │ │ Wolverine #3 │ ... (N instances)
10
- │ server:3000 │ │ server:4000 │ │ server:5000 │
11
- │ dash:3001 │ │ dash:4001 │ │ dash:5001 │
12
- └──────┬───────┘ └──────┬───────┘ └──────┬───────┘
13
- │ │ │
14
- │ heartbeat │ heartbeat │ heartbeat
15
- │ (every 60s) │ (every 60s) │ (every 60s)
16
- ▼ ▼ ▼
17
- ┌─────────────────────────────────────────────────┐
18
- │ Wolverine Platform Backend │
19
- │ │
20
- │ POST /api/v1/heartbeat ← receive telemetry │
21
- │ GET /api/v1/servers ← list all instances │
22
- │ GET /api/v1/servers/:id ← single instance │
23
- │ GET /api/v1/analytics ← aggregated stats │
24
- │ GET /api/v1/alerts ← active alerts │
25
- │ WS /ws/live ← real-time stream │
26
- │ │
27
- │ Database: PostgreSQL (time-series optimized) │
28
- │ Cache: Redis (live state, pub/sub) │
29
- │ Queue: Bull/BullMQ (alert processing) │
30
- └─────────────────────────────────────────────────┘
31
-
32
-
33
- ┌─────────────────────────────────────────────────┐
34
- │ Wolverine Platform Frontend │
35
- │ │
36
- │ Fleet overview — all servers at a glance │
37
- │ Per-server deep dive — events, repairs, usage │
38
- │ Cost analytics — tokens, USD, by model │
39
- │ Alert management — acknowledge, escalate │
40
- │ Uptime history — SLA tracking over time │
41
- └─────────────────────────────────────────────────┘
42
- ```
43
-
44
- ---
45
-
46
- ## Telemetry Protocol
47
-
48
- ### Heartbeat Payload
49
-
50
- Each wolverine instance sends a heartbeat every **60 seconds** (configurable). This is the only outbound traffic — minimal network impact.
51
-
52
- ```json
53
- POST /api/v1/heartbeat
54
- Authorization: Bearer <PLATFORM_API_KEY>
55
- Content-Type: application/json
56
-
57
- {
58
- "instanceId": "wlv_a1b2c3d4",
59
- "version": "0.1.0",
60
- "timestamp": 1775073247574,
61
-
62
- "server": {
63
- "name": "my-api",
64
- "port": 3000,
65
- "uptime": 86400,
66
- "status": "healthy",
67
- "pid": 12345
68
- },
69
-
70
- "process": {
71
- "memoryMB": 128,
72
- "cpuPercent": 12,
73
- "peakMemoryMB": 256
74
- },
75
-
76
- "routes": {
77
- "total": 8,
78
- "healthy": 8,
79
- "unhealthy": 0,
80
- "slowest": { "path": "/api/search", "avgMs": 450 }
81
- },
82
-
83
- "repairs": {
84
- "total": 3,
85
- "successes": 2,
86
- "failures": 1,
87
- "lastRepair": {
88
- "error": "TypeError: Cannot read property 'id' of undefined",
89
- "resolution": "Added null check before accessing user.id",
90
- "tokens": 1820,
91
- "cost": 0.0045,
92
- "mode": "fast",
93
- "timestamp": 1775073200000
94
- }
95
- },
96
-
97
- "usage": {
98
- "totalTokens": 45000,
99
- "totalCost": 0.12,
100
- "totalCalls": 85,
101
- "byCategory": {
102
- "heal": { "tokens": 12000, "cost": 0.04, "calls": 5 },
103
- "chat": { "tokens": 25000, "cost": 0.05, "calls": 60 },
104
- "classify": { "tokens": 3000, "cost": 0.001, "calls": 15 },
105
- "develop": { "tokens": 5000, "cost": 0.03, "calls": 5 }
106
- },
107
- "byModel": {
108
- "gpt-5.4-mini": { "tokens": 30000, "cost": 0.06, "calls": 40 },
109
- "gpt-4o-mini": { "tokens": 15000, "cost": 0.02, "calls": 45 }
110
- },
111
- "byTool": {
112
- "call_endpoint": { "tokens": 5000, "cost": 0.01, "calls": 20 },
113
- "search_brain": { "tokens": 2000, "cost": 0.005, "calls": 10 }
114
- }
115
- },
116
-
117
- "brain": {
118
- "totalMemories": 45,
119
- "namespaces": { "docs": 23, "functions": 12, "errors": 5, "fixes": 3, "learnings": 2 }
120
- },
121
-
122
- "backups": {
123
- "total": 8,
124
- "stable": 3,
125
- "verified": 2,
126
- "unstable": 3
127
- },
128
-
129
- "alerts": [
130
- {
131
- "type": "memory_leak",
132
- "message": "Memory growing: +50MB over 10 samples",
133
- "severity": "warn",
134
- "timestamp": 1775073100000
135
- }
136
- ]
137
- }
138
- ```
139
-
140
- ### Design Principles
141
-
142
- - **Infrequent**: 1 heartbeat per 60 seconds = 1440/day per instance
143
- - **Small**: ~2KB per payload, gzipped < 500 bytes
144
- - **Idempotent**: same heartbeat can be sent twice safely (upsert by instanceId + timestamp)
145
- - **Offline-resilient**: if platform is down, wolverine queues heartbeats and replays on reconnect
146
- - **No PII**: never send secrets, user data, or source code in heartbeats
147
-
148
- ---
149
-
150
- ## Platform Backend Architecture
151
-
152
- ### Database Schema (PostgreSQL)
153
-
154
- ```sql
155
- -- Servers — one row per wolverine instance
156
- CREATE TABLE servers (
157
- id TEXT PRIMARY KEY, -- "wlv_a1b2c3d4"
158
- name TEXT NOT NULL,
159
- version TEXT,
160
- first_seen TIMESTAMPTZ NOT NULL DEFAULT NOW(),
161
- last_heartbeat TIMESTAMPTZ NOT NULL,
162
- status TEXT NOT NULL DEFAULT 'unknown', -- healthy, degraded, down, unknown
163
- config JSONB -- port, models, etc.
164
- );
165
-
166
- -- Time-series heartbeats — partitioned by day for scale
167
- CREATE TABLE heartbeats (
168
- id BIGSERIAL,
169
- server_id TEXT NOT NULL REFERENCES servers(id),
170
- timestamp TIMESTAMPTZ NOT NULL,
171
- uptime INTEGER,
172
- memory_mb INTEGER,
173
- cpu_percent INTEGER,
174
- routes_total INTEGER,
175
- routes_healthy INTEGER,
176
- routes_unhealthy INTEGER,
177
- tokens_total INTEGER,
178
- cost_total NUMERIC(10,6),
179
- repairs_total INTEGER,
180
- repairs_successes INTEGER,
181
- payload JSONB -- full heartbeat for deep queries
182
- ) PARTITION BY RANGE (timestamp);
183
-
184
- -- Create daily partitions automatically (pg_partman or manual)
185
- -- This allows dropping old data by partition instead of DELETE
186
-
187
- -- Repairs — detailed log of every fix
188
- CREATE TABLE repairs (
189
- id BIGSERIAL PRIMARY KEY,
190
- server_id TEXT NOT NULL REFERENCES servers(id),
191
- timestamp TIMESTAMPTZ NOT NULL,
192
- error TEXT,
193
- resolution TEXT,
194
- success BOOLEAN,
195
- mode TEXT, -- fast, agent, sub-agents
196
- model TEXT,
197
- tokens INTEGER,
198
- cost NUMERIC(10,6),
199
- iteration INTEGER,
200
- duration_ms INTEGER
201
- );
202
-
203
- -- Alerts — active and historical
204
- CREATE TABLE alerts (
205
- id BIGSERIAL PRIMARY KEY,
206
- server_id TEXT NOT NULL REFERENCES servers(id),
207
- type TEXT NOT NULL, -- memory_leak, route_down, crash_loop, etc.
208
- message TEXT,
209
- severity TEXT, -- info, warn, error, critical
210
- created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
211
- acknowledged_at TIMESTAMPTZ,
212
- resolved_at TIMESTAMPTZ,
213
- acknowledged_by TEXT
214
- );
215
-
216
- -- Usage aggregates — hourly rollups for fast analytics
217
- CREATE TABLE usage_hourly (
218
- server_id TEXT NOT NULL REFERENCES servers(id),
219
- hour TIMESTAMPTZ NOT NULL,
220
- tokens_total INTEGER DEFAULT 0,
221
- cost_total NUMERIC(10,6) DEFAULT 0,
222
- calls_total INTEGER DEFAULT 0,
223
- tokens_by_category JSONB,
224
- PRIMARY KEY (server_id, hour)
225
- );
226
-
227
- -- Indexes for common queries
228
- CREATE INDEX idx_heartbeats_server_time ON heartbeats (server_id, timestamp DESC);
229
- CREATE INDEX idx_repairs_server_time ON repairs (server_id, timestamp DESC);
230
- CREATE INDEX idx_alerts_active ON alerts (server_id) WHERE resolved_at IS NULL;
231
- CREATE INDEX idx_servers_status ON servers (status);
232
- ```
233
-
234
- ### API Endpoints
235
-
236
- ```
237
- Authentication: Bearer token (PLATFORM_API_KEY)
238
-
239
- POST /api/v1/heartbeat ← Receive heartbeat from wolverine instance
240
- → Upsert server, insert heartbeat, process alerts
241
- → Returns: { received: true, serverTime: "..." }
242
-
243
- GET /api/v1/servers ← List all instances
244
- → Query: ?status=healthy&sort=last_heartbeat&limit=50&offset=0
245
- → Returns: { servers: [...], total: 150, page: 1 }
246
-
247
- GET /api/v1/servers/:id ← Single instance detail
248
- → Returns: full server state + recent heartbeats + repairs + alerts
249
-
250
- GET /api/v1/servers/:id/heartbeats ← Heartbeat history
251
- → Query: ?from=2026-04-01&to=2026-04-02&interval=5m
252
- → Returns: time-series data for charting
253
-
254
- GET /api/v1/servers/:id/repairs ← Repair history for one server
255
- → Query: ?limit=50&success=true
256
- → Returns: { repairs: [...], stats: { total, successes, avgTokens } }
257
-
258
- GET /api/v1/analytics ← Fleet-wide aggregates
259
- → Query: ?period=24h or ?from=...&to=...
260
- → Returns: {
261
- totalServers, activeServers, totalRepairs, successRate,
262
- totalTokens, totalCost, tokensByCategory, costByModel,
263
- uptimePercent, avgResponseTime
264
- }
265
-
266
- GET /api/v1/analytics/cost ← Cost breakdown
267
- → Query: ?period=7d&groupBy=server|model|category
268
- → Returns: cost time-series + breakdown
269
-
270
- GET /api/v1/alerts ← Active alerts across fleet
271
- → Query: ?severity=critical&acknowledged=false
272
- → Returns: { alerts: [...], total: 5 }
273
-
274
- PATCH /api/v1/alerts/:id ← Acknowledge/resolve alert
275
- → Body: { action: "acknowledge" | "resolve", by: "admin@..." }
276
-
277
- WS /ws/live ← Real-time WebSocket stream
278
- → Streams: heartbeats, alerts, repairs as they arrive
279
- → Subscribe: { subscribe: ["heartbeat", "alert", "repair"] }
280
- → Filter: { servers: ["wlv_a1b2c3d4"] }
281
- ```
282
-
283
- ### Scaling Strategy
284
-
285
- ```
286
- 10 servers: Single PostgreSQL, single Node.js backend
287
- 100 servers: PostgreSQL with connection pooling (pgBouncer), Redis cache
288
- 1,000 servers: Partitioned heartbeats table, read replicas, queue workers
289
- 10,000 servers: TimescaleDB for time-series, horizontal API scaling, Kafka for ingestion
290
- 100,000+: Sharded by server_id, dedicated ingestion pipeline, ClickHouse for analytics
291
- ```
292
-
293
- **Key scaling decisions:**
294
- - Heartbeats are **append-only** — no updates, only inserts → perfect for time-series DBs
295
- - Hourly rollups in `usage_hourly` prevent expensive full-table scans for analytics
296
- - Partitioned by day → drop old data by partition (instant, no vacuum)
297
- - Redis caches the "current state" of each server (latest heartbeat) → fast fleet overview
298
- - WebSocket uses Redis pub/sub → horizontal scaling of frontend connections
299
- - Alert processing is async via job queue → doesn't block heartbeat ingestion
300
-
301
- ### Redis Structure
302
-
303
- ```
304
- wolverine:server:{id}:state ← Latest heartbeat (JSON, TTL 5min)
305
- wolverine:server:{id}:uptime ← Uptime counter (INCR every heartbeat)
306
- wolverine:servers:active ← Sorted set (score = last_heartbeat timestamp)
307
- wolverine:alerts:active ← Set of active alert IDs
308
- wolverine:stats:fleet ← Cached fleet-wide aggregates (TTL 30s)
309
- wolverine:pubsub:heartbeats ← Pub/sub channel for real-time streaming
310
- wolverine:pubsub:alerts ← Pub/sub channel for alert notifications
311
- ```
312
-
313
- ---
314
-
315
- ## Platform Frontend
316
-
317
- ### Pages
318
-
319
- **1. Fleet Overview**
320
- - Grid/list of all server instances
321
- - Color-coded status: green (healthy), yellow (degraded), red (down), gray (unknown)
322
- - Sortable by: status, uptime, memory, cost, last repair
323
- - Search/filter by name, status, tags
324
- - Fleet-wide stats bar: total servers, active, repairs today, cost today
325
-
326
- **2. Server Detail**
327
- - Real-time stats: memory, CPU, uptime, routes
328
- - Event timeline (same as local dashboard but from platform data)
329
- - Repair history with resolution details + token cost
330
- - Usage chart: tokens over time, cost over time
331
- - Route health table with response time trends
332
- - Backup status
333
- - Brain stats
334
-
335
- **3. Analytics**
336
- - Fleet-wide token usage over time (by day/hour)
337
- - Cost breakdown: by server, by model, by category
338
- - Repair success rate over time
339
- - Mean time to repair (MTTR) trend
340
- - Most expensive servers / most repaired servers
341
- - Uptime SLA tracking (99.9% target)
342
- - Response time percentiles across fleet
343
-
344
- **4. Alerts**
345
- - Active alerts sorted by severity
346
- - Acknowledge / resolve workflow
347
- - Alert history with resolution notes
348
- - Alert rules configuration (memory threshold, crash count, response time)
349
-
350
- **5. Cost Management**
351
- - Total spend by period (day/week/month)
352
- - Per-server cost ranking
353
- - Per-model cost ranking
354
- - Projected monthly cost based on current usage
355
- - Budget alerts (notify when approaching limit)
356
-
357
- ### Tech Stack Recommendation
358
-
359
- ```
360
- Frontend: Next.js + Tailwind + Recharts (or Tremor for dashboard components)
361
- Backend: Node.js + Express + PostgreSQL + Redis + BullMQ
362
- Auth: NextAuth.js or Clerk (team management)
363
- Hosting: Vercel (frontend) + Railway/Fly.io (backend) + Supabase (PostgreSQL)
364
- WebSocket: Socket.io or native WS through the backend
365
- ```
366
-
367
- ---
368
-
369
- ## Wolverine Client Integration
370
-
371
- ### New env variables for the wolverine instance:
372
-
373
- ```env
374
- # Platform telemetry (optional — wolverine works fine without it)
375
- WOLVERINE_PLATFORM_URL=https://api.wolverine.dev
376
- WOLVERINE_PLATFORM_KEY=wlvk_your_api_key_here
377
- WOLVERINE_INSTANCE_NAME=my-api-prod
378
- WOLVERINE_HEARTBEAT_INTERVAL_MS=60000
379
- ```
380
-
381
- ### Telemetry module to build in wolverine:
382
-
383
- ```
384
- src/platform/
385
- ├── telemetry.js ← Collects heartbeat data from all subsystems
386
- ├── heartbeat.js ← Sends heartbeat to platform on interval
387
- └── queue.js ← Queues heartbeats when platform is unreachable
388
- ```
389
-
390
- **telemetry.js** gathers data from:
391
- - `processMonitor.getMetrics()` → memory, CPU
392
- - `routeProber.getMetrics()` → route health
393
- - `tokenTracker.getAnalytics()` → usage
394
- - `repairHistory.getStats()` → repairs
395
- - `backupManager.getStats()` → backups
396
- - `brain.getStats()` → brain
397
- - `notifier` → active alerts
398
-
399
- **heartbeat.js** sends it:
400
- - HTTP POST to platform every 60s
401
- - Gzip compressed
402
- - Timeout: 5s (don't block if platform is slow)
403
- - On failure: queue locally, retry with exponential backoff
404
- - On reconnect: replay queued heartbeats
405
-
406
- **queue.js** handles offline resilience:
407
- - Append to `.wolverine/heartbeat-queue.jsonl` when platform unreachable
408
- - On next successful heartbeat, drain the queue (oldest first)
409
- - Max queue size: 1440 entries (24 hours of heartbeats)
410
- - After 24h, drop oldest entries (stale data isn't useful)
411
-
412
- ---
413
-
414
- ## Security Considerations
415
-
416
- - **Platform API key** per instance — revokable, rotatable
417
- - **Secret redactor** runs on heartbeat payload before sending (no env values leak)
418
- - **No source code** in heartbeats — only metrics, error messages (redacted), and stats
419
- - **TLS only** — platform endpoint must be HTTPS
420
- - **Rate limiting** on platform ingestion — max 1 heartbeat/second per instance
421
- - **Tenant isolation** — multi-tenant platform must scope data by organization
422
- - **Audit log** — track who acknowledged/resolved alerts
423
-
424
- ---
425
-
426
- ## Implementation Priority
427
-
428
- ### Phase 1: Core (1-2 weeks)
429
- 1. Platform backend: heartbeat ingestion + server listing + basic API
430
- 2. Wolverine telemetry module: collect + send heartbeats
431
- 3. Frontend: fleet overview + server detail page
432
- 4. PostgreSQL schema + Redis caching
433
-
434
- ### Phase 2: Analytics (1 week)
435
- 1. Hourly usage rollups
436
- 2. Cost analytics page
437
- 3. Repair history aggregation
438
- 4. Uptime tracking
439
-
440
- ### Phase 3: Alerting (1 week)
441
- 1. Alert rules engine
442
- 2. Acknowledge/resolve workflow
443
- 3. Email/Slack/webhook notifications
444
- 4. Alert history
445
-
446
- ### Phase 4: Scale (ongoing)
447
- 1. TimescaleDB migration for heartbeats
448
- 2. Horizontal API scaling
449
- 3. WebSocket real-time streaming
450
- 4. Team management + RBAC
@@ -1,70 +0,0 @@
1
- # Wolverine Server Best Practices
2
-
3
- Rules for building secure, scalable, well-structured servers. Wolverine's agent follows these when building or editing server code.
4
-
5
- ## Structure
6
-
7
- ```
8
- server/
9
- ├── index.js Entry point — app setup, middleware, route mounting, listen
10
- ├── routes/ Route modules — one file per resource
11
- │ ├── health.js Health check endpoint (always required)
12
- │ └── api.js API routes
13
- ├── middleware/ Custom middleware (auth, validation, logging)
14
- ├── models/ Data models / database schemas
15
- ├── services/ Business logic (keep routes thin)
16
- ├── config/ Configuration files
17
- └── utils/ Shared utilities
18
- ```
19
-
20
- ## Rules
21
-
22
- ### Ports
23
- - **Development**: use port 3000 (standard, no admin required, firewall-friendly)
24
- - **Production**: use port 443 (HTTPS) or 80 (HTTP) behind a reverse proxy (nginx/caddy)
25
- - **Never** use random high ports in production — they bypass firewalls and confuse load balancers
26
- - **Always** use HTTPS in production — terminate TLS at the reverse proxy, not in Node
27
- - Dashboard runs on port+1 automatically (3001 in dev, not exposed in prod)
28
-
29
- ### Security
30
- - Never expose secrets in responses — use env vars, never hardcode
31
- - Validate ALL input — Fastify has built-in JSON schema validation
32
- - Use HTTPS in production — reverse proxy (nginx/caddy) handles TLS
33
- - Rate limit public endpoints
34
- - Sanitize user input before database queries — use the SQL skill
35
- - Never return stack traces in production error responses
36
- - Use the sqlGuard() middleware on all routes that accept user input
37
-
38
- ### Scalability
39
- - Keep routes thin — business logic goes in services/
40
- - Use async/await, never block the event loop
41
- - Add a /health endpoint that returns status + uptime + memory
42
- - Use environment variables for all configuration
43
- - Structure for horizontal scaling — no in-memory session state
44
-
45
- ### Error Handling
46
- - Always have a global error handler middleware
47
- - Log errors with context (timestamp, request path, user)
48
- - Return consistent error response format: { error: "message" }
49
- - Never swallow errors silently
50
- - Use try/catch in async route handlers
51
-
52
- ### Code Quality
53
- - One route file per resource (users.js, orders.js, etc.)
54
- Export a route plugin from each route file (a Fastify plugin function; express.Router() only in Express apps)
55
- - Mount routes in index.js with clear prefixes
56
- - Use middleware for cross-cutting concerns (auth, logging)
57
- - Keep index.js under 50 lines — it's just wiring
58
-
59
- ### Database
60
- - Use connection pooling
61
- - Handle connection errors gracefully
62
- - Use migrations for schema changes
63
- - Never use string concatenation for queries — use parameterized queries
64
- - Close connections on process exit
65
-
66
- ### Monitoring
67
- - /health endpoint is mandatory
68
- - Log request duration for slow endpoint detection
69
- - Use structured logging (JSON format)
70
- - Track error rates per endpoint
package/TELEMETRY.md DELETED
@@ -1,108 +0,0 @@
1
- # Wolverine Telemetry
2
-
3
- Connect your Wolverine instance to a platform backend for fleet-wide monitoring, uptime tracking, and cost analytics.
4
-
5
- ## Setup
6
-
7
- ### 1. Deploy your platform backend
8
-
9
- See [PLATFORM.md](PLATFORM.md) for the full backend spec — database schema, API endpoints, scaling strategy.
10
-
11
- Your backend needs to implement:
12
- - `POST /api/v1/heartbeat` — receive heartbeat payloads
13
- - `GET /api/v1/servers` — list connected instances
14
- - Standard Bearer token auth
15
-
16
- ### 2. Configure your Wolverine instance
17
-
18
- Add to `.env.local`:
19
-
20
- ```env
21
- WOLVERINE_PLATFORM_URL=https://your-platform.com
22
- WOLVERINE_PLATFORM_KEY=your_api_key_here
23
- ```
24
-
25
- That's it. Wolverine starts sending heartbeats every 60 seconds.
26
-
27
- ### Optional settings
28
-
29
- ```env
30
- # Human-readable name (defaults to folder name)
31
- WOLVERINE_INSTANCE_NAME=my-api-prod
32
-
33
- # Heartbeat interval in ms (default: 60000 = 1 minute)
34
- WOLVERINE_HEARTBEAT_INTERVAL_MS=60000
35
- ```
36
-
37
- ### 3. Verify
38
-
39
- On startup you'll see:
40
-
41
- ```
42
- 📡 Platform: https://your-platform.com (every 60s)
43
- 📡 Instance: wlv_a8f3e9b1c4d7
44
- ```
45
-
46
- If the platform is unreachable, heartbeats queue locally in `.wolverine/heartbeat-queue.jsonl` and drain automatically when connectivity returns.
47
-
48
- ---
49
-
50
- ## Heartbeat Payload
51
-
52
- Each heartbeat is ~2KB JSON, sent every 60 seconds:
53
-
54
- ```json
55
- {
56
- "instanceId": "wlv_a8f3e9b1c4d7",
57
- "version": "0.1.0",
58
- "timestamp": 1775073247574,
59
- "server": {
60
- "name": "my-api",
61
- "port": 3000,
62
- "uptime": 86400,
63
- "status": "healthy",
64
- "pid": 12345
65
- },
66
- "process": {
67
- "memoryMB": 128,
68
- "cpuPercent": 12,
69
- "peakMemoryMB": 256
70
- },
71
- "routes": {
72
- "total": 8,
73
- "healthy": 8,
74
- "unhealthy": 0
75
- },
76
- "repairs": {
77
- "total": 3,
78
- "successes": 2,
79
- "failures": 1,
80
- "lastRepair": { "error": "...", "resolution": "...", "tokens": 1820, "cost": 0.0045 }
81
- },
82
- "usage": {
83
- "totalTokens": 45000,
84
- "totalCost": 0.12,
85
- "totalCalls": 85,
86
- "byCategory": { "heal": {...}, "chat": {...}, "develop": {...} }
87
- },
88
- "brain": { "totalMemories": 45 },
89
- "backups": { "total": 8, "stable": 3 }
90
- }
91
- ```
92
-
93
- ## Design
94
-
95
- - **Opt-in**: disabled unless `WOLVERINE_PLATFORM_URL` and `WOLVERINE_PLATFORM_KEY` are set
96
- - **Lightweight**: 1 request per 60s, ~2KB payload
97
- - **Offline-resilient**: queues locally when platform is down, replays on reconnect (max 24h / 1440 entries)
98
- **Secure**: secrets redacted before sending, HTTPS required for the platform endpoint, Bearer token auth
99
- - **No source code**: only metrics, redacted error messages, and stats
100
-
101
- ## Files
102
-
103
- ```
104
- src/platform/
105
- ├── telemetry.js — Collects metrics from all subsystems into heartbeat payload
106
- ├── heartbeat.js — Sends heartbeats on interval, handles failures
107
- └── queue.js — Offline queue with replay on reconnect
108
- ```