wolverine-ai 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/server/config/settings.json +2 -2
- package/server/index.js +81 -57
- package/src/brain/brain.js +7 -3
- package/src/core/runner.js +44 -8
- package/src/index.js +3 -1
- package/src/skills/sql.js +179 -12
- package/PLATFORM.md +0 -450
- package/SERVER_BEST_PRACTICES.md +0 -70
- package/TELEMETRY.md +0 -108
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wolverine-ai",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.7.0",
|
|
4
4
|
"description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
},
|
|
25
25
|
|
|
26
26
|
"cluster": {
|
|
27
|
-
"
|
|
27
|
+
"enabled": false,
|
|
28
28
|
"workers": 0
|
|
29
29
|
},
|
|
30
30
|
|
|
@@ -48,7 +48,7 @@
|
|
|
48
48
|
},
|
|
49
49
|
|
|
50
50
|
"errorMonitor": {
|
|
51
|
-
"defaultThreshold":
|
|
51
|
+
"defaultThreshold": 1,
|
|
52
52
|
"windowMs": 30000,
|
|
53
53
|
"cooldownMs": 60000
|
|
54
54
|
},
|
package/server/index.js
CHANGED
|
@@ -1,61 +1,85 @@
|
|
|
1
|
-
const
|
|
1
|
+
const cluster = require("cluster");
const os = require("os");
const PORT = process.env.PORT || 3000;

// Cluster mode: the primary process forks workers, and each worker runs the server.
// Clustering is switched on by the WOLVERINE_CLUSTER=true environment variable; that is
// the only switch this script reads. (NOTE(review): settings.json `cluster.enabled` is
// presumably translated into that variable by the Wolverine runner — confirm.)
// WOLVERINE_RECOMMENDED_WORKERS, when set, gives the worker count; otherwise one worker
// per CPU core is used.
const clusterEnabled = process.env.WOLVERINE_CLUSTER === "true";
const workerCount = parseInt(process.env.WOLVERINE_RECOMMENDED_WORKERS, 10) || os.cpus().length;

// `cluster.isPrimary` only exists on Node 16+; older runtimes expose `isMaster`.
// Without the fallback, clustering is silently skipped on those runtimes.
const isPrimary = cluster.isPrimary ?? cluster.isMaster;

if (clusterEnabled && isPrimary && workerCount > 1) {
  console.log(`[CLUSTER] Primary ${process.pid} forking ${workerCount} workers`);
  for (let i = 0; i < workerCount; i++) cluster.fork();

  // Replace any worker that exits abnormally so the pool keeps its size.
  cluster.on("exit", (worker, code) => {
    if (code !== 0) {
      console.log(`[CLUSTER] Worker ${worker.process.pid} died (code ${code}), respawning...`);
      cluster.fork();
    }
  });
} else {
  // Single process or cluster worker — run the server
  const fastify = require("fastify")({ logger: false });

  // Routes
  fastify.register(require("./routes/health"), { prefix: "/health" });
  fastify.register(require("./routes/api"), { prefix: "/api" });
  fastify.register(require("./routes/time"), { prefix: "/time" });

  // Root
  fastify.get("/", async () => ({
    name: "Wolverine Server",
    version: "1.0.0",
    status: "running",
    uptime: process.uptime(),
    pid: process.pid,
    worker: cluster.isWorker ? cluster.worker.id : "primary",
  }));

  // 404
  fastify.setNotFoundHandler((req, reply) => {
    reply.code(404).send({ error: "Not found", path: req.url });
  });

  // Error handler — reports server faults to the Wolverine parent via IPC for auto-healing
  fastify.setErrorHandler((err, req, reply) => {
    // Honour the status carried by the error (e.g. 400 for validation failures) instead of
    // turning every error into a 500.
    const statusCode = err.statusCode >= 400 && err.statusCode < 600 ? err.statusCode : 500;

    console.error(`[ERROR] ${err.message}`);
    reply.code(statusCode).send({ error: err.message });

    // Client errors (4xx) are not server defects: do not report them for healing.
    if (statusCode < 500) return;

    // Report to Wolverine via IPC (process.send only exists when spawned with an IPC channel)
    if (typeof process.send === "function") {
      try {
        // Pick the first stack frame that belongs to application code.
        let file = null;
        let line = null;
        if (err.stack) {
          const frames = err.stack.split("\n");
          for (const frame of frames) {
            const m = frame.match(/\(([^)]+):(\d+):(\d+)\)/) || frame.match(/at\s+([^\s(]+):(\d+):(\d+)/);
            if (m && !m[1].includes("node_modules") && !m[1].includes("node:")) {
              file = m[1];
              line = parseInt(m[2], 10);
              break;
            }
          }
        }
        process.send({
          type: "route_error",
          path: req.url,
          method: req.method,
          statusCode,
          message: err.message,
          stack: err.stack,
          file,
          line,
          timestamp: Date.now(),
        });
      } catch (_) { /* IPC send failed — non-fatal */ }
    }
  });

  fastify.listen({ port: PORT, host: "0.0.0.0", reusePort: clusterEnabled }, (err) => {
    if (err) { console.error(err); process.exit(1); }
    const label = cluster.isWorker ? ` (worker ${cluster.worker.id})` : "";
    console.log(`Server running on http://localhost:${PORT}${label}`);
    console.log(`Health: http://localhost:${PORT}/health`);
    console.log(`API: http://localhost:${PORT}/api`);
  });
}
|
package/src/brain/brain.js
CHANGED
|
@@ -96,7 +96,7 @@ const SEED_DOCS = [
|
|
|
96
96
|
metadata: { topic: "skill-sql-patterns" },
|
|
97
97
|
},
|
|
98
98
|
{
|
|
99
|
-
text: "Database best practices: SafeDB uses split connections — separate read connection (concurrent, never waits) and write connection (single writer, FIFO queue). Write queue drains synchronously in one microtask, zero delays. WAL mode means readers never block writers. Each write is microseconds. db.transaction(fn) queues as single atomic unit. No busy_timeout, no blocking, no IPC. Reads: db.get(), db.all() are instant. Writes: db.run(), db.exec() go through queue.",
|
|
99
|
+
text: "Database best practices: SafeDB uses split connections — separate read connection (concurrent, never waits) and write connection (single writer, FIFO queue). Write queue drains synchronously in one microtask, zero delays. WAL mode means readers never block writers. Each write is microseconds. db.transaction(fn) queues as single atomic unit. No busy_timeout, no blocking, no IPC. Reads: db.get(), db.all() are instant. Writes: db.run(), db.exec() go through queue. Idempotent writes: db.idempotent(key, fn, ttlSeconds) executes fn only once per key — prevents double-charge/double-insert when retries or cluster workers duplicate a request. Idempotency keys stored in _idempotency table (auto-created on connect), shared across all workers via WAL mode.",
|
|
100
100
|
metadata: { topic: "skill-sql-best-practices" },
|
|
101
101
|
},
|
|
102
102
|
{
|
|
@@ -120,7 +120,7 @@ const SEED_DOCS = [
|
|
|
120
120
|
metadata: { topic: "process-manager" },
|
|
121
121
|
},
|
|
122
122
|
{
|
|
123
|
-
text: "
|
|
123
|
+
text: "Cluster mode: server handles its own clustering (not wolverine-level). WOLVERINE_CLUSTER=true enables it. Server forks N workers (WOLVERINE_RECOMMENDED_WORKERS set by system detection). Workers share port 3000 via reusePort. Wolverine kills entire process tree on restart (_killProcessTree: taskkill /T on Windows, kill -pgid + pgrep -P on Linux). Idempotency protection prevents double-fire: idempotencyGuard() middleware deduplicates write requests across workers using shared SQLite _idempotency table. Client sends X-Idempotency-Key header, or auto-generated from method+path+body hash. All workers see the same table via WAL mode. SafeDB.idempotent(key, fn) for database-level dedup.",
|
|
124
124
|
metadata: { topic: "clustering" },
|
|
125
125
|
},
|
|
126
126
|
{
|
|
@@ -220,9 +220,13 @@ const SEED_DOCS = [
|
|
|
220
220
|
metadata: { topic: "agent-tools-v2" },
|
|
221
221
|
},
|
|
222
222
|
{
|
|
223
|
-
text: "Server problem categories the agent can fix: CODE BUGS (SyntaxError, TypeError, ReferenceError → edit_file), DEPENDENCIES (Cannot find module → npm install, corrupted node_modules → rm + reinstall), DATABASE (invalid entries → run_db_fix UPDATE, missing table → CREATE TABLE, schema mismatch → ALTER TABLE, constraint violation → fix data or schema), CONFIG (invalid JSON → edit_file, missing env vars → write .env, wrong port → edit config), FILESYSTEM (misplaced files → move_file, missing directories → bash_exec mkdir, wrong permissions → chmod), NETWORK (port conflict → check_port + kill, service down → restart, connection refused → check config), STATE (corrupted cache → delete + restart, stale locks → remove lock file, git conflicts → resolve markers). The agent investigates before fixing — reads files, checks directories, inspects databases, never guesses.",
|
|
223
|
+
text: "Server problem categories the agent can fix: CODE BUGS (SyntaxError, TypeError, ReferenceError → edit_file), DEPENDENCIES (Cannot find module → npm install, corrupted node_modules → rm + reinstall), DATABASE (invalid entries → run_db_fix UPDATE, missing table → CREATE TABLE, schema mismatch → ALTER TABLE, constraint violation → fix data or schema), CONFIG (invalid JSON → edit_file, missing env vars → write .env, wrong port → edit config), FILESYSTEM (misplaced files → move_file, missing directories → bash_exec mkdir, wrong permissions → chmod), NETWORK (port conflict → check_port + kill, service down → restart, connection refused → check config), STATE (corrupted cache → delete + restart, stale locks → remove lock file, git conflicts → resolve markers), IDEMPOTENCY (double-fire → add idempotencyGuard middleware, missing idempotency key → add X-Idempotency-Key header support, duplicate DB entries → add UNIQUE constraint or use db.idempotent()). The agent investigates before fixing — reads files, checks directories, inspects databases, never guesses.",
|
|
224
224
|
metadata: { topic: "server-problems" },
|
|
225
225
|
},
|
|
226
|
+
{
|
|
227
|
+
text: "Idempotency protection: two layers prevent double-fire in cluster mode. Layer 1: idempotencyGuard() Fastify middleware — intercepts POST/PUT/PATCH/DELETE, checks X-Idempotency-Key header (or auto-generates key from method+path+body hash), queries _idempotency table. If key exists and not expired → return cached response with X-Idempotency-Cached:true header, skip handler. If new → pass through, idempotencyAfterHook() stores response. Layer 2: SafeDB.idempotent(key, fn) — database-level dedup. Wraps fn in transaction, checks key, executes only if new. Returns {executed:true/false, result, cached}. Keys expire after TTL (default 24h). All workers share the SQLite _idempotency table via WAL mode — globally consistent. Auto-pruned on connect and via db.pruneIdempotency().",
|
|
228
|
+
metadata: { topic: "idempotency" },
|
|
229
|
+
},
|
|
226
230
|
{
|
|
227
231
|
text: "Heal pipeline no longer requires a file path. When no file is identified from the error (database errors, config problems, port conflicts), the pipeline skips fast path and goes straight to the agent, which uses investigation tools (glob_files, grep_code, list_dir, inspect_db, check_env, check_port) to find the root cause. Agent verification for no-file errors: if agent made changes or ran commands, trust the agent's assessment. For file-based errors, verification uses syntax check + boot probe as before.",
|
|
228
232
|
metadata: { topic: "fileless-heal" },
|
package/src/core/runner.js
CHANGED
|
@@ -95,7 +95,7 @@ class WolverineRunner {
|
|
|
95
95
|
|
|
96
96
|
// Error monitor — detects caught 500 errors without process crash
|
|
97
97
|
this.errorMonitor = new ErrorMonitor({
|
|
98
|
-
threshold: parseInt(process.env.WOLVERINE_ERROR_THRESHOLD, 10) ||
|
|
98
|
+
threshold: parseInt(process.env.WOLVERINE_ERROR_THRESHOLD, 10) || 1,
|
|
99
99
|
windowMs: parseInt(process.env.WOLVERINE_ERROR_WINDOW_MS, 10) || 30000,
|
|
100
100
|
cooldownMs: parseInt(process.env.WOLVERINE_ERROR_COOLDOWN_MS, 10) || 60000,
|
|
101
101
|
logger: this.logger,
|
|
@@ -236,11 +236,11 @@ class WolverineRunner {
|
|
|
236
236
|
|
|
237
237
|
oldChild.removeAllListeners("exit");
|
|
238
238
|
oldChild.once("exit", onExit);
|
|
239
|
-
oldChild.
|
|
239
|
+
this._killProcessTree(oldChild.pid, "SIGTERM");
|
|
240
240
|
|
|
241
241
|
// Force kill if it doesn't exit in 3s
|
|
242
242
|
setTimeout(() => {
|
|
243
|
-
|
|
243
|
+
this._killProcessTree(oldChild.pid, "SIGKILL");
|
|
244
244
|
onExit();
|
|
245
245
|
}, 3000);
|
|
246
246
|
} else {
|
|
@@ -278,13 +278,14 @@ class WolverineRunner {
|
|
|
278
278
|
|
|
279
279
|
this.logger.info(EVENT_TYPES.PROCESS_STOP, "Wolverine stopped (graceful shutdown)");
|
|
280
280
|
|
|
281
|
-
// Kill child — remove exit listener first so it doesn't trigger heal
|
|
281
|
+
// Kill child + all its descendants — remove exit listener first so it doesn't trigger heal
|
|
282
282
|
if (this.child) {
|
|
283
|
+
const pid = this.child.pid;
|
|
283
284
|
this.child.removeAllListeners("exit");
|
|
284
|
-
this.
|
|
285
|
+
this._killProcessTree(pid, "SIGTERM");
|
|
285
286
|
// Force kill after 3s if it doesn't respond
|
|
286
287
|
setTimeout(() => {
|
|
287
|
-
|
|
288
|
+
this._killProcessTree(pid, "SIGKILL");
|
|
288
289
|
}, 3000);
|
|
289
290
|
this.child = null;
|
|
290
291
|
}
|
|
@@ -304,9 +305,15 @@ class WolverineRunner {
|
|
|
304
305
|
// Spawn with --require error-hook.js for IPC error reporting
|
|
305
306
|
// The error hook auto-patches Fastify/Express to report caught 500s
|
|
306
307
|
const errorHookPath = path.join(__dirname, "error-hook.js");
|
|
308
|
+
const sysInfo = require("./system-info").detect();
|
|
307
309
|
this.child = spawn("node", ["--require", errorHookPath, this.scriptPath], {
|
|
308
310
|
cwd: this.cwd,
|
|
309
|
-
env: {
|
|
311
|
+
env: {
|
|
312
|
+
...process.env,
|
|
313
|
+
// Tell the user's server how many workers to fork (if it uses clustering)
|
|
314
|
+
WOLVERINE_RECOMMENDED_WORKERS: String(sysInfo.recommended?.workers || 1),
|
|
315
|
+
WOLVERINE_MANAGED: "1", // Signal that wolverine is managing this process
|
|
316
|
+
},
|
|
310
317
|
stdio: ["inherit", "inherit", "pipe", "ipc"],
|
|
311
318
|
});
|
|
312
319
|
|
|
@@ -347,8 +354,9 @@ class WolverineRunner {
|
|
|
347
354
|
|
|
348
355
|
// Kill the hung process — remove exit listener to prevent double-heal
|
|
349
356
|
if (this.child) {
|
|
357
|
+
const pid = this.child.pid;
|
|
350
358
|
this.child.removeAllListeners("exit");
|
|
351
|
-
this.
|
|
359
|
+
this._killProcessTree(pid, "SIGKILL");
|
|
352
360
|
this.child = null;
|
|
353
361
|
}
|
|
354
362
|
|
|
@@ -587,6 +595,34 @@ class WolverineRunner {
|
|
|
587
595
|
}
|
|
588
596
|
}
|
|
589
597
|
|
|
598
|
+
/**
|
|
599
|
+
* Kill a process and all its children (process tree kill).
|
|
600
|
+
* Handles servers that fork workers internally — prevents orphaned processes.
|
|
601
|
+
*/
|
|
602
|
+
_killProcessTree(pid, signal = "SIGTERM") {
|
|
603
|
+
if (!pid) return;
|
|
604
|
+
try {
|
|
605
|
+
if (process.platform === "win32") {
|
|
606
|
+
// taskkill /T kills the process tree
|
|
607
|
+
execSync(`taskkill /PID ${pid} /T /F`, { timeout: 3000, stdio: "ignore" });
|
|
608
|
+
} else {
|
|
609
|
+
// Kill the process group (negative PID)
|
|
610
|
+
try { process.kill(-pid, signal); } catch {}
|
|
611
|
+
// Also kill individual PID in case it's not a group leader
|
|
612
|
+
try { process.kill(pid, signal); } catch {}
|
|
613
|
+
// Find and kill children via pgrep
|
|
614
|
+
try {
|
|
615
|
+
const children = execSync(`pgrep -P ${pid} 2>/dev/null`, { encoding: "utf-8", timeout: 3000 }).trim();
|
|
616
|
+
if (children) {
|
|
617
|
+
for (const cpid of children.split("\n").map(p => parseInt(p, 10)).filter(Boolean)) {
|
|
618
|
+
try { process.kill(cpid, signal); } catch {}
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
} catch { /* no children or pgrep not available */ }
|
|
622
|
+
}
|
|
623
|
+
} catch { /* process already dead */ }
|
|
624
|
+
}
|
|
625
|
+
|
|
590
626
|
_ensurePortFree() {
|
|
591
627
|
const port = parseInt(process.env.PORT, 10) || 3000;
|
|
592
628
|
try {
|
package/src/index.js
CHANGED
|
@@ -33,7 +33,7 @@ const { scanProject } = require("./brain/function-map");
|
|
|
33
33
|
const { detect: detectSystem } = require("./core/system-info");
|
|
34
34
|
const { ClusterManager } = require("./core/cluster-manager");
|
|
35
35
|
const { loadConfig, getConfig } = require("./core/config");
|
|
36
|
-
const { sqlGuard, SafeDB, scanForInjection } = require("./skills/sql");
|
|
36
|
+
const { sqlGuard, SafeDB, scanForInjection, idempotencyGuard, idempotencyAfterHook } = require("./skills/sql");
|
|
37
37
|
|
|
38
38
|
module.exports = {
|
|
39
39
|
// Core
|
|
@@ -93,4 +93,6 @@ module.exports = {
|
|
|
93
93
|
sqlGuard,
|
|
94
94
|
SafeDB,
|
|
95
95
|
scanForInjection,
|
|
96
|
+
idempotencyGuard,
|
|
97
|
+
idempotencyAfterHook,
|
|
96
98
|
};
|
package/src/skills/sql.js
CHANGED
|
@@ -201,6 +201,18 @@ class SafeDB {
|
|
|
201
201
|
this._writer.pragma("foreign_keys = ON");
|
|
202
202
|
this._writer.pragma("synchronous = NORMAL");
|
|
203
203
|
|
|
204
|
+
// Idempotency table — prevents double-execution of writes in cluster mode
|
|
205
|
+
this._writer.exec(`
|
|
206
|
+
CREATE TABLE IF NOT EXISTS _idempotency (
|
|
207
|
+
key TEXT PRIMARY KEY,
|
|
208
|
+
result TEXT,
|
|
209
|
+
created_at INTEGER DEFAULT (strftime('%s','now')),
|
|
210
|
+
expires_at INTEGER
|
|
211
|
+
)
|
|
212
|
+
`);
|
|
213
|
+
// Clean expired keys on connect
|
|
214
|
+
this._writer.exec(`DELETE FROM _idempotency WHERE expires_at < strftime('%s','now')`);
|
|
215
|
+
|
|
204
216
|
} catch (err) {
|
|
205
217
|
if (err.code === "MODULE_NOT_FOUND") {
|
|
206
218
|
throw new Error("Install better-sqlite3: npm install better-sqlite3");
|
|
@@ -216,6 +228,49 @@ class SafeDB {
|
|
|
216
228
|
process.on("exit", () => this.close());
|
|
217
229
|
}
|
|
218
230
|
|
|
231
|
+
/**
|
|
232
|
+
* Idempotent write — execute fn only if this key hasn't been seen before.
|
|
233
|
+
* In cluster mode, prevents the same request from double-firing across workers.
|
|
234
|
+
*
|
|
235
|
+
* @param {string} key — unique idempotency key (e.g. from X-Idempotency-Key header)
|
|
236
|
+
* @param {Function} fn — function that performs the write, receives writerProxy
|
|
237
|
+
* @param {number} ttlSeconds — how long to remember this key (default: 86400 = 24h)
|
|
238
|
+
* @returns {{ executed: boolean, result: any }} — executed=false if key was already seen
|
|
239
|
+
*/
|
|
240
|
+
idempotent(key, fn, ttlSeconds = 86400) {
|
|
241
|
+
this._assertOpen();
|
|
242
|
+
return this._enqueueWrite(() => {
|
|
243
|
+
// Check if key already executed
|
|
244
|
+
const existing = this._writer.prepare("SELECT result FROM _idempotency WHERE key = ? AND expires_at > strftime('%s','now')").get(key);
|
|
245
|
+
if (existing) {
|
|
246
|
+
return { executed: false, result: JSON.parse(existing.result || "null"), cached: true };
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
// Execute the write
|
|
250
|
+
const txn = this._writer.transaction(() => {
|
|
251
|
+
const result = fn(this._writerProxy());
|
|
252
|
+
// Store the key so duplicates are rejected
|
|
253
|
+
this._writer.prepare(
|
|
254
|
+
"INSERT OR REPLACE INTO _idempotency (key, result, expires_at) VALUES (?, ?, strftime('%s','now') + ?)"
|
|
255
|
+
).run(key, JSON.stringify(result ?? null), ttlSeconds);
|
|
256
|
+
return result;
|
|
257
|
+
});
|
|
258
|
+
|
|
259
|
+
const result = txn();
|
|
260
|
+
return { executed: true, result, cached: false };
|
|
261
|
+
});
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
/**
|
|
265
|
+
* Clean up expired idempotency keys. Call periodically (e.g., every hour).
|
|
266
|
+
*/
|
|
267
|
+
pruneIdempotency() {
|
|
268
|
+
this._assertOpen();
|
|
269
|
+
return this._enqueueWrite(() => {
|
|
270
|
+
return this._writer.prepare("DELETE FROM _idempotency WHERE expires_at < strftime('%s','now')").run();
|
|
271
|
+
});
|
|
272
|
+
}
|
|
273
|
+
|
|
219
274
|
/**
|
|
220
275
|
* Write query (INSERT, UPDATE, DELETE, CREATE).
|
|
221
276
|
* Queued and executed in order. Returns a promise that resolves with the result.
|
|
@@ -327,27 +382,137 @@ class SafeDB {
|
|
|
327
382
|
}
|
|
328
383
|
}
|
|
329
384
|
|
|
385
|
+
// ── Idempotency Middleware ──────────────────────────────────────
|
|
386
|
+
|
|
387
|
+
/**
 * Request idempotency middleware — prevents double-fire in cluster mode.
 *
 * How it works:
 *   1. Client sends a write request (POST/PUT/PATCH/DELETE), ideally with an
 *      X-Idempotency-Key (or Idempotency-Key) header.
 *   2. The middleware looks the key up in the _idempotency table.
 *   3. If found and not expired: the cached response is replayed with an
 *      X-Idempotency-Cached: true header and the handler is skipped.
 *   4. If not found: the key and TTL are attached to the request as
 *      req._idempotencyKey / req._idempotencyTtl and the request continues.
 *      Storing the response is done separately (see idempotencyAfterHook).
 *
 * Without the header, a key is derived from a SHA-256 hash of
 * method + URL + body. NOTE: this means two intentionally identical requests
 * inside the TTL window are treated as duplicates; clients that need to repeat
 * a request must send distinct keys.
 *
 * Safe methods (GET, HEAD, OPTIONS) always pass through. If no database is
 * available, or the lookup throws, the request also passes through unguarded.
 *
 * The returned function is deliberately NOT async: it uses the callback
 * (`next`) style, and Fastify documents that async hooks must not also use the
 * callback argument. It works as an Express middleware and as a Fastify
 * preHandler hook.
 *
 * @param {object} options
 * @param {SafeDB} options.db — SafeDB instance (must be connected)
 * @param {number} options.ttlSeconds — how long to cache responses (default: 86400)
 * @param {object} options.logger — wolverine EventLogger (optional)
 * @returns {Function} middleware/hook with signature (req, res, next)
 */
function idempotencyGuard(options = {}) {
  const db = options.db;
  const ttlSeconds = options.ttlSeconds || 86400;
  const logger = options.logger || null;
  const crypto = require("crypto");

  return (req, res, next) => {
    // Safe methods are inherently idempotent — pass through
    const method = (req.method || "GET").toUpperCase();
    if (["GET", "HEAD", "OPTIONS"].includes(method)) return next();

    // Get or generate idempotency key
    let key = req.headers["x-idempotency-key"] || req.headers["idempotency-key"];
    if (!key) {
      // Auto-generate from method + path + body hash
      const bodyStr = typeof req.body === "string" ? req.body : JSON.stringify(req.body || "");
      key = crypto.createHash("sha256").update(`${method}:${req.url}:${bodyStr}`).digest("hex");
    }

    if (!db || !db._writer) return next(); // No DB — can't check, pass through

    try {
      // Check idempotency table directly (read from writer for consistency)
      const existing = db._writer.prepare(
        "SELECT result FROM _idempotency WHERE key = ? AND expires_at > strftime('%s','now')"
      ).get(key);

      if (existing) {
        // Already processed — return cached response
        const cached = JSON.parse(existing.result || "null");
        if (logger) logger.debug("idempotency.hit", `Duplicate request blocked: ${method} ${req.url}`, { key: key.slice(0, 16) });

        // Records are normally stored as { statusCode, body }. Test for the presence of
        // the `body` property rather than its truthiness, so falsy bodies (null, 0, "",
        // false) are replayed as-is instead of leaking the whole wrapper object.
        const isWrapped = cached !== null
          && typeof cached === "object"
          && Object.prototype.hasOwnProperty.call(cached, "body");
        const status = cached?.statusCode || 200;
        const body = isWrapped ? cached.body : cached;

        if (typeof res.code === "function") {
          // Fastify
          res.code(status).header("X-Idempotency-Cached", "true").send(body);
        } else {
          // Express
          res.status(status).set("X-Idempotency-Cached", "true").json(body);
        }
        return;
      }

      // Not seen — attach key to request for the after-hook / route handler to use
      req._idempotencyKey = key;
      req._idempotencyTtl = ttlSeconds;
    } catch {
      // Best effort: a DB or parse error must not block the request — pass through unguarded
    }

    next();
  };
}
|
|
466
|
+
|
|
467
|
+
/**
 * After-response hook — stores the response for future idempotency checks.
 * For Fastify, add as onSend hook. For Express, monkey-patch res.json.
 *
 * Only acts on requests carrying req._idempotencyKey. The response is stored
 * as JSON `{ statusCode, body }` with INSERT OR IGNORE, so the first response
 * recorded for a key wins. String payloads are parsed as JSON when possible;
 * a string that is not valid JSON (e.g. text/html) is stored verbatim instead
 * of being silently dropped. Storage failures are non-fatal and never block
 * the response.
 *
 * @param {SafeDB} db — connected SafeDB instance
 * @returns {Function} hook with signature (req, reply, payload, done)
 */
function idempotencyAfterHook(db) {
  return (req, reply, payload, done) => {
    if (req._idempotencyKey && db && db._writer) {
      try {
        const statusCode = reply.statusCode || 200;

        let body = payload;
        if (typeof payload === "string") {
          try {
            body = JSON.parse(payload);
          } catch {
            // Not JSON — keep the raw string so the response can still be replayed.
            body = payload;
          }
        }

        const result = JSON.stringify({ statusCode, body });
        db._writer.prepare(
          "INSERT OR IGNORE INTO _idempotency (key, result, expires_at) VALUES (?, ?, strftime('%s','now') + ?)"
        ).run(req._idempotencyKey, result, req._idempotencyTtl || 86400);
      } catch { /* non-fatal */ }
    }
    done();
  };
}
|
|
487
|
+
|
|
330
488
|
// ── Skill Metadata (for SkillRegistry discovery) ──
|
|
331
489
|
|
|
332
490
|
const SKILL_NAME = "sql";
|
|
333
|
-
const SKILL_DESCRIPTION = "SQL database interface with injection prevention. Provides sqlGuard() middleware to block SQL injection
|
|
334
|
-
const SKILL_KEYWORDS = ["sql", "database", "db", "query", "injection", "sqlite", "postgres", "mysql", "select", "insert", "update", "delete", "table", "schema", "migration", "parameterized"];
|
|
335
|
-
const SKILL_USAGE = `// Protect
|
|
336
|
-
const { sqlGuard } = require("../src/skills/sql");
|
|
337
|
-
app.use(sqlGuard({ logger: wolverineLogger }));
|
|
338
|
-
|
|
339
|
-
// Cluster-safe database (each worker gets its own connection)
|
|
340
|
-
const { SafeDB } = require("../src/skills/sql");
|
|
491
|
+
const SKILL_DESCRIPTION = "SQL database interface with injection prevention + idempotency. Provides sqlGuard() middleware to block SQL injection, idempotencyGuard() middleware to prevent double-fire in cluster mode, and SafeDB class for parameterized-only database queries with built-in idempotency key support.";
|
|
492
|
+
const SKILL_KEYWORDS = ["sql", "database", "db", "query", "injection", "sqlite", "postgres", "mysql", "select", "insert", "update", "delete", "table", "schema", "migration", "parameterized", "idempotent", "idempotency", "duplicate", "double", "cluster", "transaction"];
|
|
493
|
+
const SKILL_USAGE = `// Protect routes from SQL injection + double-fire
|
|
494
|
+
const { sqlGuard, idempotencyGuard, idempotencyAfterHook, SafeDB } = require("../src/skills/sql");
|
|
341
495
|
const db = new SafeDB({ type: "sqlite", path: "./server/data.db" });
|
|
342
|
-
await db.connect();
|
|
496
|
+
await db.connect();
|
|
497
|
+
|
|
498
|
+
// Middleware: injection prevention + idempotency (cluster-safe)
|
|
499
|
+
fastify.addHook("preHandler", sqlGuard({ logger }));
|
|
500
|
+
fastify.addHook("preHandler", idempotencyGuard({ db, logger }));
|
|
501
|
+
fastify.addHook("onSend", idempotencyAfterHook(db));
|
|
343
502
|
|
|
344
|
-
// Reads (concurrent across workers)
|
|
503
|
+
// Reads (concurrent across workers — never waits)
|
|
345
504
|
const users = db.all("SELECT * FROM users WHERE role = ?", ["admin"]);
|
|
346
505
|
|
|
347
|
-
// Writes (serialized — no corruption)
|
|
506
|
+
// Writes (serialized FIFO queue — no corruption)
|
|
348
507
|
db.run("INSERT INTO users (name, role) VALUES (?, ?)", ["Alice", "admin"]);
|
|
349
508
|
|
|
350
|
-
//
|
|
509
|
+
// Idempotent write — prevents double-charge/double-insert in cluster mode
|
|
510
|
+
const result = await db.idempotent("order-abc-123", (tx) => {
|
|
511
|
+
tx.run("INSERT INTO orders (id, total) VALUES (?, ?)", ["abc-123", 99.99]);
|
|
512
|
+
return { orderId: "abc-123" };
|
|
513
|
+
}); // result.executed=true first time, false on retry
|
|
514
|
+
|
|
515
|
+
// Atomic transaction (all-or-nothing)
|
|
351
516
|
db.transaction((tx) => {
|
|
352
517
|
tx.run("INSERT INTO orders (user_id, total) VALUES (?, ?)", [1, 99.99]);
|
|
353
518
|
tx.run("UPDATE users SET order_count = order_count + 1 WHERE id = ?", [1]);
|
|
@@ -364,6 +529,8 @@ module.exports = {
|
|
|
364
529
|
|
|
365
530
|
// Middleware
|
|
366
531
|
sqlGuard,
|
|
532
|
+
idempotencyGuard,
|
|
533
|
+
idempotencyAfterHook,
|
|
367
534
|
scanForInjection,
|
|
368
535
|
deepScan,
|
|
369
536
|
|
package/PLATFORM.md
DELETED
|
@@ -1,450 +0,0 @@
|
|
|
1
|
-
# Wolverine Platform — Multi-Server Analytics & Management
|
|
2
|
-
|
|
3
|
-
## Overview
|
|
4
|
-
|
|
5
|
-
The Wolverine Platform aggregates data from hundreds/thousands of wolverine server instances into a single backend + frontend dashboard. Each wolverine instance runs independently and broadcasts lightweight telemetry to the platform.
|
|
6
|
-
|
|
7
|
-
```
|
|
8
|
-
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
|
|
9
|
-
│ Wolverine #1 │ │ Wolverine #2 │ │ Wolverine #3 │ ... (N instances)
|
|
10
|
-
│ server:3000 │ │ server:4000 │ │ server:5000 │
|
|
11
|
-
│ dash:3001 │ │ dash:4001 │ │ dash:5001 │
|
|
12
|
-
└──────┬───────┘ └──────┬───────┘ └──────┬───────┘
|
|
13
|
-
│ │ │
|
|
14
|
-
│ heartbeat │ heartbeat │ heartbeat
|
|
15
|
-
│ (every 60s) │ (every 60s) │ (every 60s)
|
|
16
|
-
▼ ▼ ▼
|
|
17
|
-
┌─────────────────────────────────────────────────┐
|
|
18
|
-
│ Wolverine Platform Backend │
|
|
19
|
-
│ │
|
|
20
|
-
│ POST /api/v1/heartbeat ← receive telemetry │
|
|
21
|
-
│ GET /api/v1/servers ← list all instances │
|
|
22
|
-
│ GET /api/v1/servers/:id ← single instance │
|
|
23
|
-
│ GET /api/v1/analytics ← aggregated stats │
|
|
24
|
-
│ GET /api/v1/alerts ← active alerts │
|
|
25
|
-
│ WS /ws/live ← real-time stream │
|
|
26
|
-
│ │
|
|
27
|
-
│ Database: PostgreSQL (time-series optimized) │
|
|
28
|
-
│ Cache: Redis (live state, pub/sub) │
|
|
29
|
-
│ Queue: Bull/BullMQ (alert processing) │
|
|
30
|
-
└─────────────────────────────────────────────────┘
|
|
31
|
-
│
|
|
32
|
-
▼
|
|
33
|
-
┌─────────────────────────────────────────────────┐
|
|
34
|
-
│ Wolverine Platform Frontend │
|
|
35
|
-
│ │
|
|
36
|
-
│ Fleet overview — all servers at a glance │
|
|
37
|
-
│ Per-server deep dive — events, repairs, usage │
|
|
38
|
-
│ Cost analytics — tokens, USD, by model │
|
|
39
|
-
│ Alert management — acknowledge, escalate │
|
|
40
|
-
│ Uptime history — SLA tracking over time │
|
|
41
|
-
└─────────────────────────────────────────────────┘
|
|
42
|
-
```
|
|
43
|
-
|
|
44
|
-
---
|
|
45
|
-
|
|
46
|
-
## Telemetry Protocol
|
|
47
|
-
|
|
48
|
-
### Heartbeat Payload
|
|
49
|
-
|
|
50
|
-
Each wolverine instance sends a heartbeat every **60 seconds** (configurable). This is the only outbound traffic — minimal network impact.
|
|
51
|
-
|
|
52
|
-
```http
|
|
53
|
-
POST /api/v1/heartbeat
|
|
54
|
-
Authorization: Bearer <PLATFORM_API_KEY>
|
|
55
|
-
Content-Type: application/json
|
|
56
|
-
|
|
57
|
-
{
|
|
58
|
-
"instanceId": "wlv_a1b2c3d4",
|
|
59
|
-
"version": "0.1.0",
|
|
60
|
-
"timestamp": 1775073247574,
|
|
61
|
-
|
|
62
|
-
"server": {
|
|
63
|
-
"name": "my-api",
|
|
64
|
-
"port": 3000,
|
|
65
|
-
"uptime": 86400,
|
|
66
|
-
"status": "healthy",
|
|
67
|
-
"pid": 12345
|
|
68
|
-
},
|
|
69
|
-
|
|
70
|
-
"process": {
|
|
71
|
-
"memoryMB": 128,
|
|
72
|
-
"cpuPercent": 12,
|
|
73
|
-
"peakMemoryMB": 256
|
|
74
|
-
},
|
|
75
|
-
|
|
76
|
-
"routes": {
|
|
77
|
-
"total": 8,
|
|
78
|
-
"healthy": 8,
|
|
79
|
-
"unhealthy": 0,
|
|
80
|
-
"slowest": { "path": "/api/search", "avgMs": 450 }
|
|
81
|
-
},
|
|
82
|
-
|
|
83
|
-
"repairs": {
|
|
84
|
-
"total": 3,
|
|
85
|
-
"successes": 2,
|
|
86
|
-
"failures": 1,
|
|
87
|
-
"lastRepair": {
|
|
88
|
-
"error": "TypeError: Cannot read property 'id' of undefined",
|
|
89
|
-
"resolution": "Added null check before accessing user.id",
|
|
90
|
-
"tokens": 1820,
|
|
91
|
-
"cost": 0.0045,
|
|
92
|
-
"mode": "fast",
|
|
93
|
-
"timestamp": 1775073200000
|
|
94
|
-
}
|
|
95
|
-
},
|
|
96
|
-
|
|
97
|
-
"usage": {
|
|
98
|
-
"totalTokens": 45000,
|
|
99
|
-
"totalCost": 0.12,
|
|
100
|
-
"totalCalls": 85,
|
|
101
|
-
"byCategory": {
|
|
102
|
-
"heal": { "tokens": 12000, "cost": 0.04, "calls": 5 },
|
|
103
|
-
"chat": { "tokens": 25000, "cost": 0.05, "calls": 60 },
|
|
104
|
-
"classify": { "tokens": 3000, "cost": 0.001, "calls": 15 },
|
|
105
|
-
"develop": { "tokens": 5000, "cost": 0.03, "calls": 5 }
|
|
106
|
-
},
|
|
107
|
-
"byModel": {
|
|
108
|
-
"gpt-5.4-mini": { "tokens": 30000, "cost": 0.06, "calls": 40 },
|
|
109
|
-
"gpt-4o-mini": { "tokens": 15000, "cost": 0.02, "calls": 45 }
|
|
110
|
-
},
|
|
111
|
-
"byTool": {
|
|
112
|
-
"call_endpoint": { "tokens": 5000, "cost": 0.01, "calls": 20 },
|
|
113
|
-
"search_brain": { "tokens": 2000, "cost": 0.005, "calls": 10 }
|
|
114
|
-
}
|
|
115
|
-
},
|
|
116
|
-
|
|
117
|
-
"brain": {
|
|
118
|
-
"totalMemories": 45,
|
|
119
|
-
"namespaces": { "docs": 23, "functions": 12, "errors": 5, "fixes": 3, "learnings": 2 }
|
|
120
|
-
},
|
|
121
|
-
|
|
122
|
-
"backups": {
|
|
123
|
-
"total": 8,
|
|
124
|
-
"stable": 3,
|
|
125
|
-
"verified": 2,
|
|
126
|
-
"unstable": 3
|
|
127
|
-
},
|
|
128
|
-
|
|
129
|
-
"alerts": [
|
|
130
|
-
{
|
|
131
|
-
"type": "memory_leak",
|
|
132
|
-
"message": "Memory growing: +50MB over 10 samples",
|
|
133
|
-
"severity": "warn",
|
|
134
|
-
"timestamp": 1775073100000
|
|
135
|
-
}
|
|
136
|
-
]
|
|
137
|
-
}
|
|
138
|
-
```
|
|
139
|
-
|
|
140
|
-
### Design Principles
|
|
141
|
-
|
|
142
|
-
- **Infrequent**: 1 heartbeat per 60 seconds = 1440/day per instance
|
|
143
|
-
- **Small**: ~2KB per payload, gzipped < 500 bytes
|
|
144
|
-
- **Idempotent**: same heartbeat can be sent twice safely (upsert by instanceId + timestamp)
|
|
145
|
-
- **Offline-resilient**: if platform is down, wolverine queues heartbeats and replays on reconnect
|
|
146
|
-
- **No PII**: never send secrets, user data, or source code in heartbeats
|
|
147
|
-
|
|
148
|
-
---
|
|
149
|
-
|
|
150
|
-
## Platform Backend Architecture
|
|
151
|
-
|
|
152
|
-
### Database Schema (PostgreSQL)
|
|
153
|
-
|
|
154
|
-
```sql
|
|
155
|
-
-- Servers — one row per wolverine instance
|
|
156
|
-
CREATE TABLE servers (
|
|
157
|
-
id TEXT PRIMARY KEY, -- "wlv_a1b2c3d4"
|
|
158
|
-
name TEXT NOT NULL,
|
|
159
|
-
version TEXT,
|
|
160
|
-
first_seen TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
161
|
-
last_heartbeat TIMESTAMPTZ NOT NULL,
|
|
162
|
-
status TEXT NOT NULL DEFAULT 'unknown', -- healthy, degraded, down, unknown
|
|
163
|
-
config JSONB -- port, models, etc.
|
|
164
|
-
);
|
|
165
|
-
|
|
166
|
-
-- Time-series heartbeats — partitioned by day for scale
|
|
167
|
-
CREATE TABLE heartbeats (
|
|
168
|
-
id BIGSERIAL,
|
|
169
|
-
server_id TEXT NOT NULL REFERENCES servers(id),
|
|
170
|
-
timestamp TIMESTAMPTZ NOT NULL,
|
|
171
|
-
uptime INTEGER,
|
|
172
|
-
memory_mb INTEGER,
|
|
173
|
-
cpu_percent INTEGER,
|
|
174
|
-
routes_total INTEGER,
|
|
175
|
-
routes_healthy INTEGER,
|
|
176
|
-
routes_unhealthy INTEGER,
|
|
177
|
-
tokens_total INTEGER,
|
|
178
|
-
cost_total NUMERIC(10,6),
|
|
179
|
-
repairs_total INTEGER,
|
|
180
|
-
repairs_successes INTEGER,
|
|
181
|
-
payload JSONB -- full heartbeat for deep queries
|
|
182
|
-
) PARTITION BY RANGE (timestamp);
|
|
183
|
-
|
|
184
|
-
-- Create daily partitions automatically (pg_partman or manual)
|
|
185
|
-
-- This allows dropping old data by partition instead of DELETE
|
|
186
|
-
|
|
187
|
-
-- Repairs — detailed log of every fix
|
|
188
|
-
CREATE TABLE repairs (
|
|
189
|
-
id BIGSERIAL PRIMARY KEY,
|
|
190
|
-
server_id TEXT NOT NULL REFERENCES servers(id),
|
|
191
|
-
timestamp TIMESTAMPTZ NOT NULL,
|
|
192
|
-
error TEXT,
|
|
193
|
-
resolution TEXT,
|
|
194
|
-
success BOOLEAN,
|
|
195
|
-
mode TEXT, -- fast, agent, sub-agents
|
|
196
|
-
model TEXT,
|
|
197
|
-
tokens INTEGER,
|
|
198
|
-
cost NUMERIC(10,6),
|
|
199
|
-
iteration INTEGER,
|
|
200
|
-
duration_ms INTEGER
|
|
201
|
-
);
|
|
202
|
-
|
|
203
|
-
-- Alerts — active and historical
|
|
204
|
-
CREATE TABLE alerts (
|
|
205
|
-
id BIGSERIAL PRIMARY KEY,
|
|
206
|
-
server_id TEXT NOT NULL REFERENCES servers(id),
|
|
207
|
-
type TEXT NOT NULL, -- memory_leak, route_down, crash_loop, etc.
|
|
208
|
-
message TEXT,
|
|
209
|
-
severity TEXT, -- info, warn, error, critical
|
|
210
|
-
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
211
|
-
acknowledged_at TIMESTAMPTZ,
|
|
212
|
-
resolved_at TIMESTAMPTZ,
|
|
213
|
-
acknowledged_by TEXT
|
|
214
|
-
);
|
|
215
|
-
|
|
216
|
-
-- Usage aggregates — hourly rollups for fast analytics
|
|
217
|
-
CREATE TABLE usage_hourly (
|
|
218
|
-
server_id TEXT NOT NULL REFERENCES servers(id),
|
|
219
|
-
hour TIMESTAMPTZ NOT NULL,
|
|
220
|
-
tokens_total INTEGER DEFAULT 0,
|
|
221
|
-
cost_total NUMERIC(10,6) DEFAULT 0,
|
|
222
|
-
calls_total INTEGER DEFAULT 0,
|
|
223
|
-
tokens_by_category JSONB,
|
|
224
|
-
PRIMARY KEY (server_id, hour)
|
|
225
|
-
);
|
|
226
|
-
|
|
227
|
-
-- Indexes for common queries
|
|
228
|
-
CREATE INDEX idx_heartbeats_server_time ON heartbeats (server_id, timestamp DESC);
|
|
229
|
-
CREATE INDEX idx_repairs_server_time ON repairs (server_id, timestamp DESC);
|
|
230
|
-
CREATE INDEX idx_alerts_active ON alerts (server_id) WHERE resolved_at IS NULL;
|
|
231
|
-
CREATE INDEX idx_servers_status ON servers (status);
|
|
232
|
-
```
|
|
233
|
-
|
|
234
|
-
### API Endpoints
|
|
235
|
-
|
|
236
|
-
```
|
|
237
|
-
Authentication: Bearer token (PLATFORM_API_KEY)
|
|
238
|
-
|
|
239
|
-
POST /api/v1/heartbeat ← Receive heartbeat from wolverine instance
|
|
240
|
-
→ Upsert server, insert heartbeat, process alerts
|
|
241
|
-
→ Returns: { received: true, serverTime: "..." }
|
|
242
|
-
|
|
243
|
-
GET /api/v1/servers ← List all instances
|
|
244
|
-
→ Query: ?status=healthy&sort=last_heartbeat&limit=50&offset=0
|
|
245
|
-
→ Returns: { servers: [...], total: 150, page: 1 }
|
|
246
|
-
|
|
247
|
-
GET /api/v1/servers/:id ← Single instance detail
|
|
248
|
-
→ Returns: full server state + recent heartbeats + repairs + alerts
|
|
249
|
-
|
|
250
|
-
GET /api/v1/servers/:id/heartbeats ← Heartbeat history
|
|
251
|
-
→ Query: ?from=2026-04-01&to=2026-04-02&interval=5m
|
|
252
|
-
→ Returns: time-series data for charting
|
|
253
|
-
|
|
254
|
-
GET /api/v1/servers/:id/repairs ← Repair history for one server
|
|
255
|
-
→ Query: ?limit=50&success=true
|
|
256
|
-
→ Returns: { repairs: [...], stats: { total, successes, avgTokens } }
|
|
257
|
-
|
|
258
|
-
GET /api/v1/analytics ← Fleet-wide aggregates
|
|
259
|
-
→ Query: ?period=24h or ?from=...&to=...
|
|
260
|
-
→ Returns: {
|
|
261
|
-
totalServers, activeServers, totalRepairs, successRate,
|
|
262
|
-
totalTokens, totalCost, tokensByCategory, costByModel,
|
|
263
|
-
uptimePercent, avgResponseTime
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
GET /api/v1/analytics/cost ← Cost breakdown
|
|
267
|
-
→ Query: ?period=7d&groupBy=server|model|category
|
|
268
|
-
→ Returns: cost time-series + breakdown
|
|
269
|
-
|
|
270
|
-
GET /api/v1/alerts ← Active alerts across fleet
|
|
271
|
-
→ Query: ?severity=critical&acknowledged=false
|
|
272
|
-
→ Returns: { alerts: [...], total: 5 }
|
|
273
|
-
|
|
274
|
-
PATCH /api/v1/alerts/:id ← Acknowledge/resolve alert
|
|
275
|
-
→ Body: { action: "acknowledge" | "resolve", by: "admin@..." }
|
|
276
|
-
|
|
277
|
-
WS /ws/live ← Real-time WebSocket stream
|
|
278
|
-
→ Streams: heartbeats, alerts, repairs as they arrive
|
|
279
|
-
→ Subscribe: { subscribe: ["heartbeat", "alert", "repair"] }
|
|
280
|
-
→ Filter: { servers: ["wlv_a1b2c3d4"] }
|
|
281
|
-
```
|
|
282
|
-
|
|
283
|
-
### Scaling Strategy
|
|
284
|
-
|
|
285
|
-
```
|
|
286
|
-
10 servers: Single PostgreSQL, single Node.js backend
|
|
287
|
-
100 servers: PostgreSQL with connection pooling (pgBouncer), Redis cache
|
|
288
|
-
1,000 servers: Partitioned heartbeats table, read replicas, queue workers
|
|
289
|
-
10,000 servers: TimescaleDB for time-series, horizontal API scaling, Kafka for ingestion
|
|
290
|
-
100,000+: Sharded by server_id, dedicated ingestion pipeline, ClickHouse for analytics
|
|
291
|
-
```
|
|
292
|
-
|
|
293
|
-
**Key scaling decisions:**
|
|
294
|
-
- Heartbeats are **append-only** — no updates, only inserts → perfect for time-series DBs
|
|
295
|
-
- Hourly rollups in `usage_hourly` prevent expensive full-table scans for analytics
|
|
296
|
-
- Partitioned by day → drop old data by partition (instant, no vacuum)
|
|
297
|
-
- Redis caches the "current state" of each server (latest heartbeat) → fast fleet overview
|
|
298
|
-
- WebSocket uses Redis pub/sub → horizontal scaling of frontend connections
|
|
299
|
-
- Alert processing is async via job queue → doesn't block heartbeat ingestion
|
|
300
|
-
|
|
301
|
-
### Redis Structure
|
|
302
|
-
|
|
303
|
-
```
|
|
304
|
-
wolverine:server:{id}:state ← Latest heartbeat (JSON, TTL 5min)
|
|
305
|
-
wolverine:server:{id}:uptime ← Uptime counter (INCR every heartbeat)
|
|
306
|
-
wolverine:servers:active ← Sorted set (score = last_heartbeat timestamp)
|
|
307
|
-
wolverine:alerts:active ← Set of active alert IDs
|
|
308
|
-
wolverine:stats:fleet ← Cached fleet-wide aggregates (TTL 30s)
|
|
309
|
-
wolverine:pubsub:heartbeats ← Pub/sub channel for real-time streaming
|
|
310
|
-
wolverine:pubsub:alerts ← Pub/sub channel for alert notifications
|
|
311
|
-
```
|
|
312
|
-
|
|
313
|
-
---
|
|
314
|
-
|
|
315
|
-
## Platform Frontend
|
|
316
|
-
|
|
317
|
-
### Pages
|
|
318
|
-
|
|
319
|
-
**1. Fleet Overview**
|
|
320
|
-
- Grid/list of all server instances
|
|
321
|
-
- Color-coded status: green (healthy), yellow (degraded), red (down), gray (unknown)
|
|
322
|
-
- Sortable by: status, uptime, memory, cost, last repair
|
|
323
|
-
- Search/filter by name, status, tags
|
|
324
|
-
- Fleet-wide stats bar: total servers, active, repairs today, cost today
|
|
325
|
-
|
|
326
|
-
**2. Server Detail**
|
|
327
|
-
- Real-time stats: memory, CPU, uptime, routes
|
|
328
|
-
- Event timeline (same as local dashboard but from platform data)
|
|
329
|
-
- Repair history with resolution details + token cost
|
|
330
|
-
- Usage chart: tokens over time, cost over time
|
|
331
|
-
- Route health table with response time trends
|
|
332
|
-
- Backup status
|
|
333
|
-
- Brain stats
|
|
334
|
-
|
|
335
|
-
**3. Analytics**
|
|
336
|
-
- Fleet-wide token usage over time (by day/hour)
|
|
337
|
-
- Cost breakdown: by server, by model, by category
|
|
338
|
-
- Repair success rate over time
|
|
339
|
-
- Mean time to repair (MTTR) trend
|
|
340
|
-
- Most expensive servers / most repaired servers
|
|
341
|
-
- Uptime SLA tracking (99.9% target)
|
|
342
|
-
- Response time percentiles across fleet
|
|
343
|
-
|
|
344
|
-
**4. Alerts**
|
|
345
|
-
- Active alerts sorted by severity
|
|
346
|
-
- Acknowledge / resolve workflow
|
|
347
|
-
- Alert history with resolution notes
|
|
348
|
-
- Alert rules configuration (memory threshold, crash count, response time)
|
|
349
|
-
|
|
350
|
-
**5. Cost Management**
|
|
351
|
-
- Total spend by period (day/week/month)
|
|
352
|
-
- Per-server cost ranking
|
|
353
|
-
- Per-model cost ranking
|
|
354
|
-
- Projected monthly cost based on current usage
|
|
355
|
-
- Budget alerts (notify when approaching limit)
|
|
356
|
-
|
|
357
|
-
### Tech Stack Recommendation
|
|
358
|
-
|
|
359
|
-
```
|
|
360
|
-
Frontend: Next.js + Tailwind + Recharts (or Tremor for dashboard components)
|
|
361
|
-
Backend: Node.js + Express + PostgreSQL + Redis + BullMQ
|
|
362
|
-
Auth: NextAuth.js or Clerk (team management)
|
|
363
|
-
Hosting: Vercel (frontend) + Railway/Fly.io (backend) + Supabase (PostgreSQL)
|
|
364
|
-
WebSocket: Socket.io or native WS through the backend
|
|
365
|
-
```
|
|
366
|
-
|
|
367
|
-
---
|
|
368
|
-
|
|
369
|
-
## Wolverine Client Integration
|
|
370
|
-
|
|
371
|
-
### New env variables for the wolverine instance:
|
|
372
|
-
|
|
373
|
-
```env
|
|
374
|
-
# Platform telemetry (optional — wolverine works fine without it)
|
|
375
|
-
WOLVERINE_PLATFORM_URL=https://api.wolverine.dev
|
|
376
|
-
WOLVERINE_PLATFORM_KEY=wlvk_your_api_key_here
|
|
377
|
-
WOLVERINE_INSTANCE_NAME=my-api-prod
|
|
378
|
-
WOLVERINE_HEARTBEAT_INTERVAL_MS=60000
|
|
379
|
-
```
|
|
380
|
-
|
|
381
|
-
### Telemetry module to build in wolverine:
|
|
382
|
-
|
|
383
|
-
```
|
|
384
|
-
src/platform/
|
|
385
|
-
├── telemetry.js ← Collects heartbeat data from all subsystems
|
|
386
|
-
├── heartbeat.js ← Sends heartbeat to platform on interval
|
|
387
|
-
└── queue.js ← Queues heartbeats when platform is unreachable
|
|
388
|
-
```
|
|
389
|
-
|
|
390
|
-
**telemetry.js** gathers data from:
|
|
391
|
-
- `processMonitor.getMetrics()` → memory, CPU
|
|
392
|
-
- `routeProber.getMetrics()` → route health
|
|
393
|
-
- `tokenTracker.getAnalytics()` → usage
|
|
394
|
-
- `repairHistory.getStats()` → repairs
|
|
395
|
-
- `backupManager.getStats()` → backups
|
|
396
|
-
- `brain.getStats()` → brain
|
|
397
|
-
- `notifier` → active alerts
|
|
398
|
-
|
|
399
|
-
**heartbeat.js** sends it:
|
|
400
|
-
- HTTP POST to platform every 60s
|
|
401
|
-
- Gzip compressed
|
|
402
|
-
- Timeout: 5s (don't block if platform is slow)
|
|
403
|
-
- On failure: queue locally, retry with exponential backoff
|
|
404
|
-
- On reconnect: replay queued heartbeats
|
|
405
|
-
|
|
406
|
-
**queue.js** handles offline resilience:
|
|
407
|
-
- Append to `.wolverine/heartbeat-queue.jsonl` when platform unreachable
|
|
408
|
-
- On next successful heartbeat, drain the queue (oldest first)
|
|
409
|
-
- Max queue size: 1440 entries (24 hours of heartbeats at the default 60s interval)
|
|
410
|
-
- After 24h, drop oldest entries (stale data isn't useful)
|
|
411
|
-
|
|
412
|
-
---
|
|
413
|
-
|
|
414
|
-
## Security Considerations
|
|
415
|
-
|
|
416
|
-
- **Platform API key** per instance — revokable, rotatable
|
|
417
|
-
- **Secret redactor** runs on heartbeat payload before sending (no env values leak)
|
|
418
|
-
- **No source code** in heartbeats — only metrics, error messages (redacted), and stats
|
|
419
|
-
- **TLS only** — platform endpoint must be HTTPS
|
|
420
|
-
- **Rate limiting** on platform ingestion — max 1 heartbeat/second per instance
|
|
421
|
-
- **Tenant isolation** — multi-tenant platform must scope data by organization
|
|
422
|
-
- **Audit log** — track who acknowledged/resolved alerts
|
|
423
|
-
|
|
424
|
-
---
|
|
425
|
-
|
|
426
|
-
## Implementation Priority
|
|
427
|
-
|
|
428
|
-
### Phase 1: Core (1-2 weeks)
|
|
429
|
-
1. Platform backend: heartbeat ingestion + server listing + basic API
|
|
430
|
-
2. Wolverine telemetry module: collect + send heartbeats
|
|
431
|
-
3. Frontend: fleet overview + server detail page
|
|
432
|
-
4. PostgreSQL schema + Redis caching
|
|
433
|
-
|
|
434
|
-
### Phase 2: Analytics (1 week)
|
|
435
|
-
1. Hourly usage rollups
|
|
436
|
-
2. Cost analytics page
|
|
437
|
-
3. Repair history aggregation
|
|
438
|
-
4. Uptime tracking
|
|
439
|
-
|
|
440
|
-
### Phase 3: Alerting (1 week)
|
|
441
|
-
1. Alert rules engine
|
|
442
|
-
2. Acknowledge/resolve workflow
|
|
443
|
-
3. Email/Slack/webhook notifications
|
|
444
|
-
4. Alert history
|
|
445
|
-
|
|
446
|
-
### Phase 4: Scale (ongoing)
|
|
447
|
-
1. TimescaleDB migration for heartbeats
|
|
448
|
-
2. Horizontal API scaling
|
|
449
|
-
3. WebSocket real-time streaming
|
|
450
|
-
4. Team management + RBAC
|
package/SERVER_BEST_PRACTICES.md
DELETED
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
# Wolverine Server Best Practices
|
|
2
|
-
|
|
3
|
-
Rules for building secure, scalable, well-structured servers. Wolverine's agent follows these when building or editing server code.
|
|
4
|
-
|
|
5
|
-
## Structure
|
|
6
|
-
|
|
7
|
-
```
|
|
8
|
-
server/
|
|
9
|
-
├── index.js Entry point — app setup, middleware, route mounting, listen
|
|
10
|
-
├── routes/ Route modules — one file per resource
|
|
11
|
-
│ ├── health.js Health check endpoint (always required)
|
|
12
|
-
│ └── api.js API routes
|
|
13
|
-
├── middleware/ Custom middleware (auth, validation, logging)
|
|
14
|
-
├── models/ Data models / database schemas
|
|
15
|
-
├── services/ Business logic (keep routes thin)
|
|
16
|
-
├── config/ Configuration files
|
|
17
|
-
└── utils/ Shared utilities
|
|
18
|
-
```
|
|
19
|
-
|
|
20
|
-
## Rules
|
|
21
|
-
|
|
22
|
-
### Ports
|
|
23
|
-
- **Development**: use port 3000 (standard, no admin required, firewall-friendly)
|
|
24
|
-
- **Production**: use port 443 (HTTPS) or 80 (HTTP) behind a reverse proxy (nginx/caddy)
|
|
25
|
-
- **Never** use random high ports in production — they bypass firewalls and confuse load balancers
|
|
26
|
-
- **Always** use HTTPS in production — terminate TLS at the reverse proxy, not in Node
|
|
27
|
-
- Dashboard runs on port+1 automatically (3001 in dev, not exposed in prod)
|
|
28
|
-
|
|
29
|
-
### Security
|
|
30
|
-
- Never expose secrets in responses — use env vars, never hardcode
|
|
31
|
-
- Validate ALL input — Fastify has built-in JSON schema validation
|
|
32
|
-
- Use HTTPS in production — reverse proxy (nginx/caddy) handles TLS
|
|
33
|
-
- Rate limit public endpoints
|
|
34
|
-
- Sanitize user input before database queries — use the SQL skill
|
|
35
|
-
- Never return stack traces in production error responses
|
|
36
|
-
- Use the sqlGuard() middleware on all routes that accept user input
|
|
37
|
-
|
|
38
|
-
### Scalability
|
|
39
|
-
- Keep routes thin — business logic goes in services/
|
|
40
|
-
- Use async/await, never block the event loop
|
|
41
|
-
- Add a /health endpoint that returns status + uptime + memory
|
|
42
|
-
- Use environment variables for all configuration
|
|
43
|
-
- Structure for horizontal scaling — no in-memory session state
|
|
44
|
-
|
|
45
|
-
### Error Handling
|
|
46
|
-
- Always have a global error handler middleware
|
|
47
|
-
- Log errors with context (timestamp, request path, user)
|
|
48
|
-
- Return consistent error response format: { error: "message" }
|
|
49
|
-
- Never swallow errors silently
|
|
50
|
-
- Use try/catch in async route handlers
|
|
51
|
-
|
|
52
|
-
### Code Quality
|
|
53
|
-
- One route file per resource (users.js, orders.js, etc.)
|
|
54
|
-
- Export one route module from each route file — a Fastify plugin function, or express.Router() if the server is Express-based
|
|
55
|
-
- Mount routes in index.js with clear prefixes
|
|
56
|
-
- Use middleware for cross-cutting concerns (auth, logging)
|
|
57
|
-
- Keep index.js under 50 lines — it's just wiring
|
|
58
|
-
|
|
59
|
-
### Database
|
|
60
|
-
- Use connection pooling
|
|
61
|
-
- Handle connection errors gracefully
|
|
62
|
-
- Use migrations for schema changes
|
|
63
|
-
- Never use string concatenation for queries — use parameterized queries
|
|
64
|
-
- Close connections on process exit
|
|
65
|
-
|
|
66
|
-
### Monitoring
|
|
67
|
-
- /health endpoint is mandatory
|
|
68
|
-
- Log request duration for slow endpoint detection
|
|
69
|
-
- Use structured logging (JSON format)
|
|
70
|
-
- Track error rates per endpoint
|
package/TELEMETRY.md
DELETED
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
# Wolverine Telemetry
|
|
2
|
-
|
|
3
|
-
Connect your Wolverine instance to a platform backend for fleet-wide monitoring, uptime tracking, and cost analytics.
|
|
4
|
-
|
|
5
|
-
## Setup
|
|
6
|
-
|
|
7
|
-
### 1. Deploy your platform backend
|
|
8
|
-
|
|
9
|
-
See [PLATFORM.md](PLATFORM.md) for the full backend spec — database schema, API endpoints, scaling strategy.
|
|
10
|
-
|
|
11
|
-
Your backend needs to implement:
|
|
12
|
-
- `POST /api/v1/heartbeat` — receive heartbeat payloads
|
|
13
|
-
- `GET /api/v1/servers` — list connected instances
|
|
14
|
-
- Standard Bearer token auth
|
|
15
|
-
|
|
16
|
-
### 2. Configure your Wolverine instance
|
|
17
|
-
|
|
18
|
-
Add to `.env.local`:
|
|
19
|
-
|
|
20
|
-
```env
|
|
21
|
-
WOLVERINE_PLATFORM_URL=https://your-platform.com
|
|
22
|
-
WOLVERINE_PLATFORM_KEY=your_api_key_here
|
|
23
|
-
```
|
|
24
|
-
|
|
25
|
-
That's it. Wolverine starts sending heartbeats every 60 seconds.
|
|
26
|
-
|
|
27
|
-
### Optional settings
|
|
28
|
-
|
|
29
|
-
```env
|
|
30
|
-
# Human-readable name (defaults to folder name)
|
|
31
|
-
WOLVERINE_INSTANCE_NAME=my-api-prod
|
|
32
|
-
|
|
33
|
-
# Heartbeat interval in ms (default: 60000 = 1 minute)
|
|
34
|
-
WOLVERINE_HEARTBEAT_INTERVAL_MS=60000
|
|
35
|
-
```
|
|
36
|
-
|
|
37
|
-
### 3. Verify
|
|
38
|
-
|
|
39
|
-
On startup you'll see:
|
|
40
|
-
|
|
41
|
-
```
|
|
42
|
-
📡 Platform: https://your-platform.com (every 60s)
|
|
43
|
-
📡 Instance: wlv_a8f3e9b1c4d7
|
|
44
|
-
```
|
|
45
|
-
|
|
46
|
-
If the platform is unreachable, heartbeats queue locally in `.wolverine/heartbeat-queue.jsonl` and drain automatically when connectivity returns.
|
|
47
|
-
|
|
48
|
-
---
|
|
49
|
-
|
|
50
|
-
## Heartbeat Payload
|
|
51
|
-
|
|
52
|
-
Each heartbeat is ~2KB JSON, sent every 60 seconds:
|
|
53
|
-
|
|
54
|
-
```json
|
|
55
|
-
{
|
|
56
|
-
"instanceId": "wlv_a8f3e9b1c4d7",
|
|
57
|
-
"version": "0.1.0",
|
|
58
|
-
"timestamp": 1775073247574,
|
|
59
|
-
"server": {
|
|
60
|
-
"name": "my-api",
|
|
61
|
-
"port": 3000,
|
|
62
|
-
"uptime": 86400,
|
|
63
|
-
"status": "healthy",
|
|
64
|
-
"pid": 12345
|
|
65
|
-
},
|
|
66
|
-
"process": {
|
|
67
|
-
"memoryMB": 128,
|
|
68
|
-
"cpuPercent": 12,
|
|
69
|
-
"peakMemoryMB": 256
|
|
70
|
-
},
|
|
71
|
-
"routes": {
|
|
72
|
-
"total": 8,
|
|
73
|
-
"healthy": 8,
|
|
74
|
-
"unhealthy": 0
|
|
75
|
-
},
|
|
76
|
-
"repairs": {
|
|
77
|
-
"total": 3,
|
|
78
|
-
"successes": 2,
|
|
79
|
-
"failures": 1,
|
|
80
|
-
"lastRepair": { "error": "...", "resolution": "...", "tokens": 1820, "cost": 0.0045 }
|
|
81
|
-
},
|
|
82
|
-
"usage": {
|
|
83
|
-
"totalTokens": 45000,
|
|
84
|
-
"totalCost": 0.12,
|
|
85
|
-
"totalCalls": 85,
|
|
86
|
-
"byCategory": { "heal": {...}, "chat": {...}, "develop": {...} }
|
|
87
|
-
},
|
|
88
|
-
"brain": { "totalMemories": 45 },
|
|
89
|
-
"backups": { "total": 8, "stable": 3 }
|
|
90
|
-
}
|
|
91
|
-
```
|
|
92
|
-
|
|
93
|
-
## Design
|
|
94
|
-
|
|
95
|
-
- **Opt-in**: disabled unless `WOLVERINE_PLATFORM_URL` and `WOLVERINE_PLATFORM_KEY` are set
|
|
96
|
-
- **Lightweight**: 1 request per 60s, ~2KB payload
|
|
97
|
-
- **Offline-resilient**: queues locally when platform is down, replays on reconnect (max 24h / 1440 entries)
|
|
98
|
-
- **Secure**: secrets redacted before sending, HTTPS supported, Bearer token auth
|
|
99
|
-
- **No source code**: only metrics, redacted error messages, and stats
|
|
100
|
-
|
|
101
|
-
## Files
|
|
102
|
-
|
|
103
|
-
```
|
|
104
|
-
src/platform/
|
|
105
|
-
├── telemetry.js — Collects metrics from all subsystems into heartbeat payload
|
|
106
|
-
├── heartbeat.js — Sends heartbeats on interval, handles failures
|
|
107
|
-
└── queue.js — Offline queue with replay on reconnect
|
|
108
|
-
```
|