wolverine-ai 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -1
- package/package.json +1 -1
- package/src/brain/brain.js +4 -0
- package/src/core/error-hook.js +127 -0
- package/src/core/runner.js +89 -2
- package/src/dashboard/server.js +2 -0
- package/src/index.js +2 -0
- package/src/monitor/error-monitor.js +121 -0
package/README.md
CHANGED
|
@@ -74,6 +74,7 @@ wolverine/
|
|
|
74
74
|
│ │ ├── models.js ← 10-model configuration system
|
|
75
75
|
│ │ ├── verifier.js ← Fix verification (syntax + boot probe)
|
|
76
76
|
│ │ ├── error-parser.js ← Stack trace parsing + error classification
|
|
77
|
+
│ │ ├── error-hook.js ← Auto-injected into child (IPC error reporting)
|
|
77
78
|
│ │ ├── patcher.js ← File patching with sandbox
|
|
78
79
|
│ │ ├── health-monitor.js← PM2-style health checks
|
|
79
80
|
│ │ ├── config.js ← Config loader (settings.json + env)
|
|
@@ -105,7 +106,8 @@ wolverine/
|
|
|
105
106
|
│ ├── monitor/ ← Performance + process management
|
|
106
107
|
│ │ ├── perf-monitor.js ← Endpoint response times + spam detection
|
|
107
108
|
│ │ ├── process-monitor.js← Memory/CPU/heartbeat + leak detection
|
|
108
|
-
│ │
|
|
109
|
+
│ │ ├── route-prober.js ← Auto-discovers and tests all routes
|
|
110
|
+
│ │ └── error-monitor.js ← Caught 500 error detection (no-crash healing)
|
|
109
111
|
│ ├── dashboard/ ← Web UI
|
|
110
112
|
│ │ └── server.js ← Real-time dashboard + command interface
|
|
111
113
|
│ ├── notifications/ ← Alerts
|
|
@@ -176,6 +178,26 @@ After fix:
|
|
|
176
178
|
→ Promote backup to stable after 30min uptime
|
|
177
179
|
```
|
|
178
180
|
|
|
181
|
+
### Caught Error Healing (No-Crash)
|
|
182
|
+
|
|
183
|
+
Most production bugs don't crash the process — Fastify/Express catch them and return 500. Wolverine now detects these too:
|
|
184
|
+
|
|
185
|
+
```
|
|
186
|
+
Route returns 500 (process still alive)
|
|
187
|
+
→ Error hook reports to parent via IPC (auto-injected, zero user code changes)
|
|
188
|
+
→ ErrorMonitor tracks consecutive 500s per route
|
|
189
|
+
→ 3 failures in 30s → triggers heal pipeline (same as crash healing)
|
|
190
|
+
→ Fix applied → server restarted → route prober verifies fix
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
| Setting | Default | Env Variable |
|
|
194
|
+
|---------|---------|-------------|
|
|
195
|
+
| Failure threshold | 3 | `WOLVERINE_ERROR_THRESHOLD` |
|
|
196
|
+
| Time window | 30s | `WOLVERINE_ERROR_WINDOW_MS` |
|
|
197
|
+
| Cooldown per route | 60s | `WOLVERINE_ERROR_COOLDOWN_MS` |
|
|
198
|
+
|
|
199
|
+
The error hook auto-patches Fastify and Express via `--require` preload. No middleware, no code changes to your server.
|
|
200
|
+
|
|
179
201
|
---
|
|
180
202
|
|
|
181
203
|
## Agent Tool Harness
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wolverine-ai",
|
|
3
|
-
"version": "1.3.0",
|
|
3
|
+
"version": "1.4.0",
|
|
4
4
|
"description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
package/src/brain/brain.js
CHANGED
|
@@ -211,6 +211,10 @@ const SEED_DOCS = [
|
|
|
211
211
|
text: "Agent fix strategy table: the agent system prompt includes a decision table mapping error patterns to correct fix actions. Cannot find module 'X' (package) → bash_exec: npm install X. Cannot find module './X' (local) → edit_file: fix require path. ENOENT → write_file: create missing file. EACCES → bash_exec: chmod. EADDRINUSE → bash_exec: kill process. SyntaxError → edit_file: fix code. TypeError → edit_file: fix logic. MODULE_NOT_FOUND + node_modules → bash_exec: rm -rf node_modules && npm install. The fast path AI response format now supports both 'changes' (code edits) and 'commands' (shell commands like npm install). Dangerous commands blocked: rm -rf /, format, mkfs.",
|
|
212
212
|
metadata: { topic: "agent-fix-strategy" },
|
|
213
213
|
},
|
|
214
|
+
{
|
|
215
|
+
text: "Error Monitor: detects caught 500 errors that don't crash the process. Most production bugs are caught by Fastify/Express error handlers — the server stays alive but routes return 500. Wolverine's crash-based heal pipeline never triggers for these. ErrorMonitor tracks 5xx errors per route via IPC from child process. After N consecutive 500s within a time window (default: 3 failures in 30s), triggers the heal pipeline without killing the server. Error hook auto-injected via --require preload (no user code changes). Cooldown prevents heal spam (default: 60s per route). Stats available in dashboard and telemetry. Config: WOLVERINE_ERROR_THRESHOLD, WOLVERINE_ERROR_WINDOW_MS, WOLVERINE_ERROR_COOLDOWN_MS.",
|
|
216
|
+
metadata: { topic: "error-monitor" },
|
|
217
|
+
},
|
|
214
218
|
];
|
|
215
219
|
|
|
216
220
|
class Brain {
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Error Hook — preloaded into the child server process via --require.
|
|
3
|
+
*
|
|
4
|
+
* Patches Fastify and Express error handlers to report caught errors
|
|
5
|
+
* back to the Wolverine parent process via IPC. This enables healing
|
|
6
|
+
* of 500 errors that don't crash the process.
|
|
7
|
+
*
|
|
8
|
+
* How it works:
|
|
9
|
+
* 1. Runner spawns child with: node --require ./src/core/error-hook.js server/index.js
|
|
10
|
+
* 2. This file hooks into Module._load to intercept fastify/express creation
|
|
11
|
+
* 3. When a framework instance is created, we add an error handler that sends IPC messages
|
|
12
|
+
* 4. Parent's ErrorMonitor receives the messages and triggers heal after threshold
|
|
13
|
+
*
|
|
14
|
+
* Zero changes to user's server code.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
const Module = require("module");
const originalLoad = Module._load;

// original framework factory → instrumented factory. Keyed by the factory
// itself so every require() of the same module gets the same wrapper.
const _wrappedFactories = new WeakMap();

/**
 * Wrap a framework factory (the function exported by fastify/express) so that
 * every instance it creates is passed through `hookInstance`.
 *
 * @param {Function} original - the factory exported by the framework module
 * @param {Function} hookInstance - called with each newly created instance
 * @returns {Function} the (cached) instrumented factory
 */
function _wrapFactory(original, hookInstance) {
  const cached = _wrappedFactories.get(original);
  if (cached) return cached;

  const wrapped = function (...args) {
    const instance = original(...args);
    try {
      hookInstance(instance);
    } catch {
      // Instrumentation is best-effort: never prevent the user's server from
      // starting because the error hook could not be attached.
    }
    return instance;
  };

  // Preserve all properties (fastify.default, etc.). Properties that point
  // back at the factory itself are redirected to the wrapper so that
  // `require("fastify").default(...)` is instrumented as well.
  for (const key of Object.keys(original)) {
    const value = original[key];
    wrapped[key] = value === original ? wrapped : value;
  }

  _wrappedFactories.set(original, wrapped);
  return wrapped;
}

/**
 * Module._load override: delegates to the original loader and, for
 * "fastify" and "express", returns an instrumented factory instead of the
 * raw export. Runs on every require(), so the wrapper must be returned
 * each time, not only on the first load.
 */
Module._load = function (request, parent, isMain) {
  const result = originalLoad.apply(this, arguments);

  if (typeof result !== "function") return result;

  // Hook Fastify
  if (request === "fastify") return _wrapFactory(result, _hookFastify);

  // Hook Express
  if (request === "express") return _wrapFactory(result, _hookExpress);

  return result;
};
|
|
58
|
+
|
|
59
|
+
/**
 * Instrument a Fastify instance so caught request errors are reported to
 * the parent process.
 *
 * Two interception points are installed:
 *   - an `onError` hook on the given (root) instance
 *   - a patched `setErrorHandler` that reports before delegating to the
 *     user's handler
 *
 * @param {object} fastify - instance returned by the fastify factory
 */
function _hookFastify(fastify) {
  // Both interception points can observe the same error object; remember
  // what was already reported so one failure is not counted twice.
  const reported = new WeakSet();

  function reportOnce(request, error) {
    const trackable =
      (typeof error === "object" && error !== null) || typeof error === "function";
    if (trackable) {
      if (reported.has(error)) return;
      reported.add(error);
    }
    _reportError(request.url, request.method, error);
  }

  // Registered immediately on the root instance: Fastify hooks follow
  // encapsulation, so plugin scopes created afterwards inherit this hook.
  fastify.addHook("onError", function (request, reply, error, done) {
    reportOnce(request, error);
    done();
  });

  // Also intercept setErrorHandler if the user sets one. The original is
  // invoked with the caller's `this` (not bound to the root) because plugin
  // instances inherit this patched method and must keep their handler
  // scoped to themselves.
  const originalSetError = fastify.setErrorHandler;
  fastify.setErrorHandler = function (handler) {
    // Let Fastify perform its own validation of invalid handlers.
    if (typeof handler !== "function") {
      return originalSetError.call(this, handler);
    }
    return originalSetError.call(this, function (error, request, reply) {
      reportOnce(request, error);
      // Preserve the `this` the wrapper was invoked with for the user's handler.
      return handler.call(this, error, request, reply);
    });
  };
}
|
|
79
|
+
|
|
80
|
+
/**
 * Instrument an Express app so errors that reach the end of the middleware
 * chain are reported to the parent process.
 *
 * `app.listen` is patched so the reporting middleware is appended after all
 * middleware the user registered before listening. The middleware only
 * observes errors that are still propagating at that point (i.e. passed on
 * with `next(err)`), and it always forwards the error unchanged.
 *
 * @param {object} app - application returned by the express factory
 */
function _hookExpress(app) {
  const originalListen = app.listen.bind(app);
  // listen() may be called more than once (e.g. several ports); install the
  // reporter a single time so each error is reported exactly once.
  let installed = false;

  app.listen = function (...args) {
    if (!installed) {
      installed = true;
      // Add our error handler AFTER all user middleware. Express identifies
      // error middleware by its four declared parameters.
      app.use(function wolverineErrorHandler(err, req, res, next) {
        _reportError(req.originalUrl || req.url, req.method, err);
        next(err);
      });
    }
    return originalListen(...args);
  };
}
|
|
93
|
+
|
|
94
|
+
/**
 * Find the first stack frame that is not inside node_modules or Node core.
 *
 * @param {*} stack - value of `error.stack`; anything but a string is ignored
 * @returns {{ file: string|null, line: number|null }}
 */
function _findUserFrame(stack) {
  if (typeof stack !== "string") return { file: null, line: null };

  for (const frame of stack.split("\n")) {
    const match =
      frame.match(/\(([^)]+):(\d+):(\d+)\)/) || frame.match(/at\s+([^\s(]+):(\d+):(\d+)/);
    if (match && !match[1].includes("node_modules") && !match[1].includes("node:")) {
      return { file: match[1], line: parseInt(match[2], 10) };
    }
  }
  return { file: null, line: null };
}

/**
 * Report a caught route error to the parent process as a "route_error"
 * IPC message.
 *
 * Nothing is sent when there is no connected IPC channel, or when the error
 * carries a 4xx `statusCode`/`status` (a client error, not a server bug).
 * A 5xx status on the error is forwarded; otherwise 500 is reported.
 * The query string is removed from `url` so requests to the same route are
 * grouped together regardless of their parameters.
 *
 * @param {string} url - request URL as seen by the framework
 * @param {string} [method] - HTTP method; defaults to "GET"
 * @param {*} error - the caught error (normally an Error instance)
 */
function _reportError(url, method, error) {
  // No IPC channel — not spawned by wolverine. A disconnected channel is
  // skipped too: sending on it makes Node emit an 'error' event on process.
  if (typeof process.send !== "function" || process.connected === false) return;

  try {
    const status = Number(error?.statusCode ?? error?.status);
    const hasStatus = Number.isInteger(status);
    if (hasStatus && status >= 400 && status < 500) return;
    const statusCode = hasStatus && status >= 500 && status <= 599 ? status : 500;

    const stack = typeof error?.stack === "string" ? error.stack : "";
    const { file, line } = _findUserFrame(stack);
    const routePath = typeof url === "string" ? url.split("?")[0] || url : url;

    process.send({
      type: "route_error",
      path: routePath,
      method: method || "GET",
      statusCode,
      message: error?.message || "Unknown error",
      stack: stack.slice(0, 2000),
      file,
      line,
      timestamp: Date.now(),
    });
  } catch {
    // Silently fail — don't break the server for IPC issues
  }
}
|
package/src/core/runner.js
CHANGED
|
@@ -20,6 +20,7 @@ const { ProcessMonitor } = require("../monitor/process-monitor");
|
|
|
20
20
|
const { RouteProber } = require("../monitor/route-prober");
|
|
21
21
|
const { startHeartbeat, stopHeartbeat } = require("../platform/heartbeat");
|
|
22
22
|
const { Notifier } = require("../notifications/notifier");
|
|
23
|
+
const { ErrorMonitor } = require("../monitor/error-monitor");
|
|
23
24
|
|
|
24
25
|
/**
|
|
25
26
|
* The Wolverine process runner — v3.
|
|
@@ -90,6 +91,15 @@ class WolverineRunner {
|
|
|
90
91
|
brain: this.brain,
|
|
91
92
|
});
|
|
92
93
|
|
|
94
|
+
// Error monitor — detects caught 500 errors without process crash
|
|
95
|
+
this.errorMonitor = new ErrorMonitor({
|
|
96
|
+
threshold: parseInt(process.env.WOLVERINE_ERROR_THRESHOLD, 10) || 3,
|
|
97
|
+
windowMs: parseInt(process.env.WOLVERINE_ERROR_WINDOW_MS, 10) || 30000,
|
|
98
|
+
cooldownMs: parseInt(process.env.WOLVERINE_ERROR_COOLDOWN_MS, 10) || 60000,
|
|
99
|
+
logger: this.logger,
|
|
100
|
+
onError: (routePath, errorDetails) => this._healFromError(routePath, errorDetails),
|
|
101
|
+
});
|
|
102
|
+
|
|
93
103
|
// Brain — semantic memory + project context
|
|
94
104
|
this.brain = new Brain(this.cwd);
|
|
95
105
|
|
|
@@ -120,6 +130,7 @@ class WolverineRunner {
|
|
|
120
130
|
repairHistory: this.repairHistory,
|
|
121
131
|
processMonitor: this.processMonitor,
|
|
122
132
|
routeProber: this.routeProber,
|
|
133
|
+
errorMonitor: this.errorMonitor,
|
|
123
134
|
});
|
|
124
135
|
|
|
125
136
|
// Stability tracking
|
|
@@ -287,10 +298,13 @@ class WolverineRunner {
|
|
|
287
298
|
this._stderrBuffer = "";
|
|
288
299
|
this._lastStartTime = Date.now();
|
|
289
300
|
|
|
290
|
-
|
|
301
|
+
// Spawn with --require error-hook.js for IPC error reporting
|
|
302
|
+
// The error hook auto-patches Fastify/Express to report caught 500s
|
|
303
|
+
const errorHookPath = path.join(__dirname, "error-hook.js");
|
|
304
|
+
this.child = spawn("node", ["--require", errorHookPath, this.scriptPath], {
|
|
291
305
|
cwd: this.cwd,
|
|
292
306
|
env: { ...process.env },
|
|
293
|
-
stdio: ["inherit", "inherit", "pipe"],
|
|
307
|
+
stdio: ["inherit", "inherit", "pipe", "ipc"],
|
|
294
308
|
});
|
|
295
309
|
|
|
296
310
|
this.child.stderr.on("data", (data) => {
|
|
@@ -367,6 +381,30 @@ class WolverineRunner {
|
|
|
367
381
|
this.logger.error(EVENT_TYPES.PROCESS_CRASH, `Failed to start: ${err.message}`);
|
|
368
382
|
this.running = false;
|
|
369
383
|
});
|
|
384
|
+
|
|
385
|
+
// IPC channel: child reports caught 500 errors (Fastify/Express)
|
|
386
|
+
this.child.on("message", (msg) => {
|
|
387
|
+
if (msg && msg.type === "route_error") {
|
|
388
|
+
const { redact } = require("../security/secret-redactor");
|
|
389
|
+
const safeMsg = redact(msg.message || "");
|
|
390
|
+
const safeStack = redact(msg.stack || "");
|
|
391
|
+
console.log(chalk.yellow(` 🔍 Caught error on ${msg.method} ${msg.path}: ${safeMsg.slice(0, 100)}`));
|
|
392
|
+
this.logger.warn("error_monitor.caught", `${msg.method} ${msg.path} → 500: ${safeMsg.slice(0, 200)}`, {
|
|
393
|
+
route: msg.path, method: msg.method, file: msg.file, line: msg.line,
|
|
394
|
+
});
|
|
395
|
+
this.errorMonitor.record(msg.path, msg.statusCode || 500, {
|
|
396
|
+
message: safeMsg,
|
|
397
|
+
stack: safeStack,
|
|
398
|
+
file: msg.file,
|
|
399
|
+
line: msg.line,
|
|
400
|
+
path: msg.path,
|
|
401
|
+
method: msg.method,
|
|
402
|
+
});
|
|
403
|
+
}
|
|
404
|
+
});
|
|
405
|
+
|
|
406
|
+
// Reset error monitor on new spawn
|
|
407
|
+
this.errorMonitor.reset();
|
|
370
408
|
}
|
|
371
409
|
|
|
372
410
|
async _healAndRestart() {
|
|
@@ -432,6 +470,55 @@ class WolverineRunner {
|
|
|
432
470
|
}
|
|
433
471
|
}
|
|
434
472
|
|
|
473
|
+
/**
|
|
474
|
+
* Heal from a caught 500 error (ErrorMonitor threshold reached).
|
|
475
|
+
* Unlike crash healing, the server is still running — we heal and restart.
|
|
476
|
+
*/
|
|
477
|
+
async _healFromError(routePath, errorDetails) {
|
|
478
|
+
if (this._healInProgress || this._shuttingDown) return;
|
|
479
|
+
this._healInProgress = true;
|
|
480
|
+
|
|
481
|
+
console.log(chalk.yellow(`\n🐺 Wolverine healing caught error on ${routePath}...`));
|
|
482
|
+
this.logger.info("heal.error_monitor", `Healing caught 500 on ${routePath}`, { route: routePath });
|
|
483
|
+
|
|
484
|
+
// Build a synthetic stderr from the error details
|
|
485
|
+
const stderr = [
|
|
486
|
+
errorDetails.message || "Unknown error",
|
|
487
|
+
errorDetails.stack || "",
|
|
488
|
+
errorDetails.file ? ` at ${errorDetails.file}:${errorDetails.line || 0}` : "",
|
|
489
|
+
].filter(Boolean).join("\n");
|
|
490
|
+
|
|
491
|
+
try {
|
|
492
|
+
const result = await heal({
|
|
493
|
+
stderr,
|
|
494
|
+
cwd: this.cwd,
|
|
495
|
+
sandbox: this.sandbox,
|
|
496
|
+
redactor: this.redactor,
|
|
497
|
+
notifier: this.notifier,
|
|
498
|
+
rateLimiter: this.rateLimiter,
|
|
499
|
+
backupManager: this.backupManager,
|
|
500
|
+
logger: this.logger,
|
|
501
|
+
brain: this.brain,
|
|
502
|
+
mcp: this.mcp,
|
|
503
|
+
skills: this.skills,
|
|
504
|
+
repairHistory: this.repairHistory,
|
|
505
|
+
});
|
|
506
|
+
|
|
507
|
+
if (result.healed) {
|
|
508
|
+
console.log(chalk.green(`\n🐺 Wolverine healed ${routePath} via ${result.mode}! Restarting...\n`));
|
|
509
|
+
this.errorMonitor.clearRoute(routePath);
|
|
510
|
+
this._healInProgress = false;
|
|
511
|
+
this.restart();
|
|
512
|
+
} else {
|
|
513
|
+
console.log(chalk.red(`\n🐺 Could not heal ${routePath}: ${result.explanation}`));
|
|
514
|
+
this._healInProgress = false;
|
|
515
|
+
}
|
|
516
|
+
} catch (err) {
|
|
517
|
+
console.log(chalk.red(`\n🐺 Error during heal: ${err.message}`));
|
|
518
|
+
this._healInProgress = false;
|
|
519
|
+
}
|
|
520
|
+
}
|
|
521
|
+
|
|
435
522
|
_startStabilityTimer() {
|
|
436
523
|
this._clearStabilityTimer();
|
|
437
524
|
this._stabilityTimer = setTimeout(() => {
|
package/src/dashboard/server.js
CHANGED
|
@@ -29,6 +29,7 @@ class DashboardServer {
|
|
|
29
29
|
this.repairHistory = options.repairHistory;
|
|
30
30
|
this.processMonitor = options.processMonitor;
|
|
31
31
|
this.routeProber = options.routeProber;
|
|
32
|
+
this.errorMonitor = options.errorMonitor;
|
|
32
33
|
|
|
33
34
|
this.auth = new AdminAuth();
|
|
34
35
|
this._sseClients = new Set();
|
|
@@ -869,6 +870,7 @@ ${context ? "\nBrain:\n" + context : ""}`,
|
|
|
869
870
|
session: this.logger ? this.logger.getSessionStats() : {},
|
|
870
871
|
backups: this.backupManager ? this.backupManager.getStats() : {},
|
|
871
872
|
health: this.healthMonitor ? this.healthMonitor.getStats() : {},
|
|
873
|
+
errorMonitor: this.errorMonitor ? this.errorMonitor.getStats() : {},
|
|
872
874
|
}));
|
|
873
875
|
}
|
|
874
876
|
|
package/src/index.js
CHANGED
|
@@ -23,6 +23,7 @@ const { spawnAgent, spawnParallel, exploreAndFix } = require("./agent/sub-agents
|
|
|
23
23
|
const { McpRegistry } = require("./mcp/mcp-registry");
|
|
24
24
|
const { McpSecurity } = require("./mcp/mcp-security");
|
|
25
25
|
const { PerfMonitor } = require("./monitor/perf-monitor");
|
|
26
|
+
const { ErrorMonitor } = require("./monitor/error-monitor");
|
|
26
27
|
const { DashboardServer } = require("./dashboard/server");
|
|
27
28
|
const { Notifier } = require("./notifications/notifier");
|
|
28
29
|
const { Brain } = require("./brain/brain");
|
|
@@ -72,6 +73,7 @@ module.exports = {
|
|
|
72
73
|
McpSecurity,
|
|
73
74
|
// Monitor
|
|
74
75
|
PerfMonitor,
|
|
76
|
+
ErrorMonitor,
|
|
75
77
|
// Dashboard
|
|
76
78
|
DashboardServer,
|
|
77
79
|
// Notifications
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
const chalk = require("chalk");
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Error Monitor — detects caught 500 errors that don't crash the process.
|
|
5
|
+
*
|
|
6
|
+
* Most production bugs are caught by Fastify/Express error handlers.
|
|
7
|
+
* The server stays alive but routes return 500. Wolverine's crash-based
|
|
8
|
+
* heal pipeline never triggers. This module bridges that gap.
|
|
9
|
+
*
|
|
10
|
+
* Tracks 5xx errors per route. After N consecutive failures within
|
|
11
|
+
* a time window, triggers the heal pipeline — without killing the server.
|
|
12
|
+
*
|
|
13
|
+
* Error flow: server error handler → IPC message → ErrorMonitor.record()
|
|
14
|
+
* → threshold reached → onError callback → heal()
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
class ErrorMonitor {
  /**
   * @param {object} [options]
   * @param {number} [options.threshold=3] - tracked 5xx count that triggers a heal
   * @param {number} [options.windowMs=30000] - window in which the count must be reached
   * @param {number} [options.cooldownMs=60000] - per-route pause after a trigger
   * @param {Function} [options.onError] - called as (routePath, errorDetails) on trigger
   * @param {object} [options.logger] - optional logger exposing warn()
   */
  constructor(options = {}) {
    const { threshold = 3, windowMs = 30000, cooldownMs = 60000, onError, logger } = options;

    this.threshold = threshold;
    this.windowMs = windowMs;
    this.cooldownMs = cooldownMs;
    this.onError = onError;
    this.logger = logger;

    // path → { count, firstSeen, lastError }
    this.routes = new Map();
    // path → timestamp of the last heal trigger
    this._cooldowns = new Map();

    this._totalErrors = 0;
    this._totalHeals = 0;
  }

  /**
   * Record one response for a route.
   *
   * A status below 500 forgets the route's tracked errors. A 5xx status is
   * counted; once the count reaches the threshold inside the window, the
   * route enters cooldown, its counter is dropped and `onError` is invoked.
   *
   * @param {string} routePath — e.g. "/api/users"
   * @param {number} statusCode — HTTP status
   * @param {object} errorDetails — { message, stack, file, line }
   */
  record(routePath, statusCode, errorDetails) {
    // Non-error response: the streak for this route is over.
    if (statusCode < 500) {
      this.routes.delete(routePath);
      return;
    }

    this._totalErrors += 1;

    // While a route is cooling down its errors are counted in the total only.
    const triggeredAt = this._cooldowns.get(routePath);
    if (triggeredAt && Date.now() - triggeredAt < this.cooldownMs) {
      return;
    }

    let tracked = this.routes.get(routePath);
    if (!tracked) {
      tracked = { count: 0, firstSeen: Date.now(), lastError: null };
    }
    tracked.lastError = errorDetails;

    // An error arriving after the window has elapsed starts a new window.
    if (Date.now() - tracked.firstSeen > this.windowMs) {
      tracked.count = 1;
      tracked.firstSeen = Date.now();
    } else {
      tracked.count += 1;
    }

    this.routes.set(routePath, tracked);

    if (!(tracked.count >= this.threshold)) {
      return;
    }

    this._totalHeals += 1;
    const elapsedSeconds = Math.round((Date.now() - tracked.firstSeen) / 1000);
    console.log(chalk.yellow(`\n🔍 ErrorMonitor: ${routePath} failed ${tracked.count}x in ${elapsedSeconds}s — triggering heal`));

    if (this.logger) {
      this.logger.warn("error_monitor.threshold", `Route ${routePath} hit ${this.threshold} consecutive 500s`, {
        route: routePath,
        count: tracked.count,
        error: errorDetails?.message?.slice(0, 200),
      });
    }

    // Start the cooldown and drop the counter before handing off to the callback.
    this._cooldowns.set(routePath, Date.now());
    this.routes.delete(routePath);

    if (this.onError) {
      this.onError(routePath, errorDetails);
    }
  }

  /**
   * Forget both the error counter and the cooldown of a route
   * (e.g., after a successful heal).
   */
  clearRoute(routePath) {
    this.routes.delete(routePath);
    this._cooldowns.delete(routePath);
  }

  /**
   * Snapshot for dashboard/telemetry: overall totals plus, for each route
   * currently tracked, its count and a truncated last error message.
   */
  getStats() {
    const activeRoutes = {};
    this.routes.forEach((tracked, route) => {
      activeRoutes[route] = {
        count: tracked.count,
        lastError: tracked.lastError?.message?.slice(0, 100),
      };
    });

    return {
      totalErrors: this._totalErrors,
      totalHeals: this._totalHeals,
      activeRoutes,
      trackedRoutes: this.routes.size,
    };
  }

  /**
   * Drop all per-route counters (e.g., after server restart).
   * Cooldowns are kept on purpose so a restart does not re-trigger immediately.
   */
  reset() {
    this.routes.clear();
  }
}
|
|
120
|
+
|
|
121
|
+
module.exports = { ErrorMonitor };
|