wolverine-ai 4.1.0 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wolverine-ai",
3
- "version": "4.1.0",
3
+ "version": "4.3.0",
4
4
  "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -289,6 +289,76 @@ const TOOL_DEFINITIONS = [
289
289
  },
290
290
  },
291
291
  // ── COMPLETION ──
292
+ {
293
+ type: "function",
294
+ function: {
295
+ name: "check_memory",
296
+ description: "Check system and process memory usage. Returns RSS, heap, free system memory, and whether memory pressure is detected.",
297
+ parameters: { type: "object", properties: {}, required: [] },
298
+ },
299
+ },
300
+ {
301
+ type: "function",
302
+ function: {
303
+ name: "list_processes",
304
+ description: "List running Node.js processes. Useful for finding zombie/orphan processes or port conflicts.",
305
+ parameters: { type: "object", properties: {}, required: [] },
306
+ },
307
+ },
308
+ {
309
+ type: "function",
310
+ function: {
311
+ name: "check_logs",
312
+ description: "Read recent log output. Returns the last N lines from the server's log source.",
313
+ parameters: {
314
+ type: "object",
315
+ properties: {
316
+ lines: { type: "number", description: "Number of lines to return (default: 50)" },
317
+ filter: { type: "string", description: "Optional grep filter pattern" },
318
+ },
319
+ required: [],
320
+ },
321
+ },
322
+ },
323
+ {
324
+ type: "function",
325
+ function: {
326
+ name: "restart_service",
327
+ description: "Trigger a graceful server restart. Use after applying fixes that require a process restart to take effect.",
328
+ parameters: { type: "object", properties: {}, required: [] },
329
+ },
330
+ },
331
+ {
332
+ type: "function",
333
+ function: {
334
+ name: "check_network",
335
+ description: "Check network connectivity: DNS resolution, port availability, and external service reachability.",
336
+ parameters: {
337
+ type: "object",
338
+ properties: {
339
+ host: { type: "string", description: "Hostname to check DNS for (optional)" },
340
+ port: { type: "number", description: "Port to check availability (optional)" },
341
+ url: { type: "string", description: "URL to check reachability (optional)" },
342
+ },
343
+ required: [],
344
+ },
345
+ },
346
+ },
347
+ {
348
+ type: "function",
349
+ function: {
350
+ name: "inspect_env",
351
+ description: "List environment variable names (NOT values) that are set. Checks if required vars exist without exposing secrets.",
352
+ parameters: {
353
+ type: "object",
354
+ properties: {
355
+ check: { type: "array", items: { type: "string" }, description: "Specific var names to check if set (optional)" },
356
+ },
357
+ required: [],
358
+ },
359
+ },
360
+ },
361
+ // ── TASK MANAGEMENT ──
292
362
  {
293
363
  type: "function",
294
364
  function: {
@@ -569,6 +639,12 @@ class AgentEngine {
569
639
  case "run_db_fix": return this._runDbFix(args);
570
640
  case "audit_deps": return this._auditDeps(args);
571
641
  case "check_migration": return this._checkMigration(args);
642
+ case "check_memory": return this._checkMemory(args);
643
+ case "list_processes": return this._listProcesses(args);
644
+ case "check_logs": return this._checkLogs(args);
645
+ case "restart_service": return this._restartService(args);
646
+ case "check_network": return this._checkNetwork(args);
647
+ case "inspect_env": return this._inspectEnv(args);
572
648
  case "done": return this._done(args);
573
649
  // Legacy aliases
574
650
  case "list_files": return this._globFiles({ pattern: (args.dir || ".") + "/*" + (args.pattern || "") });
@@ -1062,6 +1138,112 @@ class AgentEngine {
1062
1138
  } catch (e) { return { content: `Migration check error: ${e.message}` }; }
1063
1139
  }
1064
1140
 
1141
+ // ── SERVER DIAGNOSTICS ──
1142
+
1143
+ _checkMemory() {
1144
+ const os = require("os");
1145
+ const mem = process.memoryUsage();
1146
+ const totalMB = Math.round(os.totalmem() / 1048576);
1147
+ const freeMB = Math.round(os.freemem() / 1048576);
1148
+ const usedPct = Math.round((1 - os.freemem() / os.totalmem()) * 100);
1149
+ const lines = [
1150
+ `Process RSS: ${Math.round(mem.rss / 1048576)}MB`,
1151
+ `Process Heap: ${Math.round(mem.heapUsed / 1048576)}MB / ${Math.round(mem.heapTotal / 1048576)}MB`,
1152
+ `Process External: ${Math.round(mem.external / 1048576)}MB`,
1153
+ `System: ${freeMB}MB free / ${totalMB}MB total (${usedPct}% used)`,
1154
+ usedPct > 90 ? "⚠️ MEMORY PRESSURE DETECTED — system above 90% usage" : "✅ Memory OK",
1155
+ ];
1156
+ return { content: lines.join("\n") };
1157
+ }
1158
+
1159
+ _listProcesses() {
1160
+ try {
1161
+ const cmd = process.platform === "win32"
1162
+ ? 'tasklist /FI "IMAGENAME eq node.exe" /FO CSV /NH'
1163
+ : "ps aux | grep -E 'node|PID' | grep -v grep";
1164
+ const output = execSync(cmd, { encoding: "utf-8", timeout: 5000 }).trim();
1165
+ return { content: output || "(no node processes found)" };
1166
+ } catch (e) { return { content: `Error listing processes: ${e.message}` }; }
1167
+ }
1168
+
1169
+ _checkLogs(args) {
1170
+ const lines = args.lines || 50;
1171
+ const filter = args.filter || "";
1172
+ try {
1173
+ let cmd;
1174
+ if (process.platform === "win32") {
1175
+ cmd = `powershell -c "Get-Content -Tail ${lines} .wolverine\\events.log"`;
1176
+ } else {
1177
+ // Try journalctl first (systemd), fall back to log file
1178
+ cmd = `journalctl -u wolverine --no-pager -n ${lines} 2>/dev/null || tail -n ${lines} .wolverine/events.log 2>/dev/null || echo 'No logs found'`;
1179
+ }
1180
+ if (filter) cmd += ` | grep -i '${filter.replace(/'/g, "'\\''")}'`;
1181
+ const output = execSync(cmd, { encoding: "utf-8", timeout: 10000, cwd: this.cwd }).trim();
1182
+ return { content: output.slice(0, 4000) || "(empty)" };
1183
+ } catch (e) { return { content: `Error reading logs: ${e.message}` }; }
1184
+ }
1185
+
1186
+ _restartService() {
1187
+ // Signal the parent wolverine process to restart the child server
1188
+ // We can't directly restart — but we can signal via a file that the runner checks
1189
+ try {
1190
+ const restartFlag = path.join(this.cwd, ".wolverine", "restart-requested");
1191
+ fs.writeFileSync(restartFlag, Date.now().toString(), "utf-8");
1192
+ return { content: "Restart requested. The server will restart after this heal completes." };
1193
+ } catch (e) { return { content: `Error requesting restart: ${e.message}` }; }
1194
+ }
1195
+
1196
+ _checkNetwork(args) {
1197
+ const results = [];
1198
+ try {
1199
+ // DNS check
1200
+ if (args.host) {
1201
+ try {
1202
+ const dns = require("dns");
1203
+ const addresses = execSync(`node -e "require('dns').resolve('${args.host.replace(/'/g, "")}', (e,a) => console.log(e ? 'FAIL:'+e.code : a.join(',')))"`, { encoding: "utf-8", timeout: 5000 }).trim();
1204
+ results.push(`DNS ${args.host}: ${addresses}`);
1205
+ } catch (e) { results.push(`DNS ${args.host}: FAILED — ${e.message}`); }
1206
+ }
1207
+ // Port check
1208
+ if (args.port) {
1209
+ try {
1210
+ const portCmd = process.platform === "win32"
1211
+ ? `netstat -ano | findstr ":${args.port}"`
1212
+ : `ss -tlnp | grep ":${args.port}" || echo "port ${args.port} is free"`;
1213
+ const portResult = execSync(portCmd, { encoding: "utf-8", timeout: 3000 }).trim();
1214
+ results.push(`Port ${args.port}: ${portResult || "free"}`);
1215
+ } catch { results.push(`Port ${args.port}: free (nothing listening)`); }
1216
+ }
1217
+ // URL reachability
1218
+ if (args.url) {
1219
+ try {
1220
+ const urlResult = execSync(`node -e "require('${args.url.startsWith('https') ? 'https' : 'http'}').get('${args.url.replace(/'/g, "")}', r => { console.log(r.statusCode); r.resume(); }).on('error', e => console.log('FAIL:'+e.code))"`, { encoding: "utf-8", timeout: 10000 }).trim();
1221
+ results.push(`URL ${args.url}: ${urlResult}`);
1222
+ } catch (e) { results.push(`URL ${args.url}: FAILED — ${e.message}`); }
1223
+ }
1224
+ if (results.length === 0) results.push("Provide host, port, or url to check.");
1225
+ return { content: results.join("\n") };
1226
+ } catch (e) { return { content: `Network check error: ${e.message}` }; }
1227
+ }
1228
+
1229
+ _inspectEnv(args) {
1230
+ if (args.check && Array.isArray(args.check)) {
1231
+ const results = args.check.map(v => `${v}: ${process.env[v] ? "SET (" + process.env[v].length + " chars)" : "NOT SET"}`);
1232
+ return { content: results.join("\n") };
1233
+ }
1234
+ // List all env var names (not values) grouped by category
1235
+ const keys = Object.keys(process.env).sort();
1236
+ const wolverine = keys.filter(k => /^WOLVERINE|^PORT$|^NODE_ENV$/i.test(k));
1237
+ const apiKeys = keys.filter(k => /KEY|SECRET|TOKEN|PASSWORD|AUTH/i.test(k));
1238
+ const other = keys.filter(k => !wolverine.includes(k) && !apiKeys.includes(k));
1239
+ const lines = [
1240
+ `Wolverine vars (${wolverine.length}): ${wolverine.join(", ") || "none"}`,
1241
+ `Secret vars (${apiKeys.length}): ${apiKeys.map(k => k + "=" + (process.env[k] ? "SET" : "MISSING")).join(", ") || "none"}`,
1242
+ `Total env vars: ${keys.length}`,
1243
+ ];
1244
+ return { content: lines.join("\n") };
1245
+ }
1246
+
1065
1247
  _done(args) {
1066
1248
  console.log(chalk.green(` ✅ Agent done: ${args.summary}`));
1067
1249
  if (this.logger) {
@@ -66,7 +66,7 @@ const SEED_DOCS = [
66
66
  metadata: { topic: "verification" },
67
67
  },
68
68
  {
69
- text: "Wolverine multi-file agent: turn-limited agent loop with 18 tools across 7 categories. Turn budget adapts to error type: simple (TypeError)=4, config/ENOENT=5, complex=8. Each AI call has 45s timeout via Promise.race — prevents indefinite hangs. If timeout occurs mid-fix, partial results returned. FILE: read_file (offset/limit), write_file (creates dirs), edit_file (find-and-replace), glob_files (pattern search), grep_code (regex with context), list_dir (directory listing with sizes), move_file (rename/relocate). SHELL: bash_exec (30s default, 60s cap), git_log, git_diff. DATABASE: inspect_db (tables/schema/SELECT on SQLite), run_db_fix (UPDATE/DELETE/ALTER with auto-backup). DIAGNOSTICS: check_port (find what uses a port), check_env (env vars, values redacted). DEPS: audit_deps (full npm health check), check_migration (known upgrade paths). RESEARCH: web_fetch (10s timeout). CONTROL: done. Prompt emphasizes fast action: fix immediately when solution is obvious, investigate only when cause unclear.",
69
+ text: "Wolverine multi-file agent: turn-limited agent loop with 24 tools across 8 categories. Turn budget adapts to error type: simple (TypeError)=4, config/ENOENT=5, complex=8. Each AI call has 90s timeout via Promise.race. FILE: read_file, write_file, edit_file, glob_files, grep_code, list_dir, move_file. SHELL: bash_exec (30s default, 60s cap), git_log, git_diff. DATABASE: inspect_db (tables/schema/SELECT on SQLite), run_db_fix (UPDATE/DELETE/ALTER with auto-backup). DIAGNOSTICS: check_port, check_env, check_memory (RSS/heap/system with OOM detection), list_processes (find zombie/orphan node processes), check_logs (read recent journalctl/log file), check_network (DNS/port/URL reachability), inspect_env (list env var names without values, check if required vars exist). SERVER: restart_service (request graceful restart after fix). DEPS: audit_deps, check_migration. RESEARCH: web_fetch. CONTROL: done.",
70
70
  metadata: { topic: "agent" },
71
71
  },
72
72
  {
@@ -110,7 +110,7 @@ const SEED_DOCS = [
110
110
  metadata: { topic: "sub-agent-workflow" },
111
111
  },
112
112
  {
113
- text: "Sub-agent tool restrictions: explore gets read_file/glob/grep/git_log/git_diff/list_dir/check_env/check_port/inspect_db/audit_deps. plan gets read_file/glob/grep/list_dir/inspect_db/check_env/audit_deps/check_migration/search_brain. fix gets read_file/write_file/edit_file/glob/grep/bash_exec/move_file/run_db_fix/audit_deps. verify gets read_file/glob/grep/bash_exec/inspect_db/check_port. research gets read_file/grep/web_fetch/search_brain. security gets read_file/glob/grep/inspect_db. database gets read_file/write_file/edit_file/glob/grep/bash_exec/inspect_db/run_db_fix. Each type gets only the tools it needs.",
113
+ text: "Sub-agent tool restrictions: explore gets read_file/glob/grep/git_log/git_diff/list_dir/check_env/check_port/check_memory/check_network/list_processes/inspect_db/audit_deps. plan gets read_file/glob/grep/list_dir/inspect_db/check_env/inspect_env/audit_deps/check_migration. fix gets read_file/write_file/edit_file/glob/grep/bash_exec/move_file/run_db_fix/audit_deps/restart_service. verify gets read_file/glob/grep/bash_exec/inspect_db/check_port/check_memory/check_logs. research gets read_file/grep/web_fetch/check_logs. security gets read_file/glob/grep/inspect_db/inspect_env. database gets read_file/write_file/edit_file/glob/grep/bash_exec/inspect_db/run_db_fix. 24 tools total, each sub-agent type gets tools relevant to its role.",
114
114
  metadata: { topic: "sub-agent-tools" },
115
115
  },
116
116
  {
@@ -34,7 +34,15 @@ Module._load = function (request, parent, isMain) {
34
34
  _hookFastify(instance);
35
35
  return instance;
36
36
  };
37
- Object.keys(originalFastify).forEach((key) => { wrapped[key] = originalFastify[key]; });
37
+ // #23: Copy all own properties (including non-enumerable and symbols) to preserve prototype chain
38
+ for (const key of Object.getOwnPropertyNames(originalFastify)) {
39
+ if (key !== "length" && key !== "name" && key !== "prototype") {
40
+ try { wrapped[key] = originalFastify[key]; } catch {}
41
+ }
42
+ }
43
+ for (const sym of Object.getOwnPropertySymbols(originalFastify)) {
44
+ try { wrapped[sym] = originalFastify[sym]; } catch {}
45
+ }
38
46
  wrapped.default = wrapped; // ESM compat
39
47
  return wrapped;
40
48
  }
@@ -48,7 +56,15 @@ Module._load = function (request, parent, isMain) {
48
56
  _hookExpress(app);
49
57
  return app;
50
58
  };
51
- Object.keys(originalExpress).forEach((key) => { wrapped[key] = originalExpress[key]; });
59
+ // #23: Copy all own properties (including non-enumerable and symbols)
60
+ for (const key of Object.getOwnPropertyNames(originalExpress)) {
61
+ if (key !== "length" && key !== "name" && key !== "prototype") {
62
+ try { wrapped[key] = originalExpress[key]; } catch {}
63
+ }
64
+ }
65
+ for (const sym of Object.getOwnPropertySymbols(originalExpress)) {
66
+ try { wrapped[sym] = originalExpress[sym]; } catch {}
67
+ }
52
68
  return wrapped;
53
69
  }
54
70
 
@@ -94,9 +110,13 @@ function _hookExpress(app) {
94
110
  // Wrap app.listen to inject error middleware AFTER all user middleware
95
111
  const originalListen = app.listen;
96
112
  app.listen = function (...args) {
97
- app.use(function _wolverineErrorHook(err, req, res, next) {
98
- _reportError(req.originalUrl || req.url, req.method, err);
99
- next(err);
113
+ // #24: Use process.nextTick to ensure our error middleware is added AFTER
114
+ // any middleware registered synchronously after listen() is called
115
+ process.nextTick(() => {
116
+ app.use(function _wolverineErrorHook(err, req, res, next) {
117
+ _reportError(req.originalUrl || req.url, req.method, err);
118
+ next(err);
119
+ });
100
120
  });
101
121
  return originalListen.apply(this, args);
102
122
  };
@@ -114,7 +134,8 @@ function _reportError(url, method, error) {
114
134
  let file = null, line = null;
115
135
  if (error.stack) {
116
136
  for (const frame of error.stack.split("\n")) {
117
- const m = frame.match(/\(([^)]+):(\d+):(\d+)\)/) || frame.match(/at\s+([^\s(]+):(\d+):(\d+)/);
137
+ // #25: Second regex uses (.+) instead of ([^\s(]+) to handle Windows paths with spaces
138
+ const m = frame.match(/\(([^)]+):(\d+):(\d+)\)/) || frame.match(/at\s+(.+):(\d+):(\d+)/);
118
139
  if (m && !m[1].includes("node_modules") && !m[1].includes("node:")) {
119
140
  file = m[1]; line = parseInt(m[2], 10); break;
120
141
  }
@@ -284,7 +284,9 @@ class WolverineRunner {
284
284
  this._clearStabilityTimer();
285
285
  // Clear any pending heals — restart is a clean slate
286
286
  this._pendingErrorHeal = null;
287
- this._healInProgress = false;
287
+ // #1: Don't clear _healInProgress here — only the heal function itself should clear it
288
+ // #6: Clear stale heal status so dashboard doesn't show phantom heals
289
+ this._healStatus = null;
288
290
 
289
291
  if (this.child) {
290
292
  const oldChild = this.child;
@@ -295,11 +297,9 @@ class WolverineRunner {
295
297
  const onExit = () => {
296
298
  if (spawned) return; // Prevent double-spawn from exit + force-kill timeout
297
299
  spawned = true;
300
+ // #7: Don't call _ensurePortFree() here — _spawn() already calls it
298
301
  // Give port time to fully release (TIME_WAIT)
299
- setTimeout(() => {
300
- this._ensurePortFree();
301
- setTimeout(() => this._spawn(), 500);
302
- }, 500);
302
+ setTimeout(() => this._spawn(), 500);
303
303
  };
304
304
 
305
305
  oldChild.removeAllListeners("exit");
@@ -314,7 +314,7 @@ class WolverineRunner {
314
314
  }
315
315
  }, 3000);
316
316
  } else {
317
- this._ensurePortFree();
317
+ // #7: Don't call _ensurePortFree() here — _spawn() already calls it
318
318
  setTimeout(() => this._spawn(), 500);
319
319
  }
320
320
  }
@@ -394,7 +394,12 @@ class WolverineRunner {
394
394
  process.stderr.write(text);
395
395
  });
396
396
 
397
- this._startStabilityTimer();
397
+ // #27: Only start stability timer if there's a backup to promote — don't clear
398
+ // an existing timer on every spawn (e.g., auto-update restart shouldn't reset
399
+ // the stability countdown for a previously healed backup)
400
+ if (this._lastBackupId) {
401
+ this._startStabilityTimer();
402
+ }
398
403
 
399
404
  // Start process monitor (memory, CPU, heartbeat)
400
405
  if (this.child && this.child.pid) {
@@ -418,32 +423,46 @@ class WolverineRunner {
418
423
  this.healthMonitor.stop();
419
424
  this.healthMonitor.reset();
420
425
  this.healthMonitor.start(async (reason) => {
421
- if (this._healInProgress || !this.running) return;
422
- console.log(chalk.red(`\n🚨 Health check triggered heal (reason: ${reason})`));
423
- this.logger.error(EVENT_TYPES.HEALTH_UNRESPONSIVE, `Server unresponsive: ${reason}`, { reason });
424
- this.healthMonitor.stop();
425
-
426
- // Kill the hung process remove exit listener to prevent double-heal
427
- if (this.child) {
428
- const pid = this.child.pid;
429
- this.child.removeAllListeners("exit");
430
- this._killProcessTree(pid, "SIGKILL");
431
- this.child = null;
432
- }
426
+ try {
427
+ if (this._healInProgress || !this.running) return;
428
+ // #26: Claim the heal lock immediately — prevents exit event from starting
429
+ // a concurrent heal between our check and the child kill below
430
+ this._healInProgress = true;
431
+ console.log(chalk.red(`\n🚨 Health check triggered heal (reason: ${reason})`));
432
+ this.logger.error(EVENT_TYPES.HEALTH_UNRESPONSIVE, `Server unresponsive: ${reason}`, { reason });
433
+ this.healthMonitor.stop();
434
+
435
+ // Kill the hung process — remove exit listener to prevent double-heal
436
+ if (this.child) {
437
+ const pid = this.child.pid;
438
+ this.child.removeAllListeners("exit");
439
+ this._killProcessTree(pid, "SIGKILL");
440
+ this.child = null;
441
+ }
433
442
 
434
- // Synthesize error context for the heal pipeline
435
- this._stderrBuffer = `Server became unresponsive. Health check failed: ${reason}\n` +
436
- `The server was running but stopped responding to HTTP requests.\n` +
437
- `Possible causes: infinite loop, deadlock, memory exhaustion, blocked event loop.`;
443
+ // Synthesize error context for the heal pipeline
444
+ this._stderrBuffer = `Server became unresponsive. Health check failed: ${reason}\n` +
445
+ `The server was running but stopped responding to HTTP requests.\n` +
446
+ `Possible causes: infinite loop, deadlock, memory exhaustion, blocked event loop.`;
438
447
 
439
- this.retryCount++;
440
- if (this.retryCount > this.maxRetries) {
441
- console.log(chalk.red(`\n🛑 Max retries reached.`));
442
- this._logRollbackHint();
443
- this.running = false;
444
- return;
448
+ this.retryCount++;
449
+ if (this.retryCount > this.maxRetries) {
450
+ console.log(chalk.red(`\n🛑 Max retries reached.`));
451
+ this._logRollbackHint();
452
+ this.running = false;
453
+ this._healInProgress = false;
454
+ return;
455
+ }
456
+ // Release lock so _healAndRestart can acquire it
457
+ this._healInProgress = false;
458
+ await this._healAndRestart();
459
+ } catch (err) {
460
+ // #5: Prevent unhandled errors in health callback from crashing the parent
461
+ console.log(chalk.red(` ⚠️ Health callback error: ${err.message}`));
462
+ this._healInProgress = false;
463
+ this._healStatus = null;
464
+ if (this.running) this._spawn();
445
465
  }
446
- await this._healAndRestart();
447
466
  });
448
467
 
449
468
  this.child.on("exit", async (code, signal) => {
@@ -541,6 +560,8 @@ class WolverineRunner {
541
560
 
542
561
  async _healAndRestart() {
543
562
  if (this._healInProgress) return;
563
+ // #9: Bail if stop() was called during the window between crash and heal
564
+ if (this._shuttingDown) return;
544
565
  this._healInProgress = true;
545
566
  this._healStatus = { active: true, error: this._stderrBuffer.slice(0, 200), phase: "diagnosing", startedAt: Date.now() };
546
567
 
@@ -567,6 +588,8 @@ class WolverineRunner {
567
588
  }
568
589
 
569
590
  try {
591
+ // #9: Check again before expensive heal — stop() may have been called during loop guard
592
+ if (this._shuttingDown) { this._healInProgress = false; return; }
570
593
  const result = await heal({
571
594
  stderr: this._stderrBuffer,
572
595
  cwd: this.cwd,
@@ -606,6 +629,8 @@ class WolverineRunner {
606
629
  this._healStatus = null;
607
630
  // Clear pending errors — the heal fixed the root cause, stale errors are irrelevant
608
631
  this._pendingErrorHeal = null;
632
+ // #9: Don't restart if stop() was called while heal was running
633
+ if (this._shuttingDown) return;
609
634
  // Use restart() to properly kill old child before spawning — prevents EADDRINUSE
610
635
  this.restart();
611
636
  } else {
@@ -673,6 +698,15 @@ class WolverineRunner {
673
698
  }
674
699
  this._healInProgress = true;
675
700
 
701
+ // #8: Safety timeout — if heal hangs, force-release the lock after 6 minutes
702
+ const healTimeout = setTimeout(() => {
703
+ if (this._healInProgress) {
704
+ console.log(chalk.red(` ⚠️ _healFromError safety timeout (6min) — releasing heal lock`));
705
+ this._healInProgress = false;
706
+ this._healStatus = null;
707
+ }
708
+ }, 360000);
709
+
676
710
  console.log(chalk.yellow(`\n🐺 Wolverine healing caught error on ${routePath}...`));
677
711
  this._healStatus = { active: true, route: routePath, error: errorDetails?.message?.slice(0, 200), phase: "diagnosing", startedAt: Date.now() };
678
712
  this.logger.info("heal.error_monitor", `Healing caught 500 on ${routePath}`, { route: routePath });
@@ -727,6 +761,7 @@ class WolverineRunner {
727
761
  routeContext: { path: routePath, method: errorDetails?.method },
728
762
  });
729
763
 
764
+ clearTimeout(healTimeout);
730
765
  if (result.healed) {
731
766
  console.log(chalk.green(`\n🐺 Wolverine healed ${routePath} via ${result.mode}! Restarting...\n`));
732
767
  this.retryCount = 0; // Fresh start after successful heal
@@ -748,6 +783,7 @@ class WolverineRunner {
748
783
  this._healStatus = null;
749
784
  }
750
785
  } catch (err) {
786
+ clearTimeout(healTimeout);
751
787
  console.log(chalk.red(`\n🐺 Error during heal: ${err.message}`));
752
788
  this._healInProgress = false;
753
789
  this._healStatus = null;
@@ -23,6 +23,8 @@ const BOOT_PROBE_TIMEOUT_MS = 10000; // 10 seconds
23
23
  */
24
24
  function syntaxCheck(scriptPath) {
25
25
  return new Promise((resolve) => {
26
+ // #21: Guard against double resolve from exit + error firing in sequence
27
+ let settled = false;
26
28
  const child = spawn("node", ["--check", scriptPath], {
27
29
  stdio: ["ignore", "ignore", "pipe"],
28
30
  timeout: 5000,
@@ -32,6 +34,8 @@ function syntaxCheck(scriptPath) {
32
34
  child.stderr.on("data", (data) => { stderr += data.toString(); });
33
35
 
34
36
  child.on("exit", (code) => {
37
+ if (settled) return;
38
+ settled = true;
35
39
  resolve({
36
40
  valid: code === 0,
37
41
  error: code !== 0 ? stderr.trim() : undefined,
@@ -39,6 +43,8 @@ function syntaxCheck(scriptPath) {
39
43
  });
40
44
 
41
45
  child.on("error", (err) => {
46
+ if (settled) return;
47
+ settled = true;
42
48
  resolve({ valid: false, error: err.message });
43
49
  });
44
50
  });
@@ -71,6 +77,8 @@ function bootProbe(scriptPath, cwd, originalErrorSignature) {
71
77
  child.on("exit", (code) => {
72
78
  if (settled) return;
73
79
  settled = true;
80
+ // #19: Always ensure the child is killed after settling (handles orphan sub-processes)
81
+ try { child.kill("SIGTERM"); } catch {}
74
82
 
75
83
  if (code === 0) {
76
84
  resolve({ status: "alive" });
@@ -101,6 +109,8 @@ function bootProbe(scriptPath, cwd, originalErrorSignature) {
101
109
  child.on("error", (err) => {
102
110
  if (settled) return;
103
111
  settled = true;
112
+ // #19: Always ensure the child is killed after settling
113
+ try { child.kill("SIGTERM"); } catch {}
104
114
  resolve({ status: "crashed", stderr: err.message, sameError: false, exitCode: null });
105
115
  });
106
116
 
@@ -192,9 +202,10 @@ async function verifyFix(scriptPath, cwd, originalErrorSignature, routeContext)
192
202
  const relPath = path.relative(cwd, changedFile).replace(/\\/g, "/");
193
203
  if (relPath.startsWith("server/") && relPath.endsWith(".js")) {
194
204
  try {
195
- const { execSync } = require("child_process");
205
+ const { execFileSync } = require("child_process");
206
+ // #22: Use execFileSync with args array to prevent path injection via relPath
196
207
  const testCode = `try{require('./${relPath}');console.log('MODULE_OK')}catch(e){console.error(e.message);process.exit(1)}`;
197
- const out = execSync(`node -e "${testCode}"`, {
208
+ const out = execFileSync("node", ["-e", testCode], {
198
209
  cwd, timeout: 5000, encoding: "utf-8",
199
210
  env: { ...process.env, NODE_PATH: path.join(cwd, "node_modules") },
200
211
  });
@@ -237,6 +248,14 @@ function routeProbe(scriptPath, cwd, routeContext) {
237
248
  child.stdout.on("data", (d) => { stdout += d.toString(); });
238
249
  child.stderr.on("data", (d) => { stderr += d.toString(); });
239
250
 
251
+ // #20: Handle spawn errors (e.g., node binary not found)
252
+ child.on("error", (err) => {
253
+ clearInterval(checkPort);
254
+ if (settled) return;
255
+ settled = true;
256
+ resolve({ status: "failed", statusCode: 0, body: err.message });
257
+ });
258
+
240
259
  child.on("exit", () => {
241
260
  if (settled) return;
242
261
  settled = true;
@@ -165,7 +165,7 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
165
165
  // 4c. Pre-heal operational fix — detect common non-code errors
166
166
  // Some crashes aren't code bugs (missing npm packages, missing config files).
167
167
  // Fix these directly without wasting AI tokens.
168
- const opsFix = await tryOperationalFix(parsed, cwd, logger);
168
+ const opsFix = await tryOperationalFix(parsed, cwd, logger, sandbox);
169
169
  if (opsFix.fixed) {
170
170
  console.log(chalk.green(` ⚡ Operational fix applied: ${opsFix.action}`));
171
171
  if (logger) logger.info(EVENT_TYPES.HEAL_SUCCESS, `Operational fix: ${opsFix.action}`, { action: opsFix.action });
@@ -297,8 +297,15 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
297
297
  goal: `Fix: ${parsed.errorMessage.slice(0, 80)}`,
298
298
 
299
299
  onAttempt: async (iteration, researchCtx, priorAttempts) => {
300
- // Create backup for this attempt
301
- const bid = backupManager.createBackup(`heal attempt ${iteration}: ${parsed.errorMessage.slice(0, 60)}`);
300
+ // #12: Create backup for this attempt — if backup fails, skip the attempt entirely
301
+ let bid;
302
+ try {
303
+ bid = backupManager.createBackup(`heal attempt ${iteration}: ${parsed.errorMessage.slice(0, 60)}`);
304
+ } catch (backupErr) {
305
+ console.log(chalk.red(` ⚠️ Backup creation failed: ${backupErr.message} — skipping attempt`));
306
+ if (logger) logger.error(EVENT_TYPES.BACKUP_CREATED, `Backup failed: ${backupErr.message}`);
307
+ return { healed: false, explanation: `Backup creation failed: ${backupErr.message}` };
308
+ }
302
309
  backupManager.setErrorSignature(bid, errorSignature);
303
310
  if (logger) logger.info(EVENT_TYPES.BACKUP_CREATED, `Backup ${bid} (iteration ${iteration})`, { backupId: bid });
304
311
 
@@ -532,7 +539,7 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
532
539
  * Try to fix common operational errors without AI.
533
540
  * Returns { fixed: boolean, action: string }
534
541
  */
535
- async function tryOperationalFix(parsed, cwd, logger) {
542
+ async function tryOperationalFix(parsed, cwd, logger, sandbox) {
536
543
  const { execSync } = require("child_process");
537
544
  const msg = parsed.errorMessage || "";
538
545
 
@@ -565,9 +572,14 @@ async function tryOperationalFix(parsed, cwd, logger) {
565
572
 
566
573
  // Only auto-create if it's inside the project and looks like a config/data file
567
574
  const rel = path.relative(cwd, missingFile).replace(/\\/g, "/");
568
- if (!rel.startsWith("..") && /\.(json|yaml|yml|toml|ini|conf|cfg|env|log|txt|csv|db|sqlite)$/i.test(missingFile)) {
575
+ // #18: Validate through sandbox to prevent creating files outside allowed paths
576
+ let safePath = missingFile;
577
+ if (sandbox) {
578
+ try { safePath = sandbox.resolve(missingFile); } catch { safePath = null; }
579
+ }
580
+ if (safePath && !rel.startsWith("..") && /\.(json|yaml|yml|toml|ini|conf|cfg|env|log|txt|csv|db|sqlite)$/i.test(missingFile)) {
569
581
  try {
570
- fs.mkdirSync(path.dirname(missingFile), { recursive: true });
582
+ fs.mkdirSync(path.dirname(safePath), { recursive: true });
571
583
  const ext = path.extname(missingFile).toLowerCase();
572
584
 
573
585
  // For JSON config files, try to infer expected structure from the code or error message
@@ -600,7 +612,7 @@ async function tryOperationalFix(parsed, cwd, logger) {
600
612
  content = defaults[ext] || "";
601
613
  }
602
614
 
603
- fs.writeFileSync(missingFile, content, "utf-8");
615
+ fs.writeFileSync(safePath, content, "utf-8");
604
616
  console.log(chalk.blue(` 📄 Created missing file: ${rel}`));
605
617
  return { fixed: true, action: `Created missing file: ${rel} with ${content === "{}" ? "empty" : "inferred"} config` };
606
618
  } catch {}
@@ -659,10 +671,13 @@ function _inferJsonConfig(missingFile, cwd, parsed) {
659
671
  const sourceFile = parsed.filePath;
660
672
  if (!sourceFile) return null;
661
673
 
674
+ // #17: Escape all regex special characters in basename to prevent regex injection
675
+ const escapedBasename = basename.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
676
+
662
677
  try {
663
678
  const source = fs.readFileSync(sourceFile, "utf-8");
664
679
  // Look for property accesses on the loaded config: config.apiUrl, config.timeout, etc.
665
- const configVarMatch = source.match(new RegExp(`(?:const|let|var)\\s+(\\w+)\\s*=\\s*(?:require|JSON\\.parse).*${basename.replace(".", "\\.")}`));
680
+ const configVarMatch = source.match(new RegExp(`(?:const|let|var)\\s+(\\w+)\\s*=\\s*(?:require|JSON\\.parse).*${escapedBasename}`));
666
681
  if (!configVarMatch) return null;
667
682
 
668
683
  const varName = configVarMatch[1];