bgrun 3.10.2 → 3.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -4
- package/dashboard/app/api/deps/route.ts +49 -0
- package/dashboard/app/api/events/route.ts +10 -2
- package/dashboard/app/api/guard/route.ts +1 -1
- package/dashboard/app/api/guard-all/route.ts +50 -0
- package/dashboard/app/api/logs/rotate/route.ts +45 -0
- package/dashboard/app/api/processes/route.ts +67 -10
- package/dashboard/app/globals.css +547 -6
- package/dashboard/app/page.client.tsx +636 -68
- package/dashboard/app/page.tsx +52 -1
- package/dist/index.js +452 -36
- package/package.json +62 -60
- package/scripts/bgr-startup.ps1 +118 -0
- package/src/api.ts +3 -3
- package/src/bgrun.test.ts +109 -0
- package/src/commands/list.ts +3 -3
- package/src/commands/run.ts +17 -0
- package/src/deps.ts +126 -0
- package/src/guard.ts +157 -0
- package/src/index.ts +108 -3
- package/src/log-rotation.ts +93 -0
- package/src/logger.ts +4 -3
- package/src/platform.ts +39 -23
- package/src/server.ts +43 -3
- package/src/table.ts +3 -3
package/src/guard.ts
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BGR Standalone Process Guard
|
|
3
|
+
*
|
|
4
|
+
* Runs as an independent process that monitors ALL guarded processes
|
|
5
|
+
* (BGR_KEEP_ALIVE=true) and the dashboard itself. If the dashboard
|
|
6
|
+
* crashes, the guard restarts it. If any guarded process dies, the
|
|
7
|
+
* guard restarts it.
|
|
8
|
+
*
|
|
9
|
+
* This is the "outer shell" — it cannot be killed by a dashboard crash.
|
|
10
|
+
*
|
|
11
|
+
* Usage:
|
|
12
|
+
* bgrun --guard # Start guard as a managed bgrun process
|
|
13
|
+
* bgrun --_guard-loop # (Internal) Actually run the guard loop
|
|
14
|
+
* bgrun --_guard-loop 30 # Check every 30 seconds
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { getAllProcesses, getProcess } from './db';
|
|
18
|
+
import { isProcessRunning, getProcessPorts, findChildPid } from './platform';
|
|
19
|
+
import { handleRun } from './commands/run';
|
|
20
|
+
import { parseEnvString } from './utils';
|
|
21
|
+
|
|
22
|
+
// Timing knobs for the guard loop (all values are milliseconds unless noted).
const DEFAULT_INTERVAL_MS = 30 * 1000;   // poll period used when none is supplied
const MAX_BACKOFF_MS = 300_000;          // backoff is capped at 5 minutes
const CRASH_THRESHOLD = 5;               // restarts allowed before backoff kicks in (a count, not ms)
const STABILITY_WINDOW_MS = 2 * 60_000;  // 2 minutes alive clears the crash counters

/** Per-process bookkeeping kept in memory by the guard, keyed by process name. */
type GuardState = {
    /** Number of restarts recorded for the process. */
    restartCounts: Map<string, number>;
    /** Epoch-ms timestamp before which the process must not be restarted. */
    nextRestartTime: Map<string, number>;
    /** Epoch-ms timestamp used as the start of the stability window. */
    lastSeenAlive: Map<string, number>;
};

const state: GuardState = {
    restartCounts: new Map<string, number>(),
    nextRestartTime: new Map<string, number>(),
    lastSeenAlive: new Map<string, number>(),
};
|
|
38
|
+
|
|
39
|
+
/**
 * Ask bgrun to (re)run the registered process called `name`, forcing a restart.
 *
 * @param name Registry name of the process to restart.
 * @returns `true` when `handleRun` completed, `false` when it threw (the
 *          failure is logged to stderr rather than propagated).
 */
async function restartProcess(name: string): Promise<boolean> {
    let restarted = false;
    try {
        await handleRun({ action: 'run', name, force: true, remoteName: '' });
        restarted = true;
    } catch (failure: any) {
        console.error(`[guard] ✗ Failed to restart "${name}": ${failure.message}`);
    }
    return restarted;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Delay to impose before the next restart, given how many restarts have
 * already happened.
 *
 * Up to and including CRASH_THRESHOLD restarts there is no delay. After that
 * the delay starts at 30 seconds and doubles with each further restart,
 * capped at MAX_BACKOFF_MS.
 *
 * @param restartCount Restarts recorded so far for the process.
 * @returns Backoff in milliseconds (0 means "restart immediately").
 */
function getBackoffMs(restartCount: number): number {
    const restartsOverThreshold = restartCount - CRASH_THRESHOLD;
    if (restartsOverThreshold <= 0) return 0;

    const uncapped = 30_000 * 2 ** (restartsOverThreshold - 1);
    return uncapped > MAX_BACKOFF_MS ? MAX_BACKOFF_MS : uncapped;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Run one guard pass over every registered process.
 *
 * A process is watched when its stored env contains BGR_KEEP_ALIVE=true or
 * when it is named "bgr-dashboard"; the "bgr-guard" entry is always skipped.
 * For each watched process:
 *   - dead with a positive PID: restart it, unless it is still inside its
 *     backoff window (then it is only counted as skipped);
 *   - alive with a non-zero restart count: once it has been observed alive for
 *     longer than STABILITY_WINDOW_MS, its counters are cleared.
 *
 * Never throws: per-process and whole-cycle failures are logged to stderr.
 */
async function guardCycle(): Promise<void> {
    try {
        const processes = getAllProcesses();
        if (processes.length === 0) return;

        // One timestamp per cycle so every process is judged against the same clock.
        const now = Date.now();
        let checked = 0;
        let restarted = 0;
        let skipped = 0;

        for (const proc of processes) {
            // Skip the guard process itself
            if (proc.name === 'bgr-guard') continue;

            const env = proc.env ? parseEnvString(proc.env) : {};
            const isGuarded = env.BGR_KEEP_ALIVE === 'true';
            const isDashboard = proc.name === 'bgr-dashboard';

            // Guard both: explicitly guarded processes AND the dashboard
            if (!isGuarded && !isDashboard) continue;

            checked++;

            try {
                const alive = await isProcessRunning(proc.pid, proc.command);

                if (alive) {
                    // Track stability — if alive for STABILITY_WINDOW, reset counters
                    const count = state.restartCounts.get(proc.name) ?? 0;
                    if (count > 0) {
                        const lastSeen = state.lastSeenAlive.get(proc.name);
                        if (!lastSeen) {
                            state.lastSeenAlive.set(proc.name, now);
                        } else if (now - lastSeen > STABILITY_WINDOW_MS) {
                            state.restartCounts.delete(proc.name);
                            state.nextRestartTime.delete(proc.name);
                            state.lastSeenAlive.delete(proc.name);
                            console.log(`[guard] ✓ "${proc.name}" stable for ${Math.round(STABILITY_WINDOW_MS / 1000)}s — reset counters`);
                        }
                    }
                    continue;
                }

                // Not alive: drop any earlier "seen alive" stamp so the stability
                // window always measures one uninterrupted run. Without this, a
                // stamp left over from before the crash could clear the crash
                // counters the first time the process is seen alive again.
                state.lastSeenAlive.delete(proc.name);

                // A non-positive PID is never restarted.
                if (proc.pid <= 0) continue;

                // Check backoff
                const nextRestart = state.nextRestartTime.get(proc.name) ?? 0;
                if (now < nextRestart) {
                    skipped++;
                    continue;
                }

                console.log(`[guard] ⚠ "${proc.name}" (PID ${proc.pid}) is dead — restarting...`);

                const success = await restartProcess(proc.name);
                if (success) {
                    const count = (state.restartCounts.get(proc.name) ?? 0) + 1;
                    state.restartCounts.set(proc.name, count);

                    const backoff = getBackoffMs(count);
                    if (backoff > 0) {
                        state.nextRestartTime.set(proc.name, now + backoff);
                        console.log(`[guard] ✓ Restarted "${proc.name}" (#${count}). Crash loop: next check in ${Math.round(backoff / 1000)}s`);
                    } else {
                        console.log(`[guard] ✓ Restarted "${proc.name}" (#${count})`);
                    }
                    restarted++;
                }
            } catch (err: unknown) {
                // Non-Error throws have no .message; fall back to their string form.
                const reason = err instanceof Error ? err.message : String(err);
                console.error(`[guard] Error checking "${proc.name}": ${reason}`);
            }
        }

        // Only emit a summary for cycles that actually restarted something.
        if (restarted > 0) {
            console.log(`[guard] Cycle: ${checked} checked, ${restarted} restarted, ${skipped} in backoff`);
        }
    } catch (err: unknown) {
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`[guard] Error in guard cycle: ${reason}`);
    }
}
|
|
139
|
+
|
|
140
|
+
/**
 * Start the standalone guard: print a banner, run one guard cycle
 * immediately, then keep running cycles on a fixed interval.
 *
 * @param intervalMs Milliseconds between cycles. Values that are not finite,
 *        not positive, or larger than the maximum timer delay fall back to
 *        DEFAULT_INTERVAL_MS.
 * @returns Resolves after the first cycle has completed and the interval
 *          timer has been scheduled.
 */
export async function startGuardLoop(intervalMs: number = DEFAULT_INTERVAL_MS) {
    // Node documents that delays below 1 or above 2^31-1 are coerced to 1 ms,
    // which would turn a bad argument into a tight polling loop.
    const MAX_TIMER_DELAY_MS = 2_147_483_647;
    const interval =
        Number.isFinite(intervalMs) && intervalMs > 0 && intervalMs <= MAX_TIMER_DELAY_MS
            ? intervalMs
            : DEFAULT_INTERVAL_MS;

    console.log(`[guard] ═══════════════════════════════════════════`);
    console.log(`[guard] 🛡️ BGR Standalone Guard started`);
    console.log(`[guard] Check interval: ${interval / 1000}s`);
    console.log(`[guard] Crash backoff threshold: ${CRASH_THRESHOLD} restarts`);
    console.log(`[guard] Stability window: ${STABILITY_WINDOW_MS / 1000}s`);
    console.log(`[guard] Monitoring: BGR_KEEP_ALIVE=true + bgr-dashboard`);
    console.log(`[guard] Started: ${new Date().toLocaleString()}`);
    console.log(`[guard] ═══════════════════════════════════════════`);

    // Run initial check immediately
    await guardCycle();

    // Then run on interval. A cycle awaits process restarts and can outlast the
    // interval; skip a tick while the previous cycle is still running so the
    // same dead process is never restarted by two overlapping cycles.
    let cycleInFlight = false;
    setInterval(async () => {
        if (cycleInFlight) return;
        cycleInFlight = true;
        try {
            await guardCycle();
        } finally {
            cycleInFlight = false;
        }
    }, interval);
}
|
package/src/index.ts
CHANGED
|
@@ -12,7 +12,7 @@ import type { CommandOptions } from "./types";
|
|
|
12
12
|
import { error, announce } from "./logger";
|
|
13
13
|
// startServer is dynamically imported only when --_serve is used
|
|
14
14
|
// to avoid loading melina (which has side-effects) on every bgrun command
|
|
15
|
-
import { getHomeDir, getShellCommand, findChildPid, isProcessRunning, terminateProcess, getProcessPorts, killProcessOnPort, waitForPortFree } from "./platform";
|
|
15
|
+
import { getHomeDir, getShellCommand, findChildPid, isProcessRunning, terminateProcess, getProcessPorts, killProcessOnPort, waitForPortFree, isPortFree } from "./platform";
|
|
16
16
|
import { insertProcess, removeProcessByName, getProcess, retryDatabaseOperation, getDbInfo } from "./db";
|
|
17
17
|
import dedent from "dedent";
|
|
18
18
|
import chalk from "chalk";
|
|
@@ -38,6 +38,7 @@ async function showHelp() {
|
|
|
38
38
|
bgrun List all processes
|
|
39
39
|
bgrun [name] Show details for a process
|
|
40
40
|
bgrun --dashboard Launch web dashboard (managed by bgrun)
|
|
41
|
+
bgrun --guard Launch standalone process guard
|
|
41
42
|
bgrun --restart [name] Restart a process
|
|
42
43
|
bgrun --restart-all Restart ALL registered processes
|
|
43
44
|
bgrun --stop [name] Stop a process (keep in registry)
|
|
@@ -105,8 +106,10 @@ async function run() {
|
|
|
105
106
|
stdout: { type: 'string' },
|
|
106
107
|
stderr: { type: 'string' },
|
|
107
108
|
dashboard: { type: 'boolean' },
|
|
109
|
+
guard: { type: 'boolean' },
|
|
108
110
|
debug: { type: 'boolean' },
|
|
109
111
|
"_serve": { type: 'boolean' },
|
|
112
|
+
"_guard-loop": { type: 'boolean' },
|
|
110
113
|
port: { type: 'string' },
|
|
111
114
|
},
|
|
112
115
|
strict: false,
|
|
@@ -122,6 +125,15 @@ async function run() {
|
|
|
122
125
|
return;
|
|
123
126
|
}
|
|
124
127
|
|
|
128
|
+
// Internal: actually run the guard loop (spawned by --guard)
|
|
129
|
+
if (values['_guard-loop']) {
|
|
130
|
+
const { startGuardLoop } = await import("./guard");
|
|
131
|
+
const intervalStr = positionals[0];
|
|
132
|
+
const intervalMs = intervalStr ? parseInt(intervalStr) * 1000 : undefined;
|
|
133
|
+
await startGuardLoop(intervalMs);
|
|
134
|
+
return;
|
|
135
|
+
}
|
|
136
|
+
|
|
125
137
|
// Dashboard: spawn the dashboard server as a bgr-managed process
|
|
126
138
|
if (values.dashboard) {
|
|
127
139
|
const dashboardName = 'bgr-dashboard';
|
|
@@ -186,6 +198,21 @@ async function run() {
|
|
|
186
198
|
spawnEnv.BUN_PORT = requestedPort;
|
|
187
199
|
}
|
|
188
200
|
|
|
201
|
+
// Resolve the target port: --port flag > BUN_PORT env > default 3000
|
|
202
|
+
const targetPort = parseInt(requestedPort || Bun.env.BUN_PORT || '3000');
|
|
203
|
+
if (!isNaN(targetPort) && targetPort > 0) {
|
|
204
|
+
// Auto-kill whatever occupies the target port so dashboard always reclaims it
|
|
205
|
+
const portFree = await isPortFree(targetPort);
|
|
206
|
+
if (!portFree) {
|
|
207
|
+
console.log(chalk.yellow(` ⚡ Port ${targetPort} is occupied — reclaiming...`));
|
|
208
|
+
await killProcessOnPort(targetPort);
|
|
209
|
+
const freed = await waitForPortFree(targetPort, 5000);
|
|
210
|
+
if (!freed) {
|
|
211
|
+
console.log(chalk.red(` ⚠ Could not free port ${targetPort} — dashboard may pick a fallback port`));
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
|
|
189
216
|
const newProcess = Bun.spawn(getShellCommand(spawnCommand), {
|
|
190
217
|
env: spawnEnv,
|
|
191
218
|
cwd: bgrDir,
|
|
@@ -246,6 +273,85 @@ async function run() {
|
|
|
246
273
|
return;
|
|
247
274
|
}
|
|
248
275
|
|
|
276
|
+
// Guard: spawn the standalone guard as a bgr-managed process
|
|
277
|
+
if (values.guard) {
|
|
278
|
+
const guardName = 'bgr-guard';
|
|
279
|
+
const homePath = getHomeDir();
|
|
280
|
+
const bgrDir = join(homePath, '.bgr');
|
|
281
|
+
|
|
282
|
+
// Check if guard is already running
|
|
283
|
+
const existing = getProcess(guardName);
|
|
284
|
+
if (existing && await isProcessRunning(existing.pid)) {
|
|
285
|
+
announce(
|
|
286
|
+
`Guard is already running (PID ${existing.pid})\n\n` +
|
|
287
|
+
` Use ${chalk.yellow(`bgrun --stop ${guardName}`)} to stop it\n` +
|
|
288
|
+
` Use ${chalk.yellow(`bgrun --guard --force`)} to restart`,
|
|
289
|
+
'BGR Guard'
|
|
290
|
+
);
|
|
291
|
+
return;
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
// Kill existing if force
|
|
295
|
+
if (existing) {
|
|
296
|
+
if (await isProcessRunning(existing.pid)) {
|
|
297
|
+
await terminateProcess(existing.pid);
|
|
298
|
+
}
|
|
299
|
+
await retryDatabaseOperation(() => removeProcessByName(guardName));
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
const { resolve } = require('path');
|
|
303
|
+
const scriptPath = resolve(process.argv[1]);
|
|
304
|
+
const spawnCommand = `bun run ${scriptPath} --_guard-loop`;
|
|
305
|
+
const command = `bgrun --_guard-loop`;
|
|
306
|
+
const stdoutPath = join(bgrDir, `${guardName}-out.txt`);
|
|
307
|
+
const stderrPath = join(bgrDir, `${guardName}-err.txt`);
|
|
308
|
+
|
|
309
|
+
await Bun.write(stdoutPath, '');
|
|
310
|
+
await Bun.write(stderrPath, '');
|
|
311
|
+
|
|
312
|
+
const newProcess = Bun.spawn(getShellCommand(spawnCommand), {
|
|
313
|
+
env: { ...Bun.env },
|
|
314
|
+
cwd: bgrDir,
|
|
315
|
+
stdout: Bun.file(stdoutPath),
|
|
316
|
+
stderr: Bun.file(stderrPath),
|
|
317
|
+
});
|
|
318
|
+
|
|
319
|
+
newProcess.unref();
|
|
320
|
+
await sleep(1000);
|
|
321
|
+
const actualPid = await findChildPid(newProcess.pid);
|
|
322
|
+
|
|
323
|
+
await retryDatabaseOperation(() =>
|
|
324
|
+
insertProcess({
|
|
325
|
+
pid: actualPid,
|
|
326
|
+
workdir: bgrDir,
|
|
327
|
+
command,
|
|
328
|
+
name: guardName,
|
|
329
|
+
env: 'BGR_KEEP_ALIVE=false', // Guard doesn't guard itself
|
|
330
|
+
configPath: '',
|
|
331
|
+
stdout_path: stdoutPath,
|
|
332
|
+
stderr_path: stderrPath,
|
|
333
|
+
})
|
|
334
|
+
);
|
|
335
|
+
|
|
336
|
+
const msg = dedent`
|
|
337
|
+
${chalk.bold('🛡️ BGR Standalone Guard launched')}
|
|
338
|
+
${chalk.gray('─'.repeat(40))}
|
|
339
|
+
|
|
340
|
+
Monitors: All processes with BGR_KEEP_ALIVE=true
|
|
341
|
+
Also watches: bgr-dashboard (auto-restart if it dies)
|
|
342
|
+
Check interval: 30 seconds
|
|
343
|
+
Backoff: Exponential after 5 rapid crashes
|
|
344
|
+
|
|
345
|
+
${chalk.gray('─'.repeat(40))}
|
|
346
|
+
Process: ${chalk.white(guardName)} | PID: ${chalk.white(String(actualPid))}
|
|
347
|
+
|
|
348
|
+
${chalk.yellow(`bgrun ${guardName} --logs`)} View guard logs
|
|
349
|
+
${chalk.yellow(`bgrun --stop ${guardName}`)} Stop the guard
|
|
350
|
+
`;
|
|
351
|
+
announce(msg, 'BGR Guard');
|
|
352
|
+
return;
|
|
353
|
+
}
|
|
354
|
+
|
|
249
355
|
if (values.version) {
|
|
250
356
|
console.log(`bgrun version: ${await getVersion()}`);
|
|
251
357
|
return;
|
|
@@ -435,6 +541,5 @@ async function run() {
|
|
|
435
541
|
}
|
|
436
542
|
|
|
437
543
|
run().catch(err => {
|
|
438
|
-
|
|
439
|
-
process.exit(1);
|
|
544
|
+
error(err);
|
|
440
545
|
});
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Log rotation for bgrun process output files.
|
|
3
|
+
*
|
|
4
|
+
* Ensures log files don't grow unbounded by:
|
|
5
|
+
* 1. Truncating on rotation (restart) — keeping last N lines
|
|
6
|
+
* 2. Size-based rotation — when file exceeds maxBytes, trim to last N lines
|
|
7
|
+
* 3. Periodic rotation check — runs on an interval in the dashboard
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { existsSync, statSync, readFileSync, writeFileSync } from 'fs'
|
|
11
|
+
|
|
12
|
+
// Defaults shared by the rotation helpers below.
const DEFAULT_MAX_BYTES = 10 * 1024 ** 2       // rotate once a file passes 10 MB
const DEFAULT_KEEP_LINES = 5_000               // lines retained from the end of a rotated file
const DEFAULT_CHECK_INTERVAL_MS = 60 * 1000    // periodic check runs once a minute
|
|
15
|
+
|
|
16
|
+
/**
 * Trim a single log file in place once it has grown past `maxBytes`.
 *
 * The newest `keepLines` lines are kept. If those lines are still larger than
 * `maxBytes` (a few very long lines), the tail is cut further by size, down to
 * half of `maxBytes`, so the file ends up under the limit instead of being
 * rewritten on every check. A one-line marker describing the rotation is
 * written at the top of the trimmed file.
 *
 * @param filePath  Log file to inspect.
 * @param maxBytes  Size above which the file is trimmed.
 * @param keepLines Maximum number of trailing lines to keep; 0 or less keeps none.
 * @returns `true` if the file was rewritten, `false` if it was missing, small
 *          enough, nothing could be dropped, or any filesystem error occurred
 *          (rotation is best-effort and never throws).
 */
export function rotateLogFile(
    filePath: string,
    maxBytes: number = DEFAULT_MAX_BYTES,
    keepLines: number = DEFAULT_KEEP_LINES,
): boolean {
    try {
        if (!existsSync(filePath)) return false

        const stat = statSync(filePath)
        if (stat.size <= maxBytes) return false

        const content = readFileSync(filePath, 'utf-8')
        const lines = content.split('\n')

        // slice(-0) returns the whole array, so "keep nothing" needs its own branch.
        let tail = keepLines > 0 ? lines.slice(-keepLines).join('\n') : ''

        // Line-count trimming alone cannot bound the size when lines are long.
        if (Buffer.byteLength(tail, 'utf-8') > maxBytes) {
            // Cut to half the limit so the file does not immediately re-trigger rotation.
            const budget = Math.max(0, Math.floor(maxBytes / 2))
            const bytes = Buffer.from(tail, 'utf-8')
            const cut = bytes.subarray(bytes.length - budget)
            // Resume at the first line boundary so the kept text starts on a whole
            // line (0x0a never occurs inside a multi-byte UTF-8 sequence). With no
            // newline in range the raw tail of the single long line is kept as-is.
            const firstNewline = cut.indexOf(0x0a)
            tail = (firstNewline >= 0 ? cut.subarray(firstNewline + 1) : cut).toString('utf-8')
        }

        // `tail` is always a suffix of `content`; equal length means nothing was dropped.
        if (tail.length === content.length) return false

        const header = `--- [bgrun] Log rotated at ${new Date().toISOString()} (was ${lines.length} lines, ${formatBytes(stat.size)}) ---\n`
        writeFileSync(filePath, header + tail)

        return true
    } catch {
        // Best-effort: a failed rotation must not take down the caller.
        return false
    }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Run `rotateLogFile` over the stdout and stderr logs of every process.
 *
 * @param getProcesses Callback returning the processes whose logs to check.
 * @param maxBytes     Size threshold forwarded to `rotateLogFile`.
 * @param keepLines    Line count forwarded to `rotateLogFile`.
 * @returns `rotated`: one "name/stdout" or "name/stderr" entry per file that
 *          was rewritten; `checked`: how many non-empty log paths were examined.
 */
export function rotateAllLogs(
    getProcesses: () => Array<{ name: string; stdout_path: string; stderr_path: string }>,
    maxBytes: number = DEFAULT_MAX_BYTES,
    keepLines: number = DEFAULT_KEEP_LINES,
): { rotated: string[]; checked: number } {
    const rotated: string[] = []
    let checked = 0

    for (const { name, stdout_path, stderr_path } of getProcesses()) {
        const streams: Array<[string, string]> = [
            ['stdout', stdout_path],
            ['stderr', stderr_path],
        ]

        for (const [label, logPath] of streams) {
            // Processes without a recorded path for this stream are not counted.
            if (!logPath) continue

            checked++
            if (rotateLogFile(logPath, maxBytes, keepLines)) {
                rotated.push(`${name}/${label}`)
            }
        }
    }

    return { rotated, checked }
}
|
|
71
|
+
|
|
72
|
+
/**
 * Start periodic log rotation.
 *
 * Logs the active settings once, then calls `rotateAllLogs` every
 * `intervalMs` milliseconds and reports which logs were rotated.
 *
 * @param getProcesses Callback returning the processes whose logs to check.
 * @param intervalMs   Milliseconds between checks.
 * @param maxBytes     Size threshold forwarded to `rotateAllLogs`.
 * @param keepLines    Line count forwarded to `rotateAllLogs`.
 * @returns The interval handle, so the caller can stop rotation with `clearInterval`.
 */
export function startLogRotation(
    getProcesses: () => Array<{ name: string; stdout_path: string; stderr_path: string }>,
    intervalMs: number = DEFAULT_CHECK_INTERVAL_MS,
    maxBytes: number = DEFAULT_MAX_BYTES,
    keepLines: number = DEFAULT_KEEP_LINES,
): ReturnType<typeof setInterval> {
    console.log(`[logs] Log rotation active: max ${formatBytes(maxBytes)}/file, keep ${keepLines} lines, check every ${intervalMs / 1000}s`)

    return setInterval(() => {
        // `getProcesses` is caller-supplied and is invoked unguarded by
        // rotateAllLogs; an exception escaping a timer callback is uncaught, so
        // contain it here and try again on the next tick.
        try {
            const { rotated } = rotateAllLogs(getProcesses, maxBytes, keepLines)
            if (rotated.length > 0) {
                console.log(`[logs] Rotated ${rotated.length} log(s): ${rotated.join(', ')}`)
            }
        } catch (err: unknown) {
            const reason = err instanceof Error ? err.message : String(err)
            console.error(`[logs] Log rotation check failed: ${reason}`)
        }
    }, intervalMs)
}
|
|
88
|
+
|
|
89
|
+
/**
 * Render a byte count as a short human-readable string ("512B", "2KB", "10.0MB").
 *
 * Uses 1024-based units so that a limit written as `10 * 1024 * 1024` prints
 * as "10.0MB" rather than "10.5MB".
 *
 * @param bytes Size in bytes.
 * @returns The size with a B, KB (no decimals) or MB (one decimal) suffix.
 */
function formatBytes(bytes: number): string {
    const KB = 1024
    const MB = KB * KB

    // Values just under 1 MB would round up to "1024KB"; show them as MB instead.
    if (bytes >= MB || Math.round(bytes / KB) >= KB) return `${(bytes / MB).toFixed(1)}MB`
    if (bytes >= KB) return `${(bytes / KB).toFixed(0)}KB`
    return `${bytes}B`
}
|
package/src/logger.ts
CHANGED
|
@@ -3,7 +3,7 @@ import chalk from "chalk";
|
|
|
3
3
|
|
|
4
4
|
export function announce(message: string, title?: string) {
|
|
5
5
|
console.log(
|
|
6
|
-
boxen(
|
|
6
|
+
boxen(message, {
|
|
7
7
|
padding: 1,
|
|
8
8
|
margin: 1,
|
|
9
9
|
borderColor: 'green',
|
|
@@ -14,9 +14,10 @@ export function announce(message: string, title?: string) {
|
|
|
14
14
|
);
|
|
15
15
|
}
|
|
16
16
|
|
|
17
|
-
export function error(message: string) {
|
|
17
|
+
export function error(message: string | Error) {
|
|
18
|
+
const text = message instanceof Error ? (message.stack || message.message) : String(message);
|
|
18
19
|
console.error(
|
|
19
|
-
boxen(chalk.red(
|
|
20
|
+
boxen(chalk.red(text), {
|
|
20
21
|
padding: 1,
|
|
21
22
|
margin: 1,
|
|
22
23
|
borderColor: 'red',
|
package/src/platform.ts
CHANGED
|
@@ -31,7 +31,7 @@ export async function isProcessRunning(pid: number, command?: string): Promise<b
|
|
|
31
31
|
// PID 0 means intentionally stopped — never alive
|
|
32
32
|
if (pid <= 0) return false;
|
|
33
33
|
|
|
34
|
-
return plat.measure(`PID ${pid} alive?`, async () => {
|
|
34
|
+
return (await plat.measure(`PID ${pid} alive?`, async () => {
|
|
35
35
|
try {
|
|
36
36
|
// Docker container detection
|
|
37
37
|
if (command && (command.includes('docker run') || command.includes('docker-compose up') || command.includes('docker compose up'))) {
|
|
@@ -48,7 +48,7 @@ export async function isProcessRunning(pid: number, command?: string): Promise<b
|
|
|
48
48
|
} catch {
|
|
49
49
|
return false;
|
|
50
50
|
}
|
|
51
|
-
});
|
|
51
|
+
})) ?? false;
|
|
52
52
|
}
|
|
53
53
|
|
|
54
54
|
/**
|
|
@@ -224,8 +224,10 @@ export async function killProcessOnPort(port: number): Promise<void> {
|
|
|
224
224
|
if (alive) {
|
|
225
225
|
await $`taskkill /F /T /PID ${pid}`.nothrow().quiet();
|
|
226
226
|
console.log(`Killed process ${pid} using port ${port}`);
|
|
227
|
+
} else {
|
|
228
|
+
// Zombie socket — PID no longer exists but socket lingers in kernel
|
|
229
|
+
console.warn(`⚠ Port ${port} held by zombie PID ${pid} (process dead, socket stuck in kernel). Will clear on reboot or TCP timeout.`);
|
|
227
230
|
}
|
|
228
|
-
// else: zombie socket — PID no longer exists but socket lingers in kernel
|
|
229
231
|
}
|
|
230
232
|
} else {
|
|
231
233
|
// On Unix, use lsof
|
|
@@ -456,7 +458,7 @@ export async function findPidByPort(port: number, maxWaitMs = 8000): Promise<num
|
|
|
456
458
|
}
|
|
457
459
|
|
|
458
460
|
export async function readFileTail(filePath: string, lines?: number): Promise<string> {
|
|
459
|
-
return plat.measure(`Read tail ${lines ?? 'all'}L`, async () => {
|
|
461
|
+
return (await plat.measure(`Read tail ${lines ?? 'all'}L`, async () => {
|
|
460
462
|
try {
|
|
461
463
|
const content = await Bun.file(filePath).text();
|
|
462
464
|
|
|
@@ -470,7 +472,7 @@ export async function readFileTail(filePath: string, lines?: number): Promise<st
|
|
|
470
472
|
} catch (error) {
|
|
471
473
|
throw new Error(`Error reading file: ${error}`);
|
|
472
474
|
}
|
|
473
|
-
});
|
|
475
|
+
})) ?? '';
|
|
474
476
|
}
|
|
475
477
|
|
|
476
478
|
/**
|
|
@@ -484,56 +486,70 @@ export function copyFile(src: string, dest: string): void {
|
|
|
484
486
|
* Get memory usage of a process in bytes
|
|
485
487
|
*/
|
|
486
488
|
export async function getProcessMemory(pid: number): Promise<number> {
|
|
487
|
-
const map = await
|
|
488
|
-
return map.get(pid) || 0;
|
|
489
|
+
const map = await getProcessBatchResources([pid]);
|
|
490
|
+
return map.get(pid)?.memory || 0;
|
|
489
491
|
}
|
|
490
492
|
|
|
491
493
|
/**
|
|
492
|
-
* Get memory usage for a batch of PIDs
|
|
493
|
-
* Returns a Map of PID ->
|
|
494
|
+
* Get memory and CPU usage for a batch of PIDs.
|
|
495
|
+
* Returns a Map of PID -> { memory: bytes, cpu: number }.
|
|
496
|
+
* On Windows, CPU is cumulative time in seconds.
|
|
497
|
+
* On Unix, CPU is instantaneous percentage.
|
|
494
498
|
*
|
|
495
499
|
* Optimization: Fetches ALL processes in one go and filters in-memory
|
|
496
500
|
* to avoid spawning N subprocesses.
|
|
497
501
|
*/
|
|
498
|
-
export async function
|
|
502
|
+
export async function getProcessBatchResources(pids: number[]): Promise<Map<number, { memory: number, cpu: number }>> {
|
|
499
503
|
if (pids.length === 0) return new Map();
|
|
500
504
|
|
|
501
|
-
return await plat.measure(`Batch
|
|
502
|
-
const
|
|
505
|
+
return await plat.measure(`Batch resources (${pids.length} PIDs)`, async () => {
|
|
506
|
+
const resourceMap = new Map<number, { memory: number, cpu: number }>();
|
|
503
507
|
const pidSet = new Set(pids);
|
|
504
508
|
|
|
505
509
|
try {
|
|
506
510
|
if (isWindows()) {
|
|
507
|
-
const result = await $`powershell -Command "Get-Process | Select-Object Id, WorkingSet"`.nothrow().quiet().text();
|
|
511
|
+
const result = await $`powershell -Command "Get-Process | Select-Object Id, CPU, WorkingSet"`.nothrow().quiet().text();
|
|
508
512
|
const lines = result.trim().split('\n');
|
|
509
513
|
|
|
510
514
|
for (const line of lines) {
|
|
511
515
|
const trimmed = line.trim();
|
|
512
516
|
if (!trimmed || trimmed.startsWith('Id') || trimmed.startsWith('--')) continue;
|
|
513
517
|
|
|
518
|
+
// Replace multiple spaces with a single space to parse correctly
|
|
514
519
|
const parts = trimmed.split(/\s+/);
|
|
515
|
-
if (parts.length >=
|
|
516
|
-
const
|
|
517
|
-
|
|
520
|
+
if (parts.length >= 3) {
|
|
521
|
+
const pid = parseInt(parts[0]);
|
|
522
|
+
// CPU can sometimes be blank if process is just starting, handle that
|
|
523
|
+
let cpuStr = parts[1];
|
|
524
|
+
let memStr = parts[2];
|
|
525
|
+
if (parts.length === 2) {
|
|
526
|
+
// If CPU is missing, powershell might omit it and give just ID and WorkingSet
|
|
527
|
+
cpuStr = "0";
|
|
528
|
+
memStr = parts[1];
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
const cpu = parseFloat(cpuStr) || 0;
|
|
532
|
+
const memory = parseInt(memStr) || 0;
|
|
518
533
|
|
|
519
|
-
if (!isNaN(
|
|
520
|
-
if (pidSet.has(
|
|
534
|
+
if (!isNaN(pid) && !isNaN(memory)) {
|
|
535
|
+
if (pidSet.has(pid)) resourceMap.set(pid, { memory, cpu });
|
|
521
536
|
}
|
|
522
537
|
}
|
|
523
538
|
}
|
|
524
539
|
} else {
|
|
525
|
-
const result = await $`ps -eo pid,rss`.nothrow().quiet().text();
|
|
540
|
+
const result = await $`ps -eo pid,pcpu,rss`.nothrow().quiet().text();
|
|
526
541
|
const lines = result.trim().split('\n');
|
|
527
542
|
|
|
528
543
|
for (let i = 1; i < lines.length; i++) {
|
|
529
544
|
const line = lines[i].trim();
|
|
530
545
|
if (!line) continue;
|
|
531
|
-
const [pidStr, rssStr] = line.split(/\s+/);
|
|
546
|
+
const [pidStr, cpuStr, rssStr] = line.split(/\s+/);
|
|
532
547
|
const pid = parseInt(pidStr);
|
|
533
|
-
const
|
|
548
|
+
const cpu = parseFloat(cpuStr) || 0;
|
|
549
|
+
const rss = parseInt(rssStr) || 0;
|
|
534
550
|
|
|
535
551
|
if (pidSet.has(pid)) {
|
|
536
|
-
|
|
552
|
+
resourceMap.set(pid, { memory: rss * 1024, cpu });
|
|
537
553
|
}
|
|
538
554
|
}
|
|
539
555
|
}
|
|
@@ -541,7 +557,7 @@ export async function getProcessBatchMemory(pids: number[]): Promise<Map<number,
|
|
|
541
557
|
// silently fail
|
|
542
558
|
}
|
|
543
559
|
|
|
544
|
-
return
|
|
560
|
+
return resourceMap;
|
|
545
561
|
}) ?? new Map();
|
|
546
562
|
}
|
|
547
563
|
|
package/src/server.ts
CHANGED
|
@@ -17,9 +17,17 @@ import path from 'path';
|
|
|
17
17
|
import { getAllProcesses, getProcess } from './db';
|
|
18
18
|
import { isProcessRunning } from './platform';
|
|
19
19
|
import { handleRun } from './commands/run';
|
|
20
|
+
import { parseEnvString } from './utils';
|
|
20
21
|
|
|
21
22
|
const GUARD_INTERVAL_MS = 30_000; // Check every 30 seconds
|
|
22
|
-
const GUARD_SKIP_NAMES = new Set(['bgr-dashboard']); // Don't try to restart ourselves
|
|
23
|
+
const GUARD_SKIP_NAMES = new Set(['bgr-dashboard', 'bgr-guard']); // Don't try to restart ourselves or external guard
|
|
24
|
+
|
|
25
|
+
// In-memory guard restart counter and timestamps (persists across module re-evaluations)
|
|
26
|
+
const _g = globalThis as any;
|
|
27
|
+
if (!_g.__bgrGuardRestartCounts) _g.__bgrGuardRestartCounts = new Map<string, number>();
|
|
28
|
+
if (!_g.__bgrGuardNextRestartTime) _g.__bgrGuardNextRestartTime = new Map<string, number>();
|
|
29
|
+
export const guardRestartCounts: Map<string, number> = _g.__bgrGuardRestartCounts;
|
|
30
|
+
const guardNextRestartTime: Map<string, number> = _g.__bgrGuardNextRestartTime;
|
|
23
31
|
|
|
24
32
|
export async function startServer() {
|
|
25
33
|
// Dynamic import to avoid melina's side-effect console.log at bundle load time
|
|
@@ -38,6 +46,10 @@ export async function startServer() {
|
|
|
38
46
|
|
|
39
47
|
// Start the built-in process guard
|
|
40
48
|
startGuard();
|
|
49
|
+
|
|
50
|
+
// Start log rotation (prevents unbounded log file growth)
|
|
51
|
+
const { startLogRotation } = await import('./log-rotation');
|
|
52
|
+
startLogRotation(() => getAllProcesses());
|
|
41
53
|
}
|
|
42
54
|
|
|
43
55
|
/**
|
|
@@ -67,11 +79,15 @@ function startGuard() {
|
|
|
67
79
|
if (GUARD_SKIP_NAMES.has(proc.name)) continue;
|
|
68
80
|
|
|
69
81
|
// Only guard processes with BGR_KEEP_ALIVE=true
|
|
70
|
-
const env = proc.env ? (
|
|
82
|
+
const env = proc.env ? parseEnvString(proc.env) : {};
|
|
71
83
|
if (env.BGR_KEEP_ALIVE !== 'true') continue;
|
|
72
84
|
|
|
73
85
|
const alive = await isProcessRunning(proc.pid, proc.command);
|
|
74
86
|
if (!alive) {
|
|
87
|
+
const now = Date.now();
|
|
88
|
+
const nextRestart = guardNextRestartTime.get(proc.name) || 0;
|
|
89
|
+
if (now < nextRestart) continue; // Still in backoff period
|
|
90
|
+
|
|
75
91
|
console.log(`[guard] ⚠ Guarded process "${proc.name}" (PID ${proc.pid}) is dead, restarting...`);
|
|
76
92
|
try {
|
|
77
93
|
await handleRun({
|
|
@@ -80,10 +96,34 @@ function startGuard() {
|
|
|
80
96
|
force: true,
|
|
81
97
|
remoteName: '',
|
|
82
98
|
});
|
|
83
|
-
|
|
99
|
+
|
|
100
|
+
// Track restart count
|
|
101
|
+
const prevCount = guardRestartCounts.get(proc.name) || 0;
|
|
102
|
+
const newCount = prevCount + 1;
|
|
103
|
+
guardRestartCounts.set(proc.name, newCount);
|
|
104
|
+
|
|
105
|
+
// Exponential backoff if it crashes repeatedly (more than 5 times)
|
|
106
|
+
if (newCount > 5) {
|
|
107
|
+
const backoffSeconds = Math.min(30 * Math.pow(2, newCount - 6), 300); // 30s, 60s, 120s, up to 5 mins
|
|
108
|
+
guardNextRestartTime.set(proc.name, Date.now() + (backoffSeconds * 1000));
|
|
109
|
+
console.log(`[guard] ✓ Restarted "${proc.name}" (restart #${newCount}). Crash loop detected: next check delayed by ${backoffSeconds}s.`);
|
|
110
|
+
} else {
|
|
111
|
+
console.log(`[guard] ✓ Restarted "${proc.name}" (restart #${newCount})`);
|
|
112
|
+
}
|
|
84
113
|
} catch (err: any) {
|
|
85
114
|
console.error(`[guard] ✗ Failed to restart "${proc.name}": ${err.message}`);
|
|
86
115
|
}
|
|
116
|
+
} else {
|
|
117
|
+
// Reset counter if process has been stable (alive at least once during check)
|
|
118
|
+
const prevCount = guardRestartCounts.get(proc.name) || 0;
|
|
119
|
+
if (prevCount > 0) {
|
|
120
|
+
const nextRestart = guardNextRestartTime.get(proc.name) || 0;
|
|
121
|
+
if (Date.now() > nextRestart + 60_000) {
|
|
122
|
+
// If it lived over 60s past its backoff threshold, consider it stable
|
|
123
|
+
guardRestartCounts.delete(proc.name);
|
|
124
|
+
guardNextRestartTime.delete(proc.name);
|
|
125
|
+
}
|
|
126
|
+
}
|
|
87
127
|
}
|
|
88
128
|
}
|
|
89
129
|
} catch (err: any) {
|
package/src/table.ts
CHANGED
|
@@ -34,12 +34,12 @@ export function getTerminalWidth(): number {
|
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
// Strip ANSI color codes for accurate length calculation
|
|
37
|
-
function stripAnsi(str: string): string {
|
|
37
|
+
export function stripAnsi(str: string): string {
|
|
38
38
|
return str.replace(/\u001b\[[0-9;]*m/g, "");
|
|
39
39
|
}
|
|
40
40
|
|
|
41
41
|
// Default truncator: trims the end of a string
|
|
42
|
-
function truncateString(str: string, maxLength: number): string {
|
|
42
|
+
export function truncateString(str: string, maxLength: number): string {
|
|
43
43
|
const stripped = stripAnsi(str);
|
|
44
44
|
if (stripped.length <= maxLength) return str;
|
|
45
45
|
const ellipsis = "…";
|
|
@@ -52,7 +52,7 @@ function truncateString(str: string, maxLength: number): string {
|
|
|
52
52
|
}
|
|
53
53
|
|
|
54
54
|
// Path truncator: trims the middle of a string
|
|
55
|
-
function truncatePath(str: string, maxLength: number): string {
|
|
55
|
+
export function truncatePath(str: string, maxLength: number): string {
|
|
56
56
|
const stripped = stripAnsi(str);
|
|
57
57
|
if (stripped.length <= maxLength) return str;
|
|
58
58
|
const ellipsis = "…";
|