bgrun 3.10.2 → 3.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/guard.ts ADDED
@@ -0,0 +1,157 @@
1
+ /**
2
+ * BGR Standalone Process Guard
3
+ *
4
+ * Runs as an independent process that monitors ALL guarded processes
5
+ * (BGR_KEEP_ALIVE=true) and the dashboard itself. If the dashboard
6
+ * crashes, the guard restarts it. If any guarded process dies, the
7
+ * guard restarts it.
8
+ *
9
+ * This is the "outer shell" — it cannot be killed by a dashboard crash.
10
+ *
11
+ * Usage:
12
+ * bgrun --guard # Start guard as a managed bgrun process
13
+ * bgrun --_guard-loop # (Internal) Actually run the guard loop
14
+ * bgrun --_guard-loop 30 # Check every 30 seconds
15
+ */
16
+
17
+ import { getAllProcesses, getProcess } from './db';
18
+ import { isProcessRunning, getProcessPorts, findChildPid } from './platform';
19
+ import { handleRun } from './commands/run';
20
+ import { parseEnvString } from './utils';
21
+
22
/** Delay between guard cycles when the caller supplies no interval (30 seconds). */
const DEFAULT_INTERVAL_MS = 30 * 1_000;

/** Upper bound for the crash-loop backoff delay (5 minutes). */
const MAX_BACKOFF_MS = 300_000;

/** Backoff starts once a process has been restarted more than this many times. */
const CRASH_THRESHOLD = 5;

/** A restarted process observed alive for longer than this (2 minutes) gets its counters reset. */
const STABILITY_WINDOW_MS = 2 * 60_000;
26
+
27
/** In-memory bookkeeping kept by the guard; every map is keyed by process name. */
interface GuardState {
    /** How many times the guard has successfully restarted the process since the last reset. */
    restartCounts: Map<string, number>;
    /** Epoch milliseconds before which a dead process is left alone (backoff deadline). */
    nextRestartTime: Map<string, number>;
    /** Epoch milliseconds at which a previously restarted process was first seen alive again. */
    lastSeenAlive: Map<string, number>;
}

/** Single shared guard state; it lives only as long as the guard process does. */
const state: GuardState = {
    restartCounts: new Map<string, number>(),
    nextRestartTime: new Map<string, number>(),
    lastSeenAlive: new Map<string, number>(),
};
38
+
39
/**
 * Ask bgrun to launch the registered process called `name` again.
 *
 * Delegates to `handleRun` with `action: 'run'` and `force: true`. A failure inside
 * `handleRun` is caught, logged to stderr and reported through the return value, so a
 * single failed restart cannot abort the caller's loop.
 *
 * @param name - Name of the registered process to restart.
 * @returns `true` when `handleRun` completed, `false` when it threw or rejected.
 */
async function restartProcess(name: string): Promise<boolean> {
    try {
        await handleRun({
            action: 'run',
            name,
            force: true,
            remoteName: '',
        });
        return true;
    } catch (err: unknown) {
        // Thrown values are not guaranteed to be Error instances; reading `.message`
        // blindly would log "undefined" for strings or plain objects.
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`[guard] ✗ Failed to restart "${name}": ${reason}`);
        return false;
    }
}
53
+
54
/**
 * Work out how long to hold off before restarting a crash-looping process again.
 *
 * Up to and including CRASH_THRESHOLD restarts there is no delay. Beyond that the delay
 * starts at 30 seconds and doubles with every additional restart, capped at MAX_BACKOFF_MS.
 *
 * @param restartCount - Number of restarts recorded for the process so far.
 * @returns Delay in milliseconds (0 means "no backoff").
 */
function getBackoffMs(restartCount: number): number {
    const restartsOverThreshold = restartCount - CRASH_THRESHOLD;
    if (restartsOverThreshold <= 0) return 0;

    const uncappedDelay = 30_000 * 2 ** (restartsOverThreshold - 1);
    return Math.min(uncappedDelay, MAX_BACKOFF_MS);
}
59
+
60
/** Render any thrown value as text; thrown values are not always Error instances. */
function describeGuardError(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
}

/**
 * Run one guard pass over the process registry.
 *
 * Every registered process that is named `bgr-dashboard` or whose stored env contains
 * `BGR_KEEP_ALIVE=true` is checked (the `bgr-guard` entry itself is always skipped):
 *  - dead with a positive recorded PID: restarted through `restartProcess`, unless the
 *    backoff deadline stored in `state.nextRestartTime` has not passed yet;
 *  - alive after earlier guard restarts: once it has been observed alive for longer than
 *    STABILITY_WINDOW_MS its restart counter and backoff deadline are cleared.
 *
 * Errors from a single process check, or from the cycle as a whole, are logged to stderr
 * instead of being thrown, so the caller's timer keeps running.
 */
async function guardCycle(): Promise<void> {
    try {
        const processes = getAllProcesses();
        if (processes.length === 0) return;

        const now = Date.now();
        let checked = 0;
        let restarted = 0;
        let skipped = 0;

        for (const proc of processes) {
            // Skip the guard process itself
            if (proc.name === 'bgr-guard') continue;

            const env = proc.env ? parseEnvString(proc.env) : {};
            const isGuarded = env.BGR_KEEP_ALIVE === 'true';
            const isDashboard = proc.name === 'bgr-dashboard';

            // Guard both: explicitly guarded processes AND the dashboard
            if (!isGuarded && !isDashboard) continue;

            checked++;

            try {
                const alive = await isProcessRunning(proc.pid, proc.command);

                if (!alive && proc.pid > 0) {
                    // Still inside the crash-loop backoff window: leave it dead for now.
                    const nextRestart = state.nextRestartTime.get(proc.name) ?? 0;
                    if (now < nextRestart) {
                        skipped++;
                        continue;
                    }

                    console.log(`[guard] ⚠ "${proc.name}" (PID ${proc.pid}) is dead — restarting...`);

                    const success = await restartProcess(proc.name);
                    if (success) {
                        const count = (state.restartCounts.get(proc.name) ?? 0) + 1;
                        state.restartCounts.set(proc.name, count);
                        // The stability clock starts over after every restart.
                        state.lastSeenAlive.delete(proc.name);

                        const backoff = getBackoffMs(count);
                        if (backoff > 0) {
                            // Measure the delay from when the restart finished, not from the
                            // start of the cycle; the restart itself can take a while.
                            state.nextRestartTime.set(proc.name, Date.now() + backoff);
                            console.log(`[guard] ✓ Restarted "${proc.name}" (#${count}). Crash loop: next check in ${Math.round(backoff / 1000)}s`);
                        } else {
                            console.log(`[guard] ✓ Restarted "${proc.name}" (#${count})`);
                        }
                        restarted++;
                    }
                } else if (alive) {
                    // Track stability — if alive for STABILITY_WINDOW, reset counters
                    const count = state.restartCounts.get(proc.name) ?? 0;
                    if (count > 0) {
                        const lastSeen = state.lastSeenAlive.get(proc.name);
                        if (!lastSeen) {
                            // First healthy sighting since the last restart.
                            state.lastSeenAlive.set(proc.name, now);
                        } else if (now - lastSeen > STABILITY_WINDOW_MS) {
                            state.restartCounts.delete(proc.name);
                            state.nextRestartTime.delete(proc.name);
                            state.lastSeenAlive.delete(proc.name);
                            console.log(`[guard] ✓ "${proc.name}" stable for ${Math.round(STABILITY_WINDOW_MS / 1000)}s — reset counters`);
                        }
                    }
                }
            } catch (err: unknown) {
                console.error(`[guard] Error checking "${proc.name}": ${describeGuardError(err)}`);
            }
        }

        // Only cycles that restarted something are summarised, to keep the log quiet.
        if (restarted > 0) {
            console.log(`[guard] Cycle: ${checked} checked, ${restarted} restarted, ${skipped} in backoff`);
        }
    } catch (err: unknown) {
        console.error(`[guard] Error in guard cycle: ${describeGuardError(err)}`);
    }
}
139
+
140
/**
 * Start the standalone guard: print a banner, run one guard cycle immediately, then keep
 * running cycles on a timer.
 *
 * The timer is never cleared here, so it keeps firing until the process exits.
 *
 * @param intervalMs - Milliseconds between cycles. Zero, negative, NaN or infinite values
 *   fall back to DEFAULT_INTERVAL_MS.
 * @returns A promise that resolves once the first cycle has finished and the timer is armed.
 */
export async function startGuardLoop(intervalMs: number = DEFAULT_INTERVAL_MS): Promise<void> {
    // Timers clamp out-of-range delays to about 1ms, which would turn the guard into a
    // busy loop, so anything that is not a positive finite number uses the default.
    const interval = Number.isFinite(intervalMs) && intervalMs > 0 ? intervalMs : DEFAULT_INTERVAL_MS;

    console.log(`[guard] ═══════════════════════════════════════════`);
    console.log(`[guard] 🛡️ BGR Standalone Guard started`);
    console.log(`[guard] Check interval: ${interval / 1000}s`);
    console.log(`[guard] Crash backoff threshold: ${CRASH_THRESHOLD} restarts`);
    console.log(`[guard] Stability window: ${STABILITY_WINDOW_MS / 1000}s`);
    console.log(`[guard] Monitoring: BGR_KEEP_ALIVE=true + bgr-dashboard`);
    console.log(`[guard] Started: ${new Date().toLocaleString()}`);
    console.log(`[guard] ═══════════════════════════════════════════`);

    // Run initial check immediately
    await guardCycle();

    // Then run on interval. A cycle awaits process checks and restarts and can outlast the
    // interval; skipping ticks while one is in flight keeps two overlapping cycles from
    // restarting the same process twice.
    let cycleInFlight = false;
    setInterval(async () => {
        if (cycleInFlight) return;
        cycleInFlight = true;
        try {
            await guardCycle();
        } catch (err: unknown) {
            const reason = err instanceof Error ? err.message : String(err);
            console.error(`[guard] Error in guard cycle: ${reason}`);
        } finally {
            cycleInFlight = false;
        }
    }, interval);
}
package/src/index.ts CHANGED
@@ -12,7 +12,7 @@ import type { CommandOptions } from "./types";
12
12
  import { error, announce } from "./logger";
13
13
  // startServer is dynamically imported only when --_serve is used
14
14
  // to avoid loading melina (which has side-effects) on every bgrun command
15
- import { getHomeDir, getShellCommand, findChildPid, isProcessRunning, terminateProcess, getProcessPorts, killProcessOnPort, waitForPortFree } from "./platform";
15
+ import { getHomeDir, getShellCommand, findChildPid, isProcessRunning, terminateProcess, getProcessPorts, killProcessOnPort, waitForPortFree, isPortFree } from "./platform";
16
16
  import { insertProcess, removeProcessByName, getProcess, retryDatabaseOperation, getDbInfo } from "./db";
17
17
  import dedent from "dedent";
18
18
  import chalk from "chalk";
@@ -38,6 +38,7 @@ async function showHelp() {
38
38
  bgrun List all processes
39
39
  bgrun [name] Show details for a process
40
40
  bgrun --dashboard Launch web dashboard (managed by bgrun)
41
+ bgrun --guard Launch standalone process guard
41
42
  bgrun --restart [name] Restart a process
42
43
  bgrun --restart-all Restart ALL registered processes
43
44
  bgrun --stop [name] Stop a process (keep in registry)
@@ -105,8 +106,10 @@ async function run() {
105
106
  stdout: { type: 'string' },
106
107
  stderr: { type: 'string' },
107
108
  dashboard: { type: 'boolean' },
109
+ guard: { type: 'boolean' },
108
110
  debug: { type: 'boolean' },
109
111
  "_serve": { type: 'boolean' },
112
+ "_guard-loop": { type: 'boolean' },
110
113
  port: { type: 'string' },
111
114
  },
112
115
  strict: false,
@@ -122,6 +125,15 @@ async function run() {
122
125
  return;
123
126
  }
124
127
 
128
+ // Internal: actually run the guard loop (spawned by --guard)
129
+ if (values['_guard-loop']) {
130
+ const { startGuardLoop } = await import("./guard");
131
+ const intervalStr = positionals[0];
132
+ const intervalMs = intervalStr ? parseInt(intervalStr) * 1000 : undefined;
133
+ await startGuardLoop(intervalMs);
134
+ return;
135
+ }
136
+
125
137
  // Dashboard: spawn the dashboard server as a bgr-managed process
126
138
  if (values.dashboard) {
127
139
  const dashboardName = 'bgr-dashboard';
@@ -186,6 +198,21 @@ async function run() {
186
198
  spawnEnv.BUN_PORT = requestedPort;
187
199
  }
188
200
 
201
+ // Resolve the target port: --port flag > BUN_PORT env > default 3000
202
+ const targetPort = parseInt(requestedPort || Bun.env.BUN_PORT || '3000');
203
+ if (!isNaN(targetPort) && targetPort > 0) {
204
+ // Auto-kill whatever occupies the target port so dashboard always reclaims it
205
+ const portFree = await isPortFree(targetPort);
206
+ if (!portFree) {
207
+ console.log(chalk.yellow(` ⚡ Port ${targetPort} is occupied — reclaiming...`));
208
+ await killProcessOnPort(targetPort);
209
+ const freed = await waitForPortFree(targetPort, 5000);
210
+ if (!freed) {
211
+ console.log(chalk.red(` ⚠ Could not free port ${targetPort} — dashboard may pick a fallback port`));
212
+ }
213
+ }
214
+ }
215
+
189
216
  const newProcess = Bun.spawn(getShellCommand(spawnCommand), {
190
217
  env: spawnEnv,
191
218
  cwd: bgrDir,
@@ -246,6 +273,85 @@ async function run() {
246
273
  return;
247
274
  }
248
275
 
276
+ // Guard: spawn the standalone guard as a bgr-managed process
277
+ if (values.guard) {
278
+ const guardName = 'bgr-guard';
279
+ const homePath = getHomeDir();
280
+ const bgrDir = join(homePath, '.bgr');
281
+
282
+ // Check if guard is already running
283
+ const existing = getProcess(guardName);
284
+ if (existing && await isProcessRunning(existing.pid)) {
285
+ announce(
286
+ `Guard is already running (PID ${existing.pid})\n\n` +
287
+ ` Use ${chalk.yellow(`bgrun --stop ${guardName}`)} to stop it\n` +
288
+ ` Use ${chalk.yellow(`bgrun --guard --force`)} to restart`,
289
+ 'BGR Guard'
290
+ );
291
+ return;
292
+ }
293
+
294
+ // Kill existing if force
295
+ if (existing) {
296
+ if (await isProcessRunning(existing.pid)) {
297
+ await terminateProcess(existing.pid);
298
+ }
299
+ await retryDatabaseOperation(() => removeProcessByName(guardName));
300
+ }
301
+
302
+ const { resolve } = require('path');
303
+ const scriptPath = resolve(process.argv[1]);
304
+ const spawnCommand = `bun run ${scriptPath} --_guard-loop`;
305
+ const command = `bgrun --_guard-loop`;
306
+ const stdoutPath = join(bgrDir, `${guardName}-out.txt`);
307
+ const stderrPath = join(bgrDir, `${guardName}-err.txt`);
308
+
309
+ await Bun.write(stdoutPath, '');
310
+ await Bun.write(stderrPath, '');
311
+
312
+ const newProcess = Bun.spawn(getShellCommand(spawnCommand), {
313
+ env: { ...Bun.env },
314
+ cwd: bgrDir,
315
+ stdout: Bun.file(stdoutPath),
316
+ stderr: Bun.file(stderrPath),
317
+ });
318
+
319
+ newProcess.unref();
320
+ await sleep(1000);
321
+ const actualPid = await findChildPid(newProcess.pid);
322
+
323
+ await retryDatabaseOperation(() =>
324
+ insertProcess({
325
+ pid: actualPid,
326
+ workdir: bgrDir,
327
+ command,
328
+ name: guardName,
329
+ env: 'BGR_KEEP_ALIVE=false', // Guard doesn't guard itself
330
+ configPath: '',
331
+ stdout_path: stdoutPath,
332
+ stderr_path: stderrPath,
333
+ })
334
+ );
335
+
336
+ const msg = dedent`
337
+ ${chalk.bold('🛡️ BGR Standalone Guard launched')}
338
+ ${chalk.gray('─'.repeat(40))}
339
+
340
+ Monitors: All processes with BGR_KEEP_ALIVE=true
341
+ Also watches: bgr-dashboard (auto-restart if it dies)
342
+ Check interval: 30 seconds
343
+ Backoff: Exponential after 5 rapid crashes
344
+
345
+ ${chalk.gray('─'.repeat(40))}
346
+ Process: ${chalk.white(guardName)} | PID: ${chalk.white(String(actualPid))}
347
+
348
+ ${chalk.yellow(`bgrun ${guardName} --logs`)} View guard logs
349
+ ${chalk.yellow(`bgrun --stop ${guardName}`)} Stop the guard
350
+ `;
351
+ announce(msg, 'BGR Guard');
352
+ return;
353
+ }
354
+
249
355
  if (values.version) {
250
356
  console.log(`bgrun version: ${await getVersion()}`);
251
357
  return;
@@ -435,6 +541,5 @@ async function run() {
435
541
  }
436
542
 
437
543
  run().catch(err => {
438
- console.error(chalk.red(err));
439
- process.exit(1);
544
+ error(err);
440
545
  });
@@ -0,0 +1,93 @@
1
+ /**
2
+ * Log rotation for bgrun process output files.
3
+ *
4
+ * Ensures log files don't grow unbounded by:
5
+ * 1. Truncating on rotation (restart) — keeping last N lines
6
+ * 2. Size-based rotation — when file exceeds maxBytes, trim to last N lines
7
+ * 3. Periodic rotation check — runs on an interval in the dashboard
8
+ */
9
+
10
+ import { existsSync, statSync, readFileSync, writeFileSync } from 'fs'
11
+
12
+ const DEFAULT_MAX_BYTES = 10 * 1024 * 1024 // 10 MB
13
+ const DEFAULT_KEEP_LINES = 5000 // Keep last 5000 lines on rotation
14
+ const DEFAULT_CHECK_INTERVAL_MS = 60_000 // Check every 60s
15
+
16
/**
 * Rotate a single log file if it exceeds `maxBytes`.
 *
 * When the file is larger than `maxBytes` and has more than `keepLines` lines, it is
 * rewritten in place as a one-line rotation header followed by its last `keepLines` lines.
 * A `keepLines` of zero (or less) keeps no old lines at all.
 *
 * Best-effort: any filesystem error is swallowed and reported as "not rotated".
 * Note that a file with `keepLines` or fewer lines is left untouched even if it is
 * larger than `maxBytes`.
 *
 * @param filePath - Path of the log file to check.
 * @param maxBytes - Size threshold in bytes; files at or below it are left alone.
 * @param keepLines - Number of trailing lines to preserve.
 * @returns `true` if the file was rewritten, `false` otherwise.
 */
export function rotateLogFile(
    filePath: string,
    maxBytes: number = DEFAULT_MAX_BYTES,
    keepLines: number = DEFAULT_KEEP_LINES,
): boolean {
    try {
        if (!existsSync(filePath)) return false

        const stat = statSync(filePath)
        if (stat.size <= maxBytes) return false

        // Read file, keep last N lines
        const content = readFileSync(filePath, 'utf-8')
        const lines = content.split('\n')

        // A NaN line budget is a caller error; do nothing rather than wipe the log.
        if (Number.isNaN(keepLines) || lines.length <= keepLines) return false

        // slice(-0) is slice(0) and would keep the whole file, and a negative budget would
        // drop lines from the front instead; both mean "keep nothing".
        const truncated = keepLines > 0 ? lines.slice(-keepLines) : []
        const header = `--- [bgrun] Log rotated at ${new Date().toISOString()} (was ${lines.length} lines, ${formatBytes(stat.size)}) ---\n`
        writeFileSync(filePath, header + truncated.join('\n'))

        return true
    } catch {
        // Deliberately best-effort: rotation must never break the caller.
        return false
    }
}
43
+
44
/**
 * Run size-based rotation over the stdout and stderr log of every process.
 *
 * @param getProcesses - Supplies the processes whose log paths should be checked.
 * @param maxBytes - Size threshold passed through to `rotateLogFile`.
 * @param keepLines - Line budget passed through to `rotateLogFile`.
 * @returns `rotated`: one `"<name>/stdout"` or `"<name>/stderr"` label per file that was
 *   rewritten; `checked`: how many non-empty log paths were examined.
 */
export function rotateAllLogs(
    getProcesses: () => Array<{ name: string; stdout_path: string; stderr_path: string }>,
    maxBytes: number = DEFAULT_MAX_BYTES,
    keepLines: number = DEFAULT_KEEP_LINES,
): { rotated: string[]; checked: number } {
    const entries = getProcesses()
    const rotated: string[] = []
    let checked = 0

    // Examine one stream of one process; empty paths are neither counted nor rotated.
    const visit = (owner: string, stream: string, logPath: string): void => {
        if (!logPath) return
        checked += 1
        if (rotateLogFile(logPath, maxBytes, keepLines)) {
            rotated.push(`${owner}/${stream}`)
        }
    }

    for (const entry of entries) {
        visit(entry.name, 'stdout', entry.stdout_path)
        visit(entry.name, 'stderr', entry.stderr_path)
    }

    return { rotated, checked }
}
71
+
72
/**
 * Start periodic log rotation.
 *
 * Logs the active settings once, then runs `rotateAllLogs` every `intervalMs` milliseconds
 * and logs which files were rotated, if any. A failing pass (for example `getProcesses`
 * throwing) is logged and skipped so that it cannot surface as an uncaught exception from
 * the timer callback.
 *
 * @param getProcesses - Supplies the processes whose logs should be checked on each pass.
 * @param intervalMs - Delay between passes in milliseconds.
 * @param maxBytes - Size threshold passed through to `rotateAllLogs`.
 * @param keepLines - Line budget passed through to `rotateAllLogs`.
 * @returns The interval handle; pass it to `clearInterval` to stop rotation.
 */
export function startLogRotation(
    getProcesses: () => Array<{ name: string; stdout_path: string; stderr_path: string }>,
    intervalMs: number = DEFAULT_CHECK_INTERVAL_MS,
    maxBytes: number = DEFAULT_MAX_BYTES,
    keepLines: number = DEFAULT_KEEP_LINES,
): ReturnType<typeof setInterval> {
    console.log(`[logs] Log rotation active: max ${formatBytes(maxBytes)}/file, keep ${keepLines} lines, check every ${intervalMs / 1000}s`)

    return setInterval(() => {
        try {
            const { rotated } = rotateAllLogs(getProcesses, maxBytes, keepLines)
            if (rotated.length > 0) {
                console.log(`[logs] Rotated ${rotated.length} log(s): ${rotated.join(', ')}`)
            }
        } catch (err: unknown) {
            // An exception escaping a timer callback would terminate the host process.
            const reason = err instanceof Error ? err.message : String(err)
            console.error(`[logs] Log rotation pass failed: ${reason}`)
        }
    }, intervalMs)
}
88
+
89
/**
 * Format a byte count for log messages as `B`, `KB` or `MB`.
 *
 * Uses 1024-based units so the output agrees with the module's size limit, which is
 * defined as `10 * 1024 * 1024` and labelled "10 MB" (decimal units printed it as "10.5MB").
 *
 * @param bytes - Non-negative byte count.
 * @returns e.g. `"512B"`, `"2KB"`, `"10.0MB"`.
 */
function formatBytes(bytes: number): string {
    const KB = 1024
    const MB = KB * KB

    if (bytes >= MB) return `${(bytes / MB).toFixed(1)}MB`
    if (bytes >= KB) {
        const wholeKb = Math.round(bytes / KB)
        // Values just under 1 MB would round up to "1024KB"; show them as MB instead.
        return wholeKb >= KB ? `${(bytes / MB).toFixed(1)}MB` : `${wholeKb}KB`
    }
    return `${bytes}B`
}
package/src/logger.ts CHANGED
@@ -3,7 +3,7 @@ import chalk from "chalk";
3
3
 
4
4
  export function announce(message: string, title?: string) {
5
5
  console.log(
6
- boxen(chalk.white(message), {
6
+ boxen(message, {
7
7
  padding: 1,
8
8
  margin: 1,
9
9
  borderColor: 'green',
@@ -14,9 +14,10 @@ export function announce(message: string, title?: string) {
14
14
  );
15
15
  }
16
16
 
17
- export function error(message: string) {
17
+ export function error(message: string | Error) {
18
+ const text = message instanceof Error ? (message.stack || message.message) : String(message);
18
19
  console.error(
19
- boxen(chalk.red(message), {
20
+ boxen(chalk.red(text), {
20
21
  padding: 1,
21
22
  margin: 1,
22
23
  borderColor: 'red',
package/src/platform.ts CHANGED
@@ -31,7 +31,7 @@ export async function isProcessRunning(pid: number, command?: string): Promise<b
31
31
  // PID 0 means intentionally stopped — never alive
32
32
  if (pid <= 0) return false;
33
33
 
34
- return plat.measure(`PID ${pid} alive?`, async () => {
34
+ return (await plat.measure(`PID ${pid} alive?`, async () => {
35
35
  try {
36
36
  // Docker container detection
37
37
  if (command && (command.includes('docker run') || command.includes('docker-compose up') || command.includes('docker compose up'))) {
@@ -48,7 +48,7 @@ export async function isProcessRunning(pid: number, command?: string): Promise<b
48
48
  } catch {
49
49
  return false;
50
50
  }
51
- });
51
+ })) ?? false;
52
52
  }
53
53
 
54
54
  /**
@@ -224,8 +224,10 @@ export async function killProcessOnPort(port: number): Promise<void> {
224
224
  if (alive) {
225
225
  await $`taskkill /F /T /PID ${pid}`.nothrow().quiet();
226
226
  console.log(`Killed process ${pid} using port ${port}`);
227
+ } else {
228
+ // Zombie socket — PID no longer exists but socket lingers in kernel
229
+ console.warn(`⚠ Port ${port} held by zombie PID ${pid} (process dead, socket stuck in kernel). Will clear on reboot or TCP timeout.`);
227
230
  }
228
- // else: zombie socket — PID no longer exists but socket lingers in kernel
229
231
  }
230
232
  } else {
231
233
  // On Unix, use lsof
@@ -456,7 +458,7 @@ export async function findPidByPort(port: number, maxWaitMs = 8000): Promise<num
456
458
  }
457
459
 
458
460
  export async function readFileTail(filePath: string, lines?: number): Promise<string> {
459
- return plat.measure(`Read tail ${lines ?? 'all'}L`, async () => {
461
+ return (await plat.measure(`Read tail ${lines ?? 'all'}L`, async () => {
460
462
  try {
461
463
  const content = await Bun.file(filePath).text();
462
464
 
@@ -470,7 +472,7 @@ export async function readFileTail(filePath: string, lines?: number): Promise<st
470
472
  } catch (error) {
471
473
  throw new Error(`Error reading file: ${error}`);
472
474
  }
473
- });
475
+ })) ?? '';
474
476
  }
475
477
 
476
478
  /**
@@ -484,56 +486,70 @@ export function copyFile(src: string, dest: string): void {
484
486
  * Get memory usage of a process in bytes
485
487
  */
486
488
  export async function getProcessMemory(pid: number): Promise<number> {
487
- const map = await getProcessBatchMemory([pid]);
488
- return map.get(pid) || 0;
489
+ const map = await getProcessBatchResources([pid]);
490
+ return map.get(pid)?.memory || 0;
489
491
  }
490
492
 
491
493
  /**
492
- * Get memory usage for a batch of PIDs in bytes.
493
- * Returns a Map of PID -> Memory (bytes).
494
+ * Get memory and CPU usage for a batch of PIDs.
495
+ * Returns a Map of PID -> { memory: bytes, cpu: number }.
496
+ * On Windows, CPU is cumulative time in seconds.
497
+ * On Unix, CPU is instantaneous percentage.
494
498
  *
495
499
  * Optimization: Fetches ALL processes in one go and filters in-memory
496
500
  * to avoid spawning N subprocesses.
497
501
  */
498
- export async function getProcessBatchMemory(pids: number[]): Promise<Map<number, number>> {
502
+ export async function getProcessBatchResources(pids: number[]): Promise<Map<number, { memory: number, cpu: number }>> {
499
503
  if (pids.length === 0) return new Map();
500
504
 
501
- return await plat.measure(`Batch memory (${pids.length} PIDs)`, async () => {
502
- const memoryMap = new Map<number, number>();
505
+ return await plat.measure(`Batch resources (${pids.length} PIDs)`, async () => {
506
+ const resourceMap = new Map<number, { memory: number, cpu: number }>();
503
507
  const pidSet = new Set(pids);
504
508
 
505
509
  try {
506
510
  if (isWindows()) {
507
- const result = await $`powershell -Command "Get-Process | Select-Object Id, WorkingSet"`.nothrow().quiet().text();
511
+ const result = await $`powershell -Command "Get-Process | Select-Object Id, CPU, WorkingSet"`.nothrow().quiet().text();
508
512
  const lines = result.trim().split('\n');
509
513
 
510
514
  for (const line of lines) {
511
515
  const trimmed = line.trim();
512
516
  if (!trimmed || trimmed.startsWith('Id') || trimmed.startsWith('--')) continue;
513
517
 
518
+ // Replace multiple spaces with a single space to parse correctly
514
519
  const parts = trimmed.split(/\s+/);
515
- if (parts.length >= 2) {
516
- const val1 = parseInt(parts[0]);
517
- const val2 = parseInt(parts[parts.length - 1]);
520
+ if (parts.length >= 3) {
521
+ const pid = parseInt(parts[0]);
522
+ // CPU can sometimes be blank if process is just starting, handle that
523
+ let cpuStr = parts[1];
524
+ let memStr = parts[2];
525
+ if (parts.length === 2) {
526
+ // If CPU is missing, powershell might omit it and give just ID and WorkingSet
527
+ cpuStr = "0";
528
+ memStr = parts[1];
529
+ }
530
+
531
+ const cpu = parseFloat(cpuStr) || 0;
532
+ const memory = parseInt(memStr) || 0;
518
533
 
519
- if (!isNaN(val1) && !isNaN(val2)) {
520
- if (pidSet.has(val1)) memoryMap.set(val1, val2);
534
+ if (!isNaN(pid) && !isNaN(memory)) {
535
+ if (pidSet.has(pid)) resourceMap.set(pid, { memory, cpu });
521
536
  }
522
537
  }
523
538
  }
524
539
  } else {
525
- const result = await $`ps -eo pid,rss`.nothrow().quiet().text();
540
+ const result = await $`ps -eo pid,pcpu,rss`.nothrow().quiet().text();
526
541
  const lines = result.trim().split('\n');
527
542
 
528
543
  for (let i = 1; i < lines.length; i++) {
529
544
  const line = lines[i].trim();
530
545
  if (!line) continue;
531
- const [pidStr, rssStr] = line.split(/\s+/);
546
+ const [pidStr, cpuStr, rssStr] = line.split(/\s+/);
532
547
  const pid = parseInt(pidStr);
533
- const rss = parseInt(rssStr);
548
+ const cpu = parseFloat(cpuStr) || 0;
549
+ const rss = parseInt(rssStr) || 0;
534
550
 
535
551
  if (pidSet.has(pid)) {
536
- memoryMap.set(pid, rss * 1024);
552
+ resourceMap.set(pid, { memory: rss * 1024, cpu });
537
553
  }
538
554
  }
539
555
  }
@@ -541,7 +557,7 @@ export async function getProcessBatchMemory(pids: number[]): Promise<Map<number,
541
557
  // silently fail
542
558
  }
543
559
 
544
- return memoryMap;
560
+ return resourceMap;
545
561
  }) ?? new Map();
546
562
  }
547
563
 
package/src/server.ts CHANGED
@@ -17,9 +17,17 @@ import path from 'path';
17
17
  import { getAllProcesses, getProcess } from './db';
18
18
  import { isProcessRunning } from './platform';
19
19
  import { handleRun } from './commands/run';
20
+ import { parseEnvString } from './utils';
20
21
 
21
22
  const GUARD_INTERVAL_MS = 30_000; // Check every 30 seconds
22
- const GUARD_SKIP_NAMES = new Set(['bgr-dashboard']); // Don't try to restart ourselves
23
+ const GUARD_SKIP_NAMES = new Set(['bgr-dashboard', 'bgr-guard']); // Don't try to restart ourselves or external guard
24
+
25
+ // In-memory guard restart counter and timestamps (persists across module re-evaluations)
26
+ const _g = globalThis as any;
27
+ if (!_g.__bgrGuardRestartCounts) _g.__bgrGuardRestartCounts = new Map<string, number>();
28
+ if (!_g.__bgrGuardNextRestartTime) _g.__bgrGuardNextRestartTime = new Map<string, number>();
29
+ export const guardRestartCounts: Map<string, number> = _g.__bgrGuardRestartCounts;
30
+ const guardNextRestartTime: Map<string, number> = _g.__bgrGuardNextRestartTime;
23
31
 
24
32
  export async function startServer() {
25
33
  // Dynamic import to avoid melina's side-effect console.log at bundle load time
@@ -38,6 +46,10 @@ export async function startServer() {
38
46
 
39
47
  // Start the built-in process guard
40
48
  startGuard();
49
+
50
+ // Start log rotation (prevents unbounded log file growth)
51
+ const { startLogRotation } = await import('./log-rotation');
52
+ startLogRotation(() => getAllProcesses());
41
53
  }
42
54
 
43
55
  /**
@@ -67,11 +79,15 @@ function startGuard() {
67
79
  if (GUARD_SKIP_NAMES.has(proc.name)) continue;
68
80
 
69
81
  // Only guard processes with BGR_KEEP_ALIVE=true
70
- const env = proc.env ? (typeof proc.env === 'string' ? (() => { try { return JSON.parse(proc.env); } catch { return {}; } })() : proc.env) : {};
82
+ const env = proc.env ? parseEnvString(proc.env) : {};
71
83
  if (env.BGR_KEEP_ALIVE !== 'true') continue;
72
84
 
73
85
  const alive = await isProcessRunning(proc.pid, proc.command);
74
86
  if (!alive) {
87
+ const now = Date.now();
88
+ const nextRestart = guardNextRestartTime.get(proc.name) || 0;
89
+ if (now < nextRestart) continue; // Still in backoff period
90
+
75
91
  console.log(`[guard] ⚠ Guarded process "${proc.name}" (PID ${proc.pid}) is dead, restarting...`);
76
92
  try {
77
93
  await handleRun({
@@ -80,10 +96,34 @@ function startGuard() {
80
96
  force: true,
81
97
  remoteName: '',
82
98
  });
83
- console.log(`[guard] ✓ Restarted "${proc.name}"`);
99
+
100
+ // Track restart count
101
+ const prevCount = guardRestartCounts.get(proc.name) || 0;
102
+ const newCount = prevCount + 1;
103
+ guardRestartCounts.set(proc.name, newCount);
104
+
105
+ // Exponential backoff if it crashes repeatedly (more than 5 times)
106
+ if (newCount > 5) {
107
+ const backoffSeconds = Math.min(30 * Math.pow(2, newCount - 6), 300); // 30s, 60s, 120s, up to 5 mins
108
+ guardNextRestartTime.set(proc.name, Date.now() + (backoffSeconds * 1000));
109
+ console.log(`[guard] ✓ Restarted "${proc.name}" (restart #${newCount}). Crash loop detected: next check delayed by ${backoffSeconds}s.`);
110
+ } else {
111
+ console.log(`[guard] ✓ Restarted "${proc.name}" (restart #${newCount})`);
112
+ }
84
113
  } catch (err: any) {
85
114
  console.error(`[guard] ✗ Failed to restart "${proc.name}": ${err.message}`);
86
115
  }
116
+ } else {
117
+ // Reset counter if process has been stable (alive at least once during check)
118
+ const prevCount = guardRestartCounts.get(proc.name) || 0;
119
+ if (prevCount > 0) {
120
+ const nextRestart = guardNextRestartTime.get(proc.name) || 0;
121
+ if (Date.now() > nextRestart + 60_000) {
122
+ // If it lived over 60s past its backoff threshold, consider it stable
123
+ guardRestartCounts.delete(proc.name);
124
+ guardNextRestartTime.delete(proc.name);
125
+ }
126
+ }
87
127
  }
88
128
  }
89
129
  } catch (err: any) {
package/src/table.ts CHANGED
@@ -34,12 +34,12 @@ export function getTerminalWidth(): number {
34
34
  }
35
35
 
36
36
  // Strip ANSI color codes for accurate length calculation
37
- function stripAnsi(str: string): string {
37
+ export function stripAnsi(str: string): string {
38
38
  return str.replace(/\u001b\[[0-9;]*m/g, "");
39
39
  }
40
40
 
41
41
  // Default truncator: trims the end of a string
42
- function truncateString(str: string, maxLength: number): string {
42
+ export function truncateString(str: string, maxLength: number): string {
43
43
  const stripped = stripAnsi(str);
44
44
  if (stripped.length <= maxLength) return str;
45
45
  const ellipsis = "…";
@@ -52,7 +52,7 @@ function truncateString(str: string, maxLength: number): string {
52
52
  }
53
53
 
54
54
  // Path truncator: trims the middle of a string
55
- function truncatePath(str: string, maxLength: number): string {
55
+ export function truncatePath(str: string, maxLength: number): string {
56
56
  const stripped = stripAnsi(str);
57
57
  if (stripped.length <= maxLength) return str;
58
58
  const ellipsis = "…";