bgrun 3.10.2 → 3.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,93 @@
1
+ /**
2
+ * Log rotation for bgrun process output files.
3
+ *
4
+ * Ensures log files don't grow unbounded by:
5
+ * 1. Truncating on rotation (restart) — keeping last N lines
6
+ * 2. Size-based rotation — when file exceeds maxBytes, trim to last N lines
7
+ * 3. Periodic rotation check — runs on an interval in the dashboard
8
+ */
9
+
10
+ import { existsSync, statSync, readFileSync, writeFileSync } from 'fs'
11
+
12
+ const DEFAULT_MAX_BYTES = 10 * 1024 * 1024 // 10 MB
13
+ const DEFAULT_KEEP_LINES = 5000 // Keep last 5000 lines on rotation
14
+ const DEFAULT_CHECK_INTERVAL_MS = 60_000 // Check every 60s
15
+
16
/**
 * Rotate a single log file if it exceeds `maxBytes`.
 *
 * When the file is larger than `maxBytes` and holds more than `keepLines`
 * lines, it is rewritten in place as a one-line rotation header followed by
 * the last `keepLines` lines. A trailing newline in the original file is
 * treated as a line terminator (not as an extra empty line) and is preserved.
 *
 * @param filePath  Path of the log file to check.
 * @param maxBytes  Size threshold in bytes; files at or below it are left untouched.
 * @param keepLines Number of trailing lines to keep. Zero, negative or NaN
 *                  keeps only the rotation header.
 * @returns `true` if the file was rewritten; `false` if it was missing, small
 *          enough, already within `keepLines`, or if any filesystem call threw
 *          (best effort: errors are swallowed so a caller's loop keeps going).
 */
export function rotateLogFile(
  filePath: string,
  maxBytes: number = DEFAULT_MAX_BYTES,
  keepLines: number = DEFAULT_KEEP_LINES,
): boolean {
  try {
    if (!existsSync(filePath)) return false

    const stat = statSync(filePath)
    if (stat.size <= maxBytes) return false

    // Read file, keep last N lines
    const content = readFileSync(filePath, 'utf-8')
    const lines = content.split('\n')

    // "a\nb\n".split('\n') yields a trailing '' — that is the terminator of
    // the last line, not a line of its own, so drop it before counting.
    const endsWithNewline = content.endsWith('\n')
    if (endsWithNewline) lines.pop()

    // Normalise the line budget: NaN / negative / fractional values become a
    // non-negative integer (Infinity stays Infinity and simply never rotates).
    const keep = keepLines > 0 ? Math.floor(keepLines) : 0

    if (lines.length <= keep) return false

    // slice(-0) is slice(0) and would keep EVERY line, so zero is handled explicitly.
    const truncated = keep > 0 ? lines.slice(-keep) : []
    const header = `--- [bgrun] Log rotated at ${new Date().toISOString()} (was ${lines.length} lines, ${formatBytes(stat.size)}) ---\n`

    // Restore the terminator of the last kept line so later output starts on a fresh line.
    const terminator = truncated.length > 0 && endsWithNewline ? '\n' : ''
    writeFileSync(filePath, header + truncated.join('\n') + terminator)

    return true
  } catch {
    // Deliberate best effort: a file that cannot be read or written is reported as "not rotated".
    return false
  }
}
43
+
44
/**
 * Run size-based rotation over the stdout and stderr logs of every process.
 *
 * @param getProcesses Callback returning the processes whose log paths should be checked.
 * @param maxBytes     Size threshold forwarded to `rotateLogFile`.
 * @param keepLines    Line budget forwarded to `rotateLogFile`.
 * @returns `rotated` — one `"<name>/stdout"` or `"<name>/stderr"` entry per file
 *          that was rewritten; `checked` — how many non-empty log paths were examined.
 */
export function rotateAllLogs(
  getProcesses: () => Array<{ name: string; stdout_path: string; stderr_path: string }>,
  maxBytes: number = DEFAULT_MAX_BYTES,
  keepLines: number = DEFAULT_KEEP_LINES,
): { rotated: string[]; checked: number } {
  const entries = getProcesses()
  const rotated: string[] = []
  let checked = 0

  for (const entry of entries) {
    // stdout is always handled before stderr, for each process in turn
    const streams: Array<[string, string]> = [
      ['stdout', entry.stdout_path],
      ['stderr', entry.stderr_path],
    ]

    for (const [label, logPath] of streams) {
      // Empty / missing paths are neither counted nor rotated
      if (!logPath) continue

      checked += 1
      const didRotate = rotateLogFile(logPath, maxBytes, keepLines)
      if (didRotate) rotated.push(`${entry.name}/${label}`)
    }
  }

  return { rotated, checked }
}
71
+
72
/**
 * Start periodic log rotation.
 *
 * Logs the active settings once, then runs `rotateAllLogs` every `intervalMs`
 * milliseconds and logs the names of any files that were rotated.
 *
 * @param getProcesses Callback returning the processes whose logs should be checked on each tick.
 * @param intervalMs   Delay between checks, in milliseconds.
 * @param maxBytes     Size threshold forwarded to `rotateAllLogs`.
 * @param keepLines    Line budget forwarded to `rotateAllLogs`.
 * @returns The interval handle; pass it to `clearInterval` to stop rotation.
 */
export function startLogRotation(
  getProcesses: () => Array<{ name: string; stdout_path: string; stderr_path: string }>,
  intervalMs: number = DEFAULT_CHECK_INTERVAL_MS,
  maxBytes: number = DEFAULT_MAX_BYTES,
  keepLines: number = DEFAULT_KEEP_LINES,
): ReturnType<typeof setInterval> {
  console.log(`[logs] Log rotation active: max ${formatBytes(maxBytes)}/file, keep ${keepLines} lines, check every ${intervalMs / 1000}s`)

  return setInterval(() => {
    // An exception escaping a timer callback is an uncaught exception for the
    // whole process, so a failing getProcesses() must not propagate from here.
    try {
      const { rotated } = rotateAllLogs(getProcesses, maxBytes, keepLines)
      if (rotated.length > 0) {
        console.log(`[logs] Rotated ${rotated.length} log(s): ${rotated.join(', ')}`)
      }
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err)
      console.error(`[logs] Log rotation check failed: ${reason}`)
    }
  }, intervalMs)
}
88
+
89
/**
 * Format a byte count as a short human-readable string.
 *
 * Uses 1024-based units so the output agrees with the 1024-based size
 * constants in this file (`10 * 1024 * 1024` renders as `10.0MB`, not `10.5MB`).
 *
 * @param bytes Byte count to format.
 * @returns `"<n.n>MB"` from 1024 * 1024 bytes upward, `"<n>KB"` from 1024 bytes
 *          upward, otherwise `"<n>B"`.
 */
function formatBytes(bytes: number): string {
  const KB = 1024
  const MB = KB * 1024

  if (bytes >= MB) return `${(bytes / MB).toFixed(1)}MB`
  if (bytes >= KB) return `${(bytes / KB).toFixed(0)}KB`
  return `${bytes}B`
}
package/src/logger.ts CHANGED
@@ -3,7 +3,7 @@ import chalk from "chalk";
3
3
 
4
4
  export function announce(message: string, title?: string) {
5
5
  console.log(
6
- boxen(chalk.white(message), {
6
+ boxen(message, {
7
7
  padding: 1,
8
8
  margin: 1,
9
9
  borderColor: 'green',
@@ -14,9 +14,10 @@ export function announce(message: string, title?: string) {
14
14
  );
15
15
  }
16
16
 
17
- export function error(message: string) {
17
+ export function error(message: string | Error) {
18
+ const text = message instanceof Error ? (message.stack || message.message) : String(message);
18
19
  console.error(
19
- boxen(chalk.red(message), {
20
+ boxen(chalk.red(text), {
20
21
  padding: 1,
21
22
  margin: 1,
22
23
  borderColor: 'red',
package/src/platform.ts CHANGED
@@ -31,7 +31,7 @@ export async function isProcessRunning(pid: number, command?: string): Promise<b
31
31
  // PID 0 means intentionally stopped — never alive
32
32
  if (pid <= 0) return false;
33
33
 
34
- return plat.measure(`PID ${pid} alive?`, async () => {
34
+ return (await plat.measure(`PID ${pid} alive?`, async () => {
35
35
  try {
36
36
  // Docker container detection
37
37
  if (command && (command.includes('docker run') || command.includes('docker-compose up') || command.includes('docker compose up'))) {
@@ -48,7 +48,7 @@ export async function isProcessRunning(pid: number, command?: string): Promise<b
48
48
  } catch {
49
49
  return false;
50
50
  }
51
- });
51
+ })) ?? false;
52
52
  }
53
53
 
54
54
  /**
@@ -224,8 +224,10 @@ export async function killProcessOnPort(port: number): Promise<void> {
224
224
  if (alive) {
225
225
  await $`taskkill /F /T /PID ${pid}`.nothrow().quiet();
226
226
  console.log(`Killed process ${pid} using port ${port}`);
227
+ } else {
228
+ // Zombie socket — PID no longer exists but socket lingers in kernel
229
+ console.warn(`⚠ Port ${port} held by zombie PID ${pid} (process dead, socket stuck in kernel). Will clear on reboot or TCP timeout.`);
227
230
  }
228
- // else: zombie socket — PID no longer exists but socket lingers in kernel
229
231
  }
230
232
  } else {
231
233
  // On Unix, use lsof
@@ -456,7 +458,7 @@ export async function findPidByPort(port: number, maxWaitMs = 8000): Promise<num
456
458
  }
457
459
 
458
460
  export async function readFileTail(filePath: string, lines?: number): Promise<string> {
459
- return plat.measure(`Read tail ${lines ?? 'all'}L`, async () => {
461
+ return (await plat.measure(`Read tail ${lines ?? 'all'}L`, async () => {
460
462
  try {
461
463
  const content = await Bun.file(filePath).text();
462
464
 
@@ -470,7 +472,7 @@ export async function readFileTail(filePath: string, lines?: number): Promise<st
470
472
  } catch (error) {
471
473
  throw new Error(`Error reading file: ${error}`);
472
474
  }
473
- });
475
+ })) ?? '';
474
476
  }
475
477
 
476
478
  /**
@@ -484,56 +486,70 @@ export function copyFile(src: string, dest: string): void {
484
486
  * Get memory usage of a process in bytes
485
487
  */
486
488
  export async function getProcessMemory(pid: number): Promise<number> {
487
- const map = await getProcessBatchMemory([pid]);
488
- return map.get(pid) || 0;
489
+ const map = await getProcessBatchResources([pid]);
490
+ return map.get(pid)?.memory || 0;
489
491
  }
490
492
 
491
493
  /**
492
- * Get memory usage for a batch of PIDs in bytes.
493
- * Returns a Map of PID -> Memory (bytes).
494
+ * Get memory and CPU usage for a batch of PIDs.
495
+ * Returns a Map of PID -> { memory: bytes, cpu: number }.
496
+ * On Windows, CPU is cumulative time in seconds.
497
+ * On Unix, CPU is instantaneous percentage.
494
498
  *
495
499
  * Optimization: Fetches ALL processes in one go and filters in-memory
496
500
  * to avoid spawning N subprocesses.
497
501
  */
498
- export async function getProcessBatchMemory(pids: number[]): Promise<Map<number, number>> {
502
+ export async function getProcessBatchResources(pids: number[]): Promise<Map<number, { memory: number, cpu: number }>> {
499
503
  if (pids.length === 0) return new Map();
500
504
 
501
- return await plat.measure(`Batch memory (${pids.length} PIDs)`, async () => {
502
- const memoryMap = new Map<number, number>();
505
+ return await plat.measure(`Batch resources (${pids.length} PIDs)`, async () => {
506
+ const resourceMap = new Map<number, { memory: number, cpu: number }>();
503
507
  const pidSet = new Set(pids);
504
508
 
505
509
  try {
506
510
  if (isWindows()) {
507
- const result = await $`powershell -Command "Get-Process | Select-Object Id, WorkingSet"`.nothrow().quiet().text();
511
+ const result = await $`powershell -Command "Get-Process | Select-Object Id, CPU, WorkingSet"`.nothrow().quiet().text();
508
512
  const lines = result.trim().split('\n');
509
513
 
510
514
  for (const line of lines) {
511
515
  const trimmed = line.trim();
512
516
  if (!trimmed || trimmed.startsWith('Id') || trimmed.startsWith('--')) continue;
513
517
 
518
+ // Replace multiple spaces with a single space to parse correctly
514
519
  const parts = trimmed.split(/\s+/);
515
- if (parts.length >= 2) {
516
- const val1 = parseInt(parts[0]);
517
- const val2 = parseInt(parts[parts.length - 1]);
520
+ if (parts.length >= 3) {
521
+ const pid = parseInt(parts[0]);
522
+ // CPU can sometimes be blank if process is just starting, handle that
523
+ let cpuStr = parts[1];
524
+ let memStr = parts[2];
525
+ if (parts.length === 2) {
526
+ // If CPU is missing, powershell might omit it and give just ID and WorkingSet
527
+ cpuStr = "0";
528
+ memStr = parts[1];
529
+ }
530
+
531
+ const cpu = parseFloat(cpuStr) || 0;
532
+ const memory = parseInt(memStr) || 0;
518
533
 
519
- if (!isNaN(val1) && !isNaN(val2)) {
520
- if (pidSet.has(val1)) memoryMap.set(val1, val2);
534
+ if (!isNaN(pid) && !isNaN(memory)) {
535
+ if (pidSet.has(pid)) resourceMap.set(pid, { memory, cpu });
521
536
  }
522
537
  }
523
538
  }
524
539
  } else {
525
- const result = await $`ps -eo pid,rss`.nothrow().quiet().text();
540
+ const result = await $`ps -eo pid,pcpu,rss`.nothrow().quiet().text();
526
541
  const lines = result.trim().split('\n');
527
542
 
528
543
  for (let i = 1; i < lines.length; i++) {
529
544
  const line = lines[i].trim();
530
545
  if (!line) continue;
531
- const [pidStr, rssStr] = line.split(/\s+/);
546
+ const [pidStr, cpuStr, rssStr] = line.split(/\s+/);
532
547
  const pid = parseInt(pidStr);
533
- const rss = parseInt(rssStr);
548
+ const cpu = parseFloat(cpuStr) || 0;
549
+ const rss = parseInt(rssStr) || 0;
534
550
 
535
551
  if (pidSet.has(pid)) {
536
- memoryMap.set(pid, rss * 1024);
552
+ resourceMap.set(pid, { memory: rss * 1024, cpu });
537
553
  }
538
554
  }
539
555
  }
@@ -541,7 +557,7 @@ export async function getProcessBatchMemory(pids: number[]): Promise<Map<number,
541
557
  // silently fail
542
558
  }
543
559
 
544
- return memoryMap;
560
+ return resourceMap;
545
561
  }) ?? new Map();
546
562
  }
547
563
 
package/src/server.ts CHANGED
@@ -17,9 +17,17 @@ import path from 'path';
17
17
  import { getAllProcesses, getProcess } from './db';
18
18
  import { isProcessRunning } from './platform';
19
19
  import { handleRun } from './commands/run';
20
+ import { parseEnvString } from './utils';
20
21
 
21
22
  const GUARD_INTERVAL_MS = 30_000; // Check every 30 seconds
22
- const GUARD_SKIP_NAMES = new Set(['bgr-dashboard']); // Don't try to restart ourselves
23
+ const GUARD_SKIP_NAMES = new Set(['bgr-dashboard', 'bgr-guard']); // Don't try to restart ourselves or external guard
24
+
25
+ // In-memory guard restart counter and timestamps (persists across module re-evaluations)
26
+ const _g = globalThis as any;
27
+ if (!_g.__bgrGuardRestartCounts) _g.__bgrGuardRestartCounts = new Map<string, number>();
28
+ if (!_g.__bgrGuardNextRestartTime) _g.__bgrGuardNextRestartTime = new Map<string, number>();
29
+ export const guardRestartCounts: Map<string, number> = _g.__bgrGuardRestartCounts;
30
+ const guardNextRestartTime: Map<string, number> = _g.__bgrGuardNextRestartTime;
23
31
 
24
32
  export async function startServer() {
25
33
  // Dynamic import to avoid melina's side-effect console.log at bundle load time
@@ -38,6 +46,10 @@ export async function startServer() {
38
46
 
39
47
  // Start the built-in process guard
40
48
  startGuard();
49
+
50
+ // Start log rotation (prevents unbounded log file growth)
51
+ const { startLogRotation } = await import('./log-rotation');
52
+ startLogRotation(() => getAllProcesses());
41
53
  }
42
54
 
43
55
  /**
@@ -67,11 +79,15 @@ function startGuard() {
67
79
  if (GUARD_SKIP_NAMES.has(proc.name)) continue;
68
80
 
69
81
  // Only guard processes with BGR_KEEP_ALIVE=true
70
- const env = proc.env ? (typeof proc.env === 'string' ? (() => { try { return JSON.parse(proc.env); } catch { return {}; } })() : proc.env) : {};
82
+ const env = proc.env ? parseEnvString(proc.env) : {};
71
83
  if (env.BGR_KEEP_ALIVE !== 'true') continue;
72
84
 
73
85
  const alive = await isProcessRunning(proc.pid, proc.command);
74
86
  if (!alive) {
87
+ const now = Date.now();
88
+ const nextRestart = guardNextRestartTime.get(proc.name) || 0;
89
+ if (now < nextRestart) continue; // Still in backoff period
90
+
75
91
  console.log(`[guard] ⚠ Guarded process "${proc.name}" (PID ${proc.pid}) is dead, restarting...`);
76
92
  try {
77
93
  await handleRun({
@@ -80,10 +96,34 @@ function startGuard() {
80
96
  force: true,
81
97
  remoteName: '',
82
98
  });
83
- console.log(`[guard] ✓ Restarted "${proc.name}"`);
99
+
100
+ // Track restart count
101
+ const prevCount = guardRestartCounts.get(proc.name) || 0;
102
+ const newCount = prevCount + 1;
103
+ guardRestartCounts.set(proc.name, newCount);
104
+
105
+ // Exponential backoff if it crashes repeatedly (more than 5 times)
106
+ if (newCount > 5) {
107
+ const backoffSeconds = Math.min(30 * Math.pow(2, newCount - 6), 300); // 30s, 60s, 120s, up to 5 mins
108
+ guardNextRestartTime.set(proc.name, Date.now() + (backoffSeconds * 1000));
109
+ console.log(`[guard] ✓ Restarted "${proc.name}" (restart #${newCount}). Crash loop detected: next check delayed by ${backoffSeconds}s.`);
110
+ } else {
111
+ console.log(`[guard] ✓ Restarted "${proc.name}" (restart #${newCount})`);
112
+ }
84
113
  } catch (err: any) {
85
114
  console.error(`[guard] ✗ Failed to restart "${proc.name}": ${err.message}`);
86
115
  }
116
+ } else {
117
+ // Reset counter if process has been stable (alive at least once during check)
118
+ const prevCount = guardRestartCounts.get(proc.name) || 0;
119
+ if (prevCount > 0) {
120
+ const nextRestart = guardNextRestartTime.get(proc.name) || 0;
121
+ if (Date.now() > nextRestart + 60_000) {
122
+ // If it lived over 60s past its backoff threshold, consider it stable
123
+ guardRestartCounts.delete(proc.name);
124
+ guardNextRestartTime.delete(proc.name);
125
+ }
126
+ }
87
127
  }
88
128
  }
89
129
  } catch (err: any) {