bgrun 3.10.1 → 3.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -2
- package/dashboard/app/api/deps/route.ts +49 -0
- package/dashboard/app/api/guard/route.ts +50 -0
- package/dashboard/app/api/guard-all/route.ts +50 -0
- package/dashboard/app/api/logs/rotate/route.ts +45 -0
- package/dashboard/app/api/processes/route.ts +67 -10
- package/dashboard/app/globals.css +386 -6
- package/dashboard/app/page.client.tsx +257 -8
- package/dashboard/app/page.tsx +20 -1
- package/dist/index.js +462 -30
- package/package.json +61 -60
- package/src/api.ts +3 -3
- package/src/commands/list.ts +3 -3
- package/src/commands/run.ts +17 -0
- package/src/db.ts +8 -0
- package/src/deps.ts +126 -0
- package/src/guard.ts +157 -0
- package/src/index.ts +108 -3
- package/src/log-rotation.ts +93 -0
- package/src/logger.ts +4 -3
- package/src/platform.ts +39 -23
- package/src/server.ts +55 -11
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Log rotation for bgrun process output files.
|
|
3
|
+
*
|
|
4
|
+
* Ensures log files don't grow unbounded by:
|
|
5
|
+
* 1. Truncating on rotation (restart) — keeping last N lines
|
|
6
|
+
* 2. Size-based rotation — when file exceeds maxBytes, trim to last N lines
|
|
7
|
+
* 3. Periodic rotation check — runs on an interval in the dashboard
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { existsSync, statSync, readFileSync, writeFileSync } from 'fs'
|
|
11
|
+
|
|
12
|
+
const DEFAULT_MAX_BYTES = 10 * 1024 * 1024 // 10 MB
|
|
13
|
+
const DEFAULT_KEEP_LINES = 5000 // Keep last 5000 lines on rotation
|
|
14
|
+
const DEFAULT_CHECK_INTERVAL_MS = 60_000 // Check every 60s
|
|
15
|
+
|
|
16
|
+
/**
 * Rotate a single log file if it exceeds `maxBytes`.
 *
 * When the file is larger than `maxBytes` and holds more than `keepLines`
 * lines, it is rewritten in place as a one-line rotation header followed by
 * the last `keepLines` lines of the previous content.
 *
 * A file that is over `maxBytes` but has `keepLines` lines or fewer is left
 * untouched (the function returns `false`).
 *
 * @param filePath - Path of the log file to check.
 * @param maxBytes - Size threshold in bytes; files at or below it are never rotated.
 * @param keepLines - Number of trailing lines to preserve. Fractions are floored and
 *   negative values are treated as 0 (keep only the header). NaN disables rotation.
 * @returns `true` if the file was rewritten, `false` otherwise (including on any I/O error).
 */
export function rotateLogFile(
    filePath: string,
    maxBytes: number = DEFAULT_MAX_BYTES,
    keepLines: number = DEFAULT_KEEP_LINES,
): boolean {
    try {
        if (!existsSync(filePath)) return false

        const stat = statSync(filePath)
        if (stat.size <= maxBytes) return false

        const content = readFileSync(filePath, 'utf-8')
        const endsWithNewline = content.endsWith('\n')
        const lines = content.split('\n')
        // A trailing newline produces one empty final element that is not a real
        // line; drop it so it neither consumes a keep slot nor inflates the count.
        if (endsWithNewline) lines.pop()

        const keep = Math.max(0, Math.floor(keepLines))
        // Written as a negated `>` so that a NaN budget falls through to "no rotation".
        if (!(lines.length > keep)) return false

        // Explicit start index: `slice(-0)` would be `slice(0)` and keep everything.
        const kept = lines.slice(lines.length - keep)
        const header = `--- [bgrun] Log rotated at ${new Date().toISOString()} (was ${lines.length} lines, ${formatBytes(stat.size)}) ---\n`
        // Restore the trailing newline so later appends start on a fresh line.
        const tail = endsWithNewline && kept.length > 0 ? '\n' : ''
        // NOTE(review): the rewrite is not atomic; output appended by the owning
        // process between the read and the write may be lost — confirm acceptable.
        writeFileSync(filePath, header + kept.join('\n') + tail)

        return true
    } catch {
        // Best-effort: rotation failures must not disturb the caller.
        return false
    }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Run a rotation check over the stdout and stderr logs of every process.
 *
 * @param getProcesses - Callback returning the processes to inspect; called once.
 * @param maxBytes - Size threshold forwarded to `rotateLogFile`.
 * @param keepLines - Line budget forwarded to `rotateLogFile`.
 * @returns `rotated`: one `"<name>/stdout"` or `"<name>/stderr"` entry per file that
 *   `rotateLogFile` reported as rotated; `checked`: how many non-empty log paths were examined.
 */
export function rotateAllLogs(
    getProcesses: () => Array<{ name: string; stdout_path: string; stderr_path: string }>,
    maxBytes: number = DEFAULT_MAX_BYTES,
    keepLines: number = DEFAULT_KEEP_LINES,
): { rotated: string[]; checked: number } {
    const rotated: string[] = []
    let checked = 0

    for (const entry of getProcesses()) {
        // stdout first, then stderr — same order in which results are reported.
        const streams: Array<[string, string]> = [
            ['stdout', entry.stdout_path],
            ['stderr', entry.stderr_path],
        ]

        for (const [label, logPath] of streams) {
            // Processes without a path for this stream are not counted.
            if (!logPath) continue

            checked += 1
            const didRotate = rotateLogFile(logPath, maxBytes, keepLines)
            if (didRotate) rotated.push(`${entry.name}/${label}`)
        }
    }

    return { rotated, checked }
}
|
|
71
|
+
|
|
72
|
+
/**
 * Start periodic log rotation.
 *
 * Logs the active settings once, then every `intervalMs` milliseconds runs
 * `rotateAllLogs` and logs the names of any logs that were rotated.
 *
 * @param getProcesses - Callback returning the processes whose logs are checked;
 *   invoked on every tick.
 * @param intervalMs - Delay between checks, in milliseconds.
 * @param maxBytes - Size threshold forwarded to `rotateAllLogs`.
 * @param keepLines - Line budget forwarded to `rotateAllLogs`.
 * @returns The interval handle; pass it to `clearInterval` to stop rotation.
 */
export function startLogRotation(
    getProcesses: () => Array<{ name: string; stdout_path: string; stderr_path: string }>,
    intervalMs: number = DEFAULT_CHECK_INTERVAL_MS,
    maxBytes: number = DEFAULT_MAX_BYTES,
    keepLines: number = DEFAULT_KEEP_LINES,
): ReturnType<typeof setInterval> {
    console.log(`[logs] Log rotation active: max ${formatBytes(maxBytes)}/file, keep ${keepLines} lines, check every ${intervalMs / 1000}s`)

    return setInterval(() => {
        try {
            const { rotated } = rotateAllLogs(getProcesses, maxBytes, keepLines)
            if (rotated.length > 0) {
                console.log(`[logs] Rotated ${rotated.length} log(s): ${rotated.join(', ')}`)
            }
        } catch (err) {
            // An exception escaping a timer callback is uncaught and can take down
            // the host process; a failed check (e.g. getProcesses throwing) should
            // only skip this tick and be retried on the next one.
            const reason = err instanceof Error ? err.message : String(err)
            console.error(`[logs] Log rotation check failed: ${reason}`)
        }
    }, intervalMs)
}
|
|
88
|
+
|
|
89
|
+
/**
 * Format a byte count as a short human-readable string.
 *
 * Uses 1024-based units so values agree with the module's size constants
 * (e.g. `10 * 1024 * 1024` renders as "10.0MB" rather than "10.5MB").
 *
 * @param bytes - Size in bytes.
 * @returns `"<n.n>MB"` from 1 MiB upward, `"<n>KB"` from 1 KiB upward, otherwise `"<n>B"`.
 */
function formatBytes(bytes: number): string {
    const KIB = 1024
    const MIB = KIB * KIB

    if (bytes >= MIB) return `${(bytes / MIB).toFixed(1)}MB`
    if (bytes >= KIB) return `${(bytes / KIB).toFixed(0)}KB`
    return `${bytes}B`
}
|
package/src/logger.ts
CHANGED
|
@@ -3,7 +3,7 @@ import chalk from "chalk";
|
|
|
3
3
|
|
|
4
4
|
export function announce(message: string, title?: string) {
|
|
5
5
|
console.log(
|
|
6
|
-
boxen(
|
|
6
|
+
boxen(message, {
|
|
7
7
|
padding: 1,
|
|
8
8
|
margin: 1,
|
|
9
9
|
borderColor: 'green',
|
|
@@ -14,9 +14,10 @@ export function announce(message: string, title?: string) {
|
|
|
14
14
|
);
|
|
15
15
|
}
|
|
16
16
|
|
|
17
|
-
export function error(message: string) {
|
|
17
|
+
export function error(message: string | Error) {
|
|
18
|
+
const text = message instanceof Error ? (message.stack || message.message) : String(message);
|
|
18
19
|
console.error(
|
|
19
|
-
boxen(chalk.red(
|
|
20
|
+
boxen(chalk.red(text), {
|
|
20
21
|
padding: 1,
|
|
21
22
|
margin: 1,
|
|
22
23
|
borderColor: 'red',
|
package/src/platform.ts
CHANGED
|
@@ -31,7 +31,7 @@ export async function isProcessRunning(pid: number, command?: string): Promise<b
|
|
|
31
31
|
// PID 0 means intentionally stopped — never alive
|
|
32
32
|
if (pid <= 0) return false;
|
|
33
33
|
|
|
34
|
-
return plat.measure(`PID ${pid} alive?`, async () => {
|
|
34
|
+
return (await plat.measure(`PID ${pid} alive?`, async () => {
|
|
35
35
|
try {
|
|
36
36
|
// Docker container detection
|
|
37
37
|
if (command && (command.includes('docker run') || command.includes('docker-compose up') || command.includes('docker compose up'))) {
|
|
@@ -48,7 +48,7 @@ export async function isProcessRunning(pid: number, command?: string): Promise<b
|
|
|
48
48
|
} catch {
|
|
49
49
|
return false;
|
|
50
50
|
}
|
|
51
|
-
});
|
|
51
|
+
})) ?? false;
|
|
52
52
|
}
|
|
53
53
|
|
|
54
54
|
/**
|
|
@@ -224,8 +224,10 @@ export async function killProcessOnPort(port: number): Promise<void> {
|
|
|
224
224
|
if (alive) {
|
|
225
225
|
await $`taskkill /F /T /PID ${pid}`.nothrow().quiet();
|
|
226
226
|
console.log(`Killed process ${pid} using port ${port}`);
|
|
227
|
+
} else {
|
|
228
|
+
// Zombie socket — PID no longer exists but socket lingers in kernel
|
|
229
|
+
console.warn(`⚠ Port ${port} held by zombie PID ${pid} (process dead, socket stuck in kernel). Will clear on reboot or TCP timeout.`);
|
|
227
230
|
}
|
|
228
|
-
// else: zombie socket — PID no longer exists but socket lingers in kernel
|
|
229
231
|
}
|
|
230
232
|
} else {
|
|
231
233
|
// On Unix, use lsof
|
|
@@ -456,7 +458,7 @@ export async function findPidByPort(port: number, maxWaitMs = 8000): Promise<num
|
|
|
456
458
|
}
|
|
457
459
|
|
|
458
460
|
export async function readFileTail(filePath: string, lines?: number): Promise<string> {
|
|
459
|
-
return plat.measure(`Read tail ${lines ?? 'all'}L`, async () => {
|
|
461
|
+
return (await plat.measure(`Read tail ${lines ?? 'all'}L`, async () => {
|
|
460
462
|
try {
|
|
461
463
|
const content = await Bun.file(filePath).text();
|
|
462
464
|
|
|
@@ -470,7 +472,7 @@ export async function readFileTail(filePath: string, lines?: number): Promise<st
|
|
|
470
472
|
} catch (error) {
|
|
471
473
|
throw new Error(`Error reading file: ${error}`);
|
|
472
474
|
}
|
|
473
|
-
});
|
|
475
|
+
})) ?? '';
|
|
474
476
|
}
|
|
475
477
|
|
|
476
478
|
/**
|
|
@@ -484,56 +486,70 @@ export function copyFile(src: string, dest: string): void {
|
|
|
484
486
|
* Get memory usage of a process in bytes
|
|
485
487
|
*/
|
|
486
488
|
export async function getProcessMemory(pid: number): Promise<number> {
|
|
487
|
-
const map = await
|
|
488
|
-
return map.get(pid) || 0;
|
|
489
|
+
const map = await getProcessBatchResources([pid]);
|
|
490
|
+
return map.get(pid)?.memory || 0;
|
|
489
491
|
}
|
|
490
492
|
|
|
491
493
|
/**
|
|
492
|
-
* Get memory usage for a batch of PIDs
|
|
493
|
-
* Returns a Map of PID ->
|
|
494
|
+
* Get memory and CPU usage for a batch of PIDs.
|
|
495
|
+
* Returns a Map of PID -> { memory: bytes, cpu: number }.
|
|
496
|
+
* On Windows, CPU is cumulative time in seconds.
|
|
497
|
+
* On Unix, CPU is instantaneous percentage.
|
|
494
498
|
*
|
|
495
499
|
* Optimization: Fetches ALL processes in one go and filters in-memory
|
|
496
500
|
* to avoid spawning N subprocesses.
|
|
497
501
|
*/
|
|
498
|
-
export async function
|
|
502
|
+
export async function getProcessBatchResources(pids: number[]): Promise<Map<number, { memory: number, cpu: number }>> {
|
|
499
503
|
if (pids.length === 0) return new Map();
|
|
500
504
|
|
|
501
|
-
return await plat.measure(`Batch
|
|
502
|
-
const
|
|
505
|
+
return await plat.measure(`Batch resources (${pids.length} PIDs)`, async () => {
|
|
506
|
+
const resourceMap = new Map<number, { memory: number, cpu: number }>();
|
|
503
507
|
const pidSet = new Set(pids);
|
|
504
508
|
|
|
505
509
|
try {
|
|
506
510
|
if (isWindows()) {
|
|
507
|
-
const result = await $`powershell -Command "Get-Process | Select-Object Id, WorkingSet"`.nothrow().quiet().text();
|
|
511
|
+
const result = await $`powershell -Command "Get-Process | Select-Object Id, CPU, WorkingSet"`.nothrow().quiet().text();
|
|
508
512
|
const lines = result.trim().split('\n');
|
|
509
513
|
|
|
510
514
|
for (const line of lines) {
|
|
511
515
|
const trimmed = line.trim();
|
|
512
516
|
if (!trimmed || trimmed.startsWith('Id') || trimmed.startsWith('--')) continue;
|
|
513
517
|
|
|
518
|
+
// Replace multiple spaces with a single space to parse correctly
|
|
514
519
|
const parts = trimmed.split(/\s+/);
|
|
515
|
-
if (parts.length >=
|
|
516
|
-
const
|
|
517
|
-
|
|
520
|
+
if (parts.length >= 3) {
|
|
521
|
+
const pid = parseInt(parts[0]);
|
|
522
|
+
// CPU can sometimes be blank if process is just starting, handle that
|
|
523
|
+
let cpuStr = parts[1];
|
|
524
|
+
let memStr = parts[2];
|
|
525
|
+
if (parts.length === 2) {
|
|
526
|
+
// If CPU is missing, powershell might omit it and give just ID and WorkingSet
|
|
527
|
+
cpuStr = "0";
|
|
528
|
+
memStr = parts[1];
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
const cpu = parseFloat(cpuStr) || 0;
|
|
532
|
+
const memory = parseInt(memStr) || 0;
|
|
518
533
|
|
|
519
|
-
if (!isNaN(
|
|
520
|
-
if (pidSet.has(
|
|
534
|
+
if (!isNaN(pid) && !isNaN(memory)) {
|
|
535
|
+
if (pidSet.has(pid)) resourceMap.set(pid, { memory, cpu });
|
|
521
536
|
}
|
|
522
537
|
}
|
|
523
538
|
}
|
|
524
539
|
} else {
|
|
525
|
-
const result = await $`ps -eo pid,rss`.nothrow().quiet().text();
|
|
540
|
+
const result = await $`ps -eo pid,pcpu,rss`.nothrow().quiet().text();
|
|
526
541
|
const lines = result.trim().split('\n');
|
|
527
542
|
|
|
528
543
|
for (let i = 1; i < lines.length; i++) {
|
|
529
544
|
const line = lines[i].trim();
|
|
530
545
|
if (!line) continue;
|
|
531
|
-
const [pidStr, rssStr] = line.split(/\s+/);
|
|
546
|
+
const [pidStr, cpuStr, rssStr] = line.split(/\s+/);
|
|
532
547
|
const pid = parseInt(pidStr);
|
|
533
|
-
const
|
|
548
|
+
const cpu = parseFloat(cpuStr) || 0;
|
|
549
|
+
const rss = parseInt(rssStr) || 0;
|
|
534
550
|
|
|
535
551
|
if (pidSet.has(pid)) {
|
|
536
|
-
|
|
552
|
+
resourceMap.set(pid, { memory: rss * 1024, cpu });
|
|
537
553
|
}
|
|
538
554
|
}
|
|
539
555
|
}
|
|
@@ -541,7 +557,7 @@ export async function getProcessBatchMemory(pids: number[]): Promise<Map<number,
|
|
|
541
557
|
// silently fail
|
|
542
558
|
}
|
|
543
559
|
|
|
544
|
-
return
|
|
560
|
+
return resourceMap;
|
|
545
561
|
}) ?? new Map();
|
|
546
562
|
}
|
|
547
563
|
|
package/src/server.ts
CHANGED
|
@@ -17,9 +17,17 @@ import path from 'path';
|
|
|
17
17
|
import { getAllProcesses, getProcess } from './db';
|
|
18
18
|
import { isProcessRunning } from './platform';
|
|
19
19
|
import { handleRun } from './commands/run';
|
|
20
|
+
import { parseEnvString } from './utils';
|
|
20
21
|
|
|
21
22
|
const GUARD_INTERVAL_MS = 30_000; // Check every 30 seconds
|
|
22
|
-
const GUARD_SKIP_NAMES = new Set(['bgr-dashboard']); // Don't try to restart ourselves
|
|
23
|
+
const GUARD_SKIP_NAMES = new Set(['bgr-dashboard', 'bgr-guard']); // Don't try to restart ourselves or external guard
|
|
24
|
+
|
|
25
|
+
// In-memory guard restart counter and timestamps (persists across module re-evaluations)
|
|
26
|
+
const _g = globalThis as any;
|
|
27
|
+
if (!_g.__bgrGuardRestartCounts) _g.__bgrGuardRestartCounts = new Map<string, number>();
|
|
28
|
+
if (!_g.__bgrGuardNextRestartTime) _g.__bgrGuardNextRestartTime = new Map<string, number>();
|
|
29
|
+
export const guardRestartCounts: Map<string, number> = _g.__bgrGuardRestartCounts;
|
|
30
|
+
const guardNextRestartTime: Map<string, number> = _g.__bgrGuardNextRestartTime;
|
|
23
31
|
|
|
24
32
|
export async function startServer() {
|
|
25
33
|
// Dynamic import to avoid melina's side-effect console.log at bundle load time
|
|
@@ -38,21 +46,25 @@ export async function startServer() {
|
|
|
38
46
|
|
|
39
47
|
// Start the built-in process guard
|
|
40
48
|
startGuard();
|
|
49
|
+
|
|
50
|
+
// Start log rotation (prevents unbounded log file growth)
|
|
51
|
+
const { startLogRotation } = await import('./log-rotation');
|
|
52
|
+
startLogRotation(() => getAllProcesses());
|
|
41
53
|
}
|
|
42
54
|
|
|
43
55
|
/**
|
|
44
56
|
* Built-in Process Guard
|
|
45
57
|
*
|
|
46
58
|
* Runs as a background loop inside the dashboard process.
|
|
47
|
-
* Every GUARD_INTERVAL_MS,
|
|
48
|
-
*
|
|
49
|
-
* handleRun with force=true (same as `bgrun --restart <name>`).
|
|
59
|
+
* Every GUARD_INTERVAL_MS, checks processes with BGR_KEEP_ALIVE=true
|
|
60
|
+
* in their env and auto-restarts any that died.
|
|
50
61
|
*
|
|
51
|
-
*
|
|
52
|
-
*
|
|
53
|
-
*
|
|
54
|
-
*
|
|
55
|
-
*
|
|
62
|
+
* Only guarded processes (opted-in via dashboard toggle or env var) are
|
|
63
|
+
* monitored. Other processes are left alone even if they crash.
|
|
64
|
+
*
|
|
65
|
+
* Toggle guard per-process:
|
|
66
|
+
* - Dashboard UI: click the shield icon on any process row
|
|
67
|
+
* - CLI: set BGR_KEEP_ALIVE=true in the process env/config
|
|
56
68
|
*/
|
|
57
69
|
function startGuard() {
|
|
58
70
|
console.log(`[guard] ✓ Built-in process guard started (checking every ${GUARD_INTERVAL_MS / 1000}s)`);
|
|
@@ -66,9 +78,17 @@ function startGuard() {
|
|
|
66
78
|
// Skip the dashboard itself
|
|
67
79
|
if (GUARD_SKIP_NAMES.has(proc.name)) continue;
|
|
68
80
|
|
|
81
|
+
// Only guard processes with BGR_KEEP_ALIVE=true
|
|
82
|
+
const env = proc.env ? parseEnvString(proc.env) : {};
|
|
83
|
+
if (env.BGR_KEEP_ALIVE !== 'true') continue;
|
|
84
|
+
|
|
69
85
|
const alive = await isProcessRunning(proc.pid, proc.command);
|
|
70
86
|
if (!alive) {
|
|
71
|
-
|
|
87
|
+
const now = Date.now();
|
|
88
|
+
const nextRestart = guardNextRestartTime.get(proc.name) || 0;
|
|
89
|
+
if (now < nextRestart) continue; // Still in backoff period
|
|
90
|
+
|
|
91
|
+
console.log(`[guard] ⚠ Guarded process "${proc.name}" (PID ${proc.pid}) is dead, restarting...`);
|
|
72
92
|
try {
|
|
73
93
|
await handleRun({
|
|
74
94
|
action: 'run',
|
|
@@ -76,10 +96,34 @@ function startGuard() {
|
|
|
76
96
|
force: true,
|
|
77
97
|
remoteName: '',
|
|
78
98
|
});
|
|
79
|
-
|
|
99
|
+
|
|
100
|
+
// Track restart count
|
|
101
|
+
const prevCount = guardRestartCounts.get(proc.name) || 0;
|
|
102
|
+
const newCount = prevCount + 1;
|
|
103
|
+
guardRestartCounts.set(proc.name, newCount);
|
|
104
|
+
|
|
105
|
+
// Exponential backoff if it crashes repeatedly (more than 5 times)
|
|
106
|
+
if (newCount > 5) {
|
|
107
|
+
const backoffSeconds = Math.min(30 * Math.pow(2, newCount - 6), 300); // 30s, 60s, 120s, up to 5 mins
|
|
108
|
+
guardNextRestartTime.set(proc.name, Date.now() + (backoffSeconds * 1000));
|
|
109
|
+
console.log(`[guard] ✓ Restarted "${proc.name}" (restart #${newCount}). Crash loop detected: next check delayed by ${backoffSeconds}s.`);
|
|
110
|
+
} else {
|
|
111
|
+
console.log(`[guard] ✓ Restarted "${proc.name}" (restart #${newCount})`);
|
|
112
|
+
}
|
|
80
113
|
} catch (err: any) {
|
|
81
114
|
console.error(`[guard] ✗ Failed to restart "${proc.name}": ${err.message}`);
|
|
82
115
|
}
|
|
116
|
+
} else {
|
|
117
|
+
// Reset counter if process has been stable (alive at least once during check)
|
|
118
|
+
const prevCount = guardRestartCounts.get(proc.name) || 0;
|
|
119
|
+
if (prevCount > 0) {
|
|
120
|
+
const nextRestart = guardNextRestartTime.get(proc.name) || 0;
|
|
121
|
+
if (Date.now() > nextRestart + 60_000) {
|
|
122
|
+
// If it lived over 60s past its backoff threshold, consider it stable
|
|
123
|
+
guardRestartCounts.delete(proc.name);
|
|
124
|
+
guardNextRestartTime.delete(proc.name);
|
|
125
|
+
}
|
|
126
|
+
}
|
|
83
127
|
}
|
|
84
128
|
}
|
|
85
129
|
} catch (err: any) {
|