bosun 0.34.7 → 0.34.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agent-work-analyzer.mjs +157 -13
- package/cli.mjs +181 -116
- package/lib/logger.mjs +65 -1
- package/maintenance.mjs +102 -19
- package/monitor.mjs +894 -57
- package/package.json +1 -1
- package/setup-web-server.mjs +506 -48
- package/setup.mjs +48 -11
- package/task-store.mjs +19 -1
- package/ui/components/forms.js +6 -0
- package/ui/setup.html +1337 -112
- package/ui/tabs/agents.js +61 -49
- package/ui/tabs/control.js +13 -1
- package/ui/tabs/settings.js +22 -9
- package/ui/tabs/workflows.js +18 -2
- package/ui-server.mjs +211 -24
- package/update-check.mjs +54 -0
- package/workflow-engine.mjs +51 -10
- package/workflow-nodes.mjs +538 -15
- package/workflow-templates/agents.mjs +32 -4
- package/workflow-templates/github.mjs +102 -36
- package/workflow-templates/reliability.mjs +37 -3
- package/workflow-templates.mjs +181 -10
- package/workspace-manager.mjs +8 -1
package/agent-work-analyzer.mjs
CHANGED
|
@@ -63,6 +63,88 @@ const activeSessions = new Map();
|
|
|
63
63
|
// Alert cooldowns: "alert_type:attempt_id" -> timestamp
|
|
64
64
|
const alertCooldowns = new Map();
|
|
65
65
|
const ALERT_COOLDOWN_MS = 5 * 60 * 1000; // 5 minutes between same alert
|
|
66
|
+
const FAILED_SESSION_ALERT_MIN_COOLDOWN_MS = 60 * 60 * 1000; // Keep noisy failed-session summaries coarse-grained
|
|
67
|
+
const ALERT_COOLDOWN_RETENTION_MS = Math.max(
|
|
68
|
+
FAILED_SESSION_ALERT_MIN_COOLDOWN_MS * 3,
|
|
69
|
+
3 * 60 * 60 * 1000,
|
|
70
|
+
); // keep cooldown history bounded
|
|
71
|
+
const ALERT_COOLDOWN_REPLAY_MAX_BYTES = Math.max(
|
|
72
|
+
256 * 1024,
|
|
73
|
+
Number(process.env.AGENT_ALERT_COOLDOWN_REPLAY_MAX_BYTES || 2 * 1024 * 1024) || 2 * 1024 * 1024,
|
|
74
|
+
);
|
|
75
|
+
|
|
76
|
+
function getAlertCooldownMs(alert) {
|
|
77
|
+
const type = String(alert?.type || "").trim().toLowerCase();
|
|
78
|
+
if (type === "failed_session_high_errors") {
|
|
79
|
+
return Math.max(ALERT_COOLDOWN_MS, FAILED_SESSION_ALERT_MIN_COOLDOWN_MS);
|
|
80
|
+
}
|
|
81
|
+
return Math.max(0, ALERT_COOLDOWN_MS);
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
function extractTaskToken(value) {
|
|
85
|
+
const normalized = String(value || "").trim();
|
|
86
|
+
if (!normalized) return "";
|
|
87
|
+
const prefixMatch = normalized.match(
|
|
88
|
+
/^([0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})(?:-|$)/i,
|
|
89
|
+
);
|
|
90
|
+
return prefixMatch?.[1] || normalized;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
function deriveAlertScopeId(alert) {
|
|
94
|
+
const taskId = extractTaskToken(alert?.task_id);
|
|
95
|
+
if (taskId) return taskId;
|
|
96
|
+
return extractTaskToken(alert?.attempt_id);
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
function buildAlertCooldownKey(alert) {
|
|
100
|
+
const type = String(alert?.type || "unknown").trim().toLowerCase() || "unknown";
|
|
101
|
+
const scopeId = deriveAlertScopeId(alert);
|
|
102
|
+
if (scopeId && (type === "failed_session_high_errors" || type === "stuck_agent")) {
|
|
103
|
+
return `${type}:task:${scopeId}`;
|
|
104
|
+
}
|
|
105
|
+
return `${type}:${String(alert?.attempt_id || "unknown")}`;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
function pruneStaleAlertCooldowns(nowMs = Date.now()) {
|
|
109
|
+
const now = Number(nowMs) || Date.now();
|
|
110
|
+
const cutoff = now - ALERT_COOLDOWN_RETENTION_MS;
|
|
111
|
+
for (const [key, ts] of alertCooldowns.entries()) {
|
|
112
|
+
const lastTs = Number(ts);
|
|
113
|
+
if (!Number.isFinite(lastTs) || lastTs < cutoff) {
|
|
114
|
+
alertCooldowns.delete(key);
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
async function hydrateAlertCooldownsFromLog() {
|
|
120
|
+
if (!existsSync(ALERTS_LOG)) return;
|
|
121
|
+
try {
|
|
122
|
+
const fileStat = await stat(ALERTS_LOG);
|
|
123
|
+
if (!fileStat.size) return;
|
|
124
|
+
const start = Math.max(0, fileStat.size - ALERT_COOLDOWN_REPLAY_MAX_BYTES);
|
|
125
|
+
const stream = createReadStream(ALERTS_LOG, { start, encoding: "utf8" });
|
|
126
|
+
const rl = createInterface({ input: stream, crlfDelay: Infinity });
|
|
127
|
+
const maxCooldownMs = Math.max(ALERT_COOLDOWN_MS, FAILED_SESSION_ALERT_MIN_COOLDOWN_MS);
|
|
128
|
+
const cutoff = Date.now() - maxCooldownMs;
|
|
129
|
+
for await (const line of rl) {
|
|
130
|
+
const trimmed = String(line || "").trim();
|
|
131
|
+
if (!trimmed) continue;
|
|
132
|
+
try {
|
|
133
|
+
const entry = JSON.parse(trimmed);
|
|
134
|
+
const ts = Date.parse(String(entry?.timestamp || ""));
|
|
135
|
+
if (!Number.isFinite(ts) || ts < cutoff) continue;
|
|
136
|
+
const cooldownMs = getAlertCooldownMs(entry);
|
|
137
|
+
if (ts < Date.now() - cooldownMs) continue;
|
|
138
|
+
const key = String(entry?._cooldown_key || "").trim() || buildAlertCooldownKey(entry);
|
|
139
|
+
alertCooldowns.set(key, ts);
|
|
140
|
+
} catch {
|
|
141
|
+
// ignore malformed jsonl
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
} catch {
|
|
145
|
+
// best-effort hydration only
|
|
146
|
+
}
|
|
147
|
+
}
|
|
66
148
|
|
|
67
149
|
// ── Log Tailing ─────────────────────────────────────────────────────────────
|
|
68
150
|
|
|
@@ -70,6 +152,14 @@ let filePosition = 0;
|
|
|
70
152
|
let isRunning = false;
|
|
71
153
|
let stuckSweepTimer = null;
|
|
72
154
|
|
|
155
|
+
function parseEnvBoolean(value, fallback = false) {
|
|
156
|
+
if (value == null || value === "") return fallback;
|
|
157
|
+
const normalized = String(value).trim().toLowerCase();
|
|
158
|
+
if (["1", "true", "yes", "y", "on"].includes(normalized)) return true;
|
|
159
|
+
if (["0", "false", "no", "n", "off"].includes(normalized)) return false;
|
|
160
|
+
return fallback;
|
|
161
|
+
}
|
|
162
|
+
|
|
73
163
|
/**
|
|
74
164
|
* Start the analyzer loop
|
|
75
165
|
*/
|
|
@@ -88,14 +178,29 @@ export async function startAnalyzer() {
|
|
|
88
178
|
if (!existsSync(ALERTS_LOG)) {
|
|
89
179
|
await writeFile(ALERTS_LOG, "");
|
|
90
180
|
}
|
|
181
|
+
await hydrateAlertCooldownsFromLog();
|
|
91
182
|
} catch (err) {
|
|
92
183
|
console.warn(`[agent-work-analyzer] Failed to init alerts log: ${err.message}`);
|
|
93
184
|
}
|
|
94
185
|
|
|
95
|
-
// Initial
|
|
186
|
+
// Initial positioning for existing log.
|
|
187
|
+
// Default behavior is true tailing (start at EOF) to avoid replaying stale
|
|
188
|
+
// historical sessions on monitor restart, which can re-emit old alerts and
|
|
189
|
+
// trigger noisy false-positive loops. Operators can opt in to replay for
|
|
190
|
+
// forensics via AGENT_ANALYZER_REPLAY_STARTUP=1.
|
|
96
191
|
if (existsSync(AGENT_WORK_STREAM)) {
|
|
97
|
-
|
|
98
|
-
|
|
192
|
+
const replayStartup = parseEnvBoolean(
|
|
193
|
+
process.env.AGENT_ANALYZER_REPLAY_STARTUP,
|
|
194
|
+
false,
|
|
195
|
+
);
|
|
196
|
+
if (replayStartup) {
|
|
197
|
+
filePosition = await processLogFile(filePosition);
|
|
198
|
+
pruneStaleSessionsAfterReplay();
|
|
199
|
+
} else {
|
|
200
|
+
const streamStats = await stat(AGENT_WORK_STREAM);
|
|
201
|
+
filePosition = Math.max(0, Number(streamStats?.size || 0));
|
|
202
|
+
activeSessions.clear();
|
|
203
|
+
}
|
|
99
204
|
} else {
|
|
100
205
|
// Ensure the stream file exists so the watcher doesn't throw
|
|
101
206
|
try {
|
|
@@ -154,7 +259,12 @@ export function stopAnalyzer() {
|
|
|
154
259
|
async function processLogFile(startPosition) {
|
|
155
260
|
try {
|
|
156
261
|
const stats = await stat(AGENT_WORK_STREAM);
|
|
157
|
-
if (stats.size
|
|
262
|
+
if (stats.size < startPosition) {
|
|
263
|
+
// Log file was truncated/rotated. Reset offset so new entries are not
|
|
264
|
+
// skipped forever after rotation.
|
|
265
|
+
return 0;
|
|
266
|
+
}
|
|
267
|
+
if (stats.size === startPosition) {
|
|
158
268
|
return startPosition; // No new data
|
|
159
269
|
}
|
|
160
270
|
|
|
@@ -163,12 +273,29 @@ async function processLogFile(startPosition) {
|
|
|
163
273
|
encoding: "utf8",
|
|
164
274
|
});
|
|
165
275
|
|
|
166
|
-
|
|
167
|
-
|
|
276
|
+
let chunkText = "";
|
|
277
|
+
for await (const chunk of stream) {
|
|
278
|
+
chunkText += String(chunk || "");
|
|
279
|
+
}
|
|
280
|
+
if (!chunkText) {
|
|
281
|
+
return startPosition;
|
|
282
|
+
}
|
|
168
283
|
|
|
169
|
-
|
|
170
|
-
|
|
284
|
+
const lastNewlineIdx = chunkText.lastIndexOf("\n");
|
|
285
|
+
let processText = "";
|
|
286
|
+
let trailing = "";
|
|
287
|
+
if (lastNewlineIdx >= 0) {
|
|
288
|
+
processText = chunkText.slice(0, lastNewlineIdx + 1);
|
|
289
|
+
trailing = chunkText.slice(lastNewlineIdx + 1);
|
|
290
|
+
} else {
|
|
291
|
+
trailing = chunkText;
|
|
292
|
+
}
|
|
171
293
|
|
|
294
|
+
const lines = processText
|
|
295
|
+
.split(/\r?\n/)
|
|
296
|
+
.map((line) => String(line || "").trim())
|
|
297
|
+
.filter(Boolean);
|
|
298
|
+
for (const line of lines) {
|
|
172
299
|
try {
|
|
173
300
|
const event = JSON.parse(line);
|
|
174
301
|
await analyzeEvent(event);
|
|
@@ -179,7 +306,21 @@ async function processLogFile(startPosition) {
|
|
|
179
306
|
}
|
|
180
307
|
}
|
|
181
308
|
|
|
182
|
-
|
|
309
|
+
// If trailing text is present without newline, treat it as a potentially
|
|
310
|
+
// partial line and only consume it when it is valid JSON. This avoids data
|
|
311
|
+
// loss when writers flush an incomplete line temporarily.
|
|
312
|
+
const trailingTrimmed = String(trailing || "").trim();
|
|
313
|
+
if (trailingTrimmed) {
|
|
314
|
+
try {
|
|
315
|
+
const trailingEvent = JSON.parse(trailingTrimmed);
|
|
316
|
+
await analyzeEvent(trailingEvent);
|
|
317
|
+
return startPosition + Buffer.byteLength(chunkText, "utf8");
|
|
318
|
+
} catch {
|
|
319
|
+
return startPosition + Buffer.byteLength(processText, "utf8");
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
return startPosition + Buffer.byteLength(processText, "utf8");
|
|
183
324
|
} catch (err) {
|
|
184
325
|
if (err.code !== "ENOENT") {
|
|
185
326
|
console.error(`[agent-work-analyzer] Error reading log: ${err.message}`);
|
|
@@ -450,11 +591,12 @@ function startStuckSweep() {
|
|
|
450
591
|
* @param {Object} alert - Alert data
|
|
451
592
|
*/
|
|
452
593
|
async function emitAlert(alert) {
|
|
453
|
-
const alertKey =
|
|
594
|
+
const alertKey = buildAlertCooldownKey(alert);
|
|
595
|
+
const cooldownMs = getAlertCooldownMs(alert);
|
|
454
596
|
|
|
455
597
|
// Check cooldown
|
|
456
598
|
const lastAlert = alertCooldowns.get(alertKey);
|
|
457
|
-
if (lastAlert && Date.now() - lastAlert <
|
|
599
|
+
if (lastAlert && Date.now() - lastAlert < cooldownMs) {
|
|
458
600
|
return; // Skip duplicate alerts
|
|
459
601
|
}
|
|
460
602
|
|
|
@@ -462,6 +604,7 @@ async function emitAlert(alert) {
|
|
|
462
604
|
|
|
463
605
|
const alertEntry = {
|
|
464
606
|
timestamp: new Date().toISOString(),
|
|
607
|
+
_cooldown_key: alertKey,
|
|
465
608
|
...alert,
|
|
466
609
|
};
|
|
467
610
|
|
|
@@ -477,7 +620,7 @@ async function emitAlert(alert) {
|
|
|
477
620
|
|
|
478
621
|
// ── Cleanup Old Sessions ────────────────────────────────────────────────────
|
|
479
622
|
|
|
480
|
-
setInterval(() => {
|
|
623
|
+
const cleanupTimer = setInterval(() => {
|
|
481
624
|
const cutoff = Date.now() - 60 * 60 * 1000; // 1 hour
|
|
482
625
|
|
|
483
626
|
for (const [attemptId, session] of activeSessions.entries()) {
|
|
@@ -486,7 +629,8 @@ setInterval(() => {
|
|
|
486
629
|
activeSessions.delete(attemptId);
|
|
487
630
|
}
|
|
488
631
|
}
|
|
632
|
+
pruneStaleAlertCooldowns();
|
|
489
633
|
}, 10 * 60 * 1000); // Cleanup every 10 minutes
|
|
634
|
+
cleanupTimer.unref?.();
|
|
490
635
|
|
|
491
636
|
// ── Exports ─────────────────────────────────────────────────────────────────
|
|
492
|
-
|
package/cli.mjs
CHANGED
|
@@ -34,6 +34,15 @@ import {
|
|
|
34
34
|
migrateFromLegacy,
|
|
35
35
|
} from "./compat.mjs";
|
|
36
36
|
|
|
37
|
+
const MONITOR_START_MAX_WAIT_MS = Math.max(
|
|
38
|
+
0,
|
|
39
|
+
Number(process.env.BOSUN_MONITOR_START_MAX_WAIT_MS || "15000") || 15000,
|
|
40
|
+
);
|
|
41
|
+
const MONITOR_START_RETRY_MS = Math.max(
|
|
42
|
+
100,
|
|
43
|
+
Number(process.env.BOSUN_MONITOR_START_RETRY_MS || "500") || 500,
|
|
44
|
+
);
|
|
45
|
+
|
|
37
46
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
38
47
|
const args = process.argv.slice(2);
|
|
39
48
|
|
|
@@ -1484,133 +1493,189 @@ function detectExistingMonitorLockOwner(excludePid = null) {
|
|
|
1484
1493
|
return null;
|
|
1485
1494
|
}
|
|
1486
1495
|
|
|
1496
|
+
function getRequiredMonitorRuntimeFiles(monitorPath) {
|
|
1497
|
+
const required = [monitorPath];
|
|
1498
|
+
const copilotDir = resolve(
|
|
1499
|
+
__dirname,
|
|
1500
|
+
"node_modules",
|
|
1501
|
+
"@github",
|
|
1502
|
+
"copilot",
|
|
1503
|
+
);
|
|
1504
|
+
const conptyAgentPath = resolve(copilotDir, "conpty_console_list_agent.js");
|
|
1505
|
+
if (process.platform === "win32" && existsSync(copilotDir)) {
|
|
1506
|
+
required.push(conptyAgentPath);
|
|
1507
|
+
}
|
|
1508
|
+
return required;
|
|
1509
|
+
}
|
|
1510
|
+
|
|
1511
|
+
function listMissingFiles(paths) {
|
|
1512
|
+
return paths.filter((entry) => !existsSync(entry));
|
|
1513
|
+
}
|
|
1514
|
+
|
|
1515
|
+
async function waitForMonitorRuntimeFiles(monitorPath) {
|
|
1516
|
+
const required = getRequiredMonitorRuntimeFiles(monitorPath);
|
|
1517
|
+
const startedAt = Date.now();
|
|
1518
|
+
let missing = listMissingFiles(required);
|
|
1519
|
+
while (
|
|
1520
|
+
missing.length > 0 &&
|
|
1521
|
+
Date.now() - startedAt < MONITOR_START_MAX_WAIT_MS
|
|
1522
|
+
) {
|
|
1523
|
+
await new Promise((resolveWait) => {
|
|
1524
|
+
setTimeout(resolveWait, MONITOR_START_RETRY_MS);
|
|
1525
|
+
});
|
|
1526
|
+
missing = listMissingFiles(required);
|
|
1527
|
+
}
|
|
1528
|
+
return {
|
|
1529
|
+
ready: missing.length === 0,
|
|
1530
|
+
missing,
|
|
1531
|
+
waitedMs: Date.now() - startedAt,
|
|
1532
|
+
};
|
|
1533
|
+
}
|
|
1534
|
+
|
|
1487
1535
|
function runMonitor({ restartReason = "" } = {}) {
|
|
1488
1536
|
return new Promise((resolve, reject) => {
|
|
1489
1537
|
const monitorPath = fileURLToPath(
|
|
1490
1538
|
new URL("./monitor.mjs", import.meta.url),
|
|
1491
1539
|
);
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
}
|
|
1498
|
-
monitorChild = fork(monitorPath, process.argv.slice(2), {
|
|
1499
|
-
stdio: "inherit",
|
|
1500
|
-
execArgv: ["--max-old-space-size=4096"],
|
|
1501
|
-
env: childEnv,
|
|
1502
|
-
windowsHide: IS_DAEMON_CHILD && process.platform === "win32",
|
|
1503
|
-
});
|
|
1504
|
-
daemonCrashTracker.markStart();
|
|
1505
|
-
|
|
1506
|
-
monitorChild.on("exit", (code, signal) => {
|
|
1507
|
-
const childPid = monitorChild?.pid ?? null;
|
|
1508
|
-
monitorChild = null;
|
|
1509
|
-
if (code === SELF_RESTART_EXIT_CODE) {
|
|
1510
|
-
console.log(
|
|
1511
|
-
"\n \u21BB Monitor restarting with fresh modules...\n",
|
|
1512
|
-
);
|
|
1513
|
-
// Small delay to let file writes / port releases settle
|
|
1514
|
-
setTimeout(() => resolve(runMonitor({ restartReason: "self-restart" })), 2000);
|
|
1515
|
-
} else {
|
|
1516
|
-
const exitCode = code ?? (signal ? 1 : 0);
|
|
1517
|
-
const existingOwner =
|
|
1518
|
-
!gracefulShutdown && exitCode === 1
|
|
1519
|
-
? detectExistingMonitorLockOwner(childPid)
|
|
1520
|
-
: null;
|
|
1521
|
-
if (existingOwner) {
|
|
1522
|
-
console.log(
|
|
1523
|
-
`\n bosun is already running (PID ${existingOwner.pid}); exiting duplicate start.\n`,
|
|
1540
|
+
waitForMonitorRuntimeFiles(monitorPath)
|
|
1541
|
+
.then(({ ready, missing, waitedMs }) => {
|
|
1542
|
+
if (!ready) {
|
|
1543
|
+
throw new Error(
|
|
1544
|
+
`monitor runtime files missing after waiting ${Math.round(waitedMs / 1000)}s: ${missing.join(", ")}`,
|
|
1524
1545
|
);
|
|
1525
|
-
process.exit(0);
|
|
1526
|
-
return;
|
|
1527
1546
|
}
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
!gracefulShutdown &&
|
|
1532
|
-
(isOSKill || (IS_DAEMON_CHILD && exitCode !== 0));
|
|
1533
|
-
if (shouldAutoRestart) {
|
|
1534
|
-
const crashState = daemonCrashTracker.recordExit();
|
|
1535
|
-
daemonRestartCount += 1;
|
|
1536
|
-
const delayMs = isOSKill ? 5000 : DAEMON_RESTART_DELAY_MS;
|
|
1537
|
-
if (IS_DAEMON_CHILD && crashState.exceeded) {
|
|
1538
|
-
const durationSec = Math.max(
|
|
1539
|
-
1,
|
|
1540
|
-
Math.round(crashState.runDurationMs / 1000),
|
|
1541
|
-
);
|
|
1542
|
-
const windowSec = Math.max(
|
|
1543
|
-
1,
|
|
1544
|
-
Math.round(crashState.instantCrashWindowMs / 1000),
|
|
1545
|
-
);
|
|
1546
|
-
console.error(
|
|
1547
|
-
`\n ✖ Monitor crashed too quickly ${crashState.instantCrashCount} times in a row (each <= ${windowSec}s, latest ${durationSec}s). Auto-restart is now paused.`,
|
|
1548
|
-
);
|
|
1549
|
-
sendCrashNotification(exitCode, signal).finally(() =>
|
|
1550
|
-
process.exit(exitCode),
|
|
1551
|
-
);
|
|
1552
|
-
return;
|
|
1553
|
-
}
|
|
1554
|
-
if (
|
|
1555
|
-
IS_DAEMON_CHILD &&
|
|
1556
|
-
DAEMON_MAX_RESTARTS > 0 &&
|
|
1557
|
-
daemonRestartCount > DAEMON_MAX_RESTARTS
|
|
1558
|
-
) {
|
|
1559
|
-
console.error(
|
|
1560
|
-
`\n ✖ Monitor crashed too many times (${daemonRestartCount - 1} restarts, max ${DAEMON_MAX_RESTARTS}).`,
|
|
1561
|
-
);
|
|
1562
|
-
sendCrashNotification(exitCode, signal).finally(() =>
|
|
1563
|
-
process.exit(exitCode),
|
|
1564
|
-
);
|
|
1565
|
-
return;
|
|
1566
|
-
}
|
|
1567
|
-
const reasonLabel = signal
|
|
1568
|
-
? `signal ${signal}`
|
|
1569
|
-
: `exit code ${exitCode}`;
|
|
1570
|
-
const attemptLabel =
|
|
1571
|
-
IS_DAEMON_CHILD && DAEMON_MAX_RESTARTS > 0
|
|
1572
|
-
? `${daemonRestartCount}/${DAEMON_MAX_RESTARTS}`
|
|
1573
|
-
: `${daemonRestartCount}`;
|
|
1574
|
-
console.error(
|
|
1575
|
-
`\n ⚠ Monitor exited (${reasonLabel}) — auto-restarting in ${Math.max(1, Math.round(delayMs / 1000))}s${IS_DAEMON_CHILD ? ` [attempt ${attemptLabel}]` : ""}...`,
|
|
1547
|
+
if (waitedMs >= MONITOR_START_RETRY_MS) {
|
|
1548
|
+
console.warn(
|
|
1549
|
+
`[cli] delayed monitor start by ${Math.round(waitedMs / 1000)}s while waiting for runtime files to settle`,
|
|
1576
1550
|
);
|
|
1577
|
-
sendCrashNotification(exitCode, signal, {
|
|
1578
|
-
autoRestartInMs: delayMs,
|
|
1579
|
-
restartAttempt: daemonRestartCount,
|
|
1580
|
-
maxRestarts: IS_DAEMON_CHILD ? DAEMON_MAX_RESTARTS : 0,
|
|
1581
|
-
}).catch(() => {});
|
|
1582
|
-
setTimeout(
|
|
1583
|
-
() =>
|
|
1584
|
-
resolve(
|
|
1585
|
-
runMonitor({
|
|
1586
|
-
restartReason: isOSKill ? "os-kill" : "crash",
|
|
1587
|
-
}),
|
|
1588
|
-
),
|
|
1589
|
-
delayMs,
|
|
1590
|
-
);
|
|
1591
|
-
return;
|
|
1592
1551
|
}
|
|
1593
|
-
|
|
1594
|
-
if (
|
|
1595
|
-
|
|
1596
|
-
`\n ✖ Monitor crashed (${signal ? `signal ${signal}` : `exit code ${exitCode}`}) — sending crash notification...`,
|
|
1597
|
-
);
|
|
1598
|
-
sendCrashNotification(exitCode, signal).finally(() =>
|
|
1599
|
-
process.exit(exitCode),
|
|
1600
|
-
);
|
|
1552
|
+
const childEnv = { ...process.env };
|
|
1553
|
+
if (restartReason) {
|
|
1554
|
+
childEnv.BOSUN_MONITOR_RESTART_REASON = restartReason;
|
|
1601
1555
|
} else {
|
|
1602
|
-
|
|
1603
|
-
daemonCrashTracker.reset();
|
|
1604
|
-
process.exit(exitCode);
|
|
1556
|
+
delete childEnv.BOSUN_MONITOR_RESTART_REASON;
|
|
1605
1557
|
}
|
|
1606
|
-
|
|
1607
|
-
|
|
1608
|
-
|
|
1609
|
-
|
|
1610
|
-
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
|
|
1558
|
+
monitorChild = fork(monitorPath, process.argv.slice(2), {
|
|
1559
|
+
stdio: "inherit",
|
|
1560
|
+
execArgv: ["--max-old-space-size=4096"],
|
|
1561
|
+
env: childEnv,
|
|
1562
|
+
windowsHide: IS_DAEMON_CHILD && process.platform === "win32",
|
|
1563
|
+
});
|
|
1564
|
+
daemonCrashTracker.markStart();
|
|
1565
|
+
|
|
1566
|
+
monitorChild.on("exit", (code, signal) => {
|
|
1567
|
+
const childPid = monitorChild?.pid ?? null;
|
|
1568
|
+
monitorChild = null;
|
|
1569
|
+
if (code === SELF_RESTART_EXIT_CODE) {
|
|
1570
|
+
console.log(
|
|
1571
|
+
"\n ↻ Monitor restarting with fresh modules...\n",
|
|
1572
|
+
);
|
|
1573
|
+
// Small delay to let file writes / port releases settle
|
|
1574
|
+
setTimeout(() => resolve(runMonitor({ restartReason: "self-restart" })), 2000);
|
|
1575
|
+
} else {
|
|
1576
|
+
const exitCode = code ?? (signal ? 1 : 0);
|
|
1577
|
+
const existingOwner =
|
|
1578
|
+
!gracefulShutdown && exitCode === 1
|
|
1579
|
+
? detectExistingMonitorLockOwner(childPid)
|
|
1580
|
+
: null;
|
|
1581
|
+
if (existingOwner) {
|
|
1582
|
+
console.log(
|
|
1583
|
+
`\n bosun is already running (PID ${existingOwner.pid}); exiting duplicate start.\n`,
|
|
1584
|
+
);
|
|
1585
|
+
process.exit(0);
|
|
1586
|
+
return;
|
|
1587
|
+
}
|
|
1588
|
+
// 4294967295 (0xFFFFFFFF / -1 signed) = OS killed the process (OOM, external termination)
|
|
1589
|
+
const isOSKill = exitCode === 4294967295 || exitCode === -1;
|
|
1590
|
+
const shouldAutoRestart =
|
|
1591
|
+
!gracefulShutdown &&
|
|
1592
|
+
(isOSKill || (IS_DAEMON_CHILD && exitCode !== 0));
|
|
1593
|
+
if (shouldAutoRestart) {
|
|
1594
|
+
const crashState = daemonCrashTracker.recordExit();
|
|
1595
|
+
daemonRestartCount += 1;
|
|
1596
|
+
const delayMs = isOSKill ? 5000 : DAEMON_RESTART_DELAY_MS;
|
|
1597
|
+
if (IS_DAEMON_CHILD && crashState.exceeded) {
|
|
1598
|
+
const durationSec = Math.max(
|
|
1599
|
+
1,
|
|
1600
|
+
Math.round(crashState.runDurationMs / 1000),
|
|
1601
|
+
);
|
|
1602
|
+
const windowSec = Math.max(
|
|
1603
|
+
1,
|
|
1604
|
+
Math.round(crashState.instantCrashWindowMs / 1000),
|
|
1605
|
+
);
|
|
1606
|
+
console.error(
|
|
1607
|
+
`\n ✖ Monitor crashed too quickly ${crashState.instantCrashCount} times in a row (each <= ${windowSec}s, latest ${durationSec}s). Auto-restart is now paused.`,
|
|
1608
|
+
);
|
|
1609
|
+
sendCrashNotification(exitCode, signal).finally(() =>
|
|
1610
|
+
process.exit(exitCode),
|
|
1611
|
+
);
|
|
1612
|
+
return;
|
|
1613
|
+
}
|
|
1614
|
+
if (
|
|
1615
|
+
IS_DAEMON_CHILD &&
|
|
1616
|
+
DAEMON_MAX_RESTARTS > 0 &&
|
|
1617
|
+
daemonRestartCount > DAEMON_MAX_RESTARTS
|
|
1618
|
+
) {
|
|
1619
|
+
console.error(
|
|
1620
|
+
`\n ✖ Monitor crashed too many times (${daemonRestartCount - 1} restarts, max ${DAEMON_MAX_RESTARTS}).`,
|
|
1621
|
+
);
|
|
1622
|
+
sendCrashNotification(exitCode, signal).finally(() =>
|
|
1623
|
+
process.exit(exitCode),
|
|
1624
|
+
);
|
|
1625
|
+
return;
|
|
1626
|
+
}
|
|
1627
|
+
const reasonLabel = signal
|
|
1628
|
+
? `signal ${signal}`
|
|
1629
|
+
: `exit code ${exitCode}`;
|
|
1630
|
+
const attemptLabel =
|
|
1631
|
+
IS_DAEMON_CHILD && DAEMON_MAX_RESTARTS > 0
|
|
1632
|
+
? `${daemonRestartCount}/${DAEMON_MAX_RESTARTS}`
|
|
1633
|
+
: `${daemonRestartCount}`;
|
|
1634
|
+
console.error(
|
|
1635
|
+
`\n ⚠ Monitor exited (${reasonLabel}) — auto-restarting in ${Math.max(1, Math.round(delayMs / 1000))}s${IS_DAEMON_CHILD ? ` [attempt ${attemptLabel}]` : ""}...`,
|
|
1636
|
+
);
|
|
1637
|
+
sendCrashNotification(exitCode, signal, {
|
|
1638
|
+
autoRestartInMs: delayMs,
|
|
1639
|
+
restartAttempt: daemonRestartCount,
|
|
1640
|
+
maxRestarts: IS_DAEMON_CHILD ? DAEMON_MAX_RESTARTS : 0,
|
|
1641
|
+
}).catch(() => {});
|
|
1642
|
+
setTimeout(
|
|
1643
|
+
() =>
|
|
1644
|
+
resolve(
|
|
1645
|
+
runMonitor({
|
|
1646
|
+
restartReason: isOSKill ? "os-kill" : "crash",
|
|
1647
|
+
}),
|
|
1648
|
+
),
|
|
1649
|
+
delayMs,
|
|
1650
|
+
);
|
|
1651
|
+
return;
|
|
1652
|
+
}
|
|
1653
|
+
|
|
1654
|
+
if (exitCode !== 0 && !gracefulShutdown) {
|
|
1655
|
+
console.error(
|
|
1656
|
+
`\n ✖ Monitor crashed (${signal ? `signal ${signal}` : `exit code ${exitCode}`}) — sending crash notification...`,
|
|
1657
|
+
);
|
|
1658
|
+
sendCrashNotification(exitCode, signal).finally(() =>
|
|
1659
|
+
process.exit(exitCode),
|
|
1660
|
+
);
|
|
1661
|
+
} else {
|
|
1662
|
+
daemonRestartCount = 0;
|
|
1663
|
+
daemonCrashTracker.reset();
|
|
1664
|
+
process.exit(exitCode);
|
|
1665
|
+
}
|
|
1666
|
+
}
|
|
1667
|
+
});
|
|
1668
|
+
|
|
1669
|
+
monitorChild.on("error", (err) => {
|
|
1670
|
+
monitorChild = null;
|
|
1671
|
+
console.error(`\n ✖ Monitor failed to start: ${err.message}`);
|
|
1672
|
+
sendCrashNotification(1, null).finally(() => reject(err));
|
|
1673
|
+
});
|
|
1674
|
+
})
|
|
1675
|
+
.catch((err) => {
|
|
1676
|
+
console.error(`\n ✖ Monitor failed to start: ${err.message}`);
|
|
1677
|
+
sendCrashNotification(1, null).finally(() => reject(err));
|
|
1678
|
+
});
|
|
1614
1679
|
});
|
|
1615
1680
|
}
|
|
1616
1681
|
|
package/lib/logger.mjs
CHANGED
|
@@ -68,6 +68,45 @@ let errorLogDirEnsured = false;
|
|
|
68
68
|
/** @type {Set<string>} Modules to always show at DEBUG level even when console is at INFO */
|
|
69
69
|
const verboseModules = new Set();
|
|
70
70
|
|
|
71
|
+
const STDERR_NOISE_PATTERNS = [
|
|
72
|
+
/^(?:\(node:\d+\)\s+)?ExperimentalWarning:\s+SQLite is an experimental feature.*$/i,
|
|
73
|
+
/^\(Use `node --trace-warnings .*`.*\)\s*$/i,
|
|
74
|
+
/^Use `node --trace-warnings .*`.*$/i,
|
|
75
|
+
/^warning:\s+in the working copy of '.*',\s+(?:CRLF will be replaced by LF|LF will be replaced by CRLF) the next time Git touches it\.?\s*$/i,
|
|
76
|
+
/^(?:\[maintenance\]\s+)?local\s+'[^']+'\s+diverged\s+\(\d+↑\s+\d+↓\)\s+but has uncommitted changes\s+[—-]\s+skipping\.?\s*$/i,
|
|
77
|
+
];
|
|
78
|
+
|
|
79
|
+
function normalizeStderrNoiseLine(line) {
|
|
80
|
+
return String(line || "")
|
|
81
|
+
.replace(/\u001b\[[0-9;]*[A-Za-z]/g, "")
|
|
82
|
+
.replace(/^\d{4}-\d{2}-\d{2}T[0-9:.+-]+Z?\s+/, "")
|
|
83
|
+
.replace(/^\d{2}:\d{2}:\d{2}(?:\.\d+)?\s+/, "")
|
|
84
|
+
.trim()
|
|
85
|
+
.replace(/^(?:\[[^\]]+\]\s*)+/, "")
|
|
86
|
+
.trim();
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
function shouldSuppressStderrNoise(text) {
|
|
90
|
+
const normalized = String(text || "")
|
|
91
|
+
.split(/\r?\n/)
|
|
92
|
+
.map((line) => normalizeStderrNoiseLine(line))
|
|
93
|
+
.filter(Boolean);
|
|
94
|
+
if (normalized.length === 0) return false;
|
|
95
|
+
return normalized.every((line) =>
|
|
96
|
+
STDERR_NOISE_PATTERNS.some((pattern) => pattern.test(line)),
|
|
97
|
+
);
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
function stripKnownStderrNoiseLines(text) {
|
|
101
|
+
const rawLines = String(text || "").split(/\r?\n/);
|
|
102
|
+
const kept = rawLines.filter((line) => {
|
|
103
|
+
const normalized = normalizeStderrNoiseLine(line);
|
|
104
|
+
if (!normalized) return false;
|
|
105
|
+
return !STDERR_NOISE_PATTERNS.some((pattern) => pattern.test(normalized));
|
|
106
|
+
});
|
|
107
|
+
return kept.join("\n").trim();
|
|
108
|
+
}
|
|
109
|
+
|
|
71
110
|
// ── Configuration ───────────────────────────────────────────────────────────
|
|
72
111
|
|
|
73
112
|
/**
|
|
@@ -229,6 +268,7 @@ function writeToFile(levelName, module, msg) {
|
|
|
229
268
|
*/
|
|
230
269
|
function writeToErrorFile(levelName, module, msg) {
|
|
231
270
|
if (!errorLogFilePath) return;
|
|
271
|
+
if (shouldSuppressStderrNoise(msg)) return;
|
|
232
272
|
if (!errorLogDirEnsured) {
|
|
233
273
|
try {
|
|
234
274
|
mkdirSync(dirname(errorLogFilePath), { recursive: true });
|
|
@@ -482,6 +522,9 @@ export function installConsoleInterceptor(opts = {}) {
|
|
|
482
522
|
const msg = args
|
|
483
523
|
.map((a) => (typeof a === "string" ? a : String(a)))
|
|
484
524
|
.join(" ");
|
|
525
|
+
if (shouldSuppressStderrNoise(msg)) {
|
|
526
|
+
return;
|
|
527
|
+
}
|
|
485
528
|
const tagMatch = typeof msg === "string" ? msg.match(TAG_RE) : null;
|
|
486
529
|
const mod = tagMatch?.[1] || "stderr";
|
|
487
530
|
if (logFilePath && LogLevel.WARN >= fileLevel) {
|
|
@@ -507,6 +550,9 @@ export function installConsoleInterceptor(opts = {}) {
|
|
|
507
550
|
return typeof a === "string" ? a : String(a);
|
|
508
551
|
})
|
|
509
552
|
.join(" ");
|
|
553
|
+
if (shouldSuppressStderrNoise(msg)) {
|
|
554
|
+
return;
|
|
555
|
+
}
|
|
510
556
|
const tagMatch = typeof msg === "string" ? msg.match(TAG_RE) : null;
|
|
511
557
|
const mod = tagMatch?.[1] || "stderr";
|
|
512
558
|
if (logFilePath && LogLevel.ERROR >= fileLevel) {
|
|
@@ -578,11 +624,29 @@ export function installConsoleInterceptor(opts = {}) {
|
|
|
578
624
|
throw err;
|
|
579
625
|
}
|
|
580
626
|
};
|
|
627
|
+
const acknowledgeSuppressedWrite = (...rest) => {
|
|
628
|
+
const cb = rest.find((value) => typeof value === "function");
|
|
629
|
+
if (cb) {
|
|
630
|
+
try {
|
|
631
|
+
cb();
|
|
632
|
+
} catch {
|
|
633
|
+
/* ignore callback failures for suppressed noise */
|
|
634
|
+
}
|
|
635
|
+
}
|
|
636
|
+
return true;
|
|
637
|
+
};
|
|
581
638
|
process.stderr.write = (chunk, ...rest) => {
|
|
582
639
|
if (!_inInterceptor) {
|
|
583
640
|
const text = typeof chunk === "string" ? chunk : chunk?.toString?.("utf8") || "";
|
|
584
641
|
if (text.trim()) {
|
|
585
|
-
|
|
642
|
+
if (shouldSuppressStderrNoise(text)) {
|
|
643
|
+
return acknowledgeSuppressedWrite(...rest);
|
|
644
|
+
}
|
|
645
|
+
const filtered = stripKnownStderrNoiseLines(text);
|
|
646
|
+
if (!filtered) {
|
|
647
|
+
return acknowledgeSuppressedWrite(...rest);
|
|
648
|
+
}
|
|
649
|
+
writeToErrorFile("STDERR", "process", filtered.replace(/\n$/, ""));
|
|
586
650
|
}
|
|
587
651
|
}
|
|
588
652
|
return safeStderrWrite(chunk, ...rest);
|