alvin-bot 4.8.8 → 4.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +72 -0
- package/dist/handlers/message.js +5 -2
- package/dist/index.js +14 -10
- package/dist/paths.js +2 -0
- package/dist/platforms/whatsapp-auth-helpers.js +53 -0
- package/dist/platforms/whatsapp.js +6 -2
- package/dist/services/browser-manager.js +470 -95
- package/dist/services/browser-webfetch.js +93 -0
- package/dist/services/cron-scheduling.js +142 -0
- package/dist/services/cron.js +32 -6
- package/dist/services/skills.js +15 -11
- package/dist/services/subagent-delivery.js +8 -2
- package/dist/services/subagents.js +49 -8
- package/dist/services/telegram.js +12 -3
- package/dist/services/watchdog-brake.js +113 -0
- package/dist/services/watchdog.js +56 -42
- package/dist/util/console-formatter.js +109 -0
- package/dist/util/debounce.js +24 -0
- package/dist/util/telegram-error-filter.js +62 -0
- package/dist/web/server.js +56 -0
- package/package.json +1 -1
- package/skills/browse/SKILL.md +123 -98
- package/test/browser-webfetch.test.ts +121 -0
- package/test/console-timestamps.test.ts +98 -0
- package/test/cron-restart-resilience.test.ts +191 -0
- package/test/debounce.test.ts +60 -0
- package/test/subagent-final-text.test.ts +132 -0
- package/test/telegram-error-filter.test.ts +85 -0
- package/test/watchdog-brake.test.ts +157 -0
- package/test/web-server-shutdown.test.ts +111 -0
- package/test/whatsapp-auth-resilience.test.ts +96 -0
|
@@ -27,15 +27,13 @@ import { resolve } from "path";
|
|
|
27
27
|
import os from "os";
|
|
28
28
|
import { execSync } from "child_process";
|
|
29
29
|
import { BOT_VERSION } from "../version.js";
|
|
30
|
+
import { decideBrakeAction, shouldResetCrashCounter, DEFAULTS, } from "./watchdog-brake.js";
|
|
30
31
|
const DATA_DIR = process.env.ALVIN_DATA_DIR || resolve(os.homedir(), ".alvin-bot");
|
|
31
32
|
const STATE_DIR = resolve(DATA_DIR, "state");
|
|
32
33
|
const BEACON_FILE = resolve(STATE_DIR, "watchdog.json");
|
|
33
34
|
const ALERT_FILE = resolve(STATE_DIR, "crash-loop.alert");
|
|
34
35
|
const BEACON_INTERVAL_MS = 30_000; // write a beacon every 30 s
|
|
35
|
-
|
|
36
|
-
const CRASH_BRAKE_THRESHOLD = 10; // after this many crashes in the window, brake
|
|
37
|
-
const STALE_BEACON_MS = 90_000; // a beacon older than this is considered "old enough that previous process really exited"
|
|
38
|
-
const RECOVERY_UPTIME_MS = 5 * 60 * 1000; // 5 min of clean uptime resets the counter
|
|
36
|
+
// Thresholds and windows live in watchdog-brake.ts DEFAULTS.
|
|
39
37
|
let beaconTimer = null;
|
|
40
38
|
let resetTimer = null;
|
|
41
39
|
let bootTime = 0;
|
|
@@ -57,7 +55,21 @@ function readBeacon() {
|
|
|
57
55
|
typeof parsed.crashCount === "number" &&
|
|
58
56
|
typeof parsed.crashWindowStart === "number" &&
|
|
59
57
|
typeof parsed.version === "string") {
|
|
60
|
-
|
|
58
|
+
// Older beacons don't have daily-counter fields — default them to
|
|
59
|
+
// 0/now so the brake logic treats this run as the start of the
|
|
60
|
+
// first daily window.
|
|
61
|
+
return {
|
|
62
|
+
lastBeat: parsed.lastBeat,
|
|
63
|
+
pid: parsed.pid,
|
|
64
|
+
bootTime: parsed.bootTime,
|
|
65
|
+
crashCount: parsed.crashCount,
|
|
66
|
+
crashWindowStart: parsed.crashWindowStart,
|
|
67
|
+
version: parsed.version,
|
|
68
|
+
dailyCrashCount: typeof parsed.dailyCrashCount === "number" ? parsed.dailyCrashCount : 0,
|
|
69
|
+
dailyCrashWindowStart: typeof parsed.dailyCrashWindowStart === "number"
|
|
70
|
+
? parsed.dailyCrashWindowStart
|
|
71
|
+
: Date.now(),
|
|
72
|
+
};
|
|
61
73
|
}
|
|
62
74
|
return null;
|
|
63
75
|
}
|
|
@@ -78,8 +90,9 @@ function writeAlert(reason, crashCount) {
|
|
|
78
90
|
const content = [
|
|
79
91
|
`Alvin Bot crash-loop brake hit at ${new Date().toISOString()}`,
|
|
80
92
|
`Version: ${BOT_VERSION}`,
|
|
81
|
-
`Crashes in the last ${
|
|
82
|
-
`
|
|
93
|
+
`Crashes in the last ${DEFAULTS.SHORT_WINDOW_MS / 60_000} minutes: ${crashCount}`,
|
|
94
|
+
`Short-window threshold: ${DEFAULTS.SHORT_BRAKE_THRESHOLD}`,
|
|
95
|
+
`Daily threshold: ${DEFAULTS.DAILY_BRAKE_THRESHOLD}`,
|
|
83
96
|
``,
|
|
84
97
|
`Reason: ${reason}`,
|
|
85
98
|
``,
|
|
@@ -147,36 +160,25 @@ export function startWatchdog() {
|
|
|
147
160
|
ensureStateDir();
|
|
148
161
|
bootTime = Date.now();
|
|
149
162
|
const previous = readBeacon();
|
|
150
|
-
|
|
151
|
-
|
|
163
|
+
const decision = decideBrakeAction(previous, bootTime);
|
|
164
|
+
if (decision.action === "brake") {
|
|
165
|
+
console.error(`[watchdog] crash-loop brake triggered: ${decision.reason}`);
|
|
166
|
+
writeAlert(decision.reason, previous?.crashCount ?? 0);
|
|
167
|
+
checkCrashLoopBrake();
|
|
168
|
+
// checkCrashLoopBrake calls process.exit — execution never reaches here.
|
|
169
|
+
return;
|
|
170
|
+
}
|
|
171
|
+
let crashCount = decision.crashCount;
|
|
172
|
+
let crashWindowStart = decision.crashWindowStart;
|
|
173
|
+
let dailyCrashCount = decision.dailyCrashCount;
|
|
174
|
+
let dailyCrashWindowStart = decision.dailyCrashWindowStart;
|
|
152
175
|
if (previous) {
|
|
153
176
|
const timeSinceLastBeat = bootTime - previous.lastBeat;
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
if (inWindow) {
|
|
160
|
-
crashCount = previous.crashCount + 1;
|
|
161
|
-
crashWindowStart = previous.crashWindowStart;
|
|
162
|
-
}
|
|
163
|
-
else {
|
|
164
|
-
// Previous crash was outside the window → reset counter
|
|
165
|
-
crashCount = 1;
|
|
166
|
-
}
|
|
167
|
-
console.log(`[watchdog] detected restart after ${Math.round(timeSinceLastBeat / 1000)}s — crash ${crashCount}/${CRASH_BRAKE_THRESHOLD} in current ${CRASH_WINDOW_MS / 60_000}min window`);
|
|
168
|
-
if (crashCount >= CRASH_BRAKE_THRESHOLD) {
|
|
169
|
-
console.error(`[watchdog] crash-loop brake triggered (${crashCount} crashes in ${CRASH_WINDOW_MS / 60_000}min)`);
|
|
170
|
-
writeAlert(`Process restarted ${crashCount} times within ${CRASH_WINDOW_MS / 60_000} minutes. Last beacon was ${Math.round(timeSinceLastBeat / 1000)}s ago. Most likely a deterministic crash on startup.`, crashCount);
|
|
171
|
-
// Re-use the brake check to unload + exit cleanly
|
|
172
|
-
checkCrashLoopBrake();
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
else {
|
|
176
|
-
// Previous beacon was old → process had clean uptime before exit,
|
|
177
|
-
// OR system was rebooted between runs. Reset crash count.
|
|
178
|
-
crashCount = 0;
|
|
179
|
-
crashWindowStart = bootTime;
|
|
177
|
+
if (timeSinceLastBeat < DEFAULTS.STALE_BEACON_MS) {
|
|
178
|
+
console.log(`[watchdog] detected restart after ${Math.round(timeSinceLastBeat / 1000)}s — ` +
|
|
179
|
+
`crash ${crashCount}/${DEFAULTS.SHORT_BRAKE_THRESHOLD} in current ` +
|
|
180
|
+
`${DEFAULTS.SHORT_WINDOW_MS / 60_000}min window, ` +
|
|
181
|
+
`${dailyCrashCount}/${DEFAULTS.DAILY_BRAKE_THRESHOLD} in current 24h window`);
|
|
180
182
|
}
|
|
181
183
|
}
|
|
182
184
|
// Write the first beacon immediately so a fresh restart updates the file
|
|
@@ -186,6 +188,8 @@ export function startWatchdog() {
|
|
|
186
188
|
bootTime,
|
|
187
189
|
crashCount,
|
|
188
190
|
crashWindowStart,
|
|
191
|
+
dailyCrashCount,
|
|
192
|
+
dailyCrashWindowStart,
|
|
189
193
|
version: BOT_VERSION,
|
|
190
194
|
});
|
|
191
195
|
// Periodic beacon writer
|
|
@@ -196,15 +200,20 @@ export function startWatchdog() {
|
|
|
196
200
|
bootTime,
|
|
197
201
|
crashCount,
|
|
198
202
|
crashWindowStart,
|
|
203
|
+
dailyCrashCount,
|
|
204
|
+
dailyCrashWindowStart,
|
|
199
205
|
version: BOT_VERSION,
|
|
200
206
|
});
|
|
201
207
|
}, BEACON_INTERVAL_MS);
|
|
202
|
-
// Schedule a recovery counter reset after
|
|
203
|
-
// uptime.
|
|
204
|
-
//
|
|
208
|
+
// Schedule a recovery counter reset after RESET_AFTER_MS (1 h by default)
|
|
209
|
+
// of clean uptime. The old policy was 5 min — too short because chronic
|
|
210
|
+
// crashes often had 5-10 min gaps and never tripped the brake.
|
|
205
211
|
resetTimer = setTimeout(() => {
|
|
206
|
-
|
|
207
|
-
|
|
212
|
+
const uptime = Date.now() - bootTime;
|
|
213
|
+
if (shouldResetCrashCounter(uptime) && crashCount > 0) {
|
|
214
|
+
console.log(`[watchdog] ${Math.round(uptime / 60_000)}min clean uptime — ` +
|
|
215
|
+
`resetting short-window crash counter from ${crashCount} to 0 ` +
|
|
216
|
+
`(daily counter ${dailyCrashCount} stays)`);
|
|
208
217
|
crashCount = 0;
|
|
209
218
|
crashWindowStart = Date.now();
|
|
210
219
|
writeBeacon({
|
|
@@ -213,11 +222,16 @@ export function startWatchdog() {
|
|
|
213
222
|
bootTime,
|
|
214
223
|
crashCount,
|
|
215
224
|
crashWindowStart,
|
|
225
|
+
dailyCrashCount,
|
|
226
|
+
dailyCrashWindowStart,
|
|
216
227
|
version: BOT_VERSION,
|
|
217
228
|
});
|
|
218
229
|
}
|
|
219
|
-
},
|
|
220
|
-
console.log(`[watchdog] started — beacon every ${BEACON_INTERVAL_MS / 1000}s,
|
|
230
|
+
}, DEFAULTS.RESET_AFTER_MS);
|
|
231
|
+
console.log(`[watchdog] started — beacon every ${BEACON_INTERVAL_MS / 1000}s, ` +
|
|
232
|
+
`brake at ${DEFAULTS.SHORT_BRAKE_THRESHOLD} crashes / ${DEFAULTS.SHORT_WINDOW_MS / 60_000}min ` +
|
|
233
|
+
`or ${DEFAULTS.DAILY_BRAKE_THRESHOLD} / 24h, ` +
|
|
234
|
+
`recovery after ${DEFAULTS.RESET_AFTER_MS / 60_000}min uptime`);
|
|
221
235
|
}
|
|
222
236
|
/**
|
|
223
237
|
* Stop the watchdog cleanly. Called from the shutdown handler in
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
|
|
2
|
+
 * Console formatter — adds ISO timestamps to every console.log /
 * console.info / console.warn / console.error call, and drops
 * high-volume noise (libsignal session dumps and Bad MAC errors,
 * Claude CLI native-binary banner).
|
|
5
|
+
*
|
|
6
|
+
* Installed once at bootstrap time from src/index.ts. Idempotent.
|
|
7
|
+
*
|
|
8
|
+
* Why not pino / winston: those pull in several MB of deps and change
|
|
9
|
+
* the call-site ergonomics. Every caller in the bot today uses plain
|
|
10
|
+
* `console.log`; monkey-patching those is a 40-line change instead of
|
|
11
|
+
* a refactor of every file.
|
|
12
|
+
*/
|
|
13
|
+
import util from "node:util";
|
|
14
|
+
let snapshot = null;
|
|
15
|
+
/**
 * First-line signatures of log output that fills out.log/err.log with
 * tens of KB per day without carrying useful signal. Keep this list
 * short: anything added here becomes invisible in the logs, so only add
 * lines nobody will ever need to grep for.
 *
 * All patterns are anchored at the start of the rendered text.
 */
const NOISE_PATTERNS = [
    // libsignal session dump header. When the header opens a brace block,
    // the lines that follow are swallowed by the depth tracking in
    // shouldSuppress().
    /^Closing session: SessionEntry \{/,
    // libsignal prekey bundle swap notification
    /^Closing open session in favor of incoming prekey bundle/,
    // Claude CLI startup banner — emitted once per query
    /^\[claude\] Native binary: /,
    // libsignal Bad MAC — session desync, harmless, repeats endlessly
    /^Session error:Error: Bad MAC Error: Bad MAC/,
];
/**
 * Report whether `line` starts with one of the known noise signatures.
 * Exported for testing.
 *
 * @param {string} line - Rendered log text (first line is what matters).
 * @returns {boolean} true when any NOISE_PATTERNS entry matches.
 */
export function isNoisyLine(line) {
    for (const pattern of NOISE_PATTERNS) {
        if (pattern.test(line)) {
            return true;
        }
    }
    return false;
}
|
|
35
|
+
/**
 * Brace depth of the libsignal multi-line dump currently being swallowed
 * (0 = not inside a dump). The dumps look like
 * `Closing session: SessionEntry {` followed by several lines of buffer
 * hex, closing with `}`. Everything from the opening brace to its
 * matching `}` line is dropped.
 */
let suppressDepth = 0;
/** Number of lines swallowed since the current dump header was seen. */
let suppressedLineCount = 0;
/**
 * Safety valve: maximum number of follow-up lines a single dump may
 * swallow. Without a bound, a dump whose closing `}` never arrives
 * (truncated or interleaved output) would leave suppressDepth > 0 forever
 * and silently drop every later log line, including real errors.
 * NOTE(review): 200 is a deliberately generous guess — tune against real
 * dump sizes if needed.
 */
const MAX_SUPPRESSED_LINES = 200;
/**
 * Decide whether one rendered console message should be dropped.
 *
 * Stateful: a noisy header that opens more braces than it closes switches
 * the filter into "inside a dump" mode, in which following messages are
 * dropped until the braces balance or MAX_SUPPRESSED_LINES is reached.
 *
 * @param {string} raw - Fully rendered message text.
 * @returns {boolean} true when the message must not be written.
 */
function shouldSuppress(raw) {
    const line = raw.trimEnd();
    // Net brace change contributed by this line. The dumps only contain
    // ASCII braces in structural positions, so plain counting is enough.
    const braceDelta = (line.match(/\{/g) || []).length - (line.match(/\}/g) || []).length;
    if (suppressDepth > 0) {
        if (suppressedLineCount < MAX_SUPPRESSED_LINES) {
            suppressedLineCount += 1;
            suppressDepth = Math.max(0, suppressDepth + braceDelta);
            return true;
        }
        // The dump never closed — stop swallowing and classify this line
        // like any other so logging recovers on its own.
        suppressDepth = 0;
        suppressedLineCount = 0;
    }
    if (isNoisyLine(line)) {
        // If the noisy header opens a block, start suppressing its body.
        suppressDepth = Math.max(0, braceDelta);
        suppressedLineCount = 0;
        return true;
    }
    return false;
}
|
|
65
|
+
/**
 * Build the replacement for a single console method.
 *
 * The returned function renders its arguments to text, drops the message
 * when the noise filter says so, and otherwise writes
 * `<ISO timestamp> <text>\n` straight to `stream` — bypassing console so
 * the patched methods never recurse into themselves.
 *
 * @param {Function} method - The original console method. Not called
 *   here; accepted so the call sites pass the saved original alongside
 *   its stream.
 * @param {{ write(chunk: string): unknown }} stream - Destination stream.
 * @returns {(...args: unknown[]) => void} The patched console method.
 */
function formatWithTimestamp(method, stream) {
    return (...args) => {
        // Render first: the noise filter works on the final text.
        const rendered = renderArgs(args);
        if (!shouldSuppress(rendered)) {
            const isoNow = new Date().toISOString();
            stream.write(`${isoNow} ${rendered}\n`);
        }
    };
}
|
|
77
|
+
/**
 * Turn a console-style argument list into one string using Node's
 * util.format, so `%s` / `%d` placeholders and object inspection behave
 * the way console.* callers expect.
 *
 * @param {unknown[]} args - Arguments as passed to the console method.
 * @returns {string} The formatted text (no trailing newline).
 */
function renderArgs(args) {
    return util.format.apply(util, args);
}
|
|
81
|
+
/**
 * Patch console.log / console.info / console.warn / console.error with
 * the timestamping, noise-filtering wrappers.
 *
 * The bound originals are remembered in `snapshot` so they can be
 * restored later. A second call while installed is a no-op.
 * log and info write to process.stdout; warn and error to process.stderr.
 */
export function installConsoleFormatter() {
    if (snapshot) {
        return; // already installed
    }
    const originals = {};
    for (const name of ["log", "warn", "error", "info"]) {
        originals[name] = console[name].bind(console);
    }
    snapshot = originals;
    const routing = [
        ["log", process.stdout],
        ["info", process.stdout],
        ["warn", process.stderr],
        ["error", process.stderr],
    ];
    for (const [name, stream] of routing) {
        console[name] = formatWithTimestamp(snapshot[name], stream);
    }
}
|
|
99
|
+
/**
 * Put back the console methods saved at install time and clear all
 * formatter state (saved originals and dump-suppression depth).
 * Does nothing when the formatter is not installed. Used by tests and
 * shutdown.
 */
export function uninstallConsoleFormatter() {
    if (!snapshot) {
        return;
    }
    const { log, info, warn, error } = snapshot;
    Object.assign(console, { log, info, warn, error });
    snapshot = null;
    suppressDepth = 0;
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
 * Trailing-edge debounce. Delays `fn` until `waitMs` has elapsed since
 * the most recent call. Coalesces bursts into a single invocation with
 * the most recent arguments and the most recent `this`, so the wrapper
 * can be used for methods as well as plain functions.
 *
 * Used by fs.watch consumers (skills, plugins) where macOS FSEvents
 * delivers many duplicate events for a single logical change.
 *
 * @param {Function} fn - Function to invoke once the burst has settled.
 * @param {number} waitMs - Quiet period in milliseconds.
 * @returns {Function} Debounced wrapper; its return value is always
 *   undefined because `fn` runs later on a timer.
 */
export function debounce(fn, waitMs) {
    let timer = null;
    // Receiver + arguments of the latest call, consumed when the timer fires.
    let pending = null;
    return function debounced(...args) {
        pending = { receiver: this, args };
        if (timer !== null) {
            clearTimeout(timer);
        }
        timer = setTimeout(() => {
            timer = null;
            const call = pending;
            // Clear before invoking so a re-entrant call from inside `fn`
            // schedules a fresh invocation instead of being wiped out.
            pending = null;
            if (call) {
                fn.apply(call.receiver, call.args);
            }
        }, waitMs);
    };
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Telegram error filter — single source of truth for "which grammy
|
|
3
|
+
* errors are harmless and should never reach the end user as a
|
|
4
|
+
* 'Fehler: ...' reply."
|
|
5
|
+
*
|
|
6
|
+
* Context: grammy's Bot API wrapper surfaces these as plain Error
|
|
7
|
+
* objects with the description baked into `.message`. Some call sites
|
|
8
|
+
* (live-stream edit races, callback-answer races after a modal was
|
|
9
|
+
* already dismissed, message-to-edit-gone races when the user just
|
|
10
|
+
* deleted the message) produce errors that are 100% benign — they
|
|
11
|
+
* just mean the UI state we were about to write is already there.
|
|
12
|
+
*
|
|
13
|
+
* This file centralises the list so we can update one regex and have
|
|
14
|
+
* the filter apply everywhere. Used by bot.catch(), by the streaming
|
|
15
|
+
* `telegram.ts` finalize path, by handlers/message.ts, and by any
|
|
16
|
+
* future caller that needs to decide "report this to the user or
|
|
17
|
+
* drop it silently."
|
|
18
|
+
*/
|
|
19
|
+
/**
 * Case-insensitive fragments of Telegram error text that indicate a
 * benign race rather than a real failure.
 */
const HARMLESS_PATTERNS = [
    // The big one — live-stream edit races
    /message is not modified/i,
    /specified new message content and reply markup are exactly the same/i,
    // Callback-answer race: the user tapped a stale inline button
    /query is too old and response timeout expired/i,
    /query ID is invalid/i,
    // The user deleted the message we were about to edit
    /message to edit not found/i,
    /message to delete not found/i,
    /MESSAGE_ID_INVALID/i,
];
/**
 * Tell whether `err` is one of the known-harmless Telegram races.
 *
 * The text that gets searched is built from:
 *   - the string itself, when `err` is a string;
 *   - `message` plus a string `description` (as carried by grammy's
 *     GrammyError), when `err` is an Error;
 *   - string `message` / `description` fields, when `err` is any other
 *     object.
 * `null`, `undefined` and anything yielding no text return false, so the
 * function can be called directly inside catch blocks.
 *
 * @param {unknown} err - Caught value of any type.
 * @returns {boolean} true when the text matches a HARMLESS_PATTERNS entry.
 */
export function isHarmlessTelegramError(err) {
    if (err == null) {
        return false;
    }
    let text = "";
    if (err instanceof Error) {
        text = err.message || "";
        // grammy's GrammyError carries the server's reason on .description
        const { description } = err;
        if (typeof description === "string") {
            text = `${text} ${description}`;
        }
    }
    else if (typeof err === "string") {
        text = err;
    }
    else if (typeof err === "object") {
        // Plain object — pick up message/description when they are strings
        if (typeof err.message === "string") {
            text = err.message;
        }
        if (typeof err.description === "string") {
            text = `${text} ${err.description}`;
        }
    }
    if (!text) {
        return false;
    }
    for (const pattern of HARMLESS_PATTERNS) {
        if (pattern.test(text)) {
            return true;
        }
    }
    return false;
}
|
package/dist/web/server.js
CHANGED
|
@@ -31,6 +31,9 @@ import { BOT_ROOT, ENV_FILE, PUBLIC_DIR, MEMORY_DIR, MEMORY_FILE, SOUL_FILE, DAT
|
|
|
31
31
|
import { broadcast } from "../services/broadcast.js";
|
|
32
32
|
import { BOT_VERSION } from "../version.js";
|
|
33
33
|
const WEB_PORT = parseInt(process.env.WEB_PORT || "3100");
|
|
34
|
+
/** Module-scope reference to the WebSocket server so stopWebServer() can
|
|
35
|
+
* tear it down together with the HTTP server. Set inside startWebServer(). */
|
|
36
|
+
let wsServerRef = null;
|
|
34
37
|
const WEB_PASSWORD = process.env.WEB_PASSWORD || "";
|
|
35
38
|
/** The actual port the Web UI is running on (may differ from WEB_PORT if busy). */
|
|
36
39
|
let actualWebPort = WEB_PORT;
|
|
@@ -1426,6 +1429,7 @@ export function startWebServer() {
|
|
|
1426
1429
|
});
|
|
1427
1430
|
});
|
|
1428
1431
|
const wss = new WebSocketServer({ server });
|
|
1432
|
+
wsServerRef = wss;
|
|
1429
1433
|
handleWebSocket(wss);
|
|
1430
1434
|
// Smart port: try WEB_PORT, increment if busy (up to +20)
|
|
1431
1435
|
const MAX_TRIES = 20;
|
|
@@ -1449,6 +1453,58 @@ export function startWebServer() {
|
|
|
1449
1453
|
tryListen(WEB_PORT);
|
|
1450
1454
|
return server;
|
|
1451
1455
|
}
|
|
1456
|
+
/**
 * Gracefully stop the web server so the port is released.
 *
 * Why this exists: `shutdown()` in src/index.ts used to stop grammy and the
 * scheduler but leave the HTTP server listening. macOS then held the
 * listening socket in the socket table, so launchd's next boot of the bot
 * hit `EADDRINUSE :::3100`, threw an Uncaught exception and crash-looped.
 *
 * What this does, in order:
 *   1. Tear down the WebSocket server (terminate every client, then
 *      close it) so its own sockets don't linger.
 *   2. Force-close idle keep-alive sockets (otherwise close() hangs on them).
 *   3. Force-close active open requests (long-poll clients, WebSocket
 *      upgrades that never completed).
 *   4. Await `server.close()` so the listening socket is truly released
 *      before the caller's shutdown continues.
 *
 * Safe to call multiple times; no-op when the server is missing, already
 * closed or never listened. Never throws: every step is best-effort and
 * failures are deliberately ignored, because the caller is shutting down
 * and only cares that the port is free afterwards.
 *
 * @param {import("node:http").Server | null | undefined} server - The
 *   HTTP server returned by startWebServer().
 * @returns {Promise<void>} Resolves once teardown has been attempted.
 */
export async function stopWebServer(server) {
    // Detach the module-level reference before awaiting anything, so an
    // overlapping second call does not try to close the same WebSocket
    // server again.
    const wss = wsServerRef;
    wsServerRef = null;
    if (wss) {
        try {
            for (const client of wss.clients) {
                try {
                    client.terminate();
                }
                catch { /* ignore — one bad client must not block the rest */ }
            }
            await new Promise((resolve) => wss.close(() => resolve()));
        }
        catch { /* ignore — best-effort teardown */ }
    }
    // Guard against a missing server as well as a non-listening one.
    if (!server || !server.listening)
        return;
    try {
        // Node 18.2+ APIs — break any keep-alive / long-poll stalls so
        // server.close() can actually resolve.
        if (typeof server.closeIdleConnections === "function")
            server.closeIdleConnections();
        if (typeof server.closeAllConnections === "function")
            server.closeAllConnections();
    }
    catch { /* ignore */ }
    await new Promise((resolve) => {
        // close() callback fires with an Error arg when the server wasn't
        // listening — we just resolve in either case. A synchronous throw
        // is treated the same way so this promise can never reject.
        try {
            server.close(() => resolve());
        }
        catch {
            resolve();
        }
    });
}
|
|
1452
1508
|
/** Get the actual port the Web UI is running on. */
|
|
1453
1509
|
export function getWebPort() {
|
|
1454
1510
|
return actualWebPort;
|