alvin-bot 5.4.0 → 5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,31 @@
2
2
 
3
3
  All notable changes to Alvin Bot are documented here.
4
4
 
5
+ ## [5.5.0] — 2026-05-18
6
+
7
+ ### The ⛔ Stop button now responds instantly — and honestly
8
+
9
+ Stopping a task is now crisp and truthful. The moment a task finishes,
10
+ the Stop button disappears, so you're never tapping a control for
11
+ something that's already done. And the feedback always matches reality:
12
+ if you tap Stop while Alvin is genuinely working, it stops and says so;
13
+ if the task had already completed, Alvin tells you that plainly instead
14
+ of implying it cut something short. If you hit Stop in that brief moment
15
+ while an answer is being prepared, that answer is now held back — "I
16
+ stopped it" means nothing more arrives. Anything Alvin had already
17
+ shown you stays exactly as it was.
18
+
19
+ ### Fewer false alerts — smarter health monitoring
20
+
21
+ Alvin's self-monitoring got a lot more trustworthy. A planned restart
22
+ or an update is no longer mistaken for a problem, and the daily health
23
+ summary only raises a flag when there's real evidence something is
24
+ actually wrong — so the alerts you do get are ones worth reading.
25
+ Routine background housekeeping no longer shows up as noise.
26
+
27
+ As always, this shipped after a full multi-pass review and a
28
+ fresh-install + stress verification on a clean separate machine.
29
+
5
30
  ## [5.4.0] — 2026-05-18
6
31
 
7
32
  ### Smoother background tasks — and Alvin always tells you the truth
@@ -1946,11 +1946,19 @@ export function registerCommands(bot) {
1946
1946
  const sessionKey = ctx.match[1];
1947
1947
  const session = getSession(sessionKey);
1948
1948
  const lang = session.language;
1949
- if (session.isProcessing) {
1949
+ // A1 — Capture isProcessing BEFORE requestStop (which sets it false)
1950
+ // so we can show the right toast: "stopped" vs "already finished".
1951
+ const wasProcessing = session.isProcessing;
1952
+ if (wasProcessing) {
1950
1953
  requestStop(session, "soft", buildStopDeps(session));
1951
1954
  }
1955
+ // A1 — Honest toast: if the turn had already finished when the button was
1956
+ // tapped, don't claim "stopped" — tell the user it was already done.
1957
+ const toastKey = wasProcessing
1958
+ ? "bot.cancel.stoppedToast"
1959
+ : "bot.cancel.alreadyDone";
1952
1960
  try {
1953
- await ctx.answerCallbackQuery({ text: t("bot.cancel.stoppedToast", lang) });
1961
+ await ctx.answerCallbackQuery({ text: t(toastKey, lang) });
1954
1962
  }
1955
1963
  catch { /* harmless grammy race */ }
1956
1964
  try {
@@ -122,6 +122,37 @@ const TOOL_ICONS = {
122
122
  WebFetch: "📡",
123
123
  Task: "🤖",
124
124
  };
125
+ // ── A3 — stop-suppress-undelivered pure predicate ────────────────────────────
126
/**
 * A3 — decide whether the final answer send must be suppressed because a stop
 * was requested and no visible text has been delivered to the user yet.
 *
 * Rationale carried over from the original note: short answers arrive as one
 * atomic block, so the consumer bail at the top of the streaming loop fires
 * only after the answer is already computed. This predicate guards the final
 * commit step, which is the last point where such an answer can still be
 * held back.
 *
 * No-retract invariant: when any visible text has already reached the user
 * (`visibleTextAlreadySent` truthy) the result is always false, whatever the
 * stop state. Output the user has seen is never retracted.
 *
 * Truth table:
 *   stopRequested truthy + visibleTextAlreadySent falsy  → true  (suppress)
 *   stopRequested truthy + visibleTextAlreadySent truthy → false (no-retract)
 *   stopRequested falsy  + anything                      → false (normal)
 *
 * @param {{ stopRequested: unknown, visibleTextAlreadySent: unknown }} args
 *   Both fields are only tested for truthiness.
 * @returns {boolean} true when the final send should be skipped.
 */
export function shouldSuppressFinalSend(args) {
  // Suppress only when a stop is pending AND nothing is visible yet.
  return Boolean(args.stopRequested) && !args.visibleTextAlreadySent;
}
125
156
  // ── v5.2 live steering — pure routing helper ─────────────────────────────────
126
157
  /**
127
158
  * Decide how a mid-task message (arriving while `session.isProcessing`) should
@@ -785,19 +816,45 @@ export async function handleMessage(ctx) {
785
816
  /* harmless — notice is best-effort */
786
817
  }
787
818
  }
788
- // v5.1 stop: user stopped this query — do NOT finalize partial output
789
- // as a successful answer, no 👍, no history commit. The stop trigger
790
- // (/cancel | /stopall | ⛔ button) already acknowledged to the user.
791
- // The `finally` still runs (clears isProcessing/_qHandle/_stopRequested
792
- // + typing indicator).
793
- if (session._stopRequested) {
794
- return;
795
- }
796
819
  if (bypassAborted) {
797
820
  // v4.12.3 — Bypass path took over; don't finalize, don't react 👍.
798
821
  // Just clean up and return. The finally block still fires.
799
822
  return;
800
823
  }
824
+ // A3 — Suppress-or-finalize gate for stopped turns.
825
+ //
826
+ // shouldSuppressFinalSend is the SINGLE gate controlling whether finalize runs:
827
+ //
828
+ // stop + no visible text (suppress=true):
829
+ // Skip finalize and all side-effects. Nothing reached the user — correct.
830
+ // The stop trigger (/cancel | /stopall | ⛔) already acknowledged this.
831
+ // The `finally` still runs (clears isProcessing/_qHandle/_stopRequested
832
+ // + typing indicator).
833
+ //
834
+ // stop + visible text already sent (suppress=false, _stopRequested truthy):
835
+ // The no-retract invariant applies — partial output already shown must not
836
+ // be left visually unfinished. Run streamer.finalize to flush the throttle
837
+ // timer and drop the status line, then return BEFORE the completed-answer
838
+ // side-effects (👍 / broadcastResponseDone / addToHistory). A stopped turn
839
+ // is NOT a successfully completed turn.
840
+ //
841
+ // no stop (suppress=false, _stopRequested falsy):
842
+ // Normal path — fall through to finalize + all side-effects.
843
+ if (shouldSuppressFinalSend({
844
+ stopRequested: session._stopRequested,
845
+ visibleTextAlreadySent: streamer.hasSentText,
846
+ })) {
847
+ // Branch A: stop + no visible text → suppress entirely.
848
+ return;
849
+ }
850
+ if (session._stopRequested && streamer.hasSentText) {
851
+ // Branch B: stop + visible text already sent → finalize the partial cleanly
852
+ // (flushes throttle timer, clears status line) but do NOT emit the
853
+ // completed-answer signals or commit to history.
854
+ await streamer.finalize(finalText);
855
+ return;
856
+ }
857
+ // Branch C: normal (no stop) — fall through.
801
858
  await streamer.finalize(finalText);
802
859
  emit("message:sent", { userId, text: finalText, platform: "telegram" });
803
860
  // v4.5.0: tell observers the response is complete.
@@ -874,6 +931,15 @@ export async function handleMessage(ctx) {
874
931
  // but if a new turn started and re-populated _qHandle via onQueryHandle we
875
932
  // must NOT null it here — that would break Cycle-1 stop teeth for the new turn.
876
933
  if (session._turnId === _thisTurnId) {
934
+ // A2 — Remove the ⛔ Stop control message as the FIRST action when the
935
+ // turn ends, so the stale button disappears before any post-turn work.
936
+ // Best-effort: if it was already deleted or the bot lacks permission, ignore.
937
+ if (stopMsgId !== null) {
938
+ try {
939
+ await ctx.api.deleteMessage(ctx.chat.id, stopMsgId);
940
+ }
941
+ catch { /* harmless grammy race */ }
942
+ }
877
943
  session.isProcessing = false;
878
944
  session.abortController = null;
879
945
  // v5.2 — Close and clear the SteerChannel; reset per-turn ack flag.
@@ -887,14 +953,6 @@ export async function handleMessage(ctx) {
887
953
  session._stopRequested = null; // safe: token matches → no newer turn has set this
888
954
  session._turnId = null;
889
955
  }
890
- // v5.1 — Remove the ⛔ Stop control message (sent at processing start).
891
- // Best-effort: if it was already deleted or the bot lacks permission, ignore.
892
- if (stopMsgId !== null) {
893
- try {
894
- await ctx.api.deleteMessage(ctx.chat.id, stopMsgId);
895
- }
896
- catch { /* harmless grammy race */ }
897
- }
898
956
  // Check for queued messages — they'll be prepended to the next real message
899
957
  // Queue stays in session and gets consumed on next handleMessage call
900
958
  }
package/dist/i18n.js CHANGED
@@ -378,6 +378,12 @@ const strings = {
378
378
  es: "⛔ Detenido",
379
379
  fr: "⛔ Arrêté",
380
380
  },
381
+ "bot.cancel.alreadyDone": {
382
+ en: "Nothing running — that already finished.",
383
+ de: "Nichts läuft — das war schon fertig.",
384
+ es: "Nada en curso — eso ya terminó.",
385
+ fr: "Rien en cours — c'était déjà terminé.",
386
+ },
381
387
  // /model
382
388
  "bot.model.chooseHeader": {
383
389
  en: "🤖 *Choose model:*",
package/dist/index.js CHANGED
@@ -187,7 +187,7 @@ import { loadSkills } from "./services/skills.js";
187
187
  import { loadHooks } from "./services/hooks.js";
188
188
  import { registerShutdownHandler } from "./services/restart.js";
189
189
  import { cancelAllSubAgents } from "./services/subagents.js";
190
- import { startWatchdog, stopWatchdog, checkCrashLoopBrake } from "./services/watchdog.js";
190
+ import { startWatchdog, stopWatchdog, checkCrashLoopBrake, markExpectedRestart } from "./services/watchdog.js";
191
191
  import { getRegistry } from "./engine.js";
192
192
  import { scanAssets } from "./services/asset-index.js";
193
193
  // Scan asset directory and generate INDEX.json + INDEX.md
@@ -383,6 +383,12 @@ const shutdown = async () => {
383
383
  return;
384
384
  isShuttingDown = true;
385
385
  console.log("Graceful shutdown initiated...");
386
+ // Mark the imminent exit as an intentional restart so the next boot's
387
+ // decideBrakeAction does not count it as a crash. This covers launchctl
388
+ // unload/load (SIGTERM from launchd) in addition to /restart and /update
389
+ // which call markExpectedRestart() themselves before process.exit(0).
390
+ // Must run before stopWatchdog() (which just clears timers, not the beacon).
391
+ markExpectedRestart();
386
392
  // E2: shutdown-notification — await the async cancellation so running
387
393
  // agents can post a cancellation message to Telegram before the bot
388
394
  // stops. Capped at 5s internally so a hang can't block shutdown.
@@ -27,6 +27,25 @@ import { dirname } from "path";
27
27
  import { parseOutputFileStatus } from "./async-agent-parser.js";
28
28
  import { ASYNC_AGENTS_STATE_FILE } from "../paths.js";
29
29
  import { getAllSessions } from "./session.js";
30
/**
 * B3 — detect a permanent "target chat does not exist" delivery failure
 * (Telegram 400 "Bad Request: chat not found"). An agent whose target chat
 * is invalid should be abandoned rather than retried.
 *
 * Deliberately a local predicate instead of an import from
 * subagent-delivery: per the original note, many test suites mock that
 * module with only deliverSubAgentResult, so depending on another export
 * from it would break them. The match is narrow — only the chat-not-found
 * signature, looked up in the error's `message` and `description` fields.
 *
 * @param {unknown} err - value caught from (or returned by) a delivery attempt.
 * @returns {boolean} true only for object-shaped errors whose message or
 *   description contains "chat not found" (case-insensitive).
 */
function isChatNotFoundError(err) {
  // Non-objects (strings, numbers, undefined) and null carry no fields to inspect.
  if (err === null || typeof err !== "object") {
    return false;
  }
  const message = err.message ?? "";
  const description = err.description ?? "";
  return /chat not found/i.test(`${message} ${description}`);
}
30
49
  /** How often the polling loop runs against each pending agent. */
31
50
  const POLL_INTERVAL_MS = 15_000;
32
51
  /** Hard ceiling per agent — 12h. After this, give up and deliver
@@ -199,22 +218,38 @@ export async function pollOnce() {
199
218
  const now = Date.now();
200
219
  const toRemove = [];
201
220
  const missingFileFailureMs = getMissingFileFailureMs();
221
+ // B3 — when a delivery attempt proves the target chat is permanently
222
+ // invalid ("chat not found", e.g. the stale chat_id:1 test agent),
223
+ // abandon the agent so the watcher never retries it. Without this, a
224
+ // pending agent with an invalid target spams stderr on every poll
225
+ // cycle (inflating errors_24h) and lingers until the 12h giveUpAt.
226
+ const abandonIfInvalidTarget = (entry, outcome) => {
227
+ if (!outcome.chatNotFound)
228
+ return;
229
+ if (!toRemove.includes(entry.agentId))
230
+ toRemove.push(entry.agentId);
231
+ console.warn(`[async-watcher] abandoning agent ${entry.agentId} — delivery target ` +
232
+ `chat ${String(entry.chatId)} not found (invalid/stale); will not retry`);
233
+ };
202
234
  for (const entry of pending.values()) {
203
235
  entry.lastCheckedAt = now;
204
236
  // Timeout check first — if the agent is past its giveUpAt, give up
205
237
  // regardless of whether the file shows progress.
206
238
  if (now >= entry.giveUpAt) {
207
- await deliverAsFailure(entry, "timeout", "Agent ran longer than 12h — giving up");
239
+ const outcome = await deliverAsFailure(entry, "timeout", "Agent ran longer than 12h — giving up");
240
+ abandonIfInvalidTarget(entry, outcome);
208
241
  toRemove.push(entry.agentId);
209
242
  continue;
210
243
  }
211
244
  const status = await parseOutputFileStatus(entry.outputFile);
212
245
  if (status.state === "completed") {
213
- await deliverAsCompleted(entry, status.output, status.tokensUsed);
246
+ const outcome = await deliverAsCompleted(entry, status.output, status.tokensUsed);
247
+ abandonIfInvalidTarget(entry, outcome);
214
248
  toRemove.push(entry.agentId);
215
249
  }
216
250
  else if (status.state === "failed") {
217
- await deliverAsFailure(entry, "error", status.error);
251
+ const outcome = await deliverAsFailure(entry, "error", status.error);
252
+ abandonIfInvalidTarget(entry, outcome);
218
253
  toRemove.push(entry.agentId);
219
254
  }
220
255
  else if (status.state === "missing" &&
@@ -222,7 +257,8 @@ export async function pollOnce() {
222
257
  // v4.14.2 — Zombie guard: the subprocess never created its
223
258
  // output file within `missingFileFailureMs` (default 10 min).
224
259
  // Declare failed instead of polling until the 12h giveUpAt.
225
- await deliverAsFailure(entry, "error", `Dispatched subprocess never wrote its output file (${Math.round((now - entry.startedAt) / 60_000)}m after start). Likely crashed before initializing, or the file was removed externally.`);
260
+ const outcome = await deliverAsFailure(entry, "error", `Dispatched subprocess never wrote its output file (${Math.round((now - entry.startedAt) / 60_000)}m after start). Likely crashed before initializing, or the file was removed externally.`);
261
+ abandonIfInvalidTarget(entry, outcome);
226
262
  toRemove.push(entry.agentId);
227
263
  }
228
264
  // running / missing-but-young → keep polling next cycle
@@ -254,13 +290,20 @@ async function deliverAsCompleted(entry, output, tokensUsed) {
254
290
  tokensUsed: tokensUsed ?? { input: 0, output: 0 },
255
291
  duration: Date.now() - entry.startedAt,
256
292
  };
293
+ let chatNotFound = false;
257
294
  try {
258
- await deliverSubAgentResult(info, result);
295
+ const outcome = await deliverSubAgentResult(info, result);
296
+ chatNotFound = !!outcome?.chatNotFound;
259
297
  }
260
298
  catch (err) {
261
299
  console.error(`[async-watcher] delivery failed for ${entry.agentId}:`, err);
300
+ // deliverSubAgentResult normally swallows send errors and reports
301
+ // chatNotFound via its return value; if it ever throws, still detect
302
+ // the permanent invalid-target case here.
303
+ chatNotFound = isChatNotFoundError(err);
262
304
  }
263
305
  decrementPendingCount(entry.sessionKey);
306
+ return { chatNotFound };
264
307
  }
265
308
  async function deliverAsFailure(entry, status, error) {
266
309
  const { deliverSubAgentResult } = await import("./subagent-delivery.js");
@@ -283,13 +326,17 @@ async function deliverAsFailure(entry, status, error) {
283
326
  duration: Date.now() - entry.startedAt,
284
327
  error,
285
328
  };
329
+ let chatNotFound = false;
286
330
  try {
287
- await deliverSubAgentResult(info, result);
331
+ const outcome = await deliverSubAgentResult(info, result);
332
+ chatNotFound = !!outcome?.chatNotFound;
288
333
  }
289
334
  catch (err) {
290
335
  console.error(`[async-watcher] failure delivery failed for ${entry.agentId}:`, err);
336
+ chatNotFound = isChatNotFoundError(err);
291
337
  }
292
338
  decrementPendingCount(entry.sessionKey);
339
+ return { chatNotFound };
293
340
  }
294
341
  // ── Test helpers ──────────────────────────────────────────────────
295
342
  /**
@@ -24,6 +24,22 @@ function isTelegramParseError(err) {
24
24
  const haystack = `${e.message ?? ""} ${e.description ?? ""}`;
25
25
  return /can't parse entities|can't find end of the entity/i.test(haystack);
26
26
  }
27
/**
 * B3 — true when a Telegram send was rejected because the TARGET CHAT DOES
 * NOT EXIST (HTTP 400 "Bad Request: chat not found").
 *
 * That condition is permanent: the chat id is invalid, so every retry fails
 * the same way. It is distinct from transient failures (network,
 * rate-limit), which are worth retrying. The match is narrow — only the
 * chat-not-found signature in `message` / `description`, never a generic
 * Bad Request.
 *
 * @param {unknown} err - value caught from a send attempt.
 * @returns {boolean} true only for object-shaped errors whose message or
 *   description contains "chat not found" (case-insensitive).
 */
export function isChatNotFoundError(err) {
  // Non-objects and null have no message/description to inspect.
  if (err === null || typeof err !== "object") {
    return false;
  }
  const message = err.message ?? "";
  const description = err.description ?? "";
  return /chat not found/i.test(`${message} ${description}`);
}
27
43
  /**
28
44
  * Send a Markdown message with an automatic plain-text retry on parse
29
45
  * errors. Any other error propagates to the caller's outer catch.
@@ -251,28 +267,29 @@ export function createLiveStream(chatId, agentName) {
251
267
  * - "slack" / "discord" / "whatsapp" → delivery-registry lookup
252
268
  */
253
269
  export async function deliverSubAgentResult(info, result, opts = {}) {
270
+ const OK = { chatNotFound: false };
254
271
  // Implicit spawns: the Task-tool bridge in the main stream has already
255
272
  // surfaced the output; extra delivery would be duplication.
256
273
  if (info.source === "implicit")
257
- return;
274
+ return OK;
258
275
  const effective = opts.visibility ?? getVisibility();
259
276
  if (effective === "silent")
260
- return;
277
+ return OK;
261
278
  if (!info.parentChatId) {
262
279
  console.warn(`[subagent-delivery] missing parentChatId for ${info.name} (source=${info.source})`);
263
- return;
280
+ return OK;
264
281
  }
265
282
  // v4.14 — Platform routing. Telegram is the default path (unchanged).
266
283
  const platform = info.platform ?? "telegram";
267
284
  if (platform !== "telegram") {
268
285
  await deliverViaRegistry(platform, info, result);
269
- return;
286
+ return OK;
270
287
  }
271
288
  // ── Telegram path (v4.12.x behavior, unchanged) ──────────────────
272
289
  const api = getBotApi();
273
290
  if (!api) {
274
291
  console.warn(`[subagent-delivery] no bot api available for ${info.name}`);
275
- return;
292
+ return OK;
276
293
  }
277
294
  // Telegram's chatId is always a number at runtime; defensive cast.
278
295
  const tgChatId = typeof info.parentChatId === "number"
@@ -280,7 +297,7 @@ export async function deliverSubAgentResult(info, result, opts = {}) {
280
297
  : Number(info.parentChatId);
281
298
  if (!Number.isFinite(tgChatId)) {
282
299
  console.warn(`[subagent-delivery] invalid telegram chatId for ${info.name}`);
283
- return;
300
+ return OK;
284
301
  }
285
302
  const banner = buildBanner(info, result);
286
303
  const body = result.output?.trim() || `(empty output)`;
@@ -297,12 +314,12 @@ export async function deliverSubAgentResult(info, result, opts = {}) {
297
314
  console.error(`[subagent-delivery] file upload failed:`, err);
298
315
  await api.sendMessage(tgChatId, body.slice(0, MAX_TG_CHUNK));
299
316
  }
300
- return;
317
+ return OK;
301
318
  }
302
319
  // Case 2: fits in a single message → banner + body joined
303
320
  if (body.length + banner.length + 2 <= MAX_TG_CHUNK) {
304
321
  await sendWithMarkdownFallback(api, tgChatId, `${banner}\n\n${body}`);
305
- return;
322
+ return OK;
306
323
  }
307
324
  // Case 3: medium output → banner as its own message, body chunked
308
325
  await sendWithMarkdownFallback(api, tgChatId, banner);
@@ -311,9 +328,15 @@ export async function deliverSubAgentResult(info, result, opts = {}) {
311
328
  // arbitrary chunk boundaries would be inconsistent anyway.
312
329
  await api.sendMessage(tgChatId, body.slice(i, i + MAX_TG_CHUNK));
313
330
  }
331
+ return OK;
314
332
  }
315
333
  catch (err) {
316
334
  console.error(`[subagent-delivery] send failed for ${info.name}:`, err);
335
+ // B3 — report a permanent invalid-target failure so the watcher can
336
+ // abandon this agent instead of retrying it forever. Any other error
337
+ // (network, rate-limit, parse) is NOT reported as chatNotFound, so the
338
+ // agent's normal retry/timeout lifecycle is unchanged.
339
+ return { chatNotFound: isChatNotFoundError(err) };
317
340
  }
318
341
  }
319
342
  /**
@@ -17,6 +17,15 @@ export class TelegramStreamer {
17
17
  this.api = api;
18
18
  this.replyTo = replyToMessageId;
19
19
  }
20
+ /**
21
+ * True when at least one message has been sent to the user (i.e. messageId
22
+ * is set). Used by the A3 suppress-undelivered guard in message.ts to
23
+ * determine whether visible text has already reached the user — if so, the
24
+ * no-retract invariant prevents suppressing the final send.
25
+ */
26
+ get hasSentText() {
27
+ return this.messageId !== null;
28
+ }
20
29
  /**
21
30
  * Set a transient status line (e.g. "📖 Read file.html…") that gets
22
31
  * appended to the current accumulated text. Passing null clears it.
@@ -33,12 +33,81 @@
33
33
  * ALVIN_TRENDS_INTERVAL_HOURS=24 → snapshot cadence
34
34
  * ALVIN_TRENDS_AI_AFTER_DAYS=7 → days of data before AI analysis kicks in
35
35
  */
36
- import { appendFileSync, existsSync, readFileSync, mkdirSync } from "fs";
36
+ import { appendFileSync, existsSync, readFileSync, writeFileSync, mkdirSync } from "fs";
37
37
  import { join, dirname } from "path";
38
38
  import { homedir } from "os";
39
39
  import { BOT_VERSION } from "../version.js";
40
40
  import { emitCritical } from "./critical-notify.js";
41
41
  const TRENDS_PATH = join(homedir(), ".alvin-bot", "state", "trends.jsonl");
42
+ /**
43
+ * B2 — peak-uptime high-water mark. The trends collector takes its FIRST
44
+ * snapshot ~60s after every boot (startTrendsCollector schedules it at
45
+ * 60_000ms). takeSnapshot() records uptime_s = process.uptime(), so the
46
+ * first post-restart sample is structurally ≈ 62s. With deliberate
47
+ * restarts (/update, launchctl reload) those ~62s samples dominate
48
+ * trends.jsonl, so the 30-day AI pass perpetually concludes "restart
49
+ * loop, never lives past ~62s" even when the process has actually been
50
+ * continuously up for hours by the time the daily snapshot fires.
51
+ *
52
+ * Fix: persist the MAXIMUM real uptime this bot has ever observed (across
53
+ * process generations) and record it on every snapshot as uptime_peak_s.
54
+ * The peak only ever derives from process.uptime() — it is never
55
+ * fabricated or extrapolated. The anomaly evaluation then keys on the
56
+ * peak (hasRepresentativeUptime), so a process that genuinely lived for
57
+ * hours is not flagged as a ~62s loop. Because the mark persists across
58
+ * restarts, a later crash loop is caught via crashes_24h, not the peak.
59
+ *
60
+ * Stored next to trends.jsonl (state/), honoring ALVIN_DATA_DIR so tests
61
+ * and non-default installs work. Survives restarts by design — that is
62
+ * the whole point of a high-water mark.
63
+ */
64
// Directory holding the trends state files: `<base>/state`, where <base> is
// ALVIN_DATA_DIR when that variable is set to a non-empty value, otherwise
// ~/.alvin-bot. `||` (not `??`) is intentional so an empty string also
// falls back to the default.
function trendsStateDir() {
  const baseDir = process.env.ALVIN_DATA_DIR || join(homedir(), ".alvin-bot");
  return join(baseDir, "state");
}
68
// Full path of the persisted uptime high-water-mark file inside the trends
// state directory.
function uptimePeakPath() {
  return join(trendsStateDir(), "uptime-peak.json");
}
71
+ /**
72
+ * The startup transient: takeSnapshot's first sample is taken ~60s after
73
+ * boot, so any uptime at/under this is indistinguishable from "just
74
+ * restarted". An uptime ABOVE this proves the process actually lived past
75
+ * the post-restart sampling window. 600s (10 min) is comfortably above
76
+ * the 60s first-sample delay + scheduling jitter and far below the 24h
77
+ * cron cadence, so a healthy bot trivially clears it while a real
78
+ * crash-loop (exits within seconds/a couple minutes) never does.
79
+ */
80
+ export const STARTUP_TRANSIENT_S = 600;
81
/**
 * Read the persisted peak uptime, fold in the CURRENT process uptime,
 * persist the (possibly larger) high-water mark, and return it.
 *
 * The only uptime input is process.uptime() (rounded to whole seconds);
 * nothing is invented or extrapolated. A missing, unreadable or malformed
 * file is treated as "no stored peak". A failed write is ignored, and the
 * in-memory peak is still returned.
 *
 * @returns {number} max(stored peak, current uptime) in seconds.
 */
function bumpAndReadUptimePeak() {
  const currentReal = Math.round(process.uptime());
  let stored = 0;
  try {
    const parsed = JSON.parse(readFileSync(uptimePeakPath(), "utf-8"));
    const candidate = parsed.peak_s;
    // Accept only a finite, positive number; anything else leaves stored at 0.
    if (typeof candidate === "number" && Number.isFinite(candidate) && candidate > 0) {
      stored = candidate;
    }
  } catch {
    // No file yet / unreadable / not JSON — start the high-water mark from
    // the current real uptime. Not an error.
  }
  const peak = Math.max(stored, currentReal);
  try {
    mkdirSync(trendsStateDir(), { recursive: true });
    writeFileSync(uptimePeakPath(), JSON.stringify({ peak_s: peak }), "utf-8");
  } catch {
    // Disk full / permissions — non-fatal; the in-memory peak is still returned.
  }
  return peak;
}
42
111
  const DEFAULT_INTERVAL_HOURS = 24;
43
112
  const DEFAULT_AI_THRESHOLD_DAYS = 7;
44
113
  const MAX_RETAIN_DAYS = 90;
@@ -54,6 +123,18 @@ const MAX_RETAIN_DAYS = 90;
54
123
  * (a successful, expected fallback — not an error)
55
124
  * - critical-notify's own delivery-outcome line, kept on stderr on
56
125
  * purpose so it stays visible even in brake/crash context
126
+ * - B3: subagent-delivery's "send failed … chat not found" line for a
127
+ * stale/test async-agent whose delivery target chat no longer exists
128
+ * (e.g. the recurring chat_id:1 test agent). This is benign noise,
129
+ * not a real fault: the target chat is invalid, the watcher now
130
+ * abandons such agents (see async-agent-watcher.ts), and counting it
131
+ * made errors_24h creep upward indefinitely on every poll cycle.
132
+ * The match is DELIBERATELY narrow — it requires BOTH the
133
+ * `[subagent-delivery] send failed` prefix AND a `chat not found`
134
+ * cause on the same line. A subagent-delivery failure for ANY other
135
+ * reason (network, rate-limit, parse) is still counted, and a
136
+ * `chat not found` from ANY OTHER subsystem (a real misconfigured
137
+ * target) is still counted.
57
138
  *
58
139
  * Counting those turned this very monitor into a false-alarm generator:
59
140
  * it flagged its OWN log lines plus every release's restart churn, so
@@ -65,7 +146,7 @@ const MAX_RETAIN_DAYS = 90;
65
146
  * any, get added here in one place instead of being chased across the
66
147
  * codebase.
67
148
  */
68
- export const ERR_LOG_PATTERN = /^(?!.*(?:\[critical-notify\]|\[subagent-delivery\] Markdown parse failed)).+/;
149
+ export const ERR_LOG_PATTERN = /^(?!.*(?:\[critical-notify\]|\[subagent-delivery\] Markdown parse failed|\[subagent-delivery\] send failed.*chat not found)).+/;
69
150
  let trendsTimer = null;
70
151
  function isDisabled() {
71
152
  return (process.env.ALVIN_DISABLE_TRENDS === "true" ||
@@ -134,6 +215,7 @@ function takeSnapshot(activeProvider) {
134
215
  return {
135
216
  ts: new Date().toISOString(),
136
217
  uptime_s: Math.round(process.uptime()),
218
+ uptime_peak_s: bumpAndReadUptimePeak(),
137
219
  rss_mb: Math.round(mem.rss / 1024 / 1024),
138
220
  heap_mb: Math.round(mem.heapUsed / 1024 / 1024),
139
221
  crashes_24h: readWatchdogCrashes24h(),
@@ -195,6 +277,92 @@ SUGGESTION: <one shell command OR observation for the operator>
195
277
  --- LAST {N} DAYS OF SNAPSHOTS ---
196
278
  {SNAPSHOTS}
197
279
  --- END ---`;
280
/**
 * Returns true if at least one snapshot in `snaps` carries a numeric
 * crashes_24h greater than zero.
 *
 * Per the original note (B1), deliberate restarts (SIGTERM / launchctl
 * reload / /restart / /update) set the expectedRestart beacon and are not
 * counted as crashes, so crashes_24h === 0 on every snapshot means there is
 * no real crash evidence — only intentional restarts.
 *
 * Pure function, exported for unit testing.
 *
 * @param {Array<{ crashes_24h?: unknown }>} snaps - trend snapshots.
 * @returns {boolean}
 */
export function hasRealCrashEvidence(snaps) {
  // Non-numeric or missing counters are ignored rather than coerced.
  return snaps.some((snap) => typeof snap.crashes_24h === "number" && snap.crashes_24h > 0);
}
295
/**
 * B2 — Returns true if AT LEAST ONE snapshot shows an uptime above
 * STARTUP_TRANSIENT_S, i.e. evidence that the process lived past the
 * post-restart sampling window at some point.
 *
 * Per snapshot, the value compared is uptime_peak_s when it is a finite
 * number; otherwise the raw uptime_s when that is a finite number (legacy
 * pre-B2 lines have no peak field); otherwise 0.
 *
 * NOTE: uptime_peak_s is described as a high-water mark persisted across
 * process generations, so once any run has exceeded the transient, later
 * snapshots keep reporting a representative peak — including during a
 * subsequent crash loop. Callers must therefore combine this with real
 * crash evidence (crashes_24h) before suppressing a restart-loop warning.
 *
 * Pure function, exported for unit testing.
 *
 * @param {Array<{ uptime_peak_s?: unknown, uptime_s?: unknown }>} snaps
 * @returns {boolean}
 */
export function hasRepresentativeUptime(snaps) {
  const isFiniteNumber = (value) => typeof value === "number" && Number.isFinite(value);
  return snaps.some((snap) => {
    let observed = 0;
    if (isFiniteNumber(snap.uptime_peak_s)) {
      observed = snap.uptime_peak_s;
    } else if (isFiniteNumber(snap.uptime_s)) {
      // Legacy snapshot without a peak field: fall back to raw uptime.
      observed = snap.uptime_s;
    }
    return observed > STARTUP_TRANSIENT_S;
  });
}
324
/**
 * B2/B4 — Pure crash/restart WARN suppression decision.
 *
 * Extracted so the COMPOSITION of the two gates (B2 before B4) is unit
 * testable, not just each helper in isolation.
 *
 * Result:
 *   - "none": the flagged anomaly is not a crash/restart pattern, or real
 *     crash evidence exists (crashes_24h > 0 somewhere) — the WARN must be
 *     emitted. Real crash evidence always wins, because the uptime
 *     high-water mark can still read as representative during a crash loop
 *     that follows a healthy period.
 *   - "representative-uptime" (B2): no real crash evidence and at least one
 *     snapshot shows an uptime past the startup transient — a
 *     deliberate-restart / sampling artifact.
 *   - "no-crash-evidence" (B4): crash/restart pattern, no real crash
 *     evidence and no representative uptime either.
 *
 * Pure function, exported for unit testing.
 *
 * @param {boolean} isCrashRestartPattern - whether the anomaly description
 *   matched the crash/restart/uptime pattern.
 * @param {Array<object>} snaps - recent trend snapshots.
 * @returns {"none" | "representative-uptime" | "no-crash-evidence"}
 */
export function evaluateCrashRestartSuppression(isCrashRestartPattern, snaps) {
  if (!isCrashRestartPattern) {
    return "none";
  }
  // A real crash is never suppressed, whatever the uptime peak says.
  if (hasRealCrashEvidence(snaps)) {
    return "none";
  }
  return hasRepresentativeUptime(snaps) ? "representative-uptime" : "no-crash-evidence";
}
362
/**
 * Test-only: take a snapshot without writing to trends.jsonl.
 * Thin pass-through to takeSnapshot.
 *
 * @param {*} activeProvider - forwarded unchanged to takeSnapshot.
 * @returns {*} whatever takeSnapshot returns.
 */
export function __takeSnapshotForTest(activeProvider) {
  return takeSnapshot(activeProvider);
}
198
366
  function parseTrendResponse(text) {
199
367
  if (/^ANOMALY:\s*NONE/im.test(text)) {
200
368
  return {
@@ -296,6 +464,38 @@ async function dailyTask(registry) {
296
464
  console.log(`📊 Trends AI: no anomaly detected`);
297
465
  return;
298
466
  }
467
+ const recentSnaps = readSnapshots(30);
468
+ const isCrashRestartPattern = /crash|restart|loop|uptime/i.test(result.description);
469
+ // B2 gate: suppress an "uptime stuck at ~62s / restart loop" WARN when
470
+ // the snapshots PROVE the process actually lived past the startup
471
+ // transient. The first per-boot snapshot is structurally sampled ~60s
472
+ // after boot, so raw uptime_s reads ≈62 even for a perfectly healthy
473
+ // bot that has been up for hours by the time the daily snapshot fires.
474
+ // uptime_peak_s is the high-water mark of real process.uptime() across
475
+ // process generations: if ANY snapshot's peak exceeds the transient,
476
+ // the "~62s loop" conclusion is factually false. A genuine crash loop may
477
+ // still carry a high persisted peak; the crashes_24h guard keeps its WARN.
478
+ if (isCrashRestartPattern && !hasRealCrashEvidence(recentSnaps) && hasRepresentativeUptime(recentSnaps)) {
479
+ console.log(`📊 Trends AI: suppressed WARN "${result.description}" — ` +
480
+ `uptime/restart pattern flagged but at least one snapshot shows a ` +
481
+ `representative peak uptime (>${STARTUP_TRANSIENT_S}s); the process ` +
482
+ `did live well past the post-restart sampling window, not a ~62s loop`);
483
+ return;
484
+ }
485
+ // B4 gate: suppress WARN when the AI flags a crash/restart-loop pattern
486
+ // but the historical snapshots contain ZERO real crash evidence
487
+ // (crashes_24h === 0 across the board). This happens when the bot was
488
+ // restarted deliberately (launchctl reload / /update / /restart) — those
489
+ // produce low uptimes that the AI reads as "restart loop", but the
490
+ // crash counter stays at 0 because markExpectedRestart() was written
491
+ // on each clean shutdown. A real crash loop WILL have crashes_24h > 0
492
+ // in at least one snapshot and will still fire the WARN.
493
+ if (isCrashRestartPattern && !hasRealCrashEvidence(recentSnaps)) {
494
+ console.log(`📊 Trends AI: suppressed WARN "${result.description}" — ` +
495
+ `crash/restart pattern detected but crashes_24h=0 across all snapshots ` +
496
+ `(deliberate-restart-only, not a real crash loop)`);
497
+ return;
498
+ }
299
499
  console.log(`📊 Trends AI: ANOMALY (${result.severity}) — ${result.description}`);
300
500
  emitCritical({
301
501
  category: "custom",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "alvin-bot",
3
- "version": "5.4.0",
3
+ "version": "5.5.0",
4
4
  "description": "Alvin Bot — Your personal AI agent on Telegram, WhatsApp, Discord, Signal, and Web.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",