npm - alvin-bot - Versions diffs - 5.4.0 → 5.5.0 - Mend

alvin-bot 5.4.0 → 5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/CHANGELOG.md +25 -0
package/dist/handlers/commands.js +10 -2
package/dist/handlers/message.js +74 -16
package/dist/i18n.js +6 -0
package/dist/index.js +7 -1
package/dist/services/async-agent-watcher.js +53 -6
package/dist/services/subagent-delivery.js +31 -8
package/dist/services/telegram.js +9 -0
package/dist/services/trends.js +202 -2
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,31 @@
 All notable changes to Alvin Bot are documented here.
+## [5.5.0] — 2026-05-18
+### The ⛔ Stop button now responds instantly — and honestly
+Stopping a task is now crisp and truthful. The moment a task finishes,
+the Stop button disappears, so you're never tapping a control for
+something that's already done. And the feedback always matches reality:
+if you tap Stop while Alvin is genuinely working, it stops and says so;
+if the task had already completed, Alvin tells you that plainly instead
+of implying it cut something short. If you hit Stop in that brief moment
+while an answer is being prepared, that answer is now held back — "I
+stopped it" means nothing more arrives. Anything Alvin had already
+shown you stays exactly as it was.
+### Fewer false alerts — smarter health monitoring
+Alvin's self-monitoring got a lot more trustworthy. A planned restart
+or an update is no longer mistaken for a problem, and the daily health
+summary only raises a flag when there's real evidence something is
+actually wrong — so the alerts you do get are ones worth reading.
+Routine background housekeeping no longer shows up as noise.
+As always, this shipped after a full multi-pass review and a
+fresh-install + stress verification on a clean separate machine.
 ## [5.4.0] — 2026-05-18
 ### Smoother background tasks — and Alvin always tells you the truth

package/dist/handlers/commands.js CHANGED Viewed

@@ -1946,11 +1946,19 @@ export function registerCommands(bot) {
         const sessionKey = ctx.match[1];
         const session = getSession(sessionKey);
         const lang = session.language;
-        if (session.isProcessing) {
+        // A1 — Capture isProcessing BEFORE requestStop (which sets it false)
+        // so we can show the right toast: "stopped" vs "already finished".
+        const wasProcessing = session.isProcessing;
+        if (wasProcessing) {
             requestStop(session, "soft", buildStopDeps(session));
         }
+        // A1 — Honest toast: if the turn had already finished when the button was
+        // tapped, don't claim "stopped" — tell the user it was already done.
+        const toastKey = wasProcessing
+            ? "bot.cancel.stoppedToast"
+            : "bot.cancel.alreadyDone";
         try {
-            await ctx.answerCallbackQuery({ text: t("bot.cancel.stoppedToast", lang) });
+            await ctx.answerCallbackQuery({ text: t(toastKey, lang) });
         }
         catch { /* harmless grammy race */ }
         try {

package/dist/handlers/message.js CHANGED Viewed

@@ -122,6 +122,37 @@ const TOOL_ICONS = {
     WebFetch: "📡",
     Task: "🤖",
 };
+// ── A3 — stop-suppress-undelivered pure predicate ────────────────────────────
+/**
+ * Determine whether the final answer send should be suppressed because a stop
+ * was requested and no visible text has yet been delivered to the user.
+ *
+ * This closes the gap behind "I clicked Stop but it answered anyway": the
+ * Claude SDK delivers short answers atomically, so the for-await loop parks
+ * on IPC the whole time, and the complete answer arrives as one block. By the
+ * time the consumer bail fires at the top of the loop, the answer is computed
+ * and about to be sent. This guard is the only stoppable moment for atomic
+ * answers.
+ *
+ * HARD CONSTRAINT — no-retract invariant: if ANY visible text has already
+ * been streamed/committed to the user (visibleTextAlreadySent=true), the
+ * predicate returns false regardless of stop state. Partial output that
+ * already reached the user is NEVER retracted. The consumer bail in the
+ * for-await loop already handles mid-stream stops; this guard only acts on
+ * the final commit step.
+ *
+ * Truth table:
+ *   stopRequested=truthy  + visibleTextAlreadySent=false → true  (suppress)
+ *   stopRequested=truthy  + visibleTextAlreadySent=true  → false (no-retract)
+ *   stopRequested=falsy   + *                            → false (normal)
+ */
+export function shouldSuppressFinalSend(args) {
+    if (!args.stopRequested)
+        return false;
+    if (args.visibleTextAlreadySent)
+        return false;
+    return true;
+}
 // ── v5.2 live steering — pure routing helper ─────────────────────────────────
 /**
  * Decide how a mid-task message (arriving while `session.isProcessing`) should
@@ -785,19 +816,45 @@ export async function handleMessage(ctx) {
                 /* harmless — notice is best-effort */
             }
         }
-        // v5.1 stop: user stopped this query — do NOT finalize partial output
-        // as a successful answer, no 👍, no history commit. The stop trigger
-        // (/cancel | /stopall | ⛔ button) already acknowledged to the user.
-        // The `finally` still runs (clears isProcessing/_qHandle/_stopRequested
-        // + typing indicator).
-        if (session._stopRequested) {
-            return;
-        }
         if (bypassAborted) {
             // v4.12.3 — Bypass path took over; don't finalize, don't react 👍.
             // Just clean up and return. The finally block still fires.
             return;
         }
+        // A3 — Suppress-or-finalize gate for stopped turns.
+        //
+        // shouldSuppressFinalSend is the SINGLE gate controlling whether finalize runs:
+        //
+        //   stop + no visible text (suppress=true):
+        //     Skip finalize and all side-effects. Nothing reached the user — correct.
+        //     The stop trigger (/cancel | /stopall | ⛔) already acknowledged this.
+        //     The `finally` still runs (clears isProcessing/_qHandle/_stopRequested
+        //     + typing indicator).
+        //
+        //   stop + visible text already sent (suppress=false, _stopRequested truthy):
+        //     The no-retract invariant applies — partial output already shown must not
+        //     be left visually unfinished. Run streamer.finalize to flush the throttle
+        //     timer and drop the status line, then return BEFORE the completed-answer
+        //     side-effects (👍 / broadcastResponseDone / addToHistory). A stopped turn
+        //     is NOT a successfully completed turn.
+        //
+        //   no stop (suppress=false, _stopRequested falsy):
+        //     Normal path — fall through to finalize + all side-effects.
+        if (shouldSuppressFinalSend({
+            stopRequested: session._stopRequested,
+            visibleTextAlreadySent: streamer.hasSentText,
+        })) {
+            // Branch A: stop + no visible text → suppress entirely.
+            return;
+        }
+        if (session._stopRequested && streamer.hasSentText) {
+            // Branch B: stop + visible text already sent → finalize the partial cleanly
+            // (flushes throttle timer, clears status line) but do NOT emit the
+            // completed-answer signals or commit to history.
+            await streamer.finalize(finalText);
+            return;
+        }
+        // Branch C: normal (no stop) — fall through.
         await streamer.finalize(finalText);
         emit("message:sent", { userId, text: finalText, platform: "telegram" });
         // v4.5.0: tell observers the response is complete.
@@ -874,6 +931,15 @@ export async function handleMessage(ctx) {
         // but if a new turn started and re-populated _qHandle via onQueryHandle we
         // must NOT null it here — that would break Cycle-1 stop teeth for the new turn.
         if (session._turnId === _thisTurnId) {
+            // A2 — Remove the ⛔ Stop control message as the FIRST action when the
+            // turn ends, so the stale button disappears before any post-turn work.
+            // Best-effort: if it was already deleted or the bot lacks permission, ignore.
+            if (stopMsgId !== null) {
+                try {
+                    await ctx.api.deleteMessage(ctx.chat.id, stopMsgId);
+                }
+                catch { /* harmless grammy race */ }
+            }
             session.isProcessing = false;
             session.abortController = null;
             // v5.2 — Close and clear the SteerChannel; reset per-turn ack flag.
@@ -887,14 +953,6 @@ export async function handleMessage(ctx) {
             session._stopRequested = null; // safe: token matches → no newer turn has set this
             session._turnId = null;
         }
-        // v5.1 — Remove the ⛔ Stop control message (sent at processing start).
-        // Best-effort: if it was already deleted or the bot lacks permission, ignore.
-        if (stopMsgId !== null) {
-            try {
-                await ctx.api.deleteMessage(ctx.chat.id, stopMsgId);
-            }
-            catch { /* harmless grammy race */ }
-        }
         // Check for queued messages — they'll be prepended to the next real message
         // Queue stays in session and gets consumed on next handleMessage call
     }

package/dist/i18n.js CHANGED Viewed

@@ -378,6 +378,12 @@ const strings = {
         es: "⛔ Detenido",
         fr: "⛔ Arrêté",
     },
+    "bot.cancel.alreadyDone": {
+        en: "Nothing running — that already finished.",
+        de: "Nichts läuft — das war schon fertig.",
+        es: "Nada en curso — eso ya terminó.",
+        fr: "Rien en cours — c'était déjà terminé.",
+    },
     // /model
     "bot.model.chooseHeader": {
         en: "🤖 *Choose model:*",

package/dist/index.js CHANGED Viewed

@@ -187,7 +187,7 @@ import { loadSkills } from "./services/skills.js";
 import { loadHooks } from "./services/hooks.js";
 import { registerShutdownHandler } from "./services/restart.js";
 import { cancelAllSubAgents } from "./services/subagents.js";
-import { startWatchdog, stopWatchdog, checkCrashLoopBrake } from "./services/watchdog.js";
+import { startWatchdog, stopWatchdog, checkCrashLoopBrake, markExpectedRestart } from "./services/watchdog.js";
 import { getRegistry } from "./engine.js";
 import { scanAssets } from "./services/asset-index.js";
 // Scan asset directory and generate INDEX.json + INDEX.md
@@ -383,6 +383,12 @@ const shutdown = async () => {
         return;
     isShuttingDown = true;
     console.log("Graceful shutdown initiated...");
+    // Mark the imminent exit as an intentional restart so the next boot's
+    // decideBrakeAction does not count it as a crash. This covers launchctl
+    // unload/load (SIGTERM from launchd) in addition to /restart and /update
+    // which call markExpectedRestart() themselves before process.exit(0).
+    // Must run before stopWatchdog() (which just clears timers, not the beacon).
+    markExpectedRestart();
     // E2: shutdown-notification — await the async cancellation so running
     // agents can post a cancellation message to Telegram before the bot
     // stops. Capped at 5s internally so a hang can't block shutdown.

package/dist/services/async-agent-watcher.js CHANGED Viewed

@@ -27,6 +27,25 @@ import { dirname } from "path";
 import { parseOutputFileStatus } from "./async-agent-parser.js";
 import { ASYNC_AGENTS_STATE_FILE } from "../paths.js";
 import { getAllSessions } from "./session.js";
+/**
+ * B3 — Detect a permanent "target chat does not exist" delivery failure
+ * (Telegram 400 "Bad Request: chat not found"), e.g. the stale chat_id:1
+ * test agent. Such an agent must be abandoned, not retried forever.
+ *
+ * Kept as a local predicate (mirrors isChatNotFoundError in
+ * subagent-delivery.ts) so the watcher does NOT take a new hard
+ * dependency on a fresh subagent-delivery export — many test suites mock
+ * that module with only deliverSubAgentResult, and a destructured import
+ * of a non-mocked symbol would throw. Matched narrowly on the
+ * chat-not-found signature only.
+ */
+function isChatNotFoundError(err) {
+    if (!err || typeof err !== "object")
+        return false;
+    const e = err;
+    const haystack = `${e.message ?? ""} ${e.description ?? ""}`;
+    return /chat not found/i.test(haystack);
+}
 /** How often the polling loop runs against each pending agent. */
 const POLL_INTERVAL_MS = 15_000;
 /** Hard ceiling per agent — 12h. After this, give up and deliver
@@ -199,22 +218,38 @@ export async function pollOnce() {
     const now = Date.now();
     const toRemove = [];
     const missingFileFailureMs = getMissingFileFailureMs();
+    // B3 — when a delivery attempt proves the target chat is permanently
+    // invalid ("chat not found", e.g. the stale chat_id:1 test agent),
+    // abandon the agent so the watcher never retries it. Without this, a
+    // pending agent with an invalid target spams stderr on every poll
+    // cycle (inflating errors_24h) and lingers until the 12h giveUpAt.
+    const abandonIfInvalidTarget = (entry, outcome) => {
+        if (!outcome.chatNotFound)
+            return;
+        if (!toRemove.includes(entry.agentId))
+            toRemove.push(entry.agentId);
+        console.warn(`[async-watcher] abandoning agent ${entry.agentId} — delivery target ` +
+            `chat ${String(entry.chatId)} not found (invalid/stale); will not retry`);
+    };
     for (const entry of pending.values()) {
         entry.lastCheckedAt = now;
         // Timeout check first — if the agent is past its giveUpAt, give up
         // regardless of whether the file shows progress.
         if (now >= entry.giveUpAt) {
-            await deliverAsFailure(entry, "timeout", "Agent ran longer than 12h — giving up");
+            const outcome = await deliverAsFailure(entry, "timeout", "Agent ran longer than 12h — giving up");
+            abandonIfInvalidTarget(entry, outcome);
             toRemove.push(entry.agentId);
             continue;
         }
         const status = await parseOutputFileStatus(entry.outputFile);
         if (status.state === "completed") {
-            await deliverAsCompleted(entry, status.output, status.tokensUsed);
+            const outcome = await deliverAsCompleted(entry, status.output, status.tokensUsed);
+            abandonIfInvalidTarget(entry, outcome);
             toRemove.push(entry.agentId);
         }
         else if (status.state === "failed") {
-            await deliverAsFailure(entry, "error", status.error);
+            const outcome = await deliverAsFailure(entry, "error", status.error);
+            abandonIfInvalidTarget(entry, outcome);
             toRemove.push(entry.agentId);
         }
         else if (status.state === "missing" &&
@@ -222,7 +257,8 @@ export async function pollOnce() {
             // v4.14.2 — Zombie guard: the subprocess never created its
             // output file within `missingFileFailureMs` (default 10 min).
             // Declare failed instead of polling until the 12h giveUpAt.
-            await deliverAsFailure(entry, "error", `Dispatched subprocess never wrote its output file (${Math.round((now - entry.startedAt) / 60_000)}m after start). Likely crashed before initializing, or the file was removed externally.`);
+            const outcome = await deliverAsFailure(entry, "error", `Dispatched subprocess never wrote its output file (${Math.round((now - entry.startedAt) / 60_000)}m after start). Likely crashed before initializing, or the file was removed externally.`);
+            abandonIfInvalidTarget(entry, outcome);
             toRemove.push(entry.agentId);
         }
         // running / missing-but-young → keep polling next cycle
@@ -254,13 +290,20 @@ async function deliverAsCompleted(entry, output, tokensUsed) {
         tokensUsed: tokensUsed ?? { input: 0, output: 0 },
         duration: Date.now() - entry.startedAt,
     };
+    let chatNotFound = false;
     try {
-        await deliverSubAgentResult(info, result);
+        const outcome = await deliverSubAgentResult(info, result);
+        chatNotFound = !!outcome?.chatNotFound;
     }
     catch (err) {
         console.error(`[async-watcher] delivery failed for ${entry.agentId}:`, err);
+        // deliverSubAgentResult normally swallows send errors and reports
+        // chatNotFound via its return value; if it ever throws, still detect
+        // the permanent invalid-target case here.
+        chatNotFound = isChatNotFoundError(err);
     }
     decrementPendingCount(entry.sessionKey);
+    return { chatNotFound };
 }
 async function deliverAsFailure(entry, status, error) {
     const { deliverSubAgentResult } = await import("./subagent-delivery.js");
@@ -283,13 +326,17 @@ async function deliverAsFailure(entry, status, error) {
         duration: Date.now() - entry.startedAt,
         error,
     };
+    let chatNotFound = false;
     try {
-        await deliverSubAgentResult(info, result);
+        const outcome = await deliverSubAgentResult(info, result);
+        chatNotFound = !!outcome?.chatNotFound;
     }
     catch (err) {
         console.error(`[async-watcher] failure delivery failed for ${entry.agentId}:`, err);
+        chatNotFound = isChatNotFoundError(err);
     }
     decrementPendingCount(entry.sessionKey);
+    return { chatNotFound };
 }
 // ── Test helpers ──────────────────────────────────────────────────
 /**

package/dist/services/subagent-delivery.js CHANGED Viewed

@@ -24,6 +24,22 @@ function isTelegramParseError(err) {
     const haystack = `${e.message ?? ""} ${e.description ?? ""}`;
     return /can't parse entities|can't find end of the entity/i.test(haystack);
 }
+/**
+ * B3 — A Telegram send rejected because the TARGET CHAT DOES NOT EXIST
+ * (HTTP 400 "Bad Request: chat not found"). This is a permanent,
+ * non-recoverable condition: the chat id is invalid (e.g. the stale
+ * chat_id:1 test agent), so every retry will fail identically and just
+ * spam stderr. Distinct from transient failures (network, rate-limit)
+ * which ARE worth retrying. Matched narrowly on the chat-not-found
+ * signature only — never on generic Bad Request.
+ */
+export function isChatNotFoundError(err) {
+    if (!err || typeof err !== "object")
+        return false;
+    const e = err;
+    const haystack = `${e.message ?? ""} ${e.description ?? ""}`;
+    return /chat not found/i.test(haystack);
+}
 /**
  * Send a Markdown message with an automatic plain-text retry on parse
  * errors. Any other error propagates to the caller's outer catch.
@@ -251,28 +267,29 @@ export function createLiveStream(chatId, agentName) {
  *   - "slack" / "discord" / "whatsapp" → delivery-registry lookup
  */
 export async function deliverSubAgentResult(info, result, opts = {}) {
+    const OK = { chatNotFound: false };
     // Implicit spawns: the Task-tool bridge in the main stream has already
     // surfaced the output; extra delivery would be duplication.
     if (info.source === "implicit")
-        return;
+        return OK;
     const effective = opts.visibility ?? getVisibility();
     if (effective === "silent")
-        return;
+        return OK;
     if (!info.parentChatId) {
         console.warn(`[subagent-delivery] missing parentChatId for ${info.name} (source=${info.source})`);
-        return;
+        return OK;
     }
     // v4.14 — Platform routing. Telegram is the default path (unchanged).
     const platform = info.platform ?? "telegram";
     if (platform !== "telegram") {
         await deliverViaRegistry(platform, info, result);
-        return;
+        return OK;
     }
     // ── Telegram path (v4.12.x behavior, unchanged) ──────────────────
     const api = getBotApi();
     if (!api) {
         console.warn(`[subagent-delivery] no bot api available for ${info.name}`);
-        return;
+        return OK;
     }
     // Telegram's chatId is always a number at runtime; defensive cast.
     const tgChatId = typeof info.parentChatId === "number"
@@ -280,7 +297,7 @@ export async function deliverSubAgentResult(info, result, opts = {}) {
         : Number(info.parentChatId);
     if (!Number.isFinite(tgChatId)) {
         console.warn(`[subagent-delivery] invalid telegram chatId for ${info.name}`);
-        return;
+        return OK;
     }
     const banner = buildBanner(info, result);
     const body = result.output?.trim() || `(empty output)`;
@@ -297,12 +314,12 @@ export async function deliverSubAgentResult(info, result, opts = {}) {
                 console.error(`[subagent-delivery] file upload failed:`, err);
                 await api.sendMessage(tgChatId, body.slice(0, MAX_TG_CHUNK));
             }
-            return;
+            return OK;
         }
         // Case 2: fits in a single message → banner + body joined
         if (body.length + banner.length + 2 <= MAX_TG_CHUNK) {
             await sendWithMarkdownFallback(api, tgChatId, `${banner}\n\n${body}`);
-            return;
+            return OK;
         }
         // Case 3: medium output → banner as its own message, body chunked
         await sendWithMarkdownFallback(api, tgChatId, banner);
@@ -311,9 +328,15 @@ export async function deliverSubAgentResult(info, result, opts = {}) {
             // arbitrary chunk boundaries would be inconsistent anyway.
             await api.sendMessage(tgChatId, body.slice(i, i + MAX_TG_CHUNK));
         }
+        return OK;
     }
     catch (err) {
         console.error(`[subagent-delivery] send failed for ${info.name}:`, err);
+        // B3 — report a permanent invalid-target failure so the watcher can
+        // abandon this agent instead of retrying it forever. Any other error
+        // (network, rate-limit, parse) is NOT reported as chatNotFound, so the
+        // agent's normal retry/timeout lifecycle is unchanged.
+        return { chatNotFound: isChatNotFoundError(err) };
     }
 }
 /**

package/dist/services/telegram.js CHANGED Viewed

@@ -17,6 +17,15 @@ export class TelegramStreamer {
         this.api = api;
         this.replyTo = replyToMessageId;
     }
+    /**
+     * True when at least one message has been sent to the user (i.e. messageId
+     * is set). Used by the A3 suppress-undelivered guard in message.ts to
+     * determine whether visible text has already reached the user — if so, the
+     * no-retract invariant prevents suppressing the final send.
+     */
+    get hasSentText() {
+        return this.messageId !== null;
+    }
     /**
      * Set a transient status line (e.g. "📖 Read file.html…") that gets
      * appended to the current accumulated text. Passing null clears it.

package/dist/services/trends.js CHANGED Viewed

@@ -33,12 +33,81 @@
  *   ALVIN_TRENDS_INTERVAL_HOURS=24         → snapshot cadence
  *   ALVIN_TRENDS_AI_AFTER_DAYS=7           → days of data before AI analysis kicks in
  */
-import { appendFileSync, existsSync, readFileSync, mkdirSync } from "fs";
+import { appendFileSync, existsSync, readFileSync, writeFileSync, mkdirSync } from "fs";
 import { join, dirname } from "path";
 import { homedir } from "os";
 import { BOT_VERSION } from "../version.js";
 import { emitCritical } from "./critical-notify.js";
 const TRENDS_PATH = join(homedir(), ".alvin-bot", "state", "trends.jsonl");
+/**
+ * B2 — peak-uptime high-water mark. The trends collector takes its FIRST
+ * snapshot ~60s after every boot (startTrendsCollector schedules it at
+ * 60_000ms). takeSnapshot() records uptime_s = process.uptime(), so the
+ * first post-restart sample is structurally ≈ 62s. With deliberate
+ * restarts (/update, launchctl reload) those ~62s samples dominate
+ * trends.jsonl, so the 30-day AI pass perpetually concludes "restart
+ * loop, never lives past ~62s" even when the process has actually been
+ * continuously up for hours by the time the daily snapshot fires.
+ *
+ * Fix: persist the MAXIMUM real uptime this bot has ever observed (across
+ * process generations) and record it on every snapshot as uptime_peak_s.
+ * The peak only ever derives from process.uptime() — it is never
+ * fabricated or extrapolated. The anomaly evaluation then keys on the
+ * peak (hasRepresentativeUptime), so a process that genuinely lived for
+ * hours is not flagged as a ~62s loop, while a genuine fast-restart loop
+ * (peak never climbs past the startup transient) still fires.
+ *
+ * Stored next to trends.jsonl (state/), honoring ALVIN_DATA_DIR so tests
+ * and non-default installs work. Survives restarts by design — that is
+ * the whole point of a high-water mark.
+ */
+function trendsStateDir() {
+    const base = process.env.ALVIN_DATA_DIR || join(homedir(), ".alvin-bot");
+    return join(base, "state");
+}
+function uptimePeakPath() {
+    return join(trendsStateDir(), "uptime-peak.json");
+}
+/**
+ * The startup transient: takeSnapshot's first sample is taken ~60s after
+ * boot, so any uptime at/under this is indistinguishable from "just
+ * restarted". An uptime ABOVE this proves the process actually lived past
+ * the post-restart sampling window. 600s (10 min) is comfortably above
+ * the 60s first-sample delay + scheduling jitter and far below the 24h
+ * cron cadence, so a healthy bot trivially clears it while a real
+ * crash-loop (exits within seconds/a couple minutes) never does.
+ */
+export const STARTUP_TRANSIENT_S = 600;
+/**
+ * Read the persisted peak uptime, fold in the CURRENT real uptime, persist
+ * the (possibly larger) high-water mark, and return it. Pure w.r.t. time
+ * sources: the only uptime input is process.uptime() — nothing invented.
+ * Disk failures degrade gracefully to the current real uptime.
+ */
+function bumpAndReadUptimePeak() {
+    const currentReal = Math.round(process.uptime());
+    let stored = 0;
+    try {
+        const raw = readFileSync(uptimePeakPath(), "utf-8");
+        const parsed = JSON.parse(raw);
+        if (typeof parsed.peak_s === "number" && Number.isFinite(parsed.peak_s) && parsed.peak_s > 0) {
+            stored = parsed.peak_s;
+        }
+    }
+    catch {
+        // No file yet / unreadable — start the high-water mark from the
+        // current real uptime. Not an error.
+    }
+    const peak = Math.max(stored, currentReal);
+    try {
+        mkdirSync(trendsStateDir(), { recursive: true });
+        writeFileSync(uptimePeakPath(), JSON.stringify({ peak_s: peak }), "utf-8");
+    }
+    catch {
+        // Disk full / permissions — non-fatal; we still return the in-memory peak.
+    }
+    return peak;
+}
 const DEFAULT_INTERVAL_HOURS = 24;
 const DEFAULT_AI_THRESHOLD_DAYS = 7;
 const MAX_RETAIN_DAYS = 90;
@@ -54,6 +123,18 @@ const MAX_RETAIN_DAYS = 90;
  *     (a successful, expected fallback — not an error)
  *   - critical-notify's own delivery-outcome line, kept on stderr on
  *     purpose so it stays visible even in brake/crash context
+ *   - B3: subagent-delivery's "send failed … chat not found" line for a
+ *     stale/test async-agent whose delivery target chat no longer exists
+ *     (e.g. the recurring chat_id:1 test agent). This is benign noise,
+ *     not a real fault: the target chat is invalid, the watcher now
+ *     abandons such agents (see async-agent-watcher.ts), and counting it
+ *     made errors_24h creep upward indefinitely on every poll cycle.
+ *     The match is DELIBERATELY narrow — it requires BOTH the
+ *     `[subagent-delivery] send failed` prefix AND a `chat not found`
+ *     cause on the same line. A subagent-delivery failure for ANY other
+ *     reason (network, rate-limit, parse) is still counted, and a
+ *     `chat not found` from ANY OTHER subsystem (a real misconfigured
+ *     target) is still counted.
  *
  * Counting those turned this very monitor into a false-alarm generator:
  * it flagged its OWN log lines plus every release's restart churn, so
@@ -65,7 +146,7 @@ const MAX_RETAIN_DAYS = 90;
  * any, get added here in one place instead of being chased across the
  * codebase.
  */
-export const ERR_LOG_PATTERN = /^(?!.*(?:\[critical-notify\]|\[subagent-delivery\] Markdown parse failed)).+/;
+export const ERR_LOG_PATTERN = /^(?!.*(?:\[critical-notify\]|\[subagent-delivery\] Markdown parse failed|\[subagent-delivery\] send failed.*chat not found)).+/;
 let trendsTimer = null;
 function isDisabled() {
     return (process.env.ALVIN_DISABLE_TRENDS === "true" ||
@@ -134,6 +215,7 @@ function takeSnapshot(activeProvider) {
     return {
         ts: new Date().toISOString(),
         uptime_s: Math.round(process.uptime()),
+        uptime_peak_s: bumpAndReadUptimePeak(),
         rss_mb: Math.round(mem.rss / 1024 / 1024),
         heap_mb: Math.round(mem.heapUsed / 1024 / 1024),
         crashes_24h: readWatchdogCrashes24h(),
@@ -195,6 +277,92 @@ SUGGESTION: <one shell command OR observation for the operator>
 --- LAST {N} DAYS OF SNAPSHOTS ---
 {SNAPSHOTS}
 --- END ---`;
+/**
+ * Returns true if at least one snapshot in `snaps` has a non-zero
+ * crashes_24h value, meaning a REAL crash (not an expected/deliberate
+ * restart) was recorded on that day.
+ *
+ * After the B1 fix, deliberate restarts (SIGTERM / launchctl reload /
+ * /restart / /update) write the expectedRestart beacon flag and are NOT
+ * counted in dailyCrashCount. So crashes_24h === 0 across all snapshots
+ * means the bot was only restarted intentionally — no real crash evidence.
+ *
+ * Pure function, exported for unit testing.
+ */
+export function hasRealCrashEvidence(snaps) {
+    return snaps.some((s) => typeof s.crashes_24h === "number" && s.crashes_24h > 0);
+}
+/**
+ * B2 — Returns true if AT LEAST ONE snapshot proves the bot process
+ * genuinely lived past the startup transient (i.e. it is NOT a ~62s
+ * restart loop).
+ *
+ * The first per-boot snapshot is structurally taken ~60s after boot, so
+ * its raw uptime_s is always ≈ 62 regardless of how long the process
+ * subsequently runs. uptime_peak_s is the high-water mark of REAL
+ * process.uptime() carried across process generations, so a single
+ * snapshot whose peak exceeds STARTUP_TRANSIENT_S is hard evidence the
+ * process did live for a representative duration. Legacy pre-B2 lines
+ * have no uptime_peak_s — we fall back to their raw uptime_s, so a legacy
+ * 24h cron snapshot still counts as representative on its own.
+ *
+ * A genuine fast-restart loop never lets the peak climb past the
+ * transient, so it correctly returns false and the WARN still fires.
+ *
+ * Pure function, exported for unit testing.
+ */
+export function hasRepresentativeUptime(snaps) {
+    return snaps.some((s) => {
+        const peak = typeof s.uptime_peak_s === "number" && Number.isFinite(s.uptime_peak_s)
+            ? s.uptime_peak_s
+            : typeof s.uptime_s === "number" && Number.isFinite(s.uptime_s)
+                ? s.uptime_s
+                : 0;
+        return peak > STARTUP_TRANSIENT_S;
+    });
+}
+/**
+ * B2/B4 — Pure crash/restart WARN suppression decision.
+ *
+ * Encodes the SAME two gates, in the SAME precedence, that dailyTask
+ * applies inline (B2 before B4). Extracted as a pure function purely so
+ * the gate COMPOSITION (not just each helper in isolation) is unit
+ * testable — the helpers are individually correct but the interaction
+ * is where the real-crash-loop-after-a-healthy-period regression lives.
+ *
+ * Returns the suppression reason, or "none" when the WARN must fire.
+ *
+ *  - "representative-uptime" (B2): a deliberate-restart / sampling
+ *    artifact — the AI saw ~62s uptimes but a snapshot peak proves the
+ *    process actually lived past the startup transient. ONLY applies
+ *    when there is no real crash evidence: a genuine crash loop after a
+ *    prior healthy period still carries the persisted high peak, so
+ *    without the crash-evidence guard B2 would permanently and silently
+ *    swallow it. With the guard, crashes_24h>0 falls through to B4.
+ *  - "no-crash-evidence" (B4): crash/restart pattern but crashes_24h===0
+ *    everywhere (deliberate-restart-only, not a real crash loop).
+ *  - "none": the WARN is real and must be emitted.
+ *
+ * Pure function, exported for unit testing.
+ */
+export function evaluateCrashRestartSuppression(isCrashRestartPattern, snaps) {
+    if (!isCrashRestartPattern)
+        return "none";
+    const realCrash = hasRealCrashEvidence(snaps);
+    // B2: only the deliberate-restart / sampling-artifact case. A real
+    // crash loop (crashes_24h>0) must NOT be suppressed here even though
+    // the persisted uptime high-water mark still reads representative.
+    if (!realCrash && hasRepresentativeUptime(snaps))
+        return "representative-uptime";
+    // B4: crash/restart pattern with zero real crash evidence.
+    if (!realCrash)
+        return "no-crash-evidence";
+    return "none";
+}
+/** Test-only: take a snapshot without writing to trends.jsonl. */
+export function __takeSnapshotForTest(activeProvider) {
+    return takeSnapshot(activeProvider);
+}
 function parseTrendResponse(text) {
     if (/^ANOMALY:\s*NONE/im.test(text)) {
         return {
@@ -296,6 +464,38 @@ async function dailyTask(registry) {
             console.log(`📊 Trends AI: no anomaly detected`);
             return;
         }
+        const recentSnaps = readSnapshots(30);
+        const isCrashRestartPattern = /crash|restart|loop|uptime/i.test(result.description);
+        // B2 gate: suppress an "uptime stuck at ~62s / restart loop" WARN when
+        // the snapshots PROVE the process actually lived past the startup
+        // transient. The first per-boot snapshot is structurally sampled ~60s
+        // after boot, so raw uptime_s reads ≈62 even for a perfectly healthy
+        // bot that has been up for hours by the time the daily snapshot fires.
+        // uptime_peak_s is the high-water mark of real process.uptime() across
+        // process generations: if ANY snapshot's peak exceeds the transient,
+        // the "~62s loop" conclusion is factually false. A genuine fast-restart
+        // loop never lets the peak climb, so it is NOT suppressed here.
+        if (isCrashRestartPattern && !hasRealCrashEvidence(recentSnaps) && hasRepresentativeUptime(recentSnaps)) {
+            console.log(`📊 Trends AI: suppressed WARN "${result.description}" — ` +
+                `uptime/restart pattern flagged but at least one snapshot shows a ` +
+                `representative peak uptime (>${STARTUP_TRANSIENT_S}s); the process ` +
+                `did live well past the post-restart sampling window, not a ~62s loop`);
+            return;
+        }
+        // B4 gate: suppress WARN when the AI flags a crash/restart-loop pattern
+        // but the historical snapshots contain ZERO real crash evidence
+        // (crashes_24h === 0 across the board). This happens when the bot was
+        // restarted deliberately (launchctl reload / /update / /restart) — those
+        // produce low uptimes that the AI reads as "restart loop", but the
+        // crash counter stays at 0 because markExpectedRestart() was written
+        // on each clean shutdown. A real crash loop WILL have crashes_24h > 0
+        // in at least one snapshot and will still fire the WARN.
+        if (isCrashRestartPattern && !hasRealCrashEvidence(recentSnaps)) {
+            console.log(`📊 Trends AI: suppressed WARN "${result.description}" — ` +
+                `crash/restart pattern detected but crashes_24h=0 across all snapshots ` +
+                `(deliberate-restart-only, not a real crash loop)`);
+            return;
+        }
         console.log(`📊 Trends AI: ANOMALY (${result.severity}) — ${result.description}`);
         emitCritical({
             category: "custom",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "alvin-bot",
-  "version": "5.4.0",
+  "version": "5.5.0",
   "description": "Alvin Bot — Your personal AI agent on Telegram, WhatsApp, Discord, Signal, and Web.",
   "type": "module",
   "main": "dist/index.js",