npm - alvin-bot - Versions diffs - 5.4.0 → 5.6.0 - Mend

alvin-bot 5.4.0 → 5.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/CHANGELOG.md +54 -0
package/dist/handlers/commands.js +23 -2
package/dist/handlers/message.js +74 -16
package/dist/i18n.js +15 -0
package/dist/index.js +7 -1
package/dist/providers/claude-sdk-provider.js +14 -0
package/dist/services/async-agent-watcher.js +53 -6
package/dist/services/subagent-delivery.js +133 -32
package/dist/services/subagents.js +19 -5
package/dist/services/telegram.js +9 -0
package/dist/services/trends.js +249 -2
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,60 @@
 All notable changes to Alvin Bot are documented here.
+## [5.6.0] — 2026-05-18
+### Background-task reports are now clean and to the point
+When a scheduled or background task finishes, Alvin now sends you
+just the result — a tight header (what ran, how long, tokens, success)
+and the actual answer — instead of a wall of its working notes. If a
+result is unusually long, the chat message stays short and the
+complete output comes attached as a file, so you never lose anything
+and never have to scroll through a transcript.
+### A clear confirmation when you stop something
+Press ⛔ Stop (or use /cancel) while Alvin is genuinely working and
+you now get a short, plain confirmation in your language that the work
+was halted — not just a fleeting button flash. If nothing was running,
+Alvin still tells you that honestly instead of pretending it stopped
+something.
+### Health alerts that don't cry wolf
+Alvin's self-monitoring now judges its health on recent activity, so a
+one-off rough patch no longer keeps it flagging a problem for weeks. A
+real issue still raises a flag promptly; a quiet, healthy bot stays
+quiet.
+As always, this shipped after a full multi-pass review and a
+fresh-install + stress verification on a clean separate machine.
+## [5.5.0] — 2026-05-18
+### The ⛔ Stop button now responds instantly — and honestly
+Stopping a task is now crisp and truthful. The moment a task finishes,
+the Stop button disappears, so you're never tapping a control for
+something that's already done. And the feedback always matches reality:
+if you tap Stop while Alvin is genuinely working, it stops and says so;
+if the task had already completed, Alvin tells you that plainly instead
+of implying it cut something short. If you hit Stop in that brief moment
+while an answer is being prepared, that answer is now held back — "I
+stopped it" means nothing more arrives. Anything Alvin had already
+shown you stays exactly as it was.
+### Fewer false alerts — smarter health monitoring
+Alvin's self-monitoring got a lot more trustworthy. A planned restart
+or an update is no longer mistaken for a problem, and the daily health
+summary only raises a flag when there's real evidence something is
+actually wrong — so the alerts you do get are ones worth reading.
+Routine background housekeeping no longer shows up as noise.
+As always, this shipped after a full multi-pass review and a
+fresh-install + stress verification on a clean separate machine.
 ## [5.4.0] — 2026-05-18
 ### Smoother background tasks — and Alvin always tells you the truth

package/dist/handlers/commands.js CHANGED Viewed

@@ -1918,6 +1918,10 @@ export function registerCommands(bot) {
         if (session.isProcessing) {
             requestStop(session, "soft", buildStopDeps(session));
             await ctx.reply(t("bot.cancel.cancelling", lang));
+            // V56-T2c — a real stop fired: follow the "cancelling…" notice with a
+            // brief confirmation that the work was actually halted (consistent UX
+            // with the ⛔ button). Best-effort — must never throw into the handler.
+            await ctx.reply(t("bot.cancel.confirmed", lang)).catch(() => { });
         }
         else {
             await ctx.reply(t("bot.cancel.noRunning", lang));
@@ -1946,17 +1950,34 @@ export function registerCommands(bot) {
         const sessionKey = ctx.match[1];
         const session = getSession(sessionKey);
         const lang = session.language;
-        if (session.isProcessing) {
+        // A1 — Capture isProcessing BEFORE requestStop (which sets it false)
+        // so we can show the right toast: "stopped" vs "already finished".
+        const wasProcessing = session.isProcessing;
+        if (wasProcessing) {
             requestStop(session, "soft", buildStopDeps(session));
         }
+        // A1 — Honest toast: if the turn had already finished when the button was
+        // tapped, don't claim "stopped" — tell the user it was already done.
+        const toastKey = wasProcessing
+            ? "bot.cancel.stoppedToast"
+            : "bot.cancel.alreadyDone";
         try {
-            await ctx.answerCallbackQuery({ text: t("bot.cancel.stoppedToast", lang) });
+            await ctx.answerCallbackQuery({ text: t(toastKey, lang) });
         }
         catch { /* harmless grammy race */ }
         try {
             await ctx.editMessageReplyMarkup({});
         }
         catch { /* harmless grammy race — message may already be gone */ }
+        // V56-T2c — when a real stop genuinely fired (wasProcessing), also send a
+        // short in-chat confirmation in the session language so the user gets a
+        // persistent acknowledgement, not only the ephemeral toast. When nothing
+        // was running we deliberately stay silent here (v5.5.0 honesty: the
+        // alreadyDone toast already told the truth). Best-effort — must never
+        // throw into the handler.
+        if (wasProcessing) {
+            await ctx.reply(t("bot.cancel.confirmed", lang)).catch(() => { });
+        }
     });
     // /restart — trigger a PM2-managed restart by exiting the process.
     // The PM2 supervisor picks up the exit and respawns with --update-env.

package/dist/handlers/message.js CHANGED Viewed

@@ -122,6 +122,37 @@ const TOOL_ICONS = {
     WebFetch: "📡",
     Task: "🤖",
 };
+// ── A3 — stop-suppress-undelivered pure predicate ────────────────────────────
+/**
+ * Determine whether the final answer send should be suppressed because a stop
+ * was requested and no visible text has yet been delivered to the user.
+ *
+ * This closes the gap behind "I clicked Stop but it answered anyway": the
+ * Claude SDK delivers short answers atomically, so the for-await loop parks
+ * on IPC the whole time, and the complete answer arrives as one block. By the
+ * time the consumer bail fires at the top of the loop, the answer is computed
+ * and about to be sent. This guard is the only stoppable moment for atomic
+ * answers.
+ *
+ * HARD CONSTRAINT — no-retract invariant: if ANY visible text has already
+ * been streamed/committed to the user (visibleTextAlreadySent=true), the
+ * predicate returns false regardless of stop state. Partial output that
+ * already reached the user is NEVER retracted. The consumer bail in the
+ * for-await loop already handles mid-stream stops; this guard only acts on
+ * the final commit step.
+ *
+ * Truth table:
+ *   stopRequested=truthy  + visibleTextAlreadySent=false → true  (suppress)
+ *   stopRequested=truthy  + visibleTextAlreadySent=true  → false (no-retract)
+ *   stopRequested=falsy   + *                            → false (normal)
+ */
+export function shouldSuppressFinalSend(args) {
+    if (!args.stopRequested)
+        return false;
+    if (args.visibleTextAlreadySent)
+        return false;
+    return true;
+}
 // ── v5.2 live steering — pure routing helper ─────────────────────────────────
 /**
  * Decide how a mid-task message (arriving while `session.isProcessing`) should
@@ -785,19 +816,45 @@ export async function handleMessage(ctx) {
                 /* harmless — notice is best-effort */
             }
         }
-        // v5.1 stop: user stopped this query — do NOT finalize partial output
-        // as a successful answer, no 👍, no history commit. The stop trigger
-        // (/cancel | /stopall | ⛔ button) already acknowledged to the user.
-        // The `finally` still runs (clears isProcessing/_qHandle/_stopRequested
-        // + typing indicator).
-        if (session._stopRequested) {
-            return;
-        }
         if (bypassAborted) {
             // v4.12.3 — Bypass path took over; don't finalize, don't react 👍.
             // Just clean up and return. The finally block still fires.
             return;
         }
+        // A3 — Suppress-or-finalize gate for stopped turns.
+        //
+        // shouldSuppressFinalSend is the SINGLE gate controlling whether finalize runs:
+        //
+        //   stop + no visible text (suppress=true):
+        //     Skip finalize and all side-effects. Nothing reached the user — correct.
+        //     The stop trigger (/cancel | /stopall | ⛔) already acknowledged this.
+        //     The `finally` still runs (clears isProcessing/_qHandle/_stopRequested
+        //     + typing indicator).
+        //
+        //   stop + visible text already sent (suppress=false, _stopRequested truthy):
+        //     The no-retract invariant applies — partial output already shown must not
+        //     be left visually unfinished. Run streamer.finalize to flush the throttle
+        //     timer and drop the status line, then return BEFORE the completed-answer
+        //     side-effects (👍 / broadcastResponseDone / addToHistory). A stopped turn
+        //     is NOT a successfully completed turn.
+        //
+        //   no stop (suppress=false, _stopRequested falsy):
+        //     Normal path — fall through to finalize + all side-effects.
+        if (shouldSuppressFinalSend({
+            stopRequested: session._stopRequested,
+            visibleTextAlreadySent: streamer.hasSentText,
+        })) {
+            // Branch A: stop + no visible text → suppress entirely.
+            return;
+        }
+        if (session._stopRequested && streamer.hasSentText) {
+            // Branch B: stop + visible text already sent → finalize the partial cleanly
+            // (flushes throttle timer, clears status line) but do NOT emit the
+            // completed-answer signals or commit to history.
+            await streamer.finalize(finalText);
+            return;
+        }
+        // Branch C: normal (no stop) — fall through.
         await streamer.finalize(finalText);
         emit("message:sent", { userId, text: finalText, platform: "telegram" });
         // v4.5.0: tell observers the response is complete.
@@ -874,6 +931,15 @@ export async function handleMessage(ctx) {
         // but if a new turn started and re-populated _qHandle via onQueryHandle we
         // must NOT null it here — that would break Cycle-1 stop teeth for the new turn.
         if (session._turnId === _thisTurnId) {
+            // A2 — Remove the ⛔ Stop control message as the FIRST action when the
+            // turn ends, so the stale button disappears before any post-turn work.
+            // Best-effort: if it was already deleted or the bot lacks permission, ignore.
+            if (stopMsgId !== null) {
+                try {
+                    await ctx.api.deleteMessage(ctx.chat.id, stopMsgId);
+                }
+                catch { /* harmless grammy race */ }
+            }
             session.isProcessing = false;
             session.abortController = null;
             // v5.2 — Close and clear the SteerChannel; reset per-turn ack flag.
@@ -887,14 +953,6 @@ export async function handleMessage(ctx) {
             session._stopRequested = null; // safe: token matches → no newer turn has set this
             session._turnId = null;
         }
-        // v5.1 — Remove the ⛔ Stop control message (sent at processing start).
-        // Best-effort: if it was already deleted or the bot lacks permission, ignore.
-        if (stopMsgId !== null) {
-            try {
-                await ctx.api.deleteMessage(ctx.chat.id, stopMsgId);
-            }
-            catch { /* harmless grammy race */ }
-        }
         // Check for queued messages — they'll be prepended to the next real message
         // Queue stays in session and gets consumed on next handleMessage call
     }

package/dist/i18n.js CHANGED Viewed

@@ -378,6 +378,21 @@ const strings = {
         es: "⛔ Detenido",
         fr: "⛔ Arrêté",
     },
+    "bot.cancel.alreadyDone": {
+        en: "Nothing running — that already finished.",
+        de: "Nichts läuft — das war schon fertig.",
+        es: "Nada en curso — eso ya terminó.",
+        fr: "Rien en cours — c'était déjà terminé.",
+    },
+    // Sent as a brief in-chat confirmation only when a stop GENUINELY halted
+    // running work (⛔ button / /cancel with work actually in progress). Not
+    // sent when nothing was running — that honest behavior stays unchanged.
+    "bot.cancel.confirmed": {
+        en: "⛔ Stopped — further work was halted.",
+        de: "⛔ Gestoppt — die weitere Arbeit wurde angehalten.",
+        es: "⛔ Detenido — se interrumpió el trabajo en curso.",
+        fr: "⛔ Arrêté — le travail en cours a été interrompu.",
+    },
     // /model
     "bot.model.chooseHeader": {
         en: "🤖 *Choose model:*",

package/dist/index.js CHANGED Viewed

@@ -187,7 +187,7 @@ import { loadSkills } from "./services/skills.js";
 import { loadHooks } from "./services/hooks.js";
 import { registerShutdownHandler } from "./services/restart.js";
 import { cancelAllSubAgents } from "./services/subagents.js";
-import { startWatchdog, stopWatchdog, checkCrashLoopBrake } from "./services/watchdog.js";
+import { startWatchdog, stopWatchdog, checkCrashLoopBrake, markExpectedRestart } from "./services/watchdog.js";
 import { getRegistry } from "./engine.js";
 import { scanAssets } from "./services/asset-index.js";
 // Scan asset directory and generate INDEX.json + INDEX.md
@@ -383,6 +383,12 @@ const shutdown = async () => {
         return;
     isShuttingDown = true;
     console.log("Graceful shutdown initiated...");
+    // Mark the imminent exit as an intentional restart so the next boot's
+    // decideBrakeAction does not count it as a crash. This covers launchctl
+    // unload/load (SIGTERM from launchd) in addition to /restart and /update
+    // which call markExpectedRestart() themselves before process.exit(0).
+    // Must run before stopWatchdog() (which just clears timers, not the beacon).
+    markExpectedRestart();
     // E2: shutdown-notification — await the async cancellation so running
     // agents can post a cancellation message to Telegram before the bot
     // stops. Capped at 5s internally so a hang can't block shutdown.

package/dist/providers/claude-sdk-provider.js CHANGED Viewed

@@ -446,9 +446,23 @@ export class ClaudeSDKProvider {
                             sessionResetRequested: true,
                         };
                     }
+                    // V56-T1 — Surface the SDK's authoritative final answer
+                    // separately from the accumulated narration. SDKResultSuccess
+                    // carries a single `result: string` that is the agent's actual
+                    // outcome (NOT the concatenation of every assistant turn).
+                    // SDKResultError has no `result` field — leave finalResult
+                    // undefined there so consumers fall back to buffered text.
+                    // This is the same source the detached-dispatch path already
+                    // prefers (`{"type":"result"}.result` in async-agent-parser).
+                    const finalResult = "subtype" in resultMsg &&
+                        resultMsg.subtype === "success" &&
+                        typeof resultMsg.result === "string"
+                        ? resultMsg.result
+                        : undefined;
                     yield {
                         type: "done",
                         text: accumulatedText || "",
+                        ...(finalResult !== undefined ? { finalResult } : {}),
                         sessionId: resultMsg.session_id || capturedSessionId,
                         costUsd: "total_cost_usd" in resultMsg ? resultMsg.total_cost_usd : 0,
                         inputTokens: inputTok,

package/dist/services/async-agent-watcher.js CHANGED Viewed

@@ -27,6 +27,25 @@ import { dirname } from "path";
 import { parseOutputFileStatus } from "./async-agent-parser.js";
 import { ASYNC_AGENTS_STATE_FILE } from "../paths.js";
 import { getAllSessions } from "./session.js";
+/**
+ * B3 — Detect a permanent "target chat does not exist" delivery failure
+ * (Telegram 400 "Bad Request: chat not found"), e.g. the stale chat_id:1
+ * test agent. Such an agent must be abandoned, not retried forever.
+ *
+ * Kept as a local predicate (mirrors isChatNotFoundError in
+ * subagent-delivery.ts) so the watcher does NOT take a new hard
+ * dependency on a fresh subagent-delivery export — many test suites mock
+ * that module with only deliverSubAgentResult, and a destructured import
+ * of a non-mocked symbol would throw. Matched narrowly on the
+ * chat-not-found signature only.
+ */
+function isChatNotFoundError(err) {
+    if (!err || typeof err !== "object")
+        return false;
+    const e = err;
+    const haystack = `${e.message ?? ""} ${e.description ?? ""}`;
+    return /chat not found/i.test(haystack);
+}
 /** How often the polling loop runs against each pending agent. */
 const POLL_INTERVAL_MS = 15_000;
 /** Hard ceiling per agent — 12h. After this, give up and deliver
@@ -199,22 +218,38 @@ export async function pollOnce() {
     const now = Date.now();
     const toRemove = [];
     const missingFileFailureMs = getMissingFileFailureMs();
+    // B3 — when a delivery attempt proves the target chat is permanently
+    // invalid ("chat not found", e.g. the stale chat_id:1 test agent),
+    // abandon the agent so the watcher never retries it. Without this, a
+    // pending agent with an invalid target spams stderr on every poll
+    // cycle (inflating errors_24h) and lingers until the 12h giveUpAt.
+    const abandonIfInvalidTarget = (entry, outcome) => {
+        if (!outcome.chatNotFound)
+            return;
+        if (!toRemove.includes(entry.agentId))
+            toRemove.push(entry.agentId);
+        console.warn(`[async-watcher] abandoning agent ${entry.agentId} — delivery target ` +
+            `chat ${String(entry.chatId)} not found (invalid/stale); will not retry`);
+    };
     for (const entry of pending.values()) {
         entry.lastCheckedAt = now;
         // Timeout check first — if the agent is past its giveUpAt, give up
         // regardless of whether the file shows progress.
         if (now >= entry.giveUpAt) {
-            await deliverAsFailure(entry, "timeout", "Agent ran longer than 12h — giving up");
+            const outcome = await deliverAsFailure(entry, "timeout", "Agent ran longer than 12h — giving up");
+            abandonIfInvalidTarget(entry, outcome);
             toRemove.push(entry.agentId);
             continue;
         }
         const status = await parseOutputFileStatus(entry.outputFile);
         if (status.state === "completed") {
-            await deliverAsCompleted(entry, status.output, status.tokensUsed);
+            const outcome = await deliverAsCompleted(entry, status.output, status.tokensUsed);
+            abandonIfInvalidTarget(entry, outcome);
             toRemove.push(entry.agentId);
         }
         else if (status.state === "failed") {
-            await deliverAsFailure(entry, "error", status.error);
+            const outcome = await deliverAsFailure(entry, "error", status.error);
+            abandonIfInvalidTarget(entry, outcome);
             toRemove.push(entry.agentId);
         }
         else if (status.state === "missing" &&
@@ -222,7 +257,8 @@ export async function pollOnce() {
             // v4.14.2 — Zombie guard: the subprocess never created its
             // output file within `missingFileFailureMs` (default 10 min).
             // Declare failed instead of polling until the 12h giveUpAt.
-            await deliverAsFailure(entry, "error", `Dispatched subprocess never wrote its output file (${Math.round((now - entry.startedAt) / 60_000)}m after start). Likely crashed before initializing, or the file was removed externally.`);
+            const outcome = await deliverAsFailure(entry, "error", `Dispatched subprocess never wrote its output file (${Math.round((now - entry.startedAt) / 60_000)}m after start). Likely crashed before initializing, or the file was removed externally.`);
+            abandonIfInvalidTarget(entry, outcome);
             toRemove.push(entry.agentId);
         }
         // running / missing-but-young → keep polling next cycle
@@ -254,13 +290,20 @@ async function deliverAsCompleted(entry, output, tokensUsed) {
         tokensUsed: tokensUsed ?? { input: 0, output: 0 },
         duration: Date.now() - entry.startedAt,
     };
+    let chatNotFound = false;
     try {
-        await deliverSubAgentResult(info, result);
+        const outcome = await deliverSubAgentResult(info, result);
+        chatNotFound = !!outcome?.chatNotFound;
     }
     catch (err) {
         console.error(`[async-watcher] delivery failed for ${entry.agentId}:`, err);
+        // deliverSubAgentResult normally swallows send errors and reports
+        // chatNotFound via its return value; if it ever throws, still detect
+        // the permanent invalid-target case here.
+        chatNotFound = isChatNotFoundError(err);
     }
     decrementPendingCount(entry.sessionKey);
+    return { chatNotFound };
 }
 async function deliverAsFailure(entry, status, error) {
     const { deliverSubAgentResult } = await import("./subagent-delivery.js");
@@ -283,13 +326,17 @@ async function deliverAsFailure(entry, status, error) {
         duration: Date.now() - entry.startedAt,
         error,
     };
+    let chatNotFound = false;
     try {
-        await deliverSubAgentResult(info, result);
+        const outcome = await deliverSubAgentResult(info, result);
+        chatNotFound = !!outcome?.chatNotFound;
     }
     catch (err) {
         console.error(`[async-watcher] failure delivery failed for ${entry.agentId}:`, err);
+        chatNotFound = isChatNotFoundError(err);
     }
     decrementPendingCount(entry.sessionKey);
+    return { chatNotFound };
 }
 // ── Test helpers ──────────────────────────────────────────────────
 /**

package/dist/services/subagent-delivery.js CHANGED Viewed

@@ -24,6 +24,22 @@ function isTelegramParseError(err) {
     const haystack = `${e.message ?? ""} ${e.description ?? ""}`;
     return /can't parse entities|can't find end of the entity/i.test(haystack);
 }
+/**
+ * B3 — A Telegram send rejected because the TARGET CHAT DOES NOT EXIST
+ * (HTTP 400 "Bad Request: chat not found"). This is a permanent,
+ * non-recoverable condition: the chat id is invalid (e.g. the stale
+ * chat_id:1 test agent), so every retry will fail identically and just
+ * spam stderr. Distinct from transient failures (network, rate-limit)
+ * which ARE worth retrying. Matched narrowly on the chat-not-found
+ * signature only — never on generic Bad Request.
+ */
+export function isChatNotFoundError(err) {
+    if (!err || typeof err !== "object")
+        return false;
+    const e = err;
+    const haystack = `${e.message ?? ""} ${e.description ?? ""}`;
+    return /chat not found/i.test(haystack);
+}
 /**
  * Send a Markdown message with an automatic plain-text retry on parse
  * errors. Any other error propagates to the caller's outer catch.
@@ -40,7 +56,52 @@ async function sendWithMarkdownFallback(api, chatId, text) {
     }
 }
 const MAX_TG_CHUNK = 3800; // below Telegram's 4096 limit with headroom
-const FILE_UPLOAD_THRESHOLD = 20_000; // switch to .md file upload above this
+// V56-T2 honesty fix — the .md file attachment is no longer gated on a
+// separate 20k threshold. It now triggers whenever the cap actually
+// truncates (isTruncated → body.length > BODY_CAP), so every truncated
+// delivery carries the full output as a file and the marker is honest.
+// (The prior 20k-only behavior is fully subsumed by isTruncated.)
+/**
+ * V56-T2 (Layer-2) — honest hard cap on the INLINE delivered body.
+ *
+ * V56-T1 made delivery carry the SDK final result instead of the whole
+ * transcript, but a final result can itself occasionally be very long.
+ * This bounds the inline-message body so a single agent answer can't
+ * flood the chat, while staying HONEST.
+ *
+ * Honesty contract (fixed after a review found a self-defeating
+ * regression): whenever `capBody` actually truncates — i.e. the body is
+ * non-empty AND longer than BODY_CAP — the delivery ALSO attaches the
+ * COMPLETE uncapped output as a `.md` file via the same upload
+ * mechanism the old >20000-char path already used. The marker
+ * therefore truthfully says the full output is *attached*, instead of
+ * the previous wording that pointed at a `~/.alvin-bot/logs/` file the
+ * cap path never actually wrote. Net effect: any truncated delivery =
+ * bounded inline message + full `.md` attachment; no lossy inline-only
+ * range remains. The old >20000 range still attaches the full body,
+ * but it now also sends the bounded inline body first; this extends
+ * "attach the full file" down to "whenever the cap truncated".
+ *
+ * This is a pure bounded slice + a fixed marker — NOT a structure-
+ * guessing heuristic. It no-ops on empty/whitespace so the
+ * `(empty output)` truncated-run signal keeps working (and no spurious
+ * file is attached for it).
+ */
+const BODY_CAP = 1800;
+const TRUNCATION_MARKER = "…(truncated for chat — full output attached)";
+/**
+ * True when `capBody` would actually truncate this body — the single
+ * source of truth for "did we drop content, so the full output must be
+ * attached as a file". Mirrors the `length > BODY_CAP` test in capBody.
+ */
+function isTruncated(body) {
+    return body.length > BODY_CAP;
+}
+function capBody(body) {
+    if (body.length <= BODY_CAP)
+        return body;
+    return `${body.slice(0, BODY_CAP)}\n\n${TRUNCATION_MARKER}`;
+}
 let injectedApi = null;
 let runtimeApi = null;
 /** Test-only hook for injecting a fake bot API. Production code must NEVER call this. */
@@ -251,28 +312,29 @@ export function createLiveStream(chatId, agentName) {
  *   - "slack" / "discord" / "whatsapp" → delivery-registry lookup
  */
 export async function deliverSubAgentResult(info, result, opts = {}) {
+    const OK = { chatNotFound: false };
     // Implicit spawns: the Task-tool bridge in the main stream has already
     // surfaced the output; extra delivery would be duplication.
     if (info.source === "implicit")
-        return;
+        return OK;
     const effective = opts.visibility ?? getVisibility();
     if (effective === "silent")
-        return;
+        return OK;
     if (!info.parentChatId) {
         console.warn(`[subagent-delivery] missing parentChatId for ${info.name} (source=${info.source})`);
-        return;
+        return OK;
     }
     // v4.14 — Platform routing. Telegram is the default path (unchanged).
     const platform = info.platform ?? "telegram";
     if (platform !== "telegram") {
         await deliverViaRegistry(platform, info, result);
-        return;
+        return OK;
     }
     // ── Telegram path (v4.12.x behavior, unchanged) ──────────────────
     const api = getBotApi();
     if (!api) {
         console.warn(`[subagent-delivery] no bot api available for ${info.name}`);
-        return;
+        return OK;
     }
     // Telegram's chatId is always a number at runtime; defensive cast.
     const tgChatId = typeof info.parentChatId === "number"
@@ -280,40 +342,70 @@ export async function deliverSubAgentResult(info, result, opts = {}) {
         : Number(info.parentChatId);
     if (!Number.isFinite(tgChatId)) {
         console.warn(`[subagent-delivery] invalid telegram chatId for ${info.name}`);
-        return;
+        return OK;
     }
     const banner = buildBanner(info, result);
     const body = result.output?.trim() || `(empty output)`;
+    // V56-T2 — bounded variant for the INLINE message path. Whenever this
+    // actually truncates (isTruncated), the FULL uncapped `body` is also
+    // attached as a .md file below, so the cap never costs the user
+    // access to the complete result and the marker stays truthful.
+    const inlineBody = capBody(body);
     try {
-        // Case 1: very long output → file upload with a short banner
-        if (body.length > FILE_UPLOAD_THRESHOLD) {
+        // Truncated → honest delivery: short banner + bounded inline body
+        // (with the truthful "full output attached" marker) + the COMPLETE
+        // uncapped body as a .md file. This single branch covers the whole
+        // truncated range (mid-size AND the old > 20000-char range): there
+        // is no lossy inline-only range anymore. (The old >20000 range
+        // still attaches the full body, now preceded by the bounded
+        // inline body; mid-size now also attaches it and the marker no
+        // longer points at a logs file that was never written.)
+        if (isTruncated(body)) {
             await sendWithMarkdownFallback(api, tgChatId, banner);
+            // The bounded inline body fits in one message (BODY_CAP=1800 plus
+            // the short marker is well under MAX_TG_CHUNK); send it as plain
+            // text so an unbalanced markdown slice can't crash the send.
+            await api.sendMessage(tgChatId, inlineBody.slice(0, MAX_TG_CHUNK));
             try {
                 const { InputFile } = await import("grammy");
                 const buf = Buffer.from(body, "utf-8");
                 await api.sendDocument(tgChatId, new InputFile(buf, `${info.name}.md`));
             }
             catch (err) {
+                // Upload failed → the bounded inline body was already delivered
+                // above, so the user still has something honest (banner + capped
+                // text + marker). The marker slightly over-promises here (file
+                // didn't attach) but this is the rare failure path, not the
+                // normal one, and there is no silent data loss.
                 console.error(`[subagent-delivery] file upload failed:`, err);
-                await api.sendMessage(tgChatId, body.slice(0, MAX_TG_CHUNK));
             }
-            return;
+            return OK;
         }
-        // Case 2: fits in a single message → banner + body joined
-        if (body.length + banner.length + 2 <= MAX_TG_CHUNK) {
-            await sendWithMarkdownFallback(api, tgChatId, `${banner}\n\n${body}`);
-            return;
+        // Not truncated (body ≤ BODY_CAP) → unchanged passthrough.
+        // inlineBody === body here (capBody is a no-op), no marker, no file.
+        // Case A: fits in a single message → banner + body joined
+        if (inlineBody.length + banner.length + 2 <= MAX_TG_CHUNK) {
+            await sendWithMarkdownFallback(api, tgChatId, `${banner}\n\n${inlineBody}`);
+            return OK;
         }
-        // Case 3: medium output → banner as its own message, body chunked
+        // Case B: defensive — a ≤1800-char body normally fits within
+        // MAX_TG_CHUNK together with the banner; keep the banner-then-chunk
+        // fallback in case the banner is unusually long.
         await sendWithMarkdownFallback(api, tgChatId, banner);
-        for (let i = 0; i < body.length; i += MAX_TG_CHUNK) {
+        for (let i = 0; i < inlineBody.length; i += MAX_TG_CHUNK) {
             // Body chunks are always sent as plain text — markdown across
             // arbitrary chunk boundaries would be inconsistent anyway.
-            await api.sendMessage(tgChatId, body.slice(i, i + MAX_TG_CHUNK));
+            await api.sendMessage(tgChatId, inlineBody.slice(i, i + MAX_TG_CHUNK));
         }
+        return OK;
     }
     catch (err) {
         console.error(`[subagent-delivery] send failed for ${info.name}:`, err);
+        // B3 — report a permanent invalid-target failure so the watcher can
+        // abandon this agent instead of retrying it forever. Any other error
+        // (network, rate-limit, parse) is NOT reported as chatNotFound, so the
+        // agent's normal retry/timeout lifecycle is unchanged.
+        return { chatNotFound: isChatNotFoundError(err) };
     }
 }
 /**
@@ -336,36 +428,45 @@ async function deliverViaRegistry(platform, info, result) {
     const chatId = info.parentChatId;
     const banner = buildBannerPlain(info, result);
     const body = result.output?.trim() || `(empty output)`;
+    // V56-T2 — same honest contract as the Telegram path. Whenever the
+    // cap truncates, the FULL uncapped `body` is attached as a .md file
+    // (if the adapter supports uploads) so the marker stays truthful and
+    // the complete output remains accessible.
+    const inlineBody = capBody(body);
     const NON_TG_CHUNK = 3800;
-    const FILE_THRESHOLD = 20_000;
     try {
-        // Very long output → file upload if supported, else truncated text
-        if (body.length > FILE_THRESHOLD) {
+        // Truncated → honest delivery: banner + bounded inline body (with
+        // the truthful "full output attached" marker) + the COMPLETE
+        // uncapped body as a .md file. Covers the whole truncated range
+        // (mid-size AND > the old 20k threshold) — no lossy inline-only
+        // range remains. If the adapter has no sendDocument or the upload
+        // fails, the bounded inline body still went out (honest, just no
+        // file) — no silent data loss.
+        if (isTruncated(body)) {
             await adapter.sendText(chatId, banner);
+            for (let i = 0; i < inlineBody.length; i += NON_TG_CHUNK) {
+                await adapter.sendText(chatId, inlineBody.slice(i, i + NON_TG_CHUNK));
+            }
             if (adapter.sendDocument) {
                 try {
                     await adapter.sendDocument(chatId, Buffer.from(body, "utf-8"), `${info.name}.md`);
-                    return;
                 }
                 catch (err) {
                     console.error(`[subagent-delivery] ${platform} file upload failed:`, err);
                 }
             }
-            // Fallback: chunked text if no file upload or upload failed
-            for (let i = 0; i < body.length; i += NON_TG_CHUNK) {
-                await adapter.sendText(chatId, body.slice(i, i + NON_TG_CHUNK));
-            }
             return;
         }
-        // Fits in one message → combined
-        if (body.length + banner.length + 2 <= NON_TG_CHUNK) {
-            await adapter.sendText(chatId, `${banner}\n\n${body}`);
+        // Not truncated (body ≤ BODY_CAP) → unchanged passthrough.
+        // inlineBody === body here, no marker, no file.
+        if (inlineBody.length + banner.length + 2 <= NON_TG_CHUNK) {
+            await adapter.sendText(chatId, `${banner}\n\n${inlineBody}`);
             return;
         }
-        // Medium — banner first, then chunked body
+        // Defensive banner-then-chunk fallback (e.g. unusually long banner).
         await adapter.sendText(chatId, banner);
-        for (let i = 0; i < body.length; i += NON_TG_CHUNK) {
-            await adapter.sendText(chatId, body.slice(i, i + NON_TG_CHUNK));
+        for (let i = 0; i < inlineBody.length; i += NON_TG_CHUNK) {
+            await adapter.sendText(chatId, inlineBody.slice(i, i + NON_TG_CHUNK));
         }
     }
     catch (err) {

package/dist/services/subagents.js CHANGED Viewed

@@ -288,7 +288,9 @@ async function runSubAgent(id, agentConfig, abort, resolvedName) {
             : os.homedir();
         const systemPrompt = `You are a sub-agent named "${resolvedName}". Complete the following task autonomously. Working directory: ${effectiveCwd}
-When done, return ONLY the final result/outcome, concisely. Do NOT narrate your intermediate steps, your reasoning, your tool calls, or a play-by-play of what you did — the orchestrator only needs the outcome (the answer, the report, the list, the artifact path), and on failure the error plus what was and wasn't done. No preamble, no "Here's what I did", no step-by-step recap. Run status, duration and token usage are reported separately, so don't restate them.`;
+Do NOT send your own Telegram/chat/notification messages as a step, and do NOT use any tool or skill to message the user or post your progress — your final return value is the SOLE delivery path and the orchestrator delivers it for you. A self-sent message causes a duplicate the user sees twice.
+When done, return ONLY the final result/outcome itself, concisely — nothing else. Do NOT narrate, summarize, or recap your intermediate steps, your reasoning, your tool calls, your plan, or a play-by-play of what you did. The orchestrator needs ONLY the outcome (the answer, the report, the list, the artifact path); on failure, return the error plus exactly what was and wasn't done. No preamble, no meta-commentary, no "Here's what I did", no "I will now…", no step-by-step recap. Run status, duration and token usage are reported separately, so don't restate them.`;
         // v4.12.2 — Map the toolset preset to an explicit allowedTools list.
         // The provider honors this override (see src/providers/claude-sdk-provider.ts
         // line ~140). Passing undefined = full access (provider default).
@@ -326,10 +328,22 @@ When done, return ONLY the final result/outcome, concisely. Do NOT narrate your
                 }
             }
             if (chunk.type === "done") {
-                // done.text is the authoritative final accumulated text from
-                // the provider. Prefer it over the buffered value so runs that
-                // end on a tool_use don't leave us with a pre-tool snippet.
-                if (chunk.text && chunk.text.length > 0) {
+                // V56-T1 — Prefer the SDK's authoritative FINAL result over the
+                // accumulated narration. The Claude Agent SDK emits a terminal
+                // `result` message whose single `result` field IS the agent's
+                // actual outcome; the provider surfaces it as `chunk.finalResult`.
+                // Using it here excludes the step-by-step narration BY
+                // CONSTRUCTION (it's a distinct SDK field, not a heuristic over
+                // concatenated text), matching what the detached-dispatch path
+                // already does. When the provider has no distinct final-result
+                // message (non-SDK providers, SDK error results), finalResult is
+                // undefined and we fall back to done.text — the previous
+                // authoritative-accumulated-text behaviour, so streamed-text
+                // consumers and the Fix #5 contract are unaffected.
+                if (typeof chunk.finalResult === "string" && chunk.finalResult.length > 0) {
+                    finalText = chunk.finalResult;
+                }
+                else if (chunk.text && chunk.text.length > 0) {
                     finalText = chunk.text;
                 }
                 inputTokens = chunk.inputTokens || 0;

package/dist/services/telegram.js CHANGED Viewed

@@ -17,6 +17,15 @@ export class TelegramStreamer {
         this.api = api;
         this.replyTo = replyToMessageId;
     }
+    /**
+     * True when at least one message has been sent to the user (i.e. messageId
+     * is set). Used by the A3 suppress-undelivered guard in message.ts to
+     * determine whether visible text has already reached the user — if so, the
+     * no-retract invariant prevents suppressing the final send.
+     *
+     * NOTE(review): the check is a strict `!== null`, so an `undefined`
+     * messageId would also read as "sent" — presumably messageId is
+     * initialised to null before the first send; confirm in the constructor.
+     *
+     * @returns {boolean} Whether messageId is anything other than null.
+     */
+    get hasSentText() {
+        return this.messageId !== null;
+    }
     /**
      * Set a transient status line (e.g. "📖 Read file.html…") that gets
      * appended to the current accumulated text. Passing null clears it.

package/dist/services/trends.js CHANGED Viewed

@@ -33,12 +33,81 @@
  *   ALVIN_TRENDS_INTERVAL_HOURS=24         → snapshot cadence
  *   ALVIN_TRENDS_AI_AFTER_DAYS=7           → days of data before AI analysis kicks in
  */
-import { appendFileSync, existsSync, readFileSync, mkdirSync } from "fs";
+import { appendFileSync, existsSync, readFileSync, writeFileSync, mkdirSync } from "fs";
 import { join, dirname } from "path";
 import { homedir } from "os";
 import { BOT_VERSION } from "../version.js";
 import { emitCritical } from "./critical-notify.js";
 const TRENDS_PATH = join(homedir(), ".alvin-bot", "state", "trends.jsonl");
+/**
+ * B2 — peak-uptime high-water mark. The trends collector takes its FIRST
+ * snapshot ~60s after every boot (startTrendsCollector schedules it at
+ * 60_000ms). takeSnapshot() records uptime_s = process.uptime(), so the
+ * first post-restart sample is structurally ≈ 62s. With deliberate
+ * restarts (/update, launchctl reload) those ~62s samples dominate
+ * trends.jsonl, so the 30-day AI pass perpetually concludes "restart
+ * loop, never lives past ~62s" even when the process has actually been
+ * continuously up for hours by the time the daily snapshot fires.
+ *
+ * Fix: persist the MAXIMUM real uptime this bot has ever observed (across
+ * process generations) and record it on every snapshot as uptime_peak_s.
+ * The peak only ever derives from process.uptime() — it is never
+ * fabricated or extrapolated. The anomaly evaluation then keys on the
+ * peak (hasRepresentativeUptime), so a process that genuinely lived for
+ * hours is not flagged as a ~62s loop, while a genuine fast-restart loop
+ * (peak never climbs past the startup transient) still fires.
+ *
+ * Stored next to trends.jsonl (state/), honoring ALVIN_DATA_DIR so tests
+ * and non-default installs work. Survives restarts by design — that is
+ * the whole point of a high-water mark.
+ */
+function trendsStateDir() {
+    const overrideDir = process.env.ALVIN_DATA_DIR;
+    // Truthiness check on purpose: an unset OR empty override falls back to
+    // the default ~/.alvin-bot data directory.
+    const dataRoot = overrideDir ? overrideDir : join(homedir(), ".alvin-bot");
+    return join(dataRoot, "state");
+}
+function uptimePeakPath() {
+    // JSON file inside the trends state directory that holds the persisted
+    // uptime high-water mark.
+    const stateDir = trendsStateDir();
+    return join(stateDir, "uptime-peak.json");
+}
+/**
+ * The startup transient: takeSnapshot's first sample is taken ~60s after
+ * boot, so any uptime at/under this is indistinguishable from "just
+ * restarted". An uptime ABOVE this proves the process actually lived past
+ * the post-restart sampling window. 600s (10 min) is comfortably above
+ * the 60s first-sample delay + scheduling jitter and far below the 24h
+ * cron cadence, so a healthy bot trivially clears it while a real
+ * crash-loop (exits within seconds/a couple minutes) never does.
+ */
+export const STARTUP_TRANSIENT_S = 600;
+/**
+ * Read the persisted peak uptime, fold in the CURRENT real uptime, persist
+ * the high-water mark when it grew, and return it. Pure w.r.t. time
+ * sources: the only uptime input is process.uptime() — nothing invented.
+ * Disk failures degrade gracefully: an unreadable file counts as a stored
+ * peak of 0, and a failed write still returns the in-memory peak.
+ *
+ * The file is rewritten ONLY when the mark actually moved. A snapshot that
+ * does not raise the peak leaves the existing file untouched, which avoids
+ * a redundant disk write per snapshot and shrinks the window in which an
+ * interrupted write could truncate the persisted value.
+ *
+ * @returns {number} The larger of the stored peak and the current
+ *   process uptime, in whole seconds.
+ */
+function bumpAndReadUptimePeak() {
+    const currentReal = Math.round(process.uptime());
+    let stored = 0;
+    try {
+        const parsed = JSON.parse(readFileSync(uptimePeakPath(), "utf-8"));
+        // Accept only a positive finite number; anything else keeps stored = 0.
+        if (typeof parsed.peak_s === "number" && Number.isFinite(parsed.peak_s) && parsed.peak_s > 0) {
+            stored = parsed.peak_s;
+        }
+    }
+    catch {
+        // No file yet / unreadable / malformed JSON — start the high-water
+        // mark from the current real uptime. Not an error.
+    }
+    const peak = Math.max(stored, currentReal);
+    if (peak === stored) {
+        // The mark did not move: either the file already holds this value,
+        // or both values are 0 and there is nothing worth persisting.
+        return peak;
+    }
+    try {
+        mkdirSync(trendsStateDir(), { recursive: true });
+        writeFileSync(uptimePeakPath(), JSON.stringify({ peak_s: peak }), "utf-8");
+    }
+    catch {
+        // Disk full / permissions — non-fatal; we still return the in-memory peak.
+    }
+    return peak;
+}
 const DEFAULT_INTERVAL_HOURS = 24;
 const DEFAULT_AI_THRESHOLD_DAYS = 7;
 const MAX_RETAIN_DAYS = 90;
@@ -54,6 +123,18 @@ const MAX_RETAIN_DAYS = 90;
  *     (a successful, expected fallback — not an error)
  *   - critical-notify's own delivery-outcome line, kept on stderr on
  *     purpose so it stays visible even in brake/crash context
+ *   - B3: subagent-delivery's "send failed … chat not found" line for a
+ *     stale/test async-agent whose delivery target chat no longer exists
+ *     (e.g. the recurring chat_id:1 test agent). This is benign noise,
+ *     not a real fault: the target chat is invalid, the watcher now
+ *     abandons such agents (see async-agent-watcher.ts), and counting it
+ *     made errors_24h creep upward indefinitely on every poll cycle.
+ *     The match is DELIBERATELY narrow — it requires BOTH the
+ *     `[subagent-delivery] send failed` prefix AND a `chat not found`
+ *     cause on the same line. A subagent-delivery failure for ANY other
+ *     reason (network, rate-limit, parse) is still counted, and a
+ *     `chat not found` from ANY OTHER subsystem (a real misconfigured
+ *     target) is still counted.
  *
  * Counting those turned this very monitor into a false-alarm generator:
  * it flagged its OWN log lines plus every release's restart churn, so
@@ -65,7 +146,7 @@ const MAX_RETAIN_DAYS = 90;
  * any, get added here in one place instead of being chased across the
  * codebase.
  */
-export const ERR_LOG_PATTERN = /^(?!.*(?:\[critical-notify\]|\[subagent-delivery\] Markdown parse failed)).+/;
+export const ERR_LOG_PATTERN = /^(?!.*(?:\[critical-notify\]|\[subagent-delivery\] Markdown parse failed|\[subagent-delivery\] send failed.*chat not found)).+/;
 let trendsTimer = null;
 function isDisabled() {
     return (process.env.ALVIN_DISABLE_TRENDS === "true" ||
@@ -134,6 +215,7 @@ function takeSnapshot(activeProvider) {
     return {
         ts: new Date().toISOString(),
         uptime_s: Math.round(process.uptime()),
+        uptime_peak_s: bumpAndReadUptimePeak(),
         rss_mb: Math.round(mem.rss / 1024 / 1024),
         heap_mb: Math.round(mem.heapUsed / 1024 / 1024),
         crashes_24h: readWatchdogCrashes24h(),
@@ -195,6 +277,139 @@ SUGGESTION: <one shell command OR observation for the operator>
 --- LAST {N} DAYS OF SNAPSHOTS ---
 {SNAPSHOTS}
 --- END ---`;
+/**
+ * V56 — Recent crash-evidence window.
+ *
+ * hasRealCrashEvidence keys the WARN-suppression gate on whether ANY
+ * persisted snapshot recorded a real crash. Snapshots persist for up to
+ * MAX_RETAIN_DAYS and the AI pass reads the last 30 (≈30 days at the 24h
+ * cadence). If the WHOLE 30-day history is considered, a history briefly
+ * poisoned by miscounted deliberate restarts (pre-v5.5.0 accounting bug,
+ * fixed in v5.5.0 for NEW snapshots but the bad lines persist ~30 days)
+ * keeps crash-evidence "true" — so the B2/B4 gate never suppresses and the
+ * false WARN fires for ~a month instead of self-healing.
+ *
+ * Restricting the evidence check to the most recent ~48h means: once
+ * v5.5.0's correct accounting produces clean recent snapshots
+ * (crashes_24h=0), the false WARN clears within ~a day — while a GENUINE
+ * crash loop (real crashes in the recent window) still returns true and
+ * the WARN still fires (the protective purpose is intact).
+ *
+ * 48h (not 24h) is chosen because the snapshot cadence is ~24h
+ * (DEFAULT_INTERVAL_HOURS): a 48h window reliably retains the last 1–2
+ * daily snapshots even across day-boundary jitter / a skipped cron tick,
+ * so a genuine recent crash loop is never missed, while crash evidence
+ * older than ~2 days (the poisoned history) ages out and self-heals. A
+ * timestamp window (not "last N snapshots") is used so self-healing keys
+ * on real wall-clock time and is robust to cadence changes / test-tuned
+ * ALVIN_TRENDS_INTERVAL_HOURS.
+ */
+export const RECENT_CRASH_WINDOW_MS = 48 * 60 * 60 * 1000;
+/**
+ * Returns true if at least one snapshot WITHIN THE RECENT WINDOW has a
+ * non-zero crashes_24h value, meaning a REAL crash (not an
+ * expected/deliberate restart) was recorded recently.
+ *
+ * After the B1 fix, deliberate restarts (SIGTERM / launchctl reload /
+ * /restart / /update) write the expectedRestart beacon flag and are NOT
+ * counted in dailyCrashCount. So crashes_24h === 0 across the recent
+ * snapshots means the bot was only restarted intentionally — no real
+ * crash evidence — even if OLDER snapshots were poisoned by the
+ * pre-v5.5.0 miscount (those age out of the window and the false WARN
+ * self-heals; see RECENT_CRASH_WINDOW_MS).
+ *
+ * Recency is determined from each snapshot's `ts` (ISO 8601, written by
+ * takeSnapshot via new Date().toISOString()). FAIL-SAFE: a snapshot whose
+ * `ts` is missing or unparseable is treated as in-window (counted) — a
+ * health monitor must fail toward "visible", never go blind on bad data.
+ *
+ * Pure function, exported for unit testing.
+ */
+export function hasRealCrashEvidence(snaps, nowMs = Date.now()) {
+    const oldestInWindowMs = nowMs - RECENT_CRASH_WINDOW_MS;
+    const isRecentCrash = (snap) => {
+        const recordedCrash = typeof snap.crashes_24h === "number" && snap.crashes_24h > 0;
+        if (!recordedCrash) {
+            return false;
+        }
+        // FAIL-SAFE: a missing or unparseable ts yields NaN, which is treated
+        // as in-window so bad data can never silence the monitor.
+        const stampMs = typeof snap.ts === "string" ? Date.parse(snap.ts) : Number.NaN;
+        return !Number.isFinite(stampMs) || stampMs >= oldestInWindowMs;
+    };
+    return snaps.some(isRecentCrash);
+}
+/**
+ * B2 — Returns true if AT LEAST ONE snapshot proves the bot process
+ * genuinely lived past the startup transient (i.e. it is NOT a ~62s
+ * restart loop).
+ *
+ * The first per-boot snapshot is structurally taken ~60s after boot, so
+ * its raw uptime_s is always ≈ 62 regardless of how long the process
+ * subsequently runs. uptime_peak_s is the high-water mark of REAL
+ * process.uptime() carried across process generations, so a single
+ * snapshot whose peak exceeds STARTUP_TRANSIENT_S is hard evidence the
+ * process did live for a representative duration. Legacy pre-B2 lines
+ * have no uptime_peak_s — we fall back to their raw uptime_s, so a legacy
+ * 24h cron snapshot still counts as representative on its own.
+ *
+ * A genuine fast-restart loop never lets the peak climb past the
+ * transient, so it correctly returns false and the WARN still fires.
+ *
+ * Pure function, exported for unit testing.
+ */
+export function hasRepresentativeUptime(snaps) {
+    const isFiniteNumber = (value) => typeof value === "number" && Number.isFinite(value);
+    return snaps.some((snap) => {
+        // Prefer the persisted high-water mark; legacy lines without it fall
+        // back to their raw uptime, and anything unusable counts as 0.
+        let observedS = 0;
+        if (isFiniteNumber(snap.uptime_peak_s)) {
+            observedS = snap.uptime_peak_s;
+        }
+        else if (isFiniteNumber(snap.uptime_s)) {
+            observedS = snap.uptime_s;
+        }
+        return observedS > STARTUP_TRANSIENT_S;
+    });
+}
+/**
+ * B2/B4 — Pure crash/restart WARN suppression decision.
+ *
+ * Encodes the SAME two gates, in the SAME precedence, that dailyTask
+ * applies inline (B2 before B4). Extracted as a pure function purely so
+ * the gate COMPOSITION (not just each helper in isolation) is unit
+ * testable — the helpers are individually correct but the interaction
+ * is where the real-crash-loop-after-a-healthy-period regression lives.
+ *
+ * Returns the suppression reason, or "none" when the WARN must fire.
+ *
+ *  - "representative-uptime" (B2): a deliberate-restart / sampling
+ *    artifact — the AI saw ~62s uptimes but a snapshot peak proves the
+ *    process actually lived past the startup transient. ONLY applies
+ *    when there is no real crash evidence: a genuine crash loop after a
+ *    prior healthy period still carries the persisted high peak, so
+ *    without the crash-evidence guard B2 would permanently and silently
+ *    swallow it. With the guard, crashes_24h>0 yields "none".
+ *  - "no-crash-evidence" (B4): crash/restart pattern but no recent
+ *    snapshot has crashes_24h > 0 (deliberate-restart-only, not a real
+ *    crash loop).
+ *  - "none": the WARN is real and must be emitted.
+ *
+ * Pure function, exported for unit testing. The clock is injectable so the
+ * recency window of hasRealCrashEvidence can be pinned in tests instead of
+ * depending on the wall clock at call time.
+ *
+ * @param {boolean} isCrashRestartPattern Whether the AI description matched
+ *   the crash/restart pattern; when false nothing is suppressed.
+ * @param {Array<object>} snaps Snapshots to evaluate.
+ * @param {number} [nowMs=Date.now()] Reference time in epoch milliseconds,
+ *   forwarded to hasRealCrashEvidence.
+ * @returns {"representative-uptime"|"no-crash-evidence"|"none"}
+ */
+export function evaluateCrashRestartSuppression(isCrashRestartPattern, snaps, nowMs = Date.now()) {
+    if (!isCrashRestartPattern)
+        return "none";
+    // A real recent crash always wins: the WARN must fire even though the
+    // persisted uptime high-water mark may still read representative.
+    if (hasRealCrashEvidence(snaps, nowMs))
+        return "none";
+    // No crash evidence from here on. B2 takes precedence over B4.
+    return hasRepresentativeUptime(snaps) ? "representative-uptime" : "no-crash-evidence";
+}
+/**
+ * Test-only: take a snapshot without writing to trends.jsonl.
+ *
+ * Thin pass-through to takeSnapshot(activeProvider).
+ *
+ * NOTE(review): takeSnapshot fills uptime_peak_s via
+ * bumpAndReadUptimePeak(), which persists uptime-peak.json in the trends
+ * state directory, so this helper is not free of disk writes. Point
+ * ALVIN_DATA_DIR at a scratch directory when calling it from tests.
+ */
+export function __takeSnapshotForTest(activeProvider) {
+    return takeSnapshot(activeProvider);
+}
 function parseTrendResponse(text) {
     if (/^ANOMALY:\s*NONE/im.test(text)) {
         return {
@@ -296,6 +511,38 @@ async function dailyTask(registry) {
             console.log(`📊 Trends AI: no anomaly detected`);
             return;
         }
+        const recentSnaps = readSnapshots(30);
+        const isCrashRestartPattern = /crash|restart|loop|uptime/i.test(result.description);
+        // B2 gate: suppress an "uptime stuck at ~62s / restart loop" WARN when
+        // the snapshots PROVE the process actually lived past the startup
+        // transient. The first per-boot snapshot is structurally sampled ~60s
+        // after boot, so raw uptime_s reads ≈62 even for a perfectly healthy
+        // bot that has been up for hours by the time the daily snapshot fires.
+        // uptime_peak_s is the high-water mark of real process.uptime() across
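+        // process generations: if ANY snapshot's peak exceeds the transient,
+        // the "~62s loop" conclusion is factually false. The gate only applies
+        // when there is no recent real crash evidence: the persisted peak
+        // survives restarts, so a crash loop that starts after a healthy
+        // period would otherwise be swallowed. A fast-restart loop whose peak
+        // never climbed past the transient is not suppressed here either.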
+        if (isCrashRestartPattern && !hasRealCrashEvidence(recentSnaps) && hasRepresentativeUptime(recentSnaps)) {
+            console.log(`📊 Trends AI: suppressed WARN "${result.description}" — ` +
+                `uptime/restart pattern flagged but at least one snapshot shows a ` +
+                `representative peak uptime (>${STARTUP_TRANSIENT_S}s); the process ` +
+                `did live well past the post-restart sampling window, not a ~62s loop`);
+            return;
+        }
+        // B4 gate: suppress WARN when the AI flags a crash/restart-loop pattern
+        // but the snapshots inside the recent crash-evidence window
+        // (RECENT_CRASH_WINDOW_MS, see hasRealCrashEvidence) contain ZERO real
+        // crash evidence (crashes_24h === 0 for every in-window snapshot). This
+        // happens when the bot was restarted deliberately (launchctl reload /
+        // /update / /restart) — those produce low uptimes that the AI reads as
+        // "restart loop", but the crash counter stays at 0 because
+        // markExpectedRestart() was written on each clean shutdown. A real
+        // crash loop WILL have crashes_24h > 0 in at least one recent snapshot
+        // and will still fire the WARN.
+        if (isCrashRestartPattern && !hasRealCrashEvidence(recentSnaps)) {
+            console.log(`📊 Trends AI: suppressed WARN "${result.description}" — ` +
+                `crash/restart pattern detected but crashes_24h=0 across all snapshots ` +
+                `(deliberate-restart-only, not a real crash loop)`);
+            return;
+        }
         console.log(`📊 Trends AI: ANOMALY (${result.severity}) — ${result.description}`);
         emitCritical({
             category: "custom",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "alvin-bot",
-  "version": "5.4.0",
+  "version": "5.6.0",
   "description": "Alvin Bot — Your personal AI agent on Telegram, WhatsApp, Discord, Signal, and Web.",
   "type": "module",
   "main": "dist/index.js",