npm - alvin-bot - Versions diffs - 5.5.0 → 5.6.0 - Mend

alvin-bot 5.5.0 → 5.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/CHANGELOG.md +29 -0
package/dist/handlers/commands.js +13 -0
package/dist/i18n.js +9 -0
package/dist/providers/claude-sdk-provider.js +14 -0
package/dist/services/subagent-delivery.js +102 -24
package/dist/services/subagents.js +19 -5
package/dist/services/trends.js +54 -7
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,35 @@
 All notable changes to Alvin Bot are documented here.
+## [5.6.0] — 2026-05-18
+### Background-task reports are now clean and to the point
+When a scheduled or background task finishes, Alvin now sends you
+just the result — a tight header (what ran, how long, tokens, success)
+and the actual answer — instead of a wall of its working notes. If a
+result is unusually long, the chat message stays short and the
+complete output comes attached as a file, so you never lose anything
+and never have to scroll through a transcript.
+### A clear confirmation when you stop something
+Press ⛔ Stop (or use /cancel) while Alvin is genuinely working and
+you now get a short, plain confirmation in your language that the work
+was halted — not just a fleeting button flash. If nothing was running,
+Alvin still tells you that honestly instead of pretending it stopped
+something.
+### Health alerts that don't cry wolf
+Alvin's self-monitoring now judges its health on recent activity, so a
+one-off rough patch no longer keeps it flagging a problem for weeks. A
+real issue still raises a flag promptly; a quiet, healthy bot stays
+quiet.
+As always, this shipped after a full multi-pass review and a
+fresh-install + stress verification on a clean separate machine.
 ## [5.5.0] — 2026-05-18
 ### The ⛔ Stop button now responds instantly — and honestly

package/dist/handlers/commands.js CHANGED Viewed

@@ -1918,6 +1918,10 @@ export function registerCommands(bot) {
         if (session.isProcessing) {
             requestStop(session, "soft", buildStopDeps(session));
             await ctx.reply(t("bot.cancel.cancelling", lang));
+            // V56-T2c — a real stop fired: follow the "cancelling…" notice with a
+            // brief confirmation that the work was actually halted (consistent UX
+            // with the ⛔ button). Best-effort — must never throw into the handler.
+            await ctx.reply(t("bot.cancel.confirmed", lang)).catch(() => { });
         }
         else {
             await ctx.reply(t("bot.cancel.noRunning", lang));
@@ -1965,6 +1969,15 @@ export function registerCommands(bot) {
             await ctx.editMessageReplyMarkup({});
         }
         catch { /* harmless grammy race — message may already be gone */ }
+        // V56-T2c — when a real stop genuinely fired (wasProcessing), also send a
+        // short in-chat confirmation in the session language so the user gets a
+        // persistent acknowledgement, not only the ephemeral toast. When nothing
+        // was running we deliberately stay silent here (v5.5.0 honesty: the
+        // alreadyDone toast already told the truth). Best-effort — must never
+        // throw into the handler.
+        if (wasProcessing) {
+            await ctx.reply(t("bot.cancel.confirmed", lang)).catch(() => { });
+        }
     });
     // /restart — trigger a PM2-managed restart by exiting the process.
     // The PM2 supervisor picks up the exit and respawns with --update-env.

package/dist/i18n.js CHANGED Viewed

@@ -384,6 +384,15 @@ const strings = {
         es: "Nada en curso — eso ya terminó.",
         fr: "Rien en cours — c'était déjà terminé.",
     },
+    // Sent as a brief in-chat confirmation only when a stop GENUINELY halted
+    // running work (⛔ button / /cancel with work actually in progress). Not
+    // sent when nothing was running — that honest behavior stays unchanged.
+    "bot.cancel.confirmed": {
+        en: "⛔ Stopped — further work was halted.",
+        de: "⛔ Gestoppt — die weitere Arbeit wurde angehalten.",
+        es: "⛔ Detenido — se interrumpió el trabajo en curso.",
+        fr: "⛔ Arrêté — le travail en cours a été interrompu.",
+    },
     // /model
     "bot.model.chooseHeader": {
         en: "🤖 *Choose model:*",

package/dist/providers/claude-sdk-provider.js CHANGED Viewed

@@ -446,9 +446,23 @@ export class ClaudeSDKProvider {
                             sessionResetRequested: true,
                         };
                     }
+                    // V56-T1 — Surface the SDK's authoritative final answer
+                    // separately from the accumulated narration. SDKResultSuccess
+                    // carries a single `result: string` that is the agent's actual
+                    // outcome (NOT the concatenation of every assistant turn).
+                    // SDKResultError has no `result` field — leave finalResult
+                    // undefined there so consumers fall back to buffered text.
+                    // This is the same source the detached-dispatch path already
+                    // prefers (`{"type":"result"}.result` in async-agent-parser).
+                    const finalResult = "subtype" in resultMsg &&
+                        resultMsg.subtype === "success" &&
+                        typeof resultMsg.result === "string"
+                        ? resultMsg.result
+                        : undefined;
                     yield {
                         type: "done",
                         text: accumulatedText || "",
+                        ...(finalResult !== undefined ? { finalResult } : {}),
                         sessionId: resultMsg.session_id || capturedSessionId,
                         costUsd: "total_cost_usd" in resultMsg ? resultMsg.total_cost_usd : 0,
                         inputTokens: inputTok,

package/dist/services/subagent-delivery.js CHANGED Viewed

@@ -56,7 +56,52 @@ async function sendWithMarkdownFallback(api, chatId, text) {
     }
 }
 const MAX_TG_CHUNK = 3800; // below Telegram's 4096 limit with headroom
-const FILE_UPLOAD_THRESHOLD = 20_000; // switch to .md file upload above this
+// V56-T2 honesty fix — the .md file attachment is no longer gated on a
+// separate 20k threshold. It now triggers whenever the cap actually
+// truncates (isTruncated → body.length > BODY_CAP), so every truncated
+// delivery carries the full output as a file and the marker is honest.
+// (The prior 20k-only behavior is fully subsumed by isTruncated.)
+/**
+ * V56-T2 (Layer-2) — honest hard cap on the INLINE delivered body.
+ *
+ * V56-T1 made delivery carry the SDK final result instead of the whole
+ * transcript, but a final result can itself occasionally be very long.
+ * This bounds the inline-message body so a single agent answer can't
+ * flood the chat, while staying HONEST.
+ *
+ * Honesty contract (fixed after a review found a self-defeating
+ * regression): whenever `capBody` actually truncates — i.e. the body is
+ * non-empty AND longer than BODY_CAP — the delivery ALSO attaches the
+ * COMPLETE uncapped output as a `.md` file via the same upload
+ * mechanism the old >20000-char path already used. The marker
+ * therefore truthfully says the full output is *attached*, instead of
+ * the previous wording that pointed at a `~/.alvin-bot/logs/` file the
+ * cap path never actually wrote. Net effect: any truncated delivery =
+ * bounded inline message + full `.md` attachment; no lossy inline-only
+ * range remains. The old >20000 path still attaches the full body (it
+ * now also sends the bounded inline preview first); this just extends
+ * "attach the full file" down to "whenever the cap truncated".
+ *
+ * This is a pure bounded slice + a fixed marker — NOT a structure-
+ * guessing heuristic. It no-ops on empty/whitespace so the
+ * `(empty output)` truncated-run signal keeps working (and no spurious
+ * file is attached for it).
+ */
+const BODY_CAP = 1800;
+const TRUNCATION_MARKER = "…(truncated for chat — full output attached)";
+/**
+ * True when `capBody` would actually truncate this body — the single
+ * source of truth for "did we drop content, so the full output must be
+ * attached as a file". Mirrors the `length > BODY_CAP` test in capBody.
+ */
+function isTruncated(body) {
+    return body.length > BODY_CAP;
+}
+/**
+ * Bounds `body` for inline delivery.
+ *
+ * A body of at most BODY_CAP UTF-16 code units is returned unchanged.
+ * A longer body is cut and followed by a blank line and
+ * TRUNCATION_MARKER.
+ *
+ * `slice` and `length` count UTF-16 code units, so a cut at exactly
+ * BODY_CAP can land between the two halves of a surrogate pair (emoji,
+ * astral-plane characters) and leave a lone high surrogate at the end
+ * of the message. When the last kept unit is a high surrogate, the cut
+ * backs off by one unit so the inline text stays well-formed.
+ *
+ * @param {string} body - the full delivered body
+ * @returns {string} `body` itself, or its bounded prefix plus the marker
+ */
+function capBody(body) {
+    if (body.length <= BODY_CAP)
+        return body;
+    // body.length > BODY_CAP here, so index BODY_CAP - 1 always exists.
+    const lastKept = body.charCodeAt(BODY_CAP - 1);
+    const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff;
+    const end = splitsPair ? BODY_CAP - 1 : BODY_CAP;
+    return `${body.slice(0, end)}\n\n${TRUNCATION_MARKER}`;
+}
 let injectedApi = null;
 let runtimeApi = null;
 /** Test-only hook for injecting a fake bot API. Production code must NEVER call this. */
@@ -301,32 +346,56 @@ export async function deliverSubAgentResult(info, result, opts = {}) {
     }
     const banner = buildBanner(info, result);
     const body = result.output?.trim() || `(empty output)`;
+    // V56-T2 — bounded variant for the INLINE message path. Whenever this
+    // actually truncates (isTruncated), the FULL uncapped `body` is also
+    // attached as a .md file below, so the cap never costs the user
+    // access to the complete result and the marker stays truthful.
+    const inlineBody = capBody(body);
     try {
-        // Case 1: very long output → file upload with a short banner
-        if (body.length > FILE_UPLOAD_THRESHOLD) {
+        // Truncated → honest delivery: short banner + bounded inline body
+        // (with the truthful "full output attached" marker) + the COMPLETE
+        // uncapped body as a .md file. This single branch covers the whole
+        // truncated range (mid-size AND the old > 20000-char range): there
+        // is no lossy inline-only range anymore. (The old >20000 behavior
+        // is unchanged — it already attached the full body; the change is
+        // that mid-size now also attaches it and the marker no longer
+        // points at a logs file that was never written.)
+        if (isTruncated(body)) {
             await sendWithMarkdownFallback(api, tgChatId, banner);
+            // The bounded inline body fits in one message (BODY_CAP=1800 plus
+            // the short marker is well under MAX_TG_CHUNK); send it as plain
+            // text so an unbalanced markdown slice can't crash the send.
+            await api.sendMessage(tgChatId, inlineBody.slice(0, MAX_TG_CHUNK));
             try {
                 const { InputFile } = await import("grammy");
                 const buf = Buffer.from(body, "utf-8");
                 await api.sendDocument(tgChatId, new InputFile(buf, `${info.name}.md`));
             }
             catch (err) {
+                // Upload failed → the bounded inline body was already delivered
+                // above, so the user still has something honest (banner + capped
+                // text + marker). The marker slightly over-promises here (file
+                // didn't attach) but this is the rare failure path, not the
+                // normal one, and there is no silent data loss.
                 console.error(`[subagent-delivery] file upload failed:`, err);
-                await api.sendMessage(tgChatId, body.slice(0, MAX_TG_CHUNK));
             }
             return OK;
         }
-        // Case 2: fits in a single message → banner + body joined
-        if (body.length + banner.length + 2 <= MAX_TG_CHUNK) {
-            await sendWithMarkdownFallback(api, tgChatId, `${banner}\n\n${body}`);
+        // Not truncated (body ≤ BODY_CAP) → unchanged passthrough.
+        // inlineBody === body here (capBody is a no-op), no marker, no file.
+        // Case A: fits in a single message → banner + body joined
+        if (inlineBody.length + banner.length + 2 <= MAX_TG_CHUNK) {
+            await sendWithMarkdownFallback(api, tgChatId, `${banner}\n\n${inlineBody}`);
             return OK;
         }
-        // Case 3: medium output → banner as its own message, body chunked
+        // Case B: defensive — a ≤1800-char body still under-runs MAX_TG_CHUNK
+        // with the banner, but keep the banner-then-chunk fallback for
+        // safety against an unusually long banner.
         await sendWithMarkdownFallback(api, tgChatId, banner);
-        for (let i = 0; i < body.length; i += MAX_TG_CHUNK) {
+        for (let i = 0; i < inlineBody.length; i += MAX_TG_CHUNK) {
             // Body chunks are always sent as plain text — markdown across
             // arbitrary chunk boundaries would be inconsistent anyway.
-            await api.sendMessage(tgChatId, body.slice(i, i + MAX_TG_CHUNK));
+            await api.sendMessage(tgChatId, inlineBody.slice(i, i + MAX_TG_CHUNK));
         }
         return OK;
     }
@@ -359,36 +428,45 @@ async function deliverViaRegistry(platform, info, result) {
     const chatId = info.parentChatId;
     const banner = buildBannerPlain(info, result);
     const body = result.output?.trim() || `(empty output)`;
+    // V56-T2 — same honest contract as the Telegram path. Whenever the
+    // cap truncates, the FULL uncapped `body` is attached as a .md file
+    // (if the adapter supports uploads) so the marker stays truthful and
+    // the complete output remains accessible.
+    const inlineBody = capBody(body);
     const NON_TG_CHUNK = 3800;
-    const FILE_THRESHOLD = 20_000;
     try {
-        // Very long output → file upload if supported, else truncated text
-        if (body.length > FILE_THRESHOLD) {
+        // Truncated → honest delivery: banner + bounded inline body (with
+        // the truthful "full output attached" marker) + the COMPLETE
+        // uncapped body as a .md file. Covers the whole truncated range
+        // (mid-size AND > the old 20k threshold) — no lossy inline-only
+        // range remains. If the adapter has no sendDocument or the upload
+        // fails, the bounded inline body still went out (honest, just no
+        // file) — no silent data loss.
+        if (isTruncated(body)) {
             await adapter.sendText(chatId, banner);
+            for (let i = 0; i < inlineBody.length; i += NON_TG_CHUNK) {
+                await adapter.sendText(chatId, inlineBody.slice(i, i + NON_TG_CHUNK));
+            }
             if (adapter.sendDocument) {
                 try {
                     await adapter.sendDocument(chatId, Buffer.from(body, "utf-8"), `${info.name}.md`);
-                    return;
                 }
                 catch (err) {
                     console.error(`[subagent-delivery] ${platform} file upload failed:`, err);
                 }
             }
-            // Fallback: chunked text if no file upload or upload failed
-            for (let i = 0; i < body.length; i += NON_TG_CHUNK) {
-                await adapter.sendText(chatId, body.slice(i, i + NON_TG_CHUNK));
-            }
             return;
         }
-        // Fits in one message → combined
-        if (body.length + banner.length + 2 <= NON_TG_CHUNK) {
-            await adapter.sendText(chatId, `${banner}\n\n${body}`);
+        // Not truncated (body ≤ BODY_CAP) → unchanged passthrough.
+        // inlineBody === body here, no marker, no file.
+        if (inlineBody.length + banner.length + 2 <= NON_TG_CHUNK) {
+            await adapter.sendText(chatId, `${banner}\n\n${inlineBody}`);
             return;
         }
-        // Medium — banner first, then chunked body
+        // Defensive banner-then-chunk fallback (e.g. unusually long banner).
         await adapter.sendText(chatId, banner);
-        for (let i = 0; i < body.length; i += NON_TG_CHUNK) {
-            await adapter.sendText(chatId, body.slice(i, i + NON_TG_CHUNK));
+        for (let i = 0; i < inlineBody.length; i += NON_TG_CHUNK) {
+            await adapter.sendText(chatId, inlineBody.slice(i, i + NON_TG_CHUNK));
         }
     }
     catch (err) {

package/dist/services/subagents.js CHANGED Viewed

@@ -288,7 +288,9 @@ async function runSubAgent(id, agentConfig, abort, resolvedName) {
             : os.homedir();
         const systemPrompt = `You are a sub-agent named "${resolvedName}". Complete the following task autonomously. Working directory: ${effectiveCwd}
-When done, return ONLY the final result/outcome, concisely. Do NOT narrate your intermediate steps, your reasoning, your tool calls, or a play-by-play of what you did — the orchestrator only needs the outcome (the answer, the report, the list, the artifact path), and on failure the error plus what was and wasn't done. No preamble, no "Here's what I did", no step-by-step recap. Run status, duration and token usage are reported separately, so don't restate them.`;
+Do NOT send your own Telegram/chat/notification messages as a step, and do NOT use any tool or skill to message the user or post your progress — your final return value is the SOLE delivery path and the orchestrator delivers it for you. A self-sent message causes a duplicate the user sees twice.
+When done, return ONLY the final result/outcome itself, concisely — nothing else. Do NOT narrate, summarize, or recap your intermediate steps, your reasoning, your tool calls, your plan, or a play-by-play of what you did. The orchestrator needs ONLY the outcome (the answer, the report, the list, the artifact path); on failure, return the error plus exactly what was and wasn't done. No preamble, no meta-commentary, no "Here's what I did", no "I will now…", no step-by-step recap. Run status, duration and token usage are reported separately, so don't restate them.`;
         // v4.12.2 — Map the toolset preset to an explicit allowedTools list.
         // The provider honors this override (see src/providers/claude-sdk-provider.ts
         // line ~140). Passing undefined = full access (provider default).
@@ -326,10 +328,22 @@ When done, return ONLY the final result/outcome, concisely. Do NOT narrate your
                 }
             }
             if (chunk.type === "done") {
-                // done.text is the authoritative final accumulated text from
-                // the provider. Prefer it over the buffered value so runs that
-                // end on a tool_use don't leave us with a pre-tool snippet.
-                if (chunk.text && chunk.text.length > 0) {
+                // V56-T1 — Prefer the SDK's authoritative FINAL result over the
+                // accumulated narration. The Claude Agent SDK emits a terminal
+                // `result` message whose single `result` field IS the agent's
+                // actual outcome; the provider surfaces it as `chunk.finalResult`.
+                // Using it here excludes the step-by-step narration BY
+                // CONSTRUCTION (it's a distinct SDK field, not a heuristic over
+                // concatenated text), matching what the detached-dispatch path
+                // already does. When the provider has no distinct final-result
+                // message (non-SDK providers, SDK error results), finalResult is
+                // undefined and we fall back to done.text — the previous
+                // authoritative-accumulated-text behaviour, so streamed-text
+                // consumers and the Fix #5 contract are unaffected.
+                if (typeof chunk.finalResult === "string" && chunk.finalResult.length > 0) {
+                    finalText = chunk.finalResult;
+                }
+                else if (chunk.text && chunk.text.length > 0) {
                     finalText = chunk.text;
                 }
                 inputTokens = chunk.inputTokens || 0;

package/dist/services/trends.js CHANGED Viewed

@@ -278,19 +278,66 @@ SUGGESTION: <one shell command OR observation for the operator>
 {SNAPSHOTS}
 --- END ---`;
 /**
- * Returns true if at least one snapshot in `snaps` has a non-zero
- * crashes_24h value, meaning a REAL crash (not an expected/deliberate
- * restart) was recorded on that day.
+ * V56 — Recent crash-evidence window.
+ *
+ * hasRealCrashEvidence keys the WARN-suppression gate on whether ANY
+ * persisted snapshot recorded a real crash. Snapshots persist for up to
+ * MAX_RETAIN_DAYS and the AI pass reads the last 30 (≈30 days at the 24h
+ * cadence). If the WHOLE 30-day history is considered, a history briefly
+ * poisoned by miscounted deliberate restarts (pre-v5.5.0 accounting bug,
+ * fixed in v5.5.0 for NEW snapshots but the bad lines persist ~30 days)
+ * keeps crash-evidence "true" — so the B2/B4 gate never suppresses and the
+ * false WARN fires for ~a month instead of self-healing.
+ *
+ * Restricting the evidence check to the most recent ~48h means: once
+ * v5.5.0's correct accounting produces clean recent snapshots
+ * (crashes_24h=0), the false WARN clears within ~a day — while a GENUINE
+ * crash loop (real crashes in the recent window) still returns true and
+ * the WARN still fires (the protective purpose is intact).
+ *
+ * 48h (not 24h) is chosen because the snapshot cadence is ~24h
+ * (DEFAULT_INTERVAL_HOURS): a 48h window reliably retains the last 1–2
+ * daily snapshots even across day-boundary jitter / a skipped cron tick,
+ * so a genuine recent crash loop is never missed, while crash evidence
+ * older than ~2 days (the poisoned history) ages out and self-heals. A
+ * timestamp window (not "last N snapshots") is used so self-healing keys
+ * on real wall-clock time and is robust to cadence changes / test-tuned
+ * ALVIN_TRENDS_INTERVAL_HOURS.
+ */
+export const RECENT_CRASH_WINDOW_MS = 48 * 60 * 60 * 1000;
+/**
+ * Returns true if at least one snapshot WITHIN THE RECENT WINDOW has a
+ * non-zero crashes_24h value, meaning a REAL crash (not an
+ * expected/deliberate restart) was recorded recently.
  *
  * After the B1 fix, deliberate restarts (SIGTERM / launchctl reload /
  * /restart / /update) write the expectedRestart beacon flag and are NOT
- * counted in dailyCrashCount. So crashes_24h === 0 across all snapshots
- * means the bot was only restarted intentionally — no real crash evidence.
+ * counted in dailyCrashCount. So crashes_24h === 0 across the recent
+ * snapshots means the bot was only restarted intentionally — no real
+ * crash evidence — even if OLDER snapshots were poisoned by the
+ * pre-v5.5.0 miscount (those age out of the window and the false WARN
+ * self-heals; see RECENT_CRASH_WINDOW_MS).
+ *
+ * Recency is determined from each snapshot's `ts` (ISO 8601, written by
+ * takeSnapshot via new Date().toISOString()). FAIL-SAFE: a snapshot whose
+ * `ts` is missing or unparseable is treated as in-window (counted) — a
+ * health monitor must fail toward "visible", never go blind on bad data.
  *
  * Pure function, exported for unit testing.
  */
-export function hasRealCrashEvidence(snaps) {
-    return snaps.some((s) => typeof s.crashes_24h === "number" && s.crashes_24h > 0);
+export function hasRealCrashEvidence(snaps, nowMs = Date.now()) {
+    // Oldest timestamp (ms since epoch) that still counts as recent.
+    const windowStart = nowMs - RECENT_CRASH_WINDOW_MS;
+    const isRecentCrash = (snap) => {
+        const crashed = typeof snap.crashes_24h === "number" && snap.crashes_24h > 0;
+        if (!crashed)
+            return false;
+        // FAIL-SAFE: a missing or unparseable ts leaves `stamp` as NaN, which
+        // is treated as in-window so bad data can never hide a recorded crash.
+        const stamp = typeof snap.ts === "string" ? Date.parse(snap.ts) : Number.NaN;
+        return !Number.isFinite(stamp) || stamp >= windowStart;
+    };
+    return snaps.some(isRecentCrash);
 }
 /**
  * B2 — Returns true if AT LEAST ONE snapshot proves the bot process

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "alvin-bot",
-  "version": "5.5.0",
+  "version": "5.6.0",
   "description": "Alvin Bot — Your personal AI agent on Telegram, WhatsApp, Discord, Signal, and Web.",
   "type": "module",
   "main": "dist/index.js",