npm - alvin-bot - Versions diffs - 5.5.0 → 5.6.1 - Mend

alvin-bot 5.5.0 → 5.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/CHANGELOG.md +43 -0
package/dist/handlers/commands.js +13 -0
package/dist/i18n.js +9 -0
package/dist/paths.js +7 -2
package/dist/providers/claude-sdk-provider.js +14 -0
package/dist/services/subagent-delivery.js +56 -16
package/dist/services/subagents.js +19 -5
package/dist/services/trends.js +54 -7
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,49 @@
 All notable changes to Alvin Bot are documented here.
+## [5.6.1] — 2026-05-18
+### Background-task results stay in the chat
+Results from scheduled and background tasks now appear directly in
+the chat as before. Only an output long enough to span more than two
+messages comes as a single attached file instead — keeping your chat
+tidy without ever splitting a result across a wall of messages. No
+"shortened" notices on normal-sized results; you stay in control of
+when something gets saved as a file.
+As always, verified with a fresh-install + stress test on a clean
+separate machine.
+## [5.6.0] — 2026-05-18
+### Background-task reports are now clean and to the point
+When a scheduled or background task finishes, Alvin now sends you
+just the result — a tight header (what ran, how long, tokens, success)
+and the actual answer — instead of a wall of its working notes. If a
+result is unusually long, the chat message stays short and the
+complete output comes attached as a file, so you never lose anything
+and never have to scroll through a transcript.
+### A clear confirmation when you stop something
+Press ⛔ Stop (or use /cancel) while Alvin is genuinely working and
+you now get a short, plain confirmation in your language that the work
+was halted — not just a fleeting button flash. If nothing was running,
+Alvin still tells you that honestly instead of pretending it stopped
+something.
+### Health alerts that don't cry wolf
+Alvin's self-monitoring now judges its health on recent activity, so a
+one-off rough patch no longer keeps it flagging a problem for weeks. A
+real issue still raises a flag promptly; a quiet, healthy bot stays
+quiet.
+As always, this shipped after a full multi-pass review and a
+fresh-install + stress verification on a clean separate machine.
 ## [5.5.0] — 2026-05-18
 ### The ⛔ Stop button now responds instantly — and honestly

package/dist/handlers/commands.js CHANGED Viewed

@@ -1918,6 +1918,10 @@ export function registerCommands(bot) {
         if (session.isProcessing) {
             requestStop(session, "soft", buildStopDeps(session));
             await ctx.reply(t("bot.cancel.cancelling", lang));
+            // V56-T2c — a real stop fired: follow the "cancelling…" notice with a
+            // brief confirmation that the work was actually halted (consistent UX
+            // with the ⛔ button). Best-effort — must never throw into the handler.
+            await ctx.reply(t("bot.cancel.confirmed", lang)).catch(() => { });
         }
         else {
             await ctx.reply(t("bot.cancel.noRunning", lang));
@@ -1965,6 +1969,15 @@ export function registerCommands(bot) {
             await ctx.editMessageReplyMarkup({});
         }
         catch { /* harmless grammy race — message may already be gone */ }
+        // V56-T2c — when a real stop genuinely fired (wasProcessing), also send a
+        // short in-chat confirmation in the session language so the user gets a
+        // persistent acknowledgement, not only the ephemeral toast. When nothing
+        // was running we deliberately stay silent here (v5.5.0 honesty: the
+        // alreadyDone toast already told the truth). Best-effort — must never
+        // throw into the handler.
+        if (wasProcessing) {
+            await ctx.reply(t("bot.cancel.confirmed", lang)).catch(() => { });
+        }
     });
     // /restart — trigger a PM2-managed restart by exiting the process.
     // The PM2 supervisor picks up the exit and respawns with --update-env.

package/dist/i18n.js CHANGED Viewed

@@ -384,6 +384,15 @@ const strings = {
         es: "Nada en curso — eso ya terminó.",
         fr: "Rien en cours — c'était déjà terminé.",
     },
+    // Sent as a brief in-chat confirmation only when a stop GENUINELY halted
+    // running work (⛔ button / /cancel with work actually in progress). Not
+    // sent when nothing was running — that honest behavior stays unchanged.
+    "bot.cancel.confirmed": {
+        en: "⛔ Stopped — further work was halted.",
+        de: "⛔ Gestoppt — die weitere Arbeit wurde angehalten.",
+        es: "⛔ Detenido — se interrumpió el trabajo en curso.",
+        fr: "⛔ Arrêté — le travail en cours a été interrompu.",
+    },
     // /model
     "bot.model.chooseHeader": {
         en: "🤖 *Choose model:*",

package/dist/paths.js CHANGED Viewed

@@ -19,8 +19,13 @@ export const DATA_DIR = resolve(process.env.ALVIN_DATA_DIR || resolve(os.homedir
 export const PUBLIC_DIR = resolve(BOT_ROOT, "web", "public");
 /** plugins/ — Plugin directory */
 export const PLUGINS_DIR = resolve(BOT_ROOT, "plugins");
-/** skills/ — Skill definitions */
-export const SKILLS_DIR = resolve(BOT_ROOT, "skills");
+/** skills/ — Skill definitions.
+ *  Defaults to BOT_ROOT/skills (repo). Override with ALVIN_SKILLS_DIR so
+ *  tests can redirect skill writes into a throwaway sandbox instead of
+ *  polluting the real repo. Default (no env) is byte-identical to before. */
+export const SKILLS_DIR = process.env.ALVIN_SKILLS_DIR
+    ? resolve(process.env.ALVIN_SKILLS_DIR)
+    : resolve(BOT_ROOT, "skills");
 /** User skills directory (custom, outside repo) */
 export const USER_SKILLS_DIR = resolve(DATA_DIR, "skills");
 /** Example/template files (always in repo) */

package/dist/providers/claude-sdk-provider.js CHANGED Viewed

@@ -446,9 +446,23 @@ export class ClaudeSDKProvider {
                             sessionResetRequested: true,
                         };
                     }
+                    // V56-T1 — Surface the SDK's authoritative final answer
+                    // separately from the accumulated narration. SDKResultSuccess
+                    // carries a single `result: string` that is the agent's actual
+                    // outcome (NOT the concatenation of every assistant turn).
+                    // SDKResultError has no `result` field — leave finalResult
+                    // undefined there so consumers fall back to buffered text.
+                    // This is the same source the detached-dispatch path already
+                    // prefers (`{"type":"result"}.result` in async-agent-parser).
+                    const finalResult = "subtype" in resultMsg &&
+                        resultMsg.subtype === "success" &&
+                        typeof resultMsg.result === "string"
+                        ? resultMsg.result
+                        : undefined;
                     yield {
                         type: "done",
                         text: accumulatedText || "",
+                        ...(finalResult !== undefined ? { finalResult } : {}),
                         sessionId: resultMsg.session_id || capturedSessionId,
                         costUsd: "total_cost_usd" in resultMsg ? resultMsg.total_cost_usd : 0,
                         inputTokens: inputTok,

package/dist/services/subagent-delivery.js CHANGED Viewed

@@ -56,7 +56,39 @@ async function sendWithMarkdownFallback(api, chatId, text) {
     }
 }
 const MAX_TG_CHUNK = 3800; // below Telegram's 4096 limit with headroom
-const FILE_UPLOAD_THRESHOLD = 20_000; // switch to .md file upload above this
+/**
+ * Post-v5.6.0 delivery routing — by message count, NOT by a truncating
+ * cap.
+ *
+ * v5.6.0 introduced an inline body cap (1800 chars + a
+ * "…(truncated for chat — full output attached)" marker) that ALWAYS
+ * attached the full body as a `.md` file whenever it truncated. The
+ * effect was that even a small ~4 KB result got truncated + filed,
+ * which the user disliked. That cap is removed entirely.
+ *
+ * V56-T1 ("deliver the final result, not the transcript") is kept — a
+ * normal final result is usually short and now simply appears inline
+ * like it did before v5.6.0.
+ *
+ * The body is routed by how many Telegram messages it would need
+ * (MAX_TG_CHUNK = 3800):
+ *   - banner + body ≤ 1×MAX_TG_CHUNK   → ONE inline message (banner and
+ *                                         body joined by a blank line)
+ *   - otherwise, body ≤ 2×MAX_TG_CHUNK → banner as its own message, then
+ *                                         the body inline in 1–2 messages
+ *                                         (no marker, no file)
+ *   - body > 2×MAX_TG_CHUNK (≥3 chunks)→ do NOT spam 3+ messages: send
+ *                                         the compact header + ONE
+ *                                         short neutral note + the FULL
+ *                                         (uncapped, complete) body as a
+ *                                         `.md` file attachment
+ *
+ * The `(empty output)` truncated-run signal (~14 chars) is tier-1, so
+ * it stays a single inline message with no note and no file.
+ *
+ * The file in the ≥3-chunk case is the COMPLETE body — nothing is cut,
+ * so the note must NOT say "truncated". It is a minimal neutral line.
+ */
+const FILE_THRESHOLD = MAX_TG_CHUNK * 2; // > this ⇒ would need ≥3 messages
+const FULL_RESULT_NOTE = "📎 Full result attached (too long for chat).";
 let injectedApi = null;
 let runtimeApi = null;
 /** Test-only hook for injecting a fake bot API. Production code must NEVER call this. */
@@ -302,26 +334,34 @@ export async function deliverSubAgentResult(info, result, opts = {}) {
     const banner = buildBanner(info, result);
     const body = result.output?.trim() || `(empty output)`;
     try {
-        // Case 1: very long output → file upload with a short banner
-        if (body.length > FILE_UPLOAD_THRESHOLD) {
+        // Tier 3: body would need ≥3 Telegram messages → don't spam the
+        // chat. Send the compact header + ONE short neutral note + the FULL
+        // (uncapped, COMPLETE) body as a single `.md` file. Nothing is cut,
+        // so the note says nothing about truncation.
+        if (body.length > FILE_THRESHOLD) {
             await sendWithMarkdownFallback(api, tgChatId, banner);
+            await api.sendMessage(tgChatId, FULL_RESULT_NOTE);
             try {
                 const { InputFile } = await import("grammy");
                 const buf = Buffer.from(body, "utf-8");
                 await api.sendDocument(tgChatId, new InputFile(buf, `${info.name}.md`));
             }
             catch (err) {
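+                // Upload failed → the user only has the banner + the note, so
+                // they know a result exists and is large. Rare failure path.
+                // NOTE(review): the note already sent says the result is
+                // attached, and the body is not re-sent inline here, so on
+                // this path the result content is NOT delivered — the
+                // failure is only logged to the console.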
                 console.error(`[subagent-delivery] file upload failed:`, err);
-                await api.sendMessage(tgChatId, body.slice(0, MAX_TG_CHUNK));
             }
             return OK;
         }
-        // Case 2: fits in a single message → banner + body joined
+        // Tier 1: body fits with the banner in a single message → join.
         if (body.length + banner.length + 2 <= MAX_TG_CHUNK) {
             await sendWithMarkdownFallback(api, tgChatId, `${banner}\n\n${body}`);
             return OK;
         }
-        // Case 3: medium output → banner as its own message, body chunked
+        // Tier 1/2: body alone needs 1 or 2 messages (≤ 2×MAX_TG_CHUNK).
+        // Send the banner, then the body chunked across at most 2 messages.
+        // No marker, no file — this is the pre-v5.6.0 inline behavior.
         await sendWithMarkdownFallback(api, tgChatId, banner);
         for (let i = 0; i < body.length; i += MAX_TG_CHUNK) {
             // Body chunks are always sent as plain text — markdown across
@@ -359,33 +399,33 @@ async function deliverViaRegistry(platform, info, result) {
     const chatId = info.parentChatId;
     const banner = buildBannerPlain(info, result);
     const body = result.output?.trim() || `(empty output)`;
-    const NON_TG_CHUNK = 3800;
-    const FILE_THRESHOLD = 20_000;
+    const NON_TG_CHUNK = MAX_TG_CHUNK; // same conservative 3800 cap
     try {
-        // Very long output → file upload if supported, else truncated text
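+        // Tier 3: body would need ≥3 messages → don't spam the channel.
+        // Send the banner + ONE short neutral note + the FULL (uncapped,
+        // COMPLETE) body as a `.md` file. Mirrors the Telegram path, with
+        // one difference: the upload is attempted only if the adapter
+        // implements sendDocument. No truncation — the file is the
+        // complete result.
+        // NOTE(review): if the adapter has no sendDocument, or the upload
+        // throws, only the banner + note are sent and the body is not
+        // delivered (there is no chunked-text fallback on this path).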
         if (body.length > FILE_THRESHOLD) {
             await adapter.sendText(chatId, banner);
+            await adapter.sendText(chatId, FULL_RESULT_NOTE);
             if (adapter.sendDocument) {
                 try {
                     await adapter.sendDocument(chatId, Buffer.from(body, "utf-8"), `${info.name}.md`);
-                    return;
                 }
                 catch (err) {
                     console.error(`[subagent-delivery] ${platform} file upload failed:`, err);
                 }
             }
-            // Fallback: chunked text if no file upload or upload failed
-            for (let i = 0; i < body.length; i += NON_TG_CHUNK) {
-                await adapter.sendText(chatId, body.slice(i, i + NON_TG_CHUNK));
-            }
             return;
         }
-        // Fits in one message → combined
+        // Tier 1: body + banner fit in one message → join.
         if (body.length + banner.length + 2 <= NON_TG_CHUNK) {
             await adapter.sendText(chatId, `${banner}\n\n${body}`);
             return;
         }
-        // Medium — banner first, then chunked body
+        // Tier 1/2: banner, then body chunked across at most 2 messages.
+        // No marker, no file.
         await adapter.sendText(chatId, banner);
         for (let i = 0; i < body.length; i += NON_TG_CHUNK) {
             await adapter.sendText(chatId, body.slice(i, i + NON_TG_CHUNK));

package/dist/services/subagents.js CHANGED Viewed

@@ -288,7 +288,9 @@ async function runSubAgent(id, agentConfig, abort, resolvedName) {
             : os.homedir();
         const systemPrompt = `You are a sub-agent named "${resolvedName}". Complete the following task autonomously. Working directory: ${effectiveCwd}
-When done, return ONLY the final result/outcome, concisely. Do NOT narrate your intermediate steps, your reasoning, your tool calls, or a play-by-play of what you did — the orchestrator only needs the outcome (the answer, the report, the list, the artifact path), and on failure the error plus what was and wasn't done. No preamble, no "Here's what I did", no step-by-step recap. Run status, duration and token usage are reported separately, so don't restate them.`;
+Do NOT send your own Telegram/chat/notification messages as a step, and do NOT use any tool or skill to message the user or post your progress — your final return value is the SOLE delivery path and the orchestrator delivers it for you. A self-sent message causes a duplicate the user sees twice.
+When done, return ONLY the final result/outcome itself, concisely — nothing else. Do NOT narrate, summarize, or recap your intermediate steps, your reasoning, your tool calls, your plan, or a play-by-play of what you did. The orchestrator needs ONLY the outcome (the answer, the report, the list, the artifact path); on failure, return the error plus exactly what was and wasn't done. No preamble, no meta-commentary, no "Here's what I did", no "I will now…", no step-by-step recap. Run status, duration and token usage are reported separately, so don't restate them.`;
         // v4.12.2 — Map the toolset preset to an explicit allowedTools list.
         // The provider honors this override (see src/providers/claude-sdk-provider.ts
         // line ~140). Passing undefined = full access (provider default).
@@ -326,10 +328,22 @@ When done, return ONLY the final result/outcome, concisely. Do NOT narrate your
                 }
             }
             if (chunk.type === "done") {
-                // done.text is the authoritative final accumulated text from
-                // the provider. Prefer it over the buffered value so runs that
-                // end on a tool_use don't leave us with a pre-tool snippet.
-                if (chunk.text && chunk.text.length > 0) {
+                // V56-T1 — Prefer the SDK's authoritative FINAL result over the
+                // accumulated narration. The Claude Agent SDK emits a terminal
+                // `result` message whose single `result` field IS the agent's
+                // actual outcome; the provider surfaces it as `chunk.finalResult`.
+                // Using it here excludes the step-by-step narration BY
+                // CONSTRUCTION (it's a distinct SDK field, not a heuristic over
+                // concatenated text), matching what the detached-dispatch path
+                // already does. When the provider has no distinct final-result
+                // message (non-SDK providers, SDK error results), finalResult is
+                // undefined and we fall back to done.text — the previous
+                // authoritative-accumulated-text behaviour, so streamed-text
+                // consumers and the Fix #5 contract are unaffected.
+                if (typeof chunk.finalResult === "string" && chunk.finalResult.length > 0) {
+                    finalText = chunk.finalResult;
+                }
+                else if (chunk.text && chunk.text.length > 0) {
                     finalText = chunk.text;
                 }
                 inputTokens = chunk.inputTokens || 0;

package/dist/services/trends.js CHANGED Viewed

@@ -278,19 +278,66 @@ SUGGESTION: <one shell command OR observation for the operator>
 {SNAPSHOTS}
 --- END ---`;
 /**
- * Returns true if at least one snapshot in `snaps` has a non-zero
- * crashes_24h value, meaning a REAL crash (not an expected/deliberate
- * restart) was recorded on that day.
+ * V56 — Recent crash-evidence window.
+ *
+ * hasRealCrashEvidence keys the WARN-suppression gate on whether ANY
+ * persisted snapshot recorded a real crash. Snapshots persist for up to
+ * MAX_RETAIN_DAYS and the AI pass reads the last 30 (≈30 days at the 24h
+ * cadence). If the WHOLE 30-day history is considered, a history briefly
+ * poisoned by miscounted deliberate restarts (pre-v5.5.0 accounting bug,
+ * fixed in v5.5.0 for NEW snapshots but the bad lines persist ~30 days)
+ * keeps crash-evidence "true" — so the B2/B4 gate never suppresses and the
+ * false WARN fires for ~a month instead of self-healing.
+ *
+ * Restricting the evidence check to the most recent ~48h means: once
+ * v5.5.0's correct accounting produces clean recent snapshots
+ * (crashes_24h=0), the false WARN clears within ~a day — while a GENUINE
+ * crash loop (real crashes in the recent window) still returns true and
+ * the WARN still fires (the protective purpose is intact).
+ *
+ * 48h (not 24h) is chosen because the snapshot cadence is ~24h
+ * (DEFAULT_INTERVAL_HOURS): a 48h window reliably retains the last 1–2
+ * daily snapshots even across day-boundary jitter / a skipped cron tick,
+ * so a genuine recent crash loop is never missed, while crash evidence
+ * older than ~2 days (the poisoned history) ages out and self-heals. A
+ * timestamp window (not "last N snapshots") is used so self-healing keys
+ * on real wall-clock time and is robust to cadence changes / test-tuned
+ * ALVIN_TRENDS_INTERVAL_HOURS.
+ */
+export const RECENT_CRASH_WINDOW_MS = 48 * 60 * 60 * 1000;
+/**
+ * Returns true if at least one snapshot WITHIN THE RECENT WINDOW has a
+ * non-zero crashes_24h value, meaning a REAL crash (not an
+ * expected/deliberate restart) was recorded recently.
  *
  * After the B1 fix, deliberate restarts (SIGTERM / launchctl reload /
  * /restart / /update) write the expectedRestart beacon flag and are NOT
- * counted in dailyCrashCount. So crashes_24h === 0 across all snapshots
- * means the bot was only restarted intentionally — no real crash evidence.
+ * counted in dailyCrashCount. So crashes_24h === 0 across the recent
+ * snapshots means the bot was only restarted intentionally — no real
+ * crash evidence — even if OLDER snapshots were poisoned by the
+ * pre-v5.5.0 miscount (those age out of the window and the false WARN
+ * self-heals; see RECENT_CRASH_WINDOW_MS).
+ *
+ * Recency is determined from each snapshot's `ts` (ISO 8601, written by
+ * takeSnapshot via new Date().toISOString()). FAIL-SAFE: a snapshot whose
+ * `ts` is missing or unparseable is treated as in-window (counted) — a
+ * health monitor must fail toward "visible", never go blind on bad data.
  *
  * Pure function, exported for unit testing.
  */
-export function hasRealCrashEvidence(snaps) {
-    return snaps.some((s) => typeof s.crashes_24h === "number" && s.crashes_24h > 0);
+export function hasRealCrashEvidence(snaps, nowMs = Date.now()) {
+    const cutoff = nowMs - RECENT_CRASH_WINDOW_MS;
+    return snaps.some((s) => {
+        if (!(typeof s.crashes_24h === "number" && s.crashes_24h > 0))
+            return false;
+        // FAIL-SAFE: no/garbage ts → treat as recent (never silence on bad data).
+        if (typeof s.ts !== "string")
+            return true;
+        const t = Date.parse(s.ts);
+        if (!Number.isFinite(t))
+            return true;
+        return t >= cutoff;
+    });
 }
 /**
  * B2 — Returns true if AT LEAST ONE snapshot proves the bot process

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "alvin-bot",
-  "version": "5.5.0",
+  "version": "5.6.1",
   "description": "Alvin Bot — Your personal AI agent on Telegram, WhatsApp, Discord, Signal, and Web.",
   "type": "module",
   "main": "dist/index.js",