alvin-bot 5.5.0 → 5.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,35 @@
2
2
 
3
3
  All notable changes to Alvin Bot are documented here.
4
4
 
5
+ ## [5.6.0] — 2026-05-18
6
+
7
+ ### Background-task reports are now clean and to the point
8
+
9
+ When a scheduled or background task finishes, Alvin now sends you
10
+ just the result — a tight header (what ran, how long, tokens, success)
11
+ and the actual answer — instead of a wall of its working notes. If a
12
+ result is unusually long, the chat message stays short and the
13
+ complete output comes attached as a file, so you never lose anything
14
+ and never have to scroll through a transcript.
15
+
16
+ ### A clear confirmation when you stop something
17
+
18
+ Press ⛔ Stop (or use /cancel) while Alvin is genuinely working and
19
+ you now get a short, plain confirmation in your language that the work
20
+ was halted — not just a fleeting button flash. If nothing was running,
21
+ Alvin still tells you that honestly instead of pretending it stopped
22
+ something.
23
+
24
+ ### Health alerts that don't cry wolf
25
+
26
+ Alvin's self-monitoring now judges its health on recent activity, so a
27
+ one-off rough patch no longer keeps it flagging a problem for weeks. A
28
+ real issue still raises a flag promptly; a quiet, healthy bot stays
29
+ quiet.
30
+
31
+ As always, this shipped after a full multi-pass review and a
32
+ fresh-install + stress verification on a clean separate machine.
33
+
5
34
  ## [5.5.0] — 2026-05-18
6
35
 
7
36
  ### The ⛔ Stop button now responds instantly — and honestly
@@ -1918,6 +1918,10 @@ export function registerCommands(bot) {
1918
1918
  if (session.isProcessing) {
1919
1919
  requestStop(session, "soft", buildStopDeps(session));
1920
1920
  await ctx.reply(t("bot.cancel.cancelling", lang));
1921
+ // V56-T2c — a real stop fired: follow the "cancelling…" notice with a
1922
+ // brief confirmation that the work was actually halted (consistent UX
1923
+ // with the ⛔ button). Best-effort — must never throw into the handler.
1924
+ await ctx.reply(t("bot.cancel.confirmed", lang)).catch(() => { });
1921
1925
  }
1922
1926
  else {
1923
1927
  await ctx.reply(t("bot.cancel.noRunning", lang));
@@ -1965,6 +1969,15 @@ export function registerCommands(bot) {
1965
1969
  await ctx.editMessageReplyMarkup({});
1966
1970
  }
1967
1971
  catch { /* harmless grammy race — message may already be gone */ }
1972
+ // V56-T2c — when a real stop genuinely fired (wasProcessing), also send a
1973
+ // short in-chat confirmation in the session language so the user gets a
1974
+ // persistent acknowledgement, not only the ephemeral toast. When nothing
1975
+ // was running we deliberately stay silent here (v5.5.0 honesty: the
1976
+ // alreadyDone toast already told the truth). Best-effort — must never
1977
+ // throw into the handler.
1978
+ if (wasProcessing) {
1979
+ await ctx.reply(t("bot.cancel.confirmed", lang)).catch(() => { });
1980
+ }
1968
1981
  });
1969
1982
  // /restart — trigger a PM2-managed restart by exiting the process.
1970
1983
  // The PM2 supervisor picks up the exit and respawns with --update-env.
package/dist/i18n.js CHANGED
@@ -384,6 +384,15 @@ const strings = {
384
384
  es: "Nada en curso — eso ya terminó.",
385
385
  fr: "Rien en cours — c'était déjà terminé.",
386
386
  },
387
+ // Sent as a brief in-chat confirmation only when a stop GENUINELY halted
388
+ // running work (⛔ button / /cancel with work actually in progress). Not
389
+ // sent when nothing was running — that honest behavior stays unchanged.
390
+ "bot.cancel.confirmed": {
391
+ en: "⛔ Stopped — further work was halted.",
392
+ de: "⛔ Gestoppt — die weitere Arbeit wurde angehalten.",
393
+ es: "⛔ Detenido — se interrumpió el trabajo en curso.",
394
+ fr: "⛔ Arrêté — le travail en cours a été interrompu.",
395
+ },
387
396
  // /model
388
397
  "bot.model.chooseHeader": {
389
398
  en: "🤖 *Choose model:*",
@@ -446,9 +446,23 @@ export class ClaudeSDKProvider {
446
446
  sessionResetRequested: true,
447
447
  };
448
448
  }
449
+ // V56-T1 — Surface the SDK's authoritative final answer
450
+ // separately from the accumulated narration. SDKResultSuccess
451
+ // carries a single `result: string` that is the agent's actual
452
+ // outcome (NOT the concatenation of every assistant turn).
453
+ // SDKResultError has no `result` field — leave finalResult
454
+ // undefined there so consumers fall back to buffered text.
455
+ // This is the same source the detached-dispatch path already
456
+ // prefers (`{"type":"result"}.result` in async-agent-parser).
457
+ const finalResult = "subtype" in resultMsg &&
458
+ resultMsg.subtype === "success" &&
459
+ typeof resultMsg.result === "string"
460
+ ? resultMsg.result
461
+ : undefined;
449
462
  yield {
450
463
  type: "done",
451
464
  text: accumulatedText || "",
465
+ ...(finalResult !== undefined ? { finalResult } : {}),
452
466
  sessionId: resultMsg.session_id || capturedSessionId,
453
467
  costUsd: "total_cost_usd" in resultMsg ? resultMsg.total_cost_usd : 0,
454
468
  inputTokens: inputTok,
@@ -56,7 +56,52 @@ async function sendWithMarkdownFallback(api, chatId, text) {
56
56
  }
57
57
  }
58
58
  const MAX_TG_CHUNK = 3800; // below Telegram's 4096 limit with headroom
59
- const FILE_UPLOAD_THRESHOLD = 20_000; // switch to .md file upload above this
59
+ // V56-T2 honesty fix — the .md file attachment is no longer gated on a
60
+ // separate 20k threshold. It now triggers whenever the cap actually
61
+ // truncates (isTruncated → body.length > BODY_CAP), so every truncated
62
+ // delivery carries the full output as a file and the marker is honest.
63
+ // (The prior 20k-only behavior is fully subsumed by isTruncated.)
64
+ /**
65
+ * V56-T2 (Layer-2) — honest hard cap on the INLINE delivered body.
66
+ *
67
+ * V56-T1 made delivery carry the SDK final result instead of the whole
68
+ * transcript, but a final result can itself occasionally be very long.
69
+ * This bounds the inline-message body so a single agent answer can't
70
+ * flood the chat, while staying HONEST.
71
+ *
72
+ * Honesty contract (fixed after a review found a self-defeating
73
+ * regression): whenever `capBody` actually truncates — i.e. the body is
74
+ * non-empty AND longer than BODY_CAP — the delivery ALSO attaches the
75
+ * COMPLETE uncapped output as a `.md` file via the same upload
76
+ * mechanism the old >20000-char path already used. The marker
77
+ * therefore truthfully says the full output is *attached*, instead of
78
+ * the previous wording that pointed at a `~/.alvin-bot/logs/` file the
79
+ * cap path never actually wrote. Net effect: any truncated delivery =
80
+ * bounded inline message + full `.md` attachment; no lossy inline-only
81
+ * range remains. The old >20000 path is unchanged (it already attached
82
+ * the full body); this just extends "attach the full file" down to
83
+ * "whenever the cap truncated".
84
+ *
85
+ * This is a pure bounded slice + a fixed marker — NOT a structure-
86
+ * guessing heuristic. It no-ops on empty/whitespace so the
87
+ * `(empty output)` truncated-run signal keeps working (and no spurious
88
+ * file is attached for it).
89
+ */
90
+ const BODY_CAP = 1800;
91
+ const TRUNCATION_MARKER = "…(truncated for chat — full output attached)";
92
+ /**
93
+ * True when `capBody` would actually truncate this body — the single
94
+ * source of truth for "did we drop content, so the full output must be
95
+ * attached as a file". Mirrors the `length > BODY_CAP` test in capBody.
96
+ */
97
+ function isTruncated(body) {
98
+ return body.length > BODY_CAP;
99
+ }
100
+ function capBody(body) {
101
+ if (body.length <= BODY_CAP)
102
+ return body;
103
+ return `${body.slice(0, BODY_CAP)}\n\n${TRUNCATION_MARKER}`;
104
+ }
60
105
  let injectedApi = null;
61
106
  let runtimeApi = null;
62
107
  /** Test-only hook for injecting a fake bot API. Production code must NEVER call this. */
@@ -301,32 +346,56 @@ export async function deliverSubAgentResult(info, result, opts = {}) {
301
346
  }
302
347
  const banner = buildBanner(info, result);
303
348
  const body = result.output?.trim() || `(empty output)`;
349
+ // V56-T2 — bounded variant for the INLINE message path. Whenever this
350
+ // actually truncates (isTruncated), the FULL uncapped `body` is also
351
+ // attached as a .md file below, so the cap never costs the user
352
+ // access to the complete result and the marker stays truthful.
353
+ const inlineBody = capBody(body);
304
354
  try {
305
- // Case 1: very long output → file upload with a short banner
306
- if (body.length > FILE_UPLOAD_THRESHOLD) {
355
+ // Truncated honest delivery: short banner + bounded inline body
356
+ // (with the truthful "full output attached" marker) + the COMPLETE
357
+ // uncapped body as a .md file. This single branch covers the whole
358
+ // truncated range (mid-size AND the old > 20000-char range): there
359
+ // is no lossy inline-only range anymore. (The old >20000 behavior
360
+ // is unchanged — it already attached the full body; the change is
361
+ // that mid-size now also attaches it and the marker no longer
362
+ // points at a logs file that was never written.)
363
+ if (isTruncated(body)) {
307
364
  await sendWithMarkdownFallback(api, tgChatId, banner);
365
+ // The bounded inline body fits in one message (BODY_CAP=1800 plus
366
+ // the short marker is well under MAX_TG_CHUNK); send it as plain
367
+ // text so an unbalanced markdown slice can't crash the send.
368
+ await api.sendMessage(tgChatId, inlineBody.slice(0, MAX_TG_CHUNK));
308
369
  try {
309
370
  const { InputFile } = await import("grammy");
310
371
  const buf = Buffer.from(body, "utf-8");
311
372
  await api.sendDocument(tgChatId, new InputFile(buf, `${info.name}.md`));
312
373
  }
313
374
  catch (err) {
375
+ // Upload failed → the bounded inline body was already delivered
376
+ // above, so the user still has something honest (banner + capped
377
+ // text + marker). The marker slightly over-promises here (file
378
+ // didn't attach) but this is the rare failure path, not the
379
+ // normal one, and there is no silent data loss.
314
380
  console.error(`[subagent-delivery] file upload failed:`, err);
315
- await api.sendMessage(tgChatId, body.slice(0, MAX_TG_CHUNK));
316
381
  }
317
382
  return OK;
318
383
  }
319
- // Case 2: fits in a single message → banner + body joined
320
- if (body.length + banner.length + 2 <= MAX_TG_CHUNK) {
321
- await sendWithMarkdownFallback(api, tgChatId, `${banner}\n\n${body}`);
384
+ // Not truncated (body ≤ BODY_CAP) → unchanged passthrough.
385
+ // inlineBody === body here (capBody is a no-op), no marker, no file.
386
+ // Case A: fits in a single message → banner + body joined
387
+ if (inlineBody.length + banner.length + 2 <= MAX_TG_CHUNK) {
388
+ await sendWithMarkdownFallback(api, tgChatId, `${banner}\n\n${inlineBody}`);
322
389
  return OK;
323
390
  }
324
- // Case 3: medium output → banner as its own message, body chunked
391
+ // Case B: defensive — a ≤1800-char body still under-runs MAX_TG_CHUNK
392
+ // with the banner, but keep the banner-then-chunk fallback for
393
+ // safety against an unusually long banner.
325
394
  await sendWithMarkdownFallback(api, tgChatId, banner);
326
- for (let i = 0; i < body.length; i += MAX_TG_CHUNK) {
395
+ for (let i = 0; i < inlineBody.length; i += MAX_TG_CHUNK) {
327
396
  // Body chunks are always sent as plain text — markdown across
328
397
  // arbitrary chunk boundaries would be inconsistent anyway.
329
- await api.sendMessage(tgChatId, body.slice(i, i + MAX_TG_CHUNK));
398
+ await api.sendMessage(tgChatId, inlineBody.slice(i, i + MAX_TG_CHUNK));
330
399
  }
331
400
  return OK;
332
401
  }
@@ -359,36 +428,45 @@ async function deliverViaRegistry(platform, info, result) {
359
428
  const chatId = info.parentChatId;
360
429
  const banner = buildBannerPlain(info, result);
361
430
  const body = result.output?.trim() || `(empty output)`;
431
+ // V56-T2 — same honest contract as the Telegram path. Whenever the
432
+ // cap truncates, the FULL uncapped `body` is attached as a .md file
433
+ // (if the adapter supports uploads) so the marker stays truthful and
434
+ // the complete output remains accessible.
435
+ const inlineBody = capBody(body);
362
436
  const NON_TG_CHUNK = 3800;
363
- const FILE_THRESHOLD = 20_000;
364
437
  try {
365
- // Very long output → file upload if supported, else truncated text
366
- if (body.length > FILE_THRESHOLD) {
438
+ // Truncated honest delivery: banner + bounded inline body (with
439
+ // the truthful "full output attached" marker) + the COMPLETE
440
+ // uncapped body as a .md file. Covers the whole truncated range
441
+ // (mid-size AND > the old 20k threshold) — no lossy inline-only
442
+ // range remains. If the adapter has no sendDocument or the upload
443
+ // fails, the bounded inline body still went out (honest, just no
444
+ // file) — no silent data loss.
445
+ if (isTruncated(body)) {
367
446
  await adapter.sendText(chatId, banner);
447
+ for (let i = 0; i < inlineBody.length; i += NON_TG_CHUNK) {
448
+ await adapter.sendText(chatId, inlineBody.slice(i, i + NON_TG_CHUNK));
449
+ }
368
450
  if (adapter.sendDocument) {
369
451
  try {
370
452
  await adapter.sendDocument(chatId, Buffer.from(body, "utf-8"), `${info.name}.md`);
371
- return;
372
453
  }
373
454
  catch (err) {
374
455
  console.error(`[subagent-delivery] ${platform} file upload failed:`, err);
375
456
  }
376
457
  }
377
- // Fallback: chunked text if no file upload or upload failed
378
- for (let i = 0; i < body.length; i += NON_TG_CHUNK) {
379
- await adapter.sendText(chatId, body.slice(i, i + NON_TG_CHUNK));
380
- }
381
458
  return;
382
459
  }
383
- // Fits in one message → combined
384
- if (body.length + banner.length + 2 <= NON_TG_CHUNK) {
385
- await adapter.sendText(chatId, `${banner}\n\n${body}`);
460
+ // Not truncated (body ≤ BODY_CAP) → unchanged passthrough.
461
+ // inlineBody === body here, no marker, no file.
462
+ if (inlineBody.length + banner.length + 2 <= NON_TG_CHUNK) {
463
+ await adapter.sendText(chatId, `${banner}\n\n${inlineBody}`);
386
464
  return;
387
465
  }
388
- // Medium → banner first, then chunked body
466
+ // Defensive banner-then-chunk fallback (e.g. unusually long banner).
389
467
  await adapter.sendText(chatId, banner);
390
- for (let i = 0; i < body.length; i += NON_TG_CHUNK) {
391
- await adapter.sendText(chatId, body.slice(i, i + NON_TG_CHUNK));
468
+ for (let i = 0; i < inlineBody.length; i += NON_TG_CHUNK) {
469
+ await adapter.sendText(chatId, inlineBody.slice(i, i + NON_TG_CHUNK));
392
470
  }
393
471
  }
394
472
  catch (err) {
@@ -288,7 +288,9 @@ async function runSubAgent(id, agentConfig, abort, resolvedName) {
288
288
  : os.homedir();
289
289
  const systemPrompt = `You are a sub-agent named "${resolvedName}". Complete the following task autonomously. Working directory: ${effectiveCwd}
290
290
 
291
- When done, return ONLY the final result/outcome, concisely. Do NOT narrate your intermediate steps, your reasoning, your tool calls, or a play-by-play of what you did the orchestrator only needs the outcome (the answer, the report, the list, the artifact path), and on failure the error plus what was and wasn't done. No preamble, no "Here's what I did", no step-by-step recap. Run status, duration and token usage are reported separately, so don't restate them.`;
291
+ Do NOT send your own Telegram/chat/notification messages as a step, and do NOT use any tool or skill to message the user or post your progress your final return value is the SOLE delivery path and the orchestrator delivers it for you. A self-sent message causes a duplicate the user sees twice.
292
+
293
+ When done, return ONLY the final result/outcome itself, concisely — nothing else. Do NOT narrate, summarize, or recap your intermediate steps, your reasoning, your tool calls, your plan, or a play-by-play of what you did. The orchestrator needs ONLY the outcome (the answer, the report, the list, the artifact path); on failure, return the error plus exactly what was and wasn't done. No preamble, no meta-commentary, no "Here's what I did", no "I will now…", no step-by-step recap. Run status, duration and token usage are reported separately, so don't restate them.`;
292
294
  // v4.12.2 — Map the toolset preset to an explicit allowedTools list.
293
295
  // The provider honors this override (see src/providers/claude-sdk-provider.ts
294
296
  // line ~140). Passing undefined = full access (provider default).
@@ -326,10 +328,22 @@ When done, return ONLY the final result/outcome, concisely. Do NOT narrate your
326
328
  }
327
329
  }
328
330
  if (chunk.type === "done") {
329
- // done.text is the authoritative final accumulated text from
330
- // the provider. Prefer it over the buffered value so runs that
331
- // end on a tool_use don't leave us with a pre-tool snippet.
332
- if (chunk.text && chunk.text.length > 0) {
331
+ // V56-T1 — Prefer the SDK's authoritative FINAL result over the
332
+ // accumulated narration. The Claude Agent SDK emits a terminal
333
+ // `result` message whose single `result` field IS the agent's
334
+ // actual outcome; the provider surfaces it as `chunk.finalResult`.
335
+ // Using it here excludes the step-by-step narration BY
336
+ // CONSTRUCTION (it's a distinct SDK field, not a heuristic over
337
+ // concatenated text), matching what the detached-dispatch path
338
+ // already does. When the provider has no distinct final-result
339
+ // message (non-SDK providers, SDK error results), finalResult is
340
+ // undefined and we fall back to done.text — the previous
341
+ // authoritative-accumulated-text behaviour, so streamed-text
342
+ // consumers and the Fix #5 contract are unaffected.
343
+ if (typeof chunk.finalResult === "string" && chunk.finalResult.length > 0) {
344
+ finalText = chunk.finalResult;
345
+ }
346
+ else if (chunk.text && chunk.text.length > 0) {
333
347
  finalText = chunk.text;
334
348
  }
335
349
  inputTokens = chunk.inputTokens || 0;
@@ -278,19 +278,66 @@ SUGGESTION: <one shell command OR observation for the operator>
278
278
  {SNAPSHOTS}
279
279
  --- END ---`;
280
280
  /**
281
- * Returns true if at least one snapshot in `snaps` has a non-zero
282
- * crashes_24h value, meaning a REAL crash (not an expected/deliberate
283
- * restart) was recorded on that day.
281
+ * V56 — Recent crash-evidence window.
282
+ *
283
+ * hasRealCrashEvidence keys the WARN-suppression gate on whether ANY
284
+ * persisted snapshot recorded a real crash. Snapshots persist for up to
285
+ * MAX_RETAIN_DAYS and the AI pass reads the last 30 (≈30 days at the 24h
286
+ * cadence). If the WHOLE 30-day history is considered, a history briefly
287
+ * poisoned by miscounted deliberate restarts (pre-v5.5.0 accounting bug,
288
+ * fixed in v5.5.0 for NEW snapshots but the bad lines persist ~30 days)
289
+ * keeps crash-evidence "true" — so the B2/B4 gate never suppresses and the
290
+ * false WARN fires for ~a month instead of self-healing.
291
+ *
292
+ * Restricting the evidence check to the most recent ~48h means: once
293
+ * v5.5.0's correct accounting produces clean recent snapshots
294
+ * (crashes_24h=0), the false WARN clears within ~a day — while a GENUINE
295
+ * crash loop (real crashes in the recent window) still returns true and
296
+ * the WARN still fires (the protective purpose is intact).
297
+ *
298
+ * 48h (not 24h) is chosen because the snapshot cadence is ~24h
299
+ * (DEFAULT_INTERVAL_HOURS): a 48h window reliably retains the last 1–2
300
+ * daily snapshots even across day-boundary jitter / a skipped cron tick,
301
+ * so a genuine recent crash loop is never missed, while crash evidence
302
+ * older than ~2 days (the poisoned history) ages out and self-heals. A
303
+ * timestamp window (not "last N snapshots") is used so self-healing keys
304
+ * on real wall-clock time and is robust to cadence changes / test-tuned
305
+ * ALVIN_TRENDS_INTERVAL_HOURS.
306
+ */
307
+ export const RECENT_CRASH_WINDOW_MS = 48 * 60 * 60 * 1000;
308
+ /**
309
+ * Returns true if at least one snapshot WITHIN THE RECENT WINDOW has a
310
+ * non-zero crashes_24h value, meaning a REAL crash (not an
311
+ * expected/deliberate restart) was recorded recently.
284
312
  *
285
313
  * After the B1 fix, deliberate restarts (SIGTERM / launchctl reload /
286
314
  * /restart / /update) write the expectedRestart beacon flag and are NOT
287
- * counted in dailyCrashCount. So crashes_24h === 0 across all snapshots
288
- * means the bot was only restarted intentionally — no real crash evidence.
315
+ * counted in dailyCrashCount. So crashes_24h === 0 across the recent
316
+ * snapshots means the bot was only restarted intentionally — no real
317
+ * crash evidence — even if OLDER snapshots were poisoned by the
318
+ * pre-v5.5.0 miscount (those age out of the window and the false WARN
319
+ * self-heals; see RECENT_CRASH_WINDOW_MS).
320
+ *
321
+ * Recency is determined from each snapshot's `ts` (ISO 8601, written by
322
+ * takeSnapshot via new Date().toISOString()). FAIL-SAFE: a snapshot whose
323
+ * `ts` is missing or unparseable is treated as in-window (counted) — a
324
+ * health monitor must fail toward "visible", never go blind on bad data.
289
325
  *
290
326
  * Pure function, exported for unit testing.
291
327
  */
292
- export function hasRealCrashEvidence(snaps) {
293
- return snaps.some((s) => typeof s.crashes_24h === "number" && s.crashes_24h > 0);
328
+ export function hasRealCrashEvidence(snaps, nowMs = Date.now()) {
329
+ const cutoff = nowMs - RECENT_CRASH_WINDOW_MS;
330
+ return snaps.some((s) => {
331
+ if (!(typeof s.crashes_24h === "number" && s.crashes_24h > 0))
332
+ return false;
333
+ // FAIL-SAFE: no/garbage ts → treat as recent (never silence on bad data).
334
+ if (typeof s.ts !== "string")
335
+ return true;
336
+ const t = Date.parse(s.ts);
337
+ if (!Number.isFinite(t))
338
+ return true;
339
+ return t >= cutoff;
340
+ });
294
341
  }
295
342
  /**
296
343
  * B2 — Returns true if AT LEAST ONE snapshot proves the bot process
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "alvin-bot",
3
- "version": "5.5.0",
3
+ "version": "5.6.0",
4
4
  "description": "Alvin Bot — Your personal AI agent on Telegram, WhatsApp, Discord, Signal, and Web.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",