alvin-bot 5.5.0 → 5.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,49 @@
2
2
 
3
3
  All notable changes to Alvin Bot are documented here.
4
4
 
5
+ ## [5.6.1] — 2026-05-18
6
+
7
+ ### Background-task results stay in the chat
8
+
9
+ Results from scheduled and background tasks now appear directly in
10
+ the chat as before. Only an output long enough to span more than two
11
+ messages comes as a single attached file instead — keeping your chat
12
+ tidy without ever splitting a result across a wall of messages. No
13
+ "shortened" notices on normal-sized results; you stay in control of
14
+ when something gets saved as a file.
15
+
16
+ As always, verified with a fresh-install + stress test on a clean
17
+ separate machine.
18
+
19
+ ## [5.6.0] — 2026-05-18
20
+
21
+ ### Background-task reports are now clean and to the point
22
+
23
+ When a scheduled or background task finishes, Alvin now sends you
24
+ just the result — a tight header (what ran, how long, tokens, success)
25
+ and the actual answer — instead of a wall of its working notes. If a
26
+ result is unusually long, the chat message stays short and the
27
+ complete output comes attached as a file, so you never lose anything
28
+ and never have to scroll through a transcript.
29
+
30
+ ### A clear confirmation when you stop something
31
+
32
+ Press ⛔ Stop (or use /cancel) while Alvin is genuinely working and
33
+ you now get a short, plain confirmation in your language that the work
34
+ was halted — not just a fleeting button flash. If nothing was running,
35
+ Alvin still tells you that honestly instead of pretending it stopped
36
+ something.
37
+
38
+ ### Health alerts that don't cry wolf
39
+
40
+ Alvin's self-monitoring now judges its health on recent activity, so a
41
+ one-off rough patch no longer keeps it flagging a problem for weeks. A
42
+ real issue still raises a flag promptly; a quiet, healthy bot stays
43
+ quiet.
44
+
45
+ As always, this shipped after a full multi-pass review and a
46
+ fresh-install + stress verification on a clean separate machine.
47
+
5
48
  ## [5.5.0] — 2026-05-18
6
49
 
7
50
  ### The ⛔ Stop button now responds instantly — and honestly
@@ -1918,6 +1918,10 @@ export function registerCommands(bot) {
1918
1918
  if (session.isProcessing) {
1919
1919
  requestStop(session, "soft", buildStopDeps(session));
1920
1920
  await ctx.reply(t("bot.cancel.cancelling", lang));
1921
+ // V56-T2c — a real stop fired: follow the "cancelling…" notice with a
1922
+ // brief confirmation that the work was actually halted (consistent UX
1923
+ // with the ⛔ button). Best-effort — must never throw into the handler.
1924
+ await ctx.reply(t("bot.cancel.confirmed", lang)).catch(() => { });
1921
1925
  }
1922
1926
  else {
1923
1927
  await ctx.reply(t("bot.cancel.noRunning", lang));
@@ -1965,6 +1969,15 @@ export function registerCommands(bot) {
1965
1969
  await ctx.editMessageReplyMarkup({});
1966
1970
  }
1967
1971
  catch { /* harmless grammy race — message may already be gone */ }
1972
+ // V56-T2c — when a real stop genuinely fired (wasProcessing), also send a
1973
+ // short in-chat confirmation in the session language so the user gets a
1974
+ // persistent acknowledgement, not only the ephemeral toast. When nothing
1975
+ // was running we deliberately stay silent here (v5.5.0 honesty: the
1976
+ // alreadyDone toast already told the truth). Best-effort — must never
1977
+ // throw into the handler.
1978
+ if (wasProcessing) {
1979
+ await ctx.reply(t("bot.cancel.confirmed", lang)).catch(() => { });
1980
+ }
1968
1981
  });
1969
1982
  // /restart — trigger a PM2-managed restart by exiting the process.
1970
1983
  // The PM2 supervisor picks up the exit and respawns with --update-env.
package/dist/i18n.js CHANGED
@@ -384,6 +384,15 @@ const strings = {
384
384
  es: "Nada en curso — eso ya terminó.",
385
385
  fr: "Rien en cours — c'était déjà terminé.",
386
386
  },
387
+ // Sent as a brief in-chat confirmation only when a stop GENUINELY halted
388
+ // running work (⛔ button / /cancel with work actually in progress). Not
389
+ // sent when nothing was running — that honest behavior stays unchanged.
390
+ "bot.cancel.confirmed": {
391
+ en: "⛔ Stopped — further work was halted.",
392
+ de: "⛔ Gestoppt — die weitere Arbeit wurde angehalten.",
393
+ es: "⛔ Detenido — se interrumpió el trabajo en curso.",
394
+ fr: "⛔ Arrêté — le travail en cours a été interrompu.",
395
+ },
387
396
  // /model
388
397
  "bot.model.chooseHeader": {
389
398
  en: "🤖 *Choose model:*",
package/dist/paths.js CHANGED
@@ -19,8 +19,13 @@ export const DATA_DIR = resolve(process.env.ALVIN_DATA_DIR || resolve(os.homedir
19
19
  export const PUBLIC_DIR = resolve(BOT_ROOT, "web", "public");
20
20
  /** plugins/ — Plugin directory */
21
21
  export const PLUGINS_DIR = resolve(BOT_ROOT, "plugins");
22
- /** skills/ — Skill definitions */
23
- export const SKILLS_DIR = resolve(BOT_ROOT, "skills");
22
+ /** skills/ — Skill definitions.
23
+ * Defaults to BOT_ROOT/skills (repo). Override with ALVIN_SKILLS_DIR so
24
+ * tests can redirect skill writes into a throwaway sandbox instead of
25
+ * polluting the real repo. Default (no env) is byte-identical to before. */
26
+ export const SKILLS_DIR = process.env.ALVIN_SKILLS_DIR
27
+ ? resolve(process.env.ALVIN_SKILLS_DIR)
28
+ : resolve(BOT_ROOT, "skills");
24
29
  /** User skills directory (custom, outside repo) */
25
30
  export const USER_SKILLS_DIR = resolve(DATA_DIR, "skills");
26
31
  /** Example/template files (always in repo) */
@@ -446,9 +446,23 @@ export class ClaudeSDKProvider {
446
446
  sessionResetRequested: true,
447
447
  };
448
448
  }
449
+ // V56-T1 — Surface the SDK's authoritative final answer
450
+ // separately from the accumulated narration. SDKResultSuccess
451
+ // carries a single `result: string` that is the agent's actual
452
+ // outcome (NOT the concatenation of every assistant turn).
453
+ // SDKResultError has no `result` field — leave finalResult
454
+ // undefined there so consumers fall back to buffered text.
455
+ // This is the same source the detached-dispatch path already
456
+ // prefers (`{"type":"result"}.result` in async-agent-parser).
457
+ const finalResult = "subtype" in resultMsg &&
458
+ resultMsg.subtype === "success" &&
459
+ typeof resultMsg.result === "string"
460
+ ? resultMsg.result
461
+ : undefined;
449
462
  yield {
450
463
  type: "done",
451
464
  text: accumulatedText || "",
465
+ ...(finalResult !== undefined ? { finalResult } : {}),
452
466
  sessionId: resultMsg.session_id || capturedSessionId,
453
467
  costUsd: "total_cost_usd" in resultMsg ? resultMsg.total_cost_usd : 0,
454
468
  inputTokens: inputTok,
@@ -56,7 +56,39 @@ async function sendWithMarkdownFallback(api, chatId, text) {
56
56
  }
57
57
  }
58
58
  const MAX_TG_CHUNK = 3800; // below Telegram's 4096 limit with headroom
59
- const FILE_UPLOAD_THRESHOLD = 20_000; // switch to .md file upload above this
59
+ /**
60
+ * Post-v5.6.0 delivery routing — by message count, NOT by a truncating
61
+ * cap.
62
+ *
63
+ * v5.6.0 introduced an inline body cap (1800 chars + a
64
+ * "…(truncated for chat — full output attached)" marker) that ALWAYS
65
+ * attached the full body as a `.md` file whenever it truncated. The
66
+ * effect was that even a small ~4 KB result got truncated + filed,
67
+ * which the user disliked. That cap is removed entirely.
68
+ *
69
+ * V56-T1 ("deliver the final result, not the transcript") is kept — a
70
+ * normal final result is usually short and now simply appears inline
71
+ * like it did before v5.6.0.
72
+ *
73
+ * The body is routed by how many Telegram messages it would need
74
+ * (MAX_TG_CHUNK = 3800):
75
+ * - body ≤ 1×MAX_TG_CHUNK → ONE inline message
76
+ * - 1×MAX_TG_CHUNK < body ≤ 2× → inline across exactly 2
77
+ * messages (no marker, no file)
78
+ * - body > 2×MAX_TG_CHUNK (≥3 chunks)→ do NOT spam 3+ messages: send
79
+ * the compact header + ONE
80
+ * short neutral note + the FULL
81
+ * (uncapped, complete) body as a
82
+ * `.md` file attachment
83
+ *
84
+ * The `(empty output)` truncated-run signal (~14 chars) is tier-1, so
85
+ * it stays a single inline message with no note and no file.
86
+ *
87
+ * The file in the ≥3-chunk case is the COMPLETE body — nothing is cut,
88
+ * so the note must NOT say "truncated". It is a minimal neutral line.
89
+ */
90
+ const FILE_THRESHOLD = MAX_TG_CHUNK * 2; // > this ⇒ would need ≥3 messages
91
+ const FULL_RESULT_NOTE = "📎 Full result attached (too long for chat).";
60
92
  let injectedApi = null;
61
93
  let runtimeApi = null;
62
94
  /** Test-only hook for injecting a fake bot API. Production code must NEVER call this. */
@@ -302,26 +334,34 @@ export async function deliverSubAgentResult(info, result, opts = {}) {
302
334
  const banner = buildBanner(info, result);
303
335
  const body = result.output?.trim() || `(empty output)`;
304
336
  try {
305
- // Case 1: very long output → file upload with a short banner
306
- if (body.length > FILE_UPLOAD_THRESHOLD) {
337
+ // Tier 3: body would need ≥3 Telegram messages → don't spam the
338
+ // chat. Send the compact header + ONE short neutral note + the FULL
339
+ // (uncapped, COMPLETE) body as a single `.md` file. Nothing is cut,
340
+ // so the note says nothing about truncation.
341
+ if (body.length > FILE_THRESHOLD) {
307
342
  await sendWithMarkdownFallback(api, tgChatId, banner);
343
+ await api.sendMessage(tgChatId, FULL_RESULT_NOTE);
308
344
  try {
309
345
  const { InputFile } = await import("grammy");
310
346
  const buf = Buffer.from(body, "utf-8");
311
347
  await api.sendDocument(tgChatId, new InputFile(buf, `${info.name}.md`));
312
348
  }
313
349
  catch (err) {
350
+ // Upload failed → the user still has the banner + the note, so
351
+ // they know a result exists and is large. Rare failure path,
352
+ // no silent data loss (nothing was promised inline).
314
353
  console.error(`[subagent-delivery] file upload failed:`, err);
315
- await api.sendMessage(tgChatId, body.slice(0, MAX_TG_CHUNK));
316
354
  }
317
355
  return OK;
318
356
  }
319
- // Case 2: fits in a single message → banner + body joined
357
+ // Tier 1: body fits with the banner in a single message → join.
320
358
  if (body.length + banner.length + 2 <= MAX_TG_CHUNK) {
321
359
  await sendWithMarkdownFallback(api, tgChatId, `${banner}\n\n${body}`);
322
360
  return OK;
323
361
  }
324
- // Case 3: medium output → banner as its own message, body chunked
362
+ // Tier 1/2: body alone needs 1 or 2 messages (≤ 2×MAX_TG_CHUNK).
363
+ // Send the banner, then the body chunked across at most 2 messages.
364
+ // No marker, no file — this is the pre-v5.6.0 inline behavior.
325
365
  await sendWithMarkdownFallback(api, tgChatId, banner);
326
366
  for (let i = 0; i < body.length; i += MAX_TG_CHUNK) {
327
367
  // Body chunks are always sent as plain text — markdown across
@@ -359,33 +399,33 @@ async function deliverViaRegistry(platform, info, result) {
359
399
  const chatId = info.parentChatId;
360
400
  const banner = buildBannerPlain(info, result);
361
401
  const body = result.output?.trim() || `(empty output)`;
362
- const NON_TG_CHUNK = 3800;
363
- const FILE_THRESHOLD = 20_000;
402
+ const NON_TG_CHUNK = MAX_TG_CHUNK; // same conservative 3800 cap
364
403
  try {
365
- // Very long output → file upload if supported, else truncated text
404
+ // Tier 3: body would need ≥3 messages → don't spam the channel.
405
+ // Send the banner + ONE short neutral note + the FULL (uncapped,
406
+ // COMPLETE) body as a `.md` file (if the adapter supports uploads).
407
+ // Mirrors the Telegram path exactly. No truncation — the file is
408
+ // the complete result.
366
409
  if (body.length > FILE_THRESHOLD) {
367
410
  await adapter.sendText(chatId, banner);
411
+ await adapter.sendText(chatId, FULL_RESULT_NOTE);
368
412
  if (adapter.sendDocument) {
369
413
  try {
370
414
  await adapter.sendDocument(chatId, Buffer.from(body, "utf-8"), `${info.name}.md`);
371
- return;
372
415
  }
373
416
  catch (err) {
374
417
  console.error(`[subagent-delivery] ${platform} file upload failed:`, err);
375
418
  }
376
419
  }
377
- // Fallback: chunked text if no file upload or upload failed
378
- for (let i = 0; i < body.length; i += NON_TG_CHUNK) {
379
- await adapter.sendText(chatId, body.slice(i, i + NON_TG_CHUNK));
380
- }
381
420
  return;
382
421
  }
383
- // Fits in one message → combined
422
+ // Tier 1: body + banner fit in one message → join.
384
423
  if (body.length + banner.length + 2 <= NON_TG_CHUNK) {
385
424
  await adapter.sendText(chatId, `${banner}\n\n${body}`);
386
425
  return;
387
426
  }
388
- // Medium → banner first, then chunked body
427
+ // Tier 1/2: banner, then body chunked across at most 2 messages.
428
+ // No marker, no file.
389
429
  await adapter.sendText(chatId, banner);
390
430
  for (let i = 0; i < body.length; i += NON_TG_CHUNK) {
391
431
  await adapter.sendText(chatId, body.slice(i, i + NON_TG_CHUNK));
@@ -288,7 +288,9 @@ async function runSubAgent(id, agentConfig, abort, resolvedName) {
288
288
  : os.homedir();
289
289
  const systemPrompt = `You are a sub-agent named "${resolvedName}". Complete the following task autonomously. Working directory: ${effectiveCwd}
290
290
 
291
- When done, return ONLY the final result/outcome, concisely. Do NOT narrate your intermediate steps, your reasoning, your tool calls, or a play-by-play of what you did the orchestrator only needs the outcome (the answer, the report, the list, the artifact path), and on failure the error plus what was and wasn't done. No preamble, no "Here's what I did", no step-by-step recap. Run status, duration and token usage are reported separately, so don't restate them.`;
291
+ Do NOT send your own Telegram/chat/notification messages as a step, and do NOT use any tool or skill to message the user or post your progress your final return value is the SOLE delivery path and the orchestrator delivers it for you. A self-sent message causes a duplicate the user sees twice.
292
+
293
+ When done, return ONLY the final result/outcome itself, concisely — nothing else. Do NOT narrate, summarize, or recap your intermediate steps, your reasoning, your tool calls, your plan, or a play-by-play of what you did. The orchestrator needs ONLY the outcome (the answer, the report, the list, the artifact path); on failure, return the error plus exactly what was and wasn't done. No preamble, no meta-commentary, no "Here's what I did", no "I will now…", no step-by-step recap. Run status, duration and token usage are reported separately, so don't restate them.`;
292
294
  // v4.12.2 — Map the toolset preset to an explicit allowedTools list.
293
295
  // The provider honors this override (see src/providers/claude-sdk-provider.ts
294
296
  // line ~140). Passing undefined = full access (provider default).
@@ -326,10 +328,22 @@ When done, return ONLY the final result/outcome, concisely. Do NOT narrate your
326
328
  }
327
329
  }
328
330
  if (chunk.type === "done") {
329
- // done.text is the authoritative final accumulated text from
330
- // the provider. Prefer it over the buffered value so runs that
331
- // end on a tool_use don't leave us with a pre-tool snippet.
332
- if (chunk.text && chunk.text.length > 0) {
331
+ // V56-T1 — Prefer the SDK's authoritative FINAL result over the
332
+ // accumulated narration. The Claude Agent SDK emits a terminal
333
+ // `result` message whose single `result` field IS the agent's
334
+ // actual outcome; the provider surfaces it as `chunk.finalResult`.
335
+ // Using it here excludes the step-by-step narration BY
336
+ // CONSTRUCTION (it's a distinct SDK field, not a heuristic over
337
+ // concatenated text), matching what the detached-dispatch path
338
+ // already does. When the provider has no distinct final-result
339
+ // message (non-SDK providers, SDK error results), finalResult is
340
+ // undefined and we fall back to done.text — the previous
341
+ // authoritative-accumulated-text behaviour, so streamed-text
342
+ // consumers and the Fix #5 contract are unaffected.
343
+ if (typeof chunk.finalResult === "string" && chunk.finalResult.length > 0) {
344
+ finalText = chunk.finalResult;
345
+ }
346
+ else if (chunk.text && chunk.text.length > 0) {
333
347
  finalText = chunk.text;
334
348
  }
335
349
  inputTokens = chunk.inputTokens || 0;
@@ -278,19 +278,66 @@ SUGGESTION: <one shell command OR observation for the operator>
278
278
  {SNAPSHOTS}
279
279
  --- END ---`;
280
280
  /**
281
- * Returns true if at least one snapshot in `snaps` has a non-zero
282
- * crashes_24h value, meaning a REAL crash (not an expected/deliberate
283
- * restart) was recorded on that day.
281
+ * V56 — Recent crash-evidence window.
282
+ *
283
+ * hasRealCrashEvidence keys the WARN-suppression gate on whether ANY
284
+ * persisted snapshot recorded a real crash. Snapshots persist for up to
285
+ * MAX_RETAIN_DAYS and the AI pass reads the last 30 (≈30 days at the 24h
286
+ * cadence). If the WHOLE 30-day history is considered, a history briefly
287
+ * poisoned by miscounted deliberate restarts (pre-v5.5.0 accounting bug,
288
+ * fixed in v5.5.0 for NEW snapshots but the bad lines persist ~30 days)
289
+ * keeps crash-evidence "true" — so the B2/B4 gate never suppresses and the
290
+ * false WARN fires for ~a month instead of self-healing.
291
+ *
292
+ * Restricting the evidence check to the most recent ~48h means: once
293
+ * v5.5.0's correct accounting produces clean recent snapshots
294
+ * (crashes_24h=0), the false WARN clears within ~a day — while a GENUINE
295
+ * crash loop (real crashes in the recent window) still returns true and
296
+ * the WARN still fires (the protective purpose is intact).
297
+ *
298
+ * 48h (not 24h) is chosen because the snapshot cadence is ~24h
299
+ * (DEFAULT_INTERVAL_HOURS): a 48h window reliably retains the last 1–2
300
+ * daily snapshots even across day-boundary jitter / a skipped cron tick,
301
+ * so a genuine recent crash loop is never missed, while crash evidence
302
+ * older than ~2 days (the poisoned history) ages out and self-heals. A
303
+ * timestamp window (not "last N snapshots") is used so self-healing keys
304
+ * on real wall-clock time and is robust to cadence changes / test-tuned
305
+ * ALVIN_TRENDS_INTERVAL_HOURS.
306
+ */
307
+ export const RECENT_CRASH_WINDOW_MS = 48 * 60 * 60 * 1000;
308
+ /**
309
+ * Returns true if at least one snapshot WITHIN THE RECENT WINDOW has a
310
+ * non-zero crashes_24h value, meaning a REAL crash (not an
311
+ * expected/deliberate restart) was recorded recently.
284
312
  *
285
313
  * After the B1 fix, deliberate restarts (SIGTERM / launchctl reload /
286
314
  * /restart / /update) write the expectedRestart beacon flag and are NOT
287
- * counted in dailyCrashCount. So crashes_24h === 0 across all snapshots
288
- * means the bot was only restarted intentionally — no real crash evidence.
315
+ * counted in dailyCrashCount. So crashes_24h === 0 across the recent
316
+ * snapshots means the bot was only restarted intentionally — no real
317
+ * crash evidence — even if OLDER snapshots were poisoned by the
318
+ * pre-v5.5.0 miscount (those age out of the window and the false WARN
319
+ * self-heals; see RECENT_CRASH_WINDOW_MS).
320
+ *
321
+ * Recency is determined from each snapshot's `ts` (ISO 8601, written by
322
+ * takeSnapshot via new Date().toISOString()). FAIL-SAFE: a snapshot whose
323
+ * `ts` is missing or unparseable is treated as in-window (counted) — a
324
+ * health monitor must fail toward "visible", never go blind on bad data.
289
325
  *
290
326
  * Pure function, exported for unit testing.
291
327
  */
292
- export function hasRealCrashEvidence(snaps) {
293
- return snaps.some((s) => typeof s.crashes_24h === "number" && s.crashes_24h > 0);
328
+ export function hasRealCrashEvidence(snaps, nowMs = Date.now()) {
329
+ const cutoff = nowMs - RECENT_CRASH_WINDOW_MS;
330
+ return snaps.some((s) => {
331
+ if (!(typeof s.crashes_24h === "number" && s.crashes_24h > 0))
332
+ return false;
333
+ // FAIL-SAFE: no/garbage ts → treat as recent (never silence on bad data).
334
+ if (typeof s.ts !== "string")
335
+ return true;
336
+ const t = Date.parse(s.ts);
337
+ if (!Number.isFinite(t))
338
+ return true;
339
+ return t >= cutoff;
340
+ });
294
341
  }
295
342
  /**
296
343
  * B2 — Returns true if AT LEAST ONE snapshot proves the bot process
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "alvin-bot",
3
- "version": "5.5.0",
3
+ "version": "5.6.1",
4
4
  "description": "Alvin Bot — Your personal AI agent on Telegram, WhatsApp, Discord, Signal, and Web.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",