alvin-bot 5.3.0 → 5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -41,3 +41,103 @@ WEB_PORT=3100
41
41
 
42
42
  # === Custom Chrome (for WhatsApp, if not auto-detected) ===
43
43
  # CHROME_PATH=/usr/bin/google-chrome
44
+
45
+ # ===================================================================
46
+ # OPTIONAL — Security & Auth
47
+ # ===================================================================
48
+
49
+ # Auth mode for new users trying to talk to the bot.
50
+ # allowlist (default) — only ALLOWED_USERS can use the bot
51
+ # pairing — new users get a 6-digit pairing code; owner approves
52
+ # open — anyone can chat (for public bots)
53
+ # AUTH_MODE=allowlist
54
+
55
+ # Session isolation (how context is scoped):
56
+ # per-user (default) — each user gets their own session
57
+ # per-channel — everyone in the same channel shares a session
58
+ # per-channel-peer — per (channel, user) pair
59
+ # SESSION_MODE=per-user
60
+
61
+ # ===================================================================
62
+ # OPTIONAL — Text-to-Speech (TTS)
63
+ # ===================================================================
64
+
65
+ # TTS backend: "edge" (free, default) or "elevenlabs" (paid, higher quality)
66
+ # TTS_PROVIDER=edge
67
+
68
+ # ElevenLabs — set all three to use ElevenLabs TTS
69
+ # ELEVENLABS_API_KEY=
70
+ # ELEVENLABS_VOICE_ID=iP95p4xoKVk53GoZ742B
71
+ # ELEVENLABS_MODEL_ID=eleven_v3
72
+
73
+ # ===================================================================
74
+ # OPTIONAL — Webhooks
75
+ # ===================================================================
76
+
77
+ # Enable inbound webhook endpoint (POST /api/webhook) for external triggers
78
+ # WEBHOOK_ENABLED=false
79
+ # WEBHOOK_TOKEN=change-me-to-a-random-secret
80
+
81
+ # ===================================================================
82
+ # OPTIONAL — Sub-Agents & Compaction
83
+ # ===================================================================
84
+
85
+ # Maximum number of sub-agents that can run in parallel (default: 4)
86
+ # MAX_SUBAGENTS=4
87
+
88
+ # Sub-agent hard timeout in ms. -1 = unlimited (default: -1)
89
+ # SUBAGENT_TIMEOUT=-1
90
+
91
+ # Context compaction threshold in tokens (default: 80000)
92
+ # COMPACTION_THRESHOLD=80000
93
+
94
+ # ===================================================================
95
+ # OPTIONAL — Browser Automation
96
+ # ===================================================================
97
+
98
+ # Connect to an existing Chrome DevTools Protocol endpoint instead of
99
+ # launching a new browser instance.
100
+ # CDP_URL=ws://localhost:9222
101
+
102
+ # Port for the optional browser HTTP gateway (default: 3800)
103
+ # BROWSE_SERVER_PORT=3800
104
+
105
+ # ===================================================================
106
+ # OPTIONAL — Data Directory
107
+ # ===================================================================
108
+
109
+ # Override where alvin-bot stores its data (default: ~/.alvin-bot)
110
+ # ALVIN_DATA_DIR=/custom/path/to/data
111
+
112
+ # Live steering — inject follow-up instructions mid-generation (default: on)
113
+ # STEERING_ENABLED=true
114
+
115
+ # ===================================================================
116
+ # POWER / OWNER OPT-INS — unlock full capability
117
+ #
118
+ # These are safe-by-default for unconfigured installs. As the owner
119
+ # you can opt in to the full power mode for each feature.
120
+ # ===================================================================
121
+
122
+ # Shell & Python execution security:
123
+ # allowlist (default) — only a curated set of safe binaries (ls, cat, git,
124
+ # python3, node, etc.) can be executed by the bot
125
+ # full — unrestricted shell/Python — full agent power mode; set this
126
+ # when you want the bot to run arbitrary commands on your machine
127
+ # deny — block all exec/python tool calls (read-only agent)
128
+ # EXEC_SECURITY=allowlist
129
+
130
+ # Web UI host binding:
131
+ # 127.0.0.1 (default) — loopback only, not reachable from LAN or internet
132
+ # 0.0.0.0 — listen on all interfaces (expose to LAN/VPS/remote)
133
+ # If you set WEB_HOST=0.0.0.0 (or any non-loopback address), also set
134
+ # WEB_PASSWORD to protect the UI:
135
+ # WEB_HOST=127.0.0.1
136
+ # WEB_PASSWORD=your-strong-password
137
+
138
+ # Allow the bot to fetch localhost / LAN / internal URLs (SSRF guard):
139
+ # unset or 0 (default) — private IPs and loopback are blocked to prevent
140
+ # SSRF attacks from untrusted prompt content
141
+ # 1 — enable, so the bot can reach your local services, dev
142
+ # servers, and internal APIs (owner workflow on your own machine)
143
+ # ALLOW_PRIVATE_FETCH=0
package/CHANGELOG.md CHANGED
@@ -2,6 +2,71 @@
2
2
 
3
3
  All notable changes to Alvin Bot are documented here.
4
4
 
5
+ ## [5.5.0] — 2026-05-18
6
+
7
+ ### The ⛔ Stop button now responds instantly — and honestly
8
+
9
+ Stopping a task is now crisp and truthful. The moment a task finishes,
10
+ the Stop button disappears, so you're never tapping a control for
11
+ something that's already done. And the feedback always matches reality:
12
+ if you tap Stop while Alvin is genuinely working, it stops and says so;
13
+ if the task had already completed, Alvin tells you that plainly instead
14
+ of implying it cut something short. If you hit Stop in that brief moment
15
+ while an answer is being prepared, that answer is now held back — "I
16
+ stopped it" means nothing more arrives. Anything Alvin had already
17
+ shown you stays exactly as it was.
18
+
19
+ ### Fewer false alerts — smarter health monitoring
20
+
21
+ Alvin's self-monitoring got a lot more trustworthy. A planned restart
22
+ or an update is no longer mistaken for a problem, and the daily health
23
+ summary only raises a flag when there's real evidence something is
24
+ actually wrong — so the alerts you do get are ones worth reading.
25
+ Routine background housekeeping no longer shows up as noise.
26
+
27
+ As always, this shipped after a full multi-pass review and a
28
+ fresh-install + stress verification on a clean separate machine.
29
+
30
+ ## [5.4.0] — 2026-05-18
31
+
32
+ ### Smoother background tasks — and Alvin always tells you the truth
33
+
34
+ When you ask Alvin to go off and do something longer — research, a
35
+ multi-step job — it now reliably hands control straight back to you so
36
+ you can keep chatting while it works, then delivers the result as its
37
+ own message. And if a task does need to run inline for a moment,
38
+ Alvin says so honestly instead of implying you're free when you're
39
+ not. Talking to Alvin now feels exactly like working with a colleague
40
+ who's already on it: you're never left waiting or guessing.
41
+
42
+ ### Safer out of the box — with your full power one setting away
43
+
44
+ Alvin now ships with sensible, safe defaults so a fresh install is
45
+ solid for everyone, including people who just want to try it quickly.
46
+ Nothing about Alvin's capabilities has been taken away: if you want
47
+ the full, unrestricted superadmin experience it's a single documented
48
+ setting — your machine, your rules, your call. The new `.env.example`
49
+ spells out every option, including the "power" switches, in plain
50
+ language. You stay completely in control.
51
+
52
+ ### Reliability & robustness across the board
53
+
54
+ A broad pass to make Alvin steadier on long-running setups: no more
55
+ duplicate messages under load, cleaner interplay between stopping,
56
+ steering and background work, more accurate scheduling for custom
57
+ cron expressions, and tighter handling of edge cases throughout.
58
+ Verified end-to-end with a stress test on a clean separate machine.
59
+
60
+ ### A leaner, tidier install
61
+
62
+ Roughly 20 MB lighter to install, a calmer first-run experience
63
+ (optional features that aren't configured no longer look like
64
+ errors), better behavior on Windows and for non-German voice notes,
65
+ and a zero-config friendly default so a minimal setup just works.
66
+
67
+ As always, this shipped only after a full multi-pass review and a
68
+ fresh-install + stress verification on a clean second machine.
69
+
5
70
  ## [5.3.0] — 2026-05-18
6
71
 
7
72
  ### Talk to Alvin while it's working — no more interrupting yourself
@@ -429,8 +494,8 @@ A maintainer's local Mac that had been running alvin-bot under PM2 *before* the
429
494
  ```bash
430
495
  pm2 delete polyseus # any other PM2 entries
431
496
  pm2 save --force # empty dump
432
- launchctl unload ~/Library/LaunchAgents/pm2.alvin_de.plist 2>/dev/null
433
- rm -f ~/Library/LaunchAgents/pm2.alvin_de.plist
497
+ launchctl unload ~/Library/LaunchAgents/pm2.youruser.plist 2>/dev/null
498
+ rm -f ~/Library/LaunchAgents/pm2.youruser.plist
434
499
  pm2 kill
435
500
  npm uninstall -g pm2
436
501
  rm -rf ~/.pm2
@@ -2332,7 +2397,7 @@ Example:
2332
2397
  🤖 Alvin Bot v4.8.3
2333
2398
  Node v25.9.0 · darwin/arm64
2334
2399
 
2335
- 📁 Data dir: /Users/alvin_de/.alvin-bot
2400
+ 📁 Data dir: ~/.alvin-bot
2336
2401
  .env: ✅ present
2337
2402
  Provider: claude-sdk
2338
2403
 
package/README.md CHANGED
@@ -62,6 +62,8 @@ That's it. The setup wizard validates everything:
62
62
 
63
63
  **Requires:** Node.js 18+ ([nodejs.org](https://nodejs.org)) · Telegram bot token ([@BotFather](https://t.me/BotFather)) · Your Telegram user ID ([@userinfobot](https://t.me/userinfobot))
64
64
 
65
+ > **Native build note:** Alvin Bot uses `better-sqlite3` for indexed memory. Prebuilt binaries are included for common macOS and Linux environments so most installs need nothing extra. If your platform doesn't have a prebuilt binary and the optional native compilation is skipped, the bot still runs — semantic memory falls back gracefully to keyword search. A C++ toolchain (Xcode Command Line Tools on macOS, `build-essential` on Ubuntu) and Python 3 are only needed if you hit a build-from-source fallback.
66
+
65
67
  Free AI providers available — no credit card needed. **Privacy-first?** Pick the 🔒 **Offline — Gemma 4 E4B** option in setup for a fully local LLM via Ollama (macOS/Linux: automated install; Windows: manual).
66
68
 
67
69
  ### 🔐 A note on permission prompts
@@ -6,7 +6,7 @@
6
6
  "streamThrottleMs": 1500
7
7
  },
8
8
  "ai": {
9
- "primaryProvider": "claude-sdk",
9
+ "primaryProvider": "groq",
10
10
  "fallbackProviders": ["nvidia-kimi-k2.5", "nvidia-llama-3.3-70b"],
11
11
  "maxBudgetUsd": 5.0,
12
12
  "defaultEffort": "high"
package/dist/config.js CHANGED
@@ -26,8 +26,10 @@ export const config = {
26
26
  // Agent
27
27
  defaultWorkingDir: process.env.WORKING_DIR || os.homedir(),
28
28
  maxBudgetUsd: Number(process.env.MAX_BUDGET_USD) || 5.0,
29
- // Model provider (primary)
30
- primaryProvider: process.env.PRIMARY_PROVIDER || "claude-sdk",
29
+ // Model provider (primary). Default is "groq" — works on a fresh install
30
+ // with only BOT_TOKEN + GROQ_API_KEY. Set PRIMARY_PROVIDER=claude-sdk to
31
+ // use the Claude SDK (requires `claude login` / Claude Max subscription).
32
+ primaryProvider: process.env.PRIMARY_PROVIDER || "groq",
31
33
  fallbackProviders: (process.env.FALLBACK_PROVIDERS || "")
32
34
  .split(",")
33
35
  .map(s => s.trim())
@@ -80,8 +82,9 @@ export const config = {
80
82
  // Browser
81
83
  cdpUrl: process.env.CDP_URL || "",
82
84
  browseServerPort: Number(process.env.BROWSE_SERVER_PORT) || 3800,
83
- // Exec Security
84
- execSecurity: (process.env.EXEC_SECURITY || "full"),
85
+ // Exec Security — default is "allowlist" (safe). Set EXEC_SECURITY=full to
86
+ // allow shell pipelines, metacharacters, and arbitrary binaries (opt-in).
87
+ execSecurity: (process.env.EXEC_SECURITY || "allowlist"),
85
88
  };
86
89
  /**
87
90
  * Feature flag: btw live-steering. Default ON — only "false" or "0" disables.
@@ -1946,11 +1946,19 @@ export function registerCommands(bot) {
1946
1946
  const sessionKey = ctx.match[1];
1947
1947
  const session = getSession(sessionKey);
1948
1948
  const lang = session.language;
1949
- if (session.isProcessing) {
1949
+ // A1 — Capture isProcessing BEFORE requestStop (which sets it false)
1950
+ // so we can show the right toast: "stopped" vs "already finished".
1951
+ const wasProcessing = session.isProcessing;
1952
+ if (wasProcessing) {
1950
1953
  requestStop(session, "soft", buildStopDeps(session));
1951
1954
  }
1955
+ // A1 — Honest toast: if the turn had already finished when the button was
1956
+ // tapped, don't claim "stopped" — tell the user it was already done.
1957
+ const toastKey = wasProcessing
1958
+ ? "bot.cancel.stoppedToast"
1959
+ : "bot.cancel.alreadyDone";
1952
1960
  try {
1953
- await ctx.answerCallbackQuery({ text: t("bot.cancel.stoppedToast", lang) });
1961
+ await ctx.answerCallbackQuery({ text: t(toastKey, lang) });
1954
1962
  }
1955
1963
  catch { /* harmless grammy race */ }
1956
1964
  try {
@@ -74,7 +74,14 @@ export async function handleDocument(ctx) {
74
74
  // Download the file
75
75
  const file = await ctx.api.getFile(doc.file_id);
76
76
  const fileUrl = `https://api.telegram.org/file/bot${config.botToken}/${file.file_path}`;
77
- const localPath = path.join(TEMP_DIR, `doc_${Date.now()}_${filename}`);
77
+ // H2: strip any path components from the attacker-controlled file_name
78
+ // to prevent writing outside TEMP_DIR (e.g. file_name="../../../x").
79
+ const safeFilename = path.basename(filename);
80
+ const localPath = path.join(TEMP_DIR, `doc_${Date.now()}_${safeFilename}`);
81
+ // Containment assertion: resolved path must stay inside TEMP_DIR.
82
+ if (!path.resolve(localPath).startsWith(path.resolve(TEMP_DIR))) {
83
+ throw new Error("File path containment violation");
84
+ }
78
85
  await downloadFile(fileUrl, localPath);
79
86
  const caption = ctx.message?.caption || "";
80
87
  const userInstruction = caption || `Analysiere diese Datei: ${filename}`;
@@ -1,5 +1,6 @@
1
1
  import { InputFile, InlineKeyboard } from "grammy";
2
2
  import fs from "fs";
3
+ import crypto from "crypto";
3
4
  import { getSession, addToHistory, trackProviderUsage, buildSessionKey, getTelegramWorkspace, markSessionDirty } from "../services/session.js";
4
5
  import { resolveWorkspaceOrDefault, getWorkspace } from "../services/workspaces.js";
5
6
  import { TelegramStreamer } from "../services/telegram.js";
@@ -121,6 +122,37 @@ const TOOL_ICONS = {
121
122
  WebFetch: "📡",
122
123
  Task: "🤖",
123
124
  };
125
+ // ── A3 — stop-suppress-undelivered pure predicate ────────────────────────────
126
+ /**
127
+ * Determine whether the final answer send should be suppressed because a stop
128
+ * was requested and no visible text has yet been delivered to the user.
129
+ *
130
+ * This closes the gap behind "I clicked Stop but it answered anyway": the
131
+ * Claude SDK delivers short answers atomically, so the for-await loop parks
132
+ * on IPC the whole time, and the complete answer arrives as one block. By the
133
+ * time the consumer bail fires at the top of the loop, the answer is computed
134
+ * and about to be sent. This guard is the only stoppable moment for atomic
135
+ * answers.
136
+ *
137
+ * HARD CONSTRAINT — no-retract invariant: if ANY visible text has already
138
+ * been streamed/committed to the user (visibleTextAlreadySent=true), the
139
+ * predicate returns false regardless of stop state. Partial output that
140
+ * already reached the user is NEVER retracted. The consumer bail in the
141
+ * for-await loop already handles mid-stream stops; this guard only acts on
142
+ * the final commit step.
143
+ *
144
+ * Truth table:
145
+ * stopRequested=truthy + visibleTextAlreadySent=false → true (suppress)
146
+ * stopRequested=truthy + visibleTextAlreadySent=true → false (no-retract)
147
+ * stopRequested=falsy + * → false (normal)
148
+ */
149
+ export function shouldSuppressFinalSend(args) {
150
+ if (!args.stopRequested)
151
+ return false;
152
+ if (args.visibleTextAlreadySent)
153
+ return false;
154
+ return true;
155
+ }
124
156
  // ── v5.2 live steering — pure routing helper ─────────────────────────────────
125
157
  /**
126
158
  * Decide how a mid-task message (arriving while `session.isProcessing`) should
@@ -140,10 +172,34 @@ export function decideMidTaskRouting(args) {
140
172
  return "queue";
141
173
  if (args.shouldBypass)
142
174
  return "bypass";
143
- if (args.providerIsClaudeSdk && args.steeringEnabled && args.hasSteerChannel)
175
+ if (args.providerIsClaudeSdk && args.steeringEnabled && args.hasSteerChannel && args.hasLiveSdkQuery)
144
176
  return "steer";
145
177
  return "queue";
146
178
  }
179
+ // ── Cycle-3 P0 — background honesty guard ────────────────────────────────────
180
+ /**
181
+ * Detect when the bot falsely promised "running in the background — you can
182
+ * keep chatting" but actually ran a sync Task/Agent that blocked the session.
183
+ *
184
+ * Returns true when all of the following hold:
185
+ * 1. A Task/Agent chunk arrived WITHOUT `run_in_background: true` (i.e. the
186
+ * stuck-timer entered sync mode — `taskChunkSeenWithoutRunInBackground`).
187
+ * 2. No real background detach happened this turn:
188
+ * • `mcp__alvin__dispatch_agent` was NOT called (`dispatchAgentFired=false`)
189
+ * • `pendingBackgroundCount` did NOT increase (`pendingBackgroundDelta=0`)
190
+ *
191
+ * Exported so it can be unit-tested without a grammy Context mock.
192
+ */
193
+ export function detectUndetachedBackgroundClaim(args) {
194
+ if (!args.taskChunkSeenWithoutRunInBackground)
195
+ return false;
196
+ // Dead in production wiring (always false there — PATH A is detected via pendingBackgroundDelta); kept for explicit unit-test truth-table coverage.
197
+ if (args.dispatchAgentFired)
198
+ return false;
199
+ if (args.pendingBackgroundDelta > 0)
200
+ return false;
201
+ return true;
202
+ }
147
203
  /** React to a message with an emoji. Silently fails if reactions aren't supported. */
148
204
  async function react(ctx, emoji) {
149
205
  try {
@@ -210,6 +266,7 @@ export async function handleMessage(ctx) {
210
266
  providerIsClaudeSdk: _midTaskProviderIsSdk,
211
267
  steeringEnabled: isSteeringEnabled(),
212
268
  hasSteerChannel: !!session._steerChannel,
269
+ hasLiveSdkQuery: !!session._qHandle, // C-H3: require a live SDK query handle
213
270
  shouldBypass: _midTaskBypass,
214
271
  });
215
272
  if (_midTaskRoute === "bypass") {
@@ -234,16 +291,28 @@ export async function handleMessage(ctx) {
234
291
  // v5.2 — btw live steering: push mid-task message into the open
235
292
  // SteerChannel so the running claude-sdk query picks it up as a
236
293
  // streaming-input user message. No abort, no queue.
237
- session._steerChannel.push(text);
238
- await react(ctx, "📨");
239
- if (!session._steerAckSentThisTurn) {
294
+ // C-L2: push() returns boolean — only 📨/ack when accepted; reply bufferFull otherwise.
295
+ const steerAccepted = session._steerChannel.push(text);
296
+ if (steerAccepted) {
297
+ await react(ctx, "📨");
298
+ if (!session._steerAckSentThisTurn) {
299
+ try {
300
+ await ctx.reply(t("bot.steer.ack", session.language));
301
+ }
302
+ catch {
303
+ /* harmless grammy race */
304
+ }
305
+ session._steerAckSentThisTurn = true;
306
+ }
307
+ }
308
+ else {
309
+ // Buffer full or channel closed — tell the user honestly
240
310
  try {
241
- await ctx.reply(t("bot.steer.ack", session.language));
311
+ await ctx.reply(t("bot.steer.bufferFull", session.language));
242
312
  }
243
313
  catch {
244
314
  /* harmless grammy race */
245
315
  }
246
- session._steerAckSentThisTurn = true;
247
316
  }
248
317
  return;
249
318
  }
@@ -274,6 +343,13 @@ export async function handleMessage(ctx) {
274
343
  }
275
344
  session.isProcessing = true;
276
345
  session.abortController = new AbortController();
346
+ // C-H2 — Stamp a per-turn identity token so the finally block can detect
347
+ // whether a NEW turn has already started before it runs. If requestStop
348
+ // fires mid-turn and allows a new message to start a fresh turn (with its
349
+ // own new abortController + _steerChannel), the old turn's finally sees the
350
+ // token mismatch and skips the clobber — preserving the new turn's state.
351
+ const _thisTurnId = crypto.randomUUID();
352
+ session._turnId = _thisTurnId;
277
353
  // v4.12.3 — Clear any stale bypass flag from a previous aborted turn.
278
354
  // The flag is set by the bypass path right before it calls abort(),
279
355
  // read by the OLD handler's error path, and cleared here by the NEW
@@ -538,6 +614,13 @@ export async function handleMessage(ctx) {
538
614
  // (the empty-stream capturedSessionId) and the next turn loops again.
539
615
  // This is the second half of the empty-stream-loop fix.
540
616
  let sessionResetInStream = false;
617
+ // Cycle-3 P0 — background honesty guard tracking.
618
+ // `syncTaskSeenWithoutRunInBackground`: lifted from the stuckTimer.enterSync
619
+ // site below — true once a Task/Agent chunk arrives with no runInBackground.
620
+ // `pendingBackgroundCountAtTurnStart`: snapshot before the stream so we can
621
+ // compute the delta at turn end (dispatch_agent increments this counter).
622
+ let syncTaskSeenWithoutRunInBackground = false;
623
+ const pendingBackgroundCountAtTurnStart = session.pendingBackgroundCount ?? 0;
541
624
  for await (const chunk of registry.queryWithFallback(queryOpts, workspace.provider)) {
542
625
  // v5.1 — Bail as soon as requestStop() marks the session. The registry's
543
626
  // outer loop already guards against new provider attempts; this guard
@@ -554,6 +637,8 @@ export async function handleMessage(ctx) {
554
637
  chunk.toolUseId &&
555
638
  chunk.runInBackground !== true) {
556
639
  stuckTimer.enterSync(chunk.toolUseId);
640
+ // Cycle-3 P0 — lift the signal for honesty guard (same condition)
641
+ syncTaskSeenWithoutRunInBackground = true;
557
642
  }
558
643
  else if (chunk.type === "tool_result" && chunk.toolUseId) {
559
644
  // Any tool_result may match a pending sync entry. Set.delete is
@@ -710,19 +795,66 @@ export async function handleMessage(ctx) {
710
795
  break;
711
796
  }
712
797
  }
713
- // v5.1 stop: user stopped this query — do NOT finalize partial output
714
- // as a successful answer, no 👍, no history commit. The stop trigger
715
- // (/cancel | /stopall | ⛔ button) already acknowledged to the user.
716
- // The `finally` still runs (clears isProcessing/_qHandle/_stopRequested
717
- // + typing indicator).
718
- if (session._stopRequested) {
719
- return;
798
+ // Cycle-3 P0 — background honesty guard.
799
+ // If the turn ran a sync Task/Agent (blocking) and no real detach happened
800
+ // (no dispatch_agent, no pendingBackgroundCount increase), append one
801
+ // truthful notice so the user is never left with a false async promise.
802
+ // This fires only on "normal" turn endings — bypass-abort and user-stop
803
+ // are handled below and don't need the notice (neither promises async).
804
+ if (!bypassAborted &&
805
+ !timedOut &&
806
+ !session._stopRequested &&
807
+ detectUndetachedBackgroundClaim({
808
+ taskChunkSeenWithoutRunInBackground: syncTaskSeenWithoutRunInBackground,
809
+ dispatchAgentFired: false, // used purely via pendingBackgroundDelta below
810
+ pendingBackgroundDelta: (session.pendingBackgroundCount ?? 0) - pendingBackgroundCountAtTurnStart,
811
+ })) {
812
+ try {
813
+ await ctx.reply(t("bot.background.syncNotice", session.language));
814
+ }
815
+ catch {
816
+ /* harmless — notice is best-effort */
817
+ }
720
818
  }
721
819
  if (bypassAborted) {
722
820
  // v4.12.3 — Bypass path took over; don't finalize, don't react 👍.
723
821
  // Just clean up and return. The finally block still fires.
724
822
  return;
725
823
  }
824
+ // A3 — Suppress-or-finalize gate for stopped turns.
825
+ //
826
+ // shouldSuppressFinalSend is the SINGLE gate controlling whether finalize runs:
827
+ //
828
+ // stop + no visible text (suppress=true):
829
+ // Skip finalize and all side-effects. Nothing reached the user — correct.
830
+ // The stop trigger (/cancel | /stopall | ⛔) already acknowledged this.
831
+ // The `finally` still runs (clears isProcessing/_qHandle/_stopRequested
832
+ // + typing indicator).
833
+ //
834
+ // stop + visible text already sent (suppress=false, _stopRequested truthy):
835
+ // The no-retract invariant applies — partial output already shown must not
836
+ // be left visually unfinished. Run streamer.finalize to flush the throttle
837
+ // timer and drop the status line, then return BEFORE the completed-answer
838
+ // side-effects (👍 / broadcastResponseDone / addToHistory). A stopped turn
839
+ // is NOT a successfully completed turn.
840
+ //
841
+ // no stop (suppress=false, _stopRequested falsy):
842
+ // Normal path — fall through to finalize + all side-effects.
843
+ if (shouldSuppressFinalSend({
844
+ stopRequested: session._stopRequested,
845
+ visibleTextAlreadySent: streamer.hasSentText,
846
+ })) {
847
+ // Branch A: stop + no visible text → suppress entirely.
848
+ return;
849
+ }
850
+ if (session._stopRequested && streamer.hasSentText) {
851
+ // Branch B: stop + visible text already sent → finalize the partial cleanly
852
+ // (flushes throttle timer, clears status line) but do NOT emit the
853
+ // completed-answer signals or commit to history.
854
+ await streamer.finalize(finalText);
855
+ return;
856
+ }
857
+ // Branch C: normal (no stop) — fall through.
726
858
  await streamer.finalize(finalText);
727
859
  emit("message:sent", { userId, text: finalText, platform: "telegram" });
728
860
  // v4.5.0: tell observers the response is complete.
@@ -790,25 +922,36 @@ export async function handleMessage(ctx) {
790
922
  finally {
791
923
  stuckTimer.cancel();
792
924
  clearInterval(typingInterval);
793
- session.isProcessing = false;
794
- session.abortController = null;
795
- // v5.1 — Clear stop-hardening state so the next turn starts clean.
796
- session._qHandle = null;
797
- session._stopRequested = null;
798
- // v5.2 — Close and clear the SteerChannel; reset per-turn ack flag.
799
- try {
800
- session._steerChannel?.close();
801
- }
802
- catch { /* ignore */ }
803
- session._steerChannel = null;
804
- session._steerAckSentThisTurn = false;
805
- // v5.1 — Remove the ⛔ Stop control message (sent at processing start).
806
- // Best-effort: if it was already deleted or the bot lacks permission, ignore.
807
- if (stopMsgId !== null) {
925
+ // C-H2 — Single-writer guard: only reset lifecycle fields if this turn's
926
+ // token still matches the session's current token. If requestStop fired
927
+ // mid-turn and a NEW turn has already started (and stamped a new _turnId),
928
+ // then _turnId !== _thisTurnId and we SKIP the reset — the new turn owns
929
+ // these fields. _qHandle and _stopRequested are included in the gate:
930
+ // requestStop already nulled _qHandle before returning (after interruptQuery),
931
+ // but if a new turn started and re-populated _qHandle via onQueryHandle we
932
+ // must NOT null it here — that would break Cycle-1 stop teeth for the new turn.
933
+ if (session._turnId === _thisTurnId) {
934
+ // A2 — Remove the ⛔ Stop control message as the FIRST action when the
935
+ // turn ends, so the stale button disappears before any post-turn work.
936
+ // Best-effort: if it was already deleted or the bot lacks permission, ignore.
937
+ if (stopMsgId !== null) {
938
+ try {
939
+ await ctx.api.deleteMessage(ctx.chat.id, stopMsgId);
940
+ }
941
+ catch { /* harmless grammy race */ }
942
+ }
943
+ session.isProcessing = false;
944
+ session.abortController = null;
945
+ // v5.2 — Close and clear the SteerChannel; reset per-turn ack flag.
808
946
  try {
809
- await ctx.api.deleteMessage(ctx.chat.id, stopMsgId);
947
+ session._steerChannel?.close();
810
948
  }
811
- catch { /* harmless grammy race */ }
949
+ catch { /* ignore */ }
950
+ session._steerChannel = null;
951
+ session._steerAckSentThisTurn = false;
952
+ session._qHandle = null; // safe: token matches → no newer turn owns this
953
+ session._stopRequested = null; // safe: token matches → no newer turn has set this
954
+ session._turnId = null;
812
955
  }
813
956
  // Check for queued messages — they'll be prepended to the next real message
814
957
  // Queue stays in session and gets consumed on next handleMessage call
package/dist/i18n.js CHANGED
@@ -331,6 +331,14 @@ const strings = {
331
331
  es: "(externo, activo)",
332
332
  fr: "(externe, en cours)",
333
333
  },
334
+ // background honesty notice — emitted when a sync Task blocked the turn
335
+ // (Cycle-3 P0 fix: don't falsely promise "you can keep chatting")
336
+ "bot.background.syncNotice": {
337
+ en: "ℹ️ That ran inline and took a while — I couldn't take new messages until it finished.",
338
+ de: "ℹ️ Das lief inline und hat eine Weile gedauert — ich konnte währenddessen keine neuen Nachrichten entgegennehmen.",
339
+ es: "ℹ️ Eso se ejecutó en línea y tardó un rato — no pude recibir nuevos mensajes hasta que terminó.",
340
+ fr: "ℹ️ Cela s'est exécuté en ligne et a pris un moment — je ne pouvais pas recevoir de nouveaux messages tant que ce n'était pas terminé.",
341
+ },
334
342
  // live steering ack (Task 4 — btw feature)
335
343
  "bot.steer.ack": {
336
344
  en: "📨 Noted — Alvin will factor that in without restarting.",
@@ -338,6 +346,13 @@ const strings = {
338
346
  es: "📨 Anotado — Alvin lo tendrá en cuenta sin reiniciar.",
339
347
  fr: "📨 Noté — Alvin en tiendra compte sans redémarrer.",
340
348
  },
349
+ // C-L2: steer buffer full — honest reply when the steer cap is reached
350
+ "bot.steer.bufferFull": {
351
+ en: "⚠️ Steer buffer full — this message wasn't queued. Alvin is still running; try again in a moment.",
352
+ de: "⚠️ Steer-Puffer voll — diese Nachricht wurde nicht übernommen. Alvin läuft noch; versuch es gleich nochmal.",
353
+ es: "⚠️ Búfer de dirección lleno — este mensaje no se añadió. Alvin sigue en marcha; inténtalo de nuevo en un momento.",
354
+ fr: "⚠️ Tampon de direction plein — ce message n'a pas été pris en compte. Alvin tourne toujours ; réessaie dans un instant.",
355
+ },
341
356
  // /cancel
342
357
  "bot.cancel.cancelling": {
343
358
  en: "Cancelling request…",
@@ -363,6 +378,12 @@ const strings = {
363
378
  es: "⛔ Detenido",
364
379
  fr: "⛔ Arrêté",
365
380
  },
381
+ "bot.cancel.alreadyDone": {
382
+ en: "Nothing running — that already finished.",
383
+ de: "Nichts läuft — das war schon fertig.",
384
+ es: "Nada en curso — eso ya terminó.",
385
+ fr: "Rien en cours — c'était déjà terminé.",
386
+ },
366
387
  // /model
367
388
  "bot.model.chooseHeader": {
368
389
  en: "🤖 *Choose model:*",
package/dist/index.js CHANGED
@@ -81,6 +81,18 @@ import { MEMORY_DIR as SEC_MEM_DIR, DATA_DIR as SEC_DATA_DIR } from "./paths.js"
81
81
  console.warn(` ${r.path}: ${r.error}`);
82
82
  }
83
83
  }
84
+ // M5: Ensure DATA_DIR itself is 0700 (owner-only traverse). ensureDataDirs()
85
+ // above handles new installs; this belt-and-suspenders catches the case where
86
+ // the dir was created by a pre-M5 version with 0755 and the bot is restarting.
87
+ if (process.platform !== "win32") {
88
+ try {
89
+ const { chmodSync } = await import("fs");
90
+ chmodSync(SEC_DATA_DIR, 0o700);
91
+ }
92
+ catch {
93
+ // Best-effort — network filesystems may not support chmod
94
+ }
95
+ }
84
96
  }
85
97
  // 4. Crash-loop brake check — if we've crashed N times in a short window,
86
98
  // refuse to start, write an alert file, and unload our LaunchAgent so
@@ -175,7 +187,7 @@ import { loadSkills } from "./services/skills.js";
175
187
  import { loadHooks } from "./services/hooks.js";
176
188
  import { registerShutdownHandler } from "./services/restart.js";
177
189
  import { cancelAllSubAgents } from "./services/subagents.js";
178
- import { startWatchdog, stopWatchdog, checkCrashLoopBrake } from "./services/watchdog.js";
190
+ import { startWatchdog, stopWatchdog, checkCrashLoopBrake, markExpectedRestart } from "./services/watchdog.js";
179
191
  import { getRegistry } from "./engine.js";
180
192
  import { scanAssets } from "./services/asset-index.js";
181
193
  // Scan asset directory and generate INDEX.json + INDEX.md
@@ -371,6 +383,12 @@ const shutdown = async () => {
371
383
  return;
372
384
  isShuttingDown = true;
373
385
  console.log("Graceful shutdown initiated...");
386
+ // Mark the imminent exit as an intentional restart so the next boot's
387
+ // decideBrakeAction does not count it as a crash. This covers launchctl
388
+ // unload/load (SIGTERM from launchd) in addition to /restart and /update
389
+ // which call markExpectedRestart() themselves before process.exit(0).
390
+ // Must run before stopWatchdog() (which just clears timers, not the beacon).
391
+ markExpectedRestart();
374
392
  // E2: shutdown-notification — await the async cancellation so running
375
393
  // agents can post a cancellation message to Telegram before the bot
376
394
  // stops. Capped at 5s internally so a hang can't block shutdown.