npm - polygram - Versions diffs - 0.3.6 → 0.4.1 - Mend

polygram 0.3.6 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/.claude-plugin/plugin.json +1 -1
package/README.md +23 -19
package/config.example.json +2 -2
package/lib/abort-detector.js +63 -0
package/lib/db.js +16 -1
package/lib/net-errors.js +94 -0
package/lib/process-manager.js +77 -23
package/lib/status-reactions.js +168 -0
package/lib/stream-reply.js +5 -1
package/lib/telegram-format.js +36 -0
package/lib/telegram.js +98 -7
package/lib/typing-indicator.js +143 -0
package/migrations/005-polling-state.sql +14 -0
package/package.json +5 -4
package/polygram.js +151 -48
package/scripts/doctor.js +324 -0
package/scripts/smoke.js +0 -122

package/lib/telegram.js CHANGED Viewed

@@ -20,6 +20,52 @@
  */
 const crypto = require('crypto');
+const { toTelegramMarkdown } = require('./telegram-format');
+const { isSafeToRetry } = require('./net-errors');
+// Topic deletion race: a user can delete a forum topic while a turn is in
+// flight, turning a valid `message_thread_id` into a 400 Bad Request.
+// Telegram's error string is specific enough to pattern-match; on a hit we
+// retry without the thread param so the reply still lands in the chat root.
+const THREAD_NOT_FOUND_RE = /(Bad Request:\s*message thread not found|TOPIC_DELETED)/i;
+function isThreadNotFound(err) {
+  const msg = err && (err.description || err.message);
+  return typeof msg === 'string' && THREAD_NOT_FOUND_RE.test(msg);
+}
+// Short fixed delay before the single pre-connect retry. 150ms is long
+// enough for DNS / local network glitches to clear, short enough that a
+// user waiting on a finishing turn doesn't notice.
+const PRE_CONNECT_RETRY_DELAY_MS = 150;
+function sleep(ms) {
+  return new Promise((r) => setTimeout(r, ms));
+}
+// Methods whose `text` / `caption` fields we auto-format into MarkdownV2.
+// Anything else passes through untouched (setMessageReaction, sendSticker,
+// deleteMessage, etc. have no text to format).
+const FORMATTABLE_METHODS = new Set(['sendMessage', 'editMessageText']);
+// Apply Claude-markdown → Telegram-MarkdownV2 conversion in-place on the
+// params object. Skipped if:
+//   - Method doesn't carry formattable text.
+//   - Caller already set a parse_mode (respect explicit choice).
+//   - Caller opted out via meta.plainText.
+// If conversion fails to produce a parseMode we silently fall through to plain text.
+function applyFormatting(method, params, meta) {
+  if (meta.plainText === true) return;
+  if (!FORMATTABLE_METHODS.has(method)) return;
+  if (params.parse_mode != null) return;
+  const field = params.text ? 'text' : (params.caption ? 'caption' : null);
+  if (!field) return;
+  const { text: converted, parseMode } = toTelegramMarkdown(params[field]);
+  if (parseMode) {
+    params[field] = converted;
+    params.parse_mode = parseMode;
+  }
+}
 // Synthetic negative msg_id for a pending outbound row. 48 random bits — the
 // birthday bound for collision within the (chat_id, msg_id) unique constraint
@@ -48,9 +94,15 @@ function deriveOutboundText(method, params, meta) {
 async function send({ bot, method, params, db = null, meta = {}, logger = console }) {
   const chatId = params.chat_id != null ? String(params.chat_id) : null;
   const threadId = params.message_thread_id != null ? String(params.message_thread_id) : null;
+  // Capture outbound text BEFORE markdown-escaping so the transcript stays
+  // human-readable. "Mr. O'Brien said 3.14" is searchable; "Mr\. O'Brien
+  // said 3\.14" is not. The user's chat view shows the rendered text, which
+  // matches the DB row modulo heading/bullet downgrades.
   const text = deriveOutboundText(method, params, meta);
   const tracksMessage = !METHODS_WITHOUT_MSG.has(method);
+  applyFormatting(method, params, meta);
   let rowId = null;
   if (db && tracksMessage && chatId) {
     const pendingId = nextPendingId();
@@ -73,16 +125,55 @@ async function send({ bot, method, params, db = null, meta = {}, logger = consol
   }
   let res;
+  const attempt = async (p) => bot.api.raw[method](p);
   try {
-    res = await bot.api.raw[method](params);
+    try {
+      res = await attempt(params);
+    } catch (err) {
+      // Pre-connect errors (DNS flap, TCP refused, net unreach) never
+      // reached Telegram, so retrying can't double-send. Retry ONCE after
+      // a short delay before treating as fatal. Post-connect errors
+      // (ETIMEDOUT, EPIPE, 5xx) are NOT retried — the message might have
+      // landed server-side.
+      if (isSafeToRetry(err)) {
+        try { db?.logEvent('telegram-retry', { chat_id: chatId, method, code: err.code, name: err.name }); }
+        catch {}
+        await sleep(PRE_CONNECT_RETRY_DELAY_MS);
+        res = await attempt(params);
+      } else {
+        throw err;
+      }
+    }
   } catch (err) {
-    if (rowId != null && db) {
-      try { db.markOutboundFailed(rowId, err.message); }
-      catch (e) { logger.error(`[telegram] markOutboundFailed: ${e.message}`); }
-      try { db.logEvent('telegram-api-error', { chat_id: chatId, method, error: err.message }); }
-      catch (e) { logger.error(`[telegram] logEvent: ${e.message}`); }
+    // Forum topic was deleted mid-turn — retry to chat root rather than
+    // failing the whole reply. Only for methods that accept a thread id
+    // (send*), and only once per call.
+    if (isThreadNotFound(err) && params.message_thread_id != null) {
+      const retryParams = { ...params };
+      delete retryParams.message_thread_id;
+      try {
+        logger.error?.(`[telegram] ${method}: thread gone, retrying without thread_id`);
+        res = await bot.api.raw[method](retryParams);
+        try { db?.logEvent('telegram-thread-fallback', { chat_id: chatId, method, original_thread_id: String(params.message_thread_id) }); }
+        catch {}
+      } catch (err2) {
+        if (rowId != null && db) {
+          try { db.markOutboundFailed(rowId, err2.message); }
+          catch (e) { logger.error(`[telegram] markOutboundFailed: ${e.message}`); }
+          try { db.logEvent('telegram-api-error', { chat_id: chatId, method, error: err2.message }); }
+          catch (e) { logger.error(`[telegram] logEvent: ${e.message}`); }
+        }
+        throw err2;
+      }
+    } else {
+      if (rowId != null && db) {
+        try { db.markOutboundFailed(rowId, err.message); }
+        catch (e) { logger.error(`[telegram] markOutboundFailed: ${e.message}`); }
+        try { db.logEvent('telegram-api-error', { chat_id: chatId, method, error: err.message }); }
+        catch (e) { logger.error(`[telegram] logEvent: ${e.message}`); }
+      }
+      throw err;
     }
-    throw err;
   }
   if (rowId != null && db) {

package/lib/typing-indicator.js ADDED Viewed

@@ -0,0 +1,143 @@
+/**
+ * Typing indicator with circuit breaker.
+ *
+ * Problem: sendChatAction('typing') is called every 4s while a turn is in
+ * flight. If the bot was removed from a chat, blocked by a user, or the
+ * chat was deleted, the API returns 403 Forbidden (or 401 Unauthorized). The
+ * naive `.catch(()=>{})` that polygram had before meant we'd keep hammering
+ * the API for the duration of the (already-doomed) turn — hundreds of failed
+ * requests that chip away at rate-limit budget and drown real signal in logs.
+ *
+ * Fix (mirrors OpenClaw's createTelegramSendChatActionHandler pattern):
+ * per-chat circuit breaker with exponential backoff. After N consecutive
+ * auth failures we suspend typing pings for this chat for maxBackoffMs;
+ * the first successful ping after that resets the counter.
+ *
+ * State is per-chat so one dead chat doesn't silence the bot everywhere.
+ * We keep it in-memory (not DB-persisted) — restart clears and we'll find
+ * out again the first time we try; the cost of being re-wrong is just a
+ * handful of failed pings, not worth persisting.
+ */
+const DEFAULT_INTERVAL_MS = 4000;
+const DEFAULT_MAX_CONSECUTIVE_401 = 10;
+const DEFAULT_MAX_BACKOFF_MS = 300_000; // 5 min — matches OpenClaw
+// Shared state keyed by chat_id. resetChatTypingState() is exported so tests can clear it.
+const chatState = new Map();
+function getState(chatId) {
+  let s = chatState.get(chatId);
+  if (!s) {
+    s = { failures: 0, suspendedUntil: 0 };
+    chatState.set(chatId, s);
+  }
+  return s;
+}
+function isAuthFailure(err) {
+  const code = err?.error_code ?? err?.status;
+  const desc = err?.description || err?.message || '';
+  return code === 401 || code === 403 || /Forbidden|Unauthorized|bot was blocked|chat not found/i.test(desc);
+}
+// Exponential backoff: 1s, 2s, 4s, 8s, …, capped at maxBackoffMs.
+function backoffDelay(failures, maxBackoffMs) {
+  const ms = Math.min(maxBackoffMs, 1000 * Math.pow(2, Math.max(0, failures - 1)));
+  return ms;
+}
+/**
+ * Start the typing-indicator loop for a chat. Returns a stop function.
+ *
+ * @param {object} deps
+ * @param {import('grammy').Bot} deps.bot
+ * @param {string|number} deps.chatId
+ * @param {string} [deps.threadId]
+ * @param {number} [deps.intervalMs]
+ * @param {number} [deps.maxConsecutive401]
+ * @param {number} [deps.maxBackoffMs]
+ * @param {object} [deps.logger] - { error, log } — default console
+ * @param {(evt: {kind: string, chat_id: string, detail?: object}) => void} [deps.onEvent]
+ *     Hook for polygram's `events` DB log.
+ */
+function startTyping({
+  bot, chatId, threadId,
+  intervalMs = DEFAULT_INTERVAL_MS,
+  maxConsecutive401 = DEFAULT_MAX_CONSECUTIVE_401,
+  maxBackoffMs = DEFAULT_MAX_BACKOFF_MS,
+  logger = console,
+  onEvent = null,
+} = {}) {
+  const key = String(chatId);
+  const opts = threadId ? { message_thread_id: threadId } : {};
+  let timer = null;
+  let stopped = false;
+  const tick = async () => {
+    if (stopped) return;
+    const s = getState(key);
+    if (s.suspendedUntil > Date.now()) return;
+    try {
+      await bot.api.sendChatAction(chatId, 'typing', opts);
+      // Success — reset failure counter.
+      if (s.failures > 0) {
+        onEvent?.({ kind: 'typing-recovered', chat_id: key, detail: { after_failures: s.failures } });
+      }
+      s.failures = 0;
+      s.suspendedUntil = 0;
+    } catch (err) {
+      if (!isAuthFailure(err)) {
+        // Other errors (network blip, 500, etc.): don't open the circuit.
+        // Log the error and let the next tick try again.
+        logger.error?.(`[typing] ${key}: ${err?.description || err?.message}`);
+        return;
+      }
+      s.failures += 1;
+      if (s.failures >= maxConsecutive401) {
+        // Circuit fully open — suspend for the maxBackoffMs window; won't
+        // try again until then. Successful turns (or a subsequent tick past
+        // the suspend window) will test the waters.
+        s.suspendedUntil = Date.now() + maxBackoffMs;
+        onEvent?.({ kind: 'typing-suspended', chat_id: key, detail: {
+          failures: s.failures, suspend_ms: maxBackoffMs,
+        } });
+        logger.error?.(`[typing] ${key}: ${s.failures} consecutive auth failures; suspending ${maxBackoffMs / 1000}s`);
+      } else {
+        // Partial open — back off for an exponentially growing window.
+        s.suspendedUntil = Date.now() + backoffDelay(s.failures, maxBackoffMs);
+      }
+    }
+  };
+  // Fire once immediately, then every intervalMs.
+  tick();
+  timer = setInterval(tick, intervalMs);
+  timer.unref?.();
+  return () => {
+    stopped = true;
+    if (timer) clearInterval(timer);
+    timer = null;
+  };
+}
+function resetChatTypingState(chatId) {
+  if (chatId == null) chatState.clear();
+  else chatState.delete(String(chatId));
+}
+function getChatTypingState(chatId) {
+  return chatState.get(String(chatId));
+}
+module.exports = {
+  startTyping,
+  resetChatTypingState,
+  getChatTypingState,
+  isAuthFailure,
+  backoffDelay,
+  DEFAULT_INTERVAL_MS,
+  DEFAULT_MAX_CONSECUTIVE_401,
+  DEFAULT_MAX_BACKOFF_MS,
+};

package/migrations/005-polling-state.sql ADDED Viewed

@@ -0,0 +1,14 @@
+-- Persist grammy's update offset so a polygram restart doesn't re-process
+-- the entire getUpdates backlog from the last 24h. Grammy's in-memory
+-- offset resets to 0 on boot; Telegram replies with every unconfirmed
+-- update. For a bot that went down overnight with active chats, that can
+-- mean re-running dozens of turns on stale messages.
+--
+-- One row per bot. Row is upserted on every successful getUpdates batch
+-- that returned at least one update.
+CREATE TABLE IF NOT EXISTS polling_state (
+  bot_name       TEXT PRIMARY KEY,
+  last_update_id INTEGER NOT NULL,
+  ts             INTEGER NOT NULL
+);

package/package.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
   "name": "polygram",
-  "version": "0.3.6",
+  "version": "0.4.1",
   "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
   "main": "lib/ipc-client.js",
   "bin": {
     "polygram": "polygram.js",
     "polygram-split-db": "scripts/split-db.js",
     "polygram-ipc": "scripts/ipc-smoke.js",
-    "polygram-smoke": "scripts/smoke.js"
+    "polygram-doctor": "scripts/doctor.js"
   },
   "files": [
     "polygram.js",
@@ -16,7 +16,7 @@
     "migrations/",
     "scripts/split-db.js",
     "scripts/ipc-smoke.js",
-    "scripts/smoke.js",
+    "scripts/doctor.js",
     "skills/",
     "commands/",
     ".claude-plugin/",
@@ -61,6 +61,7 @@
   "type": "commonjs",
   "dependencies": {
     "better-sqlite3": "^12.9.0",
-    "grammy": "^1.42.0"
+    "grammy": "^1.42.0",
+    "telegramify-markdown": "^1.3.3"
   }
 }

package/polygram.js CHANGED Viewed

@@ -32,6 +32,9 @@ const { parseBotArg, parseDbArg, filterConfigToBot } = require('./lib/config-sco
 const { createStore: createPairingsStore, parseTtl: parsePairingTtl } = require('./lib/pairings');
 const { transcribe: transcribeVoice, isVoiceAttachment } = require('./lib/voice');
 const { createStreamer } = require('./lib/stream-reply');
+const { isAbortRequest } = require('./lib/abort-detector');
+const { startTyping } = require('./lib/typing-indicator');
+const { createReactionManager, classifyToolName } = require('./lib/status-reactions');
 const {
   createStore: createApprovalsStore,
   matchesAnyPattern: matchesApprovalPattern,
@@ -79,6 +82,7 @@ let ipcCloser = null;
 let BOT_NAME = null;  // string, frozen after boot
 let bot = null;       // grammy Bot for BOT_NAME
 let streamers = new Map();  // sessionKey -> active Streamer (while turn is in flight)
+let reactors = new Map();   // sessionKey -> active ReactionManager (while turn is in flight)
 // Allowlist of env var names passed through to spawned Claude processes.
 // Anything not listed here is dropped to prevent leaked secrets/ssh agents
@@ -515,7 +519,12 @@ async function sendToProcess(sessionKey, prompt) {
   const chatId = getChatIdFromKey(sessionKey);
   const chatConfig = config.chats[chatId];
   const timeoutMs = (chatConfig.timeout || config.defaults.timeout) * 1000;
-  return pm.send(sessionKey, prompt, { timeoutMs });
+  // Wall-clock ceiling (seconds). Overridable per-chat via chatConfig.maxTurn
+  // or globally via config.defaults.maxTurn. 30 min default is generous for
+  // long audits; stuck API calls rarely run that long without firing the
+  // idle timer first. Unit: seconds → milliseconds.
+  const maxTurnMs = (chatConfig.maxTurn || config.defaults?.maxTurn || 1800) * 1000;
+  return pm.send(sessionKey, prompt, { timeoutMs, maxTurnMs });
 }
 // ─── Message queue (per-chat) ───────────────────────────────────────
@@ -572,15 +581,10 @@ async function processQueue(sessionKey) {
 const drainQueuesForChat = (chatId) => drainQueuesForChatImpl(queues, chatId);
-// ─── Typing indicator ───────────────────────────────────────────────
-function startTyping(bot, chatId, threadId) {
-  const opts = threadId ? { message_thread_id: threadId } : {};
-  const send = () => bot.api.sendChatAction(chatId, 'typing', opts).catch(() => {});
-  send();
-  const interval = setInterval(send, 4000);
-  return () => clearInterval(interval);
-}
+// Typing indicator is imported from lib/typing-indicator — it adds a
+// per-chat circuit breaker with exponential backoff so a chat that
+// permanently 401s (bot blocked, chat deleted) doesn't have us
+// hammering sendChatAction every 4s for the full turn duration.
 // ─── Response parsing (stickers, reactions) ─────────────────────────
@@ -736,7 +740,7 @@ async function handleApprovalRequest(req) {
         chat_id: apprCfg.adminChatId,
         text: approvalCardText(row),
         reply_markup: buildApprovalKeyboard(row.id, row.callback_token),
-      }, { source: 'approval-request', botName: BOT_NAME });
+      }, { source: 'approval-request', botName: BOT_NAME, plainText: true });
       if (sent?.message_id) {
         approvals.setApproverMsgId(row.id, sent.message_id);
       }
@@ -1106,46 +1110,77 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
   });
   const prompt = formatPrompt(msg, sessionCtx, downloaded);
-  const stopTyping = startTyping(bot, chatId, threadId);
+  const stopTyping = startTyping({
+    bot, chatId, threadId,
+    logger: { error: (m) => console.error(`[${label}] ${m}`) },
+    onEvent: (e) => dbWrite(() => db.logEvent(e.kind, {
+      bot: BOT_NAME, chat_id: e.chat_id, ...(e.detail || {}),
+    }), `log ${e.kind}`),
+  });
   const botCfg = config.bot || {};
-  const streamEnabled = botCfg.streamReplies === true;
   const outMetaBase = {
-    source: streamEnabled ? 'bot-reply-stream' : 'bot-reply',
+    source: 'bot-reply-stream',
     botName: BOT_NAME,
     model: chatConfig.model,
     effort: chatConfig.effort,
   };
-  let streamer = null;
-  if (streamEnabled) {
-    streamer = createStreamer({
-      send: async (text) => tg(bot, 'sendMessage', {
-        chat_id: chatId, text,
-        reply_parameters: { message_id: msg.message_id },
-        ...(threadId && { message_thread_id: threadId }),
-      }, outMetaBase),
-      edit: async (messageId, text) => {
-        try {
-          return await bot.api.editMessageText(chatId, messageId, text);
-        } catch (err) {
-          // Stream-edit failures would otherwise be invisible — edits bypass
-          // tg() so there's no messages row reflecting the attempt. Log to
-          // events so stuck streams leave a forensic trail.
-          dbWrite(() => db.logEvent('telegram-edit-failed', {
-            chat_id: chatId, msg_id: messageId,
-            api_error: err.message?.slice(0, 200),
-            bot: BOT_NAME,
-          }), 'log telegram-edit-failed');
-          throw err;
-        }
-      },
-      minChars: botCfg.streamMinChars,
-      throttleMs: botCfg.streamThrottleMs,
-      logger: { error: (m) => console.error(`[${label}] ${m}`) },
-    });
-    streamers.set(sessionKey, streamer);
-  }
+  // Streaming is unconditional as of 0.4.0 — matches OpenClaw's model and
+  // eliminates the "stuck at 15min typing" complaint from the non-streaming
+  // code path. For short responses the streamer stays idle and we fall
+  // through to the normal send path via finalize() returning streamed=false.
+  const streamer = createStreamer({
+    send: async (text) => tg(bot, 'sendMessage', {
+      chat_id: chatId, text,
+      // allow_sending_without_reply: long-running turns give the user
+      // plenty of time to delete their original message. Without this
+      // flag, Telegram rejects the reply with MESSAGE_NOT_FOUND and the
+      // whole streamed answer is lost. With it, the reply simply lands
+      // as a standalone message.
+      reply_parameters: { message_id: msg.message_id, allow_sending_without_reply: true },
+      ...(threadId && { message_thread_id: threadId }),
+    }, outMetaBase),
+    edit: async (messageId, text) => {
+      try {
+        return await bot.api.editMessageText(chatId, messageId, text);
+      } catch (err) {
+        // Stream-edit failures would otherwise be invisible — edits bypass
+        // tg() so there's no messages row reflecting the attempt. Log to
+        // events so stuck streams leave a forensic trail.
+        dbWrite(() => db.logEvent('telegram-edit-failed', {
+          chat_id: chatId, msg_id: messageId,
+          api_error: err.message?.slice(0, 200),
+          bot: BOT_NAME,
+        }), 'log telegram-edit-failed');
+        throw err;
+      }
+    },
+    minChars: botCfg.streamMinChars,
+    throttleMs: botCfg.streamThrottleMs,
+    logger: { error: (m) => console.error(`[${label}] ${m}`) },
+  });
+  streamers.set(sessionKey, streamer);
+  // Status reactions on the user's message: 👀 queued → 🤔 thinking →
+  // 👨‍💻 coding / ⚡ web / 🔥 tool → 👍 done / 🤯 error. Silent (no
+  // notifications), updates in place, one emoji per message. Uses
+  // setMessageReaction which skips the DB row (the tg() wrapper
+  // short-circuits that method), so no transcript spam.
+  const reactor = createReactionManager({
+    apply: async (emoji) => {
+      const params = {
+        chat_id: chatId,
+        message_id: msg.message_id,
+        reaction: emoji ? [{ type: 'emoji', emoji }] : [],
+      };
+      await tg(bot, 'setMessageReaction', params,
+        { source: 'status-reaction', botName: BOT_NAME });
+    },
+    logError: (m) => console.error(`[${label}] ${m}`),
+  });
+  reactors.set(sessionKey, reactor);
+  reactor.setState('THINKING');
   try {
     const result = await sendToProcess(sessionKey, prompt);
@@ -1155,7 +1190,10 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
     if (result.error) {
       console.error(`[${label}] Error (${elapsed}s):`, result.error);
+      reactor.setState('ERROR');
       if (!result.text) return;
+    } else {
+      reactor.setState('DONE');
     }
     if (!result.text || result.text === 'NO_REPLY') return;
@@ -1165,7 +1203,7 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
     // Streamed text path: finalise the live-edit and, if the full response
     // overflows Telegram's 4096 cap, send remainder as follow-up chunks.
-    if (streamer && parsed.text) {
+    if (parsed.text) {
       const fin = await streamer.finalize(parsed.text);
       if (fin.streamed) {
         if (parsed.text.length > TG_MAX_LEN) {
@@ -1221,14 +1259,24 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
     console.log(`[${label}] ${elapsed}s | ${result.text.length} chars | ${chatConfig.model}/${chatConfig.effort} | $${result.cost?.toFixed(4) || '?'}`);
   } catch (err) {
-    if (streamer) {
-      // Generic suffix — err.message can leak internal paths/state.
-      await streamer.finalize('', { errorSuffix: 'stream interrupted' }).catch(() => {});
+    // Generic suffix — err.message can leak internal paths/state.
+    await streamer.finalize('', { errorSuffix: 'stream interrupted' }).catch(() => {});
+    // Signal the failure to the user's message reaction. Timeout gets its
+    // own face; anything else is generic error.
+    if (/wall-clock ceiling|idle with no Claude activity/i.test(err?.message || '')) {
+      reactor.setState('TIMEOUT');
+    } else {
+      reactor.setState('ERROR');
     }
     throw err;
   } finally {
     stopTyping();
-    if (streamer) streamers.delete(sessionKey);
+    streamers.delete(sessionKey);
+    // Stop the reactor now that the turn is over. Terminal states
+    // (DONE/ERROR/TIMEOUT) bypass throttle so they land instantly in practice;
+    // stop() guards against any late transition leaking after the turn ends.
+    reactor.stop();
+    reactors.delete(sessionKey);
   }
 }
@@ -1390,6 +1438,36 @@ function createBot(token) {
     const rawText = ctx.message.text || '';
     const cleanText = mentionRe ? rawText.replace(mentionRe, '').trim() : rawText.trim();
+    // Abort: skip the queue entirely. Matches bilingual natural-language
+    // cues ("stop" / "стоп" / "cancel" / "отмена" / …) and explicit
+    // slash commands (/stop, /abort, /cancel). Kills the active Claude
+    // subprocess and drains queued messages for this chat. Replies so
+    // the user sees the bot heard them — silent abort is worse than
+    // acknowledged abort.
+    if (isAbortRequest(cleanText)) {
+      const threadId = ctx.message.message_thread_id?.toString();
+      const sessionKey = getSessionKey(chatId, threadId, chatConfig);
+      const hadActive = pm.has(sessionKey) && !!pm.get(sessionKey)?.inFlight;
+      const dropped = drainQueuesForChat(chatId);
+      await pm.killChat(chatId).catch(() => {});
+      dbWrite(() => db.logEvent('abort-requested', {
+        chat_id: chatId, user_id: ctx.message.from?.id || null,
+        had_active: hadActive, queued_dropped: dropped,
+        trigger: cleanText.slice(0, 40),
+      }), 'log abort-requested');
+      const reply = hadActive || dropped
+        ? (dropped ? `Остановлено. Очередь очищена (${dropped}).` : 'Остановлено.')
+        : 'Нечего останавливать.';
+      try {
+        await tg(bot, 'sendMessage', {
+          chat_id: chatId, text: reply,
+          reply_parameters: { message_id: ctx.message.message_id, allow_sending_without_reply: true },
+          ...(threadId && { message_thread_id: threadId }),
+        }, { source: 'abort-ack', botName: BOT_NAME });
+      } catch {}
+      return;
+    }
     const botAllowsCommands = !!config.bot?.allowConfigCommands;
     const isAdminCmd = botAllowsCommands && ADMIN_CMD_RE.test(cleanText);
     const isPairClaim = PAIR_CLAIM_RE.test(cleanText);
@@ -1508,7 +1586,21 @@ async function pollBot(bot) {
   await bot.api.deleteWebhook();
+  // Restore polling offset from DB so a restart doesn't re-process the
+  // backlog Telegram has accumulated while we were down. Grammy's in-memory
+  // offset resets to 0 each boot, which makes getUpdates return every
+  // un-confirmed update since the last ack — for an overnight outage that
+  // can mean replaying dozens of stale messages.
   let offset = 0;
+  try {
+    const saved = db?.getPollingOffset?.(BOT_NAME);
+    if (saved && saved > 0) {
+      offset = saved + 1;
+      console.log(`[${BOT_NAME}] resuming polling from update_id ${saved}`);
+    }
+  } catch (err) {
+    console.error(`[${BOT_NAME}] getPollingOffset failed: ${err.message}`);
+  }
   let running = true;
   bot._lastPollTs = Date.now();
@@ -1545,6 +1637,13 @@ async function pollBot(bot) {
           console.error(`[${BOT_NAME}] Handler error:`, err.message);
         }
       }
+      // Persist offset after batch dispatch so a crash mid-batch only risks
+      // re-processing the unacked updates. We write only on non-empty batches
+      // to avoid churning the row on every 25s idle poll.
+      if (updates.length > 0) {
+        dbWrite(() => db.savePollingOffset(BOT_NAME, updates[updates.length - 1].update_id),
+          'save polling offset');
+      }
       // No sleep on the success path: long-poll already blocks up to 25s
       // when idle. Sleeping here would add latency with no gain.
     } catch (err) {
@@ -1666,6 +1765,10 @@ async function main() {
       const s = streamers.get(sessionKey);
       if (s) s.onChunk(partial).catch(() => {});
     },
+    onToolUse: (sessionKey, toolName) => {
+      const r = reactors.get(sessionKey);
+      if (r) r.setState(classifyToolName(toolName));
+    },
   });
   console.log(`polygram (LRU cap=${cap}, SQLite source of truth)`);