polygram 0.3.6 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/telegram.js CHANGED
@@ -20,6 +20,52 @@
20
20
  */
21
21
 
22
22
  const crypto = require('crypto');
23
+ const { toTelegramMarkdown } = require('./telegram-format');
24
+ const { isSafeToRetry } = require('./net-errors');
25
+
26
// A forum topic can be deleted by a user while a turn is still running, so a
// `message_thread_id` that was valid when the turn started begins to fail.
// Telegram's error text for that case is distinctive enough to match on; the
// caller reacts by resending without the thread parameter so the reply lands
// in the chat root instead of being lost.
const THREAD_NOT_FOUND_RE = /(Bad Request:\s*message thread not found|TOPIC_DELETED)/i;

/**
 * Tell whether an error is Telegram's "message thread not found" failure.
 *
 * @param {*} err - Error-like value; `description` is preferred over `message`.
 * @returns {boolean} true when the selected text matches THREAD_NOT_FOUND_RE.
 */
function isThreadNotFound(err) {
  if (!err) return false;
  const detail = err.description || err.message;
  if (typeof detail !== 'string') return false;
  return THREAD_NOT_FOUND_RE.test(detail);
}
36
+
37
// Fixed pause before the single pre-connect retry: 150ms gives a DNS or
// local-network hiccup a moment to clear without adding a delay a user would
// notice at the end of a turn.
const PRE_CONNECT_RETRY_DELAY_MS = 150;

/**
 * Promise-based delay.
 *
 * @param {number} ms - Milliseconds to wait.
 * @returns {Promise<void>} Resolves with no value once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
45
+
46
// Methods whose `text` / `caption` field is auto-formatted into MarkdownV2.
// Every other method passes through untouched (setMessageReaction,
// sendSticker, deleteMessage, etc. have no text to format).
const FORMATTABLE_METHODS = new Set(['sendMessage', 'editMessageText']);

/**
 * Convert Claude-style markdown to Telegram MarkdownV2, in place on `params`.
 *
 * Nothing is changed when:
 *   - the caller opted out via `meta.plainText === true`;
 *   - `method` is not in FORMATTABLE_METHODS;
 *   - the caller already chose a `parse_mode` (explicit choice wins);
 *   - neither `params.text` nor `params.caption` is non-empty;
 *   - the converter throws, reports no parse mode, or returns no string.
 * In all of those cases the message is sent exactly as the caller built it.
 *
 * @param {string} method - Bot API method name.
 * @param {object} params - Outgoing API params; mutated on successful conversion.
 * @param {object} [meta] - Send metadata; only `plainText` is read here.
 * @returns {void}
 */
function applyFormatting(method, params, meta) {
  // `meta` is optional for direct callers; a missing object means "no opt-out".
  if (meta?.plainText === true) return;
  if (!FORMATTABLE_METHODS.has(method)) return;
  if (params.parse_mode != null) return;
  const field = params.text ? 'text' : (params.caption ? 'caption' : null);
  if (!field) return;

  let result;
  try {
    result = toTelegramMarkdown(params[field]);
  } catch {
    // Formatting is best-effort: a converter failure must not block the
    // send, so leave params untouched and let the text go out as plain text.
    return;
  }

  const { text: converted, parseMode } = result ?? {};
  // Only commit when both halves are usable; setting parse_mode without a
  // converted string (or the reverse) would corrupt the outgoing message.
  if (parseMode && typeof converted === 'string') {
    params[field] = converted;
    params.parse_mode = parseMode;
  }
}
23
69
 
24
70
  // Synthetic negative msg_id for a pending outbound row. 48 random bits — the
25
71
  // birthday bound for collision within the (chat_id, msg_id) unique constraint
@@ -48,9 +94,15 @@ function deriveOutboundText(method, params, meta) {
48
94
  async function send({ bot, method, params, db = null, meta = {}, logger = console }) {
49
95
  const chatId = params.chat_id != null ? String(params.chat_id) : null;
50
96
  const threadId = params.message_thread_id != null ? String(params.message_thread_id) : null;
97
+ // Capture outbound text BEFORE markdown-escaping so the transcript stays
98
+ // human-readable. "Mr. O'Brien said 3.14" is searchable; "Mr\. O'Brien
99
+ // said 3\.14" is not. The user's chat view shows the rendered text, which
100
+ // matches the DB row modulo heading/bullet downgrades.
51
101
  const text = deriveOutboundText(method, params, meta);
52
102
  const tracksMessage = !METHODS_WITHOUT_MSG.has(method);
53
103
 
104
+ applyFormatting(method, params, meta);
105
+
54
106
  let rowId = null;
55
107
  if (db && tracksMessage && chatId) {
56
108
  const pendingId = nextPendingId();
@@ -73,16 +125,55 @@ async function send({ bot, method, params, db = null, meta = {}, logger = consol
73
125
  }
74
126
 
75
127
  let res;
128
+ const attempt = async (p) => bot.api.raw[method](p);
76
129
  try {
77
- res = await bot.api.raw[method](params);
130
+ try {
131
+ res = await attempt(params);
132
+ } catch (err) {
133
+ // Pre-connect errors (DNS flap, TCP refused, net unreach) never
134
+ // reached Telegram, so retrying can't double-send. Retry ONCE after
135
+ // a short delay before treating as fatal. Post-connect errors
136
+ // (ETIMEDOUT, EPIPE, 5xx) are NOT retried — the message might have
137
+ // landed server-side.
138
+ if (isSafeToRetry(err)) {
139
+ try { db?.logEvent('telegram-retry', { chat_id: chatId, method, code: err.code, name: err.name }); }
140
+ catch {}
141
+ await sleep(PRE_CONNECT_RETRY_DELAY_MS);
142
+ res = await attempt(params);
143
+ } else {
144
+ throw err;
145
+ }
146
+ }
78
147
  } catch (err) {
79
- if (rowId != null && db) {
80
- try { db.markOutboundFailed(rowId, err.message); }
81
- catch (e) { logger.error(`[telegram] markOutboundFailed: ${e.message}`); }
82
- try { db.logEvent('telegram-api-error', { chat_id: chatId, method, error: err.message }); }
83
- catch (e) { logger.error(`[telegram] logEvent: ${e.message}`); }
148
+ // Forum topic was deleted mid-turn — retry to chat root rather than
149
+ // failing the whole reply. Only for methods that accept a thread id
150
+ // (send*), and only once per call.
151
+ if (isThreadNotFound(err) && params.message_thread_id != null) {
152
+ const retryParams = { ...params };
153
+ delete retryParams.message_thread_id;
154
+ try {
155
+ logger.error?.(`[telegram] ${method}: thread gone, retrying without thread_id`);
156
+ res = await bot.api.raw[method](retryParams);
157
+ try { db?.logEvent('telegram-thread-fallback', { chat_id: chatId, method, original_thread_id: String(params.message_thread_id) }); }
158
+ catch {}
159
+ } catch (err2) {
160
+ if (rowId != null && db) {
161
+ try { db.markOutboundFailed(rowId, err2.message); }
162
+ catch (e) { logger.error(`[telegram] markOutboundFailed: ${e.message}`); }
163
+ try { db.logEvent('telegram-api-error', { chat_id: chatId, method, error: err2.message }); }
164
+ catch (e) { logger.error(`[telegram] logEvent: ${e.message}`); }
165
+ }
166
+ throw err2;
167
+ }
168
+ } else {
169
+ if (rowId != null && db) {
170
+ try { db.markOutboundFailed(rowId, err.message); }
171
+ catch (e) { logger.error(`[telegram] markOutboundFailed: ${e.message}`); }
172
+ try { db.logEvent('telegram-api-error', { chat_id: chatId, method, error: err.message }); }
173
+ catch (e) { logger.error(`[telegram] logEvent: ${e.message}`); }
174
+ }
175
+ throw err;
84
176
  }
85
- throw err;
86
177
  }
87
178
 
88
179
  if (rowId != null && db) {
@@ -0,0 +1,143 @@
1
+ /**
2
+ * Typing indicator with circuit breaker.
3
+ *
4
+ * Problem: sendChatAction('typing') is called every 4s while a turn is in
5
+ * flight. If the bot was removed from a chat, blocked by a user, or the
6
+ * chat was deleted, the API returns 401 Forbidden. The naive `.catch(()=>{})`
7
+ * that polygram had before meant we'd keep hammering the API for the
8
+ * duration of the (already-doomed) turn — hundreds of failed requests that
9
+ * chip away at rate-limit budget and drown real signal in logs.
10
+ *
11
+ * Fix (mirrors OpenClaw's createTelegramSendChatActionHandler pattern):
12
+ * per-chat circuit breaker with exponential backoff. After N consecutive
13
+ * auth failures (401/403) we suspend typing pings for this chat for
14
+ * maxBackoffMs; the first successful ping after that resets the counter.
15
+ *
16
+ * State is per-chat so one dead chat doesn't silence the bot everywhere.
17
+ * We keep it in-memory (not DB-persisted) — restart clears and we'll find
18
+ * out again the first time we try; the cost of being re-wrong is just a
19
+ * handful of 401s, not worth persisting.
20
+ */
21
+
22
+ const DEFAULT_INTERVAL_MS = 4000;
23
+ const DEFAULT_MAX_CONSECUTIVE_401 = 10;
24
+ const DEFAULT_MAX_BACKOFF_MS = 300_000; // 5 min — matches OpenClaw
25
+
26
// Per-chat breaker state, keyed by chat_id and shared by every typing loop in
// this module. resetChatTypingState() clears it (used by tests).
const chatState = new Map();

/**
 * Look up the breaker state for a chat, creating a zeroed entry on first use.
 *
 * @param {string} chatId - Map key for the chat.
 * @returns {{failures: number, suspendedUntil: number}} The stored object
 *   itself, so callers mutate shared state.
 */
function getState(chatId) {
  const existing = chatState.get(chatId);
  if (existing) return existing;

  const fresh = { failures: 0, suspendedUntil: 0 };
  chatState.set(chatId, fresh);
  return fresh;
}
37
+
38
/**
 * Classify an error as an authorization-style failure for the typing breaker.
 *
 * The numeric code is read from `error_code`, falling back to `status`; 401
 * and 403 count. Otherwise the description (or message) is matched against a
 * small set of phrases.
 *
 * @param {*} err - Error-like value; may be null/undefined.
 * @returns {boolean}
 */
function isAuthFailure(err) {
  const status = err?.error_code ?? err?.status;
  if (status === 401 || status === 403) return true;

  const text = err?.description || err?.message || '';
  return /Forbidden|Unauthorized|bot was blocked|chat not found/i.test(text);
}
43
+
44
/**
 * Delay for the n-th consecutive failure: starts at 1s and doubles each time
 * (1s, 2s, 4s, 8s, …), never exceeding maxBackoffMs.
 *
 * @param {number} failures - Consecutive failure count; values below 1 are
 *   treated like the first failure.
 * @param {number} maxBackoffMs - Upper bound for the returned delay.
 * @returns {number} Delay in milliseconds.
 */
function backoffDelay(failures, maxBackoffMs) {
  const doublings = Math.max(0, failures - 1);
  const uncapped = 1000 * 2 ** doublings;
  return Math.min(maxBackoffMs, uncapped);
}
49
+
50
/**
 * Start the typing-indicator loop for a chat. Returns a stop function.
 *
 * One `sendChatAction('typing')` is attempted immediately and then every
 * `intervalMs`. Auth-style failures (see isAuthFailure) feed the per-chat
 * breaker state: each one pushes `suspendedUntil` out by backoffDelay(), and
 * once `maxConsecutive401` is reached the chat is suspended for
 * `maxBackoffMs`. Ticks that fall inside a suspension window do nothing.
 * Any other error is logged and does not touch the breaker.
 *
 * The logger and the onEvent hook are caller-supplied, so they are invoked
 * defensively: a throw from either is contained and never alters breaker
 * state or escapes as an unhandled rejection from the timer callback.
 *
 * @param {object} deps
 * @param {import('grammy').Bot} deps.bot
 * @param {string|number} deps.chatId
 * @param {string} [deps.threadId]
 * @param {number} [deps.intervalMs]
 * @param {number} [deps.maxConsecutive401]
 * @param {number} [deps.maxBackoffMs]
 * @param {object} [deps.logger] - { error, log } — default console
 * @param {(evt: {kind: string, chat_id: string, detail?: object}) => void} [deps.onEvent]
 *   Hook for polygram's `events` DB log.
 * @returns {() => void} Stop function; clears the interval and makes any
 *   later tick a no-op.
 */
function startTyping({
  bot, chatId, threadId,
  intervalMs = DEFAULT_INTERVAL_MS,
  maxConsecutive401 = DEFAULT_MAX_CONSECUTIVE_401,
  maxBackoffMs = DEFAULT_MAX_BACKOFF_MS,
  logger = console,
  onEvent = null,
} = {}) {
  const key = String(chatId);
  const opts = threadId ? { message_thread_id: threadId } : {};
  let timer = null;
  let stopped = false;

  // Logging must never throw out of a tick.
  const logError = (line) => {
    try {
      logger?.error?.(line);
    } catch {
      // The logger itself failed; there is nowhere left to report it.
    }
  };

  // Event hook failures are reported but must not be mistaken for a failed
  // sendChatAction or interrupt the breaker bookkeeping.
  const emit = (kind, detail) => {
    if (!onEvent) return;
    try {
      onEvent({ kind, chat_id: key, detail });
    } catch (hookErr) {
      logError(`[typing] ${key}: onEvent(${kind}) threw: ${hookErr?.message}`);
    }
  };

  const tick = async () => {
    if (stopped) return;
    const s = getState(key);
    if (s.suspendedUntil > Date.now()) return;

    try {
      await bot.api.sendChatAction(chatId, 'typing', opts);
    } catch (err) {
      if (!isAuthFailure(err)) {
        // Other errors (network blip, 500, etc.): don't open the circuit.
        // Let the next tick try again.
        logError(`[typing] ${key}: ${err?.description || err?.message}`);
        return;
      }
      s.failures += 1;
      if (s.failures >= maxConsecutive401) {
        // Circuit fully open — suspend for the whole maxBackoffMs window.
        // The first tick after the window tests the waters again.
        s.suspendedUntil = Date.now() + maxBackoffMs;
        emit('typing-suspended', { failures: s.failures, suspend_ms: maxBackoffMs });
        logError(`[typing] ${key}: ${s.failures} consecutive auth failures; suspending ${maxBackoffMs / 1000}s`);
      } else {
        // Partial open — back off for an exponentially growing window.
        s.suspendedUntil = Date.now() + backoffDelay(s.failures, maxBackoffMs);
      }
      return;
    }

    // Success. Reset the breaker BEFORE notifying, so a misbehaving hook
    // cannot leave a stale failure count behind.
    const recoveredFrom = s.failures;
    s.failures = 0;
    s.suspendedUntil = 0;
    if (recoveredFrom > 0) {
      emit('typing-recovered', { after_failures: recoveredFrom });
    }
  };

  // tick() is never awaited by the timer, so attach a last-resort handler to
  // keep an unexpected throw from becoming an unhandled rejection.
  const run = () => {
    tick().catch((err) => logError(`[typing] ${key}: tick failed: ${err?.message}`));
  };

  // Fire once immediately, then every intervalMs.
  run();
  timer = setInterval(run, intervalMs);
  // Don't let the typing loop alone keep the process alive.
  timer.unref?.();

  return () => {
    stopped = true;
    if (timer) clearInterval(timer);
    timer = null;
  };
}
124
+
125
/**
 * Forget breaker state: for one chat when an id is given, for every chat
 * when `chatId` is null or undefined.
 *
 * @param {string|number|null} [chatId]
 * @returns {void}
 */
function resetChatTypingState(chatId) {
  if (chatId != null) {
    chatState.delete(String(chatId));
    return;
  }
  chatState.clear();
}
129
+
130
/**
 * Read the breaker state for a chat without creating an entry.
 *
 * @param {string|number} chatId - Coerced to a string key before lookup.
 * @returns {{failures: number, suspendedUntil: number}|undefined} The stored
 *   state object, or undefined when the chat has none.
 */
function getChatTypingState(chatId) {
  const key = String(chatId);
  return chatState.get(key);
}
133
+
134
+ module.exports = {
135
+ startTyping,
136
+ resetChatTypingState,
137
+ getChatTypingState,
138
+ isAuthFailure,
139
+ backoffDelay,
140
+ DEFAULT_INTERVAL_MS,
141
+ DEFAULT_MAX_CONSECUTIVE_401,
142
+ DEFAULT_MAX_BACKOFF_MS,
143
+ };
@@ -0,0 +1,14 @@
1
+ -- Persist grammy's update offset so a polygram restart doesn't re-process
2
+ -- the entire getUpdates backlog from the last 24h. Grammy's in-memory
3
+ -- offset resets to 0 on boot; Telegram replies with every unconfirmed
4
+ -- update. For a bot that went down overnight with active chats, that can
5
+ -- mean re-running dozens of turns on stale messages.
6
+ --
7
+ -- One row per bot. Row is upserted on every successful getUpdates batch
8
+ -- that returned at least one update.
9
+
10
+ CREATE TABLE IF NOT EXISTS polling_state (
11
+ bot_name TEXT PRIMARY KEY,
12
+ last_update_id INTEGER NOT NULL,
13
+ ts INTEGER NOT NULL
14
+ );
package/package.json CHANGED
@@ -1,13 +1,13 @@
1
1
  {
2
2
  "name": "polygram",
3
- "version": "0.3.6",
3
+ "version": "0.4.1",
4
4
  "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
5
5
  "main": "lib/ipc-client.js",
6
6
  "bin": {
7
7
  "polygram": "polygram.js",
8
8
  "polygram-split-db": "scripts/split-db.js",
9
9
  "polygram-ipc": "scripts/ipc-smoke.js",
10
- "polygram-smoke": "scripts/smoke.js"
10
+ "polygram-doctor": "scripts/doctor.js"
11
11
  },
12
12
  "files": [
13
13
  "polygram.js",
@@ -16,7 +16,7 @@
16
16
  "migrations/",
17
17
  "scripts/split-db.js",
18
18
  "scripts/ipc-smoke.js",
19
- "scripts/smoke.js",
19
+ "scripts/doctor.js",
20
20
  "skills/",
21
21
  "commands/",
22
22
  ".claude-plugin/",
@@ -61,6 +61,7 @@
61
61
  "type": "commonjs",
62
62
  "dependencies": {
63
63
  "better-sqlite3": "^12.9.0",
64
- "grammy": "^1.42.0"
64
+ "grammy": "^1.42.0",
65
+ "telegramify-markdown": "^1.3.3"
65
66
  }
66
67
  }
package/polygram.js CHANGED
@@ -32,6 +32,9 @@ const { parseBotArg, parseDbArg, filterConfigToBot } = require('./lib/config-sco
32
32
  const { createStore: createPairingsStore, parseTtl: parsePairingTtl } = require('./lib/pairings');
33
33
  const { transcribe: transcribeVoice, isVoiceAttachment } = require('./lib/voice');
34
34
  const { createStreamer } = require('./lib/stream-reply');
35
+ const { isAbortRequest } = require('./lib/abort-detector');
36
+ const { startTyping } = require('./lib/typing-indicator');
37
+ const { createReactionManager, classifyToolName } = require('./lib/status-reactions');
35
38
  const {
36
39
  createStore: createApprovalsStore,
37
40
  matchesAnyPattern: matchesApprovalPattern,
@@ -79,6 +82,7 @@ let ipcCloser = null;
79
82
  let BOT_NAME = null; // string, frozen after boot
80
83
  let bot = null; // grammy Bot for BOT_NAME
81
84
  let streamers = new Map(); // sessionKey -> active Streamer (while turn is in flight)
85
+ let reactors = new Map(); // sessionKey -> active ReactionManager (while turn is in flight)
82
86
 
83
87
  // Allowlist of env var names passed through to spawned Claude processes.
84
88
  // Anything not listed here is dropped to prevent leaked secrets/ssh agents
@@ -515,7 +519,12 @@ async function sendToProcess(sessionKey, prompt) {
515
519
  const chatId = getChatIdFromKey(sessionKey);
516
520
  const chatConfig = config.chats[chatId];
517
521
  const timeoutMs = (chatConfig.timeout || config.defaults.timeout) * 1000;
518
- return pm.send(sessionKey, prompt, { timeoutMs });
522
+ // Wall-clock ceiling (seconds). Overridable per-chat via chatConfig.maxTurn
523
+ // or globally via config.defaults.maxTurn. 30 min default is generous for
524
+ // long audits; stuck API calls rarely run that long without firing the
525
+ // idle timer first. Unit: seconds → milliseconds.
526
+ const maxTurnMs = (chatConfig.maxTurn || config.defaults?.maxTurn || 1800) * 1000;
527
+ return pm.send(sessionKey, prompt, { timeoutMs, maxTurnMs });
519
528
  }
520
529
 
521
530
  // ─── Message queue (per-chat) ───────────────────────────────────────
@@ -572,15 +581,10 @@ async function processQueue(sessionKey) {
572
581
 
573
582
  const drainQueuesForChat = (chatId) => drainQueuesForChatImpl(queues, chatId);
574
583
 
575
- // ─── Typing indicator ───────────────────────────────────────────────
576
-
577
- function startTyping(bot, chatId, threadId) {
578
- const opts = threadId ? { message_thread_id: threadId } : {};
579
- const send = () => bot.api.sendChatAction(chatId, 'typing', opts).catch(() => {});
580
- send();
581
- const interval = setInterval(send, 4000);
582
- return () => clearInterval(interval);
583
- }
584
+ // Typing indicator is imported from lib/typing-indicator — it adds a
585
+ // per-chat circuit breaker with exponential backoff so a chat that
586
+ // permanently 401s (bot blocked, chat deleted) doesn't have us
587
+ // hammering sendChatAction every 4s for the full turn duration.
584
588
 
585
589
  // ─── Response parsing (stickers, reactions) ─────────────────────────
586
590
 
@@ -736,7 +740,7 @@ async function handleApprovalRequest(req) {
736
740
  chat_id: apprCfg.adminChatId,
737
741
  text: approvalCardText(row),
738
742
  reply_markup: buildApprovalKeyboard(row.id, row.callback_token),
739
- }, { source: 'approval-request', botName: BOT_NAME });
743
+ }, { source: 'approval-request', botName: BOT_NAME, plainText: true });
740
744
  if (sent?.message_id) {
741
745
  approvals.setApproverMsgId(row.id, sent.message_id);
742
746
  }
@@ -1106,46 +1110,77 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
1106
1110
  });
1107
1111
 
1108
1112
  const prompt = formatPrompt(msg, sessionCtx, downloaded);
1109
- const stopTyping = startTyping(bot, chatId, threadId);
1113
+ const stopTyping = startTyping({
1114
+ bot, chatId, threadId,
1115
+ logger: { error: (m) => console.error(`[${label}] ${m}`) },
1116
+ onEvent: (e) => dbWrite(() => db.logEvent(e.kind, {
1117
+ bot: BOT_NAME, chat_id: e.chat_id, ...(e.detail || {}),
1118
+ }), `log ${e.kind}`),
1119
+ });
1110
1120
 
1111
1121
  const botCfg = config.bot || {};
1112
- const streamEnabled = botCfg.streamReplies === true;
1113
1122
  const outMetaBase = {
1114
- source: streamEnabled ? 'bot-reply-stream' : 'bot-reply',
1123
+ source: 'bot-reply-stream',
1115
1124
  botName: BOT_NAME,
1116
1125
  model: chatConfig.model,
1117
1126
  effort: chatConfig.effort,
1118
1127
  };
1119
1128
 
1120
- let streamer = null;
1121
- if (streamEnabled) {
1122
- streamer = createStreamer({
1123
- send: async (text) => tg(bot, 'sendMessage', {
1124
- chat_id: chatId, text,
1125
- reply_parameters: { message_id: msg.message_id },
1126
- ...(threadId && { message_thread_id: threadId }),
1127
- }, outMetaBase),
1128
- edit: async (messageId, text) => {
1129
- try {
1130
- return await bot.api.editMessageText(chatId, messageId, text);
1131
- } catch (err) {
1132
- // Stream-edit failures would otherwise be invisible — edits bypass
1133
- // tg() so there's no messages row reflecting the attempt. Log to
1134
- // events so stuck streams leave a forensic trail.
1135
- dbWrite(() => db.logEvent('telegram-edit-failed', {
1136
- chat_id: chatId, msg_id: messageId,
1137
- api_error: err.message?.slice(0, 200),
1138
- bot: BOT_NAME,
1139
- }), 'log telegram-edit-failed');
1140
- throw err;
1141
- }
1142
- },
1143
- minChars: botCfg.streamMinChars,
1144
- throttleMs: botCfg.streamThrottleMs,
1145
- logger: { error: (m) => console.error(`[${label}] ${m}`) },
1146
- });
1147
- streamers.set(sessionKey, streamer);
1148
- }
1129
+ // Streaming is unconditional as of 0.4.0 — matches OpenClaw's model and
1130
+ // eliminates the "stuck at 15min typing" complaint from the non-streaming
1131
+ // code path. For short responses the streamer stays idle and we fall
1132
+ // through to the normal send path via finalize() returning streamed=false.
1133
+ const streamer = createStreamer({
1134
+ send: async (text) => tg(bot, 'sendMessage', {
1135
+ chat_id: chatId, text,
1136
+ // allow_sending_without_reply: long-running turns give the user
1137
+ // plenty of time to delete their original message. Without this
1138
+ // flag, Telegram rejects the reply with MESSAGE_NOT_FOUND and the
1139
+ // whole streamed answer is lost. With it, the reply simply lands
1140
+ // as a standalone message.
1141
+ reply_parameters: { message_id: msg.message_id, allow_sending_without_reply: true },
1142
+ ...(threadId && { message_thread_id: threadId }),
1143
+ }, outMetaBase),
1144
+ edit: async (messageId, text) => {
1145
+ try {
1146
+ return await bot.api.editMessageText(chatId, messageId, text);
1147
+ } catch (err) {
1148
+ // Stream-edit failures would otherwise be invisible — edits bypass
1149
+ // tg() so there's no messages row reflecting the attempt. Log to
1150
+ // events so stuck streams leave a forensic trail.
1151
+ dbWrite(() => db.logEvent('telegram-edit-failed', {
1152
+ chat_id: chatId, msg_id: messageId,
1153
+ api_error: err.message?.slice(0, 200),
1154
+ bot: BOT_NAME,
1155
+ }), 'log telegram-edit-failed');
1156
+ throw err;
1157
+ }
1158
+ },
1159
+ minChars: botCfg.streamMinChars,
1160
+ throttleMs: botCfg.streamThrottleMs,
1161
+ logger: { error: (m) => console.error(`[${label}] ${m}`) },
1162
+ });
1163
+ streamers.set(sessionKey, streamer);
1164
+
1165
+ // Status reactions on the user's message: 👀 queued → 🤔 thinking →
1166
+ // 👨‍💻 coding / ⚡ web / 🔥 tool → 👍 done / 🤯 error. Silent (no
1167
+ // notifications), updates in place, one emoji per message. Uses
1168
+ // setMessageReaction which skips the DB row (the tg() wrapper
1169
+ // short-circuits that method), so no transcript spam.
1170
+ const reactor = createReactionManager({
1171
+ apply: async (emoji) => {
1172
+ const params = {
1173
+ chat_id: chatId,
1174
+ message_id: msg.message_id,
1175
+ reaction: emoji ? [{ type: 'emoji', emoji }] : [],
1176
+ };
1177
+ await tg(bot, 'setMessageReaction', params,
1178
+ { source: 'status-reaction', botName: BOT_NAME });
1179
+ },
1180
+ logError: (m) => console.error(`[${label}] ${m}`),
1181
+ });
1182
+ reactors.set(sessionKey, reactor);
1183
+ reactor.setState('THINKING');
1149
1184
 
1150
1185
  try {
1151
1186
  const result = await sendToProcess(sessionKey, prompt);
@@ -1155,7 +1190,10 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
1155
1190
 
1156
1191
  if (result.error) {
1157
1192
  console.error(`[${label}] Error (${elapsed}s):`, result.error);
1193
+ reactor.setState('ERROR');
1158
1194
  if (!result.text) return;
1195
+ } else {
1196
+ reactor.setState('DONE');
1159
1197
  }
1160
1198
 
1161
1199
  if (!result.text || result.text === 'NO_REPLY') return;
@@ -1165,7 +1203,7 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
1165
1203
 
1166
1204
  // Streamed text path: finalise the live-edit and, if the full response
1167
1205
  // overflows Telegram's 4096 cap, send remainder as follow-up chunks.
1168
- if (streamer && parsed.text) {
1206
+ if (parsed.text) {
1169
1207
  const fin = await streamer.finalize(parsed.text);
1170
1208
  if (fin.streamed) {
1171
1209
  if (parsed.text.length > TG_MAX_LEN) {
@@ -1221,14 +1259,24 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
1221
1259
 
1222
1260
  console.log(`[${label}] ${elapsed}s | ${result.text.length} chars | ${chatConfig.model}/${chatConfig.effort} | $${result.cost?.toFixed(4) || '?'}`);
1223
1261
  } catch (err) {
1224
- if (streamer) {
1225
- // Generic suffix err.message can leak internal paths/state.
1226
- await streamer.finalize('', { errorSuffix: 'stream interrupted' }).catch(() => {});
1262
+ // Generic suffix — err.message can leak internal paths/state.
1263
+ await streamer.finalize('', { errorSuffix: 'stream interrupted' }).catch(() => {});
1264
+ // Signal the failure to the user's message reaction. Timeout gets its
1265
+ // own face; anything else is generic error.
1266
+ if (/wall-clock ceiling|idle with no Claude activity/i.test(err?.message || '')) {
1267
+ reactor.setState('TIMEOUT');
1268
+ } else {
1269
+ reactor.setState('ERROR');
1227
1270
  }
1228
1271
  throw err;
1229
1272
  } finally {
1230
1273
  stopTyping();
1231
- if (streamer) streamers.delete(sessionKey);
1274
+ streamers.delete(sessionKey);
1275
+ // Give the reactor a beat to flush the terminal state (DONE/ERROR/TIMEOUT
1276
+ // bypass throttle so this is instant in practice; the stop() below
1277
+ // guards against any late transition leaking after the turn ends).
1278
+ reactor.stop();
1279
+ reactors.delete(sessionKey);
1232
1280
  }
1233
1281
  }
1234
1282
 
@@ -1390,6 +1438,36 @@ function createBot(token) {
1390
1438
  const rawText = ctx.message.text || '';
1391
1439
  const cleanText = mentionRe ? rawText.replace(mentionRe, '').trim() : rawText.trim();
1392
1440
 
1441
+ // Abort: skip the queue entirely. Matches bilingual natural-language
1442
+ // cues ("stop" / "стоп" / "cancel" / "отмена" / …) and explicit
1443
+ // slash commands (/stop, /abort, /cancel). Kills the active Claude
1444
+ // subprocess and drains queued messages for this chat. Replies so
1445
+ // the user sees the bot heard them — silent abort is worse than
1446
+ // acknowledged abort.
1447
+ if (isAbortRequest(cleanText)) {
1448
+ const threadId = ctx.message.message_thread_id?.toString();
1449
+ const sessionKey = getSessionKey(chatId, threadId, chatConfig);
1450
+ const hadActive = pm.has(sessionKey) && !!pm.get(sessionKey)?.inFlight;
1451
+ const dropped = drainQueuesForChat(chatId);
1452
+ await pm.killChat(chatId).catch(() => {});
1453
+ dbWrite(() => db.logEvent('abort-requested', {
1454
+ chat_id: chatId, user_id: ctx.message.from?.id || null,
1455
+ had_active: hadActive, queued_dropped: dropped,
1456
+ trigger: cleanText.slice(0, 40),
1457
+ }), 'log abort-requested');
1458
+ const reply = hadActive || dropped
1459
+ ? (dropped ? `Остановлено. Очередь очищена (${dropped}).` : 'Остановлено.')
1460
+ : 'Нечего останавливать.';
1461
+ try {
1462
+ await tg(bot, 'sendMessage', {
1463
+ chat_id: chatId, text: reply,
1464
+ reply_parameters: { message_id: ctx.message.message_id, allow_sending_without_reply: true },
1465
+ ...(threadId && { message_thread_id: threadId }),
1466
+ }, { source: 'abort-ack', botName: BOT_NAME });
1467
+ } catch {}
1468
+ return;
1469
+ }
1470
+
1393
1471
  const botAllowsCommands = !!config.bot?.allowConfigCommands;
1394
1472
  const isAdminCmd = botAllowsCommands && ADMIN_CMD_RE.test(cleanText);
1395
1473
  const isPairClaim = PAIR_CLAIM_RE.test(cleanText);
@@ -1508,7 +1586,21 @@ async function pollBot(bot) {
1508
1586
 
1509
1587
  await bot.api.deleteWebhook();
1510
1588
 
1589
+ // Restore polling offset from DB so a restart doesn't re-process the
1590
+ // backlog Telegram has accumulated while we were down. Grammy's in-memory
1591
+ // offset resets to 0 each boot, which makes getUpdates return every
1592
+ // un-confirmed update since the last ack — for an overnight outage that
1593
+ // can mean replaying dozens of stale messages.
1511
1594
  let offset = 0;
1595
+ try {
1596
+ const saved = db?.getPollingOffset?.(BOT_NAME);
1597
+ if (saved && saved > 0) {
1598
+ offset = saved + 1;
1599
+ console.log(`[${BOT_NAME}] resuming polling from update_id ${saved}`);
1600
+ }
1601
+ } catch (err) {
1602
+ console.error(`[${BOT_NAME}] getPollingOffset failed: ${err.message}`);
1603
+ }
1512
1604
  let running = true;
1513
1605
  bot._lastPollTs = Date.now();
1514
1606
 
@@ -1545,6 +1637,13 @@ async function pollBot(bot) {
1545
1637
  console.error(`[${BOT_NAME}] Handler error:`, err.message);
1546
1638
  }
1547
1639
  }
1640
+ // Persist offset after batch dispatch so a crash mid-batch only risks
1641
+ // re-processing the unacked updates. We write only on non-empty batches
1642
+ // to avoid churning the row on every 25s idle poll.
1643
+ if (updates.length > 0) {
1644
+ dbWrite(() => db.savePollingOffset(BOT_NAME, updates[updates.length - 1].update_id),
1645
+ 'save polling offset');
1646
+ }
1548
1647
  // No sleep on the success path: long-poll already blocks up to 25s
1549
1648
  // when idle. Sleeping here would add latency with no gain.
1550
1649
  } catch (err) {
@@ -1666,6 +1765,10 @@ async function main() {
1666
1765
  const s = streamers.get(sessionKey);
1667
1766
  if (s) s.onChunk(partial).catch(() => {});
1668
1767
  },
1768
+ onToolUse: (sessionKey, toolName) => {
1769
+ const r = reactors.get(sessionKey);
1770
+ if (r) r.setState(classifyToolName(toolName));
1771
+ },
1669
1772
  });
1670
1773
 
1671
1774
  console.log(`polygram (LRU cap=${cap}, SQLite source of truth)`);