polygram 0.8.0 → 0.9.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/lib/{agent-loader.js → agents/loader.js} +6 -8
  3. package/lib/{approvals.js → approvals/store.js} +28 -5
  4. package/lib/{approval-ui.js → approvals/ui.js} +1 -17
  5. package/lib/config.js +121 -0
  6. package/lib/{error-classify.js → error/classify.js} +25 -34
  7. package/lib/handlers/abort.js +89 -0
  8. package/lib/handlers/approvals.js +361 -0
  9. package/lib/handlers/autosteer.js +94 -0
  10. package/lib/handlers/config-callback.js +118 -0
  11. package/lib/handlers/config-ui.js +104 -0
  12. package/lib/handlers/dispatcher.js +263 -0
  13. package/lib/handlers/download.js +182 -0
  14. package/lib/handlers/extract-attachments.js +97 -0
  15. package/lib/handlers/ipc-send.js +80 -0
  16. package/lib/handlers/poll.js +140 -0
  17. package/lib/handlers/record-inbound.js +88 -0
  18. package/lib/handlers/slash-commands.js +319 -0
  19. package/lib/handlers/voice.js +107 -0
  20. package/lib/pm-interface.js +27 -29
  21. package/lib/sdk/build-options.js +177 -0
  22. package/lib/sdk/callbacks.js +213 -0
  23. package/lib/{process-manager-sdk.js → sdk/process-manager.js} +19 -31
  24. package/lib/{telegram.js → telegram/api.js} +2 -2
  25. package/lib/{telegram-prompt.js → telegram/display-hint.js} +0 -14
  26. package/lib/{stream-reply.js → telegram/streamer.js} +4 -4
  27. package/package.json +2 -3
  28. package/polygram.js +347 -2581
  29. package/scripts/doctor.js +1 -1
  30. package/scripts/ipc-smoke.js +1 -10
  31. package/bin/approval-hook.js +0 -113
  32. package/lib/approval-waiters.js +0 -201
  33. package/lib/pm-router.js +0 -201
  34. package/lib/process-manager.js +0 -806
  35. /package/lib/{auto-resume.js → db/auto-resume.js} +0 -0
  36. /package/lib/{inbox.js → db/inbox.js} +0 -0
  37. /package/lib/{pairings.js → db/pairings.js} +0 -0
  38. /package/lib/{replay-window.js → db/replay-window.js} +0 -0
  39. /package/lib/{sent-cache.js → db/sent-cache.js} +0 -0
  40. /package/lib/{sessions.js → db/sessions.js} +0 -0
  41. /package/lib/{net-errors.js → error/net.js} +0 -0
  42. /package/lib/{ipc-client.js → ipc/client.js} +0 -0
  43. /package/lib/{ipc-file-validator.js → ipc/file-validator.js} +0 -0
  44. /package/lib/{ipc-server.js → ipc/server.js} +0 -0
  45. /package/lib/{telegram-chunk.js → telegram/chunk.js} +0 -0
  46. /package/lib/{deliver.js → telegram/deliver.js} +0 -0
  47. /package/lib/{telegram-format.js → telegram/format.js} +0 -0
  48. /package/lib/{parse-response.js → telegram/parse.js} +0 -0
  49. /package/lib/{status-reactions.js → telegram/reactions.js} +0 -0
  50. /package/lib/{typing-indicator.js → telegram/typing.js} +0 -0
  51. /package/lib/{voice.js → telegram/voice.js} +0 -0
package/polygram.js CHANGED
@@ -2,79 +2,90 @@
2
2
  /**
3
3
  * Telegram Bridge for Claude Code — Persistent Sessions
4
4
  *
5
- * Each chat gets a persistent claude process (stream-json multi-turn).
6
- * Process stays warm: no cold start, full prompt caching.
5
+ * Each chat gets a long-lived `@anthropic-ai/claude-agent-sdk` Query
6
+ * held warm in lib/sdk/process-manager.js. No cold start; full prompt
7
+ * caching across turns.
7
8
  *
8
9
  * Architecture:
9
10
  * Telegram (grammy long-poll) → polygram receives message
10
11
  * → looks up per-chat config (model, effort, agent, cwd)
11
- * → sends to persistent claude process via stdin (stream-json)
12
- * → reads response from stdout (stream-json)
13
- * → sends reply to Telegram
12
+ * → routes to the per-chat Query via pm.send / pm.injectUserMessage
13
+ * → streams the assistant reply back to the chat live
14
14
  * → writes every in/out message to per-bot SQLite (source of truth)
15
15
  *
16
- * Chat commands: /model <model>, /effort <level>, /config
16
+ * Chat commands: /model, /effort, /config, /context, /compact,
17
+ * /new, /reset, /reload, /agent, /stop.
17
18
  */
18
19
 
20
+ 'use strict';
21
+
19
22
  const { Bot } = require('grammy');
20
- const { spawn } = require('child_process');
21
23
  const fs = require('fs');
22
24
  const path = require('path');
23
25
  const processGuard = require('./lib/process-guard');
24
26
  const dbClient = require('./lib/db');
25
- const { migrateJsonToDb, getClaudeSessionId } = require('./lib/sessions');
27
+ const { migrateJsonToDb, getClaudeSessionId } = require('./lib/db/sessions');
26
28
  const { buildPrompt } = require('./lib/prompt');
27
- const { filterAttachments, MAX_FILE_BYTES } = require('./lib/attachments');
28
- const { ProcessManager } = require('./lib/process-manager');
29
- // 0.8.0 Phase 3: SDK-backed pm available behind POLYGRAM_USE_SDK=1.
29
+ const { filterAttachments } = require('./lib/attachments');
30
+ // 0.9.0: SDK ProcessManager is the only pm. CLI pm
31
+ // (lib/process-manager.js) deleted in commit 6.
30
32
  // Both implementations expose the same public API (constructor +
31
33
  // callbacks), so the rest of polygram.js doesn't branch beyond the
32
34
  // pick-at-startup. Phase 4 deletes the CLI version after Phase 5
33
35
  // soak proves SDK stable. See docs/0.8.0-architecture-decisions.md.
34
- const { ProcessManagerSdk, extractAssistantText } = require('./lib/process-manager-sdk');
36
+ const { ProcessManagerSdk, extractAssistantText } = require('./lib/sdk/process-manager');
35
37
  // rc.42: autosteer-buffer module deleted. Native SDK priority push
36
38
  // (pm.injectUserMessage) replaces the buffer + PostToolBatch detour.
37
39
  const { createAutosteeredRefs } = require('./lib/autosteered-refs');
38
- const { makeRouterPolicy, createPmRouter } = require('./lib/pm-router');
40
+ const { createBuildSdkOptions } = require('./lib/sdk/build-options');
41
+ const { createSdkCallbacks } = require('./lib/sdk/callbacks');
42
+ const { createTranscribeVoiceAttachments } = require('./lib/handlers/voice');
43
+ const { createDownloadAttachments } = require('./lib/handlers/download');
44
+ const { createHandleConfigCallback } = require('./lib/handlers/config-callback');
45
+ const { createHandleAbort } = require('./lib/handlers/abort');
46
+ const { createAutosteerHandlers } = require('./lib/handlers/autosteer');
47
+ const { createSlashCommands } = require('./lib/handlers/slash-commands');
48
+ const { createApprovals } = require('./lib/handlers/approvals');
39
49
  const { canonicalizeToolInput } = require('./lib/canonical-json');
40
50
  const {
41
- buildApprovalKeyboard,
42
51
  buildApprovalKeyboardWithAlways,
43
52
  formatToolInputForCard,
44
53
  approvalCardText,
45
- } = require('./lib/approval-ui');
54
+ } = require('./lib/approvals/ui');
46
55
  const { buildHistoryBlock } = require('./lib/history-preload');
47
56
  const { formatContextReply, maybeContextFullHint } = require('./lib/context-format');
48
- const { appendDisplayHint, appendDisplayHintCliArgs } = require('./lib/telegram-prompt');
57
+ // appendDisplayHint moved with buildSdkOptions extraction (commit 18) —
58
+ // only consumer of that import is now lib/sdk/build-options.js itself.
49
59
  const { createAbortGrace } = require('./lib/abort-grace');
50
- const agentLoader = require('./lib/agent-loader');
51
- const USE_SDK = process.env.POLYGRAM_USE_SDK === '1';
52
- const { createSender } = require('./lib/telegram');
60
+ const agentLoader = require('./lib/agents/loader');
61
+ const { createSender } = require('./lib/telegram/api');
53
62
  const { createAsyncLock } = require('./lib/async-lock');
54
- const { sweepInbox } = require('./lib/inbox');
63
+ const { sweepInbox } = require('./lib/db/inbox');
55
64
  const { parseBotArg, parseDbArg, filterConfigToBot } = require('./lib/config-scope');
56
- const { createStore: createPairingsStore, parseTtl: parsePairingTtl } = require('./lib/pairings');
57
- const { transcribe: transcribeVoice, isVoiceAttachment } = require('./lib/voice');
58
- const { createStreamer } = require('./lib/stream-reply');
59
- const { chunkMarkdownText } = require('./lib/telegram-chunk');
60
- const { deliverReplies } = require('./lib/deliver');
65
+ const { createStore: createPairingsStore, parseTtl: parsePairingTtl } = require('./lib/db/pairings');
66
+ const { transcribe: transcribeVoice, isVoiceAttachment } = require('./lib/telegram/voice');
67
+ const { createStreamer } = require('./lib/telegram/streamer');
68
+ const { chunkMarkdownText } = require('./lib/telegram/chunk');
69
+ const { deliverReplies } = require('./lib/telegram/deliver');
61
70
  const { announce, shouldAnnounce } = require('./lib/announces');
62
71
  const { isAbortRequest } = require('./lib/abort-detector');
63
- const { startTyping } = require('./lib/typing-indicator');
64
- const { redactBotToken } = require('./lib/net-errors');
65
- const { createReactionManager, classifyToolName } = require('./lib/status-reactions');
72
+ const { startTyping } = require('./lib/telegram/typing');
73
+ // redactBotToken moved with download extraction (commit 21) — only
74
+ // consumer is lib/handlers/download.js.
75
+ const { createReactionManager, classifyToolName } = require('./lib/telegram/reactions');
66
76
  const { createMediaGroupBuffer } = require('./lib/media-group-buffer');
67
- const { classify: classifyError, isTransientHttpError } = require('./lib/error-classify');
68
- const { createAutoResumeTracker, isAutoResumable } = require('./lib/auto-resume');
69
- const { resolveReplayWindowMs } = require('./lib/replay-window');
70
- const { validateIpcFileParam } = require('./lib/ipc-file-validator');
77
+ const { classify: classifyError, isTransientHttpError } = require('./lib/error/classify');
78
+ const { createAutoResumeTracker, isAutoResumable } = require('./lib/db/auto-resume');
79
+ const { resolveReplayWindowMs } = require('./lib/db/replay-window');
80
+ // validateIpcFileParam moved with handleSendOverIpc to
81
+ // lib/handlers/ipc-send.js (commit 36).
71
82
  const {
72
83
  createStore: createApprovalsStore,
73
84
  matchesAnyPattern: matchesApprovalPattern,
74
85
  tokensEqual: approvalTokensEqual,
75
86
  DEFAULT_TIMEOUT_MS: APPROVAL_DEFAULT_TIMEOUT_MS,
76
- } = require('./lib/approvals');
77
- const ipcServer = require('./lib/ipc-server');
87
+ } = require('./lib/approvals/store');
88
+ const ipcServer = require('./lib/ipc/server');
78
89
 
79
90
  // ─── Config ──────────────────────────────────────────────────────────
80
91
  //
@@ -107,7 +118,7 @@ let db;
107
118
  let tg; // unified sender, created after db opens
108
119
  let pairings; // pairings store, created after db opens
109
120
  let approvals; // approvals store, created after db opens
110
- let approvalWaiters = new Map(); // approval_id -> { resolve, reject, timer }
121
+ // approvalWaiters Map moved to lib/handlers/approvals.js (commit 29).
111
122
  let approvalSweepTimer = null;
112
123
  let ipcCloser = null;
113
124
  // BOT_NAME and bot are set once in main() after filterConfigToBot. Because
@@ -124,79 +135,24 @@ let bot = null; // grammy Bot for BOT_NAME
124
135
  // Allowlist of env var names passed through to spawned Claude processes.
125
136
  // Anything not listed here is dropped to prevent leaked secrets/ssh agents
126
137
  // from being read by a prompt-injected child. Prefixes match any var whose
127
- // name starts with that string.
128
- const CHILD_ENV_ALLOWLIST = new Set([
129
- 'PATH', 'HOME', 'USER', 'LOGNAME', 'SHELL', 'TERM', 'COLORTERM',
130
- 'TMPDIR', 'TMP', 'TEMP', 'TZ', 'LANG', 'PWD', 'SHLVL',
131
- ]);
132
- const CHILD_ENV_PREFIXES = ['LC_', 'NODE_', 'CLAUDE_', 'ANTHROPIC_'];
133
-
134
- function filterEnv(src) {
135
- const out = {};
136
- for (const [k, v] of Object.entries(src)) {
137
- if (CHILD_ENV_ALLOWLIST.has(k) || CHILD_ENV_PREFIXES.some((p) => k.startsWith(p))) {
138
- out[k] = v;
139
- }
140
- }
141
- return out;
142
- }
138
+ // name starts with that string. (filterEnv + lists moved to
139
+ // lib/sdk/build-options.js with the buildSdkOptions extraction.)
143
140
 
144
- function loadConfig() {
145
- config = JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf8'));
146
- }
141
+ // Config helpers extracted to lib/config.js. Thin wrappers below
142
+ // keep the existing call shape — polygram.js owns the module-level
143
+ // `config` / `stickerMap` / `emojiToSticker` and assigns from the
144
+ // pure I/O functions.
145
+ const configIO = require('./lib/config');
146
+ const { isWellFormedMessage, isWellFormedCallbackQuery } = configIO;
147
147
 
148
+ function loadConfig() { config = configIO.loadConfig(CONFIG_PATH); }
148
149
  function saveConfig() {
149
- // Atomic read-merge-write. In-memory `config` is FILTERED (only one bot +
150
- // its chats) because filterConfigToBot narrowed it at boot. Writing it
151
- // back directly would clobber every OTHER bot's section on disk. Instead:
152
- // read the current on-disk config fresh, apply our bot-scoped changes,
153
- // write the merged result. This is safe against parallel writers because
154
- // each bot only mutates entries inside its own scope (its bot entry + its
155
- // own chats), and we use rename for atomicity.
156
- const onDisk = JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf8'));
157
-
158
- // Apply our bot's changes.
159
- if (BOT_NAME && config.bots?.[BOT_NAME]) {
160
- onDisk.bots = onDisk.bots || {};
161
- onDisk.bots[BOT_NAME] = config.bots[BOT_NAME];
162
- }
163
- // Apply chat changes — only chats the filter left in our memory belong
164
- // to this bot; overwrite those keys, leave the rest of onDisk.chats alone.
165
- if (config.chats) {
166
- onDisk.chats = onDisk.chats || {};
167
- for (const [chatId, chat] of Object.entries(config.chats)) {
168
- onDisk.chats[chatId] = chat;
169
- }
170
- }
171
- // Top-level non-bot-scoped fields (defaults, maxWarmProcesses, etc.)
172
- // reflect ops-wide policy. Only copy if our in-memory value is newer —
173
- // but detecting that is hard; simplest safe rule is: don't touch them
174
- // from a bot-scoped process. Leave onDisk's values as-is.
175
-
176
- const tmp = `${CONFIG_PATH}.tmp.${process.pid}`;
177
- fs.writeFileSync(tmp, JSON.stringify(onDisk, null, 2));
178
- fs.renameSync(tmp, CONFIG_PATH);
150
+ configIO.saveConfig({ configPath: CONFIG_PATH, botName: BOT_NAME, config });
179
151
  }
180
-
181
152
  function loadStickers() {
182
- try {
183
- const data = JSON.parse(fs.readFileSync(STICKERS_PATH, 'utf8'));
184
- for (const [name, s] of Object.entries(data.stickers || {})) {
185
- stickerMap[name] = s.file_id;
186
- if (s.emoji) emojiToSticker[s.emoji] = s.file_id;
187
- }
188
- console.log(`Stickers: ${Object.keys(stickerMap).join(', ')}`);
189
- } catch { console.log('No sticker map found'); }
190
- }
191
-
192
- // Quick shape check before we start hashing/DB-writing on an update.
193
- // Telegram updates are user-controlled; a hostile or malformed payload
194
- // without chat.id / message_id would throw deep in recordInbound.
195
- function isWellFormedMessage(msg) {
196
- return !!(msg
197
- && msg.chat
198
- && (typeof msg.chat.id === 'number' || typeof msg.chat.id === 'bigint')
199
- && typeof msg.message_id === 'number');
153
+ const { stickerMap: m, emojiToSticker: e } = configIO.loadStickers(STICKERS_PATH);
154
+ Object.assign(stickerMap, m);
155
+ Object.assign(emojiToSticker, e);
200
156
  }
201
157
 
202
158
  // ─── Session key — moved to lib/session-key.js so tests can import it. ─
@@ -286,405 +242,27 @@ function logEvent(kind, detail) {
286
242
  dbWrite(() => db.logEvent(kind, detail), `log ${kind}`);
287
243
  }
288
244
 
289
- function recordInbound(msg) {
290
- // 0.6.4 wrapped the body in db.raw.transaction(...) for atomicity, but
291
- // the wrap itself runs at call time — before dbWrite's null-db guard
292
- // kicks in. A late-arriving inbound during shutdown (after db.raw.close())
293
- // would TypeError and unhandled-reject grammy's update handler. Restore
294
- // best-effort semantics with an explicit early-out.
295
- if (!db) return;
296
- const chatId = msg.chat.id.toString();
297
- const threadId = msg.message_thread_id?.toString() || null;
298
- const user = msg.from?.first_name || msg.from?.username || null;
299
- const attachments = extractAttachments(msg);
300
- const chatConfig = config.chats[chatId];
301
- const ts = (msg.date || Math.floor(Date.now() / 1000)) * 1000;
302
-
303
- // Atomic message + attachments write: all-or-nothing so a half-applied
304
- // record can never leave a message row with zero (or partial) attachment
305
- // rows that boot replay would silently treat as "no media." Wrapping in
306
- // db.raw.transaction also collapses the message + N-attachment fsyncs
307
- // into one commit (perf win for media groups: 7-attachment albums go
308
- // from 8 sync writes to 1).
309
- const writeInbound = db.raw.transaction(() => {
310
- db.insertMessage({
311
- chat_id: chatId,
312
- thread_id: threadId,
313
- msg_id: msg.message_id,
314
- user,
315
- user_id: msg.from?.id || null,
316
- text: msg.text || msg.caption || '',
317
- reply_to_id: msg.reply_to_message?.message_id || null,
318
- direction: 'in',
319
- source: 'polygram',
320
- bot_name: BOT_NAME,
321
- model: chatConfig?.model || null,
322
- effort: chatConfig?.effort || null,
323
- ts,
324
- });
325
-
326
- if (!attachments.length) return;
327
- // Look up the just-inserted (or ON-CONFLICT-updated) message row id
328
- // so attachments can FK to it. lastInsertRowid is unreliable across
329
- // the upsert path; an explicit lookup is cheap and always correct.
330
- const messageId = db.getInboundMessageId({ chat_id: chatId, msg_id: msg.message_id });
331
- if (!messageId) return;
332
- // Edit-safe insert: Telegram edited_message events re-fire
333
- // recordInbound with the same (chat_id, msg_id). polygram doesn't
334
- // currently handle media-edit cases (Bot API does support
335
- // editMessageMedia, but we don't process it specially — the typical
336
- // edit is text/caption). If rows already exist for this message_id
337
- // they're correct as-is — re-inserting would (a) duplicate them,
338
- // (b) reset download_status back to 'pending' and lose the
339
- // local_path we already fetched. If we add media-edit support
340
- // later, this guard needs to compare file_unique_id and replace
341
- // selectively rather than skipping wholesale.
342
- if (db.getAttachmentsByMessage(messageId).length > 0) return;
343
- for (const att of attachments) {
344
- db.insertAttachment({
345
- message_id: messageId,
346
- chat_id: chatId,
347
- msg_id: msg.message_id,
348
- thread_id: threadId,
349
- bot_name: BOT_NAME,
350
- file_id: att.file_id,
351
- file_unique_id: att.file_unique_id,
352
- kind: att.kind,
353
- name: att.name,
354
- mime_type: att.mime_type,
355
- size_bytes: att.size,
356
- ts,
357
- });
358
- }
359
- });
360
-
361
- dbWrite(() => writeInbound(), `insert inbound ${chatId}/${msg.message_id}`);
362
- }
363
-
364
-
365
- // ─── Attachment download ────────────────────────────────────────────
366
-
367
- function sanitizeFilename(name) {
368
- if (!name) return 'file';
369
- return name.replace(/[\/\\:\0]/g, '_').slice(0, 120);
370
- }
371
-
372
- // Short, filesystem-safe handle from the file_unique_id for auto-gen names.
373
- // Telegram guarantees file_unique_id is stable per file across sessions; 8
374
- // chars is collision-safe within a chat (~48 bits) and short enough to read.
375
- // Falls back to msg.message_id for the rare case where file_unique_id is
376
- // unset (very old Bot API rows). The pre-0.6.15 pattern embedded message_id
377
- // directly, which then went stale after media-group reassignment rewrote
378
- // the row's msg_id → name and msg_id disagreed about which Telegram message
379
- // the file came from. Using file_unique_id makes the name stable across
380
- // reassignment.
381
- function shortFileTag(fileUniqueId, fallback) {
382
- if (fileUniqueId) {
383
- return String(fileUniqueId).replace(/[^A-Za-z0-9_-]/g, '').slice(0, 8) || String(fallback);
384
- }
385
- return String(fallback);
386
- }
387
-
388
- function extractAttachments(msg) {
389
- // Media-group bundling path: when we synthesised a single message from
390
- // several siblings sharing a media_group_id, the merged attachment list
391
- // was pre-computed in `_mergedAttachments`. Return it directly instead
392
- // of running the per-field extraction against the primary message.
393
- if (Array.isArray(msg._mergedAttachments)) return msg._mergedAttachments;
394
-
395
- const items = [];
396
- if (msg.document) {
397
- const d = msg.document;
398
- items.push({
399
- file_id: d.file_id,
400
- file_unique_id: d.file_unique_id,
401
- name: d.file_name || `document-${shortFileTag(d.file_unique_id, msg.message_id)}`,
402
- mime_type: d.mime_type || 'application/octet-stream',
403
- size: d.file_size || 0,
404
- kind: 'document',
405
- });
406
- }
407
- if (msg.photo && msg.photo.length > 0) {
408
- const largest = msg.photo[msg.photo.length - 1];
409
- items.push({
410
- file_id: largest.file_id,
411
- file_unique_id: largest.file_unique_id,
412
- name: `photo-${shortFileTag(largest.file_unique_id, msg.message_id)}.jpg`,
413
- mime_type: 'image/jpeg',
414
- size: largest.file_size || 0,
415
- kind: 'photo',
416
- });
417
- }
418
- if (msg.voice) {
419
- items.push({
420
- file_id: msg.voice.file_id,
421
- file_unique_id: msg.voice.file_unique_id,
422
- name: `voice-${shortFileTag(msg.voice.file_unique_id, msg.message_id)}.ogg`,
423
- mime_type: msg.voice.mime_type || 'audio/ogg',
424
- size: msg.voice.file_size || 0,
425
- kind: 'voice',
426
- });
427
- }
428
- if (msg.audio) {
429
- const a = msg.audio;
430
- items.push({
431
- file_id: a.file_id,
432
- file_unique_id: a.file_unique_id,
433
- name: a.file_name || `audio-${shortFileTag(a.file_unique_id, msg.message_id)}.mp3`,
434
- mime_type: a.mime_type || 'audio/mpeg',
435
- size: a.file_size || 0,
436
- kind: 'audio',
437
- });
438
- }
439
- if (msg.video) {
440
- const v = msg.video;
441
- items.push({
442
- file_id: v.file_id,
443
- file_unique_id: v.file_unique_id,
444
- name: v.file_name || `video-${shortFileTag(v.file_unique_id, msg.message_id)}.mp4`,
445
- mime_type: v.mime_type || 'video/mp4',
446
- size: v.file_size || 0,
447
- kind: 'video',
448
- });
449
- }
450
- return items;
451
- }
245
+ // recordInbound extracted to lib/handlers/record-inbound.js. Wired
246
+ // in main() once db + config + extractAttachments are available.
247
+ let recordInbound = null;
452
248
 
453
- async function transcribeVoiceAttachments(downloaded, { chatId, msgId, label, botApi, threadId }) {
454
- const voiceCfg = config.bot?.voice || config.voice;
455
- if (!voiceCfg?.enabled) return { ackEmitted: false };
456
- const provider = voiceCfg.provider || 'openai';
457
- const providerCfg = voiceCfg[provider] || {};
458
- const targets = downloaded.filter((a) => isVoiceAttachment(a) && a.path);
459
- if (!targets.length) return { ackEmitted: false };
460
-
461
- // Acknowledge receipt with a reaction so the user knows we heard them.
462
- // Cheap, robust (no state), and survives transcription failure.
463
- // 0.7.4 (item G): we report `ackEmitted: true` so the caller can skip
464
- // the reactor's QUEUED → 👀 transition. Pre-fix, 👂 was visible for
465
- // ~milliseconds before 👀 overwrote it on the same message — wasted
466
- // API call and confusing flicker. Now 👂 stays until Claude actually
467
- // starts work and the reactor flips to THINKING (🤔).
468
- const ack = voiceCfg.ackReaction || '👂';
469
- let ackEmitted = false;
470
- if (ack && botApi) {
471
- ackEmitted = true;
472
- tg(botApi, 'setMessageReaction', {
473
- chat_id: chatId, message_id: msgId,
474
- reaction: [{ type: 'emoji', emoji: ack }],
475
- }, { source: 'voice-ack', botName: BOT_NAME }).catch((err) => {
476
- console.error(`[${label}] voice ack reaction failed: ${err.message}`);
477
- });
478
- }
479
249
 
480
- await Promise.all(targets.map(async (a) => {
481
- try {
482
- const opts = {
483
- provider,
484
- ...providerCfg,
485
- language: voiceCfg.language || 'auto',
486
- maxDurationSec: voiceCfg.maxDurationSec,
487
- maxDurationBytesPerSec: voiceCfg.maxDurationBytesPerSec,
488
- };
489
- const r = await transcribeVoice(a.path, opts);
490
- a.transcription = r;
491
- console.log(`[${label}] transcribed ${a.kind} (${r.duration_sec?.toFixed?.(1) || '?'}s, ${r.text.length} chars)`);
492
- logEvent('voice-transcribed', {
493
- chat_id: chatId, msg_id: msgId,
494
- provider: r.provider, language: r.language,
495
- duration_sec: r.duration_sec, chars: r.text.length,
496
- cost_usd: r.cost_usd,
497
- });
498
- } catch (err) {
499
- console.error(`[${label}] transcribe failed for ${a.name}: ${err.message}`);
500
- logEvent('voice-transcribe-failed', {
501
- chat_id: chatId, msg_id: msgId, name: a.name, error: err.message,
502
- });
503
- }
504
- }));
250
+ // ─── Attachment extraction ──────────────────────────────────────────
251
+ // extractAttachments + shortFileTag live in lib/handlers/extract-attachments.js.
252
+ // sanitizeFilename moved with downloadAttachments to lib/handlers/download.js.
253
+ const { extractAttachments } = require('./lib/handlers/extract-attachments');
254
+ const { createRecordInbound } = require('./lib/handlers/record-inbound');
255
+ const { createHandleSendOverIpc } = require('./lib/handlers/ipc-send');
256
+ const { createDispatcher } = require('./lib/handlers/dispatcher');
257
+ const { createPollLoop } = require('./lib/handlers/poll');
505
258
 
506
- // Persist transcription:
507
- // - Per-attachment: setAttachmentTranscription stores the full
508
- // transcription object (text + language + duration + provider) as
509
- // JSON in the attachments.transcription column. buildVoiceTags
510
- // parses it back when building the prompt.
511
- // - Message-level: setMessageText updates messages.text with the
512
- // combined transcript so FTS finds "what Maria said" via the
513
- // normal chat search path.
514
- const successful = targets.filter((a) => a.transcription?.text);
515
- if (!successful.length) return { ackEmitted };
516
- for (const a of successful) {
517
- if (a.id != null) {
518
- dbWrite(() => db.setAttachmentTranscription(a.id, JSON.stringify(a.transcription)),
519
- `setAttachmentTranscription ${a.id}`);
520
- }
521
- }
522
- const combinedText = successful.map((a) => a.transcription.text).join(' ').trim();
523
- dbWrite(() => db.setMessageText({
524
- chat_id: chatId, msg_id: msgId, text: combinedText,
525
- }), 'persist voice transcription');
526
- return { ackEmitted };
527
- }
259
+ // transcribeVoiceAttachments extracted to lib/handlers/voice.js.
260
+ // Wired in main() once db + tg + config are available.
261
+ let transcribeVoiceAttachments = null;
528
262
 
529
- // Bounded concurrency for parallel fetches. A 10-photo album used to be
530
- // 10× per-photo latency (each `await fetch` was serial); now in-flight
531
- // downloads are capped to a small pool. Telegram's per-bot rate limit is
532
- // ~30 req/s, so 6 concurrent fetches is comfortably under and keeps the
533
- // happy path responsive without burning sockets on a 100-file edge case.
534
- // Override via `config.bot.attachmentConcurrency` (per-bot) for ops-time
535
- // rate-limit tuning.
536
- const ATTACHMENT_DOWNLOAD_CONCURRENCY_DEFAULT = 6;
537
- function attachmentConcurrency() {
538
- const v = Number(config.bot?.attachmentConcurrency);
539
- return (Number.isInteger(v) && v > 0) ? v : ATTACHMENT_DOWNLOAD_CONCURRENCY_DEFAULT;
540
- }
541
-
542
- // Per-attachment download. Pure function over (att, deps) → result. Pulled
543
- // out of the loop so downloadAttachments can run several in parallel.
544
- async function downloadOneAttachment(bot, token, chatId, msg, chatDir, att) {
545
- // Reuse path: row already says downloaded AND the file is on disk.
546
- if (att.download_status === 'downloaded' && att.local_path) {
547
- try {
548
- if (fs.statSync(att.local_path).size > 0) {
549
- return { ...att, path: att.local_path, size: att.size_bytes || 0, error: null };
550
- }
551
- } catch { /* fall through to refetch */ }
552
- }
553
- try {
554
- const fileInfo = await bot.api.getFile(att.file_id);
555
- if (!fileInfo?.file_path) throw new Error('no file_path from getFile');
556
- const url = `https://api.telegram.org/file/bot${token}/${fileInfo.file_path}`;
557
- const res = await fetch(url);
558
- if (!res.ok) throw new Error(`HTTP ${res.status}`);
559
- // Three-layer size enforcement, in order of cheapness:
560
- // 1. Content-Length header — fail-fast before reading any body.
561
- // 2. Streaming chunk-by-chunk accumulation — abort the moment the
562
- // cumulative byte count crosses the cap. This is the layer that
563
- // protects against an attacker omitting Content-Length: pre-0.6.14
564
- // we read the whole `res.arrayBuffer()` into RAM first and only
565
- // then checked the size. With the per-bot ATTACHMENT_DOWNLOAD_
566
- // CONCURRENCY default of 6, six unbounded reads in flight could
567
- // pin arbitrary RSS — real OOM angle for a malicious upload.
568
- // 3. Final post-buffer check is now redundant but cheap; left as
569
- // defense-in-depth in case the streaming logic is ever changed.
570
- const cl = parseInt(res.headers.get('content-length') || '0', 10);
571
- if (cl > MAX_FILE_BYTES) {
572
- throw new Error(`content-length ${cl} exceeds per-file cap ${MAX_FILE_BYTES}`);
573
- }
574
- let total = 0;
575
- const chunks = [];
576
- if (res.body && typeof res.body.getReader === 'function') {
577
- const reader = res.body.getReader();
578
- while (true) {
579
- const { done, value } = await reader.read();
580
- if (done) break;
581
- total += value.byteLength;
582
- if (total > MAX_FILE_BYTES) {
583
- try { await reader.cancel(); } catch {}
584
- throw new Error(`stream ${total}+ bytes exceeds per-file cap ${MAX_FILE_BYTES}`);
585
- }
586
- chunks.push(value);
587
- }
588
- } else {
589
- // Fallback for runtimes without WHATWG streams (shouldn't fire on
590
- // Node 22+). Same arrayBuffer path as before, with the post-check.
591
- const ab = await res.arrayBuffer();
592
- if (ab.byteLength > MAX_FILE_BYTES) {
593
- throw new Error(`body ${ab.byteLength} bytes exceeds per-file cap ${MAX_FILE_BYTES}`);
594
- }
595
- chunks.push(new Uint8Array(ab));
596
- total = ab.byteLength;
597
- }
598
- const buf = Buffer.concat(chunks.map((c) => Buffer.from(c.buffer, c.byteOffset, c.byteLength)));
599
- if (buf.length > MAX_FILE_BYTES) {
600
- throw new Error(`body ${buf.length} bytes exceeds per-file cap ${MAX_FILE_BYTES}`);
601
- }
602
- const safeName = sanitizeFilename(att.name);
603
- // Embed file_unique_id so two attachments with the same msg_id+name
604
- // (album, resend) can't silently overwrite each other. Telegram
605
- // guarantees file_unique_id is stable and globally unique per file.
606
- const uniq = att.file_unique_id ? `-${att.file_unique_id}` : '';
607
- const localName = `${msg.message_id}${uniq}-${safeName}`;
608
- const localPath = path.join(chatDir, localName);
609
- // Atomic write: create a temp with the unique PID+timestamp suffix,
610
- // fill it, then rename to the canonical name. A crash mid-write leaves
611
- // a `.tmp.*` file (swept later) rather than a truncated canonical file
612
- // that the EEXIST dedup branch would happily serve on next request.
613
- if (fs.existsSync(localPath)) {
614
- console.log(`[attach] ${chatId} ← ${att.kind} ${safeName} (already on disk, reusing)`);
615
- } else {
616
- const tmpPath = `${localPath}.tmp.${process.pid}.${Date.now()}`;
617
- try {
618
- fs.writeFileSync(tmpPath, buf, { flag: 'wx' });
619
- fs.renameSync(tmpPath, localPath);
620
- } catch (e) {
621
- // Clean up stray tmp on any failure; if the rename fell through
622
- // because another process beat us, EEXIST on the target is fine.
623
- try { fs.unlinkSync(tmpPath); } catch {}
624
- if (e.code !== 'EEXIST') throw e;
625
- console.log(`[attach] ${chatId} ← ${att.kind} ${safeName} (race: already on disk)`);
626
- }
627
- }
628
- console.log(`[attach] ${chatId} ← ${att.kind} ${safeName} (${buf.length} bytes) → ${localPath}`);
629
- dbWrite(() => db.markAttachmentDownloaded(att.id, {
630
- local_path: localPath, size_bytes: att.size_bytes || buf.length,
631
- }), `markAttachmentDownloaded ${att.id}`);
632
- return { ...att, path: localPath, size: att.size_bytes || buf.length, error: null };
633
- } catch (err) {
634
- // Don't drop the attachment silently — push it through with the
635
- // failure noted. buildAttachmentTags renders this as
636
- // <attachment-failed reason="..." /> so claude tells the user
637
- // "I couldn't see your <kind>" instead of pretending it received
638
- // text only.
639
- //
640
- // Token redaction: the fetch URL embeds bot${TOKEN} (Telegram CDN
641
- // requirement) and some undici/network error variants stringify
642
- // the request including the URL into err.message. Persisting that
643
- // raw to attachments.download_error or stderr would leak the bot
644
- // token. 0.6.14: centralized in net-errors.redactBotToken which
645
- // also handles URL-encoded (%3A) variants and bare token shapes
646
- // missed by the original regex.
647
- const raw = (err.message || 'unknown').slice(0, 200);
648
- const reason = redactBotToken(raw);
649
- console.error(`[attach] download failed for ${att.name}: ${reason}`);
650
- dbWrite(() => db.markAttachmentFailed(att.id, reason),
651
- `markAttachmentFailed ${att.id}`);
652
- return { ...att, path: null, error: reason };
653
- }
654
- }
655
-
656
- // 0.6.0: takes attachment ROW objects from the DB (not raw extracted
657
- // metadata). Each row has an `id` so we can mark status as we go.
658
- // On replay: a row with status='downloaded' and a local_path that's
659
- // still on disk is reused without re-fetching. Anything else (failed,
660
- // missing file, never downloaded) hits Telegram's CDN.
661
- //
662
- // 0.6.7: parallel fetches with bounded concurrency. The inner work is
663
- // stateless per-attachment (only writes go to DB / disk via paths
664
- // keyed on file_unique_id, so two parallel downloads can't collide).
665
- // Order of `results` is preserved by writing into a fixed-size array
666
- // at the original index — important so the prompt sees attachments in
667
- // the same order the user sent them in an album.
668
- async function downloadAttachments(bot, token, chatId, msg, rows) {
669
- if (!rows.length) return [];
670
- const chatDir = path.join(INBOX_DIR, String(chatId));
671
- fs.mkdirSync(chatDir, { recursive: true });
672
-
673
- const results = new Array(rows.length);
674
- let cursor = 0;
675
- const workers = Array.from(
676
- { length: Math.min(attachmentConcurrency(), rows.length) },
677
- async () => {
678
- while (true) {
679
- const idx = cursor++;
680
- if (idx >= rows.length) return;
681
- results[idx] = await downloadOneAttachment(bot, token, chatId, msg, chatDir, rows[idx]);
682
- }
683
- },
684
- );
685
- await Promise.all(workers);
686
- return results;
687
- }
263
+ // downloadAttachments extracted to lib/handlers/download.js. Wired
264
+ // in main() once config + db + dbWrite + INBOX_DIR are available.
265
+ let downloadAttachments = null;
688
266
 
689
267
 
690
268
  // ─── Prompt formatting ──────────────────────────────────────────────
@@ -787,222 +365,11 @@ async function clearAutosteeredReactions(sessionKey) {
787
365
  return autosteeredRefs.clear(sessionKey);
788
366
  }
789
367
 
790
- function spawnClaude(sessionKey, ctx) {
791
- const { chatConfig, existingSessionId, label, chatId } = ctx;
792
- // 0.7.3: Claude Code's Chrome-extension integration (browser
793
- // automation via the "Claude in Chrome" extension) is OPT-IN and
794
- // NOT enabled by default in `claude`. Polygram lets chats turn it
795
- // on via `config.chats.<id>.chrome: true` (chat-level wins) or
796
- // `config.bot.chrome: true` (per-bot default). When opting in, the
797
- // extension must be installed in the daemon-user's Chrome and the
798
- // user must have a live Aqua session (so Chrome is running). Falls
799
- // back to --no-chrome for chats that don't opt in (matches our
800
- // pre-0.7.3 default — defensive against any "enabled by default"
801
- // that might have been set in claude's persistent state).
802
- const wantChrome = chatConfig.chrome != null
803
- ? chatConfig.chrome === true
804
- : config.bot?.chrome === true;
805
- const args = [
806
- '-p',
807
- '--input-format', 'stream-json',
808
- '--output-format', 'stream-json',
809
- '--verbose',
810
- '--model', chatConfig.model || config.defaults.model,
811
- '--effort', chatConfig.effort || config.defaults.effort,
812
- '--permission-mode', 'bypassPermissions',
813
- wantChrome ? '--chrome' : '--no-chrome',
814
- ];
815
- if (chatConfig.agent) args.push('--agent', chatConfig.agent);
816
- if (existingSessionId) args.push('--resume', existingSessionId);
817
- // Polygram-side display constraints — same hint the SDK pm appends
818
- // via Options.systemPrompt. Keeps the table-width rule in
819
- // infrastructure, not in agent docs.
820
- args.push(...appendDisplayHintCliArgs());
821
-
822
- console.log(`[${label}] Spawning process (${chatConfig.model}/${chatConfig.effort})`);
823
-
824
- // Scrub env to an allowlist: under bypassPermissions a prompt-injected
825
- // child can exfiltrate any env var, so we pass only what Claude Code and
826
- // normal shell tools need. TELEGRAM_BOT_TOKEN is opt-in per bot via
827
- // config.bot.needsToken — partner bots go through polygram for every
828
- // outbound message and never need direct API access.
829
- const botConfig = config.bot || {};
830
- const childEnv = filterEnv(process.env);
831
- childEnv.HOME = CHILD_HOME;
832
- childEnv.CLAUDE_CHANNEL_BOT = BOT_NAME;
833
- // Approval hook integration: the hook runs as a child of Claude and reads
834
- // these to route its IPC. POLYGRAM_TURN_ID isn't set here (one session can
835
- // run many turns) — the hook treats it as optional.
836
- childEnv.POLYGRAM_BOT = BOT_NAME;
837
- childEnv.POLYGRAM_CHAT_ID = String(chatId || '');
838
- // Allow the PreToolUse approval hook to authenticate to the IPC socket.
839
- if (process.env.POLYGRAM_IPC_SECRET) childEnv.POLYGRAM_IPC_SECRET = process.env.POLYGRAM_IPC_SECRET;
840
- if (botConfig.needsToken) {
841
- childEnv.TELEGRAM_BOT_TOKEN = botConfig.token || '';
842
- }
843
-
844
- const proc = spawn(CLAUDE_BIN, args, {
845
- cwd: chatConfig.cwd,
846
- env: childEnv,
847
- stdio: ['pipe', 'pipe', 'pipe'],
848
- });
849
- proc.stderr.on('data', (d) => {
850
- const m = d.toString().trim();
851
- if (m) console.error(`[${label}] stderr: ${m.slice(0, 200)}`);
852
- });
853
- return proc;
854
- }
855
-
856
- /**
857
- * 0.8.0 Phase 3 — SDK pm spawn factory.
858
- *
859
- * Replacement for `spawnClaude` when POLYGRAM_USE_SDK=1. Returns
860
- * SdkOptions for the SDK pm to pass to `query({ prompt, options })`.
861
- * The SDK pm wraps this in its inputController + iteration loop;
862
- * polygram only needs to compose the Options object.
863
- *
864
- * Per v4 plan §6.5.7 — explicit env enumeration (Options.env is
865
- * SHADOW per Phase 0 gate 33), bypassPermissions +
866
- * allowDangerouslySkipPermissions both set for forward-compat,
867
- * agent-loader composes per-chat agent into systemPrompt + skills +
868
- * mcpServers, optional resume sessionId for continuity.
869
- */
870
- function buildSdkOptions(sessionKey, ctx) {
871
- const { chatConfig, existingSessionId, label, chatId, threadId } = ctx;
872
-
873
- // rc.48: per-topic config overrides. When `topics[threadId]` is an
874
- // object (not a legacy string label), these fields take precedence
875
- // over chat-level config. Principal use case: scope a single topic
876
- // to a different agent, cwd, or permissionMode (e.g. flip music
877
- // topic from `bypassPermissions` to `default` so settings.json
878
- // permissions.allow gates apply).
879
- //
880
- // Per-topic overrides only apply meaningfully when isolateTopics is
881
- // true — each topic has its own SDK Query. With isolateTopics: false
882
- // all topics share one Query whose options are fixed at first-spawn.
883
- // Startup validation (validateTopicConfigs) emits a one-time warning
884
- // when a chat has topic overrides + isolateTopics: false.
885
- const topicConfig = getTopicConfig(chatConfig, threadId);
886
- const effectiveAgent = topicConfig.agent || chatConfig.agent;
887
-
888
- // Per-chat agent (D14): if pinned, load & compose. Failure is
889
- // non-fatal — chat falls back to defaults; logged for ops.
890
- let agentBundle = null;
891
- if (effectiveAgent) {
892
- try {
893
- agentBundle = agentLoader.loadAgent(effectiveAgent, {
894
- homeDir: CHILD_HOME,
895
- // Pass cwd so the loader checks Claude Code's project-level
896
- // path (`<cwd>/.claude/agents/<name>.md`) before the
897
- // user-level path or polygram's directory convention. rc.48:
898
- // topic-level cwd takes precedence so topic-scoped agents
899
- // load from the topic's project dir.
900
- cwd: topicConfig.cwd || chatConfig.cwd,
901
- logger: console,
902
- });
903
- } catch (err) {
904
- console.error(`[${label}] agent-loader: ${err.message}`);
905
- logEvent('agent-load-failed', {
906
- chat_id: chatId, agent: effectiveAgent, error: err.message,
907
- topic: threadId || null,
908
- });
909
- }
910
- }
911
-
912
- const effectiveModel = topicConfig.model || chatConfig.model;
913
- const effectiveEffort = topicConfig.effort || chatConfig.effort;
914
- const agentSuffix = effectiveAgent && effectiveAgent !== chatConfig.agent
915
- ? ` agent=${effectiveAgent}` : '';
916
- console.log(`[${label}] Spawning SDK Query (${effectiveModel}/${effectiveEffort}${agentSuffix})`);
917
-
918
- // Env: SHADOW semantics (gate 33) — must enumerate every var
919
- // polygram needs in the spawned worker.
920
- const botConfig = config.bot || {};
921
- const childEnv = filterEnv(process.env);
922
- childEnv.HOME = CHILD_HOME;
923
- childEnv.CLAUDE_CHANNEL_BOT = BOT_NAME;
924
- if (process.env.POLYGRAM_IPC_SECRET) {
925
- childEnv.POLYGRAM_IPC_SECRET = process.env.POLYGRAM_IPC_SECRET;
926
- }
927
- if (botConfig.needsToken) {
928
- childEnv.TELEGRAM_BOT_TOKEN = botConfig.token || '';
929
- }
930
-
931
- // 0.8.0 Phase 2 step 6: in-process approval flow via canUseTool.
932
- // Wire up only when approvals.gatedTools is configured for this
933
- // bot — otherwise leave canUseTool unset and rely on
934
- // bypassPermissions for the full allow-all path.
935
- const apprCfg = config.bot?.approvals;
936
- const useCanUseTool = apprCfg && apprCfg.adminChatId
937
- && Array.isArray(apprCfg.gatedTools) && apprCfg.gatedTools.length > 0;
938
-
939
- // rc.42: PostToolBatch hook removed. Native SDK priority push
940
- // (pm.injectUserMessage) replaces the absorb-via-additionalContext
941
- // detour. The autosteered ✍ reaction now clears via the regular
942
- // turn-end path (handleMessage finally + success branches —
943
- // existing rc.38 cleanup).
944
-
945
- // rc.52: dropped the SDK SessionStart hook registration. The hook
946
- // never fired in production (verified: zero history-preloaded events
947
- // across both production DBs since rc.21; SDK runtime grep showed
948
- // exactly one occurrence of "SessionStart" — in the type listing,
949
- // not in dispatch logic). The history preload is now done inline at
950
- // formatPrompt time when the upcoming Query has no resume target,
951
- // via lib/history-preload.buildHistoryBlock. See formatPrompt above.
952
- const baseOpts = {
953
- model: chatConfig.model || config.defaults.model,
954
- effort: chatConfig.effort || config.defaults.effort,
955
- cwd: chatConfig.cwd,
956
- env: childEnv,
957
- // permissionMode 'default' when canUseTool is wired (so the SDK
958
- // actually consults our callback). Otherwise stick with
959
- // bypassPermissions (matches today's CLI behaviour).
960
- permissionMode: useCanUseTool ? 'default' : 'bypassPermissions',
961
- allowDangerouslySkipPermissions: !useCanUseTool,
962
- ...(useCanUseTool && { canUseTool: makeCanUseTool(sessionKey) }),
963
- hooks: {},
964
- executable: 'node',
965
- ...(existingSessionId && { resume: existingSessionId }),
966
- ...(process.env.POLYGRAM_CLAUDE_BIN && {
967
- pathToClaudeCodeExecutable: process.env.POLYGRAM_CLAUDE_BIN,
968
- }),
969
- };
970
-
971
- // Compose with agent overlay + chat-level config + per-topic overrides.
972
- // agent-loader precedence: topicConfig > chatConfig > agent > defaults.
973
- // The chatConfig keys we care about for SDK options are
974
- // model/effort/cwd/thinking; others (agent, chrome, isolateTopics)
975
- // are polygram-only and stripped by composeSdkOptions.
976
- const composed = agentLoader.composeSdkOptions(
977
- {
978
- // chat-level overrides — only the keys SDK understands.
979
- model: chatConfig.model,
980
- effort: chatConfig.effort,
981
- cwd: chatConfig.cwd,
982
- ...(chatConfig.thinking && { thinking: chatConfig.thinking }),
983
- },
984
- agentBundle,
985
- baseOpts,
986
- topicConfig, // rc.48: highest precedence — overrides chat-level fields.
987
- );
988
-
989
- // rc.48: keep permissionMode + allowDangerouslySkipPermissions
990
- // consistent. baseOpts sets allowDangerouslySkipPermissions=true when
991
- // permissionMode='bypassPermissions'. If a topic flipped permissionMode
992
- // to anything else (typically 'default' to gate via settings.json),
993
- // also disable allowDangerouslySkipPermissions so the SDK actually
994
- // honours the permission gates instead of skipping them.
995
- if (composed.permissionMode && composed.permissionMode !== 'bypassPermissions') {
996
- composed.allowDangerouslySkipPermissions = false;
997
- }
998
-
999
- // Append polygram's display constraints to the systemPrompt.
1000
- // Infrastructure-layer hint — the agent's own prompt covers
1001
- // business logic; polygram adds "your output renders in Telegram,
1002
- // here's the width budget for tables".
1003
- composed.systemPrompt = appendDisplayHint(composed.systemPrompt);
1004
- return composed;
1005
- }
368
+ // SDK pm spawn factory — extracted to lib/sdk/build-options.js.
369
+ // `buildSdkOptions` is wired in main() via createBuildSdkOptions(deps)
370
+ // once the runtime context (config, BOT_NAME, makeCanUseTool, logEvent)
371
+ // is available.
372
+ let buildSdkOptions = null;
1006
373
 
1007
374
  function buildSpawnContext(sessionKey) {
1008
375
  const chatId = getChatIdFromKey(sessionKey);
@@ -1069,293 +436,28 @@ async function sendToProcess(sessionKey, prompt, context = {}) {
1069
436
  // - emit a `queue-depth-warning` event if the count ever exceeds a
1070
437
  // threshold (abnormal inbound rate, slow pre-work, stuck bot)
1071
438
  // - (future) drain on shutdown if we want clean exit
1072
- // Threshold for the queue-depth-warning event (operator-tuned signal
1073
- // for "this session is getting hammered"). Override via
1074
- // `config.bot.queueWarnThreshold`. Below the threshold no event fires.
1075
- const CONCURRENT_WARN_THRESHOLD_DEFAULT = 20;
1076
- function queueWarnThreshold() {
1077
- const v = Number(config.bot?.queueWarnThreshold);
1078
- return (Number.isInteger(v) && v > 0) ? v : CONCURRENT_WARN_THRESHOLD_DEFAULT;
1079
- }
1080
- const inFlightHandlers = new Map(); // sessionKey → count
1081
-
1082
- // Set true by the SIGTERM/SIGINT handler. Module-scoped so the
1083
- // fire-and-forget catch in dispatchHandleMessage can check it: when
1084
- // polygram is going down, in-flight handlers reject with "Process
1085
- // killed" / "Process exited" but those failures aren't "real" — the
1086
- // next boot's replay will re-dispatch them. Suppressing the user-facing
1087
- // error reply during shutdown removes a misleading post-mortem apology
1088
- // the user shouldn't see. (The boot replay's own _isReplay flag handles
1089
- // the OTHER half: suppressing if the replay itself fails.)
439
+ // dispatcher state. The dispatcher itself (errorReplyText,
440
+ // queueWarnThreshold, dispatchHandleMessage, attemptAutoResume,
441
+ // inFlightHandlers Map) lives in lib/handlers/dispatcher.js;
442
+ // polygram owns the abortGrace + autoResumeTracker instances
443
+ // + the isShuttingDown flag and threads them in.
1090
444
  let isShuttingDown = false;
1091
-
1092
- // Map a handler-error to a user-facing reply that says what happened
1093
- // and what to do next. The technical strings come from process-manager
1094
- // (idle / wall-clock timeouts) and node child_process (Process exited /
1095
- // killed). Anything we don't recognise falls back to a generic line
1096
- // with a single-line snippet of the error so the user can at least
1097
- // distinguish unique failures from the obvious "try again" cases.
1098
- // 0.7.7: errorReplyText delegates to lib/error-classify.js so the
1099
- // regex tables live in one place and stay in sync with future SDK
1100
- // error subtypes (the 0.8.0 migration extends the classifier rather
1101
- // than adding more if-branches here).
1102
- //
1103
- // classify() returns { kind, userMessage, isTransient, autoRecover }.
1104
- // `userMessage: null` is a deliberate "suppress reply" signal —
1105
- // today only used by INTERRUPTED in the abort-grace window. Callers
1106
- // that already gate on isSessionRecentlyAborted will short-circuit
1107
- // before reaching here, but we honour `null` defensively.
1108
- function errorReplyText(err) {
1109
- const { userMessage } = classifyError(err);
1110
- return userMessage; // may be null — caller must handle
1111
- }
1112
-
1113
- // Sessions the operator just /stop'd (or natural-language "стоп").
1114
- // rc.25: extracted to lib/abort-grace.js so the timestamp/window
1115
- // logic has its own unit tests. Behaviour identical: any pending
1116
- // rejected within the grace window is considered abort-caused —
1117
- // its generic error reply is suppressed and the streamer warning
1118
- // is skipped.
1119
445
  const abortGrace = createAbortGrace();
1120
-
1121
446
  function markSessionAborted(sessionKey) { abortGrace.mark(sessionKey); }
1122
447
  function isSessionRecentlyAborted(sessionKey) { return abortGrace.isRecent(sessionKey); }
1123
-
1124
- // rc.54: per-session cooldown for auto-resume on 300s no-activity
1125
- // timeout. Without the cooldown, a permanently-wedged tool would
1126
- // trigger an infinite resume → timeout → resume loop.
1127
448
  const autoResumeTracker = createAutoResumeTracker();
449
+ const contextHintShown = new Set();
450
+ let dispatchHandleMessage = null;
451
+ let attemptAutoResume = null;
452
+ let errorReplyText = null;
453
+ let queueWarnThreshold = null;
454
+ let inFlightHandlers = null;
1128
455
 
1129
456
  // rc.59: once-per-cycle gate for the contextHint. A session is added
1130
457
  // to the contextHintShown Set when the hint fires, removed when the SDK emits
1131
458
  // compact_boundary (so the next cycle can fire its own hint). Without
1132
459
  // this gate, a chat over-threshold would see the hint on every turn
1133
460
  // until auto-compact — noisy enough that Ivan called it out.
1134
- const contextHintShown = new Set();
1135
-
1136
- // rc.54: spawn a fresh Query resuming the same session_id and ask
1137
- // Claude to continue the timed-out work. The killed pm Query has
1138
- // already torn down the wedged subprocess (via pm.kill on timeout);
1139
- // getOrSpawnForChat creates a new entry that picks up the saved
1140
- // session_id from `sessions` table and sets `--resume <id>` on the
1141
- // SDK Options. The continuation message tells Claude what happened
1142
- // and that it has full prior context to keep going.
1143
- //
1144
- // Returns the result.text on success (already-sent to chat); throws
1145
- // on any failure (caller writes auto-resume-failed event + falls
1146
- // back to the standard timeout reply).
1147
- async function attemptAutoResume(sessionKey, chatId, originalMsg, bot) {
1148
- const threadId = originalMsg.message_thread_id || null;
1149
- // 1. Tell the user we're auto-resuming so they don't think nothing
1150
- // happened. Threaded under the original user message.
1151
- await tg(bot, 'sendMessage', {
1152
- chat_id: chatId,
1153
- text: '🔁 Auto-resuming after timeout — continuing where the previous turn left off.',
1154
- reply_parameters: { message_id: originalMsg.message_id },
1155
- ...(threadId && { message_thread_id: threadId }),
1156
- }, { source: 'auto-resume-indicator', botName: BOT_NAME }).catch((sendErr) => {
1157
- // Indicator is informational; don't fail the whole resume on it.
1158
- console.error(`[${sessionKey}] auto-resume indicator send failed: ${sendErr.message}`);
1159
- });
1160
-
1161
- // 2. Continuation prompt. Plain text — no XML wrapper. The SDK
1162
- // Query resumes the saved session_id, so Claude has full prior
1163
- // transcript context including its own partially-streamed text
1164
- // and tool calls. We just need to tell it WHAT happened and
1165
- // that it should pick up where it left off.
1166
- const continuation = '[polygram] Your previous turn timed out at 300s with no Claude activity (likely a wedged tool call — long Bash, hanging MCP, or stuck subagent). Continue from where you left off; do not restart from scratch. If the same operation would just hang again, abort it and tell me.';
1167
-
1168
- // 3. No-op streamer + reactor. We don't need to stream the resume
1169
- // turn's response (we'll send it as one message at the end). pm
1170
- // invokes streamer/reactor methods only when present; passing
1171
- // minimal stubs keeps pm happy.
1172
- const noopStreamer = {
1173
- onChunk: async () => {},
1174
- forceNewMessage: () => {},
1175
- finalize: async () => ({ streamed: false }),
1176
- flushDraft: async () => {},
1177
- discard: async () => {},
1178
- };
1179
- const noopReactor = {
1180
- setState: () => {},
1181
- heartbeat: () => {},
1182
- clear: async () => {},
1183
- stop: () => {},
1184
- };
1185
-
1186
- const result = await sendToProcess(sessionKey, continuation, {
1187
- streamer: noopStreamer,
1188
- reactor: noopReactor,
1189
- sourceMsgId: originalMsg.message_id,
1190
- threadId,
1191
- onFirstStream: () => {},
1192
- });
1193
-
1194
- if (result?.error) {
1195
- throw new Error(`auto-resume turn errored: ${String(result.error).slice(0, 200)}`);
1196
- }
1197
- if (!result?.text) {
1198
- throw new Error('auto-resume turn produced no text');
1199
- }
1200
-
1201
- // 4. Send the continuation reply as regular Telegram message(s),
1202
- // threaded under the original user message. Reuse the existing
1203
- // chunked-delivery + markdown-formatting primitives.
1204
- const chunks = chunkMarkdownText(result.text, TG_MAX_LEN);
1205
- await deliverReplies({
1206
- bot,
1207
- send: (b, method, params, m) => tg(b, method, params, m),
1208
- chatId,
1209
- threadId,
1210
- chunks,
1211
- replyToMessageId: originalMsg.message_id,
1212
- meta: { source: 'auto-resume-reply', botName: BOT_NAME },
1213
- logger: { error: (m) => console.error(`[${sessionKey}] auto-resume deliver: ${m}`) },
1214
- });
1215
-
1216
- return result.text;
1217
- }
1218
-
1219
- // Called by bot.on('message') for every regular (non-admin, non-pair)
1220
- // message. Runs handleMessage in a fire-and-forget manner with centralised
1221
- // error handling. Replaces the old processQueue loop.
1222
- function dispatchHandleMessage(sessionKey, chatId, msg, bot) {
1223
- const count = (inFlightHandlers.get(sessionKey) || 0) + 1;
1224
- inFlightHandlers.set(sessionKey, count);
1225
- const warnAt = queueWarnThreshold();
1226
- if (count === warnAt) {
1227
- logEvent('queue-depth-warning', {
1228
- chat_id: chatId, session_key: sessionKey,
1229
- in_flight: count, threshold: warnAt,
1230
- });
1231
- }
1232
- handleMessage(sessionKey, chatId, msg, bot).catch((err) => {
1233
- const wasAborted = isSessionRecentlyAborted(sessionKey);
1234
- const isReplay = msg._isReplay === true;
1235
- console.error(`[${sessionKey}] Error:`, err.message);
1236
- // Mark the row terminal so the right thing happens on next boot:
1237
- // - aborted: user explicitly stopped → 'aborted' (not replayable)
1238
- // - shutting down on a NEW turn: 'replay-pending' so the next
1239
- // boot picks it up via getReplayCandidates. Pre-0.6.12 we marked
1240
- // 'failed' here, which excluded the row from replay — a clean
1241
- // restart between user send and reply silently dropped the turn.
1242
- // - shutting down on a REPLAY turn (msg._isReplay=true): keep
1243
- // 'replay-attempted' so the one-shot guard from 0.6.4 still
1244
- // holds. Without this, a replay interrupted by another shutdown
1245
- // would be promoted to 'replay-pending' and the next boot would
1246
- // replay it AGAIN — infinite loop on chained shutdowns.
1247
- // - everything else: 'failed' (genuine claude crash / timeout etc).
1248
- const status = wasAborted
1249
- ? 'aborted'
1250
- : isShuttingDown
1251
- ? (isReplay ? 'replay-attempted' : 'replay-pending')
1252
- : 'failed';
1253
- dbWrite(() => db.setInboundHandlerStatus({
1254
- chat_id: chatId, msg_id: msg.message_id, status,
1255
- }), `set handler_status=${status}`);
1256
- logEvent('handler-error', {
1257
- chat_id: chatId, session_key: sessionKey,
1258
- msg_id: msg?.message_id,
1259
- error: err.message?.slice(0, 500),
1260
- stack: err.stack?.split('\n').slice(0, 5).join('\n'),
1261
- aborted: wasAborted || undefined,
1262
- replay: isReplay || undefined,
1263
- });
1264
- // rc.55: surface replay failures with a meaningful message. Pre-rc.55
1265
- // any boot-replay turn that failed for ANY reason was silently dropped
1266
- // (the original logic assumed "user typed this minutes ago and moved
1267
- // on"). But the rc.51-onward boot-replay path is a recovery primitive,
1268
- // not stale-message handling — when it fails, the user IS still waiting
1269
- // for their answer. The Shumabit@UMI thread :24 wall-clock incident on
1270
- // 2026-05-04 hit exactly this: original turn SIGHUP'd by deploy → boot-
1271
- // replay redispatched → replay hit 1800s wall-clock → user saw nothing
1272
- // and didn't know their work had been lost.
1273
- //
1274
- // Now: send a tailored message on replay failures. Still suppress when
1275
- // the replay itself was killed by ANOTHER shutdown (the next boot will
1276
- // redispatch — same logic as before, just narrower).
1277
- if (isReplay && !wasAborted && !isShuttingDown) {
1278
- tg(bot, 'sendMessage', {
1279
- chat_id: chatId,
1280
- text: '⚠️ This turn was interrupted and didn\'t complete on retry — please rephrase or simplify, or split into smaller steps.',
1281
- reply_parameters: { message_id: msg.message_id },
1282
- }, { source: 'error-reply', botName: BOT_NAME }).catch((replyErr) => {
1283
- console.error(`[${sessionKey}] failed to send replay-failure reply: ${replyErr.message}`);
1284
- });
1285
- }
1286
- // Suppress the user-facing error reply when:
1287
- // - boot replay (replay failures get their own tailored message above)
1288
- // - polygram is shutting down (the failure is "Process killed" /
1289
- // "Process exited" which isn't a real error — boot replay will
1290
- // re-dispatch it on next start)
1291
- // - user just /stop'd (already saw their abort acknowledgement)
1292
- if (!wasAborted && !isReplay && !isShuttingDown) {
1293
- // rc.54: auto-resume on 300s no-activity timeout. Spawn a fresh
1294
- // Query resuming the same session_id and inject a continuation
1295
- // nudge. This recovers from wedged tool calls (long Bash, hung
1296
- // MCP, stuck subagent) that polygram's watchdog catches but
1297
- // currently leaves the user stranded with "try resending".
1298
- // Skipped when the failed turn was ITSELF an auto-resume
1299
- // (msg._isAutoResume) to prevent recursion; per-session
1300
- // cooldown blocks tight loops on permanent wedges.
1301
- const isResumeTurn = msg._isAutoResume === true;
1302
- const resumable = !isResumeTurn && isAutoResumable({
1303
- error: err, aborted: wasAborted, replay: isReplay, shuttingDown: isShuttingDown,
1304
- });
1305
- if (resumable && !autoResumeTracker.isInCooldown(sessionKey)) {
1306
- autoResumeTracker.markAttempt(sessionKey);
1307
- logEvent('auto-resume-attempted', {
1308
- chat_id: chatId, session_key: sessionKey, msg_id: msg.message_id,
1309
- original_error: err.message?.slice(0, 200),
1310
- });
1311
- attemptAutoResume(sessionKey, chatId, msg, bot)
1312
- .then(() => {
1313
- logEvent('auto-resume-success', {
1314
- chat_id: chatId, session_key: sessionKey, msg_id: msg.message_id,
1315
- });
1316
- autoResumeTracker.clear(sessionKey);
1317
- })
1318
- .catch((resumeErr) => {
1319
- console.error(`[${sessionKey}] auto-resume failed: ${resumeErr?.message}`);
1320
- logEvent('auto-resume-failed', {
1321
- chat_id: chatId, session_key: sessionKey, msg_id: msg.message_id,
1322
- error: resumeErr?.message?.slice(0, 200),
1323
- });
1324
- // Fall back to the original error reply so the user isn't
1325
- // left with just the 🔁 indicator and no answer.
1326
- const fallbackText = errorReplyText(err);
1327
- if (fallbackText) {
1328
- tg(bot, 'sendMessage', {
1329
- chat_id: chatId, text: fallbackText,
1330
- reply_parameters: { message_id: msg.message_id },
1331
- }, { source: 'error-reply', botName: BOT_NAME }).catch(() => {});
1332
- }
1333
- });
1334
- return;
1335
- }
1336
- // 0.7.7: errorReplyText may return null when the classifier
1337
- // says "suppress reply" (e.g. INTERRUPTED inside abort grace —
1338
- // user already saw their /stop ack). Skip the send call in
1339
- // that case rather than dispatching empty text (which would
1340
- // 400 at the lib/telegram.js empty-text guard added in 0.7.4).
1341
- const replyText = errorReplyText(err);
1342
- if (replyText) {
1343
- tg(bot, 'sendMessage', {
1344
- chat_id: chatId,
1345
- text: replyText,
1346
- reply_parameters: { message_id: msg.message_id },
1347
- }, { source: 'error-reply', botName: BOT_NAME }).catch((replyErr) => {
1348
- console.error(`[${sessionKey}] failed to send error reply: ${replyErr.message}`);
1349
- });
1350
- }
1351
- }
1352
- }).finally(() => {
1353
- const n = (inFlightHandlers.get(sessionKey) || 1) - 1;
1354
- if (n <= 0) inFlightHandlers.delete(sessionKey);
1355
- else inFlightHandlers.set(sessionKey, n);
1356
- });
1357
- }
1358
-
1359
461
  // Per-session lock ordering stdin writes. Module is I/O-pure.
1360
462
  const stdinLock = createAsyncLock();
1361
463
 
@@ -1364,687 +466,77 @@ const stdinLock = createAsyncLock();
1364
466
  // permanently 401s (bot blocked, chat deleted) doesn't have us
1365
467
  // hammering sendChatAction every 4s for the full turn duration.
1366
468
 
1367
- // ─── Response parsing (stickers, reactions) ─────────────────────────
1368
- // Implementation lives in lib/parse-response.js so tests can require it
1369
- // without starting a bot (polygram.js is a top-level script that calls
1370
- // main() at bottom). The wrapper here supplies the runtime stickerMap /
1371
- // emojiToSticker that the parser looks up against.
1372
- //
1373
- // 0.7.5: parser also recognises a literal `[sticker:NAME]` pattern in
1374
- // addition to single-emoji shortcuts. Claude reads its own past outbound
1375
- // rows on session resume, sees `[sticker:working]` (the placeholder
1376
- // deriveOutboundText synthesises for sendSticker rows), and starts
1377
- // mimicking the format as plain text. Without the new branch the
1378
- // placeholder was rendered verbatim in the chat instead of swapped for
1379
- // the actual sticker.
1380
- const {
1381
- parseResponse: parseResponseImpl,
1382
- stripInlineTags: stripInlineTagsImpl,
1383
- } = require('./lib/parse-response');
1384
- function parseResponse(text) {
1385
- return parseResponseImpl(text, { stickerMap, emojiToSticker });
1386
- }
1387
- // rc.67: pre-processor for the streamer. Strips recognised inline
1388
- // `[sticker:NAME]` and any `[react:EMOJI]` tags BEFORE the chunk is
1389
- // committed to the bubble + DB row, so the user never sees a literal
1390
- // tag even when the turn-end finalize path doesn't manage to clean it
1391
- // (interrupt, error, hung query, edit failure, or the stickerMap-miss
1392
- // no-op branch). parseResponse continues to surface the same tags in
1393
- // `parsed.stickers[]` / `parsed.reactions[]` for outbound dispatch via
1394
- // sendInlineStickers / sendInlineReactions.
1395
- function stripInlineTagsForStreamer(text) {
1396
- return stripInlineTagsImpl(text, { stickerMap });
1397
- }
1398
-
1399
- // ─── Cron/IPC send ─────────────────────────────────────────────────
1400
-
1401
- // Allowlist of Telegram Bot API methods external callers (cron) may invoke.
1402
- // Broader than sendMessage to cover receipts, error reports, quick replies.
1403
- // Deliberately excludes destructive ops (deleteMessage, banChatMember, etc.);
1404
- // cron has no business calling those.
1405
- const IPC_SEND_ALLOWED_METHODS = new Set([
1406
- 'sendMessage',
1407
- 'sendPhoto',
1408
- 'sendDocument',
1409
- 'sendSticker',
1410
- 'sendChatAction',
1411
- 'editMessageText',
1412
- 'setMessageReaction',
1413
- ]);
1414
-
1415
- async function handleSendOverIpc(req) {
1416
- const { method, params = {}, source } = req || {};
1417
- if (!method) throw new Error('method required');
1418
- if (!IPC_SEND_ALLOWED_METHODS.has(method)) {
1419
- throw new Error(`method not allowed: ${method}`);
1420
- }
1421
- if (!bot) throw new Error(`bot process not ready`);
1422
-
1423
- // Enforce: chat_id must belong to this bot (no cross-bot sends).
1424
- // After filterConfigToBot, config.chats only contains our chats.
1425
- const chatId = params.chat_id != null ? String(params.chat_id) : null;
1426
- if (chatId && !config.chats[chatId]) {
1427
- throw new Error(`chat not owned by ${BOT_NAME}: ${chatId}`);
1428
- }
1429
-
1430
- // rc.58: catch the most common file-upload mistakes before Telegram
1431
- // rejects with a confusing error. validateIpcFileParam returns a
1432
- // human-readable error string when the file param is malformed,
1433
- // null otherwise.
1434
- const fileParamErr = validateIpcFileParam(method, params);
1435
- if (fileParamErr) throw new Error(fileParamErr);
1436
-
1437
- const sendRes = await tg(bot, method, params, {
1438
- source: source || 'ipc',
1439
- botName: BOT_NAME,
1440
- });
1441
- return { result: sendRes };
1442
- }
1443
-
1444
- // ─── Approvals ─────────────────────────────────────────────────────
1445
- // rc.20: pure UI builders moved to lib/approval-ui.js for testability.
1446
- // Imported above (buildApprovalKeyboard, buildApprovalKeyboardWithAlways,
1447
- // approvalCardText, formatToolInputForCard).
1448
-
1449
- // /model and /effort inline keyboard. `show` controls which row(s) appear:
1450
- // 'model', 'effort', or 'all'. The current value gets a ✓ marker so the
1451
- // user can see at a glance what's selected.
1452
- const MODEL_OPTIONS = ['opus', 'sonnet', 'haiku'];
1453
- const EFFORT_OPTIONS = ['low', 'medium', 'high', 'xhigh', 'max'];
1454
-
1455
- function buildConfigKeyboard(chatConfig, show = 'all') {
1456
- const rows = [];
1457
- if (show === 'model' || show === 'all') {
1458
- rows.push(MODEL_OPTIONS.map((m) => ({
1459
- text: m === chatConfig.model ? `✓ ${m}` : m,
1460
- callback_data: `cfg:model:${m}`,
1461
- })));
1462
- }
1463
- if (show === 'effort' || show === 'all') {
1464
- rows.push(EFFORT_OPTIONS.map((e) => ({
1465
- text: e === chatConfig.effort ? `✓ ${e}` : e,
1466
- callback_data: `cfg:effort:${e}`,
1467
- })));
1468
- }
1469
- return { inline_keyboard: rows };
1470
- }
1471
-
1472
- // Card text shown above the inline keyboard. Includes plain-language
1473
- // guidance on when to pick which model / effort, since most users
1474
- // (especially in shared groups) don't know which option to tap.
1475
- //
1476
- // Model versions: polygram passes the alias ("opus", "sonnet", "haiku")
1477
- // to claude, which resolves it to the latest snapshot. We mirror those
1478
- // snapshots here for display only, and verify them on each release with
1479
- // `claude --model <alias>` (probing the system:init event's `model`
1480
- // field). Bumping is a manual step — when claude releases a new
1481
- // snapshot the alias maps to, update this map and ship.
1482
- const MODEL_VERSIONS_DESC = {
1483
- opus: 'claude-opus-4-7',
1484
- sonnet: 'claude-sonnet-4-6',
1485
- haiku: 'claude-haiku-4-5',
1486
- };
1487
-
1488
- function formatConfigInfoText(chatConfig, show, sessionKey) {
1489
- const alive = pm.has(sessionKey) && !pm.get(sessionKey).closed;
1490
- const ver = MODEL_VERSIONS_DESC[chatConfig.model] || chatConfig.model;
1491
- const head = `Model: ${chatConfig.model} (${ver})\nEffort: ${chatConfig.effort}\nAgent: ${chatConfig.agent}\nProcess: ${alive ? 'warm' : 'cold'}\nSession: ${getClaudeSessionId(db, sessionKey)?.slice(0, 8) || 'new'}`;
1492
-
1493
- const modelHelp = [
1494
- '',
1495
- '**Models**',
1496
- '🧠 **opus** — deep analysis, code refactor, multi-source reconciliation. ~5× sonnet cost.',
1497
- '🤖 **sonnet** — default. Most ops, code review, document summary.',
1498
- '⚡ **haiku** — quick simple tasks, classification, lookup.',
1499
- ].join('\n');
1500
-
1501
- const effortHelp = [
1502
- '',
1503
- '**Effort** — ceiling on how much Claude can think. Simple questions get fast replies; hard ones spend more tokens. Safe to set higher — Claude scales down automatically when it doesn\'t need to think.',
1504
- '• **low** — fast replies, minimum reasoning. Casual chat, simple lookups.',
1505
- '• **medium** — balanced default. Fits most use cases.',
1506
- '• **high** — multi-step tasks. Audit, debug, multi-source analysis.',
1507
- '• **xhigh** / **max** — heaviest. Hard reasoning, edge cases.',
1508
- ].join('\n');
1509
-
1510
- let body = head;
1511
- if (show === 'model' || show === 'all') body += '\n' + modelHelp;
1512
- if (show === 'effort' || show === 'all') body += '\n' + effortHelp;
1513
- return body;
1514
- }
1515
-
1516
- // rc.20: approvalCardText + safeParse moved to lib/approval-ui.js.
1517
-
1518
- // 0.8.0-rc.18+: canonicalizeToolInput moved to lib/canonical-json.js
1519
- // for testability. Same function, no behavior change.
1520
-
1521
- /**
1522
- * 0.8.0 Phase 2 step 6: SDK canUseTool callback. Hands back to the
1523
- * SDK an async PermissionResult per `@anthropic-ai/claude-agent-sdk`
1524
- * sdk.d.ts:146-188.
1525
- *
1526
- * Flow (per v4 plan §4.2):
1527
- * 1. If no approval config → allow.
1528
- * 2. Look up chat_tool_decisions for short-circuit (always-allow/
1529
- * always-deny by exact / prefix / regex match). If found,
1530
- * return that decision.
1531
- * 3. Match against config.bot.approvals.gatedTools; if not gated,
1532
- * allow.
1533
- * 4. Issue pending_approvals row (with tool_use_id dedup); post
1534
- * 4-button card to admin chat; park resolver in
1535
- * approvalWaiters Map; race against opts.signal + timeout.
1536
- * 5. Return PermissionResult; the SDK lets the tool run or denies.
1537
- *
1538
- * Reuses the existing approvals store + approvalWaiters Map (same
1539
- * shape as today's IPC flow). Both paths can coexist on the same
1540
- * daemon — IPC for CLI pm chats, canUseTool for SDK pm chats.
1541
- */
1542
- function makeCanUseTool(sessionKey) {
1543
- const chatId = getChatIdFromKey(sessionKey);
1544
- const threadId = sessionKey.includes(':') ? sessionKey.split(':')[1] : null;
1545
- return async function canUseTool(toolName, input, opts) {
1546
- const apprCfg = config.bot?.approvals;
1547
- if (!apprCfg || !apprCfg.adminChatId) {
1548
- // Not configured for this bot → allow everything (matches CLI
1549
- // pm's bypassPermissions today when approvals not set).
1550
- return { behavior: 'allow' };
1551
- }
1552
-
1553
- const canonicalInput = canonicalizeToolInput(input);
1554
-
1555
- // Step 2: chat_tool_decisions short-circuit.
1556
- try {
1557
- const persisted = db.lookupChatToolDecision({
1558
- bot_name: BOT_NAME, chat_id: chatId, tool_name: toolName,
1559
- canonical_input: canonicalInput, now: Date.now(),
1560
- });
1561
- if (persisted) {
1562
- logEvent('canusetool-shortcircuit', {
1563
- chat_id: chatId, tool_name: toolName,
1564
- decision: persisted.decision, match_type: persisted.match_type,
1565
- tool_use_id: opts?.toolUseID || null,
1566
- });
1567
- if (persisted.decision === 'allow') return { behavior: 'allow' };
1568
- return { behavior: 'deny', message: 'matched persisted always-deny rule' };
1569
- }
1570
- } catch (err) {
1571
- console.error(`[${sessionKey}] chat_tool_decisions lookup: ${err.message}`);
1572
- // Non-fatal — fall through to gating + card.
1573
- }
1574
-
1575
- // Step 3: gating check.
1576
- const gated = matchesApprovalPattern(toolName, input, apprCfg.gatedTools || []);
1577
- if (!gated.matched) return { behavior: 'allow' };
1578
-
1579
- // Step 4: issue + post + park.
1580
- const row = approvals.issue({
1581
- bot_name: BOT_NAME, turn_id: opts?.toolUseID || null,
1582
- requester_chat_id: chatId,
1583
- approver_chat_id: String(apprCfg.adminChatId),
1584
- tool_name: toolName, tool_input: input,
1585
- timeoutMs: apprCfg.timeoutMs || APPROVAL_DEFAULT_TIMEOUT_MS,
1586
- });
1587
- if (opts?.toolUseID) {
1588
- // Persist the SDK's stable per-call ID so dedup-by-toolUseId
1589
- // works on retries (same call, same row).
1590
- try {
1591
- approvals.setToolUseId?.(row.id, opts.toolUseID);
1592
- } catch { /* swallow if older approvals store */ }
1593
- }
1594
- if (!bot) {
1595
- approvals.resolve({ id: row.id, status: 'cancelled', reason: 'bot not ready' });
1596
- return { behavior: 'deny', message: 'bot not ready' };
1597
- }
1598
- if (!row.reused || !row.approver_msg_id) {
1599
- try {
1600
- const sent = await tg(bot, 'sendMessage', {
1601
- chat_id: apprCfg.adminChatId,
1602
- text: approvalCardText(row),
1603
- reply_markup: buildApprovalKeyboardWithAlways(row.id, row.callback_token),
1604
- }, { source: 'canusetool-card', botName: BOT_NAME, plainText: true });
1605
- if (sent?.message_id) approvals.setApproverMsgId(row.id, sent.message_id);
1606
- } catch (err) {
1607
- console.error(`[${sessionKey}] failed to post canUseTool card: ${err.message}`);
1608
- approvals.resolve({ id: row.id, status: 'cancelled', reason: `post failed: ${err.message}` });
1609
- return { behavior: 'deny', message: `post failed: ${err.message}` };
1610
- }
1611
- }
1612
-
1613
- // Step 5: race signal + timeout + click.
1614
- return await new Promise((resolve) => {
1615
- let settled = false;
1616
- const settle = (decision) => {
1617
- if (settled) return;
1618
- settled = true;
1619
- clearTimeout(timer);
1620
- if (opts?.signal && sigCleanup) {
1621
- try { opts.signal.removeEventListener('abort', sigCleanup); }
1622
- catch { /* swallow */ }
1623
- }
1624
- dropWaiter(row.id, wrappedResolve);
1625
- resolve(decision);
1626
- };
1627
- const timer = setTimeout(() => {
1628
- approvals.resolve({ id: row.id, status: 'timeout' }).catch?.(() => {});
1629
- settle({ behavior: 'deny', message: 'approval timed out' });
1630
- }, Math.max(1000, row.timeout_ts - Date.now()));
1631
- const sigCleanup = opts?.signal
1632
- ? () => settle({ behavior: 'deny', message: 'aborted' })
1633
- : null;
1634
- if (opts?.signal && sigCleanup) {
1635
- opts.signal.addEventListener('abort', sigCleanup, { once: true });
1636
- }
1637
- const wrappedResolve = (decision, reason, extra) => {
1638
- // decision here is from resolveApprovalWaiter:
1639
- // 'approved' | 'denied' | 'approved-always' | 'denied-always'
1640
- // Map to SDK PermissionResult shape. extra carries
1641
- // updatedPermissions for the always-* variants.
1642
- if (decision === 'approved' || decision === 'approved-always') {
1643
- settle({
1644
- behavior: 'allow',
1645
- ...(decision === 'approved-always' && extra?.updatedPermissions
1646
- ? { updatedPermissions: extra.updatedPermissions }
1647
- : {}),
1648
- });
1649
- } else {
1650
- settle({
1651
- behavior: 'deny',
1652
- message: reason || decision || 'denied',
1653
- });
1654
- }
1655
- };
1656
- const list = approvalWaiters.get(row.id) || [];
1657
- list.push(wrappedResolve);
1658
- approvalWaiters.set(row.id, list);
1659
- });
1660
- };
1661
- }
1662
-
1663
- async function handleApprovalRequest(req) {
1664
- const { bot_name, chat_id, turn_id, tool_name, tool_input } = req;
1665
- if (!chat_id || !tool_name) {
1666
- throw new Error('chat_id, tool_name required');
1667
- }
1668
- // Per-bot process: the caller's bot_name must match ours if provided.
1669
- if (bot_name && bot_name !== BOT_NAME) {
1670
- throw new Error(`wrong bot: socket is ${BOT_NAME}, request is for ${bot_name}`);
1671
- }
1672
-
1673
- const apprCfg = config.bot?.approvals;
1674
- if (!apprCfg || !apprCfg.adminChatId) {
1675
- return { decision: 'not-gated', reason: 'approvals not configured for this bot' };
1676
- }
1677
-
1678
- const gated = matchesApprovalPattern(tool_name, tool_input, apprCfg.gatedTools || []);
1679
- if (!gated.matched) {
1680
- return { decision: 'not-gated' };
1681
- }
1682
-
1683
- // Issue pending row (with dedup). Row persists the bot_name for archive/
1684
- // audit queries across per-bot DBs.
1685
- const row = approvals.issue({
1686
- bot_name: BOT_NAME, turn_id, requester_chat_id: chat_id,
1687
- approver_chat_id: String(apprCfg.adminChatId),
1688
- tool_name, tool_input,
1689
- timeoutMs: apprCfg.timeoutMs || APPROVAL_DEFAULT_TIMEOUT_MS,
1690
- });
1691
-
1692
- if (!bot) {
1693
- approvals.resolve({ id: row.id, status: 'cancelled', reason: 'bot process not ready' });
1694
- return { decision: 'denied', reason: 'bot process not ready' };
1695
- }
1696
-
1697
- if (!row.reused || !row.approver_msg_id) {
1698
- try {
1699
- const sent = await tg(bot, 'sendMessage', {
1700
- chat_id: apprCfg.adminChatId,
1701
- text: approvalCardText(row),
1702
- reply_markup: buildApprovalKeyboard(row.id, row.callback_token),
1703
- }, { source: 'approval-request', botName: BOT_NAME, plainText: true });
1704
- if (sent?.message_id) {
1705
- approvals.setApproverMsgId(row.id, sent.message_id);
1706
- }
1707
- } catch (err) {
1708
- console.error(`[${BOT_NAME}] failed to post approval card: ${err.message}`);
1709
- approvals.resolve({ id: row.id, status: 'cancelled', reason: `post failed: ${err.message}` });
1710
- return { decision: 'denied', reason: `post failed: ${err.message}` };
1711
- }
1712
- }
1713
-
1714
- // Block until callback resolves us, or timeout fires. Multiple dedup'd
1715
- // callers can queue on the same id — they all get the same decision.
1716
- return await new Promise((resolve) => {
1717
- const timer = setTimeout(() => {
1718
- dropWaiter(row.id, wrappedResolve);
1719
- resolve({ decision: 'timeout', reason: 'operator did not respond in time' });
1720
- }, Math.max(1000, row.timeout_ts - Date.now()));
1721
-
1722
- const wrappedResolve = (decision, reason) => {
1723
- clearTimeout(timer);
1724
- // Translate 'approved-always' / 'denied-always' to plain
1725
- // approve/deny for the IPC caller — the IPC hook protocol
1726
- // doesn't carry persistence state, only the bool decision.
1727
- if (decision === 'approved-always') decision = 'approved';
1728
- else if (decision === 'denied-always') decision = 'denied';
1729
- resolve({ decision, reason });
1730
- };
1731
-
1732
- const list = approvalWaiters.get(row.id) || [];
1733
- list.push(wrappedResolve);
1734
- approvalWaiters.set(row.id, list);
1735
- });
1736
- }
1737
-
1738
- function dropWaiter(id, fn) {
1739
- const list = approvalWaiters.get(id);
1740
- if (!list) return;
1741
- const i = list.indexOf(fn);
1742
- if (i !== -1) list.splice(i, 1);
1743
- if (list.length === 0) approvalWaiters.delete(id);
1744
- }
1745
-
1746
- function resolveApprovalWaiter(id, decision, reason, extra) {
1747
- // `extra` carries SDK-shape updatedPermissions for always-* clicks.
1748
- // IPC waiters (CLI pm) ignore it; SDK canUseTool waiters use it
1749
- // to populate PermissionResult.updatedPermissions so the in-flight
1750
- // Query picks up the new rule for the rest of the turn.
1751
- const list = approvalWaiters.get(id);
1752
- if (!list) return;
1753
- approvalWaiters.delete(id);
1754
- for (const fn of list) {
1755
- try { fn(decision, reason, extra); } catch {}
1756
- }
1757
- }
1758
-
1759
- async function handleApprovalCallback(ctx) {
1760
- const data = ctx.callbackQuery?.data || '';
1761
- // 0.8.0 Phase 2 step 6: extended pattern accepts the 4-button
1762
- // SDK canUseTool format. `approve-always` / `deny-always`
1763
- // additionally write a row to chat_tool_decisions so subsequent
1764
- // calls to the same tool with the same input short-circuit.
1765
- const m = String(data).match(/^(approve|deny|approve-always|deny-always):(\d+):(\S+)$/);
1766
- if (!m) return;
1767
- const decision = m[1];
1768
- const id = parseInt(m[2], 10);
1769
- const token = m[3];
1770
-
1771
- const row = approvals.getById(id);
1772
- if (!row) {
1773
- await ctx.answerCallbackQuery({ text: 'Unknown approval.', show_alert: true }).catch(() => {});
1774
- return;
1775
- }
1776
- if (!approvalTokensEqual(row.callback_token, token)) {
1777
- logEvent('approval-token-mismatch', {
1778
- id, from_user: ctx.from?.id,
1779
- // Don't log the sent_token — attackers guessing it don't need to know
1780
- // which prefix they got close on.
1781
- });
1782
- await ctx.answerCallbackQuery({ text: 'Bad token.', show_alert: true }).catch(() => {});
1783
- return;
1784
- }
1785
- if (row.status !== 'pending') {
1786
- await ctx.answerCallbackQuery({ text: `Already ${row.status}.`, show_alert: true }).catch(() => {});
1787
- return;
1788
- }
1789
-
1790
- // Only the configured approver chat is authoritative. Our process only
1791
- // serves one bot, so config.bot.approvals is the authoritative source.
1792
- const apprCfg = config.bot?.approvals;
1793
- const expectedChat = String(apprCfg?.adminChatId || '');
1794
- if (String(ctx.chat?.id) !== expectedChat) {
1795
- logEvent('approval-foreign-chat', {
1796
- id, from_chat: ctx.chat?.id, expected: expectedChat,
1797
- });
1798
- await ctx.answerCallbackQuery({ text: 'Not authorised here.', show_alert: true }).catch(() => {});
1799
- return;
1800
- }
1801
-
1802
- // 0.8.0 Phase 2 step 6: parse always-variants. The base status
1803
- // ('approved' / 'denied') drives existing logic + card edit;
1804
- // `isAlways` triggers the chat_tool_decisions persistence
1805
- // below (after the atomic SQL resolve succeeds, so we don't
1806
- // write a "always" rule for a stale double-click).
1807
- const isApprove = decision === 'approve' || decision === 'approve-always';
1808
- const isAlways = decision === 'approve-always' || decision === 'deny-always';
1809
- const status = isApprove ? 'approved' : 'denied';
1810
- const user = ctx.from?.first_name || ctx.from?.username || null;
1811
- const userId = ctx.from?.id || null;
1812
- // SQL-level atomic resolve: UPDATE ... WHERE status='pending' — so in a
1813
- // double-click race only one of the two callers writes. If ours was
1814
- // second, changes=0 → stale decision; tell the clicker and don't edit
1815
- // the card a second time (the winner already did).
1816
- const changes = approvals.resolve({
1817
- id, status,
1818
- decided_by_user_id: userId, decided_by_user: user,
1819
- });
1820
- if (changes === 0) {
1821
- const fresh = approvals.getById(id);
1822
- await ctx.answerCallbackQuery({
1823
- text: `Already ${fresh?.status || 'resolved'}.`,
1824
- show_alert: true,
1825
- }).catch(() => {});
1826
- return;
1827
- }
1828
- logEvent('approval-resolved', {
1829
- id, status, by: userId, user, bot: BOT_NAME,
1830
- });
1831
-
1832
- // Edit the card to show the decision. 0.6.14: routed through tg() so
1833
- // the edit gets the same write-before-send DB row, plain-text policy
1834
- // (no parse_mode injection from tool input), and logged failure
1835
- // surface as every other outbound. Pre-0.6.14 this called
1836
- // ctx.api.editMessageText directly — same bypass class as the two
1837
- // already-routed in 0.6.8 (approval-timeout and pair-onboarding).
1838
- try {
1839
- const fresh = approvals.getById(id);
1840
- await tg(bot, 'editMessageText', {
1841
- chat_id: row.approver_chat_id,
1842
- message_id: row.approver_msg_id,
1843
- text: approvalCardText(fresh, {
1844
- resolvedBy: `${status === 'approved' ? '✅ Approved' : '❌ Denied'} by ${user || userId}`,
1845
- }),
1846
- }, { source: 'approval-card-decision', botName: BOT_NAME, plainText: true });
1847
- } catch (err) {
1848
- console.error(`[${BOT_NAME}] edit approval card failed: ${err.message}`);
1849
- }
1850
- // 0.8.0 Phase 2 step 6: persist always-* clicks to chat_tool_decisions
1851
- // so subsequent SDK canUseTool calls for the same (bot, chat, tool,
1852
- // input) short-circuit without prompting. Use prefix match by default
1853
- // (allows minor argument variations) — the user can hand-edit to
1854
- // exact / regex via SQL if they want narrower rules.
1855
- let updatedPermissions = null;
1856
- if (isAlways) {
1857
- try {
1858
- const canonical = canonicalizeToolInput(row.tool_input);
1859
- db.insertChatToolDecision({
1860
- bot_name: BOT_NAME,
1861
- chat_id: row.requester_chat_id,
1862
- tool_name: row.tool_name,
1863
- match_type: 'prefix', // most useful default; exact would be too narrow
1864
- input_pattern: canonical,
1865
- decision: status === 'approved' ? 'allow' : 'deny',
1866
- issued_by_user_id: userId ? String(userId) : null,
1867
- expires_ts: null,
1868
- });
1869
- logEvent('chat-tool-decision-persisted', {
1870
- chat_id: row.requester_chat_id,
1871
- tool_name: row.tool_name,
1872
- decision: status === 'approved' ? 'allow' : 'deny',
1873
- match_type: 'prefix',
1874
- });
1875
- // Build SDK-shape updatedPermissions so the in-flight Query
1876
- // also picks up the rule for the rest of THIS turn (avoids
1877
- // re-prompting on the next sibling tool call).
1878
- updatedPermissions = [{
1879
- type: 'addRules',
1880
- rules: [{
1881
- toolName: row.tool_name,
1882
- decision: status === 'approved' ? 'allow' : 'deny',
1883
- }],
1884
- }];
1885
- } catch (err) {
1886
- console.error(`[${BOT_NAME}] chat_tool_decisions persist failed: ${err.message}`);
1887
- // Non-fatal — the one-time decision still propagates below.
1888
- }
1889
- }
1890
-
1891
- await ctx.answerCallbackQuery({ text: status }).catch(() => {});
1892
-
1893
- // Pass the original decision token back to the waiter so it can
1894
- // distinguish 'approved-always' (SDK gets updatedPermissions)
1895
- // from plain 'approved'. CLI IPC waiters strip back to plain
1896
- // approve/deny in their wrappedResolve.
1897
- resolveApprovalWaiter(id, decision === 'approve-always' ? 'approved-always'
1898
- : decision === 'deny-always' ? 'denied-always'
1899
- : status, undefined, { updatedPermissions });
1900
- }
1901
-
1902
- // Handles taps on the /model and /effort inline keyboard buttons. Same
1903
- // outcome as the text-typed `/model sonnet` flow: mutate chatConfig,
1904
- // trigger graceful respawn, log config change, edit the message to show
1905
- // the new ✓ marker.
1906
- async function handleConfigCallback(ctx) {
1907
- const data = ctx.callbackQuery?.data || '';
1908
- const m = String(data).match(/^cfg:(model|effort):(\S+)$/);
1909
- if (!m) return;
1910
- const setting = m[1];
1911
- const value = m[2];
1912
-
1913
- const chatId = String(ctx.callbackQuery.message?.chat?.id || '');
1914
- const chatConfig = config.chats[chatId];
1915
- if (!chatConfig) {
1916
- await ctx.answerCallbackQuery({ text: 'Chat not configured', show_alert: true }).catch(() => {});
1917
- return;
1918
- }
1919
- if (!config.bot?.allowConfigCommands) {
1920
- await ctx.answerCallbackQuery({ text: 'Config commands disabled', show_alert: true }).catch(() => {});
1921
- return;
1922
- }
1923
-
1924
- const validValues = setting === 'model' ? MODEL_OPTIONS : EFFORT_OPTIONS;
1925
- if (!validValues.includes(value)) {
1926
- await ctx.answerCallbackQuery({ text: `Invalid ${setting}` }).catch(() => {});
1927
- return;
1928
- }
1929
-
1930
- const oldValue = chatConfig[setting];
1931
- if (oldValue === value) {
1932
- await ctx.answerCallbackQuery({ text: `Already ${value}` }).catch(() => {});
1933
- return;
1934
- }
1935
-
1936
- chatConfig[setting] = value;
1937
- const cmdUserId = ctx.callbackQuery.from?.id || null;
1938
- const cmdUser = ctx.callbackQuery.from?.first_name || ctx.callbackQuery.from?.username || null;
1939
- dbWrite(() => db.logConfigChange({
1940
- chat_id: chatId, thread_id: null, field: setting,
1941
- old_value: oldValue, new_value: value,
1942
- user: cmdUser, user_id: cmdUserId, source: 'inline-button',
1943
- }), `log ${setting} change`);
1944
-
1945
- // Graceful application of the change to the topic's session. With
1946
- // isolateTopics=false sessionKey is the chat (one shared session). With
1947
- // isolateTopics=true sessionKey carries the topic, so other topics'
1948
- // in-flight turns are not disturbed and the card update + button toast
1949
- // only affect the user's own context.
1950
- //
1951
- // CLI pm: requestRespawn drains pending turns then kills the process;
1952
- // the next user message spawns fresh with the updated chatConfig.
1953
- // SDK pm: applies live to the running Query via setModel /
1954
- // applyFlagSettings — no respawn needed, change takes effect for the
1955
- // rest of the in-flight turn AND all future ones. Falls back to
1956
- // {killed: false} if neither method is available, leaving the new
1957
- // chatConfig value to be picked up by the next cold spawn.
1958
- const callbackThreadId = ctx.callbackQuery.message?.message_thread_id?.toString() || null;
1959
- const callbackSessionKey = getSessionKey(chatId, callbackThreadId, chatConfig);
1960
- const reason = setting === 'model' ? 'model-change' : 'effort-change';
1961
- // Feature-detect on the routed pm for this specific session, not on
1962
- // the router itself (the router exposes every method as a forwarding
1963
- // shim so `typeof pm.X` is always 'function').
1964
- const pmForCb = pm.pickFor(callbackSessionKey);
1965
- let respawn;
1966
- if (typeof pmForCb.requestRespawn === 'function') {
1967
- respawn = pmForCb.requestRespawn(callbackSessionKey, reason);
1968
- } else if (setting === 'effort' && typeof pmForCb.applyFlagSettings === 'function') {
1969
- const ok = await pmForCb.applyFlagSettings(callbackSessionKey, { effortLevel: value });
1970
- respawn = { killed: ok };
1971
- } else if (setting === 'model' && typeof pmForCb.setModel === 'function') {
1972
- const ok = await pmForCb.setModel(callbackSessionKey, value);
1973
- respawn = { killed: ok };
1974
- } else {
1975
- respawn = { killed: false };
1976
- }
1977
- const anyActive = !respawn.killed;
1978
-
1979
- // Re-render the card with updated ✓ + the same help text shown initially.
1980
- // Detect original card type (model-only / effort-only / both) by counting
1981
- // rows in the existing reply_markup so the user sees the same layout
1982
- // they tapped into.
1983
- const existingRows = ctx.callbackQuery.message?.reply_markup?.inline_keyboard?.length || 0;
1984
- const showRow = existingRows >= 2 ? 'all' : setting;
1985
- // chatId works as a session-key proxy here for the warm-process check
1986
- // (isolateTopics chats might have multiple keys but for this card we
1987
- // just want a representative state).
1988
- const newInfo = formatConfigInfoText(chatConfig, showRow, chatId);
1989
- const newKeyboard = buildConfigKeyboard(chatConfig, showRow);
1990
- try {
1991
- // Pre-format the markdown→HTML ourselves so editMessageText can be
1992
- // called with the right parse_mode (the bot.api.editMessageText path
1993
- // bypasses tg() / applyFormatting in the chat-action approval card,
1994
- // but here we DO want HTML).
1995
- const { toTelegramHtml } = require('./lib/telegram-format');
1996
- const { text: html, parseMode } = toTelegramHtml(newInfo);
1997
- await ctx.editMessageText(html, {
1998
- reply_markup: newKeyboard,
1999
- ...(parseMode && { parse_mode: parseMode }),
2000
- });
2001
- } catch (err) {
2002
- console.error(`[${BOT_NAME}] config-card edit failed: ${err.message}`);
2003
- }
2004
-
2005
- const ackText = anyActive
2006
- ? `${setting} → ${value} — switching when finished`
2007
- : `${setting} → ${value}`;
2008
- await ctx.answerCallbackQuery({ text: ackText }).catch(() => {});
469
+ // ─── Response parsing (stickers, reactions) ─────────────────────────
470
+ // Implementation lives in lib/telegram/parse so tests can require it
471
+ // without starting a bot (polygram.js is a top-level script that calls
472
+ // main() at bottom). The wrapper here supplies the runtime stickerMap /
473
+ // emojiToSticker that the parser looks up against.
474
+ //
475
+ // 0.7.5: parser also recognises a literal `[sticker:NAME]` pattern in
476
+ // addition to single-emoji shortcuts. Claude reads its own past outbound
477
+ // rows on session resume, sees `[sticker:working]` (the placeholder
478
+ // deriveOutboundText synthesises for sendSticker rows), and starts
479
+ // mimicking the format as plain text. Without the new branch the
480
+ // placeholder was rendered verbatim in the chat instead of swapped for
481
+ // the actual sticker.
482
+ const {
483
+ parseResponse: parseResponseImpl,
484
+ stripInlineTags: stripInlineTagsImpl,
485
+ } = require('./lib/telegram/parse');
486
+ function parseResponse(text) {
487
+ return parseResponseImpl(text, { stickerMap, emojiToSticker });
2009
488
  }
2010
-
2011
- function startApprovalSweeper(intervalMs = 30_000) {
2012
- return setInterval(() => {
2013
- let rows;
2014
- try {
2015
- rows = approvals.sweepTimedOut();
2016
- } catch (err) {
2017
- // Silent failure here is invisible death — pending approvals time out
2018
- // with no operator signal. Log loudly.
2019
- console.error(`[approvals] sweeper DB error: ${err.message}`);
2020
- logEvent('approval-sweep-failed', {
2021
- error: err.message?.slice(0, 300),
2022
- });
2023
- return;
2024
- }
2025
- for (const row of rows) {
2026
- approvals.resolve({ id: row.id, status: 'timeout' });
2027
- logEvent('approval-timeout', {
2028
- id: row.id, bot: BOT_NAME, tool: row.tool_name,
2029
- });
2030
- resolveApprovalWaiter(row.id, 'timeout', 'swept');
2031
- // Best-effort: edit the card to show the timeout. Routed through
2032
- // tg() so the edit gets the same plain-text formatting policy as
2033
- // the original card post (no parse_mode injection from tool input)
2034
- // AND lands in the transcript like every other outbound. Pre-0.6.8
2035
- // this called bot.api.editMessageText directly and bypassed both.
2036
- if (bot && row.approver_msg_id) {
2037
- tg(bot, 'editMessageText', {
2038
- chat_id: row.approver_chat_id,
2039
- message_id: row.approver_msg_id,
2040
- text: approvalCardText(approvals.getById(row.id), { resolvedBy: '⏰ Timed out' }),
2041
- }, { source: 'approval-card-timeout', botName: BOT_NAME, plainText: true })
2042
- .catch((err) => console.error(`[${BOT_NAME}] approval-card-timeout edit: ${err.message}`));
2043
- }
2044
- }
2045
- }, intervalMs);
489
+ // rc.67: pre-processor for the streamer. Strips recognised inline
490
+ // `[sticker:NAME]` and any `[react:EMOJI]` tags BEFORE the chunk is
491
+ // committed to the bubble + DB row, so the user never sees a literal
492
+ // tag even when the turn-end finalize path doesn't manage to clean it
493
+ // (interrupt, error, hung query, edit failure, or the stickerMap-miss
494
+ // no-op branch). parseResponse continues to surface the same tags in
495
+ // `parsed.stickers[]` / `parsed.reactions[]` for outbound dispatch via
496
+ // sendInlineStickers / sendInlineReactions.
497
+ function stripInlineTagsForStreamer(text) {
498
+ return stripInlineTagsImpl(text, { stickerMap });
2046
499
  }
2047
500
 
501
+ // ─── Cron/IPC send — extracted to lib/handlers/ipc-send.js ──────────
502
+ let handleSendOverIpc = null;
503
+
504
+ // ─── Approvals ─────────────────────────────────────────────────────
505
+ // Pure UI builders live in lib/approvals/ui.js for testability.
506
+ // Imported above (buildApprovalKeyboardWithAlways, approvalCardText,
507
+ // formatToolInputForCard).
508
+
509
+ // Config card UI moved to lib/handlers/config-ui.js. polygram.js
510
+ // keeps a thin formatConfigInfoText wrapper since it needs the
511
+ // runtime pm + db + getClaudeSessionId; buildConfigKeyboard is
512
+ // pure and re-exported.
513
+ const {
514
+ buildConfigKeyboard,
515
+ createFormatConfigInfoText,
516
+ MODEL_OPTIONS,
517
+ EFFORT_OPTIONS,
518
+ MODEL_VERSIONS_DESC,
519
+ } = require('./lib/handlers/config-ui');
520
+ let formatConfigInfoText = null;
521
+ // CRITICAL: these placeholders MUST exist or 'use strict' boot fails
522
+ // with ReferenceError. v4 review (commit 39) found 4 missing — restored.
523
+ // Each handler is wired in main() once its deps exist.
524
+ let handleConfigCallback = null;
525
+ let handleAbortIfRequested = null;
526
+ let autosteer = null;
527
+ let dispatchSlashCommand = null;
528
+
529
+ // rc.20: approvalCardText + safeParse moved to lib/approvals/ui.js.
530
+ // 0.9.0 commit 29: makeCanUseTool / handleApprovalCallback /
531
+ // resolveApprovalWaiter / dropWaiter / startApprovalSweeper extracted
532
+ // to lib/handlers/approvals.js. Wired in main() once db + bot + tg +
533
+ // approvals store are available.
534
+ let makeCanUseTool = null;
535
+ let handleApprovalCallback = null;
536
+ let resolveApprovalWaiter = null;
537
+ let startApprovalSweeper = null;
538
+ let cancelAllWaiters = null;
539
+
2048
540
  // Parse /pair-code args: /pair-code [--chat <id>] [--scope user|chat] [--ttl 10m] [--note "..."]
2049
541
  function parsePairCodeArgs(text) {
2050
542
  const out = {};
@@ -2142,342 +634,14 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
2142
634
  await sendReply(info, { params: { reply_markup } });
2143
635
  return;
2144
636
  }
2145
- // 0.8.0 Phase 2 step 7: /new and /reset slash commands. Both close
2146
- // the current Query (if any), clear the claude_session_id from the
2147
- // sessions table, and post "✨ Started a fresh session." Next user
2148
- // message starts a fresh subprocess with no resume.
2149
- //
2150
- // Equivalent UX to OpenClaw's /new and /reset handlers
2151
- // (pi-embedded:40594 BARE_SESSION_RESET_PROMPT). Required by the
2152
- // 85%-context-full hint (Phase 2 step 4) and by classifier-driven
2153
- // auto-recovery (step 8) — both reference these commands.
2154
- // 0.8.0 Phase 2 step 9: /context slash command. On-demand context-
2155
- // usage report. Only meaningful under SDK pm (CLI pm has no
2156
- // getContextUsage equivalent); CLI path replies with a hint.
2157
- if (botAllowsCommands && text === '/context') {
2158
- if (!pm.isSdkFor(sessionKey)) {
2159
- await sendReply('📚 /context requires the SDK pm. This chat is on the CLI pm path.');
2160
- return;
2161
- }
2162
- const entry = pm.get(sessionKey);
2163
- const q = entry?.query;
2164
- if (!q || typeof q.getContextUsage !== 'function') {
2165
- await sendReply('📚 No active session yet — send a message first, then /context.');
2166
- return;
2167
- }
2168
- try {
2169
- const u = await q.getContextUsage();
2170
- await sendReply(formatContextReply(u));
2171
- } catch (err) {
2172
- console.error(`[${label}] /context failed: ${err.message}`);
2173
- await sendReply(`📚 Couldn't fetch context info: ${err.message}`);
2174
- }
2175
- return;
2176
- }
2177
- // 0.8.0-rc.22: /compact <preserve text> — manual SDK compaction with
2178
- // user-supplied preservation instructions. The SDK's CLI binary
2179
- // recognises "/compact" as a slash command via streamInput.push
2180
- // (verified by scripts/spikes/compact-via-streaminput.mjs: PreCompact
2181
- // hook fires with trigger:'manual', compact_boundary event lands).
2182
- // We push the raw text "/compact <instructions>" through the SDK's
2183
- // input controller; the SDK handles parsing + compaction internally.
2184
- if (botAllowsCommands && text.startsWith('/compact')) {
2185
- if (!pm.isSdkFor(sessionKey)) {
2186
- await sendReply('🗜️ /compact requires the SDK pm. This chat is on the CLI pm path.');
2187
- return;
2188
- }
2189
- // rc.64: if the in-memory session was evicted (LRU cap pressure)
2190
- // but there's a saved Claude session_id in DB, auto-spawn the
2191
- // Query with --resume so /compact has something to work with.
2192
- // Pre-rc.64 we returned "🗜️ No active session" — confusing
2193
- // because the user just had a conversation 5 minutes ago, the
2194
- // session went idle, LRU evicted it, and "No active session"
2195
- // reads as "I never knew you" instead of "I unloaded your
2196
- // session, hold on."
2197
- let entry = pm.get(sessionKey);
2198
- if (!entry) {
2199
- const savedSessionId = getClaudeSessionId(db, sessionKey);
2200
- if (!savedSessionId) {
2201
- await sendReply('🗜️ No conversation to compact yet. Send a message first, then /compact.');
2202
- return;
2203
- }
2204
- try {
2205
- entry = await getOrSpawnForChat(sessionKey);
2206
- } catch (err) {
2207
- console.error(`[${label}] /compact spawn-resume: ${err.message}`);
2208
- await sendReply(`🗜️ Couldn't load session for compaction: ${err.message}`);
2209
- return;
2210
- }
2211
- if (!entry) {
2212
- await sendReply('🗜️ Session not loadable (config missing).');
2213
- return;
2214
- }
2215
- logEvent('compact-spawn-resumed', {
2216
- chat_id: chatId, thread_id: threadIdStr, session_key: sessionKey,
2217
- resumed_session_id: savedSessionId,
2218
- });
2219
- }
2220
- if (!entry?.inputController?.push) {
2221
- await sendReply('🗜️ Session not ready for /compact (no input controller).');
2222
- return;
2223
- }
2224
- // Push the literal "/compact ..." text into the input stream.
2225
- // The SDK parses leading "/" as a slash command and triggers
2226
- // manual compaction; user's preserve instructions land in
2227
- // PreCompactHookInput.custom_instructions.
2228
- try {
2229
- entry.inputController.push({
2230
- type: 'user',
2231
- message: { role: 'user', content: text },
2232
- parent_tool_use_id: null,
2233
- });
2234
- logEvent('compact-command', {
2235
- chat_id: chatId, thread_id: threadIdStr, session_key: sessionKey,
2236
- text_len: text.length,
2237
- // rc.65: store the full /compact text so boot-time orphan
2238
- // recovery can silently re-push it after a deploy
2239
- // interrupted compaction. Agent-supplied hints (e.g.
2240
- // "/compact keep the Q3 commission decisions") need to be
2241
- // preserved verbatim for the retry to summarize correctly.
2242
- text,
2243
- user: cmdUser, user_id: cmdUserId,
2244
- });
2245
- const hasHint = text.length > '/compact'.length + 1;
2246
- await sendReply(hasHint
2247
- ? '🗜️ Compacting with your hint…'
2248
- : '🗜️ Compacting…');
2249
- } catch (err) {
2250
- console.error(`[${label}] /compact push: ${err.message}`);
2251
- await sendReply(`🗜️ Couldn't trigger compact: ${err.message}`);
2252
- }
2253
- return;
2254
- }
2255
- // 0.8.0-rc.46: /reload — close + respawn the SDK Query while
2256
- // PRESERVING the persisted claude_session_id. The next user
2257
- // message resumes the conversation (model has prior context via
2258
- // SDK resume) but the spawn re-reads agent / skill / plugin files
2259
- // from disk, so any local edits to ~/.claude/agents/<name>.md or
2260
- // skill files take effect.
2261
- //
2262
- // Difference vs /new:
2263
- // /new → resetSession clears session_id → fresh conversation
2264
- // /reload → kill closes Query, session_id preserved → same
2265
- // conversation continues with fresh agent/skill code
2266
- //
2267
- // Mechanism: pm.kill drains the pending queue (with code 'KILLED')
2268
- // and closes the SDK Query. Crucially it does NOT call
2269
- // db.clearSessionId — the contract test
2270
- // 'pm.kill does NOT call db.clearSessionId' pins this invariant.
2271
- // The next user message hits getOrSpawn → no warm Query → spawn
2272
- // fresh with `resume: <session_id>` from sessions table → SDK
2273
- // restores conversation history → fresh files loaded.
2274
- if (botAllowsCommands && text === '/reload') {
2275
- if (pm.has(sessionKey)) {
2276
- try { await pm.kill(sessionKey); }
2277
- catch (err) { console.error(`[${label}] kill on /reload: ${err.message}`); }
2278
- }
2279
- logEvent('session-reload-command', {
2280
- chat_id: chatId, command: text,
2281
- user: cmdUser, user_id: cmdUserId,
2282
- });
2283
- await sendReply('🔄 Reloaded. Next message picks up the conversation with fresh skills/agents.');
2284
- return;
2285
- }
2286
-
2287
- if (botAllowsCommands && (text === '/new' || text === '/reset')) {
2288
- let drained = 0;
2289
- const target = pm.pickFor(sessionKey);
2290
- if (typeof target.resetSession === 'function') {
2291
- try {
2292
- const r = await target.resetSession(sessionKey, { reason: text.slice(1) });
2293
- drained = r?.drainedPendings ?? 0;
2294
- } catch (err) {
2295
- console.error(`[${label}] resetSession ${text}: ${err.message}`);
2296
- }
2297
- } else {
2298
- // CLI pm fallback: kill the session; sessions table cleared
2299
- // via clearSessionId in pm's proc.on('close') resume-fail
2300
- // path (lib/process-manager.js:457). We force the path by
2301
- // setting the kill rationale so the close handler treats it
2302
- // as a successful reset.
2303
- try { await pm.kill(sessionKey); }
2304
- catch (err) { console.error(`[${label}] kill on ${text}: ${err.message}`); }
2305
- try { db.clearSessionId(sessionKey); } catch { /* swallow */ }
2306
- }
2307
- // rc.59: clear contextHint once-per-cycle gate so a freshly-reset
2308
- // session can fire its own hint when context fills up again.
2309
- contextHintShown.delete(sessionKey);
2310
- logEvent('session-reset-command', {
2311
- chat_id: chatId, command: text, drained_pendings: drained,
2312
- user: cmdUser, user_id: cmdUserId,
2313
- });
2314
- await sendReply('✨ Started a fresh session.');
2315
- return;
2316
- }
2317
- // 0.8.0-rc.9: /steer command removed. Mid-turn user input is
2318
- // handled implicitly by autosteer — any follow-up message during
2319
- // an in-flight SDK turn flows through autosteerBuffer +
2320
- // PostToolBatch hook. No explicit command needed; matches Claude
2321
- // Code interactive UX where you just keep typing.
2322
- // Graceful application of a model/effort change to the user's CURRENT
2323
- // session only. With isolateTopics=false the sessionKey is just the
2324
- // chat (one shared session for the whole chat — every topic
2325
- // respawns implicitly). With isolateTopics=true each topic is a
2326
- // separate session, and a /model in topic A should NOT disturb
2327
- // topic B's in-flight turn or post a phantom "✓ Using sonnet now"
2328
- // in a topic that didn't ask.
2329
- //
2330
- // CLI pm: requestRespawn drains pending turns then kills the process;
2331
- // the next user message spawns fresh with the updated chatConfig.
2332
- // SDK pm: applies live to the running Query via setModel /
2333
- // applyFlagSettings — no respawn needed, change takes effect for
2334
- // the rest of the in-flight turn AND all future ones.
2335
- const applyConfigChange = async (reason, setting, value) => {
2336
- const target = pm.pickFor(sessionKey);
2337
- if (typeof target.requestRespawn === 'function') {
2338
- const res = target.requestRespawn(sessionKey, reason);
2339
- return { queued: res.queued, anyActive: !res.killed };
2340
- }
2341
- if (setting === 'effort' && typeof target.applyFlagSettings === 'function') {
2342
- const ok = await target.applyFlagSettings(sessionKey, { effortLevel: value });
2343
- return { queued: 0, anyActive: !ok };
2344
- }
2345
- if (setting === 'model' && typeof target.setModel === 'function') {
2346
- const ok = await target.setModel(sessionKey, value);
2347
- return { queued: 0, anyActive: !ok };
2348
- }
2349
- return { queued: 0, anyActive: false };
2350
- };
2351
-
2352
- if (botAllowsCommands && text.startsWith('/model ')) {
2353
- const newModel = text.slice(7).trim();
2354
- if (['opus', 'sonnet', 'haiku'].includes(newModel)) {
2355
- const oldModel = chatConfig.model;
2356
- // Ephemeral: in-memory only, reverts to config.json on restart.
2357
- chatConfig.model = newModel;
2358
- dbWrite(() => db.logConfigChange({
2359
- chat_id: chatId, thread_id: threadIdStr, field: 'model',
2360
- old_value: oldModel, new_value: newModel,
2361
- user: cmdUser, user_id: cmdUserId, source: 'command',
2362
- }), 'log model change');
2363
- const { anyActive } = await applyConfigChange('model-change', 'model', newModel);
2364
- const ver = MODEL_VERSIONS[newModel] || newModel;
2365
- const suffix = anyActive ? ` — I'll switch when I finish` : '';
2366
- await sendReply(`Model → ${newModel} (${ver})${suffix}`);
2367
- } else {
2368
- await sendReply(`Unknown model. Use: opus, sonnet, haiku`);
2369
- }
2370
- return;
2371
- }
2372
- if (botAllowsCommands && text.startsWith('/effort ')) {
2373
- const newEffort = text.slice(8).trim();
2374
- if (['low', 'medium', 'high', 'xhigh', 'max'].includes(newEffort)) {
2375
- const oldEffort = chatConfig.effort;
2376
- // Ephemeral: in-memory only, reverts to config.json on restart.
2377
- chatConfig.effort = newEffort;
2378
- dbWrite(() => db.logConfigChange({
2379
- chat_id: chatId, thread_id: threadIdStr, field: 'effort',
2380
- old_value: oldEffort, new_value: newEffort,
2381
- user: cmdUser, user_id: cmdUserId, source: 'command',
2382
- }), 'log effort change');
2383
- const { anyActive } = await applyConfigChange('effort-change', 'effort', newEffort);
2384
- const suffix = anyActive ? ` — I'll switch when I finish` : '';
2385
- await sendReply(`Effort → ${newEffort}${suffix}`);
2386
- } else {
2387
- await sendReply(`Unknown effort. Use: low, medium, high, xhigh, max`);
2388
- }
2389
- return;
2390
- }
2391
- // Admin-only pairing commands — chat must match config.bot.adminChatId.
2392
- // allowConfigCommands alone is NOT sufficient: that flag gates /model and
2393
- // /effort which only affect the current chat. Pairing issues cross-chat
2394
- // trust and must be narrowed further.
2395
- const adminChatId = config.bot?.adminChatId ? String(config.bot.adminChatId) : null;
2396
- const isAdminChat = adminChatId && String(chatId) === adminChatId;
2397
-
2398
- if (botAllowsCommands && text.startsWith('/pair-code')) {
2399
- if (!isAdminChat) { await sendReply('Pairing commands are admin-only; run from the admin chat.'); return; }
2400
- const issuerId = cmdUserId;
2401
- if (!issuerId) { await sendReply('No user id on request'); return; }
2402
- const args = parsePairCodeArgs(text);
2403
- try {
2404
- const out = pairings.issueCode({
2405
- bot_name: BOT_NAME,
2406
- chat_id: args.chat || null,
2407
- scope: args.scope || 'user',
2408
- issued_by_user_id: issuerId,
2409
- ttlMs: args.ttl ? parsePairingTtl(args.ttl) : undefined,
2410
- note: args.note || null,
2411
- });
2412
- logEvent('pair-code-issued', {
2413
- bot: BOT_NAME, by: issuerId, scope: out.scope,
2414
- chat_id: out.chat_id, note: out.note,
2415
- });
2416
- const ttlLabel = args.ttl || '10m';
2417
- const chatLabel = out.chat_id ? `chat ${out.chat_id}` : 'any chat';
2418
- await sendReply(
2419
- `Code: ${out.code}\nexpires: ${ttlLabel}\nscope: ${out.scope} (${chatLabel})${out.note ? `\nnote: ${out.note}` : ''}\n\nShare with user:\n/pair ${out.code}`,
2420
- );
2421
- } catch (err) {
2422
- await sendReply(`Could not issue code: ${err.message}`);
2423
- }
2424
- return;
2425
- }
2426
- if (botAllowsCommands && text.startsWith('/pairings')) {
2427
- if (!isAdminChat) { await sendReply('Pairing commands are admin-only; run from the admin chat.'); return; }
2428
- const rows = pairings.listActive(BOT_NAME);
2429
- if (!rows.length) { await sendReply('No active pairings.'); return; }
2430
- const lines = rows.map(r => {
2431
- const chat = r.chat_id ? `chat ${r.chat_id}` : 'any chat';
2432
- const granted = new Date(r.granted_ts).toISOString().slice(0, 16).replace('T', ' ');
2433
- const note = r.note ? ` — ${r.note}` : '';
2434
- return `• user ${r.user_id} — ${chat} — ${granted}${note}`;
2435
- });
2436
- await sendReply(`Active pairings (${rows.length}):\n${lines.join('\n')}`);
2437
- return;
2438
- }
2439
- if (botAllowsCommands && text.startsWith('/unpair ')) {
2440
- if (!isAdminChat) { await sendReply('Pairing commands are admin-only; run from the admin chat.'); return; }
2441
- const arg = text.slice(8).trim();
2442
- const targetId = parseInt(arg, 10);
2443
- if (!Number.isFinite(targetId)) {
2444
- await sendReply('Usage: /unpair <user_id>'); return;
2445
- }
2446
- const n = pairings.revokeByUser({ bot_name: BOT_NAME, user_id: targetId });
2447
- logEvent('pair-revoked', {
2448
- bot: BOT_NAME, user_id: targetId, by: cmdUserId, count: n,
2449
- });
2450
- await sendReply(n ? `Revoked ${n} pairing(s) for user ${targetId}.` : `No active pairings for user ${targetId}.`);
2451
- return;
2452
- }
2453
- // /pair <CODE> — open to anyone, no admin gate (the code IS the auth)
2454
- if (text.startsWith('/pair ') && !text.startsWith('/pair-code') && !text.startsWith('/pairings')) {
2455
- if (!cmdUserId) { await sendReply('No user id on request'); return; }
2456
- const code = text.slice(6).trim();
2457
- const res = pairings.claimCode({
2458
- code, claimer_user_id: cmdUserId,
2459
- chat_id: chatId, bot_name: BOT_NAME,
2460
- });
2461
- logEvent('pair-claim-attempt', {
2462
- bot: BOT_NAME, user_id: cmdUserId, chat_id: chatId,
2463
- ok: res.ok, reason: res.reason,
2464
- });
2465
- if (res.ok) {
2466
- const chatLabel = res.chat_id ? `chat ${res.chat_id}` : `every chat ${BOT_NAME} is in`;
2467
- await sendReply(`Paired. You can use me in ${chatLabel}.${res.note ? `\n(${res.note})` : ''}`);
2468
- } else {
2469
- // Collapse specific failure reasons into a single "invalid or expired"
2470
- // response to prevent enumeration: distinguishing "wrong-chat" from
2471
- // "not-found" would tell an attacker a valid code prefix. The
2472
- // pair-claim-attempt event above still logs the precise reason for
2473
- // operator audit.
2474
- const userMsg = res.reason === 'rate-limited'
2475
- ? 'Too many attempts. Try again later.'
2476
- : 'Invalid or expired code.';
2477
- await sendReply(userMsg);
2478
- }
2479
- return;
2480
- }
637
+ // Slash command dispatch extracted to lib/handlers/slash-commands.js.
638
+ // Covers /context /compact /reload /new /reset /model /effort
639
+ // /pair-code /pairings /unpair /pair. Returns true when handled;
640
+ // caller short-circuits.
641
+ if (await dispatchSlashCommand({
642
+ text, sessionKey, chatId, threadIdStr, chatConfig,
643
+ cmdUser, cmdUserId, label, sendReply,
644
+ })) return;
2481
645
 
2482
646
  const t0 = Date.now();
2483
647
 
@@ -2759,19 +923,10 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
2759
923
  // voice transcription, don't overwrite it. Let onFirstStream
2760
924
  // promote to 🤔 when Claude actually starts work.
2761
925
  //
2762
- // rc.32 second guard: skip THINKING if we're going to autosteer
2763
- // this message. Pre-fix, autosteer messages briefly showed
2764
- // 🤔 → ✍ (THINKING set here, then AUTOSTEERED set inside the
2765
- // autosteer block ~50 lines below). The flash was visible to
2766
- // users. Detect the autosteer pre-condition (SDK pm active AND
2767
- // session in-flight AND chat hasn't opted out) and skip.
2768
- const willAutosteer = pm.has(sessionKey)
2769
- && pm.get(sessionKey)?.inFlight
2770
- && pm.isSdkFor(sessionKey)
2771
- && (chatConfig.autosteer != null
2772
- ? chatConfig.autosteer !== false
2773
- : config.bot?.autosteer !== false);
2774
- if (!voiceAck.ackEmitted && !willAutosteer) {
926
+ // Skip the 🤔 flash for messages that are about to be
927
+ // autosteered. willAutosteer evaluates the same pre-condition
928
+ // tryAutosteer would.
929
+ if (!voiceAck.ackEmitted && !autosteer.willAutosteer(sessionKey, chatConfig)) {
2775
930
  reactor.setState('THINKING');
2776
931
  }
2777
932
 
@@ -2781,86 +936,33 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
2781
936
  // exit paths below (NO_REPLY, streamed-reply preview-becomes-final,
2782
937
  // discard+redeliver, regular reply at end) call it directly.
2783
938
 
2784
- // 0.8.0 Phase 2 step 1 — AUTOSTEER. If SDK pm is active AND there's
2785
- // already an in-flight turn for this session AND autosteer isn't
2786
- // disabled in config, route this user message via pm.steer()
2787
- // instead of pm.send(). Matches OpenClaw's default UX: typing a
2788
- // follow-up while the bot is mid-reply MERGES into the active
2789
- // turn rather than queueing as a separate response. Saves a turn,
2790
- // saves tokens, feels more conversational.
2791
- //
2792
- // Opt-out: config.bot.autosteer === false (or per-chat
2793
- // chatConfig.autosteer === false). CLI pm always falls through
2794
- // to the queue-FIFO path (no steer primitive on stream-json).
939
+ // AUTOSTEER. If session is in-flight AND autosteer isn't disabled,
940
+ // route this user message via pm.injectUserMessage instead of
941
+ // pm.send (OpenClaw "merge into active" UX). The steered message
942
+ // gets a reaction so the user knows it landed; the in-flight
943
+ // turn's response covers both messages.
2795
944
  //
2796
- // The steered message gets a ✍ reaction so the user knows it
2797
- // landed; no separate reply is generated (the in-flight turn's
2798
- // response covers both messages, OpenClaw-style).
945
+ // Mode: chatConfig.autosteerMode = 'merge' (priority='next', default)
946
+ // | 'queue' (priority='later'). Spike findings in
947
+ // scripts/spikes/native-queue.mjs.
2799
948
  //
2800
- // Reaction emoji must be from Telegram's curated allowlist
2801
- // (~60 standard emoji per core.telegram.org/bots/api#availablereactions).
2802
- // 🛞 (steering wheel) is NOT on it — Telegram returns
2803
- // 400: REACTION_INVALID. ✍ ("writing/noting") is on the list and
2804
- // conveys "incorporating this".
2805
- const chatAutosteer = chatConfig.autosteer != null
2806
- ? chatConfig.autosteer
2807
- : config.bot?.autosteer;
2808
- // 0.8.0-rc.42: autosteer is now native — push the user message
2809
- // directly onto the SDK's input controller with a priority hint.
2810
- // The SDK manages absorption / queueing per the U7 spike findings
2811
- // (scripts/spikes/native-queue.mjs, 2026-05-01):
2812
- //
2813
- // priority='next' (default): absorb into current turn at the next
2814
- // natural pause (between tool calls / after subagent return).
2815
- // ONE result event for the whole chain — same UX as the
2816
- // deleted autosteer-buffer + PostToolBatch flow.
2817
- // priority='later': queue for after current turn ends. SEPARATE
2818
- // result event per absorbed message. Cleaner per-msg lifecycle
2819
- // for chats that prefer accurate cost-row attribution.
2820
- //
2821
- // Per-chat opt-in via `chatConfig.autosteerMode: 'merge' | 'queue'`.
2822
- // 'merge' → priority='next' (default). 'queue' → priority='later'.
2823
- //
2824
- // Pre-rc.42 this used a custom autosteerBuffer + PostToolBatch hook
2825
- // returning <channel source="user-followup"> additionalContext. The
2826
- // SDK at the time rejected priority='now' mid-tool-use (m87 gate);
2827
- // 'next'/'later' were never tested. The U7 spike confirmed all
2828
- // three priorities now work cleanly — so the buffer/hook detour is
2829
- // gone. ~250 LOC + 2 test files deleted.
2830
- //
2831
- // CLI pm has no inputController push primitive, so it falls
2832
- // through to FIFO pm.send (same UX as 0.7.x — queued behind active).
2833
- const autosteerEnabled = chatAutosteer !== false
2834
- && pm.isSdkFor(sessionKey);
2835
- if (autosteerEnabled && pm.has(sessionKey)) {
2836
- const entry = pm.get(sessionKey);
2837
- if (entry?.inFlight) {
2838
- const autosteerMode = chatConfig.autosteerMode != null
2839
- ? chatConfig.autosteerMode
2840
- : config.bot?.autosteerMode;
2841
- const priority = autosteerMode === 'queue' ? 'later' : 'next';
2842
- const ok = pm.injectUserMessage(sessionKey, {
2843
- content: prompt,
2844
- priority,
2845
- });
2846
- if (ok) {
2847
- autosteeredRefs.add(sessionKey, { chatId, msgId: msg.message_id });
2848
- logEvent('autosteer', {
2849
- chat_id: chatId, msg_id: msg.message_id,
2850
- text_len: prompt?.length ?? 0,
2851
- priority,
2852
- });
2853
- stopTyping();
2854
- // setState('AUTOSTEERED') is terminal — bypasses the throttle,
2855
- // serializes after any in-flight QUEUED apply via applyChain.
2856
- await reactor.setState('AUTOSTEERED');
2857
- // rc.38: AUTOSTEERED is terminal; stop the reactor's STALL /
2858
- // TIMEOUT timers so they don't pin the closure for up to 30s.
2859
- reactor.stop();
2860
- markReplied();
2861
- return;
2862
- }
2863
- }
949
+ // Reaction emoji must be from Telegram's curated allowlist (~60
950
+ // standard emoji per core.telegram.org/bots/api#availablereactions).
951
+ // 🛞 is NOT on it (400: REACTION_INVALID). ✍ ("writing/noting")
952
+ // is on the list and conveys "incorporating this".
953
+ const steered = autosteer.tryAutosteer({
954
+ sessionKey, chatConfig, chatId, msg, prompt,
955
+ });
956
+ if (steered.autosteered) {
957
+ stopTyping();
958
+ // setState('AUTOSTEERED') is terminal — bypasses throttle,
959
+ // serializes after any in-flight QUEUED apply via applyChain.
960
+ await reactor.setState('AUTOSTEERED');
961
+ // AUTOSTEERED is terminal; stop the reactor's STALL / TIMEOUT
962
+ // timers so they don't pin the closure for up to 30s.
963
+ reactor.stop();
964
+ markReplied();
965
+ return;
2864
966
  }
2865
967
 
2866
968
  try {
@@ -2914,12 +1016,9 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
2914
1016
  // role_ordering / context_overflow / missing_tool_input),
2915
1017
  // tell pm to reset the session NOW so the user's NEXT
2916
1018
  // message starts fresh — without them having to type /new.
2917
- // Only fires when pm.resetSession is available (SDK pm
2918
- // path); CLI pm doesn't have the method.
2919
1019
  const cls = classifyError(result.error);
2920
- const recoverTarget = pm.pickFor(sessionKey);
2921
- if (cls.autoRecover === 'reset_session' && typeof recoverTarget.resetSession === 'function') {
2922
- recoverTarget.resetSession(sessionKey, { reason: cls.kind })
1020
+ if (cls.autoRecover === 'reset_session') {
1021
+ pm.resetSession(sessionKey, { reason: cls.kind })
2923
1022
  .catch((err) => console.error(`[${label}] auto-reset failed: ${err.message}`));
2924
1023
  logEvent('auto-recover', {
2925
1024
  chat_id: chatId, kind: cls.kind, action: 'reset_session',
@@ -2951,14 +1050,12 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
2951
1050
  // result, the followup messages (if any) get their own SDK
2952
1051
  // pause to absorb at, no special handling needed.
2953
1052
 
2954
- // 0.8.0 Phase 2 step 4: context-full live hint. After a
2955
- // successful turn, peek at SDK's getContextUsage(); if past
2956
- // the threshold, post a quiet hint so the user knows /new
2957
- // will help. SDK pm only — CLI pm has no equivalent (no
2958
- // Query object, no getContextUsage). OPT-IN per-chat or
2959
- // per-bot — most chats don't want the noise. Per-chat takes
2960
- // precedence over per-bot so admins (Ivan DM) can opt in
2961
- // without forcing it on every other chat.
1053
+ // Context-full live hint. After a successful turn, peek at
1054
+ // SDK's getContextUsage(); if past the threshold, post a
1055
+ // quiet hint so the user knows /new will help. OPT-IN
1056
+ // per-chat or per-bot — most chats don't want the noise.
1057
+ // Per-chat takes precedence over per-bot so admins (Ivan DM)
1058
+ // can opt in without forcing it on every other chat.
2962
1059
  //
2963
1060
  // rc.56: threshold default lowered to 70% (was 85%) because
2964
1061
  // the SDK auto-compacts mid-turn at ~85% — by the time
@@ -2977,7 +1074,7 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
2977
1074
  // session first crosses the threshold, mark the flag, suppress
2978
1075
  // subsequent fires. Cleared on compact-boundary so the next
2979
1076
  // cycle (if it crosses again) will fire fresh.
2980
- if (pm.isSdkFor(sessionKey) && chatCtxHint === true && !contextHintShown.has(sessionKey)) {
1077
+ if (chatCtxHint === true && !contextHintShown.has(sessionKey)) {
2981
1078
  const entry = pm.get(sessionKey);
2982
1079
  const q = entry?.query;
2983
1080
  if (q && typeof q.getContextUsage === 'function') {
@@ -3443,91 +1540,11 @@ function createBot(token) {
3443
1540
  const rawText = msg.text || '';
3444
1541
  const cleanText = mentionRe ? rawText.replace(mentionRe, '').trim() : rawText.trim();
3445
1542
 
3446
- // Abort: skip the queue entirely. Matches bilingual natural-language
3447
- // cues ("stop" / "стоп" / "cancel" / "отмена" / …) and explicit
3448
- // slash commands (/stop, /abort, /cancel). Kills the active Claude
3449
- // subprocess and drains queued messages for this chat. Replies so
3450
- // the user sees the bot heard them — silent abort is worse than
3451
- // acknowledged abort.
3452
- if (isAbortRequest(cleanText)) {
3453
- const threadId = msg.message_thread_id?.toString();
3454
- const sessionKey = getSessionKey(chatId, threadId, chatConfig);
3455
- const hadActive = pm.has(sessionKey) && !!pm.get(sessionKey)?.inFlight;
3456
- // Mark BEFORE killing: the 'close' event fires almost immediately
3457
- // after SIGTERM, and processQueue's catch needs to see the flag to
3458
- // skip the generic error-reply. If we marked after, there'd be a
3459
- // race where the error-reply slips through.
3460
- if (hadActive) markSessionAborted(sessionKey);
3461
- // 0.8.0 Phase 2 step 2: under SDK pm, prefer interrupt() +
3462
- // drainQueue() — keeps the Query alive (cheap to reuse for
3463
- // the user's next message), no respawn cost. Falls back to
3464
- // pm.kill() under CLI pm, which is the original behaviour.
3465
- //
3466
- // Why both: interrupt() cancels the in-flight turn at SDK
3467
- // level WITHOUT tearing down the subprocess; drainQueue()
3468
- // rejects every queued pending with err.code='INTERRUPTED'
3469
- // so the abort-grace classifier suppresses error replies.
3470
- //
3471
- // Kill ONLY the user's own session, not every topic in the
3472
- // chat. Pre-0.6.5 this was pm.killChat(chatId) which fanned
3473
- // out across all topics under isolateTopics=true: the user
3474
- // typed "stop" in topic A and the bot tore down topic B's
3475
- // in-flight turn, surfacing a 💥 reply to topic B's user
3476
- // (whose key was never marked aborted, so the abort grace
3477
- // window didn't apply). With isolateTopics=false the
3478
- // sessionKey is the chat itself, so killing one session is
3479
- // the same as killing the chat — behavior unchanged for the
3480
- // common case.
3481
- const stopTarget = pm.pickFor(sessionKey);
3482
- if (typeof stopTarget.interrupt === 'function') {
3483
- await stopTarget.interrupt(sessionKey).catch((err) =>
3484
- console.error(`[${BOT_NAME}] interrupt failed: ${err.message}`));
3485
- if (typeof stopTarget.drainQueue === 'function') {
3486
- stopTarget.drainQueue(sessionKey, 'INTERRUPTED');
3487
- }
3488
- } else {
3489
- await stopTarget.kill(sessionKey).catch((err) =>
3490
- console.error(`[${BOT_NAME}] abort kill failed: ${err.message}`));
3491
- }
3492
- // rc.42: autosteer buffer is gone (native SDK priority push).
3493
- // Followups already pushed onto the SDK's input controller will
3494
- // be drained by drainQueue() / kill() on the entry — no separate
3495
- // buffer to clear.
3496
- // Clear ✍ reactions on already-autosteered messages from this
3497
- // aborted turn — they're now dead context.
3498
- clearAutosteeredReactions(sessionKey).catch(() => {});
3499
- logEvent('abort-requested', {
3500
- chat_id: chatId, user_id: msg.from?.id || null,
3501
- had_active: hadActive,
3502
- trigger: cleanText.slice(0, 40),
3503
- });
3504
- // Reply in the same language the user aborted in. Cyrillic-detection
3505
- // is crude but reliable for ru/en (the only two cue sets we ship).
3506
- // 0.6.12 fix: pre-0.6.5 the message included a "queue cleared (N)"
3507
- // suffix when N > 0; that came from drainQueuesForChat which always
3508
- // returned 0 in the concurrent model and was deleted in 0.6.5.
3509
- // The reference to `dropped` here was missed, so EVERY abort hit
3510
- // a ReferenceError that the surrounding `catch {}` swallowed —
3511
- // leaving the user with no ack at all. Reduced to "stopped vs
3512
- // nothing-to-stop" since the queue-cleared variant was already
3513
- // dead.
3514
- const lang = /[а-яё]/i.test(cleanText) ? 'ru' : 'en';
3515
- const strs = {
3516
- en: { stopped: 'Stopped.', nothing: 'Nothing to stop.' },
3517
- ru: { stopped: 'Остановлено.', nothing: 'Нечего останавливать.' },
3518
- }[lang];
3519
- const reply = hadActive ? strs.stopped : strs.nothing;
3520
- try {
3521
- await tg(bot, 'sendMessage', {
3522
- chat_id: chatId, text: reply,
3523
- reply_parameters: { message_id: msg.message_id, allow_sending_without_reply: true },
3524
- ...(threadId && { message_thread_id: threadId }),
3525
- }, { source: 'abort-ack', botName: BOT_NAME });
3526
- } catch (err) {
3527
- // Don't swallow silently — if the ack itself fails, the operator
3528
- // needs to know (the user typed stop and saw nothing).
3529
- console.error(`[${BOT_NAME}] abort-ack send failed: ${err.message}`);
3530
- }
1543
+ // Abort: skip the queue entirely. Matches bilingual natural-
1544
+ // language cues + slash variants. handleAbortIfRequested
1545
+ // (lib/handlers/abort.js) returns true when handled — short-
1546
+ // circuit out of dispatch.
1547
+ if (await handleAbortIfRequested(msg, chatId, chatConfig, cleanText)) {
3531
1548
  return;
3532
1549
  }
3533
1550
 
@@ -3645,8 +1662,18 @@ function createBot(token) {
3645
1662
  });
3646
1663
 
3647
1664
  bot.on('callback_query:data', async (ctx) => {
1665
+ if (!isWellFormedCallbackQuery(ctx.callbackQuery)) {
1666
+ logEvent('malformed-update', {
1667
+ bot: BOT_NAME,
1668
+ update_id: ctx.update?.update_id,
1669
+ reason: 'callback_query missing message/from/data or inline-mode',
1670
+ });
1671
+ // Best-effort ack so Telegram stops re-sending. May fail silently.
1672
+ await ctx.answerCallbackQuery({ text: 'Stale or invalid button.' }).catch(() => {});
1673
+ return;
1674
+ }
3648
1675
  try {
3649
- const data = ctx.callbackQuery?.data || '';
1676
+ const data = ctx.callbackQuery.data;
3650
1677
  if (data.startsWith('cfg:')) {
3651
1678
  await handleConfigCallback(ctx);
3652
1679
  } else {
@@ -3737,117 +1764,9 @@ function createBot(token) {
3737
1764
  return bot;
3738
1765
  }
3739
1766
 
3740
- // ─── Manual polling ─────────────────────────────────────────────────
3741
-
3742
- async function pollBot(bot) {
3743
- await bot.init();
3744
- bot._setBotUsername(bot.botInfo.username);
3745
- console.log(`[${BOT_NAME}] Bot @${bot.botInfo.username} ready`);
3746
-
3747
- await bot.api.deleteWebhook();
3748
-
3749
- // Restore polling offset from DB so a restart doesn't re-process the
3750
- // backlog Telegram has accumulated while we were down. Grammy's in-memory
3751
- // offset resets to 0 each boot, which makes getUpdates return every
3752
- // un-confirmed update since the last ack — for an overnight outage that
3753
- // can mean replaying dozens of stale messages.
3754
- let offset = 0;
3755
- try {
3756
- const saved = db?.getPollingOffset?.(BOT_NAME);
3757
- if (saved && saved > 0) {
3758
- offset = saved + 1;
3759
- console.log(`[${BOT_NAME}] resuming polling from update_id ${saved}`);
3760
- }
3761
- } catch (err) {
3762
- console.error(`[${BOT_NAME}] getPollingOffset failed: ${err.message}`);
3763
- }
3764
- let running = true;
3765
- bot._lastPollTs = Date.now();
3766
-
3767
- bot._stop = () => { running = false; };
3768
-
3769
- while (running) {
3770
- try {
3771
- const updates = await bot.api.getUpdates({
3772
- offset,
3773
- // Long-poll: Telegram holds the connection up to 25s waiting for
3774
- // updates. When something arrives it returns immediately; empty
3775
- // windows cost ~0 local CPU. Drops median inbound latency vs the
3776
- // old short-poll-every-1s.
3777
- timeout: 25,
3778
- allowed_updates: ['message', 'edited_message', 'callback_query'],
3779
- });
3780
- bot._lastPollTs = Date.now();
3781
-
3782
- for (const update of updates) {
3783
- offset = update.update_id + 1;
3784
- if (update.message && isWellFormedMessage(update.message)) {
3785
- const m = update.message;
3786
- const chatId = m.chat.id.toString();
3787
- const chatConfig = config.chats[chatId];
3788
- const threadId = m.message_thread_id?.toString();
3789
- // rc.57: use getTopicName() helper which handles BOTH legacy string
3790
- // form and rc.48 object form. Pre-rc.57 the direct lookup
3791
- // `chatConfig.topics[threadId]` template-literal'd into "[object Object]"
3792
- // because rc.48 topics are objects like {name:"Music",agent:"...",...}.
3793
- const topicName = threadId
3794
- ? (chatConfig ? getTopicName(chatConfig, threadId) : threadId)
3795
- : null;
3796
- const chatLabel = chatConfig?.name || chatId;
3797
- const label = topicName ? `${chatLabel}/${topicName}` : chatLabel;
3798
- console.log(`[${BOT_NAME}] ← ${label}: ${(m.text || m.caption || '(media)').slice(0, 60)}`);
3799
- }
3800
- try {
3801
- await bot.handleUpdate(update);
3802
- } catch (err) {
3803
- console.error(`[${BOT_NAME}] Handler error:`, err.message);
3804
- }
3805
- }
3806
- // Persist offset after batch dispatch so a crash mid-batch only risks
3807
- // re-processing the unacked updates. We write only on non-empty batches
3808
- // to avoid churning the row on every 25s idle poll.
3809
- if (updates.length > 0) {
3810
- dbWrite(() => db.savePollingOffset(BOT_NAME, updates[updates.length - 1].update_id),
3811
- 'save polling offset');
3812
- }
3813
- // No sleep on the success path: long-poll already blocks up to 25s
3814
- // when idle. Sleeping here would add latency with no gain.
3815
- } catch (err) {
3816
- if (!running) break;
3817
- if (err.error_code === 409) {
3818
- console.log(`[${BOT_NAME}] 409, waiting 3s...`);
3819
- await new Promise(r => setTimeout(r, 3000));
3820
- } else {
3821
- console.error(`[${BOT_NAME}] Poll error:`, err.message);
3822
- await new Promise(r => setTimeout(r, 3000));
3823
- }
3824
- }
3825
- }
3826
- }
3827
-
3828
- // Watchdog: if the poll loop hasn't ticked in POLL_STALL_MS, log an event
3829
- // so external monitoring (or a human reading `events`) can see it. Launchd
3830
- // restarts the whole process on death, so we don't exit here — a stalled
3831
- // grammy poll is usually transient (network flap, Telegram 5xx).
3832
- const POLL_STALL_MS = 120_000;
3833
- function startPollWatchdog(bot) {
3834
- let stalled = false;
3835
- return setInterval(() => {
3836
- const now = Date.now();
3837
- const age = now - (bot._lastPollTs || 0);
3838
- if (age > POLL_STALL_MS) {
3839
- if (!stalled) {
3840
- console.error(`[${BOT_NAME}] poll-stalled: no tick in ${Math.round(age / 1000)}s`);
3841
- logEvent('poll-stalled', { bot: BOT_NAME, stall_ms: age });
3842
- stalled = true;
3843
- }
3844
- } else if (stalled) {
3845
- console.log(`[${BOT_NAME}] poll-recovered after stall`);
3846
- logEvent('poll-recovered', { bot: BOT_NAME });
3847
- stalled = false;
3848
- }
3849
- }, 30_000);
3850
- }
1767
+ // ─── Manual polling — extracted to lib/handlers/poll.js ────────────
1768
+ let pollBot = null;
1769
+ let startPollWatchdog = null;
3851
1770
 
3852
1771
  // ─── Main ───────────────────────────────────────────────────────────
3853
1772
 
@@ -3942,281 +1861,126 @@ async function main() {
3942
1861
 
3943
1862
  const cap = config.maxWarmProcesses || DEFAULT_MAX_WARM_PROCS;
3944
1863
 
3945
- // 0.8.0-rc.6: per-chat pm selection. Three modes:
3946
- // 1. POLYGRAM_USE_SDK=1 with no POLYGRAM_SDK_CHATS list → all chats SDK
3947
- // 2. POLYGRAM_SDK_CHATS=id1,id2,... → those chats
3948
- // use SDK; everyone else uses CLI (both pms live in the daemon)
3949
- // 3. neither set → all chats CLI
3950
- // The per-chat mode lets us soak SDK pm against real traffic in one
3951
- // chat (Ivan's DM) while keeping partner-facing chats on the
3952
- // battle-tested CLI path. When both pms run, killChat /shutdown
3953
- // broadcast to both; everything else routes per-sessionKey via
3954
- // pickPmFor() based on the chat's set membership.
3955
- // rc.17: router policy + proxy live in lib/pm-router.js for
3956
- // testability. Policy parses env config and produces
3957
- // pickPmKindFor; createPmRouter wraps the cli/sdk pms with the
3958
- // routed surface.
3959
- const { sdkActive, sdkAllChats, sdkSomeChats, sdkChatIdSet, pickPmKindFor } = makeRouterPolicy({
3960
- useSdkAll: USE_SDK,
3961
- sdkChats: String(process.env.POLYGRAM_SDK_CHATS || '').split(','),
3962
- getChatIdFromKey,
1864
+ // 0.9.0: single pm. SDK ProcessManager is the only impl after the
1865
+ // CLI pm + dual-pm router were deleted (lib/pm-router.js is removed in
1866
+ // 0.9.0; see its header comment in 0.8.x for the migration history).
1867
+ // 0.9.0 commit 19: SDK lifecycle callbacks live in lib/sdk/callbacks.js;
1868
+ // factory threads the runtime context in via DI. onRespawn dropped
1869
+ // earlier — the SDK pm applies /model + /effort live, so there's no
1870
+ // drain event to hook.
1871
+ const sdkCallbacks = createSdkCallbacks({
1872
+ db, dbWrite, config, bot, botName: BOT_NAME, tg, logEvent,
1873
+ classifyToolName, announce, shouldAnnounce, contextHintShown,
1874
+ extractAssistantText, getChatIdFromKey, getThreadIdFromKey,
1875
+ logger: console,
3963
1876
  });
3964
-
3965
- // Shared callbacks: identical instance passed to both pms so a
3966
- // chat's lifecycle events look the same regardless of which pm
3967
- // is handling it.
3968
1877
  const pmOpts = {
3969
1878
  cap,
3970
1879
  db,
3971
1880
  logger: console,
3972
- onInit: (sessionKey, event, entry) => {
3973
- dbWrite(() => db.upsertSession({
3974
- session_key: sessionKey,
3975
- chat_id: entry.chatId,
3976
- thread_id: entry.threadId,
3977
- claude_session_id: event.session_id,
3978
- agent: config.chats[entry.chatId]?.agent || null,
3979
- cwd: config.chats[entry.chatId]?.cwd || null,
3980
- model: config.chats[entry.chatId]?.model || null,
3981
- effort: config.chats[entry.chatId]?.effort || null,
3982
- }), `upsert session ${sessionKey}`);
3983
- },
3984
- onClose: (sessionKey, code, entry) => {
3985
- console.log(`[${entry.label}] Process exited (code ${code})`);
3986
- logEvent('process-close', { chat_id: entry.chatId, session_key: sessionKey, code });
3987
- },
3988
- onStreamChunk: (sessionKey, partial, entry) => {
3989
- // Route to the head pending's per-turn streamer. In the 0.4.8
3990
- // concurrent-pending model, there can be N pendings queued — only
3991
- // the HEAD is the turn Claude is actively emitting events for.
3992
- const head = entry.pendingQueue?.[0];
3993
- const s = head?.context?.streamer;
3994
- if (s) s.onChunk(partial).catch(() => {});
3995
- // 0.8.0-rc.16: heartbeat the reactor so long text generation
3996
- // doesn't trip the 10s STALL → 🥱 / 30s TIMEOUT → 😨 promotion.
3997
- // Pre-rc.16 the reactor only got setState calls at turn start
3998
- // (THINKING) and per-tool (CODING/TOOL/...); pure text turns
3999
- // hit STALL within 10s of streaming. heartbeat() re-arms the
4000
- // stall timers without changing the visible emoji.
4001
- const r = head?.context?.reactor;
4002
- if (r && typeof r.heartbeat === 'function') r.heartbeat();
4003
- },
4004
- onToolUse: (sessionKey, toolName, entry) => {
4005
- const head = entry.pendingQueue?.[0];
4006
- const r = head?.context?.reactor;
4007
- if (r) r.setState(classifyToolName(toolName));
4008
- // 0.7.0 (Phase J): opt-in subagent announce. When Claude uses
4009
- // the Task tool to spawn a subagent, post a brief informational
4010
- // message to the chat so the user knows a heavier turn is in
4011
- // progress. ON by default (rc.9+) — set per-chat
4012
- // `announceSubagents: false` (or per-bot) to silence.
4013
- // Per-chat debounce 30s prevents announce-storms in tool-heavy
4014
- // turns.
4015
- const chatCfg = config.chats[entry.chatId] || {};
4016
- const optOut = chatCfg.announceSubagents != null
4017
- ? chatCfg.announceSubagents === false
4018
- : config.bot?.announceSubagents === false;
4019
- if (toolName === 'Task' && !optOut) {
4020
- if (shouldAnnounce(entry.chatId)) {
4021
- announce({
4022
- send: (b, method, params, m) => tg(b, method, params, m),
4023
- bot, chatId: entry.chatId,
4024
- threadId: head?.context?.threadId ?? null,
4025
- text: '🤖 Spawning subagent…',
4026
- meta: { botName: BOT_NAME, source: 'subagent-announce' },
4027
- logger: { error: (m) => console.error(`[${entry.label}] ${m}`) },
4028
- });
4029
- }
4030
- }
4031
- },
4032
- // 0.7.0 (Phase F): each new top-level assistant message gets its
4033
- // own bubble. When Claude emits text, then tool_use, then more
4034
- // text in a NEW assistant message (typical of tool-heavy turns),
4035
- // the previous bubble's content stays visible as a "thinking out
4036
- // loud" intermediate; the new message starts fresh below.
4037
- onAssistantMessageStart: (sessionKey, entry) => {
4038
- const head = entry.pendingQueue?.[0];
4039
- const s = head?.context?.streamer;
4040
- if (s) s.forceNewMessage();
4041
- // rc.25: heartbeat at every assistant-message boundary too. A
4042
- // long thinking phase (effort=high, 30+ s before first chunk)
4043
- // doesn't fire onStreamChunk. Without this, the freeze timer
4044
- // could expire while the model is "still thinking but about
4045
- // to speak".
4046
- const r = head?.context?.reactor;
4047
- if (r && typeof r.heartbeat === 'function') r.heartbeat();
4048
- },
4049
- // rc.47: autonomous wakeup forwarding. Fires when an SDK
4050
- // assistant message arrives with no head pending — typical
4051
- // ScheduleWakeup case where the agent self-fires without an
4052
- // inbound user message. We derive chat_id (always) and thread_id
4053
- // (when isolateTopics) from the sessionKey, then send the text
4054
- // to that chat/topic. Subagent messages were already filtered
4055
- // upstream (parent_tool_use_id != null check in pm-sdk).
4056
- //
4057
- // Best-effort send: failures are logged but don't propagate —
4058
- // an autonomous turn that can't be delivered shouldn't crash
4059
- // the daemon. Telemetry emitted as `autonomous-wakeup-message`
4060
- // so we can audit how often these fire and whether any get lost.
4061
- onAutonomousAssistantMessage: (sessionKey, msg /* , entry */) => {
4062
- try {
4063
- const text = extractAssistantText(msg);
4064
- if (!text) return;
4065
- const chatId = getChatIdFromKey(sessionKey);
4066
- const threadIdRaw = getThreadIdFromKey(sessionKey);
4067
- const threadId = threadIdRaw ? parseInt(threadIdRaw, 10) : null;
4068
- if (!bot) {
4069
- console.error(`[${BOT_NAME}] autonomous wakeup: bot not ready, dropping ${text.length} chars`);
4070
- return;
4071
- }
4072
- const params = {
4073
- chat_id: chatId,
4074
- text,
4075
- ...(Number.isInteger(threadId) && { message_thread_id: threadId }),
4076
- };
4077
- // Don't `await` — keep the pm-sdk event loop unblocked. The
4078
- // tg() wrapper handles its own retries / chunking.
4079
- tg(bot, 'sendMessage', params,
4080
- { source: 'autonomous-wakeup', botName: BOT_NAME }).catch((err) => {
4081
- console.error(`[${BOT_NAME}] autonomous wakeup send failed: ${err.message}`);
4082
- });
4083
- logEvent('autonomous-wakeup-message', {
4084
- chat_id: chatId,
4085
- session_key: sessionKey,
4086
- thread_id: threadIdRaw,
4087
- text_len: text.length,
4088
- });
4089
- } catch (err) {
4090
- console.error(`[${BOT_NAME}] autonomous wakeup handler: ${err.message}`);
4091
- }
4092
- },
4093
- // rc.29 onThinking removed — replaced by simpler timer-based
4094
- // approach in handleMessage (post-QUEUED setState). The
4095
- // SDK-thinking-block detection added complexity (partial-message
4096
- // bandwidth, thinking config mapping) without solving the actual
4097
- // user-visible problem ("show me the bot is working"). The lib-
4098
- // side handler in pm-sdk stays for any future caller; polygram
4099
- // doesn't wire it.
4100
- // 0.8.0 Phase 2 step 5: SDK auto-compaction observability. Fires
4101
- // when SDK emits SDKCompactBoundaryMessage (between turns or
4102
- // mid-turn — see Phase 0 gate 8.5). Surfaces a quiet system
4103
- // status note to the chat so the user knows the bot is busy
4104
- // reorganising context (compaction can take seconds, during
4105
- // which the bot looks unresponsive). ON by default (rc.12+) —
4106
- // set per-chat or per-bot `announceCompact: false` to silence.
4107
- // Only fires under SDK pm — the CLI pm has no equivalent event.
4108
- //
4109
- // Wording is intentionally non-technical — the user doesn't
4110
- // care about "compaction" or "tokens"; they just want to know
4111
- // the bot didn't hang.
4112
- onCompactBoundary: async (sessionKey, msg, entry) => {
4113
- // rc.59: clear the contextHint once-per-cycle gate. After
4114
- // compaction, context drops below threshold; if it climbs back
4115
- // up the next cycle should fire a fresh hint. Without this
4116
- // clear, a session that fired its hint pre-compact would never
4117
- // fire again for the rest of the daemon's life.
4118
- contextHintShown.delete(sessionKey);
4119
-
4120
- const chatCfg = config.chats[entry.chatId] || {};
4121
- const optOut = chatCfg.announceCompact != null
4122
- ? chatCfg.announceCompact === false
4123
- : config.bot?.announceCompact === false;
4124
- if (optOut) return;
4125
- const threadId = entry.threadId || undefined;
4126
-
4127
- // rc.62: word the message based on what actually happened.
4128
- // Pre-rc.62 we said "💭 Catching up on history, one moment…"
4129
- // for every compact_boundary — but the event fires AFTER
4130
- // compaction completed, not during. The "one moment…" wording
4131
- // implied more work was coming, leaving the user confused
4132
- // when nothing followed. Now: distinguish manual vs auto and
4133
- // surface the actual compression ratio.
4134
- const meta = msg?.compact_metadata || {};
4135
- const trigger = meta.trigger; // 'manual' | 'auto'
4136
- const preTokens = meta.pre_tokens;
4137
- const postTokens = meta.post_tokens;
4138
- const durationMs = meta.duration_ms;
4139
- const fmtTok = (n) => {
4140
- if (n == null) return null;
4141
- if (n >= 1000) return `${(n / 1000).toFixed(1)}k`;
4142
- return String(n);
4143
- };
4144
- const ratio = (preTokens && postTokens) ? `${fmtTok(preTokens)} → ${fmtTok(postTokens)}` : null;
4145
- const duration = durationMs ? `${(durationMs / 1000).toFixed(1)}s` : null;
4146
- const stats = [ratio, duration].filter(Boolean).join(', ');
4147
-
4148
- let text;
4149
- if (trigger === 'manual') {
4150
- // /compact — the user explicitly asked. Compaction is DONE,
4151
- // session is idle, ready for next message.
4152
- text = stats
4153
- ? `✅ Compacted (${stats}). Ready for your next message.`
4154
- : `✅ Compacted. Ready for your next message.`;
4155
- } else {
4156
- // Auto-compaction (mid-turn). The agent is continuing, this
4157
- // is just informational — show that something happened
4158
- // without implying "wait, more is coming."
4159
- text = stats
4160
- ? `💭 Auto-compacted (${stats}). Continuing…`
4161
- : `💭 Auto-compacted. Continuing…`;
4162
- }
4163
-
4164
- try {
4165
- await tg(bot, 'sendMessage', {
4166
- chat_id: entry.chatId,
4167
- text,
4168
- ...(threadId ? { message_thread_id: threadId } : {}),
4169
- }, { source: 'compact-boundary', botName: BOT_NAME });
4170
- } catch (err) {
4171
- console.error(`[${entry.label}] compact-boundary post: ${err.message}`);
4172
- }
4173
- },
4174
- // Fires after a graceful /model or /effort drain has actually
4175
- // swapped to the new settings. Post a confirmation back to the
4176
- // chat ONLY when wasDrained=true — the user actively waited for an
4177
- // in-flight turn to finish before the switch took effect, so the
4178
- // explicit "switched" message is meaningful. When the kill was
4179
- // immediate (queue empty), the inline-card update + button toast
4180
- // already convey "done", and a separate message is just noise.
4181
- onRespawn: (sessionKey, reason, entry, wasDrained) => {
4182
- if (!wasDrained) return;
4183
- const chatId = entry.chatId;
4184
- if (!chatId) return;
4185
- const chatConfig = config.chats[chatId];
4186
- if (!chatConfig) return;
4187
- const text = reason === 'model-change'
4188
- ? `✓ Using ${chatConfig.model} now.`
4189
- : reason === 'effort-change'
4190
- ? `✓ Effort is ${chatConfig.effort} now.`
4191
- : `✓ Ready.`;
4192
- const threadId = entry.threadId || undefined;
4193
- tg(bot, 'sendMessage', {
4194
- chat_id: chatId, text,
4195
- ...(threadId && { message_thread_id: threadId }),
4196
- }, { source: 'respawn-confirm', botName: BOT_NAME }).catch(() => {});
4197
- },
1881
+ ...sdkCallbacks,
4198
1882
  };
4199
1883
 
4200
- // Instantiate the actual pm(s). When sdkActive is false we still
4201
- // build a CLI pm; SDK pm is null. When sdkActive is true we always
4202
- // build BOTH so chats outside the SDK list still get the CLI path.
4203
- const cliPm = new ProcessManager({ ...pmOpts, spawnFn: spawnClaude });
4204
- const sdkPm = sdkActive
4205
- ? new ProcessManagerSdk({ ...pmOpts, spawnFn: buildSdkOptions })
4206
- : null;
4207
-
4208
- // Routing pm: same surface as a single pm, but per-method routing
4209
- // through pickPmKindFor(sessionKey). Per-method semantics
4210
- // documented in lib/pm-router.js.
4211
- pm = createPmRouter({ cliPm, sdkPm, pickPmKindFor });
4212
-
4213
- if (sdkAllChats) {
4214
- console.log('[polygram] using SDK ProcessManager (all chats)');
4215
- } else if (sdkSomeChats) {
4216
- console.log(`[polygram] router active: SDK pm for chats {${Array.from(sdkChatIdSet).join(',')}}, CLI pm for everyone else`);
4217
- } else {
4218
- console.log('[polygram] using CLI ProcessManager');
4219
- }
1884
+ // 0.9.0 wiring order matters here. The factories destructure
1885
+ // their `pm` / `bot` deps by value at call time; the closures
1886
+ // they return then reference those CAPTURED references at every
1887
+ // future call. So `pm` and `bot` must be assigned BEFORE the
1888
+ // factory that consumes them runs — late-rebinding the
1889
+ // module-level `let pm` doesn't propagate into the factory's
1890
+ // captured local. v3 architecture review caught this as a
1891
+ // production-blocking bug after commit 29.
1892
+ //
1893
+ // Order encoded below:
1894
+ // 1. bot = createBot(token) — needed by approvals + abort
1895
+ // 2. createApprovals(...) — provides makeCanUseTool
1896
+ // 3. createBuildSdkOptions(...) — uses makeCanUseTool
1897
+ // 4. pm = new ProcessManagerSdk(...) — uses buildSdkOptions
1898
+ // 5. handler factories that need pm/bot — see them as live refs
1899
+
1900
+ bot = createBot(config.bot.token);
1901
+
1902
+ ({
1903
+ makeCanUseTool,
1904
+ handleApprovalCallback,
1905
+ resolveApprovalWaiter,
1906
+ startApprovalSweeper,
1907
+ cancelAllWaiters,
1908
+ } = createApprovals({
1909
+ config, db, bot, botName: BOT_NAME, tg, logEvent,
1910
+ approvals, getChatIdFromKey, logger: console,
1911
+ }));
1912
+ buildSdkOptions = createBuildSdkOptions({
1913
+ config,
1914
+ botName: BOT_NAME,
1915
+ childHome: CHILD_HOME,
1916
+ makeCanUseTool,
1917
+ logEvent,
1918
+ logger: console,
1919
+ });
1920
+ transcribeVoiceAttachments = createTranscribeVoiceAttachments({
1921
+ config, db, dbWrite, tg, logEvent,
1922
+ transcribeVoice, isVoiceAttachment,
1923
+ botName: BOT_NAME, logger: console,
1924
+ });
1925
+ downloadAttachments = createDownloadAttachments({
1926
+ config, db, dbWrite, inboxDir: INBOX_DIR, logger: console,
1927
+ });
1928
+ pm = new ProcessManagerSdk({ ...pmOpts, spawnFn: buildSdkOptions });
1929
+ // formatConfigInfoText MUST be wired BEFORE createHandleConfigCallback
1930
+ // — the latter destructures formatConfigInfoText from its deps at
1931
+ // call time and captures the value (closure-by-value). v4 reviewer
1932
+ // caught this as the same class as the v3 BLOCKER.
1933
+ formatConfigInfoText = createFormatConfigInfoText({
1934
+ pm, db, getClaudeSessionId,
1935
+ });
1936
+ handleConfigCallback = createHandleConfigCallback({
1937
+ config, db, dbWrite, pm, getSessionKey,
1938
+ formatConfigInfoText, buildConfigKeyboard,
1939
+ botName: BOT_NAME, logger: console,
1940
+ });
1941
+ handleAbortIfRequested = createHandleAbort({
1942
+ pm, bot, tg, logEvent, isAbortRequest,
1943
+ markSessionAborted, clearAutosteeredReactions, getSessionKey,
1944
+ botName: BOT_NAME, logger: console,
1945
+ });
1946
+ autosteer = createAutosteerHandlers({
1947
+ config, pm, autosteeredRefs, logEvent,
1948
+ });
1949
+ recordInbound = createRecordInbound({
1950
+ db, dbWrite, config, botName: BOT_NAME, extractAttachments,
1951
+ });
1952
+ handleSendOverIpc = createHandleSendOverIpc({
1953
+ config, bot, tg, botName: BOT_NAME,
1954
+ });
1955
+ ({
1956
+ dispatchHandleMessage,
1957
+ attemptAutoResume,
1958
+ errorReplyText,
1959
+ queueWarnThreshold,
1960
+ inFlightHandlers,
1961
+ } = createDispatcher({
1962
+ config, db, dbWrite, tg, botName: BOT_NAME, logEvent,
1963
+ handleMessage, sendToProcess,
1964
+ classifyError, isAutoResumable,
1965
+ abortGrace, autoResumeTracker,
1966
+ chunkMarkdownText, deliverReplies,
1967
+ TG_MAX_LEN,
1968
+ getIsShuttingDown: () => isShuttingDown,
1969
+ logger: console,
1970
+ }));
1971
+ ({ pollBot, startPollWatchdog } = createPollLoop({
1972
+ db, dbWrite, config, botName: BOT_NAME,
1973
+ isWellFormedMessage, getTopicName,
1974
+ logger: console,
1975
+ }));
1976
+ dispatchSlashCommand = createSlashCommands({
1977
+ config, db, dbWrite, pm, pairings, parsePairingTtl,
1978
+ contextHintShown, formatContextReply, getClaudeSessionId,
1979
+ getOrSpawnForChat, parsePairCodeArgs,
1980
+ modelVersionsDesc: MODEL_VERSIONS_DESC,
1981
+ botName: BOT_NAME, logEvent, logger: console,
1982
+ });
1983
+ console.log('[polygram] using SDK ProcessManager');
4220
1984
 
4221
1985
  console.log(`polygram (LRU cap=${cap}, SQLite source of truth)`);
4222
1986
  console.log(`Chats: ${Object.entries(config.chats).map(([id, c]) => `${c.name} (${c.model}/${c.effort})`).join(', ')}`);
@@ -4241,7 +2005,8 @@ async function main() {
4241
2005
  }
4242
2006
  }
4243
2007
 
4244
- bot = createBot(config.bot.token);
2008
+ // bot was created earlier in main() — see the wiring-order block
2009
+ // around the factory creations.
4245
2010
 
4246
2011
  // Graceful shutdown: stop accepting new inbound, drain in-flight pendings
4247
2012
  // up to SHUTDOWN_DRAIN_MS, then mark anything still unfinished so boot
@@ -4296,10 +2061,12 @@ async function main() {
4296
2061
  if (approvalSweepTimer) clearInterval(approvalSweepTimer);
4297
2062
  if (ipcCloser) ipcCloser.close().catch(() => {});
4298
2063
  try { fs.unlinkSync(ipcServer.secretPathFor(BOT_NAME)); } catch {}
4299
- for (const list of approvalWaiters.values()) {
4300
- for (const fn of list) { try { fn('cancelled', 'polygram shutting down'); } catch {} }
4301
- }
4302
- approvalWaiters.clear();
2064
+ // Reject every parked canUseTool waiter so the SDK doesn't
2065
+ // hang on a dangling approval Promise. v4 review caught this:
2066
+ // commit 29 moved the Map into lib/handlers/approvals.js; the
2067
+ // old `approvalWaiters` reference here would ReferenceError
2068
+ // mid-shutdown and prevent pm/DB cleanup.
2069
+ if (cancelAllWaiters) cancelAllWaiters('cancelled', 'polygram shutting down');
4303
2070
  if (pm) await pm.shutdown().catch(() => {});
4304
2071
  if (db) {
4305
2072
  try { db.logEvent('polygram-stop'); db.raw.close(); } catch {}
@@ -4335,7 +2102,6 @@ async function main() {
4335
2102
  path: ipcServer.socketPathFor(BOT_NAME),
4336
2103
  secret: ipcSecret,
4337
2104
  handlers: {
4338
- approval_request: handleApprovalRequest,
4339
2105
  ping: async () => ({ pong: true, bot: BOT_NAME }),
4340
2106
  send: (req) => handleSendOverIpc(req),
4341
2107
  },
@@ -4547,7 +2313,7 @@ async function main() {
4547
2313
  console.error(`[${BOT_NAME}] Fatal:`, err.message);
4548
2314
  });
4549
2315
 
4550
- const watchdogTimer = startPollWatchdog(bot);
2316
+ const watchdogTimer = startPollWatchdog(bot, { logEvent });
4551
2317
  process.once('exit', () => clearInterval(watchdogTimer));
4552
2318
 
4553
2319
  await pollPromise;