polygram 0.4.6 → 0.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "$schema": "https://anthropic.com/claude-code/plugin.schema.json",
3
3
  "name": "polygram",
4
- "version": "0.4.6",
4
+ "version": "0.4.9",
5
5
  "description": "Telegram integration for Claude Code that preserves the OpenClaw per-chat session model. Migration target for OpenClaw users. Multi-bot, multi-chat, per-topic isolation; SQLite transcripts; inline-keyboard approvals. Bundles /polygram:status|logs|pair-code|approvals admin commands and a history skill.",
6
6
  "keywords": [
7
7
  "telegram",
@@ -0,0 +1,41 @@
1
/**
 * Per-key chain lock. Each acquire() resolves to a release function; the
 * next acquire() for the same key waits until the previous holder has
 * called its release. Different keys never block each other.
 *
 * Used by polygram to serialise stdin writes per session. Pre-work
 * (attachment download, voice transcription, prompt formatting) runs
 * concurrently; only the stdin write itself is serialised so Claude
 * reads messages in arrival order and replies come out in the same
 * order.
 *
 * Deliberately minimal — no timeouts, no cancellation, no fairness
 * guarantees beyond FIFO. Callers are expected to ALWAYS call release,
 * even on error paths, or the lock leaks (blocks all future acquires
 * for that key forever).
 *
 * @returns {{ acquire: (key: any) => Promise<() => void>, size: number }}
 *   `acquire(key)` resolves to the release function once the lock is held;
 *   `size` is the number of keys that currently have a holder or waiters.
 */
function createAsyncLock() {
  // key → tail promise; settles once the most recent acquirer has released.
  const chains = new Map();

  return {
    async acquire(key) {
      const prev = chains.get(key) || Promise.resolve();
      let release;
      const next = new Promise((resolve) => { release = resolve; });
      // Keep a reference to the exact promise stored in the Map. Calling
      // prev.then(...) a second time would create a NEW promise that can
      // never be === to the stored one, so the cleanup below would never
      // run and the Map would grow by one entry per distinct key.
      const tail = prev.then(() => next);
      chains.set(key, tail);
      await prev;

      let released = false;
      return () => {
        // Idempotent: a second call (e.g. from an over-eager finally) is a no-op.
        if (released) return;
        released = true;
        // Drop the entry only if nobody queued behind us; otherwise the
        // later acquirer's tail is stored and must stay.
        if (chains.get(key) === tail) {
          chains.delete(key);
        }
        release();
      };
    },
    get size() { return chains.size; },
  };
}
40
+
41
+ module.exports = { createAsyncLock };
@@ -1,11 +1,26 @@
1
1
  /**
2
- * LRU-bounded warm process pool.
2
+ * LRU-bounded warm process pool with FIFO pending queue per process.
3
3
  *
4
- * - No idle timeout: processes die only via eviction or graceful kill.
5
- * - Never evict an in-flight process.
6
- * - Graceful SIGTERM, then SIGKILL after 3 s fallback.
7
- * - If `--resume <id>` fails on spawn, clear the session_id so the next
8
- * message spawns fresh.
4
+ * Each `entry` owns ONE claude subprocess. Messages sent via `send()` are
5
+ * appended to `entry.pendingQueue` and their prompt is written to the
6
+ * subprocess stdin. Claude processes stdin in FIFO order and emits one
7
+ * `result` event per turn. Each result resolves the oldest pending
8
+ * (queue head).
9
+ *
10
+ * Timers (idle + wall-clock) are only armed for the HEAD of the queue —
11
+ * the turn Claude is currently working on. When the head is shifted,
12
+ * the next pending becomes head and its timers arm fresh. This avoids
13
+ * the footgun of "pending #2's timer started ticking when its stdin
14
+ * was written, but Claude spent 5 minutes on pending #1 first → #2
15
+ * times out before Claude sees it".
16
+ *
17
+ * Timer fire rejects ONLY that pending (policy: don't kill the whole
18
+ * subprocess, other in-flight work is probably fine). If the subprocess
19
+ * is truly stuck, its head pending will time out repeatedly.
20
+ *
21
+ * The `onStreamChunk` and `onToolUse` callbacks pass the live `entry` so
22
+ * callers can inspect `entry.pendingQueue[0]` to route output to the
23
+ * correct turn's streamer / reactor / source message.
9
24
  *
10
25
  * All I/O (spawn, db) is injected for testability.
11
26
  */
@@ -17,21 +32,7 @@ const DEFAULT_KILL_TIMEOUT_MS = 3000;
17
32
 
18
33
  /**
19
34
  * Pull user-visible text from a stream-json `assistant` event.
20
- * Claude Code emits one event per assistant step; each carries a
21
- * `message.content[]` of blocks. Only `text` blocks are returned —
22
- * `tool_use` blocks still trigger the idle-timer reset in the caller
23
- * (they count as Claude activity) but are NOT rendered to Telegram.
24
- * Streaming every tool call to chat produces a noisy "_Calling X_"
25
- * ladder that adds no information users can act on.
26
- *
27
- * Trailing-colon normalisation: Claude writes preambles like "Checking
28
- * this:" followed by a tool_use. Because we hide tool_use in the stream,
29
- * the colon becomes an orphan pointing at invisible work. Replace a
30
- * trailing `:` with `…` — the ellipsis reads as "doing it now" and
31
- * preserves the natural flow. Only the LAST colon in the joined text is
32
- * touched; mid-sentence colons ("Here's the plan: step 1, step 2")
33
- * stay intact. Also guards against `::` sequences (code / emoticons) by
34
- * requiring the preceding char to not also be `:`.
35
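+ * Only `text` blocks are returned (tool_use blocks are not rendered to Telegram); a trailing `:` in the joined text becomes `…` because the tool call it introduces is hidden, while `::` and mid-sentence colons are left alone.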
35
36
  */
36
37
  function extractAssistantText(event) {
37
38
  const blocks = event?.message?.content;
@@ -53,11 +54,11 @@ class ProcessManager {
53
54
  db = null,
54
55
  logger = console,
55
56
  killTimeoutMs = DEFAULT_KILL_TIMEOUT_MS,
56
- onInit = null, // (sessionKey, event) → void (system init)
57
- onResult = null, // (sessionKey, event) → void (turn result)
58
- onClose = null, // (sessionKey, code) → void
59
- onStreamChunk = null,// (sessionKey, partialText, entry) → void (per assistant event)
60
- onToolUse = null, // (sessionKey, toolName, entry) → void (per tool_use block)
57
+ onInit = null, // (sessionKey, event, entry) → void
58
+ onResult = null, // (sessionKey, event, entry, pending) → void
59
+ onClose = null, // (sessionKey, code, entry) → void
60
+ onStreamChunk = null,// (sessionKey, partialText, entry) → void routes to pendingQueue[0]
61
+ onToolUse = null, // (sessionKey, toolName, entry) → void routes to pendingQueue[0]
61
62
  } = {}) {
62
63
  if (!spawnFn) throw new Error('spawnFn required');
63
64
  this.cap = cap;
@@ -89,10 +90,6 @@ class ProcessManager {
89
90
  return Array.from(this.procs.keys());
90
91
  }
91
92
 
92
- /**
93
- * Return existing entry or spawn a new one. Evicts LRU if at capacity.
94
- * Throws if at capacity and all entries are in-flight.
95
- */
96
93
  async getOrSpawn(sessionKey, spawnContext) {
97
94
  const existing = this.procs.get(sessionKey);
98
95
  if (existing && !existing.closed) {
@@ -123,6 +120,30 @@ class ProcessManager {
123
120
  return true;
124
121
  }
125
122
 
123
+ /**
124
+ * Request a graceful respawn (e.g. because /model or /effort changed).
125
+ * If the queue is empty, kill now; otherwise mark the entry so it kills
126
+ * itself when the last pending resolves. Next send() respawns fresh
127
+ * with whatever config spawnFn reads at that moment.
128
+ */
129
+ requestRespawn(sessionKey, reason = 'config-change') {
130
+ const entry = this.procs.get(sessionKey);
131
+ if (!entry || entry.closed) return { killed: false, queued: 0 };
132
+ entry.needsRespawn = reason;
133
+ this._logEvent('respawn-requested', {
134
+ session_key: sessionKey,
135
+ chat_id: entry.chatId,
136
+ reason,
137
+ queued: entry.pendingQueue.length,
138
+ });
139
+ if (entry.pendingQueue.length === 0) {
140
+ // Fire-and-forget — caller doesn't need to await the kill.
141
+ this.kill(sessionKey).catch(() => {});
142
+ return { killed: true, queued: 0 };
143
+ }
144
+ return { killed: false, queued: entry.pendingQueue.length };
145
+ }
146
+
126
147
  async kill(sessionKey) {
127
148
  const entry = this.procs.get(sessionKey);
128
149
  if (!entry) return;
@@ -136,10 +157,11 @@ class ProcessManager {
136
157
  }, this.killTimeoutMs);
137
158
  entry.proc.once('close', () => { clearTimeout(timer); resolve(); });
138
159
  });
139
- if (entry.pending) {
140
- const { reject } = entry.pending;
141
- entry.pending = null;
142
- reject(new Error('Process killed'));
160
+ // Reject all pendings in the queue (if any survived the 'close' handler).
161
+ while (entry.pendingQueue.length > 0) {
162
+ const p = entry.pendingQueue.shift();
163
+ p.clearTimers?.();
164
+ p.reject(new Error('Process killed'));
143
165
  }
144
166
  }
145
167
 
@@ -164,16 +186,15 @@ class ProcessManager {
164
186
  sessionKey,
165
187
  proc,
166
188
  rl,
167
- pending: null,
189
+ pendingQueue: [],
168
190
  lastUsedTs: Date.now(),
169
191
  inFlight: false,
170
192
  closed: false,
193
+ needsRespawn: null,
171
194
  sessionId: ctx.existingSessionId || null,
172
195
  chatId: ctx.chatId || null,
173
196
  threadId: ctx.threadId || null,
174
197
  label: ctx.label || sessionKey,
175
- // Stream accumulator — cleared at each turn start (on send()).
176
- streamText: '',
177
198
  };
178
199
 
179
200
  rl.on('line', (line) => {
@@ -181,27 +202,31 @@ class ProcessManager {
181
202
  try { event = JSON.parse(line); }
182
203
  catch { this.logger.error(`[${entry.label}] non-JSON: ${line.slice(0, 200)}`); return; }
183
204
 
205
+ // Fix A: ANY stream-json event counts as Claude activity. Reset the
206
+ // idle timer on the HEAD pending (the turn Claude is working on),
207
+ // regardless of event type. Subagent runs emit `user`-type
208
+ // tool_result events between the parent's assistant events — those
209
+ // previously did NOT reset the timer, causing false timeouts during
210
+ // long subagent work.
211
+ const head = entry.pendingQueue[0];
212
+ if (head) head.resetIdleTimer?.();
213
+
184
214
  if (event.type === 'system' && event.subtype === 'init') {
185
215
  entry.sessionId = event.session_id;
186
216
  if (this.onInit) this.onInit(sessionKey, event, entry);
187
217
  }
188
- if (event.type === 'assistant' && entry.pending) {
189
- // Any assistant step (text block, tool_use, tool_result) counts as
190
- // Claude activity — reset the idle timeout so long turns don't
191
- // wall-clock out.
192
- entry.pending.resetIdleTimer?.();
218
+
219
+ if (event.type === 'assistant' && head) {
193
220
  if (this.onStreamChunk) {
194
221
  const added = extractAssistantText(event);
195
222
  if (added) {
196
- entry.streamText = entry.streamText
197
- ? `${entry.streamText}\n\n${added}`
223
+ head.streamText = head.streamText
224
+ ? `${head.streamText}\n\n${added}`
198
225
  : added;
199
- try { this.onStreamChunk(sessionKey, entry.streamText, entry); }
226
+ try { this.onStreamChunk(sessionKey, head.streamText, entry); }
200
227
  catch (err) { this.logger.error(`[${entry.label}] onStreamChunk: ${err.message}`); }
201
228
  }
202
229
  }
203
- // Emit tool_use blocks separately so callers (e.g. status reactions)
204
- // can react to each tool name without re-parsing stream text.
205
230
  if (this.onToolUse) {
206
231
  const blocks = event.message?.content;
207
232
  if (Array.isArray(blocks)) {
@@ -214,28 +239,46 @@ class ProcessManager {
214
239
  }
215
240
  }
216
241
  }
217
- if (event.type === 'result' && entry.pending) {
218
- const { resolve } = entry.pending;
219
- entry.pending = null;
220
- entry.inFlight = false;
221
- if (this.onResult) this.onResult(sessionKey, event, entry);
222
- resolve({
242
+
243
+ if (event.type === 'result' && head) {
244
+ entry.pendingQueue.shift();
245
+ head.clearTimers();
246
+ if (this.onResult) this.onResult(sessionKey, event, entry, head);
247
+ head.resolve({
223
248
  text: event.result || '',
224
249
  sessionId: event.session_id,
225
250
  cost: event.total_cost_usd,
226
251
  duration: event.duration_ms,
227
252
  error: event.subtype === 'success' ? null : (event.error || event.subtype),
228
253
  });
254
+ // Activate next head or settle idle state.
255
+ if (entry.pendingQueue.length > 0) {
256
+ entry.pendingQueue[0].activate();
257
+ } else {
258
+ entry.inFlight = false;
259
+ // Graceful drain-and-respawn: if caller asked for a respawn
260
+ // (e.g. /model change) and we just emptied the queue, kill now.
261
+ if (entry.needsRespawn) {
262
+ const reason = entry.needsRespawn;
263
+ entry.needsRespawn = null;
264
+ this._logEvent('respawn-draining', {
265
+ session_key: sessionKey,
266
+ chat_id: entry.chatId,
267
+ reason,
268
+ });
269
+ this.kill(sessionKey).catch(() => {});
270
+ }
271
+ }
229
272
  }
230
273
  });
231
274
 
232
275
  proc.on('close', (code) => {
233
276
  entry.closed = true;
234
- if (entry.pending) {
235
- const { reject } = entry.pending;
236
- entry.pending = null;
237
- entry.inFlight = false;
238
- reject(new Error(`Process exited (code ${code})`));
277
+ entry.inFlight = false;
278
+ while (entry.pendingQueue.length > 0) {
279
+ const p = entry.pendingQueue.shift();
280
+ p.clearTimers?.();
281
+ p.reject(new Error(`Process exited (code ${code})`));
239
282
  }
240
283
  this.procs.delete(sessionKey);
241
284
  if (code !== 0 && ctx.existingSessionId && this.db?.clearSessionId) {
@@ -250,11 +293,11 @@ class ProcessManager {
250
293
  proc.on('error', (err) => {
251
294
  this.logger.error(`[${entry.label}] proc error: ${err.message}`);
252
295
  entry.closed = true;
253
- if (entry.pending) {
254
- const { reject } = entry.pending;
255
- entry.pending = null;
256
- entry.inFlight = false;
257
- reject(err);
296
+ entry.inFlight = false;
297
+ while (entry.pendingQueue.length > 0) {
298
+ const p = entry.pendingQueue.shift();
299
+ p.clearTimers?.();
300
+ p.reject(err);
258
301
  }
259
302
  this.procs.delete(sessionKey);
260
303
  });
@@ -263,98 +306,113 @@ class ProcessManager {
263
306
  return entry;
264
307
  }
265
308
 
266
- send(sessionKey, prompt, { timeoutMs = 600_000, maxTurnMs = 30 * 60_000 } = {}) {
309
+ /**
310
+ * Append a turn to the queue. The returned promise resolves when Claude
311
+ * emits a `result` event for this turn (they emerge in stdin-write
312
+ * order). The underlying stdin write happens synchronously inside this
313
+ * call — the caller should have already serialised writes across
314
+ * sessions via an external lock if order matters.
315
+ *
316
+ * Options:
317
+ * timeoutMs — idle timer between Claude events (default 10min)
318
+ * maxTurnMs — wall-clock ceiling from "activate" time (default 30min)
319
+ * context — opaque object stored on the pending (polygram puts
320
+ * streamer, reactor, sourceMsgId here for its own use)
321
+ */
322
+ send(sessionKey, prompt, {
323
+ timeoutMs = 600_000,
324
+ maxTurnMs = 30 * 60_000,
325
+ context = {},
326
+ } = {}) {
267
327
  return new Promise((resolve, reject) => {
268
328
  const entry = this.procs.get(sessionKey);
269
329
  if (!entry || entry.closed) return reject(new Error('No process for session'));
270
- if (entry.pending) return reject(new Error('Process busy'));
271
- // Race: proc may have emitted 'close' between getOrSpawn and send, in
272
- // which case entry.closed is true but handlers could still be draining.
273
- // Also guard against a destroyed/ended stdin pipe explicitly — writing
274
- // to a closed pipe would either throw EPIPE or silently buffer.
275
330
  if (!entry.proc.stdin || entry.proc.stdin.destroyed || !entry.proc.stdin.writable) {
276
331
  return reject(new Error('Process stdin not writable'));
277
332
  }
333
+ // If this entry is awaiting respawn, refuse new sends — the caller
334
+ // should wait for the respawn to complete (which happens when the
335
+ // current queue drains).
336
+ if (entry.needsRespawn) {
337
+ return reject(new Error(`Session awaiting respawn (${entry.needsRespawn})`));
338
+ }
278
339
 
279
- entry.inFlight = true;
280
340
  entry.lastUsedTs = Date.now();
281
- entry.pending = { resolve, reject };
282
- entry.streamText = '';
283
-
284
- // Timer handles kept in closure vars (not entry.pending), because
285
- // the result-event handler in rl.on('line') sets entry.pending = null
286
- // BEFORE calling the wrapped resolve. Reading from entry.pending
287
- // after null-out gave undefined → clearTimeout was never called →
288
- // the default 30-min maxTurnMs timer stayed armed and held Node's
289
- // event loop open, hanging the test runner on CI.
341
+
290
342
  let idleTimer = null;
291
343
  let maxTimer = null;
344
+ let activated = false;
345
+
292
346
  const clearTimers = () => {
293
347
  if (idleTimer) { clearTimeout(idleTimer); idleTimer = null; }
294
348
  if (maxTimer) { clearTimeout(maxTimer); maxTimer = null; }
295
349
  };
296
350
 
297
- // Timer fire path. New in 0.3.9: after rejecting, SIGTERM the
298
- // subprocess. Previously we only rejected the promise and left the
299
- // stuck claude running the next message would write stdin to a
300
- // zombie process. Killing fires the 'close' handler which cleans
301
- // up the LRU entry, so the next send() gets a fresh spawn.
351
+ const pending = {
352
+ resolve: (r) => { clearTimers(); resolve(r); },
353
+ reject: (e) => { clearTimers(); reject(e); },
354
+ clearTimers,
355
+ startedAt: null,
356
+ streamText: '',
357
+ context,
358
+ idleTimer: null,
359
+ maxTimer: null,
360
+ activated: false,
361
+ };
362
+
302
363
  const fireTimeout = (reason) => {
303
- if (!entry.pending) return;
304
- clearTimers();
305
- entry.pending = null;
306
- entry.inFlight = false;
307
- try { entry.proc.kill('SIGTERM'); } catch {}
364
+ // Only act if we're still the head; if we've been shifted/killed
365
+ // already, this is a stale callback.
366
+ if (entry.pendingQueue[0] !== pending) return;
308
367
  this._logEvent('turn-timeout', {
309
368
  session_key: sessionKey,
310
369
  chat_id: entry.chatId,
311
370
  reason,
312
371
  });
313
- reject(new Error(reason));
372
+ // Remove from queue, reject. Per Q1 policy: don't kill the
373
+ // subprocess — later pendings might still be fine.
374
+ entry.pendingQueue.shift();
375
+ pending.reject(new Error(reason));
376
+ // Activate next head if any, else idle.
377
+ if (entry.pendingQueue.length > 0) {
378
+ entry.pendingQueue[0].activate();
379
+ } else {
380
+ entry.inFlight = false;
381
+ }
314
382
  };
315
383
 
316
- // Idle timeout: counts N seconds of SILENCE from Claude. Reset on
317
- // every assistant event so long productive turns (multi-tool
318
- // reasoning) don't falsely trip.
319
- // Note on .unref(): an earlier revision called unref() on both
320
- // timers to avoid holding the node event loop open in tests. That
321
- // broke Node's test runner on CI ("Promise resolution is still
322
- // pending but the event loop has already resolved") — the runner
323
- // detects unref'd timers as a drained loop and cancels awaiters
324
- // before the timer can fire. Production polygram stays alive via
325
- // grammy's poll loop + child process pipes; we don't need unref.
326
384
  const armIdle = () => setTimeout(
327
385
  () => fireTimeout(`Timeout: ${timeoutMs / 1000}s idle with no Claude activity`),
328
386
  timeoutMs,
329
387
  );
330
- idleTimer = armIdle();
331
- entry.pending.idleTimer = idleTimer;
332
- entry.pending.resetIdleTimer = () => {
333
- if (idleTimer) clearTimeout(idleTimer);
334
- if (entry.pending) {
335
- idleTimer = armIdle();
336
- entry.pending.idleTimer = idleTimer;
337
- }
388
+
389
+ pending.activate = () => {
390
+ if (activated) return;
391
+ activated = true;
392
+ pending.activated = true;
393
+ pending.startedAt = Date.now();
394
+ idleTimer = armIdle();
395
+ pending.idleTimer = idleTimer;
396
+ maxTimer = setTimeout(
397
+ () => fireTimeout(`Turn exceeded ${maxTurnMs / 1000}s wall-clock ceiling`),
398
+ maxTurnMs,
399
+ );
400
+ pending.maxTimer = maxTimer;
338
401
  };
339
402
 
340
- // Wall-clock ceiling: fires at maxTurnMs regardless of activity.
341
- // Catches stuck API calls that emit occasional events (keeping the
342
- // idle timer alive) but never produce a result. OpenClaw's only
343
- // timer was wall-clock; polygram's 0.3.5 change replaced it with
344
- // idle-reset, creating a gap this restores as a last-resort.
345
- maxTimer = setTimeout(
346
- () => fireTimeout(`Turn exceeded ${maxTurnMs / 1000}s wall-clock ceiling`),
347
- maxTurnMs,
348
- );
349
- entry.pending.maxTimer = maxTimer;
403
+ pending.resetIdleTimer = () => {
404
+ if (!activated) return;
405
+ if (idleTimer) clearTimeout(idleTimer);
406
+ idleTimer = armIdle();
407
+ pending.idleTimer = idleTimer;
408
+ };
350
409
 
351
- // Legacy alias: some callers / tests refer to entry.pending.timer.
352
- entry.pending.timer = idleTimer;
410
+ entry.pendingQueue.push(pending);
411
+ entry.inFlight = true;
353
412
 
354
- const wrappedResolve = entry.pending.resolve;
355
- const wrappedReject = entry.pending.reject;
356
- entry.pending.resolve = (r) => { clearTimers(); wrappedResolve(r); };
357
- entry.pending.reject = (e) => { clearTimers(); wrappedReject(e); };
413
+ // If we're the only pending, activate immediately. Otherwise wait
414
+ // until the preceding pending is shifted out.
415
+ if (entry.pendingQueue.length === 1) pending.activate();
358
416
 
359
417
  try {
360
418
  entry.proc.stdin.write(JSON.stringify({
@@ -362,11 +420,10 @@ class ProcessManager {
362
420
  message: { role: 'user', content: prompt },
363
421
  }) + '\n');
364
422
  } catch (err) {
365
- clearTimers();
366
- entry.pending = null;
367
- entry.inFlight = false;
368
- reject(err);
369
- return;
423
+ const idx = entry.pendingQueue.indexOf(pending);
424
+ if (idx !== -1) entry.pendingQueue.splice(idx, 1);
425
+ if (entry.pendingQueue.length === 0) entry.inFlight = false;
426
+ pending.reject(err);
370
427
  }
371
428
  });
372
429
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "polygram",
3
- "version": "0.4.6",
3
+ "version": "0.4.9",
4
4
  "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
5
5
  "main": "lib/ipc-client.js",
6
6
  "bin": {
package/polygram.js CHANGED
@@ -26,7 +26,7 @@ const { buildPrompt } = require('./lib/prompt');
26
26
  const { filterAttachments, MAX_FILE_BYTES } = require('./lib/attachments');
27
27
  const { ProcessManager } = require('./lib/process-manager');
28
28
  const { createSender } = require('./lib/telegram');
29
- const { drainQueuesForChat: drainQueuesForChatImpl } = require('./lib/queue-utils');
29
+ const { createAsyncLock } = require('./lib/async-lock');
30
30
  const { sweepInbox } = require('./lib/inbox');
31
31
  const { parseBotArg, parseDbArg, filterConfigToBot } = require('./lib/config-scope');
32
32
  const { createStore: createPairingsStore, parseTtl: parsePairingTtl } = require('./lib/pairings');
@@ -82,8 +82,11 @@ let ipcCloser = null;
82
82
  // single-valued), we keep them as plain module-level variables — not a map.
83
83
  let BOT_NAME = null; // string, frozen after boot
84
84
  let bot = null; // grammy Bot for BOT_NAME
85
- let streamers = new Map(); // sessionKey -> active Streamer (while turn is in flight)
86
- let reactors = new Map(); // sessionKey -> active ReactionManager (while turn is in flight)
85
+ // 0.4.8 note: streamer + reactor are per-turn, not per-session. They live
86
+ // on the pending's `context` object in the pm pendingQueue, keyed to the
87
+ // specific turn (not the session). The old per-session Maps were a bug
88
+ // for concurrent pendings — the second send() would overwrite the first's
89
+ // streamer reference before the first turn finished.
87
90
 
88
91
  // Allowlist of env var names passed through to spawned Claude processes.
89
92
  // Anything not listed here is dropped to prevent leaked secrets/ssh agents
@@ -520,90 +523,103 @@ async function getOrSpawnForChat(sessionKey) {
520
523
  return pm.getOrSpawn(sessionKey, ctx);
521
524
  }
522
525
 
523
- async function sendToProcess(sessionKey, prompt) {
526
+ async function sendToProcess(sessionKey, prompt, context = {}) {
524
527
  const entry = await getOrSpawnForChat(sessionKey);
525
528
  if (!entry) throw new Error('No process for chat');
526
529
  const chatId = getChatIdFromKey(sessionKey);
527
530
  const chatConfig = config.chats[chatId];
528
531
  const timeoutMs = (chatConfig.timeout || config.defaults.timeout) * 1000;
529
- // Wall-clock ceiling (seconds). Overridable per-chat via chatConfig.maxTurn
530
- // or globally via config.defaults.maxTurn. 30 min default is generous for
531
- // long audits; stuck API calls rarely run that long without firing the
532
- // idle timer first. Unit: seconds → milliseconds.
533
532
  const maxTurnMs = (chatConfig.maxTurn || config.defaults?.maxTurn || 1800) * 1000;
534
- return pm.send(sessionKey, prompt, { timeoutMs, maxTurnMs });
535
- }
536
-
537
- // ─── Message queue (per-chat) ───────────────────────────────────────
538
-
539
- const queues = {};
540
- const processing = {};
541
- const MAX_QUEUE_DEPTH = 50; // per chat — cron storm or spammer insurance
542
-
543
- async function enqueue(sessionKey, chatId, msg, bot) {
544
- if (!queues[sessionKey]) queues[sessionKey] = [];
545
- if (queues[sessionKey].length >= MAX_QUEUE_DEPTH) {
546
- // Drop oldest rather than rejecting newest — the user's freshest
547
- // intent is more valuable than backlog. Emit an event so operators
548
- // see this rather than a queue silently degrading.
549
- queues[sessionKey].shift();
550
- dbWrite(() => db.logEvent('queue-overflow', {
551
- chat_id: chatId, session_key: sessionKey, cap: MAX_QUEUE_DEPTH,
552
- }), 'log queue-overflow');
533
+ // Per-session stdin lock orders the write step, not the result-wait.
534
+ // pm.send's Promise executor writes stdin synchronously, so as soon as
535
+ // pm.send returns (not resolves — returns), the stdin write has
536
+ // happened. We release the lock right after that and await the result
537
+ // OUTSIDE the lock — otherwise one long turn would serialise the whole
538
+ // session, which is what we're trying to escape.
539
+ const release = await stdinLock.acquire(sessionKey);
540
+ let resultPromise;
541
+ try {
542
+ resultPromise = pm.send(sessionKey, prompt, { timeoutMs, maxTurnMs, context });
543
+ } finally {
544
+ release();
553
545
  }
554
- queues[sessionKey].push({ msg, bot, chatId });
555
- if (!processing[sessionKey]) processQueue(sessionKey);
546
+ return resultPromise;
556
547
  }
557
548
 
549
+ // ─── Message dispatch ───────────────────────────────────────────────
550
+
551
+ // 0.4.8: per-session concurrent dispatch. No FIFO polygram-level queue any
552
+ // more — inbound messages immediately kick off handleMessage. Pre-work
553
+ // (attachment download, voice transcription) runs in parallel across
554
+ // messages; a per-session stdin lock (in sendToProcess) orders the
555
+ // eventual pm.send writes so Claude reads user messages in arrival order
556
+ // and replies come out in the same order.
557
+ //
558
+ // We still track in-flight handleMessage calls per session so we can:
559
+ // - emit a `queue-depth-warning` event if the count ever exceeds a
560
+ // threshold (abnormal inbound rate, slow pre-work, stuck bot)
561
+ // - (future) drain on shutdown if we want clean exit
562
+ const CONCURRENT_WARN_THRESHOLD = 20;
563
+ const inFlightHandlers = new Map(); // sessionKey → count
564
+
558
565
  // Sessions the operator just /stop'd (or natural-language "стоп"). Entries
559
- // suppress the generic "Sorry, I couldn't process" reply below — the abort
560
- // handler already sent its own "Остановлено." ack, and the subsequent
561
- // handleMessage rejection from the killed subprocess would otherwise
562
- // spam a second contradictory message. Cleared on first use; long-lived
563
- // only if the abort kills something that never finishes rejecting.
566
+ // suppress the generic "Sorry, I couldn't process" reply — the abort
567
+ // handler already sent its own "Остановлено." ack, and handleMessage
568
+ // rejections from the killed subprocess would otherwise spam a second
569
+ // contradictory message.
564
570
  const abortedSessions = new Set();
565
571
 
566
572
  function markSessionAborted(sessionKey) {
567
573
  abortedSessions.add(sessionKey);
568
574
  }
569
575
 
570
- async function processQueue(sessionKey) {
571
- processing[sessionKey] = true;
572
- while (queues[sessionKey]?.length > 0) {
573
- const { msg, bot, chatId } = queues[sessionKey].shift();
574
- try {
575
- await handleMessage(sessionKey, chatId, msg, bot);
576
- } catch (err) {
577
- const wasAborted = abortedSessions.has(sessionKey);
578
- if (wasAborted) abortedSessions.delete(sessionKey);
579
- // Raw err.message can carry host paths, DB columns, internal state.
580
- // Surface a generic message to the user; log the detail to events
581
- // so operators can still debug.
582
- console.error(`[${sessionKey}] Error:`, err.message);
583
- dbWrite(() => db.logEvent('handler-error', {
584
- chat_id: chatId, session_key: sessionKey,
585
- msg_id: msg?.message_id,
586
- error: err.message?.slice(0, 500),
587
- stack: err.stack?.split('\n').slice(0, 5).join('\n'),
588
- aborted: wasAborted || undefined,
589
- }), 'log handler-error');
590
- if (!wasAborted) {
591
- try {
592
- await tg(bot, 'sendMessage', {
593
- chat_id: chatId,
594
- text: `Sorry, I couldn't process that message. The operator has been notified.`,
595
- reply_parameters: { message_id: msg.message_id },
596
- }, { source: 'error-reply', botName: BOT_NAME });
597
- } catch (replyErr) {
598
- console.error(`[${sessionKey}] failed to send error reply: ${replyErr.message}`);
599
- }
600
- }
601
- }
576
+ // Called by bot.on('message') for every regular (non-admin, non-pair)
577
+ // message. Runs handleMessage in a fire-and-forget manner with centralised
578
+ // error handling. Replaces the old processQueue loop.
579
+ function dispatchHandleMessage(sessionKey, chatId, msg, bot) {
580
+ const count = (inFlightHandlers.get(sessionKey) || 0) + 1;
581
+ inFlightHandlers.set(sessionKey, count);
582
+ if (count === CONCURRENT_WARN_THRESHOLD) {
583
+ dbWrite(() => db.logEvent('queue-depth-warning', {
584
+ chat_id: chatId, session_key: sessionKey,
585
+ in_flight: count, threshold: CONCURRENT_WARN_THRESHOLD,
586
+ }), 'log queue-depth-warning');
602
587
  }
603
- processing[sessionKey] = false;
588
+ handleMessage(sessionKey, chatId, msg, bot).catch((err) => {
589
+ const wasAborted = abortedSessions.has(sessionKey);
590
+ if (wasAborted) abortedSessions.delete(sessionKey);
591
+ console.error(`[${sessionKey}] Error:`, err.message);
592
+ dbWrite(() => db.logEvent('handler-error', {
593
+ chat_id: chatId, session_key: sessionKey,
594
+ msg_id: msg?.message_id,
595
+ error: err.message?.slice(0, 500),
596
+ stack: err.stack?.split('\n').slice(0, 5).join('\n'),
597
+ aborted: wasAborted || undefined,
598
+ }), 'log handler-error');
599
+ if (!wasAborted) {
600
+ tg(bot, 'sendMessage', {
601
+ chat_id: chatId,
602
+ text: `Sorry, I couldn't process that message. The operator has been notified.`,
603
+ reply_parameters: { message_id: msg.message_id },
604
+ }, { source: 'error-reply', botName: BOT_NAME }).catch((replyErr) => {
605
+ console.error(`[${sessionKey}] failed to send error reply: ${replyErr.message}`);
606
+ });
607
+ }
608
+ }).finally(() => {
609
+ const n = (inFlightHandlers.get(sessionKey) || 1) - 1;
610
+ if (n <= 0) inFlightHandlers.delete(sessionKey);
611
+ else inFlightHandlers.set(sessionKey, n);
612
+ });
604
613
  }
605
614
 
606
- const drainQueuesForChat = (chatId) => drainQueuesForChatImpl(queues, chatId);
615
+ // drainQueuesForChat is retained as a no-op for backwards compat with
616
+ // call sites in /model, /effort, chat-migration, and abort handlers.
617
+ // Returns 0 always; a drain isn't meaningful in the concurrent model —
618
+ // callers that want to abort should rely on pm.killChat.
619
+ const drainQueuesForChat = (_chatId) => 0;
620
+
621
+ // Per-session lock ordering stdin writes. Module is I/O-pure.
622
+ const stdinLock = createAsyncLock();
607
623
 
608
624
  // Typing indicator is imported from lib/typing-indicator — it adds a
609
625
  // per-chat circuit breaker with exponential backoff so a chat that
@@ -975,6 +991,25 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
975
991
  await sendReply(info);
976
992
  return;
977
993
  }
994
+ // Helper: request respawn across ALL sessionKeys owned by this chat (one
995
+ // per topic if isolateTopics=true, otherwise just the single chat-level
996
+ // key). Graceful: in-flight turns drain on old settings, new turns use
997
+ // the new settings. Returns total pending turns across all keys so the
998
+ // reply can tell the user.
999
+ const requestRespawnForChat = (reason) => {
1000
+ const prefix = String(chatId);
1001
+ let totalQueued = 0;
1002
+ let anyActive = false;
1003
+ for (const key of pm.keys()) {
1004
+ if (key === prefix || key.startsWith(prefix + ':')) {
1005
+ const res = pm.requestRespawn(key, reason);
1006
+ totalQueued += res.queued;
1007
+ if (!res.killed) anyActive = true;
1008
+ }
1009
+ }
1010
+ return { queued: totalQueued, anyActive };
1011
+ };
1012
+
978
1013
  if (botAllowsCommands && text.startsWith('/model ')) {
979
1014
  const newModel = text.slice(7).trim();
980
1015
  if (['opus', 'sonnet', 'haiku'].includes(newModel)) {
@@ -986,11 +1021,10 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
986
1021
  old_value: oldModel, new_value: newModel,
987
1022
  user: cmdUser, user_id: cmdUserId, source: 'command',
988
1023
  }), 'log model change');
989
- const droppedModel = drainQueuesForChat(chatId);
990
- if (droppedModel) dbWrite(() => db.logEvent('queue-drained', { chat_id: chatId, reason: 'model-change', dropped: droppedModel }), 'log queue-drained');
991
- await pm.killChat(chatId);
1024
+ const { anyActive } = requestRespawnForChat('model-change');
992
1025
  const ver = MODEL_VERSIONS[newModel] || newModel;
993
- await sendReply(`Model ${newModel} (${ver})`);
1026
+ const suffix = anyActive ? ` I'll switch when I finish` : '';
1027
+ await sendReply(`Model → ${newModel} (${ver})${suffix}`);
994
1028
  } else {
995
1029
  await sendReply(`Unknown model. Use: opus, sonnet, haiku`);
996
1030
  }
@@ -1007,10 +1041,9 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
1007
1041
  old_value: oldEffort, new_value: newEffort,
1008
1042
  user: cmdUser, user_id: cmdUserId, source: 'command',
1009
1043
  }), 'log effort change');
1010
- const droppedEffort = drainQueuesForChat(chatId);
1011
- if (droppedEffort) dbWrite(() => db.logEvent('queue-drained', { chat_id: chatId, reason: 'effort-change', dropped: droppedEffort }), 'log queue-drained');
1012
- await pm.killChat(chatId);
1013
- await sendReply(`Effort → ${newEffort}`);
1044
+ const { anyActive } = requestRespawnForChat('effort-change');
1045
+ const suffix = anyActive ? ` I'll switch when I finish` : '';
1046
+ await sendReply(`Effort → ${newEffort}${suffix}`);
1014
1047
  } else {
1015
1048
  await sendReply(`Unknown effort. Use: low, medium, high, xhigh, max`);
1016
1049
  }
@@ -1194,7 +1227,7 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
1194
1227
  throttleMs: botCfg.streamThrottleMs,
1195
1228
  logger: { error: (m) => console.error(`[${label}] ${m}`) },
1196
1229
  });
1197
- streamers.set(sessionKey, streamer);
1230
+ // streamer is registered with this turn via pm.send's context (below)
1198
1231
 
1199
1232
  // Status reactions on the user's message: 👀 queued → 🤔 thinking →
1200
1233
  // 👨‍💻 coding / ⚡ web / 🔥 tool → 👍 done / 🤯 error. Silent (no
@@ -1213,11 +1246,15 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
1213
1246
  },
1214
1247
  logError: (m) => console.error(`[${label}] ${m}`),
1215
1248
  });
1216
- reactors.set(sessionKey, reactor);
1217
1249
  reactor.setState('THINKING');
1218
1250
 
1219
1251
  try {
1220
- const result = await sendToProcess(sessionKey, prompt);
1252
+ // Pass streamer + reactor as per-turn context. pm's callbacks pick
1253
+ // them off entry.pendingQueue[0].context so concurrent pendings each
1254
+ // get routed to their own streamer/reactor.
1255
+ const result = await sendToProcess(sessionKey, prompt, {
1256
+ streamer, reactor, sourceMsgId: msg.message_id,
1257
+ });
1221
1258
  const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
1222
1259
 
1223
1260
  stopTyping();
@@ -1305,12 +1342,12 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
1305
1342
  throw err;
1306
1343
  } finally {
1307
1344
  stopTyping();
1308
- streamers.delete(sessionKey);
1345
+ // streamer is per-turn and not stored in any session Map in 0.4.8
1309
1346
  // Give the reactor a beat to flush the terminal state (DONE/ERROR/TIMEOUT
1310
1347
  // bypass throttle so this is instant in practice; the stop() below
1311
1348
  // guards against any late transition leaking after the turn ends).
1312
1349
  reactor.stop();
1313
- reactors.delete(sessionKey);
1350
+ // reactor is per-turn and not stored in any session Map in 0.4.8
1314
1351
  }
1315
1352
  }
1316
1353
 
@@ -1513,7 +1550,7 @@ function createBot(token) {
1513
1550
 
1514
1551
  const threadId = msg.message_thread_id?.toString();
1515
1552
  const sessionKey = getSessionKey(chatId, threadId, chatConfig);
1516
- await enqueue(sessionKey, chatId, msg, bot);
1553
+ dispatchHandleMessage(sessionKey, chatId, msg, bot);
1517
1554
  };
1518
1555
 
1519
1556
  // Media-group buffer: coalesce multi-photo uploads (Telegram delivers
@@ -1854,12 +1891,17 @@ async function main() {
1854
1891
  console.log(`[${entry.label}] Process exited (code ${code})`);
1855
1892
  dbWrite(() => db.logEvent('process-close', { chat_id: entry.chatId, session_key: sessionKey, code }), 'log process-close');
1856
1893
  },
1857
- onStreamChunk: (sessionKey, partial) => {
1858
- const s = streamers.get(sessionKey);
1894
+ onStreamChunk: (sessionKey, partial, entry) => {
1895
+ // Route to the head pending's per-turn streamer. In the 0.4.8
1896
+ // concurrent-pending model, there can be N pendings queued — only
1897
+ // the HEAD is the turn Claude is actively emitting events for.
1898
+ const head = entry.pendingQueue?.[0];
1899
+ const s = head?.context?.streamer;
1859
1900
  if (s) s.onChunk(partial).catch(() => {});
1860
1901
  },
1861
- onToolUse: (sessionKey, toolName) => {
1862
- const r = reactors.get(sessionKey);
1902
+ onToolUse: (sessionKey, toolName, entry) => {
1903
+ const head = entry.pendingQueue?.[0];
1904
+ const r = head?.context?.reactor;
1863
1905
  if (r) r.setState(classifyToolName(toolName));
1864
1906
  },
1865
1907
  });