npm - polygram - Versions diffs - 0.4.6 → 0.4.9 - Mend

polygram 0.4.6 → 0.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/.claude-plugin/plugin.json +1 -1
package/lib/async-lock.js +41 -0
package/lib/process-manager.js +186 -129
package/package.json +1 -1
package/polygram.js +128 -86

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "$schema": "https://anthropic.com/claude-code/plugin.schema.json",
   "name": "polygram",
-  "version": "0.4.6",
+  "version": "0.4.9",
   "description": "Telegram integration for Claude Code that preserves the OpenClaw per-chat session model. Migration target for OpenClaw users. Multi-bot, multi-chat, per-topic isolation; SQLite transcripts; inline-keyboard approvals. Bundles /polygram:status|logs|pair-code|approvals admin commands and a history skill.",
   "keywords": [
     "telegram",

package/lib/async-lock.js ADDED Viewed

@@ -0,0 +1,41 @@
+/**
+ * Per-key chain lock. Each acquire() returns a release function; the next
+ * acquire() awaits the previous one's release.
+ *
+ * Used by polygram to serialise stdin writes per session. Pre-work
+ * (attachment download, voice transcription, prompt formatting) runs
+ * concurrently; only the stdin write itself is serialised so Claude
+ * reads messages in arrival order and replies come out in the same
+ * order.
+ *
+ * Deliberately minimal — no timeouts, no cancellation, no fairness
+ * guarantees beyond FIFO. Callers are expected to ALWAYS call release,
+ * even on error paths, or the lock leaks (blocks all future acquires
+ * for that key forever).
+ */
+function createAsyncLock() {
+  const chains = new Map(); // key → tail promise (settles on last release)
+  return {
+    async acquire(key) {
+      const prev = chains.get(key) || Promise.resolve();
+      let release;
+      const next = new Promise((resolve) => { release = resolve; });
+      // Keep the tail in a variable: every .then() call returns a NEW
+      // promise, so the identity check in release must reuse this one.
+      const tail = prev.then(() => next);
+      chains.set(key, tail);
+      await prev;
+      // Drop the Map entry when this is the last holder so the Map does
+      // not grow unbounded across the lifetime of the process.
+      return () => {
+        if (chains.get(key) === tail) chains.delete(key);
+        release();
+      };
+    },
+    get size() { return chains.size; },
+  };
+}
+module.exports = { createAsyncLock };

package/lib/process-manager.js CHANGED Viewed

@@ -1,11 +1,26 @@
 /**
- * LRU-bounded warm process pool.
+ * LRU-bounded warm process pool with FIFO pending queue per process.
  *
- * - No idle timeout: processes die only via eviction or graceful kill.
- * - Never evict an in-flight process.
- * - Graceful SIGTERM, then SIGKILL after 3 s fallback.
- * - If `--resume <id>` fails on spawn, clear the session_id so the next
- *   message spawns fresh.
+ * Each `entry` owns ONE claude subprocess. Messages sent via `send()` are
+ * appended to `entry.pendingQueue` and their prompt is written to the
+ * subprocess stdin. Claude processes stdin in FIFO order and emits one
+ * `result` event per turn. Each result resolves the oldest pending
+ * (queue head).
+ *
+ * Timers (idle + wall-clock) are only armed for the HEAD of the queue —
+ * the turn Claude is currently working on. When the head is shifted,
+ * the next pending becomes head and its timers arm fresh. This avoids
+ * the footgun of "pending #2's timer started ticking when its stdin
+ * was written, but Claude spent 5 minutes on pending #1 first → #2
+ * times out before Claude sees it".
+ *
+ * Timer fire rejects ONLY that pending (policy: don't kill the whole
+ * subprocess, other in-flight work is probably fine). If the subprocess
+ * is truly stuck, its head pending will time out repeatedly.
+ *
+ * The `onStreamChunk` and `onToolUse` callbacks pass the live `entry` so
+ * callers can inspect `entry.pendingQueue[0]` to route output to the
+ * correct turn's streamer / reactor / source message.
  *
  * All I/O (spawn, db) is injected for testability.
  */
@@ -17,21 +32,7 @@ const DEFAULT_KILL_TIMEOUT_MS = 3000;
 /**
  * Pull user-visible text from a stream-json `assistant` event.
- * Claude Code emits one event per assistant step; each carries a
- * `message.content[]` of blocks. Only `text` blocks are returned —
- * `tool_use` blocks still trigger the idle-timer reset in the caller
- * (they count as Claude activity) but are NOT rendered to Telegram.
- * Streaming every tool call to chat produces a noisy "_Calling X_"
- * ladder that adds no information users can act on.
- *
- * Trailing-colon normalisation: Claude writes preambles like "Checking
- * this:" followed by a tool_use. Because we hide tool_use in the stream,
- * the colon becomes an orphan pointing at invisible work. Replace a
- * trailing `:` with `…` — the ellipsis reads as "doing it now" and
- * preserves the natural flow. Only the LAST colon in the joined text is
- * touched; mid-sentence colons ("Here's the plan: step 1, step 2")
- * stay intact. Also guards against `::` sequences (code / emoticons) by
- * requiring the preceding char to not also be `:`.
+ * See header for colon-normalisation / tool_use-filter rationale.
  */
 function extractAssistantText(event) {
   const blocks = event?.message?.content;
@@ -53,11 +54,11 @@ class ProcessManager {
     db = null,
     logger = console,
     killTimeoutMs = DEFAULT_KILL_TIMEOUT_MS,
-    onInit = null,       // (sessionKey, event) → void (system init)
-    onResult = null,     // (sessionKey, event) → void (turn result)
-    onClose = null,      // (sessionKey, code) → void
-    onStreamChunk = null,// (sessionKey, partialText, entry) → void (per assistant event)
-    onToolUse = null,    // (sessionKey, toolName, entry) → void (per tool_use block)
+    onInit = null,       // (sessionKey, event, entry) → void
+    onResult = null,     // (sessionKey, event, entry, pending) → void
+    onClose = null,      // (sessionKey, code, entry) → void
+    onStreamChunk = null,// (sessionKey, partialText, entry) → void — routes to pendingQueue[0]
+    onToolUse = null,    // (sessionKey, toolName, entry) → void — routes to pendingQueue[0]
   } = {}) {
     if (!spawnFn) throw new Error('spawnFn required');
     this.cap = cap;
@@ -89,10 +90,6 @@ class ProcessManager {
     return Array.from(this.procs.keys());
   }
-  /**
-   * Return existing entry or spawn a new one. Evicts LRU if at capacity.
-   * Throws if at capacity and all entries are in-flight.
-   */
   async getOrSpawn(sessionKey, spawnContext) {
     const existing = this.procs.get(sessionKey);
     if (existing && !existing.closed) {
@@ -123,6 +120,30 @@ class ProcessManager {
     return true;
   }
+  /**
+   * Request a graceful respawn (e.g. because /model or /effort changed).
+   * If the queue is empty, kill now; otherwise mark the entry so it kills
+   * itself when the last pending resolves. Next send() respawns fresh
+   * with whatever config spawnFn reads at that moment.
+   *
+   * While the entry is marked, send() rejects new turns for this session
+   * with "Session awaiting respawn (<reason>)".
+   *
+   * @param {string} sessionKey - key of the pool entry to respawn
+   * @param {string} [reason='config-change'] - stored on entry.needsRespawn
+   *   and included in the `respawn-requested` event
+   * @returns {{killed: boolean, queued: number}} killed=true when the kill
+   *   was started immediately (not awaited); queued = pendings still in the
+   *   queue (0 when there is no live entry for the key)
+   */
+  requestRespawn(sessionKey, reason = 'config-change') {
+    const entry = this.procs.get(sessionKey);
+    if (!entry || entry.closed) return { killed: false, queued: 0 };
+    entry.needsRespawn = reason;
+    this._logEvent('respawn-requested', {
+      session_key: sessionKey,
+      chat_id: entry.chatId,
+      reason,
+      queued: entry.pendingQueue.length,
+    });
+    if (entry.pendingQueue.length === 0) {
+      // Fire-and-forget — caller doesn't need to await the kill.
+      this.kill(sessionKey).catch(() => {});
+      return { killed: true, queued: 0 };
+    }
+    return { killed: false, queued: entry.pendingQueue.length };
+  }
   async kill(sessionKey) {
     const entry = this.procs.get(sessionKey);
     if (!entry) return;
@@ -136,10 +157,11 @@ class ProcessManager {
       }, this.killTimeoutMs);
       entry.proc.once('close', () => { clearTimeout(timer); resolve(); });
     });
-    if (entry.pending) {
-      const { reject } = entry.pending;
-      entry.pending = null;
-      reject(new Error('Process killed'));
+    // Reject all pendings in the queue (if any survived the 'close' handler).
+    while (entry.pendingQueue.length > 0) {
+      const p = entry.pendingQueue.shift();
+      p.clearTimers?.();
+      p.reject(new Error('Process killed'));
     }
   }
@@ -164,16 +186,15 @@ class ProcessManager {
       sessionKey,
       proc,
       rl,
-      pending: null,
+      pendingQueue: [],
       lastUsedTs: Date.now(),
       inFlight: false,
       closed: false,
+      needsRespawn: null,
       sessionId: ctx.existingSessionId || null,
       chatId: ctx.chatId || null,
       threadId: ctx.threadId || null,
       label: ctx.label || sessionKey,
-      // Stream accumulator — cleared at each turn start (on send()).
-      streamText: '',
     };
     rl.on('line', (line) => {
@@ -181,27 +202,31 @@ class ProcessManager {
       try { event = JSON.parse(line); }
       catch { this.logger.error(`[${entry.label}] non-JSON: ${line.slice(0, 200)}`); return; }
+      // Fix A: ANY stream-json event counts as Claude activity. Reset the
+      // idle timer on the HEAD pending (the turn Claude is working on),
+      // regardless of event type. Subagent runs emit `user`-type
+      // tool_result events between the parent's assistant events — those
+      // previously did NOT reset the timer, causing false timeouts during
+      // long subagent work.
+      const head = entry.pendingQueue[0];
+      if (head) head.resetIdleTimer?.();
       if (event.type === 'system' && event.subtype === 'init') {
         entry.sessionId = event.session_id;
         if (this.onInit) this.onInit(sessionKey, event, entry);
       }
-      if (event.type === 'assistant' && entry.pending) {
-        // Any assistant step (text block, tool_use, tool_result) counts as
-        // Claude activity — reset the idle timeout so long turns don't
-        // wall-clock out.
-        entry.pending.resetIdleTimer?.();
+      if (event.type === 'assistant' && head) {
         if (this.onStreamChunk) {
           const added = extractAssistantText(event);
           if (added) {
-            entry.streamText = entry.streamText
-              ? `${entry.streamText}\n\n${added}`
+            head.streamText = head.streamText
+              ? `${head.streamText}\n\n${added}`
               : added;
-            try { this.onStreamChunk(sessionKey, entry.streamText, entry); }
+            try { this.onStreamChunk(sessionKey, head.streamText, entry); }
             catch (err) { this.logger.error(`[${entry.label}] onStreamChunk: ${err.message}`); }
           }
         }
-        // Emit tool_use blocks separately so callers (e.g. status reactions)
-        // can react to each tool name without re-parsing stream text.
         if (this.onToolUse) {
           const blocks = event.message?.content;
           if (Array.isArray(blocks)) {
@@ -214,28 +239,46 @@ class ProcessManager {
           }
         }
       }
-      if (event.type === 'result' && entry.pending) {
-        const { resolve } = entry.pending;
-        entry.pending = null;
-        entry.inFlight = false;
-        if (this.onResult) this.onResult(sessionKey, event, entry);
-        resolve({
+      if (event.type === 'result' && head) {
+        entry.pendingQueue.shift();
+        head.clearTimers();
+        if (this.onResult) this.onResult(sessionKey, event, entry, head);
+        head.resolve({
           text: event.result || '',
           sessionId: event.session_id,
           cost: event.total_cost_usd,
           duration: event.duration_ms,
           error: event.subtype === 'success' ? null : (event.error || event.subtype),
         });
+        // Activate next head or settle idle state.
+        if (entry.pendingQueue.length > 0) {
+          entry.pendingQueue[0].activate();
+        } else {
+          entry.inFlight = false;
+          // Graceful drain-and-respawn: if caller asked for a respawn
+          // (e.g. /model change) and we just emptied the queue, kill now.
+          if (entry.needsRespawn) {
+            const reason = entry.needsRespawn;
+            entry.needsRespawn = null;
+            this._logEvent('respawn-draining', {
+              session_key: sessionKey,
+              chat_id: entry.chatId,
+              reason,
+            });
+            this.kill(sessionKey).catch(() => {});
+          }
+        }
       }
     });
     proc.on('close', (code) => {
       entry.closed = true;
-      if (entry.pending) {
-        const { reject } = entry.pending;
-        entry.pending = null;
-        entry.inFlight = false;
-        reject(new Error(`Process exited (code ${code})`));
+      entry.inFlight = false;
+      while (entry.pendingQueue.length > 0) {
+        const p = entry.pendingQueue.shift();
+        p.clearTimers?.();
+        p.reject(new Error(`Process exited (code ${code})`));
       }
       this.procs.delete(sessionKey);
       if (code !== 0 && ctx.existingSessionId && this.db?.clearSessionId) {
@@ -250,11 +293,11 @@ class ProcessManager {
     proc.on('error', (err) => {
       this.logger.error(`[${entry.label}] proc error: ${err.message}`);
       entry.closed = true;
-      if (entry.pending) {
-        const { reject } = entry.pending;
-        entry.pending = null;
-        entry.inFlight = false;
-        reject(err);
+      entry.inFlight = false;
+      while (entry.pendingQueue.length > 0) {
+        const p = entry.pendingQueue.shift();
+        p.clearTimers?.();
+        p.reject(err);
       }
       this.procs.delete(sessionKey);
     });
@@ -263,98 +306,113 @@ class ProcessManager {
     return entry;
   }
-  send(sessionKey, prompt, { timeoutMs = 600_000, maxTurnMs = 30 * 60_000 } = {}) {
+  /**
+   * Append a turn to the queue. The returned promise resolves when Claude
+   * emits a `result` event for this turn (they emerge in stdin-write
+   * order). The underlying stdin write happens synchronously inside this
+   * call — callers that need ordering should serialise their send() calls
+   * for a given session via an external per-session lock.
+   *
+   * Options:
+   *   timeoutMs — idle timer between Claude events (default 10min)
+   *   maxTurnMs — wall-clock ceiling from "activate" time (default 30min)
+   *   context   — opaque object stored on the pending (polygram puts
+   *               streamer, reactor, sourceMsgId here for its own use)
+   */
+  send(sessionKey, prompt, {
+    timeoutMs = 600_000,
+    maxTurnMs = 30 * 60_000,
+    context = {},
+  } = {}) {
     return new Promise((resolve, reject) => {
       const entry = this.procs.get(sessionKey);
       if (!entry || entry.closed) return reject(new Error('No process for session'));
-      if (entry.pending) return reject(new Error('Process busy'));
-      // Race: proc may have emitted 'close' between getOrSpawn and send, in
-      // which case entry.closed is true but handlers could still be draining.
-      // Also guard against a destroyed/ended stdin pipe explicitly — writing
-      // to a closed pipe would either throw EPIPE or silently buffer.
       if (!entry.proc.stdin || entry.proc.stdin.destroyed || !entry.proc.stdin.writable) {
         return reject(new Error('Process stdin not writable'));
       }
+      // If this entry is awaiting respawn, refuse new sends — the caller
+      // should wait for the respawn to complete (which happens when the
+      // current queue drains).
+      if (entry.needsRespawn) {
+        return reject(new Error(`Session awaiting respawn (${entry.needsRespawn})`));
+      }
-      entry.inFlight = true;
       entry.lastUsedTs = Date.now();
-      entry.pending = { resolve, reject };
-      entry.streamText = '';
-      // Timer handles kept in closure vars (not entry.pending), because
-      // the result-event handler in rl.on('line') sets entry.pending = null
-      // BEFORE calling the wrapped resolve. Reading from entry.pending
-      // after null-out gave undefined → clearTimeout was never called →
-      // the default 30-min maxTurnMs timer stayed armed and held Node's
-      // event loop open, hanging the test runner on CI.
       let idleTimer = null;
       let maxTimer = null;
+      let activated = false;
       const clearTimers = () => {
         if (idleTimer) { clearTimeout(idleTimer); idleTimer = null; }
         if (maxTimer) { clearTimeout(maxTimer); maxTimer = null; }
       };
-      // Timer fire path. New in 0.3.9: after rejecting, SIGTERM the
-      // subprocess. Previously we only rejected the promise and left the
-      // stuck claude running — the next message would write stdin to a
-      // zombie process. Killing fires the 'close' handler which cleans
-      // up the LRU entry, so the next send() gets a fresh spawn.
+      const pending = {
+        resolve: (r) => { clearTimers(); resolve(r); },
+        reject: (e) => { clearTimers(); reject(e); },
+        clearTimers,
+        startedAt: null,
+        streamText: '',
+        context,
+        idleTimer: null,
+        maxTimer: null,
+        activated: false,
+      };
       const fireTimeout = (reason) => {
-        if (!entry.pending) return;
-        clearTimers();
-        entry.pending = null;
-        entry.inFlight = false;
-        try { entry.proc.kill('SIGTERM'); } catch {}
+        // Only act if we're still the head; if we've been shifted/killed
+        // already, this is a stale callback.
+        if (entry.pendingQueue[0] !== pending) return;
         this._logEvent('turn-timeout', {
           session_key: sessionKey,
           chat_id: entry.chatId,
           reason,
         });
-        reject(new Error(reason));
+        // Remove from queue, reject. Per Q1 policy: don't kill the
+        // subprocess — later pendings might still be fine.
+        entry.pendingQueue.shift();
+        pending.reject(new Error(reason));
+        // Activate next head if any, else idle.
+        if (entry.pendingQueue.length > 0) {
+          entry.pendingQueue[0].activate();
+        } else {
+          entry.inFlight = false;
+        }
       };
-      // Idle timeout: counts N seconds of SILENCE from Claude. Reset on
-      // every assistant event so long productive turns (multi-tool
-      // reasoning) don't falsely trip.
-      // Note on .unref(): an earlier revision called unref() on both
-      // timers to avoid holding the node event loop open in tests. That
-      // broke Node's test runner on CI ("Promise resolution is still
-      // pending but the event loop has already resolved") — the runner
-      // detects unref'd timers as a drained loop and cancels awaiters
-      // before the timer can fire. Production polygram stays alive via
-      // grammy's poll loop + child process pipes; we don't need unref.
       const armIdle = () => setTimeout(
         () => fireTimeout(`Timeout: ${timeoutMs / 1000}s idle with no Claude activity`),
         timeoutMs,
       );
-      idleTimer = armIdle();
-      entry.pending.idleTimer = idleTimer;
-      entry.pending.resetIdleTimer = () => {
-        if (idleTimer) clearTimeout(idleTimer);
-        if (entry.pending) {
-          idleTimer = armIdle();
-          entry.pending.idleTimer = idleTimer;
-        }
+      pending.activate = () => {
+        if (activated) return;
+        activated = true;
+        pending.activated = true;
+        pending.startedAt = Date.now();
+        idleTimer = armIdle();
+        pending.idleTimer = idleTimer;
+        maxTimer = setTimeout(
+          () => fireTimeout(`Turn exceeded ${maxTurnMs / 1000}s wall-clock ceiling`),
+          maxTurnMs,
+        );
+        pending.maxTimer = maxTimer;
       };
-      // Wall-clock ceiling: fires at maxTurnMs regardless of activity.
-      // Catches stuck API calls that emit occasional events (keeping the
-      // idle timer alive) but never produce a result. OpenClaw's only
-      // timer was wall-clock; polygram's 0.3.5 change replaced it with
-      // idle-reset, creating a gap this restores as a last-resort.
-      maxTimer = setTimeout(
-        () => fireTimeout(`Turn exceeded ${maxTurnMs / 1000}s wall-clock ceiling`),
-        maxTurnMs,
-      );
-      entry.pending.maxTimer = maxTimer;
+      pending.resetIdleTimer = () => {
+        if (!activated) return;
+        if (idleTimer) clearTimeout(idleTimer);
+        idleTimer = armIdle();
+        pending.idleTimer = idleTimer;
+      };
-      // Legacy alias: some callers / tests refer to entry.pending.timer.
-      entry.pending.timer = idleTimer;
+      entry.pendingQueue.push(pending);
+      entry.inFlight = true;
-      const wrappedResolve = entry.pending.resolve;
-      const wrappedReject = entry.pending.reject;
-      entry.pending.resolve = (r) => { clearTimers(); wrappedResolve(r); };
-      entry.pending.reject = (e) => { clearTimers(); wrappedReject(e); };
+      // If we're the only pending, activate immediately. Otherwise wait
+      // until the preceding pending is shifted out.
+      if (entry.pendingQueue.length === 1) pending.activate();
       try {
         entry.proc.stdin.write(JSON.stringify({
@@ -362,11 +420,10 @@ class ProcessManager {
           message: { role: 'user', content: prompt },
         }) + '\n');
       } catch (err) {
-        clearTimers();
-        entry.pending = null;
-        entry.inFlight = false;
-        reject(err);
-        return;
+        const idx = entry.pendingQueue.indexOf(pending);
+        if (idx !== -1) entry.pendingQueue.splice(idx, 1);
+        if (entry.pendingQueue.length === 0) entry.inFlight = false;
+        pending.reject(err);
       }
     });
   }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "polygram",
-  "version": "0.4.6",
+  "version": "0.4.9",
   "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
   "main": "lib/ipc-client.js",
   "bin": {

package/polygram.js CHANGED Viewed

@@ -26,7 +26,7 @@ const { buildPrompt } = require('./lib/prompt');
 const { filterAttachments, MAX_FILE_BYTES } = require('./lib/attachments');
 const { ProcessManager } = require('./lib/process-manager');
 const { createSender } = require('./lib/telegram');
-const { drainQueuesForChat: drainQueuesForChatImpl } = require('./lib/queue-utils');
+const { createAsyncLock } = require('./lib/async-lock');
 const { sweepInbox } = require('./lib/inbox');
 const { parseBotArg, parseDbArg, filterConfigToBot } = require('./lib/config-scope');
 const { createStore: createPairingsStore, parseTtl: parsePairingTtl } = require('./lib/pairings');
@@ -82,8 +82,11 @@ let ipcCloser = null;
 // single-valued), we keep them as plain module-level variables — not a map.
 let BOT_NAME = null;  // string, frozen after boot
 let bot = null;       // grammy Bot for BOT_NAME
-let streamers = new Map();  // sessionKey -> active Streamer (while turn is in flight)
-let reactors = new Map();   // sessionKey -> active ReactionManager (while turn is in flight)
+// 0.4.8 note: streamer + reactor are per-turn, not per-session. They live
+// on the pending's `context` object in the pm pendingQueue, keyed to the
+// specific turn (not the session). The old per-session Maps were a bug
+// for concurrent pendings — the second send() would overwrite the first's
+// streamer reference before the first turn finished.
 // Allowlist of env var names passed through to spawned Claude processes.
 // Anything not listed here is dropped to prevent leaked secrets/ssh agents
@@ -520,90 +523,103 @@ async function getOrSpawnForChat(sessionKey) {
   return pm.getOrSpawn(sessionKey, ctx);
 }
-async function sendToProcess(sessionKey, prompt) {
+async function sendToProcess(sessionKey, prompt, context = {}) {
   const entry = await getOrSpawnForChat(sessionKey);
   if (!entry) throw new Error('No process for chat');
   const chatId = getChatIdFromKey(sessionKey);
   const chatConfig = config.chats[chatId];
   const timeoutMs = (chatConfig.timeout || config.defaults.timeout) * 1000;
-  // Wall-clock ceiling (seconds). Overridable per-chat via chatConfig.maxTurn
-  // or globally via config.defaults.maxTurn. 30 min default is generous for
-  // long audits; stuck API calls rarely run that long without firing the
-  // idle timer first. Unit: seconds → milliseconds.
   const maxTurnMs = (chatConfig.maxTurn || config.defaults?.maxTurn || 1800) * 1000;
-  return pm.send(sessionKey, prompt, { timeoutMs, maxTurnMs });
-}
-// ─── Message queue (per-chat) ───────────────────────────────────────
-const queues = {};
-const processing = {};
-const MAX_QUEUE_DEPTH = 50; // per chat — cron storm or spammer insurance
-async function enqueue(sessionKey, chatId, msg, bot) {
-  if (!queues[sessionKey]) queues[sessionKey] = [];
-  if (queues[sessionKey].length >= MAX_QUEUE_DEPTH) {
-    // Drop oldest rather than rejecting newest — the user's freshest
-    // intent is more valuable than backlog. Emit an event so operators
-    // see this rather than a queue silently degrading.
-    queues[sessionKey].shift();
-    dbWrite(() => db.logEvent('queue-overflow', {
-      chat_id: chatId, session_key: sessionKey, cap: MAX_QUEUE_DEPTH,
-    }), 'log queue-overflow');
+  // Per-session stdin lock orders the write step, not the result-wait.
+  // pm.send's Promise executor writes stdin synchronously, so as soon as
+  // pm.send returns (not resolves — returns), the stdin write has
+  // happened. We release the lock right after that and await the result
+  // OUTSIDE the lock — otherwise one long turn would serialise the whole
+  // session, which is what we're trying to escape.
+  const release = await stdinLock.acquire(sessionKey);
+  let resultPromise;
+  try {
+    resultPromise = pm.send(sessionKey, prompt, { timeoutMs, maxTurnMs, context });
+  } finally {
+    release();
   }
-  queues[sessionKey].push({ msg, bot, chatId });
-  if (!processing[sessionKey]) processQueue(sessionKey);
+  return resultPromise;
 }
+// ─── Message dispatch ───────────────────────────────────────────────
+// 0.4.8: per-session concurrent dispatch. No FIFO polygram-level queue any
+// more — inbound messages immediately kick off handleMessage. Pre-work
+// (attachment download, voice transcription) runs in parallel across
+// messages; a per-session stdin lock (in sendToProcess) orders the
+// eventual pm.send writes so Claude reads user messages in arrival order
+// and replies come out in the same order.
+//
+// We still track in-flight handleMessage calls per session so we can:
+//   - emit a `queue-depth-warning` event if the count ever exceeds a
+//     threshold (abnormal inbound rate, slow pre-work, stuck bot)
+//   - (future) drain on shutdown if we want clean exit
+const CONCURRENT_WARN_THRESHOLD = 20;
+const inFlightHandlers = new Map(); // sessionKey → count
 // Sessions the operator just /stop'd (or natural-language "стоп"). Entries
-// suppress the generic "Sorry, I couldn't process" reply below — the abort
-// handler already sent its own "Остановлено." ack, and the subsequent
-// handleMessage rejection from the killed subprocess would otherwise
-// spam a second contradictory message. Cleared on first use; long-lived
-// only if the abort kills something that never finishes rejecting.
+// suppress the generic "Sorry, I couldn't process" reply — the abort
+// handler already sent its own "Остановлено." ack, and handleMessage
+// rejections from the killed subprocess would otherwise spam a second
+// contradictory message.
 const abortedSessions = new Set();
 function markSessionAborted(sessionKey) {
   abortedSessions.add(sessionKey);
 }
-async function processQueue(sessionKey) {
-  processing[sessionKey] = true;
-  while (queues[sessionKey]?.length > 0) {
-    const { msg, bot, chatId } = queues[sessionKey].shift();
-    try {
-      await handleMessage(sessionKey, chatId, msg, bot);
-    } catch (err) {
-      const wasAborted = abortedSessions.has(sessionKey);
-      if (wasAborted) abortedSessions.delete(sessionKey);
-      // Raw err.message can carry host paths, DB columns, internal state.
-      // Surface a generic message to the user; log the detail to events
-      // so operators can still debug.
-      console.error(`[${sessionKey}] Error:`, err.message);
-      dbWrite(() => db.logEvent('handler-error', {
-        chat_id: chatId, session_key: sessionKey,
-        msg_id: msg?.message_id,
-        error: err.message?.slice(0, 500),
-        stack: err.stack?.split('\n').slice(0, 5).join('\n'),
-        aborted: wasAborted || undefined,
-      }), 'log handler-error');
-      if (!wasAborted) {
-        try {
-          await tg(bot, 'sendMessage', {
-            chat_id: chatId,
-            text: `Sorry, I couldn't process that message. The operator has been notified.`,
-            reply_parameters: { message_id: msg.message_id },
-          }, { source: 'error-reply', botName: BOT_NAME });
-        } catch (replyErr) {
-          console.error(`[${sessionKey}] failed to send error reply: ${replyErr.message}`);
-        }
-      }
-    }
+// Called by bot.on('message') for every regular (non-admin, non-pair)
+// message. Runs handleMessage in a fire-and-forget manner with centralised
+// error handling. Replaces the old processQueue loop.
+function dispatchHandleMessage(sessionKey, chatId, msg, bot) {
+  const count = (inFlightHandlers.get(sessionKey) || 0) + 1;
+  inFlightHandlers.set(sessionKey, count);
+  if (count === CONCURRENT_WARN_THRESHOLD) {
+    dbWrite(() => db.logEvent('queue-depth-warning', {
+      chat_id: chatId, session_key: sessionKey,
+      in_flight: count, threshold: CONCURRENT_WARN_THRESHOLD,
+    }), 'log queue-depth-warning');
   }
-  processing[sessionKey] = false;
+  handleMessage(sessionKey, chatId, msg, bot).catch((err) => {
+    const wasAborted = abortedSessions.has(sessionKey);
+    if (wasAborted) abortedSessions.delete(sessionKey);
+    console.error(`[${sessionKey}] Error:`, err.message);
+    dbWrite(() => db.logEvent('handler-error', {
+      chat_id: chatId, session_key: sessionKey,
+      msg_id: msg?.message_id,
+      error: err.message?.slice(0, 500),
+      stack: err.stack?.split('\n').slice(0, 5).join('\n'),
+      aborted: wasAborted || undefined,
+    }), 'log handler-error');
+    if (!wasAborted) {
+      tg(bot, 'sendMessage', {
+        chat_id: chatId,
+        text: `Sorry, I couldn't process that message. The operator has been notified.`,
+        reply_parameters: { message_id: msg.message_id },
+      }, { source: 'error-reply', botName: BOT_NAME }).catch((replyErr) => {
+        console.error(`[${sessionKey}] failed to send error reply: ${replyErr.message}`);
+      });
+    }
+  }).finally(() => {
+    const n = (inFlightHandlers.get(sessionKey) || 1) - 1;
+    if (n <= 0) inFlightHandlers.delete(sessionKey);
+    else inFlightHandlers.set(sessionKey, n);
+  });
 }
-const drainQueuesForChat = (chatId) => drainQueuesForChatImpl(queues, chatId);
+// drainQueuesForChat is retained as a no-op for backwards compat with
+// call sites in /model, /effort, chat-migration, and abort handlers.
+// Returns 0 always; a drain isn't meaningful in the concurrent model —
+// callers that want to abort should rely on pm.killChat.
+const drainQueuesForChat = (_chatId) => 0;
+// Per-session lock ordering stdin writes. Module is I/O-pure.
+const stdinLock = createAsyncLock();
 // Typing indicator is imported from lib/typing-indicator — it adds a
 // per-chat circuit breaker with exponential backoff so a chat that
@@ -975,6 +991,25 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
     await sendReply(info);
     return;
   }
+  // Helper: request respawn across ALL sessionKeys owned by this chat (one
+  // per topic if isolateTopics=true, otherwise just the single chat-level
+  // key). Graceful: in-flight turns drain on old settings, new turns use
+  // the new settings. Returns { queued, anyActive }: queued sums res.queued
+  // over the matching keys; anyActive is true if any result was not killed.
+  const requestRespawnForChat = (reason) => {
+    const prefix = String(chatId);
+    let totalQueued = 0;
+    let anyActive = false;
+    for (const key of pm.keys()) {
+      if (key === prefix || key.startsWith(prefix + ':')) { // chat-level key or any "<chatId>:…" key
+        const res = pm.requestRespawn(key, reason);
+        totalQueued += res.queued;
+        if (!res.killed) anyActive = true; // presumably "not killed" means turns are still draining — confirm in pm.requestRespawn
+      }
+    }
+    return { queued: totalQueued, anyActive };
+  };
   if (botAllowsCommands && text.startsWith('/model ')) {
     const newModel = text.slice(7).trim();
     if (['opus', 'sonnet', 'haiku'].includes(newModel)) {
@@ -986,11 +1021,10 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
         old_value: oldModel, new_value: newModel,
         user: cmdUser, user_id: cmdUserId, source: 'command',
       }), 'log model change');
-      const droppedModel = drainQueuesForChat(chatId);
-      if (droppedModel) dbWrite(() => db.logEvent('queue-drained', { chat_id: chatId, reason: 'model-change', dropped: droppedModel }), 'log queue-drained');
-      await pm.killChat(chatId);
+      const { anyActive } = requestRespawnForChat('model-change');
       const ver = MODEL_VERSIONS[newModel] || newModel;
-      await sendReply(`Model → ${newModel} (${ver})`);
+      const suffix = anyActive ? ` — I'll switch when I finish` : '';
+      await sendReply(`Model → ${newModel} (${ver})${suffix}`);
     } else {
       await sendReply(`Unknown model. Use: opus, sonnet, haiku`);
     }
@@ -1007,10 +1041,9 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
         old_value: oldEffort, new_value: newEffort,
         user: cmdUser, user_id: cmdUserId, source: 'command',
       }), 'log effort change');
-      const droppedEffort = drainQueuesForChat(chatId);
-      if (droppedEffort) dbWrite(() => db.logEvent('queue-drained', { chat_id: chatId, reason: 'effort-change', dropped: droppedEffort }), 'log queue-drained');
-      await pm.killChat(chatId);
-      await sendReply(`Effort → ${newEffort}`);
+      const { anyActive } = requestRespawnForChat('effort-change');
+      const suffix = anyActive ? ` — I'll switch when I finish` : '';
+      await sendReply(`Effort → ${newEffort}${suffix}`);
     } else {
       await sendReply(`Unknown effort. Use: low, medium, high, xhigh, max`);
     }
@@ -1194,7 +1227,7 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
     throttleMs: botCfg.streamThrottleMs,
     logger: { error: (m) => console.error(`[${label}] ${m}`) },
   });
-  streamers.set(sessionKey, streamer);
+  // streamer is registered with this turn via pm.send's context (below)
   // Status reactions on the user's message: 👀 queued → 🤔 thinking →
   // 👨‍💻 coding / ⚡ web / 🔥 tool → 👍 done / 🤯 error. Silent (no
@@ -1213,11 +1246,15 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
     },
     logError: (m) => console.error(`[${label}] ${m}`),
   });
-  reactors.set(sessionKey, reactor);
   reactor.setState('THINKING');
   try {
-    const result = await sendToProcess(sessionKey, prompt);
+    // Pass streamer + reactor as per-turn context. pm's callbacks pick
+    // them off entry.pendingQueue[0].context so concurrent pendings each
+    // get routed to their own streamer/reactor.
+    const result = await sendToProcess(sessionKey, prompt, {
+      streamer, reactor, sourceMsgId: msg.message_id,
+    });
     const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
     stopTyping();
@@ -1305,12 +1342,12 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
     throw err;
   } finally {
     stopTyping();
-    streamers.delete(sessionKey);
+    // streamer is per-turn and not stored in any session Map in 0.4.8
     // Give the reactor a beat to flush the terminal state (DONE/ERROR/TIMEOUT
     // bypass throttle so this is instant in practice; the stop() below
     // guards against any late transition leaking after the turn ends).
     reactor.stop();
-    reactors.delete(sessionKey);
+    // reactor is per-turn and not stored in any session Map in 0.4.8
   }
 }
@@ -1513,7 +1550,7 @@ function createBot(token) {
     const threadId = msg.message_thread_id?.toString();
     const sessionKey = getSessionKey(chatId, threadId, chatConfig);
-    await enqueue(sessionKey, chatId, msg, bot);
+    dispatchHandleMessage(sessionKey, chatId, msg, bot);
   };
   // Media-group buffer: coalesce multi-photo uploads (Telegram delivers
@@ -1854,12 +1891,17 @@ async function main() {
       console.log(`[${entry.label}] Process exited (code ${code})`);
       dbWrite(() => db.logEvent('process-close', { chat_id: entry.chatId, session_key: sessionKey, code }), 'log process-close');
     },
-    onStreamChunk: (sessionKey, partial) => {
-      const s = streamers.get(sessionKey);
+    onStreamChunk: (sessionKey, partial, entry) => {
+      // Route to the head pending's per-turn streamer. In the 0.4.8
+      // concurrent-pending model, there can be N pendings queued — only
+      // the HEAD is the turn Claude is actively emitting events for.
+      const head = entry.pendingQueue?.[0];
+      const s = head?.context?.streamer;
       if (s) s.onChunk(partial).catch(() => {}); // rejections from onChunk are deliberately ignored here; no streamer on the head means the chunk is dropped
     },
-    onToolUse: (sessionKey, toolName) => {
-      const r = reactors.get(sessionKey);
+    onToolUse: (sessionKey, toolName, entry) => {
+      const head = entry.pendingQueue?.[0]; // head-of-queue pending; its context carries the per-turn reactor
+      const r = head?.context?.reactor;
       if (r) r.setState(classifyToolName(toolName)); // no-op when the head pending has no reactor
     },
   });