npm - polygram - Versions diffs - 0.5.3 → 0.5.5 - Mend

polygram 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/lib/db.js CHANGED Viewed

@@ -74,10 +74,10 @@ function wrap(db) {
   const insertOutboundPendingStmt = db.prepare(`
     INSERT INTO messages (
       chat_id, thread_id, user, text, direction, source, bot_name,
-      turn_id, session_id, status, ts, msg_id
+      turn_id, session_id, status, ts, msg_id, reply_to_id
     ) VALUES (
       @chat_id, @thread_id, @user, @text, 'out', @source, @bot_name,
-      @turn_id, @session_id, 'pending', @ts, @pending_id
+      @turn_id, @session_id, 'pending', @ts, @pending_id, @reply_to_id
     )
   `);
@@ -198,6 +198,7 @@ function wrap(db) {
         session_id: row.session_id || null,
         ts: row.ts || Date.now(),
         pending_id: row.pending_id,
+        reply_to_id: row.reply_to_id ?? null,
       });
     },
@@ -313,9 +314,11 @@ function wrap(db) {
     // Find inbound messages that were being processed when polygram stopped.
     // Scoped by bot_name via the chat_id → config mapping, so each bot only
-    // replays its own turns on boot. Scoped by olderThanMs (default 30 min)
-    // so we never resurrect ancient messages after a long outage.
-    getReplayCandidates({ chatIds, olderThanMs = 30 * 60 * 1000, limit = 100 } = {}) {
+    // replays its own turns on boot. Scoped by olderThanMs (default 3 min)
+    // so we never resurrect ancient messages — anything older than a few
+    // minutes is from before the user moved on, replaying it just confuses
+    // the conversation.
+    getReplayCandidates({ chatIds, olderThanMs = 3 * 60 * 1000, limit = 100 } = {}) {
       if (!Array.isArray(chatIds) || chatIds.length === 0) return [];
       const cutoff = Date.now() - olderThanMs;
       const placeholders = chatIds.map(() => '?').join(',');

package/lib/telegram.js CHANGED Viewed

@@ -114,6 +114,11 @@ async function send({ bot, method, params, db = null, meta = {}, logger = consol
   applyFormatting(method, params, meta);
+  // Capture which inbound this reply targets so the boot-replay dedupe
+  // (`hasOutboundReplyTo`) can match outbound→inbound. Without this every
+  // restart would re-dispatch already-answered messages.
+  const replyToId = params.reply_parameters?.message_id ?? null;
   let rowId = null;
   if (db && tracksMessage && chatId) {
     const pendingId = nextPendingId();
@@ -128,6 +133,7 @@ async function send({ bot, method, params, db = null, meta = {}, logger = consol
         turn_id: meta.turnId || null,
         session_id: meta.sessionId || null,
         pending_id: pendingId,
+        reply_to_id: replyToId,
       });
       rowId = result?.lastInsertRowid ?? null;
     } catch (err) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "polygram",
-  "version": "0.5.3",
+  "version": "0.5.5",
   "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
   "main": "lib/ipc-client.js",
   "bin": {

package/polygram.js CHANGED Viewed

@@ -530,20 +530,29 @@ async function sendToProcess(sessionKey, prompt, context = {}) {
   const chatConfig = config.chats[chatId];
   const timeoutMs = (chatConfig.timeout || config.defaults.timeout) * 1000;
   const maxTurnMs = (chatConfig.maxTurn || config.defaults?.maxTurn || 1800) * 1000;
-  // Per-session stdin lock orders the write step, not the result-wait.
-  // pm.send's Promise executor writes stdin synchronously, so as soon as
-  // pm.send returns (not resolves — returns), the stdin write has
-  // happened. We release the lock right after that and await the result
-  // OUTSIDE the lock — otherwise one long turn would serialise the whole
-  // session, which is what we're trying to escape.
+  // Hold the per-session lock across the FULL turn (write + result wait),
+  // not just the stdin write. Claude's stream-json input mode batches any
+  // user messages that arrive while a turn is in flight into the next
+  // turn — so writing pendingB's prompt while pendingA is still being
+  // worked on causes Claude to batch B+C and emit ONE result for them,
+  // leaving pendingC stuck forever (reactor stuck on 👀, reply mis-routed,
+  // 10-min idle timer eventually fires for the orphan).
+  //
+  // We tested this directly: 3 user messages written rapidly produced
+  // result#1="A" and result#2="B\nC" — pending#3 never got a result.
+  //
+  // Holding the lock across the whole turn means Claude never has more
+  // than one user message in its stdin buffer at once, so it can't batch.
+  // Cost: slight latency for back-to-back user messages — the second one
+  // waits for the first turn to finish before starting. The reactor on
+  // the queued message stays at 👀 (QUEUED) until its turn actually
+  // starts, which is the correct UX (and what the user already expects).
   const release = await stdinLock.acquire(sessionKey);
-  let resultPromise;
   try {
-    resultPromise = pm.send(sessionKey, prompt, { timeoutMs, maxTurnMs, context });
+    return await pm.send(sessionKey, prompt, { timeoutMs, maxTurnMs, context });
   } finally {
     release();
   }
-  return resultPromise;
 }
 // ─── Message dispatch ───────────────────────────────────────────────
@@ -562,6 +571,17 @@ async function sendToProcess(sessionKey, prompt, context = {}) {
 const CONCURRENT_WARN_THRESHOLD = 20;
 const inFlightHandlers = new Map(); // sessionKey → count
+// Set true by the SIGTERM/SIGINT handler. Module-scoped so the
+// fire-and-forget catch in dispatchHandleMessage can check it: when
+// polygram is going down, in-flight handlers reject with "Process
+// killed" / "Process exited" but those failures aren't "real" — the
+// next boot's replay will re-dispatch them. Suppressing the user-facing
+// "Sorry, I couldn't process" during shutdown removes a misleading
+// post-mortem apology that the user shouldn't have seen in the first
+// place. (The boot replay's own _isReplay flag handles the OTHER half:
+// suppressing the apology if the replay itself fails.)
+let isShuttingDown = false;
 // Sessions the operator just /stop'd (or natural-language "стоп"). Keyed
 // by sessionKey → timestamp of abort. ANY pending that rejects within
 // ABORT_GRACE_MS of the mark is considered abort-caused — its generic
@@ -601,6 +621,7 @@ function dispatchHandleMessage(sessionKey, chatId, msg, bot) {
   }
   handleMessage(sessionKey, chatId, msg, bot).catch((err) => {
     const wasAborted = isSessionRecentlyAborted(sessionKey);
+    const isReplay = msg._isReplay === true;
     console.error(`[${sessionKey}] Error:`, err.message);
     // Mark the row as 'failed' so boot replay doesn't re-dispatch it.
     // Exception: aborted sessions → 'aborted' (same — not replayable).
@@ -615,8 +636,15 @@ function dispatchHandleMessage(sessionKey, chatId, msg, bot) {
       error: err.message?.slice(0, 500),
       stack: err.stack?.split('\n').slice(0, 5).join('\n'),
       aborted: wasAborted || undefined,
+      replay: isReplay || undefined,
     }), 'log handler-error');
-    if (!wasAborted) {
+    // Suppress the "Sorry, I couldn't process" reply when:
+    //  - boot replay (user typed this minutes ago and moved on)
+    //  - polygram is shutting down (the failure is "Process killed" /
+    //    "Process exited" which isn't a real error — boot replay will
+    //    re-dispatch it on next start)
+    //  - user just /stop'd (already saw their abort acknowledgement)
+    if (!wasAborted && !isReplay && !isShuttingDown) {
       tg(bot, 'sendMessage', {
         chat_id: chatId,
         text: `Sorry, I couldn't process that message. The operator has been notified.`,
@@ -1129,9 +1157,14 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
   // Mark the inbound row as 'dispatched' so the boot replay loop knows
   // this turn started. Cleared to 'replied' (or 'failed') when done.
-  dbWrite(() => db.setInboundHandlerStatus({
-    chat_id: chatId, msg_id: msg.message_id, status: 'dispatched',
-  }), 'set handler_status=dispatched');
+  // Replays are pre-marked 'replay-attempted' by the boot loop and we
+  // must NOT overwrite that — it's the one-shot guard that keeps a
+  // failing-mid-flight replay from re-replaying on every subsequent boot.
+  if (!msg._isReplay) {
+    dbWrite(() => db.setInboundHandlerStatus({
+      chat_id: chatId, msg_id: msg.message_id, status: 'dispatched',
+    }), 'set handler_status=dispatched');
+  }
   const text = msg.text || msg.caption || '';
   const threadId = msg.message_thread_id;
@@ -1426,6 +1459,16 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
   // at which point we flip to THINKING (🤔).
   reactor.setState('QUEUED');
+  // Mark the inbound row terminal so boot replay doesn't pick it up again.
+  // Must fire down EVERY non-throwing exit path (early returns for error/
+  // NO_REPLY, streamed-reply early return, regular reply at end). 0.5.4
+  // hardened this — earlier versions only marked at the bottom of try, so
+  // streamed replies (which return at line ~1477) left handler_status
+  // stuck at 'dispatched' forever, causing replay loops on every restart.
+  const markReplied = () => dbWrite(() => db.setInboundHandlerStatus({
+    chat_id: chatId, msg_id: msg.message_id, status: 'replied',
+  }), 'set handler_status=replied');
   try {
     // Pass streamer + reactor as per-turn context. pm's callbacks pick
     // them off entry.pendingQueue[0].context so concurrent pendings each
@@ -1441,7 +1484,7 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
     if (result.error) {
       console.error(`[${label}] Error (${elapsed}s):`, result.error);
       reactor.setState('ERROR');
-      if (!result.text) return;
+      if (!result.text) { markReplied(); return; }
     } else {
       // Clear the progress reaction instead of stamping 👍 — the reply
       // bubble itself is the "done" signal and a permanent thumbs-up on
@@ -1450,7 +1493,7 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
       reactor.clear().catch(() => {});
     }
-    if (!result.text || result.text === 'NO_REPLY') return;
+    if (!result.text || result.text === 'NO_REPLY') { markReplied(); return; }
     const parsed = parseResponse(result.text);
     const outMeta = { ...outMetaBase, sessionId: result.sessionId, costUsd: result.cost };
@@ -1474,6 +1517,7 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
           }
         }
         console.log(`[${label}] ${elapsed}s | ${result.text.length} chars | streamed | ${chatConfig.model}/${chatConfig.effort} | $${result.cost?.toFixed(4) || '?'}`);
+        markReplied();
         return;
       }
       // Not streamed (response too short) — fall through to normal path.
@@ -1512,11 +1556,7 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
     }
     console.log(`[${label}] ${elapsed}s | ${result.text.length} chars | ${chatConfig.model}/${chatConfig.effort} | $${result.cost?.toFixed(4) || '?'}`);
-    // Success: mark the inbound row 'replied' so boot replay doesn't
-    // pick it up again on restart.
-    dbWrite(() => db.setInboundHandlerStatus({
-      chat_id: chatId, msg_id: msg.message_id, status: 'replied',
-    }), 'set handler_status=replied');
+    markReplied();
   } catch (err) {
     // If the user just aborted this session, silently finalise the stream
     // without the scary "⚠ stream interrupted" banner. The user has already
@@ -2130,10 +2170,9 @@ async function main() {
   // replay picks it up. Prevents "Sorry, I couldn't process that message"
   // from showing on every restart.
   const SHUTDOWN_DRAIN_MS = 30_000;
-  let shuttingDown = false;
   const shutdown = async () => {
-    if (shuttingDown) return;
-    shuttingDown = true;
+    if (isShuttingDown) return;
+    isShuttingDown = true;
     console.log('\nShutting down...');
     // 1. Stop accepting new inbound first so nothing new queues behind the drain.
     if (bot && bot._stop) bot._stop();
@@ -2256,6 +2295,19 @@ async function main() {
         }
         const chatConfig = config.chats[row.chat_id];
         if (!chatConfig) { skipped += 1; continue; }
+        // Tag the reconstructed message so dispatchHandleMessage knows
+        // (a) to suppress the "Sorry I couldn't process" error reply on
+        // failure and (b) to flag handler-error events as replay.
+        reconstructed._isReplay = true;
+        // Pre-mark 'replay-attempted' so even if this attempt is killed
+        // mid-turn by yet another restart, the next boot won't replay it
+        // again. Replay is one-shot — handleMessage will overwrite to
+        // 'replied' on success, or the catch will overwrite to 'failed'.
+        // Worst case (polygram dies before either): row stays
+        // 'replay-attempted', getReplayCandidates skips it, no loop.
+        dbWrite(() => db.setInboundHandlerStatus({
+          chat_id: row.chat_id, msg_id: row.msg_id, status: 'replay-attempted',
+        }), 'set handler_status=replay-attempted');
         const sessionKey = getSessionKey(row.chat_id, row.thread_id, chatConfig);
         dispatchHandleMessage(sessionKey, row.chat_id, reconstructed, bot);
         replayed += 1;