npm - polygram - Versions diffs - 0.8.0-rc.52 → 0.8.0-rc.54 - Mend

polygram 0.8.0-rc.52 → 0.8.0-rc.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/.claude-plugin/plugin.json +1 -1
package/lib/auto-resume.js +101 -0
package/lib/telegram-prompt.js +20 -21
package/package.json +1 -1
package/polygram.js +132 -0

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "$schema": "https://anthropic.com/claude-code/plugin.schema.json",
   "name": "polygram",
-  "version": "0.8.0-rc.52",
+  "version": "0.8.0-rc.54",
   "description": "Telegram integration for Claude Code that preserves the OpenClaw per-chat session model. Migration target for OpenClaw users. Multi-bot, multi-chat, per-topic isolation; SQLite transcripts; inline-keyboard approvals. Bundles /polygram:status|logs|pair-code|approvals admin commands and a history skill.",
   "keywords": [
     "telegram",

package/lib/auto-resume.js ADDED Viewed

@@ -0,0 +1,101 @@
+/**
+ * rc.54: auto-resume on 300s no-activity timeout.
+ *
+ * Background — the rc.54 incident pattern:
+ *   When polygram's per-turn watchdog fires "Timeout: 300s idle with
+ *   no Claude activity", the running SDK Query is torn down and the
+ *   user gets the friendly "⏳ I went quiet too long without finishing.
+ *   Try resending or simplifying." message. The session_id is preserved,
+ *   so the *next* user message resumes context — but the work the user
+ *   was waiting for is dropped on the floor.
+ *
+ *   Most timeouts are wedged tool calls (long Bash, hanging MCP, stuck
+ *   subagent). The wedged subprocess is dead by the time the watchdog
+ *   fires; a fresh resume of the same session_id will spawn a clean
+ *   Query and Claude has full prior context to continue.
+ *
+ * What this module provides: a per-session cooldown tracker so we
+ * don't auto-resume in a tight loop when the wedge is permanent.
+ *
+ *   - markAttempt(sessionKey) — record we just tried an auto-resume
+ *   - isInCooldown(sessionKey) — true if we attempted within the
+ *     cooldown window (default 10 min). Caller skips auto-resume and
+ *     falls back to the existing user-facing timeout reply.
+ *   - clear(sessionKey) — drop the timestamp (e.g. a successful turn
+ *     completed since the auto-resume — we're back to healthy).
+ */
+'use strict';
+const DEFAULT_COOLDOWN_MS = 10 * 60 * 1000; // 10 min
+function createAutoResumeTracker({ cooldownMs = DEFAULT_COOLDOWN_MS, now = Date.now } = {}) {
+  const lastAttemptAt = new Map();
+  return {
+    /**
+     * Returns true if the most recent attempt for this sessionKey was
+     * within `cooldownMs` ago. Use to gate further auto-resume
+     * attempts when a wedge keeps recurring.
+     */
+    isInCooldown(sessionKey) {
+      const ts = lastAttemptAt.get(sessionKey);
+      if (ts == null) return false;
+      return now() - ts < cooldownMs;
+    },
+    /**
+     * Record an auto-resume attempt. Call BEFORE dispatching the
+     * resumed turn so a fast follow-up timeout can still see this
+     * session is in cooldown.
+     */
+    markAttempt(sessionKey) {
+      lastAttemptAt.set(sessionKey, now());
+    },
+    /**
+     * Clear the cooldown for a session — called when a normal turn
+     * succeeds, signalling the session is healthy again. Without
+     * this, a session that auto-resumed once would be locked out of
+     * future auto-resumes for the full 10 min even after recovery.
+     */
+    clear(sessionKey) {
+      lastAttemptAt.delete(sessionKey);
+    },
+    /**
+     * Reset all tracked sessions. Called by daemon reload, tests.
+     */
+    reset() {
+      lastAttemptAt.clear();
+    },
+    // Test hooks
+    _size() { return lastAttemptAt.size; },
+    _get(sessionKey) { return lastAttemptAt.get(sessionKey); },
+  };
+}
+/**
+ * Decide whether an error is a candidate for auto-resume.
+ *
+ * Gates:
+ *   - error message matches the 300s no-activity timeout pattern
+ *     (NOT the wall-clock ceiling — that's usually a runaway, not
+ *     a wedge; resuming might just runaway again)
+ *   - NOT user-aborted (the user explicitly /stop'd; never resume)
+ *   - NOT a boot-replay (the user typed this minutes ago and moved
+ *     on; resuming now is more confusing than helpful)
+ *   - NOT during shutdown (boot replay will pick it up)
+ */
+function isAutoResumable({ error, aborted, replay, shuttingDown }) {
+  if (aborted || replay || shuttingDown) return false;
+  const msg = String(error?.message || error || '');
+  return /idle with no Claude activity/i.test(msg);
+}
+module.exports = {
+  createAutoResumeTracker,
+  isAutoResumable,
+  DEFAULT_COOLDOWN_MS,
+};

package/lib/telegram-prompt.js CHANGED Viewed

@@ -28,36 +28,35 @@
 const TELEGRAM_TABLE_WIDTH_BUDGET = 40;
 const POLYGRAM_DISPLAY_HINT = [
-  '## Telegram display constraints',
+  '## Telegram display rules',
   '',
-  'Your replies are sent to Telegram. The user reads them on phone or desktop.',
+  'Your replies render in the Telegram client. Phone is the design target.',
   '',
-  '**Tables:** Telegram renders markdown tables as monospace `<pre>` blocks.',
-  `On mobile portrait, lines wrap after ~${TELEGRAM_TABLE_WIDTH_BUDGET} chars and look broken.`,
+  '### Tables — HARD RULE',
   '',
-  '- Use a markdown table when **every** rendered row (including separators',
-  `  and padding) fits in ${TELEGRAM_TABLE_WIDTH_BUDGET} chars or fewer.`,
-  '- If any row would exceed that budget, **drop the table** and switch to',
-  '  vertical "row blocks": one entity per paragraph, **bold** headline,',
-  '  then `Field: value` per data point. Example:',
+  `Before emitting any markdown table, count the longest row in characters (including pipes \`|\`, padding, and separator dashes). If that row is longer than ${TELEGRAM_TABLE_WIDTH_BUDGET}, you MUST NOT emit a table. Use row blocks instead.`,
   '',
-  '  ```',
-  '  **Mini dress Keen → Black dress mini**',
-  '  COGS: ฿546 → ฿1144 (2.1×)',
-  '  Margin: 84.8% → 77% ↓',
+  'This applies even when the user is on desktop. Tables don\'t scroll horizontally on mobile; they wrap and become unreadable. Row blocks always work on every surface.',
   '',
-  '  **Tank top Sway → Top voluminous cotton**',
-  '  COGS: ฿360 → ฿947 (2.6×)',
-  '  Margin: 78.7% → 73% ↓',
-  '  ```',
+  '**Row block format:** one entity per paragraph, **bold** headline, then `Field: value` lines.',
   '',
-  '- Decide row-by-row before emitting; do not start a wide table assuming',
-  '  the user can scroll.',
+  '```',
+  '**Mini dress Keen → Black dress mini**',
+  'COGS: ฿546 → ฿1144 (2.1×)',
+  'Margin: 84.8% → 77% ↓',
+  '',
+  '**Tank top Sway → Top voluminous cotton**',
+  'COGS: ฿360 → ฿947 (2.6×)',
+  'Margin: 78.7% → 73% ↓',
+  '```',
+  '',
+  'Do NOT start a wide table assuming the user can scroll. Decide BEFORE you start writing the first `|` whether all rows will fit. If unsure, use row blocks — they\'re always safe.',
+  '',
+  '### Other Telegram quirks',
   '',
-  'Other Telegram quirks:',
   '- Headers `#`, `##`, `###` render as plain text — use **bold** for emphasis.',
   '- Horizontal rules render as a thin divider line.',
-  '- Long replies stream in chunks, so prefer concise structure over walls of text.',
+  '- Long replies stream in chunks; prefer concise structure over walls of text.',
 ].join('\n');
 /**

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "polygram",
-  "version": "0.8.0-rc.52",
+  "version": "0.8.0-rc.54",
   "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
   "main": "lib/ipc-client.js",
   "bin": {

package/polygram.js CHANGED Viewed

@@ -65,6 +65,7 @@ const { redactBotToken } = require('./lib/net-errors');
 const { createReactionManager, classifyToolName } = require('./lib/status-reactions');
 const { createMediaGroupBuffer } = require('./lib/media-group-buffer');
 const { classify: classifyError, isTransientHttpError } = require('./lib/error-classify');
+const { createAutoResumeTracker, isAutoResumable } = require('./lib/auto-resume');
 const {
   createStore: createApprovalsStore,
   matchesAnyPattern: matchesApprovalPattern,
@@ -1118,6 +1119,94 @@ const abortGrace = createAbortGrace();
 // Flag `sessionKey` on the shared abortGrace instance (abortGrace.mark).
 function markSessionAborted(sessionKey) { abortGrace.mark(sessionKey); }
 // Return abortGrace.isRecent(sessionKey) — presumably true while the
 // session is still inside the abort grace window; confirm in abortGrace.
 function isSessionRecentlyAborted(sessionKey) { return abortGrace.isRecent(sessionKey); }
+// rc.54: per-session cooldown for auto-resume on 300s no-activity
+// timeout. Without the cooldown, a permanently-wedged tool would
+// trigger an infinite resume → timeout → resume loop.
+const autoResumeTracker = createAutoResumeTracker();
+// rc.54: spawn a fresh Query resuming the same session_id and ask
+// Claude to continue the timed-out work. The killed pm Query has
+// already torn down the wedged subprocess (via pm.kill on timeout);
+// getOrSpawnForChat creates a new entry that picks up the saved
+// session_id from `sessions` table and sets `--resume <id>` on the
+// SDK Options. The continuation message tells Claude what happened
+// and that it has full prior context to keep going.
+//
+// Returns the result.text on success (already-sent to chat); throws
+// on any failure (caller writes auto-resume-failed event + falls
+// back to the standard timeout reply).
+async function attemptAutoResume(sessionKey, chatId, originalMsg, bot) {
+  const threadId = originalMsg.message_thread_id || null;
+  // 1. Tell the user we're auto-resuming so they don't think nothing
+  //    happened. Threaded under the original user message.
+  await tg(bot, 'sendMessage', {
+    chat_id: chatId,
+    text: '🔁 Auto-resuming after timeout — continuing where the previous turn left off.',
+    reply_parameters: { message_id: originalMsg.message_id },
+    ...(threadId && { message_thread_id: threadId }),
+  }, { source: 'auto-resume-indicator', botName: BOT_NAME }).catch((sendErr) => {
+    // Indicator is informational; don't fail the whole resume on it.
+    console.error(`[${sessionKey}] auto-resume indicator send failed: ${sendErr.message}`);
+  });
+  // 2. Continuation prompt. Plain text — no XML wrapper. The SDK
+  //    Query resumes the saved session_id, so Claude has full prior
+  //    transcript context including its own partially-streamed text
+  //    and tool calls. We just need to tell it WHAT happened and
+  //    that it should pick up where it left off.
+  const continuation = '[polygram] Your previous turn timed out at 300s with no Claude activity (likely a wedged tool call — long Bash, hanging MCP, or stuck subagent). Continue from where you left off; do not restart from scratch. If the same operation would just hang again, abort it and tell me.';
+  // 3. No-op streamer + reactor. We don't need to stream the resume
+  //    turn's response (we'll send it as one message at the end). pm
+  //    invokes streamer/reactor methods only when present; passing
+  //    minimal stubs keeps pm happy.
+  const noopStreamer = {
+    onChunk: async () => {},
+    forceNewMessage: () => {},
+    finalize: async () => ({ streamed: false }),
+    flushDraft: async () => {},
+    discard: async () => {},
+  };
+  const noopReactor = {
+    setState: () => {},
+    heartbeat: () => {},
+    clear: async () => {},
+    stop: () => {},
+  };
+  const result = await sendToProcess(sessionKey, continuation, {
+    streamer: noopStreamer,
+    reactor: noopReactor,
+    sourceMsgId: originalMsg.message_id,
+    threadId,
+    onFirstStream: () => {},
+  });
+  if (result?.error) {
+    throw new Error(`auto-resume turn errored: ${String(result.error).slice(0, 200)}`);
+  }
+  if (!result?.text) {
+    throw new Error('auto-resume turn produced no text');
+  }
+  // 4. Send the continuation reply as regular Telegram message(s),
+  //    threaded under the original user message. Reuse the existing
+  //    chunked-delivery + markdown-formatting primitives.
+  const chunks = chunkMarkdownText(result.text, TG_MAX_LEN);
+  await deliverReplies({
+    bot,
+    send: (b, method, params, m) => tg(b, method, params, m),
+    chatId,
+    threadId,
+    chunks,
+    replyToMessageId: originalMsg.message_id,
+    meta: { source: 'auto-resume-reply', botName: BOT_NAME },
+    logger: { error: (m) => console.error(`[${sessionKey}] auto-resume deliver: ${m}`) },
+  });
+  return result.text;
+}
 // Called by bot.on('message') for every regular (non-admin, non-pair)
 // message. Runs handleMessage in a fire-and-forget manner with centralised
 // error handling. Replaces the old processQueue loop.
@@ -1170,6 +1259,49 @@ function dispatchHandleMessage(sessionKey, chatId, msg, bot) {
     //    re-dispatch it on next start)
     //  - user just /stop'd (already saw their abort acknowledgement)
     if (!wasAborted && !isReplay && !isShuttingDown) {
+      // rc.54: auto-resume on 300s no-activity timeout. Spawn a fresh
+      // Query resuming the same session_id and inject a continuation
+      // nudge. This recovers from wedged tool calls (long Bash, hung
+      // MCP, stuck subagent) that polygram's watchdog catches but
+      // currently leaves the user stranded with "try resending".
+      // Skipped when the failed turn was ITSELF an auto-resume
+      // (msg._isAutoResume) to prevent recursion; per-session
+      // cooldown blocks tight loops on permanent wedges.
+      const isResumeTurn = msg._isAutoResume === true;
+      const resumable = !isResumeTurn && isAutoResumable({
+        error: err, aborted: wasAborted, replay: isReplay, shuttingDown: isShuttingDown,
+      });
+      if (resumable && !autoResumeTracker.isInCooldown(sessionKey)) {
+        autoResumeTracker.markAttempt(sessionKey);
+        logEvent('auto-resume-attempted', {
+          chat_id: chatId, session_key: sessionKey, msg_id: msg.message_id,
+          original_error: err.message?.slice(0, 200),
+        });
+        attemptAutoResume(sessionKey, chatId, msg, bot)
+          .then(() => {
+            logEvent('auto-resume-success', {
+              chat_id: chatId, session_key: sessionKey, msg_id: msg.message_id,
+            });
+            autoResumeTracker.clear(sessionKey);
+          })
+          .catch((resumeErr) => {
+            console.error(`[${sessionKey}] auto-resume failed: ${resumeErr?.message}`);
+            logEvent('auto-resume-failed', {
+              chat_id: chatId, session_key: sessionKey, msg_id: msg.message_id,
+              error: resumeErr?.message?.slice(0, 200),
+            });
+            // Fall back to the original error reply so the user isn't
+            // left with just the 🔁 indicator and no answer.
+            const fallbackText = errorReplyText(err);
+            if (fallbackText) {
+              tg(bot, 'sendMessage', {
+                chat_id: chatId, text: fallbackText,
+                reply_parameters: { message_id: msg.message_id },
+              }, { source: 'error-reply', botName: BOT_NAME }).catch(() => {});
+            }
+          });
+        return;
+      }
       // 0.7.7: errorReplyText may return null when the classifier
       // says "suppress reply" (e.g. INTERRUPTED inside abort grace —
       // user already saw their /stop ack). Skip the send call in