npm - polygram - Versions diffs - 0.7.0 → 0.7.2 - Mend

polygram 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/.claude-plugin/plugin.json +1 -1
package/README.md +1 -1
package/lib/announces.js +91 -11
package/lib/deliver.js +12 -3
package/lib/sent-cache.js +59 -11
package/lib/stream-reply.js +44 -19
package/lib/telegram-chunk.js +29 -19
package/lib/telegram.js +40 -15
package/package.json +1 -1
package/polygram.js +97 -25

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "$schema": "https://anthropic.com/claude-code/plugin.schema.json",
   "name": "polygram",
-  "version": "0.7.0",
+  "version": "0.7.2",
   "description": "Telegram integration for Claude Code that preserves the OpenClaw per-chat session model. Migration target for OpenClaw users. Multi-bot, multi-chat, per-topic isolation; SQLite transcripts; inline-keyboard approvals. Bundles /polygram:status|logs|pair-code|approvals admin commands and a history skill.",
   "keywords": [
     "telegram",

package/README.md CHANGED Viewed

@@ -364,7 +364,7 @@ foreign-chat clicks are rejected. Default-deny on IPC error.
 ## Development
 ```bash
-npm test           # 619 tests, 153 suites, node:test, no external services
+npm test           # 643 tests, 158 suites, node:test, no external services
 npm run coverage   # native test coverage (Node 22+, no devDeps)
 npm start -- --bot my-bot
 npm run split-db -- --config config.json --dry-run

package/lib/announces.js CHANGED Viewed

@@ -12,28 +12,102 @@
  * or `config.chats.<id>.announceSubagents`), so existing chats see
  * no behavior change.
  *
- * Note: this is the minimal MVP of OpenClaw's full subagent-announce
- * queue (which has debounce, drop policies, multi-channel routing).
- * Polygram's "subagents" are in-process (Claude Code Task tool), so
- * the announce path is just a one-shot informational sendMessage.
+ * 0.7.1 redesign: factory-based with split read/write predicates
+ * (canAnnounce / markAnnounced) and lazy GC. Pre-0.7.1 had a
+ * module-scoped Map and a mutate-on-check `shouldAnnounce` predicate
+ * — both anti-patterns flagged in design review. The free-function
+ * API (`shouldAnnounce`, `announce`) is preserved for back-compat,
+ * delegating to a default singleton.
  */
 const SUBAGENT_DEBOUNCE_MS = 30_000;
 /**
- * Per-chat debounce so a turn that spawns 5 subagents back-to-back
- * doesn't post 5 announces. Module-scoped Map keyed by chatId →
- * timestamp of last announce.
+ * Per-chat debounce tracker. Returns:
+ *   - canAnnounce(chatId): true if this chat hasn't announced within
+ *     the debounce window. Never records an announce (safe for
+ *     speculative checks), but may trigger the lazy sweep() below.
+ *   - markAnnounced(chatId): records `now` as the last announce time
+ *     for this chat. Caller invokes after a successful send.
+ *   - sweep(): drops entries older than `2 * debounceMs`. Run lazily by
+ *     canAnnounce whenever the Map holds more than `sweepThreshold` keys.
+ *   - size: getter (a property, not a method), for tests / diagnostics.
+ *   - clear(): for test isolation.
  */
-const lastAnnounceByChat = new Map();
+function createAnnouncer({
+  debounceMs = SUBAGENT_DEBOUNCE_MS,
+  clock = Date.now,
+  sweepThreshold = 1000,
+} = {}) {
+  const lastAnnounceByChat = new Map();
+  function key(chatId) { return String(chatId); }
+  function sweep() {
+    const cutoff = clock() - 2 * debounceMs;
+    for (const [k, ts] of lastAnnounceByChat) {
+      if (ts < cutoff) lastAnnounceByChat.delete(k);
+    }
+  }
+  function canAnnounce(chatId) {
+    if (lastAnnounceByChat.size > sweepThreshold) sweep();
+    const prev = lastAnnounceByChat.get(key(chatId));
+    return prev == null || (clock() - prev) >= debounceMs;
+  }
+  function markAnnounced(chatId) {
+    lastAnnounceByChat.set(key(chatId), clock());
+  }
+  return {
+    canAnnounce, markAnnounced, sweep,
+    get size() { return lastAnnounceByChat.size; },
+    clear() { lastAnnounceByChat.clear(); },
+  };
+}
+// Default per-process state for the back-compat free-function API.
+// Pre-0.7.1, this was the only API. Long-running daemons should still
+// prefer createAnnouncer() for tests / multi-bot isolation, but
+// polygram.js's single-bot-per-process model means the singleton works
+// fine for the production path. The Map is pruned lazily inside
+// shouldAnnounce when it grows past the sweep threshold.
+const _defaultLastAnnouncements = new Map();
+const _DEFAULT_SWEEP_THRESHOLD = 1000;
+/**
+ * Back-compat: pre-0.7.1 callers used `shouldAnnounce(chatId, now?,
+ * debounceMs?)` which is a "predicate that mutates" — call site is
+ * `if (shouldAnnounce(id)) await sendAnnounce()`. The mutation happens
+ * eagerly. Preserved verbatim for callers; new code should use
+ * `createAnnouncer()` and the explicit canAnnounce/markAnnounced split.
+ *
+ * 0.7.1: added lazy sweep so the Map doesn't grow unbounded over a
+ * multi-week-uptime daemon.
+ */
 function shouldAnnounce(chatId, now = Date.now(), debounceMs = SUBAGENT_DEBOUNCE_MS) {
-  const prev = lastAnnounceByChat.get(String(chatId));
+  if (_defaultLastAnnouncements.size > _DEFAULT_SWEEP_THRESHOLD) {
+    const cutoff = now - 2 * debounceMs;
+    for (const [k, ts] of _defaultLastAnnouncements) {
+      if (ts < cutoff) _defaultLastAnnouncements.delete(k);
+    }
+  }
+  const key = String(chatId);
+  const prev = _defaultLastAnnouncements.get(key);
   if (prev != null && now - prev < debounceMs) return false;
-  lastAnnounceByChat.set(String(chatId), now);
+  _defaultLastAnnouncements.set(key, now);
   return true;
 }
+/**
+ * Reset the default singleton state (for tests). Exported, but not
+ * part of the documented public API.
+ */
+function _resetDefaultAnnouncerForTests() {
+  _defaultLastAnnouncements.clear();
+}
 /**
  * Send a plain-text announce (no markdown processing, no reply linkage).
  * Caller passes `tg(bot, method, params, meta)` as `send` so we don't
@@ -67,4 +141,10 @@ async function announce({
   }
 }
-module.exports = { announce, shouldAnnounce, SUBAGENT_DEBOUNCE_MS };
+module.exports = {
+  announce,
+  shouldAnnounce,
+  createAnnouncer,
+  SUBAGENT_DEBOUNCE_MS,
+  _resetDefaultAnnouncerForTests,
+};

package/lib/deliver.js CHANGED Viewed

@@ -37,8 +37,13 @@ async function deliverReplies({
   logger = console,
 }) {
   if (!Array.isArray(chunks) || chunks.length === 0) {
-    return { sent: [], failed: [] };
+    return { sent: [], failed: [], results: [] };
   }
+  // 0.7.1: results[] preserves correspondence with chunks[] — results[i]
+  // describes what happened to chunks[i]. sent[]/failed[] are projections
+  // for back-compat with callers that already use them; they no longer
+  // ambiguously mean "the i-th success/failure" vs "chunk i's outcome".
+  const results = [];
   const sent = [];
   const failed = [];
   for (let i = 0; i < chunks.length; i++) {
@@ -56,14 +61,18 @@ async function deliverReplies({
     try {
       const res = await send(bot, 'sendMessage', params, meta);
       const msgId = res?.message_id ?? null;
+      results.push({ index: i, status: 'ok', messageId: msgId });
       sent.push(msgId);
     } catch (err) {
       logger.error?.(`[deliver] chunk ${i + 1}/${chunks.length} failed: ${err.message}`);
+      results.push({ index: i, status: 'fail', error: err.message });
       failed.push({ index: i, error: err.message });
-      // Keep going — partial delivery is better than total loss.
+      // Keep going — partial delivery is better than total loss. Caller
+      // should inspect failed.length and surface a warning to the user
+      // (see polygram.js handleMessage's stream-redeliver event log).
     }
   }
-  return { sent, failed };
+  return { sent, failed, results };
 }
 module.exports = { deliverReplies };

package/lib/sent-cache.js CHANGED Viewed

@@ -17,13 +17,54 @@
 const TTL_MS = 24 * 60 * 60 * 1000;
 const CLEANUP_THRESHOLD = 100;
+// 0.7.1: hard cap on per-chat Map size. CLEANUP_THRESHOLD only drops
+// EXPIRED entries — if a busy chat sends >100 fresh messages within
+// 24h, GC finds nothing to drop and the inner Map grows unbounded.
+// Cap evicts oldest entries past this point regardless of TTL.
+const MAX_PER_CHAT = 500;
+// 0.7.1: outer Map sweep — once more than this many chats are tracked,
+// record() drops every chat whose inner Map has been drained to empty.
+const OUTER_SWEEP_THRESHOLD = 1000;
-function createSentCache() {
+function createSentCache({
+  ttlMs = TTL_MS,
+  cleanupThreshold = CLEANUP_THRESHOLD,
+  maxPerChat = MAX_PER_CHAT,
+  outerSweepThreshold = OUTER_SWEEP_THRESHOLD,
+  clock = Date.now,
+} = {}) {
   // chatKey → Map<msgId, ts>
   const sentMessages = new Map();
   function chatKey(chatId) { return String(chatId); }
+  function gcInner(entry) {
+    const cutoff = clock() - ttlMs;
+    for (const [id, ts] of entry) if (ts < cutoff) entry.delete(id);
+    // After TTL prune, if still over the hard cap, drop oldest entries
+    // (insertion order in Map iteration). This handles the busy-chat
+    // case where 1000 messages all sent within 24h would otherwise
+    // leak.
+    if (entry.size > maxPerChat) {
+      const dropCount = entry.size - maxPerChat;
+      let i = 0;
+      for (const id of entry.keys()) {
+        if (i >= dropCount) break;
+        entry.delete(id);
+        i += 1;
+      }
+    }
+  }
+  function gcOuter() {
+    // Drop chat entries that are entirely empty (their inner Map was
+    // drained by gcInner). Without this the outer Map's chatId set
+    // grows by one per ever-active-then-idle chat, slowly leaking.
+    for (const [k, entry] of sentMessages) {
+      if (entry.size === 0) sentMessages.delete(k);
+    }
+  }
   function record(chatId, messageId) {
     if (chatId == null || messageId == null) return;
     const key = chatKey(chatId);
@@ -32,13 +73,10 @@ function createSentCache() {
       entry = new Map();
       sentMessages.set(key, entry);
     }
-    entry.set(messageId, Date.now());
-    // Lazy GC: when the per-chat map gets crowded, drop expired
-    // entries. Cheap (O(n) over n ≤ 100 + a bit) and amortises to O(1).
-    if (entry.size > CLEANUP_THRESHOLD) {
-      const cutoff = Date.now() - TTL_MS;
-      for (const [id, ts] of entry) if (ts < cutoff) entry.delete(id);
-    }
+    entry.set(messageId, clock());
+    if (entry.size > cleanupThreshold) gcInner(entry);
+    // Periodic outer sweep — runs only when the outer Map gets crowded.
+    if (sentMessages.size > outerSweepThreshold) gcOuter();
   }
   function wasSent(chatId, messageId) {
@@ -48,8 +86,10 @@ function createSentCache() {
     if (!entry) return false;
     const ts = entry.get(messageId);
     if (ts == null) return false;
-    if (Date.now() - ts > TTL_MS) {
+    if (clock() - ts > ttlMs) {
       entry.delete(messageId);
+      // If we just emptied the inner Map, drop the outer entry too.
+      if (entry.size === 0) sentMessages.delete(key);
       return false;
     }
     return true;
@@ -61,11 +101,19 @@ function createSentCache() {
     return total;
   }
+  function chatCount() { return sentMessages.size; }
   function clear() {
     sentMessages.clear();
   }
-  return { record, wasSent, size, clear };
+  return { record, wasSent, size, chatCount, clear };
 }
-module.exports = { createSentCache, TTL_MS, CLEANUP_THRESHOLD };
+module.exports = {
+  createSentCache,
+  TTL_MS,
+  CLEANUP_THRESHOLD,
+  MAX_PER_CHAT,
+  OUTER_SWEEP_THRESHOLD,
+};

package/lib/stream-reply.js CHANGED Viewed

@@ -1,24 +1,31 @@
 /**
  * Live streaming-reply state machine for a single turn.
  *
- * Lifecycle (0.7.0):
+ * Lifecycle:
  *   idle  -> (text >= minChars) -> live
  *   live  -> (subsequent chunks) -> live       (throttled edits)
+ *   live  -> flushDraft()         -> live      (drains pending edit)
  *   live  -> forceNewMessage()    -> idle      (next chunk = new bubble)
  *   live  -> discard()            -> finalized (bubble deleted)
  *   any   -> finalize(finalText)  -> finalized
  *
- * The streamer never talks to Telegram directly — callers inject `send(text)`,
- * `edit(msg_id, text)`, and (new in 0.7.0) optional `deleteMessage(msg_id)`.
+ * The streamer never talks to Telegram directly — callers inject
+ * `send(text)`, `edit(msg_id, text)`, and (optional) `deleteMessage(msg_id)`.
  * That keeps polygram.js in charge of transcript writes, sticker/reaction
  * routing, and error handling; this module is just a cadence machine.
  *
- * 0.7.0 finalize() returns rich result so the caller can decide whether the
- * preview's last edit IS the final reply, or whether to discard the preview
- * and redeliver via deliverReplies (overflow / final edit failed). This is
- * the OpenClaw pattern: short replies preview-becomes-final (no flicker),
- * long replies preview-deleted-redelivered (single coherent bubble flow at
- * chat bottom).
+ * `finalize()` returns a rich result so the caller can decide whether the
+ * preview's last edit IS the final reply, or whether to discard the
+ * preview and redeliver via deliverReplies (overflow / final edit failed):
+ *
+ *   (no explicit `kind` field — the outcome is encoded in these flags)
+ *   { streamed: false }                                  — never went live
+ *   { streamed: true, finalEditOk: true }                — preview = final
+ *   { streamed: true, finalEditOk: false, overflow: true } — body too long
+ *   { streamed: true, finalEditOk: false, overflow: false } — edit failed
+ *
+ * Short replies preview-becomes-final (no flicker, single bubble); long
+ * replies preview-deleted-redelivered (chunks land at chat bottom).
  *
  * Test-friendly: inject `clock` (now() fn) and `schedule` (setTimeout-like)
  * so a fake clock can drive throttle timing deterministically.
@@ -50,12 +57,19 @@ function createStreamer({
   let lastEditTs = 0;
   let pendingEdit = null;   // timer id
   let flushPromise = null;  // ongoing edit promise (for back-pressure)
+  // 0.7.2: msg_ids of bubbles that have been superseded by
+  // forceNewMessage(). The caller (polygram.js handleMessage at
+  // end-of-turn) reads getArchived() and issues deleteMessage on
+  // each — matches OpenClaw's archivedAnswerPreviews cleanup so
+  // the user sees only the final answer's bubble, not every
+  // "thinking out loud" intermediate from a tool-heavy turn.
+  const archived = [];
-  // 0.7.0: this is the LIVE-EDIT truncation, used during streaming
-  // when latestText overshoots maxLen. The trailing "..." signals to
-  // the user that more is coming. At finalize time, we DON'T truncate
-  // — we either edit-to-final-as-is (caller already chunked correctly)
-  // or signal overflow back to the caller.
+  // LIVE-EDIT truncation only — used during streaming when latestText
+  // overshoots maxLen. The trailing "..." signals to the user that more
+  // is coming. Finalize doesn't truncate: overflow is handled by
+  // signalling the caller to discard-and-redeliver via chunkMarkdownText,
+  // which preserves all content without any byte-cut.
   function truncateForLive(s) {
     if (s.length <= maxLen) return s;
     return s.slice(0, maxLen - 3) + '...';
@@ -132,15 +146,20 @@ function createStreamer({
     if (flushPromise) { try { await flushPromise; } catch {} }
   }
-  // 0.7.0: reset bubble state so the next onChunk creates a NEW message.
-  // Used by the upcoming Phase 7 F (forceNewMessage on assistant-
-  // message-start) — when Claude emits a new top-level assistant message
-  // mid-turn (post tool-result), we want it in its own bubble below
-  // the previous one, not appended via edit.
+  // Reset bubble state so the next onChunk creates a NEW message.
+  // Used by `onAssistantMessageStart` in process-manager.js when Claude
+  // emits a new top-level assistant message mid-turn (post tool-result):
+  // we want it in its own bubble below the previous one, not appended
+  // via editMessageText to the original.
   function forceNewMessage() {
     if (pendingEdit) { cancel(pendingEdit); pendingEdit = null; }
     // Don't await flushPromise — the caller has decided to start a new
     // message; whatever the old bubble shows is "done".
+    // 0.7.2: track the previous bubble's msgId for end-of-turn cleanup.
+    // Without this, every intermediate "thinking out loud" assistant
+    // message in a tool-heavy turn leaves a permanent bubble in the
+    // chat — the user wants only the final answer's bubble visible.
+    if (msgId != null) archived.push(msgId);
     msgId = null;
     currentText = '';
     latestText = '';
@@ -232,6 +251,11 @@ function createStreamer({
     }
   }
+  // 0.7.2: snapshot of bubble msgIds that forceNewMessage() superseded.
+  // Returns a copy so callers can't mutate internal state. polygram.js
+  // reads this at end-of-turn and issues deleteMessage on each.
+  function getArchived() { return archived.slice(); }
   return {
     onChunk,
     finalize,
@@ -239,6 +263,7 @@ function createStreamer({
     forceNewMessage,
     discard,
     archive,
+    getArchived,
     // Introspection for tests:
     get state() { return state; },
     get msgId() { return msgId; },

package/lib/telegram-chunk.js CHANGED Viewed

@@ -1,24 +1,26 @@
 /**
  * Markdown-aware chunking for Telegram-bound replies.
  *
- * Direct port of OpenClaw's chunkMarkdownText (`extensions/telegram` uses
- * `chunkerMode: 'markdown'`). The naive byte-cut chunker we shipped pre-0.7.0
- * landed boundaries mid-word and mid-HTML-tag, which Telegram's parse_mode=HTML
- * rejected with `400 can't parse entities` — bubbles froze and content got
- * dropped (see msg 10794 incident). This algorithm guarantees:
+ * Direct port of OpenClaw's chunkMarkdownText. The naive byte-cut
+ * chunker we shipped before this would land boundaries mid-word and
+ * mid-HTML-tag, which Telegram's parse_mode=HTML rejected with
+ * `400 can't parse entities` — bubbles froze and content got dropped.
+ *
+ * Guarantees:
  *
  *   1. No chunk exceeds `limit`.
  *   2. Breaks prefer newlines over whitespace over hard-cut.
- *   3. Code fences (```...```) are never broken silently — if a chunk would
- *      land inside a fence, we close it on chunk N and re-open with the same
- *      marker + language on chunk N+1, so each chunk is independently
- *      parseable.
- *   4. Parenthesised expressions `(...)` aren't broken at whitespace inside
- *      the parens (avoids splitting `[markdown link](http://example.com/...)`).
+ *   3. Code fences (```...```) are never broken silently — if a chunk
+ *      would land inside a fence, we close it on chunk N and re-open
+ *      with the same marker + language on chunk N+1, so each chunk is
+ *      independently parseable.
+ *   4. Parenthesised expressions `(...)` aren't broken at whitespace
+ *      inside the parens (avoids splitting markdown-link syntax like
+ *      `[label](http://example.com/...)`).
  *
- * Plain `chunkText` (no fence handling) is exported for callers that already
- * know the input has no markdown — primarily code paths handling raw user
- * input echoes or non-text payloads.
+ * Plain `chunkText` (no fence handling) is exported for callers that
+ * already know the input has no markdown — primarily code paths
+ * handling raw user input echoes or non-text payloads.
  */
 // ─── Code-fence span detection ──────────────────────────────────────
@@ -105,11 +107,19 @@ function scanParenAwareBreakpoints(window, isAllowed = () => true) {
 // ─── Chunkers ────────────────────────────────────────────────────────
-// Common early-out: empty / non-positive limit / fits-in-one returns
-// directly so the loop bodies can assume there's real work to do.
+// Common early-out: empty / fits-in-one returns directly so the loop
+// bodies can assume there's real work to do. `limit ≤ 0` is treated as
+// a programmer error and throws — silently returning [text] would let
+// a misread config pass through a body that exceeds Telegram's actual
+// 4096-char cap, which the chunker exists to prevent.
 function resolveChunkEarlyReturn(text, limit) {
-  if (!text) return [];
-  if (limit <= 0) return [text];
+  if (typeof limit !== 'number' || !Number.isFinite(limit) || limit <= 0) {
+    throw new RangeError(`chunk limit must be a positive number; got ${limit}`);
+  }
+  if (text == null || text === '') return [];
+  if (typeof text !== 'string') {
+    throw new TypeError(`chunk text must be a string; got ${typeof text}`);
+  }
   if (text.length <= limit) return [text];
   return undefined;
 }
@@ -119,7 +129,7 @@ function resolveChunkEarlyReturn(text, limit) {
 // Negative / out-of-range break indices fall back to hard-cut at limit.
 function chunkTextByBreakResolver(text, limit, resolveBreakIndex) {
   if (!text) return [];
-  if (limit <= 0 || text.length <= limit) return [text];
+  if (text.length <= limit) return [text];
   const chunks = [];
   let remaining = text;
   while (remaining.length > limit) {

package/lib/telegram.js CHANGED Viewed

@@ -173,20 +173,32 @@ async function send({ bot, method, params, db = null, meta = {}, logger = consol
   let res;
   const rawAttempt = async (p) => bot.api.raw[method](p);
-  // safeAttempt wraps every API call with three OpenClaw-style fallbacks:
-  //   1. MESSAGE_NOT_MODIFIED on editMessageText → swallow as success.
-  //      The streamer's debounced edit can land on text that exactly
-  //      matches the bubble's current state (no-op edit). Telegram
-  //      returns 400; we treat it as success and skip the noise.
-  //   2. HTML parse error (`can't parse entities` etc) → retry the
-  //      same call as plain text, no parse_mode, original raw value
-  //      restored to the formatted field. Saves the call when our
-  //      markdown→HTML conversion produces malformed output (the
-  //      msg-10794 case: streamer truncate cut mid `**bold**` marker).
-  //   3. 429 rate limit → sleep retry_after seconds, retry once.
-  //      Telegram's per-bot limit is ~30 req/s; high-effort xhigh turns
-  //      with many parallel sessions can occasionally hit it. Honor
-  //      Telegram's hint rather than bombing the call.
+  // OpenClaw-style fallback layers, composing outermost-to-innermost:
+  //
+  //   withThreadFallback (outer)  — strips message_thread_id and
+  //                                 retries on TOPIC_DELETED / "thread
+  //                                 not found"
+  //   withPreConnectRetry         — single retry on transient pre-
+  //                                 connect errors (DNS flap, TCP
+  //                                 refused, ENETUNREACH); never
+  //                                 retries post-connect errors that
+  //                                 might have landed
+  //   safeAttempt                 — sleeps `retry_after` and retries
+  //                                 once on 429
+  //   tryOnce (innermost)         — handles two per-call recoveries:
+  //                                 (a) MESSAGE_NOT_MODIFIED on
+  //                                 editMessageText → synthetic
+  //                                 success (streamer debounce often
+  //                                 lands on no-op edits, Telegram
+  //                                 returns 400 we don't want to
+  //                                 propagate); (b) HTML parse error
+  //                                 → retry as plain text with the
+  //                                 raw pre-conversion field value
+  //                                 restored
+  //   rawAttempt                  — bot.api.raw[method](params)
+  //
+  // Each layer is a closure built per call; allocation cost is
+  // negligible vs. network RTT.
   const RETRY_AFTER_CAP_MS = 60_000;
   const tryOnce = async (p) => {
     try {
@@ -209,7 +221,20 @@ async function send({ bot, method, params, db = null, meta = {}, logger = consol
         const plainParams = { ...p };
         delete plainParams.parse_mode;
         plainParams[formatField] = rawFieldValue;
-        return await rawAttempt(plainParams);
+        try {
+          return await rawAttempt(plainParams);
+        } catch (plainErr) {
+          // 0.7.1: if the plain retry also fails, preserve BOTH errors
+          // in the message that propagates. Pre-fix, only `plainErr`
+          // bubbled up — operators investigating from markOutboundFailed
+          // saw e.g. "Forbidden: bot was kicked" and missed that the
+          // ORIGINAL failure was a markdown→HTML parse bug.
+          const origMsg = redactBotToken(err.message)?.slice(0, 200);
+          const wrapped = new Error(`plain-retry failed (after HTML parse error: ${origMsg}): ${plainErr.message}`);
+          if (plainErr.code) wrapped.code = plainErr.code;
+          if (plainErr.parameters) wrapped.parameters = plainErr.parameters;
+          throw wrapped;
+        }
       }
       throw err;
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "polygram",
-  "version": "0.7.0",
+  "version": "0.7.2",
   "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
   "main": "lib/ipc-client.js",
   "bin": {

package/polygram.js CHANGED Viewed

@@ -1664,21 +1664,34 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
     ...(linkPreview === false ? { linkPreview: false } : {}),
   };
+  // 0.7.2: only the FIRST bubble in a turn quotes the user's message
+  // via reply_parameters. When a tool-heavy turn produces multiple
+  // assistant messages (each spawning its own bubble via
+  // forceNewMessage), subsequent bubbles shouldn't re-quote the user
+  // — the chat would show N copies of the same quoted message stacked
+  // vertically. After the first send, the flag flips and subsequent
+  // initial-sends omit reply_parameters.
+  let firstBubbleSent = false;
   // Streaming is unconditional as of 0.4.0 — matches OpenClaw's model and
   // eliminates the "stuck at 15min typing" complaint from the non-streaming
   // code path. For short responses the streamer stays idle and we fall
   // through to the normal send path via finalize() returning streamed=false.
   const streamer = createStreamer({
-    send: async (text) => tg(bot, 'sendMessage', {
-      chat_id: chatId, text,
-      // allow_sending_without_reply: long-running turns give the user
-      // plenty of time to delete their original message. Without this
-      // flag, Telegram rejects the reply with MESSAGE_NOT_FOUND and the
-      // whole streamed answer is lost. With it, the reply simply lands
-      // as a standalone message.
-      reply_parameters: { message_id: msg.message_id, allow_sending_without_reply: true },
-      ...(threadId && { message_thread_id: threadId }),
-    }, outMetaBase),
+    send: async (text) => {
+      const params = {
+        chat_id: chatId, text,
+        ...(threadId && { message_thread_id: threadId }),
+      };
+      if (!firstBubbleSent) {
+        // allow_sending_without_reply: long-running turns give the user
+        // plenty of time to delete their original message. Without this
+        // flag, Telegram rejects the reply with MESSAGE_NOT_FOUND and the
+        // whole streamed answer is lost.
+        params.reply_parameters = { message_id: msg.message_id, allow_sending_without_reply: true };
+        firstBubbleSent = true;
+      }
+      return tg(bot, 'sendMessage', params, outMetaBase);
+    },
     edit: async (messageId, text) => {
       try {
         // Route edits through tg() so applyFormatting runs (MarkdownV2
@@ -1725,6 +1738,34 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
   });
   // streamer is registered with this turn via pm.send's context (below)
+  // 0.7.2: clean up bubbles superseded by forceNewMessage() — the
+  // intermediate "thinking out loud" assistant messages that fired in
+  // a tool-heavy turn. Without this, every tool-result cycle leaves a
+  // permanent bubble in the chat (see the screenshot from the post-
+  // 0.7.1 deploy where six bubbles appeared for one logical turn).
+  // Matches OpenClaw's archivedAnswerPreviews end-of-turn cleanup.
+  // Call AFTER finalize/discard decisions so we never delete the
+  // bubble that's the final reply.
+  async function cleanupArchivedBubbles() {
+    const archived = streamer.getArchived?.() || [];
+    if (archived.length === 0) return;
+    for (const messageId of archived) {
+      try {
+        await tg(bot, 'deleteMessage', {
+          chat_id: chatId, message_id: messageId,
+        }, { source: 'bot-reply-archived-cleanup', botName: BOT_NAME });
+      } catch (err) {
+        // Non-fatal — message may be >48h old or already gone.
+        // Logged to stderr only; the event below records just the count.
+        console.error(`[${label}] archived-cleanup ${messageId}: ${err.message}`);
+      }
+    }
+    logEvent('telegram-archived-cleanup', {
+      chat_id: chatId, msg_id: msg.message_id, count: archived.length,
+      bot: BOT_NAME,
+    });
+  }
   // Status reactions on the user's message: 👀 queued → 🤔 thinking →
   // 👨‍💻 coding / ⚡ web / 🔥 tool → 👍 done / 🤯 error. Silent (no
   // notifications), updates in place, one emoji per message. Uses
@@ -1748,12 +1789,13 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
   // at which point we flip to THINKING (🤔).
   reactor.setState('QUEUED');
-  // Mark the inbound row terminal so boot replay doesn't pick it up again.
-  // Must fire down EVERY non-throwing exit path (early returns for error/
-  // NO_REPLY, streamed-reply early return, regular reply at end). 0.5.4
-  // hardened this — earlier versions only marked at the bottom of try, so
-  // streamed replies (which return at line ~1477) left handler_status
-  // stuck at 'dispatched' forever, causing replay loops on every restart.
+  // Mark the inbound row terminal so boot replay doesn't pick it up
+  // again. Must fire down EVERY non-throwing exit path (early returns
+  // for error / NO_REPLY, streamed-reply preview-becomes-final, the
+  // discard+redeliver branch, regular reply at end). Earlier versions
+  // only marked at the bottom of try, so streamed-reply early returns
+  // left handler_status stuck at 'dispatched' forever and the next
+  // boot replayed every long turn.
   const markReplied = () => dbWrite(() => db.setInboundHandlerStatus({
     chat_id: chatId, msg_id: msg.message_id, status: 'replied',
   }), 'set handler_status=replied');
@@ -1800,6 +1842,13 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
     // those still markReplied silently.
     if (result.text === 'NO_REPLY') { markReplied(); return; }
     if (!result.text) {
+      // 0.7.1: if the fallback send itself fails, throw rather than
+      // silently markReplied. Pre-fix the user got nothing AND the
+      // inbound was marked replied, so boot replay never redispatched
+      // it — the msg-10794 anti-pattern. Promote to a thrown error
+      // so dispatchHandleMessage's catch branches correctly:
+      //   shutdown   → 'replay-pending' (boot replay retries)
+      //   runtime    → 'failed' + user-visible apology via errorReplyText
       try {
         await tg(bot, 'sendMessage', {
           chat_id: chatId,
@@ -1808,7 +1857,12 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
           reply_parameters: { message_id: msg.message_id, allow_sending_without_reply: true },
         }, { ...outMetaBase, source: 'empty-response-fallback' });
       } catch (err) {
-        console.error(`[${label}] empty-response fallback send failed: ${err.message}`);
+        reactor.setState('ERROR');
+        logEvent('telegram-empty-response-fallback-failed', {
+          chat_id: chatId, msg_id: msg.message_id, bot: BOT_NAME,
+          error: err.message?.slice(0, 200),
+        });
+        throw new Error(`empty-response fallback send failed: ${err.message}`);
       }
       logEvent('telegram-empty-response-fallback', {
         chat_id: chatId, msg_id: msg.message_id, bot: BOT_NAME,
@@ -1820,7 +1874,7 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
     const parsed = parseResponse(result.text);
     const outMeta = { ...outMetaBase, sessionId: result.sessionId, costUsd: result.cost };
-    // 0.7.0 streamed text path: OpenClaw's preview-becomes-final flow.
+    // OpenClaw's preview-becomes-final flow:
     //
     //   1. flushDraft() — drain any pending throttled edit so the
     //      bubble's visible state is up-to-date before deciding.
@@ -1828,12 +1882,11 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
     //      final body. Returns rich result describing whether the
     //      preview can stand as the final reply.
     //   3a. finalEditOk:true        → preview IS final, done.
-    //   3b. overflow OR !finalEditOk → discard preview, redeliver via
-    //      deliverReplies(chunkMarkdownText(...)). This is the path
-    //      that fixes msg-10794: if the live bubble couldn't render
-    //      the full body (size or parse error), we delete it cleanly
-    //      and send the proper chunks fresh at chat bottom — no
-    //      content lost, no stranded edit-failure bubble.
+    //   3b. overflow OR !finalEditOk → discard preview, redeliver
+    //      via deliverReplies(chunkMarkdownText(...)). The bubble
+    //      couldn't render the full body (size or parse error), so
+    //      we delete it cleanly and send the proper chunks fresh at
+    //      chat bottom — no content lost, no stranded bubble.
     if (parsed.text) {
       await streamer.flushDraft();
       const fin = await streamer.finalize(parsed.text);
@@ -1841,6 +1894,7 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
         if (fin.finalEditOk) {
           // Preview was successfully edited to the final text.
           // No follow-up messages needed.
+          await cleanupArchivedBubbles();
           console.log(`[${label}] ${elapsed}s | ${result.text.length} chars | streamed | ${chatConfig.model}/${chatConfig.effort} | $${result.cost?.toFixed(4) || '?'}`);
           markReplied();
           return;
@@ -1868,7 +1922,25 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
           delivered: r.sent.length, failed: r.failed.length,
           bot: BOT_NAME,
         });
-        console.log(`[${label}] ${elapsed}s | ${result.text.length} chars | streamed-redeliver(${reason}, ${chunks.length} chunks) | ${chatConfig.model}/${chatConfig.effort} | $${result.cost?.toFixed(4) || '?'}`);
+        // 0.7.1: surface partial-failure to the user. Without this,
+        // a chunk-3-of-5 failure leaves a coherent-looking reply with
+        // a silent gap (the user reads chunks 1, 2, 4, 5 unaware
+        // that chunk 3 was dropped). Append a warning + flip the
+        // reactor to ERROR so something visible signals "look here".
+        if (r.failed.length > 0) {
+          reactor.setState('ERROR');
+          try {
+            await tg(bot, 'sendMessage', {
+              chat_id: chatId,
+              text: `⚠️ ${r.failed.length} of ${chunks.length} message parts failed to deliver. The reply may be incomplete — please retry.`,
+              ...(threadId && { message_thread_id: threadId }),
+            }, { ...outMetaBase, source: 'partial-delivery-warning' });
+          } catch (warnErr) {
+            console.error(`[${label}] partial-delivery warning failed: ${warnErr.message}`);
+          }
+        }
+        await cleanupArchivedBubbles();
+        console.log(`[${label}] ${elapsed}s | ${result.text.length} chars | streamed-redeliver(${reason}, ${chunks.length} chunks${r.failed.length ? `, ${r.failed.length} failed` : ''}) | ${chatConfig.model}/${chatConfig.effort} | $${result.cost?.toFixed(4) || '?'}`);
         markReplied();
         return;
       }