npm - polygram - Versions diffs - 0.6.15 → 0.7.0 - Mend

polygram 0.6.15 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/.claude-plugin/plugin.json +1 -1
package/README.md +1 -1
package/lib/announces.js +70 -0
package/lib/deliver.js +69 -0
package/lib/net-errors.js +52 -3
package/lib/process-manager.js +30 -6
package/lib/sent-cache.js +71 -0
package/lib/stream-reply.js +127 -18
package/lib/telegram-chunk.js +278 -0
package/lib/telegram-format.js +107 -1
package/lib/telegram.js +134 -39
package/package.json +1 -1
package/polygram.js +156 -47

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "$schema": "https://anthropic.com/claude-code/plugin.schema.json",
   "name": "polygram",
-  "version": "0.6.15",
+  "version": "0.7.0",
   "description": "Telegram integration for Claude Code that preserves the OpenClaw per-chat session model. Migration target for OpenClaw users. Multi-bot, multi-chat, per-topic isolation; SQLite transcripts; inline-keyboard approvals. Bundles /polygram:status|logs|pair-code|approvals admin commands and a history skill.",
   "keywords": [
     "telegram",

package/README.md CHANGED Viewed

@@ -364,7 +364,7 @@ foreign-chat clicks are rejected. Default-deny on IPC error.
 ## Development
 ```bash
-npm test           # 500 tests, 115 suites, node:test, no external services
+npm test           # 619 tests, 153 suites, node:test, no external services
 npm run coverage   # native test coverage (Node 22+, no devDeps)
 npm start -- --bot my-bot
 npm run split-db -- --config config.json --dry-run

package/lib/announces.js ADDED Viewed

@@ -0,0 +1,70 @@
+/**
+ * Subagent / informational announces — a thin OpenClaw-style helper for
+ * sending small "I'm doing X" messages to a chat without mixing into
+ * the main reply flow.
+ *
+ * Polygram's user-facing surface for "shumabit is working" is the
+ * status reaction on the user's message (👀 → 🤔 → tool icons →
+ * 👍/💥). For tool-heavy turns where the user wants more visibility
+ * — e.g. shumabit delegating to a subagent via Claude Code's Task
+ * tool — an opt-in announce can post a brief informational message
+ * to the chat. Off by default (`config.bots.<bot>.announceSubagents`
+ * or `config.chats.<id>.announceSubagents`), so existing chats see
+ * no behavior change.
+ *
+ * Note: this is the minimal MVP of OpenClaw's full subagent-announce
+ * queue (which has debounce, drop policies, multi-channel routing).
+ * Polygram's "subagents" are in-process (Claude Code Task tool), so
+ * the announce path is just a one-shot informational sendMessage.
+ */
+const SUBAGENT_DEBOUNCE_MS = 30_000;
+/**
+ * Per-chat debounce so a turn that spawns 5 subagents back-to-back
+ * doesn't post 5 announces. Module-scoped Map keyed by chatId →
+ * timestamp of last announce.
+ */
+const lastAnnounceByChat = new Map();
+function shouldAnnounce(chatId, now = Date.now(), debounceMs = SUBAGENT_DEBOUNCE_MS) {
+  const prev = lastAnnounceByChat.get(String(chatId));
+  if (prev != null && now - prev < debounceMs) return false;
+  lastAnnounceByChat.set(String(chatId), now);
+  return true;
+}
+/**
+ * Send a plain-text announce (no markdown processing, no reply linkage).
+ * Caller passes `tg(bot, method, params, meta)` as `send` so we don't
+ * have to import the full lib/telegram.js dependency tree here.
+ */
+async function announce({
+  send,
+  bot,
+  chatId,
+  threadId = null,
+  text,
+  meta = {},
+  logger = console,
+}) {
+  if (!text) return null;
+  const params = {
+    chat_id: chatId,
+    text,
+    ...(threadId != null ? { message_thread_id: threadId } : {}),
+  };
+  try {
+    return await send(bot, 'sendMessage', params, {
+      ...meta,
+      source: meta.source || 'announce',
+      plainText: true,            // skip markdown→HTML
+      linkPreview: false,         // never preview-card for announces
+    });
+  } catch (err) {
+    logger.error?.(`[announce] failed: ${err.message}`);
+    return null;
+  }
+}
+module.exports = { announce, shouldAnnounce, SUBAGENT_DEBOUNCE_MS };

package/lib/deliver.js ADDED Viewed

@@ -0,0 +1,69 @@
+/**
+ * Chunked reply delivery primitive.
+ *
+ * `deliverReplies` is the polygram analog of OpenClaw's `deliverReplies` —
+ * a small loop over already-chunked text that sends each chunk as its own
+ * Telegram message via the `send()` wrapper from lib/telegram.js (which
+ * does write-before-send, HTML→plain fallback, and MESSAGE_NOT_MODIFIED
+ * swallowing).
+ *
+ * Polygram's old code path inlined a `for (chunk of chunkText(rest))` loop
+ * inside polygram.js with an ad-hoc `tg()` call. That worked, but mixed
+ * delivery concerns into the streaming-finalize logic, and made testing
+ * the multi-message path painful. With this primitive, the new
+ * "preview-becomes-final" flow in handleMessage (Phase 5) just calls:
+ *
+ *   await deliverReplies({ bot, chatId, threadId, chunks, replyToMessageId, ... })
+ *
+ * — and gets back `{ sent, failed }`: `sent` holds one message_id (or null) per delivered chunk, `failed` holds `{ index, error }` per failed chunk.
+ *
+ * Behavior:
+ *   - Sends `chunks[0]` first with `reply_parameters` (so the answer
+ *     visually anchors to the user's question). Subsequent chunks omit
+ *     `reply_parameters` — chaining replies would clutter the chat.
+ *   - On chunk failure, logs and continues to the next chunk. We'd
+ *     rather deliver partial content than abort the whole reply.
+ *   - Empty input returns `{ sent: [], failed: [] }` immediately.
+ */
+async function deliverReplies({
+  bot,
+  send, // (bot, method, params, meta) → res — usually createSender(db, logger)(...) or tg
+  chatId,
+  threadId = null,
+  chunks,
+  replyToMessageId = null,
+  meta = {},
+  logger = console,
+}) {
+  if (!Array.isArray(chunks) || chunks.length === 0) {
+    return { sent: [], failed: [] };
+  }
+  const sent = [];
+  const failed = [];
+  for (let i = 0; i < chunks.length; i++) {
+    const params = {
+      chat_id: chatId,
+      text: chunks[i],
+    };
+    if (threadId != null) params.message_thread_id = threadId;
+    if (i === 0 && replyToMessageId != null) {
+      // allow_sending_without_reply: long turns give the user time to
+      // delete their original message; without this flag Telegram
+      // rejects with MESSAGE_NOT_FOUND and the whole reply is lost.
+      params.reply_parameters = { message_id: replyToMessageId, allow_sending_without_reply: true };
+    }
+    try {
+      const res = await send(bot, 'sendMessage', params, meta);
+      const msgId = res?.message_id ?? null;
+      sent.push(msgId);
+    } catch (err) {
+      logger.error?.(`[deliver] chunk ${i + 1}/${chunks.length} failed: ${err.message}`);
+      failed.push({ index: i, error: err.message });
+      // Keep going — partial delivery is better than total loss.
+    }
+  }
+  return { sent, failed };
+}
+module.exports = { deliverReplies };

package/lib/net-errors.js CHANGED Viewed

@@ -30,20 +30,55 @@ const PRE_CONNECT_ERROR_CODES = new Set([
 // Transient errors that are recoverable but may have made it partway. DO
 // NOT auto-retry these — the risk of double-delivery outweighs the gain.
 // Surface them to the caller and let humans decide.
+//
+// 0.7.0 added the UND_ERR_* family + ECONNABORTED / ERR_NETWORK to match
+// OpenClaw's set (extensions/telegram/src/network-errors.ts). Node 22+
+// uses undici as its default fetch impl, so these surface in real
+// production traffic — pre-0.7.0 we'd silently misclassify them as
+// non-network errors.
 const RECOVERABLE_ERROR_CODES = new Set([
-  'ETIMEDOUT',    // TCP timeout after connect (message may have landed)
-  'EPIPE',        // write after close — outcome indeterminate
-  'EAGAIN',       // socket would block — reader should retry
+  'ETIMEDOUT',          // TCP timeout after connect (message may have landed)
+  'EPIPE',              // write after close — outcome indeterminate
+  'EAGAIN',             // socket would block — reader should retry
+  'ESOCKETTIMEDOUT',    // socket-level timeout (axios/legacy node)
+  'ECONNABORTED',       // connection aborted by client (timeout-induced)
+  'ERR_NETWORK',        // generic network error code
+  'UND_ERR_CONNECT_TIMEOUT', // undici: connection timeout
+  'UND_ERR_HEADERS_TIMEOUT', // undici: response headers timeout
+  'UND_ERR_BODY_TIMEOUT',    // undici: response body timeout
+  'UND_ERR_SOCKET',          // undici: socket error
+  'UND_ERR_ABORTED',         // undici: request aborted
 ]);
 // Error.name values emitted by undici/node for transient conditions.
+// 0.7.0 added the undici-specific timeout error names; the new fetch
+// impl in Node 22+ surfaces these as `err.name` rather than `err.code`
+// in some shapes.
 const RECOVERABLE_ERROR_NAMES = new Set([
   'AbortError',
   'TimeoutError',
   'FetchError',
   'SocketError',
+  'ConnectTimeoutError',
+  'HeadersTimeoutError',
+  'BodyTimeoutError',
 ]);
+// Message-substring matchers for transient errors. undici sometimes
+// wraps a network failure in a generic "fetch failed" without setting
+// .code or .name — only the message tells us it's a network error.
+//
+// These are consulted ONLY after the code and name checks have failed to
+// recognise the error. The match is a plain substring test on the
+// lower-cased message, so it is deliberately broad: any otherwise
+// unrecognised error whose message contains a snippet counts as transient.
+const RECOVERABLE_MESSAGE_SNIPPETS = [
+  'fetch failed',
+  'undici',
+  'network error',
+  'network request',
+];
 function extractCode(err) {
   if (!err) return null;
   return err.code
@@ -67,6 +102,11 @@ function isSafeToRetry(err) {
   return code != null && PRE_CONNECT_ERROR_CODES.has(code);
 }
+function extractMessage(err) {
+  if (!err) return '';
+  return String(err.message || err.cause?.message || err.description || '').toLowerCase();
+}
 /**
  * Is this a transient network error — recoverable in the sense that the
  * connection may work next time, but NOT safe to auto-retry because the
@@ -80,6 +120,13 @@ function isTransientNetworkError(err) {
   }
   const name = extractName(err);
   if (name && RECOVERABLE_ERROR_NAMES.has(name)) return true;
+  // 0.7.0: message-snippet matching is the last resort, reached only when
+  // the checks above did not recognise the error. It is a plain substring
+  // test, so any such error whose message contains a snippet is transient.
+  const message = extractMessage(err);
+  if (message && RECOVERABLE_MESSAGE_SNIPPETS.some((s) => message.includes(s))) {
+    return true;
+  }
   return false;
 }
@@ -113,9 +160,11 @@ module.exports = {
   PRE_CONNECT_ERROR_CODES,
   RECOVERABLE_ERROR_CODES,
   RECOVERABLE_ERROR_NAMES,
+  RECOVERABLE_MESSAGE_SNIPPETS,
   isSafeToRetry,
   isTransientNetworkError,
   extractCode,
   extractName,
+  extractMessage,
   redactBotToken,
 };

package/lib/process-manager.js CHANGED Viewed

@@ -59,6 +59,7 @@ class ProcessManager {
     onClose = null,       // (sessionKey, code, entry) → void
     onStreamChunk = null, // (sessionKey, partialText, entry) → void — routes to pendingQueue[0]
     onToolUse = null,     // (sessionKey, toolName, entry) → void — routes to pendingQueue[0]
+    onAssistantMessageStart = null, // (sessionKey, entry) → void — fires when a NEW top-level assistant message begins (after a previous one ended). Used by polygram.js to call streamer.forceNewMessage() so each assistant message gets its own bubble.
     onRespawn = null,     // (sessionKey, reason, entry) → void — fires after graceful drain-and-kill
   } = {}) {
     if (!spawnFn) throw new Error('spawnFn required');
@@ -72,6 +73,7 @@ class ProcessManager {
     this.onClose = onClose;
     this.onStreamChunk = onStreamChunk;
     this.onToolUse = onToolUse;
+    this.onAssistantMessageStart = onAssistantMessageStart;
     this.onRespawn = onRespawn;
     this.procs = new Map();
   }
@@ -275,12 +277,34 @@ class ProcessManager {
       }
       if (event.type === 'assistant' && head) {
-        if (this.onStreamChunk) {
-          const added = extractAssistantText(event);
-          if (added) {
-            head.streamText = head.streamText
-              ? `${head.streamText}\n\n${added}`
-              : added;
+        // 0.7.0 (Phase F): detect message_id transitions to split bubbles
+        // per top-level assistant message. Each Anthropic stream-json
+        // 'assistant' event carries event.message.id; the same id across
+        // events means cumulative updates to the same message, a new
+        // id means a new message (typically after a tool-result cycle).
+        const messageId = event.message?.id;
+        const added = extractAssistantText(event);
+        if (added) {
+          // Pre-0.7.0 we did `streamText = streamText + '\n\n' + added`,
+          // which DUPLICATED text on every update because `added` is
+          // the cumulative full text-so-far of the current assistant
+          // message (not a delta). 0.7.0 REPLACES instead — the new
+          // text is already cumulative — and uses messageId boundaries
+          // to fire onAssistantMessageStart for each new top-level
+          // assistant message. The streamer responds by force-creating
+          // a fresh bubble, so each assistant message gets its own.
+          const isNewMessage = head.lastAssistantMessageId != null
+            && messageId != null
+            && head.lastAssistantMessageId !== messageId
+            && head.streamText
+            && head.streamText.length > 0;
+          if (isNewMessage && this.onAssistantMessageStart) {
+            try { this.onAssistantMessageStart(sessionKey, entry); }
+            catch (err) { this.logger.error(`[${entry.label}] onAssistantMessageStart: ${err.message}`); }
+          }
+          if (messageId != null) head.lastAssistantMessageId = messageId;
+          head.streamText = added;
+          if (this.onStreamChunk) {
             try { this.onStreamChunk(sessionKey, head.streamText, entry); }
             catch (err) { this.logger.error(`[${entry.label}] onStreamChunk: ${err.message}`); }
           }

package/lib/sent-cache.js ADDED Viewed

@@ -0,0 +1,71 @@
+/**
+ * In-memory cache of message IDs the bot has sent, per chat.
+ *
+ * Port of OpenClaw's `sent-message-cache` (send-DVX_zY9w.js:1014-1041).
+ * Use case: filter the bot's own messages out of activation logic in
+ * group chats — a bot reply with a URL would otherwise auto-trigger
+ * a self-reply if the chat's activation rule includes "any message
+ * with a URL". Polygram's existing `messages` table can answer the
+ * same question via SQL (`direction = 'out' AND chat_id AND msg_id`),
+ * but the in-memory cache is O(1) for the high-frequency callers
+ * (every inbound message reaction handler).
+ *
+ * 24-hour TTL: Telegram messages older than 48h can't be reacted to
+ * anyway, so 24h is a comfortable working set. Per-chat cleanup runs
+ * lazily when the chat exceeds 100 entries.
+ */
+const TTL_MS = 24 * 60 * 60 * 1000;
+const CLEANUP_THRESHOLD = 100;
+function createSentCache() {
+  // chatKey → Map<msgId, ts>
+  const sentMessages = new Map();
+  function chatKey(chatId) { return String(chatId); }
+  function record(chatId, messageId) {
+    if (chatId == null || messageId == null) return;
+    const key = chatKey(chatId);
+    let entry = sentMessages.get(key);
+    if (!entry) {
+      entry = new Map();
+      sentMessages.set(key, entry);
+    }
+    entry.set(messageId, Date.now());
+    // Lazy GC: once the per-chat map exceeds CLEANUP_THRESHOLD, sweep out
+    // expired entries. Unexpired entries stay, so a busy chat can remain above the threshold and is re-swept (O(n)) on every record().
+    if (entry.size > CLEANUP_THRESHOLD) {
+      const cutoff = Date.now() - TTL_MS;
+      for (const [id, ts] of entry) if (ts < cutoff) entry.delete(id);
+    }
+  }
+  function wasSent(chatId, messageId) {
+    if (chatId == null || messageId == null) return false;
+    const key = chatKey(chatId);
+    const entry = sentMessages.get(key);
+    if (!entry) return false;
+    const ts = entry.get(messageId);
+    if (ts == null) return false;
+    if (Date.now() - ts > TTL_MS) {
+      entry.delete(messageId);
+      return false;
+    }
+    return true;
+  }
+  function size() {
+    let total = 0;
+    for (const entry of sentMessages.values()) total += entry.size;
+    return total;
+  }
+  function clear() {
+    sentMessages.clear();
+  }
+  return { record, wasSent, size, clear };
+}
+module.exports = { createSentCache, TTL_MS, CLEANUP_THRESHOLD };

package/lib/stream-reply.js CHANGED Viewed

@@ -1,15 +1,24 @@
 /**
  * Live streaming-reply state machine for a single turn.
  *
- * Lifecycle per turn:
+ * Lifecycle (0.7.0):
  *   idle  -> (text >= minChars) -> live
  *   live  -> (subsequent chunks) -> live       (throttled edits)
- *   idle|live -> finalize(finalText) -> done
+ *   live  -> forceNewMessage()    -> idle      (next chunk = new bubble)
+ *   live  -> discard()            -> finalized (bubble deleted)
+ *   any   -> finalize(finalText)  -> finalized
  *
- * The streamer never talks to Telegram directly — callers inject
- * `send(text)` (returns {message_id}) and `edit(msg_id, text)`. That keeps
- * polygram.js in charge of transcript writes, sticker/reaction routing, and
- * error handling; this module is just a cadence machine.
+ * The streamer never talks to Telegram directly — callers inject `send(text)`,
+ * `edit(msg_id, text)`, and (new in 0.7.0) optional `deleteMessage(msg_id)`.
+ * That keeps polygram.js in charge of transcript writes, sticker/reaction
+ * routing, and error handling; this module is just a cadence machine.
+ *
+ * 0.7.0 finalize() returns rich result so the caller can decide whether the
+ * preview's last edit IS the final reply, or whether to discard the preview
+ * and redeliver via deliverReplies (overflow / final edit failed). This is
+ * the OpenClaw pattern: short replies preview-becomes-final (no flicker),
+ * long replies preview-deleted-redelivered (single coherent bubble flow at
+ * chat bottom).
  *
  * Test-friendly: inject `clock` (now() fn) and `schedule` (setTimeout-like)
  * so a fake clock can drive throttle timing deterministically.
@@ -25,6 +34,7 @@ const DEFAULT_THROTTLE_MS = 1000;
 function createStreamer({
   send,                                   // async (text) -> { message_id }
   edit,                                   // async (msg_id, text) -> void
+  deleteMessage = null,                   // async (msg_id) -> void  [optional]
   minChars = DEFAULT_MIN_CHARS,
   throttleMs = DEFAULT_THROTTLE_MS,
   maxLen = 4096,
@@ -41,7 +51,12 @@ function createStreamer({
   let pendingEdit = null;   // timer id
   let flushPromise = null;  // ongoing edit promise (for back-pressure)
-  function truncate(s) {
+  // 0.7.0: this is the LIVE-EDIT truncation, used during streaming
+  // when latestText overshoots maxLen. The trailing "..." signals to
+  // the user that more is coming. At finalize time, we DON'T truncate
+  // — we either edit-to-final-as-is (caller already chunked correctly)
+  // or signal overflow back to the caller.
+  function truncateForLive(s) {
     if (s.length <= maxLen) return s;
     return s.slice(0, maxLen - 3) + '...';
   }
@@ -56,7 +71,7 @@ function createStreamer({
     if (state === 'idle') {
       if (text.length < minChars) return;
       state = 'live';
-      currentText = truncate(text);
+      currentText = truncateForLive(text);
       try {
         const res = await send(currentText);
         msgId = res?.message_id ?? null;
@@ -90,7 +105,7 @@ function createStreamer({
   async function flush() {
     pendingEdit = null;
     if (state !== 'live' || msgId == null) return;
-    const next = truncate(latestText);
+    const next = truncateForLive(latestText);
     if (next === currentText) return;
     lastEditTs = clock();
     currentText = next;
@@ -98,38 +113,132 @@ function createStreamer({
       flushPromise = edit(msgId, currentText);
       await flushPromise;
     } catch (err) {
-      // Non-fatal — maybe 429. Log and keep going; next chunk will retry.
+      // Non-fatal — maybe 429 or transient. Log and keep going; next
+      // chunk will retry. The HTML→plain fallback in lib/telegram.js
+      // already handles the most common cause (parse error from
+      // truncate cutting mid-tag).
       logger.error(`[stream] edit failed: ${err.message}`);
     } finally {
       flushPromise = null;
     }
   }
+  // 0.7.0: explicitly drain any pending edit. Useful when the caller
+  // is about to make a finalize/discard decision and wants the bubble's
+  // visual state to be accurate (no stale half-rendered text under a
+  // pending timer).
+  async function flushDraft() {
+    if (pendingEdit) { cancel(pendingEdit); pendingEdit = null; await flush(); }
+    if (flushPromise) { try { await flushPromise; } catch {} }
+  }
+  // 0.7.0: reset bubble state so the next onChunk creates a NEW message.
+  // Used by the upcoming Phase 7 F (forceNewMessage on assistant-
+  // message-start) — when Claude emits a new top-level assistant message
+  // mid-turn (post tool-result), we want it in its own bubble below
+  // the previous one, not appended via edit.
+  function forceNewMessage() {
+    if (pendingEdit) { cancel(pendingEdit); pendingEdit = null; }
+    // Don't await flushPromise — the caller has decided to start a new
+    // message; whatever the old bubble shows is "done".
+    msgId = null;
+    currentText = '';
+    latestText = '';
+    state = 'idle';
+    lastEditTs = 0;
+  }
+  // 0.7.0: delete the current bubble via the injected deleteMessage
+  // callback. Used when the final reply overflows the preview's single-
+  // message capacity, so handleMessage will discard the preview and
+  // redeliver via deliverReplies (chunks land at chat bottom).
+  //
+  // Works whether state is 'live' OR 'finalized' — handleMessage's
+  // typical flow is finalize() → finalEditOk false → discard. The
+  // bubble's msgId is preserved through finalize so we can still
+  // delete it. If deleteMessage isn't provided, we just transition
+  // state without touching Telegram — the bubble stays at its last
+  // edited content, becoming a vestigial "head" of the conversation.
+  async function discard() {
+    if (pendingEdit) { cancel(pendingEdit); pendingEdit = null; }
+    if (flushPromise) { try { await flushPromise; } catch {} }
+    const idToDelete = msgId;
+    state = 'finalized';
+    msgId = null;
+    let deleted = false;
+    if (idToDelete && typeof deleteMessage === 'function') {
+      try {
+        await deleteMessage(idToDelete);
+        deleted = true;
+      } catch (err) {
+        // Telegram rejects deletions of messages older than 48h or
+        // already-deleted ones. Non-fatal — the redelivery happens
+        // either way.
+        logger.warn?.(`[stream] discard deleteMessage failed: ${err.message}`);
+      }
+    }
+    return { msgId: idToDelete, deleted };
+  }
+  // 0.7.0: snapshot of the current bubble ({ msgId, currentText }) for
+  // callers that want to track it for later cleanup (e.g. deleting a
+  // superseded preview at end-of-turn). Take it BEFORE forceNewMessage(),
+  // which resets msgId to null and currentText to ''.
+  function archive() {
+    return { msgId, currentText };
+  }
+  // 0.7.0: rich result. `finalEditOk` tells caller whether the preview
+  // can stand as the final reply (true) or needs to be replaced via
+  // discard + deliverReplies (false). `overflow` is the one specific
+  // reason: body wouldn't fit in a single Telegram message.
   async function finalize(finalText, { errorSuffix = null } = {}) {
-    if (state === 'finalized') return { streamed: false, msgId };
+    if (state === 'finalized') return { streamed: false, msgId, finalEditOk: false, overflow: false };
     if (pendingEdit) { cancel(pendingEdit); pendingEdit = null; }
     if (flushPromise) { try { await flushPromise; } catch {} }
     if (state === 'idle') {
       state = 'finalized';
-      return { streamed: false, msgId: null };
+      return { streamed: false, msgId: null, finalEditOk: false, overflow: false };
     }
-    // live → finalize: one last edit with the full answer.
+    // live → finalize.
     state = 'finalized';
     let body = finalText ?? latestText;
     if (errorSuffix) body = `${body}\n\n⚠️ ${errorSuffix}`;
-    const next = truncate(body);
-    if (next !== currentText) {
-      try { await edit(msgId, next); currentText = next; }
-      catch (err) { logger.error(`[stream] final edit failed: ${err.message}`); }
+    // If body overflows the single-message cap, the caller needs to
+    // discard this bubble and redeliver via chunks. Don't try to edit.
+    if (body.length > maxLen) {
+      return { streamed: true, msgId, finalText: body, finalEditOk: false, overflow: true };
+    }
+    // Body fits. Try one last edit to bring the bubble to the final
+    // text. If that succeeds, preview-IS-final and caller can return
+    // without redelivering. If it fails (e.g. parse error after our
+    // wrapper exhausts its retry, or a 5xx), caller should discard
+    // and redeliver — the bubble's content is unreliable.
+    if (body === currentText) {
+      // Already correct — no edit needed.
+      return { streamed: true, msgId, finalText: body, finalEditOk: true, overflow: false };
+    }
+    try {
+      await edit(msgId, body);
+      currentText = body;
+      return { streamed: true, msgId, finalText: body, finalEditOk: true, overflow: false };
+    } catch (err) {
+      logger.error(`[stream] final edit failed: ${err.message}`);
+      return { streamed: true, msgId, finalText: body, finalEditOk: false, overflow: false };
     }
-    return { streamed: true, msgId, finalText: next };
   }
   return {
     onChunk,
     finalize,
+    flushDraft,
+    forceNewMessage,
+    discard,
+    archive,
     // Introspection for tests:
     get state() { return state; },
     get msgId() { return msgId; },