npm - switchroom - Versions diffs - 0.13.12 → 0.13.14 - Mend

switchroom 0.13.12 → 0.13.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/dist/cli/switchroom.js +60 -5
package/package.json +1 -1
package/telegram-plugin/dist/gateway/gateway.js +290 -88
package/telegram-plugin/final-answer-detect.ts +83 -0
package/telegram-plugin/gateway/gateway.ts +213 -11
package/telegram-plugin/hooks/silent-end-interrupt-stop.mjs +17 -5
package/telegram-plugin/pending-work-progress.ts +377 -0
package/telegram-plugin/runtime-metrics.ts +20 -0
package/telegram-plugin/silent-end.ts +37 -11
package/telegram-plugin/tests/final-answer-detect.test.ts +89 -0
package/telegram-plugin/tests/pending-work-progress.test.ts +354 -0
package/telegram-plugin/tests/silent-end.test.ts +118 -0
package/telegram-plugin/uat/scenarios/cross-turn-pending-progress-dm.test.ts +237 -0

package/telegram-plugin/final-answer-detect.ts ADDED Viewed

@@ -0,0 +1,83 @@
+/**
+ * final-answer-detect.ts — #1664 "did this reply deliver the final answer?"
+ *
+ * Background. An agent often ends a turn with its real answer as plain
+ * assistant transcript text instead of a `reply` / `stream_reply` tool
+ * call. The gateway renders that transcript as a live Telegram draft
+ * (`sendMessageDraft`) and, at turn_end, retracts the draft — so the
+ * answer is never finalized and the user watches it vanish (#1664).
+ *
+ * The gateway's `replyCalled` flag flips on the FIRST reply / stream_reply
+ * tool use and stays true for the rest of the turn. It cannot distinguish
+ * "the model sent an interim ack" from "the model sent its real answer" —
+ * both set `replyCalled`. The silent-end re-prompt safety net needs a
+ * finer signal: it must engage when a turn ended with only an interim
+ * ack and the real answer left as transcript text.
+ *
+ * This module is that finer signal — a pure predicate the gateway calls
+ * for each reply that lands. A turn whose every reply was classified
+ * "interim" ends with `CurrentTurn.finalAnswerDelivered === false`, which
+ * triggers the re-prompt; a turn with at least one "final" reply does not.
+ *
+ * Keeping the policy in one unit-testable function is the point — the
+ * gateway is a multi-thousand-line module that's expensive to import in a
+ * test. See `telegram-plugin/tests/final-answer-detect.test.ts`.
+ *
+ * The fix re-prompts the model; it never materializes the draft into a
+ * message (`reference/principles.md`: the model communicates, the
+ * framework is the safety net). So a false "interim" classification is
+ * cheap (one extra re-prompt) and a false "final" classification is the
+ * dangerous one (a real answer left undelivered). The length backstop
+ * only widens "final": it saves needless re-prompts, not dangerous misses.
+ */
+/**
+ * Length backstop for the final-answer classification. The pacing
+ * contract (`docs/telegram-style.md`) says interim updates pass
+ * `disable_notification: true` and the final answer omits it — so a
+ * notification-bearing reply is the primary "final answer" signal. But a
+ * model that mis-marks a genuinely substantive reply as interim
+ * (`disable_notification: true` on what is really the answer) would
+ * otherwise leave the turn looking undelivered. Any reply whose
+ * `text.length` (UTF-16 code units) reaches this value therefore ALSO
+ * counts as final, whatever the notification flag. 200 is comfortably
+ * longer than a typical ack ("on it", "looking into that…", "give me a
+ * sec") and short enough that a real answer almost always clears it.
+ */
+export const FINAL_ANSWER_MIN_CHARS = 200
+export interface FinalAnswerReplyInput {
+  /** The reply text the model sent (the model's own answer text, before
+   * any HTML conversion or Telegraph-link substitution). */
+  text: string
+  /** The `disable_notification` argument the reply tool was called with.
+   * `true` is the pacing contract's "interim update" marker; the final
+   * answer omits it (effectively `false`). */
+  disableNotification: boolean
+  /** For `stream_reply` only: whether this call carried `done: true`. A
+   * `done: true` call explicitly closes the stream and IS the final
+   * answer by definition. Omit it (or pass `false`) for plain `reply`. */
+  done?: boolean
+}
+/**
+ * Pure predicate: did this reply deliver the turn's final answer (as
+ * opposed to an interim ack)? `true` if ANY of:
+ *
+ *   - `done === true` — a `stream_reply` terminal call; the model
+ *     explicitly closed the stream, so this is the final answer.
+ *   - `disableNotification === false` — the pacing contract's explicit
+ *     "final answer" signal (interim updates set it `true`).
+ *   - `text.length >= FINAL_ANSWER_MIN_CHARS` — the length backstop for
+ *     a substantive answer mis-marked as interim.
+ *
+ * The gateway ORs this across every reply in a turn; once one reply
+ * qualifies, `CurrentTurn.finalAnswerDelivered` latches true and the
+ * silent-end re-prompt will not engage for that turn.
+ */
+export function isFinalAnswerReply(input: FinalAnswerReplyInput): boolean {
+  if (input.done === true) return true // explicit stream close
+  if (!input.disableNotification) return true // notification-bearing reply
+  if (input.text.length >= FINAL_ANSWER_MIN_CHARS) return true // length backstop (UTF-16 code units)
+  return false // short, silenced, non-terminal reply: treated as an interim ack
+}

package/telegram-plugin/gateway/gateway.ts CHANGED Viewed

@@ -76,7 +76,9 @@ import {
 import { emitRuntimeMetric } from '../runtime-metrics.js'
 import { classifyInbound } from '../inbound-classifier.js'
 import * as silencePoke from '../silence-poke.js'
-import { writeSilentEndState, clearSilentEndState, recordSilentTurnEnd } from '../silent-end.js'
+import * as pendingProgress from '../pending-work-progress.js'
+import { writeSilentEndState, clearSilentEndState, recordUndeliveredTurnEnd } from '../silent-end.js'
+import { isFinalAnswerReply } from '../final-answer-detect.js'
 import { createAnswerStream, type AnswerStreamHandle } from '../answer-stream.js'
 import { type SessionEvent } from '../session-tail.js'
 import {
@@ -1191,6 +1193,19 @@ type CurrentTurn = {
   startedAt: number
   gatewayReceiveAt: number
   replyCalled: boolean
+  // #1664 — whether the model has delivered its *final answer* this turn
+  // (as opposed to only an interim ack). `replyCalled` flips on the first
+  // reply / stream_reply tool_use and stays true for the rest of the turn,
+  // so it cannot tell "ack only" from "ack + real answer". This flag is the
+  // finer signal the silent-end re-prompt needs: it is set only when a reply
+  // actually lands AND `isFinalAnswerReply` (final-answer-detect.ts)
+  // classifies it as the final answer — notification-bearing, or long
+  // enough to be substantive, or a stream_reply done=true — OR when the
+  // turn-flush safety net legitimately emits the model's terminal text. A
+  // turn that ends with this still `false` triggers the silent-end re-prompt
+  // even though `replyCalled` is true — the #1664 case where the real answer
+  // ended up as plain transcript text rendered into an ephemeral draft.
+  finalAnswerDelivered: boolean
   capturedText: string[]
   orphanedReplyTimeoutId: ReturnType<typeof setTimeout> | null
   registryKey: string | null
@@ -3135,6 +3150,7 @@ silencePoke.startTimer({
     // Drop silence-poke state and clear turn-active so the next inbound
     // for this chat starts a fresh turn instead of queueing forever.
     silencePoke.endTurn(fbKey)
+    pendingProgress.noteTurnEnd(fbKey)
     purgeReactionTracking(fbKey)
     // Defense-in-depth: the fallback's purgeReactionTracking above
     // clears the canonical statusKey(chatId, threadId) for fbKey
@@ -3192,6 +3208,34 @@ silencePoke.startTimer({
   },
 })
+// #1445 cross-turn pending-async ambient. When a turn ends after the
+// model dispatched background async work (Agent / Task / Bash run-in-
+// background) and the model has stopped speaking, keep editing the
+// model's last reply in place at 60s intervals so the user sees
+// ambient liveness during the wait. Edits are silent, never spawn a
+// new pinged message, and stop the moment the user re-engages or the
+// model synthesises a handback. The full design rationale lives in
+// `pending-work-progress.ts`'s header docblock. Kill switch:
+// `SWITCHROOM_DISABLE_PENDING_PROGRESS=1`.
+pendingProgress.startTimer({
+  editMessage: async (ctx) => {
+    await swallowingApiCall( // NOTE(review): presumably keeps a failed edit from throwing — confirm
+      () =>
+        lockedBot.api.editMessageText( // only chat, message id and text are passed; no extra options
+          ctx.chatId,
+          ctx.messageId,
+          ctx.newText,
+        ),
+      {
+        chat_id: ctx.chatId,
+        verb: 'pending-progress-edit',
+        ...(ctx.threadId != null ? { threadId: ctx.threadId } : {}), // threadId included only when set
+      },
+    )
+  },
+  emitMetric: (event) => emitRuntimeMetric(event), // forwards to the emitter imported from runtime-metrics.js
+})
 // Per-agent buffer for synthetic inbounds the gateway couldn't deliver
 // because the bridge wasn't connected at send-time. Drained on
 // bridge-register so a fresh client picks up missed wake-ups before
@@ -3564,6 +3608,22 @@ const ipcServer: IpcServer = createIpcServer({
             label.length > 0 ? label : null,
             Date.now(),
           )
+          // #1445 cross-turn pending-async ambient. Mark the chat as
+          // having dispatched background work this turn so a turn_end
+          // that follows activates the edit-in-place ambient line.
+          // Covers `Agent` / `Task` (the harness-managed async path
+          // — handback channel turn clears it) and `Bash` with
+          // run_in_background:true (model is expected to poll
+          // BashOutput; the ambient ticks until next inbound or the
+          // 30-min budget cap).
+          const evInput = ev.input as { run_in_background?: boolean } | undefined
+          if (
+            ev.toolName === 'Agent'
+            || ev.toolName === 'Task'
+            || (ev.toolName === 'Bash' && evInput?.run_in_background === true)
+          ) {
+            pendingProgress.noteAsyncDispatch(key)
+          }
         }
       } else if (ev.kind === 'tool_result') {
         // #1292: drain the in-flight entry. Idempotent on unknown ids
@@ -4066,6 +4126,13 @@ async function executeUpdateChecklist(args: Record<string, unknown>): Promise<{
 }
 async function executeReply(args: Record<string, unknown>): Promise<{ content: Array<{ type: string; text: string }> }> {
+  // #1664 — pin the turn this reply belongs to at entry. The
+  // finalAnswerDelivered write near the end of this function runs after
+  // several awaits; turn-pinning (the #1067 pattern used across the
+  // gateway) keeps the write attributed to THIS turn rather than reading
+  // module-scope currentTurn, which a future refactor could let roll over
+  // mid-call.
+  const turn = currentTurn
   const chat_id = args.chat_id as string
   if (!chat_id) throw new Error('reply: chat_id is required')
   const rawText = args.text as string | undefined
@@ -4370,6 +4437,22 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
     }
   }
+  // #1445 cross-turn pending-async ambient. Capture the last text
+  // chunk as the anchor — if this turn ends with a pending async
+  // dispatch, the framework edits THIS message in place every 60s
+  // with a `— still working (Nm)` suffix until the user re-engages.
+  // Multi-chunk replies: anchor is the LAST chunk (edits append to
+  // the visually-trailing message; earlier chunks are left intact).
+  if (sentIds.length === chunks.length && chunks.length > 0) {
+    const anchorMsgId = sentIds[chunks.length - 1]
+    if (typeof anchorMsgId === 'number') {
+      pendingProgress.noteOutbound(statusKey(chat_id, threadId), {
+        messageId: anchorMsgId,
+        text: chunks[chunks.length - 1],
+      })
+    }
+  }
   // #273: when files is 2-10 photos, batch them into a single
   // sendMediaGroup album rather than N separate sendPhoto calls. The
   // user's device fires one notification for the album instead of N
@@ -4488,6 +4571,19 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
     } catch (err) {
       process.stderr.write(`telegram gateway: reply: endStatusReaction hook threw: ${err}\n`)
     }
+    // #1664 — mark the turn's final answer as delivered when this reply
+    // looks like the real answer rather than an interim ack. The
+    // classification (notification-bearing OR substantive length) lives
+    // in `isFinalAnswerReply`. Without this, a turn that ack'd then ended
+    // with the real answer as plain transcript text (#1664) would look
+    // "delivered" because replyCalled is true — and the silent-end
+    // re-prompt would never engage. `rawText` is the model's own answer
+    // text, measured before HTML conversion / Telegraph-link
+    // substitution. Writes `turn` (pinned at executeReply entry) so the
+    // flag always lands on the turn this reply belongs to.
+    if (turn != null && isFinalAnswerReply({ text: rawText, disableNotification })) {
+      turn.finalAnswerDelivered = true
+    }
   }
   process.stderr.write(`telegram channel: reply: finalized chatId=${chat_id} messageIds=[${sentIds.join(',')}] chunks=${chunks.length}\n`)
@@ -4501,6 +4597,8 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
 }
 async function executeStreamReply(args: Record<string, unknown>): Promise<unknown> {
+  // #1664 — pin the turn at entry; see executeReply for the rationale.
+  const turn = currentTurn
   if (!args.chat_id) throw new Error('stream_reply: chat_id is required')
   if (args.text == null || args.text === '') throw new Error('stream_reply: text is required and cannot be empty')
@@ -4679,6 +4777,32 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
     const sChatId = args.chat_id as string
     const sThreadId = args.message_thread_id != null ? Number(args.message_thread_id) : undefined
     outboundDedup.record(sChatId, sThreadId, args.text as string, Date.now())
+    // #1445 cross-turn pending-async ambient. The terminal stream_reply
+    // (done=true) is the user-visible anchor for any cross-turn wait
+    // that follows. Capture it so if this turn ends with a pending
+    // async dispatch, the framework edits THIS message in place at
+    // intervals.
+    pendingProgress.noteOutbound(statusKey(sChatId, sThreadId), {
+      messageId: result.messageId,
+      text: args.text as string,
+    })
+  }
+  // #1664 — mark the turn's final answer as delivered. For stream_reply a
+  // call with done=true IS the final answer by definition (the model
+  // explicitly closed the stream). A non-terminal stream_reply chunk also
+  // counts when it carries the final-answer signals — notification-bearing
+  // OR substantive length — via the same `isFinalAnswerReply` predicate
+  // executeReply uses. See the CurrentTurn.finalAnswerDelivered doc-comment
+  // for why replyCalled is not a sufficient signal here.
+  if (
+    turn != null &&
+    isFinalAnswerReply({
+      text: (args.text as string | undefined) ?? '',
+      disableNotification: args.disable_notification === true,
+      done: args.done === true,
+    })
+  ) {
+    turn.finalAnswerDelivered = true
   }
   return { content: [{ type: 'text', text: `${result.status} (id: ${result.messageId ?? 'pending'})` }] }
 }
@@ -5675,6 +5799,25 @@ function handleSessionEvent(ev: SessionEvent): void {
       // Drain any orphaned typing-wrap entries left over from a crashed
       // prior turn before resetting focus.
       typingWrapper.drainAll()
+      if (ev.chatId) {
+        // #1445 cross-turn pending-async ambient — backstop for the
+        // `handleInbound` path's `clearPending('inbound')`. The
+        // inbound path covers real user messages, but synthesised
+        // wakes (subagent-handback channel turn, cron fires, vault
+        // grant resumes, restart markers) push directly to
+        // `pendingInboundBuffer` and bypass `handleInbound`. The
+        // `enqueue` session-event fires for EVERY fresh turn atom
+        // regardless of source — clearing here drops any prior turn's
+        // ambient before the new turn's `noteOutbound` lands. The
+        // call is idempotent so it's safe to fire in addition to the
+        // inbound-path clear (for the real-inbound case, this is a
+        // no-op because state was already deleted by then).
+        const enqThreadId = ev.threadId != null ? Number(ev.threadId) : undefined
+        pendingProgress.clearPending(
+          statusKey(ev.chatId, enqThreadId),
+          'handback',
+        )
+      }
       if (ev.chatId) {
         // Issue #195: if a previous turn left an answer-lane stream open
         // (rapid steer/queue), force it to a new generation so its in-flight
@@ -5697,6 +5840,7 @@ function handleSessionEvent(ev: SessionEvent): void {
           startedAt,
           gatewayReceiveAt: startedAt,
           replyCalled: false,
+          finalAnswerDelivered: false,
           capturedText: [],
           orphanedReplyTimeoutId: null,
           registryKey: null,
@@ -5815,6 +5959,22 @@ function handleSessionEvent(ev: SessionEvent): void {
       // #1067: snapshot at entry. The answer-stream creation closures
       // below also read `turn` instead of currentTurn so they pin to
       // this turn's chat for the stream's lifetime.
+      //
+      // #1664 ordering note: a `text` event can arrive AFTER turn_end has
+      // nulled currentTurn (the issue observed `answer_lane_update
+      // transport:"draft"` firing post-turn_end). Such a late event is
+      // dropped here by the `turn != null` guard — it is NOT folded back
+      // into the just-ended turn. That is deliberate and safe: by the
+      // time this fires, the turn atom has been handed to
+      // endCurrentTurnAtomic and turn_end has already run its flush /
+      // silent-end decision; re-opening a closed turn (re-creating an
+      // answer stream, re-evaluating decideTurnFlush) would be a large,
+      // race-prone change. The #1664 safety net does not depend on
+      // catching the late text: a turn whose real answer lost the race
+      // ends with finalAnswerDelivered=false, so recordUndeliveredTurnEnd
+      // engages the Stop-hook re-prompt and the model re-delivers the
+      // answer through the reply tool. The dropped draft text is
+      // recovered by re-prompt, not by post-hoc materialization.
       const turn = currentTurn
       if (turn != null) {
         turn.capturedText.push(ev.text)
@@ -5975,6 +6135,7 @@ function handleSessionEvent(ev: SessionEvent): void {
         // full message above). Match the pattern used at the regular
         // turn-end path (line ~5039) and the wedged-turn path (~5290).
         silencePoke.endTurn(ceKey)
+        pendingProgress.noteTurnEnd(ceKey)
         // Issue #195: tear down the answer-lane stream on context-exhaustion
         // bail-out. The user is being told the session needs /restart, so any
         // partially-streamed answer would be misleading.
@@ -6160,6 +6321,7 @@ function handleSessionEvent(ev: SessionEvent): void {
           try { removeTurnActiveMarker(STATE_DIR) } catch { /* best-effort */ }
           signalTracker.clear(tKey)
           silencePoke.endTurn(tKey)
+          pendingProgress.noteTurnEnd(tKey)
         }
         lastPtyPreviewByChat.delete(statusKey(chatId, threadId))
         pendingPtyPartial = null
@@ -6181,6 +6343,18 @@ function handleSessionEvent(ev: SessionEvent): void {
         const backstopThreadId = threadId
         const backstopCtrl = ctrl
+        // #1664 — turn-flush only fires when !replyCalled (decideTurnFlush
+        // returns 'reply-called' otherwise). It legitimately delivers the
+        // model's terminal text as the answer, so the turn IS answered.
+        // Mark it now so the early-return below skips the silent-end
+        // re-prompt for a turn whose answer is genuinely on its way out.
+        // (The IIFE that actually sends runs after this branch's `return`;
+        // since the silent-end block is on the sibling reply-called path
+        // that this branch never reaches, this set is belt-and-braces —
+        // it keeps the captured `turn` atom internally consistent for any
+        // future reader.)
+        turn.finalAnswerDelivered = true
         // #654 deterministic double-message fix. Hand off the pinned
         // progress card BEFORE state reset so the driver doesn't keep
         // editing it while turn-flush is rewriting it with the answer.
@@ -6222,6 +6396,7 @@ function handleSessionEvent(ev: SessionEvent): void {
           const tKey = statusKey(chatId, threadId)
           signalTracker.clear(tKey)
           silencePoke.endTurn(tKey)
+          pendingProgress.noteTurnEnd(tKey)
         }
         void (async () => {
@@ -6413,17 +6588,31 @@ function handleSessionEvent(ev: SessionEvent): void {
           longest_silent_gap_ms: outboundMetrics.longestOutboundGapMs,
           ended_via: outboundMetrics.outboundCount > 0 ? 'reply' : 'silent',
         })
-        // #1122 PR4 / #1161: deterministic silent-end handling (see the
-        // silent-marker path above for the rationale).
-        //   - first silent-end → recordSilentTurnEnd writes the state
-        //     file so the Stop hook (silent-end-interrupt-stop.mjs)
-        //     blocks the session-end and re-prompts the agent to reply.
+        // #1122 PR4 / #1161 / #1664: deterministic undelivered-turn
+        // handling (see the silent-marker path above for the rationale).
+        //   - first undelivered turn-end → recordUndeliveredTurnEnd writes the
+        //     state file so the Stop hook (silent-end-interrupt-stop.mjs)
+        //     blocks the session-end and re-prompts the agent to deliver.
         //   - the Stop-hook re-prompt is already spent and the agent is
-        //     STILL silent → recordSilentTurnEnd returns exhausted:true;
-        //     deliver a user-facing fallback so the turn never just
-        //     vanishes (the user otherwise only sees the card disappear).
-        if (outboundMetrics.outboundCount === 0) {
-          const silentEnd = recordSilentTurnEnd({
+        //     STILL undelivered → recordUndeliveredTurnEnd returns
+        //     exhausted:true; deliver a user-facing fallback so the turn
+        //     never just vanishes (the user otherwise only sees the card
+        //     disappear).
+        //
+        // #1664 — the trigger is "no final answer delivered", not "zero
+        // outbound". `outboundCount === 0` is now just the special case
+        // where nothing landed at all. The added case: the model sent an
+        // interim ack via reply/stream_reply (outboundCount > 0,
+        // replyCalled = true) but ended the turn with its real answer as
+        // plain transcript text — rendered into an ephemeral answer-lane
+        // draft and retracted at turn_end, never finalized.
+        // finalAnswerDelivered stays false there, so the re-prompt engages
+        // and the model re-delivers the answer through the reply tool.
+        // NO_REPLY / HEARTBEAT_OK silent-marker turns return earlier and
+        // never reach this path. The turn-flush 'flush' branch also returns
+        // earlier (and sets finalAnswerDelivered=true defensively).
+        if (turn.finalAnswerDelivered === false) {
+          const silentEnd = recordUndeliveredTurnEnd({
             chatId,
             threadId: threadId ?? null,
             turnKey: tKey,
@@ -6454,6 +6643,7 @@ function handleSessionEvent(ev: SessionEvent): void {
         }
         signalTracker.clear(tKey)
         silencePoke.endTurn(tKey)
+        pendingProgress.noteTurnEnd(tKey)
       }
       lastPtyPreviewByChat.delete(statusKey(chatId, threadId))
       pendingPtyPartial = null
@@ -7676,6 +7866,18 @@ async function handleInbound(
         // the framework can nudge the model if it goes quiet past the
         // soft / firm thresholds.
         silencePoke.startTurn(statusKey(chat_id, messageThreadId), Date.now())
+        // #1445 cross-turn pending-async ambient. A user inbound starts a
+        // new turn (synthesised wakes / handback turns bypass this path
+        // and are cleared by the `enqueue` session-event backstop), so the
+        // model is about to re-engage — clear any pending-progress edits
+        // anchored to the *prior* turn's outbound so the framework stops
+        // talking over the new turn. clearPending drops the per-key state
+        // outright; the new turn's `tool_use(Agent|Task|Bash bg)` and
+        // outbound re-register via `noteAsyncDispatch` / `noteOutbound`.
+        pendingProgress.clearPending(
+          statusKey(chat_id, messageThreadId),
+          'inbound',
+        )
         // Human-feel UX: hold a continuous `typing…` indicator for the
         // WHOLE turn, not just the split-second a reply is transmitted.
         // A person you message shows as typing the entire time they

package/telegram-plugin/hooks/silent-end-interrupt-stop.mjs CHANGED Viewed

@@ -2,12 +2,20 @@
 /**
  * Stop hook — auto-interrupt for silent-end turns.
  *
- * When a Claude Code session ends without the agent having called reply or
- * stream_reply (a "silent-end"), the Telegram gateway writes a state file at
+ * When a Claude Code session ends without the agent delivering a final
+ * answer to the user, the Telegram gateway writes a state file at
  * $TELEGRAM_STATE_DIR/silent-end-pending.json. This hook reads that file and,
  * if a first-time silent-end is detected (retryCount === 0), returns a
  * decision:block to re-prompt the agent instead of letting the session close.
  *
+ * #1664 — "no final answer delivered" covers two cases: (a) the turn ended
+ * with zero outbound (the original case), and (b) the model sent only an
+ * interim ack via reply/stream_reply but left its real answer as plain
+ * transcript text, which the gateway renders into an ephemeral draft and
+ * never finalizes. The re-prompt below tells the model to send its answer
+ * through the reply tool, or reply NO_REPLY if it genuinely has nothing to
+ * add / already delivered.
+ *
  * On the second silent-end (retryCount >= MAX_RETRIES), the hook allows the
  * stop. The gateway's turn-end path (recordSilentTurnEnd in silent-end.ts)
  * detects the exhausted re-prompt and delivers a user-facing fallback
@@ -104,9 +112,13 @@ function main() {
     JSON.stringify({
       decision: 'block',
       reason:
-        'You ran tools but never sent a reply to the user. ' +
-        'Call mcp__switchroom-telegram__reply or mcp__switchroom-telegram__stream_reply (with done=true) ' +
-        'to send your final answer now.',
+        'This turn is ending without your final answer reaching the user. ' +
+        'If you wrote an answer as plain text (not via a tool), the user ' +
+        'cannot see it — only text sent through the reply tool is delivered. ' +
+        'Send your final answer now by calling mcp__switchroom-telegram__reply ' +
+        '(or mcp__switchroom-telegram__stream_reply with done=true). ' +
+        'If your final answer has already reached the user, or you ' +
+        'intentionally have nothing to add, reply with exactly NO_REPLY.',
     }),
   )
   process.exit(0)