npm - switchroom - Versions diffs - 0.13.15 → 0.13.17 - Mend

switchroom 0.13.15 → 0.13.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/dist/cli/switchroom.js +2 -2
package/package.json +1 -1
package/telegram-plugin/dist/gateway/gateway.js +150 -17
package/telegram-plugin/gateway/gateway.ts +184 -1
package/telegram-plugin/over-ping-safety-net.ts +80 -0
package/telegram-plugin/runtime-metrics.ts +18 -0
package/telegram-plugin/silent-reply-anchor.ts +142 -0
package/telegram-plugin/tests/over-ping-safety-net.test.ts +96 -0
package/telegram-plugin/tests/silent-reply-anchor.test.ts +178 -0
package/telegram-plugin/uat/scenarios/visible-answer-stream-dm.test.ts +92 -105

package/dist/cli/switchroom.js CHANGED Viewed

@@ -47331,8 +47331,8 @@ var {
 } = import__.default;
 // src/build-info.ts
-var VERSION = "0.13.15";
-var COMMIT_SHA = "bc0b5540";
+var VERSION = "0.13.17";
+var COMMIT_SHA = "84eb8ad9";
 // src/cli/agent.ts
 init_source();

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "switchroom",
-  "version": "0.13.15",
+  "version": "0.13.17",
   "description": "Run Claude Code 24/7 on your Claude Pro/Max subscription over Telegram. Open-source alternative to OpenClaw and NanoClaw — no API keys.",
   "type": "module",
   "bin": {

package/telegram-plugin/dist/gateway/gateway.js CHANGED Viewed

@@ -37034,6 +37034,56 @@ function emitRuntimeMetric(event) {
   captureEvent(event.kind, { ...event, ts: wrapped.ts });
 }
+// over-ping-safety-net.ts
+function decideOverPing(input) {
+  if (!input.modelRequestedPing) {
+    return { suppress: false, claimSlot: false, sinceFirstPingMs: null };
+  }
+  if (input.firstPingAt != null) {
+    return {
+      suppress: true,
+      claimSlot: false,
+      sinceFirstPingMs: input.nowMs - input.firstPingAt
+    };
+  }
+  return { suppress: false, claimSlot: true, sinceFirstPingMs: null };
+}
+// silent-reply-anchor.ts
+var TELEGRAM_MSG_CAP = 4000;
+function enabled2() {
+  const v = process.env.SWITCHROOM_DISABLE_SILENT_REPLY_AUTOEDIT;
+  return !(v === "1" || v === "true");
+}
+function decideSilentReplyAnchor(input) {
+  if (!enabled2()) {
+    return { kind: "fresh", becomesAnchor: false };
+  }
+  if (!input.effectivelySilent) {
+    return { kind: "fresh", becomesAnchor: false };
+  }
+  if (input.hasFiles || input.hasButtons) {
+    return { kind: "fresh", becomesAnchor: false };
+  }
+  if (input.newReplyText.trim().length === 0) {
+    return { kind: "fresh", becomesAnchor: false };
+  }
+  if (input.anchorMessageId == null) {
+    return { kind: "fresh", becomesAnchor: true };
+  }
+  const merged = `${input.anchorText}
+${input.newReplyText}`;
+  if (merged.length > TELEGRAM_MSG_CAP) {
+    return { kind: "fresh", becomesAnchor: true };
+  }
+  return {
+    kind: "edit-anchor",
+    messageId: input.anchorMessageId,
+    mergedText: merged
+  };
+}
 // inbound-classifier.ts
 var STATUS_QUERY_PATTERNS = [
   /^\?+$/,
@@ -37300,12 +37350,12 @@ function startTimer(deps) {
 var EDIT_INTERVAL_MS = 60000;
 var POLL_INTERVAL_MS = 5000;
 var MAX_LIFETIME_MS = 30 * 60000;
-var TELEGRAM_MSG_CAP = 4000;
+var TELEGRAM_MSG_CAP2 = 4000;
 var SUFFIX_RE = /\n\n\u2014 still working \(\d+m\)$/;
 var stateByKey = new Map;
 var timer2 = null;
 var activeDeps2 = null;
-function enabled2() {
+function enabled3() {
   const v = process.env.SWITCHROOM_DISABLE_PENDING_PROGRESS;
   return !(v === "1" || v === "true");
 }
@@ -37327,19 +37377,19 @@ function ensure(key) {
   return s;
 }
 function noteAsyncDispatch(key) {
-  if (!enabled2())
+  if (!enabled3())
     return;
   ensure(key).pending = true;
 }
 function noteOutbound3(key, opts) {
-  if (!enabled2())
+  if (!enabled3())
     return;
   const s = ensure(key);
   s.anchorMessageId = opts.messageId;
   s.anchorOriginalText = opts.text.replace(SUFFIX_RE, "");
 }
 function noteTurnEnd(key) {
-  if (!enabled2())
+  if (!enabled3())
     return;
   const s = stateByKey.get(key);
   if (s == null)
@@ -37369,7 +37419,7 @@ function clearPending(key, reason) {
   });
 }
 function startTimer2(deps) {
-  if (!enabled2())
+  if (!enabled3())
     return;
   if (timer2 != null)
     return;
@@ -37409,7 +37459,7 @@ function tick2(now) {
 \u2014 still working (${minutes}m)`;
     const newText = s.anchorOriginalText + suffix;
-    if (newText.length > TELEGRAM_MSG_CAP) {
+    if (newText.length > TELEGRAM_MSG_CAP2) {
       s.lastEditAt = now;
       continue;
     }
@@ -44586,9 +44636,9 @@ function transition(state3, event) {
 // gateway/inbound-delivery-machine-shadow.ts
 var state3 = initialState();
-var enabled3 = process.env.SWITCHROOM_DELIVERY_MACHINE_SHADOW !== "0";
+var enabled4 = process.env.SWITCHROOM_DELIVERY_MACHINE_SHADOW !== "0";
 function shadowEmit(event) {
-  if (!enabled3)
+  if (!enabled4)
     return [];
   try {
     const result = transition(state3, event);
@@ -44646,12 +44696,12 @@ function redeliverBufferedInbound2(buffer, agent, send, spool) {
 }
 // gateway/inbound-delivery-machine-dispatch.ts
-var enabled4 = process.env.SWITCHROOM_DELIVERY_MACHINE_CUTOVER !== "0";
+var enabled5 = process.env.SWITCHROOM_DELIVERY_MACHINE_CUTOVER !== "0";
 function isDispatchEnabled() {
-  return enabled4;
+  return enabled5;
 }
 function dispatchEffects(effects, ctx) {
-  if (!enabled4)
+  if (!enabled5)
     return;
   for (const effect of effects) {
     dispatchOne(effect, ctx);
@@ -48154,10 +48204,10 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
 }
 // ../src/build-info.ts
-var VERSION = "0.13.15";
-var COMMIT_SHA = "bc0b5540";
-var COMMIT_DATE = "2026-05-23T02:55:43Z";
-var LATEST_PR = 1673;
+var VERSION = "0.13.17";
+var COMMIT_SHA = "84eb8ad9";
+var COMMIT_DATE = "2026-05-23T05:24:12Z";
+var LATEST_PR = 1678;
 var COMMITS_AHEAD_OF_TAG = 0;
 // gateway/boot-version.ts
@@ -50617,7 +50667,30 @@ async function executeReply(args) {
   const configParseMode = access.parseMode ?? "html";
   const format = args.format ?? configParseMode;
   const disableLinkPreview = args.disable_web_page_preview != null ? Boolean(args.disable_web_page_preview) : access.disableLinkPreview ?? true;
-  const disableNotification = args.disable_notification === true;
+  let disableNotification = args.disable_notification === true;
+  {
+    const turn2 = currentTurn;
+    if (turn2 != null) {
+      const now = Date.now();
+      const decision = decideOverPing({
+        modelRequestedPing: !disableNotification,
+        firstPingAt: turn2.firstPingAt,
+        nowMs: now
+      });
+      if (decision.suppress) {
+        process.stderr.write(`telegram gateway: reply over-ping safety net \u2014 ` + `downgrading disable_notification:false \u2192 true ` + `(chat=${chat_id} thread=${args.message_thread_id ?? "-"} firstPingAt=${turn2.firstPingAt} sinceFirstPing_ms=${decision.sinceFirstPingMs})
+`);
+        emitRuntimeMetric({
+          kind: "over_ping_suppressed",
+          key: statusKey(chat_id, args.message_thread_id != null ? Number(args.message_thread_id) : undefined),
+          sinceFirstPingMs: decision.sinceFirstPingMs ?? 0
+        });
+        disableNotification = true;
+      } else if (decision.claimSlot) {
+        turn2.firstPingAt = now;
+      }
+    }
+  }
   const tg = access.telegraph;
   const tgThreshold = tg?.threshold ?? 3000;
   if (tg?.enabled && files.length === 0 && text.length > tgThreshold) {
@@ -50711,6 +50784,56 @@ ${url}`;
     previewMessageId = null;
   }
   startTypingLoop(chat_id);
+  let silentAnchorEditDone = false;
+  {
+    const turn2 = currentTurn;
+    if (turn2 != null && chunks.length === 1) {
+      const decision = decideSilentReplyAnchor({
+        effectivelySilent: disableNotification,
+        anchorMessageId: turn2.silentAnchorMessageId,
+        anchorText: turn2.silentAnchorText,
+        newReplyText: effectiveText,
+        hasFiles: files.length > 0,
+        hasButtons: replyMarkup != null
+      });
+      if (decision.kind === "edit-anchor") {
+        const editParams = {
+          link_preview_options: { is_disabled: disableLinkPreview }
+        };
+        if (parseMode != null)
+          editParams.parse_mode = parseMode;
+        if (threadId != null)
+          editParams.message_thread_id = threadId;
+        try {
+          await robustApiCall(() => lockedBot.api.editMessageText(chat_id, decision.messageId, decision.mergedText, editParams), {
+            chat_id,
+            verb: "reply.silent-anchor-edit",
+            ...threadId != null ? { threadId } : {}
+          });
+          turn2.silentAnchorText = decision.mergedText;
+          sentIds.push(decision.messageId);
+          logOutbound("edit", chat_id, decision.messageId, decision.mergedText.length, "silent-anchor-merge");
+          process.stderr.write(`telegram gateway: silent-reply auto-edit \u2014 ` + `chat=${chat_id} anchor=${decision.messageId} merged_len=${decision.mergedText.length}
+`);
+          silentAnchorEditDone = true;
+        } catch (err) {
+          process.stderr.write(`telegram gateway: silent-reply auto-edit failed, falling back to fresh send: ${err instanceof Error ? err.message : String(err)}
+`);
+        }
+      }
+    }
+  }
+  if (silentAnchorEditDone) {
+    stopTypingLoop(chat_id);
+    return {
+      content: [
+        {
+          type: "text",
+          text: `edited (id: ${sentIds[0]})`
+        }
+      ]
+    };
+  }
   try {
     for (let i = 0;i < chunks.length; i++) {
       const shouldReplyTo = reply_to != null && replyMode !== "off" && (replyMode === "all" || i === 0);
@@ -50812,6 +50935,13 @@ ${url}`;
       });
     }
   }
+  if (chunks.length === 1 && disableNotification && files.length === 0 && replyMarkup == null && sentIds.length === 1) {
+    const turn2 = currentTurn;
+    if (turn2 != null) {
+      turn2.silentAnchorMessageId = sentIds[0];
+      turn2.silentAnchorText = effectiveText;
+    }
+  }
   const allPhotos = files.length >= 2 && files.length <= 10 && files.every((f) => PHOTO_EXTS.has(extname(f).toLowerCase()));
   const replyParams = reply_to != null && replyMode !== "off" ? { reply_parameters: { message_id: reply_to } } : {};
   if (allPhotos) {
@@ -51766,6 +51896,9 @@ function handleSessionEvent(ev) {
           gatewayReceiveAt: startedAt,
           replyCalled: false,
           finalAnswerDelivered: false,
+          firstPingAt: null,
+          silentAnchorMessageId: null,
+          silentAnchorText: "",
           capturedText: [],
           orphanedReplyTimeoutId: null,
           registryKey: null,

package/telegram-plugin/gateway/gateway.ts CHANGED Viewed

@@ -74,6 +74,8 @@ import {
   shutdownAnalytics,
 } from '../analytics-posthog.js'
 import { emitRuntimeMetric } from '../runtime-metrics.js'
+import { decideOverPing } from '../over-ping-safety-net.js'
+import { decideSilentReplyAnchor } from '../silent-reply-anchor.js'
 import { classifyInbound } from '../inbound-classifier.js'
 import * as silencePoke from '../silence-poke.js'
 import * as pendingProgress from '../pending-work-progress.js'
@@ -1206,6 +1208,27 @@ type CurrentTurn = {
   // even though `replyCalled` is true — the #1664 case where the real answer
   // ended up as plain transcript text rendered into an ephemeral draft.
   finalAnswerDelivered: boolean
+  // #1675 (over-ping safety net): wall-clock ms of the first reply
+  // this turn that landed with `disable_notification: false` (a real
+  // device ping). The conversational-pacing contract
+  // (`reference/conversational-pacing.md` beat 5) says EXACTLY ONE
+  // ping per turn — the final answer. When the model violates that
+  // (sends a substantive answer pinged + a wrap-up "Delivered…" or
+  // meta-narration also pinged), subsequent reply calls with
+  // `disable_notification: false` are auto-downgraded to silent by
+  // the framework. Null until the first ping lands. Reset on every
+  // fresh-turn enqueue.
+  firstPingAt: number | null
+  // #1677 silent-reply auto-edit. The first silent reply of a turn
+  // captures `silentAnchorMessageId` + `silentAnchorText`; subsequent
+  // silent replies in the SAME turn editMessageText that anchor
+  // (appending with paragraph-break separator). Net visual: one
+  // growing silent bubble instead of N stacked silent bubbles.
+  // Cleared by turn-atom replacement on enqueue. See
+  // `telegram-plugin/silent-reply-anchor.ts` for the pure
+  // `decideSilentReplyAnchor` predicate.
+  silentAnchorMessageId: number | null
+  silentAnchorText: string
   capturedText: string[]
   orphanedReplyTimeoutId: ReturnType<typeof setTimeout> | null
   registryKey: string | null
@@ -4208,7 +4231,58 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
   // so only the final answer pings the device. Default false (pings) so
   // existing call-sites and the typical "final answer" reply keep their
   // current behaviour without an explicit flag.
-  const disableNotification = args.disable_notification === true
+  let disableNotification = args.disable_notification === true
+  // #1675 over-ping safety net. The conversational-pacing contract
+  // (`reference/conversational-pacing.md` beat 5) says EXACTLY ONE
+  // device ping per turn — the final answer. The model sometimes
+  // violates this by sending a substantive answer pinged + a wrap-up
+  // ("Delivered all three steps…", "Sent.", or meta-narration) ALSO
+  // pinged. Both messages then fire notifications. The fleet UAT on
+  // 2026-05-23 reproduced this (Step 3 + Delivered both pinged, two
+  // beeps for a turn that should have produced one). Framework owns
+  // the safety net: once the turn has emitted ONE pinged reply, every
+  // subsequent reply call in the same turn auto-downgrades to silent
+  // (disable_notification: true). Model intent ("I want this loud")
+  // is honoured for the first ping; subsequent pings are demoted with
+  // a stderr log so operators can see the safety net engage.
+  //
+  // The slot is claimed BEFORE the actual send to keep the logic
+  // sequential — a send that fails part-way leaves firstPingAt set
+  // and subsequent pings would be silenced. Acceptable trade-off (a
+  // failed first ping is an edge case; the alternative — claim after
+  // send — races concurrent reply calls).
+  {
+    const turn = currentTurn
+    if (turn != null) {
+      const now = Date.now()
+      const decision = decideOverPing({
+        modelRequestedPing: !disableNotification,
+        firstPingAt: turn.firstPingAt,
+        nowMs: now,
+      })
+      if (decision.suppress) {
+        process.stderr.write(
+          `telegram gateway: reply over-ping safety net — ` +
+          `downgrading disable_notification:false → true ` +
+          `(chat=${chat_id} thread=${args.message_thread_id ?? '-'} ` +
+          `firstPingAt=${turn.firstPingAt} sinceFirstPing_ms=${decision.sinceFirstPingMs})\n`,
+        )
+        // Observability: surface to the unified runtime-metrics
+        // fan-out so the cadence dashboard can track fleet-wide
+        // over-ping rate (leading indicator of model pacing drift).
+        emitRuntimeMetric({
+          kind: 'over_ping_suppressed',
+          key: statusKey(chat_id, args.message_thread_id != null
+            ? Number(args.message_thread_id) : undefined),
+          sinceFirstPingMs: decision.sinceFirstPingMs ?? 0,
+        })
+        disableNotification = true
+      } else if (decision.claimSlot) {
+        turn.firstPingAt = now
+      }
+    }
+  }
   // Telegraph publish (#579). When the reply text is long enough AND
   // the agent has telegraph enabled in access.json, publish to
@@ -4354,6 +4428,91 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
   startTypingLoop(chat_id)
+  // #1677 silent-reply auto-edit. Consecutive silent replies within
+  // a turn edit a single anchor message instead of stacking new
+  // bubbles. We branch BEFORE the chunk loop so the single-chunk
+  // common case takes an editMessageText path; everything else
+  // (multi-chunk, ping, files, buttons) falls through to fresh send
+  // and either captures a new anchor or doesn't, per the predicate.
+  let silentAnchorEditDone = false
+  {
+    const turn = currentTurn
+    if (turn != null && chunks.length === 1) {
+      const decision = decideSilentReplyAnchor({
+        effectivelySilent: disableNotification,
+        anchorMessageId: turn.silentAnchorMessageId,
+        anchorText: turn.silentAnchorText,
+        newReplyText: effectiveText,
+        hasFiles: files.length > 0,
+        hasButtons: replyMarkup != null,
+      })
+      if (decision.kind === 'edit-anchor') {
+        const editParams: {
+          parse_mode?: 'HTML' | 'MarkdownV2'
+          message_thread_id?: number
+          link_preview_options?: { is_disabled: boolean }
+        } = {
+          link_preview_options: { is_disabled: disableLinkPreview },
+        }
+        if (parseMode != null) editParams.parse_mode = parseMode
+        if (threadId != null) editParams.message_thread_id = threadId
+        try {
+          await robustApiCall(
+            () =>
+              lockedBot.api.editMessageText(
+                chat_id,
+                decision.messageId,
+                decision.mergedText,
+                editParams,
+              ),
+            {
+              chat_id,
+              verb: 'reply.silent-anchor-edit',
+              ...(threadId != null ? { threadId } : {}),
+            },
+          )
+          turn.silentAnchorText = decision.mergedText
+          sentIds.push(decision.messageId)
+          logOutbound(
+            'edit',
+            chat_id,
+            decision.messageId,
+            decision.mergedText.length,
+            'silent-anchor-merge',
+          )
+          process.stderr.write(
+            `telegram gateway: silent-reply auto-edit — ` +
+            `chat=${chat_id} anchor=${decision.messageId} ` +
+            `merged_len=${decision.mergedText.length}\n`,
+          )
+          silentAnchorEditDone = true
+        } catch (err) {
+          // Edit failed (e.g. message deleted, rate limit exhausted,
+          // parse error). Fall through to fresh-send below — the
+          // anchor will be overwritten by whatever lands.
+          process.stderr.write(
+            `telegram gateway: silent-reply auto-edit failed, ` +
+            `falling back to fresh send: ${err instanceof Error ? err.message : String(err)}\n`,
+          )
+        }
+      }
+    }
+  }
+  if (silentAnchorEditDone) {
+    // Skip the chunk loop entirely — the anchor edit IS the send.
+    // Match the normal exit path: stop typing, then return.
+    stopTypingLoop(chat_id)
+    return {
+      content: [
+        {
+          type: 'text',
+          text: `edited (id: ${sentIds[0]})`,
+        },
+      ],
+    }
+  }
   try {
     for (let i = 0; i < chunks.length; i++) {
       const shouldReplyTo =
@@ -4489,6 +4648,27 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
     }
   }
+  // #1677 silent-reply auto-edit — anchor capture for the FIRST
+  // silent reply of a turn (or the silent reply that replaced the
+  // anchor on overflow). Only captures for the single-chunk,
+  // silent, no-files, no-buttons happy path; the edit-anchor path
+  // earlier in this function handles SUBSEQUENT silent replies by
+  // editing. The next silent reply this turn will see the captured
+  // anchor and edit it in place.
+  if (
+    chunks.length === 1
+    && disableNotification
+    && files.length === 0
+    && replyMarkup == null
+    && sentIds.length === 1
+  ) {
+    const turn = currentTurn
+    if (turn != null) {
+      turn.silentAnchorMessageId = sentIds[0]!
+      turn.silentAnchorText = effectiveText
+    }
+  }
   // #273: when files is 2-10 photos, batch them into a single
   // sendMediaGroup album rather than N separate sendPhoto calls. The
   // user's device fires one notification for the album instead of N
@@ -5877,6 +6057,9 @@ function handleSessionEvent(ev: SessionEvent): void {
           gatewayReceiveAt: startedAt,
           replyCalled: false,
           finalAnswerDelivered: false,
+          firstPingAt: null,
+          silentAnchorMessageId: null,
+          silentAnchorText: '',
           capturedText: [],
           orphanedReplyTimeoutId: null,
           registryKey: null,

package/telegram-plugin/over-ping-safety-net.ts ADDED Viewed

@@ -0,0 +1,80 @@
+/**
+ * over-ping-safety-net.ts — pure decision predicate for #1674's
+ * "at-most-one device-ping per turn" framework safety net.
+ *
+ * Background. `reference/conversational-pacing.md` beat 5 is
+ * explicit: the model should deliver the answer as a fresh `reply`
+ * omitting `disable_notification` (i.e. pinging the device once).
+ * EXACTLY ONE ping per turn. The model occasionally violates this
+ * — fleet UAT 2026-05-23 reproduced a substantive Step 3 answer
+ * pinged + a wrap-up "Delivered all three steps with a wrap-up
+ * summary." ALSO pinged, two device beeps for a turn that should
+ * have produced one.
+ *
+ * This module is the framework safety net. The IO lives in the
+ * gateway's `executeReply` (mutate `turn.firstPingAt`, emit log +
+ * runtime-metric, override `disableNotification`); keeping the
+ * *decision* pure makes the predicate unit-testable without
+ * standing up a gateway.
+ *
+ * Contract:
+ *   - When the model requested a ping (`!disable_notification`) AND
+ *     the current turn already had a ping land (`firstPingAt != null`),
+ *     the decision says SUPPRESS — the caller downgrades to silent.
+ *   - When the model requested a ping AND no prior ping this turn,
+ *     the decision says CLAIM the slot — caller sets `firstPingAt`.
+ *   - When the model requested silent, this module is a no-op.
+ *
+ * The slot is claimed BEFORE the actual send (caller responsibility).
+ * Trade-off documented inline in `gateway.ts:executeReply`.
+ */
+export interface OverPingDecisionInput {
+  /** True iff the model requested a device ping
+   *  (`disable_notification:false` or omitted, since the default is to
+   *  ping per Telegram Bot API). The caller computes this from the
+   *  inbound `args.disable_notification === true` check. */
+  modelRequestedPing: boolean
+  /** Wall-clock ms of the FIRST ping this turn, or null if no ping
+   *  has landed yet. Caller threads this through from
+   *  `CurrentTurn.firstPingAt`. */
+  firstPingAt: number | null
+  /** Deterministic clock for tests; defaults to Date.now() in callers. */
+  nowMs: number
+}
+export interface OverPingDecision {
+  /** True iff the caller should override `disableNotification` to
+   *  `true` (i.e. send this reply silently). Implies a contract
+   *  violation by the model — caller should log + emit a metric. */
+  suppress: boolean
+  /** True iff the caller should claim the slot —
+   *  `turn.firstPingAt = nowMs`. Mutually exclusive with `suppress`. */
+  claimSlot: boolean
+  /** When `suppress` is true, how long the first ping has been
+   *  "active" (ms since `firstPingAt`). Caller surfaces this in the
+   *  log + metric for forensic analysis (e.g. tight rapid double-pings
+   *  vs delayed wrap-ups). Null otherwise. */
+  sinceFirstPingMs: number | null
+}
+/**
+ * Pure decision: should the framework suppress this reply's ping?
+ * No mutation, no IO, deterministic under a fixed `nowMs`.
+ */
+export function decideOverPing(input: OverPingDecisionInput): OverPingDecision {
+  if (!input.modelRequestedPing) {
+    // Model already chose silent — nothing for the safety net to do.
+    return { suppress: false, claimSlot: false, sinceFirstPingMs: null }
+  }
+  if (input.firstPingAt != null) {
+    // Slot already claimed by an earlier ping this turn — suppress.
+    return {
+      suppress: true,
+      claimSlot: false,
+      sinceFirstPingMs: input.nowMs - input.firstPingAt,
+    }
+  }
+  // First ping this turn — let it through and claim the slot.
+  return { suppress: false, claimSlot: true, sinceFirstPingMs: null }
+}

package/telegram-plugin/runtime-metrics.ts CHANGED Viewed

@@ -124,6 +124,24 @@ export type RuntimeMetricEvent =
       elapsedMs?: number
       reason?: string
     }
+  /**
+   * #1674 over-ping safety net engaged. Fires when a `reply` call
+   * arrived with `disable_notification: false` AND the current turn
+   * already had a pinged reply land — the framework downgraded this
+   * call to silent to honour beat 5's "EXACTLY ONE ping per turn"
+   * contract. Each event is a model contract violation the safety
+   * net caught. A high rate per agent means the model is
+   * systematically over-pinging — prompt drift or training
+   * regression worth investigating.
+   *
+   *   key                 → `<chatId>:<threadIdOrEmpty>` (the statusKey shape)
+   *   sinceFirstPingMs    → time since the FIRST ping landed this turn
+   */
+  | {
+      kind: 'over_ping_suppressed'
+      key: string
+      sinceFirstPingMs: number
+    }
 /**
  * The JSONL sink lives under the runtime state dir so it's per-agent

package/telegram-plugin/silent-reply-anchor.ts ADDED Viewed

@@ -0,0 +1,142 @@
+/**
+ * silent-reply-anchor.ts — pure decision predicate for the
+ * "consecutive silent replies edit one growing message" UX fix.
+ *
+ * Background. Modern Claude 2.1.x on this fleet implements
+ * conversational pacing (`reference/conversational-pacing.md` beats
+ * 1 + 3 + 5) by calling the `reply` MCP tool multiple times in a
+ * turn — a silent ack, silent per-step updates, and one pinged
+ * final answer. The over-ping safety net (#1674) caps the
+ * notifications at one. But the user still SEES N separate chat
+ * bubbles for the silent replies, which reads as visual spam even
+ * when no device pings. The operator's original complaint was
+ * exactly this shape:
+ *
+ *   "I would like more regular process updates, where it edits a
+ *    status message in place vs spamming multiple messages."
+ *
+ * Fix: consecutive silent replies within a turn EDIT a single
+ * anchor message instead of each sending a fresh bubble. The
+ * model's intent (silent mid-turn updates) is honoured; the
+ * framework controls the visual placement (one growing bubble,
+ * not many). Final pinged reply lands as a separate fresh bubble
+ * (it's the final answer; the silent anchor is the preamble).
+ *
+ * Net visual for a multi-step turn:
+ *   pre-fix:  4 bubbles (silent ack + 2 silent steps + 1 pinged final)
+ *   post-fix: 2 bubbles (1 silent anchor with all 3 thoughts + 1 pinged final)
+ *
+ * Pinged replies always fresh-send. Reply-tool calls with files
+ * or button keyboards bypass the anchor (fresh send) because the
+ * edit path can't merge those cleanly.
+ *
+ * Accumulation format: `${anchorText}\n\n${newReplyText}` —
+ * blank-line paragraph separator. Reads naturally as the model
+ * "thinking out loud" with paragraph breaks per thought.
+ *
+ * Kill switch: `SWITCHROOM_DISABLE_SILENT_REPLY_AUTOEDIT=1` (or
+ * `=true`) — turns the auto-edit off; reverts to per-reply fresh send.
+ */
+/** Conservative cap on merged anchor text, kept below Telegram's 4096-character message text limit. A merge that would exceed it falls back to a fresh send. */
+export const TELEGRAM_MSG_CAP = 4000
+export interface SilentReplyAnchorDecisionInput {
+  /** True when this reply will be sent silently: either the model
+   *  passed `disable_notification: true` (a beat 1/3 update) or the
+   *  over-ping safety net downgraded a ping to silent. This
+   *  predicate sees the EFFECTIVE flag, not the raw model
+   *  intent. */
+  effectivelySilent: boolean
+  /** Telegram message id of the current silent anchor, or null when
+   *  no silent anchor has been set this turn. */
+  anchorMessageId: number | null
+  /** Text content of the current anchor (accumulated). Empty when
+   *  no anchor exists. */
+  anchorText: string
+  /** Text content of the incoming reply, BEFORE any anchor merge. */
+  newReplyText: string
+  /** True if the incoming reply has attached files (photos,
+   *  documents, etc). Anchor merge bypassed when true — edits
+   *  can't add media to an existing text message. */
+  hasFiles: boolean
+  /** True if the incoming reply has an inline keyboard. Anchor
+   *  merge bypassed when true — keyboard semantics across edits
+   *  are too easy to get wrong, and the markup is rare enough
+   *  that fresh-send is the safer default. */
+  hasButtons: boolean
+}
+/**
+ * What the caller should do with this reply.
+ *
+ *   - `kind: 'fresh'` — send a normal new message; if it should
+ *     become the next anchor (silent + no attachments), the caller
+ *     captures its message_id after send and sets the anchor.
+ *
+ *   - `kind: 'edit-anchor'` — DO NOT send; edit the existing
+ *     anchor message with `mergedText` as the new content. The
+ *     caller updates `anchor.text = mergedText` after a successful
+ *     edit. messageId is the anchor's existing id.
+ */
+export type SilentReplyAnchorDecision =
+  | { kind: 'fresh'; becomesAnchor: boolean }
+  | { kind: 'edit-anchor'; messageId: number; mergedText: string }
+function enabled(): boolean {
+  const v = process.env.SWITCHROOM_DISABLE_SILENT_REPLY_AUTOEDIT
+  return !(v === '1' || v === 'true')
+}
+/**
+ * Decide whether to merge this reply into an existing silent
+ * anchor or fresh-send. Pure: no IO, no mutation, kill-switch
+ * checked per call.
+ */
+export function decideSilentReplyAnchor(
+  input: SilentReplyAnchorDecisionInput,
+): SilentReplyAnchorDecision {
+  // Kill switch disengages the whole mechanism — every reply
+  // falls through to fresh-send with no anchor capture.
+  if (!enabled()) {
+    return { kind: 'fresh', becomesAnchor: false }
+  }
+  // Pinged replies never merge — they're the final answer bubble,
+  // semantically distinct from the silent preamble.
+  if (!input.effectivelySilent) {
+    return { kind: 'fresh', becomesAnchor: false }
+  }
+  // Files / buttons bypass the anchor — edit-text can't merge
+  // media, and keyboards across edits are a foot-gun.
+  if (input.hasFiles || input.hasButtons) {
+    return { kind: 'fresh', becomesAnchor: false }
+  }
+  // Empty body — let the caller's existing validation handle it.
+  // We treat as fresh-but-don't-anchor so a downstream "drop empty"
+  // doesn't leave a stale anchor pointer.
+  if (input.newReplyText.trim().length === 0) {
+    return { kind: 'fresh', becomesAnchor: false }
+  }
+  // No anchor yet this turn → this reply BECOMES the anchor.
+  if (input.anchorMessageId == null) {
+    return { kind: 'fresh', becomesAnchor: true }
+  }
+  // Anchor exists → try to merge. The merge format is paragraph-
+  // break separation. If the merged result would exceed the
+  // Telegram text cap, give up on the anchor and start fresh —
+  // the new reply becomes a new anchor.
+  const merged = `${input.anchorText}\n\n${input.newReplyText}`
+  if (merged.length > TELEGRAM_MSG_CAP) {
+    return { kind: 'fresh', becomesAnchor: true }
+  }
+  return {
+    kind: 'edit-anchor',
+    messageId: input.anchorMessageId,
+    mergedText: merged,
+  }
+}

package/telegram-plugin/tests/over-ping-safety-net.test.ts ADDED Viewed

@@ -0,0 +1,96 @@
+/**
+ * Unit suite for #1674's over-ping safety net predicate.
+ * Pins the decision logic in isolation from the gateway's
+ * `executeReply` IO so a future refactor can't silently regress.
+ */
+import { describe, expect, it } from 'vitest'
+import { decideOverPing } from '../over-ping-safety-net.js'
+describe('decideOverPing — at-most-one-ping-per-turn safety net', () => {
+  it('lets the FIRST ping through and tells caller to claim the slot', () => {
+    const { suppress, claimSlot, sinceFirstPingMs } = decideOverPing({
+      firstPingAt: null,
+      nowMs: 1_000,
+      modelRequestedPing: true,
+    })
+    expect(suppress).toBe(false)
+    expect(claimSlot).toBe(true)
+    expect(sinceFirstPingMs).toBeNull()
+  })
+  it('SUPPRESSES subsequent ping in the same turn and reports elapsed', () => {
+    const { suppress, claimSlot, sinceFirstPingMs } = decideOverPing({
+      firstPingAt: 1_000,
+      nowMs: 4_500,
+      modelRequestedPing: true,
+    })
+    expect(suppress).toBe(true)
+    expect(claimSlot).toBe(false)
+    expect(sinceFirstPingMs).toBe(3_500)
+  })
+  it('is a no-op when the model already requested silent (regardless of slot state)', () => {
+    const noOp = { suppress: false, claimSlot: false, sinceFirstPingMs: null }
+    const withoutPriorPing = decideOverPing({
+      firstPingAt: null,
+      nowMs: 1_000,
+      modelRequestedPing: false,
+    })
+    expect(withoutPriorPing).toEqual(noOp)
+    // Slot already taken — a silent reply neither suppresses nor claims.
+    const afterPriorPing = decideOverPing({
+      firstPingAt: 1_000,
+      nowMs: 5_000,
+      modelRequestedPing: false,
+    })
+    expect(afterPriorPing).toEqual(noOp)
+  })
+  it('handles the edge case where firstPingAt equals nowMs (instant double-call)', () => {
+    // Two ping requests in the same tick: the slot timestamp and the
+    // clock coincide, so the elapsed time is 0 and suppression applies.
+    const { suppress, claimSlot, sinceFirstPingMs } = decideOverPing({
+      firstPingAt: 1_000,
+      nowMs: 1_000,
+      modelRequestedPing: true,
+    })
+    expect(suppress).toBe(true)
+    expect(claimSlot).toBe(false)
+    expect(sinceFirstPingMs).toBe(0)
+  })
+  it('reports large elapsed deltas honestly (late wrap-up after long work)', () => {
+    // Shape of the real-world reproducer: the substantive answer pings
+    // at +30s and a "Delivered all three steps…" wrap-up tries to ping
+    // at +36s. The second is suppressed and the 6s gap is reported.
+    const { suppress, sinceFirstPingMs } = decideOverPing({
+      firstPingAt: 30_000,
+      nowMs: 36_000,
+      modelRequestedPing: true,
+    })
+    expect(suppress).toBe(true)
+    expect(sinceFirstPingMs).toBe(6_000)
+  })
+  it('claim-vs-suppress is mutually exclusive', () => {
+    // Defensive invariant: across every combination of "ping
+    // requested" and "slot already claimed", the two flags must never
+    // both be true. The nested loops enumerate all four combinations
+    // at a fixed clock value.
+    const pingRequests = [true, false]
+    const slotStates: Array<number | null> = [null, 50]
+    for (const modelRequestedPing of pingRequests) {
+      for (const firstPingAt of slotStates) {
+        const { suppress, claimSlot } = decideOverPing({
+          modelRequestedPing,
+          firstPingAt,
+          nowMs: 100,
+        })
+        expect(suppress && claimSlot).toBe(false)
+      }
+    }
+  })
+})

package/telegram-plugin/tests/silent-reply-anchor.test.ts ADDED Viewed

@@ -0,0 +1,178 @@
+/**
+ * Unit suite for #1677 silent-reply auto-edit predicate.
+ */
+import { afterEach, beforeEach, describe, expect, it } from 'vitest'
+import {
+  TELEGRAM_MSG_CAP,
+  decideSilentReplyAnchor,
+} from '../silent-reply-anchor.js'
+describe('decideSilentReplyAnchor — silent replies edit a single growing anchor', () => {
+  // Each test starts and ends with the kill-switch variable unset.
+  const clearKillSwitch = () => {
+    delete process.env.SWITCHROOM_DISABLE_SILENT_REPLY_AUTOEDIT
+  }
+  beforeEach(clearKillSwitch)
+  afterEach(clearKillSwitch)
+  it('first silent reply this turn becomes the anchor (fresh send + capture)', () => {
+    const decision = decideSilentReplyAnchor({
+      anchorMessageId: null,
+      anchorText: '',
+      newReplyText: 'on it — checking the calendar',
+      effectivelySilent: true,
+      hasFiles: false,
+      hasButtons: false,
+    })
+    expect(decision).toEqual({ kind: 'fresh', becomesAnchor: true })
+  })
+  it('subsequent silent reply edits the anchor with paragraph-break merge', () => {
+    const decision = decideSilentReplyAnchor({
+      anchorMessageId: 12345,
+      anchorText: 'on it — checking the calendar',
+      newReplyText: 'Step 1: hostname is example-host',
+      effectivelySilent: true,
+      hasFiles: false,
+      hasButtons: false,
+    })
+    expect(decision).toEqual({
+      kind: 'edit-anchor',
+      messageId: 12345,
+      mergedText:
+        'on it — checking the calendar\n\nStep 1: hostname is example-host',
+    })
+  })
+  it('third and beyond silent replies keep accumulating onto the same anchor', () => {
+    // Multi-step pattern (ack → step1 → step2 → step3): the anchor
+    // already holds three paragraphs from two earlier merges.
+    const decision = decideSilentReplyAnchor({
+      anchorMessageId: 12345,
+      anchorText: 'on it\n\nStep 1: hostname\n\nStep 2: OS family',
+      newReplyText: 'Step 3: CPU',
+      effectivelySilent: true,
+      hasFiles: false,
+      hasButtons: false,
+    })
+    expect(decision.kind).toBe('edit-anchor')
+    // Guard exists only to narrow the union for the type checker.
+    if (decision.kind !== 'edit-anchor') return
+    expect(decision.mergedText).toBe(
+      'on it\n\nStep 1: hostname\n\nStep 2: OS family\n\nStep 3: CPU',
+    )
+  })
+  it('pinged (effectivelySilent=false) reply NEVER merges — fresh send', () => {
+    const decision = decideSilentReplyAnchor({
+      anchorMessageId: 12345,
+      anchorText: 'on it\n\nSteps done',
+      newReplyText: 'Final answer here',
+      effectivelySilent: false,
+      hasFiles: false,
+      hasButtons: false,
+    })
+    expect(decision).toEqual({ kind: 'fresh', becomesAnchor: false })
+  })
+  it('files attached → fresh send (anchor cannot absorb media)', () => {
+    const decision = decideSilentReplyAnchor({
+      anchorMessageId: 12345,
+      anchorText: 'on it',
+      newReplyText: 'here is the chart',
+      effectivelySilent: true,
+      hasFiles: true,
+      hasButtons: false,
+    })
+    expect(decision).toEqual({ kind: 'fresh', becomesAnchor: false })
+  })
+  it('button keyboard → fresh send (keyboard semantics across edits is a foot-gun)', () => {
+    const decision = decideSilentReplyAnchor({
+      anchorMessageId: 12345,
+      anchorText: 'on it',
+      newReplyText: 'choose one:',
+      effectivelySilent: true,
+      hasFiles: false,
+      hasButtons: true,
+    })
+    expect(decision).toEqual({ kind: 'fresh', becomesAnchor: false })
+  })
+  it('empty reply body → fresh send + DO NOT become anchor', () => {
+    // Empty-text validation belongs to the caller. The predicate only
+    // has to make sure a whitespace-only reply that slips through is
+    // never recorded as the anchor.
+    const decision = decideSilentReplyAnchor({
+      anchorMessageId: null,
+      anchorText: '',
+      newReplyText: '   ',
+      effectivelySilent: true,
+      hasFiles: false,
+      hasButtons: false,
+    })
+    expect(decision).toEqual({ kind: 'fresh', becomesAnchor: false })
+  })
+  it('overflow: merged text > TELEGRAM_MSG_CAP → fresh send + start new anchor', () => {
+    const nearCapAnchor = 'x'.repeat(TELEGRAM_MSG_CAP - 10)
+    const decision = decideSilentReplyAnchor({
+      anchorMessageId: 12345,
+      anchorText: nearCapAnchor,
+      newReplyText: 'short tail',
+      effectivelySilent: true,
+      hasFiles: false,
+      hasButtons: false,
+    })
+    // (CAP − 10) + 2 for "\n\n" + 10 for "short tail" = CAP + 2.
+    expect(decision).toEqual({ kind: 'fresh', becomesAnchor: true })
+  })
+  it('kill switch — `SWITCHROOM_DISABLE_SILENT_REPLY_AUTOEDIT=1` short-circuits to fresh send for every reply', () => {
+    process.env.SWITCHROOM_DISABLE_SILENT_REPLY_AUTOEDIT = '1'
+    const decision = decideSilentReplyAnchor({
+      anchorMessageId: 12345,
+      anchorText: 'on it',
+      newReplyText: 'Step 1',
+      effectivelySilent: true,
+      hasFiles: false,
+      hasButtons: false,
+    })
+    expect(decision).toEqual({ kind: 'fresh', becomesAnchor: false })
+  })
+  it('kill switch accepts string "true" too', () => {
+    process.env.SWITCHROOM_DISABLE_SILENT_REPLY_AUTOEDIT = 'true'
+    const decision = decideSilentReplyAnchor({
+      anchorMessageId: null,
+      anchorText: '',
+      newReplyText: 'on it',
+      effectivelySilent: true,
+      hasFiles: false,
+      hasButtons: false,
+    })
+    expect(decision).toEqual({ kind: 'fresh', becomesAnchor: false })
+  })
+  it('borderline merge — exactly at the cap is accepted (boundary inclusive)', () => {
+    // Size the anchor so that anchor + "\n\n" + reply lands on
+    // TELEGRAM_MSG_CAP exactly.
+    const separator = '\n\n'
+    const newReplyText = 'tail'
+    const anchorText = 'a'.repeat(TELEGRAM_MSG_CAP - separator.length - newReplyText.length)
+    const decision = decideSilentReplyAnchor({
+      anchorMessageId: 12345,
+      anchorText,
+      newReplyText,
+      effectivelySilent: true,
+      hasFiles: false,
+      hasButtons: false,
+    })
+    expect(decision.kind).toBe('edit-anchor')
+    // Guard exists only to narrow the union for the type checker.
+    if (decision.kind !== 'edit-anchor') return
+    expect(decision.mergedText.length).toBe(TELEGRAM_MSG_CAP)
+  })
+})

package/telegram-plugin/uat/scenarios/visible-answer-stream-dm.test.ts CHANGED Viewed

@@ -1,59 +1,80 @@
 /**
- * Visible answer-stream — UAT for the openclaw-pattern TTFO fix
- * (#869 Phase 1 narrow scope).
+ * Conversational pacing UAT — measures the END-TO-END user-perceived
+ * turn UX on a multi-step prompt.
  *
- * Validates that when `SWITCHROOM_VISIBLE_ANSWER_STREAM=1` is set on
- * the target agent, the framework auto-renders the model's transcript
- * text as a user-visible edit-in-place message starting within ~5s of
- * inbound — instead of writing to Telegram's invisible compose-box
- * draft (the default #1664 behaviour).
+ * Original framing was "validate the visible-answer-stream path
+ * activates." Live research on test-harness with the
+ * `SWITCHROOM_VISIBLE_ANSWER_STREAM=1` flag showed that modern Claude
+ * 2.1.x on this fleet does NOT emit transcript text events between
+ * tool calls — it consistently calls the `reply` MCP tool directly
+ * for every user-visible chunk (beat 1 ack, then per-step beat 3
+ * updates). So the visible-answer-stream code path (which renders
+ * `text` session events into a chat-timeline message) doesn't
+ * activate; the answer-stream lane stays idle while the model uses
+ * `reply` calls instead.
  *
- * ## Required setup
+ * That's actually FINE — the model is correctly following the
+ * five-beat conversational-pacing contract (`reference/conversational-
+ * pacing.md`): one silent ack at the start, silent updates per step,
+ * one pinged final answer. This UAT now validates THAT — the pacing
+ * the user actually experiences — rather than the answer-stream code
+ * path specifically.
  *
- * The target agent (default `test-harness`) MUST have
- * `SWITCHROOM_VISIBLE_ANSWER_STREAM=1` in its container environment.
- * Without that env var the scenario will (correctly) fail — the
- * default behaviour writes to a draft the mtcute driver cannot see.
+ * The flag `SWITCHROOM_VISIBLE_ANSWER_STREAM=1` is still set on
+ * test-harness for ongoing observation; if a future model version
+ * starts emitting transcript text, the lane will surface it visibly
+ * instead of writing to the invisible compose-box draft (the prior
+ * default).
  *
  * ## What this asserts
  *
- *   1. The first user-visible bot output (fresh `sendMessage`) lands
- *      within `VISIBLE_TTFO_BUDGET_MS` (default 8 s) of the inbound.
- *      Today's median TTFO across the fleet is 17–69 s; the visible
- *      lane should drop it well under 10 s for any reply long enough
- *      to emit a text chunk.
- *   2. The initial fresh message is silent (the answer-stream emits
- *      with `disable_notification: true` so mid-turn edits never ping).
- *   3. Subsequent edits land on the SAME message_id — single in-place
- *      surface, not a chain of pinged sends.
- *   4. At least one edit growth event happens between first send and
- *      turn-end (the streaming property — TTFO is fast, then content
- *      grows live).
+ *   1. First user-visible bot message lands within `TTFO_BUDGET_MS`
+ *      (default 15 s) of the inbound — covers beat 1 ack OR straight-
+ *      to-content depending on the model's pacing choice.
+ *   2. Multiple distinct bot messages land per turn for the multi-
+ *      step prompt — proving the model isn't collapsing everything
+ *      into a single pinged dump.
+ *   3. All but at most one message is silent (`disable_notification:
+ *      true`). Only the final answer should ping — anything earlier
+ *      pinging is a beat-3 contract violation.
  *
- * The captured trail is dumped to console for forensic inspection
- * regardless of pass/fail.
+ * ## Wall-clock budget
  *
- * Wall-clock budget: ~90 s.
+ * ~90 s.
  */
 import { describe, expect, it } from "vitest";
 import { spinUp } from "../harness.js";
 import type { ObservedMessage } from "../driver.js";
-const VISIBLE_TTFO_BUDGET_MS = 8_000;
+const TTFO_BUDGET_MS = 15_000;
 const OVERALL_DEADLINE_MS = 90_000;
-const QUIESCENCE_MS = 8_000;
-// Prompt engineered to make the model emit a multi-sentence answer
-// over a few seconds — long enough that the streaming behaviour
-// is observable, short enough that turn-flush isn't tempted to fire.
-// Deliberately does NOT instruct the model to call `reply` — we want
-// to exercise the transcript-only path that the visible-answer-stream
-// covers.
+const QUIESCENCE_MS = 12_000;
+// Multi-step investigation prompt. A research-style task was chosen
+// because the model was expected to think out loud between Read /
+// Bash steps, emitting transcript text BETWEEN tool calls — the
+// assistant-content `text` block session-tail surfaces via the
+// `text` event the answer-stream lane consumes.
+//
+// Per the header comment above, the model on this fleet instead
+// sends each user-visible chunk through the `reply` tool, so the
+// prompt now serves to elicit several per-step messages whose
+// pacing the assertions below check. A pure-answer prompt (the
+// previous version of this scenario) tended to make modern Claude
+// jump straight to a single `reply` tool-call with no intermediate
+// text.
 const PROMPT =
-  `Please give a four-sentence overview of how Linux page-cache ` +
-  `interacts with mmap on a typical x86_64 server. Reply in a single ` +
-  `message, with substantive prose. No code blocks.`;
+  `Investigate this step by step:\n\n` +
+  `1. Read \`/etc/hostname\` and tell me what host this is — write a ` +
+  `sentence about it.\n` +
+  `2. Then read \`/etc/os-release\` and tell me what OS family / version.\n` +
+  `3. Then read \`/proc/cpuinfo\` (head it), and tell me the CPU model + ` +
+  `core count.\n` +
+  `4. Wrap up with a one-line summary of all three.\n\n` +
+  `Between each step, narrate what you're finding in plain prose ` +
+  `(not just bullet outputs). Don't batch all your observations into ` +
+  `one final reply — talk as you investigate.`;
 interface TrailEntry {
   relMs: number;
@@ -68,9 +89,9 @@ function pad(s: string, n: number): string {
   return s.length >= n ? s : s + " ".repeat(n - s.length);
 }
-describe("uat: visible answer-stream — model transcript renders live (#869 Phase 1)", () => {
+describe("uat: conversational pacing on a multi-step turn", () => {
   it(
-    "first fresh message lands within VISIBLE_TTFO_BUDGET_MS; subsequent edits grow it in place",
+    "first message lands within TTFO_BUDGET_MS; multiple silent messages; final answer pings",
     async () => {
       const sc = await spinUp({ agent: "test-harness" });
       try {
@@ -137,79 +158,45 @@ describe("uat: visible answer-stream — model transcript renders live (#869 Pha
         }
         console.log("=================================================\n");
-        // ── Regression assertions ─────────────────────────────────
-        const fresh = trail.filter((e) => e.kind === "fresh");
-        const edits = trail.filter((e) => e.kind === "edit");
+        // ── Pacing assertions ─────────────────────────────────────
-        // (1) at least one fresh message landed
+        // (1) at least one bot message landed
         expect(
-          fresh.length,
-          `no fresh bot replies observed — either the agent isn't ` +
-            `responding OR the visible-answer-stream flag is OFF ` +
-            `(SWITCHROOM_VISIBLE_ANSWER_STREAM not set on the target ` +
-            `agent's container env). Re-check the agent's compose ` +
-            `environment.`,
+          trail.length,
+          `no bot replies observed — the agent isn't responding.`,
         ).toBeGreaterThanOrEqual(1);
-        // (2) first fresh landed within the TTFO budget
-        const ttfoMs = fresh[0].relMs;
+        // (2) first message landed within TTFO budget
+        const ttfoMs = trail[0].relMs;
         expect(
           ttfoMs,
-          `TTFO ${ttfoMs}ms exceeded the visible-answer-stream ` +
-            `budget of ${VISIBLE_TTFO_BUDGET_MS}ms. Either the model ` +
-            `was unusually slow to emit its first text chunk, OR the ` +
-            `visible answer-stream is not active. Default behaviour ` +
-            `(invisible draft) would never have surfaced a fresh ` +
-            `message at all, so the most likely cause is model latency.`,
-        ).toBeLessThanOrEqual(VISIBLE_TTFO_BUDGET_MS);
-        // (3) first fresh message was silent (mid-turn edits don't ping)
-        expect(
-          fresh[0].silent,
-          `the first fresh message pinged the user — answer-stream ` +
-            `should send silently (disable_notification:true). A ping ` +
-            `here means an explicit \`reply\` tool may have fired instead.`,
-        ).toBe(true);
+          `TTFO ${ttfoMs}ms exceeded the budget of ${TTFO_BUDGET_MS}ms.`,
+        ).toBeLessThanOrEqual(TTFO_BUDGET_MS);
-        // (4) at least one in-place EDIT landed on the same messageId
-        // (this is the "live streaming" assertion — TTFO is fast AND
-        // content grows on the same surface, not a chain of new sends).
-        const sameAnchorEdits = edits.filter(
-          (e) => e.messageId === firstAnchorMsgId,
-        );
+        // (3) multiple messages landed — proves the model is pacing,
+        // not dumping a single big reply
         expect(
-          sameAnchorEdits.length,
-          `no in-place edits to the anchor message landed — the model ` +
-            `either replied in a single shot (very short answer) or ` +
-            `the streaming path isn't running. Edits observed: ` +
-            `${edits.length}, on anchor: ${sameAnchorEdits.length}.`,
-        ).toBeGreaterThanOrEqual(1);
-        // (5) every edit is silent (Telegram edits don't push, but
-        // we double-check via mtcute's flag in case the framework
-        // ever swaps to a fresh-send pattern by accident)
-        const loudEdits = edits.filter((e) => !e.silent);
+          trail.length,
+          `only ${trail.length} message(s) observed — the model ` +
+            `collapsed this multi-step prompt into a single dump. ` +
+            `Beat 3 pacing (per-step updates) requires multiple ` +
+            `messages. Either the model didn't follow the prompt ` +
+            `or quiescence bailed early.`,
+        ).toBeGreaterThanOrEqual(2);
+        // (4) at most one message pinged the user — beat-3 contract
+        // says only the FINAL answer pings; mid-turn updates pass
+        // `disable_notification: true`.
+        const pingedMessages = trail.filter((e) => !e.silent);
         expect(
-          loudEdits.length,
-          `${loudEdits.length} edit(s) pinged the device.`,
-        ).toBe(0);
-        // (6) text length grows monotonically on the anchor (streaming
-        // by construction — once content is on the anchor, it only
-        // accumulates)
-        const anchorTrail = trail.filter(
-          (e) => e.messageId === firstAnchorMsgId,
-        );
-        for (let i = 1; i < anchorTrail.length; i++) {
-          expect(
-            anchorTrail[i].textLength,
-            `anchor message #${firstAnchorMsgId} text shrank between ` +
-              `events ${i - 1} (len=${anchorTrail[i - 1].textLength}) ` +
-              `and ${i} (len=${anchorTrail[i].textLength}) — ` +
-              `streaming text should only grow.`,
-          ).toBeGreaterThanOrEqual(anchorTrail[i - 1].textLength);
-        }
+          pingedMessages.length,
+          `${pingedMessages.length} message(s) pinged the device — ` +
+            `the conversational-pacing contract allows AT MOST 1 ` +
+            `(the final answer). Mid-turn updates must be silent. ` +
+            `Pinged messages at: ${pingedMessages
+              .map((m) => `+${(m.relMs / 1000).toFixed(0)}s`)
+              .join(", ")}`,
+        ).toBeLessThanOrEqual(1);
       } finally {
         await sc.tearDown();
       }