npm - switchroom - Versions diffs - 0.15.45 → 0.16.4 - Mend

switchroom 0.15.45 → 0.16.4

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (149) hide show

package/dist/agent-scheduler/index.js +122 -88
package/dist/auth-broker/index.js +463 -177
package/dist/cli/autoaccept-poll.js +4842 -35
package/dist/cli/drive-write-pretool.mjs +17 -14
package/dist/cli/notion-write-pretool.mjs +117 -86
package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
package/dist/cli/self-improve-stop.mjs +428 -0
package/dist/cli/skill-validate-pretool.mjs +72 -72
package/dist/cli/switchroom.js +3158 -1178
package/dist/host-control/main.js +2833 -355
package/dist/vault/approvals/kernel-server.js +7479 -7439
package/dist/vault/broker/server.js +11312 -11272
package/examples/minimal.yaml +1 -0
package/examples/switchroom.yaml +1 -0
package/package.json +3 -3
package/profiles/_base/start.sh.hbs +88 -1
package/profiles/_shared/execution-discipline.md.hbs +18 -0
package/profiles/default/CLAUDE.md.hbs +0 -19
package/telegram-plugin/.claude-plugin/plugin.json +2 -2
package/telegram-plugin/answer-stream-flag.ts +12 -49
package/telegram-plugin/answer-stream.ts +5 -150
package/telegram-plugin/auth-snapshot-format.ts +280 -48
package/telegram-plugin/auto-fallback-fleet.ts +44 -1
package/telegram-plugin/context-exhaustion.ts +12 -0
package/telegram-plugin/demo-mask.ts +154 -0
package/telegram-plugin/dist/bridge/bridge.js +167 -124
package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
package/telegram-plugin/dist/server.js +215 -172
package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
package/telegram-plugin/draft-stream.ts +47 -410
package/telegram-plugin/final-answer-detect.ts +17 -12
package/telegram-plugin/fleet-fallback-resume.ts +131 -0
package/telegram-plugin/format.ts +56 -19
package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
package/telegram-plugin/gateway/auth-command.ts +70 -14
package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
package/telegram-plugin/gateway/current-turn-map.ts +188 -0
package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
package/telegram-plugin/gateway/effort-command.ts +8 -3
package/telegram-plugin/gateway/emission-authority.ts +369 -0
package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
package/telegram-plugin/gateway/gateway.ts +1837 -291
package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
package/telegram-plugin/gateway/represent-guard.ts +72 -0
package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
package/telegram-plugin/gateway/status-surface-log.ts +14 -3
package/telegram-plugin/history.ts +33 -11
package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
package/telegram-plugin/issues-card.ts +4 -0
package/telegram-plugin/model-unavailable.ts +124 -0
package/telegram-plugin/narrative-dedup.ts +69 -0
package/telegram-plugin/over-ping-safety-net.ts +70 -4
package/telegram-plugin/package.json +3 -3
package/telegram-plugin/pending-work-progress.ts +12 -0
package/telegram-plugin/permission-rule.ts +32 -5
package/telegram-plugin/permission-title.ts +152 -9
package/telegram-plugin/quota-check.ts +13 -0
package/telegram-plugin/quota-watch.ts +135 -7
package/telegram-plugin/registry/turns-schema.test.ts +24 -0
package/telegram-plugin/registry/turns-schema.ts +9 -0
package/telegram-plugin/runtime-metrics.ts +13 -0
package/telegram-plugin/session-tail.ts +96 -11
package/telegram-plugin/silence-poke.ts +170 -24
package/telegram-plugin/slot-banner-driver.ts +3 -0
package/telegram-plugin/status-no-truncate.ts +44 -0
package/telegram-plugin/status-reactions.ts +20 -3
package/telegram-plugin/stream-controller.ts +4 -23
package/telegram-plugin/stream-reply-handler.ts +6 -24
package/telegram-plugin/streaming-metrics.ts +91 -0
package/telegram-plugin/subagent-watcher.ts +212 -66
package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
package/telegram-plugin/tests/answer-stream.test.ts +2 -411
package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
package/telegram-plugin/tests/demo-mask.test.ts +127 -0
package/telegram-plugin/tests/draft-stream.test.ts +0 -827
package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
package/telegram-plugin/tests/feed-survival.test.ts +526 -0
package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
package/telegram-plugin/tests/history.test.ts +60 -0
package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
package/telegram-plugin/tests/permission-rule.test.ts +17 -0
package/telegram-plugin/tests/permission-title.test.ts +206 -17
package/telegram-plugin/tests/quota-watch.test.ts +252 -9
package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
package/telegram-plugin/tests/represent-guard.test.ts +162 -0
package/telegram-plugin/tests/session-tail.test.ts +147 -3
package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
package/telegram-plugin/tests/telegram-format.test.ts +101 -6
package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
package/telegram-plugin/tests/tool-labels.test.ts +67 -0
package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
package/telegram-plugin/tests/welcome-text.test.ts +32 -3
package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
package/telegram-plugin/tool-activity-summary.ts +375 -58
package/telegram-plugin/turn-liveness-floor.ts +240 -0
package/telegram-plugin/uat/assertions.ts +115 -0
package/telegram-plugin/uat/driver.ts +68 -0
package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
package/telegram-plugin/welcome-text.ts +13 -1
package/telegram-plugin/worker-activity-feed.ts +157 -82
package/telegram-plugin/draft-transport.ts +0 -122
package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
package/telegram-plugin/tests/draft-transport.test.ts +0 -211

package/telegram-plugin/gateway/gateway.ts CHANGED Viewed

@@ -75,7 +75,8 @@ import {
 } from './permission-timeout.js'
 import { pickRecoveredPermissionOrigin } from './permission-card-origin.js'
 import { isTelegramReplyTool, isTelegramSurfaceTool } from '../tool-names.js'
-import { appendActivityLabel, renderActivityFeedWithNested } from '../tool-activity-summary.js'
+import { appendActivityLabel, clipNarrative, renderActivityFeedWithNested, type SessionActivityHeader } from '../tool-activity-summary.js'
+import { REPLY_TOOLS, isDraftOfReply } from '../narrative-dedup.js'
 import { toolLabel } from '../tool-labels.js'
 import { createTypingWrapper } from '../typing-wrap.js'
 import { type DraftStreamHandle } from '../draft-stream.js'
@@ -97,15 +98,16 @@ import {
   shutdownAnalytics,
 } from '../analytics-posthog.js'
 import { emitRuntimeMetric } from '../runtime-metrics.js'
-import { decideOverPing } from '../over-ping-safety-net.js'
+import { decideOverPing, type OverPingDecision } from '../over-ping-safety-net.js'
 import { decideSilentReplyAnchor } from '../silent-reply-anchor.js'
 import { classifyInbound } from '../inbound-classifier.js'
 import * as silencePoke from '../silence-poke.js'
 import * as pendingProgress from '../pending-work-progress.js'
 import { writeSilentEndState, clearSilentEndState, recordUndeliveredTurnEnd } from '../silent-end.js'
-import { isFinalAnswerReply, isSubstantiveFinalReply } from '../final-answer-detect.js'
+import { isFinalAnswerReply, isSubstantiveFinalReply, FINAL_ANSWER_MIN_CHARS } from '../final-answer-detect.js'
+import { deriveTurnRole, decideTerminalReason, parsePostAnswerLivenessMs, evaluatePostAnswerLiveness, type LoopRole } from '../turn-liveness-floor.js'
 import { createAnswerStream, type AnswerStreamHandle } from '../answer-stream.js'
-import { parseVisibleAnswerStreamEnabled, parseDraftLaneRetiredEnabled, resolveAnswerLaneConfig } from '../answer-stream-flag.js'
+import { parseVisibleAnswerStreamEnabled, resolveAnswerLaneConfig } from '../answer-stream-flag.js'
 import { type SessionEvent } from '../session-tail.js'
 import {
   shouldSuppressToolActivity,
@@ -132,6 +134,7 @@ import {
 } from './microsoft-connect-flow.js'
 import { resolveAuthBrokerSocketPath } from '../../src/auth/broker/client.js'
 import { createFleetFallbackGate } from '../fleet-fallback-gate.js'
+import { createFleetFallbackResumeGate } from '../fleet-fallback-resume.js'
 import { resolveExhaustUntil } from './exhaust-until.js'
 import {
   pendingAuthAddFlows,
@@ -165,7 +168,7 @@ import {
   formatModelUnavailableCard,
   resolveModelUnavailableFromOperatorEvent,
 } from '../model-unavailable.js'
-import { runFleetAutoFallback, renderFallbackFailureNotice, evaluateFallbackFailureNotice, type FallbackFailureNoticeState } from '../auto-fallback-fleet.js'
+import { runFleetAutoFallback, renderFallbackFailureNotice, evaluateFallbackFailureNotice, evaluateAllBlockedNotice, type FallbackFailureNoticeState, type FallbackAllBlockedNoticeState } from '../auto-fallback-fleet.js'
 import { startRestartWatchdog } from './restart-watchdog.js'
 import { validateStringArray } from './access-validator.js'
@@ -221,6 +224,7 @@ import {
   isContextExhaustionText,
   shouldArmOrphanedReplyTimeout,
   ORPHANED_REPLY_TIMEOUT_MS,
+  ORPHANED_REPLY_MAX_REARMS,
 } from '../context-exhaustion.js'
 import {
   decideTurnFlush,
@@ -326,11 +330,24 @@ import {
 } from './obligation-ledger.js'
 import { loadObligations, persistObligations } from './obligation-store.js'
 import { driveEscalation } from './escalation-drive.js'
+import { shouldSuppressRepresent } from './represent-guard.js'
 import { createInboundSpool } from './inbound-spool.js'
 import { purgeStaleTurnsForChat } from './turn-state-purge.js'
 import { decideInboundDelivery } from './inbound-delivery-gate.js'
 import { mayDrainBufferedInbound, shouldArmNoReplyDrain } from './serialize-drain-gate.js'
 import { decideFeedReopen } from './feed-reopen-gate.js'
+import {
+  mayOpenActivityCard,
+  computeCrossTurnAnswerDelivered,
+  type FeedOpenProducer,
+  type FeedOpenGateDeps,
+} from './feed-open-gate.js'
+import {
+  EmissionAuthority,
+  EMISSION_AUTHORITY_ENABLED,
+  type CardDrainGateCtx,
+} from './emission-authority.js'
+import { CurrentTurnMap } from './current-turn-map.js'
 import { resolveAnswerThreadId } from './answer-thread-resolve.js'
 import {
   createDeliveryQueue,
@@ -411,6 +428,7 @@ import {
   // preceding shutdown only" semantics.
   clearCleanShutdownMarker,
   shouldSuppressRecoveryBanner,
+  shouldSuppressBootResume,
   resolveShutdownMarker,
   DEFAULT_MAX_AGE_MS as CLEAN_SHUTDOWN_MAX_AGE_MS,
 } from './clean-shutdown-marker.js'
@@ -468,8 +486,10 @@ import {
   resolveQuotaWatchTuning,
   buildQuotaClaimKey,
   QUOTA_WATCH_CLAIM_WINDOW_MS,
+  isLiveCorroboration,
 } from '../quota-watch.js'
 import { buildSnapshotsFromState, buildSnapshotsFromCachedState } from '../auth-snapshot-format.js'
+import { maskUsername, maskVaultKey } from '../demo-mask.js'
 import {
   writeTurnActiveMarker,
   touchTurnActiveMarker,
@@ -739,20 +759,6 @@ const AGENT_ADMIN = process.env.SWITCHROOM_AGENT_ADMIN === 'true'
 const bot = new Bot(TOKEN)
 installTgPostLogger(bot)
-// Draft-answer-lane retirement (2026-06-05): default RETIRED so the live answer
-// lane uses a real, mtcute-observable message instead of the invisible
-// compose-box draft. Declared HERE (above the boot-probe block) because
-// `sendMessageDraftFn` below reads it — keep it above its first use to avoid a
-// temporal-dead-zone ReferenceError at boot. Kill switch
-// SWITCHROOM_DRAFT_ANSWER_LANE=0 restores the legacy draft.
-const DRAFT_ANSWER_LANE_RETIRED = parseDraftLaneRetiredEnabled(process.env.SWITCHROOM_DRAFT_ANSWER_LANE)
-// ─── sendMessageDraft boot probe ──────────────────────────────────────────
-// grammY 1.x exposes all Telegram Bot API methods through bot.api.raw.
-// bot.api.sendMessageDraft (the typed wrapper) takes chat_id as number, but
-// answer-stream passes chatId as string, so we bridge through raw with an
-// explicit Number() cast and positional → object param translation.
-const _rawSendMessageDraft = (bot.api.raw as unknown as Record<string, unknown>).sendMessageDraft
 const GRAMMY_VERSION: string = (() => {
   try {
     const raw = readFileSync(new URL('../../node_modules/grammy/package.json', import.meta.url), 'utf8')
@@ -761,22 +767,6 @@ const GRAMMY_VERSION: string = (() => {
     return 'unknown'
   }
 })()
-const sendMessageDraftFn: (
-  (chatId: string, draftId: number, text: string, params?: { message_thread_id?: number; parse_mode?: 'HTML' }) => Promise<unknown>
-) | undefined =
-  // When the draft lane is retired (default), force this undefined so BOTH
-  // consumers (the answer-stream config + the stream_reply handler) drop the
-  // draft transport and fall back to visible message transport — the single
-  // chokepoint for the retirement.
-  !DRAFT_ANSWER_LANE_RETIRED && typeof _rawSendMessageDraft === 'function'
-    ? (chatId, draftId, text, params) =>
-        (_rawSendMessageDraft as (args: Record<string, unknown>) => Promise<unknown>)({
-          chat_id: Number(chatId),
-          draft_id: draftId,
-          text,
-          ...(params ?? {}),
-        })
-    : undefined
 // ─── sendChecklist / editMessageChecklist boot probes ─────────────────────
 // grammY 1.x exposes new Telegram Bot API methods via bot.api.raw before the
@@ -1157,43 +1147,70 @@ try {
   const pending = findLatestTurnIfInterrupted(turnsDb)
   const selfAgent = process.env.SWITCHROOM_AGENT_NAME ?? ''
   if (pending != null && selfAgent) {
-    // 3h staleness failsafe (operator spec, 2026-06-03): never AUTO-resume
-    // interrupted work older than RESUME_MAX_AGE_MS — selectResumeBuilder
-    // downgrades a stale 'resume' to the passive 'report' so the user is told
-    // ("I was working on X ~Nh ago") but nothing replays unprompted. Env
-    // override SWITCHROOM_RESUME_MAX_AGE_MS (ms); set very high to disable.
-    const RESUME_MAX_AGE_MS = (() => {
-      const v = Number(process.env.SWITCHROOM_RESUME_MAX_AGE_MS)
-      return Number.isFinite(v) && v > 0 ? v : 10_800_000 // 3h
-    })()
-    const kind = selectResumeBuilder(pending.ended_via, {
-      ageMs: Math.max(0, Date.now() - pending.started_at),
-      maxAgeMs: RESUME_MAX_AGE_MS,
+    // Clean-shutdown gate: suppress auto-resume when the prior shutdown was
+    // operator/roll/CLI-initiated (clean). A clean-shutdown marker present and
+    // fresh means the agent was asked to stop; the "interrupted" turn was
+    // abandoned by that decision. Replaying it on every planned restart wastes
+    // subscription quota for no user benefit. Only unclean exits (crash/OOM/
+    // unexpected kill) should auto-resume.
+    //
+    // NOTE: GATEWAY_CLEAN_SHUTDOWN_MARKER_PATH is defined lower in this file
+    // (module-init order); we compute the path inline here using the same
+    // formula so we can read it at boot-resume time.
+    // SWITCHROOM_BOOT_RESUME_ALWAYS=1 is an escape hatch that restores
+    // unconditional resume if needed.
+    const bootResumeMarkerPath =
+      process.env.SWITCHROOM_GATEWAY_CLEAN_SHUTDOWN_MARKER ?? join(STATE_DIR, 'clean-shutdown.json')
+    const bootResumeCleanMarker = readCleanShutdownMarker(bootResumeMarkerPath)
+    const bootResumeForceAlways = process.env.SWITCHROOM_BOOT_RESUME_ALWAYS === '1'
+    const bootResumeSuppressed = shouldSuppressBootResume(bootResumeCleanMarker, Date.now(), {
+      forceAlways: bootResumeForceAlways,
     })
-    if (kind === 'resume') {
-      bootResumeInbound = { agent: selfAgent, msg: buildResumeInterruptedInbound({ turn: pending }) }
-    } else if (kind === 'report') {
-      // idleMs: this boot's measured marker age if it just classified this
-      // turn; otherwise recover it from the persisted interrupt_reason (a
-      // later boot, marker already swept); else fall back to total runtime.
-      let idleMs = pending.turn_key === timeoutTurnKey && markerAgeMs != null ? markerAgeMs : null
-      if (idleMs == null && pending.interrupt_reason) {
-        try {
-          const parsed = JSON.parse(pending.interrupt_reason) as { idleMs?: unknown }
-          if (typeof parsed.idleMs === 'number' && Number.isFinite(parsed.idleMs)) idleMs = parsed.idleMs
-        } catch { /* malformed snapshot — fall through */ }
-      }
-      if (idleMs == null) idleMs = Math.max(0, Date.now() - pending.started_at)
-      bootResumeInbound = {
-        agent: selfAgent,
-        msg: buildResumeWatchdogReportInbound({ turn: pending, idleMs }),
-      }
-    }
-    if (bootResumeInbound != null) {
+    if (bootResumeSuppressed) {
       process.stderr.write(
-        `telegram gateway: boot-resume queued kind=${kind} turnKey=${pending.turn_key} ` +
-        `endedVia=${pending.ended_via ?? 'open'} chat=${pending.chat_id}\n`,
+        `telegram gateway: boot-resume suppressed (clean shutdown` +
+        `${bootResumeCleanMarker?.reason ? ` reason=${JSON.stringify(bootResumeCleanMarker.reason)}` : ''}` +
+        `) — unclean exits still resume turnKey=${pending.turn_key}\n`,
       )
+    } else {
+      // 3h staleness failsafe (operator spec, 2026-06-03): never AUTO-resume
+      // interrupted work older than RESUME_MAX_AGE_MS — selectResumeBuilder
+      // downgrades a stale 'resume' to the passive 'report' so the user is told
+      // ("I was working on X ~Nh ago") but nothing replays unprompted. Env
+      // override SWITCHROOM_RESUME_MAX_AGE_MS (ms); set very high to disable.
+      const RESUME_MAX_AGE_MS = (() => {
+        const v = Number(process.env.SWITCHROOM_RESUME_MAX_AGE_MS)
+        return Number.isFinite(v) && v > 0 ? v : 10_800_000 // 3h
+      })()
+      const kind = selectResumeBuilder(pending.ended_via, {
+        ageMs: Math.max(0, Date.now() - pending.started_at),
+        maxAgeMs: RESUME_MAX_AGE_MS,
+      })
+      if (kind === 'resume') {
+        bootResumeInbound = { agent: selfAgent, msg: buildResumeInterruptedInbound({ turn: pending }) }
+      } else if (kind === 'report') {
+        // idleMs: this boot's measured marker age if it just classified this
+        // turn; otherwise recover it from the persisted interrupt_reason (a
+        // later boot, marker already swept); else fall back to total runtime.
+        let idleMs = pending.turn_key === timeoutTurnKey && markerAgeMs != null ? markerAgeMs : null
+        if (idleMs == null && pending.interrupt_reason) {
+          try {
+            const parsed = JSON.parse(pending.interrupt_reason) as { idleMs?: unknown }
+            if (typeof parsed.idleMs === 'number' && Number.isFinite(parsed.idleMs)) idleMs = parsed.idleMs
+          } catch { /* malformed snapshot — fall through */ }
+        }
+        if (idleMs == null) idleMs = Math.max(0, Date.now() - pending.started_at)
+        bootResumeInbound = {
+          agent: selfAgent,
+          msg: buildResumeWatchdogReportInbound({ turn: pending, idleMs }),
+        }
+      }
+      if (bootResumeInbound != null) {
+        process.stderr.write(
+          `telegram gateway: boot-resume queued kind=${kind} turnKey=${pending.turn_key} ` +
+          `endedVia=${pending.ended_via ?? 'open'} chat=${pending.chat_id}\n`,
+        )
+      }
     }
   }
@@ -1425,6 +1442,21 @@ const activeTurnStartedAt = new Map<string, number>()
 // reading activeTurnStartedAt because they want the receipt timestamp.
 const claudeBusyKeys = new Set<string>()
+/**
+ * #2527 observability: count emoji transitions per status-reaction controller
+ * so `turn_no_reply_warn` can report how many reaction changes happened while
+ * producing zero text. Keyed by statusKey(chatId, threadId); cleared in
+ * purgeReactionTracking alongside the controller itself.
+ */
+const reactionTransitionCounts = new Map<string, number>()
+/**
+ * #2527 observability: tracks which (chatId:threadId) keys have already emitted
+ * a `turn_reply_timing` event this turn so we only fire it on the FIRST text
+ * reply. Cleared in purgeReactionTracking at turn-end alongside the controller.
+ */
+const firstTextReplyLogged = new Set<string>()
 /**
  * Helper: stamp a claudeBusyKeys entry for an inbound about to be
  * handed to claude. Pulls the thread id from the top-level field if
@@ -1487,6 +1519,19 @@ const deliveryQueue = createDeliveryQueue<InboundMessage>()
 // SWITCHROOM_OBLIGATION_LEDGER=0 → every hook below is a no-op → zero change.
 const OBLIGATION_LEDGER_ENABLED = process.env.SWITCHROOM_OBLIGATION_LEDGER !== '0'
 const OBLIGATION_REPRESENT_MAX = 2
+// Minimum reply length (chars) the duplicate-represent guard (#2472/#2474) treats
+// as "the user was answered". DECOUPLED from the escalate branch's 200-char proxy:
+// for the represent guard ANY genuine assistant reply — even a terse "Yes — done."
+// — satisfies the obligation, so suppressing the duplicate re-ask must not require
+// 200 chars. Default 1 (any non-empty real reply; empty/whitespace rows are
+// clamped out inside hasOutboundDeliveredSince). Override via env for tuning. Safe
+// because only recordOutbound writes role='assistant' rows — progress-card edits
+// and typing indicators are never counted.
+const OBLIGATION_REPRESENT_GUARD_MIN_REPLY_CHARS = (() => {
+  const raw = process.env.SWITCHROOM_OBLIGATION_REPRESENT_GUARD_MIN_REPLY_CHARS
+  const n = raw != null ? Number.parseInt(raw, 10) : NaN
+  return Number.isFinite(n) && n >= 1 ? n : 1
+})()
 const OBLIGATION_SWEEP_MS = 5_000
 // Bound on escalation SEND attempts. The escalation now closes only AFTER a
 // successful send (a transient failure stays OPEN and retries next sweep), so a
@@ -1710,6 +1755,38 @@ const FEED_REOPEN_AFTER_ACK_ENABLED =
 const FEED_HEARTBEAT_ENABLED = process.env.SWITCHROOM_FEED_HEARTBEAT !== '0'
 const FEED_HEARTBEAT_TICK_MS = 6_000
 const FEED_HEARTBEAT_MIN_STALE_MS = 6_000
+// Liveness-driven feed open. The activity feed is otherwise TOOL-driven — it
+// opens only when a tool emits a non-null label. A turn dominated by thinking
+// or by suppressed-by-design tools (typing / memory recall / reply) emits no
+// label, so the feed never opens and a long turn reads as pure silence until
+// the 300s silence-poke (the #680 dark-turn). When a turn has been alive >=
+// FEED_LIVENESS_OPEN_MS with no feed yet, open a minimal "Working…" feed so the
+// user always has a live indicator; the first real tool label edits it with
+// real content. Runs on the heartbeat interval, so the effective open lands in
+// [threshold, threshold + FEED_HEARTBEAT_TICK_MS). Kill switch:
+// SWITCHROOM_FEED_LIVENESS_OPEN=0. Default on.
+const FEED_LIVENESS_OPEN_ENABLED = process.env.SWITCHROOM_FEED_LIVENESS_OPEN !== '0'
+const FEED_LIVENESS_OPEN_MS = (() => {
+  const raw = process.env.SWITCHROOM_FEED_LIVENESS_OPEN_MS
+  const n = raw ? Number(raw) : NaN
+  return Number.isFinite(n) && n > 0 ? n : 12_000
+})()
+// Post-answer background-agent liveness STALENESS CAP (Fix 2 / #2587 supersede,
+// concern 3). The `feedHeartbeatTick` post-answer branch re-renders a "background
+// agent still working" card every FEED_HEARTBEAT_TICK_MS while the sub-agent
+// watcher keeps advancing `turn.subagentActivityAt`. Without a cap that card kept
+// emitting `state:'running'` with an ever-climbing `elapsed` FOREVER — even after
+// the worker's `onFinish` froze the timestamp — because (unlike the pre-answer
+// path's `FEED_LIVENESS_OPEN_MS` recency cap) the post-answer branch had no
+// staleness bound. This cap mirrors that pre-answer pattern: once the worker's
+// last advance is older than the cap, the heartbeat stops re-rendering and the
+// card freezes at its last state. Parsed via the same pure `parsePostAnswerLivenessMs`
+// helper (positive int or 0); `|| 30_000` supplies a default-ON 30s cap, so an unset env —
+// or any falsy parse result, e.g. an explicit 0 — keeps the 30s cap. Override with SWITCHROOM_POST_ANSWER_LIVENESS_STALE_MS.
+const POST_ANSWER_LIVENESS_STALE_MS = parsePostAnswerLivenessMs(
+  process.env.SWITCHROOM_POST_ANSWER_LIVENESS_STALE_MS,
+) || 30_000
 /** Compact mm/ss-ish elapsed for the live feed suffix: "18s", "1m05s". */
 function formatFeedElapsed(ms: number): string {
@@ -1884,6 +1961,27 @@ type CurrentTurn = {
   sourceMessageId: number | null
   startedAt: number
   gatewayReceiveAt: number
+  // #2527 — the single turn-provenance discriminator, stamped once at
+  // enqueue from the channel envelope `source`. `user` (a human is waiting:
+  // never-silent guarantee + mid-turn floor), `system` (cron/scheduled:
+  // silence is legitimate). The gateway never builds a turn atom for a
+  // sub-agent, so `sub-agent` never appears here. Read by the mid-turn floor
+  // eligibility and the role-aware terminal reaction gate. Replaces the
+  // scattered `chatType`/`chatId==null`/`source==='cron'` predicates.
+  role: LoopRole
+  // PR1 (cross-turn stale-card guard, design `docs/message-emission-determinism.md`
+  // §9 lever 4 / race C/D). Present ONLY when this turn is a cross-turn SYNTHETIC
+  // surface whose card OPEN must be gated against an answer already delivered in
+  // an EARLIER turn — i.e. an `obligation_represent` re-delivery (and the
+  // liveness/heartbeat timer firing on it). `sinceMs` is the obligation's
+  // `openedAt` — the moment the obligation was RAISED — so the card-OPEN gate
+  // asks `hasOutboundDeliveredSince(chat, openedAt)`: did a substantive answer
+  // already land since then? Stamped at the turn ctor from a pending marker that
+  // `obligationSweep` writes when it pushes the represent inbound (see
+  // `pendingCrossTurnGate`). `undefined` for a normal foreground turn → the
+  // cross-turn lever-4 gate is inert there (the foreground turn's own card is
+  // governed only by the per-turn `finalAnswerEverDelivered` latch, lever 1).
+  crossTurnGate?: { sinceMs: number }
   replyCalled: boolean
   // #1664 — whether the model has delivered its *final answer* this turn
   // (as opposed to only an interim ack). `replyCalled` flips on the first
@@ -1912,6 +2010,18 @@ type CurrentTurn = {
   // Reset to false on every fresh-turn enqueue alongside
   // `finalAnswerDelivered`.
   finalAnswerSubstantive: boolean
+  // Sticky "a substantive final answer has been delivered this turn" latch
+  // (design `docs/message-emission-determinism.md` §9 preamble / R0). Distinct
+  // from the MUTABLE `finalAnswerDelivered`, which the ack-reopen path clears
+  // mid-turn (`feed-reopen-gate.ts:157`) so an "On it…" ack keeps a live feed
+  // (#2141). Ordering gates (the no-OPEN-after-final card gate, lever 1) MUST
+  // key on this sticky latch, not the mutable flag — keying on the mutable flag
+  // is a no-op on exactly the ack-first turn where the reorder originates. Set
+  // true ONLY at the points that set `finalAnswerDelivered = true` AND only when
+  // the reply is `isSubstantiveFinalReply`; NEVER cleared by reopen. Reset to
+  // false ONLY at turn start, mirroring `activityEverOpened`'s sticky-true
+  // contract.
+  finalAnswerEverDelivered: boolean
   // #1675 (over-ping safety net): wall-clock ms of the first reply
   // this turn that landed with `disable_notification: false` (a real
   // device ping). The conversational-pacing contract
@@ -1923,6 +2033,18 @@ type CurrentTurn = {
   // the framework. Null until the first ping lands. Reset on every
   // fresh-turn enqueue.
   firstPingAt: number | null
+  // Notification ownership (R8 / PR-2 — design `docs/message-emission-
+  // determinism.md` §over-ping). Whether the send that CLAIMED this turn's
+  // ping slot (`firstPingAt`) was itself a *substantive* final answer
+  // (`isSubstantiveFinalReply`) as opposed to a short interim ACK. The
+  // over-ping safety net keys on this so a substantive answer pinging AFTER
+  // an ack already pinged is UPGRADED (let through, owns the slot) rather
+  // than silenced — otherwise "the reply is last but the phone never buzzed
+  // for the answer." Set ATOMICALLY with `firstPingAt` (same synchronous
+  // block, no await between) on a claim/upgrade so a racing second reply
+  // reads a consistent pair. Init false; reset to false on every fresh-turn
+  // enqueue alongside `firstPingAt`.
+  firstPingWasSubstantive: boolean
   // #1677 silent-reply auto-edit. The first silent reply of a turn
   // captures `silentAnchorMessageId` + `silentAnchorText`; subsequent
   // silent replies in the SAME turn editMessageText that anchor
@@ -1935,6 +2057,13 @@ type CurrentTurn = {
   silentAnchorText: string
   capturedText: string[]
   orphanedReplyTimeoutId: ReturnType<typeof setTimeout> | null
+  // How many times the orphaned-reply backstop timer has been re-armed
+  // mid-tool-call instead of firing a synthetic turn_end. Bounded so a
+  // genuinely wedged single long-running tool still surfaces: the cap is
+  // ORPHANED_REPLY_MAX_REARMS (20 × 30 s = 10 min of genuine tool activity).
+  // Reset to 0 on a fresh enqueue; NOT reset on text/tool_label re-arms —
+  // only a new turn resets the budget.
+  orphanedReplyRearmCount: number
   // Component 3 (turn-origin reply routing). A stable per-turn identity,
   // `${registryKey-or-chatKey}#${startedAt}`, assigned when the turn
   // starts and stamped into the inbound meta (`origin_turn_id`) so a reply
@@ -1962,6 +2091,14 @@ type CurrentTurn = {
   // Phase 1 of #332: count of tool_use events in the current turn, for
   // the tool_call_count column in the turns registry.
   toolCallCount: number
+  // Count of tool_label events that passed the isTelegramSurfaceTool guard
+  // this turn — the deterministic, surface-tool-excluded step count used by
+  // the `✓ N steps` activity-feed total and the `tools=` lifecycle log.
+  // Incremented in `case 'tool_label':` AFTER the surface-tool guard so
+  // reply/stream_reply/edit_message/react are never counted. send_typing and
+  // sync_retain are suppressed at the hook (computeLabel returns null) and
+  // never arrive as tool_label events — excluded automatically.
+  labeledToolCount: number
   // Tool-activity summary — mirrors Claude Code's native chat-UI
   // rendering ("Ran 5 commands, read a file"). Counters are
   // incremented in `case 'tool_use'`; `activityMessageId` holds the
@@ -2002,11 +2139,45 @@ type CurrentTurn = {
   // step that emits no new label doesn't read as frozen (the feed is otherwise
   // pull-only). undefined until the first label of the turn renders.
   lastToolLabelAt?: number
+  // Fix 2 (post-answer background-agent liveness): wall-clock timestamp last
+  // updated by the sub-agent/workflow watcher's onProgress callback whenever
+  // it surfaces a NEW sub-agent step AFTER this turn's substantive answer was
+  // delivered. Written INDEPENDENTLY of the tool_label path so the drop-guard
+  // (`shouldReopenFeedAfterAck` / `finalAnswerSubstantive`) cannot gate it.
+  // `feedHeartbeatTick` reads THIS (not `lastToolLabelAt`, which is frozen by
+  // the drop-guard) to drive the post-answer liveness card — the core fix for
+  // #2587's inert state. undefined until the first post-answer watcher advance.
+  subagentActivityAt?: number
+  // Sticky wall-clock timestamp when finalAnswerEverDelivered first latched
+  // true this turn. Allows the heartbeat to distinguish "tool label arrived
+  // before the answer" (lastToolLabelAt ≤ finalAnswerDeliveredAt, inert) from
+  // "sub-agent active after the answer" (subagentActivityAt >
+  // finalAnswerDeliveredAt, liveness card warranted). undefined until the
+  // first substantive final answer of the turn.
+  finalAnswerDeliveredAt?: number
   // Accumulating friendly-action feed for this turn. Each non-surface
   // tool_label appends a line via `appendActivityLabel`; the feed renders
   // (via `renderActivityFeed`) as a capped chronological list into the
   // in-place edited activity message and clears on reply. Reset per turn.
   mirrorLines: string[]
+  // Narrative-dedup gate state (JSONL-text-narrative primitive). A `text`
+  // block is held here for ONE lookahead step so the next event (a tool_use
+  // or turn_end) can decide draft-then-send (SUPPRESS, it duplicates the
+  // reply) vs working-narration (SHOW it as a transient mirrorLines step).
+  // Null when nothing is pending. The pure decision lives in
+  // narrative-dedup.ts; this slot is the per-turn cursor. Reset per turn.
+  // Invariant `chat-is-the-single-source-of-truth`: a SHOWN narrative is
+  // rendered through the SAME appendActivityLabel→renderStepFeed path as a
+  // tool step — a transient, clipped, rolling-window line replaced by the
+  // next event, never a persisted parallel mirror.
+  pendingNarrative: { text: string } | null
+  // Most-recently-seen reply/stream_reply `input.text` for this turn — the
+  // ACTUAL delivered answer surface. Set wherever a REPLY_TOOL tool_use is
+  // handled in the reducer. `flushPendingNarrativeAtTurnEnd` compares a
+  // trailing narrative block against THIS (not capturedText.join(''), which
+  // can mis-suppress when the model emits the same short string twice in a
+  // turn). Empty string until the turn delivers a reply. Reset per turn.
+  lastReplyText: string
   // Model A — foreground sub-agent nesting. A foreground sub-agent (Task/Agent
   // with no run_in_background) runs INSIDE this turn while the parent blocks at
   // the Task tool, so its live steps nest under the parent's activity feed
@@ -2019,9 +2190,169 @@ type CurrentTurn = {
   // gates on minInitialChars). Materialized and cleared at turn_end.
   answerStream: AnswerStreamHandle | null
   isDm: boolean
+  // PR-4a (message-emission-determinism, `emission-authority.ts`). The
+  // per-foreground-turn emission-authority façade the foreground-lane card/ping
+  // emission call sites route through. Constructed ONCE per turn in the ctor
+  // with the chat/thread key passed in explicitly (the PR-4e seam). Per-turn
+  // only — a fresh `CurrentTurn` literal gets a fresh façade, so it never
+  // persists across turns. Optional in the type so the bounded recently-ended
+  // registry's older entries (and any hand-built test turn) tolerate its
+  // absence; `emissionAuthorityFor` lazily backfills one when missing.
+  emissionAuthority?: EmissionAuthority
+}
+// PR-4e — the singleton `currentTurn` is RETAINED as (a) the flag-OFF store and
+// (b) the flag-ON "most-recent-set" MIRROR. Every GLOBAL-liveness read in this
+// file (`isBusy`, the `if (currentTurn != null) return` poke guards, the
+// orphaned-reply guard, the synchronous-to-live-turn `const turn = currentTurn`
+// captures) keeps reading this variable, so under the sequential-CLI invariant
+// (the most-recently-set turn IS the live turn) those reads stay byte-identical.
+// The per-topic isolation lives in `currentTurnMap.byKey`: a LATE async event
+// captured for topic A resolves A's authority by ITS OWN key even after the live
+// turn flipped to topic B (see current-turn-map.ts). Under the flag OFF the map
+// is never written and this is exactly the old singleton.
+let currentTurn: CurrentTurn | null = null
+const currentTurnMap = new CurrentTurnMap<CurrentTurn>()
+/**
+ * Set the live turn for `key`. Flag-branches in ONE place (inside the map):
+ * flag-OFF assigns the singleton only; flag-ON sets the per-topic entry AND
+ * updates the most-recent mirror. We keep the module-scope `currentTurn`
+ * variable in lock-step with the map's mirror so the 140 unchanged global reads
+ * see the same value.
+ *
+ * @param turn The turn to install as the live turn.
+ * @param key  The topic key the turn is stored under. Presumably the same
+ *   `statusKey(chatId, threadId)` value the liveness/clear helpers derive —
+ *   confirm at the call sites.
+ */
+function setCurrentTurn(turn: CurrentTurn, key: string): void {
+  currentTurnMap.set(turn, key)
+  currentTurn = currentTurnMap.get() // argless get() = the map's most-recent-set mirror (== `turn`)
 }
-let currentTurn: CurrentTurn | null = null
+/**
+ * End (delete) the live turn for `key`, iff `key` still maps to `turn`. Routes
+ * the clear through the keyed accessor (leak-close-at-origin) and re-syncs the
+ * module-scope mirror to the map's mirror.
+ *
+ * The mirror is re-read from the map unconditionally, i.e. also when nothing
+ * was deleted, so `currentTurn` always equals the map's own mirror afterwards.
+ *
+ * @param turn The turn the caller believes is live for `key`.
+ * @param key  The topic key to clear.
+ * @returns The result of `currentTurnMap.endTurnForKey(turn, key)`.
+ */
+function endCurrentTurnForKey(turn: CurrentTurn, key: string): boolean {
+  const ended = currentTurnMap.endTurnForKey(turn, key)
+  currentTurn = currentTurnMap.get() // re-sync mirror (null iff it pointed at turn)
+  return ended
+}
+/**
+ * Clear the ENTIRE per-topic store + mirror (disconnect-flush / bridge-died:
+ * every entry is a ghost).
+ *
+ * Unlike the keyed helpers, the module-scope `currentTurn` is nulled directly
+ * here rather than re-read from the map.
+ */
+function clearAllCurrentTurns(): void {
+  currentTurnMap.clearAll()
+  currentTurn = null
+}
+/**
+ * Liveness check scoped to the turn's OWN topic.
+ *
+ * The topic key is derived from the turn's session chat/thread ids and the
+ * decision is delegated to `currentTurnMap.isLiveForKey`. Per the map's
+ * contract: flag-OFF is the ambient `currentTurn === turn` check; flag-ON is
+ * `byKey.get(turn'sKey) === turn`, so the live turn flipping to another topic
+ * never falsifies this topic's liveness. Call sites keep the literal
+ * `currentTurn === turn` in source (the silence-liveness-wiring oracle) by
+ * inlining the flag-OFF branch and delegating only the flag-ON branch here.
+ *
+ * @param turn The turn whose liveness is being asked about.
+ * @returns Whether `turn` is still the live turn for its own topic key.
+ */
+function turnLiveForItsTopic(turn: CurrentTurn): boolean {
+  const topicKey = statusKey(turn.sessionChatId, turn.sessionThreadId)
+  return currentTurnMap.isLiveForKey(turn, topicKey)
+}
+/**
+ * Return the per-foreground-turn emission-authority façade for `turn` (PR-4a).
+ *
+ * Normally the façade was constructed in the turn ctor. A turn that predates
+ * the field, or was built outside the ctor, gets one backfilled here, keyed on
+ * the turn's chat/thread. The instance is memoized ON the turn object, so it
+ * is discarded with the turn and never persists across turns.
+ *
+ * @param turn The turn whose façade is wanted.
+ * @returns The turn's (possibly just-created) `EmissionAuthority`.
+ */
+function emissionAuthorityFor(turn: CurrentTurn): EmissionAuthority {
+  let authority = turn.emissionAuthority
+  if (authority == null) {
+    authority = new EmissionAuthority(
+      statusKey(turn.sessionChatId, turn.sessionThreadId),
+    )
+    turn.emissionAuthority = authority
+  }
+  // PR-4b — the OPEN-gate wiring is CENTRALIZED in this accessor because every
+  // routed call site already funnels through it; that keeps all 6
+  // `openOrEditCard(...)` sites byte-identical `(producer, apply)`. The façade
+  // gets a THUNK to the live turn (card id / latch / tool-count mutate during
+  // the turn) plus the injected history deps, once here rather than per-call.
+  // Idempotent; harmless under the flag OFF (the disabled branch never reads
+  // it). The turn IS a structural `FeedOpenGateView`.
+  authority.wireFeedOpenGate(() => turn, feedOpenGateDeps())
+  return authority
+}
+/**
+ * Build the injected history dependencies for the PR-4b OPEN gate: the real
+ * `hasOutboundDeliveredSince` predicate, `HISTORY_ENABLED`, and the
+ * substantive `FINAL_ANSWER_MIN_CHARS` floor.
+ *
+ * Kept in one place so the façade's enabled branch AND the drain's own
+ * (now-redundant) inline gate consume the SAME deps via the SAME pure helpers
+ * — flag-ON and flag-OFF cannot diverge. It also keeps `feed-open-gate.ts`
+ * sqlite-free (it never imports `history.js`).
+ *
+ * @returns A fresh `FeedOpenGateDeps` object on every call.
+ */
+function feedOpenGateDeps(): FeedOpenGateDeps {
+  const deps: FeedOpenGateDeps = {
+    hasOutboundDeliveredSince,
+    historyEnabled: HISTORY_ENABLED,
+    finalAnswerMinChars: FINAL_ANSWER_MIN_CHARS,
+  }
+  return deps
+}
+/**
+ * Assemble the deliver-before-drain inputs that the PR-4d card-drain gate
+ * passes to the façade's pure `mayDrainCardNow`. Centralized so the card-drain
+ * helper reads the SAME `turnInFlightForGate()` + kill-switch as the
+ * buffer-drain gate.
+ *
+ * `endingTurnFinalAnswerDelivered` is pinned to `null` on the card path (§5
+ * modeling decision): the live foreground card single-flight is governed by
+ * `turn.activityInFlight` (via `mayDrain`), NOT by an ending turn's delivery
+ * state — so `mayDrainBufferedInbound` degenerates to `!turnInFlight` and a
+ * synthetic represent turn (finalAnswerDelivered=false) can never wedge the
+ * card.
+ *
+ * @returns A fresh `CardDrainGateCtx` snapshot taken at call time.
+ */
+function cardDrainGateCtx(): CardDrainGateCtx {
+  const turnInFlight = turnInFlightForGate()
+  return {
+    turnInFlight,
+    endingTurnFinalAnswerDelivered: null,
+    enabled: SERIALIZE_UNTIL_REPLIED_ENABLED,
+  }
+}
+/**
+ * PR-4d centralized card-drain gate (Option A). The 6 foreground card-drain
+ * sites pass their EXISTING single-flight-guarded block (the `mayDrain` guard +
+ * the `openOrEditCard(producer, …)` thunk that assigns `turn.activityInFlight =
+ * drainActivitySummary(…)`) in VERBATIM as `run`, so those load-bearing literals
+ * stay byte-identical (the wiring oracles still see them).
+ *
+ *  - **Flag OFF (default):** runs the guarded block directly — NO `chatLock`
+ *    wrapper, byte-equivalent to base. A throw from `run` propagates to the
+ *    caller synchronously, as before.
+ *  - **Flag ON:** acquires `chatLock` AROUND the deliver-before-drain decision
+ *    (`mayDrainCardNow`) + the synchronous block, unifying the card path with
+ *    the #2137 serialization gate. The lock spans ONLY the gate decision + the
+ *    synchronous `openOrEditCard` kick-off inside `run` (which only ASSIGNS
+ *    `turn.activityInFlight = drainActivitySummary(...)`; the async send is NOT
+ *    awaited inside the lock). The lock is released before any drain
+ *    `await sendMessage` suspends, so a card OPEN never holds `chatLock` across
+ *    the gate's release — a synthetic represent turn can never wedge the gate,
+ *    and `mayDrain` stays callable lock-free.
+ *
+ * ERROR HANDLING (flag ON): the locked callback is fire-and-forget, so the
+ * caller has already returned by the time it runs. A throw from
+ * `mayDrainCardNow` or `run` is therefore caught INSIDE the callback and
+ * written to stderr instead of escaping as an unhandled promise rejection.
+ *
+ * LOCK ORDERING (no-deadlock invariant): `chatLock` is acquired EXCLUSIVELY
+ * here, around the gate; never the reverse. `mayDrainCardNow` is a pure read.
+ *
+ * @param turn The foreground turn whose card is being drained.
+ * @param ea   The turn's emission-authority façade (source of `mayDrainCardNow`).
+ * @param run  The call site's existing guarded drain block, invoked at most once.
+ */
+function cardDrainGate(turn: CurrentTurn, ea: EmissionAuthority, run: () => void): void {
+  if (!EMISSION_AUTHORITY_ENABLED) {
+    run()
+    return
+  }
+  void chatLock.run(
+    statusKey(turn.sessionChatId, turn.sessionThreadId),
+    async () => {
+      try {
+        if (ea.mayDrainCardNow(turn, cardDrainGateCtx())) run()
+      } catch (err) {
+        // Nobody awaits this promise; surface the failure rather than letting
+        // it become an unhandled rejection.
+        const detail = err instanceof Error ? (err.stack ?? err.message) : String(err)
+        process.stderr.write(`cardDrainGate: guarded card drain threw: ${detail}\n`)
+      }
+    },
+  )
+}
 // Component 3 (turn-origin reply routing). Recently-ended turns retained
 // by `turnId` so a LATE reply (the Brevo answer landing ~42s after
@@ -2115,7 +2446,22 @@ function deriveTurnId(
  */
 function findTurnByOriginId(originTurnId: string | null | undefined): CurrentTurn | null {
   if (originTurnId == null || originTurnId === '') return null
-  if (currentTurn != null && currentTurn.turnId === originTurnId) return currentTurn
+  // PR-4e — resolve the LIVE turn by ITS OWN topic key under the flag. The
+  // turnId encodes the key: `deriveTurnId` builds `${chatKey}#${messageId}`, so
+  // the substring before the FIRST `#` is taken as the statusKey (NOTE(review):
+  // assumes the key itself never contains `#` — confirm against `statusKey`).
+  // Flag-ON does an O(1) keyed lookup and matches on turnId, so a reply whose
+  // origin turn is STILL live for topic A resolves A even after the mirror
+  // flipped to B. Flag-OFF keeps the singleton check; registry fallback unchanged.
+  if (EMISSION_AUTHORITY_ENABLED) {
+    const hashIdx = originTurnId.indexOf('#')
+    if (hashIdx > 0) {
+      const live = currentTurnMap.get(originTurnId.slice(0, hashIdx))
+      if (live != null && live.turnId === originTurnId) return live
+    }
+  } else if (currentTurn != null && currentTurn.turnId === originTurnId) {
+    return currentTurn
+  }
   return recentTurnsById.get(originTurnId) ?? null
 }
@@ -2368,7 +2714,10 @@ function postQueuedStatus(chatId: string, bufferedThread: number, inFlightThread
   void (async () => {
     const sent = await swallowingApiCall(
       () =>
-        bot.api.sendMessage(chatId, text, { message_thread_id: bufferedThread }),
+        // Queued-placeholder status, not the user's answer — silence the
+        // open ping (BORDERLINE: it's a "your message is queued" notice;
+        // see PR description).
+        bot.api.sendMessage(chatId, text, { message_thread_id: bufferedThread, disable_notification: true }),
       { chat_id: chatId, verb: 'queued-status.post', threadId: bufferedThread },
     )
     const messageId = (sent as { message_id?: number } | undefined)?.message_id
@@ -2542,6 +2891,16 @@ const preambleSuppressor = new PreambleSuppressor({
     // long-lived and flushes can occur outside any session-event
     // handler's scope. If the turn has been cleared, the update is
     // dropped (no chat to send to, no stream to mutate).
+    //
+    // PR-4e — the module-scope suppressor carries NO per-topic key (it is a
+    // single global instance reset/flushed per turn), so there is no key to
+    // scope by here: the correct resolution under BOTH flag states is the
+    // most-recent-set live turn — exactly the singleton mirror `currentTurn`.
+    // The per-turn `reset()` / `flushNow()` / `dropNow()` lifecycle (driven from
+    // the live turn's own handlers) keeps the suppressor aligned with whichever
+    // topic is currently live, so the mirror read is byte-identical to base and
+    // cannot leak A's answer text into B (a flush for A runs while A is still the
+    // most-recent turn; once B flips, A's stream is already force-superseded).
     const stream = currentTurn?.answerStream ?? null
     if (stream != null) stream.update(cumulative)
   },
@@ -2590,6 +2949,28 @@ function streamKey(chatId: string, threadId?: number | null): string {
   return chatKey(chatId, threadId)
 }
+// PR1 (cross-turn stale-card guard, design §9 lever 4 / race C/D).
+// `obligationSweep` writes one entry here, keyed on the obligation's
+// `originTurnId`, the instant it pushes an `obligation_represent` inbound —
+// carrying the obligation's `openedAt` (when the obligation was RAISED). The
+// represent inbound reuses the obligation's chat/thread/messageId, so the
+// `enqueue` that spawns the synthetic represent turn reconstructs the SAME
+// `deriveTurnId` value as the key. That represent turn — and ONLY that turn —
+// consumes and clears it, stamping `turn.crossTurnGate = { sinceMs: openedAt }`.
+// That turn's first card-OPEN then consults `hasOutboundDeliveredSince(chat,
+// openedAt)` and is suppressed iff a SUBSTANTIVE answer already landed since the
+// obligation was raised — so a "thinking…" card never narrates beneath an answer
+// the user already received in the original turn. Keying on `originTurnId` (not
+// chat/thread) means an unrelated later foreground turn on the same chat/thread
+// derives a different turn id, finds no entry, and is never mis-gated — closing
+// the residual cross-contamination window where a represent that was armed but
+// never enqueued (degenerate bridge-death) left a stale chat/thread-keyed gate
+// that the next foreground turn could wrongly consume. A normal foreground turn
+// never has an entry here, so the gate stays scoped to the synthetic surface.
+// The map holds at most one entry per obligation; re-arming the same obligation
+// overwrites its own entry with the latest openedAt.
+const pendingCrossTurnGate = new Map<string, { sinceMs: number }>()
 /**
  * Component 1 — deliver-before-drain. The single chokepoint that both
  * turn-end drain sites (`purgeReactionTracking`, `releaseTurnBufferGate`)
@@ -2729,6 +3110,10 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
   // the markClaudeBusyForInbound on the delivery path. Safe no-op
   // when the key was never marked (synthetic purge from a sweep).
   claudeBusyKeys.delete(key)
+  // #2527: clear the per-key reaction-transition counter and first-reply
+  // sentinel alongside the controller so we don't leak state across turns.
+  reactionTransitionCounts.delete(key)
+  firstTextReplyLogged.delete(key)
   // Human-feel UX: stop the turn-long `typing…` indicator started in
   // the turn-start block. `purgeReactionTracking` is the canonical
   // turn-end, so this is the single owner of the stop. (If an abnormal
@@ -2933,8 +3318,16 @@ function releaseTurnBufferGate(key: string, endingTurn?: CurrentTurn): void {
  * gone — handlers that already purge elsewhere are unharmed.
  */
 function endCurrentTurnAtomic(turn: CurrentTurn): void {
-  if (currentTurn !== turn) return
-  currentTurn = null
+  // PR-4e — keyed liveness + keyed clear (leak-close-at-origin). Flag-OFF: the
+  // guard is `currentTurn === turn` and the clear nulls the singleton, verbatim.
+  // Flag-ON: the guard becomes `byKey.get(turn'sKey) === turn` (so a flip to
+  // another topic doesn't spuriously short-circuit THIS topic's teardown) and
+  // the clear does `byKey.delete(key)` + nulls the mirror iff it still points at
+  // `turn`. `endCurrentTurnForKey` returns false (no delete) when the entry no
+  // longer matches — the same early-return semantics as the old `!== turn` guard.
+  const key = statusKey(turn.sessionChatId, turn.sessionThreadId)
+  if (!turnLiveForItsTopic(turn)) return
+  endCurrentTurnForKey(turn, key) // currentTurnByKey.delete(key) + mirror clear
   // Status-surface observability: one line at every turn CLEAR (with how far
   // the turn got), plus a DEGRADED warning when the turn did tool work but the
   // live feed never opened because its sends failed (the resume-400 signature).
@@ -3199,6 +3592,7 @@ async function postIdleClearNotice(idleClearMs: number): Promise<void> {
       () =>
         bot.api.sendMessage(chatId, text, {
           parse_mode: 'HTML',
+          disable_notification: true,
           ...(threadId != null ? { message_thread_id: threadId } : {}),
         }),
       { chat_id: chatId, verb: 'idleAutoClear.notice' },
@@ -3340,7 +3734,7 @@ async function resolveCompactCard(
 function finalizeStatusReaction(
   chatId: string,
   threadId: number | undefined,
-  reason: 'done' | 'error' = 'done',
+  reason: 'done' | 'undelivered' | 'error' = 'done',
 ): void {
   const key = statusKey(chatId, threadId)
   const ctrl = activeStatusReactions.get(key)
@@ -3354,6 +3748,17 @@ function finalizeStatusReaction(
   if (reason === 'done' && deferredDoneReactions.tryDefer(key, ctrl)) return
   deferredDoneReactions.drop(key)
   ctrl.finalize(reason)
+  // #2527: log controller dispose so the lifecycle end is observable. Use
+  // activeReactionMsgIds to reconstruct the turnId token before purge clears it.
+  const msgInfo = activeReactionMsgIds.get(key)
+  if (msgInfo != null) {
+    logStreamingEvent({
+      kind: 'status_reaction_dispose',
+      chatId,
+      turnId: `${chatId}:${msgInfo.messageId}`,
+      reason,
+    })
+  }
   purgeReactionTracking(key)
 }
@@ -4960,10 +5365,10 @@ function postLegacyBanner(
 // short-circuit to no-ops at runtime. `progressDriver` is typed `any`
 // so TS doesn't resolve `progressDriver?.X` to `never`.
 const streamMode = process.env.SWITCHROOM_TG_STREAM_MODE ?? 'checklist'
-// PR B: per-agent stream throttle override via channels.telegram.stream_throttle_ms.
-// When unset, draft-stream.ts applies transport-aware defaults (300 ms draft,
-// 1000 ms message). Parsed once at boot; sub-zero / NaN values fall back to
-// undefined so the per-transport default wins. See `src/agents/scaffold.ts`
+// Per-agent stream throttle override via channels.telegram.stream_throttle_ms.
+// When unset, draft-stream.ts applies DM/group defaults (400 ms DMs, 1000 ms
+// groups). Parsed once at boot; sub-zero / NaN values fall back to undefined
+// so the per-chat-type default wins. See `src/agents/scaffold.ts`
 // `channelsToEnv()` for the yaml → env wiring.
 const STREAM_THROTTLE_MS_OVERRIDE: number | undefined = (() => {
   const raw = process.env.SWITCHROOM_TG_STREAM_THROTTLE_MS
@@ -4973,74 +5378,30 @@ const STREAM_THROTTLE_MS_OVERRIDE: number | undefined = (() => {
 })()
 const TURN_FLUSH_SAFETY_ENABLED = isTurnFlushSafetyEnabled()
-// #869-Phase1 / openclaw-pattern. When SET, the answer-lane stream
-// (telegram-plugin/answer-stream.ts) renders the model's transcript
-// text as a USER-VISIBLE edit-in-place message instead of writing to
-// Telegram's invisible compose-box draft (which is the default and
-// supports the #1664 "retract + re-prompt" contract). With this flag
-// on:
-//   1. createAnswerStream is instantiated without `sendMessageDraft`,
-//      so it falls back to `sendMessage` + `editMessageText` for a
-//      real chat-timeline message (`answer-stream.ts:212-214`).
-//   2. minInitialChars is set to 1 — the first text chunk pushes a
-//      visible message immediately (TTFO under 5s for short turns).
-//   3. At turn_end, if the model never called reply / stream_reply
-//      AND the streamed message has substantive captured text, the
-//      gateway DOES NOT retract (which would delete a user-visible
-//      message the user has been reading live); it calls
-//      `stream.stop()` to freeze the current text as the final
-//      answer, records the message in dedup + history, and marks
-//      `turn.finalAnswerDelivered = true` so the #1664 silent-end
-//      re-prompt does not fire. Turn-flush is suppressed for this
-//      branch — its job (deliver captured text) is structurally
-//      already done by the visible stream.
-//   4. The reply-tool / stream_reply path is unchanged — when the
-//      model uses an explicit reply tool the prior streamed message
-//      is retracted (delete) and the reply takes over as before.
-// Trade-off: a stream-as-final-answer turn does NOT push a device
-// notification (Telegram does not notify on edits, and we choose
-// not to send a duplicate fresh message for the ping). For short
-// turns where the user is actively watching, this is the right
-// shape — they see the answer materialise live. For longer waits,
-// the cross-turn pending-progress system (#1445/#1669) is the
-// canonical surface and DOES ping at the appropriate boundaries.
-//
-// 2026-05-25: default flipped ON after a fleet-log audit showed a ~19%
-// framework-fallback ("still working…") rate — the visible stream gave an
-// immediate in-timeline signal that suppressed the silence-poke.
-//
-// 2026-06-03: default flipped back OFF (operator request). In practice the
-// visible stream delivered ~none of its intended benefit while imposing a
-// jarring cost:
-//   - Telegram rate-limits editMessageText to roughly once/second, so real
-//     "watch it type" streaming is impossible; and the model emits almost no
-//     interstitial assistant.text (it thinks → tool → reply), so the
-//     preliminary was a near-empty bubble (observed: 5–13 byte edits).
-//   - On every turn where the model calls the reply tool (≈always), the reply
-//     posts a SEPARATE canonical message and the stream RETRACTS (deletes) its
-//     preliminary — the user sees a raw bubble appear then vanish, replaced by
-//     the formatted reply. In supergroup topics it also mis-routed (preliminary
-//     → General, reply → topic). Net: an unformatted flash + a delete, no
-//     streaming value.
-// The anti-silence role the visible stream once filled is now covered by the
-// live ACTIVITY FEED (tool-use streaming, below), the "…typing" chat-action
-// loop, and `thinking_effort: low` (fast tool-less turns) — so off-by-default
-// no longer regresses the framework-fallback rate. With the flag off the lane
-// uses the invisible compose-box draft (the original default, #1664-compatible)
-// and the reply tool is the single canonical, formatted message.
+// When SET, the answer-lane stream (telegram-plugin/answer-stream.ts) renders
+// the model's transcript text as a USER-VISIBLE edit-in-place message. Default
+// OFF: the lane stays dormant and the reply tool is the single canonical
+// formatted message — no unformatted preliminary that flashes and gets deleted.
+// With this flag on, minInitialChars is set to 1 and the first text chunk opens
+// a visible preview immediately. At turn_end, if the model never called reply /
+// stream_reply AND the streamed message has substantive captured text, the
+// gateway materializes it as a pinged final answer (materialize()) and deletes
+// the silent preview. When the model uses an explicit reply tool the prior
+// streamed message is retracted instead.
+// The draft transport (sendMessageDraft) is permanently retired — both ON and
+// OFF use sendMessage + editMessageText; the difference is whether a visible
+// preview is opened at all.
 // Opt back IN per agent with SWITCHROOM_VISIBLE_ANSWER_STREAM=1.
 const ANSWER_STREAM_VISIBLE_ENABLED = parseVisibleAnswerStreamEnabled(
   process.env.SWITCHROOM_VISIBLE_ANSWER_STREAM,
 )
-// Single source of truth for the answer-lane behaviour (flash-decouple,
-// 2026-06-05). The visible preview gates on the visible flag ALONE; the draft
-// flag controls only the transport. Resolved here once and consulted at the
-// createAnswerStream config, the materialize-as-answer guard, and the boot log,
-// so all three can never drift back into the `visible || retired` conflation
-// that re-opened the flash. Total-enumerated in answer-stream-flag.test.ts.
+// Single source of truth for the answer-lane behaviour. The draft transport
+// (sendMessageDraft) is permanently retired — the lane is either VISIBLE
+// (opt-in) or DORMANT (the unconditional default: reply tool is the only
+// message). Resolved here once and consulted at the createAnswerStream config,
+// the materialize-as-answer guard, and the boot log.
 const ANSWER_LANE = resolveAnswerLaneConfig({
   visibleEnabled: ANSWER_STREAM_VISIBLE_ENABLED,
-  draftFnAvailable: sendMessageDraftFn != null,
 })
 // Whether to DELETE the activity/status feed when the final answer lands.
@@ -5076,6 +5437,11 @@ const completeProgressCardTurn:
 // #1122 PR3: flushProgressCardsForShutdown deleted with the card. No
 // replacement needed — there are no pinned progress messages to flush.
 let subagentWatcher: SubagentWatcherHandle | null = null
+// Background-worker activity feed manager. Module-scoped so shutdown can stop()
+// its internal heartbeat interval (mirrors subagentWatcher). Recreated per
+// bridge connect; the stale handle's interval is unref'd, so a missed stop()
+// can't keep the process alive, but we stop() on shutdown for cleanliness.
+let workerActivityFeed: ReturnType<typeof createWorkerActivityFeed> | null = null
 // ─── IPC server ───────────────────────────────────────────────────────────
 const SOCKET_PATH = process.env.SWITCHROOM_GATEWAY_SOCKET ?? join(STATE_DIR, 'gateway.sock')
@@ -5237,8 +5603,12 @@ let inFlightUpdate: { requestId: string; startedAt: number } | null = null
 //   SWITCHROOM_SILENCE_FALLBACK_MS         — base threshold (default 300000)
 //   SWITCHROOM_SILENCE_FALLBACK_HARD_MS    — hard ceiling for the in-flight-tool
 //                                            defer (default 900000 = 15min)
-//   SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS=1 — enable the defer (default OFF;
-//                                            canary on marko against #2162 telemetry)
+//   SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS=1 — enable the defer. NOTE: this is
+//                                            now set fleet-wide in defaults.env
+//                                            (was a marko canary against #2162;
+//                                            promoted to the fleet default). The
+//                                            code default below is still OFF, so
+//                                            the live behaviour comes from config.
 function parsePositiveMsEnv(name: string, fallbackMs: number): number {
   const raw = process.env[name]
   if (raw == null || raw === '') return fallbackMs
@@ -5247,6 +5617,20 @@ function parsePositiveMsEnv(name: string, fallbackMs: number): number {
 }
 const SILENCE_FALLBACK_MS = parsePositiveMsEnv('SWITCHROOM_SILENCE_FALLBACK_MS', 300_000)
 const SILENCE_FALLBACK_HARD_MS = parsePositiveMsEnv('SWITCHROOM_SILENCE_FALLBACK_HARD_MS', 900_000)
+// #2527 — mid-turn liveness floor threshold (default 45s). The early, quiet
+// beat: a `user` turn working silently this long without a substantive answer
+// gets ONE honest "still on it" interim, so the ambient 👀 never masquerades
+// as "done". Strictly below SILENCE_FALLBACK_MS (the loud 300s unwedge).
+// Whole floor is kill-switchable via SWITCHROOM_TG_LIVENESS_FLOOR=0.
+const SILENCE_FLOOR_MS = parsePositiveMsEnv('SWITCHROOM_SILENCE_FLOOR_MS', 45_000)
+// #2527 — role-aware terminal reaction honesty (the "thumbs-up false done"
+// fix). Default ON; SWITCHROOM_TG_TERMINAL_HONESTY=0 reverts to always-👍.
+const LIVENESS_TERMINAL_HONESTY = process.env.SWITCHROOM_TG_TERMINAL_HONESTY !== '0'
+// SILENCE_DEFER_INFLIGHT_TOOLS: previously an opt-in (=1). The new
+// isLegitimatelyWorking callback supersedes this — defer is now the DEFAULT
+// when the callback is wired. The legacy env var is kept so `=0` still lets
+// operators force-disable the defer (handled inside silence-poke.ts tick()).
+// The old `=1` path is kept for back-compat but is now redundant. NOTE: the
+// constant below is true ONLY for an explicit `=1`; it cannot tell `=0` from
+// unset, so the `=0` kill-switch is not derivable from this boolean.
 const SILENCE_DEFER_INFLIGHT_TOOLS = process.env.SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS === '1'
 // Production-liveness (2026-06-05 UAT finding). Count an activity-feed render or
 // an answer-stream draft update as liveness for the silence clock, so a long
@@ -5255,13 +5639,109 @@ const SILENCE_DEFER_INFLIGHT_TOOLS = process.env.SWITCHROOM_SILENCE_DEFER_INFLIG
 // restores the legacy "only a real reply resets the clock" behaviour.
 const SILENCE_LIVENESS_PRODUCTION = process.env.SWITCHROOM_SILENCE_LIVENESS_PRODUCTION !== '0'
+/**
+ * Feed-survival predicate — the single source of truth for "is this turn
+ * legitimately working?" used by BOTH teardown timers (orphaned-reply fuse
+ * and silence-poke framework fallback).
+ *
+ * Returns true if ANY of the following hold for the given chat key:
+ *
+ *   (a) A foreground tool call is in flight in the current turn
+ *       (`toolFlightTracker.isMidToolCall()`). This covers most tools
+ *       including ask_user while it blocks awaiting a tap.
+ *
+ *   (b) Detached background work was dispatched in the current turn and has
+ *       not yet resolved — `pendingProgress.hasPendingAsyncDispatch(key)`.
+ *       Covers `Bash run_in_background:true` (which returns a near-instant
+ *       handle, emptying inFlight, while the background process keeps
+ *       running) and `Agent` / `Task` dispatches.
+ *
+ *   (c) A human-wait tool (`ask_user`) is open for this chat. A pending
+ *       ask_user IS already captured by (a) while the tool_use is in flight,
+ *       but we include the explicit pendingAskUser check for defence-in-depth
+ *       (e.g. after an unlikely inFlight clear without a tool_result).
+ *
+ * The key is `statusKey(chatId, threadId)` — the same key used by
+ * silencePoke / pendingProgress.
+ */
+function isLegitimatelyWorking(key: string): boolean {
+  // (a) foreground in-flight tool.
+  // NOTE: toolFlightTracker is GLOBAL, not per-key. In a hypothetical
+  // multi-chat agent a tool in flight for chat A would make this return
+  // true for chat B's key. Accepted: the gateway runs one Claude session
+  // (one turn in flight at a time); true multi-chat concurrency is not
+  // currently supported. (b) below is per-key; (c) is per-chat (thread ignored).
+  if (toolFlightTracker.isMidToolCall()) return true
+  // (b) detached background work dispatched this turn
+  if (pendingProgress.hasPendingAsyncDispatch(key)) return true
+  // (c) ask_user open for this chat (defence-in-depth); compares chatId only, so any topic in the chat matches
+  const { chatId: keyChatId } = parseKeyForSurvival(key)
+  for (const entry of pendingAskUser.values()) {
+    if (entry.chatId === keyChatId) return true
+  }
+  return false
+}
+/** Extract the chatId from a `<chatId>:<threadIdOrEmpty>` key (the text before the first ':', or the whole key if there is none); the thread part is discarded.
+ *  Mirrors silence-poke's parseKey — local copy so we don't need to re-export from silence-poke. */
+function parseKeyForSurvival(key: string): { chatId: string } {
+  const idx = key.indexOf(':')
+  return { chatId: idx < 0 ? key : key.slice(0, idx) }
+}
 silencePoke.startTimer({
-  thresholdsMs: { fallback: SILENCE_FALLBACK_MS, fallbackHardCeiling: SILENCE_FALLBACK_HARD_MS },
+  thresholdsMs: { fallback: SILENCE_FALLBACK_MS, fallbackHardCeiling: SILENCE_FALLBACK_HARD_MS, floor: SILENCE_FLOOR_MS },
   deferFallbackWhileToolInFlight: SILENCE_DEFER_INFLIGHT_TOOLS,
+  isLegitimatelyWorking: (key) => isLegitimatelyWorking(key),
   emitMetric: (event) => {
     // Re-emit through the unified runtime-metrics fan-out (PostHog + JSONL).
     emitRuntimeMetric(event)
   },
+  // #2527 — the gateway-owned half of the mid-turn-floor decision: only the
+  // live turn knows its loop role + whether a substantive answer has landed.
+  // Keyed on statusKey so a DM (threadId null) and a forum topic are identical.
+  floorState: (key) => {
+    const turn = currentTurn
+    if (turn == null) return null
+    if (statusKey(turn.sessionChatId, turn.sessionThreadId) !== key) return null
+    return { role: turn.role, finalAnswerDelivered: turn.finalAnswerDelivered }
+  },
+  // #2527 — the early, quiet liveness beat. Honest text from the longest
+  // in-flight tool (model-free, claude-native), routed through the SAME send
+  // path as the 300s fallback; pings OFF (this is the gentle beat, not the
+  // loud unwedge) and the turn is NOT torn down — it keeps working.
+  onMidTurnFloor: async (ctx) => {
+    // Late-fire guard, mirroring the fallback: a clean turn-end can race the
+    // tick. If the turn is gone, stay silent.
+    if (activeTurnStartedAt.get(ctx.key) == null && currentTurn == null) return
+    const blockedOnApproval = activeStatusReactions
+      .get(statusKey(ctx.chatId, ctx.threadId))
+      ?.isAwaiting() ?? false
+    const text = silencePoke.formatFrameworkFallbackText(
+      'working',
+      ctx.silenceMs,
+      ctx.inFlightTools,
+      blockedOnApproval,
+    )
+    try {
+      await robustApiCall(
+        () => bot.api.sendMessage(ctx.chatId, text, {
+          ...(ctx.threadId != null ? { message_thread_id: ctx.threadId } : {}),
+          // The quiet beat: visible in-thread, no device buzz. (The 300s fallback
+          // pings only when parked on an approval; the floor must not train the user to mute.)
+          disable_notification: true,
+        }),
+        { chat_id: ctx.chatId, ...(ctx.threadId != null ? { threadId: ctx.threadId } : {}) },
+      )
+      // Count it as production so the silence clock resets — the user just
+      // saw a real message, so the 300s loud fallback is measured from here.
+      silencePoke.noteProduction(ctx.key, Date.now())
+    } catch (err) {
+      process.stderr.write(
+        `silence-poke mid-turn floor sendMessage failed chat=${ctx.chatId} thread=${ctx.threadId}: ${err}\n`,
+      )
+    }
+  },
   onFrameworkFallback: async (ctx) => {
     // Late-fire short-circuit (2026-05-23 audit finding). The fallback
     // can race a clean turn-end: the model's actual reply lands inside
@@ -5281,6 +5761,14 @@ silencePoke.startTimer({
         `turn ended cleanly during silence window ` +
         `chat=${ctx.chatId} thread=${ctx.threadId ?? '-'} silence_ms=${ctx.silenceMs}\n`,
       )
+      // #2527: structured skip event so the late-fire race is machine-readable.
+      logStreamingEvent({
+        kind: 'silence_poke_skip',
+        chatId: ctx.chatId,
+        threadId: ctx.threadId ?? undefined,
+        silenceMs: ctx.silenceMs,
+        skipReason: 'turn_ended_cleanly_during_window',
+      })
       // Tell silence-poke this chat-thread is finished so the next
       // arming doesn't carry stale state.
       silencePoke.endTurn(ctx.key)
@@ -5294,6 +5782,15 @@ silencePoke.startTimer({
     // get_status snapshot → pure formatter. Any hostd unavailability
     // degrades silently to the existing generic text (zero regression).
     let text: string | null = null
+    // Hoisted out of the generic-fallback branch below because the send site
+    // gates `disable_notification` on it: when the turn is parked on an
+    // approval card, the fallback TEXT is a user-gating re-ping ("waiting for
+    // your approval — tap Approve or Deny …"), and that must stay LOUD so the
+    // user knows the ball is in their court. The reaction controller tracks the
+    // park via setAwaiting on the permission-request.
+    const blockedOnApproval = activeStatusReactions
+      .get(statusKey(ctx.chatId, ctx.threadId))
+      ?.isAwaiting() ?? false
     const upd = inFlightUpdate
     if (upd != null) {
       try {
@@ -5315,9 +5812,6 @@ silencePoke.startTimer({
       // benign "wedge" class — claude is alive, waiting on the operator's
       // tap), say so instead of "still working…". The reaction controller
       // already tracks this (setAwaiting on the permission-request park).
-      const blockedOnApproval = activeStatusReactions
-        .get(statusKey(ctx.chatId, ctx.threadId))
-        ?.isAwaiting() ?? false
       text = silencePoke.formatFrameworkFallbackText(
         ctx.fallbackKind,
         ctx.silenceMs,
@@ -5325,12 +5819,26 @@ silencePoke.startTimer({
         blockedOnApproval,
       )
     }
+    // #2527: log the actual poke fire with structured data before sending,
+    // so the event is visible even if the send fails.
+    logStreamingEvent({
+      kind: 'silence_poke_fire',
+      chatId: ctx.chatId,
+      threadId: ctx.threadId ?? undefined,
+      silenceMs: ctx.silenceMs,
+      fallbackKind: ctx.fallbackKind,
+    })
     try {
       await robustApiCall(
         () => bot.api.sendMessage(ctx.chatId, text, {
           ...(ctx.threadId != null ? { message_thread_id: ctx.threadId } : {}),
-          // Framework fallback pings — user genuinely needs to know.
-          disable_notification: false,
+          // Conditional: the pure-liveness "still working…" notice is a status
+          // surface and stays SILENT. But when the turn is parked on an
+          // approval card, this same fallback carries a user-gating re-ping
+          // ("waiting for your approval — tap Approve or Deny …") — that must
+          // PING, because the user is the one being waited on. Gate on the same
+          // `blockedOnApproval` signal that selects the re-ping text above.
+          disable_notification: blockedOnApproval ? false : true,
         }),
         { chat_id: ctx.chatId, ...(ctx.threadId != null ? { threadId: ctx.threadId } : {}) },
       )
@@ -5464,13 +5972,31 @@ silencePoke.startTimer({
         return sib == null || sib >= silencePoke.DEFAULT_THRESHOLDS.fallback
       },
     )
+    // PR-4e self-heal backstop: drop any per-topic `currentTurnByKey` entries
+    // for fbChatId whose turn is stale-by-the-same-silence-gate the sibling
+    // sweep above used — the same precedent as `purgeStaleTurnsForChat`'s
+    // activeTurnStartedAt sweep, so a leaked map entry (a turn whose keyed
+    // delete somehow never ran) can never outlive its chat. Gated identically:
+    // the firing key is always stale; a sibling is stale iff it's silent ≥ the
+    // fallback threshold (or has no silence state). No-op under the flag OFF
+    // (the map is empty).
+    currentTurnMap.purgeChatStale(fbChatId, (siblingKey) => {
+      if (siblingKey === fbKey) return true
+      const sib = silencePoke.silenceMsForKey(siblingKey, fbNow)
+      return sib == null || sib >= silencePoke.DEFAULT_THRESHOLDS.fallback
+    })
     // Null `currentTurn` if it's still pointing at the wedged turn —
     // when claude eventually fires a late `turn_end` for this session
     // (or never does), the handler's `const turn = currentTurn` snapshot
     // returns null and the regular teardown short-circuits. Without
     // this, the late event would re-emit `turn_ended` AND clobber
     // whatever fresh turn the next inbound started.
-    if (turnMatchesFallback && currentTurn === wedgedTurn && wedgedTurn != null) {
+    // PR-4e — keyed liveness for the guard. Flag-OFF: `turnLiveForItsTopic`
+    // reduces to `currentTurn === wedgedTurn` (singleton mirror), verbatim.
+    // Flag-ON: `byKey.get(fbKey) === wedgedTurn`, so the keyed delete still
+    // fires when the LIVE mirror has already flipped to another topic B (a bare
+    // `currentTurn === wedgedTurn` would falsely skip and leak A's byKey entry).
+    if (turnMatchesFallback && wedgedTurn != null && turnLiveForItsTopic(wedgedTurn)) {
       // Status-surface observability: emit the lifecycle CLEAR for the
       // silence-poke teardown so a fallback-nulled turn has a turn-lifecycle
       // line like every other clear path (the framework-fallback line below is
@@ -5478,7 +6004,12 @@ silencePoke.startTimer({
       process.stderr.write(
         `telegram gateway: ${formatTurnLifecycle('clear', 'silence_fallback', wedgedTurn, Date.now())}\n`,
       )
-      currentTurn = null
+      // PR-4e — keyed delete for the wedged turn's OWN key (fbKey == the
+      // statusKey this fallback fired for, == the wedgedTurn's key since
+      // turnMatchesFallback gated chat+thread equality). Flag-OFF nulls the
+      // singleton, verbatim; flag-ON deletes only this topic's entry and clears
+      // the mirror iff it still points here — a live sibling topic is untouched.
+      endCurrentTurnForKey(wedgedTurn, fbKey)
     }
     // Best-effort: clear any pending silent-end marker so the Stop hook
     // doesn't double-block when claude eventually exits the wedged turn.
@@ -5789,6 +6320,43 @@ function obligationSweep(): void {
     return
   }
   if (decision.action === 'represent') {
+    // Fix #2472 — duplicate-represent guard. Before re-presenting AGAIN, check
+    // whether the agent has ALREADY delivered a substantive outbound reply to
+    // this chat SINCE the obligation was most recently re-presented. If so the
+    // obligation is satisfied-but-misdetected (the reply landed but its routing
+    // didn't resolve back to this origin, so the normal close path missed it) —
+    // close silently and do NOT re-fire, which is what produced the near-identical
+    // duplicate in #2472 (reply 10608 answered represent_count=1, yet
+    // represent_count=2 fired anyway → duplicate 10609).
+    //
+    // The cutoff is `lastRepresentedAt` (the time of the PREVIOUS represent), NOT
+    // `openedAt`. This is load-bearing: the genuine "agent wrote a plain-text
+    // answer and never called reply" case must still represent ONCE. On the first
+    // represent `lastRepresentedAt` is undefined, so this guard is a no-op and the
+    // single represent fires as before. Only the SECOND-and-later represent is
+    // gated — exactly where a reply that landed between fires must suppress the
+    // re-ask. Falls back to false (never suppresses) if history is unavailable.
+    if (
+      shouldSuppressRepresent(o, {
+        historyEnabled: HISTORY_ENABLED,
+        // Pass the represent-guard's OWN low threshold — a terse-but-real reply
+        // must suppress the duplicate (#2472/#2474), unlike the escalate branch
+        // below which keeps the 200-char default.
+        hasOutboundDeliveredSince: (chatId, sinceMs, threadId) =>
+          hasOutboundDeliveredSince(
+            chatId,
+            sinceMs,
+            threadId,
+            OBLIGATION_REPRESENT_GUARD_MIN_REPLY_CHARS,
+          ),
+      })
+    ) {
+      process.stderr.write(
+        `telegram gateway: obligation closed silently — reply delivered since last represent (no re-fire) origin=${o.originTurnId}\n`,
+      )
+      obligationLedger.close(o.originTurnId)
+      return
+    }
     // Re-present goes through the bridge → buffer. Only the represent path is
     // gated on an empty buffer (let the existing drain run first, avoid
     // double-presenting). Escalation below is NOT gated on the buffer — it is a
@@ -5796,6 +6364,25 @@ function obligationSweep(): void {
     // behind a dead bridge can never block the operator nudge.
     if (pendingInboundBuffer.depth(agent) > 0) return
     pendingInboundBuffer.push(agent, buildObligationRepresentInbound(o, Date.now()))
+    // PR1 (cross-turn stale-card guard, §9 lever 4 / race C/D). Arm the
+    // card-OPEN gate for the synthetic turn this represent inbound will spawn:
+    // carry the obligation's `openedAt` so that turn's first card-OPEN can ask
+    // "was a substantive answer already delivered since the obligation was
+    // raised?" and, if so, suppress the card (it would otherwise narrate beneath
+    // the answer the user already received). Keyed on the obligation's
+    // `originTurnId` — the SAME id the represent inbound carries
+    // (`buildObligationRepresentInbound` reuses `o.messageId`/`o.chatId`/
+    // `o.threadId`, so the enqueue-time `deriveTurnId` reconstructs exactly
+    // `o.originTurnId`). Keying on the turn id (not chat/thread) means ONLY the
+    // exact represent turn this gate was armed for can consume it; an unrelated
+    // later foreground turn on the same chat/thread has a different originTurnId
+    // → finds no entry → its card opens normally. This closes the residual
+    // cross-contamination window where a never-enqueued represent's stale gate
+    // could suppress an unrelated turn's card (the represent/duplicate-reply
+    // family). This does NOT gate the represent SEND — the represent guard above
+    // already owns suppressing an already-satisfied represent; this only governs
+    // the decorative card.
+    pendingCrossTurnGate.set(o.originTurnId, { sinceMs: o.openedAt })
     const attempt = obligationLedger.markRepresented(o.originTurnId)
     process.stderr.write(
       `telegram gateway: obligation re-presented origin=${o.originTurnId} attempt=${attempt}/${OBLIGATION_REPRESENT_MAX}\n`,
@@ -6196,7 +6783,10 @@ const ipcServer: IpcServer = createIpcServer({
           process.stderr.write(
             `telegram gateway: disconnect-flush nulled currentTurn (bridge died with turn in flight)\n`,
           )
-          currentTurn = null
+          // PR-4e — the bridge DIED with a turn in flight: EVERY per-topic entry
+          // is a ghost, not just the mirror's. Clear the whole map + mirror.
+          // Flag-OFF: this nulls the singleton only (the map is empty), verbatim.
+          clearAllCurrentTurns()
         }
       },
       log: (msg) => process.stderr.write(`${msg}\n`),
@@ -6722,10 +7312,10 @@ const ipcServer: IpcServer = createIpcServer({
           ...(cfgTopic != null ? { threadId: cfgTopic } : {}),
         }
       },
-      buildKeyboard: (requestId) =>
+      buildKeyboard: (requestId, epoch) =>
         new InlineKeyboard()
-          .text('✅ Approve', `cfg:${requestId}:approve`)
-          .text('🚫 Deny', `cfg:${requestId}:deny`),
+          .text('✅ Approve', `cfg:${requestId}:${epoch}:approve`)
+          .text('🚫 Deny', `cfg:${requestId}:${epoch}:deny`),
       postCard: async (args) => {
         try {
           const sent = await robustApiCall(
@@ -6757,6 +7347,9 @@ const ipcServer: IpcServer = createIpcServer({
             () =>
               bot.api.editMessageText(args.chatId, args.messageId, args.text, {
                 parse_mode: 'HTML',
+                // Strip the inline keyboard on a terminal/interim edit so the
+                // [Approve]/[Deny] buttons stop being tappable on a resolved card.
+                ...(args.stripKeyboard ? { reply_markup: { inline_keyboard: [] } } : {}),
               }),
             { chat_id: String(args.chatId), verb: 'config-approval-edit' },
           )
@@ -6826,6 +7419,8 @@ const ipcServer: IpcServer = createIpcServer({
             () =>
               bot.api.editMessageText(args.chatId, args.messageId, args.text, {
                 parse_mode: 'HTML',
+                // Finalize is terminal — drop the keyboard so buttons are gone.
+                ...(args.stripKeyboard ? { reply_markup: { inline_keyboard: [] } } : {}),
               }),
             { chat_id: String(args.chatId), verb: 'config-approval-finalize' },
           )
@@ -7377,6 +7972,23 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
     }
   }
   process.stderr.write(`telegram channel: reply: invoked chatId=${chat_id} charCount=${text.length} preview=${JSON.stringify(text.slice(0, 80))}\n`)
+  // #2527: emit a `turn_reply_timing` event (timeToFirstTextReplyMs) on the FIRST
+  // text reply of each turn so operators can see how long users waited for any visible output.
+  // Only fires once per turn (firstTextReplyLogged, keyed by statusKey, guards the repeat).
+  if (turn != null) {
+    const threadId = args.message_thread_id != null ? Number(args.message_thread_id) : undefined
+    const replyKey = statusKey(chat_id, threadId)
+    if (!firstTextReplyLogged.has(replyKey)) {
+      firstTextReplyLogged.add(replyKey)
+      logStreamingEvent({
+        kind: 'turn_reply_timing',
+        chatId: chat_id,
+        threadId,
+        turnId: turn.turnId,
+        timeToFirstTextReplyMs: Date.now() - turn.gatewayReceiveAt,
+      })
+    }
+  }
   // #546 dedup check: was this content just sent via turn-flush or
   // a sibling reply path? Skip the actual send and return a
@@ -7411,6 +8023,15 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
   // existing call-sites and the typical "final answer" reply keep their
   // current behaviour without an explicit flag.
   let disableNotification = args.disable_notification === true
+  // #2527/#1664 — the over-ping safety net below may downgrade
+  // `disableNotification` ping→silent for ANTI-SPAM (one ping per turn). That
+  // delivery-channel decision must NOT pollute final-answer CLASSIFICATION: a
+  // final answer the model intended to ping is STILL the final answer even when
+  // the framework silences the actual ping. Classify on the model's original
+  // intent (what executeStreamReply already does), so an over-ping-silenced
+  // final answer sets finalAnswerDelivered=true — fixing both a spurious
+  // silent-end re-prompt and a false 'undelivered' (😐) terminal reaction.
+  const modelDisableNotification = args.disable_notification === true
   // #1675 over-ping safety net. The conversational-pacing contract
   // (`reference/rfcs/conversational-pacing.md` beat 5) says EXACTLY ONE
@@ -7441,32 +8062,92 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
     const turn = currentTurn
     if (turn != null) {
       const now = Date.now()
-      const decision = decideOverPing({
-        modelRequestedPing: !disableNotification,
-        firstPingAt: turn.firstPingAt,
-        nowMs: now,
+      // Notification ownership (R8 / PR-2): on the `reply` path,
+      // substantiveness is purely the ≥200-char (or `done`) backstop —
+      // `isSubstantiveFinalReply` is `done === true || text.length >= 200`
+      // and ignores the notification flag entirely. `reply` carries no
+      // `done`, so it reduces to the ≥200-char length test. We still pass
+      // `modelDisableNotification` (the MODEL's original intent, not the
+      // possibly-downgraded `disableNotification`) to mirror the #2533
+      // final-answer decoupling call shape, but that arg does NOT
+      // participate in classification here — it is inert on this path.
+      const replySubstantive = isSubstantiveFinalReply({
+        text: rawText,
+        disableNotification: modelDisableNotification,
       })
-      if (decision.suppress) {
-        process.stderr.write(
-          `telegram gateway: reply over-ping safety net — ` +
-          `downgrading disable_notification:false → true ` +
-          `(chat=${chat_id} thread=${args.message_thread_id ?? '-'} ` +
-          `firstPingAt=${turn.firstPingAt} sinceFirstPing_ms=${decision.sinceFirstPingMs})\n`,
-        )
-        // Observability: surface to the unified runtime-metrics
-        // fan-out so the cadence dashboard can track fleet-wide
-        // over-ping rate (leading indicator of model pacing drift).
-        emitRuntimeMetric({
-          kind: 'over_ping_suppressed',
-          key: statusKey(chat_id, args.message_thread_id != null
-            ? Number(args.message_thread_id) : undefined),
-          sinceFirstPingMs: decision.sinceFirstPingMs ?? 0,
-        })
-        disableNotification = true
-        wasOverPingSuppressed = true
-      } else if (decision.claimSlot) {
-        turn.firstPingAt = now
+      // PR-4c: the over-ping DECISION relocates into the emission-authority
+      // façade, behind the kill-switch (default OFF), the same structural way
+      // PR-4b moved the OPEN gate. `decideOverPing` is already pure, so PR-4c
+      // extracts NOTHING new — it relocates the *call* into the façade's enabled
+      // branch and keeps the *effects* (stderr, metric, the atomic
+      // `firstPingAt`/`firstPingWasSubstantive` pair-set, the
+      // `disableNotification`/`wasOverPingSuppressed` outer-scope writes) HERE,
+      // parameterized by the decision the façade hands back via `applyDecision`.
+      //
+      //  - Disabled branch runs `disabledOverPing()` — its own LITERAL
+      //    `decideOverPing(...)` call + the full effects block, VERBATIM from
+      //    PR-4b-base (the disabled-path-is-byte-identical proof).
+      //  - Enabled branch: the façade computes the decision and hands it to
+      //    `applyOverPingDecision(decision)`, which performs the IDENTICAL
+      //    effects. Same pure inputs ⇒ same decision ⇒ flag-ON ≡ flag-OFF ≡ base.
+      //
+      // The effects block is shared by both paths: each hands its `decision` to
+      // `applyOverPingDecision` — but the disabled thunk computes it via its OWN literal
+      // `decideOverPing(` first, so the disabled path never depends on the façade for the decision.
+      const applyOverPingDecision = (decision: OverPingDecision): void => {
+        if (decision.suppress) {
+          process.stderr.write(
+            `telegram gateway: reply over-ping safety net — ` +
+            `downgrading disable_notification:false → true ` +
+            `(chat=${chat_id} thread=${args.message_thread_id ?? '-'} ` +
+            `firstPingAt=${turn.firstPingAt} sinceFirstPing_ms=${decision.sinceFirstPingMs})\n`,
+          )
+          // Observability: surface to the unified runtime-metrics
+          // fan-out so the cadence dashboard can track fleet-wide
+          // over-ping rate (leading indicator of model pacing drift).
+          emitRuntimeMetric({
+            kind: 'over_ping_suppressed',
+            key: statusKey(chat_id, args.message_thread_id != null
+              ? Number(args.message_thread_id) : undefined),
+            sinceFirstPingMs: decision.sinceFirstPingMs ?? 0,
+          })
+          disableNotification = true
+          wasOverPingSuppressed = true
+        } else if (decision.claimSlot) {
+          // Claim (first ping) OR upgrade (substantive answer pinging over an
+          // ack's slot). Set firstPingAt AND firstPingWasSubstantive ATOMICALLY
+          // (no await between) so a racing second reply reads a consistent pair.
+          turn.firstPingAt = now
+          turn.firstPingWasSubstantive = replySubstantive
+          if (decision.upgrade) {
+            process.stderr.write(
+              `telegram gateway: reply over-ping safety net — ` +
+              `UPGRADE: substantive answer pings over an ack's slot ` +
+              `(chat=${chat_id} thread=${args.message_thread_id ?? '-'})\n`,
+            )
+          }
+        }
       }
+      emissionAuthorityFor(turn).claimOrDowngradePing(
+        { modelRequestedPing: !disableNotification, substantive: replySubstantive },
+        {
+          firstPingAt: turn.firstPingAt,
+          firstPingWasSubstantive: turn.firstPingWasSubstantive,
+          nowMs: now,
+        },
+        applyOverPingDecision,
+        () => {
+          // Disabled-path: literal `decideOverPing(` + effects, VERBATIM base.
+          const decision = decideOverPing({
+            modelRequestedPing: !disableNotification,
+            firstPingAt: turn.firstPingAt,
+            substantive: replySubstantive,
+            firstPingWasSubstantive: turn.firstPingWasSubstantive,
+            nowMs: now,
+          })
+          applyOverPingDecision(decision)
+        },
+      )
     }
   }
@@ -7653,10 +8334,38 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
   // clear; the main turn-end path also re-writes the state when
   // finalAnswerDelivered=false, so this is a belt-and-braces gate
   // for the turn_end-missing case (#1741).
-  if (isFinalAnswerReply({ text: rawText, disableNotification })) {
+  if (isFinalAnswerReply({ text: rawText, disableNotification: modelDisableNotification })) {
     clearSilentEndState(statusKey(chat_id, threadId))
   }
+  // Lever 2 (design §9 lever 2): finalize the activity card BEFORE the reply
+  // chunks send, so the card keeps its (lower) message_id and the reply is
+  // structurally last on screen. ONLY for a *substantive* final — for an ack
+  // (non-substantive) do NOTHING: finalizing an ack early would
+  // close → reopen → emit MORE messages (the #2141 ack-then-work feed, R3).
+  // `clearActivitySummary` edits the existing card in place (no new send) and
+  // nulls `activityMessageId`; combined with the sticky latch set here it
+  // prevents any post-reply re-OPEN below the answer. Idempotent with the
+  // tool_use-event clear at the first-reply handoff (the existing backstop).
+  {
+    const finalizeTurn = currentTurn
+    if (
+      finalizeTurn != null
+      && isSubstantiveFinalReply({ text: rawText, disableNotification: modelDisableNotification })
+    ) {
+      // PR-4a: routed through the emission-authority façade (no-op delegates —
+      // the latch-set and the finalize run exactly as before).
+      const ea = emissionAuthorityFor(finalizeTurn)
+      ea.markSubstantiveFinalDelivered(() => {
+        finalizeTurn.finalAnswerEverDelivered = true
+        finalizeTurn.finalAnswerDeliveredAt = Date.now()
+      })
+      ea.finalizeCard(() => {
+        clearActivitySummary(finalizeTurn)
+      })
+    }
+  }
   if (previewMessageId != null && reply_to != null && replyMode !== 'off') {
     await deleteStalePreview(previewMessageId)
     previewMessageId = null
@@ -7764,7 +8473,7 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
             turn != null
             && isFinalAnswerReply({
               text: decision.mergedText,
-              disableNotification,
+              disableNotification: modelDisableNotification,
             })
           ) {
             turn.finalAnswerDelivered = true
@@ -7772,8 +8481,12 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
             // answer must NOT re-open the feed on post-answer housekeeping.
             turn.finalAnswerSubstantive = isSubstantiveFinalReply({
               text: decision.mergedText,
-              disableNotification,
+              disableNotification: modelDisableNotification,
             })
+            // Sticky ordering latch (lever 1): a substantive final closes the
+            // card OPEN gate for the rest of the turn. NEVER cleared by reopen.
+            if (turn.finalAnswerSubstantive) turn.finalAnswerEverDelivered = true
+            if (turn.finalAnswerSubstantive && turn.finalAnswerDeliveredAt == null) turn.finalAnswerDeliveredAt = Date.now()
             if (turn.finalAnswerSubstantive) closeObligationOnSubstantiveReply(args, turn, replyRoutedOriginTurn)
           }
           outboundDedup.record(
@@ -8112,12 +8825,17 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
     //
     // #1664 — `turn.finalAnswerDelivered = true` keeps the silent-
     // end re-prompt from spuriously firing on a delivered final.
-    if (turn != null && isFinalAnswerReply({ text: rawText, disableNotification })) {
+    if (turn != null && isFinalAnswerReply({ text: rawText, disableNotification: modelDisableNotification })) {
       turn.finalAnswerDelivered = true
       // Feed-reopen refinement: track whether this final was substantive
       // (≥200 chars or stream-done — not a short pinging ack) so post-answer
       // housekeeping tool work does NOT re-open the feed / trip silent-end.
-      turn.finalAnswerSubstantive = isSubstantiveFinalReply({ text: rawText, disableNotification })
+      turn.finalAnswerSubstantive = isSubstantiveFinalReply({ text: rawText, disableNotification: modelDisableNotification })
+      // Sticky ordering latch (lever 1): set once a SUBSTANTIVE final lands;
+      // never cleared by reopen. The card OPEN gate keys on this, not the
+      // mutable finalAnswerDelivered above (which reopen toggles).
+      if (turn.finalAnswerSubstantive) turn.finalAnswerEverDelivered = true
+      if (turn.finalAnswerSubstantive && turn.finalAnswerDeliveredAt == null) turn.finalAnswerDeliveredAt = Date.now()
       // #1728: release the buffer gate + emit terminal 👍. Mid-turn
       // acks bypass this branch and remain non-events for the
       // reaction (preserves #1713). The full turn-state teardown
@@ -8278,9 +8996,8 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
   }
   const access = loadAccess()
-  // Detect chat type for draft-transport selection.
+  // Detect chat type for throttle-default selection.
   // Private (DM) chats have positive numeric IDs; groups/channels are negative.
-  // Forum topics have a message_thread_id set — sendMessageDraft is unsupported there.
   const streamChatId = args.chat_id as string
   const streamIsPrivate = isDmChatId(streamChatId)
   const streamIsForumTopic = args.message_thread_id != null && args.message_thread_id !== ''
@@ -8322,6 +9039,19 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
       // PR3b-cutover: feed lastOutboundAt to the delivery machine (see
       // executeReply) so its TTL tick suppresses an active-turn fallback.
       shadowEmit({ kind: 'modelOutbound', key: sKey as _ChatKey, at: Date.now() })
+      // #2527: emit turn_reply_timing on the first stream_reply of the turn,
+      // mirroring the same gate in executeReply. Guards with firstTextReplyLogged
+      // so a turn that calls reply first and stream_reply second doesn't double-emit.
+      if (turn != null && !firstTextReplyLogged.has(sKey)) {
+        firstTextReplyLogged.add(sKey)
+        logStreamingEvent({
+          kind: 'turn_reply_timing',
+          chatId: streamChatId,
+          threadId: streamThreadId,
+          turnId: turn.turnId,
+          timeToFirstTextReplyMs: Date.now() - turn.gatewayReceiveAt,
+        })
+      }
       // #1741 — see executeReply for the rationale: only a plausibly-
       // final stream_reply clears the silent-end state. An interim
       // ack via stream_reply must NOT clear; the Stop hook needs
@@ -8338,6 +9068,33 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
     }
   }
+  // Lever 2 (design §9 lever 2): finalize the activity card BEFORE the stream
+  // send so the card keeps its lower message_id and the reply is structurally
+  // last. ONLY for a *substantive* final (a stream_reply done=true or ≥200
+  // chars) — for a short pinging interim chunk do NOTHING (finalizing an ack
+  // early would close → reopen → emit more, the #2141 ack-then-work feed, R3).
+  // `clearActivitySummary` edits in place + nulls activityMessageId; the sticky
+  // latch set here blocks any post-reply re-OPEN below the answer.
+  if (
+    turn != null
+    && isSubstantiveFinalReply({
+      text: (args.text as string | undefined) ?? '',
+      disableNotification: args.disable_notification === true,
+      done: args.done === true,
+    })
+  ) {
+    // PR-4a: routed through the emission-authority façade (no-op delegates —
+    // the latch-set and the finalize run exactly as before).
+    const ea = emissionAuthorityFor(turn)
+    ea.markSubstantiveFinalDelivered(() => {
+      turn.finalAnswerEverDelivered = true
+      if (turn.finalAnswerDeliveredAt == null) turn.finalAnswerDeliveredAt = Date.now()
+    })
+    ea.finalizeCard(() => {
+      clearActivitySummary(turn)
+    })
+  }
   const result = await handleStreamReply(
     {
       chat_id: streamChatId,
@@ -8370,7 +9127,6 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
       logStreamingEvent,
       isPrivateChat: streamIsPrivate,
       isForumTopic: streamIsForumTopic,
-      ...(sendMessageDraftFn != null ? { sendMessageDraft: sendMessageDraftFn } : {}),
       // Issue #310: deliver the outbound count bump BEFORE forceCompleteTurn
       // so the terminal render sees outboundDeliveredCount > 0. The handler
       // calls this dep in that order internally.
@@ -8390,12 +9146,10 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
       recordOutbound,
       ...(HISTORY_ENABLED ? { getLatestInboundMessageId } : {}),
       writeError: (line) => process.stderr.write(line),
-      // PR B: drop the legacy 600 ms compromise. When the operator sets
-      // `channels.telegram.stream_throttle_ms` in yaml, the env override
-      // wins; otherwise draft-stream's transport-aware default fires
-      // (300 ms draft / 1000 ms message). `throttleMs: undefined` is a
-      // signal — handlers downgrade to `?? undefined`, which then
-      // passes through to draft-stream where the default applies.
+      // When the operator sets `channels.telegram.stream_throttle_ms` in yaml,
+      // the env override wins; otherwise draft-stream's DM/group defaults apply
+      // (400 ms for DMs, 1000 ms for groups). `throttleMs: undefined` passes
+      // through to draft-stream where the per-chat-type default applies.
       ...(STREAM_THROTTLE_MS_OVERRIDE != null ? { throttleMs: STREAM_THROTTLE_MS_OVERRIDE } : {}),
       progressCardActive: streamMode === 'checklist',
     },
@@ -8495,6 +9249,10 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
       disableNotification: args.disable_notification === true,
       done: args.done === true,
     })
+    // Sticky ordering latch (lever 1): set once a SUBSTANTIVE final lands;
+    // never cleared by reopen. The card OPEN gate keys on this sticky latch.
+    if (turn.finalAnswerSubstantive) turn.finalAnswerEverDelivered = true
+    if (turn.finalAnswerSubstantive && turn.finalAnswerDeliveredAt == null) turn.finalAnswerDeliveredAt = Date.now()
     if (turn.finalAnswerSubstantive) closeObligationOnSubstantiveReply(args, turn, streamRoutedOriginTurn)
     // #1744 follow-up — stream_reply edge case. The first-emit gate at
     // L5178 only clears silent-end state on the FIRST emit of a stream.
@@ -9832,6 +10590,46 @@ function resetOrphanedReplyTimeout(): void {
         replyCalled: t.replyCalled,
         progressCardActive: progressDriver != null,
       })) {
+        // Feed-survival guard: re-arm the fuse while the turn is
+        // legitimately working — an in-flight tool, a detached background
+        // process (Bash run_in_background), or a human-wait tool (ask_user).
+        // This extends the original "isMidToolCall" guard to cover the
+        // detached-work cases that empty inFlight prematurely.
+        //
+        // Cap logic:
+        //  • Foreground tools / detached background work: bound by
+        //    ORPHANED_REPLY_MAX_REARMS (20 × 30 s = 10 min). A genuinely
+        //    hung tool still surfaces after the cap.
+        //  • Human-wait tools (ask_user): NEVER forcibly backstop while
+        //    ask_user is open for this chat — the human simply hasn't
+        //    tapped yet. We keep re-arming unconditionally until the prompt
+        //    resolves (TTL or tap) and inFlight empties.
+        const turnKey = statusKey(t.sessionChatId, t.sessionThreadId)
+        const working = isLegitimatelyWorking(turnKey)
+        const humanWaiting = (() => {
+          for (const entry of pendingAskUser.values()) {
+            if (entry.chatId === t.sessionChatId) return true
+          }
+          return false
+        })()
+        if (working || humanWaiting) {
+          const underCap = t.orphanedReplyRearmCount < ORPHANED_REPLY_MAX_REARMS
+          if (humanWaiting || underCap) {
+            t.orphanedReplyRearmCount++
+            process.stderr.write(
+              `telegram gateway: orphaned-reply fuse expired — re-arming` +
+              ` (rearm ${t.orphanedReplyRearmCount}/${ORPHANED_REPLY_MAX_REARMS},` +
+              ` in_flight=${toolFlightTracker.inFlightCount()},` +
+              ` human_wait=${humanWaiting},` +
+              ` bg_work=${pendingProgress.hasPendingAsyncDispatch(turnKey)})\n`,
+            )
+            resetOrphanedReplyTimeout()
+            return
+          }
+          process.stderr.write(
+            `telegram gateway: orphaned-reply rearm cap reached (${ORPHANED_REPLY_MAX_REARMS}) — forcing backstop despite working state\n`,
+          )
+        }
         process.stderr.write(
           `telegram gateway: orphaned-reply timeout (${ORPHANED_REPLY_TIMEOUT_MS}ms) — forcing backstop\n`,
         )
@@ -9876,13 +10674,112 @@ const FOREGROUND_SUBAGENT_ACCUM_MAX = 12
  * foreground sub-agents (rare — parallel Task dispatch) flatten in insertion
  * order; the single-sub-agent common case nests precisely under its
  * Delegating line.
+ *
+ * The header (elapsed + tool count) is now threaded into the render so the
+ * main-session card matches the worker card's two-line header style. This
+ * fixes the missing header regression where the worker card showed elapsed/
+ * tool-count metadata but the main-session card rendered step-lines only.
  */
 function composeTurnActivity(turn: CurrentTurn, final = false, liveSuffix = ''): string | null {
   const childLines: string[] = []
   for (const narrative of turn.foregroundSubAgents.values()) {
     childLines.push(...narrative)
   }
-  return renderActivityFeedWithNested(turn.mirrorLines, childLines, final, liveSuffix)
+  // Pass labeledToolCount as stepCount only on the terminal (final) render so
+  // the persisted feed record shows a `✓ N steps` total. The live in-progress
+  // feed omits it (stepCount undefined) to stay clean and minimal.
+  const stepCount = final ? turn.labeledToolCount : undefined
+  // Build the session header so the main-session card renders the same two-line
+  // elapsed/tool-count header as the worker card.
+  const header: SessionActivityHeader = {
+    label: 'Agent',
+    elapsedMs: turn.startedAt > 0 ? Date.now() - turn.startedAt : 0, // reports 0 when startedAt is not a positive timestamp
+    toolCount: turn.labeledToolCount, // same counter that feeds stepCount on the final render
+    state: final ? 'done' : 'running', // header state follows the same `final` flag as the render
+  }
+  return renderActivityFeedWithNested(turn.mirrorLines, childLines, final, liveSuffix, stepCount, header)
+}
+/**
+ * Render a SHOWN narrative text block as a transient liveness step — the
+ * same path a tool label takes (appendActivityLabel → renderStepFeed), so
+ * the narrative line is rolling-window-clipped and replaced by the next
+ * event exactly like a tool step. NOT a new message, NOT persisted as a
+ * parallel mirror (invariant `chat-is-the-single-source-of-truth`,
+ * reference/invariants.md). Clipped to a single 120-char line via
+ * clipNarrative so it reads as a step, not a paragraph.
+ */
+function showNarrativeStep(turn: CurrentTurn, text: string): void {
+  const rendered = appendActivityLabel(turn.mirrorLines, clipNarrative(text))
+  if (rendered == null) return // no render produced — nothing to show
+  turn.activityPendingRender = composeTurnActivity(turn) ?? rendered // prefer the full composed feed; fall back to the bare step render
+  const ea = emissionAuthorityFor(turn)
+  // PR-4d: route the deliver-before-drain decision through the centralized
+  // card-drain gate (chatLock-serialized under the flag; verbatim block OFF).
+  cardDrainGate(turn, ea, () => {
+  if (ea.mayDrain(turn)) {
+    // Producer A (narrative SHOW): may only EDIT an already-open card, never
+    // OPEN one on a 0-tool turn (design §9 lever 5 base case — the
+    // triplication). The OPEN gate in the drain enforces this; accumulation
+    // into mirrorLines still happens so the narration renders once a tool
+    // label or liveness opens the card. NOTE(review): feedHeartbeatTick's liveness comment calls lever 5 inert (#2588) — confirm this restriction still holds.
+    // PR-4a: routed through the emission-authority façade (no-op delegate).
+    ea.openOrEditCard('narrative', () => {
+      turn.activityInFlight = drainActivitySummary(turn, 'narrative') // drain promise is stored, not awaited here
+    })
+  }
+  })
+}
+/**
+ * Narrative-dedup gate, step 2 (reducer-side): a tool_use just arrived while
+ * a narrative block was pending. Decide SHOW vs SUPPRESS and clear the
+ * pending slot. SUPPRESS only when the tool is reply/stream_reply AND the
+ * pending text is a draft-then-send of that reply's `input.text`. Everything
+ * else (a working tool, or a reply whose text differs — post-action
+ * narration) is SHOWN. See narrative-dedup.ts §2b.
+ */
+function resolvePendingNarrativeOnTool(
+  turn: CurrentTurn,
+  toolName: string,
+  input: Record<string, unknown> | undefined,
+): void {
+  const pending = turn.pendingNarrative
+  if (pending == null) return // nothing staged — this tool_use decides nothing
+  turn.pendingNarrative = null // consume the slot up front, whichever way the verdict goes
+  if (REPLY_TOOLS.has(toolName)) {
+    const replyText = typeof input?.text === 'string' ? (input.text as string) : '' // missing or non-string text compares as ''
+    if (isDraftOfReply(pending.text, replyText)) return // draft of the answer → SUPPRESS
+  }
+  showNarrativeStep(turn, pending.text) // working preamble / post-action narration → SHOW
+}
+/**
+ * Narrative-dedup gate, step 1 (reducer-side): a new narrative block
+ * arrived. A previously-pending block had nothing reply-shaped immediately
+ * after it (pure narration) → flush it as SHOWN, then stage the new one for
+ * one lookahead step. See narrative-dedup.ts §2b.
+ */
+function stagePendingNarrative(turn: CurrentTurn, text: string): void {
+  if (turn.pendingNarrative != null) {
+    showNarrativeStep(turn, turn.pendingNarrative.text) // flush the older block before it is overwritten below
+  }
+  turn.pendingNarrative = { text } // single slot: holds only the newest block until it is resolved
+}
+/**
+ * Narrative-dedup gate, step 3 (reducer-side): the turn is ending with a
+ * trailing narrative block and nothing after it. SUPPRESS only when the turn
+ * already delivered its answer via reply/stream_reply and the trailing text
+ * is a draft of that answer; otherwise SHOW (genuine trailing narration like
+ * "Done — all green."). See narrative-dedup.ts §2b.
+ */
+function flushPendingNarrativeAtTurnEnd(turn: CurrentTurn, lastReplyText: string): void {
+  const pending = turn.pendingNarrative
+  if (pending == null) return // no trailing narrative staged
+  turn.pendingNarrative = null // consume the slot before deciding
+  if (lastReplyText.length > 0 && isDraftOfReply(pending.text, lastReplyText)) return // trailing duplicate of the answer → SUPPRESS (an empty lastReplyText never suppresses)
+  showNarrativeStep(turn, pending.text)
 }
 /**
@@ -9902,11 +10799,69 @@ function composeTurnActivity(turn: CurrentTurn, final = false, liveSuffix = ''):
  * doesn't corrupt the next turn's atom — late writes land on the
  * captured `turn` (already-completed turn, harmless).
  */
-async function drainActivitySummary(turn: CurrentTurn): Promise<void> {
+async function drainActivitySummary(
+  turn: CurrentTurn,
+  // Which producer triggered this drain (design §9 levers 1 + 5). Gates the
+  // OPEN (first sendMessage) branch via `mayOpenActivityCard`; EDITs of an
+  // already-open card are never gated. Defaults to 'tool' — the historically
+  // unconditional OPEN behaviour — so any caller that does not opt into the
+  // gate is unaffected. Narrative-SHOW and liveness callers pass their producer
+  // explicitly.
+  producer: FeedOpenProducer = 'tool',
+  // Optional flags forwarded to `mayOpenActivityCard`.
+  openFlags?: { postAnswerSubagentActivity?: boolean },
+): Promise<void> {
   try {
     while (turn.activityPendingRender !== turn.activityLastSentRender) {
       const target = turn.activityPendingRender
       if (target == null) break
+      // OPEN gate (design §9 levers 1 + 5): when this drain would OPEN a fresh
+      // card (activityMessageId == null), consult the pure gate. Refusing an
+      // OPEN must NOT advance activityLastSentRender — the accumulated render
+      // stays pending so a later OPEN-eligible producer (a tool label, or
+      // liveness) renders it. An EDIT (activityMessageId != null) is never
+      // gated. Enforced HERE so it covers BOTH the inline producers AND the
+      // detached heartbeat setInterval drain (R7/concurrency). This check guards
+      // only gate EVALUATION, not an in-flight send: it is not a hard mutex — a send
+      // already PAST this check and suspended at its `await robustApiCall(
+      // sendMessage)` when a substantive final lands still completes and opens a
+      // card; that residual is reconciled by lever-2's `clearActivitySummary`
+      // chaining its finalize onto `turn.activityInFlight` (the suspended drain)
+      // and editing the card in place, not by this gate blocking it.
+      // Lever 4 (cross-turn / race C/D): a synthetic represent/owed-reply turn
+      // (and the liveness/heartbeat timer firing on it) starts with a CLEARED
+      // per-turn `finalAnswerEverDelivered` latch even when a substantive answer
+      // already reached the user in an EARLIER turn — so without this its first
+      // drain opens a card BELOW that prior reply. Only such a turn carries
+      // `crossTurnGate`; reuse the represent guard's delivered-since check
+      // (`hasOutboundDeliveredSince`) with the obligation's `openedAt` cutoff and
+      // the SUBSTANTIVE 200-char threshold (so an ack never trips it → #2141
+      // stays green). Computed ONLY when about to OPEN (activityMessageId ==
+      // null) AND only for a turn with a cross-turn gate — no history query on
+      // the common foreground path. Scoped to the synthetic surface by the
+      // presence of `crossTurnGate`, so it can never fire on a foreground turn.
+      // PR-4b: the cross-turn predicate is now the PURE, shared helper extracted
+      // into feed-open-gate.ts (body lifted verbatim) — the SAME function the
+      // emission-authority façade calls in its enabled branch, so flag-ON and
+      // flag-OFF compute an identical verdict. History deps injected (the module
+      // stays sqlite-free). The pure-gate consult + the `break` below stay
+      // LITERALLY in the drain (disabled-path byte-identity).
+      const crossTurnAnswerDelivered = computeCrossTurnAnswerDelivered(
+        turn,
+        feedOpenGateDeps(),
+      )
+      if (
+        turn.activityMessageId == null
+        && !mayOpenActivityCard({
+          producer,
+          finalAnswerEverDelivered: turn.finalAnswerEverDelivered,
+          labeledToolCount: turn.labeledToolCount,
+          crossTurnAnswerDelivered,
+          postAnswerSubagentActivity: openFlags?.postAnswerSubagentActivity,
+        })
+      ) {
+        break
+      }
       // `renderActivityFeed` already emitted ready Telegram HTML with per-line
       // markup (<b>→ current</b> / <i>✓ done</i>) and escaped each label's
       // <,>,& itself (#1942 class) — send verbatim, do NOT re-escape or
@@ -9980,17 +10935,125 @@ async function drainActivitySummary(turn: CurrentTurn): Promise<void> {
 function feedHeartbeatTick(): void {
   const turn = currentTurn
   if (turn == null) return
+  if (turn.finalAnswerDelivered) {
+    // Fix 2: post-answer background-agent liveness. When the sub-agent/workflow
+    // watcher has surfaced a new step AFTER the substantive final answer, drive
+    // a liveness card so the operator can see "background agent still working".
+    //
+    // Gate: `turn.subagentActivityAt` must be set (watcher fired) AND it must
+    // exceed `turn.finalAnswerDeliveredAt` (the watcher advanced AFTER the answer
+    // was delivered — not just any pre-answer label). This is the key fix:
+    // #2587 read `lastToolLabelAt`, which is frozen by the drop-guard after a
+    // substantive answer and therefore never crosses the threshold. `subagentActivityAt`
+    // is written by the watcher's onProgress callback INDEPENDENTLY of the
+    // tool_label / drop-guard path, so it correctly advances post-answer.
+    //
+    // Idle-gap suppression + staleness cap (concern 3) — the single pure decision
+    // `evaluatePostAnswerLiveness`:
+    //   - 'idle'  → no watcher activity after the answer (`subagentActivityAt`
+    //               undefined or ≤ finalAnswerDeliveredAt). Stay silent; the
+    //               reply-is-last invariant is fully preserved for idle turns.
+    //   - 'stale' → the worker's last advance is older than POST_ANSWER_LIVENESS_STALE_MS
+    //               (its `onFinish` froze `subagentActivityAt` and no new step has
+    //               arrived). STOP re-rendering so the card doesn't climb `running`
+    //               forever — mirrors the pre-answer FEED_LIVENESS_OPEN_MS cap. The
+    //               worker's own terminal card (workerActivityFeed.finish) is the
+    //               durable record once it completes.
+    //   - 'emit'  → genuine in-flight post-answer activity; render the card below.
+    const subagentAt = turn.subagentActivityAt
+    const livenessVerdict = evaluatePostAnswerLiveness({
+      subagentActivityAt: subagentAt,
+      finalAnswerDeliveredAt: turn.finalAnswerDeliveredAt,
+      now: Date.now(),
+      staleCapMs: POST_ANSWER_LIVENESS_STALE_MS,
+    })
+    if (livenessVerdict !== 'emit' || subagentAt == null) return // idle gap or stale worker → stay silent (the `== null` also narrows subagentAt for the elapsed below)
+    // A background worker is genuinely active after the answer. Open or maintain
+    // a liveness card below the reply. Route through `mayOpenActivityCard` with
+    // `postAnswerSubagentActivity:true` so Lever 1 is lifted for 'tool' producer
+    // (Fix 2's Lever 1 exception in feed-open-gate.ts). The card renders the
+    // turn's accumulated mirrorLines (which may be empty — in that case the drain
+    // opens a "Working…" placeholder matching the pre-answer liveness path).
+    if (turn.sessionChatId == null) return // no session chat id on this turn
+    const age = Date.now() - turn.startedAt // whole-turn age, shown in the header
+    const livenessHeader: SessionActivityHeader = {
+      label: 'Agent', elapsedMs: age, toolCount: turn.labeledToolCount, state: 'running',
+    }
+    const lines = turn.mirrorLines.length > 0 ? turn.mirrorLines : ['Working in background…']
+    const elapsed = Date.now() - subagentAt // time since the last recorded sub-agent advance, shown in the live suffix
+    const rendered = renderActivityFeedWithNested(lines, [], false, ` · ${formatFeedElapsed(elapsed)}`, undefined, livenessHeader)
+    if (rendered == null) return
+    turn.activityPendingRender = rendered
+    const ea = emissionAuthorityFor(turn)
+    cardDrainGate(turn, ea, () => {
+    if (ea.mayDrain(turn)) {
+      // Producer 'tool' with postAnswerSubagentActivity=true: the Lever 1
+      // exception allows this OPEN. Lever 4 (cross-turn) and idle-liveness
+      // blocks are still respected by the drain. The card surfaces BELOW the
+      // reply showing the background agent's live activity.
+      ea.openOrEditCard('tool', () => {
+        turn.activityInFlight = drainActivitySummary(turn, 'tool', { postAnswerSubagentActivity: true })
+      })
+    }
+    })
+    return // post-answer path handled; the pre-answer branches below do not run
+  }
+  // Liveness feed (open + maintain). `mirrorLines.length === 0` means no tool
+  // has ever produced a label this turn — pure thinking, or only suppressed
+  // tools. Open a minimal "Working…" feed once the turn passes the threshold,
+  // and keep its elapsed climbing until a real label arrives. The first label
+  // makes mirrorLines non-empty, so the labelled-feed heartbeat below takes
+  // over and its edit cleanly replaces the placeholder. drainActivitySummary
+  // sends (opens) when activityMessageId is null and edits (maintains) once set
+  // — so this one branch handles both the open and the climb.
+  if (turn.mirrorLines.length === 0) {
+    if (!FEED_LIVENESS_OPEN_ENABLED || turn.sessionChatId == null) return
+    const age = Date.now() - turn.startedAt
+    if (age < FEED_LIVENESS_OPEN_MS) return // turn is still younger than the liveness-open threshold
+    const livenessHeader: SessionActivityHeader = {
+      label: 'Agent', elapsedMs: age, toolCount: 0, state: 'running',
+    }
+    const rendered = renderActivityFeedWithNested(['Working…'], [], false, ` · ${formatFeedElapsed(age)}`, undefined, livenessHeader)
+    if (rendered == null) return
+    turn.activityPendingRender = rendered
+    const ea = emissionAuthorityFor(turn)
+    // PR-4d: route through the centralized chatLock-serialized card-drain gate.
+    cardDrainGate(turn, ea, () => {
+    if (ea.mayDrain(turn)) {
+      // Producer C (liveness timer): the genuine ≥12s thinking-gap open. Now
+      // that Lever 5 is inert (narrative may open pre-answer — #2588), liveness
+      // remains the natural open for 0-tool pre-answer turns that are silent.
+      // The sticky-latch (lever 1) still gates it in the drain.
+      // PR-4a: routed through the emission-authority façade (no-op delegate).
+      ea.openOrEditCard('liveness', () => {
+        turn.activityInFlight = drainActivitySummary(turn, 'liveness')
+      })
+    }
+    })
+    return // placeholder path handled; the labelled-feed heartbeat below does not run
+  }
+  // Labelled-feed heartbeat: keep a stale in-progress step visibly advancing.
   if (turn.activityMessageId == null) return // no live feed yet / already cleared
-  if (turn.finalAnswerDelivered) return // feed handed off to the answer
   if (turn.lastToolLabelAt == null) return // feed not driven by a labelled step
   const elapsed = Date.now() - turn.lastToolLabelAt
   if (elapsed < FEED_HEARTBEAT_MIN_STALE_MS) return // step is fresh; feed advancing normally
   const rendered = composeTurnActivity(turn, false, ` · ${formatFeedElapsed(elapsed)}`)
   if (rendered == null) return
   turn.activityPendingRender = rendered
-  if (turn.activityInFlight == null) {
-    turn.activityInFlight = drainActivitySummary(turn)
+  const ea = emissionAuthorityFor(turn)
+  // PR-4d: route through the centralized chatLock-serialized card-drain gate.
+  cardDrainGate(turn, ea, () => {
+  if (ea.mayDrain(turn)) {
+    // Maintains an already-open card (guarded above on activityMessageId !=
+    // null) → only ever EDITs. 'liveness' is correct either way.
+    // PR-4a: routed through the emission-authority façade (no-op delegate).
+    ea.openOrEditCard('liveness', () => {
+      turn.activityInFlight = drainActivitySummary(turn, 'liveness')
+    })
   }
+  })
 }
 if (!STATIC && FEED_HEARTBEAT_ENABLED) {
   setInterval(feedHeartbeatTick, FEED_HEARTBEAT_TICK_MS).unref()
@@ -10043,8 +11106,19 @@ function clearActivitySummary(turn: CurrentTurn, finalHtmlOverride?: string | nu
     }
     // Default: leave the status message as a record, edited to a terminal
     // all-done state so it doesn't freeze on a misleading "→ in-progress" line.
-    const finalHtml =
+    let finalHtml =
       finalHtmlOverride !== undefined ? finalHtmlOverride : composeTurnActivity(turn, true)
+    // Liveness-only feed: opened on the timer for a turn that never labelled a
+    // tool (pure thinking / suppressed tools), so mirrorLines is empty and the
+    // terminal render is null. Finalize to a done "✓ Working…" record instead
+    // of leaving the message frozen on the live "→ Working…" line.
+    if (finalHtml == null && turn.mirrorLines.length === 0 && turn.activityEverOpened) {
+      const livenessElapsed = turn.startedAt > 0 ? Date.now() - turn.startedAt : 0
+      const livenessHeader: SessionActivityHeader = {
+        label: 'Agent', elapsedMs: livenessElapsed, toolCount: turn.labeledToolCount, state: 'done',
+      }
+      finalHtml = renderActivityFeedWithNested(['Working…'], [], true, '', undefined, livenessHeader)
+    }
     if (finalHtml == null) return
     try {
       await robustApiCall(
@@ -10111,6 +11185,19 @@ function handleSessionEvent(ev: SessionEvent): void {
         const turnId =
           deriveTurnId(ev.chatId, enqThreadIdNum ?? null, ev.messageId)
           ?? `${chatKey(ev.chatId, enqThreadIdNum ?? null)}#synthetic-${startedAt}`
+        // PR1 (cross-turn stale-card guard, §9 lever 4 / race C/D). Consume any
+        // pending cross-turn gate `obligationSweep` armed for THIS exact turn
+        // when it pushed an `obligation_represent` inbound. The gate is keyed on
+        // the obligation's `originTurnId`, and the represent inbound reuses the
+        // original chat/thread/messageId, so this turn's `turnId` (derived just
+        // above) equals that key iff this turn IS the represent surface the gate was armed for.
+        // An unrelated foreground turn on the same chat/thread derives a
+        // different `turnId` → finds no entry → no gate → its card opens normally
+        // (correct). Consume-once: delete on read so the matched gate can't leak
+        // forward, and a never-matched stale gate can never suppress another turn.
+        const xTurnGateKey = turnId
+        const consumedCrossTurnGate = pendingCrossTurnGate.get(xTurnGateKey)
+        if (consumedCrossTurnGate != null) pendingCrossTurnGate.delete(xTurnGateKey)
         const next: CurrentTurn = {
           sessionChatId: ev.chatId,
           sessionThreadId: enqThreadIdNum,
@@ -10123,20 +11210,34 @@ function handleSessionEvent(ev: SessionEvent): void {
           sourceMessageId: parseSourceMessageId(ev.messageId),
           startedAt,
           gatewayReceiveAt: startedAt,
+          // #2527 — stamp the loop role once, from the enqueue envelope.
+          role: deriveTurnRole(ev.rawContent),
+          // PR1 (cross-turn stale-card guard, §9 lever 4 / race C/D). Only a
+          // synthetic represent/owed-reply turn carries this; a foreground turn
+          // leaves it undefined and the cross-turn card-OPEN gate is inert.
+          ...(consumedCrossTurnGate != null ? { crossTurnGate: consumedCrossTurnGate } : {}),
           replyCalled: false,
           finalAnswerDelivered: false,
           finalAnswerSubstantive: false,
+          // Sticky latch — reset ONLY here (turn start), never by reopen.
+          finalAnswerEverDelivered: false,
           firstPingAt: null,
+          // Notification ownership (R8 / PR-2): no slot claimed yet, so the
+          // "claimer was substantive" flag starts false. Set atomically with
+          // firstPingAt at the over-ping decision site.
+          firstPingWasSubstantive: false,
           silentAnchorMessageId: null,
           silentAnchorText: '',
           capturedText: [],
           orphanedReplyTimeoutId: null,
+          orphanedReplyRearmCount: 0,
           turnId,
           registryKey: null,
           noReplyDrainTimer: null,
           lastAssistantMsgId: null,
           lastAssistantDone: false,
           toolCallCount: 0,
+          labeledToolCount: 0,
           activityMessageId: null,
           activityInFlight: null,
           activityPendingRender: null,
@@ -10144,11 +11245,24 @@ function handleSessionEvent(ev: SessionEvent): void {
           activityEverOpened: false,
           activityDrainFailures: 0,
           mirrorLines: [],
+          pendingNarrative: null,
+          lastReplyText: '',
           foregroundSubAgents: new Map(),
           answerStream: null,
           isDm: isDmChatId(ev.chatId),
+          // PR-4a — construct ONE emission-authority façade per turn, passing
+          // the chat/thread key in EXPLICITLY (the PR-4e seam; today equal to
+          // the singleton-sourced key). Per-turn: born with this turn literal,
+          // discarded with it — never persists across turns.
+          emissionAuthority: new EmissionAuthority(
+            statusKey(ev.chatId, enqThreadIdNum),
+          ),
         }
-        currentTurn = next
+        // PR-4e — route the turn-SET through the keyed accessor: flag-OFF assigns
+        // the singleton (byte-identical to `currentTurn = next`); flag-ON sets the
+        // per-topic `byKey[statusKey]` entry AND the most-recent mirror. The key is
+        // the SAME statusKey the ctor's façade was constructed with just above.
+        setCurrentTurn(next, statusKey(ev.chatId, enqThreadIdNum))
         markIdleActivity() // any turn start (main session) is activity — re-arm idle clear
         // Status-surface observability: one line at every turn SET so a later
         // dark card is traceable to which turn/topic key it belonged to.
@@ -10278,6 +11392,14 @@ function handleSessionEvent(ev: SessionEvent): void {
     case 'tool_use': {
       const turn = currentTurn
       if (turn == null) return
+      // Narrative-dedup gate step 2 (JSONL-text-narrative primitive): a
+      // narrative block was pending; this tool_use is the lookahead event
+      // that decides it. reply/stream_reply with near-identical text ⇒
+      // draft-then-send ⇒ SUPPRESS (the reply prints the canonical answer);
+      // anything else ⇒ SHOW as a transient liveness step. Runs BEFORE the
+      // normal tool handling so a working preamble surfaces just ahead of
+      // its tool step.
+      resolvePendingNarrativeOnTool(turn, ev.toolName, ev.input)
       // Phase 1 of #332: count every tool_use in the current turn.
       turn.toolCallCount++
       // #412: bump turn-active marker mtime so the watchdog sees this
@@ -10300,6 +11422,15 @@ function handleSessionEvent(ev: SessionEvent): void {
       // placeholder-heartbeat label, which has been retired.
       if (isTelegramReplyTool(name)) {
         turn.replyCalled = true
+        // NIT 2 (reply-proxy precision): capture the ACTUAL delivered reply
+        // text so flushPendingNarrativeAtTurnEnd compares a trailing
+        // narrative block against the real answer surface, not
+        // capturedText.join('') (which mis-suppresses when the model emits
+        // the same short string twice in a turn). REPLY_TOOLS ('reply',
+        // 'stream_reply') carry the answer in input.text; only those count.
+        if (REPLY_TOOLS.has(name) && typeof ev.input?.text === 'string') {
+          turn.lastReplyText = ev.input.text as string
+        }
         if (turn.orphanedReplyTimeoutId != null) {
           clearTimeout(turn.orphanedReplyTimeoutId)
           turn.orphanedReplyTimeoutId = null
@@ -10339,6 +11470,12 @@ function handleSessionEvent(ev: SessionEvent): void {
       // where the JSONL tool_use rows arrive too late.
       const turn = currentTurn
       if (turn == null) return
+      // SECONDARY FIX: an active tool_label means the model is producing work
+      // right now — re-arm the orphaned-reply fuse so a multi-phase tool turn
+      // (write → compile → test → fix) that regularly emits labels doesn't let
+      // the 30 s timer run down between labels. Mirrors how `case 'text':` calls
+      // resetOrphanedReplyTimeout() at ~line 10786.
+      resetOrphanedReplyTimeout()
       // Surface tools (reply/stream_reply/react) are the conversation, not
       // activity — the hook labels them ("Replying"), so filter by name.
       if (isTelegramSurfaceTool(ev.toolName)) return
@@ -10400,6 +11537,14 @@ function handleSessionEvent(ev: SessionEvent): void {
       }
       const rendered = appendActivityLabel(turn.mirrorLines, ev.label)
       if (rendered != null) {
+        // Count surfaced tool steps — the single source of truth for the `tools=`
+        // lifecycle field and the `✓ N steps` total. Incremented HERE (not at the
+        // top of the case) so the count stays consistent with what the feed
+        // actually surfaces: an empty label (appendActivityLabel → null) or a
+        // label dropped by the post-final-answer reopen guard never inflates it.
+        // Surface tools (reply/react) returned earlier; send_typing/sync_retain
+        // are suppressed at the hook (computeLabel → null) so they never arrive.
+        turn.labeledToolCount++
         // A new tool label = a new live step → re-anchor the heartbeat clock so
         // the " · Ns" elapsed restarts from this step (and the feed itself just
         // advanced, so it isn't stale).
@@ -10411,16 +11556,34 @@ function handleSessionEvent(ev: SessionEvent): void {
         // and would falsely reset the clock forever on a hung-mid-tool turn,
         // reintroducing the #1556 dangling-turn wedge. Only the model emitting a
         // fresh label reaches here.
-        if (SILENCE_LIVENESS_PRODUCTION && currentTurn === turn) {
+        // PR-4e — keyed liveness under the flag. Flag-OFF keeps the literal
+        // `currentTurn === turn` (a late tool-label for topic A must reset A's
+        // silence clock, not topic B's); flag-ON resolves A by ITS OWN key so a
+        // flip to B doesn't falsify A's liveness here.
+        if (
+          SILENCE_LIVENESS_PRODUCTION &&
+          (EMISSION_AUTHORITY_ENABLED ? turnLiveForItsTopic(turn) : currentTurn === turn)
+        ) {
           silencePoke.noteProduction(statusKey(turn.sessionChatId, turn.sessionThreadId), Date.now())
         }
         // Recompose so any active foreground sub-agent's nested block (Model A)
         // is preserved when the parent appends its own step. composeTurnActivity
         // == the flat render when no foreground sub-agent is active.
         turn.activityPendingRender = composeTurnActivity(turn) ?? rendered
-        if (turn.activityInFlight == null) {
-          turn.activityInFlight = drainActivitySummary(turn)
+        const ea = emissionAuthorityFor(turn)
+        // PR-4d: route through the centralized chatLock-serialized card-drain gate.
+        cardDrainGate(turn, ea, () => {
+        if (ea.mayDrain(turn)) {
+          // Producer B (tool label): always OPEN-eligible (labeledToolCount was
+          // incremented just above). A turn that started conversational and now
+          // dispatches a tool opens here, rendering any narration accumulated
+          // by the suppressed narrative-SHOW drains (design §9 lever 5 / R4).
+          // PR-4a: routed through the emission-authority façade (no-op delegate).
+          ea.openOrEditCard('tool', () => {
+            turn.activityInFlight = drainActivitySummary(turn, 'tool')
+          })
         }
+        })
       }
       return
     }
@@ -10447,58 +11610,33 @@ function handleSessionEvent(ev: SessionEvent): void {
       const turn = currentTurn
       if (turn != null) {
         turn.capturedText.push(ev.text)
+        // Narrative-dedup gate step 1 (JSONL-text-narrative primitive):
+        // stage this text block for one lookahead step. If a previous block
+        // was pending with nothing reply-shaped after it, it flushes here as
+        // a SHOWN transient liveness step. The eventual SHOW/SUPPRESS of THIS
+        // block is decided by the next tool_use / turn_end. Invariant
+        // `chat-is-the-single-source-of-truth` (reference/invariants.md): a
+        // SHOWN line rides the same renderStepFeed path as a tool step —
+        // transient + clipped, never a persisted parallel mirror. This is a
+        // separate lane from the answer-stream wiring below (which owns the
+        // canonical reply), so the two never fight over the same text.
+        stagePendingNarrative(turn, ev.text)
         // Issue #195: feed the answer-lane stream. The stream itself
         // gates on minInitialChars and throttles edits — short replies
         // stay below the threshold and never spawn a message.
         if (turn.answerStream == null) {
           turn.answerStream = createAnswerStream({
             chatId: turn.sessionChatId,
-            isPrivateChat: turn.isDm,
             threadId: turn.sessionThreadId,
-            // Transport selection:
-            // #869-Phase1 visible-answer-stream: omit the draft API so
-            // the lane edits a user-visible chat-timeline message
-            // (minInitialChars:1 opens it on the first chunk). The
-            // draft-mirror does NOT touch this lane — the canary proved
-            // the model emits almost no interstitial assistant.text
-            // (it thinks→tool→reply), so routing it to the draft just
-            // emptied the preview. The draft-mirror instead renders the
-            // tool_use stream (case 'tool_use' above) where the real
-            // signal lives. assistant.text keeps its visible-message
-            // home; the reply tool stays the canonical answer.
-            // Flag OFF (default): use the compose-box draft for DMs, and set
-            // minInitialChars effectively-infinite so the lane NEVER opens a
-            // visible chat message. This matters in supergroup TOPICS, where
-            // draft transport is unsupported (gateway.ts:6422) so the lane
-            // would otherwise fall to message transport and post a visible
-            // preview once interstitial text passed the default 50-char gate
-            // — which retract() then deletes (the unformatted flash, marko
-            // General). With the gate unreachable the only posted message is
-            // the canonical reply. (The gate is bypassed for DM draft
-            // transport, so DM draft streaming is unaffected.)
-            // VISIBLE preview gating decoupled from the draft-transport flag
-            // (2026-06-05 flash regression fix). The visible flag ALONE decides
-            // whether a user-visible preview opens; DRAFT_ANSWER_LANE_RETIRED
-            // controls only the TRANSPORT (whether sendMessageDraftFn exists).
-            // The earlier `|| DRAFT_ANSWER_LANE_RETIRED` here meant retiring the
-            // draft (the default since v0.14.68) silently forced minInitialChars:1
-            // → a visible preliminary opened on every streaming turn and was then
-            // retracted (deleted) when the reply tool fired — the exact "raw bubble
-            // appears, formatted reply lands, raw bubble vanishes" flash that
-            // turning the visible stream OFF (v0.14.52) was meant to remove. So
-            // v0.14.68 silently undid v0.14.52 fleet-wide. Now:
-            //   - VISIBLE on (opt-in) → minInitialChars:1, a real edit-in-place
-            //     preview (observable by UAT, silence-liveness reset on its sends).
-            //   - VISIBLE off (default) → minInitialChars:MAX so NO visible preview
-            //     ever opens; the reply tool is the single canonical formatted
-            //     message (no flash). With the draft retired (default) there is no
-            //     transport either, so the lane stays dormant; with the kill switch
-            //     DRAFT_ANSWER_LANE=0 the legacy compose-box draft transport is
-            //     restored (sendMessageDraftFn defined above, gate bypassed for DM
-            //     draft so #1664 DM draft streaming is unaffected).
-            ...(ANSWER_LANE.usesDraftTransport
-              ? { sendMessageDraft: sendMessageDraftFn, minInitialChars: ANSWER_LANE.minInitialChars }
-              : { minInitialChars: ANSWER_LANE.minInitialChars }),
+            // VISIBLE on (opt-in, SWITCHROOM_VISIBLE_ANSWER_STREAM=1) →
+            //   minInitialChars:1 opens a user-visible edit-in-place preview on the
+            //   first text chunk. At turn_end the preview is materialized as a pinged
+            //   final answer (materialize()) when the model never called reply.
+            // VISIBLE off (default) → minInitialChars:MAX so NO visible preview ever
+            //   opens; the reply tool is the single canonical formatted message
+            //   (no flash). The draft transport is permanently retired — both modes
+            //   use sendMessage + editMessageText for any message that does open.
+            minInitialChars: ANSWER_LANE.minInitialChars,
             // #1075: route through robustApiCall so flood-wait,
             // benign-400, and THREAD_NOT_FOUND are handled uniformly
             // instead of crashing the answer-stream loop on a deleted
@@ -10589,7 +11727,11 @@ function handleSessionEvent(ev: SessionEvent): void {
             // skip the tick (the new turn has its own answer stream).
             onMetric: (metricEv) => {
               logStreamingEvent(metricEv)
-              if (currentTurn === turn) {
+              // PR-4e — keyed liveness under the flag. Flag-OFF keeps the literal
+              // `currentTurn === turn` (a draft-update metric for topic A's stream
+              // must tick A's signal/silence clock); flag-ON resolves A by its own
+              // key so a flip to B doesn't skip A's tick.
+              if (EMISSION_AUTHORITY_ENABLED ? turnLiveForItsTopic(turn) : currentTurn === turn) {
                 signalTracker.noteSignal(
                   statusKey(turn.sessionChatId, turn.sessionThreadId),
                   Date.now(),
@@ -10717,6 +11859,28 @@ function handleSessionEvent(ev: SessionEvent): void {
       return
     }
     case 'turn_end': {
+      // DEFENSIVE FIX: belt-and-braces guard against the synthetic backstop
+      // (`durationMs: -1`) racing live work. durationMs >= 0 is the
+      // authoritative signal from system/turn_duration; -1 is ONLY ever set
+      // by the orphaned-reply backstop. Reject the synthetic event here so that
+      // even if the PRIMARY fix's re-arm logic is bypassed (e.g. a very fast
+      // fire before isLegitimatelyWorking() is sampled) we still don't tear
+      // down a live feed mid-work. Extended from the original isMidToolCall()
+      // check to the full isLegitimatelyWorking predicate so detached background
+      // work and human-wait tools (ask_user) are also protected.
+      // INVARIANT: a REAL turn_end (durationMs >= 0) is NEVER suppressed.
+      if (ev.durationMs === -1) {
+        const turn = currentTurn
+        const key = turn != null ? statusKey(turn.sessionChatId, turn.sessionThreadId) : ''
+        if (isLegitimatelyWorking(key)) {
+          process.stderr.write(
+            `telegram gateway: synthetic turn_end suppressed — legitimately working` +
+            ` (in_flight=${toolFlightTracker.inFlightCount()},` +
+            ` bg_work=${turn != null ? pendingProgress.hasPendingAsyncDispatch(key) : false})\n`,
+          )
+          return
+        }
+      }
       // Drain any still-pending tool dispatch typing entries — covers
       // transcript truncation or a Claude Code crash mid-tool.
       typingWrapper.drainAll()
@@ -10733,6 +11897,33 @@ function handleSessionEvent(ev: SessionEvent): void {
         clearTimeout(turn.orphanedReplyTimeoutId)
         turn.orphanedReplyTimeoutId = null
       }
+      // Narrative-dedup gate step 3 (JSONL-text-narrative primitive): a
+      // trailing narrative block with nothing after it. When the turn
+      // delivered its answer via reply (replyCalled) the trailing text is
+      // almost always a draft of that answer — compare against the ACTUAL
+      // delivered reply text and SUPPRESS the duplicate; otherwise SHOW
+      // genuine trailing narration ("Done — all green."). Must run BEFORE
+      // clearActivitySummary so a SHOWN line lands in the feed's final
+      // render. Always clears turn.pendingNarrative so it can't leak across
+      // turns.
+      //
+      // NIT 2 (reply-proxy precision): use `turn.lastReplyText` (the
+      // most-recent reply/stream_reply input.text) rather than
+      // `capturedText.join('')`. The old proxy concatenated every captured
+      // text block, so a turn that emitted the same short string twice
+      // (e.g. "Done." as working narration, then "Done." as the reply) would
+      // compare the trailing narration against a doubled "DoneDone" — still
+      // a high-prefix match — and wrongly suppress genuine trailing
+      // narration. Comparing against the actual reply text is exact. When
+      // the turn delivered WITHOUT a reply tool (turn-flush emits
+      // capturedText as the answer), fall back to capturedText.join('') so
+      // that path's trailing-draft suppression is preserved.
+      if (turn != null) {
+        const deliveredText = turn.lastReplyText.length > 0
+          ? turn.lastReplyText
+          : (turn.replyCalled ? turn.capturedText.join('') : '')
+        flushPendingNarrativeAtTurnEnd(turn, deliveredText)
+      }
       // Clear the activity feed at the real end of the turn. This is the
       // no-reply safety net — a turn that ends without ever calling reply
       // (the answer is delivered by turn-flush / silent-end) still has its
@@ -10934,6 +12125,17 @@ function handleSessionEvent(ev: SessionEvent): void {
             ` chat=${chatId} turnStartedAt=${turn.startedAt} replyCalled=false capturedText=empty` +
             ` — the progress card steps were the only thing the user saw (#45)\n`,
           )
+          // #2527: emit structured WARN so the reaction-only failure mode is
+          // machine-readable in the streaming-metrics channel.
+          const tKey = statusKey(chatId, threadId)
+          logStreamingEvent({
+            kind: 'turn_no_reply_warn',
+            chatId,
+            threadId,
+            turnId: turn.turnId,
+            turnDurationMs: turn.startedAt > 0 ? Date.now() - turn.startedAt : 0,
+            reactionCount: reactionTransitionCounts.get(tKey) ?? 0,
+          })
         }
       }
@@ -11291,10 +12493,39 @@ function handleSessionEvent(ev: SessionEvent): void {
       }
       // #1713: turn_end is THE terminal trigger. Finalize via the
-      // single terminal path (👍). Any prior intermediate states
-      // pending in the debounce window are flushed by `finalize()`
-      // before the terminal emoji emits.
-      finalizeStatusReaction(chatId, threadId, 'done')
+      // single terminal path. Any prior intermediate states pending in
+      // the debounce window are flushed by `finalize()` before the
+      // terminal emoji emits.
+      //
+      // #2527 — role-aware terminal honesty: a USER turn that ends without
+      // a delivered answer must NOT paint 👍 (the operator's "thumbs up so
+      // it feels like you're done" report). It finalizes to the gentle
+      // 'undelivered' terminal (😐) instead; the silent-end fallback below
+      // carries the apology text. system/cron turns and NO_REPLY/HEARTBEAT_OK
+      // turns (which return earlier) keep 👍 — their silence is legitimate.
+      let terminalReason = decideTerminalReason({
+        enabled: LIVENESS_TERMINAL_HONESTY,
+        role: turn.role,
+        finalAnswerDelivered: turn.finalAnswerDelivered,
+      })
+      // #2527 review note 1 — worker-hold carve-out: if the turn is STILL
+      // legitimately working at turn_end (a background sub-agent the parent
+      // dispatched is running on), don't prematurely paint 😐. Fall back to
+      // 'done' so the existing deferred-done path holds ✍️ until the worker
+      // completes (then 👍) — the worker-activity feed carries the progress.
+      // Only a turn that genuinely ended undelivered AND is not still working
+      // gets the honest 😐.
+      if (terminalReason === 'undelivered' && isLegitimatelyWorking(statusKey(chatId, threadId))) {
+        terminalReason = 'done'
+      }
+      if (terminalReason === 'undelivered') {
+        process.stderr.write(
+          `telegram gateway: WARN turn_no_reply — user turn ended with an ` +
+          `ambient ack but no delivered answer; painting 😐 not 👍 ` +
+          `chat=${chatId} thread=${threadId ?? '-'} turnId=${turn.turnId} (#2527)\n`,
+        )
+      }
+      finalizeStatusReaction(chatId, threadId, terminalReason)
       {
         const sKey = streamKey(chatId, threadId)
         const turnDurationMs = turn.startedAt > 0 ? Date.now() - turn.startedAt : 0
@@ -11773,6 +13004,9 @@ async function handleInboundCoalesced(
   //   - msgId present (always true for `bot.on('message:*')` paths but
   //     defensive against future routers that might call this without one).
   maybeEarlyAckReaction(ctx, from)
+  // #2527 — if this lands mid-turn, the user is asking "what's happening?";
+  // fire the liveness floor immediately (DM + supergroup alike).
+  maybePokeFloorForMidTurnInbound(ctx, from)
   const key = inboundCoalesceKey(
     String(ctx.chat!.id),
@@ -11801,6 +13035,14 @@ function maybeEarlyAckReaction(ctx: Context, from: NonNullable<Context['from']>)
   const msgId = ctx.message?.message_id
   if (msgId == null) return
   const chatType = ctx.chat?.type
+  // Intentionally DM-only (#2527 surface-parity note): pre-acking a GROUP
+  // message risks reacting to one the full gate (requireMention / topic
+  // scoping) would later DROP. The SUBSTANTIVE liveness parity a supergroup
+  // needs — the mid-turn floor and the role-aware terminal reaction — is
+  // surface-agnostic (keyed on statusKey + loop role, no chat-type branch),
+  // so a forum topic gets identical never-silent guarantees without this
+  // sub-second 👀 optimisation. See `maybePokeFloorForMidTurnInbound` for
+  // the surface-agnostic "Status?" short-circuit.
   if (chatType !== 'private') return
   const chatId = String(ctx.chat!.id)
   const threadId = ctx.message?.is_topic_message ? ctx.message.message_thread_id : undefined
@@ -11810,6 +13052,9 @@ function maybeEarlyAckReaction(ctx: Context, from: NonNullable<Context['from']>)
   void bot.api.setMessageReaction(chatId, msgId, [
     { type: 'emoji', emoji: '👀' as ReactionTypeEmoji['emoji'] },
   ]).catch(() => {})
+  // #2527: log the early-ack fire so operators can see how often the
+  // fast pre-coalesce DM path triggers vs. the controller path.
+  logStreamingEvent({ kind: 'early_ack_reaction', chatId, messageId: msgId, emoji: '👀' })
   // #553 PR 3: also fire the native "typing…" indicator. Bridges the
   // visual gap between the early-ack 👀 reaction and the first real
   // model text. No fake content — Telegram clients render this natively
@@ -11818,6 +13063,26 @@ function maybeEarlyAckReaction(ctx: Context, from: NonNullable<Context['from']>)
   void bot.api.sendChatAction(chatId, 'typing').catch(() => {})
 }
+/**
+ * #2527 — "Status?" short-circuit. A message arriving DURING an active turn
+ * (the user explicitly asking what's happening) fires the mid-turn liveness
+ * floor immediately, bypassing the timer/working gates. Surface-agnostic:
+ * works identically in a DM and a forum-supergroup topic (keyed on statusKey).
+ * Idempotent per turn (the floor's fire-once latch) and kill-switch-gated.
+ * NOTE(review): neither the fire-once latch nor the kill switch is visible in
+ * this function — presumably both live inside `silencePoke.pokeFloorNow`;
+ * confirm there.
+ *
+ * Returns without doing anything when the update has no chat, when
+ * `activeTurnStartedAt` has no entry for this (chat, thread) key, or when the
+ * sender's id is not listed in `access.allowFrom`.
+ *
+ * @param ctx  Inbound update context; supplies the chat id and, for topic
+ *             messages, the thread id used to build the status key.
+ * @param from Sender of the inbound message; its id is checked against the
+ *             allowlist.
+ */
+function maybePokeFloorForMidTurnInbound(ctx: Context, from: NonNullable<Context['from']>): void {
+  const rawChatId = ctx.chat?.id
+  if (rawChatId == null) return
+  const chatId = String(rawChatId)
+  // The thread id is taken only from topic messages; otherwise it is undefined.
+  const threadId = ctx.message?.is_topic_message ? ctx.message.message_thread_id : undefined
+  const key = statusKey(chatId, threadId)
+  // Only mid-turn: a turn must already be in flight for this (chat, thread).
+  if (!activeTurnStartedAt.has(key)) return
+  // Allowlist gate: a sender outside access.allowFrom never triggers the poke.
+  const access = loadAccess()
+  if (!access.allowFrom.includes(String(from.id))) return
+  silencePoke.pokeFloorNow(key, Date.now())
+}
 async function handleInbound(
   ctx: Context,
   text: string,
@@ -12723,17 +13988,42 @@ async function handleInbound(
         if (!chatAvailableReactions.has(chat_id)) {
           probeAvailableReactions(chat_id)
         }
+        // #2527: use inbound msgId as a stable per-turn reaction identifier.
+        // The controller is created before currentTurn.turnId is assigned
+        // (that happens in handleSessionEvent's enqueue branch), so we capture
+        // msgId here and use it as the reaction-session token in log events.
+        const ctrlTurnToken = `${chat_id}:${msgId}`
         const ctrl = new StatusReactionController(async (emoji) => {
           await bot.api.setMessageReaction(chat_id, msgId, [
             { type: 'emoji', emoji: emoji as ReactionTypeEmoji['emoji'] },
           ])
           // #203: every status-reaction transition is a user-visible signal.
           signalTracker.noteSignal(key, Date.now())
-        }, allowedReactions)
+        }, allowedReactions, {
+          // #2527: emit a structured transition event on each emoji change so
+          // the reaction lifecycle is visible in streaming-metrics logs. Also
+          // increment the per-key counter for the turn_no_reply_warn metric.
+          onTransition: (emoji) => {
+            reactionTransitionCounts.set(key, (reactionTransitionCounts.get(key) ?? 0) + 1)
+            logStreamingEvent({
+              kind: 'status_reaction_transition',
+              chatId: chat_id,
+              turnId: ctrlTurnToken,
+              emoji,
+            })
+          },
+        })
         activeStatusReactions.set(key, ctrl)
         activeReactionMsgIds.set(key, { chatId: chat_id, messageId: msgId })
         activeTurnStartedAt.set(key, Date.now())
         progressUpdateTurnCount.set(key, 0)  // Reset turn counter
+        // #2527: log controller install so the lifecycle start is observable.
+        logStreamingEvent({
+          kind: 'status_reaction_install',
+          chatId: chat_id,
+          turnId: ctrlTurnToken,
+          messageId: msgId,
+        })
         ctrl.setQueued()
         // #203: time-to-ack metric — setQueued() triggers the initial 👀 reaction
         // asynchronously through the controller chain.
@@ -13300,6 +14590,18 @@ function getCommandArgs(ctx: Context): string {
   return m ? m[1].trim() : ''
 }
+/**
+ * True when a slash command's argument string carries a trailing `demo`
+ * token — the per-command PII-mask modifier for screen recordings
+ * (`/usage demo`, `/auth demo`, `/status demo`, `/whoami demo`). Matches
+ * `demo` as the last whitespace-delimited token, case-insensitively, so
+ * `/auth show demo` and `/usage demo` both flip the flag while a label
+ * literally named `demo-foo` does not.
+ *
+ * The argument string is trimmed before matching, so surrounding whitespace
+ * is ignored. `demo` must sit at the very end and be preceded by either the
+ * start of the string or a whitespace character; `foo-demo` therefore does
+ * not match, and neither does a `demo` followed by further tokens.
+ *
+ * @param args The command's argument string (everything after the command).
+ * @returns true when the last token is `demo` in any letter case.
+ */
+function hasDemoFlag(args: string): boolean {
+  return /(?:^|\s)demo$/i.test(args.trim())
+}
 /** Validate that a string looks like a safe agent/resource name.
  *  Agent names should be alphanumeric with hyphens/underscores only.
  *  This prevents shell metacharacter injection even though both exec
@@ -13790,6 +15092,9 @@ function notifyDetachedFailure(
         lockedBot.api.sendMessage(chatId, text, {
           parse_mode: 'HTML',
           link_preview_options: { is_disabled: true },
+          // Detached restart/update child-failure notice — status, not
+          // the user's answer. Silence the open ping.
+          disable_notification: true,
           ...(threadId != null ? { message_thread_id: threadId } : {}),
         }),
       {
@@ -14577,9 +15882,10 @@ bot.command('status', async ctx => {
   const { access, senderId } = gated
   const from = ctx.from!
   if (access.allowFrom.includes(senderId)) {
+    const demo = hasDemoFlag(getCommandArgs(ctx))
     const userTag = from.username ? `@${from.username}` : senderId
     const meta = await buildAgentMetadata(getMyAgentName())
-    await ctx.reply(buildStatusPairedText({ user: userTag, meta }), { parse_mode: 'HTML' })
+    await ctx.reply(buildStatusPairedText({ user: userTag, meta, demo }), { parse_mode: 'HTML' })
     return
   }
   for (const [code, p] of Object.entries(access.pending)) {
@@ -14712,10 +16018,12 @@ bot.command('model', async ctx => {
 // `/effort` — show or switch the reasoning effort for the live session.
 // The effort sibling of `/model`: bare form renders a five-button menu
 // (low/medium/high/xhigh/max, the live level ✅), a typed form
-// `/effort <level>` sets it directly. Both ride the allowlisted inject
-// primitive (claude's own `/effort` REPL command), session-scoped — boot
-// re-pins the configured default via start.sh's `--effort`. Implementation
-// in effort-command.ts so it's unit-testable without booting the bot.
+// `/effort <level>` sets it directly. Both ride the dedicated `applyEffort`
+// driver (claude's own `/effort` REPL command, with the confirmation modal
+// answered so the pane never wedges — NOT the bare inject primitive, which
+// is blocklisted for `/effort` since #2471), session-scoped — boot re-pins
+// the configured default via start.sh's `--effort`. Implementation in
+// effort-command.ts so it's unit-testable without booting the bot.
 function buildEffortDeps(): EffortCommandDeps {
   return {
     applyEffort: (agent, level) => applyEffort(agent, level),
@@ -14799,6 +16107,9 @@ bot.command('restart', async ctx => {
         (tid) =>
           lockedBot.api.sendMessage(chatId, ackText, {
             parse_mode: 'HTML', link_preview_options: { is_disabled: true },
+            // Restart acknowledgement is a status notice — silence the
+            // open ping (the "restarted — ready" follow-up is what matters).
+            disable_notification: true,
             ...(tid != null ? { message_thread_id: tid } : {}),
           }),
         { threadId, chat_id: chatId, verb: 'restart.ack' },
@@ -14940,6 +16251,9 @@ async function handleNewCommand(ctx: Context): Promise<void> {
       (tid) =>
         lockedBot.api.sendMessage(chatId, ackText, {
           parse_mode: 'HTML', link_preview_options: { is_disabled: true },
+          // /new /reset acknowledgement is a status notice — silence the
+          // open ping (the post-restart greeting card is what matters).
+          disable_notification: true,
           ...(tid != null ? { message_thread_id: tid } : {}),
         }),
       { threadId, chat_id: chatId, verb: 'new-or-reset.ack' },
@@ -15142,6 +16456,9 @@ bot.command('update', async ctx => {
         lockedBot.api.sendMessage(chatId, ackText, {
           parse_mode: 'HTML',
           link_preview_options: { is_disabled: true },
+          // "update started" acknowledgement is a status notice — silence
+          // the open ping (the post-restart greeting card is what matters).
+          disable_notification: true,
           ...(tid != null ? { message_thread_id: tid } : {}),
         }),
       { threadId, chat_id: chatId, verb: 'update.ack' },
@@ -15603,6 +16920,36 @@ const fleetFallbackGate = createFleetFallbackGate({
   brokerReachable: isAuthBrokerSocketReachable,
 })
+/**
+ * Resume-after-swap gate (auth-failover-stall fix). Owns the single-flight +
+ * staleness decision for re-running the turn a mid-turn 429 killed. See
+ * fleet-fallback-resume.ts. Wired into doFireFleetAutoFallback below: on a
+ * 'switched' outcome we restart so the boot-resume path replays the dead turn
+ * on the freshly-active account. 3h staleness mirrors the boot-resume
+ * RESUME_MAX_AGE_MS failsafe (gateway boot path); single-flight stops a 429
+ * storm from loop-restarting the agent.
+ *
+ * `maxAgeMs` is read once, when this declaration is evaluated, from the
+ * SWITCHROOM_RESUME_MAX_AGE_MS environment variable. A value that is unset,
+ * non-numeric, non-finite, zero or negative falls back to 10_800_000 ms.
+ */
+const fleetFallbackResumeGate = createFleetFallbackResumeGate({
+  maxAgeMs: (() => {
+    // Number(undefined) is NaN and Number('') is 0, so both an unset and an
+    // empty variable fail the check below and take the default.
+    const v = Number(process.env.SWITCHROOM_RESUME_MAX_AGE_MS)
+    return Number.isFinite(v) && v > 0 ? v : 10_800_000 // 3h, matches boot-resume
+  })(),
+})
+/**
+ * The start time (epoch-ms) of the most-recently-started active turn — the
+ * staleness signal for the resume gate. `activeTurnStartedAt` is stamped on
+ * inbound receipt (see its declaration), so the newest entry is the turn the
+ * 429 just killed. Returns null when no turn is tracked (then the resume gate
+ * defers staleness to the boot-resume 3h failsafe).
+ *
+ * @returns The largest value currently stored in `activeTurnStartedAt`, or
+ *          null when it holds no entries.
+ */
+function newestActiveTurnStartedAtMs(): number | null {
+  let newest: number | null = null
+  // Linear scan over every tracked (chat, thread) entry, keeping the maximum.
+  for (const ms of activeTurnStartedAt.values()) {
+    if (newest == null || ms > newest) newest = ms
+  }
+  return newest
+}
 /** Thin wrapper: returns whatever `fleetFallbackGate.wouldFire()` reports. */
 function wouldFireFleetAutoFallback(): boolean {
   return fleetFallbackGate.wouldFire()
 }
@@ -15658,6 +17005,17 @@ async function fireFleetAutoFallback(triggerAgent: string, untilMs?: number): Pr
  */
 let fallbackFailureNoticeState: FallbackFailureNoticeState = { lastSentAtMs: 0 }
+/**
+ * Bug 2 — per-gateway cooldown for the "All accounts blocked" card. The
+ * all-blocked outcome is a no-op swap (doFireFleetAutoFallback returns false),
+ * so the fleetFallbackGate dedup window never arms for it, and the ~60s
+ * quota_wall_detected re-trigger would otherwise re-broadcast the identical card
+ * every minute for the life of the wall. This bounds it to one card per window.
+ * Reset on a successful swap so a fresh all-blocked after a recovery (a real new
+ * transition) is not stale-suppressed.
+ *
+ * NOTE(review): `lastSentAtMs: 0` presumably means "no card sent yet" — confirm
+ * against evaluateAllBlockedNotice, which interprets this state.
+ */
+let fallbackAllBlockedNoticeState: FallbackAllBlockedNoticeState = { lastSentAtMs: 0 }
 function broadcastFleetFallbackFailure(triggerAgent: string, reason: string): void {
   if (process.env.SWITCHROOM_FLEET_FALLBACK_FAILURE_NOTICE === '0') return
   // Notice-level cooldown (30 min, per gateway). The fleetFallbackGate's
@@ -15743,19 +17101,66 @@ async function doFireFleetAutoFallback(triggerAgent: string, untilMs?: number):
         (outcome.kind === 'switched' ? ` old=${outcome.oldLabel} new=${outcome.newLabel}` : '') +
         '\n',
     )
+    // Bug 2 — the all-blocked card is a no-op outcome, so the gate's dedup
+    // window never arms for it and the ~60s quota_wall_detected re-trigger would
+    // re-broadcast the identical card every minute. Gate it behind a per-gateway
+    // cooldown; a successful swap resets the window so a later (genuinely new)
+    // all-blocked still emits promptly.
+    if (outcome.kind === 'switched') {
+      fallbackAllBlockedNoticeState = { lastSentAtMs: 0 }
+    } else if (outcome.kind === 'all-blocked') {
+      const verdict = evaluateAllBlockedNotice(fallbackAllBlockedNoticeState, Date.now())
+      if (!verdict.send) {
+        process.stderr.write(
+          `telegram gateway: [fleet-fallback] all-blocked card suppressed (cooldown) agent=${triggerAgent}\n`,
+        )
+        return false
+      }
+      fallbackAllBlockedNoticeState = verdict.next
+    }
     // Post the announcement to every authorized chat. Mirrors the
     // operator-event broadcast pattern (line ~2290) — DM-only opts
     // (no message_thread_id) so THREAD_NOT_FOUND can't fire here;
     // wrap in swallowingApiCall anyway per the codebase rule.
     const access = loadAccess()
     if (access.allowFrom.length === 0) return outcome.kind === 'switched'
-    const opts = { parse_mode: 'HTML' as const }
+    // Account-switch / all-blocked announcement is a system status notice,
+    // not the user's answer — silence the open ping.
+    const opts = { parse_mode: 'HTML' as const, disable_notification: true }
     for (const chat_id of access.allowFrom) {
       void swallowingApiCall(
         () => bot.api.sendMessage(chat_id, outcome.announcement, opts),
         { chat_id, verb: 'fleet-fallback:notify' },
       )
     }
+    // ── Resume the dead turn (auth-failover-stall fix) ──────────────────────
+    // A mid-turn 429 killed a turn; the swap above moved the fleet to a healthy
+    // account, but that only takes effect on the NEXT claude invocation. Re-run
+    // the dead turn via triggerSelfRestart: the boot-resume path (gateway boot,
+    // findLatestTurnIfInterrupted → buildResumeInterruptedInbound) replays the
+    // LATEST interrupted turn on the freshly-active account. We restart rather
+    // than redeliver because the failed inbound was already DELIVERED (the turn
+    // started, then the model 429'd) so it is NOT in pendingInboundBuffer —
+    // redeliverBufferedInbound would find nothing. Guards live in
+    // fleetFallbackResumeGate: single-flight (a 429 storm cannot loop-restart)
+    // + 3h staleness (an ancient interrupted turn is not resurrected). Only
+    // reached on 'switched'; all-blocked / no-op outcomes never get here, so the
+    // all-blocked cooldown path above is preserved.
+    if (outcome.kind === 'switched') {
+      const verdict = fleetFallbackResumeGate.decide(newestActiveTurnStartedAtMs())
+      if (verdict === 'resume') {
+        const selfAgent = process.env.SWITCHROOM_AGENT_NAME ?? triggerAgent
+        process.stderr.write(
+          `telegram gateway: [fleet-fallback] resuming dead turn via self-restart ` +
+          `agent=${selfAgent} (swap ${outcome.oldLabel}→${outcome.newLabel})\n`,
+        )
+        triggerSelfRestart(selfAgent, 'fleet-fallback-resume')
+      } else {
+        process.stderr.write(
+          `telegram gateway: [fleet-fallback] resume suppressed (${verdict}) agent=${triggerAgent}\n`,
+        )
+      }
+    }
     return outcome.kind === 'switched'
   } catch (err) {
     process.stderr.write(
@@ -15815,6 +17220,9 @@ async function runCreditWatch(): Promise<void> {
         bot.api.sendMessage(chat_id, decision.message, {
           parse_mode: 'HTML',
           link_preview_options: { is_disabled: true },
+          // Credit/quota warning is a system status notice — silence the
+          // open ping (the user isn't waiting to tap anything).
+          disable_notification: true,
         }),
       { chat_id, verb: 'credit-watch.notify' },
     )
@@ -15928,6 +17336,10 @@ async function runQuotaWatch(opts: { bootTick?: boolean } = {}): Promise<void> {
       accounts: listStateData.accounts,
       prev: fleetPrev,
       now,
+      // #2478: the same staleness ceiling the per-account loop uses. Gates the
+      // `entered` alert behind live corroboration so a probe blackout's stale
+      // marks can't false-fire 🔴 All accounts exhausted.
+      tuning,
     })
     if (fleetDecision.kind === 'notify') {
       for (const chat_id of access.allowFrom) {
@@ -15950,6 +17362,8 @@ async function runQuotaWatch(opts: { bootTick?: boolean } = {}): Promise<void> {
             bot.api.sendMessage(chat_id, fleetDecision.message, {
               parse_mode: 'HTML',
               link_preview_options: { is_disabled: true },
+              // Quota status notice — silence the open ping.
+              disable_notification: true,
             }),
           { chat_id, verb: 'quota-watch.fleet-all-exhausted' },
         )
@@ -16022,11 +17436,21 @@ async function runQuotaWatch(opts: { bootTick?: boolean } = {}): Promise<void> {
   // numbers for the notification message bodies. One batched RPC for all
   // crossing accounts (typically 1, rarely 2+).
   const crossingLabels = pendingTransitions.map(t => t.accountLabel)
-  let freshProbeMap = new Map<string, Awaited<ReturnType<typeof brokerClient.probeQuota>>['results'][number]['result']>()
+  // #2495 BLOCKER fix — store the FULL entry (result + `served` tag), not just
+  // `entry.result`. The corroboration gate below needs `served` to tell a true
+  // live probe apart from a failed-probe cache fallback (which is `ok:true`
+  // but `served:"cache"` — vacuous corroboration).
+  let freshProbeMap = new Map<string, Awaited<ReturnType<typeof brokerClient.probeQuota>>['results'][number]>()
   try {
-    const probeData = await brokerClient.probeQuota(crossingLabels, 8000)
+    // #2495 Change 3 — forceLive bypasses the broker's probe-on-open TTL so the
+    // DECISION to alarm is corroborated by a TRUE live probe, never a cache hit.
+    // Only the transition-to-alarm pays for this; steady-state polls stay on the
+    // cheap cached listState read (no probe). Honors the existing fleet/consumer
+    // probe knobs upstream — this re-evaluation never fires without a detected
+    // transition.
+    const probeData = await brokerClient.probeQuota(crossingLabels, 8000, true)
     for (const entry of probeData.results) {
-      freshProbeMap.set(entry.label, entry.result)
+      freshProbeMap.set(entry.label, entry)
     }
   } catch (err) {
     process.stderr.write(`telegram gateway: quota-watch: probe for crossing accounts failed: ${err}\n`)
@@ -16058,17 +17482,25 @@ async function runQuotaWatch(opts: { bootTick?: boolean } = {}): Promise<void> {
   for (const { accountLabel, snapIndex, decision } of pendingTransitions) {
     // Re-evaluate with fresh probe data to get an accurate message body.
     // If the fresh probe succeeded, replace the snap's quota with live data.
-    const freshResult = freshProbeMap.get(accountLabel)
+    const freshEntry = freshProbeMap.get(accountLabel)
     let enrichedDecision = decision
     // pendingTransitions only ever holds notify decisions (pushed under
     // `decision.kind !== 'skip'` / `!== 'reconcile'`). Narrow explicitly so
     // `decision.transition` type-checks below; this continue never fires
     // at runtime.
     if (decision.kind !== 'notify') continue
-    if (freshResult && freshResult.ok && snapIndex >= 0) {
+    // #2495 BLOCKER fix — only a GENUINE live probe corroborates the alarm. A
+    // forceLive entry that is `ok:true` but `served:"cache"` (the broker's
+    // failed-probe cache fallback) is NOT corroboration: the upstream probe
+    // failed, so we have no live confirmation that the throttling crossing is
+    // real right now. Treat it exactly like a probe failure → fall through to
+    // the defer branch below (state untouched, re-evaluated next tick). This
+    // also guarantees the "Live-probe corroborated (#2495)" footnote is only
+    // ever stamped on a real live probe.
+    if (isLiveCorroboration(freshEntry) && freshEntry!.result.ok && snapIndex >= 0) {
       // Live numbers replace the cache — and capturedAtMs is cleared so the
       // staleness gate never misfires on data we JUST probed.
-      const enrichedSnap = { ...snapshots[snapIndex]!, quota: freshResult.data, capturedAtMs: undefined }
+      const enrichedSnap = { ...snapshots[snapIndex]!, quota: freshEntry!.result.data, capturedAtMs: undefined }
       const prev = watchState[accountLabel] ?? emptyAccountState()
       const re = evaluateQuotaWatchAccount({ agentName, snap: enrichedSnap, prev, now, bootTick, tuning })
       // If the fresh probe still shows the same transition, use the
@@ -16153,6 +17585,8 @@ async function runQuotaWatch(opts: { bootTick?: boolean } = {}): Promise<void> {
           bot.api.sendMessage(chat_id, message, {
             parse_mode: 'HTML',
             link_preview_options: { is_disabled: true },
+            // Quota throttling status notice — silence the open ping.
+            disable_notification: true,
           }),
         { chat_id, verb: 'quota-watch.notify' },
       )
@@ -16424,7 +17858,13 @@ bot.command("auth", async ctx => {
     }
     return
   }
-  const text = ctx.message?.text ?? ""
+  const rawText = ctx.message?.text ?? ""
+  // `/auth demo` (and `/auth show demo` / `/auth list demo`) — the trailing
+  // `demo` token masks account-email labels on the default dashboard view for
+  // screen recordings. Strip it before parsing so `demo` isn't mistaken for a
+  // verb/agent argument; it's honored only on the show/list path downstream.
+  const authDemo = hasDemoFlag(getCommandArgs(ctx))
+  const text = authDemo ? rawText.replace(/(\s+)demo\s*$/i, "") : rawText
   const parsed = parseAuthCommand(text)
   if (!parsed) return
   const currentAgent = getMyAgentName()
@@ -16486,11 +17926,12 @@ bot.command("auth", async ctx => {
       return
     }
     try {
-      const { loginUrl, scratchDir, child } = await startAccountAuthSession(parsed.label)
+      const { loginUrl, scratchDir, tmuxSocket, tmuxSession } = await startAccountAuthSession(parsed.label)
       pendingAuthAddFlows.set(authAddKey, {
         label: parsed.label,
         scratchDir,
-        child,
+        tmuxSocket,
+        tmuxSession,
         startedAt: Date.now(),
       })
       await switchroomReply(
@@ -16522,6 +17963,7 @@ bot.command("auth", async ctx => {
     isAdmin,
     client,
     chatId,
+    demo: authDemo,
     // Format 2 enricher — live quota probe via the broker (#1336).
     // Pre-broker this read `~/.switchroom/accounts/<label>/credentials.json`
     // off the agent's HOME, which post-RFC-H is never populated (broker
@@ -16533,18 +17975,27 @@ bot.command("auth", async ctx => {
     liveQuotas: async (accounts) => {
       try {
         const { results } = await client.probeQuota(accounts.map((a) => a.label))
+        // #2495 Change 2 — the broker tags each result `served:"live"|"cache"`
+        // (TTL hit or failed-probe fallback). When ANY account was served from
+        // cache, surface the OLDEST snapshot's capturedAt so the card stamps
+        // "⚠ cached Nm ago" instead of a false live stamp.
+        let staleCachedAtMs: number | undefined
         // Preserve input order (broker also preserves it, but be defensive).
-        return accounts.map((a) => {
+        const quotas = accounts.map((a) => {
           const hit = results.find((r) => r.label === a.label)
           if (!hit) return { ok: false as const, reason: "broker returned no result for account" }
+          if (hit.served === 'cache' && hit.capturedAt != null) {
+            staleCachedAtMs = staleCachedAtMs == null ? hit.capturedAt : Math.min(staleCachedAtMs, hit.capturedAt)
+          }
           return hit.result
         })
+        return { quotas, staleCachedAtMs }
       } catch (err) {
         // Surface a uniform per-account failure so the dashboard renders
         // gracefully (label badge stays UNKNOWN) instead of falling back
         // to the legacy table.
         const reason = `broker probe-quota failed: ${(err as Error)?.message ?? String(err)}`
-        return accounts.map(() => ({ ok: false as const, reason }))
+        return { quotas: accounts.map(() => ({ ok: false as const, reason })) }
       }
     },
     tz: process.env.SWITCHROOM_TIMEZONE ?? process.env.TZ,
@@ -18899,6 +20350,7 @@ bot.command('issues', async ctx => {
 bot.command('usage', async ctx => {
   if (!isAuthorizedSender(ctx)) return
+  const demo = hasDemoFlag(getCommandArgs(ctx))
   // Format 2 path: enumerate every account in the broker's known set,
   // probe live quota in parallel, render the health-grouped snapshot.
   // Falls back to the legacy single-agent shape when the broker is
@@ -18911,9 +20363,17 @@ bot.command('usage', async ctx => {
       const state = await client.listState()
       if (state.accounts.length > 0) {
         // Broker-routed probe (#1336) — see gateway.ts:8910 for diagnosis.
+        // #2495 Change 2 — the broker applies a probe-on-open TTL + single-
+        // flight; a TTL-hit or failed-probe fallback is tagged served:"cache",
+        // which we surface as a "⚠ cached Nm ago" footer instead of a false
+        // live stamp.
         const probeResp = await client.probeQuota(state.accounts.map((a) => a.label)).catch(() => ({ results: [] }))
+        let staleCachedAtMs: number | undefined
         const quotas = state.accounts.map((a) => {
           const hit = probeResp.results.find((r) => r.label === a.label)
+          if (hit?.served === 'cache' && hit.capturedAt != null) {
+            staleCachedAtMs = staleCachedAtMs == null ? hit.capturedAt : Math.min(staleCachedAtMs, hit.capturedAt)
+          }
           return hit?.result ?? { ok: false as const, reason: 'broker returned no result for account' }
         })
         const { renderAuthSnapshotFormat2, buildSnapshotsFromState } = await import(
@@ -18924,7 +20384,8 @@ bot.command('usage', async ctx => {
         const text = renderAuthSnapshotFormat2(snapshots, {
           tz,
           now: new Date(),
-          liveProbedAtMs: Date.now(),
+          demo,
+          ...(staleCachedAtMs != null ? { staleCachedAtMs } : { liveProbedAtMs: Date.now() }),
         })
         await switchroomReply(ctx, text, { html: true })
         return
@@ -19091,13 +20552,14 @@ bot.command('version', async ctx => {
 // see at a glance what this agent is authorized for.
 bot.command('whoami', async ctx => {
   if (!isAuthorizedSender(ctx)) return
+  const demo = hasDemoFlag(getCommandArgs(ctx))
   try {
     let raw: string
     try { raw = switchroomExecCombined(['config', 'whoami'], 10000) }
     catch (err: unknown) { raw = (err as any).stdout ?? (err as any).message ?? 'whoami failed' }
     const trimmed = stripAnsi(raw).trim()
     let card: string
-    try { card = formatWhoamiCard(JSON.parse(trimmed.split('\n').pop() ?? trimmed)) }
+    try { card = formatWhoamiCard(JSON.parse(trimmed.split('\n').pop() ?? trimmed), demo) }
     catch { card = preBlock(formatSwitchroomOutput(trimmed || 'whoami: no output')) }
     await switchroomReply(ctx, card, { html: true })
   } catch (err: unknown) {
@@ -19105,14 +20567,17 @@ bot.command('whoami', async ctx => {
   }
 })
-/** Compact HTML card from the `config whoami` JSON view. Names/booleans only. */
+/** Compact HTML card from the `config whoami` JSON view. Names/booleans only.
+ *  `demo` (the `/whoami demo` suffix) masks the vault key NAMES via maskVaultKey
+ *  for screen recordings — agent/MCP/model/skills topology is left untouched
+ *  (out of scope). Off by default. */
 function formatWhoamiCard(v: {
   name?: string; persona?: string | null; model?: string | null; tier?: string;
   tools?: { allow?: string[]; deny?: string[] }; mcpServers?: string[]; skills?: string[];
   vault?: { key: string; readable: boolean }[];
   powers?: { admin?: boolean; root?: boolean; configEdit?: boolean; crossAgentHostVerbs?: boolean };
   scheduleCount?: number; memoryBackend?: string | null;
-}): string {
+}, demo = false): string {
   const esc = escapeHtmlForTg
   const yn = (b?: boolean) => (b ? '✓' : '✗')
   const lines: string[] = []
@@ -19125,7 +20590,7 @@ function formatWhoamiCard(v: {
   if ((v.mcpServers ?? []).length) lines.push(`MCP: ${esc(v.mcpServers!.join(', '))}`)
   if ((v.skills ?? []).length) lines.push(`Skills: ${esc(v.skills!.join(', '))}`)
   if ((v.vault ?? []).length) {
-    lines.push(`Vault keys (names only): ${v.vault!.map(k => `${esc(k.key)} ${yn(k.readable)}`).join(', ')}`)
+    lines.push(`Vault keys (names only): ${v.vault!.map(k => `${esc(demo ? maskVaultKey(k.key) : k.key)} ${yn(k.readable)}`).join(', ')}`)
   }
   const p = v.powers ?? {}
   lines.push(`Powers: admin ${yn(p.admin)} · root ${yn(p.root)} · config-edit ${yn(p.configEdit)} · cross-agent verbs ${yn(p.crossAgentHostVerbs)}`)
@@ -19350,6 +20815,8 @@ bot.on('callback_query:data', async ctx => {
             await robustApiCall(() =>
               bot.api.editMessageText(args.chatId, args.messageId, args.text, {
                 parse_mode: 'HTML',
+                // Resolved on tap — strip the keyboard so it can't be re-tapped.
+                ...(args.stripKeyboard ? { reply_markup: { inline_keyboard: [] } } : {}),
               }),
             )
           } catch {
@@ -19359,6 +20826,9 @@ bot.on('callback_query:data', async ctx => {
         log: (m) =>
           process.stderr.write(`telegram gateway: config-approval cb — ${m}\n`),
       },
+      // Verify the per-card epoch from the callback_data against the live
+      // pending entry — a stale tap (mismatched epoch) is rejected.
+      parsed.epoch,
     )
     await ctx.answerCallbackQuery({
       text: resolved
@@ -21136,6 +22606,11 @@ async function shutdown(signal: string): Promise<void> {
   subagentWatcher?.stop()
   subagentWatcher = null
+  // Worker-activity feed runs an internal heartbeat interval; stop it so no
+  // re-render fires during drain (mirrors subagentWatcher above).
+  workerActivityFeed?.stop()
+  workerActivityFeed = null
   // Issues watcher polls issues.jsonl on a setInterval (default 2s) and
   // edits the issues card on every tick. Without an explicit stop() the
   // poll keeps firing for the lifetime of the process and accumulates
@@ -21869,7 +23344,8 @@ void (async () => {
             // or the turn ended while it kept running — extended autonomous
             // work) is surfaced via the worker feed instead of vanishing.
             const orphanStatusEnabled = isOrphanSubagentStatusEnabled(process.env.SWITCHROOM_ORPHAN_SUBAGENT_STATUS)
-            const workerActivityFeed = createWorkerActivityFeed({
+            workerActivityFeed?.stop()
+            workerActivityFeed = createWorkerActivityFeed({
               bot: {
                 sendMessage: async (cid, text, sendOpts) => {
                   const sent = await robustApiCall(
@@ -22063,9 +23539,18 @@ void (async () => {
                       const rendered = composeTurnActivity(turn)
                       if (rendered != null) {
                         turn.activityPendingRender = rendered
-                        if (turn.activityInFlight == null) {
-                          turn.activityInFlight = drainActivitySummary(turn)
+                        // PR-4a: routed through the emission-authority façade
+                        // (no-op delegate). Producer made explicit ('tool' — the
+                        // drain default this foreground sub-agent render used).
+                        const ea = emissionAuthorityFor(turn)
+                        // PR-4d: route through the centralized card-drain gate.
+                        cardDrainGate(turn, ea, () => {
+                        if (ea.mayDrain(turn)) {
+                          ea.openOrEditCard('tool', () => {
+                            turn.activityInFlight = drainActivitySummary(turn, 'tool')
+                          })
                         }
+                        })
                       }
                     }
                     return
@@ -22083,7 +23568,7 @@ void (async () => {
                       orphanStatusEnabled,
                     }) === 'worker-feed'
                   ) {
-                    void workerActivityFeed.finish(agentId, {
+                    void workerActivityFeed?.finish(agentId, {
                       description: dispatch.feedDescription,
                       lastTool: null,
                       toolCount,
@@ -22100,7 +23585,7 @@ void (async () => {
                 // 'orphan' is a stale boot row, not a fresh completion — map
                 // it to 'done' so an already-posted message still finalizes.
                 if (workerFeedEnabled) {
-                  void workerActivityFeed.finish(agentId, {
+                  void workerActivityFeed?.finish(agentId, {
                     description: dispatch.feedDescription,
                     lastTool: null,
                     toolCount,
@@ -22235,7 +23720,7 @@ void (async () => {
                   })
                   if (surface === 'worker-feed') {
                     const origin = resolveSubagentOriginChat(agentId)
-                    void workerActivityFeed.update(
+                    void workerActivityFeed?.update(
                       agentId,
                       origin?.chatId || fleetChatId || (loadAccess().allowFrom[0] ?? ''),
                       {
@@ -22270,7 +23755,13 @@ void (async () => {
                   // feed (the foreground blindspot) — mirroring the
                   // main-turn activity feed, which surfaces both tool labels
                   // and prose.
-                  const child = (progressLine ?? latestSummary).trim().slice(0, 120)
+                  // Route through the SHARED clipNarrative so multi-line
+                  // narration first-line-collapses identically to the main
+                  // tier (the main path at showNarrativeStep already does
+                  // this). Previously this inlined `.trim().slice(0, 120)`
+                  // omitted the first-line collapse, so a multi-line
+                  // narrative rendered DIFFERENTLY here than on the main feed.
+                  const child = clipNarrative(progressLine ?? latestSummary)
                   if (child.length === 0) return
                   let narrative = turn.foregroundSubAgents.get(agentId)
                   if (narrative == null) {
@@ -22288,13 +23779,70 @@ void (async () => {
                   const rendered = composeTurnActivity(turn)
                   if (rendered != null) {
                     turn.activityPendingRender = rendered
-                    if (turn.activityInFlight == null) {
-                      turn.activityInFlight = drainActivitySummary(turn)
+                    // PR-4a: routed through the emission-authority façade (no-op
+                    // delegate). Producer made explicit ('tool' — the drain
+                    // default this foreground sub-agent render used).
+                    const ea = emissionAuthorityFor(turn)
+                    // PR-4d: route through the centralized card-drain gate.
+                    cardDrainGate(turn, ea, () => {
+                    if (ea.mayDrain(turn)) {
+                      ea.openOrEditCard('tool', () => {
+                        turn.activityInFlight = drainActivitySummary(turn, 'tool')
+                      })
+                    }
+                    })
+                    // A foreground sub-agent's nested activity IS user-visible
+                    // production — count it so the silence-poke clock resets,
+                    // exactly like the parent activity-render path (10665). Without
+                    // this, a long tools-only foreground sub-agent (no prose) lets
+                    // the 300s framework fallback (and the #2527 mid-turn floor)
+                    // measure silence against a turn that is visibly working,
+                    // risking a premature tear-down / unwanted liveness beat.
+                    // PR-4e — keyed liveness under the flag (a foreground
+                    // sub-agent's nested render for topic A is A's production).
+                    // Flag-OFF keeps the literal `currentTurn === turn`; flag-ON
+                    // resolves A by its own key.
+                    if (
+                      SILENCE_LIVENESS_PRODUCTION &&
+                      (EMISSION_AUTHORITY_ENABLED ? turnLiveForItsTopic(turn) : currentTurn === turn)
+                    ) {
+                      silencePoke.noteProduction(statusKey(turn.sessionChatId, turn.sessionThreadId), Date.now())
                     }
                   }
                   return
                 }
+                // Fix 2 (post-answer background-agent liveness): when the
+                // watcher surfaces a new step for a background worker, update
+                // the current turn's `subagentActivityAt` timestamp IF the turn
+                // has already delivered its substantive answer. This signal is
+                // written HERE — NOT in the tool_label path — so the drop-guard
+                // (`shouldReopenFeedAfterAck` / finalAnswerSubstantive) cannot
+                // gate it. `feedHeartbeatTick`'s post-answer branch reads
+                // `subagentActivityAt` (not `lastToolLabelAt`, which is frozen
+                // after the answer) to decide whether to open a liveness card.
+                // Only stamp when the turn is alive AND post-answer: pre-answer
+                // activity is already surfaced by the normal tool-label feed.
+                //
+                // SCOPE — this is the IN-TURN-WINDOW surface only. The
+                // `feedHeartbeatTick` post-answer card is driven off `currentTurn`,
+                // which `endCurrentTurnAtomic` nulls at `turn_end`. A genuinely
+                // DECOUPLED background worker keeps running PAST the parent
+                // turn's teardown, so `currentTurn` is null when its later
+                // onProgress ticks arrive → this stamp is inert and the
+                // heartbeat is silent for that worker. That is BY DESIGN, not a
+                // gap: a decoupled worker's ongoing activity is surfaced by the
+                // dedicated, currentTurn-independent `workerActivityFeed` (the
+                // edit-in-place worker message, driven below at `workerFeedEnabled`
+                // and bounded by its own non-running/`finish` teardown). So the
+                // currentTurn card covers the brief post-answer/pre-teardown
+                // window; the worker feed covers everything after teardown. Both
+                // are proven in telegram-activity-visibility-integration.test.ts.
+                const stampTurn = currentTurn
+                if (stampTurn != null && stampTurn.finalAnswerEverDelivered) {
+                  stampTurn.subagentActivityAt = Date.now()
+                }
                 // #PR2 live worker-feed: when ON, the worker's live chat
                 // message owns the progress beat. Push a running cue and
                 // return BEFORE the legacy bucket relay so the same activity
@@ -22306,7 +23854,7 @@ void (async () => {
                 // is gone — see resolveSubagentOriginChat).
                 if (workerFeedEnabled) {
                   const origin = resolveSubagentOriginChat(agentId)
-                  void workerActivityFeed.update(
+                  void workerActivityFeed?.update(
                     agentId,
                     origin?.chatId || fleetChatId || (loadAccess().allowFrom[0] ?? ''),
                     {
@@ -22370,11 +23918,9 @@ void (async () => {
       // Lane state (post flash-decouple): VISIBLE only when the visible flag is
       // Lane state from the single-source-of-truth resolver: 'visible' (preview
-      // on), 'draft' (compose-box transport), or 'dormant' (the default: no
-      // preview, no draft — reply tool is the only message). The old label
-      // wrongly reported 'visible(draft-retired)' for the dormant default, which
-      // masked the flash regression.
-      process.stderr.write(`telegram gateway: answer-stream lane=${ANSWER_LANE.state} draftFn=${sendMessageDraftFn != null ? 'available' : 'off'} visible=${ANSWER_STREAM_VISIBLE_ENABLED} draftRetired=${DRAFT_ANSWER_LANE_RETIRED} grammy=${GRAMMY_VERSION}\n`)
+      // Lane state: 'visible' (opt-in preview) or 'dormant' (default: reply
+      // tool is the only message). The draft transport is permanently retired.
+      process.stderr.write(`telegram gateway: answer-stream lane=${ANSWER_LANE.state} visible=${ANSWER_STREAM_VISIBLE_ENABLED} grammy=${GRAMMY_VERSION}\n`)
       process.stderr.write(`telegram gateway: starting bot polling pid=${process.pid} agent=${process.env.SWITCHROOM_AGENT_NAME ?? '-'} stateDir=${STATE_DIR} historyEnabled=${HISTORY_ENABLED} streamMode=${process.env.SWITCHROOM_TG_STREAM_MODE ?? 'checklist'}\n`)
       runnerHandle = run(bot, {
         runner: {