switchroom 0.15.45 → 0.16.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. package/dist/agent-scheduler/index.js +122 -88
  2. package/dist/auth-broker/index.js +463 -177
  3. package/dist/cli/autoaccept-poll.js +4842 -35
  4. package/dist/cli/drive-write-pretool.mjs +17 -14
  5. package/dist/cli/notion-write-pretool.mjs +117 -86
  6. package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
  7. package/dist/cli/self-improve-stop.mjs +428 -0
  8. package/dist/cli/skill-validate-pretool.mjs +72 -72
  9. package/dist/cli/switchroom.js +3158 -1178
  10. package/dist/host-control/main.js +2833 -355
  11. package/dist/vault/approvals/kernel-server.js +7479 -7439
  12. package/dist/vault/broker/server.js +11312 -11272
  13. package/examples/minimal.yaml +1 -0
  14. package/examples/switchroom.yaml +1 -0
  15. package/package.json +3 -3
  16. package/profiles/_base/start.sh.hbs +88 -1
  17. package/profiles/_shared/execution-discipline.md.hbs +18 -0
  18. package/profiles/default/CLAUDE.md.hbs +0 -19
  19. package/telegram-plugin/.claude-plugin/plugin.json +2 -2
  20. package/telegram-plugin/answer-stream-flag.ts +12 -49
  21. package/telegram-plugin/answer-stream.ts +5 -150
  22. package/telegram-plugin/auth-snapshot-format.ts +280 -48
  23. package/telegram-plugin/auto-fallback-fleet.ts +44 -1
  24. package/telegram-plugin/context-exhaustion.ts +12 -0
  25. package/telegram-plugin/demo-mask.ts +154 -0
  26. package/telegram-plugin/dist/bridge/bridge.js +167 -124
  27. package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
  28. package/telegram-plugin/dist/server.js +215 -172
  29. package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
  30. package/telegram-plugin/draft-stream.ts +47 -410
  31. package/telegram-plugin/final-answer-detect.ts +17 -12
  32. package/telegram-plugin/fleet-fallback-resume.ts +131 -0
  33. package/telegram-plugin/format.ts +56 -19
  34. package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
  35. package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
  36. package/telegram-plugin/gateway/auth-command.ts +70 -14
  37. package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
  38. package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
  39. package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
  40. package/telegram-plugin/gateway/current-turn-map.ts +188 -0
  41. package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
  42. package/telegram-plugin/gateway/effort-command.ts +8 -3
  43. package/telegram-plugin/gateway/emission-authority.ts +369 -0
  44. package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
  45. package/telegram-plugin/gateway/gateway.ts +1837 -291
  46. package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
  47. package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
  48. package/telegram-plugin/gateway/represent-guard.ts +72 -0
  49. package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
  50. package/telegram-plugin/gateway/status-surface-log.ts +14 -3
  51. package/telegram-plugin/history.ts +33 -11
  52. package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
  53. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
  54. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
  55. package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
  56. package/telegram-plugin/issues-card.ts +4 -0
  57. package/telegram-plugin/model-unavailable.ts +124 -0
  58. package/telegram-plugin/narrative-dedup.ts +69 -0
  59. package/telegram-plugin/over-ping-safety-net.ts +70 -4
  60. package/telegram-plugin/package.json +3 -3
  61. package/telegram-plugin/pending-work-progress.ts +12 -0
  62. package/telegram-plugin/permission-rule.ts +32 -5
  63. package/telegram-plugin/permission-title.ts +152 -9
  64. package/telegram-plugin/quota-check.ts +13 -0
  65. package/telegram-plugin/quota-watch.ts +135 -7
  66. package/telegram-plugin/registry/turns-schema.test.ts +24 -0
  67. package/telegram-plugin/registry/turns-schema.ts +9 -0
  68. package/telegram-plugin/runtime-metrics.ts +13 -0
  69. package/telegram-plugin/session-tail.ts +96 -11
  70. package/telegram-plugin/silence-poke.ts +170 -24
  71. package/telegram-plugin/slot-banner-driver.ts +3 -0
  72. package/telegram-plugin/status-no-truncate.ts +44 -0
  73. package/telegram-plugin/status-reactions.ts +20 -3
  74. package/telegram-plugin/stream-controller.ts +4 -23
  75. package/telegram-plugin/stream-reply-handler.ts +6 -24
  76. package/telegram-plugin/streaming-metrics.ts +91 -0
  77. package/telegram-plugin/subagent-watcher.ts +212 -66
  78. package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
  79. package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
  80. package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
  81. package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
  82. package/telegram-plugin/tests/answer-stream.test.ts +2 -411
  83. package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
  84. package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
  85. package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
  86. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
  87. package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
  88. package/telegram-plugin/tests/demo-mask.test.ts +127 -0
  89. package/telegram-plugin/tests/draft-stream.test.ts +0 -827
  90. package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
  91. package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
  92. package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
  93. package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
  94. package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
  95. package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
  96. package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
  97. package/telegram-plugin/tests/feed-survival.test.ts +526 -0
  98. package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
  99. package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
  100. package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
  101. package/telegram-plugin/tests/history.test.ts +60 -0
  102. package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
  103. package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
  104. package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
  105. package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
  106. package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
  107. package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
  108. package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
  109. package/telegram-plugin/tests/permission-rule.test.ts +17 -0
  110. package/telegram-plugin/tests/permission-title.test.ts +206 -17
  111. package/telegram-plugin/tests/quota-watch.test.ts +252 -9
  112. package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
  113. package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
  114. package/telegram-plugin/tests/represent-guard.test.ts +162 -0
  115. package/telegram-plugin/tests/session-tail.test.ts +147 -3
  116. package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
  117. package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
  118. package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
  119. package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
  120. package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
  121. package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
  122. package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
  123. package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
  124. package/telegram-plugin/tests/telegram-format.test.ts +101 -6
  125. package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
  126. package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
  127. package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
  128. package/telegram-plugin/tests/tool-labels.test.ts +67 -0
  129. package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
  130. package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
  131. package/telegram-plugin/tests/welcome-text.test.ts +32 -3
  132. package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
  133. package/telegram-plugin/tool-activity-summary.ts +375 -58
  134. package/telegram-plugin/turn-liveness-floor.ts +240 -0
  135. package/telegram-plugin/uat/assertions.ts +115 -0
  136. package/telegram-plugin/uat/driver.ts +68 -0
  137. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
  138. package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
  139. package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
  140. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
  141. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
  142. package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
  143. package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
  144. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
  145. package/telegram-plugin/welcome-text.ts +13 -1
  146. package/telegram-plugin/worker-activity-feed.ts +157 -82
  147. package/telegram-plugin/draft-transport.ts +0 -122
  148. package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
  149. package/telegram-plugin/tests/draft-transport.test.ts +0 -211
@@ -75,7 +75,8 @@ import {
75
75
  } from './permission-timeout.js'
76
76
  import { pickRecoveredPermissionOrigin } from './permission-card-origin.js'
77
77
  import { isTelegramReplyTool, isTelegramSurfaceTool } from '../tool-names.js'
78
- import { appendActivityLabel, renderActivityFeedWithNested } from '../tool-activity-summary.js'
78
+ import { appendActivityLabel, clipNarrative, renderActivityFeedWithNested, type SessionActivityHeader } from '../tool-activity-summary.js'
79
+ import { REPLY_TOOLS, isDraftOfReply } from '../narrative-dedup.js'
79
80
  import { toolLabel } from '../tool-labels.js'
80
81
  import { createTypingWrapper } from '../typing-wrap.js'
81
82
  import { type DraftStreamHandle } from '../draft-stream.js'
@@ -97,15 +98,16 @@ import {
97
98
  shutdownAnalytics,
98
99
  } from '../analytics-posthog.js'
99
100
  import { emitRuntimeMetric } from '../runtime-metrics.js'
100
- import { decideOverPing } from '../over-ping-safety-net.js'
101
+ import { decideOverPing, type OverPingDecision } from '../over-ping-safety-net.js'
101
102
  import { decideSilentReplyAnchor } from '../silent-reply-anchor.js'
102
103
  import { classifyInbound } from '../inbound-classifier.js'
103
104
  import * as silencePoke from '../silence-poke.js'
104
105
  import * as pendingProgress from '../pending-work-progress.js'
105
106
  import { writeSilentEndState, clearSilentEndState, recordUndeliveredTurnEnd } from '../silent-end.js'
106
- import { isFinalAnswerReply, isSubstantiveFinalReply } from '../final-answer-detect.js'
107
+ import { isFinalAnswerReply, isSubstantiveFinalReply, FINAL_ANSWER_MIN_CHARS } from '../final-answer-detect.js'
108
+ import { deriveTurnRole, decideTerminalReason, parsePostAnswerLivenessMs, evaluatePostAnswerLiveness, type LoopRole } from '../turn-liveness-floor.js'
107
109
  import { createAnswerStream, type AnswerStreamHandle } from '../answer-stream.js'
108
- import { parseVisibleAnswerStreamEnabled, parseDraftLaneRetiredEnabled, resolveAnswerLaneConfig } from '../answer-stream-flag.js'
110
+ import { parseVisibleAnswerStreamEnabled, resolveAnswerLaneConfig } from '../answer-stream-flag.js'
109
111
  import { type SessionEvent } from '../session-tail.js'
110
112
  import {
111
113
  shouldSuppressToolActivity,
@@ -132,6 +134,7 @@ import {
132
134
  } from './microsoft-connect-flow.js'
133
135
  import { resolveAuthBrokerSocketPath } from '../../src/auth/broker/client.js'
134
136
  import { createFleetFallbackGate } from '../fleet-fallback-gate.js'
137
+ import { createFleetFallbackResumeGate } from '../fleet-fallback-resume.js'
135
138
  import { resolveExhaustUntil } from './exhaust-until.js'
136
139
  import {
137
140
  pendingAuthAddFlows,
@@ -165,7 +168,7 @@ import {
165
168
  formatModelUnavailableCard,
166
169
  resolveModelUnavailableFromOperatorEvent,
167
170
  } from '../model-unavailable.js'
168
- import { runFleetAutoFallback, renderFallbackFailureNotice, evaluateFallbackFailureNotice, type FallbackFailureNoticeState } from '../auto-fallback-fleet.js'
171
+ import { runFleetAutoFallback, renderFallbackFailureNotice, evaluateFallbackFailureNotice, evaluateAllBlockedNotice, type FallbackFailureNoticeState, type FallbackAllBlockedNoticeState } from '../auto-fallback-fleet.js'
169
172
  import { startRestartWatchdog } from './restart-watchdog.js'
170
173
  import { validateStringArray } from './access-validator.js'
171
174
 
@@ -221,6 +224,7 @@ import {
221
224
  isContextExhaustionText,
222
225
  shouldArmOrphanedReplyTimeout,
223
226
  ORPHANED_REPLY_TIMEOUT_MS,
227
+ ORPHANED_REPLY_MAX_REARMS,
224
228
  } from '../context-exhaustion.js'
225
229
  import {
226
230
  decideTurnFlush,
@@ -326,11 +330,24 @@ import {
326
330
  } from './obligation-ledger.js'
327
331
  import { loadObligations, persistObligations } from './obligation-store.js'
328
332
  import { driveEscalation } from './escalation-drive.js'
333
+ import { shouldSuppressRepresent } from './represent-guard.js'
329
334
  import { createInboundSpool } from './inbound-spool.js'
330
335
  import { purgeStaleTurnsForChat } from './turn-state-purge.js'
331
336
  import { decideInboundDelivery } from './inbound-delivery-gate.js'
332
337
  import { mayDrainBufferedInbound, shouldArmNoReplyDrain } from './serialize-drain-gate.js'
333
338
  import { decideFeedReopen } from './feed-reopen-gate.js'
339
+ import {
340
+ mayOpenActivityCard,
341
+ computeCrossTurnAnswerDelivered,
342
+ type FeedOpenProducer,
343
+ type FeedOpenGateDeps,
344
+ } from './feed-open-gate.js'
345
+ import {
346
+ EmissionAuthority,
347
+ EMISSION_AUTHORITY_ENABLED,
348
+ type CardDrainGateCtx,
349
+ } from './emission-authority.js'
350
+ import { CurrentTurnMap } from './current-turn-map.js'
334
351
  import { resolveAnswerThreadId } from './answer-thread-resolve.js'
335
352
  import {
336
353
  createDeliveryQueue,
@@ -411,6 +428,7 @@ import {
411
428
  // preceding shutdown only" semantics.
412
429
  clearCleanShutdownMarker,
413
430
  shouldSuppressRecoveryBanner,
431
+ shouldSuppressBootResume,
414
432
  resolveShutdownMarker,
415
433
  DEFAULT_MAX_AGE_MS as CLEAN_SHUTDOWN_MAX_AGE_MS,
416
434
  } from './clean-shutdown-marker.js'
@@ -468,8 +486,10 @@ import {
468
486
  resolveQuotaWatchTuning,
469
487
  buildQuotaClaimKey,
470
488
  QUOTA_WATCH_CLAIM_WINDOW_MS,
489
+ isLiveCorroboration,
471
490
  } from '../quota-watch.js'
472
491
  import { buildSnapshotsFromState, buildSnapshotsFromCachedState } from '../auth-snapshot-format.js'
492
+ import { maskUsername, maskVaultKey } from '../demo-mask.js'
473
493
  import {
474
494
  writeTurnActiveMarker,
475
495
  touchTurnActiveMarker,
@@ -739,20 +759,6 @@ const AGENT_ADMIN = process.env.SWITCHROOM_AGENT_ADMIN === 'true'
739
759
  const bot = new Bot(TOKEN)
740
760
  installTgPostLogger(bot)
741
761
 
742
- // Draft-answer-lane retirement (2026-06-05): default RETIRED so the live answer
743
- // lane uses a real, mtcute-observable message instead of the invisible
744
- // compose-box draft. Declared HERE (above the boot-probe block) because
745
- // `sendMessageDraftFn` below reads it — keep it above its first use to avoid a
746
- // temporal-dead-zone ReferenceError at boot. Kill switch
747
- // SWITCHROOM_DRAFT_ANSWER_LANE=0 restores the legacy draft.
748
- const DRAFT_ANSWER_LANE_RETIRED = parseDraftLaneRetiredEnabled(process.env.SWITCHROOM_DRAFT_ANSWER_LANE)
749
-
750
- // ─── sendMessageDraft boot probe ──────────────────────────────────────────
751
- // grammY 1.x exposes all Telegram Bot API methods through bot.api.raw.
752
- // bot.api.sendMessageDraft (the typed wrapper) takes chat_id as number, but
753
- // answer-stream passes chatId as string, so we bridge through raw with an
754
- // explicit Number() cast and positional → object param translation.
755
- const _rawSendMessageDraft = (bot.api.raw as unknown as Record<string, unknown>).sendMessageDraft
756
762
  const GRAMMY_VERSION: string = (() => {
757
763
  try {
758
764
  const raw = readFileSync(new URL('../../node_modules/grammy/package.json', import.meta.url), 'utf8')
@@ -761,22 +767,6 @@ const GRAMMY_VERSION: string = (() => {
761
767
  return 'unknown'
762
768
  }
763
769
  })()
764
- const sendMessageDraftFn: (
765
- (chatId: string, draftId: number, text: string, params?: { message_thread_id?: number; parse_mode?: 'HTML' }) => Promise<unknown>
766
- ) | undefined =
767
- // When the draft lane is retired (default), force this undefined so BOTH
768
- // consumers (the answer-stream config + the stream_reply handler) drop the
769
- // draft transport and fall back to visible message transport — the single
770
- // chokepoint for the retirement.
771
- !DRAFT_ANSWER_LANE_RETIRED && typeof _rawSendMessageDraft === 'function'
772
- ? (chatId, draftId, text, params) =>
773
- (_rawSendMessageDraft as (args: Record<string, unknown>) => Promise<unknown>)({
774
- chat_id: Number(chatId),
775
- draft_id: draftId,
776
- text,
777
- ...(params ?? {}),
778
- })
779
- : undefined
780
770
 
781
771
  // ─── sendChecklist / editMessageChecklist boot probes ─────────────────────
782
772
  // grammY 1.x exposes new Telegram Bot API methods via bot.api.raw before the
@@ -1157,43 +1147,70 @@ try {
1157
1147
  const pending = findLatestTurnIfInterrupted(turnsDb)
1158
1148
  const selfAgent = process.env.SWITCHROOM_AGENT_NAME ?? ''
1159
1149
  if (pending != null && selfAgent) {
1160
- // 3h staleness failsafe (operator spec, 2026-06-03): never AUTO-resume
1161
- // interrupted work older than RESUME_MAX_AGE_MS selectResumeBuilder
1162
- // downgrades a stale 'resume' to the passive 'report' so the user is told
1163
- // ("I was working on X ~Nh ago") but nothing replays unprompted. Env
1164
- // override SWITCHROOM_RESUME_MAX_AGE_MS (ms); set very high to disable.
1165
- const RESUME_MAX_AGE_MS = (() => {
1166
- const v = Number(process.env.SWITCHROOM_RESUME_MAX_AGE_MS)
1167
- return Number.isFinite(v) && v > 0 ? v : 10_800_000 // 3h
1168
- })()
1169
- const kind = selectResumeBuilder(pending.ended_via, {
1170
- ageMs: Math.max(0, Date.now() - pending.started_at),
1171
- maxAgeMs: RESUME_MAX_AGE_MS,
1150
+ // Clean-shutdown gate: suppress auto-resume when the prior shutdown was
1151
+ // operator/roll/CLI-initiated (clean). A clean-shutdown marker present and
1152
+ // fresh means the agent was asked to stop; the "interrupted" turn was
1153
+ // abandoned by that decision. Replaying it on every planned restart wastes
1154
+ // subscription quota for no user benefit. Only unclean exits (crash/OOM/
1155
+ // unexpected kill) should auto-resume.
1156
+ //
1157
+ // NOTE: GATEWAY_CLEAN_SHUTDOWN_MARKER_PATH is defined lower in this file
1158
+ // (module-init order); we compute the path inline here using the same
1159
+ // formula so we can read it at boot-resume time.
1160
+ // SWITCHROOM_BOOT_RESUME_ALWAYS=1 is an escape hatch that restores
1161
+ // unconditional resume if needed.
1162
+ const bootResumeMarkerPath =
1163
+ process.env.SWITCHROOM_GATEWAY_CLEAN_SHUTDOWN_MARKER ?? join(STATE_DIR, 'clean-shutdown.json')
1164
+ const bootResumeCleanMarker = readCleanShutdownMarker(bootResumeMarkerPath)
1165
+ const bootResumeForceAlways = process.env.SWITCHROOM_BOOT_RESUME_ALWAYS === '1'
1166
+ const bootResumeSuppressed = shouldSuppressBootResume(bootResumeCleanMarker, Date.now(), {
1167
+ forceAlways: bootResumeForceAlways,
1172
1168
  })
1173
- if (kind === 'resume') {
1174
- bootResumeInbound = { agent: selfAgent, msg: buildResumeInterruptedInbound({ turn: pending }) }
1175
- } else if (kind === 'report') {
1176
- // idleMs: this boot's measured marker age if it just classified this
1177
- // turn; otherwise recover it from the persisted interrupt_reason (a
1178
- // later boot, marker already swept); else fall back to total runtime.
1179
- let idleMs = pending.turn_key === timeoutTurnKey && markerAgeMs != null ? markerAgeMs : null
1180
- if (idleMs == null && pending.interrupt_reason) {
1181
- try {
1182
- const parsed = JSON.parse(pending.interrupt_reason) as { idleMs?: unknown }
1183
- if (typeof parsed.idleMs === 'number' && Number.isFinite(parsed.idleMs)) idleMs = parsed.idleMs
1184
- } catch { /* malformed snapshot — fall through */ }
1185
- }
1186
- if (idleMs == null) idleMs = Math.max(0, Date.now() - pending.started_at)
1187
- bootResumeInbound = {
1188
- agent: selfAgent,
1189
- msg: buildResumeWatchdogReportInbound({ turn: pending, idleMs }),
1190
- }
1191
- }
1192
- if (bootResumeInbound != null) {
1169
+ if (bootResumeSuppressed) {
1193
1170
  process.stderr.write(
1194
- `telegram gateway: boot-resume queued kind=${kind} turnKey=${pending.turn_key} ` +
1195
- `endedVia=${pending.ended_via ?? 'open'} chat=${pending.chat_id}\n`,
1171
+ `telegram gateway: boot-resume suppressed (clean shutdown` +
1172
+ `${bootResumeCleanMarker?.reason ? ` reason=${JSON.stringify(bootResumeCleanMarker.reason)}` : ''}` +
1173
+ `) — unclean exits still resume turnKey=${pending.turn_key}\n`,
1196
1174
  )
1175
+ } else {
1176
+ // 3h staleness failsafe (operator spec, 2026-06-03): never AUTO-resume
1177
+ // interrupted work older than RESUME_MAX_AGE_MS — selectResumeBuilder
1178
+ // downgrades a stale 'resume' to the passive 'report' so the user is told
1179
+ // ("I was working on X ~Nh ago") but nothing replays unprompted. Env
1180
+ // override SWITCHROOM_RESUME_MAX_AGE_MS (ms); set very high to disable.
1181
+ const RESUME_MAX_AGE_MS = (() => {
1182
+ const v = Number(process.env.SWITCHROOM_RESUME_MAX_AGE_MS)
1183
+ return Number.isFinite(v) && v > 0 ? v : 10_800_000 // 3h
1184
+ })()
1185
+ const kind = selectResumeBuilder(pending.ended_via, {
1186
+ ageMs: Math.max(0, Date.now() - pending.started_at),
1187
+ maxAgeMs: RESUME_MAX_AGE_MS,
1188
+ })
1189
+ if (kind === 'resume') {
1190
+ bootResumeInbound = { agent: selfAgent, msg: buildResumeInterruptedInbound({ turn: pending }) }
1191
+ } else if (kind === 'report') {
1192
+ // idleMs: this boot's measured marker age if it just classified this
1193
+ // turn; otherwise recover it from the persisted interrupt_reason (a
1194
+ // later boot, marker already swept); else fall back to total runtime.
1195
+ let idleMs = pending.turn_key === timeoutTurnKey && markerAgeMs != null ? markerAgeMs : null
1196
+ if (idleMs == null && pending.interrupt_reason) {
1197
+ try {
1198
+ const parsed = JSON.parse(pending.interrupt_reason) as { idleMs?: unknown }
1199
+ if (typeof parsed.idleMs === 'number' && Number.isFinite(parsed.idleMs)) idleMs = parsed.idleMs
1200
+ } catch { /* malformed snapshot — fall through */ }
1201
+ }
1202
+ if (idleMs == null) idleMs = Math.max(0, Date.now() - pending.started_at)
1203
+ bootResumeInbound = {
1204
+ agent: selfAgent,
1205
+ msg: buildResumeWatchdogReportInbound({ turn: pending, idleMs }),
1206
+ }
1207
+ }
1208
+ if (bootResumeInbound != null) {
1209
+ process.stderr.write(
1210
+ `telegram gateway: boot-resume queued kind=${kind} turnKey=${pending.turn_key} ` +
1211
+ `endedVia=${pending.ended_via ?? 'open'} chat=${pending.chat_id}\n`,
1212
+ )
1213
+ }
1197
1214
  }
1198
1215
  }
1199
1216
 
@@ -1425,6 +1442,21 @@ const activeTurnStartedAt = new Map<string, number>()
1425
1442
  // reading activeTurnStartedAt because they want the receipt timestamp.
1426
1443
  const claudeBusyKeys = new Set<string>()
1427
1444
 
1445
+ /**
1446
+ * #2527 observability: count emoji transitions per status-reaction controller
1447
+ * so `turn_no_reply_warn` can report how many reaction changes happened while
1448
+ * producing zero text. Keyed by statusKey(chatId, threadId); cleared in
1449
+ * purgeReactionTracking alongside the controller itself.
1450
+ */
1451
+ const reactionTransitionCounts = new Map<string, number>()
1452
+
1453
+ /**
1454
+ * #2527 observability: tracks which (chatId:threadId) keys have already emitted
1455
+ * a `turn_reply_timing` event this turn so we only fire it on the FIRST text
1456
+ * reply. Cleared in purgeReactionTracking at turn-end alongside the controller.
1457
+ */
1458
+ const firstTextReplyLogged = new Set<string>()
1459
+
1428
1460
  /**
1429
1461
  * Helper: stamp a claudeBusyKeys entry for an inbound about to be
1430
1462
  * handed to claude. Pulls the thread id from the top-level field if
@@ -1487,6 +1519,19 @@ const deliveryQueue = createDeliveryQueue<InboundMessage>()
1487
1519
  // SWITCHROOM_OBLIGATION_LEDGER=0 → every hook below is a no-op → zero change.
1488
1520
  const OBLIGATION_LEDGER_ENABLED = process.env.SWITCHROOM_OBLIGATION_LEDGER !== '0'
1489
1521
  const OBLIGATION_REPRESENT_MAX = 2
1522
+ // Minimum reply length (chars) the duplicate-represent guard (#2472/#2474) treats
1523
+ // as "the user was answered". DECOUPLED from the escalate branch's 200-char proxy:
1524
+ // for the represent guard ANY genuine assistant reply — even a terse "Yes — done."
1525
+ // — satisfies the obligation, so suppressing the duplicate re-ask must not require
1526
+ // 200 chars. Default 1 (any non-empty real reply; empty/whitespace rows are
1527
+ // clamped out inside hasOutboundDeliveredSince). Override via env for tuning. Safe
1528
+ // because only recordOutbound writes role='assistant' rows — progress-card edits
1529
+ // and typing indicators are never counted.
1530
+ const OBLIGATION_REPRESENT_GUARD_MIN_REPLY_CHARS = (() => {
1531
+ const raw = process.env.SWITCHROOM_OBLIGATION_REPRESENT_GUARD_MIN_REPLY_CHARS
1532
+ const n = raw != null ? Number.parseInt(raw, 10) : NaN
1533
+ return Number.isFinite(n) && n >= 1 ? n : 1
1534
+ })()
1490
1535
  const OBLIGATION_SWEEP_MS = 5_000
1491
1536
  // Bound on escalation SEND attempts. The escalation now closes only AFTER a
1492
1537
  // successful send (a transient failure stays OPEN and retries next sweep), so a
@@ -1710,6 +1755,38 @@ const FEED_REOPEN_AFTER_ACK_ENABLED =
1710
1755
  const FEED_HEARTBEAT_ENABLED = process.env.SWITCHROOM_FEED_HEARTBEAT !== '0'
1711
1756
  const FEED_HEARTBEAT_TICK_MS = 6_000
1712
1757
  const FEED_HEARTBEAT_MIN_STALE_MS = 6_000
1758
+ // Liveness-driven feed open. The activity feed is otherwise TOOL-driven — it
1759
+ // opens only when a tool emits a non-null label. A turn dominated by thinking
1760
+ // or by suppressed-by-design tools (typing / memory recall / reply) emits no
1761
+ // label, so the feed never opens and a long turn reads as pure silence until
1762
+ // the 300s silence-poke (the #680 dark-turn). When a turn has been alive >=
1763
+ // FEED_LIVENESS_OPEN_MS with no feed yet, open a minimal "Working…" feed so the
1764
+ // user always has a live indicator; the first real tool label edits it with
1765
+ // real content. Runs on the heartbeat interval, so the effective open lands in
1766
+ // [threshold, threshold + FEED_HEARTBEAT_TICK_MS). Kill switch:
1767
+ // SWITCHROOM_FEED_LIVENESS_OPEN=0. Default on.
1768
+ const FEED_LIVENESS_OPEN_ENABLED = process.env.SWITCHROOM_FEED_LIVENESS_OPEN !== '0'
1769
+ const FEED_LIVENESS_OPEN_MS = (() => {
1770
+ const raw = process.env.SWITCHROOM_FEED_LIVENESS_OPEN_MS
1771
+ const n = raw ? Number(raw) : NaN
1772
+ return Number.isFinite(n) && n > 0 ? n : 12_000
1773
+ })()
1774
+
1775
+ // Post-answer background-agent liveness STALENESS CAP (Fix 2 / #2587 supersede,
1776
+ // concern 3). The `feedHeartbeatTick` post-answer branch re-renders a "background
1777
+ // agent still working" card every FEED_HEARTBEAT_TICK_MS while the sub-agent
1778
+ // watcher keeps advancing `turn.subagentActivityAt`. Without a cap that card kept
1779
+ // emitting `state:'running'` with an ever-climbing `elapsed` FOREVER — even after
1780
+ // the worker's `onFinish` froze the timestamp — because (unlike the pre-answer
1781
+ // path's `FEED_LIVENESS_OPEN_MS` recency cap) the post-answer branch had no
1782
+ // staleness bound. This cap mirrors that pre-answer pattern: once the worker's
1783
+ // last advance is older than the cap, the heartbeat stops re-rendering and the
1784
+ // card freezes at its last state. Parsed via the same pure `parsePostAnswerLivenessMs`
1785
+ // helper (positive int or 0); `|| 30_000` supplies a default-ON 30s cap, so an
1786
+ // unset env keeps the cap active. Override with SWITCHROOM_POST_ANSWER_LIVENESS_STALE_MS.
1787
+ const POST_ANSWER_LIVENESS_STALE_MS = parsePostAnswerLivenessMs(
1788
+ process.env.SWITCHROOM_POST_ANSWER_LIVENESS_STALE_MS,
1789
+ ) || 30_000
1713
1790
 
1714
1791
  /** Compact mm/ss-ish elapsed for the live feed suffix: "18s", "1m05s". */
1715
1792
  function formatFeedElapsed(ms: number): string {
@@ -1884,6 +1961,27 @@ type CurrentTurn = {
1884
1961
  sourceMessageId: number | null
1885
1962
  startedAt: number
1886
1963
  gatewayReceiveAt: number
1964
+ // #2527 — the single turn-provenance discriminator, stamped once at
1965
+ // enqueue from the channel envelope `source`. `user` (a human is waiting:
1966
+ // never-silent guarantee + mid-turn floor), `system` (cron/scheduled:
1967
+ // silence is legitimate). The gateway never builds a turn atom for a
1968
+ // sub-agent, so `sub-agent` never appears here. Read by the mid-turn floor
1969
+ // eligibility and the role-aware terminal reaction gate. Replaces the
1970
+ // scattered `chatType`/`chatId==null`/`source==='cron'` predicates.
1971
+ role: LoopRole
1972
+ // PR1 (cross-turn stale-card guard, design `docs/message-emission-determinism.md`
1973
+ // §9 lever 4 / race C/D). Present ONLY when this turn is a cross-turn SYNTHETIC
1974
+ // surface whose card OPEN must be gated against an answer already delivered in
1975
+ // an EARLIER turn — i.e. an `obligation_represent` re-delivery (and the
1976
+ // liveness/heartbeat timer firing on it). `sinceMs` is the obligation's
1977
+ // `openedAt` — the moment the obligation was RAISED — so the card-OPEN gate
1978
+ // asks `hasOutboundDeliveredSince(chat, openedAt)`: did a substantive answer
1979
+ // already land since then? Stamped at the turn ctor from a pending marker that
1980
+ // `obligationSweep` writes when it pushes the represent inbound (see
1981
+ // `pendingCrossTurnGate`). `undefined` for a normal foreground turn → the
1982
+ // cross-turn lever-4 gate is inert there (the foreground turn's own card is
1983
+ // governed only by the per-turn `finalAnswerEverDelivered` latch, lever 1).
1984
+ crossTurnGate?: { sinceMs: number }
1887
1985
  replyCalled: boolean
1888
1986
  // #1664 — whether the model has delivered its *final answer* this turn
1889
1987
  // (as opposed to only an interim ack). `replyCalled` flips on the first
@@ -1912,6 +2010,18 @@ type CurrentTurn = {
1912
2010
  // Reset to false on every fresh-turn enqueue alongside
1913
2011
  // `finalAnswerDelivered`.
1914
2012
  finalAnswerSubstantive: boolean
2013
+ // Sticky "a substantive final answer has been delivered this turn" latch
2014
+ // (design `docs/message-emission-determinism.md` §9 preamble / R0). Distinct
2015
+ // from the MUTABLE `finalAnswerDelivered`, which the ack-reopen path clears
2016
+ // mid-turn (`feed-reopen-gate.ts:157`) so an "On it…" ack keeps a live feed
2017
+ // (#2141). Ordering gates (the no-OPEN-after-final card gate, lever 1) MUST
2018
+ // key on this sticky latch, not the mutable flag — keying on the mutable flag
2019
+ // is a no-op on exactly the ack-first turn where the reorder originates. Set
2020
+ // true ONLY at the points that set `finalAnswerDelivered = true` AND only when
2021
+ // the reply is `isSubstantiveFinalReply`; NEVER cleared by reopen. Reset to
2022
+ // false ONLY at turn start, mirroring `activityEverOpened`'s sticky-true
2023
+ // contract.
2024
+ finalAnswerEverDelivered: boolean
1915
2025
  // #1675 (over-ping safety net): wall-clock ms of the first reply
1916
2026
  // this turn that landed with `disable_notification: false` (a real
1917
2027
  // device ping). The conversational-pacing contract
@@ -1923,6 +2033,18 @@ type CurrentTurn = {
1923
2033
  // the framework. Null until the first ping lands. Reset on every
1924
2034
  // fresh-turn enqueue.
1925
2035
  firstPingAt: number | null
2036
+ // Notification ownership (R8 / PR-2 — design
2037
+ // `docs/message-emission-determinism.md` §over-ping). Whether the send that CLAIMED this turn's
2038
+ // ping slot (`firstPingAt`) was itself a *substantive* final answer
2039
+ // (`isSubstantiveFinalReply`) as opposed to a short interim ACK. The
2040
+ // over-ping safety net keys on this so a substantive answer pinging AFTER
2041
+ // an ack already pinged is UPGRADED (let through, owns the slot) rather
2042
+ // than silenced — otherwise "the reply is last but the phone never buzzed
2043
+ // for the answer." Set ATOMICALLY with `firstPingAt` (same synchronous
2044
+ // block, no await between) on a claim/upgrade so a racing second reply
2045
+ // reads a consistent pair. Init false; reset to false on every fresh-turn
2046
+ // enqueue alongside `firstPingAt`.
2047
+ firstPingWasSubstantive: boolean
1926
2048
  // #1677 silent-reply auto-edit. The first silent reply of a turn
1927
2049
  // captures `silentAnchorMessageId` + `silentAnchorText`; subsequent
1928
2050
  // silent replies in the SAME turn editMessageText that anchor
@@ -1935,6 +2057,13 @@ type CurrentTurn = {
1935
2057
  silentAnchorText: string
1936
2058
  capturedText: string[]
1937
2059
  orphanedReplyTimeoutId: ReturnType<typeof setTimeout> | null
2060
+ // How many times the orphaned-reply backstop timer has been re-armed
2061
+ // mid-tool-call instead of firing a synthetic turn_end. Bounded so a
2062
+ // genuinely wedged single long-running tool still surfaces: the cap is
2063
+ // ORPHANED_REPLY_MAX_REARMS (20 × 30 s = 10 min of genuine tool activity).
2064
+ // Reset to 0 on a fresh enqueue; NOT reset on text/tool_label re-arms —
2065
+ // only a new turn resets the budget.
2066
+ orphanedReplyRearmCount: number
1938
2067
  // Component 3 (turn-origin reply routing). A stable per-turn identity,
1939
2068
  // `${registryKey-or-chatKey}#${startedAt}`, assigned when the turn
1940
2069
  // starts and stamped into the inbound meta (`origin_turn_id`) so a reply
@@ -1962,6 +2091,14 @@ type CurrentTurn = {
1962
2091
  // Phase 1 of #332: count of tool_use events in the current turn, for
1963
2092
  // the tool_call_count column in the turns registry.
1964
2093
  toolCallCount: number
2094
+ // Count of tool_label events that passed the isTelegramSurfaceTool guard
2095
+ // this turn — the deterministic, surface-tool-excluded step count used by
2096
+ // the `✓ N steps` activity-feed total and the `tools=` lifecycle log.
2097
+ // Incremented in `case 'tool_label':` AFTER the surface-tool guard so
2098
+ // reply/stream_reply/edit_message/react are never counted. send_typing and
2099
+ // sync_retain are suppressed at the hook (computeLabel returns null) and
2100
+ // never arrive as tool_label events — excluded automatically.
2101
+ labeledToolCount: number
1965
2102
  // Tool-activity summary — mirrors Claude Code's native chat-UI
1966
2103
  // rendering ("Ran 5 commands, read a file"). Counters are
1967
2104
  // incremented in `case 'tool_use'`; `activityMessageId` holds the
@@ -2002,11 +2139,45 @@ type CurrentTurn = {
2002
2139
  // step that emits no new label doesn't read as frozen (the feed is otherwise
2003
2140
  // pull-only). undefined until the first label of the turn renders.
2004
2141
  lastToolLabelAt?: number
2142
+ // Fix 2 (post-answer background-agent liveness): wall-clock timestamp last
2143
+ // updated by the sub-agent/workflow watcher's onProgress callback whenever
2144
+ // it surfaces a NEW sub-agent step AFTER this turn's substantive answer was
2145
+ // delivered. Written INDEPENDENTLY of the tool_label path so the drop-guard
2146
+ // (`shouldReopenFeedAfterAck` / `finalAnswerSubstantive`) cannot gate it.
2147
+ // `feedHeartbeatTick` reads THIS (not `lastToolLabelAt`, which is frozen by
2148
+ // the drop-guard) to drive the post-answer liveness card — the core fix for
2149
+ // #2587's inert state. undefined until the first post-answer watcher advance.
2150
+ subagentActivityAt?: number
2151
+ // Sticky wall-clock timestamp when finalAnswerEverDelivered first latched
2152
+ // true this turn. Allows the heartbeat to distinguish "tool label arrived
2153
+ // before the answer" (lastToolLabelAt ≤ finalAnswerDeliveredAt, inert) from
2154
+ // "sub-agent active after the answer" (subagentActivityAt >
2155
+ // finalAnswerDeliveredAt, liveness card warranted). undefined until the
2156
+ // first substantive final answer of the turn.
2157
+ finalAnswerDeliveredAt?: number
2005
2158
  // Accumulating friendly-action feed for this turn. Each non-surface
2006
2159
  // tool_label appends a line via `appendActivityLabel`; the feed renders
2007
2160
  // (via `renderActivityFeed`) as a capped chronological list into the
2008
2161
  // in-place edited activity message and clears on reply. Reset per turn.
2009
2162
  mirrorLines: string[]
2163
+ // Narrative-dedup gate state (JSONL-text-narrative primitive). A `text`
2164
+ // block is held here for ONE lookahead step so the next event (a tool_use
2165
+ // or turn_end) can decide draft-then-send (SUPPRESS, it duplicates the
2166
+ // reply) vs working-narration (SHOW it as a transient mirrorLines step).
2167
+ // Null when nothing is pending. The pure decision lives in
2168
+ // narrative-dedup.ts; this slot is the per-turn cursor. Reset per turn.
2169
+ // Invariant `chat-is-the-single-source-of-truth`: a SHOWN narrative is
2170
+ // rendered through the SAME appendActivityLabel→renderStepFeed path as a
2171
+ // tool step — a transient, clipped, rolling-window line replaced by the
2172
+ // next event, never a persisted parallel mirror.
2173
+ pendingNarrative: { text: string } | null
2174
+ // Most-recently-seen reply/stream_reply `input.text` for this turn — the
2175
+ // ACTUAL delivered answer surface. Set wherever a REPLY_TOOL tool_use is
2176
+ // handled in the reducer. `flushPendingNarrativeAtTurnEnd` compares a
2177
+ // trailing narrative block against THIS (not capturedText.join(''), which
2178
+ // can mis-suppress when the model emits the same short string twice in a
2179
+ // turn). Empty string until the turn delivers a reply. Reset per turn.
2180
+ lastReplyText: string
2010
2181
  // Model A — foreground sub-agent nesting. A foreground sub-agent (Task/Agent
2011
2182
  // with no run_in_background) runs INSIDE this turn while the parent blocks at
2012
2183
  // the Task tool, so its live steps nest under the parent's activity feed
@@ -2019,9 +2190,169 @@ type CurrentTurn = {
2019
2190
  // gates on minInitialChars). Materialized and cleared at turn_end.
2020
2191
  answerStream: AnswerStreamHandle | null
2021
2192
  isDm: boolean
2193
+ // PR-4a (message-emission-determinism, `emission-authority.ts`). The
2194
+ // per-foreground-turn emission-authority façade the foreground-lane card/ping
2195
+ // emission call sites route through. Constructed ONCE per turn in the ctor
2196
+ // with the chat/thread key passed in explicitly (the PR-4e seam). Per-turn
2197
+ // only — a fresh `CurrentTurn` literal gets a fresh façade, so it never
2198
+ // persists across turns. Optional in the type so the bounded recently-ended
2199
+ // registry's older entries (and any hand-built test turn) tolerate its
2200
+ // absence; `emissionAuthorityFor` lazily backfills one when missing.
2201
+ emissionAuthority?: EmissionAuthority
2202
+ }
2203
+
2204
+ // PR-4e — the singleton `currentTurn` is RETAINED as (a) the flag-OFF store and
2205
+ // (b) the flag-ON "most-recent-set" MIRROR. Every GLOBAL-liveness read in this
2206
+ // file (`isBusy`, the `if (currentTurn != null) return` poke guards, the
2207
+ // orphaned-reply guard, the synchronous-to-live-turn `const turn = currentTurn`
2208
+ // captures) keeps reading this variable, so under the sequential-CLI invariant
2209
+ // (the most-recently-set turn IS the live turn) those reads stay byte-identical.
2210
+ // The per-topic isolation lives in `currentTurnMap.byKey`: a LATE async event
2211
+ // captured for topic A resolves A's authority by ITS OWN key even after the live
2212
+ // turn flipped to topic B (see current-turn-map.ts). Under the flag OFF the map
2213
+ // is never written and this is exactly the old singleton.
2214
+ let currentTurn: CurrentTurn | null = null
2215
+ const currentTurnMap = new CurrentTurnMap<CurrentTurn>()
2216
+
2217
+ /**
2218
+ * Set the live turn for `key`. Flag-branches in ONE place (inside the map):
2219
+ * flag-OFF assigns the singleton only; flag-ON sets the per-topic entry AND
2220
+ * updates the most-recent mirror. We keep the module-scope `currentTurn`
2221
+ * variable in lock-step with the map's mirror so the 140 unchanged global reads
2222
+ * see the same value.
2223
+ */
2224
+ function setCurrentTurn(turn: CurrentTurn, key: string): void {
2225
+ currentTurnMap.set(turn, key)
2226
+ currentTurn = currentTurnMap.get() // mirror most-recent-set (== `turn`)
2022
2227
  }
2023
2228
 
2024
- let currentTurn: CurrentTurn | null = null
2229
+ /**
2230
+ * End (delete) the live turn for `key`, iff `key` still maps to `turn`. Routes
2231
+ * the clear through the keyed accessor (leak-close-at-origin) and re-syncs the
2232
+ * module-scope mirror to the map's mirror.
2233
+ */
2234
+ function endCurrentTurnForKey(turn: CurrentTurn, key: string): boolean {
2235
+ const ended = currentTurnMap.endTurnForKey(turn, key)
2236
+ currentTurn = currentTurnMap.get() // re-sync mirror (null iff it pointed at turn)
2237
+ return ended
2238
+ }
2239
+
2240
+ /**
2241
+ * Clear the ENTIRE per-topic store + mirror (disconnect-flush / bridge-died:
2242
+ * every entry is a ghost).
2243
+ */
2244
+ function clearAllCurrentTurns(): void {
2245
+ currentTurnMap.clearAll()
2246
+ currentTurn = null
2247
+ }
2248
+
2249
+ /**
2250
+ * Is `turn` still the live turn FOR ITS OWN topic? Flag-OFF: `currentTurn ===
2251
+ * turn` (the ambient check, verbatim). Flag-ON: `byKey.get(turn'sKey) === turn`,
2252
+ * so a B-flip never falsifies A's own liveness. The callsites keep the literal
2253
+ * `currentTurn === turn` in source (the silence-liveness-wiring oracle) by
2254
+ * inlining the flag-OFF branch and delegating the flag-ON branch here.
2255
+ */
2256
+ function turnLiveForItsTopic(turn: CurrentTurn): boolean {
2257
+ return currentTurnMap.isLiveForKey(
2258
+ turn,
2259
+ statusKey(turn.sessionChatId, turn.sessionThreadId),
2260
+ )
2261
+ }
2262
+
2263
+ /**
2264
+ * Accessor for a turn's per-foreground-turn emission-authority façade (PR-4a).
2265
+ * Returns the façade constructed at the turn ctor; lazily backfills one (keyed
2266
+ * on the turn's chat/thread) for any turn that predates the field or was built
2267
+ * outside the ctor. Per-turn: the memoized instance lives on the turn object,
2268
+ * so it is discarded with the turn and never persists across turns.
2269
+ */
2270
+ function emissionAuthorityFor(turn: CurrentTurn): EmissionAuthority {
2271
+ if (turn.emissionAuthority == null) {
2272
+ turn.emissionAuthority = new EmissionAuthority(
2273
+ statusKey(turn.sessionChatId, turn.sessionThreadId),
2274
+ )
2275
+ }
2276
+ // PR-4b — CENTRALIZE the OPEN-gate wiring here, the single accessor every
2277
+ // routed call site already funnels through, so all 6 `openOrEditCard(...)`
2278
+ // sites stay byte-identical `(producer, apply)`. The façade reads the LIVE
2279
+ // turn view (a thunk — the card id / latch / tool-count mutate during the
2280
+ // turn) + the injected history deps from this one place, not per-call.
2281
+ // Idempotent; harmless under the flag OFF (the disabled branch never reads
2282
+ // it). The turn IS a structural `FeedOpenGateView`.
2283
+ turn.emissionAuthority.wireFeedOpenGate(() => turn, feedOpenGateDeps())
2284
+ return turn.emissionAuthority
2285
+ }
2286
+
2287
+ /**
2288
+ * The injected history dependencies the PR-4b OPEN gate needs (the real
2289
+ * `hasOutboundDeliveredSince` predicate + `HISTORY_ENABLED` + the substantive
2290
+ * `FINAL_ANSWER_MIN_CHARS` floor). Centralized so both the façade's enabled
2291
+ * branch AND the drain's own (now-redundant) inline gate consume the SAME deps
2292
+ * via the SAME pure helpers — flag-ON and flag-OFF cannot diverge. Keeps
2293
+ * `feed-open-gate.ts` sqlite-free (it never imports `history.js`).
2294
+ */
2295
+ function feedOpenGateDeps(): FeedOpenGateDeps {
2296
+ return {
2297
+ hasOutboundDeliveredSince,
2298
+ historyEnabled: HISTORY_ENABLED,
2299
+ finalAnswerMinChars: FINAL_ANSWER_MIN_CHARS,
2300
+ }
2301
+ }
2302
+
2303
+ /**
2304
+ * The deliver-before-drain inputs the PR-4d card-drain gate threads into the
2305
+ * façade's pure `mayDrainCardNow`. Centralized so the card-drain helper sources
2306
+ * the SAME `turnInFlightForGate()` + kill-switch the buffer-drain gate uses.
2307
+ *
2308
+ * `endingTurnFinalAnswerDelivered` is FIXED to `null` for the card path (§5
2309
+ * modeling decision): the live foreground card single-flight is governed by
2310
+ * `turn.activityInFlight` (via `mayDrain`), NOT by an ending turn's delivery
2311
+ * state — so `mayDrainBufferedInbound` degenerates to `!turnInFlight` and a
2312
+ * synthetic represent turn (finalAnswerDelivered=false) can never wedge the card.
2313
+ */
2314
+ function cardDrainGateCtx(): CardDrainGateCtx {
2315
+ return {
2316
+ turnInFlight: turnInFlightForGate(),
2317
+ endingTurnFinalAnswerDelivered: null,
2318
+ enabled: SERIALIZE_UNTIL_REPLIED_ENABLED,
2319
+ }
2320
+ }
2321
+
2322
+ /**
2323
+ * PR-4d centralized card-drain gate (Option A). The 6 foreground card-drain
2324
+ * sites pass their EXISTING single-flight-guarded block (the `mayDrain` guard +
2325
+ * the `openOrEditCard(producer, …)` thunk that assigns `turn.activityInFlight =
2326
+ * drainActivitySummary(…)`) in VERBATIM as `run`, so those load-bearing literals
2327
+ * stay byte-identical (the wiring oracles still see them).
2328
+ *
2329
+ * - **Flag OFF (default):** runs the guarded block directly — NO `chatLock`
2330
+ * wrapper, byte-equivalent to base.
2331
+ * - **Flag ON:** acquires `chatLock` AROUND the deliver-before-drain decision
2332
+ * (`mayDrainCardNow`) + the synchronous block, unifying the card path with
2333
+ * the #2137 serialization gate. The lock spans ONLY the gate decision + the
2334
+ * synchronous `openOrEditCard` kick-off inside `run` (which only ASSIGNS
2335
+ * `turn.activityInFlight = drainActivitySummary(...)`; the async send is NOT
2336
+ * awaited inside the lock). The lock is released before any drain
2337
+ * `await sendMessage` suspends, so a card OPEN never holds `chatLock` across
2338
+ * the gate's release — a synthetic represent turn can never wedge the gate,
2339
+ * and `mayDrain` stays callable lock-free.
2340
+ *
2341
+ * LOCK ORDERING (no-deadlock invariant): `chatLock` is acquired EXCLUSIVELY
2342
+ * here, around the gate; never the reverse. `mayDrainCardNow` is a pure read.
2343
+ */
2344
+ function cardDrainGate(turn: CurrentTurn, ea: EmissionAuthority, run: () => void): void {
2345
+ if (EMISSION_AUTHORITY_ENABLED) {
2346
+ void chatLock.run(
2347
+ statusKey(turn.sessionChatId, turn.sessionThreadId),
2348
+ async () => {
2349
+ if (ea.mayDrainCardNow(turn, cardDrainGateCtx())) run()
2350
+ },
2351
+ )
2352
+ return
2353
+ }
2354
+ run()
2355
+ }
2025
2356
 
2026
2357
  // Component 3 (turn-origin reply routing). Recently-ended turns retained
2027
2358
  // by `turnId` so a LATE reply (the Brevo answer landing ~42s after
@@ -2115,7 +2446,22 @@ function deriveTurnId(
2115
2446
  */
2116
2447
  function findTurnByOriginId(originTurnId: string | null | undefined): CurrentTurn | null {
2117
2448
  if (originTurnId == null || originTurnId === '') return null
2118
- if (currentTurn != null && currentTurn.turnId === originTurnId) return currentTurn
2449
+ // PR-4e — resolve the LIVE turn by ITS OWN topic key under the flag. The
2450
+ // turnId encodes the key: `deriveTurnId` builds `${chatKey}#${messageId}`, so
2451
+ // the substring before `#` IS the statusKey. Flag-ON does an O(1)
2452
+ // `byKey.get(key)` and matches on turnId — so a reply whose origin turn is
2453
+ // STILL live for topic A resolves A even after the singleton mirror flipped to
2454
+ // B. Flag-OFF keeps the singleton check, verbatim. The recentTurnsById
2455
+ // registry fallback is UNCHANGED in both branches.
2456
+ if (EMISSION_AUTHORITY_ENABLED) {
2457
+ const hashIdx = originTurnId.indexOf('#')
2458
+ if (hashIdx > 0) {
2459
+ const live = currentTurnMap.get(originTurnId.slice(0, hashIdx))
2460
+ if (live != null && live.turnId === originTurnId) return live
2461
+ }
2462
+ } else if (currentTurn != null && currentTurn.turnId === originTurnId) {
2463
+ return currentTurn
2464
+ }
2119
2465
  return recentTurnsById.get(originTurnId) ?? null
2120
2466
  }
2121
2467
 
@@ -2368,7 +2714,10 @@ function postQueuedStatus(chatId: string, bufferedThread: number, inFlightThread
2368
2714
  void (async () => {
2369
2715
  const sent = await swallowingApiCall(
2370
2716
  () =>
2371
- bot.api.sendMessage(chatId, text, { message_thread_id: bufferedThread }),
2717
+ // Queued-placeholder status, not the user's answer — silence the
2718
+ // open ping (BORDERLINE: it's a "your message is queued" notice;
2719
+ // see PR description).
2720
+ bot.api.sendMessage(chatId, text, { message_thread_id: bufferedThread, disable_notification: true }),
2372
2721
  { chat_id: chatId, verb: 'queued-status.post', threadId: bufferedThread },
2373
2722
  )
2374
2723
  const messageId = (sent as { message_id?: number } | undefined)?.message_id
@@ -2542,6 +2891,16 @@ const preambleSuppressor = new PreambleSuppressor({
2542
2891
  // long-lived and flushes can occur outside any session-event
2543
2892
  // handler's scope. If the turn has been cleared, the update is
2544
2893
  // dropped (no chat to send to, no stream to mutate).
2894
+ //
2895
+ // PR-4e — the module-scope suppressor carries NO per-topic key (it is a
2896
+ // single global instance reset/flushed per turn), so there is no key to
2897
+ // scope by here: the correct resolution under BOTH flag states is the
2898
+ // most-recent-set live turn — exactly the singleton mirror `currentTurn`.
2899
+ // The per-turn `reset()` / `flushNow()` / `dropNow()` lifecycle (driven from
2900
+ // the live turn's own handlers) keeps the suppressor aligned with whichever
2901
+ // topic is currently live, so the mirror read is byte-identical to base and
2902
+ // cannot leak A's answer text into B (a flush for A runs while A is still the
2903
+ // most-recent turn; once B flips, A's stream is already force-superseded).
2545
2904
  const stream = currentTurn?.answerStream ?? null
2546
2905
  if (stream != null) stream.update(cumulative)
2547
2906
  },
@@ -2590,6 +2949,28 @@ function streamKey(chatId: string, threadId?: number | null): string {
2590
2949
  return chatKey(chatId, threadId)
2591
2950
  }
2592
2951
 
2952
+ // PR1 (cross-turn stale-card guard, design §9 lever 4 / race C/D).
2953
+ // `obligationSweep` writes one entry here, keyed on the obligation's
2954
+ // `originTurnId`, the instant it pushes an `obligation_represent` inbound —
2955
+ // carrying the obligation's `openedAt` (when the obligation was RAISED). The
2956
+ // represent inbound reuses the obligation's chat/thread/messageId, so the
2957
+ // `enqueue` that spawns the synthetic represent turn reconstructs the SAME
2958
+ // `deriveTurnId` value as the key. That represent turn — and ONLY that turn —
2959
+ // consumes and clears it, stamping `turn.crossTurnGate = { sinceMs: openedAt }`.
2960
+ // That turn's first card-OPEN then consults `hasOutboundDeliveredSince(chat,
2961
+ // openedAt)` and is suppressed iff a SUBSTANTIVE answer already landed since the
2962
+ // obligation was raised — so a "thinking…" card never narrates beneath an answer
2963
+ // the user already received in the original turn. Keying on `originTurnId` (not
2964
+ // chat/thread) means an unrelated later foreground turn on the same chat/thread
2965
+ // derives a different turn id, finds no entry, and is never mis-gated — closing
2966
+ // the residual cross-contamination window where a represent that was armed but
2967
+ // never enqueued (degenerate bridge-death) left a stale chat/thread-keyed gate
2968
+ // that the next foreground turn could wrongly consume. A normal foreground turn
2969
+ // never has an entry here, so the gate stays scoped to the synthetic surface.
2970
+ // The map holds at most one entry per obligation; re-arming the same obligation
2971
+ // overwrites its own entry with the latest openedAt.
2972
+ const pendingCrossTurnGate = new Map<string, { sinceMs: number }>()
2973
+
2593
2974
  /**
2594
2975
  * Component 1 — deliver-before-drain. The single chokepoint that both
2595
2976
  * turn-end drain sites (`purgeReactionTracking`, `releaseTurnBufferGate`)
@@ -2729,6 +3110,10 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
2729
3110
  // the markClaudeBusyForInbound on the delivery path. Safe no-op
2730
3111
  // when the key was never marked (synthetic purge from a sweep).
2731
3112
  claudeBusyKeys.delete(key)
3113
+ // #2527: clear the per-key reaction-transition counter and first-reply
3114
+ // sentinel alongside the controller so we don't leak state across turns.
3115
+ reactionTransitionCounts.delete(key)
3116
+ firstTextReplyLogged.delete(key)
2732
3117
  // Human-feel UX: stop the turn-long `typing…` indicator started in
2733
3118
  // the turn-start block. `purgeReactionTracking` is the canonical
2734
3119
  // turn-end, so this is the single owner of the stop. (If an abnormal
@@ -2933,8 +3318,16 @@ function releaseTurnBufferGate(key: string, endingTurn?: CurrentTurn): void {
2933
3318
  * gone — handlers that already purge elsewhere are unharmed.
2934
3319
  */
2935
3320
  function endCurrentTurnAtomic(turn: CurrentTurn): void {
2936
- if (currentTurn !== turn) return
2937
- currentTurn = null
3321
+ // PR-4e — keyed liveness + keyed clear (leak-close-at-origin). Flag-OFF: the
3322
+ // guard is `currentTurn === turn` and the clear nulls the singleton, verbatim.
3323
+ // Flag-ON: the guard becomes `byKey.get(turn'sKey) === turn` (so a flip to
3324
+ // another topic doesn't spuriously short-circuit THIS topic's teardown) and
3325
+ // the clear does `byKey.delete(key)` + nulls the mirror iff it still points at
3326
+ // `turn`. `endCurrentTurnForKey` returns false (no delete) when the entry no
3327
+ // longer matches — the same early-return semantics as the old `!== turn` guard.
3328
+ const key = statusKey(turn.sessionChatId, turn.sessionThreadId)
3329
+ if (!turnLiveForItsTopic(turn)) return
3330
+ endCurrentTurnForKey(turn, key) // currentTurnByKey.delete(key) + mirror clear
2938
3331
  // Status-surface observability: one line at every turn CLEAR (with how far
2939
3332
  // the turn got), plus a DEGRADED warning when the turn did tool work but the
2940
3333
  // live feed never opened because its sends failed (the resume-400 signature).
@@ -3199,6 +3592,7 @@ async function postIdleClearNotice(idleClearMs: number): Promise<void> {
3199
3592
  () =>
3200
3593
  bot.api.sendMessage(chatId, text, {
3201
3594
  parse_mode: 'HTML',
3595
+ disable_notification: true,
3202
3596
  ...(threadId != null ? { message_thread_id: threadId } : {}),
3203
3597
  }),
3204
3598
  { chat_id: chatId, verb: 'idleAutoClear.notice' },
@@ -3340,7 +3734,7 @@ async function resolveCompactCard(
3340
3734
  function finalizeStatusReaction(
3341
3735
  chatId: string,
3342
3736
  threadId: number | undefined,
3343
- reason: 'done' | 'error' = 'done',
3737
+ reason: 'done' | 'undelivered' | 'error' = 'done',
3344
3738
  ): void {
3345
3739
  const key = statusKey(chatId, threadId)
3346
3740
  const ctrl = activeStatusReactions.get(key)
@@ -3354,6 +3748,17 @@ function finalizeStatusReaction(
3354
3748
  if (reason === 'done' && deferredDoneReactions.tryDefer(key, ctrl)) return
3355
3749
  deferredDoneReactions.drop(key)
3356
3750
  ctrl.finalize(reason)
3751
+ // #2527: log controller dispose so the lifecycle end is observable. Use
3752
+ // activeReactionMsgIds to reconstruct the turnId token before purge clears it.
3753
+ const msgInfo = activeReactionMsgIds.get(key)
3754
+ if (msgInfo != null) {
3755
+ logStreamingEvent({
3756
+ kind: 'status_reaction_dispose',
3757
+ chatId,
3758
+ turnId: `${chatId}:${msgInfo.messageId}`,
3759
+ reason,
3760
+ })
3761
+ }
3357
3762
  purgeReactionTracking(key)
3358
3763
  }
3359
3764
 
@@ -4960,10 +5365,10 @@ function postLegacyBanner(
4960
5365
  // short-circuit to no-ops at runtime. `progressDriver` is typed `any`
4961
5366
  // so TS doesn't resolve `progressDriver?.X` to `never`.
4962
5367
  const streamMode = process.env.SWITCHROOM_TG_STREAM_MODE ?? 'checklist'
4963
- // PR B: per-agent stream throttle override via channels.telegram.stream_throttle_ms.
4964
- // When unset, draft-stream.ts applies transport-aware defaults (300 ms draft,
4965
- // 1000 ms message). Parsed once at boot; sub-zero / NaN values fall back to
4966
- // undefined so the per-transport default wins. See `src/agents/scaffold.ts`
5368
+ // Per-agent stream throttle override via channels.telegram.stream_throttle_ms.
5369
+ // When unset, draft-stream.ts applies DM/group defaults (400 ms DMs, 1000 ms
5370
+ // groups). Parsed once at boot; sub-zero / NaN values fall back to undefined
5371
+ // so the per-chat-type default wins. See `src/agents/scaffold.ts`
4967
5372
  // `channelsToEnv()` for the yaml → env wiring.
4968
5373
  const STREAM_THROTTLE_MS_OVERRIDE: number | undefined = (() => {
4969
5374
  const raw = process.env.SWITCHROOM_TG_STREAM_THROTTLE_MS
@@ -4973,74 +5378,30 @@ const STREAM_THROTTLE_MS_OVERRIDE: number | undefined = (() => {
4973
5378
  })()
4974
5379
  const TURN_FLUSH_SAFETY_ENABLED = isTurnFlushSafetyEnabled()
4975
5380
 
4976
- // #869-Phase1 / openclaw-pattern. When SET, the answer-lane stream
4977
- // (telegram-plugin/answer-stream.ts) renders the model's transcript
4978
- // text as a USER-VISIBLE edit-in-place message instead of writing to
4979
- // Telegram's invisible compose-box draft (which is the default and
4980
- // supports the #1664 "retract + re-prompt" contract). With this flag
4981
- // on:
4982
- // 1. createAnswerStream is instantiated without `sendMessageDraft`,
4983
- // so it falls back to `sendMessage` + `editMessageText` for a
4984
- // real chat-timeline message (`answer-stream.ts:212-214`).
4985
- // 2. minInitialChars is set to 1 — the first text chunk pushes a
4986
- // visible message immediately (TTFO under 5s for short turns).
4987
- // 3. At turn_end, if the model never called reply / stream_reply
4988
- // AND the streamed message has substantive captured text, the
4989
- // gateway DOES NOT retract (which would delete a user-visible
4990
- // message the user has been reading live); it calls
4991
- // `stream.stop()` to freeze the current text as the final
4992
- // answer, records the message in dedup + history, and marks
4993
- // `turn.finalAnswerDelivered = true` so the #1664 silent-end
4994
- // re-prompt does not fire. Turn-flush is suppressed for this
4995
- // branch — its job (deliver captured text) is structurally
4996
- // already done by the visible stream.
4997
- // 4. The reply-tool / stream_reply path is unchanged — when the
4998
- // model uses an explicit reply tool the prior streamed message
4999
- // is retracted (delete) and the reply takes over as before.
5000
- // Trade-off: a stream-as-final-answer turn does NOT push a device
5001
- // notification (Telegram does not notify on edits, and we choose
5002
- // not to send a duplicate fresh message for the ping). For short
5003
- // turns where the user is actively watching, this is the right
5004
- // shape — they see the answer materialise live. For longer waits,
5005
- // the cross-turn pending-progress system (#1445/#1669) is the
5006
- // canonical surface and DOES ping at the appropriate boundaries.
5007
- //
5008
- // 2026-05-25: default flipped ON after a fleet-log audit showed a ~19%
5009
- // framework-fallback ("still working…") rate — the visible stream gave an
5010
- // immediate in-timeline signal that suppressed the silence-poke.
5011
- //
5012
- // 2026-06-03: default flipped back OFF (operator request). In practice the
5013
- // visible stream delivered ~none of its intended benefit while imposing a
5014
- // jarring cost:
5015
- // - Telegram rate-limits editMessageText to roughly once/second, so real
5016
- // "watch it type" streaming is impossible; and the model emits almost no
5017
- // interstitial assistant.text (it thinks → tool → reply), so the
5018
- // preliminary was a near-empty bubble (observed: 5–13 byte edits).
5019
- // - On every turn where the model calls the reply tool (≈always), the reply
5020
- // posts a SEPARATE canonical message and the stream RETRACTS (deletes) its
5021
- // preliminary — the user sees a raw bubble appear then vanish, replaced by
5022
- // the formatted reply. In supergroup topics it also mis-routed (preliminary
5023
- // → General, reply → topic). Net: an unformatted flash + a delete, no
5024
- // streaming value.
5025
- // The anti-silence role the visible stream once filled is now covered by the
5026
- // live ACTIVITY FEED (tool-use streaming, below), the "…typing" chat-action
5027
- // loop, and `thinking_effort: low` (fast tool-less turns) — so off-by-default
5028
- // no longer regresses the framework-fallback rate. With the flag off the lane
5029
- // uses the invisible compose-box draft (the original default, #1664-compatible)
5030
- // and the reply tool is the single canonical, formatted message.
5381
+ // When SET, the answer-lane stream (telegram-plugin/answer-stream.ts) renders
5382
+ // the model's transcript text as a USER-VISIBLE edit-in-place message. Default
5383
+ // OFF: the lane stays dormant and the reply tool is the single canonical
5384
+ // formatted message — no unformatted preliminary that flashes and gets deleted.
5385
+ // With this flag on, minInitialChars is set to 1 and the first text chunk opens
5386
+ // a visible preview immediately. At turn_end, if the model never called reply /
5387
+ // stream_reply AND the streamed message has substantive captured text, the
5388
+ // gateway materializes it as a pinged final answer (materialize()) and deletes
5389
+ // the silent preview. When the model uses an explicit reply tool the prior
5390
+ // streamed message is retracted instead.
5391
+ // The draft transport (sendMessageDraft) is permanently retired — both ON and
5392
+ // OFF use sendMessage + editMessageText; the difference is whether a visible
5393
+ // preview is opened at all.
5031
5394
  // Opt back IN per agent with SWITCHROOM_VISIBLE_ANSWER_STREAM=1.
5032
5395
  const ANSWER_STREAM_VISIBLE_ENABLED = parseVisibleAnswerStreamEnabled(
5033
5396
  process.env.SWITCHROOM_VISIBLE_ANSWER_STREAM,
5034
5397
  )
5035
- // Single source of truth for the answer-lane behaviour (flash-decouple,
5036
- // 2026-06-05). The visible preview gates on the visible flag ALONE; the draft
5037
- // flag controls only the transport. Resolved here once and consulted at the
5038
- // createAnswerStream config, the materialize-as-answer guard, and the boot log,
5039
- // so all three can never drift back into the `visible || retired` conflation
5040
- // that re-opened the flash. Total-enumerated in answer-stream-flag.test.ts.
5398
+ // Single source of truth for the answer-lane behaviour. The draft transport
5399
+ // (sendMessageDraft) is permanently retired — the lane is either VISIBLE
5400
+ // (opt-in) or DORMANT (the unconditional default: reply tool is the only
5401
+ // message). Resolved here once and consulted at the createAnswerStream config,
5402
+ // the materialize-as-answer guard, and the boot log.
5041
5403
  const ANSWER_LANE = resolveAnswerLaneConfig({
5042
5404
  visibleEnabled: ANSWER_STREAM_VISIBLE_ENABLED,
5043
- draftFnAvailable: sendMessageDraftFn != null,
5044
5405
  })
5045
5406
 
5046
5407
  // Whether to DELETE the activity/status feed when the final answer lands.
@@ -5076,6 +5437,11 @@ const completeProgressCardTurn:
5076
5437
  // #1122 PR3: flushProgressCardsForShutdown deleted with the card. No
5077
5438
  // replacement needed — there are no pinned progress messages to flush.
5078
5439
  let subagentWatcher: SubagentWatcherHandle | null = null
5440
+ // Background-worker activity feed manager. Module-scoped so shutdown can stop()
5441
+ // its internal heartbeat interval (mirrors subagentWatcher). Recreated per
5442
+ // bridge connect; the stale handle's interval is unref'd, so a missed stop()
5443
+ // can't keep the process alive, but we stop() on shutdown for cleanliness.
5444
+ let workerActivityFeed: ReturnType<typeof createWorkerActivityFeed> | null = null
5079
5445
 
5080
5446
  // ─── IPC server ───────────────────────────────────────────────────────────
5081
5447
  const SOCKET_PATH = process.env.SWITCHROOM_GATEWAY_SOCKET ?? join(STATE_DIR, 'gateway.sock')
@@ -5237,8 +5603,12 @@ let inFlightUpdate: { requestId: string; startedAt: number } | null = null
5237
5603
  // SWITCHROOM_SILENCE_FALLBACK_MS — base threshold (default 300000)
5238
5604
  // SWITCHROOM_SILENCE_FALLBACK_HARD_MS — hard ceiling for the in-flight-tool
5239
5605
  // defer (default 900000 = 15min)
5240
- // SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS=1 — enable the defer (default OFF;
5241
- // canary on marko against #2162 telemetry)
5606
+ // SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS=1 — enable the defer. NOTE: this is
5607
+ // now set fleet-wide in defaults.env
5608
+ // (was a marko canary against #2162;
5609
+ // promoted to the fleet default). The
5610
+ // code default below is still OFF, so
5611
+ // the live behaviour comes from config.
5242
5612
  function parsePositiveMsEnv(name: string, fallbackMs: number): number {
5243
5613
  const raw = process.env[name]
5244
5614
  if (raw == null || raw === '') return fallbackMs
@@ -5247,6 +5617,20 @@ function parsePositiveMsEnv(name: string, fallbackMs: number): number {
5247
5617
  }
5248
5618
  const SILENCE_FALLBACK_MS = parsePositiveMsEnv('SWITCHROOM_SILENCE_FALLBACK_MS', 300_000)
5249
5619
  const SILENCE_FALLBACK_HARD_MS = parsePositiveMsEnv('SWITCHROOM_SILENCE_FALLBACK_HARD_MS', 900_000)
5620
+ // #2527 — mid-turn liveness floor threshold (default 45s). The early, quiet
5621
+ // beat: a `user` turn working silently this long without a substantive answer
5622
+ // gets ONE honest "still on it" interim, so the ambient 👀 never masquerades
5623
+ // as "done". Strictly below SILENCE_FALLBACK_MS (the loud 300s unwedge).
5624
+ // Whole floor is kill-switchable via SWITCHROOM_TG_LIVENESS_FLOOR=0.
5625
+ const SILENCE_FLOOR_MS = parsePositiveMsEnv('SWITCHROOM_SILENCE_FLOOR_MS', 45_000)
5626
+ // #2527 — role-aware terminal reaction honesty (the "thumbs-up false done"
5627
+ // fix). Default ON; SWITCHROOM_TG_TERMINAL_HONESTY=0 reverts to always-👍.
5628
+ const LIVENESS_TERMINAL_HONESTY = process.env.SWITCHROOM_TG_TERMINAL_HONESTY !== '0'
5629
+ // SILENCE_DEFER_INFLIGHT_TOOLS: previously an opt-in (=1). The new
5630
+ // isLegitimatelyWorking callback supersedes this — defer is now the DEFAULT
5631
+ // when the callback is wired. The legacy flag is kept so `=0` still lets
5632
+ // operators force-disable the defer (handled inside silence-poke.ts tick()).
5633
+ // The old `=1` path is kept for back-compat but is now redundant.
5250
5634
  const SILENCE_DEFER_INFLIGHT_TOOLS = process.env.SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS === '1'
5251
5635
  // Production-liveness (2026-06-05 UAT finding). Count an activity-feed render or
5252
5636
  // an answer-stream draft update as liveness for the silence clock, so a long
@@ -5255,13 +5639,109 @@ const SILENCE_DEFER_INFLIGHT_TOOLS = process.env.SWITCHROOM_SILENCE_DEFER_INFLIG
5255
5639
  // restores the legacy "only a real reply resets the clock" behaviour.
5256
5640
  const SILENCE_LIVENESS_PRODUCTION = process.env.SWITCHROOM_SILENCE_LIVENESS_PRODUCTION !== '0'
5257
5641
 
5642
+ /**
5643
+ * Feed-survival predicate — the single source of truth for "is this turn
5644
+ * legitimately working?" used by BOTH teardown timers (orphaned-reply fuse
5645
+ * and silence-poke framework fallback).
5646
+ *
5647
+ * Returns true if ANY of the following hold for the given chat key:
5648
+ *
5649
+ * (a) A foreground tool call is in flight in the current turn
5650
+ * (`toolFlightTracker.isMidToolCall()`). This covers most tools
5651
+ * including ask_user while it blocks awaiting a tap.
5652
+ *
5653
+ * (b) Detached background work was dispatched in the current turn and has
5654
+ * not yet resolved — `pendingProgress.hasPendingAsyncDispatch(key)`.
5655
+ * Covers `Bash run_in_background:true` (which returns a near-instant
5656
+ * handle, emptying inFlight, while the background process keeps
5657
+ * running) and `Agent` / `Task` dispatches.
5658
+ *
5659
+ * (c) A human-wait tool (`ask_user`) is open for this chat. A pending
5660
+ * ask_user IS already captured by (a) while the tool_use is in flight,
5661
+ * but we include the explicit pendingAskUser check for defence-in-depth
5662
+ * (e.g. after an unlikely inFlight clear without a tool_result).
5663
+ *
5664
+ * The key is `statusKey(chatId, threadId)` — the same key used by
5665
+ * silencePoke / pendingProgress.
5666
+ */
5667
+ function isLegitimatelyWorking(key: string): boolean {
5668
+ // (a) foreground in-flight tool.
5669
+ // NOTE: toolFlightTracker is GLOBAL, not per-key. In a hypothetical
5670
+ // multi-chat agent a tool in flight for chat A would make this return
5671
+ // true for chat B's key. Accepted: the gateway runs one Claude session
5672
+ // (one turn in flight at a time); true multi-chat concurrency is not
5673
+ // currently supported. (b) and (c) below are correctly per-key.
5674
+ if (toolFlightTracker.isMidToolCall()) return true
5675
+ // (b) detached background work dispatched this turn
5676
+ if (pendingProgress.hasPendingAsyncDispatch(key)) return true
5677
+ // (c) ask_user open for this chat (defence-in-depth)
5678
+ const { chatId: keyChatId } = parseKeyForSurvival(key)
5679
+ for (const entry of pendingAskUser.values()) {
5680
+ if (entry.chatId === keyChatId) return true
5681
+ }
5682
+ return false
5683
+ }
5684
+
5685
+ /** Parse `<chatId>:<threadIdOrEmpty>` — mirrors silence-poke's parseKey.
5686
+ * Local copy so we don't need to re-export from silence-poke. */
5687
+ function parseKeyForSurvival(key: string): { chatId: string } {
5688
+ const idx = key.indexOf(':')
5689
+ return { chatId: idx < 0 ? key : key.slice(0, idx) }
5690
+ }
5691
+
5258
5692
  silencePoke.startTimer({
5259
- thresholdsMs: { fallback: SILENCE_FALLBACK_MS, fallbackHardCeiling: SILENCE_FALLBACK_HARD_MS },
5693
+ thresholdsMs: { fallback: SILENCE_FALLBACK_MS, fallbackHardCeiling: SILENCE_FALLBACK_HARD_MS, floor: SILENCE_FLOOR_MS },
5260
5694
  deferFallbackWhileToolInFlight: SILENCE_DEFER_INFLIGHT_TOOLS,
5695
+ isLegitimatelyWorking: (key) => isLegitimatelyWorking(key),
5261
5696
  emitMetric: (event) => {
5262
5697
  // Re-emit through the unified runtime-metrics fan-out (PostHog + JSONL).
5263
5698
  emitRuntimeMetric(event)
5264
5699
  },
5700
+ // #2527 — the gateway-owned half of the mid-turn-floor decision: only the
5701
+ // live turn knows its loop role + whether a substantive answer has landed.
5702
+ // Keyed on statusKey so a DM (threadId null) and a forum topic are identical.
5703
+ floorState: (key) => {
5704
+ const turn = currentTurn
5705
+ if (turn == null) return null
5706
+ if (statusKey(turn.sessionChatId, turn.sessionThreadId) !== key) return null
5707
+ return { role: turn.role, finalAnswerDelivered: turn.finalAnswerDelivered }
5708
+ },
5709
+ // #2527 — the early, quiet liveness beat. Honest text from the longest
5710
+ // in-flight tool (model-free, claude-native), routed through the SAME send
5711
+ // path as the 300s fallback; pings OFF (this is the gentle beat, not the
5712
+ // loud unwedge) and the turn is NOT torn down — it keeps working.
5713
+ onMidTurnFloor: async (ctx) => {
5714
+ // Late-fire guard, mirroring the fallback: a clean turn-end can race the
5715
+ // tick. If the turn is gone, stay silent.
5716
+ if (activeTurnStartedAt.get(ctx.key) == null && currentTurn == null) return
5717
+ const blockedOnApproval = activeStatusReactions
5718
+ .get(statusKey(ctx.chatId, ctx.threadId))
5719
+ ?.isAwaiting() ?? false
5720
+ const text = silencePoke.formatFrameworkFallbackText(
5721
+ 'working',
5722
+ ctx.silenceMs,
5723
+ ctx.inFlightTools,
5724
+ blockedOnApproval,
5725
+ )
5726
+ try {
5727
+ await robustApiCall(
5728
+ () => bot.api.sendMessage(ctx.chatId, text, {
5729
+ ...(ctx.threadId != null ? { message_thread_id: ctx.threadId } : {}),
5730
+ // The quiet beat: visible in-thread, no device buzz. (The 300s
5731
+ // fallback pings; the floor must not train the user to mute.)
5732
+ disable_notification: true,
5733
+ }),
5734
+ { chat_id: ctx.chatId, ...(ctx.threadId != null ? { threadId: ctx.threadId } : {}) },
5735
+ )
5736
+ // Count it as production so the silence clock resets — the user just
5737
+ // saw a real message, so the 300s loud fallback is measured from here.
5738
+ silencePoke.noteProduction(ctx.key, Date.now())
5739
+ } catch (err) {
5740
+ process.stderr.write(
5741
+ `silence-poke mid-turn floor sendMessage failed chat=${ctx.chatId} thread=${ctx.threadId}: ${err}\n`,
5742
+ )
5743
+ }
5744
+ },
5265
5745
  onFrameworkFallback: async (ctx) => {
5266
5746
  // Late-fire short-circuit (2026-05-23 audit finding). The fallback
5267
5747
  // can race a clean turn-end: the model's actual reply lands inside
@@ -5281,6 +5761,14 @@ silencePoke.startTimer({
5281
5761
  `turn ended cleanly during silence window ` +
5282
5762
  `chat=${ctx.chatId} thread=${ctx.threadId ?? '-'} silence_ms=${ctx.silenceMs}\n`,
5283
5763
  )
5764
+ // #2527: structured skip event so the late-fire race is machine-readable.
5765
+ logStreamingEvent({
5766
+ kind: 'silence_poke_skip',
5767
+ chatId: ctx.chatId,
5768
+ threadId: ctx.threadId ?? undefined,
5769
+ silenceMs: ctx.silenceMs,
5770
+ skipReason: 'turn_ended_cleanly_during_window',
5771
+ })
5284
5772
  // Tell silence-poke this chat-thread is finished so the next
5285
5773
  // arming doesn't carry stale state.
5286
5774
  silencePoke.endTurn(ctx.key)
@@ -5294,6 +5782,15 @@ silencePoke.startTimer({
5294
5782
  // get_status snapshot → pure formatter. Any hostd unavailability
5295
5783
  // degrades silently to the existing generic text (zero regression).
5296
5784
  let text: string | null = null
5785
+ // Hoisted out of the generic-fallback branch below because the send site
5786
+ // gates `disable_notification` on it: when the turn is parked on an
5787
+ // approval card, the fallback TEXT is a user-gating re-ping ("waiting for
5788
+ // your approval — tap Approve or Deny …"), and that must stay LOUD so the
5789
+ // user knows the ball is in their court. The reaction controller tracks the
5790
+ // park via setAwaiting on the permission-request.
5791
+ const blockedOnApproval = activeStatusReactions
5792
+ .get(statusKey(ctx.chatId, ctx.threadId))
5793
+ ?.isAwaiting() ?? false
5297
5794
  const upd = inFlightUpdate
5298
5795
  if (upd != null) {
5299
5796
  try {
@@ -5315,9 +5812,6 @@ silencePoke.startTimer({
5315
5812
  // benign "wedge" class — claude is alive, waiting on the operator's
5316
5813
  // tap), say so instead of "still working…". The reaction controller
5317
5814
  // already tracks this (setAwaiting on the permission-request park).
5318
- const blockedOnApproval = activeStatusReactions
5319
- .get(statusKey(ctx.chatId, ctx.threadId))
5320
- ?.isAwaiting() ?? false
5321
5815
  text = silencePoke.formatFrameworkFallbackText(
5322
5816
  ctx.fallbackKind,
5323
5817
  ctx.silenceMs,
@@ -5325,12 +5819,26 @@ silencePoke.startTimer({
5325
5819
  blockedOnApproval,
5326
5820
  )
5327
5821
  }
5822
+ // #2527: log the actual poke fire with structured data before sending,
5823
+ // so the event is visible even if the send fails.
5824
+ logStreamingEvent({
5825
+ kind: 'silence_poke_fire',
5826
+ chatId: ctx.chatId,
5827
+ threadId: ctx.threadId ?? undefined,
5828
+ silenceMs: ctx.silenceMs,
5829
+ fallbackKind: ctx.fallbackKind,
5830
+ })
5328
5831
  try {
5329
5832
  await robustApiCall(
5330
5833
  () => bot.api.sendMessage(ctx.chatId, text, {
5331
5834
  ...(ctx.threadId != null ? { message_thread_id: ctx.threadId } : {}),
5332
- // Framework fallback pings — user genuinely needs to know.
5333
- disable_notification: false,
5835
+ // Conditional: the pure-liveness "still working…" notice is a status
5836
+ // surface and stays SILENT. But when the turn is parked on an
5837
+ // approval card, this same fallback carries a user-gating re-ping
5838
+ // ("waiting for your approval — tap Approve or Deny …") — that must
5839
+ // PING, because the user is the one being waited on. Gate on the same
5840
+ // `blockedOnApproval` signal that selects the re-ping text above.
5841
+ disable_notification: blockedOnApproval ? false : true,
5334
5842
  }),
5335
5843
  { chat_id: ctx.chatId, ...(ctx.threadId != null ? { threadId: ctx.threadId } : {}) },
5336
5844
  )
@@ -5464,13 +5972,31 @@ silencePoke.startTimer({
5464
5972
  return sib == null || sib >= silencePoke.DEFAULT_THRESHOLDS.fallback
5465
5973
  },
5466
5974
  )
5975
+ // PR-4e self-heal backstop: drop any per-topic `currentTurnByKey` entries
5976
+ // for fbChatId whose turn is stale-by-the-same-silence-gate the sibling
5977
+ // sweep above used — the same precedent as `purgeStaleTurnsForChat`'s
5978
+ // activeTurnStartedAt sweep, so a leaked map entry (a turn whose keyed
5979
+ // delete somehow never ran) can never outlive its chat. Gated identically:
5980
+ // the firing key is always stale; a sibling is stale iff it's silent ≥ the
5981
+ // fallback threshold (or has no silence state). No-op under the flag OFF
5982
+ // (the map is empty).
5983
+ currentTurnMap.purgeChatStale(fbChatId, (siblingKey) => {
5984
+ if (siblingKey === fbKey) return true
5985
+ const sib = silencePoke.silenceMsForKey(siblingKey, fbNow)
5986
+ return sib == null || sib >= silencePoke.DEFAULT_THRESHOLDS.fallback
5987
+ })
5467
5988
  // Null `currentTurn` if it's still pointing at the wedged turn —
5468
5989
  // when claude eventually fires a late `turn_end` for this session
5469
5990
  // (or never does), the handler's `const turn = currentTurn` snapshot
5470
5991
  // returns null and the regular teardown short-circuits. Without
5471
5992
  // this, the late event would re-emit `turn_ended` AND clobber
5472
5993
  // whatever fresh turn the next inbound started.
5473
- if (turnMatchesFallback && currentTurn === wedgedTurn && wedgedTurn != null) {
5994
+ // PR-4e keyed liveness for the guard. Flag-OFF: `turnLiveForItsTopic`
5995
+ // reduces to `currentTurn === wedgedTurn` (singleton mirror), verbatim.
5996
+ // Flag-ON: `byKey.get(fbKey) === wedgedTurn`, so the keyed delete still
5997
+ // fires when the LIVE mirror has already flipped to another topic B (a bare
5998
+ // `currentTurn === wedgedTurn` would falsely skip and leak A's byKey entry).
5999
+ if (turnMatchesFallback && wedgedTurn != null && turnLiveForItsTopic(wedgedTurn)) {
5474
6000
  // Status-surface observability: emit the lifecycle CLEAR for the
5475
6001
  // silence-poke teardown so a fallback-nulled turn has a turn-lifecycle
5476
6002
  // line like every other clear path (the framework-fallback line below is
@@ -5478,7 +6004,12 @@ silencePoke.startTimer({
5478
6004
  process.stderr.write(
5479
6005
  `telegram gateway: ${formatTurnLifecycle('clear', 'silence_fallback', wedgedTurn, Date.now())}\n`,
5480
6006
  )
5481
- currentTurn = null
6007
+ // PR-4e — keyed delete for the wedged turn's OWN key (fbKey == the
6008
+ // statusKey this fallback fired for, == the wedgedTurn's key since
6009
+ // turnMatchesFallback gated chat+thread equality). Flag-OFF nulls the
6010
+ // singleton, verbatim; flag-ON deletes only this topic's entry and clears
6011
+ // the mirror iff it still points here — a live sibling topic is untouched.
6012
+ endCurrentTurnForKey(wedgedTurn, fbKey)
5482
6013
  }
5483
6014
  // Best-effort: clear any pending silent-end marker so the Stop hook
5484
6015
  // doesn't double-block when claude eventually exits the wedged turn.
@@ -5789,6 +6320,43 @@ function obligationSweep(): void {
5789
6320
  return
5790
6321
  }
5791
6322
  if (decision.action === 'represent') {
6323
+ // Fix #2472 — duplicate-represent guard. Before re-presenting AGAIN, check
6324
+ // whether the agent has ALREADY delivered a substantive outbound reply to
6325
+ // this chat SINCE the obligation was most recently re-presented. If so the
6326
+ // obligation is satisfied-but-misdetected (the reply landed but its routing
6327
+ // didn't resolve back to this origin, so the normal close path missed it) —
6328
+ // close silently and do NOT re-fire, which is what produced the near-identical
6329
+ // duplicate in #2472 (reply 10608 answered represent_count=1, yet
6330
+ // represent_count=2 fired anyway → duplicate 10609).
6331
+ //
6332
+ // The cutoff is `lastRepresentedAt` (the time of the PREVIOUS represent), NOT
6333
+ // `openedAt`. This is load-bearing: the genuine "agent wrote a plain-text
6334
+ // answer and never called reply" case must still represent ONCE. On the first
6335
+ // represent `lastRepresentedAt` is undefined, so this guard is a no-op and the
6336
+ // single represent fires as before. Only the SECOND-and-later represent is
6337
+ // gated — exactly where a reply that landed between fires must suppress the
6338
+ // re-ask. Falls back to false (never suppresses) if history is unavailable.
6339
+ if (
6340
+ shouldSuppressRepresent(o, {
6341
+ historyEnabled: HISTORY_ENABLED,
6342
+ // Pass the represent-guard's OWN low threshold — a terse-but-real reply
6343
+ // must suppress the duplicate (#2472/#2474), unlike the escalate branch
6344
+ // below which keeps the 200-char default.
6345
+ hasOutboundDeliveredSince: (chatId, sinceMs, threadId) =>
6346
+ hasOutboundDeliveredSince(
6347
+ chatId,
6348
+ sinceMs,
6349
+ threadId,
6350
+ OBLIGATION_REPRESENT_GUARD_MIN_REPLY_CHARS,
6351
+ ),
6352
+ })
6353
+ ) {
6354
+ process.stderr.write(
6355
+ `telegram gateway: obligation closed silently — reply delivered since last represent (no re-fire) origin=${o.originTurnId}\n`,
6356
+ )
6357
+ obligationLedger.close(o.originTurnId)
6358
+ return
6359
+ }
5792
6360
  // Re-present goes through the bridge → buffer. Only the represent path is
5793
6361
  // gated on an empty buffer (let the existing drain run first, avoid
5794
6362
  // double-presenting). Escalation below is NOT gated on the buffer — it is a
@@ -5796,6 +6364,25 @@ function obligationSweep(): void {
5796
6364
  // behind a dead bridge can never block the operator nudge.
5797
6365
  if (pendingInboundBuffer.depth(agent) > 0) return
5798
6366
  pendingInboundBuffer.push(agent, buildObligationRepresentInbound(o, Date.now()))
6367
+ // PR1 (cross-turn stale-card guard, §9 lever 4 / race C/D). Arm the
6368
+ // card-OPEN gate for the synthetic turn this represent inbound will spawn:
6369
+ // carry the obligation's `openedAt` so that turn's first card-OPEN can ask
6370
+ // "was a substantive answer already delivered since the obligation was
6371
+ // raised?" and, if so, suppress the card (it would otherwise narrate beneath
6372
+ // the answer the user already received). Keyed on the obligation's
6373
+ // `originTurnId` — the SAME id the represent inbound carries
6374
+ // (`buildObligationRepresentInbound` reuses `o.messageId`/`o.chatId`/
6375
+ // `o.threadId`, so the enqueue-time `deriveTurnId` reconstructs exactly
6376
+ // `o.originTurnId`). Keying on the turn id (not chat/thread) means ONLY the
6377
+ // exact represent turn this gate was armed for can consume it; an unrelated
6378
+ // later foreground turn on the same chat/thread has a different originTurnId
6379
+ // → finds no entry → its card opens normally. This closes the residual
6380
+ // cross-contamination window where a never-enqueued represent's stale gate
6381
+ // could suppress an unrelated turn's card (the represent/duplicate-reply
6382
+ // family). This does NOT gate the represent SEND — the represent guard above
6383
+ // already owns suppressing an already-satisfied represent; this only governs
6384
+ // the decorative card.
6385
+ pendingCrossTurnGate.set(o.originTurnId, { sinceMs: o.openedAt })
5799
6386
  const attempt = obligationLedger.markRepresented(o.originTurnId)
5800
6387
  process.stderr.write(
5801
6388
  `telegram gateway: obligation re-presented origin=${o.originTurnId} attempt=${attempt}/${OBLIGATION_REPRESENT_MAX}\n`,
@@ -6196,7 +6783,10 @@ const ipcServer: IpcServer = createIpcServer({
6196
6783
  process.stderr.write(
6197
6784
  `telegram gateway: disconnect-flush nulled currentTurn (bridge died with turn in flight)\n`,
6198
6785
  )
6199
- currentTurn = null
6786
+ // PR-4e — the bridge DIED with a turn in flight: EVERY per-topic entry
6787
+ // is a ghost, not just the mirror's. Clear the whole map + mirror.
6788
+ // Flag-OFF: this nulls the singleton only (the map is empty), verbatim.
6789
+ clearAllCurrentTurns()
6200
6790
  }
6201
6791
  },
6202
6792
  log: (msg) => process.stderr.write(`${msg}\n`),
@@ -6722,10 +7312,10 @@ const ipcServer: IpcServer = createIpcServer({
6722
7312
  ...(cfgTopic != null ? { threadId: cfgTopic } : {}),
6723
7313
  }
6724
7314
  },
6725
- buildKeyboard: (requestId) =>
7315
+ buildKeyboard: (requestId, epoch) =>
6726
7316
  new InlineKeyboard()
6727
- .text('✅ Approve', `cfg:${requestId}:approve`)
6728
- .text('🚫 Deny', `cfg:${requestId}:deny`),
7317
+ .text('✅ Approve', `cfg:${requestId}:${epoch}:approve`)
7318
+ .text('🚫 Deny', `cfg:${requestId}:${epoch}:deny`),
6729
7319
  postCard: async (args) => {
6730
7320
  try {
6731
7321
  const sent = await robustApiCall(
@@ -6757,6 +7347,9 @@ const ipcServer: IpcServer = createIpcServer({
6757
7347
  () =>
6758
7348
  bot.api.editMessageText(args.chatId, args.messageId, args.text, {
6759
7349
  parse_mode: 'HTML',
7350
+ // Strip the inline keyboard on a terminal/interim edit so the
7351
+ // [Approve]/[Deny] buttons stop being tappable on a resolved card.
7352
+ ...(args.stripKeyboard ? { reply_markup: { inline_keyboard: [] } } : {}),
6760
7353
  }),
6761
7354
  { chat_id: String(args.chatId), verb: 'config-approval-edit' },
6762
7355
  )
@@ -6826,6 +7419,8 @@ const ipcServer: IpcServer = createIpcServer({
6826
7419
  () =>
6827
7420
  bot.api.editMessageText(args.chatId, args.messageId, args.text, {
6828
7421
  parse_mode: 'HTML',
7422
+ // Finalize is terminal — drop the keyboard so buttons are gone.
7423
+ ...(args.stripKeyboard ? { reply_markup: { inline_keyboard: [] } } : {}),
6829
7424
  }),
6830
7425
  { chat_id: String(args.chatId), verb: 'config-approval-finalize' },
6831
7426
  )
@@ -7377,6 +7972,23 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
7377
7972
  }
7378
7973
  }
7379
7974
  process.stderr.write(`telegram channel: reply: invoked chatId=${chat_id} charCount=${text.length} preview=${JSON.stringify(text.slice(0, 80))}\n`)
7975
+ // #2527: emit time_to_first_text_reply_ms on the FIRST text reply of each
7976
+ // turn so operators can see how long users waited for any visible output.
7977
+ // Only fires once per turn (firstTextReplyLogged guards the repeat).
7978
+ if (turn != null) {
7979
+ const threadId = args.message_thread_id != null ? Number(args.message_thread_id) : undefined
7980
+ const replyKey = statusKey(chat_id, threadId)
7981
+ if (!firstTextReplyLogged.has(replyKey)) {
7982
+ firstTextReplyLogged.add(replyKey)
7983
+ logStreamingEvent({
7984
+ kind: 'turn_reply_timing',
7985
+ chatId: chat_id,
7986
+ threadId,
7987
+ turnId: turn.turnId,
7988
+ timeToFirstTextReplyMs: Date.now() - turn.gatewayReceiveAt,
7989
+ })
7990
+ }
7991
+ }
7380
7992
 
7381
7993
  // #546 dedup check: was this content just sent via turn-flush or
7382
7994
  // a sibling reply path? Skip the actual send and return a
@@ -7411,6 +8023,15 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
7411
8023
  // existing call-sites and the typical "final answer" reply keep their
7412
8024
  // current behaviour without an explicit flag.
7413
8025
  let disableNotification = args.disable_notification === true
8026
+ // #2527/#1664 — the over-ping safety net below may downgrade
8027
+ // `disableNotification` ping→silent for ANTI-SPAM (one ping per turn). That
8028
+ // delivery-channel decision must NOT pollute final-answer CLASSIFICATION: a
8029
+ // final answer the model intended to ping is STILL the final answer even when
8030
+ // the framework silences the actual ping. Classify on the model's original
8031
+ // intent (what executeStreamReply already does), so an over-ping-silenced
8032
+ // final answer sets finalAnswerDelivered=true — fixing both a spurious
8033
+ // silent-end re-prompt and a false 'undelivered' (😐) terminal reaction.
8034
+ const modelDisableNotification = args.disable_notification === true
7414
8035
 
7415
8036
  // #1675 over-ping safety net. The conversational-pacing contract
7416
8037
  // (`reference/rfcs/conversational-pacing.md` beat 5) says EXACTLY ONE
@@ -7441,32 +8062,92 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
7441
8062
  const turn = currentTurn
7442
8063
  if (turn != null) {
7443
8064
  const now = Date.now()
7444
- const decision = decideOverPing({
7445
- modelRequestedPing: !disableNotification,
7446
- firstPingAt: turn.firstPingAt,
7447
- nowMs: now,
8065
+ // Notification ownership (R8 / PR-2): on the `reply` path,
8066
+ // substantiveness is purely the ≥200-char (or `done`) backstop —
8067
+ // `isSubstantiveFinalReply` is `done === true || text.length >= 200`
8068
+ // and ignores the notification flag entirely. `reply` carries no
8069
+ // `done`, so it reduces to the ≥200-char length test. We still pass
8070
+ // `modelDisableNotification` (the MODEL's original intent, not the
8071
+ // possibly-downgraded `disableNotification`) to mirror the #2533
8072
+ // final-answer decoupling call shape, but that arg does NOT
8073
+ // participate in classification here — it is inert on this path.
8074
+ const replySubstantive = isSubstantiveFinalReply({
8075
+ text: rawText,
8076
+ disableNotification: modelDisableNotification,
7448
8077
  })
7449
- if (decision.suppress) {
7450
- process.stderr.write(
7451
- `telegram gateway: reply over-ping safety net ` +
7452
- `downgrading disable_notification:false true ` +
7453
- `(chat=${chat_id} thread=${args.message_thread_id ?? '-'} ` +
7454
- `firstPingAt=${turn.firstPingAt} sinceFirstPing_ms=${decision.sinceFirstPingMs})\n`,
7455
- )
7456
- // Observability: surface to the unified runtime-metrics
7457
- // fan-out so the cadence dashboard can track fleet-wide
7458
- // over-ping rate (leading indicator of model pacing drift).
7459
- emitRuntimeMetric({
7460
- kind: 'over_ping_suppressed',
7461
- key: statusKey(chat_id, args.message_thread_id != null
7462
- ? Number(args.message_thread_id) : undefined),
7463
- sinceFirstPingMs: decision.sinceFirstPingMs ?? 0,
7464
- })
7465
- disableNotification = true
7466
- wasOverPingSuppressed = true
7467
- } else if (decision.claimSlot) {
7468
- turn.firstPingAt = now
8078
+ // PR-4c: the over-ping DECISION relocates into the emission-authority
8079
+ // façade, behind the kill-switch (default OFF), the same structural way
8080
+ // PR-4b moved the OPEN gate. `decideOverPing` is already pure, so PR-4c
8081
+ // extracts NOTHING new — it relocates the *call* into the façade's enabled
8082
+ // branch and keeps the *effects* (stderr, metric, the atomic
8083
+ // `firstPingAt`/`firstPingWasSubstantive` pair-set, the
8084
+ // `disableNotification`/`wasOverPingSuppressed` outer-scope writes) HERE,
8085
+ // parameterized by the decision the façade hands back via `applyDecision`.
8086
+ //
8087
+ // - Disabled branch runs `disabledOverPing()` — its own LITERAL
8088
+ // `decideOverPing(...)` call + the full effects block, VERBATIM from
8089
+ // PR-4b-base (the disabled-path-is-byte-identical proof).
8090
+ // - Enabled branch: the façade computes the decision and hands it to
8091
+ // `applyOverPingDecision(decision)`, which performs the IDENTICAL
8092
+ // effects. Same pure inputs ⇒ same decision — flag-ON ≡ flag-OFF ≡ base.
8093
+ //
8094
+ // The effects block is shared between both thunks by closing over `decision`
8095
+ // but the disabled thunk computes it via its OWN literal `decideOverPing(`
8096
+ // first, so the disabled path never depends on the façade for the decision.
8097
+ const applyOverPingDecision = (decision: OverPingDecision): void => {
8098
+ if (decision.suppress) {
8099
+ process.stderr.write(
8100
+ `telegram gateway: reply over-ping safety net — ` +
8101
+ `downgrading disable_notification:false → true ` +
8102
+ `(chat=${chat_id} thread=${args.message_thread_id ?? '-'} ` +
8103
+ `firstPingAt=${turn.firstPingAt} sinceFirstPing_ms=${decision.sinceFirstPingMs})\n`,
8104
+ )
8105
+ // Observability: surface to the unified runtime-metrics
8106
+ // fan-out so the cadence dashboard can track fleet-wide
8107
+ // over-ping rate (leading indicator of model pacing drift).
8108
+ emitRuntimeMetric({
8109
+ kind: 'over_ping_suppressed',
8110
+ key: statusKey(chat_id, args.message_thread_id != null
8111
+ ? Number(args.message_thread_id) : undefined),
8112
+ sinceFirstPingMs: decision.sinceFirstPingMs ?? 0,
8113
+ })
8114
+ disableNotification = true
8115
+ wasOverPingSuppressed = true
8116
+ } else if (decision.claimSlot) {
8117
+ // Claim (first ping) OR upgrade (substantive answer pinging over an
8118
+ // ack's slot). Set firstPingAt AND firstPingWasSubstantive ATOMICALLY
8119
+ // (no await between) so a racing second reply reads a consistent pair.
8120
+ turn.firstPingAt = now
8121
+ turn.firstPingWasSubstantive = replySubstantive
8122
+ if (decision.upgrade) {
8123
+ process.stderr.write(
8124
+ `telegram gateway: reply over-ping safety net — ` +
8125
+ `UPGRADE: substantive answer pings over an ack's slot ` +
8126
+ `(chat=${chat_id} thread=${args.message_thread_id ?? '-'})\n`,
8127
+ )
8128
+ }
8129
+ }
7469
8130
  }
8131
+ emissionAuthorityFor(turn).claimOrDowngradePing(
8132
+ { modelRequestedPing: !disableNotification, substantive: replySubstantive },
8133
+ {
8134
+ firstPingAt: turn.firstPingAt,
8135
+ firstPingWasSubstantive: turn.firstPingWasSubstantive,
8136
+ nowMs: now,
8137
+ },
8138
+ applyOverPingDecision,
8139
+ () => {
8140
+ // Disabled-path: literal `decideOverPing(` + effects, VERBATIM base.
8141
+ const decision = decideOverPing({
8142
+ modelRequestedPing: !disableNotification,
8143
+ firstPingAt: turn.firstPingAt,
8144
+ substantive: replySubstantive,
8145
+ firstPingWasSubstantive: turn.firstPingWasSubstantive,
8146
+ nowMs: now,
8147
+ })
8148
+ applyOverPingDecision(decision)
8149
+ },
8150
+ )
7470
8151
  }
7471
8152
  }
7472
8153
 
@@ -7653,10 +8334,38 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
7653
8334
  // clear; the main turn-end path also re-writes the state when
7654
8335
  // finalAnswerDelivered=false, so this is a belt-and-braces gate
7655
8336
  // for the turn_end-missing case (#1741).
7656
- if (isFinalAnswerReply({ text: rawText, disableNotification })) {
8337
+ if (isFinalAnswerReply({ text: rawText, disableNotification: modelDisableNotification })) {
7657
8338
  clearSilentEndState(statusKey(chat_id, threadId))
7658
8339
  }
7659
8340
 
8341
+ // Lever 2 (design §9 lever 2): finalize the activity card BEFORE the reply
8342
+ // chunks send, so the card keeps its (lower) message_id and the reply is
8343
+ // structurally last on screen. ONLY for a *substantive* final — for an ack
8344
+ // (non-substantive) do NOTHING: finalizing an ack early would
8345
+ // close → reopen → emit MORE messages (the #2141 ack-then-work feed, R3).
8346
+ // `clearActivitySummary` edits the existing card in place (no new send) and
8347
+ // nulls `activityMessageId`; combined with the sticky latch set here it
8348
+ // prevents any post-reply re-OPEN below the answer. Idempotent with the
8349
+ // tool_use-event clear at the first-reply handoff (the existing backstop).
8350
+ {
8351
+ const finalizeTurn = currentTurn
8352
+ if (
8353
+ finalizeTurn != null
8354
+ && isSubstantiveFinalReply({ text: rawText, disableNotification: modelDisableNotification })
8355
+ ) {
8356
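+ // PR-4a: routed through the emission-authority façade (no-op delegates —
8357
+ // the latch-set and the finalize run exactly as before). NOTE(review): this site overwrites finalAnswerDeliveredAt unconditionally, whereas the post-send latch sites and the stream_reply path only set it when it is still null — confirm the overwrite is intended.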
8358
+ const ea = emissionAuthorityFor(finalizeTurn)
8359
+ ea.markSubstantiveFinalDelivered(() => {
8360
+ finalizeTurn.finalAnswerEverDelivered = true
8361
+ finalizeTurn.finalAnswerDeliveredAt = Date.now()
8362
+ })
8363
+ ea.finalizeCard(() => {
8364
+ clearActivitySummary(finalizeTurn)
8365
+ })
8366
+ }
8367
+ }
8368
+
7660
8369
  if (previewMessageId != null && reply_to != null && replyMode !== 'off') {
7661
8370
  await deleteStalePreview(previewMessageId)
7662
8371
  previewMessageId = null
@@ -7764,7 +8473,7 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
7764
8473
  turn != null
7765
8474
  && isFinalAnswerReply({
7766
8475
  text: decision.mergedText,
7767
- disableNotification,
8476
+ disableNotification: modelDisableNotification,
7768
8477
  })
7769
8478
  ) {
7770
8479
  turn.finalAnswerDelivered = true
@@ -7772,8 +8481,12 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
7772
8481
  // answer must NOT re-open the feed on post-answer housekeeping.
7773
8482
  turn.finalAnswerSubstantive = isSubstantiveFinalReply({
7774
8483
  text: decision.mergedText,
7775
- disableNotification,
8484
+ disableNotification: modelDisableNotification,
7776
8485
  })
8486
+ // Sticky ordering latch (lever 1): a substantive final closes the
8487
+ // card OPEN gate for the rest of the turn. NEVER cleared by reopen.
8488
+ if (turn.finalAnswerSubstantive) turn.finalAnswerEverDelivered = true
8489
+ if (turn.finalAnswerSubstantive && turn.finalAnswerDeliveredAt == null) turn.finalAnswerDeliveredAt = Date.now()
7777
8490
  if (turn.finalAnswerSubstantive) closeObligationOnSubstantiveReply(args, turn, replyRoutedOriginTurn)
7778
8491
  }
7779
8492
  outboundDedup.record(
@@ -8112,12 +8825,17 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
8112
8825
  //
8113
8826
  // #1664 — `turn.finalAnswerDelivered = true` keeps the silent-
8114
8827
  // end re-prompt from spuriously firing on a delivered final.
8115
- if (turn != null && isFinalAnswerReply({ text: rawText, disableNotification })) {
8828
+ if (turn != null && isFinalAnswerReply({ text: rawText, disableNotification: modelDisableNotification })) {
8116
8829
  turn.finalAnswerDelivered = true
8117
8830
  // Feed-reopen refinement: track whether this final was substantive
8118
8831
  // (≥200 chars or stream-done — not a short pinging ack) so post-answer
8119
8832
  // housekeeping tool work does NOT re-open the feed / trip silent-end.
8120
- turn.finalAnswerSubstantive = isSubstantiveFinalReply({ text: rawText, disableNotification })
8833
+ turn.finalAnswerSubstantive = isSubstantiveFinalReply({ text: rawText, disableNotification: modelDisableNotification })
8834
+ // Sticky ordering latch (lever 1): set once a SUBSTANTIVE final lands;
8835
+ // never cleared by reopen. The card OPEN gate keys on this, not the
8836
+ // mutable finalAnswerDelivered above (which reopen toggles).
8837
+ if (turn.finalAnswerSubstantive) turn.finalAnswerEverDelivered = true
8838
+ if (turn.finalAnswerSubstantive && turn.finalAnswerDeliveredAt == null) turn.finalAnswerDeliveredAt = Date.now()
8121
8839
  // #1728: release the buffer gate + emit terminal 👍. Mid-turn
8122
8840
  // acks bypass this branch and remain non-events for the
8123
8841
  // reaction (preserves #1713). The full turn-state teardown
@@ -8278,9 +8996,8 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
8278
8996
  }
8279
8997
 
8280
8998
  const access = loadAccess()
8281
- // Detect chat type for draft-transport selection.
8999
+ // Detect chat type for throttle-default selection.
8282
9000
  // Private (DM) chats have positive numeric IDs; groups/channels are negative.
8283
- // Forum topics have a message_thread_id set — sendMessageDraft is unsupported there.
8284
9001
  const streamChatId = args.chat_id as string
8285
9002
  const streamIsPrivate = isDmChatId(streamChatId)
8286
9003
  const streamIsForumTopic = args.message_thread_id != null && args.message_thread_id !== ''
@@ -8322,6 +9039,19 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
8322
9039
  // PR3b-cutover: feed lastOutboundAt to the delivery machine (see
8323
9040
  // executeReply) so its TTL tick suppresses an active-turn fallback.
8324
9041
  shadowEmit({ kind: 'modelOutbound', key: sKey as _ChatKey, at: Date.now() })
9042
+ // #2527: emit turn_reply_timing on the first stream_reply of the turn,
9043
+ // mirroring the same gate in executeReply. Guards with firstTextReplyLogged
9044
+ // so a turn that calls reply first and stream_reply second doesn't double-emit.
9045
+ if (turn != null && !firstTextReplyLogged.has(sKey)) {
9046
+ firstTextReplyLogged.add(sKey)
9047
+ logStreamingEvent({
9048
+ kind: 'turn_reply_timing',
9049
+ chatId: streamChatId,
9050
+ threadId: streamThreadId,
9051
+ turnId: turn.turnId,
9052
+ timeToFirstTextReplyMs: Date.now() - turn.gatewayReceiveAt,
9053
+ })
9054
+ }
8325
9055
  // #1741 — see executeReply for the rationale: only a plausibly-
8326
9056
  // final stream_reply clears the silent-end state. An interim
8327
9057
  // ack via stream_reply must NOT clear; the Stop hook needs
@@ -8338,6 +9068,33 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
8338
9068
  }
8339
9069
  }
8340
9070
 
9071
+ // Lever 2 (design §9 lever 2): finalize the activity card BEFORE the stream
9072
+ // send so the card keeps its lower message_id and the reply is structurally
9073
+ // last. ONLY for a *substantive* final (a stream_reply done=true or ≥200
9074
+ // chars) — for a short pinging interim chunk do NOTHING (finalizing an ack
9075
+ // early would close → reopen → emit more, the #2141 ack-then-work feed, R3).
9076
+ // `clearActivitySummary` edits in place + nulls activityMessageId; the sticky
9077
+ // latch set here blocks any post-reply re-OPEN below the answer.
9078
+ if (
9079
+ turn != null
9080
+ && isSubstantiveFinalReply({
9081
+ text: (args.text as string | undefined) ?? '',
9082
+ disableNotification: args.disable_notification === true,
9083
+ done: args.done === true,
9084
+ })
9085
+ ) {
9086
+ // PR-4a: routed through the emission-authority façade (no-op delegates —
9087
+ // the latch-set and the finalize run exactly as before).
9088
+ const ea = emissionAuthorityFor(turn)
9089
+ ea.markSubstantiveFinalDelivered(() => {
9090
+ turn.finalAnswerEverDelivered = true
9091
+ if (turn.finalAnswerDeliveredAt == null) turn.finalAnswerDeliveredAt = Date.now()
9092
+ })
9093
+ ea.finalizeCard(() => {
9094
+ clearActivitySummary(turn)
9095
+ })
9096
+ }
9097
+
8341
9098
  const result = await handleStreamReply(
8342
9099
  {
8343
9100
  chat_id: streamChatId,
@@ -8370,7 +9127,6 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
8370
9127
  logStreamingEvent,
8371
9128
  isPrivateChat: streamIsPrivate,
8372
9129
  isForumTopic: streamIsForumTopic,
8373
- ...(sendMessageDraftFn != null ? { sendMessageDraft: sendMessageDraftFn } : {}),
8374
9130
  // Issue #310: deliver the outbound count bump BEFORE forceCompleteTurn
8375
9131
  // so the terminal render sees outboundDeliveredCount > 0. The handler
8376
9132
  // calls this dep in that order internally.
@@ -8390,12 +9146,10 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
8390
9146
  recordOutbound,
8391
9147
  ...(HISTORY_ENABLED ? { getLatestInboundMessageId } : {}),
8392
9148
  writeError: (line) => process.stderr.write(line),
8393
- // PR B: drop the legacy 600 ms compromise. When the operator sets
8394
- // `channels.telegram.stream_throttle_ms` in yaml, the env override
8395
- // wins; otherwise draft-stream's transport-aware default fires
8396
- // (300 ms draft / 1000 ms message). `throttleMs: undefined` is a
8397
- // signal — handlers downgrade to `?? undefined`, which then
8398
- // passes through to draft-stream where the default applies.
9149
+ // When the operator sets `channels.telegram.stream_throttle_ms` in yaml,
9150
+ // the env override wins; otherwise draft-stream's DM/group defaults apply
9151
+ // (400 ms for DMs, 1000 ms for groups). `throttleMs: undefined` passes
9152
+ // through to draft-stream where the per-chat-type default applies.
8399
9153
  ...(STREAM_THROTTLE_MS_OVERRIDE != null ? { throttleMs: STREAM_THROTTLE_MS_OVERRIDE } : {}),
8400
9154
  progressCardActive: streamMode === 'checklist',
8401
9155
  },
@@ -8495,6 +9249,10 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
8495
9249
  disableNotification: args.disable_notification === true,
8496
9250
  done: args.done === true,
8497
9251
  })
9252
+ // Sticky ordering latch (lever 1): set once a SUBSTANTIVE final lands;
9253
+ // never cleared by reopen. The card OPEN gate keys on this sticky latch.
9254
+ if (turn.finalAnswerSubstantive) turn.finalAnswerEverDelivered = true
9255
+ if (turn.finalAnswerSubstantive && turn.finalAnswerDeliveredAt == null) turn.finalAnswerDeliveredAt = Date.now()
8498
9256
  if (turn.finalAnswerSubstantive) closeObligationOnSubstantiveReply(args, turn, streamRoutedOriginTurn)
8499
9257
  // #1744 follow-up — stream_reply edge case. The first-emit gate at
8500
9258
  // L5178 only clears silent-end state on the FIRST emit of a stream.
@@ -9832,6 +10590,46 @@ function resetOrphanedReplyTimeout(): void {
9832
10590
  replyCalled: t.replyCalled,
9833
10591
  progressCardActive: progressDriver != null,
9834
10592
  })) {
10593
+ // Feed-survival guard: re-arm the fuse while the turn is
10594
+ // legitimately working — an in-flight tool, a detached background
10595
+ // process (Bash run_in_background), or a human-wait tool (ask_user).
10596
+ // This extends the original "isMidToolCall" guard to cover the
10597
+ // detached-work cases that empty inFlight prematurely.
10598
+ //
10599
+ // Cap logic:
10600
+ // • Foreground tools / detached background work: bound by
10601
+ // ORPHANED_REPLY_MAX_REARMS (20 × 30 s = 10 min). A genuinely
10602
+ // hung tool still surfaces after the cap.
10603
+ // • Human-wait tools (ask_user): NEVER forcibly backstop while
10604
+ // ask_user is open for this chat — the human simply hasn't
10605
+ // tapped yet. We keep re-arming unconditionally until the prompt
10606
+ // resolves (TTL or tap) and inFlight empties.
10607
+ const turnKey = statusKey(t.sessionChatId, t.sessionThreadId)
10608
+ const working = isLegitimatelyWorking(turnKey)
10609
+ const humanWaiting = (() => {
10610
+ for (const entry of pendingAskUser.values()) {
10611
+ if (entry.chatId === t.sessionChatId) return true
10612
+ }
10613
+ return false
10614
+ })()
10615
+ if (working || humanWaiting) {
10616
+ const underCap = t.orphanedReplyRearmCount < ORPHANED_REPLY_MAX_REARMS
10617
+ if (humanWaiting || underCap) {
10618
+ t.orphanedReplyRearmCount++
10619
+ process.stderr.write(
10620
+ `telegram gateway: orphaned-reply fuse expired — re-arming` +
10621
+ ` (rearm ${t.orphanedReplyRearmCount}/${ORPHANED_REPLY_MAX_REARMS},` +
10622
+ ` in_flight=${toolFlightTracker.inFlightCount()},` +
10623
+ ` human_wait=${humanWaiting},` +
10624
+ ` bg_work=${pendingProgress.hasPendingAsyncDispatch(turnKey)})\n`,
10625
+ )
10626
+ resetOrphanedReplyTimeout()
10627
+ return
10628
+ }
10629
+ process.stderr.write(
10630
+ `telegram gateway: orphaned-reply rearm cap reached (${ORPHANED_REPLY_MAX_REARMS}) — forcing backstop despite working state\n`,
10631
+ )
10632
+ }
9835
10633
  process.stderr.write(
9836
10634
  `telegram gateway: orphaned-reply timeout (${ORPHANED_REPLY_TIMEOUT_MS}ms) — forcing backstop\n`,
9837
10635
  )
@@ -9876,13 +10674,112 @@ const FOREGROUND_SUBAGENT_ACCUM_MAX = 12
9876
10674
  * foreground sub-agents (rare — parallel Task dispatch) flatten in insertion
9877
10675
  * order; the single-sub-agent common case nests precisely under its
9878
10676
  * Delegating line.
10677
+ *
10678
+ * The header (elapsed + tool count) is now threaded into the render so the
10679
+ * main-session card matches the worker card's two-line header style. This
10680
+ * fixes the missing header regression where the worker card showed elapsed/
10681
+ * tool-count metadata but the main-session card rendered step-lines only.
9879
10682
  */
9880
10683
  function composeTurnActivity(turn: CurrentTurn, final = false, liveSuffix = ''): string | null {
9881
10684
  const childLines: string[] = []
9882
10685
  for (const narrative of turn.foregroundSubAgents.values()) {
9883
10686
  childLines.push(...narrative)
9884
10687
  }
9885
- return renderActivityFeedWithNested(turn.mirrorLines, childLines, final, liveSuffix)
10688
+ // Pass labeledToolCount as stepCount only on the terminal (final) render so
10689
+ // the persisted feed record shows a `✓ N steps` total. The live in-progress
10690
+ // feed omits it (stepCount undefined) to stay clean and minimal.
10691
+ const stepCount = final ? turn.labeledToolCount : undefined
10692
+ // Build the session header so the main-session card renders the same two-line
10693
+ // elapsed/tool-count header as the worker card.
10694
+ const header: SessionActivityHeader = {
10695
+ label: 'Agent',
10696
+ elapsedMs: turn.startedAt > 0 ? Date.now() - turn.startedAt : 0,
10697
+ toolCount: turn.labeledToolCount,
10698
+ state: final ? 'done' : 'running',
10699
+ }
10700
+ return renderActivityFeedWithNested(turn.mirrorLines, childLines, final, liveSuffix, stepCount, header)
10701
+ }
10702
+
10703
+ /**
10704
+ * Render a SHOWN narrative text block as a transient liveness step — the
10705
+ * same path a tool label takes (appendActivityLabel → renderStepFeed), so
10706
+ * the narrative line is rolling-window-clipped and replaced by the next
10707
+ * event exactly like a tool step. NOT a new message, NOT persisted as a
10708
+ * parallel mirror (invariant `chat-is-the-single-source-of-truth`,
10709
+ * reference/invariants.md). Clipped to a single 120-char line via
10710
+ * clipNarrative so it reads as a step, not a paragraph.
10711
+ */
10712
+ function showNarrativeStep(turn: CurrentTurn, text: string): void {
10713
+ const rendered = appendActivityLabel(turn.mirrorLines, clipNarrative(text))
10714
+ if (rendered == null) return
10715
+ turn.activityPendingRender = composeTurnActivity(turn) ?? rendered
10716
+ const ea = emissionAuthorityFor(turn)
10717
+ // PR-4d: route the deliver-before-drain decision through the centralized
10718
+ // card-drain gate (chatLock-serialized under the flag; verbatim block OFF).
10719
+ cardDrainGate(turn, ea, () => {
10720
+ if (ea.mayDrain(turn)) {
10721
+ // Producer A (narrative SHOW): may only EDIT an already-open card, never
10722
+ // OPEN one on a 0-tool turn (design §9 lever 5 base case — the
10723
+ // triplication). The OPEN gate in the drain enforces this; accumulation
10724
+ // into mirrorLines still happens so the narration renders once a tool
10725
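+ // label or liveness opens the card. NOTE(review): the liveness-timer comment in feedHeartbeatTick states that Lever 5 is now inert (narrative may open pre-answer — #2588); confirm which description is current.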
10726
+ // PR-4a: routed through the emission-authority façade (no-op delegate).
10727
+ ea.openOrEditCard('narrative', () => {
10728
+ turn.activityInFlight = drainActivitySummary(turn, 'narrative')
10729
+ })
10730
+ }
10731
+ })
10732
+ }
10733
+
10734
+ /**
10735
+ * Narrative-dedup gate, step 2 (reducer-side): a tool_use just arrived while
10736
+ * a narrative block was pending. Decide SHOW vs SUPPRESS and clear the
10737
+ * pending slot. SUPPRESS only when the tool is reply/stream_reply AND the
10738
+ * pending text is a draft-then-send of that reply's `input.text`. Everything
10739
+ * else (a working tool, or a reply whose text differs — post-action
10740
+ * narration) is SHOWN. See narrative-dedup.ts §2b.
10741
+ */
10742
+ function resolvePendingNarrativeOnTool(
10743
+ turn: CurrentTurn,
10744
+ toolName: string,
10745
+ input: Record<string, unknown> | undefined,
10746
+ ): void {
10747
+ const pending = turn.pendingNarrative
10748
+ if (pending == null) return
10749
+ turn.pendingNarrative = null
10750
+ if (REPLY_TOOLS.has(toolName)) {
10751
+ const replyText = typeof input?.text === 'string' ? (input.text as string) : ''
10752
+ if (isDraftOfReply(pending.text, replyText)) return // draft of the answer → SUPPRESS
10753
+ }
10754
+ showNarrativeStep(turn, pending.text) // working preamble / post-action narration → SHOW
10755
+ }
10756
+
10757
+ /**
10758
+ * Narrative-dedup gate, step 1 (reducer-side): a new narrative block
10759
+ * arrived. A previously-pending block had nothing reply-shaped immediately
10760
+ * after it (pure narration) → flush it as SHOWN, then stage the new one for
10761
+ * one lookahead step. See narrative-dedup.ts §2b.
10762
+ */
10763
+ function stagePendingNarrative(turn: CurrentTurn, text: string): void {
10764
+ if (turn.pendingNarrative != null) {
10765
+ showNarrativeStep(turn, turn.pendingNarrative.text)
10766
+ }
10767
+ turn.pendingNarrative = { text }
10768
+ }
10769
+
10770
+ /**
10771
+ * Narrative-dedup gate, step 3 (reducer-side): the turn is ending with a
10772
+ * trailing narrative block and nothing after it. SUPPRESS only when the turn
10773
+ * already delivered its answer via reply/stream_reply and the trailing text
10774
+ * is a draft of that answer; otherwise SHOW (genuine trailing narration like
10775
+ * "Done — all green."). See narrative-dedup.ts §2b.
10776
+ */
10777
+ function flushPendingNarrativeAtTurnEnd(turn: CurrentTurn, lastReplyText: string): void {
10778
+ const pending = turn.pendingNarrative
10779
+ if (pending == null) return
10780
+ turn.pendingNarrative = null
10781
+ if (lastReplyText.length > 0 && isDraftOfReply(pending.text, lastReplyText)) return // trailing duplicate of the answer
10782
+ showNarrativeStep(turn, pending.text)
9886
10783
  }
9887
10784
 
9888
10785
  /**
@@ -9902,11 +10799,69 @@ function composeTurnActivity(turn: CurrentTurn, final = false, liveSuffix = ''):
9902
10799
  * doesn't corrupt the next turn's atom — late writes land on the
9903
10800
  * captured `turn` (already-completed turn, harmless).
9904
10801
  */
9905
- async function drainActivitySummary(turn: CurrentTurn): Promise<void> {
10802
+ async function drainActivitySummary(
10803
+ turn: CurrentTurn,
10804
+ // Which producer triggered this drain (design §9 levers 1 + 5). Gates the
10805
+ // OPEN (first sendMessage) branch via `mayOpenActivityCard`; EDITs of an
10806
+ // already-open card are never gated. Defaults to 'tool' — the historically
10807
+ // unconditional OPEN behaviour — so any caller that does not opt into the
10808
+ // gate is unaffected. Narrative-SHOW and liveness callers pass their producer
10809
+ // explicitly.
10810
+ producer: FeedOpenProducer = 'tool',
10811
+ // Optional flags forwarded to `mayOpenActivityCard`.
10812
+ openFlags?: { postAnswerSubagentActivity?: boolean },
10813
+ ): Promise<void> {
9906
10814
  try {
9907
10815
  while (turn.activityPendingRender !== turn.activityLastSentRender) {
9908
10816
  const target = turn.activityPendingRender
9909
10817
  if (target == null) break
10818
+ // OPEN gate (design §9 levers 1 + 5): when this drain would OPEN a fresh
10819
+ // card (activityMessageId == null), consult the pure gate. Refusing an
10820
+ // OPEN must NOT advance activityLastSentRender — the accumulated render
10821
+ // stays pending so a later OPEN-eligible producer (a tool label, or
10822
+ // liveness) renders it. An EDIT (activityMessageId != null) is never
10823
+ // gated. Enforced HERE so it covers BOTH the inline producers AND the
10824
+ // detached heartbeat setInterval drain (R7/concurrency). The gate applies only
10825
+ // at EVALUATION time, not to an in-flight send: it is not a hard mutex — a send
10826
+ // already PAST this check and suspended at its `await robustApiCall(
10827
+ // sendMessage)` when a substantive final lands still completes and opens a
10828
+ // card; that residual is reconciled by lever-2's `clearActivitySummary`
10829
+ // chaining its finalize onto `turn.activityInFlight` (the suspended drain)
10830
+ // and editing the card in place, not by this gate blocking it.
10831
+ // Lever 4 (cross-turn / race C/D): a synthetic represent/owed-reply turn
10832
+ // (and the liveness/heartbeat timer firing on it) starts with a CLEARED
10833
+ // per-turn `finalAnswerEverDelivered` latch even when a substantive answer
10834
+ // already reached the user in an EARLIER turn — so without this its first
10835
+ // drain opens a card BELOW that prior reply. Only such a turn carries
10836
+ // `crossTurnGate`; reuse the represent guard's delivered-since check
10837
+ // (`hasOutboundDeliveredSince`) with the obligation's `openedAt` cutoff and
10838
+ // the SUBSTANTIVE 200-char threshold (so an ack never trips it → #2141
10839
+ // stays green). Computed ONLY when about to OPEN (activityMessageId ==
10840
+ // null) AND only for a turn with a cross-turn gate — no history query on
10841
+ // the common foreground path. Scoped to the synthetic surface by the
10842
+ // presence of `crossTurnGate`, so it can never fire on a foreground turn.
10843
+ // PR-4b: the cross-turn predicate is now the PURE, shared helper extracted
10844
+ // into feed-open-gate.ts (body lifted verbatim) — the SAME function the
10845
+ // emission-authority façade calls in its enabled branch, so flag-ON and
10846
+ // flag-OFF compute an identical verdict. History deps injected (the module
10847
+ // stays sqlite-free). The pure-gate consult + the `break` below stay
10848
+ // LITERALLY in the drain (disabled-path byte-identity).
10849
+ const crossTurnAnswerDelivered = computeCrossTurnAnswerDelivered(
10850
+ turn,
10851
+ feedOpenGateDeps(),
10852
+ )
10853
+ if (
10854
+ turn.activityMessageId == null
10855
+ && !mayOpenActivityCard({
10856
+ producer,
10857
+ finalAnswerEverDelivered: turn.finalAnswerEverDelivered,
10858
+ labeledToolCount: turn.labeledToolCount,
10859
+ crossTurnAnswerDelivered,
10860
+ postAnswerSubagentActivity: openFlags?.postAnswerSubagentActivity,
10861
+ })
10862
+ ) {
10863
+ break
10864
+ }
9910
10865
  // `renderActivityFeed` already emitted ready Telegram HTML with per-line
9911
10866
  // markup (<b>→ current</b> / <i>✓ done</i>) and escaped each label's
9912
10867
  // <,>,& itself (#1942 class) — send verbatim, do NOT re-escape or
@@ -9980,17 +10935,125 @@ async function drainActivitySummary(turn: CurrentTurn): Promise<void> {
9980
10935
  function feedHeartbeatTick(): void {
9981
10936
  const turn = currentTurn
9982
10937
  if (turn == null) return
10938
+ if (turn.finalAnswerDelivered) {
10939
+ // Fix 2: post-answer background-agent liveness. When the sub-agent/workflow
10940
+ // watcher has surfaced a new step AFTER the substantive final answer, drive
10941
+ // a liveness card so the operator can see "background agent still working".
10942
+ //
10943
+ // Gate: `turn.subagentActivityAt` must be set (watcher fired) AND it must
10944
+ // exceed `turn.finalAnswerDeliveredAt` (the watcher advanced AFTER the answer
10945
+ // was delivered — not just any pre-answer label). This is the key fix:
10946
+ // #2587 read `lastToolLabelAt`, which is frozen by the drop-guard after a
10947
+ // substantive answer and therefore never crosses the threshold. `subagentActivityAt`
10948
+ // is written by the watcher's onProgress callback INDEPENDENTLY of the
10949
+ // tool_label / drop-guard path, so it correctly advances post-answer.
10950
+ //
10951
+ // Idle-gap suppression + staleness cap (concern 3) — the single pure decision
10952
+ // `evaluatePostAnswerLiveness`:
10953
+ // - 'idle' → no watcher activity after the answer (`subagentActivityAt`
10954
+ // undefined or ≤ finalAnswerDeliveredAt). Stay silent; the
10955
+ // reply-is-last invariant is fully preserved for idle turns.
10956
+ // - 'stale' → the worker's last advance is older than POST_ANSWER_LIVENESS_STALE_MS
10957
+ // (its `onFinish` froze `subagentActivityAt` and no new step has
10958
+ // arrived). STOP re-rendering so the card doesn't climb `running`
10959
+ // forever — mirrors the pre-answer FEED_LIVENESS_OPEN_MS cap. The
10960
+ // worker's own terminal card (workerActivityFeed.finish) is the
10961
+ // durable record once it completes.
10962
+ // - 'emit' → genuine in-flight post-answer activity; render the card below.
10963
+ const subagentAt = turn.subagentActivityAt
10964
+ const livenessVerdict = evaluatePostAnswerLiveness({
10965
+ subagentActivityAt: subagentAt,
10966
+ finalAnswerDeliveredAt: turn.finalAnswerDeliveredAt,
10967
+ now: Date.now(),
10968
+ staleCapMs: POST_ANSWER_LIVENESS_STALE_MS,
10969
+ })
10970
+ if (livenessVerdict !== 'emit' || subagentAt == null) return // idle gap or stale worker → stay silent (the `== null` also narrows subagentAt for the elapsed below)
10971
+ // A background worker is genuinely active after the answer. Open or maintain
10972
+ // a liveness card below the reply. Route through `mayOpenActivityCard` with
10973
+ // `postAnswerSubagentActivity:true` so Lever 1 is lifted for 'tool' producer
10974
+ // (Fix 2's Lever 1 exception in feed-open-gate.ts). The card renders the
10975
+ // turn's accumulated mirrorLines (which may be empty — in that case this tick
10976
+ // renders a "Working in background…" placeholder line instead, analogous to the pre-answer liveness path's "Working…").
10977
+ if (turn.sessionChatId == null) return
10978
+ const age = Date.now() - turn.startedAt
10979
+ const livenessHeader: SessionActivityHeader = {
10980
+ label: 'Agent', elapsedMs: age, toolCount: turn.labeledToolCount, state: 'running',
10981
+ }
10982
+ const lines = turn.mirrorLines.length > 0 ? turn.mirrorLines : ['Working in background…']
10983
+ const elapsed = Date.now() - subagentAt
10984
+ const rendered = renderActivityFeedWithNested(lines, [], false, ` · ${formatFeedElapsed(elapsed)}`, undefined, livenessHeader)
10985
+ if (rendered == null) return
10986
+ turn.activityPendingRender = rendered
10987
+ const ea = emissionAuthorityFor(turn)
10988
+ cardDrainGate(turn, ea, () => {
10989
+ if (ea.mayDrain(turn)) {
10990
+ // Producer 'tool' with postAnswerSubagentActivity=true: the Lever 1
10991
+ // exception allows this OPEN. Lever 4 (cross-turn) and idle-liveness
10992
+ // blocks are still respected by the drain. The card surfaces BELOW the
10993
+ // reply showing the background agent's live activity.
10994
+ ea.openOrEditCard('tool', () => {
10995
+ turn.activityInFlight = drainActivitySummary(turn, 'tool', { postAnswerSubagentActivity: true })
10996
+ })
10997
+ }
10998
+ })
10999
+ return
11000
+ }
11001
+
11002
+ // Liveness feed (open + maintain). `mirrorLines.length === 0` means no tool
11003
+ // has ever produced a label this turn — pure thinking, or only suppressed
11004
+ // tools. Open a minimal "Working…" feed once the turn passes the threshold,
11005
+ // and keep its elapsed climbing until a real label arrives. The first label
11006
+ // makes mirrorLines non-empty, so the labelled-feed heartbeat below takes
11007
+ // over and its edit cleanly replaces the placeholder. drainActivitySummary
11008
+ // sends (opens) when activityMessageId is null and edits (maintains) once set
11009
+ // — so this one branch handles both the open and the climb.
11010
+ if (turn.mirrorLines.length === 0) {
11011
+ if (!FEED_LIVENESS_OPEN_ENABLED || turn.sessionChatId == null) return
11012
+ const age = Date.now() - turn.startedAt
11013
+ if (age < FEED_LIVENESS_OPEN_MS) return
11014
+ const livenessHeader: SessionActivityHeader = {
11015
+ label: 'Agent', elapsedMs: age, toolCount: 0, state: 'running',
11016
+ }
11017
+ const rendered = renderActivityFeedWithNested(['Working…'], [], false, ` · ${formatFeedElapsed(age)}`, undefined, livenessHeader)
11018
+ if (rendered == null) return
11019
+ turn.activityPendingRender = rendered
11020
+ const ea = emissionAuthorityFor(turn)
11021
+ // PR-4d: route through the centralized chatLock-serialized card-drain gate.
11022
+ cardDrainGate(turn, ea, () => {
11023
+ if (ea.mayDrain(turn)) {
11024
+ // Producer C (liveness timer): the genuine ≥12s thinking-gap open. Now
11025
+ // that Lever 5 is inert (narrative may open pre-answer — #2588), liveness
11026
+ // remains the natural open for 0-tool pre-answer turns that are silent.
11027
+ // The sticky-latch (lever 1) still gates it in the drain.
11028
+ // PR-4a: routed through the emission-authority façade (no-op delegate).
11029
+ ea.openOrEditCard('liveness', () => {
11030
+ turn.activityInFlight = drainActivitySummary(turn, 'liveness')
11031
+ })
11032
+ }
11033
+ })
11034
+ return
11035
+ }
11036
+
11037
+ // Labelled-feed heartbeat: keep a stale in-progress step visibly advancing.
9983
11038
  if (turn.activityMessageId == null) return // no live feed yet / already cleared
9984
- if (turn.finalAnswerDelivered) return // feed handed off to the answer
9985
11039
  if (turn.lastToolLabelAt == null) return // feed not driven by a labelled step
9986
11040
  const elapsed = Date.now() - turn.lastToolLabelAt
9987
11041
  if (elapsed < FEED_HEARTBEAT_MIN_STALE_MS) return // step is fresh; feed advancing normally
9988
11042
  const rendered = composeTurnActivity(turn, false, ` · ${formatFeedElapsed(elapsed)}`)
9989
11043
  if (rendered == null) return
9990
11044
  turn.activityPendingRender = rendered
9991
- if (turn.activityInFlight == null) {
9992
- turn.activityInFlight = drainActivitySummary(turn)
11045
+ const ea = emissionAuthorityFor(turn)
11046
+ // PR-4d: route through the centralized chatLock-serialized card-drain gate.
11047
+ cardDrainGate(turn, ea, () => {
11048
+ if (ea.mayDrain(turn)) {
11049
+ // Maintains an already-open card (guarded above on activityMessageId !=
11050
+ // null) → only ever EDITs. 'liveness' is correct either way.
11051
+ // PR-4a: routed through the emission-authority façade (no-op delegate).
11052
+ ea.openOrEditCard('liveness', () => {
11053
+ turn.activityInFlight = drainActivitySummary(turn, 'liveness')
11054
+ })
9993
11055
  }
11056
+ })
9994
11057
  }
9995
11058
  if (!STATIC && FEED_HEARTBEAT_ENABLED) {
9996
11059
  setInterval(feedHeartbeatTick, FEED_HEARTBEAT_TICK_MS).unref()
@@ -10043,8 +11106,19 @@ function clearActivitySummary(turn: CurrentTurn, finalHtmlOverride?: string | nu
10043
11106
  }
10044
11107
  // Default: leave the status message as a record, edited to a terminal
10045
11108
  // all-done state so it doesn't freeze on a misleading "→ in-progress" line.
10046
- const finalHtml =
11109
+ let finalHtml =
10047
11110
  finalHtmlOverride !== undefined ? finalHtmlOverride : composeTurnActivity(turn, true)
11111
+ // Liveness-only feed: opened on the timer for a turn that never labelled a
11112
+ // tool (pure thinking / suppressed tools), so mirrorLines is empty and the
11113
+ // terminal render is null. Finalize to a done "✓ Working…" record instead
11114
+ // of leaving the message frozen on the live "→ Working…" line.
11115
+ if (finalHtml == null && turn.mirrorLines.length === 0 && turn.activityEverOpened) {
11116
+ const livenessElapsed = turn.startedAt > 0 ? Date.now() - turn.startedAt : 0
11117
+ const livenessHeader: SessionActivityHeader = {
11118
+ label: 'Agent', elapsedMs: livenessElapsed, toolCount: turn.labeledToolCount, state: 'done',
11119
+ }
11120
+ finalHtml = renderActivityFeedWithNested(['Working…'], [], true, '', undefined, livenessHeader)
11121
+ }
10048
11122
  if (finalHtml == null) return
10049
11123
  try {
10050
11124
  await robustApiCall(
@@ -10111,6 +11185,19 @@ function handleSessionEvent(ev: SessionEvent): void {
10111
11185
  const turnId =
10112
11186
  deriveTurnId(ev.chatId, enqThreadIdNum ?? null, ev.messageId)
10113
11187
  ?? `${chatKey(ev.chatId, enqThreadIdNum ?? null)}#synthetic-${startedAt}`
11188
+ // PR1 (cross-turn stale-card guard, §9 lever 4 / race C/D). Consume any
11189
+ // pending cross-turn gate `obligationSweep` armed for THIS exact turn
11190
+ // when it pushed an `obligation_represent` inbound. The gate is keyed on
11191
+ // the obligation's `originTurnId`, and the represent inbound reuses the
11192
+ // original chat/thread/messageId, so this turn's `turnId` (derived just
11193
+ // above) equals that key iff this turn IS the represent surface armed for.
11194
+ // An unrelated foreground turn on the same chat/thread derives a
11195
+ // different `turnId` → finds no entry → no gate → its card opens normally
11196
+ // (correct). Consume-once: delete on read so the matched gate can't leak
11197
+ // forward, and a never-matched stale gate can never suppress another turn.
11198
+ const xTurnGateKey = turnId
11199
+ const consumedCrossTurnGate = pendingCrossTurnGate.get(xTurnGateKey)
11200
+ if (consumedCrossTurnGate != null) pendingCrossTurnGate.delete(xTurnGateKey)
10114
11201
  const next: CurrentTurn = {
10115
11202
  sessionChatId: ev.chatId,
10116
11203
  sessionThreadId: enqThreadIdNum,
@@ -10123,20 +11210,34 @@ function handleSessionEvent(ev: SessionEvent): void {
10123
11210
  sourceMessageId: parseSourceMessageId(ev.messageId),
10124
11211
  startedAt,
10125
11212
  gatewayReceiveAt: startedAt,
11213
+ // #2527 — stamp the loop role once, from the enqueue envelope.
11214
+ role: deriveTurnRole(ev.rawContent),
11215
+ // PR1 (cross-turn stale-card guard, §9 lever 4 / race C/D). Only a
11216
+ // synthetic represent/owed-reply turn carries this; a foreground turn
11217
+ // leaves it undefined and the cross-turn card-OPEN gate is inert.
11218
+ ...(consumedCrossTurnGate != null ? { crossTurnGate: consumedCrossTurnGate } : {}),
10126
11219
  replyCalled: false,
10127
11220
  finalAnswerDelivered: false,
10128
11221
  finalAnswerSubstantive: false,
11222
+ // Sticky latch — reset ONLY here (turn start), never by reopen.
11223
+ finalAnswerEverDelivered: false,
10129
11224
  firstPingAt: null,
11225
+ // Notification ownership (R8 / PR-2): no slot claimed yet, so the
11226
+ // "claimer was substantive" flag starts false. Set atomically with
11227
+ // firstPingAt at the over-ping decision site.
11228
+ firstPingWasSubstantive: false,
10130
11229
  silentAnchorMessageId: null,
10131
11230
  silentAnchorText: '',
10132
11231
  capturedText: [],
10133
11232
  orphanedReplyTimeoutId: null,
11233
+ orphanedReplyRearmCount: 0,
10134
11234
  turnId,
10135
11235
  registryKey: null,
10136
11236
  noReplyDrainTimer: null,
10137
11237
  lastAssistantMsgId: null,
10138
11238
  lastAssistantDone: false,
10139
11239
  toolCallCount: 0,
11240
+ labeledToolCount: 0,
10140
11241
  activityMessageId: null,
10141
11242
  activityInFlight: null,
10142
11243
  activityPendingRender: null,
@@ -10144,11 +11245,24 @@ function handleSessionEvent(ev: SessionEvent): void {
10144
11245
  activityEverOpened: false,
10145
11246
  activityDrainFailures: 0,
10146
11247
  mirrorLines: [],
11248
+ pendingNarrative: null,
11249
+ lastReplyText: '',
10147
11250
  foregroundSubAgents: new Map(),
10148
11251
  answerStream: null,
10149
11252
  isDm: isDmChatId(ev.chatId),
11253
+ // PR-4a — construct ONE emission-authority façade per turn, passing
11254
+ // the chat/thread key in EXPLICITLY (the PR-4e seam; today equal to
11255
+ // the singleton-sourced key). Per-turn: born with this turn literal,
11256
+ // discarded with it — never persists across turns.
11257
+ emissionAuthority: new EmissionAuthority(
11258
+ statusKey(ev.chatId, enqThreadIdNum),
11259
+ ),
10150
11260
  }
10151
- currentTurn = next
11261
+ // PR-4e — route the turn-SET through the keyed accessor: flag-OFF assigns
11262
+ // the singleton (byte-identical to `currentTurn = next`); flag-ON sets the
11263
+ // per-topic `byKey[statusKey]` entry AND the most-recent mirror. The key is
11264
+ // the SAME statusKey the ctor's façade was constructed with just above.
11265
+ setCurrentTurn(next, statusKey(ev.chatId, enqThreadIdNum))
10152
11266
  markIdleActivity() // any turn start (main session) is activity — re-arm idle clear
10153
11267
  // Status-surface observability: one line at every turn SET so a later
10154
11268
  // dark card is traceable to which turn/topic key it belonged to.
@@ -10278,6 +11392,14 @@ function handleSessionEvent(ev: SessionEvent): void {
10278
11392
  case 'tool_use': {
10279
11393
  const turn = currentTurn
10280
11394
  if (turn == null) return
11395
+ // Narrative-dedup gate step 2 (JSONL-text-narrative primitive): a
11396
+ // narrative block was pending; this tool_use is the lookahead event
11397
+ // that decides it. reply/stream_reply with near-identical text ⇒
11398
+ // draft-then-send ⇒ SUPPRESS (the reply prints the canonical answer);
11399
+ // anything else ⇒ SHOW as a transient liveness step. Runs BEFORE the
11400
+ // normal tool handling so a working preamble surfaces just ahead of
11401
+ // its tool step.
11402
+ resolvePendingNarrativeOnTool(turn, ev.toolName, ev.input)
10281
11403
  // Phase 1 of #332: count every tool_use in the current turn.
10282
11404
  turn.toolCallCount++
10283
11405
  // #412: bump turn-active marker mtime so the watchdog sees this
@@ -10300,6 +11422,15 @@ function handleSessionEvent(ev: SessionEvent): void {
10300
11422
  // placeholder-heartbeat label, which has been retired.
10301
11423
  if (isTelegramReplyTool(name)) {
10302
11424
  turn.replyCalled = true
11425
+ // NIT 2 (reply-proxy precision): capture the ACTUAL delivered reply
11426
+ // text so flushPendingNarrativeAtTurnEnd compares a trailing
11427
+ // narrative block against the real answer surface, not
11428
+ // capturedText.join('') (which mis-suppresses when the model emits
11429
+ // the same short string twice in a turn). REPLY_TOOLS ('reply',
11430
+ // 'stream_reply') carry the answer in input.text; only those count.
11431
+ if (REPLY_TOOLS.has(name) && typeof ev.input?.text === 'string') {
11432
+ turn.lastReplyText = ev.input.text as string
11433
+ }
10303
11434
  if (turn.orphanedReplyTimeoutId != null) {
10304
11435
  clearTimeout(turn.orphanedReplyTimeoutId)
10305
11436
  turn.orphanedReplyTimeoutId = null
@@ -10339,6 +11470,12 @@ function handleSessionEvent(ev: SessionEvent): void {
10339
11470
  // where the JSONL tool_use rows arrive too late.
10340
11471
  const turn = currentTurn
10341
11472
  if (turn == null) return
11473
+ // SECONDARY FIX: an active tool_label means the model is producing work
11474
+ // right now — re-arm the orphaned-reply fuse so a multi-phase tool turn
11475
+ // (write → compile → test → fix) that regularly emits labels doesn't let
11476
+ // the 30 s timer run down between labels. Mirrors how `case 'text':` calls
11477
+ // resetOrphanedReplyTimeout() at ~line 10786.
11478
+ resetOrphanedReplyTimeout()
10342
11479
  // Surface tools (reply/stream_reply/react) are the conversation, not
10343
11480
  // activity — the hook labels them ("Replying"), so filter by name.
10344
11481
  if (isTelegramSurfaceTool(ev.toolName)) return
@@ -10400,6 +11537,14 @@ function handleSessionEvent(ev: SessionEvent): void {
10400
11537
  }
10401
11538
  const rendered = appendActivityLabel(turn.mirrorLines, ev.label)
10402
11539
  if (rendered != null) {
11540
+ // Count surfaced tool steps — the single source of truth for the `tools=`
11541
+ // lifecycle field and the `✓ N steps` total. Incremented HERE (not at the
11542
+ // top of the case) so the count stays consistent with what the feed
11543
+ // actually surfaces: an empty label (appendActivityLabel → null) or a
11544
+ // label dropped by the post-final-answer reopen guard never inflates it.
11545
+ // Surface tools (reply/react) returned earlier; send_typing/sync_retain
11546
+ // are suppressed at the hook (computeLabel → null) so they never arrive.
11547
+ turn.labeledToolCount++
10403
11548
  // A new tool label = a new live step → re-anchor the heartbeat clock so
10404
11549
  // the " · Ns" elapsed restarts from this step (and the feed itself just
10405
11550
  // advanced, so it isn't stale).
@@ -10411,16 +11556,34 @@ function handleSessionEvent(ev: SessionEvent): void {
10411
11556
  // and would falsely reset the clock forever on a hung-mid-tool turn,
10412
11557
  // reintroducing the #1556 dangling-turn wedge. Only the model emitting a
10413
11558
  // fresh label reaches here.
10414
- if (SILENCE_LIVENESS_PRODUCTION && currentTurn === turn) {
11559
+ // PR-4e keyed liveness under the flag. Flag-OFF keeps the literal
11560
+ // `currentTurn === turn` (a late tool-label for topic A must reset A's
11561
+ // silence clock, not topic B's); flag-ON resolves A by ITS OWN key so a
11562
+ // flip to B doesn't falsify A's liveness here.
11563
+ if (
11564
+ SILENCE_LIVENESS_PRODUCTION &&
11565
+ (EMISSION_AUTHORITY_ENABLED ? turnLiveForItsTopic(turn) : currentTurn === turn)
11566
+ ) {
10415
11567
  silencePoke.noteProduction(statusKey(turn.sessionChatId, turn.sessionThreadId), Date.now())
10416
11568
  }
10417
11569
  // Recompose so any active foreground sub-agent's nested block (Model A)
10418
11570
  // is preserved when the parent appends its own step. composeTurnActivity
10419
11571
  // == the flat render when no foreground sub-agent is active.
10420
11572
  turn.activityPendingRender = composeTurnActivity(turn) ?? rendered
10421
- if (turn.activityInFlight == null) {
10422
- turn.activityInFlight = drainActivitySummary(turn)
11573
+ const ea = emissionAuthorityFor(turn)
11574
+ // PR-4d: route through the centralized chatLock-serialized card-drain gate.
11575
+ cardDrainGate(turn, ea, () => {
11576
+ if (ea.mayDrain(turn)) {
11577
+ // Producer B (tool label): always OPEN-eligible (labeledToolCount was
11578
+ // incremented just above). A turn that started conversational and now
11579
+ // dispatches a tool opens here, rendering any narration accumulated
11580
+ // by the suppressed narrative-SHOW drains (design §9 lever 5 / R4).
11581
+ // PR-4a: routed through the emission-authority façade (no-op delegate).
11582
+ ea.openOrEditCard('tool', () => {
11583
+ turn.activityInFlight = drainActivitySummary(turn, 'tool')
11584
+ })
10423
11585
  }
11586
+ })
10424
11587
  }
10425
11588
  return
10426
11589
  }
@@ -10447,58 +11610,33 @@ function handleSessionEvent(ev: SessionEvent): void {
10447
11610
  const turn = currentTurn
10448
11611
  if (turn != null) {
10449
11612
  turn.capturedText.push(ev.text)
11613
+ // Narrative-dedup gate step 1 (JSONL-text-narrative primitive):
11614
+ // stage this text block for one lookahead step. If a previous block
11615
+ // was pending with nothing reply-shaped after it, it flushes here as
11616
+ // a SHOWN transient liveness step. The eventual SHOW/SUPPRESS of THIS
11617
+ // block is decided by the next tool_use / turn_end. Invariant
11618
+ // `chat-is-the-single-source-of-truth` (reference/invariants.md): a
11619
+ // SHOWN line rides the same renderStepFeed path as a tool step —
11620
+ // transient + clipped, never a persisted parallel mirror. This is a
11621
+ // separate lane from the answer-stream wiring below (which owns the
11622
+ // canonical reply), so the two never fight over the same text.
11623
+ stagePendingNarrative(turn, ev.text)
10450
11624
  // Issue #195: feed the answer-lane stream. The stream itself
10451
11625
  // gates on minInitialChars and throttles edits — short replies
10452
11626
  // stay below the threshold and never spawn a message.
10453
11627
  if (turn.answerStream == null) {
10454
11628
  turn.answerStream = createAnswerStream({
10455
11629
  chatId: turn.sessionChatId,
10456
- isPrivateChat: turn.isDm,
10457
11630
  threadId: turn.sessionThreadId,
10458
- // Transport selection:
10459
- // #869-Phase1 visible-answer-stream: omit the draft API so
10460
- // the lane edits a user-visible chat-timeline message
10461
- // (minInitialChars:1 opens it on the first chunk). The
10462
- // draft-mirror does NOT touch this lane — the canary proved
10463
- // the model emits almost no interstitial assistant.text
10464
- // (it thinks→tool→reply), so routing it to the draft just
10465
- // emptied the preview. The draft-mirror instead renders the
10466
- // tool_use stream (case 'tool_use' above) where the real
10467
- // signal lives. assistant.text keeps its visible-message
10468
- // home; the reply tool stays the canonical answer.
10469
- // Flag OFF (default): use the compose-box draft for DMs, and set
10470
- // minInitialChars effectively-infinite so the lane NEVER opens a
10471
- // visible chat message. This matters in supergroup TOPICS, where
10472
- // draft transport is unsupported (gateway.ts:6422) so the lane
10473
- // would otherwise fall to message transport and post a visible
10474
- // preview once interstitial text passed the default 50-char gate
10475
- // — which retract() then deletes (the unformatted flash, marko
10476
- // General). With the gate unreachable the only posted message is
10477
- // the canonical reply. (The gate is bypassed for DM draft
10478
- // transport, so DM draft streaming is unaffected.)
10479
- // VISIBLE preview gating decoupled from the draft-transport flag
10480
- // (2026-06-05 flash regression fix). The visible flag ALONE decides
10481
- // whether a user-visible preview opens; DRAFT_ANSWER_LANE_RETIRED
10482
- // controls only the TRANSPORT (whether sendMessageDraftFn exists).
10483
- // The earlier `|| DRAFT_ANSWER_LANE_RETIRED` here meant retiring the
10484
- // draft (the default since v0.14.68) silently forced minInitialChars:1
10485
- // → a visible preliminary opened on every streaming turn and was then
10486
- // retracted (deleted) when the reply tool fired — the exact "raw bubble
10487
- // appears, formatted reply lands, raw bubble vanishes" flash that
10488
- // turning the visible stream OFF (v0.14.52) was meant to remove. So
10489
- // v0.14.68 silently undid v0.14.52 fleet-wide. Now:
10490
- // - VISIBLE on (opt-in) → minInitialChars:1, a real edit-in-place
10491
- // preview (observable by UAT, silence-liveness reset on its sends).
10492
- // - VISIBLE off (default) → minInitialChars:MAX so NO visible preview
10493
- // ever opens; the reply tool is the single canonical formatted
10494
- // message (no flash). With the draft retired (default) there is no
10495
- // transport either, so the lane stays dormant; with the kill switch
10496
- // DRAFT_ANSWER_LANE=0 the legacy compose-box draft transport is
10497
- // restored (sendMessageDraftFn defined above, gate bypassed for DM
10498
- // draft so #1664 DM draft streaming is unaffected).
10499
- ...(ANSWER_LANE.usesDraftTransport
10500
- ? { sendMessageDraft: sendMessageDraftFn, minInitialChars: ANSWER_LANE.minInitialChars }
10501
- : { minInitialChars: ANSWER_LANE.minInitialChars }),
11631
+ // VISIBLE on (opt-in, SWITCHROOM_VISIBLE_ANSWER_STREAM=1) →
11632
+ // minInitialChars:1 opens a user-visible edit-in-place preview on the
11633
+ // first text chunk. At turn_end the preview is materialized as a pinged
11634
+ // final answer (materialize()) when the model never called reply.
11635
+ // VISIBLE off (default) → minInitialChars:MAX so NO visible preview ever
11636
+ // opens; the reply tool is the single canonical formatted message
11637
+ // (no flash). The draft transport is permanently retired — both modes
11638
+ // use sendMessage + editMessageText for any message that does open.
11639
+ minInitialChars: ANSWER_LANE.minInitialChars,
10502
11640
  // #1075: route through robustApiCall so flood-wait,
10503
11641
  // benign-400, and THREAD_NOT_FOUND are handled uniformly
10504
11642
  // instead of crashing the answer-stream loop on a deleted
@@ -10589,7 +11727,11 @@ function handleSessionEvent(ev: SessionEvent): void {
10589
11727
  // skip the tick (the new turn has its own answer stream).
10590
11728
  onMetric: (metricEv) => {
10591
11729
  logStreamingEvent(metricEv)
10592
- if (currentTurn === turn) {
11730
+ // PR-4e keyed liveness under the flag. Flag-OFF keeps the literal
11731
+ // `currentTurn === turn` (a draft-update metric for topic A's stream
11732
+ // must tick A's signal/silence clock); flag-ON resolves A by its own
11733
+ // key so a flip to B doesn't skip A's tick.
11734
+ if (EMISSION_AUTHORITY_ENABLED ? turnLiveForItsTopic(turn) : currentTurn === turn) {
10593
11735
  signalTracker.noteSignal(
10594
11736
  statusKey(turn.sessionChatId, turn.sessionThreadId),
10595
11737
  Date.now(),
@@ -10717,6 +11859,28 @@ function handleSessionEvent(ev: SessionEvent): void {
10717
11859
  return
10718
11860
  }
10719
11861
  case 'turn_end': {
11862
+ // DEFENSIVE FIX: belt-and-braces guard against the synthetic backstop
11863
+ // (`durationMs: -1`) racing live work. durationMs >= 0 is the
11864
+ // authoritative signal from system/turn_duration; -1 is ONLY ever set
11865
+ // by the orphaned-reply backstop. Reject the synthetic event here so that
11866
+ // even if the PRIMARY fix's re-arm logic is bypassed (e.g. a very fast
11867
+ // fire before isLegitimatelyWorking() is sampled) we still don't tear
11868
+ // down a live feed mid-work. Extended from the original isMidToolCall()
11869
+ // check to the full isLegitimatelyWorking predicate so detached background
11870
+ // work and human-wait tools (ask_user) are also protected.
11871
+ // INVARIANT: a REAL turn_end (durationMs >= 0) is NEVER suppressed.
11872
+ if (ev.durationMs === -1) {
11873
+ const turn = currentTurn
11874
+ const key = turn != null ? statusKey(turn.sessionChatId, turn.sessionThreadId) : ''
11875
+ if (isLegitimatelyWorking(key)) {
11876
+ process.stderr.write(
11877
+ `telegram gateway: synthetic turn_end suppressed — legitimately working` +
11878
+ ` (in_flight=${toolFlightTracker.inFlightCount()},` +
11879
+ ` bg_work=${turn != null ? pendingProgress.hasPendingAsyncDispatch(key) : false})\n`,
11880
+ )
11881
+ return
11882
+ }
11883
+ }
10720
11884
  // Drain any still-pending tool dispatch typing entries — covers
10721
11885
  // transcript truncation or a Claude Code crash mid-tool.
10722
11886
  typingWrapper.drainAll()
@@ -10733,6 +11897,33 @@ function handleSessionEvent(ev: SessionEvent): void {
10733
11897
  clearTimeout(turn.orphanedReplyTimeoutId)
10734
11898
  turn.orphanedReplyTimeoutId = null
10735
11899
  }
11900
+ // Narrative-dedup gate step 3 (JSONL-text-narrative primitive): a
11901
+ // trailing narrative block with nothing after it. When the turn
11902
+ // delivered its answer via reply (replyCalled) the trailing text is
11903
+ // almost always a draft of that answer — compare against the ACTUAL
11904
+ // delivered reply text and SUPPRESS the duplicate; otherwise SHOW
11905
+ // genuine trailing narration ("Done — all green."). Must run BEFORE
11906
+ // clearActivitySummary so a SHOWN line lands in the feed's final
11907
+ // render. Always clears turn.pendingNarrative so it can't leak across
11908
+ // turns.
11909
+ //
11910
+ // NIT 2 (reply-proxy precision): use `turn.lastReplyText` (the
11911
+ // most-recent reply/stream_reply input.text) rather than
11912
+ // `capturedText.join('')`. The old proxy concatenated every captured
11913
+ // text block, so a turn that emitted the same short string twice
11914
+ // (e.g. "Done." as working narration, then "Done." as the reply) would
11915
+ // compare the trailing narration against a doubled "DoneDone" — still
11916
+ // a high-prefix match — and wrongly suppress genuine trailing
11917
+ // narration. Comparing against the actual reply text is exact. When
11918
+ // the turn delivered WITHOUT a reply tool (turn-flush emits
11919
+ // capturedText as the answer), fall back to capturedText.join('') so
11920
+ // that path's trailing-draft suppression is preserved.
11921
+ if (turn != null) {
11922
+ const deliveredText = turn.lastReplyText.length > 0
11923
+ ? turn.lastReplyText
11924
+ : (turn.replyCalled ? turn.capturedText.join('') : '')
11925
+ flushPendingNarrativeAtTurnEnd(turn, deliveredText)
11926
+ }
10736
11927
  // Clear the activity feed at the real end of the turn. This is the
10737
11928
  // no-reply safety net — a turn that ends without ever calling reply
10738
11929
  // (the answer is delivered by turn-flush / silent-end) still has its
@@ -10934,6 +12125,17 @@ function handleSessionEvent(ev: SessionEvent): void {
10934
12125
  ` chat=${chatId} turnStartedAt=${turn.startedAt} replyCalled=false capturedText=empty` +
10935
12126
  ` — the progress card steps were the only thing the user saw (#45)\n`,
10936
12127
  )
12128
+ // #2527: emit structured WARN so the reaction-only failure mode is
12129
+ // machine-readable in the streaming-metrics channel.
12130
+ const tKey = statusKey(chatId, threadId)
12131
+ logStreamingEvent({
12132
+ kind: 'turn_no_reply_warn',
12133
+ chatId,
12134
+ threadId,
12135
+ turnId: turn.turnId,
12136
+ turnDurationMs: turn.startedAt > 0 ? Date.now() - turn.startedAt : 0,
12137
+ reactionCount: reactionTransitionCounts.get(tKey) ?? 0,
12138
+ })
10937
12139
  }
10938
12140
  }
10939
12141
 
@@ -11291,10 +12493,39 @@ function handleSessionEvent(ev: SessionEvent): void {
11291
12493
  }
11292
12494
 
11293
12495
  // #1713: turn_end is THE terminal trigger. Finalize via the
11294
- // single terminal path (👍). Any prior intermediate states
11295
- // pending in the debounce window are flushed by `finalize()`
11296
- // before the terminal emoji emits.
11297
- finalizeStatusReaction(chatId, threadId, 'done')
12496
+ // single terminal path. Any prior intermediate states pending in
12497
+ // the debounce window are flushed by `finalize()` before the
12498
+ // terminal emoji emits.
12499
+ //
12500
+ // #2527 — role-aware terminal honesty: a USER turn that ends without
12501
+ // a delivered answer must NOT paint 👍 (the operator's "thumbs up so
12502
+ // it feels like you're done" report). It finalizes to the gentle
12503
+ // 'undelivered' terminal (😐) instead; the silent-end fallback below
12504
+ // carries the apology text. system/cron turns and NO_REPLY/HEARTBEAT_OK
12505
+ // turns (which return earlier) keep 👍 — their silence is legitimate.
12506
+ let terminalReason = decideTerminalReason({
12507
+ enabled: LIVENESS_TERMINAL_HONESTY,
12508
+ role: turn.role,
12509
+ finalAnswerDelivered: turn.finalAnswerDelivered,
12510
+ })
12511
+ // #2527 review note 1 — worker-hold carve-out: if the turn is STILL
12512
+ // legitimately working at turn_end (a background sub-agent the parent
12513
+ // dispatched is running on), don't prematurely paint 😐. Fall back to
12514
+ // 'done' so the existing deferred-done path holds ✍️ until the worker
12515
+ // completes (then 👍) — the worker-activity feed carries the progress.
12516
+ // Only a turn that genuinely ended undelivered AND is not still working
12517
+ // gets the honest 😐.
12518
+ if (terminalReason === 'undelivered' && isLegitimatelyWorking(statusKey(chatId, threadId))) {
12519
+ terminalReason = 'done'
12520
+ }
12521
+ if (terminalReason === 'undelivered') {
12522
+ process.stderr.write(
12523
+ `telegram gateway: WARN turn_no_reply — user turn ended with an ` +
12524
+ `ambient ack but no delivered answer; painting 😐 not 👍 ` +
12525
+ `chat=${chatId} thread=${threadId ?? '-'} turnId=${turn.turnId} (#2527)\n`,
12526
+ )
12527
+ }
12528
+ finalizeStatusReaction(chatId, threadId, terminalReason)
11298
12529
  {
11299
12530
  const sKey = streamKey(chatId, threadId)
11300
12531
  const turnDurationMs = turn.startedAt > 0 ? Date.now() - turn.startedAt : 0
@@ -11773,6 +13004,9 @@ async function handleInboundCoalesced(
11773
13004
  // - msgId present (always true for `bot.on('message:*')` paths but
11774
13005
  // defensive against future routers that might call this without one).
11775
13006
  maybeEarlyAckReaction(ctx, from)
13007
+ // #2527 — if this lands mid-turn, the user is asking "what's happening?";
13008
+ // fire the liveness floor immediately (DM + supergroup alike).
13009
+ maybePokeFloorForMidTurnInbound(ctx, from)
11776
13010
 
11777
13011
  const key = inboundCoalesceKey(
11778
13012
  String(ctx.chat!.id),
@@ -11801,6 +13035,14 @@ function maybeEarlyAckReaction(ctx: Context, from: NonNullable<Context['from']>)
11801
13035
  const msgId = ctx.message?.message_id
11802
13036
  if (msgId == null) return
11803
13037
  const chatType = ctx.chat?.type
13038
+ // Intentionally DM-only (#2527 surface-parity note): pre-acking a GROUP
13039
+ // message risks reacting to one the full gate (requireMention / topic
13040
+ // scoping) would later DROP. The SUBSTANTIVE liveness parity a supergroup
13041
+ // needs — the mid-turn floor and the role-aware terminal reaction — is
13042
+ // surface-agnostic (keyed on statusKey + loop role, no chat-type branch),
13043
+ // so a forum topic gets identical never-silent guarantees without this
13044
+ // sub-second 👀 optimisation. See `maybePokeFloorForMidTurnInbound` for
13045
+ // the surface-agnostic "Status?" short-circuit.
11804
13046
  if (chatType !== 'private') return
11805
13047
  const chatId = String(ctx.chat!.id)
11806
13048
  const threadId = ctx.message?.is_topic_message ? ctx.message.message_thread_id : undefined
@@ -11810,6 +13052,9 @@ function maybeEarlyAckReaction(ctx: Context, from: NonNullable<Context['from']>)
11810
13052
  void bot.api.setMessageReaction(chatId, msgId, [
11811
13053
  { type: 'emoji', emoji: '👀' as ReactionTypeEmoji['emoji'] },
11812
13054
  ]).catch(() => {})
13055
+ // #2527: log the early-ack fire so operators can see how often the
13056
+ // fast pre-coalesce DM path triggers vs. the controller path.
13057
+ logStreamingEvent({ kind: 'early_ack_reaction', chatId, messageId: msgId, emoji: '👀' })
11813
13058
  // #553 PR 3: also fire the native "typing…" indicator. Bridges the
11814
13059
  // visual gap between the early-ack 👀 reaction and the first real
11815
13060
  // model text. No fake content — Telegram clients render this natively
@@ -11818,6 +13063,26 @@ function maybeEarlyAckReaction(ctx: Context, from: NonNullable<Context['from']>)
11818
13063
  void bot.api.sendChatAction(chatId, 'typing').catch(() => {})
11819
13064
  }
11820
13065
 
13066
+ /**
13067
+ * #2527 — "Status?" short-circuit. A message arriving DURING an active turn
13068
+ * (the user explicitly asking what's happening) fires the mid-turn liveness
13069
+ * floor immediately, bypassing the timer/working gates. Surface-agnostic:
13070
+ * works identically in a DM and a forum-supergroup topic (keyed on statusKey).
13071
+ * Idempotent per turn (the floor's fire-once latch) and kill-switch-gated.
13072
+ */
13073
+ function maybePokeFloorForMidTurnInbound(ctx: Context, from: NonNullable<Context['from']>): void {
13074
+ const rawChatId = ctx.chat?.id
13075
+ if (rawChatId == null) return
13076
+ const chatId = String(rawChatId)
13077
+ const threadId = ctx.message?.is_topic_message ? ctx.message.message_thread_id : undefined
13078
+ const key = statusKey(chatId, threadId)
13079
+ // Only mid-turn: a turn must already be in flight for this (chat, thread).
13080
+ if (!activeTurnStartedAt.has(key)) return
13081
+ const access = loadAccess()
13082
+ if (!access.allowFrom.includes(String(from.id))) return
13083
+ silencePoke.pokeFloorNow(key, Date.now())
13084
+ }
13085
+
11821
13086
  async function handleInbound(
11822
13087
  ctx: Context,
11823
13088
  text: string,
@@ -12723,17 +13988,42 @@ async function handleInbound(
12723
13988
  if (!chatAvailableReactions.has(chat_id)) {
12724
13989
  probeAvailableReactions(chat_id)
12725
13990
  }
13991
+ // #2527: use inbound msgId as a stable per-turn reaction identifier.
13992
+ // The controller is created before currentTurn.turnId is assigned
13993
+ // (that happens in handleSessionEvent's enqueue branch), so we capture
13994
+ // msgId here and use it as the reaction-session token in log events.
13995
+ const ctrlTurnToken = `${chat_id}:${msgId}`
12726
13996
  const ctrl = new StatusReactionController(async (emoji) => {
12727
13997
  await bot.api.setMessageReaction(chat_id, msgId, [
12728
13998
  { type: 'emoji', emoji: emoji as ReactionTypeEmoji['emoji'] },
12729
13999
  ])
12730
14000
  // #203: every status-reaction transition is a user-visible signal.
12731
14001
  signalTracker.noteSignal(key, Date.now())
12732
- }, allowedReactions)
14002
+ }, allowedReactions, {
14003
+ // #2527: emit a structured transition event on each emoji change so
14004
+ // the reaction lifecycle is visible in streaming-metrics logs. Also
14005
+ // increment the per-key counter for the turn_no_reply_warn metric.
14006
+ onTransition: (emoji) => {
14007
+ reactionTransitionCounts.set(key, (reactionTransitionCounts.get(key) ?? 0) + 1)
14008
+ logStreamingEvent({
14009
+ kind: 'status_reaction_transition',
14010
+ chatId: chat_id,
14011
+ turnId: ctrlTurnToken,
14012
+ emoji,
14013
+ })
14014
+ },
14015
+ })
12733
14016
  activeStatusReactions.set(key, ctrl)
12734
14017
  activeReactionMsgIds.set(key, { chatId: chat_id, messageId: msgId })
12735
14018
  activeTurnStartedAt.set(key, Date.now())
12736
14019
  progressUpdateTurnCount.set(key, 0) // Reset turn counter
14020
+ // #2527: log controller install so the lifecycle start is observable.
14021
+ logStreamingEvent({
14022
+ kind: 'status_reaction_install',
14023
+ chatId: chat_id,
14024
+ turnId: ctrlTurnToken,
14025
+ messageId: msgId,
14026
+ })
12737
14027
  ctrl.setQueued()
12738
14028
  // #203: time-to-ack metric — setQueued() triggers the initial 👀 reaction
12739
14029
  // asynchronously through the controller chain.
@@ -13300,6 +14590,18 @@ function getCommandArgs(ctx: Context): string {
13300
14590
  return m ? m[1].trim() : ''
13301
14591
  }
13302
14592
 
14593
+ /**
14594
+ * True when a slash command's argument string carries a trailing `demo`
14595
+ * token — the per-command PII-mask modifier for screen recordings
14596
+ * (`/usage demo`, `/auth demo`, `/status demo`, `/whoami demo`). Matches
14597
+ * `demo` as the last whitespace-delimited token, case-insensitively, so
14598
+ * `/auth show demo` and `/usage demo` both flip the flag while a label
14599
+ * literally named `demo-foo` does not.
14600
+ */
14601
+ function hasDemoFlag(args: string): boolean {
14602
+ return /(?:^|\s)demo$/i.test(args.trim())
14603
+ }
14604
+
13303
14605
  /** Validate that a string looks like a safe agent/resource name.
13304
14606
  * Agent names should be alphanumeric with hyphens/underscores only.
13305
14607
  * This prevents shell metacharacter injection even though both exec
@@ -13790,6 +15092,9 @@ function notifyDetachedFailure(
13790
15092
  lockedBot.api.sendMessage(chatId, text, {
13791
15093
  parse_mode: 'HTML',
13792
15094
  link_preview_options: { is_disabled: true },
15095
+ // Detached restart/update child-failure notice — status, not
15096
+ // the user's answer. Silence the open ping.
15097
+ disable_notification: true,
13793
15098
  ...(threadId != null ? { message_thread_id: threadId } : {}),
13794
15099
  }),
13795
15100
  {
@@ -14577,9 +15882,10 @@ bot.command('status', async ctx => {
14577
15882
  const { access, senderId } = gated
14578
15883
  const from = ctx.from!
14579
15884
  if (access.allowFrom.includes(senderId)) {
15885
+ const demo = hasDemoFlag(getCommandArgs(ctx))
14580
15886
  const userTag = from.username ? `@${from.username}` : senderId
14581
15887
  const meta = await buildAgentMetadata(getMyAgentName())
14582
- await ctx.reply(buildStatusPairedText({ user: userTag, meta }), { parse_mode: 'HTML' })
15888
+ await ctx.reply(buildStatusPairedText({ user: userTag, meta, demo }), { parse_mode: 'HTML' })
14583
15889
  return
14584
15890
  }
14585
15891
  for (const [code, p] of Object.entries(access.pending)) {
@@ -14712,10 +16018,12 @@ bot.command('model', async ctx => {
14712
16018
  // `/effort` — show or switch the reasoning effort for the live session.
14713
16019
  // The effort sibling of `/model`: bare form renders a five-button menu
14714
16020
  // (low/medium/high/xhigh/max, the live level ✅), a typed form
14715
- // `/effort <level>` sets it directly. Both ride the allowlisted inject
14716
- // primitive (claude's own `/effort` REPL command), session-scoped boot
14717
- // re-pins the configured default via start.sh's `--effort`. Implementation
14718
- // in effort-command.ts so it's unit-testable without booting the bot.
16021
+ // `/effort <level>` sets it directly. Both ride the dedicated `applyEffort`
16022
+ // driver (claude's own `/effort` REPL command, with the confirmation modal
16023
+ // answered so the pane never wedges — NOT the bare inject primitive, which
16024
+ // is blocklisted for `/effort` since #2471), session-scoped boot re-pins
16025
+ // the configured default via start.sh's `--effort`. Implementation in
16026
+ // effort-command.ts so it's unit-testable without booting the bot.
14719
16027
  function buildEffortDeps(): EffortCommandDeps {
14720
16028
  return {
14721
16029
  applyEffort: (agent, level) => applyEffort(agent, level),
@@ -14799,6 +16107,9 @@ bot.command('restart', async ctx => {
14799
16107
  (tid) =>
14800
16108
  lockedBot.api.sendMessage(chatId, ackText, {
14801
16109
  parse_mode: 'HTML', link_preview_options: { is_disabled: true },
16110
+ // Restart acknowledgement is a status notice — silence the
16111
+ // open ping (the "restarted — ready" follow-up is what matters).
16112
+ disable_notification: true,
14802
16113
  ...(tid != null ? { message_thread_id: tid } : {}),
14803
16114
  }),
14804
16115
  { threadId, chat_id: chatId, verb: 'restart.ack' },
@@ -14940,6 +16251,9 @@ async function handleNewCommand(ctx: Context): Promise<void> {
14940
16251
  (tid) =>
14941
16252
  lockedBot.api.sendMessage(chatId, ackText, {
14942
16253
  parse_mode: 'HTML', link_preview_options: { is_disabled: true },
16254
+ // /new /reset acknowledgement is a status notice — silence the
16255
+ // open ping (the post-restart greeting card is what matters).
16256
+ disable_notification: true,
14943
16257
  ...(tid != null ? { message_thread_id: tid } : {}),
14944
16258
  }),
14945
16259
  { threadId, chat_id: chatId, verb: 'new-or-reset.ack' },
@@ -15142,6 +16456,9 @@ bot.command('update', async ctx => {
15142
16456
  lockedBot.api.sendMessage(chatId, ackText, {
15143
16457
  parse_mode: 'HTML',
15144
16458
  link_preview_options: { is_disabled: true },
16459
+ // "update started" acknowledgement is a status notice — silence
16460
+ // the open ping (the post-restart greeting card is what matters).
16461
+ disable_notification: true,
15145
16462
  ...(tid != null ? { message_thread_id: tid } : {}),
15146
16463
  }),
15147
16464
  { threadId, chat_id: chatId, verb: 'update.ack' },
@@ -15603,6 +16920,36 @@ const fleetFallbackGate = createFleetFallbackGate({
15603
16920
  brokerReachable: isAuthBrokerSocketReachable,
15604
16921
  })
15605
16922
 
16923
+ /**
16924
+ * Resume-after-swap gate (auth-failover-stall fix). Owns the single-flight +
16925
+ * staleness decision for re-running the turn a mid-turn 429 killed. See
16926
+ * fleet-fallback-resume.ts. Wired into doFireFleetAutoFallback below: on a
16927
+ * 'switched' outcome we restart so the boot-resume path replays the dead turn
16928
+ * on the freshly-active account. `maxAgeMs` is SWITCHROOM_RESUME_MAX_AGE_MS
16929
+ * when that parses to a finite number > 0, else 10_800_000 ms (3h, mirroring the
16930
+ * boot-resume RESUME_MAX_AGE_MS failsafe); single-flight stops a 429 restart loop.
16931
+ */
16932
+ const fleetFallbackResumeGate = createFleetFallbackResumeGate({
16933
+ maxAgeMs: (() => {
16934
+ const v = Number(process.env.SWITCHROOM_RESUME_MAX_AGE_MS)
16935
+ return Number.isFinite(v) && v > 0 ? v : 10_800_000 // 3h, matches boot-resume
16936
+ })(),
16937
+ })
16938
+
16939
+ /**
16940
+ * Newest start time (epoch-ms) among all `activeTurnStartedAt` entries — the
16941
+ * staleness signal for the resume gate. Entries are stamped on inbound receipt
16942
+ * (see the map's declaration). Returns null when no turn is tracked (the gate
16943
+ * then defers staleness to the boot-resume 3h failsafe). NOTE(review): assumes
16944
+ * the newest entry is the turn the 429 killed — confirm with concurrent turns. */
16945
+ function newestActiveTurnStartedAtMs(): number | null {
16946
+ let newest: number | null = null
16947
+ for (const ms of activeTurnStartedAt.values()) {
16948
+ if (newest == null || ms > newest) newest = ms
16949
+ }
16950
+ return newest
16951
+ }
16952
+
15606
16953
  function wouldFireFleetAutoFallback(): boolean {
15607
16954
  return fleetFallbackGate.wouldFire()
15608
16955
  }
@@ -15658,6 +17005,17 @@ async function fireFleetAutoFallback(triggerAgent: string, untilMs?: number): Pr
15658
17005
  */
15659
17006
  let fallbackFailureNoticeState: FallbackFailureNoticeState = { lastSentAtMs: 0 }
15660
17007
 
17008
+ /**
17009
+ * Bug 2 — per-gateway cooldown for the "All accounts blocked" card. The
17010
+ * all-blocked outcome is a no-op swap (doFireFleetAutoFallback returns false),
17011
+ * so the fleetFallbackGate dedup window never arms for it, and the ~60s
17012
+ * quota_wall_detected re-trigger would otherwise re-broadcast the identical card
17013
+ * every minute for the life of the wall. This bounds it to one card per window.
17014
+ * Reset on a successful swap so a fresh all-blocked after a recovery (a real new
17015
+ * transition) is not stale-suppressed.
17016
+ */
17017
+ let fallbackAllBlockedNoticeState: FallbackAllBlockedNoticeState = { lastSentAtMs: 0 }
17018
+
15661
17019
  function broadcastFleetFallbackFailure(triggerAgent: string, reason: string): void {
15662
17020
  if (process.env.SWITCHROOM_FLEET_FALLBACK_FAILURE_NOTICE === '0') return
15663
17021
  // Notice-level cooldown (30 min, per gateway). The fleetFallbackGate's
@@ -15743,19 +17101,66 @@ async function doFireFleetAutoFallback(triggerAgent: string, untilMs?: number):
15743
17101
  (outcome.kind === 'switched' ? ` old=${outcome.oldLabel} new=${outcome.newLabel}` : '') +
15744
17102
  '\n',
15745
17103
  )
17104
+ // Bug 2 — the all-blocked card is a no-op outcome, so the gate's dedup
17105
+ // window never arms for it and the ~60s quota_wall_detected re-trigger would
17106
+ // re-broadcast the identical card every minute. Gate it behind a per-gateway
17107
+ // cooldown; a successful swap resets the window so a later (genuinely new)
17108
+ // all-blocked still emits promptly.
17109
+ if (outcome.kind === 'switched') {
17110
+ fallbackAllBlockedNoticeState = { lastSentAtMs: 0 }
17111
+ } else if (outcome.kind === 'all-blocked') {
17112
+ const verdict = evaluateAllBlockedNotice(fallbackAllBlockedNoticeState, Date.now())
17113
+ if (!verdict.send) {
17114
+ process.stderr.write(
17115
+ `telegram gateway: [fleet-fallback] all-blocked card suppressed (cooldown) agent=${triggerAgent}\n`,
17116
+ )
17117
+ return false
17118
+ }
17119
+ fallbackAllBlockedNoticeState = verdict.next
17120
+ }
15746
17121
  // Post the announcement to every authorized chat. Mirrors the
15747
17122
  // operator-event broadcast pattern (line ~2290) — DM-only opts
15748
17123
  // (no message_thread_id) so THREAD_NOT_FOUND can't fire here;
15749
17124
  // wrap in swallowingApiCall anyway per the codebase rule.
15750
17125
  const access = loadAccess()
15751
17126
  if (access.allowFrom.length === 0) return outcome.kind === 'switched'
15752
- const opts = { parse_mode: 'HTML' as const }
17127
+ // Account-switch / all-blocked announcement is a system status notice,
17128
+ // not the user's answer — silence the open ping.
17129
+ const opts = { parse_mode: 'HTML' as const, disable_notification: true }
15753
17130
  for (const chat_id of access.allowFrom) {
15754
17131
  void swallowingApiCall(
15755
17132
  () => bot.api.sendMessage(chat_id, outcome.announcement, opts),
15756
17133
  { chat_id, verb: 'fleet-fallback:notify' },
15757
17134
  )
15758
17135
  }
17136
+ // ── Resume the dead turn (auth-failover-stall fix) ──────────────────────
17137
+ // A mid-turn 429 killed a turn; the swap above moved the fleet to a healthy
17138
+ // account, but that only takes effect on the NEXT claude invocation. Re-run
17139
+ // the dead turn via triggerSelfRestart: the boot-resume path (gateway boot,
17140
+ // findLatestTurnIfInterrupted → buildResumeInterruptedInbound) replays the
17141
+ // LATEST interrupted turn on the freshly-active account. We restart rather
17142
+ // than redeliver because the failed inbound was already DELIVERED (the turn
17143
+ // started, then the model 429'd) so it is NOT in pendingInboundBuffer —
17144
+ // redeliverBufferedInbound would find nothing. Guards live in
17145
+ // fleetFallbackResumeGate: single-flight (a 429 storm cannot loop-restart)
17146
+ // + 3h staleness (an ancient interrupted turn is not resurrected). Only
17147
+ // reached on 'switched'; all-blocked / no-op outcomes never get here, so the
17148
+ // all-blocked cooldown path above is preserved.
17149
+ if (outcome.kind === 'switched') {
17150
+ const verdict = fleetFallbackResumeGate.decide(newestActiveTurnStartedAtMs())
17151
+ if (verdict === 'resume') {
17152
+ const selfAgent = process.env.SWITCHROOM_AGENT_NAME ?? triggerAgent
17153
+ process.stderr.write(
17154
+ `telegram gateway: [fleet-fallback] resuming dead turn via self-restart ` +
17155
+ `agent=${selfAgent} (swap ${outcome.oldLabel}→${outcome.newLabel})\n`,
17156
+ )
17157
+ triggerSelfRestart(selfAgent, 'fleet-fallback-resume')
17158
+ } else {
17159
+ process.stderr.write(
17160
+ `telegram gateway: [fleet-fallback] resume suppressed (${verdict}) agent=${triggerAgent}\n`,
17161
+ )
17162
+ }
17163
+ }
15759
17164
  return outcome.kind === 'switched'
15760
17165
  } catch (err) {
15761
17166
  process.stderr.write(
@@ -15815,6 +17220,9 @@ async function runCreditWatch(): Promise<void> {
15815
17220
  bot.api.sendMessage(chat_id, decision.message, {
15816
17221
  parse_mode: 'HTML',
15817
17222
  link_preview_options: { is_disabled: true },
17223
+ // Credit/quota warning is a system status notice — silence the
17224
+ // open ping (the user isn't waiting to tap anything).
17225
+ disable_notification: true,
15818
17226
  }),
15819
17227
  { chat_id, verb: 'credit-watch.notify' },
15820
17228
  )
@@ -15928,6 +17336,10 @@ async function runQuotaWatch(opts: { bootTick?: boolean } = {}): Promise<void> {
15928
17336
  accounts: listStateData.accounts,
15929
17337
  prev: fleetPrev,
15930
17338
  now,
17339
+ // #2478: the same staleness ceiling the per-account loop uses. Gates the
17340
+ // `entered` alert behind live corroboration so a probe blackout's stale
17341
+ // marks can't false-fire 🔴 All accounts exhausted.
17342
+ tuning,
15931
17343
  })
15932
17344
  if (fleetDecision.kind === 'notify') {
15933
17345
  for (const chat_id of access.allowFrom) {
@@ -15950,6 +17362,8 @@ async function runQuotaWatch(opts: { bootTick?: boolean } = {}): Promise<void> {
15950
17362
  bot.api.sendMessage(chat_id, fleetDecision.message, {
15951
17363
  parse_mode: 'HTML',
15952
17364
  link_preview_options: { is_disabled: true },
17365
+ // Quota status notice — silence the open ping.
17366
+ disable_notification: true,
15953
17367
  }),
15954
17368
  { chat_id, verb: 'quota-watch.fleet-all-exhausted' },
15955
17369
  )
@@ -16022,11 +17436,21 @@ async function runQuotaWatch(opts: { bootTick?: boolean } = {}): Promise<void> {
16022
17436
  // numbers for the notification message bodies. One batched RPC for all
16023
17437
  // crossing accounts (typically 1, rarely 2+).
16024
17438
  const crossingLabels = pendingTransitions.map(t => t.accountLabel)
16025
- let freshProbeMap = new Map<string, Awaited<ReturnType<typeof brokerClient.probeQuota>>['results'][number]['result']>()
17439
+ // #2495 BLOCKER fix — store the FULL entry (result + `served` tag), not just
17440
+ // `entry.result`. The corroboration gate below needs `served` to tell a true
17441
+ // live probe apart from a failed-probe cache fallback (which is `ok:true`
17442
+ // but `served:"cache"` — vacuous corroboration).
17443
+ let freshProbeMap = new Map<string, Awaited<ReturnType<typeof brokerClient.probeQuota>>['results'][number]>()
16026
17444
  try {
16027
- const probeData = await brokerClient.probeQuota(crossingLabels, 8000)
17445
+ // #2495 Change 3 — forceLive bypasses the broker's probe-on-open TTL so the
17446
+ // DECISION to alarm is corroborated by a TRUE live probe, never a cache hit.
17447
+ // Only the transition-to-alarm pays for this; steady-state polls stay on the
17448
+ // cheap cached listState read (no probe). Honors the existing fleet/consumer
17449
+ // probe knobs upstream — this re-evaluation never fires without a detected
17450
+ // transition.
17451
+ const probeData = await brokerClient.probeQuota(crossingLabels, 8000, true)
16028
17452
  for (const entry of probeData.results) {
16029
- freshProbeMap.set(entry.label, entry.result)
17453
+ freshProbeMap.set(entry.label, entry)
16030
17454
  }
16031
17455
  } catch (err) {
16032
17456
  process.stderr.write(`telegram gateway: quota-watch: probe for crossing accounts failed: ${err}\n`)
@@ -16058,17 +17482,25 @@ async function runQuotaWatch(opts: { bootTick?: boolean } = {}): Promise<void> {
16058
17482
  for (const { accountLabel, snapIndex, decision } of pendingTransitions) {
16059
17483
  // Re-evaluate with fresh probe data to get an accurate message body.
16060
17484
  // If the fresh probe succeeded, replace the snap's quota with live data.
16061
- const freshResult = freshProbeMap.get(accountLabel)
17485
+ const freshEntry = freshProbeMap.get(accountLabel)
16062
17486
  let enrichedDecision = decision
16063
17487
  // pendingTransitions only ever holds notify decisions (pushed under
16064
17488
  // `decision.kind !== 'skip'` / `!== 'reconcile'`). Narrow explicitly so
16065
17489
  // `decision.transition` type-checks below; this continue never fires
16066
17490
  // at runtime.
16067
17491
  if (decision.kind !== 'notify') continue
16068
- if (freshResult && freshResult.ok && snapIndex >= 0) {
17492
+ // #2495 BLOCKER fix — only a GENUINE live probe corroborates the alarm. A
17493
+ // forceLive entry that is `ok:true` but `served:"cache"` (the broker's
17494
+ // failed-probe cache fallback) is NOT corroboration: the upstream probe
17495
+ // failed, so we have no live confirmation that the throttling crossing is
17496
+ // real right now. Treat it exactly like a probe failure → fall through to
17497
+ // the defer branch below (state untouched, re-evaluated next tick). This
17498
+ // also guarantees the "Live-probe corroborated (#2495)" footnote is only
17499
+ // ever stamped on a real live probe.
17500
+ if (isLiveCorroboration(freshEntry) && freshEntry!.result.ok && snapIndex >= 0) {
16069
17501
  // Live numbers replace the cache — and capturedAtMs is cleared so the
16070
17502
  // staleness gate never misfires on data we JUST probed.
16071
- const enrichedSnap = { ...snapshots[snapIndex]!, quota: freshResult.data, capturedAtMs: undefined }
17503
+ const enrichedSnap = { ...snapshots[snapIndex]!, quota: freshEntry!.result.data, capturedAtMs: undefined }
16072
17504
  const prev = watchState[accountLabel] ?? emptyAccountState()
16073
17505
  const re = evaluateQuotaWatchAccount({ agentName, snap: enrichedSnap, prev, now, bootTick, tuning })
16074
17506
  // If the fresh probe still shows the same transition, use the
@@ -16153,6 +17585,8 @@ async function runQuotaWatch(opts: { bootTick?: boolean } = {}): Promise<void> {
16153
17585
  bot.api.sendMessage(chat_id, message, {
16154
17586
  parse_mode: 'HTML',
16155
17587
  link_preview_options: { is_disabled: true },
17588
+ // Quota throttling status notice — silence the open ping.
17589
+ disable_notification: true,
16156
17590
  }),
16157
17591
  { chat_id, verb: 'quota-watch.notify' },
16158
17592
  )
@@ -16424,7 +17858,13 @@ bot.command("auth", async ctx => {
16424
17858
  }
16425
17859
  return
16426
17860
  }
16427
- const text = ctx.message?.text ?? ""
17861
+ const rawText = ctx.message?.text ?? ""
17862
+ // `/auth demo` (and `/auth show demo` / `/auth list demo`) — the trailing
17863
+ // `demo` token masks account-email labels on the default dashboard view for
17864
+ // screen recordings. Strip it before parsing so `demo` isn't mistaken for a
17865
+ // verb/agent argument; it's honored only on the show/list path downstream.
17866
+ const authDemo = hasDemoFlag(getCommandArgs(ctx))
17867
+ const text = authDemo ? rawText.replace(/(\s+)demo\s*$/i, "") : rawText
16428
17868
  const parsed = parseAuthCommand(text)
16429
17869
  if (!parsed) return
16430
17870
  const currentAgent = getMyAgentName()
@@ -16486,11 +17926,12 @@ bot.command("auth", async ctx => {
16486
17926
  return
16487
17927
  }
16488
17928
  try {
16489
- const { loginUrl, scratchDir, child } = await startAccountAuthSession(parsed.label)
17929
+ const { loginUrl, scratchDir, tmuxSocket, tmuxSession } = await startAccountAuthSession(parsed.label)
16490
17930
  pendingAuthAddFlows.set(authAddKey, {
16491
17931
  label: parsed.label,
16492
17932
  scratchDir,
16493
- child,
17933
+ tmuxSocket,
17934
+ tmuxSession,
16494
17935
  startedAt: Date.now(),
16495
17936
  })
16496
17937
  await switchroomReply(
@@ -16522,6 +17963,7 @@ bot.command("auth", async ctx => {
16522
17963
  isAdmin,
16523
17964
  client,
16524
17965
  chatId,
17966
+ demo: authDemo,
16525
17967
  // Format 2 enricher — live quota probe via the broker (#1336).
16526
17968
  // Pre-broker this read `~/.switchroom/accounts/<label>/credentials.json`
16527
17969
  // off the agent's HOME, which post-RFC-H is never populated (broker
@@ -16533,18 +17975,27 @@ bot.command("auth", async ctx => {
16533
17975
  liveQuotas: async (accounts) => {
16534
17976
  try {
16535
17977
  const { results } = await client.probeQuota(accounts.map((a) => a.label))
17978
+ // #2495 Change 2 — the broker tags each result `served:"live"|"cache"`
17979
+ // (TTL hit or failed-probe fallback). When ANY account was served from
17980
+ // cache, surface the OLDEST snapshot's capturedAt so the card stamps
17981
+ // "⚠ cached Nm ago" instead of a false live stamp.
17982
+ let staleCachedAtMs: number | undefined
16536
17983
  // Preserve input order (broker also preserves it, but be defensive).
16537
- return accounts.map((a) => {
17984
+ const quotas = accounts.map((a) => {
16538
17985
  const hit = results.find((r) => r.label === a.label)
16539
17986
  if (!hit) return { ok: false as const, reason: "broker returned no result for account" }
17987
+ if (hit.served === 'cache' && hit.capturedAt != null) {
17988
+ staleCachedAtMs = staleCachedAtMs == null ? hit.capturedAt : Math.min(staleCachedAtMs, hit.capturedAt)
17989
+ }
16540
17990
  return hit.result
16541
17991
  })
17992
+ return { quotas, staleCachedAtMs }
16542
17993
  } catch (err) {
16543
17994
  // Surface a uniform per-account failure so the dashboard renders
16544
17995
  // gracefully (label badge stays UNKNOWN) instead of falling back
16545
17996
  // to the legacy table.
16546
17997
  const reason = `broker probe-quota failed: ${(err as Error)?.message ?? String(err)}`
16547
- return accounts.map(() => ({ ok: false as const, reason }))
17998
+ return { quotas: accounts.map(() => ({ ok: false as const, reason })) }
16548
17999
  }
16549
18000
  },
16550
18001
  tz: process.env.SWITCHROOM_TIMEZONE ?? process.env.TZ,
@@ -18899,6 +20350,7 @@ bot.command('issues', async ctx => {
18899
20350
 
18900
20351
  bot.command('usage', async ctx => {
18901
20352
  if (!isAuthorizedSender(ctx)) return
20353
+ const demo = hasDemoFlag(getCommandArgs(ctx))
18902
20354
  // Format 2 path: enumerate every account in the broker's known set,
18903
20355
  // probe live quota in parallel, render the health-grouped snapshot.
18904
20356
  // Falls back to the legacy single-agent shape when the broker is
@@ -18911,9 +20363,17 @@ bot.command('usage', async ctx => {
18911
20363
  const state = await client.listState()
18912
20364
  if (state.accounts.length > 0) {
18913
20365
  // Broker-routed probe (#1336) — see gateway.ts:8910 for diagnosis.
20366
+ // #2495 Change 2 — the broker applies a probe-on-open TTL + single-
20367
+ // flight; a TTL-hit or failed-probe fallback is tagged served:"cache",
20368
+ // which we surface as a "⚠ cached Nm ago" footer instead of a false
20369
+ // live stamp.
18914
20370
  const probeResp = await client.probeQuota(state.accounts.map((a) => a.label)).catch(() => ({ results: [] }))
20371
+ let staleCachedAtMs: number | undefined
18915
20372
  const quotas = state.accounts.map((a) => {
18916
20373
  const hit = probeResp.results.find((r) => r.label === a.label)
20374
+ if (hit?.served === 'cache' && hit.capturedAt != null) {
20375
+ staleCachedAtMs = staleCachedAtMs == null ? hit.capturedAt : Math.min(staleCachedAtMs, hit.capturedAt)
20376
+ }
18917
20377
  return hit?.result ?? { ok: false as const, reason: 'broker returned no result for account' }
18918
20378
  })
18919
20379
  const { renderAuthSnapshotFormat2, buildSnapshotsFromState } = await import(
@@ -18924,7 +20384,8 @@ bot.command('usage', async ctx => {
18924
20384
  const text = renderAuthSnapshotFormat2(snapshots, {
18925
20385
  tz,
18926
20386
  now: new Date(),
18927
- liveProbedAtMs: Date.now(),
20387
+ demo,
20388
+ ...(staleCachedAtMs != null ? { staleCachedAtMs } : { liveProbedAtMs: Date.now() }),
18928
20389
  })
18929
20390
  await switchroomReply(ctx, text, { html: true })
18930
20391
  return
@@ -19091,13 +20552,14 @@ bot.command('version', async ctx => {
19091
20552
  // see at a glance what this agent is authorized for.
19092
20553
  bot.command('whoami', async ctx => {
19093
20554
  if (!isAuthorizedSender(ctx)) return
20555
+ const demo = hasDemoFlag(getCommandArgs(ctx))
19094
20556
  try {
19095
20557
  let raw: string
19096
20558
  try { raw = switchroomExecCombined(['config', 'whoami'], 10000) }
19097
20559
  catch (err: unknown) { raw = (err as any).stdout ?? (err as any).message ?? 'whoami failed' }
19098
20560
  const trimmed = stripAnsi(raw).trim()
19099
20561
  let card: string
19100
- try { card = formatWhoamiCard(JSON.parse(trimmed.split('\n').pop() ?? trimmed)) }
20562
+ try { card = formatWhoamiCard(JSON.parse(trimmed.split('\n').pop() ?? trimmed), demo) }
19101
20563
  catch { card = preBlock(formatSwitchroomOutput(trimmed || 'whoami: no output')) }
19102
20564
  await switchroomReply(ctx, card, { html: true })
19103
20565
  } catch (err: unknown) {
@@ -19105,14 +20567,17 @@ bot.command('whoami', async ctx => {
19105
20567
  }
19106
20568
  })
19107
20569
 
19108
- /** Compact HTML card from the `config whoami` JSON view. Names/booleans only. */
20570
+ /** Compact HTML card from the `config whoami` JSON view. Names/booleans only.
20571
+ * `demo` (the `/whoami demo` suffix) masks the vault key NAMES via maskVaultKey
20572
+ * for screen recordings — agent/MCP/model/skills topology is left untouched
20573
+ * (out of scope). Off by default. */
19109
20574
  function formatWhoamiCard(v: {
19110
20575
  name?: string; persona?: string | null; model?: string | null; tier?: string;
19111
20576
  tools?: { allow?: string[]; deny?: string[] }; mcpServers?: string[]; skills?: string[];
19112
20577
  vault?: { key: string; readable: boolean }[];
19113
20578
  powers?: { admin?: boolean; root?: boolean; configEdit?: boolean; crossAgentHostVerbs?: boolean };
19114
20579
  scheduleCount?: number; memoryBackend?: string | null;
19115
- }): string {
20580
+ }, demo = false): string {
19116
20581
  const esc = escapeHtmlForTg
19117
20582
  const yn = (b?: boolean) => (b ? '✓' : '✗')
19118
20583
  const lines: string[] = []
@@ -19125,7 +20590,7 @@ function formatWhoamiCard(v: {
19125
20590
  if ((v.mcpServers ?? []).length) lines.push(`MCP: ${esc(v.mcpServers!.join(', '))}`)
19126
20591
  if ((v.skills ?? []).length) lines.push(`Skills: ${esc(v.skills!.join(', '))}`)
19127
20592
  if ((v.vault ?? []).length) {
19128
- lines.push(`Vault keys (names only): ${v.vault!.map(k => `${esc(k.key)} ${yn(k.readable)}`).join(', ')}`)
20593
+ lines.push(`Vault keys (names only): ${v.vault!.map(k => `${esc(demo ? maskVaultKey(k.key) : k.key)} ${yn(k.readable)}`).join(', ')}`)
19129
20594
  }
19130
20595
  const p = v.powers ?? {}
19131
20596
  lines.push(`Powers: admin ${yn(p.admin)} · root ${yn(p.root)} · config-edit ${yn(p.configEdit)} · cross-agent verbs ${yn(p.crossAgentHostVerbs)}`)
@@ -19350,6 +20815,8 @@ bot.on('callback_query:data', async ctx => {
19350
20815
  await robustApiCall(() =>
19351
20816
  bot.api.editMessageText(args.chatId, args.messageId, args.text, {
19352
20817
  parse_mode: 'HTML',
20818
+ // Resolved on tap — strip the keyboard so it can't be re-tapped.
20819
+ ...(args.stripKeyboard ? { reply_markup: { inline_keyboard: [] } } : {}),
19353
20820
  }),
19354
20821
  )
19355
20822
  } catch {
@@ -19359,6 +20826,9 @@ bot.on('callback_query:data', async ctx => {
19359
20826
  log: (m) =>
19360
20827
  process.stderr.write(`telegram gateway: config-approval cb — ${m}\n`),
19361
20828
  },
20829
+ // Verify the per-card epoch from the callback_data against the live
20830
+ // pending entry — a stale tap (mismatched epoch) is rejected.
20831
+ parsed.epoch,
19362
20832
  )
19363
20833
  await ctx.answerCallbackQuery({
19364
20834
  text: resolved
@@ -21136,6 +22606,11 @@ async function shutdown(signal: string): Promise<void> {
21136
22606
  subagentWatcher?.stop()
21137
22607
  subagentWatcher = null
21138
22608
 
22609
+ // Worker-activity feed runs an internal heartbeat interval; stop it so no
22610
+ // re-render fires during drain (mirrors subagentWatcher above).
22611
+ workerActivityFeed?.stop()
22612
+ workerActivityFeed = null
22613
+
21139
22614
  // Issues watcher polls issues.jsonl on a setInterval (default 2s) and
21140
22615
  // edits the issues card on every tick. Without an explicit stop() the
21141
22616
  // poll keeps firing for the lifetime of the process and accumulates
@@ -21869,7 +23344,8 @@ void (async () => {
21869
23344
  // or the turn ended while it kept running — extended autonomous
21870
23345
  // work) is surfaced via the worker feed instead of vanishing.
21871
23346
  const orphanStatusEnabled = isOrphanSubagentStatusEnabled(process.env.SWITCHROOM_ORPHAN_SUBAGENT_STATUS)
21872
- const workerActivityFeed = createWorkerActivityFeed({
23347
+ workerActivityFeed?.stop()
23348
+ workerActivityFeed = createWorkerActivityFeed({
21873
23349
  bot: {
21874
23350
  sendMessage: async (cid, text, sendOpts) => {
21875
23351
  const sent = await robustApiCall(
@@ -22063,9 +23539,18 @@ void (async () => {
22063
23539
  const rendered = composeTurnActivity(turn)
22064
23540
  if (rendered != null) {
22065
23541
  turn.activityPendingRender = rendered
22066
- if (turn.activityInFlight == null) {
22067
- turn.activityInFlight = drainActivitySummary(turn)
23542
+ // PR-4a: routed through the emission-authority façade
23543
+ // (no-op delegate). Producer made explicit ('tool' — the
23544
+ // drain default this foreground sub-agent render used).
23545
+ const ea = emissionAuthorityFor(turn)
23546
+ // PR-4d: route through the centralized card-drain gate.
23547
+ cardDrainGate(turn, ea, () => {
23548
+ if (ea.mayDrain(turn)) {
23549
+ ea.openOrEditCard('tool', () => {
23550
+ turn.activityInFlight = drainActivitySummary(turn, 'tool')
23551
+ })
22068
23552
  }
23553
+ })
22069
23554
  }
22070
23555
  }
22071
23556
  return
@@ -22083,7 +23568,7 @@ void (async () => {
22083
23568
  orphanStatusEnabled,
22084
23569
  }) === 'worker-feed'
22085
23570
  ) {
22086
- void workerActivityFeed.finish(agentId, {
23571
+ void workerActivityFeed?.finish(agentId, {
22087
23572
  description: dispatch.feedDescription,
22088
23573
  lastTool: null,
22089
23574
  toolCount,
@@ -22100,7 +23585,7 @@ void (async () => {
22100
23585
  // 'orphan' is a stale boot row, not a fresh completion — map
22101
23586
  // it to 'done' so an already-posted message still finalizes.
22102
23587
  if (workerFeedEnabled) {
22103
- void workerActivityFeed.finish(agentId, {
23588
+ void workerActivityFeed?.finish(agentId, {
22104
23589
  description: dispatch.feedDescription,
22105
23590
  lastTool: null,
22106
23591
  toolCount,
@@ -22235,7 +23720,7 @@ void (async () => {
22235
23720
  })
22236
23721
  if (surface === 'worker-feed') {
22237
23722
  const origin = resolveSubagentOriginChat(agentId)
22238
- void workerActivityFeed.update(
23723
+ void workerActivityFeed?.update(
22239
23724
  agentId,
22240
23725
  origin?.chatId || fleetChatId || (loadAccess().allowFrom[0] ?? ''),
22241
23726
  {
@@ -22270,7 +23755,13 @@ void (async () => {
22270
23755
  // feed (the foreground blindspot) — mirroring the
22271
23756
  // main-turn activity feed, which surfaces both tool labels
22272
23757
  // and prose.
22273
- const child = (progressLine ?? latestSummary).trim().slice(0, 120)
23758
+ // Route through the SHARED clipNarrative so multi-line
23759
+ // narration first-line-collapses identically to the main
23760
+ // tier (the main path at showNarrativeStep already does
23761
+ // this). Previously this inlined `.trim().slice(0, 120)`
23762
+ // omitted the first-line collapse, so a multi-line
23763
+ // narrative rendered DIFFERENTLY here than on the main feed.
23764
+ const child = clipNarrative(progressLine ?? latestSummary)
22274
23765
  if (child.length === 0) return
22275
23766
  let narrative = turn.foregroundSubAgents.get(agentId)
22276
23767
  if (narrative == null) {
@@ -22288,13 +23779,70 @@ void (async () => {
22288
23779
  const rendered = composeTurnActivity(turn)
22289
23780
  if (rendered != null) {
22290
23781
  turn.activityPendingRender = rendered
22291
- if (turn.activityInFlight == null) {
22292
- turn.activityInFlight = drainActivitySummary(turn)
23782
+ // PR-4a: routed through the emission-authority façade (no-op
23783
+ // delegate). Producer made explicit ('tool' — the drain
23784
+ // default this foreground sub-agent render used).
23785
+ const ea = emissionAuthorityFor(turn)
23786
+ // PR-4d: route through the centralized card-drain gate.
23787
+ cardDrainGate(turn, ea, () => {
23788
+ if (ea.mayDrain(turn)) {
23789
+ ea.openOrEditCard('tool', () => {
23790
+ turn.activityInFlight = drainActivitySummary(turn, 'tool')
23791
+ })
23792
+ }
23793
+ })
23794
+ // A foreground sub-agent's nested activity IS user-visible
23795
+ // production — count it so the silence-poke clock resets,
23796
+ // exactly like the parent activity-render path (10665). Without
23797
+ // this, a long tools-only foreground sub-agent (no prose) lets
23798
+ // the 300s framework fallback (and the #2527 mid-turn floor)
23799
+ // measure silence against a turn that is visibly working,
23800
+ // risking a premature tear-down / unwanted liveness beat.
23801
+ // PR-4e — keyed liveness under the flag (a foreground
23802
+ // sub-agent's nested render for topic A is A's production).
23803
+ // Flag-OFF keeps the literal `currentTurn === turn`; flag-ON
23804
+ // resolves A by its own key.
23805
+ if (
23806
+ SILENCE_LIVENESS_PRODUCTION &&
23807
+ (EMISSION_AUTHORITY_ENABLED ? turnLiveForItsTopic(turn) : currentTurn === turn)
23808
+ ) {
23809
+ silencePoke.noteProduction(statusKey(turn.sessionChatId, turn.sessionThreadId), Date.now())
22293
23810
  }
22294
23811
  }
22295
23812
  return
22296
23813
  }
22297
23814
 
23815
+ // Fix 2 (post-answer background-agent liveness): when the
23816
+ // watcher surfaces a new step for a background worker, update
23817
+ // the current turn's `subagentActivityAt` timestamp IF the turn
23818
+ // has already delivered its substantive answer. This signal is
23819
+ // written HERE — NOT in the tool_label path — so the drop-guard
23820
+ // (`shouldReopenFeedAfterAck` / finalAnswerSubstantive) cannot
23821
+ // gate it. `feedHeartbeatTick`'s post-answer branch reads
23822
+ // `subagentActivityAt` (not `lastToolLabelAt`, which is frozen
23823
+ // after the answer) to decide whether to open a liveness card.
23824
+ // Only stamp when the turn is alive AND post-answer: pre-answer
23825
+ // activity is already surfaced by the normal tool-label feed.
23826
+ //
23827
+ // SCOPE — this is the IN-TURN-WINDOW surface only. The
23828
+ // `feedHeartbeatTick` post-answer card is driven off `currentTurn`,
23829
+ // which `endCurrentTurnAtomic` nulls at `turn_end`. A genuinely
23830
+ // DECOUPLED background worker keeps running PAST the parent
23831
+ // turn's teardown, so `currentTurn` is null when its later
23832
+ // onProgress ticks arrive → this stamp is inert and the
23833
+ // heartbeat is silent for that worker. That is BY DESIGN, not a
23834
+ // gap: a decoupled worker's ongoing activity is surfaced by the
23835
+ // dedicated, currentTurn-independent `workerActivityFeed` (the
23836
+ // edit-in-place worker message, driven below at `workerFeedEnabled`
23837
+ // and bounded by its own non-running/`finish` teardown). So the
23838
+ // currentTurn card covers the brief post-answer/pre-teardown
23839
+ // window; the worker feed covers everything after teardown. Both
23840
+ // are proven in telegram-activity-visibility-integration.test.ts.
23841
+ const stampTurn = currentTurn
23842
+ if (stampTurn != null && stampTurn.finalAnswerEverDelivered) {
23843
+ stampTurn.subagentActivityAt = Date.now()
23844
+ }
23845
+
22298
23846
  // #PR2 live worker-feed: when ON, the worker's live chat
22299
23847
  // message owns the progress beat. Push a running cue and
22300
23848
  // return BEFORE the legacy bucket relay so the same activity
@@ -22306,7 +23854,7 @@ void (async () => {
22306
23854
  // is gone — see resolveSubagentOriginChat).
22307
23855
  if (workerFeedEnabled) {
22308
23856
  const origin = resolveSubagentOriginChat(agentId)
22309
- void workerActivityFeed.update(
23857
+ void workerActivityFeed?.update(
22310
23858
  agentId,
22311
23859
  origin?.chatId || fleetChatId || (loadAccess().allowFrom[0] ?? ''),
22312
23860
  {
@@ -22370,11 +23918,9 @@ void (async () => {
22370
23918
 
22371
23919
  // Lane state (post flash-decouple) comes from the single-source-of-truth
22372
23920
  // resolver and is logged once at startup.
22373
- // on), 'draft' (compose-box transport), or 'dormant' (the default: no
22374
- // preview, no draft — reply tool is the only message). The old label
22375
- // wrongly reported 'visible(draft-retired)' for the dormant default, which
22376
- // masked the flash regression.
22377
- process.stderr.write(`telegram gateway: answer-stream lane=${ANSWER_LANE.state} draftFn=${sendMessageDraftFn != null ? 'available' : 'off'} visible=${ANSWER_STREAM_VISIBLE_ENABLED} draftRetired=${DRAFT_ANSWER_LANE_RETIRED} grammy=${GRAMMY_VERSION}\n`)
23921
+ // Lane state: 'visible' (opt-in preview) or 'dormant' (default: reply
23922
+ // tool is the only message). The draft transport is permanently retired.
23923
+ process.stderr.write(`telegram gateway: answer-stream lane=${ANSWER_LANE.state} visible=${ANSWER_STREAM_VISIBLE_ENABLED} grammy=${GRAMMY_VERSION}\n`)
22378
23924
  process.stderr.write(`telegram gateway: starting bot polling pid=${process.pid} agent=${process.env.SWITCHROOM_AGENT_NAME ?? '-'} stateDir=${STATE_DIR} historyEnabled=${HISTORY_ENABLED} streamMode=${process.env.SWITCHROOM_TG_STREAM_MODE ?? 'checklist'}\n`)
22379
23925
  runnerHandle = run(bot, {
22380
23926
  runner: {