switchroom 0.15.45 → 0.16.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/dist/agent-scheduler/index.js +56 -15
  2. package/dist/auth-broker/index.js +383 -97
  3. package/dist/cli/autoaccept-poll.js +4842 -35
  4. package/dist/cli/drive-write-pretool.mjs +7 -4
  5. package/dist/cli/notion-write-pretool.mjs +35 -4
  6. package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
  7. package/dist/cli/self-improve-stop.mjs +428 -0
  8. package/dist/cli/switchroom.js +2894 -841
  9. package/dist/host-control/main.js +2685 -207
  10. package/dist/vault/approvals/kernel-server.js +7453 -7413
  11. package/dist/vault/broker/server.js +11428 -11388
  12. package/examples/minimal.yaml +1 -0
  13. package/examples/switchroom.yaml +1 -0
  14. package/package.json +3 -3
  15. package/profiles/_base/start.sh.hbs +97 -1
  16. package/profiles/_shared/execution-discipline.md.hbs +18 -0
  17. package/profiles/default/CLAUDE.md.hbs +0 -19
  18. package/telegram-plugin/.claude-plugin/plugin.json +2 -2
  19. package/telegram-plugin/answer-stream-flag.ts +12 -49
  20. package/telegram-plugin/answer-stream.ts +5 -150
  21. package/telegram-plugin/auth-snapshot-format.ts +280 -48
  22. package/telegram-plugin/auto-fallback-fleet.ts +44 -1
  23. package/telegram-plugin/context-exhaustion.ts +12 -0
  24. package/telegram-plugin/demo-mask.ts +154 -0
  25. package/telegram-plugin/dist/bridge/bridge.js +55 -12
  26. package/telegram-plugin/dist/gateway/gateway.js +2938 -977
  27. package/telegram-plugin/dist/server.js +55 -12
  28. package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
  29. package/telegram-plugin/draft-stream.ts +47 -410
  30. package/telegram-plugin/final-answer-detect.ts +17 -12
  31. package/telegram-plugin/fleet-fallback-resume.ts +131 -0
  32. package/telegram-plugin/format.ts +56 -19
  33. package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
  34. package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
  35. package/telegram-plugin/gateway/auth-command.ts +70 -14
  36. package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
  37. package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
  38. package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
  39. package/telegram-plugin/gateway/current-turn-map.ts +188 -0
  40. package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
  41. package/telegram-plugin/gateway/effort-command.ts +8 -3
  42. package/telegram-plugin/gateway/emission-authority.ts +369 -0
  43. package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
  44. package/telegram-plugin/gateway/gateway.ts +1857 -292
  45. package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
  46. package/telegram-plugin/gateway/model-command.ts +115 -4
  47. package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
  48. package/telegram-plugin/gateway/represent-guard.ts +72 -0
  49. package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
  50. package/telegram-plugin/gateway/status-surface-log.ts +14 -3
  51. package/telegram-plugin/history.ts +33 -11
  52. package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
  53. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
  54. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
  55. package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
  56. package/telegram-plugin/issues-card.ts +4 -0
  57. package/telegram-plugin/model-unavailable.ts +124 -0
  58. package/telegram-plugin/narrative-dedup.ts +69 -0
  59. package/telegram-plugin/over-ping-safety-net.ts +70 -4
  60. package/telegram-plugin/package.json +3 -3
  61. package/telegram-plugin/pending-work-progress.ts +12 -0
  62. package/telegram-plugin/permission-rule.ts +32 -5
  63. package/telegram-plugin/permission-title.ts +152 -9
  64. package/telegram-plugin/quota-check.ts +13 -0
  65. package/telegram-plugin/quota-watch.ts +135 -7
  66. package/telegram-plugin/registry/turns-schema.test.ts +24 -0
  67. package/telegram-plugin/registry/turns-schema.ts +9 -0
  68. package/telegram-plugin/runtime-metrics.ts +13 -0
  69. package/telegram-plugin/session-tail.ts +96 -11
  70. package/telegram-plugin/silence-poke.ts +170 -24
  71. package/telegram-plugin/slot-banner-driver.ts +3 -0
  72. package/telegram-plugin/status-no-truncate.ts +44 -0
  73. package/telegram-plugin/status-reactions.ts +20 -3
  74. package/telegram-plugin/stream-controller.ts +4 -23
  75. package/telegram-plugin/stream-reply-handler.ts +6 -24
  76. package/telegram-plugin/streaming-metrics.ts +91 -0
  77. package/telegram-plugin/subagent-watcher.ts +212 -66
  78. package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
  79. package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
  80. package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
  81. package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
  82. package/telegram-plugin/tests/answer-stream.test.ts +2 -411
  83. package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
  84. package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
  85. package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
  86. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
  87. package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
  88. package/telegram-plugin/tests/demo-mask.test.ts +127 -0
  89. package/telegram-plugin/tests/draft-stream.test.ts +0 -827
  90. package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
  91. package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
  92. package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
  93. package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
  94. package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
  95. package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
  96. package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
  97. package/telegram-plugin/tests/feed-survival.test.ts +526 -0
  98. package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
  99. package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
  100. package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
  101. package/telegram-plugin/tests/history.test.ts +60 -0
  102. package/telegram-plugin/tests/model-command.test.ts +134 -0
  103. package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
  104. package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
  105. package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
  106. package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
  107. package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
  108. package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
  109. package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
  110. package/telegram-plugin/tests/permission-rule.test.ts +17 -0
  111. package/telegram-plugin/tests/permission-title.test.ts +206 -17
  112. package/telegram-plugin/tests/quota-watch.test.ts +252 -9
  113. package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
  114. package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
  115. package/telegram-plugin/tests/represent-guard.test.ts +162 -0
  116. package/telegram-plugin/tests/session-tail.test.ts +147 -3
  117. package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
  118. package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
  119. package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
  120. package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
  121. package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
  122. package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
  123. package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
  124. package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
  125. package/telegram-plugin/tests/telegram-format.test.ts +101 -6
  126. package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
  127. package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
  128. package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
  129. package/telegram-plugin/tests/tool-labels.test.ts +67 -0
  130. package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
  131. package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
  132. package/telegram-plugin/tests/welcome-text.test.ts +32 -3
  133. package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
  134. package/telegram-plugin/tool-activity-summary.ts +375 -58
  135. package/telegram-plugin/turn-liveness-floor.ts +240 -0
  136. package/telegram-plugin/uat/assertions.ts +115 -0
  137. package/telegram-plugin/uat/driver.ts +68 -0
  138. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
  139. package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
  140. package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
  141. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
  142. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
  143. package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
  144. package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
  145. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
  146. package/telegram-plugin/welcome-text.ts +13 -1
  147. package/telegram-plugin/worker-activity-feed.ts +157 -82
  148. package/telegram-plugin/draft-transport.ts +0 -122
  149. package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
  150. package/telegram-plugin/tests/draft-transport.test.ts +0 -211
@@ -75,7 +75,8 @@ import {
75
75
  } from './permission-timeout.js'
76
76
  import { pickRecoveredPermissionOrigin } from './permission-card-origin.js'
77
77
  import { isTelegramReplyTool, isTelegramSurfaceTool } from '../tool-names.js'
78
- import { appendActivityLabel, renderActivityFeedWithNested } from '../tool-activity-summary.js'
78
+ import { appendActivityLabel, clipNarrative, renderActivityFeedWithNested, type SessionActivityHeader } from '../tool-activity-summary.js'
79
+ import { REPLY_TOOLS, isDraftOfReply } from '../narrative-dedup.js'
79
80
  import { toolLabel } from '../tool-labels.js'
80
81
  import { createTypingWrapper } from '../typing-wrap.js'
81
82
  import { type DraftStreamHandle } from '../draft-stream.js'
@@ -97,15 +98,16 @@ import {
97
98
  shutdownAnalytics,
98
99
  } from '../analytics-posthog.js'
99
100
  import { emitRuntimeMetric } from '../runtime-metrics.js'
100
- import { decideOverPing } from '../over-ping-safety-net.js'
101
+ import { decideOverPing, type OverPingDecision } from '../over-ping-safety-net.js'
101
102
  import { decideSilentReplyAnchor } from '../silent-reply-anchor.js'
102
103
  import { classifyInbound } from '../inbound-classifier.js'
103
104
  import * as silencePoke from '../silence-poke.js'
104
105
  import * as pendingProgress from '../pending-work-progress.js'
105
106
  import { writeSilentEndState, clearSilentEndState, recordUndeliveredTurnEnd } from '../silent-end.js'
106
- import { isFinalAnswerReply, isSubstantiveFinalReply } from '../final-answer-detect.js'
107
+ import { isFinalAnswerReply, isSubstantiveFinalReply, FINAL_ANSWER_MIN_CHARS } from '../final-answer-detect.js'
108
+ import { deriveTurnRole, decideTerminalReason, parsePostAnswerLivenessMs, evaluatePostAnswerLiveness, type LoopRole } from '../turn-liveness-floor.js'
107
109
  import { createAnswerStream, type AnswerStreamHandle } from '../answer-stream.js'
108
- import { parseVisibleAnswerStreamEnabled, parseDraftLaneRetiredEnabled, resolveAnswerLaneConfig } from '../answer-stream-flag.js'
110
+ import { parseVisibleAnswerStreamEnabled, resolveAnswerLaneConfig } from '../answer-stream-flag.js'
109
111
  import { type SessionEvent } from '../session-tail.js'
110
112
  import {
111
113
  shouldSuppressToolActivity,
@@ -132,6 +134,7 @@ import {
132
134
  } from './microsoft-connect-flow.js'
133
135
  import { resolveAuthBrokerSocketPath } from '../../src/auth/broker/client.js'
134
136
  import { createFleetFallbackGate } from '../fleet-fallback-gate.js'
137
+ import { createFleetFallbackResumeGate } from '../fleet-fallback-resume.js'
135
138
  import { resolveExhaustUntil } from './exhaust-until.js'
136
139
  import {
137
140
  pendingAuthAddFlows,
@@ -165,7 +168,7 @@ import {
165
168
  formatModelUnavailableCard,
166
169
  resolveModelUnavailableFromOperatorEvent,
167
170
  } from '../model-unavailable.js'
168
- import { runFleetAutoFallback, renderFallbackFailureNotice, evaluateFallbackFailureNotice, type FallbackFailureNoticeState } from '../auto-fallback-fleet.js'
171
+ import { runFleetAutoFallback, renderFallbackFailureNotice, evaluateFallbackFailureNotice, evaluateAllBlockedNotice, type FallbackFailureNoticeState, type FallbackAllBlockedNoticeState } from '../auto-fallback-fleet.js'
169
172
  import { startRestartWatchdog } from './restart-watchdog.js'
170
173
  import { validateStringArray } from './access-validator.js'
171
174
 
@@ -221,6 +224,7 @@ import {
221
224
  isContextExhaustionText,
222
225
  shouldArmOrphanedReplyTimeout,
223
226
  ORPHANED_REPLY_TIMEOUT_MS,
227
+ ORPHANED_REPLY_MAX_REARMS,
224
228
  } from '../context-exhaustion.js'
225
229
  import {
226
230
  decideTurnFlush,
@@ -326,11 +330,24 @@ import {
326
330
  } from './obligation-ledger.js'
327
331
  import { loadObligations, persistObligations } from './obligation-store.js'
328
332
  import { driveEscalation } from './escalation-drive.js'
333
+ import { shouldSuppressRepresent } from './represent-guard.js'
329
334
  import { createInboundSpool } from './inbound-spool.js'
330
335
  import { purgeStaleTurnsForChat } from './turn-state-purge.js'
331
336
  import { decideInboundDelivery } from './inbound-delivery-gate.js'
332
337
  import { mayDrainBufferedInbound, shouldArmNoReplyDrain } from './serialize-drain-gate.js'
333
338
  import { decideFeedReopen } from './feed-reopen-gate.js'
339
+ import {
340
+ mayOpenActivityCard,
341
+ computeCrossTurnAnswerDelivered,
342
+ type FeedOpenProducer,
343
+ type FeedOpenGateDeps,
344
+ } from './feed-open-gate.js'
345
+ import {
346
+ EmissionAuthority,
347
+ EMISSION_AUTHORITY_ENABLED,
348
+ type CardDrainGateCtx,
349
+ } from './emission-authority.js'
350
+ import { CurrentTurnMap } from './current-turn-map.js'
334
351
  import { resolveAnswerThreadId } from './answer-thread-resolve.js'
335
352
  import {
336
353
  createDeliveryQueue,
@@ -411,6 +428,7 @@ import {
411
428
  // preceding shutdown only" semantics.
412
429
  clearCleanShutdownMarker,
413
430
  shouldSuppressRecoveryBanner,
431
+ shouldSuppressBootResume,
414
432
  resolveShutdownMarker,
415
433
  DEFAULT_MAX_AGE_MS as CLEAN_SHUTDOWN_MAX_AGE_MS,
416
434
  } from './clean-shutdown-marker.js'
@@ -468,8 +486,10 @@ import {
468
486
  resolveQuotaWatchTuning,
469
487
  buildQuotaClaimKey,
470
488
  QUOTA_WATCH_CLAIM_WINDOW_MS,
489
+ isLiveCorroboration,
471
490
  } from '../quota-watch.js'
472
491
  import { buildSnapshotsFromState, buildSnapshotsFromCachedState } from '../auth-snapshot-format.js'
492
+ import { maskUsername, maskVaultKey } from '../demo-mask.js'
473
493
  import {
474
494
  writeTurnActiveMarker,
475
495
  touchTurnActiveMarker,
@@ -739,20 +759,6 @@ const AGENT_ADMIN = process.env.SWITCHROOM_AGENT_ADMIN === 'true'
739
759
  const bot = new Bot(TOKEN)
740
760
  installTgPostLogger(bot)
741
761
 
742
- // Draft-answer-lane retirement (2026-06-05): default RETIRED so the live answer
743
- // lane uses a real, mtcute-observable message instead of the invisible
744
- // compose-box draft. Declared HERE (above the boot-probe block) because
745
- // `sendMessageDraftFn` below reads it — keep it above its first use to avoid a
746
- // temporal-dead-zone ReferenceError at boot. Kill switch
747
- // SWITCHROOM_DRAFT_ANSWER_LANE=0 restores the legacy draft.
748
- const DRAFT_ANSWER_LANE_RETIRED = parseDraftLaneRetiredEnabled(process.env.SWITCHROOM_DRAFT_ANSWER_LANE)
749
-
750
- // ─── sendMessageDraft boot probe ──────────────────────────────────────────
751
- // grammY 1.x exposes all Telegram Bot API methods through bot.api.raw.
752
- // bot.api.sendMessageDraft (the typed wrapper) takes chat_id as number, but
753
- // answer-stream passes chatId as string, so we bridge through raw with an
754
- // explicit Number() cast and positional → object param translation.
755
- const _rawSendMessageDraft = (bot.api.raw as unknown as Record<string, unknown>).sendMessageDraft
756
762
  const GRAMMY_VERSION: string = (() => {
757
763
  try {
758
764
  const raw = readFileSync(new URL('../../node_modules/grammy/package.json', import.meta.url), 'utf8')
@@ -761,22 +767,6 @@ const GRAMMY_VERSION: string = (() => {
761
767
  return 'unknown'
762
768
  }
763
769
  })()
764
- const sendMessageDraftFn: (
765
- (chatId: string, draftId: number, text: string, params?: { message_thread_id?: number; parse_mode?: 'HTML' }) => Promise<unknown>
766
- ) | undefined =
767
- // When the draft lane is retired (default), force this undefined so BOTH
768
- // consumers (the answer-stream config + the stream_reply handler) drop the
769
- // draft transport and fall back to visible message transport — the single
770
- // chokepoint for the retirement.
771
- !DRAFT_ANSWER_LANE_RETIRED && typeof _rawSendMessageDraft === 'function'
772
- ? (chatId, draftId, text, params) =>
773
- (_rawSendMessageDraft as (args: Record<string, unknown>) => Promise<unknown>)({
774
- chat_id: Number(chatId),
775
- draft_id: draftId,
776
- text,
777
- ...(params ?? {}),
778
- })
779
- : undefined
780
770
 
781
771
  // ─── sendChecklist / editMessageChecklist boot probes ─────────────────────
782
772
  // grammY 1.x exposes new Telegram Bot API methods via bot.api.raw before the
@@ -1157,43 +1147,70 @@ try {
1157
1147
  const pending = findLatestTurnIfInterrupted(turnsDb)
1158
1148
  const selfAgent = process.env.SWITCHROOM_AGENT_NAME ?? ''
1159
1149
  if (pending != null && selfAgent) {
1160
- // 3h staleness failsafe (operator spec, 2026-06-03): never AUTO-resume
1161
- // interrupted work older than RESUME_MAX_AGE_MS — selectResumeBuilder
1162
- // downgrades a stale 'resume' to the passive 'report' so the user is told
1163
- // ("I was working on X ~Nh ago") but nothing replays unprompted. Env
1164
- // override SWITCHROOM_RESUME_MAX_AGE_MS (ms); set very high to disable.
1165
- const RESUME_MAX_AGE_MS = (() => {
1166
- const v = Number(process.env.SWITCHROOM_RESUME_MAX_AGE_MS)
1167
- return Number.isFinite(v) && v > 0 ? v : 10_800_000 // 3h
1168
- })()
1169
- const kind = selectResumeBuilder(pending.ended_via, {
1170
- ageMs: Math.max(0, Date.now() - pending.started_at),
1171
- maxAgeMs: RESUME_MAX_AGE_MS,
1150
+ // Clean-shutdown gate: suppress auto-resume when the prior shutdown was
1151
+ // operator/roll/CLI-initiated (clean). A clean-shutdown marker present and
1152
+ // fresh means the agent was asked to stop; the "interrupted" turn was
1153
+ // abandoned by that decision. Replaying it on every planned restart wastes
1154
+ // subscription quota for no user benefit. Only unclean exits (crash/OOM/
1155
+ // unexpected kill) should auto-resume.
1156
+ //
1157
+ // NOTE: GATEWAY_CLEAN_SHUTDOWN_MARKER_PATH is defined lower in this file
1158
+ // (module-init order); we compute the path inline here using the same
1159
+ // formula so we can read it at boot-resume time.
1160
+ // SWITCHROOM_BOOT_RESUME_ALWAYS=1 is an escape hatch that restores
1161
+ // unconditional resume if needed.
1162
+ const bootResumeMarkerPath =
1163
+ process.env.SWITCHROOM_GATEWAY_CLEAN_SHUTDOWN_MARKER ?? join(STATE_DIR, 'clean-shutdown.json')
1164
+ const bootResumeCleanMarker = readCleanShutdownMarker(bootResumeMarkerPath)
1165
+ const bootResumeForceAlways = process.env.SWITCHROOM_BOOT_RESUME_ALWAYS === '1'
1166
+ const bootResumeSuppressed = shouldSuppressBootResume(bootResumeCleanMarker, Date.now(), {
1167
+ forceAlways: bootResumeForceAlways,
1172
1168
  })
1173
- if (kind === 'resume') {
1174
- bootResumeInbound = { agent: selfAgent, msg: buildResumeInterruptedInbound({ turn: pending }) }
1175
- } else if (kind === 'report') {
1176
- // idleMs: this boot's measured marker age if it just classified this
1177
- // turn; otherwise recover it from the persisted interrupt_reason (a
1178
- // later boot, marker already swept); else fall back to total runtime.
1179
- let idleMs = pending.turn_key === timeoutTurnKey && markerAgeMs != null ? markerAgeMs : null
1180
- if (idleMs == null && pending.interrupt_reason) {
1181
- try {
1182
- const parsed = JSON.parse(pending.interrupt_reason) as { idleMs?: unknown }
1183
- if (typeof parsed.idleMs === 'number' && Number.isFinite(parsed.idleMs)) idleMs = parsed.idleMs
1184
- } catch { /* malformed snapshot — fall through */ }
1185
- }
1186
- if (idleMs == null) idleMs = Math.max(0, Date.now() - pending.started_at)
1187
- bootResumeInbound = {
1188
- agent: selfAgent,
1189
- msg: buildResumeWatchdogReportInbound({ turn: pending, idleMs }),
1190
- }
1191
- }
1192
- if (bootResumeInbound != null) {
1169
+ if (bootResumeSuppressed) {
1193
1170
  process.stderr.write(
1194
- `telegram gateway: boot-resume queued kind=${kind} turnKey=${pending.turn_key} ` +
1195
- `endedVia=${pending.ended_via ?? 'open'} chat=${pending.chat_id}\n`,
1171
+ `telegram gateway: boot-resume suppressed (clean shutdown` +
1172
+ `${bootResumeCleanMarker?.reason ? ` reason=${JSON.stringify(bootResumeCleanMarker.reason)}` : ''}` +
1173
+ `) — unclean exits still resume turnKey=${pending.turn_key}\n`,
1196
1174
  )
1175
+ } else {
1176
+ // 3h staleness failsafe (operator spec, 2026-06-03): never AUTO-resume
1177
+ // interrupted work older than RESUME_MAX_AGE_MS — selectResumeBuilder
1178
+ // downgrades a stale 'resume' to the passive 'report' so the user is told
1179
+ // ("I was working on X ~Nh ago") but nothing replays unprompted. Env
1180
+ // override SWITCHROOM_RESUME_MAX_AGE_MS (ms); set very high to disable.
1181
+ const RESUME_MAX_AGE_MS = (() => {
1182
+ const v = Number(process.env.SWITCHROOM_RESUME_MAX_AGE_MS)
1183
+ return Number.isFinite(v) && v > 0 ? v : 10_800_000 // 3h
1184
+ })()
1185
+ const kind = selectResumeBuilder(pending.ended_via, {
1186
+ ageMs: Math.max(0, Date.now() - pending.started_at),
1187
+ maxAgeMs: RESUME_MAX_AGE_MS,
1188
+ })
1189
+ if (kind === 'resume') {
1190
+ bootResumeInbound = { agent: selfAgent, msg: buildResumeInterruptedInbound({ turn: pending }) }
1191
+ } else if (kind === 'report') {
1192
+ // idleMs: this boot's measured marker age if it just classified this
1193
+ // turn; otherwise recover it from the persisted interrupt_reason (a
1194
+ // later boot, marker already swept); else fall back to total runtime.
1195
+ let idleMs = pending.turn_key === timeoutTurnKey && markerAgeMs != null ? markerAgeMs : null
1196
+ if (idleMs == null && pending.interrupt_reason) {
1197
+ try {
1198
+ const parsed = JSON.parse(pending.interrupt_reason) as { idleMs?: unknown }
1199
+ if (typeof parsed.idleMs === 'number' && Number.isFinite(parsed.idleMs)) idleMs = parsed.idleMs
1200
+ } catch { /* malformed snapshot — fall through */ }
1201
+ }
1202
+ if (idleMs == null) idleMs = Math.max(0, Date.now() - pending.started_at)
1203
+ bootResumeInbound = {
1204
+ agent: selfAgent,
1205
+ msg: buildResumeWatchdogReportInbound({ turn: pending, idleMs }),
1206
+ }
1207
+ }
1208
+ if (bootResumeInbound != null) {
1209
+ process.stderr.write(
1210
+ `telegram gateway: boot-resume queued kind=${kind} turnKey=${pending.turn_key} ` +
1211
+ `endedVia=${pending.ended_via ?? 'open'} chat=${pending.chat_id}\n`,
1212
+ )
1213
+ }
1197
1214
  }
1198
1215
  }
1199
1216
 
@@ -1425,6 +1442,21 @@ const activeTurnStartedAt = new Map<string, number>()
1425
1442
  // reading activeTurnStartedAt because they want the receipt timestamp.
1426
1443
  const claudeBusyKeys = new Set<string>()
1427
1444
 
1445
+ /**
1446
+ * #2527 observability: count emoji transitions per status-reaction controller
1447
+ * so `turn_no_reply_warn` can report how many reaction changes happened while
1448
+ * producing zero text. Keyed by statusKey(chatId, threadId); cleared in
1449
+ * purgeReactionTracking alongside the controller itself.
1450
+ */
1451
+ const reactionTransitionCounts = new Map<string, number>()
1452
+
1453
+ /**
1454
+ * #2527 observability: tracks which (chatId:threadId) keys have already emitted
1455
+ * a `turn_reply_timing` event this turn so we only fire it on the FIRST text
1456
+ * reply. Cleared in purgeReactionTracking at turn-end alongside the controller.
1457
+ */
1458
+ const firstTextReplyLogged = new Set<string>()
1459
+
1428
1460
  /**
1429
1461
  * Helper: stamp a claudeBusyKeys entry for an inbound about to be
1430
1462
  * handed to claude. Pulls the thread id from the top-level field if
@@ -1487,6 +1519,19 @@ const deliveryQueue = createDeliveryQueue<InboundMessage>()
1487
1519
  // SWITCHROOM_OBLIGATION_LEDGER=0 → every hook below is a no-op → zero change.
1488
1520
  const OBLIGATION_LEDGER_ENABLED = process.env.SWITCHROOM_OBLIGATION_LEDGER !== '0'
1489
1521
  const OBLIGATION_REPRESENT_MAX = 2
1522
+ // Minimum reply length (chars) the duplicate-represent guard (#2472/#2474) treats
1523
+ // as "the user was answered". DECOUPLED from the escalate branch's 200-char proxy:
1524
+ // for the represent guard ANY genuine assistant reply — even a terse "Yes — done."
1525
+ // — satisfies the obligation, so suppressing the duplicate re-ask must not require
1526
+ // 200 chars. Default 1 (any non-empty real reply; empty/whitespace rows are
1527
+ // clamped out inside hasOutboundDeliveredSince). Override via env for tuning. Safe
1528
+ // because only recordOutbound writes role='assistant' rows — progress-card edits
1529
+ // and typing indicators are never counted.
1530
+ const OBLIGATION_REPRESENT_GUARD_MIN_REPLY_CHARS = (() => {
1531
+ const raw = process.env.SWITCHROOM_OBLIGATION_REPRESENT_GUARD_MIN_REPLY_CHARS
1532
+ const n = raw != null ? Number.parseInt(raw, 10) : NaN
1533
+ return Number.isFinite(n) && n >= 1 ? n : 1
1534
+ })()
1490
1535
  const OBLIGATION_SWEEP_MS = 5_000
1491
1536
  // Bound on escalation SEND attempts. The escalation now closes only AFTER a
1492
1537
  // successful send (a transient failure stays OPEN and retries next sweep), so a
@@ -1710,6 +1755,38 @@ const FEED_REOPEN_AFTER_ACK_ENABLED =
1710
1755
  const FEED_HEARTBEAT_ENABLED = process.env.SWITCHROOM_FEED_HEARTBEAT !== '0'
1711
1756
  const FEED_HEARTBEAT_TICK_MS = 6_000
1712
1757
  const FEED_HEARTBEAT_MIN_STALE_MS = 6_000
1758
+ // Liveness-driven feed open. The activity feed is otherwise TOOL-driven — it
1759
+ // opens only when a tool emits a non-null label. A turn dominated by thinking
1760
+ // or by suppressed-by-design tools (typing / memory recall / reply) emits no
1761
+ // label, so the feed never opens and a long turn reads as pure silence until
1762
+ // the 300s silence-poke (the #680 dark-turn). When a turn has been alive >=
1763
+ // FEED_LIVENESS_OPEN_MS with no feed yet, open a minimal "Working…" feed so the
1764
+ // user always has a live indicator; the first real tool label edits it with
1765
+ // real content. Runs on the heartbeat interval, so the effective open lands in
1766
+ // [threshold, threshold + FEED_HEARTBEAT_TICK_MS). Kill switch:
1767
+ // SWITCHROOM_FEED_LIVENESS_OPEN=0. Default on.
1768
+ const FEED_LIVENESS_OPEN_ENABLED = process.env.SWITCHROOM_FEED_LIVENESS_OPEN !== '0'
1769
+ const FEED_LIVENESS_OPEN_MS = (() => {
1770
+ const raw = process.env.SWITCHROOM_FEED_LIVENESS_OPEN_MS
1771
+ const n = raw ? Number(raw) : NaN
1772
+ return Number.isFinite(n) && n > 0 ? n : 12_000
1773
+ })()
1774
+
1775
+ // Post-answer background-agent liveness STALENESS CAP (Fix 2 / #2587 supersede,
1776
+ // concern 3). The `feedHeartbeatTick` post-answer branch re-renders a "background
1777
+ // agent still working" card every FEED_HEARTBEAT_TICK_MS while the sub-agent
1778
+ // watcher keeps advancing `turn.subagentActivityAt`. Without a cap that card kept
1779
+ // emitting `state:'running'` with an ever-climbing `elapsed` FOREVER — even after
1780
+ // the worker's `onFinish` froze the timestamp — because (unlike the pre-answer
1781
+ // path's `FEED_LIVENESS_OPEN_MS` recency cap) the post-answer branch had no
1782
+ // staleness bound. This cap mirrors that pre-answer pattern: once the worker's
1783
+ // last advance is older than the cap, the heartbeat stops re-rendering and the
1784
+ // card freezes at its last state. Parsed via the same pure `parsePostAnswerLivenessMs`
1785
+ // helper (positive int or 0); `|| 30_000` supplies a default-ON 30s cap, so an
1786
+ // unset env keeps the cap active. Override with SWITCHROOM_POST_ANSWER_LIVENESS_STALE_MS.
1787
+ const POST_ANSWER_LIVENESS_STALE_MS = parsePostAnswerLivenessMs(
1788
+ process.env.SWITCHROOM_POST_ANSWER_LIVENESS_STALE_MS,
1789
+ ) || 30_000
1713
1790
 
1714
1791
  /** Compact mm/ss-ish elapsed for the live feed suffix: "18s", "1m05s". */
1715
1792
  function formatFeedElapsed(ms: number): string {
@@ -1884,6 +1961,27 @@ type CurrentTurn = {
1884
1961
  sourceMessageId: number | null
1885
1962
  startedAt: number
1886
1963
  gatewayReceiveAt: number
1964
+ // #2527 — the single turn-provenance discriminator, stamped once at
1965
+ // enqueue from the channel envelope `source`. `user` (a human is waiting:
1966
+ // never-silent guarantee + mid-turn floor), `system` (cron/scheduled:
1967
+ // silence is legitimate). The gateway never builds a turn atom for a
1968
+ // sub-agent, so `sub-agent` never appears here. Read by the mid-turn floor
1969
+ // eligibility and the role-aware terminal reaction gate. Replaces the
1970
+ // scattered `chatType`/`chatId==null`/`source==='cron'` predicates.
1971
+ role: LoopRole
1972
+ // PR1 (cross-turn stale-card guard, design `docs/message-emission-determinism.md`
1973
+ // §9 lever 4 / race C/D). Present ONLY when this turn is a cross-turn SYNTHETIC
1974
+ // surface whose card OPEN must be gated against an answer already delivered in
1975
+ // an EARLIER turn — i.e. an `obligation_represent` re-delivery (and the
1976
+ // liveness/heartbeat timer firing on it). `sinceMs` is the obligation's
1977
+ // `openedAt` — the moment the obligation was RAISED — so the card-OPEN gate
1978
+ // asks `hasOutboundDeliveredSince(chat, openedAt)`: did a substantive answer
1979
+ // already land since then? Stamped at the turn ctor from a pending marker that
1980
+ // `obligationSweep` writes when it pushes the represent inbound (see
1981
+ // `pendingCrossTurnGate`). `undefined` for a normal foreground turn → the
1982
+ // cross-turn lever-4 gate is inert there (the foreground turn's own card is
1983
+ // governed only by the per-turn `finalAnswerEverDelivered` latch, lever 1).
1984
+ crossTurnGate?: { sinceMs: number }
1887
1985
  replyCalled: boolean
1888
1986
  // #1664 — whether the model has delivered its *final answer* this turn
1889
1987
  // (as opposed to only an interim ack). `replyCalled` flips on the first
@@ -1912,6 +2010,18 @@ type CurrentTurn = {
1912
2010
  // Reset to false on every fresh-turn enqueue alongside
1913
2011
  // `finalAnswerDelivered`.
1914
2012
  finalAnswerSubstantive: boolean
2013
+ // Sticky "a substantive final answer has been delivered this turn" latch
2014
+ // (design `docs/message-emission-determinism.md` §9 preamble / R0). Distinct
2015
+ // from the MUTABLE `finalAnswerDelivered`, which the ack-reopen path clears
2016
+ // mid-turn (`feed-reopen-gate.ts:157`) so an "On it…" ack keeps a live feed
2017
+ // (#2141). Ordering gates (the no-OPEN-after-final card gate, lever 1) MUST
2018
+ // key on this sticky latch, not the mutable flag — keying on the mutable flag
2019
+ // is a no-op on exactly the ack-first turn where the reorder originates. Set
2020
+ // true ONLY at the points that set `finalAnswerDelivered = true` AND only when
2021
+ // the reply is `isSubstantiveFinalReply`; NEVER cleared by reopen. Reset to
2022
+ // false ONLY at turn start, mirroring `activityEverOpened`'s sticky-true
2023
+ // contract.
2024
+ finalAnswerEverDelivered: boolean
1915
2025
  // #1675 (over-ping safety net): wall-clock ms of the first reply
1916
2026
  // this turn that landed with `disable_notification: false` (a real
1917
2027
  // device ping). The conversational-pacing contract
@@ -1923,6 +2033,18 @@ type CurrentTurn = {
1923
2033
  // the framework. Null until the first ping lands. Reset on every
1924
2034
  // fresh-turn enqueue.
1925
2035
  firstPingAt: number | null
2036
+ // Notification ownership (R8 / PR-2 — design `docs/message-emission-
2037
+ // determinism.md` §over-ping). Whether the send that CLAIMED this turn's
2038
+ // ping slot (`firstPingAt`) was itself a *substantive* final answer
2039
+ // (`isSubstantiveFinalReply`) as opposed to a short interim ACK. The
2040
+ // over-ping safety net keys on this so a substantive answer pinging AFTER
2041
+ // an ack already pinged is UPGRADED (let through, owns the slot) rather
2042
+ // than silenced — otherwise "the reply is last but the phone never buzzed
2043
+ // for the answer." Set ATOMICALLY with `firstPingAt` (same synchronous
2044
+ // block, no await between) on a claim/upgrade so a racing second reply
2045
+ // reads a consistent pair. Init false; reset to false on every fresh-turn
2046
+ // enqueue alongside `firstPingAt`.
2047
+ firstPingWasSubstantive: boolean
1926
2048
  // #1677 silent-reply auto-edit. The first silent reply of a turn
1927
2049
  // captures `silentAnchorMessageId` + `silentAnchorText`; subsequent
1928
2050
  // silent replies in the SAME turn editMessageText that anchor
@@ -1935,6 +2057,13 @@ type CurrentTurn = {
1935
2057
  silentAnchorText: string
1936
2058
  capturedText: string[]
1937
2059
  orphanedReplyTimeoutId: ReturnType<typeof setTimeout> | null
2060
+ // How many times the orphaned-reply backstop timer has been re-armed
2061
+ // mid-tool-call instead of firing a synthetic turn_end. Bounded so a
2062
+ // genuinely wedged single long-running tool still surfaces: the cap is
2063
+ // ORPHANED_REPLY_MAX_REARMS (20 × 30 s = 10 min of genuine tool activity).
2064
+ // Reset to 0 on a fresh enqueue; NOT reset on text/tool_label re-arms —
2065
+ // only a new turn resets the budget.
2066
+ orphanedReplyRearmCount: number
1938
2067
  // Component 3 (turn-origin reply routing). A stable per-turn identity,
1939
2068
  // `${registryKey-or-chatKey}#${startedAt}`, assigned when the turn
1940
2069
  // starts and stamped into the inbound meta (`origin_turn_id`) so a reply
@@ -1962,6 +2091,14 @@ type CurrentTurn = {
1962
2091
  // Phase 1 of #332: count of tool_use events in the current turn, for
1963
2092
  // the tool_call_count column in the turns registry.
1964
2093
  toolCallCount: number
2094
+ // Count of tool_label events that passed the isTelegramSurfaceTool guard
2095
+ // this turn — the deterministic, surface-tool-excluded step count used by
2096
+ // the `✓ N steps` activity-feed total and the `tools=` lifecycle log.
2097
+ // Incremented in `case 'tool_label':` AFTER the surface-tool guard so
2098
+ // reply/stream_reply/edit_message/react are never counted. send_typing and
2099
+ // sync_retain are suppressed at the hook (computeLabel returns null) and
2100
+ // never arrive as tool_label events — excluded automatically.
2101
+ labeledToolCount: number
1965
2102
  // Tool-activity summary — mirrors Claude Code's native chat-UI
1966
2103
  // rendering ("Ran 5 commands, read a file"). Counters are
1967
2104
  // incremented in `case 'tool_use'`; `activityMessageId` holds the
@@ -2002,11 +2139,45 @@ type CurrentTurn = {
2002
2139
  // step that emits no new label doesn't read as frozen (the feed is otherwise
2003
2140
  // pull-only). undefined until the first label of the turn renders.
2004
2141
  lastToolLabelAt?: number
2142
+ // Fix 2 (post-answer background-agent liveness): wall-clock timestamp last
2143
+ // updated by the sub-agent/workflow watcher's onProgress callback whenever
2144
+ // it surfaces a NEW sub-agent step AFTER this turn's substantive answer was
2145
+ // delivered. Written INDEPENDENTLY of the tool_label path so the drop-guard
2146
+ // (`shouldReopenFeedAfterAck` / `finalAnswerSubstantive`) cannot gate it.
2147
+ // `feedHeartbeatTick` reads THIS (not `lastToolLabelAt`, which is frozen by
2148
+ // the drop-guard) to drive the post-answer liveness card — the core fix for
2149
+ // #2587's inert state. undefined until the first post-answer watcher advance.
2150
+ subagentActivityAt?: number
2151
+ // Sticky wall-clock timestamp when finalAnswerEverDelivered first latched
2152
+ // true this turn. Allows the heartbeat to distinguish "tool label arrived
2153
+ // before the answer" (lastToolLabelAt ≤ finalAnswerDeliveredAt, inert) from
2154
+ // "sub-agent active after the answer" (subagentActivityAt >
2155
+ // finalAnswerDeliveredAt, liveness card warranted). undefined until the
2156
+ // first substantive final answer of the turn.
2157
+ finalAnswerDeliveredAt?: number
2005
2158
  // Accumulating friendly-action feed for this turn. Each non-surface
2006
2159
  // tool_label appends a line via `appendActivityLabel`; the feed renders
2007
2160
  // (via `renderActivityFeed`) as a capped chronological list into the
2008
2161
  // in-place edited activity message and clears on reply. Reset per turn.
2009
2162
  mirrorLines: string[]
2163
+ // Narrative-dedup gate state (JSONL-text-narrative primitive). A `text`
2164
+ // block is held here for ONE lookahead step so the next event (a tool_use
2165
+ // or turn_end) can decide draft-then-send (SUPPRESS, it duplicates the
2166
+ // reply) vs working-narration (SHOW it as a transient mirrorLines step).
2167
+ // Null when nothing is pending. The pure decision lives in
2168
+ // narrative-dedup.ts; this slot is the per-turn cursor. Reset per turn.
2169
+ // Invariant `chat-is-the-single-source-of-truth`: a SHOWN narrative is
2170
+ // rendered through the SAME appendActivityLabel→renderStepFeed path as a
2171
+ // tool step — a transient, clipped, rolling-window line replaced by the
2172
+ // next event, never a persisted parallel mirror.
2173
+ pendingNarrative: { text: string } | null
2174
+ // Most-recently-seen reply/stream_reply `input.text` for this turn — the
2175
+ // ACTUAL delivered answer surface. Set wherever a REPLY_TOOL tool_use is
2176
+ // handled in the reducer. `flushPendingNarrativeAtTurnEnd` compares a
2177
+ // trailing narrative block against THIS (not capturedText.join(''), which
2178
+ // can mis-suppress when the model emits the same short string twice in a
2179
+ // turn). Empty string until the turn delivers a reply. Reset per turn.
2180
+ lastReplyText: string
2010
2181
  // Model A — foreground sub-agent nesting. A foreground sub-agent (Task/Agent
2011
2182
  // with no run_in_background) runs INSIDE this turn while the parent blocks at
2012
2183
  // the Task tool, so its live steps nest under the parent's activity feed
@@ -2019,9 +2190,169 @@ type CurrentTurn = {
2019
2190
  // gates on minInitialChars). Materialized and cleared at turn_end.
2020
2191
  answerStream: AnswerStreamHandle | null
2021
2192
  isDm: boolean
2193
+ // PR-4a (message-emission-determinism, `emission-authority.ts`). The
2194
+ // per-foreground-turn emission-authority façade the foreground-lane card/ping
2195
+ // emission call sites route through. Constructed ONCE per turn in the ctor
2196
+ // with the chat/thread key passed in explicitly (the PR-4e seam). Per-turn
2197
+ // only — a fresh `CurrentTurn` literal gets a fresh façade, so it never
2198
+ // persists across turns. Optional in the type so the bounded recently-ended
2199
+ // registry's older entries (and any hand-built test turn) tolerate its
2200
+ // absence; `emissionAuthorityFor` lazily backfills one when missing.
2201
+ emissionAuthority?: EmissionAuthority
2202
+ }
2203
+
2204
+ // PR-4e — the singleton `currentTurn` is RETAINED as (a) the flag-OFF store and
2205
+ // (b) the flag-ON "most-recent-set" MIRROR. Every GLOBAL-liveness read in this
2206
+ // file (`isBusy`, the `if (currentTurn != null) return` poke guards, the
2207
+ // orphaned-reply guard, the synchronous-to-live-turn `const turn = currentTurn`
2208
+ // captures) keeps reading this variable, so under the sequential-CLI invariant
2209
+ // (the most-recently-set turn IS the live turn) those reads stay byte-identical.
2210
+ // The per-topic isolation lives in `currentTurnMap.byKey`: a LATE async event
2211
+ // captured for topic A resolves A's authority by ITS OWN key even after the live
2212
+ // turn flipped to topic B (see current-turn-map.ts). Under the flag OFF the map
2213
+ // is never written and this is exactly the old singleton.
2214
+ let currentTurn: CurrentTurn | null = null
2215
+ const currentTurnMap = new CurrentTurnMap<CurrentTurn>()
2216
+
2217
+ /**
2218
+ * Set the live turn for `key`. Flag-branches in ONE place (inside the map):
2219
+ * flag-OFF assigns the singleton only; flag-ON sets the per-topic entry AND
2220
+ * updates the most-recent mirror. We keep the module-scope `currentTurn`
2221
+ * variable in lock-step with the map's mirror so the 140 unchanged global reads
2222
+ * see the same value.
2223
+ */
2224
+ function setCurrentTurn(turn: CurrentTurn, key: string): void {
2225
+ currentTurnMap.set(turn, key)
2226
+ currentTurn = currentTurnMap.get() // mirror most-recent-set (== `turn`)
2022
2227
  }
2023
2228
 
2024
- let currentTurn: CurrentTurn | null = null
2229
+ /**
2230
+ * End (delete) the live turn for `key`, iff `key` still maps to `turn`. Routes
2231
+ * the clear through the keyed accessor (leak-close-at-origin) and re-syncs the
2232
+ * module-scope mirror to the map's mirror.
2233
+ */
2234
+ function endCurrentTurnForKey(turn: CurrentTurn, key: string): boolean {
2235
+ const ended = currentTurnMap.endTurnForKey(turn, key)
2236
+ currentTurn = currentTurnMap.get() // re-sync mirror (null iff it pointed at turn)
2237
+ return ended
2238
+ }
2239
+
2240
+ /**
2241
+ * Clear the ENTIRE per-topic store + mirror (disconnect-flush / bridge-died:
2242
+ * every entry is a ghost).
2243
+ */
2244
+ function clearAllCurrentTurns(): void {
2245
+ currentTurnMap.clearAll()
2246
+ currentTurn = null
2247
+ }
2248
+
2249
+ /**
2250
+ * Is `turn` still the live turn FOR ITS OWN topic? Flag-OFF: `currentTurn ===
2251
+ * turn` (the ambient check, verbatim). Flag-ON: `byKey.get(turn'sKey) === turn`,
2252
+ * so a B-flip never falsifies A's own liveness. The callsites keep the literal
2253
+ * `currentTurn === turn` in source (the silence-liveness-wiring oracle) by
2254
+ * inlining the flag-OFF branch and delegating the flag-ON branch here.
2255
+ */
2256
+ function turnLiveForItsTopic(turn: CurrentTurn): boolean {
2257
+ return currentTurnMap.isLiveForKey(
2258
+ turn,
2259
+ statusKey(turn.sessionChatId, turn.sessionThreadId),
2260
+ )
2261
+ }
2262
+
2263
+ /**
2264
+ * Accessor for a turn's per-foreground-turn emission-authority façade (PR-4a).
2265
+ * Returns the façade constructed at the turn ctor; lazily backfills one (keyed
2266
+ * on the turn's chat/thread) for any turn that predates the field or was built
2267
+ * outside the ctor. Per-turn: the memoized instance lives on the turn object,
2268
+ * so it is discarded with the turn and never persists across turns.
2269
+ */
2270
+ function emissionAuthorityFor(turn: CurrentTurn): EmissionAuthority {
2271
  // Lazy backfill: only a turn without a façade gets one. The instance is stored
  // on the turn object itself, so this accessor mutates its argument.
+ if (turn.emissionAuthority == null) {
2272
+ turn.emissionAuthority = new EmissionAuthority(
2273
+ statusKey(turn.sessionChatId, turn.sessionThreadId),
2274
+ )
2275
+ }
2276
+ // PR-4b — CENTRALIZE the OPEN-gate wiring here, the single accessor every
2277
+ // routed call site already funnels through, so all 6 `openOrEditCard(...)`
2278
+ // sites stay byte-identical `(producer, apply)`. The façade reads the LIVE
2279
+ // turn view (a thunk — the card id / latch / tool-count mutate during the
2280
+ // turn) + the injected history deps from this one place, not per-call.
2281
+ // Idempotent; harmless under the flag OFF (the disabled branch never reads
2282
+ // it). The turn IS a structural `FeedOpenGateView`.
2283
  // This call sits outside the `if` above, so it runs on EVERY access (not only
  // after a backfill) and passes a fresh thunk and a fresh deps object each time.
+ turn.emissionAuthority.wireFeedOpenGate(() => turn, feedOpenGateDeps())
2284
+ return turn.emissionAuthority
2285
+ }
2286
+
2287
+ /**
2288
+ * The injected history dependencies the PR-4b OPEN gate needs (the real
2289
+ * `hasOutboundDeliveredSince` predicate + `HISTORY_ENABLED` + the substantive
2290
+ * `FINAL_ANSWER_MIN_CHARS` floor). Centralized so both the façade's enabled
2291
+ * branch AND the drain's own (now-redundant) inline gate consume the SAME deps
2292
+ * via the SAME pure helpers — flag-ON and flag-OFF cannot diverge. Keeps
2293
+ * `feed-open-gate.ts` sqlite-free (it never imports `history.js`).
2294
+ */
2295
+ function feedOpenGateDeps(): FeedOpenGateDeps {
2296
+ return {
2297
+ hasOutboundDeliveredSince,
2298
+ historyEnabled: HISTORY_ENABLED,
2299
+ finalAnswerMinChars: FINAL_ANSWER_MIN_CHARS,
2300
+ }
2301
+ }
2302
+
2303
+ /**
2304
+ * The deliver-before-drain inputs the PR-4d card-drain gate threads into the
2305
+ * façade's pure `mayDrainCardNow`. Centralized so the card-drain helper sources
2306
+ * the SAME `turnInFlightForGate()` + kill-switch the buffer-drain gate uses.
2307
+ *
2308
+ * `endingTurnFinalAnswerDelivered` is FIXED to `null` for the card path (§5
2309
+ * modeling decision): the live foreground card single-flight is governed by
2310
+ * `turn.activityInFlight` (via `mayDrain`), NOT by an ending turn's delivery
2311
+ * state — so `mayDrainBufferedInbound` degenerates to `!turnInFlight` and a
2312
+ * synthetic represent turn (finalAnswerDelivered=false) can never wedge the card.
2313
+ */
2314
+ function cardDrainGateCtx(): CardDrainGateCtx {
2315
+ return {
2316
+ turnInFlight: turnInFlightForGate(),
2317
+ endingTurnFinalAnswerDelivered: null,
2318
+ enabled: SERIALIZE_UNTIL_REPLIED_ENABLED,
2319
+ }
2320
+ }
2321
+
2322
+ /**
2323
+ * PR-4d centralized card-drain gate (Option A). The 6 foreground card-drain
2324
+ * sites pass their EXISTING single-flight-guarded block (the `mayDrain` guard +
2325
+ * the `openOrEditCard(producer, …)` thunk that assigns `turn.activityInFlight =
2326
+ * drainActivitySummary(…)`) in VERBATIM as `run`, so those load-bearing literals
2327
+ * stay byte-identical (the wiring oracles still see them).
2328
+ *
2329
+ * - **Flag OFF (default):** runs the guarded block directly — NO `chatLock`
2330
+ * wrapper, byte-equivalent to base.
2331
+ * - **Flag ON:** acquires `chatLock` AROUND the deliver-before-drain decision
2332
+ * (`mayDrainCardNow`) + the synchronous block, unifying the card path with
2333
+ * the #2137 serialization gate. The lock spans ONLY the gate decision + the
2334
+ * synchronous `openOrEditCard` kick-off inside `run` (which only ASSIGNS
2335
+ * `turn.activityInFlight = drainActivitySummary(...)`; the async send is NOT
2336
+ * awaited inside the lock). The lock is released before any drain
2337
+ * `await sendMessage` suspends, so a card OPEN never holds `chatLock` across
2338
+ * the gate's release — a synthetic represent turn can never wedge the gate,
2339
+ * and `mayDrain` stays callable lock-free.
2340
+ *
2341
+ * LOCK ORDERING (no-deadlock invariant): `chatLock` is acquired EXCLUSIVELY
2342
+ * here, around the gate; never the reverse. `mayDrainCardNow` is a pure read.
2343
+ */
2344
+ function cardDrainGate(turn: CurrentTurn, ea: EmissionAuthority, run: () => void): void {
2345
+ if (EMISSION_AUTHORITY_ENABLED) {
2346
  // NOTE(review): the value returned by `chatLock.run` is discarded with `void`
  // and nothing in this function catches. An exception thrown by `run()` or by
  // `mayDrainCardNow` inside the async callback therefore rejects the callback's
  // promise instead of throwing into the caller, which is what the flag-OFF
  // path below does. Presumably `chatLock.run` propagates that rejection —
  // confirm it (or a process-level handler) covers this case, because a
  // caller-side try/catch cannot.
+ void chatLock.run(
2347
+ statusKey(turn.sessionChatId, turn.sessionThreadId),
2348
+ async () => {
2349
  // The gate context is built here, inside the lock callback, so
  // `turnInFlightForGate()` is sampled when the callback runs rather than
  // when `cardDrainGate` was called.
+ if (ea.mayDrainCardNow(turn, cardDrainGateCtx())) run()
2350
+ },
2351
+ )
2352
  // Flag ON returns without waiting for the lock callback; `run` may not have
  // executed yet (and never executes if the gate says no).
+ return
2353
+ }
2354
  // Flag OFF: invoke the caller's guarded block directly and synchronously.
+ run()
2355
+ }
2025
2356
 
2026
2357
  // Component 3 (turn-origin reply routing). Recently-ended turns retained
2027
2358
  // by `turnId` so a LATE reply (the Brevo answer landing ~42s after
@@ -2115,7 +2446,22 @@ function deriveTurnId(
2115
2446
  */
2116
2447
  function findTurnByOriginId(originTurnId: string | null | undefined): CurrentTurn | null {
2117
2448
  if (originTurnId == null || originTurnId === '') return null
2118
- if (currentTurn != null && currentTurn.turnId === originTurnId) return currentTurn
2449
+ // PR-4e — resolve the LIVE turn by ITS OWN topic key under the flag. The
2450
+ // turnId encodes the key: `deriveTurnId` builds `${chatKey}#${messageId}`, so
2451
+ // the substring before `#` IS the statusKey. Flag-ON does an O(1)
2452
+ // `byKey.get(key)` and matches on turnId — so a reply whose origin turn is
2453
+ // STILL live for topic A resolves A even after the singleton mirror flipped to
2454
+ // B. Flag-OFF keeps the singleton check, verbatim. The recentTurnsById
2455
+ // registry fallback is UNCHANGED in both branches.
2456
+ if (EMISSION_AUTHORITY_ENABLED) {
2457
  // Splits on the FIRST `#`. NOTE(review): this assumes the key portion never
  // contains `#` itself — confirm against the key builder. `hashIdx > 0` also
  // skips ids with no `#` or an empty key prefix.
+ const hashIdx = originTurnId.indexOf('#')
2458
+ if (hashIdx > 0) {
2459
  // Keyed lookup; the zero-argument `currentTurnMap.get()` used elsewhere in
  // this file reads the most-recent mirror instead.
+ const live = currentTurnMap.get(originTurnId.slice(0, hashIdx))
2460
+ if (live != null && live.turnId === originTurnId) return live
2461
+ }
  // A flag-ON miss does not return here; it falls through to the registry
  // lookup at the bottom, same as the flag-OFF miss.
2462
+ } else if (currentTurn != null && currentTurn.turnId === originTurnId) {
2463
+ return currentTurn
2464
+ }
2119
2465
  return recentTurnsById.get(originTurnId) ?? null
2120
2466
  }
2121
2467
 
@@ -2368,7 +2714,10 @@ function postQueuedStatus(chatId: string, bufferedThread: number, inFlightThread
2368
2714
  void (async () => {
2369
2715
  const sent = await swallowingApiCall(
2370
2716
  () =>
2371
- bot.api.sendMessage(chatId, text, { message_thread_id: bufferedThread }),
2717
+ // Queued-placeholder status, not the user's answer — silence the
2718
+ // open ping (BORDERLINE: it's a "your message is queued" notice;
2719
+ // see PR description).
2720
+ bot.api.sendMessage(chatId, text, { message_thread_id: bufferedThread, disable_notification: true }),
2372
2721
  { chat_id: chatId, verb: 'queued-status.post', threadId: bufferedThread },
2373
2722
  )
2374
2723
  const messageId = (sent as { message_id?: number } | undefined)?.message_id
@@ -2542,6 +2891,16 @@ const preambleSuppressor = new PreambleSuppressor({
2542
2891
  // long-lived and flushes can occur outside any session-event
2543
2892
  // handler's scope. If the turn has been cleared, the update is
2544
2893
  // dropped (no chat to send to, no stream to mutate).
2894
+ //
2895
+ // PR-4e — the module-scope suppressor carries NO per-topic key (it is a
2896
+ // single global instance reset/flushed per turn), so there is no key to
2897
+ // scope by here: the correct resolution under BOTH flag states is the
2898
+ // most-recent-set live turn — exactly the singleton mirror `currentTurn`.
2899
+ // The per-turn `reset()` / `flushNow()` / `dropNow()` lifecycle (driven from
2900
+ // the live turn's own handlers) keeps the suppressor aligned with whichever
2901
+ // topic is currently live, so the mirror read is byte-identical to base and
2902
+ // cannot leak A's answer text into B (a flush for A runs while A is still the
2903
+ // most-recent turn; once B flips, A's stream is already force-superseded).
2545
2904
  const stream = currentTurn?.answerStream ?? null
2546
2905
  if (stream != null) stream.update(cumulative)
2547
2906
  },
@@ -2590,6 +2949,28 @@ function streamKey(chatId: string, threadId?: number | null): string {
2590
2949
  return chatKey(chatId, threadId)
2591
2950
  }
2592
2951
 
2952
+ // PR1 (cross-turn stale-card guard, design §9 lever 4 / race C/D).
2953
+ // `obligationSweep` writes one entry here, keyed on the obligation's
2954
+ // `originTurnId`, the instant it pushes an `obligation_represent` inbound —
2955
+ // carrying the obligation's `openedAt` (when the obligation was RAISED). The
2956
+ // represent inbound reuses the obligation's chat/thread/messageId, so the
2957
+ // `enqueue` that spawns the synthetic represent turn reconstructs the SAME
2958
+ // `deriveTurnId` value as the key. That represent turn — and ONLY that turn —
2959
+ // consumes and clears it, stamping `turn.crossTurnGate = { sinceMs: openedAt }`.
2960
+ // That turn's first card-OPEN then consults `hasOutboundDeliveredSince(chat,
2961
+ // openedAt)` and is suppressed iff a SUBSTANTIVE answer already landed since the
2962
+ // obligation was raised — so a "thinking…" card never narrates beneath an answer
2963
+ // the user already received in the original turn. Keying on `originTurnId` (not
2964
+ // chat/thread) means an unrelated later foreground turn on the same chat/thread
2965
+ // derives a different turn id, finds no entry, and is never mis-gated — closing
2966
+ // the residual cross-contamination window where a represent that was armed but
2967
+ // never enqueued (degenerate bridge-death) left a stale chat/thread-keyed gate
2968
+ // that the next foreground turn could wrongly consume. A normal foreground turn
2969
+ // never has an entry here, so the gate stays scoped to the synthetic surface.
2970
+ // The map holds at most one entry per obligation; re-arming the same obligation
2971
+ // overwrites its own entry with the latest openedAt.
2972
+ const pendingCrossTurnGate = new Map<string, { sinceMs: number }>()
2973
+
2593
2974
  /**
2594
2975
  * Component 1 — deliver-before-drain. The single chokepoint that both
2595
2976
  * turn-end drain sites (`purgeReactionTracking`, `releaseTurnBufferGate`)
@@ -2729,6 +3110,10 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
2729
3110
  // the markClaudeBusyForInbound on the delivery path. Safe no-op
2730
3111
  // when the key was never marked (synthetic purge from a sweep).
2731
3112
  claudeBusyKeys.delete(key)
3113
+ // #2527: clear the per-key reaction-transition counter and first-reply
3114
+ // sentinel alongside the controller so we don't leak state across turns.
3115
+ reactionTransitionCounts.delete(key)
3116
+ firstTextReplyLogged.delete(key)
2732
3117
  // Human-feel UX: stop the turn-long `typing…` indicator started in
2733
3118
  // the turn-start block. `purgeReactionTracking` is the canonical
2734
3119
  // turn-end, so this is the single owner of the stop. (If an abnormal
@@ -2932,15 +3317,42 @@ function releaseTurnBufferGate(key: string, endingTurn?: CurrentTurn): void {
2932
3317
  * Idempotent: a second purge is a no-op `.delete()` on a key already
2933
3318
  * gone — handlers that already purge elsewhere are unharmed.
2934
3319
  */
3320
+ function emitTurnRecord(turn: CurrentTurn, endedAt: number): void {
  // Appends one newline-terminated JSON record describing the ended turn to
  // `/state/agent/turns.jsonl`. `endedAt` is a millisecond timestamp: `ts` is
  // that value floored to whole seconds, and `duration_ms` is `endedAt` minus
  // `turn.startedAt` (0 when `startedAt` is not positive).
  //
  // NOTE(review): the JSDoc tail immediately above this function ("Idempotent:
  // a second purge is a no-op…") documents `endCurrentTurnAtomic`, which now
  // follows this function; this helper was inserted between that comment and
  // its target.
3321
+ try {
3322
+ const rec =
3323
+ JSON.stringify({
3324
+ ts: Math.floor(endedAt / 1000),
3325
  // Falls back to the literal 'unknown' when the env var is unset.
+ agent: process.env.SWITCHROOM_AGENT_NAME ?? 'unknown',
3326
+ duration_ms: turn.startedAt > 0 ? endedAt - turn.startedAt : 0,
3327
  // NOTE(review): this is the raw `toolCallCount`, not the
  // surface-tool-excluded `labeledToolCount` — confirm which count the
  // consumer of this file expects.
+ tools: turn.toolCallCount ?? 0,
3328
  // NOTE(review): keys on the mutable `finalAnswerDelivered` flag rather than
  // the sticky `finalAnswerEverDelivered` latch — confirm that is intended.
+ status: turn.finalAnswerDelivered ? 'complete' : 'no_reply',
3329
+ turn_id: turn.turnId,
3330
+ }) + '\n'
3331
  // Synchronous append (presumably `node:fs` `appendFileSync`); any failure,
  // including one from building the record, is swallowed by the catch below.
+ appendFileSync('/state/agent/turns.jsonl', rec)
3332
+ } catch {
3333
+ // best-effort — never let metrics emission break turn teardown
3334
+ }
3335
+ }
3336
+
2935
3337
  function endCurrentTurnAtomic(turn: CurrentTurn): void {
2936
- if (currentTurn !== turn) return
2937
- currentTurn = null
3338
+ // PR-4e — keyed liveness + keyed clear (leak-close-at-origin). Flag-OFF: the
3339
+ // guard is `currentTurn === turn` and the clear nulls the singleton, verbatim.
3340
+ // Flag-ON: the guard becomes `byKey.get(turn'sKey) === turn` (so a flip to
3341
+ // another topic doesn't spuriously short-circuit THIS topic's teardown) and
3342
+ // the clear does `byKey.delete(key)` + nulls the mirror iff it still points at
3343
+ // `turn`. `endCurrentTurnForKey` returns false (no delete) when the entry no
3344
+ // longer matches — the same early-return semantics as the old `!== turn` guard.
3345
+ const key = statusKey(turn.sessionChatId, turn.sessionThreadId)
3346
+ if (!turnLiveForItsTopic(turn)) return
3347
+ endCurrentTurnForKey(turn, key) // currentTurnByKey.delete(key) + mirror clear
2938
3348
  // Status-surface observability: one line at every turn CLEAR (with how far
2939
3349
  // the turn got), plus a DEGRADED warning when the turn did tool work but the
2940
3350
  // live feed never opened because its sends failed (the resume-400 signature).
3351
+ const turnEndedAt = Date.now()
2941
3352
  process.stderr.write(
2942
- `telegram gateway: ${formatTurnLifecycle('clear', 'turn_end', turn, Date.now())}\n`,
3353
+ `telegram gateway: ${formatTurnLifecycle('clear', 'turn_end', turn, turnEndedAt)}\n`,
2943
3354
  )
3355
+ emitTurnRecord(turn, turnEndedAt)
2944
3356
  const degraded = detectStatusSurfaceDegraded(turn)
2945
3357
  if (degraded != null) {
2946
3358
  process.stderr.write(
@@ -3199,6 +3611,7 @@ async function postIdleClearNotice(idleClearMs: number): Promise<void> {
3199
3611
  () =>
3200
3612
  bot.api.sendMessage(chatId, text, {
3201
3613
  parse_mode: 'HTML',
3614
+ disable_notification: true,
3202
3615
  ...(threadId != null ? { message_thread_id: threadId } : {}),
3203
3616
  }),
3204
3617
  { chat_id: chatId, verb: 'idleAutoClear.notice' },
@@ -3340,7 +3753,7 @@ async function resolveCompactCard(
3340
3753
  function finalizeStatusReaction(
3341
3754
  chatId: string,
3342
3755
  threadId: number | undefined,
3343
- reason: 'done' | 'error' = 'done',
3756
+ reason: 'done' | 'undelivered' | 'error' = 'done',
3344
3757
  ): void {
3345
3758
  const key = statusKey(chatId, threadId)
3346
3759
  const ctrl = activeStatusReactions.get(key)
@@ -3354,6 +3767,17 @@ function finalizeStatusReaction(
3354
3767
  if (reason === 'done' && deferredDoneReactions.tryDefer(key, ctrl)) return
3355
3768
  deferredDoneReactions.drop(key)
3356
3769
  ctrl.finalize(reason)
3770
+ // #2527: log controller dispose so the lifecycle end is observable. Use
3771
+ // activeReactionMsgIds to reconstruct the turnId token before purge clears it.
3772
+ const msgInfo = activeReactionMsgIds.get(key)
3773
+ if (msgInfo != null) {
3774
+ logStreamingEvent({
3775
+ kind: 'status_reaction_dispose',
3776
+ chatId,
3777
+ turnId: `${chatId}:${msgInfo.messageId}`,
3778
+ reason,
3779
+ })
3780
+ }
3357
3781
  purgeReactionTracking(key)
3358
3782
  }
3359
3783
 
@@ -4960,10 +5384,10 @@ function postLegacyBanner(
4960
5384
  // short-circuit to no-ops at runtime. `progressDriver` is typed `any`
4961
5385
  // so TS doesn't resolve `progressDriver?.X` to `never`.
4962
5386
  const streamMode = process.env.SWITCHROOM_TG_STREAM_MODE ?? 'checklist'
4963
- // PR B: per-agent stream throttle override via channels.telegram.stream_throttle_ms.
4964
- // When unset, draft-stream.ts applies transport-aware defaults (300 ms draft,
4965
- // 1000 ms message). Parsed once at boot; sub-zero / NaN values fall back to
4966
- // undefined so the per-transport default wins. See `src/agents/scaffold.ts`
5387
+ // Per-agent stream throttle override via channels.telegram.stream_throttle_ms.
5388
+ // When unset, draft-stream.ts applies DM/group defaults (400 ms DMs, 1000 ms
5389
+ // groups). Parsed once at boot; sub-zero / NaN values fall back to undefined
5390
+ // so the per-chat-type default wins. See `src/agents/scaffold.ts`
4967
5391
  // `channelsToEnv()` for the yaml → env wiring.
4968
5392
  const STREAM_THROTTLE_MS_OVERRIDE: number | undefined = (() => {
4969
5393
  const raw = process.env.SWITCHROOM_TG_STREAM_THROTTLE_MS
@@ -4973,74 +5397,30 @@ const STREAM_THROTTLE_MS_OVERRIDE: number | undefined = (() => {
4973
5397
  })()
4974
5398
  const TURN_FLUSH_SAFETY_ENABLED = isTurnFlushSafetyEnabled()
4975
5399
 
4976
- // #869-Phase1 / openclaw-pattern. When SET, the answer-lane stream
4977
- // (telegram-plugin/answer-stream.ts) renders the model's transcript
4978
- // text as a USER-VISIBLE edit-in-place message instead of writing to
4979
- // Telegram's invisible compose-box draft (which is the default and
4980
- // supports the #1664 "retract + re-prompt" contract). With this flag
4981
- // on:
4982
- // 1. createAnswerStream is instantiated without `sendMessageDraft`,
4983
- // so it falls back to `sendMessage` + `editMessageText` for a
4984
- // real chat-timeline message (`answer-stream.ts:212-214`).
4985
- // 2. minInitialChars is set to 1 — the first text chunk pushes a
4986
- // visible message immediately (TTFO under 5s for short turns).
4987
- // 3. At turn_end, if the model never called reply / stream_reply
4988
- // AND the streamed message has substantive captured text, the
4989
- // gateway DOES NOT retract (which would delete a user-visible
4990
- // message the user has been reading live); it calls
4991
- // `stream.stop()` to freeze the current text as the final
4992
- // answer, records the message in dedup + history, and marks
4993
- // `turn.finalAnswerDelivered = true` so the #1664 silent-end
4994
- // re-prompt does not fire. Turn-flush is suppressed for this
4995
- // branch — its job (deliver captured text) is structurally
4996
- // already done by the visible stream.
4997
- // 4. The reply-tool / stream_reply path is unchanged — when the
4998
- // model uses an explicit reply tool the prior streamed message
4999
- // is retracted (delete) and the reply takes over as before.
5000
- // Trade-off: a stream-as-final-answer turn does NOT push a device
5001
- // notification (Telegram does not notify on edits, and we choose
5002
- // not to send a duplicate fresh message for the ping). For short
5003
- // turns where the user is actively watching, this is the right
5004
- // shape — they see the answer materialise live. For longer waits,
5005
- // the cross-turn pending-progress system (#1445/#1669) is the
5006
- // canonical surface and DOES ping at the appropriate boundaries.
5007
- //
5008
- // 2026-05-25: default flipped ON after a fleet-log audit showed a ~19%
5009
- // framework-fallback ("still working…") rate — the visible stream gave an
5010
- // immediate in-timeline signal that suppressed the silence-poke.
5011
- //
5012
- // 2026-06-03: default flipped back OFF (operator request). In practice the
5013
- // visible stream delivered ~none of its intended benefit while imposing a
5014
- // jarring cost:
5015
- // - Telegram rate-limits editMessageText to roughly once/second, so real
5016
- // "watch it type" streaming is impossible; and the model emits almost no
5017
- // interstitial assistant.text (it thinks → tool → reply), so the
5018
- // preliminary was a near-empty bubble (observed: 5–13 byte edits).
5019
- // - On every turn where the model calls the reply tool (≈always), the reply
5020
- // posts a SEPARATE canonical message and the stream RETRACTS (deletes) its
5021
- // preliminary — the user sees a raw bubble appear then vanish, replaced by
5022
- // the formatted reply. In supergroup topics it also mis-routed (preliminary
5023
- // → General, reply → topic). Net: an unformatted flash + a delete, no
5024
- // streaming value.
5025
- // The anti-silence role the visible stream once filled is now covered by the
5026
- // live ACTIVITY FEED (tool-use streaming, below), the "…typing" chat-action
5027
- // loop, and `thinking_effort: low` (fast tool-less turns) — so off-by-default
5028
- // no longer regresses the framework-fallback rate. With the flag off the lane
5029
- // uses the invisible compose-box draft (the original default, #1664-compatible)
5030
- // and the reply tool is the single canonical, formatted message.
5400
+ // When SET, the answer-lane stream (telegram-plugin/answer-stream.ts) renders
5401
+ // the model's transcript text as a USER-VISIBLE edit-in-place message. Default
5402
+ // OFF: the lane stays dormant and the reply tool is the single canonical
5403
+ // formatted message — no unformatted preliminary that flashes and gets deleted.
5404
+ // With this flag on, minInitialChars is set to 1 and the first text chunk opens
5405
+ // a visible preview immediately. At turn_end, if the model never called reply /
5406
+ // stream_reply AND the streamed message has substantive captured text, the
5407
+ // gateway materializes it as a pinged final answer (materialize()) and deletes
5408
+ // the silent preview. When the model uses an explicit reply tool the prior
5409
+ // streamed message is retracted instead.
5410
+ // The draft transport (sendMessageDraft) is permanently retired — both ON and
5411
+ // OFF use sendMessage + editMessageText; the difference is whether a visible
5412
+ // preview is opened at all.
5031
5413
  // Opt back IN per agent with SWITCHROOM_VISIBLE_ANSWER_STREAM=1.
5032
5414
  const ANSWER_STREAM_VISIBLE_ENABLED = parseVisibleAnswerStreamEnabled(
5033
5415
  process.env.SWITCHROOM_VISIBLE_ANSWER_STREAM,
5034
5416
  )
5035
- // Single source of truth for the answer-lane behaviour (flash-decouple,
5036
- // 2026-06-05). The visible preview gates on the visible flag ALONE; the draft
5037
- // flag controls only the transport. Resolved here once and consulted at the
5038
- // createAnswerStream config, the materialize-as-answer guard, and the boot log,
5039
- // so all three can never drift back into the `visible || retired` conflation
5040
- // that re-opened the flash. Total-enumerated in answer-stream-flag.test.ts.
5417
+ // Single source of truth for the answer-lane behaviour. The draft transport
5418
+ // (sendMessageDraft) is permanently retired — the lane is either VISIBLE
5419
+ // (opt-in) or DORMANT (the unconditional default: reply tool is the only
5420
+ // message). Resolved here once and consulted at the createAnswerStream config,
5421
+ // the materialize-as-answer guard, and the boot log.
5041
5422
  const ANSWER_LANE = resolveAnswerLaneConfig({
5042
5423
  visibleEnabled: ANSWER_STREAM_VISIBLE_ENABLED,
5043
- draftFnAvailable: sendMessageDraftFn != null,
5044
5424
  })
5045
5425
 
5046
5426
  // Whether to DELETE the activity/status feed when the final answer lands.
@@ -5076,6 +5456,11 @@ const completeProgressCardTurn:
5076
5456
  // #1122 PR3: flushProgressCardsForShutdown deleted with the card. No
5077
5457
  // replacement needed — there are no pinned progress messages to flush.
5078
5458
  let subagentWatcher: SubagentWatcherHandle | null = null
5459
+ // Background-worker activity feed manager. Module-scoped so shutdown can stop()
5460
+ // its internal heartbeat interval (mirrors subagentWatcher). Recreated per
5461
+ // bridge connect; the stale handle's interval is unref'd, so a missed stop()
5462
+ // can't keep the process alive, but we stop() on shutdown for cleanliness.
5463
+ let workerActivityFeed: ReturnType<typeof createWorkerActivityFeed> | null = null
5079
5464
 
5080
5465
  // ─── IPC server ───────────────────────────────────────────────────────────
5081
5466
  const SOCKET_PATH = process.env.SWITCHROOM_GATEWAY_SOCKET ?? join(STATE_DIR, 'gateway.sock')
@@ -5237,8 +5622,12 @@ let inFlightUpdate: { requestId: string; startedAt: number } | null = null
5237
5622
  // SWITCHROOM_SILENCE_FALLBACK_MS — base threshold (default 300000)
5238
5623
  // SWITCHROOM_SILENCE_FALLBACK_HARD_MS — hard ceiling for the in-flight-tool
5239
5624
  // defer (default 900000 = 15min)
5240
- // SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS=1 — enable the defer (default OFF;
5241
- // canary on marko against #2162 telemetry)
5625
+ // SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS=1 — enable the defer. NOTE: this is
5626
+ // now set fleet-wide in defaults.env
5627
+ // (was a marko canary against #2162;
5628
+ // promoted to the fleet default). The
5629
+ // code default below is still OFF, so
5630
+ // the live behaviour comes from config.
5242
5631
  function parsePositiveMsEnv(name: string, fallbackMs: number): number {
5243
5632
  const raw = process.env[name]
5244
5633
  if (raw == null || raw === '') return fallbackMs
@@ -5247,6 +5636,20 @@ function parsePositiveMsEnv(name: string, fallbackMs: number): number {
5247
5636
  }
5248
5637
  const SILENCE_FALLBACK_MS = parsePositiveMsEnv('SWITCHROOM_SILENCE_FALLBACK_MS', 300_000)
5249
5638
  const SILENCE_FALLBACK_HARD_MS = parsePositiveMsEnv('SWITCHROOM_SILENCE_FALLBACK_HARD_MS', 900_000)
5639
+ // #2527 — mid-turn liveness floor threshold (default 45s). The early, quiet
5640
+ // beat: a `user` turn working silently this long without a substantive answer
5641
+ // gets ONE honest "still on it" interim, so the ambient 👀 never masquerades
5642
+ // as "done". Strictly below SILENCE_FALLBACK_MS (the loud 300s unwedge).
5643
+ // Whole floor is kill-switchable via SWITCHROOM_TG_LIVENESS_FLOOR=0.
5644
+ const SILENCE_FLOOR_MS = parsePositiveMsEnv('SWITCHROOM_SILENCE_FLOOR_MS', 45_000)
5645
+ // #2527 — role-aware terminal reaction honesty (the "thumbs-up false done"
5646
+ // fix). Default ON; SWITCHROOM_TG_TERMINAL_HONESTY=0 reverts to always-👍.
5647
+ const LIVENESS_TERMINAL_HONESTY = process.env.SWITCHROOM_TG_TERMINAL_HONESTY !== '0'
5648
+ // SILENCE_DEFER_INFLIGHT_TOOLS: previously an opt-in (=1). The new
5649
+ // isLegitimatelyWorking callback supersedes this — defer is now the DEFAULT
5650
+ // when the callback is wired. The legacy flag is kept so `=0` still lets
5651
+ // operators force-disable the defer (handled inside silence-poke.ts tick()).
5652
+ // The old `=1` path is kept for back-compat but is now redundant.
5250
5653
  const SILENCE_DEFER_INFLIGHT_TOOLS = process.env.SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS === '1'
5251
5654
  // Production-liveness (2026-06-05 UAT finding). Count an activity-feed render or
5252
5655
  // an answer-stream draft update as liveness for the silence clock, so a long
@@ -5255,13 +5658,109 @@ const SILENCE_DEFER_INFLIGHT_TOOLS = process.env.SWITCHROOM_SILENCE_DEFER_INFLIG
5255
5658
  // restores the legacy "only a real reply resets the clock" behaviour.
5256
5659
  const SILENCE_LIVENESS_PRODUCTION = process.env.SWITCHROOM_SILENCE_LIVENESS_PRODUCTION !== '0'
5257
5660
 
5661
+ /**
5662
+ * Feed-survival predicate — the single source of truth for "is this turn
5663
+ * legitimately working?" used by BOTH teardown timers (orphaned-reply fuse
5664
+ * and silence-poke framework fallback).
5665
+ *
5666
+ * Returns true if ANY of the following hold for the given chat key:
5667
+ *
5668
+ * (a) A foreground tool call is in flight in the current turn
5669
+ * (`toolFlightTracker.isMidToolCall()`). This covers most tools
5670
+ * including ask_user while it blocks awaiting a tap.
5671
+ *
5672
+ * (b) Detached background work was dispatched in the current turn and has
5673
+ * not yet resolved — `pendingProgress.hasPendingAsyncDispatch(key)`.
5674
+ * Covers `Bash run_in_background:true` (which returns a near-instant
5675
+ * handle, emptying inFlight, while the background process keeps
5676
+ * running) and `Agent` / `Task` dispatches.
5677
+ *
5678
+ * (c) A human-wait tool (`ask_user`) is open for this chat. A pending
5679
+ * ask_user IS already captured by (a) while the tool_use is in flight,
5680
+ * but we include the explicit pendingAskUser check for defence-in-depth
5681
+ * (e.g. after an unlikely inFlight clear without a tool_result).
5682
+ *
5683
+ * The key is `statusKey(chatId, threadId)` — the same key used by
5684
+ * silencePoke / pendingProgress.
5685
+ */
5686
+ function isLegitimatelyWorking(key: string): boolean {
5687
+ // (a) foreground in-flight tool.
5688
+ // NOTE: toolFlightTracker is GLOBAL, not per-key. In a hypothetical
5689
+ // multi-chat agent a tool in flight for chat A would make this return
5690
+ // true for chat B's key. Accepted: the gateway runs one Claude session
5691
+ // (one turn in flight at a time); true multi-chat concurrency is not
5692
+ // currently supported. (b) and (c) below are correctly per-key.
5693
+ if (toolFlightTracker.isMidToolCall()) return true
5694
+ // (b) detached background work dispatched this turn
5695
+ if (pendingProgress.hasPendingAsyncDispatch(key)) return true
5696
+ // (c) ask_user open for this chat (defence-in-depth)
5697
+ const { chatId: keyChatId } = parseKeyForSurvival(key)
5698
+ for (const entry of pendingAskUser.values()) {
5699
+ if (entry.chatId === keyChatId) return true
5700
+ }
5701
+ return false
5702
+ }
5703
+
5704
+ /** Parse `<chatId>:<threadIdOrEmpty>` — mirrors silence-poke's parseKey.
5705
+ * Local copy so we don't need to re-export from silence-poke. */
5706
+ function parseKeyForSurvival(key: string): { chatId: string } {
5707
+ const idx = key.indexOf(':')
5708
+ return { chatId: idx < 0 ? key : key.slice(0, idx) }
5709
+ }
5710
+
5258
5711
  silencePoke.startTimer({
5259
- thresholdsMs: { fallback: SILENCE_FALLBACK_MS, fallbackHardCeiling: SILENCE_FALLBACK_HARD_MS },
5712
+ thresholdsMs: { fallback: SILENCE_FALLBACK_MS, fallbackHardCeiling: SILENCE_FALLBACK_HARD_MS, floor: SILENCE_FLOOR_MS },
5260
5713
  deferFallbackWhileToolInFlight: SILENCE_DEFER_INFLIGHT_TOOLS,
5714
+ isLegitimatelyWorking: (key) => isLegitimatelyWorking(key),
5261
5715
  emitMetric: (event) => {
5262
5716
  // Re-emit through the unified runtime-metrics fan-out (PostHog + JSONL).
5263
5717
  emitRuntimeMetric(event)
5264
5718
  },
5719
+ // #2527 — the gateway-owned half of the mid-turn-floor decision: only the
5720
+ // live turn knows its loop role + whether a substantive answer has landed.
5721
+ // Keyed on statusKey so a DM (threadId null) and a forum topic are identical.
5722
+ floorState: (key) => {
5723
+ const turn = currentTurn
5724
+ if (turn == null) return null
5725
+ if (statusKey(turn.sessionChatId, turn.sessionThreadId) !== key) return null
5726
+ return { role: turn.role, finalAnswerDelivered: turn.finalAnswerDelivered }
5727
+ },
5728
+ // #2527 — the early, quiet liveness beat. Honest text from the longest
5729
+ // in-flight tool (model-free, claude-native), routed through the SAME send
5730
+ // path as the 300s fallback; pings OFF (this is the gentle beat, not the
5731
+ // loud unwedge) and the turn is NOT torn down — it keeps working.
5732
+ onMidTurnFloor: async (ctx) => {
5733
+ // Late-fire guard, mirroring the fallback: a clean turn-end can race the
5734
+ // tick. If the turn is gone, stay silent.
5735
+ if (activeTurnStartedAt.get(ctx.key) == null && currentTurn == null) return
5736
+ const blockedOnApproval = activeStatusReactions
5737
+ .get(statusKey(ctx.chatId, ctx.threadId))
5738
+ ?.isAwaiting() ?? false
5739
+ const text = silencePoke.formatFrameworkFallbackText(
5740
+ 'working',
5741
+ ctx.silenceMs,
5742
+ ctx.inFlightTools,
5743
+ blockedOnApproval,
5744
+ )
5745
+ try {
5746
+ await robustApiCall(
5747
+ () => bot.api.sendMessage(ctx.chatId, text, {
5748
+ ...(ctx.threadId != null ? { message_thread_id: ctx.threadId } : {}),
5749
+ // The quiet beat: visible in-thread, no device buzz. (The 300s
5750
+ // fallback pings; the floor must not train the user to mute.)
5751
+ disable_notification: true,
5752
+ }),
5753
+ { chat_id: ctx.chatId, ...(ctx.threadId != null ? { threadId: ctx.threadId } : {}) },
5754
+ )
5755
+ // Count it as production so the silence clock resets — the user just
5756
+ // saw a real message, so the 300s loud fallback is measured from here.
5757
+ silencePoke.noteProduction(ctx.key, Date.now())
5758
+ } catch (err) {
5759
+ process.stderr.write(
5760
+ `silence-poke mid-turn floor sendMessage failed chat=${ctx.chatId} thread=${ctx.threadId}: ${err}\n`,
5761
+ )
5762
+ }
5763
+ },
5265
5764
  onFrameworkFallback: async (ctx) => {
5266
5765
  // Late-fire short-circuit (2026-05-23 audit finding). The fallback
5267
5766
  // can race a clean turn-end: the model's actual reply lands inside
@@ -5281,6 +5780,14 @@ silencePoke.startTimer({
5281
5780
  `turn ended cleanly during silence window ` +
5282
5781
  `chat=${ctx.chatId} thread=${ctx.threadId ?? '-'} silence_ms=${ctx.silenceMs}\n`,
5283
5782
  )
5783
+ // #2527: structured skip event so the late-fire race is machine-readable.
5784
+ logStreamingEvent({
5785
+ kind: 'silence_poke_skip',
5786
+ chatId: ctx.chatId,
5787
+ threadId: ctx.threadId ?? undefined,
5788
+ silenceMs: ctx.silenceMs,
5789
+ skipReason: 'turn_ended_cleanly_during_window',
5790
+ })
5284
5791
  // Tell silence-poke this chat-thread is finished so the next
5285
5792
  // arming doesn't carry stale state.
5286
5793
  silencePoke.endTurn(ctx.key)
@@ -5294,6 +5801,15 @@ silencePoke.startTimer({
5294
5801
  // get_status snapshot → pure formatter. Any hostd unavailability
5295
5802
  // degrades silently to the existing generic text (zero regression).
5296
5803
  let text: string | null = null
5804
+ // Hoisted out of the generic-fallback branch below because the send site
5805
+ // gates `disable_notification` on it: when the turn is parked on an
5806
+ // approval card, the fallback TEXT is a user-gating re-ping ("waiting for
5807
+ // your approval — tap Approve or Deny …"), and that must stay LOUD so the
5808
+ // user knows the ball is in their court. The reaction controller tracks the
5809
+ // park via setAwaiting on the permission-request.
5810
+ const blockedOnApproval = activeStatusReactions
5811
+ .get(statusKey(ctx.chatId, ctx.threadId))
5812
+ ?.isAwaiting() ?? false
5297
5813
  const upd = inFlightUpdate
5298
5814
  if (upd != null) {
5299
5815
  try {
@@ -5315,9 +5831,6 @@ silencePoke.startTimer({
5315
5831
  // benign "wedge" class — claude is alive, waiting on the operator's
5316
5832
  // tap), say so instead of "still working…". The reaction controller
5317
5833
  // already tracks this (setAwaiting on the permission-request park).
5318
- const blockedOnApproval = activeStatusReactions
5319
- .get(statusKey(ctx.chatId, ctx.threadId))
5320
- ?.isAwaiting() ?? false
5321
5834
  text = silencePoke.formatFrameworkFallbackText(
5322
5835
  ctx.fallbackKind,
5323
5836
  ctx.silenceMs,
@@ -5325,12 +5838,26 @@ silencePoke.startTimer({
5325
5838
  blockedOnApproval,
5326
5839
  )
5327
5840
  }
5841
+ // #2527: log the actual poke fire with structured data before sending,
5842
+ // so the event is visible even if the send fails.
5843
+ logStreamingEvent({
5844
+ kind: 'silence_poke_fire',
5845
+ chatId: ctx.chatId,
5846
+ threadId: ctx.threadId ?? undefined,
5847
+ silenceMs: ctx.silenceMs,
5848
+ fallbackKind: ctx.fallbackKind,
5849
+ })
5328
5850
  try {
5329
5851
  await robustApiCall(
5330
5852
  () => bot.api.sendMessage(ctx.chatId, text, {
5331
5853
  ...(ctx.threadId != null ? { message_thread_id: ctx.threadId } : {}),
5332
- // Framework fallback pings — user genuinely needs to know.
5333
- disable_notification: false,
5854
+ // Conditional: the pure-liveness "still working…" notice is a status
5855
+ // surface and stays SILENT. But when the turn is parked on an
5856
+ // approval card, this same fallback carries a user-gating re-ping
5857
+ // ("waiting for your approval — tap Approve or Deny …") — that must
5858
+ // PING, because the user is the one being waited on. Gate on the same
5859
+ // `blockedOnApproval` signal that selects the re-ping text above.
5860
+ disable_notification: blockedOnApproval ? false : true,
5334
5861
  }),
5335
5862
  { chat_id: ctx.chatId, ...(ctx.threadId != null ? { threadId: ctx.threadId } : {}) },
5336
5863
  )
@@ -5464,13 +5991,31 @@ silencePoke.startTimer({
5464
5991
  return sib == null || sib >= silencePoke.DEFAULT_THRESHOLDS.fallback
5465
5992
  },
5466
5993
  )
5994
+ // PR-4e self-heal backstop: drop any per-topic `currentTurnByKey` entries
5995
+ // for fbChatId whose turn is stale-by-the-same-silence-gate the sibling
5996
+ // sweep above used — the same precedent as `purgeStaleTurnsForChat`'s
5997
+ // activeTurnStartedAt sweep, so a leaked map entry (a turn whose keyed
5998
+ // delete somehow never ran) can never outlive its chat. Gated identically:
5999
+ // the firing key is always stale; a sibling is stale iff it's silent ≥ the
6000
+ // fallback threshold (or has no silence state). No-op under the flag OFF
6001
+ // (the map is empty).
6002
+ currentTurnMap.purgeChatStale(fbChatId, (siblingKey) => {
6003
+ if (siblingKey === fbKey) return true
6004
+ const sib = silencePoke.silenceMsForKey(siblingKey, fbNow)
6005
+ return sib == null || sib >= silencePoke.DEFAULT_THRESHOLDS.fallback
6006
+ })
5467
6007
  // Null `currentTurn` if it's still pointing at the wedged turn —
5468
6008
  // when claude eventually fires a late `turn_end` for this session
5469
6009
  // (or never does), the handler's `const turn = currentTurn` snapshot
5470
6010
  // returns null and the regular teardown short-circuits. Without
5471
6011
  // this, the late event would re-emit `turn_ended` AND clobber
5472
6012
  // whatever fresh turn the next inbound started.
5473
- if (turnMatchesFallback && currentTurn === wedgedTurn && wedgedTurn != null) {
6013
+ // PR-4e keyed liveness for the guard. Flag-OFF: `turnLiveForItsTopic`
6014
+ // reduces to `currentTurn === wedgedTurn` (singleton mirror), verbatim.
6015
+ // Flag-ON: `byKey.get(fbKey) === wedgedTurn`, so the keyed delete still
6016
+ // fires when the LIVE mirror has already flipped to another topic B (a bare
6017
+ // `currentTurn === wedgedTurn` would falsely skip and leak A's byKey entry).
6018
+ if (turnMatchesFallback && wedgedTurn != null && turnLiveForItsTopic(wedgedTurn)) {
5474
6019
  // Status-surface observability: emit the lifecycle CLEAR for the
5475
6020
  // silence-poke teardown so a fallback-nulled turn has a turn-lifecycle
5476
6021
  // line like every other clear path (the framework-fallback line below is
@@ -5478,7 +6023,12 @@ silencePoke.startTimer({
5478
6023
  process.stderr.write(
5479
6024
  `telegram gateway: ${formatTurnLifecycle('clear', 'silence_fallback', wedgedTurn, Date.now())}\n`,
5480
6025
  )
5481
- currentTurn = null
6026
+ // PR-4e — keyed delete for the wedged turn's OWN key (fbKey == the
6027
+ // statusKey this fallback fired for, == the wedgedTurn's key since
6028
+ // turnMatchesFallback gated chat+thread equality). Flag-OFF nulls the
6029
+ // singleton, verbatim; flag-ON deletes only this topic's entry and clears
6030
+ // the mirror iff it still points here — a live sibling topic is untouched.
6031
+ endCurrentTurnForKey(wedgedTurn, fbKey)
5482
6032
  }
5483
6033
  // Best-effort: clear any pending silent-end marker so the Stop hook
5484
6034
  // doesn't double-block when claude eventually exits the wedged turn.
@@ -5789,6 +6339,43 @@ function obligationSweep(): void {
5789
6339
  return
5790
6340
  }
5791
6341
  if (decision.action === 'represent') {
6342
+ // Fix #2472 — duplicate-represent guard. Before re-presenting AGAIN, check
6343
+ // whether the agent has ALREADY delivered a substantive outbound reply to
6344
+ // this chat SINCE the obligation was most recently re-presented. If so the
6345
+ // obligation is satisfied-but-misdetected (the reply landed but its routing
6346
+ // didn't resolve back to this origin, so the normal close path missed it) —
6347
+ // close silently and do NOT re-fire, which is what produced the near-identical
6348
+ // duplicate in #2472 (reply 10608 answered represent_count=1, yet
6349
+ // represent_count=2 fired anyway → duplicate 10609).
6350
+ //
6351
+ // The cutoff is `lastRepresentedAt` (the time of the PREVIOUS represent), NOT
6352
+ // `openedAt`. This is load-bearing: the genuine "agent wrote a plain-text
6353
+ // answer and never called reply" case must still represent ONCE. On the first
6354
+ // represent `lastRepresentedAt` is undefined, so this guard is a no-op and the
6355
+ // single represent fires as before. Only the SECOND-and-later represent is
6356
+ // gated — exactly where a reply that landed between fires must suppress the
6357
+ // re-ask. Falls back to false (never suppresses) if history is unavailable.
6358
+ if (
6359
+ shouldSuppressRepresent(o, {
6360
+ historyEnabled: HISTORY_ENABLED,
6361
+ // Pass the represent-guard's OWN low threshold — a terse-but-real reply
6362
+ // must suppress the duplicate (#2472/#2474), unlike the escalate branch
6363
+ // below which keeps the 200-char default.
6364
+ hasOutboundDeliveredSince: (chatId, sinceMs, threadId) =>
6365
+ hasOutboundDeliveredSince(
6366
+ chatId,
6367
+ sinceMs,
6368
+ threadId,
6369
+ OBLIGATION_REPRESENT_GUARD_MIN_REPLY_CHARS,
6370
+ ),
6371
+ })
6372
+ ) {
6373
+ process.stderr.write(
6374
+ `telegram gateway: obligation closed silently — reply delivered since last represent (no re-fire) origin=${o.originTurnId}\n`,
6375
+ )
6376
+ obligationLedger.close(o.originTurnId)
6377
+ return
6378
+ }
5792
6379
  // Re-present goes through the bridge → buffer. Only the represent path is
5793
6380
  // gated on an empty buffer (let the existing drain run first, avoid
5794
6381
  // double-presenting). Escalation below is NOT gated on the buffer — it is a
@@ -5796,6 +6383,25 @@ function obligationSweep(): void {
5796
6383
  // behind a dead bridge can never block the operator nudge.
5797
6384
  if (pendingInboundBuffer.depth(agent) > 0) return
5798
6385
  pendingInboundBuffer.push(agent, buildObligationRepresentInbound(o, Date.now()))
6386
+ // PR1 (cross-turn stale-card guard, §9 lever 4 / race C/D). Arm the
6387
+ // card-OPEN gate for the synthetic turn this represent inbound will spawn:
6388
+ // carry the obligation's `openedAt` so that turn's first card-OPEN can ask
6389
+ // "was a substantive answer already delivered since the obligation was
6390
+ // raised?" and, if so, suppress the card (it would otherwise narrate beneath
6391
+ // the answer the user already received). Keyed on the obligation's
6392
+ // `originTurnId` — the SAME id the represent inbound carries
6393
+ // (`buildObligationRepresentInbound` reuses `o.messageId`/`o.chatId`/
6394
+ // `o.threadId`, so the enqueue-time `deriveTurnId` reconstructs exactly
6395
+ // `o.originTurnId`). Keying on the turn id (not chat/thread) means ONLY the
6396
+ // exact represent turn this gate was armed for can consume it; an unrelated
6397
+ // later foreground turn on the same chat/thread has a different originTurnId
6398
+ // → finds no entry → its card opens normally. This closes the residual
6399
+ // cross-contamination window where a never-enqueued represent's stale gate
6400
+ // could suppress an unrelated turn's card (the represent/duplicate-reply
6401
+ // family). This does NOT gate the represent SEND — the represent guard above
6402
+ // already owns suppressing an already-satisfied represent; this only governs
6403
+ // the decorative card.
6404
+ pendingCrossTurnGate.set(o.originTurnId, { sinceMs: o.openedAt })
5799
6405
  const attempt = obligationLedger.markRepresented(o.originTurnId)
5800
6406
  process.stderr.write(
5801
6407
  `telegram gateway: obligation re-presented origin=${o.originTurnId} attempt=${attempt}/${OBLIGATION_REPRESENT_MAX}\n`,
@@ -6196,7 +6802,10 @@ const ipcServer: IpcServer = createIpcServer({
6196
6802
  process.stderr.write(
6197
6803
  `telegram gateway: disconnect-flush nulled currentTurn (bridge died with turn in flight)\n`,
6198
6804
  )
6199
- currentTurn = null
6805
+ // PR-4e — the bridge DIED with a turn in flight: EVERY per-topic entry
6806
+ // is a ghost, not just the mirror's. Clear the whole map + mirror.
6807
+ // Flag-OFF: this nulls the singleton only (the map is empty), verbatim.
6808
+ clearAllCurrentTurns()
6200
6809
  }
6201
6810
  },
6202
6811
  log: (msg) => process.stderr.write(`${msg}\n`),
@@ -6722,10 +7331,10 @@ const ipcServer: IpcServer = createIpcServer({
6722
7331
  ...(cfgTopic != null ? { threadId: cfgTopic } : {}),
6723
7332
  }
6724
7333
  },
6725
- buildKeyboard: (requestId) =>
7334
+ buildKeyboard: (requestId, epoch) =>
6726
7335
  new InlineKeyboard()
6727
- .text('✅ Approve', `cfg:${requestId}:approve`)
6728
- .text('🚫 Deny', `cfg:${requestId}:deny`),
7336
+ .text('✅ Approve', `cfg:${requestId}:${epoch}:approve`)
7337
+ .text('🚫 Deny', `cfg:${requestId}:${epoch}:deny`),
6729
7338
  postCard: async (args) => {
6730
7339
  try {
6731
7340
  const sent = await robustApiCall(
@@ -6757,6 +7366,9 @@ const ipcServer: IpcServer = createIpcServer({
6757
7366
  () =>
6758
7367
  bot.api.editMessageText(args.chatId, args.messageId, args.text, {
6759
7368
  parse_mode: 'HTML',
7369
+ // Strip the inline keyboard on a terminal/interim edit so the
7370
+ // [Approve]/[Deny] buttons stop being tappable on a resolved card.
7371
+ ...(args.stripKeyboard ? { reply_markup: { inline_keyboard: [] } } : {}),
6760
7372
  }),
6761
7373
  { chat_id: String(args.chatId), verb: 'config-approval-edit' },
6762
7374
  )
@@ -6826,6 +7438,8 @@ const ipcServer: IpcServer = createIpcServer({
6826
7438
  () =>
6827
7439
  bot.api.editMessageText(args.chatId, args.messageId, args.text, {
6828
7440
  parse_mode: 'HTML',
7441
+ // Finalize is terminal — drop the keyboard so buttons are gone.
7442
+ ...(args.stripKeyboard ? { reply_markup: { inline_keyboard: [] } } : {}),
6829
7443
  }),
6830
7444
  { chat_id: String(args.chatId), verb: 'config-approval-finalize' },
6831
7445
  )
@@ -7377,6 +7991,23 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
7377
7991
  }
7378
7992
  }
7379
7993
  process.stderr.write(`telegram channel: reply: invoked chatId=${chat_id} charCount=${text.length} preview=${JSON.stringify(text.slice(0, 80))}\n`)
7994
+ // #2527: emit time_to_first_text_reply_ms on the FIRST text reply of each
7995
+ // turn so operators can see how long users waited for any visible output.
7996
+ // Only fires once per turn (firstTextReplyLogged guards the repeat).
7997
+ if (turn != null) {
7998
+ const threadId = args.message_thread_id != null ? Number(args.message_thread_id) : undefined
7999
+ const replyKey = statusKey(chat_id, threadId)
8000
+ if (!firstTextReplyLogged.has(replyKey)) {
8001
+ firstTextReplyLogged.add(replyKey)
8002
+ logStreamingEvent({
8003
+ kind: 'turn_reply_timing',
8004
+ chatId: chat_id,
8005
+ threadId,
8006
+ turnId: turn.turnId,
8007
+ timeToFirstTextReplyMs: Date.now() - turn.gatewayReceiveAt,
8008
+ })
8009
+ }
8010
+ }
7380
8011
 
7381
8012
  // #546 dedup check: was this content just sent via turn-flush or
7382
8013
  // a sibling reply path? Skip the actual send and return a
@@ -7411,6 +8042,15 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
7411
8042
  // existing call-sites and the typical "final answer" reply keep their
7412
8043
  // current behaviour without an explicit flag.
7413
8044
  let disableNotification = args.disable_notification === true
8045
+ // #2527/#1664 — the over-ping safety net below may downgrade
8046
+ // `disableNotification` ping→silent for ANTI-SPAM (one ping per turn). That
8047
+ // delivery-channel decision must NOT pollute final-answer CLASSIFICATION: a
8048
+ // final answer the model intended to ping is STILL the final answer even when
8049
+ // the framework silences the actual ping. Classify on the model's original
8050
+ // intent (what executeStreamReply already does), so an over-ping-silenced
8051
+ // final answer sets finalAnswerDelivered=true — fixing both a spurious
8052
+ // silent-end re-prompt and a false 'undelivered' (😐) terminal reaction.
8053
+ const modelDisableNotification = args.disable_notification === true
7414
8054
 
7415
8055
  // #1675 over-ping safety net. The conversational-pacing contract
7416
8056
  // (`reference/rfcs/conversational-pacing.md` beat 5) says EXACTLY ONE
@@ -7441,32 +8081,92 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
7441
8081
  const turn = currentTurn
7442
8082
  if (turn != null) {
7443
8083
  const now = Date.now()
7444
- const decision = decideOverPing({
7445
- modelRequestedPing: !disableNotification,
7446
- firstPingAt: turn.firstPingAt,
7447
- nowMs: now,
8084
+ // Notification ownership (R8 / PR-2): on the `reply` path,
8085
+ // substantiveness is purely the ≥200-char (or `done`) backstop —
8086
+ // `isSubstantiveFinalReply` is `done === true || text.length >= 200`
8087
+ // and ignores the notification flag entirely. `reply` carries no
8088
+ // `done`, so it reduces to the ≥200-char length test. We still pass
8089
+ // `modelDisableNotification` (the MODEL's original intent, not the
8090
+ // possibly-downgraded `disableNotification`) to mirror the #2533
8091
+ // final-answer decoupling call shape, but that arg does NOT
8092
+ // participate in classification here — it is inert on this path.
8093
+ const replySubstantive = isSubstantiveFinalReply({
8094
+ text: rawText,
8095
+ disableNotification: modelDisableNotification,
7448
8096
  })
7449
- if (decision.suppress) {
7450
- process.stderr.write(
7451
- `telegram gateway: reply over-ping safety net ` +
7452
- `downgrading disable_notification:false true ` +
7453
- `(chat=${chat_id} thread=${args.message_thread_id ?? '-'} ` +
7454
- `firstPingAt=${turn.firstPingAt} sinceFirstPing_ms=${decision.sinceFirstPingMs})\n`,
7455
- )
7456
- // Observability: surface to the unified runtime-metrics
7457
- // fan-out so the cadence dashboard can track fleet-wide
7458
- // over-ping rate (leading indicator of model pacing drift).
7459
- emitRuntimeMetric({
7460
- kind: 'over_ping_suppressed',
7461
- key: statusKey(chat_id, args.message_thread_id != null
7462
- ? Number(args.message_thread_id) : undefined),
7463
- sinceFirstPingMs: decision.sinceFirstPingMs ?? 0,
7464
- })
7465
- disableNotification = true
7466
- wasOverPingSuppressed = true
7467
- } else if (decision.claimSlot) {
7468
- turn.firstPingAt = now
8097
+ // PR-4c: the over-ping DECISION relocates into the emission-authority
8098
+ // façade, behind the kill-switch (default OFF), the same structural way
8099
+ // PR-4b moved the OPEN gate. `decideOverPing` is already pure, so PR-4c
8100
+ // extracts NOTHING new it relocates the *call* into the façade's enabled
8101
+ // branch and keeps the *effects* (stderr, metric, the atomic
8102
+ // `firstPingAt`/`firstPingWasSubstantive` pair-set, the
8103
+ // `disableNotification`/`wasOverPingSuppressed` outer-scope writes) HERE,
8104
+ // parameterized by the decision the façade hands back via `applyDecision`.
8105
+ //
8106
+ // - Disabled branch runs `disabledOverPing()` its own LITERAL
8107
+ // `decideOverPing(...)` call + the full effects block, VERBATIM from
8108
+ // PR-4b-base (the disabled-path-is-byte-identical proof).
8109
+ // - Enabled branch: the façade computes the decision and hands it to
8110
+ // `applyOverPingDecision(decision)`, which performs the IDENTICAL
8111
+ // effects. Same pure inputs ⇒ same decision flag-ON ≡ flag-OFF ≡ base.
8112
+ //
8113
+ // The effects block is shared between both thunks by closing over `decision`
8114
+ // but the disabled thunk computes it via its OWN literal `decideOverPing(`
8115
+ // first, so the disabled path never depends on the façade for the decision.
8116
+ const applyOverPingDecision = (decision: OverPingDecision): void => {
8117
+ if (decision.suppress) {
8118
+ process.stderr.write(
8119
+ `telegram gateway: reply over-ping safety net — ` +
8120
+ `downgrading disable_notification:false → true ` +
8121
+ `(chat=${chat_id} thread=${args.message_thread_id ?? '-'} ` +
8122
+ `firstPingAt=${turn.firstPingAt} sinceFirstPing_ms=${decision.sinceFirstPingMs})\n`,
8123
+ )
8124
+ // Observability: surface to the unified runtime-metrics
8125
+ // fan-out so the cadence dashboard can track fleet-wide
8126
+ // over-ping rate (leading indicator of model pacing drift).
8127
+ emitRuntimeMetric({
8128
+ kind: 'over_ping_suppressed',
8129
+ key: statusKey(chat_id, args.message_thread_id != null
8130
+ ? Number(args.message_thread_id) : undefined),
8131
+ sinceFirstPingMs: decision.sinceFirstPingMs ?? 0,
8132
+ })
8133
+ disableNotification = true
8134
+ wasOverPingSuppressed = true
8135
+ } else if (decision.claimSlot) {
8136
+ // Claim (first ping) OR upgrade (substantive answer pinging over an
8137
+ // ack's slot). Set firstPingAt AND firstPingWasSubstantive ATOMICALLY
8138
+ // (no await between) so a racing second reply reads a consistent pair.
8139
+ turn.firstPingAt = now
8140
+ turn.firstPingWasSubstantive = replySubstantive
8141
+ if (decision.upgrade) {
8142
+ process.stderr.write(
8143
+ `telegram gateway: reply over-ping safety net — ` +
8144
+ `UPGRADE: substantive answer pings over an ack's slot ` +
8145
+ `(chat=${chat_id} thread=${args.message_thread_id ?? '-'})\n`,
8146
+ )
8147
+ }
8148
+ }
7469
8149
  }
8150
+ emissionAuthorityFor(turn).claimOrDowngradePing(
8151
+ { modelRequestedPing: !disableNotification, substantive: replySubstantive },
8152
+ {
8153
+ firstPingAt: turn.firstPingAt,
8154
+ firstPingWasSubstantive: turn.firstPingWasSubstantive,
8155
+ nowMs: now,
8156
+ },
8157
+ applyOverPingDecision,
8158
+ () => {
8159
+ // Disabled-path: literal `decideOverPing(` + effects, VERBATIM base.
8160
+ const decision = decideOverPing({
8161
+ modelRequestedPing: !disableNotification,
8162
+ firstPingAt: turn.firstPingAt,
8163
+ substantive: replySubstantive,
8164
+ firstPingWasSubstantive: turn.firstPingWasSubstantive,
8165
+ nowMs: now,
8166
+ })
8167
+ applyOverPingDecision(decision)
8168
+ },
8169
+ )
7470
8170
  }
7471
8171
  }
7472
8172
 
@@ -7653,10 +8353,38 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
7653
8353
  // clear; the main turn-end path also re-writes the state when
7654
8354
  // finalAnswerDelivered=false, so this is a belt-and-braces gate
7655
8355
  // for the turn_end-missing case (#1741).
7656
- if (isFinalAnswerReply({ text: rawText, disableNotification })) {
8356
+ if (isFinalAnswerReply({ text: rawText, disableNotification: modelDisableNotification })) {
7657
8357
  clearSilentEndState(statusKey(chat_id, threadId))
7658
8358
  }
7659
8359
 
8360
+ // Lever 2 (design §9 lever 2): finalize the activity card BEFORE the reply
8361
+ // chunks send, so the card keeps its (lower) message_id and the reply is
8362
+ // structurally last on screen. ONLY for a *substantive* final — for an ack
8363
+ // (non-substantive) do NOTHING: finalizing an ack early would
8364
+ // close → reopen → emit MORE messages (the #2141 ack-then-work feed, R3).
8365
+ // `clearActivitySummary` edits the existing card in place (no new send) and
8366
+ // nulls `activityMessageId`; combined with the sticky latch set here it
8367
+ // prevents any post-reply re-OPEN below the answer. Idempotent with the
8368
+ // tool_use-event clear at the first-reply handoff (the existing backstop).
8369
+ {
8370
+ const finalizeTurn = currentTurn
8371
+ if (
8372
+ finalizeTurn != null
8373
+ && isSubstantiveFinalReply({ text: rawText, disableNotification: modelDisableNotification })
8374
+ ) {
8375
+ // PR-4a: routed through the emission-authority façade (no-op delegates —
8376
+ // the latch-set and the finalize run exactly as before).
8377
+ const ea = emissionAuthorityFor(finalizeTurn)
8378
+ ea.markSubstantiveFinalDelivered(() => {
8379
+ finalizeTurn.finalAnswerEverDelivered = true
8380
+ finalizeTurn.finalAnswerDeliveredAt = Date.now()
8381
+ })
8382
+ ea.finalizeCard(() => {
8383
+ clearActivitySummary(finalizeTurn)
8384
+ })
8385
+ }
8386
+ }
8387
+
7660
8388
  if (previewMessageId != null && reply_to != null && replyMode !== 'off') {
7661
8389
  await deleteStalePreview(previewMessageId)
7662
8390
  previewMessageId = null
@@ -7764,7 +8492,7 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
7764
8492
  turn != null
7765
8493
  && isFinalAnswerReply({
7766
8494
  text: decision.mergedText,
7767
- disableNotification,
8495
+ disableNotification: modelDisableNotification,
7768
8496
  })
7769
8497
  ) {
7770
8498
  turn.finalAnswerDelivered = true
@@ -7772,8 +8500,12 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
7772
8500
  // answer must NOT re-open the feed on post-answer housekeeping.
7773
8501
  turn.finalAnswerSubstantive = isSubstantiveFinalReply({
7774
8502
  text: decision.mergedText,
7775
- disableNotification,
8503
+ disableNotification: modelDisableNotification,
7776
8504
  })
8505
+ // Sticky ordering latch (lever 1): a substantive final closes the
8506
+ // card OPEN gate for the rest of the turn. NEVER cleared by reopen.
8507
+ if (turn.finalAnswerSubstantive) turn.finalAnswerEverDelivered = true
8508
+ if (turn.finalAnswerSubstantive && turn.finalAnswerDeliveredAt == null) turn.finalAnswerDeliveredAt = Date.now()
7777
8509
  if (turn.finalAnswerSubstantive) closeObligationOnSubstantiveReply(args, turn, replyRoutedOriginTurn)
7778
8510
  }
7779
8511
  outboundDedup.record(
@@ -8112,12 +8844,17 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
8112
8844
  //
8113
8845
  // #1664 — `turn.finalAnswerDelivered = true` keeps the silent-
8114
8846
  // end re-prompt from spuriously firing on a delivered final.
8115
- if (turn != null && isFinalAnswerReply({ text: rawText, disableNotification })) {
8847
+ if (turn != null && isFinalAnswerReply({ text: rawText, disableNotification: modelDisableNotification })) {
8116
8848
  turn.finalAnswerDelivered = true
8117
8849
  // Feed-reopen refinement: track whether this final was substantive
8118
8850
  // (≥200 chars or stream-done — not a short pinging ack) so post-answer
8119
8851
  // housekeeping tool work does NOT re-open the feed / trip silent-end.
8120
- turn.finalAnswerSubstantive = isSubstantiveFinalReply({ text: rawText, disableNotification })
8852
+ turn.finalAnswerSubstantive = isSubstantiveFinalReply({ text: rawText, disableNotification: modelDisableNotification })
8853
+ // Sticky ordering latch (lever 1): set once a SUBSTANTIVE final lands;
8854
+ // never cleared by reopen. The card OPEN gate keys on this, not the
8855
+ // mutable finalAnswerDelivered above (which reopen toggles).
8856
+ if (turn.finalAnswerSubstantive) turn.finalAnswerEverDelivered = true
8857
+ if (turn.finalAnswerSubstantive && turn.finalAnswerDeliveredAt == null) turn.finalAnswerDeliveredAt = Date.now()
8121
8858
  // #1728: release the buffer gate + emit terminal 👍. Mid-turn
8122
8859
  // acks bypass this branch and remain non-events for the
8123
8860
  // reaction (preserves #1713). The full turn-state teardown
@@ -8278,9 +9015,8 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
8278
9015
  }
8279
9016
 
8280
9017
  const access = loadAccess()
8281
- // Detect chat type for draft-transport selection.
9018
+ // Detect chat type for throttle-default selection.
8282
9019
  // Private (DM) chats have positive numeric IDs; groups/channels are negative.
8283
- // Forum topics have a message_thread_id set — sendMessageDraft is unsupported there.
8284
9020
  const streamChatId = args.chat_id as string
8285
9021
  const streamIsPrivate = isDmChatId(streamChatId)
8286
9022
  const streamIsForumTopic = args.message_thread_id != null && args.message_thread_id !== ''
@@ -8322,6 +9058,19 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
8322
9058
  // PR3b-cutover: feed lastOutboundAt to the delivery machine (see
8323
9059
  // executeReply) so its TTL tick suppresses an active-turn fallback.
8324
9060
  shadowEmit({ kind: 'modelOutbound', key: sKey as _ChatKey, at: Date.now() })
9061
+ // #2527: emit turn_reply_timing on the first stream_reply of the turn,
9062
+ // mirroring the same gate in executeReply. Guards with firstTextReplyLogged
9063
+ // so a turn that calls reply first and stream_reply second doesn't double-emit.
9064
+ if (turn != null && !firstTextReplyLogged.has(sKey)) {
9065
+ firstTextReplyLogged.add(sKey)
9066
+ logStreamingEvent({
9067
+ kind: 'turn_reply_timing',
9068
+ chatId: streamChatId,
9069
+ threadId: streamThreadId,
9070
+ turnId: turn.turnId,
9071
+ timeToFirstTextReplyMs: Date.now() - turn.gatewayReceiveAt,
9072
+ })
9073
+ }
8325
9074
  // #1741 — see executeReply for the rationale: only a plausibly-
8326
9075
  // final stream_reply clears the silent-end state. An interim
8327
9076
  // ack via stream_reply must NOT clear; the Stop hook needs
@@ -8338,6 +9087,33 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
8338
9087
  }
8339
9088
  }
8340
9089
 
9090
+ // Lever 2 (design §9 lever 2): finalize the activity card BEFORE the stream
9091
+ // send so the card keeps its lower message_id and the reply is structurally
9092
+ // last. ONLY for a *substantive* final (a stream_reply done=true or ≥200
9093
+ // chars) — for a short pinging interim chunk do NOTHING (finalizing an ack
9094
+ // early would close → reopen → emit more, the #2141 ack-then-work feed, R3).
9095
+ // `clearActivitySummary` edits in place + nulls activityMessageId; the sticky
9096
+ // latch set here blocks any post-reply re-OPEN below the answer.
9097
+ if (
9098
+ turn != null
9099
+ && isSubstantiveFinalReply({
9100
+ text: (args.text as string | undefined) ?? '',
9101
+ disableNotification: args.disable_notification === true,
9102
+ done: args.done === true,
9103
+ })
9104
+ ) {
9105
+ // PR-4a: routed through the emission-authority façade (no-op delegates —
9106
+ // the latch-set and the finalize run exactly as before).
9107
+ const ea = emissionAuthorityFor(turn)
9108
+ ea.markSubstantiveFinalDelivered(() => {
9109
+ turn.finalAnswerEverDelivered = true
9110
+ if (turn.finalAnswerDeliveredAt == null) turn.finalAnswerDeliveredAt = Date.now()
9111
+ })
9112
+ ea.finalizeCard(() => {
9113
+ clearActivitySummary(turn)
9114
+ })
9115
+ }
9116
+
8341
9117
  const result = await handleStreamReply(
8342
9118
  {
8343
9119
  chat_id: streamChatId,
@@ -8370,7 +9146,6 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
8370
9146
  logStreamingEvent,
8371
9147
  isPrivateChat: streamIsPrivate,
8372
9148
  isForumTopic: streamIsForumTopic,
8373
- ...(sendMessageDraftFn != null ? { sendMessageDraft: sendMessageDraftFn } : {}),
8374
9149
  // Issue #310: deliver the outbound count bump BEFORE forceCompleteTurn
8375
9150
  // so the terminal render sees outboundDeliveredCount > 0. The handler
8376
9151
  // calls this dep in that order internally.
@@ -8390,12 +9165,10 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
8390
9165
  recordOutbound,
8391
9166
  ...(HISTORY_ENABLED ? { getLatestInboundMessageId } : {}),
8392
9167
  writeError: (line) => process.stderr.write(line),
8393
- // PR B: drop the legacy 600 ms compromise. When the operator sets
8394
- // `channels.telegram.stream_throttle_ms` in yaml, the env override
8395
- // wins; otherwise draft-stream's transport-aware default fires
8396
- // (300 ms draft / 1000 ms message). `throttleMs: undefined` is a
8397
- // signal — handlers downgrade to `?? undefined`, which then
8398
- // passes through to draft-stream where the default applies.
9168
+ // When the operator sets `channels.telegram.stream_throttle_ms` in yaml,
9169
+ // the env override wins; otherwise draft-stream's DM/group defaults apply
9170
+ // (400 ms for DMs, 1000 ms for groups). `throttleMs: undefined` passes
9171
+ // through to draft-stream where the per-chat-type default applies.
8399
9172
  ...(STREAM_THROTTLE_MS_OVERRIDE != null ? { throttleMs: STREAM_THROTTLE_MS_OVERRIDE } : {}),
8400
9173
  progressCardActive: streamMode === 'checklist',
8401
9174
  },
@@ -8495,6 +9268,10 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
8495
9268
  disableNotification: args.disable_notification === true,
8496
9269
  done: args.done === true,
8497
9270
  })
9271
+ // Sticky ordering latch (lever 1): set once a SUBSTANTIVE final lands;
9272
+ // never cleared by reopen. The card OPEN gate keys on this sticky latch.
9273
+ if (turn.finalAnswerSubstantive) turn.finalAnswerEverDelivered = true
9274
+ if (turn.finalAnswerSubstantive && turn.finalAnswerDeliveredAt == null) turn.finalAnswerDeliveredAt = Date.now()
8498
9275
  if (turn.finalAnswerSubstantive) closeObligationOnSubstantiveReply(args, turn, streamRoutedOriginTurn)
8499
9276
  // #1744 follow-up — stream_reply edge case. The first-emit gate at
8500
9277
  // L5178 only clears silent-end state on the FIRST emit of a stream.
@@ -9832,6 +10609,46 @@ function resetOrphanedReplyTimeout(): void {
9832
10609
  replyCalled: t.replyCalled,
9833
10610
  progressCardActive: progressDriver != null,
9834
10611
  })) {
10612
+ // Feed-survival guard: re-arm the fuse while the turn is
10613
+ // legitimately working — an in-flight tool, a detached background
10614
+ // process (Bash run_in_background), or a human-wait tool (ask_user).
10615
+ // This extends the original "isMidToolCall" guard to cover the
10616
+ // detached-work cases that empty inFlight prematurely.
10617
+ //
10618
+ // Cap logic:
10619
+ // • Foreground tools / detached background work: bound by
10620
+ // ORPHANED_REPLY_MAX_REARMS (20 × 30 s = 10 min). A genuinely
10621
+ // hung tool still surfaces after the cap.
10622
+ // • Human-wait tools (ask_user): NEVER forcibly backstop while
10623
+ // ask_user is open for this chat — the human simply hasn't
10624
+ // tapped yet. We keep re-arming unconditionally until the prompt
10625
+ // resolves (TTL or tap) and inFlight empties.
10626
+ const turnKey = statusKey(t.sessionChatId, t.sessionThreadId)
10627
+ const working = isLegitimatelyWorking(turnKey)
10628
+ const humanWaiting = (() => {
10629
+ for (const entry of pendingAskUser.values()) {
10630
+ if (entry.chatId === t.sessionChatId) return true
10631
+ }
10632
+ return false
10633
+ })()
10634
+ if (working || humanWaiting) {
10635
+ const underCap = t.orphanedReplyRearmCount < ORPHANED_REPLY_MAX_REARMS
10636
+ if (humanWaiting || underCap) {
10637
+ t.orphanedReplyRearmCount++
10638
+ process.stderr.write(
10639
+ `telegram gateway: orphaned-reply fuse expired — re-arming` +
10640
+ ` (rearm ${t.orphanedReplyRearmCount}/${ORPHANED_REPLY_MAX_REARMS},` +
10641
+ ` in_flight=${toolFlightTracker.inFlightCount()},` +
10642
+ ` human_wait=${humanWaiting},` +
10643
+ ` bg_work=${pendingProgress.hasPendingAsyncDispatch(turnKey)})\n`,
10644
+ )
10645
+ resetOrphanedReplyTimeout()
10646
+ return
10647
+ }
10648
+ process.stderr.write(
10649
+ `telegram gateway: orphaned-reply rearm cap reached (${ORPHANED_REPLY_MAX_REARMS}) — forcing backstop despite working state\n`,
10650
+ )
10651
+ }
9835
10652
  process.stderr.write(
9836
10653
  `telegram gateway: orphaned-reply timeout (${ORPHANED_REPLY_TIMEOUT_MS}ms) — forcing backstop\n`,
9837
10654
  )
@@ -9876,13 +10693,112 @@ const FOREGROUND_SUBAGENT_ACCUM_MAX = 12
9876
10693
  * foreground sub-agents (rare — parallel Task dispatch) flatten in insertion
9877
10694
  * order; the single-sub-agent common case nests precisely under its
9878
10695
  * Delegating line.
10696
+ *
10697
+ * The header (elapsed + tool count) is now threaded into the render so the
10698
+ * main-session card matches the worker card's two-line header style. This
10699
+ * fixes the missing header regression where the worker card showed elapsed/
10700
+ * tool-count metadata but the main-session card rendered step-lines only.
9879
10701
  */
9880
10702
  function composeTurnActivity(turn: CurrentTurn, final = false, liveSuffix = ''): string | null {
9881
10703
  const childLines: string[] = []
9882
10704
  for (const narrative of turn.foregroundSubAgents.values()) {
9883
10705
  childLines.push(...narrative)
9884
10706
  }
9885
- return renderActivityFeedWithNested(turn.mirrorLines, childLines, final, liveSuffix)
10707
+ // Pass labeledToolCount as stepCount only on the terminal (final) render so
10708
+ // the persisted feed record shows a `✓ N steps` total. The live in-progress
10709
+ // feed omits it (stepCount undefined) to stay clean and minimal.
10710
+ const stepCount = final ? turn.labeledToolCount : undefined
10711
+ // Build the session header so the main-session card renders the same two-line
10712
+ // elapsed/tool-count header as the worker card.
10713
+ const header: SessionActivityHeader = {
10714
+ label: 'Agent',
10715
+ elapsedMs: turn.startedAt > 0 ? Date.now() - turn.startedAt : 0,
10716
+ toolCount: turn.labeledToolCount,
10717
+ state: final ? 'done' : 'running',
10718
+ }
10719
+ return renderActivityFeedWithNested(turn.mirrorLines, childLines, final, liveSuffix, stepCount, header)
10720
+ }
10721
+
10722
+ /**
10723
+ * Render a SHOWN narrative text block as a transient liveness step — the
10724
+ * same path a tool label takes (appendActivityLabel → renderStepFeed), so
10725
+ * the narrative line is rolling-window-clipped and replaced by the next
10726
+ * event exactly like a tool step. NOT a new message, NOT persisted as a
10727
+ * parallel mirror (invariant `chat-is-the-single-source-of-truth`,
10728
+ * reference/invariants.md). Clipped to a single 120-char line via
10729
+ * clipNarrative so it reads as a step, not a paragraph.
10730
+ */
10731
+ function showNarrativeStep(turn: CurrentTurn, text: string): void {
10732
+ const rendered = appendActivityLabel(turn.mirrorLines, clipNarrative(text))
10733
+ if (rendered == null) return
10734
+ turn.activityPendingRender = composeTurnActivity(turn) ?? rendered
10735
+ const ea = emissionAuthorityFor(turn)
10736
+ // PR-4d: route the deliver-before-drain decision through the centralized
10737
+ // card-drain gate (chatLock-serialized under the flag; verbatim block OFF).
10738
+ cardDrainGate(turn, ea, () => {
10739
+ if (ea.mayDrain(turn)) {
10740
+ // Producer A (narrative SHOW): may only EDIT an already-open card, never
10741
+ // OPEN one on a 0-tool turn (design §9 lever 5 base case — the
10742
+ // triplication). The OPEN gate in the drain enforces this; accumulation
10743
+ // into mirrorLines still happens so the narration renders once a tool
10744
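+ // label or liveness opens the card. NOTE(review): the liveness branch of feedHeartbeatTick says Lever 5 is now inert (narrative may open pre-answer, #2588) — confirm which statement is current.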
10745
+ // PR-4a: routed through the emission-authority façade (no-op delegate).
10746
+ ea.openOrEditCard('narrative', () => {
10747
+ turn.activityInFlight = drainActivitySummary(turn, 'narrative')
10748
+ })
10749
+ }
10750
+ })
10751
+ }
10752
+
10753
+ /**
10754
+ * Narrative-dedup gate, step 2 (reducer-side): a tool_use just arrived while
10755
+ * a narrative block was pending. Decide SHOW vs SUPPRESS and clear the
10756
+ * pending slot. SUPPRESS only when the tool is reply/stream_reply AND the
10757
+ * pending text is a draft-then-send of that reply's `input.text`. Everything
10758
+ * else (a working tool, or a reply whose text differs — post-action
10759
+ * narration) is SHOWN. See narrative-dedup.ts §2b.
10760
+ */
10761
+ function resolvePendingNarrativeOnTool(
10762
+ turn: CurrentTurn,
10763
+ toolName: string,
10764
+ input: Record<string, unknown> | undefined,
10765
+ ): void {
10766
+ const pending = turn.pendingNarrative
10767
+ if (pending == null) return
10768
+ turn.pendingNarrative = null
10769
+ if (REPLY_TOOLS.has(toolName)) {
10770
+ const replyText = typeof input?.text === 'string' ? (input.text as string) : ''
10771
+ if (isDraftOfReply(pending.text, replyText)) return // draft of the answer → SUPPRESS
10772
+ }
10773
+ showNarrativeStep(turn, pending.text) // working preamble / post-action narration → SHOW
10774
+ }
10775
+
10776
+ /**
10777
+ * Narrative-dedup gate, step 1 (reducer-side): a new narrative block
10778
+ * arrived. A previously-pending block had nothing reply-shaped immediately
10779
+ * after it (pure narration) → flush it as SHOWN, then stage the new one for
10780
+ * one lookahead step. See narrative-dedup.ts §2b.
10781
+ */
10782
+ function stagePendingNarrative(turn: CurrentTurn, text: string): void {
10783
+ if (turn.pendingNarrative != null) {
10784
+ showNarrativeStep(turn, turn.pendingNarrative.text)
10785
+ }
10786
+ turn.pendingNarrative = { text }
10787
+ }
10788
+
10789
+ /**
10790
+ * Narrative-dedup gate, step 3 (reducer-side): the turn is ending with a
10791
+ * trailing narrative block and nothing after it. SUPPRESS only when the turn
10792
+ * already delivered its answer via reply/stream_reply and the trailing text
10793
+ * is a draft of that answer; otherwise SHOW (genuine trailing narration like
10794
+ * "Done — all green."). See narrative-dedup.ts §2b.
10795
+ */
10796
+ function flushPendingNarrativeAtTurnEnd(turn: CurrentTurn, lastReplyText: string): void {
10797
+ const pending = turn.pendingNarrative
10798
+ if (pending == null) return
10799
+ turn.pendingNarrative = null
10800
+ if (lastReplyText.length > 0 && isDraftOfReply(pending.text, lastReplyText)) return // trailing duplicate of the answer
10801
+ showNarrativeStep(turn, pending.text)
9886
10802
  }
9887
10803
 
9888
10804
  /**
@@ -9902,11 +10818,69 @@ function composeTurnActivity(turn: CurrentTurn, final = false, liveSuffix = ''):
9902
10818
  * doesn't corrupt the next turn's atom — late writes land on the
9903
10819
  * captured `turn` (already-completed turn, harmless).
9904
10820
  */
9905
- async function drainActivitySummary(turn: CurrentTurn): Promise<void> {
10821
+ async function drainActivitySummary(
10822
+ turn: CurrentTurn,
10823
+ // Which producer triggered this drain (design §9 levers 1 + 5). Gates the
10824
+ // OPEN (first sendMessage) branch via `mayOpenActivityCard`; EDITs of an
10825
+ // already-open card are never gated. Defaults to 'tool' — the historically
10826
+ // unconditional OPEN behaviour — so any caller that does not opt into the
10827
+ // gate is unaffected. Narrative-SHOW and liveness callers pass their producer
10828
+ // explicitly.
10829
+ producer: FeedOpenProducer = 'tool',
10830
+ // Optional flags forwarded to `mayOpenActivityCard`.
10831
+ openFlags?: { postAnswerSubagentActivity?: boolean },
10832
+ ): Promise<void> {
9906
10833
  try {
9907
10834
  while (turn.activityPendingRender !== turn.activityLastSentRender) {
9908
10835
  const target = turn.activityPendingRender
9909
10836
  if (target == null) break
10837
+ // OPEN gate (design §9 levers 1 + 5): when this drain would OPEN a fresh
10838
+ // card (activityMessageId == null), consult the pure gate. Refusing an
10839
+ // OPEN must NOT advance activityLastSentRender — the accumulated render
10840
+ // stays pending so a later OPEN-eligible producer (a tool label, or
10841
+ // liveness) renders it. An EDIT (activityMessageId != null) is never
10842
+ // gated. Enforced HERE so it covers BOTH the inline producers AND the
10843
+ // detached heartbeat setInterval drain (R7/concurrency). This check applies only
10844
+ // at gate EVALUATION, not to an in-flight send: it is not a hard mutex — a send
10845
+ // already PAST this check and suspended at its `await robustApiCall(
10846
+ // sendMessage)` when a substantive final lands still completes and opens a
10847
+ // card; that residual is reconciled by lever-2's `clearActivitySummary`
10848
+ // chaining its finalize onto `turn.activityInFlight` (the suspended drain)
10849
+ // and editing the card in place, not by this gate blocking it.
10850
+ // Lever 4 (cross-turn / race C/D): a synthetic represent/owed-reply turn
10851
+ // (and the liveness/heartbeat timer firing on it) starts with a CLEARED
10852
+ // per-turn `finalAnswerEverDelivered` latch even when a substantive answer
10853
+ // already reached the user in an EARLIER turn — so without this its first
10854
+ // drain opens a card BELOW that prior reply. Only such a turn carries
10855
+ // `crossTurnGate`; reuse the represent guard's delivered-since check
10856
+ // (`hasOutboundDeliveredSince`) with the obligation's `openedAt` cutoff and
10857
+ // the SUBSTANTIVE 200-char threshold (so an ack never trips it → #2141
10858
+ // stays green). Intended to hit history ONLY when about to OPEN (activityMessageId ==
10859
+ // null) on a turn with a cross-turn gate. NOTE(review): the call below is unconditional,
10860
+ // so that depends on the helper short-circuiting — confirm. Scoped to the synthetic surface by the
10861
+ // presence of `crossTurnGate`, so it can never fire on a foreground turn.
10862
+ // PR-4b: the cross-turn predicate is now the PURE, shared helper extracted
10863
+ // into feed-open-gate.ts (body lifted verbatim) — the SAME function the
10864
+ // emission-authority façade calls in its enabled branch, so flag-ON and
10865
+ // flag-OFF compute an identical verdict. History deps injected (the module
10866
+ // stays sqlite-free). The pure-gate consult + the `break` below stay
10867
+ // LITERALLY in the drain (disabled-path byte-identity).
10868
+ const crossTurnAnswerDelivered = computeCrossTurnAnswerDelivered(
10869
+ turn,
10870
+ feedOpenGateDeps(),
10871
+ )
10872
+ if (
10873
+ turn.activityMessageId == null
10874
+ && !mayOpenActivityCard({
10875
+ producer,
10876
+ finalAnswerEverDelivered: turn.finalAnswerEverDelivered,
10877
+ labeledToolCount: turn.labeledToolCount,
10878
+ crossTurnAnswerDelivered,
10879
+ postAnswerSubagentActivity: openFlags?.postAnswerSubagentActivity,
10880
+ })
10881
+ ) {
10882
+ break
10883
+ }
9910
10884
  // `renderActivityFeed` already emitted ready Telegram HTML with per-line
9911
10885
  // markup (<b>→ current</b> / <i>✓ done</i>) and escaped each label's
9912
10886
  // <,>,& itself (#1942 class) — send verbatim, do NOT re-escape or
@@ -9980,17 +10954,125 @@ async function drainActivitySummary(turn: CurrentTurn): Promise<void> {
9980
10954
  function feedHeartbeatTick(): void {
9981
10955
  const turn = currentTurn
9982
10956
  if (turn == null) return
10957
+ if (turn.finalAnswerDelivered) {
10958
+ // Fix 2: post-answer background-agent liveness. When the sub-agent/workflow
10959
+ // watcher has surfaced a new step AFTER the substantive final answer, drive
10960
+ // a liveness card so the operator can see "background agent still working".
10961
+ //
10962
+ // Gate: `turn.subagentActivityAt` must be set (watcher fired) AND it must
10963
+ // exceed `turn.finalAnswerDeliveredAt` (the watcher advanced AFTER the answer
10964
+ // was delivered — not just any pre-answer label). This is the key fix:
10965
+ // #2587 read `lastToolLabelAt`, which is frozen by the drop-guard after a
10966
+ // substantive answer and therefore never crosses the threshold. `subagentActivityAt`
10967
+ // is written by the watcher's onProgress callback INDEPENDENTLY of the
10968
+ // tool_label / drop-guard path, so it correctly advances post-answer.
10969
+ //
10970
+ // Idle-gap suppression + staleness cap (concern 3) — the single pure decision
10971
+ // `evaluatePostAnswerLiveness`:
10972
+ // - 'idle' → no watcher activity after the answer (`subagentActivityAt`
10973
+ // undefined or ≤ finalAnswerDeliveredAt). Stay silent; the
10974
+ // reply-is-last invariant is fully preserved for idle turns.
10975
+ // - 'stale' → the worker's last advance is older than POST_ANSWER_LIVENESS_STALE_MS
10976
+ // (its `onFinish` froze `subagentActivityAt` and no new step has
10977
+ // arrived). STOP re-rendering so the card doesn't climb `running`
10978
+ // forever — mirrors the pre-answer FEED_LIVENESS_OPEN_MS cap. The
10979
+ // worker's own terminal card (workerActivityFeed.finish) is the
10980
+ // durable record once it completes.
10981
+ // - 'emit' → genuine in-flight post-answer activity; render the card below.
10982
+ const subagentAt = turn.subagentActivityAt
10983
+ const livenessVerdict = evaluatePostAnswerLiveness({
10984
+ subagentActivityAt: subagentAt,
10985
+ finalAnswerDeliveredAt: turn.finalAnswerDeliveredAt,
10986
+ now: Date.now(),
10987
+ staleCapMs: POST_ANSWER_LIVENESS_STALE_MS,
10988
+ })
10989
+ if (livenessVerdict !== 'emit' || subagentAt == null) return // idle gap or stale worker → stay silent (the `== null` also narrows subagentAt for the elapsed below)
10990
+ // A background worker is genuinely active after the answer. Open or maintain
10991
+ // a liveness card below the reply. Route through `mayOpenActivityCard` with
10992
+ // `postAnswerSubagentActivity:true` so Lever 1 is lifted for 'tool' producer
10993
+ // (Fix 2's Lever 1 exception in feed-open-gate.ts). The card renders the
10994
+ // turn's accumulated mirrorLines (which may be empty — in that case this tick
10995
+ // renders a "Working in background…" placeholder, mirroring the pre-answer liveness path).
10996
+ if (turn.sessionChatId == null) return
10997
+ const age = Date.now() - turn.startedAt
10998
+ const livenessHeader: SessionActivityHeader = {
10999
+ label: 'Agent', elapsedMs: age, toolCount: turn.labeledToolCount, state: 'running',
11000
+ }
11001
+ const lines = turn.mirrorLines.length > 0 ? turn.mirrorLines : ['Working in background…']
11002
+ const elapsed = Date.now() - subagentAt
11003
+ const rendered = renderActivityFeedWithNested(lines, [], false, ` · ${formatFeedElapsed(elapsed)}`, undefined, livenessHeader)
11004
+ if (rendered == null) return
11005
+ turn.activityPendingRender = rendered
11006
+ const ea = emissionAuthorityFor(turn)
11007
+ cardDrainGate(turn, ea, () => {
11008
+ if (ea.mayDrain(turn)) {
11009
+ // Producer 'tool' with postAnswerSubagentActivity=true: the Lever 1
11010
+ // exception allows this OPEN. Lever 4 (cross-turn) and idle-liveness
11011
+ // blocks are still respected by the drain. The card surfaces BELOW the
11012
+ // reply showing the background agent's live activity.
11013
+ ea.openOrEditCard('tool', () => {
11014
+ turn.activityInFlight = drainActivitySummary(turn, 'tool', { postAnswerSubagentActivity: true })
11015
+ })
11016
+ }
11017
+ })
11018
+ return
11019
+ }
11020
+
11021
+ // Liveness feed (open + maintain). `mirrorLines.length === 0` means no tool
11022
+ // has ever produced a label this turn — pure thinking, or only suppressed
11023
+ // tools. Open a minimal "Working…" feed once the turn passes the threshold,
11024
+ // and keep its elapsed climbing until a real label arrives. The first label
11025
+ // makes mirrorLines non-empty, so the labelled-feed heartbeat below takes
11026
+ // over and its edit cleanly replaces the placeholder. drainActivitySummary
11027
+ // sends (opens) when activityMessageId is null and edits (maintains) once set
11028
+ // — so this one branch handles both the open and the climb.
11029
+ if (turn.mirrorLines.length === 0) {
11030
+ if (!FEED_LIVENESS_OPEN_ENABLED || turn.sessionChatId == null) return
11031
+ const age = Date.now() - turn.startedAt
11032
+ if (age < FEED_LIVENESS_OPEN_MS) return
11033
+ const livenessHeader: SessionActivityHeader = {
11034
+ label: 'Agent', elapsedMs: age, toolCount: 0, state: 'running',
11035
+ }
11036
+ const rendered = renderActivityFeedWithNested(['Working…'], [], false, ` · ${formatFeedElapsed(age)}`, undefined, livenessHeader)
11037
+ if (rendered == null) return
11038
+ turn.activityPendingRender = rendered
11039
+ const ea = emissionAuthorityFor(turn)
11040
+ // PR-4d: route through the centralized chatLock-serialized card-drain gate.
11041
+ cardDrainGate(turn, ea, () => {
11042
+ if (ea.mayDrain(turn)) {
11043
+ // Producer C (liveness timer): the genuine ≥12s thinking-gap open. Now
11044
+ // that Lever 5 is inert (narrative may open pre-answer — #2588), liveness
11045
+ // remains the natural open for 0-tool pre-answer turns that are silent.
11046
+ // The sticky-latch (lever 1) still gates it in the drain.
11047
+ // PR-4a: routed through the emission-authority façade (no-op delegate).
11048
+ ea.openOrEditCard('liveness', () => {
11049
+ turn.activityInFlight = drainActivitySummary(turn, 'liveness')
11050
+ })
11051
+ }
11052
+ })
11053
+ return
11054
+ }
11055
+
11056
+ // Labelled-feed heartbeat: keep a stale in-progress step visibly advancing.
9983
11057
  if (turn.activityMessageId == null) return // no live feed yet / already cleared
9984
- if (turn.finalAnswerDelivered) return // feed handed off to the answer
9985
11058
  if (turn.lastToolLabelAt == null) return // feed not driven by a labelled step
9986
11059
  const elapsed = Date.now() - turn.lastToolLabelAt
9987
11060
  if (elapsed < FEED_HEARTBEAT_MIN_STALE_MS) return // step is fresh; feed advancing normally
9988
11061
  const rendered = composeTurnActivity(turn, false, ` · ${formatFeedElapsed(elapsed)}`)
9989
11062
  if (rendered == null) return
9990
11063
  turn.activityPendingRender = rendered
9991
- if (turn.activityInFlight == null) {
9992
- turn.activityInFlight = drainActivitySummary(turn)
11064
+ const ea = emissionAuthorityFor(turn)
11065
+ // PR-4d: route through the centralized chatLock-serialized card-drain gate.
11066
+ cardDrainGate(turn, ea, () => {
11067
+ if (ea.mayDrain(turn)) {
11068
+ // Maintains an already-open card (guarded above on activityMessageId !=
11069
+ // null) → only ever EDITs. 'liveness' is correct either way.
11070
+ // PR-4a: routed through the emission-authority façade (no-op delegate).
11071
+ ea.openOrEditCard('liveness', () => {
11072
+ turn.activityInFlight = drainActivitySummary(turn, 'liveness')
11073
+ })
9993
11074
  }
11075
+ })
9994
11076
  }
9995
11077
  if (!STATIC && FEED_HEARTBEAT_ENABLED) {
9996
11078
  setInterval(feedHeartbeatTick, FEED_HEARTBEAT_TICK_MS).unref()
@@ -10043,8 +11125,19 @@ function clearActivitySummary(turn: CurrentTurn, finalHtmlOverride?: string | nu
10043
11125
  }
10044
11126
  // Default: leave the status message as a record, edited to a terminal
10045
11127
  // all-done state so it doesn't freeze on a misleading "→ in-progress" line.
10046
- const finalHtml =
11128
+ let finalHtml =
10047
11129
  finalHtmlOverride !== undefined ? finalHtmlOverride : composeTurnActivity(turn, true)
11130
+ // Liveness-only feed: opened on the timer for a turn that never labelled a
11131
+ // tool (pure thinking / suppressed tools), so mirrorLines is empty and the
11132
+ // terminal render is null. Finalize to a done "✓ Working…" record instead
11133
+ // of leaving the message frozen on the live "→ Working…" line.
11134
+ if (finalHtml == null && turn.mirrorLines.length === 0 && turn.activityEverOpened) {
11135
+ const livenessElapsed = turn.startedAt > 0 ? Date.now() - turn.startedAt : 0
11136
+ const livenessHeader: SessionActivityHeader = {
11137
+ label: 'Agent', elapsedMs: livenessElapsed, toolCount: turn.labeledToolCount, state: 'done',
11138
+ }
11139
+ finalHtml = renderActivityFeedWithNested(['Working…'], [], true, '', undefined, livenessHeader)
11140
+ }
10048
11141
  if (finalHtml == null) return
10049
11142
  try {
10050
11143
  await robustApiCall(
@@ -10111,6 +11204,19 @@ function handleSessionEvent(ev: SessionEvent): void {
10111
11204
  const turnId =
10112
11205
  deriveTurnId(ev.chatId, enqThreadIdNum ?? null, ev.messageId)
10113
11206
  ?? `${chatKey(ev.chatId, enqThreadIdNum ?? null)}#synthetic-${startedAt}`
11207
+ // PR1 (cross-turn stale-card guard, §9 lever 4 / race C/D). Consume any
11208
+ // pending cross-turn gate `obligationSweep` armed for THIS exact turn
11209
+ // when it pushed an `obligation_represent` inbound. The gate is keyed on
11210
+ // the obligation's `originTurnId`, and the represent inbound reuses the
11211
+ // original chat/thread/messageId, so this turn's `turnId` (derived just
11212
+ // above) equals that key iff this turn IS the represent surface the gate was armed for.
11213
+ // An unrelated foreground turn on the same chat/thread derives a
11214
+ // different `turnId` → finds no entry → no gate → its card opens normally
11215
+ // (correct). Consume-once: delete on read so the matched gate can't leak
11216
+ // forward, and a never-matched stale gate can never suppress another turn.
11217
+ const xTurnGateKey = turnId
11218
+ const consumedCrossTurnGate = pendingCrossTurnGate.get(xTurnGateKey)
11219
+ if (consumedCrossTurnGate != null) pendingCrossTurnGate.delete(xTurnGateKey)
10114
11220
  const next: CurrentTurn = {
10115
11221
  sessionChatId: ev.chatId,
10116
11222
  sessionThreadId: enqThreadIdNum,
@@ -10123,20 +11229,34 @@ function handleSessionEvent(ev: SessionEvent): void {
10123
11229
  sourceMessageId: parseSourceMessageId(ev.messageId),
10124
11230
  startedAt,
10125
11231
  gatewayReceiveAt: startedAt,
11232
+ // #2527 — stamp the loop role once, from the enqueue envelope.
11233
+ role: deriveTurnRole(ev.rawContent),
11234
+ // PR1 (cross-turn stale-card guard, §9 lever 4 / race C/D). Only a
11235
+ // synthetic represent/owed-reply turn carries this; a foreground turn
11236
+ // leaves it undefined and the cross-turn card-OPEN gate is inert.
11237
+ ...(consumedCrossTurnGate != null ? { crossTurnGate: consumedCrossTurnGate } : {}),
10126
11238
  replyCalled: false,
10127
11239
  finalAnswerDelivered: false,
10128
11240
  finalAnswerSubstantive: false,
11241
+ // Sticky latch — reset ONLY here (turn start), never by reopen.
11242
+ finalAnswerEverDelivered: false,
10129
11243
  firstPingAt: null,
11244
+ // Notification ownership (R8 / PR-2): no slot claimed yet, so the
11245
+ // "claimer was substantive" flag starts false. Set atomically with
11246
+ // firstPingAt at the over-ping decision site.
11247
+ firstPingWasSubstantive: false,
10130
11248
  silentAnchorMessageId: null,
10131
11249
  silentAnchorText: '',
10132
11250
  capturedText: [],
10133
11251
  orphanedReplyTimeoutId: null,
11252
+ orphanedReplyRearmCount: 0,
10134
11253
  turnId,
10135
11254
  registryKey: null,
10136
11255
  noReplyDrainTimer: null,
10137
11256
  lastAssistantMsgId: null,
10138
11257
  lastAssistantDone: false,
10139
11258
  toolCallCount: 0,
11259
+ labeledToolCount: 0,
10140
11260
  activityMessageId: null,
10141
11261
  activityInFlight: null,
10142
11262
  activityPendingRender: null,
@@ -10144,11 +11264,24 @@ function handleSessionEvent(ev: SessionEvent): void {
10144
11264
  activityEverOpened: false,
10145
11265
  activityDrainFailures: 0,
10146
11266
  mirrorLines: [],
11267
+ pendingNarrative: null,
11268
+ lastReplyText: '',
10147
11269
  foregroundSubAgents: new Map(),
10148
11270
  answerStream: null,
10149
11271
  isDm: isDmChatId(ev.chatId),
11272
+ // PR-4a — construct ONE emission-authority façade per turn, passing
11273
+ // the chat/thread key in EXPLICITLY (the PR-4e seam; today equal to
11274
+ // the singleton-sourced key). Per-turn: born with this turn literal,
11275
+ // discarded with it — never persists across turns.
11276
+ emissionAuthority: new EmissionAuthority(
11277
+ statusKey(ev.chatId, enqThreadIdNum),
11278
+ ),
10150
11279
  }
10151
- currentTurn = next
11280
+ // PR-4e — route the turn-SET through the keyed accessor: flag-OFF assigns
11281
+ // the singleton (byte-identical to `currentTurn = next`); flag-ON sets the
11282
+ // per-topic `byKey[statusKey]` entry AND the most-recent mirror. The key is
11283
+ // the SAME statusKey the ctor's façade was constructed with just above.
11284
+ setCurrentTurn(next, statusKey(ev.chatId, enqThreadIdNum))
10152
11285
  markIdleActivity() // any turn start (main session) is activity — re-arm idle clear
10153
11286
  // Status-surface observability: one line at every turn SET so a later
10154
11287
  // dark card is traceable to which turn/topic key it belonged to.
@@ -10278,6 +11411,14 @@ function handleSessionEvent(ev: SessionEvent): void {
10278
11411
  case 'tool_use': {
10279
11412
  const turn = currentTurn
10280
11413
  if (turn == null) return
11414
+ // Narrative-dedup gate step 2 (JSONL-text-narrative primitive): a
11415
+ // narrative block was pending; this tool_use is the lookahead event
11416
+ // that decides it. reply/stream_reply with near-identical text ⇒
11417
+ // draft-then-send ⇒ SUPPRESS (the reply prints the canonical answer);
11418
+ // anything else ⇒ SHOW as a transient liveness step. Runs BEFORE the
11419
+ // normal tool handling so a working preamble surfaces just ahead of
11420
+ // its tool step.
11421
+ resolvePendingNarrativeOnTool(turn, ev.toolName, ev.input)
10281
11422
  // Phase 1 of #332: count every tool_use in the current turn.
10282
11423
  turn.toolCallCount++
10283
11424
  // #412: bump turn-active marker mtime so the watchdog sees this
@@ -10300,6 +11441,15 @@ function handleSessionEvent(ev: SessionEvent): void {
10300
11441
  // placeholder-heartbeat label, which has been retired.
10301
11442
  if (isTelegramReplyTool(name)) {
10302
11443
  turn.replyCalled = true
11444
+ // NIT 2 (reply-proxy precision): capture the ACTUAL delivered reply
11445
+ // text so flushPendingNarrativeAtTurnEnd compares a trailing
11446
+ // narrative block against the real answer surface, not
11447
+ // capturedText.join('') (which mis-suppresses when the model emits
11448
+ // the same short string twice in a turn). REPLY_TOOLS ('reply',
11449
+ // 'stream_reply') carry the answer in input.text; only those count.
11450
+ if (REPLY_TOOLS.has(name) && typeof ev.input?.text === 'string') {
11451
+ turn.lastReplyText = ev.input.text as string
11452
+ }
10303
11453
  if (turn.orphanedReplyTimeoutId != null) {
10304
11454
  clearTimeout(turn.orphanedReplyTimeoutId)
10305
11455
  turn.orphanedReplyTimeoutId = null
@@ -10339,6 +11489,12 @@ function handleSessionEvent(ev: SessionEvent): void {
10339
11489
  // where the JSONL tool_use rows arrive too late.
10340
11490
  const turn = currentTurn
10341
11491
  if (turn == null) return
11492
+ // SECONDARY FIX: an active tool_label means the model is producing work
11493
+ // right now — re-arm the orphaned-reply fuse so a multi-phase tool turn
11494
+ // (write → compile → test → fix) that regularly emits labels doesn't let
11495
+ // the 30 s timer run down between labels. Mirrors how `case 'text':` calls
11496
+ // resetOrphanedReplyTimeout() at ~line 10786.
11497
+ resetOrphanedReplyTimeout()
10342
11498
  // Surface tools (reply/stream_reply/react) are the conversation, not
10343
11499
  // activity — the hook labels them ("Replying"), so filter by name.
10344
11500
  if (isTelegramSurfaceTool(ev.toolName)) return
@@ -10400,6 +11556,14 @@ function handleSessionEvent(ev: SessionEvent): void {
10400
11556
  }
10401
11557
  const rendered = appendActivityLabel(turn.mirrorLines, ev.label)
10402
11558
  if (rendered != null) {
11559
+ // Count surfaced tool steps — the single source of truth for the `tools=`
11560
+ // lifecycle field and the `✓ N steps` total. Incremented HERE (not at the
11561
+ // top of the case) so the count stays consistent with what the feed
11562
+ // actually surfaces: an empty label (appendActivityLabel → null) or a
11563
+ // label dropped by the post-final-answer reopen guard never inflates it.
11564
+ // Surface tools (reply/react) returned earlier; send_typing/sync_retain
11565
+ // are suppressed at the hook (computeLabel → null) so they never arrive.
11566
+ turn.labeledToolCount++
10403
11567
  // A new tool label = a new live step → re-anchor the heartbeat clock so
10404
11568
  // the " · Ns" elapsed restarts from this step (and the feed itself just
10405
11569
  // advanced, so it isn't stale).
@@ -10411,16 +11575,34 @@ function handleSessionEvent(ev: SessionEvent): void {
10411
11575
  // and would falsely reset the clock forever on a hung-mid-tool turn,
10412
11576
  // reintroducing the #1556 dangling-turn wedge. Only the model emitting a
10413
11577
  // fresh label reaches here.
10414
- if (SILENCE_LIVENESS_PRODUCTION && currentTurn === turn) {
11578
+ // PR-4e keyed liveness under the flag. Flag-OFF keeps the literal
11579
+ // `currentTurn === turn` (a late tool-label for topic A must reset A's
11580
+ // silence clock, not topic B's); flag-ON resolves A by ITS OWN key so a
11581
+ // flip to B doesn't falsify A's liveness here.
11582
+ if (
11583
+ SILENCE_LIVENESS_PRODUCTION &&
11584
+ (EMISSION_AUTHORITY_ENABLED ? turnLiveForItsTopic(turn) : currentTurn === turn)
11585
+ ) {
10415
11586
  silencePoke.noteProduction(statusKey(turn.sessionChatId, turn.sessionThreadId), Date.now())
10416
11587
  }
10417
11588
  // Recompose so any active foreground sub-agent's nested block (Model A)
10418
11589
  // is preserved when the parent appends its own step. composeTurnActivity
10419
11590
  // == the flat render when no foreground sub-agent is active.
10420
11591
  turn.activityPendingRender = composeTurnActivity(turn) ?? rendered
10421
- if (turn.activityInFlight == null) {
10422
- turn.activityInFlight = drainActivitySummary(turn)
11592
+ const ea = emissionAuthorityFor(turn)
11593
+ // PR-4d: route through the centralized chatLock-serialized card-drain gate.
11594
+ cardDrainGate(turn, ea, () => {
11595
+ if (ea.mayDrain(turn)) {
11596
+ // Producer B (tool label): always OPEN-eligible (labeledToolCount was
11597
+ // incremented just above). A turn that started conversational and now
11598
+ // dispatches a tool opens here, rendering any narration accumulated
11599
+ // by the suppressed narrative-SHOW drains (design §9 lever 5 / R4).
11600
+ // PR-4a: routed through the emission-authority façade (no-op delegate).
11601
+ ea.openOrEditCard('tool', () => {
11602
+ turn.activityInFlight = drainActivitySummary(turn, 'tool')
11603
+ })
10423
11604
  }
11605
+ })
10424
11606
  }
10425
11607
  return
10426
11608
  }
@@ -10447,58 +11629,33 @@ function handleSessionEvent(ev: SessionEvent): void {
10447
11629
  const turn = currentTurn
10448
11630
  if (turn != null) {
10449
11631
  turn.capturedText.push(ev.text)
11632
+ // Narrative-dedup gate step 1 (JSONL-text-narrative primitive):
11633
+ // stage this text block for one lookahead step. If a previous block
11634
+ // was pending with nothing reply-shaped after it, it flushes here as
11635
+ // a SHOWN transient liveness step. The eventual SHOW/SUPPRESS of THIS
11636
+ // block is decided by the next tool_use / turn_end. Invariant
11637
+ // `chat-is-the-single-source-of-truth` (reference/invariants.md): a
11638
+ // SHOWN line rides the same renderStepFeed path as a tool step —
11639
+ // transient + clipped, never a persisted parallel mirror. This is a
11640
+ // separate lane from the answer-stream wiring below (which owns the
11641
+ // canonical reply), so the two never fight over the same text.
11642
+ stagePendingNarrative(turn, ev.text)
10450
11643
  // Issue #195: feed the answer-lane stream. The stream itself
10451
11644
  // gates on minInitialChars and throttles edits — short replies
10452
11645
  // stay below the threshold and never spawn a message.
10453
11646
  if (turn.answerStream == null) {
10454
11647
  turn.answerStream = createAnswerStream({
10455
11648
  chatId: turn.sessionChatId,
10456
- isPrivateChat: turn.isDm,
10457
11649
  threadId: turn.sessionThreadId,
10458
- // Transport selection:
10459
- // #869-Phase1 visible-answer-stream: omit the draft API so
10460
- // the lane edits a user-visible chat-timeline message
10461
- // (minInitialChars:1 opens it on the first chunk). The
10462
- // draft-mirror does NOT touch this lane the canary proved
10463
- // the model emits almost no interstitial assistant.text
10464
- // (it thinks→tool→reply), so routing it to the draft just
10465
- // emptied the preview. The draft-mirror instead renders the
10466
- // tool_use stream (case 'tool_use' above) where the real
10467
- // signal lives. assistant.text keeps its visible-message
10468
- // home; the reply tool stays the canonical answer.
10469
- // Flag OFF (default): use the compose-box draft for DMs, and set
10470
- // minInitialChars effectively-infinite so the lane NEVER opens a
10471
- // visible chat message. This matters in supergroup TOPICS, where
10472
- // draft transport is unsupported (gateway.ts:6422) so the lane
10473
- // would otherwise fall to message transport and post a visible
10474
- // preview once interstitial text passed the default 50-char gate
10475
- // — which retract() then deletes (the unformatted flash, marko
10476
- // General). With the gate unreachable the only posted message is
10477
- // the canonical reply. (The gate is bypassed for DM draft
10478
- // transport, so DM draft streaming is unaffected.)
10479
- // VISIBLE preview gating decoupled from the draft-transport flag
10480
- // (2026-06-05 flash regression fix). The visible flag ALONE decides
10481
- // whether a user-visible preview opens; DRAFT_ANSWER_LANE_RETIRED
10482
- // controls only the TRANSPORT (whether sendMessageDraftFn exists).
10483
- // The earlier `|| DRAFT_ANSWER_LANE_RETIRED` here meant retiring the
10484
- // draft (the default since v0.14.68) silently forced minInitialChars:1
10485
- // → a visible preliminary opened on every streaming turn and was then
10486
- // retracted (deleted) when the reply tool fired — the exact "raw bubble
10487
- // appears, formatted reply lands, raw bubble vanishes" flash that
10488
- // turning the visible stream OFF (v0.14.52) was meant to remove. So
10489
- // v0.14.68 silently undid v0.14.52 fleet-wide. Now:
10490
- // - VISIBLE on (opt-in) → minInitialChars:1, a real edit-in-place
10491
- // preview (observable by UAT, silence-liveness reset on its sends).
10492
- // - VISIBLE off (default) → minInitialChars:MAX so NO visible preview
10493
- // ever opens; the reply tool is the single canonical formatted
10494
- // message (no flash). With the draft retired (default) there is no
10495
- // transport either, so the lane stays dormant; with the kill switch
10496
- // DRAFT_ANSWER_LANE=0 the legacy compose-box draft transport is
10497
- // restored (sendMessageDraftFn defined above, gate bypassed for DM
10498
- // draft so #1664 DM draft streaming is unaffected).
10499
- ...(ANSWER_LANE.usesDraftTransport
10500
- ? { sendMessageDraft: sendMessageDraftFn, minInitialChars: ANSWER_LANE.minInitialChars }
10501
- : { minInitialChars: ANSWER_LANE.minInitialChars }),
11650
+ // VISIBLE on (opt-in, SWITCHROOM_VISIBLE_ANSWER_STREAM=1) →
11651
+ // minInitialChars:1 opens a user-visible edit-in-place preview on the
11652
+ // first text chunk. At turn_end the preview is materialized as a pinged
11653
+ // final answer (materialize()) when the model never called reply.
11654
+ // VISIBLE off (default) → minInitialChars:MAX so NO visible preview ever
11655
+ // opens; the reply tool is the single canonical formatted message
11656
+ // (no flash). The draft transport is permanently retired — both modes
11657
+ // use sendMessage + editMessageText for any message that does open.
11658
+ minInitialChars: ANSWER_LANE.minInitialChars,
10502
11659
  // #1075: route through robustApiCall so flood-wait,
10503
11660
  // benign-400, and THREAD_NOT_FOUND are handled uniformly
10504
11661
  // instead of crashing the answer-stream loop on a deleted
@@ -10589,7 +11746,11 @@ function handleSessionEvent(ev: SessionEvent): void {
10589
11746
  // skip the tick (the new turn has its own answer stream).
10590
11747
  onMetric: (metricEv) => {
10591
11748
  logStreamingEvent(metricEv)
10592
- if (currentTurn === turn) {
11749
+ // PR-4e keyed liveness under the flag. Flag-OFF keeps the literal
11750
+ // `currentTurn === turn` (a draft-update metric for topic A's stream
11751
+ // must tick A's signal/silence clock); flag-ON resolves A by its own
11752
+ // key so a flip to B doesn't skip A's tick.
11753
+ if (EMISSION_AUTHORITY_ENABLED ? turnLiveForItsTopic(turn) : currentTurn === turn) {
10593
11754
  signalTracker.noteSignal(
10594
11755
  statusKey(turn.sessionChatId, turn.sessionThreadId),
10595
11756
  Date.now(),
@@ -10717,6 +11878,28 @@ function handleSessionEvent(ev: SessionEvent): void {
10717
11878
  return
10718
11879
  }
10719
11880
  case 'turn_end': {
11881
+ // DEFENSIVE FIX: belt-and-braces guard against the synthetic backstop
11882
+ // (`durationMs: -1`) racing live work. durationMs >= 0 is the
11883
+ // authoritative signal from system/turn_duration; -1 is ONLY ever set
11884
+ // by the orphaned-reply backstop. Reject the synthetic event here so that
11885
+ // even if the PRIMARY fix's re-arm logic is bypassed (e.g. a very fast
11886
+ // fire before isLegitimatelyWorking() is sampled) we still don't tear
11887
+ // down a live feed mid-work. Extended from the original isMidToolCall()
11888
+ // check to the full isLegitimatelyWorking predicate so detached background
11889
+ // work and human-wait tools (ask_user) are also protected.
11890
+ // INVARIANT: a REAL turn_end (durationMs >= 0) is NEVER suppressed.
11891
+ if (ev.durationMs === -1) {
11892
+ const turn = currentTurn
11893
+ const key = turn != null ? statusKey(turn.sessionChatId, turn.sessionThreadId) : ''
11894
+ if (isLegitimatelyWorking(key)) {
11895
+ process.stderr.write(
11896
+ `telegram gateway: synthetic turn_end suppressed — legitimately working` +
11897
+ ` (in_flight=${toolFlightTracker.inFlightCount()},` +
11898
+ ` bg_work=${turn != null ? pendingProgress.hasPendingAsyncDispatch(key) : false})\n`,
11899
+ )
11900
+ return
11901
+ }
11902
+ }
10720
11903
  // Drain any still-pending tool dispatch typing entries — covers
10721
11904
  // transcript truncation or a Claude Code crash mid-tool.
10722
11905
  typingWrapper.drainAll()
@@ -10733,6 +11916,33 @@ function handleSessionEvent(ev: SessionEvent): void {
10733
11916
  clearTimeout(turn.orphanedReplyTimeoutId)
10734
11917
  turn.orphanedReplyTimeoutId = null
10735
11918
  }
11919
+ // Narrative-dedup gate step 3 (JSONL-text-narrative primitive): a
11920
+ // trailing narrative block with nothing after it. When the turn
11921
+ // delivered its answer via reply (replyCalled) the trailing text is
11922
+ // almost always a draft of that answer — compare against the ACTUAL
11923
+ // delivered reply text and SUPPRESS the duplicate; otherwise SHOW
11924
+ // genuine trailing narration ("Done — all green."). Must run BEFORE
11925
+ // clearActivitySummary so a SHOWN line lands in the feed's final
11926
+ // render. Always clears turn.pendingNarrative so it can't leak across
11927
+ // turns.
11928
+ //
11929
+ // NIT 2 (reply-proxy precision): use `turn.lastReplyText` (the
11930
+ // most-recent reply/stream_reply input.text) rather than
11931
+ // `capturedText.join('')`. The old proxy concatenated every captured
11932
+ // text block, so a turn that emitted the same short string twice
11933
+ // (e.g. "Done." as working narration, then "Done." as the reply) would
11934
+ // compare the trailing narration against a doubled "DoneDone" — still
11935
+ // a high-prefix match — and wrongly suppress genuine trailing
11936
+ // narration. Comparing against the actual reply text is exact. When
11937
+ // the turn delivered WITHOUT a reply tool (turn-flush emits
11938
+ // capturedText as the answer), fall back to capturedText.join('') so
11939
+ // that path's trailing-draft suppression is preserved.
11940
+ if (turn != null) {
11941
+ const deliveredText = turn.lastReplyText.length > 0
11942
+ ? turn.lastReplyText
11943
+ : (turn.replyCalled ? turn.capturedText.join('') : '')
11944
+ flushPendingNarrativeAtTurnEnd(turn, deliveredText)
11945
+ }
10736
11946
  // Clear the activity feed at the real end of the turn. This is the
10737
11947
  // no-reply safety net — a turn that ends without ever calling reply
10738
11948
  // (the answer is delivered by turn-flush / silent-end) still has its
@@ -10934,6 +12144,17 @@ function handleSessionEvent(ev: SessionEvent): void {
10934
12144
  ` chat=${chatId} turnStartedAt=${turn.startedAt} replyCalled=false capturedText=empty` +
10935
12145
  ` — the progress card steps were the only thing the user saw (#45)\n`,
10936
12146
  )
12147
+ // #2527: emit structured WARN so the reaction-only failure mode is
12148
+ // machine-readable in the streaming-metrics channel.
12149
+ const tKey = statusKey(chatId, threadId)
12150
+ logStreamingEvent({
12151
+ kind: 'turn_no_reply_warn',
12152
+ chatId,
12153
+ threadId,
12154
+ turnId: turn.turnId,
12155
+ turnDurationMs: turn.startedAt > 0 ? Date.now() - turn.startedAt : 0,
12156
+ reactionCount: reactionTransitionCounts.get(tKey) ?? 0,
12157
+ })
10937
12158
  }
10938
12159
  }
10939
12160
 
@@ -11291,10 +12512,39 @@ function handleSessionEvent(ev: SessionEvent): void {
11291
12512
  }
11292
12513
 
11293
12514
  // #1713: turn_end is THE terminal trigger. Finalize via the
11294
- // single terminal path (👍). Any prior intermediate states
11295
- // pending in the debounce window are flushed by `finalize()`
11296
- // before the terminal emoji emits.
11297
- finalizeStatusReaction(chatId, threadId, 'done')
12515
+ // single terminal path. Any prior intermediate states pending in
12516
+ // the debounce window are flushed by `finalize()` before the
12517
+ // terminal emoji emits.
12518
+ //
12519
+ // #2527 — role-aware terminal honesty: a USER turn that ends without
12520
+ // a delivered answer must NOT paint 👍 (the operator's "thumbs up so
12521
+ // it feels like you're done" report). It finalizes to the gentle
12522
+ // 'undelivered' terminal (😐) instead; the silent-end fallback below
12523
+ // carries the apology text. system/cron turns and NO_REPLY/HEARTBEAT_OK
12524
+ // turns (which return earlier) keep 👍 — their silence is legitimate.
12525
+ let terminalReason = decideTerminalReason({
12526
+ enabled: LIVENESS_TERMINAL_HONESTY,
12527
+ role: turn.role,
12528
+ finalAnswerDelivered: turn.finalAnswerDelivered,
12529
+ })
12530
+ // #2527 review note 1 — worker-hold carve-out: if the turn is STILL
12531
+ // legitimately working at turn_end (a background sub-agent the parent
12532
+ // dispatched is running on), don't prematurely paint 😐. Fall back to
12533
+ // 'done' so the existing deferred-done path holds ✍️ until the worker
12534
+ // completes (then 👍) — the worker-activity feed carries the progress.
12535
+ // Only a turn that genuinely ended undelivered AND is not still working
12536
+ // gets the honest 😐.
12537
+ if (terminalReason === 'undelivered' && isLegitimatelyWorking(statusKey(chatId, threadId))) {
12538
+ terminalReason = 'done'
12539
+ }
12540
+ if (terminalReason === 'undelivered') {
12541
+ process.stderr.write(
12542
+ `telegram gateway: WARN turn_no_reply — user turn ended with an ` +
12543
+ `ambient ack but no delivered answer; painting 😐 not 👍 ` +
12544
+ `chat=${chatId} thread=${threadId ?? '-'} turnId=${turn.turnId} (#2527)\n`,
12545
+ )
12546
+ }
12547
+ finalizeStatusReaction(chatId, threadId, terminalReason)
11298
12548
  {
11299
12549
  const sKey = streamKey(chatId, threadId)
11300
12550
  const turnDurationMs = turn.startedAt > 0 ? Date.now() - turn.startedAt : 0
@@ -11773,6 +13023,9 @@ async function handleInboundCoalesced(
11773
13023
  // - msgId present (always true for `bot.on('message:*')` paths but
11774
13024
  // defensive against future routers that might call this without one).
11775
13025
  maybeEarlyAckReaction(ctx, from)
13026
+ // #2527 — if this lands mid-turn, the user is asking "what's happening?";
13027
+ // fire the liveness floor immediately (DM + supergroup alike).
13028
+ maybePokeFloorForMidTurnInbound(ctx, from)
11776
13029
 
11777
13030
  const key = inboundCoalesceKey(
11778
13031
  String(ctx.chat!.id),
@@ -11801,6 +13054,14 @@ function maybeEarlyAckReaction(ctx: Context, from: NonNullable<Context['from']>)
11801
13054
  const msgId = ctx.message?.message_id
11802
13055
  if (msgId == null) return
11803
13056
  const chatType = ctx.chat?.type
13057
+ // Intentionally DM-only (#2527 surface-parity note): pre-acking a GROUP
13058
+ // message risks reacting to one the full gate (requireMention / topic
13059
+ // scoping) would later DROP. The SUBSTANTIVE liveness parity a supergroup
13060
+ // needs — the mid-turn floor and the role-aware terminal reaction — is
13061
+ // surface-agnostic (keyed on statusKey + loop role, no chat-type branch),
13062
+ // so a forum topic gets identical never-silent guarantees without this
13063
+ // sub-second 👀 optimisation. See `maybePokeFloorForMidTurnInbound` for
13064
+ // the surface-agnostic "Status?" short-circuit.
11804
13065
  if (chatType !== 'private') return
11805
13066
  const chatId = String(ctx.chat!.id)
11806
13067
  const threadId = ctx.message?.is_topic_message ? ctx.message.message_thread_id : undefined
@@ -11810,6 +13071,9 @@ function maybeEarlyAckReaction(ctx: Context, from: NonNullable<Context['from']>)
11810
13071
  void bot.api.setMessageReaction(chatId, msgId, [
11811
13072
  { type: 'emoji', emoji: '👀' as ReactionTypeEmoji['emoji'] },
11812
13073
  ]).catch(() => {})
13074
+ // #2527: log the early-ack fire so operators can see how often the
13075
+ // fast pre-coalesce DM path triggers vs. the controller path.
13076
+ logStreamingEvent({ kind: 'early_ack_reaction', chatId, messageId: msgId, emoji: '👀' })
11813
13077
  // #553 PR 3: also fire the native "typing…" indicator. Bridges the
11814
13078
  // visual gap between the early-ack 👀 reaction and the first real
11815
13079
  // model text. No fake content — Telegram clients render this natively
@@ -11818,6 +13082,26 @@ function maybeEarlyAckReaction(ctx: Context, from: NonNullable<Context['from']>)
11818
13082
  void bot.api.sendChatAction(chatId, 'typing').catch(() => {})
11819
13083
  }
11820
13084
 
13085
+ /**
13086
+ * #2527 — "Status?" short-circuit. A message arriving DURING an active turn
13087
+ * (the user explicitly asking what's happening) fires the mid-turn liveness
13088
+ * floor immediately, bypassing the timer/working gates. Surface-agnostic:
13089
+ * works identically in a DM and a forum-supergroup topic (keyed on statusKey).
13090
+ * Idempotent per turn (the floor's fire-once latch) and kill-switch-gated.
13091
+ */
13092
+ function maybePokeFloorForMidTurnInbound(ctx: Context, from: NonNullable<Context['from']>): void {
13093
+ const rawChatId = ctx.chat?.id
13094
+ if (rawChatId == null) return
13095
+ const chatId = String(rawChatId)
13096
+ const threadId = ctx.message?.is_topic_message ? ctx.message.message_thread_id : undefined
13097
+ const key = statusKey(chatId, threadId)
13098
+ // Only mid-turn: a turn must already be in flight for this (chat, thread).
13099
+ if (!activeTurnStartedAt.has(key)) return
13100
+ const access = loadAccess()
13101
+ if (!access.allowFrom.includes(String(from.id))) return
13102
+ silencePoke.pokeFloorNow(key, Date.now())
13103
+ }
13104
+
11821
13105
  async function handleInbound(
11822
13106
  ctx: Context,
11823
13107
  text: string,
@@ -12723,17 +14007,42 @@ async function handleInbound(
12723
14007
  if (!chatAvailableReactions.has(chat_id)) {
12724
14008
  probeAvailableReactions(chat_id)
12725
14009
  }
14010
+ // #2527: use inbound msgId as a stable per-turn reaction identifier.
14011
+ // The controller is created before currentTurn.turnId is assigned
14012
+ // (that happens in handleSessionEvent's enqueue branch), so we capture
14013
+ // msgId here and use it as the reaction-session token in log events.
14014
+ const ctrlTurnToken = `${chat_id}:${msgId}`
12726
14015
  const ctrl = new StatusReactionController(async (emoji) => {
12727
14016
  await bot.api.setMessageReaction(chat_id, msgId, [
12728
14017
  { type: 'emoji', emoji: emoji as ReactionTypeEmoji['emoji'] },
12729
14018
  ])
12730
14019
  // #203: every status-reaction transition is a user-visible signal.
12731
14020
  signalTracker.noteSignal(key, Date.now())
12732
- }, allowedReactions)
14021
+ }, allowedReactions, {
14022
+ // #2527: emit a structured transition event on each emoji change so
14023
+ // the reaction lifecycle is visible in streaming-metrics logs. Also
14024
+ // increment the per-key counter for the turn_no_reply_warn metric.
14025
+ onTransition: (emoji) => {
14026
+ reactionTransitionCounts.set(key, (reactionTransitionCounts.get(key) ?? 0) + 1)
14027
+ logStreamingEvent({
14028
+ kind: 'status_reaction_transition',
14029
+ chatId: chat_id,
14030
+ turnId: ctrlTurnToken,
14031
+ emoji,
14032
+ })
14033
+ },
14034
+ })
12733
14035
  activeStatusReactions.set(key, ctrl)
12734
14036
  activeReactionMsgIds.set(key, { chatId: chat_id, messageId: msgId })
12735
14037
  activeTurnStartedAt.set(key, Date.now())
12736
14038
  progressUpdateTurnCount.set(key, 0) // Reset turn counter
14039
+ // #2527: log controller install so the lifecycle start is observable.
14040
+ logStreamingEvent({
14041
+ kind: 'status_reaction_install',
14042
+ chatId: chat_id,
14043
+ turnId: ctrlTurnToken,
14044
+ messageId: msgId,
14045
+ })
12737
14046
  ctrl.setQueued()
12738
14047
  // #203: time-to-ack metric — setQueued() triggers the initial 👀 reaction
12739
14048
  // asynchronously through the controller chain.
@@ -13300,6 +14609,18 @@ function getCommandArgs(ctx: Context): string {
13300
14609
  return m ? m[1].trim() : ''
13301
14610
  }
13302
14611
 
14612
+ /**
14613
+ * True when a slash command's argument string carries a trailing `demo`
14614
+ * token — the per-command PII-mask modifier for screen recordings
14615
+ * (`/usage demo`, `/auth demo`, `/status demo`, `/whoami demo`). Matches
14616
+ * `demo` as the last whitespace-delimited token, case-insensitively, so
14617
+ * `/auth show demo` and `/usage demo` both flip the flag while a label
14618
+ * literally named `demo-foo` does not.
14619
+ */
14620
+ function hasDemoFlag(args: string): boolean {
14621
+ return /(?:^|\s)demo$/i.test(args.trim())
14622
+ }
14623
+
13303
14624
  /** Validate that a string looks like a safe agent/resource name.
13304
14625
  * Agent names should be alphanumeric with hyphens/underscores only.
13305
14626
  * This prevents shell metacharacter injection even though both exec
@@ -13790,6 +15111,9 @@ function notifyDetachedFailure(
13790
15111
  lockedBot.api.sendMessage(chatId, text, {
13791
15112
  parse_mode: 'HTML',
13792
15113
  link_preview_options: { is_disabled: true },
15114
+ // Detached restart/update child-failure notice — status, not
15115
+ // the user's answer. Silence the open ping.
15116
+ disable_notification: true,
13793
15117
  ...(threadId != null ? { message_thread_id: threadId } : {}),
13794
15118
  }),
13795
15119
  {
@@ -14577,9 +15901,10 @@ bot.command('status', async ctx => {
14577
15901
  const { access, senderId } = gated
14578
15902
  const from = ctx.from!
14579
15903
  if (access.allowFrom.includes(senderId)) {
15904
+ const demo = hasDemoFlag(getCommandArgs(ctx))
14580
15905
  const userTag = from.username ? `@${from.username}` : senderId
14581
15906
  const meta = await buildAgentMetadata(getMyAgentName())
14582
- await ctx.reply(buildStatusPairedText({ user: userTag, meta }), { parse_mode: 'HTML' })
15907
+ await ctx.reply(buildStatusPairedText({ user: userTag, meta, demo }), { parse_mode: 'HTML' })
14583
15908
  return
14584
15909
  }
14585
15910
  for (const [code, p] of Object.entries(access.pending)) {
@@ -14712,10 +16037,12 @@ bot.command('model', async ctx => {
14712
16037
  // `/effort` — show or switch the reasoning effort for the live session.
14713
16038
  // The effort sibling of `/model`: bare form renders a five-button menu
14714
16039
  // (low/medium/high/xhigh/max, the live level ✅), a typed form
14715
- // `/effort <level>` sets it directly. Both ride the allowlisted inject
14716
- // primitive (claude's own `/effort` REPL command), session-scoped boot
14717
- // re-pins the configured default via start.sh's `--effort`. Implementation
14718
- // in effort-command.ts so it's unit-testable without booting the bot.
16040
+ // `/effort <level>` sets it directly. Both ride the dedicated `applyEffort`
16041
+ // driver (claude's own `/effort` REPL command, with the confirmation modal
16042
+ // answered so the pane never wedges — NOT the bare inject primitive, which
16043
+ // is blocklisted for `/effort` since #2471), session-scoped; boot re-pins
16044
+ // the configured default via start.sh's `--effort`. Implementation in
16045
+ // effort-command.ts so it's unit-testable without booting the bot.
14719
16046
  function buildEffortDeps(): EffortCommandDeps {
14720
16047
  return {
14721
16048
  applyEffort: (agent, level) => applyEffort(agent, level),
@@ -14799,6 +16126,9 @@ bot.command('restart', async ctx => {
14799
16126
  (tid) =>
14800
16127
  lockedBot.api.sendMessage(chatId, ackText, {
14801
16128
  parse_mode: 'HTML', link_preview_options: { is_disabled: true },
16129
+ // Restart acknowledgement is a status notice — silence the
16130
+ // open ping (the "restarted — ready" follow-up is what matters).
16131
+ disable_notification: true,
14802
16132
  ...(tid != null ? { message_thread_id: tid } : {}),
14803
16133
  }),
14804
16134
  { threadId, chat_id: chatId, verb: 'restart.ack' },
@@ -14940,6 +16270,9 @@ async function handleNewCommand(ctx: Context): Promise<void> {
14940
16270
  (tid) =>
14941
16271
  lockedBot.api.sendMessage(chatId, ackText, {
14942
16272
  parse_mode: 'HTML', link_preview_options: { is_disabled: true },
16273
+ // /new /reset acknowledgement is a status notice — silence the
16274
+ // open ping (the post-restart greeting card is what matters).
16275
+ disable_notification: true,
14943
16276
  ...(tid != null ? { message_thread_id: tid } : {}),
14944
16277
  }),
14945
16278
  { threadId, chat_id: chatId, verb: 'new-or-reset.ack' },
@@ -15142,6 +16475,9 @@ bot.command('update', async ctx => {
15142
16475
  lockedBot.api.sendMessage(chatId, ackText, {
15143
16476
  parse_mode: 'HTML',
15144
16477
  link_preview_options: { is_disabled: true },
16478
+ // "update started" acknowledgement is a status notice — silence
16479
+ // the open ping (the post-restart greeting card is what matters).
16480
+ disable_notification: true,
15145
16481
  ...(tid != null ? { message_thread_id: tid } : {}),
15146
16482
  }),
15147
16483
  { threadId, chat_id: chatId, verb: 'update.ack' },
@@ -15603,6 +16939,36 @@ const fleetFallbackGate = createFleetFallbackGate({
15603
16939
  brokerReachable: isAuthBrokerSocketReachable,
15604
16940
  })
15605
16941
 
16942
+ /**
16943
+ * Resume-after-swap gate (auth-failover-stall fix). Owns the single-flight +
16944
+ * staleness decision for re-running the turn a mid-turn 429 killed. See
16945
+ * fleet-fallback-resume.ts. Wired into doFireFleetAutoFallback below: on a
16946
+ * 'switched' outcome we restart so the boot-resume path replays the dead turn
16947
+ * on the freshly-active account. 3h staleness mirrors the boot-resume
16948
+ * RESUME_MAX_AGE_MS failsafe (gateway boot path); single-flight stops a 429
16949
+ * storm from loop-restarting the agent.
16950
+ */
16951
+ const fleetFallbackResumeGate = createFleetFallbackResumeGate({
16952
+ maxAgeMs: (() => {
16953
+ const v = Number(process.env.SWITCHROOM_RESUME_MAX_AGE_MS)
16954
+ return Number.isFinite(v) && v > 0 ? v : 10_800_000 // 3h, matches boot-resume
16955
+ })(),
16956
+ })
16957
+
16958
+ /**
16959
+ * The start time (epoch-ms) of the most-recently-started active turn — the
16960
+ * staleness signal for the resume gate. `activeTurnStartedAt` is stamped on
16961
+ * inbound receipt (see its declaration), so the newest entry is the turn the
16962
+ * 429 just killed. Returns null when no turn is tracked (then the resume gate
16963
+ * defers staleness to the boot-resume 3h failsafe). */
16964
+ function newestActiveTurnStartedAtMs(): number | null {
16965
+ let newest: number | null = null
16966
+ for (const ms of activeTurnStartedAt.values()) {
16967
+ if (newest == null || ms > newest) newest = ms
16968
+ }
16969
+ return newest
16970
+ }
16971
+
15606
16972
  function wouldFireFleetAutoFallback(): boolean {
15607
16973
  return fleetFallbackGate.wouldFire()
15608
16974
  }
@@ -15658,6 +17024,17 @@ async function fireFleetAutoFallback(triggerAgent: string, untilMs?: number): Pr
15658
17024
  */
15659
17025
  let fallbackFailureNoticeState: FallbackFailureNoticeState = { lastSentAtMs: 0 }
15660
17026
 
17027
+ /**
17028
+ * Bug 2 — per-gateway cooldown for the "All accounts blocked" card. The
17029
+ * all-blocked outcome is a no-op swap (doFireFleetAutoFallback returns false),
17030
+ * so the fleetFallbackGate dedup window never arms for it, and the ~60s
17031
+ * quota_wall_detected re-trigger would otherwise re-broadcast the identical card
17032
+ * every minute for the life of the wall. This bounds it to one card per window.
17033
+ * Reset on a successful swap so a fresh all-blocked after a recovery (a real new
17034
+ * transition) is not stale-suppressed.
17035
+ */
17036
+ let fallbackAllBlockedNoticeState: FallbackAllBlockedNoticeState = { lastSentAtMs: 0 }
17037
+
15661
17038
  function broadcastFleetFallbackFailure(triggerAgent: string, reason: string): void {
15662
17039
  if (process.env.SWITCHROOM_FLEET_FALLBACK_FAILURE_NOTICE === '0') return
15663
17040
  // Notice-level cooldown (30 min, per gateway). The fleetFallbackGate's
@@ -15743,19 +17120,66 @@ async function doFireFleetAutoFallback(triggerAgent: string, untilMs?: number):
15743
17120
  (outcome.kind === 'switched' ? ` old=${outcome.oldLabel} new=${outcome.newLabel}` : '') +
15744
17121
  '\n',
15745
17122
  )
17123
+ // Bug 2 — the all-blocked card is a no-op outcome, so the gate's dedup
17124
+ // window never arms for it and the ~60s quota_wall_detected re-trigger would
17125
+ // re-broadcast the identical card every minute. Gate it behind a per-gateway
17126
+ // cooldown; a successful swap resets the window so a later (genuinely new)
17127
+ // all-blocked still emits promptly.
17128
+ if (outcome.kind === 'switched') {
17129
+ fallbackAllBlockedNoticeState = { lastSentAtMs: 0 }
17130
+ } else if (outcome.kind === 'all-blocked') {
17131
+ const verdict = evaluateAllBlockedNotice(fallbackAllBlockedNoticeState, Date.now())
17132
+ if (!verdict.send) {
17133
+ process.stderr.write(
17134
+ `telegram gateway: [fleet-fallback] all-blocked card suppressed (cooldown) agent=${triggerAgent}\n`,
17135
+ )
17136
+ return false
17137
+ }
17138
+ fallbackAllBlockedNoticeState = verdict.next
17139
+ }
15746
17140
  // Post the announcement to every authorized chat. Mirrors the
15747
17141
  // operator-event broadcast pattern (line ~2290) — DM-only opts
15748
17142
  // (no message_thread_id) so THREAD_NOT_FOUND can't fire here;
15749
17143
  // wrap in swallowingApiCall anyway per the codebase rule.
15750
17144
  const access = loadAccess()
15751
17145
  if (access.allowFrom.length === 0) return outcome.kind === 'switched'
15752
- const opts = { parse_mode: 'HTML' as const }
17146
+ // Account-switch / all-blocked announcement is a system status notice,
17147
+ // not the user's answer — silence the open ping.
17148
+ const opts = { parse_mode: 'HTML' as const, disable_notification: true }
15753
17149
  for (const chat_id of access.allowFrom) {
15754
17150
  void swallowingApiCall(
15755
17151
  () => bot.api.sendMessage(chat_id, outcome.announcement, opts),
15756
17152
  { chat_id, verb: 'fleet-fallback:notify' },
15757
17153
  )
15758
17154
  }
17155
+ // ── Resume the dead turn (auth-failover-stall fix) ──────────────────────
17156
+ // A mid-turn 429 killed a turn; the swap above moved the fleet to a healthy
17157
+ // account, but that only takes effect on the NEXT claude invocation. Re-run
17158
+ // the dead turn via triggerSelfRestart: the boot-resume path (gateway boot,
17159
+ // findLatestTurnIfInterrupted → buildResumeInterruptedInbound) replays the
17160
+ // LATEST interrupted turn on the freshly-active account. We restart rather
17161
+ // than redeliver because the failed inbound was already DELIVERED (the turn
17162
+ // started, then the model 429'd) so it is NOT in pendingInboundBuffer —
17163
+ // redeliverBufferedInbound would find nothing. Guards live in
17164
+ // fleetFallbackResumeGate: single-flight (a 429 storm cannot loop-restart)
17165
+ // + 3h staleness (an ancient interrupted turn is not resurrected). Only
17166
+ // reached on 'switched'; all-blocked / no-op outcomes never get here, so the
17167
+ // all-blocked cooldown path above is preserved.
17168
+ if (outcome.kind === 'switched') {
17169
+ const verdict = fleetFallbackResumeGate.decide(newestActiveTurnStartedAtMs())
17170
+ if (verdict === 'resume') {
17171
+ const selfAgent = process.env.SWITCHROOM_AGENT_NAME ?? triggerAgent
17172
+ process.stderr.write(
17173
+ `telegram gateway: [fleet-fallback] resuming dead turn via self-restart ` +
17174
+ `agent=${selfAgent} (swap ${outcome.oldLabel}→${outcome.newLabel})\n`,
17175
+ )
17176
+ triggerSelfRestart(selfAgent, 'fleet-fallback-resume')
17177
+ } else {
17178
+ process.stderr.write(
17179
+ `telegram gateway: [fleet-fallback] resume suppressed (${verdict}) agent=${triggerAgent}\n`,
17180
+ )
17181
+ }
17182
+ }
15759
17183
  return outcome.kind === 'switched'
15760
17184
  } catch (err) {
15761
17185
  process.stderr.write(
@@ -15815,6 +17239,9 @@ async function runCreditWatch(): Promise<void> {
15815
17239
  bot.api.sendMessage(chat_id, decision.message, {
15816
17240
  parse_mode: 'HTML',
15817
17241
  link_preview_options: { is_disabled: true },
17242
+ // Credit/quota warning is a system status notice — silence the
17243
+ // open ping (the user isn't waiting to tap anything).
17244
+ disable_notification: true,
15818
17245
  }),
15819
17246
  { chat_id, verb: 'credit-watch.notify' },
15820
17247
  )
@@ -15928,6 +17355,10 @@ async function runQuotaWatch(opts: { bootTick?: boolean } = {}): Promise<void> {
15928
17355
  accounts: listStateData.accounts,
15929
17356
  prev: fleetPrev,
15930
17357
  now,
17358
+ // #2478: the same staleness ceiling the per-account loop uses. Gates the
17359
+ // `entered` alert behind live corroboration so a probe blackout's stale
17360
+ // marks can't false-fire 🔴 All accounts exhausted.
17361
+ tuning,
15931
17362
  })
15932
17363
  if (fleetDecision.kind === 'notify') {
15933
17364
  for (const chat_id of access.allowFrom) {
@@ -15950,6 +17381,8 @@ async function runQuotaWatch(opts: { bootTick?: boolean } = {}): Promise<void> {
15950
17381
  bot.api.sendMessage(chat_id, fleetDecision.message, {
15951
17382
  parse_mode: 'HTML',
15952
17383
  link_preview_options: { is_disabled: true },
17384
+ // Quota status notice — silence the open ping.
17385
+ disable_notification: true,
15953
17386
  }),
15954
17387
  { chat_id, verb: 'quota-watch.fleet-all-exhausted' },
15955
17388
  )
@@ -16022,11 +17455,21 @@ async function runQuotaWatch(opts: { bootTick?: boolean } = {}): Promise<void> {
16022
17455
  // numbers for the notification message bodies. One batched RPC for all
16023
17456
  // crossing accounts (typically 1, rarely 2+).
16024
17457
  const crossingLabels = pendingTransitions.map(t => t.accountLabel)
16025
- let freshProbeMap = new Map<string, Awaited<ReturnType<typeof brokerClient.probeQuota>>['results'][number]['result']>()
17458
+ // #2495 BLOCKER fix — store the FULL entry (result + `served` tag), not just
17459
+ // `entry.result`. The corroboration gate below needs `served` to tell a true
17460
+ // live probe apart from a failed-probe cache fallback (which is `ok:true`
17461
+ // but `served:"cache"` — vacuous corroboration).
17462
+ let freshProbeMap = new Map<string, Awaited<ReturnType<typeof brokerClient.probeQuota>>['results'][number]>()
16026
17463
  try {
16027
- const probeData = await brokerClient.probeQuota(crossingLabels, 8000)
17464
+ // #2495 Change 3 — forceLive bypasses the broker's probe-on-open TTL so the
17465
+ // DECISION to alarm is corroborated by a TRUE live probe, never a cache hit.
17466
+ // Only the transition-to-alarm pays for this; steady-state polls stay on the
17467
+ // cheap cached listState read (no probe). Honors the existing fleet/consumer
17468
+ // probe knobs upstream — this re-evaluation never fires without a detected
17469
+ // transition.
17470
+ const probeData = await brokerClient.probeQuota(crossingLabels, 8000, true)
16028
17471
  for (const entry of probeData.results) {
16029
- freshProbeMap.set(entry.label, entry.result)
17472
+ freshProbeMap.set(entry.label, entry)
16030
17473
  }
16031
17474
  } catch (err) {
16032
17475
  process.stderr.write(`telegram gateway: quota-watch: probe for crossing accounts failed: ${err}\n`)
@@ -16058,17 +17501,25 @@ async function runQuotaWatch(opts: { bootTick?: boolean } = {}): Promise<void> {
16058
17501
  for (const { accountLabel, snapIndex, decision } of pendingTransitions) {
16059
17502
  // Re-evaluate with fresh probe data to get an accurate message body.
16060
17503
  // If the fresh probe succeeded, replace the snap's quota with live data.
16061
- const freshResult = freshProbeMap.get(accountLabel)
17504
+ const freshEntry = freshProbeMap.get(accountLabel)
16062
17505
  let enrichedDecision = decision
16063
17506
  // pendingTransitions only ever holds notify decisions (pushed under
16064
17507
  // `decision.kind !== 'skip'` / `!== 'reconcile'`). Narrow explicitly so
16065
17508
  // `decision.transition` type-checks below; this continue never fires
16066
17509
  // at runtime.
16067
17510
  if (decision.kind !== 'notify') continue
16068
- if (freshResult && freshResult.ok && snapIndex >= 0) {
17511
+ // #2495 BLOCKER fix — only a GENUINE live probe corroborates the alarm. A
17512
+ // forceLive entry that is `ok:true` but `served:"cache"` (the broker's
17513
+ // failed-probe cache fallback) is NOT corroboration: the upstream probe
17514
+ // failed, so we have no live confirmation that the throttling crossing is
17515
+ // real right now. Treat it exactly like a probe failure → fall through to
17516
+ // the defer branch below (state untouched, re-evaluated next tick). This
17517
+ // also guarantees the "Live-probe corroborated (#2495)" footnote is only
17518
+ // ever stamped on a real live probe.
17519
+ if (isLiveCorroboration(freshEntry) && freshEntry!.result.ok && snapIndex >= 0) {
16069
17520
  // Live numbers replace the cache — and capturedAtMs is cleared so the
16070
17521
  // staleness gate never misfires on data we JUST probed.
16071
- const enrichedSnap = { ...snapshots[snapIndex]!, quota: freshResult.data, capturedAtMs: undefined }
17522
+ const enrichedSnap = { ...snapshots[snapIndex]!, quota: freshEntry!.result.data, capturedAtMs: undefined }
16072
17523
  const prev = watchState[accountLabel] ?? emptyAccountState()
16073
17524
  const re = evaluateQuotaWatchAccount({ agentName, snap: enrichedSnap, prev, now, bootTick, tuning })
16074
17525
  // If the fresh probe still shows the same transition, use the
@@ -16153,6 +17604,8 @@ async function runQuotaWatch(opts: { bootTick?: boolean } = {}): Promise<void> {
16153
17604
  bot.api.sendMessage(chat_id, message, {
16154
17605
  parse_mode: 'HTML',
16155
17606
  link_preview_options: { is_disabled: true },
17607
+ // Quota throttling status notice — silence the open ping.
17608
+ disable_notification: true,
16156
17609
  }),
16157
17610
  { chat_id, verb: 'quota-watch.notify' },
16158
17611
  )
@@ -16424,7 +17877,13 @@ bot.command("auth", async ctx => {
16424
17877
  }
16425
17878
  return
16426
17879
  }
16427
- const text = ctx.message?.text ?? ""
17880
+ const rawText = ctx.message?.text ?? ""
17881
+ // `/auth demo` (and `/auth show demo` / `/auth list demo`) — the trailing
17882
+ // `demo` token masks account-email labels on the default dashboard view for
17883
+ // screen recordings. Strip it before parsing so `demo` isn't mistaken for a
17884
+ // verb/agent argument; it's honored only on the show/list path downstream.
17885
+ const authDemo = hasDemoFlag(getCommandArgs(ctx))
17886
+ const text = authDemo ? rawText.replace(/(\s+)demo\s*$/i, "") : rawText
16428
17887
  const parsed = parseAuthCommand(text)
16429
17888
  if (!parsed) return
16430
17889
  const currentAgent = getMyAgentName()
@@ -16486,11 +17945,12 @@ bot.command("auth", async ctx => {
16486
17945
  return
16487
17946
  }
16488
17947
  try {
16489
- const { loginUrl, scratchDir, child } = await startAccountAuthSession(parsed.label)
17948
+ const { loginUrl, scratchDir, tmuxSocket, tmuxSession } = await startAccountAuthSession(parsed.label)
16490
17949
  pendingAuthAddFlows.set(authAddKey, {
16491
17950
  label: parsed.label,
16492
17951
  scratchDir,
16493
- child,
17952
+ tmuxSocket,
17953
+ tmuxSession,
16494
17954
  startedAt: Date.now(),
16495
17955
  })
16496
17956
  await switchroomReply(
@@ -16522,6 +17982,7 @@ bot.command("auth", async ctx => {
16522
17982
  isAdmin,
16523
17983
  client,
16524
17984
  chatId,
17985
+ demo: authDemo,
16525
17986
  // Format 2 enricher — live quota probe via the broker (#1336).
16526
17987
  // Pre-broker this read `~/.switchroom/accounts/<label>/credentials.json`
16527
17988
  // off the agent's HOME, which post-RFC-H is never populated (broker
@@ -16533,18 +17994,27 @@ bot.command("auth", async ctx => {
16533
17994
  liveQuotas: async (accounts) => {
16534
17995
  try {
16535
17996
  const { results } = await client.probeQuota(accounts.map((a) => a.label))
17997
+ // #2495 Change 2 — the broker tags each result `served:"live"|"cache"`
17998
+ // (TTL hit or failed-probe fallback). When ANY account was served from
17999
+ // cache, surface the OLDEST snapshot's capturedAt so the card stamps
18000
+ // "⚠ cached Nm ago" instead of a false live stamp.
18001
+ let staleCachedAtMs: number | undefined
16536
18002
  // Preserve input order (broker also preserves it, but be defensive).
16537
- return accounts.map((a) => {
18003
+ const quotas = accounts.map((a) => {
16538
18004
  const hit = results.find((r) => r.label === a.label)
16539
18005
  if (!hit) return { ok: false as const, reason: "broker returned no result for account" }
18006
+ if (hit.served === 'cache' && hit.capturedAt != null) {
18007
+ staleCachedAtMs = staleCachedAtMs == null ? hit.capturedAt : Math.min(staleCachedAtMs, hit.capturedAt)
18008
+ }
16540
18009
  return hit.result
16541
18010
  })
18011
+ return { quotas, staleCachedAtMs }
16542
18012
  } catch (err) {
16543
18013
  // Surface a uniform per-account failure so the dashboard renders
16544
18014
  // gracefully (label badge stays UNKNOWN) instead of falling back
16545
18015
  // to the legacy table.
16546
18016
  const reason = `broker probe-quota failed: ${(err as Error)?.message ?? String(err)}`
16547
- return accounts.map(() => ({ ok: false as const, reason }))
18017
+ return { quotas: accounts.map(() => ({ ok: false as const, reason })) }
16548
18018
  }
16549
18019
  },
16550
18020
  tz: process.env.SWITCHROOM_TIMEZONE ?? process.env.TZ,
@@ -18899,6 +20369,7 @@ bot.command('issues', async ctx => {
18899
20369
 
18900
20370
  bot.command('usage', async ctx => {
18901
20371
  if (!isAuthorizedSender(ctx)) return
20372
+ const demo = hasDemoFlag(getCommandArgs(ctx))
18902
20373
  // Format 2 path: enumerate every account in the broker's known set,
18903
20374
  // probe live quota in parallel, render the health-grouped snapshot.
18904
20375
  // Falls back to the legacy single-agent shape when the broker is
@@ -18911,9 +20382,17 @@ bot.command('usage', async ctx => {
18911
20382
  const state = await client.listState()
18912
20383
  if (state.accounts.length > 0) {
18913
20384
  // Broker-routed probe (#1336) — see gateway.ts:8910 for diagnosis.
20385
+ // #2495 Change 2 — the broker applies a probe-on-open TTL + single-
20386
+ // flight; a TTL-hit or failed-probe fallback is tagged served:"cache",
20387
+ // which we surface as a "⚠ cached Nm ago" footer instead of a false
20388
+ // live stamp.
18914
20389
  const probeResp = await client.probeQuota(state.accounts.map((a) => a.label)).catch(() => ({ results: [] }))
20390
+ let staleCachedAtMs: number | undefined
18915
20391
  const quotas = state.accounts.map((a) => {
18916
20392
  const hit = probeResp.results.find((r) => r.label === a.label)
20393
+ if (hit?.served === 'cache' && hit.capturedAt != null) {
20394
+ staleCachedAtMs = staleCachedAtMs == null ? hit.capturedAt : Math.min(staleCachedAtMs, hit.capturedAt)
20395
+ }
18917
20396
  return hit?.result ?? { ok: false as const, reason: 'broker returned no result for account' }
18918
20397
  })
18919
20398
  const { renderAuthSnapshotFormat2, buildSnapshotsFromState } = await import(
@@ -18924,7 +20403,8 @@ bot.command('usage', async ctx => {
18924
20403
  const text = renderAuthSnapshotFormat2(snapshots, {
18925
20404
  tz,
18926
20405
  now: new Date(),
18927
- liveProbedAtMs: Date.now(),
20406
+ demo,
20407
+ ...(staleCachedAtMs != null ? { staleCachedAtMs } : { liveProbedAtMs: Date.now() }),
18928
20408
  })
18929
20409
  await switchroomReply(ctx, text, { html: true })
18930
20410
  return
@@ -19091,13 +20571,14 @@ bot.command('version', async ctx => {
19091
20571
  // see at a glance what this agent is authorized for.
19092
20572
  bot.command('whoami', async ctx => {
19093
20573
  if (!isAuthorizedSender(ctx)) return
20574
+ const demo = hasDemoFlag(getCommandArgs(ctx))
19094
20575
  try {
19095
20576
  let raw: string
19096
20577
  try { raw = switchroomExecCombined(['config', 'whoami'], 10000) }
19097
20578
  catch (err: unknown) { raw = (err as any).stdout ?? (err as any).message ?? 'whoami failed' }
19098
20579
  const trimmed = stripAnsi(raw).trim()
19099
20580
  let card: string
19100
- try { card = formatWhoamiCard(JSON.parse(trimmed.split('\n').pop() ?? trimmed)) }
20581
+ try { card = formatWhoamiCard(JSON.parse(trimmed.split('\n').pop() ?? trimmed), demo) }
19101
20582
  catch { card = preBlock(formatSwitchroomOutput(trimmed || 'whoami: no output')) }
19102
20583
  await switchroomReply(ctx, card, { html: true })
19103
20584
  } catch (err: unknown) {
@@ -19105,14 +20586,17 @@ bot.command('whoami', async ctx => {
19105
20586
  }
19106
20587
  })
19107
20588
 
19108
- /** Compact HTML card from the `config whoami` JSON view. Names/booleans only. */
20589
+ /** Compact HTML card from the `config whoami` JSON view. Names/booleans only.
20590
+ * `demo` (the `/whoami demo` suffix) masks the vault key NAMES via maskVaultKey
20591
+ * for screen recordings — agent/MCP/model/skills topology is left untouched
20592
+ * (out of scope). Off by default. */
19109
20593
  function formatWhoamiCard(v: {
19110
20594
  name?: string; persona?: string | null; model?: string | null; tier?: string;
19111
20595
  tools?: { allow?: string[]; deny?: string[] }; mcpServers?: string[]; skills?: string[];
19112
20596
  vault?: { key: string; readable: boolean }[];
19113
20597
  powers?: { admin?: boolean; root?: boolean; configEdit?: boolean; crossAgentHostVerbs?: boolean };
19114
20598
  scheduleCount?: number; memoryBackend?: string | null;
19115
- }): string {
20599
+ }, demo = false): string {
19116
20600
  const esc = escapeHtmlForTg
19117
20601
  const yn = (b?: boolean) => (b ? '✓' : '✗')
19118
20602
  const lines: string[] = []
@@ -19125,7 +20609,7 @@ function formatWhoamiCard(v: {
19125
20609
  if ((v.mcpServers ?? []).length) lines.push(`MCP: ${esc(v.mcpServers!.join(', '))}`)
19126
20610
  if ((v.skills ?? []).length) lines.push(`Skills: ${esc(v.skills!.join(', '))}`)
19127
20611
  if ((v.vault ?? []).length) {
19128
- lines.push(`Vault keys (names only): ${v.vault!.map(k => `${esc(k.key)} ${yn(k.readable)}`).join(', ')}`)
20612
+ lines.push(`Vault keys (names only): ${v.vault!.map(k => `${esc(demo ? maskVaultKey(k.key) : k.key)} ${yn(k.readable)}`).join(', ')}`)
19129
20613
  }
19130
20614
  const p = v.powers ?? {}
19131
20615
  lines.push(`Powers: admin ${yn(p.admin)} · root ${yn(p.root)} · config-edit ${yn(p.configEdit)} · cross-agent verbs ${yn(p.crossAgentHostVerbs)}`)
@@ -19350,6 +20834,8 @@ bot.on('callback_query:data', async ctx => {
19350
20834
  await robustApiCall(() =>
19351
20835
  bot.api.editMessageText(args.chatId, args.messageId, args.text, {
19352
20836
  parse_mode: 'HTML',
20837
+ // Resolved on tap — strip the keyboard so it can't be re-tapped.
20838
+ ...(args.stripKeyboard ? { reply_markup: { inline_keyboard: [] } } : {}),
19353
20839
  }),
19354
20840
  )
19355
20841
  } catch {
@@ -19359,6 +20845,9 @@ bot.on('callback_query:data', async ctx => {
19359
20845
  log: (m) =>
19360
20846
  process.stderr.write(`telegram gateway: config-approval cb — ${m}\n`),
19361
20847
  },
20848
+ // Verify the per-card epoch from the callback_data against the live
20849
+ // pending entry — a stale tap (mismatched epoch) is rejected.
20850
+ parsed.epoch,
19362
20851
  )
19363
20852
  await ctx.answerCallbackQuery({
19364
20853
  text: resolved
@@ -21136,6 +22625,11 @@ async function shutdown(signal: string): Promise<void> {
21136
22625
  subagentWatcher?.stop()
21137
22626
  subagentWatcher = null
21138
22627
 
22628
+ // Worker-activity feed runs an internal heartbeat interval; stop it so no
22629
+ // re-render fires during drain (mirrors subagentWatcher above).
22630
+ workerActivityFeed?.stop()
22631
+ workerActivityFeed = null
22632
+
21139
22633
  // Issues watcher polls issues.jsonl on a setInterval (default 2s) and
21140
22634
  // edits the issues card on every tick. Without an explicit stop() the
21141
22635
  // poll keeps firing for the lifetime of the process and accumulates
@@ -21869,7 +23363,8 @@ void (async () => {
21869
23363
  // or the turn ended while it kept running — extended autonomous
21870
23364
  // work) is surfaced via the worker feed instead of vanishing.
21871
23365
  const orphanStatusEnabled = isOrphanSubagentStatusEnabled(process.env.SWITCHROOM_ORPHAN_SUBAGENT_STATUS)
21872
- const workerActivityFeed = createWorkerActivityFeed({
23366
+ workerActivityFeed?.stop()
23367
+ workerActivityFeed = createWorkerActivityFeed({
21873
23368
  bot: {
21874
23369
  sendMessage: async (cid, text, sendOpts) => {
21875
23370
  const sent = await robustApiCall(
@@ -22063,9 +23558,18 @@ void (async () => {
22063
23558
  const rendered = composeTurnActivity(turn)
22064
23559
  if (rendered != null) {
22065
23560
  turn.activityPendingRender = rendered
22066
- if (turn.activityInFlight == null) {
22067
- turn.activityInFlight = drainActivitySummary(turn)
23561
+ // PR-4a: routed through the emission-authority façade
23562
+ // (no-op delegate). Producer made explicit ('tool' — the
23563
+ // drain default this foreground sub-agent render used).
23564
+ const ea = emissionAuthorityFor(turn)
23565
+ // PR-4d: route through the centralized card-drain gate.
23566
+ cardDrainGate(turn, ea, () => {
23567
+ if (ea.mayDrain(turn)) {
23568
+ ea.openOrEditCard('tool', () => {
23569
+ turn.activityInFlight = drainActivitySummary(turn, 'tool')
23570
+ })
22068
23571
  }
23572
+ })
22069
23573
  }
22070
23574
  }
22071
23575
  return
@@ -22083,7 +23587,7 @@ void (async () => {
22083
23587
  orphanStatusEnabled,
22084
23588
  }) === 'worker-feed'
22085
23589
  ) {
22086
- void workerActivityFeed.finish(agentId, {
23590
+ void workerActivityFeed?.finish(agentId, {
22087
23591
  description: dispatch.feedDescription,
22088
23592
  lastTool: null,
22089
23593
  toolCount,
@@ -22100,7 +23604,7 @@ void (async () => {
22100
23604
  // 'orphan' is a stale boot row, not a fresh completion — map
22101
23605
  // it to 'done' so an already-posted message still finalizes.
22102
23606
  if (workerFeedEnabled) {
22103
- void workerActivityFeed.finish(agentId, {
23607
+ void workerActivityFeed?.finish(agentId, {
22104
23608
  description: dispatch.feedDescription,
22105
23609
  lastTool: null,
22106
23610
  toolCount,
@@ -22235,7 +23739,7 @@ void (async () => {
22235
23739
  })
22236
23740
  if (surface === 'worker-feed') {
22237
23741
  const origin = resolveSubagentOriginChat(agentId)
22238
- void workerActivityFeed.update(
23742
+ void workerActivityFeed?.update(
22239
23743
  agentId,
22240
23744
  origin?.chatId || fleetChatId || (loadAccess().allowFrom[0] ?? ''),
22241
23745
  {
@@ -22270,7 +23774,13 @@ void (async () => {
22270
23774
  // feed (the foreground blindspot) — mirroring the
22271
23775
  // main-turn activity feed, which surfaces both tool labels
22272
23776
  // and prose.
22273
- const child = (progressLine ?? latestSummary).trim().slice(0, 120)
23777
+ // Route through the SHARED clipNarrative so multi-line
23778
+ // narration first-line-collapses identically to the main
23779
+ // tier (the main path at showNarrativeStep already does
23780
+ // this). Previously this inlined `.trim().slice(0, 120)`
23781
+ // omitted the first-line collapse, so a multi-line
23782
+ // narrative rendered DIFFERENTLY here than on the main feed.
23783
+ const child = clipNarrative(progressLine ?? latestSummary)
22274
23784
  if (child.length === 0) return
22275
23785
  let narrative = turn.foregroundSubAgents.get(agentId)
22276
23786
  if (narrative == null) {
@@ -22288,13 +23798,70 @@ void (async () => {
22288
23798
  const rendered = composeTurnActivity(turn)
22289
23799
  if (rendered != null) {
22290
23800
  turn.activityPendingRender = rendered
22291
- if (turn.activityInFlight == null) {
22292
- turn.activityInFlight = drainActivitySummary(turn)
23801
+ // PR-4a: routed through the emission-authority façade (no-op
23802
+ // delegate). Producer made explicit ('tool' — the drain
23803
+ // default this foreground sub-agent render used).
23804
+ const ea = emissionAuthorityFor(turn)
23805
+ // PR-4d: route through the centralized card-drain gate.
23806
+ cardDrainGate(turn, ea, () => {
23807
+ if (ea.mayDrain(turn)) {
23808
+ ea.openOrEditCard('tool', () => {
23809
+ turn.activityInFlight = drainActivitySummary(turn, 'tool')
23810
+ })
23811
+ }
23812
+ })
23813
+ // A foreground sub-agent's nested activity IS user-visible
23814
+ // production — count it so the silence-poke clock resets,
23815
+ // exactly like the parent activity-render path (10665). Without
23816
+ // this, a long tools-only foreground sub-agent (no prose) lets
23817
+ // the 300s framework fallback (and the #2527 mid-turn floor)
23818
+ // measure silence against a turn that is visibly working,
23819
+ // risking a premature tear-down / unwanted liveness beat.
23820
+ // PR-4e — keyed liveness under the flag (a foreground
23821
+ // sub-agent's nested render for topic A is A's production).
23822
+ // Flag-OFF keeps the literal `currentTurn === turn`; flag-ON
23823
+ // resolves A by its own key.
23824
+ if (
23825
+ SILENCE_LIVENESS_PRODUCTION &&
23826
+ (EMISSION_AUTHORITY_ENABLED ? turnLiveForItsTopic(turn) : currentTurn === turn)
23827
+ ) {
23828
+ silencePoke.noteProduction(statusKey(turn.sessionChatId, turn.sessionThreadId), Date.now())
22293
23829
  }
22294
23830
  }
22295
23831
  return
22296
23832
  }
22297
23833
 
23834
+ // Fix 2 (post-answer background-agent liveness): when the
23835
+ // watcher surfaces a new step for a background worker, update
23836
+ // the current turn's `subagentActivityAt` timestamp IF the turn
23837
+ // has already delivered its substantive answer. This signal is
23838
+ // written HERE — NOT in the tool_label path — so the drop-guard
23839
+ // (`shouldReopenFeedAfterAck` / finalAnswerSubstantive) cannot
23840
+ // gate it. `feedHeartbeatTick`'s post-answer branch reads
23841
+ // `subagentActivityAt` (not `lastToolLabelAt`, which is frozen
23842
+ // after the answer) to decide whether to open a liveness card.
23843
+ // Only stamp when the turn is alive AND post-answer: pre-answer
23844
+ // activity is already surfaced by the normal tool-label feed.
23845
+ //
23846
+ // SCOPE — this is the IN-TURN-WINDOW surface only. The
23847
+ // `feedHeartbeatTick` post-answer card is driven off `currentTurn`,
23848
+ // which `endCurrentTurnAtomic` nulls at `turn_end`. A genuinely
23849
+ // DECOUPLED background worker keeps running PAST the parent
23850
+ // turn's teardown, so `currentTurn` is null when its later
23851
+ // onProgress ticks arrive → this stamp is inert and the
23852
+ // heartbeat is silent for that worker. That is BY DESIGN, not a
23853
+ // gap: a decoupled worker's ongoing activity is surfaced by the
23854
+ // dedicated, currentTurn-independent `workerActivityFeed` (the
23855
+ // edit-in-place worker message, driven below at `workerFeedEnabled`
23856
+ // and bounded by its own non-running/`finish` teardown). So the
23857
+ // currentTurn card covers the brief post-answer/pre-teardown
23858
+ // window; the worker feed covers everything after teardown. Both
23859
+ // are proven in telegram-activity-visibility-integration.test.ts.
23860
+ const stampTurn = currentTurn
23861
+ if (stampTurn != null && stampTurn.finalAnswerEverDelivered) {
23862
+ stampTurn.subagentActivityAt = Date.now()
23863
+ }
23864
+
22298
23865
  // #PR2 live worker-feed: when ON, the worker's live chat
22299
23866
  // message owns the progress beat. Push a running cue and
22300
23867
  // return BEFORE the legacy bucket relay so the same activity
@@ -22306,7 +23873,7 @@ void (async () => {
22306
23873
  // is gone — see resolveSubagentOriginChat).
22307
23874
  if (workerFeedEnabled) {
22308
23875
  const origin = resolveSubagentOriginChat(agentId)
22309
- void workerActivityFeed.update(
23876
+ void workerActivityFeed?.update(
22310
23877
  agentId,
22311
23878
  origin?.chatId || fleetChatId || (loadAccess().allowFrom[0] ?? ''),
22312
23879
  {
@@ -22370,11 +23937,9 @@ void (async () => {
22370
23937
 
22371
23938
  // Lane state (post flash-decouple): VISIBLE only when the visible flag is
22372
23939
  // Lane state from the single-source-of-truth resolver: 'visible' (preview
22373
- // on), 'draft' (compose-box transport), or 'dormant' (the default: no
22374
- // preview, no draft — reply tool is the only message). The old label
22375
- // wrongly reported 'visible(draft-retired)' for the dormant default, which
22376
- // masked the flash regression.
22377
- process.stderr.write(`telegram gateway: answer-stream lane=${ANSWER_LANE.state} draftFn=${sendMessageDraftFn != null ? 'available' : 'off'} visible=${ANSWER_STREAM_VISIBLE_ENABLED} draftRetired=${DRAFT_ANSWER_LANE_RETIRED} grammy=${GRAMMY_VERSION}\n`)
23940
+ // Lane state: 'visible' (opt-in preview) or 'dormant' (default: reply
23941
+ // tool is the only message). The draft transport is permanently retired.
23942
+ process.stderr.write(`telegram gateway: answer-stream lane=${ANSWER_LANE.state} visible=${ANSWER_STREAM_VISIBLE_ENABLED} grammy=${GRAMMY_VERSION}\n`)
22378
23943
  process.stderr.write(`telegram gateway: starting bot polling pid=${process.pid} agent=${process.env.SWITCHROOM_AGENT_NAME ?? '-'} stateDir=${STATE_DIR} historyEnabled=${HISTORY_ENABLED} streamMode=${process.env.SWITCHROOM_TG_STREAM_MODE ?? 'checklist'}\n`)
22379
23944
  runnerHandle = run(bot, {
22380
23945
  runner: {