switchroom 0.14.55 → 0.14.57

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -281,6 +281,8 @@ import { createPendingInboundBuffer, redeliverBufferedInbound, idleDrainTick } f
281
281
  import { createInboundSpool } from './inbound-spool.js'
282
282
  import { purgeStaleTurnsForChat } from './turn-state-purge.js'
283
283
  import { decideInboundDelivery } from './inbound-delivery-gate.js'
284
+ import { mayDrainBufferedInbound, shouldArmNoReplyDrain } from './serialize-drain-gate.js'
285
+ import { resolveAnswerThreadId } from './answer-thread-resolve.js'
284
286
  import {
285
287
  createDeliveryQueue,
286
288
  trackDelivery,
@@ -1267,6 +1269,14 @@ if (!STATIC) setInterval(checkApprovals, 5000).unref()
1267
1269
  const chatThreadMap = new Map<string, number>()
1268
1270
  const activeStatusReactions = new Map<string, StatusReactionController>()
1269
1271
  const activeReactionMsgIds = new Map<string, { chatId: string; messageId: number }>()
1272
+ // Component 5 (queued-status UX). When a cross-topic inbound buffers behind
1273
+ // an in-flight turn in another topic, we post ONE self-updating status into
1274
+ // the buffered message's OWN topic ("Queued — replying in <other topic>
1275
+ // first" → "On it — replying now" → deleted when the answer lands). Keyed
1276
+ // by chatKey(chat_id, bufferedThread). Delete-on-answer: never a dangling
1277
+ // placeholder. Reaped on answer (executeReply/stream), on turn-flush, and
1278
+ // in purgeReactionTracking cleanup so an abnormal turn-end can't strand it.
1279
+ const queuedStatusMsgIds = new Map<string, { chatId: string; threadId: number; messageId: number }>()
1270
1280
  // Reactions whose terminal 👍 is deferred because a background sub-agent
1271
1281
  // worker was still running when the parent's `turn_end` fired. Painting 👍
1272
1282
  // then would read as "done / nothing happening" while the worker keeps
@@ -1369,6 +1379,47 @@ const DELIVERY_CONFIRM_TIMEOUT_MS =
1369
1379
  const DELIVERY_CONFIRM_SWEEP_MS = 5_000
1370
1380
  const deliveryQueue = createDeliveryQueue<InboundMessage>()
1371
1381
 
1382
+ // ─── Serialize-until-replied (multitopic reply-routing) ───────────────────
1383
+ // Component 1 (deliver-before-drain gate). A buffered cross-topic inbound
1384
+ // drains ONLY after the just-ended turn delivered its reply to its own
1385
+ // thread — see `serialize-drain-gate.ts` for the full rationale (the
1386
+ // Brevo→Meta wrong-topic bug). Kill switch off (=0) → legacy behaviour:
1387
+ // drain on the bare turn-end signal.
1388
const SERIALIZE_UNTIL_REPLIED_ENABLED =
  process.env.SWITCHROOM_SERIALIZE_UNTIL_REPLIED !== '0'

// Component 2 (bounded no-reply escape hatch). A turn that legitimately
// ends with NO reply (handback ack, NO_REPLY marker, silent-end) leaves
// finalAnswerDelivered=false and would block the serialize gate forever.
// When such a turn ends with a buffered inbound waiting, a short bounded
// timer force-drains instead. The 300s silence-poke unwedge fallback
// remains the long-stop. The delay is tunable for tests/forensics via
// SWITCHROOM_SERIALIZE_NOREPLY_DRAIN_MS and is clamped to a value the timer
// can actually honour (see `resolveNoReplyDrainMs`). To disable the
// serialize feature entirely use SWITCHROOM_SERIALIZE_UNTIL_REPLIED=0, not
// a degenerate timeout.
const NOREPLY_DRAIN_DEFAULT_MS = 2_500
// Node coerces any setTimeout delay above 2^31-1 ms (or below 1 ms) to 1 ms,
// so an over-large override would drain almost instantly and defeat
// serialization. Clamp into the range the timer really supports.
const NOREPLY_DRAIN_MIN_MS = 1
const NOREPLY_DRAIN_MAX_MS = 2_147_483_647

/**
 * Turn the raw SWITCHROOM_SERIALIZE_NOREPLY_DRAIN_MS override into a usable
 * timer delay.
 *
 * @param raw - The environment value, or undefined when unset.
 * @returns The default (2500) when `raw` is unset, empty, non-numeric,
 *   non-finite, zero or negative; otherwise the parsed value clamped to
 *   [1, 2^31-1] so `setTimeout` never silently rewrites it to 1 ms.
 */
function resolveNoReplyDrainMs(raw: string | undefined): number {
  if (raw == null || raw === '') return NOREPLY_DRAIN_DEFAULT_MS
  const parsed = Number(raw)
  if (!Number.isFinite(parsed) || parsed <= 0) return NOREPLY_DRAIN_DEFAULT_MS
  return Math.min(Math.max(parsed, NOREPLY_DRAIN_MIN_MS), NOREPLY_DRAIN_MAX_MS)
}

const _noReplyDrainRaw = process.env.SWITCHROOM_SERIALIZE_NOREPLY_DRAIN_MS
const _noReplyDrainParsed = resolveNoReplyDrainMs(_noReplyDrainRaw)
const SERIALIZE_NOREPLY_DRAIN_MS = _noReplyDrainParsed

// Component 3 (turn-origin reply routing). Resolve an answer's thread from
// the turn that OWNS the reply (matched by origin_turn_id), not the live
// currentTurn if it has flipped. Kill switch off (=0) → legacy turn-pinned
// behaviour (#1664: thread from the live currentTurn capture).
const TURN_ORIGIN_ROUTING_ENABLED =
  process.env.SWITCHROOM_TURN_ORIGIN_ROUTING !== '0'

// Component 4 (per-turn topic framing). Add a one-line directive to the
// channel meta + bridge instructions telling the model to answer ONLY the
// current message's topic. Kill switch off (=0) → no framing field.
const TOPIC_FRAMING_ENABLED =
  process.env.SWITCHROOM_TOPIC_FRAMING !== '0'

// Component 5 (queued-status UX). Post a self-updating "Queued — replying
// in <other topic> first" status into a cross-topic buffered message's own
// topic, then edit→delete it as the turn progresses. Kill switch off (=0)
// → no placeholder (the 👀 ack reaction still fires). Delete-on-answer.
const QUEUED_STATUS_UX_ENABLED =
  process.env.SWITCHROOM_QUEUED_STATUS_UX !== '0'
1422
+
1372
1423
  /**
1373
1424
  * Authoritative "is a turn in flight?" for every gate that previously
1374
1425
  * read `claudeBusyKeys.size`. PR 3b cutover (extends PR 3a's bridgeUp
@@ -1524,7 +1575,22 @@ type CurrentTurn = {
1524
1575
  silentAnchorText: string
1525
1576
  capturedText: string[]
1526
1577
  orphanedReplyTimeoutId: ReturnType<typeof setTimeout> | null
1578
+ // Component 3 (turn-origin reply routing). A stable per-turn identity,
1579
+ // `${registryKey-or-chatKey}#${startedAt}`, assigned when the turn
1580
+ // starts and stamped into the inbound meta (`origin_turn_id`) so a reply
1581
+ // can be matched back to the turn that OWNS it — even after `currentTurn`
1582
+ // has flipped to a successor. Recently-ended turns are retained in
1583
+ // `recentTurnsById` so a late reply (the Brevo answer landing 42s after
1584
+ // turn-end, when currentTurn already points at Meta) routes to its
1585
+ // origin thread instead of the live (wrong) turn's thread.
1586
+ turnId: string
1527
1587
  registryKey: string | null
1588
+ // Component 2 (bounded no-reply escape hatch). When a turn ends with
1589
+ // finalAnswerDelivered=false AND a buffered cross-topic inbound is
1590
+ // waiting, this timer force-drains after SERIALIZE_NOREPLY_DRAIN_MS so
1591
+ // the queue can never wedge on a legitimately silent turn. Armed in
1592
+ // `endCurrentTurnAtomic`, cleared if the turn delivers first.
1593
+ noReplyDrainTimer: ReturnType<typeof setTimeout> | null
1528
1594
  // Last assistant outbound message id for the current turn — populated
1529
1595
  // on reply / stream_reply emit, captured into recordTurnEnd. Stage 4
1530
1596
  // reads this on resume to thread-jump back to the in-flight conversation.
@@ -1583,6 +1649,134 @@ type CurrentTurn = {
1583
1649
 
1584
1650
  let currentTurn: CurrentTurn | null = null
1585
1651
 
1652
+ // Component 3 (turn-origin reply routing). Recently-ended turns retained
1653
+ // by `turnId` so a LATE reply (the Brevo answer landing ~42s after
1654
+ // turn-end, after `currentTurn` has flipped to the Meta turn) can still
1655
+ // resolve its ORIGIN turn's thread instead of the live successor's. Bug:
1656
+ // `executeReply` captured `const turn = currentTurn` at entry and, when
1657
+ // the model omitted message_thread_id, routed to `turn.sessionThreadId`
1658
+ // — the flipped turn's thread. Pinning by origin turnId closes that.
1659
+ // Bounded LRU (insertion-ordered Map) so it can't grow unbounded across
1660
+ // a long-lived supergroup session.
1661
// Upper bound on retained turns; the oldest entry is evicted past this.
const RECENT_TURNS_MAX = 32
// turnId → turn. Map iteration order is insertion order, which is what the
// eviction loop below relies on to find the oldest entry.
const recentTurnsById = new Map<string, CurrentTurn>()

/**
 * Retain `turn` in the bounded recent-turns registry, keyed by its `turnId`.
 *
 * The turn becomes the NEWEST entry even when its id is already present:
 * `Map.set` on an existing key keeps that key's original insertion slot, so
 * the entry is deleted first to refresh its recency. Without that, a
 * re-remembered turn would be evicted as if it were still as old as its
 * first insertion.
 *
 * @param turn - The turn to retain; only `turn.turnId` is read here.
 */
function rememberRecentTurn(turn: CurrentTurn): void {
  recentTurnsById.delete(turn.turnId)
  recentTurnsById.set(turn.turnId, turn)
  // Evict from the front (oldest insertion) until back within the bound.
  while (recentTurnsById.size > RECENT_TURNS_MAX) {
    const oldest = recentTurnsById.keys().next().value
    if (oldest === undefined) break
    recentTurnsById.delete(oldest)
  }
}
1671
/**
 * Component 3 — build the stable per-turn identity
 * `<chatKey(chatId, threadId)>#<messageId>`.
 *
 * The id is a pure function of chat, thread and originating message id, so
 * any two call sites that hold the same three values produce the same id
 * (the design intent is that the id stamped into the inbound meta as
 * `origin_turn_id` and the id on the turn started for that message agree).
 *
 * @param chatId - Chat the inbound arrived in.
 * @param threadId - Topic/thread id; null/undefined is passed to `chatKey`
 *   as null.
 * @param messageId - Originating message id.
 * @returns The turn id, or null when there is no usable message id (null,
 *   undefined, empty string, or anything that stringifies to `'0'`) — i.e.
 *   turns with no originating inbound.
 */
function deriveTurnId(
  chatId: string,
  threadId: number | null | undefined,
  messageId: string | number | null | undefined,
): string | null {
  const hasOriginatingMessage =
    messageId != null && messageId !== '' && String(messageId) !== '0'
  if (!hasOriginatingMessage) return null

  const scope = chatKey(chatId, threadId ?? null)
  return `${scope}#${messageId}`
}
1690
+
1691
/**
 * Component 3 — look up the turn that owns a reply from its
 * `origin_turn_id`.
 *
 * Lookup order: the live `currentTurn` first, then the bounded
 * `recentTurnsById` registry of recently-ended turns.
 *
 * @param originTurnId - The id carried on the reply; may be absent.
 * @returns The matching turn, or null when the id is null/undefined/empty
 *   or matches neither the live turn nor a retained one. Callers are
 *   expected to fall back to their own thread precedence in that case.
 */
function findTurnByOriginId(originTurnId: string | null | undefined): CurrentTurn | null {
  // null, undefined and '' are the only falsy values this type admits.
  if (!originTurnId) return null

  const live = currentTurn
  if (live != null && live.turnId === originTurnId) return live

  const ended = recentTurnsById.get(originTurnId)
  return ended != null ? ended : null
}
1704
+
1705
/**
 * Component 5 — post the "Queued — replying in … first" placeholder into a
 * buffered message's own topic and remember its message id.
 *
 * Fire-and-forget: the send runs in a detached async task through
 * `swallowingApiCall`, so this function returns immediately. Suppression
 * for DMs and same-topic queues is the caller's responsibility. No-op when
 * the queued-status kill switch is off or a placeholder is already recorded
 * for this chat/thread.
 *
 * @param chatId - Chat to post into.
 * @param bufferedThread - Topic of the buffered message (where the
 *   placeholder is posted and what the map entry is keyed on).
 * @param inFlightThread - Topic of the turn currently in flight; only its
 *   presence is used, to pick the wording.
 */
function postQueuedStatus(chatId: string, bufferedThread: number, inFlightThread: number | undefined): void {
  if (!QUEUED_STATUS_UX_ENABLED) return

  const placeholderKey = statusKey(chatId, bufferedThread)
  // One placeholder per topic: later buffered messages reuse the first.
  if (queuedStatusMsgIds.has(placeholderKey)) return

  const aheadOfUs = inFlightThread != null ? `another topic` : `another conversation`
  const text = `⏳ Queued — replying in ${aheadOfUs} first, then I'll get to this.`

  const sendAndRecord = async (): Promise<void> => {
    const sent = await swallowingApiCall(
      () =>
        bot.api.sendMessage(chatId, text, { message_thread_id: bufferedThread }),
      { chat_id: chatId, verb: 'queued-status.post', threadId: bufferedThread },
    )
    const messageId = (sent as { message_id?: number } | undefined)?.message_id
    // No usable id (the call was swallowed or returned nothing): nothing to track.
    if (typeof messageId !== 'number') return

    // State may have moved while the send was awaited. If nobody claimed
    // the slot in the meantime, this placeholder is the one to track.
    if (!queuedStatusMsgIds.has(placeholderKey)) {
      queuedStatusMsgIds.set(placeholderKey, { chatId, threadId: bufferedThread, messageId })
      return
    }

    // Another placeholder took the slot during the await; ours is an orphan.
    void swallowingApiCall(
      () => bot.api.deleteMessage(chatId, messageId),
      { chat_id: chatId, verb: 'queued-status.post-race-cleanup', threadId: bufferedThread },
    )
  }

  void sendAndRecord()
}
1742
+
1743
/**
 * Component 5 (Hook B) — the buffered message's turn has started: rewrite
 * its queued placeholder to "On it — replying now." Best-effort and
 * fire-and-forget via `swallowingApiCall`.
 *
 * No-op when the queued-status kill switch is off, when `thread` is
 * null/undefined, or when no placeholder is recorded for this chat/thread.
 *
 * @param chatId - Chat the placeholder lives in.
 * @param thread - Topic whose placeholder should be promoted.
 */
function promoteQueuedStatus(chatId: string, thread: number | undefined): void {
  if (!QUEUED_STATUS_UX_ENABLED || thread == null) return

  const placeholder = queuedStatusMsgIds.get(statusKey(chatId, thread))
  if (placeholder == null) return

  // The edit addresses the message by id, so no message_thread_id is sent;
  // threadId rides along only in the swallowingApiCall options.
  void swallowingApiCall(
    () =>
      bot.api.editMessageText(chatId, placeholder.messageId, '✍️ On it — replying now.', {}),
    { chat_id: chatId, verb: 'queued-status.promote', threadId: thread },
  )
}
1763
+
1764
/**
 * Component 5 (Hook C / reap) — remove the queued placeholder for a
 * chat/thread: drop the map entry, then delete the Telegram message
 * (fire-and-forget through `swallowingApiCall`).
 *
 * Idempotent: when nothing is recorded for the key this does nothing, so it
 * is safe to call from several turn-end paths. Not gated on the
 * queued-status kill switch.
 *
 * @param chatId - Chat the placeholder lives in.
 * @param thread - Topic to reap; undefined is keyed as null.
 */
function reapQueuedStatus(chatId: string, thread: number | undefined): void {
  const placeholderKey = statusKey(chatId, thread ?? null)
  const placeholder = queuedStatusMsgIds.get(placeholderKey)
  if (placeholder == null) return

  // Forget the entry before issuing the delete so a repeat call is a no-op.
  queuedStatusMsgIds.delete(placeholderKey)
  void swallowingApiCall(
    () => bot.api.deleteMessage(chatId, placeholder.messageId),
    {
      chat_id: chatId,
      verb: 'queued-status.reap',
      ...(placeholder.threadId != null ? { threadId: placeholder.threadId } : {}),
    },
  )
}
1779
+
1586
1780
  // Problem B — deferred safe-boundary interrupt.
1587
1781
  //
1588
1782
  // `toolFlightTracker` mirrors the session-event stream to know whether a
@@ -1722,6 +1916,105 @@ function streamKey(chatId: string, threadId?: number | null): string {
1722
1916
  return chatKey(chatId, threadId)
1723
1917
  }
1724
1918
 
1919
/**
 * Ungated flush of this agent's pending-inbound buffer.
 *
 * Returns immediately when the buffer is empty. Otherwise hands every held
 * message to `redeliverBufferedInbound`, delivering each through
 * `ipcServer.sendToAgent` and marking claude busy for each one that was
 * delivered. Writes one stderr line when at least one message was
 * redelivered.
 *
 * This function performs NO serialize / in-flight check of its own; callers
 * decide whether draining is allowed.
 *
 * @param reason - Label interpolated into the stderr log line.
 */
function performBufferDrain(reason: string): void {
  const agent = process.env.SWITCHROOM_AGENT_NAME ?? ''
  // Zero-churn on the common path: nothing buffered, nothing to do.
  if (pendingInboundBuffer.depth(agent) <= 0) return

  const outcome = redeliverBufferedInbound(
    pendingInboundBuffer,
    agent,
    (held) => {
      const delivered = ipcServer.sendToAgent(agent, held)
      if (delivered) markClaudeBusyForInbound(held)
      return delivered
    },
    inboundSpool,
    trackRedeliveredInbound,
  )
  if (!(outcome.redelivered > 0)) return

  const rebufferedNote = outcome.rebuffered > 0 ? ` (${outcome.rebuffered} re-buffered)` : ''
  process.stderr.write(
    `telegram gateway: ${reason} flushed ${outcome.redelivered}/${outcome.drained} ` +
      `held inbound for ${agent}` +
      `${rebufferedNote}\n`,
  )
}

/**
 * Component 1 — deliver-before-drain. The shared chokepoint for the
 * turn-end drain sites (`purgeReactionTracking`, `releaseTurnBufferGate`).
 *
 * Asks `mayDrainBufferedInbound` (see `serialize-drain-gate.ts`) whether the
 * buffer may drain, passing the current in-flight state, the ending turn's
 * `finalAnswerDelivered` (null when no turn handle was supplied) and the
 * serialize-until-replied kill switch. Drains via `performBufferDrain` only
 * on a yes. A turn that ended without delivering is deliberately left to
 * the bounded no-reply timer armed in `endCurrentTurnAtomic`.
 *
 * @param endingTurn - The turn that just ended/replied, if known.
 * @param reason - Label forwarded to `performBufferDrain` for logging.
 */
function drainBufferedIfAllowed(endingTurn: CurrentTurn | undefined, reason: string): void {
  const allowed = mayDrainBufferedInbound({
    turnInFlight: turnInFlightForGate(),
    endingTurnFinalAnswerDelivered: endingTurn?.finalAnswerDelivered ?? null,
    enabled: SERIALIZE_UNTIL_REPLIED_ENABLED,
  })
  if (allowed) performBufferDrain(reason)
}
1966
+
1967
/**
 * Component 2 — bounded no-reply escape hatch (the liveness guarantee).
 *
 * A turn that legitimately ends with NO reply (handback ack, NO_REPLY /
 * HEARTBEAT_OK marker, silent-end, greeting already handled) leaves
 * `finalAnswerDelivered=false`, so component 1's serialize gate in
 * `drainBufferedIfAllowed` never drains for it. This arms a one-shot timer
 * that, SERIALIZE_NOREPLY_DRAIN_MS after such a turn ends with a buffered
 * inbound waiting, drains the buffer while bypassing the gate's
 * delivered-check (the turn is over; no reply is coming).
 *
 * The delivered-check is bypassed, the idle-check is NOT. If a different
 * turn is in flight when the timer fires (for example one started by an
 * inbound that was delivered directly rather than from the buffer), the
 * buffer can still be non-empty, and force-delivering into a busy agent is
 * the mid-turn delivery the buffer exists to prevent. In that case the
 * timer stands down. NOTE(review): this relies on that in-flight turn
 * ending through `endCurrentTurnAtomic`, which re-runs the gated drain and
 * re-arms this timer if that turn is also silent — confirm no turn-end path
 * bypasses it. The silence-poke fallback remains the independent long-stop.
 *
 * Arming is decided by the pure `shouldArmNoReplyDrain` guard (feature
 * enabled, turn did not deliver, buffer non-empty). Re-arming replaces any
 * timer already pending on the turn. The timer is unref'd so it does not
 * keep the process alive.
 *
 * @param turn - The turn that just ended; its `noReplyDrainTimer` slot is
 *   used to hold the handle.
 */
function armNoReplyDrainTimer(turn: CurrentTurn): void {
  const selfAgent = process.env.SWITCHROOM_AGENT_NAME ?? ''
  if (
    !shouldArmNoReplyDrain({
      enabled: SERIALIZE_UNTIL_REPLIED_ENABLED,
      finalAnswerDelivered: turn.finalAnswerDelivered,
      bufferedDepth: pendingInboundBuffer.depth(selfAgent),
    })
  ) {
    return
  }

  // Idempotent: never leave two timers pending for the same turn.
  if (turn.noReplyDrainTimer != null) {
    clearTimeout(turn.noReplyDrainTimer)
    turn.noReplyDrainTimer = null
  }

  turn.noReplyDrainTimer = setTimeout(() => {
    turn.noReplyDrainTimer = null
    // Never push buffered inbound into a busy agent; the in-flight turn's
    // own end is the next drain opportunity.
    if (turnInFlightForGate()) {
      process.stderr.write(
        `telegram gateway: no-reply bounded drain (${SERIALIZE_NOREPLY_DRAIN_MS}ms) — ` +
          `turn ${turn.turnId} ended without a reply, but a turn is in flight; ` +
          `deferring buffered inbound to that turn's end\n`,
      )
      return
    }
    process.stderr.write(
      `telegram gateway: no-reply bounded drain (${SERIALIZE_NOREPLY_DRAIN_MS}ms) — ` +
        `turn ${turn.turnId} ended without a reply; force-draining buffered inbound\n`,
    )
    performBufferDrain('no-reply-bounded-drain')
  }, SERIALIZE_NOREPLY_DRAIN_MS)
  turn.noReplyDrainTimer.unref?.()
}
2017
+
1725
2018
  function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
1726
2019
  // Phase 2b: turn end. The key was registered via setTurnStarted when
1727
2020
  // the inbound arrived; purge is the canonical turn-end signal.
@@ -1744,6 +2037,20 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
1744
2037
  activeStatusReactions.delete(key)
1745
2038
  activeReactionMsgIds.delete(key)
1746
2039
  activeTurnStartedAt.delete(key)
2040
+ // Component 5 (reap) — defense-in-depth. The happy path deletes the
2041
+ // queued-status placeholder on the answer (executeReply / stream /
2042
+ // turn-flush). This catches the abnormal turn-end (silent-marker, wedge,
2043
+ // sibling purge) so a stale "Queued"/"On it" line can never dangle in
2044
+ // the topic. Idempotent: a no-op when already reaped. Prefer the ending
2045
+ // turn's session ids (canonical ownership); else parse the chatKey.
2046
+ if (endingTurn != null) {
2047
+ reapQueuedStatus(endingTurn.sessionChatId, endingTurn.sessionThreadId)
2048
+ } else {
2049
+ const pqChatId = chatIdOfChatKey(key as _ChatKey)
2050
+ const pqThreadPart = (key as string).slice(pqChatId.length + 1)
2051
+ const pqThread = pqThreadPart === '_' || pqThreadPart === '' ? null : Number(pqThreadPart)
2052
+ reapQueuedStatus(pqChatId, Number.isFinite(pqThread) ? (pqThread as number) : undefined)
2053
+ }
1747
2054
  // PR3b: clear the parallel-turns fleet-gate entry. Symmetric with
1748
2055
  // the markClaudeBusyForInbound on the delivery path. Safe no-op
1749
2056
  // when the key was never marked (synthetic purge from a sweep).
@@ -1796,36 +2103,21 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
1796
2103
  // when the cutover kill-switch is on; the turnEnd event was emitted
1797
2104
  // just above (purgeReactionTracking head), so the machine is already
1798
2105
  // idle here.
2106
+ // #1556: the deterministic delivery point. claude has just gone idle —
2107
+ // flush any inbound held mid-turn so the channel notification lands at
2108
+ // the idle prompt and submits as a fresh turn (instead of stranding in
2109
+ // the composer, the lawgpt wedge). Component 1 (deliver-before-drain):
2110
+ // routed through `drainBufferedIfAllowed`, which additionally gates on
2111
+ // the ending turn having delivered its reply so a buffered cross-topic
2112
+ // message can't drain ahead of the just-ended turn's late reply (the
2113
+ // Brevo→Meta wrong-topic bug). Zero-churn: the helper depth-checks
2114
+ // first. Lossless: redeliver re-buffers any per-message miss.
2115
+ drainBufferedIfAllowed(endingTurn, 'turn-complete')
2116
+
2117
+ // Restart / compaction stay on the bare turn-end signal (NOT the
2118
+ // serialize gate): a pending self-restart or proactive compaction must
2119
+ // fire when claude is idle regardless of whether the last turn replied.
1799
2120
  if (!turnInFlightForGate()) {
1800
- // #1556: the deterministic delivery point. claude has just gone
1801
- // idle — flush any inbound held mid-turn so the channel
1802
- // notification lands at the idle prompt and submits as a fresh
1803
- // turn (instead of stranding in the composer, the lawgpt wedge).
1804
- // Zero-churn: depth check first, no work on the common empty path.
1805
- // Lossless: redeliver re-buffers any per-message miss (bridge
1806
- // mid-reconnect), which onClientRegistered then drains.
1807
- const selfAgentForFlush = process.env.SWITCHROOM_AGENT_NAME ?? ''
1808
- if (pendingInboundBuffer.depth(selfAgentForFlush) > 0) {
1809
- const fr = redeliverBufferedInbound(
1810
- pendingInboundBuffer,
1811
- selfAgentForFlush,
1812
- (m) => {
1813
- const d = ipcServer.sendToAgent(selfAgentForFlush, m)
1814
- if (d) markClaudeBusyForInbound(m)
1815
- return d
1816
- },
1817
- inboundSpool,
1818
- trackRedeliveredInbound,
1819
- )
1820
- if (fr.redelivered > 0) {
1821
- process.stderr.write(
1822
- `telegram gateway: turn-complete flushed ${fr.redelivered}/${fr.drained} ` +
1823
- `held inbound for ${selfAgentForFlush}` +
1824
- `${fr.rebuffered > 0 ? ` (${fr.rebuffered} re-buffered)` : ''}\n`,
1825
- )
1826
- }
1827
- }
1828
-
1829
2121
  if (pendingRestarts.size > 0) {
1830
2122
  for (const [agentName, _timestamp] of pendingRestarts.entries()) {
1831
2123
  triggerSelfRestart(agentName, 'turn-complete-pending-restart');
@@ -1879,7 +2171,7 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
1879
2171
  * `executeReply`'s post-send block and by tests via source-level
1880
2172
  * pinning in `vault-approval-posture.test.ts` / wedge-guard suites.
1881
2173
  */
1882
- function releaseTurnBufferGate(key: string): void {
2174
+ function releaseTurnBufferGate(key: string, endingTurn?: CurrentTurn): void {
1883
2175
  if (!activeTurnStartedAt.has(key)) return
1884
2176
  activeTurnStartedAt.delete(key)
1885
2177
  // PR3b: keep claudeBusyKeys in sync — same lifecycle as the
@@ -1890,38 +2182,21 @@ function releaseTurnBufferGate(key: string): void {
1890
2182
  // executeReply AFTER an outbound landed.
1891
2183
  shadowEmit({ kind: 'turnEnd', key: key as _ChatKey, at: Date.now(), outboundEmitted: true })
1892
2184
 
1893
- // Mirror the deterministic-delivery flush from
1894
- // `purgeReactionTracking` (gateway.ts:1376-1399). When the fleet
1895
- // hits zero-active-turns, drain any held inbound. This is the
1896
- // load-bearing wedge fix: the gate that pinned msg 1874+ in
2185
+ // Mirror the deterministic-delivery flush from `purgeReactionTracking`.
2186
+ // When the fleet hits zero-active-turns, drain any held inbound. This is
2187
+ // the load-bearing wedge fix: the gate that pinned msg 1874+ in
1897
2188
  // test-harness's 13:02 UAT now opens after the reply.
1898
2189
  //
1899
- // PR3b: gated on claudeBusyKeys (see purgeReactionTracking comment).
1900
- // PR3b-cutover: turnEnd was emitted just above (releaseTurnBufferGate
1901
- // head), so the machine is already idle when the cutover gate reads.
1902
- if (!turnInFlightForGate()) {
1903
- const selfAgentForFlush = process.env.SWITCHROOM_AGENT_NAME ?? ''
1904
- if (pendingInboundBuffer.depth(selfAgentForFlush) > 0) {
1905
- const fr = redeliverBufferedInbound(
1906
- pendingInboundBuffer,
1907
- selfAgentForFlush,
1908
- (m) => {
1909
- const d = ipcServer.sendToAgent(selfAgentForFlush, m)
1910
- if (d) markClaudeBusyForInbound(m)
1911
- return d
1912
- },
1913
- inboundSpool,
1914
- trackRedeliveredInbound,
1915
- )
1916
- if (fr.redelivered > 0) {
1917
- process.stderr.write(
1918
- `telegram gateway: reply-released-gate flushed ${fr.redelivered}/${fr.drained} ` +
1919
- `held inbound for ${selfAgentForFlush}` +
1920
- `${fr.rebuffered > 0 ? ` (${fr.rebuffered} re-buffered)` : ''}\n`,
1921
- )
1922
- }
1923
- }
1924
- }
2190
+ // Component 1 (deliver-before-drain): routed through the shared
2191
+ // `drainBufferedIfAllowed`. `releaseTurnBufferGate` is called on EVERY
2192
+ // reply finalize — interim ack AND final answer. The serialize gate
2193
+ // checks `endingTurn.finalAnswerDelivered`, so an INTERIM ack ("On it")
2194
+ // does NOT drain the cross-topic buffer (its turn hasn't delivered its
2195
+ // real answer yet); only the final-answer reply releases it. That is
2196
+ // exactly the serialize-until-replied contract. When the kill switch is
2197
+ // off, or no turn handle is threaded, the helper falls back to the
2198
+ // legacy drain-on-idle behaviour.
2199
+ drainBufferedIfAllowed(endingTurn, 'reply-released-gate')
1925
2200
  }
1926
2201
 
1927
2202
  /**
@@ -1947,12 +2222,27 @@ function releaseTurnBufferGate(key: string): void {
1947
2222
function endCurrentTurnAtomic(turn: CurrentTurn): void {
  // Only the turn that is still current may end itself; a stale handle
  // (already ended, or superseded) is ignored.
  if (currentTurn !== turn) return
  currentTurn = null

  // Component 2 — drop any no-reply drain timer still pending on this turn;
  // the end is re-evaluated from scratch below. Null when never armed.
  const pendingDrainTimer = turn.noReplyDrainTimer
  if (pendingDrainTimer != null) {
    clearTimeout(pendingDrainTimer)
    turn.noReplyDrainTimer = null
  }

  // Hand `turn` to purgeReactionTracking explicitly: module-scope
  // currentTurn was just nulled, so this is the only way it can read the
  // ending turn's flags. Component 1: that call runs the serialize-gated
  // drain, which drains only if this turn delivered its final answer.
  const endedKey = statusKey(turn.sessionChatId, turn.sessionThreadId)
  purgeReactionTracking(endedKey, turn)

  // Component 2 — bounded no-reply escape hatch. If the turn ended without
  // delivering, the gated drain above did not run; arm the bounded timer so
  // a queued cross-topic message still releases. A no-op when the turn
  // delivered, nothing is buffered, or the serialize feature is off.
  armNoReplyDrainTimer(turn)
}
1957
2247
 
1958
2248
  /**
@@ -5446,10 +5736,16 @@ if (!STATIC) {
5446
5736
  // queued message ends either delivered or visibly retracted.
5447
5737
  inboundSpool?.sweepEscalations((e) => {
5448
5738
  const chat = e.msg.chatId
5449
- const threadOpts =
5739
+ const escThread =
5450
5740
  typeof e.msg.meta?.threadId === 'string' && e.msg.meta.threadId
5451
- ? { message_thread_id: Number(e.msg.meta.threadId) }
5452
- : {}
5741
+ ? Number(e.msg.meta.threadId)
5742
+ : undefined
5743
+ const threadOpts = escThread != null ? { message_thread_id: escThread } : {}
5744
+ // Reap any "Queued — replying in #X" placeholder for this topic first:
5745
+ // the message is being declared undeliverable, so the queued-status must
5746
+ // not dangle beside the "couldn't deliver" notice (idempotent best-effort;
5747
+ // a normal turn-start/turn-end reaps far sooner — this is the 15-min edge).
5748
+ reapQueuedStatus(chat, escThread)
5453
5749
  void swallowingApiCall(
5454
5750
  () =>
5455
5751
  bot.api.sendMessage(
@@ -5765,21 +6061,34 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
5765
6061
 
5766
6062
  assertAllowedChat(chat_id)
5767
6063
 
5768
- // Thread resolution precedence: (1) an explicit message_thread_id the
5769
- // model passed, else (2) THIS turn's own originating topic
5770
- // (turn-pinned, #1664), else (3) the chat's last-seen topic
5771
- // (chatThreadMap). Preferring the turn's own thread over the chat
5772
- // last-seen heuristic fixes synthetic turns (subagent handback/progress,
5773
- // cron) whose topic the model is never told and which never write
5774
- // chatThreadMap and is strictly more correct under multi-topic
5775
- // concurrency (a reply lands in the topic the turn came from, not
5776
- // whichever topic most recently received a message). DM: both are
5777
- // undefined → unchanged.
5778
- let threadId = resolveThreadId(
5779
- chat_id,
5780
- (args.message_thread_id as string | undefined) ??
5781
- (turn?.sessionThreadId != null ? turn.sessionThreadId : undefined),
5782
- )
6064
+ // Thread resolution precedence (ANSWER path, component 3 — turn-origin
6065
+ // routing): (1) explicit message_thread_id the model passed; else
6066
+ // (2) the ORIGIN turn's thread — the turn that OWNS this reply, matched
6067
+ // by origin_turn_id (the meta field the model echoes back). This is
6068
+ // authoritative even after `currentTurn` has flipped to a successor (the
6069
+ // Brevo→Meta late-reply bug). Else (3) the live turn's thread (legacy
6070
+ // #1664 fallback when no origin turn is resolvable). Answer paths
6071
+ // DELIBERATELY do NOT fall through to chatThreadMap last-seen — that
6072
+ // heuristic is what mis-routed a late reply to whichever topic most
6073
+ // recently received a message. DM: every tier is undefined → unchanged.
6074
+ // Kill switch off → exact legacy resolveThreadId precedence.
6075
+ let threadId: number | undefined
6076
+ if (TURN_ORIGIN_ROUTING_ENABLED) {
6077
+ const explicit = args.message_thread_id != null ? Number(args.message_thread_id) : undefined
6078
+ const originTurn = findTurnByOriginId(args.origin_turn_id as string | undefined)
6079
+ threadId = resolveAnswerThreadId({
6080
+ explicitThreadId: Number.isFinite(explicit as number) ? (explicit as number) : undefined,
6081
+ originResolved: originTurn != null,
6082
+ originThreadId: originTurn?.sessionThreadId,
6083
+ liveThreadId: turn?.sessionThreadId,
6084
+ })
6085
+ } else {
6086
+ threadId = resolveThreadId(
6087
+ chat_id,
6088
+ (args.message_thread_id as string | undefined) ??
6089
+ (turn?.sessionThreadId != null ? turn.sessionThreadId : undefined),
6090
+ )
6091
+ }
5783
6092
 
5784
6093
  if (reply_to == null && quoteOptIn && HISTORY_ENABLED) {
5785
6094
  try {
@@ -6360,7 +6669,19 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
6360
6669
  // bidirectional ladder + the steer-vs-queue logic at
6361
6670
  // gateway.ts:8322 which reads `activeStatusReactions`). Only
6362
6671
  // the buffer gate flips.
6363
- releaseTurnBufferGate(statusKey(chat_id, threadId))
6672
+ //
6673
+ // Component 1: pass the turn so the serialize gate sees this turn's
6674
+ // `finalAnswerDelivered` (set just above for final-answer replies).
6675
+ // An interim ack leaves it false → the cross-topic buffer does NOT
6676
+ // drain yet; the real answer's reply releases it.
6677
+ releaseTurnBufferGate(statusKey(chat_id, threadId), turn ?? undefined)
6678
+ // Component 5: the final answer landed — reap the queued-status
6679
+ // placeholder for THIS turn's topic. Key on the turn's own session
6680
+ // thread (where the placeholder was posted / promoted), not the
6681
+ // answer's possibly-overridden threadId.
6682
+ if (turn?.finalAnswerDelivered === true) {
6683
+ reapQueuedStatus(turn.sessionChatId, turn.sessionThreadId)
6684
+ }
6364
6685
  }
6365
6686
 
6366
6687
  process.stderr.write(`telegram channel: reply: finalized chatId=${chat_id} messageIds=[${sentIds.join(',')}] chunks=${chunks.length}\n`)
@@ -6378,15 +6699,31 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
6378
6699
  const turn = currentTurn
6379
6700
  if (!args.chat_id) throw new Error('stream_reply: chat_id is required')
6380
6701
  if (args.text == null || args.text === '') throw new Error('stream_reply: text is required and cannot be empty')
6381
- // Thread precedence (matches executeReply): when the model passes no
6382
- // explicit message_thread_id, fall back to THIS turn's originating
6383
- // topic before handleStreamReply's chatThreadMap last-seen heuristic.
6384
- // Injecting here threads every downstream consumer consistently — the
6385
- // dedup key, the voice-scrub metric, the draft transport, and the send —
6386
- // so a streamed handback/synthetic-turn reply lands in the right
6387
- // supergroup topic. DM: sessionThreadId undefined → unchanged.
6388
- if (args.message_thread_id == null && turn?.sessionThreadId != null) {
6389
- args.message_thread_id = String(turn.sessionThreadId)
6702
+ // Thread precedence (matches executeReply; component 3 — turn-origin
6703
+ // routing): when the model passes no explicit message_thread_id, inject
6704
+ // the ORIGIN turn's thread (matched by origin_turn_id) — authoritative
6705
+ // even after currentTurn flips — falling back to the live turn's thread
6706
+ // when no origin is resolvable (legacy #1664). Injecting into
6707
+ // args.message_thread_id threads every downstream consumer consistently
6708
+ // (dedup key, voice-scrub metric, draft transport, the send), so a
6709
+ // streamed handback/synthetic-turn reply lands in the right supergroup
6710
+ // topic and a late stream-reply can't be stolen by a successor turn. DM:
6711
+ // every tier undefined → unchanged. Kill switch off → legacy live-turn
6712
+ // injection only.
6713
+ if (args.message_thread_id == null) {
6714
+ let injected: number | undefined
6715
+ if (TURN_ORIGIN_ROUTING_ENABLED) {
6716
+ const originTurn = findTurnByOriginId(args.origin_turn_id as string | undefined)
6717
+ injected = resolveAnswerThreadId({
6718
+ explicitThreadId: undefined,
6719
+ originResolved: originTurn != null,
6720
+ originThreadId: originTurn?.sessionThreadId,
6721
+ liveThreadId: turn?.sessionThreadId,
6722
+ })
6723
+ } else {
6724
+ injected = turn?.sessionThreadId
6725
+ }
6726
+ if (injected != null) args.message_thread_id = String(injected)
6390
6727
  }
6391
6728
 
6392
6729
  // Outbound secret scrub (#2044): mask before the dedup key, the draft
@@ -6674,7 +7011,16 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
6674
7011
  {
6675
7012
  const sChat = args.chat_id as string
6676
7013
  const sThread = resolveThreadId(sChat, args.message_thread_id as string | undefined)
6677
- releaseTurnBufferGate(statusKey(sChat, sThread))
7014
+ // Component 1: pass the turn (finalAnswerDelivered set above for a
7015
+ // final stream emit). Interim stream chunks leave it false → no
7016
+ // cross-topic drain until the done=true / substantive emit lands.
7017
+ releaseTurnBufferGate(statusKey(sChat, sThread), turn ?? undefined)
7018
+ // Component 5: reap the queued-status placeholder for THIS turn's
7019
+ // topic once the final answer landed. Key on the turn's own session
7020
+ // thread (where the placeholder lives), not the answer's resolved one.
7021
+ if (turn?.finalAnswerDelivered === true) {
7022
+ reapQueuedStatus(turn.sessionChatId, turn.sessionThreadId)
7023
+ }
6678
7024
  }
6679
7025
  return { content: [{ type: 'text', text: `${result.status} (id: ${result.messageId ?? 'pending'})` }] }
6680
7026
  }
@@ -8121,8 +8467,18 @@ async function drainActivitySummary(turn: CurrentTurn): Promise<void> {
8121
8467
  * Called on the first reply (hand-off) and again at turn_end (no-reply safety
8122
8468
  * net); finalize edits are idempotent (a 'message is not modified' on the
8123
8469
  * second call is swallowed).
8470
+ *
8471
+ * `finalHtmlOverride` (finalize path only): a render captured by the caller
8472
+ * BEFORE it tore down turn state the finalize render depends on. The
8473
+ * foreground handoff-clear path passes this — it deletes the just-finished
8474
+ * sub-agent's narrative right after this call, so the async
8475
+ * `composeTurnActivity(turn, true)` below would see an emptied feed (and, on
8476
+ * ack-first turns, empty `mirrorLines`), render null, and skip the finalize —
8477
+ * freezing the last live "→ in-progress" line. The captured render keeps the
8478
+ * persisted record reading done (✓). Omitted → compute it here (the common
8479
+ * reply/turn_end callers, where state is stable).
8124
8480
  */
8125
- function clearActivitySummary(turn: CurrentTurn): void {
8481
+ function clearActivitySummary(turn: CurrentTurn, finalHtmlOverride?: string | null): void {
8126
8482
  const chat = turn.sessionChatId
8127
8483
  const thread = turn.sessionThreadId
8128
8484
  const inFlight = turn.activityInFlight ?? Promise.resolve()
@@ -8143,7 +8499,8 @@ function clearActivitySummary(turn: CurrentTurn): void {
8143
8499
  }
8144
8500
  // Default: leave the status message as a record, edited to a terminal
8145
8501
  // all-done state so it doesn't freeze on a misleading "→ in-progress" line.
8146
- const finalHtml = composeTurnActivity(turn, true)
8502
+ const finalHtml =
8503
+ finalHtmlOverride !== undefined ? finalHtmlOverride : composeTurnActivity(turn, true)
8147
8504
  if (finalHtml == null) return
8148
8505
  try {
8149
8506
  await robustApiCall(
@@ -8200,9 +8557,19 @@ function handleSessionEvent(ev: SessionEvent): void {
8200
8557
  // handler captures `const turn = currentTurn` at entry, so a
8201
8558
  // captured-then-awaited read can't reattribute to the new turn.
8202
8559
  const startedAt = Date.now()
8560
+ // Component 3 — stable per-turn identity. For a real inbound this
8561
+ // matches the `origin_turn_id` stamped into the inbound meta at
8562
+ // build time (same chat/thread/messageId). Synthetic turns (cron /
8563
+ // handback — no messageId) get a unique startedAt-based fallback id
8564
+ // that no reply will ever echo, so they correctly fall through to
8565
+ // the live-turn routing in resolveAnswerThreadId.
8566
+ const enqThreadIdNum = ev.threadId != null ? Number(ev.threadId) : undefined
8567
+ const turnId =
8568
+ deriveTurnId(ev.chatId, enqThreadIdNum ?? null, ev.messageId)
8569
+ ?? `${chatKey(ev.chatId, enqThreadIdNum ?? null)}#synthetic-${startedAt}`
8203
8570
  const next: CurrentTurn = {
8204
8571
  sessionChatId: ev.chatId,
8205
- sessionThreadId: ev.threadId != null ? Number(ev.threadId) : undefined,
8572
+ sessionThreadId: enqThreadIdNum,
8206
8573
  sourceMessageId: ev.messageId != null && /^\d+$/.test(ev.messageId)
8207
8574
  ? Number(ev.messageId)
8208
8575
  : null,
@@ -8215,7 +8582,9 @@ function handleSessionEvent(ev: SessionEvent): void {
8215
8582
  silentAnchorText: '',
8216
8583
  capturedText: [],
8217
8584
  orphanedReplyTimeoutId: null,
8585
+ turnId,
8218
8586
  registryKey: null,
8587
+ noReplyDrainTimer: null,
8219
8588
  lastAssistantMsgId: null,
8220
8589
  lastAssistantDone: false,
8221
8590
  toolCallCount: 0,
@@ -8229,6 +8598,14 @@ function handleSessionEvent(ev: SessionEvent): void {
8229
8598
  isDm: isDmChatId(ev.chatId),
8230
8599
  }
8231
8600
  currentTurn = next
8601
+ // Component 3 — retain in the bounded recently-ended registry so a
8602
+ // LATE reply (landing after currentTurn flips to a successor) can
8603
+ // still resolve THIS turn's origin thread by its turnId.
8604
+ rememberRecentTurn(next)
8605
+ // Component 5 (Hook B) — this turn's topic had a queued placeholder
8606
+ // from Hook A; promote it to "On it — replying now." (deleted later
8607
+ // when the answer lands). No-op when there's no placeholder / DM.
8608
+ promoteQueuedStatus(ev.chatId, enqThreadIdNum)
8232
8609
  // Ack inbound delivery (the marko drop-wedge): claude actually started
8233
8610
  // this turn, so its delivered inbound landed — stop tracking it for
8234
8611
  // re-delivery. `enqueue` carries the same chat/thread the inbound was
@@ -8997,6 +9374,13 @@ function handleSessionEvent(ev: SessionEvent): void {
8997
9374
 
8998
9375
  if (flushDecision.kind === 'flush') {
8999
9376
  let capturedText = flushDecision.text
9377
+ // Component 3 — origin-thread backstop. `chatId`/`threadId` are
9378
+ // captured from the turn atom (turn.sessionChatId/sessionThreadId)
9379
+ // at the top of this turn_end handler, NOT from the live
9380
+ // currentTurn and NEVER from chatThreadMap. So the turn-flush
9381
+ // answer always lands in the thread the turn originated from, even
9382
+ // if currentTurn has flipped — the same guarantee the reply path
9383
+ // gets via origin_turn_id.
9000
9384
  const backstopChatId = chatId
9001
9385
  const backstopThreadId = threadId
9002
9386
  const backstopCtrl = ctrl
@@ -10812,6 +11196,21 @@ async function handleInbound(
10812
11196
  }
10813
11197
 
10814
11198
  // Dispatch to connected bridge via IPC
11199
+ // Component 3 — stable origin turn id stamped into the meta the model
11200
+ // reads, so a reply can echo it back (origin_turn_id) and the gateway
11201
+ // can route the answer to the turn that owns it even after currentTurn
11202
+ // flips. Derived from chat/thread/messageId, matching the turnId the
11203
+ // enqueue handler computes for the turn this inbound starts.
11204
+ const originTurnId = deriveTurnId(chat_id, messageThreadId ?? null, msgId)
11205
+ // Component 4 — per-turn topic framing. In a forum supergroup a queued
11206
+ // cross-topic message could tempt the model to also answer a pending
11207
+ // question from another topic. A one-line directive (only for topic
11208
+ // inbounds, only when framing is enabled) pins the model to THIS
11209
+ // message's topic. DMs / non-topic chats get nothing.
11210
+ const topicScope =
11211
+ TOPIC_FRAMING_ENABLED && messageThreadId != null
11212
+ ? 'This message belongs to the current topic only — answer ONLY this question, in this topic. Do not also answer a pending message from another topic.'
11213
+ : undefined
10815
11214
  const inboundMsg: InboundMessage = {
10816
11215
  type: 'inbound',
10817
11216
  chatId: chat_id,
@@ -10836,6 +11235,14 @@ async function handleInbound(
10836
11235
  user_id: String(from.id),
10837
11236
  ts: new Date((ctx.message?.date ?? 0) * 1000).toISOString(),
10838
11237
  ...(messageThreadId != null ? { message_thread_id: String(messageThreadId) } : {}),
11238
+ // Component 3 — origin turn id. The model is told to pass this back
11239
+ // as origin_turn_id on the reply so the answer routes to the topic
11240
+ // this message came from (turn-origin routing). The reply tool also
11241
+ // resolves it from the live/recent turn registry, so a model that
11242
+ // omits it still routes correctly via the live-turn fallback.
11243
+ ...(originTurnId != null ? { origin_turn_id: originTurnId } : {}),
11244
+ // Component 4 — per-turn topic-scope directive (supergroup topics).
11245
+ ...(topicScope != null ? { topic_scope: topicScope } : {}),
10839
11246
  ...(imagePath ? { image_path: imagePath } : {}),
10840
11247
  // Telegram-native reply context (issue #119). When set, the user
10841
11248
  // long-pressed a prior message and chose "Reply" — the agent should
@@ -10949,6 +11356,21 @@ async function handleInbound(
10949
11356
  `telegram gateway: inbound held mid-turn agent=${selfAgent} ` +
10950
11357
  `chat=${chat_id} msg=${msgId ?? '-'} — will flush on turn-complete\n`,
10951
11358
  )
11359
+ // Component 5 (Hook A) — queued-status UX. When this buffered inbound
11360
+ // is in a DIFFERENT forum topic than the in-flight turn, the user in
11361
+ // that topic otherwise sees only a 👀 reaction (or nothing). Post one
11362
+ // self-updating status into the buffered message's OWN topic so they
11363
+ // know they're queued. Suppressed for DMs (no topics) and same-topic
11364
+ // queues (the in-flight turn's own card already covers them).
11365
+ const inFlightThread = currentTurn?.sessionThreadId
11366
+ if (
11367
+ QUEUED_STATUS_UX_ENABLED &&
11368
+ !isDmChatId(chat_id) &&
11369
+ messageThreadId != null &&
11370
+ messageThreadId !== inFlightThread
11371
+ ) {
11372
+ postQueuedStatus(chat_id, messageThreadId, inFlightThread)
11373
+ }
10952
11374
  return
10953
11375
  }
10954
11376
 
@@ -18867,20 +19289,32 @@ void (async () => {
18867
19289
  // tool result, so there's no handback to deliver. Reaction
18868
19290
  // promotion already ran above.
18869
19291
  const turn = currentTurn
18870
- const removed = turn != null && turn.foregroundSubAgents.delete(agentId)
18871
- if (turn != null && removed) {
19292
+ // has()-then-delete (not delete-up-front): the handoff-clear
19293
+ // branch must render the finished sub-agent's steps as done
19294
+ // WHILE its narrative is still in the map, then remove it.
19295
+ if (turn != null && turn.foregroundSubAgents.has(agentId)) {
18872
19296
  const action = foregroundFinishAction({
18873
- removed,
19297
+ removed: true,
18874
19298
  replyCalled: turn.replyCalled,
18875
- remainingForeground: turn.foregroundSubAgents.size,
19299
+ // size AFTER this agent's impending removal
19300
+ remainingForeground: turn.foregroundSubAgents.size - 1,
18876
19301
  })
18877
19302
  if (action === 'handoff-clear') {
18878
19303
  // Post-ack: the last foreground sub-agent finished and
18879
19304
  // the parent will now produce its answer inline. Hand
18880
19305
  // the re-opened feed off to the answer, mirroring the
18881
- // first-reply clear (turn_end is the safety net).
18882
- clearActivitySummary(turn)
19306
+ // first-reply clear (turn_end is the safety net). Capture
19307
+ // the finalized render (child steps done ✓) BEFORE the
19308
+ // delete, then pass it so the persisted record doesn't
19309
+ // freeze on a stale "→ in-progress" line (the emptied-feed
19310
+ // skip — see clearActivitySummary's finalHtmlOverride doc).
19311
+ const finalHtml = composeTurnActivity(turn, true)
19312
+ turn.foregroundSubAgents.delete(agentId)
19313
+ clearActivitySummary(turn, finalHtml)
18883
19314
  } else if (action === 'recompose') {
19315
+ // Collapse the finished sub-agent's block: delete first,
19316
+ // then render WITHOUT it (live feed keeps its → step).
19317
+ turn.foregroundSubAgents.delete(agentId)
18884
19318
  const rendered = composeTurnActivity(turn)
18885
19319
  if (rendered != null) {
18886
19320
  turn.activityPendingRender = rendered