switchroom 0.14.55 → 0.14.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -281,6 +281,8 @@ import { createPendingInboundBuffer, redeliverBufferedInbound, idleDrainTick } f
281
281
  import { createInboundSpool } from './inbound-spool.js'
282
282
  import { purgeStaleTurnsForChat } from './turn-state-purge.js'
283
283
  import { decideInboundDelivery } from './inbound-delivery-gate.js'
284
+ import { mayDrainBufferedInbound, shouldArmNoReplyDrain } from './serialize-drain-gate.js'
285
+ import { resolveAnswerThreadId } from './answer-thread-resolve.js'
284
286
  import {
285
287
  createDeliveryQueue,
286
288
  trackDelivery,
@@ -1267,6 +1269,14 @@ if (!STATIC) setInterval(checkApprovals, 5000).unref()
1267
1269
  const chatThreadMap = new Map<string, number>()
1268
1270
  const activeStatusReactions = new Map<string, StatusReactionController>()
1269
1271
  const activeReactionMsgIds = new Map<string, { chatId: string; messageId: number }>()
1272
+ // Component 5 (queued-status UX). When a cross-topic inbound buffers behind
1273
+ // an in-flight turn in another topic, we post ONE self-updating status into
1274
+ // the buffered message's OWN topic ("Queued — replying in <other topic>
1275
+ // first" → "On it — replying now" → deleted when the answer lands). Keyed
1276
+ // by chatKey(chat_id, bufferedThread). Delete-on-answer: never a dangling
1277
+ // placeholder. Reaped on answer (executeReply/stream), on turn-flush, and
1278
+ // in purgeReactionTracking cleanup so an abnormal turn-end can't strand it.
1279
+ const queuedStatusMsgIds = new Map<string, { chatId: string; threadId: number; messageId: number }>()
1270
1280
  // Reactions whose terminal 👍 is deferred because a background sub-agent
1271
1281
  // worker was still running when the parent's `turn_end` fired. Painting 👍
1272
1282
  // then would read as "done / nothing happening" while the worker keeps
@@ -1369,6 +1379,47 @@ const DELIVERY_CONFIRM_TIMEOUT_MS =
1369
1379
  const DELIVERY_CONFIRM_SWEEP_MS = 5_000
1370
1380
  const deliveryQueue = createDeliveryQueue<InboundMessage>()
1371
1381
 
1382
+ // ─── Serialize-until-replied (multitopic reply-routing) ───────────────────
1383
+ // Component 1 (deliver-before-drain gate). A buffered cross-topic inbound
1384
+ // drains ONLY after the just-ended turn delivered its reply to its own
1385
+ // thread — see `serialize-drain-gate.ts` for the full rationale (the
1386
+ // Brevo→Meta wrong-topic bug). Kill switch off (=0) → legacy behaviour:
1387
+ // drain on the bare turn-end signal.
1388
+ const SERIALIZE_UNTIL_REPLIED_ENABLED =
1389
+ process.env.SWITCHROOM_SERIALIZE_UNTIL_REPLIED !== '0'
1390
+ // Component 2 (bounded no-reply escape hatch). A turn that legitimately
1391
+ // ends with NO reply (handback ack, NO_REPLY marker, silent-end) sets
1392
+ // finalAnswerDelivered=false and would block the serialize gate forever.
1393
+ // When such a turn ends with a buffered inbound waiting, arm a short
1394
+ // bounded timer that force-drains regardless. The 300s silence-poke
1395
+ // unwedge fallback remains the long-stop. Tunable for tests/forensics;
1396
+ // clamped to a positive finite value (a degenerate override would either
1397
+ // drain instantly — defeating serialization — or, if negative, be
1398
+ // rejected). To disable the serialize feature entirely use
1399
+ // SWITCHROOM_SERIALIZE_UNTIL_REPLIED=0, not a degenerate timeout.
1400
+ const _noReplyDrainRaw = process.env.SWITCHROOM_SERIALIZE_NOREPLY_DRAIN_MS
1401
+ const _noReplyDrainParsed =
1402
+ _noReplyDrainRaw != null && _noReplyDrainRaw !== '' ? Number(_noReplyDrainRaw) : 2_500
1403
+ const SERIALIZE_NOREPLY_DRAIN_MS =
1404
+ Number.isFinite(_noReplyDrainParsed) && _noReplyDrainParsed > 0 ? _noReplyDrainParsed : 2_500
1405
+ // Component 3 (turn-origin reply routing). Resolve an answer's thread from
1406
+ // the turn that OWNS the reply (matched by origin_turn_id), not the live
1407
+ // currentTurn if it has flipped. Kill switch off (=0) → legacy turn-pinned
1408
+ // behaviour (#1664: thread from the live currentTurn capture).
1409
+ const TURN_ORIGIN_ROUTING_ENABLED =
1410
+ process.env.SWITCHROOM_TURN_ORIGIN_ROUTING !== '0'
1411
+ // Component 4 (per-turn topic framing). Add a one-line directive to the
1412
+ // channel meta + bridge instructions telling the model to answer ONLY the
1413
+ // current message's topic. Kill switch off (=0) → no framing field.
1414
+ const TOPIC_FRAMING_ENABLED =
1415
+ process.env.SWITCHROOM_TOPIC_FRAMING !== '0'
1416
+ // Component 5 (queued-status UX). Post a self-updating "Queued — replying
1417
+ // in <other topic> first" status into a cross-topic buffered message's own
1418
+ // topic, then edit→delete it as the turn progresses. Kill switch off (=0)
1419
+ // → no placeholder (the 👀 ack reaction still fires). Delete-on-answer.
1420
+ const QUEUED_STATUS_UX_ENABLED =
1421
+ process.env.SWITCHROOM_QUEUED_STATUS_UX !== '0'
1422
+
1372
1423
  /**
1373
1424
  * Authoritative "is a turn in flight?" for every gate that previously
1374
1425
  * read `claudeBusyKeys.size`. PR 3b cutover (extends PR 3a's bridgeUp
@@ -1524,7 +1575,22 @@ type CurrentTurn = {
1524
1575
  silentAnchorText: string
1525
1576
  capturedText: string[]
1526
1577
  orphanedReplyTimeoutId: ReturnType<typeof setTimeout> | null
1578
+ // Component 3 (turn-origin reply routing). A stable per-turn identity,
1579
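+ // `${chatKey}#${messageId}` as built by `deriveTurnId` (NOTE(review): message-less turns presumably fall back to a `#${startedAt}` form — confirm at the assignment site), assigned when the turn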
1580
+ // starts and stamped into the inbound meta (`origin_turn_id`) so a reply
1581
+ // can be matched back to the turn that OWNS it — even after `currentTurn`
1582
+ // has flipped to a successor. Recently-ended turns are retained in
1583
+ // `recentTurnsById` so a late reply (the Brevo answer landing 42s after
1584
+ // turn-end, when currentTurn already points at Meta) routes to its
1585
+ // origin thread instead of the live (wrong) turn's thread.
1586
+ turnId: string
1527
1587
  registryKey: string | null
1588
+ // Component 2 (bounded no-reply escape hatch). When a turn ends with
1589
+ // finalAnswerDelivered=false AND a buffered cross-topic inbound is
1590
+ // waiting, this timer force-drains after SERIALIZE_NOREPLY_DRAIN_MS so
1591
+ // the queue can never wedge on a legitimately silent turn. Armed in
1592
+ // `endCurrentTurnAtomic`, cleared if the turn delivers first.
1593
+ noReplyDrainTimer: ReturnType<typeof setTimeout> | null
1528
1594
  // Last assistant outbound message id for the current turn — populated
1529
1595
  // on reply / stream_reply emit, captured into recordTurnEnd. Stage 4
1530
1596
  // reads this on resume to thread-jump back to the in-flight conversation.
@@ -1583,6 +1649,134 @@ type CurrentTurn = {
1583
1649
 
1584
1650
  let currentTurn: CurrentTurn | null = null
1585
1651
 
1652
+ // Component 3 (turn-origin reply routing). Recently-ended turns retained
1653
+ // by `turnId` so a LATE reply (the Brevo answer landing ~42s after
1654
+ // turn-end, after `currentTurn` has flipped to the Meta turn) can still
1655
+ // resolve its ORIGIN turn's thread instead of the live successor's. Bug:
1656
+ // `executeReply` captured `const turn = currentTurn` at entry and, when
1657
+ // the model omitted message_thread_id, routed to `turn.sessionThreadId`
1658
+ // — the flipped turn's thread. Pinning by origin turnId closes that.
1659
+ // Bounded LRU (insertion-ordered Map) so it can't grow unbounded across
1660
+ // a long-lived supergroup session.
1661
+ const RECENT_TURNS_MAX = 32
1662
+ const recentTurnsById = new Map<string, CurrentTurn>()
1663
+ function rememberRecentTurn(turn: CurrentTurn): void {
1664
+ recentTurnsById.set(turn.turnId, turn)
1665
+ while (recentTurnsById.size > RECENT_TURNS_MAX) {
1666
+ const oldest = recentTurnsById.keys().next().value
1667
+ if (oldest === undefined) break
1668
+ recentTurnsById.delete(oldest)
1669
+ }
1670
+ }
1671
+ /**
1672
+ * Component 3 — derive the stable per-turn identity from the chat, thread,
1673
+ * and originating message id. Stamped into the inbound meta at build time
1674
+ * (`origin_turn_id`) AND reconstructed at enqueue time from the same three
1675
+ * values, so the id stamped on the message the model reads matches the id
1676
+ * on the turn the gateway started for it. Using the message id (not the
1677
+ * not-yet-known startedAt) is what lets the two sites agree. Returns null
1678
+ * when there is no message id (synthetic / cron / handback turns have no
1679
+ * originating inbound — they never need origin routing, the live turn IS
1680
+ * the origin).
1681
+ */
1682
+ function deriveTurnId(
1683
+ chatId: string,
1684
+ threadId: number | null | undefined,
1685
+ messageId: string | number | null | undefined,
1686
+ ): string | null {
1687
+ if (messageId == null || messageId === '' || String(messageId) === '0') return null
1688
+ return `${chatKey(chatId, threadId ?? null)}#${messageId}`
1689
+ }
1690
+
1691
+ /**
1692
+ * Component 3 — resolve the turn that OWNS a reply by its `origin_turn_id`
1693
+ * (the meta field the model echoes back from the channel tag). Checks the
1694
+ * live turn first, then the recently-ended registry. Returns null when the
1695
+ * id is absent or unknown (a model that didn't echo it, or a turn evicted
1696
+ * from the bounded registry) — callers then fall back to the existing
1697
+ * turn-pinned / chatThreadMap precedence.
1698
+ */
1699
+ function findTurnByOriginId(originTurnId: string | null | undefined): CurrentTurn | null {
1700
+ if (originTurnId == null || originTurnId === '') return null
1701
+ if (currentTurn != null && currentTurn.turnId === originTurnId) return currentTurn
1702
+ return recentTurnsById.get(originTurnId) ?? null
1703
+ }
1704
+
1705
+ /**
1706
+ * Component 5 — post a "Queued — replying in another topic first" status
1707
+ * into a cross-topic buffered message's OWN topic. Fire-and-forget through
1708
+ * the swallowing wrapper (carries message_thread_id so it lands in the
1709
+ * right topic). Suppressed for DMs and same-topic queues by the CALLER.
1710
+ * Records the sent message id keyed by statusKey(chatId, bufferedThread) for
1711
+ * the later edit (Hook B) / delete (Hook C / reap).
1712
+ */
1713
+ function postQueuedStatus(chatId: string, bufferedThread: number, inFlightThread: number | undefined): void {
1714
+ if (!QUEUED_STATUS_UX_ENABLED) return
1715
+ const key = statusKey(chatId, bufferedThread)
1716
+ // Idempotent: a second buffered message in the same topic re-uses the
1717
+ // existing placeholder (don't stack).
1718
+ if (queuedStatusMsgIds.has(key)) return
1719
+ const otherTopic = inFlightThread != null ? `another topic` : `another conversation`
1720
+ const text = `⏳ Queued — replying in ${otherTopic} first, then I'll get to this.`
1721
+ void (async () => {
1722
+ const sent = await swallowingApiCall(
1723
+ () =>
1724
+ bot.api.sendMessage(chatId, text, { message_thread_id: bufferedThread }),
1725
+ { chat_id: chatId, verb: 'queued-status.post', threadId: bufferedThread },
1726
+ )
1727
+ const messageId = (sent as { message_id?: number } | undefined)?.message_id
1728
+ if (typeof messageId !== 'number') return
1729
+ // Re-check after the await: the turn may have already started and
1730
+ // edited/deleted via Hook B/C in the gap. If the key is now occupied
1731
+ // by a different placeholder, delete this orphan; otherwise record it.
1732
+ if (queuedStatusMsgIds.has(key)) {
1733
+ void swallowingApiCall(
1734
+ () => bot.api.deleteMessage(chatId, messageId),
1735
+ { chat_id: chatId, verb: 'queued-status.post-race-cleanup', threadId: bufferedThread },
1736
+ )
1737
+ return
1738
+ }
1739
+ queuedStatusMsgIds.set(key, { chatId, threadId: bufferedThread, messageId })
1740
+ })()
1741
+ }
1742
+
1743
+ /**
1744
+ * Component 5 (Hook B) — the buffered message's turn just STARTED. Edit the
1745
+ * queued placeholder in its topic to "On it — replying now." Best-effort.
1746
+ */
1747
+ function promoteQueuedStatus(chatId: string, thread: number | undefined): void {
1748
+ if (!QUEUED_STATUS_UX_ENABLED) return
1749
+ if (thread == null) return
1750
+ const key = statusKey(chatId, thread)
1751
+ const entry = queuedStatusMsgIds.get(key)
1752
+ if (entry == null) return
1753
+ // editMessageText targets a specific message id, which already implies
1754
+ // its thread — grammy's typed signature has no message_thread_id (the
1755
+ // swallowingApiCall opts still carry threadId for the deleted-topic
1756
+ // fallback policy).
1757
+ void swallowingApiCall(
1758
+ () =>
1759
+ bot.api.editMessageText(chatId, entry.messageId, '✍️ On it — replying now.', {}),
1760
+ { chat_id: chatId, verb: 'queued-status.promote', threadId: thread },
1761
+ )
1762
+ }
1763
+
1764
+ /**
1765
+ * Component 5 (Hook C / reap) — the answer landed (or the turn ended
1766
+ * abnormally). Delete the queued placeholder and drop the map entry so the
1767
+ * topic shows the real answer, never a stale "Queued" line. Idempotent.
1768
+ */
1769
+ function reapQueuedStatus(chatId: string, thread: number | undefined): void {
1770
+ const key = statusKey(chatId, thread ?? null)
1771
+ const entry = queuedStatusMsgIds.get(key)
1772
+ if (entry == null) return
1773
+ queuedStatusMsgIds.delete(key)
1774
+ void swallowingApiCall(
1775
+ () => bot.api.deleteMessage(chatId, entry.messageId),
1776
+ { chat_id: chatId, verb: 'queued-status.reap', ...(entry.threadId != null ? { threadId: entry.threadId } : {}) },
1777
+ )
1778
+ }
1779
+
1586
1780
  // Problem B — deferred safe-boundary interrupt.
1587
1781
  //
1588
1782
  // `toolFlightTracker` mirrors the session-event stream to know whether a
@@ -1722,6 +1916,105 @@ function streamKey(chatId: string, threadId?: number | null): string {
1722
1916
  return chatKey(chatId, threadId)
1723
1917
  }
1724
1918
 
1919
+ /**
1920
+ * Component 1 — deliver-before-drain. The single chokepoint that both
1921
+ * turn-end drain sites (`purgeReactionTracking`, `releaseTurnBufferGate`)
1922
+ * route through. Drains the pending-inbound buffer ONLY when
1923
+ * `mayDrainBufferedInbound` says so: claude is idle AND (the
1924
+ * serialize-until-replied kill switch is off, OR there is no ending-turn
1925
+ * handle, OR the ending turn delivered its final answer). A no-reply turn
1926
+ * (finalAnswerDelivered=false) deliberately does NOT drain here — the
1927
+ * bounded escape-hatch timer in `endCurrentTurnAtomic` covers that
1928
+ * liveness case. The 300s silence-poke fallback (`redeliverBufferedInbound`
1929
+ * called directly, bypassing this gate) remains the long-stop.
1930
+ */
1931
+ function performBufferDrain(reason: string): void {
1932
+ const selfAgentForFlush = process.env.SWITCHROOM_AGENT_NAME ?? ''
1933
+ if (pendingInboundBuffer.depth(selfAgentForFlush) <= 0) return
1934
+ const fr = redeliverBufferedInbound(
1935
+ pendingInboundBuffer,
1936
+ selfAgentForFlush,
1937
+ (m) => {
1938
+ const d = ipcServer.sendToAgent(selfAgentForFlush, m)
1939
+ if (d) markClaudeBusyForInbound(m)
1940
+ return d
1941
+ },
1942
+ inboundSpool,
1943
+ trackRedeliveredInbound,
1944
+ )
1945
+ if (fr.redelivered > 0) {
1946
+ process.stderr.write(
1947
+ `telegram gateway: ${reason} flushed ${fr.redelivered}/${fr.drained} ` +
1948
+ `held inbound for ${selfAgentForFlush}` +
1949
+ `${fr.rebuffered > 0 ? ` (${fr.rebuffered} re-buffered)` : ''}\n`,
1950
+ )
1951
+ }
1952
+ }
1953
+
1954
+ function drainBufferedIfAllowed(endingTurn: CurrentTurn | undefined, reason: string): void {
1955
+ if (
1956
+ !mayDrainBufferedInbound({
1957
+ turnInFlight: turnInFlightForGate(),
1958
+ endingTurnFinalAnswerDelivered: endingTurn?.finalAnswerDelivered ?? null,
1959
+ enabled: SERIALIZE_UNTIL_REPLIED_ENABLED,
1960
+ })
1961
+ ) {
1962
+ return
1963
+ }
1964
+ performBufferDrain(reason)
1965
+ }
1966
+
1967
+ /**
1968
+ * Component 2 — bounded no-reply escape hatch (THE liveness guarantee).
1969
+ *
1970
+ * A turn that legitimately ends with NO reply (handback ack, NO_REPLY /
1971
+ * HEARTBEAT_OK marker, silent-end, greeting already handled) sets
1972
+ * `finalAnswerDelivered=false`. Under component 1's serialize gate that
1973
+ * turn would block `drainBufferedIfAllowed` FOREVER — and the 300s
1974
+ * silence-poke is disarmed for these silent-end turns, so without this
1975
+ * timer a queued cross-topic message would never be released (a permanent
1976
+ * wedge). This timer is the bounded force-drain: SERIALIZE_NOREPLY_DRAIN_MS
1977
+ * (default 2500ms) after such a turn ends with a buffered inbound waiting,
1978
+ * drain unconditionally — the serialize gate's delivered-check is bypassed
1979
+ * because the turn ended for real with no reply coming. The drain still
1980
+ * respects `turnInFlightForGate()` indirectly: if a new turn started in the
1981
+ * window (e.g. the 300s fallback or another path drained first), the buffer
1982
+ * is already empty so `performBufferDrain` is a depth-checked no-op.
1983
+ *
1984
+ * Liveness proof: a no-reply turn followed by a queued cross-topic message
1985
+ * releases within SERIALIZE_NOREPLY_DRAIN_MS. The 300s silence-poke
1986
+ * unwedge fallback (`redeliverBufferedInbound` at the silence-poke
1987
+ * framework-fallback, called directly) remains the independent long-stop.
1988
+ */
1989
+ function armNoReplyDrainTimer(turn: CurrentTurn): void {
1990
+ const selfAgent = process.env.SWITCHROOM_AGENT_NAME ?? ''
1991
+ // Pure guard (shared with the test): arm only for a no-reply turn that
1992
+ // has a buffered inbound waiting, and only when the feature is enabled.
1993
+ if (
1994
+ !shouldArmNoReplyDrain({
1995
+ enabled: SERIALIZE_UNTIL_REPLIED_ENABLED,
1996
+ finalAnswerDelivered: turn.finalAnswerDelivered,
1997
+ bufferedDepth: pendingInboundBuffer.depth(selfAgent),
1998
+ })
1999
+ ) {
2000
+ return
2001
+ }
2002
+ // Idempotent: clear any prior timer for this turn before re-arming.
2003
+ if (turn.noReplyDrainTimer != null) {
2004
+ clearTimeout(turn.noReplyDrainTimer)
2005
+ turn.noReplyDrainTimer = null
2006
+ }
2007
+ turn.noReplyDrainTimer = setTimeout(() => {
2008
+ turn.noReplyDrainTimer = null
2009
+ process.stderr.write(
2010
+ `telegram gateway: no-reply bounded drain (${SERIALIZE_NOREPLY_DRAIN_MS}ms) — ` +
2011
+ `turn ${turn.turnId} ended without a reply; force-draining buffered inbound\n`,
2012
+ )
2013
+ performBufferDrain('no-reply-bounded-drain')
2014
+ }, SERIALIZE_NOREPLY_DRAIN_MS)
2015
+ turn.noReplyDrainTimer.unref?.()
2016
+ }
2017
+
1725
2018
  function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
1726
2019
  // Phase 2b: turn end. The key was registered via setTurnStarted when
1727
2020
  // the inbound arrived; purge is the canonical turn-end signal.
@@ -1744,6 +2037,20 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
1744
2037
  activeStatusReactions.delete(key)
1745
2038
  activeReactionMsgIds.delete(key)
1746
2039
  activeTurnStartedAt.delete(key)
2040
+ // Component 5 (reap) — defense-in-depth. The happy path deletes the
2041
+ // queued-status placeholder on the answer (executeReply / stream /
2042
+ // turn-flush). This catches the abnormal turn-end (silent-marker, wedge,
2043
+ // sibling purge) so a stale "Queued"/"On it" line can never dangle in
2044
+ // the topic. Idempotent: a no-op when already reaped. Prefer the ending
2045
+ // turn's session ids (canonical ownership); else parse the chatKey.
2046
+ if (endingTurn != null) {
2047
+ reapQueuedStatus(endingTurn.sessionChatId, endingTurn.sessionThreadId)
2048
+ } else {
2049
+ const pqChatId = chatIdOfChatKey(key as _ChatKey)
2050
+ const pqThreadPart = (key as string).slice(pqChatId.length + 1)
2051
+ const pqThread = pqThreadPart === '_' || pqThreadPart === '' ? null : Number(pqThreadPart)
2052
+ reapQueuedStatus(pqChatId, Number.isFinite(pqThread) ? (pqThread as number) : undefined)
2053
+ }
1747
2054
  // PR3b: clear the parallel-turns fleet-gate entry. Symmetric with
1748
2055
  // the markClaudeBusyForInbound on the delivery path. Safe no-op
1749
2056
  // when the key was never marked (synthetic purge from a sweep).
@@ -1796,36 +2103,21 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
1796
2103
  // when the cutover kill-switch is on; the turnEnd event was emitted
1797
2104
  // just above (purgeReactionTracking head), so the machine is already
1798
2105
  // idle here.
2106
+ // #1556: the deterministic delivery point. claude has just gone idle —
2107
+ // flush any inbound held mid-turn so the channel notification lands at
2108
+ // the idle prompt and submits as a fresh turn (instead of stranding in
2109
+ // the composer, the lawgpt wedge). Component 1 (deliver-before-drain):
2110
+ // routed through `drainBufferedIfAllowed`, which additionally gates on
2111
+ // the ending turn having delivered its reply so a buffered cross-topic
2112
+ // message can't drain ahead of the just-ended turn's late reply (the
2113
+ // Brevo→Meta wrong-topic bug). Zero-churn: the helper depth-checks
2114
+ // first. Lossless: redeliver re-buffers any per-message miss.
2115
+ drainBufferedIfAllowed(endingTurn, 'turn-complete')
2116
+
2117
+ // Restart / compaction stay on the bare turn-end signal (NOT the
2118
+ // serialize gate): a pending self-restart or proactive compaction must
2119
+ // fire when claude is idle regardless of whether the last turn replied.
1799
2120
  if (!turnInFlightForGate()) {
1800
- // #1556: the deterministic delivery point. claude has just gone
1801
- // idle — flush any inbound held mid-turn so the channel
1802
- // notification lands at the idle prompt and submits as a fresh
1803
- // turn (instead of stranding in the composer, the lawgpt wedge).
1804
- // Zero-churn: depth check first, no work on the common empty path.
1805
- // Lossless: redeliver re-buffers any per-message miss (bridge
1806
- // mid-reconnect), which onClientRegistered then drains.
1807
- const selfAgentForFlush = process.env.SWITCHROOM_AGENT_NAME ?? ''
1808
- if (pendingInboundBuffer.depth(selfAgentForFlush) > 0) {
1809
- const fr = redeliverBufferedInbound(
1810
- pendingInboundBuffer,
1811
- selfAgentForFlush,
1812
- (m) => {
1813
- const d = ipcServer.sendToAgent(selfAgentForFlush, m)
1814
- if (d) markClaudeBusyForInbound(m)
1815
- return d
1816
- },
1817
- inboundSpool,
1818
- trackRedeliveredInbound,
1819
- )
1820
- if (fr.redelivered > 0) {
1821
- process.stderr.write(
1822
- `telegram gateway: turn-complete flushed ${fr.redelivered}/${fr.drained} ` +
1823
- `held inbound for ${selfAgentForFlush}` +
1824
- `${fr.rebuffered > 0 ? ` (${fr.rebuffered} re-buffered)` : ''}\n`,
1825
- )
1826
- }
1827
- }
1828
-
1829
2121
  if (pendingRestarts.size > 0) {
1830
2122
  for (const [agentName, _timestamp] of pendingRestarts.entries()) {
1831
2123
  triggerSelfRestart(agentName, 'turn-complete-pending-restart');
@@ -1879,7 +2171,7 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
1879
2171
  * `executeReply`'s post-send block and by tests via source-level
1880
2172
  * pinning in `vault-approval-posture.test.ts` / wedge-guard suites.
1881
2173
  */
1882
- function releaseTurnBufferGate(key: string): void {
2174
+ function releaseTurnBufferGate(key: string, endingTurn?: CurrentTurn): void {
1883
2175
  if (!activeTurnStartedAt.has(key)) return
1884
2176
  activeTurnStartedAt.delete(key)
1885
2177
  // PR3b: keep claudeBusyKeys in sync — same lifecycle as the
@@ -1890,38 +2182,21 @@ function releaseTurnBufferGate(key: string): void {
1890
2182
  // executeReply AFTER an outbound landed.
1891
2183
  shadowEmit({ kind: 'turnEnd', key: key as _ChatKey, at: Date.now(), outboundEmitted: true })
1892
2184
 
1893
- // Mirror the deterministic-delivery flush from
1894
- // `purgeReactionTracking` (gateway.ts:1376-1399). When the fleet
1895
- // hits zero-active-turns, drain any held inbound. This is the
1896
- // load-bearing wedge fix: the gate that pinned msg 1874+ in
2185
+ // Mirror the deterministic-delivery flush from `purgeReactionTracking`.
2186
+ // When the fleet hits zero-active-turns, drain any held inbound. This is
2187
+ // the load-bearing wedge fix: the gate that pinned msg 1874+ in
1897
2188
  // test-harness's 13:02 UAT now opens after the reply.
1898
2189
  //
1899
- // PR3b: gated on claudeBusyKeys (see purgeReactionTracking comment).
1900
- // PR3b-cutover: turnEnd was emitted just above (releaseTurnBufferGate
1901
- // head), so the machine is already idle when the cutover gate reads.
1902
- if (!turnInFlightForGate()) {
1903
- const selfAgentForFlush = process.env.SWITCHROOM_AGENT_NAME ?? ''
1904
- if (pendingInboundBuffer.depth(selfAgentForFlush) > 0) {
1905
- const fr = redeliverBufferedInbound(
1906
- pendingInboundBuffer,
1907
- selfAgentForFlush,
1908
- (m) => {
1909
- const d = ipcServer.sendToAgent(selfAgentForFlush, m)
1910
- if (d) markClaudeBusyForInbound(m)
1911
- return d
1912
- },
1913
- inboundSpool,
1914
- trackRedeliveredInbound,
1915
- )
1916
- if (fr.redelivered > 0) {
1917
- process.stderr.write(
1918
- `telegram gateway: reply-released-gate flushed ${fr.redelivered}/${fr.drained} ` +
1919
- `held inbound for ${selfAgentForFlush}` +
1920
- `${fr.rebuffered > 0 ? ` (${fr.rebuffered} re-buffered)` : ''}\n`,
1921
- )
1922
- }
1923
- }
1924
- }
2190
+ // Component 1 (deliver-before-drain): routed through the shared
2191
+ // `drainBufferedIfAllowed`. `releaseTurnBufferGate` is called on EVERY
2192
+ // reply finalize — interim ack AND final answer. The serialize gate
2193
+ // checks `endingTurn.finalAnswerDelivered`, so an INTERIM ack ("On it")
2194
+ // does NOT drain the cross-topic buffer (its turn hasn't delivered its
2195
+ // real answer yet); only the final-answer reply releases it. That is
2196
+ // exactly the serialize-until-replied contract. When the kill switch is
2197
+ // off, or no turn handle is threaded, the helper falls back to the
2198
+ // legacy drain-on-idle behaviour.
2199
+ drainBufferedIfAllowed(endingTurn, 'reply-released-gate')
1925
2200
  }
1926
2201
 
1927
2202
  /**
@@ -1947,12 +2222,27 @@ function releaseTurnBufferGate(key: string): void {
1947
2222
  function endCurrentTurnAtomic(turn: CurrentTurn): void {
1948
2223
  if (currentTurn !== turn) return
1949
2224
  currentTurn = null
2225
+ // Component 2 — clear any prior no-reply drain timer for this turn; a
2226
+ // fresh end re-evaluates below. (Idempotent — null when never armed.)
2227
+ if (turn.noReplyDrainTimer != null) {
2228
+ clearTimeout(turn.noReplyDrainTimer)
2229
+ turn.noReplyDrainTimer = null
2230
+ }
1950
2231
  // Pass `turn` so purgeReactionTracking sees the authoritative
1951
2232
  // replyCalled flag even though we just nulled module-scope
1952
2233
  // currentTurn. Without this, the shadow trace's outboundEmitted
1953
2234
  // would be false on every replied turn (the dominant happy path),
1954
2235
  // producing strictly worse data than the blind `true` it replaced.
2236
+ // Component 1: purgeReactionTracking runs the serialize-gated drain —
2237
+ // it drains only if this turn delivered its final answer.
1955
2238
  purgeReactionTracking(statusKey(turn.sessionChatId, turn.sessionThreadId), turn)
2239
+ // Component 2 — bounded no-reply escape hatch. If this turn ended
2240
+ // WITHOUT delivering (finalAnswerDelivered=false) the serialize gate
2241
+ // above did NOT drain. Arm the bounded timer so a queued cross-topic
2242
+ // message still releases within SERIALIZE_NOREPLY_DRAIN_MS instead of
2243
+ // wedging forever. No-op when this turn delivered, when nothing is
2244
+ // buffered, or when the serialize feature is off.
2245
+ armNoReplyDrainTimer(turn)
1956
2246
  }
1957
2247
 
1958
2248
  /**
@@ -5446,10 +5736,16 @@ if (!STATIC) {
5446
5736
  // queued message ends either delivered or visibly retracted.
5447
5737
  inboundSpool?.sweepEscalations((e) => {
5448
5738
  const chat = e.msg.chatId
5449
- const threadOpts =
5739
+ const escThread =
5450
5740
  typeof e.msg.meta?.threadId === 'string' && e.msg.meta.threadId
5451
- ? { message_thread_id: Number(e.msg.meta.threadId) }
5452
- : {}
5741
+ ? Number(e.msg.meta.threadId)
5742
+ : undefined
5743
+ const threadOpts = escThread != null ? { message_thread_id: escThread } : {}
5744
+ // Reap any "Queued — replying in #X" placeholder for this topic first:
5745
+ // the message is being declared undeliverable, so the queued-status must
5746
+ // not dangle beside the "couldn't deliver" notice (idempotent best-effort;
5747
+ // a normal turn-start/turn-end reaps far sooner — this is the 15-min edge).
5748
+ reapQueuedStatus(chat, escThread)
5453
5749
  void swallowingApiCall(
5454
5750
  () =>
5455
5751
  bot.api.sendMessage(
@@ -5765,21 +6061,34 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
5765
6061
 
5766
6062
  assertAllowedChat(chat_id)
5767
6063
 
5768
- // Thread resolution precedence: (1) an explicit message_thread_id the
5769
- // model passed, else (2) THIS turn's own originating topic
5770
- // (turn-pinned, #1664), else (3) the chat's last-seen topic
5771
- // (chatThreadMap). Preferring the turn's own thread over the chat
5772
- // last-seen heuristic fixes synthetic turns (subagent handback/progress,
5773
- // cron) whose topic the model is never told and which never write
5774
- // chatThreadMap and is strictly more correct under multi-topic
5775
- // concurrency (a reply lands in the topic the turn came from, not
5776
- // whichever topic most recently received a message). DM: both are
5777
- // undefined → unchanged.
5778
- let threadId = resolveThreadId(
5779
- chat_id,
5780
- (args.message_thread_id as string | undefined) ??
5781
- (turn?.sessionThreadId != null ? turn.sessionThreadId : undefined),
5782
- )
6064
+ // Thread resolution precedence (ANSWER path, component 3 — turn-origin
6065
+ // routing): (1) explicit message_thread_id the model passed; else
6066
+ // (2) the ORIGIN turn's thread — the turn that OWNS this reply, matched
6067
+ // by origin_turn_id (the meta field the model echoes back). This is
6068
+ // authoritative even after `currentTurn` has flipped to a successor (the
6069
+ // Brevo→Meta late-reply bug). Else (3) the live turn's thread (legacy
6070
+ // #1664 fallback when no origin turn is resolvable). Answer paths
6071
+ // DELIBERATELY do NOT fall through to chatThreadMap last-seen — that
6072
+ // heuristic is what mis-routed a late reply to whichever topic most
6073
+ // recently received a message. DM: every tier is undefined → unchanged.
6074
+ // Kill switch off → exact legacy resolveThreadId precedence.
6075
+ let threadId: number | undefined
6076
+ if (TURN_ORIGIN_ROUTING_ENABLED) {
6077
+ const explicit = args.message_thread_id != null ? Number(args.message_thread_id) : undefined
6078
+ const originTurn = findTurnByOriginId(args.origin_turn_id as string | undefined)
6079
+ threadId = resolveAnswerThreadId({
6080
+ explicitThreadId: Number.isFinite(explicit as number) ? (explicit as number) : undefined,
6081
+ originResolved: originTurn != null,
6082
+ originThreadId: originTurn?.sessionThreadId,
6083
+ liveThreadId: turn?.sessionThreadId,
6084
+ })
6085
+ } else {
6086
+ threadId = resolveThreadId(
6087
+ chat_id,
6088
+ (args.message_thread_id as string | undefined) ??
6089
+ (turn?.sessionThreadId != null ? turn.sessionThreadId : undefined),
6090
+ )
6091
+ }
5783
6092
 
5784
6093
  if (reply_to == null && quoteOptIn && HISTORY_ENABLED) {
5785
6094
  try {
@@ -6360,7 +6669,19 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
6360
6669
  // bidirectional ladder + the steer-vs-queue logic at
6361
6670
  // gateway.ts:8322 which reads `activeStatusReactions`). Only
6362
6671
  // the buffer gate flips.
6363
- releaseTurnBufferGate(statusKey(chat_id, threadId))
6672
+ //
6673
+ // Component 1: pass the turn so the serialize gate sees this turn's
6674
+ // `finalAnswerDelivered` (set just above for final-answer replies).
6675
+ // An interim ack leaves it false → the cross-topic buffer does NOT
6676
+ // drain yet; the real answer's reply releases it.
6677
+ releaseTurnBufferGate(statusKey(chat_id, threadId), turn ?? undefined)
6678
+ // Component 5: the final answer landed — reap the queued-status
6679
+ // placeholder for THIS turn's topic. Key on the turn's own session
6680
+ // thread (where the placeholder was posted / promoted), not the
6681
+ // answer's possibly-overridden threadId.
6682
+ if (turn?.finalAnswerDelivered === true) {
6683
+ reapQueuedStatus(turn.sessionChatId, turn.sessionThreadId)
6684
+ }
6364
6685
  }
6365
6686
 
6366
6687
  process.stderr.write(`telegram channel: reply: finalized chatId=${chat_id} messageIds=[${sentIds.join(',')}] chunks=${chunks.length}\n`)
@@ -6378,15 +6699,31 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
6378
6699
  const turn = currentTurn
6379
6700
  if (!args.chat_id) throw new Error('stream_reply: chat_id is required')
6380
6701
  if (args.text == null || args.text === '') throw new Error('stream_reply: text is required and cannot be empty')
6381
- // Thread precedence (matches executeReply): when the model passes no
6382
- // explicit message_thread_id, fall back to THIS turn's originating
6383
- // topic before handleStreamReply's chatThreadMap last-seen heuristic.
6384
- // Injecting here threads every downstream consumer consistently — the
6385
- // dedup key, the voice-scrub metric, the draft transport, and the send —
6386
- // so a streamed handback/synthetic-turn reply lands in the right
6387
- // supergroup topic. DM: sessionThreadId undefined → unchanged.
6388
- if (args.message_thread_id == null && turn?.sessionThreadId != null) {
6389
- args.message_thread_id = String(turn.sessionThreadId)
6702
+ // Thread precedence (matches executeReply; component 3 — turn-origin
6703
+ // routing): when the model passes no explicit message_thread_id, inject
6704
+ // the ORIGIN turn's thread (matched by origin_turn_id) — authoritative
6705
+ // even after currentTurn flips — falling back to the live turn's thread
6706
+ // when no origin is resolvable (legacy #1664). Injecting into
6707
+ // args.message_thread_id threads every downstream consumer consistently
6708
+ // (dedup key, voice-scrub metric, draft transport, the send), so a
6709
+ // streamed handback/synthetic-turn reply lands in the right supergroup
6710
+ // topic and a late stream-reply can't be stolen by a successor turn. DM:
6711
+ // every tier undefined → unchanged. Kill switch off → legacy live-turn
6712
+ // injection only.
6713
+ if (args.message_thread_id == null) {
6714
+ let injected: number | undefined
6715
+ if (TURN_ORIGIN_ROUTING_ENABLED) {
6716
+ const originTurn = findTurnByOriginId(args.origin_turn_id as string | undefined)
6717
+ injected = resolveAnswerThreadId({
6718
+ explicitThreadId: undefined,
6719
+ originResolved: originTurn != null,
6720
+ originThreadId: originTurn?.sessionThreadId,
6721
+ liveThreadId: turn?.sessionThreadId,
6722
+ })
6723
+ } else {
6724
+ injected = turn?.sessionThreadId
6725
+ }
6726
+ if (injected != null) args.message_thread_id = String(injected)
6390
6727
  }
6391
6728
 
6392
6729
  // Outbound secret scrub (#2044): mask before the dedup key, the draft
@@ -6674,7 +7011,16 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
6674
7011
  {
6675
7012
  const sChat = args.chat_id as string
6676
7013
  const sThread = resolveThreadId(sChat, args.message_thread_id as string | undefined)
6677
- releaseTurnBufferGate(statusKey(sChat, sThread))
7014
+ // Component 1: pass the turn (finalAnswerDelivered set above for a
7015
+ // final stream emit). Interim stream chunks leave it false → no
7016
+ // cross-topic drain until the done=true / substantive emit lands.
7017
+ releaseTurnBufferGate(statusKey(sChat, sThread), turn ?? undefined)
7018
+ // Component 5: reap the queued-status placeholder for THIS turn's
7019
+ // topic once the final answer landed. Key on the turn's own session
7020
+ // thread (where the placeholder lives), not the answer's resolved one.
7021
+ if (turn?.finalAnswerDelivered === true) {
7022
+ reapQueuedStatus(turn.sessionChatId, turn.sessionThreadId)
7023
+ }
6678
7024
  }
6679
7025
  return { content: [{ type: 'text', text: `${result.status} (id: ${result.messageId ?? 'pending'})` }] }
6680
7026
  }
@@ -8200,9 +8546,19 @@ function handleSessionEvent(ev: SessionEvent): void {
8200
8546
  // handler captures `const turn = currentTurn` at entry, so a
8201
8547
  // captured-then-awaited read can't reattribute to the new turn.
8202
8548
  const startedAt = Date.now()
8549
+ // Component 3 — stable per-turn identity. For a real inbound this
8550
+ // matches the `origin_turn_id` stamped into the inbound meta at
8551
+ // build time (same chat/thread/messageId). Synthetic turns (cron /
8552
+ // handback — no messageId) get a unique startedAt-based fallback id
8553
+ // that no reply will ever echo, so they correctly fall through to
8554
+ // the live-turn routing in resolveAnswerThreadId.
8555
+ const enqThreadIdNum = ev.threadId != null ? Number(ev.threadId) : undefined
8556
+ const turnId =
8557
+ deriveTurnId(ev.chatId, enqThreadIdNum ?? null, ev.messageId)
8558
+ ?? `${chatKey(ev.chatId, enqThreadIdNum ?? null)}#synthetic-${startedAt}`
8203
8559
  const next: CurrentTurn = {
8204
8560
  sessionChatId: ev.chatId,
8205
- sessionThreadId: ev.threadId != null ? Number(ev.threadId) : undefined,
8561
+ sessionThreadId: enqThreadIdNum,
8206
8562
  sourceMessageId: ev.messageId != null && /^\d+$/.test(ev.messageId)
8207
8563
  ? Number(ev.messageId)
8208
8564
  : null,
@@ -8215,7 +8571,9 @@ function handleSessionEvent(ev: SessionEvent): void {
8215
8571
  silentAnchorText: '',
8216
8572
  capturedText: [],
8217
8573
  orphanedReplyTimeoutId: null,
8574
+ turnId,
8218
8575
  registryKey: null,
8576
+ noReplyDrainTimer: null,
8219
8577
  lastAssistantMsgId: null,
8220
8578
  lastAssistantDone: false,
8221
8579
  toolCallCount: 0,
@@ -8229,6 +8587,14 @@ function handleSessionEvent(ev: SessionEvent): void {
8229
8587
  isDm: isDmChatId(ev.chatId),
8230
8588
  }
8231
8589
  currentTurn = next
8590
+ // Component 3 — retain in the bounded recently-ended registry so a
8591
+ // LATE reply (landing after currentTurn flips to a successor) can
8592
+ // still resolve THIS turn's origin thread by its turnId.
8593
+ rememberRecentTurn(next)
8594
+ // Component 5 (Hook B) — this turn's topic had a queued placeholder
8595
+ // from Hook A; promote it to "On it — replying now." (deleted later
8596
+ // when the answer lands). No-op when there's no placeholder / DM.
8597
+ promoteQueuedStatus(ev.chatId, enqThreadIdNum)
8232
8598
  // Ack inbound delivery (the marko drop-wedge): claude actually started
8233
8599
  // this turn, so its delivered inbound landed — stop tracking it for
8234
8600
  // re-delivery. `enqueue` carries the same chat/thread the inbound was
@@ -8997,6 +9363,13 @@ function handleSessionEvent(ev: SessionEvent): void {
8997
9363
 
8998
9364
  if (flushDecision.kind === 'flush') {
8999
9365
  let capturedText = flushDecision.text
9366
+ // Component 3 — origin-thread backstop. `chatId`/`threadId` are
9367
+ // captured from the turn atom (turn.sessionChatId/sessionThreadId)
9368
+ // at the top of this turn_end handler, NOT from the live
9369
+ // currentTurn and NEVER from chatThreadMap. So the turn-flush
9370
+ // answer always lands in the thread the turn originated from, even
9371
+ // if currentTurn has flipped — the same guarantee the reply path
9372
+ // gets via origin_turn_id.
9000
9373
  const backstopChatId = chatId
9001
9374
  const backstopThreadId = threadId
9002
9375
  const backstopCtrl = ctrl
@@ -10812,6 +11185,21 @@ async function handleInbound(
10812
11185
  }
10813
11186
 
10814
11187
  // Dispatch to connected bridge via IPC
11188
+ // Component 3 — stable origin turn id stamped into the meta the model
11189
+ // reads, so a reply can echo it back (origin_turn_id) and the gateway
11190
+ // can route the answer to the turn that owns it even after currentTurn
11191
+ // flips. Derived from chat/thread/messageId, matching the turnId the
11192
+ // enqueue handler computes for the turn this inbound starts.
11193
+ const originTurnId = deriveTurnId(chat_id, messageThreadId ?? null, msgId)
11194
+ // Component 4 — per-turn topic framing. In a forum supergroup a queued
11195
+ // cross-topic message could tempt the model to also answer a pending
11196
+ // question from another topic. A one-line directive (only for topic
11197
+ // inbounds, only when framing is enabled) pins the model to THIS
11198
+ // message's topic. DMs / non-topic chats get nothing.
11199
+ const topicScope =
11200
+ TOPIC_FRAMING_ENABLED && messageThreadId != null
11201
+ ? 'This message belongs to the current topic only — answer ONLY this question, in this topic. Do not also answer a pending message from another topic.'
11202
+ : undefined
10815
11203
  const inboundMsg: InboundMessage = {
10816
11204
  type: 'inbound',
10817
11205
  chatId: chat_id,
@@ -10836,6 +11224,14 @@ async function handleInbound(
10836
11224
  user_id: String(from.id),
10837
11225
  ts: new Date((ctx.message?.date ?? 0) * 1000).toISOString(),
10838
11226
  ...(messageThreadId != null ? { message_thread_id: String(messageThreadId) } : {}),
11227
+ // Component 3 — origin turn id. The model is told to pass this back
11228
+ // as origin_turn_id on the reply so the answer routes to the topic
11229
+ // this message came from (turn-origin routing). The reply tool also
11230
+ // resolves it from the live/recent turn registry, so a model that
11231
+ // omits it still routes correctly via the live-turn fallback.
11232
+ ...(originTurnId != null ? { origin_turn_id: originTurnId } : {}),
11233
+ // Component 4 — per-turn topic-scope directive (supergroup topics).
11234
+ ...(topicScope != null ? { topic_scope: topicScope } : {}),
10839
11235
  ...(imagePath ? { image_path: imagePath } : {}),
10840
11236
  // Telegram-native reply context (issue #119). When set, the user
10841
11237
  // long-pressed a prior message and chose "Reply" — the agent should
@@ -10949,6 +11345,21 @@ async function handleInbound(
10949
11345
  `telegram gateway: inbound held mid-turn agent=${selfAgent} ` +
10950
11346
  `chat=${chat_id} msg=${msgId ?? '-'} — will flush on turn-complete\n`,
10951
11347
  )
11348
+ // Component 5 (Hook A) — queued-status UX. When this buffered inbound
11349
+ // is in a DIFFERENT forum topic than the in-flight turn, the user in
11350
+ // that topic otherwise sees only a 👀 reaction (or nothing). Post one
11351
+ // self-updating status into the buffered message's OWN topic so they
11352
+ // know they're queued. Suppressed for DMs (no topics) and same-topic
11353
+ // queues (the in-flight turn's own card already covers them).
11354
+ const inFlightThread = currentTurn?.sessionThreadId
11355
+ if (
11356
+ QUEUED_STATUS_UX_ENABLED &&
11357
+ !isDmChatId(chat_id) &&
11358
+ messageThreadId != null &&
11359
+ messageThreadId !== inFlightThread
11360
+ ) {
11361
+ postQueuedStatus(chat_id, messageThreadId, inFlightThread)
11362
+ }
10952
11363
  return
10953
11364
  }
10954
11365